1 /* 2 * Copyright (C) 2003 Sistina Software Limited. 3 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ 7 8 #include <linux/device-mapper.h> 9 10 #include "dm-rq.h" 11 #include "dm-bio-record.h" 12 #include "dm-path-selector.h" 13 #include "dm-uevent.h" 14 15 #include <linux/blkdev.h> 16 #include <linux/ctype.h> 17 #include <linux/init.h> 18 #include <linux/mempool.h> 19 #include <linux/module.h> 20 #include <linux/pagemap.h> 21 #include <linux/slab.h> 22 #include <linux/time.h> 23 #include <linux/timer.h> 24 #include <linux/workqueue.h> 25 #include <linux/delay.h> 26 #include <scsi/scsi_dh.h> 27 #include <linux/atomic.h> 28 #include <linux/blk-mq.h> 29 30 #define DM_MSG_PREFIX "multipath" 31 #define DM_PG_INIT_DELAY_MSECS 2000 32 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1) 33 #define QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT 0 34 35 static unsigned long queue_if_no_path_timeout_secs = QUEUE_IF_NO_PATH_TIMEOUT_DEFAULT; 36 37 /* Path properties */ 38 struct pgpath { 39 struct list_head list; 40 41 struct priority_group *pg; /* Owning PG */ 42 unsigned fail_count; /* Cumulative failure count */ 43 44 struct dm_path path; 45 struct delayed_work activate_path; 46 47 bool is_active:1; /* Path status */ 48 }; 49 50 #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) 51 52 /* 53 * Paths are grouped into Priority Groups and numbered from 1 upwards. 54 * Each has a path selector which controls which path gets used. 55 */ 56 struct priority_group { 57 struct list_head list; 58 59 struct multipath *m; /* Owning multipath instance */ 60 struct path_selector ps; 61 62 unsigned pg_num; /* Reference number */ 63 unsigned nr_pgpaths; /* Number of paths in PG */ 64 struct list_head pgpaths; 65 66 bool bypassed:1; /* Temporarily bypass this PG? */ 67 }; 68 69 /* Multipath context */ 70 struct multipath { 71 unsigned long flags; /* Multipath state flags */ 72 73 spinlock_t lock; 74 enum dm_queue_mode queue_mode; 75 76 struct pgpath *current_pgpath; 77 struct priority_group *current_pg; 78 struct priority_group *next_pg; /* Switch to this PG if set */ 79 80 atomic_t nr_valid_paths; /* Total number of usable paths */ 81 unsigned nr_priority_groups; 82 struct list_head priority_groups; 83 84 const char *hw_handler_name; 85 char *hw_handler_params; 86 wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */ 87 unsigned pg_init_retries; /* Number of times to retry pg_init */ 88 unsigned pg_init_delay_msecs; /* Number of msecs before pg_init retry */ 89 atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */ 90 atomic_t pg_init_count; /* Number of times pg_init called */ 91 92 struct mutex work_mutex; 93 struct work_struct trigger_event; 94 struct dm_target *ti; 95 96 struct work_struct process_queued_bios; 97 struct bio_list queued_bios; 98 99 struct timer_list nopath_timer; /* Timeout for queue_if_no_path */ 100 }; 101 102 /* 103 * Context information attached to each io we process. 104 */ 105 struct dm_mpath_io { 106 struct pgpath *pgpath; 107 size_t nr_bytes; 108 }; 109 110 typedef int (*action_fn) (struct pgpath *pgpath); 111 112 static struct workqueue_struct *kmultipathd, *kmpath_handlerd; 113 static void trigger_event(struct work_struct *work); 114 static void activate_or_offline_path(struct pgpath *pgpath); 115 static void activate_path_work(struct work_struct *work); 116 static void process_queued_bios(struct work_struct *work); 117 static void queue_if_no_path_timeout_work(struct timer_list *t); 118 119 /*----------------------------------------------- 120 * Multipath state flags. 121 *-----------------------------------------------*/ 122 123 #define MPATHF_QUEUE_IO 0 /* Must we queue all I/O? */ 124 #define MPATHF_QUEUE_IF_NO_PATH 1 /* Queue I/O if last path fails? */ 125 #define MPATHF_SAVED_QUEUE_IF_NO_PATH 2 /* Saved state during suspension */ 126 #define MPATHF_RETAIN_ATTACHED_HW_HANDLER 3 /* If there's already a hw_handler present, don't change it. */ 127 #define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */ 128 #define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */ 129 #define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */ 130 131 /*----------------------------------------------- 132 * Allocation routines 133 *-----------------------------------------------*/ 134 135 static struct pgpath *alloc_pgpath(void) 136 { 137 struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL); 138 139 if (!pgpath) 140 return NULL; 141 142 pgpath->is_active = true; 143 144 return pgpath; 145 } 146 147 static void free_pgpath(struct pgpath *pgpath) 148 { 149 kfree(pgpath); 150 } 151 152 static struct priority_group *alloc_priority_group(void) 153 { 154 struct priority_group *pg; 155 156 pg = kzalloc(sizeof(*pg), GFP_KERNEL); 157 158 if (pg) 159 INIT_LIST_HEAD(&pg->pgpaths); 160 161 return pg; 162 } 163 164 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) 165 { 166 struct pgpath *pgpath, *tmp; 167 168 list_for_each_entry_safe(pgpath, tmp, pgpaths, list) { 169 list_del(&pgpath->list); 170 dm_put_device(ti, pgpath->path.dev); 171 free_pgpath(pgpath); 172 } 173 } 174 175 static void free_priority_group(struct priority_group *pg, 176 struct dm_target *ti) 177 { 178 struct path_selector *ps = &pg->ps; 179 180 if (ps->type) { 181 ps->type->destroy(ps); 182 dm_put_path_selector(ps->type); 183 } 184 185 free_pgpaths(&pg->pgpaths, ti); 186 kfree(pg); 187 } 188 189 static struct multipath *alloc_multipath(struct dm_target *ti) 190 { 191 struct multipath *m; 192 193 m = kzalloc(sizeof(*m), GFP_KERNEL); 194 if (m) { 195 INIT_LIST_HEAD(&m->priority_groups); 196 spin_lock_init(&m->lock); 197 atomic_set(&m->nr_valid_paths, 0); 198 INIT_WORK(&m->trigger_event, trigger_event); 199 mutex_init(&m->work_mutex); 200 201 m->queue_mode = DM_TYPE_NONE; 202 203 m->ti = ti; 204 ti->private = m; 205 206 timer_setup(&m->nopath_timer, queue_if_no_path_timeout_work, 0); 207 } 208 209 return m; 210 } 211 212 static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) 213 { 214 if (m->queue_mode == DM_TYPE_NONE) { 215 m->queue_mode = DM_TYPE_REQUEST_BASED; 216 } else if (m->queue_mode == DM_TYPE_BIO_BASED) { 217 INIT_WORK(&m->process_queued_bios, process_queued_bios); 218 /* 219 * bio-based doesn't support any direct scsi_dh management; 220 * it just discovers if a scsi_dh is attached. 221 */ 222 set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); 223 } 224 225 dm_table_set_type(ti->table, m->queue_mode); 226 227 /* 228 * Init fields that are only used when a scsi_dh is attached 229 * - must do this unconditionally (really doesn't hurt non-SCSI uses) 230 */ 231 set_bit(MPATHF_QUEUE_IO, &m->flags); 232 atomic_set(&m->pg_init_in_progress, 0); 233 atomic_set(&m->pg_init_count, 0); 234 m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT; 235 init_waitqueue_head(&m->pg_init_wait); 236 237 return 0; 238 } 239 240 static void free_multipath(struct multipath *m) 241 { 242 struct priority_group *pg, *tmp; 243 244 list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) { 245 list_del(&pg->list); 246 free_priority_group(pg, m->ti); 247 } 248 249 kfree(m->hw_handler_name); 250 kfree(m->hw_handler_params); 251 mutex_destroy(&m->work_mutex); 252 kfree(m); 253 } 254 255 static struct dm_mpath_io *get_mpio(union map_info *info) 256 { 257 return info->ptr; 258 } 259 260 static size_t multipath_per_bio_data_size(void) 261 { 262 return sizeof(struct dm_mpath_io) + sizeof(struct dm_bio_details); 263 } 264 265 static struct dm_mpath_io *get_mpio_from_bio(struct bio *bio) 266 { 267 return dm_per_bio_data(bio, multipath_per_bio_data_size()); 268 } 269 270 static struct dm_bio_details *get_bio_details_from_mpio(struct dm_mpath_io *mpio) 271 { 272 /* dm_bio_details is immediately after the dm_mpath_io in bio's per-bio-data */ 273 void *bio_details = mpio + 1; 274 return bio_details; 275 } 276 277 static void multipath_init_per_bio_data(struct bio *bio, struct dm_mpath_io **mpio_p) 278 { 279 struct dm_mpath_io *mpio = get_mpio_from_bio(bio); 280 struct dm_bio_details *bio_details = get_bio_details_from_mpio(mpio); 281 282 mpio->nr_bytes = bio->bi_iter.bi_size; 283 mpio->pgpath = NULL; 284 *mpio_p = mpio; 285 286 dm_bio_record(bio_details, bio); 287 } 288 289 /*----------------------------------------------- 290 * Path selection 291 *-----------------------------------------------*/ 292 293 static int __pg_init_all_paths(struct multipath *m) 294 { 295 struct pgpath *pgpath; 296 unsigned long pg_init_delay = 0; 297 298 lockdep_assert_held(&m->lock); 299 300 if (atomic_read(&m->pg_init_in_progress) || test_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) 301 return 0; 302 303 atomic_inc(&m->pg_init_count); 304 clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 305 306 /* Check here to reset pg_init_required */ 307 if (!m->current_pg) 308 return 0; 309 310 if (test_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags)) 311 pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ? 312 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS); 313 list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) { 314 /* Skip failed paths */ 315 if (!pgpath->is_active) 316 continue; 317 if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path, 318 pg_init_delay)) 319 atomic_inc(&m->pg_init_in_progress); 320 } 321 return atomic_read(&m->pg_init_in_progress); 322 } 323 324 static int pg_init_all_paths(struct multipath *m) 325 { 326 int ret; 327 unsigned long flags; 328 329 spin_lock_irqsave(&m->lock, flags); 330 ret = __pg_init_all_paths(m); 331 spin_unlock_irqrestore(&m->lock, flags); 332 333 return ret; 334 } 335 336 static void __switch_pg(struct multipath *m, struct priority_group *pg) 337 { 338 m->current_pg = pg; 339 340 /* Must we initialise the PG first, and queue I/O till it's ready? */ 341 if (m->hw_handler_name) { 342 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 343 set_bit(MPATHF_QUEUE_IO, &m->flags); 344 } else { 345 clear_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 346 clear_bit(MPATHF_QUEUE_IO, &m->flags); 347 } 348 349 atomic_set(&m->pg_init_count, 0); 350 } 351 352 static struct pgpath *choose_path_in_pg(struct multipath *m, 353 struct priority_group *pg, 354 size_t nr_bytes) 355 { 356 unsigned long flags; 357 struct dm_path *path; 358 struct pgpath *pgpath; 359 360 path = pg->ps.type->select_path(&pg->ps, nr_bytes); 361 if (!path) 362 return ERR_PTR(-ENXIO); 363 364 pgpath = path_to_pgpath(path); 365 366 if (unlikely(READ_ONCE(m->current_pg) != pg)) { 367 /* Only update current_pgpath if pg changed */ 368 spin_lock_irqsave(&m->lock, flags); 369 m->current_pgpath = pgpath; 370 __switch_pg(m, pg); 371 spin_unlock_irqrestore(&m->lock, flags); 372 } 373 374 return pgpath; 375 } 376 377 static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes) 378 { 379 unsigned long flags; 380 struct priority_group *pg; 381 struct pgpath *pgpath; 382 unsigned bypassed = 1; 383 384 if (!atomic_read(&m->nr_valid_paths)) { 385 clear_bit(MPATHF_QUEUE_IO, &m->flags); 386 goto failed; 387 } 388 389 /* Were we instructed to switch PG? */ 390 if (READ_ONCE(m->next_pg)) { 391 spin_lock_irqsave(&m->lock, flags); 392 pg = m->next_pg; 393 if (!pg) { 394 spin_unlock_irqrestore(&m->lock, flags); 395 goto check_current_pg; 396 } 397 m->next_pg = NULL; 398 spin_unlock_irqrestore(&m->lock, flags); 399 pgpath = choose_path_in_pg(m, pg, nr_bytes); 400 if (!IS_ERR_OR_NULL(pgpath)) 401 return pgpath; 402 } 403 404 /* Don't change PG until it has no remaining paths */ 405 check_current_pg: 406 pg = READ_ONCE(m->current_pg); 407 if (pg) { 408 pgpath = choose_path_in_pg(m, pg, nr_bytes); 409 if (!IS_ERR_OR_NULL(pgpath)) 410 return pgpath; 411 } 412 413 /* 414 * Loop through priority groups until we find a valid path. 415 * First time we skip PGs marked 'bypassed'. 416 * Second time we only try the ones we skipped, but set 417 * pg_init_delay_retry so we do not hammer controllers. 418 */ 419 do { 420 list_for_each_entry(pg, &m->priority_groups, list) { 421 if (pg->bypassed == !!bypassed) 422 continue; 423 pgpath = choose_path_in_pg(m, pg, nr_bytes); 424 if (!IS_ERR_OR_NULL(pgpath)) { 425 if (!bypassed) 426 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); 427 return pgpath; 428 } 429 } 430 } while (bypassed--); 431 432 failed: 433 spin_lock_irqsave(&m->lock, flags); 434 m->current_pgpath = NULL; 435 m->current_pg = NULL; 436 spin_unlock_irqrestore(&m->lock, flags); 437 438 return NULL; 439 } 440 441 /* 442 * dm_report_EIO() is a macro instead of a function to make pr_debug_ratelimited() 443 * report the function name and line number of the function from which 444 * it has been invoked. 445 */ 446 #define dm_report_EIO(m) \ 447 do { \ 448 struct mapped_device *md = dm_table_get_md((m)->ti->table); \ 449 \ 450 DMDEBUG_LIMIT("%s: returning EIO; QIFNP = %d; SQIFNP = %d; DNFS = %d", \ 451 dm_device_name(md), \ 452 test_bit(MPATHF_QUEUE_IF_NO_PATH, &(m)->flags), \ 453 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &(m)->flags), \ 454 dm_noflush_suspending((m)->ti)); \ 455 } while (0) 456 457 /* 458 * Check whether bios must be queued in the device-mapper core rather 459 * than here in the target. 460 */ 461 static bool __must_push_back(struct multipath *m) 462 { 463 return dm_noflush_suspending(m->ti); 464 } 465 466 static bool must_push_back_rq(struct multipath *m) 467 { 468 return test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) || __must_push_back(m); 469 } 470 471 /* 472 * Map cloned requests (request-based multipath) 473 */ 474 static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, 475 union map_info *map_context, 476 struct request **__clone) 477 { 478 struct multipath *m = ti->private; 479 size_t nr_bytes = blk_rq_bytes(rq); 480 struct pgpath *pgpath; 481 struct block_device *bdev; 482 struct dm_mpath_io *mpio = get_mpio(map_context); 483 struct request_queue *q; 484 struct request *clone; 485 486 /* Do we need to select a new pgpath? */ 487 pgpath = READ_ONCE(m->current_pgpath); 488 if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) 489 pgpath = choose_pgpath(m, nr_bytes); 490 491 if (!pgpath) { 492 if (must_push_back_rq(m)) 493 return DM_MAPIO_DELAY_REQUEUE; 494 dm_report_EIO(m); /* Failed */ 495 return DM_MAPIO_KILL; 496 } else if (test_bit(MPATHF_QUEUE_IO, &m->flags) || 497 test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { 498 pg_init_all_paths(m); 499 return DM_MAPIO_DELAY_REQUEUE; 500 } 501 502 mpio->pgpath = pgpath; 503 mpio->nr_bytes = nr_bytes; 504 505 bdev = pgpath->path.dev->bdev; 506 q = bdev_get_queue(bdev); 507 clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, 508 BLK_MQ_REQ_NOWAIT); 509 if (IS_ERR(clone)) { 510 /* EBUSY, ENODEV or EWOULDBLOCK: requeue */ 511 if (blk_queue_dying(q)) { 512 atomic_inc(&m->pg_init_in_progress); 513 activate_or_offline_path(pgpath); 514 return DM_MAPIO_DELAY_REQUEUE; 515 } 516 517 /* 518 * blk-mq's SCHED_RESTART can cover this requeue, so we 519 * needn't deal with it by DELAY_REQUEUE. More importantly, 520 * we have to return DM_MAPIO_REQUEUE so that blk-mq can 521 * get the queue busy feedback (via BLK_STS_RESOURCE), 522 * otherwise I/O merging can suffer. 523 */ 524 return DM_MAPIO_REQUEUE; 525 } 526 clone->bio = clone->biotail = NULL; 527 clone->rq_disk = bdev->bd_disk; 528 clone->cmd_flags |= REQ_FAILFAST_TRANSPORT; 529 *__clone = clone; 530 531 if (pgpath->pg->ps.type->start_io) 532 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, 533 &pgpath->path, 534 nr_bytes); 535 return DM_MAPIO_REMAPPED; 536 } 537 538 static void multipath_release_clone(struct request *clone, 539 union map_info *map_context) 540 { 541 if (unlikely(map_context)) { 542 /* 543 * non-NULL map_context means caller is still map 544 * method; must undo multipath_clone_and_map() 545 */ 546 struct dm_mpath_io *mpio = get_mpio(map_context); 547 struct pgpath *pgpath = mpio->pgpath; 548 549 if (pgpath && pgpath->pg->ps.type->end_io) 550 pgpath->pg->ps.type->end_io(&pgpath->pg->ps, 551 &pgpath->path, 552 mpio->nr_bytes, 553 clone->io_start_time_ns); 554 } 555 556 blk_put_request(clone); 557 } 558 559 /* 560 * Map cloned bios (bio-based multipath) 561 */ 562 563 static struct pgpath *__map_bio(struct multipath *m, struct bio *bio) 564 { 565 struct pgpath *pgpath; 566 unsigned long flags; 567 bool queue_io; 568 569 /* Do we need to select a new pgpath? */ 570 pgpath = READ_ONCE(m->current_pgpath); 571 if (!pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) 572 pgpath = choose_pgpath(m, bio->bi_iter.bi_size); 573 574 /* MPATHF_QUEUE_IO might have been cleared by choose_pgpath. */ 575 queue_io = test_bit(MPATHF_QUEUE_IO, &m->flags); 576 577 if ((pgpath && queue_io) || 578 (!pgpath && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))) { 579 /* Queue for the daemon to resubmit */ 580 spin_lock_irqsave(&m->lock, flags); 581 bio_list_add(&m->queued_bios, bio); 582 spin_unlock_irqrestore(&m->lock, flags); 583 584 /* PG_INIT_REQUIRED cannot be set without QUEUE_IO */ 585 if (queue_io || test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) 586 pg_init_all_paths(m); 587 else if (!queue_io) 588 queue_work(kmultipathd, &m->process_queued_bios); 589 590 return ERR_PTR(-EAGAIN); 591 } 592 593 return pgpath; 594 } 595 596 static int __multipath_map_bio(struct multipath *m, struct bio *bio, 597 struct dm_mpath_io *mpio) 598 { 599 struct pgpath *pgpath = __map_bio(m, bio); 600 601 if (IS_ERR(pgpath)) 602 return DM_MAPIO_SUBMITTED; 603 604 if (!pgpath) { 605 if (__must_push_back(m)) 606 return DM_MAPIO_REQUEUE; 607 dm_report_EIO(m); 608 return DM_MAPIO_KILL; 609 } 610 611 mpio->pgpath = pgpath; 612 613 bio->bi_status = 0; 614 bio_set_dev(bio, pgpath->path.dev->bdev); 615 bio->bi_opf |= REQ_FAILFAST_TRANSPORT; 616 617 if (pgpath->pg->ps.type->start_io) 618 pgpath->pg->ps.type->start_io(&pgpath->pg->ps, 619 &pgpath->path, 620 mpio->nr_bytes); 621 return DM_MAPIO_REMAPPED; 622 } 623 624 static int multipath_map_bio(struct dm_target *ti, struct bio *bio) 625 { 626 struct multipath *m = ti->private; 627 struct dm_mpath_io *mpio = NULL; 628 629 multipath_init_per_bio_data(bio, &mpio); 630 return __multipath_map_bio(m, bio, mpio); 631 } 632 633 static void process_queued_io_list(struct multipath *m) 634 { 635 if (m->queue_mode == DM_TYPE_REQUEST_BASED) 636 dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table)); 637 else if (m->queue_mode == DM_TYPE_BIO_BASED) 638 queue_work(kmultipathd, &m->process_queued_bios); 639 } 640 641 static void process_queued_bios(struct work_struct *work) 642 { 643 int r; 644 unsigned long flags; 645 struct bio *bio; 646 struct bio_list bios; 647 struct blk_plug plug; 648 struct multipath *m = 649 container_of(work, struct multipath, process_queued_bios); 650 651 bio_list_init(&bios); 652 653 spin_lock_irqsave(&m->lock, flags); 654 655 if (bio_list_empty(&m->queued_bios)) { 656 spin_unlock_irqrestore(&m->lock, flags); 657 return; 658 } 659 660 bio_list_merge(&bios, &m->queued_bios); 661 bio_list_init(&m->queued_bios); 662 663 spin_unlock_irqrestore(&m->lock, flags); 664 665 blk_start_plug(&plug); 666 while ((bio = bio_list_pop(&bios))) { 667 struct dm_mpath_io *mpio = get_mpio_from_bio(bio); 668 dm_bio_restore(get_bio_details_from_mpio(mpio), bio); 669 r = __multipath_map_bio(m, bio, mpio); 670 switch (r) { 671 case DM_MAPIO_KILL: 672 bio->bi_status = BLK_STS_IOERR; 673 bio_endio(bio); 674 break; 675 case DM_MAPIO_REQUEUE: 676 bio->bi_status = BLK_STS_DM_REQUEUE; 677 bio_endio(bio); 678 break; 679 case DM_MAPIO_REMAPPED: 680 generic_make_request(bio); 681 break; 682 case DM_MAPIO_SUBMITTED: 683 break; 684 default: 685 WARN_ONCE(true, "__multipath_map_bio() returned %d\n", r); 686 } 687 } 688 blk_finish_plug(&plug); 689 } 690 691 /* 692 * If we run out of usable paths, should we queue I/O or error it? 693 */ 694 static int queue_if_no_path(struct multipath *m, bool queue_if_no_path, 695 bool save_old_value, const char *caller) 696 { 697 unsigned long flags; 698 bool queue_if_no_path_bit, saved_queue_if_no_path_bit; 699 const char *dm_dev_name = dm_device_name(dm_table_get_md(m->ti->table)); 700 701 DMDEBUG("%s: %s caller=%s queue_if_no_path=%d save_old_value=%d", 702 dm_dev_name, __func__, caller, queue_if_no_path, save_old_value); 703 704 spin_lock_irqsave(&m->lock, flags); 705 706 queue_if_no_path_bit = test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); 707 saved_queue_if_no_path_bit = test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); 708 709 if (save_old_value) { 710 if (unlikely(!queue_if_no_path_bit && saved_queue_if_no_path_bit)) { 711 DMERR("%s: QIFNP disabled but saved as enabled, saving again loses state, not saving!", 712 dm_dev_name); 713 } else 714 assign_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path_bit); 715 } else if (!queue_if_no_path && saved_queue_if_no_path_bit) { 716 /* due to "fail_if_no_path" message, need to honor it. */ 717 clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); 718 } 719 assign_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags, queue_if_no_path); 720 721 DMDEBUG("%s: after %s changes; QIFNP = %d; SQIFNP = %d; DNFS = %d", 722 dm_dev_name, __func__, 723 test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), 724 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags), 725 dm_noflush_suspending(m->ti)); 726 727 spin_unlock_irqrestore(&m->lock, flags); 728 729 if (!queue_if_no_path) { 730 dm_table_run_md_queue_async(m->ti->table); 731 process_queued_io_list(m); 732 } 733 734 return 0; 735 } 736 737 /* 738 * If the queue_if_no_path timeout fires, turn off queue_if_no_path and 739 * process any queued I/O. 740 */ 741 static void queue_if_no_path_timeout_work(struct timer_list *t) 742 { 743 struct multipath *m = from_timer(m, t, nopath_timer); 744 struct mapped_device *md = dm_table_get_md(m->ti->table); 745 746 DMWARN("queue_if_no_path timeout on %s, failing queued IO", dm_device_name(md)); 747 queue_if_no_path(m, false, false, __func__); 748 } 749 750 /* 751 * Enable the queue_if_no_path timeout if necessary. 752 * Called with m->lock held. 753 */ 754 static void enable_nopath_timeout(struct multipath *m) 755 { 756 unsigned long queue_if_no_path_timeout = 757 READ_ONCE(queue_if_no_path_timeout_secs) * HZ; 758 759 lockdep_assert_held(&m->lock); 760 761 if (queue_if_no_path_timeout > 0 && 762 atomic_read(&m->nr_valid_paths) == 0 && 763 test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 764 mod_timer(&m->nopath_timer, 765 jiffies + queue_if_no_path_timeout); 766 } 767 } 768 769 static void disable_nopath_timeout(struct multipath *m) 770 { 771 del_timer_sync(&m->nopath_timer); 772 } 773 774 /* 775 * An event is triggered whenever a path is taken out of use. 776 * Includes path failure and PG bypass. 777 */ 778 static void trigger_event(struct work_struct *work) 779 { 780 struct multipath *m = 781 container_of(work, struct multipath, trigger_event); 782 783 dm_table_event(m->ti->table); 784 } 785 786 /*----------------------------------------------------------------- 787 * Constructor/argument parsing: 788 * <#multipath feature args> [<arg>]* 789 * <#hw_handler args> [hw_handler [<arg>]*] 790 * <#priority groups> 791 * <initial priority group> 792 * [<selector> <#selector args> [<arg>]* 793 * <#paths> <#per-path selector args> 794 * [<path> [<arg>]* ]+ ]+ 795 *---------------------------------------------------------------*/ 796 static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg, 797 struct dm_target *ti) 798 { 799 int r; 800 struct path_selector_type *pst; 801 unsigned ps_argc; 802 803 static const struct dm_arg _args[] = { 804 {0, 1024, "invalid number of path selector args"}, 805 }; 806 807 pst = dm_get_path_selector(dm_shift_arg(as)); 808 if (!pst) { 809 ti->error = "unknown path selector type"; 810 return -EINVAL; 811 } 812 813 r = dm_read_arg_group(_args, as, &ps_argc, &ti->error); 814 if (r) { 815 dm_put_path_selector(pst); 816 return -EINVAL; 817 } 818 819 r = pst->create(&pg->ps, ps_argc, as->argv); 820 if (r) { 821 dm_put_path_selector(pst); 822 ti->error = "path selector constructor failed"; 823 return r; 824 } 825 826 pg->ps.type = pst; 827 dm_consume_args(as, ps_argc); 828 829 return 0; 830 } 831 832 static int setup_scsi_dh(struct block_device *bdev, struct multipath *m, 833 const char **attached_handler_name, char **error) 834 { 835 struct request_queue *q = bdev_get_queue(bdev); 836 int r; 837 838 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) { 839 retain: 840 if (*attached_handler_name) { 841 /* 842 * Clear any hw_handler_params associated with a 843 * handler that isn't already attached. 844 */ 845 if (m->hw_handler_name && strcmp(*attached_handler_name, m->hw_handler_name)) { 846 kfree(m->hw_handler_params); 847 m->hw_handler_params = NULL; 848 } 849 850 /* 851 * Reset hw_handler_name to match the attached handler 852 * 853 * NB. This modifies the table line to show the actual 854 * handler instead of the original table passed in. 855 */ 856 kfree(m->hw_handler_name); 857 m->hw_handler_name = *attached_handler_name; 858 *attached_handler_name = NULL; 859 } 860 } 861 862 if (m->hw_handler_name) { 863 r = scsi_dh_attach(q, m->hw_handler_name); 864 if (r == -EBUSY) { 865 char b[BDEVNAME_SIZE]; 866 867 printk(KERN_INFO "dm-mpath: retaining handler on device %s\n", 868 bdevname(bdev, b)); 869 goto retain; 870 } 871 if (r < 0) { 872 *error = "error attaching hardware handler"; 873 return r; 874 } 875 876 if (m->hw_handler_params) { 877 r = scsi_dh_set_params(q, m->hw_handler_params); 878 if (r < 0) { 879 *error = "unable to set hardware handler parameters"; 880 return r; 881 } 882 } 883 } 884 885 return 0; 886 } 887 888 static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps, 889 struct dm_target *ti) 890 { 891 int r; 892 struct pgpath *p; 893 struct multipath *m = ti->private; 894 struct request_queue *q; 895 const char *attached_handler_name = NULL; 896 897 /* we need at least a path arg */ 898 if (as->argc < 1) { 899 ti->error = "no device given"; 900 return ERR_PTR(-EINVAL); 901 } 902 903 p = alloc_pgpath(); 904 if (!p) 905 return ERR_PTR(-ENOMEM); 906 907 r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table), 908 &p->path.dev); 909 if (r) { 910 ti->error = "error getting device"; 911 goto bad; 912 } 913 914 q = bdev_get_queue(p->path.dev->bdev); 915 attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL); 916 if (attached_handler_name || m->hw_handler_name) { 917 INIT_DELAYED_WORK(&p->activate_path, activate_path_work); 918 r = setup_scsi_dh(p->path.dev->bdev, m, &attached_handler_name, &ti->error); 919 kfree(attached_handler_name); 920 if (r) { 921 dm_put_device(ti, p->path.dev); 922 goto bad; 923 } 924 } 925 926 r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error); 927 if (r) { 928 dm_put_device(ti, p->path.dev); 929 goto bad; 930 } 931 932 return p; 933 bad: 934 free_pgpath(p); 935 return ERR_PTR(r); 936 } 937 938 static struct priority_group *parse_priority_group(struct dm_arg_set *as, 939 struct multipath *m) 940 { 941 static const struct dm_arg _args[] = { 942 {1, 1024, "invalid number of paths"}, 943 {0, 1024, "invalid number of selector args"} 944 }; 945 946 int r; 947 unsigned i, nr_selector_args, nr_args; 948 struct priority_group *pg; 949 struct dm_target *ti = m->ti; 950 951 if (as->argc < 2) { 952 as->argc = 0; 953 ti->error = "not enough priority group arguments"; 954 return ERR_PTR(-EINVAL); 955 } 956 957 pg = alloc_priority_group(); 958 if (!pg) { 959 ti->error = "couldn't allocate priority group"; 960 return ERR_PTR(-ENOMEM); 961 } 962 pg->m = m; 963 964 r = parse_path_selector(as, pg, ti); 965 if (r) 966 goto bad; 967 968 /* 969 * read the paths 970 */ 971 r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error); 972 if (r) 973 goto bad; 974 975 r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error); 976 if (r) 977 goto bad; 978 979 nr_args = 1 + nr_selector_args; 980 for (i = 0; i < pg->nr_pgpaths; i++) { 981 struct pgpath *pgpath; 982 struct dm_arg_set path_args; 983 984 if (as->argc < nr_args) { 985 ti->error = "not enough path parameters"; 986 r = -EINVAL; 987 goto bad; 988 } 989 990 path_args.argc = nr_args; 991 path_args.argv = as->argv; 992 993 pgpath = parse_path(&path_args, &pg->ps, ti); 994 if (IS_ERR(pgpath)) { 995 r = PTR_ERR(pgpath); 996 goto bad; 997 } 998 999 pgpath->pg = pg; 1000 list_add_tail(&pgpath->list, &pg->pgpaths); 1001 dm_consume_args(as, nr_args); 1002 } 1003 1004 return pg; 1005 1006 bad: 1007 free_priority_group(pg, ti); 1008 return ERR_PTR(r); 1009 } 1010 1011 static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m) 1012 { 1013 unsigned hw_argc; 1014 int ret; 1015 struct dm_target *ti = m->ti; 1016 1017 static const struct dm_arg _args[] = { 1018 {0, 1024, "invalid number of hardware handler args"}, 1019 }; 1020 1021 if (dm_read_arg_group(_args, as, &hw_argc, &ti->error)) 1022 return -EINVAL; 1023 1024 if (!hw_argc) 1025 return 0; 1026 1027 if (m->queue_mode == DM_TYPE_BIO_BASED) { 1028 dm_consume_args(as, hw_argc); 1029 DMERR("bio-based multipath doesn't allow hardware handler args"); 1030 return 0; 1031 } 1032 1033 m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL); 1034 if (!m->hw_handler_name) 1035 return -EINVAL; 1036 1037 if (hw_argc > 1) { 1038 char *p; 1039 int i, j, len = 4; 1040 1041 for (i = 0; i <= hw_argc - 2; i++) 1042 len += strlen(as->argv[i]) + 1; 1043 p = m->hw_handler_params = kzalloc(len, GFP_KERNEL); 1044 if (!p) { 1045 ti->error = "memory allocation failed"; 1046 ret = -ENOMEM; 1047 goto fail; 1048 } 1049 j = sprintf(p, "%d", hw_argc - 1); 1050 for (i = 0, p+=j+1; i <= hw_argc - 2; i++, p+=j+1) 1051 j = sprintf(p, "%s", as->argv[i]); 1052 } 1053 dm_consume_args(as, hw_argc - 1); 1054 1055 return 0; 1056 fail: 1057 kfree(m->hw_handler_name); 1058 m->hw_handler_name = NULL; 1059 return ret; 1060 } 1061 1062 static int parse_features(struct dm_arg_set *as, struct multipath *m) 1063 { 1064 int r; 1065 unsigned argc; 1066 struct dm_target *ti = m->ti; 1067 const char *arg_name; 1068 1069 static const struct dm_arg _args[] = { 1070 {0, 8, "invalid number of feature args"}, 1071 {1, 50, "pg_init_retries must be between 1 and 50"}, 1072 {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, 1073 }; 1074 1075 r = dm_read_arg_group(_args, as, &argc, &ti->error); 1076 if (r) 1077 return -EINVAL; 1078 1079 if (!argc) 1080 return 0; 1081 1082 do { 1083 arg_name = dm_shift_arg(as); 1084 argc--; 1085 1086 if (!strcasecmp(arg_name, "queue_if_no_path")) { 1087 r = queue_if_no_path(m, true, false, __func__); 1088 continue; 1089 } 1090 1091 if (!strcasecmp(arg_name, "retain_attached_hw_handler")) { 1092 set_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags); 1093 continue; 1094 } 1095 1096 if (!strcasecmp(arg_name, "pg_init_retries") && 1097 (argc >= 1)) { 1098 r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error); 1099 argc--; 1100 continue; 1101 } 1102 1103 if (!strcasecmp(arg_name, "pg_init_delay_msecs") && 1104 (argc >= 1)) { 1105 r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error); 1106 argc--; 1107 continue; 1108 } 1109 1110 if (!strcasecmp(arg_name, "queue_mode") && 1111 (argc >= 1)) { 1112 const char *queue_mode_name = dm_shift_arg(as); 1113 1114 if (!strcasecmp(queue_mode_name, "bio")) 1115 m->queue_mode = DM_TYPE_BIO_BASED; 1116 else if (!strcasecmp(queue_mode_name, "rq") || 1117 !strcasecmp(queue_mode_name, "mq")) 1118 m->queue_mode = DM_TYPE_REQUEST_BASED; 1119 else { 1120 ti->error = "Unknown 'queue_mode' requested"; 1121 r = -EINVAL; 1122 } 1123 argc--; 1124 continue; 1125 } 1126 1127 ti->error = "Unrecognised multipath feature request"; 1128 r = -EINVAL; 1129 } while (argc && !r); 1130 1131 return r; 1132 } 1133 1134 static int multipath_ctr(struct dm_target *ti, unsigned argc, char **argv) 1135 { 1136 /* target arguments */ 1137 static const struct dm_arg _args[] = { 1138 {0, 1024, "invalid number of priority groups"}, 1139 {0, 1024, "invalid initial priority group number"}, 1140 }; 1141 1142 int r; 1143 struct multipath *m; 1144 struct dm_arg_set as; 1145 unsigned pg_count = 0; 1146 unsigned next_pg_num; 1147 unsigned long flags; 1148 1149 as.argc = argc; 1150 as.argv = argv; 1151 1152 m = alloc_multipath(ti); 1153 if (!m) { 1154 ti->error = "can't allocate multipath"; 1155 return -EINVAL; 1156 } 1157 1158 r = parse_features(&as, m); 1159 if (r) 1160 goto bad; 1161 1162 r = alloc_multipath_stage2(ti, m); 1163 if (r) 1164 goto bad; 1165 1166 r = parse_hw_handler(&as, m); 1167 if (r) 1168 goto bad; 1169 1170 r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error); 1171 if (r) 1172 goto bad; 1173 1174 r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error); 1175 if (r) 1176 goto bad; 1177 1178 if ((!m->nr_priority_groups && next_pg_num) || 1179 (m->nr_priority_groups && !next_pg_num)) { 1180 ti->error = "invalid initial priority group"; 1181 r = -EINVAL; 1182 goto bad; 1183 } 1184 1185 /* parse the priority groups */ 1186 while (as.argc) { 1187 struct priority_group *pg; 1188 unsigned nr_valid_paths = atomic_read(&m->nr_valid_paths); 1189 1190 pg = parse_priority_group(&as, m); 1191 if (IS_ERR(pg)) { 1192 r = PTR_ERR(pg); 1193 goto bad; 1194 } 1195 1196 nr_valid_paths += pg->nr_pgpaths; 1197 atomic_set(&m->nr_valid_paths, nr_valid_paths); 1198 1199 list_add_tail(&pg->list, &m->priority_groups); 1200 pg_count++; 1201 pg->pg_num = pg_count; 1202 if (!--next_pg_num) 1203 m->next_pg = pg; 1204 } 1205 1206 if (pg_count != m->nr_priority_groups) { 1207 ti->error = "priority group count mismatch"; 1208 r = -EINVAL; 1209 goto bad; 1210 } 1211 1212 spin_lock_irqsave(&m->lock, flags); 1213 enable_nopath_timeout(m); 1214 spin_unlock_irqrestore(&m->lock, flags); 1215 1216 ti->num_flush_bios = 1; 1217 ti->num_discard_bios = 1; 1218 ti->num_write_same_bios = 1; 1219 ti->num_write_zeroes_bios = 1; 1220 if (m->queue_mode == DM_TYPE_BIO_BASED) 1221 ti->per_io_data_size = multipath_per_bio_data_size(); 1222 else 1223 ti->per_io_data_size = sizeof(struct dm_mpath_io); 1224 1225 return 0; 1226 1227 bad: 1228 free_multipath(m); 1229 return r; 1230 } 1231 1232 static void multipath_wait_for_pg_init_completion(struct multipath *m) 1233 { 1234 DEFINE_WAIT(wait); 1235 1236 while (1) { 1237 prepare_to_wait(&m->pg_init_wait, &wait, TASK_UNINTERRUPTIBLE); 1238 1239 if (!atomic_read(&m->pg_init_in_progress)) 1240 break; 1241 1242 io_schedule(); 1243 } 1244 finish_wait(&m->pg_init_wait, &wait); 1245 } 1246 1247 static void flush_multipath_work(struct multipath *m) 1248 { 1249 if (m->hw_handler_name) { 1250 set_bit(MPATHF_PG_INIT_DISABLED, &m->flags); 1251 smp_mb__after_atomic(); 1252 1253 if (atomic_read(&m->pg_init_in_progress)) 1254 flush_workqueue(kmpath_handlerd); 1255 multipath_wait_for_pg_init_completion(m); 1256 1257 clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags); 1258 smp_mb__after_atomic(); 1259 } 1260 1261 if (m->queue_mode == DM_TYPE_BIO_BASED) 1262 flush_work(&m->process_queued_bios); 1263 flush_work(&m->trigger_event); 1264 } 1265 1266 static void multipath_dtr(struct dm_target *ti) 1267 { 1268 struct multipath *m = ti->private; 1269 1270 disable_nopath_timeout(m); 1271 flush_multipath_work(m); 1272 free_multipath(m); 1273 } 1274 1275 /* 1276 * Take a path out of use. 1277 */ 1278 static int fail_path(struct pgpath *pgpath) 1279 { 1280 unsigned long flags; 1281 struct multipath *m = pgpath->pg->m; 1282 1283 spin_lock_irqsave(&m->lock, flags); 1284 1285 if (!pgpath->is_active) 1286 goto out; 1287 1288 DMWARN("%s: Failing path %s.", 1289 dm_device_name(dm_table_get_md(m->ti->table)), 1290 pgpath->path.dev->name); 1291 1292 pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path); 1293 pgpath->is_active = false; 1294 pgpath->fail_count++; 1295 1296 atomic_dec(&m->nr_valid_paths); 1297 1298 if (pgpath == m->current_pgpath) 1299 m->current_pgpath = NULL; 1300 1301 dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti, 1302 pgpath->path.dev->name, atomic_read(&m->nr_valid_paths)); 1303 1304 schedule_work(&m->trigger_event); 1305 1306 enable_nopath_timeout(m); 1307 1308 out: 1309 spin_unlock_irqrestore(&m->lock, flags); 1310 1311 return 0; 1312 } 1313 1314 /* 1315 * Reinstate a previously-failed path 1316 */ 1317 static int reinstate_path(struct pgpath *pgpath) 1318 { 1319 int r = 0, run_queue = 0; 1320 unsigned long flags; 1321 struct multipath *m = pgpath->pg->m; 1322 unsigned nr_valid_paths; 1323 1324 spin_lock_irqsave(&m->lock, flags); 1325 1326 if (pgpath->is_active) 1327 goto out; 1328 1329 DMWARN("%s: Reinstating path %s.", 1330 dm_device_name(dm_table_get_md(m->ti->table)), 1331 pgpath->path.dev->name); 1332 1333 r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path); 1334 if (r) 1335 goto out; 1336 1337 pgpath->is_active = true; 1338 1339 nr_valid_paths = atomic_inc_return(&m->nr_valid_paths); 1340 if (nr_valid_paths == 1) { 1341 m->current_pgpath = NULL; 1342 run_queue = 1; 1343 } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { 1344 if (queue_work(kmpath_handlerd, &pgpath->activate_path.work)) 1345 atomic_inc(&m->pg_init_in_progress); 1346 } 1347 1348 dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, 1349 pgpath->path.dev->name, nr_valid_paths); 1350 1351 schedule_work(&m->trigger_event); 1352 1353 out: 1354 spin_unlock_irqrestore(&m->lock, flags); 1355 if (run_queue) { 1356 dm_table_run_md_queue_async(m->ti->table); 1357 process_queued_io_list(m); 1358 } 1359 1360 if (pgpath->is_active) 1361 disable_nopath_timeout(m); 1362 1363 return r; 1364 } 1365 1366 /* 1367 * Fail or reinstate all paths that match the provided struct dm_dev. 1368 */ 1369 static int action_dev(struct multipath *m, struct dm_dev *dev, 1370 action_fn action) 1371 { 1372 int r = -EINVAL; 1373 struct pgpath *pgpath; 1374 struct priority_group *pg; 1375 1376 list_for_each_entry(pg, &m->priority_groups, list) { 1377 list_for_each_entry(pgpath, &pg->pgpaths, list) { 1378 if (pgpath->path.dev == dev) 1379 r = action(pgpath); 1380 } 1381 } 1382 1383 return r; 1384 } 1385 1386 /* 1387 * Temporarily try to avoid having to use the specified PG 1388 */ 1389 static void bypass_pg(struct multipath *m, struct priority_group *pg, 1390 bool bypassed) 1391 { 1392 unsigned long flags; 1393 1394 spin_lock_irqsave(&m->lock, flags); 1395 1396 pg->bypassed = bypassed; 1397 m->current_pgpath = NULL; 1398 m->current_pg = NULL; 1399 1400 spin_unlock_irqrestore(&m->lock, flags); 1401 1402 schedule_work(&m->trigger_event); 1403 } 1404 1405 /* 1406 * Switch to using the specified PG from the next I/O that gets mapped 1407 */ 1408 static int switch_pg_num(struct multipath *m, const char *pgstr) 1409 { 1410 struct priority_group *pg; 1411 unsigned pgnum; 1412 unsigned long flags; 1413 char dummy; 1414 1415 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || 1416 !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { 1417 DMWARN("invalid PG number supplied to switch_pg_num"); 1418 return -EINVAL; 1419 } 1420 1421 spin_lock_irqsave(&m->lock, flags); 1422 list_for_each_entry(pg, &m->priority_groups, list) { 1423 pg->bypassed = false; 1424 if (--pgnum) 1425 continue; 1426 1427 m->current_pgpath = NULL; 1428 m->current_pg = NULL; 1429 m->next_pg = pg; 1430 } 1431 spin_unlock_irqrestore(&m->lock, flags); 1432 1433 schedule_work(&m->trigger_event); 1434 return 0; 1435 } 1436 1437 /* 1438 * Set/clear bypassed status of a PG. 1439 * PGs are numbered upwards from 1 in the order they were declared. 1440 */ 1441 static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed) 1442 { 1443 struct priority_group *pg; 1444 unsigned pgnum; 1445 char dummy; 1446 1447 if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum || 1448 !m->nr_priority_groups || (pgnum > m->nr_priority_groups)) { 1449 DMWARN("invalid PG number supplied to bypass_pg"); 1450 return -EINVAL; 1451 } 1452 1453 list_for_each_entry(pg, &m->priority_groups, list) { 1454 if (!--pgnum) 1455 break; 1456 } 1457 1458 bypass_pg(m, pg, bypassed); 1459 return 0; 1460 } 1461 1462 /* 1463 * Should we retry pg_init immediately? 1464 */ 1465 static bool pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath) 1466 { 1467 unsigned long flags; 1468 bool limit_reached = false; 1469 1470 spin_lock_irqsave(&m->lock, flags); 1471 1472 if (atomic_read(&m->pg_init_count) <= m->pg_init_retries && 1473 !test_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) 1474 set_bit(MPATHF_PG_INIT_REQUIRED, &m->flags); 1475 else 1476 limit_reached = true; 1477 1478 spin_unlock_irqrestore(&m->lock, flags); 1479 1480 return limit_reached; 1481 } 1482 1483 static void pg_init_done(void *data, int errors) 1484 { 1485 struct pgpath *pgpath = data; 1486 struct priority_group *pg = pgpath->pg; 1487 struct multipath *m = pg->m; 1488 unsigned long flags; 1489 bool delay_retry = false; 1490 1491 /* device or driver problems */ 1492 switch (errors) { 1493 case SCSI_DH_OK: 1494 break; 1495 case SCSI_DH_NOSYS: 1496 if (!m->hw_handler_name) { 1497 errors = 0; 1498 break; 1499 } 1500 DMERR("Could not failover the device: Handler scsi_dh_%s " 1501 "Error %d.", m->hw_handler_name, errors); 1502 /* 1503 * Fail path for now, so we do not ping pong 1504 */ 1505 fail_path(pgpath); 1506 break; 1507 case SCSI_DH_DEV_TEMP_BUSY: 1508 /* 1509 * Probably doing something like FW upgrade on the 1510 * controller so try the other pg. 1511 */ 1512 bypass_pg(m, pg, true); 1513 break; 1514 case SCSI_DH_RETRY: 1515 /* Wait before retrying. */ 1516 delay_retry = true; 1517 /* fall through */ 1518 case SCSI_DH_IMM_RETRY: 1519 case SCSI_DH_RES_TEMP_UNAVAIL: 1520 if (pg_init_limit_reached(m, pgpath)) 1521 fail_path(pgpath); 1522 errors = 0; 1523 break; 1524 case SCSI_DH_DEV_OFFLINED: 1525 default: 1526 /* 1527 * We probably do not want to fail the path for a device 1528 * error, but this is what the old dm did. In future 1529 * patches we can do more advanced handling. 1530 */ 1531 fail_path(pgpath); 1532 } 1533 1534 spin_lock_irqsave(&m->lock, flags); 1535 if (errors) { 1536 if (pgpath == m->current_pgpath) { 1537 DMERR("Could not failover device. Error %d.", errors); 1538 m->current_pgpath = NULL; 1539 m->current_pg = NULL; 1540 } 1541 } else if (!test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) 1542 pg->bypassed = false; 1543 1544 if (atomic_dec_return(&m->pg_init_in_progress) > 0) 1545 /* Activations of other paths are still on going */ 1546 goto out; 1547 1548 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) { 1549 if (delay_retry) 1550 set_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); 1551 else 1552 clear_bit(MPATHF_PG_INIT_DELAY_RETRY, &m->flags); 1553 1554 if (__pg_init_all_paths(m)) 1555 goto out; 1556 } 1557 clear_bit(MPATHF_QUEUE_IO, &m->flags); 1558 1559 process_queued_io_list(m); 1560 1561 /* 1562 * Wake up any thread waiting to suspend. 1563 */ 1564 wake_up(&m->pg_init_wait); 1565 1566 out: 1567 spin_unlock_irqrestore(&m->lock, flags); 1568 } 1569 1570 static void activate_or_offline_path(struct pgpath *pgpath) 1571 { 1572 struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); 1573 1574 if (pgpath->is_active && !blk_queue_dying(q)) 1575 scsi_dh_activate(q, pg_init_done, pgpath); 1576 else 1577 pg_init_done(pgpath, SCSI_DH_DEV_OFFLINED); 1578 } 1579 1580 static void activate_path_work(struct work_struct *work) 1581 { 1582 struct pgpath *pgpath = 1583 container_of(work, struct pgpath, activate_path.work); 1584 1585 activate_or_offline_path(pgpath); 1586 } 1587 1588 static int multipath_end_io(struct dm_target *ti, struct request *clone, 1589 blk_status_t error, union map_info *map_context) 1590 { 1591 struct dm_mpath_io *mpio = get_mpio(map_context); 1592 struct pgpath *pgpath = mpio->pgpath; 1593 int r = DM_ENDIO_DONE; 1594 1595 /* 1596 * We don't queue any clone request inside the multipath target 1597 * during end I/O handling, since those clone requests don't have 1598 * bio clones. If we queue them inside the multipath target, 1599 * we need to make bio clones, that requires memory allocation. 1600 * (See drivers/md/dm-rq.c:end_clone_bio() about why the clone requests 1601 * don't have bio clones.) 1602 * Instead of queueing the clone request here, we queue the original 1603 * request into dm core, which will remake a clone request and 1604 * clone bios for it and resubmit it later. 1605 */ 1606 if (error && blk_path_error(error)) { 1607 struct multipath *m = ti->private; 1608 1609 if (error == BLK_STS_RESOURCE) 1610 r = DM_ENDIO_DELAY_REQUEUE; 1611 else 1612 r = DM_ENDIO_REQUEUE; 1613 1614 if (pgpath) 1615 fail_path(pgpath); 1616 1617 if (atomic_read(&m->nr_valid_paths) == 0 && 1618 !must_push_back_rq(m)) { 1619 if (error == BLK_STS_IOERR) 1620 dm_report_EIO(m); 1621 /* complete with the original error */ 1622 r = DM_ENDIO_DONE; 1623 } 1624 } 1625 1626 if (pgpath) { 1627 struct path_selector *ps = &pgpath->pg->ps; 1628 1629 if (ps->type->end_io) 1630 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes, 1631 clone->io_start_time_ns); 1632 } 1633 1634 return r; 1635 } 1636 1637 static int multipath_end_io_bio(struct dm_target *ti, struct bio *clone, 1638 blk_status_t *error) 1639 { 1640 struct multipath *m = ti->private; 1641 struct dm_mpath_io *mpio = get_mpio_from_bio(clone); 1642 struct pgpath *pgpath = mpio->pgpath; 1643 unsigned long flags; 1644 int r = DM_ENDIO_DONE; 1645 1646 if (!*error || !blk_path_error(*error)) 1647 goto done; 1648 1649 if (pgpath) 1650 fail_path(pgpath); 1651 1652 if (atomic_read(&m->nr_valid_paths) == 0 && 1653 !test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) { 1654 if (__must_push_back(m)) { 1655 r = DM_ENDIO_REQUEUE; 1656 } else { 1657 dm_report_EIO(m); 1658 *error = BLK_STS_IOERR; 1659 } 1660 goto done; 1661 } 1662 1663 spin_lock_irqsave(&m->lock, flags); 1664 bio_list_add(&m->queued_bios, clone); 1665 spin_unlock_irqrestore(&m->lock, flags); 1666 if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) 1667 queue_work(kmultipathd, &m->process_queued_bios); 1668 1669 r = DM_ENDIO_INCOMPLETE; 1670 done: 1671 if (pgpath) { 1672 struct path_selector *ps = &pgpath->pg->ps; 1673 1674 if (ps->type->end_io) 1675 ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes, 1676 dm_start_time_ns_from_clone(clone)); 1677 } 1678 1679 return r; 1680 } 1681 1682 /* 1683 * Suspend with flush can't complete until all the I/O is processed 1684 * so if the last path fails we must error any remaining I/O. 1685 * - Note that if the freeze_bdev fails while suspending, the 1686 * queue_if_no_path state is lost - userspace should reset it. 1687 * Otherwise, during noflush suspend, queue_if_no_path will not change. 1688 */ 1689 static void multipath_presuspend(struct dm_target *ti) 1690 { 1691 struct multipath *m = ti->private; 1692 1693 /* FIXME: bio-based shouldn't need to always disable queue_if_no_path */ 1694 if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti)) 1695 queue_if_no_path(m, false, true, __func__); 1696 } 1697 1698 static void multipath_postsuspend(struct dm_target *ti) 1699 { 1700 struct multipath *m = ti->private; 1701 1702 mutex_lock(&m->work_mutex); 1703 flush_multipath_work(m); 1704 mutex_unlock(&m->work_mutex); 1705 } 1706 1707 /* 1708 * Restore the queue_if_no_path setting. 1709 */ 1710 static void multipath_resume(struct dm_target *ti) 1711 { 1712 struct multipath *m = ti->private; 1713 unsigned long flags; 1714 1715 spin_lock_irqsave(&m->lock, flags); 1716 if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) { 1717 set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags); 1718 clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags); 1719 } 1720 1721 DMDEBUG("%s: %s finished; QIFNP = %d; SQIFNP = %d", 1722 dm_device_name(dm_table_get_md(m->ti->table)), __func__, 1723 test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags), 1724 test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)); 1725 1726 spin_unlock_irqrestore(&m->lock, flags); 1727 } 1728 1729 /* 1730 * Info output has the following format: 1731 * num_multipath_feature_args [multipath_feature_args]* 1732 * num_handler_status_args [handler_status_args]* 1733 * num_groups init_group_number 1734 * [A|D|E num_ps_status_args [ps_status_args]* 1735 * num_paths num_selector_args 1736 * [path_dev A|F fail_count [selector_args]* ]+ ]+ 1737 * 1738 * Table output has the following format (identical to the constructor string): 1739 * num_feature_args [features_args]* 1740 * num_handler_args hw_handler [hw_handler_args]* 1741 * num_groups init_group_number 1742 * [priority selector-name num_ps_args [ps_args]* 1743 * num_paths num_selector_args [path_dev [selector_args]* ]+ ]+ 1744 */ 1745 static void multipath_status(struct dm_target *ti, status_type_t type, 1746 unsigned status_flags, char *result, unsigned maxlen) 1747 { 1748 int sz = 0; 1749 unsigned long flags; 1750 struct multipath *m = ti->private; 1751 struct priority_group *pg; 1752 struct pgpath *p; 1753 unsigned pg_num; 1754 char state; 1755 1756 spin_lock_irqsave(&m->lock, flags); 1757 1758 /* Features */ 1759 if (type == STATUSTYPE_INFO) 1760 DMEMIT("2 %u %u ", test_bit(MPATHF_QUEUE_IO, &m->flags), 1761 atomic_read(&m->pg_init_count)); 1762 else { 1763 DMEMIT("%u ", test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags) + 1764 (m->pg_init_retries > 0) * 2 + 1765 (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 + 1766 test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags) + 1767 (m->queue_mode != DM_TYPE_REQUEST_BASED) * 2); 1768 1769 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 1770 DMEMIT("queue_if_no_path "); 1771 if (m->pg_init_retries) 1772 DMEMIT("pg_init_retries %u ", m->pg_init_retries); 1773 if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) 1774 DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs); 1775 if (test_bit(MPATHF_RETAIN_ATTACHED_HW_HANDLER, &m->flags)) 1776 DMEMIT("retain_attached_hw_handler "); 1777 if (m->queue_mode != DM_TYPE_REQUEST_BASED) { 1778 switch(m->queue_mode) { 1779 case DM_TYPE_BIO_BASED: 1780 DMEMIT("queue_mode bio "); 1781 break; 1782 default: 1783 WARN_ON_ONCE(true); 1784 break; 1785 } 1786 } 1787 } 1788 1789 if (!m->hw_handler_name || type == STATUSTYPE_INFO) 1790 DMEMIT("0 "); 1791 else 1792 DMEMIT("1 %s ", m->hw_handler_name); 1793 1794 DMEMIT("%u ", m->nr_priority_groups); 1795 1796 if (m->next_pg) 1797 pg_num = m->next_pg->pg_num; 1798 else if (m->current_pg) 1799 pg_num = m->current_pg->pg_num; 1800 else 1801 pg_num = (m->nr_priority_groups ? 1 : 0); 1802 1803 DMEMIT("%u ", pg_num); 1804 1805 switch (type) { 1806 case STATUSTYPE_INFO: 1807 list_for_each_entry(pg, &m->priority_groups, list) { 1808 if (pg->bypassed) 1809 state = 'D'; /* Disabled */ 1810 else if (pg == m->current_pg) 1811 state = 'A'; /* Currently Active */ 1812 else 1813 state = 'E'; /* Enabled */ 1814 1815 DMEMIT("%c ", state); 1816 1817 if (pg->ps.type->status) 1818 sz += pg->ps.type->status(&pg->ps, NULL, type, 1819 result + sz, 1820 maxlen - sz); 1821 else 1822 DMEMIT("0 "); 1823 1824 DMEMIT("%u %u ", pg->nr_pgpaths, 1825 pg->ps.type->info_args); 1826 1827 list_for_each_entry(p, &pg->pgpaths, list) { 1828 DMEMIT("%s %s %u ", p->path.dev->name, 1829 p->is_active ? "A" : "F", 1830 p->fail_count); 1831 if (pg->ps.type->status) 1832 sz += pg->ps.type->status(&pg->ps, 1833 &p->path, type, result + sz, 1834 maxlen - sz); 1835 } 1836 } 1837 break; 1838 1839 case STATUSTYPE_TABLE: 1840 list_for_each_entry(pg, &m->priority_groups, list) { 1841 DMEMIT("%s ", pg->ps.type->name); 1842 1843 if (pg->ps.type->status) 1844 sz += pg->ps.type->status(&pg->ps, NULL, type, 1845 result + sz, 1846 maxlen - sz); 1847 else 1848 DMEMIT("0 "); 1849 1850 DMEMIT("%u %u ", pg->nr_pgpaths, 1851 pg->ps.type->table_args); 1852 1853 list_for_each_entry(p, &pg->pgpaths, list) { 1854 DMEMIT("%s ", p->path.dev->name); 1855 if (pg->ps.type->status) 1856 sz += pg->ps.type->status(&pg->ps, 1857 &p->path, type, result + sz, 1858 maxlen - sz); 1859 } 1860 } 1861 break; 1862 } 1863 1864 spin_unlock_irqrestore(&m->lock, flags); 1865 } 1866 1867 static int multipath_message(struct dm_target *ti, unsigned argc, char **argv, 1868 char *result, unsigned maxlen) 1869 { 1870 int r = -EINVAL; 1871 struct dm_dev *dev; 1872 struct multipath *m = ti->private; 1873 action_fn action; 1874 unsigned long flags; 1875 1876 mutex_lock(&m->work_mutex); 1877 1878 if (dm_suspended(ti)) { 1879 r = -EBUSY; 1880 goto out; 1881 } 1882 1883 if (argc == 1) { 1884 if (!strcasecmp(argv[0], "queue_if_no_path")) { 1885 r = queue_if_no_path(m, true, false, __func__); 1886 spin_lock_irqsave(&m->lock, flags); 1887 enable_nopath_timeout(m); 1888 spin_unlock_irqrestore(&m->lock, flags); 1889 goto out; 1890 } else if (!strcasecmp(argv[0], "fail_if_no_path")) { 1891 r = queue_if_no_path(m, false, false, __func__); 1892 disable_nopath_timeout(m); 1893 goto out; 1894 } 1895 } 1896 1897 if (argc != 2) { 1898 DMWARN("Invalid multipath message arguments. Expected 2 arguments, got %d.", argc); 1899 goto out; 1900 } 1901 1902 if (!strcasecmp(argv[0], "disable_group")) { 1903 r = bypass_pg_num(m, argv[1], true); 1904 goto out; 1905 } else if (!strcasecmp(argv[0], "enable_group")) { 1906 r = bypass_pg_num(m, argv[1], false); 1907 goto out; 1908 } else if (!strcasecmp(argv[0], "switch_group")) { 1909 r = switch_pg_num(m, argv[1]); 1910 goto out; 1911 } else if (!strcasecmp(argv[0], "reinstate_path")) 1912 action = reinstate_path; 1913 else if (!strcasecmp(argv[0], "fail_path")) 1914 action = fail_path; 1915 else { 1916 DMWARN("Unrecognised multipath message received: %s", argv[0]); 1917 goto out; 1918 } 1919 1920 r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev); 1921 if (r) { 1922 DMWARN("message: error getting device %s", 1923 argv[1]); 1924 goto out; 1925 } 1926 1927 r = action_dev(m, dev, action); 1928 1929 dm_put_device(ti, dev); 1930 1931 out: 1932 mutex_unlock(&m->work_mutex); 1933 return r; 1934 } 1935 1936 static int multipath_prepare_ioctl(struct dm_target *ti, 1937 struct block_device **bdev) 1938 { 1939 struct multipath *m = ti->private; 1940 struct pgpath *current_pgpath; 1941 int r; 1942 1943 current_pgpath = READ_ONCE(m->current_pgpath); 1944 if (!current_pgpath || !test_bit(MPATHF_QUEUE_IO, &m->flags)) 1945 current_pgpath = choose_pgpath(m, 0); 1946 1947 if (current_pgpath) { 1948 if (!test_bit(MPATHF_QUEUE_IO, &m->flags)) { 1949 *bdev = current_pgpath->path.dev->bdev; 1950 r = 0; 1951 } else { 1952 /* pg_init has not started or completed */ 1953 r = -ENOTCONN; 1954 } 1955 } else { 1956 /* No path is available */ 1957 if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 1958 r = -ENOTCONN; 1959 else 1960 r = -EIO; 1961 } 1962 1963 if (r == -ENOTCONN) { 1964 if (!READ_ONCE(m->current_pg)) { 1965 /* Path status changed, redo selection */ 1966 (void) choose_pgpath(m, 0); 1967 } 1968 if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags)) 1969 pg_init_all_paths(m); 1970 dm_table_run_md_queue_async(m->ti->table); 1971 process_queued_io_list(m); 1972 } 1973 1974 /* 1975 * Only pass ioctls through if the device sizes match exactly. 1976 */ 1977 if (!r && ti->len != i_size_read((*bdev)->bd_inode) >> SECTOR_SHIFT) 1978 return 1; 1979 return r; 1980 } 1981 1982 static int multipath_iterate_devices(struct dm_target *ti, 1983 iterate_devices_callout_fn fn, void *data) 1984 { 1985 struct multipath *m = ti->private; 1986 struct priority_group *pg; 1987 struct pgpath *p; 1988 int ret = 0; 1989 1990 list_for_each_entry(pg, &m->priority_groups, list) { 1991 list_for_each_entry(p, &pg->pgpaths, list) { 1992 ret = fn(ti, p->path.dev, ti->begin, ti->len, data); 1993 if (ret) 1994 goto out; 1995 } 1996 } 1997 1998 out: 1999 return ret; 2000 } 2001 2002 static int pgpath_busy(struct pgpath *pgpath) 2003 { 2004 struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev); 2005 2006 return blk_lld_busy(q); 2007 } 2008 2009 /* 2010 * We return "busy", only when we can map I/Os but underlying devices 2011 * are busy (so even if we map I/Os now, the I/Os will wait on 2012 * the underlying queue). 2013 * In other words, if we want to kill I/Os or queue them inside us 2014 * due to map unavailability, we don't return "busy". Otherwise, 2015 * dm core won't give us the I/Os and we can't do what we want. 2016 */ 2017 static int multipath_busy(struct dm_target *ti) 2018 { 2019 bool busy = false, has_active = false; 2020 struct multipath *m = ti->private; 2021 struct priority_group *pg, *next_pg; 2022 struct pgpath *pgpath; 2023 2024 /* pg_init in progress */ 2025 if (atomic_read(&m->pg_init_in_progress)) 2026 return true; 2027 2028 /* no paths available, for blk-mq: rely on IO mapping to delay requeue */ 2029 if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) 2030 return (m->queue_mode != DM_TYPE_REQUEST_BASED); 2031 2032 /* Guess which priority_group will be used at next mapping time */ 2033 pg = READ_ONCE(m->current_pg); 2034 next_pg = READ_ONCE(m->next_pg); 2035 if (unlikely(!READ_ONCE(m->current_pgpath) && next_pg)) 2036 pg = next_pg; 2037 2038 if (!pg) { 2039 /* 2040 * We don't know which pg will be used at next mapping time. 2041 * We don't call choose_pgpath() here to avoid to trigger 2042 * pg_init just by busy checking. 2043 * So we don't know whether underlying devices we will be using 2044 * at next mapping time are busy or not. Just try mapping. 2045 */ 2046 return busy; 2047 } 2048 2049 /* 2050 * If there is one non-busy active path at least, the path selector 2051 * will be able to select it. So we consider such a pg as not busy. 2052 */ 2053 busy = true; 2054 list_for_each_entry(pgpath, &pg->pgpaths, list) { 2055 if (pgpath->is_active) { 2056 has_active = true; 2057 if (!pgpath_busy(pgpath)) { 2058 busy = false; 2059 break; 2060 } 2061 } 2062 } 2063 2064 if (!has_active) { 2065 /* 2066 * No active path in this pg, so this pg won't be used and 2067 * the current_pg will be changed at next mapping time. 2068 * We need to try mapping to determine it. 2069 */ 2070 busy = false; 2071 } 2072 2073 return busy; 2074 } 2075 2076 /*----------------------------------------------------------------- 2077 * Module setup 2078 *---------------------------------------------------------------*/ 2079 static struct target_type multipath_target = { 2080 .name = "multipath", 2081 .version = {1, 14, 0}, 2082 .features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE | 2083 DM_TARGET_PASSES_INTEGRITY, 2084 .module = THIS_MODULE, 2085 .ctr = multipath_ctr, 2086 .dtr = multipath_dtr, 2087 .clone_and_map_rq = multipath_clone_and_map, 2088 .release_clone_rq = multipath_release_clone, 2089 .rq_end_io = multipath_end_io, 2090 .map = multipath_map_bio, 2091 .end_io = multipath_end_io_bio, 2092 .presuspend = multipath_presuspend, 2093 .postsuspend = multipath_postsuspend, 2094 .resume = multipath_resume, 2095 .status = multipath_status, 2096 .message = multipath_message, 2097 .prepare_ioctl = multipath_prepare_ioctl, 2098 .iterate_devices = multipath_iterate_devices, 2099 .busy = multipath_busy, 2100 }; 2101 2102 static int __init dm_multipath_init(void) 2103 { 2104 int r; 2105 2106 kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0); 2107 if (!kmultipathd) { 2108 DMERR("failed to create workqueue kmpathd"); 2109 r = -ENOMEM; 2110 goto bad_alloc_kmultipathd; 2111 } 2112 2113 /* 2114 * A separate workqueue is used to handle the device handlers 2115 * to avoid overloading existing workqueue. Overloading the 2116 * old workqueue would also create a bottleneck in the 2117 * path of the storage hardware device activation. 2118 */ 2119 kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd", 2120 WQ_MEM_RECLAIM); 2121 if (!kmpath_handlerd) { 2122 DMERR("failed to create workqueue kmpath_handlerd"); 2123 r = -ENOMEM; 2124 goto bad_alloc_kmpath_handlerd; 2125 } 2126 2127 r = dm_register_target(&multipath_target); 2128 if (r < 0) { 2129 DMERR("request-based register failed %d", r); 2130 r = -EINVAL; 2131 goto bad_register_target; 2132 } 2133 2134 return 0; 2135 2136 bad_register_target: 2137 destroy_workqueue(kmpath_handlerd); 2138 bad_alloc_kmpath_handlerd: 2139 destroy_workqueue(kmultipathd); 2140 bad_alloc_kmultipathd: 2141 return r; 2142 } 2143 2144 static void __exit dm_multipath_exit(void) 2145 { 2146 destroy_workqueue(kmpath_handlerd); 2147 destroy_workqueue(kmultipathd); 2148 2149 dm_unregister_target(&multipath_target); 2150 } 2151 2152 module_init(dm_multipath_init); 2153 module_exit(dm_multipath_exit); 2154 2155 module_param_named(queue_if_no_path_timeout_secs, 2156 queue_if_no_path_timeout_secs, ulong, S_IRUGO | S_IWUSR); 2157 MODULE_PARM_DESC(queue_if_no_path_timeout_secs, "No available paths queue IO timeout in seconds"); 2158 2159 MODULE_DESCRIPTION(DM_NAME " multipath target"); 2160 MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>"); 2161 MODULE_LICENSE("GPL"); 2162