/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include <linux/device-mapper.h>

#include "dm-path-selector.h"
#include "dm-uevent.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <scsi/scsi_dh.h>
#include <linux/atomic.h>

#define DM_MSG_PREFIX "multipath"
#define DM_PG_INIT_DELAY_MSECS 2000
#define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned is_active;		/* Path status */
	unsigned fail_count;		/* Cumulative failure count */

	struct dm_path path;
	struct delayed_work activate_path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	const char *hw_handler_name;
	char *hw_handler_params;

	spinlock_t lock;

	unsigned nr_priority_groups;
	struct list_head priority_groups;

	wait_queue_head_t pg_init_wait;	/* Wait for pg_init completion */

	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */
	unsigned pg_init_delay_retry;	/* Delay pg_init retry? */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io:1;		/* Must we queue all I/O? */
	unsigned queue_if_no_path:1;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path:1; /* Saved state during suspension */
	unsigned retain_attached_hw_handler:1; /* If there's already a hw_handler present, don't change it. */

	unsigned pg_init_retries;	/* Number of times to retry pg_init */
	unsigned pg_init_count;		/* Number of times pg_init called */
	unsigned pg_init_delay_msecs;	/* Number of msecs before pg_init retry */

	unsigned queue_size;
	struct work_struct process_queued_ios;
	struct list_head queued_ios;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of dm_mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;

	struct mutex work_mutex;
};

/*
 * Context information attached to each bio we process.
 */
struct dm_mpath_io {
	struct pgpath *pgpath;
	size_t nr_bytes;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static struct kmem_cache *_mpio_cache;

static struct workqueue_struct *kmultipathd, *kmpath_handlerd;
static void process_queued_ios(struct work_struct *work);
static void trigger_event(struct work_struct *work);
static void activate_path(struct work_struct *work);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		pgpath->is_active = 1;
		INIT_DELAYED_WORK(&pgpath->activate_path, activate_path);
	}

	return pgpath;
}

static void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kzalloc(sizeof(*pg), GFP_KERNEL);

	if (pg)
		INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;
	struct multipath *m = ti->private;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		if (m->hw_handler_name)
			scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(struct dm_target *ti)
{
	struct multipath *m;

	m = kzalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		INIT_LIST_HEAD(&m->priority_groups);
		INIT_LIST_HEAD(&m->queued_ios);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
		INIT_WORK(&m->process_queued_ios, process_queued_ios);
		INIT_WORK(&m->trigger_event, trigger_event);
		init_waitqueue_head(&m->pg_init_wait);
		mutex_init(&m->work_mutex);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
		m->ti = ti;
		ti->private = m;
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	kfree(m->hw_handler_name);
	kfree(m->hw_handler_params);
	mempool_destroy(m->mpio_pool);
	kfree(m);
}

static int set_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio;

	mpio = mempool_alloc(m->mpio_pool, GFP_ATOMIC);
	if (!mpio)
		return -ENOMEM;

	memset(mpio, 0, sizeof(*mpio));
	info->ptr = mpio;

	return 0;
}

static void clear_mapinfo(struct multipath *m, union map_info *info)
{
	struct dm_mpath_io *mpio = info->ptr;

	info->ptr = NULL;
	mempool_free(mpio, m->mpio_pool);
}

/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __pg_init_all_paths(struct multipath *m)
{
	struct pgpath *pgpath;
	unsigned long pg_init_delay = 0;

	m->pg_init_count++;
	m->pg_init_required = 0;
	if (m->pg_init_delay_retry)
		pg_init_delay = msecs_to_jiffies(m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT ?
						 m->pg_init_delay_msecs : DM_PG_INIT_DELAY_MSECS);
	list_for_each_entry(pgpath, &m->current_pg->pgpaths, list) {
		/* Skip failed paths */
		if (!pgpath->is_active)
			continue;
		if (queue_delayed_work(kmpath_handlerd, &pgpath->activate_path,
				       pg_init_delay))
			m->pg_init_in_progress++;
	}
}

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (m->hw_handler_name) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}

	m->pg_init_count = 0;
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg,
			       size_t nr_bytes)
{
	struct dm_path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count, nr_bytes);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m, size_t nr_bytes)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg, nr_bytes))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg, nr_bytes))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped, but set
	 * pg_init_delay_retry so we do not hammer controllers.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg, nr_bytes)) {
				if (!bypassed)
					m->pg_init_delay_retry = 1;
				return;
			}
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

/*
 * Check whether bios must be queued in the device-mapper core rather
 * than here in the target.
 *
 * m->lock must be held on entry.
 *
 * If m->queue_if_no_path and m->saved_queue_if_no_path hold the
 * same value then we are not between multipath_presuspend()
 * and multipath_resume() calls and we have no need to check
 * for the DMF_NOFLUSH_SUSPENDING flag.
 */
static int __must_push_back(struct multipath *m)
{
	return (m->queue_if_no_path != m->saved_queue_if_no_path &&
		dm_noflush_suspending(m->ti));
}

static int map_io(struct multipath *m, struct request *clone,
		  union map_info *map_context, unsigned was_queued)
{
	int r = DM_MAPIO_REMAPPED;
	size_t nr_bytes = blk_rq_bytes(clone);
	unsigned long flags;
	struct pgpath *pgpath;
	struct block_device *bdev;
	struct dm_mpath_io *mpio = map_context->ptr;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m, nr_bytes);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		list_add_tail(&clone->queuelist, &m->queued_ios);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = DM_MAPIO_SUBMITTED;
	} else if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		clone->q = bdev_get_queue(bdev);
		clone->rq_disk = bdev->bd_disk;
	} else if (__must_push_back(m))
		r = DM_MAPIO_REQUEUE;
	else
		r = -EIO;	/* Failed */

	mpio->pgpath = pgpath;
	mpio->nr_bytes = nr_bytes;

	if (r == DM_MAPIO_REMAPPED && pgpath->pg->ps.type->start_io)
		pgpath->pg->ps.type->start_io(&pgpath->pg->ps, &pgpath->path,
					      nr_bytes);

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	union map_info *info;
	struct request *clone, *n;
	LIST_HEAD(cl);

	spin_lock_irqsave(&m->lock, flags);
	list_splice_init(&m->queued_ios, &cl);
	spin_unlock_irqrestore(&m->lock, flags);

	list_for_each_entry_safe(clone, n, &cl, queuelist) {
		list_del_init(&clone->queuelist);

		info = dm_get_rq_mapinfo(clone);

		r = map_io(m, clone, info, 1);
		if (r < 0) {
			clear_mapinfo(m, info);
			dm_kill_unmapped_request(clone, r);
		} else if (r == DM_MAPIO_REMAPPED)
			dm_dispatch_request(clone);
		else if (r == DM_MAPIO_REQUEUE) {
			clear_mapinfo(m, info);
			dm_requeue_unmapped_request(clone);
		}
	}
}

static void process_queued_ios(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, process_queued_ios);
	struct pgpath *pgpath = NULL;
	unsigned must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress && pgpath)
		__pg_init_all_paths(m);

	spin_unlock_irqrestore(&m->lock, flags);
	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(struct work_struct *work)
{
	struct multipath *m =
		container_of(work, struct multipath, trigger_event);

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
static int parse_path_selector(struct dm_arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of path selector args"},
	};

	pst = dm_get_path_selector(dm_shift_arg(as));
	if (!pst) {
		ti->error = "unknown path selector type";
		return -EINVAL;
	}

	r = dm_read_arg_group(_args, as, &ps_argc, &ti->error);
	if (r) {
		dm_put_path_selector(pst);
		return -EINVAL;
	}

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = "path selector constructor failed";
		return r;
	}

	pg->ps.type = pst;
	dm_consume_args(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;
	struct multipath *m = ti->private;
	struct request_queue *q = NULL;
	const char *attached_handler_name;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = "no device given";
		return ERR_PTR(-EINVAL);
	}

	p = alloc_pgpath();
	if (!p)
		return ERR_PTR(-ENOMEM);

	r = dm_get_device(ti, dm_shift_arg(as), dm_table_get_mode(ti->table),
			  &p->path.dev);
	if (r) {
		ti->error = "error getting device";
		goto bad;
	}

	if (m->retain_attached_hw_handler || m->hw_handler_name)
		q = bdev_get_queue(p->path.dev->bdev);

	if (m->retain_attached_hw_handler) {
		attached_handler_name = scsi_dh_attached_handler_name(q, GFP_KERNEL);
		if (attached_handler_name) {
			/*
			 * Reset hw_handler_name to match the attached handler
			 * and clear any hw_handler_params associated with the
			 * ignored handler.
			 *
			 * NB. This modifies the table line to show the actual
			 * handler instead of the original table passed in.
			 */
			kfree(m->hw_handler_name);
			m->hw_handler_name = attached_handler_name;

			kfree(m->hw_handler_params);
			m->hw_handler_params = NULL;
		}
	}

	if (m->hw_handler_name) {
		/*
		 * Increments scsi_dh reference, even when using an
		 * already-attached handler.
		 */
		r = scsi_dh_attach(q, m->hw_handler_name);
		if (r == -EBUSY) {
			/*
			 * Already attached to different hw_handler:
			 * try to reattach with correct one.
			 */
			scsi_dh_detach(q);
			r = scsi_dh_attach(q, m->hw_handler_name);
		}

		if (r < 0) {
			ti->error = "error attaching hardware handler";
			dm_put_device(ti, p->path.dev);
			goto bad;
		}

		if (m->hw_handler_params) {
			r = scsi_dh_set_params(q, m->hw_handler_params);
			if (r < 0) {
				ti->error = "unable to set hardware "
					    "handler parameters";
				scsi_dh_detach(q);
				dm_put_device(ti, p->path.dev);
				goto bad;
			}
		}
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

 bad:
	free_pgpath(p);
	return ERR_PTR(r);
}

static struct priority_group *parse_priority_group(struct dm_arg_set *as,
						   struct multipath *m)
{
	static struct dm_arg _args[] = {
		{1, 1024, "invalid number of paths"},
		{0, 1024, "invalid number of selector args"}
	};

	int r;
	unsigned i, nr_selector_args, nr_args;
	struct priority_group *pg;
	struct dm_target *ti = m->ti;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = "not enough priority group arguments";
		return ERR_PTR(-EINVAL);
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = "couldn't allocate priority group";
		return ERR_PTR(-ENOMEM);
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = dm_read_arg(_args, as, &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, as, &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_args = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct dm_arg_set path_args;

		if (as->argc < nr_args) {
			ti->error = "not enough path parameters";
			r = -EINVAL;
			goto bad;
		}

		path_args.argc = nr_args;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (IS_ERR(pgpath)) {
			r = PTR_ERR(pgpath);
			goto bad;
		}

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		dm_consume_args(as, nr_args);
	}

	return pg;

 bad:
	free_priority_group(pg, ti);
	return ERR_PTR(r);
}

static int parse_hw_handler(struct dm_arg_set *as, struct multipath *m)
{
	unsigned hw_argc;
	int ret;
	struct dm_target *ti = m->ti;

	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of hardware handler args"},
	};

	if (dm_read_arg_group(_args, as, &hw_argc, &ti->error))
		return -EINVAL;

	if (!hw_argc)
		return 0;

	m->hw_handler_name = kstrdup(dm_shift_arg(as), GFP_KERNEL);
	if (!try_then_request_module(scsi_dh_handler_exist(m->hw_handler_name),
				     "scsi_dh_%s", m->hw_handler_name)) {
		ti->error = "unknown hardware handler type";
		ret = -EINVAL;
		goto fail;
	}

	if (hw_argc > 1) {
		char *p;
		int i, j, len = 4;

		for (i = 0; i <= hw_argc - 2; i++)
			len += strlen(as->argv[i]) + 1;
		p = m->hw_handler_params = kzalloc(len, GFP_KERNEL);
		if (!p) {
			ti->error = "memory allocation failed";
			ret = -ENOMEM;
			goto fail;
		}
		j = sprintf(p, "%d", hw_argc - 1);
		for (i = 0, p += j + 1; i <= hw_argc - 2; i++, p += j + 1)
			j = sprintf(p, "%s", as->argv[i]);
	}
	dm_consume_args(as, hw_argc - 1);

	return 0;
fail:
	kfree(m->hw_handler_name);
	m->hw_handler_name = NULL;
	return ret;
}

static int parse_features(struct dm_arg_set *as, struct multipath *m)
{
	int r;
	unsigned argc;
	struct dm_target *ti = m->ti;
	const char *arg_name;

	static struct dm_arg _args[] = {
		{0, 6, "invalid number of feature args"},
		{1, 50, "pg_init_retries must be between 1 and 50"},
		{0, 60000, "pg_init_delay_msecs must be between 0 and 60000"},
	};

	r = dm_read_arg_group(_args, as, &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	do {
		arg_name = dm_shift_arg(as);
		argc--;

		if (!strcasecmp(arg_name, "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			continue;
		}

		if (!strcasecmp(arg_name, "retain_attached_hw_handler")) {
			m->retain_attached_hw_handler = 1;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_retries") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 1, as, &m->pg_init_retries, &ti->error);
			argc--;
			continue;
		}

		if (!strcasecmp(arg_name, "pg_init_delay_msecs") &&
		    (argc >= 1)) {
			r = dm_read_arg(_args + 2, as, &m->pg_init_delay_msecs, &ti->error);
			argc--;
			continue;
		}

		ti->error = "Unrecognised multipath feature request";
		r = -EINVAL;
	} while (argc && !r);

	return r;
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target arguments */
	static struct dm_arg _args[] = {
		{0, 1024, "invalid number of priority groups"},
		{0, 1024, "invalid initial priority group number"},
	};

	int r;
	struct multipath *m;
	struct dm_arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath(ti);
	if (!m) {
		ti->error = "can't allocate multipath";
		return -EINVAL;
	}

	r = parse_features(&as, m);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m);
	if (r)
		goto bad;

	r = dm_read_arg(_args, &as, &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = dm_read_arg(_args + 1, &as, &next_pg_num, &ti->error);
	if (r)
		goto bad;

	if ((!m->nr_priority_groups && next_pg_num) ||
	    (m->nr_priority_groups && !next_pg_num)) {
		ti->error = "invalid initial priority group";
		r = -EINVAL;
		goto bad;
	}
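
	/*
	 * Note: next_pg_num counts down as each priority group is parsed
	 * below; the group that brings the count to zero becomes the
	 * initial next_pg.
	 */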

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m);
		if (IS_ERR(pg)) {
			r = PTR_ERR(pg);
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = "priority group count mismatch";
		r = -EINVAL;
		goto bad;
	}

	ti->num_flush_requests = 1;
	ti->num_discard_requests = 1;

	return 0;

 bad:
	free_multipath(m);
	return r;
}

static void multipath_wait_for_pg_init_completion(struct multipath *m)
{
	DECLARE_WAITQUEUE(wait, current);
	unsigned long flags;

	add_wait_queue(&m->pg_init_wait, &wait);

	while (1) {
		set_current_state(TASK_UNINTERRUPTIBLE);

		spin_lock_irqsave(&m->lock, flags);
		if (!m->pg_init_in_progress) {
			spin_unlock_irqrestore(&m->lock, flags);
			break;
		}
		spin_unlock_irqrestore(&m->lock, flags);

		io_schedule();
	}
	set_current_state(TASK_RUNNING);

	remove_wait_queue(&m->pg_init_wait, &wait);
}

static void flush_multipath_work(struct multipath *m)
{
	flush_workqueue(kmpath_handlerd);
	multipath_wait_for_pg_init_completion(m);
	flush_workqueue(kmultipathd);
	flush_work(&m->trigger_event);
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	flush_multipath_work(m);
	free_multipath(m);
}

/*
 * Map cloned requests
 */
static int multipath_map(struct dm_target *ti, struct request *clone,
			 union map_info *map_context)
{
	int r;
	struct multipath *m = (struct multipath *) ti->private;

	if (set_mapinfo(m, map_context) < 0)
		/* ENOMEM, requeue */
		return DM_MAPIO_REQUEUE;

	clone->cmd_flags |= REQ_FAILFAST_TRANSPORT;
	r = map_io(m, clone, map_context, 0);
	if (r < 0 || r == DM_MAPIO_REQUEUE)
		clear_mapinfo(m, map_context);

	return r;
}

/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->is_active)
		goto out;

	DMWARN("Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	dm_path_uevent(DM_UEVENT_PATH_FAILED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->is_active = 1;

	if (!m->nr_valid_paths++ && m->queue_size) {
		m->current_pgpath = NULL;
		queue_work(kmultipathd, &m->process_queued_ios);
	} else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) {
		if (queue_work(kmpath_handlerd, &pgpath->activate_path.work))
			m->pg_init_in_progress++;
	}

	dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti,
		       pgpath->path.dev->name, m->nr_valid_paths);

	schedule_work(&m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = -EINVAL;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	schedule_work(&m->trigger_event);
	return 0;
}

/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;
	char dummy;

	if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * Should we retry pg_init immediately?
 */
static int pg_init_limit_reached(struct multipath *m, struct pgpath *pgpath)
{
	unsigned long flags;
	int limit_reached = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (m->pg_init_count <= m->pg_init_retries)
		m->pg_init_required = 1;
	else
		limit_reached = 1;

	spin_unlock_irqrestore(&m->lock, flags);

	return limit_reached;
}

static void pg_init_done(void *data, int errors)
{
	struct pgpath *pgpath = data;
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;
	unsigned delay_retry = 0;

	/* device or driver problems */
	switch (errors) {
	case SCSI_DH_OK:
		break;
	case SCSI_DH_NOSYS:
		if (!m->hw_handler_name) {
			errors = 0;
			break;
		}
		DMERR("Could not failover the device: Handler scsi_dh_%s "
		      "Error %d.", m->hw_handler_name, errors);
		/*
		 * Fail path for now, so we do not ping pong
		 */
		fail_path(pgpath);
		break;
	case SCSI_DH_DEV_TEMP_BUSY:
		/*
		 * Probably doing something like FW upgrade on the
		 * controller so try the other pg.
		 */
		bypass_pg(m, pg, 1);
		break;
	case SCSI_DH_RETRY:
		/* Wait before retrying. */
		delay_retry = 1;
	case SCSI_DH_IMM_RETRY:
	case SCSI_DH_RES_TEMP_UNAVAIL:
		if (pg_init_limit_reached(m, pgpath))
			fail_path(pgpath);
		errors = 0;
		break;
	default:
		/*
		 * We probably do not want to fail the path for a device
		 * error, but this is what the old dm did. In future
		 * patches we can do more advanced handling.
		 */
		fail_path(pgpath);
	}

	spin_lock_irqsave(&m->lock, flags);
	if (errors) {
		if (pgpath == m->current_pgpath) {
			DMERR("Could not failover device. Error %d.", errors);
			m->current_pgpath = NULL;
			m->current_pg = NULL;
		}
	} else if (!m->pg_init_required)
		pg->bypassed = 0;

	if (--m->pg_init_in_progress)
		/* Activations of other paths are still on going */
		goto out;

	if (!m->pg_init_required)
		m->queue_io = 0;

	m->pg_init_delay_retry = delay_retry;
	queue_work(kmultipathd, &m->process_queued_ios);

	/*
	 * Wake up any thread waiting to suspend.
	 */
	wake_up(&m->pg_init_wait);

out:
	spin_unlock_irqrestore(&m->lock, flags);
}

static void activate_path(struct work_struct *work)
{
	struct pgpath *pgpath =
		container_of(work, struct pgpath, activate_path.work);

	scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev),
			 pg_init_done, pgpath);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct request *clone,
		     int error, struct dm_mpath_io *mpio)
{
	/*
	 * We don't queue any clone request inside the multipath target
	 * during end I/O handling, since those clone requests don't have
	 * bio clones.  If we queue them inside the multipath target,
	 * we need to make bio clones, which requires memory allocation.
	 * (See drivers/md/dm.c:end_clone_bio() about why the clone requests
	 *  don't have bio clones.)
	 * Instead of queueing the clone request here, we queue the original
	 * request into dm core, which will remake a clone request and
	 * clone bios for it and resubmit it later.
	 */
	int r = DM_ENDIO_REQUEUE;
	unsigned long flags;

	if (!error && !clone->errors)
		return 0;	/* I/O complete */

	if (error == -EOPNOTSUPP || error == -EREMOTEIO || error == -EILSEQ)
		return error;

	if (mpio->pgpath)
		fail_path(mpio->pgpath);

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		if (!m->queue_if_no_path) {
			if (!__must_push_back(m))
				r = -EIO;
		} else {
			if (error == -EBADE)
				r = error;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

static int multipath_end_io(struct dm_target *ti, struct request *clone,
			    int error, union map_info *map_context)
{
	struct multipath *m = ti->private;
	struct dm_mpath_io *mpio = map_context->ptr;
	struct pgpath *pgpath;
	struct path_selector *ps;
	int r;

	BUG_ON(!mpio);

	r = do_end_io(m, clone, error, mpio);
	pgpath = mpio->pgpath;
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path, mpio->nr_bytes);
	}
	clear_mapinfo(m, map_context);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

static void multipath_postsuspend(struct dm_target *ti)
{
	struct multipath *m = ti->private;

	mutex_lock(&m->work_mutex);
	flush_multipath_work(m);
	mutex_unlock(&m->work_mutex);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *            [A|D|E num_ps_status_args [ps_status_args]*
 *             num_paths num_selector_args
 *             [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static int multipath_status(struct dm_target *ti, status_type_t type,
			    unsigned status_flags, char *result, unsigned maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("2 %u %u ", m->queue_size, m->pg_init_count);
	else {
		DMEMIT("%u ", m->queue_if_no_path +
			      (m->pg_init_retries > 0) * 2 +
			      (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT) * 2 +
			      m->retain_attached_hw_handler);
		if (m->queue_if_no_path)
			DMEMIT("queue_if_no_path ");
		if (m->pg_init_retries)
			DMEMIT("pg_init_retries %u ", m->pg_init_retries);
		if (m->pg_init_delay_msecs != DM_PG_INIT_DELAY_DEFAULT)
			DMEMIT("pg_init_delay_msecs %u ", m->pg_init_delay_msecs);
		if (m->retain_attached_hw_handler)
			DMEMIT("retain_attached_hw_handler ");
	}

	if (!m->hw_handler_name || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", m->hw_handler_name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = (m->nr_priority_groups ? 1 : 0);

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r = -EINVAL;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	mutex_lock(&m->work_mutex);

	if (dm_suspended(ti)) {
		r = -EBUSY;
		goto out;
	}

	if (argc == 1) {
		if (!strcasecmp(argv[0], "queue_if_no_path")) {
			r = queue_if_no_path(m, 1, 0);
			goto out;
		} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
			r = queue_if_no_path(m, 0, 0);
			goto out;
		}
	}

	if (argc != 2) {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	if (!strcasecmp(argv[0], "disable_group")) {
		r = bypass_pg_num(m, argv[1], 1);
		goto out;
	} else if (!strcasecmp(argv[0], "enable_group")) {
		r = bypass_pg_num(m, argv[1], 0);
		goto out;
	} else if (!strcasecmp(argv[0], "switch_group")) {
		r = switch_pg_num(m, argv[1]);
		goto out;
	} else if (!strcasecmp(argv[0], "reinstate_path"))
		action = reinstate_path;
	else if (!strcasecmp(argv[0], "fail_path"))
		action = fail_path;
	else {
		DMWARN("Unrecognised multipath message received.");
		goto out;
	}

	r = dm_get_device(ti, argv[1], dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("message: error getting device %s",
		       argv[1]);
		goto out;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

out:
	mutex_unlock(&m->work_mutex);
	return r;
}

static int multipath_ioctl(struct dm_target *ti, unsigned int cmd,
			   unsigned long arg)
{
	struct multipath *m = ti->private;
	struct pgpath *pgpath;
	struct block_device *bdev;
	fmode_t mode;
	unsigned long flags;
	int r;

again:
	bdev = NULL;
	mode = 0;
	r = 0;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->current_pgpath)
		__choose_pgpath(m, 0);

	pgpath = m->current_pgpath;

	if (pgpath) {
		bdev = pgpath->path.dev->bdev;
		mode = pgpath->path.dev->mode;
	}

	if ((pgpath && m->queue_io) || (!pgpath && m->queue_if_no_path))
		r = -EAGAIN;
	else if (!bdev)
		r = -EIO;

	spin_unlock_irqrestore(&m->lock, flags);

	/*
	 * Only pass ioctls through if the device sizes match exactly.
	 */
	if (!r && ti->len != i_size_read(bdev->bd_inode) >> SECTOR_SHIFT)
		r = scsi_verify_blk_ioctl(NULL, cmd);

	if (r == -EAGAIN && !fatal_signal_pending(current)) {
		queue_work(kmultipathd, &m->process_queued_ios);
		msleep(10);
		goto again;
	}

	return r ? : __blkdev_driver_ioctl(bdev, mode, cmd, arg);
}

static int multipath_iterate_devices(struct dm_target *ti,
				     iterate_devices_callout_fn fn, void *data)
{
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *p;
	int ret = 0;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(p, &pg->pgpaths, list) {
			ret = fn(ti, p->path.dev, ti->begin, ti->len, data);
			if (ret)
				goto out;
		}
	}

out:
	return ret;
}

static int __pgpath_busy(struct pgpath *pgpath)
{
	struct request_queue *q = bdev_get_queue(pgpath->path.dev->bdev);

	return dm_underlying_device_busy(q);
}

/*
 * We return "busy", only when we can map I/Os but underlying devices
 * are busy (so even if we map I/Os now, the I/Os will wait on
 * the underlying queue).
 * In other words, if we want to kill I/Os or queue them inside us
 * due to map unavailability, we don't return "busy".  Otherwise,
 * dm core won't give us the I/Os and we can't do what we want.
 */
static int multipath_busy(struct dm_target *ti)
{
	int busy = 0, has_active = 0;
	struct multipath *m = ti->private;
	struct priority_group *pg;
	struct pgpath *pgpath;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	/* Guess which priority_group will be used at next mapping time */
	if (unlikely(!m->current_pgpath && m->next_pg))
		pg = m->next_pg;
	else if (likely(m->current_pg))
		pg = m->current_pg;
	else
		/*
		 * We don't know which pg will be used at next mapping time.
		 * We don't call __choose_pgpath() here to avoid triggering
		 * pg_init just by busy checking.
		 * So we don't know whether underlying devices we will be using
		 * at next mapping time are busy or not.  Just try mapping.
		 */
		goto out;

	/*
	 * If there is one non-busy active path at least, the path selector
	 * will be able to select it.  So we consider such a pg as not busy.
	 */
	busy = 1;
	list_for_each_entry(pgpath, &pg->pgpaths, list)
		if (pgpath->is_active) {
			has_active = 1;

			if (!__pgpath_busy(pgpath)) {
				busy = 0;
				break;
			}
		}

	if (!has_active)
		/*
		 * No active path in this pg, so this pg won't be used and
		 * the current_pg will be changed at next mapping time.
		 * We need to try mapping to determine it.
		 */
		busy = 0;

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return busy;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 5, 0},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map_rq = multipath_map,
	.rq_end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.postsuspend = multipath_postsuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
	.ioctl = multipath_ioctl,
	.iterate_devices = multipath_iterate_devices,
	.busy = multipath_busy,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = KMEM_CACHE(dm_mpath_io, 0);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("register failed %d", r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	kmultipathd = alloc_workqueue("kmpathd", WQ_MEM_RECLAIM, 0);
	if (!kmultipathd) {
		DMERR("failed to create workqueue kmpathd");
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	/*
	 * A separate workqueue is used to handle the device handlers
	 * to avoid overloading existing workqueue. Overloading the
	 * old workqueue would also create a bottleneck in the
	 * path of the storage hardware device activation.
	 */
	kmpath_handlerd = alloc_ordered_workqueue("kmpath_handlerd",
						  WQ_MEM_RECLAIM);
	if (!kmpath_handlerd) {
		DMERR("failed to create workqueue kmpath_handlerd");
		destroy_workqueue(kmultipathd);
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	DMINFO("version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	destroy_workqueue(kmpath_handlerd);
	destroy_workqueue(kmultipathd);

	dm_unregister_target(&multipath_target);
	kmem_cache_destroy(_mpio_cache);
}

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");
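
/*
 * Illustrative example (not part of the original source; sizes and device
 * numbers are hypothetical): a table line for a two-path map using the
 * round-robin selector and no hardware handler, following the constructor
 * format documented above parse_path_selector():
 *
 *   0 2097152 multipath 1 queue_if_no_path 0 1 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *
 * i.e. one feature arg (queue_if_no_path), zero hw handler args, one
 * priority group with initial group 1, selector "round-robin" with zero
 * selector args, and two paths (8:16 and 8:32) with one per-path selector
 * arg each (the round-robin repeat count).
 */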