/*
 * Copyright (C) 2003 Sistina Software Limited.
 * Copyright (C) 2004-2005 Red Hat, Inc. All rights reserved.
 *
 * This file is released under the GPL.
 */

#include "dm.h"
#include "dm-path-selector.h"
#include "dm-hw-handler.h"
#include "dm-bio-list.h"
#include "dm-bio-record.h"

#include <linux/ctype.h>
#include <linux/init.h>
#include <linux/mempool.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/time.h>
#include <linux/workqueue.h>
#include <asm/atomic.h>

#define MESG_STR(x) x, sizeof(x)

/* Path properties */
struct pgpath {
	struct list_head list;

	struct priority_group *pg;	/* Owning PG */
	unsigned fail_count;		/* Cumulative failure count */

	struct path path;
};

#define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path)

/*
 * Paths are grouped into Priority Groups and numbered from 1 upwards.
 * Each has a path selector which controls which path gets used.
 */
struct priority_group {
	struct list_head list;

	struct multipath *m;		/* Owning multipath instance */
	struct path_selector ps;

	unsigned pg_num;		/* Reference number */
	unsigned bypassed;		/* Temporarily bypass this PG? */

	unsigned nr_pgpaths;		/* Number of paths in PG */
	struct list_head pgpaths;
};

/* Multipath context */
struct multipath {
	struct list_head list;
	struct dm_target *ti;

	spinlock_t lock;

	struct hw_handler hw_handler;
	unsigned nr_priority_groups;
	struct list_head priority_groups;
	unsigned pg_init_required;	/* pg_init needs calling? */
	unsigned pg_init_in_progress;	/* Only one pg_init allowed at once */

	unsigned nr_valid_paths;	/* Total number of usable paths */
	struct pgpath *current_pgpath;
	struct priority_group *current_pg;
	struct priority_group *next_pg;	/* Switch to this PG if set */
	unsigned repeat_count;		/* I/Os left before calling PS again */

	unsigned queue_io;		/* Must we queue all I/O? */
	unsigned queue_if_no_path;	/* Queue I/O if last path fails? */
	unsigned saved_queue_if_no_path;/* Saved state during suspension */

	struct work_struct process_queued_ios;
	struct bio_list queued_ios;
	unsigned queue_size;

	struct work_struct trigger_event;

	/*
	 * We must use a mempool of mpath_io structs so that we
	 * can resubmit bios on error.
	 */
	mempool_t *mpio_pool;
};

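
/*
 * Overview of the object relationships, summarised from the structures
 * above: each mapped device owns one struct multipath, which holds a list
 * of priority groups; each priority group in turn holds a list of pgpaths,
 * one per underlying block device.
 */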
/*
 * Context information attached to each bio we process.
 */
struct mpath_io {
	struct pgpath *pgpath;
	struct dm_bio_details details;
};

typedef int (*action_fn) (struct pgpath *pgpath);

#define MIN_IOS 256	/* Mempool size */

static kmem_cache_t *_mpio_cache;

struct workqueue_struct *kmultipathd;
static void process_queued_ios(void *data);
static void trigger_event(void *data);


/*-----------------------------------------------
 * Allocation routines
 *-----------------------------------------------*/

static struct pgpath *alloc_pgpath(void)
{
	struct pgpath *pgpath = kmalloc(sizeof(*pgpath), GFP_KERNEL);

	if (pgpath) {
		memset(pgpath, 0, sizeof(*pgpath));
		pgpath->path.is_active = 1;
	}

	return pgpath;
}

static inline void free_pgpath(struct pgpath *pgpath)
{
	kfree(pgpath);
}

static struct priority_group *alloc_priority_group(void)
{
	struct priority_group *pg;

	pg = kmalloc(sizeof(*pg), GFP_KERNEL);
	if (!pg)
		return NULL;

	memset(pg, 0, sizeof(*pg));
	INIT_LIST_HEAD(&pg->pgpaths);

	return pg;
}

static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
{
	struct pgpath *pgpath, *tmp;

	list_for_each_entry_safe(pgpath, tmp, pgpaths, list) {
		list_del(&pgpath->list);
		dm_put_device(ti, pgpath->path.dev);
		free_pgpath(pgpath);
	}
}

static void free_priority_group(struct priority_group *pg,
				struct dm_target *ti)
{
	struct path_selector *ps = &pg->ps;

	if (ps->type) {
		ps->type->destroy(ps);
		dm_put_path_selector(ps->type);
	}

	free_pgpaths(&pg->pgpaths, ti);
	kfree(pg);
}

static struct multipath *alloc_multipath(void)
{
	struct multipath *m;

	m = kmalloc(sizeof(*m), GFP_KERNEL);
	if (m) {
		memset(m, 0, sizeof(*m));
		INIT_LIST_HEAD(&m->priority_groups);
		spin_lock_init(&m->lock);
		m->queue_io = 1;
		INIT_WORK(&m->process_queued_ios, process_queued_ios, m);
		INIT_WORK(&m->trigger_event, trigger_event, m);
		m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache);
		if (!m->mpio_pool) {
			kfree(m);
			return NULL;
		}
	}

	return m;
}

static void free_multipath(struct multipath *m)
{
	struct priority_group *pg, *tmp;
	struct hw_handler *hwh = &m->hw_handler;

	list_for_each_entry_safe(pg, tmp, &m->priority_groups, list) {
		list_del(&pg->list);
		free_priority_group(pg, m->ti);
	}

	if (hwh->type) {
		hwh->type->destroy(hwh);
		dm_put_hw_handler(hwh->type);
	}

	mempool_destroy(m->mpio_pool);
	kfree(m);
}


/*-----------------------------------------------
 * Path selection
 *-----------------------------------------------*/

static void __switch_pg(struct multipath *m, struct pgpath *pgpath)
{
	struct hw_handler *hwh = &m->hw_handler;

	m->current_pg = pgpath->pg;

	/* Must we initialise the PG first, and queue I/O till it's ready? */
	if (hwh->type && hwh->type->pg_init) {
		m->pg_init_required = 1;
		m->queue_io = 1;
	} else {
		m->pg_init_required = 0;
		m->queue_io = 0;
	}
}

static int __choose_path_in_pg(struct multipath *m, struct priority_group *pg)
{
	struct path *path;

	path = pg->ps.type->select_path(&pg->ps, &m->repeat_count);
	if (!path)
		return -ENXIO;

	m->current_pgpath = path_to_pgpath(path);

	if (m->current_pg != pg)
		__switch_pg(m, m->current_pgpath);

	return 0;
}

static void __choose_pgpath(struct multipath *m)
{
	struct priority_group *pg;
	unsigned bypassed = 1;

	if (!m->nr_valid_paths)
		goto failed;

	/* Were we instructed to switch PG? */
	if (m->next_pg) {
		pg = m->next_pg;
		m->next_pg = NULL;
		if (!__choose_path_in_pg(m, pg))
			return;
	}

	/* Don't change PG until it has no remaining paths */
	if (m->current_pg && !__choose_path_in_pg(m, m->current_pg))
		return;

	/*
	 * Loop through priority groups until we find a valid path.
	 * First time we skip PGs marked 'bypassed'.
	 * Second time we only try the ones we skipped.
	 */
	do {
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed == bypassed)
				continue;
			if (!__choose_path_in_pg(m, pg))
				return;
		}
	} while (bypassed--);

failed:
	m->current_pgpath = NULL;
	m->current_pg = NULL;
}

static int map_io(struct multipath *m, struct bio *bio, struct mpath_io *mpio,
		  unsigned was_queued)
{
	int r = 1;
	unsigned long flags;
	struct pgpath *pgpath;

	spin_lock_irqsave(&m->lock, flags);

	/* Do we need to select a new pgpath? */
	if (!m->current_pgpath ||
	    (!m->queue_io && (m->repeat_count && --m->repeat_count == 0)))
		__choose_pgpath(m);

	pgpath = m->current_pgpath;

	if (was_queued)
		m->queue_size--;

	if ((pgpath && m->queue_io) ||
	    (!pgpath && m->queue_if_no_path)) {
		/* Queue for the daemon to resubmit */
		bio_list_add(&m->queued_ios, bio);
		m->queue_size++;
		if ((m->pg_init_required && !m->pg_init_in_progress) ||
		    !m->queue_io)
			queue_work(kmultipathd, &m->process_queued_ios);
		pgpath = NULL;
		r = 0;
	} else if (!pgpath)
		r = -EIO;		/* Failed */
	else
		bio->bi_bdev = pgpath->path.dev->bdev;

	mpio->pgpath = pgpath;

	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * If we run out of usable paths, should we queue I/O or error it?
 */
static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path,
			    unsigned save_old_value)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (save_old_value)
		m->saved_queue_if_no_path = m->queue_if_no_path;
	else
		m->saved_queue_if_no_path = queue_if_no_path;
	m->queue_if_no_path = queue_if_no_path;
	if (!m->queue_if_no_path && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

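
/*
 * Illustration only: this flag is normally toggled from userspace through
 * the message interface implemented in multipath_message() below, for
 * example (hypothetical device name):
 *
 *	dmsetup message mpath0 0 "queue_if_no_path"
 *	dmsetup message mpath0 0 "fail_if_no_path"
 */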
/*-----------------------------------------------------------------
 * The multipath daemon is responsible for resubmitting queued ios.
 *---------------------------------------------------------------*/

static void dispatch_queued_ios(struct multipath *m)
{
	int r;
	unsigned long flags;
	struct bio *bio = NULL, *next;
	struct mpath_io *mpio;
	union map_info *info;

	spin_lock_irqsave(&m->lock, flags);
	bio = bio_list_get(&m->queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	while (bio) {
		next = bio->bi_next;
		bio->bi_next = NULL;

		info = dm_get_mapinfo(bio);
		mpio = info->ptr;

		r = map_io(m, bio, mpio, 1);
		if (r < 0)
			bio_endio(bio, bio->bi_size, r);
		else if (r == 1)
			generic_make_request(bio);

		bio = next;
	}
}

static void process_queued_ios(void *data)
{
	struct multipath *m = (struct multipath *) data;
	struct hw_handler *hwh = &m->hw_handler;
	struct pgpath *pgpath = NULL;
	unsigned init_required = 0, must_queue = 1;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	if (!m->queue_size)
		goto out;

	if (!m->current_pgpath)
		__choose_pgpath(m);

	pgpath = m->current_pgpath;

	if ((pgpath && !m->queue_io) ||
	    (!pgpath && !m->queue_if_no_path))
		must_queue = 0;

	if (m->pg_init_required && !m->pg_init_in_progress) {
		m->pg_init_required = 0;
		m->pg_init_in_progress = 1;
		init_required = 1;
	}

out:
	spin_unlock_irqrestore(&m->lock, flags);

	if (init_required)
		hwh->type->pg_init(hwh, pgpath->pg->bypassed, &pgpath->path);

	if (!must_queue)
		dispatch_queued_ios(m);
}

/*
 * An event is triggered whenever a path is taken out of use.
 * Includes path failure and PG bypass.
 */
static void trigger_event(void *data)
{
	struct multipath *m = (struct multipath *) data;

	dm_table_event(m->ti->table);
}

/*-----------------------------------------------------------------
 * Constructor/argument parsing:
 * <#multipath feature args> [<arg>]*
 * <#hw_handler args> [hw_handler [<arg>]*]
 * <#priority groups>
 * <initial priority group>
 *     [<selector> <#selector args> [<arg>]*
 *      <#paths> <#per-path selector args>
 *         [<path> [<arg>]* ]+ ]+
 *---------------------------------------------------------------*/
struct param {
	unsigned min;
	unsigned max;
	char *error;
};

#define ESTR(s) ("dm-multipath: " s)

static int read_param(struct param *param, char *str, unsigned *v, char **error)
{
	if (!str ||
	    (sscanf(str, "%u", v) != 1) ||
	    (*v < param->min) ||
	    (*v > param->max)) {
		*error = param->error;
		return -EINVAL;
	}

	return 0;
}

struct arg_set {
	unsigned argc;
	char **argv;
};

static char *shift(struct arg_set *as)
{
	char *r;

	if (as->argc) {
		as->argc--;
		r = *as->argv;
		as->argv++;
		return r;
	}

	return NULL;
}

static void consume(struct arg_set *as, unsigned n)
{
	BUG_ON(as->argc < n);
	as->argc -= n;
	as->argv += n;
}

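
/*
 * Purely illustrative sketch of a constructor/table line following the
 * layout documented above; the device numbers and the round-robin
 * selector are example choices, not requirements:
 *
 *	0 0 2 1 round-robin 0 2 1 8:16 1000 8:32 1000
 *	        round-robin 0 2 1 8:48 1000 8:64 1000
 *
 * i.e. no feature args, no hardware handler, two priority groups with
 * group 1 tried first, each group using round-robin with no selector
 * args, two paths and one per-path selector arg (the repeat count).
 */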
static int parse_path_selector(struct arg_set *as, struct priority_group *pg,
			       struct dm_target *ti)
{
	int r;
	struct path_selector_type *pst;
	unsigned ps_argc;

	static struct param _params[] = {
		{0, 1024, ESTR("invalid number of path selector args")},
	};

	pst = dm_get_path_selector(shift(as));
	if (!pst) {
		ti->error = ESTR("unknown path selector type");
		return -EINVAL;
	}

	r = read_param(_params, shift(as), &ps_argc, &ti->error);
	if (r)
		return -EINVAL;

	r = pst->create(&pg->ps, ps_argc, as->argv);
	if (r) {
		dm_put_path_selector(pst);
		ti->error = ESTR("path selector constructor failed");
		return r;
	}

	pg->ps.type = pst;
	consume(as, ps_argc);

	return 0;
}

static struct pgpath *parse_path(struct arg_set *as, struct path_selector *ps,
				 struct dm_target *ti)
{
	int r;
	struct pgpath *p;

	/* we need at least a path arg */
	if (as->argc < 1) {
		ti->error = ESTR("no device given");
		return NULL;
	}

	p = alloc_pgpath();
	if (!p)
		return NULL;

	r = dm_get_device(ti, shift(as), ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &p->path.dev);
	if (r) {
		ti->error = ESTR("error getting device");
		goto bad;
	}

	r = ps->type->add_path(ps, &p->path, as->argc, as->argv, &ti->error);
	if (r) {
		dm_put_device(ti, p->path.dev);
		goto bad;
	}

	return p;

bad:
	free_pgpath(p);
	return NULL;
}

static struct priority_group *parse_priority_group(struct arg_set *as,
						   struct multipath *m,
						   struct dm_target *ti)
{
	static struct param _params[] = {
		{1, 1024, ESTR("invalid number of paths")},
		{0, 1024, ESTR("invalid number of selector args")}
	};

	int r;
	unsigned i, nr_selector_args, nr_params;
	struct priority_group *pg;

	if (as->argc < 2) {
		as->argc = 0;
		ti->error = ESTR("not enough priority group arguments");
		return NULL;
	}

	pg = alloc_priority_group();
	if (!pg) {
		ti->error = ESTR("couldn't allocate priority group");
		return NULL;
	}
	pg->m = m;

	r = parse_path_selector(as, pg, ti);
	if (r)
		goto bad;

	/*
	 * read the paths
	 */
	r = read_param(_params, shift(as), &pg->nr_pgpaths, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(as), &nr_selector_args, &ti->error);
	if (r)
		goto bad;

	nr_params = 1 + nr_selector_args;
	for (i = 0; i < pg->nr_pgpaths; i++) {
		struct pgpath *pgpath;
		struct arg_set path_args;

		if (as->argc < nr_params)
			goto bad;

		path_args.argc = nr_params;
		path_args.argv = as->argv;

		pgpath = parse_path(&path_args, &pg->ps, ti);
		if (!pgpath)
			goto bad;

		pgpath->pg = pg;
		list_add_tail(&pgpath->list, &pg->pgpaths);
		consume(as, nr_params);
	}

	return pg;

bad:
	free_priority_group(pg, ti);
	return NULL;
}

static int parse_hw_handler(struct arg_set *as, struct multipath *m,
			    struct dm_target *ti)
{
	int r;
	struct hw_handler_type *hwht;
	unsigned hw_argc;

	static struct param _params[] = {
		{0, 1024, ESTR("invalid number of hardware handler args")},
	};

	r = read_param(_params, shift(as), &hw_argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!hw_argc)
		return 0;

	hwht = dm_get_hw_handler(shift(as));
	if (!hwht) {
		ti->error = ESTR("unknown hardware handler type");
		return -EINVAL;
	}

	r = hwht->create(&m->hw_handler, hw_argc - 1, as->argv);
	if (r) {
		dm_put_hw_handler(hwht);
		ti->error = ESTR("hardware handler constructor failed");
		return r;
	}

	m->hw_handler.type = hwht;
	consume(as, hw_argc - 1);

	return 0;
}

static int parse_features(struct arg_set *as, struct multipath *m,
			  struct dm_target *ti)
{
	int r;
	unsigned argc;

	static struct param _params[] = {
		{0, 1, ESTR("invalid number of feature args")},
	};

	r = read_param(_params, shift(as), &argc, &ti->error);
	if (r)
		return -EINVAL;

	if (!argc)
		return 0;

	if (!strnicmp(shift(as), MESG_STR("queue_if_no_path")))
		return queue_if_no_path(m, 1, 0);
	else {
		ti->error = "Unrecognised multipath feature request";
		return -EINVAL;
	}
}

static int multipath_ctr(struct dm_target *ti, unsigned int argc,
			 char **argv)
{
	/* target parameters */
	static struct param _params[] = {
		{1, 1024, ESTR("invalid number of priority groups")},
		{1, 1024, ESTR("invalid initial priority group number")},
	};

	int r;
	struct multipath *m;
	struct arg_set as;
	unsigned pg_count = 0;
	unsigned next_pg_num;

	as.argc = argc;
	as.argv = argv;

	m = alloc_multipath();
	if (!m) {
		ti->error = ESTR("can't allocate multipath");
		return -EINVAL;
	}

	r = parse_features(&as, m, ti);
	if (r)
		goto bad;

	r = parse_hw_handler(&as, m, ti);
	if (r)
		goto bad;

	r = read_param(_params, shift(&as), &m->nr_priority_groups, &ti->error);
	if (r)
		goto bad;

	r = read_param(_params + 1, shift(&as), &next_pg_num, &ti->error);
	if (r)
		goto bad;

	/* parse the priority groups */
	while (as.argc) {
		struct priority_group *pg;

		pg = parse_priority_group(&as, m, ti);
		if (!pg) {
			r = -EINVAL;
			goto bad;
		}

		m->nr_valid_paths += pg->nr_pgpaths;
		list_add_tail(&pg->list, &m->priority_groups);
		pg_count++;
		pg->pg_num = pg_count;
		if (!--next_pg_num)
			m->next_pg = pg;
	}

	if (pg_count != m->nr_priority_groups) {
		ti->error = ESTR("priority group count mismatch");
		r = -EINVAL;
		goto bad;
	}

	ti->private = m;
	m->ti = ti;

	return 0;

bad:
	free_multipath(m);
	return r;
}

static void multipath_dtr(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	flush_workqueue(kmultipathd);
	free_multipath(m);
}

/*
 * Map bios, recording original fields for later in case we have to resubmit
 */
static int multipath_map(struct dm_target *ti, struct bio *bio,
			 union map_info *map_context)
{
	int r;
	struct mpath_io *mpio;
	struct multipath *m = (struct multipath *) ti->private;

	if (bio_barrier(bio))
		return -EOPNOTSUPP;

	mpio = mempool_alloc(m->mpio_pool, GFP_NOIO);
	dm_bio_record(&mpio->details, bio);

	map_context->ptr = mpio;
	bio->bi_rw |= (1 << BIO_RW_FAILFAST);
	r = map_io(m, bio, mpio, 0);
	if (r < 0)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

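
/*
 * Note on the return convention assumed by map_io() and multipath_map()
 * above (the usual device-mapper map semantics for this kernel series):
 *	 1  - bio was remapped; the caller submits it to the new device
 *	 0  - bio was queued internally for the daemon to resubmit later
 *	<0  - error; the bio is completed with that error code
 */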
/*
 * Take a path out of use.
 */
static int fail_path(struct pgpath *pgpath)
{
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (!pgpath->path.is_active)
		goto out;

	DMWARN("dm-multipath: Failing path %s.", pgpath->path.dev->name);

	pgpath->pg->ps.type->fail_path(&pgpath->pg->ps, &pgpath->path);
	pgpath->path.is_active = 0;
	pgpath->fail_count++;

	m->nr_valid_paths--;

	if (pgpath == m->current_pgpath)
		m->current_pgpath = NULL;

	queue_work(kmultipathd, &m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

/*
 * Reinstate a previously-failed path
 */
static int reinstate_path(struct pgpath *pgpath)
{
	int r = 0;
	unsigned long flags;
	struct multipath *m = pgpath->pg->m;

	spin_lock_irqsave(&m->lock, flags);

	if (pgpath->path.is_active)
		goto out;

	if (!pgpath->pg->ps.type->reinstate_path) {
		DMWARN("Reinstate path not supported by path selector %s",
		       pgpath->pg->ps.type->name);
		r = -EINVAL;
		goto out;
	}

	r = pgpath->pg->ps.type->reinstate_path(&pgpath->pg->ps, &pgpath->path);
	if (r)
		goto out;

	pgpath->path.is_active = 1;

	m->current_pgpath = NULL;
	if (!m->nr_valid_paths++ && m->queue_size)
		queue_work(kmultipathd, &m->process_queued_ios);

	queue_work(kmultipathd, &m->trigger_event);

out:
	spin_unlock_irqrestore(&m->lock, flags);

	return r;
}

/*
 * Fail or reinstate all paths that match the provided struct dm_dev.
 */
static int action_dev(struct multipath *m, struct dm_dev *dev,
		      action_fn action)
{
	int r = 0;
	struct pgpath *pgpath;
	struct priority_group *pg;

	list_for_each_entry(pg, &m->priority_groups, list) {
		list_for_each_entry(pgpath, &pg->pgpaths, list) {
			if (pgpath->path.dev == dev)
				r = action(pgpath);
		}
	}

	return r;
}

/*
 * Temporarily try to avoid having to use the specified PG
 */
static void bypass_pg(struct multipath *m, struct priority_group *pg,
		      int bypassed)
{
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);

	pg->bypassed = bypassed;
	m->current_pgpath = NULL;
	m->current_pg = NULL;

	spin_unlock_irqrestore(&m->lock, flags);

	queue_work(kmultipathd, &m->trigger_event);
}

/*
 * Switch to using the specified PG from the next I/O that gets mapped
 */
static int switch_pg_num(struct multipath *m, const char *pgstr)
{
	struct priority_group *pg;
	unsigned pgnum;
	unsigned long flags;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to switch_pg_num");
		return -EINVAL;
	}

	spin_lock_irqsave(&m->lock, flags);
	list_for_each_entry(pg, &m->priority_groups, list) {
		pg->bypassed = 0;
		if (--pgnum)
			continue;

		m->current_pgpath = NULL;
		m->current_pg = NULL;
		m->next_pg = pg;
	}
	spin_unlock_irqrestore(&m->lock, flags);

	queue_work(kmultipathd, &m->trigger_event);
	return 0;
}

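
/*
 * As with queue_if_no_path above, the group controls (switch_pg_num above,
 * bypass_pg_num below) are normally reached through the message interface,
 * e.g. (hypothetical device name):
 *
 *	dmsetup message mpath0 0 "switch_group 2"
 *	dmsetup message mpath0 0 "disable_group 1"
 *	dmsetup message mpath0 0 "enable_group 1"
 */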
/*
 * Set/clear bypassed status of a PG.
 * PGs are numbered upwards from 1 in the order they were declared.
 */
static int bypass_pg_num(struct multipath *m, const char *pgstr, int bypassed)
{
	struct priority_group *pg;
	unsigned pgnum;

	if (!pgstr || (sscanf(pgstr, "%u", &pgnum) != 1) || !pgnum ||
	    (pgnum > m->nr_priority_groups)) {
		DMWARN("invalid PG number supplied to bypass_pg");
		return -EINVAL;
	}

	list_for_each_entry(pg, &m->priority_groups, list) {
		if (!--pgnum)
			break;
	}

	bypass_pg(m, pg, bypassed);
	return 0;
}

/*
 * pg_init must call this when it has completed its initialisation
 */
void dm_pg_init_complete(struct path *path, unsigned err_flags)
{
	struct pgpath *pgpath = path_to_pgpath(path);
	struct priority_group *pg = pgpath->pg;
	struct multipath *m = pg->m;
	unsigned long flags;

	/* We insist on failing the path if the PG is already bypassed. */
	if (err_flags && pg->bypassed)
		err_flags |= MP_FAIL_PATH;

	if (err_flags & MP_FAIL_PATH)
		fail_path(pgpath);

	if (err_flags & MP_BYPASS_PG)
		bypass_pg(m, pg, 1);

	spin_lock_irqsave(&m->lock, flags);
	if (err_flags) {
		m->current_pgpath = NULL;
		m->current_pg = NULL;
	} else if (!m->pg_init_required)
		m->queue_io = 0;

	m->pg_init_in_progress = 0;
	queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * end_io handling
 */
static int do_end_io(struct multipath *m, struct bio *bio,
		     int error, struct mpath_io *mpio)
{
	struct hw_handler *hwh = &m->hw_handler;
	unsigned err_flags = MP_FAIL_PATH;	/* Default behavior */
	unsigned long flags;

	if (!error)
		return 0;	/* I/O complete */

	if ((error == -EWOULDBLOCK) && bio_rw_ahead(bio))
		return error;

	if (error == -EOPNOTSUPP)
		return error;

	spin_lock_irqsave(&m->lock, flags);
	if (!m->nr_valid_paths) {
		if (!m->queue_if_no_path) {
			spin_unlock_irqrestore(&m->lock, flags);
			return -EIO;
		} else {
			spin_unlock_irqrestore(&m->lock, flags);
			goto requeue;
		}
	}
	spin_unlock_irqrestore(&m->lock, flags);

	if (hwh->type && hwh->type->error)
		err_flags = hwh->type->error(hwh, bio);

	if (mpio->pgpath) {
		if (err_flags & MP_FAIL_PATH)
			fail_path(mpio->pgpath);

		if (err_flags & MP_BYPASS_PG)
			bypass_pg(m, mpio->pgpath->pg, 1);
	}

	if (err_flags & MP_ERROR_IO)
		return -EIO;

requeue:
	dm_bio_restore(&mpio->details, bio);

	/* queue for the daemon to resubmit or fail */
	spin_lock_irqsave(&m->lock, flags);
	bio_list_add(&m->queued_ios, bio);
	m->queue_size++;
	if (!m->queue_io)
		queue_work(kmultipathd, &m->process_queued_ios);
	spin_unlock_irqrestore(&m->lock, flags);

	return 1;	/* io not complete */
}

static int multipath_end_io(struct dm_target *ti, struct bio *bio,
			    int error, union map_info *map_context)
{
	struct multipath *m = (struct multipath *) ti->private;
	struct mpath_io *mpio = (struct mpath_io *) map_context->ptr;
	struct pgpath *pgpath = mpio->pgpath;
	struct path_selector *ps;
	int r;

	r = do_end_io(m, bio, error, mpio);
	if (pgpath) {
		ps = &pgpath->pg->ps;
		if (ps->type->end_io)
			ps->type->end_io(ps, &pgpath->path);
	}
	if (r <= 0)
		mempool_free(mpio, m->mpio_pool);

	return r;
}

/*
 * Suspend can't complete until all the I/O is processed so if
 * the last path fails we must error any remaining I/O.
 * Note that if the freeze_bdev fails while suspending, the
 * queue_if_no_path state is lost - userspace should reset it.
 */
static void multipath_presuspend(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;

	queue_if_no_path(m, 0, 1);
}

/*
 * Restore the queue_if_no_path setting.
 */
static void multipath_resume(struct dm_target *ti)
{
	struct multipath *m = (struct multipath *) ti->private;
	unsigned long flags;

	spin_lock_irqsave(&m->lock, flags);
	m->queue_if_no_path = m->saved_queue_if_no_path;
	spin_unlock_irqrestore(&m->lock, flags);
}

/*
 * Info output has the following format:
 * num_multipath_feature_args [multipath_feature_args]*
 * num_handler_status_args [handler_status_args]*
 * num_groups init_group_number
 *             [A|D|E num_ps_status_args [ps_status_args]*
 *              num_paths num_selector_args
 *              [path_dev A|F fail_count [selector_args]* ]+ ]+
 *
 * Table output has the following format (identical to the constructor string):
 * num_feature_args [features_args]*
 * num_handler_args hw_handler [hw_handler_args]*
 * num_groups init_group_number
 *     [priority selector-name num_ps_args [ps_args]*
 *      num_paths num_selector_args [path_dev [selector_args]* ]+ ]+
 */
static int multipath_status(struct dm_target *ti, status_type_t type,
			    char *result, unsigned int maxlen)
{
	int sz = 0;
	unsigned long flags;
	struct multipath *m = (struct multipath *) ti->private;
	struct hw_handler *hwh = &m->hw_handler;
	struct priority_group *pg;
	struct pgpath *p;
	unsigned pg_num;
	char state;

	spin_lock_irqsave(&m->lock, flags);

	/* Features */
	if (type == STATUSTYPE_INFO)
		DMEMIT("1 %u ", m->queue_size);
	else if (m->queue_if_no_path)
		DMEMIT("1 queue_if_no_path ");
	else
		DMEMIT("0 ");

	if (hwh->type && hwh->type->status)
		sz += hwh->type->status(hwh, type, result + sz, maxlen - sz);
	else if (!hwh->type || type == STATUSTYPE_INFO)
		DMEMIT("0 ");
	else
		DMEMIT("1 %s ", hwh->type->name);

	DMEMIT("%u ", m->nr_priority_groups);

	if (m->next_pg)
		pg_num = m->next_pg->pg_num;
	else if (m->current_pg)
		pg_num = m->current_pg->pg_num;
	else
		pg_num = 1;

	DMEMIT("%u ", pg_num);

	switch (type) {
	case STATUSTYPE_INFO:
		list_for_each_entry(pg, &m->priority_groups, list) {
			if (pg->bypassed)
				state = 'D';	/* Disabled */
			else if (pg == m->current_pg)
				state = 'A';	/* Currently Active */
			else
				state = 'E';	/* Enabled */

			DMEMIT("%c ", state);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->info_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s %s %u ", p->path.dev->name,
				       p->path.is_active ? "A" : "F",
				       p->fail_count);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;

	case STATUSTYPE_TABLE:
		list_for_each_entry(pg, &m->priority_groups, list) {
			DMEMIT("%s ", pg->ps.type->name);

			if (pg->ps.type->status)
				sz += pg->ps.type->status(&pg->ps, NULL, type,
							  result + sz,
							  maxlen - sz);
			else
				DMEMIT("0 ");

			DMEMIT("%u %u ", pg->nr_pgpaths,
			       pg->ps.type->table_args);

			list_for_each_entry(p, &pg->pgpaths, list) {
				DMEMIT("%s ", p->path.dev->name);
				if (pg->ps.type->status)
					sz += pg->ps.type->status(&pg->ps,
					      &p->path, type, result + sz,
					      maxlen - sz);
			}
		}
		break;
	}

	spin_unlock_irqrestore(&m->lock, flags);

	return 0;
}

static int multipath_message(struct dm_target *ti, unsigned argc, char **argv)
{
	int r;
	struct dm_dev *dev;
	struct multipath *m = (struct multipath *) ti->private;
	action_fn action;

	if (argc == 1) {
		if (!strnicmp(argv[0], MESG_STR("queue_if_no_path")))
			return queue_if_no_path(m, 1, 0);
		else if (!strnicmp(argv[0], MESG_STR("fail_if_no_path")))
			return queue_if_no_path(m, 0, 0);
	}

	if (argc != 2)
		goto error;

	if (!strnicmp(argv[0], MESG_STR("disable_group")))
		return bypass_pg_num(m, argv[1], 1);
	else if (!strnicmp(argv[0], MESG_STR("enable_group")))
		return bypass_pg_num(m, argv[1], 0);
	else if (!strnicmp(argv[0], MESG_STR("switch_group")))
		return switch_pg_num(m, argv[1]);
	else if (!strnicmp(argv[0], MESG_STR("reinstate_path")))
		action = reinstate_path;
	else if (!strnicmp(argv[0], MESG_STR("fail_path")))
		action = fail_path;
	else
		goto error;

	r = dm_get_device(ti, argv[1], ti->begin, ti->len,
			  dm_table_get_mode(ti->table), &dev);
	if (r) {
		DMWARN("dm-multipath message: error getting device %s",
		       argv[1]);
		return -EINVAL;
	}

	r = action_dev(m, dev, action);

	dm_put_device(ti, dev);

	return r;

error:
	DMWARN("Unrecognised multipath message received.");
	return -EINVAL;
}

/*-----------------------------------------------------------------
 * Module setup
 *---------------------------------------------------------------*/
static struct target_type multipath_target = {
	.name = "multipath",
	.version = {1, 0, 4},
	.module = THIS_MODULE,
	.ctr = multipath_ctr,
	.dtr = multipath_dtr,
	.map = multipath_map,
	.end_io = multipath_end_io,
	.presuspend = multipath_presuspend,
	.resume = multipath_resume,
	.status = multipath_status,
	.message = multipath_message,
};

static int __init dm_multipath_init(void)
{
	int r;

	/* allocate a slab for the dm_ios */
	_mpio_cache = kmem_cache_create("dm_mpath", sizeof(struct mpath_io),
					0, 0, NULL, NULL);
	if (!_mpio_cache)
		return -ENOMEM;

	r = dm_register_target(&multipath_target);
	if (r < 0) {
		DMERR("%s: register failed %d", multipath_target.name, r);
		kmem_cache_destroy(_mpio_cache);
		return -EINVAL;
	}

	kmultipathd = create_workqueue("kmpathd");
	if (!kmultipathd) {
		DMERR("%s: failed to create workqueue kmpathd",
		      multipath_target.name);
		dm_unregister_target(&multipath_target);
		kmem_cache_destroy(_mpio_cache);
		return -ENOMEM;
	}

	DMINFO("dm-multipath version %u.%u.%u loaded",
	       multipath_target.version[0], multipath_target.version[1],
	       multipath_target.version[2]);

	return r;
}

static void __exit dm_multipath_exit(void)
{
	int r;

	destroy_workqueue(kmultipathd);

	r = dm_unregister_target(&multipath_target);
	if (r < 0)
		DMERR("%s: target unregister failed %d",
		      multipath_target.name, r);
	kmem_cache_destroy(_mpio_cache);
}

EXPORT_SYMBOL_GPL(dm_pg_init_complete);

module_init(dm_multipath_init);
module_exit(dm_multipath_exit);

MODULE_DESCRIPTION(DM_NAME " multipath target");
MODULE_AUTHOR("Sistina Software <dm-devel@redhat.com>");
MODULE_LICENSE("GPL");