1 /* 2 * Copyright (C) 2001 Sistina Software (UK) Limited. 3 * Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved. 4 * 5 * This file is released under the GPL. 6 */ 7 8 #include "dm.h" 9 10 #include <linux/module.h> 11 #include <linux/vmalloc.h> 12 #include <linux/blkdev.h> 13 #include <linux/namei.h> 14 #include <linux/ctype.h> 15 #include <linux/slab.h> 16 #include <linux/interrupt.h> 17 #include <linux/mutex.h> 18 #include <linux/delay.h> 19 #include <asm/atomic.h> 20 21 #define DM_MSG_PREFIX "table" 22 23 #define MAX_DEPTH 16 24 #define NODE_SIZE L1_CACHE_BYTES 25 #define KEYS_PER_NODE (NODE_SIZE / sizeof(sector_t)) 26 #define CHILDREN_PER_NODE (KEYS_PER_NODE + 1) 27 28 /* 29 * The table has always exactly one reference from either mapped_device->map 30 * or hash_cell->new_map. This reference is not counted in table->holders. 31 * A pair of dm_create_table/dm_destroy_table functions is used for table 32 * creation/destruction. 33 * 34 * Temporary references from the other code increase table->holders. A pair 35 * of dm_table_get/dm_table_put functions is used to manipulate it. 36 * 37 * When the table is about to be destroyed, we wait for table->holders to 38 * drop to zero. 39 */ 40 41 struct dm_table { 42 struct mapped_device *md; 43 atomic_t holders; 44 45 /* btree table */ 46 unsigned int depth; 47 unsigned int counts[MAX_DEPTH]; /* in nodes */ 48 sector_t *index[MAX_DEPTH]; 49 50 unsigned int num_targets; 51 unsigned int num_allocated; 52 sector_t *highs; 53 struct dm_target *targets; 54 55 /* 56 * Indicates the rw permissions for the new logical 57 * device. This should be a combination of FMODE_READ 58 * and FMODE_WRITE. 59 */ 60 fmode_t mode; 61 62 /* a list of devices used by this table */ 63 struct list_head devices; 64 65 /* 66 * These are optimistic limits taken from all the 67 * targets, some targets will need smaller limits. 68 */ 69 struct io_restrictions limits; 70 71 /* events get handed up using this callback */ 72 void (*event_fn)(void *); 73 void *event_context; 74 }; 75 76 /* 77 * Similar to ceiling(log_size(n)) 78 */ 79 static unsigned int int_log(unsigned int n, unsigned int base) 80 { 81 int result = 0; 82 83 while (n > 1) { 84 n = dm_div_up(n, base); 85 result++; 86 } 87 88 return result; 89 } 90 91 /* 92 * Returns the minimum that is _not_ zero, unless both are zero. 93 */ 94 #define min_not_zero(l, r) (l == 0) ? r : ((r == 0) ? l : min(l, r)) 95 96 /* 97 * Combine two io_restrictions, always taking the lower value. 98 */ 99 static void combine_restrictions_low(struct io_restrictions *lhs, 100 struct io_restrictions *rhs) 101 { 102 lhs->max_sectors = 103 min_not_zero(lhs->max_sectors, rhs->max_sectors); 104 105 lhs->max_phys_segments = 106 min_not_zero(lhs->max_phys_segments, rhs->max_phys_segments); 107 108 lhs->max_hw_segments = 109 min_not_zero(lhs->max_hw_segments, rhs->max_hw_segments); 110 111 lhs->logical_block_size = max(lhs->logical_block_size, 112 rhs->logical_block_size); 113 114 lhs->max_segment_size = 115 min_not_zero(lhs->max_segment_size, rhs->max_segment_size); 116 117 lhs->max_hw_sectors = 118 min_not_zero(lhs->max_hw_sectors, rhs->max_hw_sectors); 119 120 lhs->seg_boundary_mask = 121 min_not_zero(lhs->seg_boundary_mask, rhs->seg_boundary_mask); 122 123 lhs->bounce_pfn = min_not_zero(lhs->bounce_pfn, rhs->bounce_pfn); 124 125 lhs->no_cluster |= rhs->no_cluster; 126 } 127 128 /* 129 * Calculate the index of the child node of the n'th node k'th key. 130 */ 131 static inline unsigned int get_child(unsigned int n, unsigned int k) 132 { 133 return (n * CHILDREN_PER_NODE) + k; 134 } 135 136 /* 137 * Return the n'th node of level l from table t. 138 */ 139 static inline sector_t *get_node(struct dm_table *t, 140 unsigned int l, unsigned int n) 141 { 142 return t->index[l] + (n * KEYS_PER_NODE); 143 } 144 145 /* 146 * Return the highest key that you could lookup from the n'th 147 * node on level l of the btree. 148 */ 149 static sector_t high(struct dm_table *t, unsigned int l, unsigned int n) 150 { 151 for (; l < t->depth - 1; l++) 152 n = get_child(n, CHILDREN_PER_NODE - 1); 153 154 if (n >= t->counts[l]) 155 return (sector_t) - 1; 156 157 return get_node(t, l, n)[KEYS_PER_NODE - 1]; 158 } 159 160 /* 161 * Fills in a level of the btree based on the highs of the level 162 * below it. 163 */ 164 static int setup_btree_index(unsigned int l, struct dm_table *t) 165 { 166 unsigned int n, k; 167 sector_t *node; 168 169 for (n = 0U; n < t->counts[l]; n++) { 170 node = get_node(t, l, n); 171 172 for (k = 0U; k < KEYS_PER_NODE; k++) 173 node[k] = high(t, l + 1, get_child(n, k)); 174 } 175 176 return 0; 177 } 178 179 void *dm_vcalloc(unsigned long nmemb, unsigned long elem_size) 180 { 181 unsigned long size; 182 void *addr; 183 184 /* 185 * Check that we're not going to overflow. 186 */ 187 if (nmemb > (ULONG_MAX / elem_size)) 188 return NULL; 189 190 size = nmemb * elem_size; 191 addr = vmalloc(size); 192 if (addr) 193 memset(addr, 0, size); 194 195 return addr; 196 } 197 198 /* 199 * highs, and targets are managed as dynamic arrays during a 200 * table load. 201 */ 202 static int alloc_targets(struct dm_table *t, unsigned int num) 203 { 204 sector_t *n_highs; 205 struct dm_target *n_targets; 206 int n = t->num_targets; 207 208 /* 209 * Allocate both the target array and offset array at once. 210 * Append an empty entry to catch sectors beyond the end of 211 * the device. 212 */ 213 n_highs = (sector_t *) dm_vcalloc(num + 1, sizeof(struct dm_target) + 214 sizeof(sector_t)); 215 if (!n_highs) 216 return -ENOMEM; 217 218 n_targets = (struct dm_target *) (n_highs + num); 219 220 if (n) { 221 memcpy(n_highs, t->highs, sizeof(*n_highs) * n); 222 memcpy(n_targets, t->targets, sizeof(*n_targets) * n); 223 } 224 225 memset(n_highs + n, -1, sizeof(*n_highs) * (num - n)); 226 vfree(t->highs); 227 228 t->num_allocated = num; 229 t->highs = n_highs; 230 t->targets = n_targets; 231 232 return 0; 233 } 234 235 int dm_table_create(struct dm_table **result, fmode_t mode, 236 unsigned num_targets, struct mapped_device *md) 237 { 238 struct dm_table *t = kzalloc(sizeof(*t), GFP_KERNEL); 239 240 if (!t) 241 return -ENOMEM; 242 243 INIT_LIST_HEAD(&t->devices); 244 atomic_set(&t->holders, 0); 245 246 if (!num_targets) 247 num_targets = KEYS_PER_NODE; 248 249 num_targets = dm_round_up(num_targets, KEYS_PER_NODE); 250 251 if (alloc_targets(t, num_targets)) { 252 kfree(t); 253 t = NULL; 254 return -ENOMEM; 255 } 256 257 t->mode = mode; 258 t->md = md; 259 *result = t; 260 return 0; 261 } 262 263 static void free_devices(struct list_head *devices) 264 { 265 struct list_head *tmp, *next; 266 267 list_for_each_safe(tmp, next, devices) { 268 struct dm_dev_internal *dd = 269 list_entry(tmp, struct dm_dev_internal, list); 270 kfree(dd); 271 } 272 } 273 274 void dm_table_destroy(struct dm_table *t) 275 { 276 unsigned int i; 277 278 while (atomic_read(&t->holders)) 279 msleep(1); 280 smp_mb(); 281 282 /* free the indexes (see dm_table_complete) */ 283 if (t->depth >= 2) 284 vfree(t->index[t->depth - 2]); 285 286 /* free the targets */ 287 for (i = 0; i < t->num_targets; i++) { 288 struct dm_target *tgt = t->targets + i; 289 290 if (tgt->type->dtr) 291 tgt->type->dtr(tgt); 292 293 dm_put_target_type(tgt->type); 294 } 295 296 vfree(t->highs); 297 298 /* free the device list */ 299 if (t->devices.next != &t->devices) { 300 DMWARN("devices still present during destroy: " 301 "dm_table_remove_device calls missing"); 302 303 free_devices(&t->devices); 304 } 305 306 kfree(t); 307 } 308 309 void dm_table_get(struct dm_table *t) 310 { 311 atomic_inc(&t->holders); 312 } 313 314 void dm_table_put(struct dm_table *t) 315 { 316 if (!t) 317 return; 318 319 smp_mb__before_atomic_dec(); 320 atomic_dec(&t->holders); 321 } 322 323 /* 324 * Checks to see if we need to extend highs or targets. 325 */ 326 static inline int check_space(struct dm_table *t) 327 { 328 if (t->num_targets >= t->num_allocated) 329 return alloc_targets(t, t->num_allocated * 2); 330 331 return 0; 332 } 333 334 /* 335 * See if we've already got a device in the list. 336 */ 337 static struct dm_dev_internal *find_device(struct list_head *l, dev_t dev) 338 { 339 struct dm_dev_internal *dd; 340 341 list_for_each_entry (dd, l, list) 342 if (dd->dm_dev.bdev->bd_dev == dev) 343 return dd; 344 345 return NULL; 346 } 347 348 /* 349 * Open a device so we can use it as a map destination. 350 */ 351 static int open_dev(struct dm_dev_internal *d, dev_t dev, 352 struct mapped_device *md) 353 { 354 static char *_claim_ptr = "I belong to device-mapper"; 355 struct block_device *bdev; 356 357 int r; 358 359 BUG_ON(d->dm_dev.bdev); 360 361 bdev = open_by_devnum(dev, d->dm_dev.mode); 362 if (IS_ERR(bdev)) 363 return PTR_ERR(bdev); 364 r = bd_claim_by_disk(bdev, _claim_ptr, dm_disk(md)); 365 if (r) 366 blkdev_put(bdev, d->dm_dev.mode); 367 else 368 d->dm_dev.bdev = bdev; 369 return r; 370 } 371 372 /* 373 * Close a device that we've been using. 374 */ 375 static void close_dev(struct dm_dev_internal *d, struct mapped_device *md) 376 { 377 if (!d->dm_dev.bdev) 378 return; 379 380 bd_release_from_disk(d->dm_dev.bdev, dm_disk(md)); 381 blkdev_put(d->dm_dev.bdev, d->dm_dev.mode); 382 d->dm_dev.bdev = NULL; 383 } 384 385 /* 386 * If possible, this checks an area of a destination device is valid. 387 */ 388 static int check_device_area(struct dm_dev_internal *dd, sector_t start, 389 sector_t len) 390 { 391 sector_t dev_size = dd->dm_dev.bdev->bd_inode->i_size >> SECTOR_SHIFT; 392 393 if (!dev_size) 394 return 1; 395 396 return ((start < dev_size) && (len <= (dev_size - start))); 397 } 398 399 /* 400 * This upgrades the mode on an already open dm_dev, being 401 * careful to leave things as they were if we fail to reopen the 402 * device and not to touch the existing bdev field in case 403 * it is accessed concurrently inside dm_table_any_congested(). 404 */ 405 static int upgrade_mode(struct dm_dev_internal *dd, fmode_t new_mode, 406 struct mapped_device *md) 407 { 408 int r; 409 struct dm_dev_internal dd_new, dd_old; 410 411 dd_new = dd_old = *dd; 412 413 dd_new.dm_dev.mode |= new_mode; 414 dd_new.dm_dev.bdev = NULL; 415 416 r = open_dev(&dd_new, dd->dm_dev.bdev->bd_dev, md); 417 if (r) 418 return r; 419 420 dd->dm_dev.mode |= new_mode; 421 close_dev(&dd_old, md); 422 423 return 0; 424 } 425 426 /* 427 * Add a device to the list, or just increment the usage count if 428 * it's already present. 429 */ 430 static int __table_get_device(struct dm_table *t, struct dm_target *ti, 431 const char *path, sector_t start, sector_t len, 432 fmode_t mode, struct dm_dev **result) 433 { 434 int r; 435 dev_t uninitialized_var(dev); 436 struct dm_dev_internal *dd; 437 unsigned int major, minor; 438 439 BUG_ON(!t); 440 441 if (sscanf(path, "%u:%u", &major, &minor) == 2) { 442 /* Extract the major/minor numbers */ 443 dev = MKDEV(major, minor); 444 if (MAJOR(dev) != major || MINOR(dev) != minor) 445 return -EOVERFLOW; 446 } else { 447 /* convert the path to a device */ 448 struct block_device *bdev = lookup_bdev(path); 449 450 if (IS_ERR(bdev)) 451 return PTR_ERR(bdev); 452 dev = bdev->bd_dev; 453 bdput(bdev); 454 } 455 456 dd = find_device(&t->devices, dev); 457 if (!dd) { 458 dd = kmalloc(sizeof(*dd), GFP_KERNEL); 459 if (!dd) 460 return -ENOMEM; 461 462 dd->dm_dev.mode = mode; 463 dd->dm_dev.bdev = NULL; 464 465 if ((r = open_dev(dd, dev, t->md))) { 466 kfree(dd); 467 return r; 468 } 469 470 format_dev_t(dd->dm_dev.name, dev); 471 472 atomic_set(&dd->count, 0); 473 list_add(&dd->list, &t->devices); 474 475 } else if (dd->dm_dev.mode != (mode | dd->dm_dev.mode)) { 476 r = upgrade_mode(dd, mode, t->md); 477 if (r) 478 return r; 479 } 480 atomic_inc(&dd->count); 481 482 if (!check_device_area(dd, start, len)) { 483 DMWARN("device %s too small for target", path); 484 dm_put_device(ti, &dd->dm_dev); 485 return -EINVAL; 486 } 487 488 *result = &dd->dm_dev; 489 490 return 0; 491 } 492 493 void dm_set_device_limits(struct dm_target *ti, struct block_device *bdev) 494 { 495 struct request_queue *q = bdev_get_queue(bdev); 496 struct io_restrictions *rs = &ti->limits; 497 char b[BDEVNAME_SIZE]; 498 499 if (unlikely(!q)) { 500 DMWARN("%s: Cannot set limits for nonexistent device %s", 501 dm_device_name(ti->table->md), bdevname(bdev, b)); 502 return; 503 } 504 505 /* 506 * Combine the device limits low. 507 * 508 * FIXME: if we move an io_restriction struct 509 * into q this would just be a call to 510 * combine_restrictions_low() 511 */ 512 rs->max_sectors = 513 min_not_zero(rs->max_sectors, queue_max_sectors(q)); 514 515 /* 516 * Check if merge fn is supported. 517 * If not we'll force DM to use PAGE_SIZE or 518 * smaller I/O, just to be safe. 519 */ 520 521 if (q->merge_bvec_fn && !ti->type->merge) 522 rs->max_sectors = 523 min_not_zero(rs->max_sectors, 524 (unsigned int) (PAGE_SIZE >> 9)); 525 526 rs->max_phys_segments = 527 min_not_zero(rs->max_phys_segments, 528 queue_max_phys_segments(q)); 529 530 rs->max_hw_segments = 531 min_not_zero(rs->max_hw_segments, queue_max_hw_segments(q)); 532 533 rs->logical_block_size = max(rs->logical_block_size, 534 queue_logical_block_size(q)); 535 536 rs->max_segment_size = 537 min_not_zero(rs->max_segment_size, queue_max_segment_size(q)); 538 539 rs->max_hw_sectors = 540 min_not_zero(rs->max_hw_sectors, queue_max_hw_sectors(q)); 541 542 rs->seg_boundary_mask = 543 min_not_zero(rs->seg_boundary_mask, 544 queue_segment_boundary(q)); 545 546 rs->bounce_pfn = min_not_zero(rs->bounce_pfn, queue_bounce_pfn(q)); 547 548 rs->no_cluster |= !test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); 549 } 550 EXPORT_SYMBOL_GPL(dm_set_device_limits); 551 552 int dm_get_device(struct dm_target *ti, const char *path, sector_t start, 553 sector_t len, fmode_t mode, struct dm_dev **result) 554 { 555 int r = __table_get_device(ti->table, ti, path, 556 start, len, mode, result); 557 558 if (!r) 559 dm_set_device_limits(ti, (*result)->bdev); 560 561 return r; 562 } 563 564 /* 565 * Decrement a devices use count and remove it if necessary. 566 */ 567 void dm_put_device(struct dm_target *ti, struct dm_dev *d) 568 { 569 struct dm_dev_internal *dd = container_of(d, struct dm_dev_internal, 570 dm_dev); 571 572 if (atomic_dec_and_test(&dd->count)) { 573 close_dev(dd, ti->table->md); 574 list_del(&dd->list); 575 kfree(dd); 576 } 577 } 578 579 /* 580 * Checks to see if the target joins onto the end of the table. 581 */ 582 static int adjoin(struct dm_table *table, struct dm_target *ti) 583 { 584 struct dm_target *prev; 585 586 if (!table->num_targets) 587 return !ti->begin; 588 589 prev = &table->targets[table->num_targets - 1]; 590 return (ti->begin == (prev->begin + prev->len)); 591 } 592 593 /* 594 * Used to dynamically allocate the arg array. 595 */ 596 static char **realloc_argv(unsigned *array_size, char **old_argv) 597 { 598 char **argv; 599 unsigned new_size; 600 601 new_size = *array_size ? *array_size * 2 : 64; 602 argv = kmalloc(new_size * sizeof(*argv), GFP_KERNEL); 603 if (argv) { 604 memcpy(argv, old_argv, *array_size * sizeof(*argv)); 605 *array_size = new_size; 606 } 607 608 kfree(old_argv); 609 return argv; 610 } 611 612 /* 613 * Destructively splits up the argument list to pass to ctr. 614 */ 615 int dm_split_args(int *argc, char ***argvp, char *input) 616 { 617 char *start, *end = input, *out, **argv = NULL; 618 unsigned array_size = 0; 619 620 *argc = 0; 621 622 if (!input) { 623 *argvp = NULL; 624 return 0; 625 } 626 627 argv = realloc_argv(&array_size, argv); 628 if (!argv) 629 return -ENOMEM; 630 631 while (1) { 632 start = end; 633 634 /* Skip whitespace */ 635 while (*start && isspace(*start)) 636 start++; 637 638 if (!*start) 639 break; /* success, we hit the end */ 640 641 /* 'out' is used to remove any back-quotes */ 642 end = out = start; 643 while (*end) { 644 /* Everything apart from '\0' can be quoted */ 645 if (*end == '\\' && *(end + 1)) { 646 *out++ = *(end + 1); 647 end += 2; 648 continue; 649 } 650 651 if (isspace(*end)) 652 break; /* end of token */ 653 654 *out++ = *end++; 655 } 656 657 /* have we already filled the array ? */ 658 if ((*argc + 1) > array_size) { 659 argv = realloc_argv(&array_size, argv); 660 if (!argv) 661 return -ENOMEM; 662 } 663 664 /* we know this is whitespace */ 665 if (*end) 666 end++; 667 668 /* terminate the string and put it in the array */ 669 *out = '\0'; 670 argv[*argc] = start; 671 (*argc)++; 672 } 673 674 *argvp = argv; 675 return 0; 676 } 677 678 static void check_for_valid_limits(struct io_restrictions *rs) 679 { 680 if (!rs->max_sectors) 681 rs->max_sectors = SAFE_MAX_SECTORS; 682 if (!rs->max_hw_sectors) 683 rs->max_hw_sectors = SAFE_MAX_SECTORS; 684 if (!rs->max_phys_segments) 685 rs->max_phys_segments = MAX_PHYS_SEGMENTS; 686 if (!rs->max_hw_segments) 687 rs->max_hw_segments = MAX_HW_SEGMENTS; 688 if (!rs->logical_block_size) 689 rs->logical_block_size = 1 << SECTOR_SHIFT; 690 if (!rs->max_segment_size) 691 rs->max_segment_size = MAX_SEGMENT_SIZE; 692 if (!rs->seg_boundary_mask) 693 rs->seg_boundary_mask = BLK_SEG_BOUNDARY_MASK; 694 if (!rs->bounce_pfn) 695 rs->bounce_pfn = -1; 696 } 697 698 int dm_table_add_target(struct dm_table *t, const char *type, 699 sector_t start, sector_t len, char *params) 700 { 701 int r = -EINVAL, argc; 702 char **argv; 703 struct dm_target *tgt; 704 705 if ((r = check_space(t))) 706 return r; 707 708 tgt = t->targets + t->num_targets; 709 memset(tgt, 0, sizeof(*tgt)); 710 711 if (!len) { 712 DMERR("%s: zero-length target", dm_device_name(t->md)); 713 return -EINVAL; 714 } 715 716 tgt->type = dm_get_target_type(type); 717 if (!tgt->type) { 718 DMERR("%s: %s: unknown target type", dm_device_name(t->md), 719 type); 720 return -EINVAL; 721 } 722 723 tgt->table = t; 724 tgt->begin = start; 725 tgt->len = len; 726 tgt->error = "Unknown error"; 727 728 /* 729 * Does this target adjoin the previous one ? 730 */ 731 if (!adjoin(t, tgt)) { 732 tgt->error = "Gap in table"; 733 r = -EINVAL; 734 goto bad; 735 } 736 737 r = dm_split_args(&argc, &argv, params); 738 if (r) { 739 tgt->error = "couldn't split parameters (insufficient memory)"; 740 goto bad; 741 } 742 743 r = tgt->type->ctr(tgt, argc, argv); 744 kfree(argv); 745 if (r) 746 goto bad; 747 748 t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; 749 750 /* FIXME: the plan is to combine high here and then have 751 * the merge fn apply the target level restrictions. */ 752 combine_restrictions_low(&t->limits, &tgt->limits); 753 return 0; 754 755 bad: 756 DMERR("%s: %s: %s", dm_device_name(t->md), type, tgt->error); 757 dm_put_target_type(tgt->type); 758 return r; 759 } 760 761 static int setup_indexes(struct dm_table *t) 762 { 763 int i; 764 unsigned int total = 0; 765 sector_t *indexes; 766 767 /* allocate the space for *all* the indexes */ 768 for (i = t->depth - 2; i >= 0; i--) { 769 t->counts[i] = dm_div_up(t->counts[i + 1], CHILDREN_PER_NODE); 770 total += t->counts[i]; 771 } 772 773 indexes = (sector_t *) dm_vcalloc(total, (unsigned long) NODE_SIZE); 774 if (!indexes) 775 return -ENOMEM; 776 777 /* set up internal nodes, bottom-up */ 778 for (i = t->depth - 2; i >= 0; i--) { 779 t->index[i] = indexes; 780 indexes += (KEYS_PER_NODE * t->counts[i]); 781 setup_btree_index(i, t); 782 } 783 784 return 0; 785 } 786 787 /* 788 * Builds the btree to index the map. 789 */ 790 int dm_table_complete(struct dm_table *t) 791 { 792 int r = 0; 793 unsigned int leaf_nodes; 794 795 check_for_valid_limits(&t->limits); 796 797 /* how many indexes will the btree have ? */ 798 leaf_nodes = dm_div_up(t->num_targets, KEYS_PER_NODE); 799 t->depth = 1 + int_log(leaf_nodes, CHILDREN_PER_NODE); 800 801 /* leaf layer has already been set up */ 802 t->counts[t->depth - 1] = leaf_nodes; 803 t->index[t->depth - 1] = t->highs; 804 805 if (t->depth >= 2) 806 r = setup_indexes(t); 807 808 return r; 809 } 810 811 static DEFINE_MUTEX(_event_lock); 812 void dm_table_event_callback(struct dm_table *t, 813 void (*fn)(void *), void *context) 814 { 815 mutex_lock(&_event_lock); 816 t->event_fn = fn; 817 t->event_context = context; 818 mutex_unlock(&_event_lock); 819 } 820 821 void dm_table_event(struct dm_table *t) 822 { 823 /* 824 * You can no longer call dm_table_event() from interrupt 825 * context, use a bottom half instead. 826 */ 827 BUG_ON(in_interrupt()); 828 829 mutex_lock(&_event_lock); 830 if (t->event_fn) 831 t->event_fn(t->event_context); 832 mutex_unlock(&_event_lock); 833 } 834 835 sector_t dm_table_get_size(struct dm_table *t) 836 { 837 return t->num_targets ? (t->highs[t->num_targets - 1] + 1) : 0; 838 } 839 840 struct dm_target *dm_table_get_target(struct dm_table *t, unsigned int index) 841 { 842 if (index >= t->num_targets) 843 return NULL; 844 845 return t->targets + index; 846 } 847 848 /* 849 * Search the btree for the correct target. 850 * 851 * Caller should check returned pointer with dm_target_is_valid() 852 * to trap I/O beyond end of device. 853 */ 854 struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector) 855 { 856 unsigned int l, n = 0, k = 0; 857 sector_t *node; 858 859 for (l = 0; l < t->depth; l++) { 860 n = get_child(n, k); 861 node = get_node(t, l, n); 862 863 for (k = 0; k < KEYS_PER_NODE; k++) 864 if (node[k] >= sector) 865 break; 866 } 867 868 return &t->targets[(KEYS_PER_NODE * n) + k]; 869 } 870 871 /* 872 * Set the integrity profile for this device if all devices used have 873 * matching profiles. 874 */ 875 static void dm_table_set_integrity(struct dm_table *t) 876 { 877 struct list_head *devices = dm_table_get_devices(t); 878 struct dm_dev_internal *prev = NULL, *dd = NULL; 879 880 if (!blk_get_integrity(dm_disk(t->md))) 881 return; 882 883 list_for_each_entry(dd, devices, list) { 884 if (prev && 885 blk_integrity_compare(prev->dm_dev.bdev->bd_disk, 886 dd->dm_dev.bdev->bd_disk) < 0) { 887 DMWARN("%s: integrity not set: %s and %s mismatch", 888 dm_device_name(t->md), 889 prev->dm_dev.bdev->bd_disk->disk_name, 890 dd->dm_dev.bdev->bd_disk->disk_name); 891 goto no_integrity; 892 } 893 prev = dd; 894 } 895 896 if (!prev || !bdev_get_integrity(prev->dm_dev.bdev)) 897 goto no_integrity; 898 899 blk_integrity_register(dm_disk(t->md), 900 bdev_get_integrity(prev->dm_dev.bdev)); 901 902 return; 903 904 no_integrity: 905 blk_integrity_register(dm_disk(t->md), NULL); 906 907 return; 908 } 909 910 void dm_table_set_restrictions(struct dm_table *t, struct request_queue *q) 911 { 912 /* 913 * Make sure we obey the optimistic sub devices 914 * restrictions. 915 */ 916 blk_queue_max_sectors(q, t->limits.max_sectors); 917 blk_queue_max_phys_segments(q, t->limits.max_phys_segments); 918 blk_queue_max_hw_segments(q, t->limits.max_hw_segments); 919 blk_queue_logical_block_size(q, t->limits.logical_block_size); 920 blk_queue_max_segment_size(q, t->limits.max_segment_size); 921 blk_queue_max_hw_sectors(q, t->limits.max_hw_sectors); 922 blk_queue_segment_boundary(q, t->limits.seg_boundary_mask); 923 blk_queue_bounce_limit(q, t->limits.bounce_pfn); 924 925 if (t->limits.no_cluster) 926 queue_flag_clear_unlocked(QUEUE_FLAG_CLUSTER, q); 927 else 928 queue_flag_set_unlocked(QUEUE_FLAG_CLUSTER, q); 929 930 dm_table_set_integrity(t); 931 } 932 933 unsigned int dm_table_get_num_targets(struct dm_table *t) 934 { 935 return t->num_targets; 936 } 937 938 struct list_head *dm_table_get_devices(struct dm_table *t) 939 { 940 return &t->devices; 941 } 942 943 fmode_t dm_table_get_mode(struct dm_table *t) 944 { 945 return t->mode; 946 } 947 948 static void suspend_targets(struct dm_table *t, unsigned postsuspend) 949 { 950 int i = t->num_targets; 951 struct dm_target *ti = t->targets; 952 953 while (i--) { 954 if (postsuspend) { 955 if (ti->type->postsuspend) 956 ti->type->postsuspend(ti); 957 } else if (ti->type->presuspend) 958 ti->type->presuspend(ti); 959 960 ti++; 961 } 962 } 963 964 void dm_table_presuspend_targets(struct dm_table *t) 965 { 966 if (!t) 967 return; 968 969 suspend_targets(t, 0); 970 } 971 972 void dm_table_postsuspend_targets(struct dm_table *t) 973 { 974 if (!t) 975 return; 976 977 suspend_targets(t, 1); 978 } 979 980 int dm_table_resume_targets(struct dm_table *t) 981 { 982 int i, r = 0; 983 984 for (i = 0; i < t->num_targets; i++) { 985 struct dm_target *ti = t->targets + i; 986 987 if (!ti->type->preresume) 988 continue; 989 990 r = ti->type->preresume(ti); 991 if (r) 992 return r; 993 } 994 995 for (i = 0; i < t->num_targets; i++) { 996 struct dm_target *ti = t->targets + i; 997 998 if (ti->type->resume) 999 ti->type->resume(ti); 1000 } 1001 1002 return 0; 1003 } 1004 1005 int dm_table_any_congested(struct dm_table *t, int bdi_bits) 1006 { 1007 struct dm_dev_internal *dd; 1008 struct list_head *devices = dm_table_get_devices(t); 1009 int r = 0; 1010 1011 list_for_each_entry(dd, devices, list) { 1012 struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); 1013 char b[BDEVNAME_SIZE]; 1014 1015 if (likely(q)) 1016 r |= bdi_congested(&q->backing_dev_info, bdi_bits); 1017 else 1018 DMWARN_LIMIT("%s: any_congested: nonexistent device %s", 1019 dm_device_name(t->md), 1020 bdevname(dd->dm_dev.bdev, b)); 1021 } 1022 1023 return r; 1024 } 1025 1026 void dm_table_unplug_all(struct dm_table *t) 1027 { 1028 struct dm_dev_internal *dd; 1029 struct list_head *devices = dm_table_get_devices(t); 1030 1031 list_for_each_entry(dd, devices, list) { 1032 struct request_queue *q = bdev_get_queue(dd->dm_dev.bdev); 1033 char b[BDEVNAME_SIZE]; 1034 1035 if (likely(q)) 1036 blk_unplug(q); 1037 else 1038 DMWARN_LIMIT("%s: Cannot unplug nonexistent device %s", 1039 dm_device_name(t->md), 1040 bdevname(dd->dm_dev.bdev, b)); 1041 } 1042 } 1043 1044 struct mapped_device *dm_table_get_md(struct dm_table *t) 1045 { 1046 dm_get(t->md); 1047 1048 return t->md; 1049 } 1050 1051 EXPORT_SYMBOL(dm_vcalloc); 1052 EXPORT_SYMBOL(dm_get_device); 1053 EXPORT_SYMBOL(dm_put_device); 1054 EXPORT_SYMBOL(dm_table_event); 1055 EXPORT_SYMBOL(dm_table_get_size); 1056 EXPORT_SYMBOL(dm_table_get_mode); 1057 EXPORT_SYMBOL(dm_table_get_md); 1058 EXPORT_SYMBOL(dm_table_put); 1059 EXPORT_SYMBOL(dm_table_get); 1060 EXPORT_SYMBOL(dm_table_unplug_all); 1061