/*
 * Copyright (C) 2011 Red Hat, Inc.
 *
 * This file is released under the GPL.
 */

#include "dm-space-map.h"
#include "dm-space-map-common.h"
#include "dm-space-map-metadata.h"

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>

#define DM_MSG_PREFIX "space map metadata"

/*----------------------------------------------------------------*/

/*
 * An edge triggered threshold.
 */
struct threshold {
	bool threshold_set;
	bool value_set;
	dm_block_t threshold;
	dm_block_t current_value;
	dm_sm_threshold_fn fn;
	void *context;
};

static void threshold_init(struct threshold *t)
{
	t->threshold_set = false;
	t->value_set = false;
}

static void set_threshold(struct threshold *t, dm_block_t value,
			  dm_sm_threshold_fn fn, void *context)
{
	t->threshold_set = true;
	t->threshold = value;
	t->fn = fn;
	t->context = context;
}

static bool below_threshold(struct threshold *t, dm_block_t value)
{
	return t->threshold_set && value <= t->threshold;
}

static bool threshold_already_triggered(struct threshold *t)
{
	return t->value_set && below_threshold(t, t->current_value);
}

static void check_threshold(struct threshold *t, dm_block_t value)
{
	if (below_threshold(t, value) &&
	    !threshold_already_triggered(t))
		t->fn(t->context);

	t->value_set = true;
	t->current_value = value;
}

/*----------------------------------------------------------------*/

/*
 * Space map interface.
 *
 * The low level disk format is written using the standard btree and
 * transaction manager. This means that performing disk operations may
 * cause us to recurse into the space map in order to allocate new blocks.
 * For this reason we have a pool of pre-allocated blocks large enough to
 * service any metadata_ll_disk operation.
 */

/*
 * FIXME: we should calculate this based on the size of the device.
 * Only the metadata space map needs this functionality.
 */
#define MAX_RECURSIVE_ALLOCATIONS 1024

enum block_op_type {
	BOP_INC,
	BOP_DEC
};

struct block_op {
	enum block_op_type type;
	dm_block_t block;
};

struct bop_ring_buffer {
	unsigned begin;
	unsigned end;
	struct block_op bops[MAX_RECURSIVE_ALLOCATIONS + 1];
};
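
/*
 * Ring buffer helpers.  Block ops are queued with brb_push() whenever the
 * space map cannot safely update the on-disk structures right away
 * (during recursion and while bootstrapping), and are drained in FIFO
 * order with brb_pop() once the outermost operation completes.  One slot
 * is deliberately left unused so that a full buffer can be distinguished
 * from an empty one.
 */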
static void brb_init(struct bop_ring_buffer *brb)
{
	brb->begin = 0;
	brb->end = 0;
}

static bool brb_empty(struct bop_ring_buffer *brb)
{
	return brb->begin == brb->end;
}

static unsigned brb_next(struct bop_ring_buffer *brb, unsigned old)
{
	unsigned r = old + 1;
	return (r >= (sizeof(brb->bops) / sizeof(*brb->bops))) ? 0 : r;
}

static int brb_push(struct bop_ring_buffer *brb,
		    enum block_op_type type, dm_block_t b)
{
	struct block_op *bop;
	unsigned next = brb_next(brb, brb->end);

	/*
	 * We don't allow the last bop to be filled, this way we can
	 * differentiate between full and empty.
	 */
	if (next == brb->begin)
		return -ENOMEM;

	bop = brb->bops + brb->end;
	bop->type = type;
	bop->block = b;

	brb->end = next;

	return 0;
}

static int brb_pop(struct bop_ring_buffer *brb, struct block_op *result)
{
	struct block_op *bop;

	if (brb_empty(brb))
		return -ENODATA;

	bop = brb->bops + brb->begin;
	result->type = bop->type;
	result->block = bop->block;

	brb->begin = brb_next(brb, brb->begin);

	return 0;
}

/*----------------------------------------------------------------*/

struct sm_metadata {
	struct dm_space_map sm;

	struct ll_disk ll;
	struct ll_disk old_ll;

	dm_block_t begin;

	unsigned recursion_count;
	unsigned allocated_this_transaction;
	struct bop_ring_buffer uncommitted;

	struct threshold threshold;
};

static int add_bop(struct sm_metadata *smm, enum block_op_type type, dm_block_t b)
{
	int r = brb_push(&smm->uncommitted, type, b);

	if (r) {
		DMERR("too many recursive allocations");
		return -ENOMEM;
	}

	return 0;
}

static int commit_bop(struct sm_metadata *smm, struct block_op *op)
{
	int r = 0;
	enum allocation_event ev;

	switch (op->type) {
	case BOP_INC:
		r = sm_ll_inc(&smm->ll, op->block, &ev);
		break;

	case BOP_DEC:
		r = sm_ll_dec(&smm->ll, op->block, &ev);
		break;
	}

	return r;
}

static void in(struct sm_metadata *smm)
{
	smm->recursion_count++;
}

static int out(struct sm_metadata *smm)
{
	int r = 0;

	/*
	 * If we're not recursing then very bad things are happening.
	 */
	if (!smm->recursion_count) {
		DMERR("lost track of recursion depth");
		return -ENOMEM;
	}

	if (smm->recursion_count == 1) {
		while (!brb_empty(&smm->uncommitted)) {
			struct block_op bop;

			r = brb_pop(&smm->uncommitted, &bop);
			if (r) {
				DMERR("bug in bop ring buffer");
				break;
			}

			r = commit_bop(smm, &bop);
			if (r)
				break;
		}
	}

	smm->recursion_count--;

	return r;
}

/*
 * When using the out() function above, we often want to combine an error
 * code for the operation run in the recursive context with that from
 * out().
 */
static int combine_errors(int r1, int r2)
{
	return r1 ? r1 : r2;
}

static int recursing(struct sm_metadata *smm)
{
	return smm->recursion_count;
}

static void sm_metadata_destroy(struct dm_space_map *sm)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	kfree(smm);
}

static int sm_metadata_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}
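
/*
 * The free count is reported against the state at the last commit
 * (old_ll), less anything allocated in the current transaction.  Blocks
 * freed within this transaction only become available again after the
 * next commit.
 */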
static int sm_metadata_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->old_ll.nr_blocks - smm->old_ll.nr_allocated -
		 smm->allocated_this_transaction;

	return 0;
}

static int sm_metadata_get_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t *result)
{
	int r;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	unsigned adjustment = 0;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {
		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	r = sm_ll_lookup(&smm->ll, b, result);
	if (r)
		return r;

	*result += adjustment;

	return 0;
}

static int sm_metadata_count_is_more_than_one(struct dm_space_map *sm,
					      dm_block_t b, int *result)
{
	int r, adjustment = 0;
	unsigned i;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	uint32_t rc;

	/*
	 * We may have some uncommitted adjustments to add.  This list
	 * should always be really short.
	 */
	for (i = smm->uncommitted.begin;
	     i != smm->uncommitted.end;
	     i = brb_next(&smm->uncommitted, i)) {

		struct block_op *op = smm->uncommitted.bops + i;

		if (op->block != b)
			continue;

		switch (op->type) {
		case BOP_INC:
			adjustment++;
			break;

		case BOP_DEC:
			adjustment--;
			break;
		}
	}

	if (adjustment > 1) {
		*result = 1;
		return 0;
	}

	r = sm_ll_lookup_bitmap(&smm->ll, b, &rc);
	if (r)
		return r;

	if (rc == 3)
		/*
		 * We err on the side of caution, and always return true.
		 */
		*result = 1;
	else
		*result = rc + adjustment > 1;

	return 0;
}

static int sm_metadata_set_count(struct dm_space_map *sm, dm_block_t b,
				 uint32_t count)
{
	int r, r2;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (smm->recursion_count) {
		DMERR("cannot recurse set_count()");
		return -EINVAL;
	}

	in(smm);
	r = sm_ll_insert(&smm->ll, b, count, &ev);
	r2 = out(smm);

	return combine_errors(r, r2);
}

static int sm_metadata_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	if (recursing(smm))
		r = add_bop(smm, BOP_DEC, b);
	else {
		in(smm);
		r = sm_ll_dec(&smm->ll, b, &ev);
		r2 = out(smm);
	}

	return combine_errors(r, r2);
}

static int sm_metadata_new_block_(struct dm_space_map *sm, dm_block_t *b)
{
	int r, r2 = 0;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_find_free_block(&smm->old_ll, smm->begin, smm->old_ll.nr_blocks, b);
	if (r)
		return r;

	smm->begin = *b + 1;

	if (recursing(smm))
		r = add_bop(smm, BOP_INC, *b);
	else {
		in(smm);
		r = sm_ll_inc(&smm->ll, *b, &ev);
		r2 = out(smm);
	}

	if (!r)
		smm->allocated_this_transaction++;

	return combine_errors(r, r2);
}
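
/*
 * Wrapper around sm_metadata_new_block_() that also rechecks the
 * registered low-space threshold after a successful allocation.
 */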
static int sm_metadata_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	dm_block_t count;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	int r = sm_metadata_new_block_(sm, b);
	if (r) {
		DMERR_LIMIT("unable to allocate new metadata block");
		return r;
	}

	r = sm_metadata_get_nr_free(sm, &count);
	if (r) {
		DMERR_LIMIT("couldn't get free block count");
		return r;
	}

	check_threshold(&smm->threshold, count);

	return r;
}

static int sm_metadata_commit(struct dm_space_map *sm)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_commit(&smm->ll);
	if (r)
		return r;

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	smm->begin = 0;
	smm->allocated_this_transaction = 0;

	return 0;
}

static int sm_metadata_register_threshold_callback(struct dm_space_map *sm,
						    dm_block_t threshold,
						    dm_sm_threshold_fn fn,
						    void *context)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	set_threshold(&smm->threshold, threshold, fn, context);

	return 0;
}

static int sm_metadata_root_size(struct dm_space_map *sm, size_t *result)
{
	*result = sizeof(struct disk_sm_root);

	return 0;
}

static int sm_metadata_copy_root(struct dm_space_map *sm, void *where_le, size_t max)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	struct disk_sm_root root_le;

	root_le.nr_blocks = cpu_to_le64(smm->ll.nr_blocks);
	root_le.nr_allocated = cpu_to_le64(smm->ll.nr_allocated);
	root_le.bitmap_root = cpu_to_le64(smm->ll.bitmap_root);
	root_le.ref_count_root = cpu_to_le64(smm->ll.ref_count_root);

	if (max < sizeof(root_le))
		return -ENOSPC;

	memcpy(where_le, &root_le, sizeof(root_le));

	return 0;
}
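
/*
 * sm_metadata_extend() is only forward declared here; it is defined
 * after the bootstrap ops below, which it temporarily switches to while
 * growing the space map.
 */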
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks);

static struct dm_space_map ops = {
	.destroy = sm_metadata_destroy,
	.extend = sm_metadata_extend,
	.get_nr_blocks = sm_metadata_get_nr_blocks,
	.get_nr_free = sm_metadata_get_nr_free,
	.get_count = sm_metadata_get_count,
	.count_is_more_than_one = sm_metadata_count_is_more_than_one,
	.set_count = sm_metadata_set_count,
	.inc_block = sm_metadata_inc_block,
	.dec_block = sm_metadata_dec_block,
	.new_block = sm_metadata_new_block,
	.commit = sm_metadata_commit,
	.root_size = sm_metadata_root_size,
	.copy_root = sm_metadata_copy_root,
	.register_threshold_callback = sm_metadata_register_threshold_callback
};

/*----------------------------------------------------------------*/

/*
 * When a new space map is created that manages its own space, we use
 * this tiny bootstrap allocator.
 */
static void sm_bootstrap_destroy(struct dm_space_map *sm)
{
}

static int sm_bootstrap_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	DMERR("bootstrap doesn't support extend");

	return -EINVAL;
}

static int sm_bootstrap_get_nr_blocks(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks;

	return 0;
}

static int sm_bootstrap_get_nr_free(struct dm_space_map *sm, dm_block_t *count)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*count = smm->ll.nr_blocks - smm->begin;

	return 0;
}

static int sm_bootstrap_get_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t *result)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	*result = (b < smm->begin) ? 1 : 0;

	return 0;
}

static int sm_bootstrap_count_is_more_than_one(struct dm_space_map *sm,
					       dm_block_t b, int *result)
{
	*result = 0;

	return 0;
}

static int sm_bootstrap_set_count(struct dm_space_map *sm, dm_block_t b,
				  uint32_t count)
{
	DMERR("bootstrap doesn't support set_count");

	return -EINVAL;
}

static int sm_bootstrap_new_block(struct dm_space_map *sm, dm_block_t *b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	/*
	 * We know the entire device is unused.
	 */
	if (smm->begin == smm->ll.nr_blocks)
		return -ENOSPC;

	*b = smm->begin++;

	return 0;
}

static int sm_bootstrap_inc_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_INC, b);
}

static int sm_bootstrap_dec_block(struct dm_space_map *sm, dm_block_t b)
{
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	return add_bop(smm, BOP_DEC, b);
}

static int sm_bootstrap_commit(struct dm_space_map *sm)
{
	return 0;
}

static int sm_bootstrap_root_size(struct dm_space_map *sm, size_t *result)
{
	DMERR("bootstrap doesn't support root_size");

	return -EINVAL;
}

static int sm_bootstrap_copy_root(struct dm_space_map *sm, void *where,
				  size_t max)
{
	DMERR("bootstrap doesn't support copy_root");

	return -EINVAL;
}

static struct dm_space_map bootstrap_ops = {
	.destroy = sm_bootstrap_destroy,
	.extend = sm_bootstrap_extend,
	.get_nr_blocks = sm_bootstrap_get_nr_blocks,
	.get_nr_free = sm_bootstrap_get_nr_free,
	.get_count = sm_bootstrap_get_count,
	.count_is_more_than_one = sm_bootstrap_count_is_more_than_one,
	.set_count = sm_bootstrap_set_count,
	.inc_block = sm_bootstrap_inc_block,
	.dec_block = sm_bootstrap_dec_block,
	.new_block = sm_bootstrap_new_block,
	.commit = sm_bootstrap_commit,
	.root_size = sm_bootstrap_root_size,
	.copy_root = sm_bootstrap_copy_root,
	.register_threshold_callback = NULL
};

/*----------------------------------------------------------------*/
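
/*
 * Extending the space map may itself allocate metadata blocks (new
 * bitmaps and btree nodes), so we temporarily switch to the bootstrap
 * ops and keep incrementing reference counts and committing until a
 * pass allocates nothing further.
 */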
static int sm_metadata_extend(struct dm_space_map *sm, dm_block_t extra_blocks)
{
	int r, i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);
	dm_block_t old_len = smm->ll.nr_blocks;

	/*
	 * Flick into a mode where all blocks get allocated in the new area.
	 */
	smm->begin = old_len;
	memcpy(sm, &bootstrap_ops, sizeof(*sm));

	/*
	 * Extend.
	 */
	r = sm_ll_extend(&smm->ll, extra_blocks);
	if (r)
		goto out;

	/*
	 * We repeatedly increment then commit until the commit doesn't
	 * allocate any new blocks.
	 */
	do {
		for (i = old_len; !r && i < smm->begin; i++) {
			r = sm_ll_inc(&smm->ll, i, &ev);
			if (r)
				goto out;
		}
		old_len = smm->begin;

		r = sm_ll_commit(&smm->ll);
		if (r)
			goto out;

	} while (old_len != smm->begin);

out:
	/*
	 * Switch back to normal behaviour.
	 */
	memcpy(sm, &ops, sizeof(*sm));
	return r;
}

/*----------------------------------------------------------------*/

struct dm_space_map *dm_sm_metadata_init(void)
{
	struct sm_metadata *smm;

	smm = kmalloc(sizeof(*smm), GFP_KERNEL);
	if (!smm)
		return ERR_PTR(-ENOMEM);

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	return &smm->sm;
}

int dm_sm_metadata_create(struct dm_space_map *sm,
			  struct dm_transaction_manager *tm,
			  dm_block_t nr_blocks,
			  dm_block_t superblock)
{
	int r;
	dm_block_t i;
	enum allocation_event ev;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	smm->begin = superblock + 1;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->sm, &bootstrap_ops, sizeof(smm->sm));

	r = sm_ll_new_metadata(&smm->ll, tm);
	if (r)
		return r;

	if (nr_blocks > DM_SM_METADATA_MAX_BLOCKS)
		nr_blocks = DM_SM_METADATA_MAX_BLOCKS;
	r = sm_ll_extend(&smm->ll, nr_blocks);
	if (r)
		return r;

	memcpy(&smm->sm, &ops, sizeof(smm->sm));

	/*
	 * Now we need to update the newly created data structures with the
	 * allocated blocks that they were built from.
	 */
	for (i = superblock; !r && i < smm->begin; i++)
		r = sm_ll_inc(&smm->ll, i, &ev);

	if (r)
		return r;

	return sm_metadata_commit(sm);
}

int dm_sm_metadata_open(struct dm_space_map *sm,
			struct dm_transaction_manager *tm,
			void *root_le, size_t len)
{
	int r;
	struct sm_metadata *smm = container_of(sm, struct sm_metadata, sm);

	r = sm_ll_open_metadata(&smm->ll, tm, root_le, len);
	if (r)
		return r;

	smm->begin = 0;
	smm->recursion_count = 0;
	smm->allocated_this_transaction = 0;
	brb_init(&smm->uncommitted);
	threshold_init(&smm->threshold);

	memcpy(&smm->old_ll, &smm->ll, sizeof(smm->old_ll));
	return 0;
}
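
/*
 * The public entry points above are used roughly like this (an
 * illustrative sketch only; error handling is omitted and "tm" stands
 * for a transaction manager supplied by the caller):
 *
 *	struct dm_space_map *sm = dm_sm_metadata_init();
 *	int r = dm_sm_metadata_create(sm, tm, nr_blocks, superblock);
 *
 * or, for a space map that already exists on disk:
 *
 *	int r = dm_sm_metadata_open(sm, tm, root_le, len);
 *
 * Destruction goes through the destroy method in the ops table
 * (dm_sm_destroy() in dm-space-map.h).
 */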