1 /* 2 * Copyright (C) 2008 Red Hat. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/pagemap.h> 20 #include <linux/sched.h> 21 #include <linux/math64.h> 22 #include "ctree.h" 23 #include "free-space-cache.h" 24 #include "transaction.h" 25 26 #define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 27 #define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 28 29 static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize, 30 u64 offset) 31 { 32 BUG_ON(offset < bitmap_start); 33 offset -= bitmap_start; 34 return (unsigned long)(div64_u64(offset, sectorsize)); 35 } 36 37 static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize) 38 { 39 return (unsigned long)(div64_u64(bytes, sectorsize)); 40 } 41 42 static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group, 43 u64 offset) 44 { 45 u64 bitmap_start; 46 u64 bytes_per_bitmap; 47 48 bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize; 49 bitmap_start = offset - block_group->key.objectid; 50 bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap); 51 bitmap_start *= bytes_per_bitmap; 52 bitmap_start += block_group->key.objectid; 53 54 return bitmap_start; 55 } 56 57 static int tree_insert_offset(struct rb_root *root, u64 offset, 58 struct rb_node *node, int bitmap) 59 { 60 struct rb_node **p = &root->rb_node; 61 struct rb_node *parent = NULL; 62 struct btrfs_free_space *info; 63 64 while (*p) { 65 parent = *p; 66 info = rb_entry(parent, struct btrfs_free_space, offset_index); 67 68 if (offset < info->offset) { 69 p = &(*p)->rb_left; 70 } else if (offset > info->offset) { 71 p = &(*p)->rb_right; 72 } else { 73 /* 74 * we could have a bitmap entry and an extent entry 75 * share the same offset. If this is the case, we want 76 * the extent entry to always be found first if we do a 77 * linear search through the tree, since we want to have 78 * the quickest allocation time, and allocating from an 79 * extent is faster than allocating from a bitmap. So 80 * if we're inserting a bitmap and we find an entry at 81 * this offset, we want to go right, or after this entry 82 * logically. If we are inserting an extent and we've 83 * found a bitmap, we want to go left, or before 84 * logically. 85 */ 86 if (bitmap) { 87 WARN_ON(info->bitmap); 88 p = &(*p)->rb_right; 89 } else { 90 WARN_ON(!info->bitmap); 91 p = &(*p)->rb_left; 92 } 93 } 94 } 95 96 rb_link_node(node, parent, p); 97 rb_insert_color(node, root); 98 99 return 0; 100 } 101 102 /* 103 * searches the tree for the given offset. 104 * 105 * fuzzy - If this is set, then we are trying to make an allocation, and we just 106 * want a section that has at least bytes size and comes at or after the given 107 * offset. 108 */ 109 static struct btrfs_free_space * 110 tree_search_offset(struct btrfs_block_group_cache *block_group, 111 u64 offset, int bitmap_only, int fuzzy) 112 { 113 struct rb_node *n = block_group->free_space_offset.rb_node; 114 struct btrfs_free_space *entry, *prev = NULL; 115 116 /* find entry that is closest to the 'offset' */ 117 while (1) { 118 if (!n) { 119 entry = NULL; 120 break; 121 } 122 123 entry = rb_entry(n, struct btrfs_free_space, offset_index); 124 prev = entry; 125 126 if (offset < entry->offset) 127 n = n->rb_left; 128 else if (offset > entry->offset) 129 n = n->rb_right; 130 else 131 break; 132 } 133 134 if (bitmap_only) { 135 if (!entry) 136 return NULL; 137 if (entry->bitmap) 138 return entry; 139 140 /* 141 * bitmap entry and extent entry may share same offset, 142 * in that case, bitmap entry comes after extent entry. 143 */ 144 n = rb_next(n); 145 if (!n) 146 return NULL; 147 entry = rb_entry(n, struct btrfs_free_space, offset_index); 148 if (entry->offset != offset) 149 return NULL; 150 151 WARN_ON(!entry->bitmap); 152 return entry; 153 } else if (entry) { 154 if (entry->bitmap) { 155 /* 156 * if previous extent entry covers the offset, 157 * we should return it instead of the bitmap entry 158 */ 159 n = &entry->offset_index; 160 while (1) { 161 n = rb_prev(n); 162 if (!n) 163 break; 164 prev = rb_entry(n, struct btrfs_free_space, 165 offset_index); 166 if (!prev->bitmap) { 167 if (prev->offset + prev->bytes > offset) 168 entry = prev; 169 break; 170 } 171 } 172 } 173 return entry; 174 } 175 176 if (!prev) 177 return NULL; 178 179 /* find last entry before the 'offset' */ 180 entry = prev; 181 if (entry->offset > offset) { 182 n = rb_prev(&entry->offset_index); 183 if (n) { 184 entry = rb_entry(n, struct btrfs_free_space, 185 offset_index); 186 BUG_ON(entry->offset > offset); 187 } else { 188 if (fuzzy) 189 return entry; 190 else 191 return NULL; 192 } 193 } 194 195 if (entry->bitmap) { 196 n = &entry->offset_index; 197 while (1) { 198 n = rb_prev(n); 199 if (!n) 200 break; 201 prev = rb_entry(n, struct btrfs_free_space, 202 offset_index); 203 if (!prev->bitmap) { 204 if (prev->offset + prev->bytes > offset) 205 return prev; 206 break; 207 } 208 } 209 if (entry->offset + BITS_PER_BITMAP * 210 block_group->sectorsize > offset) 211 return entry; 212 } else if (entry->offset + entry->bytes > offset) 213 return entry; 214 215 if (!fuzzy) 216 return NULL; 217 218 while (1) { 219 if (entry->bitmap) { 220 if (entry->offset + BITS_PER_BITMAP * 221 block_group->sectorsize > offset) 222 break; 223 } else { 224 if (entry->offset + entry->bytes > offset) 225 break; 226 } 227 228 n = rb_next(&entry->offset_index); 229 if (!n) 230 return NULL; 231 entry = rb_entry(n, struct btrfs_free_space, offset_index); 232 } 233 return entry; 234 } 235 236 static void unlink_free_space(struct btrfs_block_group_cache *block_group, 237 struct btrfs_free_space *info) 238 { 239 rb_erase(&info->offset_index, &block_group->free_space_offset); 240 block_group->free_extents--; 241 block_group->free_space -= info->bytes; 242 } 243 244 static int link_free_space(struct btrfs_block_group_cache *block_group, 245 struct btrfs_free_space *info) 246 { 247 int ret = 0; 248 249 BUG_ON(!info->bitmap && !info->bytes); 250 ret = tree_insert_offset(&block_group->free_space_offset, info->offset, 251 &info->offset_index, (info->bitmap != NULL)); 252 if (ret) 253 return ret; 254 255 block_group->free_space += info->bytes; 256 block_group->free_extents++; 257 return ret; 258 } 259 260 static void recalculate_thresholds(struct btrfs_block_group_cache *block_group) 261 { 262 u64 max_bytes, possible_bytes; 263 264 /* 265 * The goal is to keep the total amount of memory used per 1gb of space 266 * at or below 32k, so we need to adjust how much memory we allow to be 267 * used by extent based free space tracking 268 */ 269 max_bytes = MAX_CACHE_BYTES_PER_GIG * 270 (div64_u64(block_group->key.offset, 1024 * 1024 * 1024)); 271 272 possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) + 273 (sizeof(struct btrfs_free_space) * 274 block_group->extents_thresh); 275 276 if (possible_bytes > max_bytes) { 277 int extent_bytes = max_bytes - 278 (block_group->total_bitmaps * PAGE_CACHE_SIZE); 279 280 if (extent_bytes <= 0) { 281 block_group->extents_thresh = 0; 282 return; 283 } 284 285 block_group->extents_thresh = extent_bytes / 286 (sizeof(struct btrfs_free_space)); 287 } 288 } 289 290 static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group, 291 struct btrfs_free_space *info, u64 offset, 292 u64 bytes) 293 { 294 unsigned long start, end; 295 unsigned long i; 296 297 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 298 end = start + bytes_to_bits(bytes, block_group->sectorsize); 299 BUG_ON(end > BITS_PER_BITMAP); 300 301 for (i = start; i < end; i++) 302 clear_bit(i, info->bitmap); 303 304 info->bytes -= bytes; 305 block_group->free_space -= bytes; 306 } 307 308 static void bitmap_set_bits(struct btrfs_block_group_cache *block_group, 309 struct btrfs_free_space *info, u64 offset, 310 u64 bytes) 311 { 312 unsigned long start, end; 313 unsigned long i; 314 315 start = offset_to_bit(info->offset, block_group->sectorsize, offset); 316 end = start + bytes_to_bits(bytes, block_group->sectorsize); 317 BUG_ON(end > BITS_PER_BITMAP); 318 319 for (i = start; i < end; i++) 320 set_bit(i, info->bitmap); 321 322 info->bytes += bytes; 323 block_group->free_space += bytes; 324 } 325 326 static int search_bitmap(struct btrfs_block_group_cache *block_group, 327 struct btrfs_free_space *bitmap_info, u64 *offset, 328 u64 *bytes) 329 { 330 unsigned long found_bits = 0; 331 unsigned long bits, i; 332 unsigned long next_zero; 333 334 i = offset_to_bit(bitmap_info->offset, block_group->sectorsize, 335 max_t(u64, *offset, bitmap_info->offset)); 336 bits = bytes_to_bits(*bytes, block_group->sectorsize); 337 338 for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i); 339 i < BITS_PER_BITMAP; 340 i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) { 341 next_zero = find_next_zero_bit(bitmap_info->bitmap, 342 BITS_PER_BITMAP, i); 343 if ((next_zero - i) >= bits) { 344 found_bits = next_zero - i; 345 break; 346 } 347 i = next_zero; 348 } 349 350 if (found_bits) { 351 *offset = (u64)(i * block_group->sectorsize) + 352 bitmap_info->offset; 353 *bytes = (u64)(found_bits) * block_group->sectorsize; 354 return 0; 355 } 356 357 return -1; 358 } 359 360 static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache 361 *block_group, u64 *offset, 362 u64 *bytes, int debug) 363 { 364 struct btrfs_free_space *entry; 365 struct rb_node *node; 366 int ret; 367 368 if (!block_group->free_space_offset.rb_node) 369 return NULL; 370 371 entry = tree_search_offset(block_group, 372 offset_to_bitmap(block_group, *offset), 373 0, 1); 374 if (!entry) 375 return NULL; 376 377 for (node = &entry->offset_index; node; node = rb_next(node)) { 378 entry = rb_entry(node, struct btrfs_free_space, offset_index); 379 if (entry->bytes < *bytes) 380 continue; 381 382 if (entry->bitmap) { 383 ret = search_bitmap(block_group, entry, offset, bytes); 384 if (!ret) 385 return entry; 386 continue; 387 } 388 389 *offset = entry->offset; 390 *bytes = entry->bytes; 391 return entry; 392 } 393 394 return NULL; 395 } 396 397 static void add_new_bitmap(struct btrfs_block_group_cache *block_group, 398 struct btrfs_free_space *info, u64 offset) 399 { 400 u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize; 401 int max_bitmaps = (int)div64_u64(block_group->key.offset + 402 bytes_per_bg - 1, bytes_per_bg); 403 BUG_ON(block_group->total_bitmaps >= max_bitmaps); 404 405 info->offset = offset_to_bitmap(block_group, offset); 406 link_free_space(block_group, info); 407 block_group->total_bitmaps++; 408 409 recalculate_thresholds(block_group); 410 } 411 412 static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group, 413 struct btrfs_free_space *bitmap_info, 414 u64 *offset, u64 *bytes) 415 { 416 u64 end; 417 418 again: 419 end = bitmap_info->offset + 420 (u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1; 421 422 if (*offset > bitmap_info->offset && *offset + *bytes > end) { 423 bitmap_clear_bits(block_group, bitmap_info, *offset, 424 end - *offset + 1); 425 *bytes -= end - *offset + 1; 426 *offset = end + 1; 427 } else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) { 428 bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes); 429 *bytes = 0; 430 } 431 432 if (*bytes) { 433 if (!bitmap_info->bytes) { 434 unlink_free_space(block_group, bitmap_info); 435 kfree(bitmap_info->bitmap); 436 kfree(bitmap_info); 437 block_group->total_bitmaps--; 438 recalculate_thresholds(block_group); 439 } 440 441 bitmap_info = tree_search_offset(block_group, 442 offset_to_bitmap(block_group, 443 *offset), 444 1, 0); 445 if (!bitmap_info) 446 return -EINVAL; 447 448 if (!bitmap_info->bitmap) 449 return -EAGAIN; 450 451 goto again; 452 } else if (!bitmap_info->bytes) { 453 unlink_free_space(block_group, bitmap_info); 454 kfree(bitmap_info->bitmap); 455 kfree(bitmap_info); 456 block_group->total_bitmaps--; 457 recalculate_thresholds(block_group); 458 } 459 460 return 0; 461 } 462 463 static int insert_into_bitmap(struct btrfs_block_group_cache *block_group, 464 struct btrfs_free_space *info) 465 { 466 struct btrfs_free_space *bitmap_info; 467 int added = 0; 468 u64 bytes, offset, end; 469 int ret; 470 471 /* 472 * If we are below the extents threshold then we can add this as an 473 * extent, and don't have to deal with the bitmap 474 */ 475 if (block_group->free_extents < block_group->extents_thresh && 476 info->bytes > block_group->sectorsize * 4) 477 return 0; 478 479 /* 480 * some block groups are so tiny they can't be enveloped by a bitmap, so 481 * don't even bother to create a bitmap for this 482 */ 483 if (BITS_PER_BITMAP * block_group->sectorsize > 484 block_group->key.offset) 485 return 0; 486 487 bytes = info->bytes; 488 offset = info->offset; 489 490 again: 491 bitmap_info = tree_search_offset(block_group, 492 offset_to_bitmap(block_group, offset), 493 1, 0); 494 if (!bitmap_info) { 495 BUG_ON(added); 496 goto new_bitmap; 497 } 498 499 end = bitmap_info->offset + 500 (u64)(BITS_PER_BITMAP * block_group->sectorsize); 501 502 if (offset >= bitmap_info->offset && offset + bytes > end) { 503 bitmap_set_bits(block_group, bitmap_info, offset, 504 end - offset); 505 bytes -= end - offset; 506 offset = end; 507 added = 0; 508 } else if (offset >= bitmap_info->offset && offset + bytes <= end) { 509 bitmap_set_bits(block_group, bitmap_info, offset, bytes); 510 bytes = 0; 511 } else { 512 BUG(); 513 } 514 515 if (!bytes) { 516 ret = 1; 517 goto out; 518 } else 519 goto again; 520 521 new_bitmap: 522 if (info && info->bitmap) { 523 add_new_bitmap(block_group, info, offset); 524 added = 1; 525 info = NULL; 526 goto again; 527 } else { 528 spin_unlock(&block_group->tree_lock); 529 530 /* no pre-allocated info, allocate a new one */ 531 if (!info) { 532 info = kzalloc(sizeof(struct btrfs_free_space), 533 GFP_NOFS); 534 if (!info) { 535 spin_lock(&block_group->tree_lock); 536 ret = -ENOMEM; 537 goto out; 538 } 539 } 540 541 /* allocate the bitmap */ 542 info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS); 543 spin_lock(&block_group->tree_lock); 544 if (!info->bitmap) { 545 ret = -ENOMEM; 546 goto out; 547 } 548 goto again; 549 } 550 551 out: 552 if (info) { 553 if (info->bitmap) 554 kfree(info->bitmap); 555 kfree(info); 556 } 557 558 return ret; 559 } 560 561 int btrfs_add_free_space(struct btrfs_block_group_cache *block_group, 562 u64 offset, u64 bytes) 563 { 564 struct btrfs_free_space *right_info = NULL; 565 struct btrfs_free_space *left_info = NULL; 566 struct btrfs_free_space *info = NULL; 567 int ret = 0; 568 569 info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS); 570 if (!info) 571 return -ENOMEM; 572 573 info->offset = offset; 574 info->bytes = bytes; 575 576 spin_lock(&block_group->tree_lock); 577 578 /* 579 * first we want to see if there is free space adjacent to the range we 580 * are adding, if there is remove that struct and add a new one to 581 * cover the entire range 582 */ 583 right_info = tree_search_offset(block_group, offset + bytes, 0, 0); 584 if (right_info && rb_prev(&right_info->offset_index)) 585 left_info = rb_entry(rb_prev(&right_info->offset_index), 586 struct btrfs_free_space, offset_index); 587 else 588 left_info = tree_search_offset(block_group, offset - 1, 0, 0); 589 590 /* 591 * If there was no extent directly to the left or right of this new 592 * extent then we know we're going to have to allocate a new extent, so 593 * before we do that see if we need to drop this into a bitmap 594 */ 595 if ((!left_info || left_info->bitmap) && 596 (!right_info || right_info->bitmap)) { 597 ret = insert_into_bitmap(block_group, info); 598 599 if (ret < 0) { 600 goto out; 601 } else if (ret) { 602 ret = 0; 603 goto out; 604 } 605 } 606 607 if (right_info && !right_info->bitmap) { 608 unlink_free_space(block_group, right_info); 609 info->bytes += right_info->bytes; 610 kfree(right_info); 611 } 612 613 if (left_info && !left_info->bitmap && 614 left_info->offset + left_info->bytes == offset) { 615 unlink_free_space(block_group, left_info); 616 info->offset = left_info->offset; 617 info->bytes += left_info->bytes; 618 kfree(left_info); 619 } 620 621 ret = link_free_space(block_group, info); 622 if (ret) 623 kfree(info); 624 out: 625 spin_unlock(&block_group->tree_lock); 626 627 if (ret) { 628 printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret); 629 BUG_ON(ret == -EEXIST); 630 } 631 632 return ret; 633 } 634 635 int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group, 636 u64 offset, u64 bytes) 637 { 638 struct btrfs_free_space *info; 639 struct btrfs_free_space *next_info = NULL; 640 int ret = 0; 641 642 spin_lock(&block_group->tree_lock); 643 644 again: 645 info = tree_search_offset(block_group, offset, 0, 0); 646 if (!info) { 647 WARN_ON(1); 648 goto out_lock; 649 } 650 651 if (info->bytes < bytes && rb_next(&info->offset_index)) { 652 u64 end; 653 next_info = rb_entry(rb_next(&info->offset_index), 654 struct btrfs_free_space, 655 offset_index); 656 657 if (next_info->bitmap) 658 end = next_info->offset + BITS_PER_BITMAP * 659 block_group->sectorsize - 1; 660 else 661 end = next_info->offset + next_info->bytes; 662 663 if (next_info->bytes < bytes || 664 next_info->offset > offset || offset > end) { 665 printk(KERN_CRIT "Found free space at %llu, size %llu," 666 " trying to use %llu\n", 667 (unsigned long long)info->offset, 668 (unsigned long long)info->bytes, 669 (unsigned long long)bytes); 670 WARN_ON(1); 671 ret = -EINVAL; 672 goto out_lock; 673 } 674 675 info = next_info; 676 } 677 678 if (info->bytes == bytes) { 679 unlink_free_space(block_group, info); 680 if (info->bitmap) { 681 kfree(info->bitmap); 682 block_group->total_bitmaps--; 683 } 684 kfree(info); 685 goto out_lock; 686 } 687 688 if (!info->bitmap && info->offset == offset) { 689 unlink_free_space(block_group, info); 690 info->offset += bytes; 691 info->bytes -= bytes; 692 link_free_space(block_group, info); 693 goto out_lock; 694 } 695 696 if (!info->bitmap && info->offset <= offset && 697 info->offset + info->bytes >= offset + bytes) { 698 u64 old_start = info->offset; 699 /* 700 * we're freeing space in the middle of the info, 701 * this can happen during tree log replay 702 * 703 * first unlink the old info and then 704 * insert it again after the hole we're creating 705 */ 706 unlink_free_space(block_group, info); 707 if (offset + bytes < info->offset + info->bytes) { 708 u64 old_end = info->offset + info->bytes; 709 710 info->offset = offset + bytes; 711 info->bytes = old_end - info->offset; 712 ret = link_free_space(block_group, info); 713 WARN_ON(ret); 714 if (ret) 715 goto out_lock; 716 } else { 717 /* the hole we're creating ends at the end 718 * of the info struct, just free the info 719 */ 720 kfree(info); 721 } 722 spin_unlock(&block_group->tree_lock); 723 724 /* step two, insert a new info struct to cover 725 * anything before the hole 726 */ 727 ret = btrfs_add_free_space(block_group, old_start, 728 offset - old_start); 729 WARN_ON(ret); 730 goto out; 731 } 732 733 ret = remove_from_bitmap(block_group, info, &offset, &bytes); 734 if (ret == -EAGAIN) 735 goto again; 736 BUG_ON(ret); 737 out_lock: 738 spin_unlock(&block_group->tree_lock); 739 out: 740 return ret; 741 } 742 743 void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group, 744 u64 bytes) 745 { 746 struct btrfs_free_space *info; 747 struct rb_node *n; 748 int count = 0; 749 750 for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) { 751 info = rb_entry(n, struct btrfs_free_space, offset_index); 752 if (info->bytes >= bytes) 753 count++; 754 printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n", 755 (unsigned long long)info->offset, 756 (unsigned long long)info->bytes, 757 (info->bitmap) ? "yes" : "no"); 758 } 759 printk(KERN_INFO "block group has cluster?: %s\n", 760 list_empty(&block_group->cluster_list) ? "no" : "yes"); 761 printk(KERN_INFO "%d blocks of free space at or bigger than bytes is" 762 "\n", count); 763 } 764 765 u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group) 766 { 767 struct btrfs_free_space *info; 768 struct rb_node *n; 769 u64 ret = 0; 770 771 for (n = rb_first(&block_group->free_space_offset); n; 772 n = rb_next(n)) { 773 info = rb_entry(n, struct btrfs_free_space, offset_index); 774 ret += info->bytes; 775 } 776 777 return ret; 778 } 779 780 /* 781 * for a given cluster, put all of its extents back into the free 782 * space cache. If the block group passed doesn't match the block group 783 * pointed to by the cluster, someone else raced in and freed the 784 * cluster already. In that case, we just return without changing anything 785 */ 786 static int 787 __btrfs_return_cluster_to_free_space( 788 struct btrfs_block_group_cache *block_group, 789 struct btrfs_free_cluster *cluster) 790 { 791 struct btrfs_free_space *entry; 792 struct rb_node *node; 793 bool bitmap; 794 795 spin_lock(&cluster->lock); 796 if (cluster->block_group != block_group) 797 goto out; 798 799 bitmap = cluster->points_to_bitmap; 800 cluster->block_group = NULL; 801 cluster->window_start = 0; 802 list_del_init(&cluster->block_group_list); 803 cluster->points_to_bitmap = false; 804 805 if (bitmap) 806 goto out; 807 808 node = rb_first(&cluster->root); 809 while (node) { 810 entry = rb_entry(node, struct btrfs_free_space, offset_index); 811 node = rb_next(&entry->offset_index); 812 rb_erase(&entry->offset_index, &cluster->root); 813 BUG_ON(entry->bitmap); 814 tree_insert_offset(&block_group->free_space_offset, 815 entry->offset, &entry->offset_index, 0); 816 } 817 cluster->root.rb_node = NULL; 818 819 out: 820 spin_unlock(&cluster->lock); 821 btrfs_put_block_group(block_group); 822 return 0; 823 } 824 825 void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group) 826 { 827 struct btrfs_free_space *info; 828 struct rb_node *node; 829 struct btrfs_free_cluster *cluster; 830 struct list_head *head; 831 832 spin_lock(&block_group->tree_lock); 833 while ((head = block_group->cluster_list.next) != 834 &block_group->cluster_list) { 835 cluster = list_entry(head, struct btrfs_free_cluster, 836 block_group_list); 837 838 WARN_ON(cluster->block_group != block_group); 839 __btrfs_return_cluster_to_free_space(block_group, cluster); 840 if (need_resched()) { 841 spin_unlock(&block_group->tree_lock); 842 cond_resched(); 843 spin_lock(&block_group->tree_lock); 844 } 845 } 846 847 while ((node = rb_last(&block_group->free_space_offset)) != NULL) { 848 info = rb_entry(node, struct btrfs_free_space, offset_index); 849 unlink_free_space(block_group, info); 850 if (info->bitmap) 851 kfree(info->bitmap); 852 kfree(info); 853 if (need_resched()) { 854 spin_unlock(&block_group->tree_lock); 855 cond_resched(); 856 spin_lock(&block_group->tree_lock); 857 } 858 } 859 860 spin_unlock(&block_group->tree_lock); 861 } 862 863 u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group, 864 u64 offset, u64 bytes, u64 empty_size) 865 { 866 struct btrfs_free_space *entry = NULL; 867 u64 bytes_search = bytes + empty_size; 868 u64 ret = 0; 869 870 spin_lock(&block_group->tree_lock); 871 entry = find_free_space(block_group, &offset, &bytes_search, 0); 872 if (!entry) 873 goto out; 874 875 ret = offset; 876 if (entry->bitmap) { 877 bitmap_clear_bits(block_group, entry, offset, bytes); 878 if (!entry->bytes) { 879 unlink_free_space(block_group, entry); 880 kfree(entry->bitmap); 881 kfree(entry); 882 block_group->total_bitmaps--; 883 recalculate_thresholds(block_group); 884 } 885 } else { 886 unlink_free_space(block_group, entry); 887 entry->offset += bytes; 888 entry->bytes -= bytes; 889 if (!entry->bytes) 890 kfree(entry); 891 else 892 link_free_space(block_group, entry); 893 } 894 895 out: 896 spin_unlock(&block_group->tree_lock); 897 898 return ret; 899 } 900 901 /* 902 * given a cluster, put all of its extents back into the free space 903 * cache. If a block group is passed, this function will only free 904 * a cluster that belongs to the passed block group. 905 * 906 * Otherwise, it'll get a reference on the block group pointed to by the 907 * cluster and remove the cluster from it. 908 */ 909 int btrfs_return_cluster_to_free_space( 910 struct btrfs_block_group_cache *block_group, 911 struct btrfs_free_cluster *cluster) 912 { 913 int ret; 914 915 /* first, get a safe pointer to the block group */ 916 spin_lock(&cluster->lock); 917 if (!block_group) { 918 block_group = cluster->block_group; 919 if (!block_group) { 920 spin_unlock(&cluster->lock); 921 return 0; 922 } 923 } else if (cluster->block_group != block_group) { 924 /* someone else has already freed it don't redo their work */ 925 spin_unlock(&cluster->lock); 926 return 0; 927 } 928 atomic_inc(&block_group->count); 929 spin_unlock(&cluster->lock); 930 931 /* now return any extents the cluster had on it */ 932 spin_lock(&block_group->tree_lock); 933 ret = __btrfs_return_cluster_to_free_space(block_group, cluster); 934 spin_unlock(&block_group->tree_lock); 935 936 /* finally drop our ref */ 937 btrfs_put_block_group(block_group); 938 return ret; 939 } 940 941 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group, 942 struct btrfs_free_cluster *cluster, 943 u64 bytes, u64 min_start) 944 { 945 struct btrfs_free_space *entry; 946 int err; 947 u64 search_start = cluster->window_start; 948 u64 search_bytes = bytes; 949 u64 ret = 0; 950 951 spin_lock(&block_group->tree_lock); 952 spin_lock(&cluster->lock); 953 954 if (!cluster->points_to_bitmap) 955 goto out; 956 957 if (cluster->block_group != block_group) 958 goto out; 959 960 entry = tree_search_offset(block_group, search_start, 0, 0); 961 962 if (!entry || !entry->bitmap) 963 goto out; 964 965 search_start = min_start; 966 search_bytes = bytes; 967 968 err = search_bitmap(block_group, entry, &search_start, 969 &search_bytes); 970 if (err) 971 goto out; 972 973 ret = search_start; 974 bitmap_clear_bits(block_group, entry, ret, bytes); 975 out: 976 spin_unlock(&cluster->lock); 977 spin_unlock(&block_group->tree_lock); 978 979 return ret; 980 } 981 982 /* 983 * given a cluster, try to allocate 'bytes' from it, returns 0 984 * if it couldn't find anything suitably large, or a logical disk offset 985 * if things worked out 986 */ 987 u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group, 988 struct btrfs_free_cluster *cluster, u64 bytes, 989 u64 min_start) 990 { 991 struct btrfs_free_space *entry = NULL; 992 struct rb_node *node; 993 u64 ret = 0; 994 995 if (cluster->points_to_bitmap) 996 return btrfs_alloc_from_bitmap(block_group, cluster, bytes, 997 min_start); 998 999 spin_lock(&cluster->lock); 1000 if (bytes > cluster->max_size) 1001 goto out; 1002 1003 if (cluster->block_group != block_group) 1004 goto out; 1005 1006 node = rb_first(&cluster->root); 1007 if (!node) 1008 goto out; 1009 1010 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1011 1012 while(1) { 1013 if (entry->bytes < bytes || entry->offset < min_start) { 1014 struct rb_node *node; 1015 1016 node = rb_next(&entry->offset_index); 1017 if (!node) 1018 break; 1019 entry = rb_entry(node, struct btrfs_free_space, 1020 offset_index); 1021 continue; 1022 } 1023 ret = entry->offset; 1024 1025 entry->offset += bytes; 1026 entry->bytes -= bytes; 1027 1028 if (entry->bytes == 0) { 1029 rb_erase(&entry->offset_index, &cluster->root); 1030 kfree(entry); 1031 } 1032 break; 1033 } 1034 out: 1035 spin_unlock(&cluster->lock); 1036 1037 return ret; 1038 } 1039 1040 static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group, 1041 struct btrfs_free_space *entry, 1042 struct btrfs_free_cluster *cluster, 1043 u64 offset, u64 bytes, u64 min_bytes) 1044 { 1045 unsigned long next_zero; 1046 unsigned long i; 1047 unsigned long search_bits; 1048 unsigned long total_bits; 1049 unsigned long found_bits; 1050 unsigned long start = 0; 1051 unsigned long total_found = 0; 1052 bool found = false; 1053 1054 i = offset_to_bit(entry->offset, block_group->sectorsize, 1055 max_t(u64, offset, entry->offset)); 1056 search_bits = bytes_to_bits(min_bytes, block_group->sectorsize); 1057 total_bits = bytes_to_bits(bytes, block_group->sectorsize); 1058 1059 again: 1060 found_bits = 0; 1061 for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i); 1062 i < BITS_PER_BITMAP; 1063 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) { 1064 next_zero = find_next_zero_bit(entry->bitmap, 1065 BITS_PER_BITMAP, i); 1066 if (next_zero - i >= search_bits) { 1067 found_bits = next_zero - i; 1068 break; 1069 } 1070 i = next_zero; 1071 } 1072 1073 if (!found_bits) 1074 return -1; 1075 1076 if (!found) { 1077 start = i; 1078 found = true; 1079 } 1080 1081 total_found += found_bits; 1082 1083 if (cluster->max_size < found_bits * block_group->sectorsize) 1084 cluster->max_size = found_bits * block_group->sectorsize; 1085 1086 if (total_found < total_bits) { 1087 i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero); 1088 if (i - start > total_bits * 2) { 1089 total_found = 0; 1090 cluster->max_size = 0; 1091 found = false; 1092 } 1093 goto again; 1094 } 1095 1096 cluster->window_start = start * block_group->sectorsize + 1097 entry->offset; 1098 cluster->points_to_bitmap = true; 1099 1100 return 0; 1101 } 1102 1103 /* 1104 * here we try to find a cluster of blocks in a block group. The goal 1105 * is to find at least bytes free and up to empty_size + bytes free. 1106 * We might not find them all in one contiguous area. 1107 * 1108 * returns zero and sets up cluster if things worked out, otherwise 1109 * it returns -enospc 1110 */ 1111 int btrfs_find_space_cluster(struct btrfs_trans_handle *trans, 1112 struct btrfs_root *root, 1113 struct btrfs_block_group_cache *block_group, 1114 struct btrfs_free_cluster *cluster, 1115 u64 offset, u64 bytes, u64 empty_size) 1116 { 1117 struct btrfs_free_space *entry = NULL; 1118 struct rb_node *node; 1119 struct btrfs_free_space *next; 1120 struct btrfs_free_space *last = NULL; 1121 u64 min_bytes; 1122 u64 window_start; 1123 u64 window_free; 1124 u64 max_extent = 0; 1125 bool found_bitmap = false; 1126 int ret; 1127 1128 /* for metadata, allow allocates with more holes */ 1129 if (btrfs_test_opt(root, SSD_SPREAD)) { 1130 min_bytes = bytes + empty_size; 1131 } else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) { 1132 /* 1133 * we want to do larger allocations when we are 1134 * flushing out the delayed refs, it helps prevent 1135 * making more work as we go along. 1136 */ 1137 if (trans->transaction->delayed_refs.flushing) 1138 min_bytes = max(bytes, (bytes + empty_size) >> 1); 1139 else 1140 min_bytes = max(bytes, (bytes + empty_size) >> 4); 1141 } else 1142 min_bytes = max(bytes, (bytes + empty_size) >> 2); 1143 1144 spin_lock(&block_group->tree_lock); 1145 spin_lock(&cluster->lock); 1146 1147 /* someone already found a cluster, hooray */ 1148 if (cluster->block_group) { 1149 ret = 0; 1150 goto out; 1151 } 1152 again: 1153 entry = tree_search_offset(block_group, offset, found_bitmap, 1); 1154 if (!entry) { 1155 ret = -ENOSPC; 1156 goto out; 1157 } 1158 1159 /* 1160 * If found_bitmap is true, we exhausted our search for extent entries, 1161 * and we just want to search all of the bitmaps that we can find, and 1162 * ignore any extent entries we find. 1163 */ 1164 while (entry->bitmap || found_bitmap || 1165 (!entry->bitmap && entry->bytes < min_bytes)) { 1166 struct rb_node *node = rb_next(&entry->offset_index); 1167 1168 if (entry->bitmap && entry->bytes > bytes + empty_size) { 1169 ret = btrfs_bitmap_cluster(block_group, entry, cluster, 1170 offset, bytes + empty_size, 1171 min_bytes); 1172 if (!ret) 1173 goto got_it; 1174 } 1175 1176 if (!node) { 1177 ret = -ENOSPC; 1178 goto out; 1179 } 1180 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1181 } 1182 1183 /* 1184 * We already searched all the extent entries from the passed in offset 1185 * to the end and didn't find enough space for the cluster, and we also 1186 * didn't find any bitmaps that met our criteria, just go ahead and exit 1187 */ 1188 if (found_bitmap) { 1189 ret = -ENOSPC; 1190 goto out; 1191 } 1192 1193 cluster->points_to_bitmap = false; 1194 window_start = entry->offset; 1195 window_free = entry->bytes; 1196 last = entry; 1197 max_extent = entry->bytes; 1198 1199 while (1) { 1200 /* out window is just right, lets fill it */ 1201 if (window_free >= bytes + empty_size) 1202 break; 1203 1204 node = rb_next(&last->offset_index); 1205 if (!node) { 1206 if (found_bitmap) 1207 goto again; 1208 ret = -ENOSPC; 1209 goto out; 1210 } 1211 next = rb_entry(node, struct btrfs_free_space, offset_index); 1212 1213 /* 1214 * we found a bitmap, so if this search doesn't result in a 1215 * cluster, we know to go and search again for the bitmaps and 1216 * start looking for space there 1217 */ 1218 if (next->bitmap) { 1219 if (!found_bitmap) 1220 offset = next->offset; 1221 found_bitmap = true; 1222 last = next; 1223 continue; 1224 } 1225 1226 /* 1227 * we haven't filled the empty size and the window is 1228 * very large. reset and try again 1229 */ 1230 if (next->offset - (last->offset + last->bytes) > 128 * 1024 || 1231 next->offset - window_start > (bytes + empty_size) * 2) { 1232 entry = next; 1233 window_start = entry->offset; 1234 window_free = entry->bytes; 1235 last = entry; 1236 max_extent = 0; 1237 } else { 1238 last = next; 1239 window_free += next->bytes; 1240 if (entry->bytes > max_extent) 1241 max_extent = entry->bytes; 1242 } 1243 } 1244 1245 cluster->window_start = entry->offset; 1246 1247 /* 1248 * now we've found our entries, pull them out of the free space 1249 * cache and put them into the cluster rbtree 1250 * 1251 * The cluster includes an rbtree, but only uses the offset index 1252 * of each free space cache entry. 1253 */ 1254 while (1) { 1255 node = rb_next(&entry->offset_index); 1256 if (entry->bitmap && node) { 1257 entry = rb_entry(node, struct btrfs_free_space, 1258 offset_index); 1259 continue; 1260 } else if (entry->bitmap && !node) { 1261 break; 1262 } 1263 1264 rb_erase(&entry->offset_index, &block_group->free_space_offset); 1265 ret = tree_insert_offset(&cluster->root, entry->offset, 1266 &entry->offset_index, 0); 1267 BUG_ON(ret); 1268 1269 if (!node || entry == last) 1270 break; 1271 1272 entry = rb_entry(node, struct btrfs_free_space, offset_index); 1273 } 1274 1275 cluster->max_size = max_extent; 1276 got_it: 1277 ret = 0; 1278 atomic_inc(&block_group->count); 1279 list_add_tail(&cluster->block_group_list, &block_group->cluster_list); 1280 cluster->block_group = block_group; 1281 out: 1282 spin_unlock(&cluster->lock); 1283 spin_unlock(&block_group->tree_lock); 1284 1285 return ret; 1286 } 1287 1288 /* 1289 * simple code to zero out a cluster 1290 */ 1291 void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster) 1292 { 1293 spin_lock_init(&cluster->lock); 1294 spin_lock_init(&cluster->refill_lock); 1295 cluster->root.rb_node = NULL; 1296 cluster->max_size = 0; 1297 cluster->points_to_bitmap = false; 1298 INIT_LIST_HEAD(&cluster->block_group_list); 1299 cluster->block_group = NULL; 1300 } 1301 1302