/*
 * Copyright (C) 2008 Red Hat. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/pagemap.h>
#include <linux/sched.h>
#include <linux/math64.h>
#include "ctree.h"
#include "free-space-cache.h"
#include "transaction.h"

#define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)

static inline unsigned long offset_to_bit(u64 bitmap_start, u64 sectorsize,
					  u64 offset)
{
	BUG_ON(offset < bitmap_start);
	offset -= bitmap_start;
	return (unsigned long)(div64_u64(offset, sectorsize));
}

static inline unsigned long bytes_to_bits(u64 bytes, u64 sectorsize)
{
	return (unsigned long)(div64_u64(bytes, sectorsize));
}

static inline u64 offset_to_bitmap(struct btrfs_block_group_cache *block_group,
				   u64 offset)
{
	u64 bitmap_start;
	u64 bytes_per_bitmap;

	bytes_per_bitmap = BITS_PER_BITMAP * block_group->sectorsize;
	bitmap_start = offset - block_group->key.objectid;
	bitmap_start = div64_u64(bitmap_start, bytes_per_bitmap);
	bitmap_start *= bytes_per_bitmap;
	bitmap_start += block_group->key.objectid;

	return bitmap_start;
}

static int tree_insert_offset(struct rb_root *root, u64 offset,
			      struct rb_node *node, int bitmap)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct btrfs_free_space *info;

	while (*p) {
		parent = *p;
		info = rb_entry(parent, struct btrfs_free_space, offset_index);

		if (offset < info->offset) {
			p = &(*p)->rb_left;
		} else if (offset > info->offset) {
			p = &(*p)->rb_right;
		} else {
			/*
			 * we could have a bitmap entry and an extent entry
			 * share the same offset. If this is the case, we want
			 * the extent entry to always be found first if we do a
			 * linear search through the tree, since we want to
			 * have the quickest allocation time, and allocating
			 * from an extent is faster than allocating from a
			 * bitmap. So if we're inserting a bitmap and we find
			 * an entry at this offset, we want to go right, or
			 * after this entry logically. If we are inserting an
			 * extent and we've found a bitmap, we want to go left,
			 * or before logically.
			 */
			if (bitmap) {
				WARN_ON(info->bitmap);
				p = &(*p)->rb_right;
			} else {
				WARN_ON(!info->bitmap);
				p = &(*p)->rb_left;
			}
		}
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);

	return 0;
}

/*
 * searches the tree for the given offset.
 *
 * fuzzy - If this is set, then we are trying to make an allocation, and we
 * just want a section that has at least bytes size and comes at or after the
 * given offset.
 */
static struct btrfs_free_space *
tree_search_offset(struct btrfs_block_group_cache *block_group,
		   u64 offset, int bitmap_only, int fuzzy)
{
	struct rb_node *n = block_group->free_space_offset.rb_node;
	struct btrfs_free_space *entry, *prev = NULL;

	/* find entry that is closest to the 'offset' */
	while (1) {
		if (!n) {
			entry = NULL;
			break;
		}

		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		prev = entry;

		if (offset < entry->offset)
			n = n->rb_left;
		else if (offset > entry->offset)
			n = n->rb_right;
		else
			break;
	}

	if (bitmap_only) {
		if (!entry)
			return NULL;
		if (entry->bitmap)
			return entry;

		/*
		 * bitmap entry and extent entry may share same offset,
		 * in that case, bitmap entry comes after extent entry.
		 */
		n = rb_next(n);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
		if (entry->offset != offset)
			return NULL;

		WARN_ON(!entry->bitmap);
		return entry;
	} else if (entry) {
		if (entry->bitmap) {
			/*
			 * if previous extent entry covers the offset,
			 * we should return it instead of the bitmap entry
			 */
			n = &entry->offset_index;
			while (1) {
				n = rb_prev(n);
				if (!n)
					break;
				prev = rb_entry(n, struct btrfs_free_space,
						offset_index);
				if (!prev->bitmap) {
					if (prev->offset + prev->bytes > offset)
						entry = prev;
					break;
				}
			}
		}
		return entry;
	}

	if (!prev)
		return NULL;

	/* find last entry before the 'offset' */
	entry = prev;
	if (entry->offset > offset) {
		n = rb_prev(&entry->offset_index);
		if (n) {
			entry = rb_entry(n, struct btrfs_free_space,
					 offset_index);
			BUG_ON(entry->offset > offset);
		} else {
			if (fuzzy)
				return entry;
			else
				return NULL;
		}
	}

	if (entry->bitmap) {
		n = &entry->offset_index;
		while (1) {
			n = rb_prev(n);
			if (!n)
				break;
			prev = rb_entry(n, struct btrfs_free_space,
					offset_index);
			if (!prev->bitmap) {
				if (prev->offset + prev->bytes > offset)
					return prev;
				break;
			}
		}
		if (entry->offset + BITS_PER_BITMAP *
		    block_group->sectorsize > offset)
			return entry;
	} else if (entry->offset + entry->bytes > offset)
		return entry;

	if (!fuzzy)
		return NULL;

	while (1) {
		if (entry->bitmap) {
			if (entry->offset + BITS_PER_BITMAP *
			    block_group->sectorsize > offset)
				break;
		} else {
			if (entry->offset + entry->bytes > offset)
				break;
		}

		n = rb_next(&entry->offset_index);
		if (!n)
			return NULL;
		entry = rb_entry(n, struct btrfs_free_space, offset_index);
	}
	return entry;
}

static void unlink_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	rb_erase(&info->offset_index, &block_group->free_space_offset);
	block_group->free_extents--;
	block_group->free_space -= info->bytes;
}

static int link_free_space(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info)
{
	int ret = 0;

	BUG_ON(!info->bitmap && !info->bytes);
	ret = tree_insert_offset(&block_group->free_space_offset, info->offset,
				 &info->offset_index, (info->bitmap != NULL));
	if (ret)
		return ret;

	block_group->free_space += info->bytes;
	block_group->free_extents++;
	return ret;
}
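
/*
 * re-derive extents_thresh so that the memory used by bitmaps plus extent
 * entries stays within the MAX_CACHE_BYTES_PER_GIG budget for this block
 * group
 */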
static void recalculate_thresholds(struct btrfs_block_group_cache *block_group)
{
	u64 max_bytes, possible_bytes;

	/*
	 * The goal is to keep the total amount of memory used per 1gb of space
	 * at or below 32k, so we need to adjust how much memory we allow to be
	 * used by extent based free space tracking
	 */
	max_bytes = MAX_CACHE_BYTES_PER_GIG *
		(div64_u64(block_group->key.offset, 1024 * 1024 * 1024));

	possible_bytes = (block_group->total_bitmaps * PAGE_CACHE_SIZE) +
		(sizeof(struct btrfs_free_space) *
		 block_group->extents_thresh);

	if (possible_bytes > max_bytes) {
		int extent_bytes = max_bytes -
			(block_group->total_bitmaps * PAGE_CACHE_SIZE);

		if (extent_bytes <= 0) {
			block_group->extents_thresh = 0;
			return;
		}

		block_group->extents_thresh = extent_bytes /
			(sizeof(struct btrfs_free_space));
	}
}

static void bitmap_clear_bits(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info, u64 offset,
			      u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		clear_bit(i, info->bitmap);

	info->bytes -= bytes;
	block_group->free_space -= bytes;
}

static void bitmap_set_bits(struct btrfs_block_group_cache *block_group,
			    struct btrfs_free_space *info, u64 offset,
			    u64 bytes)
{
	unsigned long start, end;
	unsigned long i;

	start = offset_to_bit(info->offset, block_group->sectorsize, offset);
	end = start + bytes_to_bits(bytes, block_group->sectorsize);
	BUG_ON(end > BITS_PER_BITMAP);

	for (i = start; i < end; i++)
		set_bit(i, info->bitmap);

	info->bytes += bytes;
	block_group->free_space += bytes;
}

static int search_bitmap(struct btrfs_block_group_cache *block_group,
			 struct btrfs_free_space *bitmap_info, u64 *offset,
			 u64 *bytes)
{
	unsigned long found_bits = 0;
	unsigned long bits, i;
	unsigned long next_zero;

	i = offset_to_bit(bitmap_info->offset, block_group->sectorsize,
			  max_t(u64, *offset, bitmap_info->offset));
	bits = bytes_to_bits(*bytes, block_group->sectorsize);

	for (i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(bitmap_info->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(bitmap_info->bitmap,
					       BITS_PER_BITMAP, i);
		if ((next_zero - i) >= bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (found_bits) {
		*offset = (u64)(i * block_group->sectorsize) +
			bitmap_info->offset;
		*bytes = (u64)(found_bits) * block_group->sectorsize;
		return 0;
	}

	return -1;
}

static struct btrfs_free_space *find_free_space(struct btrfs_block_group_cache
						*block_group, u64 *offset,
						u64 *bytes, int debug)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	int ret;

	if (!block_group->free_space_offset.rb_node)
		return NULL;

	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, *offset),
				   0, 1);
	if (!entry)
		return NULL;

	for (node = &entry->offset_index; node; node = rb_next(node)) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		if (entry->bytes < *bytes)
			continue;

		if (entry->bitmap) {
			ret = search_bitmap(block_group, entry, offset, bytes);
			if (!ret)
				return entry;
			continue;
		}

		*offset = entry->offset;
		*bytes = entry->bytes;
		return entry;
	}

	return NULL;
}
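
/*
 * turn the pre-allocated 'info' into a bitmap entry covering the
 * bitmap-aligned range that contains 'offset', link it into the tree and
 * update the bitmap accounting
 */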
static void add_new_bitmap(struct btrfs_block_group_cache *block_group,
			   struct btrfs_free_space *info, u64 offset)
{
	u64 bytes_per_bg = BITS_PER_BITMAP * block_group->sectorsize;
	int max_bitmaps = (int)div64_u64(block_group->key.offset +
					 bytes_per_bg - 1, bytes_per_bg);
	BUG_ON(block_group->total_bitmaps >= max_bitmaps);

	info->offset = offset_to_bitmap(block_group, offset);
	link_free_space(block_group, info);
	block_group->total_bitmaps++;

	recalculate_thresholds(block_group);
}

static noinline int remove_from_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *bitmap_info,
			      u64 *offset, u64 *bytes)
{
	u64 end;
	u64 search_start, search_bytes;
	int ret;

again:
	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize) - 1;

	/*
	 * XXX - this can go away after a few releases.
	 *
	 * since the only user of btrfs_remove_free_space is the tree logging
	 * stuff, and the only way to test that is under crash conditions, we
	 * want to have this debug stuff here just in case something's not
	 * working. Search the bitmap for the space we are trying to use to
	 * make sure it's actually there. If it's not there then we need to
	 * stop because something has gone wrong.
	 */
	search_start = *offset;
	search_bytes = *bytes;
	ret = search_bitmap(block_group, bitmap_info, &search_start,
			    &search_bytes);
	BUG_ON(ret < 0 || search_start != *offset);

	if (*offset > bitmap_info->offset && *offset + *bytes > end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset,
				  end - *offset + 1);
		*bytes -= end - *offset + 1;
		*offset = end + 1;
	} else if (*offset >= bitmap_info->offset && *offset + *bytes <= end) {
		bitmap_clear_bits(block_group, bitmap_info, *offset, *bytes);
		*bytes = 0;
	}

	if (*bytes) {
		struct rb_node *next = rb_next(&bitmap_info->offset_index);
		if (!bitmap_info->bytes) {
			unlink_free_space(block_group, bitmap_info);
			kfree(bitmap_info->bitmap);
			kfree(bitmap_info);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}

		/*
		 * no entry after this bitmap, but we still have bytes to
		 * remove, so something has gone wrong.
		 */
		if (!next)
			return -EINVAL;

		bitmap_info = rb_entry(next, struct btrfs_free_space,
				       offset_index);

		/*
		 * if the next entry isn't a bitmap we need to return to let
		 * the extent stuff do its work.
		 */
		if (!bitmap_info->bitmap)
			return -EAGAIN;

		/*
		 * Ok the next item is a bitmap, but it may not actually hold
		 * the information for the rest of this free space stuff, so
		 * look for it, and if we don't find it return so we can try
		 * everything over again.
		 */
		search_start = *offset;
		search_bytes = *bytes;
		ret = search_bitmap(block_group, bitmap_info, &search_start,
				    &search_bytes);
		if (ret < 0 || search_start != *offset)
			return -EAGAIN;

		goto again;
	} else if (!bitmap_info->bytes) {
		unlink_free_space(block_group, bitmap_info);
		kfree(bitmap_info->bitmap);
		kfree(bitmap_info);
		block_group->total_bitmaps--;
		recalculate_thresholds(block_group);
	}

	return 0;
}
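
/*
 * try to account the free space described by 'info' in one or more bitmap
 * entries rather than as an extent entry.  Returns 0 if the caller should
 * still add a regular extent entry, 1 if the space was pushed into bitmaps,
 * or a negative errno; in the latter two cases 'info' has been consumed.
 */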
static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
			      struct btrfs_free_space *info)
{
	struct btrfs_free_space *bitmap_info;
	int added = 0;
	u64 bytes, offset, end;
	int ret;

	/*
	 * If we are below the extents threshold then we can add this as an
	 * extent, and don't have to deal with the bitmap
	 */
	if (block_group->free_extents < block_group->extents_thresh &&
	    info->bytes > block_group->sectorsize * 4)
		return 0;

	/*
	 * some block groups are so tiny they can't be enveloped by a bitmap,
	 * so don't even bother to create a bitmap for this
	 */
	if (BITS_PER_BITMAP * block_group->sectorsize >
	    block_group->key.offset)
		return 0;

	bytes = info->bytes;
	offset = info->offset;

again:
	bitmap_info = tree_search_offset(block_group,
					 offset_to_bitmap(block_group, offset),
					 1, 0);
	if (!bitmap_info) {
		BUG_ON(added);
		goto new_bitmap;
	}

	end = bitmap_info->offset +
		(u64)(BITS_PER_BITMAP * block_group->sectorsize);

	if (offset >= bitmap_info->offset && offset + bytes > end) {
		bitmap_set_bits(block_group, bitmap_info, offset,
				end - offset);
		bytes -= end - offset;
		offset = end;
		added = 0;
	} else if (offset >= bitmap_info->offset && offset + bytes <= end) {
		bitmap_set_bits(block_group, bitmap_info, offset, bytes);
		bytes = 0;
	} else {
		BUG();
	}

	if (!bytes) {
		ret = 1;
		goto out;
	} else
		goto again;

new_bitmap:
	if (info && info->bitmap) {
		add_new_bitmap(block_group, info, offset);
		added = 1;
		info = NULL;
		goto again;
	} else {
		spin_unlock(&block_group->tree_lock);

		/* no pre-allocated info, allocate a new one */
		if (!info) {
			info = kzalloc(sizeof(struct btrfs_free_space),
				       GFP_NOFS);
			if (!info) {
				spin_lock(&block_group->tree_lock);
				ret = -ENOMEM;
				goto out;
			}
		}

		/* allocate the bitmap */
		info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
		spin_lock(&block_group->tree_lock);
		if (!info->bitmap) {
			ret = -ENOMEM;
			goto out;
		}
		goto again;
	}

out:
	if (info) {
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
	}

	return ret;
}
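
/*
 * add a free space range to the in-memory cache of this block group.  The
 * range is merged with any adjacent extent entries; if there are none it may
 * be folded into a bitmap instead of getting an extent entry of its own.
 */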
int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
			 u64 offset, u64 bytes)
{
	struct btrfs_free_space *right_info = NULL;
	struct btrfs_free_space *left_info = NULL;
	struct btrfs_free_space *info = NULL;
	int ret = 0;

	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
	if (!info)
		return -ENOMEM;

	info->offset = offset;
	info->bytes = bytes;

	spin_lock(&block_group->tree_lock);

	/*
	 * first we want to see if there is free space adjacent to the range we
	 * are adding, if there is remove that struct and add a new one to
	 * cover the entire range
	 */
	right_info = tree_search_offset(block_group, offset + bytes, 0, 0);
	if (right_info && rb_prev(&right_info->offset_index))
		left_info = rb_entry(rb_prev(&right_info->offset_index),
				     struct btrfs_free_space, offset_index);
	else
		left_info = tree_search_offset(block_group, offset - 1, 0, 0);

	/*
	 * If there was no extent directly to the left or right of this new
	 * extent then we know we're going to have to allocate a new extent, so
	 * before we do that see if we need to drop this into a bitmap
	 */
	if ((!left_info || left_info->bitmap) &&
	    (!right_info || right_info->bitmap)) {
		ret = insert_into_bitmap(block_group, info);

		if (ret < 0) {
			goto out;
		} else if (ret) {
			ret = 0;
			goto out;
		}
	}

	if (right_info && !right_info->bitmap) {
		unlink_free_space(block_group, right_info);
		info->bytes += right_info->bytes;
		kfree(right_info);
	}

	if (left_info && !left_info->bitmap &&
	    left_info->offset + left_info->bytes == offset) {
		unlink_free_space(block_group, left_info);
		info->offset = left_info->offset;
		info->bytes += left_info->bytes;
		kfree(left_info);
	}

	ret = link_free_space(block_group, info);
	if (ret)
		kfree(info);
out:
	spin_unlock(&block_group->tree_lock);

	if (ret) {
		printk(KERN_CRIT "btrfs: unable to add free space :%d\n", ret);
		BUG_ON(ret == -EEXIST);
	}

	return ret;
}

int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
			    u64 offset, u64 bytes)
{
	struct btrfs_free_space *info;
	struct btrfs_free_space *next_info = NULL;
	int ret = 0;

	spin_lock(&block_group->tree_lock);

again:
	info = tree_search_offset(block_group, offset, 0, 0);
	if (!info) {
		/*
		 * oops didn't find an extent that matched the space we wanted
		 * to remove, look for a bitmap instead
		 */
		info = tree_search_offset(block_group,
					  offset_to_bitmap(block_group, offset),
					  1, 0);
		if (!info) {
			WARN_ON(1);
			goto out_lock;
		}
	}

	if (info->bytes < bytes && rb_next(&info->offset_index)) {
		u64 end;
		next_info = rb_entry(rb_next(&info->offset_index),
				     struct btrfs_free_space,
				     offset_index);

		if (next_info->bitmap)
			end = next_info->offset + BITS_PER_BITMAP *
				block_group->sectorsize - 1;
		else
			end = next_info->offset + next_info->bytes;

		if (next_info->bytes < bytes ||
		    next_info->offset > offset || offset > end) {
			printk(KERN_CRIT "Found free space at %llu, size %llu,"
			      " trying to use %llu\n",
			      (unsigned long long)info->offset,
			      (unsigned long long)info->bytes,
			      (unsigned long long)bytes);
			WARN_ON(1);
			ret = -EINVAL;
			goto out_lock;
		}

		info = next_info;
	}

	if (info->bytes == bytes) {
		unlink_free_space(block_group, info);
		if (info->bitmap) {
			kfree(info->bitmap);
			block_group->total_bitmaps--;
		}
		kfree(info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset == offset) {
		unlink_free_space(block_group, info);
		info->offset += bytes;
		info->bytes -= bytes;
		link_free_space(block_group, info);
		goto out_lock;
	}

	if (!info->bitmap && info->offset <= offset &&
	    info->offset + info->bytes >= offset + bytes) {
		u64 old_start = info->offset;
		/*
		 * we're freeing space in the middle of the info,
		 * this can happen during tree log replay
		 *
		 * first unlink the old info and then
		 * insert it again after the hole we're creating
		 */
		unlink_free_space(block_group, info);
		if (offset + bytes < info->offset + info->bytes) {
			u64 old_end = info->offset + info->bytes;

			info->offset = offset + bytes;
			info->bytes = old_end - info->offset;
			ret = link_free_space(block_group, info);
			WARN_ON(ret);
			if (ret)
				goto out_lock;
		} else {
			/* the hole we're creating ends at the end
			 * of the info struct, just free the info
			 */
			kfree(info);
		}
		spin_unlock(&block_group->tree_lock);

		/* step two, insert a new info struct to cover
		 * anything before the hole
		 */
		ret = btrfs_add_free_space(block_group, old_start,
					   offset - old_start);
		WARN_ON(ret);
		goto out;
	}

	ret = remove_from_bitmap(block_group, info, &offset, &bytes);
	if (ret == -EAGAIN)
		goto again;
	BUG_ON(ret);
out_lock:
	spin_unlock(&block_group->tree_lock);
out:
	return ret;
}
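
/*
 * debug helper: dump every free space entry of this block group to the log
 * and count how many of them are at least 'bytes' in size
 */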
void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
			   u64 bytes)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	int count = 0;

	for (n = rb_first(&block_group->free_space_offset); n; n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		if (info->bytes >= bytes)
			count++;
		printk(KERN_CRIT "entry offset %llu, bytes %llu, bitmap %s\n",
		       (unsigned long long)info->offset,
		       (unsigned long long)info->bytes,
		       (info->bitmap) ? "yes" : "no");
	}
	printk(KERN_INFO "block group has cluster?: %s\n",
	       list_empty(&block_group->cluster_list) ? "no" : "yes");
	printk(KERN_INFO "%d blocks of free space at or bigger than bytes is"
	       "\n", count);
}

u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *n;
	u64 ret = 0;

	for (n = rb_first(&block_group->free_space_offset); n;
	     n = rb_next(n)) {
		info = rb_entry(n, struct btrfs_free_space, offset_index);
		ret += info->bytes;
	}

	return ret;
}

/*
 * for a given cluster, put all of its extents back into the free
 * space cache.  If the block group passed doesn't match the block group
 * pointed to by the cluster, someone else raced in and freed the
 * cluster already.  In that case, we just return without changing anything
 */
static int
__btrfs_return_cluster_to_free_space(
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster)
{
	struct btrfs_free_space *entry;
	struct rb_node *node;
	bool bitmap;

	spin_lock(&cluster->lock);
	if (cluster->block_group != block_group)
		goto out;

	bitmap = cluster->points_to_bitmap;
	cluster->block_group = NULL;
	cluster->window_start = 0;
	list_del_init(&cluster->block_group_list);
	cluster->points_to_bitmap = false;

	if (bitmap)
		goto out;

	node = rb_first(&cluster->root);
	while (node) {
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
		node = rb_next(&entry->offset_index);
		rb_erase(&entry->offset_index, &cluster->root);
		BUG_ON(entry->bitmap);
		tree_insert_offset(&block_group->free_space_offset,
				   entry->offset, &entry->offset_index, 0);
	}
	cluster->root.rb_node = NULL;

out:
	spin_unlock(&cluster->lock);
	btrfs_put_block_group(block_group);
	return 0;
}

void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
{
	struct btrfs_free_space *info;
	struct rb_node *node;
	struct btrfs_free_cluster *cluster;
	struct list_head *head;

	spin_lock(&block_group->tree_lock);
	while ((head = block_group->cluster_list.next) !=
	       &block_group->cluster_list) {
		cluster = list_entry(head, struct btrfs_free_cluster,
				     block_group_list);

		WARN_ON(cluster->block_group != block_group);
		__btrfs_return_cluster_to_free_space(block_group, cluster);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	while ((node = rb_last(&block_group->free_space_offset)) != NULL) {
		info = rb_entry(node, struct btrfs_free_space, offset_index);
		unlink_free_space(block_group, info);
		if (info->bitmap)
			kfree(info->bitmap);
		kfree(info);
		if (need_resched()) {
			spin_unlock(&block_group->tree_lock);
			cond_resched();
			spin_lock(&block_group->tree_lock);
		}
	}

	spin_unlock(&block_group->tree_lock);
}

u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
			       u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	u64 bytes_search = bytes + empty_size;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	entry = find_free_space(block_group, &offset, &bytes_search, 0);
	if (!entry)
		goto out;

	ret = offset;
	if (entry->bitmap) {
		bitmap_clear_bits(block_group, entry, offset, bytes);
		if (!entry->bytes) {
			unlink_free_space(block_group, entry);
			kfree(entry->bitmap);
			kfree(entry);
			block_group->total_bitmaps--;
			recalculate_thresholds(block_group);
		}
	} else {
		unlink_free_space(block_group, entry);
		entry->offset += bytes;
		entry->bytes -= bytes;
		if (!entry->bytes)
			kfree(entry);
		else
			link_free_space(block_group, entry);
	}

out:
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, put all of its extents back into the free space
 * cache.  If a block group is passed, this function will only free
 * a cluster that belongs to the passed block group.
 *
 * Otherwise, it'll get a reference on the block group pointed to by the
 * cluster and remove the cluster from it.
 */
int btrfs_return_cluster_to_free_space(
			       struct btrfs_block_group_cache *block_group,
			       struct btrfs_free_cluster *cluster)
{
	int ret;

	/* first, get a safe pointer to the block group */
	spin_lock(&cluster->lock);
	if (!block_group) {
		block_group = cluster->block_group;
		if (!block_group) {
			spin_unlock(&cluster->lock);
			return 0;
		}
	} else if (cluster->block_group != block_group) {
		/* someone else has already freed it don't redo their work */
		spin_unlock(&cluster->lock);
		return 0;
	}
	atomic_inc(&block_group->count);
	spin_unlock(&cluster->lock);

	/* now return any extents the cluster had on it */
	spin_lock(&block_group->tree_lock);
	ret = __btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&block_group->tree_lock);

	/* finally drop our ref */
	btrfs_put_block_group(block_group);
	return ret;
}

static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
				   struct btrfs_free_cluster *cluster,
				   u64 bytes, u64 min_start)
{
	struct btrfs_free_space *entry;
	int err;
	u64 search_start = cluster->window_start;
	u64 search_bytes = bytes;
	u64 ret = 0;

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	if (!cluster->points_to_bitmap)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	/*
	 * search_start is the beginning of the bitmap, but at some point it
	 * may be a good idea to point to the actual start of the free area in
	 * the bitmap, so do the offset_to_bitmap trick anyway, and set
	 * bitmap_only to 1 to make sure we get the bitmap entry
	 */
	entry = tree_search_offset(block_group,
				   offset_to_bitmap(block_group, search_start),
				   1, 0);
	if (!entry || !entry->bitmap)
		goto out;

	search_start = min_start;
	search_bytes = bytes;

	err = search_bitmap(block_group, entry, &search_start,
			    &search_bytes);
	if (err)
		goto out;

	ret = search_start;
	bitmap_clear_bits(block_group, entry, ret, bytes);
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * given a cluster, try to allocate 'bytes' from it, returns 0
 * if it couldn't find anything suitably large, or a logical disk offset
 * if things worked out
 */
u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster, u64 bytes,
			     u64 min_start)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	u64 ret = 0;

	if (cluster->points_to_bitmap)
		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
					       min_start);

	spin_lock(&cluster->lock);
	if (bytes > cluster->max_size)
		goto out;

	if (cluster->block_group != block_group)
		goto out;

	node = rb_first(&cluster->root);
	if (!node)
		goto out;

	entry = rb_entry(node, struct btrfs_free_space, offset_index);

	while (1) {
		if (entry->bytes < bytes || entry->offset < min_start) {
			struct rb_node *node;

			node = rb_next(&entry->offset_index);
			if (!node)
				break;
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		}
		ret = entry->offset;

		entry->offset += bytes;
		entry->bytes -= bytes;

		if (entry->bytes == 0) {
			rb_erase(&entry->offset_index, &cluster->root);
			kfree(entry);
		}
		break;
	}
out:
	spin_unlock(&cluster->lock);

	return ret;
}
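
/*
 * try to build a cluster inside a single bitmap entry: look for runs of set
 * bits that are at least min_bytes long and keep widening the window until it
 * covers 'bytes' worth of free space, then point the cluster at the bitmap
 */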
static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
				struct btrfs_free_space *entry,
				struct btrfs_free_cluster *cluster,
				u64 offset, u64 bytes, u64 min_bytes)
{
	unsigned long next_zero;
	unsigned long i;
	unsigned long search_bits;
	unsigned long total_bits;
	unsigned long found_bits;
	unsigned long start = 0;
	unsigned long total_found = 0;
	bool found = false;

	i = offset_to_bit(entry->offset, block_group->sectorsize,
			  max_t(u64, offset, entry->offset));
	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
	total_bits = bytes_to_bits(bytes, block_group->sectorsize);

again:
	found_bits = 0;
	for (i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i);
	     i < BITS_PER_BITMAP;
	     i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, i + 1)) {
		next_zero = find_next_zero_bit(entry->bitmap,
					       BITS_PER_BITMAP, i);
		if (next_zero - i >= search_bits) {
			found_bits = next_zero - i;
			break;
		}
		i = next_zero;
	}

	if (!found_bits)
		return -1;

	if (!found) {
		start = i;
		found = true;
	}

	total_found += found_bits;

	if (cluster->max_size < found_bits * block_group->sectorsize)
		cluster->max_size = found_bits * block_group->sectorsize;

	if (total_found < total_bits) {
		i = find_next_bit(entry->bitmap, BITS_PER_BITMAP, next_zero);
		if (i - start > total_bits * 2) {
			total_found = 0;
			cluster->max_size = 0;
			found = false;
		}
		goto again;
	}

	cluster->window_start = start * block_group->sectorsize +
		entry->offset;
	cluster->points_to_bitmap = true;

	return 0;
}

/*
 * here we try to find a cluster of blocks in a block group.  The goal
 * is to find at least bytes free and up to empty_size + bytes free.
 * We might not find them all in one contiguous area.
 *
 * returns zero and sets up cluster if things worked out, otherwise
 * it returns -enospc
 */
int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group,
			     struct btrfs_free_cluster *cluster,
			     u64 offset, u64 bytes, u64 empty_size)
{
	struct btrfs_free_space *entry = NULL;
	struct rb_node *node;
	struct btrfs_free_space *next;
	struct btrfs_free_space *last = NULL;
	u64 min_bytes;
	u64 window_start;
	u64 window_free;
	u64 max_extent = 0;
	bool found_bitmap = false;
	int ret;

	/* for metadata, allow allocations with more holes */
	if (btrfs_test_opt(root, SSD_SPREAD)) {
		min_bytes = bytes + empty_size;
	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
		/*
		 * we want to do larger allocations when we are
		 * flushing out the delayed refs, it helps prevent
		 * making more work as we go along.
		 */
		if (trans->transaction->delayed_refs.flushing)
			min_bytes = max(bytes, (bytes + empty_size) >> 1);
		else
			min_bytes = max(bytes, (bytes + empty_size) >> 4);
	} else
		min_bytes = max(bytes, (bytes + empty_size) >> 2);

	spin_lock(&block_group->tree_lock);
	spin_lock(&cluster->lock);

	/* someone already found a cluster, hooray */
	if (cluster->block_group) {
		ret = 0;
		goto out;
	}
again:
	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
	if (!entry) {
		ret = -ENOSPC;
		goto out;
	}

	/*
	 * If found_bitmap is true, we exhausted our search for extent entries,
	 * and we just want to search all of the bitmaps that we can find, and
	 * ignore any extent entries we find.
	 */
	while (entry->bitmap || found_bitmap ||
	       (!entry->bitmap && entry->bytes < min_bytes)) {
		struct rb_node *node = rb_next(&entry->offset_index);

		if (entry->bitmap && entry->bytes > bytes + empty_size) {
			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
						   offset, bytes + empty_size,
						   min_bytes);
			if (!ret)
				goto got_it;
		}

		if (!node) {
			ret = -ENOSPC;
			goto out;
		}
		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	/*
	 * We already searched all the extent entries from the passed in offset
	 * to the end and didn't find enough space for the cluster, and we also
	 * didn't find any bitmaps that met our criteria, just go ahead and
	 * exit
	 */
	if (found_bitmap) {
		ret = -ENOSPC;
		goto out;
	}

	cluster->points_to_bitmap = false;
	window_start = entry->offset;
	window_free = entry->bytes;
	last = entry;
	max_extent = entry->bytes;

	while (1) {
		/* our window is just right, let's fill it */
		if (window_free >= bytes + empty_size)
			break;

		node = rb_next(&last->offset_index);
		if (!node) {
			if (found_bitmap)
				goto again;
			ret = -ENOSPC;
			goto out;
		}
		next = rb_entry(node, struct btrfs_free_space, offset_index);

		/*
		 * we found a bitmap, so if this search doesn't result in a
		 * cluster, we know to go and search again for the bitmaps and
		 * start looking for space there
		 */
		if (next->bitmap) {
			if (!found_bitmap)
				offset = next->offset;
			found_bitmap = true;
			last = next;
			continue;
		}

		/*
		 * we haven't filled the empty size and the window is
		 * very large.  reset and try again
		 */
		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
		    next->offset - window_start > (bytes + empty_size) * 2) {
			entry = next;
			window_start = entry->offset;
			window_free = entry->bytes;
			last = entry;
			max_extent = entry->bytes;
		} else {
			last = next;
			window_free += next->bytes;
			if (next->bytes > max_extent)
				max_extent = next->bytes;
		}
	}

	cluster->window_start = entry->offset;

	/*
	 * now we've found our entries, pull them out of the free space
	 * cache and put them into the cluster rbtree
	 *
	 * The cluster includes an rbtree, but only uses the offset index
	 * of each free space cache entry.
	 */
	while (1) {
		node = rb_next(&entry->offset_index);
		if (entry->bitmap && node) {
			entry = rb_entry(node, struct btrfs_free_space,
					 offset_index);
			continue;
		} else if (entry->bitmap && !node) {
			break;
		}

		rb_erase(&entry->offset_index, &block_group->free_space_offset);
		ret = tree_insert_offset(&cluster->root, entry->offset,
					 &entry->offset_index, 0);
		BUG_ON(ret);

		if (!node || entry == last)
			break;

		entry = rb_entry(node, struct btrfs_free_space, offset_index);
	}

	cluster->max_size = max_extent;
got_it:
	ret = 0;
	atomic_inc(&block_group->count);
	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
	cluster->block_group = block_group;
out:
	spin_unlock(&cluster->lock);
	spin_unlock(&block_group->tree_lock);

	return ret;
}

/*
 * simple code to zero out a cluster
 */
void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
{
	spin_lock_init(&cluster->lock);
	spin_lock_init(&cluster->refill_lock);
	cluster->root.rb_node = NULL;
	cluster->max_size = 0;
	cluster->points_to_bitmap = false;
	INIT_LIST_HEAD(&cluster->block_group_list);
	cluster->block_group = NULL;
}