// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "free-space-tree.h"
#include "transaction.h"
#include "block-group.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"

static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path);

/*
 * Return the free space tree root to use for the given block group.
 *
 * With the EXTENT_TREE_V2 incompat feature there are multiple global free
 * space roots, selected by the block group's global_root_id; otherwise the
 * single root at offset 0 is used.
 */
static struct btrfs_root *btrfs_free_space_root(
		struct btrfs_block_group *block_group)
{
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};

	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
		key.offset = block_group->global_root_id;
	return btrfs_global_root(block_group->fs_info, &key);
}

/*
 * Compute the extent count thresholds at which a block group's free space
 * is converted between the extent and bitmap representations, and store
 * them in @cache (bitmap_high_thresh / bitmap_low_thresh).
 */
void set_free_space_tree_thresholds(struct btrfs_block_group *cache)
{
	u32 bitmap_range;
	size_t bitmap_size;
	u64 num_bitmaps, total_bitmap_size;

	/* A zero-length block group should never happen; warn loudly. */
	if (WARN_ON(cache->length == 0))
		btrfs_warn(cache->fs_info, "block group %llu length is zero",
			   cache->start);

	/*
	 * We convert to bitmaps when the disk space required for using extents
	 * exceeds that required for using bitmaps.
	 */
	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
	total_bitmap_size = num_bitmaps * bitmap_size;
	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
					    sizeof(struct btrfs_item));

	/*
	 * We allow for a small buffer between the high threshold and low
	 * threshold to avoid thrashing back and forth between the two formats.
	 */
	if (cache->bitmap_high_thresh > 100)
		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
	else
		cache->bitmap_low_thresh = 0;
}

/*
 * Insert a fresh FREE_SPACE_INFO item for @block_group with an extent count
 * of 0 and no flags set. The path is released before returning.
 */
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;

	key.objectid = block_group->start;
	key.type = BTRFS_FREE_SPACE_INFO_KEY;
	key.offset = block_group->length;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
	if (ret)
		goto out;

	leaf = path->nodes[0];
	info = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_free_space_info);
	btrfs_set_free_space_extent_count(leaf, info, 0);
	btrfs_set_free_space_flags(leaf, info, 0);

	ret = 0;
out:
	btrfs_release_path(path);
	return ret;
}

/*
 * Look up the FREE_SPACE_INFO item for @block_group.
 *
 * On success returns a pointer into path->nodes[0]; the caller owns the path
 * and must release it when done with the item. Returns ERR_PTR(-ENOENT) if
 * the item is missing (which indicates a corrupted/inconsistent tree), or an
 * ERR_PTR for other search failures. @cow selects a COW (writable) search.
 */
EXPORT_FOR_TESTS
struct btrfs_free_space_info *search_free_space_info(
		struct btrfs_trans_handle *trans,
		struct btrfs_block_group *block_group,
		struct btrfs_path *path, int cow)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	int ret;

	key.objectid = block_group->start;
	key.type = BTRFS_FREE_SPACE_INFO_KEY;
	key.offset = block_group->length;

	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret != 0) {
		btrfs_warn(fs_info, "missing free space info for %llu",
			   block_group->start);
		DEBUG_WARN();
		return ERR_PTR(-ENOENT);
	}

	return btrfs_item_ptr(path->nodes[0], path->slots[0],
			      struct btrfs_free_space_info);
}
/*
 * btrfs_search_slot() but we're looking for the greatest key less than the
 * passed key.
 *
 * An exact match (ret == 0) or landing on slot 0 (no previous item) means
 * the expected predecessor key does not exist, which callers treat as tree
 * corruption, hence -EIO.
 */
static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_key *key, struct btrfs_path *p,
				  int ins_len, int cow)
{
	int ret;

	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
	if (ret < 0)
		return ret;

	if (ret == 0) {
		DEBUG_WARN();
		return -EIO;
	}

	if (p->slots[0] == 0) {
		DEBUG_WARN("no previous slot found");
		return -EIO;
	}
	p->slots[0]--;

	return 0;
}

/*
 * Number of bytes needed for a bitmap covering @size bytes of the block
 * group, at one bit per sector.
 */
static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
					 u64 size)
{
	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
}

/* Allocate a zeroed in-memory bitmap of @bitmap_size bytes. */
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	unsigned long *ret;
	unsigned int nofs_flag;
	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));

	/*
	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
	 * into the filesystem as the free space bitmap can be modified in the
	 * critical section of a transaction commit.
	 *
	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
	 * know that recursion is unsafe.
	 */
	nofs_flag = memalloc_nofs_save();
	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	return ret;
}

/*
 * Set @len bits starting at bit @start in a little-endian (byte-addressed)
 * bitmap. Works a byte at a time with partial masks at both ends.
 */
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	u8 *p = ((u8 *)map) + BIT_BYTE(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

/*
 * Convert a block group's free space from FREE_SPACE_EXTENT items to
 * FREE_SPACE_BITMAP items.
 *
 * Walks the existing extent items backwards from the end of the block group,
 * accumulating them into an in-memory bitmap and deleting them, then sets
 * BTRFS_FREE_SPACE_USING_BITMAPS in the info item and inserts one bitmap
 * item per BTRFS_FREE_SPACE_BITMAP_BITS-sized range. Aborts the transaction
 * on any failure.
 */
EXPORT_FOR_TESTS
int convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group,
				  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	unsigned long *bitmap;
	char *bitmap_cursor;
	u64 start, end;
	u64 bitmap_range, i;
	u32 bitmap_size, flags, expected_extent_count;
	u32 extent_count = 0;
	int done = 0, nr;
	int ret;

	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
	bitmap = alloc_bitmap(bitmap_size);
	if (!bitmap) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	/* Start just past the last possible key for this block group. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		/* Walk the leaf backwards until we hit the info item. */
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
				u64 first, last;

				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);

				first = div_u64(found_key.objectid - start,
						fs_info->sectorsize);
				last = div_u64(found_key.objectid + found_key.offset - start,
					       fs_info->sectorsize);
				le_bitmap_set(bitmap, first, last - first);

				extent_count++;
				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		/* Delete the extent items we just absorbed into the bitmap. */
		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);
	}

	info = search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	leaf = path->nodes[0];
	flags = btrfs_free_space_flags(leaf, info);
	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
	btrfs_set_free_space_flags(leaf, info, flags);
	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
	btrfs_release_path(path);

	/* Cross-check against the count recorded in the info item. */
	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ret = -EIO;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	bitmap_cursor = (char *)bitmap;
	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	i = start;
	while (i < end) {
		unsigned long ptr;
		u64 extent_size;
		u32 data_size;

		extent_size = min(end - i, bitmap_range);
		data_size = free_space_bitmap_size(fs_info, extent_size);

		key.objectid = i;
		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
		key.offset = extent_size;

		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      data_size);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, bitmap_cursor, ptr,
				    data_size);
		btrfs_release_path(path);

		i += extent_size;
		bitmap_cursor += data_size;
	}

	ret = 0;
out:
	kvfree(bitmap);
	return ret;
}

/*
 * Convert a block group's free space from FREE_SPACE_BITMAP items to
 * FREE_SPACE_EXTENT items.
 *
 * Mirror of convert_free_space_to_bitmaps(): reads all bitmap items into an
 * in-memory bitmap while deleting them, clears BTRFS_FREE_SPACE_USING_BITMAPS
 * in the info item, then inserts one extent item per contiguous run of set
 * bits. Aborts the transaction on any failure.
 */
EXPORT_FOR_TESTS
int convert_free_space_to_extents(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group,
				  struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	unsigned long *bitmap;
	u64 start, end;
	u32 bitmap_size, flags, expected_extent_count;
	unsigned long nrbits, start_bit, end_bit;
	u32 extent_count = 0;
	int done = 0, nr;
	int ret;

	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
	bitmap = alloc_bitmap(bitmap_size);
	if (!bitmap) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		/* Walk the leaf backwards until we hit the info item. */
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				unsigned long ptr;
				char *bitmap_cursor;
				u32 bitmap_pos, data_size;

				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);

				bitmap_pos = div_u64(found_key.objectid - start,
						     fs_info->sectorsize *
						     BITS_PER_BYTE);
				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
				data_size = free_space_bitmap_size(fs_info,
								   found_key.offset);

				ptr = btrfs_item_ptr_offset(leaf, path->slots[0] - 1);
				read_extent_buffer(leaf, bitmap_cursor, ptr,
						   data_size);

				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);
	}

	info = search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	leaf = path->nodes[0];
	flags = btrfs_free_space_flags(leaf, info);
	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
	btrfs_set_free_space_flags(leaf, info, flags);
	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
	btrfs_release_path(path);

	/* Emit one extent item per contiguous run of set bits. */
	nrbits = block_group->length >> block_group->fs_info->sectorsize_bits;
	start_bit = find_next_bit_le(bitmap, nrbits, 0);

	while (start_bit < nrbits) {
		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
		ASSERT(start_bit < end_bit);

		key.objectid = start + start_bit * block_group->fs_info->sectorsize;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = (end_bit - start_bit) * block_group->fs_info->sectorsize;

		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);

		extent_count++;

		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
	}

	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ret = -EIO;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = 0;
out:
	kvfree(bitmap);
	return ret;
}

/*
 * Adjust the extent count in the block group's FREE_SPACE_INFO item by
 * @new_extents (may be negative), then convert the block group between the
 * extent and bitmap formats if the new count crossed the high/low threshold
 * for its current format.
 */
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
					  struct btrfs_block_group *block_group,
					  struct btrfs_path *path,
					  int new_extents)
{
	struct btrfs_free_space_info *info;
	u32 flags;
	u32 extent_count;
	int ret = 0;

	if (new_extents == 0)
		return 0;

	info = search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		goto out;
	}
	flags = btrfs_free_space_flags(path->nodes[0], info);
	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);

	extent_count += new_extents;
	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
	btrfs_release_path(path);

	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
	    extent_count > block_group->bitmap_high_thresh) {
		ret = convert_free_space_to_bitmaps(trans, block_group, path);
	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
		   extent_count < block_group->bitmap_low_thresh) {
		ret = convert_free_space_to_extents(trans, block_group, path);
	}

out:
	return ret;
}

/*
 * Test the bit for @offset in the bitmap item that @path currently points
 * at. The caller must have positioned the path on the FREE_SPACE_BITMAP item
 * covering @offset. Returns 0 or 1.
 */
EXPORT_FOR_TESTS
int free_space_test_bit(struct btrfs_block_group *block_group,
			struct btrfs_path *path, u64 offset)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 found_start, found_end;
	unsigned long ptr, i;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(offset >= found_start && offset < found_end);

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	i = div_u64(offset - found_start,
		    block_group->fs_info->sectorsize);
	return !!extent_buffer_test_bit(leaf, ptr, i);
}

/*
 * Set or clear (per @bit) the bits for the range [*start, *start + *size) in
 * the bitmap item that @path points at, clamped to that item's coverage.
 * On return *start/*size are advanced past the portion handled here, so the
 * caller can continue into the next bitmap item if *size is still non-zero.
 */
static void free_space_set_bits(struct btrfs_trans_handle *trans,
				struct btrfs_block_group *block_group,
				struct btrfs_path *path, u64 *start, u64 *size,
				int bit)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 end = *start + *size;
	u64 found_start, found_end;
	unsigned long ptr, first, last;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(*start >= found_start && *start < found_end);
	ASSERT(end > found_start);

	/* Only touch the part of the range covered by this bitmap item. */
	if (end > found_end)
		end = found_end;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	first = (*start - found_start) >> fs_info->sectorsize_bits;
	last = (end - found_start) >> fs_info->sectorsize_bits;
	if (bit)
		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
	else
		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
	btrfs_mark_buffer_dirty(trans, leaf);

	*size -= end - *start;
	*start = end;
}
585 */ 586 static int free_space_next_bitmap(struct btrfs_trans_handle *trans, 587 struct btrfs_root *root, struct btrfs_path *p) 588 { 589 struct btrfs_key key; 590 591 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { 592 p->slots[0]++; 593 return 0; 594 } 595 596 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); 597 btrfs_release_path(p); 598 599 key.objectid += key.offset; 600 key.type = (u8)-1; 601 key.offset = (u64)-1; 602 603 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); 604 } 605 606 /* 607 * If remove is 1, then we are removing free space, thus clearing bits in the 608 * bitmap. If remove is 0, then we are adding free space, thus setting bits in 609 * the bitmap. 610 */ 611 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, 612 struct btrfs_block_group *block_group, 613 struct btrfs_path *path, 614 u64 start, u64 size, int remove) 615 { 616 struct btrfs_root *root = btrfs_free_space_root(block_group); 617 struct btrfs_key key; 618 u64 end = start + size; 619 u64 cur_start, cur_size; 620 int prev_bit, next_bit; 621 int new_extents; 622 int ret; 623 624 /* 625 * Read the bit for the block immediately before the extent of space if 626 * that block is within the block group. 627 */ 628 if (start > block_group->start) { 629 u64 prev_block = start - block_group->fs_info->sectorsize; 630 631 key.objectid = prev_block; 632 key.type = (u8)-1; 633 key.offset = (u64)-1; 634 635 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 636 if (ret) 637 goto out; 638 639 prev_bit = free_space_test_bit(block_group, path, prev_block); 640 641 /* The previous block may have been in the previous bitmap. 
*/ 642 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 643 if (start >= key.objectid + key.offset) { 644 ret = free_space_next_bitmap(trans, root, path); 645 if (ret) 646 goto out; 647 } 648 } else { 649 key.objectid = start; 650 key.type = (u8)-1; 651 key.offset = (u64)-1; 652 653 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 654 if (ret) 655 goto out; 656 657 prev_bit = -1; 658 } 659 660 /* 661 * Iterate over all of the bitmaps overlapped by the extent of space, 662 * clearing/setting bits as required. 663 */ 664 cur_start = start; 665 cur_size = size; 666 while (1) { 667 free_space_set_bits(trans, block_group, path, &cur_start, &cur_size, 668 !remove); 669 if (cur_size == 0) 670 break; 671 ret = free_space_next_bitmap(trans, root, path); 672 if (ret) 673 goto out; 674 } 675 676 /* 677 * Read the bit for the block immediately after the extent of space if 678 * that block is within the block group. 679 */ 680 if (end < block_group->start + block_group->length) { 681 /* The next block may be in the next bitmap. */ 682 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 683 if (end >= key.objectid + key.offset) { 684 ret = free_space_next_bitmap(trans, root, path); 685 if (ret) 686 goto out; 687 } 688 689 next_bit = free_space_test_bit(block_group, path, end); 690 } else { 691 next_bit = -1; 692 } 693 694 if (remove) { 695 new_extents = -1; 696 if (prev_bit == 1) { 697 /* Leftover on the left. */ 698 new_extents++; 699 } 700 if (next_bit == 1) { 701 /* Leftover on the right. */ 702 new_extents++; 703 } 704 } else { 705 new_extents = 1; 706 if (prev_bit == 1) { 707 /* Merging with neighbor on the left. */ 708 new_extents--; 709 } 710 if (next_bit == 1) { 711 /* Merging with neighbor on the right. 
*/ 712 new_extents--; 713 } 714 } 715 716 btrfs_release_path(path); 717 ret = update_free_space_extent_count(trans, block_group, path, 718 new_extents); 719 720 out: 721 return ret; 722 } 723 724 static int remove_free_space_extent(struct btrfs_trans_handle *trans, 725 struct btrfs_block_group *block_group, 726 struct btrfs_path *path, 727 u64 start, u64 size) 728 { 729 struct btrfs_root *root = btrfs_free_space_root(block_group); 730 struct btrfs_key key; 731 u64 found_start, found_end; 732 u64 end = start + size; 733 int new_extents = -1; 734 int ret; 735 736 key.objectid = start; 737 key.type = (u8)-1; 738 key.offset = (u64)-1; 739 740 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 741 if (ret) 742 goto out; 743 744 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 745 746 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 747 748 found_start = key.objectid; 749 found_end = key.objectid + key.offset; 750 ASSERT(start >= found_start && end <= found_end); 751 752 /* 753 * Okay, now that we've found the free space extent which contains the 754 * free space that we are removing, there are four cases: 755 * 756 * 1. We're using the whole extent: delete the key we found and 757 * decrement the free space extent count. 758 * 2. We are using part of the extent starting at the beginning: delete 759 * the key we found and insert a new key representing the leftover at 760 * the end. There is no net change in the number of extents. 761 * 3. We are using part of the extent ending at the end: delete the key 762 * we found and insert a new key representing the leftover at the 763 * beginning. There is no net change in the number of extents. 764 * 4. We are using part of the extent in the middle: delete the key we 765 * found and insert two new keys representing the leftovers on each 766 * side. Where we used to have one extent, we now have two, so increment 767 * the extent count. We may need to convert the block group to bitmaps 768 * as a result. 
769 */ 770 771 /* Delete the existing key (cases 1-4). */ 772 ret = btrfs_del_item(trans, root, path); 773 if (ret) 774 goto out; 775 776 /* Add a key for leftovers at the beginning (cases 3 and 4). */ 777 if (start > found_start) { 778 key.objectid = found_start; 779 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 780 key.offset = start - found_start; 781 782 btrfs_release_path(path); 783 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 784 if (ret) 785 goto out; 786 new_extents++; 787 } 788 789 /* Add a key for leftovers at the end (cases 2 and 4). */ 790 if (end < found_end) { 791 key.objectid = end; 792 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 793 key.offset = found_end - end; 794 795 btrfs_release_path(path); 796 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 797 if (ret) 798 goto out; 799 new_extents++; 800 } 801 802 btrfs_release_path(path); 803 ret = update_free_space_extent_count(trans, block_group, path, 804 new_extents); 805 806 out: 807 return ret; 808 } 809 810 EXPORT_FOR_TESTS 811 int __remove_from_free_space_tree(struct btrfs_trans_handle *trans, 812 struct btrfs_block_group *block_group, 813 struct btrfs_path *path, u64 start, u64 size) 814 { 815 struct btrfs_free_space_info *info; 816 u32 flags; 817 int ret; 818 819 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 820 ret = __add_block_group_free_space(trans, block_group, path); 821 if (ret) 822 return ret; 823 } 824 825 info = search_free_space_info(NULL, block_group, path, 0); 826 if (IS_ERR(info)) 827 return PTR_ERR(info); 828 flags = btrfs_free_space_flags(path->nodes[0], info); 829 btrfs_release_path(path); 830 831 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { 832 return modify_free_space_bitmap(trans, block_group, path, 833 start, size, 1); 834 } else { 835 return remove_free_space_extent(trans, block_group, path, 836 start, size); 837 } 838 } 839 840 int remove_from_free_space_tree(struct btrfs_trans_handle *trans, 841 u64 start, u64 size) 842 { 
843 struct btrfs_block_group *block_group; 844 struct btrfs_path *path; 845 int ret; 846 847 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 848 return 0; 849 850 path = btrfs_alloc_path(); 851 if (!path) { 852 ret = -ENOMEM; 853 btrfs_abort_transaction(trans, ret); 854 goto out; 855 } 856 857 block_group = btrfs_lookup_block_group(trans->fs_info, start); 858 if (!block_group) { 859 DEBUG_WARN("no block group found for start=%llu", start); 860 ret = -ENOENT; 861 btrfs_abort_transaction(trans, ret); 862 goto out; 863 } 864 865 mutex_lock(&block_group->free_space_lock); 866 ret = __remove_from_free_space_tree(trans, block_group, path, start, 867 size); 868 mutex_unlock(&block_group->free_space_lock); 869 if (ret) 870 btrfs_abort_transaction(trans, ret); 871 872 btrfs_put_block_group(block_group); 873 out: 874 btrfs_free_path(path); 875 return ret; 876 } 877 878 static int add_free_space_extent(struct btrfs_trans_handle *trans, 879 struct btrfs_block_group *block_group, 880 struct btrfs_path *path, 881 u64 start, u64 size) 882 { 883 struct btrfs_root *root = btrfs_free_space_root(block_group); 884 struct btrfs_key key, new_key; 885 u64 found_start, found_end; 886 u64 end = start + size; 887 int new_extents = 1; 888 int ret; 889 890 /* 891 * We are adding a new extent of free space, but we need to merge 892 * extents. There are four cases here: 893 * 894 * 1. The new extent does not have any immediate neighbors to merge 895 * with: add the new key and increment the free space extent count. We 896 * may need to convert the block group to bitmaps as a result. 897 * 2. The new extent has an immediate neighbor before it: remove the 898 * previous key and insert a new key combining both of them. There is no 899 * net change in the number of extents. 900 * 3. The new extent has an immediate neighbor after it: remove the next 901 * key and insert a new key combining both of them. There is no net 902 * change in the number of extents. 903 * 4. 
The new extent has immediate neighbors on both sides: remove both 904 * of the keys and insert a new key combining all of them. Where we used 905 * to have two extents, we now have one, so decrement the extent count. 906 */ 907 908 new_key.objectid = start; 909 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 910 new_key.offset = size; 911 912 /* Search for a neighbor on the left. */ 913 if (start == block_group->start) 914 goto right; 915 key.objectid = start - 1; 916 key.type = (u8)-1; 917 key.offset = (u64)-1; 918 919 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 920 if (ret) 921 goto out; 922 923 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 924 925 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 926 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 927 btrfs_release_path(path); 928 goto right; 929 } 930 931 found_start = key.objectid; 932 found_end = key.objectid + key.offset; 933 ASSERT(found_start >= block_group->start && 934 found_end > block_group->start); 935 ASSERT(found_start < start && found_end <= start); 936 937 /* 938 * Delete the neighbor on the left and absorb it into the new key (cases 939 * 2 and 4). 940 */ 941 if (found_end == start) { 942 ret = btrfs_del_item(trans, root, path); 943 if (ret) 944 goto out; 945 new_key.objectid = found_start; 946 new_key.offset += key.offset; 947 new_extents--; 948 } 949 btrfs_release_path(path); 950 951 right: 952 /* Search for a neighbor on the right. 
*/ 953 if (end == block_group->start + block_group->length) 954 goto insert; 955 key.objectid = end; 956 key.type = (u8)-1; 957 key.offset = (u64)-1; 958 959 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 960 if (ret) 961 goto out; 962 963 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 964 965 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 966 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 967 btrfs_release_path(path); 968 goto insert; 969 } 970 971 found_start = key.objectid; 972 found_end = key.objectid + key.offset; 973 ASSERT(found_start >= block_group->start && 974 found_end > block_group->start); 975 ASSERT((found_start < start && found_end <= start) || 976 (found_start >= end && found_end > end)); 977 978 /* 979 * Delete the neighbor on the right and absorb it into the new key 980 * (cases 3 and 4). 981 */ 982 if (found_start == end) { 983 ret = btrfs_del_item(trans, root, path); 984 if (ret) 985 goto out; 986 new_key.offset += key.offset; 987 new_extents--; 988 } 989 btrfs_release_path(path); 990 991 insert: 992 /* Insert the new key (cases 1-4). 
*/ 993 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); 994 if (ret) 995 goto out; 996 997 btrfs_release_path(path); 998 ret = update_free_space_extent_count(trans, block_group, path, 999 new_extents); 1000 1001 out: 1002 return ret; 1003 } 1004 1005 EXPORT_FOR_TESTS 1006 int __add_to_free_space_tree(struct btrfs_trans_handle *trans, 1007 struct btrfs_block_group *block_group, 1008 struct btrfs_path *path, u64 start, u64 size) 1009 { 1010 struct btrfs_free_space_info *info; 1011 u32 flags; 1012 int ret; 1013 1014 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 1015 ret = __add_block_group_free_space(trans, block_group, path); 1016 if (ret) 1017 return ret; 1018 } 1019 1020 info = search_free_space_info(NULL, block_group, path, 0); 1021 if (IS_ERR(info)) 1022 return PTR_ERR(info); 1023 flags = btrfs_free_space_flags(path->nodes[0], info); 1024 btrfs_release_path(path); 1025 1026 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) { 1027 return modify_free_space_bitmap(trans, block_group, path, 1028 start, size, 0); 1029 } else { 1030 return add_free_space_extent(trans, block_group, path, start, 1031 size); 1032 } 1033 } 1034 1035 int add_to_free_space_tree(struct btrfs_trans_handle *trans, 1036 u64 start, u64 size) 1037 { 1038 struct btrfs_block_group *block_group; 1039 struct btrfs_path *path; 1040 int ret; 1041 1042 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1043 return 0; 1044 1045 path = btrfs_alloc_path(); 1046 if (!path) { 1047 ret = -ENOMEM; 1048 btrfs_abort_transaction(trans, ret); 1049 goto out; 1050 } 1051 1052 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1053 if (!block_group) { 1054 DEBUG_WARN("no block group found for start=%llu", start); 1055 ret = -ENOENT; 1056 btrfs_abort_transaction(trans, ret); 1057 goto out; 1058 } 1059 1060 mutex_lock(&block_group->free_space_lock); 1061 ret = __add_to_free_space_tree(trans, block_group, path, start, size); 1062 
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Populate the free space tree by walking the extent tree. Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	BTRFS_PATH_AUTO_FREE(path);
	BTRFS_PATH_AUTO_FREE(path2);
	struct btrfs_key key;
	u64 start, end;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path2 = btrfs_alloc_path();
	if (!path2)
		return -ENOMEM;

	path->reada = READA_FORWARD;

	/* Create the (empty) free space info item for this block group. */
	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		return ret;

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	/*
	 * If ret is 1 (no key found), it means this is an empty block group,
	 * without any extents allocated from it and there's no block group
	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
	 * because we are using the block group tree feature, so block group
	 * items are stored in the block group tree. It also means there are no
	 * extents allocated for block groups with a start offset beyond this
	 * block group's end offset (this is the last, highest, block group).
	 */
	if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE))
		ASSERT(ret == 0);

	start = block_group->start;
	end = block_group->start + block_group->length;
	while (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			if (key.objectid >= end)
				break;

			/* The gap before this extent is free space. */
			if (start < key.objectid) {
				ret = __add_to_free_space_tree(trans,
							       block_group,
							       path2, start,
							       key.objectid -
							       start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			/*
			 * A METADATA_ITEM's offset is not its length; metadata
			 * extents span one tree node (nodesize).
			 */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
	}
	/* Trailing free space after the last allocated extent, if any. */
	if (start < end) {
		ret = __add_to_free_space_tree(trans, block_group, path2,
					       start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);

	return ret;
}

/*
 * Create the free space tree from scratch: make the tree root, populate it
 * from every block group by walking the extent tree, then set the compat_ro
 * feature bits and commit. The CREATING/UNTRUSTED flags gate readers while
 * the tree is being built.
 */
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if (IS_ERR(free_space_root)) {
		ret = PTR_ERR(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}
	ret = btrfs_global_root_insert(free_space_root);
	if (ret) {
		btrfs_put_root(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}

	/* Fill in free space info for every existing block group. */
	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out_clear;
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	ret = btrfs_commit_transaction(trans);

	/*
	 * Now that we've committed the transaction any reading of our commit
	 * root will be safe, so we can cache from the free space tree now.
	 */
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;

out_clear:
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}

/*
 * Delete every item in the free space tree (leaf by leaf, from slot 0) and
 * clear the FREE_SPACE_ADDED runtime flag on all block groups so they can be
 * repopulated later.
 */
static int clear_free_space_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key;
	struct rb_node *node;
	int nr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Start from the smallest possible key. */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			return ret;

		nr = btrfs_header_nritems(path->nodes[0]);
		if (!nr)
			break;

		/* Delete the whole leaf in one call. */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			return ret;

		btrfs_release_path(path);
	}

	node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *bg;

		bg = rb_entry(node, struct btrfs_block_group, cache_node);
		clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
		node = rb_next(node);
		cond_resched();
	}

	return 0;
}

/*
 * Remove the free space tree entirely: clear the feature bits, empty the
 * tree, delete its root item and free its root node.
 */
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Clear the feature bits first so readers stop trusting the tree. */
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	ret = clear_free_space_tree(trans, free_space_root);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	ret = btrfs_del_root(trans, &free_space_root->root_key);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	btrfs_global_root_delete(free_space_root);

	spin_lock(&fs_info->trans_lock);
	list_del(&free_space_root->dirty_list);
	spin_unlock(&fs_info->trans_lock);

	/* Undirty and free the now-orphaned root node. */
	btrfs_tree_lock(free_space_root->node);
	btrfs_clear_buffer_dirty(trans, free_space_root->node);
	btrfs_tree_unlock(free_space_root->node);
	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
				    free_space_root->node, 0, 1);
	btrfs_put_root(free_space_root);
	if (ret < 0) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	return btrfs_commit_transaction(trans);
}

/*
 * Rebuild the free space tree in place: empty it, then repopulate it from
 * every block group. May span multiple transactions (see the note in
 * __add_block_group_free_space() about block groups created mid-rebuild,
 * which are skipped here via the FREE_SPACE_ADDED flag).
 */
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(free_space_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);

	ret = clear_free_space_tree(trans, free_space_root);
	if (ret) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *block_group;

		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);

		/* Already added (e.g. created during this rebuild) - skip. */
		if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
			     &block_group->runtime_flags))
			goto next;

		ret = populate_free_space_tree(trans, block_group);
		if (ret) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			return ret;
		}
next:
		/* Avoid huge transactions: break them up between groups. */
		if (btrfs_should_end_transaction(trans)) {
			btrfs_end_transaction(trans);
			trans = btrfs_start_transaction(free_space_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

	ret = btrfs_commit_transaction(trans);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}

/*
 * Create the free space info item and the initial "whole group free" entry
 * for a block group that hasn't been added to the free space tree yet.
 * Caller holds block_group->free_space_lock (see add_block_group_free_space()
 * and __add_to_free_space_tree()).
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	int ret;

	clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags);

	/*
	 * While rebuilding the free space tree we may allocate new metadata
	 * block groups while modifying the free space tree.
	 *
	 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
	 * can use multiple transactions, every time btrfs_end_transaction() is
	 * called at btrfs_rebuild_free_space_tree() we finish the creation of
	 * new block groups by calling btrfs_create_pending_block_groups(), and
	 * that in turn calls us, through add_block_group_free_space(), to add
	 * a free space info item and a free space extent item for the block
	 * group.
	 *
	 * Then later btrfs_rebuild_free_space_tree() may find such new block
	 * groups and processes them with populate_free_space_tree(), which can
	 * fail with EEXIST since there are already items for the block group in
	 * the free space tree. Notice that we say "may find" because a new
	 * block group may be added to the block groups rbtree in a node before
	 * or after the block group currently being processed by the rebuild
	 * process. So signal the rebuild process to skip such new block groups
	 * if it finds them.
	 */
	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);

	ret = add_new_free_space_info(trans, block_group, path);
	if (ret)
		return ret;

	return __add_to_free_space_tree(trans, block_group, path,
					block_group->start,
					block_group->length);
}

/*
 * Public hook to add a newly created block group to the free space tree.
 * No-op if the feature is disabled or the group was already added. Aborts
 * the transaction on failure.
 */
int add_block_group_free_space(struct btrfs_trans_handle *trans,
			       struct btrfs_block_group *block_group)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_path *path = NULL;
	int ret = 0;

	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;

	mutex_lock(&block_group->free_space_lock);
	if (!test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags))
		goto out;

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	ret = __add_block_group_free_space(trans, block_group, path);

out:
	btrfs_free_path(path);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

/*
 * Delete all free space tree items belonging to a block group that is being
 * removed: its free space extents/bitmaps and, last (smallest key), its free
 * space info item. Items are deleted leaf-batch by leaf-batch, walking keys
 * downwards from the end of the block group. Aborts the transaction on
 * failure.
 */
int remove_block_group_free_space(struct btrfs_trans_handle *trans,
				  struct btrfs_block_group *block_group)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_path *path;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	u64 start, end;
	int done = 0, nr;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		/* We never added this block group to the free space tree. */
		return 0;
	}

	path = btrfs_alloc_path();
	if (!path) {
		ret = -ENOMEM;
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	/* Largest possible key within this block group's range. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (ret)
			goto out;

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		/* Count a contiguous run of this group's items in the leaf. */
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				/* The info item is the group's first key: we're done. */
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				nr++;
				path->slots[0]--;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);
				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (ret)
			goto out;
		btrfs_release_path(path);
	}

	ret = 0;
out:
	btrfs_free_path(path);
	if (ret)
		btrfs_abort_transaction(trans, ret);
	return ret;
}

/*
 * Cache a block group's free space from its bitmap items, feeding each run
 * of set bits to btrfs_add_new_free_space(). @path points at the group's
 * free space info item on entry (see load_free_space_tree()).
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	int prev_bit = 0, bit;
	/* Initialize to silence GCC. */
	u64 extent_start = 0;
	u64 end, offset;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* Hitting the next block group's info item ends our range. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		/* Scan the bitmap one sector at a time for 0->1/1->0 edges. */
		offset = key.objectid;
		while (offset < key.objectid + key.offset) {
			bit = free_space_test_bit(block_group, path, offset);
			if (prev_bit == 0 && bit == 1) {
				/* Start of a free extent. */
				extent_start = offset;
			} else if (prev_bit == 1 && bit == 0) {
				/* End of a free extent: record it. */
				u64 space_added;

				ret = btrfs_add_new_free_space(block_group,
							       extent_start,
							       offset,
							       &space_added);
				if (ret)
					goto out;
				total_found += space_added;
				/* Periodically wake waiters on the caching control. */
				if (total_found > CACHING_CTL_WAKE_UP) {
					total_found = 0;
					wake_up(&caching_ctl->wait);
				}
				extent_count++;
			}
			prev_bit = bit;
			offset += fs_info->sectorsize;
		}
	}
	/* A free extent may run to the very end of the block group. */
	if (prev_bit == 1) {
		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
		if (ret)
			goto out;
		extent_count++;
	}

	/* Cross-check against the count stored in the free space info item. */
	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		ret = -EIO;
		goto out;
	}

	ret = 0;
out:
	return ret;
}

/*
 * Cache a block group's free space from its extent items, feeding each one
 * to btrfs_add_new_free_space(). @path points at the group's free space info
 * item on entry (see load_free_space_tree()).
 */
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	u64 end;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		u64 space_added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			goto out;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* Hitting the next block group's info item ends our range. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		ret = btrfs_add_new_free_space(block_group, key.objectid,
					       key.objectid + key.offset,
					       &space_added);
		if (ret)
			goto out;
		total_found += space_added;
		/* Periodically wake waiters on the caching control. */
		if (total_found > CACHING_CTL_WAKE_UP) {
			total_found = 0;
			wake_up(&caching_ctl->wait);
		}
		extent_count++;
	}

	/* Cross-check against the count stored in the free space info item. */
	if (extent_count != expected_extent_count) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		ret = -EIO;
		goto out;
	}

	ret = 0;
out:
	return ret;
}

/*
 * Entry point for the caching thread: load a block group's free space from
 * the free space tree's commit root, dispatching to the bitmap or extent
 * loader based on the flags in the group's free space info item.
 */
int load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group *block_group;
	struct btrfs_free_space_info *info;
	BTRFS_PATH_AUTO_FREE(path);
	u32 extent_count, flags;

	block_group = caching_ctl->block_group;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Just like caching_thread() doesn't want to deadlock on the extent
	 * tree, we don't want to deadlock on the free space tree.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = READA_FORWARD;

	info = search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);

	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
	flags = btrfs_free_space_flags(path->nodes[0], info);

	/*
	 * We left path pointing to the free space info item, so now
	 * load_free_space_foo can just iterate through the free space tree from
	 * there.
	 */
	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
		return load_free_space_bitmaps(caching_ctl, path, extent_count);
	else
		return load_free_space_extents(caching_ctl, path, extent_count);
}