// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2015 Facebook. All rights reserved.
 */

#include <linux/kernel.h>
#include <linux/sched/mm.h>
#include "messages.h"
#include "ctree.h"
#include "disk-io.h"
#include "locking.h"
#include "free-space-tree.h"
#include "transaction.h"
#include "block-group.h"
#include "fs.h"
#include "accessors.h"
#include "extent-tree.h"
#include "root-tree.h"

/* Forward declaration: lazily inserts the block group's free space info. */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path);

/*
 * Return the free space tree root that tracks @block_group.
 *
 * With the EXTENT_TREE_V2 incompat bit there are multiple global free space
 * tree roots and the block group's global_root_id selects among them;
 * otherwise offset 0 names the single free space tree root.
 */
static struct btrfs_root *btrfs_free_space_root(
				struct btrfs_block_group *block_group)
{
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};

	if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2))
		key.offset = block_group->global_root_id;
	return btrfs_global_root(block_group->fs_info, &key);
}

/*
 * Compute the extent count thresholds at which @cache's free space is
 * converted between the extent and bitmap on-disk representations.
 *
 * The high threshold is the number of extent items whose on-disk footprint
 * would equal that of the bitmaps covering the whole block group; above it
 * bitmaps are cheaper.  The low threshold sits 100 below the high one (or at
 * 0) so we don't thrash between the two formats.
 */
void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache)
{
	u32 bitmap_range;
	size_t bitmap_size;
	u64 num_bitmaps, total_bitmap_size;

	if (WARN_ON(cache->length == 0))
		btrfs_warn(cache->fs_info, "block group %llu length is zero",
			   cache->start);

	/*
	 * We convert to bitmaps when the disk space required for using extents
	 * exceeds that required for using bitmaps.
	 */
	bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range);
	bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE;
	total_bitmap_size = num_bitmaps * bitmap_size;
	cache->bitmap_high_thresh = div_u64(total_bitmap_size,
					    sizeof(struct btrfs_item));

	/*
	 * We allow for a small buffer between the high threshold and low
	 * threshold to avoid thrashing back and forth between the two formats.
	 */
	if (cache->bitmap_high_thresh > 100)
		cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100;
	else
		cache->bitmap_low_thresh = 0;
}

/*
 * Insert a fresh FREE_SPACE_INFO item for @block_group with an extent count
 * of zero and no flags set.  Returns 0 on success or a negative errno.
 */
static int add_new_free_space_info(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int ret;

	/* The info item is keyed on (block group start, INFO, length). */
	key.objectid = block_group->start;
	key.type = BTRFS_FREE_SPACE_INFO_KEY;
	key.offset = block_group->length;

	ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info));
	if (ret)
		return ret;

	leaf = path->nodes[0];
	info = btrfs_item_ptr(leaf, path->slots[0],
			      struct btrfs_free_space_info);
	btrfs_set_free_space_extent_count(leaf, info, 0);
	btrfs_set_free_space_flags(leaf, info, 0);
	btrfs_release_path(path);
	return 0;
}

/*
 * Look up the FREE_SPACE_INFO item of @block_group and return a pointer to
 * it inside the leaf that @path then points to.
 *
 * Pass @cow as 1 when the caller intends to modify the item.  Returns an
 * ERR_PTR on search failure, or ERR_PTR(-ENOENT) (with a warning) if the
 * item is unexpectedly missing.
 */
EXPORT_FOR_TESTS
struct btrfs_free_space_info *btrfs_search_free_space_info(
		struct btrfs_trans_handle *trans,
		struct btrfs_block_group *block_group,
		struct btrfs_path *path, int cow)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	int ret;

	key.objectid = block_group->start;
	key.type = BTRFS_FREE_SPACE_INFO_KEY;
	key.offset = block_group->length;

	ret = btrfs_search_slot(trans, root, &key, path, 0, cow);
	if (ret < 0)
		return ERR_PTR(ret);
	if (ret != 0) {
		btrfs_warn(fs_info, "missing free space info for %llu",
			   block_group->start);
		DEBUG_WARN();
		return ERR_PTR(-ENOENT);
	}

	return btrfs_item_ptr(path->nodes[0], path->slots[0],
			      struct btrfs_free_space_info);
}

/*
 * btrfs_search_slot() but we're looking for the greatest key less than the
 * passed key.
 */
static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_key *key, struct btrfs_path *p,
				  int ins_len, int cow)
{
	int ret;

	ret = btrfs_search_slot(trans, root, key, p, ins_len, cow);
	if (ret < 0)
		return ret;

	/*
	 * Callers in this file search with an impossible key type ((u8)-1),
	 * so an exact match indicates a corrupted tree.
	 */
	if (unlikely(ret == 0)) {
		DEBUG_WARN();
		return -EIO;
	}

	if (unlikely(p->slots[0] == 0)) {
		DEBUG_WARN("no previous slot found");
		return -EIO;
	}
	/* Step back to the greatest key that is less than the search key. */
	p->slots[0]--;

	return 0;
}

/* Bytes needed to hold one bit per sector for @size bytes of space. */
static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info,
					 u64 size)
{
	return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE);
}

/*
 * Allocate a zeroed bitmap of @bitmap_size bytes, rounded up to a whole
 * number of unsigned longs so it can be scanned with the word-based bitmap
 * helpers.
 */
static unsigned long *alloc_bitmap(u32 bitmap_size)
{
	unsigned long *ret;
	unsigned int nofs_flag;
	u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long));

	/*
	 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse
	 * into the filesystem as the free space bitmap can be modified in the
	 * critical section of a transaction commit.
	 *
	 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we
	 * know that recursion is unsafe.
	 */
	nofs_flag = memalloc_nofs_save();
	ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL);
	memalloc_nofs_restore(nofs_flag);
	return ret;
}

/* Set @len bits starting at bit @start in the little-endian bitmap @map. */
static void le_bitmap_set(unsigned long *map, unsigned int start, int len)
{
	u8 *p = ((u8 *)map) + BIT_BYTE(start);
	const unsigned int size = start + len;
	int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE);
	u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start);

	/* Fill the leading partial byte and then whole bytes... */
	while (len - bits_to_set >= 0) {
		*p |= mask_to_set;
		len -= bits_to_set;
		bits_to_set = BITS_PER_BYTE;
		mask_to_set = ~0;
		p++;
	}
	/* ...then the trailing partial byte, if any. */
	if (len) {
		mask_to_set &= BITMAP_LAST_BYTE_MASK(size);
		*p |= mask_to_set;
	}
}

/*
 * Convert @block_group's free space from FREE_SPACE_EXTENT items to
 * FREE_SPACE_BITMAP items.
 *
 * All extent items in the block group's range are folded into an in-memory
 * bitmap and deleted, the USING_BITMAPS flag is set on the FREE_SPACE_INFO
 * item, and the bitmap is written back as one bitmap item per
 * BTRFS_FREE_SPACE_BITMAP_BITS sectors.  The transaction is aborted on any
 * failure.
 */
EXPORT_FOR_TESTS
int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	unsigned long *bitmap;
	char *bitmap_cursor;
	u64 start, end;
	u64 bitmap_range, i;
	u32 bitmap_size, flags, expected_extent_count;
	u32 extent_count = 0;
	int done = 0, nr;
	int ret;

	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
	bitmap = alloc_bitmap(bitmap_size);
	if (unlikely(!bitmap)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	/* Search backwards from the greatest possible key in the range. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		/*
		 * Walk the leaf backwards, recording extent items in the
		 * bitmap until we hit the FREE_SPACE_INFO item, which sorts
		 * first in the block group's range.
		 */
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) {
				u64 first, last;

				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);

				first = div_u64(found_key.objectid - start,
						fs_info->sectorsize);
				last = div_u64(found_key.objectid + found_key.offset - start,
					       fs_info->sectorsize);
				le_bitmap_set(bitmap, first, last - first);

				extent_count++;
				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		/* Delete the extent items we just folded into the bitmap. */
		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);
	}

	info = btrfs_search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	leaf = path->nodes[0];
	flags = btrfs_free_space_flags(leaf, info);
	flags |= BTRFS_FREE_SPACE_USING_BITMAPS;
	block_group->using_free_space_bitmaps = true;
	block_group->using_free_space_bitmaps_cached = true;
	btrfs_set_free_space_flags(leaf, info, flags);
	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
	btrfs_release_path(path);

	/* Cross-check against the count recorded in the info item. */
	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ret = -EIO;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* Write the in-memory bitmap out as a series of bitmap items. */
	bitmap_cursor = (char *)bitmap;
	bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS;
	i = start;
	while (i < end) {
		unsigned long ptr;
		u64 extent_size;
		u32 data_size;

		extent_size = min(end - i, bitmap_range);
		data_size = free_space_bitmap_size(fs_info, extent_size);

		key.objectid = i;
		key.type = BTRFS_FREE_SPACE_BITMAP_KEY;
		key.offset = extent_size;

		ret = btrfs_insert_empty_item(trans, root, path, &key,
					      data_size);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
		write_extent_buffer(leaf, bitmap_cursor, ptr,
				    data_size);
		btrfs_release_path(path);

		i += extent_size;
		bitmap_cursor += data_size;
	}

	ret = 0;
out:
	kvfree(bitmap);
	return ret;
}

/*
 * Convert @block_group's free space from FREE_SPACE_BITMAP items back to
 * FREE_SPACE_EXTENT items.  The inverse of
 * btrfs_convert_free_space_to_bitmaps().
 */
EXPORT_FOR_TESTS
int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	struct btrfs_fs_info *fs_info = trans->fs_info;
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_free_space_info *info;
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	unsigned long *bitmap;
	u64 start, end;
	u32 bitmap_size, flags, expected_extent_count;
	unsigned long nrbits, start_bit, end_bit;
	u32 extent_count = 0;
	int done = 0, nr;
	int ret;

	bitmap_size = free_space_bitmap_size(fs_info, block_group->length);
	bitmap = alloc_bitmap(bitmap_size);
	if (unlikely(!bitmap)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	/* Search backwards from the greatest possible key in the range. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		/*
		 * Walk the leaf backwards, copying each bitmap item into the
		 * in-memory bitmap until we hit the FREE_SPACE_INFO item.
		 */
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				unsigned long ptr;
				char *bitmap_cursor;
				u32 bitmap_pos, data_size;

				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);

				/* Byte offset of this item in the big bitmap. */
				bitmap_pos = div_u64(found_key.objectid - start,
						     fs_info->sectorsize *
						     BITS_PER_BYTE);
				bitmap_cursor = ((char *)bitmap) + bitmap_pos;
				data_size = free_space_bitmap_size(fs_info,
								   found_key.offset);

				path->slots[0]--;
				ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
				read_extent_buffer(leaf, bitmap_cursor, ptr,
						   data_size);

				nr++;
			} else {
				ASSERT(0);
			}
		}

		/* Delete the bitmap items we just read. */
		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);
	}

	info = btrfs_search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info)) {
		ret = PTR_ERR(info);
		btrfs_abort_transaction(trans, ret);
		goto out;
	}
	leaf = path->nodes[0];
	flags = btrfs_free_space_flags(leaf, info);
	flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS;
	block_group->using_free_space_bitmaps = false;
	block_group->using_free_space_bitmaps_cached = true;
	btrfs_set_free_space_flags(leaf, info, flags);
	expected_extent_count = btrfs_free_space_extent_count(leaf, info);
	btrfs_release_path(path);

	/*
	 * Walk the runs of set bits in the bitmap and insert one extent item
	 * per run.
	 */
	nrbits = block_group->length >> fs_info->sectorsize_bits;
	start_bit = find_next_bit_le(bitmap, nrbits, 0);

	while (start_bit < nrbits) {
		end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit);
		ASSERT(start_bit < end_bit);

		key.objectid = start + start_bit * fs_info->sectorsize;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = (end_bit - start_bit) * fs_info->sectorsize;

		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			goto out;
		}
		btrfs_release_path(path);

		extent_count++;

		start_bit = find_next_bit_le(bitmap, nrbits, end_bit);
	}

	/* Cross-check against the count recorded in the info item. */
	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		ret = -EIO;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	ret = 0;
out:
	kvfree(bitmap);
	return ret;
}

/*
 * Apply @new_extents as a delta to the block group's free space extent count
 * and convert between the extent and bitmap representations if the new count
 * crosses one of the thresholds computed by
 * btrfs_set_free_space_tree_thresholds().
 */
static int update_free_space_extent_count(struct btrfs_trans_handle *trans,
					  struct btrfs_block_group *block_group,
					  struct btrfs_path *path,
					  int new_extents)
{
	struct btrfs_free_space_info *info;
	u32 flags;
	u32 extent_count;
	int ret = 0;

	if (new_extents == 0)
		return 0;

	info = btrfs_search_free_space_info(trans, block_group, path, 1);
	if (IS_ERR(info))
		return PTR_ERR(info);

	flags = btrfs_free_space_flags(path->nodes[0], info);
	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);

	extent_count += new_extents;
	btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count);
	btrfs_release_path(path);

	if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
	    extent_count > block_group->bitmap_high_thresh) {
		ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path);
	} else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) &&
		   extent_count < block_group->bitmap_low_thresh) {
		ret = btrfs_convert_free_space_to_extents(trans, block_group, path);
	}

	return ret;
}

/*
 * Return the bit for @offset in the bitmap item that @path currently points
 * to.  The caller must have positioned @path at a FREE_SPACE_BITMAP_KEY item
 * whose range covers @offset.
 */
EXPORT_FOR_TESTS
bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group,
			       struct btrfs_path *path, u64 offset)
{
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 found_start, found_end;
	unsigned long ptr, i;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(offset >= found_start && offset < found_end);

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	i = div_u64(offset - found_start,
		    block_group->fs_info->sectorsize);
	return extent_buffer_test_bit(leaf, ptr, i);
}

/*
 * Set or clear the bits for [*start, *start + *size) within the bitmap item
 * @path points at, clamped to that item's range.  On return *start and *size
 * are advanced past the portion handled so the caller can continue with the
 * next bitmap item.
 */
static void free_space_modify_bits(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 *start, u64 *size,
				   bool set_bits)
{
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 end = *start + *size;
	u64 found_start, found_end;
	unsigned long ptr, first, last;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(*start >= found_start && *start < found_end);
	ASSERT(end > found_start);

	/* Clamp to the end of this bitmap item. */
	if (end > found_end)
		end = found_end;

	ptr = btrfs_item_ptr_offset(leaf, path->slots[0]);
	first = (*start - found_start) >> fs_info->sectorsize_bits;
	last = (end - found_start) >> fs_info->sectorsize_bits;
	if (set_bits)
		extent_buffer_bitmap_set(leaf, ptr, first, last - first);
	else
		extent_buffer_bitmap_clear(leaf, ptr, first, last - first);
	btrfs_mark_buffer_dirty(trans, leaf);

	*size -= end - *start;
	*start = end;
}

/*
 * We can't use btrfs_next_item() in modify_free_space_bitmap() because
 *
btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy
 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 * looking for.
 */
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, struct btrfs_path *p)
{
	struct btrfs_key key;

	/* If the next item is in the same leaf, just advance the slot. */
	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
		p->slots[0]++;
		return 0;
	}

	/* Otherwise re-search for the bitmap starting right after this one. */
	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
	btrfs_release_path(p);

	key.objectid += key.offset;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
}

/*
 * If remove is true, then we are removing free space, thus clearing bits in
 * the bitmap. If remove is false, then we are adding free space, thus setting
 * bits in the bitmap.
 *
 * The bits for the blocks immediately before and after the range are
 * consulted to work out how the number of free space extents changes
 * (merging with or splitting off from neighboring free space).
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size, bool remove)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 end = start + size;
	u64 cur_start, cur_size;
	bool prev_bit_set = false;
	bool next_bit_set = false;
	int new_extents;
	int ret;

	/*
	 * Read the bit for the block immediately before the extent of space if
	 * that block is within the block group.
	 */
	if (start > block_group->start) {
		u64 prev_block = start - block_group->fs_info->sectorsize;

		key.objectid = prev_block;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;

		prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);

		/* The previous block may have been in the previous bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (start >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}
	} else {
		key.objectid = start;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;
	}

	/*
	 * Iterate over all of the bitmaps overlapped by the extent of space,
	 * clearing/setting bits as required.
	 */
	cur_start = start;
	cur_size = size;
	while (1) {
		free_space_modify_bits(trans, block_group, path, &cur_start,
				       &cur_size, !remove);
		if (cur_size == 0)
			break;
		ret = free_space_next_bitmap(trans, root, path);
		if (ret)
			return ret;
	}

	/*
	 * Read the bit for the block immediately after the extent of space if
	 * that block is within the block group.
	 */
	if (end < block_group->start + block_group->length) {
		/* The next block may be in the next bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (end >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}

		next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
	}

	if (remove) {
		new_extents = -1;
		if (prev_bit_set) {
			/* Leftover on the left. */
			new_extents++;
		}
		if (next_bit_set) {
			/* Leftover on the right. */
			new_extents++;
		}
	} else {
		new_extents = 1;
		if (prev_bit_set) {
			/* Merging with neighbor on the left. */
			new_extents--;
		}
		if (next_bit_set) {
			/* Merging with neighbor on the right. */
			new_extents--;
		}
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}

/*
 * Remove [start, start + size) from the free space tree when the block group
 * uses the extent representation.
 */
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = -1;
	int ret;

	/* Find the free space extent item containing [start, end). */
	key.objectid = start;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(start >= found_start && end <= found_end);

	/*
	 * Okay, now that we've found the free space extent which contains the
	 * free space that we are removing, there are four cases:
	 *
	 * 1. We're using the whole extent: delete the key we found and
	 * decrement the free space extent count.
	 * 2. We are using part of the extent starting at the beginning: delete
	 * the key we found and insert a new key representing the leftover at
	 * the end. There is no net change in the number of extents.
	 * 3. We are using part of the extent ending at the end: delete the key
	 * we found and insert a new key representing the leftover at the
	 * beginning. There is no net change in the number of extents.
	 * 4. We are using part of the extent in the middle: delete the key we
	 * found and insert two new keys representing the leftovers on each
	 * side. Where we used to have one extent, we now have two, so increment
	 * the extent count. We may need to convert the block group to bitmaps
	 * as a result.
	 */

	/* Delete the existing key (cases 1-4). */
	ret = btrfs_del_item(trans, root, path);
	if (ret)
		return ret;

	/* Add a key for leftovers at the beginning (cases 3 and 4). */
	if (start > found_start) {
		key.objectid = found_start;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = start - found_start;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	/* Add a key for leftovers at the end (cases 2 and 4). */
	if (end < found_end) {
		key.objectid = end;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = found_end - end;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}

/*
 * Return whether the block group uses the bitmap representation (nonzero),
 * the extent representation (0), or a negative errno on lookup failure.
 * The answer is cached in the block group after the first lookup.
 */
static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
{
	struct btrfs_free_space_info *info;
	u32 flags;

	if (bg->using_free_space_bitmaps_cached)
		return bg->using_free_space_bitmaps;

	info = btrfs_search_free_space_info(NULL, bg, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);
	flags = btrfs_free_space_flags(path->nodes[0], info);
	btrfs_release_path(path);

	bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
	bg->using_free_space_bitmaps_cached = true;

	return bg->using_free_space_bitmaps;
}

/*
 * Remove [start, start + size) from the free space tree of @block_group,
 * dispatching to the bitmap or extent based implementation.  Callers hold
 * block_group->free_space_lock (see btrfs_remove_from_free_space_tree()).
 */
EXPORT_FOR_TESTS
int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path, u64 start, u64 size)
{
	int ret;

	/* Make sure the block group's free space info item exists. */
	ret = __add_block_group_free_space(trans, block_group, path);
	if (ret)
		return ret;

	ret = using_bitmaps(block_group, path);
	if (ret < 0)
		return ret;

	if (ret)
		return modify_free_space_bitmap(trans, block_group, path,
						start, size, true);

	return remove_free_space_extent(trans, block_group, path, start, size);
}

/*
 * Remove [start, start + size) from the free space tree, looking up the
 * owning block group and taking its free_space_lock.  A no-op if the free
 * space tree feature is not enabled.  The transaction is aborted on any
 * failure.
 */
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				      u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	struct btrfs_path *path;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!block_group)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	mutex_lock(&block_group->free_space_lock);
	ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Add [start, start + size) to the free space tree when the block group uses
 * the extent representation, merging with adjacent free space extents on
 * either side.
 */
static int add_free_space_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_path *path,
				 u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key, new_key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = 1;
	int ret;

	/*
	 * We are adding a new extent of free space, but we need to merge
	 * extents. There are four cases here:
	 *
	 * 1. The new extent does not have any immediate neighbors to merge
	 * with: add the new key and increment the free space extent count. We
	 * may need to convert the block group to bitmaps as a result.
	 * 2. The new extent has an immediate neighbor before it: remove the
	 * previous key and insert a new key combining both of them. There is no
	 * net change in the number of extents.
	 * 3. The new extent has an immediate neighbor after it: remove the next
	 * key and insert a new key combining both of them. There is no net
	 * change in the number of extents.
	 * 4. The new extent has immediate neighbors on both sides: remove both
	 * of the keys and insert a new key combining all of them. Where we used
	 * to have two extents, we now have one, so decrement the extent count.
	 */

	new_key.objectid = start;
	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
	new_key.offset = size;

	/* Search for a neighbor on the left. */
	if (start == block_group->start)
		goto right;
	key.objectid = start - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto right;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT(found_start < start && found_end <= start);

	/*
	 * Delete the neighbor on the left and absorb it into the new key (cases
	 * 2 and 4).
	 */
	if (found_end == start) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.objectid = found_start;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

right:
	/* Search for a neighbor on the right. */
	if (end == block_group->start + block_group->length)
		goto insert;
	key.objectid = end;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto insert;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT((found_start < start && found_end <= start) ||
	       (found_start >= end && found_end > end));

	/*
	 * Delete the neighbor on the right and absorb it into the new key
	 * (cases 3 and 4).
	 */
	if (found_start == end) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

insert:
	/* Insert the new key (cases 1-4). */
	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
	if (ret)
		return ret;

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}

/*
 * Add [start, start + size) to the free space tree of @block_group,
 * dispatching to the bitmap or extent based implementation.  Callers hold
 * block_group->free_space_lock (see btrfs_add_to_free_space_tree()).
 */
EXPORT_FOR_TESTS
int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 start, u64 size)
{
	int ret;

	/* Make sure the block group's free space info item exists. */
	ret = __add_block_group_free_space(trans, block_group, path);
	if (ret)
		return ret;

	ret = using_bitmaps(block_group, path);
	if (ret < 0)
		return ret;

	if (ret)
		return modify_free_space_bitmap(trans, block_group, path,
						start, size, false);

	return add_free_space_extent(trans, block_group, path, start, size);
}

/*
 * Add [start, start + size) to the free space tree, looking up the owning
 * block group and taking its free_space_lock.  A no-op if the free space
 * tree feature is not enabled.  The transaction is aborted on any failure.
 */
int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				 u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	struct btrfs_path *path;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!block_group)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	mutex_lock(&block_group->free_space_lock);
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * Populate the free space tree by walking the extent tree.
Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	BTRFS_PATH_AUTO_FREE(path);
	BTRFS_PATH_AUTO_FREE(path2);
	struct btrfs_key key;
	u64 start, end;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path2 = btrfs_alloc_path();
	if (!path2)
		return -ENOMEM;

	path->reada = READA_FORWARD;

	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		return ret;

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	/*
	 * If ret is 1 (no key found), it means this is an empty block group,
	 * without any extents allocated from it and there's no block group
	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
	 * because we are using the block group tree feature (so block group
	 * items are stored in the block group tree) or this is a new block
	 * group created in the current transaction and its block group item
	 * was not yet inserted in the extent tree (that happens in
	 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
	 * It also means there are no extents allocated for block groups with a
	 * start offset beyond this block group's end offset (this is the last,
	 * highest, block group).
	 */
	start = block_group->start;
	end = block_group->start + block_group->length;
	while (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			if (key.objectid >= end)
				break;

			/* Free space gap before this allocated extent. */
			if (start < key.objectid) {
				ret = __btrfs_add_to_free_space_tree(trans,
								     block_group,
								     path2, start,
								     key.objectid -
								     start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			/* Metadata items don't encode their size in the key. */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
	}
	/* Trailing free space after the last allocated extent. */
	if (start < end) {
		ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
						     start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);

	return ret;
}

int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if
(IS_ERR(free_space_root)) { 1183 ret = PTR_ERR(free_space_root); 1184 btrfs_abort_transaction(trans, ret); 1185 btrfs_end_transaction(trans); 1186 goto out_clear; 1187 } 1188 ret = btrfs_global_root_insert(free_space_root); 1189 if (unlikely(ret)) { 1190 btrfs_put_root(free_space_root); 1191 btrfs_abort_transaction(trans, ret); 1192 btrfs_end_transaction(trans); 1193 goto out_clear; 1194 } 1195 1196 node = rb_first_cached(&fs_info->block_group_cache_tree); 1197 while (node) { 1198 block_group = rb_entry(node, struct btrfs_block_group, 1199 cache_node); 1200 ret = populate_free_space_tree(trans, block_group); 1201 if (unlikely(ret)) { 1202 btrfs_abort_transaction(trans, ret); 1203 btrfs_end_transaction(trans); 1204 goto out_clear; 1205 } 1206 node = rb_next(node); 1207 } 1208 1209 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1210 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1211 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1212 ret = btrfs_commit_transaction(trans); 1213 1214 /* 1215 * Now that we've committed the transaction any reading of our commit 1216 * root will be safe, so we can cache from the free space tree now. 
1217 */ 1218 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1219 return ret; 1220 1221 out_clear: 1222 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1223 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1224 return ret; 1225 } 1226 1227 static int clear_free_space_tree(struct btrfs_trans_handle *trans, 1228 struct btrfs_root *root) 1229 { 1230 BTRFS_PATH_AUTO_FREE(path); 1231 struct btrfs_key key; 1232 struct rb_node *node; 1233 int nr; 1234 int ret; 1235 1236 path = btrfs_alloc_path(); 1237 if (!path) 1238 return -ENOMEM; 1239 1240 key.objectid = 0; 1241 key.type = 0; 1242 key.offset = 0; 1243 1244 while (1) { 1245 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1246 if (ret < 0) 1247 return ret; 1248 1249 nr = btrfs_header_nritems(path->nodes[0]); 1250 if (!nr) 1251 break; 1252 1253 path->slots[0] = 0; 1254 ret = btrfs_del_items(trans, root, path, 0, nr); 1255 if (ret) 1256 return ret; 1257 1258 btrfs_release_path(path); 1259 } 1260 1261 node = rb_first_cached(&trans->fs_info->block_group_cache_tree); 1262 while (node) { 1263 struct btrfs_block_group *bg; 1264 1265 bg = rb_entry(node, struct btrfs_block_group, cache_node); 1266 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); 1267 node = rb_next(node); 1268 cond_resched(); 1269 } 1270 1271 return 0; 1272 } 1273 1274 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) 1275 { 1276 struct btrfs_trans_handle *trans; 1277 struct btrfs_root *tree_root = fs_info->tree_root; 1278 struct btrfs_key key = { 1279 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1280 .type = BTRFS_ROOT_ITEM_KEY, 1281 .offset = 0, 1282 }; 1283 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1284 int ret; 1285 1286 trans = btrfs_start_transaction(tree_root, 0); 1287 if (IS_ERR(trans)) 1288 return PTR_ERR(trans); 1289 1290 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1291 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1292 
1293 ret = clear_free_space_tree(trans, free_space_root); 1294 if (unlikely(ret)) { 1295 btrfs_abort_transaction(trans, ret); 1296 btrfs_end_transaction(trans); 1297 return ret; 1298 } 1299 1300 ret = btrfs_del_root(trans, &free_space_root->root_key); 1301 if (unlikely(ret)) { 1302 btrfs_abort_transaction(trans, ret); 1303 btrfs_end_transaction(trans); 1304 return ret; 1305 } 1306 1307 btrfs_global_root_delete(free_space_root); 1308 1309 spin_lock(&fs_info->trans_lock); 1310 list_del(&free_space_root->dirty_list); 1311 spin_unlock(&fs_info->trans_lock); 1312 1313 btrfs_tree_lock(free_space_root->node); 1314 btrfs_clear_buffer_dirty(trans, free_space_root->node); 1315 btrfs_tree_unlock(free_space_root->node); 1316 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), 1317 free_space_root->node, 0, 1); 1318 btrfs_put_root(free_space_root); 1319 if (unlikely(ret < 0)) { 1320 btrfs_abort_transaction(trans, ret); 1321 btrfs_end_transaction(trans); 1322 return ret; 1323 } 1324 1325 return btrfs_commit_transaction(trans); 1326 } 1327 1328 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) 1329 { 1330 struct btrfs_trans_handle *trans; 1331 struct btrfs_key key = { 1332 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1333 .type = BTRFS_ROOT_ITEM_KEY, 1334 .offset = 0, 1335 }; 1336 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1337 struct rb_node *node; 1338 int ret; 1339 1340 trans = btrfs_start_transaction(free_space_root, 1); 1341 if (IS_ERR(trans)) 1342 return PTR_ERR(trans); 1343 1344 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1345 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1346 1347 ret = clear_free_space_tree(trans, free_space_root); 1348 if (unlikely(ret)) { 1349 btrfs_abort_transaction(trans, ret); 1350 btrfs_end_transaction(trans); 1351 return ret; 1352 } 1353 1354 node = rb_first_cached(&fs_info->block_group_cache_tree); 1355 while (node) { 1356 struct btrfs_block_group 
*block_group; 1357 1358 block_group = rb_entry(node, struct btrfs_block_group, 1359 cache_node); 1360 1361 if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, 1362 &block_group->runtime_flags)) 1363 goto next; 1364 1365 ret = populate_free_space_tree(trans, block_group); 1366 if (unlikely(ret)) { 1367 btrfs_abort_transaction(trans, ret); 1368 btrfs_end_transaction(trans); 1369 return ret; 1370 } 1371 next: 1372 if (btrfs_should_end_transaction(trans)) { 1373 btrfs_end_transaction(trans); 1374 trans = btrfs_start_transaction(free_space_root, 1); 1375 if (IS_ERR(trans)) 1376 return PTR_ERR(trans); 1377 } 1378 node = rb_next(node); 1379 } 1380 1381 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1382 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1383 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1384 1385 ret = btrfs_commit_transaction(trans); 1386 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1387 return ret; 1388 } 1389 1390 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 1391 struct btrfs_block_group *block_group, 1392 struct btrfs_path *path) 1393 { 1394 bool own_path = false; 1395 int ret; 1396 1397 if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, 1398 &block_group->runtime_flags)) 1399 return 0; 1400 1401 /* 1402 * While rebuilding the free space tree we may allocate new metadata 1403 * block groups while modifying the free space tree. 1404 * 1405 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we 1406 * can use multiple transactions, every time btrfs_end_transaction() is 1407 * called at btrfs_rebuild_free_space_tree() we finish the creation of 1408 * new block groups by calling btrfs_create_pending_block_groups(), and 1409 * that in turn calls us, through add_block_group_free_space(), to add 1410 * a free space info item and a free space extent item for the block 1411 * group. 
1412 * 1413 * Then later btrfs_rebuild_free_space_tree() may find such new block 1414 * groups and processes them with populate_free_space_tree(), which can 1415 * fail with EEXIST since there are already items for the block group in 1416 * the free space tree. Notice that we say "may find" because a new 1417 * block group may be added to the block groups rbtree in a node before 1418 * or after the block group currently being processed by the rebuild 1419 * process. So signal the rebuild process to skip such new block groups 1420 * if it finds them. 1421 */ 1422 set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); 1423 1424 if (!path) { 1425 path = btrfs_alloc_path(); 1426 if (unlikely(!path)) { 1427 btrfs_abort_transaction(trans, -ENOMEM); 1428 return -ENOMEM; 1429 } 1430 own_path = true; 1431 } 1432 1433 ret = add_new_free_space_info(trans, block_group, path); 1434 if (unlikely(ret)) { 1435 btrfs_abort_transaction(trans, ret); 1436 goto out; 1437 } 1438 1439 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, 1440 block_group->start, block_group->length); 1441 if (ret) 1442 btrfs_abort_transaction(trans, ret); 1443 1444 out: 1445 if (own_path) 1446 btrfs_free_path(path); 1447 1448 return ret; 1449 } 1450 1451 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, 1452 struct btrfs_block_group *block_group) 1453 { 1454 int ret; 1455 1456 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1457 return 0; 1458 1459 mutex_lock(&block_group->free_space_lock); 1460 ret = __add_block_group_free_space(trans, block_group, NULL); 1461 mutex_unlock(&block_group->free_space_lock); 1462 return ret; 1463 } 1464 1465 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, 1466 struct btrfs_block_group *block_group) 1467 { 1468 struct btrfs_root *root = btrfs_free_space_root(block_group); 1469 struct btrfs_path *path; 1470 struct btrfs_key key, found_key; 1471 struct extent_buffer *leaf; 1472 u64 start, end; 
1473 int done = 0, nr; 1474 int ret; 1475 1476 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1477 return 0; 1478 1479 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 1480 /* We never added this block group to the free space tree. */ 1481 return 0; 1482 } 1483 1484 path = btrfs_alloc_path(); 1485 if (unlikely(!path)) { 1486 ret = -ENOMEM; 1487 btrfs_abort_transaction(trans, ret); 1488 goto out; 1489 } 1490 1491 start = block_group->start; 1492 end = block_group->start + block_group->length; 1493 1494 key.objectid = end - 1; 1495 key.type = (u8)-1; 1496 key.offset = (u64)-1; 1497 1498 while (!done) { 1499 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1500 if (unlikely(ret)) { 1501 btrfs_abort_transaction(trans, ret); 1502 goto out; 1503 } 1504 1505 leaf = path->nodes[0]; 1506 nr = 0; 1507 path->slots[0]++; 1508 while (path->slots[0] > 0) { 1509 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 1510 1511 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 1512 ASSERT(found_key.objectid == block_group->start); 1513 ASSERT(found_key.offset == block_group->length); 1514 done = 1; 1515 nr++; 1516 path->slots[0]--; 1517 break; 1518 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY || 1519 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 1520 ASSERT(found_key.objectid >= start); 1521 ASSERT(found_key.objectid < end); 1522 ASSERT(found_key.objectid + found_key.offset <= end); 1523 nr++; 1524 path->slots[0]--; 1525 } else { 1526 ASSERT(0); 1527 } 1528 } 1529 1530 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1531 if (unlikely(ret)) { 1532 btrfs_abort_transaction(trans, ret); 1533 goto out; 1534 } 1535 btrfs_release_path(path); 1536 } 1537 1538 ret = 0; 1539 out: 1540 btrfs_free_path(path); 1541 return ret; 1542 } 1543 1544 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, 1545 struct btrfs_path *path, 1546 u32 expected_extent_count) 1547 { 1548 struct 
btrfs_block_group *block_group; 1549 struct btrfs_fs_info *fs_info; 1550 struct btrfs_root *root; 1551 struct btrfs_key key; 1552 bool prev_bit_set = false; 1553 /* Initialize to silence GCC. */ 1554 u64 extent_start = 0; 1555 u64 end, offset; 1556 u64 total_found = 0; 1557 u32 extent_count = 0; 1558 int ret; 1559 1560 block_group = caching_ctl->block_group; 1561 fs_info = block_group->fs_info; 1562 root = btrfs_free_space_root(block_group); 1563 1564 end = block_group->start + block_group->length; 1565 1566 while (1) { 1567 ret = btrfs_next_item(root, path); 1568 if (ret < 0) 1569 return ret; 1570 if (ret) 1571 break; 1572 1573 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1574 1575 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1576 break; 1577 1578 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 1579 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1580 1581 offset = key.objectid; 1582 while (offset < key.objectid + key.offset) { 1583 bool bit_set; 1584 1585 bit_set = btrfs_free_space_test_bit(block_group, path, offset); 1586 if (!prev_bit_set && bit_set) { 1587 extent_start = offset; 1588 } else if (prev_bit_set && !bit_set) { 1589 u64 space_added; 1590 1591 ret = btrfs_add_new_free_space(block_group, 1592 extent_start, 1593 offset, 1594 &space_added); 1595 if (ret) 1596 return ret; 1597 total_found += space_added; 1598 if (total_found > CACHING_CTL_WAKE_UP) { 1599 total_found = 0; 1600 wake_up(&caching_ctl->wait); 1601 } 1602 extent_count++; 1603 } 1604 prev_bit_set = bit_set; 1605 offset += fs_info->sectorsize; 1606 } 1607 } 1608 if (prev_bit_set) { 1609 ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL); 1610 if (ret) 1611 return ret; 1612 extent_count++; 1613 } 1614 1615 if (unlikely(extent_count != expected_extent_count)) { 1616 btrfs_err(fs_info, 1617 "incorrect extent count for %llu; counted %u, expected %u", 1618 block_group->start, extent_count, 1619 expected_extent_count); 1620 DEBUG_WARN(); 1621 return 
-EIO; 1622 } 1623 1624 return 0; 1625 } 1626 1627 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, 1628 struct btrfs_path *path, 1629 u32 expected_extent_count) 1630 { 1631 struct btrfs_block_group *block_group; 1632 struct btrfs_fs_info *fs_info; 1633 struct btrfs_root *root; 1634 struct btrfs_key key; 1635 u64 end; 1636 u64 total_found = 0; 1637 u32 extent_count = 0; 1638 int ret; 1639 1640 block_group = caching_ctl->block_group; 1641 fs_info = block_group->fs_info; 1642 root = btrfs_free_space_root(block_group); 1643 1644 end = block_group->start + block_group->length; 1645 1646 while (1) { 1647 u64 space_added; 1648 1649 ret = btrfs_next_item(root, path); 1650 if (ret < 0) 1651 return ret; 1652 if (ret) 1653 break; 1654 1655 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1656 1657 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1658 break; 1659 1660 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 1661 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1662 1663 ret = btrfs_add_new_free_space(block_group, key.objectid, 1664 key.objectid + key.offset, 1665 &space_added); 1666 if (ret) 1667 return ret; 1668 total_found += space_added; 1669 if (total_found > CACHING_CTL_WAKE_UP) { 1670 total_found = 0; 1671 wake_up(&caching_ctl->wait); 1672 } 1673 extent_count++; 1674 } 1675 1676 if (unlikely(extent_count != expected_extent_count)) { 1677 btrfs_err(fs_info, 1678 "incorrect extent count for %llu; counted %u, expected %u", 1679 block_group->start, extent_count, 1680 expected_extent_count); 1681 DEBUG_WARN(); 1682 return -EIO; 1683 } 1684 1685 return 0; 1686 } 1687 1688 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) 1689 { 1690 struct btrfs_block_group *block_group; 1691 struct btrfs_free_space_info *info; 1692 BTRFS_PATH_AUTO_FREE(path); 1693 u32 extent_count, flags; 1694 1695 block_group = caching_ctl->block_group; 1696 1697 path = btrfs_alloc_path(); 1698 if (!path) 1699 return -ENOMEM; 
1700 1701 /* 1702 * Just like caching_thread() doesn't want to deadlock on the extent 1703 * tree, we don't want to deadlock on the free space tree. 1704 */ 1705 path->skip_locking = 1; 1706 path->search_commit_root = 1; 1707 path->reada = READA_FORWARD; 1708 1709 info = btrfs_search_free_space_info(NULL, block_group, path, 0); 1710 if (IS_ERR(info)) 1711 return PTR_ERR(info); 1712 1713 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 1714 flags = btrfs_free_space_flags(path->nodes[0], info); 1715 1716 /* 1717 * We left path pointing to the free space info item, so now 1718 * load_free_space_foo can just iterate through the free space tree from 1719 * there. 1720 */ 1721 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) 1722 return load_free_space_bitmaps(caching_ctl, path, extent_count); 1723 else 1724 return load_free_space_extents(caching_ctl, path, extent_count); 1725 } 1726