1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Facebook. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/sched/mm.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "free-space-tree.h" 13 #include "transaction.h" 14 #include "block-group.h" 15 #include "fs.h" 16 #include "accessors.h" 17 #include "extent-tree.h" 18 #include "root-tree.h" 19 20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 21 struct btrfs_block_group *block_group, 22 struct btrfs_path *path); 23 24 static struct btrfs_root *btrfs_free_space_root( 25 struct btrfs_block_group *block_group) 26 { 27 struct btrfs_key key = { 28 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 29 .type = BTRFS_ROOT_ITEM_KEY, 30 .offset = 0, 31 }; 32 33 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) 34 key.offset = block_group->global_root_id; 35 return btrfs_global_root(block_group->fs_info, &key); 36 } 37 38 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) 39 { 40 u32 bitmap_range; 41 size_t bitmap_size; 42 u64 num_bitmaps, total_bitmap_size; 43 44 if (WARN_ON(cache->length == 0)) 45 btrfs_warn(cache->fs_info, "block group %llu length is zero", 46 cache->start); 47 48 /* 49 * We convert to bitmaps when the disk space required for using extents 50 * exceeds that required for using bitmaps. 51 */ 52 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 53 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range); 54 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; 55 total_bitmap_size = num_bitmaps * bitmap_size; 56 cache->bitmap_high_thresh = div_u64(total_bitmap_size, 57 sizeof(struct btrfs_item)); 58 59 /* 60 * We allow for a small buffer between the high threshold and low 61 * threshold to avoid thrashing back and forth between the two formats. 62 */ 63 if (cache->bitmap_high_thresh > 100) 64 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; 65 else 66 cache->bitmap_low_thresh = 0; 67 } 68 69 static int add_new_free_space_info(struct btrfs_trans_handle *trans, 70 struct btrfs_block_group *block_group, 71 struct btrfs_path *path) 72 { 73 struct btrfs_root *root = btrfs_free_space_root(block_group); 74 struct btrfs_free_space_info *info; 75 struct btrfs_key key; 76 struct extent_buffer *leaf; 77 int ret; 78 79 key.objectid = block_group->start; 80 key.type = BTRFS_FREE_SPACE_INFO_KEY; 81 key.offset = block_group->length; 82 83 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); 84 if (ret) 85 return ret; 86 87 leaf = path->nodes[0]; 88 info = btrfs_item_ptr(leaf, path->slots[0], 89 struct btrfs_free_space_info); 90 btrfs_set_free_space_extent_count(leaf, info, 0); 91 btrfs_set_free_space_flags(leaf, info, 0); 92 btrfs_release_path(path); 93 return 0; 94 } 95 96 EXPORT_FOR_TESTS 97 struct btrfs_free_space_info *btrfs_search_free_space_info( 98 struct btrfs_trans_handle *trans, 99 struct btrfs_block_group *block_group, 100 struct btrfs_path *path, int cow) 101 { 102 struct btrfs_fs_info *fs_info = block_group->fs_info; 103 struct btrfs_root *root = btrfs_free_space_root(block_group); 104 struct btrfs_key key; 105 int ret; 106 107 key.objectid = block_group->start; 108 key.type = BTRFS_FREE_SPACE_INFO_KEY; 109 key.offset = block_group->length; 110 111 ret = btrfs_search_slot(trans, root, &key, path, 0, cow); 112 if (ret < 0) 113 return ERR_PTR(ret); 114 if (ret != 0) { 115 btrfs_warn(fs_info, "missing free space info for %llu", 116 block_group->start); 117 DEBUG_WARN(); 118 return ERR_PTR(-ENOENT); 119 } 120 121 return btrfs_item_ptr(path->nodes[0], path->slots[0], 122 struct btrfs_free_space_info); 123 } 124 125 /* 126 * btrfs_search_slot() but we're looking for the greatest key less than the 127 * passed key. 128 */ 129 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, 130 struct btrfs_root *root, 131 struct btrfs_key *key, struct btrfs_path *p, 132 int ins_len, int cow) 133 { 134 int ret; 135 136 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); 137 if (ret < 0) 138 return ret; 139 140 if (ret == 0) { 141 DEBUG_WARN(); 142 return -EIO; 143 } 144 145 if (p->slots[0] == 0) { 146 DEBUG_WARN("no previous slot found"); 147 return -EIO; 148 } 149 p->slots[0]--; 150 151 return 0; 152 } 153 154 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info, 155 u64 size) 156 { 157 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE); 158 } 159 160 static unsigned long *alloc_bitmap(u32 bitmap_size) 161 { 162 unsigned long *ret; 163 unsigned int nofs_flag; 164 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long)); 165 166 /* 167 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 168 * into the filesystem as the free space bitmap can be modified in the 169 * critical section of a transaction commit. 170 * 171 * TODO: push the memalloc_nofs_{save,restore}() to the caller where we 172 * know that recursion is unsafe. 173 */ 174 nofs_flag = memalloc_nofs_save(); 175 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); 176 memalloc_nofs_restore(nofs_flag); 177 return ret; 178 } 179 180 static void le_bitmap_set(unsigned long *map, unsigned int start, int len) 181 { 182 u8 *p = ((u8 *)map) + BIT_BYTE(start); 183 const unsigned int size = start + len; 184 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); 185 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); 186 187 while (len - bits_to_set >= 0) { 188 *p |= mask_to_set; 189 len -= bits_to_set; 190 bits_to_set = BITS_PER_BYTE; 191 mask_to_set = ~0; 192 p++; 193 } 194 if (len) { 195 mask_to_set &= BITMAP_LAST_BYTE_MASK(size); 196 *p |= mask_to_set; 197 } 198 } 199 200 EXPORT_FOR_TESTS 201 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, 202 struct btrfs_block_group *block_group, 203 struct btrfs_path *path) 204 { 205 struct btrfs_fs_info *fs_info = trans->fs_info; 206 struct btrfs_root *root = btrfs_free_space_root(block_group); 207 struct btrfs_free_space_info *info; 208 struct btrfs_key key, found_key; 209 struct extent_buffer *leaf; 210 unsigned long *bitmap; 211 char *bitmap_cursor; 212 u64 start, end; 213 u64 bitmap_range, i; 214 u32 bitmap_size, flags, expected_extent_count; 215 u32 extent_count = 0; 216 int done = 0, nr; 217 int ret; 218 219 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 220 bitmap = alloc_bitmap(bitmap_size); 221 if (!bitmap) { 222 ret = -ENOMEM; 223 btrfs_abort_transaction(trans, ret); 224 goto out; 225 } 226 227 start = block_group->start; 228 end = block_group->start + block_group->length; 229 230 key.objectid = end - 1; 231 key.type = (u8)-1; 232 key.offset = (u64)-1; 233 234 while (!done) { 235 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 236 if (ret) { 237 btrfs_abort_transaction(trans, ret); 238 goto out; 239 } 240 241 leaf = path->nodes[0]; 242 nr = 0; 243 path->slots[0]++; 244 while (path->slots[0] > 0) { 245 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 246 247 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 248 ASSERT(found_key.objectid == block_group->start); 249 ASSERT(found_key.offset == block_group->length); 250 done = 1; 251 break; 252 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { 253 u64 first, last; 254 255 ASSERT(found_key.objectid >= start); 256 ASSERT(found_key.objectid < end); 257 ASSERT(found_key.objectid + found_key.offset <= end); 258 259 first = div_u64(found_key.objectid - start, 260 fs_info->sectorsize); 261 last = div_u64(found_key.objectid + found_key.offset - start, 262 fs_info->sectorsize); 263 le_bitmap_set(bitmap, first, last - first); 264 265 extent_count++; 266 nr++; 267 path->slots[0]--; 268 } else { 269 ASSERT(0); 270 } 271 } 272 273 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 274 if (ret) { 275 btrfs_abort_transaction(trans, ret); 276 goto out; 277 } 278 btrfs_release_path(path); 279 } 280 281 info = btrfs_search_free_space_info(trans, block_group, path, 1); 282 if (IS_ERR(info)) { 283 ret = PTR_ERR(info); 284 btrfs_abort_transaction(trans, ret); 285 goto out; 286 } 287 leaf = path->nodes[0]; 288 flags = btrfs_free_space_flags(leaf, info); 289 flags |= BTRFS_FREE_SPACE_USING_BITMAPS; 290 block_group->using_free_space_bitmaps = true; 291 block_group->using_free_space_bitmaps_cached = true; 292 btrfs_set_free_space_flags(leaf, info, flags); 293 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 294 btrfs_release_path(path); 295 296 if (extent_count != expected_extent_count) { 297 btrfs_err(fs_info, 298 "incorrect extent count for %llu; counted %u, expected %u", 299 block_group->start, extent_count, 300 expected_extent_count); 301 ret = -EIO; 302 btrfs_abort_transaction(trans, ret); 303 goto out; 304 } 305 306 bitmap_cursor = (char *)bitmap; 307 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 308 i = start; 309 while (i < end) { 310 unsigned long ptr; 311 u64 extent_size; 312 u32 data_size; 313 314 extent_size = min(end - i, bitmap_range); 315 data_size = free_space_bitmap_size(fs_info, extent_size); 316 317 key.objectid = i; 318 key.type = BTRFS_FREE_SPACE_BITMAP_KEY; 319 key.offset = extent_size; 320 321 ret = btrfs_insert_empty_item(trans, root, path, &key, 322 data_size); 323 if (ret) { 324 btrfs_abort_transaction(trans, ret); 325 goto out; 326 } 327 328 leaf = path->nodes[0]; 329 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 330 write_extent_buffer(leaf, bitmap_cursor, ptr, 331 data_size); 332 btrfs_release_path(path); 333 334 i += extent_size; 335 bitmap_cursor += data_size; 336 } 337 338 ret = 0; 339 out: 340 kvfree(bitmap); 341 return ret; 342 } 343 344 EXPORT_FOR_TESTS 345 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, 346 struct btrfs_block_group *block_group, 347 struct btrfs_path *path) 348 { 349 struct btrfs_fs_info *fs_info = trans->fs_info; 350 struct btrfs_root *root = btrfs_free_space_root(block_group); 351 struct btrfs_free_space_info *info; 352 struct btrfs_key key, found_key; 353 struct extent_buffer *leaf; 354 unsigned long *bitmap; 355 u64 start, end; 356 u32 bitmap_size, flags, expected_extent_count; 357 unsigned long nrbits, start_bit, end_bit; 358 u32 extent_count = 0; 359 int done = 0, nr; 360 int ret; 361 362 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 363 bitmap = alloc_bitmap(bitmap_size); 364 if (!bitmap) { 365 ret = -ENOMEM; 366 btrfs_abort_transaction(trans, ret); 367 goto out; 368 } 369 370 start = block_group->start; 371 end = block_group->start + block_group->length; 372 373 key.objectid = end - 1; 374 key.type = (u8)-1; 375 key.offset = (u64)-1; 376 377 while (!done) { 378 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 379 if (ret) { 380 btrfs_abort_transaction(trans, ret); 381 goto out; 382 } 383 384 leaf = path->nodes[0]; 385 nr = 0; 386 path->slots[0]++; 387 while (path->slots[0] > 0) { 388 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 389 390 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 391 ASSERT(found_key.objectid == block_group->start); 392 ASSERT(found_key.offset == block_group->length); 393 done = 1; 394 break; 395 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 396 unsigned long ptr; 397 char *bitmap_cursor; 398 u32 bitmap_pos, data_size; 399 400 ASSERT(found_key.objectid >= start); 401 ASSERT(found_key.objectid < end); 402 ASSERT(found_key.objectid + found_key.offset <= end); 403 404 bitmap_pos = div_u64(found_key.objectid - start, 405 fs_info->sectorsize * 406 BITS_PER_BYTE); 407 bitmap_cursor = ((char *)bitmap) + bitmap_pos; 408 data_size = free_space_bitmap_size(fs_info, 409 found_key.offset); 410 411 path->slots[0]--; 412 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 413 read_extent_buffer(leaf, bitmap_cursor, ptr, 414 data_size); 415 416 nr++; 417 } else { 418 ASSERT(0); 419 } 420 } 421 422 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 423 if (ret) { 424 btrfs_abort_transaction(trans, ret); 425 goto out; 426 } 427 btrfs_release_path(path); 428 } 429 430 info = btrfs_search_free_space_info(trans, block_group, path, 1); 431 if (IS_ERR(info)) { 432 ret = PTR_ERR(info); 433 btrfs_abort_transaction(trans, ret); 434 goto out; 435 } 436 leaf = path->nodes[0]; 437 flags = btrfs_free_space_flags(leaf, info); 438 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; 439 block_group->using_free_space_bitmaps = false; 440 block_group->using_free_space_bitmaps_cached = true; 441 btrfs_set_free_space_flags(leaf, info, flags); 442 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 443 btrfs_release_path(path); 444 445 nrbits = block_group->length >> fs_info->sectorsize_bits; 446 start_bit = find_next_bit_le(bitmap, nrbits, 0); 447 448 while (start_bit < nrbits) { 449 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); 450 ASSERT(start_bit < end_bit); 451 452 key.objectid = start + start_bit * fs_info->sectorsize; 453 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 454 key.offset = (end_bit - start_bit) * fs_info->sectorsize; 455 456 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 457 if (ret) { 458 btrfs_abort_transaction(trans, ret); 459 goto out; 460 } 461 btrfs_release_path(path); 462 463 extent_count++; 464 465 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 466 } 467 468 if (extent_count != expected_extent_count) { 469 btrfs_err(fs_info, 470 "incorrect extent count for %llu; counted %u, expected %u", 471 block_group->start, extent_count, 472 expected_extent_count); 473 ret = -EIO; 474 btrfs_abort_transaction(trans, ret); 475 goto out; 476 } 477 478 ret = 0; 479 out: 480 kvfree(bitmap); 481 return ret; 482 } 483 484 static int update_free_space_extent_count(struct btrfs_trans_handle *trans, 485 struct btrfs_block_group *block_group, 486 struct btrfs_path *path, 487 int new_extents) 488 { 489 struct btrfs_free_space_info *info; 490 u32 flags; 491 u32 extent_count; 492 int ret = 0; 493 494 if (new_extents == 0) 495 return 0; 496 497 info = btrfs_search_free_space_info(trans, block_group, path, 1); 498 if (IS_ERR(info)) 499 return PTR_ERR(info); 500 501 flags = btrfs_free_space_flags(path->nodes[0], info); 502 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 503 504 extent_count += new_extents; 505 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); 506 btrfs_release_path(path); 507 508 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 509 extent_count > block_group->bitmap_high_thresh) { 510 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); 511 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 512 extent_count < block_group->bitmap_low_thresh) { 513 ret = btrfs_convert_free_space_to_extents(trans, block_group, path); 514 } 515 516 return ret; 517 } 518 519 EXPORT_FOR_TESTS 520 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, 521 struct btrfs_path *path, u64 offset) 522 { 523 struct extent_buffer *leaf; 524 struct btrfs_key key; 525 u64 found_start, found_end; 526 unsigned long ptr, i; 527 528 leaf = path->nodes[0]; 529 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 530 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 531 532 found_start = key.objectid; 533 found_end = key.objectid + key.offset; 534 ASSERT(offset >= found_start && offset < found_end); 535 536 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 537 i = div_u64(offset - found_start, 538 block_group->fs_info->sectorsize); 539 return extent_buffer_test_bit(leaf, ptr, i); 540 } 541 542 static void free_space_modify_bits(struct btrfs_trans_handle *trans, 543 struct btrfs_block_group *block_group, 544 struct btrfs_path *path, u64 *start, u64 *size, 545 bool set_bits) 546 { 547 struct btrfs_fs_info *fs_info = block_group->fs_info; 548 struct extent_buffer *leaf; 549 struct btrfs_key key; 550 u64 end = *start + *size; 551 u64 found_start, found_end; 552 unsigned long ptr, first, last; 553 554 leaf = path->nodes[0]; 555 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 556 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 557 558 found_start = key.objectid; 559 found_end = key.objectid + key.offset; 560 ASSERT(*start >= found_start && *start < found_end); 561 ASSERT(end > found_start); 562 563 if (end > found_end) 564 end = found_end; 565 566 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 567 first = (*start - found_start) >> fs_info->sectorsize_bits; 568 last = (end - found_start) >> fs_info->sectorsize_bits; 569 if (set_bits) 570 extent_buffer_bitmap_set(leaf, ptr, first, last - first); 571 else 572 extent_buffer_bitmap_clear(leaf, ptr, first, last - first); 573 btrfs_mark_buffer_dirty(trans, leaf); 574 575 *size -= end - *start; 576 *start = end; 577 } 578 579 /* 580 * We can't use btrfs_next_item() in modify_free_space_bitmap() because 581 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy 582 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're 583 * looking for. 584 */ 585 static int free_space_next_bitmap(struct btrfs_trans_handle *trans, 586 struct btrfs_root *root, struct btrfs_path *p) 587 { 588 struct btrfs_key key; 589 590 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { 591 p->slots[0]++; 592 return 0; 593 } 594 595 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); 596 btrfs_release_path(p); 597 598 key.objectid += key.offset; 599 key.type = (u8)-1; 600 key.offset = (u64)-1; 601 602 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); 603 } 604 605 /* 606 * If remove is 1, then we are removing free space, thus clearing bits in the 607 * bitmap. If remove is 0, then we are adding free space, thus setting bits in 608 * the bitmap. 609 */ 610 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, 611 struct btrfs_block_group *block_group, 612 struct btrfs_path *path, 613 u64 start, u64 size, bool remove) 614 { 615 struct btrfs_root *root = btrfs_free_space_root(block_group); 616 struct btrfs_key key; 617 u64 end = start + size; 618 u64 cur_start, cur_size; 619 bool prev_bit_set = false; 620 bool next_bit_set = false; 621 int new_extents; 622 int ret; 623 624 /* 625 * Read the bit for the block immediately before the extent of space if 626 * that block is within the block group. 627 */ 628 if (start > block_group->start) { 629 u64 prev_block = start - block_group->fs_info->sectorsize; 630 631 key.objectid = prev_block; 632 key.type = (u8)-1; 633 key.offset = (u64)-1; 634 635 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 636 if (ret) 637 return ret; 638 639 prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block); 640 641 /* The previous block may have been in the previous bitmap. */ 642 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 643 if (start >= key.objectid + key.offset) { 644 ret = free_space_next_bitmap(trans, root, path); 645 if (ret) 646 return ret; 647 } 648 } else { 649 key.objectid = start; 650 key.type = (u8)-1; 651 key.offset = (u64)-1; 652 653 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 654 if (ret) 655 return ret; 656 } 657 658 /* 659 * Iterate over all of the bitmaps overlapped by the extent of space, 660 * clearing/setting bits as required. 661 */ 662 cur_start = start; 663 cur_size = size; 664 while (1) { 665 free_space_modify_bits(trans, block_group, path, &cur_start, 666 &cur_size, !remove); 667 if (cur_size == 0) 668 break; 669 ret = free_space_next_bitmap(trans, root, path); 670 if (ret) 671 return ret; 672 } 673 674 /* 675 * Read the bit for the block immediately after the extent of space if 676 * that block is within the block group. 677 */ 678 if (end < block_group->start + block_group->length) { 679 /* The next block may be in the next bitmap. */ 680 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 681 if (end >= key.objectid + key.offset) { 682 ret = free_space_next_bitmap(trans, root, path); 683 if (ret) 684 return ret; 685 } 686 687 next_bit_set = btrfs_free_space_test_bit(block_group, path, end); 688 } 689 690 if (remove) { 691 new_extents = -1; 692 if (prev_bit_set) { 693 /* Leftover on the left. */ 694 new_extents++; 695 } 696 if (next_bit_set) { 697 /* Leftover on the right. */ 698 new_extents++; 699 } 700 } else { 701 new_extents = 1; 702 if (prev_bit_set) { 703 /* Merging with neighbor on the left. */ 704 new_extents--; 705 } 706 if (next_bit_set) { 707 /* Merging with neighbor on the right. */ 708 new_extents--; 709 } 710 } 711 712 btrfs_release_path(path); 713 return update_free_space_extent_count(trans, block_group, path, new_extents); 714 } 715 716 static int remove_free_space_extent(struct btrfs_trans_handle *trans, 717 struct btrfs_block_group *block_group, 718 struct btrfs_path *path, 719 u64 start, u64 size) 720 { 721 struct btrfs_root *root = btrfs_free_space_root(block_group); 722 struct btrfs_key key; 723 u64 found_start, found_end; 724 u64 end = start + size; 725 int new_extents = -1; 726 int ret; 727 728 key.objectid = start; 729 key.type = (u8)-1; 730 key.offset = (u64)-1; 731 732 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 733 if (ret) 734 return ret; 735 736 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 737 738 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 739 740 found_start = key.objectid; 741 found_end = key.objectid + key.offset; 742 ASSERT(start >= found_start && end <= found_end); 743 744 /* 745 * Okay, now that we've found the free space extent which contains the 746 * free space that we are removing, there are four cases: 747 * 748 * 1. We're using the whole extent: delete the key we found and 749 * decrement the free space extent count. 750 * 2. We are using part of the extent starting at the beginning: delete 751 * the key we found and insert a new key representing the leftover at 752 * the end. There is no net change in the number of extents. 753 * 3. We are using part of the extent ending at the end: delete the key 754 * we found and insert a new key representing the leftover at the 755 * beginning. There is no net change in the number of extents. 756 * 4. We are using part of the extent in the middle: delete the key we 757 * found and insert two new keys representing the leftovers on each 758 * side. Where we used to have one extent, we now have two, so increment 759 * the extent count. We may need to convert the block group to bitmaps 760 * as a result. 761 */ 762 763 /* Delete the existing key (cases 1-4). */ 764 ret = btrfs_del_item(trans, root, path); 765 if (ret) 766 return ret; 767 768 /* Add a key for leftovers at the beginning (cases 3 and 4). */ 769 if (start > found_start) { 770 key.objectid = found_start; 771 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 772 key.offset = start - found_start; 773 774 btrfs_release_path(path); 775 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 776 if (ret) 777 return ret; 778 new_extents++; 779 } 780 781 /* Add a key for leftovers at the end (cases 2 and 4). */ 782 if (end < found_end) { 783 key.objectid = end; 784 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 785 key.offset = found_end - end; 786 787 btrfs_release_path(path); 788 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 789 if (ret) 790 return ret; 791 new_extents++; 792 } 793 794 btrfs_release_path(path); 795 return update_free_space_extent_count(trans, block_group, path, new_extents); 796 } 797 798 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) 799 { 800 struct btrfs_free_space_info *info; 801 u32 flags; 802 803 if (bg->using_free_space_bitmaps_cached) 804 return bg->using_free_space_bitmaps; 805 806 info = btrfs_search_free_space_info(NULL, bg, path, 0); 807 if (IS_ERR(info)) 808 return PTR_ERR(info); 809 flags = btrfs_free_space_flags(path->nodes[0], info); 810 btrfs_release_path(path); 811 812 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); 813 bg->using_free_space_bitmaps_cached = true; 814 815 return bg->using_free_space_bitmaps; 816 } 817 818 EXPORT_FOR_TESTS 819 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 820 struct btrfs_block_group *block_group, 821 struct btrfs_path *path, u64 start, u64 size) 822 { 823 int ret; 824 825 ret = __add_block_group_free_space(trans, block_group, path); 826 if (ret) 827 return ret; 828 829 ret = using_bitmaps(block_group, path); 830 if (ret < 0) 831 return ret; 832 833 if (ret) 834 return modify_free_space_bitmap(trans, block_group, path, 835 start, size, true); 836 837 return remove_free_space_extent(trans, block_group, path, start, size); 838 } 839 840 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 841 u64 start, u64 size) 842 { 843 struct btrfs_block_group *block_group; 844 struct btrfs_path *path; 845 int ret; 846 847 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 848 return 0; 849 850 path = btrfs_alloc_path(); 851 if (!path) { 852 ret = -ENOMEM; 853 btrfs_abort_transaction(trans, ret); 854 goto out; 855 } 856 857 block_group = btrfs_lookup_block_group(trans->fs_info, start); 858 if (!block_group) { 859 DEBUG_WARN("no block group found for start=%llu", start); 860 ret = -ENOENT; 861 btrfs_abort_transaction(trans, ret); 862 goto out; 863 } 864 865 mutex_lock(&block_group->free_space_lock); 866 ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size); 867 mutex_unlock(&block_group->free_space_lock); 868 if (ret) 869 btrfs_abort_transaction(trans, ret); 870 871 btrfs_put_block_group(block_group); 872 out: 873 btrfs_free_path(path); 874 return ret; 875 } 876 877 static int add_free_space_extent(struct btrfs_trans_handle *trans, 878 struct btrfs_block_group *block_group, 879 struct btrfs_path *path, 880 u64 start, u64 size) 881 { 882 struct btrfs_root *root = btrfs_free_space_root(block_group); 883 struct btrfs_key key, new_key; 884 u64 found_start, found_end; 885 u64 end = start + size; 886 int new_extents = 1; 887 int ret; 888 889 /* 890 * We are adding a new extent of free space, but we need to merge 891 * extents. There are four cases here: 892 * 893 * 1. The new extent does not have any immediate neighbors to merge 894 * with: add the new key and increment the free space extent count. We 895 * may need to convert the block group to bitmaps as a result. 896 * 2. The new extent has an immediate neighbor before it: remove the 897 * previous key and insert a new key combining both of them. There is no 898 * net change in the number of extents. 899 * 3. The new extent has an immediate neighbor after it: remove the next 900 * key and insert a new key combining both of them. There is no net 901 * change in the number of extents. 902 * 4. The new extent has immediate neighbors on both sides: remove both 903 * of the keys and insert a new key combining all of them. Where we used 904 * to have two extents, we now have one, so decrement the extent count. 905 */ 906 907 new_key.objectid = start; 908 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 909 new_key.offset = size; 910 911 /* Search for a neighbor on the left. */ 912 if (start == block_group->start) 913 goto right; 914 key.objectid = start - 1; 915 key.type = (u8)-1; 916 key.offset = (u64)-1; 917 918 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 919 if (ret) 920 return ret; 921 922 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 923 924 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 925 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 926 btrfs_release_path(path); 927 goto right; 928 } 929 930 found_start = key.objectid; 931 found_end = key.objectid + key.offset; 932 ASSERT(found_start >= block_group->start && 933 found_end > block_group->start); 934 ASSERT(found_start < start && found_end <= start); 935 936 /* 937 * Delete the neighbor on the left and absorb it into the new key (cases 938 * 2 and 4). 939 */ 940 if (found_end == start) { 941 ret = btrfs_del_item(trans, root, path); 942 if (ret) 943 return ret; 944 new_key.objectid = found_start; 945 new_key.offset += key.offset; 946 new_extents--; 947 } 948 btrfs_release_path(path); 949 950 right: 951 /* Search for a neighbor on the right. */ 952 if (end == block_group->start + block_group->length) 953 goto insert; 954 key.objectid = end; 955 key.type = (u8)-1; 956 key.offset = (u64)-1; 957 958 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 959 if (ret) 960 return ret; 961 962 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 963 964 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 965 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 966 btrfs_release_path(path); 967 goto insert; 968 } 969 970 found_start = key.objectid; 971 found_end = key.objectid + key.offset; 972 ASSERT(found_start >= block_group->start && 973 found_end > block_group->start); 974 ASSERT((found_start < start && found_end <= start) || 975 (found_start >= end && found_end > end)); 976 977 /* 978 * Delete the neighbor on the right and absorb it into the new key 979 * (cases 3 and 4). 980 */ 981 if (found_start == end) { 982 ret = btrfs_del_item(trans, root, path); 983 if (ret) 984 return ret; 985 new_key.offset += key.offset; 986 new_extents--; 987 } 988 btrfs_release_path(path); 989 990 insert: 991 /* Insert the new key (cases 1-4). */ 992 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); 993 if (ret) 994 return ret; 995 996 btrfs_release_path(path); 997 return update_free_space_extent_count(trans, block_group, path, new_extents); 998 } 999 1000 EXPORT_FOR_TESTS 1001 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1002 struct btrfs_block_group *block_group, 1003 struct btrfs_path *path, u64 start, u64 size) 1004 { 1005 int ret; 1006 1007 ret = __add_block_group_free_space(trans, block_group, path); 1008 if (ret) 1009 return ret; 1010 1011 ret = using_bitmaps(block_group, path); 1012 if (ret < 0) 1013 return ret; 1014 1015 if (ret) 1016 return modify_free_space_bitmap(trans, block_group, path, 1017 start, size, false); 1018 1019 return add_free_space_extent(trans, block_group, path, start, size); 1020 } 1021 1022 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1023 u64 start, u64 size) 1024 { 1025 struct btrfs_block_group *block_group; 1026 struct btrfs_path *path; 1027 int ret; 1028 1029 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1030 return 0; 1031 1032 path = btrfs_alloc_path(); 1033 if (!path) { 1034 ret = -ENOMEM; 1035 btrfs_abort_transaction(trans, ret); 1036 goto out; 1037 } 1038 1039 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1040 if (!block_group) { 1041 DEBUG_WARN("no block group found for start=%llu", start); 1042 ret = -ENOENT; 1043 btrfs_abort_transaction(trans, ret); 1044 goto out; 1045 } 1046 1047 mutex_lock(&block_group->free_space_lock); 1048 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size); 1049 mutex_unlock(&block_group->free_space_lock); 1050 if (ret) 1051 btrfs_abort_transaction(trans, ret); 1052 1053 btrfs_put_block_group(block_group); 1054 out: 1055 btrfs_free_path(path); 1056 return ret; 1057 } 1058 1059 /* 1060 * Populate the free space tree by walking the extent tree. Operations on the 1061 * extent tree that happen as a result of writes to the free space tree will go 1062 * through the normal add/remove hooks. 1063 */ 1064 static int populate_free_space_tree(struct btrfs_trans_handle *trans, 1065 struct btrfs_block_group *block_group) 1066 { 1067 struct btrfs_root *extent_root; 1068 BTRFS_PATH_AUTO_FREE(path); 1069 BTRFS_PATH_AUTO_FREE(path2); 1070 struct btrfs_key key; 1071 u64 start, end; 1072 int ret; 1073 1074 path = btrfs_alloc_path(); 1075 if (!path) 1076 return -ENOMEM; 1077 1078 path2 = btrfs_alloc_path(); 1079 if (!path2) 1080 return -ENOMEM; 1081 1082 path->reada = READA_FORWARD; 1083 1084 ret = add_new_free_space_info(trans, block_group, path2); 1085 if (ret) 1086 return ret; 1087 1088 mutex_lock(&block_group->free_space_lock); 1089 1090 /* 1091 * Iterate through all of the extent and metadata items in this block 1092 * group, adding the free space between them and the free space at the 1093 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than 1094 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's 1095 * contained in. 1096 */ 1097 key.objectid = block_group->start; 1098 key.type = BTRFS_EXTENT_ITEM_KEY; 1099 key.offset = 0; 1100 1101 extent_root = btrfs_extent_root(trans->fs_info, key.objectid); 1102 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); 1103 if (ret < 0) 1104 goto out_locked; 1105 /* 1106 * If ret is 1 (no key found), it means this is an empty block group, 1107 * without any extents allocated from it and there's no block group 1108 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree 1109 * because we are using the block group tree feature, so block group 1110 * items are stored in the block group tree. It also means there are no 1111 * extents allocated for block groups with a start offset beyond this 1112 * block group's end offset (this is the last, highest, block group). 1113 */ 1114 if (!btrfs_fs_compat_ro(trans->fs_info, BLOCK_GROUP_TREE)) 1115 ASSERT(ret == 0); 1116 1117 start = block_group->start; 1118 end = block_group->start + block_group->length; 1119 while (ret == 0) { 1120 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1121 1122 if (key.type == BTRFS_EXTENT_ITEM_KEY || 1123 key.type == BTRFS_METADATA_ITEM_KEY) { 1124 if (key.objectid >= end) 1125 break; 1126 1127 if (start < key.objectid) { 1128 ret = __btrfs_add_to_free_space_tree(trans, 1129 block_group, 1130 path2, start, 1131 key.objectid - 1132 start); 1133 if (ret) 1134 goto out_locked; 1135 } 1136 start = key.objectid; 1137 if (key.type == BTRFS_METADATA_ITEM_KEY) 1138 start += trans->fs_info->nodesize; 1139 else 1140 start += key.offset; 1141 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 1142 if (key.objectid != block_group->start) 1143 break; 1144 } 1145 1146 ret = btrfs_next_item(extent_root, path); 1147 if (ret < 0) 1148 goto out_locked; 1149 } 1150 if (start < end) { 1151 ret = __btrfs_add_to_free_space_tree(trans, block_group, path2, 1152 start, end - start); 1153 if (ret) 1154 goto out_locked; 1155 } 1156 1157 ret = 0; 1158 out_locked: 1159 mutex_unlock(&block_group->free_space_lock); 1160 1161 return ret; 1162 } 1163 1164 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) 1165 { 1166 struct btrfs_trans_handle *trans; 1167 struct btrfs_root *tree_root = fs_info->tree_root; 1168 struct btrfs_root *free_space_root; 1169 struct btrfs_block_group *block_group; 1170 struct rb_node *node; 1171 int ret; 1172 1173 trans = btrfs_start_transaction(tree_root, 0); 1174 if (IS_ERR(trans)) 1175 return PTR_ERR(trans); 1176 1177 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1178 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1179 free_space_root = btrfs_create_tree(trans, 1180 BTRFS_FREE_SPACE_TREE_OBJECTID); 1181 if (IS_ERR(free_space_root)) { 1182 ret = PTR_ERR(free_space_root); 1183 btrfs_abort_transaction(trans, ret); 1184 btrfs_end_transaction(trans); 1185 goto out_clear; 1186 } 1187 ret = btrfs_global_root_insert(free_space_root); 1188 if (ret) { 1189 btrfs_put_root(free_space_root); 1190 btrfs_abort_transaction(trans, ret); 1191 btrfs_end_transaction(trans); 1192 goto out_clear; 1193 } 1194 1195 node = rb_first_cached(&fs_info->block_group_cache_tree); 1196 while (node) { 1197 block_group = rb_entry(node, struct btrfs_block_group, 1198 cache_node); 1199 ret = populate_free_space_tree(trans, block_group); 1200 if (ret) { 1201 btrfs_abort_transaction(trans, ret); 1202 btrfs_end_transaction(trans); 1203 goto out_clear; 1204 } 1205 node = rb_next(node); 1206 } 1207 1208 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1209 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1210 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1211 ret = btrfs_commit_transaction(trans); 1212 1213 /* 1214 * Now that we've committed the transaction any reading of our commit 1215 * root will be safe, so we can cache from the free space tree now. 1216 */ 1217 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1218 return ret; 1219 1220 out_clear: 1221 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1222 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1223 return ret; 1224 } 1225 1226 static int clear_free_space_tree(struct btrfs_trans_handle *trans, 1227 struct btrfs_root *root) 1228 { 1229 BTRFS_PATH_AUTO_FREE(path); 1230 struct btrfs_key key; 1231 struct rb_node *node; 1232 int nr; 1233 int ret; 1234 1235 path = btrfs_alloc_path(); 1236 if (!path) 1237 return -ENOMEM; 1238 1239 key.objectid = 0; 1240 key.type = 0; 1241 key.offset = 0; 1242 1243 while (1) { 1244 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1245 if (ret < 0) 1246 return ret; 1247 1248 nr = btrfs_header_nritems(path->nodes[0]); 1249 if (!nr) 1250 break; 1251 1252 path->slots[0] = 0; 1253 ret = btrfs_del_items(trans, root, path, 0, nr); 1254 if (ret) 1255 return ret; 1256 1257 btrfs_release_path(path); 1258 } 1259 1260 node = rb_first_cached(&trans->fs_info->block_group_cache_tree); 1261 while (node) { 1262 struct btrfs_block_group *bg; 1263 1264 bg = rb_entry(node, struct btrfs_block_group, cache_node); 1265 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); 1266 node = rb_next(node); 1267 cond_resched(); 1268 } 1269 1270 return 0; 1271 } 1272 1273 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) 1274 { 1275 struct btrfs_trans_handle *trans; 1276 struct btrfs_root *tree_root = fs_info->tree_root; 1277 struct btrfs_key key = { 1278 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1279 .type = BTRFS_ROOT_ITEM_KEY, 1280 .offset = 0, 1281 }; 1282 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1283 int ret; 1284 1285 trans = btrfs_start_transaction(tree_root, 0); 1286 if (IS_ERR(trans)) 1287 return PTR_ERR(trans); 1288 1289 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1290 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1291 1292 ret = clear_free_space_tree(trans, free_space_root); 1293 if (ret) { 1294 btrfs_abort_transaction(trans, ret); 1295 btrfs_end_transaction(trans); 1296 return ret; 1297 } 1298 1299 ret = btrfs_del_root(trans, &free_space_root->root_key); 1300 if (ret) { 1301 btrfs_abort_transaction(trans, ret); 1302 btrfs_end_transaction(trans); 1303 return ret; 1304 } 1305 1306 btrfs_global_root_delete(free_space_root); 1307 1308 spin_lock(&fs_info->trans_lock); 1309 list_del(&free_space_root->dirty_list); 1310 spin_unlock(&fs_info->trans_lock); 1311 1312 btrfs_tree_lock(free_space_root->node); 1313 btrfs_clear_buffer_dirty(trans, free_space_root->node); 1314 btrfs_tree_unlock(free_space_root->node); 1315 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), 1316 free_space_root->node, 0, 1); 1317 btrfs_put_root(free_space_root); 1318 if (ret < 0) { 1319 btrfs_abort_transaction(trans, ret); 1320 btrfs_end_transaction(trans); 1321 return ret; 1322 } 1323 1324 return btrfs_commit_transaction(trans); 1325 } 1326 1327 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) 1328 { 1329 struct btrfs_trans_handle *trans; 1330 struct btrfs_key key = { 1331 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1332 .type = BTRFS_ROOT_ITEM_KEY, 1333 .offset = 0, 1334 }; 1335 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1336 struct rb_node *node; 1337 int ret; 1338 1339 trans = btrfs_start_transaction(free_space_root, 1); 1340 if (IS_ERR(trans)) 1341 return PTR_ERR(trans); 1342 1343 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1344 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1345 1346 ret = clear_free_space_tree(trans, free_space_root); 1347 if (ret) { 1348 btrfs_abort_transaction(trans, ret); 1349 btrfs_end_transaction(trans); 1350 return ret; 1351 } 1352 1353 node = rb_first_cached(&fs_info->block_group_cache_tree); 1354 while (node) { 1355 struct btrfs_block_group *block_group; 1356 1357 block_group = rb_entry(node, struct btrfs_block_group, 1358 cache_node); 1359 1360 if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, 1361 &block_group->runtime_flags)) 1362 goto next; 1363 1364 ret = populate_free_space_tree(trans, block_group); 1365 if (ret) { 1366 btrfs_abort_transaction(trans, ret); 1367 btrfs_end_transaction(trans); 1368 return ret; 1369 } 1370 next: 1371 if (btrfs_should_end_transaction(trans)) { 1372 btrfs_end_transaction(trans); 1373 trans = btrfs_start_transaction(free_space_root, 1); 1374 if (IS_ERR(trans)) 1375 return PTR_ERR(trans); 1376 } 1377 node = rb_next(node); 1378 } 1379 1380 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1381 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1382 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1383 1384 ret = btrfs_commit_transaction(trans); 1385 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1386 return ret; 1387 } 1388 1389 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 1390 struct btrfs_block_group *block_group, 1391 struct btrfs_path *path) 1392 { 1393 bool own_path = false; 1394 int ret; 1395 1396 if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, 1397 &block_group->runtime_flags)) 1398 return 0; 1399 1400 /* 1401 * While rebuilding the free space tree we may allocate new metadata 1402 * block groups while modifying the free space tree. 1403 * 1404 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we 1405 * can use multiple transactions, every time btrfs_end_transaction() is 1406 * called at btrfs_rebuild_free_space_tree() we finish the creation of 1407 * new block groups by calling btrfs_create_pending_block_groups(), and 1408 * that in turn calls us, through add_block_group_free_space(), to add 1409 * a free space info item and a free space extent item for the block 1410 * group. 1411 * 1412 * Then later btrfs_rebuild_free_space_tree() may find such new block 1413 * groups and processes them with populate_free_space_tree(), which can 1414 * fail with EEXIST since there are already items for the block group in 1415 * the free space tree. Notice that we say "may find" because a new 1416 * block group may be added to the block groups rbtree in a node before 1417 * or after the block group currently being processed by the rebuild 1418 * process. So signal the rebuild process to skip such new block groups 1419 * if it finds them. 1420 */ 1421 set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); 1422 1423 if (!path) { 1424 path = btrfs_alloc_path(); 1425 if (!path) { 1426 btrfs_abort_transaction(trans, -ENOMEM); 1427 return -ENOMEM; 1428 } 1429 own_path = true; 1430 } 1431 1432 ret = add_new_free_space_info(trans, block_group, path); 1433 if (ret) { 1434 btrfs_abort_transaction(trans, ret); 1435 goto out; 1436 } 1437 1438 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, 1439 block_group->start, block_group->length); 1440 if (ret) 1441 btrfs_abort_transaction(trans, ret); 1442 1443 out: 1444 if (own_path) 1445 btrfs_free_path(path); 1446 1447 return ret; 1448 } 1449 1450 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, 1451 struct btrfs_block_group *block_group) 1452 { 1453 int ret; 1454 1455 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1456 return 0; 1457 1458 mutex_lock(&block_group->free_space_lock); 1459 ret = __add_block_group_free_space(trans, block_group, NULL); 1460 mutex_unlock(&block_group->free_space_lock); 1461 return ret; 1462 } 1463 1464 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, 1465 struct btrfs_block_group *block_group) 1466 { 1467 struct btrfs_root *root = btrfs_free_space_root(block_group); 1468 struct btrfs_path *path; 1469 struct btrfs_key key, found_key; 1470 struct extent_buffer *leaf; 1471 u64 start, end; 1472 int done = 0, nr; 1473 int ret; 1474 1475 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1476 return 0; 1477 1478 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 1479 /* We never added this block group to the free space tree. */ 1480 return 0; 1481 } 1482 1483 path = btrfs_alloc_path(); 1484 if (!path) { 1485 ret = -ENOMEM; 1486 btrfs_abort_transaction(trans, ret); 1487 goto out; 1488 } 1489 1490 start = block_group->start; 1491 end = block_group->start + block_group->length; 1492 1493 key.objectid = end - 1; 1494 key.type = (u8)-1; 1495 key.offset = (u64)-1; 1496 1497 while (!done) { 1498 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1499 if (ret) { 1500 btrfs_abort_transaction(trans, ret); 1501 goto out; 1502 } 1503 1504 leaf = path->nodes[0]; 1505 nr = 0; 1506 path->slots[0]++; 1507 while (path->slots[0] > 0) { 1508 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 1509 1510 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 1511 ASSERT(found_key.objectid == block_group->start); 1512 ASSERT(found_key.offset == block_group->length); 1513 done = 1; 1514 nr++; 1515 path->slots[0]--; 1516 break; 1517 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY || 1518 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 1519 ASSERT(found_key.objectid >= start); 1520 ASSERT(found_key.objectid < end); 1521 ASSERT(found_key.objectid + found_key.offset <= end); 1522 nr++; 1523 path->slots[0]--; 1524 } else { 1525 ASSERT(0); 1526 } 1527 } 1528 1529 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1530 if (ret) { 1531 btrfs_abort_transaction(trans, ret); 1532 goto out; 1533 } 1534 btrfs_release_path(path); 1535 } 1536 1537 ret = 0; 1538 out: 1539 btrfs_free_path(path); 1540 return ret; 1541 } 1542 1543 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, 1544 struct btrfs_path *path, 1545 u32 expected_extent_count) 1546 { 1547 struct btrfs_block_group *block_group; 1548 struct btrfs_fs_info *fs_info; 1549 struct btrfs_root *root; 1550 struct btrfs_key key; 1551 bool prev_bit_set = false; 1552 /* Initialize to silence GCC. */ 1553 u64 extent_start = 0; 1554 u64 end, offset; 1555 u64 total_found = 0; 1556 u32 extent_count = 0; 1557 int ret; 1558 1559 block_group = caching_ctl->block_group; 1560 fs_info = block_group->fs_info; 1561 root = btrfs_free_space_root(block_group); 1562 1563 end = block_group->start + block_group->length; 1564 1565 while (1) { 1566 ret = btrfs_next_item(root, path); 1567 if (ret < 0) 1568 return ret; 1569 if (ret) 1570 break; 1571 1572 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1573 1574 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1575 break; 1576 1577 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 1578 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1579 1580 offset = key.objectid; 1581 while (offset < key.objectid + key.offset) { 1582 bool bit_set; 1583 1584 bit_set = btrfs_free_space_test_bit(block_group, path, offset); 1585 if (!prev_bit_set && bit_set) { 1586 extent_start = offset; 1587 } else if (prev_bit_set && !bit_set) { 1588 u64 space_added; 1589 1590 ret = btrfs_add_new_free_space(block_group, 1591 extent_start, 1592 offset, 1593 &space_added); 1594 if (ret) 1595 return ret; 1596 total_found += space_added; 1597 if (total_found > CACHING_CTL_WAKE_UP) { 1598 total_found = 0; 1599 wake_up(&caching_ctl->wait); 1600 } 1601 extent_count++; 1602 } 1603 prev_bit_set = bit_set; 1604 offset += fs_info->sectorsize; 1605 } 1606 } 1607 if (prev_bit_set) { 1608 ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL); 1609 if (ret) 1610 return ret; 1611 extent_count++; 1612 } 1613 1614 if (extent_count != expected_extent_count) { 1615 btrfs_err(fs_info, 1616 "incorrect extent count for %llu; counted %u, expected %u", 1617 block_group->start, extent_count, 1618 expected_extent_count); 1619 DEBUG_WARN(); 1620 return -EIO; 1621 } 1622 1623 return 0; 1624 } 1625 1626 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, 1627 struct btrfs_path *path, 1628 u32 expected_extent_count) 1629 { 1630 struct btrfs_block_group *block_group; 1631 struct btrfs_fs_info *fs_info; 1632 struct btrfs_root *root; 1633 struct btrfs_key key; 1634 u64 end; 1635 u64 total_found = 0; 1636 u32 extent_count = 0; 1637 int ret; 1638 1639 block_group = caching_ctl->block_group; 1640 fs_info = block_group->fs_info; 1641 root = btrfs_free_space_root(block_group); 1642 1643 end = block_group->start + block_group->length; 1644 1645 while (1) { 1646 u64 space_added; 1647 1648 ret = btrfs_next_item(root, path); 1649 if (ret < 0) 1650 return ret; 1651 if (ret) 1652 break; 1653 1654 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1655 1656 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1657 break; 1658 1659 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 1660 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1661 1662 ret = btrfs_add_new_free_space(block_group, key.objectid, 1663 key.objectid + key.offset, 1664 &space_added); 1665 if (ret) 1666 return ret; 1667 total_found += space_added; 1668 if (total_found > CACHING_CTL_WAKE_UP) { 1669 total_found = 0; 1670 wake_up(&caching_ctl->wait); 1671 } 1672 extent_count++; 1673 } 1674 1675 if (extent_count != expected_extent_count) { 1676 btrfs_err(fs_info, 1677 "incorrect extent count for %llu; counted %u, expected %u", 1678 block_group->start, extent_count, 1679 expected_extent_count); 1680 DEBUG_WARN(); 1681 return -EIO; 1682 } 1683 1684 return 0; 1685 } 1686 1687 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) 1688 { 1689 struct btrfs_block_group *block_group; 1690 struct btrfs_free_space_info *info; 1691 BTRFS_PATH_AUTO_FREE(path); 1692 u32 extent_count, flags; 1693 1694 block_group = caching_ctl->block_group; 1695 1696 path = btrfs_alloc_path(); 1697 if (!path) 1698 return -ENOMEM; 1699 1700 /* 1701 * Just like caching_thread() doesn't want to deadlock on the extent 1702 * tree, we don't want to deadlock on the free space tree. 1703 */ 1704 path->skip_locking = 1; 1705 path->search_commit_root = 1; 1706 path->reada = READA_FORWARD; 1707 1708 info = btrfs_search_free_space_info(NULL, block_group, path, 0); 1709 if (IS_ERR(info)) 1710 return PTR_ERR(info); 1711 1712 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 1713 flags = btrfs_free_space_flags(path->nodes[0], info); 1714 1715 /* 1716 * We left path pointing to the free space info item, so now 1717 * load_free_space_foo can just iterate through the free space tree from 1718 * there. 1719 */ 1720 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) 1721 return load_free_space_bitmaps(caching_ctl, path, extent_count); 1722 else 1723 return load_free_space_extents(caching_ctl, path, extent_count); 1724 } 1725