1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Facebook. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/sched/mm.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "free-space-tree.h" 13 #include "transaction.h" 14 #include "block-group.h" 15 #include "fs.h" 16 #include "accessors.h" 17 #include "extent-tree.h" 18 #include "root-tree.h" 19 20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 21 struct btrfs_block_group *block_group, 22 struct btrfs_path *path); 23 24 struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group) 25 { 26 struct btrfs_key key = { 27 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 28 .type = BTRFS_ROOT_ITEM_KEY, 29 .offset = 0, 30 }; 31 32 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) 33 key.offset = block_group->global_root_id; 34 return btrfs_global_root(block_group->fs_info, &key); 35 } 36 37 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) 38 { 39 u32 bitmap_range; 40 size_t bitmap_size; 41 u64 num_bitmaps, total_bitmap_size; 42 43 if (WARN_ON(cache->length == 0)) 44 btrfs_warn(cache->fs_info, "block group %llu length is zero", 45 cache->start); 46 47 /* 48 * We convert to bitmaps when the disk space required for using extents 49 * exceeds that required for using bitmaps. 50 */ 51 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 52 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range); 53 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; 54 total_bitmap_size = num_bitmaps * bitmap_size; 55 cache->bitmap_high_thresh = div_u64(total_bitmap_size, 56 sizeof(struct btrfs_item)); 57 58 /* 59 * We allow for a small buffer between the high threshold and low 60 * threshold to avoid thrashing back and forth between the two formats. 61 */ 62 if (cache->bitmap_high_thresh > 100) 63 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; 64 else 65 cache->bitmap_low_thresh = 0; 66 } 67 68 static int add_new_free_space_info(struct btrfs_trans_handle *trans, 69 struct btrfs_block_group *block_group, 70 struct btrfs_path *path) 71 { 72 struct btrfs_root *root = btrfs_free_space_root(block_group); 73 struct btrfs_free_space_info *info; 74 struct btrfs_key key; 75 struct extent_buffer *leaf; 76 int ret; 77 78 key.objectid = block_group->start; 79 key.type = BTRFS_FREE_SPACE_INFO_KEY; 80 key.offset = block_group->length; 81 82 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); 83 if (ret) 84 return ret; 85 86 leaf = path->nodes[0]; 87 info = btrfs_item_ptr(leaf, path->slots[0], 88 struct btrfs_free_space_info); 89 btrfs_set_free_space_extent_count(leaf, info, 0); 90 btrfs_set_free_space_flags(leaf, info, 0); 91 btrfs_release_path(path); 92 return 0; 93 } 94 95 struct btrfs_free_space_info *btrfs_search_free_space_info( 96 struct btrfs_trans_handle *trans, 97 struct btrfs_block_group *block_group, 98 struct btrfs_path *path, int cow) 99 { 100 struct btrfs_fs_info *fs_info = block_group->fs_info; 101 struct btrfs_root *root = btrfs_free_space_root(block_group); 102 struct btrfs_key key; 103 int ret; 104 105 key.objectid = block_group->start; 106 key.type = BTRFS_FREE_SPACE_INFO_KEY; 107 key.offset = block_group->length; 108 109 ret = btrfs_search_slot(trans, root, &key, path, 0, cow); 110 if (ret < 0) 111 return ERR_PTR(ret); 112 if (unlikely(ret != 0)) { 113 btrfs_warn(fs_info, "missing free space info for %llu", 114 block_group->start); 115 DEBUG_WARN(); 116 return ERR_PTR(-ENOENT); 117 } 118 119 return btrfs_item_ptr(path->nodes[0], path->slots[0], 120 struct btrfs_free_space_info); 121 } 122 123 /* 124 * btrfs_search_slot() but we're looking for the greatest key less than the 125 * passed key. 126 */ 127 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, 128 struct btrfs_root *root, 129 struct btrfs_key *key, struct btrfs_path *p, 130 int ins_len, int cow) 131 { 132 int ret; 133 134 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); 135 if (ret < 0) 136 return ret; 137 138 if (unlikely(ret == 0)) { 139 DEBUG_WARN(); 140 return -EIO; 141 } 142 143 if (unlikely(p->slots[0] == 0)) { 144 DEBUG_WARN("no previous slot found"); 145 return -EIO; 146 } 147 p->slots[0]--; 148 149 return 0; 150 } 151 152 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info, 153 u64 size) 154 { 155 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE); 156 } 157 158 static unsigned long *alloc_bitmap(u32 bitmap_size) 159 { 160 unsigned long *ret; 161 unsigned int nofs_flag; 162 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long)); 163 164 /* 165 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 166 * into the filesystem here. All callers hold a transaction handle 167 * open, so if a GFP_KERNEL allocation recurses into the filesystem 168 * and triggers a transaction commit, we would deadlock. 169 */ 170 nofs_flag = memalloc_nofs_save(); 171 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); 172 memalloc_nofs_restore(nofs_flag); 173 return ret; 174 } 175 176 static void le_bitmap_set(unsigned long *map, unsigned int start, int len) 177 { 178 u8 *p = ((u8 *)map) + BIT_BYTE(start); 179 const unsigned int size = start + len; 180 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); 181 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); 182 183 while (len - bits_to_set >= 0) { 184 *p |= mask_to_set; 185 len -= bits_to_set; 186 bits_to_set = BITS_PER_BYTE; 187 mask_to_set = ~0; 188 p++; 189 } 190 if (len) { 191 mask_to_set &= BITMAP_LAST_BYTE_MASK(size); 192 *p |= mask_to_set; 193 } 194 } 195 196 EXPORT_FOR_TESTS 197 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, 198 struct btrfs_block_group *block_group, 199 struct btrfs_path *path) 200 { 201 struct btrfs_fs_info *fs_info = trans->fs_info; 202 struct btrfs_root *root = btrfs_free_space_root(block_group); 203 struct btrfs_free_space_info *info; 204 struct btrfs_key key, found_key; 205 struct extent_buffer *leaf; 206 unsigned long *bitmap; 207 char *bitmap_cursor; 208 u64 start, end; 209 u64 bitmap_range, i; 210 u32 bitmap_size, flags, expected_extent_count; 211 u32 extent_count = 0; 212 bool done = false; 213 int nr; 214 int ret; 215 216 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 217 bitmap = alloc_bitmap(bitmap_size); 218 if (unlikely(!bitmap)) 219 return 0; 220 221 start = block_group->start; 222 end = btrfs_block_group_end(block_group); 223 224 key.objectid = end - 1; 225 key.type = (u8)-1; 226 key.offset = (u64)-1; 227 228 while (!done) { 229 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 230 if (unlikely(ret)) { 231 btrfs_abort_transaction(trans, ret); 232 goto out; 233 } 234 235 leaf = path->nodes[0]; 236 nr = 0; 237 path->slots[0]++; 238 while (path->slots[0] > 0) { 239 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 240 241 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 242 ASSERT(found_key.objectid == block_group->start); 243 ASSERT(found_key.offset == block_group->length); 244 done = true; 245 break; 246 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { 247 u64 first, last; 248 249 ASSERT(found_key.objectid >= start); 250 ASSERT(found_key.objectid < end); 251 ASSERT(found_key.objectid + found_key.offset <= end); 252 253 first = div_u64(found_key.objectid - start, 254 fs_info->sectorsize); 255 last = div_u64(found_key.objectid + found_key.offset - start, 256 fs_info->sectorsize); 257 le_bitmap_set(bitmap, first, last - first); 258 259 extent_count++; 260 nr++; 261 path->slots[0]--; 262 } else { 263 btrfs_err(fs_info, "unexpected free space tree key type %u", 264 found_key.type); 265 ret = -EUCLEAN; 266 btrfs_abort_transaction(trans, ret); 267 goto out; 268 } 269 } 270 271 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 272 if (unlikely(ret)) { 273 btrfs_abort_transaction(trans, ret); 274 goto out; 275 } 276 btrfs_release_path(path); 277 } 278 279 info = btrfs_search_free_space_info(trans, block_group, path, 1); 280 if (IS_ERR(info)) { 281 ret = PTR_ERR(info); 282 btrfs_abort_transaction(trans, ret); 283 goto out; 284 } 285 leaf = path->nodes[0]; 286 flags = btrfs_free_space_flags(leaf, info); 287 flags |= BTRFS_FREE_SPACE_USING_BITMAPS; 288 block_group->using_free_space_bitmaps = true; 289 block_group->using_free_space_bitmaps_cached = true; 290 btrfs_set_free_space_flags(leaf, info, flags); 291 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 292 btrfs_release_path(path); 293 294 if (unlikely(extent_count != expected_extent_count)) { 295 btrfs_err(fs_info, 296 "incorrect extent count for %llu; counted %u, expected %u", 297 block_group->start, extent_count, 298 expected_extent_count); 299 ret = -EIO; 300 btrfs_abort_transaction(trans, ret); 301 goto out; 302 } 303 304 bitmap_cursor = (char *)bitmap; 305 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 306 i = start; 307 while (i < end) { 308 unsigned long ptr; 309 u64 extent_size; 310 u32 data_size; 311 312 extent_size = min(end - i, bitmap_range); 313 data_size = free_space_bitmap_size(fs_info, extent_size); 314 315 key.objectid = i; 316 key.type = BTRFS_FREE_SPACE_BITMAP_KEY; 317 key.offset = extent_size; 318 319 ret = btrfs_insert_empty_item(trans, root, path, &key, 320 data_size); 321 if (unlikely(ret)) { 322 btrfs_abort_transaction(trans, ret); 323 goto out; 324 } 325 326 leaf = path->nodes[0]; 327 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 328 write_extent_buffer(leaf, bitmap_cursor, ptr, 329 data_size); 330 btrfs_release_path(path); 331 332 i += extent_size; 333 bitmap_cursor += data_size; 334 } 335 336 ret = 0; 337 out: 338 kvfree(bitmap); 339 return ret; 340 } 341 342 EXPORT_FOR_TESTS 343 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, 344 struct btrfs_block_group *block_group, 345 struct btrfs_path *path) 346 { 347 struct btrfs_fs_info *fs_info = trans->fs_info; 348 struct btrfs_root *root = btrfs_free_space_root(block_group); 349 struct btrfs_free_space_info *info; 350 struct btrfs_key key, found_key; 351 struct extent_buffer *leaf; 352 unsigned long *bitmap; 353 u64 start, end; 354 u32 bitmap_size, flags, expected_extent_count; 355 unsigned long nrbits, start_bit, end_bit; 356 u32 extent_count = 0; 357 bool done = false; 358 int nr; 359 int ret; 360 361 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 362 bitmap = alloc_bitmap(bitmap_size); 363 if (unlikely(!bitmap)) 364 return 0; 365 366 start = block_group->start; 367 end = btrfs_block_group_end(block_group); 368 369 key.objectid = end - 1; 370 key.type = (u8)-1; 371 key.offset = (u64)-1; 372 373 while (!done) { 374 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 375 if (unlikely(ret)) { 376 btrfs_abort_transaction(trans, ret); 377 goto out; 378 } 379 380 leaf = path->nodes[0]; 381 nr = 0; 382 path->slots[0]++; 383 while (path->slots[0] > 0) { 384 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 385 386 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 387 ASSERT(found_key.objectid == block_group->start); 388 ASSERT(found_key.offset == block_group->length); 389 done = true; 390 break; 391 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 392 unsigned long ptr; 393 char *bitmap_cursor; 394 u32 bitmap_pos, data_size; 395 396 ASSERT(found_key.objectid >= start); 397 ASSERT(found_key.objectid < end); 398 ASSERT(found_key.objectid + found_key.offset <= end); 399 400 bitmap_pos = div_u64(found_key.objectid - start, 401 fs_info->sectorsize * 402 BITS_PER_BYTE); 403 bitmap_cursor = ((char *)bitmap) + bitmap_pos; 404 data_size = free_space_bitmap_size(fs_info, 405 found_key.offset); 406 407 path->slots[0]--; 408 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 409 read_extent_buffer(leaf, bitmap_cursor, ptr, 410 data_size); 411 412 nr++; 413 } else { 414 btrfs_err(fs_info, "unexpected free space tree key type %u", 415 found_key.type); 416 ret = -EUCLEAN; 417 btrfs_abort_transaction(trans, ret); 418 goto out; 419 } 420 } 421 422 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 423 if (unlikely(ret)) { 424 btrfs_abort_transaction(trans, ret); 425 goto out; 426 } 427 btrfs_release_path(path); 428 } 429 430 info = btrfs_search_free_space_info(trans, block_group, path, 1); 431 if (IS_ERR(info)) { 432 ret = PTR_ERR(info); 433 btrfs_abort_transaction(trans, ret); 434 goto out; 435 } 436 leaf = path->nodes[0]; 437 flags = btrfs_free_space_flags(leaf, info); 438 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; 439 block_group->using_free_space_bitmaps = false; 440 block_group->using_free_space_bitmaps_cached = true; 441 btrfs_set_free_space_flags(leaf, info, flags); 442 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 443 btrfs_release_path(path); 444 445 nrbits = block_group->length >> fs_info->sectorsize_bits; 446 start_bit = find_next_bit_le(bitmap, nrbits, 0); 447 448 while (start_bit < nrbits) { 449 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); 450 ASSERT(start_bit < end_bit); 451 452 key.objectid = start + start_bit * fs_info->sectorsize; 453 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 454 key.offset = (end_bit - start_bit) * fs_info->sectorsize; 455 456 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 457 if (unlikely(ret)) { 458 btrfs_abort_transaction(trans, ret); 459 goto out; 460 } 461 btrfs_release_path(path); 462 463 extent_count++; 464 465 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 466 } 467 468 if (unlikely(extent_count != expected_extent_count)) { 469 btrfs_err(fs_info, 470 "incorrect extent count for %llu; counted %u, expected %u", 471 block_group->start, extent_count, 472 expected_extent_count); 473 ret = -EIO; 474 btrfs_abort_transaction(trans, ret); 475 goto out; 476 } 477 478 ret = 0; 479 out: 480 kvfree(bitmap); 481 return ret; 482 } 483 484 static int update_free_space_extent_count(struct btrfs_trans_handle *trans, 485 struct btrfs_block_group *block_group, 486 struct btrfs_path *path, 487 int new_extents) 488 { 489 struct btrfs_free_space_info *info; 490 u32 flags; 491 u32 extent_count; 492 int ret = 0; 493 494 if (new_extents == 0) 495 return 0; 496 497 info = btrfs_search_free_space_info(trans, block_group, path, 1); 498 if (IS_ERR(info)) 499 return PTR_ERR(info); 500 501 flags = btrfs_free_space_flags(path->nodes[0], info); 502 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 503 504 extent_count += new_extents; 505 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); 506 btrfs_release_path(path); 507 508 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 509 extent_count > block_group->bitmap_high_thresh) { 510 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); 511 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 512 extent_count < block_group->bitmap_low_thresh) { 513 ret = btrfs_convert_free_space_to_extents(trans, block_group, path); 514 } 515 516 return ret; 517 } 518 519 EXPORT_FOR_TESTS 520 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, 521 struct btrfs_path *path, u64 offset) 522 { 523 struct extent_buffer *leaf; 524 struct btrfs_key key; 525 u64 found_start, found_end; 526 unsigned long ptr, i; 527 528 leaf = path->nodes[0]; 529 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 530 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 531 532 found_start = key.objectid; 533 found_end = key.objectid + key.offset; 534 ASSERT(offset >= found_start && offset < found_end); 535 536 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 537 i = div_u64(offset - found_start, 538 block_group->fs_info->sectorsize); 539 return extent_buffer_test_bit(leaf, ptr, i); 540 } 541 542 static void free_space_modify_bits(struct btrfs_trans_handle *trans, 543 struct btrfs_block_group *block_group, 544 struct btrfs_path *path, u64 *start, u64 *size, 545 bool set_bits) 546 { 547 struct btrfs_fs_info *fs_info = block_group->fs_info; 548 struct extent_buffer *leaf; 549 struct btrfs_key key; 550 u64 end = *start + *size; 551 u64 found_start, found_end; 552 unsigned long ptr, first, last; 553 554 leaf = path->nodes[0]; 555 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 556 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 557 558 found_start = key.objectid; 559 found_end = key.objectid + key.offset; 560 ASSERT(*start >= found_start && *start < found_end); 561 ASSERT(end > found_start); 562 563 if (end > found_end) 564 end = found_end; 565 566 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 567 first = (*start - found_start) >> fs_info->sectorsize_bits; 568 last = (end - found_start) >> fs_info->sectorsize_bits; 569 if (set_bits) 570 extent_buffer_bitmap_set(leaf, ptr, first, last - first); 571 else 572 extent_buffer_bitmap_clear(leaf, ptr, first, last - first); 573 btrfs_mark_buffer_dirty(trans, leaf); 574 575 *size -= end - *start; 576 *start = end; 577 } 578 579 /* 580 * We can't use btrfs_next_item() in modify_free_space_bitmap() because 581 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy 582 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're 583 * looking for. 584 */ 585 static int free_space_next_bitmap(struct btrfs_trans_handle *trans, 586 struct btrfs_root *root, struct btrfs_path *p) 587 { 588 struct btrfs_key key; 589 590 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { 591 p->slots[0]++; 592 return 0; 593 } 594 595 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); 596 btrfs_release_path(p); 597 598 key.objectid += key.offset; 599 key.type = (u8)-1; 600 key.offset = (u64)-1; 601 602 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); 603 } 604 605 /* 606 * If remove is 1, then we are removing free space, thus clearing bits in the 607 * bitmap. If remove is 0, then we are adding free space, thus setting bits in 608 * the bitmap. 609 */ 610 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, 611 struct btrfs_block_group *block_group, 612 struct btrfs_path *path, 613 u64 start, u64 size, bool remove) 614 { 615 struct btrfs_root *root = btrfs_free_space_root(block_group); 616 struct btrfs_key key; 617 u64 end = start + size; 618 u64 cur_start, cur_size; 619 bool prev_bit_set = false; 620 bool next_bit_set = false; 621 int new_extents; 622 int ret; 623 624 /* 625 * Read the bit for the block immediately before the extent of space if 626 * that block is within the block group. 627 */ 628 if (start > block_group->start) { 629 u64 prev_block = start - block_group->fs_info->sectorsize; 630 631 key.objectid = prev_block; 632 key.type = (u8)-1; 633 key.offset = (u64)-1; 634 635 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 636 if (ret) 637 return ret; 638 639 prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block); 640 641 /* The previous block may have been in the previous bitmap. */ 642 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 643 if (start >= key.objectid + key.offset) { 644 ret = free_space_next_bitmap(trans, root, path); 645 if (ret) 646 return ret; 647 } 648 } else { 649 key.objectid = start; 650 key.type = (u8)-1; 651 key.offset = (u64)-1; 652 653 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 654 if (ret) 655 return ret; 656 } 657 658 /* 659 * Iterate over all of the bitmaps overlapped by the extent of space, 660 * clearing/setting bits as required. 661 */ 662 cur_start = start; 663 cur_size = size; 664 while (1) { 665 free_space_modify_bits(trans, block_group, path, &cur_start, 666 &cur_size, !remove); 667 if (cur_size == 0) 668 break; 669 ret = free_space_next_bitmap(trans, root, path); 670 if (ret) 671 return ret; 672 } 673 674 /* 675 * Read the bit for the block immediately after the extent of space if 676 * that block is within the block group. 677 */ 678 if (end < btrfs_block_group_end(block_group)) { 679 /* The next block may be in the next bitmap. */ 680 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 681 if (end >= key.objectid + key.offset) { 682 ret = free_space_next_bitmap(trans, root, path); 683 if (ret) 684 return ret; 685 } 686 687 next_bit_set = btrfs_free_space_test_bit(block_group, path, end); 688 } 689 690 if (remove) { 691 new_extents = -1; 692 if (prev_bit_set) { 693 /* Leftover on the left. */ 694 new_extents++; 695 } 696 if (next_bit_set) { 697 /* Leftover on the right. */ 698 new_extents++; 699 } 700 } else { 701 new_extents = 1; 702 if (prev_bit_set) { 703 /* Merging with neighbor on the left. */ 704 new_extents--; 705 } 706 if (next_bit_set) { 707 /* Merging with neighbor on the right. */ 708 new_extents--; 709 } 710 } 711 712 btrfs_release_path(path); 713 return update_free_space_extent_count(trans, block_group, path, new_extents); 714 } 715 716 static int remove_free_space_extent(struct btrfs_trans_handle *trans, 717 struct btrfs_block_group *block_group, 718 struct btrfs_path *path, 719 u64 start, u64 size) 720 { 721 struct btrfs_root *root = btrfs_free_space_root(block_group); 722 struct btrfs_key key; 723 u64 found_start, found_end; 724 u64 end = start + size; 725 int new_extents = -1; 726 int ret; 727 728 key.objectid = start; 729 key.type = (u8)-1; 730 key.offset = (u64)-1; 731 732 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 733 if (ret) 734 return ret; 735 736 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 737 738 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 739 740 found_start = key.objectid; 741 found_end = key.objectid + key.offset; 742 ASSERT(start >= found_start && end <= found_end); 743 744 /* 745 * Okay, now that we've found the free space extent which contains the 746 * free space that we are removing, there are four cases: 747 * 748 * 1. We're using the whole extent: delete the key we found and 749 * decrement the free space extent count. 750 * 2. We are using part of the extent starting at the beginning: delete 751 * the key we found and insert a new key representing the leftover at 752 * the end. There is no net change in the number of extents. 753 * 3. We are using part of the extent ending at the end: delete the key 754 * we found and insert a new key representing the leftover at the 755 * beginning. There is no net change in the number of extents. 756 * 4. We are using part of the extent in the middle: delete the key we 757 * found and insert two new keys representing the leftovers on each 758 * side. Where we used to have one extent, we now have two, so increment 759 * the extent count. We may need to convert the block group to bitmaps 760 * as a result. 761 */ 762 763 /* Delete the existing key (cases 1-4). */ 764 ret = btrfs_del_item(trans, root, path); 765 if (ret) 766 return ret; 767 768 /* Add a key for leftovers at the beginning (cases 3 and 4). */ 769 if (start > found_start) { 770 key.objectid = found_start; 771 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 772 key.offset = start - found_start; 773 774 btrfs_release_path(path); 775 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 776 if (ret) 777 return ret; 778 new_extents++; 779 } 780 781 /* Add a key for leftovers at the end (cases 2 and 4). */ 782 if (end < found_end) { 783 key.objectid = end; 784 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 785 key.offset = found_end - end; 786 787 btrfs_release_path(path); 788 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 789 if (ret) 790 return ret; 791 new_extents++; 792 } 793 794 btrfs_release_path(path); 795 return update_free_space_extent_count(trans, block_group, path, new_extents); 796 } 797 798 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) 799 { 800 struct btrfs_free_space_info *info; 801 u32 flags; 802 803 if (bg->using_free_space_bitmaps_cached) 804 return bg->using_free_space_bitmaps; 805 806 info = btrfs_search_free_space_info(NULL, bg, path, 0); 807 if (IS_ERR(info)) 808 return PTR_ERR(info); 809 flags = btrfs_free_space_flags(path->nodes[0], info); 810 btrfs_release_path(path); 811 812 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); 813 bg->using_free_space_bitmaps_cached = true; 814 815 return bg->using_free_space_bitmaps; 816 } 817 818 EXPORT_FOR_TESTS 819 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 820 struct btrfs_block_group *block_group, 821 struct btrfs_path *path, u64 start, u64 size) 822 { 823 int ret; 824 825 ret = __add_block_group_free_space(trans, block_group, path); 826 if (ret) 827 return ret; 828 829 ret = using_bitmaps(block_group, path); 830 if (ret < 0) 831 return ret; 832 833 if (ret) 834 return modify_free_space_bitmap(trans, block_group, path, 835 start, size, true); 836 837 return remove_free_space_extent(trans, block_group, path, start, size); 838 } 839 840 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 841 u64 start, u64 size) 842 { 843 struct btrfs_block_group *block_group; 844 BTRFS_PATH_AUTO_FREE(path); 845 int ret; 846 847 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 848 return 0; 849 850 path = btrfs_alloc_path(); 851 if (unlikely(!path)) { 852 ret = -ENOMEM; 853 btrfs_abort_transaction(trans, ret); 854 return ret; 855 } 856 857 block_group = btrfs_lookup_block_group(trans->fs_info, start); 858 if (unlikely(!block_group)) { 859 DEBUG_WARN("no block group found for start=%llu", start); 860 ret = -ENOENT; 861 btrfs_abort_transaction(trans, ret); 862 return ret; 863 } 864 865 mutex_lock(&block_group->free_space_lock); 866 ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size); 867 mutex_unlock(&block_group->free_space_lock); 868 if (ret) 869 btrfs_abort_transaction(trans, ret); 870 871 btrfs_put_block_group(block_group); 872 873 return ret; 874 } 875 876 static int add_free_space_extent(struct btrfs_trans_handle *trans, 877 struct btrfs_block_group *block_group, 878 struct btrfs_path *path, 879 u64 start, u64 size) 880 { 881 struct btrfs_root *root = btrfs_free_space_root(block_group); 882 struct btrfs_key key, new_key; 883 u64 found_start, found_end; 884 u64 end = start + size; 885 int new_extents = 1; 886 int ret; 887 888 /* 889 * We are adding a new extent of free space, but we need to merge 890 * extents. There are four cases here: 891 * 892 * 1. The new extent does not have any immediate neighbors to merge 893 * with: add the new key and increment the free space extent count. We 894 * may need to convert the block group to bitmaps as a result. 895 * 2. The new extent has an immediate neighbor before it: remove the 896 * previous key and insert a new key combining both of them. There is no 897 * net change in the number of extents. 898 * 3. The new extent has an immediate neighbor after it: remove the next 899 * key and insert a new key combining both of them. There is no net 900 * change in the number of extents. 901 * 4. The new extent has immediate neighbors on both sides: remove both 902 * of the keys and insert a new key combining all of them. Where we used 903 * to have two extents, we now have one, so decrement the extent count. 904 */ 905 906 new_key.objectid = start; 907 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 908 new_key.offset = size; 909 910 /* Search for a neighbor on the left. */ 911 if (start == block_group->start) 912 goto right; 913 key.objectid = start - 1; 914 key.type = (u8)-1; 915 key.offset = (u64)-1; 916 917 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 918 if (ret) 919 return ret; 920 921 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 922 923 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 924 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 925 btrfs_release_path(path); 926 goto right; 927 } 928 929 found_start = key.objectid; 930 found_end = key.objectid + key.offset; 931 ASSERT(found_start >= block_group->start && 932 found_end > block_group->start); 933 ASSERT(found_start < start && found_end <= start); 934 935 /* 936 * Delete the neighbor on the left and absorb it into the new key (cases 937 * 2 and 4). 938 */ 939 if (found_end == start) { 940 ret = btrfs_del_item(trans, root, path); 941 if (ret) 942 return ret; 943 new_key.objectid = found_start; 944 new_key.offset += key.offset; 945 new_extents--; 946 } 947 btrfs_release_path(path); 948 949 right: 950 /* Search for a neighbor on the right. */ 951 if (end == btrfs_block_group_end(block_group)) 952 goto insert; 953 key.objectid = end; 954 key.type = (u8)-1; 955 key.offset = (u64)-1; 956 957 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 958 if (ret) 959 return ret; 960 961 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 962 963 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 964 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 965 btrfs_release_path(path); 966 goto insert; 967 } 968 969 found_start = key.objectid; 970 found_end = key.objectid + key.offset; 971 ASSERT(found_start >= block_group->start && 972 found_end > block_group->start); 973 ASSERT((found_start < start && found_end <= start) || 974 (found_start >= end && found_end > end)); 975 976 /* 977 * Delete the neighbor on the right and absorb it into the new key 978 * (cases 3 and 4). 979 */ 980 if (found_start == end) { 981 ret = btrfs_del_item(trans, root, path); 982 if (ret) 983 return ret; 984 new_key.offset += key.offset; 985 new_extents--; 986 } 987 btrfs_release_path(path); 988 989 insert: 990 /* Insert the new key (cases 1-4). */ 991 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); 992 if (ret) 993 return ret; 994 995 btrfs_release_path(path); 996 return update_free_space_extent_count(trans, block_group, path, new_extents); 997 } 998 999 EXPORT_FOR_TESTS 1000 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1001 struct btrfs_block_group *block_group, 1002 struct btrfs_path *path, u64 start, u64 size) 1003 { 1004 int ret; 1005 1006 ret = __add_block_group_free_space(trans, block_group, path); 1007 if (ret) 1008 return ret; 1009 1010 ret = using_bitmaps(block_group, path); 1011 if (ret < 0) 1012 return ret; 1013 1014 if (ret) 1015 return modify_free_space_bitmap(trans, block_group, path, 1016 start, size, false); 1017 1018 return add_free_space_extent(trans, block_group, path, start, size); 1019 } 1020 1021 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1022 u64 start, u64 size) 1023 { 1024 struct btrfs_block_group *block_group; 1025 BTRFS_PATH_AUTO_FREE(path); 1026 int ret; 1027 1028 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1029 return 0; 1030 1031 path = btrfs_alloc_path(); 1032 if (unlikely(!path)) { 1033 ret = -ENOMEM; 1034 btrfs_abort_transaction(trans, ret); 1035 return ret; 1036 } 1037 1038 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1039 if (unlikely(!block_group)) { 1040 DEBUG_WARN("no block group found for start=%llu", start); 1041 ret = -ENOENT; 1042 btrfs_abort_transaction(trans, ret); 1043 return ret; 1044 } 1045 1046 mutex_lock(&block_group->free_space_lock); 1047 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size); 1048 mutex_unlock(&block_group->free_space_lock); 1049 if (ret) 1050 btrfs_abort_transaction(trans, ret); 1051 1052 btrfs_put_block_group(block_group); 1053 1054 return ret; 1055 } 1056 1057 /* 1058 * Populate the free space tree by walking the extent tree. Operations on the 1059 * extent tree that happen as a result of writes to the free space tree will go 1060 * through the normal add/remove hooks. 1061 */ 1062 static int populate_free_space_tree(struct btrfs_trans_handle *trans, 1063 struct btrfs_block_group *block_group) 1064 { 1065 struct btrfs_root *extent_root; 1066 BTRFS_PATH_AUTO_FREE(path); 1067 BTRFS_PATH_AUTO_FREE(path2); 1068 struct btrfs_key key; 1069 u64 start, end; 1070 int ret; 1071 1072 path = btrfs_alloc_path(); 1073 if (!path) 1074 return -ENOMEM; 1075 1076 path2 = btrfs_alloc_path(); 1077 if (!path2) 1078 return -ENOMEM; 1079 1080 path->reada = READA_FORWARD; 1081 1082 ret = add_new_free_space_info(trans, block_group, path2); 1083 if (ret) 1084 return ret; 1085 1086 extent_root = btrfs_extent_root(trans->fs_info, block_group->start); 1087 if (unlikely(!extent_root)) { 1088 btrfs_err(trans->fs_info, 1089 "missing extent root for block group at offset %llu", 1090 block_group->start); 1091 return -EUCLEAN; 1092 } 1093 1094 mutex_lock(&block_group->free_space_lock); 1095 1096 /* 1097 * Iterate through all of the extent and metadata items in this block 1098 * group, adding the free space between them and the free space at the 1099 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than 1100 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's 1101 * contained in. 1102 */ 1103 key.objectid = block_group->start; 1104 key.type = BTRFS_EXTENT_ITEM_KEY; 1105 key.offset = 0; 1106 1107 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); 1108 if (ret < 0) 1109 goto out_locked; 1110 /* 1111 * If ret is 1 (no key found), it means this is an empty block group, 1112 * without any extents allocated from it and there's no block group 1113 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree 1114 * because we are using the block group tree feature (so block group 1115 * items are stored in the block group tree) or this is a new block 1116 * group created in the current transaction and its block group item 1117 * was not yet inserted in the extent tree (that happens in 1118 * btrfs_create_pending_block_groups() -> insert_block_group_item()). 1119 * It also means there are no extents allocated for block groups with a 1120 * start offset beyond this block group's end offset (this is the last, 1121 * highest, block group). 1122 */ 1123 start = block_group->start; 1124 end = btrfs_block_group_end(block_group); 1125 while (ret == 0) { 1126 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1127 1128 if (key.type == BTRFS_EXTENT_ITEM_KEY || 1129 key.type == BTRFS_METADATA_ITEM_KEY) { 1130 if (key.objectid >= end) 1131 break; 1132 1133 if (start < key.objectid) { 1134 ret = __btrfs_add_to_free_space_tree(trans, 1135 block_group, 1136 path2, start, 1137 key.objectid - 1138 start); 1139 if (ret) 1140 goto out_locked; 1141 } 1142 start = key.objectid; 1143 if (key.type == BTRFS_METADATA_ITEM_KEY) 1144 start += trans->fs_info->nodesize; 1145 else 1146 start += key.offset; 1147 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 1148 if (key.objectid != block_group->start) 1149 break; 1150 } 1151 1152 ret = btrfs_next_item(extent_root, path); 1153 if (ret < 0) 1154 goto out_locked; 1155 } 1156 if (start < end) { 1157 ret = __btrfs_add_to_free_space_tree(trans, block_group, path2, 1158 start, end - start); 1159 if (ret) 1160 goto out_locked; 1161 } 1162 1163 ret = 0; 1164 out_locked: 1165 mutex_unlock(&block_group->free_space_lock); 1166 1167 return ret; 1168 } 1169 1170 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) 1171 { 1172 struct btrfs_trans_handle *trans; 1173 struct btrfs_root *tree_root = fs_info->tree_root; 1174 struct btrfs_root *free_space_root; 1175 struct btrfs_block_group *block_group; 1176 struct rb_node *node; 1177 int ret; 1178 1179 trans = btrfs_start_transaction(tree_root, 0); 1180 if (IS_ERR(trans)) 1181 return PTR_ERR(trans); 1182 1183 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1184 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1185 free_space_root = btrfs_create_tree(trans, 1186 BTRFS_FREE_SPACE_TREE_OBJECTID); 1187 if (IS_ERR(free_space_root)) { 1188 ret = PTR_ERR(free_space_root); 1189 btrfs_abort_transaction(trans, ret); 1190 btrfs_end_transaction(trans); 1191 goto out_clear; 1192 } 1193 ret = btrfs_global_root_insert(free_space_root); 1194 if (unlikely(ret)) { 1195 btrfs_put_root(free_space_root); 1196 btrfs_abort_transaction(trans, ret); 1197 btrfs_end_transaction(trans); 1198 goto out_clear; 1199 } 1200 1201 node = rb_first_cached(&fs_info->block_group_cache_tree); 1202 while (node) { 1203 block_group = rb_entry(node, struct btrfs_block_group, 1204 cache_node); 1205 ret = populate_free_space_tree(trans, block_group); 1206 if (unlikely(ret)) { 1207 btrfs_abort_transaction(trans, ret); 1208 btrfs_end_transaction(trans); 1209 goto out_clear; 1210 } 1211 node = rb_next(node); 1212 } 1213 1214 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1215 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1216 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1217 ret = btrfs_commit_transaction(trans); 1218 1219 /* 1220 * Now that we've committed the transaction any reading of our commit 1221 * root will be safe, so we can cache from the free space tree now. 1222 */ 1223 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1224 return ret; 1225 1226 out_clear: 1227 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1228 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1229 return ret; 1230 } 1231 1232 static int clear_free_space_tree(struct btrfs_trans_handle *trans, 1233 struct btrfs_root *root) 1234 { 1235 BTRFS_PATH_AUTO_FREE(path); 1236 struct btrfs_key key; 1237 struct rb_node *node; 1238 int nr; 1239 int ret; 1240 1241 path = btrfs_alloc_path(); 1242 if (!path) 1243 return -ENOMEM; 1244 1245 key.objectid = 0; 1246 key.type = 0; 1247 key.offset = 0; 1248 1249 while (1) { 1250 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1251 if (ret < 0) 1252 return ret; 1253 1254 nr = btrfs_header_nritems(path->nodes[0]); 1255 if (!nr) 1256 break; 1257 1258 path->slots[0] = 0; 1259 ret = btrfs_del_items(trans, root, path, 0, nr); 1260 if (ret) 1261 return ret; 1262 1263 btrfs_release_path(path); 1264 } 1265 1266 node = rb_first_cached(&trans->fs_info->block_group_cache_tree); 1267 while (node) { 1268 struct btrfs_block_group *bg; 1269 1270 bg = rb_entry(node, struct btrfs_block_group, cache_node); 1271 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); 1272 node = rb_next(node); 1273 cond_resched(); 1274 } 1275 1276 return 0; 1277 } 1278 1279 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) 1280 { 1281 struct btrfs_trans_handle *trans; 1282 struct btrfs_root *tree_root = fs_info->tree_root; 1283 struct btrfs_key key = { 1284 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1285 .type = BTRFS_ROOT_ITEM_KEY, 1286 .offset = 0, 1287 }; 1288 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1289 int ret; 1290 1291 trans = btrfs_start_transaction(tree_root, 0); 1292 if (IS_ERR(trans)) 1293 return PTR_ERR(trans); 1294 1295 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1296 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1297 1298 ret = clear_free_space_tree(trans, free_space_root); 1299 if (unlikely(ret)) { 1300 btrfs_abort_transaction(trans, ret); 1301 btrfs_end_transaction(trans); 1302 return ret; 1303 } 1304 1305 ret = btrfs_del_root(trans, &free_space_root->root_key); 1306 if (unlikely(ret)) { 1307 btrfs_abort_transaction(trans, ret); 1308 btrfs_end_transaction(trans); 1309 return ret; 1310 } 1311 1312 btrfs_global_root_delete(free_space_root); 1313 1314 spin_lock(&fs_info->trans_lock); 1315 list_del(&free_space_root->dirty_list); 1316 spin_unlock(&fs_info->trans_lock); 1317 1318 btrfs_tree_lock(free_space_root->node); 1319 btrfs_clear_buffer_dirty(trans, free_space_root->node); 1320 btrfs_tree_unlock(free_space_root->node); 1321 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), 1322 free_space_root->node, 0, 1); 1323 btrfs_put_root(free_space_root); 1324 if (unlikely(ret < 0)) { 1325 btrfs_abort_transaction(trans, ret); 1326 btrfs_end_transaction(trans); 1327 return ret; 1328 } 1329 1330 return btrfs_commit_transaction(trans); 1331 } 1332 1333 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) 1334 { 1335 struct btrfs_trans_handle *trans; 1336 struct btrfs_key key = { 1337 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1338 .type = BTRFS_ROOT_ITEM_KEY, 1339 .offset = 0, 1340 }; 1341 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1342 struct rb_node *node; 1343 int ret; 1344 1345 trans = btrfs_start_transaction(free_space_root, 1); 1346 if (IS_ERR(trans)) 1347 return PTR_ERR(trans); 1348 1349 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1350 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1351 1352 ret = clear_free_space_tree(trans, free_space_root); 1353 if (unlikely(ret)) { 1354 btrfs_abort_transaction(trans, ret); 1355 btrfs_end_transaction(trans); 1356 return ret; 1357 } 1358 1359 node = rb_first_cached(&fs_info->block_group_cache_tree); 1360 while (node) { 1361 struct btrfs_block_group *block_group; 1362 1363 block_group = rb_entry(node, struct btrfs_block_group, 1364 cache_node); 1365 1366 if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, 1367 &block_group->runtime_flags)) 1368 goto next; 1369 1370 ret = populate_free_space_tree(trans, block_group); 1371 if (unlikely(ret)) { 1372 btrfs_abort_transaction(trans, ret); 1373 btrfs_end_transaction(trans); 1374 return ret; 1375 } 1376 next: 1377 if (btrfs_should_end_transaction(trans)) { 1378 btrfs_end_transaction(trans); 1379 trans = btrfs_start_transaction(free_space_root, 1); 1380 if (IS_ERR(trans)) 1381 return PTR_ERR(trans); 1382 } 1383 node = rb_next(node); 1384 } 1385 1386 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1387 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1388 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1389 1390 ret = btrfs_commit_transaction(trans); 1391 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1392 return ret; 1393 } 1394 1395 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 1396 struct btrfs_block_group *block_group, 1397 struct btrfs_path *path) 1398 { 1399 bool own_path = false; 1400 int ret; 1401 1402 if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, 1403 &block_group->runtime_flags)) 1404 return 0; 1405 1406 /* 1407 * While rebuilding the free space tree we may allocate new metadata 1408 * block groups while modifying the free space tree. 1409 * 1410 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we 1411 * can use multiple transactions, every time btrfs_end_transaction() is 1412 * called at btrfs_rebuild_free_space_tree() we finish the creation of 1413 * new block groups by calling btrfs_create_pending_block_groups(), and 1414 * that in turn calls us, through btrfs_add_block_group_free_space(), 1415 * to add a free space info item and a free space extent item for the 1416 * block group. 1417 * 1418 * Then later btrfs_rebuild_free_space_tree() may find such new block 1419 * groups and processes them with populate_free_space_tree(), which can 1420 * fail with EEXIST since there are already items for the block group in 1421 * the free space tree. Notice that we say "may find" because a new 1422 * block group may be added to the block groups rbtree in a node before 1423 * or after the block group currently being processed by the rebuild 1424 * process. So signal the rebuild process to skip such new block groups 1425 * if it finds them. 1426 */ 1427 set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); 1428 1429 if (!path) { 1430 path = btrfs_alloc_path(); 1431 if (unlikely(!path)) { 1432 btrfs_abort_transaction(trans, -ENOMEM); 1433 return -ENOMEM; 1434 } 1435 own_path = true; 1436 } 1437 1438 ret = add_new_free_space_info(trans, block_group, path); 1439 if (unlikely(ret)) { 1440 btrfs_abort_transaction(trans, ret); 1441 goto out; 1442 } 1443 1444 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, 1445 block_group->start, block_group->length); 1446 if (ret) 1447 btrfs_abort_transaction(trans, ret); 1448 1449 out: 1450 if (own_path) 1451 btrfs_free_path(path); 1452 1453 return ret; 1454 } 1455 1456 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, 1457 struct btrfs_block_group *block_group) 1458 { 1459 int ret; 1460 1461 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1462 return 0; 1463 1464 mutex_lock(&block_group->free_space_lock); 1465 ret = __add_block_group_free_space(trans, block_group, NULL); 1466 mutex_unlock(&block_group->free_space_lock); 1467 return ret; 1468 } 1469 1470 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, 1471 struct btrfs_block_group *block_group) 1472 { 1473 struct btrfs_root *root = btrfs_free_space_root(block_group); 1474 BTRFS_PATH_AUTO_FREE(path); 1475 struct btrfs_key key, found_key; 1476 struct extent_buffer *leaf; 1477 u64 start, end; 1478 bool done = false; 1479 int nr; 1480 int ret; 1481 1482 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1483 return 0; 1484 1485 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 1486 /* We never added this block group to the free space tree. */ 1487 return 0; 1488 } 1489 1490 path = btrfs_alloc_path(); 1491 if (unlikely(!path)) { 1492 ret = -ENOMEM; 1493 btrfs_abort_transaction(trans, ret); 1494 return ret; 1495 } 1496 1497 start = block_group->start; 1498 end = btrfs_block_group_end(block_group); 1499 1500 key.objectid = end - 1; 1501 key.type = (u8)-1; 1502 key.offset = (u64)-1; 1503 1504 while (!done) { 1505 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1506 if (unlikely(ret)) { 1507 btrfs_abort_transaction(trans, ret); 1508 return ret; 1509 } 1510 1511 leaf = path->nodes[0]; 1512 nr = 0; 1513 path->slots[0]++; 1514 while (path->slots[0] > 0) { 1515 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 1516 1517 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 1518 ASSERT(found_key.objectid == block_group->start); 1519 ASSERT(found_key.offset == block_group->length); 1520 done = true; 1521 nr++; 1522 path->slots[0]--; 1523 break; 1524 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY || 1525 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 1526 ASSERT(found_key.objectid >= start); 1527 ASSERT(found_key.objectid < end); 1528 ASSERT(found_key.objectid + found_key.offset <= end); 1529 nr++; 1530 path->slots[0]--; 1531 } else { 1532 btrfs_err(trans->fs_info, "unexpected free space tree key type %u", 1533 found_key.type); 1534 ret = -EUCLEAN; 1535 btrfs_abort_transaction(trans, ret); 1536 return ret; 1537 } 1538 } 1539 1540 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1541 if (unlikely(ret)) { 1542 btrfs_abort_transaction(trans, ret); 1543 return ret; 1544 } 1545 btrfs_release_path(path); 1546 } 1547 1548 return 0; 1549 } 1550 1551 static int validate_free_space_key(struct btrfs_block_group *block_group, 1552 const struct btrfs_key *key, u8 expected_type) 1553 { 1554 const u64 end = btrfs_block_group_end(block_group); 1555 1556 if (unlikely(key->type != expected_type)) { 1557 btrfs_err(block_group->fs_info, 1558 "block group %llu has unexpected free space key type %u, expected %u", 1559 block_group->start, key->type, expected_type); 1560 return -EUCLEAN; 1561 } 1562 1563 if (unlikely(key->objectid + key->offset > end)) { 1564 btrfs_err(block_group->fs_info, 1565 "block group %llu has invalid free space key (%llu %u %llu)", 1566 block_group->start, key->objectid, key->type, 1567 key->offset); 1568 return -EUCLEAN; 1569 } 1570 1571 return 0; 1572 } 1573 1574 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, 1575 struct btrfs_path *path, 1576 u32 expected_extent_count) 1577 { 1578 struct btrfs_block_group *block_group = caching_ctl->block_group; 1579 struct btrfs_fs_info *fs_info = block_group->fs_info; 1580 struct btrfs_root *root; 1581 struct btrfs_key key; 1582 bool prev_bit_set = false; 1583 /* Initialize to silence GCC. */ 1584 u64 extent_start = 0; 1585 const u64 end = btrfs_block_group_end(block_group); 1586 u64 offset; 1587 u64 total_found = 0; 1588 u32 extent_count = 0; 1589 int ret; 1590 1591 root = btrfs_free_space_root(block_group); 1592 1593 while (1) { 1594 ret = btrfs_next_item(root, path); 1595 if (ret < 0) 1596 return ret; 1597 if (ret) 1598 break; 1599 1600 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1601 1602 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1603 break; 1604 1605 ret = validate_free_space_key(block_group, &key, BTRFS_FREE_SPACE_BITMAP_KEY); 1606 if (unlikely(ret)) 1607 return ret; 1608 1609 offset = key.objectid; 1610 while (offset < key.objectid + key.offset) { 1611 bool bit_set; 1612 1613 bit_set = btrfs_free_space_test_bit(block_group, path, offset); 1614 if (!prev_bit_set && bit_set) { 1615 extent_start = offset; 1616 } else if (prev_bit_set && !bit_set) { 1617 u64 space_added; 1618 1619 ret = btrfs_add_new_free_space(block_group, 1620 extent_start, 1621 offset, 1622 &space_added); 1623 if (ret) 1624 return ret; 1625 total_found += space_added; 1626 if (total_found > CACHING_CTL_WAKE_UP) { 1627 total_found = 0; 1628 wake_up(&caching_ctl->wait); 1629 } 1630 extent_count++; 1631 } 1632 prev_bit_set = bit_set; 1633 offset += fs_info->sectorsize; 1634 } 1635 } 1636 if (prev_bit_set) { 1637 ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL); 1638 if (ret) 1639 return ret; 1640 extent_count++; 1641 } 1642 1643 if (unlikely(extent_count != expected_extent_count)) { 1644 btrfs_err(fs_info, 1645 "incorrect extent count for %llu; counted %u, expected %u", 1646 block_group->start, extent_count, 1647 expected_extent_count); 1648 DEBUG_WARN(); 1649 return -EIO; 1650 } 1651 1652 return 0; 1653 } 1654 1655 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, 1656 struct btrfs_path *path, 1657 u32 expected_extent_count) 1658 { 1659 struct btrfs_block_group *block_group = caching_ctl->block_group; 1660 struct btrfs_fs_info *fs_info = block_group->fs_info; 1661 struct btrfs_root *root; 1662 struct btrfs_key key; 1663 u64 total_found = 0; 1664 u32 extent_count = 0; 1665 int ret; 1666 1667 root = btrfs_free_space_root(block_group); 1668 1669 while (1) { 1670 u64 space_added; 1671 1672 ret = btrfs_next_item(root, path); 1673 if (ret < 0) 1674 return ret; 1675 if (ret) 1676 break; 1677 1678 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1679 1680 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1681 break; 1682 1683 ret = validate_free_space_key(block_group, &key, BTRFS_FREE_SPACE_EXTENT_KEY); 1684 if (unlikely(ret)) 1685 return ret; 1686 1687 ret = btrfs_add_new_free_space(block_group, key.objectid, 1688 key.objectid + key.offset, 1689 &space_added); 1690 if (ret) 1691 return ret; 1692 total_found += space_added; 1693 if (total_found > CACHING_CTL_WAKE_UP) { 1694 total_found = 0; 1695 wake_up(&caching_ctl->wait); 1696 } 1697 extent_count++; 1698 } 1699 1700 if (unlikely(extent_count != expected_extent_count)) { 1701 btrfs_err(fs_info, 1702 "incorrect extent count for %llu; counted %u, expected %u", 1703 block_group->start, extent_count, 1704 expected_extent_count); 1705 DEBUG_WARN(); 1706 return -EIO; 1707 } 1708 1709 return 0; 1710 } 1711 1712 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) 1713 { 1714 struct btrfs_block_group *block_group; 1715 struct btrfs_free_space_info *info; 1716 BTRFS_PATH_AUTO_FREE(path); 1717 u32 extent_count, flags; 1718 1719 block_group = caching_ctl->block_group; 1720 1721 path = btrfs_alloc_path(); 1722 if (!path) 1723 return -ENOMEM; 1724 1725 /* 1726 * Just like caching_thread() doesn't want to deadlock on the extent 1727 * tree, we don't want to deadlock on the free space tree. 1728 */ 1729 path->skip_locking = true; 1730 path->search_commit_root = true; 1731 path->reada = READA_FORWARD; 1732 1733 info = btrfs_search_free_space_info(NULL, block_group, path, 0); 1734 if (IS_ERR(info)) 1735 return PTR_ERR(info); 1736 1737 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 1738 flags = btrfs_free_space_flags(path->nodes[0], info); 1739 1740 /* 1741 * We left path pointing to the free space info item, so now 1742 * load_free_space_foo can just iterate through the free space tree from 1743 * there. 1744 */ 1745 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) 1746 return load_free_space_bitmaps(caching_ctl, path, extent_count); 1747 else 1748 return load_free_space_extents(caching_ctl, path, extent_count); 1749 } 1750 1751 static int delete_orphan_free_space_entries(struct btrfs_root *fst_root, 1752 struct btrfs_path *path, 1753 u64 first_bg_bytenr) 1754 { 1755 struct btrfs_trans_handle *trans; 1756 int ret; 1757 1758 trans = btrfs_start_transaction(fst_root, 1); 1759 if (IS_ERR(trans)) 1760 return PTR_ERR(trans); 1761 1762 while (true) { 1763 struct btrfs_key key = { 0 }; 1764 int i; 1765 1766 ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1); 1767 if (ret < 0) 1768 break; 1769 ASSERT(ret > 0); 1770 ret = 0; 1771 for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) { 1772 btrfs_item_key_to_cpu(path->nodes[0], &key, i); 1773 if (key.objectid >= first_bg_bytenr) { 1774 /* 1775 * Only break the for() loop and continue to 1776 * delete items. 1777 */ 1778 break; 1779 } 1780 } 1781 /* No items to delete, finished. */ 1782 if (i == 0) 1783 break; 1784 1785 ret = btrfs_del_items(trans, fst_root, path, 0, i); 1786 if (ret < 0) 1787 break; 1788 btrfs_release_path(path); 1789 } 1790 btrfs_release_path(path); 1791 btrfs_end_transaction(trans); 1792 if (ret == 0) 1793 btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries"); 1794 return ret; 1795 } 1796 1797 /* Remove any free space entry before the first block group. */ 1798 int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info) 1799 { 1800 BTRFS_PATH_AUTO_RELEASE(path); 1801 struct btrfs_key key = { 1802 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1803 .type = BTRFS_ROOT_ITEM_KEY, 1804 .offset = 0, 1805 }; 1806 struct btrfs_root *root; 1807 struct btrfs_block_group *bg; 1808 u64 first_bg_bytenr; 1809 int ret; 1810 1811 /* 1812 * Extent tree v2 has multiple global roots based on the block group. 1813 * This means we cannot easily grab the global free space tree and locate 1814 * orphan items. Furthermore this is still experimental, all users 1815 * should use the latest btrfs-progs anyway. 1816 */ 1817 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) 1818 return 0; 1819 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) 1820 return 0; 1821 root = btrfs_global_root(fs_info, &key); 1822 if (!root) 1823 return 0; 1824 1825 key.objectid = 0; 1826 key.type = 0; 1827 key.offset = 0; 1828 1829 bg = btrfs_lookup_first_block_group(fs_info, 0); 1830 if (unlikely(!bg)) { 1831 btrfs_err(fs_info, "no block group found"); 1832 return -EUCLEAN; 1833 } 1834 first_bg_bytenr = bg->start; 1835 btrfs_put_block_group(bg); 1836 1837 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); 1838 if (ret < 0) 1839 return ret; 1840 /* There should not be an all-zero key in fst. */ 1841 ASSERT(ret > 0); 1842 1843 /* Empty free space tree. */ 1844 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) 1845 return 0; 1846 1847 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); 1848 if (key.objectid >= first_bg_bytenr) 1849 return 0; 1850 btrfs_release_path(&path); 1851 return delete_orphan_free_space_entries(root, &path, first_bg_bytenr); 1852 } 1853