1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Facebook. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/sched/mm.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "free-space-tree.h" 13 #include "transaction.h" 14 #include "block-group.h" 15 #include "fs.h" 16 #include "accessors.h" 17 #include "extent-tree.h" 18 #include "root-tree.h" 19 20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 21 struct btrfs_block_group *block_group, 22 struct btrfs_path *path); 23 24 struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group) 25 { 26 struct btrfs_key key = { 27 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 28 .type = BTRFS_ROOT_ITEM_KEY, 29 .offset = 0, 30 }; 31 32 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) 33 key.offset = block_group->global_root_id; 34 return btrfs_global_root(block_group->fs_info, &key); 35 } 36 37 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) 38 { 39 u32 bitmap_range; 40 size_t bitmap_size; 41 u64 num_bitmaps, total_bitmap_size; 42 43 if (WARN_ON(cache->length == 0)) 44 btrfs_warn(cache->fs_info, "block group %llu length is zero", 45 cache->start); 46 47 /* 48 * We convert to bitmaps when the disk space required for using extents 49 * exceeds that required for using bitmaps. 50 */ 51 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 52 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range); 53 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; 54 total_bitmap_size = num_bitmaps * bitmap_size; 55 cache->bitmap_high_thresh = div_u64(total_bitmap_size, 56 sizeof(struct btrfs_item)); 57 58 /* 59 * We allow for a small buffer between the high threshold and low 60 * threshold to avoid thrashing back and forth between the two formats. 
61 */ 62 if (cache->bitmap_high_thresh > 100) 63 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; 64 else 65 cache->bitmap_low_thresh = 0; 66 } 67 68 static int add_new_free_space_info(struct btrfs_trans_handle *trans, 69 struct btrfs_block_group *block_group, 70 struct btrfs_path *path) 71 { 72 struct btrfs_root *root = btrfs_free_space_root(block_group); 73 struct btrfs_free_space_info *info; 74 struct btrfs_key key; 75 struct extent_buffer *leaf; 76 int ret; 77 78 key.objectid = block_group->start; 79 key.type = BTRFS_FREE_SPACE_INFO_KEY; 80 key.offset = block_group->length; 81 82 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); 83 if (ret) 84 return ret; 85 86 leaf = path->nodes[0]; 87 info = btrfs_item_ptr(leaf, path->slots[0], 88 struct btrfs_free_space_info); 89 btrfs_set_free_space_extent_count(leaf, info, 0); 90 btrfs_set_free_space_flags(leaf, info, 0); 91 btrfs_release_path(path); 92 return 0; 93 } 94 95 struct btrfs_free_space_info *btrfs_search_free_space_info( 96 struct btrfs_trans_handle *trans, 97 struct btrfs_block_group *block_group, 98 struct btrfs_path *path, int cow) 99 { 100 struct btrfs_fs_info *fs_info = block_group->fs_info; 101 struct btrfs_root *root = btrfs_free_space_root(block_group); 102 struct btrfs_key key; 103 int ret; 104 105 key.objectid = block_group->start; 106 key.type = BTRFS_FREE_SPACE_INFO_KEY; 107 key.offset = block_group->length; 108 109 ret = btrfs_search_slot(trans, root, &key, path, 0, cow); 110 if (ret < 0) 111 return ERR_PTR(ret); 112 if (ret != 0) { 113 btrfs_warn(fs_info, "missing free space info for %llu", 114 block_group->start); 115 DEBUG_WARN(); 116 return ERR_PTR(-ENOENT); 117 } 118 119 return btrfs_item_ptr(path->nodes[0], path->slots[0], 120 struct btrfs_free_space_info); 121 } 122 123 /* 124 * btrfs_search_slot() but we're looking for the greatest key less than the 125 * passed key. 
126 */ 127 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, 128 struct btrfs_root *root, 129 struct btrfs_key *key, struct btrfs_path *p, 130 int ins_len, int cow) 131 { 132 int ret; 133 134 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); 135 if (ret < 0) 136 return ret; 137 138 if (unlikely(ret == 0)) { 139 DEBUG_WARN(); 140 return -EIO; 141 } 142 143 if (unlikely(p->slots[0] == 0)) { 144 DEBUG_WARN("no previous slot found"); 145 return -EIO; 146 } 147 p->slots[0]--; 148 149 return 0; 150 } 151 152 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info, 153 u64 size) 154 { 155 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE); 156 } 157 158 static unsigned long *alloc_bitmap(u32 bitmap_size) 159 { 160 unsigned long *ret; 161 unsigned int nofs_flag; 162 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long)); 163 164 /* 165 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 166 * into the filesystem here. All callers hold a transaction handle 167 * open, so if a GFP_KERNEL allocation recurses into the filesystem 168 * and triggers a transaction commit, we would deadlock. 
169 */ 170 nofs_flag = memalloc_nofs_save(); 171 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); 172 memalloc_nofs_restore(nofs_flag); 173 return ret; 174 } 175 176 static void le_bitmap_set(unsigned long *map, unsigned int start, int len) 177 { 178 u8 *p = ((u8 *)map) + BIT_BYTE(start); 179 const unsigned int size = start + len; 180 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); 181 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); 182 183 while (len - bits_to_set >= 0) { 184 *p |= mask_to_set; 185 len -= bits_to_set; 186 bits_to_set = BITS_PER_BYTE; 187 mask_to_set = ~0; 188 p++; 189 } 190 if (len) { 191 mask_to_set &= BITMAP_LAST_BYTE_MASK(size); 192 *p |= mask_to_set; 193 } 194 } 195 196 EXPORT_FOR_TESTS 197 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, 198 struct btrfs_block_group *block_group, 199 struct btrfs_path *path) 200 { 201 struct btrfs_fs_info *fs_info = trans->fs_info; 202 struct btrfs_root *root = btrfs_free_space_root(block_group); 203 struct btrfs_free_space_info *info; 204 struct btrfs_key key, found_key; 205 struct extent_buffer *leaf; 206 unsigned long *bitmap; 207 char *bitmap_cursor; 208 u64 start, end; 209 u64 bitmap_range, i; 210 u32 bitmap_size, flags, expected_extent_count; 211 u32 extent_count = 0; 212 int done = 0, nr; 213 int ret; 214 215 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 216 bitmap = alloc_bitmap(bitmap_size); 217 if (unlikely(!bitmap)) 218 return 0; 219 220 start = block_group->start; 221 end = btrfs_block_group_end(block_group); 222 223 key.objectid = end - 1; 224 key.type = (u8)-1; 225 key.offset = (u64)-1; 226 227 while (!done) { 228 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 229 if (unlikely(ret)) { 230 btrfs_abort_transaction(trans, ret); 231 goto out; 232 } 233 234 leaf = path->nodes[0]; 235 nr = 0; 236 path->slots[0]++; 237 while (path->slots[0] > 0) { 238 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 239 240 if 
(found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 241 ASSERT(found_key.objectid == block_group->start); 242 ASSERT(found_key.offset == block_group->length); 243 done = 1; 244 break; 245 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { 246 u64 first, last; 247 248 ASSERT(found_key.objectid >= start); 249 ASSERT(found_key.objectid < end); 250 ASSERT(found_key.objectid + found_key.offset <= end); 251 252 first = div_u64(found_key.objectid - start, 253 fs_info->sectorsize); 254 last = div_u64(found_key.objectid + found_key.offset - start, 255 fs_info->sectorsize); 256 le_bitmap_set(bitmap, first, last - first); 257 258 extent_count++; 259 nr++; 260 path->slots[0]--; 261 } else { 262 ASSERT(0); 263 } 264 } 265 266 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 267 if (unlikely(ret)) { 268 btrfs_abort_transaction(trans, ret); 269 goto out; 270 } 271 btrfs_release_path(path); 272 } 273 274 info = btrfs_search_free_space_info(trans, block_group, path, 1); 275 if (IS_ERR(info)) { 276 ret = PTR_ERR(info); 277 btrfs_abort_transaction(trans, ret); 278 goto out; 279 } 280 leaf = path->nodes[0]; 281 flags = btrfs_free_space_flags(leaf, info); 282 flags |= BTRFS_FREE_SPACE_USING_BITMAPS; 283 block_group->using_free_space_bitmaps = true; 284 block_group->using_free_space_bitmaps_cached = true; 285 btrfs_set_free_space_flags(leaf, info, flags); 286 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 287 btrfs_release_path(path); 288 289 if (unlikely(extent_count != expected_extent_count)) { 290 btrfs_err(fs_info, 291 "incorrect extent count for %llu; counted %u, expected %u", 292 block_group->start, extent_count, 293 expected_extent_count); 294 ret = -EIO; 295 btrfs_abort_transaction(trans, ret); 296 goto out; 297 } 298 299 bitmap_cursor = (char *)bitmap; 300 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 301 i = start; 302 while (i < end) { 303 unsigned long ptr; 304 u64 extent_size; 305 u32 data_size; 306 307 extent_size 
= min(end - i, bitmap_range); 308 data_size = free_space_bitmap_size(fs_info, extent_size); 309 310 key.objectid = i; 311 key.type = BTRFS_FREE_SPACE_BITMAP_KEY; 312 key.offset = extent_size; 313 314 ret = btrfs_insert_empty_item(trans, root, path, &key, 315 data_size); 316 if (unlikely(ret)) { 317 btrfs_abort_transaction(trans, ret); 318 goto out; 319 } 320 321 leaf = path->nodes[0]; 322 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 323 write_extent_buffer(leaf, bitmap_cursor, ptr, 324 data_size); 325 btrfs_release_path(path); 326 327 i += extent_size; 328 bitmap_cursor += data_size; 329 } 330 331 ret = 0; 332 out: 333 kvfree(bitmap); 334 return ret; 335 } 336 337 EXPORT_FOR_TESTS 338 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, 339 struct btrfs_block_group *block_group, 340 struct btrfs_path *path) 341 { 342 struct btrfs_fs_info *fs_info = trans->fs_info; 343 struct btrfs_root *root = btrfs_free_space_root(block_group); 344 struct btrfs_free_space_info *info; 345 struct btrfs_key key, found_key; 346 struct extent_buffer *leaf; 347 unsigned long *bitmap; 348 u64 start, end; 349 u32 bitmap_size, flags, expected_extent_count; 350 unsigned long nrbits, start_bit, end_bit; 351 u32 extent_count = 0; 352 int done = 0, nr; 353 int ret; 354 355 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 356 bitmap = alloc_bitmap(bitmap_size); 357 if (unlikely(!bitmap)) 358 return 0; 359 360 start = block_group->start; 361 end = btrfs_block_group_end(block_group); 362 363 key.objectid = end - 1; 364 key.type = (u8)-1; 365 key.offset = (u64)-1; 366 367 while (!done) { 368 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 369 if (unlikely(ret)) { 370 btrfs_abort_transaction(trans, ret); 371 goto out; 372 } 373 374 leaf = path->nodes[0]; 375 nr = 0; 376 path->slots[0]++; 377 while (path->slots[0] > 0) { 378 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 379 380 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) 
{ 381 ASSERT(found_key.objectid == block_group->start); 382 ASSERT(found_key.offset == block_group->length); 383 done = 1; 384 break; 385 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 386 unsigned long ptr; 387 char *bitmap_cursor; 388 u32 bitmap_pos, data_size; 389 390 ASSERT(found_key.objectid >= start); 391 ASSERT(found_key.objectid < end); 392 ASSERT(found_key.objectid + found_key.offset <= end); 393 394 bitmap_pos = div_u64(found_key.objectid - start, 395 fs_info->sectorsize * 396 BITS_PER_BYTE); 397 bitmap_cursor = ((char *)bitmap) + bitmap_pos; 398 data_size = free_space_bitmap_size(fs_info, 399 found_key.offset); 400 401 path->slots[0]--; 402 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 403 read_extent_buffer(leaf, bitmap_cursor, ptr, 404 data_size); 405 406 nr++; 407 } else { 408 ASSERT(0); 409 } 410 } 411 412 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 413 if (unlikely(ret)) { 414 btrfs_abort_transaction(trans, ret); 415 goto out; 416 } 417 btrfs_release_path(path); 418 } 419 420 info = btrfs_search_free_space_info(trans, block_group, path, 1); 421 if (IS_ERR(info)) { 422 ret = PTR_ERR(info); 423 btrfs_abort_transaction(trans, ret); 424 goto out; 425 } 426 leaf = path->nodes[0]; 427 flags = btrfs_free_space_flags(leaf, info); 428 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; 429 block_group->using_free_space_bitmaps = false; 430 block_group->using_free_space_bitmaps_cached = true; 431 btrfs_set_free_space_flags(leaf, info, flags); 432 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 433 btrfs_release_path(path); 434 435 nrbits = block_group->length >> fs_info->sectorsize_bits; 436 start_bit = find_next_bit_le(bitmap, nrbits, 0); 437 438 while (start_bit < nrbits) { 439 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); 440 ASSERT(start_bit < end_bit); 441 442 key.objectid = start + start_bit * fs_info->sectorsize; 443 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 444 key.offset = (end_bit - start_bit) 
* fs_info->sectorsize; 445 446 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 447 if (unlikely(ret)) { 448 btrfs_abort_transaction(trans, ret); 449 goto out; 450 } 451 btrfs_release_path(path); 452 453 extent_count++; 454 455 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 456 } 457 458 if (unlikely(extent_count != expected_extent_count)) { 459 btrfs_err(fs_info, 460 "incorrect extent count for %llu; counted %u, expected %u", 461 block_group->start, extent_count, 462 expected_extent_count); 463 ret = -EIO; 464 btrfs_abort_transaction(trans, ret); 465 goto out; 466 } 467 468 ret = 0; 469 out: 470 kvfree(bitmap); 471 return ret; 472 } 473 474 static int update_free_space_extent_count(struct btrfs_trans_handle *trans, 475 struct btrfs_block_group *block_group, 476 struct btrfs_path *path, 477 int new_extents) 478 { 479 struct btrfs_free_space_info *info; 480 u32 flags; 481 u32 extent_count; 482 int ret = 0; 483 484 if (new_extents == 0) 485 return 0; 486 487 info = btrfs_search_free_space_info(trans, block_group, path, 1); 488 if (IS_ERR(info)) 489 return PTR_ERR(info); 490 491 flags = btrfs_free_space_flags(path->nodes[0], info); 492 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 493 494 extent_count += new_extents; 495 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); 496 btrfs_release_path(path); 497 498 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 499 extent_count > block_group->bitmap_high_thresh) { 500 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); 501 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 502 extent_count < block_group->bitmap_low_thresh) { 503 ret = btrfs_convert_free_space_to_extents(trans, block_group, path); 504 } 505 506 return ret; 507 } 508 509 EXPORT_FOR_TESTS 510 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, 511 struct btrfs_path *path, u64 offset) 512 { 513 struct extent_buffer *leaf; 514 struct btrfs_key key; 515 u64 
found_start, found_end; 516 unsigned long ptr, i; 517 518 leaf = path->nodes[0]; 519 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 520 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 521 522 found_start = key.objectid; 523 found_end = key.objectid + key.offset; 524 ASSERT(offset >= found_start && offset < found_end); 525 526 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 527 i = div_u64(offset - found_start, 528 block_group->fs_info->sectorsize); 529 return extent_buffer_test_bit(leaf, ptr, i); 530 } 531 532 static void free_space_modify_bits(struct btrfs_trans_handle *trans, 533 struct btrfs_block_group *block_group, 534 struct btrfs_path *path, u64 *start, u64 *size, 535 bool set_bits) 536 { 537 struct btrfs_fs_info *fs_info = block_group->fs_info; 538 struct extent_buffer *leaf; 539 struct btrfs_key key; 540 u64 end = *start + *size; 541 u64 found_start, found_end; 542 unsigned long ptr, first, last; 543 544 leaf = path->nodes[0]; 545 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 546 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 547 548 found_start = key.objectid; 549 found_end = key.objectid + key.offset; 550 ASSERT(*start >= found_start && *start < found_end); 551 ASSERT(end > found_start); 552 553 if (end > found_end) 554 end = found_end; 555 556 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 557 first = (*start - found_start) >> fs_info->sectorsize_bits; 558 last = (end - found_start) >> fs_info->sectorsize_bits; 559 if (set_bits) 560 extent_buffer_bitmap_set(leaf, ptr, first, last - first); 561 else 562 extent_buffer_bitmap_clear(leaf, ptr, first, last - first); 563 btrfs_mark_buffer_dirty(trans, leaf); 564 565 *size -= end - *start; 566 *start = end; 567 } 568 569 /* 570 * We can't use btrfs_next_item() in modify_free_space_bitmap() because 571 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy 572 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're 573 * looking for. 
574 */ 575 static int free_space_next_bitmap(struct btrfs_trans_handle *trans, 576 struct btrfs_root *root, struct btrfs_path *p) 577 { 578 struct btrfs_key key; 579 580 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { 581 p->slots[0]++; 582 return 0; 583 } 584 585 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); 586 btrfs_release_path(p); 587 588 key.objectid += key.offset; 589 key.type = (u8)-1; 590 key.offset = (u64)-1; 591 592 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); 593 } 594 595 /* 596 * If remove is 1, then we are removing free space, thus clearing bits in the 597 * bitmap. If remove is 0, then we are adding free space, thus setting bits in 598 * the bitmap. 599 */ 600 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, 601 struct btrfs_block_group *block_group, 602 struct btrfs_path *path, 603 u64 start, u64 size, bool remove) 604 { 605 struct btrfs_root *root = btrfs_free_space_root(block_group); 606 struct btrfs_key key; 607 u64 end = start + size; 608 u64 cur_start, cur_size; 609 bool prev_bit_set = false; 610 bool next_bit_set = false; 611 int new_extents; 612 int ret; 613 614 /* 615 * Read the bit for the block immediately before the extent of space if 616 * that block is within the block group. 617 */ 618 if (start > block_group->start) { 619 u64 prev_block = start - block_group->fs_info->sectorsize; 620 621 key.objectid = prev_block; 622 key.type = (u8)-1; 623 key.offset = (u64)-1; 624 625 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 626 if (ret) 627 return ret; 628 629 prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block); 630 631 /* The previous block may have been in the previous bitmap. 
*/ 632 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 633 if (start >= key.objectid + key.offset) { 634 ret = free_space_next_bitmap(trans, root, path); 635 if (ret) 636 return ret; 637 } 638 } else { 639 key.objectid = start; 640 key.type = (u8)-1; 641 key.offset = (u64)-1; 642 643 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 644 if (ret) 645 return ret; 646 } 647 648 /* 649 * Iterate over all of the bitmaps overlapped by the extent of space, 650 * clearing/setting bits as required. 651 */ 652 cur_start = start; 653 cur_size = size; 654 while (1) { 655 free_space_modify_bits(trans, block_group, path, &cur_start, 656 &cur_size, !remove); 657 if (cur_size == 0) 658 break; 659 ret = free_space_next_bitmap(trans, root, path); 660 if (ret) 661 return ret; 662 } 663 664 /* 665 * Read the bit for the block immediately after the extent of space if 666 * that block is within the block group. 667 */ 668 if (end < btrfs_block_group_end(block_group)) { 669 /* The next block may be in the next bitmap. */ 670 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 671 if (end >= key.objectid + key.offset) { 672 ret = free_space_next_bitmap(trans, root, path); 673 if (ret) 674 return ret; 675 } 676 677 next_bit_set = btrfs_free_space_test_bit(block_group, path, end); 678 } 679 680 if (remove) { 681 new_extents = -1; 682 if (prev_bit_set) { 683 /* Leftover on the left. */ 684 new_extents++; 685 } 686 if (next_bit_set) { 687 /* Leftover on the right. */ 688 new_extents++; 689 } 690 } else { 691 new_extents = 1; 692 if (prev_bit_set) { 693 /* Merging with neighbor on the left. */ 694 new_extents--; 695 } 696 if (next_bit_set) { 697 /* Merging with neighbor on the right. 
*/ 698 new_extents--; 699 } 700 } 701 702 btrfs_release_path(path); 703 return update_free_space_extent_count(trans, block_group, path, new_extents); 704 } 705 706 static int remove_free_space_extent(struct btrfs_trans_handle *trans, 707 struct btrfs_block_group *block_group, 708 struct btrfs_path *path, 709 u64 start, u64 size) 710 { 711 struct btrfs_root *root = btrfs_free_space_root(block_group); 712 struct btrfs_key key; 713 u64 found_start, found_end; 714 u64 end = start + size; 715 int new_extents = -1; 716 int ret; 717 718 key.objectid = start; 719 key.type = (u8)-1; 720 key.offset = (u64)-1; 721 722 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 723 if (ret) 724 return ret; 725 726 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 727 728 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 729 730 found_start = key.objectid; 731 found_end = key.objectid + key.offset; 732 ASSERT(start >= found_start && end <= found_end); 733 734 /* 735 * Okay, now that we've found the free space extent which contains the 736 * free space that we are removing, there are four cases: 737 * 738 * 1. We're using the whole extent: delete the key we found and 739 * decrement the free space extent count. 740 * 2. We are using part of the extent starting at the beginning: delete 741 * the key we found and insert a new key representing the leftover at 742 * the end. There is no net change in the number of extents. 743 * 3. We are using part of the extent ending at the end: delete the key 744 * we found and insert a new key representing the leftover at the 745 * beginning. There is no net change in the number of extents. 746 * 4. We are using part of the extent in the middle: delete the key we 747 * found and insert two new keys representing the leftovers on each 748 * side. Where we used to have one extent, we now have two, so increment 749 * the extent count. We may need to convert the block group to bitmaps 750 * as a result. 
751 */ 752 753 /* Delete the existing key (cases 1-4). */ 754 ret = btrfs_del_item(trans, root, path); 755 if (ret) 756 return ret; 757 758 /* Add a key for leftovers at the beginning (cases 3 and 4). */ 759 if (start > found_start) { 760 key.objectid = found_start; 761 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 762 key.offset = start - found_start; 763 764 btrfs_release_path(path); 765 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 766 if (ret) 767 return ret; 768 new_extents++; 769 } 770 771 /* Add a key for leftovers at the end (cases 2 and 4). */ 772 if (end < found_end) { 773 key.objectid = end; 774 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 775 key.offset = found_end - end; 776 777 btrfs_release_path(path); 778 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 779 if (ret) 780 return ret; 781 new_extents++; 782 } 783 784 btrfs_release_path(path); 785 return update_free_space_extent_count(trans, block_group, path, new_extents); 786 } 787 788 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) 789 { 790 struct btrfs_free_space_info *info; 791 u32 flags; 792 793 if (bg->using_free_space_bitmaps_cached) 794 return bg->using_free_space_bitmaps; 795 796 info = btrfs_search_free_space_info(NULL, bg, path, 0); 797 if (IS_ERR(info)) 798 return PTR_ERR(info); 799 flags = btrfs_free_space_flags(path->nodes[0], info); 800 btrfs_release_path(path); 801 802 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); 803 bg->using_free_space_bitmaps_cached = true; 804 805 return bg->using_free_space_bitmaps; 806 } 807 808 EXPORT_FOR_TESTS 809 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 810 struct btrfs_block_group *block_group, 811 struct btrfs_path *path, u64 start, u64 size) 812 { 813 int ret; 814 815 ret = __add_block_group_free_space(trans, block_group, path); 816 if (ret) 817 return ret; 818 819 ret = using_bitmaps(block_group, path); 820 if (ret < 0) 821 return ret; 822 823 if (ret) 
824 return modify_free_space_bitmap(trans, block_group, path, 825 start, size, true); 826 827 return remove_free_space_extent(trans, block_group, path, start, size); 828 } 829 830 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 831 u64 start, u64 size) 832 { 833 struct btrfs_block_group *block_group; 834 BTRFS_PATH_AUTO_FREE(path); 835 int ret; 836 837 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 838 return 0; 839 840 path = btrfs_alloc_path(); 841 if (unlikely(!path)) { 842 ret = -ENOMEM; 843 btrfs_abort_transaction(trans, ret); 844 return ret; 845 } 846 847 block_group = btrfs_lookup_block_group(trans->fs_info, start); 848 if (unlikely(!block_group)) { 849 DEBUG_WARN("no block group found for start=%llu", start); 850 ret = -ENOENT; 851 btrfs_abort_transaction(trans, ret); 852 return ret; 853 } 854 855 mutex_lock(&block_group->free_space_lock); 856 ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size); 857 mutex_unlock(&block_group->free_space_lock); 858 if (ret) 859 btrfs_abort_transaction(trans, ret); 860 861 btrfs_put_block_group(block_group); 862 863 return ret; 864 } 865 866 static int add_free_space_extent(struct btrfs_trans_handle *trans, 867 struct btrfs_block_group *block_group, 868 struct btrfs_path *path, 869 u64 start, u64 size) 870 { 871 struct btrfs_root *root = btrfs_free_space_root(block_group); 872 struct btrfs_key key, new_key; 873 u64 found_start, found_end; 874 u64 end = start + size; 875 int new_extents = 1; 876 int ret; 877 878 /* 879 * We are adding a new extent of free space, but we need to merge 880 * extents. There are four cases here: 881 * 882 * 1. The new extent does not have any immediate neighbors to merge 883 * with: add the new key and increment the free space extent count. We 884 * may need to convert the block group to bitmaps as a result. 885 * 2. 
The new extent has an immediate neighbor before it: remove the 886 * previous key and insert a new key combining both of them. There is no 887 * net change in the number of extents. 888 * 3. The new extent has an immediate neighbor after it: remove the next 889 * key and insert a new key combining both of them. There is no net 890 * change in the number of extents. 891 * 4. The new extent has immediate neighbors on both sides: remove both 892 * of the keys and insert a new key combining all of them. Where we used 893 * to have two extents, we now have one, so decrement the extent count. 894 */ 895 896 new_key.objectid = start; 897 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 898 new_key.offset = size; 899 900 /* Search for a neighbor on the left. */ 901 if (start == block_group->start) 902 goto right; 903 key.objectid = start - 1; 904 key.type = (u8)-1; 905 key.offset = (u64)-1; 906 907 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 908 if (ret) 909 return ret; 910 911 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 912 913 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 914 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 915 btrfs_release_path(path); 916 goto right; 917 } 918 919 found_start = key.objectid; 920 found_end = key.objectid + key.offset; 921 ASSERT(found_start >= block_group->start && 922 found_end > block_group->start); 923 ASSERT(found_start < start && found_end <= start); 924 925 /* 926 * Delete the neighbor on the left and absorb it into the new key (cases 927 * 2 and 4). 928 */ 929 if (found_end == start) { 930 ret = btrfs_del_item(trans, root, path); 931 if (ret) 932 return ret; 933 new_key.objectid = found_start; 934 new_key.offset += key.offset; 935 new_extents--; 936 } 937 btrfs_release_path(path); 938 939 right: 940 /* Search for a neighbor on the right. 
*/ 941 if (end == btrfs_block_group_end(block_group)) 942 goto insert; 943 key.objectid = end; 944 key.type = (u8)-1; 945 key.offset = (u64)-1; 946 947 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 948 if (ret) 949 return ret; 950 951 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 952 953 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 954 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 955 btrfs_release_path(path); 956 goto insert; 957 } 958 959 found_start = key.objectid; 960 found_end = key.objectid + key.offset; 961 ASSERT(found_start >= block_group->start && 962 found_end > block_group->start); 963 ASSERT((found_start < start && found_end <= start) || 964 (found_start >= end && found_end > end)); 965 966 /* 967 * Delete the neighbor on the right and absorb it into the new key 968 * (cases 3 and 4). 969 */ 970 if (found_start == end) { 971 ret = btrfs_del_item(trans, root, path); 972 if (ret) 973 return ret; 974 new_key.offset += key.offset; 975 new_extents--; 976 } 977 btrfs_release_path(path); 978 979 insert: 980 /* Insert the new key (cases 1-4). 
*/ 981 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); 982 if (ret) 983 return ret; 984 985 btrfs_release_path(path); 986 return update_free_space_extent_count(trans, block_group, path, new_extents); 987 } 988 989 EXPORT_FOR_TESTS 990 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 991 struct btrfs_block_group *block_group, 992 struct btrfs_path *path, u64 start, u64 size) 993 { 994 int ret; 995 996 ret = __add_block_group_free_space(trans, block_group, path); 997 if (ret) 998 return ret; 999 1000 ret = using_bitmaps(block_group, path); 1001 if (ret < 0) 1002 return ret; 1003 1004 if (ret) 1005 return modify_free_space_bitmap(trans, block_group, path, 1006 start, size, false); 1007 1008 return add_free_space_extent(trans, block_group, path, start, size); 1009 } 1010 1011 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1012 u64 start, u64 size) 1013 { 1014 struct btrfs_block_group *block_group; 1015 BTRFS_PATH_AUTO_FREE(path); 1016 int ret; 1017 1018 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1019 return 0; 1020 1021 path = btrfs_alloc_path(); 1022 if (unlikely(!path)) { 1023 ret = -ENOMEM; 1024 btrfs_abort_transaction(trans, ret); 1025 return ret; 1026 } 1027 1028 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1029 if (unlikely(!block_group)) { 1030 DEBUG_WARN("no block group found for start=%llu", start); 1031 ret = -ENOENT; 1032 btrfs_abort_transaction(trans, ret); 1033 return ret; 1034 } 1035 1036 mutex_lock(&block_group->free_space_lock); 1037 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size); 1038 mutex_unlock(&block_group->free_space_lock); 1039 if (ret) 1040 btrfs_abort_transaction(trans, ret); 1041 1042 btrfs_put_block_group(block_group); 1043 1044 return ret; 1045 } 1046 1047 /* 1048 * Populate the free space tree by walking the extent tree. 
Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	BTRFS_PATH_AUTO_FREE(path);
	BTRFS_PATH_AUTO_FREE(path2);
	struct btrfs_key key;
	u64 start, end;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path2 = btrfs_alloc_path();
	if (!path2)
		return -ENOMEM;

	/* We walk the extent tree forward, so readahead helps. */
	path->reada = READA_FORWARD;

	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		return ret;

	extent_root = btrfs_extent_root(trans->fs_info, block_group->start);
	if (unlikely(!extent_root)) {
		btrfs_err(trans->fs_info,
			  "missing extent root for block group at offset %llu",
			  block_group->start);
		return -EUCLEAN;
	}

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	/*
	 * If ret is 1 (no key found), it means this is an empty block group,
	 * without any extents allocated from it and there's no block group
	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
	 * because we are using the block group tree feature (so block group
	 * items are stored in the block group tree) or this is a new block
	 * group created in the current transaction and its block group item
	 * was not yet inserted in the extent tree (that happens in
	 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
	 * It also means there are no extents allocated for block groups with a
	 * start offset beyond this block group's end offset (this is the last,
	 * highest, block group).
	 */
	start = block_group->start;
	end = btrfs_block_group_end(block_group);
	while (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			if (key.objectid >= end)
				break;

			/* Gap before this allocated extent is free space. */
			if (start < key.objectid) {
				ret = __btrfs_add_to_free_space_tree(trans,
								     block_group,
								     path2, start,
								     key.objectid -
								     start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			/*
			 * METADATA_ITEM keys don't encode a length in their
			 * offset; the extent is one tree node.
			 */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
	}
	/* Trailing free space between the last extent and the group's end. */
	if (start < end) {
		ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
						     start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);

	return ret;
}

/*
 * Create the free space tree root and populate it for every block group,
 * all within a single transaction.
 */
int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/*
	 * While we build the tree, the add/remove hooks must not trust it and
	 * space caching must not read from it yet.
	 */
	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if (IS_ERR(free_space_root)) {
		ret = PTR_ERR(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}
	ret = btrfs_global_root_insert(free_space_root);
	if (unlikely(ret)) {
		btrfs_put_root(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}

	/* Walk every block group and record its free space. */
	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out_clear;
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	ret = btrfs_commit_transaction(trans);

	/*
	 * Now that we've committed the transaction any reading of our commit
	 * root will be safe, so we can cache from the free space tree now.
	 */
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;

out_clear:
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}

/*
 * Delete all items from the free space tree root and clear the
 * FREE_SPACE_ADDED runtime flag on every block group so the tree can be
 * repopulated from scratch.
 */
static int clear_free_space_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key;
	struct rb_node *node;
	int nr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Start from the smallest possible key and delete leaf by leaf. */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			return ret;

		nr = btrfs_header_nritems(path->nodes[0]);
		if (!nr)
			break;

		/* Delete the entire first leaf in one call. */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			return ret;

		btrfs_release_path(path);
	}

	node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *bg;

		bg = rb_entry(node, struct btrfs_block_group, cache_node);
		clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
		node = rb_next(node);
		cond_resched();
	}

	return 0;
}

/*
 * Tear down the free space tree completely: clear its items, delete its root
 * item, drop it from the global root set and free its root node.
 */
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* Drop the compat bits first so nothing relies on the tree anymore. */
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	ret = btrfs_del_root(trans, &free_space_root->root_key);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	btrfs_global_root_delete(free_space_root);

	spin_lock(&fs_info->trans_lock);
	list_del(&free_space_root->dirty_list);
	spin_unlock(&fs_info->trans_lock);

	btrfs_tree_lock(free_space_root->node);
	btrfs_clear_buffer_dirty(trans, free_space_root->node);
	btrfs_tree_unlock(free_space_root->node);
	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
				    free_space_root->node, 0, 1);
	btrfs_put_root(free_space_root);
	if (unlikely(ret < 0)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	return btrfs_commit_transaction(trans);
}

/*
 * Rebuild the free space tree in place: clear all of its items and repopulate
 * it from the extent tree. Unlike creation, this may span multiple
 * transactions (see btrfs_should_end_transaction() below).
 */
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(free_space_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* The tree must not be trusted while it is being rebuilt. */
	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group
				       *block_group;

		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);

		/*
		 * Skip block groups whose free space was already added by the
		 * add/remove hooks during this rebuild (see the comment in
		 * __add_block_group_free_space() below).
		 */
		if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
			     &block_group->runtime_flags))
			goto next;

		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			return ret;
		}
next:
		/* Avoid huge transactions; restart one when it grows too big. */
		if (btrfs_should_end_transaction(trans)) {
			btrfs_end_transaction(trans);
			trans = btrfs_start_transaction(free_space_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

	ret = btrfs_commit_transaction(trans);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}

/*
 * Insert the free space info item and the initial (fully free) space for a
 * block group that was flagged NEEDS_FREE_SPACE. No-op if the flag isn't set.
 * Aborts the transaction on failure. @path may be NULL, in which case a
 * temporary path is allocated.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	bool own_path = false;
	int ret;

	if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
				&block_group->runtime_flags))
		return 0;

	/*
	 * While rebuilding the free space tree we may allocate new metadata
	 * block groups while modifying the free space tree.
	 *
	 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
	 * can use multiple transactions, every time btrfs_end_transaction() is
	 * called at btrfs_rebuild_free_space_tree() we finish the creation of
	 * new block groups by calling btrfs_create_pending_block_groups(), and
	 * that in turn calls us, through btrfs_add_block_group_free_space(),
	 * to add a free space info item and a free space extent item for the
	 * block group.
	 *
	 * Then later btrfs_rebuild_free_space_tree() may find such new block
	 * groups and processes them with populate_free_space_tree(), which can
	 * fail with EEXIST since there are already items for the block group in
	 * the free space tree. Notice that we say "may find" because a new
	 * block group may be added to the block groups rbtree in a node before
	 * or after the block group currently being processed by the rebuild
	 * process. So signal the rebuild process to skip such new block groups
	 * if it finds them.
	 */
	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);

	if (!path) {
		path = btrfs_alloc_path();
		if (unlikely(!path)) {
			btrfs_abort_transaction(trans, -ENOMEM);
			return -ENOMEM;
		}
		own_path = true;
	}

	ret = add_new_free_space_info(trans, block_group, path);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* A brand new block group is entirely free. */
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
					     block_group->start, block_group->length);
	if (ret)
		btrfs_abort_transaction(trans, ret);

out:
	if (own_path)
		btrfs_free_path(path);

	return ret;
}

/*
 * Locked wrapper around __add_block_group_free_space(); the hook called when a
 * new block group becomes visible to the free space tree.
 */
int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
				     struct btrfs_block_group *block_group)
{
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	mutex_lock(&block_group->free_space_lock);
	ret = __add_block_group_free_space(trans, block_group, NULL);
	mutex_unlock(&block_group->free_space_lock);
	return ret;
}

/*
 * Delete every free space tree item belonging to a block group that is being
 * removed: its info item plus all extent/bitmap items in its range.
 */
int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	u64 start,
	    end;
	int done = 0, nr;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		/* We never added this block group to the free space tree. */
		return 0;
	}

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	start = block_group->start;
	end = btrfs_block_group_end(block_group);

	/* Search backwards from the end of the block group's key range. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}

		leaf = path->nodes[0];
		nr = 0;
		/*
		 * Walk the leaf backwards, counting how many contiguous items
		 * belong to this block group, until we hit its info item.
		 */
		path->slots[0]++;
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				nr++;
				path->slots[0]--;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);
				nr++;
				path->slots[0]--;
			} else {
				/* No other key types belong in this range. */
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
		btrfs_release_path(path);
	}

	return 0;
}

/*
 * Cache free space from a block group stored in bitmap format. @path points at
 * the block group's free space info item on entry. Returns -EIO if the number
 * of extents found disagrees with @expected_extent_count.
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	bool prev_bit_set = false;
	/* Initialize to silence GCC. */
	u64 extent_start = 0;
	const u64 end = btrfs_block_group_end(block_group);
	u64 offset;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	root = btrfs_free_space_root(block_group);

	while (1) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* The next block group's info item ends our range. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		/*
		 * Scan the bitmap one sector at a time; a 0->1 transition
		 * starts a free extent, a 1->0 transition ends it.
		 */
		offset = key.objectid;
		while (offset < key.objectid + key.offset) {
			bool bit_set;

			bit_set = btrfs_free_space_test_bit(block_group, path, offset);
			if (!prev_bit_set && bit_set) {
				extent_start = offset;
			} else if (prev_bit_set && !bit_set) {
				u64 space_added;

				ret = btrfs_add_new_free_space(block_group,
							       extent_start,
							       offset,
							       &space_added);
				if (ret)
					return ret;
				total_found += space_added;
				/* Wake up waiters periodically while caching. */
				if (total_found > CACHING_CTL_WAKE_UP) {
					total_found = 0;
					wake_up(&caching_ctl->wait);
				}
				extent_count++;
			}
			prev_bit_set = bit_set;
			offset += fs_info->sectorsize;
		}
	}
	/* A free extent may run to the very end of the block group. */
	if (prev_bit_set) {
		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
		if (ret)
			return ret;
		extent_count++;
	}

	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}

static int
load_free_space_extents(struct btrfs_caching_control *caching_ctl,
			struct btrfs_path *path,
			u32 expected_extent_count)
{
	struct btrfs_block_group *block_group = caching_ctl->block_group;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	const u64 end = btrfs_block_group_end(block_group);
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	root = btrfs_free_space_root(block_group);

	while (1) {
		u64 space_added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* The next block group's info item ends our range. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		/* Each extent item is (objectid = start, offset = length). */
		ret = btrfs_add_new_free_space(block_group, key.objectid,
					       key.objectid + key.offset,
					       &space_added);
		if (ret)
			return ret;
		total_found += space_added;
		/* Wake up waiters periodically while caching. */
		if (total_found > CACHING_CTL_WAKE_UP) {
			total_found = 0;
			wake_up(&caching_ctl->wait);
		}
		extent_count++;
	}

	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}

/*
 * Cache a block group's free space from the free space tree, dispatching to
 * the bitmap or extent loader based on the info item's flags.
 */
int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group *block_group;
	struct btrfs_free_space_info *info;
	BTRFS_PATH_AUTO_FREE(path);
	u32 extent_count, flags;

	block_group = caching_ctl->block_group;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Just like caching_thread() doesn't want to deadlock on the extent
	 * tree, we don't want to deadlock on the free space tree.
	 */
	path->skip_locking = true;
	path->search_commit_root = true;
	path->reada = READA_FORWARD;

	info = btrfs_search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);

	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
	flags = btrfs_free_space_flags(path->nodes[0], info);

	/*
	 * We left path pointing to the free space info item, so now
	 * load_free_space_foo can just iterate through the free space tree from
	 * there.
	 */
	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
		return load_free_space_bitmaps(caching_ctl, path, extent_count);
	else
		return load_free_space_extents(caching_ctl, path, extent_count);
}

/*
 * Delete all free space tree items with objectid below @first_bg_bytenr, i.e.
 * entries left behind by a block group that no longer exists.
 */
static int delete_orphan_free_space_entries(struct btrfs_root *fst_root,
					    struct btrfs_path *path,
					    u64 first_bg_bytenr)
{
	struct btrfs_trans_handle *trans;
	int ret;

	trans = btrfs_start_transaction(fst_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	while (true) {
		struct btrfs_key key = { 0 };
		int i;

		ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1);
		if (ret < 0)
			break;
		/* The all-zero key never exists, so the search can't hit. */
		ASSERT(ret > 0);
		ret = 0;
		/* Count leading items that precede the first block group. */
		for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) {
			btrfs_item_key_to_cpu(path->nodes[0], &key, i);
			if (key.objectid >= first_bg_bytenr) {
				/*
				 * Only break the for() loop and continue to
				 * delete items.
				 */
				break;
			}
		}
		/* No items to delete, finished.
		 */
		if (i == 0)
			break;

		ret = btrfs_del_items(trans, fst_root, path, 0, i);
		if (ret < 0)
			break;
		btrfs_release_path(path);
	}
	btrfs_release_path(path);
	btrfs_end_transaction(trans);
	if (ret == 0)
		btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries");
	return ret;
}

/* Remove any free space entry before the first block group. */
int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info)
{
	BTRFS_PATH_AUTO_RELEASE(path);
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *root;
	struct btrfs_block_group *bg;
	u64 first_bg_bytenr;
	int ret;

	/*
	 * Extent tree v2 has multiple global roots based on the block group.
	 * This means we cannot easily grab the global free space tree and locate
	 * orphan items. Furthermore this is still experimental, all users
	 * should use the latest btrfs-progs anyway.
	 */
	if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2))
		return 0;
	if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE))
		return 0;
	root = btrfs_global_root(fs_info, &key);
	if (!root)
		return 0;

	/* Reuse the key to search from the very start of the tree. */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	bg = btrfs_lookup_first_block_group(fs_info, 0);
	if (unlikely(!bg)) {
		btrfs_err(fs_info, "no block group found");
		return -EUCLEAN;
	}
	first_bg_bytenr = bg->start;
	btrfs_put_block_group(bg);

	/* Read-only probe first; only start a transaction if orphans exist. */
	ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
	if (ret < 0)
		return ret;
	/* There should not be an all-zero key in fst. */
	ASSERT(ret > 0);

	/* Empty free space tree. */
	if (path.slots[0] >= btrfs_header_nritems(path.nodes[0]))
		return 0;

	btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
	if (key.objectid >= first_bg_bytenr)
		return 0;
	btrfs_release_path(&path);
	return delete_orphan_free_space_entries(root, &path, first_bg_bytenr);
}