1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Facebook. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/sched/mm.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "free-space-tree.h" 13 #include "transaction.h" 14 #include "block-group.h" 15 #include "fs.h" 16 #include "accessors.h" 17 #include "extent-tree.h" 18 #include "root-tree.h" 19 20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 21 struct btrfs_block_group *block_group, 22 struct btrfs_path *path); 23 24 struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group) 25 { 26 struct btrfs_key key = { 27 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 28 .type = BTRFS_ROOT_ITEM_KEY, 29 .offset = 0, 30 }; 31 32 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) 33 key.offset = block_group->global_root_id; 34 return btrfs_global_root(block_group->fs_info, &key); 35 } 36 37 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) 38 { 39 u32 bitmap_range; 40 size_t bitmap_size; 41 u64 num_bitmaps, total_bitmap_size; 42 43 if (WARN_ON(cache->length == 0)) 44 btrfs_warn(cache->fs_info, "block group %llu length is zero", 45 cache->start); 46 47 /* 48 * We convert to bitmaps when the disk space required for using extents 49 * exceeds that required for using bitmaps. 50 */ 51 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 52 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range); 53 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; 54 total_bitmap_size = num_bitmaps * bitmap_size; 55 cache->bitmap_high_thresh = div_u64(total_bitmap_size, 56 sizeof(struct btrfs_item)); 57 58 /* 59 * We allow for a small buffer between the high threshold and low 60 * threshold to avoid thrashing back and forth between the two formats. 61 */ 62 if (cache->bitmap_high_thresh > 100) 63 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; 64 else 65 cache->bitmap_low_thresh = 0; 66 } 67 68 static int add_new_free_space_info(struct btrfs_trans_handle *trans, 69 struct btrfs_block_group *block_group, 70 struct btrfs_path *path) 71 { 72 struct btrfs_root *root = btrfs_free_space_root(block_group); 73 struct btrfs_free_space_info *info; 74 struct btrfs_key key; 75 struct extent_buffer *leaf; 76 int ret; 77 78 key.objectid = block_group->start; 79 key.type = BTRFS_FREE_SPACE_INFO_KEY; 80 key.offset = block_group->length; 81 82 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); 83 if (ret) 84 return ret; 85 86 leaf = path->nodes[0]; 87 info = btrfs_item_ptr(leaf, path->slots[0], 88 struct btrfs_free_space_info); 89 btrfs_set_free_space_extent_count(leaf, info, 0); 90 btrfs_set_free_space_flags(leaf, info, 0); 91 btrfs_release_path(path); 92 return 0; 93 } 94 95 struct btrfs_free_space_info *btrfs_search_free_space_info( 96 struct btrfs_trans_handle *trans, 97 struct btrfs_block_group *block_group, 98 struct btrfs_path *path, int cow) 99 { 100 struct btrfs_fs_info *fs_info = block_group->fs_info; 101 struct btrfs_root *root = btrfs_free_space_root(block_group); 102 struct btrfs_key key; 103 int ret; 104 105 key.objectid = block_group->start; 106 key.type = BTRFS_FREE_SPACE_INFO_KEY; 107 key.offset = block_group->length; 108 109 ret = btrfs_search_slot(trans, root, &key, path, 0, cow); 110 if (ret < 0) 111 return ERR_PTR(ret); 112 if (ret != 0) { 113 btrfs_warn(fs_info, "missing free space info for %llu", 114 block_group->start); 115 DEBUG_WARN(); 116 return ERR_PTR(-ENOENT); 117 } 118 119 return btrfs_item_ptr(path->nodes[0], path->slots[0], 120 struct btrfs_free_space_info); 121 } 122 123 /* 124 * btrfs_search_slot() but we're looking for the greatest key less than the 125 * passed key. 126 */ 127 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, 128 struct btrfs_root *root, 129 struct btrfs_key *key, struct btrfs_path *p, 130 int ins_len, int cow) 131 { 132 int ret; 133 134 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); 135 if (ret < 0) 136 return ret; 137 138 if (unlikely(ret == 0)) { 139 DEBUG_WARN(); 140 return -EIO; 141 } 142 143 if (unlikely(p->slots[0] == 0)) { 144 DEBUG_WARN("no previous slot found"); 145 return -EIO; 146 } 147 p->slots[0]--; 148 149 return 0; 150 } 151 152 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info, 153 u64 size) 154 { 155 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE); 156 } 157 158 static unsigned long *alloc_bitmap(u32 bitmap_size) 159 { 160 unsigned long *ret; 161 unsigned int nofs_flag; 162 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long)); 163 164 /* 165 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 166 * into the filesystem here. All callers hold a transaction handle 167 * open, so if a GFP_KERNEL allocation recurses into the filesystem 168 * and triggers a transaction commit, we would deadlock. 169 */ 170 nofs_flag = memalloc_nofs_save(); 171 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); 172 memalloc_nofs_restore(nofs_flag); 173 return ret; 174 } 175 176 static void le_bitmap_set(unsigned long *map, unsigned int start, int len) 177 { 178 u8 *p = ((u8 *)map) + BIT_BYTE(start); 179 const unsigned int size = start + len; 180 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); 181 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); 182 183 while (len - bits_to_set >= 0) { 184 *p |= mask_to_set; 185 len -= bits_to_set; 186 bits_to_set = BITS_PER_BYTE; 187 mask_to_set = ~0; 188 p++; 189 } 190 if (len) { 191 mask_to_set &= BITMAP_LAST_BYTE_MASK(size); 192 *p |= mask_to_set; 193 } 194 } 195 196 EXPORT_FOR_TESTS 197 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, 198 struct btrfs_block_group *block_group, 199 struct btrfs_path *path) 200 { 201 struct btrfs_fs_info *fs_info = trans->fs_info; 202 struct btrfs_root *root = btrfs_free_space_root(block_group); 203 struct btrfs_free_space_info *info; 204 struct btrfs_key key, found_key; 205 struct extent_buffer *leaf; 206 unsigned long *bitmap; 207 char *bitmap_cursor; 208 u64 start, end; 209 u64 bitmap_range, i; 210 u32 bitmap_size, flags, expected_extent_count; 211 u32 extent_count = 0; 212 int done = 0, nr; 213 int ret; 214 215 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 216 bitmap = alloc_bitmap(bitmap_size); 217 if (unlikely(!bitmap)) 218 return 0; 219 220 start = block_group->start; 221 end = btrfs_block_group_end(block_group); 222 223 key.objectid = end - 1; 224 key.type = (u8)-1; 225 key.offset = (u64)-1; 226 227 while (!done) { 228 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 229 if (unlikely(ret)) { 230 btrfs_abort_transaction(trans, ret); 231 goto out; 232 } 233 234 leaf = path->nodes[0]; 235 nr = 0; 236 path->slots[0]++; 237 while (path->slots[0] > 0) { 238 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 239 240 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 241 ASSERT(found_key.objectid == block_group->start); 242 ASSERT(found_key.offset == block_group->length); 243 done = 1; 244 break; 245 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { 246 u64 first, last; 247 248 ASSERT(found_key.objectid >= start); 249 ASSERT(found_key.objectid < end); 250 ASSERT(found_key.objectid + found_key.offset <= end); 251 252 first = div_u64(found_key.objectid - start, 253 fs_info->sectorsize); 254 last = div_u64(found_key.objectid + found_key.offset - start, 255 fs_info->sectorsize); 256 le_bitmap_set(bitmap, first, last - first); 257 258 extent_count++; 259 nr++; 260 path->slots[0]--; 261 } else { 262 btrfs_err(fs_info, "unexpected free space tree key type %u", 263 found_key.type); 264 ret = -EUCLEAN; 265 btrfs_abort_transaction(trans, ret); 266 goto out; 267 } 268 } 269 270 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 271 if (unlikely(ret)) { 272 btrfs_abort_transaction(trans, ret); 273 goto out; 274 } 275 btrfs_release_path(path); 276 } 277 278 info = btrfs_search_free_space_info(trans, block_group, path, 1); 279 if (IS_ERR(info)) { 280 ret = PTR_ERR(info); 281 btrfs_abort_transaction(trans, ret); 282 goto out; 283 } 284 leaf = path->nodes[0]; 285 flags = btrfs_free_space_flags(leaf, info); 286 flags |= BTRFS_FREE_SPACE_USING_BITMAPS; 287 block_group->using_free_space_bitmaps = true; 288 block_group->using_free_space_bitmaps_cached = true; 289 btrfs_set_free_space_flags(leaf, info, flags); 290 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 291 btrfs_release_path(path); 292 293 if (unlikely(extent_count != expected_extent_count)) { 294 btrfs_err(fs_info, 295 "incorrect extent count for %llu; counted %u, expected %u", 296 block_group->start, extent_count, 297 expected_extent_count); 298 ret = -EIO; 299 btrfs_abort_transaction(trans, ret); 300 goto out; 301 } 302 303 bitmap_cursor = (char *)bitmap; 304 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 305 i = start; 306 while (i < end) { 307 unsigned long ptr; 308 u64 extent_size; 309 u32 data_size; 310 311 extent_size = min(end - i, bitmap_range); 312 data_size = free_space_bitmap_size(fs_info, extent_size); 313 314 key.objectid = i; 315 key.type = BTRFS_FREE_SPACE_BITMAP_KEY; 316 key.offset = extent_size; 317 318 ret = btrfs_insert_empty_item(trans, root, path, &key, 319 data_size); 320 if (unlikely(ret)) { 321 btrfs_abort_transaction(trans, ret); 322 goto out; 323 } 324 325 leaf = path->nodes[0]; 326 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 327 write_extent_buffer(leaf, bitmap_cursor, ptr, 328 data_size); 329 btrfs_release_path(path); 330 331 i += extent_size; 332 bitmap_cursor += data_size; 333 } 334 335 ret = 0; 336 out: 337 kvfree(bitmap); 338 return ret; 339 } 340 341 EXPORT_FOR_TESTS 342 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, 343 struct btrfs_block_group *block_group, 344 struct btrfs_path *path) 345 { 346 struct btrfs_fs_info *fs_info = trans->fs_info; 347 struct btrfs_root *root = btrfs_free_space_root(block_group); 348 struct btrfs_free_space_info *info; 349 struct btrfs_key key, found_key; 350 struct extent_buffer *leaf; 351 unsigned long *bitmap; 352 u64 start, end; 353 u32 bitmap_size, flags, expected_extent_count; 354 unsigned long nrbits, start_bit, end_bit; 355 u32 extent_count = 0; 356 int done = 0, nr; 357 int ret; 358 359 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 360 bitmap = alloc_bitmap(bitmap_size); 361 if (unlikely(!bitmap)) 362 return 0; 363 364 start = block_group->start; 365 end = btrfs_block_group_end(block_group); 366 367 key.objectid = end - 1; 368 key.type = (u8)-1; 369 key.offset = (u64)-1; 370 371 while (!done) { 372 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 373 if (unlikely(ret)) { 374 btrfs_abort_transaction(trans, ret); 375 goto out; 376 } 377 378 leaf = path->nodes[0]; 379 nr = 0; 380 path->slots[0]++; 381 while (path->slots[0] > 0) { 382 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 383 384 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 385 ASSERT(found_key.objectid == block_group->start); 386 ASSERT(found_key.offset == block_group->length); 387 done = 1; 388 break; 389 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 390 unsigned long ptr; 391 char *bitmap_cursor; 392 u32 bitmap_pos, data_size; 393 394 ASSERT(found_key.objectid >= start); 395 ASSERT(found_key.objectid < end); 396 ASSERT(found_key.objectid + found_key.offset <= end); 397 398 bitmap_pos = div_u64(found_key.objectid - start, 399 fs_info->sectorsize * 400 BITS_PER_BYTE); 401 bitmap_cursor = ((char *)bitmap) + bitmap_pos; 402 data_size = free_space_bitmap_size(fs_info, 403 found_key.offset); 404 405 path->slots[0]--; 406 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 407 read_extent_buffer(leaf, bitmap_cursor, ptr, 408 data_size); 409 410 nr++; 411 } else { 412 btrfs_err(fs_info, "unexpected free space tree key type %u", 413 found_key.type); 414 ret = -EUCLEAN; 415 btrfs_abort_transaction(trans, ret); 416 goto out; 417 } 418 } 419 420 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 421 if (unlikely(ret)) { 422 btrfs_abort_transaction(trans, ret); 423 goto out; 424 } 425 btrfs_release_path(path); 426 } 427 428 info = btrfs_search_free_space_info(trans, block_group, path, 1); 429 if (IS_ERR(info)) { 430 ret = PTR_ERR(info); 431 btrfs_abort_transaction(trans, ret); 432 goto out; 433 } 434 leaf = path->nodes[0]; 435 flags = btrfs_free_space_flags(leaf, info); 436 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; 437 block_group->using_free_space_bitmaps = false; 438 block_group->using_free_space_bitmaps_cached = true; 439 btrfs_set_free_space_flags(leaf, info, flags); 440 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 441 btrfs_release_path(path); 442 443 nrbits = block_group->length >> fs_info->sectorsize_bits; 444 start_bit = find_next_bit_le(bitmap, nrbits, 0); 445 446 while (start_bit < nrbits) { 447 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); 448 ASSERT(start_bit < end_bit); 449 450 key.objectid = start + start_bit * fs_info->sectorsize; 451 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 452 key.offset = (end_bit - start_bit) * fs_info->sectorsize; 453 454 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 455 if (unlikely(ret)) { 456 btrfs_abort_transaction(trans, ret); 457 goto out; 458 } 459 btrfs_release_path(path); 460 461 extent_count++; 462 463 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 464 } 465 466 if (unlikely(extent_count != expected_extent_count)) { 467 btrfs_err(fs_info, 468 "incorrect extent count for %llu; counted %u, expected %u", 469 block_group->start, extent_count, 470 expected_extent_count); 471 ret = -EIO; 472 btrfs_abort_transaction(trans, ret); 473 goto out; 474 } 475 476 ret = 0; 477 out: 478 kvfree(bitmap); 479 return ret; 480 } 481 482 static int update_free_space_extent_count(struct btrfs_trans_handle *trans, 483 struct btrfs_block_group *block_group, 484 struct btrfs_path *path, 485 int new_extents) 486 { 487 struct btrfs_free_space_info *info; 488 u32 flags; 489 u32 extent_count; 490 int ret = 0; 491 492 if (new_extents == 0) 493 return 0; 494 495 info = btrfs_search_free_space_info(trans, block_group, path, 1); 496 if (IS_ERR(info)) 497 return PTR_ERR(info); 498 499 flags = btrfs_free_space_flags(path->nodes[0], info); 500 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 501 502 extent_count += new_extents; 503 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); 504 btrfs_release_path(path); 505 506 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 507 extent_count > block_group->bitmap_high_thresh) { 508 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); 509 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 510 extent_count < block_group->bitmap_low_thresh) { 511 ret = btrfs_convert_free_space_to_extents(trans, block_group, path); 512 } 513 514 return ret; 515 } 516 517 EXPORT_FOR_TESTS 518 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, 519 struct btrfs_path *path, u64 offset) 520 { 521 struct extent_buffer *leaf; 522 struct btrfs_key key; 523 u64 found_start, found_end; 524 unsigned long ptr, i; 525 526 leaf = path->nodes[0]; 527 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 528 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 529 530 found_start = key.objectid; 531 found_end = key.objectid + key.offset; 532 ASSERT(offset >= found_start && offset < found_end); 533 534 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 535 i = div_u64(offset - found_start, 536 block_group->fs_info->sectorsize); 537 return extent_buffer_test_bit(leaf, ptr, i); 538 } 539 540 static void free_space_modify_bits(struct btrfs_trans_handle *trans, 541 struct btrfs_block_group *block_group, 542 struct btrfs_path *path, u64 *start, u64 *size, 543 bool set_bits) 544 { 545 struct btrfs_fs_info *fs_info = block_group->fs_info; 546 struct extent_buffer *leaf; 547 struct btrfs_key key; 548 u64 end = *start + *size; 549 u64 found_start, found_end; 550 unsigned long ptr, first, last; 551 552 leaf = path->nodes[0]; 553 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 554 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 555 556 found_start = key.objectid; 557 found_end = key.objectid + key.offset; 558 ASSERT(*start >= found_start && *start < found_end); 559 ASSERT(end > found_start); 560 561 if (end > found_end) 562 end = found_end; 563 564 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 565 first = (*start - found_start) >> fs_info->sectorsize_bits; 566 last = (end - found_start) >> fs_info->sectorsize_bits; 567 if (set_bits) 568 extent_buffer_bitmap_set(leaf, ptr, first, last - first); 569 else 570 extent_buffer_bitmap_clear(leaf, ptr, first, last - first); 571 btrfs_mark_buffer_dirty(trans, leaf); 572 573 *size -= end - *start; 574 *start = end; 575 } 576 577 /* 578 * We can't use btrfs_next_item() in modify_free_space_bitmap() because 579 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy 580 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're 581 * looking for. 582 */ 583 static int free_space_next_bitmap(struct btrfs_trans_handle *trans, 584 struct btrfs_root *root, struct btrfs_path *p) 585 { 586 struct btrfs_key key; 587 588 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { 589 p->slots[0]++; 590 return 0; 591 } 592 593 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); 594 btrfs_release_path(p); 595 596 key.objectid += key.offset; 597 key.type = (u8)-1; 598 key.offset = (u64)-1; 599 600 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); 601 } 602 603 /* 604 * If remove is 1, then we are removing free space, thus clearing bits in the 605 * bitmap. If remove is 0, then we are adding free space, thus setting bits in 606 * the bitmap. 607 */ 608 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, 609 struct btrfs_block_group *block_group, 610 struct btrfs_path *path, 611 u64 start, u64 size, bool remove) 612 { 613 struct btrfs_root *root = btrfs_free_space_root(block_group); 614 struct btrfs_key key; 615 u64 end = start + size; 616 u64 cur_start, cur_size; 617 bool prev_bit_set = false; 618 bool next_bit_set = false; 619 int new_extents; 620 int ret; 621 622 /* 623 * Read the bit for the block immediately before the extent of space if 624 * that block is within the block group. 625 */ 626 if (start > block_group->start) { 627 u64 prev_block = start - block_group->fs_info->sectorsize; 628 629 key.objectid = prev_block; 630 key.type = (u8)-1; 631 key.offset = (u64)-1; 632 633 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 634 if (ret) 635 return ret; 636 637 prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block); 638 639 /* The previous block may have been in the previous bitmap. */ 640 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 641 if (start >= key.objectid + key.offset) { 642 ret = free_space_next_bitmap(trans, root, path); 643 if (ret) 644 return ret; 645 } 646 } else { 647 key.objectid = start; 648 key.type = (u8)-1; 649 key.offset = (u64)-1; 650 651 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 652 if (ret) 653 return ret; 654 } 655 656 /* 657 * Iterate over all of the bitmaps overlapped by the extent of space, 658 * clearing/setting bits as required. 659 */ 660 cur_start = start; 661 cur_size = size; 662 while (1) { 663 free_space_modify_bits(trans, block_group, path, &cur_start, 664 &cur_size, !remove); 665 if (cur_size == 0) 666 break; 667 ret = free_space_next_bitmap(trans, root, path); 668 if (ret) 669 return ret; 670 } 671 672 /* 673 * Read the bit for the block immediately after the extent of space if 674 * that block is within the block group. 675 */ 676 if (end < btrfs_block_group_end(block_group)) { 677 /* The next block may be in the next bitmap. */ 678 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 679 if (end >= key.objectid + key.offset) { 680 ret = free_space_next_bitmap(trans, root, path); 681 if (ret) 682 return ret; 683 } 684 685 next_bit_set = btrfs_free_space_test_bit(block_group, path, end); 686 } 687 688 if (remove) { 689 new_extents = -1; 690 if (prev_bit_set) { 691 /* Leftover on the left. */ 692 new_extents++; 693 } 694 if (next_bit_set) { 695 /* Leftover on the right. */ 696 new_extents++; 697 } 698 } else { 699 new_extents = 1; 700 if (prev_bit_set) { 701 /* Merging with neighbor on the left. */ 702 new_extents--; 703 } 704 if (next_bit_set) { 705 /* Merging with neighbor on the right. */ 706 new_extents--; 707 } 708 } 709 710 btrfs_release_path(path); 711 return update_free_space_extent_count(trans, block_group, path, new_extents); 712 } 713 714 static int remove_free_space_extent(struct btrfs_trans_handle *trans, 715 struct btrfs_block_group *block_group, 716 struct btrfs_path *path, 717 u64 start, u64 size) 718 { 719 struct btrfs_root *root = btrfs_free_space_root(block_group); 720 struct btrfs_key key; 721 u64 found_start, found_end; 722 u64 end = start + size; 723 int new_extents = -1; 724 int ret; 725 726 key.objectid = start; 727 key.type = (u8)-1; 728 key.offset = (u64)-1; 729 730 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 731 if (ret) 732 return ret; 733 734 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 735 736 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 737 738 found_start = key.objectid; 739 found_end = key.objectid + key.offset; 740 ASSERT(start >= found_start && end <= found_end); 741 742 /* 743 * Okay, now that we've found the free space extent which contains the 744 * free space that we are removing, there are four cases: 745 * 746 * 1. We're using the whole extent: delete the key we found and 747 * decrement the free space extent count. 748 * 2. We are using part of the extent starting at the beginning: delete 749 * the key we found and insert a new key representing the leftover at 750 * the end. There is no net change in the number of extents. 751 * 3. We are using part of the extent ending at the end: delete the key 752 * we found and insert a new key representing the leftover at the 753 * beginning. There is no net change in the number of extents. 754 * 4. We are using part of the extent in the middle: delete the key we 755 * found and insert two new keys representing the leftovers on each 756 * side. Where we used to have one extent, we now have two, so increment 757 * the extent count. We may need to convert the block group to bitmaps 758 * as a result. 759 */ 760 761 /* Delete the existing key (cases 1-4). */ 762 ret = btrfs_del_item(trans, root, path); 763 if (ret) 764 return ret; 765 766 /* Add a key for leftovers at the beginning (cases 3 and 4). */ 767 if (start > found_start) { 768 key.objectid = found_start; 769 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 770 key.offset = start - found_start; 771 772 btrfs_release_path(path); 773 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 774 if (ret) 775 return ret; 776 new_extents++; 777 } 778 779 /* Add a key for leftovers at the end (cases 2 and 4). */ 780 if (end < found_end) { 781 key.objectid = end; 782 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 783 key.offset = found_end - end; 784 785 btrfs_release_path(path); 786 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 787 if (ret) 788 return ret; 789 new_extents++; 790 } 791 792 btrfs_release_path(path); 793 return update_free_space_extent_count(trans, block_group, path, new_extents); 794 } 795 796 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) 797 { 798 struct btrfs_free_space_info *info; 799 u32 flags; 800 801 if (bg->using_free_space_bitmaps_cached) 802 return bg->using_free_space_bitmaps; 803 804 info = btrfs_search_free_space_info(NULL, bg, path, 0); 805 if (IS_ERR(info)) 806 return PTR_ERR(info); 807 flags = btrfs_free_space_flags(path->nodes[0], info); 808 btrfs_release_path(path); 809 810 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); 811 bg->using_free_space_bitmaps_cached = true; 812 813 return bg->using_free_space_bitmaps; 814 } 815 816 EXPORT_FOR_TESTS 817 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 818 struct btrfs_block_group *block_group, 819 struct btrfs_path *path, u64 start, u64 size) 820 { 821 int ret; 822 823 ret = __add_block_group_free_space(trans, block_group, path); 824 if (ret) 825 return ret; 826 827 ret = using_bitmaps(block_group, path); 828 if (ret < 0) 829 return ret; 830 831 if (ret) 832 return modify_free_space_bitmap(trans, block_group, path, 833 start, size, true); 834 835 return remove_free_space_extent(trans, block_group, path, start, size); 836 } 837 838 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 839 u64 start, u64 size) 840 { 841 struct btrfs_block_group *block_group; 842 BTRFS_PATH_AUTO_FREE(path); 843 int ret; 844 845 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 846 return 0; 847 848 path = btrfs_alloc_path(); 849 if (unlikely(!path)) { 850 ret = -ENOMEM; 851 btrfs_abort_transaction(trans, ret); 852 return ret; 853 } 854 855 block_group = btrfs_lookup_block_group(trans->fs_info, start); 856 if (unlikely(!block_group)) { 857 DEBUG_WARN("no block group found for start=%llu", start); 858 ret = -ENOENT; 859 btrfs_abort_transaction(trans, ret); 860 return ret; 861 } 862 863 mutex_lock(&block_group->free_space_lock); 864 ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size); 865 mutex_unlock(&block_group->free_space_lock); 866 if (ret) 867 btrfs_abort_transaction(trans, ret); 868 869 btrfs_put_block_group(block_group); 870 871 return ret; 872 } 873 874 static int add_free_space_extent(struct btrfs_trans_handle *trans, 875 struct btrfs_block_group *block_group, 876 struct btrfs_path *path, 877 u64 start, u64 size) 878 { 879 struct btrfs_root *root = btrfs_free_space_root(block_group); 880 struct btrfs_key key, new_key; 881 u64 found_start, found_end; 882 u64 end = start + size; 883 int new_extents = 1; 884 int ret; 885 886 /* 887 * We are adding a new extent of free space, but we need to merge 888 * extents. There are four cases here: 889 * 890 * 1. The new extent does not have any immediate neighbors to merge 891 * with: add the new key and increment the free space extent count. We 892 * may need to convert the block group to bitmaps as a result. 893 * 2. The new extent has an immediate neighbor before it: remove the 894 * previous key and insert a new key combining both of them. There is no 895 * net change in the number of extents. 896 * 3. The new extent has an immediate neighbor after it: remove the next 897 * key and insert a new key combining both of them. There is no net 898 * change in the number of extents. 899 * 4. The new extent has immediate neighbors on both sides: remove both 900 * of the keys and insert a new key combining all of them. Where we used 901 * to have two extents, we now have one, so decrement the extent count. 902 */ 903 904 new_key.objectid = start; 905 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 906 new_key.offset = size; 907 908 /* Search for a neighbor on the left. */ 909 if (start == block_group->start) 910 goto right; 911 key.objectid = start - 1; 912 key.type = (u8)-1; 913 key.offset = (u64)-1; 914 915 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 916 if (ret) 917 return ret; 918 919 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 920 921 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 922 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 923 btrfs_release_path(path); 924 goto right; 925 } 926 927 found_start = key.objectid; 928 found_end = key.objectid + key.offset; 929 ASSERT(found_start >= block_group->start && 930 found_end > block_group->start); 931 ASSERT(found_start < start && found_end <= start); 932 933 /* 934 * Delete the neighbor on the left and absorb it into the new key (cases 935 * 2 and 4). 936 */ 937 if (found_end == start) { 938 ret = btrfs_del_item(trans, root, path); 939 if (ret) 940 return ret; 941 new_key.objectid = found_start; 942 new_key.offset += key.offset; 943 new_extents--; 944 } 945 btrfs_release_path(path); 946 947 right: 948 /* Search for a neighbor on the right. */ 949 if (end == btrfs_block_group_end(block_group)) 950 goto insert; 951 key.objectid = end; 952 key.type = (u8)-1; 953 key.offset = (u64)-1; 954 955 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 956 if (ret) 957 return ret; 958 959 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 960 961 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 962 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 963 btrfs_release_path(path); 964 goto insert; 965 } 966 967 found_start = key.objectid; 968 found_end = key.objectid + key.offset; 969 ASSERT(found_start >= block_group->start && 970 found_end > block_group->start); 971 ASSERT((found_start < start && found_end <= start) || 972 (found_start >= end && found_end > end)); 973 974 /* 975 * Delete the neighbor on the right and absorb it into the new key 976 * (cases 3 and 4). 977 */ 978 if (found_start == end) { 979 ret = btrfs_del_item(trans, root, path); 980 if (ret) 981 return ret; 982 new_key.offset += key.offset; 983 new_extents--; 984 } 985 btrfs_release_path(path); 986 987 insert: 988 /* Insert the new key (cases 1-4). */ 989 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); 990 if (ret) 991 return ret; 992 993 btrfs_release_path(path); 994 return update_free_space_extent_count(trans, block_group, path, new_extents); 995 } 996 997 EXPORT_FOR_TESTS 998 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 999 struct btrfs_block_group *block_group, 1000 struct btrfs_path *path, u64 start, u64 size) 1001 { 1002 int ret; 1003 1004 ret = __add_block_group_free_space(trans, block_group, path); 1005 if (ret) 1006 return ret; 1007 1008 ret = using_bitmaps(block_group, path); 1009 if (ret < 0) 1010 return ret; 1011 1012 if (ret) 1013 return modify_free_space_bitmap(trans, block_group, path, 1014 start, size, false); 1015 1016 return add_free_space_extent(trans, block_group, path, start, size); 1017 } 1018 1019 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1020 u64 start, u64 size) 1021 { 1022 struct btrfs_block_group *block_group; 1023 BTRFS_PATH_AUTO_FREE(path); 1024 int ret; 1025 1026 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1027 return 0; 1028 1029 path = btrfs_alloc_path(); 1030 if (unlikely(!path)) { 1031 ret = -ENOMEM; 1032 btrfs_abort_transaction(trans, ret); 1033 return ret; 1034 } 1035 1036 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1037 if (unlikely(!block_group)) { 1038 DEBUG_WARN("no block group found for start=%llu", start); 1039 ret = -ENOENT; 1040 btrfs_abort_transaction(trans, ret); 1041 return ret; 1042 } 1043 1044 mutex_lock(&block_group->free_space_lock); 1045 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size); 1046 mutex_unlock(&block_group->free_space_lock); 1047 if (ret) 1048 btrfs_abort_transaction(trans, ret); 1049 1050 btrfs_put_block_group(block_group); 1051 1052 return ret; 1053 } 1054 1055 /* 1056 * Populate the free space tree by walking the extent tree. Operations on the 1057 * extent tree that happen as a result of writes to the free space tree will go 1058 * through the normal add/remove hooks. 1059 */ 1060 static int populate_free_space_tree(struct btrfs_trans_handle *trans, 1061 struct btrfs_block_group *block_group) 1062 { 1063 struct btrfs_root *extent_root; 1064 BTRFS_PATH_AUTO_FREE(path); 1065 BTRFS_PATH_AUTO_FREE(path2); 1066 struct btrfs_key key; 1067 u64 start, end; 1068 int ret; 1069 1070 path = btrfs_alloc_path(); 1071 if (!path) 1072 return -ENOMEM; 1073 1074 path2 = btrfs_alloc_path(); 1075 if (!path2) 1076 return -ENOMEM; 1077 1078 path->reada = READA_FORWARD; 1079 1080 ret = add_new_free_space_info(trans, block_group, path2); 1081 if (ret) 1082 return ret; 1083 1084 extent_root = btrfs_extent_root(trans->fs_info, block_group->start); 1085 if (unlikely(!extent_root)) { 1086 btrfs_err(trans->fs_info, 1087 "missing extent root for block group at offset %llu", 1088 block_group->start); 1089 return -EUCLEAN; 1090 } 1091 1092 mutex_lock(&block_group->free_space_lock); 1093 1094 /* 1095 * Iterate through all of the extent and metadata items in this block 1096 * group, adding the free space between them and the free space at the 1097 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than 1098 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's 1099 * contained in. 1100 */ 1101 key.objectid = block_group->start; 1102 key.type = BTRFS_EXTENT_ITEM_KEY; 1103 key.offset = 0; 1104 1105 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); 1106 if (ret < 0) 1107 goto out_locked; 1108 /* 1109 * If ret is 1 (no key found), it means this is an empty block group, 1110 * without any extents allocated from it and there's no block group 1111 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree 1112 * because we are using the block group tree feature (so block group 1113 * items are stored in the block group tree) or this is a new block 1114 * group created in the current transaction and its block group item 1115 * was not yet inserted in the extent tree (that happens in 1116 * btrfs_create_pending_block_groups() -> insert_block_group_item()). 1117 * It also means there are no extents allocated for block groups with a 1118 * start offset beyond this block group's end offset (this is the last, 1119 * highest, block group). 1120 */ 1121 start = block_group->start; 1122 end = btrfs_block_group_end(block_group); 1123 while (ret == 0) { 1124 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1125 1126 if (key.type == BTRFS_EXTENT_ITEM_KEY || 1127 key.type == BTRFS_METADATA_ITEM_KEY) { 1128 if (key.objectid >= end) 1129 break; 1130 1131 if (start < key.objectid) { 1132 ret = __btrfs_add_to_free_space_tree(trans, 1133 block_group, 1134 path2, start, 1135 key.objectid - 1136 start); 1137 if (ret) 1138 goto out_locked; 1139 } 1140 start = key.objectid; 1141 if (key.type == BTRFS_METADATA_ITEM_KEY) 1142 start += trans->fs_info->nodesize; 1143 else 1144 start += key.offset; 1145 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 1146 if (key.objectid != block_group->start) 1147 break; 1148 } 1149 1150 ret = btrfs_next_item(extent_root, path); 1151 if (ret < 0) 1152 goto out_locked; 1153 } 1154 if (start < end) { 1155 ret = __btrfs_add_to_free_space_tree(trans, block_group, path2, 1156 start, end - start); 1157 if (ret) 1158 goto out_locked; 1159 } 1160 1161 ret = 0; 1162 out_locked: 1163 mutex_unlock(&block_group->free_space_lock); 1164 1165 return ret; 1166 } 1167 1168 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) 1169 { 1170 struct btrfs_trans_handle *trans; 1171 struct btrfs_root *tree_root = fs_info->tree_root; 1172 struct btrfs_root *free_space_root; 1173 struct btrfs_block_group *block_group; 1174 struct rb_node *node; 1175 int ret; 1176 1177 trans = btrfs_start_transaction(tree_root, 0); 1178 if (IS_ERR(trans)) 1179 return PTR_ERR(trans); 1180 1181 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1182 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1183 free_space_root = btrfs_create_tree(trans, 1184 BTRFS_FREE_SPACE_TREE_OBJECTID); 1185 if (IS_ERR(free_space_root)) { 1186 ret = PTR_ERR(free_space_root); 1187 btrfs_abort_transaction(trans, ret); 1188 btrfs_end_transaction(trans); 1189 goto out_clear; 1190 } 1191 ret = btrfs_global_root_insert(free_space_root); 1192 if (unlikely(ret)) { 1193 btrfs_put_root(free_space_root); 1194 btrfs_abort_transaction(trans, ret); 1195 btrfs_end_transaction(trans); 1196 goto out_clear; 1197 } 1198 1199 node = rb_first_cached(&fs_info->block_group_cache_tree); 1200 while (node) { 1201 block_group = rb_entry(node, struct btrfs_block_group, 1202 cache_node); 1203 ret = populate_free_space_tree(trans, block_group); 1204 if (unlikely(ret)) { 1205 btrfs_abort_transaction(trans, ret); 1206 btrfs_end_transaction(trans); 1207 goto out_clear; 1208 } 1209 node = rb_next(node); 1210 } 1211 1212 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1213 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1214 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1215 ret = btrfs_commit_transaction(trans); 1216 1217 /* 1218 * Now that we've committed the transaction any reading of our commit 1219 * root will be safe, so we can cache from the free space tree now. 1220 */ 1221 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1222 return ret; 1223 1224 out_clear: 1225 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1226 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1227 return ret; 1228 } 1229 1230 static int clear_free_space_tree(struct btrfs_trans_handle *trans, 1231 struct btrfs_root *root) 1232 { 1233 BTRFS_PATH_AUTO_FREE(path); 1234 struct btrfs_key key; 1235 struct rb_node *node; 1236 int nr; 1237 int ret; 1238 1239 path = btrfs_alloc_path(); 1240 if (!path) 1241 return -ENOMEM; 1242 1243 key.objectid = 0; 1244 key.type = 0; 1245 key.offset = 0; 1246 1247 while (1) { 1248 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1249 if (ret < 0) 1250 return ret; 1251 1252 nr = btrfs_header_nritems(path->nodes[0]); 1253 if (!nr) 1254 break; 1255 1256 path->slots[0] = 0; 1257 ret = btrfs_del_items(trans, root, path, 0, nr); 1258 if (ret) 1259 return ret; 1260 1261 btrfs_release_path(path); 1262 } 1263 1264 node = rb_first_cached(&trans->fs_info->block_group_cache_tree); 1265 while (node) { 1266 struct btrfs_block_group *bg; 1267 1268 bg = rb_entry(node, struct btrfs_block_group, cache_node); 1269 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); 1270 node = rb_next(node); 1271 cond_resched(); 1272 } 1273 1274 return 0; 1275 } 1276 1277 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) 1278 { 1279 struct btrfs_trans_handle *trans; 1280 struct btrfs_root *tree_root = fs_info->tree_root; 1281 struct btrfs_key key = { 1282 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1283 .type = BTRFS_ROOT_ITEM_KEY, 1284 .offset = 0, 1285 }; 1286 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1287 int ret; 1288 1289 trans = btrfs_start_transaction(tree_root, 0); 1290 if (IS_ERR(trans)) 1291 return PTR_ERR(trans); 1292 1293 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1294 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1295 1296 ret = clear_free_space_tree(trans, free_space_root); 1297 if (unlikely(ret)) { 1298 btrfs_abort_transaction(trans, ret); 1299 btrfs_end_transaction(trans); 1300 return ret; 1301 } 1302 1303 ret = btrfs_del_root(trans, &free_space_root->root_key); 1304 if (unlikely(ret)) { 1305 btrfs_abort_transaction(trans, ret); 1306 btrfs_end_transaction(trans); 1307 return ret; 1308 } 1309 1310 btrfs_global_root_delete(free_space_root); 1311 1312 spin_lock(&fs_info->trans_lock); 1313 list_del(&free_space_root->dirty_list); 1314 spin_unlock(&fs_info->trans_lock); 1315 1316 btrfs_tree_lock(free_space_root->node); 1317 btrfs_clear_buffer_dirty(trans, free_space_root->node); 1318 btrfs_tree_unlock(free_space_root->node); 1319 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), 1320 free_space_root->node, 0, 1); 1321 btrfs_put_root(free_space_root); 1322 if (unlikely(ret < 0)) { 1323 btrfs_abort_transaction(trans, ret); 1324 btrfs_end_transaction(trans); 1325 return ret; 1326 } 1327 1328 return btrfs_commit_transaction(trans); 1329 } 1330 1331 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) 1332 { 1333 struct btrfs_trans_handle *trans; 1334 struct btrfs_key key = { 1335 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1336 .type = BTRFS_ROOT_ITEM_KEY, 1337 .offset = 0, 1338 }; 1339 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1340 struct rb_node *node; 1341 int ret; 1342 1343 trans = btrfs_start_transaction(free_space_root, 1); 1344 if (IS_ERR(trans)) 1345 return PTR_ERR(trans); 1346 1347 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1348 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1349 1350 ret = clear_free_space_tree(trans, free_space_root); 1351 if (unlikely(ret)) { 1352 btrfs_abort_transaction(trans, ret); 1353 btrfs_end_transaction(trans); 1354 return ret; 1355 } 1356 1357 node = rb_first_cached(&fs_info->block_group_cache_tree); 1358 while (node) { 1359 struct btrfs_block_group *block_group; 1360 1361 block_group = rb_entry(node, struct btrfs_block_group, 1362 cache_node); 1363 1364 if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, 1365 &block_group->runtime_flags)) 1366 goto next; 1367 1368 ret = populate_free_space_tree(trans, block_group); 1369 if (unlikely(ret)) { 1370 btrfs_abort_transaction(trans, ret); 1371 btrfs_end_transaction(trans); 1372 return ret; 1373 } 1374 next: 1375 if (btrfs_should_end_transaction(trans)) { 1376 btrfs_end_transaction(trans); 1377 trans = btrfs_start_transaction(free_space_root, 1); 1378 if (IS_ERR(trans)) 1379 return PTR_ERR(trans); 1380 } 1381 node = rb_next(node); 1382 } 1383 1384 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1385 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1386 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1387 1388 ret = btrfs_commit_transaction(trans); 1389 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1390 return ret; 1391 } 1392 1393 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 1394 struct btrfs_block_group *block_group, 1395 struct btrfs_path *path) 1396 { 1397 bool own_path = false; 1398 int ret; 1399 1400 if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, 1401 &block_group->runtime_flags)) 1402 return 0; 1403 1404 /* 1405 * While rebuilding the free space tree we may allocate new metadata 1406 * block groups while modifying the free space tree. 1407 * 1408 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we 1409 * can use multiple transactions, every time btrfs_end_transaction() is 1410 * called at btrfs_rebuild_free_space_tree() we finish the creation of 1411 * new block groups by calling btrfs_create_pending_block_groups(), and 1412 * that in turn calls us, through btrfs_add_block_group_free_space(), 1413 * to add a free space info item and a free space extent item for the 1414 * block group. 1415 * 1416 * Then later btrfs_rebuild_free_space_tree() may find such new block 1417 * groups and processes them with populate_free_space_tree(), which can 1418 * fail with EEXIST since there are already items for the block group in 1419 * the free space tree. Notice that we say "may find" because a new 1420 * block group may be added to the block groups rbtree in a node before 1421 * or after the block group currently being processed by the rebuild 1422 * process. So signal the rebuild process to skip such new block groups 1423 * if it finds them. 1424 */ 1425 set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); 1426 1427 if (!path) { 1428 path = btrfs_alloc_path(); 1429 if (unlikely(!path)) { 1430 btrfs_abort_transaction(trans, -ENOMEM); 1431 return -ENOMEM; 1432 } 1433 own_path = true; 1434 } 1435 1436 ret = add_new_free_space_info(trans, block_group, path); 1437 if (unlikely(ret)) { 1438 btrfs_abort_transaction(trans, ret); 1439 goto out; 1440 } 1441 1442 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, 1443 block_group->start, block_group->length); 1444 if (ret) 1445 btrfs_abort_transaction(trans, ret); 1446 1447 out: 1448 if (own_path) 1449 btrfs_free_path(path); 1450 1451 return ret; 1452 } 1453 1454 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, 1455 struct btrfs_block_group *block_group) 1456 { 1457 int ret; 1458 1459 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1460 return 0; 1461 1462 mutex_lock(&block_group->free_space_lock); 1463 ret = __add_block_group_free_space(trans, block_group, NULL); 1464 mutex_unlock(&block_group->free_space_lock); 1465 return ret; 1466 } 1467 1468 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, 1469 struct btrfs_block_group *block_group) 1470 { 1471 struct btrfs_root *root = btrfs_free_space_root(block_group); 1472 BTRFS_PATH_AUTO_FREE(path); 1473 struct btrfs_key key, found_key; 1474 struct extent_buffer *leaf; 1475 u64 start, end; 1476 int done = 0, nr; 1477 int ret; 1478 1479 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1480 return 0; 1481 1482 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 1483 /* We never added this block group to the free space tree. */ 1484 return 0; 1485 } 1486 1487 path = btrfs_alloc_path(); 1488 if (unlikely(!path)) { 1489 ret = -ENOMEM; 1490 btrfs_abort_transaction(trans, ret); 1491 return ret; 1492 } 1493 1494 start = block_group->start; 1495 end = btrfs_block_group_end(block_group); 1496 1497 key.objectid = end - 1; 1498 key.type = (u8)-1; 1499 key.offset = (u64)-1; 1500 1501 while (!done) { 1502 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1503 if (unlikely(ret)) { 1504 btrfs_abort_transaction(trans, ret); 1505 return ret; 1506 } 1507 1508 leaf = path->nodes[0]; 1509 nr = 0; 1510 path->slots[0]++; 1511 while (path->slots[0] > 0) { 1512 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 1513 1514 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 1515 ASSERT(found_key.objectid == block_group->start); 1516 ASSERT(found_key.offset == block_group->length); 1517 done = 1; 1518 nr++; 1519 path->slots[0]--; 1520 break; 1521 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY || 1522 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 1523 ASSERT(found_key.objectid >= start); 1524 ASSERT(found_key.objectid < end); 1525 ASSERT(found_key.objectid + found_key.offset <= end); 1526 nr++; 1527 path->slots[0]--; 1528 } else { 1529 btrfs_err(trans->fs_info, "unexpected free space tree key type %u", 1530 found_key.type); 1531 ret = -EUCLEAN; 1532 btrfs_abort_transaction(trans, ret); 1533 return ret; 1534 } 1535 } 1536 1537 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1538 if (unlikely(ret)) { 1539 btrfs_abort_transaction(trans, ret); 1540 return ret; 1541 } 1542 btrfs_release_path(path); 1543 } 1544 1545 return 0; 1546 } 1547 1548 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, 1549 struct btrfs_path *path, 1550 u32 expected_extent_count) 1551 { 1552 struct btrfs_block_group *block_group = caching_ctl->block_group; 1553 struct btrfs_fs_info *fs_info = block_group->fs_info; 1554 struct btrfs_root *root; 1555 struct btrfs_key key; 1556 bool prev_bit_set = false; 1557 /* Initialize to silence GCC. */ 1558 u64 extent_start = 0; 1559 const u64 end = btrfs_block_group_end(block_group); 1560 u64 offset; 1561 u64 total_found = 0; 1562 u32 extent_count = 0; 1563 int ret; 1564 1565 root = btrfs_free_space_root(block_group); 1566 1567 while (1) { 1568 ret = btrfs_next_item(root, path); 1569 if (ret < 0) 1570 return ret; 1571 if (ret) 1572 break; 1573 1574 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1575 1576 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1577 break; 1578 1579 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 1580 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1581 1582 offset = key.objectid; 1583 while (offset < key.objectid + key.offset) { 1584 bool bit_set; 1585 1586 bit_set = btrfs_free_space_test_bit(block_group, path, offset); 1587 if (!prev_bit_set && bit_set) { 1588 extent_start = offset; 1589 } else if (prev_bit_set && !bit_set) { 1590 u64 space_added; 1591 1592 ret = btrfs_add_new_free_space(block_group, 1593 extent_start, 1594 offset, 1595 &space_added); 1596 if (ret) 1597 return ret; 1598 total_found += space_added; 1599 if (total_found > CACHING_CTL_WAKE_UP) { 1600 total_found = 0; 1601 wake_up(&caching_ctl->wait); 1602 } 1603 extent_count++; 1604 } 1605 prev_bit_set = bit_set; 1606 offset += fs_info->sectorsize; 1607 } 1608 } 1609 if (prev_bit_set) { 1610 ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL); 1611 if (ret) 1612 return ret; 1613 extent_count++; 1614 } 1615 1616 if (unlikely(extent_count != expected_extent_count)) { 1617 btrfs_err(fs_info, 1618 "incorrect extent count for %llu; counted %u, expected %u", 1619 block_group->start, extent_count, 1620 expected_extent_count); 1621 DEBUG_WARN(); 1622 return -EIO; 1623 } 1624 1625 return 0; 1626 } 1627 1628 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, 1629 struct btrfs_path *path, 1630 u32 expected_extent_count) 1631 { 1632 struct btrfs_block_group *block_group = caching_ctl->block_group; 1633 struct btrfs_fs_info *fs_info = block_group->fs_info; 1634 struct btrfs_root *root; 1635 struct btrfs_key key; 1636 const u64 end = btrfs_block_group_end(block_group); 1637 u64 total_found = 0; 1638 u32 extent_count = 0; 1639 int ret; 1640 1641 root = btrfs_free_space_root(block_group); 1642 1643 while (1) { 1644 u64 space_added; 1645 1646 ret = btrfs_next_item(root, path); 1647 if (ret < 0) 1648 return ret; 1649 if (ret) 1650 break; 1651 1652 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1653 1654 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1655 break; 1656 1657 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 1658 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1659 1660 ret = btrfs_add_new_free_space(block_group, key.objectid, 1661 key.objectid + key.offset, 1662 &space_added); 1663 if (ret) 1664 return ret; 1665 total_found += space_added; 1666 if (total_found > CACHING_CTL_WAKE_UP) { 1667 total_found = 0; 1668 wake_up(&caching_ctl->wait); 1669 } 1670 extent_count++; 1671 } 1672 1673 if (unlikely(extent_count != expected_extent_count)) { 1674 btrfs_err(fs_info, 1675 "incorrect extent count for %llu; counted %u, expected %u", 1676 block_group->start, extent_count, 1677 expected_extent_count); 1678 DEBUG_WARN(); 1679 return -EIO; 1680 } 1681 1682 return 0; 1683 } 1684 1685 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) 1686 { 1687 struct btrfs_block_group *block_group; 1688 struct btrfs_free_space_info *info; 1689 BTRFS_PATH_AUTO_FREE(path); 1690 u32 extent_count, flags; 1691 1692 block_group = caching_ctl->block_group; 1693 1694 path = btrfs_alloc_path(); 1695 if (!path) 1696 return -ENOMEM; 1697 1698 /* 1699 * Just like caching_thread() doesn't want to deadlock on the extent 1700 * tree, we don't want to deadlock on the free space tree. 1701 */ 1702 path->skip_locking = true; 1703 path->search_commit_root = true; 1704 path->reada = READA_FORWARD; 1705 1706 info = btrfs_search_free_space_info(NULL, block_group, path, 0); 1707 if (IS_ERR(info)) 1708 return PTR_ERR(info); 1709 1710 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 1711 flags = btrfs_free_space_flags(path->nodes[0], info); 1712 1713 /* 1714 * We left path pointing to the free space info item, so now 1715 * load_free_space_foo can just iterate through the free space tree from 1716 * there. 1717 */ 1718 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) 1719 return load_free_space_bitmaps(caching_ctl, path, extent_count); 1720 else 1721 return load_free_space_extents(caching_ctl, path, extent_count); 1722 } 1723 1724 static int delete_orphan_free_space_entries(struct btrfs_root *fst_root, 1725 struct btrfs_path *path, 1726 u64 first_bg_bytenr) 1727 { 1728 struct btrfs_trans_handle *trans; 1729 int ret; 1730 1731 trans = btrfs_start_transaction(fst_root, 1); 1732 if (IS_ERR(trans)) 1733 return PTR_ERR(trans); 1734 1735 while (true) { 1736 struct btrfs_key key = { 0 }; 1737 int i; 1738 1739 ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1); 1740 if (ret < 0) 1741 break; 1742 ASSERT(ret > 0); 1743 ret = 0; 1744 for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) { 1745 btrfs_item_key_to_cpu(path->nodes[0], &key, i); 1746 if (key.objectid >= first_bg_bytenr) { 1747 /* 1748 * Only break the for() loop and continue to 1749 * delete items. 1750 */ 1751 break; 1752 } 1753 } 1754 /* No items to delete, finished. */ 1755 if (i == 0) 1756 break; 1757 1758 ret = btrfs_del_items(trans, fst_root, path, 0, i); 1759 if (ret < 0) 1760 break; 1761 btrfs_release_path(path); 1762 } 1763 btrfs_release_path(path); 1764 btrfs_end_transaction(trans); 1765 if (ret == 0) 1766 btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries"); 1767 return ret; 1768 } 1769 1770 /* Remove any free space entry before the first block group. */ 1771 int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info) 1772 { 1773 BTRFS_PATH_AUTO_RELEASE(path); 1774 struct btrfs_key key = { 1775 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1776 .type = BTRFS_ROOT_ITEM_KEY, 1777 .offset = 0, 1778 }; 1779 struct btrfs_root *root; 1780 struct btrfs_block_group *bg; 1781 u64 first_bg_bytenr; 1782 int ret; 1783 1784 /* 1785 * Extent tree v2 has multiple global roots based on the block group. 1786 * This means we cannot easily grab the global free space tree and locate 1787 * orphan items. Furthermore this is still experimental, all users 1788 * should use the latest btrfs-progs anyway. 1789 */ 1790 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) 1791 return 0; 1792 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) 1793 return 0; 1794 root = btrfs_global_root(fs_info, &key); 1795 if (!root) 1796 return 0; 1797 1798 key.objectid = 0; 1799 key.type = 0; 1800 key.offset = 0; 1801 1802 bg = btrfs_lookup_first_block_group(fs_info, 0); 1803 if (unlikely(!bg)) { 1804 btrfs_err(fs_info, "no block group found"); 1805 return -EUCLEAN; 1806 } 1807 first_bg_bytenr = bg->start; 1808 btrfs_put_block_group(bg); 1809 1810 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); 1811 if (ret < 0) 1812 return ret; 1813 /* There should not be an all-zero key in fst. */ 1814 ASSERT(ret > 0); 1815 1816 /* Empty free space tree. */ 1817 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) 1818 return 0; 1819 1820 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); 1821 if (key.objectid >= first_bg_bytenr) 1822 return 0; 1823 btrfs_release_path(&path); 1824 return delete_orphan_free_space_entries(root, &path, first_bg_bytenr); 1825 } 1826