1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Facebook. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/sched/mm.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "free-space-tree.h" 13 #include "transaction.h" 14 #include "block-group.h" 15 #include "fs.h" 16 #include "accessors.h" 17 #include "extent-tree.h" 18 #include "root-tree.h" 19 20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 21 struct btrfs_block_group *block_group, 22 struct btrfs_path *path); 23 24 struct btrfs_root *btrfs_free_space_root(struct btrfs_block_group *block_group) 25 { 26 struct btrfs_key key = { 27 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 28 .type = BTRFS_ROOT_ITEM_KEY, 29 .offset = 0, 30 }; 31 32 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) 33 key.offset = block_group->global_root_id; 34 return btrfs_global_root(block_group->fs_info, &key); 35 } 36 37 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) 38 { 39 u32 bitmap_range; 40 size_t bitmap_size; 41 u64 num_bitmaps, total_bitmap_size; 42 43 if (WARN_ON(cache->length == 0)) 44 btrfs_warn(cache->fs_info, "block group %llu length is zero", 45 cache->start); 46 47 /* 48 * We convert to bitmaps when the disk space required for using extents 49 * exceeds that required for using bitmaps. 50 */ 51 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 52 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range); 53 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; 54 total_bitmap_size = num_bitmaps * bitmap_size; 55 cache->bitmap_high_thresh = div_u64(total_bitmap_size, 56 sizeof(struct btrfs_item)); 57 58 /* 59 * We allow for a small buffer between the high threshold and low 60 * threshold to avoid thrashing back and forth between the two formats. 61 */ 62 if (cache->bitmap_high_thresh > 100) 63 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; 64 else 65 cache->bitmap_low_thresh = 0; 66 } 67 68 static int add_new_free_space_info(struct btrfs_trans_handle *trans, 69 struct btrfs_block_group *block_group, 70 struct btrfs_path *path) 71 { 72 struct btrfs_root *root = btrfs_free_space_root(block_group); 73 struct btrfs_free_space_info *info; 74 struct btrfs_key key; 75 struct extent_buffer *leaf; 76 int ret; 77 78 key.objectid = block_group->start; 79 key.type = BTRFS_FREE_SPACE_INFO_KEY; 80 key.offset = block_group->length; 81 82 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); 83 if (ret) 84 return ret; 85 86 leaf = path->nodes[0]; 87 info = btrfs_item_ptr(leaf, path->slots[0], 88 struct btrfs_free_space_info); 89 btrfs_set_free_space_extent_count(leaf, info, 0); 90 btrfs_set_free_space_flags(leaf, info, 0); 91 btrfs_release_path(path); 92 return 0; 93 } 94 95 struct btrfs_free_space_info *btrfs_search_free_space_info( 96 struct btrfs_trans_handle *trans, 97 struct btrfs_block_group *block_group, 98 struct btrfs_path *path, int cow) 99 { 100 struct btrfs_fs_info *fs_info = block_group->fs_info; 101 struct btrfs_root *root = btrfs_free_space_root(block_group); 102 struct btrfs_key key; 103 int ret; 104 105 key.objectid = block_group->start; 106 key.type = BTRFS_FREE_SPACE_INFO_KEY; 107 key.offset = block_group->length; 108 109 ret = btrfs_search_slot(trans, root, &key, path, 0, cow); 110 if (ret < 0) 111 return ERR_PTR(ret); 112 if (ret != 0) { 113 btrfs_warn(fs_info, "missing free space info for %llu", 114 block_group->start); 115 DEBUG_WARN(); 116 return ERR_PTR(-ENOENT); 117 } 118 119 return btrfs_item_ptr(path->nodes[0], path->slots[0], 120 struct btrfs_free_space_info); 121 } 122 123 /* 124 * btrfs_search_slot() but we're looking for the greatest key less than the 125 * passed key. 126 */ 127 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, 128 struct btrfs_root *root, 129 struct btrfs_key *key, struct btrfs_path *p, 130 int ins_len, int cow) 131 { 132 int ret; 133 134 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); 135 if (ret < 0) 136 return ret; 137 138 if (unlikely(ret == 0)) { 139 DEBUG_WARN(); 140 return -EIO; 141 } 142 143 if (unlikely(p->slots[0] == 0)) { 144 DEBUG_WARN("no previous slot found"); 145 return -EIO; 146 } 147 p->slots[0]--; 148 149 return 0; 150 } 151 152 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info, 153 u64 size) 154 { 155 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE); 156 } 157 158 static unsigned long *alloc_bitmap(u32 bitmap_size) 159 { 160 unsigned long *ret; 161 unsigned int nofs_flag; 162 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long)); 163 164 /* 165 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 166 * into the filesystem here. All callers hold a transaction handle 167 * open, so if a GFP_KERNEL allocation recurses into the filesystem 168 * and triggers a transaction commit, we would deadlock. 169 */ 170 nofs_flag = memalloc_nofs_save(); 171 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); 172 memalloc_nofs_restore(nofs_flag); 173 return ret; 174 } 175 176 static void le_bitmap_set(unsigned long *map, unsigned int start, int len) 177 { 178 u8 *p = ((u8 *)map) + BIT_BYTE(start); 179 const unsigned int size = start + len; 180 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); 181 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); 182 183 while (len - bits_to_set >= 0) { 184 *p |= mask_to_set; 185 len -= bits_to_set; 186 bits_to_set = BITS_PER_BYTE; 187 mask_to_set = ~0; 188 p++; 189 } 190 if (len) { 191 mask_to_set &= BITMAP_LAST_BYTE_MASK(size); 192 *p |= mask_to_set; 193 } 194 } 195 196 EXPORT_FOR_TESTS 197 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, 198 struct btrfs_block_group *block_group, 199 struct btrfs_path *path) 200 { 201 struct btrfs_fs_info *fs_info = trans->fs_info; 202 struct btrfs_root *root = btrfs_free_space_root(block_group); 203 struct btrfs_free_space_info *info; 204 struct btrfs_key key, found_key; 205 struct extent_buffer *leaf; 206 unsigned long *bitmap; 207 char *bitmap_cursor; 208 u64 start, end; 209 u64 bitmap_range, i; 210 u32 bitmap_size, flags, expected_extent_count; 211 u32 extent_count = 0; 212 int done = 0, nr; 213 int ret; 214 215 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 216 bitmap = alloc_bitmap(bitmap_size); 217 if (unlikely(!bitmap)) 218 return 0; 219 220 start = block_group->start; 221 end = btrfs_block_group_end(block_group); 222 223 key.objectid = end - 1; 224 key.type = (u8)-1; 225 key.offset = (u64)-1; 226 227 while (!done) { 228 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 229 if (unlikely(ret)) { 230 btrfs_abort_transaction(trans, ret); 231 goto out; 232 } 233 234 leaf = path->nodes[0]; 235 nr = 0; 236 path->slots[0]++; 237 while (path->slots[0] > 0) { 238 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 239 240 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 241 ASSERT(found_key.objectid == block_group->start); 242 ASSERT(found_key.offset == block_group->length); 243 done = 1; 244 break; 245 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { 246 u64 first, last; 247 248 ASSERT(found_key.objectid >= start); 249 ASSERT(found_key.objectid < end); 250 ASSERT(found_key.objectid + found_key.offset <= end); 251 252 first = div_u64(found_key.objectid - start, 253 fs_info->sectorsize); 254 last = div_u64(found_key.objectid + found_key.offset - start, 255 fs_info->sectorsize); 256 le_bitmap_set(bitmap, first, last - first); 257 258 extent_count++; 259 nr++; 260 path->slots[0]--; 261 } else { 262 ASSERT(0); 263 } 264 } 265 266 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 267 if (unlikely(ret)) { 268 btrfs_abort_transaction(trans, ret); 269 goto out; 270 } 271 btrfs_release_path(path); 272 } 273 274 info = btrfs_search_free_space_info(trans, block_group, path, 1); 275 if (IS_ERR(info)) { 276 ret = PTR_ERR(info); 277 btrfs_abort_transaction(trans, ret); 278 goto out; 279 } 280 leaf = path->nodes[0]; 281 flags = btrfs_free_space_flags(leaf, info); 282 flags |= BTRFS_FREE_SPACE_USING_BITMAPS; 283 block_group->using_free_space_bitmaps = true; 284 block_group->using_free_space_bitmaps_cached = true; 285 btrfs_set_free_space_flags(leaf, info, flags); 286 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 287 btrfs_release_path(path); 288 289 if (unlikely(extent_count != expected_extent_count)) { 290 btrfs_err(fs_info, 291 "incorrect extent count for %llu; counted %u, expected %u", 292 block_group->start, extent_count, 293 expected_extent_count); 294 ret = -EIO; 295 btrfs_abort_transaction(trans, ret); 296 goto out; 297 } 298 299 bitmap_cursor = (char *)bitmap; 300 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 301 i = start; 302 while (i < end) { 303 unsigned long ptr; 304 u64 extent_size; 305 u32 data_size; 306 307 extent_size = min(end - i, bitmap_range); 308 data_size = free_space_bitmap_size(fs_info, extent_size); 309 310 key.objectid = i; 311 key.type = BTRFS_FREE_SPACE_BITMAP_KEY; 312 key.offset = extent_size; 313 314 ret = btrfs_insert_empty_item(trans, root, path, &key, 315 data_size); 316 if (unlikely(ret)) { 317 btrfs_abort_transaction(trans, ret); 318 goto out; 319 } 320 321 leaf = path->nodes[0]; 322 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 323 write_extent_buffer(leaf, bitmap_cursor, ptr, 324 data_size); 325 btrfs_release_path(path); 326 327 i += extent_size; 328 bitmap_cursor += data_size; 329 } 330 331 ret = 0; 332 out: 333 kvfree(bitmap); 334 return ret; 335 } 336 337 EXPORT_FOR_TESTS 338 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, 339 struct btrfs_block_group *block_group, 340 struct btrfs_path *path) 341 { 342 struct btrfs_fs_info *fs_info = trans->fs_info; 343 struct btrfs_root *root = btrfs_free_space_root(block_group); 344 struct btrfs_free_space_info *info; 345 struct btrfs_key key, found_key; 346 struct extent_buffer *leaf; 347 unsigned long *bitmap; 348 u64 start, end; 349 u32 bitmap_size, flags, expected_extent_count; 350 unsigned long nrbits, start_bit, end_bit; 351 u32 extent_count = 0; 352 int done = 0, nr; 353 int ret; 354 355 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 356 bitmap = alloc_bitmap(bitmap_size); 357 if (unlikely(!bitmap)) 358 return 0; 359 360 start = block_group->start; 361 end = btrfs_block_group_end(block_group); 362 363 key.objectid = end - 1; 364 key.type = (u8)-1; 365 key.offset = (u64)-1; 366 367 while (!done) { 368 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 369 if (unlikely(ret)) { 370 btrfs_abort_transaction(trans, ret); 371 goto out; 372 } 373 374 leaf = path->nodes[0]; 375 nr = 0; 376 path->slots[0]++; 377 while (path->slots[0] > 0) { 378 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 379 380 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 381 ASSERT(found_key.objectid == block_group->start); 382 ASSERT(found_key.offset == block_group->length); 383 done = 1; 384 break; 385 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 386 unsigned long ptr; 387 char *bitmap_cursor; 388 u32 bitmap_pos, data_size; 389 390 ASSERT(found_key.objectid >= start); 391 ASSERT(found_key.objectid < end); 392 ASSERT(found_key.objectid + found_key.offset <= end); 393 394 bitmap_pos = div_u64(found_key.objectid - start, 395 fs_info->sectorsize * 396 BITS_PER_BYTE); 397 bitmap_cursor = ((char *)bitmap) + bitmap_pos; 398 data_size = free_space_bitmap_size(fs_info, 399 found_key.offset); 400 401 path->slots[0]--; 402 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 403 read_extent_buffer(leaf, bitmap_cursor, ptr, 404 data_size); 405 406 nr++; 407 } else { 408 ASSERT(0); 409 } 410 } 411 412 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 413 if (unlikely(ret)) { 414 btrfs_abort_transaction(trans, ret); 415 goto out; 416 } 417 btrfs_release_path(path); 418 } 419 420 info = btrfs_search_free_space_info(trans, block_group, path, 1); 421 if (IS_ERR(info)) { 422 ret = PTR_ERR(info); 423 btrfs_abort_transaction(trans, ret); 424 goto out; 425 } 426 leaf = path->nodes[0]; 427 flags = btrfs_free_space_flags(leaf, info); 428 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; 429 block_group->using_free_space_bitmaps = false; 430 block_group->using_free_space_bitmaps_cached = true; 431 btrfs_set_free_space_flags(leaf, info, flags); 432 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 433 btrfs_release_path(path); 434 435 nrbits = block_group->length >> fs_info->sectorsize_bits; 436 start_bit = find_next_bit_le(bitmap, nrbits, 0); 437 438 while (start_bit < nrbits) { 439 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); 440 ASSERT(start_bit < end_bit); 441 442 key.objectid = start + start_bit * fs_info->sectorsize; 443 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 444 key.offset = (end_bit - start_bit) * fs_info->sectorsize; 445 446 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 447 if (unlikely(ret)) { 448 btrfs_abort_transaction(trans, ret); 449 goto out; 450 } 451 btrfs_release_path(path); 452 453 extent_count++; 454 455 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 456 } 457 458 if (unlikely(extent_count != expected_extent_count)) { 459 btrfs_err(fs_info, 460 "incorrect extent count for %llu; counted %u, expected %u", 461 block_group->start, extent_count, 462 expected_extent_count); 463 ret = -EIO; 464 btrfs_abort_transaction(trans, ret); 465 goto out; 466 } 467 468 ret = 0; 469 out: 470 kvfree(bitmap); 471 return ret; 472 } 473 474 static int update_free_space_extent_count(struct btrfs_trans_handle *trans, 475 struct btrfs_block_group *block_group, 476 struct btrfs_path *path, 477 int new_extents) 478 { 479 struct btrfs_free_space_info *info; 480 u32 flags; 481 u32 extent_count; 482 int ret = 0; 483 484 if (new_extents == 0) 485 return 0; 486 487 info = btrfs_search_free_space_info(trans, block_group, path, 1); 488 if (IS_ERR(info)) 489 return PTR_ERR(info); 490 491 flags = btrfs_free_space_flags(path->nodes[0], info); 492 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 493 494 extent_count += new_extents; 495 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); 496 btrfs_release_path(path); 497 498 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 499 extent_count > block_group->bitmap_high_thresh) { 500 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); 501 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 502 extent_count < block_group->bitmap_low_thresh) { 503 ret = btrfs_convert_free_space_to_extents(trans, block_group, path); 504 } 505 506 return ret; 507 } 508 509 EXPORT_FOR_TESTS 510 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, 511 struct btrfs_path *path, u64 offset) 512 { 513 struct extent_buffer *leaf; 514 struct btrfs_key key; 515 u64 found_start, found_end; 516 unsigned long ptr, i; 517 518 leaf = path->nodes[0]; 519 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 520 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 521 522 found_start = key.objectid; 523 found_end = key.objectid + key.offset; 524 ASSERT(offset >= found_start && offset < found_end); 525 526 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 527 i = div_u64(offset - found_start, 528 block_group->fs_info->sectorsize); 529 return extent_buffer_test_bit(leaf, ptr, i); 530 } 531 532 static void free_space_modify_bits(struct btrfs_trans_handle *trans, 533 struct btrfs_block_group *block_group, 534 struct btrfs_path *path, u64 *start, u64 *size, 535 bool set_bits) 536 { 537 struct btrfs_fs_info *fs_info = block_group->fs_info; 538 struct extent_buffer *leaf; 539 struct btrfs_key key; 540 u64 end = *start + *size; 541 u64 found_start, found_end; 542 unsigned long ptr, first, last; 543 544 leaf = path->nodes[0]; 545 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 546 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 547 548 found_start = key.objectid; 549 found_end = key.objectid + key.offset; 550 ASSERT(*start >= found_start && *start < found_end); 551 ASSERT(end > found_start); 552 553 if (end > found_end) 554 end = found_end; 555 556 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 557 first = (*start - found_start) >> fs_info->sectorsize_bits; 558 last = (end - found_start) >> fs_info->sectorsize_bits; 559 if (set_bits) 560 extent_buffer_bitmap_set(leaf, ptr, first, last - first); 561 else 562 extent_buffer_bitmap_clear(leaf, ptr, first, last - first); 563 btrfs_mark_buffer_dirty(trans, leaf); 564 565 *size -= end - *start; 566 *start = end; 567 } 568 569 /* 570 * We can't use btrfs_next_item() in modify_free_space_bitmap() because 571 * btrfs_next_leaf() doesn't get the path for writing. We can forgo the fancy 572 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're 573 * looking for. 574 */ 575 static int free_space_next_bitmap(struct btrfs_trans_handle *trans, 576 struct btrfs_root *root, struct btrfs_path *p) 577 { 578 struct btrfs_key key; 579 580 if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) { 581 p->slots[0]++; 582 return 0; 583 } 584 585 btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]); 586 btrfs_release_path(p); 587 588 key.objectid += key.offset; 589 key.type = (u8)-1; 590 key.offset = (u64)-1; 591 592 return btrfs_search_prev_slot(trans, root, &key, p, 0, 1); 593 } 594 595 /* 596 * If remove is 1, then we are removing free space, thus clearing bits in the 597 * bitmap. If remove is 0, then we are adding free space, thus setting bits in 598 * the bitmap. 599 */ 600 static int modify_free_space_bitmap(struct btrfs_trans_handle *trans, 601 struct btrfs_block_group *block_group, 602 struct btrfs_path *path, 603 u64 start, u64 size, bool remove) 604 { 605 struct btrfs_root *root = btrfs_free_space_root(block_group); 606 struct btrfs_key key; 607 u64 end = start + size; 608 u64 cur_start, cur_size; 609 bool prev_bit_set = false; 610 bool next_bit_set = false; 611 int new_extents; 612 int ret; 613 614 /* 615 * Read the bit for the block immediately before the extent of space if 616 * that block is within the block group. 617 */ 618 if (start > block_group->start) { 619 u64 prev_block = start - block_group->fs_info->sectorsize; 620 621 key.objectid = prev_block; 622 key.type = (u8)-1; 623 key.offset = (u64)-1; 624 625 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 626 if (ret) 627 return ret; 628 629 prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block); 630 631 /* The previous block may have been in the previous bitmap. */ 632 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 633 if (start >= key.objectid + key.offset) { 634 ret = free_space_next_bitmap(trans, root, path); 635 if (ret) 636 return ret; 637 } 638 } else { 639 key.objectid = start; 640 key.type = (u8)-1; 641 key.offset = (u64)-1; 642 643 ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1); 644 if (ret) 645 return ret; 646 } 647 648 /* 649 * Iterate over all of the bitmaps overlapped by the extent of space, 650 * clearing/setting bits as required. 651 */ 652 cur_start = start; 653 cur_size = size; 654 while (1) { 655 free_space_modify_bits(trans, block_group, path, &cur_start, 656 &cur_size, !remove); 657 if (cur_size == 0) 658 break; 659 ret = free_space_next_bitmap(trans, root, path); 660 if (ret) 661 return ret; 662 } 663 664 /* 665 * Read the bit for the block immediately after the extent of space if 666 * that block is within the block group. 667 */ 668 if (end < btrfs_block_group_end(block_group)) { 669 /* The next block may be in the next bitmap. */ 670 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 671 if (end >= key.objectid + key.offset) { 672 ret = free_space_next_bitmap(trans, root, path); 673 if (ret) 674 return ret; 675 } 676 677 next_bit_set = btrfs_free_space_test_bit(block_group, path, end); 678 } 679 680 if (remove) { 681 new_extents = -1; 682 if (prev_bit_set) { 683 /* Leftover on the left. */ 684 new_extents++; 685 } 686 if (next_bit_set) { 687 /* Leftover on the right. */ 688 new_extents++; 689 } 690 } else { 691 new_extents = 1; 692 if (prev_bit_set) { 693 /* Merging with neighbor on the left. */ 694 new_extents--; 695 } 696 if (next_bit_set) { 697 /* Merging with neighbor on the right. */ 698 new_extents--; 699 } 700 } 701 702 btrfs_release_path(path); 703 return update_free_space_extent_count(trans, block_group, path, new_extents); 704 } 705 706 static int remove_free_space_extent(struct btrfs_trans_handle *trans, 707 struct btrfs_block_group *block_group, 708 struct btrfs_path *path, 709 u64 start, u64 size) 710 { 711 struct btrfs_root *root = btrfs_free_space_root(block_group); 712 struct btrfs_key key; 713 u64 found_start, found_end; 714 u64 end = start + size; 715 int new_extents = -1; 716 int ret; 717 718 key.objectid = start; 719 key.type = (u8)-1; 720 key.offset = (u64)-1; 721 722 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 723 if (ret) 724 return ret; 725 726 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 727 728 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 729 730 found_start = key.objectid; 731 found_end = key.objectid + key.offset; 732 ASSERT(start >= found_start && end <= found_end); 733 734 /* 735 * Okay, now that we've found the free space extent which contains the 736 * free space that we are removing, there are four cases: 737 * 738 * 1. We're using the whole extent: delete the key we found and 739 * decrement the free space extent count. 740 * 2. We are using part of the extent starting at the beginning: delete 741 * the key we found and insert a new key representing the leftover at 742 * the end. There is no net change in the number of extents. 743 * 3. We are using part of the extent ending at the end: delete the key 744 * we found and insert a new key representing the leftover at the 745 * beginning. There is no net change in the number of extents. 746 * 4. We are using part of the extent in the middle: delete the key we 747 * found and insert two new keys representing the leftovers on each 748 * side. Where we used to have one extent, we now have two, so increment 749 * the extent count. We may need to convert the block group to bitmaps 750 * as a result. 751 */ 752 753 /* Delete the existing key (cases 1-4). */ 754 ret = btrfs_del_item(trans, root, path); 755 if (ret) 756 return ret; 757 758 /* Add a key for leftovers at the beginning (cases 3 and 4). */ 759 if (start > found_start) { 760 key.objectid = found_start; 761 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 762 key.offset = start - found_start; 763 764 btrfs_release_path(path); 765 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 766 if (ret) 767 return ret; 768 new_extents++; 769 } 770 771 /* Add a key for leftovers at the end (cases 2 and 4). */ 772 if (end < found_end) { 773 key.objectid = end; 774 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 775 key.offset = found_end - end; 776 777 btrfs_release_path(path); 778 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 779 if (ret) 780 return ret; 781 new_extents++; 782 } 783 784 btrfs_release_path(path); 785 return update_free_space_extent_count(trans, block_group, path, new_extents); 786 } 787 788 static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path) 789 { 790 struct btrfs_free_space_info *info; 791 u32 flags; 792 793 if (bg->using_free_space_bitmaps_cached) 794 return bg->using_free_space_bitmaps; 795 796 info = btrfs_search_free_space_info(NULL, bg, path, 0); 797 if (IS_ERR(info)) 798 return PTR_ERR(info); 799 flags = btrfs_free_space_flags(path->nodes[0], info); 800 btrfs_release_path(path); 801 802 bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS); 803 bg->using_free_space_bitmaps_cached = true; 804 805 return bg->using_free_space_bitmaps; 806 } 807 808 EXPORT_FOR_TESTS 809 int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 810 struct btrfs_block_group *block_group, 811 struct btrfs_path *path, u64 start, u64 size) 812 { 813 int ret; 814 815 ret = __add_block_group_free_space(trans, block_group, path); 816 if (ret) 817 return ret; 818 819 ret = using_bitmaps(block_group, path); 820 if (ret < 0) 821 return ret; 822 823 if (ret) 824 return modify_free_space_bitmap(trans, block_group, path, 825 start, size, true); 826 827 return remove_free_space_extent(trans, block_group, path, start, size); 828 } 829 830 int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans, 831 u64 start, u64 size) 832 { 833 struct btrfs_block_group *block_group; 834 BTRFS_PATH_AUTO_FREE(path); 835 int ret; 836 837 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 838 return 0; 839 840 path = btrfs_alloc_path(); 841 if (unlikely(!path)) { 842 ret = -ENOMEM; 843 btrfs_abort_transaction(trans, ret); 844 return ret; 845 } 846 847 block_group = btrfs_lookup_block_group(trans->fs_info, start); 848 if (unlikely(!block_group)) { 849 DEBUG_WARN("no block group found for start=%llu", start); 850 ret = -ENOENT; 851 btrfs_abort_transaction(trans, ret); 852 return ret; 853 } 854 855 mutex_lock(&block_group->free_space_lock); 856 ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size); 857 mutex_unlock(&block_group->free_space_lock); 858 if (ret) 859 btrfs_abort_transaction(trans, ret); 860 861 btrfs_put_block_group(block_group); 862 863 return ret; 864 } 865 866 static int add_free_space_extent(struct btrfs_trans_handle *trans, 867 struct btrfs_block_group *block_group, 868 struct btrfs_path *path, 869 u64 start, u64 size) 870 { 871 struct btrfs_root *root = btrfs_free_space_root(block_group); 872 struct btrfs_key key, new_key; 873 u64 found_start, found_end; 874 u64 end = start + size; 875 int new_extents = 1; 876 int ret; 877 878 /* 879 * We are adding a new extent of free space, but we need to merge 880 * extents. There are four cases here: 881 * 882 * 1. The new extent does not have any immediate neighbors to merge 883 * with: add the new key and increment the free space extent count. We 884 * may need to convert the block group to bitmaps as a result. 885 * 2. The new extent has an immediate neighbor before it: remove the 886 * previous key and insert a new key combining both of them. There is no 887 * net change in the number of extents. 888 * 3. The new extent has an immediate neighbor after it: remove the next 889 * key and insert a new key combining both of them. There is no net 890 * change in the number of extents. 891 * 4. The new extent has immediate neighbors on both sides: remove both 892 * of the keys and insert a new key combining all of them. Where we used 893 * to have two extents, we now have one, so decrement the extent count. 894 */ 895 896 new_key.objectid = start; 897 new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 898 new_key.offset = size; 899 900 /* Search for a neighbor on the left. */ 901 if (start == block_group->start) 902 goto right; 903 key.objectid = start - 1; 904 key.type = (u8)-1; 905 key.offset = (u64)-1; 906 907 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 908 if (ret) 909 return ret; 910 911 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 912 913 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 914 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 915 btrfs_release_path(path); 916 goto right; 917 } 918 919 found_start = key.objectid; 920 found_end = key.objectid + key.offset; 921 ASSERT(found_start >= block_group->start && 922 found_end > block_group->start); 923 ASSERT(found_start < start && found_end <= start); 924 925 /* 926 * Delete the neighbor on the left and absorb it into the new key (cases 927 * 2 and 4). 928 */ 929 if (found_end == start) { 930 ret = btrfs_del_item(trans, root, path); 931 if (ret) 932 return ret; 933 new_key.objectid = found_start; 934 new_key.offset += key.offset; 935 new_extents--; 936 } 937 btrfs_release_path(path); 938 939 right: 940 /* Search for a neighbor on the right. */ 941 if (end == btrfs_block_group_end(block_group)) 942 goto insert; 943 key.objectid = end; 944 key.type = (u8)-1; 945 key.offset = (u64)-1; 946 947 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 948 if (ret) 949 return ret; 950 951 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 952 953 if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) { 954 ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY); 955 btrfs_release_path(path); 956 goto insert; 957 } 958 959 found_start = key.objectid; 960 found_end = key.objectid + key.offset; 961 ASSERT(found_start >= block_group->start && 962 found_end > block_group->start); 963 ASSERT((found_start < start && found_end <= start) || 964 (found_start >= end && found_end > end)); 965 966 /* 967 * Delete the neighbor on the right and absorb it into the new key 968 * (cases 3 and 4). 969 */ 970 if (found_start == end) { 971 ret = btrfs_del_item(trans, root, path); 972 if (ret) 973 return ret; 974 new_key.offset += key.offset; 975 new_extents--; 976 } 977 btrfs_release_path(path); 978 979 insert: 980 /* Insert the new key (cases 1-4). */ 981 ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0); 982 if (ret) 983 return ret; 984 985 btrfs_release_path(path); 986 return update_free_space_extent_count(trans, block_group, path, new_extents); 987 } 988 989 EXPORT_FOR_TESTS 990 int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 991 struct btrfs_block_group *block_group, 992 struct btrfs_path *path, u64 start, u64 size) 993 { 994 int ret; 995 996 ret = __add_block_group_free_space(trans, block_group, path); 997 if (ret) 998 return ret; 999 1000 ret = using_bitmaps(block_group, path); 1001 if (ret < 0) 1002 return ret; 1003 1004 if (ret) 1005 return modify_free_space_bitmap(trans, block_group, path, 1006 start, size, false); 1007 1008 return add_free_space_extent(trans, block_group, path, start, size); 1009 } 1010 1011 int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans, 1012 u64 start, u64 size) 1013 { 1014 struct btrfs_block_group *block_group; 1015 BTRFS_PATH_AUTO_FREE(path); 1016 int ret; 1017 1018 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1019 return 0; 1020 1021 path = btrfs_alloc_path(); 1022 if (unlikely(!path)) { 1023 ret = -ENOMEM; 1024 btrfs_abort_transaction(trans, ret); 1025 return ret; 1026 } 1027 1028 block_group = btrfs_lookup_block_group(trans->fs_info, start); 1029 if (unlikely(!block_group)) { 1030 DEBUG_WARN("no block group found for start=%llu", start); 1031 ret = -ENOENT; 1032 btrfs_abort_transaction(trans, ret); 1033 return ret; 1034 } 1035 1036 mutex_lock(&block_group->free_space_lock); 1037 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size); 1038 mutex_unlock(&block_group->free_space_lock); 1039 if (ret) 1040 btrfs_abort_transaction(trans, ret); 1041 1042 btrfs_put_block_group(block_group); 1043 1044 return ret; 1045 } 1046 1047 /* 1048 * Populate the free space tree by walking the extent tree. Operations on the 1049 * extent tree that happen as a result of writes to the free space tree will go 1050 * through the normal add/remove hooks. 1051 */ 1052 static int populate_free_space_tree(struct btrfs_trans_handle *trans, 1053 struct btrfs_block_group *block_group) 1054 { 1055 struct btrfs_root *extent_root; 1056 BTRFS_PATH_AUTO_FREE(path); 1057 BTRFS_PATH_AUTO_FREE(path2); 1058 struct btrfs_key key; 1059 u64 start, end; 1060 int ret; 1061 1062 path = btrfs_alloc_path(); 1063 if (!path) 1064 return -ENOMEM; 1065 1066 path2 = btrfs_alloc_path(); 1067 if (!path2) 1068 return -ENOMEM; 1069 1070 path->reada = READA_FORWARD; 1071 1072 ret = add_new_free_space_info(trans, block_group, path2); 1073 if (ret) 1074 return ret; 1075 1076 mutex_lock(&block_group->free_space_lock); 1077 1078 /* 1079 * Iterate through all of the extent and metadata items in this block 1080 * group, adding the free space between them and the free space at the 1081 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than 1082 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's 1083 * contained in. 1084 */ 1085 key.objectid = block_group->start; 1086 key.type = BTRFS_EXTENT_ITEM_KEY; 1087 key.offset = 0; 1088 1089 extent_root = btrfs_extent_root(trans->fs_info, key.objectid); 1090 ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0); 1091 if (ret < 0) 1092 goto out_locked; 1093 /* 1094 * If ret is 1 (no key found), it means this is an empty block group, 1095 * without any extents allocated from it and there's no block group 1096 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree 1097 * because we are using the block group tree feature (so block group 1098 * items are stored in the block group tree) or this is a new block 1099 * group created in the current transaction and its block group item 1100 * was not yet inserted in the extent tree (that happens in 1101 * btrfs_create_pending_block_groups() -> insert_block_group_item()). 1102 * It also means there are no extents allocated for block groups with a 1103 * start offset beyond this block group's end offset (this is the last, 1104 * highest, block group). 1105 */ 1106 start = block_group->start; 1107 end = btrfs_block_group_end(block_group); 1108 while (ret == 0) { 1109 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1110 1111 if (key.type == BTRFS_EXTENT_ITEM_KEY || 1112 key.type == BTRFS_METADATA_ITEM_KEY) { 1113 if (key.objectid >= end) 1114 break; 1115 1116 if (start < key.objectid) { 1117 ret = __btrfs_add_to_free_space_tree(trans, 1118 block_group, 1119 path2, start, 1120 key.objectid - 1121 start); 1122 if (ret) 1123 goto out_locked; 1124 } 1125 start = key.objectid; 1126 if (key.type == BTRFS_METADATA_ITEM_KEY) 1127 start += trans->fs_info->nodesize; 1128 else 1129 start += key.offset; 1130 } else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 1131 if (key.objectid != block_group->start) 1132 break; 1133 } 1134 1135 ret = btrfs_next_item(extent_root, path); 1136 if (ret < 0) 1137 goto out_locked; 1138 } 1139 if (start < end) { 1140 ret = __btrfs_add_to_free_space_tree(trans, block_group, path2, 1141 start, end - start); 1142 if (ret) 1143 goto out_locked; 1144 } 1145 1146 ret = 0; 1147 out_locked: 1148 mutex_unlock(&block_group->free_space_lock); 1149 1150 return ret; 1151 } 1152 1153 int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info) 1154 { 1155 struct btrfs_trans_handle *trans; 1156 struct btrfs_root *tree_root = fs_info->tree_root; 1157 struct btrfs_root *free_space_root; 1158 struct btrfs_block_group *block_group; 1159 struct rb_node *node; 1160 int ret; 1161 1162 trans = btrfs_start_transaction(tree_root, 0); 1163 if (IS_ERR(trans)) 1164 return PTR_ERR(trans); 1165 1166 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1167 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1168 free_space_root = btrfs_create_tree(trans, 1169 BTRFS_FREE_SPACE_TREE_OBJECTID); 1170 if (IS_ERR(free_space_root)) { 1171 ret = PTR_ERR(free_space_root); 1172 btrfs_abort_transaction(trans, ret); 1173 btrfs_end_transaction(trans); 1174 goto out_clear; 1175 } 1176 ret = btrfs_global_root_insert(free_space_root); 1177 if (unlikely(ret)) { 1178 btrfs_put_root(free_space_root); 1179 btrfs_abort_transaction(trans, ret); 1180 btrfs_end_transaction(trans); 1181 goto out_clear; 1182 } 1183 1184 node = rb_first_cached(&fs_info->block_group_cache_tree); 1185 while (node) { 1186 block_group = rb_entry(node, struct btrfs_block_group, 1187 cache_node); 1188 ret = populate_free_space_tree(trans, block_group); 1189 if (unlikely(ret)) { 1190 btrfs_abort_transaction(trans, ret); 1191 btrfs_end_transaction(trans); 1192 goto out_clear; 1193 } 1194 node = rb_next(node); 1195 } 1196 1197 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1198 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1199 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1200 ret = btrfs_commit_transaction(trans); 1201 1202 /* 1203 * Now that we've committed the transaction any reading of our commit 1204 * root will be safe, so we can cache from the free space tree now. 1205 */ 1206 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1207 return ret; 1208 1209 out_clear: 1210 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1211 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1212 return ret; 1213 } 1214 1215 static int clear_free_space_tree(struct btrfs_trans_handle *trans, 1216 struct btrfs_root *root) 1217 { 1218 BTRFS_PATH_AUTO_FREE(path); 1219 struct btrfs_key key; 1220 struct rb_node *node; 1221 int nr; 1222 int ret; 1223 1224 path = btrfs_alloc_path(); 1225 if (!path) 1226 return -ENOMEM; 1227 1228 key.objectid = 0; 1229 key.type = 0; 1230 key.offset = 0; 1231 1232 while (1) { 1233 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1234 if (ret < 0) 1235 return ret; 1236 1237 nr = btrfs_header_nritems(path->nodes[0]); 1238 if (!nr) 1239 break; 1240 1241 path->slots[0] = 0; 1242 ret = btrfs_del_items(trans, root, path, 0, nr); 1243 if (ret) 1244 return ret; 1245 1246 btrfs_release_path(path); 1247 } 1248 1249 node = rb_first_cached(&trans->fs_info->block_group_cache_tree); 1250 while (node) { 1251 struct btrfs_block_group *bg; 1252 1253 bg = rb_entry(node, struct btrfs_block_group, cache_node); 1254 clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags); 1255 node = rb_next(node); 1256 cond_resched(); 1257 } 1258 1259 return 0; 1260 } 1261 1262 int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info) 1263 { 1264 struct btrfs_trans_handle *trans; 1265 struct btrfs_root *tree_root = fs_info->tree_root; 1266 struct btrfs_key key = { 1267 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1268 .type = BTRFS_ROOT_ITEM_KEY, 1269 .offset = 0, 1270 }; 1271 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1272 int ret; 1273 1274 trans = btrfs_start_transaction(tree_root, 0); 1275 if (IS_ERR(trans)) 1276 return PTR_ERR(trans); 1277 1278 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1279 btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1280 1281 ret = clear_free_space_tree(trans, free_space_root); 1282 if (unlikely(ret)) { 1283 btrfs_abort_transaction(trans, ret); 1284 btrfs_end_transaction(trans); 1285 return ret; 1286 } 1287 1288 ret = btrfs_del_root(trans, &free_space_root->root_key); 1289 if (unlikely(ret)) { 1290 btrfs_abort_transaction(trans, ret); 1291 btrfs_end_transaction(trans); 1292 return ret; 1293 } 1294 1295 btrfs_global_root_delete(free_space_root); 1296 1297 spin_lock(&fs_info->trans_lock); 1298 list_del(&free_space_root->dirty_list); 1299 spin_unlock(&fs_info->trans_lock); 1300 1301 btrfs_tree_lock(free_space_root->node); 1302 btrfs_clear_buffer_dirty(trans, free_space_root->node); 1303 btrfs_tree_unlock(free_space_root->node); 1304 ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root), 1305 free_space_root->node, 0, 1); 1306 btrfs_put_root(free_space_root); 1307 if (unlikely(ret < 0)) { 1308 btrfs_abort_transaction(trans, ret); 1309 btrfs_end_transaction(trans); 1310 return ret; 1311 } 1312 1313 return btrfs_commit_transaction(trans); 1314 } 1315 1316 int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info) 1317 { 1318 struct btrfs_trans_handle *trans; 1319 struct btrfs_key key = { 1320 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1321 .type = BTRFS_ROOT_ITEM_KEY, 1322 .offset = 0, 1323 }; 1324 struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key); 1325 struct rb_node *node; 1326 int ret; 1327 1328 trans = btrfs_start_transaction(free_space_root, 1); 1329 if (IS_ERR(trans)) 1330 return PTR_ERR(trans); 1331 1332 set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1333 set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1334 1335 ret = clear_free_space_tree(trans, free_space_root); 1336 if (unlikely(ret)) { 1337 btrfs_abort_transaction(trans, ret); 1338 btrfs_end_transaction(trans); 1339 return ret; 1340 } 1341 1342 node = rb_first_cached(&fs_info->block_group_cache_tree); 1343 while (node) { 1344 struct btrfs_block_group *block_group; 1345 1346 block_group = rb_entry(node, struct btrfs_block_group, 1347 cache_node); 1348 1349 if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, 1350 &block_group->runtime_flags)) 1351 goto next; 1352 1353 ret = populate_free_space_tree(trans, block_group); 1354 if (unlikely(ret)) { 1355 btrfs_abort_transaction(trans, ret); 1356 btrfs_end_transaction(trans); 1357 return ret; 1358 } 1359 next: 1360 if (btrfs_should_end_transaction(trans)) { 1361 btrfs_end_transaction(trans); 1362 trans = btrfs_start_transaction(free_space_root, 1); 1363 if (IS_ERR(trans)) 1364 return PTR_ERR(trans); 1365 } 1366 node = rb_next(node); 1367 } 1368 1369 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE); 1370 btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID); 1371 clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags); 1372 1373 ret = btrfs_commit_transaction(trans); 1374 clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags); 1375 return ret; 1376 } 1377 1378 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 1379 struct btrfs_block_group *block_group, 1380 struct btrfs_path *path) 1381 { 1382 bool own_path = false; 1383 int ret; 1384 1385 if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, 1386 &block_group->runtime_flags)) 1387 return 0; 1388 1389 /* 1390 * While rebuilding the free space tree we may allocate new metadata 1391 * block groups while modifying the free space tree. 1392 * 1393 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we 1394 * can use multiple transactions, every time btrfs_end_transaction() is 1395 * called at btrfs_rebuild_free_space_tree() we finish the creation of 1396 * new block groups by calling btrfs_create_pending_block_groups(), and 1397 * that in turn calls us, through btrfs_add_block_group_free_space(), 1398 * to add a free space info item and a free space extent item for the 1399 * block group. 1400 * 1401 * Then later btrfs_rebuild_free_space_tree() may find such new block 1402 * groups and processes them with populate_free_space_tree(), which can 1403 * fail with EEXIST since there are already items for the block group in 1404 * the free space tree. Notice that we say "may find" because a new 1405 * block group may be added to the block groups rbtree in a node before 1406 * or after the block group currently being processed by the rebuild 1407 * process. So signal the rebuild process to skip such new block groups 1408 * if it finds them. 1409 */ 1410 set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags); 1411 1412 if (!path) { 1413 path = btrfs_alloc_path(); 1414 if (unlikely(!path)) { 1415 btrfs_abort_transaction(trans, -ENOMEM); 1416 return -ENOMEM; 1417 } 1418 own_path = true; 1419 } 1420 1421 ret = add_new_free_space_info(trans, block_group, path); 1422 if (unlikely(ret)) { 1423 btrfs_abort_transaction(trans, ret); 1424 goto out; 1425 } 1426 1427 ret = __btrfs_add_to_free_space_tree(trans, block_group, path, 1428 block_group->start, block_group->length); 1429 if (ret) 1430 btrfs_abort_transaction(trans, ret); 1431 1432 out: 1433 if (own_path) 1434 btrfs_free_path(path); 1435 1436 return ret; 1437 } 1438 1439 int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans, 1440 struct btrfs_block_group *block_group) 1441 { 1442 int ret; 1443 1444 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1445 return 0; 1446 1447 mutex_lock(&block_group->free_space_lock); 1448 ret = __add_block_group_free_space(trans, block_group, NULL); 1449 mutex_unlock(&block_group->free_space_lock); 1450 return ret; 1451 } 1452 1453 int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans, 1454 struct btrfs_block_group *block_group) 1455 { 1456 struct btrfs_root *root = btrfs_free_space_root(block_group); 1457 BTRFS_PATH_AUTO_FREE(path); 1458 struct btrfs_key key, found_key; 1459 struct extent_buffer *leaf; 1460 u64 start, end; 1461 int done = 0, nr; 1462 int ret; 1463 1464 if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE)) 1465 return 0; 1466 1467 if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) { 1468 /* We never added this block group to the free space tree. */ 1469 return 0; 1470 } 1471 1472 path = btrfs_alloc_path(); 1473 if (unlikely(!path)) { 1474 ret = -ENOMEM; 1475 btrfs_abort_transaction(trans, ret); 1476 return ret; 1477 } 1478 1479 start = block_group->start; 1480 end = btrfs_block_group_end(block_group); 1481 1482 key.objectid = end - 1; 1483 key.type = (u8)-1; 1484 key.offset = (u64)-1; 1485 1486 while (!done) { 1487 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 1488 if (unlikely(ret)) { 1489 btrfs_abort_transaction(trans, ret); 1490 return ret; 1491 } 1492 1493 leaf = path->nodes[0]; 1494 nr = 0; 1495 path->slots[0]++; 1496 while (path->slots[0] > 0) { 1497 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 1498 1499 if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 1500 ASSERT(found_key.objectid == block_group->start); 1501 ASSERT(found_key.offset == block_group->length); 1502 done = 1; 1503 nr++; 1504 path->slots[0]--; 1505 break; 1506 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY || 1507 found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 1508 ASSERT(found_key.objectid >= start); 1509 ASSERT(found_key.objectid < end); 1510 ASSERT(found_key.objectid + found_key.offset <= end); 1511 nr++; 1512 path->slots[0]--; 1513 } else { 1514 ASSERT(0); 1515 } 1516 } 1517 1518 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 1519 if (unlikely(ret)) { 1520 btrfs_abort_transaction(trans, ret); 1521 return ret; 1522 } 1523 btrfs_release_path(path); 1524 } 1525 1526 return 0; 1527 } 1528 1529 static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl, 1530 struct btrfs_path *path, 1531 u32 expected_extent_count) 1532 { 1533 struct btrfs_block_group *block_group = caching_ctl->block_group; 1534 struct btrfs_fs_info *fs_info = block_group->fs_info; 1535 struct btrfs_root *root; 1536 struct btrfs_key key; 1537 bool prev_bit_set = false; 1538 /* Initialize to silence GCC. */ 1539 u64 extent_start = 0; 1540 const u64 end = btrfs_block_group_end(block_group); 1541 u64 offset; 1542 u64 total_found = 0; 1543 u32 extent_count = 0; 1544 int ret; 1545 1546 root = btrfs_free_space_root(block_group); 1547 1548 while (1) { 1549 ret = btrfs_next_item(root, path); 1550 if (ret < 0) 1551 return ret; 1552 if (ret) 1553 break; 1554 1555 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1556 1557 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1558 break; 1559 1560 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 1561 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1562 1563 offset = key.objectid; 1564 while (offset < key.objectid + key.offset) { 1565 bool bit_set; 1566 1567 bit_set = btrfs_free_space_test_bit(block_group, path, offset); 1568 if (!prev_bit_set && bit_set) { 1569 extent_start = offset; 1570 } else if (prev_bit_set && !bit_set) { 1571 u64 space_added; 1572 1573 ret = btrfs_add_new_free_space(block_group, 1574 extent_start, 1575 offset, 1576 &space_added); 1577 if (ret) 1578 return ret; 1579 total_found += space_added; 1580 if (total_found > CACHING_CTL_WAKE_UP) { 1581 total_found = 0; 1582 wake_up(&caching_ctl->wait); 1583 } 1584 extent_count++; 1585 } 1586 prev_bit_set = bit_set; 1587 offset += fs_info->sectorsize; 1588 } 1589 } 1590 if (prev_bit_set) { 1591 ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL); 1592 if (ret) 1593 return ret; 1594 extent_count++; 1595 } 1596 1597 if (unlikely(extent_count != expected_extent_count)) { 1598 btrfs_err(fs_info, 1599 "incorrect extent count for %llu; counted %u, expected %u", 1600 block_group->start, extent_count, 1601 expected_extent_count); 1602 DEBUG_WARN(); 1603 return -EIO; 1604 } 1605 1606 return 0; 1607 } 1608 1609 static int load_free_space_extents(struct btrfs_caching_control *caching_ctl, 1610 struct btrfs_path *path, 1611 u32 expected_extent_count) 1612 { 1613 struct btrfs_block_group *block_group = caching_ctl->block_group; 1614 struct btrfs_fs_info *fs_info = block_group->fs_info; 1615 struct btrfs_root *root; 1616 struct btrfs_key key; 1617 const u64 end = btrfs_block_group_end(block_group); 1618 u64 total_found = 0; 1619 u32 extent_count = 0; 1620 int ret; 1621 1622 root = btrfs_free_space_root(block_group); 1623 1624 while (1) { 1625 u64 space_added; 1626 1627 ret = btrfs_next_item(root, path); 1628 if (ret < 0) 1629 return ret; 1630 if (ret) 1631 break; 1632 1633 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 1634 1635 if (key.type == BTRFS_FREE_SPACE_INFO_KEY) 1636 break; 1637 1638 ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY); 1639 ASSERT(key.objectid < end && key.objectid + key.offset <= end); 1640 1641 ret = btrfs_add_new_free_space(block_group, key.objectid, 1642 key.objectid + key.offset, 1643 &space_added); 1644 if (ret) 1645 return ret; 1646 total_found += space_added; 1647 if (total_found > CACHING_CTL_WAKE_UP) { 1648 total_found = 0; 1649 wake_up(&caching_ctl->wait); 1650 } 1651 extent_count++; 1652 } 1653 1654 if (unlikely(extent_count != expected_extent_count)) { 1655 btrfs_err(fs_info, 1656 "incorrect extent count for %llu; counted %u, expected %u", 1657 block_group->start, extent_count, 1658 expected_extent_count); 1659 DEBUG_WARN(); 1660 return -EIO; 1661 } 1662 1663 return 0; 1664 } 1665 1666 int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl) 1667 { 1668 struct btrfs_block_group *block_group; 1669 struct btrfs_free_space_info *info; 1670 BTRFS_PATH_AUTO_FREE(path); 1671 u32 extent_count, flags; 1672 1673 block_group = caching_ctl->block_group; 1674 1675 path = btrfs_alloc_path(); 1676 if (!path) 1677 return -ENOMEM; 1678 1679 /* 1680 * Just like caching_thread() doesn't want to deadlock on the extent 1681 * tree, we don't want to deadlock on the free space tree. 1682 */ 1683 path->skip_locking = true; 1684 path->search_commit_root = true; 1685 path->reada = READA_FORWARD; 1686 1687 info = btrfs_search_free_space_info(NULL, block_group, path, 0); 1688 if (IS_ERR(info)) 1689 return PTR_ERR(info); 1690 1691 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 1692 flags = btrfs_free_space_flags(path->nodes[0], info); 1693 1694 /* 1695 * We left path pointing to the free space info item, so now 1696 * load_free_space_foo can just iterate through the free space tree from 1697 * there. 1698 */ 1699 if (flags & BTRFS_FREE_SPACE_USING_BITMAPS) 1700 return load_free_space_bitmaps(caching_ctl, path, extent_count); 1701 else 1702 return load_free_space_extents(caching_ctl, path, extent_count); 1703 } 1704 1705 static int delete_orphan_free_space_entries(struct btrfs_root *fst_root, 1706 struct btrfs_path *path, 1707 u64 first_bg_bytenr) 1708 { 1709 struct btrfs_trans_handle *trans; 1710 int ret; 1711 1712 trans = btrfs_start_transaction(fst_root, 1); 1713 if (IS_ERR(trans)) 1714 return PTR_ERR(trans); 1715 1716 while (true) { 1717 struct btrfs_key key = { 0 }; 1718 int i; 1719 1720 ret = btrfs_search_slot(trans, fst_root, &key, path, -1, 1); 1721 if (ret < 0) 1722 break; 1723 ASSERT(ret > 0); 1724 ret = 0; 1725 for (i = 0; i < btrfs_header_nritems(path->nodes[0]); i++) { 1726 btrfs_item_key_to_cpu(path->nodes[0], &key, i); 1727 if (key.objectid >= first_bg_bytenr) { 1728 /* 1729 * Only break the for() loop and continue to 1730 * delete items. 1731 */ 1732 break; 1733 } 1734 } 1735 /* No items to delete, finished. */ 1736 if (i == 0) 1737 break; 1738 1739 ret = btrfs_del_items(trans, fst_root, path, 0, i); 1740 if (ret < 0) 1741 break; 1742 btrfs_release_path(path); 1743 } 1744 btrfs_release_path(path); 1745 btrfs_end_transaction(trans); 1746 if (ret == 0) 1747 btrfs_info(fst_root->fs_info, "deleted orphan free space tree entries"); 1748 return ret; 1749 } 1750 1751 /* Remove any free space entry before the first block group. */ 1752 int btrfs_delete_orphan_free_space_entries(struct btrfs_fs_info *fs_info) 1753 { 1754 BTRFS_PATH_AUTO_RELEASE(path); 1755 struct btrfs_key key = { 1756 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 1757 .type = BTRFS_ROOT_ITEM_KEY, 1758 .offset = 0, 1759 }; 1760 struct btrfs_root *root; 1761 struct btrfs_block_group *bg; 1762 u64 first_bg_bytenr; 1763 int ret; 1764 1765 /* 1766 * Extent tree v2 has multiple global roots based on the block group. 1767 * This means we cannot easily grab the global free space tree and locate 1768 * orphan items. Furthermore this is still experimental, all users 1769 * should use the latest btrfs-progs anyway. 1770 */ 1771 if (btrfs_fs_incompat(fs_info, EXTENT_TREE_V2)) 1772 return 0; 1773 if (!btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE)) 1774 return 0; 1775 root = btrfs_global_root(fs_info, &key); 1776 if (!root) 1777 return 0; 1778 1779 key.objectid = 0; 1780 key.type = 0; 1781 key.offset = 0; 1782 1783 bg = btrfs_lookup_first_block_group(fs_info, 0); 1784 if (unlikely(!bg)) { 1785 btrfs_err(fs_info, "no block group found"); 1786 return -EUCLEAN; 1787 } 1788 first_bg_bytenr = bg->start; 1789 btrfs_put_block_group(bg); 1790 1791 ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0); 1792 if (ret < 0) 1793 return ret; 1794 /* There should not be an all-zero key in fst. */ 1795 ASSERT(ret > 0); 1796 1797 /* Empty free space tree. */ 1798 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) 1799 return 0; 1800 1801 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]); 1802 if (key.objectid >= first_bg_bytenr) 1803 return 0; 1804 btrfs_release_path(&path); 1805 return delete_orphan_free_space_entries(root, &path, first_bg_bytenr); 1806 } 1807