1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2015 Facebook. All rights reserved. 4 */ 5 6 #include <linux/kernel.h> 7 #include <linux/sched/mm.h> 8 #include "messages.h" 9 #include "ctree.h" 10 #include "disk-io.h" 11 #include "locking.h" 12 #include "free-space-tree.h" 13 #include "transaction.h" 14 #include "block-group.h" 15 #include "fs.h" 16 #include "accessors.h" 17 #include "extent-tree.h" 18 #include "root-tree.h" 19 20 static int __add_block_group_free_space(struct btrfs_trans_handle *trans, 21 struct btrfs_block_group *block_group, 22 struct btrfs_path *path); 23 24 static struct btrfs_root *btrfs_free_space_root( 25 struct btrfs_block_group *block_group) 26 { 27 struct btrfs_key key = { 28 .objectid = BTRFS_FREE_SPACE_TREE_OBJECTID, 29 .type = BTRFS_ROOT_ITEM_KEY, 30 .offset = 0, 31 }; 32 33 if (btrfs_fs_incompat(block_group->fs_info, EXTENT_TREE_V2)) 34 key.offset = block_group->global_root_id; 35 return btrfs_global_root(block_group->fs_info, &key); 36 } 37 38 void btrfs_set_free_space_tree_thresholds(struct btrfs_block_group *cache) 39 { 40 u32 bitmap_range; 41 size_t bitmap_size; 42 u64 num_bitmaps, total_bitmap_size; 43 44 if (WARN_ON(cache->length == 0)) 45 btrfs_warn(cache->fs_info, "block group %llu length is zero", 46 cache->start); 47 48 /* 49 * We convert to bitmaps when the disk space required for using extents 50 * exceeds that required for using bitmaps. 51 */ 52 bitmap_range = cache->fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 53 num_bitmaps = div_u64(cache->length + bitmap_range - 1, bitmap_range); 54 bitmap_size = sizeof(struct btrfs_item) + BTRFS_FREE_SPACE_BITMAP_SIZE; 55 total_bitmap_size = num_bitmaps * bitmap_size; 56 cache->bitmap_high_thresh = div_u64(total_bitmap_size, 57 sizeof(struct btrfs_item)); 58 59 /* 60 * We allow for a small buffer between the high threshold and low 61 * threshold to avoid thrashing back and forth between the two formats. 
62 */ 63 if (cache->bitmap_high_thresh > 100) 64 cache->bitmap_low_thresh = cache->bitmap_high_thresh - 100; 65 else 66 cache->bitmap_low_thresh = 0; 67 } 68 69 static int add_new_free_space_info(struct btrfs_trans_handle *trans, 70 struct btrfs_block_group *block_group, 71 struct btrfs_path *path) 72 { 73 struct btrfs_root *root = btrfs_free_space_root(block_group); 74 struct btrfs_free_space_info *info; 75 struct btrfs_key key; 76 struct extent_buffer *leaf; 77 int ret; 78 79 key.objectid = block_group->start; 80 key.type = BTRFS_FREE_SPACE_INFO_KEY; 81 key.offset = block_group->length; 82 83 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*info)); 84 if (ret) 85 return ret; 86 87 leaf = path->nodes[0]; 88 info = btrfs_item_ptr(leaf, path->slots[0], 89 struct btrfs_free_space_info); 90 btrfs_set_free_space_extent_count(leaf, info, 0); 91 btrfs_set_free_space_flags(leaf, info, 0); 92 btrfs_release_path(path); 93 return 0; 94 } 95 96 EXPORT_FOR_TESTS 97 struct btrfs_free_space_info *btrfs_search_free_space_info( 98 struct btrfs_trans_handle *trans, 99 struct btrfs_block_group *block_group, 100 struct btrfs_path *path, int cow) 101 { 102 struct btrfs_fs_info *fs_info = block_group->fs_info; 103 struct btrfs_root *root = btrfs_free_space_root(block_group); 104 struct btrfs_key key; 105 int ret; 106 107 key.objectid = block_group->start; 108 key.type = BTRFS_FREE_SPACE_INFO_KEY; 109 key.offset = block_group->length; 110 111 ret = btrfs_search_slot(trans, root, &key, path, 0, cow); 112 if (ret < 0) 113 return ERR_PTR(ret); 114 if (ret != 0) { 115 btrfs_warn(fs_info, "missing free space info for %llu", 116 block_group->start); 117 DEBUG_WARN(); 118 return ERR_PTR(-ENOENT); 119 } 120 121 return btrfs_item_ptr(path->nodes[0], path->slots[0], 122 struct btrfs_free_space_info); 123 } 124 125 /* 126 * btrfs_search_slot() but we're looking for the greatest key less than the 127 * passed key. 
128 */ 129 static int btrfs_search_prev_slot(struct btrfs_trans_handle *trans, 130 struct btrfs_root *root, 131 struct btrfs_key *key, struct btrfs_path *p, 132 int ins_len, int cow) 133 { 134 int ret; 135 136 ret = btrfs_search_slot(trans, root, key, p, ins_len, cow); 137 if (ret < 0) 138 return ret; 139 140 if (unlikely(ret == 0)) { 141 DEBUG_WARN(); 142 return -EIO; 143 } 144 145 if (unlikely(p->slots[0] == 0)) { 146 DEBUG_WARN("no previous slot found"); 147 return -EIO; 148 } 149 p->slots[0]--; 150 151 return 0; 152 } 153 154 static inline u32 free_space_bitmap_size(const struct btrfs_fs_info *fs_info, 155 u64 size) 156 { 157 return DIV_ROUND_UP(size >> fs_info->sectorsize_bits, BITS_PER_BYTE); 158 } 159 160 static unsigned long *alloc_bitmap(u32 bitmap_size) 161 { 162 unsigned long *ret; 163 unsigned int nofs_flag; 164 u32 bitmap_rounded_size = round_up(bitmap_size, sizeof(unsigned long)); 165 166 /* 167 * GFP_NOFS doesn't work with kvmalloc(), but we really can't recurse 168 * into the filesystem here. All callers hold a transaction handle 169 * open, so if a GFP_KERNEL allocation recurses into the filesystem 170 * and triggers a transaction commit, we would deadlock. 
171 */ 172 nofs_flag = memalloc_nofs_save(); 173 ret = kvzalloc(bitmap_rounded_size, GFP_KERNEL); 174 memalloc_nofs_restore(nofs_flag); 175 return ret; 176 } 177 178 static void le_bitmap_set(unsigned long *map, unsigned int start, int len) 179 { 180 u8 *p = ((u8 *)map) + BIT_BYTE(start); 181 const unsigned int size = start + len; 182 int bits_to_set = BITS_PER_BYTE - (start % BITS_PER_BYTE); 183 u8 mask_to_set = BITMAP_FIRST_BYTE_MASK(start); 184 185 while (len - bits_to_set >= 0) { 186 *p |= mask_to_set; 187 len -= bits_to_set; 188 bits_to_set = BITS_PER_BYTE; 189 mask_to_set = ~0; 190 p++; 191 } 192 if (len) { 193 mask_to_set &= BITMAP_LAST_BYTE_MASK(size); 194 *p |= mask_to_set; 195 } 196 } 197 198 EXPORT_FOR_TESTS 199 int btrfs_convert_free_space_to_bitmaps(struct btrfs_trans_handle *trans, 200 struct btrfs_block_group *block_group, 201 struct btrfs_path *path) 202 { 203 struct btrfs_fs_info *fs_info = trans->fs_info; 204 struct btrfs_root *root = btrfs_free_space_root(block_group); 205 struct btrfs_free_space_info *info; 206 struct btrfs_key key, found_key; 207 struct extent_buffer *leaf; 208 unsigned long *bitmap; 209 char *bitmap_cursor; 210 u64 start, end; 211 u64 bitmap_range, i; 212 u32 bitmap_size, flags, expected_extent_count; 213 u32 extent_count = 0; 214 int done = 0, nr; 215 int ret; 216 217 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 218 bitmap = alloc_bitmap(bitmap_size); 219 if (unlikely(!bitmap)) 220 return 0; 221 222 start = block_group->start; 223 end = block_group->start + block_group->length; 224 225 key.objectid = end - 1; 226 key.type = (u8)-1; 227 key.offset = (u64)-1; 228 229 while (!done) { 230 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 231 if (unlikely(ret)) { 232 btrfs_abort_transaction(trans, ret); 233 goto out; 234 } 235 236 leaf = path->nodes[0]; 237 nr = 0; 238 path->slots[0]++; 239 while (path->slots[0] > 0) { 240 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 241 242 if 
(found_key.type == BTRFS_FREE_SPACE_INFO_KEY) { 243 ASSERT(found_key.objectid == block_group->start); 244 ASSERT(found_key.offset == block_group->length); 245 done = 1; 246 break; 247 } else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY) { 248 u64 first, last; 249 250 ASSERT(found_key.objectid >= start); 251 ASSERT(found_key.objectid < end); 252 ASSERT(found_key.objectid + found_key.offset <= end); 253 254 first = div_u64(found_key.objectid - start, 255 fs_info->sectorsize); 256 last = div_u64(found_key.objectid + found_key.offset - start, 257 fs_info->sectorsize); 258 le_bitmap_set(bitmap, first, last - first); 259 260 extent_count++; 261 nr++; 262 path->slots[0]--; 263 } else { 264 ASSERT(0); 265 } 266 } 267 268 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 269 if (unlikely(ret)) { 270 btrfs_abort_transaction(trans, ret); 271 goto out; 272 } 273 btrfs_release_path(path); 274 } 275 276 info = btrfs_search_free_space_info(trans, block_group, path, 1); 277 if (IS_ERR(info)) { 278 ret = PTR_ERR(info); 279 btrfs_abort_transaction(trans, ret); 280 goto out; 281 } 282 leaf = path->nodes[0]; 283 flags = btrfs_free_space_flags(leaf, info); 284 flags |= BTRFS_FREE_SPACE_USING_BITMAPS; 285 block_group->using_free_space_bitmaps = true; 286 block_group->using_free_space_bitmaps_cached = true; 287 btrfs_set_free_space_flags(leaf, info, flags); 288 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 289 btrfs_release_path(path); 290 291 if (unlikely(extent_count != expected_extent_count)) { 292 btrfs_err(fs_info, 293 "incorrect extent count for %llu; counted %u, expected %u", 294 block_group->start, extent_count, 295 expected_extent_count); 296 ret = -EIO; 297 btrfs_abort_transaction(trans, ret); 298 goto out; 299 } 300 301 bitmap_cursor = (char *)bitmap; 302 bitmap_range = fs_info->sectorsize * BTRFS_FREE_SPACE_BITMAP_BITS; 303 i = start; 304 while (i < end) { 305 unsigned long ptr; 306 u64 extent_size; 307 u32 data_size; 308 309 extent_size 
= min(end - i, bitmap_range); 310 data_size = free_space_bitmap_size(fs_info, extent_size); 311 312 key.objectid = i; 313 key.type = BTRFS_FREE_SPACE_BITMAP_KEY; 314 key.offset = extent_size; 315 316 ret = btrfs_insert_empty_item(trans, root, path, &key, 317 data_size); 318 if (unlikely(ret)) { 319 btrfs_abort_transaction(trans, ret); 320 goto out; 321 } 322 323 leaf = path->nodes[0]; 324 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 325 write_extent_buffer(leaf, bitmap_cursor, ptr, 326 data_size); 327 btrfs_release_path(path); 328 329 i += extent_size; 330 bitmap_cursor += data_size; 331 } 332 333 ret = 0; 334 out: 335 kvfree(bitmap); 336 return ret; 337 } 338 339 EXPORT_FOR_TESTS 340 int btrfs_convert_free_space_to_extents(struct btrfs_trans_handle *trans, 341 struct btrfs_block_group *block_group, 342 struct btrfs_path *path) 343 { 344 struct btrfs_fs_info *fs_info = trans->fs_info; 345 struct btrfs_root *root = btrfs_free_space_root(block_group); 346 struct btrfs_free_space_info *info; 347 struct btrfs_key key, found_key; 348 struct extent_buffer *leaf; 349 unsigned long *bitmap; 350 u64 start, end; 351 u32 bitmap_size, flags, expected_extent_count; 352 unsigned long nrbits, start_bit, end_bit; 353 u32 extent_count = 0; 354 int done = 0, nr; 355 int ret; 356 357 bitmap_size = free_space_bitmap_size(fs_info, block_group->length); 358 bitmap = alloc_bitmap(bitmap_size); 359 if (unlikely(!bitmap)) 360 return 0; 361 362 start = block_group->start; 363 end = block_group->start + block_group->length; 364 365 key.objectid = end - 1; 366 key.type = (u8)-1; 367 key.offset = (u64)-1; 368 369 while (!done) { 370 ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1); 371 if (unlikely(ret)) { 372 btrfs_abort_transaction(trans, ret); 373 goto out; 374 } 375 376 leaf = path->nodes[0]; 377 nr = 0; 378 path->slots[0]++; 379 while (path->slots[0] > 0) { 380 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1); 381 382 if (found_key.type == 
BTRFS_FREE_SPACE_INFO_KEY) { 383 ASSERT(found_key.objectid == block_group->start); 384 ASSERT(found_key.offset == block_group->length); 385 done = 1; 386 break; 387 } else if (found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) { 388 unsigned long ptr; 389 char *bitmap_cursor; 390 u32 bitmap_pos, data_size; 391 392 ASSERT(found_key.objectid >= start); 393 ASSERT(found_key.objectid < end); 394 ASSERT(found_key.objectid + found_key.offset <= end); 395 396 bitmap_pos = div_u64(found_key.objectid - start, 397 fs_info->sectorsize * 398 BITS_PER_BYTE); 399 bitmap_cursor = ((char *)bitmap) + bitmap_pos; 400 data_size = free_space_bitmap_size(fs_info, 401 found_key.offset); 402 403 path->slots[0]--; 404 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 405 read_extent_buffer(leaf, bitmap_cursor, ptr, 406 data_size); 407 408 nr++; 409 } else { 410 ASSERT(0); 411 } 412 } 413 414 ret = btrfs_del_items(trans, root, path, path->slots[0], nr); 415 if (unlikely(ret)) { 416 btrfs_abort_transaction(trans, ret); 417 goto out; 418 } 419 btrfs_release_path(path); 420 } 421 422 info = btrfs_search_free_space_info(trans, block_group, path, 1); 423 if (IS_ERR(info)) { 424 ret = PTR_ERR(info); 425 btrfs_abort_transaction(trans, ret); 426 goto out; 427 } 428 leaf = path->nodes[0]; 429 flags = btrfs_free_space_flags(leaf, info); 430 flags &= ~BTRFS_FREE_SPACE_USING_BITMAPS; 431 block_group->using_free_space_bitmaps = false; 432 block_group->using_free_space_bitmaps_cached = true; 433 btrfs_set_free_space_flags(leaf, info, flags); 434 expected_extent_count = btrfs_free_space_extent_count(leaf, info); 435 btrfs_release_path(path); 436 437 nrbits = block_group->length >> fs_info->sectorsize_bits; 438 start_bit = find_next_bit_le(bitmap, nrbits, 0); 439 440 while (start_bit < nrbits) { 441 end_bit = find_next_zero_bit_le(bitmap, nrbits, start_bit); 442 ASSERT(start_bit < end_bit); 443 444 key.objectid = start + start_bit * fs_info->sectorsize; 445 key.type = BTRFS_FREE_SPACE_EXTENT_KEY; 446 
key.offset = (end_bit - start_bit) * fs_info->sectorsize; 447 448 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 449 if (unlikely(ret)) { 450 btrfs_abort_transaction(trans, ret); 451 goto out; 452 } 453 btrfs_release_path(path); 454 455 extent_count++; 456 457 start_bit = find_next_bit_le(bitmap, nrbits, end_bit); 458 } 459 460 if (unlikely(extent_count != expected_extent_count)) { 461 btrfs_err(fs_info, 462 "incorrect extent count for %llu; counted %u, expected %u", 463 block_group->start, extent_count, 464 expected_extent_count); 465 ret = -EIO; 466 btrfs_abort_transaction(trans, ret); 467 goto out; 468 } 469 470 ret = 0; 471 out: 472 kvfree(bitmap); 473 return ret; 474 } 475 476 static int update_free_space_extent_count(struct btrfs_trans_handle *trans, 477 struct btrfs_block_group *block_group, 478 struct btrfs_path *path, 479 int new_extents) 480 { 481 struct btrfs_free_space_info *info; 482 u32 flags; 483 u32 extent_count; 484 int ret = 0; 485 486 if (new_extents == 0) 487 return 0; 488 489 info = btrfs_search_free_space_info(trans, block_group, path, 1); 490 if (IS_ERR(info)) 491 return PTR_ERR(info); 492 493 flags = btrfs_free_space_flags(path->nodes[0], info); 494 extent_count = btrfs_free_space_extent_count(path->nodes[0], info); 495 496 extent_count += new_extents; 497 btrfs_set_free_space_extent_count(path->nodes[0], info, extent_count); 498 btrfs_release_path(path); 499 500 if (!(flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 501 extent_count > block_group->bitmap_high_thresh) { 502 ret = btrfs_convert_free_space_to_bitmaps(trans, block_group, path); 503 } else if ((flags & BTRFS_FREE_SPACE_USING_BITMAPS) && 504 extent_count < block_group->bitmap_low_thresh) { 505 ret = btrfs_convert_free_space_to_extents(trans, block_group, path); 506 } 507 508 return ret; 509 } 510 511 EXPORT_FOR_TESTS 512 bool btrfs_free_space_test_bit(struct btrfs_block_group *block_group, 513 struct btrfs_path *path, u64 offset) 514 { 515 struct extent_buffer *leaf; 516 
struct btrfs_key key; 517 u64 found_start, found_end; 518 unsigned long ptr, i; 519 520 leaf = path->nodes[0]; 521 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 522 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 523 524 found_start = key.objectid; 525 found_end = key.objectid + key.offset; 526 ASSERT(offset >= found_start && offset < found_end); 527 528 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 529 i = div_u64(offset - found_start, 530 block_group->fs_info->sectorsize); 531 return extent_buffer_test_bit(leaf, ptr, i); 532 } 533 534 static void free_space_modify_bits(struct btrfs_trans_handle *trans, 535 struct btrfs_block_group *block_group, 536 struct btrfs_path *path, u64 *start, u64 *size, 537 bool set_bits) 538 { 539 struct btrfs_fs_info *fs_info = block_group->fs_info; 540 struct extent_buffer *leaf; 541 struct btrfs_key key; 542 u64 end = *start + *size; 543 u64 found_start, found_end; 544 unsigned long ptr, first, last; 545 546 leaf = path->nodes[0]; 547 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 548 ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY); 549 550 found_start = key.objectid; 551 found_end = key.objectid + key.offset; 552 ASSERT(*start >= found_start && *start < found_end); 553 ASSERT(end > found_start); 554 555 if (end > found_end) 556 end = found_end; 557 558 ptr = btrfs_item_ptr_offset(leaf, path->slots[0]); 559 first = (*start - found_start) >> fs_info->sectorsize_bits; 560 last = (end - found_start) >> fs_info->sectorsize_bits; 561 if (set_bits) 562 extent_buffer_bitmap_set(leaf, ptr, first, last - first); 563 else 564 extent_buffer_bitmap_clear(leaf, ptr, first, last - first); 565 btrfs_mark_buffer_dirty(trans, leaf); 566 567 *size -= end - *start; 568 *start = end; 569 } 570 571 /* 572 * We can't use btrfs_next_item() in modify_free_space_bitmap() because 573 * btrfs_next_leaf() doesn't get the path for writing. 
 * We can forgo the fancy
 * tree walking in btrfs_next_leaf() anyways because we know exactly what we're
 * looking for.
 */
static int free_space_next_bitmap(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root, struct btrfs_path *p)
{
	struct btrfs_key key;

	/* Next bitmap is in the same leaf: just advance the slot. */
	if (p->slots[0] + 1 < btrfs_header_nritems(p->nodes[0])) {
		p->slots[0]++;
		return 0;
	}

	btrfs_item_key_to_cpu(p->nodes[0], &key, p->slots[0]);
	btrfs_release_path(p);

	/* Re-search for the first item past the current bitmap's range. */
	key.objectid += key.offset;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	return btrfs_search_prev_slot(trans, root, &key, p, 0, 1);
}

/*
 * If remove is 1, then we are removing free space, thus clearing bits in the
 * bitmap. If remove is 0, then we are adding free space, thus setting bits in
 * the bitmap.
 *
 * The net change in the number of free space extents is derived from whether
 * the blocks adjacent to the modified range are free (bit set) or not.
 */
static int modify_free_space_bitmap(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size, bool remove)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 end = start + size;
	u64 cur_start, cur_size;
	bool prev_bit_set = false;
	bool next_bit_set = false;
	int new_extents;
	int ret;

	/*
	 * Read the bit for the block immediately before the extent of space if
	 * that block is within the block group.
	 */
	if (start > block_group->start) {
		u64 prev_block = start - block_group->fs_info->sectorsize;

		key.objectid = prev_block;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;

		prev_bit_set = btrfs_free_space_test_bit(block_group, path, prev_block);

		/* The previous block may have been in the previous bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (start >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}
	} else {
		key.objectid = start;
		key.type = (u8)-1;
		key.offset = (u64)-1;

		ret = btrfs_search_prev_slot(trans, root, &key, path, 0, 1);
		if (ret)
			return ret;
	}

	/*
	 * Iterate over all of the bitmaps overlapped by the extent of space,
	 * clearing/setting bits as required.
	 */
	cur_start = start;
	cur_size = size;
	while (1) {
		free_space_modify_bits(trans, block_group, path, &cur_start,
				       &cur_size, !remove);
		if (cur_size == 0)
			break;
		ret = free_space_next_bitmap(trans, root, path);
		if (ret)
			return ret;
	}

	/*
	 * Read the bit for the block immediately after the extent of space if
	 * that block is within the block group.
	 */
	if (end < block_group->start + block_group->length) {
		/* The next block may be in the next bitmap. */
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		if (end >= key.objectid + key.offset) {
			ret = free_space_next_bitmap(trans, root, path);
			if (ret)
				return ret;
		}

		next_bit_set = btrfs_free_space_test_bit(block_group, path, end);
	}

	if (remove) {
		new_extents = -1;
		if (prev_bit_set) {
			/* Leftover on the left. */
			new_extents++;
		}
		if (next_bit_set) {
			/* Leftover on the right. */
			new_extents++;
		}
	} else {
		new_extents = 1;
		if (prev_bit_set) {
			/* Merging with neighbor on the left. */
			new_extents--;
		}
		if (next_bit_set) {
			/* Merging with neighbor on the right. */
			new_extents--;
		}
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}

/*
 * Remove [start, start + size) from the free space tree in extent format:
 * find the covering FREE_SPACE_EXTENT item, delete it, and re-insert items
 * for any leftover space on either side.
 */
static int remove_free_space_extent(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group,
				    struct btrfs_path *path,
				    u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = -1;
	int ret;

	key.objectid = start;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(start >= found_start && end <= found_end);

	/*
	 * Okay, now that we've found the free space extent which contains the
	 * free space that we are removing, there are four cases:
	 *
	 * 1. We're using the whole extent: delete the key we found and
	 * decrement the free space extent count.
	 * 2. We are using part of the extent starting at the beginning: delete
	 * the key we found and insert a new key representing the leftover at
	 * the end. There is no net change in the number of extents.
	 * 3. We are using part of the extent ending at the end: delete the key
	 * we found and insert a new key representing the leftover at the
	 * beginning. There is no net change in the number of extents.
	 * 4. We are using part of the extent in the middle: delete the key we
	 * found and insert two new keys representing the leftovers on each
	 * side. Where we used to have one extent, we now have two, so increment
	 * the extent count. We may need to convert the block group to bitmaps
	 * as a result.
	 */

	/* Delete the existing key (cases 1-4). */
	ret = btrfs_del_item(trans, root, path);
	if (ret)
		return ret;

	/* Add a key for leftovers at the beginning (cases 3 and 4). */
	if (start > found_start) {
		key.objectid = found_start;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = start - found_start;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	/* Add a key for leftovers at the end (cases 2 and 4). */
	if (end < found_end) {
		key.objectid = end;
		key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
		key.offset = found_end - end;

		btrfs_release_path(path);
		ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
		if (ret)
			return ret;
		new_extents++;
	}

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}

/*
 * Return whether the block group uses the bitmap format (>0), the extent
 * format (0), or an error (<0). The result is cached in the block group to
 * avoid repeated tree searches.
 */
static int using_bitmaps(struct btrfs_block_group *bg, struct btrfs_path *path)
{
	struct btrfs_free_space_info *info;
	u32 flags;

	if (bg->using_free_space_bitmaps_cached)
		return bg->using_free_space_bitmaps;

	info = btrfs_search_free_space_info(NULL, bg, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);
	flags = btrfs_free_space_flags(path->nodes[0], info);
	btrfs_release_path(path);

	bg->using_free_space_bitmaps = (flags & BTRFS_FREE_SPACE_USING_BITMAPS);
	bg->using_free_space_bitmaps_cached = true;

	return bg->using_free_space_bitmaps;
}

/*
 * Remove [start, start + size) from the free space tree, dispatching to the
 * bitmap or extent implementation depending on the block group's format.
 * Caller must hold block_group->free_space_lock.
 */
EXPORT_FOR_TESTS
int __btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path, u64 start, u64 size)
{
	int ret;

	/* Lazily populate the tree for a newly created block group. */
	ret = __add_block_group_free_space(trans, block_group, path);
	if (ret)
		return ret;

	ret = using_bitmaps(block_group, path);
	if (ret < 0)
		return ret;

	if (ret)
		return modify_free_space_bitmap(trans, block_group, path,
						start, size, true);

	return remove_free_space_extent(trans, block_group, path, start, size);
}

/*
 * Public entry point: remove [start, start + size) from the free space tree,
 * looking up the owning block group and taking its free_space_lock. Aborts
 * the transaction on any failure.
 */
int btrfs_remove_from_free_space_tree(struct btrfs_trans_handle *trans,
				      u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	BTRFS_PATH_AUTO_FREE(path);
	int ret;

	/* Nothing to do if this filesystem has no free space tree. */
	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!block_group)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	mutex_lock(&block_group->free_space_lock);
	ret = __btrfs_remove_from_free_space_tree(trans, block_group, path, start, size);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);

	return ret;
}

/*
 * Add [start, start + size) to the free space tree in extent format, merging
 * with any immediately adjacent free space extents.
 */
static int add_free_space_extent(struct btrfs_trans_handle *trans,
				 struct btrfs_block_group *block_group,
				 struct btrfs_path *path,
				 u64 start, u64 size)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	struct btrfs_key key, new_key;
	u64 found_start, found_end;
	u64 end = start + size;
	int new_extents = 1;
	int ret;

	/*
	 * We are adding a new extent of free space, but we need to merge
	 * extents. There are four cases here:
	 *
	 * 1. The new extent does not have any immediate neighbors to merge
	 * with: add the new key and increment the free space extent count. We
	 * may need to convert the block group to bitmaps as a result.
	 * 2. The new extent has an immediate neighbor before it: remove the
	 * previous key and insert a new key combining both of them. There is no
	 * net change in the number of extents.
	 * 3. The new extent has an immediate neighbor after it: remove the next
	 * key and insert a new key combining both of them. There is no net
	 * change in the number of extents.
	 * 4. The new extent has immediate neighbors on both sides: remove both
	 * of the keys and insert a new key combining all of them. Where we used
	 * to have two extents, we now have one, so decrement the extent count.
	 */

	new_key.objectid = start;
	new_key.type = BTRFS_FREE_SPACE_EXTENT_KEY;
	new_key.offset = size;

	/* Search for a neighbor on the left. */
	if (start == block_group->start)
		goto right;
	key.objectid = start - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		/* The item before us can only be the block group's info item. */
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto right;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT(found_start < start && found_end <= start);

	/*
	 * Delete the neighbor on the left and absorb it into the new key (cases
	 * 2 and 4).
	 */
	if (found_end == start) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.objectid = found_start;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

right:
	/* Search for a neighbor on the right. */
	if (end == block_group->start + block_group->length)
		goto insert;
	key.objectid = end;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
	if (ret)
		return ret;

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

	if (key.type != BTRFS_FREE_SPACE_EXTENT_KEY) {
		ASSERT(key.type == BTRFS_FREE_SPACE_INFO_KEY);
		btrfs_release_path(path);
		goto insert;
	}

	found_start = key.objectid;
	found_end = key.objectid + key.offset;
	ASSERT(found_start >= block_group->start &&
	       found_end > block_group->start);
	ASSERT((found_start < start && found_end <= start) ||
	       (found_start >= end && found_end > end));

	/*
	 * Delete the neighbor on the right and absorb it into the new key
	 * (cases 3 and 4).
	 */
	if (found_start == end) {
		ret = btrfs_del_item(trans, root, path);
		if (ret)
			return ret;
		new_key.offset += key.offset;
		new_extents--;
	}
	btrfs_release_path(path);

insert:
	/* Insert the new key (cases 1-4). */
	ret = btrfs_insert_empty_item(trans, root, path, &new_key, 0);
	if (ret)
		return ret;

	btrfs_release_path(path);
	return update_free_space_extent_count(trans, block_group, path, new_extents);
}

/*
 * Add [start, start + size) to the free space tree, dispatching to the
 * bitmap or extent implementation depending on the block group's format.
 * Caller must hold block_group->free_space_lock.
 */
EXPORT_FOR_TESTS
int __btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				   struct btrfs_block_group *block_group,
				   struct btrfs_path *path, u64 start, u64 size)
{
	int ret;

	/* Lazily populate the tree for a newly created block group. */
	ret = __add_block_group_free_space(trans, block_group, path);
	if (ret)
		return ret;

	ret = using_bitmaps(block_group, path);
	if (ret < 0)
		return ret;

	if (ret)
		return modify_free_space_bitmap(trans, block_group, path,
						start, size, false);

	return add_free_space_extent(trans, block_group, path, start, size);
}

/*
 * Public entry point: add [start, start + size) to the free space tree,
 * looking up the owning block group and taking its free_space_lock. Aborts
 * the transaction on any failure.
 */
int btrfs_add_to_free_space_tree(struct btrfs_trans_handle *trans,
				 u64 start, u64 size)
{
	struct btrfs_block_group *block_group;
	BTRFS_PATH_AUTO_FREE(path);
	int ret;

	/* Nothing to do if this filesystem has no free space tree. */
	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	block_group = btrfs_lookup_block_group(trans->fs_info, start);
	if (unlikely(!block_group)) {
		DEBUG_WARN("no block group found for start=%llu", start);
		ret = -ENOENT;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	mutex_lock(&block_group->free_space_lock);
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path, start, size);
	mutex_unlock(&block_group->free_space_lock);
	if (ret)
		btrfs_abort_transaction(trans, ret);

	btrfs_put_block_group(block_group);

	return ret;
}

/*
 * Populate the free space tree by walking the extent tree.
 * Operations on the
 * extent tree that happen as a result of writes to the free space tree will go
 * through the normal add/remove hooks.
 */
static int populate_free_space_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_block_group *block_group)
{
	struct btrfs_root *extent_root;
	BTRFS_PATH_AUTO_FREE(path);
	BTRFS_PATH_AUTO_FREE(path2);
	struct btrfs_key key;
	u64 start, end;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path2 = btrfs_alloc_path();
	if (!path2)
		return -ENOMEM;

	/* We walk the extent tree sequentially, so read ahead helps. */
	path->reada = READA_FORWARD;

	ret = add_new_free_space_info(trans, block_group, path2);
	if (ret)
		return ret;

	mutex_lock(&block_group->free_space_lock);

	/*
	 * Iterate through all of the extent and metadata items in this block
	 * group, adding the free space between them and the free space at the
	 * end. Note that EXTENT_ITEM and METADATA_ITEM are less than
	 * BLOCK_GROUP_ITEM, so an extent may precede the block group that it's
	 * contained in.
	 */
	key.objectid = block_group->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = 0;

	extent_root = btrfs_extent_root(trans->fs_info, key.objectid);
	ret = btrfs_search_slot_for_read(extent_root, &key, path, 1, 0);
	if (ret < 0)
		goto out_locked;
	/*
	 * If ret is 1 (no key found), it means this is an empty block group,
	 * without any extents allocated from it and there's no block group
	 * item (key BTRFS_BLOCK_GROUP_ITEM_KEY) located in the extent tree
	 * because we are using the block group tree feature (so block group
	 * items are stored in the block group tree) or this is a new block
	 * group created in the current transaction and its block group item
	 * was not yet inserted in the extent tree (that happens in
	 * btrfs_create_pending_block_groups() -> insert_block_group_item()).
	 * It also means there are no extents allocated for block groups with a
	 * start offset beyond this block group's end offset (this is the last,
	 * highest, block group).
	 */
	start = block_group->start;
	end = block_group->start + block_group->length;
	while (ret == 0) {
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		if (key.type == BTRFS_EXTENT_ITEM_KEY ||
		    key.type == BTRFS_METADATA_ITEM_KEY) {
			/* Past the end of this block group: done. */
			if (key.objectid >= end)
				break;

			/* The gap before this allocation is free space. */
			if (start < key.objectid) {
				ret = __btrfs_add_to_free_space_tree(trans,
								     block_group,
								     path2, start,
								     key.objectid -
								     start);
				if (ret)
					goto out_locked;
			}
			start = key.objectid;
			/* METADATA_ITEM offset is a refcount, not a length. */
			if (key.type == BTRFS_METADATA_ITEM_KEY)
				start += trans->fs_info->nodesize;
			else
				start += key.offset;
		} else if (key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			if (key.objectid != block_group->start)
				break;
		}

		ret = btrfs_next_item(extent_root, path);
		if (ret < 0)
			goto out_locked;
	}
	/* Free space between the last allocation and the end of the group. */
	if (start < end) {
		ret = __btrfs_add_to_free_space_tree(trans, block_group, path2,
						     start, end - start);
		if (ret)
			goto out_locked;
	}

	ret = 0;
out_locked:
	mutex_unlock(&block_group->free_space_lock);

	return ret;
}

int btrfs_create_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *free_space_root;
	struct btrfs_block_group *block_group;
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	free_space_root = btrfs_create_tree(trans,
					    BTRFS_FREE_SPACE_TREE_OBJECTID);
	if
	   (IS_ERR(free_space_root)) {
		ret = PTR_ERR(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}
	ret = btrfs_global_root_insert(free_space_root);
	if (unlikely(ret)) {
		btrfs_put_root(free_space_root);
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		goto out_clear;
	}

	/* Populate the new tree from every block group in the rbtree. */
	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);
		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			goto out_clear;
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	ret = btrfs_commit_transaction(trans);

	/*
	 * Now that we've committed the transaction any reading of our commit
	 * root will be safe, so we can cache from the free space tree now.
	 */
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;

out_clear:
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}

/*
 * Delete all items from the free space tree @root, one whole leaf at a time,
 * and clear the FREE_SPACE_ADDED runtime flag from every block group so they
 * can be repopulated afterwards.
 */
static int clear_free_space_tree(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key;
	struct rb_node *node;
	int nr;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* Smallest possible key: every search lands on the first leaf. */
	key.objectid = 0;
	key.type = 0;
	key.offset = 0;

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0)
			return ret;

		nr = btrfs_header_nritems(path->nodes[0]);
		if (!nr)
			break;

		/* Delete the whole leaf in one batch. */
		path->slots[0] = 0;
		ret = btrfs_del_items(trans, root, path, 0, nr);
		if (ret)
			return ret;

		btrfs_release_path(path);
	}

	node = rb_first_cached(&trans->fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group *bg;

		bg = rb_entry(node, struct btrfs_block_group, cache_node);
		clear_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &bg->runtime_flags);
		node = rb_next(node);
		cond_resched();
	}

	return 0;
}

/*
 * Remove the free space tree entirely: empty it, delete its root item, free
 * its root node, then commit.  Clears the compat_ro feature bits first so
 * the tree is not used while being torn down.
 */
int btrfs_delete_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	int ret;

	trans = btrfs_start_transaction(tree_root, 0);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_clear_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	ret = btrfs_del_root(trans, &free_space_root->root_key);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	btrfs_global_root_delete(free_space_root);

	/* Drop the root from the transaction's dirty list before freeing it. */
	spin_lock(&fs_info->trans_lock);
	list_del(&free_space_root->dirty_list);
	spin_unlock(&fs_info->trans_lock);

	btrfs_tree_lock(free_space_root->node);
	btrfs_clear_buffer_dirty(trans, free_space_root->node);
	btrfs_tree_unlock(free_space_root->node);
	ret = btrfs_free_tree_block(trans, btrfs_root_id(free_space_root),
				    free_space_root->node, 0, 1);
	btrfs_put_root(free_space_root);
	if (unlikely(ret < 0)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	return btrfs_commit_transaction(trans);
}

/*
 * Rebuild the free space tree in place: clear all of its items and
 * repopulate from the extent tree.  Unlike creation this may span multiple
 * transactions; block groups whose items were already (re)inserted via the
 * add hooks are skipped using the FREE_SPACE_ADDED runtime flag (see the
 * comment in __add_block_group_free_space()).
 */
int btrfs_rebuild_free_space_tree(struct btrfs_fs_info *fs_info)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_key key = {
		.objectid = BTRFS_FREE_SPACE_TREE_OBJECTID,
		.type = BTRFS_ROOT_ITEM_KEY,
		.offset = 0,
	};
	struct btrfs_root *free_space_root = btrfs_global_root(fs_info, &key);
	struct rb_node *node;
	int ret;

	trans = btrfs_start_transaction(free_space_root, 1);
	if (IS_ERR(trans))
		return PTR_ERR(trans);

	/* The tree must not be trusted or cached from while rebuilding. */
	set_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);
	set_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);

	ret = clear_free_space_tree(trans, free_space_root);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		btrfs_end_transaction(trans);
		return ret;
	}

	node = rb_first_cached(&fs_info->block_group_cache_tree);
	while (node) {
		struct btrfs_block_group
					*block_group;

		block_group = rb_entry(node, struct btrfs_block_group,
				       cache_node);

		/*
		 * Skip block groups already added to the tree by the add
		 * hooks while the rebuild was in progress.
		 */
		if (test_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED,
			     &block_group->runtime_flags))
			goto next;

		ret = populate_free_space_tree(trans, block_group);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			btrfs_end_transaction(trans);
			return ret;
		}
next:
		/* Break the work up over multiple transactions if needed. */
		if (btrfs_should_end_transaction(trans)) {
			btrfs_end_transaction(trans);
			trans = btrfs_start_transaction(free_space_root, 1);
			if (IS_ERR(trans))
				return PTR_ERR(trans);
		}
		node = rb_next(node);
	}

	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE);
	btrfs_set_fs_compat_ro(fs_info, FREE_SPACE_TREE_VALID);
	clear_bit(BTRFS_FS_CREATING_FREE_SPACE_TREE, &fs_info->flags);

	ret = btrfs_commit_transaction(trans);
	clear_bit(BTRFS_FS_FREE_SPACE_TREE_UNTRUSTED, &fs_info->flags);
	return ret;
}

/*
 * Insert the free space info item and the initial free space entry for a
 * block group flagged NEEDS_FREE_SPACE (one whose items were not yet added
 * to the free space tree).  Returns 0 immediately when the flag is not set.
 * May be called with @path == NULL, in which case a temporary path is
 * allocated.  Aborts the transaction on failure.
 */
static int __add_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group,
					struct btrfs_path *path)
{
	bool own_path = false;
	int ret;

	if (!test_and_clear_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE,
				&block_group->runtime_flags))
		return 0;

	/*
	 * While rebuilding the free space tree we may allocate new metadata
	 * block groups while modifying the free space tree.
	 *
	 * Because during the rebuild (at btrfs_rebuild_free_space_tree()) we
	 * can use multiple transactions, every time btrfs_end_transaction() is
	 * called at btrfs_rebuild_free_space_tree() we finish the creation of
	 * new block groups by calling btrfs_create_pending_block_groups(), and
	 * that in turn calls us, through add_block_group_free_space(), to add
	 * a free space info item and a free space extent item for the block
	 * group.
	 *
	 * Then later btrfs_rebuild_free_space_tree() may find such new block
	 * groups and processes them with populate_free_space_tree(), which can
	 * fail with EEXIST since there are already items for the block group in
	 * the free space tree. Notice that we say "may find" because a new
	 * block group may be added to the block groups rbtree in a node before
	 * or after the block group currently being processed by the rebuild
	 * process. So signal the rebuild process to skip such new block groups
	 * if it finds them.
	 */
	set_bit(BLOCK_GROUP_FLAG_FREE_SPACE_ADDED, &block_group->runtime_flags);

	if (!path) {
		path = btrfs_alloc_path();
		if (unlikely(!path)) {
			btrfs_abort_transaction(trans, -ENOMEM);
			return -ENOMEM;
		}
		own_path = true;
	}

	ret = add_new_free_space_info(trans, block_group, path);
	if (unlikely(ret)) {
		btrfs_abort_transaction(trans, ret);
		goto out;
	}

	/* A brand new block group is entirely free space. */
	ret = __btrfs_add_to_free_space_tree(trans, block_group, path,
					     block_group->start, block_group->length);
	if (ret)
		btrfs_abort_transaction(trans, ret);

out:
	if (own_path)
		btrfs_free_path(path);

	return ret;
}

/*
 * Add a block group's free space items to the free space tree, if the
 * feature is enabled.  Takes the block group's free_space_lock.
 */
int btrfs_add_block_group_free_space(struct btrfs_trans_handle *trans,
				     struct btrfs_block_group *block_group)
{
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	mutex_lock(&block_group->free_space_lock);
	ret = __add_block_group_free_space(trans, block_group, NULL);
	mutex_unlock(&block_group->free_space_lock);
	return ret;
}

/*
 * Remove all free space tree items belonging to @block_group: its free
 * space info item and any free space extent/bitmap items.  Aborts the
 * transaction on failure.
 */
int btrfs_remove_block_group_free_space(struct btrfs_trans_handle *trans,
					struct btrfs_block_group *block_group)
{
	struct btrfs_root *root = btrfs_free_space_root(block_group);
	BTRFS_PATH_AUTO_FREE(path);
	struct btrfs_key key, found_key;
	struct extent_buffer *leaf;
	u64 start,
	    end;
	int done = 0, nr;
	int ret;

	if (!btrfs_fs_compat_ro(trans->fs_info, FREE_SPACE_TREE))
		return 0;

	if (test_bit(BLOCK_GROUP_FLAG_NEEDS_FREE_SPACE, &block_group->runtime_flags)) {
		/* We never added this block group to the free space tree. */
		return 0;
	}

	path = btrfs_alloc_path();
	if (unlikely(!path)) {
		ret = -ENOMEM;
		btrfs_abort_transaction(trans, ret);
		return ret;
	}

	start = block_group->start;
	end = block_group->start + block_group->length;

	/* Search backwards from the last possible key in this block group. */
	key.objectid = end - 1;
	key.type = (u8)-1;
	key.offset = (u64)-1;

	while (!done) {
		ret = btrfs_search_prev_slot(trans, root, &key, path, -1, 1);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}

		leaf = path->nodes[0];
		nr = 0;
		path->slots[0]++;
		/* Walk the leaf backwards, batching contiguous deletions. */
		while (path->slots[0] > 0) {
			btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0] - 1);

			if (found_key.type == BTRFS_FREE_SPACE_INFO_KEY) {
				/* The info item is the block group's first item. */
				ASSERT(found_key.objectid == block_group->start);
				ASSERT(found_key.offset == block_group->length);
				done = 1;
				nr++;
				path->slots[0]--;
				break;
			} else if (found_key.type == BTRFS_FREE_SPACE_EXTENT_KEY ||
				   found_key.type == BTRFS_FREE_SPACE_BITMAP_KEY) {
				ASSERT(found_key.objectid >= start);
				ASSERT(found_key.objectid < end);
				ASSERT(found_key.objectid + found_key.offset <= end);
				nr++;
				path->slots[0]--;
			} else {
				ASSERT(0);
			}
		}

		ret = btrfs_del_items(trans, root, path, path->slots[0], nr);
		if (unlikely(ret)) {
			btrfs_abort_transaction(trans, ret);
			return ret;
		}
		btrfs_release_path(path);
	}

	ret = 0;

	return ret;
}

/*
 * Cache a block group's free space from bitmap items in the free space tree.
 * @path must point at the block group's free space info item; iteration
 * continues from there.  Waiters on the caching control are woken as free
 * space is accumulated.  Returns -EIO if the number of extents found does
 * not match @expected_extent_count.
 */
static int load_free_space_bitmaps(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	bool prev_bit_set = false;
	/* Initialize to silence GCC. */
	u64 extent_start = 0;
	u64 end, offset;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* An info item marks the start of another block group's items. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_BITMAP_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		/* Scan the bitmap one sector at a time. */
		offset = key.objectid;
		while (offset < key.objectid + key.offset) {
			bool bit_set;

			bit_set = btrfs_free_space_test_bit(block_group, path, offset);
			if (!prev_bit_set && bit_set) {
				/* 0 -> 1 transition: a free extent starts. */
				extent_start = offset;
			} else if (prev_bit_set && !bit_set) {
				/* 1 -> 0 transition: the free extent ends. */
				u64 space_added;

				ret = btrfs_add_new_free_space(block_group,
							       extent_start,
							       offset,
							       &space_added);
				if (ret)
					return ret;
				total_found += space_added;
				if (total_found > CACHING_CTL_WAKE_UP) {
					total_found = 0;
					wake_up(&caching_ctl->wait);
				}
				extent_count++;
			}
			prev_bit_set = bit_set;
			offset += fs_info->sectorsize;
		}
	}
	/* A trailing set bit means free space runs to the block group's end. */
	if (prev_bit_set) {
		ret = btrfs_add_new_free_space(block_group, extent_start, end, NULL);
		if (ret)
			return ret;
		extent_count++;
	}

	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}

/*
 * Cache a block group's free space from extent items in the free space tree.
 * @path must point at the block group's free space info item; iteration
 * continues from there.  Returns -EIO if the number of extents found does
 * not match @expected_extent_count.
 */
static int load_free_space_extents(struct btrfs_caching_control *caching_ctl,
				   struct btrfs_path *path,
				   u32 expected_extent_count)
{
	struct btrfs_block_group *block_group;
	struct btrfs_fs_info *fs_info;
	struct btrfs_root *root;
	struct btrfs_key key;
	u64 end;
	u64 total_found = 0;
	u32 extent_count = 0;
	int ret;

	block_group = caching_ctl->block_group;
	fs_info = block_group->fs_info;
	root = btrfs_free_space_root(block_group);

	end = block_group->start + block_group->length;

	while (1) {
		u64 space_added;

		ret = btrfs_next_item(root, path);
		if (ret < 0)
			return ret;
		if (ret)
			break;

		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);

		/* An info item marks the start of another block group's items. */
		if (key.type == BTRFS_FREE_SPACE_INFO_KEY)
			break;

		ASSERT(key.type == BTRFS_FREE_SPACE_EXTENT_KEY);
		ASSERT(key.objectid < end && key.objectid + key.offset <= end);

		ret = btrfs_add_new_free_space(block_group, key.objectid,
					       key.objectid + key.offset,
					       &space_added);
		if (ret)
			return ret;
		total_found += space_added;
		if (total_found > CACHING_CTL_WAKE_UP) {
			total_found = 0;
			wake_up(&caching_ctl->wait);
		}
		extent_count++;
	}

	if (unlikely(extent_count != expected_extent_count)) {
		btrfs_err(fs_info,
			  "incorrect extent count for %llu; counted %u, expected %u",
			  block_group->start, extent_count,
			  expected_extent_count);
		DEBUG_WARN();
		return -EIO;
	}

	return 0;
}

/*
 * Load a block group's free space from the free space tree into the
 * in-memory cache, dispatching on whether the block group stores its free
 * space as bitmaps or extents.  Searches the commit root without tree
 * locking to avoid deadlocks (see the comment below).
 */
int btrfs_load_free_space_tree(struct btrfs_caching_control *caching_ctl)
{
	struct btrfs_block_group *block_group;
	struct btrfs_free_space_info *info;
	BTRFS_PATH_AUTO_FREE(path);
	u32 extent_count, flags;

	block_group = caching_ctl->block_group;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/*
	 * Just like caching_thread() doesn't want to deadlock on the extent
	 * tree, we don't want to deadlock on the free space tree.
	 */
	path->skip_locking = true;
	path->search_commit_root = true;
	path->reada = READA_FORWARD;

	info = btrfs_search_free_space_info(NULL, block_group, path, 0);
	if (IS_ERR(info))
		return PTR_ERR(info);

	extent_count = btrfs_free_space_extent_count(path->nodes[0], info);
	flags = btrfs_free_space_flags(path->nodes[0], info);

	/*
	 * We left path pointing to the free space info item, so now
	 * load_free_space_foo can just iterate through the free space tree from
	 * there.
	 */
	if (flags & BTRFS_FREE_SPACE_USING_BITMAPS)
		return load_free_space_bitmaps(caching_ctl, path, extent_count);
	else
		return load_free_space_extents(caching_ctl, path, extent_count);
}