1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include <linux/kthread.h> 25 #include <linux/slab.h> 26 #include "compat.h" 27 #include "hash.h" 28 #include "ctree.h" 29 #include "disk-io.h" 30 #include "print-tree.h" 31 #include "transaction.h" 32 #include "volumes.h" 33 #include "locking.h" 34 #include "free-space-cache.h" 35 36 /* control flags for do_chunk_alloc's force field 37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk 38 * if we really need one. 39 * 40 * CHUNK_ALLOC_FORCE means it must try to allocate one 41 * 42 * CHUNK_ALLOC_LIMITED means to only try and allocate one 43 * if we have very few chunks already allocated. This is 44 * used as part of the clustering code to help make sure 45 * we have a good pool of storage to cluster in, without 46 * filling the FS with empty chunks 47 * 48 */ 49 enum { 50 CHUNK_ALLOC_NO_FORCE = 0, 51 CHUNK_ALLOC_FORCE = 1, 52 CHUNK_ALLOC_LIMITED = 2, 53 }; 54 55 static int update_block_group(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 bytenr, u64 num_bytes, int alloc); 58 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 59 struct btrfs_root *root, 60 u64 bytenr, u64 num_bytes, u64 parent, 61 u64 root_objectid, u64 owner_objectid, 62 u64 owner_offset, int refs_to_drop, 63 struct btrfs_delayed_extent_op *extra_op); 64 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 65 struct extent_buffer *leaf, 66 struct btrfs_extent_item *ei); 67 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 68 struct btrfs_root *root, 69 u64 parent, u64 root_objectid, 70 u64 flags, u64 owner, u64 offset, 71 struct btrfs_key *ins, int ref_mod); 72 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 73 struct btrfs_root *root, 74 u64 parent, u64 root_objectid, 75 u64 flags, struct btrfs_disk_key *key, 76 int level, struct btrfs_key *ins); 77 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 78 struct btrfs_root *extent_root, u64 alloc_bytes, 79 u64 flags, int force); 80 static int find_next_key(struct btrfs_path *path, int level, 81 struct btrfs_key *key); 82 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 83 int dump_block_groups); 84 85 static noinline int 86 block_group_cache_done(struct btrfs_block_group_cache *cache) 87 { 88 smp_mb(); 89 return cache->cached == BTRFS_CACHE_FINISHED; 90 } 91 92 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 93 { 94 return (cache->flags & bits) == bits; 95 } 96 97 static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 98 { 99 atomic_inc(&cache->count); 100 } 101 102 void 
btrfs_put_block_group(struct btrfs_block_group_cache *cache) 103 { 104 if (atomic_dec_and_test(&cache->count)) { 105 WARN_ON(cache->pinned > 0); 106 WARN_ON(cache->reserved > 0); 107 WARN_ON(cache->reserved_pinned > 0); 108 kfree(cache->free_space_ctl); 109 kfree(cache); 110 } 111 } 112 113 /* 114 * this adds the block group to the fs_info rb tree for the block group 115 * cache 116 */ 117 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 118 struct btrfs_block_group_cache *block_group) 119 { 120 struct rb_node **p; 121 struct rb_node *parent = NULL; 122 struct btrfs_block_group_cache *cache; 123 124 spin_lock(&info->block_group_cache_lock); 125 p = &info->block_group_cache_tree.rb_node; 126 127 while (*p) { 128 parent = *p; 129 cache = rb_entry(parent, struct btrfs_block_group_cache, 130 cache_node); 131 if (block_group->key.objectid < cache->key.objectid) { 132 p = &(*p)->rb_left; 133 } else if (block_group->key.objectid > cache->key.objectid) { 134 p = &(*p)->rb_right; 135 } else { 136 spin_unlock(&info->block_group_cache_lock); 137 return -EEXIST; 138 } 139 } 140 141 rb_link_node(&block_group->cache_node, parent, p); 142 rb_insert_color(&block_group->cache_node, 143 &info->block_group_cache_tree); 144 spin_unlock(&info->block_group_cache_lock); 145 146 return 0; 147 } 148 149 /* 150 * This will return the block group at or after bytenr if contains is 0, else 151 * it will return the block group that contains the bytenr 152 */ 153 static struct btrfs_block_group_cache * 154 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 155 int contains) 156 { 157 struct btrfs_block_group_cache *cache, *ret = NULL; 158 struct rb_node *n; 159 u64 end, start; 160 161 spin_lock(&info->block_group_cache_lock); 162 n = info->block_group_cache_tree.rb_node; 163 164 while (n) { 165 cache = rb_entry(n, struct btrfs_block_group_cache, 166 cache_node); 167 end = cache->key.objectid + cache->key.offset - 1; 168 start = cache->key.objectid; 169 170 if (bytenr < start) { 171 if (!contains && (!ret || start < ret->key.objectid)) 172 ret = cache; 173 n = n->rb_left; 174 } else if (bytenr > start) { 175 if (contains && bytenr <= end) { 176 ret = cache; 177 break; 178 } 179 n = n->rb_right; 180 } else { 181 ret = cache; 182 break; 183 } 184 } 185 if (ret) 186 btrfs_get_block_group(ret); 187 spin_unlock(&info->block_group_cache_lock); 188 189 return ret; 190 } 191 192 static int add_excluded_extent(struct btrfs_root *root, 193 u64 start, u64 num_bytes) 194 { 195 u64 end = start + num_bytes - 1; 196 set_extent_bits(&root->fs_info->freed_extents[0], 197 start, end, EXTENT_UPTODATE, GFP_NOFS); 198 set_extent_bits(&root->fs_info->freed_extents[1], 199 start, end, EXTENT_UPTODATE, GFP_NOFS); 200 return 0; 201 } 202 203 static void free_excluded_extents(struct btrfs_root *root, 204 struct btrfs_block_group_cache *cache) 205 { 206 u64 start, end; 207 208 start = cache->key.objectid; 209 end = start + cache->key.offset - 1; 210 211 clear_extent_bits(&root->fs_info->freed_extents[0], 212 start, end, EXTENT_UPTODATE, GFP_NOFS); 213 clear_extent_bits(&root->fs_info->freed_extents[1], 214 start, end, EXTENT_UPTODATE, GFP_NOFS); 215 } 216 217 static int exclude_super_stripes(struct btrfs_root *root, 218 struct btrfs_block_group_cache *cache) 219 { 220 u64 bytenr; 221 u64 *logical; 222 int stripe_len; 223 int i, nr, ret; 224 225 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { 226 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; 227 cache->bytes_super += stripe_len; 228 ret = 
add_excluded_extent(root, cache->key.objectid, 229 stripe_len); 230 BUG_ON(ret); 231 } 232 233 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 234 bytenr = btrfs_sb_offset(i); 235 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 236 cache->key.objectid, bytenr, 237 0, &logical, &nr, &stripe_len); 238 BUG_ON(ret); 239 240 while (nr--) { 241 cache->bytes_super += stripe_len; 242 ret = add_excluded_extent(root, logical[nr], 243 stripe_len); 244 BUG_ON(ret); 245 } 246 247 kfree(logical); 248 } 249 return 0; 250 } 251 252 static struct btrfs_caching_control * 253 get_caching_control(struct btrfs_block_group_cache *cache) 254 { 255 struct btrfs_caching_control *ctl; 256 257 spin_lock(&cache->lock); 258 if (cache->cached != BTRFS_CACHE_STARTED) { 259 spin_unlock(&cache->lock); 260 return NULL; 261 } 262 263 /* We're loading it the fast way, so we don't have a caching_ctl. */ 264 if (!cache->caching_ctl) { 265 spin_unlock(&cache->lock); 266 return NULL; 267 } 268 269 ctl = cache->caching_ctl; 270 atomic_inc(&ctl->count); 271 spin_unlock(&cache->lock); 272 return ctl; 273 } 274 275 static void put_caching_control(struct btrfs_caching_control *ctl) 276 { 277 if (atomic_dec_and_test(&ctl->count)) 278 kfree(ctl); 279 } 280 281 /* 282 * this is only called by cache_block_group, since we could have freed extents 283 * we need to check the pinned_extents for any extents that can't be used yet 284 * since their free space will be released as soon as the transaction commits. 285 */ 286 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 287 struct btrfs_fs_info *info, u64 start, u64 end) 288 { 289 u64 extent_start, extent_end, size, total_added = 0; 290 int ret; 291 292 while (start < end) { 293 ret = find_first_extent_bit(info->pinned_extents, start, 294 &extent_start, &extent_end, 295 EXTENT_DIRTY | EXTENT_UPTODATE); 296 if (ret) 297 break; 298 299 if (extent_start <= start) { 300 start = extent_end + 1; 301 } else if (extent_start > start && extent_start < end) { 302 size = extent_start - start; 303 total_added += size; 304 ret = btrfs_add_free_space(block_group, start, 305 size); 306 BUG_ON(ret); 307 start = extent_end + 1; 308 } else { 309 break; 310 } 311 } 312 313 if (start < end) { 314 size = end - start; 315 total_added += size; 316 ret = btrfs_add_free_space(block_group, start, size); 317 BUG_ON(ret); 318 } 319 320 return total_added; 321 } 322 323 static int caching_kthread(void *data) 324 { 325 struct btrfs_block_group_cache *block_group = data; 326 struct btrfs_fs_info *fs_info = block_group->fs_info; 327 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; 328 struct btrfs_root *extent_root = fs_info->extent_root; 329 struct btrfs_path *path; 330 struct extent_buffer *leaf; 331 struct btrfs_key key; 332 u64 total_found = 0; 333 u64 last = 0; 334 u32 nritems; 335 int ret = 0; 336 337 path = btrfs_alloc_path(); 338 if (!path) 339 return -ENOMEM; 340 341 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 342 343 /* 344 * We don't want to deadlock with somebody trying to allocate a new 345 * extent for the extent root while also trying to search the extent 346 * root to add free space. 
So we skip locking and search the commit 347 * root, since its read-only 348 */ 349 path->skip_locking = 1; 350 path->search_commit_root = 1; 351 path->reada = 1; 352 353 key.objectid = last; 354 key.offset = 0; 355 key.type = BTRFS_EXTENT_ITEM_KEY; 356 again: 357 mutex_lock(&caching_ctl->mutex); 358 /* need to make sure the commit_root doesn't disappear */ 359 down_read(&fs_info->extent_commit_sem); 360 361 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 362 if (ret < 0) 363 goto err; 364 365 leaf = path->nodes[0]; 366 nritems = btrfs_header_nritems(leaf); 367 368 while (1) { 369 if (btrfs_fs_closing(fs_info) > 1) { 370 last = (u64)-1; 371 break; 372 } 373 374 if (path->slots[0] < nritems) { 375 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 376 } else { 377 ret = find_next_key(path, 0, &key); 378 if (ret) 379 break; 380 381 if (need_resched() || 382 btrfs_next_leaf(extent_root, path)) { 383 caching_ctl->progress = last; 384 btrfs_release_path(path); 385 up_read(&fs_info->extent_commit_sem); 386 mutex_unlock(&caching_ctl->mutex); 387 cond_resched(); 388 goto again; 389 } 390 leaf = path->nodes[0]; 391 nritems = btrfs_header_nritems(leaf); 392 continue; 393 } 394 395 if (key.objectid < block_group->key.objectid) { 396 path->slots[0]++; 397 continue; 398 } 399 400 if (key.objectid >= block_group->key.objectid + 401 block_group->key.offset) 402 break; 403 404 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 405 total_found += add_new_free_space(block_group, 406 fs_info, last, 407 key.objectid); 408 last = key.objectid + key.offset; 409 410 if (total_found > (1024 * 1024 * 2)) { 411 total_found = 0; 412 wake_up(&caching_ctl->wait); 413 } 414 } 415 path->slots[0]++; 416 } 417 ret = 0; 418 419 total_found += add_new_free_space(block_group, fs_info, last, 420 block_group->key.objectid + 421 block_group->key.offset); 422 caching_ctl->progress = (u64)-1; 423 424 spin_lock(&block_group->lock); 425 block_group->caching_ctl = NULL; 426 block_group->cached = BTRFS_CACHE_FINISHED; 427 spin_unlock(&block_group->lock); 428 429 err: 430 btrfs_free_path(path); 431 up_read(&fs_info->extent_commit_sem); 432 433 free_excluded_extents(extent_root, block_group); 434 435 mutex_unlock(&caching_ctl->mutex); 436 wake_up(&caching_ctl->wait); 437 438 put_caching_control(caching_ctl); 439 atomic_dec(&block_group->space_info->caching_threads); 440 btrfs_put_block_group(block_group); 441 442 return 0; 443 } 444 445 static int cache_block_group(struct btrfs_block_group_cache *cache, 446 struct btrfs_trans_handle *trans, 447 struct btrfs_root *root, 448 int load_cache_only) 449 { 450 struct btrfs_fs_info *fs_info = cache->fs_info; 451 struct btrfs_caching_control *caching_ctl; 452 struct task_struct *tsk; 453 int ret = 0; 454 455 smp_mb(); 456 if (cache->cached != BTRFS_CACHE_NO) 457 return 0; 458 459 /* 460 * We can't do the read from on-disk cache during a commit since we need 461 * to have the normal tree locking. Also if we are currently trying to 462 * allocate blocks for the tree root we can't do the fast caching since 463 * we likely hold important locks. 
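 * If the fast path is skipped here, or loading the space cache fails, we fall
 * through and (unless the caller asked for load_cache_only) start the slow
 * caching kthread below.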
464 */ 465 if (trans && (!trans->transaction->in_commit) && 466 (root && root != root->fs_info->tree_root)) { 467 spin_lock(&cache->lock); 468 if (cache->cached != BTRFS_CACHE_NO) { 469 spin_unlock(&cache->lock); 470 return 0; 471 } 472 cache->cached = BTRFS_CACHE_STARTED; 473 spin_unlock(&cache->lock); 474 475 ret = load_free_space_cache(fs_info, cache); 476 477 spin_lock(&cache->lock); 478 if (ret == 1) { 479 cache->cached = BTRFS_CACHE_FINISHED; 480 cache->last_byte_to_unpin = (u64)-1; 481 } else { 482 cache->cached = BTRFS_CACHE_NO; 483 } 484 spin_unlock(&cache->lock); 485 if (ret == 1) { 486 free_excluded_extents(fs_info->extent_root, cache); 487 return 0; 488 } 489 } 490 491 if (load_cache_only) 492 return 0; 493 494 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); 495 BUG_ON(!caching_ctl); 496 497 INIT_LIST_HEAD(&caching_ctl->list); 498 mutex_init(&caching_ctl->mutex); 499 init_waitqueue_head(&caching_ctl->wait); 500 caching_ctl->block_group = cache; 501 caching_ctl->progress = cache->key.objectid; 502 /* one for caching kthread, one for caching block group list */ 503 atomic_set(&caching_ctl->count, 2); 504 505 spin_lock(&cache->lock); 506 if (cache->cached != BTRFS_CACHE_NO) { 507 spin_unlock(&cache->lock); 508 kfree(caching_ctl); 509 return 0; 510 } 511 cache->caching_ctl = caching_ctl; 512 cache->cached = BTRFS_CACHE_STARTED; 513 spin_unlock(&cache->lock); 514 515 down_write(&fs_info->extent_commit_sem); 516 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 517 up_write(&fs_info->extent_commit_sem); 518 519 atomic_inc(&cache->space_info->caching_threads); 520 btrfs_get_block_group(cache); 521 522 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 523 cache->key.objectid); 524 if (IS_ERR(tsk)) { 525 ret = PTR_ERR(tsk); 526 printk(KERN_ERR "error running thread %d\n", ret); 527 BUG(); 528 } 529 530 return ret; 531 } 532 533 /* 534 * return the block group that starts at or after bytenr 535 */ 536 static struct btrfs_block_group_cache * 537 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 538 { 539 struct btrfs_block_group_cache *cache; 540 541 cache = block_group_cache_tree_search(info, bytenr, 0); 542 543 return cache; 544 } 545 546 /* 547 * return the block group that contains the given bytenr 548 */ 549 struct btrfs_block_group_cache *btrfs_lookup_block_group( 550 struct btrfs_fs_info *info, 551 u64 bytenr) 552 { 553 struct btrfs_block_group_cache *cache; 554 555 cache = block_group_cache_tree_search(info, bytenr, 1); 556 557 return cache; 558 } 559 560 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 561 u64 flags) 562 { 563 struct list_head *head = &info->space_info; 564 struct btrfs_space_info *found; 565 566 flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | 567 BTRFS_BLOCK_GROUP_METADATA; 568 569 rcu_read_lock(); 570 list_for_each_entry_rcu(found, head, list) { 571 if (found->flags & flags) { 572 rcu_read_unlock(); 573 return found; 574 } 575 } 576 rcu_read_unlock(); 577 return NULL; 578 } 579 580 /* 581 * after adding space to the filesystem, we need to clear the full flags 582 * on all the space infos. 
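 * The full flag is only a hint that earlier chunk allocations found no room;
 * clearing it lets the allocator try again against the newly added space.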
583 */ 584 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 585 { 586 struct list_head *head = &info->space_info; 587 struct btrfs_space_info *found; 588 589 rcu_read_lock(); 590 list_for_each_entry_rcu(found, head, list) 591 found->full = 0; 592 rcu_read_unlock(); 593 } 594 595 static u64 div_factor(u64 num, int factor) 596 { 597 if (factor == 10) 598 return num; 599 num *= factor; 600 do_div(num, 10); 601 return num; 602 } 603 604 static u64 div_factor_fine(u64 num, int factor) 605 { 606 if (factor == 100) 607 return num; 608 num *= factor; 609 do_div(num, 100); 610 return num; 611 } 612 613 u64 btrfs_find_block_group(struct btrfs_root *root, 614 u64 search_start, u64 search_hint, int owner) 615 { 616 struct btrfs_block_group_cache *cache; 617 u64 used; 618 u64 last = max(search_hint, search_start); 619 u64 group_start = 0; 620 int full_search = 0; 621 int factor = 9; 622 int wrapped = 0; 623 again: 624 while (1) { 625 cache = btrfs_lookup_first_block_group(root->fs_info, last); 626 if (!cache) 627 break; 628 629 spin_lock(&cache->lock); 630 last = cache->key.objectid + cache->key.offset; 631 used = btrfs_block_group_used(&cache->item); 632 633 if ((full_search || !cache->ro) && 634 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 635 if (used + cache->pinned + cache->reserved < 636 div_factor(cache->key.offset, factor)) { 637 group_start = cache->key.objectid; 638 spin_unlock(&cache->lock); 639 btrfs_put_block_group(cache); 640 goto found; 641 } 642 } 643 spin_unlock(&cache->lock); 644 btrfs_put_block_group(cache); 645 cond_resched(); 646 } 647 if (!wrapped) { 648 last = search_start; 649 wrapped = 1; 650 goto again; 651 } 652 if (!full_search && factor < 10) { 653 last = search_start; 654 full_search = 1; 655 factor = 10; 656 goto again; 657 } 658 found: 659 return group_start; 660 } 661 662 /* simple helper to search for an existing extent at a given offset */ 663 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 664 { 665 int ret; 666 struct btrfs_key key; 667 struct btrfs_path *path; 668 669 path = btrfs_alloc_path(); 670 BUG_ON(!path); 671 key.objectid = start; 672 key.offset = len; 673 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 674 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 675 0, 0); 676 btrfs_free_path(path); 677 return ret; 678 } 679 680 /* 681 * helper function to lookup reference count and flags of extent. 682 * 683 * the head node for delayed ref is used to store the sum of all the 684 * reference count modifications queued up in the rbtree. the head 685 * node may also store the extent flags to set. This way you can check 686 * to see what the reference count and extent flags would be if all of 687 * the delayed refs are not processed. 
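 * That is what this function reports: it reads the extent item from the tree
 * and then folds in the pending ref_mod and flag updates recorded on the
 * delayed ref head.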
688 */ 689 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 690 struct btrfs_root *root, u64 bytenr, 691 u64 num_bytes, u64 *refs, u64 *flags) 692 { 693 struct btrfs_delayed_ref_head *head; 694 struct btrfs_delayed_ref_root *delayed_refs; 695 struct btrfs_path *path; 696 struct btrfs_extent_item *ei; 697 struct extent_buffer *leaf; 698 struct btrfs_key key; 699 u32 item_size; 700 u64 num_refs; 701 u64 extent_flags; 702 int ret; 703 704 path = btrfs_alloc_path(); 705 if (!path) 706 return -ENOMEM; 707 708 key.objectid = bytenr; 709 key.type = BTRFS_EXTENT_ITEM_KEY; 710 key.offset = num_bytes; 711 if (!trans) { 712 path->skip_locking = 1; 713 path->search_commit_root = 1; 714 } 715 again: 716 ret = btrfs_search_slot(trans, root->fs_info->extent_root, 717 &key, path, 0, 0); 718 if (ret < 0) 719 goto out_free; 720 721 if (ret == 0) { 722 leaf = path->nodes[0]; 723 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 724 if (item_size >= sizeof(*ei)) { 725 ei = btrfs_item_ptr(leaf, path->slots[0], 726 struct btrfs_extent_item); 727 num_refs = btrfs_extent_refs(leaf, ei); 728 extent_flags = btrfs_extent_flags(leaf, ei); 729 } else { 730 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 731 struct btrfs_extent_item_v0 *ei0; 732 BUG_ON(item_size != sizeof(*ei0)); 733 ei0 = btrfs_item_ptr(leaf, path->slots[0], 734 struct btrfs_extent_item_v0); 735 num_refs = btrfs_extent_refs_v0(leaf, ei0); 736 /* FIXME: this isn't correct for data */ 737 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 738 #else 739 BUG(); 740 #endif 741 } 742 BUG_ON(num_refs == 0); 743 } else { 744 num_refs = 0; 745 extent_flags = 0; 746 ret = 0; 747 } 748 749 if (!trans) 750 goto out; 751 752 delayed_refs = &trans->transaction->delayed_refs; 753 spin_lock(&delayed_refs->lock); 754 head = btrfs_find_delayed_ref_head(trans, bytenr); 755 if (head) { 756 if (!mutex_trylock(&head->mutex)) { 757 atomic_inc(&head->node.refs); 758 spin_unlock(&delayed_refs->lock); 759 760 btrfs_release_path(path); 761 762 /* 763 * Mutex was contended, block until it's released and try 764 * again 765 */ 766 mutex_lock(&head->mutex); 767 mutex_unlock(&head->mutex); 768 btrfs_put_delayed_ref(&head->node); 769 goto again; 770 } 771 if (head->extent_op && head->extent_op->update_flags) 772 extent_flags |= head->extent_op->flags_to_set; 773 else 774 BUG_ON(num_refs == 0); 775 776 num_refs += head->node.ref_mod; 777 mutex_unlock(&head->mutex); 778 } 779 spin_unlock(&delayed_refs->lock); 780 out: 781 WARN_ON(num_refs == 0); 782 if (refs) 783 *refs = num_refs; 784 if (flags) 785 *flags = extent_flags; 786 out_free: 787 btrfs_free_path(path); 788 return ret; 789 } 790 791 /* 792 * Back reference rules. Back refs have three main goals: 793 * 794 * 1) differentiate between all holders of references to an extent so that 795 * when a reference is dropped we can make sure it was a valid reference 796 * before freeing the extent. 797 * 798 * 2) Provide enough information to quickly find the holders of an extent 799 * if we notice a given block is corrupted or bad. 800 * 801 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 802 * maintenance. This is actually the same as #2, but with a slightly 803 * different use case. 804 * 805 * There are two kinds of back refs. The implicit back refs is optimized 806 * for pointers in non-shared tree blocks. For a given pointer in a block, 807 * back refs of this kind provide information about the block's owner tree 808 * and the pointer's key. These information allow us to find the block by 809 * b-tree searching. 
 * Full back refs are for pointers in tree blocks that are not
 * referenced by their owner trees. The location of the tree block is
 * recorded in the back ref. The full back ref is actually generic and
 * could be used in every case the implicit back ref is used; its major
 * shortcoming is overhead: every time a tree block gets COWed, the back
 * ref entries for all pointers in it have to be updated.
 *
 * For a newly allocated tree block, we use implicit back refs for the
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in an old transaction,
 * the only way to drop a reference to it is to COW it. So we can detect
 * the event that a tree block loses its owner tree's reference and do
 * the back ref conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs are used for the
 * pointers in the block. Remove those full back refs and add implicit
 * back refs for every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs are used for
 * the pointers in the block. Add full back refs for every pointer in
 * the block and increase the lower level extents' reference counts.
 * The original implicit back refs are inherited by the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer
 * in the new block and increase the lower level extents' reference
 * counts.
 *
 * Back reference key composition:
 *
 * The key objectid corresponds to the first byte in the extent.
 * The key type is used to differentiate between types of back refs.
 * The meaning of the key offset depends on the type of back ref.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is a hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of the tree
 * leaf.
 *
 * When a file extent is allocated, the implicit back refs are used and
 * the fields are filled in as:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of a key.
The key offset for the implicit back refs is 889 * objectid of block's owner tree. The key offset for the full back refs 890 * is the first byte of parent block. 891 * 892 * When implicit back refs is used, information about the lowest key and 893 * level of the tree block are required. These information are stored in 894 * tree block info structure. 895 */ 896 897 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 898 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 899 struct btrfs_root *root, 900 struct btrfs_path *path, 901 u64 owner, u32 extra_size) 902 { 903 struct btrfs_extent_item *item; 904 struct btrfs_extent_item_v0 *ei0; 905 struct btrfs_extent_ref_v0 *ref0; 906 struct btrfs_tree_block_info *bi; 907 struct extent_buffer *leaf; 908 struct btrfs_key key; 909 struct btrfs_key found_key; 910 u32 new_size = sizeof(*item); 911 u64 refs; 912 int ret; 913 914 leaf = path->nodes[0]; 915 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 916 917 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 918 ei0 = btrfs_item_ptr(leaf, path->slots[0], 919 struct btrfs_extent_item_v0); 920 refs = btrfs_extent_refs_v0(leaf, ei0); 921 922 if (owner == (u64)-1) { 923 while (1) { 924 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 925 ret = btrfs_next_leaf(root, path); 926 if (ret < 0) 927 return ret; 928 BUG_ON(ret > 0); 929 leaf = path->nodes[0]; 930 } 931 btrfs_item_key_to_cpu(leaf, &found_key, 932 path->slots[0]); 933 BUG_ON(key.objectid != found_key.objectid); 934 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 935 path->slots[0]++; 936 continue; 937 } 938 ref0 = btrfs_item_ptr(leaf, path->slots[0], 939 struct btrfs_extent_ref_v0); 940 owner = btrfs_ref_objectid_v0(leaf, ref0); 941 break; 942 } 943 } 944 btrfs_release_path(path); 945 946 if (owner < BTRFS_FIRST_FREE_OBJECTID) 947 new_size += sizeof(*bi); 948 949 new_size -= sizeof(*ei0); 950 ret = btrfs_search_slot(trans, root, &key, path, 951 new_size + extra_size, 1); 952 if (ret < 0) 953 return ret; 954 BUG_ON(ret); 955 956 ret = btrfs_extend_item(trans, root, path, new_size); 957 958 leaf = path->nodes[0]; 959 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 960 btrfs_set_extent_refs(leaf, item, refs); 961 /* FIXME: get real generation */ 962 btrfs_set_extent_generation(leaf, item, 0); 963 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 964 btrfs_set_extent_flags(leaf, item, 965 BTRFS_EXTENT_FLAG_TREE_BLOCK | 966 BTRFS_BLOCK_FLAG_FULL_BACKREF); 967 bi = (struct btrfs_tree_block_info *)(item + 1); 968 /* FIXME: get first key of the block */ 969 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 970 btrfs_set_tree_block_level(leaf, bi, (int)owner); 971 } else { 972 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 973 } 974 btrfs_mark_buffer_dirty(leaf); 975 return 0; 976 } 977 #endif 978 979 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 980 { 981 u32 high_crc = ~(u32)0; 982 u32 low_crc = ~(u32)0; 983 __le64 lenum; 984 985 lenum = cpu_to_le64(root_objectid); 986 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 987 lenum = cpu_to_le64(owner); 988 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 989 lenum = cpu_to_le64(offset); 990 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 991 992 return ((u64)high_crc << 31) ^ (u64)low_crc; 993 } 994 995 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 996 struct btrfs_extent_data_ref *ref) 997 { 998 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 999 
btrfs_extent_data_ref_objectid(leaf, ref), 1000 btrfs_extent_data_ref_offset(leaf, ref)); 1001 } 1002 1003 static int match_extent_data_ref(struct extent_buffer *leaf, 1004 struct btrfs_extent_data_ref *ref, 1005 u64 root_objectid, u64 owner, u64 offset) 1006 { 1007 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 1008 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 1009 btrfs_extent_data_ref_offset(leaf, ref) != offset) 1010 return 0; 1011 return 1; 1012 } 1013 1014 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 1015 struct btrfs_root *root, 1016 struct btrfs_path *path, 1017 u64 bytenr, u64 parent, 1018 u64 root_objectid, 1019 u64 owner, u64 offset) 1020 { 1021 struct btrfs_key key; 1022 struct btrfs_extent_data_ref *ref; 1023 struct extent_buffer *leaf; 1024 u32 nritems; 1025 int ret; 1026 int recow; 1027 int err = -ENOENT; 1028 1029 key.objectid = bytenr; 1030 if (parent) { 1031 key.type = BTRFS_SHARED_DATA_REF_KEY; 1032 key.offset = parent; 1033 } else { 1034 key.type = BTRFS_EXTENT_DATA_REF_KEY; 1035 key.offset = hash_extent_data_ref(root_objectid, 1036 owner, offset); 1037 } 1038 again: 1039 recow = 0; 1040 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1041 if (ret < 0) { 1042 err = ret; 1043 goto fail; 1044 } 1045 1046 if (parent) { 1047 if (!ret) 1048 return 0; 1049 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1050 key.type = BTRFS_EXTENT_REF_V0_KEY; 1051 btrfs_release_path(path); 1052 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1053 if (ret < 0) { 1054 err = ret; 1055 goto fail; 1056 } 1057 if (!ret) 1058 return 0; 1059 #endif 1060 goto fail; 1061 } 1062 1063 leaf = path->nodes[0]; 1064 nritems = btrfs_header_nritems(leaf); 1065 while (1) { 1066 if (path->slots[0] >= nritems) { 1067 ret = btrfs_next_leaf(root, path); 1068 if (ret < 0) 1069 err = ret; 1070 if (ret) 1071 goto fail; 1072 1073 leaf = path->nodes[0]; 1074 nritems = btrfs_header_nritems(leaf); 1075 recow = 1; 1076 } 1077 1078 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1079 if (key.objectid != bytenr || 1080 key.type != BTRFS_EXTENT_DATA_REF_KEY) 1081 goto fail; 1082 1083 ref = btrfs_item_ptr(leaf, path->slots[0], 1084 struct btrfs_extent_data_ref); 1085 1086 if (match_extent_data_ref(leaf, ref, root_objectid, 1087 owner, offset)) { 1088 if (recow) { 1089 btrfs_release_path(path); 1090 goto again; 1091 } 1092 err = 0; 1093 break; 1094 } 1095 path->slots[0]++; 1096 } 1097 fail: 1098 return err; 1099 } 1100 1101 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 1102 struct btrfs_root *root, 1103 struct btrfs_path *path, 1104 u64 bytenr, u64 parent, 1105 u64 root_objectid, u64 owner, 1106 u64 offset, int refs_to_add) 1107 { 1108 struct btrfs_key key; 1109 struct extent_buffer *leaf; 1110 u32 size; 1111 u32 num_refs; 1112 int ret; 1113 1114 key.objectid = bytenr; 1115 if (parent) { 1116 key.type = BTRFS_SHARED_DATA_REF_KEY; 1117 key.offset = parent; 1118 size = sizeof(struct btrfs_shared_data_ref); 1119 } else { 1120 key.type = BTRFS_EXTENT_DATA_REF_KEY; 1121 key.offset = hash_extent_data_ref(root_objectid, 1122 owner, offset); 1123 size = sizeof(struct btrfs_extent_data_ref); 1124 } 1125 1126 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 1127 if (ret && ret != -EEXIST) 1128 goto fail; 1129 1130 leaf = path->nodes[0]; 1131 if (parent) { 1132 struct btrfs_shared_data_ref *ref; 1133 ref = btrfs_item_ptr(leaf, path->slots[0], 1134 struct btrfs_shared_data_ref); 1135 if (ret == 0) { 1136 
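/* ret == 0 means btrfs_insert_empty_item created a new item, so the count
 * starts at refs_to_add; on -EEXIST we add to the count already stored. */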
btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 1137 } else { 1138 num_refs = btrfs_shared_data_ref_count(leaf, ref); 1139 num_refs += refs_to_add; 1140 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 1141 } 1142 } else { 1143 struct btrfs_extent_data_ref *ref; 1144 while (ret == -EEXIST) { 1145 ref = btrfs_item_ptr(leaf, path->slots[0], 1146 struct btrfs_extent_data_ref); 1147 if (match_extent_data_ref(leaf, ref, root_objectid, 1148 owner, offset)) 1149 break; 1150 btrfs_release_path(path); 1151 key.offset++; 1152 ret = btrfs_insert_empty_item(trans, root, path, &key, 1153 size); 1154 if (ret && ret != -EEXIST) 1155 goto fail; 1156 1157 leaf = path->nodes[0]; 1158 } 1159 ref = btrfs_item_ptr(leaf, path->slots[0], 1160 struct btrfs_extent_data_ref); 1161 if (ret == 0) { 1162 btrfs_set_extent_data_ref_root(leaf, ref, 1163 root_objectid); 1164 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 1165 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 1166 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 1167 } else { 1168 num_refs = btrfs_extent_data_ref_count(leaf, ref); 1169 num_refs += refs_to_add; 1170 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 1171 } 1172 } 1173 btrfs_mark_buffer_dirty(leaf); 1174 ret = 0; 1175 fail: 1176 btrfs_release_path(path); 1177 return ret; 1178 } 1179 1180 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1181 struct btrfs_root *root, 1182 struct btrfs_path *path, 1183 int refs_to_drop) 1184 { 1185 struct btrfs_key key; 1186 struct btrfs_extent_data_ref *ref1 = NULL; 1187 struct btrfs_shared_data_ref *ref2 = NULL; 1188 struct extent_buffer *leaf; 1189 u32 num_refs = 0; 1190 int ret = 0; 1191 1192 leaf = path->nodes[0]; 1193 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1194 1195 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1196 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1197 struct btrfs_extent_data_ref); 1198 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1199 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1200 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1201 struct btrfs_shared_data_ref); 1202 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1203 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1204 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1205 struct btrfs_extent_ref_v0 *ref0; 1206 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1207 struct btrfs_extent_ref_v0); 1208 num_refs = btrfs_ref_count_v0(leaf, ref0); 1209 #endif 1210 } else { 1211 BUG(); 1212 } 1213 1214 BUG_ON(num_refs < refs_to_drop); 1215 num_refs -= refs_to_drop; 1216 1217 if (num_refs == 0) { 1218 ret = btrfs_del_item(trans, root, path); 1219 } else { 1220 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1221 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1222 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1223 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1224 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1225 else { 1226 struct btrfs_extent_ref_v0 *ref0; 1227 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1228 struct btrfs_extent_ref_v0); 1229 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1230 } 1231 #endif 1232 btrfs_mark_buffer_dirty(leaf); 1233 } 1234 return ret; 1235 } 1236 1237 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 1238 struct btrfs_path *path, 1239 struct btrfs_extent_inline_ref *iref) 1240 { 1241 struct btrfs_key key; 1242 struct extent_buffer *leaf; 1243 struct btrfs_extent_data_ref *ref1; 1244 struct btrfs_shared_data_ref *ref2; 1245 u32 num_refs = 0; 1246 1247 leaf = path->nodes[0]; 
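/* The data ref either sits inline in the extent item (iref != NULL) or is a
 * standalone keyed item; read the count from whichever structure is present. */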
1248 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1249 if (iref) { 1250 if (btrfs_extent_inline_ref_type(leaf, iref) == 1251 BTRFS_EXTENT_DATA_REF_KEY) { 1252 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1253 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1254 } else { 1255 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 1256 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1257 } 1258 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1259 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1260 struct btrfs_extent_data_ref); 1261 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1262 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1263 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1264 struct btrfs_shared_data_ref); 1265 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1266 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1267 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1268 struct btrfs_extent_ref_v0 *ref0; 1269 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1270 struct btrfs_extent_ref_v0); 1271 num_refs = btrfs_ref_count_v0(leaf, ref0); 1272 #endif 1273 } else { 1274 WARN_ON(1); 1275 } 1276 return num_refs; 1277 } 1278 1279 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1280 struct btrfs_root *root, 1281 struct btrfs_path *path, 1282 u64 bytenr, u64 parent, 1283 u64 root_objectid) 1284 { 1285 struct btrfs_key key; 1286 int ret; 1287 1288 key.objectid = bytenr; 1289 if (parent) { 1290 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1291 key.offset = parent; 1292 } else { 1293 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1294 key.offset = root_objectid; 1295 } 1296 1297 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1298 if (ret > 0) 1299 ret = -ENOENT; 1300 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1301 if (ret == -ENOENT && parent) { 1302 btrfs_release_path(path); 1303 key.type = BTRFS_EXTENT_REF_V0_KEY; 1304 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1305 if (ret > 0) 1306 ret = -ENOENT; 1307 } 1308 #endif 1309 return ret; 1310 } 1311 1312 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1313 struct btrfs_root *root, 1314 struct btrfs_path *path, 1315 u64 bytenr, u64 parent, 1316 u64 root_objectid) 1317 { 1318 struct btrfs_key key; 1319 int ret; 1320 1321 key.objectid = bytenr; 1322 if (parent) { 1323 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1324 key.offset = parent; 1325 } else { 1326 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1327 key.offset = root_objectid; 1328 } 1329 1330 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1331 btrfs_release_path(path); 1332 return ret; 1333 } 1334 1335 static inline int extent_ref_type(u64 parent, u64 owner) 1336 { 1337 int type; 1338 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1339 if (parent > 0) 1340 type = BTRFS_SHARED_BLOCK_REF_KEY; 1341 else 1342 type = BTRFS_TREE_BLOCK_REF_KEY; 1343 } else { 1344 if (parent > 0) 1345 type = BTRFS_SHARED_DATA_REF_KEY; 1346 else 1347 type = BTRFS_EXTENT_DATA_REF_KEY; 1348 } 1349 return type; 1350 } 1351 1352 static int find_next_key(struct btrfs_path *path, int level, 1353 struct btrfs_key *key) 1354 1355 { 1356 for (; level < BTRFS_MAX_LEVEL; level++) { 1357 if (!path->nodes[level]) 1358 break; 1359 if (path->slots[level] + 1 >= 1360 btrfs_header_nritems(path->nodes[level])) 1361 continue; 1362 if (level == 0) 1363 btrfs_item_key_to_cpu(path->nodes[level], key, 1364 path->slots[level] + 1); 1365 else 1366 btrfs_node_key_to_cpu(path->nodes[level], key, 1367 path->slots[level] + 1); 1368 return 0; 1369 } 1370 return 1; 1371 } 1372 1373 /* 
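 * Inline back refs live inside the extent item itself, immediately after
 * struct btrfs_extent_item (and after the btrfs_tree_block_info for tree
 * blocks).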
1374 * look for inline back ref. if back ref is found, *ref_ret is set 1375 * to the address of inline back ref, and 0 is returned. 1376 * 1377 * if back ref isn't found, *ref_ret is set to the address where it 1378 * should be inserted, and -ENOENT is returned. 1379 * 1380 * if insert is true and there are too many inline back refs, the path 1381 * points to the extent item, and -EAGAIN is returned. 1382 * 1383 * NOTE: inline back refs are ordered in the same way that back ref 1384 * items in the tree are ordered. 1385 */ 1386 static noinline_for_stack 1387 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1388 struct btrfs_root *root, 1389 struct btrfs_path *path, 1390 struct btrfs_extent_inline_ref **ref_ret, 1391 u64 bytenr, u64 num_bytes, 1392 u64 parent, u64 root_objectid, 1393 u64 owner, u64 offset, int insert) 1394 { 1395 struct btrfs_key key; 1396 struct extent_buffer *leaf; 1397 struct btrfs_extent_item *ei; 1398 struct btrfs_extent_inline_ref *iref; 1399 u64 flags; 1400 u64 item_size; 1401 unsigned long ptr; 1402 unsigned long end; 1403 int extra_size; 1404 int type; 1405 int want; 1406 int ret; 1407 int err = 0; 1408 1409 key.objectid = bytenr; 1410 key.type = BTRFS_EXTENT_ITEM_KEY; 1411 key.offset = num_bytes; 1412 1413 want = extent_ref_type(parent, owner); 1414 if (insert) { 1415 extra_size = btrfs_extent_inline_ref_size(want); 1416 path->keep_locks = 1; 1417 } else 1418 extra_size = -1; 1419 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1420 if (ret < 0) { 1421 err = ret; 1422 goto out; 1423 } 1424 BUG_ON(ret); 1425 1426 leaf = path->nodes[0]; 1427 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1428 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1429 if (item_size < sizeof(*ei)) { 1430 if (!insert) { 1431 err = -ENOENT; 1432 goto out; 1433 } 1434 ret = convert_extent_item_v0(trans, root, path, owner, 1435 extra_size); 1436 if (ret < 0) { 1437 err = ret; 1438 goto out; 1439 } 1440 leaf = path->nodes[0]; 1441 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1442 } 1443 #endif 1444 BUG_ON(item_size < sizeof(*ei)); 1445 1446 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1447 flags = btrfs_extent_flags(leaf, ei); 1448 1449 ptr = (unsigned long)(ei + 1); 1450 end = (unsigned long)ei + item_size; 1451 1452 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1453 ptr += sizeof(struct btrfs_tree_block_info); 1454 BUG_ON(ptr > end); 1455 } else { 1456 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1457 } 1458 1459 err = -ENOENT; 1460 while (1) { 1461 if (ptr >= end) { 1462 WARN_ON(ptr > end); 1463 break; 1464 } 1465 iref = (struct btrfs_extent_inline_ref *)ptr; 1466 type = btrfs_extent_inline_ref_type(leaf, iref); 1467 if (want < type) 1468 break; 1469 if (want > type) { 1470 ptr += btrfs_extent_inline_ref_size(type); 1471 continue; 1472 } 1473 1474 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1475 struct btrfs_extent_data_ref *dref; 1476 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1477 if (match_extent_data_ref(leaf, dref, root_objectid, 1478 owner, offset)) { 1479 err = 0; 1480 break; 1481 } 1482 if (hash_extent_data_ref_item(leaf, dref) < 1483 hash_extent_data_ref(root_objectid, owner, offset)) 1484 break; 1485 } else { 1486 u64 ref_offset; 1487 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1488 if (parent > 0) { 1489 if (parent == ref_offset) { 1490 err = 0; 1491 break; 1492 } 1493 if (ref_offset < parent) 1494 break; 1495 } else { 1496 if (root_objectid == ref_offset) { 1497 err = 0; 1498 break; 1499 } 1500 if 
(ref_offset < root_objectid) 1501 break; 1502 } 1503 } 1504 ptr += btrfs_extent_inline_ref_size(type); 1505 } 1506 if (err == -ENOENT && insert) { 1507 if (item_size + extra_size >= 1508 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1509 err = -EAGAIN; 1510 goto out; 1511 } 1512 /* 1513 * To add new inline back ref, we have to make sure 1514 * there is no corresponding back ref item. 1515 * For simplicity, we just do not add new inline back 1516 * ref if there is any kind of item for this block 1517 */ 1518 if (find_next_key(path, 0, &key) == 0 && 1519 key.objectid == bytenr && 1520 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1521 err = -EAGAIN; 1522 goto out; 1523 } 1524 } 1525 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1526 out: 1527 if (insert) { 1528 path->keep_locks = 0; 1529 btrfs_unlock_up_safe(path, 1); 1530 } 1531 return err; 1532 } 1533 1534 /* 1535 * helper to add new inline back ref 1536 */ 1537 static noinline_for_stack 1538 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1539 struct btrfs_root *root, 1540 struct btrfs_path *path, 1541 struct btrfs_extent_inline_ref *iref, 1542 u64 parent, u64 root_objectid, 1543 u64 owner, u64 offset, int refs_to_add, 1544 struct btrfs_delayed_extent_op *extent_op) 1545 { 1546 struct extent_buffer *leaf; 1547 struct btrfs_extent_item *ei; 1548 unsigned long ptr; 1549 unsigned long end; 1550 unsigned long item_offset; 1551 u64 refs; 1552 int size; 1553 int type; 1554 int ret; 1555 1556 leaf = path->nodes[0]; 1557 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1558 item_offset = (unsigned long)iref - (unsigned long)ei; 1559 1560 type = extent_ref_type(parent, owner); 1561 size = btrfs_extent_inline_ref_size(type); 1562 1563 ret = btrfs_extend_item(trans, root, path, size); 1564 1565 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1566 refs = btrfs_extent_refs(leaf, ei); 1567 refs += refs_to_add; 1568 btrfs_set_extent_refs(leaf, ei, refs); 1569 if (extent_op) 1570 __run_delayed_extent_op(extent_op, leaf, ei); 1571 1572 ptr = (unsigned long)ei + item_offset; 1573 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1574 if (ptr < end - size) 1575 memmove_extent_buffer(leaf, ptr + size, ptr, 1576 end - size - ptr); 1577 1578 iref = (struct btrfs_extent_inline_ref *)ptr; 1579 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1580 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1581 struct btrfs_extent_data_ref *dref; 1582 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1583 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1584 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1585 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1586 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1587 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1588 struct btrfs_shared_data_ref *sref; 1589 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1590 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1591 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1592 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1593 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1594 } else { 1595 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1596 } 1597 btrfs_mark_buffer_dirty(leaf); 1598 return 0; 1599 } 1600 1601 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1602 struct btrfs_root *root, 1603 struct btrfs_path *path, 1604 struct btrfs_extent_inline_ref **ref_ret, 1605 u64 bytenr, u64 num_bytes, u64 parent, 1606 u64 
root_objectid, u64 owner, u64 offset) 1607 { 1608 int ret; 1609 1610 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1611 bytenr, num_bytes, parent, 1612 root_objectid, owner, offset, 0); 1613 if (ret != -ENOENT) 1614 return ret; 1615 1616 btrfs_release_path(path); 1617 *ref_ret = NULL; 1618 1619 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1620 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1621 root_objectid); 1622 } else { 1623 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1624 root_objectid, owner, offset); 1625 } 1626 return ret; 1627 } 1628 1629 /* 1630 * helper to update/remove inline back ref 1631 */ 1632 static noinline_for_stack 1633 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1634 struct btrfs_root *root, 1635 struct btrfs_path *path, 1636 struct btrfs_extent_inline_ref *iref, 1637 int refs_to_mod, 1638 struct btrfs_delayed_extent_op *extent_op) 1639 { 1640 struct extent_buffer *leaf; 1641 struct btrfs_extent_item *ei; 1642 struct btrfs_extent_data_ref *dref = NULL; 1643 struct btrfs_shared_data_ref *sref = NULL; 1644 unsigned long ptr; 1645 unsigned long end; 1646 u32 item_size; 1647 int size; 1648 int type; 1649 int ret; 1650 u64 refs; 1651 1652 leaf = path->nodes[0]; 1653 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1654 refs = btrfs_extent_refs(leaf, ei); 1655 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1656 refs += refs_to_mod; 1657 btrfs_set_extent_refs(leaf, ei, refs); 1658 if (extent_op) 1659 __run_delayed_extent_op(extent_op, leaf, ei); 1660 1661 type = btrfs_extent_inline_ref_type(leaf, iref); 1662 1663 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1664 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1665 refs = btrfs_extent_data_ref_count(leaf, dref); 1666 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1667 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1668 refs = btrfs_shared_data_ref_count(leaf, sref); 1669 } else { 1670 refs = 1; 1671 BUG_ON(refs_to_mod != -1); 1672 } 1673 1674 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1675 refs += refs_to_mod; 1676 1677 if (refs > 0) { 1678 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1679 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1680 else 1681 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1682 } else { 1683 size = btrfs_extent_inline_ref_size(type); 1684 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1685 ptr = (unsigned long)iref; 1686 end = (unsigned long)ei + item_size; 1687 if (ptr + size < end) 1688 memmove_extent_buffer(leaf, ptr, ptr + size, 1689 end - ptr - size); 1690 item_size -= size; 1691 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1692 } 1693 btrfs_mark_buffer_dirty(leaf); 1694 return 0; 1695 } 1696 1697 static noinline_for_stack 1698 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1699 struct btrfs_root *root, 1700 struct btrfs_path *path, 1701 u64 bytenr, u64 num_bytes, u64 parent, 1702 u64 root_objectid, u64 owner, 1703 u64 offset, int refs_to_add, 1704 struct btrfs_delayed_extent_op *extent_op) 1705 { 1706 struct btrfs_extent_inline_ref *iref; 1707 int ret; 1708 1709 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1710 bytenr, num_bytes, parent, 1711 root_objectid, owner, offset, 1); 1712 if (ret == 0) { 1713 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1714 ret = update_inline_extent_backref(trans, root, path, iref, 1715 refs_to_add, extent_op); 1716 } else if (ret == -ENOENT) { 1717 ret = setup_inline_extent_backref(trans, root, path, 
iref, 1718 parent, root_objectid, 1719 owner, offset, refs_to_add, 1720 extent_op); 1721 } 1722 return ret; 1723 } 1724 1725 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1726 struct btrfs_root *root, 1727 struct btrfs_path *path, 1728 u64 bytenr, u64 parent, u64 root_objectid, 1729 u64 owner, u64 offset, int refs_to_add) 1730 { 1731 int ret; 1732 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1733 BUG_ON(refs_to_add != 1); 1734 ret = insert_tree_block_ref(trans, root, path, bytenr, 1735 parent, root_objectid); 1736 } else { 1737 ret = insert_extent_data_ref(trans, root, path, bytenr, 1738 parent, root_objectid, 1739 owner, offset, refs_to_add); 1740 } 1741 return ret; 1742 } 1743 1744 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1745 struct btrfs_root *root, 1746 struct btrfs_path *path, 1747 struct btrfs_extent_inline_ref *iref, 1748 int refs_to_drop, int is_data) 1749 { 1750 int ret; 1751 1752 BUG_ON(!is_data && refs_to_drop != 1); 1753 if (iref) { 1754 ret = update_inline_extent_backref(trans, root, path, iref, 1755 -refs_to_drop, NULL); 1756 } else if (is_data) { 1757 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1758 } else { 1759 ret = btrfs_del_item(trans, root, path); 1760 } 1761 return ret; 1762 } 1763 1764 static int btrfs_issue_discard(struct block_device *bdev, 1765 u64 start, u64 len) 1766 { 1767 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); 1768 } 1769 1770 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1771 u64 num_bytes, u64 *actual_bytes) 1772 { 1773 int ret; 1774 u64 discarded_bytes = 0; 1775 struct btrfs_multi_bio *multi = NULL; 1776 1777 1778 /* Tell the block device(s) that the sectors can be discarded */ 1779 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, 1780 bytenr, &num_bytes, &multi, 0); 1781 if (!ret) { 1782 struct btrfs_bio_stripe *stripe = multi->stripes; 1783 int i; 1784 1785 1786 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1787 ret = btrfs_issue_discard(stripe->dev->bdev, 1788 stripe->physical, 1789 stripe->length); 1790 if (!ret) 1791 discarded_bytes += stripe->length; 1792 else if (ret != -EOPNOTSUPP) 1793 break; 1794 } 1795 kfree(multi); 1796 } 1797 if (discarded_bytes && ret == -EOPNOTSUPP) 1798 ret = 0; 1799 1800 if (actual_bytes) 1801 *actual_bytes = discarded_bytes; 1802 1803 1804 return ret; 1805 } 1806 1807 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1808 struct btrfs_root *root, 1809 u64 bytenr, u64 num_bytes, u64 parent, 1810 u64 root_objectid, u64 owner, u64 offset) 1811 { 1812 int ret; 1813 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1814 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1815 1816 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1817 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1818 parent, root_objectid, (int)owner, 1819 BTRFS_ADD_DELAYED_REF, NULL); 1820 } else { 1821 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1822 parent, root_objectid, owner, offset, 1823 BTRFS_ADD_DELAYED_REF, NULL); 1824 } 1825 return ret; 1826 } 1827 1828 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1829 struct btrfs_root *root, 1830 u64 bytenr, u64 num_bytes, 1831 u64 parent, u64 root_objectid, 1832 u64 owner, u64 offset, int refs_to_add, 1833 struct btrfs_delayed_extent_op *extent_op) 1834 { 1835 struct btrfs_path *path; 1836 struct extent_buffer *leaf; 1837 struct btrfs_extent_item *item; 1838 u64 refs; 1839 int ret; 1840 int err = 0; 1841 1842 path = btrfs_alloc_path(); 1843 
if (!path) 1844 return -ENOMEM; 1845 1846 path->reada = 1; 1847 path->leave_spinning = 1; 1848 /* this will setup the path even if it fails to insert the back ref */ 1849 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1850 path, bytenr, num_bytes, parent, 1851 root_objectid, owner, offset, 1852 refs_to_add, extent_op); 1853 if (ret == 0) 1854 goto out; 1855 1856 if (ret != -EAGAIN) { 1857 err = ret; 1858 goto out; 1859 } 1860 1861 leaf = path->nodes[0]; 1862 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1863 refs = btrfs_extent_refs(leaf, item); 1864 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1865 if (extent_op) 1866 __run_delayed_extent_op(extent_op, leaf, item); 1867 1868 btrfs_mark_buffer_dirty(leaf); 1869 btrfs_release_path(path); 1870 1871 path->reada = 1; 1872 path->leave_spinning = 1; 1873 1874 /* now insert the actual backref */ 1875 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1876 path, bytenr, parent, root_objectid, 1877 owner, offset, refs_to_add); 1878 BUG_ON(ret); 1879 out: 1880 btrfs_free_path(path); 1881 return err; 1882 } 1883 1884 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1885 struct btrfs_root *root, 1886 struct btrfs_delayed_ref_node *node, 1887 struct btrfs_delayed_extent_op *extent_op, 1888 int insert_reserved) 1889 { 1890 int ret = 0; 1891 struct btrfs_delayed_data_ref *ref; 1892 struct btrfs_key ins; 1893 u64 parent = 0; 1894 u64 ref_root = 0; 1895 u64 flags = 0; 1896 1897 ins.objectid = node->bytenr; 1898 ins.offset = node->num_bytes; 1899 ins.type = BTRFS_EXTENT_ITEM_KEY; 1900 1901 ref = btrfs_delayed_node_to_data_ref(node); 1902 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1903 parent = ref->parent; 1904 else 1905 ref_root = ref->root; 1906 1907 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1908 if (extent_op) { 1909 BUG_ON(extent_op->update_key); 1910 flags |= extent_op->flags_to_set; 1911 } 1912 ret = alloc_reserved_file_extent(trans, root, 1913 parent, ref_root, flags, 1914 ref->objectid, ref->offset, 1915 &ins, node->ref_mod); 1916 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1917 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1918 node->num_bytes, parent, 1919 ref_root, ref->objectid, 1920 ref->offset, node->ref_mod, 1921 extent_op); 1922 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1923 ret = __btrfs_free_extent(trans, root, node->bytenr, 1924 node->num_bytes, parent, 1925 ref_root, ref->objectid, 1926 ref->offset, node->ref_mod, 1927 extent_op); 1928 } else { 1929 BUG(); 1930 } 1931 return ret; 1932 } 1933 1934 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1935 struct extent_buffer *leaf, 1936 struct btrfs_extent_item *ei) 1937 { 1938 u64 flags = btrfs_extent_flags(leaf, ei); 1939 if (extent_op->update_flags) { 1940 flags |= extent_op->flags_to_set; 1941 btrfs_set_extent_flags(leaf, ei, flags); 1942 } 1943 1944 if (extent_op->update_key) { 1945 struct btrfs_tree_block_info *bi; 1946 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1947 bi = (struct btrfs_tree_block_info *)(ei + 1); 1948 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1949 } 1950 } 1951 1952 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1953 struct btrfs_root *root, 1954 struct btrfs_delayed_ref_node *node, 1955 struct btrfs_delayed_extent_op *extent_op) 1956 { 1957 struct btrfs_key key; 1958 struct btrfs_path *path; 1959 struct btrfs_extent_item *ei; 1960 struct extent_buffer *leaf; 1961 
u32 item_size; 1962 int ret; 1963 int err = 0; 1964 1965 path = btrfs_alloc_path(); 1966 if (!path) 1967 return -ENOMEM; 1968 1969 key.objectid = node->bytenr; 1970 key.type = BTRFS_EXTENT_ITEM_KEY; 1971 key.offset = node->num_bytes; 1972 1973 path->reada = 1; 1974 path->leave_spinning = 1; 1975 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1976 path, 0, 1); 1977 if (ret < 0) { 1978 err = ret; 1979 goto out; 1980 } 1981 if (ret > 0) { 1982 err = -EIO; 1983 goto out; 1984 } 1985 1986 leaf = path->nodes[0]; 1987 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1988 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1989 if (item_size < sizeof(*ei)) { 1990 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1991 path, (u64)-1, 0); 1992 if (ret < 0) { 1993 err = ret; 1994 goto out; 1995 } 1996 leaf = path->nodes[0]; 1997 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1998 } 1999 #endif 2000 BUG_ON(item_size < sizeof(*ei)); 2001 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2002 __run_delayed_extent_op(extent_op, leaf, ei); 2003 2004 btrfs_mark_buffer_dirty(leaf); 2005 out: 2006 btrfs_free_path(path); 2007 return err; 2008 } 2009 2010 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 2011 struct btrfs_root *root, 2012 struct btrfs_delayed_ref_node *node, 2013 struct btrfs_delayed_extent_op *extent_op, 2014 int insert_reserved) 2015 { 2016 int ret = 0; 2017 struct btrfs_delayed_tree_ref *ref; 2018 struct btrfs_key ins; 2019 u64 parent = 0; 2020 u64 ref_root = 0; 2021 2022 ins.objectid = node->bytenr; 2023 ins.offset = node->num_bytes; 2024 ins.type = BTRFS_EXTENT_ITEM_KEY; 2025 2026 ref = btrfs_delayed_node_to_tree_ref(node); 2027 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2028 parent = ref->parent; 2029 else 2030 ref_root = ref->root; 2031 2032 BUG_ON(node->ref_mod != 1); 2033 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2034 BUG_ON(!extent_op || !extent_op->update_flags || 2035 !extent_op->update_key); 2036 ret = alloc_reserved_tree_block(trans, root, 2037 parent, ref_root, 2038 extent_op->flags_to_set, 2039 &extent_op->key, 2040 ref->level, &ins); 2041 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2042 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2043 node->num_bytes, parent, ref_root, 2044 ref->level, 0, 1, extent_op); 2045 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2046 ret = __btrfs_free_extent(trans, root, node->bytenr, 2047 node->num_bytes, parent, ref_root, 2048 ref->level, 0, 1, extent_op); 2049 } else { 2050 BUG(); 2051 } 2052 return ret; 2053 } 2054 2055 /* helper function to actually process a single delayed ref entry */ 2056 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 2057 struct btrfs_root *root, 2058 struct btrfs_delayed_ref_node *node, 2059 struct btrfs_delayed_extent_op *extent_op, 2060 int insert_reserved) 2061 { 2062 int ret; 2063 if (btrfs_delayed_ref_is_head(node)) { 2064 struct btrfs_delayed_ref_head *head; 2065 /* 2066 * we've hit the end of the chain and we were supposed 2067 * to insert this extent into the tree. 
But, it got 2068 * deleted before we ever needed to insert it, so all 2069 * we have to do is clean up the accounting 2070 */ 2071 BUG_ON(extent_op); 2072 head = btrfs_delayed_node_to_head(node); 2073 if (insert_reserved) { 2074 btrfs_pin_extent(root, node->bytenr, 2075 node->num_bytes, 1); 2076 if (head->is_data) { 2077 ret = btrfs_del_csums(trans, root, 2078 node->bytenr, 2079 node->num_bytes); 2080 BUG_ON(ret); 2081 } 2082 } 2083 mutex_unlock(&head->mutex); 2084 return 0; 2085 } 2086 2087 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2088 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2089 ret = run_delayed_tree_ref(trans, root, node, extent_op, 2090 insert_reserved); 2091 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 2092 node->type == BTRFS_SHARED_DATA_REF_KEY) 2093 ret = run_delayed_data_ref(trans, root, node, extent_op, 2094 insert_reserved); 2095 else 2096 BUG(); 2097 return ret; 2098 } 2099 2100 static noinline struct btrfs_delayed_ref_node * 2101 select_delayed_ref(struct btrfs_delayed_ref_head *head) 2102 { 2103 struct rb_node *node; 2104 struct btrfs_delayed_ref_node *ref; 2105 int action = BTRFS_ADD_DELAYED_REF; 2106 again: 2107 /* 2108 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2109 * this prevents ref count from going down to zero when 2110 * there still are pending delayed ref. 2111 */ 2112 node = rb_prev(&head->node.rb_node); 2113 while (1) { 2114 if (!node) 2115 break; 2116 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2117 rb_node); 2118 if (ref->bytenr != head->node.bytenr) 2119 break; 2120 if (ref->action == action) 2121 return ref; 2122 node = rb_prev(node); 2123 } 2124 if (action == BTRFS_ADD_DELAYED_REF) { 2125 action = BTRFS_DROP_DELAYED_REF; 2126 goto again; 2127 } 2128 return NULL; 2129 } 2130 2131 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2132 struct btrfs_root *root, 2133 struct list_head *cluster) 2134 { 2135 struct btrfs_delayed_ref_root *delayed_refs; 2136 struct btrfs_delayed_ref_node *ref; 2137 struct btrfs_delayed_ref_head *locked_ref = NULL; 2138 struct btrfs_delayed_extent_op *extent_op; 2139 int ret; 2140 int count = 0; 2141 int must_insert_reserved = 0; 2142 2143 delayed_refs = &trans->transaction->delayed_refs; 2144 while (1) { 2145 if (!locked_ref) { 2146 /* pick a new head ref from the cluster list */ 2147 if (list_empty(cluster)) 2148 break; 2149 2150 locked_ref = list_entry(cluster->next, 2151 struct btrfs_delayed_ref_head, cluster); 2152 2153 /* grab the lock that says we are going to process 2154 * all the refs for this head */ 2155 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2156 2157 /* 2158 * we may have dropped the spin lock to get the head 2159 * mutex lock, and that might have given someone else 2160 * time to free the head. If that's true, it has been 2161 * removed from our list and we can move on. 2162 */ 2163 if (ret == -EAGAIN) { 2164 locked_ref = NULL; 2165 count++; 2166 continue; 2167 } 2168 } 2169 2170 /* 2171 * record the must insert reserved flag before we 2172 * drop the spin lock. 
2173 */ 2174 must_insert_reserved = locked_ref->must_insert_reserved; 2175 locked_ref->must_insert_reserved = 0; 2176 2177 extent_op = locked_ref->extent_op; 2178 locked_ref->extent_op = NULL; 2179 2180 /* 2181 * locked_ref is the head node, so we have to go one 2182 * node back for any delayed ref updates 2183 */ 2184 ref = select_delayed_ref(locked_ref); 2185 if (!ref) { 2186 /* All delayed refs have been processed, Go ahead 2187 * and send the head node to run_one_delayed_ref, 2188 * so that any accounting fixes can happen 2189 */ 2190 ref = &locked_ref->node; 2191 2192 if (extent_op && must_insert_reserved) { 2193 kfree(extent_op); 2194 extent_op = NULL; 2195 } 2196 2197 if (extent_op) { 2198 spin_unlock(&delayed_refs->lock); 2199 2200 ret = run_delayed_extent_op(trans, root, 2201 ref, extent_op); 2202 BUG_ON(ret); 2203 kfree(extent_op); 2204 2205 cond_resched(); 2206 spin_lock(&delayed_refs->lock); 2207 continue; 2208 } 2209 2210 list_del_init(&locked_ref->cluster); 2211 locked_ref = NULL; 2212 } 2213 2214 ref->in_tree = 0; 2215 rb_erase(&ref->rb_node, &delayed_refs->root); 2216 delayed_refs->num_entries--; 2217 2218 spin_unlock(&delayed_refs->lock); 2219 2220 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2221 must_insert_reserved); 2222 BUG_ON(ret); 2223 2224 btrfs_put_delayed_ref(ref); 2225 kfree(extent_op); 2226 count++; 2227 2228 cond_resched(); 2229 spin_lock(&delayed_refs->lock); 2230 } 2231 return count; 2232 } 2233 2234 /* 2235 * this starts processing the delayed reference count updates and 2236 * extent insertions we have queued up so far. count can be 2237 * 0, which means to process everything in the tree at the start 2238 * of the run (but not newly added entries), or it can be some target 2239 * number you'd like to process. 2240 */ 2241 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2242 struct btrfs_root *root, unsigned long count) 2243 { 2244 struct rb_node *node; 2245 struct btrfs_delayed_ref_root *delayed_refs; 2246 struct btrfs_delayed_ref_node *ref; 2247 struct list_head cluster; 2248 int ret; 2249 int run_all = count == (unsigned long)-1; 2250 int run_most = 0; 2251 2252 if (root == root->fs_info->extent_root) 2253 root = root->fs_info->tree_root; 2254 2255 delayed_refs = &trans->transaction->delayed_refs; 2256 INIT_LIST_HEAD(&cluster); 2257 again: 2258 spin_lock(&delayed_refs->lock); 2259 if (count == 0) { 2260 count = delayed_refs->num_entries * 2; 2261 run_most = 1; 2262 } 2263 while (1) { 2264 if (!(run_all || run_most) && 2265 delayed_refs->num_heads_ready < 64) 2266 break; 2267 2268 /* 2269 * go find something we can process in the rbtree. 
We start at 2270 * the beginning of the tree, and then build a cluster 2271 * of refs to process starting at the first one we are able to 2272 * lock 2273 */ 2274 ret = btrfs_find_ref_cluster(trans, &cluster, 2275 delayed_refs->run_delayed_start); 2276 if (ret) 2277 break; 2278 2279 ret = run_clustered_refs(trans, root, &cluster); 2280 BUG_ON(ret < 0); 2281 2282 count -= min_t(unsigned long, ret, count); 2283 2284 if (count == 0) 2285 break; 2286 } 2287 2288 if (run_all) { 2289 node = rb_first(&delayed_refs->root); 2290 if (!node) 2291 goto out; 2292 count = (unsigned long)-1; 2293 2294 while (node) { 2295 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2296 rb_node); 2297 if (btrfs_delayed_ref_is_head(ref)) { 2298 struct btrfs_delayed_ref_head *head; 2299 2300 head = btrfs_delayed_node_to_head(ref); 2301 atomic_inc(&ref->refs); 2302 2303 spin_unlock(&delayed_refs->lock); 2304 /* 2305 * Mutex was contended, block until it's 2306 * released and try again 2307 */ 2308 mutex_lock(&head->mutex); 2309 mutex_unlock(&head->mutex); 2310 2311 btrfs_put_delayed_ref(ref); 2312 cond_resched(); 2313 goto again; 2314 } 2315 node = rb_next(node); 2316 } 2317 spin_unlock(&delayed_refs->lock); 2318 schedule_timeout(1); 2319 goto again; 2320 } 2321 out: 2322 spin_unlock(&delayed_refs->lock); 2323 return 0; 2324 } 2325 2326 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2327 struct btrfs_root *root, 2328 u64 bytenr, u64 num_bytes, u64 flags, 2329 int is_data) 2330 { 2331 struct btrfs_delayed_extent_op *extent_op; 2332 int ret; 2333 2334 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2335 if (!extent_op) 2336 return -ENOMEM; 2337 2338 extent_op->flags_to_set = flags; 2339 extent_op->update_flags = 1; 2340 extent_op->update_key = 0; 2341 extent_op->is_data = is_data ? 
1 : 0; 2342 2343 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2344 if (ret) 2345 kfree(extent_op); 2346 return ret; 2347 } 2348 2349 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2350 struct btrfs_root *root, 2351 struct btrfs_path *path, 2352 u64 objectid, u64 offset, u64 bytenr) 2353 { 2354 struct btrfs_delayed_ref_head *head; 2355 struct btrfs_delayed_ref_node *ref; 2356 struct btrfs_delayed_data_ref *data_ref; 2357 struct btrfs_delayed_ref_root *delayed_refs; 2358 struct rb_node *node; 2359 int ret = 0; 2360 2361 ret = -ENOENT; 2362 delayed_refs = &trans->transaction->delayed_refs; 2363 spin_lock(&delayed_refs->lock); 2364 head = btrfs_find_delayed_ref_head(trans, bytenr); 2365 if (!head) 2366 goto out; 2367 2368 if (!mutex_trylock(&head->mutex)) { 2369 atomic_inc(&head->node.refs); 2370 spin_unlock(&delayed_refs->lock); 2371 2372 btrfs_release_path(path); 2373 2374 /* 2375 * Mutex was contended, block until it's released and let 2376 * caller try again 2377 */ 2378 mutex_lock(&head->mutex); 2379 mutex_unlock(&head->mutex); 2380 btrfs_put_delayed_ref(&head->node); 2381 return -EAGAIN; 2382 } 2383 2384 node = rb_prev(&head->node.rb_node); 2385 if (!node) 2386 goto out_unlock; 2387 2388 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2389 2390 if (ref->bytenr != bytenr) 2391 goto out_unlock; 2392 2393 ret = 1; 2394 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2395 goto out_unlock; 2396 2397 data_ref = btrfs_delayed_node_to_data_ref(ref); 2398 2399 node = rb_prev(node); 2400 if (node) { 2401 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2402 if (ref->bytenr == bytenr) 2403 goto out_unlock; 2404 } 2405 2406 if (data_ref->root != root->root_key.objectid || 2407 data_ref->objectid != objectid || data_ref->offset != offset) 2408 goto out_unlock; 2409 2410 ret = 0; 2411 out_unlock: 2412 mutex_unlock(&head->mutex); 2413 out: 2414 spin_unlock(&delayed_refs->lock); 2415 return ret; 2416 } 2417 2418 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2419 struct btrfs_root *root, 2420 struct btrfs_path *path, 2421 u64 objectid, u64 offset, u64 bytenr) 2422 { 2423 struct btrfs_root *extent_root = root->fs_info->extent_root; 2424 struct extent_buffer *leaf; 2425 struct btrfs_extent_data_ref *ref; 2426 struct btrfs_extent_inline_ref *iref; 2427 struct btrfs_extent_item *ei; 2428 struct btrfs_key key; 2429 u32 item_size; 2430 int ret; 2431 2432 key.objectid = bytenr; 2433 key.offset = (u64)-1; 2434 key.type = BTRFS_EXTENT_ITEM_KEY; 2435 2436 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2437 if (ret < 0) 2438 goto out; 2439 BUG_ON(ret == 0); 2440 2441 ret = -ENOENT; 2442 if (path->slots[0] == 0) 2443 goto out; 2444 2445 path->slots[0]--; 2446 leaf = path->nodes[0]; 2447 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2448 2449 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2450 goto out; 2451 2452 ret = 1; 2453 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2454 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2455 if (item_size < sizeof(*ei)) { 2456 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2457 goto out; 2458 } 2459 #endif 2460 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2461 2462 if (item_size != sizeof(*ei) + 2463 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2464 goto out; 2465 2466 if (btrfs_extent_generation(leaf, ei) <= 2467 btrfs_root_last_snapshot(&root->root_item)) 2468 goto out; 2469 2470 iref = (struct 
btrfs_extent_inline_ref *)(ei + 1); 2471 if (btrfs_extent_inline_ref_type(leaf, iref) != 2472 BTRFS_EXTENT_DATA_REF_KEY) 2473 goto out; 2474 2475 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2476 if (btrfs_extent_refs(leaf, ei) != 2477 btrfs_extent_data_ref_count(leaf, ref) || 2478 btrfs_extent_data_ref_root(leaf, ref) != 2479 root->root_key.objectid || 2480 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2481 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2482 goto out; 2483 2484 ret = 0; 2485 out: 2486 return ret; 2487 } 2488 2489 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2490 struct btrfs_root *root, 2491 u64 objectid, u64 offset, u64 bytenr) 2492 { 2493 struct btrfs_path *path; 2494 int ret; 2495 int ret2; 2496 2497 path = btrfs_alloc_path(); 2498 if (!path) 2499 return -ENOENT; 2500 2501 do { 2502 ret = check_committed_ref(trans, root, path, objectid, 2503 offset, bytenr); 2504 if (ret && ret != -ENOENT) 2505 goto out; 2506 2507 ret2 = check_delayed_ref(trans, root, path, objectid, 2508 offset, bytenr); 2509 } while (ret2 == -EAGAIN); 2510 2511 if (ret2 && ret2 != -ENOENT) { 2512 ret = ret2; 2513 goto out; 2514 } 2515 2516 if (ret != -ENOENT || ret2 != -ENOENT) 2517 ret = 0; 2518 out: 2519 btrfs_free_path(path); 2520 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) 2521 WARN_ON(ret > 0); 2522 return ret; 2523 } 2524 2525 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2526 struct btrfs_root *root, 2527 struct extent_buffer *buf, 2528 int full_backref, int inc) 2529 { 2530 u64 bytenr; 2531 u64 num_bytes; 2532 u64 parent; 2533 u64 ref_root; 2534 u32 nritems; 2535 struct btrfs_key key; 2536 struct btrfs_file_extent_item *fi; 2537 int i; 2538 int level; 2539 int ret = 0; 2540 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2541 u64, u64, u64, u64, u64, u64); 2542 2543 ref_root = btrfs_header_owner(buf); 2544 nritems = btrfs_header_nritems(buf); 2545 level = btrfs_header_level(buf); 2546 2547 if (!root->ref_cows && level == 0) 2548 return 0; 2549 2550 if (inc) 2551 process_func = btrfs_inc_extent_ref; 2552 else 2553 process_func = btrfs_free_extent; 2554 2555 if (full_backref) 2556 parent = buf->start; 2557 else 2558 parent = 0; 2559 2560 for (i = 0; i < nritems; i++) { 2561 if (level == 0) { 2562 btrfs_item_key_to_cpu(buf, &key, i); 2563 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2564 continue; 2565 fi = btrfs_item_ptr(buf, i, 2566 struct btrfs_file_extent_item); 2567 if (btrfs_file_extent_type(buf, fi) == 2568 BTRFS_FILE_EXTENT_INLINE) 2569 continue; 2570 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2571 if (bytenr == 0) 2572 continue; 2573 2574 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2575 key.offset -= btrfs_file_extent_offset(buf, fi); 2576 ret = process_func(trans, root, bytenr, num_bytes, 2577 parent, ref_root, key.objectid, 2578 key.offset); 2579 if (ret) 2580 goto fail; 2581 } else { 2582 bytenr = btrfs_node_blockptr(buf, i); 2583 num_bytes = btrfs_level_size(root, level - 1); 2584 ret = process_func(trans, root, bytenr, num_bytes, 2585 parent, ref_root, level - 1, 0); 2586 if (ret) 2587 goto fail; 2588 } 2589 } 2590 return 0; 2591 fail: 2592 BUG(); 2593 return ret; 2594 } 2595 2596 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2597 struct extent_buffer *buf, int full_backref) 2598 { 2599 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2600 } 2601 2602 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root 
*root, 2603 struct extent_buffer *buf, int full_backref) 2604 { 2605 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2606 } 2607 2608 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2609 struct btrfs_root *root, 2610 struct btrfs_path *path, 2611 struct btrfs_block_group_cache *cache) 2612 { 2613 int ret; 2614 struct btrfs_root *extent_root = root->fs_info->extent_root; 2615 unsigned long bi; 2616 struct extent_buffer *leaf; 2617 2618 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2619 if (ret < 0) 2620 goto fail; 2621 BUG_ON(ret); 2622 2623 leaf = path->nodes[0]; 2624 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2625 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2626 btrfs_mark_buffer_dirty(leaf); 2627 btrfs_release_path(path); 2628 fail: 2629 if (ret) 2630 return ret; 2631 return 0; 2632 2633 } 2634 2635 static struct btrfs_block_group_cache * 2636 next_block_group(struct btrfs_root *root, 2637 struct btrfs_block_group_cache *cache) 2638 { 2639 struct rb_node *node; 2640 spin_lock(&root->fs_info->block_group_cache_lock); 2641 node = rb_next(&cache->cache_node); 2642 btrfs_put_block_group(cache); 2643 if (node) { 2644 cache = rb_entry(node, struct btrfs_block_group_cache, 2645 cache_node); 2646 btrfs_get_block_group(cache); 2647 } else 2648 cache = NULL; 2649 spin_unlock(&root->fs_info->block_group_cache_lock); 2650 return cache; 2651 } 2652 2653 static int cache_save_setup(struct btrfs_block_group_cache *block_group, 2654 struct btrfs_trans_handle *trans, 2655 struct btrfs_path *path) 2656 { 2657 struct btrfs_root *root = block_group->fs_info->tree_root; 2658 struct inode *inode = NULL; 2659 u64 alloc_hint = 0; 2660 int dcs = BTRFS_DC_ERROR; 2661 int num_pages = 0; 2662 int retries = 0; 2663 int ret = 0; 2664 2665 /* 2666 * If this block group is smaller than 100 megs don't bother caching the 2667 * block group. 2668 */ 2669 if (block_group->key.offset < (100 * 1024 * 1024)) { 2670 spin_lock(&block_group->lock); 2671 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 2672 spin_unlock(&block_group->lock); 2673 return 0; 2674 } 2675 2676 again: 2677 inode = lookup_free_space_inode(root, block_group, path); 2678 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2679 ret = PTR_ERR(inode); 2680 btrfs_release_path(path); 2681 goto out; 2682 } 2683 2684 if (IS_ERR(inode)) { 2685 BUG_ON(retries); 2686 retries++; 2687 2688 if (block_group->ro) 2689 goto out_free; 2690 2691 ret = create_free_space_inode(root, trans, block_group, path); 2692 if (ret) 2693 goto out_free; 2694 goto again; 2695 } 2696 2697 /* 2698 * We want to set the generation to 0, that way if anything goes wrong 2699 * from here on out we know not to trust this cache when we load up next 2700 * time. 
2701 */ 2702 BTRFS_I(inode)->generation = 0; 2703 ret = btrfs_update_inode(trans, root, inode); 2704 WARN_ON(ret); 2705 2706 if (i_size_read(inode) > 0) { 2707 ret = btrfs_truncate_free_space_cache(root, trans, path, 2708 inode); 2709 if (ret) 2710 goto out_put; 2711 } 2712 2713 spin_lock(&block_group->lock); 2714 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2715 /* We're not cached, don't bother trying to write stuff out */ 2716 dcs = BTRFS_DC_WRITTEN; 2717 spin_unlock(&block_group->lock); 2718 goto out_put; 2719 } 2720 spin_unlock(&block_group->lock); 2721 2722 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 2723 if (!num_pages) 2724 num_pages = 1; 2725 2726 /* 2727 * Just to make absolutely sure we have enough space, we're going to 2728 * preallocate 12 pages worth of space for each block group. In 2729 * practice we ought to use at most 8, but we need extra space so we can 2730 * add our header and have a terminator between the extents and the 2731 * bitmaps. 2732 */ 2733 num_pages *= 16; 2734 num_pages *= PAGE_CACHE_SIZE; 2735 2736 ret = btrfs_check_data_free_space(inode, num_pages); 2737 if (ret) 2738 goto out_put; 2739 2740 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2741 num_pages, num_pages, 2742 &alloc_hint); 2743 if (!ret) 2744 dcs = BTRFS_DC_SETUP; 2745 btrfs_free_reserved_data_space(inode, num_pages); 2746 out_put: 2747 iput(inode); 2748 out_free: 2749 btrfs_release_path(path); 2750 out: 2751 spin_lock(&block_group->lock); 2752 block_group->disk_cache_state = dcs; 2753 spin_unlock(&block_group->lock); 2754 2755 return ret; 2756 } 2757 2758 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2759 struct btrfs_root *root) 2760 { 2761 struct btrfs_block_group_cache *cache; 2762 int err = 0; 2763 struct btrfs_path *path; 2764 u64 last = 0; 2765 2766 path = btrfs_alloc_path(); 2767 if (!path) 2768 return -ENOMEM; 2769 2770 again: 2771 while (1) { 2772 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2773 while (cache) { 2774 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 2775 break; 2776 cache = next_block_group(root, cache); 2777 } 2778 if (!cache) { 2779 if (last == 0) 2780 break; 2781 last = 0; 2782 continue; 2783 } 2784 err = cache_save_setup(cache, trans, path); 2785 last = cache->key.objectid + cache->key.offset; 2786 btrfs_put_block_group(cache); 2787 } 2788 2789 while (1) { 2790 if (last == 0) { 2791 err = btrfs_run_delayed_refs(trans, root, 2792 (unsigned long)-1); 2793 BUG_ON(err); 2794 } 2795 2796 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2797 while (cache) { 2798 if (cache->disk_cache_state == BTRFS_DC_CLEAR) { 2799 btrfs_put_block_group(cache); 2800 goto again; 2801 } 2802 2803 if (cache->dirty) 2804 break; 2805 cache = next_block_group(root, cache); 2806 } 2807 if (!cache) { 2808 if (last == 0) 2809 break; 2810 last = 0; 2811 continue; 2812 } 2813 2814 if (cache->disk_cache_state == BTRFS_DC_SETUP) 2815 cache->disk_cache_state = BTRFS_DC_NEED_WRITE; 2816 cache->dirty = 0; 2817 last = cache->key.objectid + cache->key.offset; 2818 2819 err = write_one_cache_group(trans, root, path, cache); 2820 BUG_ON(err); 2821 btrfs_put_block_group(cache); 2822 } 2823 2824 while (1) { 2825 /* 2826 * I don't think this is needed since we're just marking our 2827 * preallocated extent as written, but just in case it can't 2828 * hurt. 
2829 */ 2830 if (last == 0) { 2831 err = btrfs_run_delayed_refs(trans, root, 2832 (unsigned long)-1); 2833 BUG_ON(err); 2834 } 2835 2836 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2837 while (cache) { 2838 /* 2839 * Really this shouldn't happen, but it could if we 2840 * couldn't write the entire preallocated extent and 2841 * splitting the extent resulted in a new block. 2842 */ 2843 if (cache->dirty) { 2844 btrfs_put_block_group(cache); 2845 goto again; 2846 } 2847 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2848 break; 2849 cache = next_block_group(root, cache); 2850 } 2851 if (!cache) { 2852 if (last == 0) 2853 break; 2854 last = 0; 2855 continue; 2856 } 2857 2858 btrfs_write_out_cache(root, trans, cache, path); 2859 2860 /* 2861 * If we didn't have an error then the cache state is still 2862 * NEED_WRITE, so we can set it to WRITTEN. 2863 */ 2864 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2865 cache->disk_cache_state = BTRFS_DC_WRITTEN; 2866 last = cache->key.objectid + cache->key.offset; 2867 btrfs_put_block_group(cache); 2868 } 2869 2870 btrfs_free_path(path); 2871 return 0; 2872 } 2873 2874 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2875 { 2876 struct btrfs_block_group_cache *block_group; 2877 int readonly = 0; 2878 2879 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2880 if (!block_group || block_group->ro) 2881 readonly = 1; 2882 if (block_group) 2883 btrfs_put_block_group(block_group); 2884 return readonly; 2885 } 2886 2887 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2888 u64 total_bytes, u64 bytes_used, 2889 struct btrfs_space_info **space_info) 2890 { 2891 struct btrfs_space_info *found; 2892 int i; 2893 int factor; 2894 2895 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 2896 BTRFS_BLOCK_GROUP_RAID10)) 2897 factor = 2; 2898 else 2899 factor = 1; 2900 2901 found = __find_space_info(info, flags); 2902 if (found) { 2903 spin_lock(&found->lock); 2904 found->total_bytes += total_bytes; 2905 found->disk_total += total_bytes * factor; 2906 found->bytes_used += bytes_used; 2907 found->disk_used += bytes_used * factor; 2908 found->full = 0; 2909 spin_unlock(&found->lock); 2910 *space_info = found; 2911 return 0; 2912 } 2913 found = kzalloc(sizeof(*found), GFP_NOFS); 2914 if (!found) 2915 return -ENOMEM; 2916 2917 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 2918 INIT_LIST_HEAD(&found->block_groups[i]); 2919 init_rwsem(&found->groups_sem); 2920 spin_lock_init(&found->lock); 2921 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | 2922 BTRFS_BLOCK_GROUP_SYSTEM | 2923 BTRFS_BLOCK_GROUP_METADATA); 2924 found->total_bytes = total_bytes; 2925 found->disk_total = total_bytes * factor; 2926 found->bytes_used = bytes_used; 2927 found->disk_used = bytes_used * factor; 2928 found->bytes_pinned = 0; 2929 found->bytes_reserved = 0; 2930 found->bytes_readonly = 0; 2931 found->bytes_may_use = 0; 2932 found->full = 0; 2933 found->force_alloc = CHUNK_ALLOC_NO_FORCE; 2934 found->chunk_alloc = 0; 2935 *space_info = found; 2936 list_add_rcu(&found->list, &info->space_info); 2937 atomic_set(&found->caching_threads, 0); 2938 return 0; 2939 } 2940 2941 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2942 { 2943 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2944 BTRFS_BLOCK_GROUP_RAID1 | 2945 BTRFS_BLOCK_GROUP_RAID10 | 2946 BTRFS_BLOCK_GROUP_DUP); 2947 if (extra_flags) { 2948 if (flags & BTRFS_BLOCK_GROUP_DATA) 2949 fs_info->avail_data_alloc_bits |= extra_flags; 2950 if 
(flags & BTRFS_BLOCK_GROUP_METADATA) 2951 fs_info->avail_metadata_alloc_bits |= extra_flags; 2952 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2953 fs_info->avail_system_alloc_bits |= extra_flags; 2954 } 2955 } 2956 2957 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2958 { 2959 /* 2960 * we add in the count of missing devices because we want 2961 * to make sure that any RAID levels on a degraded FS 2962 * continue to be honored. 2963 */ 2964 u64 num_devices = root->fs_info->fs_devices->rw_devices + 2965 root->fs_info->fs_devices->missing_devices; 2966 2967 if (num_devices == 1) 2968 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2969 if (num_devices < 4) 2970 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2971 2972 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2973 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2974 BTRFS_BLOCK_GROUP_RAID10))) { 2975 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2976 } 2977 2978 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2979 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2980 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2981 } 2982 2983 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2984 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2985 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2986 (flags & BTRFS_BLOCK_GROUP_DUP))) 2987 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2988 return flags; 2989 } 2990 2991 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 2992 { 2993 if (flags & BTRFS_BLOCK_GROUP_DATA) 2994 flags |= root->fs_info->avail_data_alloc_bits & 2995 root->fs_info->data_alloc_profile; 2996 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2997 flags |= root->fs_info->avail_system_alloc_bits & 2998 root->fs_info->system_alloc_profile; 2999 else if (flags & BTRFS_BLOCK_GROUP_METADATA) 3000 flags |= root->fs_info->avail_metadata_alloc_bits & 3001 root->fs_info->metadata_alloc_profile; 3002 return btrfs_reduce_alloc_profile(root, flags); 3003 } 3004 3005 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3006 { 3007 u64 flags; 3008 3009 if (data) 3010 flags = BTRFS_BLOCK_GROUP_DATA; 3011 else if (root == root->fs_info->chunk_root) 3012 flags = BTRFS_BLOCK_GROUP_SYSTEM; 3013 else 3014 flags = BTRFS_BLOCK_GROUP_METADATA; 3015 3016 return get_alloc_profile(root, flags); 3017 } 3018 3019 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 3020 { 3021 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 3022 BTRFS_BLOCK_GROUP_DATA); 3023 } 3024 3025 /* 3026 * This will check the space that the inode allocates from to make sure we have 3027 * enough space for bytes. 
3028 */ 3029 int btrfs_check_data_free_space(struct inode *inode, u64 bytes) 3030 { 3031 struct btrfs_space_info *data_sinfo; 3032 struct btrfs_root *root = BTRFS_I(inode)->root; 3033 u64 used; 3034 int ret = 0, committed = 0, alloc_chunk = 1; 3035 3036 /* make sure bytes are sectorsize aligned */ 3037 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3038 3039 if (root == root->fs_info->tree_root || 3040 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { 3041 alloc_chunk = 0; 3042 committed = 1; 3043 } 3044 3045 data_sinfo = BTRFS_I(inode)->space_info; 3046 if (!data_sinfo) 3047 goto alloc; 3048 3049 again: 3050 /* make sure we have enough space to handle the data first */ 3051 spin_lock(&data_sinfo->lock); 3052 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + 3053 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + 3054 data_sinfo->bytes_may_use; 3055 3056 if (used + bytes > data_sinfo->total_bytes) { 3057 struct btrfs_trans_handle *trans; 3058 3059 /* 3060 * if we don't have enough free bytes in this space then we need 3061 * to alloc a new chunk. 3062 */ 3063 if (!data_sinfo->full && alloc_chunk) { 3064 u64 alloc_target; 3065 3066 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; 3067 spin_unlock(&data_sinfo->lock); 3068 alloc: 3069 alloc_target = btrfs_get_alloc_profile(root, 1); 3070 trans = btrfs_join_transaction(root); 3071 if (IS_ERR(trans)) 3072 return PTR_ERR(trans); 3073 3074 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3075 bytes + 2 * 1024 * 1024, 3076 alloc_target, 3077 CHUNK_ALLOC_NO_FORCE); 3078 btrfs_end_transaction(trans, root); 3079 if (ret < 0) { 3080 if (ret != -ENOSPC) 3081 return ret; 3082 else 3083 goto commit_trans; 3084 } 3085 3086 if (!data_sinfo) { 3087 btrfs_set_inode_space_info(root, inode); 3088 data_sinfo = BTRFS_I(inode)->space_info; 3089 } 3090 goto again; 3091 } 3092 spin_unlock(&data_sinfo->lock); 3093 3094 /* commit the current transaction and try again */ 3095 commit_trans: 3096 if (!committed && 3097 !atomic_read(&root->fs_info->open_ioctl_trans)) { 3098 committed = 1; 3099 trans = btrfs_join_transaction(root); 3100 if (IS_ERR(trans)) 3101 return PTR_ERR(trans); 3102 ret = btrfs_commit_transaction(trans, root); 3103 if (ret) 3104 return ret; 3105 goto again; 3106 } 3107 3108 return -ENOSPC; 3109 } 3110 data_sinfo->bytes_may_use += bytes; 3111 BTRFS_I(inode)->reserved_bytes += bytes; 3112 spin_unlock(&data_sinfo->lock); 3113 3114 return 0; 3115 } 3116 3117 /* 3118 * called when we are clearing an delalloc extent from the 3119 * inode's io_tree or there was an error for whatever reason 3120 * after calling btrfs_check_data_free_space 3121 */ 3122 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3123 { 3124 struct btrfs_root *root = BTRFS_I(inode)->root; 3125 struct btrfs_space_info *data_sinfo; 3126 3127 /* make sure bytes are sectorsize aligned */ 3128 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3129 3130 data_sinfo = BTRFS_I(inode)->space_info; 3131 spin_lock(&data_sinfo->lock); 3132 data_sinfo->bytes_may_use -= bytes; 3133 BTRFS_I(inode)->reserved_bytes -= bytes; 3134 spin_unlock(&data_sinfo->lock); 3135 } 3136 3137 static void force_metadata_allocation(struct btrfs_fs_info *info) 3138 { 3139 struct list_head *head = &info->space_info; 3140 struct btrfs_space_info *found; 3141 3142 rcu_read_lock(); 3143 list_for_each_entry_rcu(found, head, list) { 3144 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3145 found->force_alloc = CHUNK_ALLOC_FORCE; 3146 } 
3147 rcu_read_unlock(); 3148 } 3149 3150 static int should_alloc_chunk(struct btrfs_root *root, 3151 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3152 int force) 3153 { 3154 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3155 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3156 u64 thresh; 3157 3158 if (force == CHUNK_ALLOC_FORCE) 3159 return 1; 3160 3161 /* 3162 * in limited mode, we want to have some free space up to 3163 * about 1% of the FS size. 3164 */ 3165 if (force == CHUNK_ALLOC_LIMITED) { 3166 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3167 thresh = max_t(u64, 64 * 1024 * 1024, 3168 div_factor_fine(thresh, 1)); 3169 3170 if (num_bytes - num_allocated < thresh) 3171 return 1; 3172 } 3173 3174 /* 3175 * we have two similar checks here, one based on a percentage 3176 * and one based on a hard number of 256MB. The idea 3177 * is that if we have a good amount of free 3178 * room, don't allocate a chunk. A good amount means 3179 * the chunks we have allocated are less than 80% utilized, 3180 * or there is more than 256MB free 3181 */ 3182 if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3183 return 0; 3184 3185 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) 3186 return 0; 3187 3188 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3189 3190 /* 256MB or 5% of the FS */ 3191 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3192 3193 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3194 return 0; 3195 return 1; 3196 } 3197 3198 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3199 struct btrfs_root *extent_root, u64 alloc_bytes, 3200 u64 flags, int force) 3201 { 3202 struct btrfs_space_info *space_info; 3203 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3204 int wait_for_alloc = 0; 3205 int ret = 0; 3206 3207 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3208 3209 space_info = __find_space_info(extent_root->fs_info, flags); 3210 if (!space_info) { 3211 ret = update_space_info(extent_root->fs_info, flags, 3212 0, 0, &space_info); 3213 BUG_ON(ret); 3214 } 3215 BUG_ON(!space_info); 3216 3217 again: 3218 spin_lock(&space_info->lock); 3219 if (space_info->force_alloc) 3220 force = space_info->force_alloc; 3221 if (space_info->full) { 3222 spin_unlock(&space_info->lock); 3223 return 0; 3224 } 3225 3226 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { 3227 spin_unlock(&space_info->lock); 3228 return 0; 3229 } else if (space_info->chunk_alloc) { 3230 wait_for_alloc = 1; 3231 } else { 3232 space_info->chunk_alloc = 1; 3233 } 3234 3235 spin_unlock(&space_info->lock); 3236 3237 mutex_lock(&fs_info->chunk_mutex); 3238 3239 /* 3240 * The chunk_mutex is held throughout the entirety of a chunk 3241 * allocation, so once we've acquired the chunk_mutex we know that the 3242 * other guy is done and we need to recheck and see if we should 3243 * allocate. 3244 */ 3245 if (wait_for_alloc) { 3246 mutex_unlock(&fs_info->chunk_mutex); 3247 wait_for_alloc = 0; 3248 goto again; 3249 } 3250 3251 /* 3252 * If we have mixed data/metadata chunks we want to make sure we keep 3253 * allocating mixed chunks instead of individual chunks. 3254 */ 3255 if (btrfs_mixed_space_info(space_info)) 3256 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); 3257 3258 /* 3259 * if we're doing a data chunk, go ahead and make sure that 3260 * we keep a reasonable number of metadata chunks allocated in the 3261 * FS as well.
3262 */ 3263 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3264 fs_info->data_chunk_allocations++; 3265 if (!(fs_info->data_chunk_allocations % 3266 fs_info->metadata_ratio)) 3267 force_metadata_allocation(fs_info); 3268 } 3269 3270 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3271 spin_lock(&space_info->lock); 3272 if (ret) 3273 space_info->full = 1; 3274 else 3275 ret = 1; 3276 3277 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3278 space_info->chunk_alloc = 0; 3279 spin_unlock(&space_info->lock); 3280 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3281 return ret; 3282 } 3283 3284 /* 3285 * shrink metadata reservation for delalloc 3286 */ 3287 static int shrink_delalloc(struct btrfs_trans_handle *trans, 3288 struct btrfs_root *root, u64 to_reclaim, int sync) 3289 { 3290 struct btrfs_block_rsv *block_rsv; 3291 struct btrfs_space_info *space_info; 3292 u64 reserved; 3293 u64 max_reclaim; 3294 u64 reclaimed = 0; 3295 long time_left; 3296 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3297 int loops = 0; 3298 unsigned long progress; 3299 3300 block_rsv = &root->fs_info->delalloc_block_rsv; 3301 space_info = block_rsv->space_info; 3302 3303 smp_mb(); 3304 reserved = space_info->bytes_reserved; 3305 progress = space_info->reservation_progress; 3306 3307 if (reserved == 0) 3308 return 0; 3309 3310 /* nothing to shrink - nothing to reclaim */ 3311 if (root->fs_info->delalloc_bytes == 0) 3312 return 0; 3313 3314 max_reclaim = min(reserved, to_reclaim); 3315 3316 while (loops < 1024) { 3317 /* have the flusher threads jump in and do some IO */ 3318 smp_mb(); 3319 nr_pages = min_t(unsigned long, nr_pages, 3320 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); 3321 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3322 3323 spin_lock(&space_info->lock); 3324 if (reserved > space_info->bytes_reserved) 3325 reclaimed += reserved - space_info->bytes_reserved; 3326 reserved = space_info->bytes_reserved; 3327 spin_unlock(&space_info->lock); 3328 3329 loops++; 3330 3331 if (reserved == 0 || reclaimed >= max_reclaim) 3332 break; 3333 3334 if (trans && trans->transaction->blocked) 3335 return -EAGAIN; 3336 3337 time_left = schedule_timeout_interruptible(1); 3338 3339 /* We were interrupted, exit */ 3340 if (time_left) 3341 break; 3342 3343 /* we've kicked the IO a few times, if anything has been freed, 3344 * exit. There is no sense in looping here for a long time 3345 * when we really need to commit the transaction, or there are 3346 * just too many writers without enough free space 3347 */ 3348 3349 if (loops > 3) { 3350 smp_mb(); 3351 if (progress != space_info->reservation_progress) 3352 break; 3353 } 3354 3355 } 3356 return reclaimed >= to_reclaim; 3357 } 3358 3359 /* 3360 * Retries tells us how many times we've called reserve_metadata_bytes. The 3361 * idea is if this is the first call (retries == 0) then we will add to our 3362 * reserved count if we can't make the allocation in order to hold our place 3363 * while we go and try and free up space. That way for retries > 1 we don't try 3364 * and add space, we just check to see if the amount of unused space is >= the 3365 * total space, meaning that our reservation is valid. 3366 * 3367 * However if we don't intend to retry this reservation, pass -1 as retries so 3368 * that it short circuits this logic. 
3369 */ 3370 static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, 3371 struct btrfs_root *root, 3372 struct btrfs_block_rsv *block_rsv, 3373 u64 orig_bytes, int flush) 3374 { 3375 struct btrfs_space_info *space_info = block_rsv->space_info; 3376 u64 unused; 3377 u64 num_bytes = orig_bytes; 3378 int retries = 0; 3379 int ret = 0; 3380 bool reserved = false; 3381 bool committed = false; 3382 3383 again: 3384 ret = -ENOSPC; 3385 if (reserved) 3386 num_bytes = 0; 3387 3388 spin_lock(&space_info->lock); 3389 unused = space_info->bytes_used + space_info->bytes_reserved + 3390 space_info->bytes_pinned + space_info->bytes_readonly + 3391 space_info->bytes_may_use; 3392 3393 /* 3394 * The idea here is that if we've not already over-reserved the block 3395 * group then we can go ahead and save our reservation first and then 3396 * start flushing if we need to. Otherwise, if we've already 3397 * overcommitted, let's start flushing stuff first and then come back 3398 * and try to make our reservation. 3399 */ 3400 if (unused <= space_info->total_bytes) { 3401 unused = space_info->total_bytes - unused; 3402 if (unused >= num_bytes) { 3403 if (!reserved) 3404 space_info->bytes_reserved += orig_bytes; 3405 ret = 0; 3406 } else { 3407 /* 3408 * Ok, set num_bytes to orig_bytes since we aren't 3409 * overcommitted; this way we only try and reclaim what 3410 * we need. 3411 */ 3412 num_bytes = orig_bytes; 3413 } 3414 } else { 3415 /* 3416 * Ok, we're overcommitted, so set num_bytes to the overcommitted 3417 * amount plus the amount of bytes that we need for this 3418 * reservation. 3419 */ 3420 num_bytes = unused - space_info->total_bytes + 3421 (orig_bytes * (retries + 1)); 3422 } 3423 3424 /* 3425 * Couldn't make our reservation, save our place so while we're trying 3426 * to reclaim space we can actually use it instead of somebody else 3427 * stealing it from us. 3428 */ 3429 if (ret && !reserved) { 3430 space_info->bytes_reserved += orig_bytes; 3431 reserved = true; 3432 } 3433 3434 spin_unlock(&space_info->lock); 3435 3436 if (!ret) 3437 return 0; 3438 3439 if (!flush) 3440 goto out; 3441 3442 /* 3443 * We do synchronous shrinking since we don't actually unreserve 3444 * metadata until after the IO is completed. 3445 */ 3446 ret = shrink_delalloc(trans, root, num_bytes, 1); 3447 if (ret > 0) 3448 return 0; 3449 else if (ret < 0) 3450 goto out; 3451 3452 /* 3453 * So if we were overcommitted it's possible that somebody else flushed 3454 * out enough space and we simply didn't have enough space to reclaim, 3455 * so go back around and try again. 3456 */ 3457 if (retries < 2) { 3458 retries++; 3459 goto again; 3460 } 3461 3462 spin_lock(&space_info->lock); 3463 /* 3464 * Not enough space to be reclaimed, don't bother committing the 3465 * transaction.
3466 */ 3467 if (space_info->bytes_pinned < orig_bytes) 3468 ret = -ENOSPC; 3469 spin_unlock(&space_info->lock); 3470 if (ret) 3471 goto out; 3472 3473 ret = -EAGAIN; 3474 if (trans || committed) 3475 goto out; 3476 3477 ret = -ENOSPC; 3478 trans = btrfs_join_transaction(root); 3479 if (IS_ERR(trans)) 3480 goto out; 3481 ret = btrfs_commit_transaction(trans, root); 3482 if (!ret) { 3483 trans = NULL; 3484 committed = true; 3485 goto again; 3486 } 3487 3488 out: 3489 if (reserved) { 3490 spin_lock(&space_info->lock); 3491 space_info->bytes_reserved -= orig_bytes; 3492 spin_unlock(&space_info->lock); 3493 } 3494 3495 return ret; 3496 } 3497 3498 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3499 struct btrfs_root *root) 3500 { 3501 struct btrfs_block_rsv *block_rsv; 3502 if (root->ref_cows) 3503 block_rsv = trans->block_rsv; 3504 else 3505 block_rsv = root->block_rsv; 3506 3507 if (!block_rsv) 3508 block_rsv = &root->fs_info->empty_block_rsv; 3509 3510 return block_rsv; 3511 } 3512 3513 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 3514 u64 num_bytes) 3515 { 3516 int ret = -ENOSPC; 3517 spin_lock(&block_rsv->lock); 3518 if (block_rsv->reserved >= num_bytes) { 3519 block_rsv->reserved -= num_bytes; 3520 if (block_rsv->reserved < block_rsv->size) 3521 block_rsv->full = 0; 3522 ret = 0; 3523 } 3524 spin_unlock(&block_rsv->lock); 3525 return ret; 3526 } 3527 3528 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, 3529 u64 num_bytes, int update_size) 3530 { 3531 spin_lock(&block_rsv->lock); 3532 block_rsv->reserved += num_bytes; 3533 if (update_size) 3534 block_rsv->size += num_bytes; 3535 else if (block_rsv->reserved >= block_rsv->size) 3536 block_rsv->full = 1; 3537 spin_unlock(&block_rsv->lock); 3538 } 3539 3540 static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3541 struct btrfs_block_rsv *dest, u64 num_bytes) 3542 { 3543 struct btrfs_space_info *space_info = block_rsv->space_info; 3544 3545 spin_lock(&block_rsv->lock); 3546 if (num_bytes == (u64)-1) 3547 num_bytes = block_rsv->size; 3548 block_rsv->size -= num_bytes; 3549 if (block_rsv->reserved >= block_rsv->size) { 3550 num_bytes = block_rsv->reserved - block_rsv->size; 3551 block_rsv->reserved = block_rsv->size; 3552 block_rsv->full = 1; 3553 } else { 3554 num_bytes = 0; 3555 } 3556 spin_unlock(&block_rsv->lock); 3557 3558 if (num_bytes > 0) { 3559 if (dest) { 3560 spin_lock(&dest->lock); 3561 if (!dest->full) { 3562 u64 bytes_to_add; 3563 3564 bytes_to_add = dest->size - dest->reserved; 3565 bytes_to_add = min(num_bytes, bytes_to_add); 3566 dest->reserved += bytes_to_add; 3567 if (dest->reserved >= dest->size) 3568 dest->full = 1; 3569 num_bytes -= bytes_to_add; 3570 } 3571 spin_unlock(&dest->lock); 3572 } 3573 if (num_bytes) { 3574 spin_lock(&space_info->lock); 3575 space_info->bytes_reserved -= num_bytes; 3576 space_info->reservation_progress++; 3577 spin_unlock(&space_info->lock); 3578 } 3579 } 3580 } 3581 3582 static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, 3583 struct btrfs_block_rsv *dst, u64 num_bytes) 3584 { 3585 int ret; 3586 3587 ret = block_rsv_use_bytes(src, num_bytes); 3588 if (ret) 3589 return ret; 3590 3591 block_rsv_add_bytes(dst, num_bytes, 1); 3592 return 0; 3593 } 3594 3595 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) 3596 { 3597 memset(rsv, 0, sizeof(*rsv)); 3598 spin_lock_init(&rsv->lock); 3599 atomic_set(&rsv->usage, 1); 3600 rsv->priority = 6; 3601 INIT_LIST_HEAD(&rsv->list); 3602 } 3603 3604 struct 
btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) 3605 { 3606 struct btrfs_block_rsv *block_rsv; 3607 struct btrfs_fs_info *fs_info = root->fs_info; 3608 3609 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3610 if (!block_rsv) 3611 return NULL; 3612 3613 btrfs_init_block_rsv(block_rsv); 3614 block_rsv->space_info = __find_space_info(fs_info, 3615 BTRFS_BLOCK_GROUP_METADATA); 3616 return block_rsv; 3617 } 3618 3619 void btrfs_free_block_rsv(struct btrfs_root *root, 3620 struct btrfs_block_rsv *rsv) 3621 { 3622 if (rsv && atomic_dec_and_test(&rsv->usage)) { 3623 btrfs_block_rsv_release(root, rsv, (u64)-1); 3624 if (!rsv->durable) 3625 kfree(rsv); 3626 } 3627 } 3628 3629 /* 3630 * make the block_rsv struct able to capture freed space. 3631 * the captured space will be re-added to the block_rsv struct 3632 * after transaction commit 3633 */ 3634 void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, 3635 struct btrfs_block_rsv *block_rsv) 3636 { 3637 block_rsv->durable = 1; 3638 mutex_lock(&fs_info->durable_block_rsv_mutex); 3639 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); 3640 mutex_unlock(&fs_info->durable_block_rsv_mutex); 3641 } 3642 3643 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3644 struct btrfs_root *root, 3645 struct btrfs_block_rsv *block_rsv, 3646 u64 num_bytes) 3647 { 3648 int ret; 3649 3650 if (num_bytes == 0) 3651 return 0; 3652 3653 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3654 if (!ret) { 3655 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3656 return 0; 3657 } 3658 3659 return ret; 3660 } 3661 3662 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 3663 struct btrfs_root *root, 3664 struct btrfs_block_rsv *block_rsv, 3665 u64 min_reserved, int min_factor) 3666 { 3667 u64 num_bytes = 0; 3668 int commit_trans = 0; 3669 int ret = -ENOSPC; 3670 3671 if (!block_rsv) 3672 return 0; 3673 3674 spin_lock(&block_rsv->lock); 3675 if (min_factor > 0) 3676 num_bytes = div_factor(block_rsv->size, min_factor); 3677 if (min_reserved > num_bytes) 3678 num_bytes = min_reserved; 3679 3680 if (block_rsv->reserved >= num_bytes) { 3681 ret = 0; 3682 } else { 3683 num_bytes -= block_rsv->reserved; 3684 if (block_rsv->durable && 3685 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) 3686 commit_trans = 1; 3687 } 3688 spin_unlock(&block_rsv->lock); 3689 if (!ret) 3690 return 0; 3691 3692 if (block_rsv->refill_used) { 3693 ret = reserve_metadata_bytes(trans, root, block_rsv, 3694 num_bytes, 0); 3695 if (!ret) { 3696 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3697 return 0; 3698 } 3699 } 3700 3701 if (commit_trans) { 3702 if (trans) 3703 return -EAGAIN; 3704 3705 trans = btrfs_join_transaction(root); 3706 BUG_ON(IS_ERR(trans)); 3707 ret = btrfs_commit_transaction(trans, root); 3708 return 0; 3709 } 3710 3711 return -ENOSPC; 3712 } 3713 3714 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, 3715 struct btrfs_block_rsv *dst_rsv, 3716 u64 num_bytes) 3717 { 3718 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3719 } 3720 3721 void btrfs_block_rsv_release(struct btrfs_root *root, 3722 struct btrfs_block_rsv *block_rsv, 3723 u64 num_bytes) 3724 { 3725 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 3726 if (global_rsv->full || global_rsv == block_rsv || 3727 block_rsv->space_info != global_rsv->space_info) 3728 global_rsv = NULL; 3729 block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); 3730 } 3731 3732 /* 3733 * helper to calculate size of global
block reservation. 3734 * the desired value is sum of space used by extent tree, 3735 * checksum tree and root tree 3736 */ 3737 static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) 3738 { 3739 struct btrfs_space_info *sinfo; 3740 u64 num_bytes; 3741 u64 meta_used; 3742 u64 data_used; 3743 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3744 3745 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3746 spin_lock(&sinfo->lock); 3747 data_used = sinfo->bytes_used; 3748 spin_unlock(&sinfo->lock); 3749 3750 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3751 spin_lock(&sinfo->lock); 3752 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) 3753 data_used = 0; 3754 meta_used = sinfo->bytes_used; 3755 spin_unlock(&sinfo->lock); 3756 3757 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * 3758 csum_size * 2; 3759 num_bytes += div64_u64(data_used + meta_used, 50); 3760 3761 if (num_bytes * 3 > meta_used) 3762 num_bytes = div64_u64(meta_used, 3); 3763 3764 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); 3765 } 3766 3767 static void update_global_block_rsv(struct btrfs_fs_info *fs_info) 3768 { 3769 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 3770 struct btrfs_space_info *sinfo = block_rsv->space_info; 3771 u64 num_bytes; 3772 3773 num_bytes = calc_global_metadata_size(fs_info); 3774 3775 spin_lock(&block_rsv->lock); 3776 spin_lock(&sinfo->lock); 3777 3778 block_rsv->size = num_bytes; 3779 3780 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3781 sinfo->bytes_reserved + sinfo->bytes_readonly + 3782 sinfo->bytes_may_use; 3783 3784 if (sinfo->total_bytes > num_bytes) { 3785 num_bytes = sinfo->total_bytes - num_bytes; 3786 block_rsv->reserved += num_bytes; 3787 sinfo->bytes_reserved += num_bytes; 3788 } 3789 3790 if (block_rsv->reserved >= block_rsv->size) { 3791 num_bytes = block_rsv->reserved - block_rsv->size; 3792 sinfo->bytes_reserved -= num_bytes; 3793 sinfo->reservation_progress++; 3794 block_rsv->reserved = block_rsv->size; 3795 block_rsv->full = 1; 3796 } 3797 3798 spin_unlock(&sinfo->lock); 3799 spin_unlock(&block_rsv->lock); 3800 } 3801 3802 static void init_global_block_rsv(struct btrfs_fs_info *fs_info) 3803 { 3804 struct btrfs_space_info *space_info; 3805 3806 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3807 fs_info->chunk_block_rsv.space_info = space_info; 3808 fs_info->chunk_block_rsv.priority = 10; 3809 3810 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3811 fs_info->global_block_rsv.space_info = space_info; 3812 fs_info->global_block_rsv.priority = 10; 3813 fs_info->global_block_rsv.refill_used = 1; 3814 fs_info->delalloc_block_rsv.space_info = space_info; 3815 fs_info->trans_block_rsv.space_info = space_info; 3816 fs_info->empty_block_rsv.space_info = space_info; 3817 fs_info->empty_block_rsv.priority = 10; 3818 3819 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; 3820 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 3821 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 3822 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 3823 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 3824 3825 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); 3826 3827 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); 3828 3829 update_global_block_rsv(fs_info); 3830 } 3831 3832 static void release_global_block_rsv(struct btrfs_fs_info *fs_info) 3833 { 3834 
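/* hand back whatever is still held by the global reserve; by this point the delalloc, trans and chunk reserves are expected to be empty, which is what the WARN_ONs below verify */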
block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); 3835 WARN_ON(fs_info->delalloc_block_rsv.size > 0); 3836 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); 3837 WARN_ON(fs_info->trans_block_rsv.size > 0); 3838 WARN_ON(fs_info->trans_block_rsv.reserved > 0); 3839 WARN_ON(fs_info->chunk_block_rsv.size > 0); 3840 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3841 } 3842 3843 int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, 3844 struct btrfs_root *root, 3845 struct btrfs_block_rsv *rsv) 3846 { 3847 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; 3848 u64 num_bytes; 3849 int ret; 3850 3851 /* 3852 * Truncate should be freeing data, but give us 2 items just in case it 3853 * needs to use some space. We may want to be smarter about this in the 3854 * future. 3855 */ 3856 num_bytes = btrfs_calc_trans_metadata_size(root, 2); 3857 3858 /* We already have enough bytes, just return */ 3859 if (rsv->reserved >= num_bytes) 3860 return 0; 3861 3862 num_bytes -= rsv->reserved; 3863 3864 /* 3865 * You should have reserved enough space before hand to do this, so this 3866 * should not fail. 3867 */ 3868 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); 3869 BUG_ON(ret); 3870 3871 return 0; 3872 } 3873 3874 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3875 struct btrfs_root *root, 3876 int num_items) 3877 { 3878 u64 num_bytes; 3879 int ret; 3880 3881 if (num_items == 0 || root->fs_info->chunk_root == root) 3882 return 0; 3883 3884 num_bytes = btrfs_calc_trans_metadata_size(root, num_items); 3885 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3886 num_bytes); 3887 if (!ret) { 3888 trans->bytes_reserved += num_bytes; 3889 trans->block_rsv = &root->fs_info->trans_block_rsv; 3890 } 3891 return ret; 3892 } 3893 3894 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3895 struct btrfs_root *root) 3896 { 3897 if (!trans->bytes_reserved) 3898 return; 3899 3900 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); 3901 btrfs_block_rsv_release(root, trans->block_rsv, 3902 trans->bytes_reserved); 3903 trans->bytes_reserved = 0; 3904 } 3905 3906 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 3907 struct inode *inode) 3908 { 3909 struct btrfs_root *root = BTRFS_I(inode)->root; 3910 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3911 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3912 3913 /* 3914 * We need to hold space in order to delete our orphan item once we've 3915 * added it, so this takes the reservation so we can release it later 3916 * when we are truly done with the orphan item. 3917 */ 3918 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3919 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3920 } 3921 3922 void btrfs_orphan_release_metadata(struct inode *inode) 3923 { 3924 struct btrfs_root *root = BTRFS_I(inode)->root; 3925 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3926 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3927 } 3928 3929 int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, 3930 struct btrfs_pending_snapshot *pending) 3931 { 3932 struct btrfs_root *root = pending->root; 3933 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3934 struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; 3935 /* 3936 * two for root back/forward refs, two for directory entries 3937 * and one for root of the snapshot. 
3938 */ 3939 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); 3940 dst_rsv->space_info = src_rsv->space_info; 3941 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3942 } 3943 3944 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 3945 { 3946 return num_bytes >>= 3; 3947 } 3948 3949 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) 3950 { 3951 struct btrfs_root *root = BTRFS_I(inode)->root; 3952 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3953 u64 to_reserve; 3954 int nr_extents; 3955 int reserved_extents; 3956 int ret; 3957 3958 if (btrfs_transaction_in_commit(root->fs_info)) 3959 schedule_timeout(1); 3960 3961 num_bytes = ALIGN(num_bytes, root->sectorsize); 3962 3963 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3964 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 3965 3966 if (nr_extents > reserved_extents) { 3967 nr_extents -= reserved_extents; 3968 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 3969 } else { 3970 nr_extents = 0; 3971 to_reserve = 0; 3972 } 3973 3974 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3975 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 3976 if (ret) 3977 return ret; 3978 3979 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); 3980 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3981 3982 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3983 3984 if (block_rsv->size > 512 * 1024 * 1024) 3985 shrink_delalloc(NULL, root, to_reserve, 0); 3986 3987 return 0; 3988 } 3989 3990 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 3991 { 3992 struct btrfs_root *root = BTRFS_I(inode)->root; 3993 u64 to_free; 3994 int nr_extents; 3995 int reserved_extents; 3996 3997 num_bytes = ALIGN(num_bytes, root->sectorsize); 3998 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 3999 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); 4000 4001 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 4002 do { 4003 int old, new; 4004 4005 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4006 if (nr_extents >= reserved_extents) { 4007 nr_extents = 0; 4008 break; 4009 } 4010 old = reserved_extents; 4011 nr_extents = reserved_extents - nr_extents; 4012 new = reserved_extents - nr_extents; 4013 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, 4014 reserved_extents, new); 4015 if (likely(old == reserved_extents)) 4016 break; 4017 reserved_extents = old; 4018 } while (1); 4019 4020 to_free = calc_csum_metadata_size(inode, num_bytes); 4021 if (nr_extents > 0) 4022 to_free += btrfs_calc_trans_metadata_size(root, nr_extents); 4023 4024 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4025 to_free); 4026 } 4027 4028 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) 4029 { 4030 int ret; 4031 4032 ret = btrfs_check_data_free_space(inode, num_bytes); 4033 if (ret) 4034 return ret; 4035 4036 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); 4037 if (ret) { 4038 btrfs_free_reserved_data_space(inode, num_bytes); 4039 return ret; 4040 } 4041 4042 return 0; 4043 } 4044 4045 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) 4046 { 4047 btrfs_delalloc_release_metadata(inode, num_bytes); 4048 btrfs_free_reserved_data_space(inode, num_bytes); 4049 } 4050 4051 static int update_block_group(struct btrfs_trans_handle *trans, 4052 struct btrfs_root *root, 4053 u64 bytenr, u64 num_bytes, int alloc) 
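/*
 * Adjust the used-bytes accounting for every block group covered by
 * [bytenr, bytenr + num_bytes): the superblock total, the block group
 * item and the per-space_info counters are kept in sync.  On a free
 * (alloc == 0) the range is accounted as pinned rather than handed
 * straight back; the unpin stage at commit time returns it to the
 * free space cache.
 */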
4054 { 4055 struct btrfs_block_group_cache *cache = NULL; 4056 struct btrfs_fs_info *info = root->fs_info; 4057 u64 total = num_bytes; 4058 u64 old_val; 4059 u64 byte_in_group; 4060 int factor; 4061 4062 /* block accounting for super block */ 4063 spin_lock(&info->delalloc_lock); 4064 old_val = btrfs_super_bytes_used(&info->super_copy); 4065 if (alloc) 4066 old_val += num_bytes; 4067 else 4068 old_val -= num_bytes; 4069 btrfs_set_super_bytes_used(&info->super_copy, old_val); 4070 spin_unlock(&info->delalloc_lock); 4071 4072 while (total) { 4073 cache = btrfs_lookup_block_group(info, bytenr); 4074 if (!cache) 4075 return -1; 4076 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 4077 BTRFS_BLOCK_GROUP_RAID1 | 4078 BTRFS_BLOCK_GROUP_RAID10)) 4079 factor = 2; 4080 else 4081 factor = 1; 4082 /* 4083 * If this block group has free space cache written out, we 4084 * need to make sure to load it if we are removing space. This 4085 * is because we need the unpinning stage to actually add the 4086 * space back to the block group, otherwise we will leak space. 4087 */ 4088 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4089 cache_block_group(cache, trans, NULL, 1); 4090 4091 byte_in_group = bytenr - cache->key.objectid; 4092 WARN_ON(byte_in_group > cache->key.offset); 4093 4094 spin_lock(&cache->space_info->lock); 4095 spin_lock(&cache->lock); 4096 4097 if (btrfs_super_cache_generation(&info->super_copy) != 0 && 4098 cache->disk_cache_state < BTRFS_DC_CLEAR) 4099 cache->disk_cache_state = BTRFS_DC_CLEAR; 4100 4101 cache->dirty = 1; 4102 old_val = btrfs_block_group_used(&cache->item); 4103 num_bytes = min(total, cache->key.offset - byte_in_group); 4104 if (alloc) { 4105 old_val += num_bytes; 4106 btrfs_set_block_group_used(&cache->item, old_val); 4107 cache->reserved -= num_bytes; 4108 cache->space_info->bytes_reserved -= num_bytes; 4109 cache->space_info->reservation_progress++; 4110 cache->space_info->bytes_used += num_bytes; 4111 cache->space_info->disk_used += num_bytes * factor; 4112 spin_unlock(&cache->lock); 4113 spin_unlock(&cache->space_info->lock); 4114 } else { 4115 old_val -= num_bytes; 4116 btrfs_set_block_group_used(&cache->item, old_val); 4117 cache->pinned += num_bytes; 4118 cache->space_info->bytes_pinned += num_bytes; 4119 cache->space_info->bytes_used -= num_bytes; 4120 cache->space_info->disk_used -= num_bytes * factor; 4121 spin_unlock(&cache->lock); 4122 spin_unlock(&cache->space_info->lock); 4123 4124 set_extent_dirty(info->pinned_extents, 4125 bytenr, bytenr + num_bytes - 1, 4126 GFP_NOFS | __GFP_NOFAIL); 4127 } 4128 btrfs_put_block_group(cache); 4129 total -= num_bytes; 4130 bytenr += num_bytes; 4131 } 4132 return 0; 4133 } 4134 4135 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 4136 { 4137 struct btrfs_block_group_cache *cache; 4138 u64 bytenr; 4139 4140 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 4141 if (!cache) 4142 return 0; 4143 4144 bytenr = cache->key.objectid; 4145 btrfs_put_block_group(cache); 4146 4147 return bytenr; 4148 } 4149 4150 static int pin_down_extent(struct btrfs_root *root, 4151 struct btrfs_block_group_cache *cache, 4152 u64 bytenr, u64 num_bytes, int reserved) 4153 { 4154 spin_lock(&cache->space_info->lock); 4155 spin_lock(&cache->lock); 4156 cache->pinned += num_bytes; 4157 cache->space_info->bytes_pinned += num_bytes; 4158 if (reserved) { 4159 cache->reserved -= num_bytes; 4160 cache->space_info->bytes_reserved -= num_bytes; 4161 cache->space_info->reservation_progress++; 4162 } 4163 
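	/*
	 * The bytes stay accounted as pinned until the transaction commits;
	 * btrfs_finish_extent_commit() -> unpin_extent_range() then moves
	 * them back to the free space cache (or to bytes_readonly for
	 * read-only block groups).
	 */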
spin_unlock(&cache->lock); 4164 spin_unlock(&cache->space_info->lock); 4165 4166 set_extent_dirty(root->fs_info->pinned_extents, bytenr, 4167 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 4168 return 0; 4169 } 4170 4171 /* 4172 * this function must be called within transaction 4173 */ 4174 int btrfs_pin_extent(struct btrfs_root *root, 4175 u64 bytenr, u64 num_bytes, int reserved) 4176 { 4177 struct btrfs_block_group_cache *cache; 4178 4179 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4180 BUG_ON(!cache); 4181 4182 pin_down_extent(root, cache, bytenr, num_bytes, reserved); 4183 4184 btrfs_put_block_group(cache); 4185 return 0; 4186 } 4187 4188 /* 4189 * update size of reserved extents. this function may return -EAGAIN 4190 * if 'reserve' is true or 'sinfo' is false. 4191 */ 4192 int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4193 u64 num_bytes, int reserve, int sinfo) 4194 { 4195 int ret = 0; 4196 if (sinfo) { 4197 struct btrfs_space_info *space_info = cache->space_info; 4198 spin_lock(&space_info->lock); 4199 spin_lock(&cache->lock); 4200 if (reserve) { 4201 if (cache->ro) { 4202 ret = -EAGAIN; 4203 } else { 4204 cache->reserved += num_bytes; 4205 space_info->bytes_reserved += num_bytes; 4206 } 4207 } else { 4208 if (cache->ro) 4209 space_info->bytes_readonly += num_bytes; 4210 cache->reserved -= num_bytes; 4211 space_info->bytes_reserved -= num_bytes; 4212 space_info->reservation_progress++; 4213 } 4214 spin_unlock(&cache->lock); 4215 spin_unlock(&space_info->lock); 4216 } else { 4217 spin_lock(&cache->lock); 4218 if (cache->ro) { 4219 ret = -EAGAIN; 4220 } else { 4221 if (reserve) 4222 cache->reserved += num_bytes; 4223 else 4224 cache->reserved -= num_bytes; 4225 } 4226 spin_unlock(&cache->lock); 4227 } 4228 return ret; 4229 } 4230 4231 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 4232 struct btrfs_root *root) 4233 { 4234 struct btrfs_fs_info *fs_info = root->fs_info; 4235 struct btrfs_caching_control *next; 4236 struct btrfs_caching_control *caching_ctl; 4237 struct btrfs_block_group_cache *cache; 4238 4239 down_write(&fs_info->extent_commit_sem); 4240 4241 list_for_each_entry_safe(caching_ctl, next, 4242 &fs_info->caching_block_groups, list) { 4243 cache = caching_ctl->block_group; 4244 if (block_group_cache_done(cache)) { 4245 cache->last_byte_to_unpin = (u64)-1; 4246 list_del_init(&caching_ctl->list); 4247 put_caching_control(caching_ctl); 4248 } else { 4249 cache->last_byte_to_unpin = caching_ctl->progress; 4250 } 4251 } 4252 4253 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4254 fs_info->pinned_extents = &fs_info->freed_extents[1]; 4255 else 4256 fs_info->pinned_extents = &fs_info->freed_extents[0]; 4257 4258 up_write(&fs_info->extent_commit_sem); 4259 4260 update_global_block_rsv(fs_info); 4261 return 0; 4262 } 4263 4264 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 4265 { 4266 struct btrfs_fs_info *fs_info = root->fs_info; 4267 struct btrfs_block_group_cache *cache = NULL; 4268 u64 len; 4269 4270 while (start <= end) { 4271 if (!cache || 4272 start >= cache->key.objectid + cache->key.offset) { 4273 if (cache) 4274 btrfs_put_block_group(cache); 4275 cache = btrfs_lookup_block_group(fs_info, start); 4276 BUG_ON(!cache); 4277 } 4278 4279 len = cache->key.objectid + cache->key.offset - start; 4280 len = min(len, end + 1 - start); 4281 4282 if (start < cache->last_byte_to_unpin) { 4283 len = min(len, cache->last_byte_to_unpin - start); 4284 btrfs_add_free_space(cache, start, len); 
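		/*
		 * Only space the caching thread has already scanned is handed
		 * back here; btrfs_prepare_extent_commit() snapshots
		 * caching_ctl->progress into last_byte_to_unpin so that space
		 * the caching thread will discover on its own is not added
		 * twice.
		 */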
4285 } 4286 4287 start += len; 4288 4289 spin_lock(&cache->space_info->lock); 4290 spin_lock(&cache->lock); 4291 cache->pinned -= len; 4292 cache->space_info->bytes_pinned -= len; 4293 if (cache->ro) { 4294 cache->space_info->bytes_readonly += len; 4295 } else if (cache->reserved_pinned > 0) { 4296 len = min(len, cache->reserved_pinned); 4297 cache->reserved_pinned -= len; 4298 cache->space_info->bytes_reserved += len; 4299 } 4300 spin_unlock(&cache->lock); 4301 spin_unlock(&cache->space_info->lock); 4302 } 4303 4304 if (cache) 4305 btrfs_put_block_group(cache); 4306 return 0; 4307 } 4308 4309 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 4310 struct btrfs_root *root) 4311 { 4312 struct btrfs_fs_info *fs_info = root->fs_info; 4313 struct extent_io_tree *unpin; 4314 struct btrfs_block_rsv *block_rsv; 4315 struct btrfs_block_rsv *next_rsv; 4316 u64 start; 4317 u64 end; 4318 int idx; 4319 int ret; 4320 4321 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4322 unpin = &fs_info->freed_extents[1]; 4323 else 4324 unpin = &fs_info->freed_extents[0]; 4325 4326 while (1) { 4327 ret = find_first_extent_bit(unpin, 0, &start, &end, 4328 EXTENT_DIRTY); 4329 if (ret) 4330 break; 4331 4332 if (btrfs_test_opt(root, DISCARD)) 4333 ret = btrfs_discard_extent(root, start, 4334 end + 1 - start, NULL); 4335 4336 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4337 unpin_extent_range(root, start, end); 4338 cond_resched(); 4339 } 4340 4341 mutex_lock(&fs_info->durable_block_rsv_mutex); 4342 list_for_each_entry_safe(block_rsv, next_rsv, 4343 &fs_info->durable_block_rsv_list, list) { 4344 4345 idx = trans->transid & 0x1; 4346 if (block_rsv->freed[idx] > 0) { 4347 block_rsv_add_bytes(block_rsv, 4348 block_rsv->freed[idx], 0); 4349 block_rsv->freed[idx] = 0; 4350 } 4351 if (atomic_read(&block_rsv->usage) == 0) { 4352 btrfs_block_rsv_release(root, block_rsv, (u64)-1); 4353 4354 if (block_rsv->freed[0] == 0 && 4355 block_rsv->freed[1] == 0) { 4356 list_del_init(&block_rsv->list); 4357 kfree(block_rsv); 4358 } 4359 } else { 4360 btrfs_block_rsv_release(root, block_rsv, 0); 4361 } 4362 } 4363 mutex_unlock(&fs_info->durable_block_rsv_mutex); 4364 4365 return 0; 4366 } 4367 4368 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 4369 struct btrfs_root *root, 4370 u64 bytenr, u64 num_bytes, u64 parent, 4371 u64 root_objectid, u64 owner_objectid, 4372 u64 owner_offset, int refs_to_drop, 4373 struct btrfs_delayed_extent_op *extent_op) 4374 { 4375 struct btrfs_key key; 4376 struct btrfs_path *path; 4377 struct btrfs_fs_info *info = root->fs_info; 4378 struct btrfs_root *extent_root = info->extent_root; 4379 struct extent_buffer *leaf; 4380 struct btrfs_extent_item *ei; 4381 struct btrfs_extent_inline_ref *iref; 4382 int ret; 4383 int is_data; 4384 int extent_slot = 0; 4385 int found_extent = 0; 4386 int num_to_del = 1; 4387 u32 item_size; 4388 u64 refs; 4389 4390 path = btrfs_alloc_path(); 4391 if (!path) 4392 return -ENOMEM; 4393 4394 path->reada = 1; 4395 path->leave_spinning = 1; 4396 4397 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 4398 BUG_ON(!is_data && refs_to_drop != 1); 4399 4400 ret = lookup_extent_backref(trans, extent_root, path, &iref, 4401 bytenr, num_bytes, parent, 4402 root_objectid, owner_objectid, 4403 owner_offset); 4404 if (ret == 0) { 4405 extent_slot = path->slots[0]; 4406 while (extent_slot >= 0) { 4407 btrfs_item_key_to_cpu(path->nodes[0], &key, 4408 extent_slot); 4409 if (key.objectid != bytenr) 4410 break; 4411 if (key.type == 
BTRFS_EXTENT_ITEM_KEY && 4412 key.offset == num_bytes) { 4413 found_extent = 1; 4414 break; 4415 } 4416 if (path->slots[0] - extent_slot > 5) 4417 break; 4418 extent_slot--; 4419 } 4420 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4421 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 4422 if (found_extent && item_size < sizeof(*ei)) 4423 found_extent = 0; 4424 #endif 4425 if (!found_extent) { 4426 BUG_ON(iref); 4427 ret = remove_extent_backref(trans, extent_root, path, 4428 NULL, refs_to_drop, 4429 is_data); 4430 BUG_ON(ret); 4431 btrfs_release_path(path); 4432 path->leave_spinning = 1; 4433 4434 key.objectid = bytenr; 4435 key.type = BTRFS_EXTENT_ITEM_KEY; 4436 key.offset = num_bytes; 4437 4438 ret = btrfs_search_slot(trans, extent_root, 4439 &key, path, -1, 1); 4440 if (ret) { 4441 printk(KERN_ERR "umm, got %d back from search" 4442 ", was looking for %llu\n", ret, 4443 (unsigned long long)bytenr); 4444 btrfs_print_leaf(extent_root, path->nodes[0]); 4445 } 4446 BUG_ON(ret); 4447 extent_slot = path->slots[0]; 4448 } 4449 } else { 4450 btrfs_print_leaf(extent_root, path->nodes[0]); 4451 WARN_ON(1); 4452 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 4453 "parent %llu root %llu owner %llu offset %llu\n", 4454 (unsigned long long)bytenr, 4455 (unsigned long long)parent, 4456 (unsigned long long)root_objectid, 4457 (unsigned long long)owner_objectid, 4458 (unsigned long long)owner_offset); 4459 } 4460 4461 leaf = path->nodes[0]; 4462 item_size = btrfs_item_size_nr(leaf, extent_slot); 4463 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4464 if (item_size < sizeof(*ei)) { 4465 BUG_ON(found_extent || extent_slot != path->slots[0]); 4466 ret = convert_extent_item_v0(trans, extent_root, path, 4467 owner_objectid, 0); 4468 BUG_ON(ret < 0); 4469 4470 btrfs_release_path(path); 4471 path->leave_spinning = 1; 4472 4473 key.objectid = bytenr; 4474 key.type = BTRFS_EXTENT_ITEM_KEY; 4475 key.offset = num_bytes; 4476 4477 ret = btrfs_search_slot(trans, extent_root, &key, path, 4478 -1, 1); 4479 if (ret) { 4480 printk(KERN_ERR "umm, got %d back from search" 4481 ", was looking for %llu\n", ret, 4482 (unsigned long long)bytenr); 4483 btrfs_print_leaf(extent_root, path->nodes[0]); 4484 } 4485 BUG_ON(ret); 4486 extent_slot = path->slots[0]; 4487 leaf = path->nodes[0]; 4488 item_size = btrfs_item_size_nr(leaf, extent_slot); 4489 } 4490 #endif 4491 BUG_ON(item_size < sizeof(*ei)); 4492 ei = btrfs_item_ptr(leaf, extent_slot, 4493 struct btrfs_extent_item); 4494 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 4495 struct btrfs_tree_block_info *bi; 4496 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 4497 bi = (struct btrfs_tree_block_info *)(ei + 1); 4498 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 4499 } 4500 4501 refs = btrfs_extent_refs(leaf, ei); 4502 BUG_ON(refs < refs_to_drop); 4503 refs -= refs_to_drop; 4504 4505 if (refs > 0) { 4506 if (extent_op) 4507 __run_delayed_extent_op(extent_op, leaf, ei); 4508 /* 4509 * In the case of inline back ref, reference count will 4510 * be updated by remove_extent_backref 4511 */ 4512 if (iref) { 4513 BUG_ON(!found_extent); 4514 } else { 4515 btrfs_set_extent_refs(leaf, ei, refs); 4516 btrfs_mark_buffer_dirty(leaf); 4517 } 4518 if (found_extent) { 4519 ret = remove_extent_backref(trans, extent_root, path, 4520 iref, refs_to_drop, 4521 is_data); 4522 BUG_ON(ret); 4523 } 4524 } else { 4525 if (found_extent) { 4526 BUG_ON(is_data && refs_to_drop != 4527 extent_data_ref_count(root, path, iref)); 4528 if (iref) { 4529 BUG_ON(path->slots[0] != extent_slot); 
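			/*
			 * Inline backref: the reference lives inside the
			 * extent item itself, so deleting that single slot is
			 * enough.  The else branch below handles a separate
			 * backref item sitting right after the extent item,
			 * where both slots are removed by one
			 * btrfs_del_items() call.
			 */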
4530 } else { 4531 BUG_ON(path->slots[0] != extent_slot + 1); 4532 path->slots[0] = extent_slot; 4533 num_to_del = 2; 4534 } 4535 } 4536 4537 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4538 num_to_del); 4539 BUG_ON(ret); 4540 btrfs_release_path(path); 4541 4542 if (is_data) { 4543 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4544 BUG_ON(ret); 4545 } else { 4546 invalidate_mapping_pages(info->btree_inode->i_mapping, 4547 bytenr >> PAGE_CACHE_SHIFT, 4548 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 4549 } 4550 4551 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 4552 BUG_ON(ret); 4553 } 4554 btrfs_free_path(path); 4555 return ret; 4556 } 4557 4558 /* 4559 * when we free an block, it is possible (and likely) that we free the last 4560 * delayed ref for that extent as well. This searches the delayed ref tree for 4561 * a given extent, and if there are no other delayed refs to be processed, it 4562 * removes it from the tree. 4563 */ 4564 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 4565 struct btrfs_root *root, u64 bytenr) 4566 { 4567 struct btrfs_delayed_ref_head *head; 4568 struct btrfs_delayed_ref_root *delayed_refs; 4569 struct btrfs_delayed_ref_node *ref; 4570 struct rb_node *node; 4571 int ret = 0; 4572 4573 delayed_refs = &trans->transaction->delayed_refs; 4574 spin_lock(&delayed_refs->lock); 4575 head = btrfs_find_delayed_ref_head(trans, bytenr); 4576 if (!head) 4577 goto out; 4578 4579 node = rb_prev(&head->node.rb_node); 4580 if (!node) 4581 goto out; 4582 4583 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 4584 4585 /* there are still entries for this ref, we can't drop it */ 4586 if (ref->bytenr == bytenr) 4587 goto out; 4588 4589 if (head->extent_op) { 4590 if (!head->must_insert_reserved) 4591 goto out; 4592 kfree(head->extent_op); 4593 head->extent_op = NULL; 4594 } 4595 4596 /* 4597 * waiting for the lock here would deadlock. If someone else has it 4598 * locked they are already in the process of dropping it anyway 4599 */ 4600 if (!mutex_trylock(&head->mutex)) 4601 goto out; 4602 4603 /* 4604 * at this point we have a head with no other entries. Go 4605 * ahead and process it. 4606 */ 4607 head->node.in_tree = 0; 4608 rb_erase(&head->node.rb_node, &delayed_refs->root); 4609 4610 delayed_refs->num_entries--; 4611 4612 /* 4613 * we don't take a ref on the node because we're removing it from the 4614 * tree, so we just steal the ref the tree was holding. 
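	 *
	 * If the head still carried must_insert_reserved (the extent item
	 * was never inserted into the extent tree) we return 1 so that
	 * btrfs_free_tree_block() knows it may skip pinning and, if the
	 * buffer was never written, hand the block straight back to the
	 * free space cache.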
4615 */ 4616 delayed_refs->num_heads--; 4617 if (list_empty(&head->cluster)) 4618 delayed_refs->num_heads_ready--; 4619 4620 list_del_init(&head->cluster); 4621 spin_unlock(&delayed_refs->lock); 4622 4623 BUG_ON(head->extent_op); 4624 if (head->must_insert_reserved) 4625 ret = 1; 4626 4627 mutex_unlock(&head->mutex); 4628 btrfs_put_delayed_ref(&head->node); 4629 return ret; 4630 out: 4631 spin_unlock(&delayed_refs->lock); 4632 return 0; 4633 } 4634 4635 void btrfs_free_tree_block(struct btrfs_trans_handle *trans, 4636 struct btrfs_root *root, 4637 struct extent_buffer *buf, 4638 u64 parent, int last_ref) 4639 { 4640 struct btrfs_block_rsv *block_rsv; 4641 struct btrfs_block_group_cache *cache = NULL; 4642 int ret; 4643 4644 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4645 ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, 4646 parent, root->root_key.objectid, 4647 btrfs_header_level(buf), 4648 BTRFS_DROP_DELAYED_REF, NULL); 4649 BUG_ON(ret); 4650 } 4651 4652 if (!last_ref) 4653 return; 4654 4655 block_rsv = get_block_rsv(trans, root); 4656 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4657 if (block_rsv->space_info != cache->space_info) 4658 goto out; 4659 4660 if (btrfs_header_generation(buf) == trans->transid) { 4661 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4662 ret = check_ref_cleanup(trans, root, buf->start); 4663 if (!ret) 4664 goto pin; 4665 } 4666 4667 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 4668 pin_down_extent(root, cache, buf->start, buf->len, 1); 4669 goto pin; 4670 } 4671 4672 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4673 4674 btrfs_add_free_space(cache, buf->start, buf->len); 4675 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4676 if (ret == -EAGAIN) { 4677 /* block group became read-only */ 4678 btrfs_update_reserved_bytes(cache, buf->len, 0, 1); 4679 goto out; 4680 } 4681 4682 ret = 1; 4683 spin_lock(&block_rsv->lock); 4684 if (block_rsv->reserved < block_rsv->size) { 4685 block_rsv->reserved += buf->len; 4686 ret = 0; 4687 } 4688 spin_unlock(&block_rsv->lock); 4689 4690 if (ret) { 4691 spin_lock(&cache->space_info->lock); 4692 cache->space_info->bytes_reserved -= buf->len; 4693 cache->space_info->reservation_progress++; 4694 spin_unlock(&cache->space_info->lock); 4695 } 4696 goto out; 4697 } 4698 pin: 4699 if (block_rsv->durable && !cache->ro) { 4700 ret = 0; 4701 spin_lock(&cache->lock); 4702 if (!cache->ro) { 4703 cache->reserved_pinned += buf->len; 4704 ret = 1; 4705 } 4706 spin_unlock(&cache->lock); 4707 4708 if (ret) { 4709 spin_lock(&block_rsv->lock); 4710 block_rsv->freed[trans->transid & 0x1] += buf->len; 4711 spin_unlock(&block_rsv->lock); 4712 } 4713 } 4714 out: 4715 /* 4716 * Deleting the buffer, clear the corrupt flag since it doesn't matter 4717 * anymore. 4718 */ 4719 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); 4720 btrfs_put_block_group(cache); 4721 } 4722 4723 int btrfs_free_extent(struct btrfs_trans_handle *trans, 4724 struct btrfs_root *root, 4725 u64 bytenr, u64 num_bytes, u64 parent, 4726 u64 root_objectid, u64 owner, u64 offset) 4727 { 4728 int ret; 4729 4730 /* 4731 * tree log blocks never actually go into the extent allocation 4732 * tree, just update pinning info and exit early. 
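	 *
	 * Everything else is queued as a delayed ref: metadata blocks
	 * (owner < BTRFS_FIRST_FREE_OBJECTID) go through
	 * btrfs_add_delayed_tree_ref() and file data through
	 * btrfs_add_delayed_data_ref(), both with BTRFS_DROP_DELAYED_REF.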
4733 */ 4734 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 4735 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 4736 /* unlocks the pinned mutex */ 4737 btrfs_pin_extent(root, bytenr, num_bytes, 1); 4738 ret = 0; 4739 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 4740 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 4741 parent, root_objectid, (int)owner, 4742 BTRFS_DROP_DELAYED_REF, NULL); 4743 BUG_ON(ret); 4744 } else { 4745 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4746 parent, root_objectid, owner, 4747 offset, BTRFS_DROP_DELAYED_REF, NULL); 4748 BUG_ON(ret); 4749 } 4750 return ret; 4751 } 4752 4753 static u64 stripe_align(struct btrfs_root *root, u64 val) 4754 { 4755 u64 mask = ((u64)root->stripesize - 1); 4756 u64 ret = (val + mask) & ~mask; 4757 return ret; 4758 } 4759 4760 /* 4761 * when we wait for progress in the block group caching, its because 4762 * our allocation attempt failed at least once. So, we must sleep 4763 * and let some progress happen before we try again. 4764 * 4765 * This function will sleep at least once waiting for new free space to 4766 * show up, and then it will check the block group free space numbers 4767 * for our min num_bytes. Another option is to have it go ahead 4768 * and look in the rbtree for a free extent of a given size, but this 4769 * is a good start. 4770 */ 4771 static noinline int 4772 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 4773 u64 num_bytes) 4774 { 4775 struct btrfs_caching_control *caching_ctl; 4776 DEFINE_WAIT(wait); 4777 4778 caching_ctl = get_caching_control(cache); 4779 if (!caching_ctl) 4780 return 0; 4781 4782 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4783 (cache->free_space_ctl->free_space >= num_bytes)); 4784 4785 put_caching_control(caching_ctl); 4786 return 0; 4787 } 4788 4789 static noinline int 4790 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 4791 { 4792 struct btrfs_caching_control *caching_ctl; 4793 DEFINE_WAIT(wait); 4794 4795 caching_ctl = get_caching_control(cache); 4796 if (!caching_ctl) 4797 return 0; 4798 4799 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 4800 4801 put_caching_control(caching_ctl); 4802 return 0; 4803 } 4804 4805 static int get_block_group_index(struct btrfs_block_group_cache *cache) 4806 { 4807 int index; 4808 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) 4809 index = 0; 4810 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) 4811 index = 1; 4812 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) 4813 index = 2; 4814 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) 4815 index = 3; 4816 else 4817 index = 4; 4818 return index; 4819 } 4820 4821 enum btrfs_loop_type { 4822 LOOP_FIND_IDEAL = 0, 4823 LOOP_CACHING_NOWAIT = 1, 4824 LOOP_CACHING_WAIT = 2, 4825 LOOP_ALLOC_CHUNK = 3, 4826 LOOP_NO_EMPTY_SIZE = 4, 4827 }; 4828 4829 /* 4830 * walks the btree of allocated extents and find a hole of a given size. 4831 * The key ins is changed to record the hole: 4832 * ins->objectid == block start 4833 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4834 * ins->offset == number of blocks 4835 * Any available blocks before search_start are skipped. 
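 *
 * The search runs in increasingly aggressive passes, following enum
 * btrfs_loop_type above: cached block groups only at first
 * (LOOP_FIND_IDEAL / LOOP_CACHING_NOWAIT), then waiting on the caching
 * kthreads (LOOP_CACHING_WAIT), then attempting a chunk allocation
 * (LOOP_ALLOC_CHUNK), and finally retrying with empty_size and
 * empty_cluster forced to zero (LOOP_NO_EMPTY_SIZE) before returning
 * -ENOSPC.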
4836 */ 4837 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4838 struct btrfs_root *orig_root, 4839 u64 num_bytes, u64 empty_size, 4840 u64 search_start, u64 search_end, 4841 u64 hint_byte, struct btrfs_key *ins, 4842 int data) 4843 { 4844 int ret = 0; 4845 struct btrfs_root *root = orig_root->fs_info->extent_root; 4846 struct btrfs_free_cluster *last_ptr = NULL; 4847 struct btrfs_block_group_cache *block_group = NULL; 4848 int empty_cluster = 2 * 1024 * 1024; 4849 int allowed_chunk_alloc = 0; 4850 int done_chunk_alloc = 0; 4851 struct btrfs_space_info *space_info; 4852 int last_ptr_loop = 0; 4853 int loop = 0; 4854 int index = 0; 4855 bool found_uncached_bg = false; 4856 bool failed_cluster_refill = false; 4857 bool failed_alloc = false; 4858 bool use_cluster = true; 4859 u64 ideal_cache_percent = 0; 4860 u64 ideal_cache_offset = 0; 4861 4862 WARN_ON(num_bytes < root->sectorsize); 4863 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4864 ins->objectid = 0; 4865 ins->offset = 0; 4866 4867 space_info = __find_space_info(root->fs_info, data); 4868 if (!space_info) { 4869 printk(KERN_ERR "No space info for %d\n", data); 4870 return -ENOSPC; 4871 } 4872 4873 /* 4874 * If the space info is for both data and metadata it means we have a 4875 * small filesystem and we can't use the clustering stuff. 4876 */ 4877 if (btrfs_mixed_space_info(space_info)) 4878 use_cluster = false; 4879 4880 if (orig_root->ref_cows || empty_size) 4881 allowed_chunk_alloc = 1; 4882 4883 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 4884 last_ptr = &root->fs_info->meta_alloc_cluster; 4885 if (!btrfs_test_opt(root, SSD)) 4886 empty_cluster = 64 * 1024; 4887 } 4888 4889 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 4890 btrfs_test_opt(root, SSD)) { 4891 last_ptr = &root->fs_info->data_alloc_cluster; 4892 } 4893 4894 if (last_ptr) { 4895 spin_lock(&last_ptr->lock); 4896 if (last_ptr->block_group) 4897 hint_byte = last_ptr->window_start; 4898 spin_unlock(&last_ptr->lock); 4899 } 4900 4901 search_start = max(search_start, first_logical_byte(root, 0)); 4902 search_start = max(search_start, hint_byte); 4903 4904 if (!last_ptr) 4905 empty_cluster = 0; 4906 4907 if (search_start == hint_byte) { 4908 ideal_cache: 4909 block_group = btrfs_lookup_block_group(root->fs_info, 4910 search_start); 4911 /* 4912 * we don't want to use the block group if it doesn't match our 4913 * allocation bits, or if its not cached. 4914 * 4915 * However if we are re-searching with an ideal block group 4916 * picked out then we don't care that the block group is cached. 
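		 *
		 * (ideal_cache_offset is recorded further down while scanning
		 * uncached block groups; the re-search jumps back up here
		 * with search_start set to that offset.)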
4917 */ 4918 if (block_group && block_group_bits(block_group, data) && 4919 (block_group->cached != BTRFS_CACHE_NO || 4920 search_start == ideal_cache_offset)) { 4921 down_read(&space_info->groups_sem); 4922 if (list_empty(&block_group->list) || 4923 block_group->ro) { 4924 /* 4925 * someone is removing this block group, 4926 * we can't jump into the have_block_group 4927 * target because our list pointers are not 4928 * valid 4929 */ 4930 btrfs_put_block_group(block_group); 4931 up_read(&space_info->groups_sem); 4932 } else { 4933 index = get_block_group_index(block_group); 4934 goto have_block_group; 4935 } 4936 } else if (block_group) { 4937 btrfs_put_block_group(block_group); 4938 } 4939 } 4940 search: 4941 down_read(&space_info->groups_sem); 4942 list_for_each_entry(block_group, &space_info->block_groups[index], 4943 list) { 4944 u64 offset; 4945 int cached; 4946 4947 btrfs_get_block_group(block_group); 4948 search_start = block_group->key.objectid; 4949 4950 /* 4951 * this can happen if we end up cycling through all the 4952 * raid types, but we want to make sure we only allocate 4953 * for the proper type. 4954 */ 4955 if (!block_group_bits(block_group, data)) { 4956 u64 extra = BTRFS_BLOCK_GROUP_DUP | 4957 BTRFS_BLOCK_GROUP_RAID1 | 4958 BTRFS_BLOCK_GROUP_RAID10; 4959 4960 /* 4961 * if they asked for extra copies and this block group 4962 * doesn't provide them, bail. This does allow us to 4963 * fill raid0 from raid1. 4964 */ 4965 if ((data & extra) && !(block_group->flags & extra)) 4966 goto loop; 4967 } 4968 4969 have_block_group: 4970 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4971 u64 free_percent; 4972 4973 ret = cache_block_group(block_group, trans, 4974 orig_root, 1); 4975 if (block_group->cached == BTRFS_CACHE_FINISHED) 4976 goto have_block_group; 4977 4978 free_percent = btrfs_block_group_used(&block_group->item); 4979 free_percent *= 100; 4980 free_percent = div64_u64(free_percent, 4981 block_group->key.offset); 4982 free_percent = 100 - free_percent; 4983 if (free_percent > ideal_cache_percent && 4984 likely(!block_group->ro)) { 4985 ideal_cache_offset = block_group->key.objectid; 4986 ideal_cache_percent = free_percent; 4987 } 4988 4989 /* 4990 * We only want to start kthread caching if we are at 4991 * the point where we will wait for caching to make 4992 * progress, or if our ideal search is over and we've 4993 * found somebody to start caching. 4994 */ 4995 if (loop > LOOP_CACHING_NOWAIT || 4996 (loop > LOOP_FIND_IDEAL && 4997 atomic_read(&space_info->caching_threads) < 2)) { 4998 ret = cache_block_group(block_group, trans, 4999 orig_root, 0); 5000 BUG_ON(ret); 5001 } 5002 found_uncached_bg = true; 5003 5004 /* 5005 * If loop is set for cached only, try the next block 5006 * group. 
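			 *
			 * (During LOOP_FIND_IDEAL uncached groups are only
			 * measured, never allocated from.)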
5007 */ 5008 if (loop == LOOP_FIND_IDEAL) 5009 goto loop; 5010 } 5011 5012 cached = block_group_cache_done(block_group); 5013 if (unlikely(!cached)) 5014 found_uncached_bg = true; 5015 5016 if (unlikely(block_group->ro)) 5017 goto loop; 5018 5019 spin_lock(&block_group->free_space_ctl->tree_lock); 5020 if (cached && 5021 block_group->free_space_ctl->free_space < 5022 num_bytes + empty_size) { 5023 spin_unlock(&block_group->free_space_ctl->tree_lock); 5024 goto loop; 5025 } 5026 spin_unlock(&block_group->free_space_ctl->tree_lock); 5027 5028 /* 5029 * Ok we want to try and use the cluster allocator, so lets look 5030 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5031 * have tried the cluster allocator plenty of times at this 5032 * point and not have found anything, so we are likely way too 5033 * fragmented for the clustering stuff to find anything, so lets 5034 * just skip it and let the allocator find whatever block it can 5035 * find 5036 */ 5037 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 5038 /* 5039 * the refill lock keeps out other 5040 * people trying to start a new cluster 5041 */ 5042 spin_lock(&last_ptr->refill_lock); 5043 if (last_ptr->block_group && 5044 (last_ptr->block_group->ro || 5045 !block_group_bits(last_ptr->block_group, data))) { 5046 offset = 0; 5047 goto refill_cluster; 5048 } 5049 5050 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 5051 num_bytes, search_start); 5052 if (offset) { 5053 /* we have a block, we're done */ 5054 spin_unlock(&last_ptr->refill_lock); 5055 goto checks; 5056 } 5057 5058 spin_lock(&last_ptr->lock); 5059 /* 5060 * whoops, this cluster doesn't actually point to 5061 * this block group. Get a ref on the block 5062 * group is does point to and try again 5063 */ 5064 if (!last_ptr_loop && last_ptr->block_group && 5065 last_ptr->block_group != block_group) { 5066 5067 btrfs_put_block_group(block_group); 5068 block_group = last_ptr->block_group; 5069 btrfs_get_block_group(block_group); 5070 spin_unlock(&last_ptr->lock); 5071 spin_unlock(&last_ptr->refill_lock); 5072 5073 last_ptr_loop = 1; 5074 search_start = block_group->key.objectid; 5075 /* 5076 * we know this block group is properly 5077 * in the list because 5078 * btrfs_remove_block_group, drops the 5079 * cluster before it removes the block 5080 * group from the list 5081 */ 5082 goto have_block_group; 5083 } 5084 spin_unlock(&last_ptr->lock); 5085 refill_cluster: 5086 /* 5087 * this cluster didn't work out, free it and 5088 * start over 5089 */ 5090 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5091 5092 last_ptr_loop = 0; 5093 5094 /* allocate a cluster in this block group */ 5095 ret = btrfs_find_space_cluster(trans, root, 5096 block_group, last_ptr, 5097 offset, num_bytes, 5098 empty_cluster + empty_size); 5099 if (ret == 0) { 5100 /* 5101 * now pull our allocation out of this 5102 * cluster 5103 */ 5104 offset = btrfs_alloc_from_cluster(block_group, 5105 last_ptr, num_bytes, 5106 search_start); 5107 if (offset) { 5108 /* we found one, proceed */ 5109 spin_unlock(&last_ptr->refill_lock); 5110 goto checks; 5111 } 5112 } else if (!cached && loop > LOOP_CACHING_NOWAIT 5113 && !failed_cluster_refill) { 5114 spin_unlock(&last_ptr->refill_lock); 5115 5116 failed_cluster_refill = true; 5117 wait_block_group_cache_progress(block_group, 5118 num_bytes + empty_cluster + empty_size); 5119 goto have_block_group; 5120 } 5121 5122 /* 5123 * at this point we either didn't find a cluster 5124 * or we weren't able to allocate a block from our 5125 * cluster. 
Free the cluster we've been trying 5126 * to use, and go to the next block group 5127 */ 5128 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5129 spin_unlock(&last_ptr->refill_lock); 5130 goto loop; 5131 } 5132 5133 offset = btrfs_find_space_for_alloc(block_group, search_start, 5134 num_bytes, empty_size); 5135 /* 5136 * If we didn't find a chunk, and we haven't failed on this 5137 * block group before, and this block group is in the middle of 5138 * caching and we are ok with waiting, then go ahead and wait 5139 * for progress to be made, and set failed_alloc to true. 5140 * 5141 * If failed_alloc is true then we've already waited on this 5142 * block group once and should move on to the next block group. 5143 */ 5144 if (!offset && !failed_alloc && !cached && 5145 loop > LOOP_CACHING_NOWAIT) { 5146 wait_block_group_cache_progress(block_group, 5147 num_bytes + empty_size); 5148 failed_alloc = true; 5149 goto have_block_group; 5150 } else if (!offset) { 5151 goto loop; 5152 } 5153 checks: 5154 search_start = stripe_align(root, offset); 5155 /* move on to the next group */ 5156 if (search_start + num_bytes >= search_end) { 5157 btrfs_add_free_space(block_group, offset, num_bytes); 5158 goto loop; 5159 } 5160 5161 /* move on to the next group */ 5162 if (search_start + num_bytes > 5163 block_group->key.objectid + block_group->key.offset) { 5164 btrfs_add_free_space(block_group, offset, num_bytes); 5165 goto loop; 5166 } 5167 5168 ins->objectid = search_start; 5169 ins->offset = num_bytes; 5170 5171 if (offset < search_start) 5172 btrfs_add_free_space(block_group, offset, 5173 search_start - offset); 5174 BUG_ON(offset > search_start); 5175 5176 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5177 (data & BTRFS_BLOCK_GROUP_DATA)); 5178 if (ret == -EAGAIN) { 5179 btrfs_add_free_space(block_group, offset, num_bytes); 5180 goto loop; 5181 } 5182 5183 /* we are all good, lets return */ 5184 ins->objectid = search_start; 5185 ins->offset = num_bytes; 5186 5187 if (offset < search_start) 5188 btrfs_add_free_space(block_group, offset, 5189 search_start - offset); 5190 BUG_ON(offset > search_start); 5191 btrfs_put_block_group(block_group); 5192 break; 5193 loop: 5194 failed_cluster_refill = false; 5195 failed_alloc = false; 5196 BUG_ON(index != get_block_group_index(block_group)); 5197 btrfs_put_block_group(block_group); 5198 } 5199 up_read(&space_info->groups_sem); 5200 5201 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5202 goto search; 5203 5204 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 5205 * for them to make caching progress. 
Also 5206 * determine the best possible bg to cache 5207 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 5208 * caching kthreads as we move along 5209 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 5210 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 5211 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5212 * again 5213 */ 5214 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 5215 (found_uncached_bg || empty_size || empty_cluster || 5216 allowed_chunk_alloc)) { 5217 index = 0; 5218 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5219 found_uncached_bg = false; 5220 loop++; 5221 if (!ideal_cache_percent && 5222 atomic_read(&space_info->caching_threads)) 5223 goto search; 5224 5225 /* 5226 * 1 of the following 2 things have happened so far 5227 * 5228 * 1) We found an ideal block group for caching that 5229 * is mostly full and will cache quickly, so we might 5230 * as well wait for it. 5231 * 5232 * 2) We searched for cached only and we didn't find 5233 * anything, and we didn't start any caching kthreads 5234 * either, so chances are we will loop through and 5235 * start a couple caching kthreads, and then come back 5236 * around and just wait for them. This will be slower 5237 * because we will have 2 caching kthreads reading at 5238 * the same time when we could have just started one 5239 * and waited for it to get far enough to give us an 5240 * allocation, so go ahead and go to the wait caching 5241 * loop. 5242 */ 5243 loop = LOOP_CACHING_WAIT; 5244 search_start = ideal_cache_offset; 5245 ideal_cache_percent = 0; 5246 goto ideal_cache; 5247 } else if (loop == LOOP_FIND_IDEAL) { 5248 /* 5249 * Didn't find a uncached bg, wait on anything we find 5250 * next. 5251 */ 5252 loop = LOOP_CACHING_WAIT; 5253 goto search; 5254 } 5255 5256 if (loop < LOOP_CACHING_WAIT) { 5257 loop++; 5258 goto search; 5259 } 5260 5261 if (loop == LOOP_ALLOC_CHUNK) { 5262 empty_size = 0; 5263 empty_cluster = 0; 5264 } 5265 5266 if (allowed_chunk_alloc) { 5267 ret = do_chunk_alloc(trans, root, num_bytes + 5268 2 * 1024 * 1024, data, 5269 CHUNK_ALLOC_LIMITED); 5270 allowed_chunk_alloc = 0; 5271 done_chunk_alloc = 1; 5272 } else if (!done_chunk_alloc && 5273 space_info->force_alloc == CHUNK_ALLOC_NO_FORCE) { 5274 space_info->force_alloc = CHUNK_ALLOC_LIMITED; 5275 } 5276 5277 if (loop < LOOP_NO_EMPTY_SIZE) { 5278 loop++; 5279 goto search; 5280 } 5281 ret = -ENOSPC; 5282 } else if (!ins->objectid) { 5283 ret = -ENOSPC; 5284 } else if (ins->objectid) { 5285 ret = 0; 5286 } 5287 5288 return ret; 5289 } 5290 5291 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 5292 int dump_block_groups) 5293 { 5294 struct btrfs_block_group_cache *cache; 5295 int index = 0; 5296 5297 spin_lock(&info->lock); 5298 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5299 (unsigned long long)(info->total_bytes - info->bytes_used - 5300 info->bytes_pinned - info->bytes_reserved - 5301 info->bytes_readonly), 5302 (info->full) ? 
"" : "not "); 5303 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 5304 "reserved=%llu, may_use=%llu, readonly=%llu\n", 5305 (unsigned long long)info->total_bytes, 5306 (unsigned long long)info->bytes_used, 5307 (unsigned long long)info->bytes_pinned, 5308 (unsigned long long)info->bytes_reserved, 5309 (unsigned long long)info->bytes_may_use, 5310 (unsigned long long)info->bytes_readonly); 5311 spin_unlock(&info->lock); 5312 5313 if (!dump_block_groups) 5314 return; 5315 5316 down_read(&info->groups_sem); 5317 again: 5318 list_for_each_entry(cache, &info->block_groups[index], list) { 5319 spin_lock(&cache->lock); 5320 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 5321 "%llu pinned %llu reserved\n", 5322 (unsigned long long)cache->key.objectid, 5323 (unsigned long long)cache->key.offset, 5324 (unsigned long long)btrfs_block_group_used(&cache->item), 5325 (unsigned long long)cache->pinned, 5326 (unsigned long long)cache->reserved); 5327 btrfs_dump_free_space(cache, bytes); 5328 spin_unlock(&cache->lock); 5329 } 5330 if (++index < BTRFS_NR_RAID_TYPES) 5331 goto again; 5332 up_read(&info->groups_sem); 5333 } 5334 5335 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 5336 struct btrfs_root *root, 5337 u64 num_bytes, u64 min_alloc_size, 5338 u64 empty_size, u64 hint_byte, 5339 u64 search_end, struct btrfs_key *ins, 5340 u64 data) 5341 { 5342 int ret; 5343 u64 search_start = 0; 5344 5345 data = btrfs_get_alloc_profile(root, data); 5346 again: 5347 /* 5348 * the only place that sets empty_size is btrfs_realloc_node, which 5349 * is not called recursively on allocations 5350 */ 5351 if (empty_size || root->ref_cows) 5352 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5353 num_bytes + 2 * 1024 * 1024, data, 5354 CHUNK_ALLOC_NO_FORCE); 5355 5356 WARN_ON(num_bytes < root->sectorsize); 5357 ret = find_free_extent(trans, root, num_bytes, empty_size, 5358 search_start, search_end, hint_byte, 5359 ins, data); 5360 5361 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 5362 num_bytes = num_bytes >> 1; 5363 num_bytes = num_bytes & ~(root->sectorsize - 1); 5364 num_bytes = max(num_bytes, min_alloc_size); 5365 do_chunk_alloc(trans, root->fs_info->extent_root, 5366 num_bytes, data, CHUNK_ALLOC_FORCE); 5367 goto again; 5368 } 5369 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5370 struct btrfs_space_info *sinfo; 5371 5372 sinfo = __find_space_info(root->fs_info, data); 5373 printk(KERN_ERR "btrfs allocation failed flags %llu, " 5374 "wanted %llu\n", (unsigned long long)data, 5375 (unsigned long long)num_bytes); 5376 dump_space_info(sinfo, num_bytes, 1); 5377 } 5378 5379 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); 5380 5381 return ret; 5382 } 5383 5384 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 5385 { 5386 struct btrfs_block_group_cache *cache; 5387 int ret = 0; 5388 5389 cache = btrfs_lookup_block_group(root->fs_info, start); 5390 if (!cache) { 5391 printk(KERN_ERR "Unable to find block group for %llu\n", 5392 (unsigned long long)start); 5393 return -ENOSPC; 5394 } 5395 5396 if (btrfs_test_opt(root, DISCARD)) 5397 ret = btrfs_discard_extent(root, start, len, NULL); 5398 5399 btrfs_add_free_space(cache, start, len); 5400 btrfs_update_reserved_bytes(cache, len, 0, 1); 5401 btrfs_put_block_group(cache); 5402 5403 trace_btrfs_reserved_extent_free(root, start, len); 5404 5405 return ret; 5406 } 5407 5408 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5409 struct 
btrfs_root *root, 5410 u64 parent, u64 root_objectid, 5411 u64 flags, u64 owner, u64 offset, 5412 struct btrfs_key *ins, int ref_mod) 5413 { 5414 int ret; 5415 struct btrfs_fs_info *fs_info = root->fs_info; 5416 struct btrfs_extent_item *extent_item; 5417 struct btrfs_extent_inline_ref *iref; 5418 struct btrfs_path *path; 5419 struct extent_buffer *leaf; 5420 int type; 5421 u32 size; 5422 5423 if (parent > 0) 5424 type = BTRFS_SHARED_DATA_REF_KEY; 5425 else 5426 type = BTRFS_EXTENT_DATA_REF_KEY; 5427 5428 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5429 5430 path = btrfs_alloc_path(); 5431 if (!path) 5432 return -ENOMEM; 5433 5434 path->leave_spinning = 1; 5435 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5436 ins, size); 5437 BUG_ON(ret); 5438 5439 leaf = path->nodes[0]; 5440 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5441 struct btrfs_extent_item); 5442 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 5443 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5444 btrfs_set_extent_flags(leaf, extent_item, 5445 flags | BTRFS_EXTENT_FLAG_DATA); 5446 5447 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 5448 btrfs_set_extent_inline_ref_type(leaf, iref, type); 5449 if (parent > 0) { 5450 struct btrfs_shared_data_ref *ref; 5451 ref = (struct btrfs_shared_data_ref *)(iref + 1); 5452 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5453 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 5454 } else { 5455 struct btrfs_extent_data_ref *ref; 5456 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 5457 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 5458 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 5459 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 5460 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 5461 } 5462 5463 btrfs_mark_buffer_dirty(path->nodes[0]); 5464 btrfs_free_path(path); 5465 5466 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5467 if (ret) { 5468 printk(KERN_ERR "btrfs update block group failed for %llu " 5469 "%llu\n", (unsigned long long)ins->objectid, 5470 (unsigned long long)ins->offset); 5471 BUG(); 5472 } 5473 return ret; 5474 } 5475 5476 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 5477 struct btrfs_root *root, 5478 u64 parent, u64 root_objectid, 5479 u64 flags, struct btrfs_disk_key *key, 5480 int level, struct btrfs_key *ins) 5481 { 5482 int ret; 5483 struct btrfs_fs_info *fs_info = root->fs_info; 5484 struct btrfs_extent_item *extent_item; 5485 struct btrfs_tree_block_info *block_info; 5486 struct btrfs_extent_inline_ref *iref; 5487 struct btrfs_path *path; 5488 struct extent_buffer *leaf; 5489 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5490 5491 path = btrfs_alloc_path(); 5492 BUG_ON(!path); 5493 5494 path->leave_spinning = 1; 5495 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5496 ins, size); 5497 BUG_ON(ret); 5498 5499 leaf = path->nodes[0]; 5500 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5501 struct btrfs_extent_item); 5502 btrfs_set_extent_refs(leaf, extent_item, 1); 5503 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5504 btrfs_set_extent_flags(leaf, extent_item, 5505 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 5506 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 5507 5508 btrfs_set_tree_block_key(leaf, block_info, key); 5509 btrfs_set_tree_block_level(leaf, block_info, level); 5510 5511 iref = (struct 
btrfs_extent_inline_ref *)(block_info + 1); 5512 if (parent > 0) { 5513 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 5514 btrfs_set_extent_inline_ref_type(leaf, iref, 5515 BTRFS_SHARED_BLOCK_REF_KEY); 5516 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5517 } else { 5518 btrfs_set_extent_inline_ref_type(leaf, iref, 5519 BTRFS_TREE_BLOCK_REF_KEY); 5520 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 5521 } 5522 5523 btrfs_mark_buffer_dirty(leaf); 5524 btrfs_free_path(path); 5525 5526 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5527 if (ret) { 5528 printk(KERN_ERR "btrfs update block group failed for %llu " 5529 "%llu\n", (unsigned long long)ins->objectid, 5530 (unsigned long long)ins->offset); 5531 BUG(); 5532 } 5533 return ret; 5534 } 5535 5536 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5537 struct btrfs_root *root, 5538 u64 root_objectid, u64 owner, 5539 u64 offset, struct btrfs_key *ins) 5540 { 5541 int ret; 5542 5543 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 5544 5545 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 5546 0, root_objectid, owner, offset, 5547 BTRFS_ADD_DELAYED_EXTENT, NULL); 5548 return ret; 5549 } 5550 5551 /* 5552 * this is used by the tree logging recovery code. It records that 5553 * an extent has been allocated and makes sure to clear the free 5554 * space cache bits as well 5555 */ 5556 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 5557 struct btrfs_root *root, 5558 u64 root_objectid, u64 owner, u64 offset, 5559 struct btrfs_key *ins) 5560 { 5561 int ret; 5562 struct btrfs_block_group_cache *block_group; 5563 struct btrfs_caching_control *caching_ctl; 5564 u64 start = ins->objectid; 5565 u64 num_bytes = ins->offset; 5566 5567 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5568 cache_block_group(block_group, trans, NULL, 0); 5569 caching_ctl = get_caching_control(block_group); 5570 5571 if (!caching_ctl) { 5572 BUG_ON(!block_group_cache_done(block_group)); 5573 ret = btrfs_remove_free_space(block_group, start, num_bytes); 5574 BUG_ON(ret); 5575 } else { 5576 mutex_lock(&caching_ctl->mutex); 5577 5578 if (start >= caching_ctl->progress) { 5579 ret = add_excluded_extent(root, start, num_bytes); 5580 BUG_ON(ret); 5581 } else if (start + num_bytes <= caching_ctl->progress) { 5582 ret = btrfs_remove_free_space(block_group, 5583 start, num_bytes); 5584 BUG_ON(ret); 5585 } else { 5586 num_bytes = caching_ctl->progress - start; 5587 ret = btrfs_remove_free_space(block_group, 5588 start, num_bytes); 5589 BUG_ON(ret); 5590 5591 start = caching_ctl->progress; 5592 num_bytes = ins->objectid + ins->offset - 5593 caching_ctl->progress; 5594 ret = add_excluded_extent(root, start, num_bytes); 5595 BUG_ON(ret); 5596 } 5597 5598 mutex_unlock(&caching_ctl->mutex); 5599 put_caching_control(caching_ctl); 5600 } 5601 5602 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5603 BUG_ON(ret); 5604 btrfs_put_block_group(block_group); 5605 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5606 0, owner, offset, ins, 1); 5607 return ret; 5608 } 5609 5610 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 5611 struct btrfs_root *root, 5612 u64 bytenr, u32 blocksize, 5613 int level) 5614 { 5615 struct extent_buffer *buf; 5616 5617 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 5618 if (!buf) 5619 return ERR_PTR(-ENOMEM); 5620 btrfs_set_header_generation(buf, 
trans->transid); 5621 btrfs_set_buffer_lockdep_class(buf, level); 5622 btrfs_tree_lock(buf); 5623 clean_tree_block(trans, root, buf); 5624 5625 btrfs_set_lock_blocking(buf); 5626 btrfs_set_buffer_uptodate(buf); 5627 5628 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 5629 /* 5630 * we allow two log transactions at a time, use different 5631 * EXENT bit to differentiate dirty pages. 5632 */ 5633 if (root->log_transid % 2 == 0) 5634 set_extent_dirty(&root->dirty_log_pages, buf->start, 5635 buf->start + buf->len - 1, GFP_NOFS); 5636 else 5637 set_extent_new(&root->dirty_log_pages, buf->start, 5638 buf->start + buf->len - 1, GFP_NOFS); 5639 } else { 5640 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 5641 buf->start + buf->len - 1, GFP_NOFS); 5642 } 5643 trans->blocks_used++; 5644 /* this returns a buffer locked for blocking */ 5645 return buf; 5646 } 5647 5648 static struct btrfs_block_rsv * 5649 use_block_rsv(struct btrfs_trans_handle *trans, 5650 struct btrfs_root *root, u32 blocksize) 5651 { 5652 struct btrfs_block_rsv *block_rsv; 5653 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 5654 int ret; 5655 5656 block_rsv = get_block_rsv(trans, root); 5657 5658 if (block_rsv->size == 0) { 5659 ret = reserve_metadata_bytes(trans, root, block_rsv, 5660 blocksize, 0); 5661 /* 5662 * If we couldn't reserve metadata bytes try and use some from 5663 * the global reserve. 5664 */ 5665 if (ret && block_rsv != global_rsv) { 5666 ret = block_rsv_use_bytes(global_rsv, blocksize); 5667 if (!ret) 5668 return global_rsv; 5669 return ERR_PTR(ret); 5670 } else if (ret) { 5671 return ERR_PTR(ret); 5672 } 5673 return block_rsv; 5674 } 5675 5676 ret = block_rsv_use_bytes(block_rsv, blocksize); 5677 if (!ret) 5678 return block_rsv; 5679 if (ret) { 5680 WARN_ON(1); 5681 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, 5682 0); 5683 if (!ret) { 5684 spin_lock(&block_rsv->lock); 5685 block_rsv->size += blocksize; 5686 spin_unlock(&block_rsv->lock); 5687 return block_rsv; 5688 } else if (ret && block_rsv != global_rsv) { 5689 ret = block_rsv_use_bytes(global_rsv, blocksize); 5690 if (!ret) 5691 return global_rsv; 5692 } 5693 } 5694 5695 return ERR_PTR(-ENOSPC); 5696 } 5697 5698 static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) 5699 { 5700 block_rsv_add_bytes(block_rsv, blocksize, 0); 5701 block_rsv_release_bytes(block_rsv, NULL, 0); 5702 } 5703 5704 /* 5705 * finds a free extent and does all the dirty work required for allocation 5706 * returns the key for the extent through ins, and a tree buffer for 5707 * the first block of the extent through buf. 5708 * 5709 * returns the tree buffer or NULL. 
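 *
 * (In the code below failures actually come back as ERR_PTR() values
 * from use_block_rsv() or btrfs_reserve_extent() rather than NULL, so
 * a caller would typically check the result along the lines of:
 *
 *	buf = btrfs_alloc_free_block(trans, root, blocksize, 0,
 *				     root_objectid, key, level, hint, 0);
 *	if (IS_ERR(buf))
 *		return PTR_ERR(buf);
 *
 * the parent/empty_size values of 0 here are purely illustrative.)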
5710 */ 5711 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 5712 struct btrfs_root *root, u32 blocksize, 5713 u64 parent, u64 root_objectid, 5714 struct btrfs_disk_key *key, int level, 5715 u64 hint, u64 empty_size) 5716 { 5717 struct btrfs_key ins; 5718 struct btrfs_block_rsv *block_rsv; 5719 struct extent_buffer *buf; 5720 u64 flags = 0; 5721 int ret; 5722 5723 5724 block_rsv = use_block_rsv(trans, root, blocksize); 5725 if (IS_ERR(block_rsv)) 5726 return ERR_CAST(block_rsv); 5727 5728 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 5729 empty_size, hint, (u64)-1, &ins, 0); 5730 if (ret) { 5731 unuse_block_rsv(block_rsv, blocksize); 5732 return ERR_PTR(ret); 5733 } 5734 5735 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 5736 blocksize, level); 5737 BUG_ON(IS_ERR(buf)); 5738 5739 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 5740 if (parent == 0) 5741 parent = ins.objectid; 5742 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 5743 } else 5744 BUG_ON(parent > 0); 5745 5746 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 5747 struct btrfs_delayed_extent_op *extent_op; 5748 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 5749 BUG_ON(!extent_op); 5750 if (key) 5751 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 5752 else 5753 memset(&extent_op->key, 0, sizeof(extent_op->key)); 5754 extent_op->flags_to_set = flags; 5755 extent_op->update_key = 1; 5756 extent_op->update_flags = 1; 5757 extent_op->is_data = 0; 5758 5759 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, 5760 ins.offset, parent, root_objectid, 5761 level, BTRFS_ADD_DELAYED_EXTENT, 5762 extent_op); 5763 BUG_ON(ret); 5764 } 5765 return buf; 5766 } 5767 5768 struct walk_control { 5769 u64 refs[BTRFS_MAX_LEVEL]; 5770 u64 flags[BTRFS_MAX_LEVEL]; 5771 struct btrfs_key update_progress; 5772 int stage; 5773 int level; 5774 int shared_level; 5775 int update_ref; 5776 int keep_locks; 5777 int reada_slot; 5778 int reada_count; 5779 }; 5780 5781 #define DROP_REFERENCE 1 5782 #define UPDATE_BACKREF 2 5783 5784 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 5785 struct btrfs_root *root, 5786 struct walk_control *wc, 5787 struct btrfs_path *path) 5788 { 5789 u64 bytenr; 5790 u64 generation; 5791 u64 refs; 5792 u64 flags; 5793 u32 nritems; 5794 u32 blocksize; 5795 struct btrfs_key key; 5796 struct extent_buffer *eb; 5797 int ret; 5798 int slot; 5799 int nread = 0; 5800 5801 if (path->slots[wc->level] < wc->reada_slot) { 5802 wc->reada_count = wc->reada_count * 2 / 3; 5803 wc->reada_count = max(wc->reada_count, 2); 5804 } else { 5805 wc->reada_count = wc->reada_count * 3 / 2; 5806 wc->reada_count = min_t(int, wc->reada_count, 5807 BTRFS_NODEPTRS_PER_BLOCK(root)); 5808 } 5809 5810 eb = path->nodes[wc->level]; 5811 nritems = btrfs_header_nritems(eb); 5812 blocksize = btrfs_level_size(root, wc->level - 1); 5813 5814 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 5815 if (nread >= wc->reada_count) 5816 break; 5817 5818 cond_resched(); 5819 bytenr = btrfs_node_blockptr(eb, slot); 5820 generation = btrfs_node_ptr_generation(eb, slot); 5821 5822 if (slot == path->slots[wc->level]) 5823 goto reada; 5824 5825 if (wc->stage == UPDATE_BACKREF && 5826 generation <= root->root_key.offset) 5827 continue; 5828 5829 /* We don't lock the tree block, it's OK to be racy here */ 5830 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5831 &refs, &flags); 5832 BUG_ON(ret); 5833 BUG_ON(refs == 0); 5834 5835 if (wc->stage == DROP_REFERENCE) { 5836 if (refs 
== 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		ret = readahead_tree_block(root, bytenr, blocksize,
					   generation);
		if (ret)
			break;
		nread++;
	}
	wc->reada_slot = slot;
}

/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, eb->len,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret);
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock(eb);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret);
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret);
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag, 0);
		BUG_ON(ret);
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock(eb);
		path->locks[level] = 0;
	}
	return 0;
}

/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need to update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, this function drops the reference
 * to the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = btrfs_level_size(root, level - 1);

	next = btrfs_find_tree_block(root, bytenr, blocksize);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
		if (!next)
			return -ENOMEM;
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	BUG_ON(ret);
	BUG_ON(wc->refs[level - 1] == 0);
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, blocksize, generation);
		if (!next)
			return -EIO;
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = 1;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
					root->root_key.objectid, level - 1, 0);
		BUG_ON(ret);
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}

/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
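 * the block is then handed to btrfs_free_tree_block(); if this tree held
 * the last reference, the underlying extent is freed as well.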
6077 * 6078 * when wc->stage == UPDATE_BACKREF, this function changes 6079 * wc->stage back to DROP_REFERENCE if we changed wc->stage 6080 * to UPDATE_BACKREF previously while processing the block. 6081 * 6082 * NOTE: return value 1 means we should stop walking up. 6083 */ 6084 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 6085 struct btrfs_root *root, 6086 struct btrfs_path *path, 6087 struct walk_control *wc) 6088 { 6089 int ret; 6090 int level = wc->level; 6091 struct extent_buffer *eb = path->nodes[level]; 6092 u64 parent = 0; 6093 6094 if (wc->stage == UPDATE_BACKREF) { 6095 BUG_ON(wc->shared_level < level); 6096 if (level < wc->shared_level) 6097 goto out; 6098 6099 ret = find_next_key(path, level + 1, &wc->update_progress); 6100 if (ret > 0) 6101 wc->update_ref = 0; 6102 6103 wc->stage = DROP_REFERENCE; 6104 wc->shared_level = -1; 6105 path->slots[level] = 0; 6106 6107 /* 6108 * check reference count again if the block isn't locked. 6109 * we should start walking down the tree again if reference 6110 * count is one. 6111 */ 6112 if (!path->locks[level]) { 6113 BUG_ON(level == 0); 6114 btrfs_tree_lock(eb); 6115 btrfs_set_lock_blocking(eb); 6116 path->locks[level] = 1; 6117 6118 ret = btrfs_lookup_extent_info(trans, root, 6119 eb->start, eb->len, 6120 &wc->refs[level], 6121 &wc->flags[level]); 6122 BUG_ON(ret); 6123 BUG_ON(wc->refs[level] == 0); 6124 if (wc->refs[level] == 1) { 6125 btrfs_tree_unlock(eb); 6126 path->locks[level] = 0; 6127 return 1; 6128 } 6129 } 6130 } 6131 6132 /* wc->stage == DROP_REFERENCE */ 6133 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 6134 6135 if (wc->refs[level] == 1) { 6136 if (level == 0) { 6137 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6138 ret = btrfs_dec_ref(trans, root, eb, 1); 6139 else 6140 ret = btrfs_dec_ref(trans, root, eb, 0); 6141 BUG_ON(ret); 6142 } 6143 /* make block locked assertion in clean_tree_block happy */ 6144 if (!path->locks[level] && 6145 btrfs_header_generation(eb) == trans->transid) { 6146 btrfs_tree_lock(eb); 6147 btrfs_set_lock_blocking(eb); 6148 path->locks[level] = 1; 6149 } 6150 clean_tree_block(trans, root, eb); 6151 } 6152 6153 if (eb == root->node) { 6154 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6155 parent = eb->start; 6156 else 6157 BUG_ON(root->root_key.objectid != 6158 btrfs_header_owner(eb)); 6159 } else { 6160 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6161 parent = path->nodes[level + 1]->start; 6162 else 6163 BUG_ON(root->root_key.objectid != 6164 btrfs_header_owner(path->nodes[level + 1])); 6165 } 6166 6167 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); 6168 out: 6169 wc->refs[level] = 0; 6170 wc->flags[level] = 0; 6171 return 0; 6172 } 6173 6174 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 6175 struct btrfs_root *root, 6176 struct btrfs_path *path, 6177 struct walk_control *wc) 6178 { 6179 int level = wc->level; 6180 int lookup_info = 1; 6181 int ret; 6182 6183 while (level >= 0) { 6184 ret = walk_down_proc(trans, root, path, wc, lookup_info); 6185 if (ret > 0) 6186 break; 6187 6188 if (level == 0) 6189 break; 6190 6191 if (path->slots[level] >= 6192 btrfs_header_nritems(path->nodes[level])) 6193 break; 6194 6195 ret = do_walk_down(trans, root, path, wc, &lookup_info); 6196 if (ret > 0) { 6197 path->slots[level]++; 6198 continue; 6199 } else if (ret < 0) 6200 return ret; 6201 level = wc->level; 6202 } 6203 return 0; 6204 } 6205 6206 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int max_level)
{
	int level = wc->level;
	int ret;

	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
	while (level < max_level && path->nodes[level]) {
		wc->level = level;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			path->slots[level]++;
			return 0;
		} else {
			ret = walk_up_proc(trans, root, path, wc);
			if (ret > 0)
				return 0;

			if (path->locks[level]) {
				btrfs_tree_unlock(path->nodes[level]);
				path->locks[level] = 0;
			}
			free_extent_buffer(path->nodes[level]);
			path->nodes[level] = NULL;
			level++;
		}
	}
	return 1;
}

/*
 * drop a subvolume tree.
 *
 * this function traverses the tree and frees any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 */
int btrfs_drop_snapshot(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, int update_ref)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	BUG_ON(!wc);

	trans = btrfs_start_transaction(tree_root, 0);
	BUG_ON(IS_ERR(trans));

	if (block_rsv)
		trans->block_rsv = block_rsv;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = 1;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out;
		}
		WARN_ON(ret > 0);

		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		btrfs_unlock_up_safe(path, 0);

		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			btrfs_set_lock_blocking(path->nodes[level]);

			ret = btrfs_lookup_extent_info(trans, root,
						path->nodes[level]->start,
						path->nodes[level]->len,
						&wc->refs[level],
						&wc->flags[level]);
			BUG_ON(ret);
			BUG_ON(wc->refs[level] == 0);

			if (level == root_item->drop_level)
				break;

			btrfs_tree_unlock(path->nodes[level]);
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

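	/*
	 * main drop loop: repeatedly walk down the tree freeing blocks that
	 * are only referenced by this tree, then walk back up.  the current
	 * position is recorded in drop_progress/drop_level of the root item
	 * so the drop can resume if the transaction is ended and restarted
	 * below.
	 */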
while (1) { 6334 ret = walk_down_tree(trans, root, path, wc); 6335 if (ret < 0) { 6336 err = ret; 6337 break; 6338 } 6339 6340 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 6341 if (ret < 0) { 6342 err = ret; 6343 break; 6344 } 6345 6346 if (ret > 0) { 6347 BUG_ON(wc->stage != DROP_REFERENCE); 6348 break; 6349 } 6350 6351 if (wc->stage == DROP_REFERENCE) { 6352 level = wc->level; 6353 btrfs_node_key(path->nodes[level], 6354 &root_item->drop_progress, 6355 path->slots[level]); 6356 root_item->drop_level = level; 6357 } 6358 6359 BUG_ON(wc->level == 0); 6360 if (btrfs_should_end_transaction(trans, tree_root)) { 6361 ret = btrfs_update_root(trans, tree_root, 6362 &root->root_key, 6363 root_item); 6364 BUG_ON(ret); 6365 6366 btrfs_end_transaction_throttle(trans, tree_root); 6367 trans = btrfs_start_transaction(tree_root, 0); 6368 BUG_ON(IS_ERR(trans)); 6369 if (block_rsv) 6370 trans->block_rsv = block_rsv; 6371 } 6372 } 6373 btrfs_release_path(path); 6374 BUG_ON(err); 6375 6376 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6377 BUG_ON(ret); 6378 6379 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 6380 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 6381 NULL, NULL); 6382 BUG_ON(ret < 0); 6383 if (ret > 0) { 6384 /* if we fail to delete the orphan item this time 6385 * around, it'll get picked up the next time. 6386 * 6387 * The most common failure here is just -ENOENT. 6388 */ 6389 btrfs_del_orphan_item(trans, tree_root, 6390 root->root_key.objectid); 6391 } 6392 } 6393 6394 if (root->in_radix) { 6395 btrfs_free_fs_root(tree_root->fs_info, root); 6396 } else { 6397 free_extent_buffer(root->node); 6398 free_extent_buffer(root->commit_root); 6399 kfree(root); 6400 } 6401 out: 6402 btrfs_end_transaction_throttle(trans, tree_root); 6403 kfree(wc); 6404 btrfs_free_path(path); 6405 return err; 6406 } 6407 6408 /* 6409 * drop subtree rooted at tree block 'node'. 
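 * only used for tree relocation; the caller must hold locks on both
 * 'node' and 'parent'.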
6410 * 6411 * NOTE: this function will unlock and release tree block 'node' 6412 */ 6413 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 6414 struct btrfs_root *root, 6415 struct extent_buffer *node, 6416 struct extent_buffer *parent) 6417 { 6418 struct btrfs_path *path; 6419 struct walk_control *wc; 6420 int level; 6421 int parent_level; 6422 int ret = 0; 6423 int wret; 6424 6425 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6426 6427 path = btrfs_alloc_path(); 6428 if (!path) 6429 return -ENOMEM; 6430 6431 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6432 if (!wc) { 6433 btrfs_free_path(path); 6434 return -ENOMEM; 6435 } 6436 6437 btrfs_assert_tree_locked(parent); 6438 parent_level = btrfs_header_level(parent); 6439 extent_buffer_get(parent); 6440 path->nodes[parent_level] = parent; 6441 path->slots[parent_level] = btrfs_header_nritems(parent); 6442 6443 btrfs_assert_tree_locked(node); 6444 level = btrfs_header_level(node); 6445 path->nodes[level] = node; 6446 path->slots[level] = 0; 6447 path->locks[level] = 1; 6448 6449 wc->refs[parent_level] = 1; 6450 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 6451 wc->level = level; 6452 wc->shared_level = -1; 6453 wc->stage = DROP_REFERENCE; 6454 wc->update_ref = 0; 6455 wc->keep_locks = 1; 6456 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 6457 6458 while (1) { 6459 wret = walk_down_tree(trans, root, path, wc); 6460 if (wret < 0) { 6461 ret = wret; 6462 break; 6463 } 6464 6465 wret = walk_up_tree(trans, root, path, wc, parent_level); 6466 if (wret < 0) 6467 ret = wret; 6468 if (wret != 0) 6469 break; 6470 } 6471 6472 kfree(wc); 6473 btrfs_free_path(path); 6474 return ret; 6475 } 6476 6477 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6478 { 6479 u64 num_devices; 6480 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6481 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6482 6483 /* 6484 * we add in the count of missing devices because we want 6485 * to make sure that any RAID levels on a degraded FS 6486 * continue to be honored. 
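 * (e.g. a two device raid1 filesystem mounted degraded with one device
 * missing still counts as having two devices here)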
6487 */ 6488 num_devices = root->fs_info->fs_devices->rw_devices + 6489 root->fs_info->fs_devices->missing_devices; 6490 6491 if (num_devices == 1) { 6492 stripped |= BTRFS_BLOCK_GROUP_DUP; 6493 stripped = flags & ~stripped; 6494 6495 /* turn raid0 into single device chunks */ 6496 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6497 return stripped; 6498 6499 /* turn mirroring into duplication */ 6500 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6501 BTRFS_BLOCK_GROUP_RAID10)) 6502 return stripped | BTRFS_BLOCK_GROUP_DUP; 6503 return flags; 6504 } else { 6505 /* they already had raid on here, just return */ 6506 if (flags & stripped) 6507 return flags; 6508 6509 stripped |= BTRFS_BLOCK_GROUP_DUP; 6510 stripped = flags & ~stripped; 6511 6512 /* switch duplicated blocks with raid1 */ 6513 if (flags & BTRFS_BLOCK_GROUP_DUP) 6514 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6515 6516 /* turn single device chunks into raid0 */ 6517 return stripped | BTRFS_BLOCK_GROUP_RAID0; 6518 } 6519 return flags; 6520 } 6521 6522 static int set_block_group_ro(struct btrfs_block_group_cache *cache) 6523 { 6524 struct btrfs_space_info *sinfo = cache->space_info; 6525 u64 num_bytes; 6526 int ret = -ENOSPC; 6527 6528 if (cache->ro) 6529 return 0; 6530 6531 spin_lock(&sinfo->lock); 6532 spin_lock(&cache->lock); 6533 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6534 cache->bytes_super - btrfs_block_group_used(&cache->item); 6535 6536 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6537 sinfo->bytes_may_use + sinfo->bytes_readonly + 6538 cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { 6539 sinfo->bytes_readonly += num_bytes; 6540 sinfo->bytes_reserved += cache->reserved_pinned; 6541 cache->reserved_pinned = 0; 6542 cache->ro = 1; 6543 ret = 0; 6544 } 6545 6546 spin_unlock(&cache->lock); 6547 spin_unlock(&sinfo->lock); 6548 return ret; 6549 } 6550 6551 int btrfs_set_block_group_ro(struct btrfs_root *root, 6552 struct btrfs_block_group_cache *cache) 6553 6554 { 6555 struct btrfs_trans_handle *trans; 6556 u64 alloc_flags; 6557 int ret; 6558 6559 BUG_ON(cache->ro); 6560 6561 trans = btrfs_join_transaction(root); 6562 BUG_ON(IS_ERR(trans)); 6563 6564 alloc_flags = update_block_group_flags(root, cache->flags); 6565 if (alloc_flags != cache->flags) 6566 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6567 CHUNK_ALLOC_FORCE); 6568 6569 ret = set_block_group_ro(cache); 6570 if (!ret) 6571 goto out; 6572 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 6573 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6574 CHUNK_ALLOC_FORCE); 6575 if (ret < 0) 6576 goto out; 6577 ret = set_block_group_ro(cache); 6578 out: 6579 btrfs_end_transaction(trans, root); 6580 return ret; 6581 } 6582 6583 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 6584 struct btrfs_root *root, u64 type) 6585 { 6586 u64 alloc_flags = get_alloc_profile(root, type); 6587 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6588 CHUNK_ALLOC_FORCE); 6589 } 6590 6591 /* 6592 * helper to account the unused space of all the readonly block group in the 6593 * list. takes mirrors into account. 
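 * (raid1, raid10 and dup keep two copies of everything, so their unused
 * bytes are counted twice)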
 */
static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
{
	struct btrfs_block_group_cache *block_group;
	u64 free_bytes = 0;
	int factor;

	list_for_each_entry(block_group, groups_list, list) {
		spin_lock(&block_group->lock);

		if (!block_group->ro) {
			spin_unlock(&block_group->lock);
			continue;
		}

		if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
					  BTRFS_BLOCK_GROUP_RAID10 |
					  BTRFS_BLOCK_GROUP_DUP))
			factor = 2;
		else
			factor = 1;

		free_bytes += (block_group->key.offset -
			       btrfs_block_group_used(&block_group->item)) *
			       factor;

		spin_unlock(&block_group->lock);
	}

	return free_bytes;
}

/*
 * helper to account the unused space of all the readonly block groups in the
 * space_info. takes mirrors into account.
 */
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
	int i;
	u64 free_bytes = 0;

	spin_lock(&sinfo->lock);

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		if (!list_empty(&sinfo->block_groups[i]))
			free_bytes += __btrfs_get_ro_block_group_free_space(
						&sinfo->block_groups[i]);

	spin_unlock(&sinfo->lock);

	return free_bytes;
}

int btrfs_set_block_group_rw(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;

	BUG_ON(!cache->ro);

	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);
	sinfo->bytes_readonly -= num_bytes;
	cache->ro = 0;
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
	return 0;
}

/*
 * checks to see if it's even possible to relocate this block group.
 *
 * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
 * ok to go ahead and try.
 */
int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_device *device;
	int full = 0;
	int ret = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);

	/* odd, couldn't find the block group, leave it alone */
	if (!block_group)
		return -1;

	/* no bytes used, we're good */
	if (!btrfs_block_group_used(&block_group->item))
		goto out;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);

	full = space_info->full;

	/*
	 * if this is the last block group we have in this space, we can't
	 * relocate it unless we're able to allocate a new chunk below.
	 *
	 * Otherwise, we need to make sure we have room in the space to handle
	 * all of the extents from this block group.
If we can, we're good 6702 */ 6703 if ((space_info->total_bytes != block_group->key.offset) && 6704 (space_info->bytes_used + space_info->bytes_reserved + 6705 space_info->bytes_pinned + space_info->bytes_readonly + 6706 btrfs_block_group_used(&block_group->item) < 6707 space_info->total_bytes)) { 6708 spin_unlock(&space_info->lock); 6709 goto out; 6710 } 6711 spin_unlock(&space_info->lock); 6712 6713 /* 6714 * ok we don't have enough space, but maybe we have free space on our 6715 * devices to allocate new chunks for relocation, so loop through our 6716 * alloc devices and guess if we have enough space. However, if we 6717 * were marked as full, then we know there aren't enough chunks, and we 6718 * can just return. 6719 */ 6720 ret = -1; 6721 if (full) 6722 goto out; 6723 6724 mutex_lock(&root->fs_info->chunk_mutex); 6725 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 6726 u64 min_free = btrfs_block_group_used(&block_group->item); 6727 u64 dev_offset; 6728 6729 /* 6730 * check to make sure we can actually find a chunk with enough 6731 * space to fit our block group in. 6732 */ 6733 if (device->total_bytes > device->bytes_used + min_free) { 6734 ret = find_free_dev_extent(NULL, device, min_free, 6735 &dev_offset, NULL); 6736 if (!ret) 6737 break; 6738 ret = -1; 6739 } 6740 } 6741 mutex_unlock(&root->fs_info->chunk_mutex); 6742 out: 6743 btrfs_put_block_group(block_group); 6744 return ret; 6745 } 6746 6747 static int find_first_block_group(struct btrfs_root *root, 6748 struct btrfs_path *path, struct btrfs_key *key) 6749 { 6750 int ret = 0; 6751 struct btrfs_key found_key; 6752 struct extent_buffer *leaf; 6753 int slot; 6754 6755 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 6756 if (ret < 0) 6757 goto out; 6758 6759 while (1) { 6760 slot = path->slots[0]; 6761 leaf = path->nodes[0]; 6762 if (slot >= btrfs_header_nritems(leaf)) { 6763 ret = btrfs_next_leaf(root, path); 6764 if (ret == 0) 6765 continue; 6766 if (ret < 0) 6767 goto out; 6768 break; 6769 } 6770 btrfs_item_key_to_cpu(leaf, &found_key, slot); 6771 6772 if (found_key.objectid >= key->objectid && 6773 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 6774 ret = 0; 6775 goto out; 6776 } 6777 path->slots[0]++; 6778 } 6779 out: 6780 return ret; 6781 } 6782 6783 void btrfs_put_block_group_cache(struct btrfs_fs_info *info) 6784 { 6785 struct btrfs_block_group_cache *block_group; 6786 u64 last = 0; 6787 6788 while (1) { 6789 struct inode *inode; 6790 6791 block_group = btrfs_lookup_first_block_group(info, last); 6792 while (block_group) { 6793 spin_lock(&block_group->lock); 6794 if (block_group->iref) 6795 break; 6796 spin_unlock(&block_group->lock); 6797 block_group = next_block_group(info->tree_root, 6798 block_group); 6799 } 6800 if (!block_group) { 6801 if (last == 0) 6802 break; 6803 last = 0; 6804 continue; 6805 } 6806 6807 inode = block_group->inode; 6808 block_group->iref = 0; 6809 block_group->inode = NULL; 6810 spin_unlock(&block_group->lock); 6811 iput(inode); 6812 last = block_group->key.objectid + block_group->key.offset; 6813 btrfs_put_block_group(block_group); 6814 } 6815 } 6816 6817 int btrfs_free_block_groups(struct btrfs_fs_info *info) 6818 { 6819 struct btrfs_block_group_cache *block_group; 6820 struct btrfs_space_info *space_info; 6821 struct btrfs_caching_control *caching_ctl; 6822 struct rb_node *n; 6823 6824 down_write(&info->extent_commit_sem); 6825 while (!list_empty(&info->caching_block_groups)) { 6826 caching_ctl = list_entry(info->caching_block_groups.next, 6827 struct 
btrfs_caching_control, list); 6828 list_del(&caching_ctl->list); 6829 put_caching_control(caching_ctl); 6830 } 6831 up_write(&info->extent_commit_sem); 6832 6833 spin_lock(&info->block_group_cache_lock); 6834 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6835 block_group = rb_entry(n, struct btrfs_block_group_cache, 6836 cache_node); 6837 rb_erase(&block_group->cache_node, 6838 &info->block_group_cache_tree); 6839 spin_unlock(&info->block_group_cache_lock); 6840 6841 down_write(&block_group->space_info->groups_sem); 6842 list_del(&block_group->list); 6843 up_write(&block_group->space_info->groups_sem); 6844 6845 if (block_group->cached == BTRFS_CACHE_STARTED) 6846 wait_block_group_cache_done(block_group); 6847 6848 /* 6849 * We haven't cached this block group, which means we could 6850 * possibly have excluded extents on this block group. 6851 */ 6852 if (block_group->cached == BTRFS_CACHE_NO) 6853 free_excluded_extents(info->extent_root, block_group); 6854 6855 btrfs_remove_free_space_cache(block_group); 6856 btrfs_put_block_group(block_group); 6857 6858 spin_lock(&info->block_group_cache_lock); 6859 } 6860 spin_unlock(&info->block_group_cache_lock); 6861 6862 /* now that all the block groups are freed, go through and 6863 * free all the space_info structs. This is only called during 6864 * the final stages of unmount, and so we know nobody is 6865 * using them. We call synchronize_rcu() once before we start, 6866 * just to be on the safe side. 6867 */ 6868 synchronize_rcu(); 6869 6870 release_global_block_rsv(info); 6871 6872 while(!list_empty(&info->space_info)) { 6873 space_info = list_entry(info->space_info.next, 6874 struct btrfs_space_info, 6875 list); 6876 if (space_info->bytes_pinned > 0 || 6877 space_info->bytes_reserved > 0) { 6878 WARN_ON(1); 6879 dump_space_info(space_info, 0, 0); 6880 } 6881 list_del(&space_info->list); 6882 kfree(space_info); 6883 } 6884 return 0; 6885 } 6886 6887 static void __link_block_group(struct btrfs_space_info *space_info, 6888 struct btrfs_block_group_cache *cache) 6889 { 6890 int index = get_block_group_index(cache); 6891 6892 down_write(&space_info->groups_sem); 6893 list_add_tail(&cache->list, &space_info->block_groups[index]); 6894 up_write(&space_info->groups_sem); 6895 } 6896 6897 int btrfs_read_block_groups(struct btrfs_root *root) 6898 { 6899 struct btrfs_path *path; 6900 int ret; 6901 struct btrfs_block_group_cache *cache; 6902 struct btrfs_fs_info *info = root->fs_info; 6903 struct btrfs_space_info *space_info; 6904 struct btrfs_key key; 6905 struct btrfs_key found_key; 6906 struct extent_buffer *leaf; 6907 int need_clear = 0; 6908 u64 cache_gen; 6909 6910 root = info->extent_root; 6911 key.objectid = 0; 6912 key.offset = 0; 6913 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 6914 path = btrfs_alloc_path(); 6915 if (!path) 6916 return -ENOMEM; 6917 path->reada = 1; 6918 6919 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6920 if (cache_gen != 0 && 6921 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 6922 need_clear = 1; 6923 if (btrfs_test_opt(root, CLEAR_CACHE)) 6924 need_clear = 1; 6925 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) 6926 printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 6927 6928 while (1) { 6929 ret = find_first_block_group(root, path, &key); 6930 if (ret > 0) 6931 break; 6932 if (ret != 0) 6933 goto error; 6934 leaf = path->nodes[0]; 6935 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6936 cache = kzalloc(sizeof(*cache), 
GFP_NOFS); 6937 if (!cache) { 6938 ret = -ENOMEM; 6939 goto error; 6940 } 6941 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 6942 GFP_NOFS); 6943 if (!cache->free_space_ctl) { 6944 kfree(cache); 6945 ret = -ENOMEM; 6946 goto error; 6947 } 6948 6949 atomic_set(&cache->count, 1); 6950 spin_lock_init(&cache->lock); 6951 cache->fs_info = info; 6952 INIT_LIST_HEAD(&cache->list); 6953 INIT_LIST_HEAD(&cache->cluster_list); 6954 6955 if (need_clear) 6956 cache->disk_cache_state = BTRFS_DC_CLEAR; 6957 6958 read_extent_buffer(leaf, &cache->item, 6959 btrfs_item_ptr_offset(leaf, path->slots[0]), 6960 sizeof(cache->item)); 6961 memcpy(&cache->key, &found_key, sizeof(found_key)); 6962 6963 key.objectid = found_key.objectid + found_key.offset; 6964 btrfs_release_path(path); 6965 cache->flags = btrfs_block_group_flags(&cache->item); 6966 cache->sectorsize = root->sectorsize; 6967 6968 btrfs_init_free_space_ctl(cache); 6969 6970 /* 6971 * We need to exclude the super stripes now so that the space 6972 * info has super bytes accounted for, otherwise we'll think 6973 * we have more space than we actually do. 6974 */ 6975 exclude_super_stripes(root, cache); 6976 6977 /* 6978 * check for two cases, either we are full, and therefore 6979 * don't need to bother with the caching work since we won't 6980 * find any space, or we are empty, and we can just add all 6981 * the space in and be done with it. This saves us _alot_ of 6982 * time, particularly in the full case. 6983 */ 6984 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 6985 cache->last_byte_to_unpin = (u64)-1; 6986 cache->cached = BTRFS_CACHE_FINISHED; 6987 free_excluded_extents(root, cache); 6988 } else if (btrfs_block_group_used(&cache->item) == 0) { 6989 cache->last_byte_to_unpin = (u64)-1; 6990 cache->cached = BTRFS_CACHE_FINISHED; 6991 add_new_free_space(cache, root->fs_info, 6992 found_key.objectid, 6993 found_key.objectid + 6994 found_key.offset); 6995 free_excluded_extents(root, cache); 6996 } 6997 6998 ret = update_space_info(info, cache->flags, found_key.offset, 6999 btrfs_block_group_used(&cache->item), 7000 &space_info); 7001 BUG_ON(ret); 7002 cache->space_info = space_info; 7003 spin_lock(&cache->space_info->lock); 7004 cache->space_info->bytes_readonly += cache->bytes_super; 7005 spin_unlock(&cache->space_info->lock); 7006 7007 __link_block_group(space_info, cache); 7008 7009 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7010 BUG_ON(ret); 7011 7012 set_avail_alloc_bits(root->fs_info, cache->flags); 7013 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7014 set_block_group_ro(cache); 7015 } 7016 7017 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { 7018 if (!(get_alloc_profile(root, space_info->flags) & 7019 (BTRFS_BLOCK_GROUP_RAID10 | 7020 BTRFS_BLOCK_GROUP_RAID1 | 7021 BTRFS_BLOCK_GROUP_DUP))) 7022 continue; 7023 /* 7024 * avoid allocating from un-mirrored block group if there are 7025 * mirrored block groups. 
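 * (block_groups[3] and block_groups[4] are the lists for the un-mirrored
 * profiles, raid0 and single)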
7026 */ 7027 list_for_each_entry(cache, &space_info->block_groups[3], list) 7028 set_block_group_ro(cache); 7029 list_for_each_entry(cache, &space_info->block_groups[4], list) 7030 set_block_group_ro(cache); 7031 } 7032 7033 init_global_block_rsv(info); 7034 ret = 0; 7035 error: 7036 btrfs_free_path(path); 7037 return ret; 7038 } 7039 7040 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7041 struct btrfs_root *root, u64 bytes_used, 7042 u64 type, u64 chunk_objectid, u64 chunk_offset, 7043 u64 size) 7044 { 7045 int ret; 7046 struct btrfs_root *extent_root; 7047 struct btrfs_block_group_cache *cache; 7048 7049 extent_root = root->fs_info->extent_root; 7050 7051 root->fs_info->last_trans_log_full_commit = trans->transid; 7052 7053 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7054 if (!cache) 7055 return -ENOMEM; 7056 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 7057 GFP_NOFS); 7058 if (!cache->free_space_ctl) { 7059 kfree(cache); 7060 return -ENOMEM; 7061 } 7062 7063 cache->key.objectid = chunk_offset; 7064 cache->key.offset = size; 7065 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7066 cache->sectorsize = root->sectorsize; 7067 cache->fs_info = root->fs_info; 7068 7069 atomic_set(&cache->count, 1); 7070 spin_lock_init(&cache->lock); 7071 INIT_LIST_HEAD(&cache->list); 7072 INIT_LIST_HEAD(&cache->cluster_list); 7073 7074 btrfs_init_free_space_ctl(cache); 7075 7076 btrfs_set_block_group_used(&cache->item, bytes_used); 7077 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7078 cache->flags = type; 7079 btrfs_set_block_group_flags(&cache->item, type); 7080 7081 cache->last_byte_to_unpin = (u64)-1; 7082 cache->cached = BTRFS_CACHE_FINISHED; 7083 exclude_super_stripes(root, cache); 7084 7085 add_new_free_space(cache, root->fs_info, chunk_offset, 7086 chunk_offset + size); 7087 7088 free_excluded_extents(root, cache); 7089 7090 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7091 &cache->space_info); 7092 BUG_ON(ret); 7093 7094 spin_lock(&cache->space_info->lock); 7095 cache->space_info->bytes_readonly += cache->bytes_super; 7096 spin_unlock(&cache->space_info->lock); 7097 7098 __link_block_group(cache->space_info, cache); 7099 7100 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7101 BUG_ON(ret); 7102 7103 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7104 sizeof(cache->item)); 7105 BUG_ON(ret); 7106 7107 set_avail_alloc_bits(extent_root->fs_info, type); 7108 7109 return 0; 7110 } 7111 7112 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7113 struct btrfs_root *root, u64 group_start) 7114 { 7115 struct btrfs_path *path; 7116 struct btrfs_block_group_cache *block_group; 7117 struct btrfs_free_cluster *cluster; 7118 struct btrfs_root *tree_root = root->fs_info->tree_root; 7119 struct btrfs_key key; 7120 struct inode *inode; 7121 int ret; 7122 int factor; 7123 7124 root = root->fs_info->extent_root; 7125 7126 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7127 BUG_ON(!block_group); 7128 BUG_ON(!block_group->ro); 7129 7130 /* 7131 * Free the reserved super bytes from this block group before 7132 * remove it. 
7133 */ 7134 free_excluded_extents(root, block_group); 7135 7136 memcpy(&key, &block_group->key, sizeof(key)); 7137 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 7138 BTRFS_BLOCK_GROUP_RAID1 | 7139 BTRFS_BLOCK_GROUP_RAID10)) 7140 factor = 2; 7141 else 7142 factor = 1; 7143 7144 /* make sure this block group isn't part of an allocation cluster */ 7145 cluster = &root->fs_info->data_alloc_cluster; 7146 spin_lock(&cluster->refill_lock); 7147 btrfs_return_cluster_to_free_space(block_group, cluster); 7148 spin_unlock(&cluster->refill_lock); 7149 7150 /* 7151 * make sure this block group isn't part of a metadata 7152 * allocation cluster 7153 */ 7154 cluster = &root->fs_info->meta_alloc_cluster; 7155 spin_lock(&cluster->refill_lock); 7156 btrfs_return_cluster_to_free_space(block_group, cluster); 7157 spin_unlock(&cluster->refill_lock); 7158 7159 path = btrfs_alloc_path(); 7160 BUG_ON(!path); 7161 7162 inode = lookup_free_space_inode(root, block_group, path); 7163 if (!IS_ERR(inode)) { 7164 btrfs_orphan_add(trans, inode); 7165 clear_nlink(inode); 7166 /* One for the block groups ref */ 7167 spin_lock(&block_group->lock); 7168 if (block_group->iref) { 7169 block_group->iref = 0; 7170 block_group->inode = NULL; 7171 spin_unlock(&block_group->lock); 7172 iput(inode); 7173 } else { 7174 spin_unlock(&block_group->lock); 7175 } 7176 /* One for our lookup ref */ 7177 iput(inode); 7178 } 7179 7180 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 7181 key.offset = block_group->key.objectid; 7182 key.type = 0; 7183 7184 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 7185 if (ret < 0) 7186 goto out; 7187 if (ret > 0) 7188 btrfs_release_path(path); 7189 if (ret == 0) { 7190 ret = btrfs_del_item(trans, tree_root, path); 7191 if (ret) 7192 goto out; 7193 btrfs_release_path(path); 7194 } 7195 7196 spin_lock(&root->fs_info->block_group_cache_lock); 7197 rb_erase(&block_group->cache_node, 7198 &root->fs_info->block_group_cache_tree); 7199 spin_unlock(&root->fs_info->block_group_cache_lock); 7200 7201 down_write(&block_group->space_info->groups_sem); 7202 /* 7203 * we must use list_del_init so people can check to see if they 7204 * are still on the list after taking the semaphore 7205 */ 7206 list_del_init(&block_group->list); 7207 up_write(&block_group->space_info->groups_sem); 7208 7209 if (block_group->cached == BTRFS_CACHE_STARTED) 7210 wait_block_group_cache_done(block_group); 7211 7212 btrfs_remove_free_space_cache(block_group); 7213 7214 spin_lock(&block_group->space_info->lock); 7215 block_group->space_info->total_bytes -= block_group->key.offset; 7216 block_group->space_info->bytes_readonly -= block_group->key.offset; 7217 block_group->space_info->disk_total -= block_group->key.offset * factor; 7218 spin_unlock(&block_group->space_info->lock); 7219 7220 memcpy(&key, &block_group->key, sizeof(key)); 7221 7222 btrfs_clear_space_info_full(root->fs_info); 7223 7224 btrfs_put_block_group(block_group); 7225 btrfs_put_block_group(block_group); 7226 7227 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7228 if (ret > 0) 7229 ret = -EIO; 7230 if (ret < 0) 7231 goto out; 7232 7233 ret = btrfs_del_item(trans, root, path); 7234 out: 7235 btrfs_free_path(path); 7236 return ret; 7237 } 7238 7239 int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 7240 { 7241 struct btrfs_space_info *space_info; 7242 struct btrfs_super_block *disk_super; 7243 u64 features; 7244 u64 flags; 7245 int mixed = 0; 7246 int ret; 7247 7248 disk_super = &fs_info->super_copy; 7249 if (!btrfs_super_root(disk_super)) 7250 
return 1; 7251 7252 features = btrfs_super_incompat_flags(disk_super); 7253 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 7254 mixed = 1; 7255 7256 flags = BTRFS_BLOCK_GROUP_SYSTEM; 7257 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7258 if (ret) 7259 goto out; 7260 7261 if (mixed) { 7262 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; 7263 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7264 } else { 7265 flags = BTRFS_BLOCK_GROUP_METADATA; 7266 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7267 if (ret) 7268 goto out; 7269 7270 flags = BTRFS_BLOCK_GROUP_DATA; 7271 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7272 } 7273 out: 7274 return ret; 7275 } 7276 7277 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 7278 { 7279 return unpin_extent_range(root, start, end); 7280 } 7281 7282 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 7283 u64 num_bytes, u64 *actual_bytes) 7284 { 7285 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); 7286 } 7287 7288 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 7289 { 7290 struct btrfs_fs_info *fs_info = root->fs_info; 7291 struct btrfs_block_group_cache *cache = NULL; 7292 u64 group_trimmed; 7293 u64 start; 7294 u64 end; 7295 u64 trimmed = 0; 7296 int ret = 0; 7297 7298 cache = btrfs_lookup_block_group(fs_info, range->start); 7299 7300 while (cache) { 7301 if (cache->key.objectid >= (range->start + range->len)) { 7302 btrfs_put_block_group(cache); 7303 break; 7304 } 7305 7306 start = max(range->start, cache->key.objectid); 7307 end = min(range->start + range->len, 7308 cache->key.objectid + cache->key.offset); 7309 7310 if (end - start >= range->minlen) { 7311 if (!block_group_cache_done(cache)) { 7312 ret = cache_block_group(cache, NULL, root, 0); 7313 if (!ret) 7314 wait_block_group_cache_done(cache); 7315 } 7316 ret = btrfs_trim_block_group(cache, 7317 &group_trimmed, 7318 start, 7319 end, 7320 range->minlen); 7321 7322 trimmed += group_trimmed; 7323 if (ret) { 7324 btrfs_put_block_group(cache); 7325 break; 7326 } 7327 } 7328 7329 cache = next_block_group(fs_info->tree_root, cache); 7330 } 7331 7332 range->len = trimmed; 7333 return ret; 7334 } 7335
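/*
 * note: btrfs_trim_fs() above backs the FITRIM ioctl.  an illustrative
 * struct fstrim_range of { .start = 0, .len = (u64)-1, .minlen = 0 }
 * asks for every block group to be trimmed; on return range->len holds
 * the number of bytes actually discarded.
 */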