/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include <linux/slab.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

/* control flags for do_chunk_alloc's force field
 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk
 * if we really need one.
 *
 * CHUNK_ALLOC_FORCE means it must try to allocate one
 *
 * CHUNK_ALLOC_LIMITED means to only try and allocate one
 * if we have very few chunks already allocated.  This is
 * used as part of the clustering code to help make sure
 * we have a good pool of storage to cluster in, without
 * filling the FS with empty chunks
 *
 */
enum {
	CHUNK_ALLOC_NO_FORCE = 0,
	CHUNK_ALLOC_FORCE = 1,
	CHUNK_ALLOC_LIMITED = 2,
};

static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force);
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key);
static void dump_space_info(struct btrfs_space_info *info, u64 bytes,
			    int dump_block_groups);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

static void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
{
	atomic_inc(&cache->count);
}

void
btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count)) {
		WARN_ON(cache->pinned > 0);
		WARN_ON(cache->reserved > 0);
		WARN_ON(cache->reserved_pinned > 0);
		kfree(cache->free_space_ctl);
		kfree(cache);
	}
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		btrfs_get_block_group(ret);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

static int add_excluded_extent(struct btrfs_root *root,
			       u64 start, u64 num_bytes)
{
	u64 end = start + num_bytes - 1;
	set_extent_bits(&root->fs_info->freed_extents[0],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	set_extent_bits(&root->fs_info->freed_extents[1],
			start, end, EXTENT_UPTODATE, GFP_NOFS);
	return 0;
}

static void free_excluded_extents(struct btrfs_root *root,
				  struct btrfs_block_group_cache *cache)
{
	u64 start, end;

	start = cache->key.objectid;
	end = start + cache->key.offset - 1;

	clear_extent_bits(&root->fs_info->freed_extents[0],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
	clear_extent_bits(&root->fs_info->freed_extents[1],
			  start, end, EXTENT_UPTODATE, GFP_NOFS);
}

static int exclude_super_stripes(struct btrfs_root *root,
				 struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
		stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
		cache->bytes_super += stripe_len;
		ret =
		      add_excluded_extent(root, cache->key.objectid,
					  stripe_len);
		BUG_ON(ret);
	}

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		BUG_ON(ret);

		while (nr--) {
			cache->bytes_super += stripe_len;
			ret = add_excluded_extent(root, logical[nr],
						  stripe_len);
			BUG_ON(ret);
		}

		kfree(logical);
	}
	return 0;
}

static struct btrfs_caching_control *
get_caching_control(struct btrfs_block_group_cache *cache)
{
	struct btrfs_caching_control *ctl;

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_STARTED) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	/* We're loading it the fast way, so we don't have a caching_ctl. */
	if (!cache->caching_ctl) {
		spin_unlock(&cache->lock);
		return NULL;
	}

	ctl = cache->caching_ctl;
	atomic_inc(&ctl->count);
	spin_unlock(&cache->lock);
	return ctl;
}

static void put_caching_control(struct btrfs_caching_control *ctl)
{
	if (atomic_dec_and_test(&ctl->count))
		kfree(ctl);
}

/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY | EXTENT_UPTODATE);
		if (ret)
			break;

		if (extent_start <= start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static int caching_kthread(void *data)
{
	struct btrfs_block_group_cache *block_group = data;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	struct btrfs_caching_control *caching_ctl = block_group->caching_ctl;
	struct btrfs_root *extent_root = fs_info->extent_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u64 total_found = 0;
	u64 last = 0;
	u32 nritems;
	int ret = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);

	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.
	 * So we skip locking and search the commit
	 * root, since it's read-only.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 1;

	key.objectid = last;
	key.offset = 0;
	key.type = BTRFS_EXTENT_ITEM_KEY;
again:
	mutex_lock(&caching_ctl->mutex);
	/* need to make sure the commit_root doesn't disappear */
	down_read(&fs_info->extent_commit_sem);

	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);

	while (1) {
		if (btrfs_fs_closing(fs_info) > 1) {
			last = (u64)-1;
			break;
		}

		if (path->slots[0] < nritems) {
			btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		} else {
			ret = find_next_key(path, 0, &key);
			if (ret)
				break;

			if (need_resched() ||
			    btrfs_next_leaf(extent_root, path)) {
				caching_ctl->progress = last;
				btrfs_release_path(path);
				up_read(&fs_info->extent_commit_sem);
				mutex_unlock(&caching_ctl->mutex);
				cond_resched();
				goto again;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			continue;
		}

		if (key.objectid < block_group->key.objectid) {
			path->slots[0]++;
			continue;
		}

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (key.type == BTRFS_EXTENT_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			last = key.objectid + key.offset;

			if (total_found > (1024 * 1024 * 2)) {
				total_found = 0;
				wake_up(&caching_ctl->wait);
			}
		}
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);
	caching_ctl->progress = (u64)-1;

	spin_lock(&block_group->lock);
	block_group->caching_ctl = NULL;
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->extent_commit_sem);

	free_excluded_extents(extent_root, block_group);

	mutex_unlock(&caching_ctl->mutex);
	wake_up(&caching_ctl->wait);

	put_caching_control(caching_ctl);
	atomic_dec(&block_group->space_info->caching_threads);
	btrfs_put_block_group(block_group);

	return 0;
}

static int cache_block_group(struct btrfs_block_group_cache *cache,
			     struct btrfs_trans_handle *trans,
			     struct btrfs_root *root,
			     int load_cache_only)
{
	struct btrfs_fs_info *fs_info = cache->fs_info;
	struct btrfs_caching_control *caching_ctl;
	struct task_struct *tsk;
	int ret = 0;

	smp_mb();
	if (cache->cached != BTRFS_CACHE_NO)
		return 0;

	/*
	 * We can't do the read from on-disk cache during a commit since we need
	 * to have the normal tree locking.  Also if we are currently trying to
	 * allocate blocks for the tree root we can't do the fast caching since
	 * we likely hold important locks.
	 */
	if (trans && (!trans->transaction->in_commit) &&
	    (root && root != root->fs_info->tree_root)) {
		spin_lock(&cache->lock);
		if (cache->cached != BTRFS_CACHE_NO) {
			spin_unlock(&cache->lock);
			return 0;
		}
		cache->cached = BTRFS_CACHE_STARTED;
		spin_unlock(&cache->lock);

		ret = load_free_space_cache(fs_info, cache);

		spin_lock(&cache->lock);
		if (ret == 1) {
			cache->cached = BTRFS_CACHE_FINISHED;
			cache->last_byte_to_unpin = (u64)-1;
		} else {
			cache->cached = BTRFS_CACHE_NO;
		}
		spin_unlock(&cache->lock);
		if (ret == 1) {
			free_excluded_extents(fs_info->extent_root, cache);
			return 0;
		}
	}

	if (load_cache_only)
		return 0;

	caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS);
	BUG_ON(!caching_ctl);

	INIT_LIST_HEAD(&caching_ctl->list);
	mutex_init(&caching_ctl->mutex);
	init_waitqueue_head(&caching_ctl->wait);
	caching_ctl->block_group = cache;
	caching_ctl->progress = cache->key.objectid;
	/* one for caching kthread, one for caching block group list */
	atomic_set(&caching_ctl->count, 2);

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		kfree(caching_ctl);
		return 0;
	}
	cache->caching_ctl = caching_ctl;
	cache->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&cache->lock);

	down_write(&fs_info->extent_commit_sem);
	list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups);
	up_write(&fs_info->extent_commit_sem);

	atomic_inc(&cache->space_info->caching_threads);
	btrfs_get_block_group(cache);

	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
			  cache->key.objectid);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		printk(KERN_ERR "error running thread %d\n", ret);
		BUG();
	}

	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM |
		 BTRFS_BLOCK_GROUP_METADATA;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags & flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

static u64 div_factor_fine(u64 num, int factor)
{
	if (factor == 100)
		return num;
	num *= factor;
	do_div(num, 100);
	return num;
}

u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * helper function to lookup reference count and flags of extent.
 *
 * the head node for delayed ref is used to store the sum of all the
 * reference count modifications queued up in the rbtree. the head
 * node may also store the extent flags to set. This way you can check
 * to see what the reference count and extent flags would be if all of
 * the delayed refs are not processed.
 */
int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 bytenr,
			     u64 num_bytes, u64 *refs, u64 *flags)
{
	struct btrfs_delayed_ref_head *head;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	u32 item_size;
	u64 num_refs;
	u64 extent_flags;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;
	if (!trans) {
		path->skip_locking = 1;
		path->search_commit_root = 1;
	}
again:
	ret = btrfs_search_slot(trans, root->fs_info->extent_root,
				&key, path, 0, 0);
	if (ret < 0)
		goto out_free;

	if (ret == 0) {
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			ei = btrfs_item_ptr(leaf, path->slots[0],
					    struct btrfs_extent_item);
			num_refs = btrfs_extent_refs(leaf, ei);
			extent_flags = btrfs_extent_flags(leaf, ei);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			struct btrfs_extent_item_v0 *ei0;
			BUG_ON(item_size != sizeof(*ei0));
			ei0 = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_item_v0);
			num_refs = btrfs_extent_refs_v0(leaf, ei0);
			/* FIXME: this isn't correct for data */
			extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF;
#else
			BUG();
#endif
		}
		BUG_ON(num_refs == 0);
	} else {
		num_refs = 0;
		extent_flags = 0;
		ret = 0;
	}

	if (!trans)
		goto out;

	delayed_refs = &trans->transaction->delayed_refs;
	spin_lock(&delayed_refs->lock);
	head = btrfs_find_delayed_ref_head(trans, bytenr);
	if (head) {
		if (!mutex_trylock(&head->mutex)) {
			atomic_inc(&head->node.refs);
			spin_unlock(&delayed_refs->lock);

			btrfs_release_path(path);

			/*
			 * Mutex was contended, block until it's released and try
			 * again
			 */
			mutex_lock(&head->mutex);
			mutex_unlock(&head->mutex);
			btrfs_put_delayed_ref(&head->node);
			goto again;
		}
		if (head->extent_op && head->extent_op->update_flags)
			extent_flags |= head->extent_op->flags_to_set;
		else
			BUG_ON(num_refs == 0);

		num_refs += head->node.ref_mod;
		mutex_unlock(&head->mutex);
	}
	spin_unlock(&delayed_refs->lock);
out:
	WARN_ON(num_refs == 0);
	if (refs)
		*refs = num_refs;
	if (flags)
		*flags = extent_flags;
out_free:
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. The implicit back refs is optimized
 * for pointers in non-shared tree blocks. For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key. This information allows us to find the block by
 * b-tree searching.
 * The full back refs is for pointers in tree blocks not
 * referenced by their owner trees. The location of tree block is recorded
 * in the back refs. Actually the full back refs is generic, and can be
 * used in all cases the implicit back refs is used. The major shortcoming
 * of the full back refs is its overhead. Every time a tree block gets
 * COWed, we have to update back refs entry for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in old transaction, the
 * only way to drop a reference to it is COW it. So we can detect the
 * event that tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COW'd through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs is used for pointers
 * in the block. Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs is used for
 * pointers in the block. Add full back refs for every pointer in the
 * block, increase lower level extents' reference counts. The original
 * implicit back refs are inherited by the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer in
 * the new block, increase lower level extents' reference count.
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent,
 * The key type is used to differentiate between types of back refs.
 * There are different meanings of the key offset for different types
 * of back refs.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf
 *
 * When a file extent is allocated, the implicit back refs is used.
 * The fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of key.
 * The key offset for the implicit back refs is
 * objectid of block's owner tree. The key offset for the full back refs
 * is the first byte of parent block.
 *
 * When implicit back refs is used, information about the lowest key and
 * level of the tree block are required. This information is stored in
 * the tree block info structure.
 */

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
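			/* a new item was just inserted: set the initial ref count */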
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
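	/* read the count from the inline ref if given, otherwise the item key type tells us the ref layout */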
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)

{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 *	 items in the tree are ordered.
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if
				    (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64
				 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
int update_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_mod,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		ret = update_inline_extent_backref(trans, root, path, iref,
						   refs_to_add, extent_op);
	} else if (ret == -ENOENT) {
		ret = setup_inline_extent_backref(trans, root, path,
						  iref,
						  parent, root_objectid,
						  owner, offset, refs_to_add,
						  extent_op);
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	int ret;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		ret = update_inline_extent_backref(trans, root, path, iref,
						   -refs_to_drop, NULL);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
	} else {
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

static int btrfs_issue_discard(struct block_device *bdev,
			       u64 start, u64 len)
{
	return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0);
}

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes, u64 *actual_bytes)
{
	int ret;
	u64 discarded_bytes = 0;
	struct btrfs_multi_bio *multi = NULL;


	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD,
			      bytenr, &num_bytes, &multi, 0);
	if (!ret) {
		struct btrfs_bio_stripe *stripe = multi->stripes;
		int i;


		for (i = 0; i < multi->num_stripes; i++, stripe++) {
			ret = btrfs_issue_discard(stripe->dev->bdev,
						  stripe->physical,
						  stripe->length);
			if (!ret)
				discarded_bytes += stripe->length;
			else if (ret != -EOPNOTSUPP)
				break;
		}
		kfree(multi);
	}
	if (discarded_bytes && ret == -EOPNOTSUPP)
		ret = 0;

	if (actual_bytes)
		*actual_bytes = discarded_bytes;


	return ret;
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;
	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
					parent, root_objectid, owner, offset,
					BTRFS_ADD_DELAYED_REF, NULL);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(path);

	path->reada = 1;
	path->leave_spinning = 1;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	BUG_ON(ret);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op) {
			BUG_ON(extent_op->update_key);
			flags |= extent_op->flags_to_set;
		}
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
u32 item_size; 1962 int ret; 1963 int err = 0; 1964 1965 path = btrfs_alloc_path(); 1966 if (!path) 1967 return -ENOMEM; 1968 1969 key.objectid = node->bytenr; 1970 key.type = BTRFS_EXTENT_ITEM_KEY; 1971 key.offset = node->num_bytes; 1972 1973 path->reada = 1; 1974 path->leave_spinning = 1; 1975 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1976 path, 0, 1); 1977 if (ret < 0) { 1978 err = ret; 1979 goto out; 1980 } 1981 if (ret > 0) { 1982 err = -EIO; 1983 goto out; 1984 } 1985 1986 leaf = path->nodes[0]; 1987 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1988 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1989 if (item_size < sizeof(*ei)) { 1990 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1991 path, (u64)-1, 0); 1992 if (ret < 0) { 1993 err = ret; 1994 goto out; 1995 } 1996 leaf = path->nodes[0]; 1997 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1998 } 1999 #endif 2000 BUG_ON(item_size < sizeof(*ei)); 2001 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2002 __run_delayed_extent_op(extent_op, leaf, ei); 2003 2004 btrfs_mark_buffer_dirty(leaf); 2005 out: 2006 btrfs_free_path(path); 2007 return err; 2008 } 2009 2010 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 2011 struct btrfs_root *root, 2012 struct btrfs_delayed_ref_node *node, 2013 struct btrfs_delayed_extent_op *extent_op, 2014 int insert_reserved) 2015 { 2016 int ret = 0; 2017 struct btrfs_delayed_tree_ref *ref; 2018 struct btrfs_key ins; 2019 u64 parent = 0; 2020 u64 ref_root = 0; 2021 2022 ins.objectid = node->bytenr; 2023 ins.offset = node->num_bytes; 2024 ins.type = BTRFS_EXTENT_ITEM_KEY; 2025 2026 ref = btrfs_delayed_node_to_tree_ref(node); 2027 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2028 parent = ref->parent; 2029 else 2030 ref_root = ref->root; 2031 2032 BUG_ON(node->ref_mod != 1); 2033 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2034 BUG_ON(!extent_op || !extent_op->update_flags || 2035 !extent_op->update_key); 2036 ret = alloc_reserved_tree_block(trans, root, 2037 parent, ref_root, 2038 extent_op->flags_to_set, 2039 &extent_op->key, 2040 ref->level, &ins); 2041 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2042 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2043 node->num_bytes, parent, ref_root, 2044 ref->level, 0, 1, extent_op); 2045 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2046 ret = __btrfs_free_extent(trans, root, node->bytenr, 2047 node->num_bytes, parent, ref_root, 2048 ref->level, 0, 1, extent_op); 2049 } else { 2050 BUG(); 2051 } 2052 return ret; 2053 } 2054 2055 /* helper function to actually process a single delayed ref entry */ 2056 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 2057 struct btrfs_root *root, 2058 struct btrfs_delayed_ref_node *node, 2059 struct btrfs_delayed_extent_op *extent_op, 2060 int insert_reserved) 2061 { 2062 int ret; 2063 if (btrfs_delayed_ref_is_head(node)) { 2064 struct btrfs_delayed_ref_head *head; 2065 /* 2066 * we've hit the end of the chain and we were supposed 2067 * to insert this extent into the tree. 
But, it got 2068 * deleted before we ever needed to insert it, so all 2069 * we have to do is clean up the accounting 2070 */ 2071 BUG_ON(extent_op); 2072 head = btrfs_delayed_node_to_head(node); 2073 if (insert_reserved) { 2074 btrfs_pin_extent(root, node->bytenr, 2075 node->num_bytes, 1); 2076 if (head->is_data) { 2077 ret = btrfs_del_csums(trans, root, 2078 node->bytenr, 2079 node->num_bytes); 2080 BUG_ON(ret); 2081 } 2082 } 2083 mutex_unlock(&head->mutex); 2084 return 0; 2085 } 2086 2087 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2088 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2089 ret = run_delayed_tree_ref(trans, root, node, extent_op, 2090 insert_reserved); 2091 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 2092 node->type == BTRFS_SHARED_DATA_REF_KEY) 2093 ret = run_delayed_data_ref(trans, root, node, extent_op, 2094 insert_reserved); 2095 else 2096 BUG(); 2097 return ret; 2098 } 2099 2100 static noinline struct btrfs_delayed_ref_node * 2101 select_delayed_ref(struct btrfs_delayed_ref_head *head) 2102 { 2103 struct rb_node *node; 2104 struct btrfs_delayed_ref_node *ref; 2105 int action = BTRFS_ADD_DELAYED_REF; 2106 again: 2107 /* 2108 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2109 * this prevents ref count from going down to zero when 2110 * there still are pending delayed ref. 2111 */ 2112 node = rb_prev(&head->node.rb_node); 2113 while (1) { 2114 if (!node) 2115 break; 2116 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2117 rb_node); 2118 if (ref->bytenr != head->node.bytenr) 2119 break; 2120 if (ref->action == action) 2121 return ref; 2122 node = rb_prev(node); 2123 } 2124 if (action == BTRFS_ADD_DELAYED_REF) { 2125 action = BTRFS_DROP_DELAYED_REF; 2126 goto again; 2127 } 2128 return NULL; 2129 } 2130 2131 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2132 struct btrfs_root *root, 2133 struct list_head *cluster) 2134 { 2135 struct btrfs_delayed_ref_root *delayed_refs; 2136 struct btrfs_delayed_ref_node *ref; 2137 struct btrfs_delayed_ref_head *locked_ref = NULL; 2138 struct btrfs_delayed_extent_op *extent_op; 2139 int ret; 2140 int count = 0; 2141 int must_insert_reserved = 0; 2142 2143 delayed_refs = &trans->transaction->delayed_refs; 2144 while (1) { 2145 if (!locked_ref) { 2146 /* pick a new head ref from the cluster list */ 2147 if (list_empty(cluster)) 2148 break; 2149 2150 locked_ref = list_entry(cluster->next, 2151 struct btrfs_delayed_ref_head, cluster); 2152 2153 /* grab the lock that says we are going to process 2154 * all the refs for this head */ 2155 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2156 2157 /* 2158 * we may have dropped the spin lock to get the head 2159 * mutex lock, and that might have given someone else 2160 * time to free the head. If that's true, it has been 2161 * removed from our list and we can move on. 2162 */ 2163 if (ret == -EAGAIN) { 2164 locked_ref = NULL; 2165 count++; 2166 continue; 2167 } 2168 } 2169 2170 /* 2171 * record the must insert reserved flag before we 2172 * drop the spin lock. 
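	 * (Roughly, must_insert_reserved means this extent was allocated with
	 * space already reserved in this transaction and still needs its
	 * extent item inserted; if all of its refs were dropped before that
	 * happened, the head ref just pins the extent and cleans up the
	 * accounting - see run_one_delayed_ref().)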
2173 */ 2174 must_insert_reserved = locked_ref->must_insert_reserved; 2175 locked_ref->must_insert_reserved = 0; 2176 2177 extent_op = locked_ref->extent_op; 2178 locked_ref->extent_op = NULL; 2179 2180 /* 2181 * locked_ref is the head node, so we have to go one 2182 * node back for any delayed ref updates 2183 */ 2184 ref = select_delayed_ref(locked_ref); 2185 if (!ref) { 2186 /* All delayed refs have been processed, Go ahead 2187 * and send the head node to run_one_delayed_ref, 2188 * so that any accounting fixes can happen 2189 */ 2190 ref = &locked_ref->node; 2191 2192 if (extent_op && must_insert_reserved) { 2193 kfree(extent_op); 2194 extent_op = NULL; 2195 } 2196 2197 if (extent_op) { 2198 spin_unlock(&delayed_refs->lock); 2199 2200 ret = run_delayed_extent_op(trans, root, 2201 ref, extent_op); 2202 BUG_ON(ret); 2203 kfree(extent_op); 2204 2205 cond_resched(); 2206 spin_lock(&delayed_refs->lock); 2207 continue; 2208 } 2209 2210 list_del_init(&locked_ref->cluster); 2211 locked_ref = NULL; 2212 } 2213 2214 ref->in_tree = 0; 2215 rb_erase(&ref->rb_node, &delayed_refs->root); 2216 delayed_refs->num_entries--; 2217 2218 spin_unlock(&delayed_refs->lock); 2219 2220 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2221 must_insert_reserved); 2222 BUG_ON(ret); 2223 2224 btrfs_put_delayed_ref(ref); 2225 kfree(extent_op); 2226 count++; 2227 2228 cond_resched(); 2229 spin_lock(&delayed_refs->lock); 2230 } 2231 return count; 2232 } 2233 2234 /* 2235 * this starts processing the delayed reference count updates and 2236 * extent insertions we have queued up so far. count can be 2237 * 0, which means to process everything in the tree at the start 2238 * of the run (but not newly added entries), or it can be some target 2239 * number you'd like to process. 2240 */ 2241 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2242 struct btrfs_root *root, unsigned long count) 2243 { 2244 struct rb_node *node; 2245 struct btrfs_delayed_ref_root *delayed_refs; 2246 struct btrfs_delayed_ref_node *ref; 2247 struct list_head cluster; 2248 int ret; 2249 int run_all = count == (unsigned long)-1; 2250 int run_most = 0; 2251 2252 if (root == root->fs_info->extent_root) 2253 root = root->fs_info->tree_root; 2254 2255 delayed_refs = &trans->transaction->delayed_refs; 2256 INIT_LIST_HEAD(&cluster); 2257 again: 2258 spin_lock(&delayed_refs->lock); 2259 if (count == 0) { 2260 count = delayed_refs->num_entries * 2; 2261 run_most = 1; 2262 } 2263 while (1) { 2264 if (!(run_all || run_most) && 2265 delayed_refs->num_heads_ready < 64) 2266 break; 2267 2268 /* 2269 * go find something we can process in the rbtree. 
We start at 2270 * the beginning of the tree, and then build a cluster 2271 * of refs to process starting at the first one we are able to 2272 * lock 2273 */ 2274 ret = btrfs_find_ref_cluster(trans, &cluster, 2275 delayed_refs->run_delayed_start); 2276 if (ret) 2277 break; 2278 2279 ret = run_clustered_refs(trans, root, &cluster); 2280 BUG_ON(ret < 0); 2281 2282 count -= min_t(unsigned long, ret, count); 2283 2284 if (count == 0) 2285 break; 2286 } 2287 2288 if (run_all) { 2289 node = rb_first(&delayed_refs->root); 2290 if (!node) 2291 goto out; 2292 count = (unsigned long)-1; 2293 2294 while (node) { 2295 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2296 rb_node); 2297 if (btrfs_delayed_ref_is_head(ref)) { 2298 struct btrfs_delayed_ref_head *head; 2299 2300 head = btrfs_delayed_node_to_head(ref); 2301 atomic_inc(&ref->refs); 2302 2303 spin_unlock(&delayed_refs->lock); 2304 /* 2305 * Mutex was contended, block until it's 2306 * released and try again 2307 */ 2308 mutex_lock(&head->mutex); 2309 mutex_unlock(&head->mutex); 2310 2311 btrfs_put_delayed_ref(ref); 2312 cond_resched(); 2313 goto again; 2314 } 2315 node = rb_next(node); 2316 } 2317 spin_unlock(&delayed_refs->lock); 2318 schedule_timeout(1); 2319 goto again; 2320 } 2321 out: 2322 spin_unlock(&delayed_refs->lock); 2323 return 0; 2324 } 2325 2326 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2327 struct btrfs_root *root, 2328 u64 bytenr, u64 num_bytes, u64 flags, 2329 int is_data) 2330 { 2331 struct btrfs_delayed_extent_op *extent_op; 2332 int ret; 2333 2334 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2335 if (!extent_op) 2336 return -ENOMEM; 2337 2338 extent_op->flags_to_set = flags; 2339 extent_op->update_flags = 1; 2340 extent_op->update_key = 0; 2341 extent_op->is_data = is_data ? 
1 : 0; 2342 2343 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2344 if (ret) 2345 kfree(extent_op); 2346 return ret; 2347 } 2348 2349 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2350 struct btrfs_root *root, 2351 struct btrfs_path *path, 2352 u64 objectid, u64 offset, u64 bytenr) 2353 { 2354 struct btrfs_delayed_ref_head *head; 2355 struct btrfs_delayed_ref_node *ref; 2356 struct btrfs_delayed_data_ref *data_ref; 2357 struct btrfs_delayed_ref_root *delayed_refs; 2358 struct rb_node *node; 2359 int ret = 0; 2360 2361 ret = -ENOENT; 2362 delayed_refs = &trans->transaction->delayed_refs; 2363 spin_lock(&delayed_refs->lock); 2364 head = btrfs_find_delayed_ref_head(trans, bytenr); 2365 if (!head) 2366 goto out; 2367 2368 if (!mutex_trylock(&head->mutex)) { 2369 atomic_inc(&head->node.refs); 2370 spin_unlock(&delayed_refs->lock); 2371 2372 btrfs_release_path(path); 2373 2374 /* 2375 * Mutex was contended, block until it's released and let 2376 * caller try again 2377 */ 2378 mutex_lock(&head->mutex); 2379 mutex_unlock(&head->mutex); 2380 btrfs_put_delayed_ref(&head->node); 2381 return -EAGAIN; 2382 } 2383 2384 node = rb_prev(&head->node.rb_node); 2385 if (!node) 2386 goto out_unlock; 2387 2388 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2389 2390 if (ref->bytenr != bytenr) 2391 goto out_unlock; 2392 2393 ret = 1; 2394 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2395 goto out_unlock; 2396 2397 data_ref = btrfs_delayed_node_to_data_ref(ref); 2398 2399 node = rb_prev(node); 2400 if (node) { 2401 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2402 if (ref->bytenr == bytenr) 2403 goto out_unlock; 2404 } 2405 2406 if (data_ref->root != root->root_key.objectid || 2407 data_ref->objectid != objectid || data_ref->offset != offset) 2408 goto out_unlock; 2409 2410 ret = 0; 2411 out_unlock: 2412 mutex_unlock(&head->mutex); 2413 out: 2414 spin_unlock(&delayed_refs->lock); 2415 return ret; 2416 } 2417 2418 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2419 struct btrfs_root *root, 2420 struct btrfs_path *path, 2421 u64 objectid, u64 offset, u64 bytenr) 2422 { 2423 struct btrfs_root *extent_root = root->fs_info->extent_root; 2424 struct extent_buffer *leaf; 2425 struct btrfs_extent_data_ref *ref; 2426 struct btrfs_extent_inline_ref *iref; 2427 struct btrfs_extent_item *ei; 2428 struct btrfs_key key; 2429 u32 item_size; 2430 int ret; 2431 2432 key.objectid = bytenr; 2433 key.offset = (u64)-1; 2434 key.type = BTRFS_EXTENT_ITEM_KEY; 2435 2436 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2437 if (ret < 0) 2438 goto out; 2439 BUG_ON(ret == 0); 2440 2441 ret = -ENOENT; 2442 if (path->slots[0] == 0) 2443 goto out; 2444 2445 path->slots[0]--; 2446 leaf = path->nodes[0]; 2447 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2448 2449 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2450 goto out; 2451 2452 ret = 1; 2453 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2454 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2455 if (item_size < sizeof(*ei)) { 2456 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2457 goto out; 2458 } 2459 #endif 2460 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2461 2462 if (item_size != sizeof(*ei) + 2463 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2464 goto out; 2465 2466 if (btrfs_extent_generation(leaf, ei) <= 2467 btrfs_root_last_snapshot(&root->root_item)) 2468 goto out; 2469 2470 iref = (struct 
btrfs_extent_inline_ref *)(ei + 1); 2471 if (btrfs_extent_inline_ref_type(leaf, iref) != 2472 BTRFS_EXTENT_DATA_REF_KEY) 2473 goto out; 2474 2475 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2476 if (btrfs_extent_refs(leaf, ei) != 2477 btrfs_extent_data_ref_count(leaf, ref) || 2478 btrfs_extent_data_ref_root(leaf, ref) != 2479 root->root_key.objectid || 2480 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2481 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2482 goto out; 2483 2484 ret = 0; 2485 out: 2486 return ret; 2487 } 2488 2489 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2490 struct btrfs_root *root, 2491 u64 objectid, u64 offset, u64 bytenr) 2492 { 2493 struct btrfs_path *path; 2494 int ret; 2495 int ret2; 2496 2497 path = btrfs_alloc_path(); 2498 if (!path) 2499 return -ENOENT; 2500 2501 do { 2502 ret = check_committed_ref(trans, root, path, objectid, 2503 offset, bytenr); 2504 if (ret && ret != -ENOENT) 2505 goto out; 2506 2507 ret2 = check_delayed_ref(trans, root, path, objectid, 2508 offset, bytenr); 2509 } while (ret2 == -EAGAIN); 2510 2511 if (ret2 && ret2 != -ENOENT) { 2512 ret = ret2; 2513 goto out; 2514 } 2515 2516 if (ret != -ENOENT || ret2 != -ENOENT) 2517 ret = 0; 2518 out: 2519 btrfs_free_path(path); 2520 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) 2521 WARN_ON(ret > 0); 2522 return ret; 2523 } 2524 2525 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2526 struct btrfs_root *root, 2527 struct extent_buffer *buf, 2528 int full_backref, int inc) 2529 { 2530 u64 bytenr; 2531 u64 num_bytes; 2532 u64 parent; 2533 u64 ref_root; 2534 u32 nritems; 2535 struct btrfs_key key; 2536 struct btrfs_file_extent_item *fi; 2537 int i; 2538 int level; 2539 int ret = 0; 2540 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2541 u64, u64, u64, u64, u64, u64); 2542 2543 ref_root = btrfs_header_owner(buf); 2544 nritems = btrfs_header_nritems(buf); 2545 level = btrfs_header_level(buf); 2546 2547 if (!root->ref_cows && level == 0) 2548 return 0; 2549 2550 if (inc) 2551 process_func = btrfs_inc_extent_ref; 2552 else 2553 process_func = btrfs_free_extent; 2554 2555 if (full_backref) 2556 parent = buf->start; 2557 else 2558 parent = 0; 2559 2560 for (i = 0; i < nritems; i++) { 2561 if (level == 0) { 2562 btrfs_item_key_to_cpu(buf, &key, i); 2563 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2564 continue; 2565 fi = btrfs_item_ptr(buf, i, 2566 struct btrfs_file_extent_item); 2567 if (btrfs_file_extent_type(buf, fi) == 2568 BTRFS_FILE_EXTENT_INLINE) 2569 continue; 2570 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2571 if (bytenr == 0) 2572 continue; 2573 2574 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2575 key.offset -= btrfs_file_extent_offset(buf, fi); 2576 ret = process_func(trans, root, bytenr, num_bytes, 2577 parent, ref_root, key.objectid, 2578 key.offset); 2579 if (ret) 2580 goto fail; 2581 } else { 2582 bytenr = btrfs_node_blockptr(buf, i); 2583 num_bytes = btrfs_level_size(root, level - 1); 2584 ret = process_func(trans, root, bytenr, num_bytes, 2585 parent, ref_root, level - 1, 0); 2586 if (ret) 2587 goto fail; 2588 } 2589 } 2590 return 0; 2591 fail: 2592 BUG(); 2593 return ret; 2594 } 2595 2596 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2597 struct extent_buffer *buf, int full_backref) 2598 { 2599 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2600 } 2601 2602 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root 
*root, 2603 struct extent_buffer *buf, int full_backref) 2604 { 2605 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2606 } 2607 2608 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2609 struct btrfs_root *root, 2610 struct btrfs_path *path, 2611 struct btrfs_block_group_cache *cache) 2612 { 2613 int ret; 2614 struct btrfs_root *extent_root = root->fs_info->extent_root; 2615 unsigned long bi; 2616 struct extent_buffer *leaf; 2617 2618 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2619 if (ret < 0) 2620 goto fail; 2621 BUG_ON(ret); 2622 2623 leaf = path->nodes[0]; 2624 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2625 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2626 btrfs_mark_buffer_dirty(leaf); 2627 btrfs_release_path(path); 2628 fail: 2629 if (ret) 2630 return ret; 2631 return 0; 2632 2633 } 2634 2635 static struct btrfs_block_group_cache * 2636 next_block_group(struct btrfs_root *root, 2637 struct btrfs_block_group_cache *cache) 2638 { 2639 struct rb_node *node; 2640 spin_lock(&root->fs_info->block_group_cache_lock); 2641 node = rb_next(&cache->cache_node); 2642 btrfs_put_block_group(cache); 2643 if (node) { 2644 cache = rb_entry(node, struct btrfs_block_group_cache, 2645 cache_node); 2646 btrfs_get_block_group(cache); 2647 } else 2648 cache = NULL; 2649 spin_unlock(&root->fs_info->block_group_cache_lock); 2650 return cache; 2651 } 2652 2653 static int cache_save_setup(struct btrfs_block_group_cache *block_group, 2654 struct btrfs_trans_handle *trans, 2655 struct btrfs_path *path) 2656 { 2657 struct btrfs_root *root = block_group->fs_info->tree_root; 2658 struct inode *inode = NULL; 2659 u64 alloc_hint = 0; 2660 int dcs = BTRFS_DC_ERROR; 2661 int num_pages = 0; 2662 int retries = 0; 2663 int ret = 0; 2664 2665 /* 2666 * If this block group is smaller than 100 megs don't bother caching the 2667 * block group. 2668 */ 2669 if (block_group->key.offset < (100 * 1024 * 1024)) { 2670 spin_lock(&block_group->lock); 2671 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 2672 spin_unlock(&block_group->lock); 2673 return 0; 2674 } 2675 2676 again: 2677 inode = lookup_free_space_inode(root, block_group, path); 2678 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2679 ret = PTR_ERR(inode); 2680 btrfs_release_path(path); 2681 goto out; 2682 } 2683 2684 if (IS_ERR(inode)) { 2685 BUG_ON(retries); 2686 retries++; 2687 2688 if (block_group->ro) 2689 goto out_free; 2690 2691 ret = create_free_space_inode(root, trans, block_group, path); 2692 if (ret) 2693 goto out_free; 2694 goto again; 2695 } 2696 2697 /* 2698 * We want to set the generation to 0, that way if anything goes wrong 2699 * from here on out we know not to trust this cache when we load up next 2700 * time. 
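 * (When the cache is loaded again, the inode generation is compared against
 * the generation recorded in the free space header; a mismatch - including
 * this 0 - makes load_free_space_cache() discard the cache and rebuild it.)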
2701 */ 2702 BTRFS_I(inode)->generation = 0; 2703 ret = btrfs_update_inode(trans, root, inode); 2704 WARN_ON(ret); 2705 2706 if (i_size_read(inode) > 0) { 2707 ret = btrfs_truncate_free_space_cache(root, trans, path, 2708 inode); 2709 if (ret) 2710 goto out_put; 2711 } 2712 2713 spin_lock(&block_group->lock); 2714 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2715 /* We're not cached, don't bother trying to write stuff out */ 2716 dcs = BTRFS_DC_WRITTEN; 2717 spin_unlock(&block_group->lock); 2718 goto out_put; 2719 } 2720 spin_unlock(&block_group->lock); 2721 2722 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 2723 if (!num_pages) 2724 num_pages = 1; 2725 2726 /* 2727 * Just to make absolutely sure we have enough space, we're going to 2728 * preallocate 12 pages worth of space for each block group. In 2729 * practice we ought to use at most 8, but we need extra space so we can 2730 * add our header and have a terminator between the extents and the 2731 * bitmaps. 2732 */ 2733 num_pages *= 16; 2734 num_pages *= PAGE_CACHE_SIZE; 2735 2736 ret = btrfs_check_data_free_space(inode, num_pages); 2737 if (ret) 2738 goto out_put; 2739 2740 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2741 num_pages, num_pages, 2742 &alloc_hint); 2743 if (!ret) 2744 dcs = BTRFS_DC_SETUP; 2745 btrfs_free_reserved_data_space(inode, num_pages); 2746 out_put: 2747 iput(inode); 2748 out_free: 2749 btrfs_release_path(path); 2750 out: 2751 spin_lock(&block_group->lock); 2752 block_group->disk_cache_state = dcs; 2753 spin_unlock(&block_group->lock); 2754 2755 return ret; 2756 } 2757 2758 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2759 struct btrfs_root *root) 2760 { 2761 struct btrfs_block_group_cache *cache; 2762 int err = 0; 2763 struct btrfs_path *path; 2764 u64 last = 0; 2765 2766 path = btrfs_alloc_path(); 2767 if (!path) 2768 return -ENOMEM; 2769 2770 again: 2771 while (1) { 2772 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2773 while (cache) { 2774 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 2775 break; 2776 cache = next_block_group(root, cache); 2777 } 2778 if (!cache) { 2779 if (last == 0) 2780 break; 2781 last = 0; 2782 continue; 2783 } 2784 err = cache_save_setup(cache, trans, path); 2785 last = cache->key.objectid + cache->key.offset; 2786 btrfs_put_block_group(cache); 2787 } 2788 2789 while (1) { 2790 if (last == 0) { 2791 err = btrfs_run_delayed_refs(trans, root, 2792 (unsigned long)-1); 2793 BUG_ON(err); 2794 } 2795 2796 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2797 while (cache) { 2798 if (cache->disk_cache_state == BTRFS_DC_CLEAR) { 2799 btrfs_put_block_group(cache); 2800 goto again; 2801 } 2802 2803 if (cache->dirty) 2804 break; 2805 cache = next_block_group(root, cache); 2806 } 2807 if (!cache) { 2808 if (last == 0) 2809 break; 2810 last = 0; 2811 continue; 2812 } 2813 2814 if (cache->disk_cache_state == BTRFS_DC_SETUP) 2815 cache->disk_cache_state = BTRFS_DC_NEED_WRITE; 2816 cache->dirty = 0; 2817 last = cache->key.objectid + cache->key.offset; 2818 2819 err = write_one_cache_group(trans, root, path, cache); 2820 BUG_ON(err); 2821 btrfs_put_block_group(cache); 2822 } 2823 2824 while (1) { 2825 /* 2826 * I don't think this is needed since we're just marking our 2827 * preallocated extent as written, but just in case it can't 2828 * hurt. 
2829 */ 2830 if (last == 0) { 2831 err = btrfs_run_delayed_refs(trans, root, 2832 (unsigned long)-1); 2833 BUG_ON(err); 2834 } 2835 2836 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2837 while (cache) { 2838 /* 2839 * Really this shouldn't happen, but it could if we 2840 * couldn't write the entire preallocated extent and 2841 * splitting the extent resulted in a new block. 2842 */ 2843 if (cache->dirty) { 2844 btrfs_put_block_group(cache); 2845 goto again; 2846 } 2847 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2848 break; 2849 cache = next_block_group(root, cache); 2850 } 2851 if (!cache) { 2852 if (last == 0) 2853 break; 2854 last = 0; 2855 continue; 2856 } 2857 2858 btrfs_write_out_cache(root, trans, cache, path); 2859 2860 /* 2861 * If we didn't have an error then the cache state is still 2862 * NEED_WRITE, so we can set it to WRITTEN. 2863 */ 2864 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2865 cache->disk_cache_state = BTRFS_DC_WRITTEN; 2866 last = cache->key.objectid + cache->key.offset; 2867 btrfs_put_block_group(cache); 2868 } 2869 2870 btrfs_free_path(path); 2871 return 0; 2872 } 2873 2874 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2875 { 2876 struct btrfs_block_group_cache *block_group; 2877 int readonly = 0; 2878 2879 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2880 if (!block_group || block_group->ro) 2881 readonly = 1; 2882 if (block_group) 2883 btrfs_put_block_group(block_group); 2884 return readonly; 2885 } 2886 2887 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2888 u64 total_bytes, u64 bytes_used, 2889 struct btrfs_space_info **space_info) 2890 { 2891 struct btrfs_space_info *found; 2892 int i; 2893 int factor; 2894 2895 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 2896 BTRFS_BLOCK_GROUP_RAID10)) 2897 factor = 2; 2898 else 2899 factor = 1; 2900 2901 found = __find_space_info(info, flags); 2902 if (found) { 2903 spin_lock(&found->lock); 2904 found->total_bytes += total_bytes; 2905 found->disk_total += total_bytes * factor; 2906 found->bytes_used += bytes_used; 2907 found->disk_used += bytes_used * factor; 2908 found->full = 0; 2909 spin_unlock(&found->lock); 2910 *space_info = found; 2911 return 0; 2912 } 2913 found = kzalloc(sizeof(*found), GFP_NOFS); 2914 if (!found) 2915 return -ENOMEM; 2916 2917 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 2918 INIT_LIST_HEAD(&found->block_groups[i]); 2919 init_rwsem(&found->groups_sem); 2920 spin_lock_init(&found->lock); 2921 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | 2922 BTRFS_BLOCK_GROUP_SYSTEM | 2923 BTRFS_BLOCK_GROUP_METADATA); 2924 found->total_bytes = total_bytes; 2925 found->disk_total = total_bytes * factor; 2926 found->bytes_used = bytes_used; 2927 found->disk_used = bytes_used * factor; 2928 found->bytes_pinned = 0; 2929 found->bytes_reserved = 0; 2930 found->bytes_readonly = 0; 2931 found->bytes_may_use = 0; 2932 found->full = 0; 2933 found->force_alloc = CHUNK_ALLOC_NO_FORCE; 2934 found->chunk_alloc = 0; 2935 *space_info = found; 2936 list_add_rcu(&found->list, &info->space_info); 2937 atomic_set(&found->caching_threads, 0); 2938 return 0; 2939 } 2940 2941 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2942 { 2943 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2944 BTRFS_BLOCK_GROUP_RAID1 | 2945 BTRFS_BLOCK_GROUP_RAID10 | 2946 BTRFS_BLOCK_GROUP_DUP); 2947 if (extra_flags) { 2948 if (flags & BTRFS_BLOCK_GROUP_DATA) 2949 fs_info->avail_data_alloc_bits |= extra_flags; 2950 if 
(flags & BTRFS_BLOCK_GROUP_METADATA) 2951 fs_info->avail_metadata_alloc_bits |= extra_flags; 2952 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2953 fs_info->avail_system_alloc_bits |= extra_flags; 2954 } 2955 } 2956 2957 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2958 { 2959 /* 2960 * we add in the count of missing devices because we want 2961 * to make sure that any RAID levels on a degraded FS 2962 * continue to be honored. 2963 */ 2964 u64 num_devices = root->fs_info->fs_devices->rw_devices + 2965 root->fs_info->fs_devices->missing_devices; 2966 2967 if (num_devices == 1) 2968 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2969 if (num_devices < 4) 2970 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2971 2972 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2973 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2974 BTRFS_BLOCK_GROUP_RAID10))) { 2975 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2976 } 2977 2978 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2979 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2980 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2981 } 2982 2983 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2984 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2985 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2986 (flags & BTRFS_BLOCK_GROUP_DUP))) 2987 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2988 return flags; 2989 } 2990 2991 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 2992 { 2993 if (flags & BTRFS_BLOCK_GROUP_DATA) 2994 flags |= root->fs_info->avail_data_alloc_bits & 2995 root->fs_info->data_alloc_profile; 2996 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2997 flags |= root->fs_info->avail_system_alloc_bits & 2998 root->fs_info->system_alloc_profile; 2999 else if (flags & BTRFS_BLOCK_GROUP_METADATA) 3000 flags |= root->fs_info->avail_metadata_alloc_bits & 3001 root->fs_info->metadata_alloc_profile; 3002 return btrfs_reduce_alloc_profile(root, flags); 3003 } 3004 3005 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3006 { 3007 u64 flags; 3008 3009 if (data) 3010 flags = BTRFS_BLOCK_GROUP_DATA; 3011 else if (root == root->fs_info->chunk_root) 3012 flags = BTRFS_BLOCK_GROUP_SYSTEM; 3013 else 3014 flags = BTRFS_BLOCK_GROUP_METADATA; 3015 3016 return get_alloc_profile(root, flags); 3017 } 3018 3019 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 3020 { 3021 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 3022 BTRFS_BLOCK_GROUP_DATA); 3023 } 3024 3025 /* 3026 * This will check the space that the inode allocates from to make sure we have 3027 * enough space for bytes. 
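 *
 * Callers that also need a matching metadata reservation normally go through
 * btrfs_delalloc_reserve_space(), which is roughly:
 *
 *	ret = btrfs_check_data_free_space(inode, num_bytes);
 *	if (ret)
 *		return ret;
 *	ret = btrfs_delalloc_reserve_metadata(inode, num_bytes);
 *	if (ret)
 *		btrfs_free_reserved_data_space(inode, num_bytes);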
3028 */ 3029 int btrfs_check_data_free_space(struct inode *inode, u64 bytes) 3030 { 3031 struct btrfs_space_info *data_sinfo; 3032 struct btrfs_root *root = BTRFS_I(inode)->root; 3033 u64 used; 3034 int ret = 0, committed = 0, alloc_chunk = 1; 3035 3036 /* make sure bytes are sectorsize aligned */ 3037 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3038 3039 if (root == root->fs_info->tree_root || 3040 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { 3041 alloc_chunk = 0; 3042 committed = 1; 3043 } 3044 3045 data_sinfo = BTRFS_I(inode)->space_info; 3046 if (!data_sinfo) 3047 goto alloc; 3048 3049 again: 3050 /* make sure we have enough space to handle the data first */ 3051 spin_lock(&data_sinfo->lock); 3052 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + 3053 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + 3054 data_sinfo->bytes_may_use; 3055 3056 if (used + bytes > data_sinfo->total_bytes) { 3057 struct btrfs_trans_handle *trans; 3058 3059 /* 3060 * if we don't have enough free bytes in this space then we need 3061 * to alloc a new chunk. 3062 */ 3063 if (!data_sinfo->full && alloc_chunk) { 3064 u64 alloc_target; 3065 3066 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; 3067 spin_unlock(&data_sinfo->lock); 3068 alloc: 3069 alloc_target = btrfs_get_alloc_profile(root, 1); 3070 trans = btrfs_join_transaction(root); 3071 if (IS_ERR(trans)) 3072 return PTR_ERR(trans); 3073 3074 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3075 bytes + 2 * 1024 * 1024, 3076 alloc_target, 3077 CHUNK_ALLOC_NO_FORCE); 3078 btrfs_end_transaction(trans, root); 3079 if (ret < 0) { 3080 if (ret != -ENOSPC) 3081 return ret; 3082 else 3083 goto commit_trans; 3084 } 3085 3086 if (!data_sinfo) { 3087 btrfs_set_inode_space_info(root, inode); 3088 data_sinfo = BTRFS_I(inode)->space_info; 3089 } 3090 goto again; 3091 } 3092 3093 /* 3094 * If we have less pinned bytes than we want to allocate then 3095 * don't bother committing the transaction, it won't help us. 
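	 * (Committing can only hand back bytes that are currently pinned, so
	 * a commit is pointless unless bytes_pinned covers the request.)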
3096 */ 3097 if (data_sinfo->bytes_pinned < bytes) 3098 committed = 1; 3099 spin_unlock(&data_sinfo->lock); 3100 3101 /* commit the current transaction and try again */ 3102 commit_trans: 3103 if (!committed && 3104 !atomic_read(&root->fs_info->open_ioctl_trans)) { 3105 committed = 1; 3106 trans = btrfs_join_transaction(root); 3107 if (IS_ERR(trans)) 3108 return PTR_ERR(trans); 3109 ret = btrfs_commit_transaction(trans, root); 3110 if (ret) 3111 return ret; 3112 goto again; 3113 } 3114 3115 return -ENOSPC; 3116 } 3117 data_sinfo->bytes_may_use += bytes; 3118 BTRFS_I(inode)->reserved_bytes += bytes; 3119 spin_unlock(&data_sinfo->lock); 3120 3121 return 0; 3122 } 3123 3124 /* 3125 * called when we are clearing an delalloc extent from the 3126 * inode's io_tree or there was an error for whatever reason 3127 * after calling btrfs_check_data_free_space 3128 */ 3129 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3130 { 3131 struct btrfs_root *root = BTRFS_I(inode)->root; 3132 struct btrfs_space_info *data_sinfo; 3133 3134 /* make sure bytes are sectorsize aligned */ 3135 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3136 3137 data_sinfo = BTRFS_I(inode)->space_info; 3138 spin_lock(&data_sinfo->lock); 3139 data_sinfo->bytes_may_use -= bytes; 3140 BTRFS_I(inode)->reserved_bytes -= bytes; 3141 spin_unlock(&data_sinfo->lock); 3142 } 3143 3144 static void force_metadata_allocation(struct btrfs_fs_info *info) 3145 { 3146 struct list_head *head = &info->space_info; 3147 struct btrfs_space_info *found; 3148 3149 rcu_read_lock(); 3150 list_for_each_entry_rcu(found, head, list) { 3151 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3152 found->force_alloc = CHUNK_ALLOC_FORCE; 3153 } 3154 rcu_read_unlock(); 3155 } 3156 3157 static int should_alloc_chunk(struct btrfs_root *root, 3158 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3159 int force) 3160 { 3161 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3162 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3163 u64 thresh; 3164 3165 if (force == CHUNK_ALLOC_FORCE) 3166 return 1; 3167 3168 /* 3169 * in limited mode, we want to have some free space up to 3170 * about 1% of the FS size. 3171 */ 3172 if (force == CHUNK_ALLOC_LIMITED) { 3173 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3174 thresh = max_t(u64, 64 * 1024 * 1024, 3175 div_factor_fine(thresh, 1)); 3176 3177 if (num_bytes - num_allocated < thresh) 3178 return 1; 3179 } 3180 3181 /* 3182 * we have two similar checks here, one based on percentage 3183 * and once based on a hard number of 256MB. The idea 3184 * is that if we have a good amount of free 3185 * room, don't allocate a chunk. 
 * A good amount means the chunks we have allocated are less than 80%
 * utilized, or there is more than 256MB free.
 */
	if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes)
		return 0;

	if (num_allocated + alloc_bytes < div_factor(num_bytes, 8))
		return 0;

	thresh = btrfs_super_total_bytes(&root->fs_info->super_copy);

	/* 256MB or 5% of the FS */
	thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5));

	if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3))
		return 0;
	return 1;
}

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	int wait_for_alloc = 0;
	int ret = 0;

	flags = btrfs_reduce_alloc_profile(extent_root, flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

again:
	spin_lock(&space_info->lock);
	if (space_info->force_alloc)
		force = space_info->force_alloc;
	if (space_info->full) {
		spin_unlock(&space_info->lock);
		return 0;
	}

	if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) {
		spin_unlock(&space_info->lock);
		return 0;
	} else if (space_info->chunk_alloc) {
		wait_for_alloc = 1;
	} else {
		space_info->chunk_alloc = 1;
	}

	spin_unlock(&space_info->lock);

	mutex_lock(&fs_info->chunk_mutex);

	/*
	 * The chunk_mutex is held throughout the entirety of a chunk
	 * allocation, so once we've acquired the chunk_mutex we know that
	 * any concurrent allocation has finished and we need to recheck
	 * whether we should allocate.
	 */
	if (wait_for_alloc) {
		mutex_unlock(&fs_info->chunk_mutex);
		wait_for_alloc = 0;
		goto again;
	}

	/*
	 * If we have mixed data/metadata chunks we want to make sure we keep
	 * allocating mixed chunks instead of individual chunks.
	 */
	if (btrfs_mixed_space_info(space_info))
		flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA);

	/*
	 * if we're doing a data chunk, go ahead and make sure that
	 * we keep a reasonable number of metadata chunks allocated in the
	 * FS as well.
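	 * (fs_info->metadata_ratio comes from the metadata_ratio mount
	 * option: force one metadata chunk allocation for every N data chunk
	 * allocations; a value of 0 disables the forcing.)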
3269 */ 3270 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3271 fs_info->data_chunk_allocations++; 3272 if (!(fs_info->data_chunk_allocations % 3273 fs_info->metadata_ratio)) 3274 force_metadata_allocation(fs_info); 3275 } 3276 3277 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3278 spin_lock(&space_info->lock); 3279 if (ret) 3280 space_info->full = 1; 3281 else 3282 ret = 1; 3283 3284 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3285 space_info->chunk_alloc = 0; 3286 spin_unlock(&space_info->lock); 3287 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3288 return ret; 3289 } 3290 3291 /* 3292 * shrink metadata reservation for delalloc 3293 */ 3294 static int shrink_delalloc(struct btrfs_trans_handle *trans, 3295 struct btrfs_root *root, u64 to_reclaim, int sync) 3296 { 3297 struct btrfs_block_rsv *block_rsv; 3298 struct btrfs_space_info *space_info; 3299 u64 reserved; 3300 u64 max_reclaim; 3301 u64 reclaimed = 0; 3302 long time_left; 3303 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3304 int loops = 0; 3305 unsigned long progress; 3306 3307 block_rsv = &root->fs_info->delalloc_block_rsv; 3308 space_info = block_rsv->space_info; 3309 3310 smp_mb(); 3311 reserved = space_info->bytes_reserved; 3312 progress = space_info->reservation_progress; 3313 3314 if (reserved == 0) 3315 return 0; 3316 3317 /* nothing to shrink - nothing to reclaim */ 3318 if (root->fs_info->delalloc_bytes == 0) 3319 return 0; 3320 3321 max_reclaim = min(reserved, to_reclaim); 3322 3323 while (loops < 1024) { 3324 /* have the flusher threads jump in and do some IO */ 3325 smp_mb(); 3326 nr_pages = min_t(unsigned long, nr_pages, 3327 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); 3328 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3329 3330 spin_lock(&space_info->lock); 3331 if (reserved > space_info->bytes_reserved) 3332 reclaimed += reserved - space_info->bytes_reserved; 3333 reserved = space_info->bytes_reserved; 3334 spin_unlock(&space_info->lock); 3335 3336 loops++; 3337 3338 if (reserved == 0 || reclaimed >= max_reclaim) 3339 break; 3340 3341 if (trans && trans->transaction->blocked) 3342 return -EAGAIN; 3343 3344 time_left = schedule_timeout_interruptible(1); 3345 3346 /* We were interrupted, exit */ 3347 if (time_left) 3348 break; 3349 3350 /* we've kicked the IO a few times, if anything has been freed, 3351 * exit. There is no sense in looping here for a long time 3352 * when we really need to commit the transaction, or there are 3353 * just too many writers without enough free space 3354 */ 3355 3356 if (loops > 3) { 3357 smp_mb(); 3358 if (progress != space_info->reservation_progress) 3359 break; 3360 } 3361 3362 } 3363 return reclaimed >= to_reclaim; 3364 } 3365 3366 /* 3367 * Retries tells us how many times we've called reserve_metadata_bytes. The 3368 * idea is if this is the first call (retries == 0) then we will add to our 3369 * reserved count if we can't make the allocation in order to hold our place 3370 * while we go and try and free up space. That way for retries > 1 we don't try 3371 * and add space, we just check to see if the amount of unused space is >= the 3372 * total space, meaning that our reservation is valid. 3373 * 3374 * However if we don't intend to retry this reservation, pass -1 as retries so 3375 * that it short circuits this logic. 
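 *
 * Roughly, the flow below is: try to take the reservation; if that fails and
 * flushing is allowed, shrink delalloc and retry a couple of times; as a last
 * resort commit the transaction so pinned space becomes available and try
 * once more.  btrfs_block_rsv_add(), for example, calls this with flush == 1.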
 */
static int reserve_metadata_bytes(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_block_rsv *block_rsv,
				  u64 orig_bytes, int flush)
{
	struct btrfs_space_info *space_info = block_rsv->space_info;
	u64 unused;
	u64 num_bytes = orig_bytes;
	int retries = 0;
	int ret = 0;
	bool reserved = false;
	bool committed = false;

again:
	ret = -ENOSPC;
	if (reserved)
		num_bytes = 0;

	spin_lock(&space_info->lock);
	unused = space_info->bytes_used + space_info->bytes_reserved +
		 space_info->bytes_pinned + space_info->bytes_readonly +
		 space_info->bytes_may_use;

	/*
	 * The idea here is that if we've not already over-reserved the block
	 * group then we can go ahead and save our reservation first and then
	 * start flushing if we need to.  Otherwise, if we've already
	 * overcommitted, let's start flushing stuff first and then come back
	 * and try to make our reservation.
	 */
	if (unused <= space_info->total_bytes) {
		unused = space_info->total_bytes - unused;
		if (unused >= num_bytes) {
			if (!reserved)
				space_info->bytes_reserved += orig_bytes;
			ret = 0;
		} else {
			/*
			 * Ok, set num_bytes to orig_bytes since we aren't
			 * overcommitted, this way we only try and reclaim
			 * what we need.
			 */
			num_bytes = orig_bytes;
		}
	} else {
		/*
		 * Ok, we're overcommitted, set num_bytes to the overcommitted
		 * amount plus the amount of bytes that we need for this
		 * reservation.
		 */
		num_bytes = unused - space_info->total_bytes +
			    (orig_bytes * (retries + 1));
	}

	/*
	 * Couldn't make our reservation, save our place so while we're trying
	 * to reclaim space we can actually use it instead of somebody else
	 * stealing it from us.
	 */
	if (ret && !reserved) {
		space_info->bytes_reserved += orig_bytes;
		reserved = true;
	}

	spin_unlock(&space_info->lock);

	if (!ret)
		return 0;

	if (!flush)
		goto out;

	/*
	 * We do synchronous shrinking since we don't actually unreserve
	 * metadata until after the IO is completed.
	 */
	ret = shrink_delalloc(trans, root, num_bytes, 1);
	if (ret > 0)
		return 0;
	else if (ret < 0)
		goto out;

	/*
	 * So if we were overcommitted it's possible that somebody else flushed
	 * out enough space and we simply didn't have enough space to reclaim,
	 * so go back around and try again.
	 */
	if (retries < 2) {
		retries++;
		goto again;
	}

	spin_lock(&space_info->lock);
	/*
	 * Not enough space to be reclaimed, don't bother committing the
	 * transaction.
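	 * (Pinned bytes are all a commit could return to this space_info,
	 * so if bytes_pinned is already smaller than what we asked for,
	 * committing cannot satisfy the reservation.)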
3473 */ 3474 if (space_info->bytes_pinned < orig_bytes) 3475 ret = -ENOSPC; 3476 spin_unlock(&space_info->lock); 3477 if (ret) 3478 goto out; 3479 3480 ret = -EAGAIN; 3481 if (trans || committed) 3482 goto out; 3483 3484 ret = -ENOSPC; 3485 trans = btrfs_join_transaction(root); 3486 if (IS_ERR(trans)) 3487 goto out; 3488 ret = btrfs_commit_transaction(trans, root); 3489 if (!ret) { 3490 trans = NULL; 3491 committed = true; 3492 goto again; 3493 } 3494 3495 out: 3496 if (reserved) { 3497 spin_lock(&space_info->lock); 3498 space_info->bytes_reserved -= orig_bytes; 3499 spin_unlock(&space_info->lock); 3500 } 3501 3502 return ret; 3503 } 3504 3505 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3506 struct btrfs_root *root) 3507 { 3508 struct btrfs_block_rsv *block_rsv; 3509 if (root->ref_cows) 3510 block_rsv = trans->block_rsv; 3511 else 3512 block_rsv = root->block_rsv; 3513 3514 if (!block_rsv) 3515 block_rsv = &root->fs_info->empty_block_rsv; 3516 3517 return block_rsv; 3518 } 3519 3520 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 3521 u64 num_bytes) 3522 { 3523 int ret = -ENOSPC; 3524 spin_lock(&block_rsv->lock); 3525 if (block_rsv->reserved >= num_bytes) { 3526 block_rsv->reserved -= num_bytes; 3527 if (block_rsv->reserved < block_rsv->size) 3528 block_rsv->full = 0; 3529 ret = 0; 3530 } 3531 spin_unlock(&block_rsv->lock); 3532 return ret; 3533 } 3534 3535 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, 3536 u64 num_bytes, int update_size) 3537 { 3538 spin_lock(&block_rsv->lock); 3539 block_rsv->reserved += num_bytes; 3540 if (update_size) 3541 block_rsv->size += num_bytes; 3542 else if (block_rsv->reserved >= block_rsv->size) 3543 block_rsv->full = 1; 3544 spin_unlock(&block_rsv->lock); 3545 } 3546 3547 static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3548 struct btrfs_block_rsv *dest, u64 num_bytes) 3549 { 3550 struct btrfs_space_info *space_info = block_rsv->space_info; 3551 3552 spin_lock(&block_rsv->lock); 3553 if (num_bytes == (u64)-1) 3554 num_bytes = block_rsv->size; 3555 block_rsv->size -= num_bytes; 3556 if (block_rsv->reserved >= block_rsv->size) { 3557 num_bytes = block_rsv->reserved - block_rsv->size; 3558 block_rsv->reserved = block_rsv->size; 3559 block_rsv->full = 1; 3560 } else { 3561 num_bytes = 0; 3562 } 3563 spin_unlock(&block_rsv->lock); 3564 3565 if (num_bytes > 0) { 3566 if (dest) { 3567 spin_lock(&dest->lock); 3568 if (!dest->full) { 3569 u64 bytes_to_add; 3570 3571 bytes_to_add = dest->size - dest->reserved; 3572 bytes_to_add = min(num_bytes, bytes_to_add); 3573 dest->reserved += bytes_to_add; 3574 if (dest->reserved >= dest->size) 3575 dest->full = 1; 3576 num_bytes -= bytes_to_add; 3577 } 3578 spin_unlock(&dest->lock); 3579 } 3580 if (num_bytes) { 3581 spin_lock(&space_info->lock); 3582 space_info->bytes_reserved -= num_bytes; 3583 space_info->reservation_progress++; 3584 spin_unlock(&space_info->lock); 3585 } 3586 } 3587 } 3588 3589 static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, 3590 struct btrfs_block_rsv *dst, u64 num_bytes) 3591 { 3592 int ret; 3593 3594 ret = block_rsv_use_bytes(src, num_bytes); 3595 if (ret) 3596 return ret; 3597 3598 block_rsv_add_bytes(dst, num_bytes, 1); 3599 return 0; 3600 } 3601 3602 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) 3603 { 3604 memset(rsv, 0, sizeof(*rsv)); 3605 spin_lock_init(&rsv->lock); 3606 atomic_set(&rsv->usage, 1); 3607 rsv->priority = 6; 3608 INIT_LIST_HEAD(&rsv->list); 3609 } 3610 3611 struct 
btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) 3612 { 3613 struct btrfs_block_rsv *block_rsv; 3614 struct btrfs_fs_info *fs_info = root->fs_info; 3615 3616 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3617 if (!block_rsv) 3618 return NULL; 3619 3620 btrfs_init_block_rsv(block_rsv); 3621 block_rsv->space_info = __find_space_info(fs_info, 3622 BTRFS_BLOCK_GROUP_METADATA); 3623 return block_rsv; 3624 } 3625 3626 void btrfs_free_block_rsv(struct btrfs_root *root, 3627 struct btrfs_block_rsv *rsv) 3628 { 3629 if (rsv && atomic_dec_and_test(&rsv->usage)) { 3630 btrfs_block_rsv_release(root, rsv, (u64)-1); 3631 if (!rsv->durable) 3632 kfree(rsv); 3633 } 3634 } 3635 3636 /* 3637 * make the block_rsv struct be able to capture freed space. 3638 * the captured space will re-add to the the block_rsv struct 3639 * after transaction commit 3640 */ 3641 void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, 3642 struct btrfs_block_rsv *block_rsv) 3643 { 3644 block_rsv->durable = 1; 3645 mutex_lock(&fs_info->durable_block_rsv_mutex); 3646 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); 3647 mutex_unlock(&fs_info->durable_block_rsv_mutex); 3648 } 3649 3650 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3651 struct btrfs_root *root, 3652 struct btrfs_block_rsv *block_rsv, 3653 u64 num_bytes) 3654 { 3655 int ret; 3656 3657 if (num_bytes == 0) 3658 return 0; 3659 3660 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3661 if (!ret) { 3662 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3663 return 0; 3664 } 3665 3666 return ret; 3667 } 3668 3669 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 3670 struct btrfs_root *root, 3671 struct btrfs_block_rsv *block_rsv, 3672 u64 min_reserved, int min_factor) 3673 { 3674 u64 num_bytes = 0; 3675 int commit_trans = 0; 3676 int ret = -ENOSPC; 3677 3678 if (!block_rsv) 3679 return 0; 3680 3681 spin_lock(&block_rsv->lock); 3682 if (min_factor > 0) 3683 num_bytes = div_factor(block_rsv->size, min_factor); 3684 if (min_reserved > num_bytes) 3685 num_bytes = min_reserved; 3686 3687 if (block_rsv->reserved >= num_bytes) { 3688 ret = 0; 3689 } else { 3690 num_bytes -= block_rsv->reserved; 3691 if (block_rsv->durable && 3692 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) 3693 commit_trans = 1; 3694 } 3695 spin_unlock(&block_rsv->lock); 3696 if (!ret) 3697 return 0; 3698 3699 if (block_rsv->refill_used) { 3700 ret = reserve_metadata_bytes(trans, root, block_rsv, 3701 num_bytes, 0); 3702 if (!ret) { 3703 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3704 return 0; 3705 } 3706 } 3707 3708 if (commit_trans) { 3709 if (trans) 3710 return -EAGAIN; 3711 3712 trans = btrfs_join_transaction(root); 3713 BUG_ON(IS_ERR(trans)); 3714 ret = btrfs_commit_transaction(trans, root); 3715 return 0; 3716 } 3717 3718 return -ENOSPC; 3719 } 3720 3721 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, 3722 struct btrfs_block_rsv *dst_rsv, 3723 u64 num_bytes) 3724 { 3725 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3726 } 3727 3728 void btrfs_block_rsv_release(struct btrfs_root *root, 3729 struct btrfs_block_rsv *block_rsv, 3730 u64 num_bytes) 3731 { 3732 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 3733 if (global_rsv->full || global_rsv == block_rsv || 3734 block_rsv->space_info != global_rsv->space_info) 3735 global_rsv = NULL; 3736 block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); 3737 } 3738 3739 /* 3740 * helper to calculate size of global 
block reservation. 3741 * the desired value is sum of space used by extent tree, 3742 * checksum tree and root tree 3743 */ 3744 static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) 3745 { 3746 struct btrfs_space_info *sinfo; 3747 u64 num_bytes; 3748 u64 meta_used; 3749 u64 data_used; 3750 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3751 3752 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3753 spin_lock(&sinfo->lock); 3754 data_used = sinfo->bytes_used; 3755 spin_unlock(&sinfo->lock); 3756 3757 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3758 spin_lock(&sinfo->lock); 3759 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) 3760 data_used = 0; 3761 meta_used = sinfo->bytes_used; 3762 spin_unlock(&sinfo->lock); 3763 3764 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * 3765 csum_size * 2; 3766 num_bytes += div64_u64(data_used + meta_used, 50); 3767 3768 if (num_bytes * 3 > meta_used) 3769 num_bytes = div64_u64(meta_used, 3); 3770 3771 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); 3772 } 3773 3774 static void update_global_block_rsv(struct btrfs_fs_info *fs_info) 3775 { 3776 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 3777 struct btrfs_space_info *sinfo = block_rsv->space_info; 3778 u64 num_bytes; 3779 3780 num_bytes = calc_global_metadata_size(fs_info); 3781 3782 spin_lock(&block_rsv->lock); 3783 spin_lock(&sinfo->lock); 3784 3785 block_rsv->size = num_bytes; 3786 3787 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3788 sinfo->bytes_reserved + sinfo->bytes_readonly + 3789 sinfo->bytes_may_use; 3790 3791 if (sinfo->total_bytes > num_bytes) { 3792 num_bytes = sinfo->total_bytes - num_bytes; 3793 block_rsv->reserved += num_bytes; 3794 sinfo->bytes_reserved += num_bytes; 3795 } 3796 3797 if (block_rsv->reserved >= block_rsv->size) { 3798 num_bytes = block_rsv->reserved - block_rsv->size; 3799 sinfo->bytes_reserved -= num_bytes; 3800 sinfo->reservation_progress++; 3801 block_rsv->reserved = block_rsv->size; 3802 block_rsv->full = 1; 3803 } 3804 3805 spin_unlock(&sinfo->lock); 3806 spin_unlock(&block_rsv->lock); 3807 } 3808 3809 static void init_global_block_rsv(struct btrfs_fs_info *fs_info) 3810 { 3811 struct btrfs_space_info *space_info; 3812 3813 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3814 fs_info->chunk_block_rsv.space_info = space_info; 3815 fs_info->chunk_block_rsv.priority = 10; 3816 3817 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3818 fs_info->global_block_rsv.space_info = space_info; 3819 fs_info->global_block_rsv.priority = 10; 3820 fs_info->global_block_rsv.refill_used = 1; 3821 fs_info->delalloc_block_rsv.space_info = space_info; 3822 fs_info->trans_block_rsv.space_info = space_info; 3823 fs_info->empty_block_rsv.space_info = space_info; 3824 fs_info->empty_block_rsv.priority = 10; 3825 3826 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; 3827 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 3828 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 3829 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 3830 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 3831 3832 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); 3833 3834 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); 3835 3836 update_global_block_rsv(fs_info); 3837 } 3838 3839 static void release_global_block_rsv(struct btrfs_fs_info *fs_info) 3840 { 3841 
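	/*
	 * Teardown helper: hand whatever the global reserve still holds back
	 * to its space_info; the WARN_ONs below catch reservations that were
	 * leaked by the other block reserves.
	 */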
block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); 3842 WARN_ON(fs_info->delalloc_block_rsv.size > 0); 3843 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); 3844 WARN_ON(fs_info->trans_block_rsv.size > 0); 3845 WARN_ON(fs_info->trans_block_rsv.reserved > 0); 3846 WARN_ON(fs_info->chunk_block_rsv.size > 0); 3847 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3848 } 3849 3850 int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, 3851 struct btrfs_root *root, 3852 struct btrfs_block_rsv *rsv) 3853 { 3854 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; 3855 u64 num_bytes; 3856 int ret; 3857 3858 /* 3859 * Truncate should be freeing data, but give us 2 items just in case it 3860 * needs to use some space. We may want to be smarter about this in the 3861 * future. 3862 */ 3863 num_bytes = btrfs_calc_trans_metadata_size(root, 2); 3864 3865 /* We already have enough bytes, just return */ 3866 if (rsv->reserved >= num_bytes) 3867 return 0; 3868 3869 num_bytes -= rsv->reserved; 3870 3871 /* 3872 * You should have reserved enough space before hand to do this, so this 3873 * should not fail. 3874 */ 3875 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); 3876 BUG_ON(ret); 3877 3878 return 0; 3879 } 3880 3881 int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, 3882 struct btrfs_root *root, 3883 int num_items) 3884 { 3885 u64 num_bytes; 3886 int ret; 3887 3888 if (num_items == 0 || root->fs_info->chunk_root == root) 3889 return 0; 3890 3891 num_bytes = btrfs_calc_trans_metadata_size(root, num_items); 3892 ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, 3893 num_bytes); 3894 if (!ret) { 3895 trans->bytes_reserved += num_bytes; 3896 trans->block_rsv = &root->fs_info->trans_block_rsv; 3897 } 3898 return ret; 3899 } 3900 3901 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3902 struct btrfs_root *root) 3903 { 3904 if (!trans->bytes_reserved) 3905 return; 3906 3907 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); 3908 btrfs_block_rsv_release(root, trans->block_rsv, 3909 trans->bytes_reserved); 3910 trans->bytes_reserved = 0; 3911 } 3912 3913 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 3914 struct inode *inode) 3915 { 3916 struct btrfs_root *root = BTRFS_I(inode)->root; 3917 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3918 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3919 3920 /* 3921 * We need to hold space in order to delete our orphan item once we've 3922 * added it, so this takes the reservation so we can release it later 3923 * when we are truly done with the orphan item. 3924 */ 3925 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3926 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3927 } 3928 3929 void btrfs_orphan_release_metadata(struct inode *inode) 3930 { 3931 struct btrfs_root *root = BTRFS_I(inode)->root; 3932 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3933 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3934 } 3935 3936 int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, 3937 struct btrfs_pending_snapshot *pending) 3938 { 3939 struct btrfs_root *root = pending->root; 3940 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3941 struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; 3942 /* 3943 * two for root back/forward refs, two for directory entries 3944 * and one for root of the snapshot. 
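 * That is five tree items in total, hence the
 * btrfs_calc_trans_metadata_size(root, 5) below.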
3945 */ 3946 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); 3947 dst_rsv->space_info = src_rsv->space_info; 3948 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3949 } 3950 3951 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 3952 { 3953 return num_bytes >>= 3; 3954 } 3955 3956 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) 3957 { 3958 struct btrfs_root *root = BTRFS_I(inode)->root; 3959 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3960 u64 to_reserve; 3961 int nr_extents; 3962 int reserved_extents; 3963 int ret; 3964 3965 if (btrfs_transaction_in_commit(root->fs_info)) 3966 schedule_timeout(1); 3967 3968 num_bytes = ALIGN(num_bytes, root->sectorsize); 3969 3970 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; 3971 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 3972 3973 if (nr_extents > reserved_extents) { 3974 nr_extents -= reserved_extents; 3975 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 3976 } else { 3977 nr_extents = 0; 3978 to_reserve = 0; 3979 } 3980 3981 to_reserve += calc_csum_metadata_size(inode, num_bytes); 3982 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 3983 if (ret) 3984 return ret; 3985 3986 atomic_add(nr_extents, &BTRFS_I(inode)->reserved_extents); 3987 atomic_inc(&BTRFS_I(inode)->outstanding_extents); 3988 3989 block_rsv_add_bytes(block_rsv, to_reserve, 1); 3990 3991 if (block_rsv->size > 512 * 1024 * 1024) 3992 shrink_delalloc(NULL, root, to_reserve, 0); 3993 3994 return 0; 3995 } 3996 3997 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 3998 { 3999 struct btrfs_root *root = BTRFS_I(inode)->root; 4000 u64 to_free; 4001 int nr_extents; 4002 int reserved_extents; 4003 4004 num_bytes = ALIGN(num_bytes, root->sectorsize); 4005 atomic_dec(&BTRFS_I(inode)->outstanding_extents); 4006 WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents) < 0); 4007 4008 reserved_extents = atomic_read(&BTRFS_I(inode)->reserved_extents); 4009 do { 4010 int old, new; 4011 4012 nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); 4013 if (nr_extents >= reserved_extents) { 4014 nr_extents = 0; 4015 break; 4016 } 4017 old = reserved_extents; 4018 nr_extents = reserved_extents - nr_extents; 4019 new = reserved_extents - nr_extents; 4020 old = atomic_cmpxchg(&BTRFS_I(inode)->reserved_extents, 4021 reserved_extents, new); 4022 if (likely(old == reserved_extents)) 4023 break; 4024 reserved_extents = old; 4025 } while (1); 4026 4027 to_free = calc_csum_metadata_size(inode, num_bytes); 4028 if (nr_extents > 0) 4029 to_free += btrfs_calc_trans_metadata_size(root, nr_extents); 4030 4031 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4032 to_free); 4033 } 4034 4035 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) 4036 { 4037 int ret; 4038 4039 ret = btrfs_check_data_free_space(inode, num_bytes); 4040 if (ret) 4041 return ret; 4042 4043 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); 4044 if (ret) { 4045 btrfs_free_reserved_data_space(inode, num_bytes); 4046 return ret; 4047 } 4048 4049 return 0; 4050 } 4051 4052 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) 4053 { 4054 btrfs_delalloc_release_metadata(inode, num_bytes); 4055 btrfs_free_reserved_data_space(inode, num_bytes); 4056 } 4057 4058 static int update_block_group(struct btrfs_trans_handle *trans, 4059 struct btrfs_root *root, 4060 u64 bytenr, u64 num_bytes, int alloc) 
4061 { 4062 struct btrfs_block_group_cache *cache = NULL; 4063 struct btrfs_fs_info *info = root->fs_info; 4064 u64 total = num_bytes; 4065 u64 old_val; 4066 u64 byte_in_group; 4067 int factor; 4068 4069 /* block accounting for super block */ 4070 spin_lock(&info->delalloc_lock); 4071 old_val = btrfs_super_bytes_used(&info->super_copy); 4072 if (alloc) 4073 old_val += num_bytes; 4074 else 4075 old_val -= num_bytes; 4076 btrfs_set_super_bytes_used(&info->super_copy, old_val); 4077 spin_unlock(&info->delalloc_lock); 4078 4079 while (total) { 4080 cache = btrfs_lookup_block_group(info, bytenr); 4081 if (!cache) 4082 return -1; 4083 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 4084 BTRFS_BLOCK_GROUP_RAID1 | 4085 BTRFS_BLOCK_GROUP_RAID10)) 4086 factor = 2; 4087 else 4088 factor = 1; 4089 /* 4090 * If this block group has free space cache written out, we 4091 * need to make sure to load it if we are removing space. This 4092 * is because we need the unpinning stage to actually add the 4093 * space back to the block group, otherwise we will leak space. 4094 */ 4095 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4096 cache_block_group(cache, trans, NULL, 1); 4097 4098 byte_in_group = bytenr - cache->key.objectid; 4099 WARN_ON(byte_in_group > cache->key.offset); 4100 4101 spin_lock(&cache->space_info->lock); 4102 spin_lock(&cache->lock); 4103 4104 if (btrfs_super_cache_generation(&info->super_copy) != 0 && 4105 cache->disk_cache_state < BTRFS_DC_CLEAR) 4106 cache->disk_cache_state = BTRFS_DC_CLEAR; 4107 4108 cache->dirty = 1; 4109 old_val = btrfs_block_group_used(&cache->item); 4110 num_bytes = min(total, cache->key.offset - byte_in_group); 4111 if (alloc) { 4112 old_val += num_bytes; 4113 btrfs_set_block_group_used(&cache->item, old_val); 4114 cache->reserved -= num_bytes; 4115 cache->space_info->bytes_reserved -= num_bytes; 4116 cache->space_info->reservation_progress++; 4117 cache->space_info->bytes_used += num_bytes; 4118 cache->space_info->disk_used += num_bytes * factor; 4119 spin_unlock(&cache->lock); 4120 spin_unlock(&cache->space_info->lock); 4121 } else { 4122 old_val -= num_bytes; 4123 btrfs_set_block_group_used(&cache->item, old_val); 4124 cache->pinned += num_bytes; 4125 cache->space_info->bytes_pinned += num_bytes; 4126 cache->space_info->bytes_used -= num_bytes; 4127 cache->space_info->disk_used -= num_bytes * factor; 4128 spin_unlock(&cache->lock); 4129 spin_unlock(&cache->space_info->lock); 4130 4131 set_extent_dirty(info->pinned_extents, 4132 bytenr, bytenr + num_bytes - 1, 4133 GFP_NOFS | __GFP_NOFAIL); 4134 } 4135 btrfs_put_block_group(cache); 4136 total -= num_bytes; 4137 bytenr += num_bytes; 4138 } 4139 return 0; 4140 } 4141 4142 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 4143 { 4144 struct btrfs_block_group_cache *cache; 4145 u64 bytenr; 4146 4147 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 4148 if (!cache) 4149 return 0; 4150 4151 bytenr = cache->key.objectid; 4152 btrfs_put_block_group(cache); 4153 4154 return bytenr; 4155 } 4156 4157 static int pin_down_extent(struct btrfs_root *root, 4158 struct btrfs_block_group_cache *cache, 4159 u64 bytenr, u64 num_bytes, int reserved) 4160 { 4161 spin_lock(&cache->space_info->lock); 4162 spin_lock(&cache->lock); 4163 cache->pinned += num_bytes; 4164 cache->space_info->bytes_pinned += num_bytes; 4165 if (reserved) { 4166 cache->reserved -= num_bytes; 4167 cache->space_info->bytes_reserved -= num_bytes; 4168 cache->space_info->reservation_progress++; 4169 } 4170 
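	/*
	 * Lock order here matches the rest of this file: space_info->lock
	 * first, then the block group's cache->lock.  Bytes moved to
	 * "pinned" are handed back by unpin_extent_range() at transaction
	 * commit time.
	 */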
spin_unlock(&cache->lock); 4171 spin_unlock(&cache->space_info->lock); 4172 4173 set_extent_dirty(root->fs_info->pinned_extents, bytenr, 4174 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 4175 return 0; 4176 } 4177 4178 /* 4179 * this function must be called within transaction 4180 */ 4181 int btrfs_pin_extent(struct btrfs_root *root, 4182 u64 bytenr, u64 num_bytes, int reserved) 4183 { 4184 struct btrfs_block_group_cache *cache; 4185 4186 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4187 BUG_ON(!cache); 4188 4189 pin_down_extent(root, cache, bytenr, num_bytes, reserved); 4190 4191 btrfs_put_block_group(cache); 4192 return 0; 4193 } 4194 4195 /* 4196 * update size of reserved extents. this function may return -EAGAIN 4197 * if 'reserve' is true or 'sinfo' is false. 4198 */ 4199 int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4200 u64 num_bytes, int reserve, int sinfo) 4201 { 4202 int ret = 0; 4203 if (sinfo) { 4204 struct btrfs_space_info *space_info = cache->space_info; 4205 spin_lock(&space_info->lock); 4206 spin_lock(&cache->lock); 4207 if (reserve) { 4208 if (cache->ro) { 4209 ret = -EAGAIN; 4210 } else { 4211 cache->reserved += num_bytes; 4212 space_info->bytes_reserved += num_bytes; 4213 } 4214 } else { 4215 if (cache->ro) 4216 space_info->bytes_readonly += num_bytes; 4217 cache->reserved -= num_bytes; 4218 space_info->bytes_reserved -= num_bytes; 4219 space_info->reservation_progress++; 4220 } 4221 spin_unlock(&cache->lock); 4222 spin_unlock(&space_info->lock); 4223 } else { 4224 spin_lock(&cache->lock); 4225 if (cache->ro) { 4226 ret = -EAGAIN; 4227 } else { 4228 if (reserve) 4229 cache->reserved += num_bytes; 4230 else 4231 cache->reserved -= num_bytes; 4232 } 4233 spin_unlock(&cache->lock); 4234 } 4235 return ret; 4236 } 4237 4238 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 4239 struct btrfs_root *root) 4240 { 4241 struct btrfs_fs_info *fs_info = root->fs_info; 4242 struct btrfs_caching_control *next; 4243 struct btrfs_caching_control *caching_ctl; 4244 struct btrfs_block_group_cache *cache; 4245 4246 down_write(&fs_info->extent_commit_sem); 4247 4248 list_for_each_entry_safe(caching_ctl, next, 4249 &fs_info->caching_block_groups, list) { 4250 cache = caching_ctl->block_group; 4251 if (block_group_cache_done(cache)) { 4252 cache->last_byte_to_unpin = (u64)-1; 4253 list_del_init(&caching_ctl->list); 4254 put_caching_control(caching_ctl); 4255 } else { 4256 cache->last_byte_to_unpin = caching_ctl->progress; 4257 } 4258 } 4259 4260 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4261 fs_info->pinned_extents = &fs_info->freed_extents[1]; 4262 else 4263 fs_info->pinned_extents = &fs_info->freed_extents[0]; 4264 4265 up_write(&fs_info->extent_commit_sem); 4266 4267 update_global_block_rsv(fs_info); 4268 return 0; 4269 } 4270 4271 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 4272 { 4273 struct btrfs_fs_info *fs_info = root->fs_info; 4274 struct btrfs_block_group_cache *cache = NULL; 4275 u64 len; 4276 4277 while (start <= end) { 4278 if (!cache || 4279 start >= cache->key.objectid + cache->key.offset) { 4280 if (cache) 4281 btrfs_put_block_group(cache); 4282 cache = btrfs_lookup_block_group(fs_info, start); 4283 BUG_ON(!cache); 4284 } 4285 4286 len = cache->key.objectid + cache->key.offset - start; 4287 len = min(len, end + 1 - start); 4288 4289 if (start < cache->last_byte_to_unpin) { 4290 len = min(len, cache->last_byte_to_unpin - start); 4291 btrfs_add_free_space(cache, start, len); 
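			/*
			 * Only the part of the range the caching thread has
			 * already scanned (below last_byte_to_unpin) goes back
			 * into the free space cache here; the caching thread
			 * picks up the rest itself once it scans past it.
			 */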
4292 } 4293 4294 start += len; 4295 4296 spin_lock(&cache->space_info->lock); 4297 spin_lock(&cache->lock); 4298 cache->pinned -= len; 4299 cache->space_info->bytes_pinned -= len; 4300 if (cache->ro) { 4301 cache->space_info->bytes_readonly += len; 4302 } else if (cache->reserved_pinned > 0) { 4303 len = min(len, cache->reserved_pinned); 4304 cache->reserved_pinned -= len; 4305 cache->space_info->bytes_reserved += len; 4306 } 4307 spin_unlock(&cache->lock); 4308 spin_unlock(&cache->space_info->lock); 4309 } 4310 4311 if (cache) 4312 btrfs_put_block_group(cache); 4313 return 0; 4314 } 4315 4316 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 4317 struct btrfs_root *root) 4318 { 4319 struct btrfs_fs_info *fs_info = root->fs_info; 4320 struct extent_io_tree *unpin; 4321 struct btrfs_block_rsv *block_rsv; 4322 struct btrfs_block_rsv *next_rsv; 4323 u64 start; 4324 u64 end; 4325 int idx; 4326 int ret; 4327 4328 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4329 unpin = &fs_info->freed_extents[1]; 4330 else 4331 unpin = &fs_info->freed_extents[0]; 4332 4333 while (1) { 4334 ret = find_first_extent_bit(unpin, 0, &start, &end, 4335 EXTENT_DIRTY); 4336 if (ret) 4337 break; 4338 4339 if (btrfs_test_opt(root, DISCARD)) 4340 ret = btrfs_discard_extent(root, start, 4341 end + 1 - start, NULL); 4342 4343 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4344 unpin_extent_range(root, start, end); 4345 cond_resched(); 4346 } 4347 4348 mutex_lock(&fs_info->durable_block_rsv_mutex); 4349 list_for_each_entry_safe(block_rsv, next_rsv, 4350 &fs_info->durable_block_rsv_list, list) { 4351 4352 idx = trans->transid & 0x1; 4353 if (block_rsv->freed[idx] > 0) { 4354 block_rsv_add_bytes(block_rsv, 4355 block_rsv->freed[idx], 0); 4356 block_rsv->freed[idx] = 0; 4357 } 4358 if (atomic_read(&block_rsv->usage) == 0) { 4359 btrfs_block_rsv_release(root, block_rsv, (u64)-1); 4360 4361 if (block_rsv->freed[0] == 0 && 4362 block_rsv->freed[1] == 0) { 4363 list_del_init(&block_rsv->list); 4364 kfree(block_rsv); 4365 } 4366 } else { 4367 btrfs_block_rsv_release(root, block_rsv, 0); 4368 } 4369 } 4370 mutex_unlock(&fs_info->durable_block_rsv_mutex); 4371 4372 return 0; 4373 } 4374 4375 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 4376 struct btrfs_root *root, 4377 u64 bytenr, u64 num_bytes, u64 parent, 4378 u64 root_objectid, u64 owner_objectid, 4379 u64 owner_offset, int refs_to_drop, 4380 struct btrfs_delayed_extent_op *extent_op) 4381 { 4382 struct btrfs_key key; 4383 struct btrfs_path *path; 4384 struct btrfs_fs_info *info = root->fs_info; 4385 struct btrfs_root *extent_root = info->extent_root; 4386 struct extent_buffer *leaf; 4387 struct btrfs_extent_item *ei; 4388 struct btrfs_extent_inline_ref *iref; 4389 int ret; 4390 int is_data; 4391 int extent_slot = 0; 4392 int found_extent = 0; 4393 int num_to_del = 1; 4394 u32 item_size; 4395 u64 refs; 4396 4397 path = btrfs_alloc_path(); 4398 if (!path) 4399 return -ENOMEM; 4400 4401 path->reada = 1; 4402 path->leave_spinning = 1; 4403 4404 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 4405 BUG_ON(!is_data && refs_to_drop != 1); 4406 4407 ret = lookup_extent_backref(trans, extent_root, path, &iref, 4408 bytenr, num_bytes, parent, 4409 root_objectid, owner_objectid, 4410 owner_offset); 4411 if (ret == 0) { 4412 extent_slot = path->slots[0]; 4413 while (extent_slot >= 0) { 4414 btrfs_item_key_to_cpu(path->nodes[0], &key, 4415 extent_slot); 4416 if (key.objectid != bytenr) 4417 break; 4418 if (key.type == 
BTRFS_EXTENT_ITEM_KEY && 4419 key.offset == num_bytes) { 4420 found_extent = 1; 4421 break; 4422 } 4423 if (path->slots[0] - extent_slot > 5) 4424 break; 4425 extent_slot--; 4426 } 4427 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4428 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 4429 if (found_extent && item_size < sizeof(*ei)) 4430 found_extent = 0; 4431 #endif 4432 if (!found_extent) { 4433 BUG_ON(iref); 4434 ret = remove_extent_backref(trans, extent_root, path, 4435 NULL, refs_to_drop, 4436 is_data); 4437 BUG_ON(ret); 4438 btrfs_release_path(path); 4439 path->leave_spinning = 1; 4440 4441 key.objectid = bytenr; 4442 key.type = BTRFS_EXTENT_ITEM_KEY; 4443 key.offset = num_bytes; 4444 4445 ret = btrfs_search_slot(trans, extent_root, 4446 &key, path, -1, 1); 4447 if (ret) { 4448 printk(KERN_ERR "umm, got %d back from search" 4449 ", was looking for %llu\n", ret, 4450 (unsigned long long)bytenr); 4451 btrfs_print_leaf(extent_root, path->nodes[0]); 4452 } 4453 BUG_ON(ret); 4454 extent_slot = path->slots[0]; 4455 } 4456 } else { 4457 btrfs_print_leaf(extent_root, path->nodes[0]); 4458 WARN_ON(1); 4459 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 4460 "parent %llu root %llu owner %llu offset %llu\n", 4461 (unsigned long long)bytenr, 4462 (unsigned long long)parent, 4463 (unsigned long long)root_objectid, 4464 (unsigned long long)owner_objectid, 4465 (unsigned long long)owner_offset); 4466 } 4467 4468 leaf = path->nodes[0]; 4469 item_size = btrfs_item_size_nr(leaf, extent_slot); 4470 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4471 if (item_size < sizeof(*ei)) { 4472 BUG_ON(found_extent || extent_slot != path->slots[0]); 4473 ret = convert_extent_item_v0(trans, extent_root, path, 4474 owner_objectid, 0); 4475 BUG_ON(ret < 0); 4476 4477 btrfs_release_path(path); 4478 path->leave_spinning = 1; 4479 4480 key.objectid = bytenr; 4481 key.type = BTRFS_EXTENT_ITEM_KEY; 4482 key.offset = num_bytes; 4483 4484 ret = btrfs_search_slot(trans, extent_root, &key, path, 4485 -1, 1); 4486 if (ret) { 4487 printk(KERN_ERR "umm, got %d back from search" 4488 ", was looking for %llu\n", ret, 4489 (unsigned long long)bytenr); 4490 btrfs_print_leaf(extent_root, path->nodes[0]); 4491 } 4492 BUG_ON(ret); 4493 extent_slot = path->slots[0]; 4494 leaf = path->nodes[0]; 4495 item_size = btrfs_item_size_nr(leaf, extent_slot); 4496 } 4497 #endif 4498 BUG_ON(item_size < sizeof(*ei)); 4499 ei = btrfs_item_ptr(leaf, extent_slot, 4500 struct btrfs_extent_item); 4501 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 4502 struct btrfs_tree_block_info *bi; 4503 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 4504 bi = (struct btrfs_tree_block_info *)(ei + 1); 4505 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 4506 } 4507 4508 refs = btrfs_extent_refs(leaf, ei); 4509 BUG_ON(refs < refs_to_drop); 4510 refs -= refs_to_drop; 4511 4512 if (refs > 0) { 4513 if (extent_op) 4514 __run_delayed_extent_op(extent_op, leaf, ei); 4515 /* 4516 * In the case of inline back ref, reference count will 4517 * be updated by remove_extent_backref 4518 */ 4519 if (iref) { 4520 BUG_ON(!found_extent); 4521 } else { 4522 btrfs_set_extent_refs(leaf, ei, refs); 4523 btrfs_mark_buffer_dirty(leaf); 4524 } 4525 if (found_extent) { 4526 ret = remove_extent_backref(trans, extent_root, path, 4527 iref, refs_to_drop, 4528 is_data); 4529 BUG_ON(ret); 4530 } 4531 } else { 4532 if (found_extent) { 4533 BUG_ON(is_data && refs_to_drop != 4534 extent_data_ref_count(root, path, iref)); 4535 if (iref) { 4536 BUG_ON(path->slots[0] != extent_slot); 
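			/*
			 * Inline backref: the ref is embedded in the
			 * extent item itself, so the path must already
			 * point at the extent item's slot.
			 */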
4537 } else { 4538 BUG_ON(path->slots[0] != extent_slot + 1); 4539 path->slots[0] = extent_slot; 4540 num_to_del = 2; 4541 } 4542 } 4543 4544 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4545 num_to_del); 4546 BUG_ON(ret); 4547 btrfs_release_path(path); 4548 4549 if (is_data) { 4550 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4551 BUG_ON(ret); 4552 } else { 4553 invalidate_mapping_pages(info->btree_inode->i_mapping, 4554 bytenr >> PAGE_CACHE_SHIFT, 4555 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 4556 } 4557 4558 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 4559 BUG_ON(ret); 4560 } 4561 btrfs_free_path(path); 4562 return ret; 4563 } 4564 4565 /* 4566 * when we free a block, it is possible (and likely) that we free the last 4567 * delayed ref for that extent as well. This searches the delayed ref tree for 4568 * a given extent, and if there are no other delayed refs to be processed, it 4569 * removes the ref head from the tree. 4570 */ 4571 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 4572 struct btrfs_root *root, u64 bytenr) 4573 { 4574 struct btrfs_delayed_ref_head *head; 4575 struct btrfs_delayed_ref_root *delayed_refs; 4576 struct btrfs_delayed_ref_node *ref; 4577 struct rb_node *node; 4578 int ret = 0; 4579 4580 delayed_refs = &trans->transaction->delayed_refs; 4581 spin_lock(&delayed_refs->lock); 4582 head = btrfs_find_delayed_ref_head(trans, bytenr); 4583 if (!head) 4584 goto out; 4585 4586 node = rb_prev(&head->node.rb_node); 4587 if (!node) 4588 goto out; 4589 4590 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 4591 4592 /* there are still entries for this ref, we can't drop it */ 4593 if (ref->bytenr == bytenr) 4594 goto out; 4595 4596 if (head->extent_op) { 4597 if (!head->must_insert_reserved) 4598 goto out; 4599 kfree(head->extent_op); 4600 head->extent_op = NULL; 4601 } 4602 4603 /* 4604 * waiting for the lock here would deadlock. If someone else has it 4605 * locked they are already in the process of dropping it anyway 4606 */ 4607 if (!mutex_trylock(&head->mutex)) 4608 goto out; 4609 4610 /* 4611 * at this point we have a head with no other entries. Go 4612 * ahead and process it. 4613 */ 4614 head->node.in_tree = 0; 4615 rb_erase(&head->node.rb_node, &delayed_refs->root); 4616 4617 delayed_refs->num_entries--; 4618 4619 /* 4620 * we don't take a ref on the node because we're removing it from the 4621 * tree, so we just steal the ref the tree was holding.
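 * (btrfs_put_delayed_ref() at the end of this function drops exactly
 * that stolen reference.)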
4622 */ 4623 delayed_refs->num_heads--; 4624 if (list_empty(&head->cluster)) 4625 delayed_refs->num_heads_ready--; 4626 4627 list_del_init(&head->cluster); 4628 spin_unlock(&delayed_refs->lock); 4629 4630 BUG_ON(head->extent_op); 4631 if (head->must_insert_reserved) 4632 ret = 1; 4633 4634 mutex_unlock(&head->mutex); 4635 btrfs_put_delayed_ref(&head->node); 4636 return ret; 4637 out: 4638 spin_unlock(&delayed_refs->lock); 4639 return 0; 4640 } 4641 4642 void btrfs_free_tree_block(struct btrfs_trans_handle *trans, 4643 struct btrfs_root *root, 4644 struct extent_buffer *buf, 4645 u64 parent, int last_ref) 4646 { 4647 struct btrfs_block_rsv *block_rsv; 4648 struct btrfs_block_group_cache *cache = NULL; 4649 int ret; 4650 4651 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4652 ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, 4653 parent, root->root_key.objectid, 4654 btrfs_header_level(buf), 4655 BTRFS_DROP_DELAYED_REF, NULL); 4656 BUG_ON(ret); 4657 } 4658 4659 if (!last_ref) 4660 return; 4661 4662 block_rsv = get_block_rsv(trans, root); 4663 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4664 if (block_rsv->space_info != cache->space_info) 4665 goto out; 4666 4667 if (btrfs_header_generation(buf) == trans->transid) { 4668 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4669 ret = check_ref_cleanup(trans, root, buf->start); 4670 if (!ret) 4671 goto pin; 4672 } 4673 4674 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 4675 pin_down_extent(root, cache, buf->start, buf->len, 1); 4676 goto pin; 4677 } 4678 4679 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4680 4681 btrfs_add_free_space(cache, buf->start, buf->len); 4682 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4683 if (ret == -EAGAIN) { 4684 /* block group became read-only */ 4685 btrfs_update_reserved_bytes(cache, buf->len, 0, 1); 4686 goto out; 4687 } 4688 4689 ret = 1; 4690 spin_lock(&block_rsv->lock); 4691 if (block_rsv->reserved < block_rsv->size) { 4692 block_rsv->reserved += buf->len; 4693 ret = 0; 4694 } 4695 spin_unlock(&block_rsv->lock); 4696 4697 if (ret) { 4698 spin_lock(&cache->space_info->lock); 4699 cache->space_info->bytes_reserved -= buf->len; 4700 cache->space_info->reservation_progress++; 4701 spin_unlock(&cache->space_info->lock); 4702 } 4703 goto out; 4704 } 4705 pin: 4706 if (block_rsv->durable && !cache->ro) { 4707 ret = 0; 4708 spin_lock(&cache->lock); 4709 if (!cache->ro) { 4710 cache->reserved_pinned += buf->len; 4711 ret = 1; 4712 } 4713 spin_unlock(&cache->lock); 4714 4715 if (ret) { 4716 spin_lock(&block_rsv->lock); 4717 block_rsv->freed[trans->transid & 0x1] += buf->len; 4718 spin_unlock(&block_rsv->lock); 4719 } 4720 } 4721 out: 4722 /* 4723 * Deleting the buffer, clear the corrupt flag since it doesn't matter 4724 * anymore. 4725 */ 4726 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); 4727 btrfs_put_block_group(cache); 4728 } 4729 4730 int btrfs_free_extent(struct btrfs_trans_handle *trans, 4731 struct btrfs_root *root, 4732 u64 bytenr, u64 num_bytes, u64 parent, 4733 u64 root_objectid, u64 owner, u64 offset) 4734 { 4735 int ret; 4736 4737 /* 4738 * tree log blocks never actually go into the extent allocation 4739 * tree, just update pinning info and exit early. 
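 *
 * everything else is queued as a delayed ref (BTRFS_DROP_DELAYED_REF):
 * tree blocks go through btrfs_add_delayed_tree_ref(), file data extents
 * through btrfs_add_delayed_data_ref(), and the extent tree itself is
 * only touched when the delayed refs are run.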
4740 */ 4741 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 4742 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 4743 /* unlocks the pinned mutex */ 4744 btrfs_pin_extent(root, bytenr, num_bytes, 1); 4745 ret = 0; 4746 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 4747 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 4748 parent, root_objectid, (int)owner, 4749 BTRFS_DROP_DELAYED_REF, NULL); 4750 BUG_ON(ret); 4751 } else { 4752 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4753 parent, root_objectid, owner, 4754 offset, BTRFS_DROP_DELAYED_REF, NULL); 4755 BUG_ON(ret); 4756 } 4757 return ret; 4758 } 4759 4760 static u64 stripe_align(struct btrfs_root *root, u64 val) 4761 { 4762 u64 mask = ((u64)root->stripesize - 1); 4763 u64 ret = (val + mask) & ~mask; 4764 return ret; 4765 } 4766 4767 /* 4768 * when we wait for progress in the block group caching, it's because 4769 * our allocation attempt failed at least once. So, we must sleep 4770 * and let some progress happen before we try again. 4771 * 4772 * This function will sleep at least once waiting for new free space to 4773 * show up, and then it will check the block group free space numbers 4774 * for our min num_bytes. Another option is to have it go ahead 4775 * and look in the rbtree for a free extent of a given size, but this 4776 * is a good start. 4777 */ 4778 static noinline int 4779 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 4780 u64 num_bytes) 4781 { 4782 struct btrfs_caching_control *caching_ctl; 4783 DEFINE_WAIT(wait); 4784 4785 caching_ctl = get_caching_control(cache); 4786 if (!caching_ctl) 4787 return 0; 4788 4789 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4790 (cache->free_space_ctl->free_space >= num_bytes)); 4791 4792 put_caching_control(caching_ctl); 4793 return 0; 4794 } 4795 4796 static noinline int 4797 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 4798 { 4799 struct btrfs_caching_control *caching_ctl; 4800 DEFINE_WAIT(wait); 4801 4802 caching_ctl = get_caching_control(cache); 4803 if (!caching_ctl) 4804 return 0; 4805 4806 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 4807 4808 put_caching_control(caching_ctl); 4809 return 0; 4810 } 4811 4812 static int get_block_group_index(struct btrfs_block_group_cache *cache) 4813 { 4814 int index; 4815 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) 4816 index = 0; 4817 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) 4818 index = 1; 4819 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) 4820 index = 2; 4821 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) 4822 index = 3; 4823 else 4824 index = 4; 4825 return index; 4826 } 4827 4828 enum btrfs_loop_type { 4829 LOOP_FIND_IDEAL = 0, 4830 LOOP_CACHING_NOWAIT = 1, 4831 LOOP_CACHING_WAIT = 2, 4832 LOOP_ALLOC_CHUNK = 3, 4833 LOOP_NO_EMPTY_SIZE = 4, 4834 }; 4835 4836 /* 4837 * walks the btree of allocated extents and finds a hole of a given size. 4838 * The key ins is changed to record the hole: 4839 * ins->objectid == block start 4840 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4841 * ins->offset == number of blocks 4842 * Any available blocks before search_start are skipped.
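 *
 * The search escalates through the btrfs_loop_type stages defined above;
 * see the comment above the loop handling near the end of this function
 * for what each stage is allowed to do.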
4843 */ 4844 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4845 struct btrfs_root *orig_root, 4846 u64 num_bytes, u64 empty_size, 4847 u64 search_start, u64 search_end, 4848 u64 hint_byte, struct btrfs_key *ins, 4849 int data) 4850 { 4851 int ret = 0; 4852 struct btrfs_root *root = orig_root->fs_info->extent_root; 4853 struct btrfs_free_cluster *last_ptr = NULL; 4854 struct btrfs_block_group_cache *block_group = NULL; 4855 int empty_cluster = 2 * 1024 * 1024; 4856 int allowed_chunk_alloc = 0; 4857 int done_chunk_alloc = 0; 4858 struct btrfs_space_info *space_info; 4859 int last_ptr_loop = 0; 4860 int loop = 0; 4861 int index = 0; 4862 bool found_uncached_bg = false; 4863 bool failed_cluster_refill = false; 4864 bool failed_alloc = false; 4865 bool use_cluster = true; 4866 u64 ideal_cache_percent = 0; 4867 u64 ideal_cache_offset = 0; 4868 4869 WARN_ON(num_bytes < root->sectorsize); 4870 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4871 ins->objectid = 0; 4872 ins->offset = 0; 4873 4874 space_info = __find_space_info(root->fs_info, data); 4875 if (!space_info) { 4876 printk(KERN_ERR "No space info for %d\n", data); 4877 return -ENOSPC; 4878 } 4879 4880 /* 4881 * If the space info is for both data and metadata it means we have a 4882 * small filesystem and we can't use the clustering stuff. 4883 */ 4884 if (btrfs_mixed_space_info(space_info)) 4885 use_cluster = false; 4886 4887 if (orig_root->ref_cows || empty_size) 4888 allowed_chunk_alloc = 1; 4889 4890 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 4891 last_ptr = &root->fs_info->meta_alloc_cluster; 4892 if (!btrfs_test_opt(root, SSD)) 4893 empty_cluster = 64 * 1024; 4894 } 4895 4896 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 4897 btrfs_test_opt(root, SSD)) { 4898 last_ptr = &root->fs_info->data_alloc_cluster; 4899 } 4900 4901 if (last_ptr) { 4902 spin_lock(&last_ptr->lock); 4903 if (last_ptr->block_group) 4904 hint_byte = last_ptr->window_start; 4905 spin_unlock(&last_ptr->lock); 4906 } 4907 4908 search_start = max(search_start, first_logical_byte(root, 0)); 4909 search_start = max(search_start, hint_byte); 4910 4911 if (!last_ptr) 4912 empty_cluster = 0; 4913 4914 if (search_start == hint_byte) { 4915 ideal_cache: 4916 block_group = btrfs_lookup_block_group(root->fs_info, 4917 search_start); 4918 /* 4919 * we don't want to use the block group if it doesn't match our 4920 * allocation bits, or if its not cached. 4921 * 4922 * However if we are re-searching with an ideal block group 4923 * picked out then we don't care that the block group is cached. 
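 * (search_start == ideal_cache_offset is normally only true after the
 * LOOP_FIND_IDEAL pass has picked a candidate and jumped back to the
 * ideal_cache label above.)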
4924 */ 4925 if (block_group && block_group_bits(block_group, data) && 4926 (block_group->cached != BTRFS_CACHE_NO || 4927 search_start == ideal_cache_offset)) { 4928 down_read(&space_info->groups_sem); 4929 if (list_empty(&block_group->list) || 4930 block_group->ro) { 4931 /* 4932 * someone is removing this block group, 4933 * we can't jump into the have_block_group 4934 * target because our list pointers are not 4935 * valid 4936 */ 4937 btrfs_put_block_group(block_group); 4938 up_read(&space_info->groups_sem); 4939 } else { 4940 index = get_block_group_index(block_group); 4941 goto have_block_group; 4942 } 4943 } else if (block_group) { 4944 btrfs_put_block_group(block_group); 4945 } 4946 } 4947 search: 4948 down_read(&space_info->groups_sem); 4949 list_for_each_entry(block_group, &space_info->block_groups[index], 4950 list) { 4951 u64 offset; 4952 int cached; 4953 4954 btrfs_get_block_group(block_group); 4955 search_start = block_group->key.objectid; 4956 4957 /* 4958 * this can happen if we end up cycling through all the 4959 * raid types, but we want to make sure we only allocate 4960 * for the proper type. 4961 */ 4962 if (!block_group_bits(block_group, data)) { 4963 u64 extra = BTRFS_BLOCK_GROUP_DUP | 4964 BTRFS_BLOCK_GROUP_RAID1 | 4965 BTRFS_BLOCK_GROUP_RAID10; 4966 4967 /* 4968 * if they asked for extra copies and this block group 4969 * doesn't provide them, bail. This does allow us to 4970 * fill raid0 from raid1. 4971 */ 4972 if ((data & extra) && !(block_group->flags & extra)) 4973 goto loop; 4974 } 4975 4976 have_block_group: 4977 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4978 u64 free_percent; 4979 4980 ret = cache_block_group(block_group, trans, 4981 orig_root, 1); 4982 if (block_group->cached == BTRFS_CACHE_FINISHED) 4983 goto have_block_group; 4984 4985 free_percent = btrfs_block_group_used(&block_group->item); 4986 free_percent *= 100; 4987 free_percent = div64_u64(free_percent, 4988 block_group->key.offset); 4989 free_percent = 100 - free_percent; 4990 if (free_percent > ideal_cache_percent && 4991 likely(!block_group->ro)) { 4992 ideal_cache_offset = block_group->key.objectid; 4993 ideal_cache_percent = free_percent; 4994 } 4995 4996 /* 4997 * We only want to start kthread caching if we are at 4998 * the point where we will wait for caching to make 4999 * progress, or if our ideal search is over and we've 5000 * found somebody to start caching. 5001 */ 5002 if (loop > LOOP_CACHING_NOWAIT || 5003 (loop > LOOP_FIND_IDEAL && 5004 atomic_read(&space_info->caching_threads) < 2)) { 5005 ret = cache_block_group(block_group, trans, 5006 orig_root, 0); 5007 BUG_ON(ret); 5008 } 5009 found_uncached_bg = true; 5010 5011 /* 5012 * If loop is set for cached only, try the next block 5013 * group. 
5014 */ 5015 if (loop == LOOP_FIND_IDEAL) 5016 goto loop; 5017 } 5018 5019 cached = block_group_cache_done(block_group); 5020 if (unlikely(!cached)) 5021 found_uncached_bg = true; 5022 5023 if (unlikely(block_group->ro)) 5024 goto loop; 5025 5026 spin_lock(&block_group->free_space_ctl->tree_lock); 5027 if (cached && 5028 block_group->free_space_ctl->free_space < 5029 num_bytes + empty_size) { 5030 spin_unlock(&block_group->free_space_ctl->tree_lock); 5031 goto loop; 5032 } 5033 spin_unlock(&block_group->free_space_ctl->tree_lock); 5034 5035 /* 5036 * Ok we want to try and use the cluster allocator, so lets look 5037 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5038 * have tried the cluster allocator plenty of times at this 5039 * point and not have found anything, so we are likely way too 5040 * fragmented for the clustering stuff to find anything, so lets 5041 * just skip it and let the allocator find whatever block it can 5042 * find 5043 */ 5044 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 5045 /* 5046 * the refill lock keeps out other 5047 * people trying to start a new cluster 5048 */ 5049 spin_lock(&last_ptr->refill_lock); 5050 if (last_ptr->block_group && 5051 (last_ptr->block_group->ro || 5052 !block_group_bits(last_ptr->block_group, data))) { 5053 offset = 0; 5054 goto refill_cluster; 5055 } 5056 5057 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 5058 num_bytes, search_start); 5059 if (offset) { 5060 /* we have a block, we're done */ 5061 spin_unlock(&last_ptr->refill_lock); 5062 goto checks; 5063 } 5064 5065 spin_lock(&last_ptr->lock); 5066 /* 5067 * whoops, this cluster doesn't actually point to 5068 * this block group. Get a ref on the block 5069 * group is does point to and try again 5070 */ 5071 if (!last_ptr_loop && last_ptr->block_group && 5072 last_ptr->block_group != block_group) { 5073 5074 btrfs_put_block_group(block_group); 5075 block_group = last_ptr->block_group; 5076 btrfs_get_block_group(block_group); 5077 spin_unlock(&last_ptr->lock); 5078 spin_unlock(&last_ptr->refill_lock); 5079 5080 last_ptr_loop = 1; 5081 search_start = block_group->key.objectid; 5082 /* 5083 * we know this block group is properly 5084 * in the list because 5085 * btrfs_remove_block_group, drops the 5086 * cluster before it removes the block 5087 * group from the list 5088 */ 5089 goto have_block_group; 5090 } 5091 spin_unlock(&last_ptr->lock); 5092 refill_cluster: 5093 /* 5094 * this cluster didn't work out, free it and 5095 * start over 5096 */ 5097 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5098 5099 last_ptr_loop = 0; 5100 5101 /* allocate a cluster in this block group */ 5102 ret = btrfs_find_space_cluster(trans, root, 5103 block_group, last_ptr, 5104 offset, num_bytes, 5105 empty_cluster + empty_size); 5106 if (ret == 0) { 5107 /* 5108 * now pull our allocation out of this 5109 * cluster 5110 */ 5111 offset = btrfs_alloc_from_cluster(block_group, 5112 last_ptr, num_bytes, 5113 search_start); 5114 if (offset) { 5115 /* we found one, proceed */ 5116 spin_unlock(&last_ptr->refill_lock); 5117 goto checks; 5118 } 5119 } else if (!cached && loop > LOOP_CACHING_NOWAIT 5120 && !failed_cluster_refill) { 5121 spin_unlock(&last_ptr->refill_lock); 5122 5123 failed_cluster_refill = true; 5124 wait_block_group_cache_progress(block_group, 5125 num_bytes + empty_cluster + empty_size); 5126 goto have_block_group; 5127 } 5128 5129 /* 5130 * at this point we either didn't find a cluster 5131 * or we weren't able to allocate a block from our 5132 * cluster. 
Free the cluster we've been trying 5133 * to use, and go to the next block group 5134 */ 5135 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5136 spin_unlock(&last_ptr->refill_lock); 5137 goto loop; 5138 } 5139 5140 offset = btrfs_find_space_for_alloc(block_group, search_start, 5141 num_bytes, empty_size); 5142 /* 5143 * If we didn't find a chunk, and we haven't failed on this 5144 * block group before, and this block group is in the middle of 5145 * caching and we are ok with waiting, then go ahead and wait 5146 * for progress to be made, and set failed_alloc to true. 5147 * 5148 * If failed_alloc is true then we've already waited on this 5149 * block group once and should move on to the next block group. 5150 */ 5151 if (!offset && !failed_alloc && !cached && 5152 loop > LOOP_CACHING_NOWAIT) { 5153 wait_block_group_cache_progress(block_group, 5154 num_bytes + empty_size); 5155 failed_alloc = true; 5156 goto have_block_group; 5157 } else if (!offset) { 5158 goto loop; 5159 } 5160 checks: 5161 search_start = stripe_align(root, offset); 5162 /* move on to the next group */ 5163 if (search_start + num_bytes >= search_end) { 5164 btrfs_add_free_space(block_group, offset, num_bytes); 5165 goto loop; 5166 } 5167 5168 /* move on to the next group */ 5169 if (search_start + num_bytes > 5170 block_group->key.objectid + block_group->key.offset) { 5171 btrfs_add_free_space(block_group, offset, num_bytes); 5172 goto loop; 5173 } 5174 5175 ins->objectid = search_start; 5176 ins->offset = num_bytes; 5177 5178 if (offset < search_start) 5179 btrfs_add_free_space(block_group, offset, 5180 search_start - offset); 5181 BUG_ON(offset > search_start); 5182 5183 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5184 (data & BTRFS_BLOCK_GROUP_DATA)); 5185 if (ret == -EAGAIN) { 5186 btrfs_add_free_space(block_group, offset, num_bytes); 5187 goto loop; 5188 } 5189 5190 /* we are all good, lets return */ 5191 ins->objectid = search_start; 5192 ins->offset = num_bytes; 5193 5194 if (offset < search_start) 5195 btrfs_add_free_space(block_group, offset, 5196 search_start - offset); 5197 BUG_ON(offset > search_start); 5198 btrfs_put_block_group(block_group); 5199 break; 5200 loop: 5201 failed_cluster_refill = false; 5202 failed_alloc = false; 5203 BUG_ON(index != get_block_group_index(block_group)); 5204 btrfs_put_block_group(block_group); 5205 } 5206 up_read(&space_info->groups_sem); 5207 5208 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5209 goto search; 5210 5211 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 5212 * for them to make caching progress. Also 5213 * determine the best possible bg to cache 5214 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 5215 * caching kthreads as we move along 5216 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 5217 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 5218 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5219 * again 5220 */ 5221 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { 5222 index = 0; 5223 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5224 found_uncached_bg = false; 5225 loop++; 5226 if (!ideal_cache_percent && 5227 atomic_read(&space_info->caching_threads)) 5228 goto search; 5229 5230 /* 5231 * 1 of the following 2 things have happened so far 5232 * 5233 * 1) We found an ideal block group for caching that 5234 * is mostly full and will cache quickly, so we might 5235 * as well wait for it. 
5236 * 5237 * 2) We searched for cached only and we didn't find 5238 * anything, and we didn't start any caching kthreads 5239 * either, so chances are we will loop through and 5240 * start a couple caching kthreads, and then come back 5241 * around and just wait for them. This will be slower 5242 * because we will have 2 caching kthreads reading at 5243 * the same time when we could have just started one 5244 * and waited for it to get far enough to give us an 5245 * allocation, so go ahead and go to the wait caching 5246 * loop. 5247 */ 5248 loop = LOOP_CACHING_WAIT; 5249 search_start = ideal_cache_offset; 5250 ideal_cache_percent = 0; 5251 goto ideal_cache; 5252 } else if (loop == LOOP_FIND_IDEAL) { 5253 /* 5254 * Didn't find a uncached bg, wait on anything we find 5255 * next. 5256 */ 5257 loop = LOOP_CACHING_WAIT; 5258 goto search; 5259 } 5260 5261 loop++; 5262 5263 if (loop == LOOP_ALLOC_CHUNK) { 5264 if (allowed_chunk_alloc) { 5265 ret = do_chunk_alloc(trans, root, num_bytes + 5266 2 * 1024 * 1024, data, 5267 CHUNK_ALLOC_LIMITED); 5268 allowed_chunk_alloc = 0; 5269 if (ret == 1) 5270 done_chunk_alloc = 1; 5271 } else if (!done_chunk_alloc && 5272 space_info->force_alloc == 5273 CHUNK_ALLOC_NO_FORCE) { 5274 space_info->force_alloc = CHUNK_ALLOC_LIMITED; 5275 } 5276 5277 /* 5278 * We didn't allocate a chunk, go ahead and drop the 5279 * empty size and loop again. 5280 */ 5281 if (!done_chunk_alloc) 5282 loop = LOOP_NO_EMPTY_SIZE; 5283 } 5284 5285 if (loop == LOOP_NO_EMPTY_SIZE) { 5286 empty_size = 0; 5287 empty_cluster = 0; 5288 } 5289 5290 goto search; 5291 } else if (!ins->objectid) { 5292 ret = -ENOSPC; 5293 } else if (ins->objectid) { 5294 ret = 0; 5295 } 5296 5297 return ret; 5298 } 5299 5300 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 5301 int dump_block_groups) 5302 { 5303 struct btrfs_block_group_cache *cache; 5304 int index = 0; 5305 5306 spin_lock(&info->lock); 5307 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5308 (unsigned long long)(info->total_bytes - info->bytes_used - 5309 info->bytes_pinned - info->bytes_reserved - 5310 info->bytes_readonly), 5311 (info->full) ? 
"" : "not "); 5312 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 5313 "reserved=%llu, may_use=%llu, readonly=%llu\n", 5314 (unsigned long long)info->total_bytes, 5315 (unsigned long long)info->bytes_used, 5316 (unsigned long long)info->bytes_pinned, 5317 (unsigned long long)info->bytes_reserved, 5318 (unsigned long long)info->bytes_may_use, 5319 (unsigned long long)info->bytes_readonly); 5320 spin_unlock(&info->lock); 5321 5322 if (!dump_block_groups) 5323 return; 5324 5325 down_read(&info->groups_sem); 5326 again: 5327 list_for_each_entry(cache, &info->block_groups[index], list) { 5328 spin_lock(&cache->lock); 5329 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 5330 "%llu pinned %llu reserved\n", 5331 (unsigned long long)cache->key.objectid, 5332 (unsigned long long)cache->key.offset, 5333 (unsigned long long)btrfs_block_group_used(&cache->item), 5334 (unsigned long long)cache->pinned, 5335 (unsigned long long)cache->reserved); 5336 btrfs_dump_free_space(cache, bytes); 5337 spin_unlock(&cache->lock); 5338 } 5339 if (++index < BTRFS_NR_RAID_TYPES) 5340 goto again; 5341 up_read(&info->groups_sem); 5342 } 5343 5344 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 5345 struct btrfs_root *root, 5346 u64 num_bytes, u64 min_alloc_size, 5347 u64 empty_size, u64 hint_byte, 5348 u64 search_end, struct btrfs_key *ins, 5349 u64 data) 5350 { 5351 int ret; 5352 u64 search_start = 0; 5353 5354 data = btrfs_get_alloc_profile(root, data); 5355 again: 5356 /* 5357 * the only place that sets empty_size is btrfs_realloc_node, which 5358 * is not called recursively on allocations 5359 */ 5360 if (empty_size || root->ref_cows) 5361 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5362 num_bytes + 2 * 1024 * 1024, data, 5363 CHUNK_ALLOC_NO_FORCE); 5364 5365 WARN_ON(num_bytes < root->sectorsize); 5366 ret = find_free_extent(trans, root, num_bytes, empty_size, 5367 search_start, search_end, hint_byte, 5368 ins, data); 5369 5370 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 5371 num_bytes = num_bytes >> 1; 5372 num_bytes = num_bytes & ~(root->sectorsize - 1); 5373 num_bytes = max(num_bytes, min_alloc_size); 5374 do_chunk_alloc(trans, root->fs_info->extent_root, 5375 num_bytes, data, CHUNK_ALLOC_FORCE); 5376 goto again; 5377 } 5378 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5379 struct btrfs_space_info *sinfo; 5380 5381 sinfo = __find_space_info(root->fs_info, data); 5382 printk(KERN_ERR "btrfs allocation failed flags %llu, " 5383 "wanted %llu\n", (unsigned long long)data, 5384 (unsigned long long)num_bytes); 5385 dump_space_info(sinfo, num_bytes, 1); 5386 } 5387 5388 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); 5389 5390 return ret; 5391 } 5392 5393 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 5394 { 5395 struct btrfs_block_group_cache *cache; 5396 int ret = 0; 5397 5398 cache = btrfs_lookup_block_group(root->fs_info, start); 5399 if (!cache) { 5400 printk(KERN_ERR "Unable to find block group for %llu\n", 5401 (unsigned long long)start); 5402 return -ENOSPC; 5403 } 5404 5405 if (btrfs_test_opt(root, DISCARD)) 5406 ret = btrfs_discard_extent(root, start, len, NULL); 5407 5408 btrfs_add_free_space(cache, start, len); 5409 btrfs_update_reserved_bytes(cache, len, 0, 1); 5410 btrfs_put_block_group(cache); 5411 5412 trace_btrfs_reserved_extent_free(root, start, len); 5413 5414 return ret; 5415 } 5416 5417 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5418 struct 
btrfs_root *root, 5419 u64 parent, u64 root_objectid, 5420 u64 flags, u64 owner, u64 offset, 5421 struct btrfs_key *ins, int ref_mod) 5422 { 5423 int ret; 5424 struct btrfs_fs_info *fs_info = root->fs_info; 5425 struct btrfs_extent_item *extent_item; 5426 struct btrfs_extent_inline_ref *iref; 5427 struct btrfs_path *path; 5428 struct extent_buffer *leaf; 5429 int type; 5430 u32 size; 5431 5432 if (parent > 0) 5433 type = BTRFS_SHARED_DATA_REF_KEY; 5434 else 5435 type = BTRFS_EXTENT_DATA_REF_KEY; 5436 5437 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5438 5439 path = btrfs_alloc_path(); 5440 if (!path) 5441 return -ENOMEM; 5442 5443 path->leave_spinning = 1; 5444 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5445 ins, size); 5446 BUG_ON(ret); 5447 5448 leaf = path->nodes[0]; 5449 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5450 struct btrfs_extent_item); 5451 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 5452 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5453 btrfs_set_extent_flags(leaf, extent_item, 5454 flags | BTRFS_EXTENT_FLAG_DATA); 5455 5456 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 5457 btrfs_set_extent_inline_ref_type(leaf, iref, type); 5458 if (parent > 0) { 5459 struct btrfs_shared_data_ref *ref; 5460 ref = (struct btrfs_shared_data_ref *)(iref + 1); 5461 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5462 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 5463 } else { 5464 struct btrfs_extent_data_ref *ref; 5465 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 5466 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 5467 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 5468 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 5469 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 5470 } 5471 5472 btrfs_mark_buffer_dirty(path->nodes[0]); 5473 btrfs_free_path(path); 5474 5475 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5476 if (ret) { 5477 printk(KERN_ERR "btrfs update block group failed for %llu " 5478 "%llu\n", (unsigned long long)ins->objectid, 5479 (unsigned long long)ins->offset); 5480 BUG(); 5481 } 5482 return ret; 5483 } 5484 5485 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 5486 struct btrfs_root *root, 5487 u64 parent, u64 root_objectid, 5488 u64 flags, struct btrfs_disk_key *key, 5489 int level, struct btrfs_key *ins) 5490 { 5491 int ret; 5492 struct btrfs_fs_info *fs_info = root->fs_info; 5493 struct btrfs_extent_item *extent_item; 5494 struct btrfs_tree_block_info *block_info; 5495 struct btrfs_extent_inline_ref *iref; 5496 struct btrfs_path *path; 5497 struct extent_buffer *leaf; 5498 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5499 5500 path = btrfs_alloc_path(); 5501 BUG_ON(!path); 5502 5503 path->leave_spinning = 1; 5504 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5505 ins, size); 5506 BUG_ON(ret); 5507 5508 leaf = path->nodes[0]; 5509 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5510 struct btrfs_extent_item); 5511 btrfs_set_extent_refs(leaf, extent_item, 1); 5512 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5513 btrfs_set_extent_flags(leaf, extent_item, 5514 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 5515 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 5516 5517 btrfs_set_tree_block_key(leaf, block_info, key); 5518 btrfs_set_tree_block_level(leaf, block_info, level); 5519 5520 iref = (struct 
btrfs_extent_inline_ref *)(block_info + 1); 5521 if (parent > 0) { 5522 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 5523 btrfs_set_extent_inline_ref_type(leaf, iref, 5524 BTRFS_SHARED_BLOCK_REF_KEY); 5525 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5526 } else { 5527 btrfs_set_extent_inline_ref_type(leaf, iref, 5528 BTRFS_TREE_BLOCK_REF_KEY); 5529 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 5530 } 5531 5532 btrfs_mark_buffer_dirty(leaf); 5533 btrfs_free_path(path); 5534 5535 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5536 if (ret) { 5537 printk(KERN_ERR "btrfs update block group failed for %llu " 5538 "%llu\n", (unsigned long long)ins->objectid, 5539 (unsigned long long)ins->offset); 5540 BUG(); 5541 } 5542 return ret; 5543 } 5544 5545 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5546 struct btrfs_root *root, 5547 u64 root_objectid, u64 owner, 5548 u64 offset, struct btrfs_key *ins) 5549 { 5550 int ret; 5551 5552 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 5553 5554 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 5555 0, root_objectid, owner, offset, 5556 BTRFS_ADD_DELAYED_EXTENT, NULL); 5557 return ret; 5558 } 5559 5560 /* 5561 * this is used by the tree logging recovery code. It records that 5562 * an extent has been allocated and makes sure to clear the free 5563 * space cache bits as well 5564 */ 5565 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 5566 struct btrfs_root *root, 5567 u64 root_objectid, u64 owner, u64 offset, 5568 struct btrfs_key *ins) 5569 { 5570 int ret; 5571 struct btrfs_block_group_cache *block_group; 5572 struct btrfs_caching_control *caching_ctl; 5573 u64 start = ins->objectid; 5574 u64 num_bytes = ins->offset; 5575 5576 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5577 cache_block_group(block_group, trans, NULL, 0); 5578 caching_ctl = get_caching_control(block_group); 5579 5580 if (!caching_ctl) { 5581 BUG_ON(!block_group_cache_done(block_group)); 5582 ret = btrfs_remove_free_space(block_group, start, num_bytes); 5583 BUG_ON(ret); 5584 } else { 5585 mutex_lock(&caching_ctl->mutex); 5586 5587 if (start >= caching_ctl->progress) { 5588 ret = add_excluded_extent(root, start, num_bytes); 5589 BUG_ON(ret); 5590 } else if (start + num_bytes <= caching_ctl->progress) { 5591 ret = btrfs_remove_free_space(block_group, 5592 start, num_bytes); 5593 BUG_ON(ret); 5594 } else { 5595 num_bytes = caching_ctl->progress - start; 5596 ret = btrfs_remove_free_space(block_group, 5597 start, num_bytes); 5598 BUG_ON(ret); 5599 5600 start = caching_ctl->progress; 5601 num_bytes = ins->objectid + ins->offset - 5602 caching_ctl->progress; 5603 ret = add_excluded_extent(root, start, num_bytes); 5604 BUG_ON(ret); 5605 } 5606 5607 mutex_unlock(&caching_ctl->mutex); 5608 put_caching_control(caching_ctl); 5609 } 5610 5611 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5612 BUG_ON(ret); 5613 btrfs_put_block_group(block_group); 5614 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5615 0, owner, offset, ins, 1); 5616 return ret; 5617 } 5618 5619 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 5620 struct btrfs_root *root, 5621 u64 bytenr, u32 blocksize, 5622 int level) 5623 { 5624 struct extent_buffer *buf; 5625 5626 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 5627 if (!buf) 5628 return ERR_PTR(-ENOMEM); 5629 btrfs_set_header_generation(buf, 
trans->transid); 5630 btrfs_set_buffer_lockdep_class(buf, level); 5631 btrfs_tree_lock(buf); 5632 clean_tree_block(trans, root, buf); 5633 5634 btrfs_set_lock_blocking(buf); 5635 btrfs_set_buffer_uptodate(buf); 5636 5637 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 5638 /* 5639 * we allow two log transactions at a time, use a different 5640 * EXTENT bit to differentiate dirty pages. 5641 */ 5642 if (root->log_transid % 2 == 0) 5643 set_extent_dirty(&root->dirty_log_pages, buf->start, 5644 buf->start + buf->len - 1, GFP_NOFS); 5645 else 5646 set_extent_new(&root->dirty_log_pages, buf->start, 5647 buf->start + buf->len - 1, GFP_NOFS); 5648 } else { 5649 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 5650 buf->start + buf->len - 1, GFP_NOFS); 5651 } 5652 trans->blocks_used++; 5653 /* this returns a buffer locked for blocking */ 5654 return buf; 5655 } 5656 5657 static struct btrfs_block_rsv * 5658 use_block_rsv(struct btrfs_trans_handle *trans, 5659 struct btrfs_root *root, u32 blocksize) 5660 { 5661 struct btrfs_block_rsv *block_rsv; 5662 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 5663 int ret; 5664 5665 block_rsv = get_block_rsv(trans, root); 5666 5667 if (block_rsv->size == 0) { 5668 ret = reserve_metadata_bytes(trans, root, block_rsv, 5669 blocksize, 0); 5670 /* 5671 * If we couldn't reserve metadata bytes try and use some from 5672 * the global reserve. 5673 */ 5674 if (ret && block_rsv != global_rsv) { 5675 ret = block_rsv_use_bytes(global_rsv, blocksize); 5676 if (!ret) 5677 return global_rsv; 5678 return ERR_PTR(ret); 5679 } else if (ret) { 5680 return ERR_PTR(ret); 5681 } 5682 return block_rsv; 5683 } 5684 5685 ret = block_rsv_use_bytes(block_rsv, blocksize); 5686 if (!ret) 5687 return block_rsv; 5688 if (ret) { 5689 WARN_ON(1); 5690 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, 5691 0); 5692 if (!ret) { 5693 spin_lock(&block_rsv->lock); 5694 block_rsv->size += blocksize; 5695 spin_unlock(&block_rsv->lock); 5696 return block_rsv; 5697 } else if (ret && block_rsv != global_rsv) { 5698 ret = block_rsv_use_bytes(global_rsv, blocksize); 5699 if (!ret) 5700 return global_rsv; 5701 } 5702 } 5703 5704 return ERR_PTR(-ENOSPC); 5705 } 5706 5707 static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) 5708 { 5709 block_rsv_add_bytes(block_rsv, blocksize, 0); 5710 block_rsv_release_bytes(block_rsv, NULL, 0); 5711 } 5712 5713 /* 5714 * finds a free extent and does all the dirty work required for allocation. 5715 * returns the key for the extent through ins, and a tree buffer for 5716 * the first block of the extent through buf. 5717 * 5718 * returns the tree buffer or NULL.
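 *
 * The metadata space for the new block comes out of the root's block
 * reserve via use_block_rsv() above, falling back to the global reserve
 * when that cannot be refilled.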
5719 */ 5720 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 5721 struct btrfs_root *root, u32 blocksize, 5722 u64 parent, u64 root_objectid, 5723 struct btrfs_disk_key *key, int level, 5724 u64 hint, u64 empty_size) 5725 { 5726 struct btrfs_key ins; 5727 struct btrfs_block_rsv *block_rsv; 5728 struct extent_buffer *buf; 5729 u64 flags = 0; 5730 int ret; 5731 5732 5733 block_rsv = use_block_rsv(trans, root, blocksize); 5734 if (IS_ERR(block_rsv)) 5735 return ERR_CAST(block_rsv); 5736 5737 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 5738 empty_size, hint, (u64)-1, &ins, 0); 5739 if (ret) { 5740 unuse_block_rsv(block_rsv, blocksize); 5741 return ERR_PTR(ret); 5742 } 5743 5744 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 5745 blocksize, level); 5746 BUG_ON(IS_ERR(buf)); 5747 5748 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 5749 if (parent == 0) 5750 parent = ins.objectid; 5751 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 5752 } else 5753 BUG_ON(parent > 0); 5754 5755 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 5756 struct btrfs_delayed_extent_op *extent_op; 5757 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 5758 BUG_ON(!extent_op); 5759 if (key) 5760 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 5761 else 5762 memset(&extent_op->key, 0, sizeof(extent_op->key)); 5763 extent_op->flags_to_set = flags; 5764 extent_op->update_key = 1; 5765 extent_op->update_flags = 1; 5766 extent_op->is_data = 0; 5767 5768 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, 5769 ins.offset, parent, root_objectid, 5770 level, BTRFS_ADD_DELAYED_EXTENT, 5771 extent_op); 5772 BUG_ON(ret); 5773 } 5774 return buf; 5775 } 5776 5777 struct walk_control { 5778 u64 refs[BTRFS_MAX_LEVEL]; 5779 u64 flags[BTRFS_MAX_LEVEL]; 5780 struct btrfs_key update_progress; 5781 int stage; 5782 int level; 5783 int shared_level; 5784 int update_ref; 5785 int keep_locks; 5786 int reada_slot; 5787 int reada_count; 5788 }; 5789 5790 #define DROP_REFERENCE 1 5791 #define UPDATE_BACKREF 2 5792 5793 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 5794 struct btrfs_root *root, 5795 struct walk_control *wc, 5796 struct btrfs_path *path) 5797 { 5798 u64 bytenr; 5799 u64 generation; 5800 u64 refs; 5801 u64 flags; 5802 u32 nritems; 5803 u32 blocksize; 5804 struct btrfs_key key; 5805 struct extent_buffer *eb; 5806 int ret; 5807 int slot; 5808 int nread = 0; 5809 5810 if (path->slots[wc->level] < wc->reada_slot) { 5811 wc->reada_count = wc->reada_count * 2 / 3; 5812 wc->reada_count = max(wc->reada_count, 2); 5813 } else { 5814 wc->reada_count = wc->reada_count * 3 / 2; 5815 wc->reada_count = min_t(int, wc->reada_count, 5816 BTRFS_NODEPTRS_PER_BLOCK(root)); 5817 } 5818 5819 eb = path->nodes[wc->level]; 5820 nritems = btrfs_header_nritems(eb); 5821 blocksize = btrfs_level_size(root, wc->level - 1); 5822 5823 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 5824 if (nread >= wc->reada_count) 5825 break; 5826 5827 cond_resched(); 5828 bytenr = btrfs_node_blockptr(eb, slot); 5829 generation = btrfs_node_ptr_generation(eb, slot); 5830 5831 if (slot == path->slots[wc->level]) 5832 goto reada; 5833 5834 if (wc->stage == UPDATE_BACKREF && 5835 generation <= root->root_key.offset) 5836 continue; 5837 5838 /* We don't lock the tree block, it's OK to be racy here */ 5839 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5840 &refs, &flags); 5841 BUG_ON(ret); 5842 BUG_ON(refs == 0); 5843 5844 if (wc->stage == DROP_REFERENCE) { 5845 if (refs 
== 1)
				goto reada;

			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				continue;
			btrfs_node_key_to_cpu(eb, &key, slot);
			ret = btrfs_comp_cpu_keys(&key,
						  &wc->update_progress);
			if (ret < 0)
				continue;
		} else {
			if (wc->level == 1 &&
			    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				continue;
		}
reada:
		ret = readahead_tree_block(root, bytenr, blocksize,
					   generation);
		if (ret)
			break;
		nread++;
	}
	wc->reada_slot = slot;
}

/*
 * helper to process tree block while walking down the tree.
 *
 * when wc->stage == UPDATE_BACKREF, this function updates
 * back refs for pointers in the block.
 *
 * NOTE: return value 1 means we should stop walking down.
 */
static noinline int walk_down_proc(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int lookup_info)
{
	int level = wc->level;
	struct extent_buffer *eb = path->nodes[level];
	u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF;
	int ret;

	if (wc->stage == UPDATE_BACKREF &&
	    btrfs_header_owner(eb) != root->root_key.objectid)
		return 1;

	/*
	 * when reference count of tree block is 1, it won't increase
	 * again. once full backref flag is set, we never clear it.
	 */
	if (lookup_info &&
	    ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) ||
	     (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_lookup_extent_info(trans, root,
					       eb->start, eb->len,
					       &wc->refs[level],
					       &wc->flags[level]);
		BUG_ON(ret);
		BUG_ON(wc->refs[level] == 0);
	}

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level] > 1)
			return 1;

		if (path->locks[level] && !wc->keep_locks) {
			btrfs_tree_unlock(eb);
			path->locks[level] = 0;
		}
		return 0;
	}

	/* wc->stage == UPDATE_BACKREF */
	if (!(wc->flags[level] & flag)) {
		BUG_ON(!path->locks[level]);
		ret = btrfs_inc_ref(trans, root, eb, 1);
		BUG_ON(ret);
		ret = btrfs_dec_ref(trans, root, eb, 0);
		BUG_ON(ret);
		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
						  eb->len, flag, 0);
		BUG_ON(ret);
		wc->flags[level] |= flag;
	}

	/*
	 * the block is shared by multiple trees, so it's not good to
	 * keep the tree lock
	 */
	if (path->locks[level] && level > 0) {
		btrfs_tree_unlock(eb);
		path->locks[level] = 0;
	}
	return 0;
}

/*
 * helper to process tree block pointer.
 *
 * when wc->stage == DROP_REFERENCE, this function checks
 * reference count of the block pointed to. if the block
 * is shared and we need to update back refs for the subtree
 * rooted at the block, this function changes wc->stage to
 * UPDATE_BACKREF. if the block is shared and there is no
 * need to update back refs, this function drops the reference
 * to the block.
 *
 * NOTE: return value 1 means we should stop walking down.
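 *
 * (in this version, subtrees created before the snapshot and subtrees
 *  behind a level-1 node carrying FULL_BACKREF are skipped outright,
 *  since their backrefs never need per-extent updates here.)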
 */
static noinline int do_walk_down(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct walk_control *wc, int *lookup_info)
{
	u64 bytenr;
	u64 generation;
	u64 parent;
	u32 blocksize;
	struct btrfs_key key;
	struct extent_buffer *next;
	int level = wc->level;
	int reada = 0;
	int ret = 0;

	generation = btrfs_node_ptr_generation(path->nodes[level],
					       path->slots[level]);
	/*
	 * if the lower level block was created before the snapshot
	 * was created, we know there is no need to update back refs
	 * for the subtree
	 */
	if (wc->stage == UPDATE_BACKREF &&
	    generation <= root->root_key.offset) {
		*lookup_info = 1;
		return 1;
	}

	bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]);
	blocksize = btrfs_level_size(root, level - 1);

	next = btrfs_find_tree_block(root, bytenr, blocksize);
	if (!next) {
		next = btrfs_find_create_tree_block(root, bytenr, blocksize);
		if (!next)
			return -ENOMEM;
		reada = 1;
	}
	btrfs_tree_lock(next);
	btrfs_set_lock_blocking(next);

	ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize,
				       &wc->refs[level - 1],
				       &wc->flags[level - 1]);
	BUG_ON(ret);
	BUG_ON(wc->refs[level - 1] == 0);
	*lookup_info = 0;

	if (wc->stage == DROP_REFERENCE) {
		if (wc->refs[level - 1] > 1) {
			if (level == 1 &&
			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
				goto skip;

			if (!wc->update_ref ||
			    generation <= root->root_key.offset)
				goto skip;

			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			ret = btrfs_comp_cpu_keys(&key, &wc->update_progress);
			if (ret < 0)
				goto skip;

			wc->stage = UPDATE_BACKREF;
			wc->shared_level = level - 1;
		}
	} else {
		if (level == 1 &&
		    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
			goto skip;
	}

	if (!btrfs_buffer_uptodate(next, generation)) {
		btrfs_tree_unlock(next);
		free_extent_buffer(next);
		next = NULL;
		*lookup_info = 1;
	}

	if (!next) {
		if (reada && level == 1)
			reada_walk_down(trans, root, wc, path);
		next = read_tree_block(root, bytenr, blocksize, generation);
		if (!next)
			return -EIO;
		btrfs_tree_lock(next);
		btrfs_set_lock_blocking(next);
	}

	level--;
	BUG_ON(level != btrfs_header_level(next));
	path->nodes[level] = next;
	path->slots[level] = 0;
	path->locks[level] = 1;
	wc->level = level;
	if (wc->level == 1)
		wc->reada_slot = 0;
	return 0;
skip:
	wc->refs[level - 1] = 0;
	wc->flags[level - 1] = 0;
	if (wc->stage == DROP_REFERENCE) {
		if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
			parent = path->nodes[level]->start;
		} else {
			BUG_ON(root->root_key.objectid !=
			       btrfs_header_owner(path->nodes[level]));
			parent = 0;
		}

		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
					root->root_key.objectid, level - 1, 0);
		BUG_ON(ret);
	}
	btrfs_tree_unlock(next);
	free_extent_buffer(next);
	*lookup_info = 1;
	return 1;
}

/*
 * helper to process tree block while walking up the tree.
 *
 * when wc->stage == DROP_REFERENCE, this function drops
 * reference count on the block.
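 *
 * (when the block's own reference count is 1, the block is also cleaned
 *  with clean_tree_block() and released via btrfs_free_tree_block() on
 *  the way back up.)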
6086 * 6087 * when wc->stage == UPDATE_BACKREF, this function changes 6088 * wc->stage back to DROP_REFERENCE if we changed wc->stage 6089 * to UPDATE_BACKREF previously while processing the block. 6090 * 6091 * NOTE: return value 1 means we should stop walking up. 6092 */ 6093 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 6094 struct btrfs_root *root, 6095 struct btrfs_path *path, 6096 struct walk_control *wc) 6097 { 6098 int ret; 6099 int level = wc->level; 6100 struct extent_buffer *eb = path->nodes[level]; 6101 u64 parent = 0; 6102 6103 if (wc->stage == UPDATE_BACKREF) { 6104 BUG_ON(wc->shared_level < level); 6105 if (level < wc->shared_level) 6106 goto out; 6107 6108 ret = find_next_key(path, level + 1, &wc->update_progress); 6109 if (ret > 0) 6110 wc->update_ref = 0; 6111 6112 wc->stage = DROP_REFERENCE; 6113 wc->shared_level = -1; 6114 path->slots[level] = 0; 6115 6116 /* 6117 * check reference count again if the block isn't locked. 6118 * we should start walking down the tree again if reference 6119 * count is one. 6120 */ 6121 if (!path->locks[level]) { 6122 BUG_ON(level == 0); 6123 btrfs_tree_lock(eb); 6124 btrfs_set_lock_blocking(eb); 6125 path->locks[level] = 1; 6126 6127 ret = btrfs_lookup_extent_info(trans, root, 6128 eb->start, eb->len, 6129 &wc->refs[level], 6130 &wc->flags[level]); 6131 BUG_ON(ret); 6132 BUG_ON(wc->refs[level] == 0); 6133 if (wc->refs[level] == 1) { 6134 btrfs_tree_unlock(eb); 6135 path->locks[level] = 0; 6136 return 1; 6137 } 6138 } 6139 } 6140 6141 /* wc->stage == DROP_REFERENCE */ 6142 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 6143 6144 if (wc->refs[level] == 1) { 6145 if (level == 0) { 6146 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6147 ret = btrfs_dec_ref(trans, root, eb, 1); 6148 else 6149 ret = btrfs_dec_ref(trans, root, eb, 0); 6150 BUG_ON(ret); 6151 } 6152 /* make block locked assertion in clean_tree_block happy */ 6153 if (!path->locks[level] && 6154 btrfs_header_generation(eb) == trans->transid) { 6155 btrfs_tree_lock(eb); 6156 btrfs_set_lock_blocking(eb); 6157 path->locks[level] = 1; 6158 } 6159 clean_tree_block(trans, root, eb); 6160 } 6161 6162 if (eb == root->node) { 6163 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6164 parent = eb->start; 6165 else 6166 BUG_ON(root->root_key.objectid != 6167 btrfs_header_owner(eb)); 6168 } else { 6169 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6170 parent = path->nodes[level + 1]->start; 6171 else 6172 BUG_ON(root->root_key.objectid != 6173 btrfs_header_owner(path->nodes[level + 1])); 6174 } 6175 6176 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); 6177 out: 6178 wc->refs[level] = 0; 6179 wc->flags[level] = 0; 6180 return 0; 6181 } 6182 6183 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 6184 struct btrfs_root *root, 6185 struct btrfs_path *path, 6186 struct walk_control *wc) 6187 { 6188 int level = wc->level; 6189 int lookup_info = 1; 6190 int ret; 6191 6192 while (level >= 0) { 6193 ret = walk_down_proc(trans, root, path, wc, lookup_info); 6194 if (ret > 0) 6195 break; 6196 6197 if (level == 0) 6198 break; 6199 6200 if (path->slots[level] >= 6201 btrfs_header_nritems(path->nodes[level])) 6202 break; 6203 6204 ret = do_walk_down(trans, root, path, wc, &lookup_info); 6205 if (ret > 0) { 6206 path->slots[level]++; 6207 continue; 6208 } else if (ret < 0) 6209 return ret; 6210 level = wc->level; 6211 } 6212 return 0; 6213 } 6214 6215 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 
				   struct btrfs_root *root,
				   struct btrfs_path *path,
				   struct walk_control *wc, int max_level)
{
	int level = wc->level;
	int ret;

	path->slots[level] = btrfs_header_nritems(path->nodes[level]);
	while (level < max_level && path->nodes[level]) {
		wc->level = level;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			path->slots[level]++;
			return 0;
		} else {
			ret = walk_up_proc(trans, root, path, wc);
			if (ret > 0)
				return 0;

			if (path->locks[level]) {
				btrfs_tree_unlock(path->nodes[level]);
				path->locks[level] = 0;
			}
			free_extent_buffer(path->nodes[level]);
			path->nodes[level] = NULL;
			level++;
		}
	}
	return 1;
}

/*
 * drop a subvolume tree.
 *
 * this function traverses the tree, freeing any blocks that are only
 * referenced by the tree.
 *
 * when a shared tree block is found, this function decreases its
 * reference count by one. if update_ref is true, this function
 * also makes sure backrefs for the shared block and all lower level
 * blocks are properly updated.
 */
int btrfs_drop_snapshot(struct btrfs_root *root,
			struct btrfs_block_rsv *block_rsv, int update_ref)
{
	struct btrfs_path *path;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	struct btrfs_root_item *root_item = &root->root_item;
	struct walk_control *wc;
	struct btrfs_key key;
	int err = 0;
	int ret;
	int level;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	wc = kzalloc(sizeof(*wc), GFP_NOFS);
	BUG_ON(!wc);

	trans = btrfs_start_transaction(tree_root, 0);
	BUG_ON(IS_ERR(trans));

	if (block_rsv)
		trans->block_rsv = block_rsv;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_header_level(root->node);
		path->nodes[level] = btrfs_lock_root_node(root);
		btrfs_set_lock_blocking(path->nodes[level]);
		path->slots[level] = 0;
		path->locks[level] = 1;
		memset(&wc->update_progress, 0,
		       sizeof(wc->update_progress));
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
		memcpy(&wc->update_progress, &key,
		       sizeof(wc->update_progress));

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		path->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out;
		}
		WARN_ON(ret > 0);

		/*
		 * unlock our path, this is safe because only this
		 * function is allowed to delete this snapshot
		 */
		btrfs_unlock_up_safe(path, 0);

		level = btrfs_header_level(root->node);
		while (1) {
			btrfs_tree_lock(path->nodes[level]);
			btrfs_set_lock_blocking(path->nodes[level]);

			ret = btrfs_lookup_extent_info(trans, root,
						       path->nodes[level]->start,
						       path->nodes[level]->len,
						       &wc->refs[level],
						       &wc->flags[level]);
			BUG_ON(ret);
			BUG_ON(wc->refs[level] == 0);

			if (level == root_item->drop_level)
				break;

			btrfs_tree_unlock(path->nodes[level]);
			WARN_ON(wc->refs[level] != 1);
			level--;
		}
	}

	wc->level = level;
	wc->shared_level = -1;
	wc->stage = DROP_REFERENCE;
	wc->update_ref = update_ref;
	wc->keep_locks = 0;
	wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root);

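	/*
	 * main drop loop: walk down as far as we can free, walk back up to
	 * drop this level's reference, and record drop_progress/drop_level
	 * in the root item so the walk can resume if the transaction has to
	 * be ended and restarted below.
	 */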
while (1) { 6343 ret = walk_down_tree(trans, root, path, wc); 6344 if (ret < 0) { 6345 err = ret; 6346 break; 6347 } 6348 6349 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 6350 if (ret < 0) { 6351 err = ret; 6352 break; 6353 } 6354 6355 if (ret > 0) { 6356 BUG_ON(wc->stage != DROP_REFERENCE); 6357 break; 6358 } 6359 6360 if (wc->stage == DROP_REFERENCE) { 6361 level = wc->level; 6362 btrfs_node_key(path->nodes[level], 6363 &root_item->drop_progress, 6364 path->slots[level]); 6365 root_item->drop_level = level; 6366 } 6367 6368 BUG_ON(wc->level == 0); 6369 if (btrfs_should_end_transaction(trans, tree_root)) { 6370 ret = btrfs_update_root(trans, tree_root, 6371 &root->root_key, 6372 root_item); 6373 BUG_ON(ret); 6374 6375 btrfs_end_transaction_throttle(trans, tree_root); 6376 trans = btrfs_start_transaction(tree_root, 0); 6377 BUG_ON(IS_ERR(trans)); 6378 if (block_rsv) 6379 trans->block_rsv = block_rsv; 6380 } 6381 } 6382 btrfs_release_path(path); 6383 BUG_ON(err); 6384 6385 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6386 BUG_ON(ret); 6387 6388 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 6389 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 6390 NULL, NULL); 6391 BUG_ON(ret < 0); 6392 if (ret > 0) { 6393 /* if we fail to delete the orphan item this time 6394 * around, it'll get picked up the next time. 6395 * 6396 * The most common failure here is just -ENOENT. 6397 */ 6398 btrfs_del_orphan_item(trans, tree_root, 6399 root->root_key.objectid); 6400 } 6401 } 6402 6403 if (root->in_radix) { 6404 btrfs_free_fs_root(tree_root->fs_info, root); 6405 } else { 6406 free_extent_buffer(root->node); 6407 free_extent_buffer(root->commit_root); 6408 kfree(root); 6409 } 6410 out: 6411 btrfs_end_transaction_throttle(trans, tree_root); 6412 kfree(wc); 6413 btrfs_free_path(path); 6414 return err; 6415 } 6416 6417 /* 6418 * drop subtree rooted at tree block 'node'. 
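 * (only called for relocation trees; both 'node' and 'parent' must be
 *  locked by the caller, and 'parent' is treated as a FULL_BACKREF block.)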
6419 * 6420 * NOTE: this function will unlock and release tree block 'node' 6421 */ 6422 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 6423 struct btrfs_root *root, 6424 struct extent_buffer *node, 6425 struct extent_buffer *parent) 6426 { 6427 struct btrfs_path *path; 6428 struct walk_control *wc; 6429 int level; 6430 int parent_level; 6431 int ret = 0; 6432 int wret; 6433 6434 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6435 6436 path = btrfs_alloc_path(); 6437 if (!path) 6438 return -ENOMEM; 6439 6440 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6441 if (!wc) { 6442 btrfs_free_path(path); 6443 return -ENOMEM; 6444 } 6445 6446 btrfs_assert_tree_locked(parent); 6447 parent_level = btrfs_header_level(parent); 6448 extent_buffer_get(parent); 6449 path->nodes[parent_level] = parent; 6450 path->slots[parent_level] = btrfs_header_nritems(parent); 6451 6452 btrfs_assert_tree_locked(node); 6453 level = btrfs_header_level(node); 6454 path->nodes[level] = node; 6455 path->slots[level] = 0; 6456 path->locks[level] = 1; 6457 6458 wc->refs[parent_level] = 1; 6459 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 6460 wc->level = level; 6461 wc->shared_level = -1; 6462 wc->stage = DROP_REFERENCE; 6463 wc->update_ref = 0; 6464 wc->keep_locks = 1; 6465 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 6466 6467 while (1) { 6468 wret = walk_down_tree(trans, root, path, wc); 6469 if (wret < 0) { 6470 ret = wret; 6471 break; 6472 } 6473 6474 wret = walk_up_tree(trans, root, path, wc, parent_level); 6475 if (wret < 0) 6476 ret = wret; 6477 if (wret != 0) 6478 break; 6479 } 6480 6481 kfree(wc); 6482 btrfs_free_path(path); 6483 return ret; 6484 } 6485 6486 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6487 { 6488 u64 num_devices; 6489 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6490 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6491 6492 /* 6493 * we add in the count of missing devices because we want 6494 * to make sure that any RAID levels on a degraded FS 6495 * continue to be honored. 
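 *
 * the mapping below is: with one usable device, RAID1/RAID10 degrade to
 * DUP and RAID0 to single; with more devices, DUP is upgraded to RAID1
 * and single to RAID0.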
6496 */ 6497 num_devices = root->fs_info->fs_devices->rw_devices + 6498 root->fs_info->fs_devices->missing_devices; 6499 6500 if (num_devices == 1) { 6501 stripped |= BTRFS_BLOCK_GROUP_DUP; 6502 stripped = flags & ~stripped; 6503 6504 /* turn raid0 into single device chunks */ 6505 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6506 return stripped; 6507 6508 /* turn mirroring into duplication */ 6509 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6510 BTRFS_BLOCK_GROUP_RAID10)) 6511 return stripped | BTRFS_BLOCK_GROUP_DUP; 6512 return flags; 6513 } else { 6514 /* they already had raid on here, just return */ 6515 if (flags & stripped) 6516 return flags; 6517 6518 stripped |= BTRFS_BLOCK_GROUP_DUP; 6519 stripped = flags & ~stripped; 6520 6521 /* switch duplicated blocks with raid1 */ 6522 if (flags & BTRFS_BLOCK_GROUP_DUP) 6523 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6524 6525 /* turn single device chunks into raid0 */ 6526 return stripped | BTRFS_BLOCK_GROUP_RAID0; 6527 } 6528 return flags; 6529 } 6530 6531 static int set_block_group_ro(struct btrfs_block_group_cache *cache) 6532 { 6533 struct btrfs_space_info *sinfo = cache->space_info; 6534 u64 num_bytes; 6535 int ret = -ENOSPC; 6536 6537 if (cache->ro) 6538 return 0; 6539 6540 spin_lock(&sinfo->lock); 6541 spin_lock(&cache->lock); 6542 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6543 cache->bytes_super - btrfs_block_group_used(&cache->item); 6544 6545 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6546 sinfo->bytes_may_use + sinfo->bytes_readonly + 6547 cache->reserved_pinned + num_bytes <= sinfo->total_bytes) { 6548 sinfo->bytes_readonly += num_bytes; 6549 sinfo->bytes_reserved += cache->reserved_pinned; 6550 cache->reserved_pinned = 0; 6551 cache->ro = 1; 6552 ret = 0; 6553 } 6554 6555 spin_unlock(&cache->lock); 6556 spin_unlock(&sinfo->lock); 6557 return ret; 6558 } 6559 6560 int btrfs_set_block_group_ro(struct btrfs_root *root, 6561 struct btrfs_block_group_cache *cache) 6562 6563 { 6564 struct btrfs_trans_handle *trans; 6565 u64 alloc_flags; 6566 int ret; 6567 6568 BUG_ON(cache->ro); 6569 6570 trans = btrfs_join_transaction(root); 6571 BUG_ON(IS_ERR(trans)); 6572 6573 alloc_flags = update_block_group_flags(root, cache->flags); 6574 if (alloc_flags != cache->flags) 6575 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6576 CHUNK_ALLOC_FORCE); 6577 6578 ret = set_block_group_ro(cache); 6579 if (!ret) 6580 goto out; 6581 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 6582 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6583 CHUNK_ALLOC_FORCE); 6584 if (ret < 0) 6585 goto out; 6586 ret = set_block_group_ro(cache); 6587 out: 6588 btrfs_end_transaction(trans, root); 6589 return ret; 6590 } 6591 6592 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 6593 struct btrfs_root *root, u64 type) 6594 { 6595 u64 alloc_flags = get_alloc_profile(root, type); 6596 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6597 CHUNK_ALLOC_FORCE); 6598 } 6599 6600 /* 6601 * helper to account the unused space of all the readonly block group in the 6602 * list. takes mirrors into account. 
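 * unused bytes in RAID1/RAID10/DUP groups are counted twice, since
 * freeing them releases space on two copies.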
 */
static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list)
{
	struct btrfs_block_group_cache *block_group;
	u64 free_bytes = 0;
	int factor;

	list_for_each_entry(block_group, groups_list, list) {
		spin_lock(&block_group->lock);

		if (!block_group->ro) {
			spin_unlock(&block_group->lock);
			continue;
		}

		if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 |
					  BTRFS_BLOCK_GROUP_RAID10 |
					  BTRFS_BLOCK_GROUP_DUP))
			factor = 2;
		else
			factor = 1;

		free_bytes += (block_group->key.offset -
			       btrfs_block_group_used(&block_group->item)) *
			       factor;

		spin_unlock(&block_group->lock);
	}

	return free_bytes;
}

/*
 * helper to account the unused space of all the readonly block groups in the
 * space_info. takes mirrors into account.
 */
u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo)
{
	int i;
	u64 free_bytes = 0;

	spin_lock(&sinfo->lock);

	for (i = 0; i < BTRFS_NR_RAID_TYPES; i++)
		if (!list_empty(&sinfo->block_groups[i]))
			free_bytes += __btrfs_get_ro_block_group_free_space(
						&sinfo->block_groups[i]);

	spin_unlock(&sinfo->lock);

	return free_bytes;
}

int btrfs_set_block_group_rw(struct btrfs_root *root,
			     struct btrfs_block_group_cache *cache)
{
	struct btrfs_space_info *sinfo = cache->space_info;
	u64 num_bytes;

	BUG_ON(!cache->ro);

	spin_lock(&sinfo->lock);
	spin_lock(&cache->lock);
	num_bytes = cache->key.offset - cache->reserved - cache->pinned -
		    cache->bytes_super - btrfs_block_group_used(&cache->item);
	sinfo->bytes_readonly -= num_bytes;
	cache->ro = 0;
	spin_unlock(&cache->lock);
	spin_unlock(&sinfo->lock);
	return 0;
}

/*
 * checks to see if it's even possible to relocate this block group.
 *
 * @return - -1 if it's not a good idea to relocate this block group, 0 if it's
 * ok to go ahead and try.
 */
int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices;
	struct btrfs_device *device;
	int full = 0;
	int ret = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);

	/* odd, couldn't find the block group, leave it alone */
	if (!block_group)
		return -1;

	/* no bytes used, we're good */
	if (!btrfs_block_group_used(&block_group->item))
		goto out;

	space_info = block_group->space_info;
	spin_lock(&space_info->lock);

	full = space_info->full;

	/*
	 * if this is the last block group we have in this space, we can't
	 * relocate it unless we're able to allocate a new chunk below.
	 *
	 * Otherwise, we need to make sure we have room in the space to handle
	 * all of the extents from this block group.
If we can, we're good 6711 */ 6712 if ((space_info->total_bytes != block_group->key.offset) && 6713 (space_info->bytes_used + space_info->bytes_reserved + 6714 space_info->bytes_pinned + space_info->bytes_readonly + 6715 btrfs_block_group_used(&block_group->item) < 6716 space_info->total_bytes)) { 6717 spin_unlock(&space_info->lock); 6718 goto out; 6719 } 6720 spin_unlock(&space_info->lock); 6721 6722 /* 6723 * ok we don't have enough space, but maybe we have free space on our 6724 * devices to allocate new chunks for relocation, so loop through our 6725 * alloc devices and guess if we have enough space. However, if we 6726 * were marked as full, then we know there aren't enough chunks, and we 6727 * can just return. 6728 */ 6729 ret = -1; 6730 if (full) 6731 goto out; 6732 6733 mutex_lock(&root->fs_info->chunk_mutex); 6734 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 6735 u64 min_free = btrfs_block_group_used(&block_group->item); 6736 u64 dev_offset; 6737 6738 /* 6739 * check to make sure we can actually find a chunk with enough 6740 * space to fit our block group in. 6741 */ 6742 if (device->total_bytes > device->bytes_used + min_free) { 6743 ret = find_free_dev_extent(NULL, device, min_free, 6744 &dev_offset, NULL); 6745 if (!ret) 6746 break; 6747 ret = -1; 6748 } 6749 } 6750 mutex_unlock(&root->fs_info->chunk_mutex); 6751 out: 6752 btrfs_put_block_group(block_group); 6753 return ret; 6754 } 6755 6756 static int find_first_block_group(struct btrfs_root *root, 6757 struct btrfs_path *path, struct btrfs_key *key) 6758 { 6759 int ret = 0; 6760 struct btrfs_key found_key; 6761 struct extent_buffer *leaf; 6762 int slot; 6763 6764 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 6765 if (ret < 0) 6766 goto out; 6767 6768 while (1) { 6769 slot = path->slots[0]; 6770 leaf = path->nodes[0]; 6771 if (slot >= btrfs_header_nritems(leaf)) { 6772 ret = btrfs_next_leaf(root, path); 6773 if (ret == 0) 6774 continue; 6775 if (ret < 0) 6776 goto out; 6777 break; 6778 } 6779 btrfs_item_key_to_cpu(leaf, &found_key, slot); 6780 6781 if (found_key.objectid >= key->objectid && 6782 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 6783 ret = 0; 6784 goto out; 6785 } 6786 path->slots[0]++; 6787 } 6788 out: 6789 return ret; 6790 } 6791 6792 void btrfs_put_block_group_cache(struct btrfs_fs_info *info) 6793 { 6794 struct btrfs_block_group_cache *block_group; 6795 u64 last = 0; 6796 6797 while (1) { 6798 struct inode *inode; 6799 6800 block_group = btrfs_lookup_first_block_group(info, last); 6801 while (block_group) { 6802 spin_lock(&block_group->lock); 6803 if (block_group->iref) 6804 break; 6805 spin_unlock(&block_group->lock); 6806 block_group = next_block_group(info->tree_root, 6807 block_group); 6808 } 6809 if (!block_group) { 6810 if (last == 0) 6811 break; 6812 last = 0; 6813 continue; 6814 } 6815 6816 inode = block_group->inode; 6817 block_group->iref = 0; 6818 block_group->inode = NULL; 6819 spin_unlock(&block_group->lock); 6820 iput(inode); 6821 last = block_group->key.objectid + block_group->key.offset; 6822 btrfs_put_block_group(block_group); 6823 } 6824 } 6825 6826 int btrfs_free_block_groups(struct btrfs_fs_info *info) 6827 { 6828 struct btrfs_block_group_cache *block_group; 6829 struct btrfs_space_info *space_info; 6830 struct btrfs_caching_control *caching_ctl; 6831 struct rb_node *n; 6832 6833 down_write(&info->extent_commit_sem); 6834 while (!list_empty(&info->caching_block_groups)) { 6835 caching_ctl = list_entry(info->caching_block_groups.next, 6836 struct 
btrfs_caching_control, list); 6837 list_del(&caching_ctl->list); 6838 put_caching_control(caching_ctl); 6839 } 6840 up_write(&info->extent_commit_sem); 6841 6842 spin_lock(&info->block_group_cache_lock); 6843 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6844 block_group = rb_entry(n, struct btrfs_block_group_cache, 6845 cache_node); 6846 rb_erase(&block_group->cache_node, 6847 &info->block_group_cache_tree); 6848 spin_unlock(&info->block_group_cache_lock); 6849 6850 down_write(&block_group->space_info->groups_sem); 6851 list_del(&block_group->list); 6852 up_write(&block_group->space_info->groups_sem); 6853 6854 if (block_group->cached == BTRFS_CACHE_STARTED) 6855 wait_block_group_cache_done(block_group); 6856 6857 /* 6858 * We haven't cached this block group, which means we could 6859 * possibly have excluded extents on this block group. 6860 */ 6861 if (block_group->cached == BTRFS_CACHE_NO) 6862 free_excluded_extents(info->extent_root, block_group); 6863 6864 btrfs_remove_free_space_cache(block_group); 6865 btrfs_put_block_group(block_group); 6866 6867 spin_lock(&info->block_group_cache_lock); 6868 } 6869 spin_unlock(&info->block_group_cache_lock); 6870 6871 /* now that all the block groups are freed, go through and 6872 * free all the space_info structs. This is only called during 6873 * the final stages of unmount, and so we know nobody is 6874 * using them. We call synchronize_rcu() once before we start, 6875 * just to be on the safe side. 6876 */ 6877 synchronize_rcu(); 6878 6879 release_global_block_rsv(info); 6880 6881 while(!list_empty(&info->space_info)) { 6882 space_info = list_entry(info->space_info.next, 6883 struct btrfs_space_info, 6884 list); 6885 if (space_info->bytes_pinned > 0 || 6886 space_info->bytes_reserved > 0) { 6887 WARN_ON(1); 6888 dump_space_info(space_info, 0, 0); 6889 } 6890 list_del(&space_info->list); 6891 kfree(space_info); 6892 } 6893 return 0; 6894 } 6895 6896 static void __link_block_group(struct btrfs_space_info *space_info, 6897 struct btrfs_block_group_cache *cache) 6898 { 6899 int index = get_block_group_index(cache); 6900 6901 down_write(&space_info->groups_sem); 6902 list_add_tail(&cache->list, &space_info->block_groups[index]); 6903 up_write(&space_info->groups_sem); 6904 } 6905 6906 int btrfs_read_block_groups(struct btrfs_root *root) 6907 { 6908 struct btrfs_path *path; 6909 int ret; 6910 struct btrfs_block_group_cache *cache; 6911 struct btrfs_fs_info *info = root->fs_info; 6912 struct btrfs_space_info *space_info; 6913 struct btrfs_key key; 6914 struct btrfs_key found_key; 6915 struct extent_buffer *leaf; 6916 int need_clear = 0; 6917 u64 cache_gen; 6918 6919 root = info->extent_root; 6920 key.objectid = 0; 6921 key.offset = 0; 6922 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 6923 path = btrfs_alloc_path(); 6924 if (!path) 6925 return -ENOMEM; 6926 path->reada = 1; 6927 6928 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6929 if (cache_gen != 0 && 6930 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 6931 need_clear = 1; 6932 if (btrfs_test_opt(root, CLEAR_CACHE)) 6933 need_clear = 1; 6934 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) 6935 printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 6936 6937 while (1) { 6938 ret = find_first_block_group(root, path, &key); 6939 if (ret > 0) 6940 break; 6941 if (ret != 0) 6942 goto error; 6943 leaf = path->nodes[0]; 6944 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6945 cache = kzalloc(sizeof(*cache), 
GFP_NOFS);
		if (!cache) {
			ret = -ENOMEM;
			goto error;
		}
		cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl),
						GFP_NOFS);
		if (!cache->free_space_ctl) {
			kfree(cache);
			ret = -ENOMEM;
			goto error;
		}

		atomic_set(&cache->count, 1);
		spin_lock_init(&cache->lock);
		cache->fs_info = info;
		INIT_LIST_HEAD(&cache->list);
		INIT_LIST_HEAD(&cache->cluster_list);

		if (need_clear)
			cache->disk_cache_state = BTRFS_DC_CLEAR;

		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
		memcpy(&cache->key, &found_key, sizeof(found_key));

		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(path);
		cache->flags = btrfs_block_group_flags(&cache->item);
		cache->sectorsize = root->sectorsize;

		btrfs_init_free_space_ctl(cache);

		/*
		 * We need to exclude the super stripes now so that the space
		 * info has super bytes accounted for, otherwise we'll think
		 * we have more space than we actually do.
		 */
		exclude_super_stripes(root, cache);

		/*
		 * check for two cases, either we are full, and therefore
		 * don't need to bother with the caching work since we won't
		 * find any space, or we are empty, and we can just add all
		 * the space in and be done with it. This saves us a lot of
		 * time, particularly in the full case.
		 */
		if (found_key.offset == btrfs_block_group_used(&cache->item)) {
			cache->last_byte_to_unpin = (u64)-1;
			cache->cached = BTRFS_CACHE_FINISHED;
			free_excluded_extents(root, cache);
		} else if (btrfs_block_group_used(&cache->item) == 0) {
			cache->last_byte_to_unpin = (u64)-1;
			cache->cached = BTRFS_CACHE_FINISHED;
			add_new_free_space(cache, root->fs_info,
					   found_key.objectid,
					   found_key.objectid +
					   found_key.offset);
			free_excluded_extents(root, cache);
		}

		ret = update_space_info(info, cache->flags, found_key.offset,
					btrfs_block_group_used(&cache->item),
					&space_info);
		BUG_ON(ret);
		cache->space_info = space_info;
		spin_lock(&cache->space_info->lock);
		cache->space_info->bytes_readonly += cache->bytes_super;
		spin_unlock(&cache->space_info->lock);

		__link_block_group(space_info, cache);

		ret = btrfs_add_block_group_cache(root->fs_info, cache);
		BUG_ON(ret);

		set_avail_alloc_bits(root->fs_info, cache->flags);
		if (btrfs_chunk_readonly(root, cache->key.objectid))
			set_block_group_ro(cache);
	}

	list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) {
		if (!(get_alloc_profile(root, space_info->flags) &
		      (BTRFS_BLOCK_GROUP_RAID10 |
		       BTRFS_BLOCK_GROUP_RAID1 |
		       BTRFS_BLOCK_GROUP_DUP)))
			continue;
		/*
		 * avoid allocating from un-mirrored block group if there are
		 * mirrored block groups.
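		 * (the hard-coded indices 3 and 4 below are assumed to be the
		 *  raid-type slots for RAID0 and single block groups; marking
		 *  those groups read-only keeps the allocator off them.)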
7035 */ 7036 list_for_each_entry(cache, &space_info->block_groups[3], list) 7037 set_block_group_ro(cache); 7038 list_for_each_entry(cache, &space_info->block_groups[4], list) 7039 set_block_group_ro(cache); 7040 } 7041 7042 init_global_block_rsv(info); 7043 ret = 0; 7044 error: 7045 btrfs_free_path(path); 7046 return ret; 7047 } 7048 7049 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7050 struct btrfs_root *root, u64 bytes_used, 7051 u64 type, u64 chunk_objectid, u64 chunk_offset, 7052 u64 size) 7053 { 7054 int ret; 7055 struct btrfs_root *extent_root; 7056 struct btrfs_block_group_cache *cache; 7057 7058 extent_root = root->fs_info->extent_root; 7059 7060 root->fs_info->last_trans_log_full_commit = trans->transid; 7061 7062 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7063 if (!cache) 7064 return -ENOMEM; 7065 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 7066 GFP_NOFS); 7067 if (!cache->free_space_ctl) { 7068 kfree(cache); 7069 return -ENOMEM; 7070 } 7071 7072 cache->key.objectid = chunk_offset; 7073 cache->key.offset = size; 7074 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7075 cache->sectorsize = root->sectorsize; 7076 cache->fs_info = root->fs_info; 7077 7078 atomic_set(&cache->count, 1); 7079 spin_lock_init(&cache->lock); 7080 INIT_LIST_HEAD(&cache->list); 7081 INIT_LIST_HEAD(&cache->cluster_list); 7082 7083 btrfs_init_free_space_ctl(cache); 7084 7085 btrfs_set_block_group_used(&cache->item, bytes_used); 7086 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7087 cache->flags = type; 7088 btrfs_set_block_group_flags(&cache->item, type); 7089 7090 cache->last_byte_to_unpin = (u64)-1; 7091 cache->cached = BTRFS_CACHE_FINISHED; 7092 exclude_super_stripes(root, cache); 7093 7094 add_new_free_space(cache, root->fs_info, chunk_offset, 7095 chunk_offset + size); 7096 7097 free_excluded_extents(root, cache); 7098 7099 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7100 &cache->space_info); 7101 BUG_ON(ret); 7102 7103 spin_lock(&cache->space_info->lock); 7104 cache->space_info->bytes_readonly += cache->bytes_super; 7105 spin_unlock(&cache->space_info->lock); 7106 7107 __link_block_group(cache->space_info, cache); 7108 7109 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7110 BUG_ON(ret); 7111 7112 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7113 sizeof(cache->item)); 7114 BUG_ON(ret); 7115 7116 set_avail_alloc_bits(extent_root->fs_info, type); 7117 7118 return 0; 7119 } 7120 7121 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7122 struct btrfs_root *root, u64 group_start) 7123 { 7124 struct btrfs_path *path; 7125 struct btrfs_block_group_cache *block_group; 7126 struct btrfs_free_cluster *cluster; 7127 struct btrfs_root *tree_root = root->fs_info->tree_root; 7128 struct btrfs_key key; 7129 struct inode *inode; 7130 int ret; 7131 int factor; 7132 7133 root = root->fs_info->extent_root; 7134 7135 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7136 BUG_ON(!block_group); 7137 BUG_ON(!block_group->ro); 7138 7139 /* 7140 * Free the reserved super bytes from this block group before 7141 * remove it. 
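 * (these are the bytes accounted by exclude_super_stripes() when the
 *  block group was created or read in.)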
7142 */ 7143 free_excluded_extents(root, block_group); 7144 7145 memcpy(&key, &block_group->key, sizeof(key)); 7146 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 7147 BTRFS_BLOCK_GROUP_RAID1 | 7148 BTRFS_BLOCK_GROUP_RAID10)) 7149 factor = 2; 7150 else 7151 factor = 1; 7152 7153 /* make sure this block group isn't part of an allocation cluster */ 7154 cluster = &root->fs_info->data_alloc_cluster; 7155 spin_lock(&cluster->refill_lock); 7156 btrfs_return_cluster_to_free_space(block_group, cluster); 7157 spin_unlock(&cluster->refill_lock); 7158 7159 /* 7160 * make sure this block group isn't part of a metadata 7161 * allocation cluster 7162 */ 7163 cluster = &root->fs_info->meta_alloc_cluster; 7164 spin_lock(&cluster->refill_lock); 7165 btrfs_return_cluster_to_free_space(block_group, cluster); 7166 spin_unlock(&cluster->refill_lock); 7167 7168 path = btrfs_alloc_path(); 7169 BUG_ON(!path); 7170 7171 inode = lookup_free_space_inode(root, block_group, path); 7172 if (!IS_ERR(inode)) { 7173 btrfs_orphan_add(trans, inode); 7174 clear_nlink(inode); 7175 /* One for the block groups ref */ 7176 spin_lock(&block_group->lock); 7177 if (block_group->iref) { 7178 block_group->iref = 0; 7179 block_group->inode = NULL; 7180 spin_unlock(&block_group->lock); 7181 iput(inode); 7182 } else { 7183 spin_unlock(&block_group->lock); 7184 } 7185 /* One for our lookup ref */ 7186 iput(inode); 7187 } 7188 7189 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 7190 key.offset = block_group->key.objectid; 7191 key.type = 0; 7192 7193 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 7194 if (ret < 0) 7195 goto out; 7196 if (ret > 0) 7197 btrfs_release_path(path); 7198 if (ret == 0) { 7199 ret = btrfs_del_item(trans, tree_root, path); 7200 if (ret) 7201 goto out; 7202 btrfs_release_path(path); 7203 } 7204 7205 spin_lock(&root->fs_info->block_group_cache_lock); 7206 rb_erase(&block_group->cache_node, 7207 &root->fs_info->block_group_cache_tree); 7208 spin_unlock(&root->fs_info->block_group_cache_lock); 7209 7210 down_write(&block_group->space_info->groups_sem); 7211 /* 7212 * we must use list_del_init so people can check to see if they 7213 * are still on the list after taking the semaphore 7214 */ 7215 list_del_init(&block_group->list); 7216 up_write(&block_group->space_info->groups_sem); 7217 7218 if (block_group->cached == BTRFS_CACHE_STARTED) 7219 wait_block_group_cache_done(block_group); 7220 7221 btrfs_remove_free_space_cache(block_group); 7222 7223 spin_lock(&block_group->space_info->lock); 7224 block_group->space_info->total_bytes -= block_group->key.offset; 7225 block_group->space_info->bytes_readonly -= block_group->key.offset; 7226 block_group->space_info->disk_total -= block_group->key.offset * factor; 7227 spin_unlock(&block_group->space_info->lock); 7228 7229 memcpy(&key, &block_group->key, sizeof(key)); 7230 7231 btrfs_clear_space_info_full(root->fs_info); 7232 7233 btrfs_put_block_group(block_group); 7234 btrfs_put_block_group(block_group); 7235 7236 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7237 if (ret > 0) 7238 ret = -EIO; 7239 if (ret < 0) 7240 goto out; 7241 7242 ret = btrfs_del_item(trans, root, path); 7243 out: 7244 btrfs_free_path(path); 7245 return ret; 7246 } 7247 7248 int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 7249 { 7250 struct btrfs_space_info *space_info; 7251 struct btrfs_super_block *disk_super; 7252 u64 features; 7253 u64 flags; 7254 int mixed = 0; 7255 int ret; 7256 7257 disk_super = &fs_info->super_copy; 7258 if (!btrfs_super_root(disk_super)) 7259 
return 1; 7260 7261 features = btrfs_super_incompat_flags(disk_super); 7262 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 7263 mixed = 1; 7264 7265 flags = BTRFS_BLOCK_GROUP_SYSTEM; 7266 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7267 if (ret) 7268 goto out; 7269 7270 if (mixed) { 7271 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; 7272 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7273 } else { 7274 flags = BTRFS_BLOCK_GROUP_METADATA; 7275 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7276 if (ret) 7277 goto out; 7278 7279 flags = BTRFS_BLOCK_GROUP_DATA; 7280 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7281 } 7282 out: 7283 return ret; 7284 } 7285 7286 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 7287 { 7288 return unpin_extent_range(root, start, end); 7289 } 7290 7291 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 7292 u64 num_bytes, u64 *actual_bytes) 7293 { 7294 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); 7295 } 7296 7297 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 7298 { 7299 struct btrfs_fs_info *fs_info = root->fs_info; 7300 struct btrfs_block_group_cache *cache = NULL; 7301 u64 group_trimmed; 7302 u64 start; 7303 u64 end; 7304 u64 trimmed = 0; 7305 int ret = 0; 7306 7307 cache = btrfs_lookup_block_group(fs_info, range->start); 7308 7309 while (cache) { 7310 if (cache->key.objectid >= (range->start + range->len)) { 7311 btrfs_put_block_group(cache); 7312 break; 7313 } 7314 7315 start = max(range->start, cache->key.objectid); 7316 end = min(range->start + range->len, 7317 cache->key.objectid + cache->key.offset); 7318 7319 if (end - start >= range->minlen) { 7320 if (!block_group_cache_done(cache)) { 7321 ret = cache_block_group(cache, NULL, root, 0); 7322 if (!ret) 7323 wait_block_group_cache_done(cache); 7324 } 7325 ret = btrfs_trim_block_group(cache, 7326 &group_trimmed, 7327 start, 7328 end, 7329 range->minlen); 7330 7331 trimmed += group_trimmed; 7332 if (ret) { 7333 btrfs_put_block_group(cache); 7334 break; 7335 } 7336 } 7337 7338 cache = next_block_group(fs_info->tree_root, cache); 7339 } 7340 7341 range->len = trimmed; 7342 return ret; 7343 } 7344
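
/*
 * Usage sketch (not from this file): btrfs_trim_fs() is expected to be
 * driven by the FITRIM ioctl handler, roughly along these lines, where
 * 'argp' and 'root' come from the ioctl caller and the exact plumbing
 * lives in ioctl.c:
 *
 *	struct fstrim_range range;
 *	int ret;
 *
 *	if (copy_from_user(&range, argp, sizeof(range)))
 *		return -EFAULT;
 *
 *	ret = btrfs_trim_fs(root, &range);
 *	if (ret < 0)
 *		return ret;
 *
 *	if (copy_to_user(argp, &range, sizeof(range)))
 *		return -EFAULT;
 *	return 0;
 */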