1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include <linux/kthread.h> 25 #include "compat.h" 26 #include "hash.h" 27 #include "ctree.h" 28 #include "disk-io.h" 29 #include "print-tree.h" 30 #include "transaction.h" 31 #include "volumes.h" 32 #include "locking.h" 33 #include "free-space-cache.h" 34 35 static int update_block_group(struct btrfs_trans_handle *trans, 36 struct btrfs_root *root, 37 u64 bytenr, u64 num_bytes, int alloc, 38 int mark_free); 39 static int update_reserved_extents(struct btrfs_block_group_cache *cache, 40 u64 num_bytes, int reserve); 41 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 42 struct btrfs_root *root, 43 u64 bytenr, u64 num_bytes, u64 parent, 44 u64 root_objectid, u64 owner_objectid, 45 u64 owner_offset, int refs_to_drop, 46 struct btrfs_delayed_extent_op *extra_op); 47 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 48 struct extent_buffer *leaf, 49 struct btrfs_extent_item *ei); 50 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 51 struct btrfs_root *root, 52 u64 parent, u64 root_objectid, 53 u64 flags, u64 owner, u64 offset, 54 struct btrfs_key *ins, int ref_mod); 55 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 parent, u64 root_objectid, 58 u64 flags, struct btrfs_disk_key *key, 59 int level, struct btrfs_key *ins); 60 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 61 struct btrfs_root *extent_root, u64 alloc_bytes, 62 u64 flags, int force); 63 static int pin_down_bytes(struct btrfs_trans_handle *trans, 64 struct btrfs_root *root, 65 struct btrfs_path *path, 66 u64 bytenr, u64 num_bytes, 67 int is_data, int reserved, 68 struct extent_buffer **must_clean); 69 static int find_next_key(struct btrfs_path *path, int level, 70 struct btrfs_key *key); 71 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 72 int dump_block_groups); 73 74 static noinline int 75 block_group_cache_done(struct btrfs_block_group_cache *cache) 76 { 77 smp_mb(); 78 return cache->cached == BTRFS_CACHE_FINISHED; 79 } 80 81 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 82 { 83 return (cache->flags & bits) == bits; 84 } 85 86 /* 87 * this adds the block group to the fs_info rb tree for the block group 88 * cache 89 */ 90 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 91 struct btrfs_block_group_cache *block_group) 92 { 93 struct rb_node **p; 94 struct rb_node *parent = NULL; 95 struct btrfs_block_group_cache *cache; 96 97 spin_lock(&info->block_group_cache_lock); 98 p = &info->block_group_cache_tree.rb_node; 99 100 while (*p) { 101 parent = *p; 102 cache = rb_entry(parent, struct btrfs_block_group_cache, 103 cache_node); 104 if (block_group->key.objectid < cache->key.objectid) { 105 p = &(*p)->rb_left; 106 } else if (block_group->key.objectid > cache->key.objectid) { 107 p = &(*p)->rb_right; 108 } else { 109 spin_unlock(&info->block_group_cache_lock); 110 return -EEXIST; 111 } 112 } 113 114 rb_link_node(&block_group->cache_node, parent, p); 115 rb_insert_color(&block_group->cache_node, 116 &info->block_group_cache_tree); 117 spin_unlock(&info->block_group_cache_lock); 118 119 return 0; 120 } 121 122 /* 123 * This will return the block group at or after bytenr if contains is 0, else 124 * it will return the block group that contains the bytenr 125 */ 126 static struct btrfs_block_group_cache * 127 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 128 int contains) 129 { 130 struct btrfs_block_group_cache *cache, *ret = NULL; 131 struct rb_node *n; 132 u64 end, start; 133 134 spin_lock(&info->block_group_cache_lock); 135 n = info->block_group_cache_tree.rb_node; 136 137 while (n) { 138 cache = rb_entry(n, struct btrfs_block_group_cache, 139 cache_node); 140 end = cache->key.objectid + cache->key.offset - 1; 141 start = cache->key.objectid; 142 143 if (bytenr < start) { 144 if (!contains && (!ret || start < ret->key.objectid)) 145 ret = cache; 146 n = n->rb_left; 147 } else if (bytenr > start) { 148 if (contains && bytenr <= end) { 149 ret = cache; 150 break; 151 } 152 n = n->rb_right; 153 } else { 154 ret = cache; 155 break; 156 } 157 } 158 if (ret) 159 atomic_inc(&ret->count); 160 spin_unlock(&info->block_group_cache_lock); 161 162 return ret; 163 } 164 165 static int add_excluded_extent(struct btrfs_root *root, 166 u64 start, u64 num_bytes) 167 { 168 u64 end = start + num_bytes - 1; 169 set_extent_bits(&root->fs_info->freed_extents[0], 170 start, end, EXTENT_UPTODATE, GFP_NOFS); 171 set_extent_bits(&root->fs_info->freed_extents[1], 172 start, end, EXTENT_UPTODATE, GFP_NOFS); 173 return 0; 174 } 175 176 static void free_excluded_extents(struct btrfs_root *root, 177 struct btrfs_block_group_cache *cache) 178 { 179 u64 start, end; 180 181 start = cache->key.objectid; 182 end = start + cache->key.offset - 1; 183 184 clear_extent_bits(&root->fs_info->freed_extents[0], 185 start, end, EXTENT_UPTODATE, GFP_NOFS); 186 clear_extent_bits(&root->fs_info->freed_extents[1], 187 start, end, EXTENT_UPTODATE, GFP_NOFS); 188 } 189 190 static int exclude_super_stripes(struct btrfs_root *root, 191 struct btrfs_block_group_cache *cache) 192 { 193 u64 bytenr; 194 u64 *logical; 195 int stripe_len; 196 int i, nr, ret; 197 198 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 199 bytenr = btrfs_sb_offset(i); 200 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 201 cache->key.objectid, bytenr, 202 0, &logical, &nr, &stripe_len); 203 BUG_ON(ret); 204 205 while (nr--) { 206 cache->bytes_super += stripe_len; 207 ret = add_excluded_extent(root, logical[nr], 208 stripe_len); 209 BUG_ON(ret); 210 } 211 212 kfree(logical); 213 } 214 return 0; 215 } 216 217 static struct btrfs_caching_control * 218 get_caching_control(struct btrfs_block_group_cache *cache) 219 { 220 struct btrfs_caching_control *ctl; 221 222 spin_lock(&cache->lock); 223 if (cache->cached != BTRFS_CACHE_STARTED) { 224 spin_unlock(&cache->lock); 225 return NULL; 226 } 227 228 ctl = cache->caching_ctl; 229 atomic_inc(&ctl->count); 230 spin_unlock(&cache->lock); 231 return ctl; 232 } 233 234 static void put_caching_control(struct btrfs_caching_control *ctl) 235 { 236 if (atomic_dec_and_test(&ctl->count)) 237 kfree(ctl); 238 } 239 240 /* 241 * this is only called by cache_block_group, since we could have freed extents 242 * we need to check the pinned_extents for any extents that can't be used yet 243 * since their free space will be released as soon as the transaction commits. 244 */ 245 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 246 struct btrfs_fs_info *info, u64 start, u64 end) 247 { 248 u64 extent_start, extent_end, size, total_added = 0; 249 int ret; 250 251 while (start < end) { 252 ret = find_first_extent_bit(info->pinned_extents, start, 253 &extent_start, &extent_end, 254 EXTENT_DIRTY | EXTENT_UPTODATE); 255 if (ret) 256 break; 257 258 if (extent_start == start) { 259 start = extent_end + 1; 260 } else if (extent_start > start && extent_start < end) { 261 size = extent_start - start; 262 total_added += size; 263 ret = btrfs_add_free_space(block_group, start, 264 size); 265 BUG_ON(ret); 266 start = extent_end + 1; 267 } else { 268 break; 269 } 270 } 271 272 if (start < end) { 273 size = end - start; 274 total_added += size; 275 ret = btrfs_add_free_space(block_group, start, size); 276 BUG_ON(ret); 277 } 278 279 return total_added; 280 } 281 282 static int caching_kthread(void *data) 283 { 284 struct btrfs_block_group_cache *block_group = data; 285 struct btrfs_fs_info *fs_info = block_group->fs_info; 286 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; 287 struct btrfs_root *extent_root = fs_info->extent_root; 288 struct btrfs_path *path; 289 struct extent_buffer *leaf; 290 struct btrfs_key key; 291 u64 total_found = 0; 292 u64 last = 0; 293 u32 nritems; 294 int ret = 0; 295 296 path = btrfs_alloc_path(); 297 if (!path) 298 return -ENOMEM; 299 300 exclude_super_stripes(extent_root, block_group); 301 spin_lock(&block_group->space_info->lock); 302 block_group->space_info->bytes_super += block_group->bytes_super; 303 spin_unlock(&block_group->space_info->lock); 304 305 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 306 307 /* 308 * We don't want to deadlock with somebody trying to allocate a new 309 * extent for the extent root while also trying to search the extent 310 * root to add free space. So we skip locking and search the commit 311 * root, since its read-only 312 */ 313 path->skip_locking = 1; 314 path->search_commit_root = 1; 315 path->reada = 2; 316 317 key.objectid = last; 318 key.offset = 0; 319 key.type = BTRFS_EXTENT_ITEM_KEY; 320 again: 321 mutex_lock(&caching_ctl->mutex); 322 /* need to make sure the commit_root doesn't disappear */ 323 down_read(&fs_info->extent_commit_sem); 324 325 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 326 if (ret < 0) 327 goto err; 328 329 leaf = path->nodes[0]; 330 nritems = btrfs_header_nritems(leaf); 331 332 while (1) { 333 smp_mb(); 334 if (fs_info->closing > 1) { 335 last = (u64)-1; 336 break; 337 } 338 339 if (path->slots[0] < nritems) { 340 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 341 } else { 342 ret = find_next_key(path, 0, &key); 343 if (ret) 344 break; 345 346 caching_ctl->progress = last; 347 btrfs_release_path(extent_root, path); 348 up_read(&fs_info->extent_commit_sem); 349 mutex_unlock(&caching_ctl->mutex); 350 if (btrfs_transaction_in_commit(fs_info)) 351 schedule_timeout(1); 352 else 353 cond_resched(); 354 goto again; 355 } 356 357 if (key.objectid < block_group->key.objectid) { 358 path->slots[0]++; 359 continue; 360 } 361 362 if (key.objectid >= block_group->key.objectid + 363 block_group->key.offset) 364 break; 365 366 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 367 total_found += add_new_free_space(block_group, 368 fs_info, last, 369 key.objectid); 370 last = key.objectid + key.offset; 371 372 if (total_found > (1024 * 1024 * 2)) { 373 total_found = 0; 374 wake_up(&caching_ctl->wait); 375 } 376 } 377 path->slots[0]++; 378 } 379 ret = 0; 380 381 total_found += add_new_free_space(block_group, fs_info, last, 382 block_group->key.objectid + 383 block_group->key.offset); 384 caching_ctl->progress = (u64)-1; 385 386 spin_lock(&block_group->lock); 387 block_group->caching_ctl = NULL; 388 block_group->cached = BTRFS_CACHE_FINISHED; 389 spin_unlock(&block_group->lock); 390 391 err: 392 btrfs_free_path(path); 393 up_read(&fs_info->extent_commit_sem); 394 395 free_excluded_extents(extent_root, block_group); 396 397 mutex_unlock(&caching_ctl->mutex); 398 wake_up(&caching_ctl->wait); 399 400 put_caching_control(caching_ctl); 401 atomic_dec(&block_group->space_info->caching_threads); 402 return 0; 403 } 404 405 static int cache_block_group(struct btrfs_block_group_cache *cache) 406 { 407 struct btrfs_fs_info *fs_info = cache->fs_info; 408 struct btrfs_caching_control *caching_ctl; 409 struct task_struct *tsk; 410 int ret = 0; 411 412 smp_mb(); 413 if (cache->cached != BTRFS_CACHE_NO) 414 return 0; 415 416 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 417 BUG_ON(!caching_ctl); 418 419 INIT_LIST_HEAD(&caching_ctl->list); 420 mutex_init(&caching_ctl->mutex); 421 init_waitqueue_head(&caching_ctl->wait); 422 caching_ctl->block_group = cache; 423 caching_ctl->progress = cache->key.objectid; 424 /* one for caching kthread, one for caching block group list */ 425 atomic_set(&caching_ctl->count, 2); 426 427 spin_lock(&cache->lock); 428 if (cache->cached != BTRFS_CACHE_NO) { 429 spin_unlock(&cache->lock); 430 kfree(caching_ctl); 431 return 0; 432 } 433 cache->caching_ctl = caching_ctl; 434 cache->cached = BTRFS_CACHE_STARTED; 435 spin_unlock(&cache->lock); 436 437 down_write(&fs_info->extent_commit_sem); 438 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 439 up_write(&fs_info->extent_commit_sem); 440 441 atomic_inc(&cache->space_info->caching_threads); 442 443 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 444 cache->key.objectid); 445 if (IS_ERR(tsk)) { 446 ret = PTR_ERR(tsk); 447 printk(KERN_ERR "error running thread %d\n", ret); 448 BUG(); 449 } 450 451 return ret; 452 } 453 454 /* 455 * return the block group that starts at or after bytenr 456 */ 457 static struct btrfs_block_group_cache * 458 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 459 { 460 struct btrfs_block_group_cache *cache; 461 462 cache = block_group_cache_tree_search(info, bytenr, 0); 463 464 return cache; 465 } 466 467 /* 468 * return the block group that contains the given bytenr 469 */ 470 struct btrfs_block_group_cache *btrfs_lookup_block_group( 471 struct btrfs_fs_info *info, 472 u64 bytenr) 473 { 474 struct btrfs_block_group_cache *cache; 475 476 cache = block_group_cache_tree_search(info, bytenr, 1); 477 478 return cache; 479 } 480 481 void btrfs_put_block_group(struct btrfs_block_group_cache *cache) 482 { 483 if (atomic_dec_and_test(&cache->count)) 484 kfree(cache); 485 } 486 487 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 488 u64 flags) 489 { 490 struct list_head *head = &info->space_info; 491 struct btrfs_space_info *found; 492 493 rcu_read_lock(); 494 list_for_each_entry_rcu(found, head, list) { 495 if (found->flags == flags) { 496 rcu_read_unlock(); 497 return found; 498 } 499 } 500 rcu_read_unlock(); 501 return NULL; 502 } 503 504 /* 505 * after adding space to the filesystem, we need to clear the full flags 506 * on all the space infos. 507 */ 508 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 509 { 510 struct list_head *head = &info->space_info; 511 struct btrfs_space_info *found; 512 513 rcu_read_lock(); 514 list_for_each_entry_rcu(found, head, list) 515 found->full = 0; 516 rcu_read_unlock(); 517 } 518 519 static u64 div_factor(u64 num, int factor) 520 { 521 if (factor == 10) 522 return num; 523 num *= factor; 524 do_div(num, 10); 525 return num; 526 } 527 528 u64 btrfs_find_block_group(struct btrfs_root *root, 529 u64 search_start, u64 search_hint, int owner) 530 { 531 struct btrfs_block_group_cache *cache; 532 u64 used; 533 u64 last = max(search_hint, search_start); 534 u64 group_start = 0; 535 int full_search = 0; 536 int factor = 9; 537 int wrapped = 0; 538 again: 539 while (1) { 540 cache = btrfs_lookup_first_block_group(root->fs_info, last); 541 if (!cache) 542 break; 543 544 spin_lock(&cache->lock); 545 last = cache->key.objectid + cache->key.offset; 546 used = btrfs_block_group_used(&cache->item); 547 548 if ((full_search || !cache->ro) && 549 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 550 if (used + cache->pinned + cache->reserved < 551 div_factor(cache->key.offset, factor)) { 552 group_start = cache->key.objectid; 553 spin_unlock(&cache->lock); 554 btrfs_put_block_group(cache); 555 goto found; 556 } 557 } 558 spin_unlock(&cache->lock); 559 btrfs_put_block_group(cache); 560 cond_resched(); 561 } 562 if (!wrapped) { 563 last = search_start; 564 wrapped = 1; 565 goto again; 566 } 567 if (!full_search && factor < 10) { 568 last = search_start; 569 full_search = 1; 570 factor = 10; 571 goto again; 572 } 573 found: 574 return group_start; 575 } 576 577 /* simple helper to search for an existing extent at a given offset */ 578 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 579 { 580 int ret; 581 struct btrfs_key key; 582 struct btrfs_path *path; 583 584 path = btrfs_alloc_path(); 585 BUG_ON(!path); 586 key.objectid = start; 587 key.offset = len; 588 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 589 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 590 0, 0); 591 btrfs_free_path(path); 592 return ret; 593 } 594 595 /* 596 * Back reference rules. Back refs have three main goals: 597 * 598 * 1) differentiate between all holders of references to an extent so that 599 * when a reference is dropped we can make sure it was a valid reference 600 * before freeing the extent. 601 * 602 * 2) Provide enough information to quickly find the holders of an extent 603 * if we notice a given block is corrupted or bad. 604 * 605 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 606 * maintenance. This is actually the same as #2, but with a slightly 607 * different use case. 608 * 609 * There are two kinds of back refs. The implicit back refs is optimized 610 * for pointers in non-shared tree blocks. For a given pointer in a block, 611 * back refs of this kind provide information about the block's owner tree 612 * and the pointer's key. These information allow us to find the block by 613 * b-tree searching. The full back refs is for pointers in tree blocks not 614 * referenced by their owner trees. The location of tree block is recorded 615 * in the back refs. Actually the full back refs is generic, and can be 616 * used in all cases the implicit back refs is used. The major shortcoming 617 * of the full back refs is its overhead. Every time a tree block gets 618 * COWed, we have to update back refs entry for all pointers in it. 619 * 620 * For a newly allocated tree block, we use implicit back refs for 621 * pointers in it. This means most tree related operations only involve 622 * implicit back refs. For a tree block created in old transaction, the 623 * only way to drop a reference to it is COW it. So we can detect the 624 * event that tree block loses its owner tree's reference and do the 625 * back refs conversion. 626 * 627 * When a tree block is COW'd through a tree, there are four cases: 628 * 629 * The reference count of the block is one and the tree is the block's 630 * owner tree. Nothing to do in this case. 631 * 632 * The reference count of the block is one and the tree is not the 633 * block's owner tree. In this case, full back refs is used for pointers 634 * in the block. Remove these full back refs, add implicit back refs for 635 * every pointers in the new block. 636 * 637 * The reference count of the block is greater than one and the tree is 638 * the block's owner tree. In this case, implicit back refs is used for 639 * pointers in the block. Add full back refs for every pointers in the 640 * block, increase lower level extents' reference counts. The original 641 * implicit back refs are entailed to the new block. 642 * 643 * The reference count of the block is greater than one and the tree is 644 * not the block's owner tree. Add implicit back refs for every pointer in 645 * the new block, increase lower level extents' reference count. 646 * 647 * Back Reference Key composing: 648 * 649 * The key objectid corresponds to the first byte in the extent, 650 * The key type is used to differentiate between types of back refs. 651 * There are different meanings of the key offset for different types 652 * of back refs. 653 * 654 * File extents can be referenced by: 655 * 656 * - multiple snapshots, subvolumes, or different generations in one subvol 657 * - different files inside a single subvolume 658 * - different offsets inside a file (bookend extents in file.c) 659 * 660 * The extent ref structure for the implicit back refs has fields for: 661 * 662 * - Objectid of the subvolume root 663 * - objectid of the file holding the reference 664 * - original offset in the file 665 * - how many bookend extents 666 * 667 * The key offset for the implicit back refs is hash of the first 668 * three fields. 669 * 670 * The extent ref structure for the full back refs has field for: 671 * 672 * - number of pointers in the tree leaf 673 * 674 * The key offset for the implicit back refs is the first byte of 675 * the tree leaf 676 * 677 * When a file extent is allocated, The implicit back refs is used. 678 * the fields are filled in: 679 * 680 * (root_key.objectid, inode objectid, offset in file, 1) 681 * 682 * When a file extent is removed file truncation, we find the 683 * corresponding implicit back refs and check the following fields: 684 * 685 * (btrfs_header_owner(leaf), inode objectid, offset in file) 686 * 687 * Btree extents can be referenced by: 688 * 689 * - Different subvolumes 690 * 691 * Both the implicit back refs and the full back refs for tree blocks 692 * only consist of key. The key offset for the implicit back refs is 693 * objectid of block's owner tree. The key offset for the full back refs 694 * is the first byte of parent block. 695 * 696 * When implicit back refs is used, information about the lowest key and 697 * level of the tree block are required. These information are stored in 698 * tree block info structure. 699 */ 700 701 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 702 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 703 struct btrfs_root *root, 704 struct btrfs_path *path, 705 u64 owner, u32 extra_size) 706 { 707 struct btrfs_extent_item *item; 708 struct btrfs_extent_item_v0 *ei0; 709 struct btrfs_extent_ref_v0 *ref0; 710 struct btrfs_tree_block_info *bi; 711 struct extent_buffer *leaf; 712 struct btrfs_key key; 713 struct btrfs_key found_key; 714 u32 new_size = sizeof(*item); 715 u64 refs; 716 int ret; 717 718 leaf = path->nodes[0]; 719 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 720 721 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 722 ei0 = btrfs_item_ptr(leaf, path->slots[0], 723 struct btrfs_extent_item_v0); 724 refs = btrfs_extent_refs_v0(leaf, ei0); 725 726 if (owner == (u64)-1) { 727 while (1) { 728 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 729 ret = btrfs_next_leaf(root, path); 730 if (ret < 0) 731 return ret; 732 BUG_ON(ret > 0); 733 leaf = path->nodes[0]; 734 } 735 btrfs_item_key_to_cpu(leaf, &found_key, 736 path->slots[0]); 737 BUG_ON(key.objectid != found_key.objectid); 738 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 739 path->slots[0]++; 740 continue; 741 } 742 ref0 = btrfs_item_ptr(leaf, path->slots[0], 743 struct btrfs_extent_ref_v0); 744 owner = btrfs_ref_objectid_v0(leaf, ref0); 745 break; 746 } 747 } 748 btrfs_release_path(root, path); 749 750 if (owner < BTRFS_FIRST_FREE_OBJECTID) 751 new_size += sizeof(*bi); 752 753 new_size -= sizeof(*ei0); 754 ret = btrfs_search_slot(trans, root, &key, path, 755 new_size + extra_size, 1); 756 if (ret < 0) 757 return ret; 758 BUG_ON(ret); 759 760 ret = btrfs_extend_item(trans, root, path, new_size); 761 BUG_ON(ret); 762 763 leaf = path->nodes[0]; 764 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 765 btrfs_set_extent_refs(leaf, item, refs); 766 /* FIXME: get real generation */ 767 btrfs_set_extent_generation(leaf, item, 0); 768 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 769 btrfs_set_extent_flags(leaf, item, 770 BTRFS_EXTENT_FLAG_TREE_BLOCK | 771 BTRFS_BLOCK_FLAG_FULL_BACKREF); 772 bi = (struct btrfs_tree_block_info *)(item + 1); 773 /* FIXME: get first key of the block */ 774 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 775 btrfs_set_tree_block_level(leaf, bi, (int)owner); 776 } else { 777 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 778 } 779 btrfs_mark_buffer_dirty(leaf); 780 return 0; 781 } 782 #endif 783 784 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 785 { 786 u32 high_crc = ~(u32)0; 787 u32 low_crc = ~(u32)0; 788 __le64 lenum; 789 790 lenum = cpu_to_le64(root_objectid); 791 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 792 lenum = cpu_to_le64(owner); 793 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 794 lenum = cpu_to_le64(offset); 795 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 796 797 return ((u64)high_crc << 31) ^ (u64)low_crc; 798 } 799 800 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 801 struct btrfs_extent_data_ref *ref) 802 { 803 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 804 btrfs_extent_data_ref_objectid(leaf, ref), 805 btrfs_extent_data_ref_offset(leaf, ref)); 806 } 807 808 static int match_extent_data_ref(struct extent_buffer *leaf, 809 struct btrfs_extent_data_ref *ref, 810 u64 root_objectid, u64 owner, u64 offset) 811 { 812 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 813 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 814 btrfs_extent_data_ref_offset(leaf, ref) != offset) 815 return 0; 816 return 1; 817 } 818 819 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 820 struct btrfs_root *root, 821 struct btrfs_path *path, 822 u64 bytenr, u64 parent, 823 u64 root_objectid, 824 u64 owner, u64 offset) 825 { 826 struct btrfs_key key; 827 struct btrfs_extent_data_ref *ref; 828 struct extent_buffer *leaf; 829 u32 nritems; 830 int ret; 831 int recow; 832 int err = -ENOENT; 833 834 key.objectid = bytenr; 835 if (parent) { 836 key.type = BTRFS_SHARED_DATA_REF_KEY; 837 key.offset = parent; 838 } else { 839 key.type = BTRFS_EXTENT_DATA_REF_KEY; 840 key.offset = hash_extent_data_ref(root_objectid, 841 owner, offset); 842 } 843 again: 844 recow = 0; 845 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 846 if (ret < 0) { 847 err = ret; 848 goto fail; 849 } 850 851 if (parent) { 852 if (!ret) 853 return 0; 854 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 855 key.type = BTRFS_EXTENT_REF_V0_KEY; 856 btrfs_release_path(root, path); 857 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 858 if (ret < 0) { 859 err = ret; 860 goto fail; 861 } 862 if (!ret) 863 return 0; 864 #endif 865 goto fail; 866 } 867 868 leaf = path->nodes[0]; 869 nritems = btrfs_header_nritems(leaf); 870 while (1) { 871 if (path->slots[0] >= nritems) { 872 ret = btrfs_next_leaf(root, path); 873 if (ret < 0) 874 err = ret; 875 if (ret) 876 goto fail; 877 878 leaf = path->nodes[0]; 879 nritems = btrfs_header_nritems(leaf); 880 recow = 1; 881 } 882 883 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 884 if (key.objectid != bytenr || 885 key.type != BTRFS_EXTENT_DATA_REF_KEY) 886 goto fail; 887 888 ref = btrfs_item_ptr(leaf, path->slots[0], 889 struct btrfs_extent_data_ref); 890 891 if (match_extent_data_ref(leaf, ref, root_objectid, 892 owner, offset)) { 893 if (recow) { 894 btrfs_release_path(root, path); 895 goto again; 896 } 897 err = 0; 898 break; 899 } 900 path->slots[0]++; 901 } 902 fail: 903 return err; 904 } 905 906 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 907 struct btrfs_root *root, 908 struct btrfs_path *path, 909 u64 bytenr, u64 parent, 910 u64 root_objectid, u64 owner, 911 u64 offset, int refs_to_add) 912 { 913 struct btrfs_key key; 914 struct extent_buffer *leaf; 915 u32 size; 916 u32 num_refs; 917 int ret; 918 919 key.objectid = bytenr; 920 if (parent) { 921 key.type = BTRFS_SHARED_DATA_REF_KEY; 922 key.offset = parent; 923 size = sizeof(struct btrfs_shared_data_ref); 924 } else { 925 key.type = BTRFS_EXTENT_DATA_REF_KEY; 926 key.offset = hash_extent_data_ref(root_objectid, 927 owner, offset); 928 size = sizeof(struct btrfs_extent_data_ref); 929 } 930 931 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 932 if (ret && ret != -EEXIST) 933 goto fail; 934 935 leaf = path->nodes[0]; 936 if (parent) { 937 struct btrfs_shared_data_ref *ref; 938 ref = btrfs_item_ptr(leaf, path->slots[0], 939 struct btrfs_shared_data_ref); 940 if (ret == 0) { 941 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 942 } else { 943 num_refs = btrfs_shared_data_ref_count(leaf, ref); 944 num_refs += refs_to_add; 945 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 946 } 947 } else { 948 struct btrfs_extent_data_ref *ref; 949 while (ret == -EEXIST) { 950 ref = btrfs_item_ptr(leaf, path->slots[0], 951 struct btrfs_extent_data_ref); 952 if (match_extent_data_ref(leaf, ref, root_objectid, 953 owner, offset)) 954 break; 955 btrfs_release_path(root, path); 956 key.offset++; 957 ret = btrfs_insert_empty_item(trans, root, path, &key, 958 size); 959 if (ret && ret != -EEXIST) 960 goto fail; 961 962 leaf = path->nodes[0]; 963 } 964 ref = btrfs_item_ptr(leaf, path->slots[0], 965 struct btrfs_extent_data_ref); 966 if (ret == 0) { 967 btrfs_set_extent_data_ref_root(leaf, ref, 968 root_objectid); 969 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 970 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 971 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 972 } else { 973 num_refs = btrfs_extent_data_ref_count(leaf, ref); 974 num_refs += refs_to_add; 975 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 976 } 977 } 978 btrfs_mark_buffer_dirty(leaf); 979 ret = 0; 980 fail: 981 btrfs_release_path(root, path); 982 return ret; 983 } 984 985 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 986 struct btrfs_root *root, 987 struct btrfs_path *path, 988 int refs_to_drop) 989 { 990 struct btrfs_key key; 991 struct btrfs_extent_data_ref *ref1 = NULL; 992 struct btrfs_shared_data_ref *ref2 = NULL; 993 struct extent_buffer *leaf; 994 u32 num_refs = 0; 995 int ret = 0; 996 997 leaf = path->nodes[0]; 998 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 999 1000 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1001 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1002 struct btrfs_extent_data_ref); 1003 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1004 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1005 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1006 struct btrfs_shared_data_ref); 1007 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1008 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1009 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1010 struct btrfs_extent_ref_v0 *ref0; 1011 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1012 struct btrfs_extent_ref_v0); 1013 num_refs = btrfs_ref_count_v0(leaf, ref0); 1014 #endif 1015 } else { 1016 BUG(); 1017 } 1018 1019 BUG_ON(num_refs < refs_to_drop); 1020 num_refs -= refs_to_drop; 1021 1022 if (num_refs == 0) { 1023 ret = btrfs_del_item(trans, root, path); 1024 } else { 1025 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1026 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1027 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1028 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1029 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1030 else { 1031 struct btrfs_extent_ref_v0 *ref0; 1032 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1033 struct btrfs_extent_ref_v0); 1034 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1035 } 1036 #endif 1037 btrfs_mark_buffer_dirty(leaf); 1038 } 1039 return ret; 1040 } 1041 1042 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 1043 struct btrfs_path *path, 1044 struct btrfs_extent_inline_ref *iref) 1045 { 1046 struct btrfs_key key; 1047 struct extent_buffer *leaf; 1048 struct btrfs_extent_data_ref *ref1; 1049 struct btrfs_shared_data_ref *ref2; 1050 u32 num_refs = 0; 1051 1052 leaf = path->nodes[0]; 1053 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1054 if (iref) { 1055 if (btrfs_extent_inline_ref_type(leaf, iref) == 1056 BTRFS_EXTENT_DATA_REF_KEY) { 1057 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1058 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1059 } else { 1060 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 1061 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1062 } 1063 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1064 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1065 struct btrfs_extent_data_ref); 1066 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1067 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1068 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1069 struct btrfs_shared_data_ref); 1070 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1071 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1072 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1073 struct btrfs_extent_ref_v0 *ref0; 1074 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1075 struct btrfs_extent_ref_v0); 1076 num_refs = btrfs_ref_count_v0(leaf, ref0); 1077 #endif 1078 } else { 1079 WARN_ON(1); 1080 } 1081 return num_refs; 1082 } 1083 1084 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1085 struct btrfs_root *root, 1086 struct btrfs_path *path, 1087 u64 bytenr, u64 parent, 1088 u64 root_objectid) 1089 { 1090 struct btrfs_key key; 1091 int ret; 1092 1093 key.objectid = bytenr; 1094 if (parent) { 1095 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1096 key.offset = parent; 1097 } else { 1098 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1099 key.offset = root_objectid; 1100 } 1101 1102 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1103 if (ret > 0) 1104 ret = -ENOENT; 1105 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1106 if (ret == -ENOENT && parent) { 1107 btrfs_release_path(root, path); 1108 key.type = BTRFS_EXTENT_REF_V0_KEY; 1109 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1110 if (ret > 0) 1111 ret = -ENOENT; 1112 } 1113 #endif 1114 return ret; 1115 } 1116 1117 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1118 struct btrfs_root *root, 1119 struct btrfs_path *path, 1120 u64 bytenr, u64 parent, 1121 u64 root_objectid) 1122 { 1123 struct btrfs_key key; 1124 int ret; 1125 1126 key.objectid = bytenr; 1127 if (parent) { 1128 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1129 key.offset = parent; 1130 } else { 1131 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1132 key.offset = root_objectid; 1133 } 1134 1135 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1136 btrfs_release_path(root, path); 1137 return ret; 1138 } 1139 1140 static inline int extent_ref_type(u64 parent, u64 owner) 1141 { 1142 int type; 1143 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1144 if (parent > 0) 1145 type = BTRFS_SHARED_BLOCK_REF_KEY; 1146 else 1147 type = BTRFS_TREE_BLOCK_REF_KEY; 1148 } else { 1149 if (parent > 0) 1150 type = BTRFS_SHARED_DATA_REF_KEY; 1151 else 1152 type = BTRFS_EXTENT_DATA_REF_KEY; 1153 } 1154 return type; 1155 } 1156 1157 static int find_next_key(struct btrfs_path *path, int level, 1158 struct btrfs_key *key) 1159 1160 { 1161 for (; level < BTRFS_MAX_LEVEL; level++) { 1162 if (!path->nodes[level]) 1163 break; 1164 if (path->slots[level] + 1 >= 1165 btrfs_header_nritems(path->nodes[level])) 1166 continue; 1167 if (level == 0) 1168 btrfs_item_key_to_cpu(path->nodes[level], key, 1169 path->slots[level] + 1); 1170 else 1171 btrfs_node_key_to_cpu(path->nodes[level], key, 1172 path->slots[level] + 1); 1173 return 0; 1174 } 1175 return 1; 1176 } 1177 1178 /* 1179 * look for inline back ref. if back ref is found, *ref_ret is set 1180 * to the address of inline back ref, and 0 is returned. 1181 * 1182 * if back ref isn't found, *ref_ret is set to the address where it 1183 * should be inserted, and -ENOENT is returned. 1184 * 1185 * if insert is true and there are too many inline back refs, the path 1186 * points to the extent item, and -EAGAIN is returned. 1187 * 1188 * NOTE: inline back refs are ordered in the same way that back ref 1189 * items in the tree are ordered. 1190 */ 1191 static noinline_for_stack 1192 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1193 struct btrfs_root *root, 1194 struct btrfs_path *path, 1195 struct btrfs_extent_inline_ref **ref_ret, 1196 u64 bytenr, u64 num_bytes, 1197 u64 parent, u64 root_objectid, 1198 u64 owner, u64 offset, int insert) 1199 { 1200 struct btrfs_key key; 1201 struct extent_buffer *leaf; 1202 struct btrfs_extent_item *ei; 1203 struct btrfs_extent_inline_ref *iref; 1204 u64 flags; 1205 u64 item_size; 1206 unsigned long ptr; 1207 unsigned long end; 1208 int extra_size; 1209 int type; 1210 int want; 1211 int ret; 1212 int err = 0; 1213 1214 key.objectid = bytenr; 1215 key.type = BTRFS_EXTENT_ITEM_KEY; 1216 key.offset = num_bytes; 1217 1218 want = extent_ref_type(parent, owner); 1219 if (insert) { 1220 extra_size = btrfs_extent_inline_ref_size(want); 1221 path->keep_locks = 1; 1222 } else 1223 extra_size = -1; 1224 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1225 if (ret < 0) { 1226 err = ret; 1227 goto out; 1228 } 1229 BUG_ON(ret); 1230 1231 leaf = path->nodes[0]; 1232 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1233 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1234 if (item_size < sizeof(*ei)) { 1235 if (!insert) { 1236 err = -ENOENT; 1237 goto out; 1238 } 1239 ret = convert_extent_item_v0(trans, root, path, owner, 1240 extra_size); 1241 if (ret < 0) { 1242 err = ret; 1243 goto out; 1244 } 1245 leaf = path->nodes[0]; 1246 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1247 } 1248 #endif 1249 BUG_ON(item_size < sizeof(*ei)); 1250 1251 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1252 flags = btrfs_extent_flags(leaf, ei); 1253 1254 ptr = (unsigned long)(ei + 1); 1255 end = (unsigned long)ei + item_size; 1256 1257 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1258 ptr += sizeof(struct btrfs_tree_block_info); 1259 BUG_ON(ptr > end); 1260 } else { 1261 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1262 } 1263 1264 err = -ENOENT; 1265 while (1) { 1266 if (ptr >= end) { 1267 WARN_ON(ptr > end); 1268 break; 1269 } 1270 iref = (struct btrfs_extent_inline_ref *)ptr; 1271 type = btrfs_extent_inline_ref_type(leaf, iref); 1272 if (want < type) 1273 break; 1274 if (want > type) { 1275 ptr += btrfs_extent_inline_ref_size(type); 1276 continue; 1277 } 1278 1279 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1280 struct btrfs_extent_data_ref *dref; 1281 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1282 if (match_extent_data_ref(leaf, dref, root_objectid, 1283 owner, offset)) { 1284 err = 0; 1285 break; 1286 } 1287 if (hash_extent_data_ref_item(leaf, dref) < 1288 hash_extent_data_ref(root_objectid, owner, offset)) 1289 break; 1290 } else { 1291 u64 ref_offset; 1292 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1293 if (parent > 0) { 1294 if (parent == ref_offset) { 1295 err = 0; 1296 break; 1297 } 1298 if (ref_offset < parent) 1299 break; 1300 } else { 1301 if (root_objectid == ref_offset) { 1302 err = 0; 1303 break; 1304 } 1305 if (ref_offset < root_objectid) 1306 break; 1307 } 1308 } 1309 ptr += btrfs_extent_inline_ref_size(type); 1310 } 1311 if (err == -ENOENT && insert) { 1312 if (item_size + extra_size >= 1313 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1314 err = -EAGAIN; 1315 goto out; 1316 } 1317 /* 1318 * To add new inline back ref, we have to make sure 1319 * there is no corresponding back ref item. 1320 * For simplicity, we just do not add new inline back 1321 * ref if there is any kind of item for this block 1322 */ 1323 if (find_next_key(path, 0, &key) == 0 && 1324 key.objectid == bytenr && 1325 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1326 err = -EAGAIN; 1327 goto out; 1328 } 1329 } 1330 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1331 out: 1332 if (insert) { 1333 path->keep_locks = 0; 1334 btrfs_unlock_up_safe(path, 1); 1335 } 1336 return err; 1337 } 1338 1339 /* 1340 * helper to add new inline back ref 1341 */ 1342 static noinline_for_stack 1343 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1344 struct btrfs_root *root, 1345 struct btrfs_path *path, 1346 struct btrfs_extent_inline_ref *iref, 1347 u64 parent, u64 root_objectid, 1348 u64 owner, u64 offset, int refs_to_add, 1349 struct btrfs_delayed_extent_op *extent_op) 1350 { 1351 struct extent_buffer *leaf; 1352 struct btrfs_extent_item *ei; 1353 unsigned long ptr; 1354 unsigned long end; 1355 unsigned long item_offset; 1356 u64 refs; 1357 int size; 1358 int type; 1359 int ret; 1360 1361 leaf = path->nodes[0]; 1362 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1363 item_offset = (unsigned long)iref - (unsigned long)ei; 1364 1365 type = extent_ref_type(parent, owner); 1366 size = btrfs_extent_inline_ref_size(type); 1367 1368 ret = btrfs_extend_item(trans, root, path, size); 1369 BUG_ON(ret); 1370 1371 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1372 refs = btrfs_extent_refs(leaf, ei); 1373 refs += refs_to_add; 1374 btrfs_set_extent_refs(leaf, ei, refs); 1375 if (extent_op) 1376 __run_delayed_extent_op(extent_op, leaf, ei); 1377 1378 ptr = (unsigned long)ei + item_offset; 1379 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1380 if (ptr < end - size) 1381 memmove_extent_buffer(leaf, ptr + size, ptr, 1382 end - size - ptr); 1383 1384 iref = (struct btrfs_extent_inline_ref *)ptr; 1385 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1386 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1387 struct btrfs_extent_data_ref *dref; 1388 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1389 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1390 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1391 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1392 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1393 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1394 struct btrfs_shared_data_ref *sref; 1395 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1396 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1397 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1398 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1399 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1400 } else { 1401 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1402 } 1403 btrfs_mark_buffer_dirty(leaf); 1404 return 0; 1405 } 1406 1407 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1408 struct btrfs_root *root, 1409 struct btrfs_path *path, 1410 struct btrfs_extent_inline_ref **ref_ret, 1411 u64 bytenr, u64 num_bytes, u64 parent, 1412 u64 root_objectid, u64 owner, u64 offset) 1413 { 1414 int ret; 1415 1416 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1417 bytenr, num_bytes, parent, 1418 root_objectid, owner, offset, 0); 1419 if (ret != -ENOENT) 1420 return ret; 1421 1422 btrfs_release_path(root, path); 1423 *ref_ret = NULL; 1424 1425 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1426 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1427 root_objectid); 1428 } else { 1429 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1430 root_objectid, owner, offset); 1431 } 1432 return ret; 1433 } 1434 1435 /* 1436 * helper to update/remove inline back ref 1437 */ 1438 static noinline_for_stack 1439 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1440 struct btrfs_root *root, 1441 struct btrfs_path *path, 1442 struct btrfs_extent_inline_ref *iref, 1443 int refs_to_mod, 1444 struct btrfs_delayed_extent_op *extent_op) 1445 { 1446 struct extent_buffer *leaf; 1447 struct btrfs_extent_item *ei; 1448 struct btrfs_extent_data_ref *dref = NULL; 1449 struct btrfs_shared_data_ref *sref = NULL; 1450 unsigned long ptr; 1451 unsigned long end; 1452 u32 item_size; 1453 int size; 1454 int type; 1455 int ret; 1456 u64 refs; 1457 1458 leaf = path->nodes[0]; 1459 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1460 refs = btrfs_extent_refs(leaf, ei); 1461 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1462 refs += refs_to_mod; 1463 btrfs_set_extent_refs(leaf, ei, refs); 1464 if (extent_op) 1465 __run_delayed_extent_op(extent_op, leaf, ei); 1466 1467 type = btrfs_extent_inline_ref_type(leaf, iref); 1468 1469 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1470 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1471 refs = btrfs_extent_data_ref_count(leaf, dref); 1472 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1473 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1474 refs = btrfs_shared_data_ref_count(leaf, sref); 1475 } else { 1476 refs = 1; 1477 BUG_ON(refs_to_mod != -1); 1478 } 1479 1480 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1481 refs += refs_to_mod; 1482 1483 if (refs > 0) { 1484 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1485 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1486 else 1487 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1488 } else { 1489 size = btrfs_extent_inline_ref_size(type); 1490 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1491 ptr = (unsigned long)iref; 1492 end = (unsigned long)ei + item_size; 1493 if (ptr + size < end) 1494 memmove_extent_buffer(leaf, ptr, ptr + size, 1495 end - ptr - size); 1496 item_size -= size; 1497 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1498 BUG_ON(ret); 1499 } 1500 btrfs_mark_buffer_dirty(leaf); 1501 return 0; 1502 } 1503 1504 static noinline_for_stack 1505 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1506 struct btrfs_root *root, 1507 struct btrfs_path *path, 1508 u64 bytenr, u64 num_bytes, u64 parent, 1509 u64 root_objectid, u64 owner, 1510 u64 offset, int refs_to_add, 1511 struct btrfs_delayed_extent_op *extent_op) 1512 { 1513 struct btrfs_extent_inline_ref *iref; 1514 int ret; 1515 1516 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1517 bytenr, num_bytes, parent, 1518 root_objectid, owner, offset, 1); 1519 if (ret == 0) { 1520 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1521 ret = update_inline_extent_backref(trans, root, path, iref, 1522 refs_to_add, extent_op); 1523 } else if (ret == -ENOENT) { 1524 ret = setup_inline_extent_backref(trans, root, path, iref, 1525 parent, root_objectid, 1526 owner, offset, refs_to_add, 1527 extent_op); 1528 } 1529 return ret; 1530 } 1531 1532 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1533 struct btrfs_root *root, 1534 struct btrfs_path *path, 1535 u64 bytenr, u64 parent, u64 root_objectid, 1536 u64 owner, u64 offset, int refs_to_add) 1537 { 1538 int ret; 1539 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1540 BUG_ON(refs_to_add != 1); 1541 ret = insert_tree_block_ref(trans, root, path, bytenr, 1542 parent, root_objectid); 1543 } else { 1544 ret = insert_extent_data_ref(trans, root, path, bytenr, 1545 parent, root_objectid, 1546 owner, offset, refs_to_add); 1547 } 1548 return ret; 1549 } 1550 1551 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1552 struct btrfs_root *root, 1553 struct btrfs_path *path, 1554 struct btrfs_extent_inline_ref *iref, 1555 int refs_to_drop, int is_data) 1556 { 1557 int ret; 1558 1559 BUG_ON(!is_data && refs_to_drop != 1); 1560 if (iref) { 1561 ret = update_inline_extent_backref(trans, root, path, iref, 1562 -refs_to_drop, NULL); 1563 } else if (is_data) { 1564 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1565 } else { 1566 ret = btrfs_del_item(trans, root, path); 1567 } 1568 return ret; 1569 } 1570 1571 #ifdef BIO_RW_DISCARD 1572 static void btrfs_issue_discard(struct block_device *bdev, 1573 u64 start, u64 len) 1574 { 1575 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 1576 DISCARD_FL_BARRIER); 1577 } 1578 #endif 1579 1580 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1581 u64 num_bytes) 1582 { 1583 #ifdef BIO_RW_DISCARD 1584 int ret; 1585 u64 map_length = num_bytes; 1586 struct btrfs_multi_bio *multi = NULL; 1587 1588 /* Tell the block device(s) that the sectors can be discarded */ 1589 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1590 bytenr, &map_length, &multi, 0); 1591 if (!ret) { 1592 struct btrfs_bio_stripe *stripe = multi->stripes; 1593 int i; 1594 1595 if (map_length > num_bytes) 1596 map_length = num_bytes; 1597 1598 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1599 btrfs_issue_discard(stripe->dev->bdev, 1600 stripe->physical, 1601 map_length); 1602 } 1603 kfree(multi); 1604 } 1605 1606 return ret; 1607 #else 1608 return 0; 1609 #endif 1610 } 1611 1612 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1613 struct btrfs_root *root, 1614 u64 bytenr, u64 num_bytes, u64 parent, 1615 u64 root_objectid, u64 owner, u64 offset) 1616 { 1617 int ret; 1618 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1619 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1620 1621 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1622 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1623 parent, root_objectid, (int)owner, 1624 BTRFS_ADD_DELAYED_REF, NULL); 1625 } else { 1626 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1627 parent, root_objectid, owner, offset, 1628 BTRFS_ADD_DELAYED_REF, NULL); 1629 } 1630 return ret; 1631 } 1632 1633 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1634 struct btrfs_root *root, 1635 u64 bytenr, u64 num_bytes, 1636 u64 parent, u64 root_objectid, 1637 u64 owner, u64 offset, int refs_to_add, 1638 struct btrfs_delayed_extent_op *extent_op) 1639 { 1640 struct btrfs_path *path; 1641 struct extent_buffer *leaf; 1642 struct btrfs_extent_item *item; 1643 u64 refs; 1644 int ret; 1645 int err = 0; 1646 1647 path = btrfs_alloc_path(); 1648 if (!path) 1649 return -ENOMEM; 1650 1651 path->reada = 1; 1652 path->leave_spinning = 1; 1653 /* this will setup the path even if it fails to insert the back ref */ 1654 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1655 path, bytenr, num_bytes, parent, 1656 root_objectid, owner, offset, 1657 refs_to_add, extent_op); 1658 if (ret == 0) 1659 goto out; 1660 1661 if (ret != -EAGAIN) { 1662 err = ret; 1663 goto out; 1664 } 1665 1666 leaf = path->nodes[0]; 1667 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1668 refs = btrfs_extent_refs(leaf, item); 1669 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1670 if (extent_op) 1671 __run_delayed_extent_op(extent_op, leaf, item); 1672 1673 btrfs_mark_buffer_dirty(leaf); 1674 btrfs_release_path(root->fs_info->extent_root, path); 1675 1676 path->reada = 1; 1677 path->leave_spinning = 1; 1678 1679 /* now insert the actual backref */ 1680 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1681 path, bytenr, parent, root_objectid, 1682 owner, offset, refs_to_add); 1683 BUG_ON(ret); 1684 out: 1685 btrfs_free_path(path); 1686 return err; 1687 } 1688 1689 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1690 struct btrfs_root *root, 1691 struct btrfs_delayed_ref_node *node, 1692 struct btrfs_delayed_extent_op *extent_op, 1693 int insert_reserved) 1694 { 1695 int ret = 0; 1696 struct btrfs_delayed_data_ref *ref; 1697 struct btrfs_key ins; 1698 u64 parent = 0; 1699 u64 ref_root = 0; 1700 u64 flags = 0; 1701 1702 ins.objectid = node->bytenr; 1703 ins.offset = node->num_bytes; 1704 ins.type = BTRFS_EXTENT_ITEM_KEY; 1705 1706 ref = btrfs_delayed_node_to_data_ref(node); 1707 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1708 parent = ref->parent; 1709 else 1710 ref_root = ref->root; 1711 1712 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1713 if (extent_op) { 1714 BUG_ON(extent_op->update_key); 1715 flags |= extent_op->flags_to_set; 1716 } 1717 ret = alloc_reserved_file_extent(trans, root, 1718 parent, ref_root, flags, 1719 ref->objectid, ref->offset, 1720 &ins, node->ref_mod); 1721 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1722 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1723 node->num_bytes, parent, 1724 ref_root, ref->objectid, 1725 ref->offset, node->ref_mod, 1726 extent_op); 1727 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1728 ret = __btrfs_free_extent(trans, root, node->bytenr, 1729 node->num_bytes, parent, 1730 ref_root, ref->objectid, 1731 ref->offset, node->ref_mod, 1732 extent_op); 1733 } else { 1734 BUG(); 1735 } 1736 return ret; 1737 } 1738 1739 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1740 struct extent_buffer *leaf, 1741 struct btrfs_extent_item *ei) 1742 { 1743 u64 flags = btrfs_extent_flags(leaf, ei); 1744 if (extent_op->update_flags) { 1745 flags |= extent_op->flags_to_set; 1746 btrfs_set_extent_flags(leaf, ei, flags); 1747 } 1748 1749 if (extent_op->update_key) { 1750 struct btrfs_tree_block_info *bi; 1751 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1752 bi = (struct btrfs_tree_block_info *)(ei + 1); 1753 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1754 } 1755 } 1756 1757 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1758 struct btrfs_root *root, 1759 struct btrfs_delayed_ref_node *node, 1760 struct btrfs_delayed_extent_op *extent_op) 1761 { 1762 struct btrfs_key key; 1763 struct btrfs_path *path; 1764 struct btrfs_extent_item *ei; 1765 struct extent_buffer *leaf; 1766 u32 item_size; 1767 int ret; 1768 int err = 0; 1769 1770 path = btrfs_alloc_path(); 1771 if (!path) 1772 return -ENOMEM; 1773 1774 key.objectid = node->bytenr; 1775 key.type = BTRFS_EXTENT_ITEM_KEY; 1776 key.offset = node->num_bytes; 1777 1778 path->reada = 1; 1779 path->leave_spinning = 1; 1780 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1781 path, 0, 1); 1782 if (ret < 0) { 1783 err = ret; 1784 goto out; 1785 } 1786 if (ret > 0) { 1787 err = -EIO; 1788 goto out; 1789 } 1790 1791 leaf = path->nodes[0]; 1792 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1793 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1794 if (item_size < sizeof(*ei)) { 1795 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1796 path, (u64)-1, 0); 1797 if (ret < 0) { 1798 err = ret; 1799 goto out; 1800 } 1801 leaf = path->nodes[0]; 1802 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1803 } 1804 #endif 1805 BUG_ON(item_size < sizeof(*ei)); 1806 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1807 __run_delayed_extent_op(extent_op, leaf, ei); 1808 1809 btrfs_mark_buffer_dirty(leaf); 1810 out: 1811 btrfs_free_path(path); 1812 return err; 1813 } 1814 1815 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 1816 struct btrfs_root *root, 1817 struct btrfs_delayed_ref_node *node, 1818 struct btrfs_delayed_extent_op *extent_op, 1819 int insert_reserved) 1820 { 1821 int ret = 0; 1822 struct btrfs_delayed_tree_ref *ref; 1823 struct btrfs_key ins; 1824 u64 parent = 0; 1825 u64 ref_root = 0; 1826 1827 ins.objectid = node->bytenr; 1828 ins.offset = node->num_bytes; 1829 ins.type = BTRFS_EXTENT_ITEM_KEY; 1830 1831 ref = btrfs_delayed_node_to_tree_ref(node); 1832 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1833 parent = ref->parent; 1834 else 1835 ref_root = ref->root; 1836 1837 BUG_ON(node->ref_mod != 1); 1838 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1839 BUG_ON(!extent_op || !extent_op->update_flags || 1840 !extent_op->update_key); 1841 ret = alloc_reserved_tree_block(trans, root, 1842 parent, ref_root, 1843 extent_op->flags_to_set, 1844 &extent_op->key, 1845 ref->level, &ins); 1846 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1847 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1848 node->num_bytes, parent, ref_root, 1849 ref->level, 0, 1, extent_op); 1850 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1851 ret = __btrfs_free_extent(trans, root, node->bytenr, 1852 node->num_bytes, parent, ref_root, 1853 ref->level, 0, 1, extent_op); 1854 } else { 1855 BUG(); 1856 } 1857 return ret; 1858 } 1859 1860 1861 /* helper function to actually process a single delayed ref entry */ 1862 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1863 struct btrfs_root *root, 1864 struct btrfs_delayed_ref_node *node, 1865 struct btrfs_delayed_extent_op *extent_op, 1866 int insert_reserved) 1867 { 1868 int ret; 1869 if (btrfs_delayed_ref_is_head(node)) { 1870 struct btrfs_delayed_ref_head *head; 1871 /* 1872 * we've hit the end of the chain and we were supposed 1873 * to insert this extent into the tree. But, it got 1874 * deleted before we ever needed to insert it, so all 1875 * we have to do is clean up the accounting 1876 */ 1877 BUG_ON(extent_op); 1878 head = btrfs_delayed_node_to_head(node); 1879 if (insert_reserved) { 1880 int mark_free = 0; 1881 struct extent_buffer *must_clean = NULL; 1882 1883 ret = pin_down_bytes(trans, root, NULL, 1884 node->bytenr, node->num_bytes, 1885 head->is_data, 1, &must_clean); 1886 if (ret > 0) 1887 mark_free = 1; 1888 1889 if (must_clean) { 1890 clean_tree_block(NULL, root, must_clean); 1891 btrfs_tree_unlock(must_clean); 1892 free_extent_buffer(must_clean); 1893 } 1894 if (head->is_data) { 1895 ret = btrfs_del_csums(trans, root, 1896 node->bytenr, 1897 node->num_bytes); 1898 BUG_ON(ret); 1899 } 1900 if (mark_free) { 1901 ret = btrfs_free_reserved_extent(root, 1902 node->bytenr, 1903 node->num_bytes); 1904 BUG_ON(ret); 1905 } 1906 } 1907 mutex_unlock(&head->mutex); 1908 return 0; 1909 } 1910 1911 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1912 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1913 ret = run_delayed_tree_ref(trans, root, node, extent_op, 1914 insert_reserved); 1915 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1916 node->type == BTRFS_SHARED_DATA_REF_KEY) 1917 ret = run_delayed_data_ref(trans, root, node, extent_op, 1918 insert_reserved); 1919 else 1920 BUG(); 1921 return ret; 1922 } 1923 1924 static noinline struct btrfs_delayed_ref_node * 1925 select_delayed_ref(struct btrfs_delayed_ref_head *head) 1926 { 1927 struct rb_node *node; 1928 struct btrfs_delayed_ref_node *ref; 1929 int action = BTRFS_ADD_DELAYED_REF; 1930 again: 1931 /* 1932 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 1933 * this prevents ref count from going down to zero when 1934 * there still are pending delayed ref. 1935 */ 1936 node = rb_prev(&head->node.rb_node); 1937 while (1) { 1938 if (!node) 1939 break; 1940 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1941 rb_node); 1942 if (ref->bytenr != head->node.bytenr) 1943 break; 1944 if (ref->action == action) 1945 return ref; 1946 node = rb_prev(node); 1947 } 1948 if (action == BTRFS_ADD_DELAYED_REF) { 1949 action = BTRFS_DROP_DELAYED_REF; 1950 goto again; 1951 } 1952 return NULL; 1953 } 1954 1955 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 1956 struct btrfs_root *root, 1957 struct list_head *cluster) 1958 { 1959 struct btrfs_delayed_ref_root *delayed_refs; 1960 struct btrfs_delayed_ref_node *ref; 1961 struct btrfs_delayed_ref_head *locked_ref = NULL; 1962 struct btrfs_delayed_extent_op *extent_op; 1963 int ret; 1964 int count = 0; 1965 int must_insert_reserved = 0; 1966 1967 delayed_refs = &trans->transaction->delayed_refs; 1968 while (1) { 1969 if (!locked_ref) { 1970 /* pick a new head ref from the cluster list */ 1971 if (list_empty(cluster)) 1972 break; 1973 1974 locked_ref = list_entry(cluster->next, 1975 struct btrfs_delayed_ref_head, cluster); 1976 1977 /* grab the lock that says we are going to process 1978 * all the refs for this head */ 1979 ret = btrfs_delayed_ref_lock(trans, locked_ref); 1980 1981 /* 1982 * we may have dropped the spin lock to get the head 1983 * mutex lock, and that might have given someone else 1984 * time to free the head. If that's true, it has been 1985 * removed from our list and we can move on. 1986 */ 1987 if (ret == -EAGAIN) { 1988 locked_ref = NULL; 1989 count++; 1990 continue; 1991 } 1992 } 1993 1994 /* 1995 * record the must insert reserved flag before we 1996 * drop the spin lock. 1997 */ 1998 must_insert_reserved = locked_ref->must_insert_reserved; 1999 locked_ref->must_insert_reserved = 0; 2000 2001 extent_op = locked_ref->extent_op; 2002 locked_ref->extent_op = NULL; 2003 2004 /* 2005 * locked_ref is the head node, so we have to go one 2006 * node back for any delayed ref updates 2007 */ 2008 ref = select_delayed_ref(locked_ref); 2009 if (!ref) { 2010 /* All delayed refs have been processed, Go ahead 2011 * and send the head node to run_one_delayed_ref, 2012 * so that any accounting fixes can happen 2013 */ 2014 ref = &locked_ref->node; 2015 2016 if (extent_op && must_insert_reserved) { 2017 kfree(extent_op); 2018 extent_op = NULL; 2019 } 2020 2021 if (extent_op) { 2022 spin_unlock(&delayed_refs->lock); 2023 2024 ret = run_delayed_extent_op(trans, root, 2025 ref, extent_op); 2026 BUG_ON(ret); 2027 kfree(extent_op); 2028 2029 cond_resched(); 2030 spin_lock(&delayed_refs->lock); 2031 continue; 2032 } 2033 2034 list_del_init(&locked_ref->cluster); 2035 locked_ref = NULL; 2036 } 2037 2038 ref->in_tree = 0; 2039 rb_erase(&ref->rb_node, &delayed_refs->root); 2040 delayed_refs->num_entries--; 2041 2042 spin_unlock(&delayed_refs->lock); 2043 2044 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2045 must_insert_reserved); 2046 BUG_ON(ret); 2047 2048 btrfs_put_delayed_ref(ref); 2049 kfree(extent_op); 2050 count++; 2051 2052 cond_resched(); 2053 spin_lock(&delayed_refs->lock); 2054 } 2055 return count; 2056 } 2057 2058 /* 2059 * this starts processing the delayed reference count updates and 2060 * extent insertions we have queued up so far. count can be 2061 * 0, which means to process everything in the tree at the start 2062 * of the run (but not newly added entries), or it can be some target 2063 * number you'd like to process. 2064 */ 2065 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2066 struct btrfs_root *root, unsigned long count) 2067 { 2068 struct rb_node *node; 2069 struct btrfs_delayed_ref_root *delayed_refs; 2070 struct btrfs_delayed_ref_node *ref; 2071 struct list_head cluster; 2072 int ret; 2073 int run_all = count == (unsigned long)-1; 2074 int run_most = 0; 2075 2076 if (root == root->fs_info->extent_root) 2077 root = root->fs_info->tree_root; 2078 2079 delayed_refs = &trans->transaction->delayed_refs; 2080 INIT_LIST_HEAD(&cluster); 2081 again: 2082 spin_lock(&delayed_refs->lock); 2083 if (count == 0) { 2084 count = delayed_refs->num_entries * 2; 2085 run_most = 1; 2086 } 2087 while (1) { 2088 if (!(run_all || run_most) && 2089 delayed_refs->num_heads_ready < 64) 2090 break; 2091 2092 /* 2093 * go find something we can process in the rbtree. We start at 2094 * the beginning of the tree, and then build a cluster 2095 * of refs to process starting at the first one we are able to 2096 * lock 2097 */ 2098 ret = btrfs_find_ref_cluster(trans, &cluster, 2099 delayed_refs->run_delayed_start); 2100 if (ret) 2101 break; 2102 2103 ret = run_clustered_refs(trans, root, &cluster); 2104 BUG_ON(ret < 0); 2105 2106 count -= min_t(unsigned long, ret, count); 2107 2108 if (count == 0) 2109 break; 2110 } 2111 2112 if (run_all) { 2113 node = rb_first(&delayed_refs->root); 2114 if (!node) 2115 goto out; 2116 count = (unsigned long)-1; 2117 2118 while (node) { 2119 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2120 rb_node); 2121 if (btrfs_delayed_ref_is_head(ref)) { 2122 struct btrfs_delayed_ref_head *head; 2123 2124 head = btrfs_delayed_node_to_head(ref); 2125 atomic_inc(&ref->refs); 2126 2127 spin_unlock(&delayed_refs->lock); 2128 mutex_lock(&head->mutex); 2129 mutex_unlock(&head->mutex); 2130 2131 btrfs_put_delayed_ref(ref); 2132 cond_resched(); 2133 goto again; 2134 } 2135 node = rb_next(node); 2136 } 2137 spin_unlock(&delayed_refs->lock); 2138 schedule_timeout(1); 2139 goto again; 2140 } 2141 out: 2142 spin_unlock(&delayed_refs->lock); 2143 return 0; 2144 } 2145 2146 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2147 struct btrfs_root *root, 2148 u64 bytenr, u64 num_bytes, u64 flags, 2149 int is_data) 2150 { 2151 struct btrfs_delayed_extent_op *extent_op; 2152 int ret; 2153 2154 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2155 if (!extent_op) 2156 return -ENOMEM; 2157 2158 extent_op->flags_to_set = flags; 2159 extent_op->update_flags = 1; 2160 extent_op->update_key = 0; 2161 extent_op->is_data = is_data ? 1 : 0; 2162 2163 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2164 if (ret) 2165 kfree(extent_op); 2166 return ret; 2167 } 2168 2169 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2170 struct btrfs_root *root, 2171 struct btrfs_path *path, 2172 u64 objectid, u64 offset, u64 bytenr) 2173 { 2174 struct btrfs_delayed_ref_head *head; 2175 struct btrfs_delayed_ref_node *ref; 2176 struct btrfs_delayed_data_ref *data_ref; 2177 struct btrfs_delayed_ref_root *delayed_refs; 2178 struct rb_node *node; 2179 int ret = 0; 2180 2181 ret = -ENOENT; 2182 delayed_refs = &trans->transaction->delayed_refs; 2183 spin_lock(&delayed_refs->lock); 2184 head = btrfs_find_delayed_ref_head(trans, bytenr); 2185 if (!head) 2186 goto out; 2187 2188 if (!mutex_trylock(&head->mutex)) { 2189 atomic_inc(&head->node.refs); 2190 spin_unlock(&delayed_refs->lock); 2191 2192 btrfs_release_path(root->fs_info->extent_root, path); 2193 2194 mutex_lock(&head->mutex); 2195 mutex_unlock(&head->mutex); 2196 btrfs_put_delayed_ref(&head->node); 2197 return -EAGAIN; 2198 } 2199 2200 node = rb_prev(&head->node.rb_node); 2201 if (!node) 2202 goto out_unlock; 2203 2204 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2205 2206 if (ref->bytenr != bytenr) 2207 goto out_unlock; 2208 2209 ret = 1; 2210 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2211 goto out_unlock; 2212 2213 data_ref = btrfs_delayed_node_to_data_ref(ref); 2214 2215 node = rb_prev(node); 2216 if (node) { 2217 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2218 if (ref->bytenr == bytenr) 2219 goto out_unlock; 2220 } 2221 2222 if (data_ref->root != root->root_key.objectid || 2223 data_ref->objectid != objectid || data_ref->offset != offset) 2224 goto out_unlock; 2225 2226 ret = 0; 2227 out_unlock: 2228 mutex_unlock(&head->mutex); 2229 out: 2230 spin_unlock(&delayed_refs->lock); 2231 return ret; 2232 } 2233 2234 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2235 struct btrfs_root *root, 2236 struct btrfs_path *path, 2237 u64 objectid, u64 offset, u64 bytenr) 2238 { 2239 struct btrfs_root *extent_root = root->fs_info->extent_root; 2240 struct extent_buffer *leaf; 2241 struct btrfs_extent_data_ref *ref; 2242 struct btrfs_extent_inline_ref *iref; 2243 struct btrfs_extent_item *ei; 2244 struct btrfs_key key; 2245 u32 item_size; 2246 int ret; 2247 2248 key.objectid = bytenr; 2249 key.offset = (u64)-1; 2250 key.type = BTRFS_EXTENT_ITEM_KEY; 2251 2252 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2253 if (ret < 0) 2254 goto out; 2255 BUG_ON(ret == 0); 2256 2257 ret = -ENOENT; 2258 if (path->slots[0] == 0) 2259 goto out; 2260 2261 path->slots[0]--; 2262 leaf = path->nodes[0]; 2263 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2264 2265 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2266 goto out; 2267 2268 ret = 1; 2269 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2270 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2271 if (item_size < sizeof(*ei)) { 2272 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2273 goto out; 2274 } 2275 #endif 2276 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2277 2278 if (item_size != sizeof(*ei) + 2279 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2280 goto out; 2281 2282 if (btrfs_extent_generation(leaf, ei) <= 2283 btrfs_root_last_snapshot(&root->root_item)) 2284 goto out; 2285 2286 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2287 if (btrfs_extent_inline_ref_type(leaf, iref) != 2288 BTRFS_EXTENT_DATA_REF_KEY) 2289 goto out; 2290 2291 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2292 if (btrfs_extent_refs(leaf, ei) != 2293 btrfs_extent_data_ref_count(leaf, ref) || 2294 btrfs_extent_data_ref_root(leaf, ref) != 2295 root->root_key.objectid || 2296 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2297 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2298 goto out; 2299 2300 ret = 0; 2301 out: 2302 return ret; 2303 } 2304 2305 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2306 struct btrfs_root *root, 2307 u64 objectid, u64 offset, u64 bytenr) 2308 { 2309 struct btrfs_path *path; 2310 int ret; 2311 int ret2; 2312 2313 path = btrfs_alloc_path(); 2314 if (!path) 2315 return -ENOENT; 2316 2317 do { 2318 ret = check_committed_ref(trans, root, path, objectid, 2319 offset, bytenr); 2320 if (ret && ret != -ENOENT) 2321 goto out; 2322 2323 ret2 = check_delayed_ref(trans, root, path, objectid, 2324 offset, bytenr); 2325 } while (ret2 == -EAGAIN); 2326 2327 if (ret2 && ret2 != -ENOENT) { 2328 ret = ret2; 2329 goto out; 2330 } 2331 2332 if (ret != -ENOENT || ret2 != -ENOENT) 2333 ret = 0; 2334 out: 2335 btrfs_free_path(path); 2336 return ret; 2337 } 2338 2339 #if 0 2340 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2341 struct extent_buffer *buf, u32 nr_extents) 2342 { 2343 struct btrfs_key key; 2344 struct btrfs_file_extent_item *fi; 2345 u64 root_gen; 2346 u32 nritems; 2347 int i; 2348 int level; 2349 int ret = 0; 2350 int shared = 0; 2351 2352 if (!root->ref_cows) 2353 return 0; 2354 2355 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2356 shared = 0; 2357 root_gen = root->root_key.offset; 2358 } else { 2359 shared = 1; 2360 root_gen = trans->transid - 1; 2361 } 2362 2363 level = btrfs_header_level(buf); 2364 nritems = btrfs_header_nritems(buf); 2365 2366 if (level == 0) { 2367 struct btrfs_leaf_ref *ref; 2368 struct btrfs_extent_info *info; 2369 2370 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2371 if (!ref) { 2372 ret = -ENOMEM; 2373 goto out; 2374 } 2375 2376 ref->root_gen = root_gen; 2377 ref->bytenr = buf->start; 2378 ref->owner = btrfs_header_owner(buf); 2379 ref->generation = btrfs_header_generation(buf); 2380 ref->nritems = nr_extents; 2381 info = ref->extents; 2382 2383 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2384 u64 disk_bytenr; 2385 btrfs_item_key_to_cpu(buf, &key, i); 2386 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2387 continue; 2388 fi = btrfs_item_ptr(buf, i, 2389 struct btrfs_file_extent_item); 2390 if (btrfs_file_extent_type(buf, fi) == 2391 BTRFS_FILE_EXTENT_INLINE) 2392 continue; 2393 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2394 if (disk_bytenr == 0) 2395 continue; 2396 2397 info->bytenr = disk_bytenr; 2398 info->num_bytes = 2399 btrfs_file_extent_disk_num_bytes(buf, fi); 2400 info->objectid = key.objectid; 2401 info->offset = key.offset; 2402 info++; 2403 } 2404 2405 ret = btrfs_add_leaf_ref(root, ref, shared); 2406 if (ret == -EEXIST && shared) { 2407 struct btrfs_leaf_ref *old; 2408 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2409 BUG_ON(!old); 2410 btrfs_remove_leaf_ref(root, old); 2411 btrfs_free_leaf_ref(root, old); 2412 ret = btrfs_add_leaf_ref(root, ref, shared); 2413 } 2414 WARN_ON(ret); 2415 btrfs_free_leaf_ref(root, ref); 2416 } 2417 out: 2418 return ret; 2419 } 2420 2421 /* when a block goes through cow, we update the reference counts of 2422 * everything that block points to. The internal pointers of the block 2423 * can be in just about any order, and it is likely to have clusters of 2424 * things that are close together and clusters of things that are not. 2425 * 2426 * To help reduce the seeks that come with updating all of these reference 2427 * counts, sort them by byte number before actual updates are done. 2428 * 2429 * struct refsort is used to match byte number to slot in the btree block. 2430 * we sort based on the byte number and then use the slot to actually 2431 * find the item. 2432 * 2433 * struct refsort is smaller than strcut btrfs_item and smaller than 2434 * struct btrfs_key_ptr. Since we're currently limited to the page size 2435 * for a btree block, there's no way for a kmalloc of refsorts for a 2436 * single node to be bigger than a page. 2437 */ 2438 struct refsort { 2439 u64 bytenr; 2440 u32 slot; 2441 }; 2442 2443 /* 2444 * for passing into sort() 2445 */ 2446 static int refsort_cmp(const void *a_void, const void *b_void) 2447 { 2448 const struct refsort *a = a_void; 2449 const struct refsort *b = b_void; 2450 2451 if (a->bytenr < b->bytenr) 2452 return -1; 2453 if (a->bytenr > b->bytenr) 2454 return 1; 2455 return 0; 2456 } 2457 #endif 2458 2459 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2460 struct btrfs_root *root, 2461 struct extent_buffer *buf, 2462 int full_backref, int inc) 2463 { 2464 u64 bytenr; 2465 u64 num_bytes; 2466 u64 parent; 2467 u64 ref_root; 2468 u32 nritems; 2469 struct btrfs_key key; 2470 struct btrfs_file_extent_item *fi; 2471 int i; 2472 int level; 2473 int ret = 0; 2474 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2475 u64, u64, u64, u64, u64, u64); 2476 2477 ref_root = btrfs_header_owner(buf); 2478 nritems = btrfs_header_nritems(buf); 2479 level = btrfs_header_level(buf); 2480 2481 if (!root->ref_cows && level == 0) 2482 return 0; 2483 2484 if (inc) 2485 process_func = btrfs_inc_extent_ref; 2486 else 2487 process_func = btrfs_free_extent; 2488 2489 if (full_backref) 2490 parent = buf->start; 2491 else 2492 parent = 0; 2493 2494 for (i = 0; i < nritems; i++) { 2495 if (level == 0) { 2496 btrfs_item_key_to_cpu(buf, &key, i); 2497 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2498 continue; 2499 fi = btrfs_item_ptr(buf, i, 2500 struct btrfs_file_extent_item); 2501 if (btrfs_file_extent_type(buf, fi) == 2502 BTRFS_FILE_EXTENT_INLINE) 2503 continue; 2504 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2505 if (bytenr == 0) 2506 continue; 2507 2508 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2509 key.offset -= btrfs_file_extent_offset(buf, fi); 2510 ret = process_func(trans, root, bytenr, num_bytes, 2511 parent, ref_root, key.objectid, 2512 key.offset); 2513 if (ret) 2514 goto fail; 2515 } else { 2516 bytenr = btrfs_node_blockptr(buf, i); 2517 num_bytes = btrfs_level_size(root, level - 1); 2518 ret = process_func(trans, root, bytenr, num_bytes, 2519 parent, ref_root, level - 1, 0); 2520 if (ret) 2521 goto fail; 2522 } 2523 } 2524 return 0; 2525 fail: 2526 BUG(); 2527 return ret; 2528 } 2529 2530 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2531 struct extent_buffer *buf, int full_backref) 2532 { 2533 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2534 } 2535 2536 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2537 struct extent_buffer *buf, int full_backref) 2538 { 2539 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2540 } 2541 2542 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2543 struct btrfs_root *root, 2544 struct btrfs_path *path, 2545 struct btrfs_block_group_cache *cache) 2546 { 2547 int ret; 2548 struct btrfs_root *extent_root = root->fs_info->extent_root; 2549 unsigned long bi; 2550 struct extent_buffer *leaf; 2551 2552 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2553 if (ret < 0) 2554 goto fail; 2555 BUG_ON(ret); 2556 2557 leaf = path->nodes[0]; 2558 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2559 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2560 btrfs_mark_buffer_dirty(leaf); 2561 btrfs_release_path(extent_root, path); 2562 fail: 2563 if (ret) 2564 return ret; 2565 return 0; 2566 2567 } 2568 2569 static struct btrfs_block_group_cache * 2570 next_block_group(struct btrfs_root *root, 2571 struct btrfs_block_group_cache *cache) 2572 { 2573 struct rb_node *node; 2574 spin_lock(&root->fs_info->block_group_cache_lock); 2575 node = rb_next(&cache->cache_node); 2576 btrfs_put_block_group(cache); 2577 if (node) { 2578 cache = rb_entry(node, struct btrfs_block_group_cache, 2579 cache_node); 2580 atomic_inc(&cache->count); 2581 } else 2582 cache = NULL; 2583 spin_unlock(&root->fs_info->block_group_cache_lock); 2584 return cache; 2585 } 2586 2587 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2588 struct btrfs_root *root) 2589 { 2590 struct btrfs_block_group_cache *cache; 2591 int err = 0; 2592 struct btrfs_path *path; 2593 u64 last = 0; 2594 2595 path = btrfs_alloc_path(); 2596 if (!path) 2597 return -ENOMEM; 2598 2599 while (1) { 2600 if (last == 0) { 2601 err = btrfs_run_delayed_refs(trans, root, 2602 (unsigned long)-1); 2603 BUG_ON(err); 2604 } 2605 2606 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2607 while (cache) { 2608 if (cache->dirty) 2609 break; 2610 cache = next_block_group(root, cache); 2611 } 2612 if (!cache) { 2613 if (last == 0) 2614 break; 2615 last = 0; 2616 continue; 2617 } 2618 2619 cache->dirty = 0; 2620 last = cache->key.objectid + cache->key.offset; 2621 2622 err = write_one_cache_group(trans, root, path, cache); 2623 BUG_ON(err); 2624 btrfs_put_block_group(cache); 2625 } 2626 2627 btrfs_free_path(path); 2628 return 0; 2629 } 2630 2631 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2632 { 2633 struct btrfs_block_group_cache *block_group; 2634 int readonly = 0; 2635 2636 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2637 if (!block_group || block_group->ro) 2638 readonly = 1; 2639 if (block_group) 2640 btrfs_put_block_group(block_group); 2641 return readonly; 2642 } 2643 2644 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2645 u64 total_bytes, u64 bytes_used, 2646 struct btrfs_space_info **space_info) 2647 { 2648 struct btrfs_space_info *found; 2649 2650 found = __find_space_info(info, flags); 2651 if (found) { 2652 spin_lock(&found->lock); 2653 found->total_bytes += total_bytes; 2654 found->bytes_used += bytes_used; 2655 found->full = 0; 2656 spin_unlock(&found->lock); 2657 *space_info = found; 2658 return 0; 2659 } 2660 found = kzalloc(sizeof(*found), GFP_NOFS); 2661 if (!found) 2662 return -ENOMEM; 2663 2664 INIT_LIST_HEAD(&found->block_groups); 2665 init_rwsem(&found->groups_sem); 2666 spin_lock_init(&found->lock); 2667 found->flags = flags; 2668 found->total_bytes = total_bytes; 2669 found->bytes_used = bytes_used; 2670 found->bytes_pinned = 0; 2671 found->bytes_reserved = 0; 2672 found->bytes_readonly = 0; 2673 found->bytes_delalloc = 0; 2674 found->full = 0; 2675 found->force_alloc = 0; 2676 *space_info = found; 2677 list_add_rcu(&found->list, &info->space_info); 2678 atomic_set(&found->caching_threads, 0); 2679 return 0; 2680 } 2681 2682 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2683 { 2684 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2685 BTRFS_BLOCK_GROUP_RAID1 | 2686 BTRFS_BLOCK_GROUP_RAID10 | 2687 BTRFS_BLOCK_GROUP_DUP); 2688 if (extra_flags) { 2689 if (flags & BTRFS_BLOCK_GROUP_DATA) 2690 fs_info->avail_data_alloc_bits |= extra_flags; 2691 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2692 fs_info->avail_metadata_alloc_bits |= extra_flags; 2693 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2694 fs_info->avail_system_alloc_bits |= extra_flags; 2695 } 2696 } 2697 2698 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 2699 { 2700 spin_lock(&cache->space_info->lock); 2701 spin_lock(&cache->lock); 2702 if (!cache->ro) { 2703 cache->space_info->bytes_readonly += cache->key.offset - 2704 btrfs_block_group_used(&cache->item); 2705 cache->ro = 1; 2706 } 2707 spin_unlock(&cache->lock); 2708 spin_unlock(&cache->space_info->lock); 2709 } 2710 2711 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2712 { 2713 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2714 2715 if (num_devices == 1) 2716 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2717 if (num_devices < 4) 2718 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2719 2720 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2721 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2722 BTRFS_BLOCK_GROUP_RAID10))) { 2723 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2724 } 2725 2726 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2727 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2728 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2729 } 2730 2731 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2732 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2733 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2734 (flags & BTRFS_BLOCK_GROUP_DUP))) 2735 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2736 return flags; 2737 } 2738 2739 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2740 { 2741 struct btrfs_fs_info *info = root->fs_info; 2742 u64 alloc_profile; 2743 2744 if (data) { 2745 alloc_profile = info->avail_data_alloc_bits & 2746 info->data_alloc_profile; 2747 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2748 } else if (root == root->fs_info->chunk_root) { 2749 alloc_profile = info->avail_system_alloc_bits & 2750 info->system_alloc_profile; 2751 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2752 } else { 2753 alloc_profile = info->avail_metadata_alloc_bits & 2754 info->metadata_alloc_profile; 2755 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2756 } 2757 2758 return btrfs_reduce_alloc_profile(root, data); 2759 } 2760 2761 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2762 { 2763 u64 alloc_target; 2764 2765 alloc_target = btrfs_get_alloc_profile(root, 1); 2766 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2767 alloc_target); 2768 } 2769 2770 static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) 2771 { 2772 u64 num_bytes; 2773 int level; 2774 2775 level = BTRFS_MAX_LEVEL - 2; 2776 /* 2777 * NOTE: these calculations are absolutely the worst possible case. 2778 * This assumes that _every_ item we insert will require a new leaf, and 2779 * that the tree has grown to its maximum level size. 2780 */ 2781 2782 /* 2783 * for every item we insert we could insert both an extent item and a 2784 * extent ref item. Then for ever item we insert, we will need to cow 2785 * both the original leaf, plus the leaf to the left and right of it. 2786 * 2787 * Unless we are talking about the extent root, then we just want the 2788 * number of items * 2, since we just need the extent item plus its ref. 2789 */ 2790 if (root == root->fs_info->extent_root) 2791 num_bytes = num_items * 2; 2792 else 2793 num_bytes = (num_items + (2 * num_items)) * 3; 2794 2795 /* 2796 * num_bytes is total number of leaves we could need times the leaf 2797 * size, and then for every leaf we could end up cow'ing 2 nodes per 2798 * level, down to the leaf level. 2799 */ 2800 num_bytes = (num_bytes * root->leafsize) + 2801 (num_bytes * (level * 2)) * root->nodesize; 2802 2803 return num_bytes; 2804 } 2805 2806 /* 2807 * Unreserve metadata space for delalloc. If we have less reserved credits than 2808 * we have extents, this function does nothing. 2809 */ 2810 int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, 2811 struct inode *inode, int num_items) 2812 { 2813 struct btrfs_fs_info *info = root->fs_info; 2814 struct btrfs_space_info *meta_sinfo; 2815 u64 num_bytes; 2816 u64 alloc_target; 2817 bool bug = false; 2818 2819 /* get the space info for where the metadata will live */ 2820 alloc_target = btrfs_get_alloc_profile(root, 0); 2821 meta_sinfo = __find_space_info(info, alloc_target); 2822 2823 num_bytes = calculate_bytes_needed(root->fs_info->extent_root, 2824 num_items); 2825 2826 spin_lock(&meta_sinfo->lock); 2827 if (BTRFS_I(inode)->delalloc_reserved_extents <= 2828 BTRFS_I(inode)->delalloc_extents) { 2829 spin_unlock(&meta_sinfo->lock); 2830 return 0; 2831 } 2832 2833 BTRFS_I(inode)->delalloc_reserved_extents--; 2834 BUG_ON(BTRFS_I(inode)->delalloc_reserved_extents < 0); 2835 2836 if (meta_sinfo->bytes_delalloc < num_bytes) { 2837 bug = true; 2838 meta_sinfo->bytes_delalloc = 0; 2839 } else { 2840 meta_sinfo->bytes_delalloc -= num_bytes; 2841 } 2842 spin_unlock(&meta_sinfo->lock); 2843 2844 BUG_ON(bug); 2845 2846 return 0; 2847 } 2848 2849 static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) 2850 { 2851 u64 thresh; 2852 2853 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2854 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2855 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 2856 meta_sinfo->bytes_may_use; 2857 2858 thresh = meta_sinfo->total_bytes - thresh; 2859 thresh *= 80; 2860 do_div(thresh, 100); 2861 if (thresh <= meta_sinfo->bytes_delalloc) 2862 meta_sinfo->force_delalloc = 1; 2863 else 2864 meta_sinfo->force_delalloc = 0; 2865 } 2866 2867 static int maybe_allocate_chunk(struct btrfs_root *root, 2868 struct btrfs_space_info *info) 2869 { 2870 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 2871 struct btrfs_trans_handle *trans; 2872 bool wait = false; 2873 int ret = 0; 2874 u64 min_metadata; 2875 u64 free_space; 2876 2877 free_space = btrfs_super_total_bytes(disk_super); 2878 /* 2879 * we allow the metadata to grow to a max of either 5gb or 5% of the 2880 * space in the volume. 2881 */ 2882 min_metadata = min((u64)5 * 1024 * 1024 * 1024, 2883 div64_u64(free_space * 5, 100)); 2884 if (info->total_bytes >= min_metadata) { 2885 spin_unlock(&info->lock); 2886 return 0; 2887 } 2888 2889 if (info->full) { 2890 spin_unlock(&info->lock); 2891 return 0; 2892 } 2893 2894 if (!info->allocating_chunk) { 2895 info->force_alloc = 1; 2896 info->allocating_chunk = 1; 2897 init_waitqueue_head(&info->wait); 2898 } else { 2899 wait = true; 2900 } 2901 2902 spin_unlock(&info->lock); 2903 2904 if (wait) { 2905 wait_event(info->wait, 2906 !info->allocating_chunk); 2907 return 1; 2908 } 2909 2910 trans = btrfs_start_transaction(root, 1); 2911 if (!trans) { 2912 ret = -ENOMEM; 2913 goto out; 2914 } 2915 2916 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2917 4096 + 2 * 1024 * 1024, 2918 info->flags, 0); 2919 btrfs_end_transaction(trans, root); 2920 if (ret) 2921 goto out; 2922 out: 2923 spin_lock(&info->lock); 2924 info->allocating_chunk = 0; 2925 spin_unlock(&info->lock); 2926 wake_up(&info->wait); 2927 2928 if (ret) 2929 return 0; 2930 return 1; 2931 } 2932 2933 /* 2934 * Reserve metadata space for delalloc. 2935 */ 2936 int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, 2937 struct inode *inode, int num_items) 2938 { 2939 struct btrfs_fs_info *info = root->fs_info; 2940 struct btrfs_space_info *meta_sinfo; 2941 u64 num_bytes; 2942 u64 used; 2943 u64 alloc_target; 2944 int flushed = 0; 2945 int force_delalloc; 2946 2947 /* get the space info for where the metadata will live */ 2948 alloc_target = btrfs_get_alloc_profile(root, 0); 2949 meta_sinfo = __find_space_info(info, alloc_target); 2950 2951 num_bytes = calculate_bytes_needed(root->fs_info->extent_root, 2952 num_items); 2953 again: 2954 spin_lock(&meta_sinfo->lock); 2955 2956 force_delalloc = meta_sinfo->force_delalloc; 2957 2958 if (unlikely(!meta_sinfo->bytes_root)) 2959 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); 2960 2961 if (!flushed) 2962 meta_sinfo->bytes_delalloc += num_bytes; 2963 2964 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2965 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2966 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 2967 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; 2968 2969 if (used > meta_sinfo->total_bytes) { 2970 flushed++; 2971 2972 if (flushed == 1) { 2973 if (maybe_allocate_chunk(root, meta_sinfo)) 2974 goto again; 2975 flushed++; 2976 } else { 2977 spin_unlock(&meta_sinfo->lock); 2978 } 2979 2980 if (flushed == 2) { 2981 filemap_flush(inode->i_mapping); 2982 goto again; 2983 } else if (flushed == 3) { 2984 btrfs_start_delalloc_inodes(root); 2985 btrfs_wait_ordered_extents(root, 0); 2986 goto again; 2987 } 2988 spin_lock(&meta_sinfo->lock); 2989 meta_sinfo->bytes_delalloc -= num_bytes; 2990 spin_unlock(&meta_sinfo->lock); 2991 printk(KERN_ERR "enospc, has %d, reserved %d\n", 2992 BTRFS_I(inode)->delalloc_extents, 2993 BTRFS_I(inode)->delalloc_reserved_extents); 2994 dump_space_info(meta_sinfo, 0, 0); 2995 return -ENOSPC; 2996 } 2997 2998 BTRFS_I(inode)->delalloc_reserved_extents++; 2999 check_force_delalloc(meta_sinfo); 3000 spin_unlock(&meta_sinfo->lock); 3001 3002 if (!flushed && force_delalloc) 3003 filemap_flush(inode->i_mapping); 3004 3005 return 0; 3006 } 3007 3008 /* 3009 * unreserve num_items number of items worth of metadata space. This needs to 3010 * be paired with btrfs_reserve_metadata_space. 3011 * 3012 * NOTE: if you have the option, run this _AFTER_ you do a 3013 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref 3014 * oprations which will result in more used metadata, so we want to make sure we 3015 * can do that without issue. 3016 */ 3017 int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) 3018 { 3019 struct btrfs_fs_info *info = root->fs_info; 3020 struct btrfs_space_info *meta_sinfo; 3021 u64 num_bytes; 3022 u64 alloc_target; 3023 bool bug = false; 3024 3025 /* get the space info for where the metadata will live */ 3026 alloc_target = btrfs_get_alloc_profile(root, 0); 3027 meta_sinfo = __find_space_info(info, alloc_target); 3028 3029 num_bytes = calculate_bytes_needed(root, num_items); 3030 3031 spin_lock(&meta_sinfo->lock); 3032 if (meta_sinfo->bytes_may_use < num_bytes) { 3033 bug = true; 3034 meta_sinfo->bytes_may_use = 0; 3035 } else { 3036 meta_sinfo->bytes_may_use -= num_bytes; 3037 } 3038 spin_unlock(&meta_sinfo->lock); 3039 3040 BUG_ON(bug); 3041 3042 return 0; 3043 } 3044 3045 /* 3046 * Reserve some metadata space for use. We'll calculate the worste case number 3047 * of bytes that would be needed to modify num_items number of items. If we 3048 * have space, fantastic, if not, you get -ENOSPC. Please call 3049 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of 3050 * items you reserved, since whatever metadata you needed should have already 3051 * been allocated. 3052 * 3053 * This will commit the transaction to make more space if we don't have enough 3054 * metadata space. THe only time we don't do this is if we're reserving space 3055 * inside of a transaction, then we will just return -ENOSPC and it is the 3056 * callers responsibility to handle it properly. 3057 */ 3058 int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) 3059 { 3060 struct btrfs_fs_info *info = root->fs_info; 3061 struct btrfs_space_info *meta_sinfo; 3062 u64 num_bytes; 3063 u64 used; 3064 u64 alloc_target; 3065 int retries = 0; 3066 3067 /* get the space info for where the metadata will live */ 3068 alloc_target = btrfs_get_alloc_profile(root, 0); 3069 meta_sinfo = __find_space_info(info, alloc_target); 3070 3071 num_bytes = calculate_bytes_needed(root, num_items); 3072 again: 3073 spin_lock(&meta_sinfo->lock); 3074 3075 if (unlikely(!meta_sinfo->bytes_root)) 3076 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); 3077 3078 if (!retries) 3079 meta_sinfo->bytes_may_use += num_bytes; 3080 3081 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 3082 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 3083 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 3084 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; 3085 3086 if (used > meta_sinfo->total_bytes) { 3087 retries++; 3088 if (retries == 1) { 3089 if (maybe_allocate_chunk(root, meta_sinfo)) 3090 goto again; 3091 retries++; 3092 } else { 3093 spin_unlock(&meta_sinfo->lock); 3094 } 3095 3096 if (retries == 2) { 3097 btrfs_start_delalloc_inodes(root); 3098 btrfs_wait_ordered_extents(root, 0); 3099 goto again; 3100 } 3101 spin_lock(&meta_sinfo->lock); 3102 meta_sinfo->bytes_may_use -= num_bytes; 3103 spin_unlock(&meta_sinfo->lock); 3104 3105 dump_space_info(meta_sinfo, 0, 0); 3106 return -ENOSPC; 3107 } 3108 3109 check_force_delalloc(meta_sinfo); 3110 spin_unlock(&meta_sinfo->lock); 3111 3112 return 0; 3113 } 3114 3115 /* 3116 * This will check the space that the inode allocates from to make sure we have 3117 * enough space for bytes. 3118 */ 3119 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 3120 u64 bytes) 3121 { 3122 struct btrfs_space_info *data_sinfo; 3123 int ret = 0, committed = 0; 3124 3125 /* make sure bytes are sectorsize aligned */ 3126 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3127 3128 data_sinfo = BTRFS_I(inode)->space_info; 3129 if (!data_sinfo) 3130 goto alloc; 3131 3132 again: 3133 /* make sure we have enough space to handle the data first */ 3134 spin_lock(&data_sinfo->lock); 3135 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 3136 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 3137 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 3138 data_sinfo->bytes_may_use - data_sinfo->bytes_super < bytes) { 3139 struct btrfs_trans_handle *trans; 3140 3141 /* 3142 * if we don't have enough free bytes in this space then we need 3143 * to alloc a new chunk. 3144 */ 3145 if (!data_sinfo->full) { 3146 u64 alloc_target; 3147 3148 data_sinfo->force_alloc = 1; 3149 spin_unlock(&data_sinfo->lock); 3150 alloc: 3151 alloc_target = btrfs_get_alloc_profile(root, 1); 3152 trans = btrfs_start_transaction(root, 1); 3153 if (!trans) 3154 return -ENOMEM; 3155 3156 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3157 bytes + 2 * 1024 * 1024, 3158 alloc_target, 0); 3159 btrfs_end_transaction(trans, root); 3160 if (ret) 3161 return ret; 3162 3163 if (!data_sinfo) { 3164 btrfs_set_inode_space_info(root, inode); 3165 data_sinfo = BTRFS_I(inode)->space_info; 3166 } 3167 goto again; 3168 } 3169 spin_unlock(&data_sinfo->lock); 3170 3171 /* commit the current transaction and try again */ 3172 if (!committed && !root->fs_info->open_ioctl_trans) { 3173 committed = 1; 3174 trans = btrfs_join_transaction(root, 1); 3175 if (!trans) 3176 return -ENOMEM; 3177 ret = btrfs_commit_transaction(trans, root); 3178 if (ret) 3179 return ret; 3180 goto again; 3181 } 3182 3183 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 3184 ", %llu bytes_used, %llu bytes_reserved, " 3185 "%llu bytes_pinned, %llu bytes_readonly, %llu may use " 3186 "%llu total\n", (unsigned long long)bytes, 3187 (unsigned long long)data_sinfo->bytes_delalloc, 3188 (unsigned long long)data_sinfo->bytes_used, 3189 (unsigned long long)data_sinfo->bytes_reserved, 3190 (unsigned long long)data_sinfo->bytes_pinned, 3191 (unsigned long long)data_sinfo->bytes_readonly, 3192 (unsigned long long)data_sinfo->bytes_may_use, 3193 (unsigned long long)data_sinfo->total_bytes); 3194 return -ENOSPC; 3195 } 3196 data_sinfo->bytes_may_use += bytes; 3197 BTRFS_I(inode)->reserved_bytes += bytes; 3198 spin_unlock(&data_sinfo->lock); 3199 3200 return 0; 3201 } 3202 3203 /* 3204 * if there was an error for whatever reason after calling 3205 * btrfs_check_data_free_space, call this so we can cleanup the counters. 3206 */ 3207 void btrfs_free_reserved_data_space(struct btrfs_root *root, 3208 struct inode *inode, u64 bytes) 3209 { 3210 struct btrfs_space_info *data_sinfo; 3211 3212 /* make sure bytes are sectorsize aligned */ 3213 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3214 3215 data_sinfo = BTRFS_I(inode)->space_info; 3216 spin_lock(&data_sinfo->lock); 3217 data_sinfo->bytes_may_use -= bytes; 3218 BTRFS_I(inode)->reserved_bytes -= bytes; 3219 spin_unlock(&data_sinfo->lock); 3220 } 3221 3222 /* called when we are adding a delalloc extent to the inode's io_tree */ 3223 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 3224 u64 bytes) 3225 { 3226 struct btrfs_space_info *data_sinfo; 3227 3228 /* get the space info for where this inode will be storing its data */ 3229 data_sinfo = BTRFS_I(inode)->space_info; 3230 3231 /* make sure we have enough space to handle the data first */ 3232 spin_lock(&data_sinfo->lock); 3233 data_sinfo->bytes_delalloc += bytes; 3234 3235 /* 3236 * we are adding a delalloc extent without calling 3237 * btrfs_check_data_free_space first. This happens on a weird 3238 * writepage condition, but shouldn't hurt our accounting 3239 */ 3240 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 3241 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 3242 BTRFS_I(inode)->reserved_bytes = 0; 3243 } else { 3244 data_sinfo->bytes_may_use -= bytes; 3245 BTRFS_I(inode)->reserved_bytes -= bytes; 3246 } 3247 3248 spin_unlock(&data_sinfo->lock); 3249 } 3250 3251 /* called when we are clearing an delalloc extent from the inode's io_tree */ 3252 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 3253 u64 bytes) 3254 { 3255 struct btrfs_space_info *info; 3256 3257 info = BTRFS_I(inode)->space_info; 3258 3259 spin_lock(&info->lock); 3260 info->bytes_delalloc -= bytes; 3261 spin_unlock(&info->lock); 3262 } 3263 3264 static void force_metadata_allocation(struct btrfs_fs_info *info) 3265 { 3266 struct list_head *head = &info->space_info; 3267 struct btrfs_space_info *found; 3268 3269 rcu_read_lock(); 3270 list_for_each_entry_rcu(found, head, list) { 3271 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3272 found->force_alloc = 1; 3273 } 3274 rcu_read_unlock(); 3275 } 3276 3277 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3278 struct btrfs_root *extent_root, u64 alloc_bytes, 3279 u64 flags, int force) 3280 { 3281 struct btrfs_space_info *space_info; 3282 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3283 u64 thresh; 3284 int ret = 0; 3285 3286 mutex_lock(&fs_info->chunk_mutex); 3287 3288 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3289 3290 space_info = __find_space_info(extent_root->fs_info, flags); 3291 if (!space_info) { 3292 ret = update_space_info(extent_root->fs_info, flags, 3293 0, 0, &space_info); 3294 BUG_ON(ret); 3295 } 3296 BUG_ON(!space_info); 3297 3298 spin_lock(&space_info->lock); 3299 if (space_info->force_alloc) 3300 force = 1; 3301 if (space_info->full) { 3302 spin_unlock(&space_info->lock); 3303 goto out; 3304 } 3305 3306 thresh = space_info->total_bytes - space_info->bytes_readonly; 3307 thresh = div_factor(thresh, 8); 3308 if (!force && 3309 (space_info->bytes_used + space_info->bytes_pinned + 3310 space_info->bytes_reserved + alloc_bytes) < thresh) { 3311 spin_unlock(&space_info->lock); 3312 goto out; 3313 } 3314 spin_unlock(&space_info->lock); 3315 3316 /* 3317 * if we're doing a data chunk, go ahead and make sure that 3318 * we keep a reasonable number of metadata chunks allocated in the 3319 * FS as well. 3320 */ 3321 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3322 fs_info->data_chunk_allocations++; 3323 if (!(fs_info->data_chunk_allocations % 3324 fs_info->metadata_ratio)) 3325 force_metadata_allocation(fs_info); 3326 } 3327 3328 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3329 spin_lock(&space_info->lock); 3330 if (ret) 3331 space_info->full = 1; 3332 space_info->force_alloc = 0; 3333 spin_unlock(&space_info->lock); 3334 out: 3335 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3336 return ret; 3337 } 3338 3339 static int update_block_group(struct btrfs_trans_handle *trans, 3340 struct btrfs_root *root, 3341 u64 bytenr, u64 num_bytes, int alloc, 3342 int mark_free) 3343 { 3344 struct btrfs_block_group_cache *cache; 3345 struct btrfs_fs_info *info = root->fs_info; 3346 u64 total = num_bytes; 3347 u64 old_val; 3348 u64 byte_in_group; 3349 3350 /* block accounting for super block */ 3351 spin_lock(&info->delalloc_lock); 3352 old_val = btrfs_super_bytes_used(&info->super_copy); 3353 if (alloc) 3354 old_val += num_bytes; 3355 else 3356 old_val -= num_bytes; 3357 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3358 3359 /* block accounting for root item */ 3360 old_val = btrfs_root_used(&root->root_item); 3361 if (alloc) 3362 old_val += num_bytes; 3363 else 3364 old_val -= num_bytes; 3365 btrfs_set_root_used(&root->root_item, old_val); 3366 spin_unlock(&info->delalloc_lock); 3367 3368 while (total) { 3369 cache = btrfs_lookup_block_group(info, bytenr); 3370 if (!cache) 3371 return -1; 3372 byte_in_group = bytenr - cache->key.objectid; 3373 WARN_ON(byte_in_group > cache->key.offset); 3374 3375 spin_lock(&cache->space_info->lock); 3376 spin_lock(&cache->lock); 3377 cache->dirty = 1; 3378 old_val = btrfs_block_group_used(&cache->item); 3379 num_bytes = min(total, cache->key.offset - byte_in_group); 3380 if (alloc) { 3381 old_val += num_bytes; 3382 btrfs_set_block_group_used(&cache->item, old_val); 3383 cache->reserved -= num_bytes; 3384 cache->space_info->bytes_used += num_bytes; 3385 cache->space_info->bytes_reserved -= num_bytes; 3386 if (cache->ro) 3387 cache->space_info->bytes_readonly -= num_bytes; 3388 spin_unlock(&cache->lock); 3389 spin_unlock(&cache->space_info->lock); 3390 } else { 3391 old_val -= num_bytes; 3392 cache->space_info->bytes_used -= num_bytes; 3393 if (cache->ro) 3394 cache->space_info->bytes_readonly += num_bytes; 3395 btrfs_set_block_group_used(&cache->item, old_val); 3396 spin_unlock(&cache->lock); 3397 spin_unlock(&cache->space_info->lock); 3398 if (mark_free) { 3399 int ret; 3400 3401 ret = btrfs_discard_extent(root, bytenr, 3402 num_bytes); 3403 WARN_ON(ret); 3404 3405 ret = btrfs_add_free_space(cache, bytenr, 3406 num_bytes); 3407 WARN_ON(ret); 3408 } 3409 } 3410 btrfs_put_block_group(cache); 3411 total -= num_bytes; 3412 bytenr += num_bytes; 3413 } 3414 return 0; 3415 } 3416 3417 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 3418 { 3419 struct btrfs_block_group_cache *cache; 3420 u64 bytenr; 3421 3422 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 3423 if (!cache) 3424 return 0; 3425 3426 bytenr = cache->key.objectid; 3427 btrfs_put_block_group(cache); 3428 3429 return bytenr; 3430 } 3431 3432 /* 3433 * this function must be called within transaction 3434 */ 3435 int btrfs_pin_extent(struct btrfs_root *root, 3436 u64 bytenr, u64 num_bytes, int reserved) 3437 { 3438 struct btrfs_fs_info *fs_info = root->fs_info; 3439 struct btrfs_block_group_cache *cache; 3440 3441 cache = btrfs_lookup_block_group(fs_info, bytenr); 3442 BUG_ON(!cache); 3443 3444 spin_lock(&cache->space_info->lock); 3445 spin_lock(&cache->lock); 3446 cache->pinned += num_bytes; 3447 cache->space_info->bytes_pinned += num_bytes; 3448 if (reserved) { 3449 cache->reserved -= num_bytes; 3450 cache->space_info->bytes_reserved -= num_bytes; 3451 } 3452 spin_unlock(&cache->lock); 3453 spin_unlock(&cache->space_info->lock); 3454 3455 btrfs_put_block_group(cache); 3456 3457 set_extent_dirty(fs_info->pinned_extents, 3458 bytenr, bytenr + num_bytes - 1, GFP_NOFS); 3459 return 0; 3460 } 3461 3462 static int update_reserved_extents(struct btrfs_block_group_cache *cache, 3463 u64 num_bytes, int reserve) 3464 { 3465 spin_lock(&cache->space_info->lock); 3466 spin_lock(&cache->lock); 3467 if (reserve) { 3468 cache->reserved += num_bytes; 3469 cache->space_info->bytes_reserved += num_bytes; 3470 } else { 3471 cache->reserved -= num_bytes; 3472 cache->space_info->bytes_reserved -= num_bytes; 3473 } 3474 spin_unlock(&cache->lock); 3475 spin_unlock(&cache->space_info->lock); 3476 return 0; 3477 } 3478 3479 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3480 struct btrfs_root *root) 3481 { 3482 struct btrfs_fs_info *fs_info = root->fs_info; 3483 struct btrfs_caching_control *next; 3484 struct btrfs_caching_control *caching_ctl; 3485 struct btrfs_block_group_cache *cache; 3486 3487 down_write(&fs_info->extent_commit_sem); 3488 3489 list_for_each_entry_safe(caching_ctl, next, 3490 &fs_info->caching_block_groups, list) { 3491 cache = caching_ctl->block_group; 3492 if (block_group_cache_done(cache)) { 3493 cache->last_byte_to_unpin = (u64)-1; 3494 list_del_init(&caching_ctl->list); 3495 put_caching_control(caching_ctl); 3496 } else { 3497 cache->last_byte_to_unpin = caching_ctl->progress; 3498 } 3499 } 3500 3501 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3502 fs_info->pinned_extents = &fs_info->freed_extents[1]; 3503 else 3504 fs_info->pinned_extents = &fs_info->freed_extents[0]; 3505 3506 up_write(&fs_info->extent_commit_sem); 3507 return 0; 3508 } 3509 3510 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 3511 { 3512 struct btrfs_fs_info *fs_info = root->fs_info; 3513 struct btrfs_block_group_cache *cache = NULL; 3514 u64 len; 3515 3516 while (start <= end) { 3517 if (!cache || 3518 start >= cache->key.objectid + cache->key.offset) { 3519 if (cache) 3520 btrfs_put_block_group(cache); 3521 cache = btrfs_lookup_block_group(fs_info, start); 3522 BUG_ON(!cache); 3523 } 3524 3525 len = cache->key.objectid + cache->key.offset - start; 3526 len = min(len, end + 1 - start); 3527 3528 if (start < cache->last_byte_to_unpin) { 3529 len = min(len, cache->last_byte_to_unpin - start); 3530 btrfs_add_free_space(cache, start, len); 3531 } 3532 3533 spin_lock(&cache->space_info->lock); 3534 spin_lock(&cache->lock); 3535 cache->pinned -= len; 3536 cache->space_info->bytes_pinned -= len; 3537 spin_unlock(&cache->lock); 3538 spin_unlock(&cache->space_info->lock); 3539 3540 start += len; 3541 } 3542 3543 if (cache) 3544 btrfs_put_block_group(cache); 3545 return 0; 3546 } 3547 3548 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3549 struct btrfs_root *root) 3550 { 3551 struct btrfs_fs_info *fs_info = root->fs_info; 3552 struct extent_io_tree *unpin; 3553 u64 start; 3554 u64 end; 3555 int ret; 3556 3557 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3558 unpin = &fs_info->freed_extents[1]; 3559 else 3560 unpin = &fs_info->freed_extents[0]; 3561 3562 while (1) { 3563 ret = find_first_extent_bit(unpin, 0, &start, &end, 3564 EXTENT_DIRTY); 3565 if (ret) 3566 break; 3567 3568 ret = btrfs_discard_extent(root, start, end + 1 - start); 3569 3570 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3571 unpin_extent_range(root, start, end); 3572 cond_resched(); 3573 } 3574 3575 return ret; 3576 } 3577 3578 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3579 struct btrfs_root *root, 3580 struct btrfs_path *path, 3581 u64 bytenr, u64 num_bytes, 3582 int is_data, int reserved, 3583 struct extent_buffer **must_clean) 3584 { 3585 int err = 0; 3586 struct extent_buffer *buf; 3587 3588 if (is_data) 3589 goto pinit; 3590 3591 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3592 if (!buf) 3593 goto pinit; 3594 3595 /* we can reuse a block if it hasn't been written 3596 * and it is from this transaction. We can't 3597 * reuse anything from the tree log root because 3598 * it has tiny sub-transactions. 3599 */ 3600 if (btrfs_buffer_uptodate(buf, 0) && 3601 btrfs_try_tree_lock(buf)) { 3602 u64 header_owner = btrfs_header_owner(buf); 3603 u64 header_transid = btrfs_header_generation(buf); 3604 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3605 header_transid == trans->transid && 3606 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3607 *must_clean = buf; 3608 return 1; 3609 } 3610 btrfs_tree_unlock(buf); 3611 } 3612 free_extent_buffer(buf); 3613 pinit: 3614 if (path) 3615 btrfs_set_path_blocking(path); 3616 /* unlocks the pinned mutex */ 3617 btrfs_pin_extent(root, bytenr, num_bytes, reserved); 3618 3619 BUG_ON(err < 0); 3620 return 0; 3621 } 3622 3623 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3624 struct btrfs_root *root, 3625 u64 bytenr, u64 num_bytes, u64 parent, 3626 u64 root_objectid, u64 owner_objectid, 3627 u64 owner_offset, int refs_to_drop, 3628 struct btrfs_delayed_extent_op *extent_op) 3629 { 3630 struct btrfs_key key; 3631 struct btrfs_path *path; 3632 struct btrfs_fs_info *info = root->fs_info; 3633 struct btrfs_root *extent_root = info->extent_root; 3634 struct extent_buffer *leaf; 3635 struct btrfs_extent_item *ei; 3636 struct btrfs_extent_inline_ref *iref; 3637 int ret; 3638 int is_data; 3639 int extent_slot = 0; 3640 int found_extent = 0; 3641 int num_to_del = 1; 3642 u32 item_size; 3643 u64 refs; 3644 3645 path = btrfs_alloc_path(); 3646 if (!path) 3647 return -ENOMEM; 3648 3649 path->reada = 1; 3650 path->leave_spinning = 1; 3651 3652 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3653 BUG_ON(!is_data && refs_to_drop != 1); 3654 3655 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3656 bytenr, num_bytes, parent, 3657 root_objectid, owner_objectid, 3658 owner_offset); 3659 if (ret == 0) { 3660 extent_slot = path->slots[0]; 3661 while (extent_slot >= 0) { 3662 btrfs_item_key_to_cpu(path->nodes[0], &key, 3663 extent_slot); 3664 if (key.objectid != bytenr) 3665 break; 3666 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3667 key.offset == num_bytes) { 3668 found_extent = 1; 3669 break; 3670 } 3671 if (path->slots[0] - extent_slot > 5) 3672 break; 3673 extent_slot--; 3674 } 3675 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3676 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3677 if (found_extent && item_size < sizeof(*ei)) 3678 found_extent = 0; 3679 #endif 3680 if (!found_extent) { 3681 BUG_ON(iref); 3682 ret = remove_extent_backref(trans, extent_root, path, 3683 NULL, refs_to_drop, 3684 is_data); 3685 BUG_ON(ret); 3686 btrfs_release_path(extent_root, path); 3687 path->leave_spinning = 1; 3688 3689 key.objectid = bytenr; 3690 key.type = BTRFS_EXTENT_ITEM_KEY; 3691 key.offset = num_bytes; 3692 3693 ret = btrfs_search_slot(trans, extent_root, 3694 &key, path, -1, 1); 3695 if (ret) { 3696 printk(KERN_ERR "umm, got %d back from search" 3697 ", was looking for %llu\n", ret, 3698 (unsigned long long)bytenr); 3699 btrfs_print_leaf(extent_root, path->nodes[0]); 3700 } 3701 BUG_ON(ret); 3702 extent_slot = path->slots[0]; 3703 } 3704 } else { 3705 btrfs_print_leaf(extent_root, path->nodes[0]); 3706 WARN_ON(1); 3707 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3708 "parent %llu root %llu owner %llu offset %llu\n", 3709 (unsigned long long)bytenr, 3710 (unsigned long long)parent, 3711 (unsigned long long)root_objectid, 3712 (unsigned long long)owner_objectid, 3713 (unsigned long long)owner_offset); 3714 } 3715 3716 leaf = path->nodes[0]; 3717 item_size = btrfs_item_size_nr(leaf, extent_slot); 3718 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3719 if (item_size < sizeof(*ei)) { 3720 BUG_ON(found_extent || extent_slot != path->slots[0]); 3721 ret = convert_extent_item_v0(trans, extent_root, path, 3722 owner_objectid, 0); 3723 BUG_ON(ret < 0); 3724 3725 btrfs_release_path(extent_root, path); 3726 path->leave_spinning = 1; 3727 3728 key.objectid = bytenr; 3729 key.type = BTRFS_EXTENT_ITEM_KEY; 3730 key.offset = num_bytes; 3731 3732 ret = btrfs_search_slot(trans, extent_root, &key, path, 3733 -1, 1); 3734 if (ret) { 3735 printk(KERN_ERR "umm, got %d back from search" 3736 ", was looking for %llu\n", ret, 3737 (unsigned long long)bytenr); 3738 btrfs_print_leaf(extent_root, path->nodes[0]); 3739 } 3740 BUG_ON(ret); 3741 extent_slot = path->slots[0]; 3742 leaf = path->nodes[0]; 3743 item_size = btrfs_item_size_nr(leaf, extent_slot); 3744 } 3745 #endif 3746 BUG_ON(item_size < sizeof(*ei)); 3747 ei = btrfs_item_ptr(leaf, extent_slot, 3748 struct btrfs_extent_item); 3749 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3750 struct btrfs_tree_block_info *bi; 3751 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3752 bi = (struct btrfs_tree_block_info *)(ei + 1); 3753 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3754 } 3755 3756 refs = btrfs_extent_refs(leaf, ei); 3757 BUG_ON(refs < refs_to_drop); 3758 refs -= refs_to_drop; 3759 3760 if (refs > 0) { 3761 if (extent_op) 3762 __run_delayed_extent_op(extent_op, leaf, ei); 3763 /* 3764 * In the case of inline back ref, reference count will 3765 * be updated by remove_extent_backref 3766 */ 3767 if (iref) { 3768 BUG_ON(!found_extent); 3769 } else { 3770 btrfs_set_extent_refs(leaf, ei, refs); 3771 btrfs_mark_buffer_dirty(leaf); 3772 } 3773 if (found_extent) { 3774 ret = remove_extent_backref(trans, extent_root, path, 3775 iref, refs_to_drop, 3776 is_data); 3777 BUG_ON(ret); 3778 } 3779 } else { 3780 int mark_free = 0; 3781 struct extent_buffer *must_clean = NULL; 3782 3783 if (found_extent) { 3784 BUG_ON(is_data && refs_to_drop != 3785 extent_data_ref_count(root, path, iref)); 3786 if (iref) { 3787 BUG_ON(path->slots[0] != extent_slot); 3788 } else { 3789 BUG_ON(path->slots[0] != extent_slot + 1); 3790 path->slots[0] = extent_slot; 3791 num_to_del = 2; 3792 } 3793 } 3794 3795 ret = pin_down_bytes(trans, root, path, bytenr, 3796 num_bytes, is_data, 0, &must_clean); 3797 if (ret > 0) 3798 mark_free = 1; 3799 BUG_ON(ret < 0); 3800 /* 3801 * it is going to be very rare for someone to be waiting 3802 * on the block we're freeing. del_items might need to 3803 * schedule, so rather than get fancy, just force it 3804 * to blocking here 3805 */ 3806 if (must_clean) 3807 btrfs_set_lock_blocking(must_clean); 3808 3809 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3810 num_to_del); 3811 BUG_ON(ret); 3812 btrfs_release_path(extent_root, path); 3813 3814 if (must_clean) { 3815 clean_tree_block(NULL, root, must_clean); 3816 btrfs_tree_unlock(must_clean); 3817 free_extent_buffer(must_clean); 3818 } 3819 3820 if (is_data) { 3821 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3822 BUG_ON(ret); 3823 } else { 3824 invalidate_mapping_pages(info->btree_inode->i_mapping, 3825 bytenr >> PAGE_CACHE_SHIFT, 3826 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3827 } 3828 3829 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3830 mark_free); 3831 BUG_ON(ret); 3832 } 3833 btrfs_free_path(path); 3834 return ret; 3835 } 3836 3837 /* 3838 * when we free an extent, it is possible (and likely) that we free the last 3839 * delayed ref for that extent as well. This searches the delayed ref tree for 3840 * a given extent, and if there are no other delayed refs to be processed, it 3841 * removes it from the tree. 3842 */ 3843 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3844 struct btrfs_root *root, u64 bytenr) 3845 { 3846 struct btrfs_delayed_ref_head *head; 3847 struct btrfs_delayed_ref_root *delayed_refs; 3848 struct btrfs_delayed_ref_node *ref; 3849 struct rb_node *node; 3850 int ret; 3851 3852 delayed_refs = &trans->transaction->delayed_refs; 3853 spin_lock(&delayed_refs->lock); 3854 head = btrfs_find_delayed_ref_head(trans, bytenr); 3855 if (!head) 3856 goto out; 3857 3858 node = rb_prev(&head->node.rb_node); 3859 if (!node) 3860 goto out; 3861 3862 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3863 3864 /* there are still entries for this ref, we can't drop it */ 3865 if (ref->bytenr == bytenr) 3866 goto out; 3867 3868 if (head->extent_op) { 3869 if (!head->must_insert_reserved) 3870 goto out; 3871 kfree(head->extent_op); 3872 head->extent_op = NULL; 3873 } 3874 3875 /* 3876 * waiting for the lock here would deadlock. If someone else has it 3877 * locked they are already in the process of dropping it anyway 3878 */ 3879 if (!mutex_trylock(&head->mutex)) 3880 goto out; 3881 3882 /* 3883 * at this point we have a head with no other entries. Go 3884 * ahead and process it. 3885 */ 3886 head->node.in_tree = 0; 3887 rb_erase(&head->node.rb_node, &delayed_refs->root); 3888 3889 delayed_refs->num_entries--; 3890 3891 /* 3892 * we don't take a ref on the node because we're removing it from the 3893 * tree, so we just steal the ref the tree was holding. 3894 */ 3895 delayed_refs->num_heads--; 3896 if (list_empty(&head->cluster)) 3897 delayed_refs->num_heads_ready--; 3898 3899 list_del_init(&head->cluster); 3900 spin_unlock(&delayed_refs->lock); 3901 3902 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3903 &head->node, head->extent_op, 3904 head->must_insert_reserved); 3905 BUG_ON(ret); 3906 btrfs_put_delayed_ref(&head->node); 3907 return 0; 3908 out: 3909 spin_unlock(&delayed_refs->lock); 3910 return 0; 3911 } 3912 3913 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3914 struct btrfs_root *root, 3915 u64 bytenr, u64 num_bytes, u64 parent, 3916 u64 root_objectid, u64 owner, u64 offset) 3917 { 3918 int ret; 3919 3920 /* 3921 * tree log blocks never actually go into the extent allocation 3922 * tree, just update pinning info and exit early. 3923 */ 3924 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3925 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3926 /* unlocks the pinned mutex */ 3927 btrfs_pin_extent(root, bytenr, num_bytes, 1); 3928 ret = 0; 3929 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3930 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3931 parent, root_objectid, (int)owner, 3932 BTRFS_DROP_DELAYED_REF, NULL); 3933 BUG_ON(ret); 3934 ret = check_ref_cleanup(trans, root, bytenr); 3935 BUG_ON(ret); 3936 } else { 3937 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 3938 parent, root_objectid, owner, 3939 offset, BTRFS_DROP_DELAYED_REF, NULL); 3940 BUG_ON(ret); 3941 } 3942 return ret; 3943 } 3944 3945 static u64 stripe_align(struct btrfs_root *root, u64 val) 3946 { 3947 u64 mask = ((u64)root->stripesize - 1); 3948 u64 ret = (val + mask) & ~mask; 3949 return ret; 3950 } 3951 3952 /* 3953 * when we wait for progress in the block group caching, its because 3954 * our allocation attempt failed at least once. So, we must sleep 3955 * and let some progress happen before we try again. 3956 * 3957 * This function will sleep at least once waiting for new free space to 3958 * show up, and then it will check the block group free space numbers 3959 * for our min num_bytes. Another option is to have it go ahead 3960 * and look in the rbtree for a free extent of a given size, but this 3961 * is a good start. 3962 */ 3963 static noinline int 3964 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3965 u64 num_bytes) 3966 { 3967 struct btrfs_caching_control *caching_ctl; 3968 DEFINE_WAIT(wait); 3969 3970 caching_ctl = get_caching_control(cache); 3971 if (!caching_ctl) 3972 return 0; 3973 3974 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 3975 (cache->free_space >= num_bytes)); 3976 3977 put_caching_control(caching_ctl); 3978 return 0; 3979 } 3980 3981 static noinline int 3982 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 3983 { 3984 struct btrfs_caching_control *caching_ctl; 3985 DEFINE_WAIT(wait); 3986 3987 caching_ctl = get_caching_control(cache); 3988 if (!caching_ctl) 3989 return 0; 3990 3991 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 3992 3993 put_caching_control(caching_ctl); 3994 return 0; 3995 } 3996 3997 enum btrfs_loop_type { 3998 LOOP_CACHED_ONLY = 0, 3999 LOOP_CACHING_NOWAIT = 1, 4000 LOOP_CACHING_WAIT = 2, 4001 LOOP_ALLOC_CHUNK = 3, 4002 LOOP_NO_EMPTY_SIZE = 4, 4003 }; 4004 4005 /* 4006 * walks the btree of allocated extents and find a hole of a given size. 4007 * The key ins is changed to record the hole: 4008 * ins->objectid == block start 4009 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4010 * ins->offset == number of blocks 4011 * Any available blocks before search_start are skipped. 4012 */ 4013 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4014 struct btrfs_root *orig_root, 4015 u64 num_bytes, u64 empty_size, 4016 u64 search_start, u64 search_end, 4017 u64 hint_byte, struct btrfs_key *ins, 4018 u64 exclude_start, u64 exclude_nr, 4019 int data) 4020 { 4021 int ret = 0; 4022 struct btrfs_root *root = orig_root->fs_info->extent_root; 4023 struct btrfs_free_cluster *last_ptr = NULL; 4024 struct btrfs_block_group_cache *block_group = NULL; 4025 int empty_cluster = 2 * 1024 * 1024; 4026 int allowed_chunk_alloc = 0; 4027 struct btrfs_space_info *space_info; 4028 int last_ptr_loop = 0; 4029 int loop = 0; 4030 bool found_uncached_bg = false; 4031 bool failed_cluster_refill = false; 4032 4033 WARN_ON(num_bytes < root->sectorsize); 4034 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4035 ins->objectid = 0; 4036 ins->offset = 0; 4037 4038 space_info = __find_space_info(root->fs_info, data); 4039 4040 if (orig_root->ref_cows || empty_size) 4041 allowed_chunk_alloc = 1; 4042 4043 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4044 last_ptr = &root->fs_info->meta_alloc_cluster; 4045 if (!btrfs_test_opt(root, SSD)) 4046 empty_cluster = 64 * 1024; 4047 } 4048 4049 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4050 last_ptr = &root->fs_info->data_alloc_cluster; 4051 } 4052 4053 if (last_ptr) { 4054 spin_lock(&last_ptr->lock); 4055 if (last_ptr->block_group) 4056 hint_byte = last_ptr->window_start; 4057 spin_unlock(&last_ptr->lock); 4058 } 4059 4060 search_start = max(search_start, first_logical_byte(root, 0)); 4061 search_start = max(search_start, hint_byte); 4062 4063 if (!last_ptr) 4064 empty_cluster = 0; 4065 4066 if (search_start == hint_byte) { 4067 block_group = btrfs_lookup_block_group(root->fs_info, 4068 search_start); 4069 /* 4070 * we don't want to use the block group if it doesn't match our 4071 * allocation bits, or if its not cached. 4072 */ 4073 if (block_group && block_group_bits(block_group, data) && 4074 block_group_cache_done(block_group)) { 4075 down_read(&space_info->groups_sem); 4076 if (list_empty(&block_group->list) || 4077 block_group->ro) { 4078 /* 4079 * someone is removing this block group, 4080 * we can't jump into the have_block_group 4081 * target because our list pointers are not 4082 * valid 4083 */ 4084 btrfs_put_block_group(block_group); 4085 up_read(&space_info->groups_sem); 4086 } else 4087 goto have_block_group; 4088 } else if (block_group) { 4089 btrfs_put_block_group(block_group); 4090 } 4091 } 4092 4093 search: 4094 down_read(&space_info->groups_sem); 4095 list_for_each_entry(block_group, &space_info->block_groups, list) { 4096 u64 offset; 4097 int cached; 4098 4099 atomic_inc(&block_group->count); 4100 search_start = block_group->key.objectid; 4101 4102 have_block_group: 4103 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4104 /* 4105 * we want to start caching kthreads, but not too many 4106 * right off the bat so we don't overwhelm the system, 4107 * so only start them if there are less than 2 and we're 4108 * in the initial allocation phase. 4109 */ 4110 if (loop > LOOP_CACHING_NOWAIT || 4111 atomic_read(&space_info->caching_threads) < 2) { 4112 ret = cache_block_group(block_group); 4113 BUG_ON(ret); 4114 } 4115 } 4116 4117 cached = block_group_cache_done(block_group); 4118 if (unlikely(!cached)) { 4119 found_uncached_bg = true; 4120 4121 /* if we only want cached bgs, loop */ 4122 if (loop == LOOP_CACHED_ONLY) 4123 goto loop; 4124 } 4125 4126 if (unlikely(block_group->ro)) 4127 goto loop; 4128 4129 /* 4130 * Ok we want to try and use the cluster allocator, so lets look 4131 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 4132 * have tried the cluster allocator plenty of times at this 4133 * point and not have found anything, so we are likely way too 4134 * fragmented for the clustering stuff to find anything, so lets 4135 * just skip it and let the allocator find whatever block it can 4136 * find 4137 */ 4138 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 4139 /* 4140 * the refill lock keeps out other 4141 * people trying to start a new cluster 4142 */ 4143 spin_lock(&last_ptr->refill_lock); 4144 if (last_ptr->block_group && 4145 (last_ptr->block_group->ro || 4146 !block_group_bits(last_ptr->block_group, data))) { 4147 offset = 0; 4148 goto refill_cluster; 4149 } 4150 4151 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 4152 num_bytes, search_start); 4153 if (offset) { 4154 /* we have a block, we're done */ 4155 spin_unlock(&last_ptr->refill_lock); 4156 goto checks; 4157 } 4158 4159 spin_lock(&last_ptr->lock); 4160 /* 4161 * whoops, this cluster doesn't actually point to 4162 * this block group. Get a ref on the block 4163 * group is does point to and try again 4164 */ 4165 if (!last_ptr_loop && last_ptr->block_group && 4166 last_ptr->block_group != block_group) { 4167 4168 btrfs_put_block_group(block_group); 4169 block_group = last_ptr->block_group; 4170 atomic_inc(&block_group->count); 4171 spin_unlock(&last_ptr->lock); 4172 spin_unlock(&last_ptr->refill_lock); 4173 4174 last_ptr_loop = 1; 4175 search_start = block_group->key.objectid; 4176 /* 4177 * we know this block group is properly 4178 * in the list because 4179 * btrfs_remove_block_group, drops the 4180 * cluster before it removes the block 4181 * group from the list 4182 */ 4183 goto have_block_group; 4184 } 4185 spin_unlock(&last_ptr->lock); 4186 refill_cluster: 4187 /* 4188 * this cluster didn't work out, free it and 4189 * start over 4190 */ 4191 btrfs_return_cluster_to_free_space(NULL, last_ptr); 4192 4193 last_ptr_loop = 0; 4194 4195 /* allocate a cluster in this block group */ 4196 ret = btrfs_find_space_cluster(trans, root, 4197 block_group, last_ptr, 4198 offset, num_bytes, 4199 empty_cluster + empty_size); 4200 if (ret == 0) { 4201 /* 4202 * now pull our allocation out of this 4203 * cluster 4204 */ 4205 offset = btrfs_alloc_from_cluster(block_group, 4206 last_ptr, num_bytes, 4207 search_start); 4208 if (offset) { 4209 /* we found one, proceed */ 4210 spin_unlock(&last_ptr->refill_lock); 4211 goto checks; 4212 } 4213 } else if (!cached && loop > LOOP_CACHING_NOWAIT 4214 && !failed_cluster_refill) { 4215 spin_unlock(&last_ptr->refill_lock); 4216 4217 failed_cluster_refill = true; 4218 wait_block_group_cache_progress(block_group, 4219 num_bytes + empty_cluster + empty_size); 4220 goto have_block_group; 4221 } 4222 4223 /* 4224 * at this point we either didn't find a cluster 4225 * or we weren't able to allocate a block from our 4226 * cluster. Free the cluster we've been trying 4227 * to use, and go to the next block group 4228 */ 4229 btrfs_return_cluster_to_free_space(NULL, last_ptr); 4230 spin_unlock(&last_ptr->refill_lock); 4231 goto loop; 4232 } 4233 4234 offset = btrfs_find_space_for_alloc(block_group, search_start, 4235 num_bytes, empty_size); 4236 if (!offset && (cached || (!cached && 4237 loop == LOOP_CACHING_NOWAIT))) { 4238 goto loop; 4239 } else if (!offset && (!cached && 4240 loop > LOOP_CACHING_NOWAIT)) { 4241 wait_block_group_cache_progress(block_group, 4242 num_bytes + empty_size); 4243 goto have_block_group; 4244 } 4245 checks: 4246 search_start = stripe_align(root, offset); 4247 /* move on to the next group */ 4248 if (search_start + num_bytes >= search_end) { 4249 btrfs_add_free_space(block_group, offset, num_bytes); 4250 goto loop; 4251 } 4252 4253 /* move on to the next group */ 4254 if (search_start + num_bytes > 4255 block_group->key.objectid + block_group->key.offset) { 4256 btrfs_add_free_space(block_group, offset, num_bytes); 4257 goto loop; 4258 } 4259 4260 if (exclude_nr > 0 && 4261 (search_start + num_bytes > exclude_start && 4262 search_start < exclude_start + exclude_nr)) { 4263 search_start = exclude_start + exclude_nr; 4264 4265 btrfs_add_free_space(block_group, offset, num_bytes); 4266 /* 4267 * if search_start is still in this block group 4268 * then we just re-search this block group 4269 */ 4270 if (search_start >= block_group->key.objectid && 4271 search_start < (block_group->key.objectid + 4272 block_group->key.offset)) 4273 goto have_block_group; 4274 goto loop; 4275 } 4276 4277 ins->objectid = search_start; 4278 ins->offset = num_bytes; 4279 4280 if (offset < search_start) 4281 btrfs_add_free_space(block_group, offset, 4282 search_start - offset); 4283 BUG_ON(offset > search_start); 4284 4285 update_reserved_extents(block_group, num_bytes, 1); 4286 4287 /* we are all good, lets return */ 4288 break; 4289 loop: 4290 failed_cluster_refill = false; 4291 btrfs_put_block_group(block_group); 4292 } 4293 up_read(&space_info->groups_sem); 4294 4295 /* LOOP_CACHED_ONLY, only search fully cached block groups 4296 * LOOP_CACHING_NOWAIT, search partially cached block groups, but 4297 * dont wait foR them to finish caching 4298 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 4299 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 4300 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 4301 * again 4302 */ 4303 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 4304 (found_uncached_bg || empty_size || empty_cluster || 4305 allowed_chunk_alloc)) { 4306 if (found_uncached_bg) { 4307 found_uncached_bg = false; 4308 if (loop < LOOP_CACHING_WAIT) { 4309 loop++; 4310 goto search; 4311 } 4312 } 4313 4314 if (loop == LOOP_ALLOC_CHUNK) { 4315 empty_size = 0; 4316 empty_cluster = 0; 4317 } 4318 4319 if (allowed_chunk_alloc) { 4320 ret = do_chunk_alloc(trans, root, num_bytes + 4321 2 * 1024 * 1024, data, 1); 4322 allowed_chunk_alloc = 0; 4323 } else { 4324 space_info->force_alloc = 1; 4325 } 4326 4327 if (loop < LOOP_NO_EMPTY_SIZE) { 4328 loop++; 4329 goto search; 4330 } 4331 ret = -ENOSPC; 4332 } else if (!ins->objectid) { 4333 ret = -ENOSPC; 4334 } 4335 4336 /* we found what we needed */ 4337 if (ins->objectid) { 4338 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 4339 trans->block_group = block_group->key.objectid; 4340 4341 btrfs_put_block_group(block_group); 4342 ret = 0; 4343 } 4344 4345 return ret; 4346 } 4347 4348 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 4349 int dump_block_groups) 4350 { 4351 struct btrfs_block_group_cache *cache; 4352 4353 spin_lock(&info->lock); 4354 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4355 (unsigned long long)(info->total_bytes - info->bytes_used - 4356 info->bytes_pinned - info->bytes_reserved - 4357 info->bytes_super), 4358 (info->full) ? "" : "not "); 4359 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4360 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" 4361 "\n", 4362 (unsigned long long)info->total_bytes, 4363 (unsigned long long)info->bytes_pinned, 4364 (unsigned long long)info->bytes_delalloc, 4365 (unsigned long long)info->bytes_may_use, 4366 (unsigned long long)info->bytes_used, 4367 (unsigned long long)info->bytes_root, 4368 (unsigned long long)info->bytes_super, 4369 (unsigned long long)info->bytes_reserved); 4370 spin_unlock(&info->lock); 4371 4372 if (!dump_block_groups) 4373 return; 4374 4375 down_read(&info->groups_sem); 4376 list_for_each_entry(cache, &info->block_groups, list) { 4377 spin_lock(&cache->lock); 4378 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 4379 "%llu pinned %llu reserved\n", 4380 (unsigned long long)cache->key.objectid, 4381 (unsigned long long)cache->key.offset, 4382 (unsigned long long)btrfs_block_group_used(&cache->item), 4383 (unsigned long long)cache->pinned, 4384 (unsigned long long)cache->reserved); 4385 btrfs_dump_free_space(cache, bytes); 4386 spin_unlock(&cache->lock); 4387 } 4388 up_read(&info->groups_sem); 4389 } 4390 4391 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4392 struct btrfs_root *root, 4393 u64 num_bytes, u64 min_alloc_size, 4394 u64 empty_size, u64 hint_byte, 4395 u64 search_end, struct btrfs_key *ins, 4396 u64 data) 4397 { 4398 int ret; 4399 u64 search_start = 0; 4400 struct btrfs_fs_info *info = root->fs_info; 4401 4402 data = btrfs_get_alloc_profile(root, data); 4403 again: 4404 /* 4405 * the only place that sets empty_size is btrfs_realloc_node, which 4406 * is not called recursively on allocations 4407 */ 4408 if (empty_size || root->ref_cows) { 4409 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 4410 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4411 2 * 1024 * 1024, 4412 BTRFS_BLOCK_GROUP_METADATA | 4413 (info->metadata_alloc_profile & 4414 info->avail_metadata_alloc_bits), 0); 4415 } 4416 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4417 num_bytes + 2 * 1024 * 1024, data, 0); 4418 } 4419 4420 WARN_ON(num_bytes < root->sectorsize); 4421 ret = find_free_extent(trans, root, num_bytes, empty_size, 4422 search_start, search_end, hint_byte, ins, 4423 trans->alloc_exclude_start, 4424 trans->alloc_exclude_nr, data); 4425 4426 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 4427 num_bytes = num_bytes >> 1; 4428 num_bytes = num_bytes & ~(root->sectorsize - 1); 4429 num_bytes = max(num_bytes, min_alloc_size); 4430 do_chunk_alloc(trans, root->fs_info->extent_root, 4431 num_bytes, data, 1); 4432 goto again; 4433 } 4434 if (ret == -ENOSPC) { 4435 struct btrfs_space_info *sinfo; 4436 4437 sinfo = __find_space_info(root->fs_info, data); 4438 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4439 "wanted %llu\n", (unsigned long long)data, 4440 (unsigned long long)num_bytes); 4441 dump_space_info(sinfo, num_bytes, 1); 4442 } 4443 4444 return ret; 4445 } 4446 4447 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 4448 { 4449 struct btrfs_block_group_cache *cache; 4450 int ret = 0; 4451 4452 cache = btrfs_lookup_block_group(root->fs_info, start); 4453 if (!cache) { 4454 printk(KERN_ERR "Unable to find block group for %llu\n", 4455 (unsigned long long)start); 4456 return -ENOSPC; 4457 } 4458 4459 ret = btrfs_discard_extent(root, start, len); 4460 4461 btrfs_add_free_space(cache, start, len); 4462 update_reserved_extents(cache, len, 0); 4463 btrfs_put_block_group(cache); 4464 4465 return ret; 4466 } 4467 4468 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4469 struct btrfs_root *root, 4470 u64 parent, u64 root_objectid, 4471 u64 flags, u64 owner, u64 offset, 4472 struct btrfs_key *ins, int ref_mod) 4473 { 4474 int ret; 4475 struct btrfs_fs_info *fs_info = root->fs_info; 4476 struct btrfs_extent_item *extent_item; 4477 struct btrfs_extent_inline_ref *iref; 4478 struct btrfs_path *path; 4479 struct extent_buffer *leaf; 4480 int type; 4481 u32 size; 4482 4483 if (parent > 0) 4484 type = BTRFS_SHARED_DATA_REF_KEY; 4485 else 4486 type = BTRFS_EXTENT_DATA_REF_KEY; 4487 4488 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 4489 4490 path = btrfs_alloc_path(); 4491 BUG_ON(!path); 4492 4493 path->leave_spinning = 1; 4494 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4495 ins, size); 4496 BUG_ON(ret); 4497 4498 leaf = path->nodes[0]; 4499 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4500 struct btrfs_extent_item); 4501 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 4502 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4503 btrfs_set_extent_flags(leaf, extent_item, 4504 flags | BTRFS_EXTENT_FLAG_DATA); 4505 4506 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4507 btrfs_set_extent_inline_ref_type(leaf, iref, type); 4508 if (parent > 0) { 4509 struct btrfs_shared_data_ref *ref; 4510 ref = (struct btrfs_shared_data_ref *)(iref + 1); 4511 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4512 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 4513 } else { 4514 struct btrfs_extent_data_ref *ref; 4515 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 4516 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 4517 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 4518 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 4519 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 4520 } 4521 4522 btrfs_mark_buffer_dirty(path->nodes[0]); 4523 btrfs_free_path(path); 4524 4525 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4526 1, 0); 4527 if (ret) { 4528 printk(KERN_ERR "btrfs update block group failed for %llu " 4529 "%llu\n", (unsigned long long)ins->objectid, 4530 (unsigned long long)ins->offset); 4531 BUG(); 4532 } 4533 return ret; 4534 } 4535 4536 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 4537 struct btrfs_root *root, 4538 u64 parent, u64 root_objectid, 4539 u64 flags, struct btrfs_disk_key *key, 4540 int level, struct btrfs_key *ins) 4541 { 4542 int ret; 4543 struct btrfs_fs_info *fs_info = root->fs_info; 4544 struct btrfs_extent_item *extent_item; 4545 struct btrfs_tree_block_info *block_info; 4546 struct btrfs_extent_inline_ref *iref; 4547 struct btrfs_path *path; 4548 struct extent_buffer *leaf; 4549 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 4550 4551 path = btrfs_alloc_path(); 4552 BUG_ON(!path); 4553 4554 path->leave_spinning = 1; 4555 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4556 ins, size); 4557 BUG_ON(ret); 4558 4559 leaf = path->nodes[0]; 4560 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4561 struct btrfs_extent_item); 4562 btrfs_set_extent_refs(leaf, extent_item, 1); 4563 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4564 btrfs_set_extent_flags(leaf, extent_item, 4565 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 4566 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 4567 4568 btrfs_set_tree_block_key(leaf, block_info, key); 4569 btrfs_set_tree_block_level(leaf, block_info, level); 4570 4571 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 4572 if (parent > 0) { 4573 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4574 btrfs_set_extent_inline_ref_type(leaf, iref, 4575 BTRFS_SHARED_BLOCK_REF_KEY); 4576 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4577 } else { 4578 btrfs_set_extent_inline_ref_type(leaf, iref, 4579 BTRFS_TREE_BLOCK_REF_KEY); 4580 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 4581 } 4582 4583 btrfs_mark_buffer_dirty(leaf); 4584 btrfs_free_path(path); 4585 4586 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4587 1, 0); 4588 if (ret) { 4589 printk(KERN_ERR "btrfs update block group failed for %llu " 4590 "%llu\n", (unsigned long long)ins->objectid, 4591 (unsigned long long)ins->offset); 4592 BUG(); 4593 } 4594 return ret; 4595 } 4596 4597 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4598 struct btrfs_root *root, 4599 u64 root_objectid, u64 owner, 4600 u64 offset, struct btrfs_key *ins) 4601 { 4602 int ret; 4603 4604 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 4605 4606 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 4607 0, root_objectid, owner, offset, 4608 BTRFS_ADD_DELAYED_EXTENT, NULL); 4609 return ret; 4610 } 4611 4612 /* 4613 * this is used by the tree logging recovery code. It records that 4614 * an extent has been allocated and makes sure to clear the free 4615 * space cache bits as well 4616 */ 4617 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4618 struct btrfs_root *root, 4619 u64 root_objectid, u64 owner, u64 offset, 4620 struct btrfs_key *ins) 4621 { 4622 int ret; 4623 struct btrfs_block_group_cache *block_group; 4624 struct btrfs_caching_control *caching_ctl; 4625 u64 start = ins->objectid; 4626 u64 num_bytes = ins->offset; 4627 4628 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4629 cache_block_group(block_group); 4630 caching_ctl = get_caching_control(block_group); 4631 4632 if (!caching_ctl) { 4633 BUG_ON(!block_group_cache_done(block_group)); 4634 ret = btrfs_remove_free_space(block_group, start, num_bytes); 4635 BUG_ON(ret); 4636 } else { 4637 mutex_lock(&caching_ctl->mutex); 4638 4639 if (start >= caching_ctl->progress) { 4640 ret = add_excluded_extent(root, start, num_bytes); 4641 BUG_ON(ret); 4642 } else if (start + num_bytes <= caching_ctl->progress) { 4643 ret = btrfs_remove_free_space(block_group, 4644 start, num_bytes); 4645 BUG_ON(ret); 4646 } else { 4647 num_bytes = caching_ctl->progress - start; 4648 ret = btrfs_remove_free_space(block_group, 4649 start, num_bytes); 4650 BUG_ON(ret); 4651 4652 start = caching_ctl->progress; 4653 num_bytes = ins->objectid + ins->offset - 4654 caching_ctl->progress; 4655 ret = add_excluded_extent(root, start, num_bytes); 4656 BUG_ON(ret); 4657 } 4658 4659 mutex_unlock(&caching_ctl->mutex); 4660 put_caching_control(caching_ctl); 4661 } 4662 4663 update_reserved_extents(block_group, ins->offset, 1); 4664 btrfs_put_block_group(block_group); 4665 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4666 0, owner, offset, ins, 1); 4667 return ret; 4668 } 4669 4670 /* 4671 * finds a free extent and does all the dirty work required for allocation 4672 * returns the key for the extent through ins, and a tree buffer for 4673 * the first block of the extent through buf. 4674 * 4675 * returns 0 if everything worked, non-zero otherwise. 4676 */ 4677 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4678 struct btrfs_root *root, 4679 u64 num_bytes, u64 parent, u64 root_objectid, 4680 struct btrfs_disk_key *key, int level, 4681 u64 empty_size, u64 hint_byte, u64 search_end, 4682 struct btrfs_key *ins) 4683 { 4684 int ret; 4685 u64 flags = 0; 4686 4687 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4688 empty_size, hint_byte, search_end, 4689 ins, 0); 4690 if (ret) 4691 return ret; 4692 4693 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4694 if (parent == 0) 4695 parent = ins->objectid; 4696 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4697 } else 4698 BUG_ON(parent > 0); 4699 4700 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4701 struct btrfs_delayed_extent_op *extent_op; 4702 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4703 BUG_ON(!extent_op); 4704 if (key) 4705 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4706 else 4707 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4708 extent_op->flags_to_set = flags; 4709 extent_op->update_key = 1; 4710 extent_op->update_flags = 1; 4711 extent_op->is_data = 0; 4712 4713 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4714 ins->offset, parent, root_objectid, 4715 level, BTRFS_ADD_DELAYED_EXTENT, 4716 extent_op); 4717 BUG_ON(ret); 4718 } 4719 return ret; 4720 } 4721 4722 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4723 struct btrfs_root *root, 4724 u64 bytenr, u32 blocksize, 4725 int level) 4726 { 4727 struct extent_buffer *buf; 4728 4729 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4730 if (!buf) 4731 return ERR_PTR(-ENOMEM); 4732 btrfs_set_header_generation(buf, trans->transid); 4733 btrfs_set_buffer_lockdep_class(buf, level); 4734 btrfs_tree_lock(buf); 4735 clean_tree_block(trans, root, buf); 4736 4737 btrfs_set_lock_blocking(buf); 4738 btrfs_set_buffer_uptodate(buf); 4739 4740 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4741 set_extent_dirty(&root->dirty_log_pages, buf->start, 4742 buf->start + buf->len - 1, GFP_NOFS); 4743 } else { 4744 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4745 buf->start + buf->len - 1, GFP_NOFS); 4746 } 4747 trans->blocks_used++; 4748 /* this returns a buffer locked for blocking */ 4749 return buf; 4750 } 4751 4752 /* 4753 * helper function to allocate a block for a given tree 4754 * returns the tree buffer or NULL. 4755 */ 4756 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4757 struct btrfs_root *root, u32 blocksize, 4758 u64 parent, u64 root_objectid, 4759 struct btrfs_disk_key *key, int level, 4760 u64 hint, u64 empty_size) 4761 { 4762 struct btrfs_key ins; 4763 int ret; 4764 struct extent_buffer *buf; 4765 4766 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4767 key, level, empty_size, hint, (u64)-1, &ins); 4768 if (ret) { 4769 BUG_ON(ret > 0); 4770 return ERR_PTR(ret); 4771 } 4772 4773 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4774 blocksize, level); 4775 return buf; 4776 } 4777 4778 struct walk_control { 4779 u64 refs[BTRFS_MAX_LEVEL]; 4780 u64 flags[BTRFS_MAX_LEVEL]; 4781 struct btrfs_key update_progress; 4782 int stage; 4783 int level; 4784 int shared_level; 4785 int update_ref; 4786 int keep_locks; 4787 int reada_slot; 4788 int reada_count; 4789 }; 4790 4791 #define DROP_REFERENCE 1 4792 #define UPDATE_BACKREF 2 4793 4794 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 4795 struct btrfs_root *root, 4796 struct walk_control *wc, 4797 struct btrfs_path *path) 4798 { 4799 u64 bytenr; 4800 u64 generation; 4801 u64 refs; 4802 u64 last = 0; 4803 u32 nritems; 4804 u32 blocksize; 4805 struct btrfs_key key; 4806 struct extent_buffer *eb; 4807 int ret; 4808 int slot; 4809 int nread = 0; 4810 4811 if (path->slots[wc->level] < wc->reada_slot) { 4812 wc->reada_count = wc->reada_count * 2 / 3; 4813 wc->reada_count = max(wc->reada_count, 2); 4814 } else { 4815 wc->reada_count = wc->reada_count * 3 / 2; 4816 wc->reada_count = min_t(int, wc->reada_count, 4817 BTRFS_NODEPTRS_PER_BLOCK(root)); 4818 } 4819 4820 eb = path->nodes[wc->level]; 4821 nritems = btrfs_header_nritems(eb); 4822 blocksize = btrfs_level_size(root, wc->level - 1); 4823 4824 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 4825 if (nread >= wc->reada_count) 4826 break; 4827 4828 cond_resched(); 4829 bytenr = btrfs_node_blockptr(eb, slot); 4830 generation = btrfs_node_ptr_generation(eb, slot); 4831 4832 if (slot == path->slots[wc->level]) 4833 goto reada; 4834 4835 if (wc->stage == UPDATE_BACKREF && 4836 generation <= root->root_key.offset) 4837 continue; 4838 4839 if (wc->stage == DROP_REFERENCE) { 4840 ret = btrfs_lookup_extent_info(trans, root, 4841 bytenr, blocksize, 4842 &refs, NULL); 4843 BUG_ON(ret); 4844 BUG_ON(refs == 0); 4845 if (refs == 1) 4846 goto reada; 4847 4848 if (!wc->update_ref || 4849 generation <= root->root_key.offset) 4850 continue; 4851 btrfs_node_key_to_cpu(eb, &key, slot); 4852 ret = btrfs_comp_cpu_keys(&key, 4853 &wc->update_progress); 4854 if (ret < 0) 4855 continue; 4856 } 4857 reada: 4858 ret = readahead_tree_block(root, bytenr, blocksize, 4859 generation); 4860 if (ret) 4861 break; 4862 last = bytenr + blocksize; 4863 nread++; 4864 } 4865 wc->reada_slot = slot; 4866 } 4867 4868 /* 4869 * hepler to process tree block while walking down the tree. 4870 * 4871 * when wc->stage == UPDATE_BACKREF, this function updates 4872 * back refs for pointers in the block. 4873 * 4874 * NOTE: return value 1 means we should stop walking down. 4875 */ 4876 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 4877 struct btrfs_root *root, 4878 struct btrfs_path *path, 4879 struct walk_control *wc) 4880 { 4881 int level = wc->level; 4882 struct extent_buffer *eb = path->nodes[level]; 4883 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4884 int ret; 4885 4886 if (wc->stage == UPDATE_BACKREF && 4887 btrfs_header_owner(eb) != root->root_key.objectid) 4888 return 1; 4889 4890 /* 4891 * when reference count of tree block is 1, it won't increase 4892 * again. once full backref flag is set, we never clear it. 4893 */ 4894 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 4895 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { 4896 BUG_ON(!path->locks[level]); 4897 ret = btrfs_lookup_extent_info(trans, root, 4898 eb->start, eb->len, 4899 &wc->refs[level], 4900 &wc->flags[level]); 4901 BUG_ON(ret); 4902 BUG_ON(wc->refs[level] == 0); 4903 } 4904 4905 if (wc->stage == DROP_REFERENCE) { 4906 if (wc->refs[level] > 1) 4907 return 1; 4908 4909 if (path->locks[level] && !wc->keep_locks) { 4910 btrfs_tree_unlock(eb); 4911 path->locks[level] = 0; 4912 } 4913 return 0; 4914 } 4915 4916 /* wc->stage == UPDATE_BACKREF */ 4917 if (!(wc->flags[level] & flag)) { 4918 BUG_ON(!path->locks[level]); 4919 ret = btrfs_inc_ref(trans, root, eb, 1); 4920 BUG_ON(ret); 4921 ret = btrfs_dec_ref(trans, root, eb, 0); 4922 BUG_ON(ret); 4923 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 4924 eb->len, flag, 0); 4925 BUG_ON(ret); 4926 wc->flags[level] |= flag; 4927 } 4928 4929 /* 4930 * the block is shared by multiple trees, so it's not good to 4931 * keep the tree lock 4932 */ 4933 if (path->locks[level] && level > 0) { 4934 btrfs_tree_unlock(eb); 4935 path->locks[level] = 0; 4936 } 4937 return 0; 4938 } 4939 4940 /* 4941 * hepler to process tree block pointer. 4942 * 4943 * when wc->stage == DROP_REFERENCE, this function checks 4944 * reference count of the block pointed to. if the block 4945 * is shared and we need update back refs for the subtree 4946 * rooted at the block, this function changes wc->stage to 4947 * UPDATE_BACKREF. if the block is shared and there is no 4948 * need to update back, this function drops the reference 4949 * to the block. 4950 * 4951 * NOTE: return value 1 means we should stop walking down. 4952 */ 4953 static noinline int do_walk_down(struct btrfs_trans_handle *trans, 4954 struct btrfs_root *root, 4955 struct btrfs_path *path, 4956 struct walk_control *wc) 4957 { 4958 u64 bytenr; 4959 u64 generation; 4960 u64 parent; 4961 u32 blocksize; 4962 struct btrfs_key key; 4963 struct extent_buffer *next; 4964 int level = wc->level; 4965 int reada = 0; 4966 int ret = 0; 4967 4968 generation = btrfs_node_ptr_generation(path->nodes[level], 4969 path->slots[level]); 4970 /* 4971 * if the lower level block was created before the snapshot 4972 * was created, we know there is no need to update back refs 4973 * for the subtree 4974 */ 4975 if (wc->stage == UPDATE_BACKREF && 4976 generation <= root->root_key.offset) 4977 return 1; 4978 4979 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 4980 blocksize = btrfs_level_size(root, level - 1); 4981 4982 next = btrfs_find_tree_block(root, bytenr, blocksize); 4983 if (!next) { 4984 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 4985 reada = 1; 4986 } 4987 btrfs_tree_lock(next); 4988 btrfs_set_lock_blocking(next); 4989 4990 if (wc->stage == DROP_REFERENCE) { 4991 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4992 &wc->refs[level - 1], 4993 &wc->flags[level - 1]); 4994 BUG_ON(ret); 4995 BUG_ON(wc->refs[level - 1] == 0); 4996 4997 if (wc->refs[level - 1] > 1) { 4998 if (!wc->update_ref || 4999 generation <= root->root_key.offset) 5000 goto skip; 5001 5002 btrfs_node_key_to_cpu(path->nodes[level], &key, 5003 path->slots[level]); 5004 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); 5005 if (ret < 0) 5006 goto skip; 5007 5008 wc->stage = UPDATE_BACKREF; 5009 wc->shared_level = level - 1; 5010 } 5011 } 5012 5013 if (!btrfs_buffer_uptodate(next, generation)) { 5014 btrfs_tree_unlock(next); 5015 free_extent_buffer(next); 5016 next = NULL; 5017 } 5018 5019 if (!next) { 5020 if (reada && level == 1) 5021 reada_walk_down(trans, root, wc, path); 5022 next = read_tree_block(root, bytenr, blocksize, generation); 5023 btrfs_tree_lock(next); 5024 btrfs_set_lock_blocking(next); 5025 } 5026 5027 level--; 5028 BUG_ON(level != btrfs_header_level(next)); 5029 path->nodes[level] = next; 5030 path->slots[level] = 0; 5031 path->locks[level] = 1; 5032 wc->level = level; 5033 if (wc->level == 1) 5034 wc->reada_slot = 0; 5035 return 0; 5036 skip: 5037 wc->refs[level - 1] = 0; 5038 wc->flags[level - 1] = 0; 5039 5040 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5041 parent = path->nodes[level]->start; 5042 } else { 5043 BUG_ON(root->root_key.objectid != 5044 btrfs_header_owner(path->nodes[level])); 5045 parent = 0; 5046 } 5047 5048 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 5049 root->root_key.objectid, level - 1, 0); 5050 BUG_ON(ret); 5051 5052 btrfs_tree_unlock(next); 5053 free_extent_buffer(next); 5054 return 1; 5055 } 5056 5057 /* 5058 * hepler to process tree block while walking up the tree. 5059 * 5060 * when wc->stage == DROP_REFERENCE, this function drops 5061 * reference count on the block. 5062 * 5063 * when wc->stage == UPDATE_BACKREF, this function changes 5064 * wc->stage back to DROP_REFERENCE if we changed wc->stage 5065 * to UPDATE_BACKREF previously while processing the block. 5066 * 5067 * NOTE: return value 1 means we should stop walking up. 5068 */ 5069 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 5070 struct btrfs_root *root, 5071 struct btrfs_path *path, 5072 struct walk_control *wc) 5073 { 5074 int ret = 0; 5075 int level = wc->level; 5076 struct extent_buffer *eb = path->nodes[level]; 5077 u64 parent = 0; 5078 5079 if (wc->stage == UPDATE_BACKREF) { 5080 BUG_ON(wc->shared_level < level); 5081 if (level < wc->shared_level) 5082 goto out; 5083 5084 ret = find_next_key(path, level + 1, &wc->update_progress); 5085 if (ret > 0) 5086 wc->update_ref = 0; 5087 5088 wc->stage = DROP_REFERENCE; 5089 wc->shared_level = -1; 5090 path->slots[level] = 0; 5091 5092 /* 5093 * check reference count again if the block isn't locked. 5094 * we should start walking down the tree again if reference 5095 * count is one. 5096 */ 5097 if (!path->locks[level]) { 5098 BUG_ON(level == 0); 5099 btrfs_tree_lock(eb); 5100 btrfs_set_lock_blocking(eb); 5101 path->locks[level] = 1; 5102 5103 ret = btrfs_lookup_extent_info(trans, root, 5104 eb->start, eb->len, 5105 &wc->refs[level], 5106 &wc->flags[level]); 5107 BUG_ON(ret); 5108 BUG_ON(wc->refs[level] == 0); 5109 if (wc->refs[level] == 1) { 5110 btrfs_tree_unlock(eb); 5111 path->locks[level] = 0; 5112 return 1; 5113 } 5114 } 5115 } 5116 5117 /* wc->stage == DROP_REFERENCE */ 5118 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 5119 5120 if (wc->refs[level] == 1) { 5121 if (level == 0) { 5122 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5123 ret = btrfs_dec_ref(trans, root, eb, 1); 5124 else 5125 ret = btrfs_dec_ref(trans, root, eb, 0); 5126 BUG_ON(ret); 5127 } 5128 /* make block locked assertion in clean_tree_block happy */ 5129 if (!path->locks[level] && 5130 btrfs_header_generation(eb) == trans->transid) { 5131 btrfs_tree_lock(eb); 5132 btrfs_set_lock_blocking(eb); 5133 path->locks[level] = 1; 5134 } 5135 clean_tree_block(trans, root, eb); 5136 } 5137 5138 if (eb == root->node) { 5139 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5140 parent = eb->start; 5141 else 5142 BUG_ON(root->root_key.objectid != 5143 btrfs_header_owner(eb)); 5144 } else { 5145 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5146 parent = path->nodes[level + 1]->start; 5147 else 5148 BUG_ON(root->root_key.objectid != 5149 btrfs_header_owner(path->nodes[level + 1])); 5150 } 5151 5152 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 5153 root->root_key.objectid, level, 0); 5154 BUG_ON(ret); 5155 out: 5156 wc->refs[level] = 0; 5157 wc->flags[level] = 0; 5158 return ret; 5159 } 5160 5161 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 5162 struct btrfs_root *root, 5163 struct btrfs_path *path, 5164 struct walk_control *wc) 5165 { 5166 int level = wc->level; 5167 int ret; 5168 5169 while (level >= 0) { 5170 if (path->slots[level] >= 5171 btrfs_header_nritems(path->nodes[level])) 5172 break; 5173 5174 ret = walk_down_proc(trans, root, path, wc); 5175 if (ret > 0) 5176 break; 5177 5178 if (level == 0) 5179 break; 5180 5181 ret = do_walk_down(trans, root, path, wc); 5182 if (ret > 0) { 5183 path->slots[level]++; 5184 continue; 5185 } 5186 level = wc->level; 5187 } 5188 return 0; 5189 } 5190 5191 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5192 struct btrfs_root *root, 5193 struct btrfs_path *path, 5194 struct walk_control *wc, int max_level) 5195 { 5196 int level = wc->level; 5197 int ret; 5198 5199 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 5200 while (level < max_level && path->nodes[level]) { 5201 wc->level = level; 5202 if (path->slots[level] + 1 < 5203 btrfs_header_nritems(path->nodes[level])) { 5204 path->slots[level]++; 5205 return 0; 5206 } else { 5207 ret = walk_up_proc(trans, root, path, wc); 5208 if (ret > 0) 5209 return 0; 5210 5211 if (path->locks[level]) { 5212 btrfs_tree_unlock(path->nodes[level]); 5213 path->locks[level] = 0; 5214 } 5215 free_extent_buffer(path->nodes[level]); 5216 path->nodes[level] = NULL; 5217 level++; 5218 } 5219 } 5220 return 1; 5221 } 5222 5223 /* 5224 * drop a subvolume tree. 5225 * 5226 * this function traverses the tree freeing any blocks that only 5227 * referenced by the tree. 5228 * 5229 * when a shared tree block is found. this function decreases its 5230 * reference count by one. if update_ref is true, this function 5231 * also make sure backrefs for the shared block and all lower level 5232 * blocks are properly updated. 5233 */ 5234 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 5235 { 5236 struct btrfs_path *path; 5237 struct btrfs_trans_handle *trans; 5238 struct btrfs_root *tree_root = root->fs_info->tree_root; 5239 struct btrfs_root_item *root_item = &root->root_item; 5240 struct walk_control *wc; 5241 struct btrfs_key key; 5242 int err = 0; 5243 int ret; 5244 int level; 5245 5246 path = btrfs_alloc_path(); 5247 BUG_ON(!path); 5248 5249 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5250 BUG_ON(!wc); 5251 5252 trans = btrfs_start_transaction(tree_root, 1); 5253 5254 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5255 level = btrfs_header_level(root->node); 5256 path->nodes[level] = btrfs_lock_root_node(root); 5257 btrfs_set_lock_blocking(path->nodes[level]); 5258 path->slots[level] = 0; 5259 path->locks[level] = 1; 5260 memset(&wc->update_progress, 0, 5261 sizeof(wc->update_progress)); 5262 } else { 5263 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5264 memcpy(&wc->update_progress, &key, 5265 sizeof(wc->update_progress)); 5266 5267 level = root_item->drop_level; 5268 BUG_ON(level == 0); 5269 path->lowest_level = level; 5270 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5271 path->lowest_level = 0; 5272 if (ret < 0) { 5273 err = ret; 5274 goto out; 5275 } 5276 WARN_ON(ret > 0); 5277 5278 /* 5279 * unlock our path, this is safe because only this 5280 * function is allowed to delete this snapshot 5281 */ 5282 btrfs_unlock_up_safe(path, 0); 5283 5284 level = btrfs_header_level(root->node); 5285 while (1) { 5286 btrfs_tree_lock(path->nodes[level]); 5287 btrfs_set_lock_blocking(path->nodes[level]); 5288 5289 ret = btrfs_lookup_extent_info(trans, root, 5290 path->nodes[level]->start, 5291 path->nodes[level]->len, 5292 &wc->refs[level], 5293 &wc->flags[level]); 5294 BUG_ON(ret); 5295 BUG_ON(wc->refs[level] == 0); 5296 5297 if (level == root_item->drop_level) 5298 break; 5299 5300 btrfs_tree_unlock(path->nodes[level]); 5301 WARN_ON(wc->refs[level] != 1); 5302 level--; 5303 } 5304 } 5305 5306 wc->level = level; 5307 wc->shared_level = -1; 5308 wc->stage = DROP_REFERENCE; 5309 wc->update_ref = update_ref; 5310 wc->keep_locks = 0; 5311 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 5312 5313 while (1) { 5314 ret = walk_down_tree(trans, root, path, wc); 5315 if (ret < 0) { 5316 err = ret; 5317 break; 5318 } 5319 5320 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 5321 if (ret < 0) { 5322 err = ret; 5323 break; 5324 } 5325 5326 if (ret > 0) { 5327 BUG_ON(wc->stage != DROP_REFERENCE); 5328 break; 5329 } 5330 5331 if (wc->stage == DROP_REFERENCE) { 5332 level = wc->level; 5333 btrfs_node_key(path->nodes[level], 5334 &root_item->drop_progress, 5335 path->slots[level]); 5336 root_item->drop_level = level; 5337 } 5338 5339 BUG_ON(wc->level == 0); 5340 if (trans->transaction->in_commit || 5341 trans->transaction->delayed_refs.flushing) { 5342 ret = btrfs_update_root(trans, tree_root, 5343 &root->root_key, 5344 root_item); 5345 BUG_ON(ret); 5346 5347 btrfs_end_transaction(trans, tree_root); 5348 trans = btrfs_start_transaction(tree_root, 1); 5349 } else { 5350 unsigned long update; 5351 update = trans->delayed_ref_updates; 5352 trans->delayed_ref_updates = 0; 5353 if (update) 5354 btrfs_run_delayed_refs(trans, tree_root, 5355 update); 5356 } 5357 } 5358 btrfs_release_path(root, path); 5359 BUG_ON(err); 5360 5361 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5362 BUG_ON(ret); 5363 5364 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 5365 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 5366 NULL, NULL); 5367 BUG_ON(ret < 0); 5368 if (ret > 0) { 5369 ret = btrfs_del_orphan_item(trans, tree_root, 5370 root->root_key.objectid); 5371 BUG_ON(ret); 5372 } 5373 } 5374 5375 if (root->in_radix) { 5376 btrfs_free_fs_root(tree_root->fs_info, root); 5377 } else { 5378 free_extent_buffer(root->node); 5379 free_extent_buffer(root->commit_root); 5380 kfree(root); 5381 } 5382 out: 5383 btrfs_end_transaction(trans, tree_root); 5384 kfree(wc); 5385 btrfs_free_path(path); 5386 return err; 5387 } 5388 5389 /* 5390 * drop subtree rooted at tree block 'node'. 5391 * 5392 * NOTE: this function will unlock and release tree block 'node' 5393 */ 5394 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5395 struct btrfs_root *root, 5396 struct extent_buffer *node, 5397 struct extent_buffer *parent) 5398 { 5399 struct btrfs_path *path; 5400 struct walk_control *wc; 5401 int level; 5402 int parent_level; 5403 int ret = 0; 5404 int wret; 5405 5406 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5407 5408 path = btrfs_alloc_path(); 5409 BUG_ON(!path); 5410 5411 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5412 BUG_ON(!wc); 5413 5414 btrfs_assert_tree_locked(parent); 5415 parent_level = btrfs_header_level(parent); 5416 extent_buffer_get(parent); 5417 path->nodes[parent_level] = parent; 5418 path->slots[parent_level] = btrfs_header_nritems(parent); 5419 5420 btrfs_assert_tree_locked(node); 5421 level = btrfs_header_level(node); 5422 path->nodes[level] = node; 5423 path->slots[level] = 0; 5424 path->locks[level] = 1; 5425 5426 wc->refs[parent_level] = 1; 5427 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5428 wc->level = level; 5429 wc->shared_level = -1; 5430 wc->stage = DROP_REFERENCE; 5431 wc->update_ref = 0; 5432 wc->keep_locks = 1; 5433 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 5434 5435 while (1) { 5436 wret = walk_down_tree(trans, root, path, wc); 5437 if (wret < 0) { 5438 ret = wret; 5439 break; 5440 } 5441 5442 wret = walk_up_tree(trans, root, path, wc, parent_level); 5443 if (wret < 0) 5444 ret = wret; 5445 if (wret != 0) 5446 break; 5447 } 5448 5449 kfree(wc); 5450 btrfs_free_path(path); 5451 return ret; 5452 } 5453 5454 #if 0 5455 static unsigned long calc_ra(unsigned long start, unsigned long last, 5456 unsigned long nr) 5457 { 5458 return min(last, start + nr - 1); 5459 } 5460 5461 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 5462 u64 len) 5463 { 5464 u64 page_start; 5465 u64 page_end; 5466 unsigned long first_index; 5467 unsigned long last_index; 5468 unsigned long i; 5469 struct page *page; 5470 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5471 struct file_ra_state *ra; 5472 struct btrfs_ordered_extent *ordered; 5473 unsigned int total_read = 0; 5474 unsigned int total_dirty = 0; 5475 int ret = 0; 5476 5477 ra = kzalloc(sizeof(*ra), GFP_NOFS); 5478 5479 mutex_lock(&inode->i_mutex); 5480 first_index = start >> PAGE_CACHE_SHIFT; 5481 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 5482 5483 /* make sure the dirty trick played by the caller work */ 5484 ret = invalidate_inode_pages2_range(inode->i_mapping, 5485 first_index, last_index); 5486 if (ret) 5487 goto out_unlock; 5488 5489 file_ra_state_init(ra, inode->i_mapping); 5490 5491 for (i = first_index ; i <= last_index; i++) { 5492 if (total_read % ra->ra_pages == 0) { 5493 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 5494 calc_ra(i, last_index, ra->ra_pages)); 5495 } 5496 total_read++; 5497 again: 5498 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 5499 BUG_ON(1); 5500 page = grab_cache_page(inode->i_mapping, i); 5501 if (!page) { 5502 ret = -ENOMEM; 5503 goto out_unlock; 5504 } 5505 if (!PageUptodate(page)) { 5506 btrfs_readpage(NULL, page); 5507 lock_page(page); 5508 if (!PageUptodate(page)) { 5509 unlock_page(page); 5510 page_cache_release(page); 5511 ret = -EIO; 5512 goto out_unlock; 5513 } 5514 } 5515 wait_on_page_writeback(page); 5516 5517 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 5518 page_end = page_start + PAGE_CACHE_SIZE - 1; 5519 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5520 5521 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5522 if (ordered) { 5523 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5524 unlock_page(page); 5525 page_cache_release(page); 5526 btrfs_start_ordered_extent(inode, ordered, 1); 5527 btrfs_put_ordered_extent(ordered); 5528 goto again; 5529 } 5530 set_page_extent_mapped(page); 5531 5532 if (i == first_index) 5533 set_extent_bits(io_tree, page_start, page_end, 5534 EXTENT_BOUNDARY, GFP_NOFS); 5535 btrfs_set_extent_delalloc(inode, page_start, page_end); 5536 5537 set_page_dirty(page); 5538 total_dirty++; 5539 5540 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5541 unlock_page(page); 5542 page_cache_release(page); 5543 } 5544 5545 out_unlock: 5546 kfree(ra); 5547 mutex_unlock(&inode->i_mutex); 5548 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 5549 return ret; 5550 } 5551 5552 static noinline int relocate_data_extent(struct inode *reloc_inode, 5553 struct btrfs_key *extent_key, 5554 u64 offset) 5555 { 5556 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5557 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 5558 struct extent_map *em; 5559 u64 start = extent_key->objectid - offset; 5560 u64 end = start + extent_key->offset - 1; 5561 5562 em = alloc_extent_map(GFP_NOFS); 5563 BUG_ON(!em || IS_ERR(em)); 5564 5565 em->start = start; 5566 em->len = extent_key->offset; 5567 em->block_len = extent_key->offset; 5568 em->block_start = extent_key->objectid; 5569 em->bdev = root->fs_info->fs_devices->latest_bdev; 5570 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5571 5572 /* setup extent map to cheat btrfs_readpage */ 5573 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5574 while (1) { 5575 int ret; 5576 write_lock(&em_tree->lock); 5577 ret = add_extent_mapping(em_tree, em); 5578 write_unlock(&em_tree->lock); 5579 if (ret != -EEXIST) { 5580 free_extent_map(em); 5581 break; 5582 } 5583 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 5584 } 5585 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5586 5587 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 5588 } 5589 5590 struct btrfs_ref_path { 5591 u64 extent_start; 5592 u64 nodes[BTRFS_MAX_LEVEL]; 5593 u64 root_objectid; 5594 u64 root_generation; 5595 u64 owner_objectid; 5596 u32 num_refs; 5597 int lowest_level; 5598 int current_level; 5599 int shared_level; 5600 5601 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 5602 u64 new_nodes[BTRFS_MAX_LEVEL]; 5603 }; 5604 5605 struct disk_extent { 5606 u64 ram_bytes; 5607 u64 disk_bytenr; 5608 u64 disk_num_bytes; 5609 u64 offset; 5610 u64 num_bytes; 5611 u8 compression; 5612 u8 encryption; 5613 u16 other_encoding; 5614 }; 5615 5616 static int is_cowonly_root(u64 root_objectid) 5617 { 5618 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5619 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5620 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5621 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5622 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5623 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5624 return 1; 5625 return 0; 5626 } 5627 5628 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5629 struct btrfs_root *extent_root, 5630 struct btrfs_ref_path *ref_path, 5631 int first_time) 5632 { 5633 struct extent_buffer *leaf; 5634 struct btrfs_path *path; 5635 struct btrfs_extent_ref *ref; 5636 struct btrfs_key key; 5637 struct btrfs_key found_key; 5638 u64 bytenr; 5639 u32 nritems; 5640 int level; 5641 int ret = 1; 5642 5643 path = btrfs_alloc_path(); 5644 if (!path) 5645 return -ENOMEM; 5646 5647 if (first_time) { 5648 ref_path->lowest_level = -1; 5649 ref_path->current_level = -1; 5650 ref_path->shared_level = -1; 5651 goto walk_up; 5652 } 5653 walk_down: 5654 level = ref_path->current_level - 1; 5655 while (level >= -1) { 5656 u64 parent; 5657 if (level < ref_path->lowest_level) 5658 break; 5659 5660 if (level >= 0) 5661 bytenr = ref_path->nodes[level]; 5662 else 5663 bytenr = ref_path->extent_start; 5664 BUG_ON(bytenr == 0); 5665 5666 parent = ref_path->nodes[level + 1]; 5667 ref_path->nodes[level + 1] = 0; 5668 ref_path->current_level = level; 5669 BUG_ON(parent == 0); 5670 5671 key.objectid = bytenr; 5672 key.offset = parent + 1; 5673 key.type = BTRFS_EXTENT_REF_KEY; 5674 5675 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5676 if (ret < 0) 5677 goto out; 5678 BUG_ON(ret == 0); 5679 5680 leaf = path->nodes[0]; 5681 nritems = btrfs_header_nritems(leaf); 5682 if (path->slots[0] >= nritems) { 5683 ret = btrfs_next_leaf(extent_root, path); 5684 if (ret < 0) 5685 goto out; 5686 if (ret > 0) 5687 goto next; 5688 leaf = path->nodes[0]; 5689 } 5690 5691 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5692 if (found_key.objectid == bytenr && 5693 found_key.type == BTRFS_EXTENT_REF_KEY) { 5694 if (level < ref_path->shared_level) 5695 ref_path->shared_level = level; 5696 goto found; 5697 } 5698 next: 5699 level--; 5700 btrfs_release_path(extent_root, path); 5701 cond_resched(); 5702 } 5703 /* reached lowest level */ 5704 ret = 1; 5705 goto out; 5706 walk_up: 5707 level = ref_path->current_level; 5708 while (level < BTRFS_MAX_LEVEL - 1) { 5709 u64 ref_objectid; 5710 5711 if (level >= 0) 5712 bytenr = ref_path->nodes[level]; 5713 else 5714 bytenr = ref_path->extent_start; 5715 5716 BUG_ON(bytenr == 0); 5717 5718 key.objectid = bytenr; 5719 key.offset = 0; 5720 key.type = BTRFS_EXTENT_REF_KEY; 5721 5722 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5723 if (ret < 0) 5724 goto out; 5725 5726 leaf = path->nodes[0]; 5727 nritems = btrfs_header_nritems(leaf); 5728 if (path->slots[0] >= nritems) { 5729 ret = btrfs_next_leaf(extent_root, path); 5730 if (ret < 0) 5731 goto out; 5732 if (ret > 0) { 5733 /* the extent was freed by someone */ 5734 if (ref_path->lowest_level == level) 5735 goto out; 5736 btrfs_release_path(extent_root, path); 5737 goto walk_down; 5738 } 5739 leaf = path->nodes[0]; 5740 } 5741 5742 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5743 if (found_key.objectid != bytenr || 5744 found_key.type != BTRFS_EXTENT_REF_KEY) { 5745 /* the extent was freed by someone */ 5746 if (ref_path->lowest_level == level) { 5747 ret = 1; 5748 goto out; 5749 } 5750 btrfs_release_path(extent_root, path); 5751 goto walk_down; 5752 } 5753 found: 5754 ref = btrfs_item_ptr(leaf, path->slots[0], 5755 struct btrfs_extent_ref); 5756 ref_objectid = btrfs_ref_objectid(leaf, ref); 5757 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5758 if (first_time) { 5759 level = (int)ref_objectid; 5760 BUG_ON(level >= BTRFS_MAX_LEVEL); 5761 ref_path->lowest_level = level; 5762 ref_path->current_level = level; 5763 ref_path->nodes[level] = bytenr; 5764 } else { 5765 WARN_ON(ref_objectid != level); 5766 } 5767 } else { 5768 WARN_ON(level != -1); 5769 } 5770 first_time = 0; 5771 5772 if (ref_path->lowest_level == level) { 5773 ref_path->owner_objectid = ref_objectid; 5774 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 5775 } 5776 5777 /* 5778 * the block is tree root or the block isn't in reference 5779 * counted tree. 5780 */ 5781 if (found_key.objectid == found_key.offset || 5782 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 5783 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5784 ref_path->root_generation = 5785 btrfs_ref_generation(leaf, ref); 5786 if (level < 0) { 5787 /* special reference from the tree log */ 5788 ref_path->nodes[0] = found_key.offset; 5789 ref_path->current_level = 0; 5790 } 5791 ret = 0; 5792 goto out; 5793 } 5794 5795 level++; 5796 BUG_ON(ref_path->nodes[level] != 0); 5797 ref_path->nodes[level] = found_key.offset; 5798 ref_path->current_level = level; 5799 5800 /* 5801 * the reference was created in the running transaction, 5802 * no need to continue walking up. 5803 */ 5804 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 5805 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5806 ref_path->root_generation = 5807 btrfs_ref_generation(leaf, ref); 5808 ret = 0; 5809 goto out; 5810 } 5811 5812 btrfs_release_path(extent_root, path); 5813 cond_resched(); 5814 } 5815 /* reached max tree level, but no tree root found. */ 5816 BUG(); 5817 out: 5818 btrfs_free_path(path); 5819 return ret; 5820 } 5821 5822 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 5823 struct btrfs_root *extent_root, 5824 struct btrfs_ref_path *ref_path, 5825 u64 extent_start) 5826 { 5827 memset(ref_path, 0, sizeof(*ref_path)); 5828 ref_path->extent_start = extent_start; 5829 5830 return __next_ref_path(trans, extent_root, ref_path, 1); 5831 } 5832 5833 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 5834 struct btrfs_root *extent_root, 5835 struct btrfs_ref_path *ref_path) 5836 { 5837 return __next_ref_path(trans, extent_root, ref_path, 0); 5838 } 5839 5840 static noinline int get_new_locations(struct inode *reloc_inode, 5841 struct btrfs_key *extent_key, 5842 u64 offset, int no_fragment, 5843 struct disk_extent **extents, 5844 int *nr_extents) 5845 { 5846 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5847 struct btrfs_path *path; 5848 struct btrfs_file_extent_item *fi; 5849 struct extent_buffer *leaf; 5850 struct disk_extent *exts = *extents; 5851 struct btrfs_key found_key; 5852 u64 cur_pos; 5853 u64 last_byte; 5854 u32 nritems; 5855 int nr = 0; 5856 int max = *nr_extents; 5857 int ret; 5858 5859 WARN_ON(!no_fragment && *extents); 5860 if (!exts) { 5861 max = 1; 5862 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 5863 if (!exts) 5864 return -ENOMEM; 5865 } 5866 5867 path = btrfs_alloc_path(); 5868 BUG_ON(!path); 5869 5870 cur_pos = extent_key->objectid - offset; 5871 last_byte = extent_key->objectid + extent_key->offset; 5872 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 5873 cur_pos, 0); 5874 if (ret < 0) 5875 goto out; 5876 if (ret > 0) { 5877 ret = -ENOENT; 5878 goto out; 5879 } 5880 5881 while (1) { 5882 leaf = path->nodes[0]; 5883 nritems = btrfs_header_nritems(leaf); 5884 if (path->slots[0] >= nritems) { 5885 ret = btrfs_next_leaf(root, path); 5886 if (ret < 0) 5887 goto out; 5888 if (ret > 0) 5889 break; 5890 leaf = path->nodes[0]; 5891 } 5892 5893 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5894 if (found_key.offset != cur_pos || 5895 found_key.type != BTRFS_EXTENT_DATA_KEY || 5896 found_key.objectid != reloc_inode->i_ino) 5897 break; 5898 5899 fi = btrfs_item_ptr(leaf, path->slots[0], 5900 struct btrfs_file_extent_item); 5901 if (btrfs_file_extent_type(leaf, fi) != 5902 BTRFS_FILE_EXTENT_REG || 5903 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5904 break; 5905 5906 if (nr == max) { 5907 struct disk_extent *old = exts; 5908 max *= 2; 5909 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 5910 memcpy(exts, old, sizeof(*exts) * nr); 5911 if (old != *extents) 5912 kfree(old); 5913 } 5914 5915 exts[nr].disk_bytenr = 5916 btrfs_file_extent_disk_bytenr(leaf, fi); 5917 exts[nr].disk_num_bytes = 5918 btrfs_file_extent_disk_num_bytes(leaf, fi); 5919 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 5920 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5921 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 5922 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 5923 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 5924 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 5925 fi); 5926 BUG_ON(exts[nr].offset > 0); 5927 BUG_ON(exts[nr].compression || exts[nr].encryption); 5928 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 5929 5930 cur_pos += exts[nr].num_bytes; 5931 nr++; 5932 5933 if (cur_pos + offset >= last_byte) 5934 break; 5935 5936 if (no_fragment) { 5937 ret = 1; 5938 goto out; 5939 } 5940 path->slots[0]++; 5941 } 5942 5943 BUG_ON(cur_pos + offset > last_byte); 5944 if (cur_pos + offset < last_byte) { 5945 ret = -ENOENT; 5946 goto out; 5947 } 5948 ret = 0; 5949 out: 5950 btrfs_free_path(path); 5951 if (ret) { 5952 if (exts != *extents) 5953 kfree(exts); 5954 } else { 5955 *extents = exts; 5956 *nr_extents = nr; 5957 } 5958 return ret; 5959 } 5960 5961 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 5962 struct btrfs_root *root, 5963 struct btrfs_path *path, 5964 struct btrfs_key *extent_key, 5965 struct btrfs_key *leaf_key, 5966 struct btrfs_ref_path *ref_path, 5967 struct disk_extent *new_extents, 5968 int nr_extents) 5969 { 5970 struct extent_buffer *leaf; 5971 struct btrfs_file_extent_item *fi; 5972 struct inode *inode = NULL; 5973 struct btrfs_key key; 5974 u64 lock_start = 0; 5975 u64 lock_end = 0; 5976 u64 num_bytes; 5977 u64 ext_offset; 5978 u64 search_end = (u64)-1; 5979 u32 nritems; 5980 int nr_scaned = 0; 5981 int extent_locked = 0; 5982 int extent_type; 5983 int ret; 5984 5985 memcpy(&key, leaf_key, sizeof(key)); 5986 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5987 if (key.objectid < ref_path->owner_objectid || 5988 (key.objectid == ref_path->owner_objectid && 5989 key.type < BTRFS_EXTENT_DATA_KEY)) { 5990 key.objectid = ref_path->owner_objectid; 5991 key.type = BTRFS_EXTENT_DATA_KEY; 5992 key.offset = 0; 5993 } 5994 } 5995 5996 while (1) { 5997 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 5998 if (ret < 0) 5999 goto out; 6000 6001 leaf = path->nodes[0]; 6002 nritems = btrfs_header_nritems(leaf); 6003 next: 6004 if (extent_locked && ret > 0) { 6005 /* 6006 * the file extent item was modified by someone 6007 * before the extent got locked. 6008 */ 6009 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6010 lock_end, GFP_NOFS); 6011 extent_locked = 0; 6012 } 6013 6014 if (path->slots[0] >= nritems) { 6015 if (++nr_scaned > 2) 6016 break; 6017 6018 BUG_ON(extent_locked); 6019 ret = btrfs_next_leaf(root, path); 6020 if (ret < 0) 6021 goto out; 6022 if (ret > 0) 6023 break; 6024 leaf = path->nodes[0]; 6025 nritems = btrfs_header_nritems(leaf); 6026 } 6027 6028 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 6029 6030 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 6031 if ((key.objectid > ref_path->owner_objectid) || 6032 (key.objectid == ref_path->owner_objectid && 6033 key.type > BTRFS_EXTENT_DATA_KEY) || 6034 key.offset >= search_end) 6035 break; 6036 } 6037 6038 if (inode && key.objectid != inode->i_ino) { 6039 BUG_ON(extent_locked); 6040 btrfs_release_path(root, path); 6041 mutex_unlock(&inode->i_mutex); 6042 iput(inode); 6043 inode = NULL; 6044 continue; 6045 } 6046 6047 if (key.type != BTRFS_EXTENT_DATA_KEY) { 6048 path->slots[0]++; 6049 ret = 1; 6050 goto next; 6051 } 6052 fi = btrfs_item_ptr(leaf, path->slots[0], 6053 struct btrfs_file_extent_item); 6054 extent_type = btrfs_file_extent_type(leaf, fi); 6055 if ((extent_type != BTRFS_FILE_EXTENT_REG && 6056 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 6057 (btrfs_file_extent_disk_bytenr(leaf, fi) != 6058 extent_key->objectid)) { 6059 path->slots[0]++; 6060 ret = 1; 6061 goto next; 6062 } 6063 6064 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6065 ext_offset = btrfs_file_extent_offset(leaf, fi); 6066 6067 if (search_end == (u64)-1) { 6068 search_end = key.offset - ext_offset + 6069 btrfs_file_extent_ram_bytes(leaf, fi); 6070 } 6071 6072 if (!extent_locked) { 6073 lock_start = key.offset; 6074 lock_end = lock_start + num_bytes - 1; 6075 } else { 6076 if (lock_start > key.offset || 6077 lock_end + 1 < key.offset + num_bytes) { 6078 unlock_extent(&BTRFS_I(inode)->io_tree, 6079 lock_start, lock_end, GFP_NOFS); 6080 extent_locked = 0; 6081 } 6082 } 6083 6084 if (!inode) { 6085 btrfs_release_path(root, path); 6086 6087 inode = btrfs_iget_locked(root->fs_info->sb, 6088 key.objectid, root); 6089 if (inode->i_state & I_NEW) { 6090 BTRFS_I(inode)->root = root; 6091 BTRFS_I(inode)->location.objectid = 6092 key.objectid; 6093 BTRFS_I(inode)->location.type = 6094 BTRFS_INODE_ITEM_KEY; 6095 BTRFS_I(inode)->location.offset = 0; 6096 btrfs_read_locked_inode(inode); 6097 unlock_new_inode(inode); 6098 } 6099 /* 6100 * some code call btrfs_commit_transaction while 6101 * holding the i_mutex, so we can't use mutex_lock 6102 * here. 6103 */ 6104 if (is_bad_inode(inode) || 6105 !mutex_trylock(&inode->i_mutex)) { 6106 iput(inode); 6107 inode = NULL; 6108 key.offset = (u64)-1; 6109 goto skip; 6110 } 6111 } 6112 6113 if (!extent_locked) { 6114 struct btrfs_ordered_extent *ordered; 6115 6116 btrfs_release_path(root, path); 6117 6118 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6119 lock_end, GFP_NOFS); 6120 ordered = btrfs_lookup_first_ordered_extent(inode, 6121 lock_end); 6122 if (ordered && 6123 ordered->file_offset <= lock_end && 6124 ordered->file_offset + ordered->len > lock_start) { 6125 unlock_extent(&BTRFS_I(inode)->io_tree, 6126 lock_start, lock_end, GFP_NOFS); 6127 btrfs_start_ordered_extent(inode, ordered, 1); 6128 btrfs_put_ordered_extent(ordered); 6129 key.offset += num_bytes; 6130 goto skip; 6131 } 6132 if (ordered) 6133 btrfs_put_ordered_extent(ordered); 6134 6135 extent_locked = 1; 6136 continue; 6137 } 6138 6139 if (nr_extents == 1) { 6140 /* update extent pointer in place */ 6141 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6142 new_extents[0].disk_bytenr); 6143 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6144 new_extents[0].disk_num_bytes); 6145 btrfs_mark_buffer_dirty(leaf); 6146 6147 btrfs_drop_extent_cache(inode, key.offset, 6148 key.offset + num_bytes - 1, 0); 6149 6150 ret = btrfs_inc_extent_ref(trans, root, 6151 new_extents[0].disk_bytenr, 6152 new_extents[0].disk_num_bytes, 6153 leaf->start, 6154 root->root_key.objectid, 6155 trans->transid, 6156 key.objectid); 6157 BUG_ON(ret); 6158 6159 ret = btrfs_free_extent(trans, root, 6160 extent_key->objectid, 6161 extent_key->offset, 6162 leaf->start, 6163 btrfs_header_owner(leaf), 6164 btrfs_header_generation(leaf), 6165 key.objectid, 0); 6166 BUG_ON(ret); 6167 6168 btrfs_release_path(root, path); 6169 key.offset += num_bytes; 6170 } else { 6171 BUG_ON(1); 6172 #if 0 6173 u64 alloc_hint; 6174 u64 extent_len; 6175 int i; 6176 /* 6177 * drop old extent pointer at first, then insert the 6178 * new pointers one bye one 6179 */ 6180 btrfs_release_path(root, path); 6181 ret = btrfs_drop_extents(trans, root, inode, key.offset, 6182 key.offset + num_bytes, 6183 key.offset, &alloc_hint); 6184 BUG_ON(ret); 6185 6186 for (i = 0; i < nr_extents; i++) { 6187 if (ext_offset >= new_extents[i].num_bytes) { 6188 ext_offset -= new_extents[i].num_bytes; 6189 continue; 6190 } 6191 extent_len = min(new_extents[i].num_bytes - 6192 ext_offset, num_bytes); 6193 6194 ret = btrfs_insert_empty_item(trans, root, 6195 path, &key, 6196 sizeof(*fi)); 6197 BUG_ON(ret); 6198 6199 leaf = path->nodes[0]; 6200 fi = btrfs_item_ptr(leaf, path->slots[0], 6201 struct btrfs_file_extent_item); 6202 btrfs_set_file_extent_generation(leaf, fi, 6203 trans->transid); 6204 btrfs_set_file_extent_type(leaf, fi, 6205 BTRFS_FILE_EXTENT_REG); 6206 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6207 new_extents[i].disk_bytenr); 6208 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6209 new_extents[i].disk_num_bytes); 6210 btrfs_set_file_extent_ram_bytes(leaf, fi, 6211 new_extents[i].ram_bytes); 6212 6213 btrfs_set_file_extent_compression(leaf, fi, 6214 new_extents[i].compression); 6215 btrfs_set_file_extent_encryption(leaf, fi, 6216 new_extents[i].encryption); 6217 btrfs_set_file_extent_other_encoding(leaf, fi, 6218 new_extents[i].other_encoding); 6219 6220 btrfs_set_file_extent_num_bytes(leaf, fi, 6221 extent_len); 6222 ext_offset += new_extents[i].offset; 6223 btrfs_set_file_extent_offset(leaf, fi, 6224 ext_offset); 6225 btrfs_mark_buffer_dirty(leaf); 6226 6227 btrfs_drop_extent_cache(inode, key.offset, 6228 key.offset + extent_len - 1, 0); 6229 6230 ret = btrfs_inc_extent_ref(trans, root, 6231 new_extents[i].disk_bytenr, 6232 new_extents[i].disk_num_bytes, 6233 leaf->start, 6234 root->root_key.objectid, 6235 trans->transid, key.objectid); 6236 BUG_ON(ret); 6237 btrfs_release_path(root, path); 6238 6239 inode_add_bytes(inode, extent_len); 6240 6241 ext_offset = 0; 6242 num_bytes -= extent_len; 6243 key.offset += extent_len; 6244 6245 if (num_bytes == 0) 6246 break; 6247 } 6248 BUG_ON(i >= nr_extents); 6249 #endif 6250 } 6251 6252 if (extent_locked) { 6253 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6254 lock_end, GFP_NOFS); 6255 extent_locked = 0; 6256 } 6257 skip: 6258 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 6259 key.offset >= search_end) 6260 break; 6261 6262 cond_resched(); 6263 } 6264 ret = 0; 6265 out: 6266 btrfs_release_path(root, path); 6267 if (inode) { 6268 mutex_unlock(&inode->i_mutex); 6269 if (extent_locked) { 6270 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6271 lock_end, GFP_NOFS); 6272 } 6273 iput(inode); 6274 } 6275 return ret; 6276 } 6277 6278 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 6279 struct btrfs_root *root, 6280 struct extent_buffer *buf, u64 orig_start) 6281 { 6282 int level; 6283 int ret; 6284 6285 BUG_ON(btrfs_header_generation(buf) != trans->transid); 6286 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6287 6288 level = btrfs_header_level(buf); 6289 if (level == 0) { 6290 struct btrfs_leaf_ref *ref; 6291 struct btrfs_leaf_ref *orig_ref; 6292 6293 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 6294 if (!orig_ref) 6295 return -ENOENT; 6296 6297 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 6298 if (!ref) { 6299 btrfs_free_leaf_ref(root, orig_ref); 6300 return -ENOMEM; 6301 } 6302 6303 ref->nritems = orig_ref->nritems; 6304 memcpy(ref->extents, orig_ref->extents, 6305 sizeof(ref->extents[0]) * ref->nritems); 6306 6307 btrfs_free_leaf_ref(root, orig_ref); 6308 6309 ref->root_gen = trans->transid; 6310 ref->bytenr = buf->start; 6311 ref->owner = btrfs_header_owner(buf); 6312 ref->generation = btrfs_header_generation(buf); 6313 6314 ret = btrfs_add_leaf_ref(root, ref, 0); 6315 WARN_ON(ret); 6316 btrfs_free_leaf_ref(root, ref); 6317 } 6318 return 0; 6319 } 6320 6321 static noinline int invalidate_extent_cache(struct btrfs_root *root, 6322 struct extent_buffer *leaf, 6323 struct btrfs_block_group_cache *group, 6324 struct btrfs_root *target_root) 6325 { 6326 struct btrfs_key key; 6327 struct inode *inode = NULL; 6328 struct btrfs_file_extent_item *fi; 6329 u64 num_bytes; 6330 u64 skip_objectid = 0; 6331 u32 nritems; 6332 u32 i; 6333 6334 nritems = btrfs_header_nritems(leaf); 6335 for (i = 0; i < nritems; i++) { 6336 btrfs_item_key_to_cpu(leaf, &key, i); 6337 if (key.objectid == skip_objectid || 6338 key.type != BTRFS_EXTENT_DATA_KEY) 6339 continue; 6340 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6341 if (btrfs_file_extent_type(leaf, fi) == 6342 BTRFS_FILE_EXTENT_INLINE) 6343 continue; 6344 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6345 continue; 6346 if (!inode || inode->i_ino != key.objectid) { 6347 iput(inode); 6348 inode = btrfs_ilookup(target_root->fs_info->sb, 6349 key.objectid, target_root, 1); 6350 } 6351 if (!inode) { 6352 skip_objectid = key.objectid; 6353 continue; 6354 } 6355 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6356 6357 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6358 key.offset + num_bytes - 1, GFP_NOFS); 6359 btrfs_drop_extent_cache(inode, key.offset, 6360 key.offset + num_bytes - 1, 1); 6361 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6362 key.offset + num_bytes - 1, GFP_NOFS); 6363 cond_resched(); 6364 } 6365 iput(inode); 6366 return 0; 6367 } 6368 6369 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 6370 struct btrfs_root *root, 6371 struct extent_buffer *leaf, 6372 struct btrfs_block_group_cache *group, 6373 struct inode *reloc_inode) 6374 { 6375 struct btrfs_key key; 6376 struct btrfs_key extent_key; 6377 struct btrfs_file_extent_item *fi; 6378 struct btrfs_leaf_ref *ref; 6379 struct disk_extent *new_extent; 6380 u64 bytenr; 6381 u64 num_bytes; 6382 u32 nritems; 6383 u32 i; 6384 int ext_index; 6385 int nr_extent; 6386 int ret; 6387 6388 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 6389 BUG_ON(!new_extent); 6390 6391 ref = btrfs_lookup_leaf_ref(root, leaf->start); 6392 BUG_ON(!ref); 6393 6394 ext_index = -1; 6395 nritems = btrfs_header_nritems(leaf); 6396 for (i = 0; i < nritems; i++) { 6397 btrfs_item_key_to_cpu(leaf, &key, i); 6398 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 6399 continue; 6400 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6401 if (btrfs_file_extent_type(leaf, fi) == 6402 BTRFS_FILE_EXTENT_INLINE) 6403 continue; 6404 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6405 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 6406 if (bytenr == 0) 6407 continue; 6408 6409 ext_index++; 6410 if (bytenr >= group->key.objectid + group->key.offset || 6411 bytenr + num_bytes <= group->key.objectid) 6412 continue; 6413 6414 extent_key.objectid = bytenr; 6415 extent_key.offset = num_bytes; 6416 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 6417 nr_extent = 1; 6418 ret = get_new_locations(reloc_inode, &extent_key, 6419 group->key.objectid, 1, 6420 &new_extent, &nr_extent); 6421 if (ret > 0) 6422 continue; 6423 BUG_ON(ret < 0); 6424 6425 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 6426 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 6427 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 6428 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 6429 6430 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6431 new_extent->disk_bytenr); 6432 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6433 new_extent->disk_num_bytes); 6434 btrfs_mark_buffer_dirty(leaf); 6435 6436 ret = btrfs_inc_extent_ref(trans, root, 6437 new_extent->disk_bytenr, 6438 new_extent->disk_num_bytes, 6439 leaf->start, 6440 root->root_key.objectid, 6441 trans->transid, key.objectid); 6442 BUG_ON(ret); 6443 6444 ret = btrfs_free_extent(trans, root, 6445 bytenr, num_bytes, leaf->start, 6446 btrfs_header_owner(leaf), 6447 btrfs_header_generation(leaf), 6448 key.objectid, 0); 6449 BUG_ON(ret); 6450 cond_resched(); 6451 } 6452 kfree(new_extent); 6453 BUG_ON(ext_index + 1 != ref->nritems); 6454 btrfs_free_leaf_ref(root, ref); 6455 return 0; 6456 } 6457 6458 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 6459 struct btrfs_root *root) 6460 { 6461 struct btrfs_root *reloc_root; 6462 int ret; 6463 6464 if (root->reloc_root) { 6465 reloc_root = root->reloc_root; 6466 root->reloc_root = NULL; 6467 list_add(&reloc_root->dead_list, 6468 &root->fs_info->dead_reloc_roots); 6469 6470 btrfs_set_root_bytenr(&reloc_root->root_item, 6471 reloc_root->node->start); 6472 btrfs_set_root_level(&root->root_item, 6473 btrfs_header_level(reloc_root->node)); 6474 memset(&reloc_root->root_item.drop_progress, 0, 6475 sizeof(struct btrfs_disk_key)); 6476 reloc_root->root_item.drop_level = 0; 6477 6478 ret = btrfs_update_root(trans, root->fs_info->tree_root, 6479 &reloc_root->root_key, 6480 &reloc_root->root_item); 6481 BUG_ON(ret); 6482 } 6483 return 0; 6484 } 6485 6486 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 6487 { 6488 struct btrfs_trans_handle *trans; 6489 struct btrfs_root *reloc_root; 6490 struct btrfs_root *prev_root = NULL; 6491 struct list_head dead_roots; 6492 int ret; 6493 unsigned long nr; 6494 6495 INIT_LIST_HEAD(&dead_roots); 6496 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 6497 6498 while (!list_empty(&dead_roots)) { 6499 reloc_root = list_entry(dead_roots.prev, 6500 struct btrfs_root, dead_list); 6501 list_del_init(&reloc_root->dead_list); 6502 6503 BUG_ON(reloc_root->commit_root != NULL); 6504 while (1) { 6505 trans = btrfs_join_transaction(root, 1); 6506 BUG_ON(!trans); 6507 6508 mutex_lock(&root->fs_info->drop_mutex); 6509 ret = btrfs_drop_snapshot(trans, reloc_root); 6510 if (ret != -EAGAIN) 6511 break; 6512 mutex_unlock(&root->fs_info->drop_mutex); 6513 6514 nr = trans->blocks_used; 6515 ret = btrfs_end_transaction(trans, root); 6516 BUG_ON(ret); 6517 btrfs_btree_balance_dirty(root, nr); 6518 } 6519 6520 free_extent_buffer(reloc_root->node); 6521 6522 ret = btrfs_del_root(trans, root->fs_info->tree_root, 6523 &reloc_root->root_key); 6524 BUG_ON(ret); 6525 mutex_unlock(&root->fs_info->drop_mutex); 6526 6527 nr = trans->blocks_used; 6528 ret = btrfs_end_transaction(trans, root); 6529 BUG_ON(ret); 6530 btrfs_btree_balance_dirty(root, nr); 6531 6532 kfree(prev_root); 6533 prev_root = reloc_root; 6534 } 6535 if (prev_root) { 6536 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 6537 kfree(prev_root); 6538 } 6539 return 0; 6540 } 6541 6542 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 6543 { 6544 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 6545 return 0; 6546 } 6547 6548 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 6549 { 6550 struct btrfs_root *reloc_root; 6551 struct btrfs_trans_handle *trans; 6552 struct btrfs_key location; 6553 int found; 6554 int ret; 6555 6556 mutex_lock(&root->fs_info->tree_reloc_mutex); 6557 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 6558 BUG_ON(ret); 6559 found = !list_empty(&root->fs_info->dead_reloc_roots); 6560 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6561 6562 if (found) { 6563 trans = btrfs_start_transaction(root, 1); 6564 BUG_ON(!trans); 6565 ret = btrfs_commit_transaction(trans, root); 6566 BUG_ON(ret); 6567 } 6568 6569 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6570 location.offset = (u64)-1; 6571 location.type = BTRFS_ROOT_ITEM_KEY; 6572 6573 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 6574 BUG_ON(!reloc_root); 6575 btrfs_orphan_cleanup(reloc_root); 6576 return 0; 6577 } 6578 6579 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 6580 struct btrfs_root *root) 6581 { 6582 struct btrfs_root *reloc_root; 6583 struct extent_buffer *eb; 6584 struct btrfs_root_item *root_item; 6585 struct btrfs_key root_key; 6586 int ret; 6587 6588 BUG_ON(!root->ref_cows); 6589 if (root->reloc_root) 6590 return 0; 6591 6592 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 6593 BUG_ON(!root_item); 6594 6595 ret = btrfs_copy_root(trans, root, root->commit_root, 6596 &eb, BTRFS_TREE_RELOC_OBJECTID); 6597 BUG_ON(ret); 6598 6599 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 6600 root_key.offset = root->root_key.objectid; 6601 root_key.type = BTRFS_ROOT_ITEM_KEY; 6602 6603 memcpy(root_item, &root->root_item, sizeof(root_item)); 6604 btrfs_set_root_refs(root_item, 0); 6605 btrfs_set_root_bytenr(root_item, eb->start); 6606 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 6607 btrfs_set_root_generation(root_item, trans->transid); 6608 6609 btrfs_tree_unlock(eb); 6610 free_extent_buffer(eb); 6611 6612 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 6613 &root_key, root_item); 6614 BUG_ON(ret); 6615 kfree(root_item); 6616 6617 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 6618 &root_key); 6619 BUG_ON(!reloc_root); 6620 reloc_root->last_trans = trans->transid; 6621 reloc_root->commit_root = NULL; 6622 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 6623 6624 root->reloc_root = reloc_root; 6625 return 0; 6626 } 6627 6628 /* 6629 * Core function of space balance. 6630 * 6631 * The idea is using reloc trees to relocate tree blocks in reference 6632 * counted roots. There is one reloc tree for each subvol, and all 6633 * reloc trees share same root key objectid. Reloc trees are snapshots 6634 * of the latest committed roots of subvols (root->commit_root). 6635 * 6636 * To relocate a tree block referenced by a subvol, there are two steps. 6637 * COW the block through subvol's reloc tree, then update block pointer 6638 * in the subvol to point to the new block. Since all reloc trees share 6639 * same root key objectid, doing special handing for tree blocks owned 6640 * by them is easy. Once a tree block has been COWed in one reloc tree, 6641 * we can use the resulting new block directly when the same block is 6642 * required to COW again through other reloc trees. By this way, relocated 6643 * tree blocks are shared between reloc trees, so they are also shared 6644 * between subvols. 6645 */ 6646 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 6647 struct btrfs_root *root, 6648 struct btrfs_path *path, 6649 struct btrfs_key *first_key, 6650 struct btrfs_ref_path *ref_path, 6651 struct btrfs_block_group_cache *group, 6652 struct inode *reloc_inode) 6653 { 6654 struct btrfs_root *reloc_root; 6655 struct extent_buffer *eb = NULL; 6656 struct btrfs_key *keys; 6657 u64 *nodes; 6658 int level; 6659 int shared_level; 6660 int lowest_level = 0; 6661 int ret; 6662 6663 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 6664 lowest_level = ref_path->owner_objectid; 6665 6666 if (!root->ref_cows) { 6667 path->lowest_level = lowest_level; 6668 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 6669 BUG_ON(ret < 0); 6670 path->lowest_level = 0; 6671 btrfs_release_path(root, path); 6672 return 0; 6673 } 6674 6675 mutex_lock(&root->fs_info->tree_reloc_mutex); 6676 ret = init_reloc_tree(trans, root); 6677 BUG_ON(ret); 6678 reloc_root = root->reloc_root; 6679 6680 shared_level = ref_path->shared_level; 6681 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 6682 6683 keys = ref_path->node_keys; 6684 nodes = ref_path->new_nodes; 6685 memset(&keys[shared_level + 1], 0, 6686 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6687 memset(&nodes[shared_level + 1], 0, 6688 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6689 6690 if (nodes[lowest_level] == 0) { 6691 path->lowest_level = lowest_level; 6692 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6693 0, 1); 6694 BUG_ON(ret); 6695 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 6696 eb = path->nodes[level]; 6697 if (!eb || eb == reloc_root->node) 6698 break; 6699 nodes[level] = eb->start; 6700 if (level == 0) 6701 btrfs_item_key_to_cpu(eb, &keys[level], 0); 6702 else 6703 btrfs_node_key_to_cpu(eb, &keys[level], 0); 6704 } 6705 if (nodes[0] && 6706 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6707 eb = path->nodes[0]; 6708 ret = replace_extents_in_leaf(trans, reloc_root, eb, 6709 group, reloc_inode); 6710 BUG_ON(ret); 6711 } 6712 btrfs_release_path(reloc_root, path); 6713 } else { 6714 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 6715 lowest_level); 6716 BUG_ON(ret); 6717 } 6718 6719 /* 6720 * replace tree blocks in the fs tree with tree blocks in 6721 * the reloc tree. 6722 */ 6723 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 6724 BUG_ON(ret < 0); 6725 6726 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6727 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6728 0, 0); 6729 BUG_ON(ret); 6730 extent_buffer_get(path->nodes[0]); 6731 eb = path->nodes[0]; 6732 btrfs_release_path(reloc_root, path); 6733 ret = invalidate_extent_cache(reloc_root, eb, group, root); 6734 BUG_ON(ret); 6735 free_extent_buffer(eb); 6736 } 6737 6738 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6739 path->lowest_level = 0; 6740 return 0; 6741 } 6742 6743 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 6744 struct btrfs_root *root, 6745 struct btrfs_path *path, 6746 struct btrfs_key *first_key, 6747 struct btrfs_ref_path *ref_path) 6748 { 6749 int ret; 6750 6751 ret = relocate_one_path(trans, root, path, first_key, 6752 ref_path, NULL, NULL); 6753 BUG_ON(ret); 6754 6755 return 0; 6756 } 6757 6758 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 6759 struct btrfs_root *extent_root, 6760 struct btrfs_path *path, 6761 struct btrfs_key *extent_key) 6762 { 6763 int ret; 6764 6765 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 6766 if (ret) 6767 goto out; 6768 ret = btrfs_del_item(trans, extent_root, path); 6769 out: 6770 btrfs_release_path(extent_root, path); 6771 return ret; 6772 } 6773 6774 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 6775 struct btrfs_ref_path *ref_path) 6776 { 6777 struct btrfs_key root_key; 6778 6779 root_key.objectid = ref_path->root_objectid; 6780 root_key.type = BTRFS_ROOT_ITEM_KEY; 6781 if (is_cowonly_root(ref_path->root_objectid)) 6782 root_key.offset = 0; 6783 else 6784 root_key.offset = (u64)-1; 6785 6786 return btrfs_read_fs_root_no_name(fs_info, &root_key); 6787 } 6788 6789 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 6790 struct btrfs_path *path, 6791 struct btrfs_key *extent_key, 6792 struct btrfs_block_group_cache *group, 6793 struct inode *reloc_inode, int pass) 6794 { 6795 struct btrfs_trans_handle *trans; 6796 struct btrfs_root *found_root; 6797 struct btrfs_ref_path *ref_path = NULL; 6798 struct disk_extent *new_extents = NULL; 6799 int nr_extents = 0; 6800 int loops; 6801 int ret; 6802 int level; 6803 struct btrfs_key first_key; 6804 u64 prev_block = 0; 6805 6806 6807 trans = btrfs_start_transaction(extent_root, 1); 6808 BUG_ON(!trans); 6809 6810 if (extent_key->objectid == 0) { 6811 ret = del_extent_zero(trans, extent_root, path, extent_key); 6812 goto out; 6813 } 6814 6815 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 6816 if (!ref_path) { 6817 ret = -ENOMEM; 6818 goto out; 6819 } 6820 6821 for (loops = 0; ; loops++) { 6822 if (loops == 0) { 6823 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 6824 extent_key->objectid); 6825 } else { 6826 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 6827 } 6828 if (ret < 0) 6829 goto out; 6830 if (ret > 0) 6831 break; 6832 6833 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 6834 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 6835 continue; 6836 6837 found_root = read_ref_root(extent_root->fs_info, ref_path); 6838 BUG_ON(!found_root); 6839 /* 6840 * for reference counted tree, only process reference paths 6841 * rooted at the latest committed root. 6842 */ 6843 if (found_root->ref_cows && 6844 ref_path->root_generation != found_root->root_key.offset) 6845 continue; 6846 6847 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6848 if (pass == 0) { 6849 /* 6850 * copy data extents to new locations 6851 */ 6852 u64 group_start = group->key.objectid; 6853 ret = relocate_data_extent(reloc_inode, 6854 extent_key, 6855 group_start); 6856 if (ret < 0) 6857 goto out; 6858 break; 6859 } 6860 level = 0; 6861 } else { 6862 level = ref_path->owner_objectid; 6863 } 6864 6865 if (prev_block != ref_path->nodes[level]) { 6866 struct extent_buffer *eb; 6867 u64 block_start = ref_path->nodes[level]; 6868 u64 block_size = btrfs_level_size(found_root, level); 6869 6870 eb = read_tree_block(found_root, block_start, 6871 block_size, 0); 6872 btrfs_tree_lock(eb); 6873 BUG_ON(level != btrfs_header_level(eb)); 6874 6875 if (level == 0) 6876 btrfs_item_key_to_cpu(eb, &first_key, 0); 6877 else 6878 btrfs_node_key_to_cpu(eb, &first_key, 0); 6879 6880 btrfs_tree_unlock(eb); 6881 free_extent_buffer(eb); 6882 prev_block = block_start; 6883 } 6884 6885 mutex_lock(&extent_root->fs_info->trans_mutex); 6886 btrfs_record_root_in_trans(found_root); 6887 mutex_unlock(&extent_root->fs_info->trans_mutex); 6888 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6889 /* 6890 * try to update data extent references while 6891 * keeping metadata shared between snapshots. 6892 */ 6893 if (pass == 1) { 6894 ret = relocate_one_path(trans, found_root, 6895 path, &first_key, ref_path, 6896 group, reloc_inode); 6897 if (ret < 0) 6898 goto out; 6899 continue; 6900 } 6901 /* 6902 * use fallback method to process the remaining 6903 * references. 6904 */ 6905 if (!new_extents) { 6906 u64 group_start = group->key.objectid; 6907 new_extents = kmalloc(sizeof(*new_extents), 6908 GFP_NOFS); 6909 nr_extents = 1; 6910 ret = get_new_locations(reloc_inode, 6911 extent_key, 6912 group_start, 1, 6913 &new_extents, 6914 &nr_extents); 6915 if (ret) 6916 goto out; 6917 } 6918 ret = replace_one_extent(trans, found_root, 6919 path, extent_key, 6920 &first_key, ref_path, 6921 new_extents, nr_extents); 6922 } else { 6923 ret = relocate_tree_block(trans, found_root, path, 6924 &first_key, ref_path); 6925 } 6926 if (ret < 0) 6927 goto out; 6928 } 6929 ret = 0; 6930 out: 6931 btrfs_end_transaction(trans, extent_root); 6932 kfree(new_extents); 6933 kfree(ref_path); 6934 return ret; 6935 } 6936 #endif 6937 6938 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6939 { 6940 u64 num_devices; 6941 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6942 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6943 6944 num_devices = root->fs_info->fs_devices->rw_devices; 6945 if (num_devices == 1) { 6946 stripped |= BTRFS_BLOCK_GROUP_DUP; 6947 stripped = flags & ~stripped; 6948 6949 /* turn raid0 into single device chunks */ 6950 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6951 return stripped; 6952 6953 /* turn mirroring into duplication */ 6954 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6955 BTRFS_BLOCK_GROUP_RAID10)) 6956 return stripped | BTRFS_BLOCK_GROUP_DUP; 6957 return flags; 6958 } else { 6959 /* they already had raid on here, just return */ 6960 if (flags & stripped) 6961 return flags; 6962 6963 stripped |= BTRFS_BLOCK_GROUP_DUP; 6964 stripped = flags & ~stripped; 6965 6966 /* switch duplicated blocks with raid1 */ 6967 if (flags & BTRFS_BLOCK_GROUP_DUP) 6968 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6969 6970 /* turn single device chunks into raid0 */ 6971 return stripped | BTRFS_BLOCK_GROUP_RAID0; 6972 } 6973 return flags; 6974 } 6975 6976 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 6977 struct btrfs_block_group_cache *shrink_block_group, 6978 int force) 6979 { 6980 struct btrfs_trans_handle *trans; 6981 u64 new_alloc_flags; 6982 u64 calc; 6983 6984 spin_lock(&shrink_block_group->lock); 6985 if (btrfs_block_group_used(&shrink_block_group->item) + 6986 shrink_block_group->reserved > 0) { 6987 spin_unlock(&shrink_block_group->lock); 6988 6989 trans = btrfs_start_transaction(root, 1); 6990 spin_lock(&shrink_block_group->lock); 6991 6992 new_alloc_flags = update_block_group_flags(root, 6993 shrink_block_group->flags); 6994 if (new_alloc_flags != shrink_block_group->flags) { 6995 calc = 6996 btrfs_block_group_used(&shrink_block_group->item); 6997 } else { 6998 calc = shrink_block_group->key.offset; 6999 } 7000 spin_unlock(&shrink_block_group->lock); 7001 7002 do_chunk_alloc(trans, root->fs_info->extent_root, 7003 calc + 2 * 1024 * 1024, new_alloc_flags, force); 7004 7005 btrfs_end_transaction(trans, root); 7006 } else 7007 spin_unlock(&shrink_block_group->lock); 7008 return 0; 7009 } 7010 7011 7012 int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 7013 struct btrfs_block_group_cache *group) 7014 7015 { 7016 __alloc_chunk_for_shrink(root, group, 1); 7017 set_block_group_readonly(group); 7018 return 0; 7019 } 7020 7021 /* 7022 * checks to see if its even possible to relocate this block group. 7023 * 7024 * @return - -1 if it's not a good idea to relocate this block group, 0 if its 7025 * ok to go ahead and try. 7026 */ 7027 int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) 7028 { 7029 struct btrfs_block_group_cache *block_group; 7030 struct btrfs_space_info *space_info; 7031 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 7032 struct btrfs_device *device; 7033 int full = 0; 7034 int ret = 0; 7035 7036 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 7037 7038 /* odd, couldn't find the block group, leave it alone */ 7039 if (!block_group) 7040 return -1; 7041 7042 /* no bytes used, we're good */ 7043 if (!btrfs_block_group_used(&block_group->item)) 7044 goto out; 7045 7046 space_info = block_group->space_info; 7047 spin_lock(&space_info->lock); 7048 7049 full = space_info->full; 7050 7051 /* 7052 * if this is the last block group we have in this space, we can't 7053 * relocate it unless we're able to allocate a new chunk below. 7054 * 7055 * Otherwise, we need to make sure we have room in the space to handle 7056 * all of the extents from this block group. If we can, we're good 7057 */ 7058 if ((space_info->total_bytes != block_group->key.offset) && 7059 (space_info->bytes_used + space_info->bytes_reserved + 7060 space_info->bytes_pinned + space_info->bytes_readonly + 7061 btrfs_block_group_used(&block_group->item) < 7062 space_info->total_bytes)) { 7063 spin_unlock(&space_info->lock); 7064 goto out; 7065 } 7066 spin_unlock(&space_info->lock); 7067 7068 /* 7069 * ok we don't have enough space, but maybe we have free space on our 7070 * devices to allocate new chunks for relocation, so loop through our 7071 * alloc devices and guess if we have enough space. However, if we 7072 * were marked as full, then we know there aren't enough chunks, and we 7073 * can just return. 7074 */ 7075 ret = -1; 7076 if (full) 7077 goto out; 7078 7079 mutex_lock(&root->fs_info->chunk_mutex); 7080 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 7081 u64 min_free = btrfs_block_group_used(&block_group->item); 7082 u64 dev_offset, max_avail; 7083 7084 /* 7085 * check to make sure we can actually find a chunk with enough 7086 * space to fit our block group in. 7087 */ 7088 if (device->total_bytes > device->bytes_used + min_free) { 7089 ret = find_free_dev_extent(NULL, device, min_free, 7090 &dev_offset, &max_avail); 7091 if (!ret) 7092 break; 7093 ret = -1; 7094 } 7095 } 7096 mutex_unlock(&root->fs_info->chunk_mutex); 7097 out: 7098 btrfs_put_block_group(block_group); 7099 return ret; 7100 } 7101 7102 static int find_first_block_group(struct btrfs_root *root, 7103 struct btrfs_path *path, struct btrfs_key *key) 7104 { 7105 int ret = 0; 7106 struct btrfs_key found_key; 7107 struct extent_buffer *leaf; 7108 int slot; 7109 7110 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 7111 if (ret < 0) 7112 goto out; 7113 7114 while (1) { 7115 slot = path->slots[0]; 7116 leaf = path->nodes[0]; 7117 if (slot >= btrfs_header_nritems(leaf)) { 7118 ret = btrfs_next_leaf(root, path); 7119 if (ret == 0) 7120 continue; 7121 if (ret < 0) 7122 goto out; 7123 break; 7124 } 7125 btrfs_item_key_to_cpu(leaf, &found_key, slot); 7126 7127 if (found_key.objectid >= key->objectid && 7128 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 7129 ret = 0; 7130 goto out; 7131 } 7132 path->slots[0]++; 7133 } 7134 ret = -ENOENT; 7135 out: 7136 return ret; 7137 } 7138 7139 int btrfs_free_block_groups(struct btrfs_fs_info *info) 7140 { 7141 struct btrfs_block_group_cache *block_group; 7142 struct btrfs_space_info *space_info; 7143 struct btrfs_caching_control *caching_ctl; 7144 struct rb_node *n; 7145 7146 down_write(&info->extent_commit_sem); 7147 while (!list_empty(&info->caching_block_groups)) { 7148 caching_ctl = list_entry(info->caching_block_groups.next, 7149 struct btrfs_caching_control, list); 7150 list_del(&caching_ctl->list); 7151 put_caching_control(caching_ctl); 7152 } 7153 up_write(&info->extent_commit_sem); 7154 7155 spin_lock(&info->block_group_cache_lock); 7156 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7157 block_group = rb_entry(n, struct btrfs_block_group_cache, 7158 cache_node); 7159 rb_erase(&block_group->cache_node, 7160 &info->block_group_cache_tree); 7161 spin_unlock(&info->block_group_cache_lock); 7162 7163 down_write(&block_group->space_info->groups_sem); 7164 list_del(&block_group->list); 7165 up_write(&block_group->space_info->groups_sem); 7166 7167 if (block_group->cached == BTRFS_CACHE_STARTED) 7168 wait_block_group_cache_done(block_group); 7169 7170 btrfs_remove_free_space_cache(block_group); 7171 7172 WARN_ON(atomic_read(&block_group->count) != 1); 7173 kfree(block_group); 7174 7175 spin_lock(&info->block_group_cache_lock); 7176 } 7177 spin_unlock(&info->block_group_cache_lock); 7178 7179 /* now that all the block groups are freed, go through and 7180 * free all the space_info structs. This is only called during 7181 * the final stages of unmount, and so we know nobody is 7182 * using them. We call synchronize_rcu() once before we start, 7183 * just to be on the safe side. 7184 */ 7185 synchronize_rcu(); 7186 7187 while(!list_empty(&info->space_info)) { 7188 space_info = list_entry(info->space_info.next, 7189 struct btrfs_space_info, 7190 list); 7191 7192 list_del(&space_info->list); 7193 kfree(space_info); 7194 } 7195 return 0; 7196 } 7197 7198 int btrfs_read_block_groups(struct btrfs_root *root) 7199 { 7200 struct btrfs_path *path; 7201 int ret; 7202 struct btrfs_block_group_cache *cache; 7203 struct btrfs_fs_info *info = root->fs_info; 7204 struct btrfs_space_info *space_info; 7205 struct btrfs_key key; 7206 struct btrfs_key found_key; 7207 struct extent_buffer *leaf; 7208 7209 root = info->extent_root; 7210 key.objectid = 0; 7211 key.offset = 0; 7212 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 7213 path = btrfs_alloc_path(); 7214 if (!path) 7215 return -ENOMEM; 7216 7217 while (1) { 7218 ret = find_first_block_group(root, path, &key); 7219 if (ret > 0) { 7220 ret = 0; 7221 goto error; 7222 } 7223 if (ret != 0) 7224 goto error; 7225 7226 leaf = path->nodes[0]; 7227 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 7228 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7229 if (!cache) { 7230 ret = -ENOMEM; 7231 break; 7232 } 7233 7234 atomic_set(&cache->count, 1); 7235 spin_lock_init(&cache->lock); 7236 spin_lock_init(&cache->tree_lock); 7237 cache->fs_info = info; 7238 INIT_LIST_HEAD(&cache->list); 7239 INIT_LIST_HEAD(&cache->cluster_list); 7240 7241 /* 7242 * we only want to have 32k of ram per block group for keeping 7243 * track of free space, and if we pass 1/2 of that we want to 7244 * start converting things over to using bitmaps 7245 */ 7246 cache->extents_thresh = ((1024 * 32) / 2) / 7247 sizeof(struct btrfs_free_space); 7248 7249 read_extent_buffer(leaf, &cache->item, 7250 btrfs_item_ptr_offset(leaf, path->slots[0]), 7251 sizeof(cache->item)); 7252 memcpy(&cache->key, &found_key, sizeof(found_key)); 7253 7254 key.objectid = found_key.objectid + found_key.offset; 7255 btrfs_release_path(root, path); 7256 cache->flags = btrfs_block_group_flags(&cache->item); 7257 cache->sectorsize = root->sectorsize; 7258 7259 /* 7260 * check for two cases, either we are full, and therefore 7261 * don't need to bother with the caching work since we won't 7262 * find any space, or we are empty, and we can just add all 7263 * the space in and be done with it. This saves us _alot_ of 7264 * time, particularly in the full case. 7265 */ 7266 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7267 exclude_super_stripes(root, cache); 7268 cache->last_byte_to_unpin = (u64)-1; 7269 cache->cached = BTRFS_CACHE_FINISHED; 7270 free_excluded_extents(root, cache); 7271 } else if (btrfs_block_group_used(&cache->item) == 0) { 7272 exclude_super_stripes(root, cache); 7273 cache->last_byte_to_unpin = (u64)-1; 7274 cache->cached = BTRFS_CACHE_FINISHED; 7275 add_new_free_space(cache, root->fs_info, 7276 found_key.objectid, 7277 found_key.objectid + 7278 found_key.offset); 7279 free_excluded_extents(root, cache); 7280 } 7281 7282 ret = update_space_info(info, cache->flags, found_key.offset, 7283 btrfs_block_group_used(&cache->item), 7284 &space_info); 7285 BUG_ON(ret); 7286 cache->space_info = space_info; 7287 spin_lock(&cache->space_info->lock); 7288 cache->space_info->bytes_super += cache->bytes_super; 7289 spin_unlock(&cache->space_info->lock); 7290 7291 down_write(&space_info->groups_sem); 7292 list_add_tail(&cache->list, &space_info->block_groups); 7293 up_write(&space_info->groups_sem); 7294 7295 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7296 BUG_ON(ret); 7297 7298 set_avail_alloc_bits(root->fs_info, cache->flags); 7299 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7300 set_block_group_readonly(cache); 7301 } 7302 ret = 0; 7303 error: 7304 btrfs_free_path(path); 7305 return ret; 7306 } 7307 7308 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7309 struct btrfs_root *root, u64 bytes_used, 7310 u64 type, u64 chunk_objectid, u64 chunk_offset, 7311 u64 size) 7312 { 7313 int ret; 7314 struct btrfs_root *extent_root; 7315 struct btrfs_block_group_cache *cache; 7316 7317 extent_root = root->fs_info->extent_root; 7318 7319 root->fs_info->last_trans_log_full_commit = trans->transid; 7320 7321 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7322 if (!cache) 7323 return -ENOMEM; 7324 7325 cache->key.objectid = chunk_offset; 7326 cache->key.offset = size; 7327 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7328 cache->sectorsize = root->sectorsize; 7329 7330 /* 7331 * we only want to have 32k of ram per block group for keeping track 7332 * of free space, and if we pass 1/2 of that we want to start 7333 * converting things over to using bitmaps 7334 */ 7335 cache->extents_thresh = ((1024 * 32) / 2) / 7336 sizeof(struct btrfs_free_space); 7337 atomic_set(&cache->count, 1); 7338 spin_lock_init(&cache->lock); 7339 spin_lock_init(&cache->tree_lock); 7340 INIT_LIST_HEAD(&cache->list); 7341 INIT_LIST_HEAD(&cache->cluster_list); 7342 7343 btrfs_set_block_group_used(&cache->item, bytes_used); 7344 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7345 cache->flags = type; 7346 btrfs_set_block_group_flags(&cache->item, type); 7347 7348 cache->last_byte_to_unpin = (u64)-1; 7349 cache->cached = BTRFS_CACHE_FINISHED; 7350 exclude_super_stripes(root, cache); 7351 7352 add_new_free_space(cache, root->fs_info, chunk_offset, 7353 chunk_offset + size); 7354 7355 free_excluded_extents(root, cache); 7356 7357 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7358 &cache->space_info); 7359 BUG_ON(ret); 7360 7361 spin_lock(&cache->space_info->lock); 7362 cache->space_info->bytes_super += cache->bytes_super; 7363 spin_unlock(&cache->space_info->lock); 7364 7365 down_write(&cache->space_info->groups_sem); 7366 list_add_tail(&cache->list, &cache->space_info->block_groups); 7367 up_write(&cache->space_info->groups_sem); 7368 7369 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7370 BUG_ON(ret); 7371 7372 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7373 sizeof(cache->item)); 7374 BUG_ON(ret); 7375 7376 set_avail_alloc_bits(extent_root->fs_info, type); 7377 7378 return 0; 7379 } 7380 7381 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7382 struct btrfs_root *root, u64 group_start) 7383 { 7384 struct btrfs_path *path; 7385 struct btrfs_block_group_cache *block_group; 7386 struct btrfs_free_cluster *cluster; 7387 struct btrfs_key key; 7388 int ret; 7389 7390 root = root->fs_info->extent_root; 7391 7392 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7393 BUG_ON(!block_group); 7394 BUG_ON(!block_group->ro); 7395 7396 memcpy(&key, &block_group->key, sizeof(key)); 7397 7398 /* make sure this block group isn't part of an allocation cluster */ 7399 cluster = &root->fs_info->data_alloc_cluster; 7400 spin_lock(&cluster->refill_lock); 7401 btrfs_return_cluster_to_free_space(block_group, cluster); 7402 spin_unlock(&cluster->refill_lock); 7403 7404 /* 7405 * make sure this block group isn't part of a metadata 7406 * allocation cluster 7407 */ 7408 cluster = &root->fs_info->meta_alloc_cluster; 7409 spin_lock(&cluster->refill_lock); 7410 btrfs_return_cluster_to_free_space(block_group, cluster); 7411 spin_unlock(&cluster->refill_lock); 7412 7413 path = btrfs_alloc_path(); 7414 BUG_ON(!path); 7415 7416 spin_lock(&root->fs_info->block_group_cache_lock); 7417 rb_erase(&block_group->cache_node, 7418 &root->fs_info->block_group_cache_tree); 7419 spin_unlock(&root->fs_info->block_group_cache_lock); 7420 7421 down_write(&block_group->space_info->groups_sem); 7422 /* 7423 * we must use list_del_init so people can check to see if they 7424 * are still on the list after taking the semaphore 7425 */ 7426 list_del_init(&block_group->list); 7427 up_write(&block_group->space_info->groups_sem); 7428 7429 if (block_group->cached == BTRFS_CACHE_STARTED) 7430 wait_block_group_cache_done(block_group); 7431 7432 btrfs_remove_free_space_cache(block_group); 7433 7434 spin_lock(&block_group->space_info->lock); 7435 block_group->space_info->total_bytes -= block_group->key.offset; 7436 block_group->space_info->bytes_readonly -= block_group->key.offset; 7437 spin_unlock(&block_group->space_info->lock); 7438 7439 btrfs_clear_space_info_full(root->fs_info); 7440 7441 btrfs_put_block_group(block_group); 7442 btrfs_put_block_group(block_group); 7443 7444 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7445 if (ret > 0) 7446 ret = -EIO; 7447 if (ret < 0) 7448 goto out; 7449 7450 ret = btrfs_del_item(trans, root, path); 7451 out: 7452 btrfs_free_path(path); 7453 return ret; 7454 } 7455