/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include <linux/kthread.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

static int update_reserved_extents(struct btrfs_root *root,
				   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force);

static noinline int
block_group_cache_done(struct btrfs_block_group_cache *cache)
{
	smp_mb();
	return cache->cached == BTRFS_CACHE_FINISHED;
}

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

/*
 * We always set EXTENT_LOCKED for the super mirror extents so we don't
 * overwrite them, so those bits need to be unset.  Also, if we are unmounting
 * with pinned extents still sitting there because block group caching was
 * still in progress, we need to clear those now, since we are done.
 */
void btrfs_free_pinned_extents(struct btrfs_fs_info *info)
{
	u64 start, end, last = 0;
	int ret;

	while (1) {
		ret = find_first_extent_bit(&info->pinned_extents, last,
					    &start, &end,
					    EXTENT_LOCKED|EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_bits(&info->pinned_extents, start, end,
				  EXTENT_LOCKED|EXTENT_DIRTY, GFP_NOFS);
		last = end + 1;
	}
}

static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr,
				       0, &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			try_lock_extent(&fs_info->pinned_extents,
					logical[nr],
					logical[nr] + stripe_len - 1, GFP_NOFS);
		}
		kfree(logical);
	}

	return 0;
}

/*
 * This is only called by cache_block_group.  Since we could have freed
 * extents, we need to check pinned_extents for any extents that can't be
 * used yet, since their free space will be released as soon as the
 * transaction commits.
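 *
 * Worked illustration (editor's sketch, not part of the original comment):
 * caching the range [0, 100) while [40, 60] is still pinned ends up doing
 *
 *	btrfs_add_free_space(block_group, 0, 40);
 *	btrfs_add_free_space(block_group, 61, 39);
 *
 * and the pinned middle chunk only becomes free space once the transaction
 * commits.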
 */
static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size, total_added = 0;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY|EXTENT_LOCKED);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			total_added += size;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		total_added += size;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return total_added;
}

static int caching_kthread(void *data)
{
	struct btrfs_block_group_cache *block_group = data;
	struct btrfs_fs_info *fs_info = block_group->fs_info;
	u64 last = 0;
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 total_found = 0;

	BUG_ON(!fs_info);

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	atomic_inc(&block_group->space_info->caching_threads);
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
again:
	/* need to make sure the commit_root doesn't disappear */
	down_read(&fs_info->extent_commit_sem);

	/*
	 * We don't want to deadlock with somebody trying to allocate a new
	 * extent for the extent root while also trying to search the extent
	 * root to add free space.  So we skip locking and search the commit
	 * root, since it's read-only.
	 */
	path->skip_locking = 1;
	path->search_commit_root = 1;
	path->reada = 2;

	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		smp_mb();
		if (block_group->fs_info->closing > 1) {
			last = (u64)-1;
			break;
		}

		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(fs_info->extent_root, path);
			if (ret < 0)
				goto err;
			else if (ret)
				break;

			if (need_resched() ||
			    btrfs_transaction_in_commit(fs_info)) {
				btrfs_release_path(fs_info->extent_root, path);
				up_read(&fs_info->extent_commit_sem);
				schedule_timeout(1);
				goto again;
			}

			continue;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			total_found += add_new_free_space(block_group,
							  fs_info, last,
							  key.objectid);
			last = key.objectid + key.offset;
		}

		if (total_found > (1024 * 1024 * 2)) {
			total_found = 0;
			wake_up(&block_group->caching_q);
		}
next:
		path->slots[0]++;
	}
	ret = 0;

	total_found += add_new_free_space(block_group, fs_info, last,
					  block_group->key.objectid +
					  block_group->key.offset);

	spin_lock(&block_group->lock);
	block_group->cached = BTRFS_CACHE_FINISHED;
	spin_unlock(&block_group->lock);

err:
	btrfs_free_path(path);
	up_read(&fs_info->extent_commit_sem);
	atomic_dec(&block_group->space_info->caching_threads);
	wake_up(&block_group->caching_q);

	return 0;
}

static int cache_block_group(struct btrfs_block_group_cache *cache)
{
	struct task_struct *tsk;
	int ret = 0;

	spin_lock(&cache->lock);
	if (cache->cached != BTRFS_CACHE_NO) {
		spin_unlock(&cache->lock);
		return ret;
	}
	cache->cached = BTRFS_CACHE_STARTED;
	spin_unlock(&cache->lock);

	tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
			  cache->key.objectid);
	if (IS_ERR(tsk)) {
		ret = PTR_ERR(tsk);
		printk(KERN_ERR "error running thread %d\n", ret);
		BUG();
	}

	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count))
		kfree(cache);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags == flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules.  Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance.  This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs.  Implicit back refs are optimized
 * for pointers in non-shared tree blocks.  For a given pointer in a block,
 * back refs of this kind provide information about the block's owner tree
 * and the pointer's key.  This information allows us to find the block by
 * b-tree search.  Full back refs are for pointers in tree blocks not
 * referenced by their owner trees.  The location of the tree block is
 * recorded in the back refs.  The full back ref is actually generic, and
 * can be used in all the cases where implicit back refs are used.  The
 * major shortcoming of full back refs is their overhead.  Every time a
 * tree block gets COWed, we have to update the back refs entry for all
 * pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for
 * pointers in it.  This means most tree related operations only involve
 * implicit back refs.  For a tree block created in an old transaction, the
 * only way to drop a reference to it is to COW it.  So we can detect the
 * event that a tree block loses its owner tree's reference and do the
 * back refs conversion.
 *
 * When a tree block is COW'd through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree.  Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree.  In this case, full back refs are used for pointers
 * in the block.  Remove these full back refs, add implicit back refs for
 * every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree.  In this case, implicit back refs are used for
 * pointers in the block.  Add full back refs for every pointer in the
 * block, increase lower level extents' reference counts.  The original
 * implicit back refs are carried over to the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree.  Add implicit back refs for every pointer in
 * the new block, increase lower level extents' reference count.
 *
 * Back Reference Key composing:
 *
 * The key objectid corresponds to the first byte in the extent.
 * The key type is used to differentiate between types of back refs.
 * There are different meanings of the key offset for different types
 * of back refs.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - Objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is the hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, the implicit back refs is used.
 * The fields are filled in:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed during file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - Different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * only consist of a key.  The key offset for the implicit back refs is
 * the objectid of the block's owner tree.  The key offset for the full
 * back refs is the first byte of the parent block.
 *
 * When implicit back refs are used, information about the lowest key and
 * the level of the tree block is required.  This information is stored in
 * the tree block info structure.
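 *
 * Worked example (editor's illustration, not part of the original comment):
 * if two files in one subvolume (say root 5, inodes 257 and 258) both
 * reference the same 16K data extent at bytenr X starting at file offset 0,
 * the extent tree roughly holds:
 *
 *     (X, BTRFS_EXTENT_ITEM_KEY, 16384)                      refs = 2
 *         EXTENT_DATA_REF  root=5  objectid=257  offset=0  count=1
 *         EXTENT_DATA_REF  root=5  objectid=258  offset=0  count=1
 *
 * Each implicit data back ref is stored either inline in the extent item or
 * as a separate item whose key offset is hash(root, objectid, offset), as
 * computed by hash_extent_data_ref() below.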
 */

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int convert_extent_item_v0(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  struct btrfs_path *path,
				  u64 owner, u32 extra_size)
{
	struct btrfs_extent_item *item;
	struct btrfs_extent_item_v0 *ei0;
	struct btrfs_extent_ref_v0 *ref0;
	struct btrfs_tree_block_info *bi;
	struct extent_buffer *leaf;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u32 new_size = sizeof(*item);
	u64 refs;
	int ret;

	leaf = path->nodes[0];
	BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0));

	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	ei0 = btrfs_item_ptr(leaf, path->slots[0],
			     struct btrfs_extent_item_v0);
	refs = btrfs_extent_refs_v0(leaf, ei0);

	if (owner == (u64)-1) {
		while (1) {
			if (path->slots[0] >= btrfs_header_nritems(leaf)) {
				ret = btrfs_next_leaf(root, path);
				if (ret < 0)
					return ret;
				BUG_ON(ret > 0);
				leaf = path->nodes[0];
			}
			btrfs_item_key_to_cpu(leaf, &found_key,
					      path->slots[0]);
			BUG_ON(key.objectid != found_key.objectid);
			if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) {
				path->slots[0]++;
				continue;
			}
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			owner = btrfs_ref_objectid_v0(leaf, ref0);
			break;
		}
	}
	btrfs_release_path(root, path);

	if (owner < BTRFS_FIRST_FREE_OBJECTID)
		new_size += sizeof(*bi);

	new_size -= sizeof(*ei0);
	ret = btrfs_search_slot(trans, root, &key, path,
				new_size + extra_size, 1);
	if (ret < 0)
		return ret;
	BUG_ON(ret);

	ret = btrfs_extend_item(trans, root, path, new_size);
	BUG_ON(ret);

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	btrfs_set_extent_refs(leaf, item, refs);
	/* FIXME: get real generation */
	btrfs_set_extent_generation(leaf, item, 0);
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		btrfs_set_extent_flags(leaf, item,
				       BTRFS_EXTENT_FLAG_TREE_BLOCK |
				       BTRFS_BLOCK_FLAG_FULL_BACKREF);
		bi = (struct btrfs_tree_block_info *)(item + 1);
		/* FIXME: get first key of the block */
		memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi));
		btrfs_set_tree_block_level(leaf, bi, (int)owner);
	} else {
		btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}
#endif

static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset)
{
	u32 high_crc = ~(u32)0;
	u32 low_crc = ~(u32)0;
	__le64 lenum;

	lenum = cpu_to_le64(root_objectid);
	high_crc = crc32c(high_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(owner);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));
	lenum = cpu_to_le64(offset);
	low_crc = crc32c(low_crc, &lenum, sizeof(lenum));

	return ((u64)high_crc << 31) ^ (u64)low_crc;
}

static u64 hash_extent_data_ref_item(struct extent_buffer *leaf,
				     struct btrfs_extent_data_ref *ref)
{
	return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref),
				    btrfs_extent_data_ref_objectid(leaf, ref),
				    btrfs_extent_data_ref_offset(leaf, ref));
}

static int match_extent_data_ref(struct extent_buffer *leaf,
				 struct btrfs_extent_data_ref *ref,
				 u64 root_objectid, u64 owner, u64 offset)
{
	if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid ||
	    btrfs_extent_data_ref_objectid(leaf, ref) != owner ||
	    btrfs_extent_data_ref_offset(leaf, ref) != offset)
		return 0;
	return 1;
}

static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid,
					   u64 owner, u64 offset)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref;
	struct extent_buffer *leaf;
	u32 nritems;
	int ret;
	int recow;
	int err = -ENOENT;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
	}
again:
	recow = 0;
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret < 0) {
		err = ret;
		goto fail;
	}

	if (parent) {
		if (!ret)
			return 0;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		btrfs_release_path(root, path);
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret < 0) {
			err = ret;
			goto fail;
		}
		if (!ret)
			return 0;
#endif
		goto fail;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	while (1) {
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				err = ret;
			if (ret)
				goto fail;

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			recow = 1;
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != bytenr ||
		    key.type != BTRFS_EXTENT_DATA_REF_KEY)
			goto fail;

		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);

		if (match_extent_data_ref(leaf, ref, root_objectid,
					  owner, offset)) {
			if (recow) {
				btrfs_release_path(root, path);
				goto again;
			}
			err = 0;
			break;
		}
		path->slots[0]++;
	}
fail:
	return err;
}

static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   u64 bytenr, u64 parent,
					   u64 root_objectid, u64 owner,
					   u64 offset, int refs_to_add)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u32 size;
	u32 num_refs;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_DATA_REF_KEY;
		key.offset = parent;
		size = sizeof(struct btrfs_shared_data_ref);
	} else {
		key.type = BTRFS_EXTENT_DATA_REF_KEY;
		key.offset = hash_extent_data_ref(root_objectid,
						  owner, offset);
		size = sizeof(struct btrfs_extent_data_ref);
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, size);
	if (ret && ret != -EEXIST)
		goto fail;

	leaf = path->nodes[0];
	if (parent) {
		struct btrfs_shared_data_ref *ref;
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_shared_data_ref);
		if (ret == 0) {
			btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_shared_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_shared_data_ref_count(leaf, ref, num_refs);
		}
	} else {
		struct btrfs_extent_data_ref *ref;
		while (ret == -EEXIST) {
			ref = btrfs_item_ptr(leaf, path->slots[0],
					     struct btrfs_extent_data_ref);
			if (match_extent_data_ref(leaf, ref, root_objectid,
						  owner, offset))
				break;
			btrfs_release_path(root, path);
			key.offset++;
			ret = btrfs_insert_empty_item(trans, root, path, &key,
						      size);
			if (ret && ret != -EEXIST)
				goto fail;

			leaf = path->nodes[0];
		}
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_data_ref);
		if (ret == 0) {
			btrfs_set_extent_data_ref_root(leaf, ref,
						       root_objectid);
			btrfs_set_extent_data_ref_objectid(leaf, ref, owner);
			btrfs_set_extent_data_ref_offset(leaf, ref, offset);
			btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add);
		} else {
			num_refs = btrfs_extent_data_ref_count(leaf, ref);
			num_refs += refs_to_add;
			btrfs_set_extent_data_ref_count(leaf, ref, num_refs);
		}
	}
	btrfs_mark_buffer_dirty(leaf);
	ret = 0;
fail:
	btrfs_release_path(root, path);
	return ret;
}

static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans,
					   struct btrfs_root *root,
					   struct btrfs_path *path,
					   int refs_to_drop)
{
	struct btrfs_key key;
	struct btrfs_extent_data_ref *ref1 = NULL;
	struct btrfs_shared_data_ref *ref2 = NULL;
	struct extent_buffer *leaf;
	u32 num_refs = 0;
	int ret = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

	if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs = btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		BUG();
	}

	BUG_ON(num_refs < refs_to_drop);
	num_refs -= refs_to_drop;

	if (num_refs == 0) {
		ret = btrfs_del_item(trans, root, path);
	} else {
		if (key.type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, ref1, num_refs);
		else if (key.type == BTRFS_SHARED_DATA_REF_KEY)
			btrfs_set_shared_data_ref_count(leaf, ref2, num_refs);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		else {
			struct btrfs_extent_ref_v0 *ref0;
			ref0 = btrfs_item_ptr(leaf, path->slots[0],
					      struct btrfs_extent_ref_v0);
			btrfs_set_ref_count_v0(leaf, ref0, num_refs);
		}
#endif
		btrfs_mark_buffer_dirty(leaf);
	}
	return ret;
}

static noinline u32 extent_data_ref_count(struct btrfs_root *root,
					  struct btrfs_path *path,
					  struct btrfs_extent_inline_ref *iref)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_data_ref *ref1;
	struct btrfs_shared_data_ref *ref2;
	u32 num_refs = 0;

	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
	if (iref) {
		if (btrfs_extent_inline_ref_type(leaf, iref) ==
		    BTRFS_EXTENT_DATA_REF_KEY) {
			ref1 = (struct btrfs_extent_data_ref *)(&iref->offset);
			num_refs = btrfs_extent_data_ref_count(leaf, ref1);
		} else {
			ref2 = (struct btrfs_shared_data_ref *)(iref + 1);
			num_refs = btrfs_shared_data_ref_count(leaf, ref2);
		}
	} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
		ref1 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_data_ref);
		num_refs =
			btrfs_extent_data_ref_count(leaf, ref1);
	} else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
		ref2 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_shared_data_ref);
		num_refs = btrfs_shared_data_ref_count(leaf, ref2);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	} else if (key.type == BTRFS_EXTENT_REF_V0_KEY) {
		struct btrfs_extent_ref_v0 *ref0;
		ref0 = btrfs_item_ptr(leaf, path->slots[0],
				      struct btrfs_extent_ref_v0);
		num_refs = btrfs_ref_count_v0(leaf, ref0);
#endif
	} else {
		WARN_ON(1);
	}
	return num_refs;
}

static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -ENOENT;
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ret == -ENOENT && parent) {
		btrfs_release_path(root, path);
		key.type = BTRFS_EXTENT_REF_V0_KEY;
		ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
		if (ret > 0)
			ret = -ENOENT;
	}
#endif
	return ret;
}

static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans,
					  struct btrfs_root *root,
					  struct btrfs_path *path,
					  u64 bytenr, u64 parent,
					  u64 root_objectid)
{
	struct btrfs_key key;
	int ret;

	key.objectid = bytenr;
	if (parent) {
		key.type = BTRFS_SHARED_BLOCK_REF_KEY;
		key.offset = parent;
	} else {
		key.type = BTRFS_TREE_BLOCK_REF_KEY;
		key.offset = root_objectid;
	}

	ret = btrfs_insert_empty_item(trans, root, path, &key, 0);
	btrfs_release_path(root, path);
	return ret;
}

static inline int extent_ref_type(u64 parent, u64 owner)
{
	int type;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		if (parent > 0)
			type = BTRFS_SHARED_BLOCK_REF_KEY;
		else
			type = BTRFS_TREE_BLOCK_REF_KEY;
	} else {
		if (parent > 0)
			type = BTRFS_SHARED_DATA_REF_KEY;
		else
			type = BTRFS_EXTENT_DATA_REF_KEY;
	}
	return type;
}

static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)

{
	for (; level < BTRFS_MAX_LEVEL; level++) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 >=
		    btrfs_header_nritems(path->nodes[level]))
			continue;
		if (level == 0)
			btrfs_item_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		else
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
		return 0;
	}
	return 1;
}

/*
 * look for inline back ref. if back ref is found, *ref_ret is set
 * to the address of inline back ref, and 0 is returned.
 *
 * if back ref isn't found, *ref_ret is set to the address where it
 * should be inserted, and -ENOENT is returned.
 *
 * if insert is true and there are too many inline back refs, the path
 * points to the extent item, and -EAGAIN is returned.
 *
 * NOTE: inline back refs are ordered in the same way that back ref
 * items in the tree are ordered.
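 *
 * Typical caller pattern (editor's note, mirroring
 * insert_inline_extent_backref() and __btrfs_inc_extent_ref() below): on 0
 * the existing inline ref at *ref_ret is updated in place, on -ENOENT a new
 * inline ref is set up at *ref_ret, and on -EAGAIN the caller falls back to
 * a separate keyed back ref item.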
 */
static noinline_for_stack
int lookup_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes,
				 u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int insert)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_inline_ref *iref;
	u64 flags;
	u64 item_size;
	unsigned long ptr;
	unsigned long end;
	int extra_size;
	int type;
	int want;
	int ret;
	int err = 0;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = num_bytes;

	want = extent_ref_type(parent, owner);
	if (insert) {
		extra_size = btrfs_extent_inline_ref_size(want);
		path->keep_locks = 1;
	} else
		extra_size = -1;
	ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(ret);

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		if (!insert) {
			err = -ENOENT;
			goto out;
		}
		ret = convert_extent_item_v0(trans, root, path, owner,
					     extra_size);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	flags = btrfs_extent_flags(leaf, ei);

	ptr = (unsigned long)(ei + 1);
	end = (unsigned long)ei + item_size;

	if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
		ptr += sizeof(struct btrfs_tree_block_info);
		BUG_ON(ptr > end);
	} else {
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA));
	}

	err = -ENOENT;
	while (1) {
		if (ptr >= end) {
			WARN_ON(ptr > end);
			break;
		}
		iref = (struct btrfs_extent_inline_ref *)ptr;
		type = btrfs_extent_inline_ref_type(leaf, iref);
		if (want < type)
			break;
		if (want > type) {
			ptr += btrfs_extent_inline_ref_size(type);
			continue;
		}

		if (type == BTRFS_EXTENT_DATA_REF_KEY) {
			struct btrfs_extent_data_ref *dref;
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			if (match_extent_data_ref(leaf, dref, root_objectid,
						  owner, offset)) {
				err = 0;
				break;
			}
			if (hash_extent_data_ref_item(leaf, dref) <
			    hash_extent_data_ref(root_objectid, owner, offset))
				break;
		} else {
			u64 ref_offset;
			ref_offset = btrfs_extent_inline_ref_offset(leaf, iref);
			if (parent > 0) {
				if (parent == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < parent)
					break;
			} else {
				if (root_objectid == ref_offset) {
					err = 0;
					break;
				}
				if (ref_offset < root_objectid)
					break;
			}
		}
		ptr += btrfs_extent_inline_ref_size(type);
	}
	if (err == -ENOENT && insert) {
		if (item_size + extra_size >=
		    BTRFS_MAX_EXTENT_ITEM_SIZE(root)) {
			err = -EAGAIN;
			goto out;
		}
		/*
		 * To add new inline back ref, we have to make sure
		 * there is no corresponding back ref item.
		 * For simplicity, we just do not add new inline back
		 * ref if there is any kind of item for this block
		 */
		if (find_next_key(path, 0, &key) == 0 &&
		    key.objectid == bytenr &&
		    key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) {
			err = -EAGAIN;
			goto out;
		}
	}
	*ref_ret = (struct btrfs_extent_inline_ref *)ptr;
out:
	if (insert) {
		path->keep_locks = 0;
		btrfs_unlock_up_safe(path, 1);
	}
	return err;
}

/*
 * helper to add new inline back ref
 */
static noinline_for_stack
int setup_inline_extent_backref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_path *path,
				struct btrfs_extent_inline_ref *iref,
				u64 parent, u64 root_objectid,
				u64 owner, u64 offset, int refs_to_add,
				struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	unsigned long ptr;
	unsigned long end;
	unsigned long item_offset;
	u64 refs;
	int size;
	int type;
	int ret;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	item_offset = (unsigned long)iref - (unsigned long)ei;

	type = extent_ref_type(parent, owner);
	size = btrfs_extent_inline_ref_size(type);

	ret = btrfs_extend_item(trans, root, path, size);
	BUG_ON(ret);

	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	refs += refs_to_add;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	ptr = (unsigned long)ei + item_offset;
	end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]);
	if (ptr < end - size)
		memmove_extent_buffer(leaf, ptr + size, ptr,
				      end - size - ptr);

	iref = (struct btrfs_extent_inline_ref *)ptr;
	btrfs_set_extent_inline_ref_type(leaf, iref, type);
	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		struct btrfs_extent_data_ref *dref;
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		btrfs_set_extent_data_ref_root(leaf, dref, root_objectid);
		btrfs_set_extent_data_ref_objectid(leaf, dref, owner);
		btrfs_set_extent_data_ref_offset(leaf, dref, offset);
		btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		struct btrfs_shared_data_ref *sref;
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add);
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else if (type == BTRFS_SHARED_BLOCK_REF_KEY) {
		btrfs_set_extent_inline_ref_offset(leaf, iref, parent);
	} else {
		btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static int lookup_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref **ref_ret,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, ref_ret,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 0);
	if (ret != -ENOENT)
		return ret;

	btrfs_release_path(root, path);
	*ref_ret = NULL;

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = lookup_tree_block_ref(trans, root, path, bytenr, parent,
					    root_objectid);
	} else {
		ret = lookup_extent_data_ref(trans, root, path, bytenr, parent,
					     root_objectid, owner, offset);
	}
	return ret;
}

/*
 * helper to update/remove inline back ref
 */
static noinline_for_stack
int update_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_mod,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct extent_buffer *leaf;
	struct btrfs_extent_item *ei;
	struct btrfs_extent_data_ref *dref = NULL;
	struct btrfs_shared_data_ref *sref = NULL;
	unsigned long ptr;
	unsigned long end;
	u32 item_size;
	int size;
	int type;
	int ret;
	u64 refs;

	leaf = path->nodes[0];
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, ei);
	WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0);
	refs += refs_to_mod;
	btrfs_set_extent_refs(leaf, ei, refs);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, ei);

	type = btrfs_extent_inline_ref_type(leaf, iref);

	if (type == BTRFS_EXTENT_DATA_REF_KEY) {
		dref = (struct btrfs_extent_data_ref *)(&iref->offset);
		refs = btrfs_extent_data_ref_count(leaf, dref);
	} else if (type == BTRFS_SHARED_DATA_REF_KEY) {
		sref = (struct btrfs_shared_data_ref *)(iref + 1);
		refs = btrfs_shared_data_ref_count(leaf, sref);
	} else {
		refs = 1;
		BUG_ON(refs_to_mod != -1);
	}

	BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod);
	refs += refs_to_mod;

	if (refs > 0) {
		if (type == BTRFS_EXTENT_DATA_REF_KEY)
			btrfs_set_extent_data_ref_count(leaf, dref, refs);
		else
			btrfs_set_shared_data_ref_count(leaf, sref, refs);
	} else {
		size = btrfs_extent_inline_ref_size(type);
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
		ptr = (unsigned long)iref;
		end = (unsigned long)ei + item_size;
		if (ptr + size < end)
			memmove_extent_buffer(leaf, ptr, ptr + size,
					      end - ptr - size);
		item_size -= size;
		ret = btrfs_truncate_item(trans, root, path, item_size, 1);
		BUG_ON(ret);
	}
	btrfs_mark_buffer_dirty(leaf);
	return 0;
}

static noinline_for_stack
int insert_inline_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 num_bytes, u64 parent,
				 u64 root_objectid, u64 owner,
				 u64 offset, int refs_to_add,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_extent_inline_ref *iref;
	int ret;

	ret = lookup_inline_extent_backref(trans, root, path, &iref,
					   bytenr, num_bytes, parent,
					   root_objectid, owner, offset, 1);
	if (ret == 0) {
		BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID);
		ret = update_inline_extent_backref(trans, root, path, iref,
						   refs_to_add, extent_op);
	} else if (ret == -ENOENT) {
		ret = setup_inline_extent_backref(trans, root, path, iref,
						  parent, root_objectid,
						  owner, offset, refs_to_add,
						  extent_op);
	}
	return ret;
}

static int insert_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 u64 bytenr, u64 parent, u64 root_objectid,
				 u64 owner, u64 offset, int
				 refs_to_add)
{
	int ret;
	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		BUG_ON(refs_to_add != 1);
		ret = insert_tree_block_ref(trans, root, path, bytenr,
					    parent, root_objectid);
	} else {
		ret = insert_extent_data_ref(trans, root, path, bytenr,
					     parent, root_objectid,
					     owner, offset, refs_to_add);
	}
	return ret;
}

static int remove_extent_backref(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_path *path,
				 struct btrfs_extent_inline_ref *iref,
				 int refs_to_drop, int is_data)
{
	int ret;

	BUG_ON(!is_data && refs_to_drop != 1);
	if (iref) {
		ret = update_inline_extent_backref(trans, root, path, iref,
						   -refs_to_drop, NULL);
	} else if (is_data) {
		ret = remove_extent_data_ref(trans, root, path, refs_to_drop);
	} else {
		ret = btrfs_del_item(trans, root, path);
	}
	return ret;
}

#ifdef BIO_RW_DISCARD
static void btrfs_issue_discard(struct block_device *bdev,
				u64 start, u64 len)
{
	blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL);
}
#endif

static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr,
				u64 num_bytes)
{
#ifdef BIO_RW_DISCARD
	int ret;
	u64 map_length = num_bytes;
	struct btrfs_multi_bio *multi = NULL;

	/* Tell the block device(s) that the sectors can be discarded */
	ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
			      bytenr, &map_length, &multi, 0);
	if (!ret) {
		struct btrfs_bio_stripe *stripe = multi->stripes;
		int i;

		if (map_length > num_bytes)
			map_length = num_bytes;

		for (i = 0; i < multi->num_stripes; i++, stripe++) {
			btrfs_issue_discard(stripe->dev->bdev,
					    stripe->physical,
					    map_length);
		}
		kfree(multi);
	}

	return ret;
#else
	return 0;
#endif
}

int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 bytenr, u64 num_bytes, u64 parent,
			 u64 root_objectid, u64 owner, u64 offset)
{
	int ret;
	BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
	       root_objectid == BTRFS_TREE_LOG_OBJECTID);

	if (owner < BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes,
					parent, root_objectid, (int)owner,
					BTRFS_ADD_DELAYED_REF, NULL);
	} else {
		ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes,
					parent, root_objectid, owner, offset,
					BTRFS_ADD_DELAYED_REF, NULL);
	}
	return ret;
}

static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root,
				  u64 bytenr, u64 num_bytes,
				  u64 parent, u64 root_objectid,
				  u64 owner, u64 offset, int refs_to_add,
				  struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct btrfs_extent_item *item;
	u64 refs;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 1;
	path->leave_spinning = 1;
	/* this will setup the path even if it fails to insert the back ref */
	ret = insert_inline_extent_backref(trans, root->fs_info->extent_root,
					   path, bytenr, num_bytes, parent,
					   root_objectid, owner, offset,
					   refs_to_add, extent_op);
	if (ret == 0)
		goto out;

	if (ret != -EAGAIN) {
		err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	refs = btrfs_extent_refs(leaf, item);
	btrfs_set_extent_refs(leaf, item, refs + refs_to_add);
	if (extent_op)
		__run_delayed_extent_op(extent_op, leaf, item);

	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root->fs_info->extent_root, path);

	path->reada = 1;
	path->leave_spinning = 1;

	/* now insert the actual backref */
	ret = insert_extent_backref(trans, root->fs_info->extent_root,
				    path, bytenr, parent, root_objectid,
				    owner, offset, refs_to_add);
	BUG_ON(ret);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_data_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;
	u64 flags = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_data_ref(node);
	if (node->type == BTRFS_SHARED_DATA_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		if (extent_op) {
			BUG_ON(extent_op->update_key);
			flags |= extent_op->flags_to_set;
		}
		ret = alloc_reserved_file_extent(trans, root,
						 parent, ref_root, flags,
						 ref->objectid, ref->offset,
						 &ins, node->ref_mod);
		update_reserved_extents(root, ins.objectid, ins.offset, 0);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent,
					     ref_root, ref->objectid,
					     ref->offset, node->ref_mod,
					     extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent,
					  ref_root, ref->objectid,
					  ref->offset, node->ref_mod,
					  extent_op);
	} else {
		BUG();
	}
	return ret;
}

static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei)
{
	u64 flags = btrfs_extent_flags(leaf, ei);
	if (extent_op->update_flags) {
		flags |= extent_op->flags_to_set;
		btrfs_set_extent_flags(leaf, ei, flags);
	}

	if (extent_op->update_key) {
		struct btrfs_tree_block_info *bi;
		BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		btrfs_set_tree_block_key(leaf, bi, &extent_op->key);
	}
}

static int run_delayed_extent_op(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 struct btrfs_delayed_ref_node *node,
				 struct btrfs_delayed_extent_op *extent_op)
{
	struct btrfs_key key;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct extent_buffer *leaf;
	u32 item_size;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = node->bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = node->num_bytes;

	path->reada = 1;
	path->leave_spinning = 1;
	ret = btrfs_search_slot(trans,
				root->fs_info->extent_root, &key,
				path, 0, 1);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	if (ret > 0) {
		err = -EIO;
		goto out;
	}

	leaf = path->nodes[0];
	item_size = btrfs_item_size_nr(leaf, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		ret = convert_extent_item_v0(trans, root->fs_info->extent_root,
					     path, (u64)-1, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		leaf = path->nodes[0];
		item_size = btrfs_item_size_nr(leaf, path->slots[0]);
	}
#endif
	BUG_ON(item_size < sizeof(*ei));
	ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item);
	__run_delayed_extent_op(extent_op, leaf, ei);

	btrfs_mark_buffer_dirty(leaf);
out:
	btrfs_free_path(path);
	return err;
}

static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
				struct btrfs_root *root,
				struct btrfs_delayed_ref_node *node,
				struct btrfs_delayed_extent_op *extent_op,
				int insert_reserved)
{
	int ret = 0;
	struct btrfs_delayed_tree_ref *ref;
	struct btrfs_key ins;
	u64 parent = 0;
	u64 ref_root = 0;

	ins.objectid = node->bytenr;
	ins.offset = node->num_bytes;
	ins.type = BTRFS_EXTENT_ITEM_KEY;

	ref = btrfs_delayed_node_to_tree_ref(node);
	if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		parent = ref->parent;
	else
		ref_root = ref->root;

	BUG_ON(node->ref_mod != 1);
	if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
		BUG_ON(!extent_op || !extent_op->update_flags ||
		       !extent_op->update_key);
		ret = alloc_reserved_tree_block(trans, root,
						parent, ref_root,
						extent_op->flags_to_set,
						&extent_op->key,
						ref->level, &ins);
		update_reserved_extents(root, ins.objectid, ins.offset, 0);
	} else if (node->action == BTRFS_ADD_DELAYED_REF) {
		ret = __btrfs_inc_extent_ref(trans, root, node->bytenr,
					     node->num_bytes, parent, ref_root,
					     ref->level, 0, 1, extent_op);
	} else if (node->action == BTRFS_DROP_DELAYED_REF) {
		ret = __btrfs_free_extent(trans, root, node->bytenr,
					  node->num_bytes, parent, ref_root,
					  ref->level, 0, 1, extent_op);
	} else {
		BUG();
	}
	return ret;
}


/* helper function to actually process a single delayed ref entry */
static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct btrfs_delayed_ref_node *node,
			       struct btrfs_delayed_extent_op *extent_op,
			       int insert_reserved)
{
	int ret;
	if (btrfs_delayed_ref_is_head(node)) {
		struct btrfs_delayed_ref_head *head;
		/*
		 * we've hit the end of the chain and we were supposed
		 * to insert this extent into the tree.
		 * But it got deleted before we ever needed to insert it,
		 * so all we have to do is clean up the accounting
		 */
		BUG_ON(extent_op);
		head = btrfs_delayed_node_to_head(node);
		if (insert_reserved) {
			if (head->is_data) {
				ret = btrfs_del_csums(trans, root,
						      node->bytenr,
						      node->num_bytes);
				BUG_ON(ret);
			}
			btrfs_update_pinned_extents(root, node->bytenr,
						    node->num_bytes, 1);
			update_reserved_extents(root, node->bytenr,
						node->num_bytes, 0);
		}
		mutex_unlock(&head->mutex);
		return 0;
	}

	if (node->type == BTRFS_TREE_BLOCK_REF_KEY ||
	    node->type == BTRFS_SHARED_BLOCK_REF_KEY)
		ret = run_delayed_tree_ref(trans, root, node, extent_op,
					   insert_reserved);
	else if (node->type == BTRFS_EXTENT_DATA_REF_KEY ||
		 node->type == BTRFS_SHARED_DATA_REF_KEY)
		ret = run_delayed_data_ref(trans, root, node, extent_op,
					   insert_reserved);
	else
		BUG();
	return ret;
}

static noinline struct btrfs_delayed_ref_node *
select_delayed_ref(struct btrfs_delayed_ref_head *head)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_node *ref;
	int action = BTRFS_ADD_DELAYED_REF;
again:
	/*
	 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
	 * this prevents the ref count from going down to zero when
	 * there still are pending delayed refs.
	 */
	node = rb_prev(&head->node.rb_node);
	while (1) {
		if (!node)
			break;
		ref = rb_entry(node, struct btrfs_delayed_ref_node,
			       rb_node);
		if (ref->bytenr != head->node.bytenr)
			break;
		if (ref->action == action)
			return ref;
		node = rb_prev(node);
	}
	if (action == BTRFS_ADD_DELAYED_REF) {
		action = BTRFS_DROP_DELAYED_REF;
		goto again;
	}
	return NULL;
}

static noinline int run_clustered_refs(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct list_head *cluster)
{
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct btrfs_delayed_ref_head *locked_ref = NULL;
	struct btrfs_delayed_extent_op *extent_op;
	int ret;
	int count = 0;
	int must_insert_reserved = 0;

	delayed_refs = &trans->transaction->delayed_refs;
	while (1) {
		if (!locked_ref) {
			/* pick a new head ref from the cluster list */
			if (list_empty(cluster))
				break;

			locked_ref = list_entry(cluster->next,
				     struct btrfs_delayed_ref_head, cluster);

			/* grab the lock that says we are going to process
			 * all the refs for this head */
			ret = btrfs_delayed_ref_lock(trans, locked_ref);

			/*
			 * we may have dropped the spin lock to get the head
			 * mutex lock, and that might have given someone else
			 * time to free the head.  If that's true, it has been
			 * removed from our list and we can move on.
			 */
			if (ret == -EAGAIN) {
				locked_ref = NULL;
				count++;
				continue;
			}
		}

		/*
		 * record the must insert reserved flag before we
		 * drop the spin lock.
		 */
		must_insert_reserved = locked_ref->must_insert_reserved;
		locked_ref->must_insert_reserved = 0;

		extent_op = locked_ref->extent_op;
		locked_ref->extent_op = NULL;

		/*
		 * locked_ref is the head node, so we have to go one
		 * node back for any delayed ref updates
		 */
		ref = select_delayed_ref(locked_ref);
		if (!ref) {
			/* All delayed refs have been processed, go ahead
			 * and send the head node to run_one_delayed_ref,
			 * so that any accounting fixes can happen
			 */
			ref = &locked_ref->node;

			if (extent_op && must_insert_reserved) {
				kfree(extent_op);
				extent_op = NULL;
			}

			if (extent_op) {
				spin_unlock(&delayed_refs->lock);

				ret = run_delayed_extent_op(trans, root,
							    ref, extent_op);
				BUG_ON(ret);
				kfree(extent_op);

				cond_resched();
				spin_lock(&delayed_refs->lock);
				continue;
			}

			list_del_init(&locked_ref->cluster);
			locked_ref = NULL;
		}

		ref->in_tree = 0;
		rb_erase(&ref->rb_node, &delayed_refs->root);
		delayed_refs->num_entries--;

		spin_unlock(&delayed_refs->lock);

		ret = run_one_delayed_ref(trans, root, ref, extent_op,
					  must_insert_reserved);
		BUG_ON(ret);

		btrfs_put_delayed_ref(ref);
		kfree(extent_op);
		count++;

		cond_resched();
		spin_lock(&delayed_refs->lock);
	}
	return count;
}

/*
 * this starts processing the delayed reference count updates and
 * extent insertions we have queued up so far.  count can be
 * 0, which means to process everything in the tree at the start
 * of the run (but not newly added entries), or it can be some target
 * number you'd like to process.
 */
int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, unsigned long count)
{
	struct rb_node *node;
	struct btrfs_delayed_ref_root *delayed_refs;
	struct btrfs_delayed_ref_node *ref;
	struct list_head cluster;
	int ret;
	int run_all = count == (unsigned long)-1;
	int run_most = 0;

	if (root == root->fs_info->extent_root)
		root = root->fs_info->tree_root;

	delayed_refs = &trans->transaction->delayed_refs;
	INIT_LIST_HEAD(&cluster);
again:
	spin_lock(&delayed_refs->lock);
	if (count == 0) {
		count = delayed_refs->num_entries * 2;
		run_most = 1;
	}
	while (1) {
		if (!(run_all || run_most) &&
		    delayed_refs->num_heads_ready < 64)
			break;

		/*
		 * go find something we can process in the rbtree.
We start at 2005 * the beginning of the tree, and then build a cluster 2006 * of refs to process starting at the first one we are able to 2007 * lock 2008 */ 2009 ret = btrfs_find_ref_cluster(trans, &cluster, 2010 delayed_refs->run_delayed_start); 2011 if (ret) 2012 break; 2013 2014 ret = run_clustered_refs(trans, root, &cluster); 2015 BUG_ON(ret < 0); 2016 2017 count -= min_t(unsigned long, ret, count); 2018 2019 if (count == 0) 2020 break; 2021 } 2022 2023 if (run_all) { 2024 node = rb_first(&delayed_refs->root); 2025 if (!node) 2026 goto out; 2027 count = (unsigned long)-1; 2028 2029 while (node) { 2030 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2031 rb_node); 2032 if (btrfs_delayed_ref_is_head(ref)) { 2033 struct btrfs_delayed_ref_head *head; 2034 2035 head = btrfs_delayed_node_to_head(ref); 2036 atomic_inc(&ref->refs); 2037 2038 spin_unlock(&delayed_refs->lock); 2039 mutex_lock(&head->mutex); 2040 mutex_unlock(&head->mutex); 2041 2042 btrfs_put_delayed_ref(ref); 2043 cond_resched(); 2044 goto again; 2045 } 2046 node = rb_next(node); 2047 } 2048 spin_unlock(&delayed_refs->lock); 2049 schedule_timeout(1); 2050 goto again; 2051 } 2052 out: 2053 spin_unlock(&delayed_refs->lock); 2054 return 0; 2055 } 2056 2057 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2058 struct btrfs_root *root, 2059 u64 bytenr, u64 num_bytes, u64 flags, 2060 int is_data) 2061 { 2062 struct btrfs_delayed_extent_op *extent_op; 2063 int ret; 2064 2065 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2066 if (!extent_op) 2067 return -ENOMEM; 2068 2069 extent_op->flags_to_set = flags; 2070 extent_op->update_flags = 1; 2071 extent_op->update_key = 0; 2072 extent_op->is_data = is_data ? 1 : 0; 2073 2074 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2075 if (ret) 2076 kfree(extent_op); 2077 return ret; 2078 } 2079 2080 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2081 struct btrfs_root *root, 2082 struct btrfs_path *path, 2083 u64 objectid, u64 offset, u64 bytenr) 2084 { 2085 struct btrfs_delayed_ref_head *head; 2086 struct btrfs_delayed_ref_node *ref; 2087 struct btrfs_delayed_data_ref *data_ref; 2088 struct btrfs_delayed_ref_root *delayed_refs; 2089 struct rb_node *node; 2090 int ret = 0; 2091 2092 ret = -ENOENT; 2093 delayed_refs = &trans->transaction->delayed_refs; 2094 spin_lock(&delayed_refs->lock); 2095 head = btrfs_find_delayed_ref_head(trans, bytenr); 2096 if (!head) 2097 goto out; 2098 2099 if (!mutex_trylock(&head->mutex)) { 2100 atomic_inc(&head->node.refs); 2101 spin_unlock(&delayed_refs->lock); 2102 2103 btrfs_release_path(root->fs_info->extent_root, path); 2104 2105 mutex_lock(&head->mutex); 2106 mutex_unlock(&head->mutex); 2107 btrfs_put_delayed_ref(&head->node); 2108 return -EAGAIN; 2109 } 2110 2111 node = rb_prev(&head->node.rb_node); 2112 if (!node) 2113 goto out_unlock; 2114 2115 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2116 2117 if (ref->bytenr != bytenr) 2118 goto out_unlock; 2119 2120 ret = 1; 2121 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2122 goto out_unlock; 2123 2124 data_ref = btrfs_delayed_node_to_data_ref(ref); 2125 2126 node = rb_prev(node); 2127 if (node) { 2128 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2129 if (ref->bytenr == bytenr) 2130 goto out_unlock; 2131 } 2132 2133 if (data_ref->root != root->root_key.objectid || 2134 data_ref->objectid != objectid || data_ref->offset != offset) 2135 goto out_unlock; 2136 2137 ret = 0; 2138 out_unlock: 2139 
mutex_unlock(&head->mutex); 2140 out: 2141 spin_unlock(&delayed_refs->lock); 2142 return ret; 2143 } 2144 2145 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2146 struct btrfs_root *root, 2147 struct btrfs_path *path, 2148 u64 objectid, u64 offset, u64 bytenr) 2149 { 2150 struct btrfs_root *extent_root = root->fs_info->extent_root; 2151 struct extent_buffer *leaf; 2152 struct btrfs_extent_data_ref *ref; 2153 struct btrfs_extent_inline_ref *iref; 2154 struct btrfs_extent_item *ei; 2155 struct btrfs_key key; 2156 u32 item_size; 2157 int ret; 2158 2159 key.objectid = bytenr; 2160 key.offset = (u64)-1; 2161 key.type = BTRFS_EXTENT_ITEM_KEY; 2162 2163 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2164 if (ret < 0) 2165 goto out; 2166 BUG_ON(ret == 0); 2167 2168 ret = -ENOENT; 2169 if (path->slots[0] == 0) 2170 goto out; 2171 2172 path->slots[0]--; 2173 leaf = path->nodes[0]; 2174 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2175 2176 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2177 goto out; 2178 2179 ret = 1; 2180 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2181 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2182 if (item_size < sizeof(*ei)) { 2183 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2184 goto out; 2185 } 2186 #endif 2187 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2188 2189 if (item_size != sizeof(*ei) + 2190 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2191 goto out; 2192 2193 if (btrfs_extent_generation(leaf, ei) <= 2194 btrfs_root_last_snapshot(&root->root_item)) 2195 goto out; 2196 2197 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2198 if (btrfs_extent_inline_ref_type(leaf, iref) != 2199 BTRFS_EXTENT_DATA_REF_KEY) 2200 goto out; 2201 2202 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2203 if (btrfs_extent_refs(leaf, ei) != 2204 btrfs_extent_data_ref_count(leaf, ref) || 2205 btrfs_extent_data_ref_root(leaf, ref) != 2206 root->root_key.objectid || 2207 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2208 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2209 goto out; 2210 2211 ret = 0; 2212 out: 2213 return ret; 2214 } 2215 2216 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2217 struct btrfs_root *root, 2218 u64 objectid, u64 offset, u64 bytenr) 2219 { 2220 struct btrfs_path *path; 2221 int ret; 2222 int ret2; 2223 2224 path = btrfs_alloc_path(); 2225 if (!path) 2226 return -ENOENT; 2227 2228 do { 2229 ret = check_committed_ref(trans, root, path, objectid, 2230 offset, bytenr); 2231 if (ret && ret != -ENOENT) 2232 goto out; 2233 2234 ret2 = check_delayed_ref(trans, root, path, objectid, 2235 offset, bytenr); 2236 } while (ret2 == -EAGAIN); 2237 2238 if (ret2 && ret2 != -ENOENT) { 2239 ret = ret2; 2240 goto out; 2241 } 2242 2243 if (ret != -ENOENT || ret2 != -ENOENT) 2244 ret = 0; 2245 out: 2246 btrfs_free_path(path); 2247 return ret; 2248 } 2249 2250 #if 0 2251 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2252 struct extent_buffer *buf, u32 nr_extents) 2253 { 2254 struct btrfs_key key; 2255 struct btrfs_file_extent_item *fi; 2256 u64 root_gen; 2257 u32 nritems; 2258 int i; 2259 int level; 2260 int ret = 0; 2261 int shared = 0; 2262 2263 if (!root->ref_cows) 2264 return 0; 2265 2266 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2267 shared = 0; 2268 root_gen = root->root_key.offset; 2269 } else { 2270 shared = 1; 2271 root_gen = trans->transid - 1; 2272 } 2273 2274 level = 
btrfs_header_level(buf); 2275 nritems = btrfs_header_nritems(buf); 2276 2277 if (level == 0) { 2278 struct btrfs_leaf_ref *ref; 2279 struct btrfs_extent_info *info; 2280 2281 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2282 if (!ref) { 2283 ret = -ENOMEM; 2284 goto out; 2285 } 2286 2287 ref->root_gen = root_gen; 2288 ref->bytenr = buf->start; 2289 ref->owner = btrfs_header_owner(buf); 2290 ref->generation = btrfs_header_generation(buf); 2291 ref->nritems = nr_extents; 2292 info = ref->extents; 2293 2294 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2295 u64 disk_bytenr; 2296 btrfs_item_key_to_cpu(buf, &key, i); 2297 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2298 continue; 2299 fi = btrfs_item_ptr(buf, i, 2300 struct btrfs_file_extent_item); 2301 if (btrfs_file_extent_type(buf, fi) == 2302 BTRFS_FILE_EXTENT_INLINE) 2303 continue; 2304 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2305 if (disk_bytenr == 0) 2306 continue; 2307 2308 info->bytenr = disk_bytenr; 2309 info->num_bytes = 2310 btrfs_file_extent_disk_num_bytes(buf, fi); 2311 info->objectid = key.objectid; 2312 info->offset = key.offset; 2313 info++; 2314 } 2315 2316 ret = btrfs_add_leaf_ref(root, ref, shared); 2317 if (ret == -EEXIST && shared) { 2318 struct btrfs_leaf_ref *old; 2319 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2320 BUG_ON(!old); 2321 btrfs_remove_leaf_ref(root, old); 2322 btrfs_free_leaf_ref(root, old); 2323 ret = btrfs_add_leaf_ref(root, ref, shared); 2324 } 2325 WARN_ON(ret); 2326 btrfs_free_leaf_ref(root, ref); 2327 } 2328 out: 2329 return ret; 2330 } 2331 2332 /* when a block goes through cow, we update the reference counts of 2333 * everything that block points to. The internal pointers of the block 2334 * can be in just about any order, and it is likely to have clusters of 2335 * things that are close together and clusters of things that are not. 2336 * 2337 * To help reduce the seeks that come with updating all of these reference 2338 * counts, sort them by byte number before actual updates are done. 2339 * 2340 * struct refsort is used to match byte number to slot in the btree block. 2341 * we sort based on the byte number and then use the slot to actually 2342 * find the item. 2343 * 2344 * struct refsort is smaller than strcut btrfs_item and smaller than 2345 * struct btrfs_key_ptr. Since we're currently limited to the page size 2346 * for a btree block, there's no way for a kmalloc of refsorts for a 2347 * single node to be bigger than a page. 
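 * A typical user would fill an array of refsort entries while walking
 * the block and then sort it with the comparison helper below, roughly:
 *
 *	sort(sorted, nritems, sizeof(struct refsort), refsort_cmp, NULL);
 *
 * where "sorted" is the refsort array; the refs are then processed in
 * bytenr order by walking sorted[i].slot.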
2348 */ 2349 struct refsort { 2350 u64 bytenr; 2351 u32 slot; 2352 }; 2353 2354 /* 2355 * for passing into sort() 2356 */ 2357 static int refsort_cmp(const void *a_void, const void *b_void) 2358 { 2359 const struct refsort *a = a_void; 2360 const struct refsort *b = b_void; 2361 2362 if (a->bytenr < b->bytenr) 2363 return -1; 2364 if (a->bytenr > b->bytenr) 2365 return 1; 2366 return 0; 2367 } 2368 #endif 2369 2370 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2371 struct btrfs_root *root, 2372 struct extent_buffer *buf, 2373 int full_backref, int inc) 2374 { 2375 u64 bytenr; 2376 u64 num_bytes; 2377 u64 parent; 2378 u64 ref_root; 2379 u32 nritems; 2380 struct btrfs_key key; 2381 struct btrfs_file_extent_item *fi; 2382 int i; 2383 int level; 2384 int ret = 0; 2385 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2386 u64, u64, u64, u64, u64, u64); 2387 2388 ref_root = btrfs_header_owner(buf); 2389 nritems = btrfs_header_nritems(buf); 2390 level = btrfs_header_level(buf); 2391 2392 if (!root->ref_cows && level == 0) 2393 return 0; 2394 2395 if (inc) 2396 process_func = btrfs_inc_extent_ref; 2397 else 2398 process_func = btrfs_free_extent; 2399 2400 if (full_backref) 2401 parent = buf->start; 2402 else 2403 parent = 0; 2404 2405 for (i = 0; i < nritems; i++) { 2406 if (level == 0) { 2407 btrfs_item_key_to_cpu(buf, &key, i); 2408 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2409 continue; 2410 fi = btrfs_item_ptr(buf, i, 2411 struct btrfs_file_extent_item); 2412 if (btrfs_file_extent_type(buf, fi) == 2413 BTRFS_FILE_EXTENT_INLINE) 2414 continue; 2415 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2416 if (bytenr == 0) 2417 continue; 2418 2419 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2420 key.offset -= btrfs_file_extent_offset(buf, fi); 2421 ret = process_func(trans, root, bytenr, num_bytes, 2422 parent, ref_root, key.objectid, 2423 key.offset); 2424 if (ret) 2425 goto fail; 2426 } else { 2427 bytenr = btrfs_node_blockptr(buf, i); 2428 num_bytes = btrfs_level_size(root, level - 1); 2429 ret = process_func(trans, root, bytenr, num_bytes, 2430 parent, ref_root, level - 1, 0); 2431 if (ret) 2432 goto fail; 2433 } 2434 } 2435 return 0; 2436 fail: 2437 BUG(); 2438 return ret; 2439 } 2440 2441 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2442 struct extent_buffer *buf, int full_backref) 2443 { 2444 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2445 } 2446 2447 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2448 struct extent_buffer *buf, int full_backref) 2449 { 2450 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2451 } 2452 2453 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2454 struct btrfs_root *root, 2455 struct btrfs_path *path, 2456 struct btrfs_block_group_cache *cache) 2457 { 2458 int ret; 2459 struct btrfs_root *extent_root = root->fs_info->extent_root; 2460 unsigned long bi; 2461 struct extent_buffer *leaf; 2462 2463 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2464 if (ret < 0) 2465 goto fail; 2466 BUG_ON(ret); 2467 2468 leaf = path->nodes[0]; 2469 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2470 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2471 btrfs_mark_buffer_dirty(leaf); 2472 btrfs_release_path(extent_root, path); 2473 fail: 2474 if (ret) 2475 return ret; 2476 return 0; 2477 2478 } 2479 2480 static struct btrfs_block_group_cache * 2481 next_block_group(struct btrfs_root 
*root, 2482 struct btrfs_block_group_cache *cache) 2483 { 2484 struct rb_node *node; 2485 spin_lock(&root->fs_info->block_group_cache_lock); 2486 node = rb_next(&cache->cache_node); 2487 btrfs_put_block_group(cache); 2488 if (node) { 2489 cache = rb_entry(node, struct btrfs_block_group_cache, 2490 cache_node); 2491 atomic_inc(&cache->count); 2492 } else 2493 cache = NULL; 2494 spin_unlock(&root->fs_info->block_group_cache_lock); 2495 return cache; 2496 } 2497 2498 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2499 struct btrfs_root *root) 2500 { 2501 struct btrfs_block_group_cache *cache; 2502 int err = 0; 2503 struct btrfs_path *path; 2504 u64 last = 0; 2505 2506 path = btrfs_alloc_path(); 2507 if (!path) 2508 return -ENOMEM; 2509 2510 while (1) { 2511 if (last == 0) { 2512 err = btrfs_run_delayed_refs(trans, root, 2513 (unsigned long)-1); 2514 BUG_ON(err); 2515 } 2516 2517 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2518 while (cache) { 2519 if (cache->dirty) 2520 break; 2521 cache = next_block_group(root, cache); 2522 } 2523 if (!cache) { 2524 if (last == 0) 2525 break; 2526 last = 0; 2527 continue; 2528 } 2529 2530 cache->dirty = 0; 2531 last = cache->key.objectid + cache->key.offset; 2532 2533 err = write_one_cache_group(trans, root, path, cache); 2534 BUG_ON(err); 2535 btrfs_put_block_group(cache); 2536 } 2537 2538 btrfs_free_path(path); 2539 return 0; 2540 } 2541 2542 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2543 { 2544 struct btrfs_block_group_cache *block_group; 2545 int readonly = 0; 2546 2547 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2548 if (!block_group || block_group->ro) 2549 readonly = 1; 2550 if (block_group) 2551 btrfs_put_block_group(block_group); 2552 return readonly; 2553 } 2554 2555 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2556 u64 total_bytes, u64 bytes_used, 2557 struct btrfs_space_info **space_info) 2558 { 2559 struct btrfs_space_info *found; 2560 2561 found = __find_space_info(info, flags); 2562 if (found) { 2563 spin_lock(&found->lock); 2564 found->total_bytes += total_bytes; 2565 found->bytes_used += bytes_used; 2566 found->full = 0; 2567 spin_unlock(&found->lock); 2568 *space_info = found; 2569 return 0; 2570 } 2571 found = kzalloc(sizeof(*found), GFP_NOFS); 2572 if (!found) 2573 return -ENOMEM; 2574 2575 INIT_LIST_HEAD(&found->block_groups); 2576 init_rwsem(&found->groups_sem); 2577 spin_lock_init(&found->lock); 2578 found->flags = flags; 2579 found->total_bytes = total_bytes; 2580 found->bytes_used = bytes_used; 2581 found->bytes_pinned = 0; 2582 found->bytes_reserved = 0; 2583 found->bytes_readonly = 0; 2584 found->bytes_delalloc = 0; 2585 found->full = 0; 2586 found->force_alloc = 0; 2587 *space_info = found; 2588 list_add_rcu(&found->list, &info->space_info); 2589 atomic_set(&found->caching_threads, 0); 2590 return 0; 2591 } 2592 2593 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2594 { 2595 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2596 BTRFS_BLOCK_GROUP_RAID1 | 2597 BTRFS_BLOCK_GROUP_RAID10 | 2598 BTRFS_BLOCK_GROUP_DUP); 2599 if (extra_flags) { 2600 if (flags & BTRFS_BLOCK_GROUP_DATA) 2601 fs_info->avail_data_alloc_bits |= extra_flags; 2602 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2603 fs_info->avail_metadata_alloc_bits |= extra_flags; 2604 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2605 fs_info->avail_system_alloc_bits |= extra_flags; 2606 } 2607 } 2608 2609 static void set_block_group_readonly(struct 
btrfs_block_group_cache *cache) 2610 { 2611 spin_lock(&cache->space_info->lock); 2612 spin_lock(&cache->lock); 2613 if (!cache->ro) { 2614 cache->space_info->bytes_readonly += cache->key.offset - 2615 btrfs_block_group_used(&cache->item); 2616 cache->ro = 1; 2617 } 2618 spin_unlock(&cache->lock); 2619 spin_unlock(&cache->space_info->lock); 2620 } 2621 2622 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2623 { 2624 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2625 2626 if (num_devices == 1) 2627 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2628 if (num_devices < 4) 2629 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2630 2631 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2632 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2633 BTRFS_BLOCK_GROUP_RAID10))) { 2634 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2635 } 2636 2637 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2638 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2639 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2640 } 2641 2642 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2643 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2644 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2645 (flags & BTRFS_BLOCK_GROUP_DUP))) 2646 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2647 return flags; 2648 } 2649 2650 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2651 { 2652 struct btrfs_fs_info *info = root->fs_info; 2653 u64 alloc_profile; 2654 2655 if (data) { 2656 alloc_profile = info->avail_data_alloc_bits & 2657 info->data_alloc_profile; 2658 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2659 } else if (root == root->fs_info->chunk_root) { 2660 alloc_profile = info->avail_system_alloc_bits & 2661 info->system_alloc_profile; 2662 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2663 } else { 2664 alloc_profile = info->avail_metadata_alloc_bits & 2665 info->metadata_alloc_profile; 2666 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2667 } 2668 2669 return btrfs_reduce_alloc_profile(root, data); 2670 } 2671 2672 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2673 { 2674 u64 alloc_target; 2675 2676 alloc_target = btrfs_get_alloc_profile(root, 1); 2677 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2678 alloc_target); 2679 } 2680 2681 /* 2682 * for now this just makes sure we have at least 5% of our metadata space free 2683 * for use. 
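 * The 5% figure only applies once the space info has been marked full;
 * while new chunks can still be allocated the check below is stricter
 * and trips as soon as more than 80% of the metadata space is in use:
 *
 *	thresh = meta_sinfo->total_bytes * 80;	(95 once ->full is set)
 *	do_div(thresh, 100);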
2684 */ 2685 int btrfs_check_metadata_free_space(struct btrfs_root *root) 2686 { 2687 struct btrfs_fs_info *info = root->fs_info; 2688 struct btrfs_space_info *meta_sinfo; 2689 u64 alloc_target, thresh; 2690 int committed = 0, ret; 2691 2692 /* get the space info for where the metadata will live */ 2693 alloc_target = btrfs_get_alloc_profile(root, 0); 2694 meta_sinfo = __find_space_info(info, alloc_target); 2695 2696 again: 2697 spin_lock(&meta_sinfo->lock); 2698 if (!meta_sinfo->full) 2699 thresh = meta_sinfo->total_bytes * 80; 2700 else 2701 thresh = meta_sinfo->total_bytes * 95; 2702 2703 do_div(thresh, 100); 2704 2705 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2706 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2707 struct btrfs_trans_handle *trans; 2708 if (!meta_sinfo->full) { 2709 meta_sinfo->force_alloc = 1; 2710 spin_unlock(&meta_sinfo->lock); 2711 2712 trans = btrfs_start_transaction(root, 1); 2713 if (!trans) 2714 return -ENOMEM; 2715 2716 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2717 2 * 1024 * 1024, alloc_target, 0); 2718 btrfs_end_transaction(trans, root); 2719 goto again; 2720 } 2721 spin_unlock(&meta_sinfo->lock); 2722 2723 if (!committed) { 2724 committed = 1; 2725 trans = btrfs_join_transaction(root, 1); 2726 if (!trans) 2727 return -ENOMEM; 2728 ret = btrfs_commit_transaction(trans, root); 2729 if (ret) 2730 return ret; 2731 goto again; 2732 } 2733 return -ENOSPC; 2734 } 2735 spin_unlock(&meta_sinfo->lock); 2736 2737 return 0; 2738 } 2739 2740 /* 2741 * This will check the space that the inode allocates from to make sure we have 2742 * enough space for bytes. 2743 */ 2744 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2745 u64 bytes) 2746 { 2747 struct btrfs_space_info *data_sinfo; 2748 int ret = 0, committed = 0; 2749 2750 /* make sure bytes are sectorsize aligned */ 2751 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2752 2753 data_sinfo = BTRFS_I(inode)->space_info; 2754 again: 2755 /* make sure we have enough space to handle the data first */ 2756 spin_lock(&data_sinfo->lock); 2757 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 2758 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 2759 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 2760 data_sinfo->bytes_may_use < bytes) { 2761 struct btrfs_trans_handle *trans; 2762 2763 /* 2764 * if we don't have enough free bytes in this space then we need 2765 * to alloc a new chunk. 
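 * Note that the allocation below asks for the shortfall plus 2MB of
 * slack (bytes + 2 * 1024 * 1024) rather than just "bytes", which gives
 * a little headroom before we end up back in this slow path.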
2766 */ 2767 if (!data_sinfo->full) { 2768 u64 alloc_target; 2769 2770 data_sinfo->force_alloc = 1; 2771 spin_unlock(&data_sinfo->lock); 2772 2773 alloc_target = btrfs_get_alloc_profile(root, 1); 2774 trans = btrfs_start_transaction(root, 1); 2775 if (!trans) 2776 return -ENOMEM; 2777 2778 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2779 bytes + 2 * 1024 * 1024, 2780 alloc_target, 0); 2781 btrfs_end_transaction(trans, root); 2782 if (ret) 2783 return ret; 2784 goto again; 2785 } 2786 spin_unlock(&data_sinfo->lock); 2787 2788 /* commit the current transaction and try again */ 2789 if (!committed) { 2790 committed = 1; 2791 trans = btrfs_join_transaction(root, 1); 2792 if (!trans) 2793 return -ENOMEM; 2794 ret = btrfs_commit_transaction(trans, root); 2795 if (ret) 2796 return ret; 2797 goto again; 2798 } 2799 2800 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2801 ", %llu bytes_used, %llu bytes_reserved, " 2802 "%llu bytes_pinned, %llu bytes_readonly, %llu may use " 2803 "%llu total\n", (unsigned long long)bytes, 2804 (unsigned long long)data_sinfo->bytes_delalloc, 2805 (unsigned long long)data_sinfo->bytes_used, 2806 (unsigned long long)data_sinfo->bytes_reserved, 2807 (unsigned long long)data_sinfo->bytes_pinned, 2808 (unsigned long long)data_sinfo->bytes_readonly, 2809 (unsigned long long)data_sinfo->bytes_may_use, 2810 (unsigned long long)data_sinfo->total_bytes); 2811 return -ENOSPC; 2812 } 2813 data_sinfo->bytes_may_use += bytes; 2814 BTRFS_I(inode)->reserved_bytes += bytes; 2815 spin_unlock(&data_sinfo->lock); 2816 2817 return btrfs_check_metadata_free_space(root); 2818 } 2819 2820 /* 2821 * if there was an error for whatever reason after calling 2822 * btrfs_check_data_free_space, call this so we can cleanup the counters. 2823 */ 2824 void btrfs_free_reserved_data_space(struct btrfs_root *root, 2825 struct inode *inode, u64 bytes) 2826 { 2827 struct btrfs_space_info *data_sinfo; 2828 2829 /* make sure bytes are sectorsize aligned */ 2830 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2831 2832 data_sinfo = BTRFS_I(inode)->space_info; 2833 spin_lock(&data_sinfo->lock); 2834 data_sinfo->bytes_may_use -= bytes; 2835 BTRFS_I(inode)->reserved_bytes -= bytes; 2836 spin_unlock(&data_sinfo->lock); 2837 } 2838 2839 /* called when we are adding a delalloc extent to the inode's io_tree */ 2840 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 2841 u64 bytes) 2842 { 2843 struct btrfs_space_info *data_sinfo; 2844 2845 /* get the space info for where this inode will be storing its data */ 2846 data_sinfo = BTRFS_I(inode)->space_info; 2847 2848 /* make sure we have enough space to handle the data first */ 2849 spin_lock(&data_sinfo->lock); 2850 data_sinfo->bytes_delalloc += bytes; 2851 2852 /* 2853 * we are adding a delalloc extent without calling 2854 * btrfs_check_data_free_space first. 
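 * (i.e. bytes may be larger than BTRFS_I(inode)->reserved_bytes, which
 * is what the unlikely() case below checks for.)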
This happens on a weird 2855 * writepage condition, but shouldn't hurt our accounting 2856 */ 2857 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 2858 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 2859 BTRFS_I(inode)->reserved_bytes = 0; 2860 } else { 2861 data_sinfo->bytes_may_use -= bytes; 2862 BTRFS_I(inode)->reserved_bytes -= bytes; 2863 } 2864 2865 spin_unlock(&data_sinfo->lock); 2866 } 2867 2868 /* called when we are clearing an delalloc extent from the inode's io_tree */ 2869 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2870 u64 bytes) 2871 { 2872 struct btrfs_space_info *info; 2873 2874 info = BTRFS_I(inode)->space_info; 2875 2876 spin_lock(&info->lock); 2877 info->bytes_delalloc -= bytes; 2878 spin_unlock(&info->lock); 2879 } 2880 2881 static void force_metadata_allocation(struct btrfs_fs_info *info) 2882 { 2883 struct list_head *head = &info->space_info; 2884 struct btrfs_space_info *found; 2885 2886 rcu_read_lock(); 2887 list_for_each_entry_rcu(found, head, list) { 2888 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 2889 found->force_alloc = 1; 2890 } 2891 rcu_read_unlock(); 2892 } 2893 2894 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 2895 struct btrfs_root *extent_root, u64 alloc_bytes, 2896 u64 flags, int force) 2897 { 2898 struct btrfs_space_info *space_info; 2899 struct btrfs_fs_info *fs_info = extent_root->fs_info; 2900 u64 thresh; 2901 int ret = 0; 2902 2903 mutex_lock(&fs_info->chunk_mutex); 2904 2905 flags = btrfs_reduce_alloc_profile(extent_root, flags); 2906 2907 space_info = __find_space_info(extent_root->fs_info, flags); 2908 if (!space_info) { 2909 ret = update_space_info(extent_root->fs_info, flags, 2910 0, 0, &space_info); 2911 BUG_ON(ret); 2912 } 2913 BUG_ON(!space_info); 2914 2915 spin_lock(&space_info->lock); 2916 if (space_info->force_alloc) { 2917 force = 1; 2918 space_info->force_alloc = 0; 2919 } 2920 if (space_info->full) { 2921 spin_unlock(&space_info->lock); 2922 goto out; 2923 } 2924 2925 thresh = space_info->total_bytes - space_info->bytes_readonly; 2926 thresh = div_factor(thresh, 6); 2927 if (!force && 2928 (space_info->bytes_used + space_info->bytes_pinned + 2929 space_info->bytes_reserved + alloc_bytes) < thresh) { 2930 spin_unlock(&space_info->lock); 2931 goto out; 2932 } 2933 spin_unlock(&space_info->lock); 2934 2935 /* 2936 * if we're doing a data chunk, go ahead and make sure that 2937 * we keep a reasonable number of metadata chunks allocated in the 2938 * FS as well. 
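 * "reasonable" here means one forced metadata chunk allocation for
 * every fs_info->metadata_ratio data chunk allocations (the modulo
 * check below); force_metadata_allocation() simply sets force_alloc on
 * every metadata space_info it finds.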
2939 */ 2940 if (flags & BTRFS_BLOCK_GROUP_DATA) { 2941 fs_info->data_chunk_allocations++; 2942 if (!(fs_info->data_chunk_allocations % 2943 fs_info->metadata_ratio)) 2944 force_metadata_allocation(fs_info); 2945 } 2946 2947 ret = btrfs_alloc_chunk(trans, extent_root, flags); 2948 if (ret) 2949 space_info->full = 1; 2950 out: 2951 mutex_unlock(&extent_root->fs_info->chunk_mutex); 2952 return ret; 2953 } 2954 2955 static int update_block_group(struct btrfs_trans_handle *trans, 2956 struct btrfs_root *root, 2957 u64 bytenr, u64 num_bytes, int alloc, 2958 int mark_free) 2959 { 2960 struct btrfs_block_group_cache *cache; 2961 struct btrfs_fs_info *info = root->fs_info; 2962 u64 total = num_bytes; 2963 u64 old_val; 2964 u64 byte_in_group; 2965 2966 /* block accounting for super block */ 2967 spin_lock(&info->delalloc_lock); 2968 old_val = btrfs_super_bytes_used(&info->super_copy); 2969 if (alloc) 2970 old_val += num_bytes; 2971 else 2972 old_val -= num_bytes; 2973 btrfs_set_super_bytes_used(&info->super_copy, old_val); 2974 2975 /* block accounting for root item */ 2976 old_val = btrfs_root_used(&root->root_item); 2977 if (alloc) 2978 old_val += num_bytes; 2979 else 2980 old_val -= num_bytes; 2981 btrfs_set_root_used(&root->root_item, old_val); 2982 spin_unlock(&info->delalloc_lock); 2983 2984 while (total) { 2985 cache = btrfs_lookup_block_group(info, bytenr); 2986 if (!cache) 2987 return -1; 2988 byte_in_group = bytenr - cache->key.objectid; 2989 WARN_ON(byte_in_group > cache->key.offset); 2990 2991 spin_lock(&cache->space_info->lock); 2992 spin_lock(&cache->lock); 2993 cache->dirty = 1; 2994 old_val = btrfs_block_group_used(&cache->item); 2995 num_bytes = min(total, cache->key.offset - byte_in_group); 2996 if (alloc) { 2997 old_val += num_bytes; 2998 cache->space_info->bytes_used += num_bytes; 2999 if (cache->ro) 3000 cache->space_info->bytes_readonly -= num_bytes; 3001 btrfs_set_block_group_used(&cache->item, old_val); 3002 spin_unlock(&cache->lock); 3003 spin_unlock(&cache->space_info->lock); 3004 } else { 3005 old_val -= num_bytes; 3006 cache->space_info->bytes_used -= num_bytes; 3007 if (cache->ro) 3008 cache->space_info->bytes_readonly += num_bytes; 3009 btrfs_set_block_group_used(&cache->item, old_val); 3010 spin_unlock(&cache->lock); 3011 spin_unlock(&cache->space_info->lock); 3012 if (mark_free) { 3013 int ret; 3014 3015 ret = btrfs_discard_extent(root, bytenr, 3016 num_bytes); 3017 WARN_ON(ret); 3018 3019 ret = btrfs_add_free_space(cache, bytenr, 3020 num_bytes); 3021 WARN_ON(ret); 3022 } 3023 } 3024 btrfs_put_block_group(cache); 3025 total -= num_bytes; 3026 bytenr += num_bytes; 3027 } 3028 return 0; 3029 } 3030 3031 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 3032 { 3033 struct btrfs_block_group_cache *cache; 3034 u64 bytenr; 3035 3036 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 3037 if (!cache) 3038 return 0; 3039 3040 bytenr = cache->key.objectid; 3041 btrfs_put_block_group(cache); 3042 3043 return bytenr; 3044 } 3045 3046 int btrfs_update_pinned_extents(struct btrfs_root *root, 3047 u64 bytenr, u64 num, int pin) 3048 { 3049 u64 len; 3050 struct btrfs_block_group_cache *cache; 3051 struct btrfs_fs_info *fs_info = root->fs_info; 3052 3053 if (pin) 3054 set_extent_dirty(&fs_info->pinned_extents, 3055 bytenr, bytenr + num - 1, GFP_NOFS); 3056 3057 while (num > 0) { 3058 cache = btrfs_lookup_block_group(fs_info, bytenr); 3059 BUG_ON(!cache); 3060 len = min(num, cache->key.offset - 3061 (bytenr - cache->key.objectid)); 3062 if 
(pin) { 3063 spin_lock(&cache->space_info->lock); 3064 spin_lock(&cache->lock); 3065 cache->pinned += len; 3066 cache->space_info->bytes_pinned += len; 3067 spin_unlock(&cache->lock); 3068 spin_unlock(&cache->space_info->lock); 3069 fs_info->total_pinned += len; 3070 } else { 3071 int unpin = 0; 3072 3073 /* 3074 * in order to not race with the block group caching, we 3075 * only want to unpin the extent if we are cached. If 3076 * we aren't cached, we want to start async caching this 3077 * block group so we can free the extent the next time 3078 * around. 3079 */ 3080 spin_lock(&cache->space_info->lock); 3081 spin_lock(&cache->lock); 3082 unpin = (cache->cached == BTRFS_CACHE_FINISHED); 3083 if (likely(unpin)) { 3084 cache->pinned -= len; 3085 cache->space_info->bytes_pinned -= len; 3086 fs_info->total_pinned -= len; 3087 } 3088 spin_unlock(&cache->lock); 3089 spin_unlock(&cache->space_info->lock); 3090 3091 if (likely(unpin)) 3092 clear_extent_dirty(&fs_info->pinned_extents, 3093 bytenr, bytenr + len -1, 3094 GFP_NOFS); 3095 else 3096 cache_block_group(cache); 3097 3098 if (unpin) 3099 btrfs_add_free_space(cache, bytenr, len); 3100 } 3101 btrfs_put_block_group(cache); 3102 bytenr += len; 3103 num -= len; 3104 } 3105 return 0; 3106 } 3107 3108 static int update_reserved_extents(struct btrfs_root *root, 3109 u64 bytenr, u64 num, int reserve) 3110 { 3111 u64 len; 3112 struct btrfs_block_group_cache *cache; 3113 struct btrfs_fs_info *fs_info = root->fs_info; 3114 3115 while (num > 0) { 3116 cache = btrfs_lookup_block_group(fs_info, bytenr); 3117 BUG_ON(!cache); 3118 len = min(num, cache->key.offset - 3119 (bytenr - cache->key.objectid)); 3120 3121 spin_lock(&cache->space_info->lock); 3122 spin_lock(&cache->lock); 3123 if (reserve) { 3124 cache->reserved += len; 3125 cache->space_info->bytes_reserved += len; 3126 } else { 3127 cache->reserved -= len; 3128 cache->space_info->bytes_reserved -= len; 3129 } 3130 spin_unlock(&cache->lock); 3131 spin_unlock(&cache->space_info->lock); 3132 btrfs_put_block_group(cache); 3133 bytenr += len; 3134 num -= len; 3135 } 3136 return 0; 3137 } 3138 3139 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3140 { 3141 u64 last = 0; 3142 u64 start; 3143 u64 end; 3144 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 3145 int ret; 3146 3147 while (1) { 3148 ret = find_first_extent_bit(pinned_extents, last, 3149 &start, &end, EXTENT_DIRTY); 3150 if (ret) 3151 break; 3152 3153 set_extent_dirty(copy, start, end, GFP_NOFS); 3154 last = end + 1; 3155 } 3156 return 0; 3157 } 3158 3159 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3160 struct btrfs_root *root, 3161 struct extent_io_tree *unpin) 3162 { 3163 u64 start; 3164 u64 end; 3165 int ret; 3166 3167 while (1) { 3168 ret = find_first_extent_bit(unpin, 0, &start, &end, 3169 EXTENT_DIRTY); 3170 if (ret) 3171 break; 3172 3173 ret = btrfs_discard_extent(root, start, end + 1 - start); 3174 3175 /* unlocks the pinned mutex */ 3176 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 3177 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3178 3179 cond_resched(); 3180 } 3181 3182 return ret; 3183 } 3184 3185 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3186 struct btrfs_root *root, 3187 struct btrfs_path *path, 3188 u64 bytenr, u64 num_bytes, int is_data, 3189 struct extent_buffer **must_clean) 3190 { 3191 int err = 0; 3192 struct extent_buffer *buf; 3193 3194 if (is_data) 3195 goto pinit; 3196 3197 buf = btrfs_find_tree_block(root, 
bytenr, num_bytes); 3198 if (!buf) 3199 goto pinit; 3200 3201 /* we can reuse a block if it hasn't been written 3202 * and it is from this transaction. We can't 3203 * reuse anything from the tree log root because 3204 * it has tiny sub-transactions. 3205 */ 3206 if (btrfs_buffer_uptodate(buf, 0) && 3207 btrfs_try_tree_lock(buf)) { 3208 u64 header_owner = btrfs_header_owner(buf); 3209 u64 header_transid = btrfs_header_generation(buf); 3210 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3211 header_transid == trans->transid && 3212 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3213 *must_clean = buf; 3214 return 1; 3215 } 3216 btrfs_tree_unlock(buf); 3217 } 3218 free_extent_buffer(buf); 3219 pinit: 3220 btrfs_set_path_blocking(path); 3221 /* unlocks the pinned mutex */ 3222 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3223 3224 BUG_ON(err < 0); 3225 return 0; 3226 } 3227 3228 3229 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3230 struct btrfs_root *root, 3231 u64 bytenr, u64 num_bytes, u64 parent, 3232 u64 root_objectid, u64 owner_objectid, 3233 u64 owner_offset, int refs_to_drop, 3234 struct btrfs_delayed_extent_op *extent_op) 3235 { 3236 struct btrfs_key key; 3237 struct btrfs_path *path; 3238 struct btrfs_fs_info *info = root->fs_info; 3239 struct btrfs_root *extent_root = info->extent_root; 3240 struct extent_buffer *leaf; 3241 struct btrfs_extent_item *ei; 3242 struct btrfs_extent_inline_ref *iref; 3243 int ret; 3244 int is_data; 3245 int extent_slot = 0; 3246 int found_extent = 0; 3247 int num_to_del = 1; 3248 u32 item_size; 3249 u64 refs; 3250 3251 path = btrfs_alloc_path(); 3252 if (!path) 3253 return -ENOMEM; 3254 3255 path->reada = 1; 3256 path->leave_spinning = 1; 3257 3258 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3259 BUG_ON(!is_data && refs_to_drop != 1); 3260 3261 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3262 bytenr, num_bytes, parent, 3263 root_objectid, owner_objectid, 3264 owner_offset); 3265 if (ret == 0) { 3266 extent_slot = path->slots[0]; 3267 while (extent_slot >= 0) { 3268 btrfs_item_key_to_cpu(path->nodes[0], &key, 3269 extent_slot); 3270 if (key.objectid != bytenr) 3271 break; 3272 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3273 key.offset == num_bytes) { 3274 found_extent = 1; 3275 break; 3276 } 3277 if (path->slots[0] - extent_slot > 5) 3278 break; 3279 extent_slot--; 3280 } 3281 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3282 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3283 if (found_extent && item_size < sizeof(*ei)) 3284 found_extent = 0; 3285 #endif 3286 if (!found_extent) { 3287 BUG_ON(iref); 3288 ret = remove_extent_backref(trans, extent_root, path, 3289 NULL, refs_to_drop, 3290 is_data); 3291 BUG_ON(ret); 3292 btrfs_release_path(extent_root, path); 3293 path->leave_spinning = 1; 3294 3295 key.objectid = bytenr; 3296 key.type = BTRFS_EXTENT_ITEM_KEY; 3297 key.offset = num_bytes; 3298 3299 ret = btrfs_search_slot(trans, extent_root, 3300 &key, path, -1, 1); 3301 if (ret) { 3302 printk(KERN_ERR "umm, got %d back from search" 3303 ", was looking for %llu\n", ret, 3304 (unsigned long long)bytenr); 3305 btrfs_print_leaf(extent_root, path->nodes[0]); 3306 } 3307 BUG_ON(ret); 3308 extent_slot = path->slots[0]; 3309 } 3310 } else { 3311 btrfs_print_leaf(extent_root, path->nodes[0]); 3312 WARN_ON(1); 3313 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3314 "parent %llu root %llu owner %llu offset %llu\n", 3315 (unsigned long long)bytenr, 3316 (unsigned long long)parent, 3317 
(unsigned long long)root_objectid, 3318 (unsigned long long)owner_objectid, 3319 (unsigned long long)owner_offset); 3320 } 3321 3322 leaf = path->nodes[0]; 3323 item_size = btrfs_item_size_nr(leaf, extent_slot); 3324 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3325 if (item_size < sizeof(*ei)) { 3326 BUG_ON(found_extent || extent_slot != path->slots[0]); 3327 ret = convert_extent_item_v0(trans, extent_root, path, 3328 owner_objectid, 0); 3329 BUG_ON(ret < 0); 3330 3331 btrfs_release_path(extent_root, path); 3332 path->leave_spinning = 1; 3333 3334 key.objectid = bytenr; 3335 key.type = BTRFS_EXTENT_ITEM_KEY; 3336 key.offset = num_bytes; 3337 3338 ret = btrfs_search_slot(trans, extent_root, &key, path, 3339 -1, 1); 3340 if (ret) { 3341 printk(KERN_ERR "umm, got %d back from search" 3342 ", was looking for %llu\n", ret, 3343 (unsigned long long)bytenr); 3344 btrfs_print_leaf(extent_root, path->nodes[0]); 3345 } 3346 BUG_ON(ret); 3347 extent_slot = path->slots[0]; 3348 leaf = path->nodes[0]; 3349 item_size = btrfs_item_size_nr(leaf, extent_slot); 3350 } 3351 #endif 3352 BUG_ON(item_size < sizeof(*ei)); 3353 ei = btrfs_item_ptr(leaf, extent_slot, 3354 struct btrfs_extent_item); 3355 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3356 struct btrfs_tree_block_info *bi; 3357 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3358 bi = (struct btrfs_tree_block_info *)(ei + 1); 3359 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3360 } 3361 3362 refs = btrfs_extent_refs(leaf, ei); 3363 BUG_ON(refs < refs_to_drop); 3364 refs -= refs_to_drop; 3365 3366 if (refs > 0) { 3367 if (extent_op) 3368 __run_delayed_extent_op(extent_op, leaf, ei); 3369 /* 3370 * In the case of inline back ref, reference count will 3371 * be updated by remove_extent_backref 3372 */ 3373 if (iref) { 3374 BUG_ON(!found_extent); 3375 } else { 3376 btrfs_set_extent_refs(leaf, ei, refs); 3377 btrfs_mark_buffer_dirty(leaf); 3378 } 3379 if (found_extent) { 3380 ret = remove_extent_backref(trans, extent_root, path, 3381 iref, refs_to_drop, 3382 is_data); 3383 BUG_ON(ret); 3384 } 3385 } else { 3386 int mark_free = 0; 3387 struct extent_buffer *must_clean = NULL; 3388 3389 if (found_extent) { 3390 BUG_ON(is_data && refs_to_drop != 3391 extent_data_ref_count(root, path, iref)); 3392 if (iref) { 3393 BUG_ON(path->slots[0] != extent_slot); 3394 } else { 3395 BUG_ON(path->slots[0] != extent_slot + 1); 3396 path->slots[0] = extent_slot; 3397 num_to_del = 2; 3398 } 3399 } 3400 3401 ret = pin_down_bytes(trans, root, path, bytenr, 3402 num_bytes, is_data, &must_clean); 3403 if (ret > 0) 3404 mark_free = 1; 3405 BUG_ON(ret < 0); 3406 /* 3407 * it is going to be very rare for someone to be waiting 3408 * on the block we're freeing. 
del_items might need to 3409 * schedule, so rather than get fancy, just force it 3410 * to blocking here 3411 */ 3412 if (must_clean) 3413 btrfs_set_lock_blocking(must_clean); 3414 3415 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3416 num_to_del); 3417 BUG_ON(ret); 3418 btrfs_release_path(extent_root, path); 3419 3420 if (must_clean) { 3421 clean_tree_block(NULL, root, must_clean); 3422 btrfs_tree_unlock(must_clean); 3423 free_extent_buffer(must_clean); 3424 } 3425 3426 if (is_data) { 3427 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3428 BUG_ON(ret); 3429 } else { 3430 invalidate_mapping_pages(info->btree_inode->i_mapping, 3431 bytenr >> PAGE_CACHE_SHIFT, 3432 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3433 } 3434 3435 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3436 mark_free); 3437 BUG_ON(ret); 3438 } 3439 btrfs_free_path(path); 3440 return ret; 3441 } 3442 3443 /* 3444 * when we free an extent, it is possible (and likely) that we free the last 3445 * delayed ref for that extent as well. This searches the delayed ref tree for 3446 * a given extent, and if there are no other delayed refs to be processed, it 3447 * removes it from the tree. 3448 */ 3449 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3450 struct btrfs_root *root, u64 bytenr) 3451 { 3452 struct btrfs_delayed_ref_head *head; 3453 struct btrfs_delayed_ref_root *delayed_refs; 3454 struct btrfs_delayed_ref_node *ref; 3455 struct rb_node *node; 3456 int ret; 3457 3458 delayed_refs = &trans->transaction->delayed_refs; 3459 spin_lock(&delayed_refs->lock); 3460 head = btrfs_find_delayed_ref_head(trans, bytenr); 3461 if (!head) 3462 goto out; 3463 3464 node = rb_prev(&head->node.rb_node); 3465 if (!node) 3466 goto out; 3467 3468 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3469 3470 /* there are still entries for this ref, we can't drop it */ 3471 if (ref->bytenr == bytenr) 3472 goto out; 3473 3474 if (head->extent_op) { 3475 if (!head->must_insert_reserved) 3476 goto out; 3477 kfree(head->extent_op); 3478 head->extent_op = NULL; 3479 } 3480 3481 /* 3482 * waiting for the lock here would deadlock. If someone else has it 3483 * locked they are already in the process of dropping it anyway 3484 */ 3485 if (!mutex_trylock(&head->mutex)) 3486 goto out; 3487 3488 /* 3489 * at this point we have a head with no other entries. Go 3490 * ahead and process it. 3491 */ 3492 head->node.in_tree = 0; 3493 rb_erase(&head->node.rb_node, &delayed_refs->root); 3494 3495 delayed_refs->num_entries--; 3496 3497 /* 3498 * we don't take a ref on the node because we're removing it from the 3499 * tree, so we just steal the ref the tree was holding. 3500 */ 3501 delayed_refs->num_heads--; 3502 if (list_empty(&head->cluster)) 3503 delayed_refs->num_heads_ready--; 3504 3505 list_del_init(&head->cluster); 3506 spin_unlock(&delayed_refs->lock); 3507 3508 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3509 &head->node, head->extent_op, 3510 head->must_insert_reserved); 3511 BUG_ON(ret); 3512 btrfs_put_delayed_ref(&head->node); 3513 return 0; 3514 out: 3515 spin_unlock(&delayed_refs->lock); 3516 return 0; 3517 } 3518 3519 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3520 struct btrfs_root *root, 3521 u64 bytenr, u64 num_bytes, u64 parent, 3522 u64 root_objectid, u64 owner, u64 offset) 3523 { 3524 int ret; 3525 3526 /* 3527 * tree log blocks never actually go into the extent allocation 3528 * tree, just update pinning info and exit early. 
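 * The other two cases queue a delayed ref instead of touching the
 * extent tree directly: tree blocks (owner < BTRFS_FIRST_FREE_OBJECTID)
 * go through btrfs_add_delayed_tree_ref(), data extents through
 * btrfs_add_delayed_data_ref(), both with BTRFS_DROP_DELAYED_REF, e.g.:
 *
 *	btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, parent,
 *				   root_objectid, owner, offset,
 *				   BTRFS_DROP_DELAYED_REF, NULL);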
3529 */ 3530 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3531 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3532 /* unlocks the pinned mutex */ 3533 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3534 update_reserved_extents(root, bytenr, num_bytes, 0); 3535 ret = 0; 3536 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3537 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3538 parent, root_objectid, (int)owner, 3539 BTRFS_DROP_DELAYED_REF, NULL); 3540 BUG_ON(ret); 3541 ret = check_ref_cleanup(trans, root, bytenr); 3542 BUG_ON(ret); 3543 } else { 3544 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 3545 parent, root_objectid, owner, 3546 offset, BTRFS_DROP_DELAYED_REF, NULL); 3547 BUG_ON(ret); 3548 } 3549 return ret; 3550 } 3551 3552 static u64 stripe_align(struct btrfs_root *root, u64 val) 3553 { 3554 u64 mask = ((u64)root->stripesize - 1); 3555 u64 ret = (val + mask) & ~mask; 3556 return ret; 3557 } 3558 3559 /* 3560 * when we wait for progress in the block group caching, its because 3561 * our allocation attempt failed at least once. So, we must sleep 3562 * and let some progress happen before we try again. 3563 * 3564 * This function will sleep at least once waiting for new free space to 3565 * show up, and then it will check the block group free space numbers 3566 * for our min num_bytes. Another option is to have it go ahead 3567 * and look in the rbtree for a free extent of a given size, but this 3568 * is a good start. 3569 */ 3570 static noinline int 3571 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 3572 u64 num_bytes) 3573 { 3574 DEFINE_WAIT(wait); 3575 3576 prepare_to_wait(&cache->caching_q, &wait, TASK_UNINTERRUPTIBLE); 3577 3578 if (block_group_cache_done(cache)) { 3579 finish_wait(&cache->caching_q, &wait); 3580 return 0; 3581 } 3582 schedule(); 3583 finish_wait(&cache->caching_q, &wait); 3584 3585 wait_event(cache->caching_q, block_group_cache_done(cache) || 3586 (cache->free_space >= num_bytes)); 3587 return 0; 3588 } 3589 3590 enum btrfs_loop_type { 3591 LOOP_CACHED_ONLY = 0, 3592 LOOP_CACHING_NOWAIT = 1, 3593 LOOP_CACHING_WAIT = 2, 3594 LOOP_ALLOC_CHUNK = 3, 3595 LOOP_NO_EMPTY_SIZE = 4, 3596 }; 3597 3598 /* 3599 * walks the btree of allocated extents and find a hole of a given size. 3600 * The key ins is changed to record the hole: 3601 * ins->objectid == block start 3602 * ins->flags = BTRFS_EXTENT_ITEM_KEY 3603 * ins->offset == number of blocks 3604 * Any available blocks before search_start are skipped. 
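 * On success the space has already been carved out of the block group's
 * free space cache (or cluster); callers like btrfs_reserve_extent()
 * then mark the range reserved, e.g.:
 *
 *	update_reserved_extents(root, ins->objectid, ins->offset, 1);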
3605 */ 3606 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 3607 struct btrfs_root *orig_root, 3608 u64 num_bytes, u64 empty_size, 3609 u64 search_start, u64 search_end, 3610 u64 hint_byte, struct btrfs_key *ins, 3611 u64 exclude_start, u64 exclude_nr, 3612 int data) 3613 { 3614 int ret = 0; 3615 struct btrfs_root *root = orig_root->fs_info->extent_root; 3616 struct btrfs_free_cluster *last_ptr = NULL; 3617 struct btrfs_block_group_cache *block_group = NULL; 3618 int empty_cluster = 2 * 1024 * 1024; 3619 int allowed_chunk_alloc = 0; 3620 struct btrfs_space_info *space_info; 3621 int last_ptr_loop = 0; 3622 int loop = 0; 3623 bool found_uncached_bg = false; 3624 3625 WARN_ON(num_bytes < root->sectorsize); 3626 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3627 ins->objectid = 0; 3628 ins->offset = 0; 3629 3630 space_info = __find_space_info(root->fs_info, data); 3631 3632 if (orig_root->ref_cows || empty_size) 3633 allowed_chunk_alloc = 1; 3634 3635 if (data & BTRFS_BLOCK_GROUP_METADATA) { 3636 last_ptr = &root->fs_info->meta_alloc_cluster; 3637 if (!btrfs_test_opt(root, SSD)) 3638 empty_cluster = 64 * 1024; 3639 } 3640 3641 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 3642 last_ptr = &root->fs_info->data_alloc_cluster; 3643 } 3644 3645 if (last_ptr) { 3646 spin_lock(&last_ptr->lock); 3647 if (last_ptr->block_group) 3648 hint_byte = last_ptr->window_start; 3649 spin_unlock(&last_ptr->lock); 3650 } 3651 3652 search_start = max(search_start, first_logical_byte(root, 0)); 3653 search_start = max(search_start, hint_byte); 3654 3655 if (!last_ptr) 3656 empty_cluster = 0; 3657 3658 if (search_start == hint_byte) { 3659 block_group = btrfs_lookup_block_group(root->fs_info, 3660 search_start); 3661 /* 3662 * we don't want to use the block group if it doesn't match our 3663 * allocation bits, or if its not cached. 3664 */ 3665 if (block_group && block_group_bits(block_group, data) && 3666 block_group_cache_done(block_group)) { 3667 down_read(&space_info->groups_sem); 3668 if (list_empty(&block_group->list) || 3669 block_group->ro) { 3670 /* 3671 * someone is removing this block group, 3672 * we can't jump into the have_block_group 3673 * target because our list pointers are not 3674 * valid 3675 */ 3676 btrfs_put_block_group(block_group); 3677 up_read(&space_info->groups_sem); 3678 } else 3679 goto have_block_group; 3680 } else if (block_group) { 3681 btrfs_put_block_group(block_group); 3682 } 3683 } 3684 3685 search: 3686 down_read(&space_info->groups_sem); 3687 list_for_each_entry(block_group, &space_info->block_groups, list) { 3688 u64 offset; 3689 int cached; 3690 3691 atomic_inc(&block_group->count); 3692 search_start = block_group->key.objectid; 3693 3694 have_block_group: 3695 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 3696 /* 3697 * we want to start caching kthreads, but not too many 3698 * right off the bat so we don't overwhelm the system, 3699 * so only start them if there are less than 2 and we're 3700 * in the initial allocation phase. 
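 * (the "less than 2" limit only applies to those early passes; once
 * loop is past LOOP_CACHING_NOWAIT the caching thread is started
 * unconditionally, see the test below.)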
3701 */ 3702 if (loop > LOOP_CACHING_NOWAIT || 3703 atomic_read(&space_info->caching_threads) < 2) { 3704 ret = cache_block_group(block_group); 3705 BUG_ON(ret); 3706 } 3707 } 3708 3709 cached = block_group_cache_done(block_group); 3710 if (unlikely(!cached)) { 3711 found_uncached_bg = true; 3712 3713 /* if we only want cached bgs, loop */ 3714 if (loop == LOOP_CACHED_ONLY) 3715 goto loop; 3716 } 3717 3718 if (unlikely(block_group->ro)) 3719 goto loop; 3720 3721 if (last_ptr) { 3722 /* 3723 * the refill lock keeps out other 3724 * people trying to start a new cluster 3725 */ 3726 spin_lock(&last_ptr->refill_lock); 3727 if (last_ptr->block_group && 3728 (last_ptr->block_group->ro || 3729 !block_group_bits(last_ptr->block_group, data))) { 3730 offset = 0; 3731 goto refill_cluster; 3732 } 3733 3734 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 3735 num_bytes, search_start); 3736 if (offset) { 3737 /* we have a block, we're done */ 3738 spin_unlock(&last_ptr->refill_lock); 3739 goto checks; 3740 } 3741 3742 spin_lock(&last_ptr->lock); 3743 /* 3744 * whoops, this cluster doesn't actually point to 3745 * this block group. Get a ref on the block 3746 * group is does point to and try again 3747 */ 3748 if (!last_ptr_loop && last_ptr->block_group && 3749 last_ptr->block_group != block_group) { 3750 3751 btrfs_put_block_group(block_group); 3752 block_group = last_ptr->block_group; 3753 atomic_inc(&block_group->count); 3754 spin_unlock(&last_ptr->lock); 3755 spin_unlock(&last_ptr->refill_lock); 3756 3757 last_ptr_loop = 1; 3758 search_start = block_group->key.objectid; 3759 /* 3760 * we know this block group is properly 3761 * in the list because 3762 * btrfs_remove_block_group, drops the 3763 * cluster before it removes the block 3764 * group from the list 3765 */ 3766 goto have_block_group; 3767 } 3768 spin_unlock(&last_ptr->lock); 3769 refill_cluster: 3770 /* 3771 * this cluster didn't work out, free it and 3772 * start over 3773 */ 3774 btrfs_return_cluster_to_free_space(NULL, last_ptr); 3775 3776 last_ptr_loop = 0; 3777 3778 /* allocate a cluster in this block group */ 3779 ret = btrfs_find_space_cluster(trans, root, 3780 block_group, last_ptr, 3781 offset, num_bytes, 3782 empty_cluster + empty_size); 3783 if (ret == 0) { 3784 /* 3785 * now pull our allocation out of this 3786 * cluster 3787 */ 3788 offset = btrfs_alloc_from_cluster(block_group, 3789 last_ptr, num_bytes, 3790 search_start); 3791 if (offset) { 3792 /* we found one, proceed */ 3793 spin_unlock(&last_ptr->refill_lock); 3794 goto checks; 3795 } 3796 } else if (!cached && loop > LOOP_CACHING_NOWAIT) { 3797 spin_unlock(&last_ptr->refill_lock); 3798 3799 wait_block_group_cache_progress(block_group, 3800 num_bytes + empty_cluster + empty_size); 3801 goto have_block_group; 3802 } 3803 3804 /* 3805 * at this point we either didn't find a cluster 3806 * or we weren't able to allocate a block from our 3807 * cluster. 
Free the cluster we've been trying 3808 * to use, and go to the next block group 3809 */ 3810 if (loop < LOOP_NO_EMPTY_SIZE) { 3811 btrfs_return_cluster_to_free_space(NULL, 3812 last_ptr); 3813 spin_unlock(&last_ptr->refill_lock); 3814 goto loop; 3815 } 3816 spin_unlock(&last_ptr->refill_lock); 3817 } 3818 3819 offset = btrfs_find_space_for_alloc(block_group, search_start, 3820 num_bytes, empty_size); 3821 if (!offset && (cached || (!cached && 3822 loop == LOOP_CACHING_NOWAIT))) { 3823 goto loop; 3824 } else if (!offset && (!cached && 3825 loop > LOOP_CACHING_NOWAIT)) { 3826 wait_block_group_cache_progress(block_group, 3827 num_bytes + empty_size); 3828 goto have_block_group; 3829 } 3830 checks: 3831 search_start = stripe_align(root, offset); 3832 /* move on to the next group */ 3833 if (search_start + num_bytes >= search_end) { 3834 btrfs_add_free_space(block_group, offset, num_bytes); 3835 goto loop; 3836 } 3837 3838 /* move on to the next group */ 3839 if (search_start + num_bytes > 3840 block_group->key.objectid + block_group->key.offset) { 3841 btrfs_add_free_space(block_group, offset, num_bytes); 3842 goto loop; 3843 } 3844 3845 if (exclude_nr > 0 && 3846 (search_start + num_bytes > exclude_start && 3847 search_start < exclude_start + exclude_nr)) { 3848 search_start = exclude_start + exclude_nr; 3849 3850 btrfs_add_free_space(block_group, offset, num_bytes); 3851 /* 3852 * if search_start is still in this block group 3853 * then we just re-search this block group 3854 */ 3855 if (search_start >= block_group->key.objectid && 3856 search_start < (block_group->key.objectid + 3857 block_group->key.offset)) 3858 goto have_block_group; 3859 goto loop; 3860 } 3861 3862 ins->objectid = search_start; 3863 ins->offset = num_bytes; 3864 3865 if (offset < search_start) 3866 btrfs_add_free_space(block_group, offset, 3867 search_start - offset); 3868 BUG_ON(offset > search_start); 3869 3870 /* we are all good, lets return */ 3871 break; 3872 loop: 3873 btrfs_put_block_group(block_group); 3874 } 3875 up_read(&space_info->groups_sem); 3876 3877 /* LOOP_CACHED_ONLY, only search fully cached block groups 3878 * LOOP_CACHING_NOWAIT, search partially cached block groups, but 3879 * dont wait foR them to finish caching 3880 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 3881 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 3882 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 3883 * again 3884 */ 3885 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 3886 (found_uncached_bg || empty_size || empty_cluster || 3887 allowed_chunk_alloc)) { 3888 if (found_uncached_bg) { 3889 found_uncached_bg = false; 3890 if (loop < LOOP_CACHING_WAIT) { 3891 loop++; 3892 goto search; 3893 } 3894 } 3895 3896 if (loop == LOOP_ALLOC_CHUNK) { 3897 empty_size = 0; 3898 empty_cluster = 0; 3899 } 3900 3901 if (allowed_chunk_alloc) { 3902 ret = do_chunk_alloc(trans, root, num_bytes + 3903 2 * 1024 * 1024, data, 1); 3904 allowed_chunk_alloc = 0; 3905 } else { 3906 space_info->force_alloc = 1; 3907 } 3908 3909 if (loop < LOOP_NO_EMPTY_SIZE) { 3910 loop++; 3911 goto search; 3912 } 3913 ret = -ENOSPC; 3914 } else if (!ins->objectid) { 3915 ret = -ENOSPC; 3916 } 3917 3918 /* we found what we needed */ 3919 if (ins->objectid) { 3920 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 3921 trans->block_group = block_group->key.objectid; 3922 3923 btrfs_put_block_group(block_group); 3924 ret = 0; 3925 } 3926 3927 return ret; 3928 } 3929 3930 static void dump_space_info(struct btrfs_space_info *info, 
u64 bytes) 3931 { 3932 struct btrfs_block_group_cache *cache; 3933 3934 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3935 (unsigned long long)(info->total_bytes - info->bytes_used - 3936 info->bytes_pinned - info->bytes_reserved), 3937 (info->full) ? "" : "not "); 3938 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 3939 " may_use=%llu, used=%llu\n", 3940 (unsigned long long)info->total_bytes, 3941 (unsigned long long)info->bytes_pinned, 3942 (unsigned long long)info->bytes_delalloc, 3943 (unsigned long long)info->bytes_may_use, 3944 (unsigned long long)info->bytes_used); 3945 3946 down_read(&info->groups_sem); 3947 list_for_each_entry(cache, &info->block_groups, list) { 3948 spin_lock(&cache->lock); 3949 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3950 "%llu pinned %llu reserved\n", 3951 (unsigned long long)cache->key.objectid, 3952 (unsigned long long)cache->key.offset, 3953 (unsigned long long)btrfs_block_group_used(&cache->item), 3954 (unsigned long long)cache->pinned, 3955 (unsigned long long)cache->reserved); 3956 btrfs_dump_free_space(cache, bytes); 3957 spin_unlock(&cache->lock); 3958 } 3959 up_read(&info->groups_sem); 3960 } 3961 3962 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3963 struct btrfs_root *root, 3964 u64 num_bytes, u64 min_alloc_size, 3965 u64 empty_size, u64 hint_byte, 3966 u64 search_end, struct btrfs_key *ins, 3967 u64 data) 3968 { 3969 int ret; 3970 u64 search_start = 0; 3971 struct btrfs_fs_info *info = root->fs_info; 3972 3973 data = btrfs_get_alloc_profile(root, data); 3974 again: 3975 /* 3976 * the only place that sets empty_size is btrfs_realloc_node, which 3977 * is not called recursively on allocations 3978 */ 3979 if (empty_size || root->ref_cows) { 3980 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3981 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3982 2 * 1024 * 1024, 3983 BTRFS_BLOCK_GROUP_METADATA | 3984 (info->metadata_alloc_profile & 3985 info->avail_metadata_alloc_bits), 0); 3986 } 3987 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3988 num_bytes + 2 * 1024 * 1024, data, 0); 3989 } 3990 3991 WARN_ON(num_bytes < root->sectorsize); 3992 ret = find_free_extent(trans, root, num_bytes, empty_size, 3993 search_start, search_end, hint_byte, ins, 3994 trans->alloc_exclude_start, 3995 trans->alloc_exclude_nr, data); 3996 3997 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 3998 num_bytes = num_bytes >> 1; 3999 num_bytes = num_bytes & ~(root->sectorsize - 1); 4000 num_bytes = max(num_bytes, min_alloc_size); 4001 do_chunk_alloc(trans, root->fs_info->extent_root, 4002 num_bytes, data, 1); 4003 goto again; 4004 } 4005 if (ret == -ENOSPC) { 4006 struct btrfs_space_info *sinfo; 4007 4008 sinfo = __find_space_info(root->fs_info, data); 4009 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4010 "wanted %llu\n", (unsigned long long)data, 4011 (unsigned long long)num_bytes); 4012 dump_space_info(sinfo, num_bytes); 4013 } 4014 4015 return ret; 4016 } 4017 4018 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 4019 { 4020 struct btrfs_block_group_cache *cache; 4021 int ret = 0; 4022 4023 cache = btrfs_lookup_block_group(root->fs_info, start); 4024 if (!cache) { 4025 printk(KERN_ERR "Unable to find block group for %llu\n", 4026 (unsigned long long)start); 4027 return -ENOSPC; 4028 } 4029 4030 ret = btrfs_discard_extent(root, start, len); 4031 4032 btrfs_add_free_space(cache, start, len); 4033 btrfs_put_block_group(cache); 4034 
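	/*
	 * drop the in-memory reservation taken when this extent was
	 * reserved (see update_reserved_extents(..., 1) in
	 * btrfs_reserve_extent())
	 */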
update_reserved_extents(root, start, len, 0); 4035 4036 return ret; 4037 } 4038 4039 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4040 struct btrfs_root *root, 4041 u64 num_bytes, u64 min_alloc_size, 4042 u64 empty_size, u64 hint_byte, 4043 u64 search_end, struct btrfs_key *ins, 4044 u64 data) 4045 { 4046 int ret; 4047 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 4048 empty_size, hint_byte, search_end, ins, 4049 data); 4050 if (!ret) 4051 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4052 4053 return ret; 4054 } 4055 4056 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4057 struct btrfs_root *root, 4058 u64 parent, u64 root_objectid, 4059 u64 flags, u64 owner, u64 offset, 4060 struct btrfs_key *ins, int ref_mod) 4061 { 4062 int ret; 4063 struct btrfs_fs_info *fs_info = root->fs_info; 4064 struct btrfs_extent_item *extent_item; 4065 struct btrfs_extent_inline_ref *iref; 4066 struct btrfs_path *path; 4067 struct extent_buffer *leaf; 4068 int type; 4069 u32 size; 4070 4071 if (parent > 0) 4072 type = BTRFS_SHARED_DATA_REF_KEY; 4073 else 4074 type = BTRFS_EXTENT_DATA_REF_KEY; 4075 4076 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 4077 4078 path = btrfs_alloc_path(); 4079 BUG_ON(!path); 4080 4081 path->leave_spinning = 1; 4082 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4083 ins, size); 4084 BUG_ON(ret); 4085 4086 leaf = path->nodes[0]; 4087 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4088 struct btrfs_extent_item); 4089 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 4090 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4091 btrfs_set_extent_flags(leaf, extent_item, 4092 flags | BTRFS_EXTENT_FLAG_DATA); 4093 4094 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4095 btrfs_set_extent_inline_ref_type(leaf, iref, type); 4096 if (parent > 0) { 4097 struct btrfs_shared_data_ref *ref; 4098 ref = (struct btrfs_shared_data_ref *)(iref + 1); 4099 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4100 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 4101 } else { 4102 struct btrfs_extent_data_ref *ref; 4103 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 4104 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 4105 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 4106 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 4107 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 4108 } 4109 4110 btrfs_mark_buffer_dirty(path->nodes[0]); 4111 btrfs_free_path(path); 4112 4113 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4114 1, 0); 4115 if (ret) { 4116 printk(KERN_ERR "btrfs update block group failed for %llu " 4117 "%llu\n", (unsigned long long)ins->objectid, 4118 (unsigned long long)ins->offset); 4119 BUG(); 4120 } 4121 return ret; 4122 } 4123 4124 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 4125 struct btrfs_root *root, 4126 u64 parent, u64 root_objectid, 4127 u64 flags, struct btrfs_disk_key *key, 4128 int level, struct btrfs_key *ins) 4129 { 4130 int ret; 4131 struct btrfs_fs_info *fs_info = root->fs_info; 4132 struct btrfs_extent_item *extent_item; 4133 struct btrfs_tree_block_info *block_info; 4134 struct btrfs_extent_inline_ref *iref; 4135 struct btrfs_path *path; 4136 struct extent_buffer *leaf; 4137 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 4138 4139 path = btrfs_alloc_path(); 4140 BUG_ON(!path); 4141 4142 path->leave_spinning = 1; 4143 ret = 
btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4144 ins, size); 4145 BUG_ON(ret); 4146 4147 leaf = path->nodes[0]; 4148 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4149 struct btrfs_extent_item); 4150 btrfs_set_extent_refs(leaf, extent_item, 1); 4151 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4152 btrfs_set_extent_flags(leaf, extent_item, 4153 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 4154 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 4155 4156 btrfs_set_tree_block_key(leaf, block_info, key); 4157 btrfs_set_tree_block_level(leaf, block_info, level); 4158 4159 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 4160 if (parent > 0) { 4161 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4162 btrfs_set_extent_inline_ref_type(leaf, iref, 4163 BTRFS_SHARED_BLOCK_REF_KEY); 4164 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4165 } else { 4166 btrfs_set_extent_inline_ref_type(leaf, iref, 4167 BTRFS_TREE_BLOCK_REF_KEY); 4168 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 4169 } 4170 4171 btrfs_mark_buffer_dirty(leaf); 4172 btrfs_free_path(path); 4173 4174 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4175 1, 0); 4176 if (ret) { 4177 printk(KERN_ERR "btrfs update block group failed for %llu " 4178 "%llu\n", (unsigned long long)ins->objectid, 4179 (unsigned long long)ins->offset); 4180 BUG(); 4181 } 4182 return ret; 4183 } 4184 4185 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4186 struct btrfs_root *root, 4187 u64 root_objectid, u64 owner, 4188 u64 offset, struct btrfs_key *ins) 4189 { 4190 int ret; 4191 4192 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 4193 4194 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 4195 0, root_objectid, owner, offset, 4196 BTRFS_ADD_DELAYED_EXTENT, NULL); 4197 return ret; 4198 } 4199 4200 /* 4201 * this is used by the tree logging recovery code. It records that 4202 * an extent has been allocated and makes sure to clear the free 4203 * space cache bits as well 4204 */ 4205 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4206 struct btrfs_root *root, 4207 u64 root_objectid, u64 owner, u64 offset, 4208 struct btrfs_key *ins) 4209 { 4210 int ret; 4211 struct btrfs_block_group_cache *block_group; 4212 4213 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4214 cache_block_group(block_group); 4215 wait_event(block_group->caching_q, 4216 block_group_cache_done(block_group)); 4217 4218 ret = btrfs_remove_free_space(block_group, ins->objectid, 4219 ins->offset); 4220 BUG_ON(ret); 4221 btrfs_put_block_group(block_group); 4222 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4223 0, owner, offset, ins, 1); 4224 return ret; 4225 } 4226 4227 /* 4228 * finds a free extent and does all the dirty work required for allocation 4229 * returns the key for the extent through ins, and a tree buffer for 4230 * the first block of the extent through buf. 4231 * 4232 * returns 0 if everything worked, non-zero otherwise. 
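 *
 * note that outside of the tree log the extent item is not inserted
 * here directly: a delayed tree ref carrying an extent_op with the
 * key and flags is queued instead, and the item is created later when
 * the delayed refs are run.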
4233 */ 4234 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4235 struct btrfs_root *root, 4236 u64 num_bytes, u64 parent, u64 root_objectid, 4237 struct btrfs_disk_key *key, int level, 4238 u64 empty_size, u64 hint_byte, u64 search_end, 4239 struct btrfs_key *ins) 4240 { 4241 int ret; 4242 u64 flags = 0; 4243 4244 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4245 empty_size, hint_byte, search_end, 4246 ins, 0); 4247 if (ret) 4248 return ret; 4249 4250 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4251 if (parent == 0) 4252 parent = ins->objectid; 4253 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4254 } else 4255 BUG_ON(parent > 0); 4256 4257 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4258 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4259 struct btrfs_delayed_extent_op *extent_op; 4260 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4261 BUG_ON(!extent_op); 4262 if (key) 4263 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4264 else 4265 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4266 extent_op->flags_to_set = flags; 4267 extent_op->update_key = 1; 4268 extent_op->update_flags = 1; 4269 extent_op->is_data = 0; 4270 4271 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4272 ins->offset, parent, root_objectid, 4273 level, BTRFS_ADD_DELAYED_EXTENT, 4274 extent_op); 4275 BUG_ON(ret); 4276 } 4277 return ret; 4278 } 4279 4280 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4281 struct btrfs_root *root, 4282 u64 bytenr, u32 blocksize, 4283 int level) 4284 { 4285 struct extent_buffer *buf; 4286 4287 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4288 if (!buf) 4289 return ERR_PTR(-ENOMEM); 4290 btrfs_set_header_generation(buf, trans->transid); 4291 btrfs_set_buffer_lockdep_class(buf, level); 4292 btrfs_tree_lock(buf); 4293 clean_tree_block(trans, root, buf); 4294 4295 btrfs_set_lock_blocking(buf); 4296 btrfs_set_buffer_uptodate(buf); 4297 4298 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4299 set_extent_dirty(&root->dirty_log_pages, buf->start, 4300 buf->start + buf->len - 1, GFP_NOFS); 4301 } else { 4302 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4303 buf->start + buf->len - 1, GFP_NOFS); 4304 } 4305 trans->blocks_used++; 4306 /* this returns a buffer locked for blocking */ 4307 return buf; 4308 } 4309 4310 /* 4311 * helper function to allocate a block for a given tree 4312 * returns the tree buffer or NULL. 
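 *
 * the allocation is done in two steps: alloc_tree_block reserves the
 * extent and queues the delayed ref, btrfs_init_new_buffer then hands
 * back the extent_buffer, locked and marked dirty for the transaction.
 * on failure an ERR_PTR is returned rather than NULL.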
4313 */ 4314 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4315 struct btrfs_root *root, u32 blocksize, 4316 u64 parent, u64 root_objectid, 4317 struct btrfs_disk_key *key, int level, 4318 u64 hint, u64 empty_size) 4319 { 4320 struct btrfs_key ins; 4321 int ret; 4322 struct extent_buffer *buf; 4323 4324 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4325 key, level, empty_size, hint, (u64)-1, &ins); 4326 if (ret) { 4327 BUG_ON(ret > 0); 4328 return ERR_PTR(ret); 4329 } 4330 4331 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4332 blocksize, level); 4333 return buf; 4334 } 4335 4336 #if 0 4337 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4338 struct btrfs_root *root, struct extent_buffer *leaf) 4339 { 4340 u64 disk_bytenr; 4341 u64 num_bytes; 4342 struct btrfs_key key; 4343 struct btrfs_file_extent_item *fi; 4344 u32 nritems; 4345 int i; 4346 int ret; 4347 4348 BUG_ON(!btrfs_is_leaf(leaf)); 4349 nritems = btrfs_header_nritems(leaf); 4350 4351 for (i = 0; i < nritems; i++) { 4352 cond_resched(); 4353 btrfs_item_key_to_cpu(leaf, &key, i); 4354 4355 /* only extents have references, skip everything else */ 4356 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 4357 continue; 4358 4359 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 4360 4361 /* inline extents live in the btree, they don't have refs */ 4362 if (btrfs_file_extent_type(leaf, fi) == 4363 BTRFS_FILE_EXTENT_INLINE) 4364 continue; 4365 4366 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 4367 4368 /* holes don't have refs */ 4369 if (disk_bytenr == 0) 4370 continue; 4371 4372 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 4373 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, 4374 leaf->start, 0, key.objectid, 0); 4375 BUG_ON(ret); 4376 } 4377 return 0; 4378 } 4379 4380 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4381 struct btrfs_root *root, 4382 struct btrfs_leaf_ref *ref) 4383 { 4384 int i; 4385 int ret; 4386 struct btrfs_extent_info *info; 4387 struct refsort *sorted; 4388 4389 if (ref->nritems == 0) 4390 return 0; 4391 4392 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 4393 for (i = 0; i < ref->nritems; i++) { 4394 sorted[i].bytenr = ref->extents[i].bytenr; 4395 sorted[i].slot = i; 4396 } 4397 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 4398 4399 /* 4400 * the items in the ref were sorted when the ref was inserted 4401 * into the ref cache, so this is already in order 4402 */ 4403 for (i = 0; i < ref->nritems; i++) { 4404 info = ref->extents + sorted[i].slot; 4405 ret = btrfs_free_extent(trans, root, info->bytenr, 4406 info->num_bytes, ref->bytenr, 4407 ref->owner, ref->generation, 4408 info->objectid, 0); 4409 4410 atomic_inc(&root->fs_info->throttle_gen); 4411 wake_up(&root->fs_info->transaction_throttle); 4412 cond_resched(); 4413 4414 BUG_ON(ret); 4415 info++; 4416 } 4417 4418 kfree(sorted); 4419 return 0; 4420 } 4421 4422 4423 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4424 struct btrfs_root *root, u64 start, 4425 u64 len, u32 *refs) 4426 { 4427 int ret; 4428 4429 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); 4430 BUG_ON(ret); 4431 4432 #if 0 /* some debugging code in case we see problems here */ 4433 /* if the refs count is one, it won't get increased again. But 4434 * if the ref count is > 1, someone may be decreasing it at 4435 * the same time we are. 
4436 */ 4437 if (*refs != 1) { 4438 struct extent_buffer *eb = NULL; 4439 eb = btrfs_find_create_tree_block(root, start, len); 4440 if (eb) 4441 btrfs_tree_lock(eb); 4442 4443 mutex_lock(&root->fs_info->alloc_mutex); 4444 ret = lookup_extent_ref(NULL, root, start, len, refs); 4445 BUG_ON(ret); 4446 mutex_unlock(&root->fs_info->alloc_mutex); 4447 4448 if (eb) { 4449 btrfs_tree_unlock(eb); 4450 free_extent_buffer(eb); 4451 } 4452 if (*refs == 1) { 4453 printk(KERN_ERR "btrfs block %llu went down to one " 4454 "during drop_snap\n", (unsigned long long)start); 4455 } 4456 4457 } 4458 #endif 4459 4460 cond_resched(); 4461 return ret; 4462 } 4463 4464 4465 /* 4466 * this is used while deleting old snapshots, and it drops the refs 4467 * on a whole subtree starting from a level 1 node. 4468 * 4469 * The idea is to sort all the leaf pointers, and then drop the 4470 * ref on all the leaves in order. Most of the time the leaves 4471 * will have ref cache entries, so no leaf IOs will be required to 4472 * find the extents they have references on. 4473 * 4474 * For each leaf, any references it has are also dropped in order 4475 * 4476 * This ends up dropping the references in something close to optimal 4477 * order for reading and modifying the extent allocation tree. 4478 */ 4479 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 4480 struct btrfs_root *root, 4481 struct btrfs_path *path) 4482 { 4483 u64 bytenr; 4484 u64 root_owner; 4485 u64 root_gen; 4486 struct extent_buffer *eb = path->nodes[1]; 4487 struct extent_buffer *leaf; 4488 struct btrfs_leaf_ref *ref; 4489 struct refsort *sorted = NULL; 4490 int nritems = btrfs_header_nritems(eb); 4491 int ret; 4492 int i; 4493 int refi = 0; 4494 int slot = path->slots[1]; 4495 u32 blocksize = btrfs_level_size(root, 0); 4496 u32 refs; 4497 4498 if (nritems == 0) 4499 goto out; 4500 4501 root_owner = btrfs_header_owner(eb); 4502 root_gen = btrfs_header_generation(eb); 4503 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 4504 4505 /* 4506 * step one, sort all the leaf pointers so we don't scribble 4507 * randomly into the extent allocation tree 4508 */ 4509 for (i = slot; i < nritems; i++) { 4510 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4511 sorted[refi].slot = i; 4512 refi++; 4513 } 4514 4515 /* 4516 * nritems won't be zero, but if we're picking up drop_snapshot 4517 * after a crash, slot might be > 0, so double check things 4518 * just in case. 4519 */ 4520 if (refi == 0) 4521 goto out; 4522 4523 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4524 4525 /* 4526 * the first loop frees everything the leaves point to 4527 */ 4528 for (i = 0; i < refi; i++) { 4529 u64 ptr_gen; 4530 4531 bytenr = sorted[i].bytenr; 4532 4533 /* 4534 * check the reference count on this leaf. If it is > 1 4535 * we just decrement it below and don't update any 4536 * of the refs the leaf points to. 4537 */ 4538 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4539 blocksize, &refs); 4540 BUG_ON(ret); 4541 if (refs != 1) 4542 continue; 4543 4544 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4545 4546 /* 4547 * the leaf only had one reference, which means the 4548 * only thing pointing to this leaf is the snapshot 4549 * we're deleting. It isn't possible for the reference 4550 * count to increase again later 4551 * 4552 * The reference cache is checked for the leaf, 4553 * and if found we'll be able to drop any refs held by 4554 * the leaf without needing to read it in. 
4555 */ 4556 ref = btrfs_lookup_leaf_ref(root, bytenr); 4557 if (ref && ref->generation != ptr_gen) { 4558 btrfs_free_leaf_ref(root, ref); 4559 ref = NULL; 4560 } 4561 if (ref) { 4562 ret = cache_drop_leaf_ref(trans, root, ref); 4563 BUG_ON(ret); 4564 btrfs_remove_leaf_ref(root, ref); 4565 btrfs_free_leaf_ref(root, ref); 4566 } else { 4567 /* 4568 * the leaf wasn't in the reference cache, so 4569 * we have to read it. 4570 */ 4571 leaf = read_tree_block(root, bytenr, blocksize, 4572 ptr_gen); 4573 ret = btrfs_drop_leaf_ref(trans, root, leaf); 4574 BUG_ON(ret); 4575 free_extent_buffer(leaf); 4576 } 4577 atomic_inc(&root->fs_info->throttle_gen); 4578 wake_up(&root->fs_info->transaction_throttle); 4579 cond_resched(); 4580 } 4581 4582 /* 4583 * run through the loop again to free the refs on the leaves. 4584 * This is faster than doing it in the loop above because 4585 * the leaves are likely to be clustered together. We end up 4586 * working in nice chunks on the extent allocation tree. 4587 */ 4588 for (i = 0; i < refi; i++) { 4589 bytenr = sorted[i].bytenr; 4590 ret = btrfs_free_extent(trans, root, bytenr, 4591 blocksize, eb->start, 4592 root_owner, root_gen, 0, 1); 4593 BUG_ON(ret); 4594 4595 atomic_inc(&root->fs_info->throttle_gen); 4596 wake_up(&root->fs_info->transaction_throttle); 4597 cond_resched(); 4598 } 4599 out: 4600 kfree(sorted); 4601 4602 /* 4603 * update the path to show we've processed the entire level 1 4604 * node. This will get saved into the root's drop_snapshot_progress 4605 * field so these drops are not repeated again if this transaction 4606 * commits. 4607 */ 4608 path->slots[1] = nritems; 4609 return 0; 4610 } 4611 4612 /* 4613 * helper function for drop_snapshot, this walks down the tree dropping ref 4614 * counts as it goes. 4615 */ 4616 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4617 struct btrfs_root *root, 4618 struct btrfs_path *path, int *level) 4619 { 4620 u64 root_owner; 4621 u64 root_gen; 4622 u64 bytenr; 4623 u64 ptr_gen; 4624 struct extent_buffer *next; 4625 struct extent_buffer *cur; 4626 struct extent_buffer *parent; 4627 u32 blocksize; 4628 int ret; 4629 u32 refs; 4630 4631 WARN_ON(*level < 0); 4632 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4633 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, 4634 path->nodes[*level]->len, &refs); 4635 BUG_ON(ret); 4636 if (refs > 1) 4637 goto out; 4638 4639 /* 4640 * walk down to the last node level and free all the leaves 4641 */ 4642 while (*level >= 0) { 4643 WARN_ON(*level < 0); 4644 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4645 cur = path->nodes[*level]; 4646 4647 if (btrfs_header_level(cur) != *level) 4648 WARN_ON(1); 4649 4650 if (path->slots[*level] >= 4651 btrfs_header_nritems(cur)) 4652 break; 4653 4654 /* the new code goes down to level 1 and does all the 4655 * leaves pointed to that node in bulk. So, this check 4656 * for level 0 will always be false. 4657 * 4658 * But, the disk format allows the drop_snapshot_progress 4659 * field in the root to leave things in a state where 4660 * a leaf will need cleaning up here. If someone crashes 4661 * with the old code and then boots with the new code, 4662 * we might find a leaf here. 4663 */ 4664 if (*level == 0) { 4665 ret = btrfs_drop_leaf_ref(trans, root, cur); 4666 BUG_ON(ret); 4667 break; 4668 } 4669 4670 /* 4671 * once we get to level one, process the whole node 4672 * at once, including everything below it. 
4673 */ 4674 if (*level == 1) { 4675 ret = drop_level_one_refs(trans, root, path); 4676 BUG_ON(ret); 4677 break; 4678 } 4679 4680 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4681 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4682 blocksize = btrfs_level_size(root, *level - 1); 4683 4684 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4685 blocksize, &refs); 4686 BUG_ON(ret); 4687 4688 /* 4689 * if there is more than one reference, we don't need 4690 * to read that node to drop any references it has. We 4691 * just drop the ref we hold on that node and move on to the 4692 * next slot in this level. 4693 */ 4694 if (refs != 1) { 4695 parent = path->nodes[*level]; 4696 root_owner = btrfs_header_owner(parent); 4697 root_gen = btrfs_header_generation(parent); 4698 path->slots[*level]++; 4699 4700 ret = btrfs_free_extent(trans, root, bytenr, 4701 blocksize, parent->start, 4702 root_owner, root_gen, 4703 *level - 1, 1); 4704 BUG_ON(ret); 4705 4706 atomic_inc(&root->fs_info->throttle_gen); 4707 wake_up(&root->fs_info->transaction_throttle); 4708 cond_resched(); 4709 4710 continue; 4711 } 4712 4713 /* 4714 * we need to keep freeing things in the next level down. 4715 * read the block and loop around to process it 4716 */ 4717 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4718 WARN_ON(*level <= 0); 4719 if (path->nodes[*level-1]) 4720 free_extent_buffer(path->nodes[*level-1]); 4721 path->nodes[*level-1] = next; 4722 *level = btrfs_header_level(next); 4723 path->slots[*level] = 0; 4724 cond_resched(); 4725 } 4726 out: 4727 WARN_ON(*level < 0); 4728 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4729 4730 if (path->nodes[*level] == root->node) { 4731 parent = path->nodes[*level]; 4732 bytenr = path->nodes[*level]->start; 4733 } else { 4734 parent = path->nodes[*level + 1]; 4735 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 4736 } 4737 4738 blocksize = btrfs_level_size(root, *level); 4739 root_owner = btrfs_header_owner(parent); 4740 root_gen = btrfs_header_generation(parent); 4741 4742 /* 4743 * cleanup and free the reference on the last node 4744 * we processed 4745 */ 4746 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4747 parent->start, root_owner, root_gen, 4748 *level, 1); 4749 free_extent_buffer(path->nodes[*level]); 4750 path->nodes[*level] = NULL; 4751 4752 *level += 1; 4753 BUG_ON(ret); 4754 4755 cond_resched(); 4756 return 0; 4757 } 4758 #endif 4759 4760 struct walk_control { 4761 u64 refs[BTRFS_MAX_LEVEL]; 4762 u64 flags[BTRFS_MAX_LEVEL]; 4763 struct btrfs_key update_progress; 4764 int stage; 4765 int level; 4766 int shared_level; 4767 int update_ref; 4768 int keep_locks; 4769 }; 4770 4771 #define DROP_REFERENCE 1 4772 #define UPDATE_BACKREF 2 4773 4774 /* 4775 * hepler to process tree block while walking down the tree. 4776 * 4777 * when wc->stage == DROP_REFERENCE, this function checks 4778 * reference count of the block. if the block is shared and 4779 * we need update back refs for the subtree rooted at the 4780 * block, this function changes wc->stage to UPDATE_BACKREF 4781 * 4782 * when wc->stage == UPDATE_BACKREF, this function updates 4783 * back refs for pointers in the block. 4784 * 4785 * NOTE: return value 1 means we should stop walking down. 
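 *
 * rough summary of the decision (the code below is authoritative):
 *   DROP_REFERENCE: refs[level] > 1  -> return 1, stop descending, the
 *                   walk up will only drop the single ref we hold
 *                   refs[level] == 1 -> return 0, keep walking down
 *   UPDATE_BACKREF: block owned by another root -> return 1
 *                   full backref flag not set   -> add full backrefs
 *                   for everything the block points to, drop the old
 *                   refs and set the flag on the block itself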
4786 */ 4787 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 4788 struct btrfs_root *root, 4789 struct btrfs_path *path, 4790 struct walk_control *wc) 4791 { 4792 int level = wc->level; 4793 struct extent_buffer *eb = path->nodes[level]; 4794 struct btrfs_key key; 4795 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4796 int ret; 4797 4798 if (wc->stage == UPDATE_BACKREF && 4799 btrfs_header_owner(eb) != root->root_key.objectid) 4800 return 1; 4801 4802 /* 4803 * when reference count of tree block is 1, it won't increase 4804 * again. once full backref flag is set, we never clear it. 4805 */ 4806 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 4807 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { 4808 BUG_ON(!path->locks[level]); 4809 ret = btrfs_lookup_extent_info(trans, root, 4810 eb->start, eb->len, 4811 &wc->refs[level], 4812 &wc->flags[level]); 4813 BUG_ON(ret); 4814 BUG_ON(wc->refs[level] == 0); 4815 } 4816 4817 if (wc->stage == DROP_REFERENCE && 4818 wc->update_ref && wc->refs[level] > 1) { 4819 BUG_ON(eb == root->node); 4820 BUG_ON(path->slots[level] > 0); 4821 if (level == 0) 4822 btrfs_item_key_to_cpu(eb, &key, path->slots[level]); 4823 else 4824 btrfs_node_key_to_cpu(eb, &key, path->slots[level]); 4825 if (btrfs_header_owner(eb) == root->root_key.objectid && 4826 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { 4827 wc->stage = UPDATE_BACKREF; 4828 wc->shared_level = level; 4829 } 4830 } 4831 4832 if (wc->stage == DROP_REFERENCE) { 4833 if (wc->refs[level] > 1) 4834 return 1; 4835 4836 if (path->locks[level] && !wc->keep_locks) { 4837 btrfs_tree_unlock(eb); 4838 path->locks[level] = 0; 4839 } 4840 return 0; 4841 } 4842 4843 /* wc->stage == UPDATE_BACKREF */ 4844 if (!(wc->flags[level] & flag)) { 4845 BUG_ON(!path->locks[level]); 4846 ret = btrfs_inc_ref(trans, root, eb, 1); 4847 BUG_ON(ret); 4848 ret = btrfs_dec_ref(trans, root, eb, 0); 4849 BUG_ON(ret); 4850 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 4851 eb->len, flag, 0); 4852 BUG_ON(ret); 4853 wc->flags[level] |= flag; 4854 } 4855 4856 /* 4857 * the block is shared by multiple trees, so it's not good to 4858 * keep the tree lock 4859 */ 4860 if (path->locks[level] && level > 0) { 4861 btrfs_tree_unlock(eb); 4862 path->locks[level] = 0; 4863 } 4864 return 0; 4865 } 4866 4867 /* 4868 * hepler to process tree block while walking up the tree. 4869 * 4870 * when wc->stage == DROP_REFERENCE, this function drops 4871 * reference count on the block. 4872 * 4873 * when wc->stage == UPDATE_BACKREF, this function changes 4874 * wc->stage back to DROP_REFERENCE if we changed wc->stage 4875 * to UPDATE_BACKREF previously while processing the block. 4876 * 4877 * NOTE: return value 1 means we should stop walking up. 4878 */ 4879 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 4880 struct btrfs_root *root, 4881 struct btrfs_path *path, 4882 struct walk_control *wc) 4883 { 4884 int ret = 0; 4885 int level = wc->level; 4886 struct extent_buffer *eb = path->nodes[level]; 4887 u64 parent = 0; 4888 4889 if (wc->stage == UPDATE_BACKREF) { 4890 BUG_ON(wc->shared_level < level); 4891 if (level < wc->shared_level) 4892 goto out; 4893 4894 BUG_ON(wc->refs[level] <= 1); 4895 ret = find_next_key(path, level + 1, &wc->update_progress); 4896 if (ret > 0) 4897 wc->update_ref = 0; 4898 4899 wc->stage = DROP_REFERENCE; 4900 wc->shared_level = -1; 4901 path->slots[level] = 0; 4902 4903 /* 4904 * check reference count again if the block isn't locked. 
4905 * we should start walking down the tree again if reference 4906 * count is one. 4907 */ 4908 if (!path->locks[level]) { 4909 BUG_ON(level == 0); 4910 btrfs_tree_lock(eb); 4911 btrfs_set_lock_blocking(eb); 4912 path->locks[level] = 1; 4913 4914 ret = btrfs_lookup_extent_info(trans, root, 4915 eb->start, eb->len, 4916 &wc->refs[level], 4917 &wc->flags[level]); 4918 BUG_ON(ret); 4919 BUG_ON(wc->refs[level] == 0); 4920 if (wc->refs[level] == 1) { 4921 btrfs_tree_unlock(eb); 4922 path->locks[level] = 0; 4923 return 1; 4924 } 4925 } else { 4926 BUG_ON(level != 0); 4927 } 4928 } 4929 4930 /* wc->stage == DROP_REFERENCE */ 4931 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 4932 4933 if (wc->refs[level] == 1) { 4934 if (level == 0) { 4935 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4936 ret = btrfs_dec_ref(trans, root, eb, 1); 4937 else 4938 ret = btrfs_dec_ref(trans, root, eb, 0); 4939 BUG_ON(ret); 4940 } 4941 /* make block locked assertion in clean_tree_block happy */ 4942 if (!path->locks[level] && 4943 btrfs_header_generation(eb) == trans->transid) { 4944 btrfs_tree_lock(eb); 4945 btrfs_set_lock_blocking(eb); 4946 path->locks[level] = 1; 4947 } 4948 clean_tree_block(trans, root, eb); 4949 } 4950 4951 if (eb == root->node) { 4952 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4953 parent = eb->start; 4954 else 4955 BUG_ON(root->root_key.objectid != 4956 btrfs_header_owner(eb)); 4957 } else { 4958 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4959 parent = path->nodes[level + 1]->start; 4960 else 4961 BUG_ON(root->root_key.objectid != 4962 btrfs_header_owner(path->nodes[level + 1])); 4963 } 4964 4965 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 4966 root->root_key.objectid, level, 0); 4967 BUG_ON(ret); 4968 out: 4969 wc->refs[level] = 0; 4970 wc->flags[level] = 0; 4971 return ret; 4972 } 4973 4974 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4975 struct btrfs_root *root, 4976 struct btrfs_path *path, 4977 struct walk_control *wc) 4978 { 4979 struct extent_buffer *next; 4980 struct extent_buffer *cur; 4981 u64 bytenr; 4982 u64 ptr_gen; 4983 u32 blocksize; 4984 int level = wc->level; 4985 int ret; 4986 4987 while (level >= 0) { 4988 cur = path->nodes[level]; 4989 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 4990 4991 ret = walk_down_proc(trans, root, path, wc); 4992 if (ret > 0) 4993 break; 4994 4995 if (level == 0) 4996 break; 4997 4998 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 4999 blocksize = btrfs_level_size(root, level - 1); 5000 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 5001 5002 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 5003 btrfs_tree_lock(next); 5004 btrfs_set_lock_blocking(next); 5005 5006 level--; 5007 BUG_ON(level != btrfs_header_level(next)); 5008 path->nodes[level] = next; 5009 path->slots[level] = 0; 5010 path->locks[level] = 1; 5011 wc->level = level; 5012 } 5013 return 0; 5014 } 5015 5016 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5017 struct btrfs_root *root, 5018 struct btrfs_path *path, 5019 struct walk_control *wc, int max_level) 5020 { 5021 int level = wc->level; 5022 int ret; 5023 5024 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 5025 while (level < max_level && path->nodes[level]) { 5026 wc->level = level; 5027 if (path->slots[level] + 1 < 5028 btrfs_header_nritems(path->nodes[level])) { 5029 path->slots[level]++; 5030 return 0; 5031 } else { 5032 ret = walk_up_proc(trans, root, path, wc); 
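/*
 * walk_up_proc only returns 1 when the block turned out to have a
 * single reference after relocking (UPDATE_BACKREF stage), in which
 * case we go back to walking down this subtree instead of
 * continuing up
 */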
5033 if (ret > 0) 5034 return 0; 5035 5036 if (path->locks[level]) { 5037 btrfs_tree_unlock(path->nodes[level]); 5038 path->locks[level] = 0; 5039 } 5040 free_extent_buffer(path->nodes[level]); 5041 path->nodes[level] = NULL; 5042 level++; 5043 } 5044 } 5045 return 1; 5046 } 5047 5048 /* 5049 * drop a subvolume tree. 5050 * 5051 * this function traverses the tree freeing any blocks that only 5052 * referenced by the tree. 5053 * 5054 * when a shared tree block is found. this function decreases its 5055 * reference count by one. if update_ref is true, this function 5056 * also make sure backrefs for the shared block and all lower level 5057 * blocks are properly updated. 5058 */ 5059 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 5060 { 5061 struct btrfs_path *path; 5062 struct btrfs_trans_handle *trans; 5063 struct btrfs_root *tree_root = root->fs_info->tree_root; 5064 struct btrfs_root_item *root_item = &root->root_item; 5065 struct walk_control *wc; 5066 struct btrfs_key key; 5067 int err = 0; 5068 int ret; 5069 int level; 5070 5071 path = btrfs_alloc_path(); 5072 BUG_ON(!path); 5073 5074 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5075 BUG_ON(!wc); 5076 5077 trans = btrfs_start_transaction(tree_root, 1); 5078 5079 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5080 level = btrfs_header_level(root->node); 5081 path->nodes[level] = btrfs_lock_root_node(root); 5082 btrfs_set_lock_blocking(path->nodes[level]); 5083 path->slots[level] = 0; 5084 path->locks[level] = 1; 5085 memset(&wc->update_progress, 0, 5086 sizeof(wc->update_progress)); 5087 } else { 5088 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5089 memcpy(&wc->update_progress, &key, 5090 sizeof(wc->update_progress)); 5091 5092 level = root_item->drop_level; 5093 BUG_ON(level == 0); 5094 path->lowest_level = level; 5095 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5096 path->lowest_level = 0; 5097 if (ret < 0) { 5098 err = ret; 5099 goto out; 5100 } 5101 btrfs_node_key_to_cpu(path->nodes[level], &key, 5102 path->slots[level]); 5103 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); 5104 5105 /* 5106 * unlock our path, this is safe because only this 5107 * function is allowed to delete this snapshot 5108 */ 5109 btrfs_unlock_up_safe(path, 0); 5110 5111 level = btrfs_header_level(root->node); 5112 while (1) { 5113 btrfs_tree_lock(path->nodes[level]); 5114 btrfs_set_lock_blocking(path->nodes[level]); 5115 5116 ret = btrfs_lookup_extent_info(trans, root, 5117 path->nodes[level]->start, 5118 path->nodes[level]->len, 5119 &wc->refs[level], 5120 &wc->flags[level]); 5121 BUG_ON(ret); 5122 BUG_ON(wc->refs[level] == 0); 5123 5124 if (level == root_item->drop_level) 5125 break; 5126 5127 btrfs_tree_unlock(path->nodes[level]); 5128 WARN_ON(wc->refs[level] != 1); 5129 level--; 5130 } 5131 } 5132 5133 wc->level = level; 5134 wc->shared_level = -1; 5135 wc->stage = DROP_REFERENCE; 5136 wc->update_ref = update_ref; 5137 wc->keep_locks = 0; 5138 5139 while (1) { 5140 ret = walk_down_tree(trans, root, path, wc); 5141 if (ret < 0) { 5142 err = ret; 5143 break; 5144 } 5145 5146 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 5147 if (ret < 0) { 5148 err = ret; 5149 break; 5150 } 5151 5152 if (ret > 0) { 5153 BUG_ON(wc->stage != DROP_REFERENCE); 5154 break; 5155 } 5156 5157 if (wc->stage == DROP_REFERENCE) { 5158 level = wc->level; 5159 btrfs_node_key(path->nodes[level], 5160 &root_item->drop_progress, 5161 path->slots[level]); 5162 root_item->drop_level = level; 5163 } 5164 5165 
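/*
 * drop_progress and drop_level record how far we got, so the drop
 * can be resumed from root_item after a commit or a crash -- see
 * the resume path at the top of this function.  if the running
 * transaction wants to commit, root_item is written out below
 * before we end the transaction.
 */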
BUG_ON(wc->level == 0); 5166 if (trans->transaction->in_commit || 5167 trans->transaction->delayed_refs.flushing) { 5168 ret = btrfs_update_root(trans, tree_root, 5169 &root->root_key, 5170 root_item); 5171 BUG_ON(ret); 5172 5173 btrfs_end_transaction(trans, tree_root); 5174 trans = btrfs_start_transaction(tree_root, 1); 5175 } else { 5176 unsigned long update; 5177 update = trans->delayed_ref_updates; 5178 trans->delayed_ref_updates = 0; 5179 if (update) 5180 btrfs_run_delayed_refs(trans, tree_root, 5181 update); 5182 } 5183 } 5184 btrfs_release_path(root, path); 5185 BUG_ON(err); 5186 5187 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5188 BUG_ON(ret); 5189 5190 free_extent_buffer(root->node); 5191 free_extent_buffer(root->commit_root); 5192 kfree(root); 5193 out: 5194 btrfs_end_transaction(trans, tree_root); 5195 kfree(wc); 5196 btrfs_free_path(path); 5197 return err; 5198 } 5199 5200 /* 5201 * drop subtree rooted at tree block 'node'. 5202 * 5203 * NOTE: this function will unlock and release tree block 'node' 5204 */ 5205 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5206 struct btrfs_root *root, 5207 struct extent_buffer *node, 5208 struct extent_buffer *parent) 5209 { 5210 struct btrfs_path *path; 5211 struct walk_control *wc; 5212 int level; 5213 int parent_level; 5214 int ret = 0; 5215 int wret; 5216 5217 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5218 5219 path = btrfs_alloc_path(); 5220 BUG_ON(!path); 5221 5222 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5223 BUG_ON(!wc); 5224 5225 btrfs_assert_tree_locked(parent); 5226 parent_level = btrfs_header_level(parent); 5227 extent_buffer_get(parent); 5228 path->nodes[parent_level] = parent; 5229 path->slots[parent_level] = btrfs_header_nritems(parent); 5230 5231 btrfs_assert_tree_locked(node); 5232 level = btrfs_header_level(node); 5233 path->nodes[level] = node; 5234 path->slots[level] = 0; 5235 path->locks[level] = 1; 5236 5237 wc->refs[parent_level] = 1; 5238 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5239 wc->level = level; 5240 wc->shared_level = -1; 5241 wc->stage = DROP_REFERENCE; 5242 wc->update_ref = 0; 5243 wc->keep_locks = 1; 5244 5245 while (1) { 5246 wret = walk_down_tree(trans, root, path, wc); 5247 if (wret < 0) { 5248 ret = wret; 5249 break; 5250 } 5251 5252 wret = walk_up_tree(trans, root, path, wc, parent_level); 5253 if (wret < 0) 5254 ret = wret; 5255 if (wret != 0) 5256 break; 5257 } 5258 5259 kfree(wc); 5260 btrfs_free_path(path); 5261 return ret; 5262 } 5263 5264 #if 0 5265 static unsigned long calc_ra(unsigned long start, unsigned long last, 5266 unsigned long nr) 5267 { 5268 return min(last, start + nr - 1); 5269 } 5270 5271 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 5272 u64 len) 5273 { 5274 u64 page_start; 5275 u64 page_end; 5276 unsigned long first_index; 5277 unsigned long last_index; 5278 unsigned long i; 5279 struct page *page; 5280 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5281 struct file_ra_state *ra; 5282 struct btrfs_ordered_extent *ordered; 5283 unsigned int total_read = 0; 5284 unsigned int total_dirty = 0; 5285 int ret = 0; 5286 5287 ra = kzalloc(sizeof(*ra), GFP_NOFS); 5288 5289 mutex_lock(&inode->i_mutex); 5290 first_index = start >> PAGE_CACHE_SHIFT; 5291 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 5292 5293 /* make sure the dirty trick played by the caller work */ 5294 ret = invalidate_inode_pages2_range(inode->i_mapping, 5295 first_index, last_index); 5296 if (ret) 5297 goto out_unlock; 5298 
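/*
 * prime the readahead state so the loop below pulls the file's
 * pages in large batches instead of one page at a time
 */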
5299 file_ra_state_init(ra, inode->i_mapping); 5300 5301 for (i = first_index ; i <= last_index; i++) { 5302 if (total_read % ra->ra_pages == 0) { 5303 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 5304 calc_ra(i, last_index, ra->ra_pages)); 5305 } 5306 total_read++; 5307 again: 5308 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 5309 BUG_ON(1); 5310 page = grab_cache_page(inode->i_mapping, i); 5311 if (!page) { 5312 ret = -ENOMEM; 5313 goto out_unlock; 5314 } 5315 if (!PageUptodate(page)) { 5316 btrfs_readpage(NULL, page); 5317 lock_page(page); 5318 if (!PageUptodate(page)) { 5319 unlock_page(page); 5320 page_cache_release(page); 5321 ret = -EIO; 5322 goto out_unlock; 5323 } 5324 } 5325 wait_on_page_writeback(page); 5326 5327 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 5328 page_end = page_start + PAGE_CACHE_SIZE - 1; 5329 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5330 5331 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5332 if (ordered) { 5333 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5334 unlock_page(page); 5335 page_cache_release(page); 5336 btrfs_start_ordered_extent(inode, ordered, 1); 5337 btrfs_put_ordered_extent(ordered); 5338 goto again; 5339 } 5340 set_page_extent_mapped(page); 5341 5342 if (i == first_index) 5343 set_extent_bits(io_tree, page_start, page_end, 5344 EXTENT_BOUNDARY, GFP_NOFS); 5345 btrfs_set_extent_delalloc(inode, page_start, page_end); 5346 5347 set_page_dirty(page); 5348 total_dirty++; 5349 5350 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5351 unlock_page(page); 5352 page_cache_release(page); 5353 } 5354 5355 out_unlock: 5356 kfree(ra); 5357 mutex_unlock(&inode->i_mutex); 5358 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 5359 return ret; 5360 } 5361 5362 static noinline int relocate_data_extent(struct inode *reloc_inode, 5363 struct btrfs_key *extent_key, 5364 u64 offset) 5365 { 5366 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5367 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 5368 struct extent_map *em; 5369 u64 start = extent_key->objectid - offset; 5370 u64 end = start + extent_key->offset - 1; 5371 5372 em = alloc_extent_map(GFP_NOFS); 5373 BUG_ON(!em || IS_ERR(em)); 5374 5375 em->start = start; 5376 em->len = extent_key->offset; 5377 em->block_len = extent_key->offset; 5378 em->block_start = extent_key->objectid; 5379 em->bdev = root->fs_info->fs_devices->latest_bdev; 5380 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5381 5382 /* setup extent map to cheat btrfs_readpage */ 5383 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5384 while (1) { 5385 int ret; 5386 spin_lock(&em_tree->lock); 5387 ret = add_extent_mapping(em_tree, em); 5388 spin_unlock(&em_tree->lock); 5389 if (ret != -EEXIST) { 5390 free_extent_map(em); 5391 break; 5392 } 5393 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 5394 } 5395 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5396 5397 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 5398 } 5399 5400 struct btrfs_ref_path { 5401 u64 extent_start; 5402 u64 nodes[BTRFS_MAX_LEVEL]; 5403 u64 root_objectid; 5404 u64 root_generation; 5405 u64 owner_objectid; 5406 u32 num_refs; 5407 int lowest_level; 5408 int current_level; 5409 int shared_level; 5410 5411 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 5412 u64 new_nodes[BTRFS_MAX_LEVEL]; 5413 }; 5414 5415 struct disk_extent { 5416 u64 ram_bytes; 5417 u64 disk_bytenr; 5418 u64 disk_num_bytes; 5419 u64 offset; 5420 u64 
num_bytes; 5421 u8 compression; 5422 u8 encryption; 5423 u16 other_encoding; 5424 }; 5425 5426 static int is_cowonly_root(u64 root_objectid) 5427 { 5428 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5429 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5430 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5431 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5432 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5433 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5434 return 1; 5435 return 0; 5436 } 5437 5438 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5439 struct btrfs_root *extent_root, 5440 struct btrfs_ref_path *ref_path, 5441 int first_time) 5442 { 5443 struct extent_buffer *leaf; 5444 struct btrfs_path *path; 5445 struct btrfs_extent_ref *ref; 5446 struct btrfs_key key; 5447 struct btrfs_key found_key; 5448 u64 bytenr; 5449 u32 nritems; 5450 int level; 5451 int ret = 1; 5452 5453 path = btrfs_alloc_path(); 5454 if (!path) 5455 return -ENOMEM; 5456 5457 if (first_time) { 5458 ref_path->lowest_level = -1; 5459 ref_path->current_level = -1; 5460 ref_path->shared_level = -1; 5461 goto walk_up; 5462 } 5463 walk_down: 5464 level = ref_path->current_level - 1; 5465 while (level >= -1) { 5466 u64 parent; 5467 if (level < ref_path->lowest_level) 5468 break; 5469 5470 if (level >= 0) 5471 bytenr = ref_path->nodes[level]; 5472 else 5473 bytenr = ref_path->extent_start; 5474 BUG_ON(bytenr == 0); 5475 5476 parent = ref_path->nodes[level + 1]; 5477 ref_path->nodes[level + 1] = 0; 5478 ref_path->current_level = level; 5479 BUG_ON(parent == 0); 5480 5481 key.objectid = bytenr; 5482 key.offset = parent + 1; 5483 key.type = BTRFS_EXTENT_REF_KEY; 5484 5485 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5486 if (ret < 0) 5487 goto out; 5488 BUG_ON(ret == 0); 5489 5490 leaf = path->nodes[0]; 5491 nritems = btrfs_header_nritems(leaf); 5492 if (path->slots[0] >= nritems) { 5493 ret = btrfs_next_leaf(extent_root, path); 5494 if (ret < 0) 5495 goto out; 5496 if (ret > 0) 5497 goto next; 5498 leaf = path->nodes[0]; 5499 } 5500 5501 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5502 if (found_key.objectid == bytenr && 5503 found_key.type == BTRFS_EXTENT_REF_KEY) { 5504 if (level < ref_path->shared_level) 5505 ref_path->shared_level = level; 5506 goto found; 5507 } 5508 next: 5509 level--; 5510 btrfs_release_path(extent_root, path); 5511 cond_resched(); 5512 } 5513 /* reached lowest level */ 5514 ret = 1; 5515 goto out; 5516 walk_up: 5517 level = ref_path->current_level; 5518 while (level < BTRFS_MAX_LEVEL - 1) { 5519 u64 ref_objectid; 5520 5521 if (level >= 0) 5522 bytenr = ref_path->nodes[level]; 5523 else 5524 bytenr = ref_path->extent_start; 5525 5526 BUG_ON(bytenr == 0); 5527 5528 key.objectid = bytenr; 5529 key.offset = 0; 5530 key.type = BTRFS_EXTENT_REF_KEY; 5531 5532 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5533 if (ret < 0) 5534 goto out; 5535 5536 leaf = path->nodes[0]; 5537 nritems = btrfs_header_nritems(leaf); 5538 if (path->slots[0] >= nritems) { 5539 ret = btrfs_next_leaf(extent_root, path); 5540 if (ret < 0) 5541 goto out; 5542 if (ret > 0) { 5543 /* the extent was freed by someone */ 5544 if (ref_path->lowest_level == level) 5545 goto out; 5546 btrfs_release_path(extent_root, path); 5547 goto walk_down; 5548 } 5549 leaf = path->nodes[0]; 5550 } 5551 5552 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5553 if (found_key.objectid != bytenr || 5554 found_key.type != BTRFS_EXTENT_REF_KEY) { 5555 /* the extent was freed by someone 
*/ 5556 if (ref_path->lowest_level == level) { 5557 ret = 1; 5558 goto out; 5559 } 5560 btrfs_release_path(extent_root, path); 5561 goto walk_down; 5562 } 5563 found: 5564 ref = btrfs_item_ptr(leaf, path->slots[0], 5565 struct btrfs_extent_ref); 5566 ref_objectid = btrfs_ref_objectid(leaf, ref); 5567 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5568 if (first_time) { 5569 level = (int)ref_objectid; 5570 BUG_ON(level >= BTRFS_MAX_LEVEL); 5571 ref_path->lowest_level = level; 5572 ref_path->current_level = level; 5573 ref_path->nodes[level] = bytenr; 5574 } else { 5575 WARN_ON(ref_objectid != level); 5576 } 5577 } else { 5578 WARN_ON(level != -1); 5579 } 5580 first_time = 0; 5581 5582 if (ref_path->lowest_level == level) { 5583 ref_path->owner_objectid = ref_objectid; 5584 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 5585 } 5586 5587 /* 5588 * the block is tree root or the block isn't in reference 5589 * counted tree. 5590 */ 5591 if (found_key.objectid == found_key.offset || 5592 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 5593 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5594 ref_path->root_generation = 5595 btrfs_ref_generation(leaf, ref); 5596 if (level < 0) { 5597 /* special reference from the tree log */ 5598 ref_path->nodes[0] = found_key.offset; 5599 ref_path->current_level = 0; 5600 } 5601 ret = 0; 5602 goto out; 5603 } 5604 5605 level++; 5606 BUG_ON(ref_path->nodes[level] != 0); 5607 ref_path->nodes[level] = found_key.offset; 5608 ref_path->current_level = level; 5609 5610 /* 5611 * the reference was created in the running transaction, 5612 * no need to continue walking up. 5613 */ 5614 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 5615 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5616 ref_path->root_generation = 5617 btrfs_ref_generation(leaf, ref); 5618 ret = 0; 5619 goto out; 5620 } 5621 5622 btrfs_release_path(extent_root, path); 5623 cond_resched(); 5624 } 5625 /* reached max tree level, but no tree root found. 
*/ 5626 BUG(); 5627 out: 5628 btrfs_free_path(path); 5629 return ret; 5630 } 5631 5632 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 5633 struct btrfs_root *extent_root, 5634 struct btrfs_ref_path *ref_path, 5635 u64 extent_start) 5636 { 5637 memset(ref_path, 0, sizeof(*ref_path)); 5638 ref_path->extent_start = extent_start; 5639 5640 return __next_ref_path(trans, extent_root, ref_path, 1); 5641 } 5642 5643 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 5644 struct btrfs_root *extent_root, 5645 struct btrfs_ref_path *ref_path) 5646 { 5647 return __next_ref_path(trans, extent_root, ref_path, 0); 5648 } 5649 5650 static noinline int get_new_locations(struct inode *reloc_inode, 5651 struct btrfs_key *extent_key, 5652 u64 offset, int no_fragment, 5653 struct disk_extent **extents, 5654 int *nr_extents) 5655 { 5656 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5657 struct btrfs_path *path; 5658 struct btrfs_file_extent_item *fi; 5659 struct extent_buffer *leaf; 5660 struct disk_extent *exts = *extents; 5661 struct btrfs_key found_key; 5662 u64 cur_pos; 5663 u64 last_byte; 5664 u32 nritems; 5665 int nr = 0; 5666 int max = *nr_extents; 5667 int ret; 5668 5669 WARN_ON(!no_fragment && *extents); 5670 if (!exts) { 5671 max = 1; 5672 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 5673 if (!exts) 5674 return -ENOMEM; 5675 } 5676 5677 path = btrfs_alloc_path(); 5678 BUG_ON(!path); 5679 5680 cur_pos = extent_key->objectid - offset; 5681 last_byte = extent_key->objectid + extent_key->offset; 5682 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 5683 cur_pos, 0); 5684 if (ret < 0) 5685 goto out; 5686 if (ret > 0) { 5687 ret = -ENOENT; 5688 goto out; 5689 } 5690 5691 while (1) { 5692 leaf = path->nodes[0]; 5693 nritems = btrfs_header_nritems(leaf); 5694 if (path->slots[0] >= nritems) { 5695 ret = btrfs_next_leaf(root, path); 5696 if (ret < 0) 5697 goto out; 5698 if (ret > 0) 5699 break; 5700 leaf = path->nodes[0]; 5701 } 5702 5703 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5704 if (found_key.offset != cur_pos || 5705 found_key.type != BTRFS_EXTENT_DATA_KEY || 5706 found_key.objectid != reloc_inode->i_ino) 5707 break; 5708 5709 fi = btrfs_item_ptr(leaf, path->slots[0], 5710 struct btrfs_file_extent_item); 5711 if (btrfs_file_extent_type(leaf, fi) != 5712 BTRFS_FILE_EXTENT_REG || 5713 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5714 break; 5715 5716 if (nr == max) { 5717 struct disk_extent *old = exts; 5718 max *= 2; 5719 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 5720 memcpy(exts, old, sizeof(*exts) * nr); 5721 if (old != *extents) 5722 kfree(old); 5723 } 5724 5725 exts[nr].disk_bytenr = 5726 btrfs_file_extent_disk_bytenr(leaf, fi); 5727 exts[nr].disk_num_bytes = 5728 btrfs_file_extent_disk_num_bytes(leaf, fi); 5729 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 5730 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5731 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 5732 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 5733 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 5734 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 5735 fi); 5736 BUG_ON(exts[nr].offset > 0); 5737 BUG_ON(exts[nr].compression || exts[nr].encryption); 5738 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 5739 5740 cur_pos += exts[nr].num_bytes; 5741 nr++; 5742 5743 if (cur_pos + offset >= last_byte) 5744 break; 5745 5746 if (no_fragment) { 5747 ret = 1; 5748 goto out; 
5749 } 5750 path->slots[0]++; 5751 } 5752 5753 BUG_ON(cur_pos + offset > last_byte); 5754 if (cur_pos + offset < last_byte) { 5755 ret = -ENOENT; 5756 goto out; 5757 } 5758 ret = 0; 5759 out: 5760 btrfs_free_path(path); 5761 if (ret) { 5762 if (exts != *extents) 5763 kfree(exts); 5764 } else { 5765 *extents = exts; 5766 *nr_extents = nr; 5767 } 5768 return ret; 5769 } 5770 5771 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 5772 struct btrfs_root *root, 5773 struct btrfs_path *path, 5774 struct btrfs_key *extent_key, 5775 struct btrfs_key *leaf_key, 5776 struct btrfs_ref_path *ref_path, 5777 struct disk_extent *new_extents, 5778 int nr_extents) 5779 { 5780 struct extent_buffer *leaf; 5781 struct btrfs_file_extent_item *fi; 5782 struct inode *inode = NULL; 5783 struct btrfs_key key; 5784 u64 lock_start = 0; 5785 u64 lock_end = 0; 5786 u64 num_bytes; 5787 u64 ext_offset; 5788 u64 search_end = (u64)-1; 5789 u32 nritems; 5790 int nr_scaned = 0; 5791 int extent_locked = 0; 5792 int extent_type; 5793 int ret; 5794 5795 memcpy(&key, leaf_key, sizeof(key)); 5796 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5797 if (key.objectid < ref_path->owner_objectid || 5798 (key.objectid == ref_path->owner_objectid && 5799 key.type < BTRFS_EXTENT_DATA_KEY)) { 5800 key.objectid = ref_path->owner_objectid; 5801 key.type = BTRFS_EXTENT_DATA_KEY; 5802 key.offset = 0; 5803 } 5804 } 5805 5806 while (1) { 5807 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 5808 if (ret < 0) 5809 goto out; 5810 5811 leaf = path->nodes[0]; 5812 nritems = btrfs_header_nritems(leaf); 5813 next: 5814 if (extent_locked && ret > 0) { 5815 /* 5816 * the file extent item was modified by someone 5817 * before the extent got locked. 5818 */ 5819 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5820 lock_end, GFP_NOFS); 5821 extent_locked = 0; 5822 } 5823 5824 if (path->slots[0] >= nritems) { 5825 if (++nr_scaned > 2) 5826 break; 5827 5828 BUG_ON(extent_locked); 5829 ret = btrfs_next_leaf(root, path); 5830 if (ret < 0) 5831 goto out; 5832 if (ret > 0) 5833 break; 5834 leaf = path->nodes[0]; 5835 nritems = btrfs_header_nritems(leaf); 5836 } 5837 5838 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5839 5840 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5841 if ((key.objectid > ref_path->owner_objectid) || 5842 (key.objectid == ref_path->owner_objectid && 5843 key.type > BTRFS_EXTENT_DATA_KEY) || 5844 key.offset >= search_end) 5845 break; 5846 } 5847 5848 if (inode && key.objectid != inode->i_ino) { 5849 BUG_ON(extent_locked); 5850 btrfs_release_path(root, path); 5851 mutex_unlock(&inode->i_mutex); 5852 iput(inode); 5853 inode = NULL; 5854 continue; 5855 } 5856 5857 if (key.type != BTRFS_EXTENT_DATA_KEY) { 5858 path->slots[0]++; 5859 ret = 1; 5860 goto next; 5861 } 5862 fi = btrfs_item_ptr(leaf, path->slots[0], 5863 struct btrfs_file_extent_item); 5864 extent_type = btrfs_file_extent_type(leaf, fi); 5865 if ((extent_type != BTRFS_FILE_EXTENT_REG && 5866 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 5867 (btrfs_file_extent_disk_bytenr(leaf, fi) != 5868 extent_key->objectid)) { 5869 path->slots[0]++; 5870 ret = 1; 5871 goto next; 5872 } 5873 5874 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5875 ext_offset = btrfs_file_extent_offset(leaf, fi); 5876 5877 if (search_end == (u64)-1) { 5878 search_end = key.offset - ext_offset + 5879 btrfs_file_extent_ram_bytes(leaf, fi); 5880 } 5881 5882 if (!extent_locked) { 5883 lock_start = key.offset; 5884 lock_end = lock_start + num_bytes 
- 1; 5885 } else { 5886 if (lock_start > key.offset || 5887 lock_end + 1 < key.offset + num_bytes) { 5888 unlock_extent(&BTRFS_I(inode)->io_tree, 5889 lock_start, lock_end, GFP_NOFS); 5890 extent_locked = 0; 5891 } 5892 } 5893 5894 if (!inode) { 5895 btrfs_release_path(root, path); 5896 5897 inode = btrfs_iget_locked(root->fs_info->sb, 5898 key.objectid, root); 5899 if (inode->i_state & I_NEW) { 5900 BTRFS_I(inode)->root = root; 5901 BTRFS_I(inode)->location.objectid = 5902 key.objectid; 5903 BTRFS_I(inode)->location.type = 5904 BTRFS_INODE_ITEM_KEY; 5905 BTRFS_I(inode)->location.offset = 0; 5906 btrfs_read_locked_inode(inode); 5907 unlock_new_inode(inode); 5908 } 5909 /* 5910 * some code call btrfs_commit_transaction while 5911 * holding the i_mutex, so we can't use mutex_lock 5912 * here. 5913 */ 5914 if (is_bad_inode(inode) || 5915 !mutex_trylock(&inode->i_mutex)) { 5916 iput(inode); 5917 inode = NULL; 5918 key.offset = (u64)-1; 5919 goto skip; 5920 } 5921 } 5922 5923 if (!extent_locked) { 5924 struct btrfs_ordered_extent *ordered; 5925 5926 btrfs_release_path(root, path); 5927 5928 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5929 lock_end, GFP_NOFS); 5930 ordered = btrfs_lookup_first_ordered_extent(inode, 5931 lock_end); 5932 if (ordered && 5933 ordered->file_offset <= lock_end && 5934 ordered->file_offset + ordered->len > lock_start) { 5935 unlock_extent(&BTRFS_I(inode)->io_tree, 5936 lock_start, lock_end, GFP_NOFS); 5937 btrfs_start_ordered_extent(inode, ordered, 1); 5938 btrfs_put_ordered_extent(ordered); 5939 key.offset += num_bytes; 5940 goto skip; 5941 } 5942 if (ordered) 5943 btrfs_put_ordered_extent(ordered); 5944 5945 extent_locked = 1; 5946 continue; 5947 } 5948 5949 if (nr_extents == 1) { 5950 /* update extent pointer in place */ 5951 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5952 new_extents[0].disk_bytenr); 5953 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5954 new_extents[0].disk_num_bytes); 5955 btrfs_mark_buffer_dirty(leaf); 5956 5957 btrfs_drop_extent_cache(inode, key.offset, 5958 key.offset + num_bytes - 1, 0); 5959 5960 ret = btrfs_inc_extent_ref(trans, root, 5961 new_extents[0].disk_bytenr, 5962 new_extents[0].disk_num_bytes, 5963 leaf->start, 5964 root->root_key.objectid, 5965 trans->transid, 5966 key.objectid); 5967 BUG_ON(ret); 5968 5969 ret = btrfs_free_extent(trans, root, 5970 extent_key->objectid, 5971 extent_key->offset, 5972 leaf->start, 5973 btrfs_header_owner(leaf), 5974 btrfs_header_generation(leaf), 5975 key.objectid, 0); 5976 BUG_ON(ret); 5977 5978 btrfs_release_path(root, path); 5979 key.offset += num_bytes; 5980 } else { 5981 BUG_ON(1); 5982 #if 0 5983 u64 alloc_hint; 5984 u64 extent_len; 5985 int i; 5986 /* 5987 * drop old extent pointer at first, then insert the 5988 * new pointers one bye one 5989 */ 5990 btrfs_release_path(root, path); 5991 ret = btrfs_drop_extents(trans, root, inode, key.offset, 5992 key.offset + num_bytes, 5993 key.offset, &alloc_hint); 5994 BUG_ON(ret); 5995 5996 for (i = 0; i < nr_extents; i++) { 5997 if (ext_offset >= new_extents[i].num_bytes) { 5998 ext_offset -= new_extents[i].num_bytes; 5999 continue; 6000 } 6001 extent_len = min(new_extents[i].num_bytes - 6002 ext_offset, num_bytes); 6003 6004 ret = btrfs_insert_empty_item(trans, root, 6005 path, &key, 6006 sizeof(*fi)); 6007 BUG_ON(ret); 6008 6009 leaf = path->nodes[0]; 6010 fi = btrfs_item_ptr(leaf, path->slots[0], 6011 struct btrfs_file_extent_item); 6012 btrfs_set_file_extent_generation(leaf, fi, 6013 trans->transid); 6014 btrfs_set_file_extent_type(leaf, 
fi, 6015 BTRFS_FILE_EXTENT_REG); 6016 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6017 new_extents[i].disk_bytenr); 6018 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6019 new_extents[i].disk_num_bytes); 6020 btrfs_set_file_extent_ram_bytes(leaf, fi, 6021 new_extents[i].ram_bytes); 6022 6023 btrfs_set_file_extent_compression(leaf, fi, 6024 new_extents[i].compression); 6025 btrfs_set_file_extent_encryption(leaf, fi, 6026 new_extents[i].encryption); 6027 btrfs_set_file_extent_other_encoding(leaf, fi, 6028 new_extents[i].other_encoding); 6029 6030 btrfs_set_file_extent_num_bytes(leaf, fi, 6031 extent_len); 6032 ext_offset += new_extents[i].offset; 6033 btrfs_set_file_extent_offset(leaf, fi, 6034 ext_offset); 6035 btrfs_mark_buffer_dirty(leaf); 6036 6037 btrfs_drop_extent_cache(inode, key.offset, 6038 key.offset + extent_len - 1, 0); 6039 6040 ret = btrfs_inc_extent_ref(trans, root, 6041 new_extents[i].disk_bytenr, 6042 new_extents[i].disk_num_bytes, 6043 leaf->start, 6044 root->root_key.objectid, 6045 trans->transid, key.objectid); 6046 BUG_ON(ret); 6047 btrfs_release_path(root, path); 6048 6049 inode_add_bytes(inode, extent_len); 6050 6051 ext_offset = 0; 6052 num_bytes -= extent_len; 6053 key.offset += extent_len; 6054 6055 if (num_bytes == 0) 6056 break; 6057 } 6058 BUG_ON(i >= nr_extents); 6059 #endif 6060 } 6061 6062 if (extent_locked) { 6063 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6064 lock_end, GFP_NOFS); 6065 extent_locked = 0; 6066 } 6067 skip: 6068 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 6069 key.offset >= search_end) 6070 break; 6071 6072 cond_resched(); 6073 } 6074 ret = 0; 6075 out: 6076 btrfs_release_path(root, path); 6077 if (inode) { 6078 mutex_unlock(&inode->i_mutex); 6079 if (extent_locked) { 6080 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6081 lock_end, GFP_NOFS); 6082 } 6083 iput(inode); 6084 } 6085 return ret; 6086 } 6087 6088 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 6089 struct btrfs_root *root, 6090 struct extent_buffer *buf, u64 orig_start) 6091 { 6092 int level; 6093 int ret; 6094 6095 BUG_ON(btrfs_header_generation(buf) != trans->transid); 6096 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6097 6098 level = btrfs_header_level(buf); 6099 if (level == 0) { 6100 struct btrfs_leaf_ref *ref; 6101 struct btrfs_leaf_ref *orig_ref; 6102 6103 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 6104 if (!orig_ref) 6105 return -ENOENT; 6106 6107 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 6108 if (!ref) { 6109 btrfs_free_leaf_ref(root, orig_ref); 6110 return -ENOMEM; 6111 } 6112 6113 ref->nritems = orig_ref->nritems; 6114 memcpy(ref->extents, orig_ref->extents, 6115 sizeof(ref->extents[0]) * ref->nritems); 6116 6117 btrfs_free_leaf_ref(root, orig_ref); 6118 6119 ref->root_gen = trans->transid; 6120 ref->bytenr = buf->start; 6121 ref->owner = btrfs_header_owner(buf); 6122 ref->generation = btrfs_header_generation(buf); 6123 6124 ret = btrfs_add_leaf_ref(root, ref, 0); 6125 WARN_ON(ret); 6126 btrfs_free_leaf_ref(root, ref); 6127 } 6128 return 0; 6129 } 6130 6131 static noinline int invalidate_extent_cache(struct btrfs_root *root, 6132 struct extent_buffer *leaf, 6133 struct btrfs_block_group_cache *group, 6134 struct btrfs_root *target_root) 6135 { 6136 struct btrfs_key key; 6137 struct inode *inode = NULL; 6138 struct btrfs_file_extent_item *fi; 6139 u64 num_bytes; 6140 u64 skip_objectid = 0; 6141 u32 nritems; 6142 u32 i; 6143 6144 nritems = btrfs_header_nritems(leaf); 6145 for (i = 0; i < 
nritems; i++) { 6146 btrfs_item_key_to_cpu(leaf, &key, i); 6147 if (key.objectid == skip_objectid || 6148 key.type != BTRFS_EXTENT_DATA_KEY) 6149 continue; 6150 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6151 if (btrfs_file_extent_type(leaf, fi) == 6152 BTRFS_FILE_EXTENT_INLINE) 6153 continue; 6154 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6155 continue; 6156 if (!inode || inode->i_ino != key.objectid) { 6157 iput(inode); 6158 inode = btrfs_ilookup(target_root->fs_info->sb, 6159 key.objectid, target_root, 1); 6160 } 6161 if (!inode) { 6162 skip_objectid = key.objectid; 6163 continue; 6164 } 6165 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6166 6167 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6168 key.offset + num_bytes - 1, GFP_NOFS); 6169 btrfs_drop_extent_cache(inode, key.offset, 6170 key.offset + num_bytes - 1, 1); 6171 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 6172 key.offset + num_bytes - 1, GFP_NOFS); 6173 cond_resched(); 6174 } 6175 iput(inode); 6176 return 0; 6177 } 6178 6179 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 6180 struct btrfs_root *root, 6181 struct extent_buffer *leaf, 6182 struct btrfs_block_group_cache *group, 6183 struct inode *reloc_inode) 6184 { 6185 struct btrfs_key key; 6186 struct btrfs_key extent_key; 6187 struct btrfs_file_extent_item *fi; 6188 struct btrfs_leaf_ref *ref; 6189 struct disk_extent *new_extent; 6190 u64 bytenr; 6191 u64 num_bytes; 6192 u32 nritems; 6193 u32 i; 6194 int ext_index; 6195 int nr_extent; 6196 int ret; 6197 6198 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 6199 BUG_ON(!new_extent); 6200 6201 ref = btrfs_lookup_leaf_ref(root, leaf->start); 6202 BUG_ON(!ref); 6203 6204 ext_index = -1; 6205 nritems = btrfs_header_nritems(leaf); 6206 for (i = 0; i < nritems; i++) { 6207 btrfs_item_key_to_cpu(leaf, &key, i); 6208 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 6209 continue; 6210 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6211 if (btrfs_file_extent_type(leaf, fi) == 6212 BTRFS_FILE_EXTENT_INLINE) 6213 continue; 6214 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6215 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 6216 if (bytenr == 0) 6217 continue; 6218 6219 ext_index++; 6220 if (bytenr >= group->key.objectid + group->key.offset || 6221 bytenr + num_bytes <= group->key.objectid) 6222 continue; 6223 6224 extent_key.objectid = bytenr; 6225 extent_key.offset = num_bytes; 6226 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 6227 nr_extent = 1; 6228 ret = get_new_locations(reloc_inode, &extent_key, 6229 group->key.objectid, 1, 6230 &new_extent, &nr_extent); 6231 if (ret > 0) 6232 continue; 6233 BUG_ON(ret < 0); 6234 6235 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 6236 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 6237 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 6238 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 6239 6240 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6241 new_extent->disk_bytenr); 6242 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6243 new_extent->disk_num_bytes); 6244 btrfs_mark_buffer_dirty(leaf); 6245 6246 ret = btrfs_inc_extent_ref(trans, root, 6247 new_extent->disk_bytenr, 6248 new_extent->disk_num_bytes, 6249 leaf->start, 6250 root->root_key.objectid, 6251 trans->transid, key.objectid); 6252 BUG_ON(ret); 6253 6254 ret = btrfs_free_extent(trans, root, 6255 bytenr, num_bytes, leaf->start, 6256 btrfs_header_owner(leaf), 6257 btrfs_header_generation(leaf), 6258 
key.objectid, 0); 6259 BUG_ON(ret); 6260 cond_resched(); 6261 } 6262 kfree(new_extent); 6263 BUG_ON(ext_index + 1 != ref->nritems); 6264 btrfs_free_leaf_ref(root, ref); 6265 return 0; 6266 } 6267 6268 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 6269 struct btrfs_root *root) 6270 { 6271 struct btrfs_root *reloc_root; 6272 int ret; 6273 6274 if (root->reloc_root) { 6275 reloc_root = root->reloc_root; 6276 root->reloc_root = NULL; 6277 list_add(&reloc_root->dead_list, 6278 &root->fs_info->dead_reloc_roots); 6279 6280 btrfs_set_root_bytenr(&reloc_root->root_item, 6281 reloc_root->node->start); 6282 btrfs_set_root_level(&root->root_item, 6283 btrfs_header_level(reloc_root->node)); 6284 memset(&reloc_root->root_item.drop_progress, 0, 6285 sizeof(struct btrfs_disk_key)); 6286 reloc_root->root_item.drop_level = 0; 6287 6288 ret = btrfs_update_root(trans, root->fs_info->tree_root, 6289 &reloc_root->root_key, 6290 &reloc_root->root_item); 6291 BUG_ON(ret); 6292 } 6293 return 0; 6294 } 6295 6296 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 6297 { 6298 struct btrfs_trans_handle *trans; 6299 struct btrfs_root *reloc_root; 6300 struct btrfs_root *prev_root = NULL; 6301 struct list_head dead_roots; 6302 int ret; 6303 unsigned long nr; 6304 6305 INIT_LIST_HEAD(&dead_roots); 6306 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 6307 6308 while (!list_empty(&dead_roots)) { 6309 reloc_root = list_entry(dead_roots.prev, 6310 struct btrfs_root, dead_list); 6311 list_del_init(&reloc_root->dead_list); 6312 6313 BUG_ON(reloc_root->commit_root != NULL); 6314 while (1) { 6315 trans = btrfs_join_transaction(root, 1); 6316 BUG_ON(!trans); 6317 6318 mutex_lock(&root->fs_info->drop_mutex); 6319 ret = btrfs_drop_snapshot(trans, reloc_root); 6320 if (ret != -EAGAIN) 6321 break; 6322 mutex_unlock(&root->fs_info->drop_mutex); 6323 6324 nr = trans->blocks_used; 6325 ret = btrfs_end_transaction(trans, root); 6326 BUG_ON(ret); 6327 btrfs_btree_balance_dirty(root, nr); 6328 } 6329 6330 free_extent_buffer(reloc_root->node); 6331 6332 ret = btrfs_del_root(trans, root->fs_info->tree_root, 6333 &reloc_root->root_key); 6334 BUG_ON(ret); 6335 mutex_unlock(&root->fs_info->drop_mutex); 6336 6337 nr = trans->blocks_used; 6338 ret = btrfs_end_transaction(trans, root); 6339 BUG_ON(ret); 6340 btrfs_btree_balance_dirty(root, nr); 6341 6342 kfree(prev_root); 6343 prev_root = reloc_root; 6344 } 6345 if (prev_root) { 6346 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 6347 kfree(prev_root); 6348 } 6349 return 0; 6350 } 6351 6352 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 6353 { 6354 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 6355 return 0; 6356 } 6357 6358 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 6359 { 6360 struct btrfs_root *reloc_root; 6361 struct btrfs_trans_handle *trans; 6362 struct btrfs_key location; 6363 int found; 6364 int ret; 6365 6366 mutex_lock(&root->fs_info->tree_reloc_mutex); 6367 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 6368 BUG_ON(ret); 6369 found = !list_empty(&root->fs_info->dead_reloc_roots); 6370 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6371 6372 if (found) { 6373 trans = btrfs_start_transaction(root, 1); 6374 BUG_ON(!trans); 6375 ret = btrfs_commit_transaction(trans, root); 6376 BUG_ON(ret); 6377 } 6378 6379 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6380 location.offset = (u64)-1; 6381 location.type = BTRFS_ROOT_ITEM_KEY; 6382 6383 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, 
&location);
	BUG_ON(!reloc_root);
	btrfs_orphan_cleanup(reloc_root);
	return 0;
}

static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret;

	BUG_ON(!root->ref_cows);
	if (root->reloc_root)
		return 0;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	BUG_ON(!root_item);

	ret = btrfs_copy_root(trans, root, root->commit_root,
			      &eb, BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(ret);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.offset = root->root_key.objectid;
	root_key.type = BTRFS_ROOT_ITEM_KEY;

	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_refs(root_item, 0);
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
				&root_key, root_item);
	BUG_ON(ret);
	kfree(root_item);

	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
						 &root_key);
	BUG_ON(!reloc_root);
	reloc_root->last_trans = trans->transid;
	reloc_root->commit_root = NULL;
	reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;

	root->reloc_root = reloc_root;
	return 0;
}

/*
 * Core function of space balance.
 *
 * The idea is to use reloc trees to relocate tree blocks in reference
 * counted roots. There is one reloc tree for each subvol, and all
 * reloc trees share the same root key objectid. Reloc trees are
 * snapshots of the latest committed roots of subvols (root->commit_root).
 *
 * To relocate a tree block referenced by a subvol, there are two steps.
 * COW the block through the subvol's reloc tree, then update the block
 * pointer in the subvol to point to the new block. Since all reloc trees
 * share the same root key objectid, special handling for tree blocks
 * owned by them is easy. Once a tree block has been COWed in one reloc
 * tree, we can use the resulting new block directly when the same block
 * is required to COW again through other reloc trees. In this way,
 * relocated tree blocks are shared between reloc trees, so they are also
 * shared between subvols.
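 *
 * In outline, relocate_one_path() below does the work: make sure the
 * subvol has a reloc tree (init_reloc_tree), COW or merge the block's
 * path inside the reloc tree, then call btrfs_merge_path() on the subvol
 * itself so its block pointers reference the relocated blocks.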
6455 */ 6456 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 6457 struct btrfs_root *root, 6458 struct btrfs_path *path, 6459 struct btrfs_key *first_key, 6460 struct btrfs_ref_path *ref_path, 6461 struct btrfs_block_group_cache *group, 6462 struct inode *reloc_inode) 6463 { 6464 struct btrfs_root *reloc_root; 6465 struct extent_buffer *eb = NULL; 6466 struct btrfs_key *keys; 6467 u64 *nodes; 6468 int level; 6469 int shared_level; 6470 int lowest_level = 0; 6471 int ret; 6472 6473 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 6474 lowest_level = ref_path->owner_objectid; 6475 6476 if (!root->ref_cows) { 6477 path->lowest_level = lowest_level; 6478 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 6479 BUG_ON(ret < 0); 6480 path->lowest_level = 0; 6481 btrfs_release_path(root, path); 6482 return 0; 6483 } 6484 6485 mutex_lock(&root->fs_info->tree_reloc_mutex); 6486 ret = init_reloc_tree(trans, root); 6487 BUG_ON(ret); 6488 reloc_root = root->reloc_root; 6489 6490 shared_level = ref_path->shared_level; 6491 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 6492 6493 keys = ref_path->node_keys; 6494 nodes = ref_path->new_nodes; 6495 memset(&keys[shared_level + 1], 0, 6496 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6497 memset(&nodes[shared_level + 1], 0, 6498 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6499 6500 if (nodes[lowest_level] == 0) { 6501 path->lowest_level = lowest_level; 6502 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6503 0, 1); 6504 BUG_ON(ret); 6505 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 6506 eb = path->nodes[level]; 6507 if (!eb || eb == reloc_root->node) 6508 break; 6509 nodes[level] = eb->start; 6510 if (level == 0) 6511 btrfs_item_key_to_cpu(eb, &keys[level], 0); 6512 else 6513 btrfs_node_key_to_cpu(eb, &keys[level], 0); 6514 } 6515 if (nodes[0] && 6516 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6517 eb = path->nodes[0]; 6518 ret = replace_extents_in_leaf(trans, reloc_root, eb, 6519 group, reloc_inode); 6520 BUG_ON(ret); 6521 } 6522 btrfs_release_path(reloc_root, path); 6523 } else { 6524 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 6525 lowest_level); 6526 BUG_ON(ret); 6527 } 6528 6529 /* 6530 * replace tree blocks in the fs tree with tree blocks in 6531 * the reloc tree. 
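	 * btrfs_merge_path() walks the fs tree along 'keys' and points it
	 * at the relocated blocks recorded in 'nodes', so the subvol ends
	 * up sharing those blocks with the reloc tree.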
6532 */ 6533 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 6534 BUG_ON(ret < 0); 6535 6536 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6537 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6538 0, 0); 6539 BUG_ON(ret); 6540 extent_buffer_get(path->nodes[0]); 6541 eb = path->nodes[0]; 6542 btrfs_release_path(reloc_root, path); 6543 ret = invalidate_extent_cache(reloc_root, eb, group, root); 6544 BUG_ON(ret); 6545 free_extent_buffer(eb); 6546 } 6547 6548 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6549 path->lowest_level = 0; 6550 return 0; 6551 } 6552 6553 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 6554 struct btrfs_root *root, 6555 struct btrfs_path *path, 6556 struct btrfs_key *first_key, 6557 struct btrfs_ref_path *ref_path) 6558 { 6559 int ret; 6560 6561 ret = relocate_one_path(trans, root, path, first_key, 6562 ref_path, NULL, NULL); 6563 BUG_ON(ret); 6564 6565 return 0; 6566 } 6567 6568 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 6569 struct btrfs_root *extent_root, 6570 struct btrfs_path *path, 6571 struct btrfs_key *extent_key) 6572 { 6573 int ret; 6574 6575 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 6576 if (ret) 6577 goto out; 6578 ret = btrfs_del_item(trans, extent_root, path); 6579 out: 6580 btrfs_release_path(extent_root, path); 6581 return ret; 6582 } 6583 6584 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 6585 struct btrfs_ref_path *ref_path) 6586 { 6587 struct btrfs_key root_key; 6588 6589 root_key.objectid = ref_path->root_objectid; 6590 root_key.type = BTRFS_ROOT_ITEM_KEY; 6591 if (is_cowonly_root(ref_path->root_objectid)) 6592 root_key.offset = 0; 6593 else 6594 root_key.offset = (u64)-1; 6595 6596 return btrfs_read_fs_root_no_name(fs_info, &root_key); 6597 } 6598 6599 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 6600 struct btrfs_path *path, 6601 struct btrfs_key *extent_key, 6602 struct btrfs_block_group_cache *group, 6603 struct inode *reloc_inode, int pass) 6604 { 6605 struct btrfs_trans_handle *trans; 6606 struct btrfs_root *found_root; 6607 struct btrfs_ref_path *ref_path = NULL; 6608 struct disk_extent *new_extents = NULL; 6609 int nr_extents = 0; 6610 int loops; 6611 int ret; 6612 int level; 6613 struct btrfs_key first_key; 6614 u64 prev_block = 0; 6615 6616 6617 trans = btrfs_start_transaction(extent_root, 1); 6618 BUG_ON(!trans); 6619 6620 if (extent_key->objectid == 0) { 6621 ret = del_extent_zero(trans, extent_root, path, extent_key); 6622 goto out; 6623 } 6624 6625 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 6626 if (!ref_path) { 6627 ret = -ENOMEM; 6628 goto out; 6629 } 6630 6631 for (loops = 0; ; loops++) { 6632 if (loops == 0) { 6633 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 6634 extent_key->objectid); 6635 } else { 6636 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 6637 } 6638 if (ret < 0) 6639 goto out; 6640 if (ret > 0) 6641 break; 6642 6643 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 6644 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 6645 continue; 6646 6647 found_root = read_ref_root(extent_root->fs_info, ref_path); 6648 BUG_ON(!found_root); 6649 /* 6650 * for reference counted tree, only process reference paths 6651 * rooted at the latest committed root. 
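		 * (reloc trees are snapshots of root->commit_root, so
		 * reference paths from older root generations are skipped)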
6652 */ 6653 if (found_root->ref_cows && 6654 ref_path->root_generation != found_root->root_key.offset) 6655 continue; 6656 6657 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6658 if (pass == 0) { 6659 /* 6660 * copy data extents to new locations 6661 */ 6662 u64 group_start = group->key.objectid; 6663 ret = relocate_data_extent(reloc_inode, 6664 extent_key, 6665 group_start); 6666 if (ret < 0) 6667 goto out; 6668 break; 6669 } 6670 level = 0; 6671 } else { 6672 level = ref_path->owner_objectid; 6673 } 6674 6675 if (prev_block != ref_path->nodes[level]) { 6676 struct extent_buffer *eb; 6677 u64 block_start = ref_path->nodes[level]; 6678 u64 block_size = btrfs_level_size(found_root, level); 6679 6680 eb = read_tree_block(found_root, block_start, 6681 block_size, 0); 6682 btrfs_tree_lock(eb); 6683 BUG_ON(level != btrfs_header_level(eb)); 6684 6685 if (level == 0) 6686 btrfs_item_key_to_cpu(eb, &first_key, 0); 6687 else 6688 btrfs_node_key_to_cpu(eb, &first_key, 0); 6689 6690 btrfs_tree_unlock(eb); 6691 free_extent_buffer(eb); 6692 prev_block = block_start; 6693 } 6694 6695 mutex_lock(&extent_root->fs_info->trans_mutex); 6696 btrfs_record_root_in_trans(found_root); 6697 mutex_unlock(&extent_root->fs_info->trans_mutex); 6698 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6699 /* 6700 * try to update data extent references while 6701 * keeping metadata shared between snapshots. 6702 */ 6703 if (pass == 1) { 6704 ret = relocate_one_path(trans, found_root, 6705 path, &first_key, ref_path, 6706 group, reloc_inode); 6707 if (ret < 0) 6708 goto out; 6709 continue; 6710 } 6711 /* 6712 * use fallback method to process the remaining 6713 * references. 6714 */ 6715 if (!new_extents) { 6716 u64 group_start = group->key.objectid; 6717 new_extents = kmalloc(sizeof(*new_extents), 6718 GFP_NOFS); 6719 nr_extents = 1; 6720 ret = get_new_locations(reloc_inode, 6721 extent_key, 6722 group_start, 1, 6723 &new_extents, 6724 &nr_extents); 6725 if (ret) 6726 goto out; 6727 } 6728 ret = replace_one_extent(trans, found_root, 6729 path, extent_key, 6730 &first_key, ref_path, 6731 new_extents, nr_extents); 6732 } else { 6733 ret = relocate_tree_block(trans, found_root, path, 6734 &first_key, ref_path); 6735 } 6736 if (ret < 0) 6737 goto out; 6738 } 6739 ret = 0; 6740 out: 6741 btrfs_end_transaction(trans, extent_root); 6742 kfree(new_extents); 6743 kfree(ref_path); 6744 return ret; 6745 } 6746 #endif 6747 6748 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6749 { 6750 u64 num_devices; 6751 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6752 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6753 6754 num_devices = root->fs_info->fs_devices->rw_devices; 6755 if (num_devices == 1) { 6756 stripped |= BTRFS_BLOCK_GROUP_DUP; 6757 stripped = flags & ~stripped; 6758 6759 /* turn raid0 into single device chunks */ 6760 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6761 return stripped; 6762 6763 /* turn mirroring into duplication */ 6764 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6765 BTRFS_BLOCK_GROUP_RAID10)) 6766 return stripped | BTRFS_BLOCK_GROUP_DUP; 6767 return flags; 6768 } else { 6769 /* they already had raid on here, just return */ 6770 if (flags & stripped) 6771 return flags; 6772 6773 stripped |= BTRFS_BLOCK_GROUP_DUP; 6774 stripped = flags & ~stripped; 6775 6776 /* switch duplicated blocks with raid1 */ 6777 if (flags & BTRFS_BLOCK_GROUP_DUP) 6778 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6779 6780 /* turn single device chunks into raid0 */ 6781 return stripped | 
BTRFS_BLOCK_GROUP_RAID0; 6782 } 6783 return flags; 6784 } 6785 6786 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 6787 struct btrfs_block_group_cache *shrink_block_group, 6788 int force) 6789 { 6790 struct btrfs_trans_handle *trans; 6791 u64 new_alloc_flags; 6792 u64 calc; 6793 6794 spin_lock(&shrink_block_group->lock); 6795 if (btrfs_block_group_used(&shrink_block_group->item) + 6796 shrink_block_group->reserved > 0) { 6797 spin_unlock(&shrink_block_group->lock); 6798 6799 trans = btrfs_start_transaction(root, 1); 6800 spin_lock(&shrink_block_group->lock); 6801 6802 new_alloc_flags = update_block_group_flags(root, 6803 shrink_block_group->flags); 6804 if (new_alloc_flags != shrink_block_group->flags) { 6805 calc = 6806 btrfs_block_group_used(&shrink_block_group->item); 6807 } else { 6808 calc = shrink_block_group->key.offset; 6809 } 6810 spin_unlock(&shrink_block_group->lock); 6811 6812 do_chunk_alloc(trans, root->fs_info->extent_root, 6813 calc + 2 * 1024 * 1024, new_alloc_flags, force); 6814 6815 btrfs_end_transaction(trans, root); 6816 } else 6817 spin_unlock(&shrink_block_group->lock); 6818 return 0; 6819 } 6820 6821 6822 int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 6823 struct btrfs_block_group_cache *group) 6824 6825 { 6826 __alloc_chunk_for_shrink(root, group, 1); 6827 set_block_group_readonly(group); 6828 return 0; 6829 } 6830 6831 #if 0 6832 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6833 struct btrfs_root *root, 6834 u64 objectid, u64 size) 6835 { 6836 struct btrfs_path *path; 6837 struct btrfs_inode_item *item; 6838 struct extent_buffer *leaf; 6839 int ret; 6840 6841 path = btrfs_alloc_path(); 6842 if (!path) 6843 return -ENOMEM; 6844 6845 path->leave_spinning = 1; 6846 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 6847 if (ret) 6848 goto out; 6849 6850 leaf = path->nodes[0]; 6851 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 6852 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 6853 btrfs_set_inode_generation(leaf, item, 1); 6854 btrfs_set_inode_size(leaf, item, size); 6855 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 6856 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 6857 btrfs_mark_buffer_dirty(leaf); 6858 btrfs_release_path(root, path); 6859 out: 6860 btrfs_free_path(path); 6861 return ret; 6862 } 6863 6864 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 6865 struct btrfs_block_group_cache *group) 6866 { 6867 struct inode *inode = NULL; 6868 struct btrfs_trans_handle *trans; 6869 struct btrfs_root *root; 6870 struct btrfs_key root_key; 6871 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 6872 int err = 0; 6873 6874 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6875 root_key.type = BTRFS_ROOT_ITEM_KEY; 6876 root_key.offset = (u64)-1; 6877 root = btrfs_read_fs_root_no_name(fs_info, &root_key); 6878 if (IS_ERR(root)) 6879 return ERR_CAST(root); 6880 6881 trans = btrfs_start_transaction(root, 1); 6882 BUG_ON(!trans); 6883 6884 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 6885 if (err) 6886 goto out; 6887 6888 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 6889 BUG_ON(err); 6890 6891 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, 6892 group->key.offset, 0, group->key.offset, 6893 0, 0, 0); 6894 BUG_ON(err); 6895 6896 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); 6897 if (inode->i_state & I_NEW) { 6898 BTRFS_I(inode)->root = root; 6899 
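		/*
		 * freshly created in-memory inode: fill in its location key
		 * so btrfs_read_locked_inode() can find the inode item we
		 * just inserted
		 */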
BTRFS_I(inode)->location.objectid = objectid; 6900 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 6901 BTRFS_I(inode)->location.offset = 0; 6902 btrfs_read_locked_inode(inode); 6903 unlock_new_inode(inode); 6904 BUG_ON(is_bad_inode(inode)); 6905 } else { 6906 BUG_ON(1); 6907 } 6908 BTRFS_I(inode)->index_cnt = group->key.objectid; 6909 6910 err = btrfs_orphan_add(trans, inode); 6911 out: 6912 btrfs_end_transaction(trans, root); 6913 if (err) { 6914 if (inode) 6915 iput(inode); 6916 inode = ERR_PTR(err); 6917 } 6918 return inode; 6919 } 6920 6921 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 6922 { 6923 6924 struct btrfs_ordered_sum *sums; 6925 struct btrfs_sector_sum *sector_sum; 6926 struct btrfs_ordered_extent *ordered; 6927 struct btrfs_root *root = BTRFS_I(inode)->root; 6928 struct list_head list; 6929 size_t offset; 6930 int ret; 6931 u64 disk_bytenr; 6932 6933 INIT_LIST_HEAD(&list); 6934 6935 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 6936 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 6937 6938 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 6939 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 6940 disk_bytenr + len - 1, &list); 6941 6942 while (!list_empty(&list)) { 6943 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 6944 list_del_init(&sums->list); 6945 6946 sector_sum = sums->sums; 6947 sums->bytenr = ordered->start; 6948 6949 offset = 0; 6950 while (offset < sums->len) { 6951 sector_sum->bytenr += ordered->start - disk_bytenr; 6952 sector_sum++; 6953 offset += root->sectorsize; 6954 } 6955 6956 btrfs_add_ordered_sum(inode, ordered, sums); 6957 } 6958 btrfs_put_ordered_extent(ordered); 6959 return 0; 6960 } 6961 6962 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) 6963 { 6964 struct btrfs_trans_handle *trans; 6965 struct btrfs_path *path; 6966 struct btrfs_fs_info *info = root->fs_info; 6967 struct extent_buffer *leaf; 6968 struct inode *reloc_inode; 6969 struct btrfs_block_group_cache *block_group; 6970 struct btrfs_key key; 6971 u64 skipped; 6972 u64 cur_byte; 6973 u64 total_found; 6974 u32 nritems; 6975 int ret; 6976 int progress; 6977 int pass = 0; 6978 6979 root = root->fs_info->extent_root; 6980 6981 block_group = btrfs_lookup_block_group(info, group_start); 6982 BUG_ON(!block_group); 6983 6984 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", 6985 (unsigned long long)block_group->key.objectid, 6986 (unsigned long long)block_group->flags); 6987 6988 path = btrfs_alloc_path(); 6989 BUG_ON(!path); 6990 6991 reloc_inode = create_reloc_inode(info, block_group); 6992 BUG_ON(IS_ERR(reloc_inode)); 6993 6994 __alloc_chunk_for_shrink(root, block_group, 1); 6995 set_block_group_readonly(block_group); 6996 6997 btrfs_start_delalloc_inodes(info->tree_root); 6998 btrfs_wait_ordered_extents(info->tree_root, 0); 6999 again: 7000 skipped = 0; 7001 total_found = 0; 7002 progress = 0; 7003 key.objectid = block_group->key.objectid; 7004 key.offset = 0; 7005 key.type = 0; 7006 cur_byte = key.objectid; 7007 7008 trans = btrfs_start_transaction(info->tree_root, 1); 7009 btrfs_commit_transaction(trans, info->tree_root); 7010 7011 mutex_lock(&root->fs_info->cleaner_mutex); 7012 btrfs_clean_old_snapshots(info->tree_root); 7013 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 7014 mutex_unlock(&root->fs_info->cleaner_mutex); 7015 7016 trans = btrfs_start_transaction(info->tree_root, 1); 7017 btrfs_commit_transaction(trans, info->tree_root); 7018 7019 
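	/*
	 * scan every extent item that falls inside this block group and try
	 * to relocate it; extents we fail to move in this pass are counted
	 * in 'skipped' so we can decide whether another pass is worthwhile
	 */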
while (1) { 7020 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 7021 if (ret < 0) 7022 goto out; 7023 next: 7024 leaf = path->nodes[0]; 7025 nritems = btrfs_header_nritems(leaf); 7026 if (path->slots[0] >= nritems) { 7027 ret = btrfs_next_leaf(root, path); 7028 if (ret < 0) 7029 goto out; 7030 if (ret == 1) { 7031 ret = 0; 7032 break; 7033 } 7034 leaf = path->nodes[0]; 7035 nritems = btrfs_header_nritems(leaf); 7036 } 7037 7038 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 7039 7040 if (key.objectid >= block_group->key.objectid + 7041 block_group->key.offset) 7042 break; 7043 7044 if (progress && need_resched()) { 7045 btrfs_release_path(root, path); 7046 cond_resched(); 7047 progress = 0; 7048 continue; 7049 } 7050 progress = 1; 7051 7052 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || 7053 key.objectid + key.offset <= cur_byte) { 7054 path->slots[0]++; 7055 goto next; 7056 } 7057 7058 total_found++; 7059 cur_byte = key.objectid + key.offset; 7060 btrfs_release_path(root, path); 7061 7062 __alloc_chunk_for_shrink(root, block_group, 0); 7063 ret = relocate_one_extent(root, path, &key, block_group, 7064 reloc_inode, pass); 7065 BUG_ON(ret < 0); 7066 if (ret > 0) 7067 skipped++; 7068 7069 key.objectid = cur_byte; 7070 key.type = 0; 7071 key.offset = 0; 7072 } 7073 7074 btrfs_release_path(root, path); 7075 7076 if (pass == 0) { 7077 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); 7078 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); 7079 } 7080 7081 if (total_found > 0) { 7082 printk(KERN_INFO "btrfs found %llu extents in pass %d\n", 7083 (unsigned long long)total_found, pass); 7084 pass++; 7085 if (total_found == skipped && pass > 2) { 7086 iput(reloc_inode); 7087 reloc_inode = create_reloc_inode(info, block_group); 7088 pass = 0; 7089 } 7090 goto again; 7091 } 7092 7093 /* delete reloc_inode */ 7094 iput(reloc_inode); 7095 7096 /* unpin extents in this range */ 7097 trans = btrfs_start_transaction(info->tree_root, 1); 7098 btrfs_commit_transaction(trans, info->tree_root); 7099 7100 spin_lock(&block_group->lock); 7101 WARN_ON(block_group->pinned > 0); 7102 WARN_ON(block_group->reserved > 0); 7103 WARN_ON(btrfs_block_group_used(&block_group->item) > 0); 7104 spin_unlock(&block_group->lock); 7105 btrfs_put_block_group(block_group); 7106 ret = 0; 7107 out: 7108 btrfs_free_path(path); 7109 return ret; 7110 } 7111 #endif 7112 7113 static int find_first_block_group(struct btrfs_root *root, 7114 struct btrfs_path *path, struct btrfs_key *key) 7115 { 7116 int ret = 0; 7117 struct btrfs_key found_key; 7118 struct extent_buffer *leaf; 7119 int slot; 7120 7121 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 7122 if (ret < 0) 7123 goto out; 7124 7125 while (1) { 7126 slot = path->slots[0]; 7127 leaf = path->nodes[0]; 7128 if (slot >= btrfs_header_nritems(leaf)) { 7129 ret = btrfs_next_leaf(root, path); 7130 if (ret == 0) 7131 continue; 7132 if (ret < 0) 7133 goto out; 7134 break; 7135 } 7136 btrfs_item_key_to_cpu(leaf, &found_key, slot); 7137 7138 if (found_key.objectid >= key->objectid && 7139 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 7140 ret = 0; 7141 goto out; 7142 } 7143 path->slots[0]++; 7144 } 7145 ret = -ENOENT; 7146 out: 7147 return ret; 7148 } 7149 7150 int btrfs_free_block_groups(struct btrfs_fs_info *info) 7151 { 7152 struct btrfs_block_group_cache *block_group; 7153 struct btrfs_space_info *space_info; 7154 struct rb_node *n; 7155 7156 spin_lock(&info->block_group_cache_lock); 7157 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7158 
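		/*
		 * unlink each group from the rb tree while holding the cache
		 * lock, then drop the lock before waiting for any in-progress
		 * caching thread and freeing the group
		 */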
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);
		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		spin_unlock(&info->block_group_cache_lock);

		down_write(&block_group->space_info->groups_sem);
		list_del(&block_group->list);
		up_write(&block_group->space_info->groups_sem);

		if (block_group->cached == BTRFS_CACHE_STARTED)
			wait_event(block_group->caching_q,
				   block_group_cache_done(block_group));

		btrfs_remove_free_space_cache(block_group);

		WARN_ON(atomic_read(&block_group->count) != 1);
		kfree(block_group);

		spin_lock(&info->block_group_cache_lock);
	}
	spin_unlock(&info->block_group_cache_lock);

	/* now that all the block groups are freed, go through and
	 * free all the space_info structs.  This is only called during
	 * the final stages of unmount, and so we know nobody is
	 * using them.  We call synchronize_rcu() once before we start,
	 * just to be on the safe side.
	 */
	synchronize_rcu();

	while (!list_empty(&info->space_info)) {
		space_info = list_entry(info->space_info.next,
					struct btrfs_space_info,
					list);

		list_del(&space_info->list);
		kfree(space_info);
	}
	return 0;
}

int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *space_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;

	root = info->extent_root;
	key.objectid = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		ret = find_first_block_group(root, path, &key);
		if (ret > 0) {
			ret = 0;
			goto error;
		}
		if (ret != 0)
			goto error;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		cache = kzalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			ret = -ENOMEM;
			goto error;
		}

		atomic_set(&cache->count, 1);
		spin_lock_init(&cache->lock);
		spin_lock_init(&cache->tree_lock);
		cache->fs_info = info;
		init_waitqueue_head(&cache->caching_q);
		INIT_LIST_HEAD(&cache->list);
		INIT_LIST_HEAD(&cache->cluster_list);

		/*
		 * we only want to have 32k of ram per block group for keeping
		 * track of free space, and if we pass 1/2 of that we want to
		 * start converting things over to using bitmaps
		 */
		cache->extents_thresh = ((1024 * 32) / 2) /
			sizeof(struct btrfs_free_space);

		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
		memcpy(&cache->key, &found_key, sizeof(found_key));

		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
		cache->flags = btrfs_block_group_flags(&cache->item);
		cache->sectorsize = root->sectorsize;

		remove_sb_from_cache(root, cache);

		/*
		 * check for two cases: either we are full, and therefore
		 * don't need to bother with the caching work since we won't
		 * find any space, or we are empty, and we can just add all
		 * the space in and be done with it.  This saves us a lot of
		 * time, particularly in the full case.
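		 * (used == size means the group is completely full and
		 * used == 0 means completely empty; in both cases the free
		 * space is known without scanning the extent tree)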
7270 */ 7271 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7272 cache->cached = BTRFS_CACHE_FINISHED; 7273 } else if (btrfs_block_group_used(&cache->item) == 0) { 7274 cache->cached = BTRFS_CACHE_FINISHED; 7275 add_new_free_space(cache, root->fs_info, 7276 found_key.objectid, 7277 found_key.objectid + 7278 found_key.offset); 7279 } 7280 7281 ret = update_space_info(info, cache->flags, found_key.offset, 7282 btrfs_block_group_used(&cache->item), 7283 &space_info); 7284 BUG_ON(ret); 7285 cache->space_info = space_info; 7286 down_write(&space_info->groups_sem); 7287 list_add_tail(&cache->list, &space_info->block_groups); 7288 up_write(&space_info->groups_sem); 7289 7290 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7291 BUG_ON(ret); 7292 7293 set_avail_alloc_bits(root->fs_info, cache->flags); 7294 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7295 set_block_group_readonly(cache); 7296 } 7297 ret = 0; 7298 error: 7299 btrfs_free_path(path); 7300 return ret; 7301 } 7302 7303 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7304 struct btrfs_root *root, u64 bytes_used, 7305 u64 type, u64 chunk_objectid, u64 chunk_offset, 7306 u64 size) 7307 { 7308 int ret; 7309 struct btrfs_root *extent_root; 7310 struct btrfs_block_group_cache *cache; 7311 7312 extent_root = root->fs_info->extent_root; 7313 7314 root->fs_info->last_trans_log_full_commit = trans->transid; 7315 7316 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7317 if (!cache) 7318 return -ENOMEM; 7319 7320 cache->key.objectid = chunk_offset; 7321 cache->key.offset = size; 7322 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7323 cache->sectorsize = root->sectorsize; 7324 7325 /* 7326 * we only want to have 32k of ram per block group for keeping track 7327 * of free space, and if we pass 1/2 of that we want to start 7328 * converting things over to using bitmaps 7329 */ 7330 cache->extents_thresh = ((1024 * 32) / 2) / 7331 sizeof(struct btrfs_free_space); 7332 atomic_set(&cache->count, 1); 7333 spin_lock_init(&cache->lock); 7334 spin_lock_init(&cache->tree_lock); 7335 init_waitqueue_head(&cache->caching_q); 7336 INIT_LIST_HEAD(&cache->list); 7337 INIT_LIST_HEAD(&cache->cluster_list); 7338 7339 btrfs_set_block_group_used(&cache->item, bytes_used); 7340 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7341 cache->flags = type; 7342 btrfs_set_block_group_flags(&cache->item, type); 7343 7344 cache->cached = BTRFS_CACHE_FINISHED; 7345 remove_sb_from_cache(root, cache); 7346 7347 add_new_free_space(cache, root->fs_info, chunk_offset, 7348 chunk_offset + size); 7349 7350 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7351 &cache->space_info); 7352 BUG_ON(ret); 7353 down_write(&cache->space_info->groups_sem); 7354 list_add_tail(&cache->list, &cache->space_info->block_groups); 7355 up_write(&cache->space_info->groups_sem); 7356 7357 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7358 BUG_ON(ret); 7359 7360 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7361 sizeof(cache->item)); 7362 BUG_ON(ret); 7363 7364 set_avail_alloc_bits(extent_root->fs_info, type); 7365 7366 return 0; 7367 } 7368 7369 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7370 struct btrfs_root *root, u64 group_start) 7371 { 7372 struct btrfs_path *path; 7373 struct btrfs_block_group_cache *block_group; 7374 struct btrfs_free_cluster *cluster; 7375 struct btrfs_key key; 7376 int ret; 7377 7378 root = root->fs_info->extent_root; 7379 7380 block_group = 
btrfs_lookup_block_group(root->fs_info, group_start); 7381 BUG_ON(!block_group); 7382 BUG_ON(!block_group->ro); 7383 7384 memcpy(&key, &block_group->key, sizeof(key)); 7385 7386 /* make sure this block group isn't part of an allocation cluster */ 7387 cluster = &root->fs_info->data_alloc_cluster; 7388 spin_lock(&cluster->refill_lock); 7389 btrfs_return_cluster_to_free_space(block_group, cluster); 7390 spin_unlock(&cluster->refill_lock); 7391 7392 /* 7393 * make sure this block group isn't part of a metadata 7394 * allocation cluster 7395 */ 7396 cluster = &root->fs_info->meta_alloc_cluster; 7397 spin_lock(&cluster->refill_lock); 7398 btrfs_return_cluster_to_free_space(block_group, cluster); 7399 spin_unlock(&cluster->refill_lock); 7400 7401 path = btrfs_alloc_path(); 7402 BUG_ON(!path); 7403 7404 spin_lock(&root->fs_info->block_group_cache_lock); 7405 rb_erase(&block_group->cache_node, 7406 &root->fs_info->block_group_cache_tree); 7407 spin_unlock(&root->fs_info->block_group_cache_lock); 7408 7409 down_write(&block_group->space_info->groups_sem); 7410 /* 7411 * we must use list_del_init so people can check to see if they 7412 * are still on the list after taking the semaphore 7413 */ 7414 list_del_init(&block_group->list); 7415 up_write(&block_group->space_info->groups_sem); 7416 7417 if (block_group->cached == BTRFS_CACHE_STARTED) 7418 wait_event(block_group->caching_q, 7419 block_group_cache_done(block_group)); 7420 7421 btrfs_remove_free_space_cache(block_group); 7422 7423 spin_lock(&block_group->space_info->lock); 7424 block_group->space_info->total_bytes -= block_group->key.offset; 7425 block_group->space_info->bytes_readonly -= block_group->key.offset; 7426 spin_unlock(&block_group->space_info->lock); 7427 7428 btrfs_clear_space_info_full(root->fs_info); 7429 7430 btrfs_put_block_group(block_group); 7431 btrfs_put_block_group(block_group); 7432 7433 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7434 if (ret > 0) 7435 ret = -EIO; 7436 if (ret < 0) 7437 goto out; 7438 7439 ret = btrfs_del_item(trans, root, path); 7440 out: 7441 btrfs_free_path(path); 7442 return ret; 7443 } 7444