1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include <linux/kthread.h> 25 #include <linux/slab.h> 26 #include "compat.h" 27 #include "hash.h" 28 #include "ctree.h" 29 #include "disk-io.h" 30 #include "print-tree.h" 31 #include "transaction.h" 32 #include "volumes.h" 33 #include "locking.h" 34 #include "free-space-cache.h" 35 36 static int update_block_group(struct btrfs_trans_handle *trans, 37 struct btrfs_root *root, 38 u64 bytenr, u64 num_bytes, int alloc, 39 int mark_free); 40 static int update_reserved_extents(struct btrfs_block_group_cache *cache, 41 u64 num_bytes, int reserve); 42 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 43 struct btrfs_root *root, 44 u64 bytenr, u64 num_bytes, u64 parent, 45 u64 root_objectid, u64 owner_objectid, 46 u64 owner_offset, int refs_to_drop, 47 struct btrfs_delayed_extent_op *extra_op); 48 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 49 struct extent_buffer *leaf, 50 struct btrfs_extent_item *ei); 51 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 52 struct btrfs_root *root, 53 u64 parent, u64 root_objectid, 54 u64 flags, u64 owner, u64 offset, 55 struct btrfs_key *ins, int ref_mod); 56 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 57 struct btrfs_root *root, 58 u64 parent, u64 root_objectid, 59 u64 flags, struct btrfs_disk_key *key, 60 int level, struct btrfs_key *ins); 61 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 62 struct btrfs_root *extent_root, u64 alloc_bytes, 63 u64 flags, int force); 64 static int pin_down_bytes(struct btrfs_trans_handle *trans, 65 struct btrfs_root *root, 66 struct btrfs_path *path, 67 u64 bytenr, u64 num_bytes, 68 int is_data, int reserved, 69 struct extent_buffer **must_clean); 70 static int find_next_key(struct btrfs_path *path, int level, 71 struct btrfs_key *key); 72 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 73 int dump_block_groups); 74 75 static noinline int 76 block_group_cache_done(struct btrfs_block_group_cache *cache) 77 { 78 smp_mb(); 79 return cache->cached == BTRFS_CACHE_FINISHED; 80 } 81 82 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 83 { 84 return (cache->flags & bits) == bits; 85 } 86 87 void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 88 { 89 atomic_inc(&cache->count); 90 } 91 92 void btrfs_put_block_group(struct btrfs_block_group_cache *cache) 93 { 94 if (atomic_dec_and_test(&cache->count)) 95 kfree(cache); 96 } 97 98 /* 99 * this adds the block group to the fs_info rb tree for the block group 100 * cache 101 */ 102 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 103 struct btrfs_block_group_cache *block_group) 104 { 105 struct rb_node **p; 106 struct rb_node *parent = NULL; 107 struct btrfs_block_group_cache *cache; 108 109 spin_lock(&info->block_group_cache_lock); 110 p = &info->block_group_cache_tree.rb_node; 111 112 while (*p) { 113 parent = *p; 114 cache = rb_entry(parent, struct btrfs_block_group_cache, 115 cache_node); 116 if (block_group->key.objectid < cache->key.objectid) { 117 p = &(*p)->rb_left; 118 } else if (block_group->key.objectid > cache->key.objectid) { 119 p = &(*p)->rb_right; 120 } else { 121 spin_unlock(&info->block_group_cache_lock); 122 return -EEXIST; 123 } 124 } 125 126 rb_link_node(&block_group->cache_node, parent, p); 127 rb_insert_color(&block_group->cache_node, 128 &info->block_group_cache_tree); 129 spin_unlock(&info->block_group_cache_lock); 130 131 return 0; 132 } 133 134 /* 135 * This will return the block group at or after bytenr if contains is 0, else 136 * it will return the block group that contains the bytenr 137 */ 138 static struct btrfs_block_group_cache * 139 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 140 int contains) 141 { 142 struct btrfs_block_group_cache *cache, *ret = NULL; 143 struct rb_node *n; 144 u64 end, start; 145 146 spin_lock(&info->block_group_cache_lock); 147 n = info->block_group_cache_tree.rb_node; 148 149 while (n) { 150 cache = rb_entry(n, struct btrfs_block_group_cache, 151 cache_node); 152 end = cache->key.objectid + cache->key.offset - 1; 153 start = cache->key.objectid; 154 155 if (bytenr < start) { 156 if (!contains && (!ret || start < ret->key.objectid)) 157 ret = cache; 158 n = n->rb_left; 159 } else if (bytenr > start) { 160 if (contains && bytenr <= end) { 161 ret = cache; 162 break; 163 } 164 n = n->rb_right; 165 } else { 166 ret = cache; 167 break; 168 } 169 } 170 if (ret) 171 btrfs_get_block_group(ret); 172 spin_unlock(&info->block_group_cache_lock); 173 174 return ret; 175 } 176 177 static int add_excluded_extent(struct btrfs_root *root, 178 u64 start, u64 num_bytes) 179 { 180 u64 end = start + num_bytes - 1; 181 set_extent_bits(&root->fs_info->freed_extents[0], 182 start, end, EXTENT_UPTODATE, GFP_NOFS); 183 set_extent_bits(&root->fs_info->freed_extents[1], 184 start, end, EXTENT_UPTODATE, GFP_NOFS); 185 return 0; 186 } 187 188 static void free_excluded_extents(struct btrfs_root *root, 189 struct btrfs_block_group_cache *cache) 190 { 191 u64 start, end; 192 193 start = cache->key.objectid; 194 end = start + cache->key.offset - 1; 195 196 clear_extent_bits(&root->fs_info->freed_extents[0], 197 start, end, EXTENT_UPTODATE, GFP_NOFS); 198 clear_extent_bits(&root->fs_info->freed_extents[1], 199 start, end, EXTENT_UPTODATE, GFP_NOFS); 200 } 201 202 static int exclude_super_stripes(struct btrfs_root *root, 203 struct btrfs_block_group_cache *cache) 204 { 205 u64 bytenr; 206 u64 *logical; 207 int stripe_len; 208 int i, nr, ret; 209 210 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { 211 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; 212 cache->bytes_super += stripe_len; 213 ret = add_excluded_extent(root, cache->key.objectid, 214 stripe_len); 215 BUG_ON(ret); 216 } 217 218 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 219 bytenr = btrfs_sb_offset(i); 220 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 221 cache->key.objectid, bytenr, 222 0, &logical, &nr, &stripe_len); 223 BUG_ON(ret); 224 225 while (nr--) { 226 cache->bytes_super += stripe_len; 227 ret = add_excluded_extent(root, logical[nr], 228 stripe_len); 229 BUG_ON(ret); 230 } 231 232 kfree(logical); 233 } 234 return 0; 235 } 236 237 static struct btrfs_caching_control * 238 get_caching_control(struct btrfs_block_group_cache *cache) 239 { 240 struct btrfs_caching_control *ctl; 241 242 spin_lock(&cache->lock); 243 if (cache->cached != BTRFS_CACHE_STARTED) { 244 spin_unlock(&cache->lock); 245 return NULL; 246 } 247 248 ctl = cache->caching_ctl; 249 atomic_inc(&ctl->count); 250 spin_unlock(&cache->lock); 251 return ctl; 252 } 253 254 static void put_caching_control(struct btrfs_caching_control *ctl) 255 { 256 if (atomic_dec_and_test(&ctl->count)) 257 kfree(ctl); 258 } 259 260 /* 261 * this is only called by cache_block_group, since we could have freed extents 262 * we need to check the pinned_extents for any extents that can't be used yet 263 * since their free space will be released as soon as the transaction commits. 264 */ 265 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 266 struct btrfs_fs_info *info, u64 start, u64 end) 267 { 268 u64 extent_start, extent_end, size, total_added = 0; 269 int ret; 270 271 while (start < end) { 272 ret = find_first_extent_bit(info->pinned_extents, start, 273 &extent_start, &extent_end, 274 EXTENT_DIRTY | EXTENT_UPTODATE); 275 if (ret) 276 break; 277 278 if (extent_start <= start) { 279 start = extent_end + 1; 280 } else if (extent_start > start && extent_start < end) { 281 size = extent_start - start; 282 total_added += size; 283 ret = btrfs_add_free_space(block_group, start, 284 size); 285 BUG_ON(ret); 286 start = extent_end + 1; 287 } else { 288 break; 289 } 290 } 291 292 if (start < end) { 293 size = end - start; 294 total_added += size; 295 ret = btrfs_add_free_space(block_group, start, size); 296 BUG_ON(ret); 297 } 298 299 return total_added; 300 } 301 302 static int caching_kthread(void *data) 303 { 304 struct btrfs_block_group_cache *block_group = data; 305 struct btrfs_fs_info *fs_info = block_group->fs_info; 306 struct btrfs_caching_control *caching_ctl = block_group->caching_ctl; 307 struct btrfs_root *extent_root = fs_info->extent_root; 308 struct btrfs_path *path; 309 struct extent_buffer *leaf; 310 struct btrfs_key key; 311 u64 total_found = 0; 312 u64 last = 0; 313 u32 nritems; 314 int ret = 0; 315 316 path = btrfs_alloc_path(); 317 if (!path) 318 return -ENOMEM; 319 320 exclude_super_stripes(extent_root, block_group); 321 spin_lock(&block_group->space_info->lock); 322 block_group->space_info->bytes_super += block_group->bytes_super; 323 spin_unlock(&block_group->space_info->lock); 324 325 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 326 327 /* 328 * We don't want to deadlock with somebody trying to allocate a new 329 * extent for the extent root while also trying to search the extent 330 * root to add free space. So we skip locking and search the commit 331 * root, since its read-only 332 */ 333 path->skip_locking = 1; 334 path->search_commit_root = 1; 335 path->reada = 2; 336 337 key.objectid = last; 338 key.offset = 0; 339 key.type = BTRFS_EXTENT_ITEM_KEY; 340 again: 341 mutex_lock(&caching_ctl->mutex); 342 /* need to make sure the commit_root doesn't disappear */ 343 down_read(&fs_info->extent_commit_sem); 344 345 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 346 if (ret < 0) 347 goto err; 348 349 leaf = path->nodes[0]; 350 nritems = btrfs_header_nritems(leaf); 351 352 while (1) { 353 smp_mb(); 354 if (fs_info->closing > 1) { 355 last = (u64)-1; 356 break; 357 } 358 359 if (path->slots[0] < nritems) { 360 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 361 } else { 362 ret = find_next_key(path, 0, &key); 363 if (ret) 364 break; 365 366 caching_ctl->progress = last; 367 btrfs_release_path(extent_root, path); 368 up_read(&fs_info->extent_commit_sem); 369 mutex_unlock(&caching_ctl->mutex); 370 if (btrfs_transaction_in_commit(fs_info)) 371 schedule_timeout(1); 372 else 373 cond_resched(); 374 goto again; 375 } 376 377 if (key.objectid < block_group->key.objectid) { 378 path->slots[0]++; 379 continue; 380 } 381 382 if (key.objectid >= block_group->key.objectid + 383 block_group->key.offset) 384 break; 385 386 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 387 total_found += add_new_free_space(block_group, 388 fs_info, last, 389 key.objectid); 390 last = key.objectid + key.offset; 391 392 if (total_found > (1024 * 1024 * 2)) { 393 total_found = 0; 394 wake_up(&caching_ctl->wait); 395 } 396 } 397 path->slots[0]++; 398 } 399 ret = 0; 400 401 total_found += add_new_free_space(block_group, fs_info, last, 402 block_group->key.objectid + 403 block_group->key.offset); 404 caching_ctl->progress = (u64)-1; 405 406 spin_lock(&block_group->lock); 407 block_group->caching_ctl = NULL; 408 block_group->cached = BTRFS_CACHE_FINISHED; 409 spin_unlock(&block_group->lock); 410 411 err: 412 btrfs_free_path(path); 413 up_read(&fs_info->extent_commit_sem); 414 415 free_excluded_extents(extent_root, block_group); 416 417 mutex_unlock(&caching_ctl->mutex); 418 wake_up(&caching_ctl->wait); 419 420 put_caching_control(caching_ctl); 421 atomic_dec(&block_group->space_info->caching_threads); 422 btrfs_put_block_group(block_group); 423 424 return 0; 425 } 426 427 static int cache_block_group(struct btrfs_block_group_cache *cache) 428 { 429 struct btrfs_fs_info *fs_info = cache->fs_info; 430 struct btrfs_caching_control *caching_ctl; 431 struct task_struct *tsk; 432 int ret = 0; 433 434 smp_mb(); 435 if (cache->cached != BTRFS_CACHE_NO) 436 return 0; 437 438 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_KERNEL); 439 BUG_ON(!caching_ctl); 440 441 INIT_LIST_HEAD(&caching_ctl->list); 442 mutex_init(&caching_ctl->mutex); 443 init_waitqueue_head(&caching_ctl->wait); 444 caching_ctl->block_group = cache; 445 caching_ctl->progress = cache->key.objectid; 446 /* one for caching kthread, one for caching block group list */ 447 atomic_set(&caching_ctl->count, 2); 448 449 spin_lock(&cache->lock); 450 if (cache->cached != BTRFS_CACHE_NO) { 451 spin_unlock(&cache->lock); 452 kfree(caching_ctl); 453 return 0; 454 } 455 cache->caching_ctl = caching_ctl; 456 cache->cached = BTRFS_CACHE_STARTED; 457 spin_unlock(&cache->lock); 458 459 down_write(&fs_info->extent_commit_sem); 460 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 461 up_write(&fs_info->extent_commit_sem); 462 463 atomic_inc(&cache->space_info->caching_threads); 464 btrfs_get_block_group(cache); 465 466 tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n", 467 cache->key.objectid); 468 if (IS_ERR(tsk)) { 469 ret = PTR_ERR(tsk); 470 printk(KERN_ERR "error running thread %d\n", ret); 471 BUG(); 472 } 473 474 return ret; 475 } 476 477 /* 478 * return the block group that starts at or after bytenr 479 */ 480 static struct btrfs_block_group_cache * 481 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 482 { 483 struct btrfs_block_group_cache *cache; 484 485 cache = block_group_cache_tree_search(info, bytenr, 0); 486 487 return cache; 488 } 489 490 /* 491 * return the block group that contains the given bytenr 492 */ 493 struct btrfs_block_group_cache *btrfs_lookup_block_group( 494 struct btrfs_fs_info *info, 495 u64 bytenr) 496 { 497 struct btrfs_block_group_cache *cache; 498 499 cache = block_group_cache_tree_search(info, bytenr, 1); 500 501 return cache; 502 } 503 504 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 505 u64 flags) 506 { 507 struct list_head *head = &info->space_info; 508 struct btrfs_space_info *found; 509 510 rcu_read_lock(); 511 list_for_each_entry_rcu(found, head, list) { 512 if (found->flags == flags) { 513 rcu_read_unlock(); 514 return found; 515 } 516 } 517 rcu_read_unlock(); 518 return NULL; 519 } 520 521 /* 522 * after adding space to the filesystem, we need to clear the full flags 523 * on all the space infos. 524 */ 525 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 526 { 527 struct list_head *head = &info->space_info; 528 struct btrfs_space_info *found; 529 530 rcu_read_lock(); 531 list_for_each_entry_rcu(found, head, list) 532 found->full = 0; 533 rcu_read_unlock(); 534 } 535 536 static u64 div_factor(u64 num, int factor) 537 { 538 if (factor == 10) 539 return num; 540 num *= factor; 541 do_div(num, 10); 542 return num; 543 } 544 545 u64 btrfs_find_block_group(struct btrfs_root *root, 546 u64 search_start, u64 search_hint, int owner) 547 { 548 struct btrfs_block_group_cache *cache; 549 u64 used; 550 u64 last = max(search_hint, search_start); 551 u64 group_start = 0; 552 int full_search = 0; 553 int factor = 9; 554 int wrapped = 0; 555 again: 556 while (1) { 557 cache = btrfs_lookup_first_block_group(root->fs_info, last); 558 if (!cache) 559 break; 560 561 spin_lock(&cache->lock); 562 last = cache->key.objectid + cache->key.offset; 563 used = btrfs_block_group_used(&cache->item); 564 565 if ((full_search || !cache->ro) && 566 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 567 if (used + cache->pinned + cache->reserved < 568 div_factor(cache->key.offset, factor)) { 569 group_start = cache->key.objectid; 570 spin_unlock(&cache->lock); 571 btrfs_put_block_group(cache); 572 goto found; 573 } 574 } 575 spin_unlock(&cache->lock); 576 btrfs_put_block_group(cache); 577 cond_resched(); 578 } 579 if (!wrapped) { 580 last = search_start; 581 wrapped = 1; 582 goto again; 583 } 584 if (!full_search && factor < 10) { 585 last = search_start; 586 full_search = 1; 587 factor = 10; 588 goto again; 589 } 590 found: 591 return group_start; 592 } 593 594 /* simple helper to search for an existing extent at a given offset */ 595 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 596 { 597 int ret; 598 struct btrfs_key key; 599 struct btrfs_path *path; 600 601 path = btrfs_alloc_path(); 602 BUG_ON(!path); 603 key.objectid = start; 604 key.offset = len; 605 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 606 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 607 0, 0); 608 btrfs_free_path(path); 609 return ret; 610 } 611 612 /* 613 * Back reference rules. Back refs have three main goals: 614 * 615 * 1) differentiate between all holders of references to an extent so that 616 * when a reference is dropped we can make sure it was a valid reference 617 * before freeing the extent. 618 * 619 * 2) Provide enough information to quickly find the holders of an extent 620 * if we notice a given block is corrupted or bad. 621 * 622 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 623 * maintenance. This is actually the same as #2, but with a slightly 624 * different use case. 625 * 626 * There are two kinds of back refs. The implicit back refs is optimized 627 * for pointers in non-shared tree blocks. For a given pointer in a block, 628 * back refs of this kind provide information about the block's owner tree 629 * and the pointer's key. These information allow us to find the block by 630 * b-tree searching. The full back refs is for pointers in tree blocks not 631 * referenced by their owner trees. The location of tree block is recorded 632 * in the back refs. Actually the full back refs is generic, and can be 633 * used in all cases the implicit back refs is used. The major shortcoming 634 * of the full back refs is its overhead. Every time a tree block gets 635 * COWed, we have to update back refs entry for all pointers in it. 636 * 637 * For a newly allocated tree block, we use implicit back refs for 638 * pointers in it. This means most tree related operations only involve 639 * implicit back refs. For a tree block created in old transaction, the 640 * only way to drop a reference to it is COW it. So we can detect the 641 * event that tree block loses its owner tree's reference and do the 642 * back refs conversion. 643 * 644 * When a tree block is COW'd through a tree, there are four cases: 645 * 646 * The reference count of the block is one and the tree is the block's 647 * owner tree. Nothing to do in this case. 648 * 649 * The reference count of the block is one and the tree is not the 650 * block's owner tree. In this case, full back refs is used for pointers 651 * in the block. Remove these full back refs, add implicit back refs for 652 * every pointers in the new block. 653 * 654 * The reference count of the block is greater than one and the tree is 655 * the block's owner tree. In this case, implicit back refs is used for 656 * pointers in the block. Add full back refs for every pointers in the 657 * block, increase lower level extents' reference counts. The original 658 * implicit back refs are entailed to the new block. 659 * 660 * The reference count of the block is greater than one and the tree is 661 * not the block's owner tree. Add implicit back refs for every pointer in 662 * the new block, increase lower level extents' reference count. 663 * 664 * Back Reference Key composing: 665 * 666 * The key objectid corresponds to the first byte in the extent, 667 * The key type is used to differentiate between types of back refs. 668 * There are different meanings of the key offset for different types 669 * of back refs. 670 * 671 * File extents can be referenced by: 672 * 673 * - multiple snapshots, subvolumes, or different generations in one subvol 674 * - different files inside a single subvolume 675 * - different offsets inside a file (bookend extents in file.c) 676 * 677 * The extent ref structure for the implicit back refs has fields for: 678 * 679 * - Objectid of the subvolume root 680 * - objectid of the file holding the reference 681 * - original offset in the file 682 * - how many bookend extents 683 * 684 * The key offset for the implicit back refs is hash of the first 685 * three fields. 686 * 687 * The extent ref structure for the full back refs has field for: 688 * 689 * - number of pointers in the tree leaf 690 * 691 * The key offset for the implicit back refs is the first byte of 692 * the tree leaf 693 * 694 * When a file extent is allocated, The implicit back refs is used. 695 * the fields are filled in: 696 * 697 * (root_key.objectid, inode objectid, offset in file, 1) 698 * 699 * When a file extent is removed file truncation, we find the 700 * corresponding implicit back refs and check the following fields: 701 * 702 * (btrfs_header_owner(leaf), inode objectid, offset in file) 703 * 704 * Btree extents can be referenced by: 705 * 706 * - Different subvolumes 707 * 708 * Both the implicit back refs and the full back refs for tree blocks 709 * only consist of key. The key offset for the implicit back refs is 710 * objectid of block's owner tree. The key offset for the full back refs 711 * is the first byte of parent block. 712 * 713 * When implicit back refs is used, information about the lowest key and 714 * level of the tree block are required. These information are stored in 715 * tree block info structure. 716 */ 717 718 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 719 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 720 struct btrfs_root *root, 721 struct btrfs_path *path, 722 u64 owner, u32 extra_size) 723 { 724 struct btrfs_extent_item *item; 725 struct btrfs_extent_item_v0 *ei0; 726 struct btrfs_extent_ref_v0 *ref0; 727 struct btrfs_tree_block_info *bi; 728 struct extent_buffer *leaf; 729 struct btrfs_key key; 730 struct btrfs_key found_key; 731 u32 new_size = sizeof(*item); 732 u64 refs; 733 int ret; 734 735 leaf = path->nodes[0]; 736 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 737 738 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 739 ei0 = btrfs_item_ptr(leaf, path->slots[0], 740 struct btrfs_extent_item_v0); 741 refs = btrfs_extent_refs_v0(leaf, ei0); 742 743 if (owner == (u64)-1) { 744 while (1) { 745 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 746 ret = btrfs_next_leaf(root, path); 747 if (ret < 0) 748 return ret; 749 BUG_ON(ret > 0); 750 leaf = path->nodes[0]; 751 } 752 btrfs_item_key_to_cpu(leaf, &found_key, 753 path->slots[0]); 754 BUG_ON(key.objectid != found_key.objectid); 755 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 756 path->slots[0]++; 757 continue; 758 } 759 ref0 = btrfs_item_ptr(leaf, path->slots[0], 760 struct btrfs_extent_ref_v0); 761 owner = btrfs_ref_objectid_v0(leaf, ref0); 762 break; 763 } 764 } 765 btrfs_release_path(root, path); 766 767 if (owner < BTRFS_FIRST_FREE_OBJECTID) 768 new_size += sizeof(*bi); 769 770 new_size -= sizeof(*ei0); 771 ret = btrfs_search_slot(trans, root, &key, path, 772 new_size + extra_size, 1); 773 if (ret < 0) 774 return ret; 775 BUG_ON(ret); 776 777 ret = btrfs_extend_item(trans, root, path, new_size); 778 BUG_ON(ret); 779 780 leaf = path->nodes[0]; 781 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 782 btrfs_set_extent_refs(leaf, item, refs); 783 /* FIXME: get real generation */ 784 btrfs_set_extent_generation(leaf, item, 0); 785 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 786 btrfs_set_extent_flags(leaf, item, 787 BTRFS_EXTENT_FLAG_TREE_BLOCK | 788 BTRFS_BLOCK_FLAG_FULL_BACKREF); 789 bi = (struct btrfs_tree_block_info *)(item + 1); 790 /* FIXME: get first key of the block */ 791 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 792 btrfs_set_tree_block_level(leaf, bi, (int)owner); 793 } else { 794 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 795 } 796 btrfs_mark_buffer_dirty(leaf); 797 return 0; 798 } 799 #endif 800 801 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 802 { 803 u32 high_crc = ~(u32)0; 804 u32 low_crc = ~(u32)0; 805 __le64 lenum; 806 807 lenum = cpu_to_le64(root_objectid); 808 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 809 lenum = cpu_to_le64(owner); 810 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 811 lenum = cpu_to_le64(offset); 812 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 813 814 return ((u64)high_crc << 31) ^ (u64)low_crc; 815 } 816 817 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 818 struct btrfs_extent_data_ref *ref) 819 { 820 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 821 btrfs_extent_data_ref_objectid(leaf, ref), 822 btrfs_extent_data_ref_offset(leaf, ref)); 823 } 824 825 static int match_extent_data_ref(struct extent_buffer *leaf, 826 struct btrfs_extent_data_ref *ref, 827 u64 root_objectid, u64 owner, u64 offset) 828 { 829 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 830 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 831 btrfs_extent_data_ref_offset(leaf, ref) != offset) 832 return 0; 833 return 1; 834 } 835 836 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 837 struct btrfs_root *root, 838 struct btrfs_path *path, 839 u64 bytenr, u64 parent, 840 u64 root_objectid, 841 u64 owner, u64 offset) 842 { 843 struct btrfs_key key; 844 struct btrfs_extent_data_ref *ref; 845 struct extent_buffer *leaf; 846 u32 nritems; 847 int ret; 848 int recow; 849 int err = -ENOENT; 850 851 key.objectid = bytenr; 852 if (parent) { 853 key.type = BTRFS_SHARED_DATA_REF_KEY; 854 key.offset = parent; 855 } else { 856 key.type = BTRFS_EXTENT_DATA_REF_KEY; 857 key.offset = hash_extent_data_ref(root_objectid, 858 owner, offset); 859 } 860 again: 861 recow = 0; 862 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 863 if (ret < 0) { 864 err = ret; 865 goto fail; 866 } 867 868 if (parent) { 869 if (!ret) 870 return 0; 871 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 872 key.type = BTRFS_EXTENT_REF_V0_KEY; 873 btrfs_release_path(root, path); 874 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 875 if (ret < 0) { 876 err = ret; 877 goto fail; 878 } 879 if (!ret) 880 return 0; 881 #endif 882 goto fail; 883 } 884 885 leaf = path->nodes[0]; 886 nritems = btrfs_header_nritems(leaf); 887 while (1) { 888 if (path->slots[0] >= nritems) { 889 ret = btrfs_next_leaf(root, path); 890 if (ret < 0) 891 err = ret; 892 if (ret) 893 goto fail; 894 895 leaf = path->nodes[0]; 896 nritems = btrfs_header_nritems(leaf); 897 recow = 1; 898 } 899 900 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 901 if (key.objectid != bytenr || 902 key.type != BTRFS_EXTENT_DATA_REF_KEY) 903 goto fail; 904 905 ref = btrfs_item_ptr(leaf, path->slots[0], 906 struct btrfs_extent_data_ref); 907 908 if (match_extent_data_ref(leaf, ref, root_objectid, 909 owner, offset)) { 910 if (recow) { 911 btrfs_release_path(root, path); 912 goto again; 913 } 914 err = 0; 915 break; 916 } 917 path->slots[0]++; 918 } 919 fail: 920 return err; 921 } 922 923 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 924 struct btrfs_root *root, 925 struct btrfs_path *path, 926 u64 bytenr, u64 parent, 927 u64 root_objectid, u64 owner, 928 u64 offset, int refs_to_add) 929 { 930 struct btrfs_key key; 931 struct extent_buffer *leaf; 932 u32 size; 933 u32 num_refs; 934 int ret; 935 936 key.objectid = bytenr; 937 if (parent) { 938 key.type = BTRFS_SHARED_DATA_REF_KEY; 939 key.offset = parent; 940 size = sizeof(struct btrfs_shared_data_ref); 941 } else { 942 key.type = BTRFS_EXTENT_DATA_REF_KEY; 943 key.offset = hash_extent_data_ref(root_objectid, 944 owner, offset); 945 size = sizeof(struct btrfs_extent_data_ref); 946 } 947 948 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 949 if (ret && ret != -EEXIST) 950 goto fail; 951 952 leaf = path->nodes[0]; 953 if (parent) { 954 struct btrfs_shared_data_ref *ref; 955 ref = btrfs_item_ptr(leaf, path->slots[0], 956 struct btrfs_shared_data_ref); 957 if (ret == 0) { 958 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 959 } else { 960 num_refs = btrfs_shared_data_ref_count(leaf, ref); 961 num_refs += refs_to_add; 962 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 963 } 964 } else { 965 struct btrfs_extent_data_ref *ref; 966 while (ret == -EEXIST) { 967 ref = btrfs_item_ptr(leaf, path->slots[0], 968 struct btrfs_extent_data_ref); 969 if (match_extent_data_ref(leaf, ref, root_objectid, 970 owner, offset)) 971 break; 972 btrfs_release_path(root, path); 973 key.offset++; 974 ret = btrfs_insert_empty_item(trans, root, path, &key, 975 size); 976 if (ret && ret != -EEXIST) 977 goto fail; 978 979 leaf = path->nodes[0]; 980 } 981 ref = btrfs_item_ptr(leaf, path->slots[0], 982 struct btrfs_extent_data_ref); 983 if (ret == 0) { 984 btrfs_set_extent_data_ref_root(leaf, ref, 985 root_objectid); 986 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 987 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 988 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 989 } else { 990 num_refs = btrfs_extent_data_ref_count(leaf, ref); 991 num_refs += refs_to_add; 992 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 993 } 994 } 995 btrfs_mark_buffer_dirty(leaf); 996 ret = 0; 997 fail: 998 btrfs_release_path(root, path); 999 return ret; 1000 } 1001 1002 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1003 struct btrfs_root *root, 1004 struct btrfs_path *path, 1005 int refs_to_drop) 1006 { 1007 struct btrfs_key key; 1008 struct btrfs_extent_data_ref *ref1 = NULL; 1009 struct btrfs_shared_data_ref *ref2 = NULL; 1010 struct extent_buffer *leaf; 1011 u32 num_refs = 0; 1012 int ret = 0; 1013 1014 leaf = path->nodes[0]; 1015 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1016 1017 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1018 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1019 struct btrfs_extent_data_ref); 1020 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1021 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1022 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1023 struct btrfs_shared_data_ref); 1024 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1025 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1026 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1027 struct btrfs_extent_ref_v0 *ref0; 1028 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1029 struct btrfs_extent_ref_v0); 1030 num_refs = btrfs_ref_count_v0(leaf, ref0); 1031 #endif 1032 } else { 1033 BUG(); 1034 } 1035 1036 BUG_ON(num_refs < refs_to_drop); 1037 num_refs -= refs_to_drop; 1038 1039 if (num_refs == 0) { 1040 ret = btrfs_del_item(trans, root, path); 1041 } else { 1042 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1043 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1044 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1045 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1046 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1047 else { 1048 struct btrfs_extent_ref_v0 *ref0; 1049 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1050 struct btrfs_extent_ref_v0); 1051 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1052 } 1053 #endif 1054 btrfs_mark_buffer_dirty(leaf); 1055 } 1056 return ret; 1057 } 1058 1059 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 1060 struct btrfs_path *path, 1061 struct btrfs_extent_inline_ref *iref) 1062 { 1063 struct btrfs_key key; 1064 struct extent_buffer *leaf; 1065 struct btrfs_extent_data_ref *ref1; 1066 struct btrfs_shared_data_ref *ref2; 1067 u32 num_refs = 0; 1068 1069 leaf = path->nodes[0]; 1070 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1071 if (iref) { 1072 if (btrfs_extent_inline_ref_type(leaf, iref) == 1073 BTRFS_EXTENT_DATA_REF_KEY) { 1074 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1075 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1076 } else { 1077 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 1078 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1079 } 1080 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1081 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1082 struct btrfs_extent_data_ref); 1083 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1084 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1085 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1086 struct btrfs_shared_data_ref); 1087 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1088 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1089 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1090 struct btrfs_extent_ref_v0 *ref0; 1091 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1092 struct btrfs_extent_ref_v0); 1093 num_refs = btrfs_ref_count_v0(leaf, ref0); 1094 #endif 1095 } else { 1096 WARN_ON(1); 1097 } 1098 return num_refs; 1099 } 1100 1101 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1102 struct btrfs_root *root, 1103 struct btrfs_path *path, 1104 u64 bytenr, u64 parent, 1105 u64 root_objectid) 1106 { 1107 struct btrfs_key key; 1108 int ret; 1109 1110 key.objectid = bytenr; 1111 if (parent) { 1112 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1113 key.offset = parent; 1114 } else { 1115 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1116 key.offset = root_objectid; 1117 } 1118 1119 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1120 if (ret > 0) 1121 ret = -ENOENT; 1122 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1123 if (ret == -ENOENT && parent) { 1124 btrfs_release_path(root, path); 1125 key.type = BTRFS_EXTENT_REF_V0_KEY; 1126 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1127 if (ret > 0) 1128 ret = -ENOENT; 1129 } 1130 #endif 1131 return ret; 1132 } 1133 1134 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1135 struct btrfs_root *root, 1136 struct btrfs_path *path, 1137 u64 bytenr, u64 parent, 1138 u64 root_objectid) 1139 { 1140 struct btrfs_key key; 1141 int ret; 1142 1143 key.objectid = bytenr; 1144 if (parent) { 1145 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1146 key.offset = parent; 1147 } else { 1148 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1149 key.offset = root_objectid; 1150 } 1151 1152 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1153 btrfs_release_path(root, path); 1154 return ret; 1155 } 1156 1157 static inline int extent_ref_type(u64 parent, u64 owner) 1158 { 1159 int type; 1160 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1161 if (parent > 0) 1162 type = BTRFS_SHARED_BLOCK_REF_KEY; 1163 else 1164 type = BTRFS_TREE_BLOCK_REF_KEY; 1165 } else { 1166 if (parent > 0) 1167 type = BTRFS_SHARED_DATA_REF_KEY; 1168 else 1169 type = BTRFS_EXTENT_DATA_REF_KEY; 1170 } 1171 return type; 1172 } 1173 1174 static int find_next_key(struct btrfs_path *path, int level, 1175 struct btrfs_key *key) 1176 1177 { 1178 for (; level < BTRFS_MAX_LEVEL; level++) { 1179 if (!path->nodes[level]) 1180 break; 1181 if (path->slots[level] + 1 >= 1182 btrfs_header_nritems(path->nodes[level])) 1183 continue; 1184 if (level == 0) 1185 btrfs_item_key_to_cpu(path->nodes[level], key, 1186 path->slots[level] + 1); 1187 else 1188 btrfs_node_key_to_cpu(path->nodes[level], key, 1189 path->slots[level] + 1); 1190 return 0; 1191 } 1192 return 1; 1193 } 1194 1195 /* 1196 * look for inline back ref. if back ref is found, *ref_ret is set 1197 * to the address of inline back ref, and 0 is returned. 1198 * 1199 * if back ref isn't found, *ref_ret is set to the address where it 1200 * should be inserted, and -ENOENT is returned. 1201 * 1202 * if insert is true and there are too many inline back refs, the path 1203 * points to the extent item, and -EAGAIN is returned. 1204 * 1205 * NOTE: inline back refs are ordered in the same way that back ref 1206 * items in the tree are ordered. 1207 */ 1208 static noinline_for_stack 1209 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1210 struct btrfs_root *root, 1211 struct btrfs_path *path, 1212 struct btrfs_extent_inline_ref **ref_ret, 1213 u64 bytenr, u64 num_bytes, 1214 u64 parent, u64 root_objectid, 1215 u64 owner, u64 offset, int insert) 1216 { 1217 struct btrfs_key key; 1218 struct extent_buffer *leaf; 1219 struct btrfs_extent_item *ei; 1220 struct btrfs_extent_inline_ref *iref; 1221 u64 flags; 1222 u64 item_size; 1223 unsigned long ptr; 1224 unsigned long end; 1225 int extra_size; 1226 int type; 1227 int want; 1228 int ret; 1229 int err = 0; 1230 1231 key.objectid = bytenr; 1232 key.type = BTRFS_EXTENT_ITEM_KEY; 1233 key.offset = num_bytes; 1234 1235 want = extent_ref_type(parent, owner); 1236 if (insert) { 1237 extra_size = btrfs_extent_inline_ref_size(want); 1238 path->keep_locks = 1; 1239 } else 1240 extra_size = -1; 1241 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1242 if (ret < 0) { 1243 err = ret; 1244 goto out; 1245 } 1246 BUG_ON(ret); 1247 1248 leaf = path->nodes[0]; 1249 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1250 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1251 if (item_size < sizeof(*ei)) { 1252 if (!insert) { 1253 err = -ENOENT; 1254 goto out; 1255 } 1256 ret = convert_extent_item_v0(trans, root, path, owner, 1257 extra_size); 1258 if (ret < 0) { 1259 err = ret; 1260 goto out; 1261 } 1262 leaf = path->nodes[0]; 1263 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1264 } 1265 #endif 1266 BUG_ON(item_size < sizeof(*ei)); 1267 1268 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1269 flags = btrfs_extent_flags(leaf, ei); 1270 1271 ptr = (unsigned long)(ei + 1); 1272 end = (unsigned long)ei + item_size; 1273 1274 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1275 ptr += sizeof(struct btrfs_tree_block_info); 1276 BUG_ON(ptr > end); 1277 } else { 1278 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1279 } 1280 1281 err = -ENOENT; 1282 while (1) { 1283 if (ptr >= end) { 1284 WARN_ON(ptr > end); 1285 break; 1286 } 1287 iref = (struct btrfs_extent_inline_ref *)ptr; 1288 type = btrfs_extent_inline_ref_type(leaf, iref); 1289 if (want < type) 1290 break; 1291 if (want > type) { 1292 ptr += btrfs_extent_inline_ref_size(type); 1293 continue; 1294 } 1295 1296 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1297 struct btrfs_extent_data_ref *dref; 1298 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1299 if (match_extent_data_ref(leaf, dref, root_objectid, 1300 owner, offset)) { 1301 err = 0; 1302 break; 1303 } 1304 if (hash_extent_data_ref_item(leaf, dref) < 1305 hash_extent_data_ref(root_objectid, owner, offset)) 1306 break; 1307 } else { 1308 u64 ref_offset; 1309 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1310 if (parent > 0) { 1311 if (parent == ref_offset) { 1312 err = 0; 1313 break; 1314 } 1315 if (ref_offset < parent) 1316 break; 1317 } else { 1318 if (root_objectid == ref_offset) { 1319 err = 0; 1320 break; 1321 } 1322 if (ref_offset < root_objectid) 1323 break; 1324 } 1325 } 1326 ptr += btrfs_extent_inline_ref_size(type); 1327 } 1328 if (err == -ENOENT && insert) { 1329 if (item_size + extra_size >= 1330 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1331 err = -EAGAIN; 1332 goto out; 1333 } 1334 /* 1335 * To add new inline back ref, we have to make sure 1336 * there is no corresponding back ref item. 1337 * For simplicity, we just do not add new inline back 1338 * ref if there is any kind of item for this block 1339 */ 1340 if (find_next_key(path, 0, &key) == 0 && 1341 key.objectid == bytenr && 1342 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1343 err = -EAGAIN; 1344 goto out; 1345 } 1346 } 1347 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1348 out: 1349 if (insert) { 1350 path->keep_locks = 0; 1351 btrfs_unlock_up_safe(path, 1); 1352 } 1353 return err; 1354 } 1355 1356 /* 1357 * helper to add new inline back ref 1358 */ 1359 static noinline_for_stack 1360 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1361 struct btrfs_root *root, 1362 struct btrfs_path *path, 1363 struct btrfs_extent_inline_ref *iref, 1364 u64 parent, u64 root_objectid, 1365 u64 owner, u64 offset, int refs_to_add, 1366 struct btrfs_delayed_extent_op *extent_op) 1367 { 1368 struct extent_buffer *leaf; 1369 struct btrfs_extent_item *ei; 1370 unsigned long ptr; 1371 unsigned long end; 1372 unsigned long item_offset; 1373 u64 refs; 1374 int size; 1375 int type; 1376 int ret; 1377 1378 leaf = path->nodes[0]; 1379 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1380 item_offset = (unsigned long)iref - (unsigned long)ei; 1381 1382 type = extent_ref_type(parent, owner); 1383 size = btrfs_extent_inline_ref_size(type); 1384 1385 ret = btrfs_extend_item(trans, root, path, size); 1386 BUG_ON(ret); 1387 1388 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1389 refs = btrfs_extent_refs(leaf, ei); 1390 refs += refs_to_add; 1391 btrfs_set_extent_refs(leaf, ei, refs); 1392 if (extent_op) 1393 __run_delayed_extent_op(extent_op, leaf, ei); 1394 1395 ptr = (unsigned long)ei + item_offset; 1396 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1397 if (ptr < end - size) 1398 memmove_extent_buffer(leaf, ptr + size, ptr, 1399 end - size - ptr); 1400 1401 iref = (struct btrfs_extent_inline_ref *)ptr; 1402 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1403 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1404 struct btrfs_extent_data_ref *dref; 1405 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1406 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1407 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1408 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1409 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1410 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1411 struct btrfs_shared_data_ref *sref; 1412 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1413 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1414 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1415 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1416 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1417 } else { 1418 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1419 } 1420 btrfs_mark_buffer_dirty(leaf); 1421 return 0; 1422 } 1423 1424 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1425 struct btrfs_root *root, 1426 struct btrfs_path *path, 1427 struct btrfs_extent_inline_ref **ref_ret, 1428 u64 bytenr, u64 num_bytes, u64 parent, 1429 u64 root_objectid, u64 owner, u64 offset) 1430 { 1431 int ret; 1432 1433 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1434 bytenr, num_bytes, parent, 1435 root_objectid, owner, offset, 0); 1436 if (ret != -ENOENT) 1437 return ret; 1438 1439 btrfs_release_path(root, path); 1440 *ref_ret = NULL; 1441 1442 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1443 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1444 root_objectid); 1445 } else { 1446 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1447 root_objectid, owner, offset); 1448 } 1449 return ret; 1450 } 1451 1452 /* 1453 * helper to update/remove inline back ref 1454 */ 1455 static noinline_for_stack 1456 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1457 struct btrfs_root *root, 1458 struct btrfs_path *path, 1459 struct btrfs_extent_inline_ref *iref, 1460 int refs_to_mod, 1461 struct btrfs_delayed_extent_op *extent_op) 1462 { 1463 struct extent_buffer *leaf; 1464 struct btrfs_extent_item *ei; 1465 struct btrfs_extent_data_ref *dref = NULL; 1466 struct btrfs_shared_data_ref *sref = NULL; 1467 unsigned long ptr; 1468 unsigned long end; 1469 u32 item_size; 1470 int size; 1471 int type; 1472 int ret; 1473 u64 refs; 1474 1475 leaf = path->nodes[0]; 1476 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1477 refs = btrfs_extent_refs(leaf, ei); 1478 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1479 refs += refs_to_mod; 1480 btrfs_set_extent_refs(leaf, ei, refs); 1481 if (extent_op) 1482 __run_delayed_extent_op(extent_op, leaf, ei); 1483 1484 type = btrfs_extent_inline_ref_type(leaf, iref); 1485 1486 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1487 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1488 refs = btrfs_extent_data_ref_count(leaf, dref); 1489 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1490 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1491 refs = btrfs_shared_data_ref_count(leaf, sref); 1492 } else { 1493 refs = 1; 1494 BUG_ON(refs_to_mod != -1); 1495 } 1496 1497 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1498 refs += refs_to_mod; 1499 1500 if (refs > 0) { 1501 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1502 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1503 else 1504 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1505 } else { 1506 size = btrfs_extent_inline_ref_size(type); 1507 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1508 ptr = (unsigned long)iref; 1509 end = (unsigned long)ei + item_size; 1510 if (ptr + size < end) 1511 memmove_extent_buffer(leaf, ptr, ptr + size, 1512 end - ptr - size); 1513 item_size -= size; 1514 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1515 BUG_ON(ret); 1516 } 1517 btrfs_mark_buffer_dirty(leaf); 1518 return 0; 1519 } 1520 1521 static noinline_for_stack 1522 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1523 struct btrfs_root *root, 1524 struct btrfs_path *path, 1525 u64 bytenr, u64 num_bytes, u64 parent, 1526 u64 root_objectid, u64 owner, 1527 u64 offset, int refs_to_add, 1528 struct btrfs_delayed_extent_op *extent_op) 1529 { 1530 struct btrfs_extent_inline_ref *iref; 1531 int ret; 1532 1533 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1534 bytenr, num_bytes, parent, 1535 root_objectid, owner, offset, 1); 1536 if (ret == 0) { 1537 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1538 ret = update_inline_extent_backref(trans, root, path, iref, 1539 refs_to_add, extent_op); 1540 } else if (ret == -ENOENT) { 1541 ret = setup_inline_extent_backref(trans, root, path, iref, 1542 parent, root_objectid, 1543 owner, offset, refs_to_add, 1544 extent_op); 1545 } 1546 return ret; 1547 } 1548 1549 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1550 struct btrfs_root *root, 1551 struct btrfs_path *path, 1552 u64 bytenr, u64 parent, u64 root_objectid, 1553 u64 owner, u64 offset, int refs_to_add) 1554 { 1555 int ret; 1556 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1557 BUG_ON(refs_to_add != 1); 1558 ret = insert_tree_block_ref(trans, root, path, bytenr, 1559 parent, root_objectid); 1560 } else { 1561 ret = insert_extent_data_ref(trans, root, path, bytenr, 1562 parent, root_objectid, 1563 owner, offset, refs_to_add); 1564 } 1565 return ret; 1566 } 1567 1568 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1569 struct btrfs_root *root, 1570 struct btrfs_path *path, 1571 struct btrfs_extent_inline_ref *iref, 1572 int refs_to_drop, int is_data) 1573 { 1574 int ret; 1575 1576 BUG_ON(!is_data && refs_to_drop != 1); 1577 if (iref) { 1578 ret = update_inline_extent_backref(trans, root, path, iref, 1579 -refs_to_drop, NULL); 1580 } else if (is_data) { 1581 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1582 } else { 1583 ret = btrfs_del_item(trans, root, path); 1584 } 1585 return ret; 1586 } 1587 1588 static void btrfs_issue_discard(struct block_device *bdev, 1589 u64 start, u64 len) 1590 { 1591 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL, 1592 DISCARD_FL_BARRIER); 1593 } 1594 1595 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1596 u64 num_bytes) 1597 { 1598 int ret; 1599 u64 map_length = num_bytes; 1600 struct btrfs_multi_bio *multi = NULL; 1601 1602 if (!btrfs_test_opt(root, DISCARD)) 1603 return 0; 1604 1605 /* Tell the block device(s) that the sectors can be discarded */ 1606 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1607 bytenr, &map_length, &multi, 0); 1608 if (!ret) { 1609 struct btrfs_bio_stripe *stripe = multi->stripes; 1610 int i; 1611 1612 if (map_length > num_bytes) 1613 map_length = num_bytes; 1614 1615 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1616 btrfs_issue_discard(stripe->dev->bdev, 1617 stripe->physical, 1618 map_length); 1619 } 1620 kfree(multi); 1621 } 1622 1623 return ret; 1624 } 1625 1626 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1627 struct btrfs_root *root, 1628 u64 bytenr, u64 num_bytes, u64 parent, 1629 u64 root_objectid, u64 owner, u64 offset) 1630 { 1631 int ret; 1632 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1633 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1634 1635 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1636 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1637 parent, root_objectid, (int)owner, 1638 BTRFS_ADD_DELAYED_REF, NULL); 1639 } else { 1640 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1641 parent, root_objectid, owner, offset, 1642 BTRFS_ADD_DELAYED_REF, NULL); 1643 } 1644 return ret; 1645 } 1646 1647 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1648 struct btrfs_root *root, 1649 u64 bytenr, u64 num_bytes, 1650 u64 parent, u64 root_objectid, 1651 u64 owner, u64 offset, int refs_to_add, 1652 struct btrfs_delayed_extent_op *extent_op) 1653 { 1654 struct btrfs_path *path; 1655 struct extent_buffer *leaf; 1656 struct btrfs_extent_item *item; 1657 u64 refs; 1658 int ret; 1659 int err = 0; 1660 1661 path = btrfs_alloc_path(); 1662 if (!path) 1663 return -ENOMEM; 1664 1665 path->reada = 1; 1666 path->leave_spinning = 1; 1667 /* this will setup the path even if it fails to insert the back ref */ 1668 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1669 path, bytenr, num_bytes, parent, 1670 root_objectid, owner, offset, 1671 refs_to_add, extent_op); 1672 if (ret == 0) 1673 goto out; 1674 1675 if (ret != -EAGAIN) { 1676 err = ret; 1677 goto out; 1678 } 1679 1680 leaf = path->nodes[0]; 1681 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1682 refs = btrfs_extent_refs(leaf, item); 1683 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1684 if (extent_op) 1685 __run_delayed_extent_op(extent_op, leaf, item); 1686 1687 btrfs_mark_buffer_dirty(leaf); 1688 btrfs_release_path(root->fs_info->extent_root, path); 1689 1690 path->reada = 1; 1691 path->leave_spinning = 1; 1692 1693 /* now insert the actual backref */ 1694 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1695 path, bytenr, parent, root_objectid, 1696 owner, offset, refs_to_add); 1697 BUG_ON(ret); 1698 out: 1699 btrfs_free_path(path); 1700 return err; 1701 } 1702 1703 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1704 struct btrfs_root *root, 1705 struct btrfs_delayed_ref_node *node, 1706 struct btrfs_delayed_extent_op *extent_op, 1707 int insert_reserved) 1708 { 1709 int ret = 0; 1710 struct btrfs_delayed_data_ref *ref; 1711 struct btrfs_key ins; 1712 u64 parent = 0; 1713 u64 ref_root = 0; 1714 u64 flags = 0; 1715 1716 ins.objectid = node->bytenr; 1717 ins.offset = node->num_bytes; 1718 ins.type = BTRFS_EXTENT_ITEM_KEY; 1719 1720 ref = btrfs_delayed_node_to_data_ref(node); 1721 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1722 parent = ref->parent; 1723 else 1724 ref_root = ref->root; 1725 1726 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1727 if (extent_op) { 1728 BUG_ON(extent_op->update_key); 1729 flags |= extent_op->flags_to_set; 1730 } 1731 ret = alloc_reserved_file_extent(trans, root, 1732 parent, ref_root, flags, 1733 ref->objectid, ref->offset, 1734 &ins, node->ref_mod); 1735 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1736 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1737 node->num_bytes, parent, 1738 ref_root, ref->objectid, 1739 ref->offset, node->ref_mod, 1740 extent_op); 1741 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1742 ret = __btrfs_free_extent(trans, root, node->bytenr, 1743 node->num_bytes, parent, 1744 ref_root, ref->objectid, 1745 ref->offset, node->ref_mod, 1746 extent_op); 1747 } else { 1748 BUG(); 1749 } 1750 return ret; 1751 } 1752 1753 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1754 struct extent_buffer *leaf, 1755 struct btrfs_extent_item *ei) 1756 { 1757 u64 flags = btrfs_extent_flags(leaf, ei); 1758 if (extent_op->update_flags) { 1759 flags |= extent_op->flags_to_set; 1760 btrfs_set_extent_flags(leaf, ei, flags); 1761 } 1762 1763 if (extent_op->update_key) { 1764 struct btrfs_tree_block_info *bi; 1765 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1766 bi = (struct btrfs_tree_block_info *)(ei + 1); 1767 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1768 } 1769 } 1770 1771 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1772 struct btrfs_root *root, 1773 struct btrfs_delayed_ref_node *node, 1774 struct btrfs_delayed_extent_op *extent_op) 1775 { 1776 struct btrfs_key key; 1777 struct btrfs_path *path; 1778 struct btrfs_extent_item *ei; 1779 struct extent_buffer *leaf; 1780 u32 item_size; 1781 int ret; 1782 int err = 0; 1783 1784 path = btrfs_alloc_path(); 1785 if (!path) 1786 return -ENOMEM; 1787 1788 key.objectid = node->bytenr; 1789 key.type = BTRFS_EXTENT_ITEM_KEY; 1790 key.offset = node->num_bytes; 1791 1792 path->reada = 1; 1793 path->leave_spinning = 1; 1794 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1795 path, 0, 1); 1796 if (ret < 0) { 1797 err = ret; 1798 goto out; 1799 } 1800 if (ret > 0) { 1801 err = -EIO; 1802 goto out; 1803 } 1804 1805 leaf = path->nodes[0]; 1806 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1807 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1808 if (item_size < sizeof(*ei)) { 1809 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1810 path, (u64)-1, 0); 1811 if (ret < 0) { 1812 err = ret; 1813 goto out; 1814 } 1815 leaf = path->nodes[0]; 1816 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1817 } 1818 #endif 1819 BUG_ON(item_size < sizeof(*ei)); 1820 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1821 __run_delayed_extent_op(extent_op, leaf, ei); 1822 1823 btrfs_mark_buffer_dirty(leaf); 1824 out: 1825 btrfs_free_path(path); 1826 return err; 1827 } 1828 1829 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 1830 struct btrfs_root *root, 1831 struct btrfs_delayed_ref_node *node, 1832 struct btrfs_delayed_extent_op *extent_op, 1833 int insert_reserved) 1834 { 1835 int ret = 0; 1836 struct btrfs_delayed_tree_ref *ref; 1837 struct btrfs_key ins; 1838 u64 parent = 0; 1839 u64 ref_root = 0; 1840 1841 ins.objectid = node->bytenr; 1842 ins.offset = node->num_bytes; 1843 ins.type = BTRFS_EXTENT_ITEM_KEY; 1844 1845 ref = btrfs_delayed_node_to_tree_ref(node); 1846 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1847 parent = ref->parent; 1848 else 1849 ref_root = ref->root; 1850 1851 BUG_ON(node->ref_mod != 1); 1852 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1853 BUG_ON(!extent_op || !extent_op->update_flags || 1854 !extent_op->update_key); 1855 ret = alloc_reserved_tree_block(trans, root, 1856 parent, ref_root, 1857 extent_op->flags_to_set, 1858 &extent_op->key, 1859 ref->level, &ins); 1860 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1861 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1862 node->num_bytes, parent, ref_root, 1863 ref->level, 0, 1, extent_op); 1864 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1865 ret = __btrfs_free_extent(trans, root, node->bytenr, 1866 node->num_bytes, parent, ref_root, 1867 ref->level, 0, 1, extent_op); 1868 } else { 1869 BUG(); 1870 } 1871 return ret; 1872 } 1873 1874 1875 /* helper function to actually process a single delayed ref entry */ 1876 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1877 struct btrfs_root *root, 1878 struct btrfs_delayed_ref_node *node, 1879 struct btrfs_delayed_extent_op *extent_op, 1880 int insert_reserved) 1881 { 1882 int ret; 1883 if (btrfs_delayed_ref_is_head(node)) { 1884 struct btrfs_delayed_ref_head *head; 1885 /* 1886 * we've hit the end of the chain and we were supposed 1887 * to insert this extent into the tree. But, it got 1888 * deleted before we ever needed to insert it, so all 1889 * we have to do is clean up the accounting 1890 */ 1891 BUG_ON(extent_op); 1892 head = btrfs_delayed_node_to_head(node); 1893 if (insert_reserved) { 1894 int mark_free = 0; 1895 struct extent_buffer *must_clean = NULL; 1896 1897 ret = pin_down_bytes(trans, root, NULL, 1898 node->bytenr, node->num_bytes, 1899 head->is_data, 1, &must_clean); 1900 if (ret > 0) 1901 mark_free = 1; 1902 1903 if (must_clean) { 1904 clean_tree_block(NULL, root, must_clean); 1905 btrfs_tree_unlock(must_clean); 1906 free_extent_buffer(must_clean); 1907 } 1908 if (head->is_data) { 1909 ret = btrfs_del_csums(trans, root, 1910 node->bytenr, 1911 node->num_bytes); 1912 BUG_ON(ret); 1913 } 1914 if (mark_free) { 1915 ret = btrfs_free_reserved_extent(root, 1916 node->bytenr, 1917 node->num_bytes); 1918 BUG_ON(ret); 1919 } 1920 } 1921 mutex_unlock(&head->mutex); 1922 return 0; 1923 } 1924 1925 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1926 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1927 ret = run_delayed_tree_ref(trans, root, node, extent_op, 1928 insert_reserved); 1929 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1930 node->type == BTRFS_SHARED_DATA_REF_KEY) 1931 ret = run_delayed_data_ref(trans, root, node, extent_op, 1932 insert_reserved); 1933 else 1934 BUG(); 1935 return ret; 1936 } 1937 1938 static noinline struct btrfs_delayed_ref_node * 1939 select_delayed_ref(struct btrfs_delayed_ref_head *head) 1940 { 1941 struct rb_node *node; 1942 struct btrfs_delayed_ref_node *ref; 1943 int action = BTRFS_ADD_DELAYED_REF; 1944 again: 1945 /* 1946 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 1947 * this prevents ref count from going down to zero when 1948 * there still are pending delayed ref. 1949 */ 1950 node = rb_prev(&head->node.rb_node); 1951 while (1) { 1952 if (!node) 1953 break; 1954 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1955 rb_node); 1956 if (ref->bytenr != head->node.bytenr) 1957 break; 1958 if (ref->action == action) 1959 return ref; 1960 node = rb_prev(node); 1961 } 1962 if (action == BTRFS_ADD_DELAYED_REF) { 1963 action = BTRFS_DROP_DELAYED_REF; 1964 goto again; 1965 } 1966 return NULL; 1967 } 1968 1969 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 1970 struct btrfs_root *root, 1971 struct list_head *cluster) 1972 { 1973 struct btrfs_delayed_ref_root *delayed_refs; 1974 struct btrfs_delayed_ref_node *ref; 1975 struct btrfs_delayed_ref_head *locked_ref = NULL; 1976 struct btrfs_delayed_extent_op *extent_op; 1977 int ret; 1978 int count = 0; 1979 int must_insert_reserved = 0; 1980 1981 delayed_refs = &trans->transaction->delayed_refs; 1982 while (1) { 1983 if (!locked_ref) { 1984 /* pick a new head ref from the cluster list */ 1985 if (list_empty(cluster)) 1986 break; 1987 1988 locked_ref = list_entry(cluster->next, 1989 struct btrfs_delayed_ref_head, cluster); 1990 1991 /* grab the lock that says we are going to process 1992 * all the refs for this head */ 1993 ret = btrfs_delayed_ref_lock(trans, locked_ref); 1994 1995 /* 1996 * we may have dropped the spin lock to get the head 1997 * mutex lock, and that might have given someone else 1998 * time to free the head. If that's true, it has been 1999 * removed from our list and we can move on. 2000 */ 2001 if (ret == -EAGAIN) { 2002 locked_ref = NULL; 2003 count++; 2004 continue; 2005 } 2006 } 2007 2008 /* 2009 * record the must insert reserved flag before we 2010 * drop the spin lock. 2011 */ 2012 must_insert_reserved = locked_ref->must_insert_reserved; 2013 locked_ref->must_insert_reserved = 0; 2014 2015 extent_op = locked_ref->extent_op; 2016 locked_ref->extent_op = NULL; 2017 2018 /* 2019 * locked_ref is the head node, so we have to go one 2020 * node back for any delayed ref updates 2021 */ 2022 ref = select_delayed_ref(locked_ref); 2023 if (!ref) { 2024 /* All delayed refs have been processed, Go ahead 2025 * and send the head node to run_one_delayed_ref, 2026 * so that any accounting fixes can happen 2027 */ 2028 ref = &locked_ref->node; 2029 2030 if (extent_op && must_insert_reserved) { 2031 kfree(extent_op); 2032 extent_op = NULL; 2033 } 2034 2035 if (extent_op) { 2036 spin_unlock(&delayed_refs->lock); 2037 2038 ret = run_delayed_extent_op(trans, root, 2039 ref, extent_op); 2040 BUG_ON(ret); 2041 kfree(extent_op); 2042 2043 cond_resched(); 2044 spin_lock(&delayed_refs->lock); 2045 continue; 2046 } 2047 2048 list_del_init(&locked_ref->cluster); 2049 locked_ref = NULL; 2050 } 2051 2052 ref->in_tree = 0; 2053 rb_erase(&ref->rb_node, &delayed_refs->root); 2054 delayed_refs->num_entries--; 2055 2056 spin_unlock(&delayed_refs->lock); 2057 2058 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2059 must_insert_reserved); 2060 BUG_ON(ret); 2061 2062 btrfs_put_delayed_ref(ref); 2063 kfree(extent_op); 2064 count++; 2065 2066 cond_resched(); 2067 spin_lock(&delayed_refs->lock); 2068 } 2069 return count; 2070 } 2071 2072 /* 2073 * this starts processing the delayed reference count updates and 2074 * extent insertions we have queued up so far. count can be 2075 * 0, which means to process everything in the tree at the start 2076 * of the run (but not newly added entries), or it can be some target 2077 * number you'd like to process. 2078 */ 2079 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2080 struct btrfs_root *root, unsigned long count) 2081 { 2082 struct rb_node *node; 2083 struct btrfs_delayed_ref_root *delayed_refs; 2084 struct btrfs_delayed_ref_node *ref; 2085 struct list_head cluster; 2086 int ret; 2087 int run_all = count == (unsigned long)-1; 2088 int run_most = 0; 2089 2090 if (root == root->fs_info->extent_root) 2091 root = root->fs_info->tree_root; 2092 2093 delayed_refs = &trans->transaction->delayed_refs; 2094 INIT_LIST_HEAD(&cluster); 2095 again: 2096 spin_lock(&delayed_refs->lock); 2097 if (count == 0) { 2098 count = delayed_refs->num_entries * 2; 2099 run_most = 1; 2100 } 2101 while (1) { 2102 if (!(run_all || run_most) && 2103 delayed_refs->num_heads_ready < 64) 2104 break; 2105 2106 /* 2107 * go find something we can process in the rbtree. We start at 2108 * the beginning of the tree, and then build a cluster 2109 * of refs to process starting at the first one we are able to 2110 * lock 2111 */ 2112 ret = btrfs_find_ref_cluster(trans, &cluster, 2113 delayed_refs->run_delayed_start); 2114 if (ret) 2115 break; 2116 2117 ret = run_clustered_refs(trans, root, &cluster); 2118 BUG_ON(ret < 0); 2119 2120 count -= min_t(unsigned long, ret, count); 2121 2122 if (count == 0) 2123 break; 2124 } 2125 2126 if (run_all) { 2127 node = rb_first(&delayed_refs->root); 2128 if (!node) 2129 goto out; 2130 count = (unsigned long)-1; 2131 2132 while (node) { 2133 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2134 rb_node); 2135 if (btrfs_delayed_ref_is_head(ref)) { 2136 struct btrfs_delayed_ref_head *head; 2137 2138 head = btrfs_delayed_node_to_head(ref); 2139 atomic_inc(&ref->refs); 2140 2141 spin_unlock(&delayed_refs->lock); 2142 mutex_lock(&head->mutex); 2143 mutex_unlock(&head->mutex); 2144 2145 btrfs_put_delayed_ref(ref); 2146 cond_resched(); 2147 goto again; 2148 } 2149 node = rb_next(node); 2150 } 2151 spin_unlock(&delayed_refs->lock); 2152 schedule_timeout(1); 2153 goto again; 2154 } 2155 out: 2156 spin_unlock(&delayed_refs->lock); 2157 return 0; 2158 } 2159 2160 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2161 struct btrfs_root *root, 2162 u64 bytenr, u64 num_bytes, u64 flags, 2163 int is_data) 2164 { 2165 struct btrfs_delayed_extent_op *extent_op; 2166 int ret; 2167 2168 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2169 if (!extent_op) 2170 return -ENOMEM; 2171 2172 extent_op->flags_to_set = flags; 2173 extent_op->update_flags = 1; 2174 extent_op->update_key = 0; 2175 extent_op->is_data = is_data ? 1 : 0; 2176 2177 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2178 if (ret) 2179 kfree(extent_op); 2180 return ret; 2181 } 2182 2183 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2184 struct btrfs_root *root, 2185 struct btrfs_path *path, 2186 u64 objectid, u64 offset, u64 bytenr) 2187 { 2188 struct btrfs_delayed_ref_head *head; 2189 struct btrfs_delayed_ref_node *ref; 2190 struct btrfs_delayed_data_ref *data_ref; 2191 struct btrfs_delayed_ref_root *delayed_refs; 2192 struct rb_node *node; 2193 int ret = 0; 2194 2195 ret = -ENOENT; 2196 delayed_refs = &trans->transaction->delayed_refs; 2197 spin_lock(&delayed_refs->lock); 2198 head = btrfs_find_delayed_ref_head(trans, bytenr); 2199 if (!head) 2200 goto out; 2201 2202 if (!mutex_trylock(&head->mutex)) { 2203 atomic_inc(&head->node.refs); 2204 spin_unlock(&delayed_refs->lock); 2205 2206 btrfs_release_path(root->fs_info->extent_root, path); 2207 2208 mutex_lock(&head->mutex); 2209 mutex_unlock(&head->mutex); 2210 btrfs_put_delayed_ref(&head->node); 2211 return -EAGAIN; 2212 } 2213 2214 node = rb_prev(&head->node.rb_node); 2215 if (!node) 2216 goto out_unlock; 2217 2218 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2219 2220 if (ref->bytenr != bytenr) 2221 goto out_unlock; 2222 2223 ret = 1; 2224 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2225 goto out_unlock; 2226 2227 data_ref = btrfs_delayed_node_to_data_ref(ref); 2228 2229 node = rb_prev(node); 2230 if (node) { 2231 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2232 if (ref->bytenr == bytenr) 2233 goto out_unlock; 2234 } 2235 2236 if (data_ref->root != root->root_key.objectid || 2237 data_ref->objectid != objectid || data_ref->offset != offset) 2238 goto out_unlock; 2239 2240 ret = 0; 2241 out_unlock: 2242 mutex_unlock(&head->mutex); 2243 out: 2244 spin_unlock(&delayed_refs->lock); 2245 return ret; 2246 } 2247 2248 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2249 struct btrfs_root *root, 2250 struct btrfs_path *path, 2251 u64 objectid, u64 offset, u64 bytenr) 2252 { 2253 struct btrfs_root *extent_root = root->fs_info->extent_root; 2254 struct extent_buffer *leaf; 2255 struct btrfs_extent_data_ref *ref; 2256 struct btrfs_extent_inline_ref *iref; 2257 struct btrfs_extent_item *ei; 2258 struct btrfs_key key; 2259 u32 item_size; 2260 int ret; 2261 2262 key.objectid = bytenr; 2263 key.offset = (u64)-1; 2264 key.type = BTRFS_EXTENT_ITEM_KEY; 2265 2266 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2267 if (ret < 0) 2268 goto out; 2269 BUG_ON(ret == 0); 2270 2271 ret = -ENOENT; 2272 if (path->slots[0] == 0) 2273 goto out; 2274 2275 path->slots[0]--; 2276 leaf = path->nodes[0]; 2277 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2278 2279 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2280 goto out; 2281 2282 ret = 1; 2283 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2284 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2285 if (item_size < sizeof(*ei)) { 2286 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2287 goto out; 2288 } 2289 #endif 2290 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2291 2292 if (item_size != sizeof(*ei) + 2293 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2294 goto out; 2295 2296 if (btrfs_extent_generation(leaf, ei) <= 2297 btrfs_root_last_snapshot(&root->root_item)) 2298 goto out; 2299 2300 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2301 if (btrfs_extent_inline_ref_type(leaf, iref) != 2302 BTRFS_EXTENT_DATA_REF_KEY) 2303 goto out; 2304 2305 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2306 if (btrfs_extent_refs(leaf, ei) != 2307 btrfs_extent_data_ref_count(leaf, ref) || 2308 btrfs_extent_data_ref_root(leaf, ref) != 2309 root->root_key.objectid || 2310 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2311 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2312 goto out; 2313 2314 ret = 0; 2315 out: 2316 return ret; 2317 } 2318 2319 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2320 struct btrfs_root *root, 2321 u64 objectid, u64 offset, u64 bytenr) 2322 { 2323 struct btrfs_path *path; 2324 int ret; 2325 int ret2; 2326 2327 path = btrfs_alloc_path(); 2328 if (!path) 2329 return -ENOENT; 2330 2331 do { 2332 ret = check_committed_ref(trans, root, path, objectid, 2333 offset, bytenr); 2334 if (ret && ret != -ENOENT) 2335 goto out; 2336 2337 ret2 = check_delayed_ref(trans, root, path, objectid, 2338 offset, bytenr); 2339 } while (ret2 == -EAGAIN); 2340 2341 if (ret2 && ret2 != -ENOENT) { 2342 ret = ret2; 2343 goto out; 2344 } 2345 2346 if (ret != -ENOENT || ret2 != -ENOENT) 2347 ret = 0; 2348 out: 2349 btrfs_free_path(path); 2350 return ret; 2351 } 2352 2353 #if 0 2354 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2355 struct extent_buffer *buf, u32 nr_extents) 2356 { 2357 struct btrfs_key key; 2358 struct btrfs_file_extent_item *fi; 2359 u64 root_gen; 2360 u32 nritems; 2361 int i; 2362 int level; 2363 int ret = 0; 2364 int shared = 0; 2365 2366 if (!root->ref_cows) 2367 return 0; 2368 2369 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2370 shared = 0; 2371 root_gen = root->root_key.offset; 2372 } else { 2373 shared = 1; 2374 root_gen = trans->transid - 1; 2375 } 2376 2377 level = btrfs_header_level(buf); 2378 nritems = btrfs_header_nritems(buf); 2379 2380 if (level == 0) { 2381 struct btrfs_leaf_ref *ref; 2382 struct btrfs_extent_info *info; 2383 2384 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2385 if (!ref) { 2386 ret = -ENOMEM; 2387 goto out; 2388 } 2389 2390 ref->root_gen = root_gen; 2391 ref->bytenr = buf->start; 2392 ref->owner = btrfs_header_owner(buf); 2393 ref->generation = btrfs_header_generation(buf); 2394 ref->nritems = nr_extents; 2395 info = ref->extents; 2396 2397 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2398 u64 disk_bytenr; 2399 btrfs_item_key_to_cpu(buf, &key, i); 2400 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2401 continue; 2402 fi = btrfs_item_ptr(buf, i, 2403 struct btrfs_file_extent_item); 2404 if (btrfs_file_extent_type(buf, fi) == 2405 BTRFS_FILE_EXTENT_INLINE) 2406 continue; 2407 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2408 if (disk_bytenr == 0) 2409 continue; 2410 2411 info->bytenr = disk_bytenr; 2412 info->num_bytes = 2413 btrfs_file_extent_disk_num_bytes(buf, fi); 2414 info->objectid = key.objectid; 2415 info->offset = key.offset; 2416 info++; 2417 } 2418 2419 ret = btrfs_add_leaf_ref(root, ref, shared); 2420 if (ret == -EEXIST && shared) { 2421 struct btrfs_leaf_ref *old; 2422 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2423 BUG_ON(!old); 2424 btrfs_remove_leaf_ref(root, old); 2425 btrfs_free_leaf_ref(root, old); 2426 ret = btrfs_add_leaf_ref(root, ref, shared); 2427 } 2428 WARN_ON(ret); 2429 btrfs_free_leaf_ref(root, ref); 2430 } 2431 out: 2432 return ret; 2433 } 2434 2435 /* when a block goes through cow, we update the reference counts of 2436 * everything that block points to. The internal pointers of the block 2437 * can be in just about any order, and it is likely to have clusters of 2438 * things that are close together and clusters of things that are not. 2439 * 2440 * To help reduce the seeks that come with updating all of these reference 2441 * counts, sort them by byte number before actual updates are done. 2442 * 2443 * struct refsort is used to match byte number to slot in the btree block. 2444 * we sort based on the byte number and then use the slot to actually 2445 * find the item. 2446 * 2447 * struct refsort is smaller than strcut btrfs_item and smaller than 2448 * struct btrfs_key_ptr. Since we're currently limited to the page size 2449 * for a btree block, there's no way for a kmalloc of refsorts for a 2450 * single node to be bigger than a page. 2451 */ 2452 struct refsort { 2453 u64 bytenr; 2454 u32 slot; 2455 }; 2456 2457 /* 2458 * for passing into sort() 2459 */ 2460 static int refsort_cmp(const void *a_void, const void *b_void) 2461 { 2462 const struct refsort *a = a_void; 2463 const struct refsort *b = b_void; 2464 2465 if (a->bytenr < b->bytenr) 2466 return -1; 2467 if (a->bytenr > b->bytenr) 2468 return 1; 2469 return 0; 2470 } 2471 #endif 2472 2473 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2474 struct btrfs_root *root, 2475 struct extent_buffer *buf, 2476 int full_backref, int inc) 2477 { 2478 u64 bytenr; 2479 u64 num_bytes; 2480 u64 parent; 2481 u64 ref_root; 2482 u32 nritems; 2483 struct btrfs_key key; 2484 struct btrfs_file_extent_item *fi; 2485 int i; 2486 int level; 2487 int ret = 0; 2488 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2489 u64, u64, u64, u64, u64, u64); 2490 2491 ref_root = btrfs_header_owner(buf); 2492 nritems = btrfs_header_nritems(buf); 2493 level = btrfs_header_level(buf); 2494 2495 if (!root->ref_cows && level == 0) 2496 return 0; 2497 2498 if (inc) 2499 process_func = btrfs_inc_extent_ref; 2500 else 2501 process_func = btrfs_free_extent; 2502 2503 if (full_backref) 2504 parent = buf->start; 2505 else 2506 parent = 0; 2507 2508 for (i = 0; i < nritems; i++) { 2509 if (level == 0) { 2510 btrfs_item_key_to_cpu(buf, &key, i); 2511 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2512 continue; 2513 fi = btrfs_item_ptr(buf, i, 2514 struct btrfs_file_extent_item); 2515 if (btrfs_file_extent_type(buf, fi) == 2516 BTRFS_FILE_EXTENT_INLINE) 2517 continue; 2518 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2519 if (bytenr == 0) 2520 continue; 2521 2522 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2523 key.offset -= btrfs_file_extent_offset(buf, fi); 2524 ret = process_func(trans, root, bytenr, num_bytes, 2525 parent, ref_root, key.objectid, 2526 key.offset); 2527 if (ret) 2528 goto fail; 2529 } else { 2530 bytenr = btrfs_node_blockptr(buf, i); 2531 num_bytes = btrfs_level_size(root, level - 1); 2532 ret = process_func(trans, root, bytenr, num_bytes, 2533 parent, ref_root, level - 1, 0); 2534 if (ret) 2535 goto fail; 2536 } 2537 } 2538 return 0; 2539 fail: 2540 BUG(); 2541 return ret; 2542 } 2543 2544 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2545 struct extent_buffer *buf, int full_backref) 2546 { 2547 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2548 } 2549 2550 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2551 struct extent_buffer *buf, int full_backref) 2552 { 2553 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2554 } 2555 2556 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2557 struct btrfs_root *root, 2558 struct btrfs_path *path, 2559 struct btrfs_block_group_cache *cache) 2560 { 2561 int ret; 2562 struct btrfs_root *extent_root = root->fs_info->extent_root; 2563 unsigned long bi; 2564 struct extent_buffer *leaf; 2565 2566 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2567 if (ret < 0) 2568 goto fail; 2569 BUG_ON(ret); 2570 2571 leaf = path->nodes[0]; 2572 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2573 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2574 btrfs_mark_buffer_dirty(leaf); 2575 btrfs_release_path(extent_root, path); 2576 fail: 2577 if (ret) 2578 return ret; 2579 return 0; 2580 2581 } 2582 2583 static struct btrfs_block_group_cache * 2584 next_block_group(struct btrfs_root *root, 2585 struct btrfs_block_group_cache *cache) 2586 { 2587 struct rb_node *node; 2588 spin_lock(&root->fs_info->block_group_cache_lock); 2589 node = rb_next(&cache->cache_node); 2590 btrfs_put_block_group(cache); 2591 if (node) { 2592 cache = rb_entry(node, struct btrfs_block_group_cache, 2593 cache_node); 2594 btrfs_get_block_group(cache); 2595 } else 2596 cache = NULL; 2597 spin_unlock(&root->fs_info->block_group_cache_lock); 2598 return cache; 2599 } 2600 2601 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2602 struct btrfs_root *root) 2603 { 2604 struct btrfs_block_group_cache *cache; 2605 int err = 0; 2606 struct btrfs_path *path; 2607 u64 last = 0; 2608 2609 path = btrfs_alloc_path(); 2610 if (!path) 2611 return -ENOMEM; 2612 2613 while (1) { 2614 if (last == 0) { 2615 err = btrfs_run_delayed_refs(trans, root, 2616 (unsigned long)-1); 2617 BUG_ON(err); 2618 } 2619 2620 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2621 while (cache) { 2622 if (cache->dirty) 2623 break; 2624 cache = next_block_group(root, cache); 2625 } 2626 if (!cache) { 2627 if (last == 0) 2628 break; 2629 last = 0; 2630 continue; 2631 } 2632 2633 cache->dirty = 0; 2634 last = cache->key.objectid + cache->key.offset; 2635 2636 err = write_one_cache_group(trans, root, path, cache); 2637 BUG_ON(err); 2638 btrfs_put_block_group(cache); 2639 } 2640 2641 btrfs_free_path(path); 2642 return 0; 2643 } 2644 2645 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2646 { 2647 struct btrfs_block_group_cache *block_group; 2648 int readonly = 0; 2649 2650 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2651 if (!block_group || block_group->ro) 2652 readonly = 1; 2653 if (block_group) 2654 btrfs_put_block_group(block_group); 2655 return readonly; 2656 } 2657 2658 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2659 u64 total_bytes, u64 bytes_used, 2660 struct btrfs_space_info **space_info) 2661 { 2662 struct btrfs_space_info *found; 2663 2664 found = __find_space_info(info, flags); 2665 if (found) { 2666 spin_lock(&found->lock); 2667 found->total_bytes += total_bytes; 2668 found->bytes_used += bytes_used; 2669 found->full = 0; 2670 spin_unlock(&found->lock); 2671 *space_info = found; 2672 return 0; 2673 } 2674 found = kzalloc(sizeof(*found), GFP_NOFS); 2675 if (!found) 2676 return -ENOMEM; 2677 2678 INIT_LIST_HEAD(&found->block_groups); 2679 init_rwsem(&found->groups_sem); 2680 init_waitqueue_head(&found->flush_wait); 2681 init_waitqueue_head(&found->allocate_wait); 2682 spin_lock_init(&found->lock); 2683 found->flags = flags; 2684 found->total_bytes = total_bytes; 2685 found->bytes_used = bytes_used; 2686 found->bytes_pinned = 0; 2687 found->bytes_reserved = 0; 2688 found->bytes_readonly = 0; 2689 found->bytes_delalloc = 0; 2690 found->full = 0; 2691 found->force_alloc = 0; 2692 *space_info = found; 2693 list_add_rcu(&found->list, &info->space_info); 2694 atomic_set(&found->caching_threads, 0); 2695 return 0; 2696 } 2697 2698 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2699 { 2700 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2701 BTRFS_BLOCK_GROUP_RAID1 | 2702 BTRFS_BLOCK_GROUP_RAID10 | 2703 BTRFS_BLOCK_GROUP_DUP); 2704 if (extra_flags) { 2705 if (flags & BTRFS_BLOCK_GROUP_DATA) 2706 fs_info->avail_data_alloc_bits |= extra_flags; 2707 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2708 fs_info->avail_metadata_alloc_bits |= extra_flags; 2709 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2710 fs_info->avail_system_alloc_bits |= extra_flags; 2711 } 2712 } 2713 2714 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 2715 { 2716 spin_lock(&cache->space_info->lock); 2717 spin_lock(&cache->lock); 2718 if (!cache->ro) { 2719 cache->space_info->bytes_readonly += cache->key.offset - 2720 btrfs_block_group_used(&cache->item); 2721 cache->ro = 1; 2722 } 2723 spin_unlock(&cache->lock); 2724 spin_unlock(&cache->space_info->lock); 2725 } 2726 2727 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2728 { 2729 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2730 2731 if (num_devices == 1) 2732 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2733 if (num_devices < 4) 2734 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2735 2736 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2737 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2738 BTRFS_BLOCK_GROUP_RAID10))) { 2739 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2740 } 2741 2742 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2743 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2744 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2745 } 2746 2747 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2748 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2749 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2750 (flags & BTRFS_BLOCK_GROUP_DUP))) 2751 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2752 return flags; 2753 } 2754 2755 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2756 { 2757 struct btrfs_fs_info *info = root->fs_info; 2758 u64 alloc_profile; 2759 2760 if (data) { 2761 alloc_profile = info->avail_data_alloc_bits & 2762 info->data_alloc_profile; 2763 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2764 } else if (root == root->fs_info->chunk_root) { 2765 alloc_profile = info->avail_system_alloc_bits & 2766 info->system_alloc_profile; 2767 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2768 } else { 2769 alloc_profile = info->avail_metadata_alloc_bits & 2770 info->metadata_alloc_profile; 2771 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2772 } 2773 2774 return btrfs_reduce_alloc_profile(root, data); 2775 } 2776 2777 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2778 { 2779 u64 alloc_target; 2780 2781 alloc_target = btrfs_get_alloc_profile(root, 1); 2782 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2783 alloc_target); 2784 } 2785 2786 static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) 2787 { 2788 u64 num_bytes; 2789 int level; 2790 2791 level = BTRFS_MAX_LEVEL - 2; 2792 /* 2793 * NOTE: these calculations are absolutely the worst possible case. 2794 * This assumes that _every_ item we insert will require a new leaf, and 2795 * that the tree has grown to its maximum level size. 2796 */ 2797 2798 /* 2799 * for every item we insert we could insert both an extent item and a 2800 * extent ref item. Then for ever item we insert, we will need to cow 2801 * both the original leaf, plus the leaf to the left and right of it. 2802 * 2803 * Unless we are talking about the extent root, then we just want the 2804 * number of items * 2, since we just need the extent item plus its ref. 2805 */ 2806 if (root == root->fs_info->extent_root) 2807 num_bytes = num_items * 2; 2808 else 2809 num_bytes = (num_items + (2 * num_items)) * 3; 2810 2811 /* 2812 * num_bytes is total number of leaves we could need times the leaf 2813 * size, and then for every leaf we could end up cow'ing 2 nodes per 2814 * level, down to the leaf level. 2815 */ 2816 num_bytes = (num_bytes * root->leafsize) + 2817 (num_bytes * (level * 2)) * root->nodesize; 2818 2819 return num_bytes; 2820 } 2821 2822 /* 2823 * Unreserve metadata space for delalloc. If we have less reserved credits than 2824 * we have extents, this function does nothing. 2825 */ 2826 int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, 2827 struct inode *inode, int num_items) 2828 { 2829 struct btrfs_fs_info *info = root->fs_info; 2830 struct btrfs_space_info *meta_sinfo; 2831 u64 num_bytes; 2832 u64 alloc_target; 2833 bool bug = false; 2834 2835 /* get the space info for where the metadata will live */ 2836 alloc_target = btrfs_get_alloc_profile(root, 0); 2837 meta_sinfo = __find_space_info(info, alloc_target); 2838 2839 num_bytes = calculate_bytes_needed(root->fs_info->extent_root, 2840 num_items); 2841 2842 spin_lock(&meta_sinfo->lock); 2843 spin_lock(&BTRFS_I(inode)->accounting_lock); 2844 if (BTRFS_I(inode)->reserved_extents <= 2845 BTRFS_I(inode)->outstanding_extents) { 2846 spin_unlock(&BTRFS_I(inode)->accounting_lock); 2847 spin_unlock(&meta_sinfo->lock); 2848 return 0; 2849 } 2850 spin_unlock(&BTRFS_I(inode)->accounting_lock); 2851 2852 BTRFS_I(inode)->reserved_extents -= num_items; 2853 BUG_ON(BTRFS_I(inode)->reserved_extents < 0); 2854 2855 if (meta_sinfo->bytes_delalloc < num_bytes) { 2856 bug = true; 2857 meta_sinfo->bytes_delalloc = 0; 2858 } else { 2859 meta_sinfo->bytes_delalloc -= num_bytes; 2860 } 2861 spin_unlock(&meta_sinfo->lock); 2862 2863 BUG_ON(bug); 2864 2865 return 0; 2866 } 2867 2868 static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) 2869 { 2870 u64 thresh; 2871 2872 thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2873 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 2874 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 2875 meta_sinfo->bytes_may_use; 2876 2877 thresh = meta_sinfo->total_bytes - thresh; 2878 thresh *= 80; 2879 do_div(thresh, 100); 2880 if (thresh <= meta_sinfo->bytes_delalloc) 2881 meta_sinfo->force_delalloc = 1; 2882 else 2883 meta_sinfo->force_delalloc = 0; 2884 } 2885 2886 struct async_flush { 2887 struct btrfs_root *root; 2888 struct btrfs_space_info *info; 2889 struct btrfs_work work; 2890 }; 2891 2892 static noinline void flush_delalloc_async(struct btrfs_work *work) 2893 { 2894 struct async_flush *async; 2895 struct btrfs_root *root; 2896 struct btrfs_space_info *info; 2897 2898 async = container_of(work, struct async_flush, work); 2899 root = async->root; 2900 info = async->info; 2901 2902 btrfs_start_delalloc_inodes(root, 0); 2903 wake_up(&info->flush_wait); 2904 btrfs_wait_ordered_extents(root, 0, 0); 2905 2906 spin_lock(&info->lock); 2907 info->flushing = 0; 2908 spin_unlock(&info->lock); 2909 wake_up(&info->flush_wait); 2910 2911 kfree(async); 2912 } 2913 2914 static void wait_on_flush(struct btrfs_space_info *info) 2915 { 2916 DEFINE_WAIT(wait); 2917 u64 used; 2918 2919 while (1) { 2920 prepare_to_wait(&info->flush_wait, &wait, 2921 TASK_UNINTERRUPTIBLE); 2922 spin_lock(&info->lock); 2923 if (!info->flushing) { 2924 spin_unlock(&info->lock); 2925 break; 2926 } 2927 2928 used = info->bytes_used + info->bytes_reserved + 2929 info->bytes_pinned + info->bytes_readonly + 2930 info->bytes_super + info->bytes_root + 2931 info->bytes_may_use + info->bytes_delalloc; 2932 if (used < info->total_bytes) { 2933 spin_unlock(&info->lock); 2934 break; 2935 } 2936 spin_unlock(&info->lock); 2937 schedule(); 2938 } 2939 finish_wait(&info->flush_wait, &wait); 2940 } 2941 2942 static void flush_delalloc(struct btrfs_root *root, 2943 struct btrfs_space_info *info) 2944 { 2945 struct async_flush *async; 2946 bool wait = false; 2947 2948 spin_lock(&info->lock); 2949 2950 if (!info->flushing) 2951 info->flushing = 1; 2952 else 2953 wait = true; 2954 2955 spin_unlock(&info->lock); 2956 2957 if (wait) { 2958 wait_on_flush(info); 2959 return; 2960 } 2961 2962 async = kzalloc(sizeof(*async), GFP_NOFS); 2963 if (!async) 2964 goto flush; 2965 2966 async->root = root; 2967 async->info = info; 2968 async->work.func = flush_delalloc_async; 2969 2970 btrfs_queue_worker(&root->fs_info->enospc_workers, 2971 &async->work); 2972 wait_on_flush(info); 2973 return; 2974 2975 flush: 2976 btrfs_start_delalloc_inodes(root, 0); 2977 btrfs_wait_ordered_extents(root, 0, 0); 2978 2979 spin_lock(&info->lock); 2980 info->flushing = 0; 2981 spin_unlock(&info->lock); 2982 wake_up(&info->flush_wait); 2983 } 2984 2985 static int maybe_allocate_chunk(struct btrfs_root *root, 2986 struct btrfs_space_info *info) 2987 { 2988 struct btrfs_super_block *disk_super = &root->fs_info->super_copy; 2989 struct btrfs_trans_handle *trans; 2990 bool wait = false; 2991 int ret = 0; 2992 u64 min_metadata; 2993 u64 free_space; 2994 2995 free_space = btrfs_super_total_bytes(disk_super); 2996 /* 2997 * we allow the metadata to grow to a max of either 10gb or 5% of the 2998 * space in the volume. 2999 */ 3000 min_metadata = min((u64)10 * 1024 * 1024 * 1024, 3001 div64_u64(free_space * 5, 100)); 3002 if (info->total_bytes >= min_metadata) { 3003 spin_unlock(&info->lock); 3004 return 0; 3005 } 3006 3007 if (info->full) { 3008 spin_unlock(&info->lock); 3009 return 0; 3010 } 3011 3012 if (!info->allocating_chunk) { 3013 info->force_alloc = 1; 3014 info->allocating_chunk = 1; 3015 } else { 3016 wait = true; 3017 } 3018 3019 spin_unlock(&info->lock); 3020 3021 if (wait) { 3022 wait_event(info->allocate_wait, 3023 !info->allocating_chunk); 3024 return 1; 3025 } 3026 3027 trans = btrfs_start_transaction(root, 1); 3028 if (!trans) { 3029 ret = -ENOMEM; 3030 goto out; 3031 } 3032 3033 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3034 4096 + 2 * 1024 * 1024, 3035 info->flags, 0); 3036 btrfs_end_transaction(trans, root); 3037 if (ret) 3038 goto out; 3039 out: 3040 spin_lock(&info->lock); 3041 info->allocating_chunk = 0; 3042 spin_unlock(&info->lock); 3043 wake_up(&info->allocate_wait); 3044 3045 if (ret) 3046 return 0; 3047 return 1; 3048 } 3049 3050 /* 3051 * Reserve metadata space for delalloc. 3052 */ 3053 int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, 3054 struct inode *inode, int num_items) 3055 { 3056 struct btrfs_fs_info *info = root->fs_info; 3057 struct btrfs_space_info *meta_sinfo; 3058 u64 num_bytes; 3059 u64 used; 3060 u64 alloc_target; 3061 int flushed = 0; 3062 int force_delalloc; 3063 3064 /* get the space info for where the metadata will live */ 3065 alloc_target = btrfs_get_alloc_profile(root, 0); 3066 meta_sinfo = __find_space_info(info, alloc_target); 3067 3068 num_bytes = calculate_bytes_needed(root->fs_info->extent_root, 3069 num_items); 3070 again: 3071 spin_lock(&meta_sinfo->lock); 3072 3073 force_delalloc = meta_sinfo->force_delalloc; 3074 3075 if (unlikely(!meta_sinfo->bytes_root)) 3076 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); 3077 3078 if (!flushed) 3079 meta_sinfo->bytes_delalloc += num_bytes; 3080 3081 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 3082 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 3083 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 3084 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; 3085 3086 if (used > meta_sinfo->total_bytes) { 3087 flushed++; 3088 3089 if (flushed == 1) { 3090 if (maybe_allocate_chunk(root, meta_sinfo)) 3091 goto again; 3092 flushed++; 3093 } else { 3094 spin_unlock(&meta_sinfo->lock); 3095 } 3096 3097 if (flushed == 2) { 3098 filemap_flush(inode->i_mapping); 3099 goto again; 3100 } else if (flushed == 3) { 3101 flush_delalloc(root, meta_sinfo); 3102 goto again; 3103 } 3104 spin_lock(&meta_sinfo->lock); 3105 meta_sinfo->bytes_delalloc -= num_bytes; 3106 spin_unlock(&meta_sinfo->lock); 3107 printk(KERN_ERR "enospc, has %d, reserved %d\n", 3108 BTRFS_I(inode)->outstanding_extents, 3109 BTRFS_I(inode)->reserved_extents); 3110 dump_space_info(meta_sinfo, 0, 0); 3111 return -ENOSPC; 3112 } 3113 3114 BTRFS_I(inode)->reserved_extents += num_items; 3115 check_force_delalloc(meta_sinfo); 3116 spin_unlock(&meta_sinfo->lock); 3117 3118 if (!flushed && force_delalloc) 3119 filemap_flush(inode->i_mapping); 3120 3121 return 0; 3122 } 3123 3124 /* 3125 * unreserve num_items number of items worth of metadata space. This needs to 3126 * be paired with btrfs_reserve_metadata_space. 3127 * 3128 * NOTE: if you have the option, run this _AFTER_ you do a 3129 * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref 3130 * oprations which will result in more used metadata, so we want to make sure we 3131 * can do that without issue. 3132 */ 3133 int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) 3134 { 3135 struct btrfs_fs_info *info = root->fs_info; 3136 struct btrfs_space_info *meta_sinfo; 3137 u64 num_bytes; 3138 u64 alloc_target; 3139 bool bug = false; 3140 3141 /* get the space info for where the metadata will live */ 3142 alloc_target = btrfs_get_alloc_profile(root, 0); 3143 meta_sinfo = __find_space_info(info, alloc_target); 3144 3145 num_bytes = calculate_bytes_needed(root, num_items); 3146 3147 spin_lock(&meta_sinfo->lock); 3148 if (meta_sinfo->bytes_may_use < num_bytes) { 3149 bug = true; 3150 meta_sinfo->bytes_may_use = 0; 3151 } else { 3152 meta_sinfo->bytes_may_use -= num_bytes; 3153 } 3154 spin_unlock(&meta_sinfo->lock); 3155 3156 BUG_ON(bug); 3157 3158 return 0; 3159 } 3160 3161 /* 3162 * Reserve some metadata space for use. We'll calculate the worste case number 3163 * of bytes that would be needed to modify num_items number of items. If we 3164 * have space, fantastic, if not, you get -ENOSPC. Please call 3165 * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of 3166 * items you reserved, since whatever metadata you needed should have already 3167 * been allocated. 3168 * 3169 * This will commit the transaction to make more space if we don't have enough 3170 * metadata space. THe only time we don't do this is if we're reserving space 3171 * inside of a transaction, then we will just return -ENOSPC and it is the 3172 * callers responsibility to handle it properly. 3173 */ 3174 int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) 3175 { 3176 struct btrfs_fs_info *info = root->fs_info; 3177 struct btrfs_space_info *meta_sinfo; 3178 u64 num_bytes; 3179 u64 used; 3180 u64 alloc_target; 3181 int retries = 0; 3182 3183 /* get the space info for where the metadata will live */ 3184 alloc_target = btrfs_get_alloc_profile(root, 0); 3185 meta_sinfo = __find_space_info(info, alloc_target); 3186 3187 num_bytes = calculate_bytes_needed(root, num_items); 3188 again: 3189 spin_lock(&meta_sinfo->lock); 3190 3191 if (unlikely(!meta_sinfo->bytes_root)) 3192 meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); 3193 3194 if (!retries) 3195 meta_sinfo->bytes_may_use += num_bytes; 3196 3197 used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 3198 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + 3199 meta_sinfo->bytes_super + meta_sinfo->bytes_root + 3200 meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; 3201 3202 if (used > meta_sinfo->total_bytes) { 3203 retries++; 3204 if (retries == 1) { 3205 if (maybe_allocate_chunk(root, meta_sinfo)) 3206 goto again; 3207 retries++; 3208 } else { 3209 spin_unlock(&meta_sinfo->lock); 3210 } 3211 3212 if (retries == 2) { 3213 flush_delalloc(root, meta_sinfo); 3214 goto again; 3215 } 3216 spin_lock(&meta_sinfo->lock); 3217 meta_sinfo->bytes_may_use -= num_bytes; 3218 spin_unlock(&meta_sinfo->lock); 3219 3220 dump_space_info(meta_sinfo, 0, 0); 3221 return -ENOSPC; 3222 } 3223 3224 check_force_delalloc(meta_sinfo); 3225 spin_unlock(&meta_sinfo->lock); 3226 3227 return 0; 3228 } 3229 3230 /* 3231 * This will check the space that the inode allocates from to make sure we have 3232 * enough space for bytes. 3233 */ 3234 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 3235 u64 bytes) 3236 { 3237 struct btrfs_space_info *data_sinfo; 3238 u64 used; 3239 int ret = 0, committed = 0, flushed = 0; 3240 3241 /* make sure bytes are sectorsize aligned */ 3242 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3243 3244 data_sinfo = BTRFS_I(inode)->space_info; 3245 if (!data_sinfo) 3246 goto alloc; 3247 3248 again: 3249 /* make sure we have enough space to handle the data first */ 3250 spin_lock(&data_sinfo->lock); 3251 used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + 3252 data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + 3253 data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + 3254 data_sinfo->bytes_super; 3255 3256 if (used + bytes > data_sinfo->total_bytes) { 3257 struct btrfs_trans_handle *trans; 3258 3259 if (!flushed) { 3260 spin_unlock(&data_sinfo->lock); 3261 flush_delalloc(root, data_sinfo); 3262 flushed = 1; 3263 goto again; 3264 } 3265 3266 /* 3267 * if we don't have enough free bytes in this space then we need 3268 * to alloc a new chunk. 3269 */ 3270 if (!data_sinfo->full) { 3271 u64 alloc_target; 3272 3273 data_sinfo->force_alloc = 1; 3274 spin_unlock(&data_sinfo->lock); 3275 alloc: 3276 alloc_target = btrfs_get_alloc_profile(root, 1); 3277 trans = btrfs_start_transaction(root, 1); 3278 if (!trans) 3279 return -ENOMEM; 3280 3281 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3282 bytes + 2 * 1024 * 1024, 3283 alloc_target, 0); 3284 btrfs_end_transaction(trans, root); 3285 if (ret) 3286 return ret; 3287 3288 if (!data_sinfo) { 3289 btrfs_set_inode_space_info(root, inode); 3290 data_sinfo = BTRFS_I(inode)->space_info; 3291 } 3292 goto again; 3293 } 3294 spin_unlock(&data_sinfo->lock); 3295 3296 /* commit the current transaction and try again */ 3297 if (!committed && !root->fs_info->open_ioctl_trans) { 3298 committed = 1; 3299 trans = btrfs_join_transaction(root, 1); 3300 if (!trans) 3301 return -ENOMEM; 3302 ret = btrfs_commit_transaction(trans, root); 3303 if (ret) 3304 return ret; 3305 goto again; 3306 } 3307 3308 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 3309 ", %llu bytes_used, %llu bytes_reserved, " 3310 "%llu bytes_pinned, %llu bytes_readonly, %llu may use " 3311 "%llu total\n", (unsigned long long)bytes, 3312 (unsigned long long)data_sinfo->bytes_delalloc, 3313 (unsigned long long)data_sinfo->bytes_used, 3314 (unsigned long long)data_sinfo->bytes_reserved, 3315 (unsigned long long)data_sinfo->bytes_pinned, 3316 (unsigned long long)data_sinfo->bytes_readonly, 3317 (unsigned long long)data_sinfo->bytes_may_use, 3318 (unsigned long long)data_sinfo->total_bytes); 3319 return -ENOSPC; 3320 } 3321 data_sinfo->bytes_may_use += bytes; 3322 BTRFS_I(inode)->reserved_bytes += bytes; 3323 spin_unlock(&data_sinfo->lock); 3324 3325 return 0; 3326 } 3327 3328 /* 3329 * if there was an error for whatever reason after calling 3330 * btrfs_check_data_free_space, call this so we can cleanup the counters. 3331 */ 3332 void btrfs_free_reserved_data_space(struct btrfs_root *root, 3333 struct inode *inode, u64 bytes) 3334 { 3335 struct btrfs_space_info *data_sinfo; 3336 3337 /* make sure bytes are sectorsize aligned */ 3338 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3339 3340 data_sinfo = BTRFS_I(inode)->space_info; 3341 spin_lock(&data_sinfo->lock); 3342 data_sinfo->bytes_may_use -= bytes; 3343 BTRFS_I(inode)->reserved_bytes -= bytes; 3344 spin_unlock(&data_sinfo->lock); 3345 } 3346 3347 /* called when we are adding a delalloc extent to the inode's io_tree */ 3348 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 3349 u64 bytes) 3350 { 3351 struct btrfs_space_info *data_sinfo; 3352 3353 /* get the space info for where this inode will be storing its data */ 3354 data_sinfo = BTRFS_I(inode)->space_info; 3355 3356 /* make sure we have enough space to handle the data first */ 3357 spin_lock(&data_sinfo->lock); 3358 data_sinfo->bytes_delalloc += bytes; 3359 3360 /* 3361 * we are adding a delalloc extent without calling 3362 * btrfs_check_data_free_space first. This happens on a weird 3363 * writepage condition, but shouldn't hurt our accounting 3364 */ 3365 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 3366 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 3367 BTRFS_I(inode)->reserved_bytes = 0; 3368 } else { 3369 data_sinfo->bytes_may_use -= bytes; 3370 BTRFS_I(inode)->reserved_bytes -= bytes; 3371 } 3372 3373 spin_unlock(&data_sinfo->lock); 3374 } 3375 3376 /* called when we are clearing an delalloc extent from the inode's io_tree */ 3377 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 3378 u64 bytes) 3379 { 3380 struct btrfs_space_info *info; 3381 3382 info = BTRFS_I(inode)->space_info; 3383 3384 spin_lock(&info->lock); 3385 info->bytes_delalloc -= bytes; 3386 spin_unlock(&info->lock); 3387 } 3388 3389 static void force_metadata_allocation(struct btrfs_fs_info *info) 3390 { 3391 struct list_head *head = &info->space_info; 3392 struct btrfs_space_info *found; 3393 3394 rcu_read_lock(); 3395 list_for_each_entry_rcu(found, head, list) { 3396 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3397 found->force_alloc = 1; 3398 } 3399 rcu_read_unlock(); 3400 } 3401 3402 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3403 struct btrfs_root *extent_root, u64 alloc_bytes, 3404 u64 flags, int force) 3405 { 3406 struct btrfs_space_info *space_info; 3407 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3408 u64 thresh; 3409 int ret = 0; 3410 3411 mutex_lock(&fs_info->chunk_mutex); 3412 3413 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3414 3415 space_info = __find_space_info(extent_root->fs_info, flags); 3416 if (!space_info) { 3417 ret = update_space_info(extent_root->fs_info, flags, 3418 0, 0, &space_info); 3419 BUG_ON(ret); 3420 } 3421 BUG_ON(!space_info); 3422 3423 spin_lock(&space_info->lock); 3424 if (space_info->force_alloc) 3425 force = 1; 3426 if (space_info->full) { 3427 spin_unlock(&space_info->lock); 3428 goto out; 3429 } 3430 3431 thresh = space_info->total_bytes - space_info->bytes_readonly; 3432 thresh = div_factor(thresh, 8); 3433 if (!force && 3434 (space_info->bytes_used + space_info->bytes_pinned + 3435 space_info->bytes_reserved + alloc_bytes) < thresh) { 3436 spin_unlock(&space_info->lock); 3437 goto out; 3438 } 3439 spin_unlock(&space_info->lock); 3440 3441 /* 3442 * if we're doing a data chunk, go ahead and make sure that 3443 * we keep a reasonable number of metadata chunks allocated in the 3444 * FS as well. 3445 */ 3446 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3447 fs_info->data_chunk_allocations++; 3448 if (!(fs_info->data_chunk_allocations % 3449 fs_info->metadata_ratio)) 3450 force_metadata_allocation(fs_info); 3451 } 3452 3453 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3454 spin_lock(&space_info->lock); 3455 if (ret) 3456 space_info->full = 1; 3457 space_info->force_alloc = 0; 3458 spin_unlock(&space_info->lock); 3459 out: 3460 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3461 return ret; 3462 } 3463 3464 static int update_block_group(struct btrfs_trans_handle *trans, 3465 struct btrfs_root *root, 3466 u64 bytenr, u64 num_bytes, int alloc, 3467 int mark_free) 3468 { 3469 struct btrfs_block_group_cache *cache; 3470 struct btrfs_fs_info *info = root->fs_info; 3471 u64 total = num_bytes; 3472 u64 old_val; 3473 u64 byte_in_group; 3474 3475 /* block accounting for super block */ 3476 spin_lock(&info->delalloc_lock); 3477 old_val = btrfs_super_bytes_used(&info->super_copy); 3478 if (alloc) 3479 old_val += num_bytes; 3480 else 3481 old_val -= num_bytes; 3482 btrfs_set_super_bytes_used(&info->super_copy, old_val); 3483 spin_unlock(&info->delalloc_lock); 3484 3485 while (total) { 3486 cache = btrfs_lookup_block_group(info, bytenr); 3487 if (!cache) 3488 return -1; 3489 byte_in_group = bytenr - cache->key.objectid; 3490 WARN_ON(byte_in_group > cache->key.offset); 3491 3492 spin_lock(&cache->space_info->lock); 3493 spin_lock(&cache->lock); 3494 cache->dirty = 1; 3495 old_val = btrfs_block_group_used(&cache->item); 3496 num_bytes = min(total, cache->key.offset - byte_in_group); 3497 if (alloc) { 3498 old_val += num_bytes; 3499 btrfs_set_block_group_used(&cache->item, old_val); 3500 cache->reserved -= num_bytes; 3501 cache->space_info->bytes_used += num_bytes; 3502 cache->space_info->bytes_reserved -= num_bytes; 3503 if (cache->ro) 3504 cache->space_info->bytes_readonly -= num_bytes; 3505 spin_unlock(&cache->lock); 3506 spin_unlock(&cache->space_info->lock); 3507 } else { 3508 old_val -= num_bytes; 3509 cache->space_info->bytes_used -= num_bytes; 3510 if (cache->ro) 3511 cache->space_info->bytes_readonly += num_bytes; 3512 btrfs_set_block_group_used(&cache->item, old_val); 3513 spin_unlock(&cache->lock); 3514 spin_unlock(&cache->space_info->lock); 3515 if (mark_free) { 3516 int ret; 3517 3518 ret = btrfs_discard_extent(root, bytenr, 3519 num_bytes); 3520 WARN_ON(ret); 3521 3522 ret = btrfs_add_free_space(cache, bytenr, 3523 num_bytes); 3524 WARN_ON(ret); 3525 } 3526 } 3527 btrfs_put_block_group(cache); 3528 total -= num_bytes; 3529 bytenr += num_bytes; 3530 } 3531 return 0; 3532 } 3533 3534 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 3535 { 3536 struct btrfs_block_group_cache *cache; 3537 u64 bytenr; 3538 3539 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 3540 if (!cache) 3541 return 0; 3542 3543 bytenr = cache->key.objectid; 3544 btrfs_put_block_group(cache); 3545 3546 return bytenr; 3547 } 3548 3549 /* 3550 * this function must be called within transaction 3551 */ 3552 int btrfs_pin_extent(struct btrfs_root *root, 3553 u64 bytenr, u64 num_bytes, int reserved) 3554 { 3555 struct btrfs_fs_info *fs_info = root->fs_info; 3556 struct btrfs_block_group_cache *cache; 3557 3558 cache = btrfs_lookup_block_group(fs_info, bytenr); 3559 BUG_ON(!cache); 3560 3561 spin_lock(&cache->space_info->lock); 3562 spin_lock(&cache->lock); 3563 cache->pinned += num_bytes; 3564 cache->space_info->bytes_pinned += num_bytes; 3565 if (reserved) { 3566 cache->reserved -= num_bytes; 3567 cache->space_info->bytes_reserved -= num_bytes; 3568 } 3569 spin_unlock(&cache->lock); 3570 spin_unlock(&cache->space_info->lock); 3571 3572 btrfs_put_block_group(cache); 3573 3574 set_extent_dirty(fs_info->pinned_extents, 3575 bytenr, bytenr + num_bytes - 1, GFP_NOFS); 3576 return 0; 3577 } 3578 3579 static int update_reserved_extents(struct btrfs_block_group_cache *cache, 3580 u64 num_bytes, int reserve) 3581 { 3582 spin_lock(&cache->space_info->lock); 3583 spin_lock(&cache->lock); 3584 if (reserve) { 3585 cache->reserved += num_bytes; 3586 cache->space_info->bytes_reserved += num_bytes; 3587 } else { 3588 cache->reserved -= num_bytes; 3589 cache->space_info->bytes_reserved -= num_bytes; 3590 } 3591 spin_unlock(&cache->lock); 3592 spin_unlock(&cache->space_info->lock); 3593 return 0; 3594 } 3595 3596 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 3597 struct btrfs_root *root) 3598 { 3599 struct btrfs_fs_info *fs_info = root->fs_info; 3600 struct btrfs_caching_control *next; 3601 struct btrfs_caching_control *caching_ctl; 3602 struct btrfs_block_group_cache *cache; 3603 3604 down_write(&fs_info->extent_commit_sem); 3605 3606 list_for_each_entry_safe(caching_ctl, next, 3607 &fs_info->caching_block_groups, list) { 3608 cache = caching_ctl->block_group; 3609 if (block_group_cache_done(cache)) { 3610 cache->last_byte_to_unpin = (u64)-1; 3611 list_del_init(&caching_ctl->list); 3612 put_caching_control(caching_ctl); 3613 } else { 3614 cache->last_byte_to_unpin = caching_ctl->progress; 3615 } 3616 } 3617 3618 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3619 fs_info->pinned_extents = &fs_info->freed_extents[1]; 3620 else 3621 fs_info->pinned_extents = &fs_info->freed_extents[0]; 3622 3623 up_write(&fs_info->extent_commit_sem); 3624 return 0; 3625 } 3626 3627 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 3628 { 3629 struct btrfs_fs_info *fs_info = root->fs_info; 3630 struct btrfs_block_group_cache *cache = NULL; 3631 u64 len; 3632 3633 while (start <= end) { 3634 if (!cache || 3635 start >= cache->key.objectid + cache->key.offset) { 3636 if (cache) 3637 btrfs_put_block_group(cache); 3638 cache = btrfs_lookup_block_group(fs_info, start); 3639 BUG_ON(!cache); 3640 } 3641 3642 len = cache->key.objectid + cache->key.offset - start; 3643 len = min(len, end + 1 - start); 3644 3645 if (start < cache->last_byte_to_unpin) { 3646 len = min(len, cache->last_byte_to_unpin - start); 3647 btrfs_add_free_space(cache, start, len); 3648 } 3649 3650 spin_lock(&cache->space_info->lock); 3651 spin_lock(&cache->lock); 3652 cache->pinned -= len; 3653 cache->space_info->bytes_pinned -= len; 3654 spin_unlock(&cache->lock); 3655 spin_unlock(&cache->space_info->lock); 3656 3657 start += len; 3658 } 3659 3660 if (cache) 3661 btrfs_put_block_group(cache); 3662 return 0; 3663 } 3664 3665 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3666 struct btrfs_root *root) 3667 { 3668 struct btrfs_fs_info *fs_info = root->fs_info; 3669 struct extent_io_tree *unpin; 3670 u64 start; 3671 u64 end; 3672 int ret; 3673 3674 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 3675 unpin = &fs_info->freed_extents[1]; 3676 else 3677 unpin = &fs_info->freed_extents[0]; 3678 3679 while (1) { 3680 ret = find_first_extent_bit(unpin, 0, &start, &end, 3681 EXTENT_DIRTY); 3682 if (ret) 3683 break; 3684 3685 ret = btrfs_discard_extent(root, start, end + 1 - start); 3686 3687 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3688 unpin_extent_range(root, start, end); 3689 cond_resched(); 3690 } 3691 3692 return ret; 3693 } 3694 3695 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3696 struct btrfs_root *root, 3697 struct btrfs_path *path, 3698 u64 bytenr, u64 num_bytes, 3699 int is_data, int reserved, 3700 struct extent_buffer **must_clean) 3701 { 3702 int err = 0; 3703 struct extent_buffer *buf; 3704 3705 if (is_data) 3706 goto pinit; 3707 3708 /* 3709 * discard is sloooow, and so triggering discards on 3710 * individual btree blocks isn't a good plan. Just 3711 * pin everything in discard mode. 3712 */ 3713 if (btrfs_test_opt(root, DISCARD)) 3714 goto pinit; 3715 3716 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3717 if (!buf) 3718 goto pinit; 3719 3720 /* we can reuse a block if it hasn't been written 3721 * and it is from this transaction. We can't 3722 * reuse anything from the tree log root because 3723 * it has tiny sub-transactions. 3724 */ 3725 if (btrfs_buffer_uptodate(buf, 0) && 3726 btrfs_try_tree_lock(buf)) { 3727 u64 header_owner = btrfs_header_owner(buf); 3728 u64 header_transid = btrfs_header_generation(buf); 3729 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3730 header_transid == trans->transid && 3731 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3732 *must_clean = buf; 3733 return 1; 3734 } 3735 btrfs_tree_unlock(buf); 3736 } 3737 free_extent_buffer(buf); 3738 pinit: 3739 if (path) 3740 btrfs_set_path_blocking(path); 3741 /* unlocks the pinned mutex */ 3742 btrfs_pin_extent(root, bytenr, num_bytes, reserved); 3743 3744 BUG_ON(err < 0); 3745 return 0; 3746 } 3747 3748 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3749 struct btrfs_root *root, 3750 u64 bytenr, u64 num_bytes, u64 parent, 3751 u64 root_objectid, u64 owner_objectid, 3752 u64 owner_offset, int refs_to_drop, 3753 struct btrfs_delayed_extent_op *extent_op) 3754 { 3755 struct btrfs_key key; 3756 struct btrfs_path *path; 3757 struct btrfs_fs_info *info = root->fs_info; 3758 struct btrfs_root *extent_root = info->extent_root; 3759 struct extent_buffer *leaf; 3760 struct btrfs_extent_item *ei; 3761 struct btrfs_extent_inline_ref *iref; 3762 int ret; 3763 int is_data; 3764 int extent_slot = 0; 3765 int found_extent = 0; 3766 int num_to_del = 1; 3767 u32 item_size; 3768 u64 refs; 3769 3770 path = btrfs_alloc_path(); 3771 if (!path) 3772 return -ENOMEM; 3773 3774 path->reada = 1; 3775 path->leave_spinning = 1; 3776 3777 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3778 BUG_ON(!is_data && refs_to_drop != 1); 3779 3780 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3781 bytenr, num_bytes, parent, 3782 root_objectid, owner_objectid, 3783 owner_offset); 3784 if (ret == 0) { 3785 extent_slot = path->slots[0]; 3786 while (extent_slot >= 0) { 3787 btrfs_item_key_to_cpu(path->nodes[0], &key, 3788 extent_slot); 3789 if (key.objectid != bytenr) 3790 break; 3791 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3792 key.offset == num_bytes) { 3793 found_extent = 1; 3794 break; 3795 } 3796 if (path->slots[0] - extent_slot > 5) 3797 break; 3798 extent_slot--; 3799 } 3800 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3801 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3802 if (found_extent && item_size < sizeof(*ei)) 3803 found_extent = 0; 3804 #endif 3805 if (!found_extent) { 3806 BUG_ON(iref); 3807 ret = remove_extent_backref(trans, extent_root, path, 3808 NULL, refs_to_drop, 3809 is_data); 3810 BUG_ON(ret); 3811 btrfs_release_path(extent_root, path); 3812 path->leave_spinning = 1; 3813 3814 key.objectid = bytenr; 3815 key.type = BTRFS_EXTENT_ITEM_KEY; 3816 key.offset = num_bytes; 3817 3818 ret = btrfs_search_slot(trans, extent_root, 3819 &key, path, -1, 1); 3820 if (ret) { 3821 printk(KERN_ERR "umm, got %d back from search" 3822 ", was looking for %llu\n", ret, 3823 (unsigned long long)bytenr); 3824 btrfs_print_leaf(extent_root, path->nodes[0]); 3825 } 3826 BUG_ON(ret); 3827 extent_slot = path->slots[0]; 3828 } 3829 } else { 3830 btrfs_print_leaf(extent_root, path->nodes[0]); 3831 WARN_ON(1); 3832 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3833 "parent %llu root %llu owner %llu offset %llu\n", 3834 (unsigned long long)bytenr, 3835 (unsigned long long)parent, 3836 (unsigned long long)root_objectid, 3837 (unsigned long long)owner_objectid, 3838 (unsigned long long)owner_offset); 3839 } 3840 3841 leaf = path->nodes[0]; 3842 item_size = btrfs_item_size_nr(leaf, extent_slot); 3843 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3844 if (item_size < sizeof(*ei)) { 3845 BUG_ON(found_extent || extent_slot != path->slots[0]); 3846 ret = convert_extent_item_v0(trans, extent_root, path, 3847 owner_objectid, 0); 3848 BUG_ON(ret < 0); 3849 3850 btrfs_release_path(extent_root, path); 3851 path->leave_spinning = 1; 3852 3853 key.objectid = bytenr; 3854 key.type = BTRFS_EXTENT_ITEM_KEY; 3855 key.offset = num_bytes; 3856 3857 ret = btrfs_search_slot(trans, extent_root, &key, path, 3858 -1, 1); 3859 if (ret) { 3860 printk(KERN_ERR "umm, got %d back from search" 3861 ", was looking for %llu\n", ret, 3862 (unsigned long long)bytenr); 3863 btrfs_print_leaf(extent_root, path->nodes[0]); 3864 } 3865 BUG_ON(ret); 3866 extent_slot = path->slots[0]; 3867 leaf = path->nodes[0]; 3868 item_size = btrfs_item_size_nr(leaf, extent_slot); 3869 } 3870 #endif 3871 BUG_ON(item_size < sizeof(*ei)); 3872 ei = btrfs_item_ptr(leaf, extent_slot, 3873 struct btrfs_extent_item); 3874 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3875 struct btrfs_tree_block_info *bi; 3876 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3877 bi = (struct btrfs_tree_block_info *)(ei + 1); 3878 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3879 } 3880 3881 refs = btrfs_extent_refs(leaf, ei); 3882 BUG_ON(refs < refs_to_drop); 3883 refs -= refs_to_drop; 3884 3885 if (refs > 0) { 3886 if (extent_op) 3887 __run_delayed_extent_op(extent_op, leaf, ei); 3888 /* 3889 * In the case of inline back ref, reference count will 3890 * be updated by remove_extent_backref 3891 */ 3892 if (iref) { 3893 BUG_ON(!found_extent); 3894 } else { 3895 btrfs_set_extent_refs(leaf, ei, refs); 3896 btrfs_mark_buffer_dirty(leaf); 3897 } 3898 if (found_extent) { 3899 ret = remove_extent_backref(trans, extent_root, path, 3900 iref, refs_to_drop, 3901 is_data); 3902 BUG_ON(ret); 3903 } 3904 } else { 3905 int mark_free = 0; 3906 struct extent_buffer *must_clean = NULL; 3907 3908 if (found_extent) { 3909 BUG_ON(is_data && refs_to_drop != 3910 extent_data_ref_count(root, path, iref)); 3911 if (iref) { 3912 BUG_ON(path->slots[0] != extent_slot); 3913 } else { 3914 BUG_ON(path->slots[0] != extent_slot + 1); 3915 path->slots[0] = extent_slot; 3916 num_to_del = 2; 3917 } 3918 } 3919 3920 ret = pin_down_bytes(trans, root, path, bytenr, 3921 num_bytes, is_data, 0, &must_clean); 3922 if (ret > 0) 3923 mark_free = 1; 3924 BUG_ON(ret < 0); 3925 /* 3926 * it is going to be very rare for someone to be waiting 3927 * on the block we're freeing. del_items might need to 3928 * schedule, so rather than get fancy, just force it 3929 * to blocking here 3930 */ 3931 if (must_clean) 3932 btrfs_set_lock_blocking(must_clean); 3933 3934 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3935 num_to_del); 3936 BUG_ON(ret); 3937 btrfs_release_path(extent_root, path); 3938 3939 if (must_clean) { 3940 clean_tree_block(NULL, root, must_clean); 3941 btrfs_tree_unlock(must_clean); 3942 free_extent_buffer(must_clean); 3943 } 3944 3945 if (is_data) { 3946 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3947 BUG_ON(ret); 3948 } else { 3949 invalidate_mapping_pages(info->btree_inode->i_mapping, 3950 bytenr >> PAGE_CACHE_SHIFT, 3951 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3952 } 3953 3954 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3955 mark_free); 3956 BUG_ON(ret); 3957 } 3958 btrfs_free_path(path); 3959 return ret; 3960 } 3961 3962 /* 3963 * when we free an extent, it is possible (and likely) that we free the last 3964 * delayed ref for that extent as well. This searches the delayed ref tree for 3965 * a given extent, and if there are no other delayed refs to be processed, it 3966 * removes it from the tree. 3967 */ 3968 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3969 struct btrfs_root *root, u64 bytenr) 3970 { 3971 struct btrfs_delayed_ref_head *head; 3972 struct btrfs_delayed_ref_root *delayed_refs; 3973 struct btrfs_delayed_ref_node *ref; 3974 struct rb_node *node; 3975 int ret; 3976 3977 delayed_refs = &trans->transaction->delayed_refs; 3978 spin_lock(&delayed_refs->lock); 3979 head = btrfs_find_delayed_ref_head(trans, bytenr); 3980 if (!head) 3981 goto out; 3982 3983 node = rb_prev(&head->node.rb_node); 3984 if (!node) 3985 goto out; 3986 3987 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3988 3989 /* there are still entries for this ref, we can't drop it */ 3990 if (ref->bytenr == bytenr) 3991 goto out; 3992 3993 if (head->extent_op) { 3994 if (!head->must_insert_reserved) 3995 goto out; 3996 kfree(head->extent_op); 3997 head->extent_op = NULL; 3998 } 3999 4000 /* 4001 * waiting for the lock here would deadlock. If someone else has it 4002 * locked they are already in the process of dropping it anyway 4003 */ 4004 if (!mutex_trylock(&head->mutex)) 4005 goto out; 4006 4007 /* 4008 * at this point we have a head with no other entries. Go 4009 * ahead and process it. 4010 */ 4011 head->node.in_tree = 0; 4012 rb_erase(&head->node.rb_node, &delayed_refs->root); 4013 4014 delayed_refs->num_entries--; 4015 4016 /* 4017 * we don't take a ref on the node because we're removing it from the 4018 * tree, so we just steal the ref the tree was holding. 4019 */ 4020 delayed_refs->num_heads--; 4021 if (list_empty(&head->cluster)) 4022 delayed_refs->num_heads_ready--; 4023 4024 list_del_init(&head->cluster); 4025 spin_unlock(&delayed_refs->lock); 4026 4027 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 4028 &head->node, head->extent_op, 4029 head->must_insert_reserved); 4030 BUG_ON(ret); 4031 btrfs_put_delayed_ref(&head->node); 4032 return 0; 4033 out: 4034 spin_unlock(&delayed_refs->lock); 4035 return 0; 4036 } 4037 4038 int btrfs_free_extent(struct btrfs_trans_handle *trans, 4039 struct btrfs_root *root, 4040 u64 bytenr, u64 num_bytes, u64 parent, 4041 u64 root_objectid, u64 owner, u64 offset) 4042 { 4043 int ret; 4044 4045 /* 4046 * tree log blocks never actually go into the extent allocation 4047 * tree, just update pinning info and exit early. 4048 */ 4049 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 4050 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 4051 /* unlocks the pinned mutex */ 4052 btrfs_pin_extent(root, bytenr, num_bytes, 1); 4053 ret = 0; 4054 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 4055 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 4056 parent, root_objectid, (int)owner, 4057 BTRFS_DROP_DELAYED_REF, NULL); 4058 BUG_ON(ret); 4059 ret = check_ref_cleanup(trans, root, bytenr); 4060 BUG_ON(ret); 4061 } else { 4062 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4063 parent, root_objectid, owner, 4064 offset, BTRFS_DROP_DELAYED_REF, NULL); 4065 BUG_ON(ret); 4066 } 4067 return ret; 4068 } 4069 4070 int btrfs_free_tree_block(struct btrfs_trans_handle *trans, 4071 struct btrfs_root *root, 4072 u64 bytenr, u32 blocksize, 4073 u64 parent, u64 root_objectid, int level) 4074 { 4075 u64 used; 4076 spin_lock(&root->node_lock); 4077 used = btrfs_root_used(&root->root_item) - blocksize; 4078 btrfs_set_root_used(&root->root_item, used); 4079 spin_unlock(&root->node_lock); 4080 4081 return btrfs_free_extent(trans, root, bytenr, blocksize, 4082 parent, root_objectid, level, 0); 4083 } 4084 4085 static u64 stripe_align(struct btrfs_root *root, u64 val) 4086 { 4087 u64 mask = ((u64)root->stripesize - 1); 4088 u64 ret = (val + mask) & ~mask; 4089 return ret; 4090 } 4091 4092 /* 4093 * when we wait for progress in the block group caching, its because 4094 * our allocation attempt failed at least once. So, we must sleep 4095 * and let some progress happen before we try again. 4096 * 4097 * This function will sleep at least once waiting for new free space to 4098 * show up, and then it will check the block group free space numbers 4099 * for our min num_bytes. Another option is to have it go ahead 4100 * and look in the rbtree for a free extent of a given size, but this 4101 * is a good start. 4102 */ 4103 static noinline int 4104 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 4105 u64 num_bytes) 4106 { 4107 struct btrfs_caching_control *caching_ctl; 4108 DEFINE_WAIT(wait); 4109 4110 caching_ctl = get_caching_control(cache); 4111 if (!caching_ctl) 4112 return 0; 4113 4114 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4115 (cache->free_space >= num_bytes)); 4116 4117 put_caching_control(caching_ctl); 4118 return 0; 4119 } 4120 4121 static noinline int 4122 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 4123 { 4124 struct btrfs_caching_control *caching_ctl; 4125 DEFINE_WAIT(wait); 4126 4127 caching_ctl = get_caching_control(cache); 4128 if (!caching_ctl) 4129 return 0; 4130 4131 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 4132 4133 put_caching_control(caching_ctl); 4134 return 0; 4135 } 4136 4137 enum btrfs_loop_type { 4138 LOOP_FIND_IDEAL = 0, 4139 LOOP_CACHING_NOWAIT = 1, 4140 LOOP_CACHING_WAIT = 2, 4141 LOOP_ALLOC_CHUNK = 3, 4142 LOOP_NO_EMPTY_SIZE = 4, 4143 }; 4144 4145 /* 4146 * walks the btree of allocated extents and find a hole of a given size. 4147 * The key ins is changed to record the hole: 4148 * ins->objectid == block start 4149 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4150 * ins->offset == number of blocks 4151 * Any available blocks before search_start are skipped. 4152 */ 4153 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4154 struct btrfs_root *orig_root, 4155 u64 num_bytes, u64 empty_size, 4156 u64 search_start, u64 search_end, 4157 u64 hint_byte, struct btrfs_key *ins, 4158 u64 exclude_start, u64 exclude_nr, 4159 int data) 4160 { 4161 int ret = 0; 4162 struct btrfs_root *root = orig_root->fs_info->extent_root; 4163 struct btrfs_free_cluster *last_ptr = NULL; 4164 struct btrfs_block_group_cache *block_group = NULL; 4165 int empty_cluster = 2 * 1024 * 1024; 4166 int allowed_chunk_alloc = 0; 4167 int done_chunk_alloc = 0; 4168 struct btrfs_space_info *space_info; 4169 int last_ptr_loop = 0; 4170 int loop = 0; 4171 bool found_uncached_bg = false; 4172 bool failed_cluster_refill = false; 4173 bool failed_alloc = false; 4174 u64 ideal_cache_percent = 0; 4175 u64 ideal_cache_offset = 0; 4176 4177 WARN_ON(num_bytes < root->sectorsize); 4178 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4179 ins->objectid = 0; 4180 ins->offset = 0; 4181 4182 space_info = __find_space_info(root->fs_info, data); 4183 if (!space_info) { 4184 printk(KERN_ERR "No space info for %d\n", data); 4185 return -ENOSPC; 4186 } 4187 4188 if (orig_root->ref_cows || empty_size) 4189 allowed_chunk_alloc = 1; 4190 4191 if (data & BTRFS_BLOCK_GROUP_METADATA) { 4192 last_ptr = &root->fs_info->meta_alloc_cluster; 4193 if (!btrfs_test_opt(root, SSD)) 4194 empty_cluster = 64 * 1024; 4195 } 4196 4197 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 4198 last_ptr = &root->fs_info->data_alloc_cluster; 4199 } 4200 4201 if (last_ptr) { 4202 spin_lock(&last_ptr->lock); 4203 if (last_ptr->block_group) 4204 hint_byte = last_ptr->window_start; 4205 spin_unlock(&last_ptr->lock); 4206 } 4207 4208 search_start = max(search_start, first_logical_byte(root, 0)); 4209 search_start = max(search_start, hint_byte); 4210 4211 if (!last_ptr) 4212 empty_cluster = 0; 4213 4214 if (search_start == hint_byte) { 4215 ideal_cache: 4216 block_group = btrfs_lookup_block_group(root->fs_info, 4217 search_start); 4218 /* 4219 * we don't want to use the block group if it doesn't match our 4220 * allocation bits, or if its not cached. 4221 * 4222 * However if we are re-searching with an ideal block group 4223 * picked out then we don't care that the block group is cached. 4224 */ 4225 if (block_group && block_group_bits(block_group, data) && 4226 (block_group->cached != BTRFS_CACHE_NO || 4227 search_start == ideal_cache_offset)) { 4228 down_read(&space_info->groups_sem); 4229 if (list_empty(&block_group->list) || 4230 block_group->ro) { 4231 /* 4232 * someone is removing this block group, 4233 * we can't jump into the have_block_group 4234 * target because our list pointers are not 4235 * valid 4236 */ 4237 btrfs_put_block_group(block_group); 4238 up_read(&space_info->groups_sem); 4239 } else { 4240 goto have_block_group; 4241 } 4242 } else if (block_group) { 4243 btrfs_put_block_group(block_group); 4244 } 4245 } 4246 search: 4247 down_read(&space_info->groups_sem); 4248 list_for_each_entry(block_group, &space_info->block_groups, list) { 4249 u64 offset; 4250 int cached; 4251 4252 btrfs_get_block_group(block_group); 4253 search_start = block_group->key.objectid; 4254 4255 have_block_group: 4256 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4257 u64 free_percent; 4258 4259 free_percent = btrfs_block_group_used(&block_group->item); 4260 free_percent *= 100; 4261 free_percent = div64_u64(free_percent, 4262 block_group->key.offset); 4263 free_percent = 100 - free_percent; 4264 if (free_percent > ideal_cache_percent && 4265 likely(!block_group->ro)) { 4266 ideal_cache_offset = block_group->key.objectid; 4267 ideal_cache_percent = free_percent; 4268 } 4269 4270 /* 4271 * We only want to start kthread caching if we are at 4272 * the point where we will wait for caching to make 4273 * progress, or if our ideal search is over and we've 4274 * found somebody to start caching. 4275 */ 4276 if (loop > LOOP_CACHING_NOWAIT || 4277 (loop > LOOP_FIND_IDEAL && 4278 atomic_read(&space_info->caching_threads) < 2)) { 4279 ret = cache_block_group(block_group); 4280 BUG_ON(ret); 4281 } 4282 found_uncached_bg = true; 4283 4284 /* 4285 * If loop is set for cached only, try the next block 4286 * group. 4287 */ 4288 if (loop == LOOP_FIND_IDEAL) 4289 goto loop; 4290 } 4291 4292 cached = block_group_cache_done(block_group); 4293 if (unlikely(!cached)) 4294 found_uncached_bg = true; 4295 4296 if (unlikely(block_group->ro)) 4297 goto loop; 4298 4299 /* 4300 * Ok we want to try and use the cluster allocator, so lets look 4301 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 4302 * have tried the cluster allocator plenty of times at this 4303 * point and not have found anything, so we are likely way too 4304 * fragmented for the clustering stuff to find anything, so lets 4305 * just skip it and let the allocator find whatever block it can 4306 * find 4307 */ 4308 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 4309 /* 4310 * the refill lock keeps out other 4311 * people trying to start a new cluster 4312 */ 4313 spin_lock(&last_ptr->refill_lock); 4314 if (last_ptr->block_group && 4315 (last_ptr->block_group->ro || 4316 !block_group_bits(last_ptr->block_group, data))) { 4317 offset = 0; 4318 goto refill_cluster; 4319 } 4320 4321 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 4322 num_bytes, search_start); 4323 if (offset) { 4324 /* we have a block, we're done */ 4325 spin_unlock(&last_ptr->refill_lock); 4326 goto checks; 4327 } 4328 4329 spin_lock(&last_ptr->lock); 4330 /* 4331 * whoops, this cluster doesn't actually point to 4332 * this block group. Get a ref on the block 4333 * group is does point to and try again 4334 */ 4335 if (!last_ptr_loop && last_ptr->block_group && 4336 last_ptr->block_group != block_group) { 4337 4338 btrfs_put_block_group(block_group); 4339 block_group = last_ptr->block_group; 4340 btrfs_get_block_group(block_group); 4341 spin_unlock(&last_ptr->lock); 4342 spin_unlock(&last_ptr->refill_lock); 4343 4344 last_ptr_loop = 1; 4345 search_start = block_group->key.objectid; 4346 /* 4347 * we know this block group is properly 4348 * in the list because 4349 * btrfs_remove_block_group, drops the 4350 * cluster before it removes the block 4351 * group from the list 4352 */ 4353 goto have_block_group; 4354 } 4355 spin_unlock(&last_ptr->lock); 4356 refill_cluster: 4357 /* 4358 * this cluster didn't work out, free it and 4359 * start over 4360 */ 4361 btrfs_return_cluster_to_free_space(NULL, last_ptr); 4362 4363 last_ptr_loop = 0; 4364 4365 /* allocate a cluster in this block group */ 4366 ret = btrfs_find_space_cluster(trans, root, 4367 block_group, last_ptr, 4368 offset, num_bytes, 4369 empty_cluster + empty_size); 4370 if (ret == 0) { 4371 /* 4372 * now pull our allocation out of this 4373 * cluster 4374 */ 4375 offset = btrfs_alloc_from_cluster(block_group, 4376 last_ptr, num_bytes, 4377 search_start); 4378 if (offset) { 4379 /* we found one, proceed */ 4380 spin_unlock(&last_ptr->refill_lock); 4381 goto checks; 4382 } 4383 } else if (!cached && loop > LOOP_CACHING_NOWAIT 4384 && !failed_cluster_refill) { 4385 spin_unlock(&last_ptr->refill_lock); 4386 4387 failed_cluster_refill = true; 4388 wait_block_group_cache_progress(block_group, 4389 num_bytes + empty_cluster + empty_size); 4390 goto have_block_group; 4391 } 4392 4393 /* 4394 * at this point we either didn't find a cluster 4395 * or we weren't able to allocate a block from our 4396 * cluster. Free the cluster we've been trying 4397 * to use, and go to the next block group 4398 */ 4399 btrfs_return_cluster_to_free_space(NULL, last_ptr); 4400 spin_unlock(&last_ptr->refill_lock); 4401 goto loop; 4402 } 4403 4404 offset = btrfs_find_space_for_alloc(block_group, search_start, 4405 num_bytes, empty_size); 4406 /* 4407 * If we didn't find a chunk, and we haven't failed on this 4408 * block group before, and this block group is in the middle of 4409 * caching and we are ok with waiting, then go ahead and wait 4410 * for progress to be made, and set failed_alloc to true. 4411 * 4412 * If failed_alloc is true then we've already waited on this 4413 * block group once and should move on to the next block group. 4414 */ 4415 if (!offset && !failed_alloc && !cached && 4416 loop > LOOP_CACHING_NOWAIT) { 4417 wait_block_group_cache_progress(block_group, 4418 num_bytes + empty_size); 4419 failed_alloc = true; 4420 goto have_block_group; 4421 } else if (!offset) { 4422 goto loop; 4423 } 4424 checks: 4425 search_start = stripe_align(root, offset); 4426 /* move on to the next group */ 4427 if (search_start + num_bytes >= search_end) { 4428 btrfs_add_free_space(block_group, offset, num_bytes); 4429 goto loop; 4430 } 4431 4432 /* move on to the next group */ 4433 if (search_start + num_bytes > 4434 block_group->key.objectid + block_group->key.offset) { 4435 btrfs_add_free_space(block_group, offset, num_bytes); 4436 goto loop; 4437 } 4438 4439 if (exclude_nr > 0 && 4440 (search_start + num_bytes > exclude_start && 4441 search_start < exclude_start + exclude_nr)) { 4442 search_start = exclude_start + exclude_nr; 4443 4444 btrfs_add_free_space(block_group, offset, num_bytes); 4445 /* 4446 * if search_start is still in this block group 4447 * then we just re-search this block group 4448 */ 4449 if (search_start >= block_group->key.objectid && 4450 search_start < (block_group->key.objectid + 4451 block_group->key.offset)) 4452 goto have_block_group; 4453 goto loop; 4454 } 4455 4456 ins->objectid = search_start; 4457 ins->offset = num_bytes; 4458 4459 if (offset < search_start) 4460 btrfs_add_free_space(block_group, offset, 4461 search_start - offset); 4462 BUG_ON(offset > search_start); 4463 4464 update_reserved_extents(block_group, num_bytes, 1); 4465 4466 /* we are all good, lets return */ 4467 break; 4468 loop: 4469 failed_cluster_refill = false; 4470 failed_alloc = false; 4471 btrfs_put_block_group(block_group); 4472 } 4473 up_read(&space_info->groups_sem); 4474 4475 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 4476 * for them to make caching progress. Also 4477 * determine the best possible bg to cache 4478 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 4479 * caching kthreads as we move along 4480 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 4481 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 4482 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 4483 * again 4484 */ 4485 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && 4486 (found_uncached_bg || empty_size || empty_cluster || 4487 allowed_chunk_alloc)) { 4488 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 4489 found_uncached_bg = false; 4490 loop++; 4491 if (!ideal_cache_percent && 4492 atomic_read(&space_info->caching_threads)) 4493 goto search; 4494 4495 /* 4496 * 1 of the following 2 things have happened so far 4497 * 4498 * 1) We found an ideal block group for caching that 4499 * is mostly full and will cache quickly, so we might 4500 * as well wait for it. 4501 * 4502 * 2) We searched for cached only and we didn't find 4503 * anything, and we didn't start any caching kthreads 4504 * either, so chances are we will loop through and 4505 * start a couple caching kthreads, and then come back 4506 * around and just wait for them. This will be slower 4507 * because we will have 2 caching kthreads reading at 4508 * the same time when we could have just started one 4509 * and waited for it to get far enough to give us an 4510 * allocation, so go ahead and go to the wait caching 4511 * loop. 4512 */ 4513 loop = LOOP_CACHING_WAIT; 4514 search_start = ideal_cache_offset; 4515 ideal_cache_percent = 0; 4516 goto ideal_cache; 4517 } else if (loop == LOOP_FIND_IDEAL) { 4518 /* 4519 * Didn't find a uncached bg, wait on anything we find 4520 * next. 4521 */ 4522 loop = LOOP_CACHING_WAIT; 4523 goto search; 4524 } 4525 4526 if (loop < LOOP_CACHING_WAIT) { 4527 loop++; 4528 goto search; 4529 } 4530 4531 if (loop == LOOP_ALLOC_CHUNK) { 4532 empty_size = 0; 4533 empty_cluster = 0; 4534 } 4535 4536 if (allowed_chunk_alloc) { 4537 ret = do_chunk_alloc(trans, root, num_bytes + 4538 2 * 1024 * 1024, data, 1); 4539 allowed_chunk_alloc = 0; 4540 done_chunk_alloc = 1; 4541 } else if (!done_chunk_alloc) { 4542 space_info->force_alloc = 1; 4543 } 4544 4545 if (loop < LOOP_NO_EMPTY_SIZE) { 4546 loop++; 4547 goto search; 4548 } 4549 ret = -ENOSPC; 4550 } else if (!ins->objectid) { 4551 ret = -ENOSPC; 4552 } 4553 4554 /* we found what we needed */ 4555 if (ins->objectid) { 4556 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 4557 trans->block_group = block_group->key.objectid; 4558 4559 btrfs_put_block_group(block_group); 4560 ret = 0; 4561 } 4562 4563 return ret; 4564 } 4565 4566 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 4567 int dump_block_groups) 4568 { 4569 struct btrfs_block_group_cache *cache; 4570 4571 spin_lock(&info->lock); 4572 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 4573 (unsigned long long)(info->total_bytes - info->bytes_used - 4574 info->bytes_pinned - info->bytes_reserved - 4575 info->bytes_super), 4576 (info->full) ? "" : "not "); 4577 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 4578 " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" 4579 "\n", 4580 (unsigned long long)info->total_bytes, 4581 (unsigned long long)info->bytes_pinned, 4582 (unsigned long long)info->bytes_delalloc, 4583 (unsigned long long)info->bytes_may_use, 4584 (unsigned long long)info->bytes_used, 4585 (unsigned long long)info->bytes_root, 4586 (unsigned long long)info->bytes_super, 4587 (unsigned long long)info->bytes_reserved); 4588 spin_unlock(&info->lock); 4589 4590 if (!dump_block_groups) 4591 return; 4592 4593 down_read(&info->groups_sem); 4594 list_for_each_entry(cache, &info->block_groups, list) { 4595 spin_lock(&cache->lock); 4596 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 4597 "%llu pinned %llu reserved\n", 4598 (unsigned long long)cache->key.objectid, 4599 (unsigned long long)cache->key.offset, 4600 (unsigned long long)btrfs_block_group_used(&cache->item), 4601 (unsigned long long)cache->pinned, 4602 (unsigned long long)cache->reserved); 4603 btrfs_dump_free_space(cache, bytes); 4604 spin_unlock(&cache->lock); 4605 } 4606 up_read(&info->groups_sem); 4607 } 4608 4609 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 4610 struct btrfs_root *root, 4611 u64 num_bytes, u64 min_alloc_size, 4612 u64 empty_size, u64 hint_byte, 4613 u64 search_end, struct btrfs_key *ins, 4614 u64 data) 4615 { 4616 int ret; 4617 u64 search_start = 0; 4618 4619 data = btrfs_get_alloc_profile(root, data); 4620 again: 4621 /* 4622 * the only place that sets empty_size is btrfs_realloc_node, which 4623 * is not called recursively on allocations 4624 */ 4625 if (empty_size || root->ref_cows) 4626 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 4627 num_bytes + 2 * 1024 * 1024, data, 0); 4628 4629 WARN_ON(num_bytes < root->sectorsize); 4630 ret = find_free_extent(trans, root, num_bytes, empty_size, 4631 search_start, search_end, hint_byte, ins, 4632 trans->alloc_exclude_start, 4633 trans->alloc_exclude_nr, data); 4634 4635 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 4636 num_bytes = num_bytes >> 1; 4637 num_bytes = num_bytes & ~(root->sectorsize - 1); 4638 num_bytes = max(num_bytes, min_alloc_size); 4639 do_chunk_alloc(trans, root->fs_info->extent_root, 4640 num_bytes, data, 1); 4641 goto again; 4642 } 4643 if (ret == -ENOSPC) { 4644 struct btrfs_space_info *sinfo; 4645 4646 sinfo = __find_space_info(root->fs_info, data); 4647 printk(KERN_ERR "btrfs allocation failed flags %llu, " 4648 "wanted %llu\n", (unsigned long long)data, 4649 (unsigned long long)num_bytes); 4650 dump_space_info(sinfo, num_bytes, 1); 4651 } 4652 4653 return ret; 4654 } 4655 4656 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 4657 { 4658 struct btrfs_block_group_cache *cache; 4659 int ret = 0; 4660 4661 cache = btrfs_lookup_block_group(root->fs_info, start); 4662 if (!cache) { 4663 printk(KERN_ERR "Unable to find block group for %llu\n", 4664 (unsigned long long)start); 4665 return -ENOSPC; 4666 } 4667 4668 ret = btrfs_discard_extent(root, start, len); 4669 4670 btrfs_add_free_space(cache, start, len); 4671 update_reserved_extents(cache, len, 0); 4672 btrfs_put_block_group(cache); 4673 4674 return ret; 4675 } 4676 4677 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4678 struct btrfs_root *root, 4679 u64 parent, u64 root_objectid, 4680 u64 flags, u64 owner, u64 offset, 4681 struct btrfs_key *ins, int ref_mod) 4682 { 4683 int ret; 4684 struct btrfs_fs_info *fs_info = root->fs_info; 4685 struct btrfs_extent_item *extent_item; 4686 struct btrfs_extent_inline_ref *iref; 4687 struct btrfs_path *path; 4688 struct extent_buffer *leaf; 4689 int type; 4690 u32 size; 4691 4692 if (parent > 0) 4693 type = BTRFS_SHARED_DATA_REF_KEY; 4694 else 4695 type = BTRFS_EXTENT_DATA_REF_KEY; 4696 4697 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 4698 4699 path = btrfs_alloc_path(); 4700 BUG_ON(!path); 4701 4702 path->leave_spinning = 1; 4703 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4704 ins, size); 4705 BUG_ON(ret); 4706 4707 leaf = path->nodes[0]; 4708 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4709 struct btrfs_extent_item); 4710 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 4711 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4712 btrfs_set_extent_flags(leaf, extent_item, 4713 flags | BTRFS_EXTENT_FLAG_DATA); 4714 4715 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 4716 btrfs_set_extent_inline_ref_type(leaf, iref, type); 4717 if (parent > 0) { 4718 struct btrfs_shared_data_ref *ref; 4719 ref = (struct btrfs_shared_data_ref *)(iref + 1); 4720 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4721 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 4722 } else { 4723 struct btrfs_extent_data_ref *ref; 4724 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 4725 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 4726 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 4727 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 4728 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 4729 } 4730 4731 btrfs_mark_buffer_dirty(path->nodes[0]); 4732 btrfs_free_path(path); 4733 4734 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4735 1, 0); 4736 if (ret) { 4737 printk(KERN_ERR "btrfs update block group failed for %llu " 4738 "%llu\n", (unsigned long long)ins->objectid, 4739 (unsigned long long)ins->offset); 4740 BUG(); 4741 } 4742 return ret; 4743 } 4744 4745 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 4746 struct btrfs_root *root, 4747 u64 parent, u64 root_objectid, 4748 u64 flags, struct btrfs_disk_key *key, 4749 int level, struct btrfs_key *ins) 4750 { 4751 int ret; 4752 struct btrfs_fs_info *fs_info = root->fs_info; 4753 struct btrfs_extent_item *extent_item; 4754 struct btrfs_tree_block_info *block_info; 4755 struct btrfs_extent_inline_ref *iref; 4756 struct btrfs_path *path; 4757 struct extent_buffer *leaf; 4758 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 4759 4760 path = btrfs_alloc_path(); 4761 BUG_ON(!path); 4762 4763 path->leave_spinning = 1; 4764 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 4765 ins, size); 4766 BUG_ON(ret); 4767 4768 leaf = path->nodes[0]; 4769 extent_item = btrfs_item_ptr(leaf, path->slots[0], 4770 struct btrfs_extent_item); 4771 btrfs_set_extent_refs(leaf, extent_item, 1); 4772 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 4773 btrfs_set_extent_flags(leaf, extent_item, 4774 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 4775 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 4776 4777 btrfs_set_tree_block_key(leaf, block_info, key); 4778 btrfs_set_tree_block_level(leaf, block_info, level); 4779 4780 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 4781 if (parent > 0) { 4782 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4783 btrfs_set_extent_inline_ref_type(leaf, iref, 4784 BTRFS_SHARED_BLOCK_REF_KEY); 4785 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 4786 } else { 4787 btrfs_set_extent_inline_ref_type(leaf, iref, 4788 BTRFS_TREE_BLOCK_REF_KEY); 4789 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 4790 } 4791 4792 btrfs_mark_buffer_dirty(leaf); 4793 btrfs_free_path(path); 4794 4795 ret = update_block_group(trans, root, ins->objectid, ins->offset, 4796 1, 0); 4797 if (ret) { 4798 printk(KERN_ERR "btrfs update block group failed for %llu " 4799 "%llu\n", (unsigned long long)ins->objectid, 4800 (unsigned long long)ins->offset); 4801 BUG(); 4802 } 4803 return ret; 4804 } 4805 4806 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 4807 struct btrfs_root *root, 4808 u64 root_objectid, u64 owner, 4809 u64 offset, struct btrfs_key *ins) 4810 { 4811 int ret; 4812 4813 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 4814 4815 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 4816 0, root_objectid, owner, offset, 4817 BTRFS_ADD_DELAYED_EXTENT, NULL); 4818 return ret; 4819 } 4820 4821 /* 4822 * this is used by the tree logging recovery code. It records that 4823 * an extent has been allocated and makes sure to clear the free 4824 * space cache bits as well 4825 */ 4826 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4827 struct btrfs_root *root, 4828 u64 root_objectid, u64 owner, u64 offset, 4829 struct btrfs_key *ins) 4830 { 4831 int ret; 4832 struct btrfs_block_group_cache *block_group; 4833 struct btrfs_caching_control *caching_ctl; 4834 u64 start = ins->objectid; 4835 u64 num_bytes = ins->offset; 4836 4837 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4838 cache_block_group(block_group); 4839 caching_ctl = get_caching_control(block_group); 4840 4841 if (!caching_ctl) { 4842 BUG_ON(!block_group_cache_done(block_group)); 4843 ret = btrfs_remove_free_space(block_group, start, num_bytes); 4844 BUG_ON(ret); 4845 } else { 4846 mutex_lock(&caching_ctl->mutex); 4847 4848 if (start >= caching_ctl->progress) { 4849 ret = add_excluded_extent(root, start, num_bytes); 4850 BUG_ON(ret); 4851 } else if (start + num_bytes <= caching_ctl->progress) { 4852 ret = btrfs_remove_free_space(block_group, 4853 start, num_bytes); 4854 BUG_ON(ret); 4855 } else { 4856 num_bytes = caching_ctl->progress - start; 4857 ret = btrfs_remove_free_space(block_group, 4858 start, num_bytes); 4859 BUG_ON(ret); 4860 4861 start = caching_ctl->progress; 4862 num_bytes = ins->objectid + ins->offset - 4863 caching_ctl->progress; 4864 ret = add_excluded_extent(root, start, num_bytes); 4865 BUG_ON(ret); 4866 } 4867 4868 mutex_unlock(&caching_ctl->mutex); 4869 put_caching_control(caching_ctl); 4870 } 4871 4872 update_reserved_extents(block_group, ins->offset, 1); 4873 btrfs_put_block_group(block_group); 4874 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4875 0, owner, offset, ins, 1); 4876 return ret; 4877 } 4878 4879 /* 4880 * finds a free extent and does all the dirty work required for allocation 4881 * returns the key for the extent through ins, and a tree buffer for 4882 * the first block of the extent through buf. 4883 * 4884 * returns 0 if everything worked, non-zero otherwise. 4885 */ 4886 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4887 struct btrfs_root *root, 4888 u64 num_bytes, u64 parent, u64 root_objectid, 4889 struct btrfs_disk_key *key, int level, 4890 u64 empty_size, u64 hint_byte, u64 search_end, 4891 struct btrfs_key *ins) 4892 { 4893 int ret; 4894 u64 flags = 0; 4895 4896 ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4897 empty_size, hint_byte, search_end, 4898 ins, 0); 4899 if (ret) 4900 return ret; 4901 4902 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4903 if (parent == 0) 4904 parent = ins->objectid; 4905 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4906 } else 4907 BUG_ON(parent > 0); 4908 4909 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4910 struct btrfs_delayed_extent_op *extent_op; 4911 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4912 BUG_ON(!extent_op); 4913 if (key) 4914 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4915 else 4916 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4917 extent_op->flags_to_set = flags; 4918 extent_op->update_key = 1; 4919 extent_op->update_flags = 1; 4920 extent_op->is_data = 0; 4921 4922 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4923 ins->offset, parent, root_objectid, 4924 level, BTRFS_ADD_DELAYED_EXTENT, 4925 extent_op); 4926 BUG_ON(ret); 4927 } 4928 4929 if (root_objectid == root->root_key.objectid) { 4930 u64 used; 4931 spin_lock(&root->node_lock); 4932 used = btrfs_root_used(&root->root_item) + num_bytes; 4933 btrfs_set_root_used(&root->root_item, used); 4934 spin_unlock(&root->node_lock); 4935 } 4936 return ret; 4937 } 4938 4939 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4940 struct btrfs_root *root, 4941 u64 bytenr, u32 blocksize, 4942 int level) 4943 { 4944 struct extent_buffer *buf; 4945 4946 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4947 if (!buf) 4948 return ERR_PTR(-ENOMEM); 4949 btrfs_set_header_generation(buf, trans->transid); 4950 btrfs_set_buffer_lockdep_class(buf, level); 4951 btrfs_tree_lock(buf); 4952 clean_tree_block(trans, root, buf); 4953 4954 btrfs_set_lock_blocking(buf); 4955 btrfs_set_buffer_uptodate(buf); 4956 4957 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4958 /* 4959 * we allow two log transactions at a time, use different 4960 * EXENT bit to differentiate dirty pages. 4961 */ 4962 if (root->log_transid % 2 == 0) 4963 set_extent_dirty(&root->dirty_log_pages, buf->start, 4964 buf->start + buf->len - 1, GFP_NOFS); 4965 else 4966 set_extent_new(&root->dirty_log_pages, buf->start, 4967 buf->start + buf->len - 1, GFP_NOFS); 4968 } else { 4969 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4970 buf->start + buf->len - 1, GFP_NOFS); 4971 } 4972 trans->blocks_used++; 4973 /* this returns a buffer locked for blocking */ 4974 return buf; 4975 } 4976 4977 /* 4978 * helper function to allocate a block for a given tree 4979 * returns the tree buffer or NULL. 4980 */ 4981 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4982 struct btrfs_root *root, u32 blocksize, 4983 u64 parent, u64 root_objectid, 4984 struct btrfs_disk_key *key, int level, 4985 u64 hint, u64 empty_size) 4986 { 4987 struct btrfs_key ins; 4988 int ret; 4989 struct extent_buffer *buf; 4990 4991 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4992 key, level, empty_size, hint, (u64)-1, &ins); 4993 if (ret) { 4994 BUG_ON(ret > 0); 4995 return ERR_PTR(ret); 4996 } 4997 4998 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4999 blocksize, level); 5000 return buf; 5001 } 5002 5003 struct walk_control { 5004 u64 refs[BTRFS_MAX_LEVEL]; 5005 u64 flags[BTRFS_MAX_LEVEL]; 5006 struct btrfs_key update_progress; 5007 int stage; 5008 int level; 5009 int shared_level; 5010 int update_ref; 5011 int keep_locks; 5012 int reada_slot; 5013 int reada_count; 5014 }; 5015 5016 #define DROP_REFERENCE 1 5017 #define UPDATE_BACKREF 2 5018 5019 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 5020 struct btrfs_root *root, 5021 struct walk_control *wc, 5022 struct btrfs_path *path) 5023 { 5024 u64 bytenr; 5025 u64 generation; 5026 u64 refs; 5027 u64 flags; 5028 u64 last = 0; 5029 u32 nritems; 5030 u32 blocksize; 5031 struct btrfs_key key; 5032 struct extent_buffer *eb; 5033 int ret; 5034 int slot; 5035 int nread = 0; 5036 5037 if (path->slots[wc->level] < wc->reada_slot) { 5038 wc->reada_count = wc->reada_count * 2 / 3; 5039 wc->reada_count = max(wc->reada_count, 2); 5040 } else { 5041 wc->reada_count = wc->reada_count * 3 / 2; 5042 wc->reada_count = min_t(int, wc->reada_count, 5043 BTRFS_NODEPTRS_PER_BLOCK(root)); 5044 } 5045 5046 eb = path->nodes[wc->level]; 5047 nritems = btrfs_header_nritems(eb); 5048 blocksize = btrfs_level_size(root, wc->level - 1); 5049 5050 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 5051 if (nread >= wc->reada_count) 5052 break; 5053 5054 cond_resched(); 5055 bytenr = btrfs_node_blockptr(eb, slot); 5056 generation = btrfs_node_ptr_generation(eb, slot); 5057 5058 if (slot == path->slots[wc->level]) 5059 goto reada; 5060 5061 if (wc->stage == UPDATE_BACKREF && 5062 generation <= root->root_key.offset) 5063 continue; 5064 5065 /* We don't lock the tree block, it's OK to be racy here */ 5066 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5067 &refs, &flags); 5068 BUG_ON(ret); 5069 BUG_ON(refs == 0); 5070 5071 if (wc->stage == DROP_REFERENCE) { 5072 if (refs == 1) 5073 goto reada; 5074 5075 if (wc->level == 1 && 5076 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5077 continue; 5078 if (!wc->update_ref || 5079 generation <= root->root_key.offset) 5080 continue; 5081 btrfs_node_key_to_cpu(eb, &key, slot); 5082 ret = btrfs_comp_cpu_keys(&key, 5083 &wc->update_progress); 5084 if (ret < 0) 5085 continue; 5086 } else { 5087 if (wc->level == 1 && 5088 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5089 continue; 5090 } 5091 reada: 5092 ret = readahead_tree_block(root, bytenr, blocksize, 5093 generation); 5094 if (ret) 5095 break; 5096 last = bytenr + blocksize; 5097 nread++; 5098 } 5099 wc->reada_slot = slot; 5100 } 5101 5102 /* 5103 * hepler to process tree block while walking down the tree. 5104 * 5105 * when wc->stage == UPDATE_BACKREF, this function updates 5106 * back refs for pointers in the block. 5107 * 5108 * NOTE: return value 1 means we should stop walking down. 5109 */ 5110 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 5111 struct btrfs_root *root, 5112 struct btrfs_path *path, 5113 struct walk_control *wc, int lookup_info) 5114 { 5115 int level = wc->level; 5116 struct extent_buffer *eb = path->nodes[level]; 5117 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5118 int ret; 5119 5120 if (wc->stage == UPDATE_BACKREF && 5121 btrfs_header_owner(eb) != root->root_key.objectid) 5122 return 1; 5123 5124 /* 5125 * when reference count of tree block is 1, it won't increase 5126 * again. once full backref flag is set, we never clear it. 5127 */ 5128 if (lookup_info && 5129 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 5130 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 5131 BUG_ON(!path->locks[level]); 5132 ret = btrfs_lookup_extent_info(trans, root, 5133 eb->start, eb->len, 5134 &wc->refs[level], 5135 &wc->flags[level]); 5136 BUG_ON(ret); 5137 BUG_ON(wc->refs[level] == 0); 5138 } 5139 5140 if (wc->stage == DROP_REFERENCE) { 5141 if (wc->refs[level] > 1) 5142 return 1; 5143 5144 if (path->locks[level] && !wc->keep_locks) { 5145 btrfs_tree_unlock(eb); 5146 path->locks[level] = 0; 5147 } 5148 return 0; 5149 } 5150 5151 /* wc->stage == UPDATE_BACKREF */ 5152 if (!(wc->flags[level] & flag)) { 5153 BUG_ON(!path->locks[level]); 5154 ret = btrfs_inc_ref(trans, root, eb, 1); 5155 BUG_ON(ret); 5156 ret = btrfs_dec_ref(trans, root, eb, 0); 5157 BUG_ON(ret); 5158 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 5159 eb->len, flag, 0); 5160 BUG_ON(ret); 5161 wc->flags[level] |= flag; 5162 } 5163 5164 /* 5165 * the block is shared by multiple trees, so it's not good to 5166 * keep the tree lock 5167 */ 5168 if (path->locks[level] && level > 0) { 5169 btrfs_tree_unlock(eb); 5170 path->locks[level] = 0; 5171 } 5172 return 0; 5173 } 5174 5175 /* 5176 * hepler to process tree block pointer. 5177 * 5178 * when wc->stage == DROP_REFERENCE, this function checks 5179 * reference count of the block pointed to. if the block 5180 * is shared and we need update back refs for the subtree 5181 * rooted at the block, this function changes wc->stage to 5182 * UPDATE_BACKREF. if the block is shared and there is no 5183 * need to update back, this function drops the reference 5184 * to the block. 5185 * 5186 * NOTE: return value 1 means we should stop walking down. 5187 */ 5188 static noinline int do_walk_down(struct btrfs_trans_handle *trans, 5189 struct btrfs_root *root, 5190 struct btrfs_path *path, 5191 struct walk_control *wc, int *lookup_info) 5192 { 5193 u64 bytenr; 5194 u64 generation; 5195 u64 parent; 5196 u32 blocksize; 5197 struct btrfs_key key; 5198 struct extent_buffer *next; 5199 int level = wc->level; 5200 int reada = 0; 5201 int ret = 0; 5202 5203 generation = btrfs_node_ptr_generation(path->nodes[level], 5204 path->slots[level]); 5205 /* 5206 * if the lower level block was created before the snapshot 5207 * was created, we know there is no need to update back refs 5208 * for the subtree 5209 */ 5210 if (wc->stage == UPDATE_BACKREF && 5211 generation <= root->root_key.offset) { 5212 *lookup_info = 1; 5213 return 1; 5214 } 5215 5216 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 5217 blocksize = btrfs_level_size(root, level - 1); 5218 5219 next = btrfs_find_tree_block(root, bytenr, blocksize); 5220 if (!next) { 5221 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 5222 if (!next) 5223 return -ENOMEM; 5224 reada = 1; 5225 } 5226 btrfs_tree_lock(next); 5227 btrfs_set_lock_blocking(next); 5228 5229 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5230 &wc->refs[level - 1], 5231 &wc->flags[level - 1]); 5232 BUG_ON(ret); 5233 BUG_ON(wc->refs[level - 1] == 0); 5234 *lookup_info = 0; 5235 5236 if (wc->stage == DROP_REFERENCE) { 5237 if (wc->refs[level - 1] > 1) { 5238 if (level == 1 && 5239 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5240 goto skip; 5241 5242 if (!wc->update_ref || 5243 generation <= root->root_key.offset) 5244 goto skip; 5245 5246 btrfs_node_key_to_cpu(path->nodes[level], &key, 5247 path->slots[level]); 5248 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); 5249 if (ret < 0) 5250 goto skip; 5251 5252 wc->stage = UPDATE_BACKREF; 5253 wc->shared_level = level - 1; 5254 } 5255 } else { 5256 if (level == 1 && 5257 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5258 goto skip; 5259 } 5260 5261 if (!btrfs_buffer_uptodate(next, generation)) { 5262 btrfs_tree_unlock(next); 5263 free_extent_buffer(next); 5264 next = NULL; 5265 *lookup_info = 1; 5266 } 5267 5268 if (!next) { 5269 if (reada && level == 1) 5270 reada_walk_down(trans, root, wc, path); 5271 next = read_tree_block(root, bytenr, blocksize, generation); 5272 btrfs_tree_lock(next); 5273 btrfs_set_lock_blocking(next); 5274 } 5275 5276 level--; 5277 BUG_ON(level != btrfs_header_level(next)); 5278 path->nodes[level] = next; 5279 path->slots[level] = 0; 5280 path->locks[level] = 1; 5281 wc->level = level; 5282 if (wc->level == 1) 5283 wc->reada_slot = 0; 5284 return 0; 5285 skip: 5286 wc->refs[level - 1] = 0; 5287 wc->flags[level - 1] = 0; 5288 if (wc->stage == DROP_REFERENCE) { 5289 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 5290 parent = path->nodes[level]->start; 5291 } else { 5292 BUG_ON(root->root_key.objectid != 5293 btrfs_header_owner(path->nodes[level])); 5294 parent = 0; 5295 } 5296 5297 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 5298 root->root_key.objectid, level - 1, 0); 5299 BUG_ON(ret); 5300 } 5301 btrfs_tree_unlock(next); 5302 free_extent_buffer(next); 5303 *lookup_info = 1; 5304 return 1; 5305 } 5306 5307 /* 5308 * hepler to process tree block while walking up the tree. 5309 * 5310 * when wc->stage == DROP_REFERENCE, this function drops 5311 * reference count on the block. 5312 * 5313 * when wc->stage == UPDATE_BACKREF, this function changes 5314 * wc->stage back to DROP_REFERENCE if we changed wc->stage 5315 * to UPDATE_BACKREF previously while processing the block. 5316 * 5317 * NOTE: return value 1 means we should stop walking up. 5318 */ 5319 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 5320 struct btrfs_root *root, 5321 struct btrfs_path *path, 5322 struct walk_control *wc) 5323 { 5324 int ret = 0; 5325 int level = wc->level; 5326 struct extent_buffer *eb = path->nodes[level]; 5327 u64 parent = 0; 5328 5329 if (wc->stage == UPDATE_BACKREF) { 5330 BUG_ON(wc->shared_level < level); 5331 if (level < wc->shared_level) 5332 goto out; 5333 5334 ret = find_next_key(path, level + 1, &wc->update_progress); 5335 if (ret > 0) 5336 wc->update_ref = 0; 5337 5338 wc->stage = DROP_REFERENCE; 5339 wc->shared_level = -1; 5340 path->slots[level] = 0; 5341 5342 /* 5343 * check reference count again if the block isn't locked. 5344 * we should start walking down the tree again if reference 5345 * count is one. 5346 */ 5347 if (!path->locks[level]) { 5348 BUG_ON(level == 0); 5349 btrfs_tree_lock(eb); 5350 btrfs_set_lock_blocking(eb); 5351 path->locks[level] = 1; 5352 5353 ret = btrfs_lookup_extent_info(trans, root, 5354 eb->start, eb->len, 5355 &wc->refs[level], 5356 &wc->flags[level]); 5357 BUG_ON(ret); 5358 BUG_ON(wc->refs[level] == 0); 5359 if (wc->refs[level] == 1) { 5360 btrfs_tree_unlock(eb); 5361 path->locks[level] = 0; 5362 return 1; 5363 } 5364 } 5365 } 5366 5367 /* wc->stage == DROP_REFERENCE */ 5368 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 5369 5370 if (wc->refs[level] == 1) { 5371 if (level == 0) { 5372 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5373 ret = btrfs_dec_ref(trans, root, eb, 1); 5374 else 5375 ret = btrfs_dec_ref(trans, root, eb, 0); 5376 BUG_ON(ret); 5377 } 5378 /* make block locked assertion in clean_tree_block happy */ 5379 if (!path->locks[level] && 5380 btrfs_header_generation(eb) == trans->transid) { 5381 btrfs_tree_lock(eb); 5382 btrfs_set_lock_blocking(eb); 5383 path->locks[level] = 1; 5384 } 5385 clean_tree_block(trans, root, eb); 5386 } 5387 5388 if (eb == root->node) { 5389 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5390 parent = eb->start; 5391 else 5392 BUG_ON(root->root_key.objectid != 5393 btrfs_header_owner(eb)); 5394 } else { 5395 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 5396 parent = path->nodes[level + 1]->start; 5397 else 5398 BUG_ON(root->root_key.objectid != 5399 btrfs_header_owner(path->nodes[level + 1])); 5400 } 5401 5402 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 5403 root->root_key.objectid, level, 0); 5404 BUG_ON(ret); 5405 out: 5406 wc->refs[level] = 0; 5407 wc->flags[level] = 0; 5408 return ret; 5409 } 5410 5411 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 5412 struct btrfs_root *root, 5413 struct btrfs_path *path, 5414 struct walk_control *wc) 5415 { 5416 int level = wc->level; 5417 int lookup_info = 1; 5418 int ret; 5419 5420 while (level >= 0) { 5421 ret = walk_down_proc(trans, root, path, wc, lookup_info); 5422 if (ret > 0) 5423 break; 5424 5425 if (level == 0) 5426 break; 5427 5428 if (path->slots[level] >= 5429 btrfs_header_nritems(path->nodes[level])) 5430 break; 5431 5432 ret = do_walk_down(trans, root, path, wc, &lookup_info); 5433 if (ret > 0) { 5434 path->slots[level]++; 5435 continue; 5436 } else if (ret < 0) 5437 return ret; 5438 level = wc->level; 5439 } 5440 return 0; 5441 } 5442 5443 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 5444 struct btrfs_root *root, 5445 struct btrfs_path *path, 5446 struct walk_control *wc, int max_level) 5447 { 5448 int level = wc->level; 5449 int ret; 5450 5451 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 5452 while (level < max_level && path->nodes[level]) { 5453 wc->level = level; 5454 if (path->slots[level] + 1 < 5455 btrfs_header_nritems(path->nodes[level])) { 5456 path->slots[level]++; 5457 return 0; 5458 } else { 5459 ret = walk_up_proc(trans, root, path, wc); 5460 if (ret > 0) 5461 return 0; 5462 5463 if (path->locks[level]) { 5464 btrfs_tree_unlock(path->nodes[level]); 5465 path->locks[level] = 0; 5466 } 5467 free_extent_buffer(path->nodes[level]); 5468 path->nodes[level] = NULL; 5469 level++; 5470 } 5471 } 5472 return 1; 5473 } 5474 5475 /* 5476 * drop a subvolume tree. 5477 * 5478 * this function traverses the tree freeing any blocks that only 5479 * referenced by the tree. 5480 * 5481 * when a shared tree block is found. this function decreases its 5482 * reference count by one. if update_ref is true, this function 5483 * also make sure backrefs for the shared block and all lower level 5484 * blocks are properly updated. 5485 */ 5486 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 5487 { 5488 struct btrfs_path *path; 5489 struct btrfs_trans_handle *trans; 5490 struct btrfs_root *tree_root = root->fs_info->tree_root; 5491 struct btrfs_root_item *root_item = &root->root_item; 5492 struct walk_control *wc; 5493 struct btrfs_key key; 5494 int err = 0; 5495 int ret; 5496 int level; 5497 5498 path = btrfs_alloc_path(); 5499 BUG_ON(!path); 5500 5501 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5502 BUG_ON(!wc); 5503 5504 trans = btrfs_start_transaction(tree_root, 1); 5505 5506 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 5507 level = btrfs_header_level(root->node); 5508 path->nodes[level] = btrfs_lock_root_node(root); 5509 btrfs_set_lock_blocking(path->nodes[level]); 5510 path->slots[level] = 0; 5511 path->locks[level] = 1; 5512 memset(&wc->update_progress, 0, 5513 sizeof(wc->update_progress)); 5514 } else { 5515 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 5516 memcpy(&wc->update_progress, &key, 5517 sizeof(wc->update_progress)); 5518 5519 level = root_item->drop_level; 5520 BUG_ON(level == 0); 5521 path->lowest_level = level; 5522 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5523 path->lowest_level = 0; 5524 if (ret < 0) { 5525 err = ret; 5526 goto out; 5527 } 5528 WARN_ON(ret > 0); 5529 5530 /* 5531 * unlock our path, this is safe because only this 5532 * function is allowed to delete this snapshot 5533 */ 5534 btrfs_unlock_up_safe(path, 0); 5535 5536 level = btrfs_header_level(root->node); 5537 while (1) { 5538 btrfs_tree_lock(path->nodes[level]); 5539 btrfs_set_lock_blocking(path->nodes[level]); 5540 5541 ret = btrfs_lookup_extent_info(trans, root, 5542 path->nodes[level]->start, 5543 path->nodes[level]->len, 5544 &wc->refs[level], 5545 &wc->flags[level]); 5546 BUG_ON(ret); 5547 BUG_ON(wc->refs[level] == 0); 5548 5549 if (level == root_item->drop_level) 5550 break; 5551 5552 btrfs_tree_unlock(path->nodes[level]); 5553 WARN_ON(wc->refs[level] != 1); 5554 level--; 5555 } 5556 } 5557 5558 wc->level = level; 5559 wc->shared_level = -1; 5560 wc->stage = DROP_REFERENCE; 5561 wc->update_ref = update_ref; 5562 wc->keep_locks = 0; 5563 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 5564 5565 while (1) { 5566 ret = walk_down_tree(trans, root, path, wc); 5567 if (ret < 0) { 5568 err = ret; 5569 break; 5570 } 5571 5572 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 5573 if (ret < 0) { 5574 err = ret; 5575 break; 5576 } 5577 5578 if (ret > 0) { 5579 BUG_ON(wc->stage != DROP_REFERENCE); 5580 break; 5581 } 5582 5583 if (wc->stage == DROP_REFERENCE) { 5584 level = wc->level; 5585 btrfs_node_key(path->nodes[level], 5586 &root_item->drop_progress, 5587 path->slots[level]); 5588 root_item->drop_level = level; 5589 } 5590 5591 BUG_ON(wc->level == 0); 5592 if (trans->transaction->in_commit || 5593 trans->transaction->delayed_refs.flushing) { 5594 ret = btrfs_update_root(trans, tree_root, 5595 &root->root_key, 5596 root_item); 5597 BUG_ON(ret); 5598 5599 btrfs_end_transaction(trans, tree_root); 5600 trans = btrfs_start_transaction(tree_root, 1); 5601 } else { 5602 unsigned long update; 5603 update = trans->delayed_ref_updates; 5604 trans->delayed_ref_updates = 0; 5605 if (update) 5606 btrfs_run_delayed_refs(trans, tree_root, 5607 update); 5608 } 5609 } 5610 btrfs_release_path(root, path); 5611 BUG_ON(err); 5612 5613 ret = btrfs_del_root(trans, tree_root, &root->root_key); 5614 BUG_ON(ret); 5615 5616 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 5617 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 5618 NULL, NULL); 5619 BUG_ON(ret < 0); 5620 if (ret > 0) { 5621 ret = btrfs_del_orphan_item(trans, tree_root, 5622 root->root_key.objectid); 5623 BUG_ON(ret); 5624 } 5625 } 5626 5627 if (root->in_radix) { 5628 btrfs_free_fs_root(tree_root->fs_info, root); 5629 } else { 5630 free_extent_buffer(root->node); 5631 free_extent_buffer(root->commit_root); 5632 kfree(root); 5633 } 5634 out: 5635 btrfs_end_transaction(trans, tree_root); 5636 kfree(wc); 5637 btrfs_free_path(path); 5638 return err; 5639 } 5640 5641 /* 5642 * drop subtree rooted at tree block 'node'. 5643 * 5644 * NOTE: this function will unlock and release tree block 'node' 5645 */ 5646 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5647 struct btrfs_root *root, 5648 struct extent_buffer *node, 5649 struct extent_buffer *parent) 5650 { 5651 struct btrfs_path *path; 5652 struct walk_control *wc; 5653 int level; 5654 int parent_level; 5655 int ret = 0; 5656 int wret; 5657 5658 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5659 5660 path = btrfs_alloc_path(); 5661 BUG_ON(!path); 5662 5663 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5664 BUG_ON(!wc); 5665 5666 btrfs_assert_tree_locked(parent); 5667 parent_level = btrfs_header_level(parent); 5668 extent_buffer_get(parent); 5669 path->nodes[parent_level] = parent; 5670 path->slots[parent_level] = btrfs_header_nritems(parent); 5671 5672 btrfs_assert_tree_locked(node); 5673 level = btrfs_header_level(node); 5674 path->nodes[level] = node; 5675 path->slots[level] = 0; 5676 path->locks[level] = 1; 5677 5678 wc->refs[parent_level] = 1; 5679 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5680 wc->level = level; 5681 wc->shared_level = -1; 5682 wc->stage = DROP_REFERENCE; 5683 wc->update_ref = 0; 5684 wc->keep_locks = 1; 5685 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 5686 5687 while (1) { 5688 wret = walk_down_tree(trans, root, path, wc); 5689 if (wret < 0) { 5690 ret = wret; 5691 break; 5692 } 5693 5694 wret = walk_up_tree(trans, root, path, wc, parent_level); 5695 if (wret < 0) 5696 ret = wret; 5697 if (wret != 0) 5698 break; 5699 } 5700 5701 kfree(wc); 5702 btrfs_free_path(path); 5703 return ret; 5704 } 5705 5706 #if 0 5707 static unsigned long calc_ra(unsigned long start, unsigned long last, 5708 unsigned long nr) 5709 { 5710 return min(last, start + nr - 1); 5711 } 5712 5713 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 5714 u64 len) 5715 { 5716 u64 page_start; 5717 u64 page_end; 5718 unsigned long first_index; 5719 unsigned long last_index; 5720 unsigned long i; 5721 struct page *page; 5722 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5723 struct file_ra_state *ra; 5724 struct btrfs_ordered_extent *ordered; 5725 unsigned int total_read = 0; 5726 unsigned int total_dirty = 0; 5727 int ret = 0; 5728 5729 ra = kzalloc(sizeof(*ra), GFP_NOFS); 5730 5731 mutex_lock(&inode->i_mutex); 5732 first_index = start >> PAGE_CACHE_SHIFT; 5733 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 5734 5735 /* make sure the dirty trick played by the caller work */ 5736 ret = invalidate_inode_pages2_range(inode->i_mapping, 5737 first_index, last_index); 5738 if (ret) 5739 goto out_unlock; 5740 5741 file_ra_state_init(ra, inode->i_mapping); 5742 5743 for (i = first_index ; i <= last_index; i++) { 5744 if (total_read % ra->ra_pages == 0) { 5745 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 5746 calc_ra(i, last_index, ra->ra_pages)); 5747 } 5748 total_read++; 5749 again: 5750 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 5751 BUG_ON(1); 5752 page = grab_cache_page(inode->i_mapping, i); 5753 if (!page) { 5754 ret = -ENOMEM; 5755 goto out_unlock; 5756 } 5757 if (!PageUptodate(page)) { 5758 btrfs_readpage(NULL, page); 5759 lock_page(page); 5760 if (!PageUptodate(page)) { 5761 unlock_page(page); 5762 page_cache_release(page); 5763 ret = -EIO; 5764 goto out_unlock; 5765 } 5766 } 5767 wait_on_page_writeback(page); 5768 5769 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 5770 page_end = page_start + PAGE_CACHE_SIZE - 1; 5771 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5772 5773 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5774 if (ordered) { 5775 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5776 unlock_page(page); 5777 page_cache_release(page); 5778 btrfs_start_ordered_extent(inode, ordered, 1); 5779 btrfs_put_ordered_extent(ordered); 5780 goto again; 5781 } 5782 set_page_extent_mapped(page); 5783 5784 if (i == first_index) 5785 set_extent_bits(io_tree, page_start, page_end, 5786 EXTENT_BOUNDARY, GFP_NOFS); 5787 btrfs_set_extent_delalloc(inode, page_start, page_end); 5788 5789 set_page_dirty(page); 5790 total_dirty++; 5791 5792 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5793 unlock_page(page); 5794 page_cache_release(page); 5795 } 5796 5797 out_unlock: 5798 kfree(ra); 5799 mutex_unlock(&inode->i_mutex); 5800 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 5801 return ret; 5802 } 5803 5804 static noinline int relocate_data_extent(struct inode *reloc_inode, 5805 struct btrfs_key *extent_key, 5806 u64 offset) 5807 { 5808 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5809 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 5810 struct extent_map *em; 5811 u64 start = extent_key->objectid - offset; 5812 u64 end = start + extent_key->offset - 1; 5813 5814 em = alloc_extent_map(GFP_NOFS); 5815 BUG_ON(!em || IS_ERR(em)); 5816 5817 em->start = start; 5818 em->len = extent_key->offset; 5819 em->block_len = extent_key->offset; 5820 em->block_start = extent_key->objectid; 5821 em->bdev = root->fs_info->fs_devices->latest_bdev; 5822 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5823 5824 /* setup extent map to cheat btrfs_readpage */ 5825 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5826 while (1) { 5827 int ret; 5828 write_lock(&em_tree->lock); 5829 ret = add_extent_mapping(em_tree, em); 5830 write_unlock(&em_tree->lock); 5831 if (ret != -EEXIST) { 5832 free_extent_map(em); 5833 break; 5834 } 5835 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 5836 } 5837 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5838 5839 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 5840 } 5841 5842 struct btrfs_ref_path { 5843 u64 extent_start; 5844 u64 nodes[BTRFS_MAX_LEVEL]; 5845 u64 root_objectid; 5846 u64 root_generation; 5847 u64 owner_objectid; 5848 u32 num_refs; 5849 int lowest_level; 5850 int current_level; 5851 int shared_level; 5852 5853 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 5854 u64 new_nodes[BTRFS_MAX_LEVEL]; 5855 }; 5856 5857 struct disk_extent { 5858 u64 ram_bytes; 5859 u64 disk_bytenr; 5860 u64 disk_num_bytes; 5861 u64 offset; 5862 u64 num_bytes; 5863 u8 compression; 5864 u8 encryption; 5865 u16 other_encoding; 5866 }; 5867 5868 static int is_cowonly_root(u64 root_objectid) 5869 { 5870 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5871 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5872 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5873 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5874 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5875 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5876 return 1; 5877 return 0; 5878 } 5879 5880 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5881 struct btrfs_root *extent_root, 5882 struct btrfs_ref_path *ref_path, 5883 int first_time) 5884 { 5885 struct extent_buffer *leaf; 5886 struct btrfs_path *path; 5887 struct btrfs_extent_ref *ref; 5888 struct btrfs_key key; 5889 struct btrfs_key found_key; 5890 u64 bytenr; 5891 u32 nritems; 5892 int level; 5893 int ret = 1; 5894 5895 path = btrfs_alloc_path(); 5896 if (!path) 5897 return -ENOMEM; 5898 5899 if (first_time) { 5900 ref_path->lowest_level = -1; 5901 ref_path->current_level = -1; 5902 ref_path->shared_level = -1; 5903 goto walk_up; 5904 } 5905 walk_down: 5906 level = ref_path->current_level - 1; 5907 while (level >= -1) { 5908 u64 parent; 5909 if (level < ref_path->lowest_level) 5910 break; 5911 5912 if (level >= 0) 5913 bytenr = ref_path->nodes[level]; 5914 else 5915 bytenr = ref_path->extent_start; 5916 BUG_ON(bytenr == 0); 5917 5918 parent = ref_path->nodes[level + 1]; 5919 ref_path->nodes[level + 1] = 0; 5920 ref_path->current_level = level; 5921 BUG_ON(parent == 0); 5922 5923 key.objectid = bytenr; 5924 key.offset = parent + 1; 5925 key.type = BTRFS_EXTENT_REF_KEY; 5926 5927 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5928 if (ret < 0) 5929 goto out; 5930 BUG_ON(ret == 0); 5931 5932 leaf = path->nodes[0]; 5933 nritems = btrfs_header_nritems(leaf); 5934 if (path->slots[0] >= nritems) { 5935 ret = btrfs_next_leaf(extent_root, path); 5936 if (ret < 0) 5937 goto out; 5938 if (ret > 0) 5939 goto next; 5940 leaf = path->nodes[0]; 5941 } 5942 5943 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5944 if (found_key.objectid == bytenr && 5945 found_key.type == BTRFS_EXTENT_REF_KEY) { 5946 if (level < ref_path->shared_level) 5947 ref_path->shared_level = level; 5948 goto found; 5949 } 5950 next: 5951 level--; 5952 btrfs_release_path(extent_root, path); 5953 cond_resched(); 5954 } 5955 /* reached lowest level */ 5956 ret = 1; 5957 goto out; 5958 walk_up: 5959 level = ref_path->current_level; 5960 while (level < BTRFS_MAX_LEVEL - 1) { 5961 u64 ref_objectid; 5962 5963 if (level >= 0) 5964 bytenr = ref_path->nodes[level]; 5965 else 5966 bytenr = ref_path->extent_start; 5967 5968 BUG_ON(bytenr == 0); 5969 5970 key.objectid = bytenr; 5971 key.offset = 0; 5972 key.type = BTRFS_EXTENT_REF_KEY; 5973 5974 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5975 if (ret < 0) 5976 goto out; 5977 5978 leaf = path->nodes[0]; 5979 nritems = btrfs_header_nritems(leaf); 5980 if (path->slots[0] >= nritems) { 5981 ret = btrfs_next_leaf(extent_root, path); 5982 if (ret < 0) 5983 goto out; 5984 if (ret > 0) { 5985 /* the extent was freed by someone */ 5986 if (ref_path->lowest_level == level) 5987 goto out; 5988 btrfs_release_path(extent_root, path); 5989 goto walk_down; 5990 } 5991 leaf = path->nodes[0]; 5992 } 5993 5994 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5995 if (found_key.objectid != bytenr || 5996 found_key.type != BTRFS_EXTENT_REF_KEY) { 5997 /* the extent was freed by someone */ 5998 if (ref_path->lowest_level == level) { 5999 ret = 1; 6000 goto out; 6001 } 6002 btrfs_release_path(extent_root, path); 6003 goto walk_down; 6004 } 6005 found: 6006 ref = btrfs_item_ptr(leaf, path->slots[0], 6007 struct btrfs_extent_ref); 6008 ref_objectid = btrfs_ref_objectid(leaf, ref); 6009 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 6010 if (first_time) { 6011 level = (int)ref_objectid; 6012 BUG_ON(level >= BTRFS_MAX_LEVEL); 6013 ref_path->lowest_level = level; 6014 ref_path->current_level = level; 6015 ref_path->nodes[level] = bytenr; 6016 } else { 6017 WARN_ON(ref_objectid != level); 6018 } 6019 } else { 6020 WARN_ON(level != -1); 6021 } 6022 first_time = 0; 6023 6024 if (ref_path->lowest_level == level) { 6025 ref_path->owner_objectid = ref_objectid; 6026 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 6027 } 6028 6029 /* 6030 * the block is tree root or the block isn't in reference 6031 * counted tree. 6032 */ 6033 if (found_key.objectid == found_key.offset || 6034 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 6035 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 6036 ref_path->root_generation = 6037 btrfs_ref_generation(leaf, ref); 6038 if (level < 0) { 6039 /* special reference from the tree log */ 6040 ref_path->nodes[0] = found_key.offset; 6041 ref_path->current_level = 0; 6042 } 6043 ret = 0; 6044 goto out; 6045 } 6046 6047 level++; 6048 BUG_ON(ref_path->nodes[level] != 0); 6049 ref_path->nodes[level] = found_key.offset; 6050 ref_path->current_level = level; 6051 6052 /* 6053 * the reference was created in the running transaction, 6054 * no need to continue walking up. 6055 */ 6056 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 6057 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 6058 ref_path->root_generation = 6059 btrfs_ref_generation(leaf, ref); 6060 ret = 0; 6061 goto out; 6062 } 6063 6064 btrfs_release_path(extent_root, path); 6065 cond_resched(); 6066 } 6067 /* reached max tree level, but no tree root found. */ 6068 BUG(); 6069 out: 6070 btrfs_free_path(path); 6071 return ret; 6072 } 6073 6074 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 6075 struct btrfs_root *extent_root, 6076 struct btrfs_ref_path *ref_path, 6077 u64 extent_start) 6078 { 6079 memset(ref_path, 0, sizeof(*ref_path)); 6080 ref_path->extent_start = extent_start; 6081 6082 return __next_ref_path(trans, extent_root, ref_path, 1); 6083 } 6084 6085 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 6086 struct btrfs_root *extent_root, 6087 struct btrfs_ref_path *ref_path) 6088 { 6089 return __next_ref_path(trans, extent_root, ref_path, 0); 6090 } 6091 6092 static noinline int get_new_locations(struct inode *reloc_inode, 6093 struct btrfs_key *extent_key, 6094 u64 offset, int no_fragment, 6095 struct disk_extent **extents, 6096 int *nr_extents) 6097 { 6098 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 6099 struct btrfs_path *path; 6100 struct btrfs_file_extent_item *fi; 6101 struct extent_buffer *leaf; 6102 struct disk_extent *exts = *extents; 6103 struct btrfs_key found_key; 6104 u64 cur_pos; 6105 u64 last_byte; 6106 u32 nritems; 6107 int nr = 0; 6108 int max = *nr_extents; 6109 int ret; 6110 6111 WARN_ON(!no_fragment && *extents); 6112 if (!exts) { 6113 max = 1; 6114 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 6115 if (!exts) 6116 return -ENOMEM; 6117 } 6118 6119 path = btrfs_alloc_path(); 6120 BUG_ON(!path); 6121 6122 cur_pos = extent_key->objectid - offset; 6123 last_byte = extent_key->objectid + extent_key->offset; 6124 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 6125 cur_pos, 0); 6126 if (ret < 0) 6127 goto out; 6128 if (ret > 0) { 6129 ret = -ENOENT; 6130 goto out; 6131 } 6132 6133 while (1) { 6134 leaf = path->nodes[0]; 6135 nritems = btrfs_header_nritems(leaf); 6136 if (path->slots[0] >= nritems) { 6137 ret = btrfs_next_leaf(root, path); 6138 if (ret < 0) 6139 goto out; 6140 if (ret > 0) 6141 break; 6142 leaf = path->nodes[0]; 6143 } 6144 6145 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6146 if (found_key.offset != cur_pos || 6147 found_key.type != BTRFS_EXTENT_DATA_KEY || 6148 found_key.objectid != reloc_inode->i_ino) 6149 break; 6150 6151 fi = btrfs_item_ptr(leaf, path->slots[0], 6152 struct btrfs_file_extent_item); 6153 if (btrfs_file_extent_type(leaf, fi) != 6154 BTRFS_FILE_EXTENT_REG || 6155 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6156 break; 6157 6158 if (nr == max) { 6159 struct disk_extent *old = exts; 6160 max *= 2; 6161 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 6162 memcpy(exts, old, sizeof(*exts) * nr); 6163 if (old != *extents) 6164 kfree(old); 6165 } 6166 6167 exts[nr].disk_bytenr = 6168 btrfs_file_extent_disk_bytenr(leaf, fi); 6169 exts[nr].disk_num_bytes = 6170 btrfs_file_extent_disk_num_bytes(leaf, fi); 6171 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 6172 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6173 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 6174 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 6175 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 6176 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 6177 fi); 6178 BUG_ON(exts[nr].offset > 0); 6179 BUG_ON(exts[nr].compression || exts[nr].encryption); 6180 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 6181 6182 cur_pos += exts[nr].num_bytes; 6183 nr++; 6184 6185 if (cur_pos + offset >= last_byte) 6186 break; 6187 6188 if (no_fragment) { 6189 ret = 1; 6190 goto out; 6191 } 6192 path->slots[0]++; 6193 } 6194 6195 BUG_ON(cur_pos + offset > last_byte); 6196 if (cur_pos + offset < last_byte) { 6197 ret = -ENOENT; 6198 goto out; 6199 } 6200 ret = 0; 6201 out: 6202 btrfs_free_path(path); 6203 if (ret) { 6204 if (exts != *extents) 6205 kfree(exts); 6206 } else { 6207 *extents = exts; 6208 *nr_extents = nr; 6209 } 6210 return ret; 6211 } 6212 6213 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 6214 struct btrfs_root *root, 6215 struct btrfs_path *path, 6216 struct btrfs_key *extent_key, 6217 struct btrfs_key *leaf_key, 6218 struct btrfs_ref_path *ref_path, 6219 struct disk_extent *new_extents, 6220 int nr_extents) 6221 { 6222 struct extent_buffer *leaf; 6223 struct btrfs_file_extent_item *fi; 6224 struct inode *inode = NULL; 6225 struct btrfs_key key; 6226 u64 lock_start = 0; 6227 u64 lock_end = 0; 6228 u64 num_bytes; 6229 u64 ext_offset; 6230 u64 search_end = (u64)-1; 6231 u32 nritems; 6232 int nr_scaned = 0; 6233 int extent_locked = 0; 6234 int extent_type; 6235 int ret; 6236 6237 memcpy(&key, leaf_key, sizeof(key)); 6238 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 6239 if (key.objectid < ref_path->owner_objectid || 6240 (key.objectid == ref_path->owner_objectid && 6241 key.type < BTRFS_EXTENT_DATA_KEY)) { 6242 key.objectid = ref_path->owner_objectid; 6243 key.type = BTRFS_EXTENT_DATA_KEY; 6244 key.offset = 0; 6245 } 6246 } 6247 6248 while (1) { 6249 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 6250 if (ret < 0) 6251 goto out; 6252 6253 leaf = path->nodes[0]; 6254 nritems = btrfs_header_nritems(leaf); 6255 next: 6256 if (extent_locked && ret > 0) { 6257 /* 6258 * the file extent item was modified by someone 6259 * before the extent got locked. 6260 */ 6261 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6262 lock_end, GFP_NOFS); 6263 extent_locked = 0; 6264 } 6265 6266 if (path->slots[0] >= nritems) { 6267 if (++nr_scaned > 2) 6268 break; 6269 6270 BUG_ON(extent_locked); 6271 ret = btrfs_next_leaf(root, path); 6272 if (ret < 0) 6273 goto out; 6274 if (ret > 0) 6275 break; 6276 leaf = path->nodes[0]; 6277 nritems = btrfs_header_nritems(leaf); 6278 } 6279 6280 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 6281 6282 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 6283 if ((key.objectid > ref_path->owner_objectid) || 6284 (key.objectid == ref_path->owner_objectid && 6285 key.type > BTRFS_EXTENT_DATA_KEY) || 6286 key.offset >= search_end) 6287 break; 6288 } 6289 6290 if (inode && key.objectid != inode->i_ino) { 6291 BUG_ON(extent_locked); 6292 btrfs_release_path(root, path); 6293 mutex_unlock(&inode->i_mutex); 6294 iput(inode); 6295 inode = NULL; 6296 continue; 6297 } 6298 6299 if (key.type != BTRFS_EXTENT_DATA_KEY) { 6300 path->slots[0]++; 6301 ret = 1; 6302 goto next; 6303 } 6304 fi = btrfs_item_ptr(leaf, path->slots[0], 6305 struct btrfs_file_extent_item); 6306 extent_type = btrfs_file_extent_type(leaf, fi); 6307 if ((extent_type != BTRFS_FILE_EXTENT_REG && 6308 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 6309 (btrfs_file_extent_disk_bytenr(leaf, fi) != 6310 extent_key->objectid)) { 6311 path->slots[0]++; 6312 ret = 1; 6313 goto next; 6314 } 6315 6316 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6317 ext_offset = btrfs_file_extent_offset(leaf, fi); 6318 6319 if (search_end == (u64)-1) { 6320 search_end = key.offset - ext_offset + 6321 btrfs_file_extent_ram_bytes(leaf, fi); 6322 } 6323 6324 if (!extent_locked) { 6325 lock_start = key.offset; 6326 lock_end = lock_start + num_bytes - 1; 6327 } else { 6328 if (lock_start > key.offset || 6329 lock_end + 1 < key.offset + num_bytes) { 6330 unlock_extent(&BTRFS_I(inode)->io_tree, 6331 lock_start, lock_end, GFP_NOFS); 6332 extent_locked = 0; 6333 } 6334 } 6335 6336 if (!inode) { 6337 btrfs_release_path(root, path); 6338 6339 inode = btrfs_iget_locked(root->fs_info->sb, 6340 key.objectid, root); 6341 if (inode->i_state & I_NEW) { 6342 BTRFS_I(inode)->root = root; 6343 BTRFS_I(inode)->location.objectid = 6344 key.objectid; 6345 BTRFS_I(inode)->location.type = 6346 BTRFS_INODE_ITEM_KEY; 6347 BTRFS_I(inode)->location.offset = 0; 6348 btrfs_read_locked_inode(inode); 6349 unlock_new_inode(inode); 6350 } 6351 /* 6352 * some code call btrfs_commit_transaction while 6353 * holding the i_mutex, so we can't use mutex_lock 6354 * here. 6355 */ 6356 if (is_bad_inode(inode) || 6357 !mutex_trylock(&inode->i_mutex)) { 6358 iput(inode); 6359 inode = NULL; 6360 key.offset = (u64)-1; 6361 goto skip; 6362 } 6363 } 6364 6365 if (!extent_locked) { 6366 struct btrfs_ordered_extent *ordered; 6367 6368 btrfs_release_path(root, path); 6369 6370 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6371 lock_end, GFP_NOFS); 6372 ordered = btrfs_lookup_first_ordered_extent(inode, 6373 lock_end); 6374 if (ordered && 6375 ordered->file_offset <= lock_end && 6376 ordered->file_offset + ordered->len > lock_start) { 6377 unlock_extent(&BTRFS_I(inode)->io_tree, 6378 lock_start, lock_end, GFP_NOFS); 6379 btrfs_start_ordered_extent(inode, ordered, 1); 6380 btrfs_put_ordered_extent(ordered); 6381 key.offset += num_bytes; 6382 goto skip; 6383 } 6384 if (ordered) 6385 btrfs_put_ordered_extent(ordered); 6386 6387 extent_locked = 1; 6388 continue; 6389 } 6390 6391 if (nr_extents == 1) { 6392 /* update extent pointer in place */ 6393 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6394 new_extents[0].disk_bytenr); 6395 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6396 new_extents[0].disk_num_bytes); 6397 btrfs_mark_buffer_dirty(leaf); 6398 6399 btrfs_drop_extent_cache(inode, key.offset, 6400 key.offset + num_bytes - 1, 0); 6401 6402 ret = btrfs_inc_extent_ref(trans, root, 6403 new_extents[0].disk_bytenr, 6404 new_extents[0].disk_num_bytes, 6405 leaf->start, 6406 root->root_key.objectid, 6407 trans->transid, 6408 key.objectid); 6409 BUG_ON(ret); 6410 6411 ret = btrfs_free_extent(trans, root, 6412 extent_key->objectid, 6413 extent_key->offset, 6414 leaf->start, 6415 btrfs_header_owner(leaf), 6416 btrfs_header_generation(leaf), 6417 key.objectid, 0); 6418 BUG_ON(ret); 6419 6420 btrfs_release_path(root, path); 6421 key.offset += num_bytes; 6422 } else { 6423 BUG_ON(1); 6424 #if 0 6425 u64 alloc_hint; 6426 u64 extent_len; 6427 int i; 6428 /* 6429 * drop old extent pointer at first, then insert the 6430 * new pointers one bye one 6431 */ 6432 btrfs_release_path(root, path); 6433 ret = btrfs_drop_extents(trans, root, inode, key.offset, 6434 key.offset + num_bytes, 6435 key.offset, &alloc_hint); 6436 BUG_ON(ret); 6437 6438 for (i = 0; i < nr_extents; i++) { 6439 if (ext_offset >= new_extents[i].num_bytes) { 6440 ext_offset -= new_extents[i].num_bytes; 6441 continue; 6442 } 6443 extent_len = min(new_extents[i].num_bytes - 6444 ext_offset, num_bytes); 6445 6446 ret = btrfs_insert_empty_item(trans, root, 6447 path, &key, 6448 sizeof(*fi)); 6449 BUG_ON(ret); 6450 6451 leaf = path->nodes[0]; 6452 fi = btrfs_item_ptr(leaf, path->slots[0], 6453 struct btrfs_file_extent_item); 6454 btrfs_set_file_extent_generation(leaf, fi, 6455 trans->transid); 6456 btrfs_set_file_extent_type(leaf, fi, 6457 BTRFS_FILE_EXTENT_REG); 6458 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6459 new_extents[i].disk_bytenr); 6460 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6461 new_extents[i].disk_num_bytes); 6462 btrfs_set_file_extent_ram_bytes(leaf, fi, 6463 new_extents[i].ram_bytes); 6464 6465 btrfs_set_file_extent_compression(leaf, fi, 6466 new_extents[i].compression); 6467 btrfs_set_file_extent_encryption(leaf, fi, 6468 new_extents[i].encryption); 6469 btrfs_set_file_extent_other_encoding(leaf, fi, 6470 new_extents[i].other_encoding); 6471 6472 btrfs_set_file_extent_num_bytes(leaf, fi, 6473 extent_len); 6474 ext_offset += new_extents[i].offset; 6475 btrfs_set_file_extent_offset(leaf, fi, 6476 ext_offset); 6477 btrfs_mark_buffer_dirty(leaf); 6478 6479 btrfs_drop_extent_cache(inode, key.offset, 6480 key.offset + extent_len - 1, 0); 6481 6482 ret = btrfs_inc_extent_ref(trans, root, 6483 new_extents[i].disk_bytenr, 6484 new_extents[i].disk_num_bytes, 6485 leaf->start, 6486 root->root_key.objectid, 6487 trans->transid, key.objectid); 6488 BUG_ON(ret); 6489 btrfs_release_path(root, path); 6490 6491 inode_add_bytes(inode, extent_len); 6492 6493 ext_offset = 0; 6494 num_bytes -= extent_len; 6495 key.offset += extent_len; 6496 6497 if (num_bytes == 0) 6498 break; 6499 } 6500 BUG_ON(i >= nr_extents); 6501 #endif 6502 } 6503 6504 if (extent_locked) { 6505 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6506 lock_end, GFP_NOFS); 6507 extent_locked = 0; 6508 } 6509 skip: 6510 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 6511 key.offset >= search_end) 6512 break; 6513 6514 cond_resched(); 6515 } 6516 ret = 0; 6517 out: 6518 btrfs_release_path(root, path); 6519 if (inode) { 6520 mutex_unlock(&inode->i_mutex); 6521 if (extent_locked) { 6522 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 6523 lock_end, GFP_NOFS); 6524 } 6525 iput(inode); 6526 } 6527 return ret; 6528 } 6529 6530 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 6531 struct btrfs_root *root, 6532 struct extent_buffer *buf, u64 orig_start) 6533 { 6534 int level; 6535 int ret; 6536 6537 BUG_ON(btrfs_header_generation(buf) != trans->transid); 6538 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6539 6540 level = btrfs_header_level(buf); 6541 if (level == 0) { 6542 struct btrfs_leaf_ref *ref; 6543 struct btrfs_leaf_ref *orig_ref; 6544 6545 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 6546 if (!orig_ref) 6547 return -ENOENT; 6548 6549 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 6550 if (!ref) { 6551 btrfs_free_leaf_ref(root, orig_ref); 6552 return -ENOMEM; 6553 } 6554 6555 ref->nritems = orig_ref->nritems; 6556 memcpy(ref->extents, orig_ref->extents, 6557 sizeof(ref->extents[0]) * ref->nritems); 6558 6559 btrfs_free_leaf_ref(root, orig_ref); 6560 6561 ref->root_gen = trans->transid; 6562 ref->bytenr = buf->start; 6563 ref->owner = btrfs_header_owner(buf); 6564 ref->generation = btrfs_header_generation(buf); 6565 6566 ret = btrfs_add_leaf_ref(root, ref, 0); 6567 WARN_ON(ret); 6568 btrfs_free_leaf_ref(root, ref); 6569 } 6570 return 0; 6571 } 6572 6573 static noinline int invalidate_extent_cache(struct btrfs_root *root, 6574 struct extent_buffer *leaf, 6575 struct btrfs_block_group_cache *group, 6576 struct btrfs_root *target_root) 6577 { 6578 struct btrfs_key key; 6579 struct inode *inode = NULL; 6580 struct btrfs_file_extent_item *fi; 6581 struct extent_state *cached_state = NULL; 6582 u64 num_bytes; 6583 u64 skip_objectid = 0; 6584 u32 nritems; 6585 u32 i; 6586 6587 nritems = btrfs_header_nritems(leaf); 6588 for (i = 0; i < nritems; i++) { 6589 btrfs_item_key_to_cpu(leaf, &key, i); 6590 if (key.objectid == skip_objectid || 6591 key.type != BTRFS_EXTENT_DATA_KEY) 6592 continue; 6593 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6594 if (btrfs_file_extent_type(leaf, fi) == 6595 BTRFS_FILE_EXTENT_INLINE) 6596 continue; 6597 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 6598 continue; 6599 if (!inode || inode->i_ino != key.objectid) { 6600 iput(inode); 6601 inode = btrfs_ilookup(target_root->fs_info->sb, 6602 key.objectid, target_root, 1); 6603 } 6604 if (!inode) { 6605 skip_objectid = key.objectid; 6606 continue; 6607 } 6608 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 6609 6610 lock_extent_bits(&BTRFS_I(inode)->io_tree, key.offset, 6611 key.offset + num_bytes - 1, 0, &cached_state, 6612 GFP_NOFS); 6613 btrfs_drop_extent_cache(inode, key.offset, 6614 key.offset + num_bytes - 1, 1); 6615 unlock_extent_cached(&BTRFS_I(inode)->io_tree, key.offset, 6616 key.offset + num_bytes - 1, &cached_state, 6617 GFP_NOFS); 6618 cond_resched(); 6619 } 6620 iput(inode); 6621 return 0; 6622 } 6623 6624 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 6625 struct btrfs_root *root, 6626 struct extent_buffer *leaf, 6627 struct btrfs_block_group_cache *group, 6628 struct inode *reloc_inode) 6629 { 6630 struct btrfs_key key; 6631 struct btrfs_key extent_key; 6632 struct btrfs_file_extent_item *fi; 6633 struct btrfs_leaf_ref *ref; 6634 struct disk_extent *new_extent; 6635 u64 bytenr; 6636 u64 num_bytes; 6637 u32 nritems; 6638 u32 i; 6639 int ext_index; 6640 int nr_extent; 6641 int ret; 6642 6643 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 6644 BUG_ON(!new_extent); 6645 6646 ref = btrfs_lookup_leaf_ref(root, leaf->start); 6647 BUG_ON(!ref); 6648 6649 ext_index = -1; 6650 nritems = btrfs_header_nritems(leaf); 6651 for (i = 0; i < nritems; i++) { 6652 btrfs_item_key_to_cpu(leaf, &key, i); 6653 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 6654 continue; 6655 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6656 if (btrfs_file_extent_type(leaf, fi) == 6657 BTRFS_FILE_EXTENT_INLINE) 6658 continue; 6659 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6660 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 6661 if (bytenr == 0) 6662 continue; 6663 6664 ext_index++; 6665 if (bytenr >= group->key.objectid + group->key.offset || 6666 bytenr + num_bytes <= group->key.objectid) 6667 continue; 6668 6669 extent_key.objectid = bytenr; 6670 extent_key.offset = num_bytes; 6671 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 6672 nr_extent = 1; 6673 ret = get_new_locations(reloc_inode, &extent_key, 6674 group->key.objectid, 1, 6675 &new_extent, &nr_extent); 6676 if (ret > 0) 6677 continue; 6678 BUG_ON(ret < 0); 6679 6680 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 6681 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 6682 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 6683 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 6684 6685 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6686 new_extent->disk_bytenr); 6687 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6688 new_extent->disk_num_bytes); 6689 btrfs_mark_buffer_dirty(leaf); 6690 6691 ret = btrfs_inc_extent_ref(trans, root, 6692 new_extent->disk_bytenr, 6693 new_extent->disk_num_bytes, 6694 leaf->start, 6695 root->root_key.objectid, 6696 trans->transid, key.objectid); 6697 BUG_ON(ret); 6698 6699 ret = btrfs_free_extent(trans, root, 6700 bytenr, num_bytes, leaf->start, 6701 btrfs_header_owner(leaf), 6702 btrfs_header_generation(leaf), 6703 key.objectid, 0); 6704 BUG_ON(ret); 6705 cond_resched(); 6706 } 6707 kfree(new_extent); 6708 BUG_ON(ext_index + 1 != ref->nritems); 6709 btrfs_free_leaf_ref(root, ref); 6710 return 0; 6711 } 6712 6713 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 6714 struct btrfs_root *root) 6715 { 6716 struct btrfs_root *reloc_root; 6717 int ret; 6718 6719 if (root->reloc_root) { 6720 reloc_root = root->reloc_root; 6721 root->reloc_root = NULL; 6722 list_add(&reloc_root->dead_list, 6723 &root->fs_info->dead_reloc_roots); 6724 6725 btrfs_set_root_bytenr(&reloc_root->root_item, 6726 reloc_root->node->start); 6727 btrfs_set_root_level(&root->root_item, 6728 btrfs_header_level(reloc_root->node)); 6729 memset(&reloc_root->root_item.drop_progress, 0, 6730 sizeof(struct btrfs_disk_key)); 6731 reloc_root->root_item.drop_level = 0; 6732 6733 ret = btrfs_update_root(trans, root->fs_info->tree_root, 6734 &reloc_root->root_key, 6735 &reloc_root->root_item); 6736 BUG_ON(ret); 6737 } 6738 return 0; 6739 } 6740 6741 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 6742 { 6743 struct btrfs_trans_handle *trans; 6744 struct btrfs_root *reloc_root; 6745 struct btrfs_root *prev_root = NULL; 6746 struct list_head dead_roots; 6747 int ret; 6748 unsigned long nr; 6749 6750 INIT_LIST_HEAD(&dead_roots); 6751 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 6752 6753 while (!list_empty(&dead_roots)) { 6754 reloc_root = list_entry(dead_roots.prev, 6755 struct btrfs_root, dead_list); 6756 list_del_init(&reloc_root->dead_list); 6757 6758 BUG_ON(reloc_root->commit_root != NULL); 6759 while (1) { 6760 trans = btrfs_join_transaction(root, 1); 6761 BUG_ON(!trans); 6762 6763 mutex_lock(&root->fs_info->drop_mutex); 6764 ret = btrfs_drop_snapshot(trans, reloc_root); 6765 if (ret != -EAGAIN) 6766 break; 6767 mutex_unlock(&root->fs_info->drop_mutex); 6768 6769 nr = trans->blocks_used; 6770 ret = btrfs_end_transaction(trans, root); 6771 BUG_ON(ret); 6772 btrfs_btree_balance_dirty(root, nr); 6773 } 6774 6775 free_extent_buffer(reloc_root->node); 6776 6777 ret = btrfs_del_root(trans, root->fs_info->tree_root, 6778 &reloc_root->root_key); 6779 BUG_ON(ret); 6780 mutex_unlock(&root->fs_info->drop_mutex); 6781 6782 nr = trans->blocks_used; 6783 ret = btrfs_end_transaction(trans, root); 6784 BUG_ON(ret); 6785 btrfs_btree_balance_dirty(root, nr); 6786 6787 kfree(prev_root); 6788 prev_root = reloc_root; 6789 } 6790 if (prev_root) { 6791 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 6792 kfree(prev_root); 6793 } 6794 return 0; 6795 } 6796 6797 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 6798 { 6799 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 6800 return 0; 6801 } 6802 6803 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 6804 { 6805 struct btrfs_root *reloc_root; 6806 struct btrfs_trans_handle *trans; 6807 struct btrfs_key location; 6808 int found; 6809 int ret; 6810 6811 mutex_lock(&root->fs_info->tree_reloc_mutex); 6812 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 6813 BUG_ON(ret); 6814 found = !list_empty(&root->fs_info->dead_reloc_roots); 6815 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6816 6817 if (found) { 6818 trans = btrfs_start_transaction(root, 1); 6819 BUG_ON(!trans); 6820 ret = btrfs_commit_transaction(trans, root); 6821 BUG_ON(ret); 6822 } 6823 6824 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6825 location.offset = (u64)-1; 6826 location.type = BTRFS_ROOT_ITEM_KEY; 6827 6828 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 6829 BUG_ON(!reloc_root); 6830 btrfs_orphan_cleanup(reloc_root); 6831 return 0; 6832 } 6833 6834 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 6835 struct btrfs_root *root) 6836 { 6837 struct btrfs_root *reloc_root; 6838 struct extent_buffer *eb; 6839 struct btrfs_root_item *root_item; 6840 struct btrfs_key root_key; 6841 int ret; 6842 6843 BUG_ON(!root->ref_cows); 6844 if (root->reloc_root) 6845 return 0; 6846 6847 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 6848 BUG_ON(!root_item); 6849 6850 ret = btrfs_copy_root(trans, root, root->commit_root, 6851 &eb, BTRFS_TREE_RELOC_OBJECTID); 6852 BUG_ON(ret); 6853 6854 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 6855 root_key.offset = root->root_key.objectid; 6856 root_key.type = BTRFS_ROOT_ITEM_KEY; 6857 6858 memcpy(root_item, &root->root_item, sizeof(root_item)); 6859 btrfs_set_root_refs(root_item, 0); 6860 btrfs_set_root_bytenr(root_item, eb->start); 6861 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 6862 btrfs_set_root_generation(root_item, trans->transid); 6863 6864 btrfs_tree_unlock(eb); 6865 free_extent_buffer(eb); 6866 6867 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 6868 &root_key, root_item); 6869 BUG_ON(ret); 6870 kfree(root_item); 6871 6872 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 6873 &root_key); 6874 BUG_ON(!reloc_root); 6875 reloc_root->last_trans = trans->transid; 6876 reloc_root->commit_root = NULL; 6877 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 6878 6879 root->reloc_root = reloc_root; 6880 return 0; 6881 } 6882 6883 /* 6884 * Core function of space balance. 6885 * 6886 * The idea is using reloc trees to relocate tree blocks in reference 6887 * counted roots. There is one reloc tree for each subvol, and all 6888 * reloc trees share same root key objectid. Reloc trees are snapshots 6889 * of the latest committed roots of subvols (root->commit_root). 6890 * 6891 * To relocate a tree block referenced by a subvol, there are two steps. 6892 * COW the block through subvol's reloc tree, then update block pointer 6893 * in the subvol to point to the new block. Since all reloc trees share 6894 * same root key objectid, doing special handing for tree blocks owned 6895 * by them is easy. Once a tree block has been COWed in one reloc tree, 6896 * we can use the resulting new block directly when the same block is 6897 * required to COW again through other reloc trees. By this way, relocated 6898 * tree blocks are shared between reloc trees, so they are also shared 6899 * between subvols. 6900 */ 6901 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 6902 struct btrfs_root *root, 6903 struct btrfs_path *path, 6904 struct btrfs_key *first_key, 6905 struct btrfs_ref_path *ref_path, 6906 struct btrfs_block_group_cache *group, 6907 struct inode *reloc_inode) 6908 { 6909 struct btrfs_root *reloc_root; 6910 struct extent_buffer *eb = NULL; 6911 struct btrfs_key *keys; 6912 u64 *nodes; 6913 int level; 6914 int shared_level; 6915 int lowest_level = 0; 6916 int ret; 6917 6918 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 6919 lowest_level = ref_path->owner_objectid; 6920 6921 if (!root->ref_cows) { 6922 path->lowest_level = lowest_level; 6923 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 6924 BUG_ON(ret < 0); 6925 path->lowest_level = 0; 6926 btrfs_release_path(root, path); 6927 return 0; 6928 } 6929 6930 mutex_lock(&root->fs_info->tree_reloc_mutex); 6931 ret = init_reloc_tree(trans, root); 6932 BUG_ON(ret); 6933 reloc_root = root->reloc_root; 6934 6935 shared_level = ref_path->shared_level; 6936 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 6937 6938 keys = ref_path->node_keys; 6939 nodes = ref_path->new_nodes; 6940 memset(&keys[shared_level + 1], 0, 6941 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6942 memset(&nodes[shared_level + 1], 0, 6943 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6944 6945 if (nodes[lowest_level] == 0) { 6946 path->lowest_level = lowest_level; 6947 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6948 0, 1); 6949 BUG_ON(ret); 6950 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 6951 eb = path->nodes[level]; 6952 if (!eb || eb == reloc_root->node) 6953 break; 6954 nodes[level] = eb->start; 6955 if (level == 0) 6956 btrfs_item_key_to_cpu(eb, &keys[level], 0); 6957 else 6958 btrfs_node_key_to_cpu(eb, &keys[level], 0); 6959 } 6960 if (nodes[0] && 6961 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6962 eb = path->nodes[0]; 6963 ret = replace_extents_in_leaf(trans, reloc_root, eb, 6964 group, reloc_inode); 6965 BUG_ON(ret); 6966 } 6967 btrfs_release_path(reloc_root, path); 6968 } else { 6969 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 6970 lowest_level); 6971 BUG_ON(ret); 6972 } 6973 6974 /* 6975 * replace tree blocks in the fs tree with tree blocks in 6976 * the reloc tree. 6977 */ 6978 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 6979 BUG_ON(ret < 0); 6980 6981 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6982 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6983 0, 0); 6984 BUG_ON(ret); 6985 extent_buffer_get(path->nodes[0]); 6986 eb = path->nodes[0]; 6987 btrfs_release_path(reloc_root, path); 6988 ret = invalidate_extent_cache(reloc_root, eb, group, root); 6989 BUG_ON(ret); 6990 free_extent_buffer(eb); 6991 } 6992 6993 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6994 path->lowest_level = 0; 6995 return 0; 6996 } 6997 6998 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 6999 struct btrfs_root *root, 7000 struct btrfs_path *path, 7001 struct btrfs_key *first_key, 7002 struct btrfs_ref_path *ref_path) 7003 { 7004 int ret; 7005 7006 ret = relocate_one_path(trans, root, path, first_key, 7007 ref_path, NULL, NULL); 7008 BUG_ON(ret); 7009 7010 return 0; 7011 } 7012 7013 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 7014 struct btrfs_root *extent_root, 7015 struct btrfs_path *path, 7016 struct btrfs_key *extent_key) 7017 { 7018 int ret; 7019 7020 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 7021 if (ret) 7022 goto out; 7023 ret = btrfs_del_item(trans, extent_root, path); 7024 out: 7025 btrfs_release_path(extent_root, path); 7026 return ret; 7027 } 7028 7029 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 7030 struct btrfs_ref_path *ref_path) 7031 { 7032 struct btrfs_key root_key; 7033 7034 root_key.objectid = ref_path->root_objectid; 7035 root_key.type = BTRFS_ROOT_ITEM_KEY; 7036 if (is_cowonly_root(ref_path->root_objectid)) 7037 root_key.offset = 0; 7038 else 7039 root_key.offset = (u64)-1; 7040 7041 return btrfs_read_fs_root_no_name(fs_info, &root_key); 7042 } 7043 7044 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 7045 struct btrfs_path *path, 7046 struct btrfs_key *extent_key, 7047 struct btrfs_block_group_cache *group, 7048 struct inode *reloc_inode, int pass) 7049 { 7050 struct btrfs_trans_handle *trans; 7051 struct btrfs_root *found_root; 7052 struct btrfs_ref_path *ref_path = NULL; 7053 struct disk_extent *new_extents = NULL; 7054 int nr_extents = 0; 7055 int loops; 7056 int ret; 7057 int level; 7058 struct btrfs_key first_key; 7059 u64 prev_block = 0; 7060 7061 7062 trans = btrfs_start_transaction(extent_root, 1); 7063 BUG_ON(!trans); 7064 7065 if (extent_key->objectid == 0) { 7066 ret = del_extent_zero(trans, extent_root, path, extent_key); 7067 goto out; 7068 } 7069 7070 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 7071 if (!ref_path) { 7072 ret = -ENOMEM; 7073 goto out; 7074 } 7075 7076 for (loops = 0; ; loops++) { 7077 if (loops == 0) { 7078 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 7079 extent_key->objectid); 7080 } else { 7081 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 7082 } 7083 if (ret < 0) 7084 goto out; 7085 if (ret > 0) 7086 break; 7087 7088 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 7089 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 7090 continue; 7091 7092 found_root = read_ref_root(extent_root->fs_info, ref_path); 7093 BUG_ON(!found_root); 7094 /* 7095 * for reference counted tree, only process reference paths 7096 * rooted at the latest committed root. 7097 */ 7098 if (found_root->ref_cows && 7099 ref_path->root_generation != found_root->root_key.offset) 7100 continue; 7101 7102 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 7103 if (pass == 0) { 7104 /* 7105 * copy data extents to new locations 7106 */ 7107 u64 group_start = group->key.objectid; 7108 ret = relocate_data_extent(reloc_inode, 7109 extent_key, 7110 group_start); 7111 if (ret < 0) 7112 goto out; 7113 break; 7114 } 7115 level = 0; 7116 } else { 7117 level = ref_path->owner_objectid; 7118 } 7119 7120 if (prev_block != ref_path->nodes[level]) { 7121 struct extent_buffer *eb; 7122 u64 block_start = ref_path->nodes[level]; 7123 u64 block_size = btrfs_level_size(found_root, level); 7124 7125 eb = read_tree_block(found_root, block_start, 7126 block_size, 0); 7127 btrfs_tree_lock(eb); 7128 BUG_ON(level != btrfs_header_level(eb)); 7129 7130 if (level == 0) 7131 btrfs_item_key_to_cpu(eb, &first_key, 0); 7132 else 7133 btrfs_node_key_to_cpu(eb, &first_key, 0); 7134 7135 btrfs_tree_unlock(eb); 7136 free_extent_buffer(eb); 7137 prev_block = block_start; 7138 } 7139 7140 mutex_lock(&extent_root->fs_info->trans_mutex); 7141 btrfs_record_root_in_trans(found_root); 7142 mutex_unlock(&extent_root->fs_info->trans_mutex); 7143 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 7144 /* 7145 * try to update data extent references while 7146 * keeping metadata shared between snapshots. 7147 */ 7148 if (pass == 1) { 7149 ret = relocate_one_path(trans, found_root, 7150 path, &first_key, ref_path, 7151 group, reloc_inode); 7152 if (ret < 0) 7153 goto out; 7154 continue; 7155 } 7156 /* 7157 * use fallback method to process the remaining 7158 * references. 7159 */ 7160 if (!new_extents) { 7161 u64 group_start = group->key.objectid; 7162 new_extents = kmalloc(sizeof(*new_extents), 7163 GFP_NOFS); 7164 nr_extents = 1; 7165 ret = get_new_locations(reloc_inode, 7166 extent_key, 7167 group_start, 1, 7168 &new_extents, 7169 &nr_extents); 7170 if (ret) 7171 goto out; 7172 } 7173 ret = replace_one_extent(trans, found_root, 7174 path, extent_key, 7175 &first_key, ref_path, 7176 new_extents, nr_extents); 7177 } else { 7178 ret = relocate_tree_block(trans, found_root, path, 7179 &first_key, ref_path); 7180 } 7181 if (ret < 0) 7182 goto out; 7183 } 7184 ret = 0; 7185 out: 7186 btrfs_end_transaction(trans, extent_root); 7187 kfree(new_extents); 7188 kfree(ref_path); 7189 return ret; 7190 } 7191 #endif 7192 7193 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 7194 { 7195 u64 num_devices; 7196 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 7197 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 7198 7199 num_devices = root->fs_info->fs_devices->rw_devices; 7200 if (num_devices == 1) { 7201 stripped |= BTRFS_BLOCK_GROUP_DUP; 7202 stripped = flags & ~stripped; 7203 7204 /* turn raid0 into single device chunks */ 7205 if (flags & BTRFS_BLOCK_GROUP_RAID0) 7206 return stripped; 7207 7208 /* turn mirroring into duplication */ 7209 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 7210 BTRFS_BLOCK_GROUP_RAID10)) 7211 return stripped | BTRFS_BLOCK_GROUP_DUP; 7212 return flags; 7213 } else { 7214 /* they already had raid on here, just return */ 7215 if (flags & stripped) 7216 return flags; 7217 7218 stripped |= BTRFS_BLOCK_GROUP_DUP; 7219 stripped = flags & ~stripped; 7220 7221 /* switch duplicated blocks with raid1 */ 7222 if (flags & BTRFS_BLOCK_GROUP_DUP) 7223 return stripped | BTRFS_BLOCK_GROUP_RAID1; 7224 7225 /* turn single device chunks into raid0 */ 7226 return stripped | BTRFS_BLOCK_GROUP_RAID0; 7227 } 7228 return flags; 7229 } 7230 7231 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 7232 struct btrfs_block_group_cache *shrink_block_group, 7233 int force) 7234 { 7235 struct btrfs_trans_handle *trans; 7236 u64 new_alloc_flags; 7237 u64 calc; 7238 7239 spin_lock(&shrink_block_group->lock); 7240 if (btrfs_block_group_used(&shrink_block_group->item) + 7241 shrink_block_group->reserved > 0) { 7242 spin_unlock(&shrink_block_group->lock); 7243 7244 trans = btrfs_start_transaction(root, 1); 7245 spin_lock(&shrink_block_group->lock); 7246 7247 new_alloc_flags = update_block_group_flags(root, 7248 shrink_block_group->flags); 7249 if (new_alloc_flags != shrink_block_group->flags) { 7250 calc = 7251 btrfs_block_group_used(&shrink_block_group->item); 7252 } else { 7253 calc = shrink_block_group->key.offset; 7254 } 7255 spin_unlock(&shrink_block_group->lock); 7256 7257 do_chunk_alloc(trans, root->fs_info->extent_root, 7258 calc + 2 * 1024 * 1024, new_alloc_flags, force); 7259 7260 btrfs_end_transaction(trans, root); 7261 } else 7262 spin_unlock(&shrink_block_group->lock); 7263 return 0; 7264 } 7265 7266 7267 int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 7268 struct btrfs_block_group_cache *group) 7269 7270 { 7271 __alloc_chunk_for_shrink(root, group, 1); 7272 set_block_group_readonly(group); 7273 return 0; 7274 } 7275 7276 /* 7277 * checks to see if its even possible to relocate this block group. 7278 * 7279 * @return - -1 if it's not a good idea to relocate this block group, 0 if its 7280 * ok to go ahead and try. 7281 */ 7282 int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) 7283 { 7284 struct btrfs_block_group_cache *block_group; 7285 struct btrfs_space_info *space_info; 7286 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 7287 struct btrfs_device *device; 7288 int full = 0; 7289 int ret = 0; 7290 7291 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 7292 7293 /* odd, couldn't find the block group, leave it alone */ 7294 if (!block_group) 7295 return -1; 7296 7297 /* no bytes used, we're good */ 7298 if (!btrfs_block_group_used(&block_group->item)) 7299 goto out; 7300 7301 space_info = block_group->space_info; 7302 spin_lock(&space_info->lock); 7303 7304 full = space_info->full; 7305 7306 /* 7307 * if this is the last block group we have in this space, we can't 7308 * relocate it unless we're able to allocate a new chunk below. 7309 * 7310 * Otherwise, we need to make sure we have room in the space to handle 7311 * all of the extents from this block group. If we can, we're good 7312 */ 7313 if ((space_info->total_bytes != block_group->key.offset) && 7314 (space_info->bytes_used + space_info->bytes_reserved + 7315 space_info->bytes_pinned + space_info->bytes_readonly + 7316 btrfs_block_group_used(&block_group->item) < 7317 space_info->total_bytes)) { 7318 spin_unlock(&space_info->lock); 7319 goto out; 7320 } 7321 spin_unlock(&space_info->lock); 7322 7323 /* 7324 * ok we don't have enough space, but maybe we have free space on our 7325 * devices to allocate new chunks for relocation, so loop through our 7326 * alloc devices and guess if we have enough space. However, if we 7327 * were marked as full, then we know there aren't enough chunks, and we 7328 * can just return. 7329 */ 7330 ret = -1; 7331 if (full) 7332 goto out; 7333 7334 mutex_lock(&root->fs_info->chunk_mutex); 7335 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 7336 u64 min_free = btrfs_block_group_used(&block_group->item); 7337 u64 dev_offset, max_avail; 7338 7339 /* 7340 * check to make sure we can actually find a chunk with enough 7341 * space to fit our block group in. 7342 */ 7343 if (device->total_bytes > device->bytes_used + min_free) { 7344 ret = find_free_dev_extent(NULL, device, min_free, 7345 &dev_offset, &max_avail); 7346 if (!ret) 7347 break; 7348 ret = -1; 7349 } 7350 } 7351 mutex_unlock(&root->fs_info->chunk_mutex); 7352 out: 7353 btrfs_put_block_group(block_group); 7354 return ret; 7355 } 7356 7357 static int find_first_block_group(struct btrfs_root *root, 7358 struct btrfs_path *path, struct btrfs_key *key) 7359 { 7360 int ret = 0; 7361 struct btrfs_key found_key; 7362 struct extent_buffer *leaf; 7363 int slot; 7364 7365 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 7366 if (ret < 0) 7367 goto out; 7368 7369 while (1) { 7370 slot = path->slots[0]; 7371 leaf = path->nodes[0]; 7372 if (slot >= btrfs_header_nritems(leaf)) { 7373 ret = btrfs_next_leaf(root, path); 7374 if (ret == 0) 7375 continue; 7376 if (ret < 0) 7377 goto out; 7378 break; 7379 } 7380 btrfs_item_key_to_cpu(leaf, &found_key, slot); 7381 7382 if (found_key.objectid >= key->objectid && 7383 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 7384 ret = 0; 7385 goto out; 7386 } 7387 path->slots[0]++; 7388 } 7389 out: 7390 return ret; 7391 } 7392 7393 int btrfs_free_block_groups(struct btrfs_fs_info *info) 7394 { 7395 struct btrfs_block_group_cache *block_group; 7396 struct btrfs_space_info *space_info; 7397 struct btrfs_caching_control *caching_ctl; 7398 struct rb_node *n; 7399 7400 down_write(&info->extent_commit_sem); 7401 while (!list_empty(&info->caching_block_groups)) { 7402 caching_ctl = list_entry(info->caching_block_groups.next, 7403 struct btrfs_caching_control, list); 7404 list_del(&caching_ctl->list); 7405 put_caching_control(caching_ctl); 7406 } 7407 up_write(&info->extent_commit_sem); 7408 7409 spin_lock(&info->block_group_cache_lock); 7410 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 7411 block_group = rb_entry(n, struct btrfs_block_group_cache, 7412 cache_node); 7413 rb_erase(&block_group->cache_node, 7414 &info->block_group_cache_tree); 7415 spin_unlock(&info->block_group_cache_lock); 7416 7417 down_write(&block_group->space_info->groups_sem); 7418 list_del(&block_group->list); 7419 up_write(&block_group->space_info->groups_sem); 7420 7421 if (block_group->cached == BTRFS_CACHE_STARTED) 7422 wait_block_group_cache_done(block_group); 7423 7424 btrfs_remove_free_space_cache(block_group); 7425 btrfs_put_block_group(block_group); 7426 7427 spin_lock(&info->block_group_cache_lock); 7428 } 7429 spin_unlock(&info->block_group_cache_lock); 7430 7431 /* now that all the block groups are freed, go through and 7432 * free all the space_info structs. This is only called during 7433 * the final stages of unmount, and so we know nobody is 7434 * using them. We call synchronize_rcu() once before we start, 7435 * just to be on the safe side. 7436 */ 7437 synchronize_rcu(); 7438 7439 while(!list_empty(&info->space_info)) { 7440 space_info = list_entry(info->space_info.next, 7441 struct btrfs_space_info, 7442 list); 7443 7444 list_del(&space_info->list); 7445 kfree(space_info); 7446 } 7447 return 0; 7448 } 7449 7450 int btrfs_read_block_groups(struct btrfs_root *root) 7451 { 7452 struct btrfs_path *path; 7453 int ret; 7454 struct btrfs_block_group_cache *cache; 7455 struct btrfs_fs_info *info = root->fs_info; 7456 struct btrfs_space_info *space_info; 7457 struct btrfs_key key; 7458 struct btrfs_key found_key; 7459 struct extent_buffer *leaf; 7460 7461 root = info->extent_root; 7462 key.objectid = 0; 7463 key.offset = 0; 7464 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 7465 path = btrfs_alloc_path(); 7466 if (!path) 7467 return -ENOMEM; 7468 7469 while (1) { 7470 ret = find_first_block_group(root, path, &key); 7471 if (ret > 0) { 7472 ret = 0; 7473 goto error; 7474 } 7475 if (ret != 0) 7476 goto error; 7477 7478 leaf = path->nodes[0]; 7479 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 7480 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7481 if (!cache) { 7482 ret = -ENOMEM; 7483 break; 7484 } 7485 7486 atomic_set(&cache->count, 1); 7487 spin_lock_init(&cache->lock); 7488 spin_lock_init(&cache->tree_lock); 7489 cache->fs_info = info; 7490 INIT_LIST_HEAD(&cache->list); 7491 INIT_LIST_HEAD(&cache->cluster_list); 7492 7493 /* 7494 * we only want to have 32k of ram per block group for keeping 7495 * track of free space, and if we pass 1/2 of that we want to 7496 * start converting things over to using bitmaps 7497 */ 7498 cache->extents_thresh = ((1024 * 32) / 2) / 7499 sizeof(struct btrfs_free_space); 7500 7501 read_extent_buffer(leaf, &cache->item, 7502 btrfs_item_ptr_offset(leaf, path->slots[0]), 7503 sizeof(cache->item)); 7504 memcpy(&cache->key, &found_key, sizeof(found_key)); 7505 7506 key.objectid = found_key.objectid + found_key.offset; 7507 btrfs_release_path(root, path); 7508 cache->flags = btrfs_block_group_flags(&cache->item); 7509 cache->sectorsize = root->sectorsize; 7510 7511 /* 7512 * check for two cases, either we are full, and therefore 7513 * don't need to bother with the caching work since we won't 7514 * find any space, or we are empty, and we can just add all 7515 * the space in and be done with it. This saves us _alot_ of 7516 * time, particularly in the full case. 7517 */ 7518 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7519 exclude_super_stripes(root, cache); 7520 cache->last_byte_to_unpin = (u64)-1; 7521 cache->cached = BTRFS_CACHE_FINISHED; 7522 free_excluded_extents(root, cache); 7523 } else if (btrfs_block_group_used(&cache->item) == 0) { 7524 exclude_super_stripes(root, cache); 7525 cache->last_byte_to_unpin = (u64)-1; 7526 cache->cached = BTRFS_CACHE_FINISHED; 7527 add_new_free_space(cache, root->fs_info, 7528 found_key.objectid, 7529 found_key.objectid + 7530 found_key.offset); 7531 free_excluded_extents(root, cache); 7532 } 7533 7534 ret = update_space_info(info, cache->flags, found_key.offset, 7535 btrfs_block_group_used(&cache->item), 7536 &space_info); 7537 BUG_ON(ret); 7538 cache->space_info = space_info; 7539 spin_lock(&cache->space_info->lock); 7540 cache->space_info->bytes_super += cache->bytes_super; 7541 spin_unlock(&cache->space_info->lock); 7542 7543 down_write(&space_info->groups_sem); 7544 list_add_tail(&cache->list, &space_info->block_groups); 7545 up_write(&space_info->groups_sem); 7546 7547 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7548 BUG_ON(ret); 7549 7550 set_avail_alloc_bits(root->fs_info, cache->flags); 7551 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7552 set_block_group_readonly(cache); 7553 } 7554 ret = 0; 7555 error: 7556 btrfs_free_path(path); 7557 return ret; 7558 } 7559 7560 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7561 struct btrfs_root *root, u64 bytes_used, 7562 u64 type, u64 chunk_objectid, u64 chunk_offset, 7563 u64 size) 7564 { 7565 int ret; 7566 struct btrfs_root *extent_root; 7567 struct btrfs_block_group_cache *cache; 7568 7569 extent_root = root->fs_info->extent_root; 7570 7571 root->fs_info->last_trans_log_full_commit = trans->transid; 7572 7573 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7574 if (!cache) 7575 return -ENOMEM; 7576 7577 cache->key.objectid = chunk_offset; 7578 cache->key.offset = size; 7579 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7580 cache->sectorsize = root->sectorsize; 7581 7582 /* 7583 * we only want to have 32k of ram per block group for keeping track 7584 * of free space, and if we pass 1/2 of that we want to start 7585 * converting things over to using bitmaps 7586 */ 7587 cache->extents_thresh = ((1024 * 32) / 2) / 7588 sizeof(struct btrfs_free_space); 7589 atomic_set(&cache->count, 1); 7590 spin_lock_init(&cache->lock); 7591 spin_lock_init(&cache->tree_lock); 7592 INIT_LIST_HEAD(&cache->list); 7593 INIT_LIST_HEAD(&cache->cluster_list); 7594 7595 btrfs_set_block_group_used(&cache->item, bytes_used); 7596 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7597 cache->flags = type; 7598 btrfs_set_block_group_flags(&cache->item, type); 7599 7600 cache->last_byte_to_unpin = (u64)-1; 7601 cache->cached = BTRFS_CACHE_FINISHED; 7602 exclude_super_stripes(root, cache); 7603 7604 add_new_free_space(cache, root->fs_info, chunk_offset, 7605 chunk_offset + size); 7606 7607 free_excluded_extents(root, cache); 7608 7609 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7610 &cache->space_info); 7611 BUG_ON(ret); 7612 7613 spin_lock(&cache->space_info->lock); 7614 cache->space_info->bytes_super += cache->bytes_super; 7615 spin_unlock(&cache->space_info->lock); 7616 7617 down_write(&cache->space_info->groups_sem); 7618 list_add_tail(&cache->list, &cache->space_info->block_groups); 7619 up_write(&cache->space_info->groups_sem); 7620 7621 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7622 BUG_ON(ret); 7623 7624 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7625 sizeof(cache->item)); 7626 BUG_ON(ret); 7627 7628 set_avail_alloc_bits(extent_root->fs_info, type); 7629 7630 return 0; 7631 } 7632 7633 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7634 struct btrfs_root *root, u64 group_start) 7635 { 7636 struct btrfs_path *path; 7637 struct btrfs_block_group_cache *block_group; 7638 struct btrfs_free_cluster *cluster; 7639 struct btrfs_key key; 7640 int ret; 7641 7642 root = root->fs_info->extent_root; 7643 7644 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7645 BUG_ON(!block_group); 7646 BUG_ON(!block_group->ro); 7647 7648 memcpy(&key, &block_group->key, sizeof(key)); 7649 7650 /* make sure this block group isn't part of an allocation cluster */ 7651 cluster = &root->fs_info->data_alloc_cluster; 7652 spin_lock(&cluster->refill_lock); 7653 btrfs_return_cluster_to_free_space(block_group, cluster); 7654 spin_unlock(&cluster->refill_lock); 7655 7656 /* 7657 * make sure this block group isn't part of a metadata 7658 * allocation cluster 7659 */ 7660 cluster = &root->fs_info->meta_alloc_cluster; 7661 spin_lock(&cluster->refill_lock); 7662 btrfs_return_cluster_to_free_space(block_group, cluster); 7663 spin_unlock(&cluster->refill_lock); 7664 7665 path = btrfs_alloc_path(); 7666 BUG_ON(!path); 7667 7668 spin_lock(&root->fs_info->block_group_cache_lock); 7669 rb_erase(&block_group->cache_node, 7670 &root->fs_info->block_group_cache_tree); 7671 spin_unlock(&root->fs_info->block_group_cache_lock); 7672 7673 down_write(&block_group->space_info->groups_sem); 7674 /* 7675 * we must use list_del_init so people can check to see if they 7676 * are still on the list after taking the semaphore 7677 */ 7678 list_del_init(&block_group->list); 7679 up_write(&block_group->space_info->groups_sem); 7680 7681 if (block_group->cached == BTRFS_CACHE_STARTED) 7682 wait_block_group_cache_done(block_group); 7683 7684 btrfs_remove_free_space_cache(block_group); 7685 7686 spin_lock(&block_group->space_info->lock); 7687 block_group->space_info->total_bytes -= block_group->key.offset; 7688 block_group->space_info->bytes_readonly -= block_group->key.offset; 7689 spin_unlock(&block_group->space_info->lock); 7690 7691 btrfs_clear_space_info_full(root->fs_info); 7692 7693 btrfs_put_block_group(block_group); 7694 btrfs_put_block_group(block_group); 7695 7696 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7697 if (ret > 0) 7698 ret = -EIO; 7699 if (ret < 0) 7700 goto out; 7701 7702 ret = btrfs_del_item(trans, root, path); 7703 out: 7704 btrfs_free_path(path); 7705 return ret; 7706 } 7707