1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include "compat.h" 25 #include "hash.h" 26 #include "ctree.h" 27 #include "disk-io.h" 28 #include "print-tree.h" 29 #include "transaction.h" 30 #include "volumes.h" 31 #include "locking.h" 32 #include "free-space-cache.h" 33 34 static int update_reserved_extents(struct btrfs_root *root, 35 u64 bytenr, u64 num, int reserve); 36 static int update_block_group(struct btrfs_trans_handle *trans, 37 struct btrfs_root *root, 38 u64 bytenr, u64 num_bytes, int alloc, 39 int mark_free); 40 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 41 struct btrfs_root *root, 42 u64 bytenr, u64 num_bytes, u64 parent, 43 u64 root_objectid, u64 owner_objectid, 44 u64 owner_offset, int refs_to_drop, 45 struct btrfs_delayed_extent_op *extra_op); 46 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 47 struct extent_buffer *leaf, 48 struct btrfs_extent_item *ei); 49 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 50 struct btrfs_root *root, 51 u64 parent, u64 root_objectid, 52 u64 flags, u64 owner, u64 offset, 53 struct btrfs_key *ins, int ref_mod); 54 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 55 struct btrfs_root *root, 56 u64 parent, u64 root_objectid, 57 u64 flags, struct btrfs_disk_key *key, 58 int level, struct btrfs_key *ins); 59 60 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 61 struct btrfs_root *extent_root, u64 alloc_bytes, 62 u64 flags, int force); 63 64 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 65 { 66 return (cache->flags & bits) == bits; 67 } 68 69 /* 70 * this adds the block group to the fs_info rb tree for the block group 71 * cache 72 */ 73 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 74 struct btrfs_block_group_cache *block_group) 75 { 76 struct rb_node **p; 77 struct rb_node *parent = NULL; 78 struct btrfs_block_group_cache *cache; 79 80 spin_lock(&info->block_group_cache_lock); 81 p = &info->block_group_cache_tree.rb_node; 82 83 while (*p) { 84 parent = *p; 85 cache = rb_entry(parent, struct btrfs_block_group_cache, 86 cache_node); 87 if (block_group->key.objectid < cache->key.objectid) { 88 p = &(*p)->rb_left; 89 } else if (block_group->key.objectid > cache->key.objectid) { 90 p = &(*p)->rb_right; 91 } else { 92 spin_unlock(&info->block_group_cache_lock); 93 return -EEXIST; 94 } 95 } 96 97 rb_link_node(&block_group->cache_node, parent, p); 98 rb_insert_color(&block_group->cache_node, 99 &info->block_group_cache_tree); 100 spin_unlock(&info->block_group_cache_lock); 101 102 return 0; 103 } 104 105 /* 106 * This will return the block 
group at or after bytenr if contains is 0, else 107 * it will return the block group that contains the bytenr 108 */ 109 static struct btrfs_block_group_cache * 110 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 111 int contains) 112 { 113 struct btrfs_block_group_cache *cache, *ret = NULL; 114 struct rb_node *n; 115 u64 end, start; 116 117 spin_lock(&info->block_group_cache_lock); 118 n = info->block_group_cache_tree.rb_node; 119 120 while (n) { 121 cache = rb_entry(n, struct btrfs_block_group_cache, 122 cache_node); 123 end = cache->key.objectid + cache->key.offset - 1; 124 start = cache->key.objectid; 125 126 if (bytenr < start) { 127 if (!contains && (!ret || start < ret->key.objectid)) 128 ret = cache; 129 n = n->rb_left; 130 } else if (bytenr > start) { 131 if (contains && bytenr <= end) { 132 ret = cache; 133 break; 134 } 135 n = n->rb_right; 136 } else { 137 ret = cache; 138 break; 139 } 140 } 141 if (ret) 142 atomic_inc(&ret->count); 143 spin_unlock(&info->block_group_cache_lock); 144 145 return ret; 146 } 147 148 /* 149 * this is only called by cache_block_group, since we could have freed extents 150 * we need to check the pinned_extents for any extents that can't be used yet 151 * since their free space will be released as soon as the transaction commits. 152 */ 153 static int add_new_free_space(struct btrfs_block_group_cache *block_group, 154 struct btrfs_fs_info *info, u64 start, u64 end) 155 { 156 u64 extent_start, extent_end, size; 157 int ret; 158 159 while (start < end) { 160 ret = find_first_extent_bit(&info->pinned_extents, start, 161 &extent_start, &extent_end, 162 EXTENT_DIRTY); 163 if (ret) 164 break; 165 166 if (extent_start == start) { 167 start = extent_end + 1; 168 } else if (extent_start > start && extent_start < end) { 169 size = extent_start - start; 170 ret = btrfs_add_free_space(block_group, start, 171 size); 172 BUG_ON(ret); 173 start = extent_end + 1; 174 } else { 175 break; 176 } 177 } 178 179 if (start < end) { 180 size = end - start; 181 ret = btrfs_add_free_space(block_group, start, size); 182 BUG_ON(ret); 183 } 184 185 return 0; 186 } 187 188 static int remove_sb_from_cache(struct btrfs_root *root, 189 struct btrfs_block_group_cache *cache) 190 { 191 u64 bytenr; 192 u64 *logical; 193 int stripe_len; 194 int i, nr, ret; 195 196 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 197 bytenr = btrfs_sb_offset(i); 198 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 199 cache->key.objectid, bytenr, 0, 200 &logical, &nr, &stripe_len); 201 BUG_ON(ret); 202 while (nr--) { 203 btrfs_remove_free_space(cache, logical[nr], 204 stripe_len); 205 } 206 kfree(logical); 207 } 208 return 0; 209 } 210 211 static int cache_block_group(struct btrfs_root *root, 212 struct btrfs_block_group_cache *block_group) 213 { 214 struct btrfs_path *path; 215 int ret = 0; 216 struct btrfs_key key; 217 struct extent_buffer *leaf; 218 int slot; 219 u64 last; 220 221 if (!block_group) 222 return 0; 223 224 root = root->fs_info->extent_root; 225 226 if (block_group->cached) 227 return 0; 228 229 path = btrfs_alloc_path(); 230 if (!path) 231 return -ENOMEM; 232 233 path->reada = 2; 234 /* 235 * we get into deadlocks with paths held by callers of this function. 
236 * since the alloc_mutex is protecting things right now, just 237 * skip the locking here 238 */ 239 path->skip_locking = 1; 240 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 241 key.objectid = last; 242 key.offset = 0; 243 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 244 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 245 if (ret < 0) 246 goto err; 247 248 while (1) { 249 leaf = path->nodes[0]; 250 slot = path->slots[0]; 251 if (slot >= btrfs_header_nritems(leaf)) { 252 ret = btrfs_next_leaf(root, path); 253 if (ret < 0) 254 goto err; 255 if (ret == 0) 256 continue; 257 else 258 break; 259 } 260 btrfs_item_key_to_cpu(leaf, &key, slot); 261 if (key.objectid < block_group->key.objectid) 262 goto next; 263 264 if (key.objectid >= block_group->key.objectid + 265 block_group->key.offset) 266 break; 267 268 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 269 add_new_free_space(block_group, root->fs_info, last, 270 key.objectid); 271 272 last = key.objectid + key.offset; 273 } 274 next: 275 path->slots[0]++; 276 } 277 278 add_new_free_space(block_group, root->fs_info, last, 279 block_group->key.objectid + 280 block_group->key.offset); 281 282 block_group->cached = 1; 283 remove_sb_from_cache(root, block_group); 284 ret = 0; 285 err: 286 btrfs_free_path(path); 287 return ret; 288 } 289 290 /* 291 * return the block group that starts at or after bytenr 292 */ 293 static struct btrfs_block_group_cache * 294 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 295 { 296 struct btrfs_block_group_cache *cache; 297 298 cache = block_group_cache_tree_search(info, bytenr, 0); 299 300 return cache; 301 } 302 303 /* 304 * return the block group that contains the given bytenr 305 */ 306 struct btrfs_block_group_cache *btrfs_lookup_block_group( 307 struct btrfs_fs_info *info, 308 u64 bytenr) 309 { 310 struct btrfs_block_group_cache *cache; 311 312 cache = block_group_cache_tree_search(info, bytenr, 1); 313 314 return cache; 315 } 316 317 void btrfs_put_block_group(struct btrfs_block_group_cache *cache) 318 { 319 if (atomic_dec_and_test(&cache->count)) 320 kfree(cache); 321 } 322 323 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 324 u64 flags) 325 { 326 struct list_head *head = &info->space_info; 327 struct btrfs_space_info *found; 328 329 rcu_read_lock(); 330 list_for_each_entry_rcu(found, head, list) { 331 if (found->flags == flags) { 332 rcu_read_unlock(); 333 return found; 334 } 335 } 336 rcu_read_unlock(); 337 return NULL; 338 } 339 340 /* 341 * after adding space to the filesystem, we need to clear the full flags 342 * on all the space infos. 
343 */ 344 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 345 { 346 struct list_head *head = &info->space_info; 347 struct btrfs_space_info *found; 348 349 rcu_read_lock(); 350 list_for_each_entry_rcu(found, head, list) 351 found->full = 0; 352 rcu_read_unlock(); 353 } 354 355 static u64 div_factor(u64 num, int factor) 356 { 357 if (factor == 10) 358 return num; 359 num *= factor; 360 do_div(num, 10); 361 return num; 362 } 363 364 u64 btrfs_find_block_group(struct btrfs_root *root, 365 u64 search_start, u64 search_hint, int owner) 366 { 367 struct btrfs_block_group_cache *cache; 368 u64 used; 369 u64 last = max(search_hint, search_start); 370 u64 group_start = 0; 371 int full_search = 0; 372 int factor = 9; 373 int wrapped = 0; 374 again: 375 while (1) { 376 cache = btrfs_lookup_first_block_group(root->fs_info, last); 377 if (!cache) 378 break; 379 380 spin_lock(&cache->lock); 381 last = cache->key.objectid + cache->key.offset; 382 used = btrfs_block_group_used(&cache->item); 383 384 if ((full_search || !cache->ro) && 385 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 386 if (used + cache->pinned + cache->reserved < 387 div_factor(cache->key.offset, factor)) { 388 group_start = cache->key.objectid; 389 spin_unlock(&cache->lock); 390 btrfs_put_block_group(cache); 391 goto found; 392 } 393 } 394 spin_unlock(&cache->lock); 395 btrfs_put_block_group(cache); 396 cond_resched(); 397 } 398 if (!wrapped) { 399 last = search_start; 400 wrapped = 1; 401 goto again; 402 } 403 if (!full_search && factor < 10) { 404 last = search_start; 405 full_search = 1; 406 factor = 10; 407 goto again; 408 } 409 found: 410 return group_start; 411 } 412 413 /* simple helper to search for an existing extent at a given offset */ 414 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 415 { 416 int ret; 417 struct btrfs_key key; 418 struct btrfs_path *path; 419 420 path = btrfs_alloc_path(); 421 BUG_ON(!path); 422 key.objectid = start; 423 key.offset = len; 424 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 425 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 426 0, 0); 427 btrfs_free_path(path); 428 return ret; 429 } 430 431 /* 432 * Back reference rules. Back refs have three main goals: 433 * 434 * 1) differentiate between all holders of references to an extent so that 435 * when a reference is dropped we can make sure it was a valid reference 436 * before freeing the extent. 437 * 438 * 2) Provide enough information to quickly find the holders of an extent 439 * if we notice a given block is corrupted or bad. 440 * 441 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 442 * maintenance. This is actually the same as #2, but with a slightly 443 * different use case. 444 * 445 * There are two kinds of back refs. The implicit back refs is optimized 446 * for pointers in non-shared tree blocks. For a given pointer in a block, 447 * back refs of this kind provide information about the block's owner tree 448 * and the pointer's key. These information allow us to find the block by 449 * b-tree searching. The full back refs is for pointers in tree blocks not 450 * referenced by their owner trees. The location of tree block is recorded 451 * in the back refs. Actually the full back refs is generic, and can be 452 * used in all cases the implicit back refs is used. The major shortcoming 453 * of the full back refs is its overhead. Every time a tree block gets 454 * COWed, we have to update back refs entry for all pointers in it. 
455 * 456 * For a newly allocated tree block, we use implicit back refs for 457 * pointers in it. This means most tree related operations only involve 458 * implicit back refs. For a tree block created in old transaction, the 459 * only way to drop a reference to it is COW it. So we can detect the 460 * event that tree block loses its owner tree's reference and do the 461 * back refs conversion. 462 * 463 * When a tree block is COW'd through a tree, there are four cases: 464 * 465 * The reference count of the block is one and the tree is the block's 466 * owner tree. Nothing to do in this case. 467 * 468 * The reference count of the block is one and the tree is not the 469 * block's owner tree. In this case, full back refs is used for pointers 470 * in the block. Remove these full back refs, add implicit back refs for 471 * every pointers in the new block. 472 * 473 * The reference count of the block is greater than one and the tree is 474 * the block's owner tree. In this case, implicit back refs is used for 475 * pointers in the block. Add full back refs for every pointers in the 476 * block, increase lower level extents' reference counts. The original 477 * implicit back refs are entailed to the new block. 478 * 479 * The reference count of the block is greater than one and the tree is 480 * not the block's owner tree. Add implicit back refs for every pointer in 481 * the new block, increase lower level extents' reference count. 482 * 483 * Back Reference Key composing: 484 * 485 * The key objectid corresponds to the first byte in the extent, 486 * The key type is used to differentiate between types of back refs. 487 * There are different meanings of the key offset for different types 488 * of back refs. 489 * 490 * File extents can be referenced by: 491 * 492 * - multiple snapshots, subvolumes, or different generations in one subvol 493 * - different files inside a single subvolume 494 * - different offsets inside a file (bookend extents in file.c) 495 * 496 * The extent ref structure for the implicit back refs has fields for: 497 * 498 * - Objectid of the subvolume root 499 * - objectid of the file holding the reference 500 * - original offset in the file 501 * - how many bookend extents 502 * 503 * The key offset for the implicit back refs is hash of the first 504 * three fields. 505 * 506 * The extent ref structure for the full back refs has field for: 507 * 508 * - number of pointers in the tree leaf 509 * 510 * The key offset for the implicit back refs is the first byte of 511 * the tree leaf 512 * 513 * When a file extent is allocated, The implicit back refs is used. 514 * the fields are filled in: 515 * 516 * (root_key.objectid, inode objectid, offset in file, 1) 517 * 518 * When a file extent is removed file truncation, we find the 519 * corresponding implicit back refs and check the following fields: 520 * 521 * (btrfs_header_owner(leaf), inode objectid, offset in file) 522 * 523 * Btree extents can be referenced by: 524 * 525 * - Different subvolumes 526 * 527 * Both the implicit back refs and the full back refs for tree blocks 528 * only consist of key. The key offset for the implicit back refs is 529 * objectid of block's owner tree. The key offset for the full back refs 530 * is the first byte of parent block. 531 * 532 * When implicit back refs is used, information about the lowest key and 533 * level of the tree block are required. These information are stored in 534 * tree block info structure. 
535 */ 536 537 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 538 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 539 struct btrfs_root *root, 540 struct btrfs_path *path, 541 u64 owner, u32 extra_size) 542 { 543 struct btrfs_extent_item *item; 544 struct btrfs_extent_item_v0 *ei0; 545 struct btrfs_extent_ref_v0 *ref0; 546 struct btrfs_tree_block_info *bi; 547 struct extent_buffer *leaf; 548 struct btrfs_key key; 549 struct btrfs_key found_key; 550 u32 new_size = sizeof(*item); 551 u64 refs; 552 int ret; 553 554 leaf = path->nodes[0]; 555 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 556 557 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 558 ei0 = btrfs_item_ptr(leaf, path->slots[0], 559 struct btrfs_extent_item_v0); 560 refs = btrfs_extent_refs_v0(leaf, ei0); 561 562 if (owner == (u64)-1) { 563 while (1) { 564 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 565 ret = btrfs_next_leaf(root, path); 566 if (ret < 0) 567 return ret; 568 BUG_ON(ret > 0); 569 leaf = path->nodes[0]; 570 } 571 btrfs_item_key_to_cpu(leaf, &found_key, 572 path->slots[0]); 573 BUG_ON(key.objectid != found_key.objectid); 574 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 575 path->slots[0]++; 576 continue; 577 } 578 ref0 = btrfs_item_ptr(leaf, path->slots[0], 579 struct btrfs_extent_ref_v0); 580 owner = btrfs_ref_objectid_v0(leaf, ref0); 581 break; 582 } 583 } 584 btrfs_release_path(root, path); 585 586 if (owner < BTRFS_FIRST_FREE_OBJECTID) 587 new_size += sizeof(*bi); 588 589 new_size -= sizeof(*ei0); 590 ret = btrfs_search_slot(trans, root, &key, path, 591 new_size + extra_size, 1); 592 if (ret < 0) 593 return ret; 594 BUG_ON(ret); 595 596 ret = btrfs_extend_item(trans, root, path, new_size); 597 BUG_ON(ret); 598 599 leaf = path->nodes[0]; 600 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 601 btrfs_set_extent_refs(leaf, item, refs); 602 /* FIXME: get real generation */ 603 btrfs_set_extent_generation(leaf, item, 0); 604 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 605 btrfs_set_extent_flags(leaf, item, 606 BTRFS_EXTENT_FLAG_TREE_BLOCK | 607 BTRFS_BLOCK_FLAG_FULL_BACKREF); 608 bi = (struct btrfs_tree_block_info *)(item + 1); 609 /* FIXME: get first key of the block */ 610 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 611 btrfs_set_tree_block_level(leaf, bi, (int)owner); 612 } else { 613 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 614 } 615 btrfs_mark_buffer_dirty(leaf); 616 return 0; 617 } 618 #endif 619 620 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 621 { 622 u32 high_crc = ~(u32)0; 623 u32 low_crc = ~(u32)0; 624 __le64 lenum; 625 626 lenum = cpu_to_le64(root_objectid); 627 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 628 lenum = cpu_to_le64(owner); 629 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 630 lenum = cpu_to_le64(offset); 631 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 632 633 return ((u64)high_crc << 31) ^ (u64)low_crc; 634 } 635 636 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 637 struct btrfs_extent_data_ref *ref) 638 { 639 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 640 btrfs_extent_data_ref_objectid(leaf, ref), 641 btrfs_extent_data_ref_offset(leaf, ref)); 642 } 643 644 static int match_extent_data_ref(struct extent_buffer *leaf, 645 struct btrfs_extent_data_ref *ref, 646 u64 root_objectid, u64 owner, u64 offset) 647 { 648 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 649 
btrfs_extent_data_ref_objectid(leaf, ref) != owner || 650 btrfs_extent_data_ref_offset(leaf, ref) != offset) 651 return 0; 652 return 1; 653 } 654 655 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 656 struct btrfs_root *root, 657 struct btrfs_path *path, 658 u64 bytenr, u64 parent, 659 u64 root_objectid, 660 u64 owner, u64 offset) 661 { 662 struct btrfs_key key; 663 struct btrfs_extent_data_ref *ref; 664 struct extent_buffer *leaf; 665 u32 nritems; 666 int ret; 667 int recow; 668 int err = -ENOENT; 669 670 key.objectid = bytenr; 671 if (parent) { 672 key.type = BTRFS_SHARED_DATA_REF_KEY; 673 key.offset = parent; 674 } else { 675 key.type = BTRFS_EXTENT_DATA_REF_KEY; 676 key.offset = hash_extent_data_ref(root_objectid, 677 owner, offset); 678 } 679 again: 680 recow = 0; 681 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 682 if (ret < 0) { 683 err = ret; 684 goto fail; 685 } 686 687 if (parent) { 688 if (!ret) 689 return 0; 690 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 691 key.type = BTRFS_EXTENT_REF_V0_KEY; 692 btrfs_release_path(root, path); 693 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 694 if (ret < 0) { 695 err = ret; 696 goto fail; 697 } 698 if (!ret) 699 return 0; 700 #endif 701 goto fail; 702 } 703 704 leaf = path->nodes[0]; 705 nritems = btrfs_header_nritems(leaf); 706 while (1) { 707 if (path->slots[0] >= nritems) { 708 ret = btrfs_next_leaf(root, path); 709 if (ret < 0) 710 err = ret; 711 if (ret) 712 goto fail; 713 714 leaf = path->nodes[0]; 715 nritems = btrfs_header_nritems(leaf); 716 recow = 1; 717 } 718 719 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 720 if (key.objectid != bytenr || 721 key.type != BTRFS_EXTENT_DATA_REF_KEY) 722 goto fail; 723 724 ref = btrfs_item_ptr(leaf, path->slots[0], 725 struct btrfs_extent_data_ref); 726 727 if (match_extent_data_ref(leaf, ref, root_objectid, 728 owner, offset)) { 729 if (recow) { 730 btrfs_release_path(root, path); 731 goto again; 732 } 733 err = 0; 734 break; 735 } 736 path->slots[0]++; 737 } 738 fail: 739 return err; 740 } 741 742 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 743 struct btrfs_root *root, 744 struct btrfs_path *path, 745 u64 bytenr, u64 parent, 746 u64 root_objectid, u64 owner, 747 u64 offset, int refs_to_add) 748 { 749 struct btrfs_key key; 750 struct extent_buffer *leaf; 751 u32 size; 752 u32 num_refs; 753 int ret; 754 755 key.objectid = bytenr; 756 if (parent) { 757 key.type = BTRFS_SHARED_DATA_REF_KEY; 758 key.offset = parent; 759 size = sizeof(struct btrfs_shared_data_ref); 760 } else { 761 key.type = BTRFS_EXTENT_DATA_REF_KEY; 762 key.offset = hash_extent_data_ref(root_objectid, 763 owner, offset); 764 size = sizeof(struct btrfs_extent_data_ref); 765 } 766 767 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 768 if (ret && ret != -EEXIST) 769 goto fail; 770 771 leaf = path->nodes[0]; 772 if (parent) { 773 struct btrfs_shared_data_ref *ref; 774 ref = btrfs_item_ptr(leaf, path->slots[0], 775 struct btrfs_shared_data_ref); 776 if (ret == 0) { 777 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 778 } else { 779 num_refs = btrfs_shared_data_ref_count(leaf, ref); 780 num_refs += refs_to_add; 781 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 782 } 783 } else { 784 struct btrfs_extent_data_ref *ref; 785 while (ret == -EEXIST) { 786 ref = btrfs_item_ptr(leaf, path->slots[0], 787 struct btrfs_extent_data_ref); 788 if (match_extent_data_ref(leaf, ref, root_objectid, 789 owner, offset)) 790 break; 791 
btrfs_release_path(root, path); 792 key.offset++; 793 ret = btrfs_insert_empty_item(trans, root, path, &key, 794 size); 795 if (ret && ret != -EEXIST) 796 goto fail; 797 798 leaf = path->nodes[0]; 799 } 800 ref = btrfs_item_ptr(leaf, path->slots[0], 801 struct btrfs_extent_data_ref); 802 if (ret == 0) { 803 btrfs_set_extent_data_ref_root(leaf, ref, 804 root_objectid); 805 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 806 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 807 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 808 } else { 809 num_refs = btrfs_extent_data_ref_count(leaf, ref); 810 num_refs += refs_to_add; 811 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 812 } 813 } 814 btrfs_mark_buffer_dirty(leaf); 815 ret = 0; 816 fail: 817 btrfs_release_path(root, path); 818 return ret; 819 } 820 821 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 822 struct btrfs_root *root, 823 struct btrfs_path *path, 824 int refs_to_drop) 825 { 826 struct btrfs_key key; 827 struct btrfs_extent_data_ref *ref1 = NULL; 828 struct btrfs_shared_data_ref *ref2 = NULL; 829 struct extent_buffer *leaf; 830 u32 num_refs = 0; 831 int ret = 0; 832 833 leaf = path->nodes[0]; 834 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 835 836 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 837 ref1 = btrfs_item_ptr(leaf, path->slots[0], 838 struct btrfs_extent_data_ref); 839 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 840 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 841 ref2 = btrfs_item_ptr(leaf, path->slots[0], 842 struct btrfs_shared_data_ref); 843 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 844 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 845 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 846 struct btrfs_extent_ref_v0 *ref0; 847 ref0 = btrfs_item_ptr(leaf, path->slots[0], 848 struct btrfs_extent_ref_v0); 849 num_refs = btrfs_ref_count_v0(leaf, ref0); 850 #endif 851 } else { 852 BUG(); 853 } 854 855 BUG_ON(num_refs < refs_to_drop); 856 num_refs -= refs_to_drop; 857 858 if (num_refs == 0) { 859 ret = btrfs_del_item(trans, root, path); 860 } else { 861 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 862 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 863 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 864 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 865 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 866 else { 867 struct btrfs_extent_ref_v0 *ref0; 868 ref0 = btrfs_item_ptr(leaf, path->slots[0], 869 struct btrfs_extent_ref_v0); 870 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 871 } 872 #endif 873 btrfs_mark_buffer_dirty(leaf); 874 } 875 return ret; 876 } 877 878 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 879 struct btrfs_path *path, 880 struct btrfs_extent_inline_ref *iref) 881 { 882 struct btrfs_key key; 883 struct extent_buffer *leaf; 884 struct btrfs_extent_data_ref *ref1; 885 struct btrfs_shared_data_ref *ref2; 886 u32 num_refs = 0; 887 888 leaf = path->nodes[0]; 889 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 890 if (iref) { 891 if (btrfs_extent_inline_ref_type(leaf, iref) == 892 BTRFS_EXTENT_DATA_REF_KEY) { 893 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 894 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 895 } else { 896 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 897 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 898 } 899 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 900 ref1 = btrfs_item_ptr(leaf, path->slots[0], 901 struct btrfs_extent_data_ref); 902 num_refs = 
btrfs_extent_data_ref_count(leaf, ref1); 903 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 904 ref2 = btrfs_item_ptr(leaf, path->slots[0], 905 struct btrfs_shared_data_ref); 906 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 907 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 908 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 909 struct btrfs_extent_ref_v0 *ref0; 910 ref0 = btrfs_item_ptr(leaf, path->slots[0], 911 struct btrfs_extent_ref_v0); 912 num_refs = btrfs_ref_count_v0(leaf, ref0); 913 #endif 914 } else { 915 WARN_ON(1); 916 } 917 return num_refs; 918 } 919 920 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 921 struct btrfs_root *root, 922 struct btrfs_path *path, 923 u64 bytenr, u64 parent, 924 u64 root_objectid) 925 { 926 struct btrfs_key key; 927 int ret; 928 929 key.objectid = bytenr; 930 if (parent) { 931 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 932 key.offset = parent; 933 } else { 934 key.type = BTRFS_TREE_BLOCK_REF_KEY; 935 key.offset = root_objectid; 936 } 937 938 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 939 if (ret > 0) 940 ret = -ENOENT; 941 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 942 if (ret == -ENOENT && parent) { 943 btrfs_release_path(root, path); 944 key.type = BTRFS_EXTENT_REF_V0_KEY; 945 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 946 if (ret > 0) 947 ret = -ENOENT; 948 } 949 #endif 950 return ret; 951 } 952 953 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 954 struct btrfs_root *root, 955 struct btrfs_path *path, 956 u64 bytenr, u64 parent, 957 u64 root_objectid) 958 { 959 struct btrfs_key key; 960 int ret; 961 962 key.objectid = bytenr; 963 if (parent) { 964 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 965 key.offset = parent; 966 } else { 967 key.type = BTRFS_TREE_BLOCK_REF_KEY; 968 key.offset = root_objectid; 969 } 970 971 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 972 btrfs_release_path(root, path); 973 return ret; 974 } 975 976 static inline int extent_ref_type(u64 parent, u64 owner) 977 { 978 int type; 979 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 980 if (parent > 0) 981 type = BTRFS_SHARED_BLOCK_REF_KEY; 982 else 983 type = BTRFS_TREE_BLOCK_REF_KEY; 984 } else { 985 if (parent > 0) 986 type = BTRFS_SHARED_DATA_REF_KEY; 987 else 988 type = BTRFS_EXTENT_DATA_REF_KEY; 989 } 990 return type; 991 } 992 993 static int find_next_key(struct btrfs_path *path, int level, 994 struct btrfs_key *key) 995 996 { 997 for (; level < BTRFS_MAX_LEVEL; level++) { 998 if (!path->nodes[level]) 999 break; 1000 if (path->slots[level] + 1 >= 1001 btrfs_header_nritems(path->nodes[level])) 1002 continue; 1003 if (level == 0) 1004 btrfs_item_key_to_cpu(path->nodes[level], key, 1005 path->slots[level] + 1); 1006 else 1007 btrfs_node_key_to_cpu(path->nodes[level], key, 1008 path->slots[level] + 1); 1009 return 0; 1010 } 1011 return 1; 1012 } 1013 1014 /* 1015 * look for inline back ref. if back ref is found, *ref_ret is set 1016 * to the address of inline back ref, and 0 is returned. 1017 * 1018 * if back ref isn't found, *ref_ret is set to the address where it 1019 * should be inserted, and -ENOENT is returned. 1020 * 1021 * if insert is true and there are too many inline back refs, the path 1022 * points to the extent item, and -EAGAIN is returned. 1023 * 1024 * NOTE: inline back refs are ordered in the same way that back ref 1025 * items in the tree are ordered. 
1026 */ 1027 static noinline_for_stack 1028 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1029 struct btrfs_root *root, 1030 struct btrfs_path *path, 1031 struct btrfs_extent_inline_ref **ref_ret, 1032 u64 bytenr, u64 num_bytes, 1033 u64 parent, u64 root_objectid, 1034 u64 owner, u64 offset, int insert) 1035 { 1036 struct btrfs_key key; 1037 struct extent_buffer *leaf; 1038 struct btrfs_extent_item *ei; 1039 struct btrfs_extent_inline_ref *iref; 1040 u64 flags; 1041 u64 item_size; 1042 unsigned long ptr; 1043 unsigned long end; 1044 int extra_size; 1045 int type; 1046 int want; 1047 int ret; 1048 int err = 0; 1049 1050 key.objectid = bytenr; 1051 key.type = BTRFS_EXTENT_ITEM_KEY; 1052 key.offset = num_bytes; 1053 1054 want = extent_ref_type(parent, owner); 1055 if (insert) { 1056 extra_size = btrfs_extent_inline_ref_size(want); 1057 path->keep_locks = 1; 1058 } else 1059 extra_size = -1; 1060 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1061 if (ret < 0) { 1062 err = ret; 1063 goto out; 1064 } 1065 BUG_ON(ret); 1066 1067 leaf = path->nodes[0]; 1068 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1069 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1070 if (item_size < sizeof(*ei)) { 1071 if (!insert) { 1072 err = -ENOENT; 1073 goto out; 1074 } 1075 ret = convert_extent_item_v0(trans, root, path, owner, 1076 extra_size); 1077 if (ret < 0) { 1078 err = ret; 1079 goto out; 1080 } 1081 leaf = path->nodes[0]; 1082 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1083 } 1084 #endif 1085 BUG_ON(item_size < sizeof(*ei)); 1086 1087 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1088 flags = btrfs_extent_flags(leaf, ei); 1089 1090 ptr = (unsigned long)(ei + 1); 1091 end = (unsigned long)ei + item_size; 1092 1093 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1094 ptr += sizeof(struct btrfs_tree_block_info); 1095 BUG_ON(ptr > end); 1096 } else { 1097 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1098 } 1099 1100 err = -ENOENT; 1101 while (1) { 1102 if (ptr >= end) { 1103 WARN_ON(ptr > end); 1104 break; 1105 } 1106 iref = (struct btrfs_extent_inline_ref *)ptr; 1107 type = btrfs_extent_inline_ref_type(leaf, iref); 1108 if (want < type) 1109 break; 1110 if (want > type) { 1111 ptr += btrfs_extent_inline_ref_size(type); 1112 continue; 1113 } 1114 1115 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1116 struct btrfs_extent_data_ref *dref; 1117 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1118 if (match_extent_data_ref(leaf, dref, root_objectid, 1119 owner, offset)) { 1120 err = 0; 1121 break; 1122 } 1123 if (hash_extent_data_ref_item(leaf, dref) < 1124 hash_extent_data_ref(root_objectid, owner, offset)) 1125 break; 1126 } else { 1127 u64 ref_offset; 1128 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1129 if (parent > 0) { 1130 if (parent == ref_offset) { 1131 err = 0; 1132 break; 1133 } 1134 if (ref_offset < parent) 1135 break; 1136 } else { 1137 if (root_objectid == ref_offset) { 1138 err = 0; 1139 break; 1140 } 1141 if (ref_offset < root_objectid) 1142 break; 1143 } 1144 } 1145 ptr += btrfs_extent_inline_ref_size(type); 1146 } 1147 if (err == -ENOENT && insert) { 1148 if (item_size + extra_size >= 1149 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1150 err = -EAGAIN; 1151 goto out; 1152 } 1153 /* 1154 * To add new inline back ref, we have to make sure 1155 * there is no corresponding back ref item. 
1156 * For simplicity, we just do not add new inline back 1157 * ref if there is any kind of item for this block 1158 */ 1159 if (find_next_key(path, 0, &key) == 0 && 1160 key.objectid == bytenr && 1161 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1162 err = -EAGAIN; 1163 goto out; 1164 } 1165 } 1166 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1167 out: 1168 if (insert) { 1169 path->keep_locks = 0; 1170 btrfs_unlock_up_safe(path, 1); 1171 } 1172 return err; 1173 } 1174 1175 /* 1176 * helper to add new inline back ref 1177 */ 1178 static noinline_for_stack 1179 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1180 struct btrfs_root *root, 1181 struct btrfs_path *path, 1182 struct btrfs_extent_inline_ref *iref, 1183 u64 parent, u64 root_objectid, 1184 u64 owner, u64 offset, int refs_to_add, 1185 struct btrfs_delayed_extent_op *extent_op) 1186 { 1187 struct extent_buffer *leaf; 1188 struct btrfs_extent_item *ei; 1189 unsigned long ptr; 1190 unsigned long end; 1191 unsigned long item_offset; 1192 u64 refs; 1193 int size; 1194 int type; 1195 int ret; 1196 1197 leaf = path->nodes[0]; 1198 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1199 item_offset = (unsigned long)iref - (unsigned long)ei; 1200 1201 type = extent_ref_type(parent, owner); 1202 size = btrfs_extent_inline_ref_size(type); 1203 1204 ret = btrfs_extend_item(trans, root, path, size); 1205 BUG_ON(ret); 1206 1207 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1208 refs = btrfs_extent_refs(leaf, ei); 1209 refs += refs_to_add; 1210 btrfs_set_extent_refs(leaf, ei, refs); 1211 if (extent_op) 1212 __run_delayed_extent_op(extent_op, leaf, ei); 1213 1214 ptr = (unsigned long)ei + item_offset; 1215 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1216 if (ptr < end - size) 1217 memmove_extent_buffer(leaf, ptr + size, ptr, 1218 end - size - ptr); 1219 1220 iref = (struct btrfs_extent_inline_ref *)ptr; 1221 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1222 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1223 struct btrfs_extent_data_ref *dref; 1224 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1225 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1226 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1227 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1228 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1229 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1230 struct btrfs_shared_data_ref *sref; 1231 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1232 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1233 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1234 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1235 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1236 } else { 1237 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1238 } 1239 btrfs_mark_buffer_dirty(leaf); 1240 return 0; 1241 } 1242 1243 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1244 struct btrfs_root *root, 1245 struct btrfs_path *path, 1246 struct btrfs_extent_inline_ref **ref_ret, 1247 u64 bytenr, u64 num_bytes, u64 parent, 1248 u64 root_objectid, u64 owner, u64 offset) 1249 { 1250 int ret; 1251 1252 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1253 bytenr, num_bytes, parent, 1254 root_objectid, owner, offset, 0); 1255 if (ret != -ENOENT) 1256 return ret; 1257 1258 btrfs_release_path(root, path); 1259 *ref_ret = NULL; 1260 1261 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1262 
ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1263 root_objectid); 1264 } else { 1265 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1266 root_objectid, owner, offset); 1267 } 1268 return ret; 1269 } 1270 1271 /* 1272 * helper to update/remove inline back ref 1273 */ 1274 static noinline_for_stack 1275 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1276 struct btrfs_root *root, 1277 struct btrfs_path *path, 1278 struct btrfs_extent_inline_ref *iref, 1279 int refs_to_mod, 1280 struct btrfs_delayed_extent_op *extent_op) 1281 { 1282 struct extent_buffer *leaf; 1283 struct btrfs_extent_item *ei; 1284 struct btrfs_extent_data_ref *dref = NULL; 1285 struct btrfs_shared_data_ref *sref = NULL; 1286 unsigned long ptr; 1287 unsigned long end; 1288 u32 item_size; 1289 int size; 1290 int type; 1291 int ret; 1292 u64 refs; 1293 1294 leaf = path->nodes[0]; 1295 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1296 refs = btrfs_extent_refs(leaf, ei); 1297 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1298 refs += refs_to_mod; 1299 btrfs_set_extent_refs(leaf, ei, refs); 1300 if (extent_op) 1301 __run_delayed_extent_op(extent_op, leaf, ei); 1302 1303 type = btrfs_extent_inline_ref_type(leaf, iref); 1304 1305 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1306 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1307 refs = btrfs_extent_data_ref_count(leaf, dref); 1308 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1309 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1310 refs = btrfs_shared_data_ref_count(leaf, sref); 1311 } else { 1312 refs = 1; 1313 BUG_ON(refs_to_mod != -1); 1314 } 1315 1316 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1317 refs += refs_to_mod; 1318 1319 if (refs > 0) { 1320 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1321 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1322 else 1323 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1324 } else { 1325 size = btrfs_extent_inline_ref_size(type); 1326 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1327 ptr = (unsigned long)iref; 1328 end = (unsigned long)ei + item_size; 1329 if (ptr + size < end) 1330 memmove_extent_buffer(leaf, ptr, ptr + size, 1331 end - ptr - size); 1332 item_size -= size; 1333 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1334 BUG_ON(ret); 1335 } 1336 btrfs_mark_buffer_dirty(leaf); 1337 return 0; 1338 } 1339 1340 static noinline_for_stack 1341 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1342 struct btrfs_root *root, 1343 struct btrfs_path *path, 1344 u64 bytenr, u64 num_bytes, u64 parent, 1345 u64 root_objectid, u64 owner, 1346 u64 offset, int refs_to_add, 1347 struct btrfs_delayed_extent_op *extent_op) 1348 { 1349 struct btrfs_extent_inline_ref *iref; 1350 int ret; 1351 1352 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1353 bytenr, num_bytes, parent, 1354 root_objectid, owner, offset, 1); 1355 if (ret == 0) { 1356 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1357 ret = update_inline_extent_backref(trans, root, path, iref, 1358 refs_to_add, extent_op); 1359 } else if (ret == -ENOENT) { 1360 ret = setup_inline_extent_backref(trans, root, path, iref, 1361 parent, root_objectid, 1362 owner, offset, refs_to_add, 1363 extent_op); 1364 } 1365 return ret; 1366 } 1367 1368 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1369 struct btrfs_root *root, 1370 struct btrfs_path *path, 1371 u64 bytenr, u64 parent, u64 root_objectid, 1372 u64 owner, u64 offset, int 
refs_to_add) 1373 { 1374 int ret; 1375 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1376 BUG_ON(refs_to_add != 1); 1377 ret = insert_tree_block_ref(trans, root, path, bytenr, 1378 parent, root_objectid); 1379 } else { 1380 ret = insert_extent_data_ref(trans, root, path, bytenr, 1381 parent, root_objectid, 1382 owner, offset, refs_to_add); 1383 } 1384 return ret; 1385 } 1386 1387 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1388 struct btrfs_root *root, 1389 struct btrfs_path *path, 1390 struct btrfs_extent_inline_ref *iref, 1391 int refs_to_drop, int is_data) 1392 { 1393 int ret; 1394 1395 BUG_ON(!is_data && refs_to_drop != 1); 1396 if (iref) { 1397 ret = update_inline_extent_backref(trans, root, path, iref, 1398 -refs_to_drop, NULL); 1399 } else if (is_data) { 1400 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1401 } else { 1402 ret = btrfs_del_item(trans, root, path); 1403 } 1404 return ret; 1405 } 1406 1407 #ifdef BIO_RW_DISCARD 1408 static void btrfs_issue_discard(struct block_device *bdev, 1409 u64 start, u64 len) 1410 { 1411 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 1412 } 1413 #endif 1414 1415 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1416 u64 num_bytes) 1417 { 1418 #ifdef BIO_RW_DISCARD 1419 int ret; 1420 u64 map_length = num_bytes; 1421 struct btrfs_multi_bio *multi = NULL; 1422 1423 /* Tell the block device(s) that the sectors can be discarded */ 1424 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1425 bytenr, &map_length, &multi, 0); 1426 if (!ret) { 1427 struct btrfs_bio_stripe *stripe = multi->stripes; 1428 int i; 1429 1430 if (map_length > num_bytes) 1431 map_length = num_bytes; 1432 1433 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1434 btrfs_issue_discard(stripe->dev->bdev, 1435 stripe->physical, 1436 map_length); 1437 } 1438 kfree(multi); 1439 } 1440 1441 return ret; 1442 #else 1443 return 0; 1444 #endif 1445 } 1446 1447 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1448 struct btrfs_root *root, 1449 u64 bytenr, u64 num_bytes, u64 parent, 1450 u64 root_objectid, u64 owner, u64 offset) 1451 { 1452 int ret; 1453 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1454 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1455 1456 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1457 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1458 parent, root_objectid, (int)owner, 1459 BTRFS_ADD_DELAYED_REF, NULL); 1460 } else { 1461 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1462 parent, root_objectid, owner, offset, 1463 BTRFS_ADD_DELAYED_REF, NULL); 1464 } 1465 return ret; 1466 } 1467 1468 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1469 struct btrfs_root *root, 1470 u64 bytenr, u64 num_bytes, 1471 u64 parent, u64 root_objectid, 1472 u64 owner, u64 offset, int refs_to_add, 1473 struct btrfs_delayed_extent_op *extent_op) 1474 { 1475 struct btrfs_path *path; 1476 struct extent_buffer *leaf; 1477 struct btrfs_extent_item *item; 1478 u64 refs; 1479 int ret; 1480 int err = 0; 1481 1482 path = btrfs_alloc_path(); 1483 if (!path) 1484 return -ENOMEM; 1485 1486 path->reada = 1; 1487 path->leave_spinning = 1; 1488 /* this will setup the path even if it fails to insert the back ref */ 1489 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1490 path, bytenr, num_bytes, parent, 1491 root_objectid, owner, offset, 1492 refs_to_add, extent_op); 1493 if (ret == 0) 1494 goto out; 1495 1496 if (ret != -EAGAIN) { 1497 err = ret; 1498 goto out; 
1499 } 1500 1501 leaf = path->nodes[0]; 1502 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1503 refs = btrfs_extent_refs(leaf, item); 1504 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1505 if (extent_op) 1506 __run_delayed_extent_op(extent_op, leaf, item); 1507 1508 btrfs_mark_buffer_dirty(leaf); 1509 btrfs_release_path(root->fs_info->extent_root, path); 1510 1511 path->reada = 1; 1512 path->leave_spinning = 1; 1513 1514 /* now insert the actual backref */ 1515 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1516 path, bytenr, parent, root_objectid, 1517 owner, offset, refs_to_add); 1518 BUG_ON(ret); 1519 out: 1520 btrfs_free_path(path); 1521 return err; 1522 } 1523 1524 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1525 struct btrfs_root *root, 1526 struct btrfs_delayed_ref_node *node, 1527 struct btrfs_delayed_extent_op *extent_op, 1528 int insert_reserved) 1529 { 1530 int ret = 0; 1531 struct btrfs_delayed_data_ref *ref; 1532 struct btrfs_key ins; 1533 u64 parent = 0; 1534 u64 ref_root = 0; 1535 u64 flags = 0; 1536 1537 ins.objectid = node->bytenr; 1538 ins.offset = node->num_bytes; 1539 ins.type = BTRFS_EXTENT_ITEM_KEY; 1540 1541 ref = btrfs_delayed_node_to_data_ref(node); 1542 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1543 parent = ref->parent; 1544 else 1545 ref_root = ref->root; 1546 1547 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1548 if (extent_op) { 1549 BUG_ON(extent_op->update_key); 1550 flags |= extent_op->flags_to_set; 1551 } 1552 ret = alloc_reserved_file_extent(trans, root, 1553 parent, ref_root, flags, 1554 ref->objectid, ref->offset, 1555 &ins, node->ref_mod); 1556 update_reserved_extents(root, ins.objectid, ins.offset, 0); 1557 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1558 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1559 node->num_bytes, parent, 1560 ref_root, ref->objectid, 1561 ref->offset, node->ref_mod, 1562 extent_op); 1563 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1564 ret = __btrfs_free_extent(trans, root, node->bytenr, 1565 node->num_bytes, parent, 1566 ref_root, ref->objectid, 1567 ref->offset, node->ref_mod, 1568 extent_op); 1569 } else { 1570 BUG(); 1571 } 1572 return ret; 1573 } 1574 1575 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1576 struct extent_buffer *leaf, 1577 struct btrfs_extent_item *ei) 1578 { 1579 u64 flags = btrfs_extent_flags(leaf, ei); 1580 if (extent_op->update_flags) { 1581 flags |= extent_op->flags_to_set; 1582 btrfs_set_extent_flags(leaf, ei, flags); 1583 } 1584 1585 if (extent_op->update_key) { 1586 struct btrfs_tree_block_info *bi; 1587 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1588 bi = (struct btrfs_tree_block_info *)(ei + 1); 1589 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1590 } 1591 } 1592 1593 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1594 struct btrfs_root *root, 1595 struct btrfs_delayed_ref_node *node, 1596 struct btrfs_delayed_extent_op *extent_op) 1597 { 1598 struct btrfs_key key; 1599 struct btrfs_path *path; 1600 struct btrfs_extent_item *ei; 1601 struct extent_buffer *leaf; 1602 u32 item_size; 1603 int ret; 1604 int err = 0; 1605 1606 path = btrfs_alloc_path(); 1607 if (!path) 1608 return -ENOMEM; 1609 1610 key.objectid = node->bytenr; 1611 key.type = BTRFS_EXTENT_ITEM_KEY; 1612 key.offset = node->num_bytes; 1613 1614 path->reada = 1; 1615 path->leave_spinning = 1; 1616 ret = btrfs_search_slot(trans, 
root->fs_info->extent_root, &key, 1617 path, 0, 1); 1618 if (ret < 0) { 1619 err = ret; 1620 goto out; 1621 } 1622 if (ret > 0) { 1623 err = -EIO; 1624 goto out; 1625 } 1626 1627 leaf = path->nodes[0]; 1628 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1629 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1630 if (item_size < sizeof(*ei)) { 1631 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1632 path, (u64)-1, 0); 1633 if (ret < 0) { 1634 err = ret; 1635 goto out; 1636 } 1637 leaf = path->nodes[0]; 1638 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1639 } 1640 #endif 1641 BUG_ON(item_size < sizeof(*ei)); 1642 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1643 __run_delayed_extent_op(extent_op, leaf, ei); 1644 1645 btrfs_mark_buffer_dirty(leaf); 1646 out: 1647 btrfs_free_path(path); 1648 return err; 1649 } 1650 1651 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 1652 struct btrfs_root *root, 1653 struct btrfs_delayed_ref_node *node, 1654 struct btrfs_delayed_extent_op *extent_op, 1655 int insert_reserved) 1656 { 1657 int ret = 0; 1658 struct btrfs_delayed_tree_ref *ref; 1659 struct btrfs_key ins; 1660 u64 parent = 0; 1661 u64 ref_root = 0; 1662 1663 ins.objectid = node->bytenr; 1664 ins.offset = node->num_bytes; 1665 ins.type = BTRFS_EXTENT_ITEM_KEY; 1666 1667 ref = btrfs_delayed_node_to_tree_ref(node); 1668 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1669 parent = ref->parent; 1670 else 1671 ref_root = ref->root; 1672 1673 BUG_ON(node->ref_mod != 1); 1674 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1675 BUG_ON(!extent_op || !extent_op->update_flags || 1676 !extent_op->update_key); 1677 ret = alloc_reserved_tree_block(trans, root, 1678 parent, ref_root, 1679 extent_op->flags_to_set, 1680 &extent_op->key, 1681 ref->level, &ins); 1682 update_reserved_extents(root, ins.objectid, ins.offset, 0); 1683 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1684 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1685 node->num_bytes, parent, ref_root, 1686 ref->level, 0, 1, extent_op); 1687 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1688 ret = __btrfs_free_extent(trans, root, node->bytenr, 1689 node->num_bytes, parent, ref_root, 1690 ref->level, 0, 1, extent_op); 1691 } else { 1692 BUG(); 1693 } 1694 return ret; 1695 } 1696 1697 1698 /* helper function to actually process a single delayed ref entry */ 1699 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1700 struct btrfs_root *root, 1701 struct btrfs_delayed_ref_node *node, 1702 struct btrfs_delayed_extent_op *extent_op, 1703 int insert_reserved) 1704 { 1705 int ret; 1706 if (btrfs_delayed_ref_is_head(node)) { 1707 struct btrfs_delayed_ref_head *head; 1708 /* 1709 * we've hit the end of the chain and we were supposed 1710 * to insert this extent into the tree. 
But, it got 1711 * deleted before we ever needed to insert it, so all 1712 * we have to do is clean up the accounting 1713 */ 1714 BUG_ON(extent_op); 1715 head = btrfs_delayed_node_to_head(node); 1716 if (insert_reserved) { 1717 if (head->is_data) { 1718 ret = btrfs_del_csums(trans, root, 1719 node->bytenr, 1720 node->num_bytes); 1721 BUG_ON(ret); 1722 } 1723 btrfs_update_pinned_extents(root, node->bytenr, 1724 node->num_bytes, 1); 1725 update_reserved_extents(root, node->bytenr, 1726 node->num_bytes, 0); 1727 } 1728 mutex_unlock(&head->mutex); 1729 return 0; 1730 } 1731 1732 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1733 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1734 ret = run_delayed_tree_ref(trans, root, node, extent_op, 1735 insert_reserved); 1736 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1737 node->type == BTRFS_SHARED_DATA_REF_KEY) 1738 ret = run_delayed_data_ref(trans, root, node, extent_op, 1739 insert_reserved); 1740 else 1741 BUG(); 1742 return ret; 1743 } 1744 1745 static noinline struct btrfs_delayed_ref_node * 1746 select_delayed_ref(struct btrfs_delayed_ref_head *head) 1747 { 1748 struct rb_node *node; 1749 struct btrfs_delayed_ref_node *ref; 1750 int action = BTRFS_ADD_DELAYED_REF; 1751 again: 1752 /* 1753 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 1754 * this prevents ref count from going down to zero when 1755 * there still are pending delayed ref. 1756 */ 1757 node = rb_prev(&head->node.rb_node); 1758 while (1) { 1759 if (!node) 1760 break; 1761 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1762 rb_node); 1763 if (ref->bytenr != head->node.bytenr) 1764 break; 1765 if (ref->action == action) 1766 return ref; 1767 node = rb_prev(node); 1768 } 1769 if (action == BTRFS_ADD_DELAYED_REF) { 1770 action = BTRFS_DROP_DELAYED_REF; 1771 goto again; 1772 } 1773 return NULL; 1774 } 1775 1776 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 1777 struct btrfs_root *root, 1778 struct list_head *cluster) 1779 { 1780 struct btrfs_delayed_ref_root *delayed_refs; 1781 struct btrfs_delayed_ref_node *ref; 1782 struct btrfs_delayed_ref_head *locked_ref = NULL; 1783 struct btrfs_delayed_extent_op *extent_op; 1784 int ret; 1785 int count = 0; 1786 int must_insert_reserved = 0; 1787 1788 delayed_refs = &trans->transaction->delayed_refs; 1789 while (1) { 1790 if (!locked_ref) { 1791 /* pick a new head ref from the cluster list */ 1792 if (list_empty(cluster)) 1793 break; 1794 1795 locked_ref = list_entry(cluster->next, 1796 struct btrfs_delayed_ref_head, cluster); 1797 1798 /* grab the lock that says we are going to process 1799 * all the refs for this head */ 1800 ret = btrfs_delayed_ref_lock(trans, locked_ref); 1801 1802 /* 1803 * we may have dropped the spin lock to get the head 1804 * mutex lock, and that might have given someone else 1805 * time to free the head. If that's true, it has been 1806 * removed from our list and we can move on. 1807 */ 1808 if (ret == -EAGAIN) { 1809 locked_ref = NULL; 1810 count++; 1811 continue; 1812 } 1813 } 1814 1815 /* 1816 * record the must insert reserved flag before we 1817 * drop the spin lock. 
1818 */ 1819 must_insert_reserved = locked_ref->must_insert_reserved; 1820 locked_ref->must_insert_reserved = 0; 1821 1822 extent_op = locked_ref->extent_op; 1823 locked_ref->extent_op = NULL; 1824 1825 /* 1826 * locked_ref is the head node, so we have to go one 1827 * node back for any delayed ref updates 1828 */ 1829 ref = select_delayed_ref(locked_ref); 1830 if (!ref) { 1831 /* All delayed refs have been processed, Go ahead 1832 * and send the head node to run_one_delayed_ref, 1833 * so that any accounting fixes can happen 1834 */ 1835 ref = &locked_ref->node; 1836 1837 if (extent_op && must_insert_reserved) { 1838 kfree(extent_op); 1839 extent_op = NULL; 1840 } 1841 1842 if (extent_op) { 1843 spin_unlock(&delayed_refs->lock); 1844 1845 ret = run_delayed_extent_op(trans, root, 1846 ref, extent_op); 1847 BUG_ON(ret); 1848 kfree(extent_op); 1849 1850 cond_resched(); 1851 spin_lock(&delayed_refs->lock); 1852 continue; 1853 } 1854 1855 list_del_init(&locked_ref->cluster); 1856 locked_ref = NULL; 1857 } 1858 1859 ref->in_tree = 0; 1860 rb_erase(&ref->rb_node, &delayed_refs->root); 1861 delayed_refs->num_entries--; 1862 1863 spin_unlock(&delayed_refs->lock); 1864 1865 ret = run_one_delayed_ref(trans, root, ref, extent_op, 1866 must_insert_reserved); 1867 BUG_ON(ret); 1868 1869 btrfs_put_delayed_ref(ref); 1870 kfree(extent_op); 1871 count++; 1872 1873 cond_resched(); 1874 spin_lock(&delayed_refs->lock); 1875 } 1876 return count; 1877 } 1878 1879 /* 1880 * this starts processing the delayed reference count updates and 1881 * extent insertions we have queued up so far. count can be 1882 * 0, which means to process everything in the tree at the start 1883 * of the run (but not newly added entries), or it can be some target 1884 * number you'd like to process. 1885 */ 1886 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1887 struct btrfs_root *root, unsigned long count) 1888 { 1889 struct rb_node *node; 1890 struct btrfs_delayed_ref_root *delayed_refs; 1891 struct btrfs_delayed_ref_node *ref; 1892 struct list_head cluster; 1893 int ret; 1894 int run_all = count == (unsigned long)-1; 1895 int run_most = 0; 1896 1897 if (root == root->fs_info->extent_root) 1898 root = root->fs_info->tree_root; 1899 1900 delayed_refs = &trans->transaction->delayed_refs; 1901 INIT_LIST_HEAD(&cluster); 1902 again: 1903 spin_lock(&delayed_refs->lock); 1904 if (count == 0) { 1905 count = delayed_refs->num_entries * 2; 1906 run_most = 1; 1907 } 1908 while (1) { 1909 if (!(run_all || run_most) && 1910 delayed_refs->num_heads_ready < 64) 1911 break; 1912 1913 /* 1914 * go find something we can process in the rbtree. 
We start at 1915 * the beginning of the tree, and then build a cluster 1916 * of refs to process starting at the first one we are able to 1917 * lock 1918 */ 1919 ret = btrfs_find_ref_cluster(trans, &cluster, 1920 delayed_refs->run_delayed_start); 1921 if (ret) 1922 break; 1923 1924 ret = run_clustered_refs(trans, root, &cluster); 1925 BUG_ON(ret < 0); 1926 1927 count -= min_t(unsigned long, ret, count); 1928 1929 if (count == 0) 1930 break; 1931 } 1932 1933 if (run_all) { 1934 node = rb_first(&delayed_refs->root); 1935 if (!node) 1936 goto out; 1937 count = (unsigned long)-1; 1938 1939 while (node) { 1940 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1941 rb_node); 1942 if (btrfs_delayed_ref_is_head(ref)) { 1943 struct btrfs_delayed_ref_head *head; 1944 1945 head = btrfs_delayed_node_to_head(ref); 1946 atomic_inc(&ref->refs); 1947 1948 spin_unlock(&delayed_refs->lock); 1949 mutex_lock(&head->mutex); 1950 mutex_unlock(&head->mutex); 1951 1952 btrfs_put_delayed_ref(ref); 1953 cond_resched(); 1954 goto again; 1955 } 1956 node = rb_next(node); 1957 } 1958 spin_unlock(&delayed_refs->lock); 1959 schedule_timeout(1); 1960 goto again; 1961 } 1962 out: 1963 spin_unlock(&delayed_refs->lock); 1964 return 0; 1965 } 1966 1967 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 1968 struct btrfs_root *root, 1969 u64 bytenr, u64 num_bytes, u64 flags, 1970 int is_data) 1971 { 1972 struct btrfs_delayed_extent_op *extent_op; 1973 int ret; 1974 1975 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 1976 if (!extent_op) 1977 return -ENOMEM; 1978 1979 extent_op->flags_to_set = flags; 1980 extent_op->update_flags = 1; 1981 extent_op->update_key = 0; 1982 extent_op->is_data = is_data ? 1 : 0; 1983 1984 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 1985 if (ret) 1986 kfree(extent_op); 1987 return ret; 1988 } 1989 1990 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 1991 struct btrfs_root *root, 1992 struct btrfs_path *path, 1993 u64 objectid, u64 offset, u64 bytenr) 1994 { 1995 struct btrfs_delayed_ref_head *head; 1996 struct btrfs_delayed_ref_node *ref; 1997 struct btrfs_delayed_data_ref *data_ref; 1998 struct btrfs_delayed_ref_root *delayed_refs; 1999 struct rb_node *node; 2000 int ret = 0; 2001 2002 ret = -ENOENT; 2003 delayed_refs = &trans->transaction->delayed_refs; 2004 spin_lock(&delayed_refs->lock); 2005 head = btrfs_find_delayed_ref_head(trans, bytenr); 2006 if (!head) 2007 goto out; 2008 2009 if (!mutex_trylock(&head->mutex)) { 2010 atomic_inc(&head->node.refs); 2011 spin_unlock(&delayed_refs->lock); 2012 2013 btrfs_release_path(root->fs_info->extent_root, path); 2014 2015 mutex_lock(&head->mutex); 2016 mutex_unlock(&head->mutex); 2017 btrfs_put_delayed_ref(&head->node); 2018 return -EAGAIN; 2019 } 2020 2021 node = rb_prev(&head->node.rb_node); 2022 if (!node) 2023 goto out_unlock; 2024 2025 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2026 2027 if (ref->bytenr != bytenr) 2028 goto out_unlock; 2029 2030 ret = 1; 2031 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2032 goto out_unlock; 2033 2034 data_ref = btrfs_delayed_node_to_data_ref(ref); 2035 2036 node = rb_prev(node); 2037 if (node) { 2038 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2039 if (ref->bytenr == bytenr) 2040 goto out_unlock; 2041 } 2042 2043 if (data_ref->root != root->root_key.objectid || 2044 data_ref->objectid != objectid || data_ref->offset != offset) 2045 goto out_unlock; 2046 2047 ret = 0; 2048 out_unlock: 2049 
mutex_unlock(&head->mutex); 2050 out: 2051 spin_unlock(&delayed_refs->lock); 2052 return ret; 2053 } 2054 2055 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2056 struct btrfs_root *root, 2057 struct btrfs_path *path, 2058 u64 objectid, u64 offset, u64 bytenr) 2059 { 2060 struct btrfs_root *extent_root = root->fs_info->extent_root; 2061 struct extent_buffer *leaf; 2062 struct btrfs_extent_data_ref *ref; 2063 struct btrfs_extent_inline_ref *iref; 2064 struct btrfs_extent_item *ei; 2065 struct btrfs_key key; 2066 u32 item_size; 2067 int ret; 2068 2069 key.objectid = bytenr; 2070 key.offset = (u64)-1; 2071 key.type = BTRFS_EXTENT_ITEM_KEY; 2072 2073 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2074 if (ret < 0) 2075 goto out; 2076 BUG_ON(ret == 0); 2077 2078 ret = -ENOENT; 2079 if (path->slots[0] == 0) 2080 goto out; 2081 2082 path->slots[0]--; 2083 leaf = path->nodes[0]; 2084 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2085 2086 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2087 goto out; 2088 2089 ret = 1; 2090 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2091 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2092 if (item_size < sizeof(*ei)) { 2093 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2094 goto out; 2095 } 2096 #endif 2097 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2098 2099 if (item_size != sizeof(*ei) + 2100 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2101 goto out; 2102 2103 if (btrfs_extent_generation(leaf, ei) <= 2104 btrfs_root_last_snapshot(&root->root_item)) 2105 goto out; 2106 2107 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2108 if (btrfs_extent_inline_ref_type(leaf, iref) != 2109 BTRFS_EXTENT_DATA_REF_KEY) 2110 goto out; 2111 2112 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2113 if (btrfs_extent_refs(leaf, ei) != 2114 btrfs_extent_data_ref_count(leaf, ref) || 2115 btrfs_extent_data_ref_root(leaf, ref) != 2116 root->root_key.objectid || 2117 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2118 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2119 goto out; 2120 2121 ret = 0; 2122 out: 2123 return ret; 2124 } 2125 2126 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2127 struct btrfs_root *root, 2128 u64 objectid, u64 offset, u64 bytenr) 2129 { 2130 struct btrfs_path *path; 2131 int ret; 2132 int ret2; 2133 2134 path = btrfs_alloc_path(); 2135 if (!path) 2136 return -ENOENT; 2137 2138 do { 2139 ret = check_committed_ref(trans, root, path, objectid, 2140 offset, bytenr); 2141 if (ret && ret != -ENOENT) 2142 goto out; 2143 2144 ret2 = check_delayed_ref(trans, root, path, objectid, 2145 offset, bytenr); 2146 } while (ret2 == -EAGAIN); 2147 2148 if (ret2 && ret2 != -ENOENT) { 2149 ret = ret2; 2150 goto out; 2151 } 2152 2153 if (ret != -ENOENT || ret2 != -ENOENT) 2154 ret = 0; 2155 out: 2156 btrfs_free_path(path); 2157 return ret; 2158 } 2159 2160 #if 0 2161 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2162 struct extent_buffer *buf, u32 nr_extents) 2163 { 2164 struct btrfs_key key; 2165 struct btrfs_file_extent_item *fi; 2166 u64 root_gen; 2167 u32 nritems; 2168 int i; 2169 int level; 2170 int ret = 0; 2171 int shared = 0; 2172 2173 if (!root->ref_cows) 2174 return 0; 2175 2176 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2177 shared = 0; 2178 root_gen = root->root_key.offset; 2179 } else { 2180 shared = 1; 2181 root_gen = trans->transid - 1; 2182 } 2183 2184 level = 
btrfs_header_level(buf); 2185 nritems = btrfs_header_nritems(buf); 2186 2187 if (level == 0) { 2188 struct btrfs_leaf_ref *ref; 2189 struct btrfs_extent_info *info; 2190 2191 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2192 if (!ref) { 2193 ret = -ENOMEM; 2194 goto out; 2195 } 2196 2197 ref->root_gen = root_gen; 2198 ref->bytenr = buf->start; 2199 ref->owner = btrfs_header_owner(buf); 2200 ref->generation = btrfs_header_generation(buf); 2201 ref->nritems = nr_extents; 2202 info = ref->extents; 2203 2204 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2205 u64 disk_bytenr; 2206 btrfs_item_key_to_cpu(buf, &key, i); 2207 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2208 continue; 2209 fi = btrfs_item_ptr(buf, i, 2210 struct btrfs_file_extent_item); 2211 if (btrfs_file_extent_type(buf, fi) == 2212 BTRFS_FILE_EXTENT_INLINE) 2213 continue; 2214 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2215 if (disk_bytenr == 0) 2216 continue; 2217 2218 info->bytenr = disk_bytenr; 2219 info->num_bytes = 2220 btrfs_file_extent_disk_num_bytes(buf, fi); 2221 info->objectid = key.objectid; 2222 info->offset = key.offset; 2223 info++; 2224 } 2225 2226 ret = btrfs_add_leaf_ref(root, ref, shared); 2227 if (ret == -EEXIST && shared) { 2228 struct btrfs_leaf_ref *old; 2229 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2230 BUG_ON(!old); 2231 btrfs_remove_leaf_ref(root, old); 2232 btrfs_free_leaf_ref(root, old); 2233 ret = btrfs_add_leaf_ref(root, ref, shared); 2234 } 2235 WARN_ON(ret); 2236 btrfs_free_leaf_ref(root, ref); 2237 } 2238 out: 2239 return ret; 2240 } 2241 2242 /* when a block goes through cow, we update the reference counts of 2243 * everything that block points to. The internal pointers of the block 2244 * can be in just about any order, and it is likely to have clusters of 2245 * things that are close together and clusters of things that are not. 2246 * 2247 * To help reduce the seeks that come with updating all of these reference 2248 * counts, sort them by byte number before actual updates are done. 2249 * 2250 * struct refsort is used to match byte number to slot in the btree block. 2251 * we sort based on the byte number and then use the slot to actually 2252 * find the item. 2253 * 2254 * struct refsort is smaller than struct btrfs_item and smaller than 2255 * struct btrfs_key_ptr. Since we're currently limited to the page size 2256 * for a btree block, there's no way for a kmalloc of refsorts for a 2257 * single node to be bigger than a page.
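 * (Rough arithmetic, for illustration only: struct btrfs_item is about 25
 * bytes on disk, so a 4K leaf holds at most roughly 163 items, and 163
 * refsorts at 16 bytes apiece is around 2.6K, comfortably under one page.)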
2258 */ 2259 struct refsort { 2260 u64 bytenr; 2261 u32 slot; 2262 }; 2263 2264 /* 2265 * for passing into sort() 2266 */ 2267 static int refsort_cmp(const void *a_void, const void *b_void) 2268 { 2269 const struct refsort *a = a_void; 2270 const struct refsort *b = b_void; 2271 2272 if (a->bytenr < b->bytenr) 2273 return -1; 2274 if (a->bytenr > b->bytenr) 2275 return 1; 2276 return 0; 2277 } 2278 #endif 2279 2280 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2281 struct btrfs_root *root, 2282 struct extent_buffer *buf, 2283 int full_backref, int inc) 2284 { 2285 u64 bytenr; 2286 u64 num_bytes; 2287 u64 parent; 2288 u64 ref_root; 2289 u32 nritems; 2290 struct btrfs_key key; 2291 struct btrfs_file_extent_item *fi; 2292 int i; 2293 int level; 2294 int ret = 0; 2295 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2296 u64, u64, u64, u64, u64, u64); 2297 2298 ref_root = btrfs_header_owner(buf); 2299 nritems = btrfs_header_nritems(buf); 2300 level = btrfs_header_level(buf); 2301 2302 if (!root->ref_cows && level == 0) 2303 return 0; 2304 2305 if (inc) 2306 process_func = btrfs_inc_extent_ref; 2307 else 2308 process_func = btrfs_free_extent; 2309 2310 if (full_backref) 2311 parent = buf->start; 2312 else 2313 parent = 0; 2314 2315 for (i = 0; i < nritems; i++) { 2316 if (level == 0) { 2317 btrfs_item_key_to_cpu(buf, &key, i); 2318 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2319 continue; 2320 fi = btrfs_item_ptr(buf, i, 2321 struct btrfs_file_extent_item); 2322 if (btrfs_file_extent_type(buf, fi) == 2323 BTRFS_FILE_EXTENT_INLINE) 2324 continue; 2325 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2326 if (bytenr == 0) 2327 continue; 2328 2329 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2330 key.offset -= btrfs_file_extent_offset(buf, fi); 2331 ret = process_func(trans, root, bytenr, num_bytes, 2332 parent, ref_root, key.objectid, 2333 key.offset); 2334 if (ret) 2335 goto fail; 2336 } else { 2337 bytenr = btrfs_node_blockptr(buf, i); 2338 num_bytes = btrfs_level_size(root, level - 1); 2339 ret = process_func(trans, root, bytenr, num_bytes, 2340 parent, ref_root, level - 1, 0); 2341 if (ret) 2342 goto fail; 2343 } 2344 } 2345 return 0; 2346 fail: 2347 BUG(); 2348 return ret; 2349 } 2350 2351 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2352 struct extent_buffer *buf, int full_backref) 2353 { 2354 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2355 } 2356 2357 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2358 struct extent_buffer *buf, int full_backref) 2359 { 2360 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2361 } 2362 2363 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2364 struct btrfs_root *root, 2365 struct btrfs_path *path, 2366 struct btrfs_block_group_cache *cache) 2367 { 2368 int ret; 2369 struct btrfs_root *extent_root = root->fs_info->extent_root; 2370 unsigned long bi; 2371 struct extent_buffer *leaf; 2372 2373 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2374 if (ret < 0) 2375 goto fail; 2376 BUG_ON(ret); 2377 2378 leaf = path->nodes[0]; 2379 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2380 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2381 btrfs_mark_buffer_dirty(leaf); 2382 btrfs_release_path(extent_root, path); 2383 fail: 2384 if (ret) 2385 return ret; 2386 return 0; 2387 2388 } 2389 2390 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2391 
struct btrfs_root *root) 2392 { 2393 struct btrfs_block_group_cache *cache, *entry; 2394 struct rb_node *n; 2395 int err = 0; 2396 int werr = 0; 2397 struct btrfs_path *path; 2398 u64 last = 0; 2399 2400 path = btrfs_alloc_path(); 2401 if (!path) 2402 return -ENOMEM; 2403 2404 while (1) { 2405 cache = NULL; 2406 spin_lock(&root->fs_info->block_group_cache_lock); 2407 for (n = rb_first(&root->fs_info->block_group_cache_tree); 2408 n; n = rb_next(n)) { 2409 entry = rb_entry(n, struct btrfs_block_group_cache, 2410 cache_node); 2411 if (entry->dirty) { 2412 cache = entry; 2413 break; 2414 } 2415 } 2416 spin_unlock(&root->fs_info->block_group_cache_lock); 2417 2418 if (!cache) 2419 break; 2420 2421 cache->dirty = 0; 2422 last += cache->key.offset; 2423 2424 err = write_one_cache_group(trans, root, 2425 path, cache); 2426 /* 2427 * if we fail to write the cache group, we want 2428 * to keep it marked dirty in hopes that a later 2429 * write will work 2430 */ 2431 if (err) { 2432 werr = err; 2433 continue; 2434 } 2435 } 2436 btrfs_free_path(path); 2437 return werr; 2438 } 2439 2440 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2441 { 2442 struct btrfs_block_group_cache *block_group; 2443 int readonly = 0; 2444 2445 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2446 if (!block_group || block_group->ro) 2447 readonly = 1; 2448 if (block_group) 2449 btrfs_put_block_group(block_group); 2450 return readonly; 2451 } 2452 2453 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2454 u64 total_bytes, u64 bytes_used, 2455 struct btrfs_space_info **space_info) 2456 { 2457 struct btrfs_space_info *found; 2458 2459 found = __find_space_info(info, flags); 2460 if (found) { 2461 spin_lock(&found->lock); 2462 found->total_bytes += total_bytes; 2463 found->bytes_used += bytes_used; 2464 found->full = 0; 2465 spin_unlock(&found->lock); 2466 *space_info = found; 2467 return 0; 2468 } 2469 found = kzalloc(sizeof(*found), GFP_NOFS); 2470 if (!found) 2471 return -ENOMEM; 2472 2473 INIT_LIST_HEAD(&found->block_groups); 2474 init_rwsem(&found->groups_sem); 2475 spin_lock_init(&found->lock); 2476 found->flags = flags; 2477 found->total_bytes = total_bytes; 2478 found->bytes_used = bytes_used; 2479 found->bytes_pinned = 0; 2480 found->bytes_reserved = 0; 2481 found->bytes_readonly = 0; 2482 found->bytes_delalloc = 0; 2483 found->full = 0; 2484 found->force_alloc = 0; 2485 *space_info = found; 2486 list_add_rcu(&found->list, &info->space_info); 2487 return 0; 2488 } 2489 2490 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2491 { 2492 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2493 BTRFS_BLOCK_GROUP_RAID1 | 2494 BTRFS_BLOCK_GROUP_RAID10 | 2495 BTRFS_BLOCK_GROUP_DUP); 2496 if (extra_flags) { 2497 if (flags & BTRFS_BLOCK_GROUP_DATA) 2498 fs_info->avail_data_alloc_bits |= extra_flags; 2499 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2500 fs_info->avail_metadata_alloc_bits |= extra_flags; 2501 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2502 fs_info->avail_system_alloc_bits |= extra_flags; 2503 } 2504 } 2505 2506 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 2507 { 2508 spin_lock(&cache->space_info->lock); 2509 spin_lock(&cache->lock); 2510 if (!cache->ro) { 2511 cache->space_info->bytes_readonly += cache->key.offset - 2512 btrfs_block_group_used(&cache->item); 2513 cache->ro = 1; 2514 } 2515 spin_unlock(&cache->lock); 2516 spin_unlock(&cache->space_info->lock); 2517 } 2518 2519 u64 btrfs_reduce_alloc_profile(struct 
btrfs_root *root, u64 flags) 2520 { 2521 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2522 2523 if (num_devices == 1) 2524 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2525 if (num_devices < 4) 2526 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2527 2528 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2529 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2530 BTRFS_BLOCK_GROUP_RAID10))) { 2531 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2532 } 2533 2534 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2535 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2536 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2537 } 2538 2539 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2540 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2541 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2542 (flags & BTRFS_BLOCK_GROUP_DUP))) 2543 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2544 return flags; 2545 } 2546 2547 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2548 { 2549 struct btrfs_fs_info *info = root->fs_info; 2550 u64 alloc_profile; 2551 2552 if (data) { 2553 alloc_profile = info->avail_data_alloc_bits & 2554 info->data_alloc_profile; 2555 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2556 } else if (root == root->fs_info->chunk_root) { 2557 alloc_profile = info->avail_system_alloc_bits & 2558 info->system_alloc_profile; 2559 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2560 } else { 2561 alloc_profile = info->avail_metadata_alloc_bits & 2562 info->metadata_alloc_profile; 2563 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2564 } 2565 2566 return btrfs_reduce_alloc_profile(root, data); 2567 } 2568 2569 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2570 { 2571 u64 alloc_target; 2572 2573 alloc_target = btrfs_get_alloc_profile(root, 1); 2574 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2575 alloc_target); 2576 } 2577 2578 /* 2579 * for now this just makes sure we have at least 5% of our metadata space free 2580 * for use. 
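 * (A worked example with made-up numbers: with 8GB of metadata space and the
 * space_info not yet full, the threshold below is 8GB * 80 / 100 = 6.4GB;
 * once used + reserved + pinned + readonly crosses that, we first try to
 * allocate another metadata chunk, then fall back to committing the
 * transaction, and only after both fail do we return -ENOSPC.)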
2581 */ 2582 int btrfs_check_metadata_free_space(struct btrfs_root *root) 2583 { 2584 struct btrfs_fs_info *info = root->fs_info; 2585 struct btrfs_space_info *meta_sinfo; 2586 u64 alloc_target, thresh; 2587 int committed = 0, ret; 2588 2589 /* get the space info for where the metadata will live */ 2590 alloc_target = btrfs_get_alloc_profile(root, 0); 2591 meta_sinfo = __find_space_info(info, alloc_target); 2592 2593 again: 2594 spin_lock(&meta_sinfo->lock); 2595 if (!meta_sinfo->full) 2596 thresh = meta_sinfo->total_bytes * 80; 2597 else 2598 thresh = meta_sinfo->total_bytes * 95; 2599 2600 do_div(thresh, 100); 2601 2602 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2603 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2604 struct btrfs_trans_handle *trans; 2605 if (!meta_sinfo->full) { 2606 meta_sinfo->force_alloc = 1; 2607 spin_unlock(&meta_sinfo->lock); 2608 2609 trans = btrfs_start_transaction(root, 1); 2610 if (!trans) 2611 return -ENOMEM; 2612 2613 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2614 2 * 1024 * 1024, alloc_target, 0); 2615 btrfs_end_transaction(trans, root); 2616 goto again; 2617 } 2618 spin_unlock(&meta_sinfo->lock); 2619 2620 if (!committed) { 2621 committed = 1; 2622 trans = btrfs_join_transaction(root, 1); 2623 if (!trans) 2624 return -ENOMEM; 2625 ret = btrfs_commit_transaction(trans, root); 2626 if (ret) 2627 return ret; 2628 goto again; 2629 } 2630 return -ENOSPC; 2631 } 2632 spin_unlock(&meta_sinfo->lock); 2633 2634 return 0; 2635 } 2636 2637 /* 2638 * This will check the space that the inode allocates from to make sure we have 2639 * enough space for bytes. 2640 */ 2641 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2642 u64 bytes) 2643 { 2644 struct btrfs_space_info *data_sinfo; 2645 int ret = 0, committed = 0; 2646 2647 /* make sure bytes are sectorsize aligned */ 2648 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2649 2650 data_sinfo = BTRFS_I(inode)->space_info; 2651 again: 2652 /* make sure we have enough space to handle the data first */ 2653 spin_lock(&data_sinfo->lock); 2654 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 2655 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 2656 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 2657 data_sinfo->bytes_may_use < bytes) { 2658 struct btrfs_trans_handle *trans; 2659 2660 /* 2661 * if we don't have enough free bytes in this space then we need 2662 * to alloc a new chunk. 
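 * "Enough" here means what is left of total_bytes after subtracting
 * bytes_used, bytes_delalloc, bytes_reserved, bytes_pinned, bytes_readonly
 * and bytes_may_use, i.e. everything already spoken for.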
2663 */ 2664 if (!data_sinfo->full) { 2665 u64 alloc_target; 2666 2667 data_sinfo->force_alloc = 1; 2668 spin_unlock(&data_sinfo->lock); 2669 2670 alloc_target = btrfs_get_alloc_profile(root, 1); 2671 trans = btrfs_start_transaction(root, 1); 2672 if (!trans) 2673 return -ENOMEM; 2674 2675 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2676 bytes + 2 * 1024 * 1024, 2677 alloc_target, 0); 2678 btrfs_end_transaction(trans, root); 2679 if (ret) 2680 return ret; 2681 goto again; 2682 } 2683 spin_unlock(&data_sinfo->lock); 2684 2685 /* commit the current transaction and try again */ 2686 if (!committed) { 2687 committed = 1; 2688 trans = btrfs_join_transaction(root, 1); 2689 if (!trans) 2690 return -ENOMEM; 2691 ret = btrfs_commit_transaction(trans, root); 2692 if (ret) 2693 return ret; 2694 goto again; 2695 } 2696 2697 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2698 ", %llu bytes_used, %llu bytes_reserved, " 2699 "%llu bytes_pinned, %llu bytes_readonly, %llu may use " 2700 "%llu total\n", (unsigned long long)bytes, 2701 (unsigned long long)data_sinfo->bytes_delalloc, 2702 (unsigned long long)data_sinfo->bytes_used, 2703 (unsigned long long)data_sinfo->bytes_reserved, 2704 (unsigned long long)data_sinfo->bytes_pinned, 2705 (unsigned long long)data_sinfo->bytes_readonly, 2706 (unsigned long long)data_sinfo->bytes_may_use, 2707 (unsigned long long)data_sinfo->total_bytes); 2708 return -ENOSPC; 2709 } 2710 data_sinfo->bytes_may_use += bytes; 2711 BTRFS_I(inode)->reserved_bytes += bytes; 2712 spin_unlock(&data_sinfo->lock); 2713 2714 return btrfs_check_metadata_free_space(root); 2715 } 2716 2717 /* 2718 * if there was an error for whatever reason after calling 2719 * btrfs_check_data_free_space, call this so we can cleanup the counters. 2720 */ 2721 void btrfs_free_reserved_data_space(struct btrfs_root *root, 2722 struct inode *inode, u64 bytes) 2723 { 2724 struct btrfs_space_info *data_sinfo; 2725 2726 /* make sure bytes are sectorsize aligned */ 2727 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2728 2729 data_sinfo = BTRFS_I(inode)->space_info; 2730 spin_lock(&data_sinfo->lock); 2731 data_sinfo->bytes_may_use -= bytes; 2732 BTRFS_I(inode)->reserved_bytes -= bytes; 2733 spin_unlock(&data_sinfo->lock); 2734 } 2735 2736 /* called when we are adding a delalloc extent to the inode's io_tree */ 2737 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 2738 u64 bytes) 2739 { 2740 struct btrfs_space_info *data_sinfo; 2741 2742 /* get the space info for where this inode will be storing its data */ 2743 data_sinfo = BTRFS_I(inode)->space_info; 2744 2745 /* make sure we have enough space to handle the data first */ 2746 spin_lock(&data_sinfo->lock); 2747 data_sinfo->bytes_delalloc += bytes; 2748 2749 /* 2750 * we are adding a delalloc extent without calling 2751 * btrfs_check_data_free_space first. 
This happens on a weird 2752 * writepage condition, but shouldn't hurt our accounting 2753 */ 2754 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 2755 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 2756 BTRFS_I(inode)->reserved_bytes = 0; 2757 } else { 2758 data_sinfo->bytes_may_use -= bytes; 2759 BTRFS_I(inode)->reserved_bytes -= bytes; 2760 } 2761 2762 spin_unlock(&data_sinfo->lock); 2763 } 2764 2765 /* called when we are clearing a delalloc extent from the inode's io_tree */ 2766 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2767 u64 bytes) 2768 { 2769 struct btrfs_space_info *info; 2770 2771 info = BTRFS_I(inode)->space_info; 2772 2773 spin_lock(&info->lock); 2774 info->bytes_delalloc -= bytes; 2775 spin_unlock(&info->lock); 2776 } 2777 2778 static void force_metadata_allocation(struct btrfs_fs_info *info) 2779 { 2780 struct list_head *head = &info->space_info; 2781 struct btrfs_space_info *found; 2782 2783 rcu_read_lock(); 2784 list_for_each_entry_rcu(found, head, list) { 2785 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 2786 found->force_alloc = 1; 2787 } 2788 rcu_read_unlock(); 2789 } 2790 2791 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 2792 struct btrfs_root *extent_root, u64 alloc_bytes, 2793 u64 flags, int force) 2794 { 2795 struct btrfs_space_info *space_info; 2796 struct btrfs_fs_info *fs_info = extent_root->fs_info; 2797 u64 thresh; 2798 int ret = 0; 2799 2800 mutex_lock(&fs_info->chunk_mutex); 2801 2802 flags = btrfs_reduce_alloc_profile(extent_root, flags); 2803 2804 space_info = __find_space_info(extent_root->fs_info, flags); 2805 if (!space_info) { 2806 ret = update_space_info(extent_root->fs_info, flags, 2807 0, 0, &space_info); 2808 BUG_ON(ret); 2809 } 2810 BUG_ON(!space_info); 2811 2812 spin_lock(&space_info->lock); 2813 if (space_info->force_alloc) { 2814 force = 1; 2815 space_info->force_alloc = 0; 2816 } 2817 if (space_info->full) { 2818 spin_unlock(&space_info->lock); 2819 goto out; 2820 } 2821 2822 thresh = space_info->total_bytes - space_info->bytes_readonly; 2823 thresh = div_factor(thresh, 6); 2824 if (!force && 2825 (space_info->bytes_used + space_info->bytes_pinned + 2826 space_info->bytes_reserved + alloc_bytes) < thresh) { 2827 spin_unlock(&space_info->lock); 2828 goto out; 2829 } 2830 spin_unlock(&space_info->lock); 2831 2832 /* 2833 * if we're doing a data chunk, go ahead and make sure that 2834 * we keep a reasonable number of metadata chunks allocated in the 2835 * FS as well.
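 * For example, with a metadata_ratio of 8 (an illustrative value; the real
 * one comes from the metadata_ratio mount option), every 8th data chunk
 * allocation marks the metadata space_infos with force_alloc, so the next
 * metadata do_chunk_alloc goes ahead even below the usual threshold.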
2836 */ 2837 if (flags & BTRFS_BLOCK_GROUP_DATA) { 2838 fs_info->data_chunk_allocations++; 2839 if (!(fs_info->data_chunk_allocations % 2840 fs_info->metadata_ratio)) 2841 force_metadata_allocation(fs_info); 2842 } 2843 2844 ret = btrfs_alloc_chunk(trans, extent_root, flags); 2845 if (ret) 2846 space_info->full = 1; 2847 out: 2848 mutex_unlock(&extent_root->fs_info->chunk_mutex); 2849 return ret; 2850 } 2851 2852 static int update_block_group(struct btrfs_trans_handle *trans, 2853 struct btrfs_root *root, 2854 u64 bytenr, u64 num_bytes, int alloc, 2855 int mark_free) 2856 { 2857 struct btrfs_block_group_cache *cache; 2858 struct btrfs_fs_info *info = root->fs_info; 2859 u64 total = num_bytes; 2860 u64 old_val; 2861 u64 byte_in_group; 2862 2863 /* block accounting for super block */ 2864 spin_lock(&info->delalloc_lock); 2865 old_val = btrfs_super_bytes_used(&info->super_copy); 2866 if (alloc) 2867 old_val += num_bytes; 2868 else 2869 old_val -= num_bytes; 2870 btrfs_set_super_bytes_used(&info->super_copy, old_val); 2871 2872 /* block accounting for root item */ 2873 old_val = btrfs_root_used(&root->root_item); 2874 if (alloc) 2875 old_val += num_bytes; 2876 else 2877 old_val -= num_bytes; 2878 btrfs_set_root_used(&root->root_item, old_val); 2879 spin_unlock(&info->delalloc_lock); 2880 2881 while (total) { 2882 cache = btrfs_lookup_block_group(info, bytenr); 2883 if (!cache) 2884 return -1; 2885 byte_in_group = bytenr - cache->key.objectid; 2886 WARN_ON(byte_in_group > cache->key.offset); 2887 2888 spin_lock(&cache->space_info->lock); 2889 spin_lock(&cache->lock); 2890 cache->dirty = 1; 2891 old_val = btrfs_block_group_used(&cache->item); 2892 num_bytes = min(total, cache->key.offset - byte_in_group); 2893 if (alloc) { 2894 old_val += num_bytes; 2895 cache->space_info->bytes_used += num_bytes; 2896 if (cache->ro) 2897 cache->space_info->bytes_readonly -= num_bytes; 2898 btrfs_set_block_group_used(&cache->item, old_val); 2899 spin_unlock(&cache->lock); 2900 spin_unlock(&cache->space_info->lock); 2901 } else { 2902 old_val -= num_bytes; 2903 cache->space_info->bytes_used -= num_bytes; 2904 if (cache->ro) 2905 cache->space_info->bytes_readonly += num_bytes; 2906 btrfs_set_block_group_used(&cache->item, old_val); 2907 spin_unlock(&cache->lock); 2908 spin_unlock(&cache->space_info->lock); 2909 if (mark_free) { 2910 int ret; 2911 2912 ret = btrfs_discard_extent(root, bytenr, 2913 num_bytes); 2914 WARN_ON(ret); 2915 2916 ret = btrfs_add_free_space(cache, bytenr, 2917 num_bytes); 2918 WARN_ON(ret); 2919 } 2920 } 2921 btrfs_put_block_group(cache); 2922 total -= num_bytes; 2923 bytenr += num_bytes; 2924 } 2925 return 0; 2926 } 2927 2928 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 2929 { 2930 struct btrfs_block_group_cache *cache; 2931 u64 bytenr; 2932 2933 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 2934 if (!cache) 2935 return 0; 2936 2937 bytenr = cache->key.objectid; 2938 btrfs_put_block_group(cache); 2939 2940 return bytenr; 2941 } 2942 2943 int btrfs_update_pinned_extents(struct btrfs_root *root, 2944 u64 bytenr, u64 num, int pin) 2945 { 2946 u64 len; 2947 struct btrfs_block_group_cache *cache; 2948 struct btrfs_fs_info *fs_info = root->fs_info; 2949 2950 if (pin) { 2951 set_extent_dirty(&fs_info->pinned_extents, 2952 bytenr, bytenr + num - 1, GFP_NOFS); 2953 } else { 2954 clear_extent_dirty(&fs_info->pinned_extents, 2955 bytenr, bytenr + num - 1, GFP_NOFS); 2956 } 2957 2958 while (num > 0) { 2959 cache = btrfs_lookup_block_group(fs_info, 
bytenr); 2960 BUG_ON(!cache); 2961 len = min(num, cache->key.offset - 2962 (bytenr - cache->key.objectid)); 2963 if (pin) { 2964 spin_lock(&cache->space_info->lock); 2965 spin_lock(&cache->lock); 2966 cache->pinned += len; 2967 cache->space_info->bytes_pinned += len; 2968 spin_unlock(&cache->lock); 2969 spin_unlock(&cache->space_info->lock); 2970 fs_info->total_pinned += len; 2971 } else { 2972 spin_lock(&cache->space_info->lock); 2973 spin_lock(&cache->lock); 2974 cache->pinned -= len; 2975 cache->space_info->bytes_pinned -= len; 2976 spin_unlock(&cache->lock); 2977 spin_unlock(&cache->space_info->lock); 2978 fs_info->total_pinned -= len; 2979 if (cache->cached) 2980 btrfs_add_free_space(cache, bytenr, len); 2981 } 2982 btrfs_put_block_group(cache); 2983 bytenr += len; 2984 num -= len; 2985 } 2986 return 0; 2987 } 2988 2989 static int update_reserved_extents(struct btrfs_root *root, 2990 u64 bytenr, u64 num, int reserve) 2991 { 2992 u64 len; 2993 struct btrfs_block_group_cache *cache; 2994 struct btrfs_fs_info *fs_info = root->fs_info; 2995 2996 while (num > 0) { 2997 cache = btrfs_lookup_block_group(fs_info, bytenr); 2998 BUG_ON(!cache); 2999 len = min(num, cache->key.offset - 3000 (bytenr - cache->key.objectid)); 3001 3002 spin_lock(&cache->space_info->lock); 3003 spin_lock(&cache->lock); 3004 if (reserve) { 3005 cache->reserved += len; 3006 cache->space_info->bytes_reserved += len; 3007 } else { 3008 cache->reserved -= len; 3009 cache->space_info->bytes_reserved -= len; 3010 } 3011 spin_unlock(&cache->lock); 3012 spin_unlock(&cache->space_info->lock); 3013 btrfs_put_block_group(cache); 3014 bytenr += len; 3015 num -= len; 3016 } 3017 return 0; 3018 } 3019 3020 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3021 { 3022 u64 last = 0; 3023 u64 start; 3024 u64 end; 3025 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 3026 int ret; 3027 3028 while (1) { 3029 ret = find_first_extent_bit(pinned_extents, last, 3030 &start, &end, EXTENT_DIRTY); 3031 if (ret) 3032 break; 3033 set_extent_dirty(copy, start, end, GFP_NOFS); 3034 last = end + 1; 3035 } 3036 return 0; 3037 } 3038 3039 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3040 struct btrfs_root *root, 3041 struct extent_io_tree *unpin) 3042 { 3043 u64 start; 3044 u64 end; 3045 int ret; 3046 3047 while (1) { 3048 ret = find_first_extent_bit(unpin, 0, &start, &end, 3049 EXTENT_DIRTY); 3050 if (ret) 3051 break; 3052 3053 ret = btrfs_discard_extent(root, start, end + 1 - start); 3054 3055 /* unlocks the pinned mutex */ 3056 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 3057 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3058 3059 cond_resched(); 3060 } 3061 return ret; 3062 } 3063 3064 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3065 struct btrfs_root *root, 3066 struct btrfs_path *path, 3067 u64 bytenr, u64 num_bytes, int is_data, 3068 struct extent_buffer **must_clean) 3069 { 3070 int err = 0; 3071 struct extent_buffer *buf; 3072 3073 if (is_data) 3074 goto pinit; 3075 3076 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3077 if (!buf) 3078 goto pinit; 3079 3080 /* we can reuse a block if it hasn't been written 3081 * and it is from this transaction. We can't 3082 * reuse anything from the tree log root because 3083 * it has tiny sub-transactions. 
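 * The buffer must also be uptodate and we must be able to take its tree
 * lock without blocking; blocks that fail any of these checks are simply
 * pinned below instead.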
3084 */ 3085 if (btrfs_buffer_uptodate(buf, 0) && 3086 btrfs_try_tree_lock(buf)) { 3087 u64 header_owner = btrfs_header_owner(buf); 3088 u64 header_transid = btrfs_header_generation(buf); 3089 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3090 header_transid == trans->transid && 3091 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3092 *must_clean = buf; 3093 return 1; 3094 } 3095 btrfs_tree_unlock(buf); 3096 } 3097 free_extent_buffer(buf); 3098 pinit: 3099 btrfs_set_path_blocking(path); 3100 /* unlocks the pinned mutex */ 3101 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3102 3103 BUG_ON(err < 0); 3104 return 0; 3105 } 3106 3107 3108 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3109 struct btrfs_root *root, 3110 u64 bytenr, u64 num_bytes, u64 parent, 3111 u64 root_objectid, u64 owner_objectid, 3112 u64 owner_offset, int refs_to_drop, 3113 struct btrfs_delayed_extent_op *extent_op) 3114 { 3115 struct btrfs_key key; 3116 struct btrfs_path *path; 3117 struct btrfs_fs_info *info = root->fs_info; 3118 struct btrfs_root *extent_root = info->extent_root; 3119 struct extent_buffer *leaf; 3120 struct btrfs_extent_item *ei; 3121 struct btrfs_extent_inline_ref *iref; 3122 int ret; 3123 int is_data; 3124 int extent_slot = 0; 3125 int found_extent = 0; 3126 int num_to_del = 1; 3127 u32 item_size; 3128 u64 refs; 3129 3130 path = btrfs_alloc_path(); 3131 if (!path) 3132 return -ENOMEM; 3133 3134 path->reada = 1; 3135 path->leave_spinning = 1; 3136 3137 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3138 BUG_ON(!is_data && refs_to_drop != 1); 3139 3140 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3141 bytenr, num_bytes, parent, 3142 root_objectid, owner_objectid, 3143 owner_offset); 3144 if (ret == 0) { 3145 extent_slot = path->slots[0]; 3146 while (extent_slot >= 0) { 3147 btrfs_item_key_to_cpu(path->nodes[0], &key, 3148 extent_slot); 3149 if (key.objectid != bytenr) 3150 break; 3151 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3152 key.offset == num_bytes) { 3153 found_extent = 1; 3154 break; 3155 } 3156 if (path->slots[0] - extent_slot > 5) 3157 break; 3158 extent_slot--; 3159 } 3160 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3161 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3162 if (found_extent && item_size < sizeof(*ei)) 3163 found_extent = 0; 3164 #endif 3165 if (!found_extent) { 3166 BUG_ON(iref); 3167 ret = remove_extent_backref(trans, extent_root, path, 3168 NULL, refs_to_drop, 3169 is_data); 3170 BUG_ON(ret); 3171 btrfs_release_path(extent_root, path); 3172 path->leave_spinning = 1; 3173 3174 key.objectid = bytenr; 3175 key.type = BTRFS_EXTENT_ITEM_KEY; 3176 key.offset = num_bytes; 3177 3178 ret = btrfs_search_slot(trans, extent_root, 3179 &key, path, -1, 1); 3180 if (ret) { 3181 printk(KERN_ERR "umm, got %d back from search" 3182 ", was looking for %llu\n", ret, 3183 (unsigned long long)bytenr); 3184 btrfs_print_leaf(extent_root, path->nodes[0]); 3185 } 3186 BUG_ON(ret); 3187 extent_slot = path->slots[0]; 3188 } 3189 } else { 3190 btrfs_print_leaf(extent_root, path->nodes[0]); 3191 WARN_ON(1); 3192 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3193 "parent %llu root %llu owner %llu offset %llu\n", 3194 (unsigned long long)bytenr, 3195 (unsigned long long)parent, 3196 (unsigned long long)root_objectid, 3197 (unsigned long long)owner_objectid, 3198 (unsigned long long)owner_offset); 3199 } 3200 3201 leaf = path->nodes[0]; 3202 item_size = btrfs_item_size_nr(leaf, extent_slot); 3203 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 
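	/*
	 * an item smaller than the current struct btrfs_extent_item is an
	 * old v0 format extent record; convert it in place and re-find the
	 * slot before carrying on with the free
	 */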
3204 if (item_size < sizeof(*ei)) { 3205 BUG_ON(found_extent || extent_slot != path->slots[0]); 3206 ret = convert_extent_item_v0(trans, extent_root, path, 3207 owner_objectid, 0); 3208 BUG_ON(ret < 0); 3209 3210 btrfs_release_path(extent_root, path); 3211 path->leave_spinning = 1; 3212 3213 key.objectid = bytenr; 3214 key.type = BTRFS_EXTENT_ITEM_KEY; 3215 key.offset = num_bytes; 3216 3217 ret = btrfs_search_slot(trans, extent_root, &key, path, 3218 -1, 1); 3219 if (ret) { 3220 printk(KERN_ERR "umm, got %d back from search" 3221 ", was looking for %llu\n", ret, 3222 (unsigned long long)bytenr); 3223 btrfs_print_leaf(extent_root, path->nodes[0]); 3224 } 3225 BUG_ON(ret); 3226 extent_slot = path->slots[0]; 3227 leaf = path->nodes[0]; 3228 item_size = btrfs_item_size_nr(leaf, extent_slot); 3229 } 3230 #endif 3231 BUG_ON(item_size < sizeof(*ei)); 3232 ei = btrfs_item_ptr(leaf, extent_slot, 3233 struct btrfs_extent_item); 3234 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3235 struct btrfs_tree_block_info *bi; 3236 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3237 bi = (struct btrfs_tree_block_info *)(ei + 1); 3238 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3239 } 3240 3241 refs = btrfs_extent_refs(leaf, ei); 3242 BUG_ON(refs < refs_to_drop); 3243 refs -= refs_to_drop; 3244 3245 if (refs > 0) { 3246 if (extent_op) 3247 __run_delayed_extent_op(extent_op, leaf, ei); 3248 /* 3249 * In the case of inline back ref, reference count will 3250 * be updated by remove_extent_backref 3251 */ 3252 if (iref) { 3253 BUG_ON(!found_extent); 3254 } else { 3255 btrfs_set_extent_refs(leaf, ei, refs); 3256 btrfs_mark_buffer_dirty(leaf); 3257 } 3258 if (found_extent) { 3259 ret = remove_extent_backref(trans, extent_root, path, 3260 iref, refs_to_drop, 3261 is_data); 3262 BUG_ON(ret); 3263 } 3264 } else { 3265 int mark_free = 0; 3266 struct extent_buffer *must_clean = NULL; 3267 3268 if (found_extent) { 3269 BUG_ON(is_data && refs_to_drop != 3270 extent_data_ref_count(root, path, iref)); 3271 if (iref) { 3272 BUG_ON(path->slots[0] != extent_slot); 3273 } else { 3274 BUG_ON(path->slots[0] != extent_slot + 1); 3275 path->slots[0] = extent_slot; 3276 num_to_del = 2; 3277 } 3278 } 3279 3280 ret = pin_down_bytes(trans, root, path, bytenr, 3281 num_bytes, is_data, &must_clean); 3282 if (ret > 0) 3283 mark_free = 1; 3284 BUG_ON(ret < 0); 3285 /* 3286 * it is going to be very rare for someone to be waiting 3287 * on the block we're freeing. del_items might need to 3288 * schedule, so rather than get fancy, just force it 3289 * to blocking here 3290 */ 3291 if (must_clean) 3292 btrfs_set_lock_blocking(must_clean); 3293 3294 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3295 num_to_del); 3296 BUG_ON(ret); 3297 btrfs_release_path(extent_root, path); 3298 3299 if (must_clean) { 3300 clean_tree_block(NULL, root, must_clean); 3301 btrfs_tree_unlock(must_clean); 3302 free_extent_buffer(must_clean); 3303 } 3304 3305 if (is_data) { 3306 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3307 BUG_ON(ret); 3308 } else { 3309 invalidate_mapping_pages(info->btree_inode->i_mapping, 3310 bytenr >> PAGE_CACHE_SHIFT, 3311 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3312 } 3313 3314 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3315 mark_free); 3316 BUG_ON(ret); 3317 } 3318 btrfs_free_path(path); 3319 return ret; 3320 } 3321 3322 /* 3323 * when we free an extent, it is possible (and likely) that we free the last 3324 * delayed ref for that extent as well. 
This searches the delayed ref tree for 3325 * a given extent, and if there are no other delayed refs to be processed, it 3326 * removes it from the tree. 3327 */ 3328 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3329 struct btrfs_root *root, u64 bytenr) 3330 { 3331 struct btrfs_delayed_ref_head *head; 3332 struct btrfs_delayed_ref_root *delayed_refs; 3333 struct btrfs_delayed_ref_node *ref; 3334 struct rb_node *node; 3335 int ret; 3336 3337 delayed_refs = &trans->transaction->delayed_refs; 3338 spin_lock(&delayed_refs->lock); 3339 head = btrfs_find_delayed_ref_head(trans, bytenr); 3340 if (!head) 3341 goto out; 3342 3343 node = rb_prev(&head->node.rb_node); 3344 if (!node) 3345 goto out; 3346 3347 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3348 3349 /* there are still entries for this ref, we can't drop it */ 3350 if (ref->bytenr == bytenr) 3351 goto out; 3352 3353 if (head->extent_op) { 3354 if (!head->must_insert_reserved) 3355 goto out; 3356 kfree(head->extent_op); 3357 head->extent_op = NULL; 3358 } 3359 3360 /* 3361 * waiting for the lock here would deadlock. If someone else has it 3362 * locked they are already in the process of dropping it anyway 3363 */ 3364 if (!mutex_trylock(&head->mutex)) 3365 goto out; 3366 3367 /* 3368 * at this point we have a head with no other entries. Go 3369 * ahead and process it. 3370 */ 3371 head->node.in_tree = 0; 3372 rb_erase(&head->node.rb_node, &delayed_refs->root); 3373 3374 delayed_refs->num_entries--; 3375 3376 /* 3377 * we don't take a ref on the node because we're removing it from the 3378 * tree, so we just steal the ref the tree was holding. 3379 */ 3380 delayed_refs->num_heads--; 3381 if (list_empty(&head->cluster)) 3382 delayed_refs->num_heads_ready--; 3383 3384 list_del_init(&head->cluster); 3385 spin_unlock(&delayed_refs->lock); 3386 3387 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3388 &head->node, head->extent_op, 3389 head->must_insert_reserved); 3390 BUG_ON(ret); 3391 btrfs_put_delayed_ref(&head->node); 3392 return 0; 3393 out: 3394 spin_unlock(&delayed_refs->lock); 3395 return 0; 3396 } 3397 3398 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3399 struct btrfs_root *root, 3400 u64 bytenr, u64 num_bytes, u64 parent, 3401 u64 root_objectid, u64 owner, u64 offset) 3402 { 3403 int ret; 3404 3405 /* 3406 * tree log blocks never actually go into the extent allocation 3407 * tree, just update pinning info and exit early. 3408 */ 3409 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3410 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3411 /* unlocks the pinned mutex */ 3412 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3413 update_reserved_extents(root, bytenr, num_bytes, 0); 3414 ret = 0; 3415 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3416 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3417 parent, root_objectid, (int)owner, 3418 BTRFS_DROP_DELAYED_REF, NULL); 3419 BUG_ON(ret); 3420 ret = check_ref_cleanup(trans, root, bytenr); 3421 BUG_ON(ret); 3422 } else { 3423 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 3424 parent, root_objectid, owner, 3425 offset, BTRFS_DROP_DELAYED_REF, NULL); 3426 BUG_ON(ret); 3427 } 3428 return ret; 3429 } 3430 3431 static u64 stripe_align(struct btrfs_root *root, u64 val) 3432 { 3433 u64 mask = ((u64)root->stripesize - 1); 3434 u64 ret = (val + mask) & ~mask; 3435 return ret; 3436 } 3437 3438 /* 3439 * walks the btree of allocated extents and find a hole of a given size. 
3440 * The key ins is changed to record the hole: 3441 * ins->objectid == block start 3442 * ins->flags = BTRFS_EXTENT_ITEM_KEY 3443 * ins->offset == number of blocks 3444 * Any available blocks before search_start are skipped. 3445 */ 3446 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 3447 struct btrfs_root *orig_root, 3448 u64 num_bytes, u64 empty_size, 3449 u64 search_start, u64 search_end, 3450 u64 hint_byte, struct btrfs_key *ins, 3451 u64 exclude_start, u64 exclude_nr, 3452 int data) 3453 { 3454 int ret = 0; 3455 struct btrfs_root *root = orig_root->fs_info->extent_root; 3456 struct btrfs_free_cluster *last_ptr = NULL; 3457 struct btrfs_block_group_cache *block_group = NULL; 3458 int empty_cluster = 2 * 1024 * 1024; 3459 int allowed_chunk_alloc = 0; 3460 struct btrfs_space_info *space_info; 3461 int last_ptr_loop = 0; 3462 int loop = 0; 3463 3464 WARN_ON(num_bytes < root->sectorsize); 3465 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3466 ins->objectid = 0; 3467 ins->offset = 0; 3468 3469 space_info = __find_space_info(root->fs_info, data); 3470 3471 if (orig_root->ref_cows || empty_size) 3472 allowed_chunk_alloc = 1; 3473 3474 if (data & BTRFS_BLOCK_GROUP_METADATA) { 3475 last_ptr = &root->fs_info->meta_alloc_cluster; 3476 if (!btrfs_test_opt(root, SSD)) 3477 empty_cluster = 64 * 1024; 3478 } 3479 3480 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 3481 last_ptr = &root->fs_info->data_alloc_cluster; 3482 } 3483 3484 if (last_ptr) { 3485 spin_lock(&last_ptr->lock); 3486 if (last_ptr->block_group) 3487 hint_byte = last_ptr->window_start; 3488 spin_unlock(&last_ptr->lock); 3489 } 3490 3491 search_start = max(search_start, first_logical_byte(root, 0)); 3492 search_start = max(search_start, hint_byte); 3493 3494 if (!last_ptr) { 3495 empty_cluster = 0; 3496 loop = 1; 3497 } 3498 3499 if (search_start == hint_byte) { 3500 block_group = btrfs_lookup_block_group(root->fs_info, 3501 search_start); 3502 if (block_group && block_group_bits(block_group, data)) { 3503 down_read(&space_info->groups_sem); 3504 if (list_empty(&block_group->list) || 3505 block_group->ro) { 3506 /* 3507 * someone is removing this block group, 3508 * we can't jump into the have_block_group 3509 * target because our list pointers are not 3510 * valid 3511 */ 3512 btrfs_put_block_group(block_group); 3513 up_read(&space_info->groups_sem); 3514 } else 3515 goto have_block_group; 3516 } else if (block_group) { 3517 btrfs_put_block_group(block_group); 3518 } 3519 } 3520 3521 search: 3522 down_read(&space_info->groups_sem); 3523 list_for_each_entry(block_group, &space_info->block_groups, list) { 3524 u64 offset; 3525 3526 atomic_inc(&block_group->count); 3527 search_start = block_group->key.objectid; 3528 3529 have_block_group: 3530 if (unlikely(!block_group->cached)) { 3531 mutex_lock(&block_group->cache_mutex); 3532 ret = cache_block_group(root, block_group); 3533 mutex_unlock(&block_group->cache_mutex); 3534 if (ret) { 3535 btrfs_put_block_group(block_group); 3536 break; 3537 } 3538 } 3539 3540 if (unlikely(block_group->ro)) 3541 goto loop; 3542 3543 if (last_ptr) { 3544 /* 3545 * the refill lock keeps out other 3546 * people trying to start a new cluster 3547 */ 3548 spin_lock(&last_ptr->refill_lock); 3549 if (last_ptr->block_group && 3550 (last_ptr->block_group->ro || 3551 !block_group_bits(last_ptr->block_group, data))) { 3552 offset = 0; 3553 goto refill_cluster; 3554 } 3555 3556 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 3557 num_bytes, search_start); 
3558 if (offset) { 3559 /* we have a block, we're done */ 3560 spin_unlock(&last_ptr->refill_lock); 3561 goto checks; 3562 } 3563 3564 spin_lock(&last_ptr->lock); 3565 /* 3566 * whoops, this cluster doesn't actually point to 3567 * this block group. Get a ref on the block 3568 * group it does point to and try again 3569 */ 3570 if (!last_ptr_loop && last_ptr->block_group && 3571 last_ptr->block_group != block_group) { 3572 3573 btrfs_put_block_group(block_group); 3574 block_group = last_ptr->block_group; 3575 atomic_inc(&block_group->count); 3576 spin_unlock(&last_ptr->lock); 3577 spin_unlock(&last_ptr->refill_lock); 3578 3579 last_ptr_loop = 1; 3580 search_start = block_group->key.objectid; 3581 /* 3582 * we know this block group is properly 3583 * in the list because 3584 * btrfs_remove_block_group drops the 3585 * cluster before it removes the block 3586 * group from the list 3587 */ 3588 goto have_block_group; 3589 } 3590 spin_unlock(&last_ptr->lock); 3591 refill_cluster: 3592 /* 3593 * this cluster didn't work out, free it and 3594 * start over 3595 */ 3596 btrfs_return_cluster_to_free_space(NULL, last_ptr); 3597 3598 last_ptr_loop = 0; 3599 3600 /* allocate a cluster in this block group */ 3601 ret = btrfs_find_space_cluster(trans, root, 3602 block_group, last_ptr, 3603 offset, num_bytes, 3604 empty_cluster + empty_size); 3605 if (ret == 0) { 3606 /* 3607 * now pull our allocation out of this 3608 * cluster 3609 */ 3610 offset = btrfs_alloc_from_cluster(block_group, 3611 last_ptr, num_bytes, 3612 search_start); 3613 if (offset) { 3614 /* we found one, proceed */ 3615 spin_unlock(&last_ptr->refill_lock); 3616 goto checks; 3617 } 3618 } 3619 /* 3620 * at this point we either didn't find a cluster 3621 * or we weren't able to allocate a block from our 3622 * cluster.
Free the cluster we've been trying 3623 * to use, and go to the next block group 3624 */ 3625 if (loop < 2) { 3626 btrfs_return_cluster_to_free_space(NULL, 3627 last_ptr); 3628 spin_unlock(&last_ptr->refill_lock); 3629 goto loop; 3630 } 3631 spin_unlock(&last_ptr->refill_lock); 3632 } 3633 3634 offset = btrfs_find_space_for_alloc(block_group, search_start, 3635 num_bytes, empty_size); 3636 if (!offset) 3637 goto loop; 3638 checks: 3639 search_start = stripe_align(root, offset); 3640 3641 /* move on to the next group */ 3642 if (search_start + num_bytes >= search_end) { 3643 btrfs_add_free_space(block_group, offset, num_bytes); 3644 goto loop; 3645 } 3646 3647 /* move on to the next group */ 3648 if (search_start + num_bytes > 3649 block_group->key.objectid + block_group->key.offset) { 3650 btrfs_add_free_space(block_group, offset, num_bytes); 3651 goto loop; 3652 } 3653 3654 if (exclude_nr > 0 && 3655 (search_start + num_bytes > exclude_start && 3656 search_start < exclude_start + exclude_nr)) { 3657 search_start = exclude_start + exclude_nr; 3658 3659 btrfs_add_free_space(block_group, offset, num_bytes); 3660 /* 3661 * if search_start is still in this block group 3662 * then we just re-search this block group 3663 */ 3664 if (search_start >= block_group->key.objectid && 3665 search_start < (block_group->key.objectid + 3666 block_group->key.offset)) 3667 goto have_block_group; 3668 goto loop; 3669 } 3670 3671 ins->objectid = search_start; 3672 ins->offset = num_bytes; 3673 3674 if (offset < search_start) 3675 btrfs_add_free_space(block_group, offset, 3676 search_start - offset); 3677 BUG_ON(offset > search_start); 3678 3679 /* we are all good, lets return */ 3680 break; 3681 loop: 3682 btrfs_put_block_group(block_group); 3683 } 3684 up_read(&space_info->groups_sem); 3685 3686 /* loop == 0, try to find a clustered alloc in every block group 3687 * loop == 1, try again after forcing a chunk allocation 3688 * loop == 2, set empty_size and empty_cluster to 0 and try again 3689 */ 3690 if (!ins->objectid && loop < 3 && 3691 (empty_size || empty_cluster || allowed_chunk_alloc)) { 3692 if (loop >= 2) { 3693 empty_size = 0; 3694 empty_cluster = 0; 3695 } 3696 3697 if (allowed_chunk_alloc) { 3698 ret = do_chunk_alloc(trans, root, num_bytes + 3699 2 * 1024 * 1024, data, 1); 3700 allowed_chunk_alloc = 0; 3701 } else { 3702 space_info->force_alloc = 1; 3703 } 3704 3705 if (loop < 3) { 3706 loop++; 3707 goto search; 3708 } 3709 ret = -ENOSPC; 3710 } else if (!ins->objectid) { 3711 ret = -ENOSPC; 3712 } 3713 3714 /* we found what we needed */ 3715 if (ins->objectid) { 3716 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 3717 trans->block_group = block_group->key.objectid; 3718 3719 btrfs_put_block_group(block_group); 3720 ret = 0; 3721 } 3722 3723 return ret; 3724 } 3725 3726 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3727 { 3728 struct btrfs_block_group_cache *cache; 3729 3730 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3731 (unsigned long long)(info->total_bytes - info->bytes_used - 3732 info->bytes_pinned - info->bytes_reserved), 3733 (info->full) ? 
"" : "not "); 3734 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 3735 " may_use=%llu, used=%llu\n", 3736 (unsigned long long)info->total_bytes, 3737 (unsigned long long)info->bytes_pinned, 3738 (unsigned long long)info->bytes_delalloc, 3739 (unsigned long long)info->bytes_may_use, 3740 (unsigned long long)info->bytes_used); 3741 3742 down_read(&info->groups_sem); 3743 list_for_each_entry(cache, &info->block_groups, list) { 3744 spin_lock(&cache->lock); 3745 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3746 "%llu pinned %llu reserved\n", 3747 (unsigned long long)cache->key.objectid, 3748 (unsigned long long)cache->key.offset, 3749 (unsigned long long)btrfs_block_group_used(&cache->item), 3750 (unsigned long long)cache->pinned, 3751 (unsigned long long)cache->reserved); 3752 btrfs_dump_free_space(cache, bytes); 3753 spin_unlock(&cache->lock); 3754 } 3755 up_read(&info->groups_sem); 3756 } 3757 3758 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3759 struct btrfs_root *root, 3760 u64 num_bytes, u64 min_alloc_size, 3761 u64 empty_size, u64 hint_byte, 3762 u64 search_end, struct btrfs_key *ins, 3763 u64 data) 3764 { 3765 int ret; 3766 u64 search_start = 0; 3767 struct btrfs_fs_info *info = root->fs_info; 3768 3769 data = btrfs_get_alloc_profile(root, data); 3770 again: 3771 /* 3772 * the only place that sets empty_size is btrfs_realloc_node, which 3773 * is not called recursively on allocations 3774 */ 3775 if (empty_size || root->ref_cows) { 3776 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3777 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3778 2 * 1024 * 1024, 3779 BTRFS_BLOCK_GROUP_METADATA | 3780 (info->metadata_alloc_profile & 3781 info->avail_metadata_alloc_bits), 0); 3782 } 3783 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3784 num_bytes + 2 * 1024 * 1024, data, 0); 3785 } 3786 3787 WARN_ON(num_bytes < root->sectorsize); 3788 ret = find_free_extent(trans, root, num_bytes, empty_size, 3789 search_start, search_end, hint_byte, ins, 3790 trans->alloc_exclude_start, 3791 trans->alloc_exclude_nr, data); 3792 3793 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 3794 num_bytes = num_bytes >> 1; 3795 num_bytes = num_bytes & ~(root->sectorsize - 1); 3796 num_bytes = max(num_bytes, min_alloc_size); 3797 do_chunk_alloc(trans, root->fs_info->extent_root, 3798 num_bytes, data, 1); 3799 goto again; 3800 } 3801 if (ret) { 3802 struct btrfs_space_info *sinfo; 3803 3804 sinfo = __find_space_info(root->fs_info, data); 3805 printk(KERN_ERR "btrfs allocation failed flags %llu, " 3806 "wanted %llu\n", (unsigned long long)data, 3807 (unsigned long long)num_bytes); 3808 dump_space_info(sinfo, num_bytes); 3809 BUG(); 3810 } 3811 3812 return ret; 3813 } 3814 3815 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 3816 { 3817 struct btrfs_block_group_cache *cache; 3818 int ret = 0; 3819 3820 cache = btrfs_lookup_block_group(root->fs_info, start); 3821 if (!cache) { 3822 printk(KERN_ERR "Unable to find block group for %llu\n", 3823 (unsigned long long)start); 3824 return -ENOSPC; 3825 } 3826 3827 ret = btrfs_discard_extent(root, start, len); 3828 3829 btrfs_add_free_space(cache, start, len); 3830 btrfs_put_block_group(cache); 3831 update_reserved_extents(root, start, len, 0); 3832 3833 return ret; 3834 } 3835 3836 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3837 struct btrfs_root *root, 3838 u64 num_bytes, u64 min_alloc_size, 3839 u64 empty_size, u64 hint_byte, 3840 u64 search_end, struct 
btrfs_key *ins, 3841 u64 data) 3842 { 3843 int ret; 3844 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 3845 empty_size, hint_byte, search_end, ins, 3846 data); 3847 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3848 return ret; 3849 } 3850 3851 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 3852 struct btrfs_root *root, 3853 u64 parent, u64 root_objectid, 3854 u64 flags, u64 owner, u64 offset, 3855 struct btrfs_key *ins, int ref_mod) 3856 { 3857 int ret; 3858 struct btrfs_fs_info *fs_info = root->fs_info; 3859 struct btrfs_extent_item *extent_item; 3860 struct btrfs_extent_inline_ref *iref; 3861 struct btrfs_path *path; 3862 struct extent_buffer *leaf; 3863 int type; 3864 u32 size; 3865 3866 if (parent > 0) 3867 type = BTRFS_SHARED_DATA_REF_KEY; 3868 else 3869 type = BTRFS_EXTENT_DATA_REF_KEY; 3870 3871 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 3872 3873 path = btrfs_alloc_path(); 3874 BUG_ON(!path); 3875 3876 path->leave_spinning = 1; 3877 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 3878 ins, size); 3879 BUG_ON(ret); 3880 3881 leaf = path->nodes[0]; 3882 extent_item = btrfs_item_ptr(leaf, path->slots[0], 3883 struct btrfs_extent_item); 3884 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 3885 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 3886 btrfs_set_extent_flags(leaf, extent_item, 3887 flags | BTRFS_EXTENT_FLAG_DATA); 3888 3889 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 3890 btrfs_set_extent_inline_ref_type(leaf, iref, type); 3891 if (parent > 0) { 3892 struct btrfs_shared_data_ref *ref; 3893 ref = (struct btrfs_shared_data_ref *)(iref + 1); 3894 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 3895 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 3896 } else { 3897 struct btrfs_extent_data_ref *ref; 3898 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 3899 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 3900 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 3901 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 3902 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 3903 } 3904 3905 btrfs_mark_buffer_dirty(path->nodes[0]); 3906 btrfs_free_path(path); 3907 3908 ret = update_block_group(trans, root, ins->objectid, ins->offset, 3909 1, 0); 3910 if (ret) { 3911 printk(KERN_ERR "btrfs update block group failed for %llu " 3912 "%llu\n", (unsigned long long)ins->objectid, 3913 (unsigned long long)ins->offset); 3914 BUG(); 3915 } 3916 return ret; 3917 } 3918 3919 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 3920 struct btrfs_root *root, 3921 u64 parent, u64 root_objectid, 3922 u64 flags, struct btrfs_disk_key *key, 3923 int level, struct btrfs_key *ins) 3924 { 3925 int ret; 3926 struct btrfs_fs_info *fs_info = root->fs_info; 3927 struct btrfs_extent_item *extent_item; 3928 struct btrfs_tree_block_info *block_info; 3929 struct btrfs_extent_inline_ref *iref; 3930 struct btrfs_path *path; 3931 struct extent_buffer *leaf; 3932 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 3933 3934 path = btrfs_alloc_path(); 3935 BUG_ON(!path); 3936 3937 path->leave_spinning = 1; 3938 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 3939 ins, size); 3940 BUG_ON(ret); 3941 3942 leaf = path->nodes[0]; 3943 extent_item = btrfs_item_ptr(leaf, path->slots[0], 3944 struct btrfs_extent_item); 3945 btrfs_set_extent_refs(leaf, extent_item, 1); 3946 
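	/*
	 * the item we just inserted is laid out as a btrfs_extent_item,
	 * followed by a btrfs_tree_block_info, followed by one inline backref
	 */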
btrfs_set_extent_generation(leaf, extent_item, trans->transid); 3947 btrfs_set_extent_flags(leaf, extent_item, 3948 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 3949 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 3950 3951 btrfs_set_tree_block_key(leaf, block_info, key); 3952 btrfs_set_tree_block_level(leaf, block_info, level); 3953 3954 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 3955 if (parent > 0) { 3956 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 3957 btrfs_set_extent_inline_ref_type(leaf, iref, 3958 BTRFS_SHARED_BLOCK_REF_KEY); 3959 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 3960 } else { 3961 btrfs_set_extent_inline_ref_type(leaf, iref, 3962 BTRFS_TREE_BLOCK_REF_KEY); 3963 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 3964 } 3965 3966 btrfs_mark_buffer_dirty(leaf); 3967 btrfs_free_path(path); 3968 3969 ret = update_block_group(trans, root, ins->objectid, ins->offset, 3970 1, 0); 3971 if (ret) { 3972 printk(KERN_ERR "btrfs update block group failed for %llu " 3973 "%llu\n", (unsigned long long)ins->objectid, 3974 (unsigned long long)ins->offset); 3975 BUG(); 3976 } 3977 return ret; 3978 } 3979 3980 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 3981 struct btrfs_root *root, 3982 u64 root_objectid, u64 owner, 3983 u64 offset, struct btrfs_key *ins) 3984 { 3985 int ret; 3986 3987 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 3988 3989 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 3990 0, root_objectid, owner, offset, 3991 BTRFS_ADD_DELAYED_EXTENT, NULL); 3992 return ret; 3993 } 3994 3995 /* 3996 * this is used by the tree logging recovery code. It records that 3997 * an extent has been allocated and makes sure to clear the free 3998 * space cache bits as well 3999 */ 4000 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4001 struct btrfs_root *root, 4002 u64 root_objectid, u64 owner, u64 offset, 4003 struct btrfs_key *ins) 4004 { 4005 int ret; 4006 struct btrfs_block_group_cache *block_group; 4007 4008 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4009 mutex_lock(&block_group->cache_mutex); 4010 cache_block_group(root, block_group); 4011 mutex_unlock(&block_group->cache_mutex); 4012 4013 ret = btrfs_remove_free_space(block_group, ins->objectid, 4014 ins->offset); 4015 BUG_ON(ret); 4016 btrfs_put_block_group(block_group); 4017 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4018 0, owner, offset, ins, 1); 4019 return ret; 4020 } 4021 4022 /* 4023 * finds a free extent and does all the dirty work required for allocation 4024 * returns the key for the extent through ins, and a tree buffer for 4025 * the first block of the extent through buf. 4026 * 4027 * returns 0 if everything worked, non-zero otherwise. 
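 *
 * For anything other than the tree log, the extent item is not inserted
 * here directly; a delayed tree ref is queued with an extent_op that
 * records the key and flags to apply when the delayed refs are run.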
4028 */ 4029 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4030 struct btrfs_root *root, 4031 u64 num_bytes, u64 parent, u64 root_objectid, 4032 struct btrfs_disk_key *key, int level, 4033 u64 empty_size, u64 hint_byte, u64 search_end, 4034 struct btrfs_key *ins) 4035 { 4036 int ret; 4037 u64 flags = 0; 4038 4039 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4040 empty_size, hint_byte, search_end, 4041 ins, 0); 4042 BUG_ON(ret); 4043 4044 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4045 if (parent == 0) 4046 parent = ins->objectid; 4047 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4048 } else 4049 BUG_ON(parent > 0); 4050 4051 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4052 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4053 struct btrfs_delayed_extent_op *extent_op; 4054 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4055 BUG_ON(!extent_op); 4056 if (key) 4057 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4058 else 4059 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4060 extent_op->flags_to_set = flags; 4061 extent_op->update_key = 1; 4062 extent_op->update_flags = 1; 4063 extent_op->is_data = 0; 4064 4065 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4066 ins->offset, parent, root_objectid, 4067 level, BTRFS_ADD_DELAYED_EXTENT, 4068 extent_op); 4069 BUG_ON(ret); 4070 } 4071 return ret; 4072 } 4073 4074 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4075 struct btrfs_root *root, 4076 u64 bytenr, u32 blocksize, 4077 int level) 4078 { 4079 struct extent_buffer *buf; 4080 4081 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4082 if (!buf) 4083 return ERR_PTR(-ENOMEM); 4084 btrfs_set_header_generation(buf, trans->transid); 4085 btrfs_set_buffer_lockdep_class(buf, level); 4086 btrfs_tree_lock(buf); 4087 clean_tree_block(trans, root, buf); 4088 4089 btrfs_set_lock_blocking(buf); 4090 btrfs_set_buffer_uptodate(buf); 4091 4092 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4093 set_extent_dirty(&root->dirty_log_pages, buf->start, 4094 buf->start + buf->len - 1, GFP_NOFS); 4095 } else { 4096 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4097 buf->start + buf->len - 1, GFP_NOFS); 4098 } 4099 trans->blocks_used++; 4100 /* this returns a buffer locked for blocking */ 4101 return buf; 4102 } 4103 4104 /* 4105 * helper function to allocate a block for a given tree 4106 * returns the tree buffer or NULL. 
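 * (more precisely, on failure an ERR_PTR is returned rather than NULL;
 * see btrfs_init_new_buffer above).  the buffer comes back locked with a
 * blocking lock and already marked dirty for the running transaction.
 * a caller typically does something like this (names are illustrative):
 *
 *   buf = btrfs_alloc_free_block(trans, root, blocksize, parent_start,
 *                                root->root_key.objectid, &disk_key,
 *                                level, hint, 0);
 *   if (IS_ERR(buf))
 *           return PTR_ERR(buf);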
4107 */ 4108 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4109 struct btrfs_root *root, u32 blocksize, 4110 u64 parent, u64 root_objectid, 4111 struct btrfs_disk_key *key, int level, 4112 u64 hint, u64 empty_size) 4113 { 4114 struct btrfs_key ins; 4115 int ret; 4116 struct extent_buffer *buf; 4117 4118 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4119 key, level, empty_size, hint, (u64)-1, &ins); 4120 if (ret) { 4121 BUG_ON(ret > 0); 4122 return ERR_PTR(ret); 4123 } 4124 4125 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4126 blocksize, level); 4127 return buf; 4128 } 4129 4130 #if 0 4131 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4132 struct btrfs_root *root, struct extent_buffer *leaf) 4133 { 4134 u64 disk_bytenr; 4135 u64 num_bytes; 4136 struct btrfs_key key; 4137 struct btrfs_file_extent_item *fi; 4138 u32 nritems; 4139 int i; 4140 int ret; 4141 4142 BUG_ON(!btrfs_is_leaf(leaf)); 4143 nritems = btrfs_header_nritems(leaf); 4144 4145 for (i = 0; i < nritems; i++) { 4146 cond_resched(); 4147 btrfs_item_key_to_cpu(leaf, &key, i); 4148 4149 /* only extents have references, skip everything else */ 4150 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 4151 continue; 4152 4153 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 4154 4155 /* inline extents live in the btree, they don't have refs */ 4156 if (btrfs_file_extent_type(leaf, fi) == 4157 BTRFS_FILE_EXTENT_INLINE) 4158 continue; 4159 4160 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 4161 4162 /* holes don't have refs */ 4163 if (disk_bytenr == 0) 4164 continue; 4165 4166 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 4167 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, 4168 leaf->start, 0, key.objectid, 0); 4169 BUG_ON(ret); 4170 } 4171 return 0; 4172 } 4173 4174 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4175 struct btrfs_root *root, 4176 struct btrfs_leaf_ref *ref) 4177 { 4178 int i; 4179 int ret; 4180 struct btrfs_extent_info *info; 4181 struct refsort *sorted; 4182 4183 if (ref->nritems == 0) 4184 return 0; 4185 4186 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 4187 for (i = 0; i < ref->nritems; i++) { 4188 sorted[i].bytenr = ref->extents[i].bytenr; 4189 sorted[i].slot = i; 4190 } 4191 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 4192 4193 /* 4194 * the items in the ref were sorted when the ref was inserted 4195 * into the ref cache, so this is already in order 4196 */ 4197 for (i = 0; i < ref->nritems; i++) { 4198 info = ref->extents + sorted[i].slot; 4199 ret = btrfs_free_extent(trans, root, info->bytenr, 4200 info->num_bytes, ref->bytenr, 4201 ref->owner, ref->generation, 4202 info->objectid, 0); 4203 4204 atomic_inc(&root->fs_info->throttle_gen); 4205 wake_up(&root->fs_info->transaction_throttle); 4206 cond_resched(); 4207 4208 BUG_ON(ret); 4209 info++; 4210 } 4211 4212 kfree(sorted); 4213 return 0; 4214 } 4215 4216 4217 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4218 struct btrfs_root *root, u64 start, 4219 u64 len, u32 *refs) 4220 { 4221 int ret; 4222 4223 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); 4224 BUG_ON(ret); 4225 4226 #if 0 /* some debugging code in case we see problems here */ 4227 /* if the refs count is one, it won't get increased again. But 4228 * if the ref count is > 1, someone may be decreasing it at 4229 * the same time we are. 
4230 */ 4231 if (*refs != 1) { 4232 struct extent_buffer *eb = NULL; 4233 eb = btrfs_find_create_tree_block(root, start, len); 4234 if (eb) 4235 btrfs_tree_lock(eb); 4236 4237 mutex_lock(&root->fs_info->alloc_mutex); 4238 ret = lookup_extent_ref(NULL, root, start, len, refs); 4239 BUG_ON(ret); 4240 mutex_unlock(&root->fs_info->alloc_mutex); 4241 4242 if (eb) { 4243 btrfs_tree_unlock(eb); 4244 free_extent_buffer(eb); 4245 } 4246 if (*refs == 1) { 4247 printk(KERN_ERR "btrfs block %llu went down to one " 4248 "during drop_snap\n", (unsigned long long)start); 4249 } 4250 4251 } 4252 #endif 4253 4254 cond_resched(); 4255 return ret; 4256 } 4257 4258 4259 /* 4260 * this is used while deleting old snapshots, and it drops the refs 4261 * on a whole subtree starting from a level 1 node. 4262 * 4263 * The idea is to sort all the leaf pointers, and then drop the 4264 * ref on all the leaves in order. Most of the time the leaves 4265 * will have ref cache entries, so no leaf IOs will be required to 4266 * find the extents they have references on. 4267 * 4268 * For each leaf, any references it has are also dropped in order 4269 * 4270 * This ends up dropping the references in something close to optimal 4271 * order for reading and modifying the extent allocation tree. 4272 */ 4273 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 4274 struct btrfs_root *root, 4275 struct btrfs_path *path) 4276 { 4277 u64 bytenr; 4278 u64 root_owner; 4279 u64 root_gen; 4280 struct extent_buffer *eb = path->nodes[1]; 4281 struct extent_buffer *leaf; 4282 struct btrfs_leaf_ref *ref; 4283 struct refsort *sorted = NULL; 4284 int nritems = btrfs_header_nritems(eb); 4285 int ret; 4286 int i; 4287 int refi = 0; 4288 int slot = path->slots[1]; 4289 u32 blocksize = btrfs_level_size(root, 0); 4290 u32 refs; 4291 4292 if (nritems == 0) 4293 goto out; 4294 4295 root_owner = btrfs_header_owner(eb); 4296 root_gen = btrfs_header_generation(eb); 4297 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 4298 4299 /* 4300 * step one, sort all the leaf pointers so we don't scribble 4301 * randomly into the extent allocation tree 4302 */ 4303 for (i = slot; i < nritems; i++) { 4304 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4305 sorted[refi].slot = i; 4306 refi++; 4307 } 4308 4309 /* 4310 * nritems won't be zero, but if we're picking up drop_snapshot 4311 * after a crash, slot might be > 0, so double check things 4312 * just in case. 4313 */ 4314 if (refi == 0) 4315 goto out; 4316 4317 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4318 4319 /* 4320 * the first loop frees everything the leaves point to 4321 */ 4322 for (i = 0; i < refi; i++) { 4323 u64 ptr_gen; 4324 4325 bytenr = sorted[i].bytenr; 4326 4327 /* 4328 * check the reference count on this leaf. If it is > 1 4329 * we just decrement it below and don't update any 4330 * of the refs the leaf points to. 4331 */ 4332 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4333 blocksize, &refs); 4334 BUG_ON(ret); 4335 if (refs != 1) 4336 continue; 4337 4338 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4339 4340 /* 4341 * the leaf only had one reference, which means the 4342 * only thing pointing to this leaf is the snapshot 4343 * we're deleting. It isn't possible for the reference 4344 * count to increase again later 4345 * 4346 * The reference cache is checked for the leaf, 4347 * and if found we'll be able to drop any refs held by 4348 * the leaf without needing to read it in. 
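 *
 * a cached btrfs_leaf_ref is only trusted when its generation matches
 * the pointer generation stored in the parent node (ptr_gen above); a
 * stale entry is freed and the leaf is read from disk instead, which is
 * exactly what the ref->generation != ptr_gen check below does.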
4349 */ 4350 ref = btrfs_lookup_leaf_ref(root, bytenr); 4351 if (ref && ref->generation != ptr_gen) { 4352 btrfs_free_leaf_ref(root, ref); 4353 ref = NULL; 4354 } 4355 if (ref) { 4356 ret = cache_drop_leaf_ref(trans, root, ref); 4357 BUG_ON(ret); 4358 btrfs_remove_leaf_ref(root, ref); 4359 btrfs_free_leaf_ref(root, ref); 4360 } else { 4361 /* 4362 * the leaf wasn't in the reference cache, so 4363 * we have to read it. 4364 */ 4365 leaf = read_tree_block(root, bytenr, blocksize, 4366 ptr_gen); 4367 ret = btrfs_drop_leaf_ref(trans, root, leaf); 4368 BUG_ON(ret); 4369 free_extent_buffer(leaf); 4370 } 4371 atomic_inc(&root->fs_info->throttle_gen); 4372 wake_up(&root->fs_info->transaction_throttle); 4373 cond_resched(); 4374 } 4375 4376 /* 4377 * run through the loop again to free the refs on the leaves. 4378 * This is faster than doing it in the loop above because 4379 * the leaves are likely to be clustered together. We end up 4380 * working in nice chunks on the extent allocation tree. 4381 */ 4382 for (i = 0; i < refi; i++) { 4383 bytenr = sorted[i].bytenr; 4384 ret = btrfs_free_extent(trans, root, bytenr, 4385 blocksize, eb->start, 4386 root_owner, root_gen, 0, 1); 4387 BUG_ON(ret); 4388 4389 atomic_inc(&root->fs_info->throttle_gen); 4390 wake_up(&root->fs_info->transaction_throttle); 4391 cond_resched(); 4392 } 4393 out: 4394 kfree(sorted); 4395 4396 /* 4397 * update the path to show we've processed the entire level 1 4398 * node. This will get saved into the root's drop_snapshot_progress 4399 * field so these drops are not repeated again if this transaction 4400 * commits. 4401 */ 4402 path->slots[1] = nritems; 4403 return 0; 4404 } 4405 4406 /* 4407 * helper function for drop_snapshot, this walks down the tree dropping ref 4408 * counts as it goes. 4409 */ 4410 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4411 struct btrfs_root *root, 4412 struct btrfs_path *path, int *level) 4413 { 4414 u64 root_owner; 4415 u64 root_gen; 4416 u64 bytenr; 4417 u64 ptr_gen; 4418 struct extent_buffer *next; 4419 struct extent_buffer *cur; 4420 struct extent_buffer *parent; 4421 u32 blocksize; 4422 int ret; 4423 u32 refs; 4424 4425 WARN_ON(*level < 0); 4426 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4427 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, 4428 path->nodes[*level]->len, &refs); 4429 BUG_ON(ret); 4430 if (refs > 1) 4431 goto out; 4432 4433 /* 4434 * walk down to the last node level and free all the leaves 4435 */ 4436 while (*level >= 0) { 4437 WARN_ON(*level < 0); 4438 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4439 cur = path->nodes[*level]; 4440 4441 if (btrfs_header_level(cur) != *level) 4442 WARN_ON(1); 4443 4444 if (path->slots[*level] >= 4445 btrfs_header_nritems(cur)) 4446 break; 4447 4448 /* the new code goes down to level 1 and does all the 4449 * leaves pointed to that node in bulk. So, this check 4450 * for level 0 will always be false. 4451 * 4452 * But, the disk format allows the drop_snapshot_progress 4453 * field in the root to leave things in a state where 4454 * a leaf will need cleaning up here. If someone crashes 4455 * with the old code and then boots with the new code, 4456 * we might find a leaf here. 4457 */ 4458 if (*level == 0) { 4459 ret = btrfs_drop_leaf_ref(trans, root, cur); 4460 BUG_ON(ret); 4461 break; 4462 } 4463 4464 /* 4465 * once we get to level one, process the whole node 4466 * at once, including everything below it. 
4467 */ 4468 if (*level == 1) { 4469 ret = drop_level_one_refs(trans, root, path); 4470 BUG_ON(ret); 4471 break; 4472 } 4473 4474 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4475 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4476 blocksize = btrfs_level_size(root, *level - 1); 4477 4478 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4479 blocksize, &refs); 4480 BUG_ON(ret); 4481 4482 /* 4483 * if there is more than one reference, we don't need 4484 * to read that node to drop any references it has. We 4485 * just drop the ref we hold on that node and move on to the 4486 * next slot in this level. 4487 */ 4488 if (refs != 1) { 4489 parent = path->nodes[*level]; 4490 root_owner = btrfs_header_owner(parent); 4491 root_gen = btrfs_header_generation(parent); 4492 path->slots[*level]++; 4493 4494 ret = btrfs_free_extent(trans, root, bytenr, 4495 blocksize, parent->start, 4496 root_owner, root_gen, 4497 *level - 1, 1); 4498 BUG_ON(ret); 4499 4500 atomic_inc(&root->fs_info->throttle_gen); 4501 wake_up(&root->fs_info->transaction_throttle); 4502 cond_resched(); 4503 4504 continue; 4505 } 4506 4507 /* 4508 * we need to keep freeing things in the next level down. 4509 * read the block and loop around to process it 4510 */ 4511 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4512 WARN_ON(*level <= 0); 4513 if (path->nodes[*level-1]) 4514 free_extent_buffer(path->nodes[*level-1]); 4515 path->nodes[*level-1] = next; 4516 *level = btrfs_header_level(next); 4517 path->slots[*level] = 0; 4518 cond_resched(); 4519 } 4520 out: 4521 WARN_ON(*level < 0); 4522 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4523 4524 if (path->nodes[*level] == root->node) { 4525 parent = path->nodes[*level]; 4526 bytenr = path->nodes[*level]->start; 4527 } else { 4528 parent = path->nodes[*level + 1]; 4529 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 4530 } 4531 4532 blocksize = btrfs_level_size(root, *level); 4533 root_owner = btrfs_header_owner(parent); 4534 root_gen = btrfs_header_generation(parent); 4535 4536 /* 4537 * cleanup and free the reference on the last node 4538 * we processed 4539 */ 4540 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4541 parent->start, root_owner, root_gen, 4542 *level, 1); 4543 free_extent_buffer(path->nodes[*level]); 4544 path->nodes[*level] = NULL; 4545 4546 *level += 1; 4547 BUG_ON(ret); 4548 4549 cond_resched(); 4550 return 0; 4551 } 4552 #endif 4553 4554 struct walk_control { 4555 u64 refs[BTRFS_MAX_LEVEL]; 4556 u64 flags[BTRFS_MAX_LEVEL]; 4557 struct btrfs_key update_progress; 4558 int stage; 4559 int level; 4560 int shared_level; 4561 int update_ref; 4562 int keep_locks; 4563 }; 4564 4565 #define DROP_REFERENCE 1 4566 #define UPDATE_BACKREF 2 4567 4568 /* 4569 * hepler to process tree block while walking down the tree. 4570 * 4571 * when wc->stage == DROP_REFERENCE, this function checks 4572 * reference count of the block. if the block is shared and 4573 * we need update back refs for the subtree rooted at the 4574 * block, this function changes wc->stage to UPDATE_BACKREF 4575 * 4576 * when wc->stage == UPDATE_BACKREF, this function updates 4577 * back refs for pointers in the block. 4578 * 4579 * NOTE: return value 1 means we should stop walking down. 
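 *
 * a rough sketch of how the two stages interact (see also walk_up_proc
 * below; this only illustrates the existing flow):
 *
 *   DROP_REFERENCE:  refs == 1 -> keep walking down, the block and its
 *                                 subtree will be freed
 *                    refs  > 1 -> return 1 (stop); only the ref held by
 *                                 this tree is dropped on the way back up
 *   UPDATE_BACKREF:  entered when a shared block owned by this root is
 *                    hit and wc->update_ref is set; full backrefs are
 *                    added for the subtree rooted at the shared block,
 *                    then walk_up_proc flips the stage back to
 *                    DROP_REFERENCE once it returns to wc->shared_level.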
4580 */ 4581 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 4582 struct btrfs_root *root, 4583 struct btrfs_path *path, 4584 struct walk_control *wc) 4585 { 4586 int level = wc->level; 4587 struct extent_buffer *eb = path->nodes[level]; 4588 struct btrfs_key key; 4589 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 4590 int ret; 4591 4592 if (wc->stage == UPDATE_BACKREF && 4593 btrfs_header_owner(eb) != root->root_key.objectid) 4594 return 1; 4595 4596 /* 4597 * when reference count of tree block is 1, it won't increase 4598 * again. once full backref flag is set, we never clear it. 4599 */ 4600 if ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 4601 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag))) { 4602 BUG_ON(!path->locks[level]); 4603 ret = btrfs_lookup_extent_info(trans, root, 4604 eb->start, eb->len, 4605 &wc->refs[level], 4606 &wc->flags[level]); 4607 BUG_ON(ret); 4608 BUG_ON(wc->refs[level] == 0); 4609 } 4610 4611 if (wc->stage == DROP_REFERENCE && 4612 wc->update_ref && wc->refs[level] > 1) { 4613 BUG_ON(eb == root->node); 4614 BUG_ON(path->slots[level] > 0); 4615 if (level == 0) 4616 btrfs_item_key_to_cpu(eb, &key, path->slots[level]); 4617 else 4618 btrfs_node_key_to_cpu(eb, &key, path->slots[level]); 4619 if (btrfs_header_owner(eb) == root->root_key.objectid && 4620 btrfs_comp_cpu_keys(&key, &wc->update_progress) >= 0) { 4621 wc->stage = UPDATE_BACKREF; 4622 wc->shared_level = level; 4623 } 4624 } 4625 4626 if (wc->stage == DROP_REFERENCE) { 4627 if (wc->refs[level] > 1) 4628 return 1; 4629 4630 if (path->locks[level] && !wc->keep_locks) { 4631 btrfs_tree_unlock(eb); 4632 path->locks[level] = 0; 4633 } 4634 return 0; 4635 } 4636 4637 /* wc->stage == UPDATE_BACKREF */ 4638 if (!(wc->flags[level] & flag)) { 4639 BUG_ON(!path->locks[level]); 4640 ret = btrfs_inc_ref(trans, root, eb, 1); 4641 BUG_ON(ret); 4642 ret = btrfs_dec_ref(trans, root, eb, 0); 4643 BUG_ON(ret); 4644 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 4645 eb->len, flag, 0); 4646 BUG_ON(ret); 4647 wc->flags[level] |= flag; 4648 } 4649 4650 /* 4651 * the block is shared by multiple trees, so it's not good to 4652 * keep the tree lock 4653 */ 4654 if (path->locks[level] && level > 0) { 4655 btrfs_tree_unlock(eb); 4656 path->locks[level] = 0; 4657 } 4658 return 0; 4659 } 4660 4661 /* 4662 * hepler to process tree block while walking up the tree. 4663 * 4664 * when wc->stage == DROP_REFERENCE, this function drops 4665 * reference count on the block. 4666 * 4667 * when wc->stage == UPDATE_BACKREF, this function changes 4668 * wc->stage back to DROP_REFERENCE if we changed wc->stage 4669 * to UPDATE_BACKREF previously while processing the block. 4670 * 4671 * NOTE: return value 1 means we should stop walking up. 4672 */ 4673 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 4674 struct btrfs_root *root, 4675 struct btrfs_path *path, 4676 struct walk_control *wc) 4677 { 4678 int ret = 0; 4679 int level = wc->level; 4680 struct extent_buffer *eb = path->nodes[level]; 4681 u64 parent = 0; 4682 4683 if (wc->stage == UPDATE_BACKREF) { 4684 BUG_ON(wc->shared_level < level); 4685 if (level < wc->shared_level) 4686 goto out; 4687 4688 BUG_ON(wc->refs[level] <= 1); 4689 ret = find_next_key(path, level + 1, &wc->update_progress); 4690 if (ret > 0) 4691 wc->update_ref = 0; 4692 4693 wc->stage = DROP_REFERENCE; 4694 wc->shared_level = -1; 4695 path->slots[level] = 0; 4696 4697 /* 4698 * check reference count again if the block isn't locked. 
4699 * we should start walking down the tree again if reference 4700 * count is one. 4701 */ 4702 if (!path->locks[level]) { 4703 BUG_ON(level == 0); 4704 btrfs_tree_lock(eb); 4705 btrfs_set_lock_blocking(eb); 4706 path->locks[level] = 1; 4707 4708 ret = btrfs_lookup_extent_info(trans, root, 4709 eb->start, eb->len, 4710 &wc->refs[level], 4711 &wc->flags[level]); 4712 BUG_ON(ret); 4713 BUG_ON(wc->refs[level] == 0); 4714 if (wc->refs[level] == 1) { 4715 btrfs_tree_unlock(eb); 4716 path->locks[level] = 0; 4717 return 1; 4718 } 4719 } else { 4720 BUG_ON(level != 0); 4721 } 4722 } 4723 4724 /* wc->stage == DROP_REFERENCE */ 4725 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 4726 4727 if (wc->refs[level] == 1) { 4728 if (level == 0) { 4729 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4730 ret = btrfs_dec_ref(trans, root, eb, 1); 4731 else 4732 ret = btrfs_dec_ref(trans, root, eb, 0); 4733 BUG_ON(ret); 4734 } 4735 /* make block locked assertion in clean_tree_block happy */ 4736 if (!path->locks[level] && 4737 btrfs_header_generation(eb) == trans->transid) { 4738 btrfs_tree_lock(eb); 4739 btrfs_set_lock_blocking(eb); 4740 path->locks[level] = 1; 4741 } 4742 clean_tree_block(trans, root, eb); 4743 } 4744 4745 if (eb == root->node) { 4746 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4747 parent = eb->start; 4748 else 4749 BUG_ON(root->root_key.objectid != 4750 btrfs_header_owner(eb)); 4751 } else { 4752 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 4753 parent = path->nodes[level + 1]->start; 4754 else 4755 BUG_ON(root->root_key.objectid != 4756 btrfs_header_owner(path->nodes[level + 1])); 4757 } 4758 4759 ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, 4760 root->root_key.objectid, level, 0); 4761 BUG_ON(ret); 4762 out: 4763 wc->refs[level] = 0; 4764 wc->flags[level] = 0; 4765 return ret; 4766 } 4767 4768 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4769 struct btrfs_root *root, 4770 struct btrfs_path *path, 4771 struct walk_control *wc) 4772 { 4773 struct extent_buffer *next; 4774 struct extent_buffer *cur; 4775 u64 bytenr; 4776 u64 ptr_gen; 4777 u32 blocksize; 4778 int level = wc->level; 4779 int ret; 4780 4781 while (level >= 0) { 4782 cur = path->nodes[level]; 4783 BUG_ON(path->slots[level] >= btrfs_header_nritems(cur)); 4784 4785 ret = walk_down_proc(trans, root, path, wc); 4786 if (ret > 0) 4787 break; 4788 4789 if (level == 0) 4790 break; 4791 4792 bytenr = btrfs_node_blockptr(cur, path->slots[level]); 4793 blocksize = btrfs_level_size(root, level - 1); 4794 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[level]); 4795 4796 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4797 btrfs_tree_lock(next); 4798 btrfs_set_lock_blocking(next); 4799 4800 level--; 4801 BUG_ON(level != btrfs_header_level(next)); 4802 path->nodes[level] = next; 4803 path->slots[level] = 0; 4804 path->locks[level] = 1; 4805 wc->level = level; 4806 } 4807 return 0; 4808 } 4809 4810 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4811 struct btrfs_root *root, 4812 struct btrfs_path *path, 4813 struct walk_control *wc, int max_level) 4814 { 4815 int level = wc->level; 4816 int ret; 4817 4818 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 4819 while (level < max_level && path->nodes[level]) { 4820 wc->level = level; 4821 if (path->slots[level] + 1 < 4822 btrfs_header_nritems(path->nodes[level])) { 4823 path->slots[level]++; 4824 return 0; 4825 } else { 4826 ret = walk_up_proc(trans, root, path, wc); 
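/*
 * a return value > 0 from walk_up_proc means "stop walking up" (see
 * the NOTE above that function): the walk should restart going down
 * from the current level, which walk_up_tree signals to its caller by
 * returning 0 below.
 */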
4827 if (ret > 0) 4828 return 0; 4829 4830 if (path->locks[level]) { 4831 btrfs_tree_unlock(path->nodes[level]); 4832 path->locks[level] = 0; 4833 } 4834 free_extent_buffer(path->nodes[level]); 4835 path->nodes[level] = NULL; 4836 level++; 4837 } 4838 } 4839 return 1; 4840 } 4841 4842 /* 4843 * drop a subvolume tree. 4844 * 4845 * this function traverses the tree freeing any blocks that only 4846 * referenced by the tree. 4847 * 4848 * when a shared tree block is found. this function decreases its 4849 * reference count by one. if update_ref is true, this function 4850 * also make sure backrefs for the shared block and all lower level 4851 * blocks are properly updated. 4852 */ 4853 int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) 4854 { 4855 struct btrfs_path *path; 4856 struct btrfs_trans_handle *trans; 4857 struct btrfs_root *tree_root = root->fs_info->tree_root; 4858 struct btrfs_root_item *root_item = &root->root_item; 4859 struct walk_control *wc; 4860 struct btrfs_key key; 4861 int err = 0; 4862 int ret; 4863 int level; 4864 4865 path = btrfs_alloc_path(); 4866 BUG_ON(!path); 4867 4868 wc = kzalloc(sizeof(*wc), GFP_NOFS); 4869 BUG_ON(!wc); 4870 4871 trans = btrfs_start_transaction(tree_root, 1); 4872 4873 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4874 level = btrfs_header_level(root->node); 4875 path->nodes[level] = btrfs_lock_root_node(root); 4876 btrfs_set_lock_blocking(path->nodes[level]); 4877 path->slots[level] = 0; 4878 path->locks[level] = 1; 4879 memset(&wc->update_progress, 0, 4880 sizeof(wc->update_progress)); 4881 } else { 4882 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4883 memcpy(&wc->update_progress, &key, 4884 sizeof(wc->update_progress)); 4885 4886 level = root_item->drop_level; 4887 BUG_ON(level == 0); 4888 path->lowest_level = level; 4889 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4890 path->lowest_level = 0; 4891 if (ret < 0) { 4892 err = ret; 4893 goto out; 4894 } 4895 btrfs_node_key_to_cpu(path->nodes[level], &key, 4896 path->slots[level]); 4897 WARN_ON(memcmp(&key, &wc->update_progress, sizeof(key))); 4898 4899 /* 4900 * unlock our path, this is safe because only this 4901 * function is allowed to delete this snapshot 4902 */ 4903 btrfs_unlock_up_safe(path, 0); 4904 4905 level = btrfs_header_level(root->node); 4906 while (1) { 4907 btrfs_tree_lock(path->nodes[level]); 4908 btrfs_set_lock_blocking(path->nodes[level]); 4909 4910 ret = btrfs_lookup_extent_info(trans, root, 4911 path->nodes[level]->start, 4912 path->nodes[level]->len, 4913 &wc->refs[level], 4914 &wc->flags[level]); 4915 BUG_ON(ret); 4916 BUG_ON(wc->refs[level] == 0); 4917 4918 if (level == root_item->drop_level) 4919 break; 4920 4921 btrfs_tree_unlock(path->nodes[level]); 4922 WARN_ON(wc->refs[level] != 1); 4923 level--; 4924 } 4925 } 4926 4927 wc->level = level; 4928 wc->shared_level = -1; 4929 wc->stage = DROP_REFERENCE; 4930 wc->update_ref = update_ref; 4931 wc->keep_locks = 0; 4932 4933 while (1) { 4934 ret = walk_down_tree(trans, root, path, wc); 4935 if (ret < 0) { 4936 err = ret; 4937 break; 4938 } 4939 4940 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 4941 if (ret < 0) { 4942 err = ret; 4943 break; 4944 } 4945 4946 if (ret > 0) { 4947 BUG_ON(wc->stage != DROP_REFERENCE); 4948 break; 4949 } 4950 4951 if (wc->stage == DROP_REFERENCE) { 4952 level = wc->level; 4953 btrfs_node_key(path->nodes[level], 4954 &root_item->drop_progress, 4955 path->slots[level]); 4956 root_item->drop_level = level; 4957 } 4958 4959 
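/*
 * the key and level stored in drop_progress/drop_level above are what
 * make snapshot deletion restartable: the root item is written out when
 * the transaction commits, and a later run of btrfs_drop_snapshot
 * resumes from exactly this node via the drop_progress search near the
 * top of this function.
 */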
BUG_ON(wc->level == 0); 4960 if (trans->transaction->in_commit || 4961 trans->transaction->delayed_refs.flushing) { 4962 ret = btrfs_update_root(trans, tree_root, 4963 &root->root_key, 4964 root_item); 4965 BUG_ON(ret); 4966 4967 btrfs_end_transaction(trans, tree_root); 4968 trans = btrfs_start_transaction(tree_root, 1); 4969 } else { 4970 unsigned long update; 4971 update = trans->delayed_ref_updates; 4972 trans->delayed_ref_updates = 0; 4973 if (update) 4974 btrfs_run_delayed_refs(trans, tree_root, 4975 update); 4976 } 4977 } 4978 btrfs_release_path(root, path); 4979 BUG_ON(err); 4980 4981 ret = btrfs_del_root(trans, tree_root, &root->root_key); 4982 BUG_ON(ret); 4983 4984 free_extent_buffer(root->node); 4985 free_extent_buffer(root->commit_root); 4986 kfree(root); 4987 out: 4988 btrfs_end_transaction(trans, tree_root); 4989 kfree(wc); 4990 btrfs_free_path(path); 4991 return err; 4992 } 4993 4994 /* 4995 * drop subtree rooted at tree block 'node'. 4996 * 4997 * NOTE: this function will unlock and release tree block 'node' 4998 */ 4999 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 5000 struct btrfs_root *root, 5001 struct extent_buffer *node, 5002 struct extent_buffer *parent) 5003 { 5004 struct btrfs_path *path; 5005 struct walk_control *wc; 5006 int level; 5007 int parent_level; 5008 int ret = 0; 5009 int wret; 5010 5011 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5012 5013 path = btrfs_alloc_path(); 5014 BUG_ON(!path); 5015 5016 wc = kzalloc(sizeof(*wc), GFP_NOFS); 5017 BUG_ON(!wc); 5018 5019 btrfs_assert_tree_locked(parent); 5020 parent_level = btrfs_header_level(parent); 5021 extent_buffer_get(parent); 5022 path->nodes[parent_level] = parent; 5023 path->slots[parent_level] = btrfs_header_nritems(parent); 5024 5025 btrfs_assert_tree_locked(node); 5026 level = btrfs_header_level(node); 5027 path->nodes[level] = node; 5028 path->slots[level] = 0; 5029 path->locks[level] = 1; 5030 5031 wc->refs[parent_level] = 1; 5032 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5033 wc->level = level; 5034 wc->shared_level = -1; 5035 wc->stage = DROP_REFERENCE; 5036 wc->update_ref = 0; 5037 wc->keep_locks = 1; 5038 5039 while (1) { 5040 wret = walk_down_tree(trans, root, path, wc); 5041 if (wret < 0) { 5042 ret = wret; 5043 break; 5044 } 5045 5046 wret = walk_up_tree(trans, root, path, wc, parent_level); 5047 if (wret < 0) 5048 ret = wret; 5049 if (wret != 0) 5050 break; 5051 } 5052 5053 kfree(wc); 5054 btrfs_free_path(path); 5055 return ret; 5056 } 5057 5058 #if 0 5059 static unsigned long calc_ra(unsigned long start, unsigned long last, 5060 unsigned long nr) 5061 { 5062 return min(last, start + nr - 1); 5063 } 5064 5065 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 5066 u64 len) 5067 { 5068 u64 page_start; 5069 u64 page_end; 5070 unsigned long first_index; 5071 unsigned long last_index; 5072 unsigned long i; 5073 struct page *page; 5074 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 5075 struct file_ra_state *ra; 5076 struct btrfs_ordered_extent *ordered; 5077 unsigned int total_read = 0; 5078 unsigned int total_dirty = 0; 5079 int ret = 0; 5080 5081 ra = kzalloc(sizeof(*ra), GFP_NOFS); 5082 5083 mutex_lock(&inode->i_mutex); 5084 first_index = start >> PAGE_CACHE_SHIFT; 5085 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 5086 5087 /* make sure the dirty trick played by the caller work */ 5088 ret = invalidate_inode_pages2_range(inode->i_mapping, 5089 first_index, last_index); 5090 if (ret) 5091 goto out_unlock; 5092 
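/*
 * the loop below is what actually moves the data: each page backed by
 * the extent being relocated is read into the relocation inode's page
 * cache (through the pinned extent map set up by relocate_data_extent
 * below), flagged as delalloc and redirtied, so that ordinary writeback
 * later writes it out again and allocates fresh space for the data at
 * the relocation target.
 */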
5093 file_ra_state_init(ra, inode->i_mapping); 5094 5095 for (i = first_index ; i <= last_index; i++) { 5096 if (total_read % ra->ra_pages == 0) { 5097 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 5098 calc_ra(i, last_index, ra->ra_pages)); 5099 } 5100 total_read++; 5101 again: 5102 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 5103 BUG_ON(1); 5104 page = grab_cache_page(inode->i_mapping, i); 5105 if (!page) { 5106 ret = -ENOMEM; 5107 goto out_unlock; 5108 } 5109 if (!PageUptodate(page)) { 5110 btrfs_readpage(NULL, page); 5111 lock_page(page); 5112 if (!PageUptodate(page)) { 5113 unlock_page(page); 5114 page_cache_release(page); 5115 ret = -EIO; 5116 goto out_unlock; 5117 } 5118 } 5119 wait_on_page_writeback(page); 5120 5121 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 5122 page_end = page_start + PAGE_CACHE_SIZE - 1; 5123 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 5124 5125 ordered = btrfs_lookup_ordered_extent(inode, page_start); 5126 if (ordered) { 5127 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5128 unlock_page(page); 5129 page_cache_release(page); 5130 btrfs_start_ordered_extent(inode, ordered, 1); 5131 btrfs_put_ordered_extent(ordered); 5132 goto again; 5133 } 5134 set_page_extent_mapped(page); 5135 5136 if (i == first_index) 5137 set_extent_bits(io_tree, page_start, page_end, 5138 EXTENT_BOUNDARY, GFP_NOFS); 5139 btrfs_set_extent_delalloc(inode, page_start, page_end); 5140 5141 set_page_dirty(page); 5142 total_dirty++; 5143 5144 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 5145 unlock_page(page); 5146 page_cache_release(page); 5147 } 5148 5149 out_unlock: 5150 kfree(ra); 5151 mutex_unlock(&inode->i_mutex); 5152 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 5153 return ret; 5154 } 5155 5156 static noinline int relocate_data_extent(struct inode *reloc_inode, 5157 struct btrfs_key *extent_key, 5158 u64 offset) 5159 { 5160 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5161 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 5162 struct extent_map *em; 5163 u64 start = extent_key->objectid - offset; 5164 u64 end = start + extent_key->offset - 1; 5165 5166 em = alloc_extent_map(GFP_NOFS); 5167 BUG_ON(!em || IS_ERR(em)); 5168 5169 em->start = start; 5170 em->len = extent_key->offset; 5171 em->block_len = extent_key->offset; 5172 em->block_start = extent_key->objectid; 5173 em->bdev = root->fs_info->fs_devices->latest_bdev; 5174 set_bit(EXTENT_FLAG_PINNED, &em->flags); 5175 5176 /* setup extent map to cheat btrfs_readpage */ 5177 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5178 while (1) { 5179 int ret; 5180 spin_lock(&em_tree->lock); 5181 ret = add_extent_mapping(em_tree, em); 5182 spin_unlock(&em_tree->lock); 5183 if (ret != -EEXIST) { 5184 free_extent_map(em); 5185 break; 5186 } 5187 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 5188 } 5189 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 5190 5191 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 5192 } 5193 5194 struct btrfs_ref_path { 5195 u64 extent_start; 5196 u64 nodes[BTRFS_MAX_LEVEL]; 5197 u64 root_objectid; 5198 u64 root_generation; 5199 u64 owner_objectid; 5200 u32 num_refs; 5201 int lowest_level; 5202 int current_level; 5203 int shared_level; 5204 5205 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 5206 u64 new_nodes[BTRFS_MAX_LEVEL]; 5207 }; 5208 5209 struct disk_extent { 5210 u64 ram_bytes; 5211 u64 disk_bytenr; 5212 u64 disk_num_bytes; 5213 u64 offset; 5214 u64 
num_bytes; 5215 u8 compression; 5216 u8 encryption; 5217 u16 other_encoding; 5218 }; 5219 5220 static int is_cowonly_root(u64 root_objectid) 5221 { 5222 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5223 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5224 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5225 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5226 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5227 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5228 return 1; 5229 return 0; 5230 } 5231 5232 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5233 struct btrfs_root *extent_root, 5234 struct btrfs_ref_path *ref_path, 5235 int first_time) 5236 { 5237 struct extent_buffer *leaf; 5238 struct btrfs_path *path; 5239 struct btrfs_extent_ref *ref; 5240 struct btrfs_key key; 5241 struct btrfs_key found_key; 5242 u64 bytenr; 5243 u32 nritems; 5244 int level; 5245 int ret = 1; 5246 5247 path = btrfs_alloc_path(); 5248 if (!path) 5249 return -ENOMEM; 5250 5251 if (first_time) { 5252 ref_path->lowest_level = -1; 5253 ref_path->current_level = -1; 5254 ref_path->shared_level = -1; 5255 goto walk_up; 5256 } 5257 walk_down: 5258 level = ref_path->current_level - 1; 5259 while (level >= -1) { 5260 u64 parent; 5261 if (level < ref_path->lowest_level) 5262 break; 5263 5264 if (level >= 0) 5265 bytenr = ref_path->nodes[level]; 5266 else 5267 bytenr = ref_path->extent_start; 5268 BUG_ON(bytenr == 0); 5269 5270 parent = ref_path->nodes[level + 1]; 5271 ref_path->nodes[level + 1] = 0; 5272 ref_path->current_level = level; 5273 BUG_ON(parent == 0); 5274 5275 key.objectid = bytenr; 5276 key.offset = parent + 1; 5277 key.type = BTRFS_EXTENT_REF_KEY; 5278 5279 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5280 if (ret < 0) 5281 goto out; 5282 BUG_ON(ret == 0); 5283 5284 leaf = path->nodes[0]; 5285 nritems = btrfs_header_nritems(leaf); 5286 if (path->slots[0] >= nritems) { 5287 ret = btrfs_next_leaf(extent_root, path); 5288 if (ret < 0) 5289 goto out; 5290 if (ret > 0) 5291 goto next; 5292 leaf = path->nodes[0]; 5293 } 5294 5295 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5296 if (found_key.objectid == bytenr && 5297 found_key.type == BTRFS_EXTENT_REF_KEY) { 5298 if (level < ref_path->shared_level) 5299 ref_path->shared_level = level; 5300 goto found; 5301 } 5302 next: 5303 level--; 5304 btrfs_release_path(extent_root, path); 5305 cond_resched(); 5306 } 5307 /* reached lowest level */ 5308 ret = 1; 5309 goto out; 5310 walk_up: 5311 level = ref_path->current_level; 5312 while (level < BTRFS_MAX_LEVEL - 1) { 5313 u64 ref_objectid; 5314 5315 if (level >= 0) 5316 bytenr = ref_path->nodes[level]; 5317 else 5318 bytenr = ref_path->extent_start; 5319 5320 BUG_ON(bytenr == 0); 5321 5322 key.objectid = bytenr; 5323 key.offset = 0; 5324 key.type = BTRFS_EXTENT_REF_KEY; 5325 5326 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5327 if (ret < 0) 5328 goto out; 5329 5330 leaf = path->nodes[0]; 5331 nritems = btrfs_header_nritems(leaf); 5332 if (path->slots[0] >= nritems) { 5333 ret = btrfs_next_leaf(extent_root, path); 5334 if (ret < 0) 5335 goto out; 5336 if (ret > 0) { 5337 /* the extent was freed by someone */ 5338 if (ref_path->lowest_level == level) 5339 goto out; 5340 btrfs_release_path(extent_root, path); 5341 goto walk_down; 5342 } 5343 leaf = path->nodes[0]; 5344 } 5345 5346 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5347 if (found_key.objectid != bytenr || 5348 found_key.type != BTRFS_EXTENT_REF_KEY) { 5349 /* the extent was freed by someone 
*/ 5350 if (ref_path->lowest_level == level) { 5351 ret = 1; 5352 goto out; 5353 } 5354 btrfs_release_path(extent_root, path); 5355 goto walk_down; 5356 } 5357 found: 5358 ref = btrfs_item_ptr(leaf, path->slots[0], 5359 struct btrfs_extent_ref); 5360 ref_objectid = btrfs_ref_objectid(leaf, ref); 5361 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5362 if (first_time) { 5363 level = (int)ref_objectid; 5364 BUG_ON(level >= BTRFS_MAX_LEVEL); 5365 ref_path->lowest_level = level; 5366 ref_path->current_level = level; 5367 ref_path->nodes[level] = bytenr; 5368 } else { 5369 WARN_ON(ref_objectid != level); 5370 } 5371 } else { 5372 WARN_ON(level != -1); 5373 } 5374 first_time = 0; 5375 5376 if (ref_path->lowest_level == level) { 5377 ref_path->owner_objectid = ref_objectid; 5378 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 5379 } 5380 5381 /* 5382 * the block is tree root or the block isn't in reference 5383 * counted tree. 5384 */ 5385 if (found_key.objectid == found_key.offset || 5386 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 5387 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5388 ref_path->root_generation = 5389 btrfs_ref_generation(leaf, ref); 5390 if (level < 0) { 5391 /* special reference from the tree log */ 5392 ref_path->nodes[0] = found_key.offset; 5393 ref_path->current_level = 0; 5394 } 5395 ret = 0; 5396 goto out; 5397 } 5398 5399 level++; 5400 BUG_ON(ref_path->nodes[level] != 0); 5401 ref_path->nodes[level] = found_key.offset; 5402 ref_path->current_level = level; 5403 5404 /* 5405 * the reference was created in the running transaction, 5406 * no need to continue walking up. 5407 */ 5408 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 5409 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5410 ref_path->root_generation = 5411 btrfs_ref_generation(leaf, ref); 5412 ret = 0; 5413 goto out; 5414 } 5415 5416 btrfs_release_path(extent_root, path); 5417 cond_resched(); 5418 } 5419 /* reached max tree level, but no tree root found. 
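 * any block in a reference counted tree sits at most BTRFS_MAX_LEVEL - 1
 * levels below a tree root, so climbing that far through the backrefs
 * without hitting a root (or a cow-only tree, handled above) means the
 * backref chain is corrupted, hence the BUG() below.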
*/ 5420 BUG(); 5421 out: 5422 btrfs_free_path(path); 5423 return ret; 5424 } 5425 5426 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 5427 struct btrfs_root *extent_root, 5428 struct btrfs_ref_path *ref_path, 5429 u64 extent_start) 5430 { 5431 memset(ref_path, 0, sizeof(*ref_path)); 5432 ref_path->extent_start = extent_start; 5433 5434 return __next_ref_path(trans, extent_root, ref_path, 1); 5435 } 5436 5437 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 5438 struct btrfs_root *extent_root, 5439 struct btrfs_ref_path *ref_path) 5440 { 5441 return __next_ref_path(trans, extent_root, ref_path, 0); 5442 } 5443 5444 static noinline int get_new_locations(struct inode *reloc_inode, 5445 struct btrfs_key *extent_key, 5446 u64 offset, int no_fragment, 5447 struct disk_extent **extents, 5448 int *nr_extents) 5449 { 5450 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5451 struct btrfs_path *path; 5452 struct btrfs_file_extent_item *fi; 5453 struct extent_buffer *leaf; 5454 struct disk_extent *exts = *extents; 5455 struct btrfs_key found_key; 5456 u64 cur_pos; 5457 u64 last_byte; 5458 u32 nritems; 5459 int nr = 0; 5460 int max = *nr_extents; 5461 int ret; 5462 5463 WARN_ON(!no_fragment && *extents); 5464 if (!exts) { 5465 max = 1; 5466 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 5467 if (!exts) 5468 return -ENOMEM; 5469 } 5470 5471 path = btrfs_alloc_path(); 5472 BUG_ON(!path); 5473 5474 cur_pos = extent_key->objectid - offset; 5475 last_byte = extent_key->objectid + extent_key->offset; 5476 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 5477 cur_pos, 0); 5478 if (ret < 0) 5479 goto out; 5480 if (ret > 0) { 5481 ret = -ENOENT; 5482 goto out; 5483 } 5484 5485 while (1) { 5486 leaf = path->nodes[0]; 5487 nritems = btrfs_header_nritems(leaf); 5488 if (path->slots[0] >= nritems) { 5489 ret = btrfs_next_leaf(root, path); 5490 if (ret < 0) 5491 goto out; 5492 if (ret > 0) 5493 break; 5494 leaf = path->nodes[0]; 5495 } 5496 5497 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5498 if (found_key.offset != cur_pos || 5499 found_key.type != BTRFS_EXTENT_DATA_KEY || 5500 found_key.objectid != reloc_inode->i_ino) 5501 break; 5502 5503 fi = btrfs_item_ptr(leaf, path->slots[0], 5504 struct btrfs_file_extent_item); 5505 if (btrfs_file_extent_type(leaf, fi) != 5506 BTRFS_FILE_EXTENT_REG || 5507 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5508 break; 5509 5510 if (nr == max) { 5511 struct disk_extent *old = exts; 5512 max *= 2; 5513 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 5514 memcpy(exts, old, sizeof(*exts) * nr); 5515 if (old != *extents) 5516 kfree(old); 5517 } 5518 5519 exts[nr].disk_bytenr = 5520 btrfs_file_extent_disk_bytenr(leaf, fi); 5521 exts[nr].disk_num_bytes = 5522 btrfs_file_extent_disk_num_bytes(leaf, fi); 5523 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 5524 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5525 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 5526 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 5527 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 5528 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 5529 fi); 5530 BUG_ON(exts[nr].offset > 0); 5531 BUG_ON(exts[nr].compression || exts[nr].encryption); 5532 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 5533 5534 cur_pos += exts[nr].num_bytes; 5535 nr++; 5536 5537 if (cur_pos + offset >= last_byte) 5538 break; 5539 5540 if (no_fragment) { 5541 ret = 1; 5542 goto out; 
5543 } 5544 path->slots[0]++; 5545 } 5546 5547 BUG_ON(cur_pos + offset > last_byte); 5548 if (cur_pos + offset < last_byte) { 5549 ret = -ENOENT; 5550 goto out; 5551 } 5552 ret = 0; 5553 out: 5554 btrfs_free_path(path); 5555 if (ret) { 5556 if (exts != *extents) 5557 kfree(exts); 5558 } else { 5559 *extents = exts; 5560 *nr_extents = nr; 5561 } 5562 return ret; 5563 } 5564 5565 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 5566 struct btrfs_root *root, 5567 struct btrfs_path *path, 5568 struct btrfs_key *extent_key, 5569 struct btrfs_key *leaf_key, 5570 struct btrfs_ref_path *ref_path, 5571 struct disk_extent *new_extents, 5572 int nr_extents) 5573 { 5574 struct extent_buffer *leaf; 5575 struct btrfs_file_extent_item *fi; 5576 struct inode *inode = NULL; 5577 struct btrfs_key key; 5578 u64 lock_start = 0; 5579 u64 lock_end = 0; 5580 u64 num_bytes; 5581 u64 ext_offset; 5582 u64 search_end = (u64)-1; 5583 u32 nritems; 5584 int nr_scaned = 0; 5585 int extent_locked = 0; 5586 int extent_type; 5587 int ret; 5588 5589 memcpy(&key, leaf_key, sizeof(key)); 5590 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5591 if (key.objectid < ref_path->owner_objectid || 5592 (key.objectid == ref_path->owner_objectid && 5593 key.type < BTRFS_EXTENT_DATA_KEY)) { 5594 key.objectid = ref_path->owner_objectid; 5595 key.type = BTRFS_EXTENT_DATA_KEY; 5596 key.offset = 0; 5597 } 5598 } 5599 5600 while (1) { 5601 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 5602 if (ret < 0) 5603 goto out; 5604 5605 leaf = path->nodes[0]; 5606 nritems = btrfs_header_nritems(leaf); 5607 next: 5608 if (extent_locked && ret > 0) { 5609 /* 5610 * the file extent item was modified by someone 5611 * before the extent got locked. 5612 */ 5613 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5614 lock_end, GFP_NOFS); 5615 extent_locked = 0; 5616 } 5617 5618 if (path->slots[0] >= nritems) { 5619 if (++nr_scaned > 2) 5620 break; 5621 5622 BUG_ON(extent_locked); 5623 ret = btrfs_next_leaf(root, path); 5624 if (ret < 0) 5625 goto out; 5626 if (ret > 0) 5627 break; 5628 leaf = path->nodes[0]; 5629 nritems = btrfs_header_nritems(leaf); 5630 } 5631 5632 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5633 5634 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5635 if ((key.objectid > ref_path->owner_objectid) || 5636 (key.objectid == ref_path->owner_objectid && 5637 key.type > BTRFS_EXTENT_DATA_KEY) || 5638 key.offset >= search_end) 5639 break; 5640 } 5641 5642 if (inode && key.objectid != inode->i_ino) { 5643 BUG_ON(extent_locked); 5644 btrfs_release_path(root, path); 5645 mutex_unlock(&inode->i_mutex); 5646 iput(inode); 5647 inode = NULL; 5648 continue; 5649 } 5650 5651 if (key.type != BTRFS_EXTENT_DATA_KEY) { 5652 path->slots[0]++; 5653 ret = 1; 5654 goto next; 5655 } 5656 fi = btrfs_item_ptr(leaf, path->slots[0], 5657 struct btrfs_file_extent_item); 5658 extent_type = btrfs_file_extent_type(leaf, fi); 5659 if ((extent_type != BTRFS_FILE_EXTENT_REG && 5660 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 5661 (btrfs_file_extent_disk_bytenr(leaf, fi) != 5662 extent_key->objectid)) { 5663 path->slots[0]++; 5664 ret = 1; 5665 goto next; 5666 } 5667 5668 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5669 ext_offset = btrfs_file_extent_offset(leaf, fi); 5670 5671 if (search_end == (u64)-1) { 5672 search_end = key.offset - ext_offset + 5673 btrfs_file_extent_ram_bytes(leaf, fi); 5674 } 5675 5676 if (!extent_locked) { 5677 lock_start = key.offset; 5678 lock_end = lock_start + num_bytes 
- 1; 5679 } else { 5680 if (lock_start > key.offset || 5681 lock_end + 1 < key.offset + num_bytes) { 5682 unlock_extent(&BTRFS_I(inode)->io_tree, 5683 lock_start, lock_end, GFP_NOFS); 5684 extent_locked = 0; 5685 } 5686 } 5687 5688 if (!inode) { 5689 btrfs_release_path(root, path); 5690 5691 inode = btrfs_iget_locked(root->fs_info->sb, 5692 key.objectid, root); 5693 if (inode->i_state & I_NEW) { 5694 BTRFS_I(inode)->root = root; 5695 BTRFS_I(inode)->location.objectid = 5696 key.objectid; 5697 BTRFS_I(inode)->location.type = 5698 BTRFS_INODE_ITEM_KEY; 5699 BTRFS_I(inode)->location.offset = 0; 5700 btrfs_read_locked_inode(inode); 5701 unlock_new_inode(inode); 5702 } 5703 /* 5704 * some code call btrfs_commit_transaction while 5705 * holding the i_mutex, so we can't use mutex_lock 5706 * here. 5707 */ 5708 if (is_bad_inode(inode) || 5709 !mutex_trylock(&inode->i_mutex)) { 5710 iput(inode); 5711 inode = NULL; 5712 key.offset = (u64)-1; 5713 goto skip; 5714 } 5715 } 5716 5717 if (!extent_locked) { 5718 struct btrfs_ordered_extent *ordered; 5719 5720 btrfs_release_path(root, path); 5721 5722 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5723 lock_end, GFP_NOFS); 5724 ordered = btrfs_lookup_first_ordered_extent(inode, 5725 lock_end); 5726 if (ordered && 5727 ordered->file_offset <= lock_end && 5728 ordered->file_offset + ordered->len > lock_start) { 5729 unlock_extent(&BTRFS_I(inode)->io_tree, 5730 lock_start, lock_end, GFP_NOFS); 5731 btrfs_start_ordered_extent(inode, ordered, 1); 5732 btrfs_put_ordered_extent(ordered); 5733 key.offset += num_bytes; 5734 goto skip; 5735 } 5736 if (ordered) 5737 btrfs_put_ordered_extent(ordered); 5738 5739 extent_locked = 1; 5740 continue; 5741 } 5742 5743 if (nr_extents == 1) { 5744 /* update extent pointer in place */ 5745 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5746 new_extents[0].disk_bytenr); 5747 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5748 new_extents[0].disk_num_bytes); 5749 btrfs_mark_buffer_dirty(leaf); 5750 5751 btrfs_drop_extent_cache(inode, key.offset, 5752 key.offset + num_bytes - 1, 0); 5753 5754 ret = btrfs_inc_extent_ref(trans, root, 5755 new_extents[0].disk_bytenr, 5756 new_extents[0].disk_num_bytes, 5757 leaf->start, 5758 root->root_key.objectid, 5759 trans->transid, 5760 key.objectid); 5761 BUG_ON(ret); 5762 5763 ret = btrfs_free_extent(trans, root, 5764 extent_key->objectid, 5765 extent_key->offset, 5766 leaf->start, 5767 btrfs_header_owner(leaf), 5768 btrfs_header_generation(leaf), 5769 key.objectid, 0); 5770 BUG_ON(ret); 5771 5772 btrfs_release_path(root, path); 5773 key.offset += num_bytes; 5774 } else { 5775 BUG_ON(1); 5776 #if 0 5777 u64 alloc_hint; 5778 u64 extent_len; 5779 int i; 5780 /* 5781 * drop old extent pointer at first, then insert the 5782 * new pointers one bye one 5783 */ 5784 btrfs_release_path(root, path); 5785 ret = btrfs_drop_extents(trans, root, inode, key.offset, 5786 key.offset + num_bytes, 5787 key.offset, &alloc_hint); 5788 BUG_ON(ret); 5789 5790 for (i = 0; i < nr_extents; i++) { 5791 if (ext_offset >= new_extents[i].num_bytes) { 5792 ext_offset -= new_extents[i].num_bytes; 5793 continue; 5794 } 5795 extent_len = min(new_extents[i].num_bytes - 5796 ext_offset, num_bytes); 5797 5798 ret = btrfs_insert_empty_item(trans, root, 5799 path, &key, 5800 sizeof(*fi)); 5801 BUG_ON(ret); 5802 5803 leaf = path->nodes[0]; 5804 fi = btrfs_item_ptr(leaf, path->slots[0], 5805 struct btrfs_file_extent_item); 5806 btrfs_set_file_extent_generation(leaf, fi, 5807 trans->transid); 5808 btrfs_set_file_extent_type(leaf, 
fi, 5809 BTRFS_FILE_EXTENT_REG); 5810 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5811 new_extents[i].disk_bytenr); 5812 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5813 new_extents[i].disk_num_bytes); 5814 btrfs_set_file_extent_ram_bytes(leaf, fi, 5815 new_extents[i].ram_bytes); 5816 5817 btrfs_set_file_extent_compression(leaf, fi, 5818 new_extents[i].compression); 5819 btrfs_set_file_extent_encryption(leaf, fi, 5820 new_extents[i].encryption); 5821 btrfs_set_file_extent_other_encoding(leaf, fi, 5822 new_extents[i].other_encoding); 5823 5824 btrfs_set_file_extent_num_bytes(leaf, fi, 5825 extent_len); 5826 ext_offset += new_extents[i].offset; 5827 btrfs_set_file_extent_offset(leaf, fi, 5828 ext_offset); 5829 btrfs_mark_buffer_dirty(leaf); 5830 5831 btrfs_drop_extent_cache(inode, key.offset, 5832 key.offset + extent_len - 1, 0); 5833 5834 ret = btrfs_inc_extent_ref(trans, root, 5835 new_extents[i].disk_bytenr, 5836 new_extents[i].disk_num_bytes, 5837 leaf->start, 5838 root->root_key.objectid, 5839 trans->transid, key.objectid); 5840 BUG_ON(ret); 5841 btrfs_release_path(root, path); 5842 5843 inode_add_bytes(inode, extent_len); 5844 5845 ext_offset = 0; 5846 num_bytes -= extent_len; 5847 key.offset += extent_len; 5848 5849 if (num_bytes == 0) 5850 break; 5851 } 5852 BUG_ON(i >= nr_extents); 5853 #endif 5854 } 5855 5856 if (extent_locked) { 5857 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5858 lock_end, GFP_NOFS); 5859 extent_locked = 0; 5860 } 5861 skip: 5862 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 5863 key.offset >= search_end) 5864 break; 5865 5866 cond_resched(); 5867 } 5868 ret = 0; 5869 out: 5870 btrfs_release_path(root, path); 5871 if (inode) { 5872 mutex_unlock(&inode->i_mutex); 5873 if (extent_locked) { 5874 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5875 lock_end, GFP_NOFS); 5876 } 5877 iput(inode); 5878 } 5879 return ret; 5880 } 5881 5882 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 5883 struct btrfs_root *root, 5884 struct extent_buffer *buf, u64 orig_start) 5885 { 5886 int level; 5887 int ret; 5888 5889 BUG_ON(btrfs_header_generation(buf) != trans->transid); 5890 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5891 5892 level = btrfs_header_level(buf); 5893 if (level == 0) { 5894 struct btrfs_leaf_ref *ref; 5895 struct btrfs_leaf_ref *orig_ref; 5896 5897 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 5898 if (!orig_ref) 5899 return -ENOENT; 5900 5901 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 5902 if (!ref) { 5903 btrfs_free_leaf_ref(root, orig_ref); 5904 return -ENOMEM; 5905 } 5906 5907 ref->nritems = orig_ref->nritems; 5908 memcpy(ref->extents, orig_ref->extents, 5909 sizeof(ref->extents[0]) * ref->nritems); 5910 5911 btrfs_free_leaf_ref(root, orig_ref); 5912 5913 ref->root_gen = trans->transid; 5914 ref->bytenr = buf->start; 5915 ref->owner = btrfs_header_owner(buf); 5916 ref->generation = btrfs_header_generation(buf); 5917 5918 ret = btrfs_add_leaf_ref(root, ref, 0); 5919 WARN_ON(ret); 5920 btrfs_free_leaf_ref(root, ref); 5921 } 5922 return 0; 5923 } 5924 5925 static noinline int invalidate_extent_cache(struct btrfs_root *root, 5926 struct extent_buffer *leaf, 5927 struct btrfs_block_group_cache *group, 5928 struct btrfs_root *target_root) 5929 { 5930 struct btrfs_key key; 5931 struct inode *inode = NULL; 5932 struct btrfs_file_extent_item *fi; 5933 u64 num_bytes; 5934 u64 skip_objectid = 0; 5935 u32 nritems; 5936 u32 i; 5937 5938 nritems = btrfs_header_nritems(leaf); 5939 for (i = 0; i < 
nritems; i++) { 5940 btrfs_item_key_to_cpu(leaf, &key, i); 5941 if (key.objectid == skip_objectid || 5942 key.type != BTRFS_EXTENT_DATA_KEY) 5943 continue; 5944 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 5945 if (btrfs_file_extent_type(leaf, fi) == 5946 BTRFS_FILE_EXTENT_INLINE) 5947 continue; 5948 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5949 continue; 5950 if (!inode || inode->i_ino != key.objectid) { 5951 iput(inode); 5952 inode = btrfs_ilookup(target_root->fs_info->sb, 5953 key.objectid, target_root, 1); 5954 } 5955 if (!inode) { 5956 skip_objectid = key.objectid; 5957 continue; 5958 } 5959 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5960 5961 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 5962 key.offset + num_bytes - 1, GFP_NOFS); 5963 btrfs_drop_extent_cache(inode, key.offset, 5964 key.offset + num_bytes - 1, 1); 5965 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 5966 key.offset + num_bytes - 1, GFP_NOFS); 5967 cond_resched(); 5968 } 5969 iput(inode); 5970 return 0; 5971 } 5972 5973 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 5974 struct btrfs_root *root, 5975 struct extent_buffer *leaf, 5976 struct btrfs_block_group_cache *group, 5977 struct inode *reloc_inode) 5978 { 5979 struct btrfs_key key; 5980 struct btrfs_key extent_key; 5981 struct btrfs_file_extent_item *fi; 5982 struct btrfs_leaf_ref *ref; 5983 struct disk_extent *new_extent; 5984 u64 bytenr; 5985 u64 num_bytes; 5986 u32 nritems; 5987 u32 i; 5988 int ext_index; 5989 int nr_extent; 5990 int ret; 5991 5992 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 5993 BUG_ON(!new_extent); 5994 5995 ref = btrfs_lookup_leaf_ref(root, leaf->start); 5996 BUG_ON(!ref); 5997 5998 ext_index = -1; 5999 nritems = btrfs_header_nritems(leaf); 6000 for (i = 0; i < nritems; i++) { 6001 btrfs_item_key_to_cpu(leaf, &key, i); 6002 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 6003 continue; 6004 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 6005 if (btrfs_file_extent_type(leaf, fi) == 6006 BTRFS_FILE_EXTENT_INLINE) 6007 continue; 6008 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 6009 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 6010 if (bytenr == 0) 6011 continue; 6012 6013 ext_index++; 6014 if (bytenr >= group->key.objectid + group->key.offset || 6015 bytenr + num_bytes <= group->key.objectid) 6016 continue; 6017 6018 extent_key.objectid = bytenr; 6019 extent_key.offset = num_bytes; 6020 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 6021 nr_extent = 1; 6022 ret = get_new_locations(reloc_inode, &extent_key, 6023 group->key.objectid, 1, 6024 &new_extent, &nr_extent); 6025 if (ret > 0) 6026 continue; 6027 BUG_ON(ret < 0); 6028 6029 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 6030 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 6031 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 6032 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 6033 6034 btrfs_set_file_extent_disk_bytenr(leaf, fi, 6035 new_extent->disk_bytenr); 6036 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 6037 new_extent->disk_num_bytes); 6038 btrfs_mark_buffer_dirty(leaf); 6039 6040 ret = btrfs_inc_extent_ref(trans, root, 6041 new_extent->disk_bytenr, 6042 new_extent->disk_num_bytes, 6043 leaf->start, 6044 root->root_key.objectid, 6045 trans->transid, key.objectid); 6046 BUG_ON(ret); 6047 6048 ret = btrfs_free_extent(trans, root, 6049 bytenr, num_bytes, leaf->start, 6050 btrfs_header_owner(leaf), 6051 btrfs_header_generation(leaf), 6052 
key.objectid, 0); 6053 BUG_ON(ret); 6054 cond_resched(); 6055 } 6056 kfree(new_extent); 6057 BUG_ON(ext_index + 1 != ref->nritems); 6058 btrfs_free_leaf_ref(root, ref); 6059 return 0; 6060 } 6061 6062 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 6063 struct btrfs_root *root) 6064 { 6065 struct btrfs_root *reloc_root; 6066 int ret; 6067 6068 if (root->reloc_root) { 6069 reloc_root = root->reloc_root; 6070 root->reloc_root = NULL; 6071 list_add(&reloc_root->dead_list, 6072 &root->fs_info->dead_reloc_roots); 6073 6074 btrfs_set_root_bytenr(&reloc_root->root_item, 6075 reloc_root->node->start); 6076 btrfs_set_root_level(&reloc_root->root_item, 6077 btrfs_header_level(reloc_root->node)); 6078 memset(&reloc_root->root_item.drop_progress, 0, 6079 sizeof(struct btrfs_disk_key)); 6080 reloc_root->root_item.drop_level = 0; 6081 6082 ret = btrfs_update_root(trans, root->fs_info->tree_root, 6083 &reloc_root->root_key, 6084 &reloc_root->root_item); 6085 BUG_ON(ret); 6086 } 6087 return 0; 6088 } 6089 6090 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 6091 { 6092 struct btrfs_trans_handle *trans; 6093 struct btrfs_root *reloc_root; 6094 struct btrfs_root *prev_root = NULL; 6095 struct list_head dead_roots; 6096 int ret; 6097 unsigned long nr; 6098 6099 INIT_LIST_HEAD(&dead_roots); 6100 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 6101 6102 while (!list_empty(&dead_roots)) { 6103 reloc_root = list_entry(dead_roots.prev, 6104 struct btrfs_root, dead_list); 6105 list_del_init(&reloc_root->dead_list); 6106 6107 BUG_ON(reloc_root->commit_root != NULL); 6108 while (1) { 6109 trans = btrfs_join_transaction(root, 1); 6110 BUG_ON(!trans); 6111 6112 mutex_lock(&root->fs_info->drop_mutex); 6113 ret = btrfs_drop_snapshot(trans, reloc_root); 6114 if (ret != -EAGAIN) 6115 break; 6116 mutex_unlock(&root->fs_info->drop_mutex); 6117 6118 nr = trans->blocks_used; 6119 ret = btrfs_end_transaction(trans, root); 6120 BUG_ON(ret); 6121 btrfs_btree_balance_dirty(root, nr); 6122 } 6123 6124 free_extent_buffer(reloc_root->node); 6125 6126 ret = btrfs_del_root(trans, root->fs_info->tree_root, 6127 &reloc_root->root_key); 6128 BUG_ON(ret); 6129 mutex_unlock(&root->fs_info->drop_mutex); 6130 6131 nr = trans->blocks_used; 6132 ret = btrfs_end_transaction(trans, root); 6133 BUG_ON(ret); 6134 btrfs_btree_balance_dirty(root, nr); 6135 6136 kfree(prev_root); 6137 prev_root = reloc_root; 6138 } 6139 if (prev_root) { 6140 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 6141 kfree(prev_root); 6142 } 6143 return 0; 6144 } 6145 6146 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 6147 { 6148 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 6149 return 0; 6150 } 6151 6152 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 6153 { 6154 struct btrfs_root *reloc_root; 6155 struct btrfs_trans_handle *trans; 6156 struct btrfs_key location; 6157 int found; 6158 int ret; 6159 6160 mutex_lock(&root->fs_info->tree_reloc_mutex); 6161 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 6162 BUG_ON(ret); 6163 found = !list_empty(&root->fs_info->dead_reloc_roots); 6164 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6165 6166 if (found) { 6167 trans = btrfs_start_transaction(root, 1); 6168 BUG_ON(!trans); 6169 ret = btrfs_commit_transaction(trans, root); 6170 BUG_ON(ret); 6171 } 6172 6173 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6174 location.offset = (u64)-1; 6175 location.type = BTRFS_ROOT_ITEM_KEY; 6176 6177 reloc_root = btrfs_read_fs_root_no_name(root->fs_info,
&location); 6178 BUG_ON(!reloc_root); 6179 btrfs_orphan_cleanup(reloc_root); 6180 return 0; 6181 } 6182 6183 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 6184 struct btrfs_root *root) 6185 { 6186 struct btrfs_root *reloc_root; 6187 struct extent_buffer *eb; 6188 struct btrfs_root_item *root_item; 6189 struct btrfs_key root_key; 6190 int ret; 6191 6192 BUG_ON(!root->ref_cows); 6193 if (root->reloc_root) 6194 return 0; 6195 6196 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 6197 BUG_ON(!root_item); 6198 6199 ret = btrfs_copy_root(trans, root, root->commit_root, 6200 &eb, BTRFS_TREE_RELOC_OBJECTID); 6201 BUG_ON(ret); 6202 6203 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 6204 root_key.offset = root->root_key.objectid; 6205 root_key.type = BTRFS_ROOT_ITEM_KEY; 6206 6207 memcpy(root_item, &root->root_item, sizeof(*root_item)); 6208 btrfs_set_root_refs(root_item, 0); 6209 btrfs_set_root_bytenr(root_item, eb->start); 6210 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 6211 btrfs_set_root_generation(root_item, trans->transid); 6212 6213 btrfs_tree_unlock(eb); 6214 free_extent_buffer(eb); 6215 6216 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 6217 &root_key, root_item); 6218 BUG_ON(ret); 6219 kfree(root_item); 6220 6221 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 6222 &root_key); 6223 BUG_ON(!reloc_root); 6224 reloc_root->last_trans = trans->transid; 6225 reloc_root->commit_root = NULL; 6226 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 6227 6228 root->reloc_root = reloc_root; 6229 return 0; 6230 } 6231 6232 /* 6233 * Core function of space balance. 6234 * 6235 * The idea is to use reloc trees to relocate tree blocks in reference 6236 * counted roots. There is one reloc tree for each subvol, and all 6237 * reloc trees share the same root key objectid. Reloc trees are snapshots 6238 * of the latest committed roots of subvols (root->commit_root). 6239 * 6240 * Relocating a tree block referenced by a subvol takes two steps: 6241 * COW the block through the subvol's reloc tree, then update the block 6242 * pointer in the subvol to point to the new block. Since all reloc trees 6243 * share the same root key objectid, special handling for tree blocks 6244 * owned by them is easy. Once a tree block has been COWed in one reloc 6245 * tree, we can use the resulting new block directly when the same block 6246 * must be COWed again through another reloc tree. In this way, relocated 6247 * tree blocks are shared between reloc trees, so they are also 6248 * shared between subvols.
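*
* For example, relocating a leaf that is shared by subvol A and subvol B
* works roughly like this: the leaf is first COWed through A's reloc tree,
* producing a copy at the new location, and btrfs_merge_path() then points
* A at that copy. When the same leaf later has to be COWed through B's
* reloc tree, the existing copy is found and reused, so A and B keep
* sharing the block after relocation.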
6249 */ 6250 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 6251 struct btrfs_root *root, 6252 struct btrfs_path *path, 6253 struct btrfs_key *first_key, 6254 struct btrfs_ref_path *ref_path, 6255 struct btrfs_block_group_cache *group, 6256 struct inode *reloc_inode) 6257 { 6258 struct btrfs_root *reloc_root; 6259 struct extent_buffer *eb = NULL; 6260 struct btrfs_key *keys; 6261 u64 *nodes; 6262 int level; 6263 int shared_level; 6264 int lowest_level = 0; 6265 int ret; 6266 6267 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 6268 lowest_level = ref_path->owner_objectid; 6269 6270 if (!root->ref_cows) { 6271 path->lowest_level = lowest_level; 6272 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 6273 BUG_ON(ret < 0); 6274 path->lowest_level = 0; 6275 btrfs_release_path(root, path); 6276 return 0; 6277 } 6278 6279 mutex_lock(&root->fs_info->tree_reloc_mutex); 6280 ret = init_reloc_tree(trans, root); 6281 BUG_ON(ret); 6282 reloc_root = root->reloc_root; 6283 6284 shared_level = ref_path->shared_level; 6285 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 6286 6287 keys = ref_path->node_keys; 6288 nodes = ref_path->new_nodes; 6289 memset(&keys[shared_level + 1], 0, 6290 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6291 memset(&nodes[shared_level + 1], 0, 6292 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 6293 6294 if (nodes[lowest_level] == 0) { 6295 path->lowest_level = lowest_level; 6296 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6297 0, 1); 6298 BUG_ON(ret); 6299 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 6300 eb = path->nodes[level]; 6301 if (!eb || eb == reloc_root->node) 6302 break; 6303 nodes[level] = eb->start; 6304 if (level == 0) 6305 btrfs_item_key_to_cpu(eb, &keys[level], 0); 6306 else 6307 btrfs_node_key_to_cpu(eb, &keys[level], 0); 6308 } 6309 if (nodes[0] && 6310 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6311 eb = path->nodes[0]; 6312 ret = replace_extents_in_leaf(trans, reloc_root, eb, 6313 group, reloc_inode); 6314 BUG_ON(ret); 6315 } 6316 btrfs_release_path(reloc_root, path); 6317 } else { 6318 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 6319 lowest_level); 6320 BUG_ON(ret); 6321 } 6322 6323 /* 6324 * replace tree blocks in the fs tree with tree blocks in 6325 * the reloc tree. 
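* (keys[] and nodes[] were filled in above from the path in the reloc
* tree: nodes[level] is the bytenr of the relocated block at that level
* and keys[level] is its first key, which btrfs_merge_path() uses to
* locate and replace the corresponding slots in the fs tree.)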
6326 */ 6327 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 6328 BUG_ON(ret < 0); 6329 6330 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6331 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 6332 0, 0); 6333 BUG_ON(ret); 6334 extent_buffer_get(path->nodes[0]); 6335 eb = path->nodes[0]; 6336 btrfs_release_path(reloc_root, path); 6337 ret = invalidate_extent_cache(reloc_root, eb, group, root); 6338 BUG_ON(ret); 6339 free_extent_buffer(eb); 6340 } 6341 6342 mutex_unlock(&root->fs_info->tree_reloc_mutex); 6343 path->lowest_level = 0; 6344 return 0; 6345 } 6346 6347 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 6348 struct btrfs_root *root, 6349 struct btrfs_path *path, 6350 struct btrfs_key *first_key, 6351 struct btrfs_ref_path *ref_path) 6352 { 6353 int ret; 6354 6355 ret = relocate_one_path(trans, root, path, first_key, 6356 ref_path, NULL, NULL); 6357 BUG_ON(ret); 6358 6359 return 0; 6360 } 6361 6362 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 6363 struct btrfs_root *extent_root, 6364 struct btrfs_path *path, 6365 struct btrfs_key *extent_key) 6366 { 6367 int ret; 6368 6369 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 6370 if (ret) 6371 goto out; 6372 ret = btrfs_del_item(trans, extent_root, path); 6373 out: 6374 btrfs_release_path(extent_root, path); 6375 return ret; 6376 } 6377 6378 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 6379 struct btrfs_ref_path *ref_path) 6380 { 6381 struct btrfs_key root_key; 6382 6383 root_key.objectid = ref_path->root_objectid; 6384 root_key.type = BTRFS_ROOT_ITEM_KEY; 6385 if (is_cowonly_root(ref_path->root_objectid)) 6386 root_key.offset = 0; 6387 else 6388 root_key.offset = (u64)-1; 6389 6390 return btrfs_read_fs_root_no_name(fs_info, &root_key); 6391 } 6392 6393 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 6394 struct btrfs_path *path, 6395 struct btrfs_key *extent_key, 6396 struct btrfs_block_group_cache *group, 6397 struct inode *reloc_inode, int pass) 6398 { 6399 struct btrfs_trans_handle *trans; 6400 struct btrfs_root *found_root; 6401 struct btrfs_ref_path *ref_path = NULL; 6402 struct disk_extent *new_extents = NULL; 6403 int nr_extents = 0; 6404 int loops; 6405 int ret; 6406 int level; 6407 struct btrfs_key first_key; 6408 u64 prev_block = 0; 6409 6410 6411 trans = btrfs_start_transaction(extent_root, 1); 6412 BUG_ON(!trans); 6413 6414 if (extent_key->objectid == 0) { 6415 ret = del_extent_zero(trans, extent_root, path, extent_key); 6416 goto out; 6417 } 6418 6419 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 6420 if (!ref_path) { 6421 ret = -ENOMEM; 6422 goto out; 6423 } 6424 6425 for (loops = 0; ; loops++) { 6426 if (loops == 0) { 6427 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 6428 extent_key->objectid); 6429 } else { 6430 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 6431 } 6432 if (ret < 0) 6433 goto out; 6434 if (ret > 0) 6435 break; 6436 6437 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 6438 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 6439 continue; 6440 6441 found_root = read_ref_root(extent_root->fs_info, ref_path); 6442 BUG_ON(!found_root); 6443 /* 6444 * for reference counted tree, only process reference paths 6445 * rooted at the latest committed root. 
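* (the check below compares the root generation recorded in the ref
* path against root_key.offset of the root that was just read back;
* a mismatch means the path is not rooted at the latest committed root.)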
6446 */ 6447 if (found_root->ref_cows && 6448 ref_path->root_generation != found_root->root_key.offset) 6449 continue; 6450 6451 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6452 if (pass == 0) { 6453 /* 6454 * copy data extents to new locations 6455 */ 6456 u64 group_start = group->key.objectid; 6457 ret = relocate_data_extent(reloc_inode, 6458 extent_key, 6459 group_start); 6460 if (ret < 0) 6461 goto out; 6462 break; 6463 } 6464 level = 0; 6465 } else { 6466 level = ref_path->owner_objectid; 6467 } 6468 6469 if (prev_block != ref_path->nodes[level]) { 6470 struct extent_buffer *eb; 6471 u64 block_start = ref_path->nodes[level]; 6472 u64 block_size = btrfs_level_size(found_root, level); 6473 6474 eb = read_tree_block(found_root, block_start, 6475 block_size, 0); 6476 btrfs_tree_lock(eb); 6477 BUG_ON(level != btrfs_header_level(eb)); 6478 6479 if (level == 0) 6480 btrfs_item_key_to_cpu(eb, &first_key, 0); 6481 else 6482 btrfs_node_key_to_cpu(eb, &first_key, 0); 6483 6484 btrfs_tree_unlock(eb); 6485 free_extent_buffer(eb); 6486 prev_block = block_start; 6487 } 6488 6489 mutex_lock(&extent_root->fs_info->trans_mutex); 6490 btrfs_record_root_in_trans(found_root); 6491 mutex_unlock(&extent_root->fs_info->trans_mutex); 6492 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 6493 /* 6494 * try to update data extent references while 6495 * keeping metadata shared between snapshots. 6496 */ 6497 if (pass == 1) { 6498 ret = relocate_one_path(trans, found_root, 6499 path, &first_key, ref_path, 6500 group, reloc_inode); 6501 if (ret < 0) 6502 goto out; 6503 continue; 6504 } 6505 /* 6506 * use fallback method to process the remaining 6507 * references. 6508 */ 6509 if (!new_extents) { 6510 u64 group_start = group->key.objectid; 6511 new_extents = kmalloc(sizeof(*new_extents), 6512 GFP_NOFS); 6513 nr_extents = 1; 6514 ret = get_new_locations(reloc_inode, 6515 extent_key, 6516 group_start, 1, 6517 &new_extents, 6518 &nr_extents); 6519 if (ret) 6520 goto out; 6521 } 6522 ret = replace_one_extent(trans, found_root, 6523 path, extent_key, 6524 &first_key, ref_path, 6525 new_extents, nr_extents); 6526 } else { 6527 ret = relocate_tree_block(trans, found_root, path, 6528 &first_key, ref_path); 6529 } 6530 if (ret < 0) 6531 goto out; 6532 } 6533 ret = 0; 6534 out: 6535 btrfs_end_transaction(trans, extent_root); 6536 kfree(new_extents); 6537 kfree(ref_path); 6538 return ret; 6539 } 6540 #endif 6541 6542 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6543 { 6544 u64 num_devices; 6545 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6546 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6547 6548 num_devices = root->fs_info->fs_devices->rw_devices; 6549 if (num_devices == 1) { 6550 stripped |= BTRFS_BLOCK_GROUP_DUP; 6551 stripped = flags & ~stripped; 6552 6553 /* turn raid0 into single device chunks */ 6554 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6555 return stripped; 6556 6557 /* turn mirroring into duplication */ 6558 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6559 BTRFS_BLOCK_GROUP_RAID10)) 6560 return stripped | BTRFS_BLOCK_GROUP_DUP; 6561 return flags; 6562 } else { 6563 /* they already had raid on here, just return */ 6564 if (flags & stripped) 6565 return flags; 6566 6567 stripped |= BTRFS_BLOCK_GROUP_DUP; 6568 stripped = flags & ~stripped; 6569 6570 /* switch duplicated blocks with raid1 */ 6571 if (flags & BTRFS_BLOCK_GROUP_DUP) 6572 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6573 6574 /* turn single device chunks into raid0 */ 6575 return stripped | 
BTRFS_BLOCK_GROUP_RAID0; 6576 } 6577 return flags; 6578 } 6579 6580 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 6581 struct btrfs_block_group_cache *shrink_block_group, 6582 int force) 6583 { 6584 struct btrfs_trans_handle *trans; 6585 u64 new_alloc_flags; 6586 u64 calc; 6587 6588 spin_lock(&shrink_block_group->lock); 6589 if (btrfs_block_group_used(&shrink_block_group->item) + 6590 shrink_block_group->reserved > 0) { 6591 spin_unlock(&shrink_block_group->lock); 6592 6593 trans = btrfs_start_transaction(root, 1); 6594 spin_lock(&shrink_block_group->lock); 6595 6596 new_alloc_flags = update_block_group_flags(root, 6597 shrink_block_group->flags); 6598 if (new_alloc_flags != shrink_block_group->flags) { 6599 calc = 6600 btrfs_block_group_used(&shrink_block_group->item); 6601 } else { 6602 calc = shrink_block_group->key.offset; 6603 } 6604 spin_unlock(&shrink_block_group->lock); 6605 6606 do_chunk_alloc(trans, root->fs_info->extent_root, 6607 calc + 2 * 1024 * 1024, new_alloc_flags, force); 6608 6609 btrfs_end_transaction(trans, root); 6610 } else 6611 spin_unlock(&shrink_block_group->lock); 6612 return 0; 6613 } 6614 6615 6616 int btrfs_prepare_block_group_relocation(struct btrfs_root *root, 6617 struct btrfs_block_group_cache *group) 6618 6619 { 6620 __alloc_chunk_for_shrink(root, group, 1); 6621 set_block_group_readonly(group); 6622 return 0; 6623 } 6624 6625 #if 0 6626 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 6627 struct btrfs_root *root, 6628 u64 objectid, u64 size) 6629 { 6630 struct btrfs_path *path; 6631 struct btrfs_inode_item *item; 6632 struct extent_buffer *leaf; 6633 int ret; 6634 6635 path = btrfs_alloc_path(); 6636 if (!path) 6637 return -ENOMEM; 6638 6639 path->leave_spinning = 1; 6640 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 6641 if (ret) 6642 goto out; 6643 6644 leaf = path->nodes[0]; 6645 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 6646 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 6647 btrfs_set_inode_generation(leaf, item, 1); 6648 btrfs_set_inode_size(leaf, item, size); 6649 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 6650 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 6651 btrfs_mark_buffer_dirty(leaf); 6652 btrfs_release_path(root, path); 6653 out: 6654 btrfs_free_path(path); 6655 return ret; 6656 } 6657 6658 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 6659 struct btrfs_block_group_cache *group) 6660 { 6661 struct inode *inode = NULL; 6662 struct btrfs_trans_handle *trans; 6663 struct btrfs_root *root; 6664 struct btrfs_key root_key; 6665 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 6666 int err = 0; 6667 6668 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 6669 root_key.type = BTRFS_ROOT_ITEM_KEY; 6670 root_key.offset = (u64)-1; 6671 root = btrfs_read_fs_root_no_name(fs_info, &root_key); 6672 if (IS_ERR(root)) 6673 return ERR_CAST(root); 6674 6675 trans = btrfs_start_transaction(root, 1); 6676 BUG_ON(!trans); 6677 6678 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 6679 if (err) 6680 goto out; 6681 6682 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 6683 BUG_ON(err); 6684 6685 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, 6686 group->key.offset, 0, group->key.offset, 6687 0, 0, 0); 6688 BUG_ON(err); 6689 6690 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); 6691 if (inode->i_state & I_NEW) { 6692 BTRFS_I(inode)->root = root; 6693 
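/* key of the inode item inserted by __insert_orphan_inode() above */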
BTRFS_I(inode)->location.objectid = objectid; 6694 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 6695 BTRFS_I(inode)->location.offset = 0; 6696 btrfs_read_locked_inode(inode); 6697 unlock_new_inode(inode); 6698 BUG_ON(is_bad_inode(inode)); 6699 } else { 6700 BUG_ON(1); 6701 } 6702 BTRFS_I(inode)->index_cnt = group->key.objectid; 6703 6704 err = btrfs_orphan_add(trans, inode); 6705 out: 6706 btrfs_end_transaction(trans, root); 6707 if (err) { 6708 if (inode) 6709 iput(inode); 6710 inode = ERR_PTR(err); 6711 } 6712 return inode; 6713 } 6714 6715 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 6716 { 6717 6718 struct btrfs_ordered_sum *sums; 6719 struct btrfs_sector_sum *sector_sum; 6720 struct btrfs_ordered_extent *ordered; 6721 struct btrfs_root *root = BTRFS_I(inode)->root; 6722 struct list_head list; 6723 size_t offset; 6724 int ret; 6725 u64 disk_bytenr; 6726 6727 INIT_LIST_HEAD(&list); 6728 6729 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 6730 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 6731 6732 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 6733 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 6734 disk_bytenr + len - 1, &list); 6735 6736 while (!list_empty(&list)) { 6737 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 6738 list_del_init(&sums->list); 6739 6740 sector_sum = sums->sums; 6741 sums->bytenr = ordered->start; 6742 6743 offset = 0; 6744 while (offset < sums->len) { 6745 sector_sum->bytenr += ordered->start - disk_bytenr; 6746 sector_sum++; 6747 offset += root->sectorsize; 6748 } 6749 6750 btrfs_add_ordered_sum(inode, ordered, sums); 6751 } 6752 btrfs_put_ordered_extent(ordered); 6753 return 0; 6754 } 6755 6756 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) 6757 { 6758 struct btrfs_trans_handle *trans; 6759 struct btrfs_path *path; 6760 struct btrfs_fs_info *info = root->fs_info; 6761 struct extent_buffer *leaf; 6762 struct inode *reloc_inode; 6763 struct btrfs_block_group_cache *block_group; 6764 struct btrfs_key key; 6765 u64 skipped; 6766 u64 cur_byte; 6767 u64 total_found; 6768 u32 nritems; 6769 int ret; 6770 int progress; 6771 int pass = 0; 6772 6773 root = root->fs_info->extent_root; 6774 6775 block_group = btrfs_lookup_block_group(info, group_start); 6776 BUG_ON(!block_group); 6777 6778 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", 6779 (unsigned long long)block_group->key.objectid, 6780 (unsigned long long)block_group->flags); 6781 6782 path = btrfs_alloc_path(); 6783 BUG_ON(!path); 6784 6785 reloc_inode = create_reloc_inode(info, block_group); 6786 BUG_ON(IS_ERR(reloc_inode)); 6787 6788 __alloc_chunk_for_shrink(root, block_group, 1); 6789 set_block_group_readonly(block_group); 6790 6791 btrfs_start_delalloc_inodes(info->tree_root); 6792 btrfs_wait_ordered_extents(info->tree_root, 0); 6793 again: 6794 skipped = 0; 6795 total_found = 0; 6796 progress = 0; 6797 key.objectid = block_group->key.objectid; 6798 key.offset = 0; 6799 key.type = 0; 6800 cur_byte = key.objectid; 6801 6802 trans = btrfs_start_transaction(info->tree_root, 1); 6803 btrfs_commit_transaction(trans, info->tree_root); 6804 6805 mutex_lock(&root->fs_info->cleaner_mutex); 6806 btrfs_clean_old_snapshots(info->tree_root); 6807 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 6808 mutex_unlock(&root->fs_info->cleaner_mutex); 6809 6810 trans = btrfs_start_transaction(info->tree_root, 1); 6811 btrfs_commit_transaction(trans, info->tree_root); 6812 6813 
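/*
 * walk every extent item that falls inside this block group and try to
 * relocate it; the path is released and the search restarted after each
 * extent that gets processed
 */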
while (1) { 6814 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 6815 if (ret < 0) 6816 goto out; 6817 next: 6818 leaf = path->nodes[0]; 6819 nritems = btrfs_header_nritems(leaf); 6820 if (path->slots[0] >= nritems) { 6821 ret = btrfs_next_leaf(root, path); 6822 if (ret < 0) 6823 goto out; 6824 if (ret == 1) { 6825 ret = 0; 6826 break; 6827 } 6828 leaf = path->nodes[0]; 6829 nritems = btrfs_header_nritems(leaf); 6830 } 6831 6832 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 6833 6834 if (key.objectid >= block_group->key.objectid + 6835 block_group->key.offset) 6836 break; 6837 6838 if (progress && need_resched()) { 6839 btrfs_release_path(root, path); 6840 cond_resched(); 6841 progress = 0; 6842 continue; 6843 } 6844 progress = 1; 6845 6846 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || 6847 key.objectid + key.offset <= cur_byte) { 6848 path->slots[0]++; 6849 goto next; 6850 } 6851 6852 total_found++; 6853 cur_byte = key.objectid + key.offset; 6854 btrfs_release_path(root, path); 6855 6856 __alloc_chunk_for_shrink(root, block_group, 0); 6857 ret = relocate_one_extent(root, path, &key, block_group, 6858 reloc_inode, pass); 6859 BUG_ON(ret < 0); 6860 if (ret > 0) 6861 skipped++; 6862 6863 key.objectid = cur_byte; 6864 key.type = 0; 6865 key.offset = 0; 6866 } 6867 6868 btrfs_release_path(root, path); 6869 6870 if (pass == 0) { 6871 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); 6872 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); 6873 } 6874 6875 if (total_found > 0) { 6876 printk(KERN_INFO "btrfs found %llu extents in pass %d\n", 6877 (unsigned long long)total_found, pass); 6878 pass++; 6879 if (total_found == skipped && pass > 2) { 6880 iput(reloc_inode); 6881 reloc_inode = create_reloc_inode(info, block_group); 6882 pass = 0; 6883 } 6884 goto again; 6885 } 6886 6887 /* delete reloc_inode */ 6888 iput(reloc_inode); 6889 6890 /* unpin extents in this range */ 6891 trans = btrfs_start_transaction(info->tree_root, 1); 6892 btrfs_commit_transaction(trans, info->tree_root); 6893 6894 spin_lock(&block_group->lock); 6895 WARN_ON(block_group->pinned > 0); 6896 WARN_ON(block_group->reserved > 0); 6897 WARN_ON(btrfs_block_group_used(&block_group->item) > 0); 6898 spin_unlock(&block_group->lock); 6899 btrfs_put_block_group(block_group); 6900 ret = 0; 6901 out: 6902 btrfs_free_path(path); 6903 return ret; 6904 } 6905 #endif 6906 6907 static int find_first_block_group(struct btrfs_root *root, 6908 struct btrfs_path *path, struct btrfs_key *key) 6909 { 6910 int ret = 0; 6911 struct btrfs_key found_key; 6912 struct extent_buffer *leaf; 6913 int slot; 6914 6915 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 6916 if (ret < 0) 6917 goto out; 6918 6919 while (1) { 6920 slot = path->slots[0]; 6921 leaf = path->nodes[0]; 6922 if (slot >= btrfs_header_nritems(leaf)) { 6923 ret = btrfs_next_leaf(root, path); 6924 if (ret == 0) 6925 continue; 6926 if (ret < 0) 6927 goto out; 6928 break; 6929 } 6930 btrfs_item_key_to_cpu(leaf, &found_key, slot); 6931 6932 if (found_key.objectid >= key->objectid && 6933 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 6934 ret = 0; 6935 goto out; 6936 } 6937 path->slots[0]++; 6938 } 6939 ret = -ENOENT; 6940 out: 6941 return ret; 6942 } 6943 6944 int btrfs_free_block_groups(struct btrfs_fs_info *info) 6945 { 6946 struct btrfs_block_group_cache *block_group; 6947 struct btrfs_space_info *space_info; 6948 struct rb_node *n; 6949 6950 spin_lock(&info->block_group_cache_lock); 6951 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6952 
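/* detach the last block group from the rb tree, then drop the lock while tearing it down */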
block_group = rb_entry(n, struct btrfs_block_group_cache, 6953 cache_node); 6954 rb_erase(&block_group->cache_node, 6955 &info->block_group_cache_tree); 6956 spin_unlock(&info->block_group_cache_lock); 6957 6958 btrfs_remove_free_space_cache(block_group); 6959 down_write(&block_group->space_info->groups_sem); 6960 list_del(&block_group->list); 6961 up_write(&block_group->space_info->groups_sem); 6962 6963 WARN_ON(atomic_read(&block_group->count) != 1); 6964 kfree(block_group); 6965 6966 spin_lock(&info->block_group_cache_lock); 6967 } 6968 spin_unlock(&info->block_group_cache_lock); 6969 6970 /* now that all the block groups are freed, go through and 6971 * free all the space_info structs. This is only called during 6972 * the final stages of unmount, and so we know nobody is 6973 * using them. We call synchronize_rcu() once before we start, 6974 * just to be on the safe side. 6975 */ 6976 synchronize_rcu(); 6977 6978 while(!list_empty(&info->space_info)) { 6979 space_info = list_entry(info->space_info.next, 6980 struct btrfs_space_info, 6981 list); 6982 6983 list_del(&space_info->list); 6984 kfree(space_info); 6985 } 6986 return 0; 6987 } 6988 6989 int btrfs_read_block_groups(struct btrfs_root *root) 6990 { 6991 struct btrfs_path *path; 6992 int ret; 6993 struct btrfs_block_group_cache *cache; 6994 struct btrfs_fs_info *info = root->fs_info; 6995 struct btrfs_space_info *space_info; 6996 struct btrfs_key key; 6997 struct btrfs_key found_key; 6998 struct extent_buffer *leaf; 6999 7000 root = info->extent_root; 7001 key.objectid = 0; 7002 key.offset = 0; 7003 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 7004 path = btrfs_alloc_path(); 7005 if (!path) 7006 return -ENOMEM; 7007 7008 while (1) { 7009 ret = find_first_block_group(root, path, &key); 7010 if (ret > 0) { 7011 ret = 0; 7012 goto error; 7013 } 7014 if (ret != 0) 7015 goto error; 7016 7017 leaf = path->nodes[0]; 7018 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 7019 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7020 if (!cache) { 7021 ret = -ENOMEM; 7022 break; 7023 } 7024 7025 atomic_set(&cache->count, 1); 7026 spin_lock_init(&cache->lock); 7027 spin_lock_init(&cache->tree_lock); 7028 mutex_init(&cache->cache_mutex); 7029 INIT_LIST_HEAD(&cache->list); 7030 INIT_LIST_HEAD(&cache->cluster_list); 7031 read_extent_buffer(leaf, &cache->item, 7032 btrfs_item_ptr_offset(leaf, path->slots[0]), 7033 sizeof(cache->item)); 7034 memcpy(&cache->key, &found_key, sizeof(found_key)); 7035 7036 key.objectid = found_key.objectid + found_key.offset; 7037 btrfs_release_path(root, path); 7038 cache->flags = btrfs_block_group_flags(&cache->item); 7039 7040 ret = update_space_info(info, cache->flags, found_key.offset, 7041 btrfs_block_group_used(&cache->item), 7042 &space_info); 7043 BUG_ON(ret); 7044 cache->space_info = space_info; 7045 down_write(&space_info->groups_sem); 7046 list_add_tail(&cache->list, &space_info->block_groups); 7047 up_write(&space_info->groups_sem); 7048 7049 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7050 BUG_ON(ret); 7051 7052 set_avail_alloc_bits(root->fs_info, cache->flags); 7053 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7054 set_block_group_readonly(cache); 7055 } 7056 ret = 0; 7057 error: 7058 btrfs_free_path(path); 7059 return ret; 7060 } 7061 7062 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7063 struct btrfs_root *root, u64 bytes_used, 7064 u64 type, u64 chunk_objectid, u64 chunk_offset, 7065 u64 size) 7066 { 7067 int ret; 7068 struct btrfs_root *extent_root; 7069 
struct btrfs_block_group_cache *cache; 7070 7071 extent_root = root->fs_info->extent_root; 7072 7073 root->fs_info->last_trans_log_full_commit = trans->transid; 7074 7075 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7076 if (!cache) 7077 return -ENOMEM; 7078 7079 cache->key.objectid = chunk_offset; 7080 cache->key.offset = size; 7081 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7082 atomic_set(&cache->count, 1); 7083 spin_lock_init(&cache->lock); 7084 spin_lock_init(&cache->tree_lock); 7085 mutex_init(&cache->cache_mutex); 7086 INIT_LIST_HEAD(&cache->list); 7087 INIT_LIST_HEAD(&cache->cluster_list); 7088 7089 btrfs_set_block_group_used(&cache->item, bytes_used); 7090 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7091 cache->flags = type; 7092 btrfs_set_block_group_flags(&cache->item, type); 7093 7094 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7095 &cache->space_info); 7096 BUG_ON(ret); 7097 down_write(&cache->space_info->groups_sem); 7098 list_add_tail(&cache->list, &cache->space_info->block_groups); 7099 up_write(&cache->space_info->groups_sem); 7100 7101 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7102 BUG_ON(ret); 7103 7104 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7105 sizeof(cache->item)); 7106 BUG_ON(ret); 7107 7108 set_avail_alloc_bits(extent_root->fs_info, type); 7109 7110 return 0; 7111 } 7112 7113 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7114 struct btrfs_root *root, u64 group_start) 7115 { 7116 struct btrfs_path *path; 7117 struct btrfs_block_group_cache *block_group; 7118 struct btrfs_free_cluster *cluster; 7119 struct btrfs_key key; 7120 int ret; 7121 7122 root = root->fs_info->extent_root; 7123 7124 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7125 BUG_ON(!block_group); 7126 BUG_ON(!block_group->ro); 7127 7128 memcpy(&key, &block_group->key, sizeof(key)); 7129 7130 /* make sure this block group isn't part of an allocation cluster */ 7131 cluster = &root->fs_info->data_alloc_cluster; 7132 spin_lock(&cluster->refill_lock); 7133 btrfs_return_cluster_to_free_space(block_group, cluster); 7134 spin_unlock(&cluster->refill_lock); 7135 7136 /* 7137 * make sure this block group isn't part of a metadata 7138 * allocation cluster 7139 */ 7140 cluster = &root->fs_info->meta_alloc_cluster; 7141 spin_lock(&cluster->refill_lock); 7142 btrfs_return_cluster_to_free_space(block_group, cluster); 7143 spin_unlock(&cluster->refill_lock); 7144 7145 path = btrfs_alloc_path(); 7146 BUG_ON(!path); 7147 7148 spin_lock(&root->fs_info->block_group_cache_lock); 7149 rb_erase(&block_group->cache_node, 7150 &root->fs_info->block_group_cache_tree); 7151 spin_unlock(&root->fs_info->block_group_cache_lock); 7152 btrfs_remove_free_space_cache(block_group); 7153 down_write(&block_group->space_info->groups_sem); 7154 /* 7155 * we must use list_del_init so people can check to see if they 7156 * are still on the list after taking the semaphore 7157 */ 7158 list_del_init(&block_group->list); 7159 up_write(&block_group->space_info->groups_sem); 7160 7161 spin_lock(&block_group->space_info->lock); 7162 block_group->space_info->total_bytes -= block_group->key.offset; 7163 block_group->space_info->bytes_readonly -= block_group->key.offset; 7164 spin_unlock(&block_group->space_info->lock); 7165 block_group->space_info->full = 0; 7166 7167 btrfs_put_block_group(block_group); 7168 btrfs_put_block_group(block_group); 7169 7170 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7171 
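/* ret > 0 means the block group item was not found in the extent tree */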
if (ret > 0) 7172 ret = -EIO; 7173 if (ret < 0) 7174 goto out; 7175 7176 ret = btrfs_del_item(trans, root, path); 7177 out: 7178 btrfs_free_path(path); 7179 return ret; 7180 } 7181