/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */
#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/sort.h>
#include <linux/rcupdate.h>
#include "compat.h"
#include "hash.h"
#include "ctree.h"
#include "disk-io.h"
#include "print-tree.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "free-space-cache.h"

static int update_reserved_extents(struct btrfs_root *root,
				   u64 bytenr, u64 num, int reserve);
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free);
static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       u64 bytenr, u64 num_bytes, u64 parent,
			       u64 root_objectid, u64 owner_objectid,
			       u64 owner_offset, int refs_to_drop,
			       struct btrfs_delayed_extent_op *extra_op);
static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
				    struct extent_buffer *leaf,
				    struct btrfs_extent_item *ei);
static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      u64 parent, u64 root_objectid,
				      u64 flags, u64 owner, u64 offset,
				      struct btrfs_key *ins, int ref_mod);
static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root,
				     u64 parent, u64 root_objectid,
				     u64 flags, struct btrfs_disk_key *key,
				     int level, struct btrfs_key *ins);

static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force);

static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
{
	return (cache->flags & bits) == bits;
}

/*
 * this adds the block group to the fs_info rb tree for the block group
 * cache
 */
static int btrfs_add_block_group_cache(struct btrfs_fs_info *info,
				struct btrfs_block_group_cache *block_group)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct btrfs_block_group_cache *cache;

	spin_lock(&info->block_group_cache_lock);
	p = &info->block_group_cache_tree.rb_node;

	while (*p) {
		parent = *p;
		cache = rb_entry(parent, struct btrfs_block_group_cache,
				 cache_node);
		if (block_group->key.objectid < cache->key.objectid) {
			p = &(*p)->rb_left;
		} else if (block_group->key.objectid > cache->key.objectid) {
			p = &(*p)->rb_right;
		} else {
			spin_unlock(&info->block_group_cache_lock);
			return -EEXIST;
		}
	}

	rb_link_node(&block_group->cache_node, parent, p);
	rb_insert_color(&block_group->cache_node,
			&info->block_group_cache_tree);
	spin_unlock(&info->block_group_cache_lock);

	return 0;
}

/*
 * This will return the block group at or after bytenr if contains is 0, else
 * it will return the block group that contains the bytenr
 */
static struct btrfs_block_group_cache *
block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
			      int contains)
{
	struct btrfs_block_group_cache *cache, *ret = NULL;
	struct rb_node *n;
	u64 end, start;

	spin_lock(&info->block_group_cache_lock);
	n = info->block_group_cache_tree.rb_node;

	while (n) {
		cache = rb_entry(n, struct btrfs_block_group_cache,
				 cache_node);
		end = cache->key.objectid + cache->key.offset - 1;
		start = cache->key.objectid;

		if (bytenr < start) {
			if (!contains && (!ret || start < ret->key.objectid))
				ret = cache;
			n = n->rb_left;
		} else if (bytenr > start) {
			if (contains && bytenr <= end) {
				ret = cache;
				break;
			}
			n = n->rb_right;
		} else {
			ret = cache;
			break;
		}
	}
	if (ret)
		atomic_inc(&ret->count);
	spin_unlock(&info->block_group_cache_lock);

	return ret;
}

/*
 * this is only called by cache_block_group, since we could have freed extents
 * we need to check the pinned_extents for any extents that can't be used yet
 * since their free space will be released as soon as the transaction commits.
 */
static int add_new_free_space(struct btrfs_block_group_cache *block_group,
			      struct btrfs_fs_info *info, u64 start, u64 end)
{
	u64 extent_start, extent_end, size;
	int ret;

	while (start < end) {
		ret = find_first_extent_bit(&info->pinned_extents, start,
					    &extent_start, &extent_end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		if (extent_start == start) {
			start = extent_end + 1;
		} else if (extent_start > start && extent_start < end) {
			size = extent_start - start;
			ret = btrfs_add_free_space(block_group, start,
						   size);
			BUG_ON(ret);
			start = extent_end + 1;
		} else {
			break;
		}
	}

	if (start < end) {
		size = end - start;
		ret = btrfs_add_free_space(block_group, start, size);
		BUG_ON(ret);
	}

	return 0;
}

static int remove_sb_from_cache(struct btrfs_root *root,
				struct btrfs_block_group_cache *cache)
{
	u64 bytenr;
	u64 *logical;
	int stripe_len;
	int i, nr, ret;

	for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
		bytenr = btrfs_sb_offset(i);
		ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
				       cache->key.objectid, bytenr, 0,
				       &logical, &nr, &stripe_len);
		BUG_ON(ret);
		while (nr--) {
			btrfs_remove_free_space(cache, logical[nr],
						stripe_len);
		}
		kfree(logical);
	}
	return 0;
}

static int cache_block_group(struct btrfs_root *root,
			     struct btrfs_block_group_cache *block_group)
{
	struct btrfs_path *path;
	int ret = 0;
	struct btrfs_key key;
	struct extent_buffer *leaf;
	int slot;
	u64 last;

	if (!block_group)
		return 0;

	root = root->fs_info->extent_root;

	if (block_group->cached)
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->reada = 2;
	/*
	 * we get into deadlocks with paths held by callers of this function.
	 * since the alloc_mutex is protecting things right now, just
	 * skip the locking here
	 */
	path->skip_locking = 1;
	last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET);
	key.objectid = last;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0)
		goto err;

	while (1) {
		leaf = path->nodes[0];
		slot = path->slots[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto err;
			if (ret == 0)
				continue;
			else
				break;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid < block_group->key.objectid)
			goto next;

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
			add_new_free_space(block_group, root->fs_info, last,
					   key.objectid);

			last = key.objectid + key.offset;
		}
next:
		path->slots[0]++;
	}

	add_new_free_space(block_group, root->fs_info, last,
			   block_group->key.objectid +
			   block_group->key.offset);

	block_group->cached = 1;
	remove_sb_from_cache(root, block_group);
	ret = 0;
err:
	btrfs_free_path(path);
	return ret;
}

/*
 * return the block group that starts at or after bytenr
 */
static struct btrfs_block_group_cache *
btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 0);

	return cache;
}

/*
 * return the block group that contains the given bytenr
 */
struct btrfs_block_group_cache *btrfs_lookup_block_group(
						 struct btrfs_fs_info *info,
						 u64 bytenr)
{
	struct btrfs_block_group_cache *cache;

	cache = block_group_cache_tree_search(info, bytenr, 1);

	return cache;
}

void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
{
	if (atomic_dec_and_test(&cache->count))
		kfree(cache);
}

static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
						  u64 flags)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list) {
		if (found->flags == flags) {
			rcu_read_unlock();
			return found;
		}
	}
	rcu_read_unlock();
	return NULL;
}

/*
 * after adding space to the filesystem, we need to clear the full flags
 * on all the space infos.
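 *
 * (Illustrative note: the full flag is what makes do_chunk_alloc() skip a
 * space_info, so clearing it once new space arrives lets later chunk
 * allocations be retried instead of assuming the pool is exhausted.)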
 */
void btrfs_clear_space_info_full(struct btrfs_fs_info *info)
{
	struct list_head *head = &info->space_info;
	struct btrfs_space_info *found;

	rcu_read_lock();
	list_for_each_entry_rcu(found, head, list)
		found->full = 0;
	rcu_read_unlock();
}

static u64 div_factor(u64 num, int factor)
{
	if (factor == 10)
		return num;
	num *= factor;
	do_div(num, 10);
	return num;
}

u64 btrfs_find_block_group(struct btrfs_root *root,
			   u64 search_start, u64 search_hint, int owner)
{
	struct btrfs_block_group_cache *cache;
	u64 used;
	u64 last = max(search_hint, search_start);
	u64 group_start = 0;
	int full_search = 0;
	int factor = 9;
	int wrapped = 0;
again:
	while (1) {
		cache = btrfs_lookup_first_block_group(root->fs_info, last);
		if (!cache)
			break;

		spin_lock(&cache->lock);
		last = cache->key.objectid + cache->key.offset;
		used = btrfs_block_group_used(&cache->item);

		if ((full_search || !cache->ro) &&
		    block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) {
			if (used + cache->pinned + cache->reserved <
			    div_factor(cache->key.offset, factor)) {
				group_start = cache->key.objectid;
				spin_unlock(&cache->lock);
				btrfs_put_block_group(cache);
				goto found;
			}
		}
		spin_unlock(&cache->lock);
		btrfs_put_block_group(cache);
		cond_resched();
	}
	if (!wrapped) {
		last = search_start;
		wrapped = 1;
		goto again;
	}
	if (!full_search && factor < 10) {
		last = search_start;
		full_search = 1;
		factor = 10;
		goto again;
	}
found:
	return group_start;
}

/* simple helper to search for an existing extent at a given offset */
int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len)
{
	int ret;
	struct btrfs_key key;
	struct btrfs_path *path;

	path = btrfs_alloc_path();
	BUG_ON(!path);
	key.objectid = start;
	key.offset = len;
	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
	ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path,
				0, 0);
	btrfs_free_path(path);
	return ret;
}

/*
 * Back reference rules. Back refs have three main goals:
 *
 * 1) differentiate between all holders of references to an extent so that
 *    when a reference is dropped we can make sure it was a valid reference
 *    before freeing the extent.
 *
 * 2) Provide enough information to quickly find the holders of an extent
 *    if we notice a given block is corrupted or bad.
 *
 * 3) Make it easy to migrate blocks for FS shrinking or storage pool
 *    maintenance. This is actually the same as #2, but with a slightly
 *    different use case.
 *
 * There are two kinds of back refs. Implicit back refs are optimized for
 * pointers in non-shared tree blocks. For a given pointer in a block, back
 * refs of this kind record the block's owner tree and the pointer's key;
 * that information is enough to find the block again by b-tree searching.
 * Full back refs are for pointers in tree blocks that are not referenced by
 * their owner trees; the location of the tree block is recorded in the back
 * ref itself. Full back refs are actually generic and could be used wherever
 * implicit back refs are used, but their major shortcoming is overhead:
 * every time a tree block gets COWed, we have to update the back ref
 * entries for all pointers in it.
 *
 * For a newly allocated tree block, we use implicit back refs for the
 * pointers in it. This means most tree related operations only involve
 * implicit back refs. For a tree block created in an old transaction, the
 * only way to drop a reference to it is to COW it, so we can detect the
 * moment a tree block loses its owner tree's reference and do the back
 * refs conversion.
 *
 * When a tree block is COWed through a tree, there are four cases:
 *
 * The reference count of the block is one and the tree is the block's
 * owner tree. Nothing to do in this case.
 *
 * The reference count of the block is one and the tree is not the
 * block's owner tree. In this case, full back refs are used for the
 * pointers in the block. Remove these full back refs and add implicit
 * back refs for every pointer in the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * the block's owner tree. In this case, implicit back refs are used for
 * the pointers in the block. Add full back refs for every pointer in the
 * block and increase the lower level extents' reference counts. The
 * original implicit back refs carry over to the new block.
 *
 * The reference count of the block is greater than one and the tree is
 * not the block's owner tree. Add implicit back refs for every pointer in
 * the new block and increase the lower level extents' reference counts.
 *
 * Back reference key composition:
 *
 * The key objectid corresponds to the first byte in the extent, and the
 * key type is used to differentiate between types of back refs. The
 * meaning of the key offset depends on the type of back ref.
 *
 * File extents can be referenced by:
 *
 * - multiple snapshots, subvolumes, or different generations in one subvol
 * - different files inside a single subvolume
 * - different offsets inside a file (bookend extents in file.c)
 *
 * The extent ref structure for the implicit back refs has fields for:
 *
 * - objectid of the subvolume root
 * - objectid of the file holding the reference
 * - original offset in the file
 * - how many bookend extents
 *
 * The key offset for the implicit back refs is a hash of the first
 * three fields.
 *
 * The extent ref structure for the full back refs has a field for:
 *
 * - number of pointers in the tree leaf
 *
 * The key offset for the full back refs is the first byte of
 * the tree leaf.
 *
 * When a file extent is allocated, the implicit back refs are used and
 * the fields are filled in as:
 *
 *     (root_key.objectid, inode objectid, offset in file, 1)
 *
 * When a file extent is removed by file truncation, we find the
 * corresponding implicit back refs and check the following fields:
 *
 *     (btrfs_header_owner(leaf), inode objectid, offset in file)
 *
 * Btree extents can be referenced by:
 *
 * - different subvolumes
 *
 * Both the implicit back refs and the full back refs for tree blocks
 * consist only of a key. The key offset for the implicit back refs is the
 * objectid of the block's owner tree. The key offset for the full back
 * refs is the first byte of the parent block.
 *
 * When implicit back refs are used, information about the lowest key and
 * the level of the tree block is required. This information is stored in
 * the tree block info structure.
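 *
 * A worked example (illustrative): a data extent referenced once from file
 * offset 0 of inode 257 in subvolume 5 carries an implicit back ref keyed
 * (extent bytenr, BTRFS_EXTENT_DATA_REF_KEY, hash(5, 257, 0)) with a count
 * of 1, while a reference from a shared leaf instead uses a full back ref
 * keyed (extent bytenr, BTRFS_SHARED_DATA_REF_KEY, leaf bytenr).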
535 */ 536 537 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 538 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 539 struct btrfs_root *root, 540 struct btrfs_path *path, 541 u64 owner, u32 extra_size) 542 { 543 struct btrfs_extent_item *item; 544 struct btrfs_extent_item_v0 *ei0; 545 struct btrfs_extent_ref_v0 *ref0; 546 struct btrfs_tree_block_info *bi; 547 struct extent_buffer *leaf; 548 struct btrfs_key key; 549 struct btrfs_key found_key; 550 u32 new_size = sizeof(*item); 551 u64 refs; 552 int ret; 553 554 leaf = path->nodes[0]; 555 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 556 557 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 558 ei0 = btrfs_item_ptr(leaf, path->slots[0], 559 struct btrfs_extent_item_v0); 560 refs = btrfs_extent_refs_v0(leaf, ei0); 561 562 if (owner == (u64)-1) { 563 while (1) { 564 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 565 ret = btrfs_next_leaf(root, path); 566 if (ret < 0) 567 return ret; 568 BUG_ON(ret > 0); 569 leaf = path->nodes[0]; 570 } 571 btrfs_item_key_to_cpu(leaf, &found_key, 572 path->slots[0]); 573 BUG_ON(key.objectid != found_key.objectid); 574 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 575 path->slots[0]++; 576 continue; 577 } 578 ref0 = btrfs_item_ptr(leaf, path->slots[0], 579 struct btrfs_extent_ref_v0); 580 owner = btrfs_ref_objectid_v0(leaf, ref0); 581 break; 582 } 583 } 584 btrfs_release_path(root, path); 585 586 if (owner < BTRFS_FIRST_FREE_OBJECTID) 587 new_size += sizeof(*bi); 588 589 new_size -= sizeof(*ei0); 590 ret = btrfs_search_slot(trans, root, &key, path, 591 new_size + extra_size, 1); 592 if (ret < 0) 593 return ret; 594 BUG_ON(ret); 595 596 ret = btrfs_extend_item(trans, root, path, new_size); 597 BUG_ON(ret); 598 599 leaf = path->nodes[0]; 600 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 601 btrfs_set_extent_refs(leaf, item, refs); 602 /* FIXME: get real generation */ 603 btrfs_set_extent_generation(leaf, item, 0); 604 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 605 btrfs_set_extent_flags(leaf, item, 606 BTRFS_EXTENT_FLAG_TREE_BLOCK | 607 BTRFS_BLOCK_FLAG_FULL_BACKREF); 608 bi = (struct btrfs_tree_block_info *)(item + 1); 609 /* FIXME: get first key of the block */ 610 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 611 btrfs_set_tree_block_level(leaf, bi, (int)owner); 612 } else { 613 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 614 } 615 btrfs_mark_buffer_dirty(leaf); 616 return 0; 617 } 618 #endif 619 620 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 621 { 622 u32 high_crc = ~(u32)0; 623 u32 low_crc = ~(u32)0; 624 __le64 lenum; 625 626 lenum = cpu_to_le64(root_objectid); 627 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 628 lenum = cpu_to_le64(owner); 629 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 630 lenum = cpu_to_le64(offset); 631 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 632 633 return ((u64)high_crc << 31) ^ (u64)low_crc; 634 } 635 636 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 637 struct btrfs_extent_data_ref *ref) 638 { 639 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 640 btrfs_extent_data_ref_objectid(leaf, ref), 641 btrfs_extent_data_ref_offset(leaf, ref)); 642 } 643 644 static int match_extent_data_ref(struct extent_buffer *leaf, 645 struct btrfs_extent_data_ref *ref, 646 u64 root_objectid, u64 owner, u64 offset) 647 { 648 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 649 
btrfs_extent_data_ref_objectid(leaf, ref) != owner || 650 btrfs_extent_data_ref_offset(leaf, ref) != offset) 651 return 0; 652 return 1; 653 } 654 655 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 656 struct btrfs_root *root, 657 struct btrfs_path *path, 658 u64 bytenr, u64 parent, 659 u64 root_objectid, 660 u64 owner, u64 offset) 661 { 662 struct btrfs_key key; 663 struct btrfs_extent_data_ref *ref; 664 struct extent_buffer *leaf; 665 u32 nritems; 666 int ret; 667 int recow; 668 int err = -ENOENT; 669 670 key.objectid = bytenr; 671 if (parent) { 672 key.type = BTRFS_SHARED_DATA_REF_KEY; 673 key.offset = parent; 674 } else { 675 key.type = BTRFS_EXTENT_DATA_REF_KEY; 676 key.offset = hash_extent_data_ref(root_objectid, 677 owner, offset); 678 } 679 again: 680 recow = 0; 681 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 682 if (ret < 0) { 683 err = ret; 684 goto fail; 685 } 686 687 if (parent) { 688 if (!ret) 689 return 0; 690 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 691 key.type = BTRFS_EXTENT_REF_V0_KEY; 692 btrfs_release_path(root, path); 693 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 694 if (ret < 0) { 695 err = ret; 696 goto fail; 697 } 698 if (!ret) 699 return 0; 700 #endif 701 goto fail; 702 } 703 704 leaf = path->nodes[0]; 705 nritems = btrfs_header_nritems(leaf); 706 while (1) { 707 if (path->slots[0] >= nritems) { 708 ret = btrfs_next_leaf(root, path); 709 if (ret < 0) 710 err = ret; 711 if (ret) 712 goto fail; 713 714 leaf = path->nodes[0]; 715 nritems = btrfs_header_nritems(leaf); 716 recow = 1; 717 } 718 719 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 720 if (key.objectid != bytenr || 721 key.type != BTRFS_EXTENT_DATA_REF_KEY) 722 goto fail; 723 724 ref = btrfs_item_ptr(leaf, path->slots[0], 725 struct btrfs_extent_data_ref); 726 727 if (match_extent_data_ref(leaf, ref, root_objectid, 728 owner, offset)) { 729 if (recow) { 730 btrfs_release_path(root, path); 731 goto again; 732 } 733 err = 0; 734 break; 735 } 736 path->slots[0]++; 737 } 738 fail: 739 return err; 740 } 741 742 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 743 struct btrfs_root *root, 744 struct btrfs_path *path, 745 u64 bytenr, u64 parent, 746 u64 root_objectid, u64 owner, 747 u64 offset, int refs_to_add) 748 { 749 struct btrfs_key key; 750 struct extent_buffer *leaf; 751 u32 size; 752 u32 num_refs; 753 int ret; 754 755 key.objectid = bytenr; 756 if (parent) { 757 key.type = BTRFS_SHARED_DATA_REF_KEY; 758 key.offset = parent; 759 size = sizeof(struct btrfs_shared_data_ref); 760 } else { 761 key.type = BTRFS_EXTENT_DATA_REF_KEY; 762 key.offset = hash_extent_data_ref(root_objectid, 763 owner, offset); 764 size = sizeof(struct btrfs_extent_data_ref); 765 } 766 767 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 768 if (ret && ret != -EEXIST) 769 goto fail; 770 771 leaf = path->nodes[0]; 772 if (parent) { 773 struct btrfs_shared_data_ref *ref; 774 ref = btrfs_item_ptr(leaf, path->slots[0], 775 struct btrfs_shared_data_ref); 776 if (ret == 0) { 777 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 778 } else { 779 num_refs = btrfs_shared_data_ref_count(leaf, ref); 780 num_refs += refs_to_add; 781 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 782 } 783 } else { 784 struct btrfs_extent_data_ref *ref; 785 while (ret == -EEXIST) { 786 ref = btrfs_item_ptr(leaf, path->slots[0], 787 struct btrfs_extent_data_ref); 788 if (match_extent_data_ref(leaf, ref, root_objectid, 789 owner, offset)) 790 break; 791 
btrfs_release_path(root, path); 792 key.offset++; 793 ret = btrfs_insert_empty_item(trans, root, path, &key, 794 size); 795 if (ret && ret != -EEXIST) 796 goto fail; 797 798 leaf = path->nodes[0]; 799 } 800 ref = btrfs_item_ptr(leaf, path->slots[0], 801 struct btrfs_extent_data_ref); 802 if (ret == 0) { 803 btrfs_set_extent_data_ref_root(leaf, ref, 804 root_objectid); 805 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 806 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 807 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 808 } else { 809 num_refs = btrfs_extent_data_ref_count(leaf, ref); 810 num_refs += refs_to_add; 811 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 812 } 813 } 814 btrfs_mark_buffer_dirty(leaf); 815 ret = 0; 816 fail: 817 btrfs_release_path(root, path); 818 return ret; 819 } 820 821 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 822 struct btrfs_root *root, 823 struct btrfs_path *path, 824 int refs_to_drop) 825 { 826 struct btrfs_key key; 827 struct btrfs_extent_data_ref *ref1 = NULL; 828 struct btrfs_shared_data_ref *ref2 = NULL; 829 struct extent_buffer *leaf; 830 u32 num_refs = 0; 831 int ret = 0; 832 833 leaf = path->nodes[0]; 834 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 835 836 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 837 ref1 = btrfs_item_ptr(leaf, path->slots[0], 838 struct btrfs_extent_data_ref); 839 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 840 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 841 ref2 = btrfs_item_ptr(leaf, path->slots[0], 842 struct btrfs_shared_data_ref); 843 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 844 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 845 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 846 struct btrfs_extent_ref_v0 *ref0; 847 ref0 = btrfs_item_ptr(leaf, path->slots[0], 848 struct btrfs_extent_ref_v0); 849 num_refs = btrfs_ref_count_v0(leaf, ref0); 850 #endif 851 } else { 852 BUG(); 853 } 854 855 BUG_ON(num_refs < refs_to_drop); 856 num_refs -= refs_to_drop; 857 858 if (num_refs == 0) { 859 ret = btrfs_del_item(trans, root, path); 860 } else { 861 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 862 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 863 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 864 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 865 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 866 else { 867 struct btrfs_extent_ref_v0 *ref0; 868 ref0 = btrfs_item_ptr(leaf, path->slots[0], 869 struct btrfs_extent_ref_v0); 870 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 871 } 872 #endif 873 btrfs_mark_buffer_dirty(leaf); 874 } 875 return ret; 876 } 877 878 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 879 struct btrfs_path *path, 880 struct btrfs_extent_inline_ref *iref) 881 { 882 struct btrfs_key key; 883 struct extent_buffer *leaf; 884 struct btrfs_extent_data_ref *ref1; 885 struct btrfs_shared_data_ref *ref2; 886 u32 num_refs = 0; 887 888 leaf = path->nodes[0]; 889 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 890 if (iref) { 891 if (btrfs_extent_inline_ref_type(leaf, iref) == 892 BTRFS_EXTENT_DATA_REF_KEY) { 893 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 894 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 895 } else { 896 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 897 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 898 } 899 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 900 ref1 = btrfs_item_ptr(leaf, path->slots[0], 901 struct btrfs_extent_data_ref); 902 num_refs = 
btrfs_extent_data_ref_count(leaf, ref1); 903 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 904 ref2 = btrfs_item_ptr(leaf, path->slots[0], 905 struct btrfs_shared_data_ref); 906 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 907 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 908 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 909 struct btrfs_extent_ref_v0 *ref0; 910 ref0 = btrfs_item_ptr(leaf, path->slots[0], 911 struct btrfs_extent_ref_v0); 912 num_refs = btrfs_ref_count_v0(leaf, ref0); 913 #endif 914 } else { 915 WARN_ON(1); 916 } 917 return num_refs; 918 } 919 920 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 921 struct btrfs_root *root, 922 struct btrfs_path *path, 923 u64 bytenr, u64 parent, 924 u64 root_objectid) 925 { 926 struct btrfs_key key; 927 int ret; 928 929 key.objectid = bytenr; 930 if (parent) { 931 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 932 key.offset = parent; 933 } else { 934 key.type = BTRFS_TREE_BLOCK_REF_KEY; 935 key.offset = root_objectid; 936 } 937 938 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 939 if (ret > 0) 940 ret = -ENOENT; 941 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 942 if (ret == -ENOENT && parent) { 943 btrfs_release_path(root, path); 944 key.type = BTRFS_EXTENT_REF_V0_KEY; 945 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 946 if (ret > 0) 947 ret = -ENOENT; 948 } 949 #endif 950 return ret; 951 } 952 953 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 954 struct btrfs_root *root, 955 struct btrfs_path *path, 956 u64 bytenr, u64 parent, 957 u64 root_objectid) 958 { 959 struct btrfs_key key; 960 int ret; 961 962 key.objectid = bytenr; 963 if (parent) { 964 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 965 key.offset = parent; 966 } else { 967 key.type = BTRFS_TREE_BLOCK_REF_KEY; 968 key.offset = root_objectid; 969 } 970 971 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 972 btrfs_release_path(root, path); 973 return ret; 974 } 975 976 static inline int extent_ref_type(u64 parent, u64 owner) 977 { 978 int type; 979 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 980 if (parent > 0) 981 type = BTRFS_SHARED_BLOCK_REF_KEY; 982 else 983 type = BTRFS_TREE_BLOCK_REF_KEY; 984 } else { 985 if (parent > 0) 986 type = BTRFS_SHARED_DATA_REF_KEY; 987 else 988 type = BTRFS_EXTENT_DATA_REF_KEY; 989 } 990 return type; 991 } 992 993 static int find_next_key(struct btrfs_path *path, struct btrfs_key *key) 994 995 { 996 int level; 997 BUG_ON(!path->keep_locks); 998 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 999 if (!path->nodes[level]) 1000 break; 1001 btrfs_assert_tree_locked(path->nodes[level]); 1002 if (path->slots[level] + 1 >= 1003 btrfs_header_nritems(path->nodes[level])) 1004 continue; 1005 if (level == 0) 1006 btrfs_item_key_to_cpu(path->nodes[level], key, 1007 path->slots[level] + 1); 1008 else 1009 btrfs_node_key_to_cpu(path->nodes[level], key, 1010 path->slots[level] + 1); 1011 return 0; 1012 } 1013 return 1; 1014 } 1015 1016 /* 1017 * look for inline back ref. if back ref is found, *ref_ret is set 1018 * to the address of inline back ref, and 0 is returned. 1019 * 1020 * if back ref isn't found, *ref_ret is set to the address where it 1021 * should be inserted, and -ENOENT is returned. 1022 * 1023 * if insert is true and there are too many inline back refs, the path 1024 * points to the extent item, and -EAGAIN is returned. 1025 * 1026 * NOTE: inline back refs are ordered in the same way that back ref 1027 * items in the tree are ordered. 
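 *
 * (Roughly how callers use these results: insert_inline_extent_backref
 * updates the inline ref in place on 0 and calls setup_inline_extent_backref
 * on -ENOENT, while __btrfs_inc_extent_ref falls back to inserting a
 * separate keyed backref item when it gets -EAGAIN.)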
1028 */ 1029 static noinline_for_stack 1030 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1031 struct btrfs_root *root, 1032 struct btrfs_path *path, 1033 struct btrfs_extent_inline_ref **ref_ret, 1034 u64 bytenr, u64 num_bytes, 1035 u64 parent, u64 root_objectid, 1036 u64 owner, u64 offset, int insert) 1037 { 1038 struct btrfs_key key; 1039 struct extent_buffer *leaf; 1040 struct btrfs_extent_item *ei; 1041 struct btrfs_extent_inline_ref *iref; 1042 u64 flags; 1043 u64 item_size; 1044 unsigned long ptr; 1045 unsigned long end; 1046 int extra_size; 1047 int type; 1048 int want; 1049 int ret; 1050 int err = 0; 1051 1052 key.objectid = bytenr; 1053 key.type = BTRFS_EXTENT_ITEM_KEY; 1054 key.offset = num_bytes; 1055 1056 want = extent_ref_type(parent, owner); 1057 if (insert) { 1058 extra_size = btrfs_extent_inline_ref_size(want); 1059 path->keep_locks = 1; 1060 } else 1061 extra_size = -1; 1062 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1063 if (ret < 0) { 1064 err = ret; 1065 goto out; 1066 } 1067 BUG_ON(ret); 1068 1069 leaf = path->nodes[0]; 1070 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1071 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1072 if (item_size < sizeof(*ei)) { 1073 if (!insert) { 1074 err = -ENOENT; 1075 goto out; 1076 } 1077 ret = convert_extent_item_v0(trans, root, path, owner, 1078 extra_size); 1079 if (ret < 0) { 1080 err = ret; 1081 goto out; 1082 } 1083 leaf = path->nodes[0]; 1084 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1085 } 1086 #endif 1087 BUG_ON(item_size < sizeof(*ei)); 1088 1089 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1090 flags = btrfs_extent_flags(leaf, ei); 1091 1092 ptr = (unsigned long)(ei + 1); 1093 end = (unsigned long)ei + item_size; 1094 1095 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1096 ptr += sizeof(struct btrfs_tree_block_info); 1097 BUG_ON(ptr > end); 1098 } else { 1099 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1100 } 1101 1102 err = -ENOENT; 1103 while (1) { 1104 if (ptr >= end) { 1105 WARN_ON(ptr > end); 1106 break; 1107 } 1108 iref = (struct btrfs_extent_inline_ref *)ptr; 1109 type = btrfs_extent_inline_ref_type(leaf, iref); 1110 if (want < type) 1111 break; 1112 if (want > type) { 1113 ptr += btrfs_extent_inline_ref_size(type); 1114 continue; 1115 } 1116 1117 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1118 struct btrfs_extent_data_ref *dref; 1119 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1120 if (match_extent_data_ref(leaf, dref, root_objectid, 1121 owner, offset)) { 1122 err = 0; 1123 break; 1124 } 1125 if (hash_extent_data_ref_item(leaf, dref) < 1126 hash_extent_data_ref(root_objectid, owner, offset)) 1127 break; 1128 } else { 1129 u64 ref_offset; 1130 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1131 if (parent > 0) { 1132 if (parent == ref_offset) { 1133 err = 0; 1134 break; 1135 } 1136 if (ref_offset < parent) 1137 break; 1138 } else { 1139 if (root_objectid == ref_offset) { 1140 err = 0; 1141 break; 1142 } 1143 if (ref_offset < root_objectid) 1144 break; 1145 } 1146 } 1147 ptr += btrfs_extent_inline_ref_size(type); 1148 } 1149 if (err == -ENOENT && insert) { 1150 if (item_size + extra_size >= 1151 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1152 err = -EAGAIN; 1153 goto out; 1154 } 1155 /* 1156 * To add new inline back ref, we have to make sure 1157 * there is no corresponding back ref item. 
1158 * For simplicity, we just do not add new inline back 1159 * ref if there is any kind of item for this block 1160 */ 1161 if (find_next_key(path, &key) == 0 && key.objectid == bytenr && 1162 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1163 err = -EAGAIN; 1164 goto out; 1165 } 1166 } 1167 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1168 out: 1169 if (insert) { 1170 path->keep_locks = 0; 1171 btrfs_unlock_up_safe(path, 1); 1172 } 1173 return err; 1174 } 1175 1176 /* 1177 * helper to add new inline back ref 1178 */ 1179 static noinline_for_stack 1180 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1181 struct btrfs_root *root, 1182 struct btrfs_path *path, 1183 struct btrfs_extent_inline_ref *iref, 1184 u64 parent, u64 root_objectid, 1185 u64 owner, u64 offset, int refs_to_add, 1186 struct btrfs_delayed_extent_op *extent_op) 1187 { 1188 struct extent_buffer *leaf; 1189 struct btrfs_extent_item *ei; 1190 unsigned long ptr; 1191 unsigned long end; 1192 unsigned long item_offset; 1193 u64 refs; 1194 int size; 1195 int type; 1196 int ret; 1197 1198 leaf = path->nodes[0]; 1199 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1200 item_offset = (unsigned long)iref - (unsigned long)ei; 1201 1202 type = extent_ref_type(parent, owner); 1203 size = btrfs_extent_inline_ref_size(type); 1204 1205 ret = btrfs_extend_item(trans, root, path, size); 1206 BUG_ON(ret); 1207 1208 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1209 refs = btrfs_extent_refs(leaf, ei); 1210 refs += refs_to_add; 1211 btrfs_set_extent_refs(leaf, ei, refs); 1212 if (extent_op) 1213 __run_delayed_extent_op(extent_op, leaf, ei); 1214 1215 ptr = (unsigned long)ei + item_offset; 1216 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1217 if (ptr < end - size) 1218 memmove_extent_buffer(leaf, ptr + size, ptr, 1219 end - size - ptr); 1220 1221 iref = (struct btrfs_extent_inline_ref *)ptr; 1222 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1223 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1224 struct btrfs_extent_data_ref *dref; 1225 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1226 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1227 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1228 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1229 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1230 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1231 struct btrfs_shared_data_ref *sref; 1232 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1233 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1234 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1235 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1236 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1237 } else { 1238 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1239 } 1240 btrfs_mark_buffer_dirty(leaf); 1241 return 0; 1242 } 1243 1244 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1245 struct btrfs_root *root, 1246 struct btrfs_path *path, 1247 struct btrfs_extent_inline_ref **ref_ret, 1248 u64 bytenr, u64 num_bytes, u64 parent, 1249 u64 root_objectid, u64 owner, u64 offset) 1250 { 1251 int ret; 1252 1253 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1254 bytenr, num_bytes, parent, 1255 root_objectid, owner, offset, 0); 1256 if (ret != -ENOENT) 1257 return ret; 1258 1259 btrfs_release_path(root, path); 1260 *ref_ret = NULL; 1261 1262 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1263 ret = 
lookup_tree_block_ref(trans, root, path, bytenr, parent, 1264 root_objectid); 1265 } else { 1266 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1267 root_objectid, owner, offset); 1268 } 1269 return ret; 1270 } 1271 1272 /* 1273 * helper to update/remove inline back ref 1274 */ 1275 static noinline_for_stack 1276 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1277 struct btrfs_root *root, 1278 struct btrfs_path *path, 1279 struct btrfs_extent_inline_ref *iref, 1280 int refs_to_mod, 1281 struct btrfs_delayed_extent_op *extent_op) 1282 { 1283 struct extent_buffer *leaf; 1284 struct btrfs_extent_item *ei; 1285 struct btrfs_extent_data_ref *dref = NULL; 1286 struct btrfs_shared_data_ref *sref = NULL; 1287 unsigned long ptr; 1288 unsigned long end; 1289 u32 item_size; 1290 int size; 1291 int type; 1292 int ret; 1293 u64 refs; 1294 1295 leaf = path->nodes[0]; 1296 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1297 refs = btrfs_extent_refs(leaf, ei); 1298 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1299 refs += refs_to_mod; 1300 btrfs_set_extent_refs(leaf, ei, refs); 1301 if (extent_op) 1302 __run_delayed_extent_op(extent_op, leaf, ei); 1303 1304 type = btrfs_extent_inline_ref_type(leaf, iref); 1305 1306 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1307 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1308 refs = btrfs_extent_data_ref_count(leaf, dref); 1309 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1310 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1311 refs = btrfs_shared_data_ref_count(leaf, sref); 1312 } else { 1313 refs = 1; 1314 BUG_ON(refs_to_mod != -1); 1315 } 1316 1317 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1318 refs += refs_to_mod; 1319 1320 if (refs > 0) { 1321 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1322 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1323 else 1324 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1325 } else { 1326 size = btrfs_extent_inline_ref_size(type); 1327 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1328 ptr = (unsigned long)iref; 1329 end = (unsigned long)ei + item_size; 1330 if (ptr + size < end) 1331 memmove_extent_buffer(leaf, ptr, ptr + size, 1332 end - ptr - size); 1333 item_size -= size; 1334 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1335 BUG_ON(ret); 1336 } 1337 btrfs_mark_buffer_dirty(leaf); 1338 return 0; 1339 } 1340 1341 static noinline_for_stack 1342 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1343 struct btrfs_root *root, 1344 struct btrfs_path *path, 1345 u64 bytenr, u64 num_bytes, u64 parent, 1346 u64 root_objectid, u64 owner, 1347 u64 offset, int refs_to_add, 1348 struct btrfs_delayed_extent_op *extent_op) 1349 { 1350 struct btrfs_extent_inline_ref *iref; 1351 int ret; 1352 1353 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1354 bytenr, num_bytes, parent, 1355 root_objectid, owner, offset, 1); 1356 if (ret == 0) { 1357 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1358 ret = update_inline_extent_backref(trans, root, path, iref, 1359 refs_to_add, extent_op); 1360 } else if (ret == -ENOENT) { 1361 ret = setup_inline_extent_backref(trans, root, path, iref, 1362 parent, root_objectid, 1363 owner, offset, refs_to_add, 1364 extent_op); 1365 } 1366 return ret; 1367 } 1368 1369 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1370 struct btrfs_root *root, 1371 struct btrfs_path *path, 1372 u64 bytenr, u64 parent, u64 root_objectid, 1373 u64 owner, u64 offset, int refs_to_add) 
1374 { 1375 int ret; 1376 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1377 BUG_ON(refs_to_add != 1); 1378 ret = insert_tree_block_ref(trans, root, path, bytenr, 1379 parent, root_objectid); 1380 } else { 1381 ret = insert_extent_data_ref(trans, root, path, bytenr, 1382 parent, root_objectid, 1383 owner, offset, refs_to_add); 1384 } 1385 return ret; 1386 } 1387 1388 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1389 struct btrfs_root *root, 1390 struct btrfs_path *path, 1391 struct btrfs_extent_inline_ref *iref, 1392 int refs_to_drop, int is_data) 1393 { 1394 int ret; 1395 1396 BUG_ON(!is_data && refs_to_drop != 1); 1397 if (iref) { 1398 ret = update_inline_extent_backref(trans, root, path, iref, 1399 -refs_to_drop, NULL); 1400 } else if (is_data) { 1401 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1402 } else { 1403 ret = btrfs_del_item(trans, root, path); 1404 } 1405 return ret; 1406 } 1407 1408 #ifdef BIO_RW_DISCARD 1409 static void btrfs_issue_discard(struct block_device *bdev, 1410 u64 start, u64 len) 1411 { 1412 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 1413 } 1414 #endif 1415 1416 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1417 u64 num_bytes) 1418 { 1419 #ifdef BIO_RW_DISCARD 1420 int ret; 1421 u64 map_length = num_bytes; 1422 struct btrfs_multi_bio *multi = NULL; 1423 1424 /* Tell the block device(s) that the sectors can be discarded */ 1425 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 1426 bytenr, &map_length, &multi, 0); 1427 if (!ret) { 1428 struct btrfs_bio_stripe *stripe = multi->stripes; 1429 int i; 1430 1431 if (map_length > num_bytes) 1432 map_length = num_bytes; 1433 1434 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1435 btrfs_issue_discard(stripe->dev->bdev, 1436 stripe->physical, 1437 map_length); 1438 } 1439 kfree(multi); 1440 } 1441 1442 return ret; 1443 #else 1444 return 0; 1445 #endif 1446 } 1447 1448 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1449 struct btrfs_root *root, 1450 u64 bytenr, u64 num_bytes, u64 parent, 1451 u64 root_objectid, u64 owner, u64 offset) 1452 { 1453 int ret; 1454 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1455 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1456 1457 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1458 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1459 parent, root_objectid, (int)owner, 1460 BTRFS_ADD_DELAYED_REF, NULL); 1461 } else { 1462 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1463 parent, root_objectid, owner, offset, 1464 BTRFS_ADD_DELAYED_REF, NULL); 1465 } 1466 return ret; 1467 } 1468 1469 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1470 struct btrfs_root *root, 1471 u64 bytenr, u64 num_bytes, 1472 u64 parent, u64 root_objectid, 1473 u64 owner, u64 offset, int refs_to_add, 1474 struct btrfs_delayed_extent_op *extent_op) 1475 { 1476 struct btrfs_path *path; 1477 struct extent_buffer *leaf; 1478 struct btrfs_extent_item *item; 1479 u64 refs; 1480 int ret; 1481 int err = 0; 1482 1483 path = btrfs_alloc_path(); 1484 if (!path) 1485 return -ENOMEM; 1486 1487 path->reada = 1; 1488 path->leave_spinning = 1; 1489 /* this will setup the path even if it fails to insert the back ref */ 1490 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1491 path, bytenr, num_bytes, parent, 1492 root_objectid, owner, offset, 1493 refs_to_add, extent_op); 1494 if (ret == 0) 1495 goto out; 1496 1497 if (ret != -EAGAIN) { 1498 err = ret; 1499 goto out; 1500 } 1501 
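	/*
	 * -EAGAIN from insert_inline_extent_backref means the new ref did not
	 * fit inline; the path was left pointing at the extent item, so bump
	 * its ref count here and then insert the backref as a separate item.
	 */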
1502 leaf = path->nodes[0]; 1503 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1504 refs = btrfs_extent_refs(leaf, item); 1505 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1506 if (extent_op) 1507 __run_delayed_extent_op(extent_op, leaf, item); 1508 1509 btrfs_mark_buffer_dirty(leaf); 1510 btrfs_release_path(root->fs_info->extent_root, path); 1511 1512 path->reada = 1; 1513 path->leave_spinning = 1; 1514 1515 /* now insert the actual backref */ 1516 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1517 path, bytenr, parent, root_objectid, 1518 owner, offset, refs_to_add); 1519 BUG_ON(ret); 1520 out: 1521 btrfs_free_path(path); 1522 return err; 1523 } 1524 1525 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1526 struct btrfs_root *root, 1527 struct btrfs_delayed_ref_node *node, 1528 struct btrfs_delayed_extent_op *extent_op, 1529 int insert_reserved) 1530 { 1531 int ret = 0; 1532 struct btrfs_delayed_data_ref *ref; 1533 struct btrfs_key ins; 1534 u64 parent = 0; 1535 u64 ref_root = 0; 1536 u64 flags = 0; 1537 1538 ins.objectid = node->bytenr; 1539 ins.offset = node->num_bytes; 1540 ins.type = BTRFS_EXTENT_ITEM_KEY; 1541 1542 ref = btrfs_delayed_node_to_data_ref(node); 1543 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1544 parent = ref->parent; 1545 else 1546 ref_root = ref->root; 1547 1548 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1549 if (extent_op) { 1550 BUG_ON(extent_op->update_key); 1551 flags |= extent_op->flags_to_set; 1552 } 1553 ret = alloc_reserved_file_extent(trans, root, 1554 parent, ref_root, flags, 1555 ref->objectid, ref->offset, 1556 &ins, node->ref_mod); 1557 update_reserved_extents(root, ins.objectid, ins.offset, 0); 1558 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1559 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1560 node->num_bytes, parent, 1561 ref_root, ref->objectid, 1562 ref->offset, node->ref_mod, 1563 extent_op); 1564 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1565 ret = __btrfs_free_extent(trans, root, node->bytenr, 1566 node->num_bytes, parent, 1567 ref_root, ref->objectid, 1568 ref->offset, node->ref_mod, 1569 extent_op); 1570 } else { 1571 BUG(); 1572 } 1573 return ret; 1574 } 1575 1576 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1577 struct extent_buffer *leaf, 1578 struct btrfs_extent_item *ei) 1579 { 1580 u64 flags = btrfs_extent_flags(leaf, ei); 1581 if (extent_op->update_flags) { 1582 flags |= extent_op->flags_to_set; 1583 btrfs_set_extent_flags(leaf, ei, flags); 1584 } 1585 1586 if (extent_op->update_key) { 1587 struct btrfs_tree_block_info *bi; 1588 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1589 bi = (struct btrfs_tree_block_info *)(ei + 1); 1590 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1591 } 1592 } 1593 1594 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1595 struct btrfs_root *root, 1596 struct btrfs_delayed_ref_node *node, 1597 struct btrfs_delayed_extent_op *extent_op) 1598 { 1599 struct btrfs_key key; 1600 struct btrfs_path *path; 1601 struct btrfs_extent_item *ei; 1602 struct extent_buffer *leaf; 1603 u32 item_size; 1604 int ret; 1605 int err = 0; 1606 1607 path = btrfs_alloc_path(); 1608 if (!path) 1609 return -ENOMEM; 1610 1611 key.objectid = node->bytenr; 1612 key.type = BTRFS_EXTENT_ITEM_KEY; 1613 key.offset = node->num_bytes; 1614 1615 path->reada = 1; 1616 path->leave_spinning = 1; 1617 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 
1618 path, 0, 1); 1619 if (ret < 0) { 1620 err = ret; 1621 goto out; 1622 } 1623 if (ret > 0) { 1624 err = -EIO; 1625 goto out; 1626 } 1627 1628 leaf = path->nodes[0]; 1629 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1630 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1631 if (item_size < sizeof(*ei)) { 1632 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1633 path, (u64)-1, 0); 1634 if (ret < 0) { 1635 err = ret; 1636 goto out; 1637 } 1638 leaf = path->nodes[0]; 1639 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1640 } 1641 #endif 1642 BUG_ON(item_size < sizeof(*ei)); 1643 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1644 __run_delayed_extent_op(extent_op, leaf, ei); 1645 1646 btrfs_mark_buffer_dirty(leaf); 1647 out: 1648 btrfs_free_path(path); 1649 return err; 1650 } 1651 1652 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 1653 struct btrfs_root *root, 1654 struct btrfs_delayed_ref_node *node, 1655 struct btrfs_delayed_extent_op *extent_op, 1656 int insert_reserved) 1657 { 1658 int ret = 0; 1659 struct btrfs_delayed_tree_ref *ref; 1660 struct btrfs_key ins; 1661 u64 parent = 0; 1662 u64 ref_root = 0; 1663 1664 ins.objectid = node->bytenr; 1665 ins.offset = node->num_bytes; 1666 ins.type = BTRFS_EXTENT_ITEM_KEY; 1667 1668 ref = btrfs_delayed_node_to_tree_ref(node); 1669 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1670 parent = ref->parent; 1671 else 1672 ref_root = ref->root; 1673 1674 BUG_ON(node->ref_mod != 1); 1675 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1676 BUG_ON(!extent_op || !extent_op->update_flags || 1677 !extent_op->update_key); 1678 ret = alloc_reserved_tree_block(trans, root, 1679 parent, ref_root, 1680 extent_op->flags_to_set, 1681 &extent_op->key, 1682 ref->level, &ins); 1683 update_reserved_extents(root, ins.objectid, ins.offset, 0); 1684 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1685 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1686 node->num_bytes, parent, ref_root, 1687 ref->level, 0, 1, extent_op); 1688 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1689 ret = __btrfs_free_extent(trans, root, node->bytenr, 1690 node->num_bytes, parent, ref_root, 1691 ref->level, 0, 1, extent_op); 1692 } else { 1693 BUG(); 1694 } 1695 return ret; 1696 } 1697 1698 1699 /* helper function to actually process a single delayed ref entry */ 1700 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 1701 struct btrfs_root *root, 1702 struct btrfs_delayed_ref_node *node, 1703 struct btrfs_delayed_extent_op *extent_op, 1704 int insert_reserved) 1705 { 1706 int ret; 1707 if (btrfs_delayed_ref_is_head(node)) { 1708 struct btrfs_delayed_ref_head *head; 1709 /* 1710 * we've hit the end of the chain and we were supposed 1711 * to insert this extent into the tree. 
But, it got 1712 * deleted before we ever needed to insert it, so all 1713 * we have to do is clean up the accounting 1714 */ 1715 BUG_ON(extent_op); 1716 head = btrfs_delayed_node_to_head(node); 1717 if (insert_reserved) { 1718 if (head->is_data) { 1719 ret = btrfs_del_csums(trans, root, 1720 node->bytenr, 1721 node->num_bytes); 1722 BUG_ON(ret); 1723 } 1724 btrfs_update_pinned_extents(root, node->bytenr, 1725 node->num_bytes, 1); 1726 update_reserved_extents(root, node->bytenr, 1727 node->num_bytes, 0); 1728 } 1729 mutex_unlock(&head->mutex); 1730 return 0; 1731 } 1732 1733 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 1734 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 1735 ret = run_delayed_tree_ref(trans, root, node, extent_op, 1736 insert_reserved); 1737 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 1738 node->type == BTRFS_SHARED_DATA_REF_KEY) 1739 ret = run_delayed_data_ref(trans, root, node, extent_op, 1740 insert_reserved); 1741 else 1742 BUG(); 1743 return ret; 1744 } 1745 1746 static noinline struct btrfs_delayed_ref_node * 1747 select_delayed_ref(struct btrfs_delayed_ref_head *head) 1748 { 1749 struct rb_node *node; 1750 struct btrfs_delayed_ref_node *ref; 1751 int action = BTRFS_ADD_DELAYED_REF; 1752 again: 1753 /* 1754 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 1755 * this prevents ref count from going down to zero when 1756 * there still are pending delayed ref. 1757 */ 1758 node = rb_prev(&head->node.rb_node); 1759 while (1) { 1760 if (!node) 1761 break; 1762 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1763 rb_node); 1764 if (ref->bytenr != head->node.bytenr) 1765 break; 1766 if (ref->action == action) 1767 return ref; 1768 node = rb_prev(node); 1769 } 1770 if (action == BTRFS_ADD_DELAYED_REF) { 1771 action = BTRFS_DROP_DELAYED_REF; 1772 goto again; 1773 } 1774 return NULL; 1775 } 1776 1777 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 1778 struct btrfs_root *root, 1779 struct list_head *cluster) 1780 { 1781 struct btrfs_delayed_ref_root *delayed_refs; 1782 struct btrfs_delayed_ref_node *ref; 1783 struct btrfs_delayed_ref_head *locked_ref = NULL; 1784 struct btrfs_delayed_extent_op *extent_op; 1785 int ret; 1786 int count = 0; 1787 int must_insert_reserved = 0; 1788 1789 delayed_refs = &trans->transaction->delayed_refs; 1790 while (1) { 1791 if (!locked_ref) { 1792 /* pick a new head ref from the cluster list */ 1793 if (list_empty(cluster)) 1794 break; 1795 1796 locked_ref = list_entry(cluster->next, 1797 struct btrfs_delayed_ref_head, cluster); 1798 1799 /* grab the lock that says we are going to process 1800 * all the refs for this head */ 1801 ret = btrfs_delayed_ref_lock(trans, locked_ref); 1802 1803 /* 1804 * we may have dropped the spin lock to get the head 1805 * mutex lock, and that might have given someone else 1806 * time to free the head. If that's true, it has been 1807 * removed from our list and we can move on. 1808 */ 1809 if (ret == -EAGAIN) { 1810 locked_ref = NULL; 1811 count++; 1812 continue; 1813 } 1814 } 1815 1816 /* 1817 * record the must insert reserved flag before we 1818 * drop the spin lock. 
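		 *
		 * (must_insert_reserved means this head still owns a reserved
		 * allocation whose accounting has to be settled exactly once,
		 * so it is read and cleared while the lock is still held.)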
1819 */ 1820 must_insert_reserved = locked_ref->must_insert_reserved; 1821 locked_ref->must_insert_reserved = 0; 1822 1823 extent_op = locked_ref->extent_op; 1824 locked_ref->extent_op = NULL; 1825 1826 /* 1827 * locked_ref is the head node, so we have to go one 1828 * node back for any delayed ref updates 1829 */ 1830 ref = select_delayed_ref(locked_ref); 1831 if (!ref) { 1832 /* All delayed refs have been processed, Go ahead 1833 * and send the head node to run_one_delayed_ref, 1834 * so that any accounting fixes can happen 1835 */ 1836 ref = &locked_ref->node; 1837 1838 if (extent_op && must_insert_reserved) { 1839 kfree(extent_op); 1840 extent_op = NULL; 1841 } 1842 1843 if (extent_op) { 1844 spin_unlock(&delayed_refs->lock); 1845 1846 ret = run_delayed_extent_op(trans, root, 1847 ref, extent_op); 1848 BUG_ON(ret); 1849 kfree(extent_op); 1850 1851 cond_resched(); 1852 spin_lock(&delayed_refs->lock); 1853 continue; 1854 } 1855 1856 list_del_init(&locked_ref->cluster); 1857 locked_ref = NULL; 1858 } 1859 1860 ref->in_tree = 0; 1861 rb_erase(&ref->rb_node, &delayed_refs->root); 1862 delayed_refs->num_entries--; 1863 1864 spin_unlock(&delayed_refs->lock); 1865 1866 ret = run_one_delayed_ref(trans, root, ref, extent_op, 1867 must_insert_reserved); 1868 BUG_ON(ret); 1869 1870 btrfs_put_delayed_ref(ref); 1871 kfree(extent_op); 1872 count++; 1873 1874 cond_resched(); 1875 spin_lock(&delayed_refs->lock); 1876 } 1877 return count; 1878 } 1879 1880 /* 1881 * this starts processing the delayed reference count updates and 1882 * extent insertions we have queued up so far. count can be 1883 * 0, which means to process everything in the tree at the start 1884 * of the run (but not newly added entries), or it can be some target 1885 * number you'd like to process. 1886 */ 1887 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 1888 struct btrfs_root *root, unsigned long count) 1889 { 1890 struct rb_node *node; 1891 struct btrfs_delayed_ref_root *delayed_refs; 1892 struct btrfs_delayed_ref_node *ref; 1893 struct list_head cluster; 1894 int ret; 1895 int run_all = count == (unsigned long)-1; 1896 int run_most = 0; 1897 1898 if (root == root->fs_info->extent_root) 1899 root = root->fs_info->tree_root; 1900 1901 delayed_refs = &trans->transaction->delayed_refs; 1902 INIT_LIST_HEAD(&cluster); 1903 again: 1904 spin_lock(&delayed_refs->lock); 1905 if (count == 0) { 1906 count = delayed_refs->num_entries * 2; 1907 run_most = 1; 1908 } 1909 while (1) { 1910 if (!(run_all || run_most) && 1911 delayed_refs->num_heads_ready < 64) 1912 break; 1913 1914 /* 1915 * go find something we can process in the rbtree. 
We start at 1916 * the beginning of the tree, and then build a cluster 1917 * of refs to process starting at the first one we are able to 1918 * lock 1919 */ 1920 ret = btrfs_find_ref_cluster(trans, &cluster, 1921 delayed_refs->run_delayed_start); 1922 if (ret) 1923 break; 1924 1925 ret = run_clustered_refs(trans, root, &cluster); 1926 BUG_ON(ret < 0); 1927 1928 count -= min_t(unsigned long, ret, count); 1929 1930 if (count == 0) 1931 break; 1932 } 1933 1934 if (run_all) { 1935 node = rb_first(&delayed_refs->root); 1936 if (!node) 1937 goto out; 1938 count = (unsigned long)-1; 1939 1940 while (node) { 1941 ref = rb_entry(node, struct btrfs_delayed_ref_node, 1942 rb_node); 1943 if (btrfs_delayed_ref_is_head(ref)) { 1944 struct btrfs_delayed_ref_head *head; 1945 1946 head = btrfs_delayed_node_to_head(ref); 1947 atomic_inc(&ref->refs); 1948 1949 spin_unlock(&delayed_refs->lock); 1950 mutex_lock(&head->mutex); 1951 mutex_unlock(&head->mutex); 1952 1953 btrfs_put_delayed_ref(ref); 1954 cond_resched(); 1955 goto again; 1956 } 1957 node = rb_next(node); 1958 } 1959 spin_unlock(&delayed_refs->lock); 1960 schedule_timeout(1); 1961 goto again; 1962 } 1963 out: 1964 spin_unlock(&delayed_refs->lock); 1965 return 0; 1966 } 1967 1968 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 1969 struct btrfs_root *root, 1970 u64 bytenr, u64 num_bytes, u64 flags, 1971 int is_data) 1972 { 1973 struct btrfs_delayed_extent_op *extent_op; 1974 int ret; 1975 1976 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 1977 if (!extent_op) 1978 return -ENOMEM; 1979 1980 extent_op->flags_to_set = flags; 1981 extent_op->update_flags = 1; 1982 extent_op->update_key = 0; 1983 extent_op->is_data = is_data ? 1 : 0; 1984 1985 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 1986 if (ret) 1987 kfree(extent_op); 1988 return ret; 1989 } 1990 1991 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 1992 struct btrfs_root *root, 1993 struct btrfs_path *path, 1994 u64 objectid, u64 offset, u64 bytenr) 1995 { 1996 struct btrfs_delayed_ref_head *head; 1997 struct btrfs_delayed_ref_node *ref; 1998 struct btrfs_delayed_data_ref *data_ref; 1999 struct btrfs_delayed_ref_root *delayed_refs; 2000 struct rb_node *node; 2001 int ret = 0; 2002 2003 ret = -ENOENT; 2004 delayed_refs = &trans->transaction->delayed_refs; 2005 spin_lock(&delayed_refs->lock); 2006 head = btrfs_find_delayed_ref_head(trans, bytenr); 2007 if (!head) 2008 goto out; 2009 2010 if (!mutex_trylock(&head->mutex)) { 2011 atomic_inc(&head->node.refs); 2012 spin_unlock(&delayed_refs->lock); 2013 2014 btrfs_release_path(root->fs_info->extent_root, path); 2015 2016 mutex_lock(&head->mutex); 2017 mutex_unlock(&head->mutex); 2018 btrfs_put_delayed_ref(&head->node); 2019 return -EAGAIN; 2020 } 2021 2022 node = rb_prev(&head->node.rb_node); 2023 if (!node) 2024 goto out_unlock; 2025 2026 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2027 2028 if (ref->bytenr != bytenr) 2029 goto out_unlock; 2030 2031 ret = 1; 2032 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2033 goto out_unlock; 2034 2035 data_ref = btrfs_delayed_node_to_data_ref(ref); 2036 2037 node = rb_prev(node); 2038 if (node) { 2039 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2040 if (ref->bytenr == bytenr) 2041 goto out_unlock; 2042 } 2043 2044 if (data_ref->root != root->root_key.objectid || 2045 data_ref->objectid != objectid || data_ref->offset != offset) 2046 goto out_unlock; 2047 2048 ret = 0; 2049 out_unlock: 2050 
mutex_unlock(&head->mutex); 2051 out: 2052 spin_unlock(&delayed_refs->lock); 2053 return ret; 2054 } 2055 2056 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2057 struct btrfs_root *root, 2058 struct btrfs_path *path, 2059 u64 objectid, u64 offset, u64 bytenr) 2060 { 2061 struct btrfs_root *extent_root = root->fs_info->extent_root; 2062 struct extent_buffer *leaf; 2063 struct btrfs_extent_data_ref *ref; 2064 struct btrfs_extent_inline_ref *iref; 2065 struct btrfs_extent_item *ei; 2066 struct btrfs_key key; 2067 u32 item_size; 2068 int ret; 2069 2070 key.objectid = bytenr; 2071 key.offset = (u64)-1; 2072 key.type = BTRFS_EXTENT_ITEM_KEY; 2073 2074 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2075 if (ret < 0) 2076 goto out; 2077 BUG_ON(ret == 0); 2078 2079 ret = -ENOENT; 2080 if (path->slots[0] == 0) 2081 goto out; 2082 2083 path->slots[0]--; 2084 leaf = path->nodes[0]; 2085 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2086 2087 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2088 goto out; 2089 2090 ret = 1; 2091 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2092 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2093 if (item_size < sizeof(*ei)) { 2094 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2095 goto out; 2096 } 2097 #endif 2098 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2099 2100 if (item_size != sizeof(*ei) + 2101 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2102 goto out; 2103 2104 if (btrfs_extent_generation(leaf, ei) <= 2105 btrfs_root_last_snapshot(&root->root_item)) 2106 goto out; 2107 2108 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2109 if (btrfs_extent_inline_ref_type(leaf, iref) != 2110 BTRFS_EXTENT_DATA_REF_KEY) 2111 goto out; 2112 2113 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2114 if (btrfs_extent_refs(leaf, ei) != 2115 btrfs_extent_data_ref_count(leaf, ref) || 2116 btrfs_extent_data_ref_root(leaf, ref) != 2117 root->root_key.objectid || 2118 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2119 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2120 goto out; 2121 2122 ret = 0; 2123 out: 2124 return ret; 2125 } 2126 2127 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2128 struct btrfs_root *root, 2129 u64 objectid, u64 offset, u64 bytenr) 2130 { 2131 struct btrfs_path *path; 2132 int ret; 2133 int ret2; 2134 2135 path = btrfs_alloc_path(); 2136 if (!path) 2137 return -ENOENT; 2138 2139 do { 2140 ret = check_committed_ref(trans, root, path, objectid, 2141 offset, bytenr); 2142 if (ret && ret != -ENOENT) 2143 goto out; 2144 2145 ret2 = check_delayed_ref(trans, root, path, objectid, 2146 offset, bytenr); 2147 } while (ret2 == -EAGAIN); 2148 2149 if (ret2 && ret2 != -ENOENT) { 2150 ret = ret2; 2151 goto out; 2152 } 2153 2154 if (ret != -ENOENT || ret2 != -ENOENT) 2155 ret = 0; 2156 out: 2157 btrfs_free_path(path); 2158 return ret; 2159 } 2160 2161 #if 0 2162 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2163 struct extent_buffer *buf, u32 nr_extents) 2164 { 2165 struct btrfs_key key; 2166 struct btrfs_file_extent_item *fi; 2167 u64 root_gen; 2168 u32 nritems; 2169 int i; 2170 int level; 2171 int ret = 0; 2172 int shared = 0; 2173 2174 if (!root->ref_cows) 2175 return 0; 2176 2177 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 2178 shared = 0; 2179 root_gen = root->root_key.offset; 2180 } else { 2181 shared = 1; 2182 root_gen = trans->transid - 1; 2183 } 2184 2185 level = 
btrfs_header_level(buf); 2186 nritems = btrfs_header_nritems(buf); 2187 2188 if (level == 0) { 2189 struct btrfs_leaf_ref *ref; 2190 struct btrfs_extent_info *info; 2191 2192 ref = btrfs_alloc_leaf_ref(root, nr_extents); 2193 if (!ref) { 2194 ret = -ENOMEM; 2195 goto out; 2196 } 2197 2198 ref->root_gen = root_gen; 2199 ref->bytenr = buf->start; 2200 ref->owner = btrfs_header_owner(buf); 2201 ref->generation = btrfs_header_generation(buf); 2202 ref->nritems = nr_extents; 2203 info = ref->extents; 2204 2205 for (i = 0; nr_extents > 0 && i < nritems; i++) { 2206 u64 disk_bytenr; 2207 btrfs_item_key_to_cpu(buf, &key, i); 2208 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2209 continue; 2210 fi = btrfs_item_ptr(buf, i, 2211 struct btrfs_file_extent_item); 2212 if (btrfs_file_extent_type(buf, fi) == 2213 BTRFS_FILE_EXTENT_INLINE) 2214 continue; 2215 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2216 if (disk_bytenr == 0) 2217 continue; 2218 2219 info->bytenr = disk_bytenr; 2220 info->num_bytes = 2221 btrfs_file_extent_disk_num_bytes(buf, fi); 2222 info->objectid = key.objectid; 2223 info->offset = key.offset; 2224 info++; 2225 } 2226 2227 ret = btrfs_add_leaf_ref(root, ref, shared); 2228 if (ret == -EEXIST && shared) { 2229 struct btrfs_leaf_ref *old; 2230 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 2231 BUG_ON(!old); 2232 btrfs_remove_leaf_ref(root, old); 2233 btrfs_free_leaf_ref(root, old); 2234 ret = btrfs_add_leaf_ref(root, ref, shared); 2235 } 2236 WARN_ON(ret); 2237 btrfs_free_leaf_ref(root, ref); 2238 } 2239 out: 2240 return ret; 2241 } 2242 2243 /* when a block goes through cow, we update the reference counts of 2244 * everything that block points to. The internal pointers of the block 2245 * can be in just about any order, and it is likely to have clusters of 2246 * things that are close together and clusters of things that are not. 2247 * 2248 * To help reduce the seeks that come with updating all of these reference 2249 * counts, sort them by byte number before actual updates are done. 2250 * 2251 * struct refsort is used to match byte number to slot in the btree block. 2252 * we sort based on the byte number and then use the slot to actually 2253 * find the item. 2254 * 2255 * struct refsort is smaller than strcut btrfs_item and smaller than 2256 * struct btrfs_key_ptr. Since we're currently limited to the page size 2257 * for a btree block, there's no way for a kmalloc of refsorts for a 2258 * single node to be bigger than a page. 
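 *
 * the usage pattern is simply: record each item's disk bytenr and its
 * btree slot, sort by bytenr, then do the reference updates in sorted
 * order while using the slot to find the item again, roughly:
 *
 *	for (i = 0; i < nritems; i++) {
 *		sorted[i].bytenr = ref->extents[i].bytenr;
 *		sorted[i].slot = i;
 *	}
 *	sort(sorted, nritems, sizeof(struct refsort), refsort_cmp, NULL);
 *
 * (see cache_drop_leaf_ref() below for the full version of this.)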
2259 */ 2260 struct refsort { 2261 u64 bytenr; 2262 u32 slot; 2263 }; 2264 2265 /* 2266 * for passing into sort() 2267 */ 2268 static int refsort_cmp(const void *a_void, const void *b_void) 2269 { 2270 const struct refsort *a = a_void; 2271 const struct refsort *b = b_void; 2272 2273 if (a->bytenr < b->bytenr) 2274 return -1; 2275 if (a->bytenr > b->bytenr) 2276 return 1; 2277 return 0; 2278 } 2279 #endif 2280 2281 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2282 struct btrfs_root *root, 2283 struct extent_buffer *buf, 2284 int full_backref, int inc) 2285 { 2286 u64 bytenr; 2287 u64 num_bytes; 2288 u64 parent; 2289 u64 ref_root; 2290 u32 nritems; 2291 struct btrfs_key key; 2292 struct btrfs_file_extent_item *fi; 2293 int i; 2294 int level; 2295 int ret = 0; 2296 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2297 u64, u64, u64, u64, u64, u64); 2298 2299 ref_root = btrfs_header_owner(buf); 2300 nritems = btrfs_header_nritems(buf); 2301 level = btrfs_header_level(buf); 2302 2303 if (!root->ref_cows && level == 0) 2304 return 0; 2305 2306 if (inc) 2307 process_func = btrfs_inc_extent_ref; 2308 else 2309 process_func = btrfs_free_extent; 2310 2311 if (full_backref) 2312 parent = buf->start; 2313 else 2314 parent = 0; 2315 2316 for (i = 0; i < nritems; i++) { 2317 if (level == 0) { 2318 btrfs_item_key_to_cpu(buf, &key, i); 2319 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2320 continue; 2321 fi = btrfs_item_ptr(buf, i, 2322 struct btrfs_file_extent_item); 2323 if (btrfs_file_extent_type(buf, fi) == 2324 BTRFS_FILE_EXTENT_INLINE) 2325 continue; 2326 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2327 if (bytenr == 0) 2328 continue; 2329 2330 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2331 key.offset -= btrfs_file_extent_offset(buf, fi); 2332 ret = process_func(trans, root, bytenr, num_bytes, 2333 parent, ref_root, key.objectid, 2334 key.offset); 2335 if (ret) 2336 goto fail; 2337 } else { 2338 bytenr = btrfs_node_blockptr(buf, i); 2339 num_bytes = btrfs_level_size(root, level - 1); 2340 ret = process_func(trans, root, bytenr, num_bytes, 2341 parent, ref_root, level - 1, 0); 2342 if (ret) 2343 goto fail; 2344 } 2345 } 2346 return 0; 2347 fail: 2348 BUG(); 2349 return ret; 2350 } 2351 2352 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2353 struct extent_buffer *buf, int full_backref) 2354 { 2355 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2356 } 2357 2358 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2359 struct extent_buffer *buf, int full_backref) 2360 { 2361 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2362 } 2363 2364 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2365 struct btrfs_root *root, 2366 struct btrfs_path *path, 2367 struct btrfs_block_group_cache *cache) 2368 { 2369 int ret; 2370 struct btrfs_root *extent_root = root->fs_info->extent_root; 2371 unsigned long bi; 2372 struct extent_buffer *leaf; 2373 2374 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2375 if (ret < 0) 2376 goto fail; 2377 BUG_ON(ret); 2378 2379 leaf = path->nodes[0]; 2380 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2381 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2382 btrfs_mark_buffer_dirty(leaf); 2383 btrfs_release_path(extent_root, path); 2384 fail: 2385 if (ret) 2386 return ret; 2387 return 0; 2388 2389 } 2390 2391 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2392 
struct btrfs_root *root) 2393 { 2394 struct btrfs_block_group_cache *cache, *entry; 2395 struct rb_node *n; 2396 int err = 0; 2397 int werr = 0; 2398 struct btrfs_path *path; 2399 u64 last = 0; 2400 2401 path = btrfs_alloc_path(); 2402 if (!path) 2403 return -ENOMEM; 2404 2405 while (1) { 2406 cache = NULL; 2407 spin_lock(&root->fs_info->block_group_cache_lock); 2408 for (n = rb_first(&root->fs_info->block_group_cache_tree); 2409 n; n = rb_next(n)) { 2410 entry = rb_entry(n, struct btrfs_block_group_cache, 2411 cache_node); 2412 if (entry->dirty) { 2413 cache = entry; 2414 break; 2415 } 2416 } 2417 spin_unlock(&root->fs_info->block_group_cache_lock); 2418 2419 if (!cache) 2420 break; 2421 2422 cache->dirty = 0; 2423 last += cache->key.offset; 2424 2425 err = write_one_cache_group(trans, root, 2426 path, cache); 2427 /* 2428 * if we fail to write the cache group, we want 2429 * to keep it marked dirty in hopes that a later 2430 * write will work 2431 */ 2432 if (err) { 2433 werr = err; 2434 continue; 2435 } 2436 } 2437 btrfs_free_path(path); 2438 return werr; 2439 } 2440 2441 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2442 { 2443 struct btrfs_block_group_cache *block_group; 2444 int readonly = 0; 2445 2446 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2447 if (!block_group || block_group->ro) 2448 readonly = 1; 2449 if (block_group) 2450 btrfs_put_block_group(block_group); 2451 return readonly; 2452 } 2453 2454 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2455 u64 total_bytes, u64 bytes_used, 2456 struct btrfs_space_info **space_info) 2457 { 2458 struct btrfs_space_info *found; 2459 2460 found = __find_space_info(info, flags); 2461 if (found) { 2462 spin_lock(&found->lock); 2463 found->total_bytes += total_bytes; 2464 found->bytes_used += bytes_used; 2465 found->full = 0; 2466 spin_unlock(&found->lock); 2467 *space_info = found; 2468 return 0; 2469 } 2470 found = kzalloc(sizeof(*found), GFP_NOFS); 2471 if (!found) 2472 return -ENOMEM; 2473 2474 INIT_LIST_HEAD(&found->block_groups); 2475 init_rwsem(&found->groups_sem); 2476 spin_lock_init(&found->lock); 2477 found->flags = flags; 2478 found->total_bytes = total_bytes; 2479 found->bytes_used = bytes_used; 2480 found->bytes_pinned = 0; 2481 found->bytes_reserved = 0; 2482 found->bytes_readonly = 0; 2483 found->bytes_delalloc = 0; 2484 found->full = 0; 2485 found->force_alloc = 0; 2486 *space_info = found; 2487 list_add_rcu(&found->list, &info->space_info); 2488 return 0; 2489 } 2490 2491 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2492 { 2493 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2494 BTRFS_BLOCK_GROUP_RAID1 | 2495 BTRFS_BLOCK_GROUP_RAID10 | 2496 BTRFS_BLOCK_GROUP_DUP); 2497 if (extra_flags) { 2498 if (flags & BTRFS_BLOCK_GROUP_DATA) 2499 fs_info->avail_data_alloc_bits |= extra_flags; 2500 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2501 fs_info->avail_metadata_alloc_bits |= extra_flags; 2502 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2503 fs_info->avail_system_alloc_bits |= extra_flags; 2504 } 2505 } 2506 2507 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 2508 { 2509 spin_lock(&cache->space_info->lock); 2510 spin_lock(&cache->lock); 2511 if (!cache->ro) { 2512 cache->space_info->bytes_readonly += cache->key.offset - 2513 btrfs_block_group_used(&cache->item); 2514 cache->ro = 1; 2515 } 2516 spin_unlock(&cache->lock); 2517 spin_unlock(&cache->space_info->lock); 2518 } 2519 2520 u64 btrfs_reduce_alloc_profile(struct 
btrfs_root *root, u64 flags) 2521 { 2522 u64 num_devices = root->fs_info->fs_devices->rw_devices; 2523 2524 if (num_devices == 1) 2525 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2526 if (num_devices < 4) 2527 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2528 2529 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2530 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2531 BTRFS_BLOCK_GROUP_RAID10))) { 2532 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2533 } 2534 2535 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2536 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2537 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2538 } 2539 2540 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2541 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2542 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2543 (flags & BTRFS_BLOCK_GROUP_DUP))) 2544 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2545 return flags; 2546 } 2547 2548 static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) 2549 { 2550 struct btrfs_fs_info *info = root->fs_info; 2551 u64 alloc_profile; 2552 2553 if (data) { 2554 alloc_profile = info->avail_data_alloc_bits & 2555 info->data_alloc_profile; 2556 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 2557 } else if (root == root->fs_info->chunk_root) { 2558 alloc_profile = info->avail_system_alloc_bits & 2559 info->system_alloc_profile; 2560 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 2561 } else { 2562 alloc_profile = info->avail_metadata_alloc_bits & 2563 info->metadata_alloc_profile; 2564 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 2565 } 2566 2567 return btrfs_reduce_alloc_profile(root, data); 2568 } 2569 2570 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 2571 { 2572 u64 alloc_target; 2573 2574 alloc_target = btrfs_get_alloc_profile(root, 1); 2575 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 2576 alloc_target); 2577 } 2578 2579 /* 2580 * for now this just makes sure we have at least 5% of our metadata space free 2581 * for use. 
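 *
 * the check itself works on the metadata space_info totals: while the
 * space is not yet full, used + reserved + pinned + readonly crossing
 * 80% of total_bytes forces a new metadata chunk allocation; once the
 * space is full the limit is 95%, which is where the 5% figure above
 * comes from.  e.g. with 1GB of already-full metadata space, going
 * past roughly 970MB of used + reserved + pinned + readonly leads to
 * -ENOSPC (after one attempt to commit the running transaction).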
2582 */ 2583 int btrfs_check_metadata_free_space(struct btrfs_root *root) 2584 { 2585 struct btrfs_fs_info *info = root->fs_info; 2586 struct btrfs_space_info *meta_sinfo; 2587 u64 alloc_target, thresh; 2588 int committed = 0, ret; 2589 2590 /* get the space info for where the metadata will live */ 2591 alloc_target = btrfs_get_alloc_profile(root, 0); 2592 meta_sinfo = __find_space_info(info, alloc_target); 2593 2594 again: 2595 spin_lock(&meta_sinfo->lock); 2596 if (!meta_sinfo->full) 2597 thresh = meta_sinfo->total_bytes * 80; 2598 else 2599 thresh = meta_sinfo->total_bytes * 95; 2600 2601 do_div(thresh, 100); 2602 2603 if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + 2604 meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) { 2605 struct btrfs_trans_handle *trans; 2606 if (!meta_sinfo->full) { 2607 meta_sinfo->force_alloc = 1; 2608 spin_unlock(&meta_sinfo->lock); 2609 2610 trans = btrfs_start_transaction(root, 1); 2611 if (!trans) 2612 return -ENOMEM; 2613 2614 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2615 2 * 1024 * 1024, alloc_target, 0); 2616 btrfs_end_transaction(trans, root); 2617 goto again; 2618 } 2619 spin_unlock(&meta_sinfo->lock); 2620 2621 if (!committed) { 2622 committed = 1; 2623 trans = btrfs_join_transaction(root, 1); 2624 if (!trans) 2625 return -ENOMEM; 2626 ret = btrfs_commit_transaction(trans, root); 2627 if (ret) 2628 return ret; 2629 goto again; 2630 } 2631 return -ENOSPC; 2632 } 2633 spin_unlock(&meta_sinfo->lock); 2634 2635 return 0; 2636 } 2637 2638 /* 2639 * This will check the space that the inode allocates from to make sure we have 2640 * enough space for bytes. 2641 */ 2642 int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, 2643 u64 bytes) 2644 { 2645 struct btrfs_space_info *data_sinfo; 2646 int ret = 0, committed = 0; 2647 2648 /* make sure bytes are sectorsize aligned */ 2649 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2650 2651 data_sinfo = BTRFS_I(inode)->space_info; 2652 again: 2653 /* make sure we have enough space to handle the data first */ 2654 spin_lock(&data_sinfo->lock); 2655 if (data_sinfo->total_bytes - data_sinfo->bytes_used - 2656 data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved - 2657 data_sinfo->bytes_pinned - data_sinfo->bytes_readonly - 2658 data_sinfo->bytes_may_use < bytes) { 2659 struct btrfs_trans_handle *trans; 2660 2661 /* 2662 * if we don't have enough free bytes in this space then we need 2663 * to alloc a new chunk. 
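		 *
		 * "enough space" in the test above means
		 *
		 *   total_bytes - (used + delalloc + reserved + pinned +
		 *		    readonly + may_use) >= bytes
		 *
		 * with bytes already rounded up to a multiple of
		 * root->sectorsize.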
2664 */ 2665 if (!data_sinfo->full) { 2666 u64 alloc_target; 2667 2668 data_sinfo->force_alloc = 1; 2669 spin_unlock(&data_sinfo->lock); 2670 2671 alloc_target = btrfs_get_alloc_profile(root, 1); 2672 trans = btrfs_start_transaction(root, 1); 2673 if (!trans) 2674 return -ENOMEM; 2675 2676 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 2677 bytes + 2 * 1024 * 1024, 2678 alloc_target, 0); 2679 btrfs_end_transaction(trans, root); 2680 if (ret) 2681 return ret; 2682 goto again; 2683 } 2684 spin_unlock(&data_sinfo->lock); 2685 2686 /* commit the current transaction and try again */ 2687 if (!committed) { 2688 committed = 1; 2689 trans = btrfs_join_transaction(root, 1); 2690 if (!trans) 2691 return -ENOMEM; 2692 ret = btrfs_commit_transaction(trans, root); 2693 if (ret) 2694 return ret; 2695 goto again; 2696 } 2697 2698 printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" 2699 ", %llu bytes_used, %llu bytes_reserved, " 2700 "%llu bytes_pinned, %llu bytes_readonly, %llu may use" 2701 "%llu total\n", (unsigned long long)bytes, 2702 (unsigned long long)data_sinfo->bytes_delalloc, 2703 (unsigned long long)data_sinfo->bytes_used, 2704 (unsigned long long)data_sinfo->bytes_reserved, 2705 (unsigned long long)data_sinfo->bytes_pinned, 2706 (unsigned long long)data_sinfo->bytes_readonly, 2707 (unsigned long long)data_sinfo->bytes_may_use, 2708 (unsigned long long)data_sinfo->total_bytes); 2709 return -ENOSPC; 2710 } 2711 data_sinfo->bytes_may_use += bytes; 2712 BTRFS_I(inode)->reserved_bytes += bytes; 2713 spin_unlock(&data_sinfo->lock); 2714 2715 return btrfs_check_metadata_free_space(root); 2716 } 2717 2718 /* 2719 * if there was an error for whatever reason after calling 2720 * btrfs_check_data_free_space, call this so we can cleanup the counters. 2721 */ 2722 void btrfs_free_reserved_data_space(struct btrfs_root *root, 2723 struct inode *inode, u64 bytes) 2724 { 2725 struct btrfs_space_info *data_sinfo; 2726 2727 /* make sure bytes are sectorsize aligned */ 2728 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 2729 2730 data_sinfo = BTRFS_I(inode)->space_info; 2731 spin_lock(&data_sinfo->lock); 2732 data_sinfo->bytes_may_use -= bytes; 2733 BTRFS_I(inode)->reserved_bytes -= bytes; 2734 spin_unlock(&data_sinfo->lock); 2735 } 2736 2737 /* called when we are adding a delalloc extent to the inode's io_tree */ 2738 void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, 2739 u64 bytes) 2740 { 2741 struct btrfs_space_info *data_sinfo; 2742 2743 /* get the space info for where this inode will be storing its data */ 2744 data_sinfo = BTRFS_I(inode)->space_info; 2745 2746 /* make sure we have enough space to handle the data first */ 2747 spin_lock(&data_sinfo->lock); 2748 data_sinfo->bytes_delalloc += bytes; 2749 2750 /* 2751 * we are adding a delalloc extent without calling 2752 * btrfs_check_data_free_space first. 
This happens on a weird 2753 * writepage condition, but shouldn't hurt our accounting 2754 */ 2755 if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { 2756 data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; 2757 BTRFS_I(inode)->reserved_bytes = 0; 2758 } else { 2759 data_sinfo->bytes_may_use -= bytes; 2760 BTRFS_I(inode)->reserved_bytes -= bytes; 2761 } 2762 2763 spin_unlock(&data_sinfo->lock); 2764 } 2765 2766 /* called when we are clearing an delalloc extent from the inode's io_tree */ 2767 void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, 2768 u64 bytes) 2769 { 2770 struct btrfs_space_info *info; 2771 2772 info = BTRFS_I(inode)->space_info; 2773 2774 spin_lock(&info->lock); 2775 info->bytes_delalloc -= bytes; 2776 spin_unlock(&info->lock); 2777 } 2778 2779 static void force_metadata_allocation(struct btrfs_fs_info *info) 2780 { 2781 struct list_head *head = &info->space_info; 2782 struct btrfs_space_info *found; 2783 2784 rcu_read_lock(); 2785 list_for_each_entry_rcu(found, head, list) { 2786 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 2787 found->force_alloc = 1; 2788 } 2789 rcu_read_unlock(); 2790 } 2791 2792 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 2793 struct btrfs_root *extent_root, u64 alloc_bytes, 2794 u64 flags, int force) 2795 { 2796 struct btrfs_space_info *space_info; 2797 struct btrfs_fs_info *fs_info = extent_root->fs_info; 2798 u64 thresh; 2799 int ret = 0; 2800 2801 mutex_lock(&fs_info->chunk_mutex); 2802 2803 flags = btrfs_reduce_alloc_profile(extent_root, flags); 2804 2805 space_info = __find_space_info(extent_root->fs_info, flags); 2806 if (!space_info) { 2807 ret = update_space_info(extent_root->fs_info, flags, 2808 0, 0, &space_info); 2809 BUG_ON(ret); 2810 } 2811 BUG_ON(!space_info); 2812 2813 spin_lock(&space_info->lock); 2814 if (space_info->force_alloc) { 2815 force = 1; 2816 space_info->force_alloc = 0; 2817 } 2818 if (space_info->full) { 2819 spin_unlock(&space_info->lock); 2820 goto out; 2821 } 2822 2823 thresh = space_info->total_bytes - space_info->bytes_readonly; 2824 thresh = div_factor(thresh, 6); 2825 if (!force && 2826 (space_info->bytes_used + space_info->bytes_pinned + 2827 space_info->bytes_reserved + alloc_bytes) < thresh) { 2828 spin_unlock(&space_info->lock); 2829 goto out; 2830 } 2831 spin_unlock(&space_info->lock); 2832 2833 /* 2834 * if we're doing a data chunk, go ahead and make sure that 2835 * we keep a reasonable number of metadata chunks allocated in the 2836 * FS as well. 
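	 *
	 * for example, if fs_info->metadata_ratio is 8, every 8th data
	 * chunk allocation also sets force_alloc on each metadata
	 * space_info via force_metadata_allocation(), so the next
	 * do_chunk_alloc() call for metadata skips the usage threshold
	 * check.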
2837 */ 2838 if (flags & BTRFS_BLOCK_GROUP_DATA) { 2839 fs_info->data_chunk_allocations++; 2840 if (!(fs_info->data_chunk_allocations % 2841 fs_info->metadata_ratio)) 2842 force_metadata_allocation(fs_info); 2843 } 2844 2845 ret = btrfs_alloc_chunk(trans, extent_root, flags); 2846 if (ret) 2847 space_info->full = 1; 2848 out: 2849 mutex_unlock(&extent_root->fs_info->chunk_mutex); 2850 return ret; 2851 } 2852 2853 static int update_block_group(struct btrfs_trans_handle *trans, 2854 struct btrfs_root *root, 2855 u64 bytenr, u64 num_bytes, int alloc, 2856 int mark_free) 2857 { 2858 struct btrfs_block_group_cache *cache; 2859 struct btrfs_fs_info *info = root->fs_info; 2860 u64 total = num_bytes; 2861 u64 old_val; 2862 u64 byte_in_group; 2863 2864 /* block accounting for super block */ 2865 spin_lock(&info->delalloc_lock); 2866 old_val = btrfs_super_bytes_used(&info->super_copy); 2867 if (alloc) 2868 old_val += num_bytes; 2869 else 2870 old_val -= num_bytes; 2871 btrfs_set_super_bytes_used(&info->super_copy, old_val); 2872 2873 /* block accounting for root item */ 2874 old_val = btrfs_root_used(&root->root_item); 2875 if (alloc) 2876 old_val += num_bytes; 2877 else 2878 old_val -= num_bytes; 2879 btrfs_set_root_used(&root->root_item, old_val); 2880 spin_unlock(&info->delalloc_lock); 2881 2882 while (total) { 2883 cache = btrfs_lookup_block_group(info, bytenr); 2884 if (!cache) 2885 return -1; 2886 byte_in_group = bytenr - cache->key.objectid; 2887 WARN_ON(byte_in_group > cache->key.offset); 2888 2889 spin_lock(&cache->space_info->lock); 2890 spin_lock(&cache->lock); 2891 cache->dirty = 1; 2892 old_val = btrfs_block_group_used(&cache->item); 2893 num_bytes = min(total, cache->key.offset - byte_in_group); 2894 if (alloc) { 2895 old_val += num_bytes; 2896 cache->space_info->bytes_used += num_bytes; 2897 if (cache->ro) 2898 cache->space_info->bytes_readonly -= num_bytes; 2899 btrfs_set_block_group_used(&cache->item, old_val); 2900 spin_unlock(&cache->lock); 2901 spin_unlock(&cache->space_info->lock); 2902 } else { 2903 old_val -= num_bytes; 2904 cache->space_info->bytes_used -= num_bytes; 2905 if (cache->ro) 2906 cache->space_info->bytes_readonly += num_bytes; 2907 btrfs_set_block_group_used(&cache->item, old_val); 2908 spin_unlock(&cache->lock); 2909 spin_unlock(&cache->space_info->lock); 2910 if (mark_free) { 2911 int ret; 2912 2913 ret = btrfs_discard_extent(root, bytenr, 2914 num_bytes); 2915 WARN_ON(ret); 2916 2917 ret = btrfs_add_free_space(cache, bytenr, 2918 num_bytes); 2919 WARN_ON(ret); 2920 } 2921 } 2922 btrfs_put_block_group(cache); 2923 total -= num_bytes; 2924 bytenr += num_bytes; 2925 } 2926 return 0; 2927 } 2928 2929 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 2930 { 2931 struct btrfs_block_group_cache *cache; 2932 u64 bytenr; 2933 2934 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 2935 if (!cache) 2936 return 0; 2937 2938 bytenr = cache->key.objectid; 2939 btrfs_put_block_group(cache); 2940 2941 return bytenr; 2942 } 2943 2944 int btrfs_update_pinned_extents(struct btrfs_root *root, 2945 u64 bytenr, u64 num, int pin) 2946 { 2947 u64 len; 2948 struct btrfs_block_group_cache *cache; 2949 struct btrfs_fs_info *fs_info = root->fs_info; 2950 2951 if (pin) { 2952 set_extent_dirty(&fs_info->pinned_extents, 2953 bytenr, bytenr + num - 1, GFP_NOFS); 2954 } else { 2955 clear_extent_dirty(&fs_info->pinned_extents, 2956 bytenr, bytenr + num - 1, GFP_NOFS); 2957 } 2958 2959 while (num > 0) { 2960 cache = btrfs_lookup_block_group(fs_info, 
bytenr); 2961 BUG_ON(!cache); 2962 len = min(num, cache->key.offset - 2963 (bytenr - cache->key.objectid)); 2964 if (pin) { 2965 spin_lock(&cache->space_info->lock); 2966 spin_lock(&cache->lock); 2967 cache->pinned += len; 2968 cache->space_info->bytes_pinned += len; 2969 spin_unlock(&cache->lock); 2970 spin_unlock(&cache->space_info->lock); 2971 fs_info->total_pinned += len; 2972 } else { 2973 spin_lock(&cache->space_info->lock); 2974 spin_lock(&cache->lock); 2975 cache->pinned -= len; 2976 cache->space_info->bytes_pinned -= len; 2977 spin_unlock(&cache->lock); 2978 spin_unlock(&cache->space_info->lock); 2979 fs_info->total_pinned -= len; 2980 if (cache->cached) 2981 btrfs_add_free_space(cache, bytenr, len); 2982 } 2983 btrfs_put_block_group(cache); 2984 bytenr += len; 2985 num -= len; 2986 } 2987 return 0; 2988 } 2989 2990 static int update_reserved_extents(struct btrfs_root *root, 2991 u64 bytenr, u64 num, int reserve) 2992 { 2993 u64 len; 2994 struct btrfs_block_group_cache *cache; 2995 struct btrfs_fs_info *fs_info = root->fs_info; 2996 2997 while (num > 0) { 2998 cache = btrfs_lookup_block_group(fs_info, bytenr); 2999 BUG_ON(!cache); 3000 len = min(num, cache->key.offset - 3001 (bytenr - cache->key.objectid)); 3002 3003 spin_lock(&cache->space_info->lock); 3004 spin_lock(&cache->lock); 3005 if (reserve) { 3006 cache->reserved += len; 3007 cache->space_info->bytes_reserved += len; 3008 } else { 3009 cache->reserved -= len; 3010 cache->space_info->bytes_reserved -= len; 3011 } 3012 spin_unlock(&cache->lock); 3013 spin_unlock(&cache->space_info->lock); 3014 btrfs_put_block_group(cache); 3015 bytenr += len; 3016 num -= len; 3017 } 3018 return 0; 3019 } 3020 3021 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 3022 { 3023 u64 last = 0; 3024 u64 start; 3025 u64 end; 3026 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 3027 int ret; 3028 3029 while (1) { 3030 ret = find_first_extent_bit(pinned_extents, last, 3031 &start, &end, EXTENT_DIRTY); 3032 if (ret) 3033 break; 3034 set_extent_dirty(copy, start, end, GFP_NOFS); 3035 last = end + 1; 3036 } 3037 return 0; 3038 } 3039 3040 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 3041 struct btrfs_root *root, 3042 struct extent_io_tree *unpin) 3043 { 3044 u64 start; 3045 u64 end; 3046 int ret; 3047 3048 while (1) { 3049 ret = find_first_extent_bit(unpin, 0, &start, &end, 3050 EXTENT_DIRTY); 3051 if (ret) 3052 break; 3053 3054 ret = btrfs_discard_extent(root, start, end + 1 - start); 3055 3056 /* unlocks the pinned mutex */ 3057 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 3058 clear_extent_dirty(unpin, start, end, GFP_NOFS); 3059 3060 cond_resched(); 3061 } 3062 return ret; 3063 } 3064 3065 static int pin_down_bytes(struct btrfs_trans_handle *trans, 3066 struct btrfs_root *root, 3067 struct btrfs_path *path, 3068 u64 bytenr, u64 num_bytes, int is_data, 3069 struct extent_buffer **must_clean) 3070 { 3071 int err = 0; 3072 struct extent_buffer *buf; 3073 3074 if (is_data) 3075 goto pinit; 3076 3077 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 3078 if (!buf) 3079 goto pinit; 3080 3081 /* we can reuse a block if it hasn't been written 3082 * and it is from this transaction. We can't 3083 * reuse anything from the tree log root because 3084 * it has tiny sub-transactions. 
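	 *
	 * concretely, the buffer is handed back through *must_clean (and
	 * the caller skips pinning it) only when it is uptodate, the tree
	 * lock can be taken without blocking, the owner is not the tree
	 * log, the generation matches this transaction, and the WRITTEN
	 * header flag is clear.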
3085 */ 3086 if (btrfs_buffer_uptodate(buf, 0) && 3087 btrfs_try_tree_lock(buf)) { 3088 u64 header_owner = btrfs_header_owner(buf); 3089 u64 header_transid = btrfs_header_generation(buf); 3090 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 3091 header_transid == trans->transid && 3092 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 3093 *must_clean = buf; 3094 return 1; 3095 } 3096 btrfs_tree_unlock(buf); 3097 } 3098 free_extent_buffer(buf); 3099 pinit: 3100 btrfs_set_path_blocking(path); 3101 /* unlocks the pinned mutex */ 3102 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3103 3104 BUG_ON(err < 0); 3105 return 0; 3106 } 3107 3108 3109 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 3110 struct btrfs_root *root, 3111 u64 bytenr, u64 num_bytes, u64 parent, 3112 u64 root_objectid, u64 owner_objectid, 3113 u64 owner_offset, int refs_to_drop, 3114 struct btrfs_delayed_extent_op *extent_op) 3115 { 3116 struct btrfs_key key; 3117 struct btrfs_path *path; 3118 struct btrfs_fs_info *info = root->fs_info; 3119 struct btrfs_root *extent_root = info->extent_root; 3120 struct extent_buffer *leaf; 3121 struct btrfs_extent_item *ei; 3122 struct btrfs_extent_inline_ref *iref; 3123 int ret; 3124 int is_data; 3125 int extent_slot = 0; 3126 int found_extent = 0; 3127 int num_to_del = 1; 3128 u32 item_size; 3129 u64 refs; 3130 3131 path = btrfs_alloc_path(); 3132 if (!path) 3133 return -ENOMEM; 3134 3135 path->reada = 1; 3136 path->leave_spinning = 1; 3137 3138 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 3139 BUG_ON(!is_data && refs_to_drop != 1); 3140 3141 ret = lookup_extent_backref(trans, extent_root, path, &iref, 3142 bytenr, num_bytes, parent, 3143 root_objectid, owner_objectid, 3144 owner_offset); 3145 if (ret == 0) { 3146 extent_slot = path->slots[0]; 3147 while (extent_slot >= 0) { 3148 btrfs_item_key_to_cpu(path->nodes[0], &key, 3149 extent_slot); 3150 if (key.objectid != bytenr) 3151 break; 3152 if (key.type == BTRFS_EXTENT_ITEM_KEY && 3153 key.offset == num_bytes) { 3154 found_extent = 1; 3155 break; 3156 } 3157 if (path->slots[0] - extent_slot > 5) 3158 break; 3159 extent_slot--; 3160 } 3161 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3162 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 3163 if (found_extent && item_size < sizeof(*ei)) 3164 found_extent = 0; 3165 #endif 3166 if (!found_extent) { 3167 BUG_ON(iref); 3168 ret = remove_extent_backref(trans, extent_root, path, 3169 NULL, refs_to_drop, 3170 is_data); 3171 BUG_ON(ret); 3172 btrfs_release_path(extent_root, path); 3173 path->leave_spinning = 1; 3174 3175 key.objectid = bytenr; 3176 key.type = BTRFS_EXTENT_ITEM_KEY; 3177 key.offset = num_bytes; 3178 3179 ret = btrfs_search_slot(trans, extent_root, 3180 &key, path, -1, 1); 3181 if (ret) { 3182 printk(KERN_ERR "umm, got %d back from search" 3183 ", was looking for %llu\n", ret, 3184 (unsigned long long)bytenr); 3185 btrfs_print_leaf(extent_root, path->nodes[0]); 3186 } 3187 BUG_ON(ret); 3188 extent_slot = path->slots[0]; 3189 } 3190 } else { 3191 btrfs_print_leaf(extent_root, path->nodes[0]); 3192 WARN_ON(1); 3193 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 3194 "parent %llu root %llu owner %llu offset %llu\n", 3195 (unsigned long long)bytenr, 3196 (unsigned long long)parent, 3197 (unsigned long long)root_objectid, 3198 (unsigned long long)owner_objectid, 3199 (unsigned long long)owner_offset); 3200 } 3201 3202 leaf = path->nodes[0]; 3203 item_size = btrfs_item_size_nr(leaf, extent_slot); 3204 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 
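	/*
	 * an item smaller than struct btrfs_extent_item means this extent
	 * is still in the old v0 format.  convert it in place and redo
	 * the search so that extent_slot and item_size below describe the
	 * new, larger item.
	 */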
3205 if (item_size < sizeof(*ei)) { 3206 BUG_ON(found_extent || extent_slot != path->slots[0]); 3207 ret = convert_extent_item_v0(trans, extent_root, path, 3208 owner_objectid, 0); 3209 BUG_ON(ret < 0); 3210 3211 btrfs_release_path(extent_root, path); 3212 path->leave_spinning = 1; 3213 3214 key.objectid = bytenr; 3215 key.type = BTRFS_EXTENT_ITEM_KEY; 3216 key.offset = num_bytes; 3217 3218 ret = btrfs_search_slot(trans, extent_root, &key, path, 3219 -1, 1); 3220 if (ret) { 3221 printk(KERN_ERR "umm, got %d back from search" 3222 ", was looking for %llu\n", ret, 3223 (unsigned long long)bytenr); 3224 btrfs_print_leaf(extent_root, path->nodes[0]); 3225 } 3226 BUG_ON(ret); 3227 extent_slot = path->slots[0]; 3228 leaf = path->nodes[0]; 3229 item_size = btrfs_item_size_nr(leaf, extent_slot); 3230 } 3231 #endif 3232 BUG_ON(item_size < sizeof(*ei)); 3233 ei = btrfs_item_ptr(leaf, extent_slot, 3234 struct btrfs_extent_item); 3235 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3236 struct btrfs_tree_block_info *bi; 3237 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 3238 bi = (struct btrfs_tree_block_info *)(ei + 1); 3239 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 3240 } 3241 3242 refs = btrfs_extent_refs(leaf, ei); 3243 BUG_ON(refs < refs_to_drop); 3244 refs -= refs_to_drop; 3245 3246 if (refs > 0) { 3247 if (extent_op) 3248 __run_delayed_extent_op(extent_op, leaf, ei); 3249 /* 3250 * In the case of inline back ref, reference count will 3251 * be updated by remove_extent_backref 3252 */ 3253 if (iref) { 3254 BUG_ON(!found_extent); 3255 } else { 3256 btrfs_set_extent_refs(leaf, ei, refs); 3257 btrfs_mark_buffer_dirty(leaf); 3258 } 3259 if (found_extent) { 3260 ret = remove_extent_backref(trans, extent_root, path, 3261 iref, refs_to_drop, 3262 is_data); 3263 BUG_ON(ret); 3264 } 3265 } else { 3266 int mark_free = 0; 3267 struct extent_buffer *must_clean = NULL; 3268 3269 if (found_extent) { 3270 BUG_ON(is_data && refs_to_drop != 3271 extent_data_ref_count(root, path, iref)); 3272 if (iref) { 3273 BUG_ON(path->slots[0] != extent_slot); 3274 } else { 3275 BUG_ON(path->slots[0] != extent_slot + 1); 3276 path->slots[0] = extent_slot; 3277 num_to_del = 2; 3278 } 3279 } 3280 3281 ret = pin_down_bytes(trans, root, path, bytenr, 3282 num_bytes, is_data, &must_clean); 3283 if (ret > 0) 3284 mark_free = 1; 3285 BUG_ON(ret < 0); 3286 /* 3287 * it is going to be very rare for someone to be waiting 3288 * on the block we're freeing. del_items might need to 3289 * schedule, so rather than get fancy, just force it 3290 * to blocking here 3291 */ 3292 if (must_clean) 3293 btrfs_set_lock_blocking(must_clean); 3294 3295 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 3296 num_to_del); 3297 BUG_ON(ret); 3298 btrfs_release_path(extent_root, path); 3299 3300 if (must_clean) { 3301 clean_tree_block(NULL, root, must_clean); 3302 btrfs_tree_unlock(must_clean); 3303 free_extent_buffer(must_clean); 3304 } 3305 3306 if (is_data) { 3307 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 3308 BUG_ON(ret); 3309 } else { 3310 invalidate_mapping_pages(info->btree_inode->i_mapping, 3311 bytenr >> PAGE_CACHE_SHIFT, 3312 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 3313 } 3314 3315 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 3316 mark_free); 3317 BUG_ON(ret); 3318 } 3319 btrfs_free_path(path); 3320 return ret; 3321 } 3322 3323 /* 3324 * when we free an extent, it is possible (and likely) that we free the last 3325 * delayed ref for that extent as well. 
This searches the delayed ref tree for 3326 * a given extent, and if there are no other delayed refs to be processed, it 3327 * removes it from the tree. 3328 */ 3329 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 3330 struct btrfs_root *root, u64 bytenr) 3331 { 3332 struct btrfs_delayed_ref_head *head; 3333 struct btrfs_delayed_ref_root *delayed_refs; 3334 struct btrfs_delayed_ref_node *ref; 3335 struct rb_node *node; 3336 int ret; 3337 3338 delayed_refs = &trans->transaction->delayed_refs; 3339 spin_lock(&delayed_refs->lock); 3340 head = btrfs_find_delayed_ref_head(trans, bytenr); 3341 if (!head) 3342 goto out; 3343 3344 node = rb_prev(&head->node.rb_node); 3345 if (!node) 3346 goto out; 3347 3348 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 3349 3350 /* there are still entries for this ref, we can't drop it */ 3351 if (ref->bytenr == bytenr) 3352 goto out; 3353 3354 if (head->extent_op) { 3355 if (!head->must_insert_reserved) 3356 goto out; 3357 kfree(head->extent_op); 3358 head->extent_op = NULL; 3359 } 3360 3361 /* 3362 * waiting for the lock here would deadlock. If someone else has it 3363 * locked they are already in the process of dropping it anyway 3364 */ 3365 if (!mutex_trylock(&head->mutex)) 3366 goto out; 3367 3368 /* 3369 * at this point we have a head with no other entries. Go 3370 * ahead and process it. 3371 */ 3372 head->node.in_tree = 0; 3373 rb_erase(&head->node.rb_node, &delayed_refs->root); 3374 3375 delayed_refs->num_entries--; 3376 3377 /* 3378 * we don't take a ref on the node because we're removing it from the 3379 * tree, so we just steal the ref the tree was holding. 3380 */ 3381 delayed_refs->num_heads--; 3382 if (list_empty(&head->cluster)) 3383 delayed_refs->num_heads_ready--; 3384 3385 list_del_init(&head->cluster); 3386 spin_unlock(&delayed_refs->lock); 3387 3388 ret = run_one_delayed_ref(trans, root->fs_info->tree_root, 3389 &head->node, head->extent_op, 3390 head->must_insert_reserved); 3391 BUG_ON(ret); 3392 btrfs_put_delayed_ref(&head->node); 3393 return 0; 3394 out: 3395 spin_unlock(&delayed_refs->lock); 3396 return 0; 3397 } 3398 3399 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3400 struct btrfs_root *root, 3401 u64 bytenr, u64 num_bytes, u64 parent, 3402 u64 root_objectid, u64 owner, u64 offset) 3403 { 3404 int ret; 3405 3406 /* 3407 * tree log blocks never actually go into the extent allocation 3408 * tree, just update pinning info and exit early. 3409 */ 3410 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 3411 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 3412 /* unlocks the pinned mutex */ 3413 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3414 update_reserved_extents(root, bytenr, num_bytes, 0); 3415 ret = 0; 3416 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 3417 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 3418 parent, root_objectid, (int)owner, 3419 BTRFS_DROP_DELAYED_REF, NULL); 3420 BUG_ON(ret); 3421 ret = check_ref_cleanup(trans, root, bytenr); 3422 BUG_ON(ret); 3423 } else { 3424 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 3425 parent, root_objectid, owner, 3426 offset, BTRFS_DROP_DELAYED_REF, NULL); 3427 BUG_ON(ret); 3428 } 3429 return ret; 3430 } 3431 3432 static u64 stripe_align(struct btrfs_root *root, u64 val) 3433 { 3434 u64 mask = ((u64)root->stripesize - 1); 3435 u64 ret = (val + mask) & ~mask; 3436 return ret; 3437 } 3438 3439 /* 3440 * walks the btree of allocated extents and find a hole of a given size. 
3441 * The key ins is changed to record the hole: 3442 * ins->objectid == block start 3443 * ins->flags = BTRFS_EXTENT_ITEM_KEY 3444 * ins->offset == number of blocks 3445 * Any available blocks before search_start are skipped. 3446 */ 3447 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 3448 struct btrfs_root *orig_root, 3449 u64 num_bytes, u64 empty_size, 3450 u64 search_start, u64 search_end, 3451 u64 hint_byte, struct btrfs_key *ins, 3452 u64 exclude_start, u64 exclude_nr, 3453 int data) 3454 { 3455 int ret = 0; 3456 struct btrfs_root *root = orig_root->fs_info->extent_root; 3457 struct btrfs_free_cluster *last_ptr = NULL; 3458 struct btrfs_block_group_cache *block_group = NULL; 3459 int empty_cluster = 2 * 1024 * 1024; 3460 int allowed_chunk_alloc = 0; 3461 struct btrfs_space_info *space_info; 3462 int last_ptr_loop = 0; 3463 int loop = 0; 3464 3465 WARN_ON(num_bytes < root->sectorsize); 3466 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3467 ins->objectid = 0; 3468 ins->offset = 0; 3469 3470 space_info = __find_space_info(root->fs_info, data); 3471 3472 if (orig_root->ref_cows || empty_size) 3473 allowed_chunk_alloc = 1; 3474 3475 if (data & BTRFS_BLOCK_GROUP_METADATA) { 3476 last_ptr = &root->fs_info->meta_alloc_cluster; 3477 if (!btrfs_test_opt(root, SSD)) 3478 empty_cluster = 64 * 1024; 3479 } 3480 3481 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) { 3482 last_ptr = &root->fs_info->data_alloc_cluster; 3483 } 3484 3485 if (last_ptr) { 3486 spin_lock(&last_ptr->lock); 3487 if (last_ptr->block_group) 3488 hint_byte = last_ptr->window_start; 3489 spin_unlock(&last_ptr->lock); 3490 } 3491 3492 search_start = max(search_start, first_logical_byte(root, 0)); 3493 search_start = max(search_start, hint_byte); 3494 3495 if (!last_ptr) { 3496 empty_cluster = 0; 3497 loop = 1; 3498 } 3499 3500 if (search_start == hint_byte) { 3501 block_group = btrfs_lookup_block_group(root->fs_info, 3502 search_start); 3503 if (block_group && block_group_bits(block_group, data)) { 3504 down_read(&space_info->groups_sem); 3505 if (list_empty(&block_group->list) || 3506 block_group->ro) { 3507 /* 3508 * someone is removing this block group, 3509 * we can't jump into the have_block_group 3510 * target because our list pointers are not 3511 * valid 3512 */ 3513 btrfs_put_block_group(block_group); 3514 up_read(&space_info->groups_sem); 3515 } else 3516 goto have_block_group; 3517 } else if (block_group) { 3518 btrfs_put_block_group(block_group); 3519 } 3520 } 3521 3522 search: 3523 down_read(&space_info->groups_sem); 3524 list_for_each_entry(block_group, &space_info->block_groups, list) { 3525 u64 offset; 3526 3527 atomic_inc(&block_group->count); 3528 search_start = block_group->key.objectid; 3529 3530 have_block_group: 3531 if (unlikely(!block_group->cached)) { 3532 mutex_lock(&block_group->cache_mutex); 3533 ret = cache_block_group(root, block_group); 3534 mutex_unlock(&block_group->cache_mutex); 3535 if (ret) { 3536 btrfs_put_block_group(block_group); 3537 break; 3538 } 3539 } 3540 3541 if (unlikely(block_group->ro)) 3542 goto loop; 3543 3544 if (last_ptr) { 3545 /* 3546 * the refill lock keeps out other 3547 * people trying to start a new cluster 3548 */ 3549 spin_lock(&last_ptr->refill_lock); 3550 if (last_ptr->block_group && 3551 (last_ptr->block_group->ro || 3552 !block_group_bits(last_ptr->block_group, data))) { 3553 offset = 0; 3554 goto refill_cluster; 3555 } 3556 3557 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 3558 num_bytes, search_start); 
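			/*
			 * a non-zero offset means the cached cluster still
			 * had room for this allocation, so skip straight to
			 * the alignment and bounds checks below.
			 */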
3559 if (offset) { 3560 /* we have a block, we're done */ 3561 spin_unlock(&last_ptr->refill_lock); 3562 goto checks; 3563 } 3564 3565 spin_lock(&last_ptr->lock); 3566 /* 3567 * whoops, this cluster doesn't actually point to 3568 * this block group. Get a ref on the block 3569 * group is does point to and try again 3570 */ 3571 if (!last_ptr_loop && last_ptr->block_group && 3572 last_ptr->block_group != block_group) { 3573 3574 btrfs_put_block_group(block_group); 3575 block_group = last_ptr->block_group; 3576 atomic_inc(&block_group->count); 3577 spin_unlock(&last_ptr->lock); 3578 spin_unlock(&last_ptr->refill_lock); 3579 3580 last_ptr_loop = 1; 3581 search_start = block_group->key.objectid; 3582 /* 3583 * we know this block group is properly 3584 * in the list because 3585 * btrfs_remove_block_group, drops the 3586 * cluster before it removes the block 3587 * group from the list 3588 */ 3589 goto have_block_group; 3590 } 3591 spin_unlock(&last_ptr->lock); 3592 refill_cluster: 3593 /* 3594 * this cluster didn't work out, free it and 3595 * start over 3596 */ 3597 btrfs_return_cluster_to_free_space(NULL, last_ptr); 3598 3599 last_ptr_loop = 0; 3600 3601 /* allocate a cluster in this block group */ 3602 ret = btrfs_find_space_cluster(trans, root, 3603 block_group, last_ptr, 3604 offset, num_bytes, 3605 empty_cluster + empty_size); 3606 if (ret == 0) { 3607 /* 3608 * now pull our allocation out of this 3609 * cluster 3610 */ 3611 offset = btrfs_alloc_from_cluster(block_group, 3612 last_ptr, num_bytes, 3613 search_start); 3614 if (offset) { 3615 /* we found one, proceed */ 3616 spin_unlock(&last_ptr->refill_lock); 3617 goto checks; 3618 } 3619 } 3620 /* 3621 * at this point we either didn't find a cluster 3622 * or we weren't able to allocate a block from our 3623 * cluster. 
Free the cluster we've been trying 3624 * to use, and go to the next block group 3625 */ 3626 if (loop < 2) { 3627 btrfs_return_cluster_to_free_space(NULL, 3628 last_ptr); 3629 spin_unlock(&last_ptr->refill_lock); 3630 goto loop; 3631 } 3632 spin_unlock(&last_ptr->refill_lock); 3633 } 3634 3635 offset = btrfs_find_space_for_alloc(block_group, search_start, 3636 num_bytes, empty_size); 3637 if (!offset) 3638 goto loop; 3639 checks: 3640 search_start = stripe_align(root, offset); 3641 3642 /* move on to the next group */ 3643 if (search_start + num_bytes >= search_end) { 3644 btrfs_add_free_space(block_group, offset, num_bytes); 3645 goto loop; 3646 } 3647 3648 /* move on to the next group */ 3649 if (search_start + num_bytes > 3650 block_group->key.objectid + block_group->key.offset) { 3651 btrfs_add_free_space(block_group, offset, num_bytes); 3652 goto loop; 3653 } 3654 3655 if (exclude_nr > 0 && 3656 (search_start + num_bytes > exclude_start && 3657 search_start < exclude_start + exclude_nr)) { 3658 search_start = exclude_start + exclude_nr; 3659 3660 btrfs_add_free_space(block_group, offset, num_bytes); 3661 /* 3662 * if search_start is still in this block group 3663 * then we just re-search this block group 3664 */ 3665 if (search_start >= block_group->key.objectid && 3666 search_start < (block_group->key.objectid + 3667 block_group->key.offset)) 3668 goto have_block_group; 3669 goto loop; 3670 } 3671 3672 ins->objectid = search_start; 3673 ins->offset = num_bytes; 3674 3675 if (offset < search_start) 3676 btrfs_add_free_space(block_group, offset, 3677 search_start - offset); 3678 BUG_ON(offset > search_start); 3679 3680 /* we are all good, lets return */ 3681 break; 3682 loop: 3683 btrfs_put_block_group(block_group); 3684 } 3685 up_read(&space_info->groups_sem); 3686 3687 /* loop == 0, try to find a clustered alloc in every block group 3688 * loop == 1, try again after forcing a chunk allocation 3689 * loop == 2, set empty_size and empty_cluster to 0 and try again 3690 */ 3691 if (!ins->objectid && loop < 3 && 3692 (empty_size || empty_cluster || allowed_chunk_alloc)) { 3693 if (loop >= 2) { 3694 empty_size = 0; 3695 empty_cluster = 0; 3696 } 3697 3698 if (allowed_chunk_alloc) { 3699 ret = do_chunk_alloc(trans, root, num_bytes + 3700 2 * 1024 * 1024, data, 1); 3701 allowed_chunk_alloc = 0; 3702 } else { 3703 space_info->force_alloc = 1; 3704 } 3705 3706 if (loop < 3) { 3707 loop++; 3708 goto search; 3709 } 3710 ret = -ENOSPC; 3711 } else if (!ins->objectid) { 3712 ret = -ENOSPC; 3713 } 3714 3715 /* we found what we needed */ 3716 if (ins->objectid) { 3717 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 3718 trans->block_group = block_group->key.objectid; 3719 3720 btrfs_put_block_group(block_group); 3721 ret = 0; 3722 } 3723 3724 return ret; 3725 } 3726 3727 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3728 { 3729 struct btrfs_block_group_cache *cache; 3730 3731 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3732 (unsigned long long)(info->total_bytes - info->bytes_used - 3733 info->bytes_pinned - info->bytes_reserved), 3734 (info->full) ? 
"" : "not "); 3735 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 3736 " may_use=%llu, used=%llu\n", 3737 (unsigned long long)info->total_bytes, 3738 (unsigned long long)info->bytes_pinned, 3739 (unsigned long long)info->bytes_delalloc, 3740 (unsigned long long)info->bytes_may_use, 3741 (unsigned long long)info->bytes_used); 3742 3743 down_read(&info->groups_sem); 3744 list_for_each_entry(cache, &info->block_groups, list) { 3745 spin_lock(&cache->lock); 3746 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3747 "%llu pinned %llu reserved\n", 3748 (unsigned long long)cache->key.objectid, 3749 (unsigned long long)cache->key.offset, 3750 (unsigned long long)btrfs_block_group_used(&cache->item), 3751 (unsigned long long)cache->pinned, 3752 (unsigned long long)cache->reserved); 3753 btrfs_dump_free_space(cache, bytes); 3754 spin_unlock(&cache->lock); 3755 } 3756 up_read(&info->groups_sem); 3757 } 3758 3759 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3760 struct btrfs_root *root, 3761 u64 num_bytes, u64 min_alloc_size, 3762 u64 empty_size, u64 hint_byte, 3763 u64 search_end, struct btrfs_key *ins, 3764 u64 data) 3765 { 3766 int ret; 3767 u64 search_start = 0; 3768 struct btrfs_fs_info *info = root->fs_info; 3769 3770 data = btrfs_get_alloc_profile(root, data); 3771 again: 3772 /* 3773 * the only place that sets empty_size is btrfs_realloc_node, which 3774 * is not called recursively on allocations 3775 */ 3776 if (empty_size || root->ref_cows) { 3777 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3778 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3779 2 * 1024 * 1024, 3780 BTRFS_BLOCK_GROUP_METADATA | 3781 (info->metadata_alloc_profile & 3782 info->avail_metadata_alloc_bits), 0); 3783 } 3784 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3785 num_bytes + 2 * 1024 * 1024, data, 0); 3786 } 3787 3788 WARN_ON(num_bytes < root->sectorsize); 3789 ret = find_free_extent(trans, root, num_bytes, empty_size, 3790 search_start, search_end, hint_byte, ins, 3791 trans->alloc_exclude_start, 3792 trans->alloc_exclude_nr, data); 3793 3794 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 3795 num_bytes = num_bytes >> 1; 3796 num_bytes = num_bytes & ~(root->sectorsize - 1); 3797 num_bytes = max(num_bytes, min_alloc_size); 3798 do_chunk_alloc(trans, root->fs_info->extent_root, 3799 num_bytes, data, 1); 3800 goto again; 3801 } 3802 if (ret) { 3803 struct btrfs_space_info *sinfo; 3804 3805 sinfo = __find_space_info(root->fs_info, data); 3806 printk(KERN_ERR "btrfs allocation failed flags %llu, " 3807 "wanted %llu\n", (unsigned long long)data, 3808 (unsigned long long)num_bytes); 3809 dump_space_info(sinfo, num_bytes); 3810 BUG(); 3811 } 3812 3813 return ret; 3814 } 3815 3816 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 3817 { 3818 struct btrfs_block_group_cache *cache; 3819 int ret = 0; 3820 3821 cache = btrfs_lookup_block_group(root->fs_info, start); 3822 if (!cache) { 3823 printk(KERN_ERR "Unable to find block group for %llu\n", 3824 (unsigned long long)start); 3825 return -ENOSPC; 3826 } 3827 3828 ret = btrfs_discard_extent(root, start, len); 3829 3830 btrfs_add_free_space(cache, start, len); 3831 btrfs_put_block_group(cache); 3832 update_reserved_extents(root, start, len, 0); 3833 3834 return ret; 3835 } 3836 3837 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3838 struct btrfs_root *root, 3839 u64 num_bytes, u64 min_alloc_size, 3840 u64 empty_size, u64 hint_byte, 3841 u64 search_end, struct 
btrfs_key *ins, 3842 u64 data) 3843 { 3844 int ret; 3845 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 3846 empty_size, hint_byte, search_end, ins, 3847 data); 3848 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3849 return ret; 3850 } 3851 3852 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 3853 struct btrfs_root *root, 3854 u64 parent, u64 root_objectid, 3855 u64 flags, u64 owner, u64 offset, 3856 struct btrfs_key *ins, int ref_mod) 3857 { 3858 int ret; 3859 struct btrfs_fs_info *fs_info = root->fs_info; 3860 struct btrfs_extent_item *extent_item; 3861 struct btrfs_extent_inline_ref *iref; 3862 struct btrfs_path *path; 3863 struct extent_buffer *leaf; 3864 int type; 3865 u32 size; 3866 3867 if (parent > 0) 3868 type = BTRFS_SHARED_DATA_REF_KEY; 3869 else 3870 type = BTRFS_EXTENT_DATA_REF_KEY; 3871 3872 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 3873 3874 path = btrfs_alloc_path(); 3875 BUG_ON(!path); 3876 3877 path->leave_spinning = 1; 3878 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 3879 ins, size); 3880 BUG_ON(ret); 3881 3882 leaf = path->nodes[0]; 3883 extent_item = btrfs_item_ptr(leaf, path->slots[0], 3884 struct btrfs_extent_item); 3885 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 3886 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 3887 btrfs_set_extent_flags(leaf, extent_item, 3888 flags | BTRFS_EXTENT_FLAG_DATA); 3889 3890 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 3891 btrfs_set_extent_inline_ref_type(leaf, iref, type); 3892 if (parent > 0) { 3893 struct btrfs_shared_data_ref *ref; 3894 ref = (struct btrfs_shared_data_ref *)(iref + 1); 3895 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 3896 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 3897 } else { 3898 struct btrfs_extent_data_ref *ref; 3899 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 3900 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 3901 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 3902 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 3903 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 3904 } 3905 3906 btrfs_mark_buffer_dirty(path->nodes[0]); 3907 btrfs_free_path(path); 3908 3909 ret = update_block_group(trans, root, ins->objectid, ins->offset, 3910 1, 0); 3911 if (ret) { 3912 printk(KERN_ERR "btrfs update block group failed for %llu " 3913 "%llu\n", (unsigned long long)ins->objectid, 3914 (unsigned long long)ins->offset); 3915 BUG(); 3916 } 3917 return ret; 3918 } 3919 3920 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 3921 struct btrfs_root *root, 3922 u64 parent, u64 root_objectid, 3923 u64 flags, struct btrfs_disk_key *key, 3924 int level, struct btrfs_key *ins) 3925 { 3926 int ret; 3927 struct btrfs_fs_info *fs_info = root->fs_info; 3928 struct btrfs_extent_item *extent_item; 3929 struct btrfs_tree_block_info *block_info; 3930 struct btrfs_extent_inline_ref *iref; 3931 struct btrfs_path *path; 3932 struct extent_buffer *leaf; 3933 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 3934 3935 path = btrfs_alloc_path(); 3936 BUG_ON(!path); 3937 3938 path->leave_spinning = 1; 3939 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 3940 ins, size); 3941 BUG_ON(ret); 3942 3943 leaf = path->nodes[0]; 3944 extent_item = btrfs_item_ptr(leaf, path->slots[0], 3945 struct btrfs_extent_item); 3946 btrfs_set_extent_refs(leaf, extent_item, 1); 3947 
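	/*
	 * the new item is a struct btrfs_extent_item followed directly by
	 * a struct btrfs_tree_block_info and one inline backref: a shared
	 * block ref keyed on parent when parent > 0, otherwise a tree
	 * block ref keyed on root_objectid.
	 */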
btrfs_set_extent_generation(leaf, extent_item, trans->transid); 3948 btrfs_set_extent_flags(leaf, extent_item, 3949 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 3950 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 3951 3952 btrfs_set_tree_block_key(leaf, block_info, key); 3953 btrfs_set_tree_block_level(leaf, block_info, level); 3954 3955 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 3956 if (parent > 0) { 3957 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 3958 btrfs_set_extent_inline_ref_type(leaf, iref, 3959 BTRFS_SHARED_BLOCK_REF_KEY); 3960 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 3961 } else { 3962 btrfs_set_extent_inline_ref_type(leaf, iref, 3963 BTRFS_TREE_BLOCK_REF_KEY); 3964 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 3965 } 3966 3967 btrfs_mark_buffer_dirty(leaf); 3968 btrfs_free_path(path); 3969 3970 ret = update_block_group(trans, root, ins->objectid, ins->offset, 3971 1, 0); 3972 if (ret) { 3973 printk(KERN_ERR "btrfs update block group failed for %llu " 3974 "%llu\n", (unsigned long long)ins->objectid, 3975 (unsigned long long)ins->offset); 3976 BUG(); 3977 } 3978 return ret; 3979 } 3980 3981 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 3982 struct btrfs_root *root, 3983 u64 root_objectid, u64 owner, 3984 u64 offset, struct btrfs_key *ins) 3985 { 3986 int ret; 3987 3988 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 3989 3990 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 3991 0, root_objectid, owner, offset, 3992 BTRFS_ADD_DELAYED_EXTENT, NULL); 3993 return ret; 3994 } 3995 3996 /* 3997 * this is used by the tree logging recovery code. It records that 3998 * an extent has been allocated and makes sure to clear the free 3999 * space cache bits as well 4000 */ 4001 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 4002 struct btrfs_root *root, 4003 u64 root_objectid, u64 owner, u64 offset, 4004 struct btrfs_key *ins) 4005 { 4006 int ret; 4007 struct btrfs_block_group_cache *block_group; 4008 4009 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 4010 mutex_lock(&block_group->cache_mutex); 4011 cache_block_group(root, block_group); 4012 mutex_unlock(&block_group->cache_mutex); 4013 4014 ret = btrfs_remove_free_space(block_group, ins->objectid, 4015 ins->offset); 4016 BUG_ON(ret); 4017 btrfs_put_block_group(block_group); 4018 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 4019 0, owner, offset, ins, 1); 4020 return ret; 4021 } 4022 4023 /* 4024 * finds a free extent and does all the dirty work required for allocation 4025 * returns the key for the extent through ins, and a tree buffer for 4026 * the first block of the extent through buf. 4027 * 4028 * returns 0 if everything worked, non-zero otherwise. 
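 *
 * on success ins->objectid holds the start of the new block and
 * ins->offset its size in bytes; the extent buffer itself is set up
 * afterwards by btrfs_init_new_buffer() in the caller.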
4029 */ 4030 static int alloc_tree_block(struct btrfs_trans_handle *trans, 4031 struct btrfs_root *root, 4032 u64 num_bytes, u64 parent, u64 root_objectid, 4033 struct btrfs_disk_key *key, int level, 4034 u64 empty_size, u64 hint_byte, u64 search_end, 4035 struct btrfs_key *ins) 4036 { 4037 int ret; 4038 u64 flags = 0; 4039 4040 ret = __btrfs_reserve_extent(trans, root, num_bytes, num_bytes, 4041 empty_size, hint_byte, search_end, 4042 ins, 0); 4043 BUG_ON(ret); 4044 4045 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 4046 if (parent == 0) 4047 parent = ins->objectid; 4048 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 4049 } else 4050 BUG_ON(parent > 0); 4051 4052 update_reserved_extents(root, ins->objectid, ins->offset, 1); 4053 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 4054 struct btrfs_delayed_extent_op *extent_op; 4055 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 4056 BUG_ON(!extent_op); 4057 if (key) 4058 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 4059 else 4060 memset(&extent_op->key, 0, sizeof(extent_op->key)); 4061 extent_op->flags_to_set = flags; 4062 extent_op->update_key = 1; 4063 extent_op->update_flags = 1; 4064 extent_op->is_data = 0; 4065 4066 ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, 4067 ins->offset, parent, root_objectid, 4068 level, BTRFS_ADD_DELAYED_EXTENT, 4069 extent_op); 4070 BUG_ON(ret); 4071 } 4072 return ret; 4073 } 4074 4075 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 4076 struct btrfs_root *root, 4077 u64 bytenr, u32 blocksize, 4078 int level) 4079 { 4080 struct extent_buffer *buf; 4081 4082 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 4083 if (!buf) 4084 return ERR_PTR(-ENOMEM); 4085 btrfs_set_header_generation(buf, trans->transid); 4086 btrfs_set_buffer_lockdep_class(buf, level); 4087 btrfs_tree_lock(buf); 4088 clean_tree_block(trans, root, buf); 4089 4090 btrfs_set_lock_blocking(buf); 4091 btrfs_set_buffer_uptodate(buf); 4092 4093 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 4094 set_extent_dirty(&root->dirty_log_pages, buf->start, 4095 buf->start + buf->len - 1, GFP_NOFS); 4096 } else { 4097 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 4098 buf->start + buf->len - 1, GFP_NOFS); 4099 } 4100 trans->blocks_used++; 4101 /* this returns a buffer locked for blocking */ 4102 return buf; 4103 } 4104 4105 /* 4106 * helper function to allocate a block for a given tree 4107 * returns the tree buffer or NULL. 
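 *
 * (in this version an allocation failure is actually propagated as an
 *  ERR_PTR() from alloc_tree_block()/btrfs_init_new_buffer() rather than
 *  NULL, so a caller in an int-returning context would typically do
 *
 *	buf = btrfs_alloc_free_block(trans, root, blocksize, ...);
 *	if (IS_ERR(buf))
 *		return PTR_ERR(buf);
 *
 *  instead of checking for NULL)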
4108 */ 4109 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 4110 struct btrfs_root *root, u32 blocksize, 4111 u64 parent, u64 root_objectid, 4112 struct btrfs_disk_key *key, int level, 4113 u64 hint, u64 empty_size) 4114 { 4115 struct btrfs_key ins; 4116 int ret; 4117 struct extent_buffer *buf; 4118 4119 ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, 4120 key, level, empty_size, hint, (u64)-1, &ins); 4121 if (ret) { 4122 BUG_ON(ret > 0); 4123 return ERR_PTR(ret); 4124 } 4125 4126 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 4127 blocksize, level); 4128 return buf; 4129 } 4130 4131 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 4132 struct btrfs_root *root, struct extent_buffer *leaf) 4133 { 4134 u64 disk_bytenr; 4135 u64 num_bytes; 4136 struct btrfs_key key; 4137 struct btrfs_file_extent_item *fi; 4138 u32 nritems; 4139 int i; 4140 int ret; 4141 4142 BUG_ON(!btrfs_is_leaf(leaf)); 4143 nritems = btrfs_header_nritems(leaf); 4144 4145 for (i = 0; i < nritems; i++) { 4146 cond_resched(); 4147 btrfs_item_key_to_cpu(leaf, &key, i); 4148 4149 /* only extents have references, skip everything else */ 4150 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 4151 continue; 4152 4153 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 4154 4155 /* inline extents live in the btree, they don't have refs */ 4156 if (btrfs_file_extent_type(leaf, fi) == 4157 BTRFS_FILE_EXTENT_INLINE) 4158 continue; 4159 4160 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 4161 4162 /* holes don't have refs */ 4163 if (disk_bytenr == 0) 4164 continue; 4165 4166 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 4167 ret = btrfs_free_extent(trans, root, disk_bytenr, num_bytes, 4168 leaf->start, 0, key.objectid, 0); 4169 BUG_ON(ret); 4170 } 4171 return 0; 4172 } 4173 4174 #if 0 4175 4176 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 4177 struct btrfs_root *root, 4178 struct btrfs_leaf_ref *ref) 4179 { 4180 int i; 4181 int ret; 4182 struct btrfs_extent_info *info; 4183 struct refsort *sorted; 4184 4185 if (ref->nritems == 0) 4186 return 0; 4187 4188 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 4189 for (i = 0; i < ref->nritems; i++) { 4190 sorted[i].bytenr = ref->extents[i].bytenr; 4191 sorted[i].slot = i; 4192 } 4193 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 4194 4195 /* 4196 * the items in the ref were sorted when the ref was inserted 4197 * into the ref cache, so this is already in order 4198 */ 4199 for (i = 0; i < ref->nritems; i++) { 4200 info = ref->extents + sorted[i].slot; 4201 ret = btrfs_free_extent(trans, root, info->bytenr, 4202 info->num_bytes, ref->bytenr, 4203 ref->owner, ref->generation, 4204 info->objectid, 0); 4205 4206 atomic_inc(&root->fs_info->throttle_gen); 4207 wake_up(&root->fs_info->transaction_throttle); 4208 cond_resched(); 4209 4210 BUG_ON(ret); 4211 info++; 4212 } 4213 4214 kfree(sorted); 4215 return 0; 4216 } 4217 4218 4219 static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, 4220 struct btrfs_root *root, u64 start, 4221 u64 len, u32 *refs) 4222 { 4223 int ret; 4224 4225 ret = btrfs_lookup_extent_refs(trans, root, start, len, refs); 4226 BUG_ON(ret); 4227 4228 #if 0 /* some debugging code in case we see problems here */ 4229 /* if the refs count is one, it won't get increased again. But 4230 * if the ref count is > 1, someone may be decreasing it at 4231 * the same time we are. 
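 * the re-check below is only best effort: it takes the tree lock if it
 * can and prints a warning when the count drops to one, but it never
 * changes behaviour.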
4232 */ 4233 if (*refs != 1) { 4234 struct extent_buffer *eb = NULL; 4235 eb = btrfs_find_create_tree_block(root, start, len); 4236 if (eb) 4237 btrfs_tree_lock(eb); 4238 4239 mutex_lock(&root->fs_info->alloc_mutex); 4240 ret = lookup_extent_ref(NULL, root, start, len, refs); 4241 BUG_ON(ret); 4242 mutex_unlock(&root->fs_info->alloc_mutex); 4243 4244 if (eb) { 4245 btrfs_tree_unlock(eb); 4246 free_extent_buffer(eb); 4247 } 4248 if (*refs == 1) { 4249 printk(KERN_ERR "btrfs block %llu went down to one " 4250 "during drop_snap\n", (unsigned long long)start); 4251 } 4252 4253 } 4254 #endif 4255 4256 cond_resched(); 4257 return ret; 4258 } 4259 4260 4261 /* 4262 * this is used while deleting old snapshots, and it drops the refs 4263 * on a whole subtree starting from a level 1 node. 4264 * 4265 * The idea is to sort all the leaf pointers, and then drop the 4266 * ref on all the leaves in order. Most of the time the leaves 4267 * will have ref cache entries, so no leaf IOs will be required to 4268 * find the extents they have references on. 4269 * 4270 * For each leaf, any references it has are also dropped in order 4271 * 4272 * This ends up dropping the references in something close to optimal 4273 * order for reading and modifying the extent allocation tree. 4274 */ 4275 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 4276 struct btrfs_root *root, 4277 struct btrfs_path *path) 4278 { 4279 u64 bytenr; 4280 u64 root_owner; 4281 u64 root_gen; 4282 struct extent_buffer *eb = path->nodes[1]; 4283 struct extent_buffer *leaf; 4284 struct btrfs_leaf_ref *ref; 4285 struct refsort *sorted = NULL; 4286 int nritems = btrfs_header_nritems(eb); 4287 int ret; 4288 int i; 4289 int refi = 0; 4290 int slot = path->slots[1]; 4291 u32 blocksize = btrfs_level_size(root, 0); 4292 u32 refs; 4293 4294 if (nritems == 0) 4295 goto out; 4296 4297 root_owner = btrfs_header_owner(eb); 4298 root_gen = btrfs_header_generation(eb); 4299 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 4300 4301 /* 4302 * step one, sort all the leaf pointers so we don't scribble 4303 * randomly into the extent allocation tree 4304 */ 4305 for (i = slot; i < nritems; i++) { 4306 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 4307 sorted[refi].slot = i; 4308 refi++; 4309 } 4310 4311 /* 4312 * nritems won't be zero, but if we're picking up drop_snapshot 4313 * after a crash, slot might be > 0, so double check things 4314 * just in case. 4315 */ 4316 if (refi == 0) 4317 goto out; 4318 4319 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 4320 4321 /* 4322 * the first loop frees everything the leaves point to 4323 */ 4324 for (i = 0; i < refi; i++) { 4325 u64 ptr_gen; 4326 4327 bytenr = sorted[i].bytenr; 4328 4329 /* 4330 * check the reference count on this leaf. If it is > 1 4331 * we just decrement it below and don't update any 4332 * of the refs the leaf points to. 4333 */ 4334 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4335 blocksize, &refs); 4336 BUG_ON(ret); 4337 if (refs != 1) 4338 continue; 4339 4340 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 4341 4342 /* 4343 * the leaf only had one reference, which means the 4344 * only thing pointing to this leaf is the snapshot 4345 * we're deleting. It isn't possible for the reference 4346 * count to increase again later 4347 * 4348 * The reference cache is checked for the leaf, 4349 * and if found we'll be able to drop any refs held by 4350 * the leaf without needing to read it in. 
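 *
 * a cache entry whose generation no longer matches the pointer is
 * stale; it gets freed below and the leaf is read from disk instead.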
4351 */ 4352 ref = btrfs_lookup_leaf_ref(root, bytenr); 4353 if (ref && ref->generation != ptr_gen) { 4354 btrfs_free_leaf_ref(root, ref); 4355 ref = NULL; 4356 } 4357 if (ref) { 4358 ret = cache_drop_leaf_ref(trans, root, ref); 4359 BUG_ON(ret); 4360 btrfs_remove_leaf_ref(root, ref); 4361 btrfs_free_leaf_ref(root, ref); 4362 } else { 4363 /* 4364 * the leaf wasn't in the reference cache, so 4365 * we have to read it. 4366 */ 4367 leaf = read_tree_block(root, bytenr, blocksize, 4368 ptr_gen); 4369 ret = btrfs_drop_leaf_ref(trans, root, leaf); 4370 BUG_ON(ret); 4371 free_extent_buffer(leaf); 4372 } 4373 atomic_inc(&root->fs_info->throttle_gen); 4374 wake_up(&root->fs_info->transaction_throttle); 4375 cond_resched(); 4376 } 4377 4378 /* 4379 * run through the loop again to free the refs on the leaves. 4380 * This is faster than doing it in the loop above because 4381 * the leaves are likely to be clustered together. We end up 4382 * working in nice chunks on the extent allocation tree. 4383 */ 4384 for (i = 0; i < refi; i++) { 4385 bytenr = sorted[i].bytenr; 4386 ret = btrfs_free_extent(trans, root, bytenr, 4387 blocksize, eb->start, 4388 root_owner, root_gen, 0, 1); 4389 BUG_ON(ret); 4390 4391 atomic_inc(&root->fs_info->throttle_gen); 4392 wake_up(&root->fs_info->transaction_throttle); 4393 cond_resched(); 4394 } 4395 out: 4396 kfree(sorted); 4397 4398 /* 4399 * update the path to show we've processed the entire level 1 4400 * node. This will get saved into the root's drop_snapshot_progress 4401 * field so these drops are not repeated again if this transaction 4402 * commits. 4403 */ 4404 path->slots[1] = nritems; 4405 return 0; 4406 } 4407 4408 /* 4409 * helper function for drop_snapshot, this walks down the tree dropping ref 4410 * counts as it goes. 4411 */ 4412 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4413 struct btrfs_root *root, 4414 struct btrfs_path *path, int *level) 4415 { 4416 u64 root_owner; 4417 u64 root_gen; 4418 u64 bytenr; 4419 u64 ptr_gen; 4420 struct extent_buffer *next; 4421 struct extent_buffer *cur; 4422 struct extent_buffer *parent; 4423 u32 blocksize; 4424 int ret; 4425 u32 refs; 4426 4427 WARN_ON(*level < 0); 4428 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4429 ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, 4430 path->nodes[*level]->len, &refs); 4431 BUG_ON(ret); 4432 if (refs > 1) 4433 goto out; 4434 4435 /* 4436 * walk down to the last node level and free all the leaves 4437 */ 4438 while (*level >= 0) { 4439 WARN_ON(*level < 0); 4440 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4441 cur = path->nodes[*level]; 4442 4443 if (btrfs_header_level(cur) != *level) 4444 WARN_ON(1); 4445 4446 if (path->slots[*level] >= 4447 btrfs_header_nritems(cur)) 4448 break; 4449 4450 /* the new code goes down to level 1 and does all the 4451 * leaves pointed to that node in bulk. So, this check 4452 * for level 0 will always be false. 4453 * 4454 * But, the disk format allows the drop_snapshot_progress 4455 * field in the root to leave things in a state where 4456 * a leaf will need cleaning up here. If someone crashes 4457 * with the old code and then boots with the new code, 4458 * we might find a leaf here. 4459 */ 4460 if (*level == 0) { 4461 ret = btrfs_drop_leaf_ref(trans, root, cur); 4462 BUG_ON(ret); 4463 break; 4464 } 4465 4466 /* 4467 * once we get to level one, process the whole node 4468 * at once, including everything below it. 
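 *
 * drop_level_one_refs() (above) sorts the leaf pointers by bytenr
 * first, so the extent tree is updated in a reasonably linear order.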
4469 */ 4470 if (*level == 1) { 4471 ret = drop_level_one_refs(trans, root, path); 4472 BUG_ON(ret); 4473 break; 4474 } 4475 4476 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4477 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4478 blocksize = btrfs_level_size(root, *level - 1); 4479 4480 ret = drop_snap_lookup_refcount(trans, root, bytenr, 4481 blocksize, &refs); 4482 BUG_ON(ret); 4483 4484 /* 4485 * if there is more than one reference, we don't need 4486 * to read that node to drop any references it has. We 4487 * just drop the ref we hold on that node and move on to the 4488 * next slot in this level. 4489 */ 4490 if (refs != 1) { 4491 parent = path->nodes[*level]; 4492 root_owner = btrfs_header_owner(parent); 4493 root_gen = btrfs_header_generation(parent); 4494 path->slots[*level]++; 4495 4496 ret = btrfs_free_extent(trans, root, bytenr, 4497 blocksize, parent->start, 4498 root_owner, root_gen, 4499 *level - 1, 1); 4500 BUG_ON(ret); 4501 4502 atomic_inc(&root->fs_info->throttle_gen); 4503 wake_up(&root->fs_info->transaction_throttle); 4504 cond_resched(); 4505 4506 continue; 4507 } 4508 4509 /* 4510 * we need to keep freeing things in the next level down. 4511 * read the block and loop around to process it 4512 */ 4513 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4514 WARN_ON(*level <= 0); 4515 if (path->nodes[*level-1]) 4516 free_extent_buffer(path->nodes[*level-1]); 4517 path->nodes[*level-1] = next; 4518 *level = btrfs_header_level(next); 4519 path->slots[*level] = 0; 4520 cond_resched(); 4521 } 4522 out: 4523 WARN_ON(*level < 0); 4524 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4525 4526 if (path->nodes[*level] == root->node) { 4527 parent = path->nodes[*level]; 4528 bytenr = path->nodes[*level]->start; 4529 } else { 4530 parent = path->nodes[*level + 1]; 4531 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 4532 } 4533 4534 blocksize = btrfs_level_size(root, *level); 4535 root_owner = btrfs_header_owner(parent); 4536 root_gen = btrfs_header_generation(parent); 4537 4538 /* 4539 * cleanup and free the reference on the last node 4540 * we processed 4541 */ 4542 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 4543 parent->start, root_owner, root_gen, 4544 *level, 1); 4545 free_extent_buffer(path->nodes[*level]); 4546 path->nodes[*level] = NULL; 4547 4548 *level += 1; 4549 BUG_ON(ret); 4550 4551 cond_resched(); 4552 return 0; 4553 } 4554 #endif 4555 4556 /* 4557 * helper function for drop_subtree, this function is similar to 4558 * walk_down_tree. The main difference is that it checks reference 4559 * counts while tree blocks are locked. 
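 *
 * the code below also assumes every block in the subtree carries
 * BTRFS_BLOCK_FLAG_FULL_BACKREF and BUG()s if that ever isn't the case.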
4560 */ 4561 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4562 struct btrfs_root *root, 4563 struct btrfs_path *path, int *level) 4564 { 4565 struct extent_buffer *next; 4566 struct extent_buffer *cur; 4567 struct extent_buffer *parent; 4568 u64 bytenr; 4569 u64 ptr_gen; 4570 u64 refs; 4571 u64 flags; 4572 u32 blocksize; 4573 int ret; 4574 4575 cur = path->nodes[*level]; 4576 ret = btrfs_lookup_extent_info(trans, root, cur->start, cur->len, 4577 &refs, &flags); 4578 BUG_ON(ret); 4579 if (refs > 1) 4580 goto out; 4581 4582 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4583 4584 while (*level >= 0) { 4585 cur = path->nodes[*level]; 4586 if (*level == 0) { 4587 ret = btrfs_drop_leaf_ref(trans, root, cur); 4588 BUG_ON(ret); 4589 clean_tree_block(trans, root, cur); 4590 break; 4591 } 4592 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 4593 clean_tree_block(trans, root, cur); 4594 break; 4595 } 4596 4597 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4598 blocksize = btrfs_level_size(root, *level - 1); 4599 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4600 4601 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4602 btrfs_tree_lock(next); 4603 btrfs_set_lock_blocking(next); 4604 4605 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 4606 &refs, &flags); 4607 BUG_ON(ret); 4608 if (refs > 1) { 4609 parent = path->nodes[*level]; 4610 ret = btrfs_free_extent(trans, root, bytenr, 4611 blocksize, parent->start, 4612 btrfs_header_owner(parent), 4613 *level - 1, 0); 4614 BUG_ON(ret); 4615 path->slots[*level]++; 4616 btrfs_tree_unlock(next); 4617 free_extent_buffer(next); 4618 continue; 4619 } 4620 4621 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 4622 4623 *level = btrfs_header_level(next); 4624 path->nodes[*level] = next; 4625 path->slots[*level] = 0; 4626 path->locks[*level] = 1; 4627 cond_resched(); 4628 } 4629 out: 4630 if (path->nodes[*level] == root->node) 4631 parent = path->nodes[*level]; 4632 else 4633 parent = path->nodes[*level + 1]; 4634 bytenr = path->nodes[*level]->start; 4635 blocksize = path->nodes[*level]->len; 4636 4637 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent->start, 4638 btrfs_header_owner(parent), *level, 0); 4639 BUG_ON(ret); 4640 4641 if (path->locks[*level]) { 4642 btrfs_tree_unlock(path->nodes[*level]); 4643 path->locks[*level] = 0; 4644 } 4645 free_extent_buffer(path->nodes[*level]); 4646 path->nodes[*level] = NULL; 4647 *level += 1; 4648 cond_resched(); 4649 return 0; 4650 } 4651 4652 /* 4653 * helper for dropping snapshots. This walks back up the tree in the path 4654 * to find the first node higher up where we haven't yet gone through 4655 * all the slots 4656 */ 4657 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4658 struct btrfs_root *root, 4659 struct btrfs_path *path, 4660 int *level, int max_level) 4661 { 4662 struct btrfs_root_item *root_item = &root->root_item; 4663 int i; 4664 int slot; 4665 int ret; 4666 4667 for (i = *level; i < max_level && path->nodes[i]; i++) { 4668 slot = path->slots[i]; 4669 if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { 4670 /* 4671 * there is more work to do in this level. 
4672 * Update the drop_progress marker to reflect 4673 * the work we've done so far, and then bump 4674 * the slot number 4675 */ 4676 path->slots[i]++; 4677 WARN_ON(*level == 0); 4678 if (max_level == BTRFS_MAX_LEVEL) { 4679 btrfs_node_key(path->nodes[i], 4680 &root_item->drop_progress, 4681 path->slots[i]); 4682 root_item->drop_level = i; 4683 } 4684 *level = i; 4685 return 0; 4686 } else { 4687 struct extent_buffer *parent; 4688 4689 /* 4690 * this whole node is done, free our reference 4691 * on it and go up one level 4692 */ 4693 if (path->nodes[*level] == root->node) 4694 parent = path->nodes[*level]; 4695 else 4696 parent = path->nodes[*level + 1]; 4697 4698 clean_tree_block(trans, root, path->nodes[i]); 4699 ret = btrfs_free_extent(trans, root, 4700 path->nodes[i]->start, 4701 path->nodes[i]->len, 4702 parent->start, 4703 btrfs_header_owner(parent), 4704 *level, 0); 4705 BUG_ON(ret); 4706 if (path->locks[*level]) { 4707 btrfs_tree_unlock(path->nodes[i]); 4708 path->locks[i] = 0; 4709 } 4710 free_extent_buffer(path->nodes[i]); 4711 path->nodes[i] = NULL; 4712 *level = i + 1; 4713 } 4714 } 4715 return 1; 4716 } 4717 4718 /* 4719 * drop the reference count on the tree rooted at 'snap'. This traverses 4720 * the tree freeing any blocks that have a ref count of zero after being 4721 * decremented. 4722 */ 4723 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 4724 *root) 4725 { 4726 int ret = 0; 4727 int wret; 4728 int level; 4729 struct btrfs_path *path; 4730 int update_count; 4731 struct btrfs_root_item *root_item = &root->root_item; 4732 4733 path = btrfs_alloc_path(); 4734 BUG_ON(!path); 4735 4736 level = btrfs_header_level(root->node); 4737 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4738 path->nodes[level] = btrfs_lock_root_node(root); 4739 btrfs_set_lock_blocking(path->nodes[level]); 4740 path->slots[level] = 0; 4741 path->locks[level] = 1; 4742 } else { 4743 struct btrfs_key key; 4744 struct btrfs_disk_key found_key; 4745 struct extent_buffer *node; 4746 4747 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4748 level = root_item->drop_level; 4749 path->lowest_level = level; 4750 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4751 if (wret < 0) { 4752 ret = wret; 4753 goto out; 4754 } 4755 node = path->nodes[level]; 4756 btrfs_node_key(node, &found_key, path->slots[level]); 4757 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 4758 sizeof(found_key))); 4759 /* 4760 * unlock our path, this is safe because only this 4761 * function is allowed to delete this snapshot 4762 */ 4763 btrfs_unlock_up_safe(path, 0); 4764 } 4765 while (1) { 4766 unsigned long update; 4767 wret = walk_down_tree(trans, root, path, &level); 4768 if (wret > 0) 4769 break; 4770 if (wret < 0) 4771 ret = wret; 4772 4773 wret = walk_up_tree(trans, root, path, &level, 4774 BTRFS_MAX_LEVEL); 4775 if (wret > 0) 4776 break; 4777 if (wret < 0) 4778 ret = wret; 4779 if (trans->transaction->in_commit || 4780 trans->transaction->delayed_refs.flushing) { 4781 ret = -EAGAIN; 4782 break; 4783 } 4784 for (update_count = 0; update_count < 16; update_count++) { 4785 update = trans->delayed_ref_updates; 4786 trans->delayed_ref_updates = 0; 4787 if (update) 4788 btrfs_run_delayed_refs(trans, root, update); 4789 else 4790 break; 4791 } 4792 } 4793 out: 4794 btrfs_free_path(path); 4795 return ret; 4796 } 4797 4798 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 4799 struct btrfs_root *root, 4800 struct extent_buffer *node, 4801 struct extent_buffer *parent) 
4802 { 4803 struct btrfs_path *path; 4804 int level; 4805 int parent_level; 4806 int ret = 0; 4807 int wret; 4808 4809 path = btrfs_alloc_path(); 4810 BUG_ON(!path); 4811 4812 btrfs_assert_tree_locked(parent); 4813 parent_level = btrfs_header_level(parent); 4814 extent_buffer_get(parent); 4815 path->nodes[parent_level] = parent; 4816 path->slots[parent_level] = btrfs_header_nritems(parent); 4817 4818 btrfs_assert_tree_locked(node); 4819 level = btrfs_header_level(node); 4820 extent_buffer_get(node); 4821 path->nodes[level] = node; 4822 path->slots[level] = 0; 4823 4824 while (1) { 4825 wret = walk_down_tree(trans, root, path, &level); 4826 if (wret < 0) 4827 ret = wret; 4828 if (wret != 0) 4829 break; 4830 4831 wret = walk_up_tree(trans, root, path, &level, parent_level); 4832 if (wret < 0) 4833 ret = wret; 4834 if (wret != 0) 4835 break; 4836 } 4837 4838 btrfs_free_path(path); 4839 return ret; 4840 } 4841 4842 #if 0 4843 static unsigned long calc_ra(unsigned long start, unsigned long last, 4844 unsigned long nr) 4845 { 4846 return min(last, start + nr - 1); 4847 } 4848 4849 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 4850 u64 len) 4851 { 4852 u64 page_start; 4853 u64 page_end; 4854 unsigned long first_index; 4855 unsigned long last_index; 4856 unsigned long i; 4857 struct page *page; 4858 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4859 struct file_ra_state *ra; 4860 struct btrfs_ordered_extent *ordered; 4861 unsigned int total_read = 0; 4862 unsigned int total_dirty = 0; 4863 int ret = 0; 4864 4865 ra = kzalloc(sizeof(*ra), GFP_NOFS); 4866 4867 mutex_lock(&inode->i_mutex); 4868 first_index = start >> PAGE_CACHE_SHIFT; 4869 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 4870 4871 /* make sure the dirty trick played by the caller work */ 4872 ret = invalidate_inode_pages2_range(inode->i_mapping, 4873 first_index, last_index); 4874 if (ret) 4875 goto out_unlock; 4876 4877 file_ra_state_init(ra, inode->i_mapping); 4878 4879 for (i = first_index ; i <= last_index; i++) { 4880 if (total_read % ra->ra_pages == 0) { 4881 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 4882 calc_ra(i, last_index, ra->ra_pages)); 4883 } 4884 total_read++; 4885 again: 4886 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 4887 BUG_ON(1); 4888 page = grab_cache_page(inode->i_mapping, i); 4889 if (!page) { 4890 ret = -ENOMEM; 4891 goto out_unlock; 4892 } 4893 if (!PageUptodate(page)) { 4894 btrfs_readpage(NULL, page); 4895 lock_page(page); 4896 if (!PageUptodate(page)) { 4897 unlock_page(page); 4898 page_cache_release(page); 4899 ret = -EIO; 4900 goto out_unlock; 4901 } 4902 } 4903 wait_on_page_writeback(page); 4904 4905 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 4906 page_end = page_start + PAGE_CACHE_SIZE - 1; 4907 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 4908 4909 ordered = btrfs_lookup_ordered_extent(inode, page_start); 4910 if (ordered) { 4911 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4912 unlock_page(page); 4913 page_cache_release(page); 4914 btrfs_start_ordered_extent(inode, ordered, 1); 4915 btrfs_put_ordered_extent(ordered); 4916 goto again; 4917 } 4918 set_page_extent_mapped(page); 4919 4920 if (i == first_index) 4921 set_extent_bits(io_tree, page_start, page_end, 4922 EXTENT_BOUNDARY, GFP_NOFS); 4923 btrfs_set_extent_delalloc(inode, page_start, page_end); 4924 4925 set_page_dirty(page); 4926 total_dirty++; 4927 4928 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4929 unlock_page(page); 4930 
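			/*
			 * done with this page; drop the reference that
			 * grab_cache_page() took on it
			 */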
page_cache_release(page); 4931 } 4932 4933 out_unlock: 4934 kfree(ra); 4935 mutex_unlock(&inode->i_mutex); 4936 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 4937 return ret; 4938 } 4939 4940 static noinline int relocate_data_extent(struct inode *reloc_inode, 4941 struct btrfs_key *extent_key, 4942 u64 offset) 4943 { 4944 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 4945 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 4946 struct extent_map *em; 4947 u64 start = extent_key->objectid - offset; 4948 u64 end = start + extent_key->offset - 1; 4949 4950 em = alloc_extent_map(GFP_NOFS); 4951 BUG_ON(!em || IS_ERR(em)); 4952 4953 em->start = start; 4954 em->len = extent_key->offset; 4955 em->block_len = extent_key->offset; 4956 em->block_start = extent_key->objectid; 4957 em->bdev = root->fs_info->fs_devices->latest_bdev; 4958 set_bit(EXTENT_FLAG_PINNED, &em->flags); 4959 4960 /* setup extent map to cheat btrfs_readpage */ 4961 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 4962 while (1) { 4963 int ret; 4964 spin_lock(&em_tree->lock); 4965 ret = add_extent_mapping(em_tree, em); 4966 spin_unlock(&em_tree->lock); 4967 if (ret != -EEXIST) { 4968 free_extent_map(em); 4969 break; 4970 } 4971 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 4972 } 4973 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 4974 4975 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 4976 } 4977 4978 struct btrfs_ref_path { 4979 u64 extent_start; 4980 u64 nodes[BTRFS_MAX_LEVEL]; 4981 u64 root_objectid; 4982 u64 root_generation; 4983 u64 owner_objectid; 4984 u32 num_refs; 4985 int lowest_level; 4986 int current_level; 4987 int shared_level; 4988 4989 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 4990 u64 new_nodes[BTRFS_MAX_LEVEL]; 4991 }; 4992 4993 struct disk_extent { 4994 u64 ram_bytes; 4995 u64 disk_bytenr; 4996 u64 disk_num_bytes; 4997 u64 offset; 4998 u64 num_bytes; 4999 u8 compression; 5000 u8 encryption; 5001 u16 other_encoding; 5002 }; 5003 5004 static int is_cowonly_root(u64 root_objectid) 5005 { 5006 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 5007 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 5008 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 5009 root_objectid == BTRFS_DEV_TREE_OBJECTID || 5010 root_objectid == BTRFS_TREE_LOG_OBJECTID || 5011 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 5012 return 1; 5013 return 0; 5014 } 5015 5016 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 5017 struct btrfs_root *extent_root, 5018 struct btrfs_ref_path *ref_path, 5019 int first_time) 5020 { 5021 struct extent_buffer *leaf; 5022 struct btrfs_path *path; 5023 struct btrfs_extent_ref *ref; 5024 struct btrfs_key key; 5025 struct btrfs_key found_key; 5026 u64 bytenr; 5027 u32 nritems; 5028 int level; 5029 int ret = 1; 5030 5031 path = btrfs_alloc_path(); 5032 if (!path) 5033 return -ENOMEM; 5034 5035 if (first_time) { 5036 ref_path->lowest_level = -1; 5037 ref_path->current_level = -1; 5038 ref_path->shared_level = -1; 5039 goto walk_up; 5040 } 5041 walk_down: 5042 level = ref_path->current_level - 1; 5043 while (level >= -1) { 5044 u64 parent; 5045 if (level < ref_path->lowest_level) 5046 break; 5047 5048 if (level >= 0) 5049 bytenr = ref_path->nodes[level]; 5050 else 5051 bytenr = ref_path->extent_start; 5052 BUG_ON(bytenr == 0); 5053 5054 parent = ref_path->nodes[level + 1]; 5055 ref_path->nodes[level + 1] = 0; 5056 ref_path->current_level = level; 5057 BUG_ON(parent == 0); 5058 5059 
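		/*
		 * look up the next ref item for this block past the parent
		 * we just walked down from (hence the parent + 1 offset)
		 */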
key.objectid = bytenr; 5060 key.offset = parent + 1; 5061 key.type = BTRFS_EXTENT_REF_KEY; 5062 5063 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5064 if (ret < 0) 5065 goto out; 5066 BUG_ON(ret == 0); 5067 5068 leaf = path->nodes[0]; 5069 nritems = btrfs_header_nritems(leaf); 5070 if (path->slots[0] >= nritems) { 5071 ret = btrfs_next_leaf(extent_root, path); 5072 if (ret < 0) 5073 goto out; 5074 if (ret > 0) 5075 goto next; 5076 leaf = path->nodes[0]; 5077 } 5078 5079 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5080 if (found_key.objectid == bytenr && 5081 found_key.type == BTRFS_EXTENT_REF_KEY) { 5082 if (level < ref_path->shared_level) 5083 ref_path->shared_level = level; 5084 goto found; 5085 } 5086 next: 5087 level--; 5088 btrfs_release_path(extent_root, path); 5089 cond_resched(); 5090 } 5091 /* reached lowest level */ 5092 ret = 1; 5093 goto out; 5094 walk_up: 5095 level = ref_path->current_level; 5096 while (level < BTRFS_MAX_LEVEL - 1) { 5097 u64 ref_objectid; 5098 5099 if (level >= 0) 5100 bytenr = ref_path->nodes[level]; 5101 else 5102 bytenr = ref_path->extent_start; 5103 5104 BUG_ON(bytenr == 0); 5105 5106 key.objectid = bytenr; 5107 key.offset = 0; 5108 key.type = BTRFS_EXTENT_REF_KEY; 5109 5110 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 5111 if (ret < 0) 5112 goto out; 5113 5114 leaf = path->nodes[0]; 5115 nritems = btrfs_header_nritems(leaf); 5116 if (path->slots[0] >= nritems) { 5117 ret = btrfs_next_leaf(extent_root, path); 5118 if (ret < 0) 5119 goto out; 5120 if (ret > 0) { 5121 /* the extent was freed by someone */ 5122 if (ref_path->lowest_level == level) 5123 goto out; 5124 btrfs_release_path(extent_root, path); 5125 goto walk_down; 5126 } 5127 leaf = path->nodes[0]; 5128 } 5129 5130 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5131 if (found_key.objectid != bytenr || 5132 found_key.type != BTRFS_EXTENT_REF_KEY) { 5133 /* the extent was freed by someone */ 5134 if (ref_path->lowest_level == level) { 5135 ret = 1; 5136 goto out; 5137 } 5138 btrfs_release_path(extent_root, path); 5139 goto walk_down; 5140 } 5141 found: 5142 ref = btrfs_item_ptr(leaf, path->slots[0], 5143 struct btrfs_extent_ref); 5144 ref_objectid = btrfs_ref_objectid(leaf, ref); 5145 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 5146 if (first_time) { 5147 level = (int)ref_objectid; 5148 BUG_ON(level >= BTRFS_MAX_LEVEL); 5149 ref_path->lowest_level = level; 5150 ref_path->current_level = level; 5151 ref_path->nodes[level] = bytenr; 5152 } else { 5153 WARN_ON(ref_objectid != level); 5154 } 5155 } else { 5156 WARN_ON(level != -1); 5157 } 5158 first_time = 0; 5159 5160 if (ref_path->lowest_level == level) { 5161 ref_path->owner_objectid = ref_objectid; 5162 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 5163 } 5164 5165 /* 5166 * the block is tree root or the block isn't in reference 5167 * counted tree. 
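 * either way the walk can't go any higher, so record the owning root
 * below and stop.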
5168 */ 5169 if (found_key.objectid == found_key.offset || 5170 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 5171 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5172 ref_path->root_generation = 5173 btrfs_ref_generation(leaf, ref); 5174 if (level < 0) { 5175 /* special reference from the tree log */ 5176 ref_path->nodes[0] = found_key.offset; 5177 ref_path->current_level = 0; 5178 } 5179 ret = 0; 5180 goto out; 5181 } 5182 5183 level++; 5184 BUG_ON(ref_path->nodes[level] != 0); 5185 ref_path->nodes[level] = found_key.offset; 5186 ref_path->current_level = level; 5187 5188 /* 5189 * the reference was created in the running transaction, 5190 * no need to continue walking up. 5191 */ 5192 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 5193 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 5194 ref_path->root_generation = 5195 btrfs_ref_generation(leaf, ref); 5196 ret = 0; 5197 goto out; 5198 } 5199 5200 btrfs_release_path(extent_root, path); 5201 cond_resched(); 5202 } 5203 /* reached max tree level, but no tree root found. */ 5204 BUG(); 5205 out: 5206 btrfs_free_path(path); 5207 return ret; 5208 } 5209 5210 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 5211 struct btrfs_root *extent_root, 5212 struct btrfs_ref_path *ref_path, 5213 u64 extent_start) 5214 { 5215 memset(ref_path, 0, sizeof(*ref_path)); 5216 ref_path->extent_start = extent_start; 5217 5218 return __next_ref_path(trans, extent_root, ref_path, 1); 5219 } 5220 5221 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 5222 struct btrfs_root *extent_root, 5223 struct btrfs_ref_path *ref_path) 5224 { 5225 return __next_ref_path(trans, extent_root, ref_path, 0); 5226 } 5227 5228 static noinline int get_new_locations(struct inode *reloc_inode, 5229 struct btrfs_key *extent_key, 5230 u64 offset, int no_fragment, 5231 struct disk_extent **extents, 5232 int *nr_extents) 5233 { 5234 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 5235 struct btrfs_path *path; 5236 struct btrfs_file_extent_item *fi; 5237 struct extent_buffer *leaf; 5238 struct disk_extent *exts = *extents; 5239 struct btrfs_key found_key; 5240 u64 cur_pos; 5241 u64 last_byte; 5242 u32 nritems; 5243 int nr = 0; 5244 int max = *nr_extents; 5245 int ret; 5246 5247 WARN_ON(!no_fragment && *extents); 5248 if (!exts) { 5249 max = 1; 5250 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 5251 if (!exts) 5252 return -ENOMEM; 5253 } 5254 5255 path = btrfs_alloc_path(); 5256 BUG_ON(!path); 5257 5258 cur_pos = extent_key->objectid - offset; 5259 last_byte = extent_key->objectid + extent_key->offset; 5260 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 5261 cur_pos, 0); 5262 if (ret < 0) 5263 goto out; 5264 if (ret > 0) { 5265 ret = -ENOENT; 5266 goto out; 5267 } 5268 5269 while (1) { 5270 leaf = path->nodes[0]; 5271 nritems = btrfs_header_nritems(leaf); 5272 if (path->slots[0] >= nritems) { 5273 ret = btrfs_next_leaf(root, path); 5274 if (ret < 0) 5275 goto out; 5276 if (ret > 0) 5277 break; 5278 leaf = path->nodes[0]; 5279 } 5280 5281 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 5282 if (found_key.offset != cur_pos || 5283 found_key.type != BTRFS_EXTENT_DATA_KEY || 5284 found_key.objectid != reloc_inode->i_ino) 5285 break; 5286 5287 fi = btrfs_item_ptr(leaf, path->slots[0], 5288 struct btrfs_file_extent_item); 5289 if (btrfs_file_extent_type(leaf, fi) != 5290 BTRFS_FILE_EXTENT_REG || 5291 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5292 break; 5293 5294 if (nr == max) { 5295 struct disk_extent 
*old = exts; 5296 max *= 2; 5297 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 5298 memcpy(exts, old, sizeof(*exts) * nr); 5299 if (old != *extents) 5300 kfree(old); 5301 } 5302 5303 exts[nr].disk_bytenr = 5304 btrfs_file_extent_disk_bytenr(leaf, fi); 5305 exts[nr].disk_num_bytes = 5306 btrfs_file_extent_disk_num_bytes(leaf, fi); 5307 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 5308 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5309 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 5310 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 5311 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 5312 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 5313 fi); 5314 BUG_ON(exts[nr].offset > 0); 5315 BUG_ON(exts[nr].compression || exts[nr].encryption); 5316 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 5317 5318 cur_pos += exts[nr].num_bytes; 5319 nr++; 5320 5321 if (cur_pos + offset >= last_byte) 5322 break; 5323 5324 if (no_fragment) { 5325 ret = 1; 5326 goto out; 5327 } 5328 path->slots[0]++; 5329 } 5330 5331 BUG_ON(cur_pos + offset > last_byte); 5332 if (cur_pos + offset < last_byte) { 5333 ret = -ENOENT; 5334 goto out; 5335 } 5336 ret = 0; 5337 out: 5338 btrfs_free_path(path); 5339 if (ret) { 5340 if (exts != *extents) 5341 kfree(exts); 5342 } else { 5343 *extents = exts; 5344 *nr_extents = nr; 5345 } 5346 return ret; 5347 } 5348 5349 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 5350 struct btrfs_root *root, 5351 struct btrfs_path *path, 5352 struct btrfs_key *extent_key, 5353 struct btrfs_key *leaf_key, 5354 struct btrfs_ref_path *ref_path, 5355 struct disk_extent *new_extents, 5356 int nr_extents) 5357 { 5358 struct extent_buffer *leaf; 5359 struct btrfs_file_extent_item *fi; 5360 struct inode *inode = NULL; 5361 struct btrfs_key key; 5362 u64 lock_start = 0; 5363 u64 lock_end = 0; 5364 u64 num_bytes; 5365 u64 ext_offset; 5366 u64 search_end = (u64)-1; 5367 u32 nritems; 5368 int nr_scaned = 0; 5369 int extent_locked = 0; 5370 int extent_type; 5371 int ret; 5372 5373 memcpy(&key, leaf_key, sizeof(key)); 5374 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5375 if (key.objectid < ref_path->owner_objectid || 5376 (key.objectid == ref_path->owner_objectid && 5377 key.type < BTRFS_EXTENT_DATA_KEY)) { 5378 key.objectid = ref_path->owner_objectid; 5379 key.type = BTRFS_EXTENT_DATA_KEY; 5380 key.offset = 0; 5381 } 5382 } 5383 5384 while (1) { 5385 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 5386 if (ret < 0) 5387 goto out; 5388 5389 leaf = path->nodes[0]; 5390 nritems = btrfs_header_nritems(leaf); 5391 next: 5392 if (extent_locked && ret > 0) { 5393 /* 5394 * the file extent item was modified by someone 5395 * before the extent got locked. 
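 * so drop the extent lock here and re-evaluate the current item below.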
5396 */ 5397 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5398 lock_end, GFP_NOFS); 5399 extent_locked = 0; 5400 } 5401 5402 if (path->slots[0] >= nritems) { 5403 if (++nr_scaned > 2) 5404 break; 5405 5406 BUG_ON(extent_locked); 5407 ret = btrfs_next_leaf(root, path); 5408 if (ret < 0) 5409 goto out; 5410 if (ret > 0) 5411 break; 5412 leaf = path->nodes[0]; 5413 nritems = btrfs_header_nritems(leaf); 5414 } 5415 5416 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5417 5418 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 5419 if ((key.objectid > ref_path->owner_objectid) || 5420 (key.objectid == ref_path->owner_objectid && 5421 key.type > BTRFS_EXTENT_DATA_KEY) || 5422 key.offset >= search_end) 5423 break; 5424 } 5425 5426 if (inode && key.objectid != inode->i_ino) { 5427 BUG_ON(extent_locked); 5428 btrfs_release_path(root, path); 5429 mutex_unlock(&inode->i_mutex); 5430 iput(inode); 5431 inode = NULL; 5432 continue; 5433 } 5434 5435 if (key.type != BTRFS_EXTENT_DATA_KEY) { 5436 path->slots[0]++; 5437 ret = 1; 5438 goto next; 5439 } 5440 fi = btrfs_item_ptr(leaf, path->slots[0], 5441 struct btrfs_file_extent_item); 5442 extent_type = btrfs_file_extent_type(leaf, fi); 5443 if ((extent_type != BTRFS_FILE_EXTENT_REG && 5444 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 5445 (btrfs_file_extent_disk_bytenr(leaf, fi) != 5446 extent_key->objectid)) { 5447 path->slots[0]++; 5448 ret = 1; 5449 goto next; 5450 } 5451 5452 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5453 ext_offset = btrfs_file_extent_offset(leaf, fi); 5454 5455 if (search_end == (u64)-1) { 5456 search_end = key.offset - ext_offset + 5457 btrfs_file_extent_ram_bytes(leaf, fi); 5458 } 5459 5460 if (!extent_locked) { 5461 lock_start = key.offset; 5462 lock_end = lock_start + num_bytes - 1; 5463 } else { 5464 if (lock_start > key.offset || 5465 lock_end + 1 < key.offset + num_bytes) { 5466 unlock_extent(&BTRFS_I(inode)->io_tree, 5467 lock_start, lock_end, GFP_NOFS); 5468 extent_locked = 0; 5469 } 5470 } 5471 5472 if (!inode) { 5473 btrfs_release_path(root, path); 5474 5475 inode = btrfs_iget_locked(root->fs_info->sb, 5476 key.objectid, root); 5477 if (inode->i_state & I_NEW) { 5478 BTRFS_I(inode)->root = root; 5479 BTRFS_I(inode)->location.objectid = 5480 key.objectid; 5481 BTRFS_I(inode)->location.type = 5482 BTRFS_INODE_ITEM_KEY; 5483 BTRFS_I(inode)->location.offset = 0; 5484 btrfs_read_locked_inode(inode); 5485 unlock_new_inode(inode); 5486 } 5487 /* 5488 * some code call btrfs_commit_transaction while 5489 * holding the i_mutex, so we can't use mutex_lock 5490 * here. 
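 * mutex_trylock() below is the fallback: if the inode is bad or the
 * lock is contended we give up on the rest of this inode for now
 * (key.offset is pushed to -1 before the skip).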
5491 */ 5492 if (is_bad_inode(inode) || 5493 !mutex_trylock(&inode->i_mutex)) { 5494 iput(inode); 5495 inode = NULL; 5496 key.offset = (u64)-1; 5497 goto skip; 5498 } 5499 } 5500 5501 if (!extent_locked) { 5502 struct btrfs_ordered_extent *ordered; 5503 5504 btrfs_release_path(root, path); 5505 5506 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5507 lock_end, GFP_NOFS); 5508 ordered = btrfs_lookup_first_ordered_extent(inode, 5509 lock_end); 5510 if (ordered && 5511 ordered->file_offset <= lock_end && 5512 ordered->file_offset + ordered->len > lock_start) { 5513 unlock_extent(&BTRFS_I(inode)->io_tree, 5514 lock_start, lock_end, GFP_NOFS); 5515 btrfs_start_ordered_extent(inode, ordered, 1); 5516 btrfs_put_ordered_extent(ordered); 5517 key.offset += num_bytes; 5518 goto skip; 5519 } 5520 if (ordered) 5521 btrfs_put_ordered_extent(ordered); 5522 5523 extent_locked = 1; 5524 continue; 5525 } 5526 5527 if (nr_extents == 1) { 5528 /* update extent pointer in place */ 5529 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5530 new_extents[0].disk_bytenr); 5531 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5532 new_extents[0].disk_num_bytes); 5533 btrfs_mark_buffer_dirty(leaf); 5534 5535 btrfs_drop_extent_cache(inode, key.offset, 5536 key.offset + num_bytes - 1, 0); 5537 5538 ret = btrfs_inc_extent_ref(trans, root, 5539 new_extents[0].disk_bytenr, 5540 new_extents[0].disk_num_bytes, 5541 leaf->start, 5542 root->root_key.objectid, 5543 trans->transid, 5544 key.objectid); 5545 BUG_ON(ret); 5546 5547 ret = btrfs_free_extent(trans, root, 5548 extent_key->objectid, 5549 extent_key->offset, 5550 leaf->start, 5551 btrfs_header_owner(leaf), 5552 btrfs_header_generation(leaf), 5553 key.objectid, 0); 5554 BUG_ON(ret); 5555 5556 btrfs_release_path(root, path); 5557 key.offset += num_bytes; 5558 } else { 5559 BUG_ON(1); 5560 #if 0 5561 u64 alloc_hint; 5562 u64 extent_len; 5563 int i; 5564 /* 5565 * drop old extent pointer at first, then insert the 5566 * new pointers one bye one 5567 */ 5568 btrfs_release_path(root, path); 5569 ret = btrfs_drop_extents(trans, root, inode, key.offset, 5570 key.offset + num_bytes, 5571 key.offset, &alloc_hint); 5572 BUG_ON(ret); 5573 5574 for (i = 0; i < nr_extents; i++) { 5575 if (ext_offset >= new_extents[i].num_bytes) { 5576 ext_offset -= new_extents[i].num_bytes; 5577 continue; 5578 } 5579 extent_len = min(new_extents[i].num_bytes - 5580 ext_offset, num_bytes); 5581 5582 ret = btrfs_insert_empty_item(trans, root, 5583 path, &key, 5584 sizeof(*fi)); 5585 BUG_ON(ret); 5586 5587 leaf = path->nodes[0]; 5588 fi = btrfs_item_ptr(leaf, path->slots[0], 5589 struct btrfs_file_extent_item); 5590 btrfs_set_file_extent_generation(leaf, fi, 5591 trans->transid); 5592 btrfs_set_file_extent_type(leaf, fi, 5593 BTRFS_FILE_EXTENT_REG); 5594 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5595 new_extents[i].disk_bytenr); 5596 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5597 new_extents[i].disk_num_bytes); 5598 btrfs_set_file_extent_ram_bytes(leaf, fi, 5599 new_extents[i].ram_bytes); 5600 5601 btrfs_set_file_extent_compression(leaf, fi, 5602 new_extents[i].compression); 5603 btrfs_set_file_extent_encryption(leaf, fi, 5604 new_extents[i].encryption); 5605 btrfs_set_file_extent_other_encoding(leaf, fi, 5606 new_extents[i].other_encoding); 5607 5608 btrfs_set_file_extent_num_bytes(leaf, fi, 5609 extent_len); 5610 ext_offset += new_extents[i].offset; 5611 btrfs_set_file_extent_offset(leaf, fi, 5612 ext_offset); 5613 btrfs_mark_buffer_dirty(leaf); 5614 5615 btrfs_drop_extent_cache(inode, key.offset, 
5616 key.offset + extent_len - 1, 0); 5617 5618 ret = btrfs_inc_extent_ref(trans, root, 5619 new_extents[i].disk_bytenr, 5620 new_extents[i].disk_num_bytes, 5621 leaf->start, 5622 root->root_key.objectid, 5623 trans->transid, key.objectid); 5624 BUG_ON(ret); 5625 btrfs_release_path(root, path); 5626 5627 inode_add_bytes(inode, extent_len); 5628 5629 ext_offset = 0; 5630 num_bytes -= extent_len; 5631 key.offset += extent_len; 5632 5633 if (num_bytes == 0) 5634 break; 5635 } 5636 BUG_ON(i >= nr_extents); 5637 #endif 5638 } 5639 5640 if (extent_locked) { 5641 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5642 lock_end, GFP_NOFS); 5643 extent_locked = 0; 5644 } 5645 skip: 5646 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 5647 key.offset >= search_end) 5648 break; 5649 5650 cond_resched(); 5651 } 5652 ret = 0; 5653 out: 5654 btrfs_release_path(root, path); 5655 if (inode) { 5656 mutex_unlock(&inode->i_mutex); 5657 if (extent_locked) { 5658 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5659 lock_end, GFP_NOFS); 5660 } 5661 iput(inode); 5662 } 5663 return ret; 5664 } 5665 5666 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 5667 struct btrfs_root *root, 5668 struct extent_buffer *buf, u64 orig_start) 5669 { 5670 int level; 5671 int ret; 5672 5673 BUG_ON(btrfs_header_generation(buf) != trans->transid); 5674 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5675 5676 level = btrfs_header_level(buf); 5677 if (level == 0) { 5678 struct btrfs_leaf_ref *ref; 5679 struct btrfs_leaf_ref *orig_ref; 5680 5681 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 5682 if (!orig_ref) 5683 return -ENOENT; 5684 5685 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 5686 if (!ref) { 5687 btrfs_free_leaf_ref(root, orig_ref); 5688 return -ENOMEM; 5689 } 5690 5691 ref->nritems = orig_ref->nritems; 5692 memcpy(ref->extents, orig_ref->extents, 5693 sizeof(ref->extents[0]) * ref->nritems); 5694 5695 btrfs_free_leaf_ref(root, orig_ref); 5696 5697 ref->root_gen = trans->transid; 5698 ref->bytenr = buf->start; 5699 ref->owner = btrfs_header_owner(buf); 5700 ref->generation = btrfs_header_generation(buf); 5701 5702 ret = btrfs_add_leaf_ref(root, ref, 0); 5703 WARN_ON(ret); 5704 btrfs_free_leaf_ref(root, ref); 5705 } 5706 return 0; 5707 } 5708 5709 static noinline int invalidate_extent_cache(struct btrfs_root *root, 5710 struct extent_buffer *leaf, 5711 struct btrfs_block_group_cache *group, 5712 struct btrfs_root *target_root) 5713 { 5714 struct btrfs_key key; 5715 struct inode *inode = NULL; 5716 struct btrfs_file_extent_item *fi; 5717 u64 num_bytes; 5718 u64 skip_objectid = 0; 5719 u32 nritems; 5720 u32 i; 5721 5722 nritems = btrfs_header_nritems(leaf); 5723 for (i = 0; i < nritems; i++) { 5724 btrfs_item_key_to_cpu(leaf, &key, i); 5725 if (key.objectid == skip_objectid || 5726 key.type != BTRFS_EXTENT_DATA_KEY) 5727 continue; 5728 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 5729 if (btrfs_file_extent_type(leaf, fi) == 5730 BTRFS_FILE_EXTENT_INLINE) 5731 continue; 5732 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5733 continue; 5734 if (!inode || inode->i_ino != key.objectid) { 5735 iput(inode); 5736 inode = btrfs_ilookup(target_root->fs_info->sb, 5737 key.objectid, target_root, 1); 5738 } 5739 if (!inode) { 5740 skip_objectid = key.objectid; 5741 continue; 5742 } 5743 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5744 5745 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 5746 key.offset + num_bytes - 1, GFP_NOFS); 5747 
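		/*
		 * with the range locked, drop any cached extent mapping so
		 * later lookups go back to the file extent items on disk
		 */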
btrfs_drop_extent_cache(inode, key.offset, 5748 key.offset + num_bytes - 1, 1); 5749 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 5750 key.offset + num_bytes - 1, GFP_NOFS); 5751 cond_resched(); 5752 } 5753 iput(inode); 5754 return 0; 5755 } 5756 5757 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 5758 struct btrfs_root *root, 5759 struct extent_buffer *leaf, 5760 struct btrfs_block_group_cache *group, 5761 struct inode *reloc_inode) 5762 { 5763 struct btrfs_key key; 5764 struct btrfs_key extent_key; 5765 struct btrfs_file_extent_item *fi; 5766 struct btrfs_leaf_ref *ref; 5767 struct disk_extent *new_extent; 5768 u64 bytenr; 5769 u64 num_bytes; 5770 u32 nritems; 5771 u32 i; 5772 int ext_index; 5773 int nr_extent; 5774 int ret; 5775 5776 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 5777 BUG_ON(!new_extent); 5778 5779 ref = btrfs_lookup_leaf_ref(root, leaf->start); 5780 BUG_ON(!ref); 5781 5782 ext_index = -1; 5783 nritems = btrfs_header_nritems(leaf); 5784 for (i = 0; i < nritems; i++) { 5785 btrfs_item_key_to_cpu(leaf, &key, i); 5786 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 5787 continue; 5788 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 5789 if (btrfs_file_extent_type(leaf, fi) == 5790 BTRFS_FILE_EXTENT_INLINE) 5791 continue; 5792 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 5793 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 5794 if (bytenr == 0) 5795 continue; 5796 5797 ext_index++; 5798 if (bytenr >= group->key.objectid + group->key.offset || 5799 bytenr + num_bytes <= group->key.objectid) 5800 continue; 5801 5802 extent_key.objectid = bytenr; 5803 extent_key.offset = num_bytes; 5804 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 5805 nr_extent = 1; 5806 ret = get_new_locations(reloc_inode, &extent_key, 5807 group->key.objectid, 1, 5808 &new_extent, &nr_extent); 5809 if (ret > 0) 5810 continue; 5811 BUG_ON(ret < 0); 5812 5813 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 5814 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 5815 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 5816 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 5817 5818 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5819 new_extent->disk_bytenr); 5820 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5821 new_extent->disk_num_bytes); 5822 btrfs_mark_buffer_dirty(leaf); 5823 5824 ret = btrfs_inc_extent_ref(trans, root, 5825 new_extent->disk_bytenr, 5826 new_extent->disk_num_bytes, 5827 leaf->start, 5828 root->root_key.objectid, 5829 trans->transid, key.objectid); 5830 BUG_ON(ret); 5831 5832 ret = btrfs_free_extent(trans, root, 5833 bytenr, num_bytes, leaf->start, 5834 btrfs_header_owner(leaf), 5835 btrfs_header_generation(leaf), 5836 key.objectid, 0); 5837 BUG_ON(ret); 5838 cond_resched(); 5839 } 5840 kfree(new_extent); 5841 BUG_ON(ext_index + 1 != ref->nritems); 5842 btrfs_free_leaf_ref(root, ref); 5843 return 0; 5844 } 5845 5846 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 5847 struct btrfs_root *root) 5848 { 5849 struct btrfs_root *reloc_root; 5850 int ret; 5851 5852 if (root->reloc_root) { 5853 reloc_root = root->reloc_root; 5854 root->reloc_root = NULL; 5855 list_add(&reloc_root->dead_list, 5856 &root->fs_info->dead_reloc_roots); 5857 5858 btrfs_set_root_bytenr(&reloc_root->root_item, 5859 reloc_root->node->start); 5860 btrfs_set_root_level(&root->root_item, 5861 btrfs_header_level(reloc_root->node)); 5862 memset(&reloc_root->root_item.drop_progress, 0, 5863 sizeof(struct btrfs_disk_key)); 
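		/*
		 * drop_progress was cleared just above and drop_level goes
		 * to 0 below, so the later snapshot drop walks this dead
		 * reloc root from the very top
		 */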
5864 reloc_root->root_item.drop_level = 0; 5865 5866 ret = btrfs_update_root(trans, root->fs_info->tree_root, 5867 &reloc_root->root_key, 5868 &reloc_root->root_item); 5869 BUG_ON(ret); 5870 } 5871 return 0; 5872 } 5873 5874 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 5875 { 5876 struct btrfs_trans_handle *trans; 5877 struct btrfs_root *reloc_root; 5878 struct btrfs_root *prev_root = NULL; 5879 struct list_head dead_roots; 5880 int ret; 5881 unsigned long nr; 5882 5883 INIT_LIST_HEAD(&dead_roots); 5884 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 5885 5886 while (!list_empty(&dead_roots)) { 5887 reloc_root = list_entry(dead_roots.prev, 5888 struct btrfs_root, dead_list); 5889 list_del_init(&reloc_root->dead_list); 5890 5891 BUG_ON(reloc_root->commit_root != NULL); 5892 while (1) { 5893 trans = btrfs_join_transaction(root, 1); 5894 BUG_ON(!trans); 5895 5896 mutex_lock(&root->fs_info->drop_mutex); 5897 ret = btrfs_drop_snapshot(trans, reloc_root); 5898 if (ret != -EAGAIN) 5899 break; 5900 mutex_unlock(&root->fs_info->drop_mutex); 5901 5902 nr = trans->blocks_used; 5903 ret = btrfs_end_transaction(trans, root); 5904 BUG_ON(ret); 5905 btrfs_btree_balance_dirty(root, nr); 5906 } 5907 5908 free_extent_buffer(reloc_root->node); 5909 5910 ret = btrfs_del_root(trans, root->fs_info->tree_root, 5911 &reloc_root->root_key); 5912 BUG_ON(ret); 5913 mutex_unlock(&root->fs_info->drop_mutex); 5914 5915 nr = trans->blocks_used; 5916 ret = btrfs_end_transaction(trans, root); 5917 BUG_ON(ret); 5918 btrfs_btree_balance_dirty(root, nr); 5919 5920 kfree(prev_root); 5921 prev_root = reloc_root; 5922 } 5923 if (prev_root) { 5924 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 5925 kfree(prev_root); 5926 } 5927 return 0; 5928 } 5929 5930 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 5931 { 5932 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 5933 return 0; 5934 } 5935 5936 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 5937 { 5938 struct btrfs_root *reloc_root; 5939 struct btrfs_trans_handle *trans; 5940 struct btrfs_key location; 5941 int found; 5942 int ret; 5943 5944 mutex_lock(&root->fs_info->tree_reloc_mutex); 5945 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 5946 BUG_ON(ret); 5947 found = !list_empty(&root->fs_info->dead_reloc_roots); 5948 mutex_unlock(&root->fs_info->tree_reloc_mutex); 5949 5950 if (found) { 5951 trans = btrfs_start_transaction(root, 1); 5952 BUG_ON(!trans); 5953 ret = btrfs_commit_transaction(trans, root); 5954 BUG_ON(ret); 5955 } 5956 5957 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 5958 location.offset = (u64)-1; 5959 location.type = BTRFS_ROOT_ITEM_KEY; 5960 5961 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 5962 BUG_ON(!reloc_root); 5963 btrfs_orphan_cleanup(reloc_root); 5964 return 0; 5965 } 5966 5967 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 5968 struct btrfs_root *root) 5969 { 5970 struct btrfs_root *reloc_root; 5971 struct extent_buffer *eb; 5972 struct btrfs_root_item *root_item; 5973 struct btrfs_key root_key; 5974 int ret; 5975 5976 BUG_ON(!root->ref_cows); 5977 if (root->reloc_root) 5978 return 0; 5979 5980 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 5981 BUG_ON(!root_item); 5982 5983 ret = btrfs_copy_root(trans, root, root->commit_root, 5984 &eb, BTRFS_TREE_RELOC_OBJECTID); 5985 BUG_ON(ret); 5986 5987 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 5988 root_key.offset = root->root_key.objectid; 5989 root_key.type = 
BTRFS_ROOT_ITEM_KEY; 5990 5991 memcpy(root_item, &root->root_item, sizeof(root_item)); 5992 btrfs_set_root_refs(root_item, 0); 5993 btrfs_set_root_bytenr(root_item, eb->start); 5994 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 5995 btrfs_set_root_generation(root_item, trans->transid); 5996 5997 btrfs_tree_unlock(eb); 5998 free_extent_buffer(eb); 5999 6000 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 6001 &root_key, root_item); 6002 BUG_ON(ret); 6003 kfree(root_item); 6004 6005 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 6006 &root_key); 6007 BUG_ON(!reloc_root); 6008 reloc_root->last_trans = trans->transid; 6009 reloc_root->commit_root = NULL; 6010 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 6011 6012 root->reloc_root = reloc_root; 6013 return 0; 6014 } 6015 6016 /* 6017 * Core function of space balance. 6018 * 6019 * The idea is using reloc trees to relocate tree blocks in reference 6020 * counted roots. There is one reloc tree for each subvol, and all 6021 * reloc trees share same root key objectid. Reloc trees are snapshots 6022 * of the latest committed roots of subvols (root->commit_root). 6023 * 6024 * To relocate a tree block referenced by a subvol, there are two steps. 6025 * COW the block through subvol's reloc tree, then update block pointer 6026 * in the subvol to point to the new block. Since all reloc trees share 6027 * same root key objectid, doing special handing for tree blocks owned 6028 * by them is easy. Once a tree block has been COWed in one reloc tree, 6029 * we can use the resulting new block directly when the same block is 6030 * required to COW again through other reloc trees. By this way, relocated 6031 * tree blocks are shared between reloc trees, so they are also shared 6032 * between subvols. 
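 *
 * (roughly: the back references of an extent are walked upwards with
 *  __next_ref_path() to find the owning roots, relocate_one_path()
 *  below then COWs the matching path in that root's reloc tree, and
 *  btrfs_merge_path() swaps the new blocks into the fs tree; file
 *  extent items that point at relocated data are rewritten along the
 *  way, see replace_extents_in_leaf() above)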
static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct btrfs_key *first_key,
				      struct btrfs_ref_path *ref_path,
				      struct btrfs_block_group_cache *group,
				      struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}

static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_key *first_key,
					struct btrfs_ref_path *ref_path)
{
	int ret;

	ret = relocate_one_path(trans, root, path, first_key,
				ref_path, NULL, NULL);
	BUG_ON(ret);

	return 0;
}

static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_path *path,
				    struct btrfs_key *extent_key)
{
	int ret;

	ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
	if (ret)
		goto out;
	ret = btrfs_del_item(trans, extent_root, path);
out:
	btrfs_release_path(extent_root, path);
	return ret;
}

static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
						 struct btrfs_ref_path *ref_path)
{
	struct btrfs_key root_key;

	root_key.objectid = ref_path->root_objectid;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	if (is_cowonly_root(ref_path->root_objectid))
		root_key.offset = 0;
	else
		root_key.offset = (u64)-1;

	return btrfs_read_fs_root_no_name(fs_info, &root_key);
}

static noinline int relocate_one_extent(struct btrfs_root *extent_root,
					struct btrfs_path *path,
					struct btrfs_key *extent_key,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode, int pass)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *found_root;
	struct btrfs_ref_path *ref_path = NULL;
	struct disk_extent *new_extents = NULL;
	int nr_extents = 0;
	int loops;
	int ret;
	int level;
	struct btrfs_key first_key;
	u64 prev_block = 0;

	trans = btrfs_start_transaction(extent_root, 1);
	BUG_ON(!trans);

	if (extent_key->objectid == 0) {
		ret = del_extent_zero(trans, extent_root, path, extent_key);
		goto out;
	}

	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
	if (!ref_path) {
		ret = -ENOMEM;
		goto out;
	}

	for (loops = 0; ; loops++) {
		if (loops == 0) {
			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
						   extent_key->objectid);
		} else {
			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
		}
		if (ret < 0)
			goto out;
		if (ret > 0)
			break;

		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
			continue;

		found_root = read_ref_root(extent_root->fs_info, ref_path);
		BUG_ON(!found_root);
		/*
		 * for reference counted trees, only process reference paths
		 * rooted at the latest committed root.
		 */
		if (found_root->ref_cows &&
		    ref_path->root_generation != found_root->root_key.offset)
			continue;
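
		/*
		 * Data extents (owner >= BTRFS_FIRST_FREE_OBJECTID) are
		 * handled per pass: pass 0 copies the data into the reloc
		 * inode, pass 1 rewrites references through the reloc
		 * trees, and later passes fall back to rewriting the file
		 * extent items directly.  Tree blocks go through
		 * relocate_tree_block(), which COWs the path in the owning
		 * root (via its reloc tree when the root is reference
		 * counted).
		 */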
		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			if (pass == 0) {
				/*
				 * copy data extents to new locations
				 */
				u64 group_start = group->key.objectid;
				ret = relocate_data_extent(reloc_inode,
							   extent_key,
							   group_start);
				if (ret < 0)
					goto out;
				break;
			}
			level = 0;
		} else {
			level = ref_path->owner_objectid;
		}

		if (prev_block != ref_path->nodes[level]) {
			struct extent_buffer *eb;
			u64 block_start = ref_path->nodes[level];
			u64 block_size = btrfs_level_size(found_root, level);

			eb = read_tree_block(found_root, block_start,
					     block_size, 0);
			btrfs_tree_lock(eb);
			BUG_ON(level != btrfs_header_level(eb));

			if (level == 0)
				btrfs_item_key_to_cpu(eb, &first_key, 0);
			else
				btrfs_node_key_to_cpu(eb, &first_key, 0);

			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			prev_block = block_start;
		}

		mutex_lock(&extent_root->fs_info->trans_mutex);
		btrfs_record_root_in_trans(found_root);
		mutex_unlock(&extent_root->fs_info->trans_mutex);
		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/*
			 * try to update data extent references while
			 * keeping metadata shared between snapshots.
			 */
			if (pass == 1) {
				ret = relocate_one_path(trans, found_root,
						path, &first_key, ref_path,
						group, reloc_inode);
				if (ret < 0)
					goto out;
				continue;
			}
			/*
			 * use fallback method to process the remaining
			 * references.
			 */
			if (!new_extents) {
				u64 group_start = group->key.objectid;
				new_extents = kmalloc(sizeof(*new_extents),
						      GFP_NOFS);
				nr_extents = 1;
				ret = get_new_locations(reloc_inode,
							extent_key,
							group_start, 1,
							&new_extents,
							&nr_extents);
				if (ret)
					goto out;
			}
			ret = replace_one_extent(trans, found_root,
						 path, extent_key,
						 &first_key, ref_path,
						 new_extents, nr_extents);
		} else {
			ret = relocate_tree_block(trans, found_root, path,
						  &first_key, ref_path);
		}
		if (ret < 0)
			goto out;
	}
	ret = 0;
out:
	btrfs_end_transaction(trans, extent_root);
	kfree(new_extents);
	kfree(ref_path);
	return ret;
}
#endif
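
/*
 * Decide which RAID profile new chunks should use while a block group
 * is being relocated or made read-only.  With a single writable device
 * left, striping and mirroring are converted to single/DUP; with
 * multiple devices, DUP is upgraded to RAID1 and single-device chunks
 * to RAID0, while existing RAID flags are kept as-is.
 */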
static u64 update_block_group_flags(struct btrfs_root *root, u64 flags)
{
	u64 num_devices;
	u64 stripped = BTRFS_BLOCK_GROUP_RAID0 |
		BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10;

	num_devices = root->fs_info->fs_devices->rw_devices;
	if (num_devices == 1) {
		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* turn raid0 into single device chunks */
		if (flags & BTRFS_BLOCK_GROUP_RAID0)
			return stripped;

		/* turn mirroring into duplication */
		if (flags & (BTRFS_BLOCK_GROUP_RAID1 |
			     BTRFS_BLOCK_GROUP_RAID10))
			return stripped | BTRFS_BLOCK_GROUP_DUP;
		return flags;
	} else {
		/* they already had raid on here, just return */
		if (flags & stripped)
			return flags;

		stripped |= BTRFS_BLOCK_GROUP_DUP;
		stripped = flags & ~stripped;

		/* switch duplicated blocks with raid1 */
		if (flags & BTRFS_BLOCK_GROUP_DUP)
			return stripped | BTRFS_BLOCK_GROUP_RAID1;

		/* turn single device chunks into raid0 */
		return stripped | BTRFS_BLOCK_GROUP_RAID0;
	}
	return flags;
}

static int __alloc_chunk_for_shrink(struct btrfs_root *root,
			struct btrfs_block_group_cache *shrink_block_group,
			int force)
{
	struct btrfs_trans_handle *trans;
	u64 new_alloc_flags;
	u64 calc;

	spin_lock(&shrink_block_group->lock);
	if (btrfs_block_group_used(&shrink_block_group->item) +
	    shrink_block_group->reserved > 0) {
		spin_unlock(&shrink_block_group->lock);

		trans = btrfs_start_transaction(root, 1);
		spin_lock(&shrink_block_group->lock);

		new_alloc_flags = update_block_group_flags(root,
						   shrink_block_group->flags);
		if (new_alloc_flags != shrink_block_group->flags) {
			calc =
			     btrfs_block_group_used(&shrink_block_group->item);
		} else {
			calc = shrink_block_group->key.offset;
		}
		spin_unlock(&shrink_block_group->lock);

		do_chunk_alloc(trans, root->fs_info->extent_root,
			       calc + 2 * 1024 * 1024, new_alloc_flags, force);

		btrfs_end_transaction(trans, root);
	} else
		spin_unlock(&shrink_block_group->lock);
	return 0;
}

int btrfs_prepare_block_group_relocation(struct btrfs_root *root,
					 struct btrfs_block_group_cache *group)
{
	__alloc_chunk_for_shrink(root, group, 1);
	set_block_group_readonly(group);
	return 0;
}

#if 0
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 u64 objectid, u64 size)
{
	struct btrfs_path *path;
	struct btrfs_inode_item *item;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	path->leave_spinning = 1;
	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		goto out;

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
	btrfs_set_inode_generation(leaf, item, 1);
	btrfs_set_inode_size(leaf, item, size);
	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);
out:
	btrfs_free_path(path);
	return ret;
}
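
/*
 * Create an orphan inode in the data reloc tree that spans the whole
 * block group being relocated.  Data extents are copied into this
 * inode, and index_cnt remembers the block group's start so that file
 * offsets in the reloc inode map directly onto the old disk bytenrs.
 */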
static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_group_cache *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_key root_key;
	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
	int err = 0;

	root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(root))
		return ERR_CAST(root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
	if (err)
		goto out;

	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
	BUG_ON(err);

	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
				       group->key.offset, 0, group->key.offset,
				       0, 0, 0);
	BUG_ON(err);

	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
	if (inode->i_state & I_NEW) {
		BTRFS_I(inode)->root = root;
		BTRFS_I(inode)->location.objectid = objectid;
		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
		BTRFS_I(inode)->location.offset = 0;
		btrfs_read_locked_inode(inode);
		unlock_new_inode(inode);
		BUG_ON(is_bad_inode(inode));
	} else {
		BUG_ON(1);
	}
	BTRFS_I(inode)->index_cnt = group->key.objectid;

	err = btrfs_orphan_add(trans, inode);
out:
	btrfs_end_transaction(trans, root);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}

int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct list_head list;
	size_t offset;
	int ret;
	u64 disk_bytenr;

	INIT_LIST_HEAD(&list);

	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);

	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
				       disk_bytenr + len - 1, &list);

	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}
	btrfs_put_ordered_extent(ordered);
	return 0;
}

int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_buffer *leaf;
	struct inode *reloc_inode;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	u64 skipped;
	u64 cur_byte;
	u64 total_found;
	u32 nritems;
	int ret;
	int progress;
	int pass = 0;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(info, group_start);
	BUG_ON(!block_group);

	printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
	       (unsigned long long)block_group->key.objectid,
	       (unsigned long long)block_group->flags);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	reloc_inode = create_reloc_inode(info, block_group);
	BUG_ON(IS_ERR(reloc_inode));

	__alloc_chunk_for_shrink(root, block_group, 1);
	set_block_group_readonly(block_group);

	btrfs_start_delalloc_inodes(info->tree_root);
	btrfs_wait_ordered_extents(info->tree_root, 0);
again:
	skipped = 0;
	total_found = 0;
	progress = 0;
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_clean_old_snapshots(info->tree_root);
	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
	mutex_unlock(&root->fs_info->cleaner_mutex);

	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);
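
	/*
	 * Scan every extent item that falls inside the block group and
	 * relocate it.  The scan is repeated in passes (see the pass
	 * handling in relocate_one_extent) until a pass finds no more
	 * extents; if a pass only produced skipped extents, the reloc
	 * inode is recreated and the pass counter restarts.
	 */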
	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
next:
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (progress && need_resched()) {
			btrfs_release_path(root, path);
			cond_resched();
			progress = 0;
			continue;
		}
		progress = 1;

		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
		    key.objectid + key.offset <= cur_byte) {
			path->slots[0]++;
			goto next;
		}

		total_found++;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		__alloc_chunk_for_shrink(root, block_group, 0);
		ret = relocate_one_extent(root, path, &key, block_group,
					  reloc_inode, pass);
		BUG_ON(ret < 0);
		if (ret > 0)
			skipped++;

		key.objectid = cur_byte;
		key.type = 0;
		key.offset = 0;
	}

	btrfs_release_path(root, path);

	if (pass == 0) {
		btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
		invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
	}

	if (total_found > 0) {
		printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
		       (unsigned long long)total_found, pass);
		pass++;
		if (total_found == skipped && pass > 2) {
			iput(reloc_inode);
			reloc_inode = create_reloc_inode(info, block_group);
			pass = 0;
		}
		goto again;
	}

	/* delete reloc_inode */
	iput(reloc_inode);

	/* unpin extents in this range */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	spin_lock(&block_group->lock);
	WARN_ON(block_group->pinned > 0);
	WARN_ON(block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
	spin_unlock(&block_group->lock);
	btrfs_put_block_group(block_group);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
#endif

static int find_first_block_group(struct btrfs_root *root,
				  struct btrfs_path *path,
				  struct btrfs_key *key)
{
	int ret = 0;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int slot;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid >= key->objectid &&
		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			ret = 0;
			goto out;
		}
		path->slots[0]++;
	}
	ret = -ENOENT;
out:
	return ret;
}
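
/*
 * Tear down every in-memory block group cache entry.  This runs during
 * the final stages of unmount, after nothing else can be touching the
 * block group tree.
 */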
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	struct btrfs_space_info *space_info;
	struct rb_node *n;

	spin_lock(&info->block_group_cache_lock);
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);
		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		spin_unlock(&info->block_group_cache_lock);

		btrfs_remove_free_space_cache(block_group);
		down_write(&block_group->space_info->groups_sem);
		list_del(&block_group->list);
		up_write(&block_group->space_info->groups_sem);

		WARN_ON(atomic_read(&block_group->count) != 1);
		kfree(block_group);

		spin_lock(&info->block_group_cache_lock);
	}
	spin_unlock(&info->block_group_cache_lock);

	/*
	 * now that all the block groups are freed, go through and free
	 * all the space_info structs.  This is only called during the
	 * final stages of unmount, and so we know nobody is using them.
	 * We call synchronize_rcu() once before we start, just to be on
	 * the safe side.
	 */
	synchronize_rcu();

	while (!list_empty(&info->space_info)) {
		space_info = list_entry(info->space_info.next,
					struct btrfs_space_info,
					list);

		list_del(&space_info->list);
		kfree(space_info);
	}
	return 0;
}

int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *space_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;

	root = info->extent_root;
	key.objectid = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		ret = find_first_block_group(root, path, &key);
		if (ret > 0) {
			ret = 0;
			goto error;
		}
		if (ret != 0)
			goto error;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		cache = kzalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			ret = -ENOMEM;
			break;
		}

		atomic_set(&cache->count, 1);
		spin_lock_init(&cache->lock);
		spin_lock_init(&cache->tree_lock);
		mutex_init(&cache->cache_mutex);
		INIT_LIST_HEAD(&cache->list);
		INIT_LIST_HEAD(&cache->cluster_list);
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
		memcpy(&cache->key, &found_key, sizeof(found_key));

		key.objectid = found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
		cache->flags = btrfs_block_group_flags(&cache->item);

		ret = update_space_info(info, cache->flags, found_key.offset,
					btrfs_block_group_used(&cache->item),
					&space_info);
		BUG_ON(ret);
		cache->space_info = space_info;
		down_write(&space_info->groups_sem);
		list_add_tail(&cache->list, &space_info->block_groups);
		up_write(&space_info->groups_sem);

		ret = btrfs_add_block_group_cache(root->fs_info, cache);
		BUG_ON(ret);

		set_avail_alloc_bits(root->fs_info, cache->flags);
		if (btrfs_chunk_readonly(root, cache->key.objectid))
			set_block_group_readonly(cache);
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}
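
/*
 * Create the block group that backs a freshly allocated chunk: set up
 * the in-memory cache entry, account the space in the matching
 * space_info, and insert the block group item into the extent tree.
 */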
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	extent_root = root->fs_info->extent_root;

	root->fs_info->last_trans_log_full_commit = trans->transid;

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;

	cache->key.objectid = chunk_offset;
	cache->key.offset = size;
	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	atomic_set(&cache->count, 1);
	spin_lock_init(&cache->lock);
	spin_lock_init(&cache->tree_lock);
	mutex_init(&cache->cache_mutex);
	INIT_LIST_HEAD(&cache->list);
	INIT_LIST_HEAD(&cache->cluster_list);

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	cache->flags = type;
	btrfs_set_block_group_flags(&cache->item, type);

	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	BUG_ON(ret);
	down_write(&cache->space_info->groups_sem);
	list_add_tail(&cache->list, &cache->space_info->block_groups);
	up_write(&cache->space_info->groups_sem);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	BUG_ON(ret);

	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
				sizeof(cache->item));
	BUG_ON(ret);

	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}

int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_free_cluster *cluster;
	struct btrfs_key key;
	int ret;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	BUG_ON(!block_group->ro);

	memcpy(&key, &block_group->key, sizeof(key));

	/* make sure this block group isn't part of an allocation cluster */
	cluster = &root->fs_info->data_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	/*
	 * make sure this block group isn't part of a metadata
	 * allocation cluster
	 */
	cluster = &root->fs_info->meta_alloc_cluster;
	spin_lock(&cluster->refill_lock);
	btrfs_return_cluster_to_free_space(block_group, cluster);
	spin_unlock(&cluster->refill_lock);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	spin_lock(&root->fs_info->block_group_cache_lock);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);
	spin_unlock(&root->fs_info->block_group_cache_lock);
	btrfs_remove_free_space_cache(block_group);
	down_write(&block_group->space_info->groups_sem);
	/*
	 * we must use list_del_init so people can check to see if they
	 * are still on the list after taking the semaphore
	 */
	list_del_init(&block_group->list);
	up_write(&block_group->space_info->groups_sem);

	spin_lock(&block_group->space_info->lock);
	block_group->space_info->total_bytes -= block_group->key.offset;
	block_group->space_info->bytes_readonly -= block_group->key.offset;
	spin_unlock(&block_group->space_info->lock);
	block_group->space_info->full = 0;

	btrfs_put_block_group(block_group);
	btrfs_put_block_group(block_group);
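
	/*
	 * Finally delete the block group item itself from the extent
	 * tree.  An exact-match search that returns > 0 means the item
	 * is missing, which we treat as corruption and turn into -EIO.
	 */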
	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -EIO;
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}