1 /* 2 * Copyright (C) 2009 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include <linux/slab.h> 25 #include "ctree.h" 26 #include "disk-io.h" 27 #include "transaction.h" 28 #include "volumes.h" 29 #include "locking.h" 30 #include "btrfs_inode.h" 31 #include "async-thread.h" 32 #include "free-space-cache.h" 33 34 /* 35 * backref_node, mapping_node and tree_block start with this 36 */ 37 struct tree_entry { 38 struct rb_node rb_node; 39 u64 bytenr; 40 }; 41 42 /* 43 * present a tree block in the backref cache 44 */ 45 struct backref_node { 46 struct rb_node rb_node; 47 u64 bytenr; 48 49 u64 new_bytenr; 50 /* objectid of tree block owner, can be not uptodate */ 51 u64 owner; 52 /* link to pending, changed or detached list */ 53 struct list_head list; 54 /* list of upper level blocks reference this block */ 55 struct list_head upper; 56 /* list of child blocks in the cache */ 57 struct list_head lower; 58 /* NULL if this node is not tree root */ 59 struct btrfs_root *root; 60 /* extent buffer got by COW the block */ 61 struct extent_buffer *eb; 62 /* level of tree block */ 63 unsigned int level:8; 64 /* is the block in non-reference counted tree */ 65 unsigned int cowonly:1; 66 /* 1 if no child node in the cache */ 67 unsigned int lowest:1; 68 /* is the extent buffer locked */ 69 unsigned int locked:1; 70 /* has the block been processed */ 71 unsigned int processed:1; 72 /* have backrefs of this block been checked */ 73 unsigned int checked:1; 74 /* 75 * 1 if corresponding block has been cowed but some upper 76 * level block pointers may not point to the new location 77 */ 78 unsigned int pending:1; 79 /* 80 * 1 if the backref node isn't connected to any other 81 * backref node. 82 */ 83 unsigned int detached:1; 84 }; 85 86 /* 87 * present a block pointer in the backref cache 88 */ 89 struct backref_edge { 90 struct list_head list[2]; 91 struct backref_node *node[2]; 92 }; 93 94 #define LOWER 0 95 #define UPPER 1 96 97 struct backref_cache { 98 /* red black tree of all backref nodes in the cache */ 99 struct rb_root rb_root; 100 /* for passing backref nodes to btrfs_reloc_cow_block */ 101 struct backref_node *path[BTRFS_MAX_LEVEL]; 102 /* 103 * list of blocks that have been cowed but some block 104 * pointers in upper level blocks may not reflect the 105 * new location 106 */ 107 struct list_head pending[BTRFS_MAX_LEVEL]; 108 /* list of backref nodes with no child node */ 109 struct list_head leaves; 110 /* list of blocks that have been cowed in current transaction */ 111 struct list_head changed; 112 /* list of detached backref node. */ 113 struct list_head detached; 114 115 u64 last_trans; 116 117 int nr_nodes; 118 int nr_edges; 119 }; 120 121 /* 122 * map address of tree root to tree 123 */ 124 struct mapping_node { 125 struct rb_node rb_node; 126 u64 bytenr; 127 void *data; 128 }; 129 130 struct mapping_tree { 131 struct rb_root rb_root; 132 spinlock_t lock; 133 }; 134 135 /* 136 * present a tree block to process 137 */ 138 struct tree_block { 139 struct rb_node rb_node; 140 u64 bytenr; 141 struct btrfs_key key; 142 unsigned int level:8; 143 unsigned int key_ready:1; 144 }; 145 146 #define MAX_EXTENTS 128 147 148 struct file_extent_cluster { 149 u64 start; 150 u64 end; 151 u64 boundary[MAX_EXTENTS]; 152 unsigned int nr; 153 }; 154 155 struct reloc_control { 156 /* block group to relocate */ 157 struct btrfs_block_group_cache *block_group; 158 /* extent tree */ 159 struct btrfs_root *extent_root; 160 /* inode for moving data */ 161 struct inode *data_inode; 162 163 struct btrfs_block_rsv *block_rsv; 164 165 struct backref_cache backref_cache; 166 167 struct file_extent_cluster cluster; 168 /* tree blocks have been processed */ 169 struct extent_io_tree processed_blocks; 170 /* map start of tree root to corresponding reloc tree */ 171 struct mapping_tree reloc_root_tree; 172 /* list of reloc trees */ 173 struct list_head reloc_roots; 174 /* size of metadata reservation for merging reloc trees */ 175 u64 merging_rsv_size; 176 /* size of relocated tree nodes */ 177 u64 nodes_relocated; 178 179 u64 search_start; 180 u64 extents_found; 181 182 unsigned int stage:8; 183 unsigned int create_reloc_tree:1; 184 unsigned int merge_reloc_tree:1; 185 unsigned int found_file_extent:1; 186 unsigned int commit_transaction:1; 187 }; 188 189 /* stages of data relocation */ 190 #define MOVE_DATA_EXTENTS 0 191 #define UPDATE_DATA_PTRS 1 192 193 static void remove_backref_node(struct backref_cache *cache, 194 struct backref_node *node); 195 static void __mark_block_processed(struct reloc_control *rc, 196 struct backref_node *node); 197 198 static void mapping_tree_init(struct mapping_tree *tree) 199 { 200 tree->rb_root = RB_ROOT; 201 spin_lock_init(&tree->lock); 202 } 203 204 static void backref_cache_init(struct backref_cache *cache) 205 { 206 int i; 207 cache->rb_root = RB_ROOT; 208 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 209 INIT_LIST_HEAD(&cache->pending[i]); 210 INIT_LIST_HEAD(&cache->changed); 211 INIT_LIST_HEAD(&cache->detached); 212 INIT_LIST_HEAD(&cache->leaves); 213 } 214 215 static void backref_cache_cleanup(struct backref_cache *cache) 216 { 217 struct backref_node *node; 218 int i; 219 220 while (!list_empty(&cache->detached)) { 221 node = list_entry(cache->detached.next, 222 struct backref_node, list); 223 remove_backref_node(cache, node); 224 } 225 226 while (!list_empty(&cache->leaves)) { 227 node = list_entry(cache->leaves.next, 228 struct backref_node, lower); 229 remove_backref_node(cache, node); 230 } 231 232 cache->last_trans = 0; 233 234 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 235 BUG_ON(!list_empty(&cache->pending[i])); 236 BUG_ON(!list_empty(&cache->changed)); 237 BUG_ON(!list_empty(&cache->detached)); 238 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); 239 BUG_ON(cache->nr_nodes); 240 BUG_ON(cache->nr_edges); 241 } 242 243 static struct backref_node *alloc_backref_node(struct backref_cache *cache) 244 { 245 struct backref_node *node; 246 247 node = kzalloc(sizeof(*node), GFP_NOFS); 248 if (node) { 249 INIT_LIST_HEAD(&node->list); 250 INIT_LIST_HEAD(&node->upper); 251 INIT_LIST_HEAD(&node->lower); 252 RB_CLEAR_NODE(&node->rb_node); 253 cache->nr_nodes++; 254 } 255 return node; 256 } 257 258 static void free_backref_node(struct backref_cache *cache, 259 struct backref_node *node) 260 { 261 if (node) { 262 cache->nr_nodes--; 263 kfree(node); 264 } 265 } 266 267 static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) 268 { 269 struct backref_edge *edge; 270 271 edge = kzalloc(sizeof(*edge), GFP_NOFS); 272 if (edge) 273 cache->nr_edges++; 274 return edge; 275 } 276 277 static void free_backref_edge(struct backref_cache *cache, 278 struct backref_edge *edge) 279 { 280 if (edge) { 281 cache->nr_edges--; 282 kfree(edge); 283 } 284 } 285 286 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 287 struct rb_node *node) 288 { 289 struct rb_node **p = &root->rb_node; 290 struct rb_node *parent = NULL; 291 struct tree_entry *entry; 292 293 while (*p) { 294 parent = *p; 295 entry = rb_entry(parent, struct tree_entry, rb_node); 296 297 if (bytenr < entry->bytenr) 298 p = &(*p)->rb_left; 299 else if (bytenr > entry->bytenr) 300 p = &(*p)->rb_right; 301 else 302 return parent; 303 } 304 305 rb_link_node(node, parent, p); 306 rb_insert_color(node, root); 307 return NULL; 308 } 309 310 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) 311 { 312 struct rb_node *n = root->rb_node; 313 struct tree_entry *entry; 314 315 while (n) { 316 entry = rb_entry(n, struct tree_entry, rb_node); 317 318 if (bytenr < entry->bytenr) 319 n = n->rb_left; 320 else if (bytenr > entry->bytenr) 321 n = n->rb_right; 322 else 323 return n; 324 } 325 return NULL; 326 } 327 328 /* 329 * walk up backref nodes until reach node presents tree root 330 */ 331 static struct backref_node *walk_up_backref(struct backref_node *node, 332 struct backref_edge *edges[], 333 int *index) 334 { 335 struct backref_edge *edge; 336 int idx = *index; 337 338 while (!list_empty(&node->upper)) { 339 edge = list_entry(node->upper.next, 340 struct backref_edge, list[LOWER]); 341 edges[idx++] = edge; 342 node = edge->node[UPPER]; 343 } 344 BUG_ON(node->detached); 345 *index = idx; 346 return node; 347 } 348 349 /* 350 * walk down backref nodes to find start of next reference path 351 */ 352 static struct backref_node *walk_down_backref(struct backref_edge *edges[], 353 int *index) 354 { 355 struct backref_edge *edge; 356 struct backref_node *lower; 357 int idx = *index; 358 359 while (idx > 0) { 360 edge = edges[idx - 1]; 361 lower = edge->node[LOWER]; 362 if (list_is_last(&edge->list[LOWER], &lower->upper)) { 363 idx--; 364 continue; 365 } 366 edge = list_entry(edge->list[LOWER].next, 367 struct backref_edge, list[LOWER]); 368 edges[idx - 1] = edge; 369 *index = idx; 370 return edge->node[UPPER]; 371 } 372 *index = 0; 373 return NULL; 374 } 375 376 static void unlock_node_buffer(struct backref_node *node) 377 { 378 if (node->locked) { 379 btrfs_tree_unlock(node->eb); 380 node->locked = 0; 381 } 382 } 383 384 static void drop_node_buffer(struct backref_node *node) 385 { 386 if (node->eb) { 387 unlock_node_buffer(node); 388 free_extent_buffer(node->eb); 389 node->eb = NULL; 390 } 391 } 392 393 static void drop_backref_node(struct backref_cache *tree, 394 struct backref_node *node) 395 { 396 BUG_ON(!list_empty(&node->upper)); 397 398 drop_node_buffer(node); 399 list_del(&node->list); 400 list_del(&node->lower); 401 if (!RB_EMPTY_NODE(&node->rb_node)) 402 rb_erase(&node->rb_node, &tree->rb_root); 403 free_backref_node(tree, node); 404 } 405 406 /* 407 * remove a backref node from the backref cache 408 */ 409 static void remove_backref_node(struct backref_cache *cache, 410 struct backref_node *node) 411 { 412 struct backref_node *upper; 413 struct backref_edge *edge; 414 415 if (!node) 416 return; 417 418 BUG_ON(!node->lowest && !node->detached); 419 while (!list_empty(&node->upper)) { 420 edge = list_entry(node->upper.next, struct backref_edge, 421 list[LOWER]); 422 upper = edge->node[UPPER]; 423 list_del(&edge->list[LOWER]); 424 list_del(&edge->list[UPPER]); 425 free_backref_edge(cache, edge); 426 427 if (RB_EMPTY_NODE(&upper->rb_node)) { 428 BUG_ON(!list_empty(&node->upper)); 429 drop_backref_node(cache, node); 430 node = upper; 431 node->lowest = 1; 432 continue; 433 } 434 /* 435 * add the node to leaf node list if no other 436 * child block cached. 437 */ 438 if (list_empty(&upper->lower)) { 439 list_add_tail(&upper->lower, &cache->leaves); 440 upper->lowest = 1; 441 } 442 } 443 444 drop_backref_node(cache, node); 445 } 446 447 static void update_backref_node(struct backref_cache *cache, 448 struct backref_node *node, u64 bytenr) 449 { 450 struct rb_node *rb_node; 451 rb_erase(&node->rb_node, &cache->rb_root); 452 node->bytenr = bytenr; 453 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 454 BUG_ON(rb_node); 455 } 456 457 /* 458 * update backref cache after a transaction commit 459 */ 460 static int update_backref_cache(struct btrfs_trans_handle *trans, 461 struct backref_cache *cache) 462 { 463 struct backref_node *node; 464 int level = 0; 465 466 if (cache->last_trans == 0) { 467 cache->last_trans = trans->transid; 468 return 0; 469 } 470 471 if (cache->last_trans == trans->transid) 472 return 0; 473 474 /* 475 * detached nodes are used to avoid unnecessary backref 476 * lookup. transaction commit changes the extent tree. 477 * so the detached nodes are no longer useful. 478 */ 479 while (!list_empty(&cache->detached)) { 480 node = list_entry(cache->detached.next, 481 struct backref_node, list); 482 remove_backref_node(cache, node); 483 } 484 485 while (!list_empty(&cache->changed)) { 486 node = list_entry(cache->changed.next, 487 struct backref_node, list); 488 list_del_init(&node->list); 489 BUG_ON(node->pending); 490 update_backref_node(cache, node, node->new_bytenr); 491 } 492 493 /* 494 * some nodes can be left in the pending list if there were 495 * errors during processing the pending nodes. 496 */ 497 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 498 list_for_each_entry(node, &cache->pending[level], list) { 499 BUG_ON(!node->pending); 500 if (node->bytenr == node->new_bytenr) 501 continue; 502 update_backref_node(cache, node, node->new_bytenr); 503 } 504 } 505 506 cache->last_trans = 0; 507 return 1; 508 } 509 510 static int should_ignore_root(struct btrfs_root *root) 511 { 512 struct btrfs_root *reloc_root; 513 514 if (!root->ref_cows) 515 return 0; 516 517 reloc_root = root->reloc_root; 518 if (!reloc_root) 519 return 0; 520 521 if (btrfs_root_last_snapshot(&reloc_root->root_item) == 522 root->fs_info->running_transaction->transid - 1) 523 return 0; 524 /* 525 * if there is reloc tree and it was created in previous 526 * transaction backref lookup can find the reloc tree, 527 * so backref node for the fs tree root is useless for 528 * relocation. 529 */ 530 return 1; 531 } 532 533 /* 534 * find reloc tree by address of tree root 535 */ 536 static struct btrfs_root *find_reloc_root(struct reloc_control *rc, 537 u64 bytenr) 538 { 539 struct rb_node *rb_node; 540 struct mapping_node *node; 541 struct btrfs_root *root = NULL; 542 543 spin_lock(&rc->reloc_root_tree.lock); 544 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr); 545 if (rb_node) { 546 node = rb_entry(rb_node, struct mapping_node, rb_node); 547 root = (struct btrfs_root *)node->data; 548 } 549 spin_unlock(&rc->reloc_root_tree.lock); 550 return root; 551 } 552 553 static int is_cowonly_root(u64 root_objectid) 554 { 555 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 556 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 557 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 558 root_objectid == BTRFS_DEV_TREE_OBJECTID || 559 root_objectid == BTRFS_TREE_LOG_OBJECTID || 560 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 561 return 1; 562 return 0; 563 } 564 565 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, 566 u64 root_objectid) 567 { 568 struct btrfs_key key; 569 570 key.objectid = root_objectid; 571 key.type = BTRFS_ROOT_ITEM_KEY; 572 if (is_cowonly_root(root_objectid)) 573 key.offset = 0; 574 else 575 key.offset = (u64)-1; 576 577 return btrfs_read_fs_root_no_name(fs_info, &key); 578 } 579 580 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 581 static noinline_for_stack 582 struct btrfs_root *find_tree_root(struct reloc_control *rc, 583 struct extent_buffer *leaf, 584 struct btrfs_extent_ref_v0 *ref0) 585 { 586 struct btrfs_root *root; 587 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); 588 u64 generation = btrfs_ref_generation_v0(leaf, ref0); 589 590 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); 591 592 root = read_fs_root(rc->extent_root->fs_info, root_objectid); 593 BUG_ON(IS_ERR(root)); 594 595 if (root->ref_cows && 596 generation != btrfs_root_generation(&root->root_item)) 597 return NULL; 598 599 return root; 600 } 601 #endif 602 603 static noinline_for_stack 604 int find_inline_backref(struct extent_buffer *leaf, int slot, 605 unsigned long *ptr, unsigned long *end) 606 { 607 struct btrfs_extent_item *ei; 608 struct btrfs_tree_block_info *bi; 609 u32 item_size; 610 611 item_size = btrfs_item_size_nr(leaf, slot); 612 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 613 if (item_size < sizeof(*ei)) { 614 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 615 return 1; 616 } 617 #endif 618 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 619 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 620 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 621 622 if (item_size <= sizeof(*ei) + sizeof(*bi)) { 623 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 624 return 1; 625 } 626 627 bi = (struct btrfs_tree_block_info *)(ei + 1); 628 *ptr = (unsigned long)(bi + 1); 629 *end = (unsigned long)ei + item_size; 630 return 0; 631 } 632 633 /* 634 * build backref tree for a given tree block. root of the backref tree 635 * corresponds the tree block, leaves of the backref tree correspond 636 * roots of b-trees that reference the tree block. 637 * 638 * the basic idea of this function is check backrefs of a given block 639 * to find upper level blocks that refernece the block, and then check 640 * bakcrefs of these upper level blocks recursively. the recursion stop 641 * when tree root is reached or backrefs for the block is cached. 642 * 643 * NOTE: if we find backrefs for a block are cached, we know backrefs 644 * for all upper level blocks that directly/indirectly reference the 645 * block are also cached. 646 */ 647 static noinline_for_stack 648 struct backref_node *build_backref_tree(struct reloc_control *rc, 649 struct btrfs_key *node_key, 650 int level, u64 bytenr) 651 { 652 struct backref_cache *cache = &rc->backref_cache; 653 struct btrfs_path *path1; 654 struct btrfs_path *path2; 655 struct extent_buffer *eb; 656 struct btrfs_root *root; 657 struct backref_node *cur; 658 struct backref_node *upper; 659 struct backref_node *lower; 660 struct backref_node *node = NULL; 661 struct backref_node *exist = NULL; 662 struct backref_edge *edge; 663 struct rb_node *rb_node; 664 struct btrfs_key key; 665 unsigned long end; 666 unsigned long ptr; 667 LIST_HEAD(list); 668 LIST_HEAD(useless); 669 int cowonly; 670 int ret; 671 int err = 0; 672 673 path1 = btrfs_alloc_path(); 674 path2 = btrfs_alloc_path(); 675 if (!path1 || !path2) { 676 err = -ENOMEM; 677 goto out; 678 } 679 680 node = alloc_backref_node(cache); 681 if (!node) { 682 err = -ENOMEM; 683 goto out; 684 } 685 686 node->bytenr = bytenr; 687 node->level = level; 688 node->lowest = 1; 689 cur = node; 690 again: 691 end = 0; 692 ptr = 0; 693 key.objectid = cur->bytenr; 694 key.type = BTRFS_EXTENT_ITEM_KEY; 695 key.offset = (u64)-1; 696 697 path1->search_commit_root = 1; 698 path1->skip_locking = 1; 699 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1, 700 0, 0); 701 if (ret < 0) { 702 err = ret; 703 goto out; 704 } 705 BUG_ON(!ret || !path1->slots[0]); 706 707 path1->slots[0]--; 708 709 WARN_ON(cur->checked); 710 if (!list_empty(&cur->upper)) { 711 /* 712 * the backref was added previously when processing 713 * backref of type BTRFS_TREE_BLOCK_REF_KEY 714 */ 715 BUG_ON(!list_is_singular(&cur->upper)); 716 edge = list_entry(cur->upper.next, struct backref_edge, 717 list[LOWER]); 718 BUG_ON(!list_empty(&edge->list[UPPER])); 719 exist = edge->node[UPPER]; 720 /* 721 * add the upper level block to pending list if we need 722 * check its backrefs 723 */ 724 if (!exist->checked) 725 list_add_tail(&edge->list[UPPER], &list); 726 } else { 727 exist = NULL; 728 } 729 730 while (1) { 731 cond_resched(); 732 eb = path1->nodes[0]; 733 734 if (ptr >= end) { 735 if (path1->slots[0] >= btrfs_header_nritems(eb)) { 736 ret = btrfs_next_leaf(rc->extent_root, path1); 737 if (ret < 0) { 738 err = ret; 739 goto out; 740 } 741 if (ret > 0) 742 break; 743 eb = path1->nodes[0]; 744 } 745 746 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]); 747 if (key.objectid != cur->bytenr) { 748 WARN_ON(exist); 749 break; 750 } 751 752 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 753 ret = find_inline_backref(eb, path1->slots[0], 754 &ptr, &end); 755 if (ret) 756 goto next; 757 } 758 } 759 760 if (ptr < end) { 761 /* update key for inline back ref */ 762 struct btrfs_extent_inline_ref *iref; 763 iref = (struct btrfs_extent_inline_ref *)ptr; 764 key.type = btrfs_extent_inline_ref_type(eb, iref); 765 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 766 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && 767 key.type != BTRFS_SHARED_BLOCK_REF_KEY); 768 } 769 770 if (exist && 771 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 772 exist->owner == key.offset) || 773 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 774 exist->bytenr == key.offset))) { 775 exist = NULL; 776 goto next; 777 } 778 779 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 780 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || 781 key.type == BTRFS_EXTENT_REF_V0_KEY) { 782 if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 783 struct btrfs_extent_ref_v0 *ref0; 784 ref0 = btrfs_item_ptr(eb, path1->slots[0], 785 struct btrfs_extent_ref_v0); 786 if (key.objectid == key.offset) { 787 root = find_tree_root(rc, eb, ref0); 788 if (root && !should_ignore_root(root)) 789 cur->root = root; 790 else 791 list_add(&cur->list, &useless); 792 break; 793 } 794 if (is_cowonly_root(btrfs_ref_root_v0(eb, 795 ref0))) 796 cur->cowonly = 1; 797 } 798 #else 799 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 800 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 801 #endif 802 if (key.objectid == key.offset) { 803 /* 804 * only root blocks of reloc trees use 805 * backref of this type. 806 */ 807 root = find_reloc_root(rc, cur->bytenr); 808 BUG_ON(!root); 809 cur->root = root; 810 break; 811 } 812 813 edge = alloc_backref_edge(cache); 814 if (!edge) { 815 err = -ENOMEM; 816 goto out; 817 } 818 rb_node = tree_search(&cache->rb_root, key.offset); 819 if (!rb_node) { 820 upper = alloc_backref_node(cache); 821 if (!upper) { 822 free_backref_edge(cache, edge); 823 err = -ENOMEM; 824 goto out; 825 } 826 upper->bytenr = key.offset; 827 upper->level = cur->level + 1; 828 /* 829 * backrefs for the upper level block isn't 830 * cached, add the block to pending list 831 */ 832 list_add_tail(&edge->list[UPPER], &list); 833 } else { 834 upper = rb_entry(rb_node, struct backref_node, 835 rb_node); 836 BUG_ON(!upper->checked); 837 INIT_LIST_HEAD(&edge->list[UPPER]); 838 } 839 list_add_tail(&edge->list[LOWER], &cur->upper); 840 edge->node[LOWER] = cur; 841 edge->node[UPPER] = upper; 842 843 goto next; 844 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 845 goto next; 846 } 847 848 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */ 849 root = read_fs_root(rc->extent_root->fs_info, key.offset); 850 if (IS_ERR(root)) { 851 err = PTR_ERR(root); 852 goto out; 853 } 854 855 if (!root->ref_cows) 856 cur->cowonly = 1; 857 858 if (btrfs_root_level(&root->root_item) == cur->level) { 859 /* tree root */ 860 BUG_ON(btrfs_root_bytenr(&root->root_item) != 861 cur->bytenr); 862 if (should_ignore_root(root)) 863 list_add(&cur->list, &useless); 864 else 865 cur->root = root; 866 break; 867 } 868 869 level = cur->level + 1; 870 871 /* 872 * searching the tree to find upper level blocks 873 * reference the block. 874 */ 875 path2->search_commit_root = 1; 876 path2->skip_locking = 1; 877 path2->lowest_level = level; 878 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); 879 path2->lowest_level = 0; 880 if (ret < 0) { 881 err = ret; 882 goto out; 883 } 884 if (ret > 0 && path2->slots[level] > 0) 885 path2->slots[level]--; 886 887 eb = path2->nodes[level]; 888 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != 889 cur->bytenr); 890 891 lower = cur; 892 for (; level < BTRFS_MAX_LEVEL; level++) { 893 if (!path2->nodes[level]) { 894 BUG_ON(btrfs_root_bytenr(&root->root_item) != 895 lower->bytenr); 896 if (should_ignore_root(root)) 897 list_add(&lower->list, &useless); 898 else 899 lower->root = root; 900 break; 901 } 902 903 edge = alloc_backref_edge(cache); 904 if (!edge) { 905 err = -ENOMEM; 906 goto out; 907 } 908 909 eb = path2->nodes[level]; 910 rb_node = tree_search(&cache->rb_root, eb->start); 911 if (!rb_node) { 912 upper = alloc_backref_node(cache); 913 if (!upper) { 914 free_backref_edge(cache, edge); 915 err = -ENOMEM; 916 goto out; 917 } 918 upper->bytenr = eb->start; 919 upper->owner = btrfs_header_owner(eb); 920 upper->level = lower->level + 1; 921 if (!root->ref_cows) 922 upper->cowonly = 1; 923 924 /* 925 * if we know the block isn't shared 926 * we can void checking its backrefs. 927 */ 928 if (btrfs_block_can_be_shared(root, eb)) 929 upper->checked = 0; 930 else 931 upper->checked = 1; 932 933 /* 934 * add the block to pending list if we 935 * need check its backrefs. only block 936 * at 'cur->level + 1' is added to the 937 * tail of pending list. this guarantees 938 * we check backrefs from lower level 939 * blocks to upper level blocks. 940 */ 941 if (!upper->checked && 942 level == cur->level + 1) { 943 list_add_tail(&edge->list[UPPER], 944 &list); 945 } else 946 INIT_LIST_HEAD(&edge->list[UPPER]); 947 } else { 948 upper = rb_entry(rb_node, struct backref_node, 949 rb_node); 950 BUG_ON(!upper->checked); 951 INIT_LIST_HEAD(&edge->list[UPPER]); 952 if (!upper->owner) 953 upper->owner = btrfs_header_owner(eb); 954 } 955 list_add_tail(&edge->list[LOWER], &lower->upper); 956 edge->node[LOWER] = lower; 957 edge->node[UPPER] = upper; 958 959 if (rb_node) 960 break; 961 lower = upper; 962 upper = NULL; 963 } 964 btrfs_release_path(root, path2); 965 next: 966 if (ptr < end) { 967 ptr += btrfs_extent_inline_ref_size(key.type); 968 if (ptr >= end) { 969 WARN_ON(ptr > end); 970 ptr = 0; 971 end = 0; 972 } 973 } 974 if (ptr >= end) 975 path1->slots[0]++; 976 } 977 btrfs_release_path(rc->extent_root, path1); 978 979 cur->checked = 1; 980 WARN_ON(exist); 981 982 /* the pending list isn't empty, take the first block to process */ 983 if (!list_empty(&list)) { 984 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 985 list_del_init(&edge->list[UPPER]); 986 cur = edge->node[UPPER]; 987 goto again; 988 } 989 990 /* 991 * everything goes well, connect backref nodes and insert backref nodes 992 * into the cache. 993 */ 994 BUG_ON(!node->checked); 995 cowonly = node->cowonly; 996 if (!cowonly) { 997 rb_node = tree_insert(&cache->rb_root, node->bytenr, 998 &node->rb_node); 999 BUG_ON(rb_node); 1000 list_add_tail(&node->lower, &cache->leaves); 1001 } 1002 1003 list_for_each_entry(edge, &node->upper, list[LOWER]) 1004 list_add_tail(&edge->list[UPPER], &list); 1005 1006 while (!list_empty(&list)) { 1007 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 1008 list_del_init(&edge->list[UPPER]); 1009 upper = edge->node[UPPER]; 1010 if (upper->detached) { 1011 list_del(&edge->list[LOWER]); 1012 lower = edge->node[LOWER]; 1013 free_backref_edge(cache, edge); 1014 if (list_empty(&lower->upper)) 1015 list_add(&lower->list, &useless); 1016 continue; 1017 } 1018 1019 if (!RB_EMPTY_NODE(&upper->rb_node)) { 1020 if (upper->lowest) { 1021 list_del_init(&upper->lower); 1022 upper->lowest = 0; 1023 } 1024 1025 list_add_tail(&edge->list[UPPER], &upper->lower); 1026 continue; 1027 } 1028 1029 BUG_ON(!upper->checked); 1030 BUG_ON(cowonly != upper->cowonly); 1031 if (!cowonly) { 1032 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 1033 &upper->rb_node); 1034 BUG_ON(rb_node); 1035 } 1036 1037 list_add_tail(&edge->list[UPPER], &upper->lower); 1038 1039 list_for_each_entry(edge, &upper->upper, list[LOWER]) 1040 list_add_tail(&edge->list[UPPER], &list); 1041 } 1042 /* 1043 * process useless backref nodes. backref nodes for tree leaves 1044 * are deleted from the cache. backref nodes for upper level 1045 * tree blocks are left in the cache to avoid unnecessary backref 1046 * lookup. 1047 */ 1048 while (!list_empty(&useless)) { 1049 upper = list_entry(useless.next, struct backref_node, list); 1050 list_del_init(&upper->list); 1051 BUG_ON(!list_empty(&upper->upper)); 1052 if (upper == node) 1053 node = NULL; 1054 if (upper->lowest) { 1055 list_del_init(&upper->lower); 1056 upper->lowest = 0; 1057 } 1058 while (!list_empty(&upper->lower)) { 1059 edge = list_entry(upper->lower.next, 1060 struct backref_edge, list[UPPER]); 1061 list_del(&edge->list[UPPER]); 1062 list_del(&edge->list[LOWER]); 1063 lower = edge->node[LOWER]; 1064 free_backref_edge(cache, edge); 1065 1066 if (list_empty(&lower->upper)) 1067 list_add(&lower->list, &useless); 1068 } 1069 __mark_block_processed(rc, upper); 1070 if (upper->level > 0) { 1071 list_add(&upper->list, &cache->detached); 1072 upper->detached = 1; 1073 } else { 1074 rb_erase(&upper->rb_node, &cache->rb_root); 1075 free_backref_node(cache, upper); 1076 } 1077 } 1078 out: 1079 btrfs_free_path(path1); 1080 btrfs_free_path(path2); 1081 if (err) { 1082 while (!list_empty(&useless)) { 1083 lower = list_entry(useless.next, 1084 struct backref_node, upper); 1085 list_del_init(&lower->upper); 1086 } 1087 upper = node; 1088 INIT_LIST_HEAD(&list); 1089 while (upper) { 1090 if (RB_EMPTY_NODE(&upper->rb_node)) { 1091 list_splice_tail(&upper->upper, &list); 1092 free_backref_node(cache, upper); 1093 } 1094 1095 if (list_empty(&list)) 1096 break; 1097 1098 edge = list_entry(list.next, struct backref_edge, 1099 list[LOWER]); 1100 list_del(&edge->list[LOWER]); 1101 upper = edge->node[UPPER]; 1102 free_backref_edge(cache, edge); 1103 } 1104 return ERR_PTR(err); 1105 } 1106 BUG_ON(node && node->detached); 1107 return node; 1108 } 1109 1110 /* 1111 * helper to add backref node for the newly created snapshot. 1112 * the backref node is created by cloning backref node that 1113 * corresponds to root of source tree 1114 */ 1115 static int clone_backref_node(struct btrfs_trans_handle *trans, 1116 struct reloc_control *rc, 1117 struct btrfs_root *src, 1118 struct btrfs_root *dest) 1119 { 1120 struct btrfs_root *reloc_root = src->reloc_root; 1121 struct backref_cache *cache = &rc->backref_cache; 1122 struct backref_node *node = NULL; 1123 struct backref_node *new_node; 1124 struct backref_edge *edge; 1125 struct backref_edge *new_edge; 1126 struct rb_node *rb_node; 1127 1128 if (cache->last_trans > 0) 1129 update_backref_cache(trans, cache); 1130 1131 rb_node = tree_search(&cache->rb_root, src->commit_root->start); 1132 if (rb_node) { 1133 node = rb_entry(rb_node, struct backref_node, rb_node); 1134 if (node->detached) 1135 node = NULL; 1136 else 1137 BUG_ON(node->new_bytenr != reloc_root->node->start); 1138 } 1139 1140 if (!node) { 1141 rb_node = tree_search(&cache->rb_root, 1142 reloc_root->commit_root->start); 1143 if (rb_node) { 1144 node = rb_entry(rb_node, struct backref_node, 1145 rb_node); 1146 BUG_ON(node->detached); 1147 } 1148 } 1149 1150 if (!node) 1151 return 0; 1152 1153 new_node = alloc_backref_node(cache); 1154 if (!new_node) 1155 return -ENOMEM; 1156 1157 new_node->bytenr = dest->node->start; 1158 new_node->level = node->level; 1159 new_node->lowest = node->lowest; 1160 new_node->checked = 1; 1161 new_node->root = dest; 1162 1163 if (!node->lowest) { 1164 list_for_each_entry(edge, &node->lower, list[UPPER]) { 1165 new_edge = alloc_backref_edge(cache); 1166 if (!new_edge) 1167 goto fail; 1168 1169 new_edge->node[UPPER] = new_node; 1170 new_edge->node[LOWER] = edge->node[LOWER]; 1171 list_add_tail(&new_edge->list[UPPER], 1172 &new_node->lower); 1173 } 1174 } 1175 1176 rb_node = tree_insert(&cache->rb_root, new_node->bytenr, 1177 &new_node->rb_node); 1178 BUG_ON(rb_node); 1179 1180 if (!new_node->lowest) { 1181 list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { 1182 list_add_tail(&new_edge->list[LOWER], 1183 &new_edge->node[LOWER]->upper); 1184 } 1185 } 1186 return 0; 1187 fail: 1188 while (!list_empty(&new_node->lower)) { 1189 new_edge = list_entry(new_node->lower.next, 1190 struct backref_edge, list[UPPER]); 1191 list_del(&new_edge->list[UPPER]); 1192 free_backref_edge(cache, new_edge); 1193 } 1194 free_backref_node(cache, new_node); 1195 return -ENOMEM; 1196 } 1197 1198 /* 1199 * helper to add 'address of tree root -> reloc tree' mapping 1200 */ 1201 static int __add_reloc_root(struct btrfs_root *root) 1202 { 1203 struct rb_node *rb_node; 1204 struct mapping_node *node; 1205 struct reloc_control *rc = root->fs_info->reloc_ctl; 1206 1207 node = kmalloc(sizeof(*node), GFP_NOFS); 1208 BUG_ON(!node); 1209 1210 node->bytenr = root->node->start; 1211 node->data = root; 1212 1213 spin_lock(&rc->reloc_root_tree.lock); 1214 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1215 node->bytenr, &node->rb_node); 1216 spin_unlock(&rc->reloc_root_tree.lock); 1217 BUG_ON(rb_node); 1218 1219 list_add_tail(&root->root_list, &rc->reloc_roots); 1220 return 0; 1221 } 1222 1223 /* 1224 * helper to update/delete the 'address of tree root -> reloc tree' 1225 * mapping 1226 */ 1227 static int __update_reloc_root(struct btrfs_root *root, int del) 1228 { 1229 struct rb_node *rb_node; 1230 struct mapping_node *node = NULL; 1231 struct reloc_control *rc = root->fs_info->reloc_ctl; 1232 1233 spin_lock(&rc->reloc_root_tree.lock); 1234 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 1235 root->commit_root->start); 1236 if (rb_node) { 1237 node = rb_entry(rb_node, struct mapping_node, rb_node); 1238 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 1239 } 1240 spin_unlock(&rc->reloc_root_tree.lock); 1241 1242 BUG_ON((struct btrfs_root *)node->data != root); 1243 1244 if (!del) { 1245 spin_lock(&rc->reloc_root_tree.lock); 1246 node->bytenr = root->node->start; 1247 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 1248 node->bytenr, &node->rb_node); 1249 spin_unlock(&rc->reloc_root_tree.lock); 1250 BUG_ON(rb_node); 1251 } else { 1252 list_del_init(&root->root_list); 1253 kfree(node); 1254 } 1255 return 0; 1256 } 1257 1258 static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, 1259 struct btrfs_root *root, u64 objectid) 1260 { 1261 struct btrfs_root *reloc_root; 1262 struct extent_buffer *eb; 1263 struct btrfs_root_item *root_item; 1264 struct btrfs_key root_key; 1265 int ret; 1266 1267 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 1268 BUG_ON(!root_item); 1269 1270 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 1271 root_key.type = BTRFS_ROOT_ITEM_KEY; 1272 root_key.offset = objectid; 1273 1274 if (root->root_key.objectid == objectid) { 1275 /* called by btrfs_init_reloc_root */ 1276 ret = btrfs_copy_root(trans, root, root->commit_root, &eb, 1277 BTRFS_TREE_RELOC_OBJECTID); 1278 BUG_ON(ret); 1279 1280 btrfs_set_root_last_snapshot(&root->root_item, 1281 trans->transid - 1); 1282 } else { 1283 /* 1284 * called by btrfs_reloc_post_snapshot_hook. 1285 * the source tree is a reloc tree, all tree blocks 1286 * modified after it was created have RELOC flag 1287 * set in their headers. so it's OK to not update 1288 * the 'last_snapshot'. 1289 */ 1290 ret = btrfs_copy_root(trans, root, root->node, &eb, 1291 BTRFS_TREE_RELOC_OBJECTID); 1292 BUG_ON(ret); 1293 } 1294 1295 memcpy(root_item, &root->root_item, sizeof(*root_item)); 1296 btrfs_set_root_bytenr(root_item, eb->start); 1297 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 1298 btrfs_set_root_generation(root_item, trans->transid); 1299 1300 if (root->root_key.objectid == objectid) { 1301 btrfs_set_root_refs(root_item, 0); 1302 memset(&root_item->drop_progress, 0, 1303 sizeof(struct btrfs_disk_key)); 1304 root_item->drop_level = 0; 1305 } 1306 1307 btrfs_tree_unlock(eb); 1308 free_extent_buffer(eb); 1309 1310 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 1311 &root_key, root_item); 1312 BUG_ON(ret); 1313 kfree(root_item); 1314 1315 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 1316 &root_key); 1317 BUG_ON(IS_ERR(reloc_root)); 1318 reloc_root->last_trans = trans->transid; 1319 return reloc_root; 1320 } 1321 1322 /* 1323 * create reloc tree for a given fs tree. reloc tree is just a 1324 * snapshot of the fs tree with special root objectid. 1325 */ 1326 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, 1327 struct btrfs_root *root) 1328 { 1329 struct btrfs_root *reloc_root; 1330 struct reloc_control *rc = root->fs_info->reloc_ctl; 1331 int clear_rsv = 0; 1332 1333 if (root->reloc_root) { 1334 reloc_root = root->reloc_root; 1335 reloc_root->last_trans = trans->transid; 1336 return 0; 1337 } 1338 1339 if (!rc || !rc->create_reloc_tree || 1340 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1341 return 0; 1342 1343 if (!trans->block_rsv) { 1344 trans->block_rsv = rc->block_rsv; 1345 clear_rsv = 1; 1346 } 1347 reloc_root = create_reloc_root(trans, root, root->root_key.objectid); 1348 if (clear_rsv) 1349 trans->block_rsv = NULL; 1350 1351 __add_reloc_root(reloc_root); 1352 root->reloc_root = reloc_root; 1353 return 0; 1354 } 1355 1356 /* 1357 * update root item of reloc tree 1358 */ 1359 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, 1360 struct btrfs_root *root) 1361 { 1362 struct btrfs_root *reloc_root; 1363 struct btrfs_root_item *root_item; 1364 int del = 0; 1365 int ret; 1366 1367 if (!root->reloc_root) 1368 return 0; 1369 1370 reloc_root = root->reloc_root; 1371 root_item = &reloc_root->root_item; 1372 1373 if (root->fs_info->reloc_ctl->merge_reloc_tree && 1374 btrfs_root_refs(root_item) == 0) { 1375 root->reloc_root = NULL; 1376 del = 1; 1377 } 1378 1379 __update_reloc_root(reloc_root, del); 1380 1381 if (reloc_root->commit_root != reloc_root->node) { 1382 btrfs_set_root_node(root_item, reloc_root->node); 1383 free_extent_buffer(reloc_root->commit_root); 1384 reloc_root->commit_root = btrfs_root_node(reloc_root); 1385 } 1386 1387 ret = btrfs_update_root(trans, root->fs_info->tree_root, 1388 &reloc_root->root_key, root_item); 1389 BUG_ON(ret); 1390 return 0; 1391 } 1392 1393 /* 1394 * helper to find first cached inode with inode number >= objectid 1395 * in a subvolume 1396 */ 1397 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid) 1398 { 1399 struct rb_node *node; 1400 struct rb_node *prev; 1401 struct btrfs_inode *entry; 1402 struct inode *inode; 1403 1404 spin_lock(&root->inode_lock); 1405 again: 1406 node = root->inode_tree.rb_node; 1407 prev = NULL; 1408 while (node) { 1409 prev = node; 1410 entry = rb_entry(node, struct btrfs_inode, rb_node); 1411 1412 if (objectid < entry->vfs_inode.i_ino) 1413 node = node->rb_left; 1414 else if (objectid > entry->vfs_inode.i_ino) 1415 node = node->rb_right; 1416 else 1417 break; 1418 } 1419 if (!node) { 1420 while (prev) { 1421 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1422 if (objectid <= entry->vfs_inode.i_ino) { 1423 node = prev; 1424 break; 1425 } 1426 prev = rb_next(prev); 1427 } 1428 } 1429 while (node) { 1430 entry = rb_entry(node, struct btrfs_inode, rb_node); 1431 inode = igrab(&entry->vfs_inode); 1432 if (inode) { 1433 spin_unlock(&root->inode_lock); 1434 return inode; 1435 } 1436 1437 objectid = entry->vfs_inode.i_ino + 1; 1438 if (cond_resched_lock(&root->inode_lock)) 1439 goto again; 1440 1441 node = rb_next(node); 1442 } 1443 spin_unlock(&root->inode_lock); 1444 return NULL; 1445 } 1446 1447 static int in_block_group(u64 bytenr, 1448 struct btrfs_block_group_cache *block_group) 1449 { 1450 if (bytenr >= block_group->key.objectid && 1451 bytenr < block_group->key.objectid + block_group->key.offset) 1452 return 1; 1453 return 0; 1454 } 1455 1456 /* 1457 * get new location of data 1458 */ 1459 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, 1460 u64 bytenr, u64 num_bytes) 1461 { 1462 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 1463 struct btrfs_path *path; 1464 struct btrfs_file_extent_item *fi; 1465 struct extent_buffer *leaf; 1466 int ret; 1467 1468 path = btrfs_alloc_path(); 1469 if (!path) 1470 return -ENOMEM; 1471 1472 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1473 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1474 bytenr, 0); 1475 if (ret < 0) 1476 goto out; 1477 if (ret > 0) { 1478 ret = -ENOENT; 1479 goto out; 1480 } 1481 1482 leaf = path->nodes[0]; 1483 fi = btrfs_item_ptr(leaf, path->slots[0], 1484 struct btrfs_file_extent_item); 1485 1486 BUG_ON(btrfs_file_extent_offset(leaf, fi) || 1487 btrfs_file_extent_compression(leaf, fi) || 1488 btrfs_file_extent_encryption(leaf, fi) || 1489 btrfs_file_extent_other_encoding(leaf, fi)); 1490 1491 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1492 ret = 1; 1493 goto out; 1494 } 1495 1496 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1497 ret = 0; 1498 out: 1499 btrfs_free_path(path); 1500 return ret; 1501 } 1502 1503 /* 1504 * update file extent items in the tree leaf to point to 1505 * the new locations. 1506 */ 1507 static noinline_for_stack 1508 int replace_file_extents(struct btrfs_trans_handle *trans, 1509 struct reloc_control *rc, 1510 struct btrfs_root *root, 1511 struct extent_buffer *leaf) 1512 { 1513 struct btrfs_key key; 1514 struct btrfs_file_extent_item *fi; 1515 struct inode *inode = NULL; 1516 u64 parent; 1517 u64 bytenr; 1518 u64 new_bytenr = 0; 1519 u64 num_bytes; 1520 u64 end; 1521 u32 nritems; 1522 u32 i; 1523 int ret; 1524 int first = 1; 1525 int dirty = 0; 1526 1527 if (rc->stage != UPDATE_DATA_PTRS) 1528 return 0; 1529 1530 /* reloc trees always use full backref */ 1531 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1532 parent = leaf->start; 1533 else 1534 parent = 0; 1535 1536 nritems = btrfs_header_nritems(leaf); 1537 for (i = 0; i < nritems; i++) { 1538 cond_resched(); 1539 btrfs_item_key_to_cpu(leaf, &key, i); 1540 if (key.type != BTRFS_EXTENT_DATA_KEY) 1541 continue; 1542 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 1543 if (btrfs_file_extent_type(leaf, fi) == 1544 BTRFS_FILE_EXTENT_INLINE) 1545 continue; 1546 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1547 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 1548 if (bytenr == 0) 1549 continue; 1550 if (!in_block_group(bytenr, rc->block_group)) 1551 continue; 1552 1553 /* 1554 * if we are modifying block in fs tree, wait for readpage 1555 * to complete and drop the extent cache 1556 */ 1557 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1558 if (first) { 1559 inode = find_next_inode(root, key.objectid); 1560 first = 0; 1561 } else if (inode && inode->i_ino < key.objectid) { 1562 btrfs_add_delayed_iput(inode); 1563 inode = find_next_inode(root, key.objectid); 1564 } 1565 if (inode && inode->i_ino == key.objectid) { 1566 end = key.offset + 1567 btrfs_file_extent_num_bytes(leaf, fi); 1568 WARN_ON(!IS_ALIGNED(key.offset, 1569 root->sectorsize)); 1570 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1571 end--; 1572 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1573 key.offset, end, 1574 GFP_NOFS); 1575 if (!ret) 1576 continue; 1577 1578 btrfs_drop_extent_cache(inode, key.offset, end, 1579 1); 1580 unlock_extent(&BTRFS_I(inode)->io_tree, 1581 key.offset, end, GFP_NOFS); 1582 } 1583 } 1584 1585 ret = get_new_location(rc->data_inode, &new_bytenr, 1586 bytenr, num_bytes); 1587 if (ret > 0) { 1588 WARN_ON(1); 1589 continue; 1590 } 1591 BUG_ON(ret < 0); 1592 1593 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1594 dirty = 1; 1595 1596 key.offset -= btrfs_file_extent_offset(leaf, fi); 1597 ret = btrfs_inc_extent_ref(trans, root, new_bytenr, 1598 num_bytes, parent, 1599 btrfs_header_owner(leaf), 1600 key.objectid, key.offset); 1601 BUG_ON(ret); 1602 1603 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1604 parent, btrfs_header_owner(leaf), 1605 key.objectid, key.offset); 1606 BUG_ON(ret); 1607 } 1608 if (dirty) 1609 btrfs_mark_buffer_dirty(leaf); 1610 if (inode) 1611 btrfs_add_delayed_iput(inode); 1612 return 0; 1613 } 1614 1615 static noinline_for_stack 1616 int memcmp_node_keys(struct extent_buffer *eb, int slot, 1617 struct btrfs_path *path, int level) 1618 { 1619 struct btrfs_disk_key key1; 1620 struct btrfs_disk_key key2; 1621 btrfs_node_key(eb, &key1, slot); 1622 btrfs_node_key(path->nodes[level], &key2, path->slots[level]); 1623 return memcmp(&key1, &key2, sizeof(key1)); 1624 } 1625 1626 /* 1627 * try to replace tree blocks in fs tree with the new blocks 1628 * in reloc tree. tree blocks haven't been modified since the 1629 * reloc tree was create can be replaced. 1630 * 1631 * if a block was replaced, level of the block + 1 is returned. 1632 * if no block got replaced, 0 is returned. if there are other 1633 * errors, a negative error number is returned. 1634 */ 1635 static noinline_for_stack 1636 int replace_path(struct btrfs_trans_handle *trans, 1637 struct btrfs_root *dest, struct btrfs_root *src, 1638 struct btrfs_path *path, struct btrfs_key *next_key, 1639 int lowest_level, int max_level) 1640 { 1641 struct extent_buffer *eb; 1642 struct extent_buffer *parent; 1643 struct btrfs_key key; 1644 u64 old_bytenr; 1645 u64 new_bytenr; 1646 u64 old_ptr_gen; 1647 u64 new_ptr_gen; 1648 u64 last_snapshot; 1649 u32 blocksize; 1650 int cow = 0; 1651 int level; 1652 int ret; 1653 int slot; 1654 1655 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 1656 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); 1657 1658 last_snapshot = btrfs_root_last_snapshot(&src->root_item); 1659 again: 1660 slot = path->slots[lowest_level]; 1661 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); 1662 1663 eb = btrfs_lock_root_node(dest); 1664 btrfs_set_lock_blocking(eb); 1665 level = btrfs_header_level(eb); 1666 1667 if (level < lowest_level) { 1668 btrfs_tree_unlock(eb); 1669 free_extent_buffer(eb); 1670 return 0; 1671 } 1672 1673 if (cow) { 1674 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); 1675 BUG_ON(ret); 1676 } 1677 btrfs_set_lock_blocking(eb); 1678 1679 if (next_key) { 1680 next_key->objectid = (u64)-1; 1681 next_key->type = (u8)-1; 1682 next_key->offset = (u64)-1; 1683 } 1684 1685 parent = eb; 1686 while (1) { 1687 level = btrfs_header_level(parent); 1688 BUG_ON(level < lowest_level); 1689 1690 ret = btrfs_bin_search(parent, &key, level, &slot); 1691 if (ret && slot > 0) 1692 slot--; 1693 1694 if (next_key && slot + 1 < btrfs_header_nritems(parent)) 1695 btrfs_node_key_to_cpu(parent, next_key, slot + 1); 1696 1697 old_bytenr = btrfs_node_blockptr(parent, slot); 1698 blocksize = btrfs_level_size(dest, level - 1); 1699 old_ptr_gen = btrfs_node_ptr_generation(parent, slot); 1700 1701 if (level <= max_level) { 1702 eb = path->nodes[level]; 1703 new_bytenr = btrfs_node_blockptr(eb, 1704 path->slots[level]); 1705 new_ptr_gen = btrfs_node_ptr_generation(eb, 1706 path->slots[level]); 1707 } else { 1708 new_bytenr = 0; 1709 new_ptr_gen = 0; 1710 } 1711 1712 if (new_bytenr > 0 && new_bytenr == old_bytenr) { 1713 WARN_ON(1); 1714 ret = level; 1715 break; 1716 } 1717 1718 if (new_bytenr == 0 || old_ptr_gen > last_snapshot || 1719 memcmp_node_keys(parent, slot, path, level)) { 1720 if (level <= lowest_level) { 1721 ret = 0; 1722 break; 1723 } 1724 1725 eb = read_tree_block(dest, old_bytenr, blocksize, 1726 old_ptr_gen); 1727 BUG_ON(!eb); 1728 btrfs_tree_lock(eb); 1729 if (cow) { 1730 ret = btrfs_cow_block(trans, dest, eb, parent, 1731 slot, &eb); 1732 BUG_ON(ret); 1733 } 1734 btrfs_set_lock_blocking(eb); 1735 1736 btrfs_tree_unlock(parent); 1737 free_extent_buffer(parent); 1738 1739 parent = eb; 1740 continue; 1741 } 1742 1743 if (!cow) { 1744 btrfs_tree_unlock(parent); 1745 free_extent_buffer(parent); 1746 cow = 1; 1747 goto again; 1748 } 1749 1750 btrfs_node_key_to_cpu(path->nodes[level], &key, 1751 path->slots[level]); 1752 btrfs_release_path(src, path); 1753 1754 path->lowest_level = level; 1755 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1756 path->lowest_level = 0; 1757 BUG_ON(ret); 1758 1759 /* 1760 * swap blocks in fs tree and reloc tree. 1761 */ 1762 btrfs_set_node_blockptr(parent, slot, new_bytenr); 1763 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); 1764 btrfs_mark_buffer_dirty(parent); 1765 1766 btrfs_set_node_blockptr(path->nodes[level], 1767 path->slots[level], old_bytenr); 1768 btrfs_set_node_ptr_generation(path->nodes[level], 1769 path->slots[level], old_ptr_gen); 1770 btrfs_mark_buffer_dirty(path->nodes[level]); 1771 1772 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, 1773 path->nodes[level]->start, 1774 src->root_key.objectid, level - 1, 0); 1775 BUG_ON(ret); 1776 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 1777 0, dest->root_key.objectid, level - 1, 1778 0); 1779 BUG_ON(ret); 1780 1781 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, 1782 path->nodes[level]->start, 1783 src->root_key.objectid, level - 1, 0); 1784 BUG_ON(ret); 1785 1786 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 1787 0, dest->root_key.objectid, level - 1, 1788 0); 1789 BUG_ON(ret); 1790 1791 btrfs_unlock_up_safe(path, 0); 1792 1793 ret = level; 1794 break; 1795 } 1796 btrfs_tree_unlock(parent); 1797 free_extent_buffer(parent); 1798 return ret; 1799 } 1800 1801 /* 1802 * helper to find next relocated block in reloc tree 1803 */ 1804 static noinline_for_stack 1805 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1806 int *level) 1807 { 1808 struct extent_buffer *eb; 1809 int i; 1810 u64 last_snapshot; 1811 u32 nritems; 1812 1813 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1814 1815 for (i = 0; i < *level; i++) { 1816 free_extent_buffer(path->nodes[i]); 1817 path->nodes[i] = NULL; 1818 } 1819 1820 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 1821 eb = path->nodes[i]; 1822 nritems = btrfs_header_nritems(eb); 1823 while (path->slots[i] + 1 < nritems) { 1824 path->slots[i]++; 1825 if (btrfs_node_ptr_generation(eb, path->slots[i]) <= 1826 last_snapshot) 1827 continue; 1828 1829 *level = i; 1830 return 0; 1831 } 1832 free_extent_buffer(path->nodes[i]); 1833 path->nodes[i] = NULL; 1834 } 1835 return 1; 1836 } 1837 1838 /* 1839 * walk down reloc tree to find relocated block of lowest level 1840 */ 1841 static noinline_for_stack 1842 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1843 int *level) 1844 { 1845 struct extent_buffer *eb = NULL; 1846 int i; 1847 u64 bytenr; 1848 u64 ptr_gen = 0; 1849 u64 last_snapshot; 1850 u32 blocksize; 1851 u32 nritems; 1852 1853 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1854 1855 for (i = *level; i > 0; i--) { 1856 eb = path->nodes[i]; 1857 nritems = btrfs_header_nritems(eb); 1858 while (path->slots[i] < nritems) { 1859 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]); 1860 if (ptr_gen > last_snapshot) 1861 break; 1862 path->slots[i]++; 1863 } 1864 if (path->slots[i] >= nritems) { 1865 if (i == *level) 1866 break; 1867 *level = i + 1; 1868 return 0; 1869 } 1870 if (i == 1) { 1871 *level = i; 1872 return 0; 1873 } 1874 1875 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 1876 blocksize = btrfs_level_size(root, i - 1); 1877 eb = read_tree_block(root, bytenr, blocksize, ptr_gen); 1878 BUG_ON(btrfs_header_level(eb) != i - 1); 1879 path->nodes[i - 1] = eb; 1880 path->slots[i - 1] = 0; 1881 } 1882 return 1; 1883 } 1884 1885 /* 1886 * invalidate extent cache for file extents whose key in range of 1887 * [min_key, max_key) 1888 */ 1889 static int invalidate_extent_cache(struct btrfs_root *root, 1890 struct btrfs_key *min_key, 1891 struct btrfs_key *max_key) 1892 { 1893 struct inode *inode = NULL; 1894 u64 objectid; 1895 u64 start, end; 1896 1897 objectid = min_key->objectid; 1898 while (1) { 1899 cond_resched(); 1900 iput(inode); 1901 1902 if (objectid > max_key->objectid) 1903 break; 1904 1905 inode = find_next_inode(root, objectid); 1906 if (!inode) 1907 break; 1908 1909 if (inode->i_ino > max_key->objectid) { 1910 iput(inode); 1911 break; 1912 } 1913 1914 objectid = inode->i_ino + 1; 1915 if (!S_ISREG(inode->i_mode)) 1916 continue; 1917 1918 if (unlikely(min_key->objectid == inode->i_ino)) { 1919 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1920 continue; 1921 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1922 start = 0; 1923 else { 1924 start = min_key->offset; 1925 WARN_ON(!IS_ALIGNED(start, root->sectorsize)); 1926 } 1927 } else { 1928 start = 0; 1929 } 1930 1931 if (unlikely(max_key->objectid == inode->i_ino)) { 1932 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1933 continue; 1934 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1935 end = (u64)-1; 1936 } else { 1937 if (max_key->offset == 0) 1938 continue; 1939 end = max_key->offset; 1940 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1941 end--; 1942 } 1943 } else { 1944 end = (u64)-1; 1945 } 1946 1947 /* the lock_extent waits for readpage to complete */ 1948 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1949 btrfs_drop_extent_cache(inode, start, end, 1); 1950 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1951 } 1952 return 0; 1953 } 1954 1955 static int find_next_key(struct btrfs_path *path, int level, 1956 struct btrfs_key *key) 1957 1958 { 1959 while (level < BTRFS_MAX_LEVEL) { 1960 if (!path->nodes[level]) 1961 break; 1962 if (path->slots[level] + 1 < 1963 btrfs_header_nritems(path->nodes[level])) { 1964 btrfs_node_key_to_cpu(path->nodes[level], key, 1965 path->slots[level] + 1); 1966 return 0; 1967 } 1968 level++; 1969 } 1970 return 1; 1971 } 1972 1973 /* 1974 * merge the relocated tree blocks in reloc tree with corresponding 1975 * fs tree. 1976 */ 1977 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, 1978 struct btrfs_root *root) 1979 { 1980 LIST_HEAD(inode_list); 1981 struct btrfs_key key; 1982 struct btrfs_key next_key; 1983 struct btrfs_trans_handle *trans; 1984 struct btrfs_root *reloc_root; 1985 struct btrfs_root_item *root_item; 1986 struct btrfs_path *path; 1987 struct extent_buffer *leaf; 1988 unsigned long nr; 1989 int level; 1990 int max_level; 1991 int replaced = 0; 1992 int ret; 1993 int err = 0; 1994 u32 min_reserved; 1995 1996 path = btrfs_alloc_path(); 1997 if (!path) 1998 return -ENOMEM; 1999 2000 reloc_root = root->reloc_root; 2001 root_item = &reloc_root->root_item; 2002 2003 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 2004 level = btrfs_root_level(root_item); 2005 extent_buffer_get(reloc_root->node); 2006 path->nodes[level] = reloc_root->node; 2007 path->slots[level] = 0; 2008 } else { 2009 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 2010 2011 level = root_item->drop_level; 2012 BUG_ON(level == 0); 2013 path->lowest_level = level; 2014 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 2015 path->lowest_level = 0; 2016 if (ret < 0) { 2017 btrfs_free_path(path); 2018 return ret; 2019 } 2020 2021 btrfs_node_key_to_cpu(path->nodes[level], &next_key, 2022 path->slots[level]); 2023 WARN_ON(memcmp(&key, &next_key, sizeof(key))); 2024 2025 btrfs_unlock_up_safe(path, 0); 2026 } 2027 2028 min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2029 memset(&next_key, 0, sizeof(next_key)); 2030 2031 while (1) { 2032 trans = btrfs_start_transaction(root, 0); 2033 BUG_ON(IS_ERR(trans)); 2034 trans->block_rsv = rc->block_rsv; 2035 2036 ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, 2037 min_reserved, 0); 2038 if (ret) { 2039 BUG_ON(ret != -EAGAIN); 2040 ret = btrfs_commit_transaction(trans, root); 2041 BUG_ON(ret); 2042 continue; 2043 } 2044 2045 replaced = 0; 2046 max_level = level; 2047 2048 ret = walk_down_reloc_tree(reloc_root, path, &level); 2049 if (ret < 0) { 2050 err = ret; 2051 goto out; 2052 } 2053 if (ret > 0) 2054 break; 2055 2056 if (!find_next_key(path, level, &key) && 2057 btrfs_comp_cpu_keys(&next_key, &key) >= 0) { 2058 ret = 0; 2059 } else { 2060 ret = replace_path(trans, root, reloc_root, path, 2061 &next_key, level, max_level); 2062 } 2063 if (ret < 0) { 2064 err = ret; 2065 goto out; 2066 } 2067 2068 if (ret > 0) { 2069 level = ret; 2070 btrfs_node_key_to_cpu(path->nodes[level], &key, 2071 path->slots[level]); 2072 replaced = 1; 2073 } 2074 2075 ret = walk_up_reloc_tree(reloc_root, path, &level); 2076 if (ret > 0) 2077 break; 2078 2079 BUG_ON(level == 0); 2080 /* 2081 * save the merging progress in the drop_progress. 2082 * this is OK since root refs == 1 in this case. 2083 */ 2084 btrfs_node_key(path->nodes[level], &root_item->drop_progress, 2085 path->slots[level]); 2086 root_item->drop_level = level; 2087 2088 nr = trans->blocks_used; 2089 btrfs_end_transaction_throttle(trans, root); 2090 2091 btrfs_btree_balance_dirty(root, nr); 2092 2093 if (replaced && rc->stage == UPDATE_DATA_PTRS) 2094 invalidate_extent_cache(root, &key, &next_key); 2095 } 2096 2097 /* 2098 * handle the case only one block in the fs tree need to be 2099 * relocated and the block is tree root. 2100 */ 2101 leaf = btrfs_lock_root_node(root); 2102 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf); 2103 btrfs_tree_unlock(leaf); 2104 free_extent_buffer(leaf); 2105 if (ret < 0) 2106 err = ret; 2107 out: 2108 btrfs_free_path(path); 2109 2110 if (err == 0) { 2111 memset(&root_item->drop_progress, 0, 2112 sizeof(root_item->drop_progress)); 2113 root_item->drop_level = 0; 2114 btrfs_set_root_refs(root_item, 0); 2115 btrfs_update_reloc_root(trans, root); 2116 } 2117 2118 nr = trans->blocks_used; 2119 btrfs_end_transaction_throttle(trans, root); 2120 2121 btrfs_btree_balance_dirty(root, nr); 2122 2123 if (replaced && rc->stage == UPDATE_DATA_PTRS) 2124 invalidate_extent_cache(root, &key, &next_key); 2125 2126 return err; 2127 } 2128 2129 static noinline_for_stack 2130 int prepare_to_merge(struct reloc_control *rc, int err) 2131 { 2132 struct btrfs_root *root = rc->extent_root; 2133 struct btrfs_root *reloc_root; 2134 struct btrfs_trans_handle *trans; 2135 LIST_HEAD(reloc_roots); 2136 u64 num_bytes = 0; 2137 int ret; 2138 2139 mutex_lock(&root->fs_info->trans_mutex); 2140 rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; 2141 rc->merging_rsv_size += rc->nodes_relocated * 2; 2142 mutex_unlock(&root->fs_info->trans_mutex); 2143 again: 2144 if (!err) { 2145 num_bytes = rc->merging_rsv_size; 2146 ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, 2147 num_bytes); 2148 if (ret) 2149 err = ret; 2150 } 2151 2152 trans = btrfs_join_transaction(rc->extent_root, 1); 2153 if (IS_ERR(trans)) { 2154 if (!err) 2155 btrfs_block_rsv_release(rc->extent_root, 2156 rc->block_rsv, num_bytes); 2157 return PTR_ERR(trans); 2158 } 2159 2160 if (!err) { 2161 if (num_bytes != rc->merging_rsv_size) { 2162 btrfs_end_transaction(trans, rc->extent_root); 2163 btrfs_block_rsv_release(rc->extent_root, 2164 rc->block_rsv, num_bytes); 2165 goto again; 2166 } 2167 } 2168 2169 rc->merge_reloc_tree = 1; 2170 2171 while (!list_empty(&rc->reloc_roots)) { 2172 reloc_root = list_entry(rc->reloc_roots.next, 2173 struct btrfs_root, root_list); 2174 list_del_init(&reloc_root->root_list); 2175 2176 root = read_fs_root(reloc_root->fs_info, 2177 reloc_root->root_key.offset); 2178 BUG_ON(IS_ERR(root)); 2179 BUG_ON(root->reloc_root != reloc_root); 2180 2181 /* 2182 * set reference count to 1, so btrfs_recover_relocation 2183 * knows it should resumes merging 2184 */ 2185 if (!err) 2186 btrfs_set_root_refs(&reloc_root->root_item, 1); 2187 btrfs_update_reloc_root(trans, root); 2188 2189 list_add(&reloc_root->root_list, &reloc_roots); 2190 } 2191 2192 list_splice(&reloc_roots, &rc->reloc_roots); 2193 2194 if (!err) 2195 btrfs_commit_transaction(trans, rc->extent_root); 2196 else 2197 btrfs_end_transaction(trans, rc->extent_root); 2198 return err; 2199 } 2200 2201 static noinline_for_stack 2202 int merge_reloc_roots(struct reloc_control *rc) 2203 { 2204 struct btrfs_root *root; 2205 struct btrfs_root *reloc_root; 2206 LIST_HEAD(reloc_roots); 2207 int found = 0; 2208 int ret; 2209 again: 2210 root = rc->extent_root; 2211 mutex_lock(&root->fs_info->trans_mutex); 2212 list_splice_init(&rc->reloc_roots, &reloc_roots); 2213 mutex_unlock(&root->fs_info->trans_mutex); 2214 2215 while (!list_empty(&reloc_roots)) { 2216 found = 1; 2217 reloc_root = list_entry(reloc_roots.next, 2218 struct btrfs_root, root_list); 2219 2220 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 2221 root = read_fs_root(reloc_root->fs_info, 2222 reloc_root->root_key.offset); 2223 BUG_ON(IS_ERR(root)); 2224 BUG_ON(root->reloc_root != reloc_root); 2225 2226 ret = merge_reloc_root(rc, root); 2227 BUG_ON(ret); 2228 } else { 2229 list_del_init(&reloc_root->root_list); 2230 } 2231 btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); 2232 } 2233 2234 if (found) { 2235 found = 0; 2236 goto again; 2237 } 2238 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 2239 return 0; 2240 } 2241 2242 static void free_block_list(struct rb_root *blocks) 2243 { 2244 struct tree_block *block; 2245 struct rb_node *rb_node; 2246 while ((rb_node = rb_first(blocks))) { 2247 block = rb_entry(rb_node, struct tree_block, rb_node); 2248 rb_erase(rb_node, blocks); 2249 kfree(block); 2250 } 2251 } 2252 2253 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, 2254 struct btrfs_root *reloc_root) 2255 { 2256 struct btrfs_root *root; 2257 2258 if (reloc_root->last_trans == trans->transid) 2259 return 0; 2260 2261 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); 2262 BUG_ON(IS_ERR(root)); 2263 BUG_ON(root->reloc_root != reloc_root); 2264 2265 return btrfs_record_root_in_trans(trans, root); 2266 } 2267 2268 static noinline_for_stack 2269 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 2270 struct reloc_control *rc, 2271 struct backref_node *node, 2272 struct backref_edge *edges[], int *nr) 2273 { 2274 struct backref_node *next; 2275 struct btrfs_root *root; 2276 int index = 0; 2277 2278 next = node; 2279 while (1) { 2280 cond_resched(); 2281 next = walk_up_backref(next, edges, &index); 2282 root = next->root; 2283 BUG_ON(!root); 2284 BUG_ON(!root->ref_cows); 2285 2286 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 2287 record_reloc_root_in_trans(trans, root); 2288 break; 2289 } 2290 2291 btrfs_record_root_in_trans(trans, root); 2292 root = root->reloc_root; 2293 2294 if (next->new_bytenr != root->node->start) { 2295 BUG_ON(next->new_bytenr); 2296 BUG_ON(!list_empty(&next->list)); 2297 next->new_bytenr = root->node->start; 2298 next->root = root; 2299 list_add_tail(&next->list, 2300 &rc->backref_cache.changed); 2301 __mark_block_processed(rc, next); 2302 break; 2303 } 2304 2305 WARN_ON(1); 2306 root = NULL; 2307 next = walk_down_backref(edges, &index); 2308 if (!next || next->level <= node->level) 2309 break; 2310 } 2311 if (!root) 2312 return NULL; 2313 2314 *nr = index; 2315 next = node; 2316 /* setup backref node path for btrfs_reloc_cow_block */ 2317 while (1) { 2318 rc->backref_cache.path[next->level] = next; 2319 if (--index < 0) 2320 break; 2321 next = edges[index]->node[UPPER]; 2322 } 2323 return root; 2324 } 2325 2326 /* 2327 * select a tree root for relocation. return NULL if the block 2328 * is reference counted. we should use do_relocation() in this 2329 * case. return a tree root pointer if the block isn't reference 2330 * counted. return -ENOENT if the block is root of reloc tree. 2331 */ 2332 static noinline_for_stack 2333 struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, 2334 struct backref_node *node) 2335 { 2336 struct backref_node *next; 2337 struct btrfs_root *root; 2338 struct btrfs_root *fs_root = NULL; 2339 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2340 int index = 0; 2341 2342 next = node; 2343 while (1) { 2344 cond_resched(); 2345 next = walk_up_backref(next, edges, &index); 2346 root = next->root; 2347 BUG_ON(!root); 2348 2349 /* no other choice for non-references counted tree */ 2350 if (!root->ref_cows) 2351 return root; 2352 2353 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) 2354 fs_root = root; 2355 2356 if (next != node) 2357 return NULL; 2358 2359 next = walk_down_backref(edges, &index); 2360 if (!next || next->level <= node->level) 2361 break; 2362 } 2363 2364 if (!fs_root) 2365 return ERR_PTR(-ENOENT); 2366 return fs_root; 2367 } 2368 2369 static noinline_for_stack 2370 u64 calcu_metadata_size(struct reloc_control *rc, 2371 struct backref_node *node, int reserve) 2372 { 2373 struct backref_node *next = node; 2374 struct backref_edge *edge; 2375 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2376 u64 num_bytes = 0; 2377 int index = 0; 2378 2379 BUG_ON(reserve && node->processed); 2380 2381 while (next) { 2382 cond_resched(); 2383 while (1) { 2384 if (next->processed && (reserve || next != node)) 2385 break; 2386 2387 num_bytes += btrfs_level_size(rc->extent_root, 2388 next->level); 2389 2390 if (list_empty(&next->upper)) 2391 break; 2392 2393 edge = list_entry(next->upper.next, 2394 struct backref_edge, list[LOWER]); 2395 edges[index++] = edge; 2396 next = edge->node[UPPER]; 2397 } 2398 next = walk_down_backref(edges, &index); 2399 } 2400 return num_bytes; 2401 } 2402 2403 static int reserve_metadata_space(struct btrfs_trans_handle *trans, 2404 struct reloc_control *rc, 2405 struct backref_node *node) 2406 { 2407 struct btrfs_root *root = rc->extent_root; 2408 u64 num_bytes; 2409 int ret; 2410 2411 num_bytes = calcu_metadata_size(rc, node, 1) * 2; 2412 2413 trans->block_rsv = rc->block_rsv; 2414 ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes); 2415 if (ret) { 2416 if (ret == -EAGAIN) 2417 rc->commit_transaction = 1; 2418 return ret; 2419 } 2420 2421 return 0; 2422 } 2423 2424 static void release_metadata_space(struct reloc_control *rc, 2425 struct backref_node *node) 2426 { 2427 u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; 2428 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); 2429 } 2430 2431 /* 2432 * relocate a block tree, and then update pointers in upper level 2433 * blocks that reference the block to point to the new location. 2434 * 2435 * if called by link_to_upper, the block has already been relocated. 2436 * in that case this function just updates pointers. 2437 */ 2438 static int do_relocation(struct btrfs_trans_handle *trans, 2439 struct reloc_control *rc, 2440 struct backref_node *node, 2441 struct btrfs_key *key, 2442 struct btrfs_path *path, int lowest) 2443 { 2444 struct backref_node *upper; 2445 struct backref_edge *edge; 2446 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2447 struct btrfs_root *root; 2448 struct extent_buffer *eb; 2449 u32 blocksize; 2450 u64 bytenr; 2451 u64 generation; 2452 int nr; 2453 int slot; 2454 int ret; 2455 int err = 0; 2456 2457 BUG_ON(lowest && node->eb); 2458 2459 path->lowest_level = node->level + 1; 2460 rc->backref_cache.path[node->level] = node; 2461 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2462 cond_resched(); 2463 2464 upper = edge->node[UPPER]; 2465 root = select_reloc_root(trans, rc, upper, edges, &nr); 2466 BUG_ON(!root); 2467 2468 if (upper->eb && !upper->locked) { 2469 if (!lowest) { 2470 ret = btrfs_bin_search(upper->eb, key, 2471 upper->level, &slot); 2472 BUG_ON(ret); 2473 bytenr = btrfs_node_blockptr(upper->eb, slot); 2474 if (node->eb->start == bytenr) 2475 goto next; 2476 } 2477 drop_node_buffer(upper); 2478 } 2479 2480 if (!upper->eb) { 2481 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2482 if (ret < 0) { 2483 err = ret; 2484 break; 2485 } 2486 BUG_ON(ret > 0); 2487 2488 if (!upper->eb) { 2489 upper->eb = path->nodes[upper->level]; 2490 path->nodes[upper->level] = NULL; 2491 } else { 2492 BUG_ON(upper->eb != path->nodes[upper->level]); 2493 } 2494 2495 upper->locked = 1; 2496 path->locks[upper->level] = 0; 2497 2498 slot = path->slots[upper->level]; 2499 btrfs_release_path(NULL, path); 2500 } else { 2501 ret = btrfs_bin_search(upper->eb, key, upper->level, 2502 &slot); 2503 BUG_ON(ret); 2504 } 2505 2506 bytenr = btrfs_node_blockptr(upper->eb, slot); 2507 if (lowest) { 2508 BUG_ON(bytenr != node->bytenr); 2509 } else { 2510 if (node->eb->start == bytenr) 2511 goto next; 2512 } 2513 2514 blocksize = btrfs_level_size(root, node->level); 2515 generation = btrfs_node_ptr_generation(upper->eb, slot); 2516 eb = read_tree_block(root, bytenr, blocksize, generation); 2517 if (!eb) { 2518 err = -EIO; 2519 goto next; 2520 } 2521 btrfs_tree_lock(eb); 2522 btrfs_set_lock_blocking(eb); 2523 2524 if (!node->eb) { 2525 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2526 slot, &eb); 2527 btrfs_tree_unlock(eb); 2528 free_extent_buffer(eb); 2529 if (ret < 0) { 2530 err = ret; 2531 goto next; 2532 } 2533 BUG_ON(node->eb != eb); 2534 } else { 2535 btrfs_set_node_blockptr(upper->eb, slot, 2536 node->eb->start); 2537 btrfs_set_node_ptr_generation(upper->eb, slot, 2538 trans->transid); 2539 btrfs_mark_buffer_dirty(upper->eb); 2540 2541 ret = btrfs_inc_extent_ref(trans, root, 2542 node->eb->start, blocksize, 2543 upper->eb->start, 2544 btrfs_header_owner(upper->eb), 2545 node->level, 0); 2546 BUG_ON(ret); 2547 2548 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2549 BUG_ON(ret); 2550 } 2551 next: 2552 if (!upper->pending) 2553 drop_node_buffer(upper); 2554 else 2555 unlock_node_buffer(upper); 2556 if (err) 2557 break; 2558 } 2559 2560 if (!err && node->pending) { 2561 drop_node_buffer(node); 2562 list_move_tail(&node->list, &rc->backref_cache.changed); 2563 node->pending = 0; 2564 } 2565 2566 path->lowest_level = 0; 2567 BUG_ON(err == -ENOSPC); 2568 return err; 2569 } 2570 2571 static int link_to_upper(struct btrfs_trans_handle *trans, 2572 struct reloc_control *rc, 2573 struct backref_node *node, 2574 struct btrfs_path *path) 2575 { 2576 struct btrfs_key key; 2577 2578 btrfs_node_key_to_cpu(node->eb, &key, 0); 2579 return do_relocation(trans, rc, node, &key, path, 0); 2580 } 2581 2582 static int finish_pending_nodes(struct btrfs_trans_handle *trans, 2583 struct reloc_control *rc, 2584 struct btrfs_path *path, int err) 2585 { 2586 LIST_HEAD(list); 2587 struct backref_cache *cache = &rc->backref_cache; 2588 struct backref_node *node; 2589 int level; 2590 int ret; 2591 2592 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2593 while (!list_empty(&cache->pending[level])) { 2594 node = list_entry(cache->pending[level].next, 2595 struct backref_node, list); 2596 list_move_tail(&node->list, &list); 2597 BUG_ON(!node->pending); 2598 2599 if (!err) { 2600 ret = link_to_upper(trans, rc, node, path); 2601 if (ret < 0) 2602 err = ret; 2603 } 2604 } 2605 list_splice_init(&list, &cache->pending[level]); 2606 } 2607 return err; 2608 } 2609 2610 static void mark_block_processed(struct reloc_control *rc, 2611 u64 bytenr, u32 blocksize) 2612 { 2613 set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, 2614 EXTENT_DIRTY, GFP_NOFS); 2615 } 2616 2617 static void __mark_block_processed(struct reloc_control *rc, 2618 struct backref_node *node) 2619 { 2620 u32 blocksize; 2621 if (node->level == 0 || 2622 in_block_group(node->bytenr, rc->block_group)) { 2623 blocksize = btrfs_level_size(rc->extent_root, node->level); 2624 mark_block_processed(rc, node->bytenr, blocksize); 2625 } 2626 node->processed = 1; 2627 } 2628 2629 /* 2630 * mark a block and all blocks directly/indirectly reference the block 2631 * as processed. 2632 */ 2633 static void update_processed_blocks(struct reloc_control *rc, 2634 struct backref_node *node) 2635 { 2636 struct backref_node *next = node; 2637 struct backref_edge *edge; 2638 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2639 int index = 0; 2640 2641 while (next) { 2642 cond_resched(); 2643 while (1) { 2644 if (next->processed) 2645 break; 2646 2647 __mark_block_processed(rc, next); 2648 2649 if (list_empty(&next->upper)) 2650 break; 2651 2652 edge = list_entry(next->upper.next, 2653 struct backref_edge, list[LOWER]); 2654 edges[index++] = edge; 2655 next = edge->node[UPPER]; 2656 } 2657 next = walk_down_backref(edges, &index); 2658 } 2659 } 2660 2661 static int tree_block_processed(u64 bytenr, u32 blocksize, 2662 struct reloc_control *rc) 2663 { 2664 if (test_range_bit(&rc->processed_blocks, bytenr, 2665 bytenr + blocksize - 1, EXTENT_DIRTY, 1, NULL)) 2666 return 1; 2667 return 0; 2668 } 2669 2670 static int get_tree_block_key(struct reloc_control *rc, 2671 struct tree_block *block) 2672 { 2673 struct extent_buffer *eb; 2674 2675 BUG_ON(block->key_ready); 2676 eb = read_tree_block(rc->extent_root, block->bytenr, 2677 block->key.objectid, block->key.offset); 2678 BUG_ON(!eb); 2679 WARN_ON(btrfs_header_level(eb) != block->level); 2680 if (block->level == 0) 2681 btrfs_item_key_to_cpu(eb, &block->key, 0); 2682 else 2683 btrfs_node_key_to_cpu(eb, &block->key, 0); 2684 free_extent_buffer(eb); 2685 block->key_ready = 1; 2686 return 0; 2687 } 2688 2689 static int reada_tree_block(struct reloc_control *rc, 2690 struct tree_block *block) 2691 { 2692 BUG_ON(block->key_ready); 2693 readahead_tree_block(rc->extent_root, block->bytenr, 2694 block->key.objectid, block->key.offset); 2695 return 0; 2696 } 2697 2698 /* 2699 * helper function to relocate a tree block 2700 */ 2701 static int relocate_tree_block(struct btrfs_trans_handle *trans, 2702 struct reloc_control *rc, 2703 struct backref_node *node, 2704 struct btrfs_key *key, 2705 struct btrfs_path *path) 2706 { 2707 struct btrfs_root *root; 2708 int release = 0; 2709 int ret = 0; 2710 2711 if (!node) 2712 return 0; 2713 2714 BUG_ON(node->processed); 2715 root = select_one_root(trans, node); 2716 if (root == ERR_PTR(-ENOENT)) { 2717 update_processed_blocks(rc, node); 2718 goto out; 2719 } 2720 2721 if (!root || root->ref_cows) { 2722 ret = reserve_metadata_space(trans, rc, node); 2723 if (ret) 2724 goto out; 2725 release = 1; 2726 } 2727 2728 if (root) { 2729 if (root->ref_cows) { 2730 BUG_ON(node->new_bytenr); 2731 BUG_ON(!list_empty(&node->list)); 2732 btrfs_record_root_in_trans(trans, root); 2733 root = root->reloc_root; 2734 node->new_bytenr = root->node->start; 2735 node->root = root; 2736 list_add_tail(&node->list, &rc->backref_cache.changed); 2737 } else { 2738 path->lowest_level = node->level; 2739 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2740 btrfs_release_path(root, path); 2741 if (ret > 0) 2742 ret = 0; 2743 } 2744 if (!ret) 2745 update_processed_blocks(rc, node); 2746 } else { 2747 ret = do_relocation(trans, rc, node, key, path, 1); 2748 } 2749 out: 2750 if (ret || node->level == 0 || node->cowonly) { 2751 if (release) 2752 release_metadata_space(rc, node); 2753 remove_backref_node(&rc->backref_cache, node); 2754 } 2755 return ret; 2756 } 2757 2758 /* 2759 * relocate a list of blocks 2760 */ 2761 static noinline_for_stack 2762 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2763 struct reloc_control *rc, struct rb_root *blocks) 2764 { 2765 struct backref_node *node; 2766 struct btrfs_path *path; 2767 struct tree_block *block; 2768 struct rb_node *rb_node; 2769 int ret; 2770 int err = 0; 2771 2772 path = btrfs_alloc_path(); 2773 if (!path) 2774 return -ENOMEM; 2775 2776 rb_node = rb_first(blocks); 2777 while (rb_node) { 2778 block = rb_entry(rb_node, struct tree_block, rb_node); 2779 if (!block->key_ready) 2780 reada_tree_block(rc, block); 2781 rb_node = rb_next(rb_node); 2782 } 2783 2784 rb_node = rb_first(blocks); 2785 while (rb_node) { 2786 block = rb_entry(rb_node, struct tree_block, rb_node); 2787 if (!block->key_ready) 2788 get_tree_block_key(rc, block); 2789 rb_node = rb_next(rb_node); 2790 } 2791 2792 rb_node = rb_first(blocks); 2793 while (rb_node) { 2794 block = rb_entry(rb_node, struct tree_block, rb_node); 2795 2796 node = build_backref_tree(rc, &block->key, 2797 block->level, block->bytenr); 2798 if (IS_ERR(node)) { 2799 err = PTR_ERR(node); 2800 goto out; 2801 } 2802 2803 ret = relocate_tree_block(trans, rc, node, &block->key, 2804 path); 2805 if (ret < 0) { 2806 if (ret != -EAGAIN || rb_node == rb_first(blocks)) 2807 err = ret; 2808 goto out; 2809 } 2810 rb_node = rb_next(rb_node); 2811 } 2812 out: 2813 free_block_list(blocks); 2814 err = finish_pending_nodes(trans, rc, path, err); 2815 2816 btrfs_free_path(path); 2817 return err; 2818 } 2819 2820 static noinline_for_stack 2821 int prealloc_file_extent_cluster(struct inode *inode, 2822 struct file_extent_cluster *cluster) 2823 { 2824 u64 alloc_hint = 0; 2825 u64 start; 2826 u64 end; 2827 u64 offset = BTRFS_I(inode)->index_cnt; 2828 u64 num_bytes; 2829 int nr = 0; 2830 int ret = 0; 2831 2832 BUG_ON(cluster->start != cluster->boundary[0]); 2833 mutex_lock(&inode->i_mutex); 2834 2835 ret = btrfs_check_data_free_space(inode, cluster->end + 2836 1 - cluster->start); 2837 if (ret) 2838 goto out; 2839 2840 while (nr < cluster->nr) { 2841 start = cluster->boundary[nr] - offset; 2842 if (nr + 1 < cluster->nr) 2843 end = cluster->boundary[nr + 1] - 1 - offset; 2844 else 2845 end = cluster->end - offset; 2846 2847 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2848 num_bytes = end + 1 - start; 2849 ret = btrfs_prealloc_file_range(inode, 0, start, 2850 num_bytes, num_bytes, 2851 end + 1, &alloc_hint); 2852 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2853 if (ret) 2854 break; 2855 nr++; 2856 } 2857 btrfs_free_reserved_data_space(inode, cluster->end + 2858 1 - cluster->start); 2859 out: 2860 mutex_unlock(&inode->i_mutex); 2861 return ret; 2862 } 2863 2864 static noinline_for_stack 2865 int setup_extent_mapping(struct inode *inode, u64 start, u64 end, 2866 u64 block_start) 2867 { 2868 struct btrfs_root *root = BTRFS_I(inode)->root; 2869 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2870 struct extent_map *em; 2871 int ret = 0; 2872 2873 em = alloc_extent_map(GFP_NOFS); 2874 if (!em) 2875 return -ENOMEM; 2876 2877 em->start = start; 2878 em->len = end + 1 - start; 2879 em->block_len = em->len; 2880 em->block_start = block_start; 2881 em->bdev = root->fs_info->fs_devices->latest_bdev; 2882 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2883 2884 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2885 while (1) { 2886 write_lock(&em_tree->lock); 2887 ret = add_extent_mapping(em_tree, em); 2888 write_unlock(&em_tree->lock); 2889 if (ret != -EEXIST) { 2890 free_extent_map(em); 2891 break; 2892 } 2893 btrfs_drop_extent_cache(inode, start, end, 0); 2894 } 2895 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2896 return ret; 2897 } 2898 2899 static int relocate_file_extent_cluster(struct inode *inode, 2900 struct file_extent_cluster *cluster) 2901 { 2902 u64 page_start; 2903 u64 page_end; 2904 u64 offset = BTRFS_I(inode)->index_cnt; 2905 unsigned long index; 2906 unsigned long last_index; 2907 struct page *page; 2908 struct file_ra_state *ra; 2909 int nr = 0; 2910 int ret = 0; 2911 2912 if (!cluster->nr) 2913 return 0; 2914 2915 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2916 if (!ra) 2917 return -ENOMEM; 2918 2919 ret = prealloc_file_extent_cluster(inode, cluster); 2920 if (ret) 2921 goto out; 2922 2923 file_ra_state_init(ra, inode->i_mapping); 2924 2925 ret = setup_extent_mapping(inode, cluster->start - offset, 2926 cluster->end - offset, cluster->start); 2927 if (ret) 2928 goto out; 2929 2930 index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; 2931 last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; 2932 while (index <= last_index) { 2933 ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); 2934 if (ret) 2935 goto out; 2936 2937 page = find_lock_page(inode->i_mapping, index); 2938 if (!page) { 2939 page_cache_sync_readahead(inode->i_mapping, 2940 ra, NULL, index, 2941 last_index + 1 - index); 2942 page = grab_cache_page(inode->i_mapping, index); 2943 if (!page) { 2944 btrfs_delalloc_release_metadata(inode, 2945 PAGE_CACHE_SIZE); 2946 ret = -ENOMEM; 2947 goto out; 2948 } 2949 } 2950 2951 if (PageReadahead(page)) { 2952 page_cache_async_readahead(inode->i_mapping, 2953 ra, NULL, page, index, 2954 last_index + 1 - index); 2955 } 2956 2957 if (!PageUptodate(page)) { 2958 btrfs_readpage(NULL, page); 2959 lock_page(page); 2960 if (!PageUptodate(page)) { 2961 unlock_page(page); 2962 page_cache_release(page); 2963 btrfs_delalloc_release_metadata(inode, 2964 PAGE_CACHE_SIZE); 2965 ret = -EIO; 2966 goto out; 2967 } 2968 } 2969 2970 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2971 page_end = page_start + PAGE_CACHE_SIZE - 1; 2972 2973 lock_extent(&BTRFS_I(inode)->io_tree, 2974 page_start, page_end, GFP_NOFS); 2975 2976 set_page_extent_mapped(page); 2977 2978 if (nr < cluster->nr && 2979 page_start + offset == cluster->boundary[nr]) { 2980 set_extent_bits(&BTRFS_I(inode)->io_tree, 2981 page_start, page_end, 2982 EXTENT_BOUNDARY, GFP_NOFS); 2983 nr++; 2984 } 2985 2986 btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); 2987 set_page_dirty(page); 2988 2989 unlock_extent(&BTRFS_I(inode)->io_tree, 2990 page_start, page_end, GFP_NOFS); 2991 unlock_page(page); 2992 page_cache_release(page); 2993 2994 index++; 2995 balance_dirty_pages_ratelimited(inode->i_mapping); 2996 btrfs_throttle(BTRFS_I(inode)->root); 2997 } 2998 WARN_ON(nr != cluster->nr); 2999 out: 3000 kfree(ra); 3001 return ret; 3002 } 3003 3004 static noinline_for_stack 3005 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key, 3006 struct file_extent_cluster *cluster) 3007 { 3008 int ret; 3009 3010 if (cluster->nr > 0 && extent_key->objectid != cluster->end + 1) { 3011 ret = relocate_file_extent_cluster(inode, cluster); 3012 if (ret) 3013 return ret; 3014 cluster->nr = 0; 3015 } 3016 3017 if (!cluster->nr) 3018 cluster->start = extent_key->objectid; 3019 else 3020 BUG_ON(cluster->nr >= MAX_EXTENTS); 3021 cluster->end = extent_key->objectid + extent_key->offset - 1; 3022 cluster->boundary[cluster->nr] = extent_key->objectid; 3023 cluster->nr++; 3024 3025 if (cluster->nr >= MAX_EXTENTS) { 3026 ret = relocate_file_extent_cluster(inode, cluster); 3027 if (ret) 3028 return ret; 3029 cluster->nr = 0; 3030 } 3031 return 0; 3032 } 3033 3034 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3035 static int get_ref_objectid_v0(struct reloc_control *rc, 3036 struct btrfs_path *path, 3037 struct btrfs_key *extent_key, 3038 u64 *ref_objectid, int *path_change) 3039 { 3040 struct btrfs_key key; 3041 struct extent_buffer *leaf; 3042 struct btrfs_extent_ref_v0 *ref0; 3043 int ret; 3044 int slot; 3045 3046 leaf = path->nodes[0]; 3047 slot = path->slots[0]; 3048 while (1) { 3049 if (slot >= btrfs_header_nritems(leaf)) { 3050 ret = btrfs_next_leaf(rc->extent_root, path); 3051 if (ret < 0) 3052 return ret; 3053 BUG_ON(ret > 0); 3054 leaf = path->nodes[0]; 3055 slot = path->slots[0]; 3056 if (path_change) 3057 *path_change = 1; 3058 } 3059 btrfs_item_key_to_cpu(leaf, &key, slot); 3060 if (key.objectid != extent_key->objectid) 3061 return -ENOENT; 3062 3063 if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 3064 slot++; 3065 continue; 3066 } 3067 ref0 = btrfs_item_ptr(leaf, slot, 3068 struct btrfs_extent_ref_v0); 3069 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 3070 break; 3071 } 3072 return 0; 3073 } 3074 #endif 3075 3076 /* 3077 * helper to add a tree block to the list. 3078 * the major work is getting the generation and level of the block 3079 */ 3080 static int add_tree_block(struct reloc_control *rc, 3081 struct btrfs_key *extent_key, 3082 struct btrfs_path *path, 3083 struct rb_root *blocks) 3084 { 3085 struct extent_buffer *eb; 3086 struct btrfs_extent_item *ei; 3087 struct btrfs_tree_block_info *bi; 3088 struct tree_block *block; 3089 struct rb_node *rb_node; 3090 u32 item_size; 3091 int level = -1; 3092 int generation; 3093 3094 eb = path->nodes[0]; 3095 item_size = btrfs_item_size_nr(eb, path->slots[0]); 3096 3097 if (item_size >= sizeof(*ei) + sizeof(*bi)) { 3098 ei = btrfs_item_ptr(eb, path->slots[0], 3099 struct btrfs_extent_item); 3100 bi = (struct btrfs_tree_block_info *)(ei + 1); 3101 generation = btrfs_extent_generation(eb, ei); 3102 level = btrfs_tree_block_level(eb, bi); 3103 } else { 3104 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3105 u64 ref_owner; 3106 int ret; 3107 3108 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 3109 ret = get_ref_objectid_v0(rc, path, extent_key, 3110 &ref_owner, NULL); 3111 if (ret < 0) 3112 return ret; 3113 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 3114 level = (int)ref_owner; 3115 /* FIXME: get real generation */ 3116 generation = 0; 3117 #else 3118 BUG(); 3119 #endif 3120 } 3121 3122 btrfs_release_path(rc->extent_root, path); 3123 3124 BUG_ON(level == -1); 3125 3126 block = kmalloc(sizeof(*block), GFP_NOFS); 3127 if (!block) 3128 return -ENOMEM; 3129 3130 block->bytenr = extent_key->objectid; 3131 block->key.objectid = extent_key->offset; 3132 block->key.offset = generation; 3133 block->level = level; 3134 block->key_ready = 0; 3135 3136 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 3137 BUG_ON(rb_node); 3138 3139 return 0; 3140 } 3141 3142 /* 3143 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY 3144 */ 3145 static int __add_tree_block(struct reloc_control *rc, 3146 u64 bytenr, u32 blocksize, 3147 struct rb_root *blocks) 3148 { 3149 struct btrfs_path *path; 3150 struct btrfs_key key; 3151 int ret; 3152 3153 if (tree_block_processed(bytenr, blocksize, rc)) 3154 return 0; 3155 3156 if (tree_search(blocks, bytenr)) 3157 return 0; 3158 3159 path = btrfs_alloc_path(); 3160 if (!path) 3161 return -ENOMEM; 3162 3163 key.objectid = bytenr; 3164 key.type = BTRFS_EXTENT_ITEM_KEY; 3165 key.offset = blocksize; 3166 3167 path->search_commit_root = 1; 3168 path->skip_locking = 1; 3169 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 3170 if (ret < 0) 3171 goto out; 3172 BUG_ON(ret); 3173 3174 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3175 ret = add_tree_block(rc, &key, path, blocks); 3176 out: 3177 btrfs_free_path(path); 3178 return ret; 3179 } 3180 3181 /* 3182 * helper to check if the block use full backrefs for pointers in it 3183 */ 3184 static int block_use_full_backref(struct reloc_control *rc, 3185 struct extent_buffer *eb) 3186 { 3187 u64 flags; 3188 int ret; 3189 3190 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || 3191 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) 3192 return 1; 3193 3194 ret = btrfs_lookup_extent_info(NULL, rc->extent_root, 3195 eb->start, eb->len, NULL, &flags); 3196 BUG_ON(ret); 3197 3198 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 3199 ret = 1; 3200 else 3201 ret = 0; 3202 return ret; 3203 } 3204 3205 static int delete_block_group_cache(struct btrfs_fs_info *fs_info, 3206 struct inode *inode, u64 ino) 3207 { 3208 struct btrfs_key key; 3209 struct btrfs_path *path; 3210 struct btrfs_root *root = fs_info->tree_root; 3211 struct btrfs_trans_handle *trans; 3212 unsigned long nr; 3213 int ret = 0; 3214 3215 if (inode) 3216 goto truncate; 3217 3218 key.objectid = ino; 3219 key.type = BTRFS_INODE_ITEM_KEY; 3220 key.offset = 0; 3221 3222 inode = btrfs_iget(fs_info->sb, &key, root, NULL); 3223 if (!inode || IS_ERR(inode) || is_bad_inode(inode)) { 3224 if (inode && !IS_ERR(inode)) 3225 iput(inode); 3226 return -ENOENT; 3227 } 3228 3229 truncate: 3230 path = btrfs_alloc_path(); 3231 if (!path) { 3232 ret = -ENOMEM; 3233 goto out; 3234 } 3235 3236 trans = btrfs_join_transaction(root, 0); 3237 if (IS_ERR(trans)) { 3238 btrfs_free_path(path); 3239 ret = PTR_ERR(trans); 3240 goto out; 3241 } 3242 3243 ret = btrfs_truncate_free_space_cache(root, trans, path, inode); 3244 3245 btrfs_free_path(path); 3246 nr = trans->blocks_used; 3247 btrfs_end_transaction(trans, root); 3248 btrfs_btree_balance_dirty(root, nr); 3249 out: 3250 iput(inode); 3251 return ret; 3252 } 3253 3254 /* 3255 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 3256 * this function scans fs tree to find blocks reference the data extent 3257 */ 3258 static int find_data_references(struct reloc_control *rc, 3259 struct btrfs_key *extent_key, 3260 struct extent_buffer *leaf, 3261 struct btrfs_extent_data_ref *ref, 3262 struct rb_root *blocks) 3263 { 3264 struct btrfs_path *path; 3265 struct tree_block *block; 3266 struct btrfs_root *root; 3267 struct btrfs_file_extent_item *fi; 3268 struct rb_node *rb_node; 3269 struct btrfs_key key; 3270 u64 ref_root; 3271 u64 ref_objectid; 3272 u64 ref_offset; 3273 u32 ref_count; 3274 u32 nritems; 3275 int err = 0; 3276 int added = 0; 3277 int counted; 3278 int ret; 3279 3280 ref_root = btrfs_extent_data_ref_root(leaf, ref); 3281 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 3282 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 3283 ref_count = btrfs_extent_data_ref_count(leaf, ref); 3284 3285 /* 3286 * This is an extent belonging to the free space cache, lets just delete 3287 * it and redo the search. 3288 */ 3289 if (ref_root == BTRFS_ROOT_TREE_OBJECTID) { 3290 ret = delete_block_group_cache(rc->extent_root->fs_info, 3291 NULL, ref_objectid); 3292 if (ret != -ENOENT) 3293 return ret; 3294 ret = 0; 3295 } 3296 3297 path = btrfs_alloc_path(); 3298 if (!path) 3299 return -ENOMEM; 3300 3301 root = read_fs_root(rc->extent_root->fs_info, ref_root); 3302 if (IS_ERR(root)) { 3303 err = PTR_ERR(root); 3304 goto out; 3305 } 3306 3307 key.objectid = ref_objectid; 3308 key.offset = ref_offset; 3309 key.type = BTRFS_EXTENT_DATA_KEY; 3310 3311 path->search_commit_root = 1; 3312 path->skip_locking = 1; 3313 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 3314 if (ret < 0) { 3315 err = ret; 3316 goto out; 3317 } 3318 3319 leaf = path->nodes[0]; 3320 nritems = btrfs_header_nritems(leaf); 3321 /* 3322 * the references in tree blocks that use full backrefs 3323 * are not counted in 3324 */ 3325 if (block_use_full_backref(rc, leaf)) 3326 counted = 0; 3327 else 3328 counted = 1; 3329 rb_node = tree_search(blocks, leaf->start); 3330 if (rb_node) { 3331 if (counted) 3332 added = 1; 3333 else 3334 path->slots[0] = nritems; 3335 } 3336 3337 while (ref_count > 0) { 3338 while (path->slots[0] >= nritems) { 3339 ret = btrfs_next_leaf(root, path); 3340 if (ret < 0) { 3341 err = ret; 3342 goto out; 3343 } 3344 if (ret > 0) { 3345 WARN_ON(1); 3346 goto out; 3347 } 3348 3349 leaf = path->nodes[0]; 3350 nritems = btrfs_header_nritems(leaf); 3351 added = 0; 3352 3353 if (block_use_full_backref(rc, leaf)) 3354 counted = 0; 3355 else 3356 counted = 1; 3357 rb_node = tree_search(blocks, leaf->start); 3358 if (rb_node) { 3359 if (counted) 3360 added = 1; 3361 else 3362 path->slots[0] = nritems; 3363 } 3364 } 3365 3366 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3367 if (key.objectid != ref_objectid || 3368 key.type != BTRFS_EXTENT_DATA_KEY) { 3369 WARN_ON(1); 3370 break; 3371 } 3372 3373 fi = btrfs_item_ptr(leaf, path->slots[0], 3374 struct btrfs_file_extent_item); 3375 3376 if (btrfs_file_extent_type(leaf, fi) == 3377 BTRFS_FILE_EXTENT_INLINE) 3378 goto next; 3379 3380 if (btrfs_file_extent_disk_bytenr(leaf, fi) != 3381 extent_key->objectid) 3382 goto next; 3383 3384 key.offset -= btrfs_file_extent_offset(leaf, fi); 3385 if (key.offset != ref_offset) 3386 goto next; 3387 3388 if (counted) 3389 ref_count--; 3390 if (added) 3391 goto next; 3392 3393 if (!tree_block_processed(leaf->start, leaf->len, rc)) { 3394 block = kmalloc(sizeof(*block), GFP_NOFS); 3395 if (!block) { 3396 err = -ENOMEM; 3397 break; 3398 } 3399 block->bytenr = leaf->start; 3400 btrfs_item_key_to_cpu(leaf, &block->key, 0); 3401 block->level = 0; 3402 block->key_ready = 1; 3403 rb_node = tree_insert(blocks, block->bytenr, 3404 &block->rb_node); 3405 BUG_ON(rb_node); 3406 } 3407 if (counted) 3408 added = 1; 3409 else 3410 path->slots[0] = nritems; 3411 next: 3412 path->slots[0]++; 3413 3414 } 3415 out: 3416 btrfs_free_path(path); 3417 return err; 3418 } 3419 3420 /* 3421 * hepler to find all tree blocks that reference a given data extent 3422 */ 3423 static noinline_for_stack 3424 int add_data_references(struct reloc_control *rc, 3425 struct btrfs_key *extent_key, 3426 struct btrfs_path *path, 3427 struct rb_root *blocks) 3428 { 3429 struct btrfs_key key; 3430 struct extent_buffer *eb; 3431 struct btrfs_extent_data_ref *dref; 3432 struct btrfs_extent_inline_ref *iref; 3433 unsigned long ptr; 3434 unsigned long end; 3435 u32 blocksize = btrfs_level_size(rc->extent_root, 0); 3436 int ret; 3437 int err = 0; 3438 3439 eb = path->nodes[0]; 3440 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3441 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3442 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3443 if (ptr + sizeof(struct btrfs_extent_item_v0) == end) 3444 ptr = end; 3445 else 3446 #endif 3447 ptr += sizeof(struct btrfs_extent_item); 3448 3449 while (ptr < end) { 3450 iref = (struct btrfs_extent_inline_ref *)ptr; 3451 key.type = btrfs_extent_inline_ref_type(eb, iref); 3452 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3453 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3454 ret = __add_tree_block(rc, key.offset, blocksize, 3455 blocks); 3456 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3457 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 3458 ret = find_data_references(rc, extent_key, 3459 eb, dref, blocks); 3460 } else { 3461 BUG(); 3462 } 3463 ptr += btrfs_extent_inline_ref_size(key.type); 3464 } 3465 WARN_ON(ptr > end); 3466 3467 while (1) { 3468 cond_resched(); 3469 eb = path->nodes[0]; 3470 if (path->slots[0] >= btrfs_header_nritems(eb)) { 3471 ret = btrfs_next_leaf(rc->extent_root, path); 3472 if (ret < 0) { 3473 err = ret; 3474 break; 3475 } 3476 if (ret > 0) 3477 break; 3478 eb = path->nodes[0]; 3479 } 3480 3481 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 3482 if (key.objectid != extent_key->objectid) 3483 break; 3484 3485 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3486 if (key.type == BTRFS_SHARED_DATA_REF_KEY || 3487 key.type == BTRFS_EXTENT_REF_V0_KEY) { 3488 #else 3489 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 3490 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3491 #endif 3492 ret = __add_tree_block(rc, key.offset, blocksize, 3493 blocks); 3494 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3495 dref = btrfs_item_ptr(eb, path->slots[0], 3496 struct btrfs_extent_data_ref); 3497 ret = find_data_references(rc, extent_key, 3498 eb, dref, blocks); 3499 } else { 3500 ret = 0; 3501 } 3502 if (ret) { 3503 err = ret; 3504 break; 3505 } 3506 path->slots[0]++; 3507 } 3508 btrfs_release_path(rc->extent_root, path); 3509 if (err) 3510 free_block_list(blocks); 3511 return err; 3512 } 3513 3514 /* 3515 * hepler to find next unprocessed extent 3516 */ 3517 static noinline_for_stack 3518 int find_next_extent(struct btrfs_trans_handle *trans, 3519 struct reloc_control *rc, struct btrfs_path *path, 3520 struct btrfs_key *extent_key) 3521 { 3522 struct btrfs_key key; 3523 struct extent_buffer *leaf; 3524 u64 start, end, last; 3525 int ret; 3526 3527 last = rc->block_group->key.objectid + rc->block_group->key.offset; 3528 while (1) { 3529 cond_resched(); 3530 if (rc->search_start >= last) { 3531 ret = 1; 3532 break; 3533 } 3534 3535 key.objectid = rc->search_start; 3536 key.type = BTRFS_EXTENT_ITEM_KEY; 3537 key.offset = 0; 3538 3539 path->search_commit_root = 1; 3540 path->skip_locking = 1; 3541 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3542 0, 0); 3543 if (ret < 0) 3544 break; 3545 next: 3546 leaf = path->nodes[0]; 3547 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 3548 ret = btrfs_next_leaf(rc->extent_root, path); 3549 if (ret != 0) 3550 break; 3551 leaf = path->nodes[0]; 3552 } 3553 3554 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3555 if (key.objectid >= last) { 3556 ret = 1; 3557 break; 3558 } 3559 3560 if (key.type != BTRFS_EXTENT_ITEM_KEY || 3561 key.objectid + key.offset <= rc->search_start) { 3562 path->slots[0]++; 3563 goto next; 3564 } 3565 3566 ret = find_first_extent_bit(&rc->processed_blocks, 3567 key.objectid, &start, &end, 3568 EXTENT_DIRTY); 3569 3570 if (ret == 0 && start <= key.objectid) { 3571 btrfs_release_path(rc->extent_root, path); 3572 rc->search_start = end + 1; 3573 } else { 3574 rc->search_start = key.objectid + key.offset; 3575 memcpy(extent_key, &key, sizeof(key)); 3576 return 0; 3577 } 3578 } 3579 btrfs_release_path(rc->extent_root, path); 3580 return ret; 3581 } 3582 3583 static void set_reloc_control(struct reloc_control *rc) 3584 { 3585 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3586 mutex_lock(&fs_info->trans_mutex); 3587 fs_info->reloc_ctl = rc; 3588 mutex_unlock(&fs_info->trans_mutex); 3589 } 3590 3591 static void unset_reloc_control(struct reloc_control *rc) 3592 { 3593 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3594 mutex_lock(&fs_info->trans_mutex); 3595 fs_info->reloc_ctl = NULL; 3596 mutex_unlock(&fs_info->trans_mutex); 3597 } 3598 3599 static int check_extent_flags(u64 flags) 3600 { 3601 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3602 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3603 return 1; 3604 if (!(flags & BTRFS_EXTENT_FLAG_DATA) && 3605 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3606 return 1; 3607 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3608 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 3609 return 1; 3610 return 0; 3611 } 3612 3613 static noinline_for_stack 3614 int prepare_to_relocate(struct reloc_control *rc) 3615 { 3616 struct btrfs_trans_handle *trans; 3617 int ret; 3618 3619 rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); 3620 if (!rc->block_rsv) 3621 return -ENOMEM; 3622 3623 /* 3624 * reserve some space for creating reloc trees. 3625 * btrfs_init_reloc_root will use them when there 3626 * is no reservation in transaction handle. 3627 */ 3628 ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, 3629 rc->extent_root->nodesize * 256); 3630 if (ret) 3631 return ret; 3632 3633 rc->block_rsv->refill_used = 1; 3634 btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); 3635 3636 memset(&rc->cluster, 0, sizeof(rc->cluster)); 3637 rc->search_start = rc->block_group->key.objectid; 3638 rc->extents_found = 0; 3639 rc->nodes_relocated = 0; 3640 rc->merging_rsv_size = 0; 3641 3642 rc->create_reloc_tree = 1; 3643 set_reloc_control(rc); 3644 3645 trans = btrfs_join_transaction(rc->extent_root, 1); 3646 BUG_ON(IS_ERR(trans)); 3647 btrfs_commit_transaction(trans, rc->extent_root); 3648 return 0; 3649 } 3650 3651 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3652 { 3653 struct rb_root blocks = RB_ROOT; 3654 struct btrfs_key key; 3655 struct btrfs_trans_handle *trans = NULL; 3656 struct btrfs_path *path; 3657 struct btrfs_extent_item *ei; 3658 unsigned long nr; 3659 u64 flags; 3660 u32 item_size; 3661 int ret; 3662 int err = 0; 3663 int progress = 0; 3664 3665 path = btrfs_alloc_path(); 3666 if (!path) 3667 return -ENOMEM; 3668 3669 ret = prepare_to_relocate(rc); 3670 if (ret) { 3671 err = ret; 3672 goto out_free; 3673 } 3674 3675 while (1) { 3676 progress++; 3677 trans = btrfs_start_transaction(rc->extent_root, 0); 3678 BUG_ON(IS_ERR(trans)); 3679 restart: 3680 if (update_backref_cache(trans, &rc->backref_cache)) { 3681 btrfs_end_transaction(trans, rc->extent_root); 3682 continue; 3683 } 3684 3685 ret = find_next_extent(trans, rc, path, &key); 3686 if (ret < 0) 3687 err = ret; 3688 if (ret != 0) 3689 break; 3690 3691 rc->extents_found++; 3692 3693 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3694 struct btrfs_extent_item); 3695 item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); 3696 if (item_size >= sizeof(*ei)) { 3697 flags = btrfs_extent_flags(path->nodes[0], ei); 3698 ret = check_extent_flags(flags); 3699 BUG_ON(ret); 3700 3701 } else { 3702 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3703 u64 ref_owner; 3704 int path_change = 0; 3705 3706 BUG_ON(item_size != 3707 sizeof(struct btrfs_extent_item_v0)); 3708 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 3709 &path_change); 3710 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 3711 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 3712 else 3713 flags = BTRFS_EXTENT_FLAG_DATA; 3714 3715 if (path_change) { 3716 btrfs_release_path(rc->extent_root, path); 3717 3718 path->search_commit_root = 1; 3719 path->skip_locking = 1; 3720 ret = btrfs_search_slot(NULL, rc->extent_root, 3721 &key, path, 0, 0); 3722 if (ret < 0) { 3723 err = ret; 3724 break; 3725 } 3726 BUG_ON(ret > 0); 3727 } 3728 #else 3729 BUG(); 3730 #endif 3731 } 3732 3733 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 3734 ret = add_tree_block(rc, &key, path, &blocks); 3735 } else if (rc->stage == UPDATE_DATA_PTRS && 3736 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3737 ret = add_data_references(rc, &key, path, &blocks); 3738 } else { 3739 btrfs_release_path(rc->extent_root, path); 3740 ret = 0; 3741 } 3742 if (ret < 0) { 3743 err = ret; 3744 break; 3745 } 3746 3747 if (!RB_EMPTY_ROOT(&blocks)) { 3748 ret = relocate_tree_blocks(trans, rc, &blocks); 3749 if (ret < 0) { 3750 if (ret != -EAGAIN) { 3751 err = ret; 3752 break; 3753 } 3754 rc->extents_found--; 3755 rc->search_start = key.objectid; 3756 } 3757 } 3758 3759 ret = btrfs_block_rsv_check(trans, rc->extent_root, 3760 rc->block_rsv, 0, 5); 3761 if (ret < 0) { 3762 if (ret != -EAGAIN) { 3763 err = ret; 3764 WARN_ON(1); 3765 break; 3766 } 3767 rc->commit_transaction = 1; 3768 } 3769 3770 if (rc->commit_transaction) { 3771 rc->commit_transaction = 0; 3772 ret = btrfs_commit_transaction(trans, rc->extent_root); 3773 BUG_ON(ret); 3774 } else { 3775 nr = trans->blocks_used; 3776 btrfs_end_transaction_throttle(trans, rc->extent_root); 3777 btrfs_btree_balance_dirty(rc->extent_root, nr); 3778 } 3779 trans = NULL; 3780 3781 if (rc->stage == MOVE_DATA_EXTENTS && 3782 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3783 rc->found_file_extent = 1; 3784 ret = relocate_data_extent(rc->data_inode, 3785 &key, &rc->cluster); 3786 if (ret < 0) { 3787 err = ret; 3788 break; 3789 } 3790 } 3791 } 3792 if (trans && progress && err == -ENOSPC) { 3793 ret = btrfs_force_chunk_alloc(trans, rc->extent_root, 3794 rc->block_group->flags); 3795 if (ret == 0) { 3796 err = 0; 3797 progress = 0; 3798 goto restart; 3799 } 3800 } 3801 3802 btrfs_release_path(rc->extent_root, path); 3803 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3804 GFP_NOFS); 3805 3806 if (trans) { 3807 nr = trans->blocks_used; 3808 btrfs_end_transaction_throttle(trans, rc->extent_root); 3809 btrfs_btree_balance_dirty(rc->extent_root, nr); 3810 } 3811 3812 if (!err) { 3813 ret = relocate_file_extent_cluster(rc->data_inode, 3814 &rc->cluster); 3815 if (ret < 0) 3816 err = ret; 3817 } 3818 3819 rc->create_reloc_tree = 0; 3820 set_reloc_control(rc); 3821 3822 backref_cache_cleanup(&rc->backref_cache); 3823 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3824 3825 err = prepare_to_merge(rc, err); 3826 3827 merge_reloc_roots(rc); 3828 3829 rc->merge_reloc_tree = 0; 3830 unset_reloc_control(rc); 3831 btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); 3832 3833 /* get rid of pinned extents */ 3834 trans = btrfs_join_transaction(rc->extent_root, 1); 3835 if (IS_ERR(trans)) 3836 err = PTR_ERR(trans); 3837 else 3838 btrfs_commit_transaction(trans, rc->extent_root); 3839 out_free: 3840 btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); 3841 btrfs_free_path(path); 3842 return err; 3843 } 3844 3845 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3846 struct btrfs_root *root, u64 objectid) 3847 { 3848 struct btrfs_path *path; 3849 struct btrfs_inode_item *item; 3850 struct extent_buffer *leaf; 3851 int ret; 3852 3853 path = btrfs_alloc_path(); 3854 if (!path) 3855 return -ENOMEM; 3856 3857 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 3858 if (ret) 3859 goto out; 3860 3861 leaf = path->nodes[0]; 3862 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3863 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3864 btrfs_set_inode_generation(leaf, item, 1); 3865 btrfs_set_inode_size(leaf, item, 0); 3866 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3867 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | 3868 BTRFS_INODE_PREALLOC); 3869 btrfs_mark_buffer_dirty(leaf); 3870 btrfs_release_path(root, path); 3871 out: 3872 btrfs_free_path(path); 3873 return ret; 3874 } 3875 3876 /* 3877 * helper to create inode for data relocation. 3878 * the inode is in data relocation tree and its link count is 0 3879 */ 3880 static noinline_for_stack 3881 struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 3882 struct btrfs_block_group_cache *group) 3883 { 3884 struct inode *inode = NULL; 3885 struct btrfs_trans_handle *trans; 3886 struct btrfs_root *root; 3887 struct btrfs_key key; 3888 unsigned long nr; 3889 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 3890 int err = 0; 3891 3892 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 3893 if (IS_ERR(root)) 3894 return ERR_CAST(root); 3895 3896 trans = btrfs_start_transaction(root, 6); 3897 if (IS_ERR(trans)) 3898 return ERR_CAST(trans); 3899 3900 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3901 if (err) 3902 goto out; 3903 3904 err = __insert_orphan_inode(trans, root, objectid); 3905 BUG_ON(err); 3906 3907 key.objectid = objectid; 3908 key.type = BTRFS_INODE_ITEM_KEY; 3909 key.offset = 0; 3910 inode = btrfs_iget(root->fs_info->sb, &key, root, NULL); 3911 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 3912 BTRFS_I(inode)->index_cnt = group->key.objectid; 3913 3914 err = btrfs_orphan_add(trans, inode); 3915 out: 3916 nr = trans->blocks_used; 3917 btrfs_end_transaction(trans, root); 3918 btrfs_btree_balance_dirty(root, nr); 3919 if (err) { 3920 if (inode) 3921 iput(inode); 3922 inode = ERR_PTR(err); 3923 } 3924 return inode; 3925 } 3926 3927 static struct reloc_control *alloc_reloc_control(void) 3928 { 3929 struct reloc_control *rc; 3930 3931 rc = kzalloc(sizeof(*rc), GFP_NOFS); 3932 if (!rc) 3933 return NULL; 3934 3935 INIT_LIST_HEAD(&rc->reloc_roots); 3936 backref_cache_init(&rc->backref_cache); 3937 mapping_tree_init(&rc->reloc_root_tree); 3938 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); 3939 return rc; 3940 } 3941 3942 /* 3943 * function to relocate all extents in a block group. 3944 */ 3945 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) 3946 { 3947 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3948 struct reloc_control *rc; 3949 struct inode *inode; 3950 struct btrfs_path *path; 3951 int ret; 3952 int rw = 0; 3953 int err = 0; 3954 3955 rc = alloc_reloc_control(); 3956 if (!rc) 3957 return -ENOMEM; 3958 3959 rc->extent_root = extent_root; 3960 3961 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 3962 BUG_ON(!rc->block_group); 3963 3964 if (!rc->block_group->ro) { 3965 ret = btrfs_set_block_group_ro(extent_root, rc->block_group); 3966 if (ret) { 3967 err = ret; 3968 goto out; 3969 } 3970 rw = 1; 3971 } 3972 3973 path = btrfs_alloc_path(); 3974 if (!path) { 3975 err = -ENOMEM; 3976 goto out; 3977 } 3978 3979 inode = lookup_free_space_inode(fs_info->tree_root, rc->block_group, 3980 path); 3981 btrfs_free_path(path); 3982 3983 if (!IS_ERR(inode)) 3984 ret = delete_block_group_cache(fs_info, inode, 0); 3985 else 3986 ret = PTR_ERR(inode); 3987 3988 if (ret && ret != -ENOENT) { 3989 err = ret; 3990 goto out; 3991 } 3992 3993 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3994 if (IS_ERR(rc->data_inode)) { 3995 err = PTR_ERR(rc->data_inode); 3996 rc->data_inode = NULL; 3997 goto out; 3998 } 3999 4000 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 4001 (unsigned long long)rc->block_group->key.objectid, 4002 (unsigned long long)rc->block_group->flags); 4003 4004 btrfs_start_delalloc_inodes(fs_info->tree_root, 0); 4005 btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); 4006 4007 while (1) { 4008 mutex_lock(&fs_info->cleaner_mutex); 4009 4010 btrfs_clean_old_snapshots(fs_info->tree_root); 4011 ret = relocate_block_group(rc); 4012 4013 mutex_unlock(&fs_info->cleaner_mutex); 4014 if (ret < 0) { 4015 err = ret; 4016 goto out; 4017 } 4018 4019 if (rc->extents_found == 0) 4020 break; 4021 4022 printk(KERN_INFO "btrfs: found %llu extents\n", 4023 (unsigned long long)rc->extents_found); 4024 4025 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 4026 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 4027 invalidate_mapping_pages(rc->data_inode->i_mapping, 4028 0, -1); 4029 rc->stage = UPDATE_DATA_PTRS; 4030 } 4031 } 4032 4033 filemap_write_and_wait_range(fs_info->btree_inode->i_mapping, 4034 rc->block_group->key.objectid, 4035 rc->block_group->key.objectid + 4036 rc->block_group->key.offset - 1); 4037 4038 WARN_ON(rc->block_group->pinned > 0); 4039 WARN_ON(rc->block_group->reserved > 0); 4040 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 4041 out: 4042 if (err && rw) 4043 btrfs_set_block_group_rw(extent_root, rc->block_group); 4044 iput(rc->data_inode); 4045 btrfs_put_block_group(rc->block_group); 4046 kfree(rc); 4047 return err; 4048 } 4049 4050 static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) 4051 { 4052 struct btrfs_trans_handle *trans; 4053 int ret; 4054 4055 trans = btrfs_start_transaction(root->fs_info->tree_root, 0); 4056 BUG_ON(IS_ERR(trans)); 4057 4058 memset(&root->root_item.drop_progress, 0, 4059 sizeof(root->root_item.drop_progress)); 4060 root->root_item.drop_level = 0; 4061 btrfs_set_root_refs(&root->root_item, 0); 4062 ret = btrfs_update_root(trans, root->fs_info->tree_root, 4063 &root->root_key, &root->root_item); 4064 BUG_ON(ret); 4065 4066 ret = btrfs_end_transaction(trans, root->fs_info->tree_root); 4067 BUG_ON(ret); 4068 return 0; 4069 } 4070 4071 /* 4072 * recover relocation interrupted by system crash. 4073 * 4074 * this function resumes merging reloc trees with corresponding fs trees. 4075 * this is important for keeping the sharing of tree blocks 4076 */ 4077 int btrfs_recover_relocation(struct btrfs_root *root) 4078 { 4079 LIST_HEAD(reloc_roots); 4080 struct btrfs_key key; 4081 struct btrfs_root *fs_root; 4082 struct btrfs_root *reloc_root; 4083 struct btrfs_path *path; 4084 struct extent_buffer *leaf; 4085 struct reloc_control *rc = NULL; 4086 struct btrfs_trans_handle *trans; 4087 int ret; 4088 int err = 0; 4089 4090 path = btrfs_alloc_path(); 4091 if (!path) 4092 return -ENOMEM; 4093 4094 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 4095 key.type = BTRFS_ROOT_ITEM_KEY; 4096 key.offset = (u64)-1; 4097 4098 while (1) { 4099 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, 4100 path, 0, 0); 4101 if (ret < 0) { 4102 err = ret; 4103 goto out; 4104 } 4105 if (ret > 0) { 4106 if (path->slots[0] == 0) 4107 break; 4108 path->slots[0]--; 4109 } 4110 leaf = path->nodes[0]; 4111 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4112 btrfs_release_path(root->fs_info->tree_root, path); 4113 4114 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 4115 key.type != BTRFS_ROOT_ITEM_KEY) 4116 break; 4117 4118 reloc_root = btrfs_read_fs_root_no_radix(root, &key); 4119 if (IS_ERR(reloc_root)) { 4120 err = PTR_ERR(reloc_root); 4121 goto out; 4122 } 4123 4124 list_add(&reloc_root->root_list, &reloc_roots); 4125 4126 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 4127 fs_root = read_fs_root(root->fs_info, 4128 reloc_root->root_key.offset); 4129 if (IS_ERR(fs_root)) { 4130 ret = PTR_ERR(fs_root); 4131 if (ret != -ENOENT) { 4132 err = ret; 4133 goto out; 4134 } 4135 mark_garbage_root(reloc_root); 4136 } 4137 } 4138 4139 if (key.offset == 0) 4140 break; 4141 4142 key.offset--; 4143 } 4144 btrfs_release_path(root->fs_info->tree_root, path); 4145 4146 if (list_empty(&reloc_roots)) 4147 goto out; 4148 4149 rc = alloc_reloc_control(); 4150 if (!rc) { 4151 err = -ENOMEM; 4152 goto out; 4153 } 4154 4155 rc->extent_root = root->fs_info->extent_root; 4156 4157 set_reloc_control(rc); 4158 4159 trans = btrfs_join_transaction(rc->extent_root, 1); 4160 if (IS_ERR(trans)) { 4161 unset_reloc_control(rc); 4162 err = PTR_ERR(trans); 4163 goto out_free; 4164 } 4165 4166 rc->merge_reloc_tree = 1; 4167 4168 while (!list_empty(&reloc_roots)) { 4169 reloc_root = list_entry(reloc_roots.next, 4170 struct btrfs_root, root_list); 4171 list_del(&reloc_root->root_list); 4172 4173 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 4174 list_add_tail(&reloc_root->root_list, 4175 &rc->reloc_roots); 4176 continue; 4177 } 4178 4179 fs_root = read_fs_root(root->fs_info, 4180 reloc_root->root_key.offset); 4181 BUG_ON(IS_ERR(fs_root)); 4182 4183 __add_reloc_root(reloc_root); 4184 fs_root->reloc_root = reloc_root; 4185 } 4186 4187 btrfs_commit_transaction(trans, rc->extent_root); 4188 4189 merge_reloc_roots(rc); 4190 4191 unset_reloc_control(rc); 4192 4193 trans = btrfs_join_transaction(rc->extent_root, 1); 4194 if (IS_ERR(trans)) 4195 err = PTR_ERR(trans); 4196 else 4197 btrfs_commit_transaction(trans, rc->extent_root); 4198 out_free: 4199 kfree(rc); 4200 out: 4201 while (!list_empty(&reloc_roots)) { 4202 reloc_root = list_entry(reloc_roots.next, 4203 struct btrfs_root, root_list); 4204 list_del(&reloc_root->root_list); 4205 free_extent_buffer(reloc_root->node); 4206 free_extent_buffer(reloc_root->commit_root); 4207 kfree(reloc_root); 4208 } 4209 btrfs_free_path(path); 4210 4211 if (err == 0) { 4212 /* cleanup orphan inode in data relocation tree */ 4213 fs_root = read_fs_root(root->fs_info, 4214 BTRFS_DATA_RELOC_TREE_OBJECTID); 4215 if (IS_ERR(fs_root)) 4216 err = PTR_ERR(fs_root); 4217 else 4218 err = btrfs_orphan_cleanup(fs_root); 4219 } 4220 return err; 4221 } 4222 4223 /* 4224 * helper to add ordered checksum for data relocation. 4225 * 4226 * cloning checksum properly handles the nodatasum extents. 4227 * it also saves CPU time to re-calculate the checksum. 4228 */ 4229 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 4230 { 4231 struct btrfs_ordered_sum *sums; 4232 struct btrfs_sector_sum *sector_sum; 4233 struct btrfs_ordered_extent *ordered; 4234 struct btrfs_root *root = BTRFS_I(inode)->root; 4235 size_t offset; 4236 int ret; 4237 u64 disk_bytenr; 4238 LIST_HEAD(list); 4239 4240 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 4241 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 4242 4243 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 4244 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 4245 disk_bytenr + len - 1, &list); 4246 4247 while (!list_empty(&list)) { 4248 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 4249 list_del_init(&sums->list); 4250 4251 sector_sum = sums->sums; 4252 sums->bytenr = ordered->start; 4253 4254 offset = 0; 4255 while (offset < sums->len) { 4256 sector_sum->bytenr += ordered->start - disk_bytenr; 4257 sector_sum++; 4258 offset += root->sectorsize; 4259 } 4260 4261 btrfs_add_ordered_sum(inode, ordered, sums); 4262 } 4263 btrfs_put_ordered_extent(ordered); 4264 return ret; 4265 } 4266 4267 void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, 4268 struct btrfs_root *root, struct extent_buffer *buf, 4269 struct extent_buffer *cow) 4270 { 4271 struct reloc_control *rc; 4272 struct backref_node *node; 4273 int first_cow = 0; 4274 int level; 4275 int ret; 4276 4277 rc = root->fs_info->reloc_ctl; 4278 if (!rc) 4279 return; 4280 4281 BUG_ON(rc->stage == UPDATE_DATA_PTRS && 4282 root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); 4283 4284 level = btrfs_header_level(buf); 4285 if (btrfs_header_generation(buf) <= 4286 btrfs_root_last_snapshot(&root->root_item)) 4287 first_cow = 1; 4288 4289 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && 4290 rc->create_reloc_tree) { 4291 WARN_ON(!first_cow && level == 0); 4292 4293 node = rc->backref_cache.path[level]; 4294 BUG_ON(node->bytenr != buf->start && 4295 node->new_bytenr != buf->start); 4296 4297 drop_node_buffer(node); 4298 extent_buffer_get(cow); 4299 node->eb = cow; 4300 node->new_bytenr = cow->start; 4301 4302 if (!node->pending) { 4303 list_move_tail(&node->list, 4304 &rc->backref_cache.pending[level]); 4305 node->pending = 1; 4306 } 4307 4308 if (first_cow) 4309 __mark_block_processed(rc, node); 4310 4311 if (first_cow && level > 0) 4312 rc->nodes_relocated += buf->len; 4313 } 4314 4315 if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { 4316 ret = replace_file_extents(trans, rc, root, cow); 4317 BUG_ON(ret); 4318 } 4319 } 4320 4321 /* 4322 * called before creating snapshot. it calculates metadata reservation 4323 * requried for relocating tree blocks in the snapshot 4324 */ 4325 void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, 4326 struct btrfs_pending_snapshot *pending, 4327 u64 *bytes_to_reserve) 4328 { 4329 struct btrfs_root *root; 4330 struct reloc_control *rc; 4331 4332 root = pending->root; 4333 if (!root->reloc_root) 4334 return; 4335 4336 rc = root->fs_info->reloc_ctl; 4337 if (!rc->merge_reloc_tree) 4338 return; 4339 4340 root = root->reloc_root; 4341 BUG_ON(btrfs_root_refs(&root->root_item) == 0); 4342 /* 4343 * relocation is in the stage of merging trees. the space 4344 * used by merging a reloc tree is twice the size of 4345 * relocated tree nodes in the worst case. half for cowing 4346 * the reloc tree, half for cowing the fs tree. the space 4347 * used by cowing the reloc tree will be freed after the 4348 * tree is dropped. if we create snapshot, cowing the fs 4349 * tree may use more space than it frees. so we need 4350 * reserve extra space. 4351 */ 4352 *bytes_to_reserve += rc->nodes_relocated; 4353 } 4354 4355 /* 4356 * called after snapshot is created. migrate block reservation 4357 * and create reloc root for the newly created snapshot 4358 */ 4359 void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, 4360 struct btrfs_pending_snapshot *pending) 4361 { 4362 struct btrfs_root *root = pending->root; 4363 struct btrfs_root *reloc_root; 4364 struct btrfs_root *new_root; 4365 struct reloc_control *rc; 4366 int ret; 4367 4368 if (!root->reloc_root) 4369 return; 4370 4371 rc = root->fs_info->reloc_ctl; 4372 rc->merging_rsv_size += rc->nodes_relocated; 4373 4374 if (rc->merge_reloc_tree) { 4375 ret = btrfs_block_rsv_migrate(&pending->block_rsv, 4376 rc->block_rsv, 4377 rc->nodes_relocated); 4378 BUG_ON(ret); 4379 } 4380 4381 new_root = pending->snap; 4382 reloc_root = create_reloc_root(trans, root->reloc_root, 4383 new_root->root_key.objectid); 4384 4385 __add_reloc_root(reloc_root); 4386 new_root->reloc_root = reloc_root; 4387 4388 if (rc->create_reloc_tree) { 4389 ret = clone_backref_node(trans, rc, root, reloc_root); 4390 BUG_ON(ret); 4391 } 4392 } 4393