/*
 * Copyright (C) 2009 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/sched.h>
#include <linux/pagemap.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/rbtree.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "volumes.h"
#include "locking.h"
#include "btrfs_inode.h"
#include "async-thread.h"

/*
 * backref_node, mapping_node and tree_block start with this
 */
struct tree_entry {
	struct rb_node rb_node;
	u64 bytenr;
};

/*
 * represents a tree block in the backref cache
 */
struct backref_node {
	struct rb_node rb_node;
	u64 bytenr;
	/* objectid of tree block owner */
	u64 owner;
	/* list of upper level blocks that reference this block */
	struct list_head upper;
	/* list of child blocks in the cache */
	struct list_head lower;
	/* NULL if this node is not a tree root */
	struct btrfs_root *root;
	/* extent buffer got by COWing the block */
	struct extent_buffer *eb;
	/* level of tree block */
	unsigned int level:8;
	/* 1 if the block is root of an old snapshot */
	unsigned int old_root:1;
	/* 1 if no child blocks are in the cache */
	unsigned int lowest:1;
	/* is the extent buffer locked */
	unsigned int locked:1;
	/* has the block been processed */
	unsigned int processed:1;
	/* have backrefs of this block been checked */
	unsigned int checked:1;
};

/*
 * represents a block pointer in the backref cache
 */
struct backref_edge {
	struct list_head list[2];
	struct backref_node *node[2];
	u64 blockptr;
};

#define LOWER	0
#define UPPER	1
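/*
 * An edge connects a child block (LOWER) to a parent block (UPPER).
 * It is threaded onto two lists at once: list[LOWER] sits on the
 * child node's 'upper' list and list[UPPER] sits on the parent
 * node's 'lower' list, so the same edge can be found from either end.
 */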
struct backref_cache {
	/* red black tree of all backref nodes in the cache */
	struct rb_root rb_root;
	/* list of backref nodes with no child block in the cache */
	struct list_head pending[BTRFS_MAX_LEVEL];
	spinlock_t lock;
};

/*
 * map address of tree root to tree
 */
struct mapping_node {
	struct rb_node rb_node;
	u64 bytenr;
	void *data;
};

struct mapping_tree {
	struct rb_root rb_root;
	spinlock_t lock;
};

/*
 * represents a tree block to process
 */
struct tree_block {
	struct rb_node rb_node;
	u64 bytenr;
	struct btrfs_key key;
	unsigned int level:8;
	unsigned int key_ready:1;
};

/* inode vector */
#define INODEVEC_SIZE 16

struct inodevec {
	struct list_head list;
	struct inode *inode[INODEVEC_SIZE];
	int nr;
};

struct reloc_control {
	/* block group to relocate */
	struct btrfs_block_group_cache *block_group;
	/* extent tree */
	struct btrfs_root *extent_root;
	/* inode for moving data */
	struct inode *data_inode;
	struct btrfs_workers workers;
	/* tree blocks that have been processed */
	struct extent_io_tree processed_blocks;
	/* map start of tree root to corresponding reloc tree */
	struct mapping_tree reloc_root_tree;
	/* list of reloc trees */
	struct list_head reloc_roots;
	u64 search_start;
	u64 extents_found;
	u64 extents_skipped;
	int stage;
	int create_reloc_root;
	unsigned int found_file_extent:1;
	unsigned int found_old_snapshot:1;
};

/* stages of data relocation */
#define MOVE_DATA_EXTENTS	0
#define UPDATE_DATA_PTRS	1

/*
 * merge a reloc tree with the corresponding fs tree in worker threads
 */
struct async_merge {
	struct btrfs_work work;
	struct reloc_control *rc;
	struct btrfs_root *root;
	struct completion *done;
	atomic_t *num_pending;
};

static void mapping_tree_init(struct mapping_tree *tree)
{
	tree->rb_root.rb_node = NULL;
	spin_lock_init(&tree->lock);
}

static void backref_cache_init(struct backref_cache *cache)
{
	int i;
	cache->rb_root.rb_node = NULL;
	for (i = 0; i < BTRFS_MAX_LEVEL; i++)
		INIT_LIST_HEAD(&cache->pending[i]);
	spin_lock_init(&cache->lock);
}

static void backref_node_init(struct backref_node *node)
{
	memset(node, 0, sizeof(*node));
	INIT_LIST_HEAD(&node->upper);
	INIT_LIST_HEAD(&node->lower);
	RB_CLEAR_NODE(&node->rb_node);
}

static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr,
				   struct rb_node *node)
{
	struct rb_node **p = &root->rb_node;
	struct rb_node *parent = NULL;
	struct tree_entry *entry;

	while (*p) {
		parent = *p;
		entry = rb_entry(parent, struct tree_entry, rb_node);

		if (bytenr < entry->bytenr)
			p = &(*p)->rb_left;
		else if (bytenr > entry->bytenr)
			p = &(*p)->rb_right;
		else
			return parent;
	}

	rb_link_node(node, parent, p);
	rb_insert_color(node, root);
	return NULL;
}

static struct rb_node *tree_search(struct rb_root *root, u64 bytenr)
{
	struct rb_node *n = root->rb_node;
	struct tree_entry *entry;

	while (n) {
		entry = rb_entry(n, struct tree_entry, rb_node);

		if (bytenr < entry->bytenr)
			n = n->rb_left;
		else if (bytenr > entry->bytenr)
			n = n->rb_right;
		else
			return n;
	}
	return NULL;
}

/*
 * walk up backref nodes until we reach a node that presents a tree root
 */
static struct backref_node *walk_up_backref(struct backref_node *node,
					    struct backref_edge *edges[],
					    int *index)
{
	struct backref_edge *edge;
	int idx = *index;

	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next,
				  struct backref_edge, list[LOWER]);
		edges[idx++] = edge;
		node = edge->node[UPPER];
	}
	*index = idx;
	return node;
}

/*
 * walk down backref nodes to find the start of the next reference path
 */
static struct backref_node *walk_down_backref(struct backref_edge *edges[],
					      int *index)
{
	struct backref_edge *edge;
	struct backref_node *lower;
	int idx = *index;

	while (idx > 0) {
		edge = edges[idx - 1];
		lower = edge->node[LOWER];
		if (list_is_last(&edge->list[LOWER], &lower->upper)) {
			idx--;
			continue;
		}
		edge = list_entry(edge->list[LOWER].next,
				  struct backref_edge, list[LOWER]);
		edges[idx - 1] = edge;
		*index = idx;
		return edge->node[UPPER];
	}
	*index = 0;
	return NULL;
}
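/*
 * Together, walk_up_backref() and walk_down_backref() enumerate every
 * root-to-node path in the backref graph: walking up always follows
 * the first edge on each 'upper' list and records the chain in
 * edges[], walking down backtracks to the deepest node that still has
 * an untried sibling edge and switches to it.
 */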
static void drop_node_buffer(struct backref_node *node)
{
	if (node->eb) {
		if (node->locked) {
			btrfs_tree_unlock(node->eb);
			node->locked = 0;
		}
		free_extent_buffer(node->eb);
		node->eb = NULL;
	}
}

static void drop_backref_node(struct backref_cache *tree,
			      struct backref_node *node)
{
	BUG_ON(!node->lowest);
	BUG_ON(!list_empty(&node->upper));

	drop_node_buffer(node);
	list_del(&node->lower);

	rb_erase(&node->rb_node, &tree->rb_root);
	kfree(node);
}

/*
 * remove a backref node from the backref cache
 */
static void remove_backref_node(struct backref_cache *cache,
				struct backref_node *node)
{
	struct backref_node *upper;
	struct backref_edge *edge;

	if (!node)
		return;

	BUG_ON(!node->lowest);
	while (!list_empty(&node->upper)) {
		edge = list_entry(node->upper.next, struct backref_edge,
				  list[LOWER]);
		upper = edge->node[UPPER];
		list_del(&edge->list[LOWER]);
		list_del(&edge->list[UPPER]);
		kfree(edge);
		/*
		 * add the node to the pending list if no other
		 * child block is cached.
		 */
		if (list_empty(&upper->lower)) {
			list_add_tail(&upper->lower,
				      &cache->pending[upper->level]);
			upper->lowest = 1;
		}
	}
	drop_backref_node(cache, node);
}

/*
 * find reloc tree by address of tree root
 */
static struct btrfs_root *find_reloc_root(struct reloc_control *rc,
					  u64 bytenr)
{
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct btrfs_root *root = NULL;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		root = (struct btrfs_root *)node->data;
	}
	spin_unlock(&rc->reloc_root_tree.lock);
	return root;
}

static int is_cowonly_root(u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
		return 1;
	return 0;
}

static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info,
				       u64 root_objectid)
{
	struct btrfs_key key;

	key.objectid = root_objectid;
	key.type = BTRFS_ROOT_ITEM_KEY;
	if (is_cowonly_root(root_objectid))
		key.offset = 0;
	else
		key.offset = (u64)-1;

	return btrfs_read_fs_root_no_name(fs_info, &key);
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static noinline_for_stack
struct btrfs_root *find_tree_root(struct reloc_control *rc,
				  struct extent_buffer *leaf,
				  struct btrfs_extent_ref_v0 *ref0)
{
	struct btrfs_root *root;
	u64 root_objectid = btrfs_ref_root_v0(leaf, ref0);
	u64 generation = btrfs_ref_generation_v0(leaf, ref0);

	BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID);

	root = read_fs_root(rc->extent_root->fs_info, root_objectid);
	BUG_ON(IS_ERR(root));

	if (root->ref_cows &&
	    generation != btrfs_root_generation(&root->root_item))
		return NULL;

	return root;
}
#endif
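/*
 * For a tree block, the extent item is laid out as a struct
 * btrfs_extent_item followed by a struct btrfs_tree_block_info,
 * followed by zero or more inline backrefs.  find_inline_backref()
 * returns the [ptr, end) byte range covering those inline refs,
 * or 1 if the item carries none (or uses the old v0 format).
 */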
static noinline_for_stack
int find_inline_backref(struct extent_buffer *leaf, int slot,
			unsigned long *ptr, unsigned long *end)
{
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	u32 item_size;

	item_size = btrfs_item_size_nr(leaf, slot);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (item_size < sizeof(*ei)) {
		WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		return 1;
	}
#endif
	ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item);
	WARN_ON(!(btrfs_extent_flags(leaf, ei) &
		  BTRFS_EXTENT_FLAG_TREE_BLOCK));

	if (item_size <= sizeof(*ei) + sizeof(*bi)) {
		WARN_ON(item_size < sizeof(*ei) + sizeof(*bi));
		return 1;
	}

	bi = (struct btrfs_tree_block_info *)(ei + 1);
	*ptr = (unsigned long)(bi + 1);
	*end = (unsigned long)ei + item_size;
	return 0;
}

/*
 * build backref tree for a given tree block. root of the backref tree
 * corresponds to the tree block, leaves of the backref tree correspond
 * to roots of b-trees that reference the tree block.
 *
 * the basic idea of this function is to check backrefs of a given block
 * to find upper level blocks that reference the block, and then check
 * backrefs of these upper level blocks recursively. the recursion stops
 * when the tree root is reached or backrefs for the block are cached.
 *
 * NOTE: if we find that backrefs for a block are cached, we know backrefs
 * for all upper level blocks that directly/indirectly reference the
 * block are also cached.
 */
static struct backref_node *build_backref_tree(struct reloc_control *rc,
					       struct backref_cache *cache,
					       struct btrfs_key *node_key,
					       int level, u64 bytenr)
{
	struct btrfs_path *path1;
	struct btrfs_path *path2;
	struct extent_buffer *eb;
	struct btrfs_root *root;
	struct backref_node *cur;
	struct backref_node *upper;
	struct backref_node *lower;
	struct backref_node *node = NULL;
	struct backref_node *exist = NULL;
	struct backref_edge *edge;
	struct rb_node *rb_node;
	struct btrfs_key key;
	unsigned long end;
	unsigned long ptr;
	LIST_HEAD(list);
	int ret;
	int err = 0;

	path1 = btrfs_alloc_path();
	path2 = btrfs_alloc_path();
	if (!path1 || !path2) {
		err = -ENOMEM;
		goto out;
	}

	node = kmalloc(sizeof(*node), GFP_NOFS);
	if (!node) {
		err = -ENOMEM;
		goto out;
	}

	backref_node_init(node);
	node->bytenr = bytenr;
	node->owner = 0;
	node->level = level;
	node->lowest = 1;
	cur = node;
again:
	end = 0;
	ptr = 0;
	key.objectid = cur->bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = (u64)-1;

	path1->search_commit_root = 1;
	path1->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1,
				0, 0);
	if (ret < 0) {
		err = ret;
		goto out;
	}
	BUG_ON(!ret || !path1->slots[0]);

	path1->slots[0]--;

	WARN_ON(cur->checked);
	if (!list_empty(&cur->upper)) {
		/*
		 * the backref was added previously when processing
		 * backref of type BTRFS_TREE_BLOCK_REF_KEY
		 */
		BUG_ON(!list_is_singular(&cur->upper));
		edge = list_entry(cur->upper.next, struct backref_edge,
				  list[LOWER]);
		BUG_ON(!list_empty(&edge->list[UPPER]));
		exist = edge->node[UPPER];
		/*
		 * add the upper level block to the pending list if we
		 * need to check its backrefs
		 */
		if (!exist->checked)
			list_add_tail(&edge->list[UPPER], &list);
	} else {
		exist = NULL;
	}

	while (1) {
		cond_resched();
		eb = path1->nodes[0];

		if (ptr >= end) {
			if (path1->slots[0] >= btrfs_header_nritems(eb)) {
				ret = btrfs_next_leaf(rc->extent_root, path1);
				if (ret < 0) {
					err = ret;
					goto out;
				}
				if (ret > 0)
					break;
				eb = path1->nodes[0];
			}

			btrfs_item_key_to_cpu(eb, &key, path1->slots[0]);
			if (key.objectid != cur->bytenr) {
				WARN_ON(exist);
				break;
			}

			if (key.type == BTRFS_EXTENT_ITEM_KEY) {
				ret = find_inline_backref(eb, path1->slots[0],
							  &ptr, &end);
				if (ret)
					goto next;
			}
		}

		if (ptr < end) {
			/* update key for inline backref */
			struct btrfs_extent_inline_ref *iref;
			iref = (struct btrfs_extent_inline_ref *)ptr;
			key.type = btrfs_extent_inline_ref_type(eb, iref);
			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
			WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY &&
				key.type != BTRFS_SHARED_BLOCK_REF_KEY);
		}

		if (exist &&
		    ((key.type == BTRFS_TREE_BLOCK_REF_KEY &&
		      exist->owner == key.offset) ||
		     (key.type == BTRFS_SHARED_BLOCK_REF_KEY &&
		      exist->bytenr == key.offset))) {
			exist = NULL;
			goto next;
		}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY ||
		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
			if (key.objectid == key.offset &&
			    key.type == BTRFS_EXTENT_REF_V0_KEY) {
				struct btrfs_extent_ref_v0 *ref0;
				ref0 = btrfs_item_ptr(eb, path1->slots[0],
						struct btrfs_extent_ref_v0);
				root = find_tree_root(rc, eb, ref0);
				if (root)
					cur->root = root;
				else
					cur->old_root = 1;
				break;
			}
#else
		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
		if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) {
#endif
			if (key.objectid == key.offset) {
				/*
				 * only root blocks of reloc trees use
				 * backrefs of this type.
				 */
				root = find_reloc_root(rc, cur->bytenr);
				BUG_ON(!root);
				cur->root = root;
				break;
			}

			edge = kzalloc(sizeof(*edge), GFP_NOFS);
			if (!edge) {
				err = -ENOMEM;
				goto out;
			}
			rb_node = tree_search(&cache->rb_root, key.offset);
			if (!rb_node) {
				upper = kmalloc(sizeof(*upper), GFP_NOFS);
				if (!upper) {
					kfree(edge);
					err = -ENOMEM;
					goto out;
				}
				backref_node_init(upper);
				upper->bytenr = key.offset;
				upper->owner = 0;
				upper->level = cur->level + 1;
				/*
				 * backrefs for the upper level block aren't
				 * cached, add the block to the pending list
				 */
				list_add_tail(&edge->list[UPPER], &list);
			} else {
				upper = rb_entry(rb_node, struct backref_node,
						 rb_node);
				INIT_LIST_HEAD(&edge->list[UPPER]);
			}
			list_add(&edge->list[LOWER], &cur->upper);
			edge->node[UPPER] = upper;
			edge->node[LOWER] = cur;

			goto next;
		} else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) {
			goto next;
		}
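		/*
		 * A SHARED_BLOCK_REF names the parent block directly by
		 * bytenr, so the edge above could be created without any
		 * tree search.  A TREE_BLOCK_REF only names the owning
		 * root, so from here on we must walk that root down to
		 * this block to discover the actual parent blocks.
		 */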
		/* key.type == BTRFS_TREE_BLOCK_REF_KEY */
		root = read_fs_root(rc->extent_root->fs_info, key.offset);
		if (IS_ERR(root)) {
			err = PTR_ERR(root);
			goto out;
		}

		if (btrfs_root_level(&root->root_item) == cur->level) {
			/* tree root */
			BUG_ON(btrfs_root_bytenr(&root->root_item) !=
			       cur->bytenr);
			cur->root = root;
			break;
		}

		level = cur->level + 1;

		/*
		 * search the tree to find upper level blocks that
		 * reference the block.
		 */
		path2->search_commit_root = 1;
		path2->skip_locking = 1;
		path2->lowest_level = level;
		ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0);
		path2->lowest_level = 0;
		if (ret < 0) {
			err = ret;
			goto out;
		}

		eb = path2->nodes[level];
		WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) !=
			cur->bytenr);

		lower = cur;
		for (; level < BTRFS_MAX_LEVEL; level++) {
			if (!path2->nodes[level]) {
				BUG_ON(btrfs_root_bytenr(&root->root_item) !=
				       lower->bytenr);
				lower->root = root;
				break;
			}

			edge = kzalloc(sizeof(*edge), GFP_NOFS);
			if (!edge) {
				err = -ENOMEM;
				goto out;
			}

			eb = path2->nodes[level];
			rb_node = tree_search(&cache->rb_root, eb->start);
			if (!rb_node) {
				upper = kmalloc(sizeof(*upper), GFP_NOFS);
				if (!upper) {
					kfree(edge);
					err = -ENOMEM;
					goto out;
				}
				backref_node_init(upper);
				upper->bytenr = eb->start;
				upper->owner = btrfs_header_owner(eb);
				upper->level = lower->level + 1;

				/*
				 * if we know the block isn't shared
				 * we can avoid checking its backrefs.
				 */
				if (btrfs_block_can_be_shared(root, eb))
					upper->checked = 0;
				else
					upper->checked = 1;

				/*
				 * add the block to the pending list if we
				 * need to check its backrefs. only blocks
				 * at 'cur->level + 1' are added to the
				 * tail of the pending list. this guarantees
				 * we check backrefs from lower level
				 * blocks to upper level blocks.
				 */
				if (!upper->checked &&
				    level == cur->level + 1) {
					list_add_tail(&edge->list[UPPER],
						      &list);
				} else
					INIT_LIST_HEAD(&edge->list[UPPER]);
			} else {
				upper = rb_entry(rb_node, struct backref_node,
						 rb_node);
				BUG_ON(!upper->checked);
				INIT_LIST_HEAD(&edge->list[UPPER]);
			}
			list_add_tail(&edge->list[LOWER], &lower->upper);
			edge->node[UPPER] = upper;
			edge->node[LOWER] = lower;

			if (rb_node)
				break;
			lower = upper;
			upper = NULL;
		}
		btrfs_release_path(root, path2);
next:
		if (ptr < end) {
			ptr += btrfs_extent_inline_ref_size(key.type);
			if (ptr >= end) {
				WARN_ON(ptr > end);
				ptr = 0;
				end = 0;
			}
		}
		if (ptr >= end)
			path1->slots[0]++;
	}
	btrfs_release_path(rc->extent_root, path1);

	cur->checked = 1;
	WARN_ON(exist);

	/* the pending list isn't empty, take the first block to process */
	if (!list_empty(&list)) {
		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
		list_del_init(&edge->list[UPPER]);
		cur = edge->node[UPPER];
		goto again;
	}
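	/*
	 * Up to this point nodes were only linked through edges; none of
	 * the newly allocated nodes is in the cache's rb-tree yet.  The
	 * loop below walks the finished graph breadth-first from the
	 * lowest node, inserting each node into the rb-tree and moving
	 * its edges onto the parents' 'lower' lists.
	 */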
	/*
	 * everything went well, connect backref nodes and insert the
	 * backref nodes into the cache.
	 */
	BUG_ON(!node->checked);
	rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node);
	BUG_ON(rb_node);

	list_for_each_entry(edge, &node->upper, list[LOWER])
		list_add_tail(&edge->list[UPPER], &list);

	while (!list_empty(&list)) {
		edge = list_entry(list.next, struct backref_edge, list[UPPER]);
		list_del_init(&edge->list[UPPER]);
		upper = edge->node[UPPER];

		if (!RB_EMPTY_NODE(&upper->rb_node)) {
			if (upper->lowest) {
				list_del_init(&upper->lower);
				upper->lowest = 0;
			}

			list_add_tail(&edge->list[UPPER], &upper->lower);
			continue;
		}

		BUG_ON(!upper->checked);
		rb_node = tree_insert(&cache->rb_root, upper->bytenr,
				      &upper->rb_node);
		BUG_ON(rb_node);

		list_add_tail(&edge->list[UPPER], &upper->lower);

		list_for_each_entry(edge, &upper->upper, list[LOWER])
			list_add_tail(&edge->list[UPPER], &list);
	}
out:
	btrfs_free_path(path1);
	btrfs_free_path(path2);
	if (err) {
		INIT_LIST_HEAD(&list);
		upper = node;
		while (upper) {
			if (RB_EMPTY_NODE(&upper->rb_node)) {
				list_splice_tail(&upper->upper, &list);
				kfree(upper);
			}

			if (list_empty(&list))
				break;

			edge = list_entry(list.next, struct backref_edge,
					  list[LOWER]);
			upper = edge->node[UPPER];
			kfree(edge);
		}
		return ERR_PTR(err);
	}
	return node;
}

/*
 * helper to add 'address of tree root -> reloc tree' mapping
 */
static int __add_reloc_root(struct btrfs_root *root)
{
	struct rb_node *rb_node;
	struct mapping_node *node;
	struct reloc_control *rc = root->fs_info->reloc_ctl;

	node = kmalloc(sizeof(*node), GFP_NOFS);
	BUG_ON(!node);

	node->bytenr = root->node->start;
	node->data = root;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
			      node->bytenr, &node->rb_node);
	spin_unlock(&rc->reloc_root_tree.lock);
	BUG_ON(rb_node);

	list_add_tail(&root->root_list, &rc->reloc_roots);
	return 0;
}
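/*
 * Note: the lookup below is keyed by commit_root->start, not
 * node->start.  The mapping stores the root's bytenr as of the last
 * update, so once the reloc root has been COWed that old address is
 * only remembered by the commit root; when the mapping is kept, it is
 * re-inserted under the current node->start.
 */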
/*
 * helper to update/delete the 'address of tree root -> reloc tree'
 * mapping
 */
static int __update_reloc_root(struct btrfs_root *root, int del)
{
	struct rb_node *rb_node;
	struct mapping_node *node = NULL;
	struct reloc_control *rc = root->fs_info->reloc_ctl;

	spin_lock(&rc->reloc_root_tree.lock);
	rb_node = tree_search(&rc->reloc_root_tree.rb_root,
			      root->commit_root->start);
	if (rb_node) {
		node = rb_entry(rb_node, struct mapping_node, rb_node);
		rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root);
	}
	spin_unlock(&rc->reloc_root_tree.lock);

	BUG_ON((struct btrfs_root *)node->data != root);

	if (!del) {
		spin_lock(&rc->reloc_root_tree.lock);
		node->bytenr = root->node->start;
		rb_node = tree_insert(&rc->reloc_root_tree.rb_root,
				      node->bytenr, &node->rb_node);
		spin_unlock(&rc->reloc_root_tree.lock);
		BUG_ON(rb_node);
	} else {
		list_del_init(&root->root_list);
		kfree(node);
	}
	return 0;
}

/*
 * create reloc tree for a given fs tree. the reloc tree is just a
 * snapshot of the fs tree with a special root objectid.
 */
int btrfs_init_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret;

	if (root->reloc_root) {
		reloc_root = root->reloc_root;
		reloc_root->last_trans = trans->transid;
		return 0;
	}

	if (!root->fs_info->reloc_ctl ||
	    !root->fs_info->reloc_ctl->create_reloc_root ||
	    root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		return 0;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	BUG_ON(!root_item);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = root->root_key.objectid;

	ret = btrfs_copy_root(trans, root, root->commit_root, &eb,
			      BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(ret);

	btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1);
	memcpy(root_item, &root->root_item, sizeof(*root_item));
	btrfs_set_root_refs(root_item, 1);
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);
	memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key));
	root_item->drop_level = 0;

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
				&root_key, root_item);
	BUG_ON(ret);
	kfree(root_item);

	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
						 &root_key);
	BUG_ON(IS_ERR(reloc_root));
	reloc_root->last_trans = trans->transid;

	__add_reloc_root(reloc_root);
	root->reloc_root = reloc_root;
	return 0;
}

/*
 * update root item of reloc tree
 */
int btrfs_update_reloc_root(struct btrfs_trans_handle *trans,
			    struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	int del = 0;
	int ret;

	if (!root->reloc_root)
		return 0;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_root_refs(root_item) == 0) {
		root->reloc_root = NULL;
		del = 1;
	}

	__update_reloc_root(reloc_root, del);

	if (reloc_root->commit_root != reloc_root->node) {
		btrfs_set_root_node(root_item, reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		reloc_root->commit_root = btrfs_root_node(reloc_root);
	}

	ret = btrfs_update_root(trans, root->fs_info->tree_root,
				&reloc_root->root_key, root_item);
	BUG_ON(ret);
	return 0;
}

/*
 * helper to find first cached inode with inode number >= objectid
 * in a subvolume
 */
static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid)
{
	struct rb_node *node;
	struct rb_node *prev;
	struct btrfs_inode *entry;
	struct inode *inode;

	spin_lock(&root->inode_lock);
again:
	node = root->inode_tree.rb_node;
	prev = NULL;
	while (node) {
		prev = node;
		entry = rb_entry(node, struct btrfs_inode, rb_node);

		if (objectid < entry->vfs_inode.i_ino)
			node = node->rb_left;
		else if (objectid > entry->vfs_inode.i_ino)
			node = node->rb_right;
		else
			break;
	}
	if (!node) {
		while (prev) {
			entry = rb_entry(prev, struct btrfs_inode, rb_node);
			if (objectid <= entry->vfs_inode.i_ino) {
				node = prev;
				break;
			}
			prev = rb_next(prev);
		}
	}
	while (node) {
		entry = rb_entry(node, struct btrfs_inode, rb_node);
		inode = igrab(&entry->vfs_inode);
		if (inode) {
			spin_unlock(&root->inode_lock);
			return inode;
		}

		objectid = entry->vfs_inode.i_ino + 1;
		if (cond_resched_lock(&root->inode_lock))
			goto again;

		node = rb_next(node);
	}
	spin_unlock(&root->inode_lock);
	return NULL;
}

static int in_block_group(u64 bytenr,
			  struct btrfs_block_group_cache *block_group)
{
	if (bytenr >= block_group->key.objectid &&
	    bytenr < block_group->key.objectid + block_group->key.offset)
		return 1;
	return 0;
}

/*
 * get new location of data
 */
static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr,
			    u64 bytenr, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	bytenr -= BTRFS_I(reloc_inode)->index_cnt;
	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
				       bytenr, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	leaf = path->nodes[0];
	fi = btrfs_item_ptr(leaf, path->slots[0],
			    struct btrfs_file_extent_item);

	BUG_ON(btrfs_file_extent_offset(leaf, fi) ||
	       btrfs_file_extent_compression(leaf, fi) ||
	       btrfs_file_extent_encryption(leaf, fi) ||
	       btrfs_file_extent_other_encoding(leaf, fi));

	if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) {
		ret = 1;
		goto out;
	}

	if (new_bytenr)
		*new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}
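/*
 * get_new_location() relies on the layout of the relocation data
 * inode: file offset 'bytenr - index_cnt' holds the copy of the data
 * that used to live at 'bytenr'.  index_cnt is presumably set to the
 * start of the block group when that inode is created, which is not
 * shown in this section.
 */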
/*
 * update file extent items in the tree leaf to point to
 * the new locations.
 */
static int replace_file_extents(struct btrfs_trans_handle *trans,
				struct reloc_control *rc,
				struct btrfs_root *root,
				struct extent_buffer *leaf,
				struct list_head *inode_list)
{
	struct btrfs_key key;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct inodevec *ivec = NULL;
	u64 parent;
	u64 bytenr;
	u64 new_bytenr;
	u64 num_bytes;
	u64 end;
	u32 nritems;
	u32 i;
	int ret;
	int first = 1;
	int dirty = 0;

	if (rc->stage != UPDATE_DATA_PTRS)
		return 0;

	/* reloc trees always use full backref */
	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID)
		parent = leaf->start;
	else
		parent = 0;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		cond_resched();
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;
		if (!in_block_group(bytenr, rc->block_group))
			continue;

		/*
		 * if we are modifying a block in the fs tree, wait for
		 * readpage to complete and drop the extent cache
		 */
		if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) {
			if (!ivec || ivec->nr == INODEVEC_SIZE) {
				ivec = kmalloc(sizeof(*ivec), GFP_NOFS);
				BUG_ON(!ivec);
				ivec->nr = 0;
				list_add_tail(&ivec->list, inode_list);
			}
			if (first) {
				inode = find_next_inode(root, key.objectid);
				if (inode)
					ivec->inode[ivec->nr++] = inode;
				first = 0;
			} else if (inode && inode->i_ino < key.objectid) {
				inode = find_next_inode(root, key.objectid);
				if (inode)
					ivec->inode[ivec->nr++] = inode;
			}
			if (inode && inode->i_ino == key.objectid) {
				end = key.offset +
				      btrfs_file_extent_num_bytes(leaf, fi);
				WARN_ON(!IS_ALIGNED(key.offset,
						    root->sectorsize));
				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
				end--;
				ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
						      key.offset, end,
						      GFP_NOFS);
				if (!ret)
					continue;

				btrfs_drop_extent_cache(inode, key.offset, end,
							1);
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      key.offset, end, GFP_NOFS);
			}
		}

		ret = get_new_location(rc->data_inode, &new_bytenr,
				       bytenr, num_bytes);
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr);
		dirty = 1;

		key.offset -= btrfs_file_extent_offset(leaf, fi);
		ret = btrfs_inc_extent_ref(trans, root, new_bytenr,
					   num_bytes, parent,
					   btrfs_header_owner(leaf),
					   key.objectid, key.offset);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
					parent, btrfs_header_owner(leaf),
					key.objectid, key.offset);
		BUG_ON(ret);
	}
	if (dirty)
		btrfs_mark_buffer_dirty(leaf);
	return 0;
}
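/*
 * compare the key of the block pointer at 'slot' of 'eb' with the key
 * of the corresponding block pointer in 'path'; the raw disk keys are
 * compared, so a zero result means the two pointers index the exact
 * same key range.
 */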
static noinline_for_stack
int memcmp_node_keys(struct extent_buffer *eb, int slot,
		     struct btrfs_path *path, int level)
{
	struct btrfs_disk_key key1;
	struct btrfs_disk_key key2;
	btrfs_node_key(eb, &key1, slot);
	btrfs_node_key(path->nodes[level], &key2, path->slots[level]);
	return memcmp(&key1, &key2, sizeof(key1));
}

/*
 * try to replace tree blocks in the fs tree with the new blocks
 * in the reloc tree. tree blocks that haven't been modified since the
 * reloc tree was created can be replaced.
 *
 * if a block was replaced, level of the block + 1 is returned.
 * if no block got replaced, 0 is returned. if there are other
 * errors, a negative error number is returned.
 */
static int replace_path(struct btrfs_trans_handle *trans,
			struct btrfs_root *dest, struct btrfs_root *src,
			struct btrfs_path *path, struct btrfs_key *next_key,
			struct extent_buffer **leaf,
			int lowest_level, int max_level)
{
	struct extent_buffer *eb;
	struct extent_buffer *parent;
	struct btrfs_key key;
	u64 old_bytenr;
	u64 new_bytenr;
	u64 old_ptr_gen;
	u64 new_ptr_gen;
	u64 last_snapshot;
	u32 blocksize;
	int level;
	int ret;
	int slot;

	BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(lowest_level > 1 && leaf);

	last_snapshot = btrfs_root_last_snapshot(&src->root_item);

	slot = path->slots[lowest_level];
	btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot);

	eb = btrfs_lock_root_node(dest);
	btrfs_set_lock_blocking(eb);
	level = btrfs_header_level(eb);

	if (level < lowest_level) {
		btrfs_tree_unlock(eb);
		free_extent_buffer(eb);
		return 0;
	}

	ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb);
	BUG_ON(ret);
	btrfs_set_lock_blocking(eb);

	if (next_key) {
		next_key->objectid = (u64)-1;
		next_key->type = (u8)-1;
		next_key->offset = (u64)-1;
	}

	parent = eb;
	while (1) {
		level = btrfs_header_level(parent);
		BUG_ON(level < lowest_level);

		ret = btrfs_bin_search(parent, &key, level, &slot);
		if (ret && slot > 0)
			slot--;

		if (next_key && slot + 1 < btrfs_header_nritems(parent))
			btrfs_node_key_to_cpu(parent, next_key, slot + 1);

		old_bytenr = btrfs_node_blockptr(parent, slot);
		blocksize = btrfs_level_size(dest, level - 1);
		old_ptr_gen = btrfs_node_ptr_generation(parent, slot);

		if (level <= max_level) {
			eb = path->nodes[level];
			new_bytenr = btrfs_node_blockptr(eb,
							 path->slots[level]);
			new_ptr_gen = btrfs_node_ptr_generation(eb,
							path->slots[level]);
		} else {
			new_bytenr = 0;
			new_ptr_gen = 0;
		}

		if (new_bytenr > 0 && new_bytenr == old_bytenr) {
			WARN_ON(1);
			ret = level;
			break;
		}

		if (new_bytenr == 0 || old_ptr_gen > last_snapshot ||
		    memcmp_node_keys(parent, slot, path, level)) {
			if (level <= lowest_level && !leaf) {
				ret = 0;
				break;
			}

			eb = read_tree_block(dest, old_bytenr, blocksize,
					     old_ptr_gen);
			btrfs_tree_lock(eb);
			ret = btrfs_cow_block(trans, dest, eb, parent,
					      slot, &eb);
			BUG_ON(ret);
			btrfs_set_lock_blocking(eb);

			if (level <= lowest_level) {
				*leaf = eb;
				ret = 0;
				break;
			}

			btrfs_tree_unlock(parent);
			free_extent_buffer(parent);

			parent = eb;
			continue;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &key,
				      path->slots[level]);
		btrfs_release_path(src, path);

		path->lowest_level = level;
		ret = btrfs_search_slot(trans, src, &key, path, 0, 1);
		path->lowest_level = 0;
		BUG_ON(ret);
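		/*
		 * The pointer swap below needs four reference updates:
		 * each tree gains a ref on the block it now points to
		 * and drops the ref it held on the block it gave away,
		 * keeping the extent tree consistent with both trees
		 * at every step.
		 */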
		/*
		 * swap blocks in fs tree and reloc tree.
		 */
		btrfs_set_node_blockptr(parent, slot, new_bytenr);
		btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen);
		btrfs_mark_buffer_dirty(parent);

		btrfs_set_node_blockptr(path->nodes[level],
					path->slots[level], old_bytenr);
		btrfs_set_node_ptr_generation(path->nodes[level],
					      path->slots[level], old_ptr_gen);
		btrfs_mark_buffer_dirty(path->nodes[level]);

		ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize,
					   path->nodes[level]->start,
					   src->root_key.objectid, level - 1, 0);
		BUG_ON(ret);
		ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize,
					   0, dest->root_key.objectid, level - 1,
					   0);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, src, new_bytenr, blocksize,
					path->nodes[level]->start,
					src->root_key.objectid, level - 1, 0);
		BUG_ON(ret);

		ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize,
					0, dest->root_key.objectid, level - 1,
					0);
		BUG_ON(ret);

		btrfs_unlock_up_safe(path, 0);

		ret = level;
		break;
	}
	btrfs_tree_unlock(parent);
	free_extent_buffer(parent);
	return ret;
}

/*
 * helper to find the next relocated block in the reloc tree
 */
static noinline_for_stack
int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
		       int *level)
{
	struct extent_buffer *eb;
	int i;
	u64 last_snapshot;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = 0; i < *level; i++) {
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}

	for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] + 1 < nritems) {
			path->slots[i]++;
			if (btrfs_node_ptr_generation(eb, path->slots[i]) <=
			    last_snapshot)
				continue;

			*level = i;
			return 0;
		}
		free_extent_buffer(path->nodes[i]);
		path->nodes[i] = NULL;
	}
	return 1;
}

/*
 * walk down the reloc tree to find a relocated block of lowest level
 */
static noinline_for_stack
int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path,
			 int *level)
{
	struct extent_buffer *eb = NULL;
	int i;
	u64 bytenr;
	u64 ptr_gen = 0;
	u64 last_snapshot;
	u32 blocksize;
	u32 nritems;

	last_snapshot = btrfs_root_last_snapshot(&root->root_item);

	for (i = *level; i > 0; i--) {
		eb = path->nodes[i];
		nritems = btrfs_header_nritems(eb);
		while (path->slots[i] < nritems) {
			ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]);
			if (ptr_gen > last_snapshot)
				break;
			path->slots[i]++;
		}
		if (path->slots[i] >= nritems) {
			if (i == *level)
				break;
			*level = i + 1;
			return 0;
		}
		if (i == 1) {
			*level = i;
			return 0;
		}

		bytenr = btrfs_node_blockptr(eb, path->slots[i]);
		blocksize = btrfs_level_size(root, i - 1);
		eb = read_tree_block(root, bytenr, blocksize, ptr_gen);
		BUG_ON(btrfs_header_level(eb) != i - 1);
		path->nodes[i - 1] = eb;
		path->slots[i - 1] = 0;
	}
	return 1;
}
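/*
 * Both walkers above key off the pointer generation: a pointer whose
 * generation is <= last_snapshot leads to a subtree the reloc tree
 * still shares with the fs tree, so only pointers newer than the
 * snapshot point can reach blocks that were actually relocated.
 */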
/*
 * invalidate extent cache for file extents whose keys are in the range
 * of [min_key, max_key)
 */
static int invalidate_extent_cache(struct btrfs_root *root,
				   struct btrfs_key *min_key,
				   struct btrfs_key *max_key)
{
	struct inode *inode = NULL;
	u64 objectid;
	u64 start, end;

	objectid = min_key->objectid;
	while (1) {
		cond_resched();
		iput(inode);

		if (objectid > max_key->objectid)
			break;

		inode = find_next_inode(root, objectid);
		if (!inode)
			break;

		if (inode->i_ino > max_key->objectid) {
			iput(inode);
			break;
		}

		objectid = inode->i_ino + 1;
		if (!S_ISREG(inode->i_mode))
			continue;

		if (unlikely(min_key->objectid == inode->i_ino)) {
			if (min_key->type > BTRFS_EXTENT_DATA_KEY)
				continue;
			if (min_key->type < BTRFS_EXTENT_DATA_KEY)
				start = 0;
			else {
				start = min_key->offset;
				WARN_ON(!IS_ALIGNED(start, root->sectorsize));
			}
		} else {
			start = 0;
		}

		if (unlikely(max_key->objectid == inode->i_ino)) {
			if (max_key->type < BTRFS_EXTENT_DATA_KEY)
				continue;
			if (max_key->type > BTRFS_EXTENT_DATA_KEY) {
				end = (u64)-1;
			} else {
				if (max_key->offset == 0)
					continue;
				end = max_key->offset;
				WARN_ON(!IS_ALIGNED(end, root->sectorsize));
				end--;
			}
		} else {
			end = (u64)-1;
		}

		/* the lock_extent waits for readpage to complete */
		lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
		btrfs_drop_extent_cache(inode, start, end, 1);
		unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
	}
	return 0;
}
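/*
 * find the key of the entry just after the current path position,
 * starting at 'level' and walking up; returns 1 if the path already
 * points at the last entry of every level it holds.
 */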
static int find_next_key(struct btrfs_path *path, int level,
			 struct btrfs_key *key)
{
	while (level < BTRFS_MAX_LEVEL) {
		if (!path->nodes[level])
			break;
		if (path->slots[level] + 1 <
		    btrfs_header_nritems(path->nodes[level])) {
			btrfs_node_key_to_cpu(path->nodes[level], key,
					      path->slots[level] + 1);
			return 0;
		}
		level++;
	}
	return 1;
}

/*
 * merge the relocated tree blocks in the reloc tree with the
 * corresponding fs tree.
 */
static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
					       struct btrfs_root *root)
{
	LIST_HEAD(inode_list);
	struct btrfs_key key;
	struct btrfs_key next_key;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root_item *root_item;
	struct btrfs_path *path;
	struct extent_buffer *leaf = NULL;
	unsigned long nr;
	int level;
	int max_level;
	int replaced = 0;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	reloc_root = root->reloc_root;
	root_item = &reloc_root->root_item;

	if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
		level = btrfs_root_level(root_item);
		extent_buffer_get(reloc_root->node);
		path->nodes[level] = reloc_root->node;
		path->slots[level] = 0;
	} else {
		btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);

		level = root_item->drop_level;
		BUG_ON(level == 0);
		path->lowest_level = level;
		ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0);
		if (ret < 0) {
			btrfs_free_path(path);
			return ret;
		}

		btrfs_node_key_to_cpu(path->nodes[level], &next_key,
				      path->slots[level]);
		WARN_ON(memcmp(&key, &next_key, sizeof(key)));

		btrfs_unlock_up_safe(path, 0);
	}

	if (level == 0 && rc->stage == UPDATE_DATA_PTRS) {
		trans = btrfs_start_transaction(root, 1);

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, 0);
		btrfs_release_path(reloc_root, path);

		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0) {
			err = ret;
			goto out;
		}

		leaf = path->nodes[0];
		btrfs_unlock_up_safe(path, 1);
		ret = replace_file_extents(trans, rc, root, leaf,
					   &inode_list);
		if (ret < 0)
			err = ret;
		goto out;
	}

	memset(&next_key, 0, sizeof(next_key));

	while (1) {
		leaf = NULL;
		replaced = 0;
		trans = btrfs_start_transaction(root, 1);
		max_level = level;

		ret = walk_down_reloc_tree(reloc_root, path, &level);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0)
			break;

		if (!find_next_key(path, level, &key) &&
		    btrfs_comp_cpu_keys(&next_key, &key) >= 0) {
			ret = 0;
		} else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) {
			ret = replace_path(trans, root, reloc_root,
					   path, &next_key, &leaf,
					   level, max_level);
		} else {
			ret = replace_path(trans, root, reloc_root,
					   path, &next_key, NULL,
					   level, max_level);
		}
		if (ret < 0) {
			err = ret;
			goto out;
		}

		if (ret > 0) {
			level = ret;
			btrfs_node_key_to_cpu(path->nodes[level], &key,
					      path->slots[level]);
			replaced = 1;
		} else if (leaf) {
			/*
			 * no block got replaced, try replacing file extents
			 */
			btrfs_item_key_to_cpu(leaf, &key, 0);
			ret = replace_file_extents(trans, rc, root, leaf,
						   &inode_list);
			btrfs_tree_unlock(leaf);
			free_extent_buffer(leaf);
			BUG_ON(ret < 0);
		}

		ret = walk_up_reloc_tree(reloc_root, path, &level);
		if (ret > 0)
			break;

		BUG_ON(level == 0);
		/*
		 * save the merging progress in drop_progress.
		 * this is OK since root refs == 1 in this case.
		 */
		btrfs_node_key(path->nodes[level], &root_item->drop_progress,
			       path->slots[level]);
		root_item->drop_level = level;

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);

		btrfs_btree_balance_dirty(root, nr);

		if (replaced && rc->stage == UPDATE_DATA_PTRS)
			invalidate_extent_cache(root, &key, &next_key);
	}

	/*
	 * handle the case where only one block in the fs tree needs to
	 * be relocated and that block is the tree root.
	 */
	leaf = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf);
	btrfs_tree_unlock(leaf);
	free_extent_buffer(leaf);
	if (ret < 0)
		err = ret;
out:
	btrfs_free_path(path);

	if (err == 0) {
		memset(&root_item->drop_progress, 0,
		       sizeof(root_item->drop_progress));
		root_item->drop_level = 0;
		btrfs_set_root_refs(root_item, 0);
	}

	nr = trans->blocks_used;
	btrfs_end_transaction(trans, root);

	btrfs_btree_balance_dirty(root, nr);

	/*
	 * put inodes while we aren't holding the tree locks
	 */
	while (!list_empty(&inode_list)) {
		struct inodevec *ivec;
		ivec = list_entry(inode_list.next, struct inodevec, list);
		list_del(&ivec->list);
		while (ivec->nr > 0) {
			ivec->nr--;
			iput(ivec->inode[ivec->nr]);
		}
		kfree(ivec);
	}

	if (replaced && rc->stage == UPDATE_DATA_PTRS)
		invalidate_extent_cache(root, &key, &next_key);

	return err;
}

/*
 * callback for the work threads.
 * this function merges a reloc tree with the corresponding fs tree,
 * and then drops the reloc tree.
 */
static void merge_func(struct btrfs_work *work)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_root *reloc_root;
	struct async_merge *async;

	async = container_of(work, struct async_merge, work);
	reloc_root = async->root;

	if (btrfs_root_refs(&reloc_root->root_item) > 0) {
		root = read_fs_root(reloc_root->fs_info,
				    reloc_root->root_key.offset);
		BUG_ON(IS_ERR(root));
		BUG_ON(root->reloc_root != reloc_root);

		merge_reloc_root(async->rc, root);

		trans = btrfs_start_transaction(root, 1);
		btrfs_update_reloc_root(trans, root);
		btrfs_end_transaction(trans, root);
	}

	btrfs_drop_dead_root(reloc_root);

	if (atomic_dec_and_test(async->num_pending))
		complete(async->done);

	kfree(async);
}
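/*
 * num_pending starts at 1 below so the completion cannot fire while
 * workers are still being queued; the submitter drops that extra
 * count after the queueing loop and only waits if some worker has
 * not finished yet.
 */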
static int merge_reloc_roots(struct reloc_control *rc)
{
	struct async_merge *async;
	struct btrfs_root *root;
	struct completion done;
	atomic_t num_pending;

	init_completion(&done);
	atomic_set(&num_pending, 1);

	while (!list_empty(&rc->reloc_roots)) {
		root = list_entry(rc->reloc_roots.next,
				  struct btrfs_root, root_list);
		list_del_init(&root->root_list);

		async = kmalloc(sizeof(*async), GFP_NOFS);
		BUG_ON(!async);
		async->work.func = merge_func;
		async->work.flags = 0;
		async->rc = rc;
		async->root = root;
		async->done = &done;
		async->num_pending = &num_pending;
		atomic_inc(&num_pending);
		btrfs_queue_worker(&rc->workers, &async->work);
	}

	if (!atomic_dec_and_test(&num_pending))
		wait_for_completion(&done);

	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
	return 0;
}

static void free_block_list(struct rb_root *blocks)
{
	struct tree_block *block;
	struct rb_node *rb_node;
	while ((rb_node = rb_first(blocks))) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		rb_erase(rb_node, blocks);
		kfree(block);
	}
}

static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans,
				      struct btrfs_root *reloc_root)
{
	struct btrfs_root *root;

	if (reloc_root->last_trans == trans->transid)
		return 0;

	root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset);
	BUG_ON(IS_ERR(root));
	BUG_ON(root->reloc_root != reloc_root);

	return btrfs_record_root_in_trans(trans, root);
}

/*
 * select one tree from the trees that reference the block.
 * for blocks in reference-counted trees, we prefer the reloc tree.
 * if no reloc tree is found and reloc_only is true, NULL is returned.
 */
static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans,
					    struct backref_node *node,
					    struct backref_edge *edges[],
					    int *nr, int reloc_only)
{
	struct backref_node *next;
	struct btrfs_root *root;
	int index;
	int loop = 0;
again:
	index = 0;
	next = node;
	while (1) {
		cond_resched();
		next = walk_up_backref(next, edges, &index);
		root = next->root;
		if (!root) {
			BUG_ON(!node->old_root);
			goto skip;
		}

		/* no other choice for a non-reference-counted tree */
		if (!root->ref_cows) {
			BUG_ON(reloc_only);
			break;
		}

		if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
			record_reloc_root_in_trans(trans, root);
			break;
		}

		if (loop) {
			btrfs_record_root_in_trans(trans, root);
			break;
		}

		if (reloc_only || next != node) {
			if (!root->reloc_root)
				btrfs_record_root_in_trans(trans, root);
			root = root->reloc_root;
			/*
			 * if the reloc tree was created in the current
			 * transaction, no node in the backref tree
			 * corresponds to the root of the reloc tree.
			 */
			if (btrfs_root_last_snapshot(&root->root_item) ==
			    trans->transid - 1)
				break;
		}
skip:
		root = NULL;
		next = walk_down_backref(edges, &index);
		if (!next || next->level <= node->level)
			break;
	}

	if (!root && !loop && !reloc_only) {
		loop = 1;
		goto again;
	}

	if (root)
		*nr = index;
	else
		*nr = 0;

	return root;
}

static noinline_for_stack
struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans,
				   struct backref_node *node)
{
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int nr;
	return __select_one_root(trans, node, edges, &nr, 0);
}

static noinline_for_stack
struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
				     struct backref_node *node,
				     struct backref_edge *edges[], int *nr)
{
	return __select_one_root(trans, node, edges, nr, 1);
}

static void grab_path_buffers(struct btrfs_path *path,
			      struct backref_node *node,
			      struct backref_edge *edges[], int nr)
{
	int i = 0;
	while (1) {
		drop_node_buffer(node);
		node->eb = path->nodes[node->level];
		BUG_ON(!node->eb);
		if (path->locks[node->level])
			node->locked = 1;
		path->nodes[node->level] = NULL;
		path->locks[node->level] = 0;

		if (i >= nr)
			break;

		edges[i]->blockptr = node->eb->start;
		node = edges[i]->node[UPPER];
		i++;
	}
}
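/*
 * grab_path_buffers() above transfers ownership of the buffers (and
 * any locks) held in 'path' to the chain of backref nodes recorded in
 * edges[], so the nodes keep the extent buffers alive after the path
 * is released; each edge also records the current location of its
 * child block in edge->blockptr.
 */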
/*
 * relocate a tree block, and then update pointers in upper level
 * blocks that reference the block to point to the new location.
 *
 * if called by link_to_upper, the block has already been relocated.
 * in that case this function just updates pointers.
 */
static int do_relocation(struct btrfs_trans_handle *trans,
			 struct backref_node *node,
			 struct btrfs_key *key,
			 struct btrfs_path *path, int lowest)
{
	struct backref_node *upper;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	struct btrfs_root *root;
	struct extent_buffer *eb;
	u32 blocksize;
	u64 bytenr;
	u64 generation;
	int nr;
	int slot;
	int ret;
	int err = 0;

	BUG_ON(lowest && node->eb);

	path->lowest_level = node->level + 1;
	list_for_each_entry(edge, &node->upper, list[LOWER]) {
		cond_resched();
		if (node->eb && node->eb->start == edge->blockptr)
			continue;

		upper = edge->node[UPPER];
		root = select_reloc_root(trans, upper, edges, &nr);
		if (!root)
			continue;

		if (upper->eb && !upper->locked)
			drop_node_buffer(upper);

		if (!upper->eb) {
			ret = btrfs_search_slot(trans, root, key, path, 0, 1);
			if (ret < 0) {
				err = ret;
				break;
			}
			BUG_ON(ret > 0);

			slot = path->slots[upper->level];

			btrfs_unlock_up_safe(path, upper->level + 1);
			grab_path_buffers(path, upper, edges, nr);

			btrfs_release_path(NULL, path);
		} else {
			ret = btrfs_bin_search(upper->eb, key, upper->level,
					       &slot);
			BUG_ON(ret);
		}

		bytenr = btrfs_node_blockptr(upper->eb, slot);
		if (!lowest) {
			if (node->eb->start == bytenr) {
				btrfs_tree_unlock(upper->eb);
				upper->locked = 0;
				continue;
			}
		} else {
			BUG_ON(node->bytenr != bytenr);
		}

		blocksize = btrfs_level_size(root, node->level);
		generation = btrfs_node_ptr_generation(upper->eb, slot);
		eb = read_tree_block(root, bytenr, blocksize, generation);
		btrfs_tree_lock(eb);
		btrfs_set_lock_blocking(eb);

		if (!node->eb) {
			ret = btrfs_cow_block(trans, root, eb, upper->eb,
					      slot, &eb);
			if (ret < 0) {
				err = ret;
				break;
			}
			btrfs_set_lock_blocking(eb);
			node->eb = eb;
			node->locked = 1;
		} else {
			btrfs_set_node_blockptr(upper->eb, slot,
						node->eb->start);
			btrfs_set_node_ptr_generation(upper->eb, slot,
						      trans->transid);
			btrfs_mark_buffer_dirty(upper->eb);

			ret = btrfs_inc_extent_ref(trans, root,
						   node->eb->start, blocksize,
						   upper->eb->start,
						   btrfs_header_owner(upper->eb),
						   node->level, 0);
			BUG_ON(ret);

			ret = btrfs_drop_subtree(trans, root, eb, upper->eb);
			BUG_ON(ret);

			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
		}
		if (!lowest) {
			btrfs_tree_unlock(upper->eb);
			upper->locked = 0;
		}
	}
	path->lowest_level = 0;
	return err;
}

static int link_to_upper(struct btrfs_trans_handle *trans,
			 struct backref_node *node,
			 struct btrfs_path *path)
{
	struct btrfs_key key;
	if (!node->eb || list_empty(&node->upper))
		return 0;

	btrfs_node_key_to_cpu(node->eb, &key, 0);
	return do_relocation(trans, node, &key, path, 0);
}

static int finish_pending_nodes(struct btrfs_trans_handle *trans,
				struct backref_cache *cache,
				struct btrfs_path *path)
{
	struct backref_node *node;
	int level;
	int ret;
	int err = 0;

	for (level = 0; level < BTRFS_MAX_LEVEL; level++) {
		while (!list_empty(&cache->pending[level])) {
			node = list_entry(cache->pending[level].next,
					  struct backref_node, lower);
			BUG_ON(node->level != level);

			ret = link_to_upper(trans, node, path);
			if (ret < 0)
				err = ret;
			/*
			 * this removes the node from the pending list and
			 * may add some other nodes to the level + 1
			 * pending list
			 */
			remove_backref_node(cache, node);
		}
	}
	BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root));
	return err;
}

static void mark_block_processed(struct reloc_control *rc,
				 struct backref_node *node)
{
	u32 blocksize;
	if (node->level == 0 ||
	    in_block_group(node->bytenr, rc->block_group)) {
		blocksize = btrfs_level_size(rc->extent_root, node->level);
		set_extent_bits(&rc->processed_blocks, node->bytenr,
				node->bytenr + blocksize - 1, EXTENT_DIRTY,
				GFP_NOFS);
	}
	node->processed = 1;
}

/*
 * mark a block and all blocks that directly/indirectly reference the
 * block as processed.
 */
static void update_processed_blocks(struct reloc_control *rc,
				    struct backref_node *node)
{
	struct backref_node *next = node;
	struct backref_edge *edge;
	struct backref_edge *edges[BTRFS_MAX_LEVEL - 1];
	int index = 0;

	while (next) {
		cond_resched();
		while (1) {
			if (next->processed)
				break;

			mark_block_processed(rc, next);

			if (list_empty(&next->upper))
				break;

			edge = list_entry(next->upper.next,
					  struct backref_edge, list[LOWER]);
			edges[index++] = edge;
			next = edge->node[UPPER];
		}
		next = walk_down_backref(edges, &index);
	}
}

static int tree_block_processed(u64 bytenr, u32 blocksize,
				struct reloc_control *rc)
{
	if (test_range_bit(&rc->processed_blocks, bytenr,
			   bytenr + blocksize - 1, EXTENT_DIRTY, 1))
		return 1;
	return 0;
}

/*
 * check if any file extent pointers in the leaf point to data that
 * requires processing
 */
static int check_file_extents(struct reloc_control *rc,
			      u64 bytenr, u32 blocksize, u64 ptr_gen)
{
	struct btrfs_key found_key;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	u32 nritems;
	int i;
	int ret = 0;

	leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen);

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		cond_resched();
		btrfs_item_key_to_cpu(leaf, &found_key, i);
		if (found_key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		if (bytenr == 0)
			continue;
		if (in_block_group(bytenr, rc->block_group)) {
			ret = 1;
			break;
		}
	}
	free_extent_buffer(leaf);
	return ret;
}
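/*
 * add_child_blocks() below makes two passes over the node: the first
 * pass only issues readahead for every child that may need processing,
 * the second pass re-applies the same filters and queues the blocks
 * that really do, so the blocking check_file_extents() reads hit
 * already-prefetched buffers.
 */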
	for (i = 0; i < nritems; i++) {
		cond_resched();
		bytenr = btrfs_node_blockptr(node->eb, i);
		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
		if (ptr_gen == trans->transid)
			continue;
		if (!in_block_group(bytenr, rc->block_group) &&
		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
			continue;
		if (tree_block_processed(bytenr, blocksize, rc))
			continue;

		readahead_tree_block(rc->extent_root,
				     bytenr, blocksize, ptr_gen);
	}

	/* second pass: queue the child blocks that need processing */
	for (i = 0; i < nritems; i++) {
		cond_resched();
		bytenr = btrfs_node_blockptr(node->eb, i);
		ptr_gen = btrfs_node_ptr_generation(node->eb, i);
		if (ptr_gen == trans->transid)
			continue;
		if (!in_block_group(bytenr, rc->block_group) &&
		    (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS))
			continue;
		if (tree_block_processed(bytenr, blocksize, rc))
			continue;
		if (!in_block_group(bytenr, rc->block_group) &&
		    !check_file_extents(rc, bytenr, blocksize, ptr_gen))
			continue;

		block = kmalloc(sizeof(*block), GFP_NOFS);
		if (!block) {
			err = -ENOMEM;
			break;
		}
		block->bytenr = bytenr;
		btrfs_node_key_to_cpu(node->eb, &block->key, i);
		block->level = node->level - 1;
		block->key_ready = 1;
		rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
		BUG_ON(rb_node);
	}
	if (err)
		free_block_list(blocks);
	return err;
}

/*
 * find adjacent blocks that require processing
 */
static noinline_for_stack
int add_adjacent_blocks(struct btrfs_trans_handle *trans,
			struct reloc_control *rc,
			struct backref_cache *cache,
			struct rb_root *blocks, int level,
			struct backref_node **upper)
{
	struct backref_node *node;
	int ret = 0;

	WARN_ON(!list_empty(&cache->pending[level]));

	if (list_empty(&cache->pending[level + 1]))
		return 1;

	node = list_entry(cache->pending[level + 1].next,
			  struct backref_node, lower);
	if (node->eb)
		ret = add_child_blocks(trans, rc, node, blocks);

	*upper = node;
	return ret;
}

static int get_tree_block_key(struct reloc_control *rc,
			      struct tree_block *block)
{
	struct extent_buffer *eb;

	BUG_ON(block->key_ready);
	eb = read_tree_block(rc->extent_root, block->bytenr,
			     block->key.objectid, block->key.offset);
	WARN_ON(btrfs_header_level(eb) != block->level);
	if (block->level == 0)
		btrfs_item_key_to_cpu(eb, &block->key, 0);
	else
		btrfs_node_key_to_cpu(eb, &block->key, 0);
	free_extent_buffer(eb);
	block->key_ready = 1;
	return 0;
}

static int reada_tree_block(struct reloc_control *rc,
			    struct tree_block *block)
{
	BUG_ON(block->key_ready);
	readahead_tree_block(rc->extent_root, block->bytenr,
			     block->key.objectid, block->key.offset);
	return 0;
}

/*
 * helper function to relocate a tree block
 */
static int relocate_tree_block(struct btrfs_trans_handle *trans,
			       struct reloc_control *rc,
			       struct backref_node *node,
			       struct btrfs_key *key,
			       struct btrfs_path *path)
{
	struct btrfs_root *root;
	int ret;

	root = select_one_root(trans, node);
	if (unlikely(!root)) {
		rc->found_old_snapshot = 1;
		update_processed_blocks(rc, node);
		return 0;
	}

	if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
		ret = do_relocation(trans, node, key, path, 1);
		if (ret < 0)
			goto out;
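		/*
		 * at the UPDATE_DATA_PTRS stage, file extent pointers in
		 * leaves copied into the reloc tree are rewritten to point
		 * at the new data locations
		 */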
		if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) {
			ret = replace_file_extents(trans, rc, root,
						   node->eb, NULL);
			if (ret < 0)
				goto out;
		}
		drop_node_buffer(node);
	} else if (!root->ref_cows) {
		path->lowest_level = node->level;
		ret = btrfs_search_slot(trans, root, key, path, 0, 1);
		btrfs_release_path(root, path);
		if (ret < 0)
			goto out;
	} else if (root != node->root) {
		WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS);
	}

	update_processed_blocks(rc, node);
	ret = 0;
out:
	drop_node_buffer(node);
	return ret;
}

/*
 * relocate a list of blocks
 */
static noinline_for_stack
int relocate_tree_blocks(struct btrfs_trans_handle *trans,
			 struct reloc_control *rc, struct rb_root *blocks)
{
	struct backref_cache *cache;
	struct backref_node *node;
	struct btrfs_path *path;
	struct tree_block *block;
	struct rb_node *rb_node;
	int level = -1;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	cache = kmalloc(sizeof(*cache), GFP_NOFS);
	if (!cache) {
		btrfs_free_path(path);
		return -ENOMEM;
	}

	backref_cache_init(cache);

	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		if (level == -1)
			level = block->level;
		else
			BUG_ON(level != block->level);
		if (!block->key_ready)
			reada_tree_block(rc, block);
		rb_node = rb_next(rb_node);
	}

	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);
		if (!block->key_ready)
			get_tree_block_key(rc, block);
		rb_node = rb_next(rb_node);
	}

	rb_node = rb_first(blocks);
	while (rb_node) {
		block = rb_entry(rb_node, struct tree_block, rb_node);

		node = build_backref_tree(rc, cache, &block->key,
					  block->level, block->bytenr);
		if (IS_ERR(node)) {
			err = PTR_ERR(node);
			goto out;
		}

		ret = relocate_tree_block(trans, rc, node, &block->key,
					  path);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		remove_backref_node(cache, node);
		rb_node = rb_next(rb_node);
	}

	if (level > 0)
		goto out;

	free_block_list(blocks);

	/*
	 * now that backrefs of some upper level tree blocks have been
	 * cached, try relocating blocks referenced by these upper level
	 * blocks.
	 */
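	/*
	 * stop early whenever the running transaction starts committing
	 * or flushing delayed refs, presumably so relocation does not
	 * hold up the commit; unprocessed blocks are found again later
	 */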
	while (1) {
		struct backref_node *upper = NULL;

		if (trans->transaction->in_commit ||
		    trans->transaction->delayed_refs.flushing)
			break;

		ret = add_adjacent_blocks(trans, rc, cache, blocks, level,
					  &upper);
		if (ret < 0)
			err = ret;
		if (ret != 0)
			break;

		rb_node = rb_first(blocks);
		while (rb_node) {
			block = rb_entry(rb_node, struct tree_block, rb_node);
			if (trans->transaction->in_commit ||
			    trans->transaction->delayed_refs.flushing)
				goto out;
			BUG_ON(!block->key_ready);
			node = build_backref_tree(rc, cache, &block->key,
						  level, block->bytenr);
			if (IS_ERR(node)) {
				err = PTR_ERR(node);
				goto out;
			}

			ret = relocate_tree_block(trans, rc, node,
						  &block->key, path);
			if (ret < 0) {
				err = ret;
				goto out;
			}
			remove_backref_node(cache, node);
			rb_node = rb_next(rb_node);
		}
		free_block_list(blocks);

		if (upper) {
			ret = link_to_upper(trans, upper, path);
			if (ret < 0) {
				err = ret;
				break;
			}
			remove_backref_node(cache, upper);
		}
	}
out:
	free_block_list(blocks);

	ret = finish_pending_nodes(trans, cache, path);
	if (ret < 0)
		err = ret;

	kfree(cache);
	btrfs_free_path(path);
	return err;
}

static noinline_for_stack
int relocate_inode_pages(struct inode *inode, u64 start, u64 len)
{
	u64 page_start;
	u64 page_end;
	unsigned long i;
	unsigned long first_index;
	unsigned long last_index;
	unsigned int total_read = 0;
	unsigned int total_dirty = 0;
	struct page *page;
	struct file_ra_state *ra;
	struct btrfs_ordered_extent *ordered;
	struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
	int ret = 0;

	ra = kzalloc(sizeof(*ra), GFP_NOFS);
	if (!ra)
		return -ENOMEM;

	mutex_lock(&inode->i_mutex);
	first_index = start >> PAGE_CACHE_SHIFT;
	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;

	/* make sure the dirty trick played by the caller works */
	ret = invalidate_inode_pages2_range(inode->i_mapping,
					    first_index, last_index);
	if (ret)
		goto out_unlock;

	file_ra_state_init(ra, inode->i_mapping);

	for (i = first_index; i <= last_index; i++) {
		if (total_read % ra->ra_pages == 0) {
			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
				       min(last_index, ra->ra_pages + i - 1));
		}
		total_read++;
again:
		BUG_ON(((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode));
		page = grab_cache_page(inode->i_mapping, i);
		if (!page) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				ret = -EIO;
				goto out_unlock;
			}
		}
		wait_on_page_writeback(page);

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;
		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		ordered = btrfs_lookup_ordered_extent(inode, page_start);
		if (ordered) {
			unlock_extent(io_tree, page_start, page_end,
				      GFP_NOFS);
			unlock_page(page);
			page_cache_release(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			goto again;
		}
		set_page_extent_mapped(page);

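		/*
		 * EXTENT_BOUNDARY on the first page should keep the
		 * relocated data from being merged with the extent in
		 * front of it at allocation time
		 */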
		if (i == first_index)
			set_extent_bits(io_tree, page_start, page_end,
					EXTENT_BOUNDARY, GFP_NOFS);
		btrfs_set_extent_delalloc(inode, page_start, page_end);

		set_page_dirty(page);
		total_dirty++;

		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
	}
out_unlock:
	mutex_unlock(&inode->i_mutex);
	kfree(ra);
	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
	return ret;
}

static noinline_for_stack
int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
	struct extent_map *em;
	u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt;
	u64 end = start + extent_key->offset - 1;

	em = alloc_extent_map(GFP_NOFS);
	em->start = start;
	em->len = extent_key->offset;
	em->block_len = extent_key->offset;
	em->block_start = extent_key->objectid;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	set_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* setup extent map to cheat btrfs_readpage */
	lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);
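	/*
	 * if a cached extent mapping conflicts with the new pinned one,
	 * drop it and retry until add_extent_mapping() succeeds
	 */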
	while (1) {
		int ret;

		spin_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em);
		spin_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			free_extent_map(em);
			break;
		}
		btrfs_drop_extent_cache(inode, start, end, 0);
	}
	unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS);

	return relocate_inode_pages(inode, start, extent_key->offset);
}

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
static int get_ref_objectid_v0(struct reloc_control *rc,
			       struct btrfs_path *path,
			       struct btrfs_key *extent_key,
			       u64 *ref_objectid, int *path_change)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	struct btrfs_extent_ref_v0 *ref0;
	int ret;
	int slot;

	leaf = path->nodes[0];
	slot = path->slots[0];
	while (1) {
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret < 0)
				return ret;
			BUG_ON(ret > 0);
			leaf = path->nodes[0];
			slot = path->slots[0];
			if (path_change)
				*path_change = 1;
		}
		btrfs_item_key_to_cpu(leaf, &key, slot);
		if (key.objectid != extent_key->objectid)
			return -ENOENT;

		if (key.type != BTRFS_EXTENT_REF_V0_KEY) {
			slot++;
			continue;
		}
		ref0 = btrfs_item_ptr(leaf, slot,
				      struct btrfs_extent_ref_v0);
		*ref_objectid = btrfs_ref_objectid_v0(leaf, ref0);
		break;
	}
	return 0;
}
#endif

/*
 * helper to add a tree block to the list.
 * the major work is getting the generation and level of the block
 */
static int add_tree_block(struct reloc_control *rc,
			  struct btrfs_key *extent_key,
			  struct btrfs_path *path,
			  struct rb_root *blocks)
{
	struct extent_buffer *eb;
	struct btrfs_extent_item *ei;
	struct btrfs_tree_block_info *bi;
	struct tree_block *block;
	struct rb_node *rb_node;
	u32 item_size;
	int level = -1;
	int generation;

	eb = path->nodes[0];
	item_size = btrfs_item_size_nr(eb, path->slots[0]);

	if (item_size >= sizeof(*ei) + sizeof(*bi)) {
		ei = btrfs_item_ptr(eb, path->slots[0],
				    struct btrfs_extent_item);
		bi = (struct btrfs_tree_block_info *)(ei + 1);
		generation = btrfs_extent_generation(eb, ei);
		level = btrfs_tree_block_level(eb, bi);
	} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		u64 ref_owner;
		int ret;

		BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0));
		ret = get_ref_objectid_v0(rc, path, extent_key,
					  &ref_owner, NULL);
		BUG_ON(ref_owner >= BTRFS_MAX_LEVEL);
		level = (int)ref_owner;
		/* FIXME: get real generation */
		generation = 0;
#else
		BUG();
#endif
	}

	btrfs_release_path(rc->extent_root, path);

	BUG_ON(level == -1);

	block = kmalloc(sizeof(*block), GFP_NOFS);
	if (!block)
		return -ENOMEM;

	/*
	 * stash the blocksize and generation in block->key; they are
	 * replaced with the real tree block key by get_tree_block_key()
	 */
	block->bytenr = extent_key->objectid;
	block->key.objectid = extent_key->offset;
	block->key.offset = generation;
	block->level = level;
	block->key_ready = 0;

	rb_node = tree_insert(blocks, block->bytenr, &block->rb_node);
	BUG_ON(rb_node);

	return 0;
}

/*
 * helper to add tree blocks for backrefs of type BTRFS_SHARED_DATA_REF_KEY
 */
static int __add_tree_block(struct reloc_control *rc,
			    u64 bytenr, u32 blocksize,
			    struct rb_root *blocks)
{
	struct btrfs_path *path;
	struct btrfs_key key;
	int ret;

	if (tree_block_processed(bytenr, blocksize, rc))
		return 0;

	if (tree_search(blocks, bytenr))
		return 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = bytenr;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = blocksize;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0);
	if (ret < 0)
		goto out;
	BUG_ON(ret);

	btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
	ret = add_tree_block(rc, &key, path, blocks);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to check if the block uses full backrefs for pointers in it
 */
static int block_use_full_backref(struct reloc_control *rc,
				  struct extent_buffer *eb)
{
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	struct btrfs_key key;
	u64 flags;
	int ret;

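	/*
	 * blocks COWed for the reloc tree and blocks using the old
	 * backref format are always treated as having full backrefs
	 */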
	if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) ||
	    btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV)
		return 1;

	path = btrfs_alloc_path();
	BUG_ON(!path);

	key.objectid = eb->start;
	key.type = BTRFS_EXTENT_ITEM_KEY;
	key.offset = eb->len;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, rc->extent_root,
				&key, path, 0, 0);
	BUG_ON(ret);

	ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
			    struct btrfs_extent_item);
	flags = btrfs_extent_flags(path->nodes[0], ei);
	BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK));
	if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)
		ret = 1;
	else
		ret = 0;
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to add tree blocks for backrefs of type BTRFS_EXTENT_DATA_REF_KEY
 * this function scans the fs tree to find blocks that reference the data
 * extent
 */
static int find_data_references(struct reloc_control *rc,
				struct btrfs_key *extent_key,
				struct extent_buffer *leaf,
				struct btrfs_extent_data_ref *ref,
				struct rb_root *blocks)
{
	struct btrfs_path *path;
	struct tree_block *block;
	struct btrfs_root *root;
	struct btrfs_file_extent_item *fi;
	struct rb_node *rb_node;
	struct btrfs_key key;
	u64 ref_root;
	u64 ref_objectid;
	u64 ref_offset;
	u32 ref_count;
	u32 nritems;
	int err = 0;
	int added = 0;
	int counted;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ref_root = btrfs_extent_data_ref_root(leaf, ref);
	ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref);
	ref_offset = btrfs_extent_data_ref_offset(leaf, ref);
	ref_count = btrfs_extent_data_ref_count(leaf, ref);

	root = read_fs_root(rc->extent_root->fs_info, ref_root);
	if (IS_ERR(root)) {
		err = PTR_ERR(root);
		goto out;
	}

	key.objectid = ref_objectid;
	key.offset = ref_offset;
	key.type = BTRFS_EXTENT_DATA_KEY;

	path->search_commit_root = 1;
	path->skip_locking = 1;
	ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
	if (ret < 0) {
		err = ret;
		goto out;
	}

	leaf = path->nodes[0];
	nritems = btrfs_header_nritems(leaf);
	/*
	 * the references in tree blocks that use full backrefs
	 * are not counted
	 */
	if (block_use_full_backref(rc, leaf))
		counted = 0;
	else
		counted = 1;
	rb_node = tree_search(blocks, leaf->start);
	if (rb_node) {
		if (counted)
			added = 1;
		else
			path->slots[0] = nritems;
	}

	while (ref_count > 0) {
		while (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0) {
				err = ret;
				goto out;
			}
			if (ret > 0) {
				WARN_ON(1);
				goto out;
			}

			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
			added = 0;

			if (block_use_full_backref(rc, leaf))
				counted = 0;
			else
				counted = 1;
			rb_node = tree_search(blocks, leaf->start);
			if (rb_node) {
				if (counted)
					added = 1;
				else
					path->slots[0] = nritems;
			}
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid != ref_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY) {
			WARN_ON(1);
			break;
		}

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);

		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			goto next;

		if (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		    extent_key->objectid)
			goto next;

		key.offset -= btrfs_file_extent_offset(leaf, fi);
		if (key.offset != ref_offset)
			goto next;

		if (counted)
			ref_count--;
		if (added)
			goto next;

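		/*
		 * queue the leaf for processing if it hasn't been
		 * relocated yet
		 */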
		if (!tree_block_processed(leaf->start, leaf->len, rc)) {
			block = kmalloc(sizeof(*block), GFP_NOFS);
			if (!block) {
				err = -ENOMEM;
				break;
			}
			block->bytenr = leaf->start;
			btrfs_item_key_to_cpu(leaf, &block->key, 0);
			block->level = 0;
			block->key_ready = 1;
			rb_node = tree_insert(blocks, block->bytenr,
					      &block->rb_node);
			BUG_ON(rb_node);
		}
		if (counted)
			added = 1;
		else
			path->slots[0] = nritems;
next:
		path->slots[0]++;
	}
out:
	btrfs_free_path(path);
	return err;
}

/*
 * helper to find all tree blocks that reference a given data extent
 */
static noinline_for_stack
int add_data_references(struct reloc_control *rc,
			struct btrfs_key *extent_key,
			struct btrfs_path *path,
			struct rb_root *blocks)
{
	struct btrfs_key key;
	struct extent_buffer *eb;
	struct btrfs_extent_data_ref *dref;
	struct btrfs_extent_inline_ref *iref;
	unsigned long ptr;
	unsigned long end;
	u32 blocksize;
	int ret;
	int err = 0;

	ret = get_new_location(rc->data_inode, NULL, extent_key->objectid,
			       extent_key->offset);
	BUG_ON(ret < 0);
	if (ret > 0) {
		/* the relocated data is fragmented */
		rc->extents_skipped++;
		btrfs_release_path(rc->extent_root, path);
		return 0;
	}

	blocksize = btrfs_level_size(rc->extent_root, 0);

	/* walk the backrefs inlined in the extent item first */
	eb = path->nodes[0];
	ptr = btrfs_item_ptr_offset(eb, path->slots[0]);
	end = ptr + btrfs_item_size_nr(eb, path->slots[0]);
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
	if (ptr + sizeof(struct btrfs_extent_item_v0) == end)
		ptr = end;
	else
#endif
		ptr += sizeof(struct btrfs_extent_item);

	while (ptr < end) {
		iref = (struct btrfs_extent_inline_ref *)ptr;
		key.type = btrfs_extent_inline_ref_type(eb, iref);
		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
			key.offset = btrfs_extent_inline_ref_offset(eb, iref);
			ret = __add_tree_block(rc, key.offset, blocksize,
					       blocks);
		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
			dref = (struct btrfs_extent_data_ref *)(&iref->offset);
			ret = find_data_references(rc, extent_key,
						   eb, dref, blocks);
		} else {
			BUG();
		}
		ptr += btrfs_extent_inline_ref_size(key.type);
	}
	WARN_ON(ptr > end);

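	/*
	 * then walk the keyed backref items that follow the extent
	 * item, in case not all references were inlined
	 */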
	while (1) {
		cond_resched();
		eb = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(eb)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret < 0) {
				err = ret;
				break;
			}
			if (ret > 0)
				break;
			eb = path->nodes[0];
		}

		btrfs_item_key_to_cpu(eb, &key, path->slots[0]);
		if (key.objectid != extent_key->objectid)
			break;

#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
		if (key.type == BTRFS_SHARED_DATA_REF_KEY ||
		    key.type == BTRFS_EXTENT_REF_V0_KEY) {
#else
		BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY);
		if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
#endif
			ret = __add_tree_block(rc, key.offset, blocksize,
					       blocks);
		} else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
			dref = btrfs_item_ptr(eb, path->slots[0],
					      struct btrfs_extent_data_ref);
			ret = find_data_references(rc, extent_key,
						   eb, dref, blocks);
		} else {
			ret = 0;
		}
		if (ret) {
			err = ret;
			break;
		}
		path->slots[0]++;
	}
	btrfs_release_path(rc->extent_root, path);
	if (err)
		free_block_list(blocks);
	return err;
}

/*
 * helper to find the next unprocessed extent
 */
static noinline_for_stack
int find_next_extent(struct btrfs_trans_handle *trans,
		     struct reloc_control *rc, struct btrfs_path *path)
{
	struct btrfs_key key;
	struct extent_buffer *leaf;
	u64 start, end, last;
	int ret;

	last = rc->block_group->key.objectid + rc->block_group->key.offset;
	while (1) {
		cond_resched();
		if (rc->search_start >= last) {
			ret = 1;
			break;
		}

		key.objectid = rc->search_start;
		key.type = BTRFS_EXTENT_ITEM_KEY;
		key.offset = 0;

		path->search_commit_root = 1;
		path->skip_locking = 1;
		ret = btrfs_search_slot(NULL, rc->extent_root, &key, path,
					0, 0);
		if (ret < 0)
			break;
next:
		leaf = path->nodes[0];
		if (path->slots[0] >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(rc->extent_root, path);
			if (ret != 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		if (key.objectid >= last) {
			ret = 1;
			break;
		}

		if (key.type != BTRFS_EXTENT_ITEM_KEY ||
		    key.objectid + key.offset <= rc->search_start) {
			path->slots[0]++;
			goto next;
		}

		ret = find_first_extent_bit(&rc->processed_blocks,
					    key.objectid, &start, &end,
					    EXTENT_DIRTY);

		if (ret == 0 && start <= key.objectid) {
			btrfs_release_path(rc->extent_root, path);
			rc->search_start = end + 1;
		} else {
			rc->search_start = key.objectid + key.offset;
			return 0;
		}
	}
	btrfs_release_path(rc->extent_root, path);
	return ret;
}

static void set_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->trans_mutex);
	fs_info->reloc_ctl = rc;
	mutex_unlock(&fs_info->trans_mutex);
}

static void unset_reloc_control(struct reloc_control *rc)
{
	struct btrfs_fs_info *fs_info = rc->extent_root->fs_info;

	mutex_lock(&fs_info->trans_mutex);
	fs_info->reloc_ctl = NULL;
	mutex_unlock(&fs_info->trans_mutex);
}

static int check_extent_flags(u64 flags)
{
	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		return 1;
	if (!(flags & BTRFS_EXTENT_FLAG_DATA) &&
	    !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK))
		return 1;
	if ((flags & BTRFS_EXTENT_FLAG_DATA) &&
	    (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF))
		return 1;
	return 0;
}

static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
{
	struct rb_root blocks = RB_ROOT;
	struct btrfs_key key;
	struct btrfs_trans_handle *trans = NULL;
	struct btrfs_path *path;
	struct btrfs_extent_item *ei;
	unsigned long nr;
	u64 flags;
	u32 item_size;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	rc->search_start = rc->block_group->key.objectid;
	clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY,
			  GFP_NOFS);

	rc->create_reloc_root = 1;
	set_reloc_control(rc);

	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);

	while (1) {
		trans = btrfs_start_transaction(rc->extent_root, 1);

		ret = find_next_extent(trans, rc, path);
		if (ret < 0)
			err = ret;
		if (ret != 0)
			break;

		rc->extents_found++;

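		/*
		 * use the extent flags to tell tree blocks from data
		 * extents; v0 extent items have no flags, so the owner of
		 * the first backref is used to guess the type instead
		 */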
		ei = btrfs_item_ptr(path->nodes[0], path->slots[0],
				    struct btrfs_extent_item);
		btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]);
		item_size = btrfs_item_size_nr(path->nodes[0],
					       path->slots[0]);
		if (item_size >= sizeof(*ei)) {
			flags = btrfs_extent_flags(path->nodes[0], ei);
			ret = check_extent_flags(flags);
			BUG_ON(ret);
		} else {
#ifdef BTRFS_COMPAT_EXTENT_TREE_V0
			u64 ref_owner;
			int path_change = 0;

			BUG_ON(item_size !=
			       sizeof(struct btrfs_extent_item_v0));
			ret = get_ref_objectid_v0(rc, path, &key, &ref_owner,
						  &path_change);
			if (ref_owner < BTRFS_FIRST_FREE_OBJECTID)
				flags = BTRFS_EXTENT_FLAG_TREE_BLOCK;
			else
				flags = BTRFS_EXTENT_FLAG_DATA;

			if (path_change) {
				btrfs_release_path(rc->extent_root, path);

				path->search_commit_root = 1;
				path->skip_locking = 1;
				ret = btrfs_search_slot(NULL, rc->extent_root,
							&key, path, 0, 0);
				if (ret < 0) {
					err = ret;
					break;
				}
				BUG_ON(ret > 0);
			}
#else
			BUG();
#endif
		}

		if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
			ret = add_tree_block(rc, &key, path, &blocks);
		} else if (rc->stage == UPDATE_DATA_PTRS &&
			   (flags & BTRFS_EXTENT_FLAG_DATA)) {
			ret = add_data_references(rc, &key, path, &blocks);
		} else {
			btrfs_release_path(rc->extent_root, path);
			ret = 0;
		}
		if (ret < 0) {
			err = ret;
			break;
		}

		if (!RB_EMPTY_ROOT(&blocks)) {
			ret = relocate_tree_blocks(trans, rc, &blocks);
			if (ret < 0) {
				err = ret;
				break;
			}
		}

		nr = trans->blocks_used;
		btrfs_end_transaction_throttle(trans, rc->extent_root);
		trans = NULL;
		btrfs_btree_balance_dirty(rc->extent_root, nr);

		if (rc->stage == MOVE_DATA_EXTENTS &&
		    (flags & BTRFS_EXTENT_FLAG_DATA)) {
			rc->found_file_extent = 1;
			ret = relocate_data_extent(rc->data_inode, &key);
			if (ret < 0) {
				err = ret;
				break;
			}
		}
	}
	btrfs_free_path(path);

	if (trans) {
		nr = trans->blocks_used;
		btrfs_end_transaction(trans, rc->extent_root);
		btrfs_btree_balance_dirty(rc->extent_root, nr);
	}

	rc->create_reloc_root = 0;
	smp_mb();

	if (rc->extents_found > 0) {
		trans = btrfs_start_transaction(rc->extent_root, 1);
		btrfs_commit_transaction(trans, rc->extent_root);
	}

	merge_reloc_roots(rc);

	unset_reloc_control(rc);

	/* get rid of pinned extents */
	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);

	return err;
}

static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 u64 objectid, u64 size)
{
	struct btrfs_path *path;
	struct btrfs_inode_item *item;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		goto out;

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
	btrfs_set_inode_generation(leaf, item, 1);
	btrfs_set_inode_size(leaf, item, size);
	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * helper to create an inode for data relocation.
 * the inode lives in the data relocation tree and its link count is 0.
 */
static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
					struct btrfs_block_group_cache *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_key key;
	unsigned long nr;
	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
	int err = 0;

	root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID);
	if (IS_ERR(root))
		return ERR_CAST(root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
	if (err)
		goto out;

	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
	BUG_ON(err);

	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
				       group->key.offset, 0, group->key.offset,
				       0, 0, 0);
	BUG_ON(err);

	key.objectid = objectid;
	key.type = BTRFS_INODE_ITEM_KEY;
	key.offset = 0;
	inode = btrfs_iget(root->fs_info->sb, &key, root);
	BUG_ON(IS_ERR(inode) || is_bad_inode(inode));
	BTRFS_I(inode)->index_cnt = group->key.objectid;

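	/*
	 * since the link count is 0, add an orphan item so the inode is
	 * cleaned up if we crash before relocation is done with it
	 */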
	err = btrfs_orphan_add(trans, inode);
out:
	nr = trans->blocks_used;
	btrfs_end_transaction(trans, root);

	btrfs_btree_balance_dirty(root, nr);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}

/*
 * function to relocate all extents in a block group.
 */
int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
{
	struct btrfs_fs_info *fs_info = extent_root->fs_info;
	struct reloc_control *rc;
	int ret;
	int err = 0;

	rc = kzalloc(sizeof(*rc), GFP_NOFS);
	if (!rc)
		return -ENOMEM;

	mapping_tree_init(&rc->reloc_root_tree);
	extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS);
	INIT_LIST_HEAD(&rc->reloc_roots);

	rc->block_group = btrfs_lookup_block_group(fs_info, group_start);
	BUG_ON(!rc->block_group);

	btrfs_init_workers(&rc->workers, "relocate",
			   fs_info->thread_pool_size);

	rc->extent_root = extent_root;
	btrfs_prepare_block_group_relocation(extent_root, rc->block_group);

	rc->data_inode = create_reloc_inode(fs_info, rc->block_group);
	if (IS_ERR(rc->data_inode)) {
		err = PTR_ERR(rc->data_inode);
		rc->data_inode = NULL;
		goto out;
	}

	printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n",
	       (unsigned long long)rc->block_group->key.objectid,
	       (unsigned long long)rc->block_group->flags);

	btrfs_start_delalloc_inodes(fs_info->tree_root);
	btrfs_wait_ordered_extents(fs_info->tree_root, 0);

	while (1) {
		mutex_lock(&fs_info->cleaner_mutex);
		btrfs_clean_old_snapshots(fs_info->tree_root);
		mutex_unlock(&fs_info->cleaner_mutex);

		rc->extents_found = 0;
		rc->extents_skipped = 0;

		ret = relocate_block_group(rc);
		if (ret < 0) {
			err = ret;
			break;
		}

		if (rc->extents_found == 0)
			break;

		printk(KERN_INFO "btrfs: found %llu extents\n",
		       (unsigned long long)rc->extents_found);

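		/*
		 * relocation runs in two stages: once all the data extents
		 * have been copied, wait for the ordered IO to finish and
		 * switch to the UPDATE_DATA_PTRS stage to rewrite the
		 * references to point at the new copies
		 */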
		if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) {
			btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1);
			invalidate_mapping_pages(rc->data_inode->i_mapping,
						 0, -1);
			rc->stage = UPDATE_DATA_PTRS;
		} else if (rc->stage == UPDATE_DATA_PTRS &&
			   rc->extents_skipped >= rc->extents_found) {
			iput(rc->data_inode);
			rc->data_inode = create_reloc_inode(fs_info,
							    rc->block_group);
			if (IS_ERR(rc->data_inode)) {
				err = PTR_ERR(rc->data_inode);
				rc->data_inode = NULL;
				break;
			}
			rc->stage = MOVE_DATA_EXTENTS;
			rc->found_file_extent = 0;
		}
	}

	filemap_fdatawrite_range(fs_info->btree_inode->i_mapping,
				 rc->block_group->key.objectid,
				 rc->block_group->key.objectid +
				 rc->block_group->key.offset - 1);

	WARN_ON(rc->block_group->pinned > 0);
	WARN_ON(rc->block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0);
out:
	iput(rc->data_inode);
	btrfs_stop_workers(&rc->workers);
	btrfs_put_block_group(rc->block_group);
	kfree(rc);
	return err;
}

/*
 * recover relocation interrupted by a system crash.
 *
 * this function resumes merging reloc trees with corresponding fs trees.
 * this is important for preserving the sharing of tree blocks
 */
int btrfs_recover_relocation(struct btrfs_root *root)
{
	LIST_HEAD(reloc_roots);
	struct btrfs_key key;
	struct btrfs_root *fs_root;
	struct btrfs_root *reloc_root;
	struct btrfs_path *path;
	struct extent_buffer *leaf;
	struct reloc_control *rc = NULL;
	struct btrfs_trans_handle *trans;
	int ret;
	int err = 0;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	key.type = BTRFS_ROOT_ITEM_KEY;
	key.offset = (u64)-1;

	while (1) {
		ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key,
					path, 0, 0);
		if (ret < 0) {
			err = ret;
			goto out;
		}
		if (ret > 0) {
			if (path->slots[0] == 0)
				break;
			path->slots[0]--;
		}
		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
		btrfs_release_path(root->fs_info->tree_root, path);

		if (key.objectid != BTRFS_TREE_RELOC_OBJECTID ||
		    key.type != BTRFS_ROOT_ITEM_KEY)
			break;

		reloc_root = btrfs_read_fs_root_no_radix(root, &key);
		if (IS_ERR(reloc_root)) {
			err = PTR_ERR(reloc_root);
			goto out;
		}

		list_add(&reloc_root->root_list, &reloc_roots);

		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
			fs_root = read_fs_root(root->fs_info,
					       reloc_root->root_key.offset);
			if (IS_ERR(fs_root)) {
				err = PTR_ERR(fs_root);
				goto out;
			}
		}

		if (key.offset == 0)
			break;

		key.offset--;
	}
	btrfs_release_path(root->fs_info->tree_root, path);

	if (list_empty(&reloc_roots))
		goto out;

	rc = kzalloc(sizeof(*rc), GFP_NOFS);
	if (!rc) {
		err = -ENOMEM;
		goto out;
	}

	mapping_tree_init(&rc->reloc_root_tree);
	INIT_LIST_HEAD(&rc->reloc_roots);
	btrfs_init_workers(&rc->workers, "relocate",
			   root->fs_info->thread_pool_size);
	rc->extent_root = root->fs_info->extent_root;

	set_reloc_control(rc);

	while (!list_empty(&reloc_roots)) {
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);
		list_del(&reloc_root->root_list);

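		/*
		 * a reloc root whose refs have dropped to zero belongs to
		 * a deleted fs tree; it is queued on rc->reloc_roots so
		 * merge_reloc_roots() can clean it up
		 */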
		if (btrfs_root_refs(&reloc_root->root_item) == 0) {
			list_add_tail(&reloc_root->root_list,
				      &rc->reloc_roots);
			continue;
		}

		fs_root = read_fs_root(root->fs_info,
				       reloc_root->root_key.offset);
		BUG_ON(IS_ERR(fs_root));

		__add_reloc_root(reloc_root);
		fs_root->reloc_root = reloc_root;
	}

	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);

	merge_reloc_roots(rc);

	unset_reloc_control(rc);

	trans = btrfs_start_transaction(rc->extent_root, 1);
	btrfs_commit_transaction(trans, rc->extent_root);
out:
	if (rc) {
		btrfs_stop_workers(&rc->workers);
		kfree(rc);
	}
	while (!list_empty(&reloc_roots)) {
		reloc_root = list_entry(reloc_roots.next,
					struct btrfs_root, root_list);
		list_del(&reloc_root->root_list);
		free_extent_buffer(reloc_root->node);
		free_extent_buffer(reloc_root->commit_root);
		kfree(reloc_root);
	}
	btrfs_free_path(path);

	if (err == 0) {
		/* cleanup orphan inodes in the data relocation tree */
		fs_root = read_fs_root(root->fs_info,
				       BTRFS_DATA_RELOC_TREE_OBJECTID);
		if (IS_ERR(fs_root))
			err = PTR_ERR(fs_root);
	}
	return err;
}

/*
 * helper to add ordered checksums for data relocation.
 *
 * cloning checksums properly handles the nodatasum extents.
 * it also saves CPU time by not recalculating the checksums.
 */
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{
	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	size_t offset;
	int ret;
	u64 disk_bytenr;
	LIST_HEAD(list);

	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);

	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
				       disk_bytenr + len - 1, &list);

	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		/* rebase the checksums onto the new disk location */
		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}
	btrfs_put_ordered_extent(ordered);
	return 0;
}