1 /* 2 * Copyright (C) 2009 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 19 #include <linux/sched.h> 20 #include <linux/pagemap.h> 21 #include <linux/writeback.h> 22 #include <linux/blkdev.h> 23 #include <linux/rbtree.h> 24 #include "ctree.h" 25 #include "disk-io.h" 26 #include "transaction.h" 27 #include "volumes.h" 28 #include "locking.h" 29 #include "btrfs_inode.h" 30 #include "async-thread.h" 31 32 /* 33 * backref_node, mapping_node and tree_block start with this 34 */ 35 struct tree_entry { 36 struct rb_node rb_node; 37 u64 bytenr; 38 }; 39 40 /* 41 * present a tree block in the backref cache 42 */ 43 struct backref_node { 44 struct rb_node rb_node; 45 u64 bytenr; 46 /* objectid tree block owner */ 47 u64 owner; 48 /* list of upper level blocks reference this block */ 49 struct list_head upper; 50 /* list of child blocks in the cache */ 51 struct list_head lower; 52 /* NULL if this node is not tree root */ 53 struct btrfs_root *root; 54 /* extent buffer got by COW the block */ 55 struct extent_buffer *eb; 56 /* level of tree block */ 57 unsigned int level:8; 58 /* 1 if the block is root of old snapshot */ 59 unsigned int old_root:1; 60 /* 1 if no child blocks in the cache */ 61 unsigned int lowest:1; 62 /* is the extent buffer locked */ 63 unsigned int locked:1; 64 /* has the block been processed */ 65 unsigned int processed:1; 66 /* have backrefs of this block been checked */ 67 unsigned int checked:1; 68 }; 69 70 /* 71 * present a block pointer in the backref cache 72 */ 73 struct backref_edge { 74 struct list_head list[2]; 75 struct backref_node *node[2]; 76 u64 blockptr; 77 }; 78 79 #define LOWER 0 80 #define UPPER 1 81 82 struct backref_cache { 83 /* red black tree of all backref nodes in the cache */ 84 struct rb_root rb_root; 85 /* list of backref nodes with no child block in the cache */ 86 struct list_head pending[BTRFS_MAX_LEVEL]; 87 spinlock_t lock; 88 }; 89 90 /* 91 * map address of tree root to tree 92 */ 93 struct mapping_node { 94 struct rb_node rb_node; 95 u64 bytenr; 96 void *data; 97 }; 98 99 struct mapping_tree { 100 struct rb_root rb_root; 101 spinlock_t lock; 102 }; 103 104 /* 105 * present a tree block to process 106 */ 107 struct tree_block { 108 struct rb_node rb_node; 109 u64 bytenr; 110 struct btrfs_key key; 111 unsigned int level:8; 112 unsigned int key_ready:1; 113 }; 114 115 /* inode vector */ 116 #define INODEVEC_SIZE 16 117 118 struct inodevec { 119 struct list_head list; 120 struct inode *inode[INODEVEC_SIZE]; 121 int nr; 122 }; 123 124 struct reloc_control { 125 /* block group to relocate */ 126 struct btrfs_block_group_cache *block_group; 127 /* extent tree */ 128 struct btrfs_root *extent_root; 129 /* inode for moving data */ 130 struct inode *data_inode; 131 struct btrfs_workers workers; 132 /* tree blocks have been processed */ 133 struct extent_io_tree processed_blocks; 134 /* map start of tree root to corresponding reloc tree */ 135 struct mapping_tree reloc_root_tree; 136 /* list of reloc trees */ 137 struct list_head reloc_roots; 138 u64 search_start; 139 u64 extents_found; 140 u64 extents_skipped; 141 int stage; 142 int create_reloc_root; 143 unsigned int found_file_extent:1; 144 unsigned int found_old_snapshot:1; 145 }; 146 147 /* stages of data relocation */ 148 #define MOVE_DATA_EXTENTS 0 149 #define UPDATE_DATA_PTRS 1 150 151 /* 152 * merge reloc tree to corresponding fs tree in worker threads 153 */ 154 struct async_merge { 155 struct btrfs_work work; 156 struct reloc_control *rc; 157 struct btrfs_root *root; 158 struct completion *done; 159 atomic_t *num_pending; 160 }; 161 162 static void mapping_tree_init(struct mapping_tree *tree) 163 { 164 tree->rb_root.rb_node = NULL; 165 spin_lock_init(&tree->lock); 166 } 167 168 static void backref_cache_init(struct backref_cache *cache) 169 { 170 int i; 171 cache->rb_root.rb_node = NULL; 172 for (i = 0; i < BTRFS_MAX_LEVEL; i++) 173 INIT_LIST_HEAD(&cache->pending[i]); 174 spin_lock_init(&cache->lock); 175 } 176 177 static void backref_node_init(struct backref_node *node) 178 { 179 memset(node, 0, sizeof(*node)); 180 INIT_LIST_HEAD(&node->upper); 181 INIT_LIST_HEAD(&node->lower); 182 RB_CLEAR_NODE(&node->rb_node); 183 } 184 185 static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, 186 struct rb_node *node) 187 { 188 struct rb_node **p = &root->rb_node; 189 struct rb_node *parent = NULL; 190 struct tree_entry *entry; 191 192 while (*p) { 193 parent = *p; 194 entry = rb_entry(parent, struct tree_entry, rb_node); 195 196 if (bytenr < entry->bytenr) 197 p = &(*p)->rb_left; 198 else if (bytenr > entry->bytenr) 199 p = &(*p)->rb_right; 200 else 201 return parent; 202 } 203 204 rb_link_node(node, parent, p); 205 rb_insert_color(node, root); 206 return NULL; 207 } 208 209 static struct rb_node *tree_search(struct rb_root *root, u64 bytenr) 210 { 211 struct rb_node *n = root->rb_node; 212 struct tree_entry *entry; 213 214 while (n) { 215 entry = rb_entry(n, struct tree_entry, rb_node); 216 217 if (bytenr < entry->bytenr) 218 n = n->rb_left; 219 else if (bytenr > entry->bytenr) 220 n = n->rb_right; 221 else 222 return n; 223 } 224 return NULL; 225 } 226 227 /* 228 * walk up backref nodes until reach node presents tree root 229 */ 230 static struct backref_node *walk_up_backref(struct backref_node *node, 231 struct backref_edge *edges[], 232 int *index) 233 { 234 struct backref_edge *edge; 235 int idx = *index; 236 237 while (!list_empty(&node->upper)) { 238 edge = list_entry(node->upper.next, 239 struct backref_edge, list[LOWER]); 240 edges[idx++] = edge; 241 node = edge->node[UPPER]; 242 } 243 *index = idx; 244 return node; 245 } 246 247 /* 248 * walk down backref nodes to find start of next reference path 249 */ 250 static struct backref_node *walk_down_backref(struct backref_edge *edges[], 251 int *index) 252 { 253 struct backref_edge *edge; 254 struct backref_node *lower; 255 int idx = *index; 256 257 while (idx > 0) { 258 edge = edges[idx - 1]; 259 lower = edge->node[LOWER]; 260 if (list_is_last(&edge->list[LOWER], &lower->upper)) { 261 idx--; 262 continue; 263 } 264 edge = list_entry(edge->list[LOWER].next, 265 struct backref_edge, list[LOWER]); 266 edges[idx - 1] = edge; 267 *index = idx; 268 return edge->node[UPPER]; 269 } 270 *index = 0; 271 return NULL; 272 } 273 274 static void drop_node_buffer(struct backref_node *node) 275 { 276 if (node->eb) { 277 if (node->locked) { 278 btrfs_tree_unlock(node->eb); 279 node->locked = 0; 280 } 281 free_extent_buffer(node->eb); 282 node->eb = NULL; 283 } 284 } 285 286 static void drop_backref_node(struct backref_cache *tree, 287 struct backref_node *node) 288 { 289 BUG_ON(!node->lowest); 290 BUG_ON(!list_empty(&node->upper)); 291 292 drop_node_buffer(node); 293 list_del(&node->lower); 294 295 rb_erase(&node->rb_node, &tree->rb_root); 296 kfree(node); 297 } 298 299 /* 300 * remove a backref node from the backref cache 301 */ 302 static void remove_backref_node(struct backref_cache *cache, 303 struct backref_node *node) 304 { 305 struct backref_node *upper; 306 struct backref_edge *edge; 307 308 if (!node) 309 return; 310 311 BUG_ON(!node->lowest); 312 while (!list_empty(&node->upper)) { 313 edge = list_entry(node->upper.next, struct backref_edge, 314 list[LOWER]); 315 upper = edge->node[UPPER]; 316 list_del(&edge->list[LOWER]); 317 list_del(&edge->list[UPPER]); 318 kfree(edge); 319 /* 320 * add the node to pending list if no other 321 * child block cached. 322 */ 323 if (list_empty(&upper->lower)) { 324 list_add_tail(&upper->lower, 325 &cache->pending[upper->level]); 326 upper->lowest = 1; 327 } 328 } 329 drop_backref_node(cache, node); 330 } 331 332 /* 333 * find reloc tree by address of tree root 334 */ 335 static struct btrfs_root *find_reloc_root(struct reloc_control *rc, 336 u64 bytenr) 337 { 338 struct rb_node *rb_node; 339 struct mapping_node *node; 340 struct btrfs_root *root = NULL; 341 342 spin_lock(&rc->reloc_root_tree.lock); 343 rb_node = tree_search(&rc->reloc_root_tree.rb_root, bytenr); 344 if (rb_node) { 345 node = rb_entry(rb_node, struct mapping_node, rb_node); 346 root = (struct btrfs_root *)node->data; 347 } 348 spin_unlock(&rc->reloc_root_tree.lock); 349 return root; 350 } 351 352 static int is_cowonly_root(u64 root_objectid) 353 { 354 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 355 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 356 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 357 root_objectid == BTRFS_DEV_TREE_OBJECTID || 358 root_objectid == BTRFS_TREE_LOG_OBJECTID || 359 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 360 return 1; 361 return 0; 362 } 363 364 static struct btrfs_root *read_fs_root(struct btrfs_fs_info *fs_info, 365 u64 root_objectid) 366 { 367 struct btrfs_key key; 368 369 key.objectid = root_objectid; 370 key.type = BTRFS_ROOT_ITEM_KEY; 371 if (is_cowonly_root(root_objectid)) 372 key.offset = 0; 373 else 374 key.offset = (u64)-1; 375 376 return btrfs_read_fs_root_no_name(fs_info, &key); 377 } 378 379 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 380 static noinline_for_stack 381 struct btrfs_root *find_tree_root(struct reloc_control *rc, 382 struct extent_buffer *leaf, 383 struct btrfs_extent_ref_v0 *ref0) 384 { 385 struct btrfs_root *root; 386 u64 root_objectid = btrfs_ref_root_v0(leaf, ref0); 387 u64 generation = btrfs_ref_generation_v0(leaf, ref0); 388 389 BUG_ON(root_objectid == BTRFS_TREE_RELOC_OBJECTID); 390 391 root = read_fs_root(rc->extent_root->fs_info, root_objectid); 392 BUG_ON(IS_ERR(root)); 393 394 if (root->ref_cows && 395 generation != btrfs_root_generation(&root->root_item)) 396 return NULL; 397 398 return root; 399 } 400 #endif 401 402 static noinline_for_stack 403 int find_inline_backref(struct extent_buffer *leaf, int slot, 404 unsigned long *ptr, unsigned long *end) 405 { 406 struct btrfs_extent_item *ei; 407 struct btrfs_tree_block_info *bi; 408 u32 item_size; 409 410 item_size = btrfs_item_size_nr(leaf, slot); 411 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 412 if (item_size < sizeof(*ei)) { 413 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 414 return 1; 415 } 416 #endif 417 ei = btrfs_item_ptr(leaf, slot, struct btrfs_extent_item); 418 WARN_ON(!(btrfs_extent_flags(leaf, ei) & 419 BTRFS_EXTENT_FLAG_TREE_BLOCK)); 420 421 if (item_size <= sizeof(*ei) + sizeof(*bi)) { 422 WARN_ON(item_size < sizeof(*ei) + sizeof(*bi)); 423 return 1; 424 } 425 426 bi = (struct btrfs_tree_block_info *)(ei + 1); 427 *ptr = (unsigned long)(bi + 1); 428 *end = (unsigned long)ei + item_size; 429 return 0; 430 } 431 432 /* 433 * build backref tree for a given tree block. root of the backref tree 434 * corresponds the tree block, leaves of the backref tree correspond 435 * roots of b-trees that reference the tree block. 436 * 437 * the basic idea of this function is check backrefs of a given block 438 * to find upper level blocks that refernece the block, and then check 439 * bakcrefs of these upper level blocks recursively. the recursion stop 440 * when tree root is reached or backrefs for the block is cached. 441 * 442 * NOTE: if we find backrefs for a block are cached, we know backrefs 443 * for all upper level blocks that directly/indirectly reference the 444 * block are also cached. 445 */ 446 static struct backref_node *build_backref_tree(struct reloc_control *rc, 447 struct backref_cache *cache, 448 struct btrfs_key *node_key, 449 int level, u64 bytenr) 450 { 451 struct btrfs_path *path1; 452 struct btrfs_path *path2; 453 struct extent_buffer *eb; 454 struct btrfs_root *root; 455 struct backref_node *cur; 456 struct backref_node *upper; 457 struct backref_node *lower; 458 struct backref_node *node = NULL; 459 struct backref_node *exist = NULL; 460 struct backref_edge *edge; 461 struct rb_node *rb_node; 462 struct btrfs_key key; 463 unsigned long end; 464 unsigned long ptr; 465 LIST_HEAD(list); 466 int ret; 467 int err = 0; 468 469 path1 = btrfs_alloc_path(); 470 path2 = btrfs_alloc_path(); 471 if (!path1 || !path2) { 472 err = -ENOMEM; 473 goto out; 474 } 475 476 node = kmalloc(sizeof(*node), GFP_NOFS); 477 if (!node) { 478 err = -ENOMEM; 479 goto out; 480 } 481 482 backref_node_init(node); 483 node->bytenr = bytenr; 484 node->owner = 0; 485 node->level = level; 486 node->lowest = 1; 487 cur = node; 488 again: 489 end = 0; 490 ptr = 0; 491 key.objectid = cur->bytenr; 492 key.type = BTRFS_EXTENT_ITEM_KEY; 493 key.offset = (u64)-1; 494 495 path1->search_commit_root = 1; 496 path1->skip_locking = 1; 497 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path1, 498 0, 0); 499 if (ret < 0) { 500 err = ret; 501 goto out; 502 } 503 BUG_ON(!ret || !path1->slots[0]); 504 505 path1->slots[0]--; 506 507 WARN_ON(cur->checked); 508 if (!list_empty(&cur->upper)) { 509 /* 510 * the backref was added previously when processsing 511 * backref of type BTRFS_TREE_BLOCK_REF_KEY 512 */ 513 BUG_ON(!list_is_singular(&cur->upper)); 514 edge = list_entry(cur->upper.next, struct backref_edge, 515 list[LOWER]); 516 BUG_ON(!list_empty(&edge->list[UPPER])); 517 exist = edge->node[UPPER]; 518 /* 519 * add the upper level block to pending list if we need 520 * check its backrefs 521 */ 522 if (!exist->checked) 523 list_add_tail(&edge->list[UPPER], &list); 524 } else { 525 exist = NULL; 526 } 527 528 while (1) { 529 cond_resched(); 530 eb = path1->nodes[0]; 531 532 if (ptr >= end) { 533 if (path1->slots[0] >= btrfs_header_nritems(eb)) { 534 ret = btrfs_next_leaf(rc->extent_root, path1); 535 if (ret < 0) { 536 err = ret; 537 goto out; 538 } 539 if (ret > 0) 540 break; 541 eb = path1->nodes[0]; 542 } 543 544 btrfs_item_key_to_cpu(eb, &key, path1->slots[0]); 545 if (key.objectid != cur->bytenr) { 546 WARN_ON(exist); 547 break; 548 } 549 550 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 551 ret = find_inline_backref(eb, path1->slots[0], 552 &ptr, &end); 553 if (ret) 554 goto next; 555 } 556 } 557 558 if (ptr < end) { 559 /* update key for inline back ref */ 560 struct btrfs_extent_inline_ref *iref; 561 iref = (struct btrfs_extent_inline_ref *)ptr; 562 key.type = btrfs_extent_inline_ref_type(eb, iref); 563 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 564 WARN_ON(key.type != BTRFS_TREE_BLOCK_REF_KEY && 565 key.type != BTRFS_SHARED_BLOCK_REF_KEY); 566 } 567 568 if (exist && 569 ((key.type == BTRFS_TREE_BLOCK_REF_KEY && 570 exist->owner == key.offset) || 571 (key.type == BTRFS_SHARED_BLOCK_REF_KEY && 572 exist->bytenr == key.offset))) { 573 exist = NULL; 574 goto next; 575 } 576 577 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 578 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || 579 key.type == BTRFS_EXTENT_REF_V0_KEY) { 580 if (key.objectid == key.offset && 581 key.type == BTRFS_EXTENT_REF_V0_KEY) { 582 struct btrfs_extent_ref_v0 *ref0; 583 ref0 = btrfs_item_ptr(eb, path1->slots[0], 584 struct btrfs_extent_ref_v0); 585 root = find_tree_root(rc, eb, ref0); 586 if (root) 587 cur->root = root; 588 else 589 cur->old_root = 1; 590 break; 591 } 592 #else 593 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 594 if (key.type == BTRFS_SHARED_BLOCK_REF_KEY) { 595 #endif 596 if (key.objectid == key.offset) { 597 /* 598 * only root blocks of reloc trees use 599 * backref of this type. 600 */ 601 root = find_reloc_root(rc, cur->bytenr); 602 BUG_ON(!root); 603 cur->root = root; 604 break; 605 } 606 607 edge = kzalloc(sizeof(*edge), GFP_NOFS); 608 if (!edge) { 609 err = -ENOMEM; 610 goto out; 611 } 612 rb_node = tree_search(&cache->rb_root, key.offset); 613 if (!rb_node) { 614 upper = kmalloc(sizeof(*upper), GFP_NOFS); 615 if (!upper) { 616 kfree(edge); 617 err = -ENOMEM; 618 goto out; 619 } 620 backref_node_init(upper); 621 upper->bytenr = key.offset; 622 upper->owner = 0; 623 upper->level = cur->level + 1; 624 /* 625 * backrefs for the upper level block isn't 626 * cached, add the block to pending list 627 */ 628 list_add_tail(&edge->list[UPPER], &list); 629 } else { 630 upper = rb_entry(rb_node, struct backref_node, 631 rb_node); 632 INIT_LIST_HEAD(&edge->list[UPPER]); 633 } 634 list_add(&edge->list[LOWER], &cur->upper); 635 edge->node[UPPER] = upper; 636 edge->node[LOWER] = cur; 637 638 goto next; 639 } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { 640 goto next; 641 } 642 643 /* key.type == BTRFS_TREE_BLOCK_REF_KEY */ 644 root = read_fs_root(rc->extent_root->fs_info, key.offset); 645 if (IS_ERR(root)) { 646 err = PTR_ERR(root); 647 goto out; 648 } 649 650 if (btrfs_root_level(&root->root_item) == cur->level) { 651 /* tree root */ 652 BUG_ON(btrfs_root_bytenr(&root->root_item) != 653 cur->bytenr); 654 cur->root = root; 655 break; 656 } 657 658 level = cur->level + 1; 659 660 /* 661 * searching the tree to find upper level blocks 662 * reference the block. 663 */ 664 path2->search_commit_root = 1; 665 path2->skip_locking = 1; 666 path2->lowest_level = level; 667 ret = btrfs_search_slot(NULL, root, node_key, path2, 0, 0); 668 path2->lowest_level = 0; 669 if (ret < 0) { 670 err = ret; 671 goto out; 672 } 673 674 eb = path2->nodes[level]; 675 WARN_ON(btrfs_node_blockptr(eb, path2->slots[level]) != 676 cur->bytenr); 677 678 lower = cur; 679 for (; level < BTRFS_MAX_LEVEL; level++) { 680 if (!path2->nodes[level]) { 681 BUG_ON(btrfs_root_bytenr(&root->root_item) != 682 lower->bytenr); 683 lower->root = root; 684 break; 685 } 686 687 edge = kzalloc(sizeof(*edge), GFP_NOFS); 688 if (!edge) { 689 err = -ENOMEM; 690 goto out; 691 } 692 693 eb = path2->nodes[level]; 694 rb_node = tree_search(&cache->rb_root, eb->start); 695 if (!rb_node) { 696 upper = kmalloc(sizeof(*upper), GFP_NOFS); 697 if (!upper) { 698 kfree(edge); 699 err = -ENOMEM; 700 goto out; 701 } 702 backref_node_init(upper); 703 upper->bytenr = eb->start; 704 upper->owner = btrfs_header_owner(eb); 705 upper->level = lower->level + 1; 706 707 /* 708 * if we know the block isn't shared 709 * we can void checking its backrefs. 710 */ 711 if (btrfs_block_can_be_shared(root, eb)) 712 upper->checked = 0; 713 else 714 upper->checked = 1; 715 716 /* 717 * add the block to pending list if we 718 * need check its backrefs. only block 719 * at 'cur->level + 1' is added to the 720 * tail of pending list. this guarantees 721 * we check backrefs from lower level 722 * blocks to upper level blocks. 723 */ 724 if (!upper->checked && 725 level == cur->level + 1) { 726 list_add_tail(&edge->list[UPPER], 727 &list); 728 } else 729 INIT_LIST_HEAD(&edge->list[UPPER]); 730 } else { 731 upper = rb_entry(rb_node, struct backref_node, 732 rb_node); 733 BUG_ON(!upper->checked); 734 INIT_LIST_HEAD(&edge->list[UPPER]); 735 } 736 list_add_tail(&edge->list[LOWER], &lower->upper); 737 edge->node[UPPER] = upper; 738 edge->node[LOWER] = lower; 739 740 if (rb_node) 741 break; 742 lower = upper; 743 upper = NULL; 744 } 745 btrfs_release_path(root, path2); 746 next: 747 if (ptr < end) { 748 ptr += btrfs_extent_inline_ref_size(key.type); 749 if (ptr >= end) { 750 WARN_ON(ptr > end); 751 ptr = 0; 752 end = 0; 753 } 754 } 755 if (ptr >= end) 756 path1->slots[0]++; 757 } 758 btrfs_release_path(rc->extent_root, path1); 759 760 cur->checked = 1; 761 WARN_ON(exist); 762 763 /* the pending list isn't empty, take the first block to process */ 764 if (!list_empty(&list)) { 765 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 766 list_del_init(&edge->list[UPPER]); 767 cur = edge->node[UPPER]; 768 goto again; 769 } 770 771 /* 772 * everything goes well, connect backref nodes and insert backref nodes 773 * into the cache. 774 */ 775 BUG_ON(!node->checked); 776 rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); 777 BUG_ON(rb_node); 778 779 list_for_each_entry(edge, &node->upper, list[LOWER]) 780 list_add_tail(&edge->list[UPPER], &list); 781 782 while (!list_empty(&list)) { 783 edge = list_entry(list.next, struct backref_edge, list[UPPER]); 784 list_del_init(&edge->list[UPPER]); 785 upper = edge->node[UPPER]; 786 787 if (!RB_EMPTY_NODE(&upper->rb_node)) { 788 if (upper->lowest) { 789 list_del_init(&upper->lower); 790 upper->lowest = 0; 791 } 792 793 list_add_tail(&edge->list[UPPER], &upper->lower); 794 continue; 795 } 796 797 BUG_ON(!upper->checked); 798 rb_node = tree_insert(&cache->rb_root, upper->bytenr, 799 &upper->rb_node); 800 BUG_ON(rb_node); 801 802 list_add_tail(&edge->list[UPPER], &upper->lower); 803 804 list_for_each_entry(edge, &upper->upper, list[LOWER]) 805 list_add_tail(&edge->list[UPPER], &list); 806 } 807 out: 808 btrfs_free_path(path1); 809 btrfs_free_path(path2); 810 if (err) { 811 INIT_LIST_HEAD(&list); 812 upper = node; 813 while (upper) { 814 if (RB_EMPTY_NODE(&upper->rb_node)) { 815 list_splice_tail(&upper->upper, &list); 816 kfree(upper); 817 } 818 819 if (list_empty(&list)) 820 break; 821 822 edge = list_entry(list.next, struct backref_edge, 823 list[LOWER]); 824 upper = edge->node[UPPER]; 825 kfree(edge); 826 } 827 return ERR_PTR(err); 828 } 829 return node; 830 } 831 832 /* 833 * helper to add 'address of tree root -> reloc tree' mapping 834 */ 835 static int __add_reloc_root(struct btrfs_root *root) 836 { 837 struct rb_node *rb_node; 838 struct mapping_node *node; 839 struct reloc_control *rc = root->fs_info->reloc_ctl; 840 841 node = kmalloc(sizeof(*node), GFP_NOFS); 842 BUG_ON(!node); 843 844 node->bytenr = root->node->start; 845 node->data = root; 846 847 spin_lock(&rc->reloc_root_tree.lock); 848 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 849 node->bytenr, &node->rb_node); 850 spin_unlock(&rc->reloc_root_tree.lock); 851 BUG_ON(rb_node); 852 853 list_add_tail(&root->root_list, &rc->reloc_roots); 854 return 0; 855 } 856 857 /* 858 * helper to update/delete the 'address of tree root -> reloc tree' 859 * mapping 860 */ 861 static int __update_reloc_root(struct btrfs_root *root, int del) 862 { 863 struct rb_node *rb_node; 864 struct mapping_node *node = NULL; 865 struct reloc_control *rc = root->fs_info->reloc_ctl; 866 867 spin_lock(&rc->reloc_root_tree.lock); 868 rb_node = tree_search(&rc->reloc_root_tree.rb_root, 869 root->commit_root->start); 870 if (rb_node) { 871 node = rb_entry(rb_node, struct mapping_node, rb_node); 872 rb_erase(&node->rb_node, &rc->reloc_root_tree.rb_root); 873 } 874 spin_unlock(&rc->reloc_root_tree.lock); 875 876 BUG_ON((struct btrfs_root *)node->data != root); 877 878 if (!del) { 879 spin_lock(&rc->reloc_root_tree.lock); 880 node->bytenr = root->node->start; 881 rb_node = tree_insert(&rc->reloc_root_tree.rb_root, 882 node->bytenr, &node->rb_node); 883 spin_unlock(&rc->reloc_root_tree.lock); 884 BUG_ON(rb_node); 885 } else { 886 list_del_init(&root->root_list); 887 kfree(node); 888 } 889 return 0; 890 } 891 892 /* 893 * create reloc tree for a given fs tree. reloc tree is just a 894 * snapshot of the fs tree with special root objectid. 895 */ 896 int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, 897 struct btrfs_root *root) 898 { 899 struct btrfs_root *reloc_root; 900 struct extent_buffer *eb; 901 struct btrfs_root_item *root_item; 902 struct btrfs_key root_key; 903 int ret; 904 905 if (root->reloc_root) { 906 reloc_root = root->reloc_root; 907 reloc_root->last_trans = trans->transid; 908 return 0; 909 } 910 911 if (!root->fs_info->reloc_ctl || 912 !root->fs_info->reloc_ctl->create_reloc_root || 913 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 914 return 0; 915 916 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 917 BUG_ON(!root_item); 918 919 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 920 root_key.type = BTRFS_ROOT_ITEM_KEY; 921 root_key.offset = root->root_key.objectid; 922 923 ret = btrfs_copy_root(trans, root, root->commit_root, &eb, 924 BTRFS_TREE_RELOC_OBJECTID); 925 BUG_ON(ret); 926 927 btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); 928 memcpy(root_item, &root->root_item, sizeof(*root_item)); 929 btrfs_set_root_refs(root_item, 1); 930 btrfs_set_root_bytenr(root_item, eb->start); 931 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 932 btrfs_set_root_generation(root_item, trans->transid); 933 memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); 934 root_item->drop_level = 0; 935 936 btrfs_tree_unlock(eb); 937 free_extent_buffer(eb); 938 939 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 940 &root_key, root_item); 941 BUG_ON(ret); 942 kfree(root_item); 943 944 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 945 &root_key); 946 BUG_ON(IS_ERR(reloc_root)); 947 reloc_root->last_trans = trans->transid; 948 949 __add_reloc_root(reloc_root); 950 root->reloc_root = reloc_root; 951 return 0; 952 } 953 954 /* 955 * update root item of reloc tree 956 */ 957 int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, 958 struct btrfs_root *root) 959 { 960 struct btrfs_root *reloc_root; 961 struct btrfs_root_item *root_item; 962 int del = 0; 963 int ret; 964 965 if (!root->reloc_root) 966 return 0; 967 968 reloc_root = root->reloc_root; 969 root_item = &reloc_root->root_item; 970 971 if (btrfs_root_refs(root_item) == 0) { 972 root->reloc_root = NULL; 973 del = 1; 974 } 975 976 __update_reloc_root(reloc_root, del); 977 978 if (reloc_root->commit_root != reloc_root->node) { 979 btrfs_set_root_node(root_item, reloc_root->node); 980 free_extent_buffer(reloc_root->commit_root); 981 reloc_root->commit_root = btrfs_root_node(reloc_root); 982 } 983 984 ret = btrfs_update_root(trans, root->fs_info->tree_root, 985 &reloc_root->root_key, root_item); 986 BUG_ON(ret); 987 return 0; 988 } 989 990 /* 991 * helper to find first cached inode with inode number >= objectid 992 * in a subvolume 993 */ 994 static struct inode *find_next_inode(struct btrfs_root *root, u64 objectid) 995 { 996 struct rb_node *node; 997 struct rb_node *prev; 998 struct btrfs_inode *entry; 999 struct inode *inode; 1000 1001 spin_lock(&root->inode_lock); 1002 again: 1003 node = root->inode_tree.rb_node; 1004 prev = NULL; 1005 while (node) { 1006 prev = node; 1007 entry = rb_entry(node, struct btrfs_inode, rb_node); 1008 1009 if (objectid < entry->vfs_inode.i_ino) 1010 node = node->rb_left; 1011 else if (objectid > entry->vfs_inode.i_ino) 1012 node = node->rb_right; 1013 else 1014 break; 1015 } 1016 if (!node) { 1017 while (prev) { 1018 entry = rb_entry(prev, struct btrfs_inode, rb_node); 1019 if (objectid <= entry->vfs_inode.i_ino) { 1020 node = prev; 1021 break; 1022 } 1023 prev = rb_next(prev); 1024 } 1025 } 1026 while (node) { 1027 entry = rb_entry(node, struct btrfs_inode, rb_node); 1028 inode = igrab(&entry->vfs_inode); 1029 if (inode) { 1030 spin_unlock(&root->inode_lock); 1031 return inode; 1032 } 1033 1034 objectid = entry->vfs_inode.i_ino + 1; 1035 if (cond_resched_lock(&root->inode_lock)) 1036 goto again; 1037 1038 node = rb_next(node); 1039 } 1040 spin_unlock(&root->inode_lock); 1041 return NULL; 1042 } 1043 1044 static int in_block_group(u64 bytenr, 1045 struct btrfs_block_group_cache *block_group) 1046 { 1047 if (bytenr >= block_group->key.objectid && 1048 bytenr < block_group->key.objectid + block_group->key.offset) 1049 return 1; 1050 return 0; 1051 } 1052 1053 /* 1054 * get new location of data 1055 */ 1056 static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, 1057 u64 bytenr, u64 num_bytes) 1058 { 1059 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 1060 struct btrfs_path *path; 1061 struct btrfs_file_extent_item *fi; 1062 struct extent_buffer *leaf; 1063 int ret; 1064 1065 path = btrfs_alloc_path(); 1066 if (!path) 1067 return -ENOMEM; 1068 1069 bytenr -= BTRFS_I(reloc_inode)->index_cnt; 1070 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 1071 bytenr, 0); 1072 if (ret < 0) 1073 goto out; 1074 if (ret > 0) { 1075 ret = -ENOENT; 1076 goto out; 1077 } 1078 1079 leaf = path->nodes[0]; 1080 fi = btrfs_item_ptr(leaf, path->slots[0], 1081 struct btrfs_file_extent_item); 1082 1083 BUG_ON(btrfs_file_extent_offset(leaf, fi) || 1084 btrfs_file_extent_compression(leaf, fi) || 1085 btrfs_file_extent_encryption(leaf, fi) || 1086 btrfs_file_extent_other_encoding(leaf, fi)); 1087 1088 if (num_bytes != btrfs_file_extent_disk_num_bytes(leaf, fi)) { 1089 ret = 1; 1090 goto out; 1091 } 1092 1093 if (new_bytenr) 1094 *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1095 ret = 0; 1096 out: 1097 btrfs_free_path(path); 1098 return ret; 1099 } 1100 1101 /* 1102 * update file extent items in the tree leaf to point to 1103 * the new locations. 1104 */ 1105 static int replace_file_extents(struct btrfs_trans_handle *trans, 1106 struct reloc_control *rc, 1107 struct btrfs_root *root, 1108 struct extent_buffer *leaf, 1109 struct list_head *inode_list) 1110 { 1111 struct btrfs_key key; 1112 struct btrfs_file_extent_item *fi; 1113 struct inode *inode = NULL; 1114 struct inodevec *ivec = NULL; 1115 u64 parent; 1116 u64 bytenr; 1117 u64 new_bytenr; 1118 u64 num_bytes; 1119 u64 end; 1120 u32 nritems; 1121 u32 i; 1122 int ret; 1123 int first = 1; 1124 int dirty = 0; 1125 1126 if (rc->stage != UPDATE_DATA_PTRS) 1127 return 0; 1128 1129 /* reloc trees always use full backref */ 1130 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) 1131 parent = leaf->start; 1132 else 1133 parent = 0; 1134 1135 nritems = btrfs_header_nritems(leaf); 1136 for (i = 0; i < nritems; i++) { 1137 cond_resched(); 1138 btrfs_item_key_to_cpu(leaf, &key, i); 1139 if (key.type != BTRFS_EXTENT_DATA_KEY) 1140 continue; 1141 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 1142 if (btrfs_file_extent_type(leaf, fi) == 1143 BTRFS_FILE_EXTENT_INLINE) 1144 continue; 1145 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 1146 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 1147 if (bytenr == 0) 1148 continue; 1149 if (!in_block_group(bytenr, rc->block_group)) 1150 continue; 1151 1152 /* 1153 * if we are modifying block in fs tree, wait for readpage 1154 * to complete and drop the extent cache 1155 */ 1156 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1157 if (!ivec || ivec->nr == INODEVEC_SIZE) { 1158 ivec = kmalloc(sizeof(*ivec), GFP_NOFS); 1159 BUG_ON(!ivec); 1160 ivec->nr = 0; 1161 list_add_tail(&ivec->list, inode_list); 1162 } 1163 if (first) { 1164 inode = find_next_inode(root, key.objectid); 1165 if (inode) 1166 ivec->inode[ivec->nr++] = inode; 1167 first = 0; 1168 } else if (inode && inode->i_ino < key.objectid) { 1169 inode = find_next_inode(root, key.objectid); 1170 if (inode) 1171 ivec->inode[ivec->nr++] = inode; 1172 } 1173 if (inode && inode->i_ino == key.objectid) { 1174 end = key.offset + 1175 btrfs_file_extent_num_bytes(leaf, fi); 1176 WARN_ON(!IS_ALIGNED(key.offset, 1177 root->sectorsize)); 1178 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1179 end--; 1180 ret = try_lock_extent(&BTRFS_I(inode)->io_tree, 1181 key.offset, end, 1182 GFP_NOFS); 1183 if (!ret) 1184 continue; 1185 1186 btrfs_drop_extent_cache(inode, key.offset, end, 1187 1); 1188 unlock_extent(&BTRFS_I(inode)->io_tree, 1189 key.offset, end, GFP_NOFS); 1190 } 1191 } 1192 1193 ret = get_new_location(rc->data_inode, &new_bytenr, 1194 bytenr, num_bytes); 1195 if (ret > 0) 1196 continue; 1197 BUG_ON(ret < 0); 1198 1199 btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); 1200 dirty = 1; 1201 1202 key.offset -= btrfs_file_extent_offset(leaf, fi); 1203 ret = btrfs_inc_extent_ref(trans, root, new_bytenr, 1204 num_bytes, parent, 1205 btrfs_header_owner(leaf), 1206 key.objectid, key.offset); 1207 BUG_ON(ret); 1208 1209 ret = btrfs_free_extent(trans, root, bytenr, num_bytes, 1210 parent, btrfs_header_owner(leaf), 1211 key.objectid, key.offset); 1212 BUG_ON(ret); 1213 } 1214 if (dirty) 1215 btrfs_mark_buffer_dirty(leaf); 1216 return 0; 1217 } 1218 1219 static noinline_for_stack 1220 int memcmp_node_keys(struct extent_buffer *eb, int slot, 1221 struct btrfs_path *path, int level) 1222 { 1223 struct btrfs_disk_key key1; 1224 struct btrfs_disk_key key2; 1225 btrfs_node_key(eb, &key1, slot); 1226 btrfs_node_key(path->nodes[level], &key2, path->slots[level]); 1227 return memcmp(&key1, &key2, sizeof(key1)); 1228 } 1229 1230 /* 1231 * try to replace tree blocks in fs tree with the new blocks 1232 * in reloc tree. tree blocks haven't been modified since the 1233 * reloc tree was create can be replaced. 1234 * 1235 * if a block was replaced, level of the block + 1 is returned. 1236 * if no block got replaced, 0 is returned. if there are other 1237 * errors, a negative error number is returned. 1238 */ 1239 static int replace_path(struct btrfs_trans_handle *trans, 1240 struct btrfs_root *dest, struct btrfs_root *src, 1241 struct btrfs_path *path, struct btrfs_key *next_key, 1242 struct extent_buffer **leaf, 1243 int lowest_level, int max_level) 1244 { 1245 struct extent_buffer *eb; 1246 struct extent_buffer *parent; 1247 struct btrfs_key key; 1248 u64 old_bytenr; 1249 u64 new_bytenr; 1250 u64 old_ptr_gen; 1251 u64 new_ptr_gen; 1252 u64 last_snapshot; 1253 u32 blocksize; 1254 int level; 1255 int ret; 1256 int slot; 1257 1258 BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 1259 BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); 1260 BUG_ON(lowest_level > 1 && leaf); 1261 1262 last_snapshot = btrfs_root_last_snapshot(&src->root_item); 1263 1264 slot = path->slots[lowest_level]; 1265 btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); 1266 1267 eb = btrfs_lock_root_node(dest); 1268 btrfs_set_lock_blocking(eb); 1269 level = btrfs_header_level(eb); 1270 1271 if (level < lowest_level) { 1272 btrfs_tree_unlock(eb); 1273 free_extent_buffer(eb); 1274 return 0; 1275 } 1276 1277 ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); 1278 BUG_ON(ret); 1279 btrfs_set_lock_blocking(eb); 1280 1281 if (next_key) { 1282 next_key->objectid = (u64)-1; 1283 next_key->type = (u8)-1; 1284 next_key->offset = (u64)-1; 1285 } 1286 1287 parent = eb; 1288 while (1) { 1289 level = btrfs_header_level(parent); 1290 BUG_ON(level < lowest_level); 1291 1292 ret = btrfs_bin_search(parent, &key, level, &slot); 1293 if (ret && slot > 0) 1294 slot--; 1295 1296 if (next_key && slot + 1 < btrfs_header_nritems(parent)) 1297 btrfs_node_key_to_cpu(parent, next_key, slot + 1); 1298 1299 old_bytenr = btrfs_node_blockptr(parent, slot); 1300 blocksize = btrfs_level_size(dest, level - 1); 1301 old_ptr_gen = btrfs_node_ptr_generation(parent, slot); 1302 1303 if (level <= max_level) { 1304 eb = path->nodes[level]; 1305 new_bytenr = btrfs_node_blockptr(eb, 1306 path->slots[level]); 1307 new_ptr_gen = btrfs_node_ptr_generation(eb, 1308 path->slots[level]); 1309 } else { 1310 new_bytenr = 0; 1311 new_ptr_gen = 0; 1312 } 1313 1314 if (new_bytenr > 0 && new_bytenr == old_bytenr) { 1315 WARN_ON(1); 1316 ret = level; 1317 break; 1318 } 1319 1320 if (new_bytenr == 0 || old_ptr_gen > last_snapshot || 1321 memcmp_node_keys(parent, slot, path, level)) { 1322 if (level <= lowest_level && !leaf) { 1323 ret = 0; 1324 break; 1325 } 1326 1327 eb = read_tree_block(dest, old_bytenr, blocksize, 1328 old_ptr_gen); 1329 btrfs_tree_lock(eb); 1330 ret = btrfs_cow_block(trans, dest, eb, parent, 1331 slot, &eb); 1332 BUG_ON(ret); 1333 btrfs_set_lock_blocking(eb); 1334 1335 if (level <= lowest_level) { 1336 *leaf = eb; 1337 ret = 0; 1338 break; 1339 } 1340 1341 btrfs_tree_unlock(parent); 1342 free_extent_buffer(parent); 1343 1344 parent = eb; 1345 continue; 1346 } 1347 1348 btrfs_node_key_to_cpu(path->nodes[level], &key, 1349 path->slots[level]); 1350 btrfs_release_path(src, path); 1351 1352 path->lowest_level = level; 1353 ret = btrfs_search_slot(trans, src, &key, path, 0, 1); 1354 path->lowest_level = 0; 1355 BUG_ON(ret); 1356 1357 /* 1358 * swap blocks in fs tree and reloc tree. 1359 */ 1360 btrfs_set_node_blockptr(parent, slot, new_bytenr); 1361 btrfs_set_node_ptr_generation(parent, slot, new_ptr_gen); 1362 btrfs_mark_buffer_dirty(parent); 1363 1364 btrfs_set_node_blockptr(path->nodes[level], 1365 path->slots[level], old_bytenr); 1366 btrfs_set_node_ptr_generation(path->nodes[level], 1367 path->slots[level], old_ptr_gen); 1368 btrfs_mark_buffer_dirty(path->nodes[level]); 1369 1370 ret = btrfs_inc_extent_ref(trans, src, old_bytenr, blocksize, 1371 path->nodes[level]->start, 1372 src->root_key.objectid, level - 1, 0); 1373 BUG_ON(ret); 1374 ret = btrfs_inc_extent_ref(trans, dest, new_bytenr, blocksize, 1375 0, dest->root_key.objectid, level - 1, 1376 0); 1377 BUG_ON(ret); 1378 1379 ret = btrfs_free_extent(trans, src, new_bytenr, blocksize, 1380 path->nodes[level]->start, 1381 src->root_key.objectid, level - 1, 0); 1382 BUG_ON(ret); 1383 1384 ret = btrfs_free_extent(trans, dest, old_bytenr, blocksize, 1385 0, dest->root_key.objectid, level - 1, 1386 0); 1387 BUG_ON(ret); 1388 1389 btrfs_unlock_up_safe(path, 0); 1390 1391 ret = level; 1392 break; 1393 } 1394 btrfs_tree_unlock(parent); 1395 free_extent_buffer(parent); 1396 return ret; 1397 } 1398 1399 /* 1400 * helper to find next relocated block in reloc tree 1401 */ 1402 static noinline_for_stack 1403 int walk_up_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1404 int *level) 1405 { 1406 struct extent_buffer *eb; 1407 int i; 1408 u64 last_snapshot; 1409 u32 nritems; 1410 1411 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1412 1413 for (i = 0; i < *level; i++) { 1414 free_extent_buffer(path->nodes[i]); 1415 path->nodes[i] = NULL; 1416 } 1417 1418 for (i = *level; i < BTRFS_MAX_LEVEL && path->nodes[i]; i++) { 1419 eb = path->nodes[i]; 1420 nritems = btrfs_header_nritems(eb); 1421 while (path->slots[i] + 1 < nritems) { 1422 path->slots[i]++; 1423 if (btrfs_node_ptr_generation(eb, path->slots[i]) <= 1424 last_snapshot) 1425 continue; 1426 1427 *level = i; 1428 return 0; 1429 } 1430 free_extent_buffer(path->nodes[i]); 1431 path->nodes[i] = NULL; 1432 } 1433 return 1; 1434 } 1435 1436 /* 1437 * walk down reloc tree to find relocated block of lowest level 1438 */ 1439 static noinline_for_stack 1440 int walk_down_reloc_tree(struct btrfs_root *root, struct btrfs_path *path, 1441 int *level) 1442 { 1443 struct extent_buffer *eb = NULL; 1444 int i; 1445 u64 bytenr; 1446 u64 ptr_gen = 0; 1447 u64 last_snapshot; 1448 u32 blocksize; 1449 u32 nritems; 1450 1451 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1452 1453 for (i = *level; i > 0; i--) { 1454 eb = path->nodes[i]; 1455 nritems = btrfs_header_nritems(eb); 1456 while (path->slots[i] < nritems) { 1457 ptr_gen = btrfs_node_ptr_generation(eb, path->slots[i]); 1458 if (ptr_gen > last_snapshot) 1459 break; 1460 path->slots[i]++; 1461 } 1462 if (path->slots[i] >= nritems) { 1463 if (i == *level) 1464 break; 1465 *level = i + 1; 1466 return 0; 1467 } 1468 if (i == 1) { 1469 *level = i; 1470 return 0; 1471 } 1472 1473 bytenr = btrfs_node_blockptr(eb, path->slots[i]); 1474 blocksize = btrfs_level_size(root, i - 1); 1475 eb = read_tree_block(root, bytenr, blocksize, ptr_gen); 1476 BUG_ON(btrfs_header_level(eb) != i - 1); 1477 path->nodes[i - 1] = eb; 1478 path->slots[i - 1] = 0; 1479 } 1480 return 1; 1481 } 1482 1483 /* 1484 * invalidate extent cache for file extents whose key in range of 1485 * [min_key, max_key) 1486 */ 1487 static int invalidate_extent_cache(struct btrfs_root *root, 1488 struct btrfs_key *min_key, 1489 struct btrfs_key *max_key) 1490 { 1491 struct inode *inode = NULL; 1492 u64 objectid; 1493 u64 start, end; 1494 1495 objectid = min_key->objectid; 1496 while (1) { 1497 cond_resched(); 1498 iput(inode); 1499 1500 if (objectid > max_key->objectid) 1501 break; 1502 1503 inode = find_next_inode(root, objectid); 1504 if (!inode) 1505 break; 1506 1507 if (inode->i_ino > max_key->objectid) { 1508 iput(inode); 1509 break; 1510 } 1511 1512 objectid = inode->i_ino + 1; 1513 if (!S_ISREG(inode->i_mode)) 1514 continue; 1515 1516 if (unlikely(min_key->objectid == inode->i_ino)) { 1517 if (min_key->type > BTRFS_EXTENT_DATA_KEY) 1518 continue; 1519 if (min_key->type < BTRFS_EXTENT_DATA_KEY) 1520 start = 0; 1521 else { 1522 start = min_key->offset; 1523 WARN_ON(!IS_ALIGNED(start, root->sectorsize)); 1524 } 1525 } else { 1526 start = 0; 1527 } 1528 1529 if (unlikely(max_key->objectid == inode->i_ino)) { 1530 if (max_key->type < BTRFS_EXTENT_DATA_KEY) 1531 continue; 1532 if (max_key->type > BTRFS_EXTENT_DATA_KEY) { 1533 end = (u64)-1; 1534 } else { 1535 if (max_key->offset == 0) 1536 continue; 1537 end = max_key->offset; 1538 WARN_ON(!IS_ALIGNED(end, root->sectorsize)); 1539 end--; 1540 } 1541 } else { 1542 end = (u64)-1; 1543 } 1544 1545 /* the lock_extent waits for readpage to complete */ 1546 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1547 btrfs_drop_extent_cache(inode, start, end, 1); 1548 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 1549 } 1550 return 0; 1551 } 1552 1553 static int find_next_key(struct btrfs_path *path, int level, 1554 struct btrfs_key *key) 1555 1556 { 1557 while (level < BTRFS_MAX_LEVEL) { 1558 if (!path->nodes[level]) 1559 break; 1560 if (path->slots[level] + 1 < 1561 btrfs_header_nritems(path->nodes[level])) { 1562 btrfs_node_key_to_cpu(path->nodes[level], key, 1563 path->slots[level] + 1); 1564 return 0; 1565 } 1566 level++; 1567 } 1568 return 1; 1569 } 1570 1571 /* 1572 * merge the relocated tree blocks in reloc tree with corresponding 1573 * fs tree. 1574 */ 1575 static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, 1576 struct btrfs_root *root) 1577 { 1578 LIST_HEAD(inode_list); 1579 struct btrfs_key key; 1580 struct btrfs_key next_key; 1581 struct btrfs_trans_handle *trans; 1582 struct btrfs_root *reloc_root; 1583 struct btrfs_root_item *root_item; 1584 struct btrfs_path *path; 1585 struct extent_buffer *leaf = NULL; 1586 unsigned long nr; 1587 int level; 1588 int max_level; 1589 int replaced = 0; 1590 int ret; 1591 int err = 0; 1592 1593 path = btrfs_alloc_path(); 1594 if (!path) 1595 return -ENOMEM; 1596 1597 reloc_root = root->reloc_root; 1598 root_item = &reloc_root->root_item; 1599 1600 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 1601 level = btrfs_root_level(root_item); 1602 extent_buffer_get(reloc_root->node); 1603 path->nodes[level] = reloc_root->node; 1604 path->slots[level] = 0; 1605 } else { 1606 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 1607 1608 level = root_item->drop_level; 1609 BUG_ON(level == 0); 1610 path->lowest_level = level; 1611 ret = btrfs_search_slot(NULL, reloc_root, &key, path, 0, 0); 1612 if (ret < 0) { 1613 btrfs_free_path(path); 1614 return ret; 1615 } 1616 1617 btrfs_node_key_to_cpu(path->nodes[level], &next_key, 1618 path->slots[level]); 1619 WARN_ON(memcmp(&key, &next_key, sizeof(key))); 1620 1621 btrfs_unlock_up_safe(path, 0); 1622 } 1623 1624 if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { 1625 trans = btrfs_start_transaction(root, 1); 1626 1627 leaf = path->nodes[0]; 1628 btrfs_item_key_to_cpu(leaf, &key, 0); 1629 btrfs_release_path(reloc_root, path); 1630 1631 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 1632 if (ret < 0) { 1633 err = ret; 1634 goto out; 1635 } 1636 1637 leaf = path->nodes[0]; 1638 btrfs_unlock_up_safe(path, 1); 1639 ret = replace_file_extents(trans, rc, root, leaf, 1640 &inode_list); 1641 if (ret < 0) 1642 err = ret; 1643 goto out; 1644 } 1645 1646 memset(&next_key, 0, sizeof(next_key)); 1647 1648 while (1) { 1649 leaf = NULL; 1650 replaced = 0; 1651 trans = btrfs_start_transaction(root, 1); 1652 max_level = level; 1653 1654 ret = walk_down_reloc_tree(reloc_root, path, &level); 1655 if (ret < 0) { 1656 err = ret; 1657 goto out; 1658 } 1659 if (ret > 0) 1660 break; 1661 1662 if (!find_next_key(path, level, &key) && 1663 btrfs_comp_cpu_keys(&next_key, &key) >= 0) { 1664 ret = 0; 1665 } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { 1666 ret = replace_path(trans, root, reloc_root, 1667 path, &next_key, &leaf, 1668 level, max_level); 1669 } else { 1670 ret = replace_path(trans, root, reloc_root, 1671 path, &next_key, NULL, 1672 level, max_level); 1673 } 1674 if (ret < 0) { 1675 err = ret; 1676 goto out; 1677 } 1678 1679 if (ret > 0) { 1680 level = ret; 1681 btrfs_node_key_to_cpu(path->nodes[level], &key, 1682 path->slots[level]); 1683 replaced = 1; 1684 } else if (leaf) { 1685 /* 1686 * no block got replaced, try replacing file extents 1687 */ 1688 btrfs_item_key_to_cpu(leaf, &key, 0); 1689 ret = replace_file_extents(trans, rc, root, leaf, 1690 &inode_list); 1691 btrfs_tree_unlock(leaf); 1692 free_extent_buffer(leaf); 1693 BUG_ON(ret < 0); 1694 } 1695 1696 ret = walk_up_reloc_tree(reloc_root, path, &level); 1697 if (ret > 0) 1698 break; 1699 1700 BUG_ON(level == 0); 1701 /* 1702 * save the merging progress in the drop_progress. 1703 * this is OK since root refs == 1 in this case. 1704 */ 1705 btrfs_node_key(path->nodes[level], &root_item->drop_progress, 1706 path->slots[level]); 1707 root_item->drop_level = level; 1708 1709 nr = trans->blocks_used; 1710 btrfs_end_transaction(trans, root); 1711 1712 btrfs_btree_balance_dirty(root, nr); 1713 1714 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1715 invalidate_extent_cache(root, &key, &next_key); 1716 } 1717 1718 /* 1719 * handle the case only one block in the fs tree need to be 1720 * relocated and the block is tree root. 1721 */ 1722 leaf = btrfs_lock_root_node(root); 1723 ret = btrfs_cow_block(trans, root, leaf, NULL, 0, &leaf); 1724 btrfs_tree_unlock(leaf); 1725 free_extent_buffer(leaf); 1726 if (ret < 0) 1727 err = ret; 1728 out: 1729 btrfs_free_path(path); 1730 1731 if (err == 0) { 1732 memset(&root_item->drop_progress, 0, 1733 sizeof(root_item->drop_progress)); 1734 root_item->drop_level = 0; 1735 btrfs_set_root_refs(root_item, 0); 1736 } 1737 1738 nr = trans->blocks_used; 1739 btrfs_end_transaction(trans, root); 1740 1741 btrfs_btree_balance_dirty(root, nr); 1742 1743 /* 1744 * put inodes while we aren't holding the tree locks 1745 */ 1746 while (!list_empty(&inode_list)) { 1747 struct inodevec *ivec; 1748 ivec = list_entry(inode_list.next, struct inodevec, list); 1749 list_del(&ivec->list); 1750 while (ivec->nr > 0) { 1751 ivec->nr--; 1752 iput(ivec->inode[ivec->nr]); 1753 } 1754 kfree(ivec); 1755 } 1756 1757 if (replaced && rc->stage == UPDATE_DATA_PTRS) 1758 invalidate_extent_cache(root, &key, &next_key); 1759 1760 return err; 1761 } 1762 1763 /* 1764 * callback for the work threads. 1765 * this function merges reloc tree with corresponding fs tree, 1766 * and then drops the reloc tree. 1767 */ 1768 static void merge_func(struct btrfs_work *work) 1769 { 1770 struct btrfs_trans_handle *trans; 1771 struct btrfs_root *root; 1772 struct btrfs_root *reloc_root; 1773 struct async_merge *async; 1774 1775 async = container_of(work, struct async_merge, work); 1776 reloc_root = async->root; 1777 1778 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 1779 root = read_fs_root(reloc_root->fs_info, 1780 reloc_root->root_key.offset); 1781 BUG_ON(IS_ERR(root)); 1782 BUG_ON(root->reloc_root != reloc_root); 1783 1784 merge_reloc_root(async->rc, root); 1785 1786 trans = btrfs_start_transaction(root, 1); 1787 btrfs_update_reloc_root(trans, root); 1788 btrfs_end_transaction(trans, root); 1789 } 1790 1791 btrfs_drop_snapshot(reloc_root, 0); 1792 1793 if (atomic_dec_and_test(async->num_pending)) 1794 complete(async->done); 1795 1796 kfree(async); 1797 } 1798 1799 static int merge_reloc_roots(struct reloc_control *rc) 1800 { 1801 struct async_merge *async; 1802 struct btrfs_root *root; 1803 struct completion done; 1804 atomic_t num_pending; 1805 1806 init_completion(&done); 1807 atomic_set(&num_pending, 1); 1808 1809 while (!list_empty(&rc->reloc_roots)) { 1810 root = list_entry(rc->reloc_roots.next, 1811 struct btrfs_root, root_list); 1812 list_del_init(&root->root_list); 1813 1814 async = kmalloc(sizeof(*async), GFP_NOFS); 1815 BUG_ON(!async); 1816 async->work.func = merge_func; 1817 async->work.flags = 0; 1818 async->rc = rc; 1819 async->root = root; 1820 async->done = &done; 1821 async->num_pending = &num_pending; 1822 atomic_inc(&num_pending); 1823 btrfs_queue_worker(&rc->workers, &async->work); 1824 } 1825 1826 if (!atomic_dec_and_test(&num_pending)) 1827 wait_for_completion(&done); 1828 1829 BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); 1830 return 0; 1831 } 1832 1833 static void free_block_list(struct rb_root *blocks) 1834 { 1835 struct tree_block *block; 1836 struct rb_node *rb_node; 1837 while ((rb_node = rb_first(blocks))) { 1838 block = rb_entry(rb_node, struct tree_block, rb_node); 1839 rb_erase(rb_node, blocks); 1840 kfree(block); 1841 } 1842 } 1843 1844 static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, 1845 struct btrfs_root *reloc_root) 1846 { 1847 struct btrfs_root *root; 1848 1849 if (reloc_root->last_trans == trans->transid) 1850 return 0; 1851 1852 root = read_fs_root(reloc_root->fs_info, reloc_root->root_key.offset); 1853 BUG_ON(IS_ERR(root)); 1854 BUG_ON(root->reloc_root != reloc_root); 1855 1856 return btrfs_record_root_in_trans(trans, root); 1857 } 1858 1859 /* 1860 * select one tree from trees that references the block. 1861 * for blocks in refernce counted trees, we preper reloc tree. 1862 * if no reloc tree found and reloc_only is true, NULL is returned. 1863 */ 1864 static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, 1865 struct backref_node *node, 1866 struct backref_edge *edges[], 1867 int *nr, int reloc_only) 1868 { 1869 struct backref_node *next; 1870 struct btrfs_root *root; 1871 int index; 1872 int loop = 0; 1873 again: 1874 index = 0; 1875 next = node; 1876 while (1) { 1877 cond_resched(); 1878 next = walk_up_backref(next, edges, &index); 1879 root = next->root; 1880 if (!root) { 1881 BUG_ON(!node->old_root); 1882 goto skip; 1883 } 1884 1885 /* no other choice for non-refernce counted tree */ 1886 if (!root->ref_cows) { 1887 BUG_ON(reloc_only); 1888 break; 1889 } 1890 1891 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 1892 record_reloc_root_in_trans(trans, root); 1893 break; 1894 } 1895 1896 if (loop) { 1897 btrfs_record_root_in_trans(trans, root); 1898 break; 1899 } 1900 1901 if (reloc_only || next != node) { 1902 if (!root->reloc_root) 1903 btrfs_record_root_in_trans(trans, root); 1904 root = root->reloc_root; 1905 /* 1906 * if the reloc tree was created in current 1907 * transation, there is no node in backref tree 1908 * corresponds to the root of the reloc tree. 1909 */ 1910 if (btrfs_root_last_snapshot(&root->root_item) == 1911 trans->transid - 1) 1912 break; 1913 } 1914 skip: 1915 root = NULL; 1916 next = walk_down_backref(edges, &index); 1917 if (!next || next->level <= node->level) 1918 break; 1919 } 1920 1921 if (!root && !loop && !reloc_only) { 1922 loop = 1; 1923 goto again; 1924 } 1925 1926 if (root) 1927 *nr = index; 1928 else 1929 *nr = 0; 1930 1931 return root; 1932 } 1933 1934 static noinline_for_stack 1935 struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, 1936 struct backref_node *node) 1937 { 1938 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 1939 int nr; 1940 return __select_one_root(trans, node, edges, &nr, 0); 1941 } 1942 1943 static noinline_for_stack 1944 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, 1945 struct backref_node *node, 1946 struct backref_edge *edges[], int *nr) 1947 { 1948 return __select_one_root(trans, node, edges, nr, 1); 1949 } 1950 1951 static void grab_path_buffers(struct btrfs_path *path, 1952 struct backref_node *node, 1953 struct backref_edge *edges[], int nr) 1954 { 1955 int i = 0; 1956 while (1) { 1957 drop_node_buffer(node); 1958 node->eb = path->nodes[node->level]; 1959 BUG_ON(!node->eb); 1960 if (path->locks[node->level]) 1961 node->locked = 1; 1962 path->nodes[node->level] = NULL; 1963 path->locks[node->level] = 0; 1964 1965 if (i >= nr) 1966 break; 1967 1968 edges[i]->blockptr = node->eb->start; 1969 node = edges[i]->node[UPPER]; 1970 i++; 1971 } 1972 } 1973 1974 /* 1975 * relocate a block tree, and then update pointers in upper level 1976 * blocks that reference the block to point to the new location. 1977 * 1978 * if called by link_to_upper, the block has already been relocated. 1979 * in that case this function just updates pointers. 1980 */ 1981 static int do_relocation(struct btrfs_trans_handle *trans, 1982 struct backref_node *node, 1983 struct btrfs_key *key, 1984 struct btrfs_path *path, int lowest) 1985 { 1986 struct backref_node *upper; 1987 struct backref_edge *edge; 1988 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 1989 struct btrfs_root *root; 1990 struct extent_buffer *eb; 1991 u32 blocksize; 1992 u64 bytenr; 1993 u64 generation; 1994 int nr; 1995 int slot; 1996 int ret; 1997 int err = 0; 1998 1999 BUG_ON(lowest && node->eb); 2000 2001 path->lowest_level = node->level + 1; 2002 list_for_each_entry(edge, &node->upper, list[LOWER]) { 2003 cond_resched(); 2004 if (node->eb && node->eb->start == edge->blockptr) 2005 continue; 2006 2007 upper = edge->node[UPPER]; 2008 root = select_reloc_root(trans, upper, edges, &nr); 2009 if (!root) 2010 continue; 2011 2012 if (upper->eb && !upper->locked) 2013 drop_node_buffer(upper); 2014 2015 if (!upper->eb) { 2016 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2017 if (ret < 0) { 2018 err = ret; 2019 break; 2020 } 2021 BUG_ON(ret > 0); 2022 2023 slot = path->slots[upper->level]; 2024 2025 btrfs_unlock_up_safe(path, upper->level + 1); 2026 grab_path_buffers(path, upper, edges, nr); 2027 2028 btrfs_release_path(NULL, path); 2029 } else { 2030 ret = btrfs_bin_search(upper->eb, key, upper->level, 2031 &slot); 2032 BUG_ON(ret); 2033 } 2034 2035 bytenr = btrfs_node_blockptr(upper->eb, slot); 2036 if (!lowest) { 2037 if (node->eb->start == bytenr) { 2038 btrfs_tree_unlock(upper->eb); 2039 upper->locked = 0; 2040 continue; 2041 } 2042 } else { 2043 BUG_ON(node->bytenr != bytenr); 2044 } 2045 2046 blocksize = btrfs_level_size(root, node->level); 2047 generation = btrfs_node_ptr_generation(upper->eb, slot); 2048 eb = read_tree_block(root, bytenr, blocksize, generation); 2049 btrfs_tree_lock(eb); 2050 btrfs_set_lock_blocking(eb); 2051 2052 if (!node->eb) { 2053 ret = btrfs_cow_block(trans, root, eb, upper->eb, 2054 slot, &eb); 2055 if (ret < 0) { 2056 err = ret; 2057 break; 2058 } 2059 btrfs_set_lock_blocking(eb); 2060 node->eb = eb; 2061 node->locked = 1; 2062 } else { 2063 btrfs_set_node_blockptr(upper->eb, slot, 2064 node->eb->start); 2065 btrfs_set_node_ptr_generation(upper->eb, slot, 2066 trans->transid); 2067 btrfs_mark_buffer_dirty(upper->eb); 2068 2069 ret = btrfs_inc_extent_ref(trans, root, 2070 node->eb->start, blocksize, 2071 upper->eb->start, 2072 btrfs_header_owner(upper->eb), 2073 node->level, 0); 2074 BUG_ON(ret); 2075 2076 ret = btrfs_drop_subtree(trans, root, eb, upper->eb); 2077 BUG_ON(ret); 2078 } 2079 if (!lowest) { 2080 btrfs_tree_unlock(upper->eb); 2081 upper->locked = 0; 2082 } 2083 } 2084 path->lowest_level = 0; 2085 return err; 2086 } 2087 2088 static int link_to_upper(struct btrfs_trans_handle *trans, 2089 struct backref_node *node, 2090 struct btrfs_path *path) 2091 { 2092 struct btrfs_key key; 2093 if (!node->eb || list_empty(&node->upper)) 2094 return 0; 2095 2096 btrfs_node_key_to_cpu(node->eb, &key, 0); 2097 return do_relocation(trans, node, &key, path, 0); 2098 } 2099 2100 static int finish_pending_nodes(struct btrfs_trans_handle *trans, 2101 struct backref_cache *cache, 2102 struct btrfs_path *path) 2103 { 2104 struct backref_node *node; 2105 int level; 2106 int ret; 2107 int err = 0; 2108 2109 for (level = 0; level < BTRFS_MAX_LEVEL; level++) { 2110 while (!list_empty(&cache->pending[level])) { 2111 node = list_entry(cache->pending[level].next, 2112 struct backref_node, lower); 2113 BUG_ON(node->level != level); 2114 2115 ret = link_to_upper(trans, node, path); 2116 if (ret < 0) 2117 err = ret; 2118 /* 2119 * this remove the node from the pending list and 2120 * may add some other nodes to the level + 1 2121 * pending list 2122 */ 2123 remove_backref_node(cache, node); 2124 } 2125 } 2126 BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); 2127 return err; 2128 } 2129 2130 static void mark_block_processed(struct reloc_control *rc, 2131 struct backref_node *node) 2132 { 2133 u32 blocksize; 2134 if (node->level == 0 || 2135 in_block_group(node->bytenr, rc->block_group)) { 2136 blocksize = btrfs_level_size(rc->extent_root, node->level); 2137 set_extent_bits(&rc->processed_blocks, node->bytenr, 2138 node->bytenr + blocksize - 1, EXTENT_DIRTY, 2139 GFP_NOFS); 2140 } 2141 node->processed = 1; 2142 } 2143 2144 /* 2145 * mark a block and all blocks directly/indirectly reference the block 2146 * as processed. 2147 */ 2148 static void update_processed_blocks(struct reloc_control *rc, 2149 struct backref_node *node) 2150 { 2151 struct backref_node *next = node; 2152 struct backref_edge *edge; 2153 struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; 2154 int index = 0; 2155 2156 while (next) { 2157 cond_resched(); 2158 while (1) { 2159 if (next->processed) 2160 break; 2161 2162 mark_block_processed(rc, next); 2163 2164 if (list_empty(&next->upper)) 2165 break; 2166 2167 edge = list_entry(next->upper.next, 2168 struct backref_edge, list[LOWER]); 2169 edges[index++] = edge; 2170 next = edge->node[UPPER]; 2171 } 2172 next = walk_down_backref(edges, &index); 2173 } 2174 } 2175 2176 static int tree_block_processed(u64 bytenr, u32 blocksize, 2177 struct reloc_control *rc) 2178 { 2179 if (test_range_bit(&rc->processed_blocks, bytenr, 2180 bytenr + blocksize - 1, EXTENT_DIRTY, 1)) 2181 return 1; 2182 return 0; 2183 } 2184 2185 /* 2186 * check if there are any file extent pointers in the leaf point to 2187 * data require processing 2188 */ 2189 static int check_file_extents(struct reloc_control *rc, 2190 u64 bytenr, u32 blocksize, u64 ptr_gen) 2191 { 2192 struct btrfs_key found_key; 2193 struct btrfs_file_extent_item *fi; 2194 struct extent_buffer *leaf; 2195 u32 nritems; 2196 int i; 2197 int ret = 0; 2198 2199 leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); 2200 2201 nritems = btrfs_header_nritems(leaf); 2202 for (i = 0; i < nritems; i++) { 2203 cond_resched(); 2204 btrfs_item_key_to_cpu(leaf, &found_key, i); 2205 if (found_key.type != BTRFS_EXTENT_DATA_KEY) 2206 continue; 2207 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 2208 if (btrfs_file_extent_type(leaf, fi) == 2209 BTRFS_FILE_EXTENT_INLINE) 2210 continue; 2211 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 2212 if (bytenr == 0) 2213 continue; 2214 if (in_block_group(bytenr, rc->block_group)) { 2215 ret = 1; 2216 break; 2217 } 2218 } 2219 free_extent_buffer(leaf); 2220 return ret; 2221 } 2222 2223 /* 2224 * scan child blocks of a given block to find blocks require processing 2225 */ 2226 static int add_child_blocks(struct btrfs_trans_handle *trans, 2227 struct reloc_control *rc, 2228 struct backref_node *node, 2229 struct rb_root *blocks) 2230 { 2231 struct tree_block *block; 2232 struct rb_node *rb_node; 2233 u64 bytenr; 2234 u64 ptr_gen; 2235 u32 blocksize; 2236 u32 nritems; 2237 int i; 2238 int err = 0; 2239 2240 nritems = btrfs_header_nritems(node->eb); 2241 blocksize = btrfs_level_size(rc->extent_root, node->level - 1); 2242 for (i = 0; i < nritems; i++) { 2243 cond_resched(); 2244 bytenr = btrfs_node_blockptr(node->eb, i); 2245 ptr_gen = btrfs_node_ptr_generation(node->eb, i); 2246 if (ptr_gen == trans->transid) 2247 continue; 2248 if (!in_block_group(bytenr, rc->block_group) && 2249 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) 2250 continue; 2251 if (tree_block_processed(bytenr, blocksize, rc)) 2252 continue; 2253 2254 readahead_tree_block(rc->extent_root, 2255 bytenr, blocksize, ptr_gen); 2256 } 2257 2258 for (i = 0; i < nritems; i++) { 2259 cond_resched(); 2260 bytenr = btrfs_node_blockptr(node->eb, i); 2261 ptr_gen = btrfs_node_ptr_generation(node->eb, i); 2262 if (ptr_gen == trans->transid) 2263 continue; 2264 if (!in_block_group(bytenr, rc->block_group) && 2265 (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) 2266 continue; 2267 if (tree_block_processed(bytenr, blocksize, rc)) 2268 continue; 2269 if (!in_block_group(bytenr, rc->block_group) && 2270 !check_file_extents(rc, bytenr, blocksize, ptr_gen)) 2271 continue; 2272 2273 block = kmalloc(sizeof(*block), GFP_NOFS); 2274 if (!block) { 2275 err = -ENOMEM; 2276 break; 2277 } 2278 block->bytenr = bytenr; 2279 btrfs_node_key_to_cpu(node->eb, &block->key, i); 2280 block->level = node->level - 1; 2281 block->key_ready = 1; 2282 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 2283 BUG_ON(rb_node); 2284 } 2285 if (err) 2286 free_block_list(blocks); 2287 return err; 2288 } 2289 2290 /* 2291 * find adjacent blocks require processing 2292 */ 2293 static noinline_for_stack 2294 int add_adjacent_blocks(struct btrfs_trans_handle *trans, 2295 struct reloc_control *rc, 2296 struct backref_cache *cache, 2297 struct rb_root *blocks, int level, 2298 struct backref_node **upper) 2299 { 2300 struct backref_node *node; 2301 int ret = 0; 2302 2303 WARN_ON(!list_empty(&cache->pending[level])); 2304 2305 if (list_empty(&cache->pending[level + 1])) 2306 return 1; 2307 2308 node = list_entry(cache->pending[level + 1].next, 2309 struct backref_node, lower); 2310 if (node->eb) 2311 ret = add_child_blocks(trans, rc, node, blocks); 2312 2313 *upper = node; 2314 return ret; 2315 } 2316 2317 static int get_tree_block_key(struct reloc_control *rc, 2318 struct tree_block *block) 2319 { 2320 struct extent_buffer *eb; 2321 2322 BUG_ON(block->key_ready); 2323 eb = read_tree_block(rc->extent_root, block->bytenr, 2324 block->key.objectid, block->key.offset); 2325 WARN_ON(btrfs_header_level(eb) != block->level); 2326 if (block->level == 0) 2327 btrfs_item_key_to_cpu(eb, &block->key, 0); 2328 else 2329 btrfs_node_key_to_cpu(eb, &block->key, 0); 2330 free_extent_buffer(eb); 2331 block->key_ready = 1; 2332 return 0; 2333 } 2334 2335 static int reada_tree_block(struct reloc_control *rc, 2336 struct tree_block *block) 2337 { 2338 BUG_ON(block->key_ready); 2339 readahead_tree_block(rc->extent_root, block->bytenr, 2340 block->key.objectid, block->key.offset); 2341 return 0; 2342 } 2343 2344 /* 2345 * helper function to relocate a tree block 2346 */ 2347 static int relocate_tree_block(struct btrfs_trans_handle *trans, 2348 struct reloc_control *rc, 2349 struct backref_node *node, 2350 struct btrfs_key *key, 2351 struct btrfs_path *path) 2352 { 2353 struct btrfs_root *root; 2354 int ret; 2355 2356 root = select_one_root(trans, node); 2357 if (unlikely(!root)) { 2358 rc->found_old_snapshot = 1; 2359 update_processed_blocks(rc, node); 2360 return 0; 2361 } 2362 2363 if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { 2364 ret = do_relocation(trans, node, key, path, 1); 2365 if (ret < 0) 2366 goto out; 2367 if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { 2368 ret = replace_file_extents(trans, rc, root, 2369 node->eb, NULL); 2370 if (ret < 0) 2371 goto out; 2372 } 2373 drop_node_buffer(node); 2374 } else if (!root->ref_cows) { 2375 path->lowest_level = node->level; 2376 ret = btrfs_search_slot(trans, root, key, path, 0, 1); 2377 btrfs_release_path(root, path); 2378 if (ret < 0) 2379 goto out; 2380 } else if (root != node->root) { 2381 WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); 2382 } 2383 2384 update_processed_blocks(rc, node); 2385 ret = 0; 2386 out: 2387 drop_node_buffer(node); 2388 return ret; 2389 } 2390 2391 /* 2392 * relocate a list of blocks 2393 */ 2394 static noinline_for_stack 2395 int relocate_tree_blocks(struct btrfs_trans_handle *trans, 2396 struct reloc_control *rc, struct rb_root *blocks) 2397 { 2398 struct backref_cache *cache; 2399 struct backref_node *node; 2400 struct btrfs_path *path; 2401 struct tree_block *block; 2402 struct rb_node *rb_node; 2403 int level = -1; 2404 int ret; 2405 int err = 0; 2406 2407 path = btrfs_alloc_path(); 2408 if (!path) 2409 return -ENOMEM; 2410 2411 cache = kmalloc(sizeof(*cache), GFP_NOFS); 2412 if (!cache) { 2413 btrfs_free_path(path); 2414 return -ENOMEM; 2415 } 2416 2417 backref_cache_init(cache); 2418 2419 rb_node = rb_first(blocks); 2420 while (rb_node) { 2421 block = rb_entry(rb_node, struct tree_block, rb_node); 2422 if (level == -1) 2423 level = block->level; 2424 else 2425 BUG_ON(level != block->level); 2426 if (!block->key_ready) 2427 reada_tree_block(rc, block); 2428 rb_node = rb_next(rb_node); 2429 } 2430 2431 rb_node = rb_first(blocks); 2432 while (rb_node) { 2433 block = rb_entry(rb_node, struct tree_block, rb_node); 2434 if (!block->key_ready) 2435 get_tree_block_key(rc, block); 2436 rb_node = rb_next(rb_node); 2437 } 2438 2439 rb_node = rb_first(blocks); 2440 while (rb_node) { 2441 block = rb_entry(rb_node, struct tree_block, rb_node); 2442 2443 node = build_backref_tree(rc, cache, &block->key, 2444 block->level, block->bytenr); 2445 if (IS_ERR(node)) { 2446 err = PTR_ERR(node); 2447 goto out; 2448 } 2449 2450 ret = relocate_tree_block(trans, rc, node, &block->key, 2451 path); 2452 if (ret < 0) { 2453 err = ret; 2454 goto out; 2455 } 2456 remove_backref_node(cache, node); 2457 rb_node = rb_next(rb_node); 2458 } 2459 2460 if (level > 0) 2461 goto out; 2462 2463 free_block_list(blocks); 2464 2465 /* 2466 * now backrefs of some upper level tree blocks have been cached, 2467 * try relocating blocks referenced by these upper level blocks. 2468 */ 2469 while (1) { 2470 struct backref_node *upper = NULL; 2471 if (trans->transaction->in_commit || 2472 trans->transaction->delayed_refs.flushing) 2473 break; 2474 2475 ret = add_adjacent_blocks(trans, rc, cache, blocks, level, 2476 &upper); 2477 if (ret < 0) 2478 err = ret; 2479 if (ret != 0) 2480 break; 2481 2482 rb_node = rb_first(blocks); 2483 while (rb_node) { 2484 block = rb_entry(rb_node, struct tree_block, rb_node); 2485 if (trans->transaction->in_commit || 2486 trans->transaction->delayed_refs.flushing) 2487 goto out; 2488 BUG_ON(!block->key_ready); 2489 node = build_backref_tree(rc, cache, &block->key, 2490 level, block->bytenr); 2491 if (IS_ERR(node)) { 2492 err = PTR_ERR(node); 2493 goto out; 2494 } 2495 2496 ret = relocate_tree_block(trans, rc, node, 2497 &block->key, path); 2498 if (ret < 0) { 2499 err = ret; 2500 goto out; 2501 } 2502 remove_backref_node(cache, node); 2503 rb_node = rb_next(rb_node); 2504 } 2505 free_block_list(blocks); 2506 2507 if (upper) { 2508 ret = link_to_upper(trans, upper, path); 2509 if (ret < 0) { 2510 err = ret; 2511 break; 2512 } 2513 remove_backref_node(cache, upper); 2514 } 2515 } 2516 out: 2517 free_block_list(blocks); 2518 2519 ret = finish_pending_nodes(trans, cache, path); 2520 if (ret < 0) 2521 err = ret; 2522 2523 kfree(cache); 2524 btrfs_free_path(path); 2525 return err; 2526 } 2527 2528 static noinline_for_stack 2529 int relocate_inode_pages(struct inode *inode, u64 start, u64 len) 2530 { 2531 u64 page_start; 2532 u64 page_end; 2533 unsigned long i; 2534 unsigned long first_index; 2535 unsigned long last_index; 2536 unsigned int total_read = 0; 2537 unsigned int total_dirty = 0; 2538 struct page *page; 2539 struct file_ra_state *ra; 2540 struct btrfs_ordered_extent *ordered; 2541 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 2542 int ret = 0; 2543 2544 ra = kzalloc(sizeof(*ra), GFP_NOFS); 2545 if (!ra) 2546 return -ENOMEM; 2547 2548 mutex_lock(&inode->i_mutex); 2549 first_index = start >> PAGE_CACHE_SHIFT; 2550 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 2551 2552 /* make sure the dirty trick played by the caller work */ 2553 ret = invalidate_inode_pages2_range(inode->i_mapping, 2554 first_index, last_index); 2555 if (ret) 2556 goto out_unlock; 2557 2558 file_ra_state_init(ra, inode->i_mapping); 2559 2560 for (i = first_index ; i <= last_index; i++) { 2561 if (total_read % ra->ra_pages == 0) { 2562 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 2563 min(last_index, ra->ra_pages + i - 1)); 2564 } 2565 total_read++; 2566 again: 2567 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 2568 BUG_ON(1); 2569 page = grab_cache_page(inode->i_mapping, i); 2570 if (!page) { 2571 ret = -ENOMEM; 2572 goto out_unlock; 2573 } 2574 if (!PageUptodate(page)) { 2575 btrfs_readpage(NULL, page); 2576 lock_page(page); 2577 if (!PageUptodate(page)) { 2578 unlock_page(page); 2579 page_cache_release(page); 2580 ret = -EIO; 2581 goto out_unlock; 2582 } 2583 } 2584 wait_on_page_writeback(page); 2585 2586 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 2587 page_end = page_start + PAGE_CACHE_SIZE - 1; 2588 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 2589 2590 ordered = btrfs_lookup_ordered_extent(inode, page_start); 2591 if (ordered) { 2592 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2593 unlock_page(page); 2594 page_cache_release(page); 2595 btrfs_start_ordered_extent(inode, ordered, 1); 2596 btrfs_put_ordered_extent(ordered); 2597 goto again; 2598 } 2599 set_page_extent_mapped(page); 2600 2601 if (i == first_index) 2602 set_extent_bits(io_tree, page_start, page_end, 2603 EXTENT_BOUNDARY, GFP_NOFS); 2604 btrfs_set_extent_delalloc(inode, page_start, page_end); 2605 2606 set_page_dirty(page); 2607 total_dirty++; 2608 2609 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 2610 unlock_page(page); 2611 page_cache_release(page); 2612 } 2613 out_unlock: 2614 mutex_unlock(&inode->i_mutex); 2615 kfree(ra); 2616 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 2617 return ret; 2618 } 2619 2620 static noinline_for_stack 2621 int relocate_data_extent(struct inode *inode, struct btrfs_key *extent_key) 2622 { 2623 struct btrfs_root *root = BTRFS_I(inode)->root; 2624 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; 2625 struct extent_map *em; 2626 u64 start = extent_key->objectid - BTRFS_I(inode)->index_cnt; 2627 u64 end = start + extent_key->offset - 1; 2628 2629 em = alloc_extent_map(GFP_NOFS); 2630 em->start = start; 2631 em->len = extent_key->offset; 2632 em->block_len = extent_key->offset; 2633 em->block_start = extent_key->objectid; 2634 em->bdev = root->fs_info->fs_devices->latest_bdev; 2635 set_bit(EXTENT_FLAG_PINNED, &em->flags); 2636 2637 /* setup extent map to cheat btrfs_readpage */ 2638 lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2639 while (1) { 2640 int ret; 2641 spin_lock(&em_tree->lock); 2642 ret = add_extent_mapping(em_tree, em); 2643 spin_unlock(&em_tree->lock); 2644 if (ret != -EEXIST) { 2645 free_extent_map(em); 2646 break; 2647 } 2648 btrfs_drop_extent_cache(inode, start, end, 0); 2649 } 2650 unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); 2651 2652 return relocate_inode_pages(inode, start, extent_key->offset); 2653 } 2654 2655 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2656 static int get_ref_objectid_v0(struct reloc_control *rc, 2657 struct btrfs_path *path, 2658 struct btrfs_key *extent_key, 2659 u64 *ref_objectid, int *path_change) 2660 { 2661 struct btrfs_key key; 2662 struct extent_buffer *leaf; 2663 struct btrfs_extent_ref_v0 *ref0; 2664 int ret; 2665 int slot; 2666 2667 leaf = path->nodes[0]; 2668 slot = path->slots[0]; 2669 while (1) { 2670 if (slot >= btrfs_header_nritems(leaf)) { 2671 ret = btrfs_next_leaf(rc->extent_root, path); 2672 if (ret < 0) 2673 return ret; 2674 BUG_ON(ret > 0); 2675 leaf = path->nodes[0]; 2676 slot = path->slots[0]; 2677 if (path_change) 2678 *path_change = 1; 2679 } 2680 btrfs_item_key_to_cpu(leaf, &key, slot); 2681 if (key.objectid != extent_key->objectid) 2682 return -ENOENT; 2683 2684 if (key.type != BTRFS_EXTENT_REF_V0_KEY) { 2685 slot++; 2686 continue; 2687 } 2688 ref0 = btrfs_item_ptr(leaf, slot, 2689 struct btrfs_extent_ref_v0); 2690 *ref_objectid = btrfs_ref_objectid_v0(leaf, ref0); 2691 break; 2692 } 2693 return 0; 2694 } 2695 #endif 2696 2697 /* 2698 * helper to add a tree block to the list. 2699 * the major work is getting the generation and level of the block 2700 */ 2701 static int add_tree_block(struct reloc_control *rc, 2702 struct btrfs_key *extent_key, 2703 struct btrfs_path *path, 2704 struct rb_root *blocks) 2705 { 2706 struct extent_buffer *eb; 2707 struct btrfs_extent_item *ei; 2708 struct btrfs_tree_block_info *bi; 2709 struct tree_block *block; 2710 struct rb_node *rb_node; 2711 u32 item_size; 2712 int level = -1; 2713 int generation; 2714 2715 eb = path->nodes[0]; 2716 item_size = btrfs_item_size_nr(eb, path->slots[0]); 2717 2718 if (item_size >= sizeof(*ei) + sizeof(*bi)) { 2719 ei = btrfs_item_ptr(eb, path->slots[0], 2720 struct btrfs_extent_item); 2721 bi = (struct btrfs_tree_block_info *)(ei + 1); 2722 generation = btrfs_extent_generation(eb, ei); 2723 level = btrfs_tree_block_level(eb, bi); 2724 } else { 2725 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2726 u64 ref_owner; 2727 int ret; 2728 2729 BUG_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2730 ret = get_ref_objectid_v0(rc, path, extent_key, 2731 &ref_owner, NULL); 2732 BUG_ON(ref_owner >= BTRFS_MAX_LEVEL); 2733 level = (int)ref_owner; 2734 /* FIXME: get real generation */ 2735 generation = 0; 2736 #else 2737 BUG(); 2738 #endif 2739 } 2740 2741 btrfs_release_path(rc->extent_root, path); 2742 2743 BUG_ON(level == -1); 2744 2745 block = kmalloc(sizeof(*block), GFP_NOFS); 2746 if (!block) 2747 return -ENOMEM; 2748 2749 block->bytenr = extent_key->objectid; 2750 block->key.objectid = extent_key->offset; 2751 block->key.offset = generation; 2752 block->level = level; 2753 block->key_ready = 0; 2754 2755 rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); 2756 BUG_ON(rb_node); 2757 2758 return 0; 2759 } 2760 2761 /* 2762 * helper to add tree blocks for backref of type BTRFS_SHARED_DATA_REF_KEY 2763 */ 2764 static int __add_tree_block(struct reloc_control *rc, 2765 u64 bytenr, u32 blocksize, 2766 struct rb_root *blocks) 2767 { 2768 struct btrfs_path *path; 2769 struct btrfs_key key; 2770 int ret; 2771 2772 if (tree_block_processed(bytenr, blocksize, rc)) 2773 return 0; 2774 2775 if (tree_search(blocks, bytenr)) 2776 return 0; 2777 2778 path = btrfs_alloc_path(); 2779 if (!path) 2780 return -ENOMEM; 2781 2782 key.objectid = bytenr; 2783 key.type = BTRFS_EXTENT_ITEM_KEY; 2784 key.offset = blocksize; 2785 2786 path->search_commit_root = 1; 2787 path->skip_locking = 1; 2788 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 0, 0); 2789 if (ret < 0) 2790 goto out; 2791 BUG_ON(ret); 2792 2793 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 2794 ret = add_tree_block(rc, &key, path, blocks); 2795 out: 2796 btrfs_free_path(path); 2797 return ret; 2798 } 2799 2800 /* 2801 * helper to check if the block use full backrefs for pointers in it 2802 */ 2803 static int block_use_full_backref(struct reloc_control *rc, 2804 struct extent_buffer *eb) 2805 { 2806 struct btrfs_path *path; 2807 struct btrfs_extent_item *ei; 2808 struct btrfs_key key; 2809 u64 flags; 2810 int ret; 2811 2812 if (btrfs_header_flag(eb, BTRFS_HEADER_FLAG_RELOC) || 2813 btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) 2814 return 1; 2815 2816 path = btrfs_alloc_path(); 2817 BUG_ON(!path); 2818 2819 key.objectid = eb->start; 2820 key.type = BTRFS_EXTENT_ITEM_KEY; 2821 key.offset = eb->len; 2822 2823 path->search_commit_root = 1; 2824 path->skip_locking = 1; 2825 ret = btrfs_search_slot(NULL, rc->extent_root, 2826 &key, path, 0, 0); 2827 BUG_ON(ret); 2828 2829 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 2830 struct btrfs_extent_item); 2831 flags = btrfs_extent_flags(path->nodes[0], ei); 2832 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 2833 if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) 2834 ret = 1; 2835 else 2836 ret = 0; 2837 btrfs_free_path(path); 2838 return ret; 2839 } 2840 2841 /* 2842 * helper to add tree blocks for backref of type BTRFS_EXTENT_DATA_REF_KEY 2843 * this function scans fs tree to find blocks reference the data extent 2844 */ 2845 static int find_data_references(struct reloc_control *rc, 2846 struct btrfs_key *extent_key, 2847 struct extent_buffer *leaf, 2848 struct btrfs_extent_data_ref *ref, 2849 struct rb_root *blocks) 2850 { 2851 struct btrfs_path *path; 2852 struct tree_block *block; 2853 struct btrfs_root *root; 2854 struct btrfs_file_extent_item *fi; 2855 struct rb_node *rb_node; 2856 struct btrfs_key key; 2857 u64 ref_root; 2858 u64 ref_objectid; 2859 u64 ref_offset; 2860 u32 ref_count; 2861 u32 nritems; 2862 int err = 0; 2863 int added = 0; 2864 int counted; 2865 int ret; 2866 2867 path = btrfs_alloc_path(); 2868 if (!path) 2869 return -ENOMEM; 2870 2871 ref_root = btrfs_extent_data_ref_root(leaf, ref); 2872 ref_objectid = btrfs_extent_data_ref_objectid(leaf, ref); 2873 ref_offset = btrfs_extent_data_ref_offset(leaf, ref); 2874 ref_count = btrfs_extent_data_ref_count(leaf, ref); 2875 2876 root = read_fs_root(rc->extent_root->fs_info, ref_root); 2877 if (IS_ERR(root)) { 2878 err = PTR_ERR(root); 2879 goto out; 2880 } 2881 2882 key.objectid = ref_objectid; 2883 key.offset = ref_offset; 2884 key.type = BTRFS_EXTENT_DATA_KEY; 2885 2886 path->search_commit_root = 1; 2887 path->skip_locking = 1; 2888 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 2889 if (ret < 0) { 2890 err = ret; 2891 goto out; 2892 } 2893 2894 leaf = path->nodes[0]; 2895 nritems = btrfs_header_nritems(leaf); 2896 /* 2897 * the references in tree blocks that use full backrefs 2898 * are not counted in 2899 */ 2900 if (block_use_full_backref(rc, leaf)) 2901 counted = 0; 2902 else 2903 counted = 1; 2904 rb_node = tree_search(blocks, leaf->start); 2905 if (rb_node) { 2906 if (counted) 2907 added = 1; 2908 else 2909 path->slots[0] = nritems; 2910 } 2911 2912 while (ref_count > 0) { 2913 while (path->slots[0] >= nritems) { 2914 ret = btrfs_next_leaf(root, path); 2915 if (ret < 0) { 2916 err = ret; 2917 goto out; 2918 } 2919 if (ret > 0) { 2920 WARN_ON(1); 2921 goto out; 2922 } 2923 2924 leaf = path->nodes[0]; 2925 nritems = btrfs_header_nritems(leaf); 2926 added = 0; 2927 2928 if (block_use_full_backref(rc, leaf)) 2929 counted = 0; 2930 else 2931 counted = 1; 2932 rb_node = tree_search(blocks, leaf->start); 2933 if (rb_node) { 2934 if (counted) 2935 added = 1; 2936 else 2937 path->slots[0] = nritems; 2938 } 2939 } 2940 2941 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2942 if (key.objectid != ref_objectid || 2943 key.type != BTRFS_EXTENT_DATA_KEY) { 2944 WARN_ON(1); 2945 break; 2946 } 2947 2948 fi = btrfs_item_ptr(leaf, path->slots[0], 2949 struct btrfs_file_extent_item); 2950 2951 if (btrfs_file_extent_type(leaf, fi) == 2952 BTRFS_FILE_EXTENT_INLINE) 2953 goto next; 2954 2955 if (btrfs_file_extent_disk_bytenr(leaf, fi) != 2956 extent_key->objectid) 2957 goto next; 2958 2959 key.offset -= btrfs_file_extent_offset(leaf, fi); 2960 if (key.offset != ref_offset) 2961 goto next; 2962 2963 if (counted) 2964 ref_count--; 2965 if (added) 2966 goto next; 2967 2968 if (!tree_block_processed(leaf->start, leaf->len, rc)) { 2969 block = kmalloc(sizeof(*block), GFP_NOFS); 2970 if (!block) { 2971 err = -ENOMEM; 2972 break; 2973 } 2974 block->bytenr = leaf->start; 2975 btrfs_item_key_to_cpu(leaf, &block->key, 0); 2976 block->level = 0; 2977 block->key_ready = 1; 2978 rb_node = tree_insert(blocks, block->bytenr, 2979 &block->rb_node); 2980 BUG_ON(rb_node); 2981 } 2982 if (counted) 2983 added = 1; 2984 else 2985 path->slots[0] = nritems; 2986 next: 2987 path->slots[0]++; 2988 2989 } 2990 out: 2991 btrfs_free_path(path); 2992 return err; 2993 } 2994 2995 /* 2996 * hepler to find all tree blocks that reference a given data extent 2997 */ 2998 static noinline_for_stack 2999 int add_data_references(struct reloc_control *rc, 3000 struct btrfs_key *extent_key, 3001 struct btrfs_path *path, 3002 struct rb_root *blocks) 3003 { 3004 struct btrfs_key key; 3005 struct extent_buffer *eb; 3006 struct btrfs_extent_data_ref *dref; 3007 struct btrfs_extent_inline_ref *iref; 3008 unsigned long ptr; 3009 unsigned long end; 3010 u32 blocksize; 3011 int ret; 3012 int err = 0; 3013 3014 ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, 3015 extent_key->offset); 3016 BUG_ON(ret < 0); 3017 if (ret > 0) { 3018 /* the relocated data is fragmented */ 3019 rc->extents_skipped++; 3020 btrfs_release_path(rc->extent_root, path); 3021 return 0; 3022 } 3023 3024 blocksize = btrfs_level_size(rc->extent_root, 0); 3025 3026 eb = path->nodes[0]; 3027 ptr = btrfs_item_ptr_offset(eb, path->slots[0]); 3028 end = ptr + btrfs_item_size_nr(eb, path->slots[0]); 3029 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3030 if (ptr + sizeof(struct btrfs_extent_item_v0) == end) 3031 ptr = end; 3032 else 3033 #endif 3034 ptr += sizeof(struct btrfs_extent_item); 3035 3036 while (ptr < end) { 3037 iref = (struct btrfs_extent_inline_ref *)ptr; 3038 key.type = btrfs_extent_inline_ref_type(eb, iref); 3039 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3040 key.offset = btrfs_extent_inline_ref_offset(eb, iref); 3041 ret = __add_tree_block(rc, key.offset, blocksize, 3042 blocks); 3043 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3044 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 3045 ret = find_data_references(rc, extent_key, 3046 eb, dref, blocks); 3047 } else { 3048 BUG(); 3049 } 3050 ptr += btrfs_extent_inline_ref_size(key.type); 3051 } 3052 WARN_ON(ptr > end); 3053 3054 while (1) { 3055 cond_resched(); 3056 eb = path->nodes[0]; 3057 if (path->slots[0] >= btrfs_header_nritems(eb)) { 3058 ret = btrfs_next_leaf(rc->extent_root, path); 3059 if (ret < 0) { 3060 err = ret; 3061 break; 3062 } 3063 if (ret > 0) 3064 break; 3065 eb = path->nodes[0]; 3066 } 3067 3068 btrfs_item_key_to_cpu(eb, &key, path->slots[0]); 3069 if (key.objectid != extent_key->objectid) 3070 break; 3071 3072 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3073 if (key.type == BTRFS_SHARED_DATA_REF_KEY || 3074 key.type == BTRFS_EXTENT_REF_V0_KEY) { 3075 #else 3076 BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); 3077 if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 3078 #endif 3079 ret = __add_tree_block(rc, key.offset, blocksize, 3080 blocks); 3081 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 3082 dref = btrfs_item_ptr(eb, path->slots[0], 3083 struct btrfs_extent_data_ref); 3084 ret = find_data_references(rc, extent_key, 3085 eb, dref, blocks); 3086 } else { 3087 ret = 0; 3088 } 3089 if (ret) { 3090 err = ret; 3091 break; 3092 } 3093 path->slots[0]++; 3094 } 3095 btrfs_release_path(rc->extent_root, path); 3096 if (err) 3097 free_block_list(blocks); 3098 return err; 3099 } 3100 3101 /* 3102 * hepler to find next unprocessed extent 3103 */ 3104 static noinline_for_stack 3105 int find_next_extent(struct btrfs_trans_handle *trans, 3106 struct reloc_control *rc, struct btrfs_path *path) 3107 { 3108 struct btrfs_key key; 3109 struct extent_buffer *leaf; 3110 u64 start, end, last; 3111 int ret; 3112 3113 last = rc->block_group->key.objectid + rc->block_group->key.offset; 3114 while (1) { 3115 cond_resched(); 3116 if (rc->search_start >= last) { 3117 ret = 1; 3118 break; 3119 } 3120 3121 key.objectid = rc->search_start; 3122 key.type = BTRFS_EXTENT_ITEM_KEY; 3123 key.offset = 0; 3124 3125 path->search_commit_root = 1; 3126 path->skip_locking = 1; 3127 ret = btrfs_search_slot(NULL, rc->extent_root, &key, path, 3128 0, 0); 3129 if (ret < 0) 3130 break; 3131 next: 3132 leaf = path->nodes[0]; 3133 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 3134 ret = btrfs_next_leaf(rc->extent_root, path); 3135 if (ret != 0) 3136 break; 3137 leaf = path->nodes[0]; 3138 } 3139 3140 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3141 if (key.objectid >= last) { 3142 ret = 1; 3143 break; 3144 } 3145 3146 if (key.type != BTRFS_EXTENT_ITEM_KEY || 3147 key.objectid + key.offset <= rc->search_start) { 3148 path->slots[0]++; 3149 goto next; 3150 } 3151 3152 ret = find_first_extent_bit(&rc->processed_blocks, 3153 key.objectid, &start, &end, 3154 EXTENT_DIRTY); 3155 3156 if (ret == 0 && start <= key.objectid) { 3157 btrfs_release_path(rc->extent_root, path); 3158 rc->search_start = end + 1; 3159 } else { 3160 rc->search_start = key.objectid + key.offset; 3161 return 0; 3162 } 3163 } 3164 btrfs_release_path(rc->extent_root, path); 3165 return ret; 3166 } 3167 3168 static void set_reloc_control(struct reloc_control *rc) 3169 { 3170 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3171 mutex_lock(&fs_info->trans_mutex); 3172 fs_info->reloc_ctl = rc; 3173 mutex_unlock(&fs_info->trans_mutex); 3174 } 3175 3176 static void unset_reloc_control(struct reloc_control *rc) 3177 { 3178 struct btrfs_fs_info *fs_info = rc->extent_root->fs_info; 3179 mutex_lock(&fs_info->trans_mutex); 3180 fs_info->reloc_ctl = NULL; 3181 mutex_unlock(&fs_info->trans_mutex); 3182 } 3183 3184 static int check_extent_flags(u64 flags) 3185 { 3186 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3187 (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3188 return 1; 3189 if (!(flags & BTRFS_EXTENT_FLAG_DATA) && 3190 !(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) 3191 return 1; 3192 if ((flags & BTRFS_EXTENT_FLAG_DATA) && 3193 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 3194 return 1; 3195 return 0; 3196 } 3197 3198 static noinline_for_stack int relocate_block_group(struct reloc_control *rc) 3199 { 3200 struct rb_root blocks = RB_ROOT; 3201 struct btrfs_key key; 3202 struct btrfs_trans_handle *trans = NULL; 3203 struct btrfs_path *path; 3204 struct btrfs_extent_item *ei; 3205 unsigned long nr; 3206 u64 flags; 3207 u32 item_size; 3208 int ret; 3209 int err = 0; 3210 3211 path = btrfs_alloc_path(); 3212 if (!path) 3213 return -ENOMEM; 3214 3215 rc->search_start = rc->block_group->key.objectid; 3216 clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, 3217 GFP_NOFS); 3218 3219 rc->create_reloc_root = 1; 3220 set_reloc_control(rc); 3221 3222 trans = btrfs_start_transaction(rc->extent_root, 1); 3223 btrfs_commit_transaction(trans, rc->extent_root); 3224 3225 while (1) { 3226 trans = btrfs_start_transaction(rc->extent_root, 1); 3227 3228 ret = find_next_extent(trans, rc, path); 3229 if (ret < 0) 3230 err = ret; 3231 if (ret != 0) 3232 break; 3233 3234 rc->extents_found++; 3235 3236 ei = btrfs_item_ptr(path->nodes[0], path->slots[0], 3237 struct btrfs_extent_item); 3238 btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); 3239 item_size = btrfs_item_size_nr(path->nodes[0], 3240 path->slots[0]); 3241 if (item_size >= sizeof(*ei)) { 3242 flags = btrfs_extent_flags(path->nodes[0], ei); 3243 ret = check_extent_flags(flags); 3244 BUG_ON(ret); 3245 3246 } else { 3247 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 3248 u64 ref_owner; 3249 int path_change = 0; 3250 3251 BUG_ON(item_size != 3252 sizeof(struct btrfs_extent_item_v0)); 3253 ret = get_ref_objectid_v0(rc, path, &key, &ref_owner, 3254 &path_change); 3255 if (ref_owner < BTRFS_FIRST_FREE_OBJECTID) 3256 flags = BTRFS_EXTENT_FLAG_TREE_BLOCK; 3257 else 3258 flags = BTRFS_EXTENT_FLAG_DATA; 3259 3260 if (path_change) { 3261 btrfs_release_path(rc->extent_root, path); 3262 3263 path->search_commit_root = 1; 3264 path->skip_locking = 1; 3265 ret = btrfs_search_slot(NULL, rc->extent_root, 3266 &key, path, 0, 0); 3267 if (ret < 0) { 3268 err = ret; 3269 break; 3270 } 3271 BUG_ON(ret > 0); 3272 } 3273 #else 3274 BUG(); 3275 #endif 3276 } 3277 3278 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 3279 ret = add_tree_block(rc, &key, path, &blocks); 3280 } else if (rc->stage == UPDATE_DATA_PTRS && 3281 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3282 ret = add_data_references(rc, &key, path, &blocks); 3283 } else { 3284 btrfs_release_path(rc->extent_root, path); 3285 ret = 0; 3286 } 3287 if (ret < 0) { 3288 err = 0; 3289 break; 3290 } 3291 3292 if (!RB_EMPTY_ROOT(&blocks)) { 3293 ret = relocate_tree_blocks(trans, rc, &blocks); 3294 if (ret < 0) { 3295 err = ret; 3296 break; 3297 } 3298 } 3299 3300 nr = trans->blocks_used; 3301 btrfs_end_transaction_throttle(trans, rc->extent_root); 3302 trans = NULL; 3303 btrfs_btree_balance_dirty(rc->extent_root, nr); 3304 3305 if (rc->stage == MOVE_DATA_EXTENTS && 3306 (flags & BTRFS_EXTENT_FLAG_DATA)) { 3307 rc->found_file_extent = 1; 3308 ret = relocate_data_extent(rc->data_inode, &key); 3309 if (ret < 0) { 3310 err = ret; 3311 break; 3312 } 3313 } 3314 } 3315 btrfs_free_path(path); 3316 3317 if (trans) { 3318 nr = trans->blocks_used; 3319 btrfs_end_transaction(trans, rc->extent_root); 3320 btrfs_btree_balance_dirty(rc->extent_root, nr); 3321 } 3322 3323 rc->create_reloc_root = 0; 3324 smp_mb(); 3325 3326 if (rc->extents_found > 0) { 3327 trans = btrfs_start_transaction(rc->extent_root, 1); 3328 btrfs_commit_transaction(trans, rc->extent_root); 3329 } 3330 3331 merge_reloc_roots(rc); 3332 3333 unset_reloc_control(rc); 3334 3335 /* get rid of pinned extents */ 3336 trans = btrfs_start_transaction(rc->extent_root, 1); 3337 btrfs_commit_transaction(trans, rc->extent_root); 3338 3339 return err; 3340 } 3341 3342 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 3343 struct btrfs_root *root, 3344 u64 objectid, u64 size) 3345 { 3346 struct btrfs_path *path; 3347 struct btrfs_inode_item *item; 3348 struct extent_buffer *leaf; 3349 int ret; 3350 3351 path = btrfs_alloc_path(); 3352 if (!path) 3353 return -ENOMEM; 3354 3355 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 3356 if (ret) 3357 goto out; 3358 3359 leaf = path->nodes[0]; 3360 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 3361 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 3362 btrfs_set_inode_generation(leaf, item, 1); 3363 btrfs_set_inode_size(leaf, item, size); 3364 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 3365 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 3366 btrfs_mark_buffer_dirty(leaf); 3367 btrfs_release_path(root, path); 3368 out: 3369 btrfs_free_path(path); 3370 return ret; 3371 } 3372 3373 /* 3374 * helper to create inode for data relocation. 3375 * the inode is in data relocation tree and its link count is 0 3376 */ 3377 static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 3378 struct btrfs_block_group_cache *group) 3379 { 3380 struct inode *inode = NULL; 3381 struct btrfs_trans_handle *trans; 3382 struct btrfs_root *root; 3383 struct btrfs_key key; 3384 unsigned long nr; 3385 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 3386 int err = 0; 3387 3388 root = read_fs_root(fs_info, BTRFS_DATA_RELOC_TREE_OBJECTID); 3389 if (IS_ERR(root)) 3390 return ERR_CAST(root); 3391 3392 trans = btrfs_start_transaction(root, 1); 3393 BUG_ON(!trans); 3394 3395 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 3396 if (err) 3397 goto out; 3398 3399 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 3400 BUG_ON(err); 3401 3402 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, 3403 group->key.offset, 0, group->key.offset, 3404 0, 0, 0); 3405 BUG_ON(err); 3406 3407 key.objectid = objectid; 3408 key.type = BTRFS_INODE_ITEM_KEY; 3409 key.offset = 0; 3410 inode = btrfs_iget(root->fs_info->sb, &key, root); 3411 BUG_ON(IS_ERR(inode) || is_bad_inode(inode)); 3412 BTRFS_I(inode)->index_cnt = group->key.objectid; 3413 3414 err = btrfs_orphan_add(trans, inode); 3415 out: 3416 nr = trans->blocks_used; 3417 btrfs_end_transaction(trans, root); 3418 3419 btrfs_btree_balance_dirty(root, nr); 3420 if (err) { 3421 if (inode) 3422 iput(inode); 3423 inode = ERR_PTR(err); 3424 } 3425 return inode; 3426 } 3427 3428 /* 3429 * function to relocate all extents in a block group. 3430 */ 3431 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) 3432 { 3433 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3434 struct reloc_control *rc; 3435 int ret; 3436 int err = 0; 3437 3438 rc = kzalloc(sizeof(*rc), GFP_NOFS); 3439 if (!rc) 3440 return -ENOMEM; 3441 3442 mapping_tree_init(&rc->reloc_root_tree); 3443 extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); 3444 INIT_LIST_HEAD(&rc->reloc_roots); 3445 3446 rc->block_group = btrfs_lookup_block_group(fs_info, group_start); 3447 BUG_ON(!rc->block_group); 3448 3449 btrfs_init_workers(&rc->workers, "relocate", 3450 fs_info->thread_pool_size); 3451 3452 rc->extent_root = extent_root; 3453 btrfs_prepare_block_group_relocation(extent_root, rc->block_group); 3454 3455 rc->data_inode = create_reloc_inode(fs_info, rc->block_group); 3456 if (IS_ERR(rc->data_inode)) { 3457 err = PTR_ERR(rc->data_inode); 3458 rc->data_inode = NULL; 3459 goto out; 3460 } 3461 3462 printk(KERN_INFO "btrfs: relocating block group %llu flags %llu\n", 3463 (unsigned long long)rc->block_group->key.objectid, 3464 (unsigned long long)rc->block_group->flags); 3465 3466 btrfs_start_delalloc_inodes(fs_info->tree_root); 3467 btrfs_wait_ordered_extents(fs_info->tree_root, 0); 3468 3469 while (1) { 3470 mutex_lock(&fs_info->cleaner_mutex); 3471 btrfs_clean_old_snapshots(fs_info->tree_root); 3472 mutex_unlock(&fs_info->cleaner_mutex); 3473 3474 rc->extents_found = 0; 3475 rc->extents_skipped = 0; 3476 3477 ret = relocate_block_group(rc); 3478 if (ret < 0) { 3479 err = ret; 3480 break; 3481 } 3482 3483 if (rc->extents_found == 0) 3484 break; 3485 3486 printk(KERN_INFO "btrfs: found %llu extents\n", 3487 (unsigned long long)rc->extents_found); 3488 3489 if (rc->stage == MOVE_DATA_EXTENTS && rc->found_file_extent) { 3490 btrfs_wait_ordered_range(rc->data_inode, 0, (u64)-1); 3491 invalidate_mapping_pages(rc->data_inode->i_mapping, 3492 0, -1); 3493 rc->stage = UPDATE_DATA_PTRS; 3494 } else if (rc->stage == UPDATE_DATA_PTRS && 3495 rc->extents_skipped >= rc->extents_found) { 3496 iput(rc->data_inode); 3497 rc->data_inode = create_reloc_inode(fs_info, 3498 rc->block_group); 3499 if (IS_ERR(rc->data_inode)) { 3500 err = PTR_ERR(rc->data_inode); 3501 rc->data_inode = NULL; 3502 break; 3503 } 3504 rc->stage = MOVE_DATA_EXTENTS; 3505 rc->found_file_extent = 0; 3506 } 3507 } 3508 3509 filemap_fdatawrite_range(fs_info->btree_inode->i_mapping, 3510 rc->block_group->key.objectid, 3511 rc->block_group->key.objectid + 3512 rc->block_group->key.offset - 1); 3513 3514 WARN_ON(rc->block_group->pinned > 0); 3515 WARN_ON(rc->block_group->reserved > 0); 3516 WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); 3517 out: 3518 iput(rc->data_inode); 3519 btrfs_stop_workers(&rc->workers); 3520 btrfs_put_block_group(rc->block_group); 3521 kfree(rc); 3522 return err; 3523 } 3524 3525 /* 3526 * recover relocation interrupted by system crash. 3527 * 3528 * this function resumes merging reloc trees with corresponding fs trees. 3529 * this is important for keeping the sharing of tree blocks 3530 */ 3531 int btrfs_recover_relocation(struct btrfs_root *root) 3532 { 3533 LIST_HEAD(reloc_roots); 3534 struct btrfs_key key; 3535 struct btrfs_root *fs_root; 3536 struct btrfs_root *reloc_root; 3537 struct btrfs_path *path; 3538 struct extent_buffer *leaf; 3539 struct reloc_control *rc = NULL; 3540 struct btrfs_trans_handle *trans; 3541 int ret; 3542 int err = 0; 3543 3544 path = btrfs_alloc_path(); 3545 if (!path) 3546 return -ENOMEM; 3547 3548 key.objectid = BTRFS_TREE_RELOC_OBJECTID; 3549 key.type = BTRFS_ROOT_ITEM_KEY; 3550 key.offset = (u64)-1; 3551 3552 while (1) { 3553 ret = btrfs_search_slot(NULL, root->fs_info->tree_root, &key, 3554 path, 0, 0); 3555 if (ret < 0) { 3556 err = ret; 3557 goto out; 3558 } 3559 if (ret > 0) { 3560 if (path->slots[0] == 0) 3561 break; 3562 path->slots[0]--; 3563 } 3564 leaf = path->nodes[0]; 3565 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 3566 btrfs_release_path(root->fs_info->tree_root, path); 3567 3568 if (key.objectid != BTRFS_TREE_RELOC_OBJECTID || 3569 key.type != BTRFS_ROOT_ITEM_KEY) 3570 break; 3571 3572 reloc_root = btrfs_read_fs_root_no_radix(root, &key); 3573 if (IS_ERR(reloc_root)) { 3574 err = PTR_ERR(reloc_root); 3575 goto out; 3576 } 3577 3578 list_add(&reloc_root->root_list, &reloc_roots); 3579 3580 if (btrfs_root_refs(&reloc_root->root_item) > 0) { 3581 fs_root = read_fs_root(root->fs_info, 3582 reloc_root->root_key.offset); 3583 if (IS_ERR(fs_root)) { 3584 err = PTR_ERR(fs_root); 3585 goto out; 3586 } 3587 } 3588 3589 if (key.offset == 0) 3590 break; 3591 3592 key.offset--; 3593 } 3594 btrfs_release_path(root->fs_info->tree_root, path); 3595 3596 if (list_empty(&reloc_roots)) 3597 goto out; 3598 3599 rc = kzalloc(sizeof(*rc), GFP_NOFS); 3600 if (!rc) { 3601 err = -ENOMEM; 3602 goto out; 3603 } 3604 3605 mapping_tree_init(&rc->reloc_root_tree); 3606 INIT_LIST_HEAD(&rc->reloc_roots); 3607 btrfs_init_workers(&rc->workers, "relocate", 3608 root->fs_info->thread_pool_size); 3609 rc->extent_root = root->fs_info->extent_root; 3610 3611 set_reloc_control(rc); 3612 3613 while (!list_empty(&reloc_roots)) { 3614 reloc_root = list_entry(reloc_roots.next, 3615 struct btrfs_root, root_list); 3616 list_del(&reloc_root->root_list); 3617 3618 if (btrfs_root_refs(&reloc_root->root_item) == 0) { 3619 list_add_tail(&reloc_root->root_list, 3620 &rc->reloc_roots); 3621 continue; 3622 } 3623 3624 fs_root = read_fs_root(root->fs_info, 3625 reloc_root->root_key.offset); 3626 BUG_ON(IS_ERR(fs_root)); 3627 3628 __add_reloc_root(reloc_root); 3629 fs_root->reloc_root = reloc_root; 3630 } 3631 3632 trans = btrfs_start_transaction(rc->extent_root, 1); 3633 btrfs_commit_transaction(trans, rc->extent_root); 3634 3635 merge_reloc_roots(rc); 3636 3637 unset_reloc_control(rc); 3638 3639 trans = btrfs_start_transaction(rc->extent_root, 1); 3640 btrfs_commit_transaction(trans, rc->extent_root); 3641 out: 3642 if (rc) { 3643 btrfs_stop_workers(&rc->workers); 3644 kfree(rc); 3645 } 3646 while (!list_empty(&reloc_roots)) { 3647 reloc_root = list_entry(reloc_roots.next, 3648 struct btrfs_root, root_list); 3649 list_del(&reloc_root->root_list); 3650 free_extent_buffer(reloc_root->node); 3651 free_extent_buffer(reloc_root->commit_root); 3652 kfree(reloc_root); 3653 } 3654 btrfs_free_path(path); 3655 3656 if (err == 0) { 3657 /* cleanup orphan inode in data relocation tree */ 3658 fs_root = read_fs_root(root->fs_info, 3659 BTRFS_DATA_RELOC_TREE_OBJECTID); 3660 if (IS_ERR(fs_root)) 3661 err = PTR_ERR(fs_root); 3662 } 3663 return err; 3664 } 3665 3666 /* 3667 * helper to add ordered checksum for data relocation. 3668 * 3669 * cloning checksum properly handles the nodatasum extents. 3670 * it also saves CPU time to re-calculate the checksum. 3671 */ 3672 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 3673 { 3674 struct btrfs_ordered_sum *sums; 3675 struct btrfs_sector_sum *sector_sum; 3676 struct btrfs_ordered_extent *ordered; 3677 struct btrfs_root *root = BTRFS_I(inode)->root; 3678 size_t offset; 3679 int ret; 3680 u64 disk_bytenr; 3681 LIST_HEAD(list); 3682 3683 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 3684 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 3685 3686 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 3687 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 3688 disk_bytenr + len - 1, &list); 3689 3690 while (!list_empty(&list)) { 3691 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 3692 list_del_init(&sums->list); 3693 3694 sector_sum = sums->sums; 3695 sums->bytenr = ordered->start; 3696 3697 offset = 0; 3698 while (offset < sums->len) { 3699 sector_sum->bytenr += ordered->start - disk_bytenr; 3700 sector_sum++; 3701 offset += root->sectorsize; 3702 } 3703 3704 btrfs_add_ordered_sum(inode, ordered, sums); 3705 } 3706 btrfs_put_ordered_extent(ordered); 3707 return 0; 3708 } 3709