1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include "compat.h" 24 #include "hash.h" 25 #include "crc32c.h" 26 #include "ctree.h" 27 #include "disk-io.h" 28 #include "print-tree.h" 29 #include "transaction.h" 30 #include "volumes.h" 31 #include "locking.h" 32 #include "ref-cache.h" 33 34 #define PENDING_EXTENT_INSERT 0 35 #define PENDING_EXTENT_DELETE 1 36 #define PENDING_BACKREF_UPDATE 2 37 38 struct pending_extent_op { 39 int type; 40 u64 bytenr; 41 u64 num_bytes; 42 u64 parent; 43 u64 orig_parent; 44 u64 generation; 45 u64 orig_generation; 46 int level; 47 struct list_head list; 48 int del; 49 }; 50 51 static int finish_current_insert(struct btrfs_trans_handle *trans, 52 struct btrfs_root *extent_root, int all); 53 static int del_pending_extents(struct btrfs_trans_handle *trans, 54 struct btrfs_root *extent_root, int all); 55 static int pin_down_bytes(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 bytenr, u64 num_bytes, int is_data); 58 static int update_block_group(struct btrfs_trans_handle *trans, 59 struct btrfs_root *root, 60 u64 bytenr, u64 num_bytes, int alloc, 61 int mark_free); 62 63 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 64 { 65 return (cache->flags & bits) == bits; 66 } 67 68 /* 69 * this adds the block group to the fs_info rb tree for the block group 70 * cache 71 */ 72 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 73 struct btrfs_block_group_cache *block_group) 74 { 75 struct rb_node **p; 76 struct rb_node *parent = NULL; 77 struct btrfs_block_group_cache *cache; 78 79 spin_lock(&info->block_group_cache_lock); 80 p = &info->block_group_cache_tree.rb_node; 81 82 while (*p) { 83 parent = *p; 84 cache = rb_entry(parent, struct btrfs_block_group_cache, 85 cache_node); 86 if (block_group->key.objectid < cache->key.objectid) { 87 p = &(*p)->rb_left; 88 } else if (block_group->key.objectid > cache->key.objectid) { 89 p = &(*p)->rb_right; 90 } else { 91 spin_unlock(&info->block_group_cache_lock); 92 return -EEXIST; 93 } 94 } 95 96 rb_link_node(&block_group->cache_node, parent, p); 97 rb_insert_color(&block_group->cache_node, 98 &info->block_group_cache_tree); 99 spin_unlock(&info->block_group_cache_lock); 100 101 return 0; 102 } 103 104 /* 105 * This will return the block group at or after bytenr if contains is 0, else 106 * it will return the block group that contains the bytenr 107 */ 108 static struct btrfs_block_group_cache * 109 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 110 int contains) 111 { 112 struct btrfs_block_group_cache *cache, *ret = NULL; 113 struct rb_node *n; 114 u64 end, start; 115 116 spin_lock(&info->block_group_cache_lock); 117 n = info->block_group_cache_tree.rb_node; 118 119 while (n) { 120 cache = rb_entry(n, struct btrfs_block_group_cache, 121 cache_node); 122 end = cache->key.objectid + cache->key.offset - 1; 123 start = cache->key.objectid; 124 125 if (bytenr < start) { 126 if (!contains && (!ret || start < ret->key.objectid)) 127 ret = cache; 128 n = n->rb_left; 129 } else if (bytenr > start) { 130 if (contains && bytenr <= end) { 131 ret = cache; 132 break; 133 } 134 n = n->rb_right; 135 } else { 136 ret = cache; 137 break; 138 } 139 } 140 if (ret) 141 atomic_inc(&ret->count); 142 spin_unlock(&info->block_group_cache_lock); 143 144 return ret; 145 } 146 147 /* 148 * this is only called by cache_block_group, since we could have freed extents 149 * we need to check the pinned_extents for any extents that can't be used yet 150 * since their free space will be released as soon as the transaction commits. 151 */ 152 static int add_new_free_space(struct btrfs_block_group_cache *block_group, 153 struct btrfs_fs_info *info, u64 start, u64 end) 154 { 155 u64 extent_start, extent_end, size; 156 int ret; 157 158 mutex_lock(&info->pinned_mutex); 159 while (start < end) { 160 ret = find_first_extent_bit(&info->pinned_extents, start, 161 &extent_start, &extent_end, 162 EXTENT_DIRTY); 163 if (ret) 164 break; 165 166 if (extent_start == start) { 167 start = extent_end + 1; 168 } else if (extent_start > start && extent_start < end) { 169 size = extent_start - start; 170 ret = btrfs_add_free_space(block_group, start, 171 size); 172 BUG_ON(ret); 173 start = extent_end + 1; 174 } else { 175 break; 176 } 177 } 178 179 if (start < end) { 180 size = end - start; 181 ret = btrfs_add_free_space(block_group, start, size); 182 BUG_ON(ret); 183 } 184 mutex_unlock(&info->pinned_mutex); 185 186 return 0; 187 } 188 189 static int remove_sb_from_cache(struct btrfs_root *root, 190 struct btrfs_block_group_cache *cache) 191 { 192 u64 bytenr; 193 u64 *logical; 194 int stripe_len; 195 int i, nr, ret; 196 197 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 198 bytenr = btrfs_sb_offset(i); 199 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 200 cache->key.objectid, bytenr, 0, 201 &logical, &nr, &stripe_len); 202 BUG_ON(ret); 203 while (nr--) { 204 btrfs_remove_free_space(cache, logical[nr], 205 stripe_len); 206 } 207 kfree(logical); 208 } 209 return 0; 210 } 211 212 static int cache_block_group(struct btrfs_root *root, 213 struct btrfs_block_group_cache *block_group) 214 { 215 struct btrfs_path *path; 216 int ret = 0; 217 struct btrfs_key key; 218 struct extent_buffer *leaf; 219 int slot; 220 u64 last; 221 222 if (!block_group) 223 return 0; 224 225 root = root->fs_info->extent_root; 226 227 if (block_group->cached) 228 return 0; 229 230 path = btrfs_alloc_path(); 231 if (!path) 232 return -ENOMEM; 233 234 path->reada = 2; 235 /* 236 * we get into deadlocks with paths held by callers of this function. 237 * since the alloc_mutex is protecting things right now, just 238 * skip the locking here 239 */ 240 path->skip_locking = 1; 241 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 242 key.objectid = last; 243 key.offset = 0; 244 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 245 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 246 if (ret < 0) 247 goto err; 248 249 while (1) { 250 leaf = path->nodes[0]; 251 slot = path->slots[0]; 252 if (slot >= btrfs_header_nritems(leaf)) { 253 ret = btrfs_next_leaf(root, path); 254 if (ret < 0) 255 goto err; 256 if (ret == 0) 257 continue; 258 else 259 break; 260 } 261 btrfs_item_key_to_cpu(leaf, &key, slot); 262 if (key.objectid < block_group->key.objectid) 263 goto next; 264 265 if (key.objectid >= block_group->key.objectid + 266 block_group->key.offset) 267 break; 268 269 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 270 add_new_free_space(block_group, root->fs_info, last, 271 key.objectid); 272 273 last = key.objectid + key.offset; 274 } 275 next: 276 path->slots[0]++; 277 } 278 279 add_new_free_space(block_group, root->fs_info, last, 280 block_group->key.objectid + 281 block_group->key.offset); 282 283 remove_sb_from_cache(root, block_group); 284 block_group->cached = 1; 285 ret = 0; 286 err: 287 btrfs_free_path(path); 288 return ret; 289 } 290 291 /* 292 * return the block group that starts at or after bytenr 293 */ 294 static struct btrfs_block_group_cache * 295 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 296 { 297 struct btrfs_block_group_cache *cache; 298 299 cache = block_group_cache_tree_search(info, bytenr, 0); 300 301 return cache; 302 } 303 304 /* 305 * return the block group that contains teh given bytenr 306 */ 307 struct btrfs_block_group_cache *btrfs_lookup_block_group( 308 struct btrfs_fs_info *info, 309 u64 bytenr) 310 { 311 struct btrfs_block_group_cache *cache; 312 313 cache = block_group_cache_tree_search(info, bytenr, 1); 314 315 return cache; 316 } 317 318 static inline void put_block_group(struct btrfs_block_group_cache *cache) 319 { 320 if (atomic_dec_and_test(&cache->count)) 321 kfree(cache); 322 } 323 324 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 325 u64 flags) 326 { 327 struct list_head *head = &info->space_info; 328 struct btrfs_space_info *found; 329 list_for_each_entry(found, head, list) { 330 if (found->flags == flags) 331 return found; 332 } 333 return NULL; 334 } 335 336 static u64 div_factor(u64 num, int factor) 337 { 338 if (factor == 10) 339 return num; 340 num *= factor; 341 do_div(num, 10); 342 return num; 343 } 344 345 u64 btrfs_find_block_group(struct btrfs_root *root, 346 u64 search_start, u64 search_hint, int owner) 347 { 348 struct btrfs_block_group_cache *cache; 349 u64 used; 350 u64 last = max(search_hint, search_start); 351 u64 group_start = 0; 352 int full_search = 0; 353 int factor = 9; 354 int wrapped = 0; 355 again: 356 while (1) { 357 cache = btrfs_lookup_first_block_group(root->fs_info, last); 358 if (!cache) 359 break; 360 361 spin_lock(&cache->lock); 362 last = cache->key.objectid + cache->key.offset; 363 used = btrfs_block_group_used(&cache->item); 364 365 if ((full_search || !cache->ro) && 366 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 367 if (used + cache->pinned + cache->reserved < 368 div_factor(cache->key.offset, factor)) { 369 group_start = cache->key.objectid; 370 spin_unlock(&cache->lock); 371 put_block_group(cache); 372 goto found; 373 } 374 } 375 spin_unlock(&cache->lock); 376 put_block_group(cache); 377 cond_resched(); 378 } 379 if (!wrapped) { 380 last = search_start; 381 wrapped = 1; 382 goto again; 383 } 384 if (!full_search && factor < 10) { 385 last = search_start; 386 full_search = 1; 387 factor = 10; 388 goto again; 389 } 390 found: 391 return group_start; 392 } 393 394 /* simple helper to search for an existing extent at a given offset */ 395 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 396 { 397 int ret; 398 struct btrfs_key key; 399 struct btrfs_path *path; 400 401 path = btrfs_alloc_path(); 402 BUG_ON(!path); 403 key.objectid = start; 404 key.offset = len; 405 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 406 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 407 0, 0); 408 btrfs_free_path(path); 409 return ret; 410 } 411 412 /* 413 * Back reference rules. Back refs have three main goals: 414 * 415 * 1) differentiate between all holders of references to an extent so that 416 * when a reference is dropped we can make sure it was a valid reference 417 * before freeing the extent. 418 * 419 * 2) Provide enough information to quickly find the holders of an extent 420 * if we notice a given block is corrupted or bad. 421 * 422 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 423 * maintenance. This is actually the same as #2, but with a slightly 424 * different use case. 425 * 426 * File extents can be referenced by: 427 * 428 * - multiple snapshots, subvolumes, or different generations in one subvol 429 * - different files inside a single subvolume 430 * - different offsets inside a file (bookend extents in file.c) 431 * 432 * The extent ref structure has fields for: 433 * 434 * - Objectid of the subvolume root 435 * - Generation number of the tree holding the reference 436 * - objectid of the file holding the reference 437 * - number of references holding by parent node (alway 1 for tree blocks) 438 * 439 * Btree leaf may hold multiple references to a file extent. In most cases, 440 * these references are from same file and the corresponding offsets inside 441 * the file are close together. 442 * 443 * When a file extent is allocated the fields are filled in: 444 * (root_key.objectid, trans->transid, inode objectid, 1) 445 * 446 * When a leaf is cow'd new references are added for every file extent found 447 * in the leaf. It looks similar to the create case, but trans->transid will 448 * be different when the block is cow'd. 449 * 450 * (root_key.objectid, trans->transid, inode objectid, 451 * number of references in the leaf) 452 * 453 * When a file extent is removed either during snapshot deletion or 454 * file truncation, we find the corresponding back reference and check 455 * the following fields: 456 * 457 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 458 * inode objectid) 459 * 460 * Btree extents can be referenced by: 461 * 462 * - Different subvolumes 463 * - Different generations of the same subvolume 464 * 465 * When a tree block is created, back references are inserted: 466 * 467 * (root->root_key.objectid, trans->transid, level, 1) 468 * 469 * When a tree block is cow'd, new back references are added for all the 470 * blocks it points to. If the tree block isn't in reference counted root, 471 * the old back references are removed. These new back references are of 472 * the form (trans->transid will have increased since creation): 473 * 474 * (root->root_key.objectid, trans->transid, level, 1) 475 * 476 * When a backref is in deleting, the following fields are checked: 477 * 478 * if backref was for a tree root: 479 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) 480 * else 481 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) 482 * 483 * Back Reference Key composing: 484 * 485 * The key objectid corresponds to the first byte in the extent, the key 486 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first 487 * byte of parent extent. If a extent is tree root, the key offset is set 488 * to the key objectid. 489 */ 490 491 static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, 492 struct btrfs_root *root, 493 struct btrfs_path *path, 494 u64 bytenr, u64 parent, 495 u64 ref_root, u64 ref_generation, 496 u64 owner_objectid, int del) 497 { 498 struct btrfs_key key; 499 struct btrfs_extent_ref *ref; 500 struct extent_buffer *leaf; 501 u64 ref_objectid; 502 int ret; 503 504 key.objectid = bytenr; 505 key.type = BTRFS_EXTENT_REF_KEY; 506 key.offset = parent; 507 508 ret = btrfs_search_slot(trans, root, &key, path, del ? -1 : 0, 1); 509 if (ret < 0) 510 goto out; 511 if (ret > 0) { 512 ret = -ENOENT; 513 goto out; 514 } 515 516 leaf = path->nodes[0]; 517 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 518 ref_objectid = btrfs_ref_objectid(leaf, ref); 519 if (btrfs_ref_root(leaf, ref) != ref_root || 520 btrfs_ref_generation(leaf, ref) != ref_generation || 521 (ref_objectid != owner_objectid && 522 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 523 ret = -EIO; 524 WARN_ON(1); 525 goto out; 526 } 527 ret = 0; 528 out: 529 return ret; 530 } 531 532 /* 533 * updates all the backrefs that are pending on update_list for the 534 * extent_root 535 */ 536 static noinline int update_backrefs(struct btrfs_trans_handle *trans, 537 struct btrfs_root *extent_root, 538 struct btrfs_path *path, 539 struct list_head *update_list) 540 { 541 struct btrfs_key key; 542 struct btrfs_extent_ref *ref; 543 struct btrfs_fs_info *info = extent_root->fs_info; 544 struct pending_extent_op *op; 545 struct extent_buffer *leaf; 546 int ret = 0; 547 struct list_head *cur = update_list->next; 548 u64 ref_objectid; 549 u64 ref_root = extent_root->root_key.objectid; 550 551 op = list_entry(cur, struct pending_extent_op, list); 552 553 search: 554 key.objectid = op->bytenr; 555 key.type = BTRFS_EXTENT_REF_KEY; 556 key.offset = op->orig_parent; 557 558 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); 559 BUG_ON(ret); 560 561 leaf = path->nodes[0]; 562 563 loop: 564 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 565 566 ref_objectid = btrfs_ref_objectid(leaf, ref); 567 568 if (btrfs_ref_root(leaf, ref) != ref_root || 569 btrfs_ref_generation(leaf, ref) != op->orig_generation || 570 (ref_objectid != op->level && 571 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 572 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " 573 "root %llu, owner %u\n", 574 (unsigned long long)op->bytenr, 575 (unsigned long long)op->orig_parent, 576 (unsigned long long)ref_root, op->level); 577 btrfs_print_leaf(extent_root, leaf); 578 BUG(); 579 } 580 581 key.objectid = op->bytenr; 582 key.offset = op->parent; 583 key.type = BTRFS_EXTENT_REF_KEY; 584 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); 585 BUG_ON(ret); 586 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 587 btrfs_set_ref_generation(leaf, ref, op->generation); 588 589 cur = cur->next; 590 591 list_del_init(&op->list); 592 unlock_extent(&info->extent_ins, op->bytenr, 593 op->bytenr + op->num_bytes - 1, GFP_NOFS); 594 kfree(op); 595 596 if (cur == update_list) { 597 btrfs_mark_buffer_dirty(path->nodes[0]); 598 btrfs_release_path(extent_root, path); 599 goto out; 600 } 601 602 op = list_entry(cur, struct pending_extent_op, list); 603 604 path->slots[0]++; 605 while (path->slots[0] < btrfs_header_nritems(leaf)) { 606 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 607 if (key.objectid == op->bytenr && 608 key.type == BTRFS_EXTENT_REF_KEY) 609 goto loop; 610 path->slots[0]++; 611 } 612 613 btrfs_mark_buffer_dirty(path->nodes[0]); 614 btrfs_release_path(extent_root, path); 615 goto search; 616 617 out: 618 return 0; 619 } 620 621 static noinline int insert_extents(struct btrfs_trans_handle *trans, 622 struct btrfs_root *extent_root, 623 struct btrfs_path *path, 624 struct list_head *insert_list, int nr) 625 { 626 struct btrfs_key *keys; 627 u32 *data_size; 628 struct pending_extent_op *op; 629 struct extent_buffer *leaf; 630 struct list_head *cur = insert_list->next; 631 struct btrfs_fs_info *info = extent_root->fs_info; 632 u64 ref_root = extent_root->root_key.objectid; 633 int i = 0, last = 0, ret; 634 int total = nr * 2; 635 636 if (!nr) 637 return 0; 638 639 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); 640 if (!keys) 641 return -ENOMEM; 642 643 data_size = kzalloc(total * sizeof(u32), GFP_NOFS); 644 if (!data_size) { 645 kfree(keys); 646 return -ENOMEM; 647 } 648 649 list_for_each_entry(op, insert_list, list) { 650 keys[i].objectid = op->bytenr; 651 keys[i].offset = op->num_bytes; 652 keys[i].type = BTRFS_EXTENT_ITEM_KEY; 653 data_size[i] = sizeof(struct btrfs_extent_item); 654 i++; 655 656 keys[i].objectid = op->bytenr; 657 keys[i].offset = op->parent; 658 keys[i].type = BTRFS_EXTENT_REF_KEY; 659 data_size[i] = sizeof(struct btrfs_extent_ref); 660 i++; 661 } 662 663 op = list_entry(cur, struct pending_extent_op, list); 664 i = 0; 665 while (i < total) { 666 int c; 667 ret = btrfs_insert_some_items(trans, extent_root, path, 668 keys+i, data_size+i, total-i); 669 BUG_ON(ret < 0); 670 671 if (last && ret > 1) 672 BUG(); 673 674 leaf = path->nodes[0]; 675 for (c = 0; c < ret; c++) { 676 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; 677 678 /* 679 * if the first item we inserted was a backref, then 680 * the EXTENT_ITEM will be the odd c's, else it will 681 * be the even c's 682 */ 683 if ((ref_first && (c % 2)) || 684 (!ref_first && !(c % 2))) { 685 struct btrfs_extent_item *itm; 686 687 itm = btrfs_item_ptr(leaf, path->slots[0] + c, 688 struct btrfs_extent_item); 689 btrfs_set_extent_refs(path->nodes[0], itm, 1); 690 op->del++; 691 } else { 692 struct btrfs_extent_ref *ref; 693 694 ref = btrfs_item_ptr(leaf, path->slots[0] + c, 695 struct btrfs_extent_ref); 696 btrfs_set_ref_root(leaf, ref, ref_root); 697 btrfs_set_ref_generation(leaf, ref, 698 op->generation); 699 btrfs_set_ref_objectid(leaf, ref, op->level); 700 btrfs_set_ref_num_refs(leaf, ref, 1); 701 op->del++; 702 } 703 704 /* 705 * using del to see when its ok to free up the 706 * pending_extent_op. In the case where we insert the 707 * last item on the list in order to help do batching 708 * we need to not free the extent op until we actually 709 * insert the extent_item 710 */ 711 if (op->del == 2) { 712 unlock_extent(&info->extent_ins, op->bytenr, 713 op->bytenr + op->num_bytes - 1, 714 GFP_NOFS); 715 cur = cur->next; 716 list_del_init(&op->list); 717 kfree(op); 718 if (cur != insert_list) 719 op = list_entry(cur, 720 struct pending_extent_op, 721 list); 722 } 723 } 724 btrfs_mark_buffer_dirty(leaf); 725 btrfs_release_path(extent_root, path); 726 727 /* 728 * Ok backref's and items usually go right next to eachother, 729 * but if we could only insert 1 item that means that we 730 * inserted on the end of a leaf, and we have no idea what may 731 * be on the next leaf so we just play it safe. In order to 732 * try and help this case we insert the last thing on our 733 * insert list so hopefully it will end up being the last 734 * thing on the leaf and everything else will be before it, 735 * which will let us insert a whole bunch of items at the same 736 * time. 737 */ 738 if (ret == 1 && !last && (i + ret < total)) { 739 /* 740 * last: where we will pick up the next time around 741 * i: our current key to insert, will be total - 1 742 * cur: the current op we are screwing with 743 * op: duh 744 */ 745 last = i + ret; 746 i = total - 1; 747 cur = insert_list->prev; 748 op = list_entry(cur, struct pending_extent_op, list); 749 } else if (last) { 750 /* 751 * ok we successfully inserted the last item on the 752 * list, lets reset everything 753 * 754 * i: our current key to insert, so where we left off 755 * last time 756 * last: done with this 757 * cur: the op we are messing with 758 * op: duh 759 * total: since we inserted the last key, we need to 760 * decrement total so we dont overflow 761 */ 762 i = last; 763 last = 0; 764 total--; 765 if (i < total) { 766 cur = insert_list->next; 767 op = list_entry(cur, struct pending_extent_op, 768 list); 769 } 770 } else { 771 i += ret; 772 } 773 774 cond_resched(); 775 } 776 ret = 0; 777 kfree(keys); 778 kfree(data_size); 779 return ret; 780 } 781 782 static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 783 struct btrfs_root *root, 784 struct btrfs_path *path, 785 u64 bytenr, u64 parent, 786 u64 ref_root, u64 ref_generation, 787 u64 owner_objectid) 788 { 789 struct btrfs_key key; 790 struct extent_buffer *leaf; 791 struct btrfs_extent_ref *ref; 792 u32 num_refs; 793 int ret; 794 795 key.objectid = bytenr; 796 key.type = BTRFS_EXTENT_REF_KEY; 797 key.offset = parent; 798 799 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); 800 if (ret == 0) { 801 leaf = path->nodes[0]; 802 ref = btrfs_item_ptr(leaf, path->slots[0], 803 struct btrfs_extent_ref); 804 btrfs_set_ref_root(leaf, ref, ref_root); 805 btrfs_set_ref_generation(leaf, ref, ref_generation); 806 btrfs_set_ref_objectid(leaf, ref, owner_objectid); 807 btrfs_set_ref_num_refs(leaf, ref, 1); 808 } else if (ret == -EEXIST) { 809 u64 existing_owner; 810 BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); 811 leaf = path->nodes[0]; 812 ref = btrfs_item_ptr(leaf, path->slots[0], 813 struct btrfs_extent_ref); 814 if (btrfs_ref_root(leaf, ref) != ref_root || 815 btrfs_ref_generation(leaf, ref) != ref_generation) { 816 ret = -EIO; 817 WARN_ON(1); 818 goto out; 819 } 820 821 num_refs = btrfs_ref_num_refs(leaf, ref); 822 BUG_ON(num_refs == 0); 823 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); 824 825 existing_owner = btrfs_ref_objectid(leaf, ref); 826 if (existing_owner != owner_objectid && 827 existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { 828 btrfs_set_ref_objectid(leaf, ref, 829 BTRFS_MULTIPLE_OBJECTIDS); 830 } 831 ret = 0; 832 } else { 833 goto out; 834 } 835 btrfs_mark_buffer_dirty(path->nodes[0]); 836 out: 837 btrfs_release_path(root, path); 838 return ret; 839 } 840 841 static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 842 struct btrfs_root *root, 843 struct btrfs_path *path) 844 { 845 struct extent_buffer *leaf; 846 struct btrfs_extent_ref *ref; 847 u32 num_refs; 848 int ret = 0; 849 850 leaf = path->nodes[0]; 851 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 852 num_refs = btrfs_ref_num_refs(leaf, ref); 853 BUG_ON(num_refs == 0); 854 num_refs -= 1; 855 if (num_refs == 0) { 856 ret = btrfs_del_item(trans, root, path); 857 } else { 858 btrfs_set_ref_num_refs(leaf, ref, num_refs); 859 btrfs_mark_buffer_dirty(leaf); 860 } 861 btrfs_release_path(root, path); 862 return ret; 863 } 864 865 #ifdef BIO_RW_DISCARD 866 static void btrfs_issue_discard(struct block_device *bdev, 867 u64 start, u64 len) 868 { 869 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 870 } 871 #endif 872 873 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 874 u64 num_bytes) 875 { 876 #ifdef BIO_RW_DISCARD 877 int ret; 878 u64 map_length = num_bytes; 879 struct btrfs_multi_bio *multi = NULL; 880 881 /* Tell the block device(s) that the sectors can be discarded */ 882 ret = btrfs_map_block(&root->fs_info->mapping_tree, READ, 883 bytenr, &map_length, &multi, 0); 884 if (!ret) { 885 struct btrfs_bio_stripe *stripe = multi->stripes; 886 int i; 887 888 if (map_length > num_bytes) 889 map_length = num_bytes; 890 891 for (i = 0; i < multi->num_stripes; i++, stripe++) { 892 btrfs_issue_discard(stripe->dev->bdev, 893 stripe->physical, 894 map_length); 895 } 896 kfree(multi); 897 } 898 899 return ret; 900 #else 901 return 0; 902 #endif 903 } 904 905 static noinline int free_extents(struct btrfs_trans_handle *trans, 906 struct btrfs_root *extent_root, 907 struct list_head *del_list) 908 { 909 struct btrfs_fs_info *info = extent_root->fs_info; 910 struct btrfs_path *path; 911 struct btrfs_key key, found_key; 912 struct extent_buffer *leaf; 913 struct list_head *cur; 914 struct pending_extent_op *op; 915 struct btrfs_extent_item *ei; 916 int ret, num_to_del, extent_slot = 0, found_extent = 0; 917 u32 refs; 918 u64 bytes_freed = 0; 919 920 path = btrfs_alloc_path(); 921 if (!path) 922 return -ENOMEM; 923 path->reada = 1; 924 925 search: 926 /* search for the backref for the current ref we want to delete */ 927 cur = del_list->next; 928 op = list_entry(cur, struct pending_extent_op, list); 929 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, 930 op->orig_parent, 931 extent_root->root_key.objectid, 932 op->orig_generation, op->level, 1); 933 if (ret) { 934 printk(KERN_ERR "btrfs unable to find backref byte nr %llu " 935 "root %llu gen %llu owner %u\n", 936 (unsigned long long)op->bytenr, 937 (unsigned long long)extent_root->root_key.objectid, 938 (unsigned long long)op->orig_generation, op->level); 939 btrfs_print_leaf(extent_root, path->nodes[0]); 940 WARN_ON(1); 941 goto out; 942 } 943 944 extent_slot = path->slots[0]; 945 num_to_del = 1; 946 found_extent = 0; 947 948 /* 949 * if we aren't the first item on the leaf we can move back one and see 950 * if our ref is right next to our extent item 951 */ 952 if (likely(extent_slot)) { 953 extent_slot--; 954 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 955 extent_slot); 956 if (found_key.objectid == op->bytenr && 957 found_key.type == BTRFS_EXTENT_ITEM_KEY && 958 found_key.offset == op->num_bytes) { 959 num_to_del++; 960 found_extent = 1; 961 } 962 } 963 964 /* 965 * if we didn't find the extent we need to delete the backref and then 966 * search for the extent item key so we can update its ref count 967 */ 968 if (!found_extent) { 969 key.objectid = op->bytenr; 970 key.type = BTRFS_EXTENT_ITEM_KEY; 971 key.offset = op->num_bytes; 972 973 ret = remove_extent_backref(trans, extent_root, path); 974 BUG_ON(ret); 975 btrfs_release_path(extent_root, path); 976 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); 977 BUG_ON(ret); 978 extent_slot = path->slots[0]; 979 } 980 981 /* this is where we update the ref count for the extent */ 982 leaf = path->nodes[0]; 983 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); 984 refs = btrfs_extent_refs(leaf, ei); 985 BUG_ON(refs == 0); 986 refs--; 987 btrfs_set_extent_refs(leaf, ei, refs); 988 989 btrfs_mark_buffer_dirty(leaf); 990 991 /* 992 * This extent needs deleting. The reason cur_slot is extent_slot + 993 * num_to_del is because extent_slot points to the slot where the extent 994 * is, and if the backref was not right next to the extent we will be 995 * deleting at least 1 item, and will want to start searching at the 996 * slot directly next to extent_slot. However if we did find the 997 * backref next to the extent item them we will be deleting at least 2 998 * items and will want to start searching directly after the ref slot 999 */ 1000 if (!refs) { 1001 struct list_head *pos, *n, *end; 1002 int cur_slot = extent_slot+num_to_del; 1003 u64 super_used; 1004 u64 root_used; 1005 1006 path->slots[0] = extent_slot; 1007 bytes_freed = op->num_bytes; 1008 1009 mutex_lock(&info->pinned_mutex); 1010 ret = pin_down_bytes(trans, extent_root, op->bytenr, 1011 op->num_bytes, op->level >= 1012 BTRFS_FIRST_FREE_OBJECTID); 1013 mutex_unlock(&info->pinned_mutex); 1014 BUG_ON(ret < 0); 1015 op->del = ret; 1016 1017 /* 1018 * we need to see if we can delete multiple things at once, so 1019 * start looping through the list of extents we are wanting to 1020 * delete and see if their extent/backref's are right next to 1021 * eachother and the extents only have 1 ref 1022 */ 1023 for (pos = cur->next; pos != del_list; pos = pos->next) { 1024 struct pending_extent_op *tmp; 1025 1026 tmp = list_entry(pos, struct pending_extent_op, list); 1027 1028 /* we only want to delete extent+ref at this stage */ 1029 if (cur_slot >= btrfs_header_nritems(leaf) - 1) 1030 break; 1031 1032 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); 1033 if (found_key.objectid != tmp->bytenr || 1034 found_key.type != BTRFS_EXTENT_ITEM_KEY || 1035 found_key.offset != tmp->num_bytes) 1036 break; 1037 1038 /* check to make sure this extent only has one ref */ 1039 ei = btrfs_item_ptr(leaf, cur_slot, 1040 struct btrfs_extent_item); 1041 if (btrfs_extent_refs(leaf, ei) != 1) 1042 break; 1043 1044 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); 1045 if (found_key.objectid != tmp->bytenr || 1046 found_key.type != BTRFS_EXTENT_REF_KEY || 1047 found_key.offset != tmp->orig_parent) 1048 break; 1049 1050 /* 1051 * the ref is right next to the extent, we can set the 1052 * ref count to 0 since we will delete them both now 1053 */ 1054 btrfs_set_extent_refs(leaf, ei, 0); 1055 1056 /* pin down the bytes for this extent */ 1057 mutex_lock(&info->pinned_mutex); 1058 ret = pin_down_bytes(trans, extent_root, tmp->bytenr, 1059 tmp->num_bytes, tmp->level >= 1060 BTRFS_FIRST_FREE_OBJECTID); 1061 mutex_unlock(&info->pinned_mutex); 1062 BUG_ON(ret < 0); 1063 1064 /* 1065 * use the del field to tell if we need to go ahead and 1066 * free up the extent when we delete the item or not. 1067 */ 1068 tmp->del = ret; 1069 bytes_freed += tmp->num_bytes; 1070 1071 num_to_del += 2; 1072 cur_slot += 2; 1073 } 1074 end = pos; 1075 1076 /* update the free space counters */ 1077 spin_lock(&info->delalloc_lock); 1078 super_used = btrfs_super_bytes_used(&info->super_copy); 1079 btrfs_set_super_bytes_used(&info->super_copy, 1080 super_used - bytes_freed); 1081 1082 root_used = btrfs_root_used(&extent_root->root_item); 1083 btrfs_set_root_used(&extent_root->root_item, 1084 root_used - bytes_freed); 1085 spin_unlock(&info->delalloc_lock); 1086 1087 /* delete the items */ 1088 ret = btrfs_del_items(trans, extent_root, path, 1089 path->slots[0], num_to_del); 1090 BUG_ON(ret); 1091 1092 /* 1093 * loop through the extents we deleted and do the cleanup work 1094 * on them 1095 */ 1096 for (pos = cur, n = pos->next; pos != end; 1097 pos = n, n = pos->next) { 1098 struct pending_extent_op *tmp; 1099 tmp = list_entry(pos, struct pending_extent_op, list); 1100 1101 /* 1102 * remember tmp->del tells us wether or not we pinned 1103 * down the extent 1104 */ 1105 ret = update_block_group(trans, extent_root, 1106 tmp->bytenr, tmp->num_bytes, 0, 1107 tmp->del); 1108 BUG_ON(ret); 1109 1110 list_del_init(&tmp->list); 1111 unlock_extent(&info->extent_ins, tmp->bytenr, 1112 tmp->bytenr + tmp->num_bytes - 1, 1113 GFP_NOFS); 1114 kfree(tmp); 1115 } 1116 } else if (refs && found_extent) { 1117 /* 1118 * the ref and extent were right next to eachother, but the 1119 * extent still has a ref, so just free the backref and keep 1120 * going 1121 */ 1122 ret = remove_extent_backref(trans, extent_root, path); 1123 BUG_ON(ret); 1124 1125 list_del_init(&op->list); 1126 unlock_extent(&info->extent_ins, op->bytenr, 1127 op->bytenr + op->num_bytes - 1, GFP_NOFS); 1128 kfree(op); 1129 } else { 1130 /* 1131 * the extent has multiple refs and the backref we were looking 1132 * for was not right next to it, so just unlock and go next, 1133 * we're good to go 1134 */ 1135 list_del_init(&op->list); 1136 unlock_extent(&info->extent_ins, op->bytenr, 1137 op->bytenr + op->num_bytes - 1, GFP_NOFS); 1138 kfree(op); 1139 } 1140 1141 btrfs_release_path(extent_root, path); 1142 if (!list_empty(del_list)) 1143 goto search; 1144 1145 out: 1146 btrfs_free_path(path); 1147 return ret; 1148 } 1149 1150 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1151 struct btrfs_root *root, u64 bytenr, 1152 u64 orig_parent, u64 parent, 1153 u64 orig_root, u64 ref_root, 1154 u64 orig_generation, u64 ref_generation, 1155 u64 owner_objectid) 1156 { 1157 int ret; 1158 struct btrfs_root *extent_root = root->fs_info->extent_root; 1159 struct btrfs_path *path; 1160 1161 if (root == root->fs_info->extent_root) { 1162 struct pending_extent_op *extent_op; 1163 u64 num_bytes; 1164 1165 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); 1166 num_bytes = btrfs_level_size(root, (int)owner_objectid); 1167 mutex_lock(&root->fs_info->extent_ins_mutex); 1168 if (test_range_bit(&root->fs_info->extent_ins, bytenr, 1169 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { 1170 u64 priv; 1171 ret = get_state_private(&root->fs_info->extent_ins, 1172 bytenr, &priv); 1173 BUG_ON(ret); 1174 extent_op = (struct pending_extent_op *) 1175 (unsigned long)priv; 1176 BUG_ON(extent_op->parent != orig_parent); 1177 BUG_ON(extent_op->generation != orig_generation); 1178 1179 extent_op->parent = parent; 1180 extent_op->generation = ref_generation; 1181 } else { 1182 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 1183 BUG_ON(!extent_op); 1184 1185 extent_op->type = PENDING_BACKREF_UPDATE; 1186 extent_op->bytenr = bytenr; 1187 extent_op->num_bytes = num_bytes; 1188 extent_op->parent = parent; 1189 extent_op->orig_parent = orig_parent; 1190 extent_op->generation = ref_generation; 1191 extent_op->orig_generation = orig_generation; 1192 extent_op->level = (int)owner_objectid; 1193 INIT_LIST_HEAD(&extent_op->list); 1194 extent_op->del = 0; 1195 1196 set_extent_bits(&root->fs_info->extent_ins, 1197 bytenr, bytenr + num_bytes - 1, 1198 EXTENT_WRITEBACK, GFP_NOFS); 1199 set_state_private(&root->fs_info->extent_ins, 1200 bytenr, (unsigned long)extent_op); 1201 } 1202 mutex_unlock(&root->fs_info->extent_ins_mutex); 1203 return 0; 1204 } 1205 1206 path = btrfs_alloc_path(); 1207 if (!path) 1208 return -ENOMEM; 1209 ret = lookup_extent_backref(trans, extent_root, path, 1210 bytenr, orig_parent, orig_root, 1211 orig_generation, owner_objectid, 1); 1212 if (ret) 1213 goto out; 1214 ret = remove_extent_backref(trans, extent_root, path); 1215 if (ret) 1216 goto out; 1217 ret = insert_extent_backref(trans, extent_root, path, bytenr, 1218 parent, ref_root, ref_generation, 1219 owner_objectid); 1220 BUG_ON(ret); 1221 finish_current_insert(trans, extent_root, 0); 1222 del_pending_extents(trans, extent_root, 0); 1223 out: 1224 btrfs_free_path(path); 1225 return ret; 1226 } 1227 1228 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1229 struct btrfs_root *root, u64 bytenr, 1230 u64 orig_parent, u64 parent, 1231 u64 ref_root, u64 ref_generation, 1232 u64 owner_objectid) 1233 { 1234 int ret; 1235 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1236 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1237 return 0; 1238 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 1239 parent, ref_root, ref_root, 1240 ref_generation, ref_generation, 1241 owner_objectid); 1242 return ret; 1243 } 1244 1245 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1246 struct btrfs_root *root, u64 bytenr, 1247 u64 orig_parent, u64 parent, 1248 u64 orig_root, u64 ref_root, 1249 u64 orig_generation, u64 ref_generation, 1250 u64 owner_objectid) 1251 { 1252 struct btrfs_path *path; 1253 int ret; 1254 struct btrfs_key key; 1255 struct extent_buffer *l; 1256 struct btrfs_extent_item *item; 1257 u32 refs; 1258 1259 path = btrfs_alloc_path(); 1260 if (!path) 1261 return -ENOMEM; 1262 1263 path->reada = 1; 1264 key.objectid = bytenr; 1265 key.type = BTRFS_EXTENT_ITEM_KEY; 1266 key.offset = (u64)-1; 1267 1268 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 1269 0, 1); 1270 if (ret < 0) 1271 return ret; 1272 BUG_ON(ret == 0 || path->slots[0] == 0); 1273 1274 path->slots[0]--; 1275 l = path->nodes[0]; 1276 1277 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1278 if (key.objectid != bytenr) { 1279 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); 1280 printk(KERN_ERR "btrfs wanted %llu found %llu\n", 1281 (unsigned long long)bytenr, 1282 (unsigned long long)key.objectid); 1283 BUG(); 1284 } 1285 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 1286 1287 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1288 refs = btrfs_extent_refs(l, item); 1289 btrfs_set_extent_refs(l, item, refs + 1); 1290 btrfs_mark_buffer_dirty(path->nodes[0]); 1291 1292 btrfs_release_path(root->fs_info->extent_root, path); 1293 1294 path->reada = 1; 1295 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1296 path, bytenr, parent, 1297 ref_root, ref_generation, 1298 owner_objectid); 1299 BUG_ON(ret); 1300 finish_current_insert(trans, root->fs_info->extent_root, 0); 1301 del_pending_extents(trans, root->fs_info->extent_root, 0); 1302 1303 btrfs_free_path(path); 1304 return 0; 1305 } 1306 1307 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1308 struct btrfs_root *root, 1309 u64 bytenr, u64 num_bytes, u64 parent, 1310 u64 ref_root, u64 ref_generation, 1311 u64 owner_objectid) 1312 { 1313 int ret; 1314 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1315 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1316 return 0; 1317 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 1318 0, ref_root, 0, ref_generation, 1319 owner_objectid); 1320 return ret; 1321 } 1322 1323 int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 1324 struct btrfs_root *root) 1325 { 1326 finish_current_insert(trans, root->fs_info->extent_root, 1); 1327 del_pending_extents(trans, root->fs_info->extent_root, 1); 1328 return 0; 1329 } 1330 1331 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 1332 struct btrfs_root *root, u64 bytenr, 1333 u64 num_bytes, u32 *refs) 1334 { 1335 struct btrfs_path *path; 1336 int ret; 1337 struct btrfs_key key; 1338 struct extent_buffer *l; 1339 struct btrfs_extent_item *item; 1340 1341 WARN_ON(num_bytes < root->sectorsize); 1342 path = btrfs_alloc_path(); 1343 path->reada = 1; 1344 key.objectid = bytenr; 1345 key.offset = num_bytes; 1346 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 1347 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 1348 0, 0); 1349 if (ret < 0) 1350 goto out; 1351 if (ret != 0) { 1352 btrfs_print_leaf(root, path->nodes[0]); 1353 printk(KERN_INFO "btrfs failed to find block number %llu\n", 1354 (unsigned long long)bytenr); 1355 BUG(); 1356 } 1357 l = path->nodes[0]; 1358 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1359 *refs = btrfs_extent_refs(l, item); 1360 out: 1361 btrfs_free_path(path); 1362 return 0; 1363 } 1364 1365 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1366 struct btrfs_root *root, u64 objectid, u64 bytenr) 1367 { 1368 struct btrfs_root *extent_root = root->fs_info->extent_root; 1369 struct btrfs_path *path; 1370 struct extent_buffer *leaf; 1371 struct btrfs_extent_ref *ref_item; 1372 struct btrfs_key key; 1373 struct btrfs_key found_key; 1374 u64 ref_root; 1375 u64 last_snapshot; 1376 u32 nritems; 1377 int ret; 1378 1379 key.objectid = bytenr; 1380 key.offset = (u64)-1; 1381 key.type = BTRFS_EXTENT_ITEM_KEY; 1382 1383 path = btrfs_alloc_path(); 1384 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 1385 if (ret < 0) 1386 goto out; 1387 BUG_ON(ret == 0); 1388 1389 ret = -ENOENT; 1390 if (path->slots[0] == 0) 1391 goto out; 1392 1393 path->slots[0]--; 1394 leaf = path->nodes[0]; 1395 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1396 1397 if (found_key.objectid != bytenr || 1398 found_key.type != BTRFS_EXTENT_ITEM_KEY) 1399 goto out; 1400 1401 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1402 while (1) { 1403 leaf = path->nodes[0]; 1404 nritems = btrfs_header_nritems(leaf); 1405 if (path->slots[0] >= nritems) { 1406 ret = btrfs_next_leaf(extent_root, path); 1407 if (ret < 0) 1408 goto out; 1409 if (ret == 0) 1410 continue; 1411 break; 1412 } 1413 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1414 if (found_key.objectid != bytenr) 1415 break; 1416 1417 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 1418 path->slots[0]++; 1419 continue; 1420 } 1421 1422 ref_item = btrfs_item_ptr(leaf, path->slots[0], 1423 struct btrfs_extent_ref); 1424 ref_root = btrfs_ref_root(leaf, ref_item); 1425 if ((ref_root != root->root_key.objectid && 1426 ref_root != BTRFS_TREE_LOG_OBJECTID) || 1427 objectid != btrfs_ref_objectid(leaf, ref_item)) { 1428 ret = 1; 1429 goto out; 1430 } 1431 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { 1432 ret = 1; 1433 goto out; 1434 } 1435 1436 path->slots[0]++; 1437 } 1438 ret = 0; 1439 out: 1440 btrfs_free_path(path); 1441 return ret; 1442 } 1443 1444 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 1445 struct extent_buffer *buf, u32 nr_extents) 1446 { 1447 struct btrfs_key key; 1448 struct btrfs_file_extent_item *fi; 1449 u64 root_gen; 1450 u32 nritems; 1451 int i; 1452 int level; 1453 int ret = 0; 1454 int shared = 0; 1455 1456 if (!root->ref_cows) 1457 return 0; 1458 1459 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1460 shared = 0; 1461 root_gen = root->root_key.offset; 1462 } else { 1463 shared = 1; 1464 root_gen = trans->transid - 1; 1465 } 1466 1467 level = btrfs_header_level(buf); 1468 nritems = btrfs_header_nritems(buf); 1469 1470 if (level == 0) { 1471 struct btrfs_leaf_ref *ref; 1472 struct btrfs_extent_info *info; 1473 1474 ref = btrfs_alloc_leaf_ref(root, nr_extents); 1475 if (!ref) { 1476 ret = -ENOMEM; 1477 goto out; 1478 } 1479 1480 ref->root_gen = root_gen; 1481 ref->bytenr = buf->start; 1482 ref->owner = btrfs_header_owner(buf); 1483 ref->generation = btrfs_header_generation(buf); 1484 ref->nritems = nr_extents; 1485 info = ref->extents; 1486 1487 for (i = 0; nr_extents > 0 && i < nritems; i++) { 1488 u64 disk_bytenr; 1489 btrfs_item_key_to_cpu(buf, &key, i); 1490 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1491 continue; 1492 fi = btrfs_item_ptr(buf, i, 1493 struct btrfs_file_extent_item); 1494 if (btrfs_file_extent_type(buf, fi) == 1495 BTRFS_FILE_EXTENT_INLINE) 1496 continue; 1497 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1498 if (disk_bytenr == 0) 1499 continue; 1500 1501 info->bytenr = disk_bytenr; 1502 info->num_bytes = 1503 btrfs_file_extent_disk_num_bytes(buf, fi); 1504 info->objectid = key.objectid; 1505 info->offset = key.offset; 1506 info++; 1507 } 1508 1509 ret = btrfs_add_leaf_ref(root, ref, shared); 1510 if (ret == -EEXIST && shared) { 1511 struct btrfs_leaf_ref *old; 1512 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 1513 BUG_ON(!old); 1514 btrfs_remove_leaf_ref(root, old); 1515 btrfs_free_leaf_ref(root, old); 1516 ret = btrfs_add_leaf_ref(root, ref, shared); 1517 } 1518 WARN_ON(ret); 1519 btrfs_free_leaf_ref(root, ref); 1520 } 1521 out: 1522 return ret; 1523 } 1524 1525 /* when a block goes through cow, we update the reference counts of 1526 * everything that block points to. The internal pointers of the block 1527 * can be in just about any order, and it is likely to have clusters of 1528 * things that are close together and clusters of things that are not. 1529 * 1530 * To help reduce the seeks that come with updating all of these reference 1531 * counts, sort them by byte number before actual updates are done. 1532 * 1533 * struct refsort is used to match byte number to slot in the btree block. 1534 * we sort based on the byte number and then use the slot to actually 1535 * find the item. 1536 * 1537 * struct refsort is smaller than strcut btrfs_item and smaller than 1538 * struct btrfs_key_ptr. Since we're currently limited to the page size 1539 * for a btree block, there's no way for a kmalloc of refsorts for a 1540 * single node to be bigger than a page. 1541 */ 1542 struct refsort { 1543 u64 bytenr; 1544 u32 slot; 1545 }; 1546 1547 /* 1548 * for passing into sort() 1549 */ 1550 static int refsort_cmp(const void *a_void, const void *b_void) 1551 { 1552 const struct refsort *a = a_void; 1553 const struct refsort *b = b_void; 1554 1555 if (a->bytenr < b->bytenr) 1556 return -1; 1557 if (a->bytenr > b->bytenr) 1558 return 1; 1559 return 0; 1560 } 1561 1562 1563 noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, 1564 struct btrfs_root *root, 1565 struct extent_buffer *orig_buf, 1566 struct extent_buffer *buf, u32 *nr_extents) 1567 { 1568 u64 bytenr; 1569 u64 ref_root; 1570 u64 orig_root; 1571 u64 ref_generation; 1572 u64 orig_generation; 1573 struct refsort *sorted; 1574 u32 nritems; 1575 u32 nr_file_extents = 0; 1576 struct btrfs_key key; 1577 struct btrfs_file_extent_item *fi; 1578 int i; 1579 int level; 1580 int ret = 0; 1581 int faili = 0; 1582 int refi = 0; 1583 int slot; 1584 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1585 u64, u64, u64, u64, u64, u64, u64, u64); 1586 1587 ref_root = btrfs_header_owner(buf); 1588 ref_generation = btrfs_header_generation(buf); 1589 orig_root = btrfs_header_owner(orig_buf); 1590 orig_generation = btrfs_header_generation(orig_buf); 1591 1592 nritems = btrfs_header_nritems(buf); 1593 level = btrfs_header_level(buf); 1594 1595 sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); 1596 BUG_ON(!sorted); 1597 1598 if (root->ref_cows) { 1599 process_func = __btrfs_inc_extent_ref; 1600 } else { 1601 if (level == 0 && 1602 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 1603 goto out; 1604 if (level != 0 && 1605 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 1606 goto out; 1607 process_func = __btrfs_update_extent_ref; 1608 } 1609 1610 /* 1611 * we make two passes through the items. In the first pass we 1612 * only record the byte number and slot. Then we sort based on 1613 * byte number and do the actual work based on the sorted results 1614 */ 1615 for (i = 0; i < nritems; i++) { 1616 cond_resched(); 1617 if (level == 0) { 1618 btrfs_item_key_to_cpu(buf, &key, i); 1619 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1620 continue; 1621 fi = btrfs_item_ptr(buf, i, 1622 struct btrfs_file_extent_item); 1623 if (btrfs_file_extent_type(buf, fi) == 1624 BTRFS_FILE_EXTENT_INLINE) 1625 continue; 1626 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1627 if (bytenr == 0) 1628 continue; 1629 1630 nr_file_extents++; 1631 sorted[refi].bytenr = bytenr; 1632 sorted[refi].slot = i; 1633 refi++; 1634 } else { 1635 bytenr = btrfs_node_blockptr(buf, i); 1636 sorted[refi].bytenr = bytenr; 1637 sorted[refi].slot = i; 1638 refi++; 1639 } 1640 } 1641 /* 1642 * if refi == 0, we didn't actually put anything into the sorted 1643 * array and we're done 1644 */ 1645 if (refi == 0) 1646 goto out; 1647 1648 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 1649 1650 for (i = 0; i < refi; i++) { 1651 cond_resched(); 1652 slot = sorted[i].slot; 1653 bytenr = sorted[i].bytenr; 1654 1655 if (level == 0) { 1656 btrfs_item_key_to_cpu(buf, &key, slot); 1657 1658 ret = process_func(trans, root, bytenr, 1659 orig_buf->start, buf->start, 1660 orig_root, ref_root, 1661 orig_generation, ref_generation, 1662 key.objectid); 1663 1664 if (ret) { 1665 faili = slot; 1666 WARN_ON(1); 1667 goto fail; 1668 } 1669 } else { 1670 ret = process_func(trans, root, bytenr, 1671 orig_buf->start, buf->start, 1672 orig_root, ref_root, 1673 orig_generation, ref_generation, 1674 level - 1); 1675 if (ret) { 1676 faili = slot; 1677 WARN_ON(1); 1678 goto fail; 1679 } 1680 } 1681 } 1682 out: 1683 kfree(sorted); 1684 if (nr_extents) { 1685 if (level == 0) 1686 *nr_extents = nr_file_extents; 1687 else 1688 *nr_extents = nritems; 1689 } 1690 return 0; 1691 fail: 1692 kfree(sorted); 1693 WARN_ON(1); 1694 return ret; 1695 } 1696 1697 int btrfs_update_ref(struct btrfs_trans_handle *trans, 1698 struct btrfs_root *root, struct extent_buffer *orig_buf, 1699 struct extent_buffer *buf, int start_slot, int nr) 1700 1701 { 1702 u64 bytenr; 1703 u64 ref_root; 1704 u64 orig_root; 1705 u64 ref_generation; 1706 u64 orig_generation; 1707 struct btrfs_key key; 1708 struct btrfs_file_extent_item *fi; 1709 int i; 1710 int ret; 1711 int slot; 1712 int level; 1713 1714 BUG_ON(start_slot < 0); 1715 BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); 1716 1717 ref_root = btrfs_header_owner(buf); 1718 ref_generation = btrfs_header_generation(buf); 1719 orig_root = btrfs_header_owner(orig_buf); 1720 orig_generation = btrfs_header_generation(orig_buf); 1721 level = btrfs_header_level(buf); 1722 1723 if (!root->ref_cows) { 1724 if (level == 0 && 1725 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 1726 return 0; 1727 if (level != 0 && 1728 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 1729 return 0; 1730 } 1731 1732 for (i = 0, slot = start_slot; i < nr; i++, slot++) { 1733 cond_resched(); 1734 if (level == 0) { 1735 btrfs_item_key_to_cpu(buf, &key, slot); 1736 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1737 continue; 1738 fi = btrfs_item_ptr(buf, slot, 1739 struct btrfs_file_extent_item); 1740 if (btrfs_file_extent_type(buf, fi) == 1741 BTRFS_FILE_EXTENT_INLINE) 1742 continue; 1743 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1744 if (bytenr == 0) 1745 continue; 1746 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1747 orig_buf->start, buf->start, 1748 orig_root, ref_root, 1749 orig_generation, ref_generation, 1750 key.objectid); 1751 if (ret) 1752 goto fail; 1753 } else { 1754 bytenr = btrfs_node_blockptr(buf, slot); 1755 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1756 orig_buf->start, buf->start, 1757 orig_root, ref_root, 1758 orig_generation, ref_generation, 1759 level - 1); 1760 if (ret) 1761 goto fail; 1762 } 1763 } 1764 return 0; 1765 fail: 1766 WARN_ON(1); 1767 return -1; 1768 } 1769 1770 static int write_one_cache_group(struct btrfs_trans_handle *trans, 1771 struct btrfs_root *root, 1772 struct btrfs_path *path, 1773 struct btrfs_block_group_cache *cache) 1774 { 1775 int ret; 1776 int pending_ret; 1777 struct btrfs_root *extent_root = root->fs_info->extent_root; 1778 unsigned long bi; 1779 struct extent_buffer *leaf; 1780 1781 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 1782 if (ret < 0) 1783 goto fail; 1784 BUG_ON(ret); 1785 1786 leaf = path->nodes[0]; 1787 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 1788 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 1789 btrfs_mark_buffer_dirty(leaf); 1790 btrfs_release_path(extent_root, path); 1791 fail: 1792 finish_current_insert(trans, extent_root, 0); 1793 pending_ret = del_pending_extents(trans, extent_root, 0); 1794 if (ret) 1795 return ret; 1796 if (pending_ret) 1797 return pending_ret; 1798 return 0; 1799 1800 } 1801 1802 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1803 struct btrfs_root *root) 1804 { 1805 struct btrfs_block_group_cache *cache, *entry; 1806 struct rb_node *n; 1807 int err = 0; 1808 int werr = 0; 1809 struct btrfs_path *path; 1810 u64 last = 0; 1811 1812 path = btrfs_alloc_path(); 1813 if (!path) 1814 return -ENOMEM; 1815 1816 while (1) { 1817 cache = NULL; 1818 spin_lock(&root->fs_info->block_group_cache_lock); 1819 for (n = rb_first(&root->fs_info->block_group_cache_tree); 1820 n; n = rb_next(n)) { 1821 entry = rb_entry(n, struct btrfs_block_group_cache, 1822 cache_node); 1823 if (entry->dirty) { 1824 cache = entry; 1825 break; 1826 } 1827 } 1828 spin_unlock(&root->fs_info->block_group_cache_lock); 1829 1830 if (!cache) 1831 break; 1832 1833 cache->dirty = 0; 1834 last += cache->key.offset; 1835 1836 err = write_one_cache_group(trans, root, 1837 path, cache); 1838 /* 1839 * if we fail to write the cache group, we want 1840 * to keep it marked dirty in hopes that a later 1841 * write will work 1842 */ 1843 if (err) { 1844 werr = err; 1845 continue; 1846 } 1847 } 1848 btrfs_free_path(path); 1849 return werr; 1850 } 1851 1852 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 1853 { 1854 struct btrfs_block_group_cache *block_group; 1855 int readonly = 0; 1856 1857 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 1858 if (!block_group || block_group->ro) 1859 readonly = 1; 1860 if (block_group) 1861 put_block_group(block_group); 1862 return readonly; 1863 } 1864 1865 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 1866 u64 total_bytes, u64 bytes_used, 1867 struct btrfs_space_info **space_info) 1868 { 1869 struct btrfs_space_info *found; 1870 1871 found = __find_space_info(info, flags); 1872 if (found) { 1873 spin_lock(&found->lock); 1874 found->total_bytes += total_bytes; 1875 found->bytes_used += bytes_used; 1876 found->full = 0; 1877 spin_unlock(&found->lock); 1878 *space_info = found; 1879 return 0; 1880 } 1881 found = kzalloc(sizeof(*found), GFP_NOFS); 1882 if (!found) 1883 return -ENOMEM; 1884 1885 list_add(&found->list, &info->space_info); 1886 INIT_LIST_HEAD(&found->block_groups); 1887 init_rwsem(&found->groups_sem); 1888 spin_lock_init(&found->lock); 1889 found->flags = flags; 1890 found->total_bytes = total_bytes; 1891 found->bytes_used = bytes_used; 1892 found->bytes_pinned = 0; 1893 found->bytes_reserved = 0; 1894 found->bytes_readonly = 0; 1895 found->full = 0; 1896 found->force_alloc = 0; 1897 *space_info = found; 1898 return 0; 1899 } 1900 1901 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 1902 { 1903 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 1904 BTRFS_BLOCK_GROUP_RAID1 | 1905 BTRFS_BLOCK_GROUP_RAID10 | 1906 BTRFS_BLOCK_GROUP_DUP); 1907 if (extra_flags) { 1908 if (flags & BTRFS_BLOCK_GROUP_DATA) 1909 fs_info->avail_data_alloc_bits |= extra_flags; 1910 if (flags & BTRFS_BLOCK_GROUP_METADATA) 1911 fs_info->avail_metadata_alloc_bits |= extra_flags; 1912 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 1913 fs_info->avail_system_alloc_bits |= extra_flags; 1914 } 1915 } 1916 1917 static void set_block_group_readonly(struct btrfs_block_group_cache *cache) 1918 { 1919 spin_lock(&cache->space_info->lock); 1920 spin_lock(&cache->lock); 1921 if (!cache->ro) { 1922 cache->space_info->bytes_readonly += cache->key.offset - 1923 btrfs_block_group_used(&cache->item); 1924 cache->ro = 1; 1925 } 1926 spin_unlock(&cache->lock); 1927 spin_unlock(&cache->space_info->lock); 1928 } 1929 1930 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 1931 { 1932 u64 num_devices = root->fs_info->fs_devices->rw_devices; 1933 1934 if (num_devices == 1) 1935 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 1936 if (num_devices < 4) 1937 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 1938 1939 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 1940 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 1941 BTRFS_BLOCK_GROUP_RAID10))) { 1942 flags &= ~BTRFS_BLOCK_GROUP_DUP; 1943 } 1944 1945 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 1946 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 1947 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 1948 } 1949 1950 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 1951 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 1952 (flags & BTRFS_BLOCK_GROUP_RAID10) | 1953 (flags & BTRFS_BLOCK_GROUP_DUP))) 1954 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 1955 return flags; 1956 } 1957 1958 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 1959 struct btrfs_root *extent_root, u64 alloc_bytes, 1960 u64 flags, int force) 1961 { 1962 struct btrfs_space_info *space_info; 1963 u64 thresh; 1964 int ret = 0; 1965 1966 mutex_lock(&extent_root->fs_info->chunk_mutex); 1967 1968 flags = btrfs_reduce_alloc_profile(extent_root, flags); 1969 1970 space_info = __find_space_info(extent_root->fs_info, flags); 1971 if (!space_info) { 1972 ret = update_space_info(extent_root->fs_info, flags, 1973 0, 0, &space_info); 1974 BUG_ON(ret); 1975 } 1976 BUG_ON(!space_info); 1977 1978 spin_lock(&space_info->lock); 1979 if (space_info->force_alloc) { 1980 force = 1; 1981 space_info->force_alloc = 0; 1982 } 1983 if (space_info->full) { 1984 spin_unlock(&space_info->lock); 1985 goto out; 1986 } 1987 1988 thresh = space_info->total_bytes - space_info->bytes_readonly; 1989 thresh = div_factor(thresh, 6); 1990 if (!force && 1991 (space_info->bytes_used + space_info->bytes_pinned + 1992 space_info->bytes_reserved + alloc_bytes) < thresh) { 1993 spin_unlock(&space_info->lock); 1994 goto out; 1995 } 1996 spin_unlock(&space_info->lock); 1997 1998 ret = btrfs_alloc_chunk(trans, extent_root, flags); 1999 if (ret) 2000 space_info->full = 1; 2001 out: 2002 mutex_unlock(&extent_root->fs_info->chunk_mutex); 2003 return ret; 2004 } 2005 2006 static int update_block_group(struct btrfs_trans_handle *trans, 2007 struct btrfs_root *root, 2008 u64 bytenr, u64 num_bytes, int alloc, 2009 int mark_free) 2010 { 2011 struct btrfs_block_group_cache *cache; 2012 struct btrfs_fs_info *info = root->fs_info; 2013 u64 total = num_bytes; 2014 u64 old_val; 2015 u64 byte_in_group; 2016 2017 while (total) { 2018 cache = btrfs_lookup_block_group(info, bytenr); 2019 if (!cache) 2020 return -1; 2021 byte_in_group = bytenr - cache->key.objectid; 2022 WARN_ON(byte_in_group > cache->key.offset); 2023 2024 spin_lock(&cache->space_info->lock); 2025 spin_lock(&cache->lock); 2026 cache->dirty = 1; 2027 old_val = btrfs_block_group_used(&cache->item); 2028 num_bytes = min(total, cache->key.offset - byte_in_group); 2029 if (alloc) { 2030 old_val += num_bytes; 2031 cache->space_info->bytes_used += num_bytes; 2032 if (cache->ro) 2033 cache->space_info->bytes_readonly -= num_bytes; 2034 btrfs_set_block_group_used(&cache->item, old_val); 2035 spin_unlock(&cache->lock); 2036 spin_unlock(&cache->space_info->lock); 2037 } else { 2038 old_val -= num_bytes; 2039 cache->space_info->bytes_used -= num_bytes; 2040 if (cache->ro) 2041 cache->space_info->bytes_readonly += num_bytes; 2042 btrfs_set_block_group_used(&cache->item, old_val); 2043 spin_unlock(&cache->lock); 2044 spin_unlock(&cache->space_info->lock); 2045 if (mark_free) { 2046 int ret; 2047 2048 ret = btrfs_discard_extent(root, bytenr, 2049 num_bytes); 2050 WARN_ON(ret); 2051 2052 ret = btrfs_add_free_space(cache, bytenr, 2053 num_bytes); 2054 WARN_ON(ret); 2055 } 2056 } 2057 put_block_group(cache); 2058 total -= num_bytes; 2059 bytenr += num_bytes; 2060 } 2061 return 0; 2062 } 2063 2064 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 2065 { 2066 struct btrfs_block_group_cache *cache; 2067 u64 bytenr; 2068 2069 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 2070 if (!cache) 2071 return 0; 2072 2073 bytenr = cache->key.objectid; 2074 put_block_group(cache); 2075 2076 return bytenr; 2077 } 2078 2079 int btrfs_update_pinned_extents(struct btrfs_root *root, 2080 u64 bytenr, u64 num, int pin) 2081 { 2082 u64 len; 2083 struct btrfs_block_group_cache *cache; 2084 struct btrfs_fs_info *fs_info = root->fs_info; 2085 2086 WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); 2087 if (pin) { 2088 set_extent_dirty(&fs_info->pinned_extents, 2089 bytenr, bytenr + num - 1, GFP_NOFS); 2090 } else { 2091 clear_extent_dirty(&fs_info->pinned_extents, 2092 bytenr, bytenr + num - 1, GFP_NOFS); 2093 } 2094 while (num > 0) { 2095 cache = btrfs_lookup_block_group(fs_info, bytenr); 2096 BUG_ON(!cache); 2097 len = min(num, cache->key.offset - 2098 (bytenr - cache->key.objectid)); 2099 if (pin) { 2100 spin_lock(&cache->space_info->lock); 2101 spin_lock(&cache->lock); 2102 cache->pinned += len; 2103 cache->space_info->bytes_pinned += len; 2104 spin_unlock(&cache->lock); 2105 spin_unlock(&cache->space_info->lock); 2106 fs_info->total_pinned += len; 2107 } else { 2108 spin_lock(&cache->space_info->lock); 2109 spin_lock(&cache->lock); 2110 cache->pinned -= len; 2111 cache->space_info->bytes_pinned -= len; 2112 spin_unlock(&cache->lock); 2113 spin_unlock(&cache->space_info->lock); 2114 fs_info->total_pinned -= len; 2115 if (cache->cached) 2116 btrfs_add_free_space(cache, bytenr, len); 2117 } 2118 put_block_group(cache); 2119 bytenr += len; 2120 num -= len; 2121 } 2122 return 0; 2123 } 2124 2125 static int update_reserved_extents(struct btrfs_root *root, 2126 u64 bytenr, u64 num, int reserve) 2127 { 2128 u64 len; 2129 struct btrfs_block_group_cache *cache; 2130 struct btrfs_fs_info *fs_info = root->fs_info; 2131 2132 while (num > 0) { 2133 cache = btrfs_lookup_block_group(fs_info, bytenr); 2134 BUG_ON(!cache); 2135 len = min(num, cache->key.offset - 2136 (bytenr - cache->key.objectid)); 2137 2138 spin_lock(&cache->space_info->lock); 2139 spin_lock(&cache->lock); 2140 if (reserve) { 2141 cache->reserved += len; 2142 cache->space_info->bytes_reserved += len; 2143 } else { 2144 cache->reserved -= len; 2145 cache->space_info->bytes_reserved -= len; 2146 } 2147 spin_unlock(&cache->lock); 2148 spin_unlock(&cache->space_info->lock); 2149 put_block_group(cache); 2150 bytenr += len; 2151 num -= len; 2152 } 2153 return 0; 2154 } 2155 2156 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 2157 { 2158 u64 last = 0; 2159 u64 start; 2160 u64 end; 2161 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 2162 int ret; 2163 2164 mutex_lock(&root->fs_info->pinned_mutex); 2165 while (1) { 2166 ret = find_first_extent_bit(pinned_extents, last, 2167 &start, &end, EXTENT_DIRTY); 2168 if (ret) 2169 break; 2170 set_extent_dirty(copy, start, end, GFP_NOFS); 2171 last = end + 1; 2172 } 2173 mutex_unlock(&root->fs_info->pinned_mutex); 2174 return 0; 2175 } 2176 2177 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2178 struct btrfs_root *root, 2179 struct extent_io_tree *unpin) 2180 { 2181 u64 start; 2182 u64 end; 2183 int ret; 2184 2185 mutex_lock(&root->fs_info->pinned_mutex); 2186 while (1) { 2187 ret = find_first_extent_bit(unpin, 0, &start, &end, 2188 EXTENT_DIRTY); 2189 if (ret) 2190 break; 2191 2192 ret = btrfs_discard_extent(root, start, end + 1 - start); 2193 2194 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 2195 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2196 2197 if (need_resched()) { 2198 mutex_unlock(&root->fs_info->pinned_mutex); 2199 cond_resched(); 2200 mutex_lock(&root->fs_info->pinned_mutex); 2201 } 2202 } 2203 mutex_unlock(&root->fs_info->pinned_mutex); 2204 return ret; 2205 } 2206 2207 static int finish_current_insert(struct btrfs_trans_handle *trans, 2208 struct btrfs_root *extent_root, int all) 2209 { 2210 u64 start; 2211 u64 end; 2212 u64 priv; 2213 u64 search = 0; 2214 u64 skipped = 0; 2215 struct btrfs_fs_info *info = extent_root->fs_info; 2216 struct btrfs_path *path; 2217 struct pending_extent_op *extent_op, *tmp; 2218 struct list_head insert_list, update_list; 2219 int ret; 2220 int num_inserts = 0, max_inserts; 2221 2222 path = btrfs_alloc_path(); 2223 INIT_LIST_HEAD(&insert_list); 2224 INIT_LIST_HEAD(&update_list); 2225 2226 max_inserts = extent_root->leafsize / 2227 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + 2228 sizeof(struct btrfs_extent_ref) + 2229 sizeof(struct btrfs_extent_item)); 2230 again: 2231 mutex_lock(&info->extent_ins_mutex); 2232 while (1) { 2233 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2234 &end, EXTENT_WRITEBACK); 2235 if (ret) { 2236 if (skipped && all && !num_inserts && 2237 list_empty(&update_list)) { 2238 skipped = 0; 2239 search = 0; 2240 continue; 2241 } 2242 mutex_unlock(&info->extent_ins_mutex); 2243 break; 2244 } 2245 2246 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); 2247 if (!ret) { 2248 skipped = 1; 2249 search = end + 1; 2250 if (need_resched()) { 2251 mutex_unlock(&info->extent_ins_mutex); 2252 cond_resched(); 2253 mutex_lock(&info->extent_ins_mutex); 2254 } 2255 continue; 2256 } 2257 2258 ret = get_state_private(&info->extent_ins, start, &priv); 2259 BUG_ON(ret); 2260 extent_op = (struct pending_extent_op *)(unsigned long) priv; 2261 2262 if (extent_op->type == PENDING_EXTENT_INSERT) { 2263 num_inserts++; 2264 list_add_tail(&extent_op->list, &insert_list); 2265 search = end + 1; 2266 if (num_inserts == max_inserts) { 2267 mutex_unlock(&info->extent_ins_mutex); 2268 break; 2269 } 2270 } else if (extent_op->type == PENDING_BACKREF_UPDATE) { 2271 list_add_tail(&extent_op->list, &update_list); 2272 search = end + 1; 2273 } else { 2274 BUG(); 2275 } 2276 } 2277 2278 /* 2279 * process the update list, clear the writeback bit for it, and if 2280 * somebody marked this thing for deletion then just unlock it and be 2281 * done, the free_extents will handle it 2282 */ 2283 mutex_lock(&info->extent_ins_mutex); 2284 list_for_each_entry_safe(extent_op, tmp, &update_list, list) { 2285 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2286 extent_op->bytenr + extent_op->num_bytes - 1, 2287 EXTENT_WRITEBACK, GFP_NOFS); 2288 if (extent_op->del) { 2289 list_del_init(&extent_op->list); 2290 unlock_extent(&info->extent_ins, extent_op->bytenr, 2291 extent_op->bytenr + extent_op->num_bytes 2292 - 1, GFP_NOFS); 2293 kfree(extent_op); 2294 } 2295 } 2296 mutex_unlock(&info->extent_ins_mutex); 2297 2298 /* 2299 * still have things left on the update list, go ahead an update 2300 * everything 2301 */ 2302 if (!list_empty(&update_list)) { 2303 ret = update_backrefs(trans, extent_root, path, &update_list); 2304 BUG_ON(ret); 2305 } 2306 2307 /* 2308 * if no inserts need to be done, but we skipped some extents and we 2309 * need to make sure everything is cleaned then reset everything and 2310 * go back to the beginning 2311 */ 2312 if (!num_inserts && all && skipped) { 2313 search = 0; 2314 skipped = 0; 2315 INIT_LIST_HEAD(&update_list); 2316 INIT_LIST_HEAD(&insert_list); 2317 goto again; 2318 } else if (!num_inserts) { 2319 goto out; 2320 } 2321 2322 /* 2323 * process the insert extents list. Again if we are deleting this 2324 * extent, then just unlock it, pin down the bytes if need be, and be 2325 * done with it. Saves us from having to actually insert the extent 2326 * into the tree and then subsequently come along and delete it 2327 */ 2328 mutex_lock(&info->extent_ins_mutex); 2329 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { 2330 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2331 extent_op->bytenr + extent_op->num_bytes - 1, 2332 EXTENT_WRITEBACK, GFP_NOFS); 2333 if (extent_op->del) { 2334 u64 used; 2335 list_del_init(&extent_op->list); 2336 unlock_extent(&info->extent_ins, extent_op->bytenr, 2337 extent_op->bytenr + extent_op->num_bytes 2338 - 1, GFP_NOFS); 2339 2340 mutex_lock(&extent_root->fs_info->pinned_mutex); 2341 ret = pin_down_bytes(trans, extent_root, 2342 extent_op->bytenr, 2343 extent_op->num_bytes, 0); 2344 mutex_unlock(&extent_root->fs_info->pinned_mutex); 2345 2346 spin_lock(&info->delalloc_lock); 2347 used = btrfs_super_bytes_used(&info->super_copy); 2348 btrfs_set_super_bytes_used(&info->super_copy, 2349 used - extent_op->num_bytes); 2350 used = btrfs_root_used(&extent_root->root_item); 2351 btrfs_set_root_used(&extent_root->root_item, 2352 used - extent_op->num_bytes); 2353 spin_unlock(&info->delalloc_lock); 2354 2355 ret = update_block_group(trans, extent_root, 2356 extent_op->bytenr, 2357 extent_op->num_bytes, 2358 0, ret > 0); 2359 BUG_ON(ret); 2360 kfree(extent_op); 2361 num_inserts--; 2362 } 2363 } 2364 mutex_unlock(&info->extent_ins_mutex); 2365 2366 ret = insert_extents(trans, extent_root, path, &insert_list, 2367 num_inserts); 2368 BUG_ON(ret); 2369 2370 /* 2371 * if we broke out of the loop in order to insert stuff because we hit 2372 * the maximum number of inserts at a time we can handle, then loop 2373 * back and pick up where we left off 2374 */ 2375 if (num_inserts == max_inserts) { 2376 INIT_LIST_HEAD(&insert_list); 2377 INIT_LIST_HEAD(&update_list); 2378 num_inserts = 0; 2379 goto again; 2380 } 2381 2382 /* 2383 * again, if we need to make absolutely sure there are no more pending 2384 * extent operations left and we know that we skipped some, go back to 2385 * the beginning and do it all again 2386 */ 2387 if (all && skipped) { 2388 INIT_LIST_HEAD(&insert_list); 2389 INIT_LIST_HEAD(&update_list); 2390 search = 0; 2391 skipped = 0; 2392 num_inserts = 0; 2393 goto again; 2394 } 2395 out: 2396 btrfs_free_path(path); 2397 return 0; 2398 } 2399 2400 static int pin_down_bytes(struct btrfs_trans_handle *trans, 2401 struct btrfs_root *root, 2402 u64 bytenr, u64 num_bytes, int is_data) 2403 { 2404 int err = 0; 2405 struct extent_buffer *buf; 2406 2407 if (is_data) 2408 goto pinit; 2409 2410 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 2411 if (!buf) 2412 goto pinit; 2413 2414 /* we can reuse a block if it hasn't been written 2415 * and it is from this transaction. We can't 2416 * reuse anything from the tree log root because 2417 * it has tiny sub-transactions. 2418 */ 2419 if (btrfs_buffer_uptodate(buf, 0) && 2420 btrfs_try_tree_lock(buf)) { 2421 u64 header_owner = btrfs_header_owner(buf); 2422 u64 header_transid = btrfs_header_generation(buf); 2423 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2424 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2425 header_transid == trans->transid && 2426 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2427 clean_tree_block(NULL, root, buf); 2428 btrfs_tree_unlock(buf); 2429 free_extent_buffer(buf); 2430 return 1; 2431 } 2432 btrfs_tree_unlock(buf); 2433 } 2434 free_extent_buffer(buf); 2435 pinit: 2436 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2437 2438 BUG_ON(err < 0); 2439 return 0; 2440 } 2441 2442 /* 2443 * remove an extent from the root, returns 0 on success 2444 */ 2445 static int __free_extent(struct btrfs_trans_handle *trans, 2446 struct btrfs_root *root, 2447 u64 bytenr, u64 num_bytes, u64 parent, 2448 u64 root_objectid, u64 ref_generation, 2449 u64 owner_objectid, int pin, int mark_free) 2450 { 2451 struct btrfs_path *path; 2452 struct btrfs_key key; 2453 struct btrfs_fs_info *info = root->fs_info; 2454 struct btrfs_root *extent_root = info->extent_root; 2455 struct extent_buffer *leaf; 2456 int ret; 2457 int extent_slot = 0; 2458 int found_extent = 0; 2459 int num_to_del = 1; 2460 struct btrfs_extent_item *ei; 2461 u32 refs; 2462 2463 key.objectid = bytenr; 2464 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 2465 key.offset = num_bytes; 2466 path = btrfs_alloc_path(); 2467 if (!path) 2468 return -ENOMEM; 2469 2470 path->reada = 1; 2471 ret = lookup_extent_backref(trans, extent_root, path, 2472 bytenr, parent, root_objectid, 2473 ref_generation, owner_objectid, 1); 2474 if (ret == 0) { 2475 struct btrfs_key found_key; 2476 extent_slot = path->slots[0]; 2477 while (extent_slot > 0) { 2478 extent_slot--; 2479 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2480 extent_slot); 2481 if (found_key.objectid != bytenr) 2482 break; 2483 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 2484 found_key.offset == num_bytes) { 2485 found_extent = 1; 2486 break; 2487 } 2488 if (path->slots[0] - extent_slot > 5) 2489 break; 2490 } 2491 if (!found_extent) { 2492 ret = remove_extent_backref(trans, extent_root, path); 2493 BUG_ON(ret); 2494 btrfs_release_path(extent_root, path); 2495 ret = btrfs_search_slot(trans, extent_root, 2496 &key, path, -1, 1); 2497 if (ret) { 2498 printk(KERN_ERR "umm, got %d back from search" 2499 ", was looking for %llu\n", ret, 2500 (unsigned long long)bytenr); 2501 btrfs_print_leaf(extent_root, path->nodes[0]); 2502 } 2503 BUG_ON(ret); 2504 extent_slot = path->slots[0]; 2505 } 2506 } else { 2507 btrfs_print_leaf(extent_root, path->nodes[0]); 2508 WARN_ON(1); 2509 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2510 "root %llu gen %llu owner %llu\n", 2511 (unsigned long long)bytenr, 2512 (unsigned long long)root_objectid, 2513 (unsigned long long)ref_generation, 2514 (unsigned long long)owner_objectid); 2515 } 2516 2517 leaf = path->nodes[0]; 2518 ei = btrfs_item_ptr(leaf, extent_slot, 2519 struct btrfs_extent_item); 2520 refs = btrfs_extent_refs(leaf, ei); 2521 BUG_ON(refs == 0); 2522 refs -= 1; 2523 btrfs_set_extent_refs(leaf, ei, refs); 2524 2525 btrfs_mark_buffer_dirty(leaf); 2526 2527 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2528 struct btrfs_extent_ref *ref; 2529 ref = btrfs_item_ptr(leaf, path->slots[0], 2530 struct btrfs_extent_ref); 2531 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2532 /* if the back ref and the extent are next to each other 2533 * they get deleted below in one shot 2534 */ 2535 path->slots[0] = extent_slot; 2536 num_to_del = 2; 2537 } else if (found_extent) { 2538 /* otherwise delete the extent back ref */ 2539 ret = remove_extent_backref(trans, extent_root, path); 2540 BUG_ON(ret); 2541 /* if refs are 0, we need to setup the path for deletion */ 2542 if (refs == 0) { 2543 btrfs_release_path(extent_root, path); 2544 ret = btrfs_search_slot(trans, extent_root, &key, path, 2545 -1, 1); 2546 BUG_ON(ret); 2547 } 2548 } 2549 2550 if (refs == 0) { 2551 u64 super_used; 2552 u64 root_used; 2553 2554 if (pin) { 2555 mutex_lock(&root->fs_info->pinned_mutex); 2556 ret = pin_down_bytes(trans, root, bytenr, num_bytes, 2557 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); 2558 mutex_unlock(&root->fs_info->pinned_mutex); 2559 if (ret > 0) 2560 mark_free = 1; 2561 BUG_ON(ret < 0); 2562 } 2563 /* block accounting for super block */ 2564 spin_lock(&info->delalloc_lock); 2565 super_used = btrfs_super_bytes_used(&info->super_copy); 2566 btrfs_set_super_bytes_used(&info->super_copy, 2567 super_used - num_bytes); 2568 2569 /* block accounting for root item */ 2570 root_used = btrfs_root_used(&root->root_item); 2571 btrfs_set_root_used(&root->root_item, 2572 root_used - num_bytes); 2573 spin_unlock(&info->delalloc_lock); 2574 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 2575 num_to_del); 2576 BUG_ON(ret); 2577 btrfs_release_path(extent_root, path); 2578 2579 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2580 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 2581 BUG_ON(ret); 2582 } 2583 2584 ret = update_block_group(trans, root, bytenr, num_bytes, 0, 2585 mark_free); 2586 BUG_ON(ret); 2587 } 2588 btrfs_free_path(path); 2589 finish_current_insert(trans, extent_root, 0); 2590 return ret; 2591 } 2592 2593 /* 2594 * find all the blocks marked as pending in the radix tree and remove 2595 * them from the extent map 2596 */ 2597 static int del_pending_extents(struct btrfs_trans_handle *trans, 2598 struct btrfs_root *extent_root, int all) 2599 { 2600 int ret; 2601 int err = 0; 2602 u64 start; 2603 u64 end; 2604 u64 priv; 2605 u64 search = 0; 2606 int nr = 0, skipped = 0; 2607 struct extent_io_tree *pending_del; 2608 struct extent_io_tree *extent_ins; 2609 struct pending_extent_op *extent_op; 2610 struct btrfs_fs_info *info = extent_root->fs_info; 2611 struct list_head delete_list; 2612 2613 INIT_LIST_HEAD(&delete_list); 2614 extent_ins = &extent_root->fs_info->extent_ins; 2615 pending_del = &extent_root->fs_info->pending_del; 2616 2617 again: 2618 mutex_lock(&info->extent_ins_mutex); 2619 while (1) { 2620 ret = find_first_extent_bit(pending_del, search, &start, &end, 2621 EXTENT_WRITEBACK); 2622 if (ret) { 2623 if (all && skipped && !nr) { 2624 search = 0; 2625 skipped = 0; 2626 continue; 2627 } 2628 mutex_unlock(&info->extent_ins_mutex); 2629 break; 2630 } 2631 2632 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); 2633 if (!ret) { 2634 search = end+1; 2635 skipped = 1; 2636 2637 if (need_resched()) { 2638 mutex_unlock(&info->extent_ins_mutex); 2639 cond_resched(); 2640 mutex_lock(&info->extent_ins_mutex); 2641 } 2642 2643 continue; 2644 } 2645 BUG_ON(ret < 0); 2646 2647 ret = get_state_private(pending_del, start, &priv); 2648 BUG_ON(ret); 2649 extent_op = (struct pending_extent_op *)(unsigned long)priv; 2650 2651 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, 2652 GFP_NOFS); 2653 if (!test_range_bit(extent_ins, start, end, 2654 EXTENT_WRITEBACK, 0)) { 2655 list_add_tail(&extent_op->list, &delete_list); 2656 nr++; 2657 } else { 2658 kfree(extent_op); 2659 2660 ret = get_state_private(&info->extent_ins, start, 2661 &priv); 2662 BUG_ON(ret); 2663 extent_op = (struct pending_extent_op *) 2664 (unsigned long)priv; 2665 2666 clear_extent_bits(&info->extent_ins, start, end, 2667 EXTENT_WRITEBACK, GFP_NOFS); 2668 2669 if (extent_op->type == PENDING_BACKREF_UPDATE) { 2670 list_add_tail(&extent_op->list, &delete_list); 2671 search = end + 1; 2672 nr++; 2673 continue; 2674 } 2675 2676 mutex_lock(&extent_root->fs_info->pinned_mutex); 2677 ret = pin_down_bytes(trans, extent_root, start, 2678 end + 1 - start, 0); 2679 mutex_unlock(&extent_root->fs_info->pinned_mutex); 2680 2681 ret = update_block_group(trans, extent_root, start, 2682 end + 1 - start, 0, ret > 0); 2683 2684 unlock_extent(extent_ins, start, end, GFP_NOFS); 2685 BUG_ON(ret); 2686 kfree(extent_op); 2687 } 2688 if (ret) 2689 err = ret; 2690 2691 search = end + 1; 2692 2693 if (need_resched()) { 2694 mutex_unlock(&info->extent_ins_mutex); 2695 cond_resched(); 2696 mutex_lock(&info->extent_ins_mutex); 2697 } 2698 } 2699 2700 if (nr) { 2701 ret = free_extents(trans, extent_root, &delete_list); 2702 BUG_ON(ret); 2703 } 2704 2705 if (all && skipped) { 2706 INIT_LIST_HEAD(&delete_list); 2707 search = 0; 2708 nr = 0; 2709 goto again; 2710 } 2711 2712 return err; 2713 } 2714 2715 /* 2716 * remove an extent from the root, returns 0 on success 2717 */ 2718 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2719 struct btrfs_root *root, 2720 u64 bytenr, u64 num_bytes, u64 parent, 2721 u64 root_objectid, u64 ref_generation, 2722 u64 owner_objectid, int pin) 2723 { 2724 struct btrfs_root *extent_root = root->fs_info->extent_root; 2725 int pending_ret; 2726 int ret; 2727 2728 WARN_ON(num_bytes < root->sectorsize); 2729 if (root == extent_root) { 2730 struct pending_extent_op *extent_op = NULL; 2731 2732 mutex_lock(&root->fs_info->extent_ins_mutex); 2733 if (test_range_bit(&root->fs_info->extent_ins, bytenr, 2734 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { 2735 u64 priv; 2736 ret = get_state_private(&root->fs_info->extent_ins, 2737 bytenr, &priv); 2738 BUG_ON(ret); 2739 extent_op = (struct pending_extent_op *) 2740 (unsigned long)priv; 2741 2742 extent_op->del = 1; 2743 if (extent_op->type == PENDING_EXTENT_INSERT) { 2744 mutex_unlock(&root->fs_info->extent_ins_mutex); 2745 return 0; 2746 } 2747 } 2748 2749 if (extent_op) { 2750 ref_generation = extent_op->orig_generation; 2751 parent = extent_op->orig_parent; 2752 } 2753 2754 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2755 BUG_ON(!extent_op); 2756 2757 extent_op->type = PENDING_EXTENT_DELETE; 2758 extent_op->bytenr = bytenr; 2759 extent_op->num_bytes = num_bytes; 2760 extent_op->parent = parent; 2761 extent_op->orig_parent = parent; 2762 extent_op->generation = ref_generation; 2763 extent_op->orig_generation = ref_generation; 2764 extent_op->level = (int)owner_objectid; 2765 INIT_LIST_HEAD(&extent_op->list); 2766 extent_op->del = 0; 2767 2768 set_extent_bits(&root->fs_info->pending_del, 2769 bytenr, bytenr + num_bytes - 1, 2770 EXTENT_WRITEBACK, GFP_NOFS); 2771 set_state_private(&root->fs_info->pending_del, 2772 bytenr, (unsigned long)extent_op); 2773 mutex_unlock(&root->fs_info->extent_ins_mutex); 2774 return 0; 2775 } 2776 /* if metadata always pin */ 2777 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 2778 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 2779 mutex_lock(&root->fs_info->pinned_mutex); 2780 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2781 mutex_unlock(&root->fs_info->pinned_mutex); 2782 update_reserved_extents(root, bytenr, num_bytes, 0); 2783 return 0; 2784 } 2785 pin = 1; 2786 } 2787 2788 /* if data pin when any transaction has committed this */ 2789 if (ref_generation != trans->transid) 2790 pin = 1; 2791 2792 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 2793 root_objectid, ref_generation, 2794 owner_objectid, pin, pin == 0); 2795 2796 finish_current_insert(trans, root->fs_info->extent_root, 0); 2797 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); 2798 return ret ? ret : pending_ret; 2799 } 2800 2801 int btrfs_free_extent(struct btrfs_trans_handle *trans, 2802 struct btrfs_root *root, 2803 u64 bytenr, u64 num_bytes, u64 parent, 2804 u64 root_objectid, u64 ref_generation, 2805 u64 owner_objectid, int pin) 2806 { 2807 int ret; 2808 2809 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 2810 root_objectid, ref_generation, 2811 owner_objectid, pin); 2812 return ret; 2813 } 2814 2815 static u64 stripe_align(struct btrfs_root *root, u64 val) 2816 { 2817 u64 mask = ((u64)root->stripesize - 1); 2818 u64 ret = (val + mask) & ~mask; 2819 return ret; 2820 } 2821 2822 /* 2823 * walks the btree of allocated extents and find a hole of a given size. 2824 * The key ins is changed to record the hole: 2825 * ins->objectid == block start 2826 * ins->flags = BTRFS_EXTENT_ITEM_KEY 2827 * ins->offset == number of blocks 2828 * Any available blocks before search_start are skipped. 2829 */ 2830 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 2831 struct btrfs_root *orig_root, 2832 u64 num_bytes, u64 empty_size, 2833 u64 search_start, u64 search_end, 2834 u64 hint_byte, struct btrfs_key *ins, 2835 u64 exclude_start, u64 exclude_nr, 2836 int data) 2837 { 2838 int ret = 0; 2839 struct btrfs_root *root = orig_root->fs_info->extent_root; 2840 u64 total_needed = num_bytes; 2841 u64 *last_ptr = NULL; 2842 u64 last_wanted = 0; 2843 struct btrfs_block_group_cache *block_group = NULL; 2844 int chunk_alloc_done = 0; 2845 int empty_cluster = 2 * 1024 * 1024; 2846 int allowed_chunk_alloc = 0; 2847 struct list_head *head = NULL, *cur = NULL; 2848 int loop = 0; 2849 int extra_loop = 0; 2850 struct btrfs_space_info *space_info; 2851 2852 WARN_ON(num_bytes < root->sectorsize); 2853 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 2854 ins->objectid = 0; 2855 ins->offset = 0; 2856 2857 if (orig_root->ref_cows || empty_size) 2858 allowed_chunk_alloc = 1; 2859 2860 if (data & BTRFS_BLOCK_GROUP_METADATA) { 2861 last_ptr = &root->fs_info->last_alloc; 2862 empty_cluster = 64 * 1024; 2863 } 2864 2865 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) 2866 last_ptr = &root->fs_info->last_data_alloc; 2867 2868 if (last_ptr) { 2869 if (*last_ptr) { 2870 hint_byte = *last_ptr; 2871 last_wanted = *last_ptr; 2872 } else 2873 empty_size += empty_cluster; 2874 } else { 2875 empty_cluster = 0; 2876 } 2877 search_start = max(search_start, first_logical_byte(root, 0)); 2878 search_start = max(search_start, hint_byte); 2879 2880 if (last_wanted && search_start != last_wanted) { 2881 last_wanted = 0; 2882 empty_size += empty_cluster; 2883 } 2884 2885 total_needed += empty_size; 2886 block_group = btrfs_lookup_block_group(root->fs_info, search_start); 2887 if (!block_group) 2888 block_group = btrfs_lookup_first_block_group(root->fs_info, 2889 search_start); 2890 space_info = __find_space_info(root->fs_info, data); 2891 2892 down_read(&space_info->groups_sem); 2893 while (1) { 2894 struct btrfs_free_space *free_space; 2895 /* 2896 * the only way this happens if our hint points to a block 2897 * group thats not of the proper type, while looping this 2898 * should never happen 2899 */ 2900 if (empty_size) 2901 extra_loop = 1; 2902 2903 if (!block_group) 2904 goto new_group_no_lock; 2905 2906 if (unlikely(!block_group->cached)) { 2907 mutex_lock(&block_group->cache_mutex); 2908 ret = cache_block_group(root, block_group); 2909 mutex_unlock(&block_group->cache_mutex); 2910 if (ret) 2911 break; 2912 } 2913 2914 mutex_lock(&block_group->alloc_mutex); 2915 if (unlikely(!block_group_bits(block_group, data))) 2916 goto new_group; 2917 2918 if (unlikely(block_group->ro)) 2919 goto new_group; 2920 2921 free_space = btrfs_find_free_space(block_group, search_start, 2922 total_needed); 2923 if (free_space) { 2924 u64 start = block_group->key.objectid; 2925 u64 end = block_group->key.objectid + 2926 block_group->key.offset; 2927 2928 search_start = stripe_align(root, free_space->offset); 2929 2930 /* move on to the next group */ 2931 if (search_start + num_bytes >= search_end) 2932 goto new_group; 2933 2934 /* move on to the next group */ 2935 if (search_start + num_bytes > end) 2936 goto new_group; 2937 2938 if (last_wanted && search_start != last_wanted) { 2939 total_needed += empty_cluster; 2940 empty_size += empty_cluster; 2941 last_wanted = 0; 2942 /* 2943 * if search_start is still in this block group 2944 * then we just re-search this block group 2945 */ 2946 if (search_start >= start && 2947 search_start < end) { 2948 mutex_unlock(&block_group->alloc_mutex); 2949 continue; 2950 } 2951 2952 /* else we go to the next block group */ 2953 goto new_group; 2954 } 2955 2956 if (exclude_nr > 0 && 2957 (search_start + num_bytes > exclude_start && 2958 search_start < exclude_start + exclude_nr)) { 2959 search_start = exclude_start + exclude_nr; 2960 /* 2961 * if search_start is still in this block group 2962 * then we just re-search this block group 2963 */ 2964 if (search_start >= start && 2965 search_start < end) { 2966 mutex_unlock(&block_group->alloc_mutex); 2967 last_wanted = 0; 2968 continue; 2969 } 2970 2971 /* else we go to the next block group */ 2972 goto new_group; 2973 } 2974 2975 ins->objectid = search_start; 2976 ins->offset = num_bytes; 2977 2978 btrfs_remove_free_space_lock(block_group, search_start, 2979 num_bytes); 2980 /* we are all good, lets return */ 2981 mutex_unlock(&block_group->alloc_mutex); 2982 break; 2983 } 2984 new_group: 2985 mutex_unlock(&block_group->alloc_mutex); 2986 put_block_group(block_group); 2987 block_group = NULL; 2988 new_group_no_lock: 2989 /* don't try to compare new allocations against the 2990 * last allocation any more 2991 */ 2992 last_wanted = 0; 2993 2994 /* 2995 * Here's how this works. 2996 * loop == 0: we were searching a block group via a hint 2997 * and didn't find anything, so we start at 2998 * the head of the block groups and keep searching 2999 * loop == 1: we're searching through all of the block groups 3000 * if we hit the head again we have searched 3001 * all of the block groups for this space and we 3002 * need to try and allocate, if we cant error out. 3003 * loop == 2: we allocated more space and are looping through 3004 * all of the block groups again. 3005 */ 3006 if (loop == 0) { 3007 head = &space_info->block_groups; 3008 cur = head->next; 3009 loop++; 3010 } else if (loop == 1 && cur == head) { 3011 int keep_going; 3012 3013 /* at this point we give up on the empty_size 3014 * allocations and just try to allocate the min 3015 * space. 3016 * 3017 * The extra_loop field was set if an empty_size 3018 * allocation was attempted above, and if this 3019 * is try we need to try the loop again without 3020 * the additional empty_size. 3021 */ 3022 total_needed -= empty_size; 3023 empty_size = 0; 3024 keep_going = extra_loop; 3025 loop++; 3026 3027 if (allowed_chunk_alloc && !chunk_alloc_done) { 3028 up_read(&space_info->groups_sem); 3029 ret = do_chunk_alloc(trans, root, num_bytes + 3030 2 * 1024 * 1024, data, 1); 3031 down_read(&space_info->groups_sem); 3032 if (ret < 0) 3033 goto loop_check; 3034 head = &space_info->block_groups; 3035 /* 3036 * we've allocated a new chunk, keep 3037 * trying 3038 */ 3039 keep_going = 1; 3040 chunk_alloc_done = 1; 3041 } else if (!allowed_chunk_alloc) { 3042 space_info->force_alloc = 1; 3043 } 3044 loop_check: 3045 if (keep_going) { 3046 cur = head->next; 3047 extra_loop = 0; 3048 } else { 3049 break; 3050 } 3051 } else if (cur == head) { 3052 break; 3053 } 3054 3055 block_group = list_entry(cur, struct btrfs_block_group_cache, 3056 list); 3057 atomic_inc(&block_group->count); 3058 3059 search_start = block_group->key.objectid; 3060 cur = cur->next; 3061 } 3062 3063 /* we found what we needed */ 3064 if (ins->objectid) { 3065 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 3066 trans->block_group = block_group->key.objectid; 3067 3068 if (last_ptr) 3069 *last_ptr = ins->objectid + ins->offset; 3070 ret = 0; 3071 } else if (!ret) { 3072 printk(KERN_ERR "btrfs searching for %llu bytes, " 3073 "num_bytes %llu, loop %d, allowed_alloc %d\n", 3074 (unsigned long long)total_needed, 3075 (unsigned long long)num_bytes, 3076 loop, allowed_chunk_alloc); 3077 ret = -ENOSPC; 3078 } 3079 if (block_group) 3080 put_block_group(block_group); 3081 3082 up_read(&space_info->groups_sem); 3083 return ret; 3084 } 3085 3086 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3087 { 3088 struct btrfs_block_group_cache *cache; 3089 3090 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3091 (unsigned long long)(info->total_bytes - info->bytes_used - 3092 info->bytes_pinned - info->bytes_reserved), 3093 (info->full) ? "" : "not "); 3094 3095 down_read(&info->groups_sem); 3096 list_for_each_entry(cache, &info->block_groups, list) { 3097 spin_lock(&cache->lock); 3098 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3099 "%llu pinned %llu reserved\n", 3100 (unsigned long long)cache->key.objectid, 3101 (unsigned long long)cache->key.offset, 3102 (unsigned long long)btrfs_block_group_used(&cache->item), 3103 (unsigned long long)cache->pinned, 3104 (unsigned long long)cache->reserved); 3105 btrfs_dump_free_space(cache, bytes); 3106 spin_unlock(&cache->lock); 3107 } 3108 up_read(&info->groups_sem); 3109 } 3110 3111 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3112 struct btrfs_root *root, 3113 u64 num_bytes, u64 min_alloc_size, 3114 u64 empty_size, u64 hint_byte, 3115 u64 search_end, struct btrfs_key *ins, 3116 u64 data) 3117 { 3118 int ret; 3119 u64 search_start = 0; 3120 u64 alloc_profile; 3121 struct btrfs_fs_info *info = root->fs_info; 3122 3123 if (data) { 3124 alloc_profile = info->avail_data_alloc_bits & 3125 info->data_alloc_profile; 3126 data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; 3127 } else if (root == root->fs_info->chunk_root) { 3128 alloc_profile = info->avail_system_alloc_bits & 3129 info->system_alloc_profile; 3130 data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; 3131 } else { 3132 alloc_profile = info->avail_metadata_alloc_bits & 3133 info->metadata_alloc_profile; 3134 data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; 3135 } 3136 again: 3137 data = btrfs_reduce_alloc_profile(root, data); 3138 /* 3139 * the only place that sets empty_size is btrfs_realloc_node, which 3140 * is not called recursively on allocations 3141 */ 3142 if (empty_size || root->ref_cows) { 3143 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3144 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3145 2 * 1024 * 1024, 3146 BTRFS_BLOCK_GROUP_METADATA | 3147 (info->metadata_alloc_profile & 3148 info->avail_metadata_alloc_bits), 0); 3149 } 3150 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3151 num_bytes + 2 * 1024 * 1024, data, 0); 3152 } 3153 3154 WARN_ON(num_bytes < root->sectorsize); 3155 ret = find_free_extent(trans, root, num_bytes, empty_size, 3156 search_start, search_end, hint_byte, ins, 3157 trans->alloc_exclude_start, 3158 trans->alloc_exclude_nr, data); 3159 3160 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 3161 num_bytes = num_bytes >> 1; 3162 num_bytes = num_bytes & ~(root->sectorsize - 1); 3163 num_bytes = max(num_bytes, min_alloc_size); 3164 do_chunk_alloc(trans, root->fs_info->extent_root, 3165 num_bytes, data, 1); 3166 goto again; 3167 } 3168 if (ret) { 3169 struct btrfs_space_info *sinfo; 3170 3171 sinfo = __find_space_info(root->fs_info, data); 3172 printk(KERN_ERR "btrfs allocation failed flags %llu, " 3173 "wanted %llu\n", (unsigned long long)data, 3174 (unsigned long long)num_bytes); 3175 dump_space_info(sinfo, num_bytes); 3176 BUG(); 3177 } 3178 3179 return ret; 3180 } 3181 3182 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 3183 { 3184 struct btrfs_block_group_cache *cache; 3185 int ret = 0; 3186 3187 cache = btrfs_lookup_block_group(root->fs_info, start); 3188 if (!cache) { 3189 printk(KERN_ERR "Unable to find block group for %llu\n", 3190 (unsigned long long)start); 3191 return -ENOSPC; 3192 } 3193 3194 ret = btrfs_discard_extent(root, start, len); 3195 3196 btrfs_add_free_space(cache, start, len); 3197 put_block_group(cache); 3198 update_reserved_extents(root, start, len, 0); 3199 3200 return ret; 3201 } 3202 3203 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3204 struct btrfs_root *root, 3205 u64 num_bytes, u64 min_alloc_size, 3206 u64 empty_size, u64 hint_byte, 3207 u64 search_end, struct btrfs_key *ins, 3208 u64 data) 3209 { 3210 int ret; 3211 ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, 3212 empty_size, hint_byte, search_end, ins, 3213 data); 3214 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3215 return ret; 3216 } 3217 3218 static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3219 struct btrfs_root *root, u64 parent, 3220 u64 root_objectid, u64 ref_generation, 3221 u64 owner, struct btrfs_key *ins) 3222 { 3223 int ret; 3224 int pending_ret; 3225 u64 super_used; 3226 u64 root_used; 3227 u64 num_bytes = ins->offset; 3228 u32 sizes[2]; 3229 struct btrfs_fs_info *info = root->fs_info; 3230 struct btrfs_root *extent_root = info->extent_root; 3231 struct btrfs_extent_item *extent_item; 3232 struct btrfs_extent_ref *ref; 3233 struct btrfs_path *path; 3234 struct btrfs_key keys[2]; 3235 3236 if (parent == 0) 3237 parent = ins->objectid; 3238 3239 /* block accounting for super block */ 3240 spin_lock(&info->delalloc_lock); 3241 super_used = btrfs_super_bytes_used(&info->super_copy); 3242 btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes); 3243 3244 /* block accounting for root item */ 3245 root_used = btrfs_root_used(&root->root_item); 3246 btrfs_set_root_used(&root->root_item, root_used + num_bytes); 3247 spin_unlock(&info->delalloc_lock); 3248 3249 if (root == extent_root) { 3250 struct pending_extent_op *extent_op; 3251 3252 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 3253 BUG_ON(!extent_op); 3254 3255 extent_op->type = PENDING_EXTENT_INSERT; 3256 extent_op->bytenr = ins->objectid; 3257 extent_op->num_bytes = ins->offset; 3258 extent_op->parent = parent; 3259 extent_op->orig_parent = 0; 3260 extent_op->generation = ref_generation; 3261 extent_op->orig_generation = 0; 3262 extent_op->level = (int)owner; 3263 INIT_LIST_HEAD(&extent_op->list); 3264 extent_op->del = 0; 3265 3266 mutex_lock(&root->fs_info->extent_ins_mutex); 3267 set_extent_bits(&root->fs_info->extent_ins, ins->objectid, 3268 ins->objectid + ins->offset - 1, 3269 EXTENT_WRITEBACK, GFP_NOFS); 3270 set_state_private(&root->fs_info->extent_ins, 3271 ins->objectid, (unsigned long)extent_op); 3272 mutex_unlock(&root->fs_info->extent_ins_mutex); 3273 goto update_block; 3274 } 3275 3276 memcpy(&keys[0], ins, sizeof(*ins)); 3277 keys[1].objectid = ins->objectid; 3278 keys[1].type = BTRFS_EXTENT_REF_KEY; 3279 keys[1].offset = parent; 3280 sizes[0] = sizeof(*extent_item); 3281 sizes[1] = sizeof(*ref); 3282 3283 path = btrfs_alloc_path(); 3284 BUG_ON(!path); 3285 3286 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 3287 sizes, 2); 3288 BUG_ON(ret); 3289 3290 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3291 struct btrfs_extent_item); 3292 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 3293 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 3294 struct btrfs_extent_ref); 3295 3296 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 3297 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 3298 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 3299 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 3300 3301 btrfs_mark_buffer_dirty(path->nodes[0]); 3302 3303 trans->alloc_exclude_start = 0; 3304 trans->alloc_exclude_nr = 0; 3305 btrfs_free_path(path); 3306 finish_current_insert(trans, extent_root, 0); 3307 pending_ret = del_pending_extents(trans, extent_root, 0); 3308 3309 if (ret) 3310 goto out; 3311 if (pending_ret) { 3312 ret = pending_ret; 3313 goto out; 3314 } 3315 3316 update_block: 3317 ret = update_block_group(trans, root, ins->objectid, 3318 ins->offset, 1, 0); 3319 if (ret) { 3320 printk(KERN_ERR "btrfs update block group failed for %llu " 3321 "%llu\n", (unsigned long long)ins->objectid, 3322 (unsigned long long)ins->offset); 3323 BUG(); 3324 } 3325 out: 3326 return ret; 3327 } 3328 3329 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3330 struct btrfs_root *root, u64 parent, 3331 u64 root_objectid, u64 ref_generation, 3332 u64 owner, struct btrfs_key *ins) 3333 { 3334 int ret; 3335 3336 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3337 return 0; 3338 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3339 ref_generation, owner, ins); 3340 update_reserved_extents(root, ins->objectid, ins->offset, 0); 3341 return ret; 3342 } 3343 3344 /* 3345 * this is used by the tree logging recovery code. It records that 3346 * an extent has been allocated and makes sure to clear the free 3347 * space cache bits as well 3348 */ 3349 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, 3350 struct btrfs_root *root, u64 parent, 3351 u64 root_objectid, u64 ref_generation, 3352 u64 owner, struct btrfs_key *ins) 3353 { 3354 int ret; 3355 struct btrfs_block_group_cache *block_group; 3356 3357 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 3358 mutex_lock(&block_group->cache_mutex); 3359 cache_block_group(root, block_group); 3360 mutex_unlock(&block_group->cache_mutex); 3361 3362 ret = btrfs_remove_free_space(block_group, ins->objectid, 3363 ins->offset); 3364 BUG_ON(ret); 3365 put_block_group(block_group); 3366 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3367 ref_generation, owner, ins); 3368 return ret; 3369 } 3370 3371 /* 3372 * finds a free extent and does all the dirty work required for allocation 3373 * returns the key for the extent through ins, and a tree buffer for 3374 * the first block of the extent through buf. 3375 * 3376 * returns 0 if everything worked, non-zero otherwise. 3377 */ 3378 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 3379 struct btrfs_root *root, 3380 u64 num_bytes, u64 parent, u64 min_alloc_size, 3381 u64 root_objectid, u64 ref_generation, 3382 u64 owner_objectid, u64 empty_size, u64 hint_byte, 3383 u64 search_end, struct btrfs_key *ins, u64 data) 3384 { 3385 int ret; 3386 3387 ret = __btrfs_reserve_extent(trans, root, num_bytes, 3388 min_alloc_size, empty_size, hint_byte, 3389 search_end, ins, data); 3390 BUG_ON(ret); 3391 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 3392 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 3393 root_objectid, ref_generation, 3394 owner_objectid, ins); 3395 BUG_ON(ret); 3396 3397 } else { 3398 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3399 } 3400 return ret; 3401 } 3402 3403 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 3404 struct btrfs_root *root, 3405 u64 bytenr, u32 blocksize) 3406 { 3407 struct extent_buffer *buf; 3408 3409 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 3410 if (!buf) 3411 return ERR_PTR(-ENOMEM); 3412 btrfs_set_header_generation(buf, trans->transid); 3413 btrfs_tree_lock(buf); 3414 clean_tree_block(trans, root, buf); 3415 3416 btrfs_set_lock_blocking(buf); 3417 btrfs_set_buffer_uptodate(buf); 3418 3419 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3420 set_extent_dirty(&root->dirty_log_pages, buf->start, 3421 buf->start + buf->len - 1, GFP_NOFS); 3422 } else { 3423 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 3424 buf->start + buf->len - 1, GFP_NOFS); 3425 } 3426 trans->blocks_used++; 3427 /* this returns a buffer locked for blocking */ 3428 return buf; 3429 } 3430 3431 /* 3432 * helper function to allocate a block for a given tree 3433 * returns the tree buffer or NULL. 3434 */ 3435 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3436 struct btrfs_root *root, 3437 u32 blocksize, u64 parent, 3438 u64 root_objectid, 3439 u64 ref_generation, 3440 int level, 3441 u64 hint, 3442 u64 empty_size) 3443 { 3444 struct btrfs_key ins; 3445 int ret; 3446 struct extent_buffer *buf; 3447 3448 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, 3449 root_objectid, ref_generation, level, 3450 empty_size, hint, (u64)-1, &ins, 0); 3451 if (ret) { 3452 BUG_ON(ret > 0); 3453 return ERR_PTR(ret); 3454 } 3455 3456 buf = btrfs_init_new_buffer(trans, root, ins.objectid, blocksize); 3457 return buf; 3458 } 3459 3460 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 3461 struct btrfs_root *root, struct extent_buffer *leaf) 3462 { 3463 u64 leaf_owner; 3464 u64 leaf_generation; 3465 struct refsort *sorted; 3466 struct btrfs_key key; 3467 struct btrfs_file_extent_item *fi; 3468 int i; 3469 int nritems; 3470 int ret; 3471 int refi = 0; 3472 int slot; 3473 3474 BUG_ON(!btrfs_is_leaf(leaf)); 3475 nritems = btrfs_header_nritems(leaf); 3476 leaf_owner = btrfs_header_owner(leaf); 3477 leaf_generation = btrfs_header_generation(leaf); 3478 3479 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 3480 /* we do this loop twice. The first time we build a list 3481 * of the extents we have a reference on, then we sort the list 3482 * by bytenr. The second time around we actually do the 3483 * extent freeing. 3484 */ 3485 for (i = 0; i < nritems; i++) { 3486 u64 disk_bytenr; 3487 cond_resched(); 3488 3489 btrfs_item_key_to_cpu(leaf, &key, i); 3490 3491 /* only extents have references, skip everything else */ 3492 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3493 continue; 3494 3495 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 3496 3497 /* inline extents live in the btree, they don't have refs */ 3498 if (btrfs_file_extent_type(leaf, fi) == 3499 BTRFS_FILE_EXTENT_INLINE) 3500 continue; 3501 3502 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 3503 3504 /* holes don't have refs */ 3505 if (disk_bytenr == 0) 3506 continue; 3507 3508 sorted[refi].bytenr = disk_bytenr; 3509 sorted[refi].slot = i; 3510 refi++; 3511 } 3512 3513 if (refi == 0) 3514 goto out; 3515 3516 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 3517 3518 for (i = 0; i < refi; i++) { 3519 u64 disk_bytenr; 3520 3521 disk_bytenr = sorted[i].bytenr; 3522 slot = sorted[i].slot; 3523 3524 cond_resched(); 3525 3526 btrfs_item_key_to_cpu(leaf, &key, slot); 3527 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3528 continue; 3529 3530 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3531 3532 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3533 btrfs_file_extent_disk_num_bytes(leaf, fi), 3534 leaf->start, leaf_owner, leaf_generation, 3535 key.objectid, 0); 3536 BUG_ON(ret); 3537 3538 atomic_inc(&root->fs_info->throttle_gen); 3539 wake_up(&root->fs_info->transaction_throttle); 3540 cond_resched(); 3541 } 3542 out: 3543 kfree(sorted); 3544 return 0; 3545 } 3546 3547 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 3548 struct btrfs_root *root, 3549 struct btrfs_leaf_ref *ref) 3550 { 3551 int i; 3552 int ret; 3553 struct btrfs_extent_info *info; 3554 struct refsort *sorted; 3555 3556 if (ref->nritems == 0) 3557 return 0; 3558 3559 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 3560 for (i = 0; i < ref->nritems; i++) { 3561 sorted[i].bytenr = ref->extents[i].bytenr; 3562 sorted[i].slot = i; 3563 } 3564 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 3565 3566 /* 3567 * the items in the ref were sorted when the ref was inserted 3568 * into the ref cache, so this is already in order 3569 */ 3570 for (i = 0; i < ref->nritems; i++) { 3571 info = ref->extents + sorted[i].slot; 3572 ret = __btrfs_free_extent(trans, root, info->bytenr, 3573 info->num_bytes, ref->bytenr, 3574 ref->owner, ref->generation, 3575 info->objectid, 0); 3576 3577 atomic_inc(&root->fs_info->throttle_gen); 3578 wake_up(&root->fs_info->transaction_throttle); 3579 cond_resched(); 3580 3581 BUG_ON(ret); 3582 info++; 3583 } 3584 3585 kfree(sorted); 3586 return 0; 3587 } 3588 3589 static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3590 u64 len, u32 *refs) 3591 { 3592 int ret; 3593 3594 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3595 BUG_ON(ret); 3596 3597 #if 0 /* some debugging code in case we see problems here */ 3598 /* if the refs count is one, it won't get increased again. But 3599 * if the ref count is > 1, someone may be decreasing it at 3600 * the same time we are. 3601 */ 3602 if (*refs != 1) { 3603 struct extent_buffer *eb = NULL; 3604 eb = btrfs_find_create_tree_block(root, start, len); 3605 if (eb) 3606 btrfs_tree_lock(eb); 3607 3608 mutex_lock(&root->fs_info->alloc_mutex); 3609 ret = lookup_extent_ref(NULL, root, start, len, refs); 3610 BUG_ON(ret); 3611 mutex_unlock(&root->fs_info->alloc_mutex); 3612 3613 if (eb) { 3614 btrfs_tree_unlock(eb); 3615 free_extent_buffer(eb); 3616 } 3617 if (*refs == 1) { 3618 printk(KERN_ERR "btrfs block %llu went down to one " 3619 "during drop_snap\n", (unsigned long long)start); 3620 } 3621 3622 } 3623 #endif 3624 3625 cond_resched(); 3626 return ret; 3627 } 3628 3629 /* 3630 * this is used while deleting old snapshots, and it drops the refs 3631 * on a whole subtree starting from a level 1 node. 3632 * 3633 * The idea is to sort all the leaf pointers, and then drop the 3634 * ref on all the leaves in order. Most of the time the leaves 3635 * will have ref cache entries, so no leaf IOs will be required to 3636 * find the extents they have references on. 3637 * 3638 * For each leaf, any references it has are also dropped in order 3639 * 3640 * This ends up dropping the references in something close to optimal 3641 * order for reading and modifying the extent allocation tree. 3642 */ 3643 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 3644 struct btrfs_root *root, 3645 struct btrfs_path *path) 3646 { 3647 u64 bytenr; 3648 u64 root_owner; 3649 u64 root_gen; 3650 struct extent_buffer *eb = path->nodes[1]; 3651 struct extent_buffer *leaf; 3652 struct btrfs_leaf_ref *ref; 3653 struct refsort *sorted = NULL; 3654 int nritems = btrfs_header_nritems(eb); 3655 int ret; 3656 int i; 3657 int refi = 0; 3658 int slot = path->slots[1]; 3659 u32 blocksize = btrfs_level_size(root, 0); 3660 u32 refs; 3661 3662 if (nritems == 0) 3663 goto out; 3664 3665 root_owner = btrfs_header_owner(eb); 3666 root_gen = btrfs_header_generation(eb); 3667 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 3668 3669 /* 3670 * step one, sort all the leaf pointers so we don't scribble 3671 * randomly into the extent allocation tree 3672 */ 3673 for (i = slot; i < nritems; i++) { 3674 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 3675 sorted[refi].slot = i; 3676 refi++; 3677 } 3678 3679 /* 3680 * nritems won't be zero, but if we're picking up drop_snapshot 3681 * after a crash, slot might be > 0, so double check things 3682 * just in case. 3683 */ 3684 if (refi == 0) 3685 goto out; 3686 3687 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 3688 3689 /* 3690 * the first loop frees everything the leaves point to 3691 */ 3692 for (i = 0; i < refi; i++) { 3693 u64 ptr_gen; 3694 3695 bytenr = sorted[i].bytenr; 3696 3697 /* 3698 * check the reference count on this leaf. If it is > 1 3699 * we just decrement it below and don't update any 3700 * of the refs the leaf points to. 3701 */ 3702 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3703 BUG_ON(ret); 3704 if (refs != 1) 3705 continue; 3706 3707 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 3708 3709 /* 3710 * the leaf only had one reference, which means the 3711 * only thing pointing to this leaf is the snapshot 3712 * we're deleting. It isn't possible for the reference 3713 * count to increase again later 3714 * 3715 * The reference cache is checked for the leaf, 3716 * and if found we'll be able to drop any refs held by 3717 * the leaf without needing to read it in. 3718 */ 3719 ref = btrfs_lookup_leaf_ref(root, bytenr); 3720 if (ref && ref->generation != ptr_gen) { 3721 btrfs_free_leaf_ref(root, ref); 3722 ref = NULL; 3723 } 3724 if (ref) { 3725 ret = cache_drop_leaf_ref(trans, root, ref); 3726 BUG_ON(ret); 3727 btrfs_remove_leaf_ref(root, ref); 3728 btrfs_free_leaf_ref(root, ref); 3729 } else { 3730 /* 3731 * the leaf wasn't in the reference cache, so 3732 * we have to read it. 3733 */ 3734 leaf = read_tree_block(root, bytenr, blocksize, 3735 ptr_gen); 3736 ret = btrfs_drop_leaf_ref(trans, root, leaf); 3737 BUG_ON(ret); 3738 free_extent_buffer(leaf); 3739 } 3740 atomic_inc(&root->fs_info->throttle_gen); 3741 wake_up(&root->fs_info->transaction_throttle); 3742 cond_resched(); 3743 } 3744 3745 /* 3746 * run through the loop again to free the refs on the leaves. 3747 * This is faster than doing it in the loop above because 3748 * the leaves are likely to be clustered together. We end up 3749 * working in nice chunks on the extent allocation tree. 3750 */ 3751 for (i = 0; i < refi; i++) { 3752 bytenr = sorted[i].bytenr; 3753 ret = __btrfs_free_extent(trans, root, bytenr, 3754 blocksize, eb->start, 3755 root_owner, root_gen, 0, 1); 3756 BUG_ON(ret); 3757 3758 atomic_inc(&root->fs_info->throttle_gen); 3759 wake_up(&root->fs_info->transaction_throttle); 3760 cond_resched(); 3761 } 3762 out: 3763 kfree(sorted); 3764 3765 /* 3766 * update the path to show we've processed the entire level 1 3767 * node. This will get saved into the root's drop_snapshot_progress 3768 * field so these drops are not repeated again if this transaction 3769 * commits. 3770 */ 3771 path->slots[1] = nritems; 3772 return 0; 3773 } 3774 3775 /* 3776 * helper function for drop_snapshot, this walks down the tree dropping ref 3777 * counts as it goes. 3778 */ 3779 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 3780 struct btrfs_root *root, 3781 struct btrfs_path *path, int *level) 3782 { 3783 u64 root_owner; 3784 u64 root_gen; 3785 u64 bytenr; 3786 u64 ptr_gen; 3787 struct extent_buffer *next; 3788 struct extent_buffer *cur; 3789 struct extent_buffer *parent; 3790 u32 blocksize; 3791 int ret; 3792 u32 refs; 3793 3794 WARN_ON(*level < 0); 3795 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3796 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 3797 path->nodes[*level]->len, &refs); 3798 BUG_ON(ret); 3799 if (refs > 1) 3800 goto out; 3801 3802 /* 3803 * walk down to the last node level and free all the leaves 3804 */ 3805 while (*level >= 0) { 3806 WARN_ON(*level < 0); 3807 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3808 cur = path->nodes[*level]; 3809 3810 if (btrfs_header_level(cur) != *level) 3811 WARN_ON(1); 3812 3813 if (path->slots[*level] >= 3814 btrfs_header_nritems(cur)) 3815 break; 3816 3817 /* the new code goes down to level 1 and does all the 3818 * leaves pointed to that node in bulk. So, this check 3819 * for level 0 will always be false. 3820 * 3821 * But, the disk format allows the drop_snapshot_progress 3822 * field in the root to leave things in a state where 3823 * a leaf will need cleaning up here. If someone crashes 3824 * with the old code and then boots with the new code, 3825 * we might find a leaf here. 3826 */ 3827 if (*level == 0) { 3828 ret = btrfs_drop_leaf_ref(trans, root, cur); 3829 BUG_ON(ret); 3830 break; 3831 } 3832 3833 /* 3834 * once we get to level one, process the whole node 3835 * at once, including everything below it. 3836 */ 3837 if (*level == 1) { 3838 ret = drop_level_one_refs(trans, root, path); 3839 BUG_ON(ret); 3840 break; 3841 } 3842 3843 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 3844 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3845 blocksize = btrfs_level_size(root, *level - 1); 3846 3847 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3848 BUG_ON(ret); 3849 3850 /* 3851 * if there is more than one reference, we don't need 3852 * to read that node to drop any references it has. We 3853 * just drop the ref we hold on that node and move on to the 3854 * next slot in this level. 3855 */ 3856 if (refs != 1) { 3857 parent = path->nodes[*level]; 3858 root_owner = btrfs_header_owner(parent); 3859 root_gen = btrfs_header_generation(parent); 3860 path->slots[*level]++; 3861 3862 ret = __btrfs_free_extent(trans, root, bytenr, 3863 blocksize, parent->start, 3864 root_owner, root_gen, 3865 *level - 1, 1); 3866 BUG_ON(ret); 3867 3868 atomic_inc(&root->fs_info->throttle_gen); 3869 wake_up(&root->fs_info->transaction_throttle); 3870 cond_resched(); 3871 3872 continue; 3873 } 3874 3875 /* 3876 * we need to keep freeing things in the next level down. 3877 * read the block and loop around to process it 3878 */ 3879 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3880 WARN_ON(*level <= 0); 3881 if (path->nodes[*level-1]) 3882 free_extent_buffer(path->nodes[*level-1]); 3883 path->nodes[*level-1] = next; 3884 *level = btrfs_header_level(next); 3885 path->slots[*level] = 0; 3886 cond_resched(); 3887 } 3888 out: 3889 WARN_ON(*level < 0); 3890 WARN_ON(*level >= BTRFS_MAX_LEVEL); 3891 3892 if (path->nodes[*level] == root->node) { 3893 parent = path->nodes[*level]; 3894 bytenr = path->nodes[*level]->start; 3895 } else { 3896 parent = path->nodes[*level + 1]; 3897 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 3898 } 3899 3900 blocksize = btrfs_level_size(root, *level); 3901 root_owner = btrfs_header_owner(parent); 3902 root_gen = btrfs_header_generation(parent); 3903 3904 /* 3905 * cleanup and free the reference on the last node 3906 * we processed 3907 */ 3908 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 3909 parent->start, root_owner, root_gen, 3910 *level, 1); 3911 free_extent_buffer(path->nodes[*level]); 3912 path->nodes[*level] = NULL; 3913 3914 *level += 1; 3915 BUG_ON(ret); 3916 3917 cond_resched(); 3918 return 0; 3919 } 3920 3921 /* 3922 * helper function for drop_subtree, this function is similar to 3923 * walk_down_tree. The main difference is that it checks reference 3924 * counts while tree blocks are locked. 3925 */ 3926 static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, 3927 struct btrfs_root *root, 3928 struct btrfs_path *path, int *level) 3929 { 3930 struct extent_buffer *next; 3931 struct extent_buffer *cur; 3932 struct extent_buffer *parent; 3933 u64 bytenr; 3934 u64 ptr_gen; 3935 u32 blocksize; 3936 u32 refs; 3937 int ret; 3938 3939 cur = path->nodes[*level]; 3940 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, 3941 &refs); 3942 BUG_ON(ret); 3943 if (refs > 1) 3944 goto out; 3945 3946 while (*level >= 0) { 3947 cur = path->nodes[*level]; 3948 if (*level == 0) { 3949 ret = btrfs_drop_leaf_ref(trans, root, cur); 3950 BUG_ON(ret); 3951 clean_tree_block(trans, root, cur); 3952 break; 3953 } 3954 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 3955 clean_tree_block(trans, root, cur); 3956 break; 3957 } 3958 3959 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 3960 blocksize = btrfs_level_size(root, *level - 1); 3961 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 3962 3963 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 3964 btrfs_tree_lock(next); 3965 btrfs_set_lock_blocking(next); 3966 3967 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 3968 &refs); 3969 BUG_ON(ret); 3970 if (refs > 1) { 3971 parent = path->nodes[*level]; 3972 ret = btrfs_free_extent(trans, root, bytenr, 3973 blocksize, parent->start, 3974 btrfs_header_owner(parent), 3975 btrfs_header_generation(parent), 3976 *level - 1, 1); 3977 BUG_ON(ret); 3978 path->slots[*level]++; 3979 btrfs_tree_unlock(next); 3980 free_extent_buffer(next); 3981 continue; 3982 } 3983 3984 *level = btrfs_header_level(next); 3985 path->nodes[*level] = next; 3986 path->slots[*level] = 0; 3987 path->locks[*level] = 1; 3988 cond_resched(); 3989 } 3990 out: 3991 parent = path->nodes[*level + 1]; 3992 bytenr = path->nodes[*level]->start; 3993 blocksize = path->nodes[*level]->len; 3994 3995 ret = btrfs_free_extent(trans, root, bytenr, blocksize, 3996 parent->start, btrfs_header_owner(parent), 3997 btrfs_header_generation(parent), *level, 1); 3998 BUG_ON(ret); 3999 4000 if (path->locks[*level]) { 4001 btrfs_tree_unlock(path->nodes[*level]); 4002 path->locks[*level] = 0; 4003 } 4004 free_extent_buffer(path->nodes[*level]); 4005 path->nodes[*level] = NULL; 4006 *level += 1; 4007 cond_resched(); 4008 return 0; 4009 } 4010 4011 /* 4012 * helper for dropping snapshots. This walks back up the tree in the path 4013 * to find the first node higher up where we haven't yet gone through 4014 * all the slots 4015 */ 4016 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4017 struct btrfs_root *root, 4018 struct btrfs_path *path, 4019 int *level, int max_level) 4020 { 4021 u64 root_owner; 4022 u64 root_gen; 4023 struct btrfs_root_item *root_item = &root->root_item; 4024 int i; 4025 int slot; 4026 int ret; 4027 4028 for (i = *level; i < max_level && path->nodes[i]; i++) { 4029 slot = path->slots[i]; 4030 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 4031 struct extent_buffer *node; 4032 struct btrfs_disk_key disk_key; 4033 4034 /* 4035 * there is more work to do in this level. 4036 * Update the drop_progress marker to reflect 4037 * the work we've done so far, and then bump 4038 * the slot number 4039 */ 4040 node = path->nodes[i]; 4041 path->slots[i]++; 4042 *level = i; 4043 WARN_ON(*level == 0); 4044 btrfs_node_key(node, &disk_key, path->slots[i]); 4045 memcpy(&root_item->drop_progress, 4046 &disk_key, sizeof(disk_key)); 4047 root_item->drop_level = i; 4048 return 0; 4049 } else { 4050 struct extent_buffer *parent; 4051 4052 /* 4053 * this whole node is done, free our reference 4054 * on it and go up one level 4055 */ 4056 if (path->nodes[*level] == root->node) 4057 parent = path->nodes[*level]; 4058 else 4059 parent = path->nodes[*level + 1]; 4060 4061 root_owner = btrfs_header_owner(parent); 4062 root_gen = btrfs_header_generation(parent); 4063 4064 clean_tree_block(trans, root, path->nodes[*level]); 4065 ret = btrfs_free_extent(trans, root, 4066 path->nodes[*level]->start, 4067 path->nodes[*level]->len, 4068 parent->start, root_owner, 4069 root_gen, *level, 1); 4070 BUG_ON(ret); 4071 if (path->locks[*level]) { 4072 btrfs_tree_unlock(path->nodes[*level]); 4073 path->locks[*level] = 0; 4074 } 4075 free_extent_buffer(path->nodes[*level]); 4076 path->nodes[*level] = NULL; 4077 *level = i + 1; 4078 } 4079 } 4080 return 1; 4081 } 4082 4083 /* 4084 * drop the reference count on the tree rooted at 'snap'. This traverses 4085 * the tree freeing any blocks that have a ref count of zero after being 4086 * decremented. 4087 */ 4088 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 4089 *root) 4090 { 4091 int ret = 0; 4092 int wret; 4093 int level; 4094 struct btrfs_path *path; 4095 int i; 4096 int orig_level; 4097 struct btrfs_root_item *root_item = &root->root_item; 4098 4099 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 4100 path = btrfs_alloc_path(); 4101 BUG_ON(!path); 4102 4103 level = btrfs_header_level(root->node); 4104 orig_level = level; 4105 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4106 path->nodes[level] = root->node; 4107 extent_buffer_get(root->node); 4108 path->slots[level] = 0; 4109 } else { 4110 struct btrfs_key key; 4111 struct btrfs_disk_key found_key; 4112 struct extent_buffer *node; 4113 4114 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4115 level = root_item->drop_level; 4116 path->lowest_level = level; 4117 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4118 if (wret < 0) { 4119 ret = wret; 4120 goto out; 4121 } 4122 node = path->nodes[level]; 4123 btrfs_node_key(node, &found_key, path->slots[level]); 4124 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 4125 sizeof(found_key))); 4126 /* 4127 * unlock our path, this is safe because only this 4128 * function is allowed to delete this snapshot 4129 */ 4130 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4131 if (path->nodes[i] && path->locks[i]) { 4132 path->locks[i] = 0; 4133 btrfs_tree_unlock(path->nodes[i]); 4134 } 4135 } 4136 } 4137 while (1) { 4138 wret = walk_down_tree(trans, root, path, &level); 4139 if (wret > 0) 4140 break; 4141 if (wret < 0) 4142 ret = wret; 4143 4144 wret = walk_up_tree(trans, root, path, &level, 4145 BTRFS_MAX_LEVEL); 4146 if (wret > 0) 4147 break; 4148 if (wret < 0) 4149 ret = wret; 4150 if (trans->transaction->in_commit) { 4151 ret = -EAGAIN; 4152 break; 4153 } 4154 atomic_inc(&root->fs_info->throttle_gen); 4155 wake_up(&root->fs_info->transaction_throttle); 4156 } 4157 for (i = 0; i <= orig_level; i++) { 4158 if (path->nodes[i]) { 4159 free_extent_buffer(path->nodes[i]); 4160 path->nodes[i] = NULL; 4161 } 4162 } 4163 out: 4164 btrfs_free_path(path); 4165 return ret; 4166 } 4167 4168 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 4169 struct btrfs_root *root, 4170 struct extent_buffer *node, 4171 struct extent_buffer *parent) 4172 { 4173 struct btrfs_path *path; 4174 int level; 4175 int parent_level; 4176 int ret = 0; 4177 int wret; 4178 4179 path = btrfs_alloc_path(); 4180 BUG_ON(!path); 4181 4182 BUG_ON(!btrfs_tree_locked(parent)); 4183 parent_level = btrfs_header_level(parent); 4184 extent_buffer_get(parent); 4185 path->nodes[parent_level] = parent; 4186 path->slots[parent_level] = btrfs_header_nritems(parent); 4187 4188 BUG_ON(!btrfs_tree_locked(node)); 4189 level = btrfs_header_level(node); 4190 extent_buffer_get(node); 4191 path->nodes[level] = node; 4192 path->slots[level] = 0; 4193 4194 while (1) { 4195 wret = walk_down_subtree(trans, root, path, &level); 4196 if (wret < 0) 4197 ret = wret; 4198 if (wret != 0) 4199 break; 4200 4201 wret = walk_up_tree(trans, root, path, &level, parent_level); 4202 if (wret < 0) 4203 ret = wret; 4204 if (wret != 0) 4205 break; 4206 } 4207 4208 btrfs_free_path(path); 4209 return ret; 4210 } 4211 4212 static unsigned long calc_ra(unsigned long start, unsigned long last, 4213 unsigned long nr) 4214 { 4215 return min(last, start + nr - 1); 4216 } 4217 4218 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 4219 u64 len) 4220 { 4221 u64 page_start; 4222 u64 page_end; 4223 unsigned long first_index; 4224 unsigned long last_index; 4225 unsigned long i; 4226 struct page *page; 4227 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4228 struct file_ra_state *ra; 4229 struct btrfs_ordered_extent *ordered; 4230 unsigned int total_read = 0; 4231 unsigned int total_dirty = 0; 4232 int ret = 0; 4233 4234 ra = kzalloc(sizeof(*ra), GFP_NOFS); 4235 4236 mutex_lock(&inode->i_mutex); 4237 first_index = start >> PAGE_CACHE_SHIFT; 4238 last_index = (start + len - 1) >> PAGE_CACHE_SHIFT; 4239 4240 /* make sure the dirty trick played by the caller work */ 4241 ret = invalidate_inode_pages2_range(inode->i_mapping, 4242 first_index, last_index); 4243 if (ret) 4244 goto out_unlock; 4245 4246 file_ra_state_init(ra, inode->i_mapping); 4247 4248 for (i = first_index ; i <= last_index; i++) { 4249 if (total_read % ra->ra_pages == 0) { 4250 btrfs_force_ra(inode->i_mapping, ra, NULL, i, 4251 calc_ra(i, last_index, ra->ra_pages)); 4252 } 4253 total_read++; 4254 again: 4255 if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode)) 4256 BUG_ON(1); 4257 page = grab_cache_page(inode->i_mapping, i); 4258 if (!page) { 4259 ret = -ENOMEM; 4260 goto out_unlock; 4261 } 4262 if (!PageUptodate(page)) { 4263 btrfs_readpage(NULL, page); 4264 lock_page(page); 4265 if (!PageUptodate(page)) { 4266 unlock_page(page); 4267 page_cache_release(page); 4268 ret = -EIO; 4269 goto out_unlock; 4270 } 4271 } 4272 wait_on_page_writeback(page); 4273 4274 page_start = (u64)page->index << PAGE_CACHE_SHIFT; 4275 page_end = page_start + PAGE_CACHE_SIZE - 1; 4276 lock_extent(io_tree, page_start, page_end, GFP_NOFS); 4277 4278 ordered = btrfs_lookup_ordered_extent(inode, page_start); 4279 if (ordered) { 4280 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4281 unlock_page(page); 4282 page_cache_release(page); 4283 btrfs_start_ordered_extent(inode, ordered, 1); 4284 btrfs_put_ordered_extent(ordered); 4285 goto again; 4286 } 4287 set_page_extent_mapped(page); 4288 4289 if (i == first_index) 4290 set_extent_bits(io_tree, page_start, page_end, 4291 EXTENT_BOUNDARY, GFP_NOFS); 4292 btrfs_set_extent_delalloc(inode, page_start, page_end); 4293 4294 set_page_dirty(page); 4295 total_dirty++; 4296 4297 unlock_extent(io_tree, page_start, page_end, GFP_NOFS); 4298 unlock_page(page); 4299 page_cache_release(page); 4300 } 4301 4302 out_unlock: 4303 kfree(ra); 4304 mutex_unlock(&inode->i_mutex); 4305 balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty); 4306 return ret; 4307 } 4308 4309 static noinline int relocate_data_extent(struct inode *reloc_inode, 4310 struct btrfs_key *extent_key, 4311 u64 offset) 4312 { 4313 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 4314 struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree; 4315 struct extent_map *em; 4316 u64 start = extent_key->objectid - offset; 4317 u64 end = start + extent_key->offset - 1; 4318 4319 em = alloc_extent_map(GFP_NOFS); 4320 BUG_ON(!em || IS_ERR(em)); 4321 4322 em->start = start; 4323 em->len = extent_key->offset; 4324 em->block_len = extent_key->offset; 4325 em->block_start = extent_key->objectid; 4326 em->bdev = root->fs_info->fs_devices->latest_bdev; 4327 set_bit(EXTENT_FLAG_PINNED, &em->flags); 4328 4329 /* setup extent map to cheat btrfs_readpage */ 4330 lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 4331 while (1) { 4332 int ret; 4333 spin_lock(&em_tree->lock); 4334 ret = add_extent_mapping(em_tree, em); 4335 spin_unlock(&em_tree->lock); 4336 if (ret != -EEXIST) { 4337 free_extent_map(em); 4338 break; 4339 } 4340 btrfs_drop_extent_cache(reloc_inode, start, end, 0); 4341 } 4342 unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS); 4343 4344 return relocate_inode_pages(reloc_inode, start, extent_key->offset); 4345 } 4346 4347 struct btrfs_ref_path { 4348 u64 extent_start; 4349 u64 nodes[BTRFS_MAX_LEVEL]; 4350 u64 root_objectid; 4351 u64 root_generation; 4352 u64 owner_objectid; 4353 u32 num_refs; 4354 int lowest_level; 4355 int current_level; 4356 int shared_level; 4357 4358 struct btrfs_key node_keys[BTRFS_MAX_LEVEL]; 4359 u64 new_nodes[BTRFS_MAX_LEVEL]; 4360 }; 4361 4362 struct disk_extent { 4363 u64 ram_bytes; 4364 u64 disk_bytenr; 4365 u64 disk_num_bytes; 4366 u64 offset; 4367 u64 num_bytes; 4368 u8 compression; 4369 u8 encryption; 4370 u16 other_encoding; 4371 }; 4372 4373 static int is_cowonly_root(u64 root_objectid) 4374 { 4375 if (root_objectid == BTRFS_ROOT_TREE_OBJECTID || 4376 root_objectid == BTRFS_EXTENT_TREE_OBJECTID || 4377 root_objectid == BTRFS_CHUNK_TREE_OBJECTID || 4378 root_objectid == BTRFS_DEV_TREE_OBJECTID || 4379 root_objectid == BTRFS_TREE_LOG_OBJECTID || 4380 root_objectid == BTRFS_CSUM_TREE_OBJECTID) 4381 return 1; 4382 return 0; 4383 } 4384 4385 static noinline int __next_ref_path(struct btrfs_trans_handle *trans, 4386 struct btrfs_root *extent_root, 4387 struct btrfs_ref_path *ref_path, 4388 int first_time) 4389 { 4390 struct extent_buffer *leaf; 4391 struct btrfs_path *path; 4392 struct btrfs_extent_ref *ref; 4393 struct btrfs_key key; 4394 struct btrfs_key found_key; 4395 u64 bytenr; 4396 u32 nritems; 4397 int level; 4398 int ret = 1; 4399 4400 path = btrfs_alloc_path(); 4401 if (!path) 4402 return -ENOMEM; 4403 4404 if (first_time) { 4405 ref_path->lowest_level = -1; 4406 ref_path->current_level = -1; 4407 ref_path->shared_level = -1; 4408 goto walk_up; 4409 } 4410 walk_down: 4411 level = ref_path->current_level - 1; 4412 while (level >= -1) { 4413 u64 parent; 4414 if (level < ref_path->lowest_level) 4415 break; 4416 4417 if (level >= 0) 4418 bytenr = ref_path->nodes[level]; 4419 else 4420 bytenr = ref_path->extent_start; 4421 BUG_ON(bytenr == 0); 4422 4423 parent = ref_path->nodes[level + 1]; 4424 ref_path->nodes[level + 1] = 0; 4425 ref_path->current_level = level; 4426 BUG_ON(parent == 0); 4427 4428 key.objectid = bytenr; 4429 key.offset = parent + 1; 4430 key.type = BTRFS_EXTENT_REF_KEY; 4431 4432 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 4433 if (ret < 0) 4434 goto out; 4435 BUG_ON(ret == 0); 4436 4437 leaf = path->nodes[0]; 4438 nritems = btrfs_header_nritems(leaf); 4439 if (path->slots[0] >= nritems) { 4440 ret = btrfs_next_leaf(extent_root, path); 4441 if (ret < 0) 4442 goto out; 4443 if (ret > 0) 4444 goto next; 4445 leaf = path->nodes[0]; 4446 } 4447 4448 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4449 if (found_key.objectid == bytenr && 4450 found_key.type == BTRFS_EXTENT_REF_KEY) { 4451 if (level < ref_path->shared_level) 4452 ref_path->shared_level = level; 4453 goto found; 4454 } 4455 next: 4456 level--; 4457 btrfs_release_path(extent_root, path); 4458 cond_resched(); 4459 } 4460 /* reached lowest level */ 4461 ret = 1; 4462 goto out; 4463 walk_up: 4464 level = ref_path->current_level; 4465 while (level < BTRFS_MAX_LEVEL - 1) { 4466 u64 ref_objectid; 4467 4468 if (level >= 0) 4469 bytenr = ref_path->nodes[level]; 4470 else 4471 bytenr = ref_path->extent_start; 4472 4473 BUG_ON(bytenr == 0); 4474 4475 key.objectid = bytenr; 4476 key.offset = 0; 4477 key.type = BTRFS_EXTENT_REF_KEY; 4478 4479 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0); 4480 if (ret < 0) 4481 goto out; 4482 4483 leaf = path->nodes[0]; 4484 nritems = btrfs_header_nritems(leaf); 4485 if (path->slots[0] >= nritems) { 4486 ret = btrfs_next_leaf(extent_root, path); 4487 if (ret < 0) 4488 goto out; 4489 if (ret > 0) { 4490 /* the extent was freed by someone */ 4491 if (ref_path->lowest_level == level) 4492 goto out; 4493 btrfs_release_path(extent_root, path); 4494 goto walk_down; 4495 } 4496 leaf = path->nodes[0]; 4497 } 4498 4499 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4500 if (found_key.objectid != bytenr || 4501 found_key.type != BTRFS_EXTENT_REF_KEY) { 4502 /* the extent was freed by someone */ 4503 if (ref_path->lowest_level == level) { 4504 ret = 1; 4505 goto out; 4506 } 4507 btrfs_release_path(extent_root, path); 4508 goto walk_down; 4509 } 4510 found: 4511 ref = btrfs_item_ptr(leaf, path->slots[0], 4512 struct btrfs_extent_ref); 4513 ref_objectid = btrfs_ref_objectid(leaf, ref); 4514 if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) { 4515 if (first_time) { 4516 level = (int)ref_objectid; 4517 BUG_ON(level >= BTRFS_MAX_LEVEL); 4518 ref_path->lowest_level = level; 4519 ref_path->current_level = level; 4520 ref_path->nodes[level] = bytenr; 4521 } else { 4522 WARN_ON(ref_objectid != level); 4523 } 4524 } else { 4525 WARN_ON(level != -1); 4526 } 4527 first_time = 0; 4528 4529 if (ref_path->lowest_level == level) { 4530 ref_path->owner_objectid = ref_objectid; 4531 ref_path->num_refs = btrfs_ref_num_refs(leaf, ref); 4532 } 4533 4534 /* 4535 * the block is tree root or the block isn't in reference 4536 * counted tree. 4537 */ 4538 if (found_key.objectid == found_key.offset || 4539 is_cowonly_root(btrfs_ref_root(leaf, ref))) { 4540 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 4541 ref_path->root_generation = 4542 btrfs_ref_generation(leaf, ref); 4543 if (level < 0) { 4544 /* special reference from the tree log */ 4545 ref_path->nodes[0] = found_key.offset; 4546 ref_path->current_level = 0; 4547 } 4548 ret = 0; 4549 goto out; 4550 } 4551 4552 level++; 4553 BUG_ON(ref_path->nodes[level] != 0); 4554 ref_path->nodes[level] = found_key.offset; 4555 ref_path->current_level = level; 4556 4557 /* 4558 * the reference was created in the running transaction, 4559 * no need to continue walking up. 4560 */ 4561 if (btrfs_ref_generation(leaf, ref) == trans->transid) { 4562 ref_path->root_objectid = btrfs_ref_root(leaf, ref); 4563 ref_path->root_generation = 4564 btrfs_ref_generation(leaf, ref); 4565 ret = 0; 4566 goto out; 4567 } 4568 4569 btrfs_release_path(extent_root, path); 4570 cond_resched(); 4571 } 4572 /* reached max tree level, but no tree root found. */ 4573 BUG(); 4574 out: 4575 btrfs_free_path(path); 4576 return ret; 4577 } 4578 4579 static int btrfs_first_ref_path(struct btrfs_trans_handle *trans, 4580 struct btrfs_root *extent_root, 4581 struct btrfs_ref_path *ref_path, 4582 u64 extent_start) 4583 { 4584 memset(ref_path, 0, sizeof(*ref_path)); 4585 ref_path->extent_start = extent_start; 4586 4587 return __next_ref_path(trans, extent_root, ref_path, 1); 4588 } 4589 4590 static int btrfs_next_ref_path(struct btrfs_trans_handle *trans, 4591 struct btrfs_root *extent_root, 4592 struct btrfs_ref_path *ref_path) 4593 { 4594 return __next_ref_path(trans, extent_root, ref_path, 0); 4595 } 4596 4597 static noinline int get_new_locations(struct inode *reloc_inode, 4598 struct btrfs_key *extent_key, 4599 u64 offset, int no_fragment, 4600 struct disk_extent **extents, 4601 int *nr_extents) 4602 { 4603 struct btrfs_root *root = BTRFS_I(reloc_inode)->root; 4604 struct btrfs_path *path; 4605 struct btrfs_file_extent_item *fi; 4606 struct extent_buffer *leaf; 4607 struct disk_extent *exts = *extents; 4608 struct btrfs_key found_key; 4609 u64 cur_pos; 4610 u64 last_byte; 4611 u32 nritems; 4612 int nr = 0; 4613 int max = *nr_extents; 4614 int ret; 4615 4616 WARN_ON(!no_fragment && *extents); 4617 if (!exts) { 4618 max = 1; 4619 exts = kmalloc(sizeof(*exts) * max, GFP_NOFS); 4620 if (!exts) 4621 return -ENOMEM; 4622 } 4623 4624 path = btrfs_alloc_path(); 4625 BUG_ON(!path); 4626 4627 cur_pos = extent_key->objectid - offset; 4628 last_byte = extent_key->objectid + extent_key->offset; 4629 ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino, 4630 cur_pos, 0); 4631 if (ret < 0) 4632 goto out; 4633 if (ret > 0) { 4634 ret = -ENOENT; 4635 goto out; 4636 } 4637 4638 while (1) { 4639 leaf = path->nodes[0]; 4640 nritems = btrfs_header_nritems(leaf); 4641 if (path->slots[0] >= nritems) { 4642 ret = btrfs_next_leaf(root, path); 4643 if (ret < 0) 4644 goto out; 4645 if (ret > 0) 4646 break; 4647 leaf = path->nodes[0]; 4648 } 4649 4650 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 4651 if (found_key.offset != cur_pos || 4652 found_key.type != BTRFS_EXTENT_DATA_KEY || 4653 found_key.objectid != reloc_inode->i_ino) 4654 break; 4655 4656 fi = btrfs_item_ptr(leaf, path->slots[0], 4657 struct btrfs_file_extent_item); 4658 if (btrfs_file_extent_type(leaf, fi) != 4659 BTRFS_FILE_EXTENT_REG || 4660 btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 4661 break; 4662 4663 if (nr == max) { 4664 struct disk_extent *old = exts; 4665 max *= 2; 4666 exts = kzalloc(sizeof(*exts) * max, GFP_NOFS); 4667 memcpy(exts, old, sizeof(*exts) * nr); 4668 if (old != *extents) 4669 kfree(old); 4670 } 4671 4672 exts[nr].disk_bytenr = 4673 btrfs_file_extent_disk_bytenr(leaf, fi); 4674 exts[nr].disk_num_bytes = 4675 btrfs_file_extent_disk_num_bytes(leaf, fi); 4676 exts[nr].offset = btrfs_file_extent_offset(leaf, fi); 4677 exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4678 exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi); 4679 exts[nr].compression = btrfs_file_extent_compression(leaf, fi); 4680 exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi); 4681 exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf, 4682 fi); 4683 BUG_ON(exts[nr].offset > 0); 4684 BUG_ON(exts[nr].compression || exts[nr].encryption); 4685 BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes); 4686 4687 cur_pos += exts[nr].num_bytes; 4688 nr++; 4689 4690 if (cur_pos + offset >= last_byte) 4691 break; 4692 4693 if (no_fragment) { 4694 ret = 1; 4695 goto out; 4696 } 4697 path->slots[0]++; 4698 } 4699 4700 BUG_ON(cur_pos + offset > last_byte); 4701 if (cur_pos + offset < last_byte) { 4702 ret = -ENOENT; 4703 goto out; 4704 } 4705 ret = 0; 4706 out: 4707 btrfs_free_path(path); 4708 if (ret) { 4709 if (exts != *extents) 4710 kfree(exts); 4711 } else { 4712 *extents = exts; 4713 *nr_extents = nr; 4714 } 4715 return ret; 4716 } 4717 4718 static noinline int replace_one_extent(struct btrfs_trans_handle *trans, 4719 struct btrfs_root *root, 4720 struct btrfs_path *path, 4721 struct btrfs_key *extent_key, 4722 struct btrfs_key *leaf_key, 4723 struct btrfs_ref_path *ref_path, 4724 struct disk_extent *new_extents, 4725 int nr_extents) 4726 { 4727 struct extent_buffer *leaf; 4728 struct btrfs_file_extent_item *fi; 4729 struct inode *inode = NULL; 4730 struct btrfs_key key; 4731 u64 lock_start = 0; 4732 u64 lock_end = 0; 4733 u64 num_bytes; 4734 u64 ext_offset; 4735 u64 search_end = (u64)-1; 4736 u32 nritems; 4737 int nr_scaned = 0; 4738 int extent_locked = 0; 4739 int extent_type; 4740 int ret; 4741 4742 memcpy(&key, leaf_key, sizeof(key)); 4743 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 4744 if (key.objectid < ref_path->owner_objectid || 4745 (key.objectid == ref_path->owner_objectid && 4746 key.type < BTRFS_EXTENT_DATA_KEY)) { 4747 key.objectid = ref_path->owner_objectid; 4748 key.type = BTRFS_EXTENT_DATA_KEY; 4749 key.offset = 0; 4750 } 4751 } 4752 4753 while (1) { 4754 ret = btrfs_search_slot(trans, root, &key, path, 0, 1); 4755 if (ret < 0) 4756 goto out; 4757 4758 leaf = path->nodes[0]; 4759 nritems = btrfs_header_nritems(leaf); 4760 next: 4761 if (extent_locked && ret > 0) { 4762 /* 4763 * the file extent item was modified by someone 4764 * before the extent got locked. 4765 */ 4766 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4767 lock_end, GFP_NOFS); 4768 extent_locked = 0; 4769 } 4770 4771 if (path->slots[0] >= nritems) { 4772 if (++nr_scaned > 2) 4773 break; 4774 4775 BUG_ON(extent_locked); 4776 ret = btrfs_next_leaf(root, path); 4777 if (ret < 0) 4778 goto out; 4779 if (ret > 0) 4780 break; 4781 leaf = path->nodes[0]; 4782 nritems = btrfs_header_nritems(leaf); 4783 } 4784 4785 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 4786 4787 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) { 4788 if ((key.objectid > ref_path->owner_objectid) || 4789 (key.objectid == ref_path->owner_objectid && 4790 key.type > BTRFS_EXTENT_DATA_KEY) || 4791 key.offset >= search_end) 4792 break; 4793 } 4794 4795 if (inode && key.objectid != inode->i_ino) { 4796 BUG_ON(extent_locked); 4797 btrfs_release_path(root, path); 4798 mutex_unlock(&inode->i_mutex); 4799 iput(inode); 4800 inode = NULL; 4801 continue; 4802 } 4803 4804 if (key.type != BTRFS_EXTENT_DATA_KEY) { 4805 path->slots[0]++; 4806 ret = 1; 4807 goto next; 4808 } 4809 fi = btrfs_item_ptr(leaf, path->slots[0], 4810 struct btrfs_file_extent_item); 4811 extent_type = btrfs_file_extent_type(leaf, fi); 4812 if ((extent_type != BTRFS_FILE_EXTENT_REG && 4813 extent_type != BTRFS_FILE_EXTENT_PREALLOC) || 4814 (btrfs_file_extent_disk_bytenr(leaf, fi) != 4815 extent_key->objectid)) { 4816 path->slots[0]++; 4817 ret = 1; 4818 goto next; 4819 } 4820 4821 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 4822 ext_offset = btrfs_file_extent_offset(leaf, fi); 4823 4824 if (search_end == (u64)-1) { 4825 search_end = key.offset - ext_offset + 4826 btrfs_file_extent_ram_bytes(leaf, fi); 4827 } 4828 4829 if (!extent_locked) { 4830 lock_start = key.offset; 4831 lock_end = lock_start + num_bytes - 1; 4832 } else { 4833 if (lock_start > key.offset || 4834 lock_end + 1 < key.offset + num_bytes) { 4835 unlock_extent(&BTRFS_I(inode)->io_tree, 4836 lock_start, lock_end, GFP_NOFS); 4837 extent_locked = 0; 4838 } 4839 } 4840 4841 if (!inode) { 4842 btrfs_release_path(root, path); 4843 4844 inode = btrfs_iget_locked(root->fs_info->sb, 4845 key.objectid, root); 4846 if (inode->i_state & I_NEW) { 4847 BTRFS_I(inode)->root = root; 4848 BTRFS_I(inode)->location.objectid = 4849 key.objectid; 4850 BTRFS_I(inode)->location.type = 4851 BTRFS_INODE_ITEM_KEY; 4852 BTRFS_I(inode)->location.offset = 0; 4853 btrfs_read_locked_inode(inode); 4854 unlock_new_inode(inode); 4855 } 4856 /* 4857 * some code call btrfs_commit_transaction while 4858 * holding the i_mutex, so we can't use mutex_lock 4859 * here. 4860 */ 4861 if (is_bad_inode(inode) || 4862 !mutex_trylock(&inode->i_mutex)) { 4863 iput(inode); 4864 inode = NULL; 4865 key.offset = (u64)-1; 4866 goto skip; 4867 } 4868 } 4869 4870 if (!extent_locked) { 4871 struct btrfs_ordered_extent *ordered; 4872 4873 btrfs_release_path(root, path); 4874 4875 lock_extent(&BTRFS_I(inode)->io_tree, lock_start, 4876 lock_end, GFP_NOFS); 4877 ordered = btrfs_lookup_first_ordered_extent(inode, 4878 lock_end); 4879 if (ordered && 4880 ordered->file_offset <= lock_end && 4881 ordered->file_offset + ordered->len > lock_start) { 4882 unlock_extent(&BTRFS_I(inode)->io_tree, 4883 lock_start, lock_end, GFP_NOFS); 4884 btrfs_start_ordered_extent(inode, ordered, 1); 4885 btrfs_put_ordered_extent(ordered); 4886 key.offset += num_bytes; 4887 goto skip; 4888 } 4889 if (ordered) 4890 btrfs_put_ordered_extent(ordered); 4891 4892 extent_locked = 1; 4893 continue; 4894 } 4895 4896 if (nr_extents == 1) { 4897 /* update extent pointer in place */ 4898 btrfs_set_file_extent_disk_bytenr(leaf, fi, 4899 new_extents[0].disk_bytenr); 4900 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 4901 new_extents[0].disk_num_bytes); 4902 btrfs_mark_buffer_dirty(leaf); 4903 4904 btrfs_drop_extent_cache(inode, key.offset, 4905 key.offset + num_bytes - 1, 0); 4906 4907 ret = btrfs_inc_extent_ref(trans, root, 4908 new_extents[0].disk_bytenr, 4909 new_extents[0].disk_num_bytes, 4910 leaf->start, 4911 root->root_key.objectid, 4912 trans->transid, 4913 key.objectid); 4914 BUG_ON(ret); 4915 4916 ret = btrfs_free_extent(trans, root, 4917 extent_key->objectid, 4918 extent_key->offset, 4919 leaf->start, 4920 btrfs_header_owner(leaf), 4921 btrfs_header_generation(leaf), 4922 key.objectid, 0); 4923 BUG_ON(ret); 4924 4925 btrfs_release_path(root, path); 4926 key.offset += num_bytes; 4927 } else { 4928 BUG_ON(1); 4929 #if 0 4930 u64 alloc_hint; 4931 u64 extent_len; 4932 int i; 4933 /* 4934 * drop old extent pointer at first, then insert the 4935 * new pointers one bye one 4936 */ 4937 btrfs_release_path(root, path); 4938 ret = btrfs_drop_extents(trans, root, inode, key.offset, 4939 key.offset + num_bytes, 4940 key.offset, &alloc_hint); 4941 BUG_ON(ret); 4942 4943 for (i = 0; i < nr_extents; i++) { 4944 if (ext_offset >= new_extents[i].num_bytes) { 4945 ext_offset -= new_extents[i].num_bytes; 4946 continue; 4947 } 4948 extent_len = min(new_extents[i].num_bytes - 4949 ext_offset, num_bytes); 4950 4951 ret = btrfs_insert_empty_item(trans, root, 4952 path, &key, 4953 sizeof(*fi)); 4954 BUG_ON(ret); 4955 4956 leaf = path->nodes[0]; 4957 fi = btrfs_item_ptr(leaf, path->slots[0], 4958 struct btrfs_file_extent_item); 4959 btrfs_set_file_extent_generation(leaf, fi, 4960 trans->transid); 4961 btrfs_set_file_extent_type(leaf, fi, 4962 BTRFS_FILE_EXTENT_REG); 4963 btrfs_set_file_extent_disk_bytenr(leaf, fi, 4964 new_extents[i].disk_bytenr); 4965 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 4966 new_extents[i].disk_num_bytes); 4967 btrfs_set_file_extent_ram_bytes(leaf, fi, 4968 new_extents[i].ram_bytes); 4969 4970 btrfs_set_file_extent_compression(leaf, fi, 4971 new_extents[i].compression); 4972 btrfs_set_file_extent_encryption(leaf, fi, 4973 new_extents[i].encryption); 4974 btrfs_set_file_extent_other_encoding(leaf, fi, 4975 new_extents[i].other_encoding); 4976 4977 btrfs_set_file_extent_num_bytes(leaf, fi, 4978 extent_len); 4979 ext_offset += new_extents[i].offset; 4980 btrfs_set_file_extent_offset(leaf, fi, 4981 ext_offset); 4982 btrfs_mark_buffer_dirty(leaf); 4983 4984 btrfs_drop_extent_cache(inode, key.offset, 4985 key.offset + extent_len - 1, 0); 4986 4987 ret = btrfs_inc_extent_ref(trans, root, 4988 new_extents[i].disk_bytenr, 4989 new_extents[i].disk_num_bytes, 4990 leaf->start, 4991 root->root_key.objectid, 4992 trans->transid, key.objectid); 4993 BUG_ON(ret); 4994 btrfs_release_path(root, path); 4995 4996 inode_add_bytes(inode, extent_len); 4997 4998 ext_offset = 0; 4999 num_bytes -= extent_len; 5000 key.offset += extent_len; 5001 5002 if (num_bytes == 0) 5003 break; 5004 } 5005 BUG_ON(i >= nr_extents); 5006 #endif 5007 } 5008 5009 if (extent_locked) { 5010 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5011 lock_end, GFP_NOFS); 5012 extent_locked = 0; 5013 } 5014 skip: 5015 if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS && 5016 key.offset >= search_end) 5017 break; 5018 5019 cond_resched(); 5020 } 5021 ret = 0; 5022 out: 5023 btrfs_release_path(root, path); 5024 if (inode) { 5025 mutex_unlock(&inode->i_mutex); 5026 if (extent_locked) { 5027 unlock_extent(&BTRFS_I(inode)->io_tree, lock_start, 5028 lock_end, GFP_NOFS); 5029 } 5030 iput(inode); 5031 } 5032 return ret; 5033 } 5034 5035 int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans, 5036 struct btrfs_root *root, 5037 struct extent_buffer *buf, u64 orig_start) 5038 { 5039 int level; 5040 int ret; 5041 5042 BUG_ON(btrfs_header_generation(buf) != trans->transid); 5043 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 5044 5045 level = btrfs_header_level(buf); 5046 if (level == 0) { 5047 struct btrfs_leaf_ref *ref; 5048 struct btrfs_leaf_ref *orig_ref; 5049 5050 orig_ref = btrfs_lookup_leaf_ref(root, orig_start); 5051 if (!orig_ref) 5052 return -ENOENT; 5053 5054 ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems); 5055 if (!ref) { 5056 btrfs_free_leaf_ref(root, orig_ref); 5057 return -ENOMEM; 5058 } 5059 5060 ref->nritems = orig_ref->nritems; 5061 memcpy(ref->extents, orig_ref->extents, 5062 sizeof(ref->extents[0]) * ref->nritems); 5063 5064 btrfs_free_leaf_ref(root, orig_ref); 5065 5066 ref->root_gen = trans->transid; 5067 ref->bytenr = buf->start; 5068 ref->owner = btrfs_header_owner(buf); 5069 ref->generation = btrfs_header_generation(buf); 5070 5071 ret = btrfs_add_leaf_ref(root, ref, 0); 5072 WARN_ON(ret); 5073 btrfs_free_leaf_ref(root, ref); 5074 } 5075 return 0; 5076 } 5077 5078 static noinline int invalidate_extent_cache(struct btrfs_root *root, 5079 struct extent_buffer *leaf, 5080 struct btrfs_block_group_cache *group, 5081 struct btrfs_root *target_root) 5082 { 5083 struct btrfs_key key; 5084 struct inode *inode = NULL; 5085 struct btrfs_file_extent_item *fi; 5086 u64 num_bytes; 5087 u64 skip_objectid = 0; 5088 u32 nritems; 5089 u32 i; 5090 5091 nritems = btrfs_header_nritems(leaf); 5092 for (i = 0; i < nritems; i++) { 5093 btrfs_item_key_to_cpu(leaf, &key, i); 5094 if (key.objectid == skip_objectid || 5095 key.type != BTRFS_EXTENT_DATA_KEY) 5096 continue; 5097 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 5098 if (btrfs_file_extent_type(leaf, fi) == 5099 BTRFS_FILE_EXTENT_INLINE) 5100 continue; 5101 if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0) 5102 continue; 5103 if (!inode || inode->i_ino != key.objectid) { 5104 iput(inode); 5105 inode = btrfs_ilookup(target_root->fs_info->sb, 5106 key.objectid, target_root, 1); 5107 } 5108 if (!inode) { 5109 skip_objectid = key.objectid; 5110 continue; 5111 } 5112 num_bytes = btrfs_file_extent_num_bytes(leaf, fi); 5113 5114 lock_extent(&BTRFS_I(inode)->io_tree, key.offset, 5115 key.offset + num_bytes - 1, GFP_NOFS); 5116 btrfs_drop_extent_cache(inode, key.offset, 5117 key.offset + num_bytes - 1, 1); 5118 unlock_extent(&BTRFS_I(inode)->io_tree, key.offset, 5119 key.offset + num_bytes - 1, GFP_NOFS); 5120 cond_resched(); 5121 } 5122 iput(inode); 5123 return 0; 5124 } 5125 5126 static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, 5127 struct btrfs_root *root, 5128 struct extent_buffer *leaf, 5129 struct btrfs_block_group_cache *group, 5130 struct inode *reloc_inode) 5131 { 5132 struct btrfs_key key; 5133 struct btrfs_key extent_key; 5134 struct btrfs_file_extent_item *fi; 5135 struct btrfs_leaf_ref *ref; 5136 struct disk_extent *new_extent; 5137 u64 bytenr; 5138 u64 num_bytes; 5139 u32 nritems; 5140 u32 i; 5141 int ext_index; 5142 int nr_extent; 5143 int ret; 5144 5145 new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS); 5146 BUG_ON(!new_extent); 5147 5148 ref = btrfs_lookup_leaf_ref(root, leaf->start); 5149 BUG_ON(!ref); 5150 5151 ext_index = -1; 5152 nritems = btrfs_header_nritems(leaf); 5153 for (i = 0; i < nritems; i++) { 5154 btrfs_item_key_to_cpu(leaf, &key, i); 5155 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 5156 continue; 5157 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 5158 if (btrfs_file_extent_type(leaf, fi) == 5159 BTRFS_FILE_EXTENT_INLINE) 5160 continue; 5161 bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 5162 num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi); 5163 if (bytenr == 0) 5164 continue; 5165 5166 ext_index++; 5167 if (bytenr >= group->key.objectid + group->key.offset || 5168 bytenr + num_bytes <= group->key.objectid) 5169 continue; 5170 5171 extent_key.objectid = bytenr; 5172 extent_key.offset = num_bytes; 5173 extent_key.type = BTRFS_EXTENT_ITEM_KEY; 5174 nr_extent = 1; 5175 ret = get_new_locations(reloc_inode, &extent_key, 5176 group->key.objectid, 1, 5177 &new_extent, &nr_extent); 5178 if (ret > 0) 5179 continue; 5180 BUG_ON(ret < 0); 5181 5182 BUG_ON(ref->extents[ext_index].bytenr != bytenr); 5183 BUG_ON(ref->extents[ext_index].num_bytes != num_bytes); 5184 ref->extents[ext_index].bytenr = new_extent->disk_bytenr; 5185 ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes; 5186 5187 btrfs_set_file_extent_disk_bytenr(leaf, fi, 5188 new_extent->disk_bytenr); 5189 btrfs_set_file_extent_disk_num_bytes(leaf, fi, 5190 new_extent->disk_num_bytes); 5191 btrfs_mark_buffer_dirty(leaf); 5192 5193 ret = btrfs_inc_extent_ref(trans, root, 5194 new_extent->disk_bytenr, 5195 new_extent->disk_num_bytes, 5196 leaf->start, 5197 root->root_key.objectid, 5198 trans->transid, key.objectid); 5199 BUG_ON(ret); 5200 ret = btrfs_free_extent(trans, root, 5201 bytenr, num_bytes, leaf->start, 5202 btrfs_header_owner(leaf), 5203 btrfs_header_generation(leaf), 5204 key.objectid, 0); 5205 BUG_ON(ret); 5206 cond_resched(); 5207 } 5208 kfree(new_extent); 5209 BUG_ON(ext_index + 1 != ref->nritems); 5210 btrfs_free_leaf_ref(root, ref); 5211 return 0; 5212 } 5213 5214 int btrfs_free_reloc_root(struct btrfs_trans_handle *trans, 5215 struct btrfs_root *root) 5216 { 5217 struct btrfs_root *reloc_root; 5218 int ret; 5219 5220 if (root->reloc_root) { 5221 reloc_root = root->reloc_root; 5222 root->reloc_root = NULL; 5223 list_add(&reloc_root->dead_list, 5224 &root->fs_info->dead_reloc_roots); 5225 5226 btrfs_set_root_bytenr(&reloc_root->root_item, 5227 reloc_root->node->start); 5228 btrfs_set_root_level(&root->root_item, 5229 btrfs_header_level(reloc_root->node)); 5230 memset(&reloc_root->root_item.drop_progress, 0, 5231 sizeof(struct btrfs_disk_key)); 5232 reloc_root->root_item.drop_level = 0; 5233 5234 ret = btrfs_update_root(trans, root->fs_info->tree_root, 5235 &reloc_root->root_key, 5236 &reloc_root->root_item); 5237 BUG_ON(ret); 5238 } 5239 return 0; 5240 } 5241 5242 int btrfs_drop_dead_reloc_roots(struct btrfs_root *root) 5243 { 5244 struct btrfs_trans_handle *trans; 5245 struct btrfs_root *reloc_root; 5246 struct btrfs_root *prev_root = NULL; 5247 struct list_head dead_roots; 5248 int ret; 5249 unsigned long nr; 5250 5251 INIT_LIST_HEAD(&dead_roots); 5252 list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots); 5253 5254 while (!list_empty(&dead_roots)) { 5255 reloc_root = list_entry(dead_roots.prev, 5256 struct btrfs_root, dead_list); 5257 list_del_init(&reloc_root->dead_list); 5258 5259 BUG_ON(reloc_root->commit_root != NULL); 5260 while (1) { 5261 trans = btrfs_join_transaction(root, 1); 5262 BUG_ON(!trans); 5263 5264 mutex_lock(&root->fs_info->drop_mutex); 5265 ret = btrfs_drop_snapshot(trans, reloc_root); 5266 if (ret != -EAGAIN) 5267 break; 5268 mutex_unlock(&root->fs_info->drop_mutex); 5269 5270 nr = trans->blocks_used; 5271 ret = btrfs_end_transaction(trans, root); 5272 BUG_ON(ret); 5273 btrfs_btree_balance_dirty(root, nr); 5274 } 5275 5276 free_extent_buffer(reloc_root->node); 5277 5278 ret = btrfs_del_root(trans, root->fs_info->tree_root, 5279 &reloc_root->root_key); 5280 BUG_ON(ret); 5281 mutex_unlock(&root->fs_info->drop_mutex); 5282 5283 nr = trans->blocks_used; 5284 ret = btrfs_end_transaction(trans, root); 5285 BUG_ON(ret); 5286 btrfs_btree_balance_dirty(root, nr); 5287 5288 kfree(prev_root); 5289 prev_root = reloc_root; 5290 } 5291 if (prev_root) { 5292 btrfs_remove_leaf_refs(prev_root, (u64)-1, 0); 5293 kfree(prev_root); 5294 } 5295 return 0; 5296 } 5297 5298 int btrfs_add_dead_reloc_root(struct btrfs_root *root) 5299 { 5300 list_add(&root->dead_list, &root->fs_info->dead_reloc_roots); 5301 return 0; 5302 } 5303 5304 int btrfs_cleanup_reloc_trees(struct btrfs_root *root) 5305 { 5306 struct btrfs_root *reloc_root; 5307 struct btrfs_trans_handle *trans; 5308 struct btrfs_key location; 5309 int found; 5310 int ret; 5311 5312 mutex_lock(&root->fs_info->tree_reloc_mutex); 5313 ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL); 5314 BUG_ON(ret); 5315 found = !list_empty(&root->fs_info->dead_reloc_roots); 5316 mutex_unlock(&root->fs_info->tree_reloc_mutex); 5317 5318 if (found) { 5319 trans = btrfs_start_transaction(root, 1); 5320 BUG_ON(!trans); 5321 ret = btrfs_commit_transaction(trans, root); 5322 BUG_ON(ret); 5323 } 5324 5325 location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 5326 location.offset = (u64)-1; 5327 location.type = BTRFS_ROOT_ITEM_KEY; 5328 5329 reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location); 5330 BUG_ON(!reloc_root); 5331 btrfs_orphan_cleanup(reloc_root); 5332 return 0; 5333 } 5334 5335 static noinline int init_reloc_tree(struct btrfs_trans_handle *trans, 5336 struct btrfs_root *root) 5337 { 5338 struct btrfs_root *reloc_root; 5339 struct extent_buffer *eb; 5340 struct btrfs_root_item *root_item; 5341 struct btrfs_key root_key; 5342 int ret; 5343 5344 BUG_ON(!root->ref_cows); 5345 if (root->reloc_root) 5346 return 0; 5347 5348 root_item = kmalloc(sizeof(*root_item), GFP_NOFS); 5349 BUG_ON(!root_item); 5350 5351 ret = btrfs_copy_root(trans, root, root->commit_root, 5352 &eb, BTRFS_TREE_RELOC_OBJECTID); 5353 BUG_ON(ret); 5354 5355 root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; 5356 root_key.offset = root->root_key.objectid; 5357 root_key.type = BTRFS_ROOT_ITEM_KEY; 5358 5359 memcpy(root_item, &root->root_item, sizeof(root_item)); 5360 btrfs_set_root_refs(root_item, 0); 5361 btrfs_set_root_bytenr(root_item, eb->start); 5362 btrfs_set_root_level(root_item, btrfs_header_level(eb)); 5363 btrfs_set_root_generation(root_item, trans->transid); 5364 5365 btrfs_tree_unlock(eb); 5366 free_extent_buffer(eb); 5367 5368 ret = btrfs_insert_root(trans, root->fs_info->tree_root, 5369 &root_key, root_item); 5370 BUG_ON(ret); 5371 kfree(root_item); 5372 5373 reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root, 5374 &root_key); 5375 BUG_ON(!reloc_root); 5376 reloc_root->last_trans = trans->transid; 5377 reloc_root->commit_root = NULL; 5378 reloc_root->ref_tree = &root->fs_info->reloc_ref_tree; 5379 5380 root->reloc_root = reloc_root; 5381 return 0; 5382 } 5383 5384 /* 5385 * Core function of space balance. 5386 * 5387 * The idea is using reloc trees to relocate tree blocks in reference 5388 * counted roots. There is one reloc tree for each subvol, and all 5389 * reloc trees share same root key objectid. Reloc trees are snapshots 5390 * of the latest committed roots of subvols (root->commit_root). 5391 * 5392 * To relocate a tree block referenced by a subvol, there are two steps. 5393 * COW the block through subvol's reloc tree, then update block pointer 5394 * in the subvol to point to the new block. Since all reloc trees share 5395 * same root key objectid, doing special handing for tree blocks owned 5396 * by them is easy. Once a tree block has been COWed in one reloc tree, 5397 * we can use the resulting new block directly when the same block is 5398 * required to COW again through other reloc trees. By this way, relocated 5399 * tree blocks are shared between reloc trees, so they are also shared 5400 * between subvols. 5401 */ 5402 static noinline int relocate_one_path(struct btrfs_trans_handle *trans, 5403 struct btrfs_root *root, 5404 struct btrfs_path *path, 5405 struct btrfs_key *first_key, 5406 struct btrfs_ref_path *ref_path, 5407 struct btrfs_block_group_cache *group, 5408 struct inode *reloc_inode) 5409 { 5410 struct btrfs_root *reloc_root; 5411 struct extent_buffer *eb = NULL; 5412 struct btrfs_key *keys; 5413 u64 *nodes; 5414 int level; 5415 int shared_level; 5416 int lowest_level = 0; 5417 int ret; 5418 5419 if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 5420 lowest_level = ref_path->owner_objectid; 5421 5422 if (!root->ref_cows) { 5423 path->lowest_level = lowest_level; 5424 ret = btrfs_search_slot(trans, root, first_key, path, 0, 1); 5425 BUG_ON(ret < 0); 5426 path->lowest_level = 0; 5427 btrfs_release_path(root, path); 5428 return 0; 5429 } 5430 5431 mutex_lock(&root->fs_info->tree_reloc_mutex); 5432 ret = init_reloc_tree(trans, root); 5433 BUG_ON(ret); 5434 reloc_root = root->reloc_root; 5435 5436 shared_level = ref_path->shared_level; 5437 ref_path->shared_level = BTRFS_MAX_LEVEL - 1; 5438 5439 keys = ref_path->node_keys; 5440 nodes = ref_path->new_nodes; 5441 memset(&keys[shared_level + 1], 0, 5442 sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1)); 5443 memset(&nodes[shared_level + 1], 0, 5444 sizeof(*nodes) * (BTRFS_MAX_LEVEL - shared_level - 1)); 5445 5446 if (nodes[lowest_level] == 0) { 5447 path->lowest_level = lowest_level; 5448 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 5449 0, 1); 5450 BUG_ON(ret); 5451 for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) { 5452 eb = path->nodes[level]; 5453 if (!eb || eb == reloc_root->node) 5454 break; 5455 nodes[level] = eb->start; 5456 if (level == 0) 5457 btrfs_item_key_to_cpu(eb, &keys[level], 0); 5458 else 5459 btrfs_node_key_to_cpu(eb, &keys[level], 0); 5460 } 5461 if (nodes[0] && 5462 ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5463 eb = path->nodes[0]; 5464 ret = replace_extents_in_leaf(trans, reloc_root, eb, 5465 group, reloc_inode); 5466 BUG_ON(ret); 5467 } 5468 btrfs_release_path(reloc_root, path); 5469 } else { 5470 ret = btrfs_merge_path(trans, reloc_root, keys, nodes, 5471 lowest_level); 5472 BUG_ON(ret); 5473 } 5474 5475 /* 5476 * replace tree blocks in the fs tree with tree blocks in 5477 * the reloc tree. 5478 */ 5479 ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level); 5480 BUG_ON(ret < 0); 5481 5482 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5483 ret = btrfs_search_slot(trans, reloc_root, first_key, path, 5484 0, 0); 5485 BUG_ON(ret); 5486 extent_buffer_get(path->nodes[0]); 5487 eb = path->nodes[0]; 5488 btrfs_release_path(reloc_root, path); 5489 ret = invalidate_extent_cache(reloc_root, eb, group, root); 5490 BUG_ON(ret); 5491 free_extent_buffer(eb); 5492 } 5493 5494 mutex_unlock(&root->fs_info->tree_reloc_mutex); 5495 path->lowest_level = 0; 5496 return 0; 5497 } 5498 5499 static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, 5500 struct btrfs_root *root, 5501 struct btrfs_path *path, 5502 struct btrfs_key *first_key, 5503 struct btrfs_ref_path *ref_path) 5504 { 5505 int ret; 5506 5507 ret = relocate_one_path(trans, root, path, first_key, 5508 ref_path, NULL, NULL); 5509 BUG_ON(ret); 5510 5511 if (root == root->fs_info->extent_root) 5512 btrfs_extent_post_op(trans, root); 5513 5514 return 0; 5515 } 5516 5517 static noinline int del_extent_zero(struct btrfs_trans_handle *trans, 5518 struct btrfs_root *extent_root, 5519 struct btrfs_path *path, 5520 struct btrfs_key *extent_key) 5521 { 5522 int ret; 5523 5524 ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1); 5525 if (ret) 5526 goto out; 5527 ret = btrfs_del_item(trans, extent_root, path); 5528 out: 5529 btrfs_release_path(extent_root, path); 5530 return ret; 5531 } 5532 5533 static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info, 5534 struct btrfs_ref_path *ref_path) 5535 { 5536 struct btrfs_key root_key; 5537 5538 root_key.objectid = ref_path->root_objectid; 5539 root_key.type = BTRFS_ROOT_ITEM_KEY; 5540 if (is_cowonly_root(ref_path->root_objectid)) 5541 root_key.offset = 0; 5542 else 5543 root_key.offset = (u64)-1; 5544 5545 return btrfs_read_fs_root_no_name(fs_info, &root_key); 5546 } 5547 5548 static noinline int relocate_one_extent(struct btrfs_root *extent_root, 5549 struct btrfs_path *path, 5550 struct btrfs_key *extent_key, 5551 struct btrfs_block_group_cache *group, 5552 struct inode *reloc_inode, int pass) 5553 { 5554 struct btrfs_trans_handle *trans; 5555 struct btrfs_root *found_root; 5556 struct btrfs_ref_path *ref_path = NULL; 5557 struct disk_extent *new_extents = NULL; 5558 int nr_extents = 0; 5559 int loops; 5560 int ret; 5561 int level; 5562 struct btrfs_key first_key; 5563 u64 prev_block = 0; 5564 5565 5566 trans = btrfs_start_transaction(extent_root, 1); 5567 BUG_ON(!trans); 5568 5569 if (extent_key->objectid == 0) { 5570 ret = del_extent_zero(trans, extent_root, path, extent_key); 5571 goto out; 5572 } 5573 5574 ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS); 5575 if (!ref_path) { 5576 ret = -ENOMEM; 5577 goto out; 5578 } 5579 5580 for (loops = 0; ; loops++) { 5581 if (loops == 0) { 5582 ret = btrfs_first_ref_path(trans, extent_root, ref_path, 5583 extent_key->objectid); 5584 } else { 5585 ret = btrfs_next_ref_path(trans, extent_root, ref_path); 5586 } 5587 if (ret < 0) 5588 goto out; 5589 if (ret > 0) 5590 break; 5591 5592 if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID || 5593 ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID) 5594 continue; 5595 5596 found_root = read_ref_root(extent_root->fs_info, ref_path); 5597 BUG_ON(!found_root); 5598 /* 5599 * for reference counted tree, only process reference paths 5600 * rooted at the latest committed root. 5601 */ 5602 if (found_root->ref_cows && 5603 ref_path->root_generation != found_root->root_key.offset) 5604 continue; 5605 5606 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5607 if (pass == 0) { 5608 /* 5609 * copy data extents to new locations 5610 */ 5611 u64 group_start = group->key.objectid; 5612 ret = relocate_data_extent(reloc_inode, 5613 extent_key, 5614 group_start); 5615 if (ret < 0) 5616 goto out; 5617 break; 5618 } 5619 level = 0; 5620 } else { 5621 level = ref_path->owner_objectid; 5622 } 5623 5624 if (prev_block != ref_path->nodes[level]) { 5625 struct extent_buffer *eb; 5626 u64 block_start = ref_path->nodes[level]; 5627 u64 block_size = btrfs_level_size(found_root, level); 5628 5629 eb = read_tree_block(found_root, block_start, 5630 block_size, 0); 5631 btrfs_tree_lock(eb); 5632 BUG_ON(level != btrfs_header_level(eb)); 5633 5634 if (level == 0) 5635 btrfs_item_key_to_cpu(eb, &first_key, 0); 5636 else 5637 btrfs_node_key_to_cpu(eb, &first_key, 0); 5638 5639 btrfs_tree_unlock(eb); 5640 free_extent_buffer(eb); 5641 prev_block = block_start; 5642 } 5643 5644 btrfs_record_root_in_trans(found_root); 5645 if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 5646 /* 5647 * try to update data extent references while 5648 * keeping metadata shared between snapshots. 5649 */ 5650 if (pass == 1) { 5651 ret = relocate_one_path(trans, found_root, 5652 path, &first_key, ref_path, 5653 group, reloc_inode); 5654 if (ret < 0) 5655 goto out; 5656 continue; 5657 } 5658 /* 5659 * use fallback method to process the remaining 5660 * references. 5661 */ 5662 if (!new_extents) { 5663 u64 group_start = group->key.objectid; 5664 new_extents = kmalloc(sizeof(*new_extents), 5665 GFP_NOFS); 5666 nr_extents = 1; 5667 ret = get_new_locations(reloc_inode, 5668 extent_key, 5669 group_start, 1, 5670 &new_extents, 5671 &nr_extents); 5672 if (ret) 5673 goto out; 5674 } 5675 ret = replace_one_extent(trans, found_root, 5676 path, extent_key, 5677 &first_key, ref_path, 5678 new_extents, nr_extents); 5679 } else { 5680 ret = relocate_tree_block(trans, found_root, path, 5681 &first_key, ref_path); 5682 } 5683 if (ret < 0) 5684 goto out; 5685 } 5686 ret = 0; 5687 out: 5688 btrfs_end_transaction(trans, extent_root); 5689 kfree(new_extents); 5690 kfree(ref_path); 5691 return ret; 5692 } 5693 5694 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 5695 { 5696 u64 num_devices; 5697 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 5698 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 5699 5700 num_devices = root->fs_info->fs_devices->rw_devices; 5701 if (num_devices == 1) { 5702 stripped |= BTRFS_BLOCK_GROUP_DUP; 5703 stripped = flags & ~stripped; 5704 5705 /* turn raid0 into single device chunks */ 5706 if (flags & BTRFS_BLOCK_GROUP_RAID0) 5707 return stripped; 5708 5709 /* turn mirroring into duplication */ 5710 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 5711 BTRFS_BLOCK_GROUP_RAID10)) 5712 return stripped | BTRFS_BLOCK_GROUP_DUP; 5713 return flags; 5714 } else { 5715 /* they already had raid on here, just return */ 5716 if (flags & stripped) 5717 return flags; 5718 5719 stripped |= BTRFS_BLOCK_GROUP_DUP; 5720 stripped = flags & ~stripped; 5721 5722 /* switch duplicated blocks with raid1 */ 5723 if (flags & BTRFS_BLOCK_GROUP_DUP) 5724 return stripped | BTRFS_BLOCK_GROUP_RAID1; 5725 5726 /* turn single device chunks into raid0 */ 5727 return stripped | BTRFS_BLOCK_GROUP_RAID0; 5728 } 5729 return flags; 5730 } 5731 5732 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 5733 struct btrfs_block_group_cache *shrink_block_group, 5734 int force) 5735 { 5736 struct btrfs_trans_handle *trans; 5737 u64 new_alloc_flags; 5738 u64 calc; 5739 5740 spin_lock(&shrink_block_group->lock); 5741 if (btrfs_block_group_used(&shrink_block_group->item) > 0) { 5742 spin_unlock(&shrink_block_group->lock); 5743 5744 trans = btrfs_start_transaction(root, 1); 5745 spin_lock(&shrink_block_group->lock); 5746 5747 new_alloc_flags = update_block_group_flags(root, 5748 shrink_block_group->flags); 5749 if (new_alloc_flags != shrink_block_group->flags) { 5750 calc = 5751 btrfs_block_group_used(&shrink_block_group->item); 5752 } else { 5753 calc = shrink_block_group->key.offset; 5754 } 5755 spin_unlock(&shrink_block_group->lock); 5756 5757 do_chunk_alloc(trans, root->fs_info->extent_root, 5758 calc + 2 * 1024 * 1024, new_alloc_flags, force); 5759 5760 btrfs_end_transaction(trans, root); 5761 } else 5762 spin_unlock(&shrink_block_group->lock); 5763 return 0; 5764 } 5765 5766 static int __insert_orphan_inode(struct btrfs_trans_handle *trans, 5767 struct btrfs_root *root, 5768 u64 objectid, u64 size) 5769 { 5770 struct btrfs_path *path; 5771 struct btrfs_inode_item *item; 5772 struct extent_buffer *leaf; 5773 int ret; 5774 5775 path = btrfs_alloc_path(); 5776 if (!path) 5777 return -ENOMEM; 5778 5779 ret = btrfs_insert_empty_inode(trans, root, path, objectid); 5780 if (ret) 5781 goto out; 5782 5783 leaf = path->nodes[0]; 5784 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item); 5785 memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item)); 5786 btrfs_set_inode_generation(leaf, item, 1); 5787 btrfs_set_inode_size(leaf, item, size); 5788 btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); 5789 btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); 5790 btrfs_mark_buffer_dirty(leaf); 5791 btrfs_release_path(root, path); 5792 out: 5793 btrfs_free_path(path); 5794 return ret; 5795 } 5796 5797 static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, 5798 struct btrfs_block_group_cache *group) 5799 { 5800 struct inode *inode = NULL; 5801 struct btrfs_trans_handle *trans; 5802 struct btrfs_root *root; 5803 struct btrfs_key root_key; 5804 u64 objectid = BTRFS_FIRST_FREE_OBJECTID; 5805 int err = 0; 5806 5807 root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID; 5808 root_key.type = BTRFS_ROOT_ITEM_KEY; 5809 root_key.offset = (u64)-1; 5810 root = btrfs_read_fs_root_no_name(fs_info, &root_key); 5811 if (IS_ERR(root)) 5812 return ERR_CAST(root); 5813 5814 trans = btrfs_start_transaction(root, 1); 5815 BUG_ON(!trans); 5816 5817 err = btrfs_find_free_objectid(trans, root, objectid, &objectid); 5818 if (err) 5819 goto out; 5820 5821 err = __insert_orphan_inode(trans, root, objectid, group->key.offset); 5822 BUG_ON(err); 5823 5824 err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0, 5825 group->key.offset, 0, group->key.offset, 5826 0, 0, 0); 5827 BUG_ON(err); 5828 5829 inode = btrfs_iget_locked(root->fs_info->sb, objectid, root); 5830 if (inode->i_state & I_NEW) { 5831 BTRFS_I(inode)->root = root; 5832 BTRFS_I(inode)->location.objectid = objectid; 5833 BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY; 5834 BTRFS_I(inode)->location.offset = 0; 5835 btrfs_read_locked_inode(inode); 5836 unlock_new_inode(inode); 5837 BUG_ON(is_bad_inode(inode)); 5838 } else { 5839 BUG_ON(1); 5840 } 5841 BTRFS_I(inode)->index_cnt = group->key.objectid; 5842 5843 err = btrfs_orphan_add(trans, inode); 5844 out: 5845 btrfs_end_transaction(trans, root); 5846 if (err) { 5847 if (inode) 5848 iput(inode); 5849 inode = ERR_PTR(err); 5850 } 5851 return inode; 5852 } 5853 5854 int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) 5855 { 5856 5857 struct btrfs_ordered_sum *sums; 5858 struct btrfs_sector_sum *sector_sum; 5859 struct btrfs_ordered_extent *ordered; 5860 struct btrfs_root *root = BTRFS_I(inode)->root; 5861 struct list_head list; 5862 size_t offset; 5863 int ret; 5864 u64 disk_bytenr; 5865 5866 INIT_LIST_HEAD(&list); 5867 5868 ordered = btrfs_lookup_ordered_extent(inode, file_pos); 5869 BUG_ON(ordered->file_offset != file_pos || ordered->len != len); 5870 5871 disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt; 5872 ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr, 5873 disk_bytenr + len - 1, &list); 5874 5875 while (!list_empty(&list)) { 5876 sums = list_entry(list.next, struct btrfs_ordered_sum, list); 5877 list_del_init(&sums->list); 5878 5879 sector_sum = sums->sums; 5880 sums->bytenr = ordered->start; 5881 5882 offset = 0; 5883 while (offset < sums->len) { 5884 sector_sum->bytenr += ordered->start - disk_bytenr; 5885 sector_sum++; 5886 offset += root->sectorsize; 5887 } 5888 5889 btrfs_add_ordered_sum(inode, ordered, sums); 5890 } 5891 btrfs_put_ordered_extent(ordered); 5892 return 0; 5893 } 5894 5895 int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start) 5896 { 5897 struct btrfs_trans_handle *trans; 5898 struct btrfs_path *path; 5899 struct btrfs_fs_info *info = root->fs_info; 5900 struct extent_buffer *leaf; 5901 struct inode *reloc_inode; 5902 struct btrfs_block_group_cache *block_group; 5903 struct btrfs_key key; 5904 u64 skipped; 5905 u64 cur_byte; 5906 u64 total_found; 5907 u32 nritems; 5908 int ret; 5909 int progress; 5910 int pass = 0; 5911 5912 root = root->fs_info->extent_root; 5913 5914 block_group = btrfs_lookup_block_group(info, group_start); 5915 BUG_ON(!block_group); 5916 5917 printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n", 5918 (unsigned long long)block_group->key.objectid, 5919 (unsigned long long)block_group->flags); 5920 5921 path = btrfs_alloc_path(); 5922 BUG_ON(!path); 5923 5924 reloc_inode = create_reloc_inode(info, block_group); 5925 BUG_ON(IS_ERR(reloc_inode)); 5926 5927 __alloc_chunk_for_shrink(root, block_group, 1); 5928 set_block_group_readonly(block_group); 5929 5930 btrfs_start_delalloc_inodes(info->tree_root); 5931 btrfs_wait_ordered_extents(info->tree_root, 0); 5932 again: 5933 skipped = 0; 5934 total_found = 0; 5935 progress = 0; 5936 key.objectid = block_group->key.objectid; 5937 key.offset = 0; 5938 key.type = 0; 5939 cur_byte = key.objectid; 5940 5941 trans = btrfs_start_transaction(info->tree_root, 1); 5942 btrfs_commit_transaction(trans, info->tree_root); 5943 5944 mutex_lock(&root->fs_info->cleaner_mutex); 5945 btrfs_clean_old_snapshots(info->tree_root); 5946 btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); 5947 mutex_unlock(&root->fs_info->cleaner_mutex); 5948 5949 while (1) { 5950 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 5951 if (ret < 0) 5952 goto out; 5953 next: 5954 leaf = path->nodes[0]; 5955 nritems = btrfs_header_nritems(leaf); 5956 if (path->slots[0] >= nritems) { 5957 ret = btrfs_next_leaf(root, path); 5958 if (ret < 0) 5959 goto out; 5960 if (ret == 1) { 5961 ret = 0; 5962 break; 5963 } 5964 leaf = path->nodes[0]; 5965 nritems = btrfs_header_nritems(leaf); 5966 } 5967 5968 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 5969 5970 if (key.objectid >= block_group->key.objectid + 5971 block_group->key.offset) 5972 break; 5973 5974 if (progress && need_resched()) { 5975 btrfs_release_path(root, path); 5976 cond_resched(); 5977 progress = 0; 5978 continue; 5979 } 5980 progress = 1; 5981 5982 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY || 5983 key.objectid + key.offset <= cur_byte) { 5984 path->slots[0]++; 5985 goto next; 5986 } 5987 5988 total_found++; 5989 cur_byte = key.objectid + key.offset; 5990 btrfs_release_path(root, path); 5991 5992 __alloc_chunk_for_shrink(root, block_group, 0); 5993 ret = relocate_one_extent(root, path, &key, block_group, 5994 reloc_inode, pass); 5995 BUG_ON(ret < 0); 5996 if (ret > 0) 5997 skipped++; 5998 5999 key.objectid = cur_byte; 6000 key.type = 0; 6001 key.offset = 0; 6002 } 6003 6004 btrfs_release_path(root, path); 6005 6006 if (pass == 0) { 6007 btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1); 6008 invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1); 6009 } 6010 6011 if (total_found > 0) { 6012 printk(KERN_INFO "btrfs found %llu extents in pass %d\n", 6013 (unsigned long long)total_found, pass); 6014 pass++; 6015 if (total_found == skipped && pass > 2) { 6016 iput(reloc_inode); 6017 reloc_inode = create_reloc_inode(info, block_group); 6018 pass = 0; 6019 } 6020 goto again; 6021 } 6022 6023 /* delete reloc_inode */ 6024 iput(reloc_inode); 6025 6026 /* unpin extents in this range */ 6027 trans = btrfs_start_transaction(info->tree_root, 1); 6028 btrfs_commit_transaction(trans, info->tree_root); 6029 6030 spin_lock(&block_group->lock); 6031 WARN_ON(block_group->pinned > 0); 6032 WARN_ON(block_group->reserved > 0); 6033 WARN_ON(btrfs_block_group_used(&block_group->item) > 0); 6034 spin_unlock(&block_group->lock); 6035 put_block_group(block_group); 6036 ret = 0; 6037 out: 6038 btrfs_free_path(path); 6039 return ret; 6040 } 6041 6042 static int find_first_block_group(struct btrfs_root *root, 6043 struct btrfs_path *path, struct btrfs_key *key) 6044 { 6045 int ret = 0; 6046 struct btrfs_key found_key; 6047 struct extent_buffer *leaf; 6048 int slot; 6049 6050 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 6051 if (ret < 0) 6052 goto out; 6053 6054 while (1) { 6055 slot = path->slots[0]; 6056 leaf = path->nodes[0]; 6057 if (slot >= btrfs_header_nritems(leaf)) { 6058 ret = btrfs_next_leaf(root, path); 6059 if (ret == 0) 6060 continue; 6061 if (ret < 0) 6062 goto out; 6063 break; 6064 } 6065 btrfs_item_key_to_cpu(leaf, &found_key, slot); 6066 6067 if (found_key.objectid >= key->objectid && 6068 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 6069 ret = 0; 6070 goto out; 6071 } 6072 path->slots[0]++; 6073 } 6074 ret = -ENOENT; 6075 out: 6076 return ret; 6077 } 6078 6079 int btrfs_free_block_groups(struct btrfs_fs_info *info) 6080 { 6081 struct btrfs_block_group_cache *block_group; 6082 struct rb_node *n; 6083 6084 spin_lock(&info->block_group_cache_lock); 6085 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6086 block_group = rb_entry(n, struct btrfs_block_group_cache, 6087 cache_node); 6088 rb_erase(&block_group->cache_node, 6089 &info->block_group_cache_tree); 6090 spin_unlock(&info->block_group_cache_lock); 6091 6092 btrfs_remove_free_space_cache(block_group); 6093 down_write(&block_group->space_info->groups_sem); 6094 list_del(&block_group->list); 6095 up_write(&block_group->space_info->groups_sem); 6096 6097 WARN_ON(atomic_read(&block_group->count) != 1); 6098 kfree(block_group); 6099 6100 spin_lock(&info->block_group_cache_lock); 6101 } 6102 spin_unlock(&info->block_group_cache_lock); 6103 return 0; 6104 } 6105 6106 int btrfs_read_block_groups(struct btrfs_root *root) 6107 { 6108 struct btrfs_path *path; 6109 int ret; 6110 struct btrfs_block_group_cache *cache; 6111 struct btrfs_fs_info *info = root->fs_info; 6112 struct btrfs_space_info *space_info; 6113 struct btrfs_key key; 6114 struct btrfs_key found_key; 6115 struct extent_buffer *leaf; 6116 6117 root = info->extent_root; 6118 key.objectid = 0; 6119 key.offset = 0; 6120 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 6121 path = btrfs_alloc_path(); 6122 if (!path) 6123 return -ENOMEM; 6124 6125 while (1) { 6126 ret = find_first_block_group(root, path, &key); 6127 if (ret > 0) { 6128 ret = 0; 6129 goto error; 6130 } 6131 if (ret != 0) 6132 goto error; 6133 6134 leaf = path->nodes[0]; 6135 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6136 cache = kzalloc(sizeof(*cache), GFP_NOFS); 6137 if (!cache) { 6138 ret = -ENOMEM; 6139 break; 6140 } 6141 6142 atomic_set(&cache->count, 1); 6143 spin_lock_init(&cache->lock); 6144 mutex_init(&cache->alloc_mutex); 6145 mutex_init(&cache->cache_mutex); 6146 INIT_LIST_HEAD(&cache->list); 6147 read_extent_buffer(leaf, &cache->item, 6148 btrfs_item_ptr_offset(leaf, path->slots[0]), 6149 sizeof(cache->item)); 6150 memcpy(&cache->key, &found_key, sizeof(found_key)); 6151 6152 key.objectid = found_key.objectid + found_key.offset; 6153 btrfs_release_path(root, path); 6154 cache->flags = btrfs_block_group_flags(&cache->item); 6155 6156 ret = update_space_info(info, cache->flags, found_key.offset, 6157 btrfs_block_group_used(&cache->item), 6158 &space_info); 6159 BUG_ON(ret); 6160 cache->space_info = space_info; 6161 down_write(&space_info->groups_sem); 6162 list_add_tail(&cache->list, &space_info->block_groups); 6163 up_write(&space_info->groups_sem); 6164 6165 ret = btrfs_add_block_group_cache(root->fs_info, cache); 6166 BUG_ON(ret); 6167 6168 set_avail_alloc_bits(root->fs_info, cache->flags); 6169 if (btrfs_chunk_readonly(root, cache->key.objectid)) 6170 set_block_group_readonly(cache); 6171 } 6172 ret = 0; 6173 error: 6174 btrfs_free_path(path); 6175 return ret; 6176 } 6177 6178 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 6179 struct btrfs_root *root, u64 bytes_used, 6180 u64 type, u64 chunk_objectid, u64 chunk_offset, 6181 u64 size) 6182 { 6183 int ret; 6184 struct btrfs_root *extent_root; 6185 struct btrfs_block_group_cache *cache; 6186 6187 extent_root = root->fs_info->extent_root; 6188 6189 root->fs_info->last_trans_new_blockgroup = trans->transid; 6190 6191 cache = kzalloc(sizeof(*cache), GFP_NOFS); 6192 if (!cache) 6193 return -ENOMEM; 6194 6195 cache->key.objectid = chunk_offset; 6196 cache->key.offset = size; 6197 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 6198 atomic_set(&cache->count, 1); 6199 spin_lock_init(&cache->lock); 6200 mutex_init(&cache->alloc_mutex); 6201 mutex_init(&cache->cache_mutex); 6202 INIT_LIST_HEAD(&cache->list); 6203 6204 btrfs_set_block_group_used(&cache->item, bytes_used); 6205 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 6206 cache->flags = type; 6207 btrfs_set_block_group_flags(&cache->item, type); 6208 6209 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 6210 &cache->space_info); 6211 BUG_ON(ret); 6212 down_write(&cache->space_info->groups_sem); 6213 list_add_tail(&cache->list, &cache->space_info->block_groups); 6214 up_write(&cache->space_info->groups_sem); 6215 6216 ret = btrfs_add_block_group_cache(root->fs_info, cache); 6217 BUG_ON(ret); 6218 6219 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 6220 sizeof(cache->item)); 6221 BUG_ON(ret); 6222 6223 finish_current_insert(trans, extent_root, 0); 6224 ret = del_pending_extents(trans, extent_root, 0); 6225 BUG_ON(ret); 6226 set_avail_alloc_bits(extent_root->fs_info, type); 6227 6228 return 0; 6229 } 6230 6231 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 6232 struct btrfs_root *root, u64 group_start) 6233 { 6234 struct btrfs_path *path; 6235 struct btrfs_block_group_cache *block_group; 6236 struct btrfs_key key; 6237 int ret; 6238 6239 root = root->fs_info->extent_root; 6240 6241 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 6242 BUG_ON(!block_group); 6243 BUG_ON(!block_group->ro); 6244 6245 memcpy(&key, &block_group->key, sizeof(key)); 6246 6247 path = btrfs_alloc_path(); 6248 BUG_ON(!path); 6249 6250 spin_lock(&root->fs_info->block_group_cache_lock); 6251 rb_erase(&block_group->cache_node, 6252 &root->fs_info->block_group_cache_tree); 6253 spin_unlock(&root->fs_info->block_group_cache_lock); 6254 btrfs_remove_free_space_cache(block_group); 6255 down_write(&block_group->space_info->groups_sem); 6256 list_del(&block_group->list); 6257 up_write(&block_group->space_info->groups_sem); 6258 6259 spin_lock(&block_group->space_info->lock); 6260 block_group->space_info->total_bytes -= block_group->key.offset; 6261 block_group->space_info->bytes_readonly -= block_group->key.offset; 6262 spin_unlock(&block_group->space_info->lock); 6263 block_group->space_info->full = 0; 6264 6265 put_block_group(block_group); 6266 put_block_group(block_group); 6267 6268 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 6269 if (ret > 0) 6270 ret = -EIO; 6271 if (ret < 0) 6272 goto out; 6273 6274 ret = btrfs_del_item(trans, root, path); 6275 out: 6276 btrfs_free_path(path); 6277 return ret; 6278 } 6279