1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include "compat.h" 24 #include "hash.h" 25 #include "crc32c.h" 26 #include "ctree.h" 27 #include "disk-io.h" 28 #include "print-tree.h" 29 #include "transaction.h" 30 #include "volumes.h" 31 #include "locking.h" 32 #include "ref-cache.h" 33 34 #define PENDING_EXTENT_INSERT 0 35 #define PENDING_EXTENT_DELETE 1 36 #define PENDING_BACKREF_UPDATE 2 37 38 struct pending_extent_op { 39 int type; 40 u64 bytenr; 41 u64 num_bytes; 42 u64 parent; 43 u64 orig_parent; 44 u64 generation; 45 u64 orig_generation; 46 int level; 47 struct list_head list; 48 int del; 49 }; 50 51 static int finish_current_insert(struct btrfs_trans_handle *trans, 52 struct btrfs_root *extent_root, int all); 53 static int del_pending_extents(struct btrfs_trans_handle *trans, 54 struct btrfs_root *extent_root, int all); 55 static int pin_down_bytes(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 bytenr, u64 num_bytes, int is_data); 58 static int update_block_group(struct btrfs_trans_handle *trans, 59 struct btrfs_root *root, 60 u64 bytenr, u64 num_bytes, int alloc, 61 int mark_free); 62 63 static int 
do_chunk_alloc(struct btrfs_trans_handle *trans, 64 struct btrfs_root *extent_root, u64 alloc_bytes, 65 u64 flags, int force); 66 67 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 68 { 69 return (cache->flags & bits) == bits; 70 } 71 72 /* 73 * this adds the block group to the fs_info rb tree for the block group 74 * cache 75 */ 76 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 77 struct btrfs_block_group_cache *block_group) 78 { 79 struct rb_node **p; 80 struct rb_node *parent = NULL; 81 struct btrfs_block_group_cache *cache; 82 83 spin_lock(&info->block_group_cache_lock); 84 p = &info->block_group_cache_tree.rb_node; 85 86 while (*p) { 87 parent = *p; 88 cache = rb_entry(parent, struct btrfs_block_group_cache, 89 cache_node); 90 if (block_group->key.objectid < cache->key.objectid) { 91 p = &(*p)->rb_left; 92 } else if (block_group->key.objectid > cache->key.objectid) { 93 p = &(*p)->rb_right; 94 } else { 95 spin_unlock(&info->block_group_cache_lock); 96 return -EEXIST; 97 } 98 } 99 100 rb_link_node(&block_group->cache_node, parent, p); 101 rb_insert_color(&block_group->cache_node, 102 &info->block_group_cache_tree); 103 spin_unlock(&info->block_group_cache_lock); 104 105 return 0; 106 } 107 108 /* 109 * This will return the block group at or after bytenr if contains is 0, else 110 * it will return the block group that contains the bytenr 111 */ 112 static struct btrfs_block_group_cache * 113 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 114 int contains) 115 { 116 struct btrfs_block_group_cache *cache, *ret = NULL; 117 struct rb_node *n; 118 u64 end, start; 119 120 spin_lock(&info->block_group_cache_lock); 121 n = info->block_group_cache_tree.rb_node; 122 123 while (n) { 124 cache = rb_entry(n, struct btrfs_block_group_cache, 125 cache_node); 126 end = cache->key.objectid + cache->key.offset - 1; 127 start = cache->key.objectid; 128 129 if (bytenr < start) { 130 if (!contains && (!ret || 
start < ret->key.objectid)) 131 ret = cache; 132 n = n->rb_left; 133 } else if (bytenr > start) { 134 if (contains && bytenr <= end) { 135 ret = cache; 136 break; 137 } 138 n = n->rb_right; 139 } else { 140 ret = cache; 141 break; 142 } 143 } 144 if (ret) 145 atomic_inc(&ret->count); 146 spin_unlock(&info->block_group_cache_lock); 147 148 return ret; 149 } 150 151 /* 152 * this is only called by cache_block_group, since we could have freed extents 153 * we need to check the pinned_extents for any extents that can't be used yet 154 * since their free space will be released as soon as the transaction commits. 155 */ 156 static int add_new_free_space(struct btrfs_block_group_cache *block_group, 157 struct btrfs_fs_info *info, u64 start, u64 end) 158 { 159 u64 extent_start, extent_end, size; 160 int ret; 161 162 mutex_lock(&info->pinned_mutex); 163 while (start < end) { 164 ret = find_first_extent_bit(&info->pinned_extents, start, 165 &extent_start, &extent_end, 166 EXTENT_DIRTY); 167 if (ret) 168 break; 169 170 if (extent_start == start) { 171 start = extent_end + 1; 172 } else if (extent_start > start && extent_start < end) { 173 size = extent_start - start; 174 ret = btrfs_add_free_space(block_group, start, 175 size); 176 BUG_ON(ret); 177 start = extent_end + 1; 178 } else { 179 break; 180 } 181 } 182 183 if (start < end) { 184 size = end - start; 185 ret = btrfs_add_free_space(block_group, start, size); 186 BUG_ON(ret); 187 } 188 mutex_unlock(&info->pinned_mutex); 189 190 return 0; 191 } 192 193 static int remove_sb_from_cache(struct btrfs_root *root, 194 struct btrfs_block_group_cache *cache) 195 { 196 u64 bytenr; 197 u64 *logical; 198 int stripe_len; 199 int i, nr, ret; 200 201 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 202 bytenr = btrfs_sb_offset(i); 203 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 204 cache->key.objectid, bytenr, 0, 205 &logical, &nr, &stripe_len); 206 BUG_ON(ret); 207 while (nr--) { 208 btrfs_remove_free_space(cache, logical[nr], 
209 stripe_len); 210 } 211 kfree(logical); 212 } 213 return 0; 214 } 215 216 static int cache_block_group(struct btrfs_root *root, 217 struct btrfs_block_group_cache *block_group) 218 { 219 struct btrfs_path *path; 220 int ret = 0; 221 struct btrfs_key key; 222 struct extent_buffer *leaf; 223 int slot; 224 u64 last; 225 226 if (!block_group) 227 return 0; 228 229 root = root->fs_info->extent_root; 230 231 if (block_group->cached) 232 return 0; 233 234 path = btrfs_alloc_path(); 235 if (!path) 236 return -ENOMEM; 237 238 path->reada = 2; 239 /* 240 * we get into deadlocks with paths held by callers of this function. 241 * since the alloc_mutex is protecting things right now, just 242 * skip the locking here 243 */ 244 path->skip_locking = 1; 245 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 246 key.objectid = last; 247 key.offset = 0; 248 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 249 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 250 if (ret < 0) 251 goto err; 252 253 while (1) { 254 leaf = path->nodes[0]; 255 slot = path->slots[0]; 256 if (slot >= btrfs_header_nritems(leaf)) { 257 ret = btrfs_next_leaf(root, path); 258 if (ret < 0) 259 goto err; 260 if (ret == 0) 261 continue; 262 else 263 break; 264 } 265 btrfs_item_key_to_cpu(leaf, &key, slot); 266 if (key.objectid < block_group->key.objectid) 267 goto next; 268 269 if (key.objectid >= block_group->key.objectid + 270 block_group->key.offset) 271 break; 272 273 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) { 274 add_new_free_space(block_group, root->fs_info, last, 275 key.objectid); 276 277 last = key.objectid + key.offset; 278 } 279 next: 280 path->slots[0]++; 281 } 282 283 add_new_free_space(block_group, root->fs_info, last, 284 block_group->key.objectid + 285 block_group->key.offset); 286 287 remove_sb_from_cache(root, block_group); 288 block_group->cached = 1; 289 ret = 0; 290 err: 291 btrfs_free_path(path); 292 return ret; 293 } 294 295 /* 296 * return the block 
group that starts at or after bytenr 297 */ 298 static struct btrfs_block_group_cache * 299 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 300 { 301 struct btrfs_block_group_cache *cache; 302 303 cache = block_group_cache_tree_search(info, bytenr, 0); 304 305 return cache; 306 } 307 308 /* 309 * return the block group that contains teh given bytenr 310 */ 311 struct btrfs_block_group_cache *btrfs_lookup_block_group( 312 struct btrfs_fs_info *info, 313 u64 bytenr) 314 { 315 struct btrfs_block_group_cache *cache; 316 317 cache = block_group_cache_tree_search(info, bytenr, 1); 318 319 return cache; 320 } 321 322 static inline void put_block_group(struct btrfs_block_group_cache *cache) 323 { 324 if (atomic_dec_and_test(&cache->count)) 325 kfree(cache); 326 } 327 328 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 329 u64 flags) 330 { 331 struct list_head *head = &info->space_info; 332 struct btrfs_space_info *found; 333 list_for_each_entry(found, head, list) { 334 if (found->flags == flags) 335 return found; 336 } 337 return NULL; 338 } 339 340 static u64 div_factor(u64 num, int factor) 341 { 342 if (factor == 10) 343 return num; 344 num *= factor; 345 do_div(num, 10); 346 return num; 347 } 348 349 u64 btrfs_find_block_group(struct btrfs_root *root, 350 u64 search_start, u64 search_hint, int owner) 351 { 352 struct btrfs_block_group_cache *cache; 353 u64 used; 354 u64 last = max(search_hint, search_start); 355 u64 group_start = 0; 356 int full_search = 0; 357 int factor = 9; 358 int wrapped = 0; 359 again: 360 while (1) { 361 cache = btrfs_lookup_first_block_group(root->fs_info, last); 362 if (!cache) 363 break; 364 365 spin_lock(&cache->lock); 366 last = cache->key.objectid + cache->key.offset; 367 used = btrfs_block_group_used(&cache->item); 368 369 if ((full_search || !cache->ro) && 370 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 371 if (used + cache->pinned + cache->reserved < 372 
div_factor(cache->key.offset, factor)) { 373 group_start = cache->key.objectid; 374 spin_unlock(&cache->lock); 375 put_block_group(cache); 376 goto found; 377 } 378 } 379 spin_unlock(&cache->lock); 380 put_block_group(cache); 381 cond_resched(); 382 } 383 if (!wrapped) { 384 last = search_start; 385 wrapped = 1; 386 goto again; 387 } 388 if (!full_search && factor < 10) { 389 last = search_start; 390 full_search = 1; 391 factor = 10; 392 goto again; 393 } 394 found: 395 return group_start; 396 } 397 398 /* simple helper to search for an existing extent at a given offset */ 399 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 400 { 401 int ret; 402 struct btrfs_key key; 403 struct btrfs_path *path; 404 405 path = btrfs_alloc_path(); 406 BUG_ON(!path); 407 key.objectid = start; 408 key.offset = len; 409 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 410 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 411 0, 0); 412 btrfs_free_path(path); 413 return ret; 414 } 415 416 /* 417 * Back reference rules. Back refs have three main goals: 418 * 419 * 1) differentiate between all holders of references to an extent so that 420 * when a reference is dropped we can make sure it was a valid reference 421 * before freeing the extent. 422 * 423 * 2) Provide enough information to quickly find the holders of an extent 424 * if we notice a given block is corrupted or bad. 425 * 426 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 427 * maintenance. This is actually the same as #2, but with a slightly 428 * different use case. 
429 * 430 * File extents can be referenced by: 431 * 432 * - multiple snapshots, subvolumes, or different generations in one subvol 433 * - different files inside a single subvolume 434 * - different offsets inside a file (bookend extents in file.c) 435 * 436 * The extent ref structure has fields for: 437 * 438 * - Objectid of the subvolume root 439 * - Generation number of the tree holding the reference 440 * - objectid of the file holding the reference 441 * - number of references holding by parent node (alway 1 for tree blocks) 442 * 443 * Btree leaf may hold multiple references to a file extent. In most cases, 444 * these references are from same file and the corresponding offsets inside 445 * the file are close together. 446 * 447 * When a file extent is allocated the fields are filled in: 448 * (root_key.objectid, trans->transid, inode objectid, 1) 449 * 450 * When a leaf is cow'd new references are added for every file extent found 451 * in the leaf. It looks similar to the create case, but trans->transid will 452 * be different when the block is cow'd. 453 * 454 * (root_key.objectid, trans->transid, inode objectid, 455 * number of references in the leaf) 456 * 457 * When a file extent is removed either during snapshot deletion or 458 * file truncation, we find the corresponding back reference and check 459 * the following fields: 460 * 461 * (btrfs_header_owner(leaf), btrfs_header_generation(leaf), 462 * inode objectid) 463 * 464 * Btree extents can be referenced by: 465 * 466 * - Different subvolumes 467 * - Different generations of the same subvolume 468 * 469 * When a tree block is created, back references are inserted: 470 * 471 * (root->root_key.objectid, trans->transid, level, 1) 472 * 473 * When a tree block is cow'd, new back references are added for all the 474 * blocks it points to. If the tree block isn't in reference counted root, 475 * the old back references are removed. 
These new back references are of 476 * the form (trans->transid will have increased since creation): 477 * 478 * (root->root_key.objectid, trans->transid, level, 1) 479 * 480 * When a backref is in deleting, the following fields are checked: 481 * 482 * if backref was for a tree root: 483 * (btrfs_header_owner(itself), btrfs_header_generation(itself), level) 484 * else 485 * (btrfs_header_owner(parent), btrfs_header_generation(parent), level) 486 * 487 * Back Reference Key composing: 488 * 489 * The key objectid corresponds to the first byte in the extent, the key 490 * type is set to BTRFS_EXTENT_REF_KEY, and the key offset is the first 491 * byte of parent extent. If a extent is tree root, the key offset is set 492 * to the key objectid. 493 */ 494 495 static noinline int lookup_extent_backref(struct btrfs_trans_handle *trans, 496 struct btrfs_root *root, 497 struct btrfs_path *path, 498 u64 bytenr, u64 parent, 499 u64 ref_root, u64 ref_generation, 500 u64 owner_objectid, int del) 501 { 502 struct btrfs_key key; 503 struct btrfs_extent_ref *ref; 504 struct extent_buffer *leaf; 505 u64 ref_objectid; 506 int ret; 507 508 key.objectid = bytenr; 509 key.type = BTRFS_EXTENT_REF_KEY; 510 key.offset = parent; 511 512 ret = btrfs_search_slot(trans, root, &key, path, del ? 
-1 : 0, 1); 513 if (ret < 0) 514 goto out; 515 if (ret > 0) { 516 ret = -ENOENT; 517 goto out; 518 } 519 520 leaf = path->nodes[0]; 521 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 522 ref_objectid = btrfs_ref_objectid(leaf, ref); 523 if (btrfs_ref_root(leaf, ref) != ref_root || 524 btrfs_ref_generation(leaf, ref) != ref_generation || 525 (ref_objectid != owner_objectid && 526 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 527 ret = -EIO; 528 WARN_ON(1); 529 goto out; 530 } 531 ret = 0; 532 out: 533 return ret; 534 } 535 536 /* 537 * updates all the backrefs that are pending on update_list for the 538 * extent_root 539 */ 540 static noinline int update_backrefs(struct btrfs_trans_handle *trans, 541 struct btrfs_root *extent_root, 542 struct btrfs_path *path, 543 struct list_head *update_list) 544 { 545 struct btrfs_key key; 546 struct btrfs_extent_ref *ref; 547 struct btrfs_fs_info *info = extent_root->fs_info; 548 struct pending_extent_op *op; 549 struct extent_buffer *leaf; 550 int ret = 0; 551 struct list_head *cur = update_list->next; 552 u64 ref_objectid; 553 u64 ref_root = extent_root->root_key.objectid; 554 555 op = list_entry(cur, struct pending_extent_op, list); 556 557 search: 558 key.objectid = op->bytenr; 559 key.type = BTRFS_EXTENT_REF_KEY; 560 key.offset = op->orig_parent; 561 562 ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); 563 BUG_ON(ret); 564 565 leaf = path->nodes[0]; 566 567 loop: 568 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 569 570 ref_objectid = btrfs_ref_objectid(leaf, ref); 571 572 if (btrfs_ref_root(leaf, ref) != ref_root || 573 btrfs_ref_generation(leaf, ref) != op->orig_generation || 574 (ref_objectid != op->level && 575 ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { 576 printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " 577 "root %llu, owner %u\n", 578 (unsigned long long)op->bytenr, 579 (unsigned long long)op->orig_parent, 580 (unsigned long long)ref_root, 
op->level); 581 btrfs_print_leaf(extent_root, leaf); 582 BUG(); 583 } 584 585 key.objectid = op->bytenr; 586 key.offset = op->parent; 587 key.type = BTRFS_EXTENT_REF_KEY; 588 ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); 589 BUG_ON(ret); 590 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 591 btrfs_set_ref_generation(leaf, ref, op->generation); 592 593 cur = cur->next; 594 595 list_del_init(&op->list); 596 unlock_extent(&info->extent_ins, op->bytenr, 597 op->bytenr + op->num_bytes - 1, GFP_NOFS); 598 kfree(op); 599 600 if (cur == update_list) { 601 btrfs_mark_buffer_dirty(path->nodes[0]); 602 btrfs_release_path(extent_root, path); 603 goto out; 604 } 605 606 op = list_entry(cur, struct pending_extent_op, list); 607 608 path->slots[0]++; 609 while (path->slots[0] < btrfs_header_nritems(leaf)) { 610 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 611 if (key.objectid == op->bytenr && 612 key.type == BTRFS_EXTENT_REF_KEY) 613 goto loop; 614 path->slots[0]++; 615 } 616 617 btrfs_mark_buffer_dirty(path->nodes[0]); 618 btrfs_release_path(extent_root, path); 619 goto search; 620 621 out: 622 return 0; 623 } 624 625 static noinline int insert_extents(struct btrfs_trans_handle *trans, 626 struct btrfs_root *extent_root, 627 struct btrfs_path *path, 628 struct list_head *insert_list, int nr) 629 { 630 struct btrfs_key *keys; 631 u32 *data_size; 632 struct pending_extent_op *op; 633 struct extent_buffer *leaf; 634 struct list_head *cur = insert_list->next; 635 struct btrfs_fs_info *info = extent_root->fs_info; 636 u64 ref_root = extent_root->root_key.objectid; 637 int i = 0, last = 0, ret; 638 int total = nr * 2; 639 640 if (!nr) 641 return 0; 642 643 keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); 644 if (!keys) 645 return -ENOMEM; 646 647 data_size = kzalloc(total * sizeof(u32), GFP_NOFS); 648 if (!data_size) { 649 kfree(keys); 650 return -ENOMEM; 651 } 652 653 list_for_each_entry(op, insert_list, list) { 654 
keys[i].objectid = op->bytenr; 655 keys[i].offset = op->num_bytes; 656 keys[i].type = BTRFS_EXTENT_ITEM_KEY; 657 data_size[i] = sizeof(struct btrfs_extent_item); 658 i++; 659 660 keys[i].objectid = op->bytenr; 661 keys[i].offset = op->parent; 662 keys[i].type = BTRFS_EXTENT_REF_KEY; 663 data_size[i] = sizeof(struct btrfs_extent_ref); 664 i++; 665 } 666 667 op = list_entry(cur, struct pending_extent_op, list); 668 i = 0; 669 while (i < total) { 670 int c; 671 ret = btrfs_insert_some_items(trans, extent_root, path, 672 keys+i, data_size+i, total-i); 673 BUG_ON(ret < 0); 674 675 if (last && ret > 1) 676 BUG(); 677 678 leaf = path->nodes[0]; 679 for (c = 0; c < ret; c++) { 680 int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; 681 682 /* 683 * if the first item we inserted was a backref, then 684 * the EXTENT_ITEM will be the odd c's, else it will 685 * be the even c's 686 */ 687 if ((ref_first && (c % 2)) || 688 (!ref_first && !(c % 2))) { 689 struct btrfs_extent_item *itm; 690 691 itm = btrfs_item_ptr(leaf, path->slots[0] + c, 692 struct btrfs_extent_item); 693 btrfs_set_extent_refs(path->nodes[0], itm, 1); 694 op->del++; 695 } else { 696 struct btrfs_extent_ref *ref; 697 698 ref = btrfs_item_ptr(leaf, path->slots[0] + c, 699 struct btrfs_extent_ref); 700 btrfs_set_ref_root(leaf, ref, ref_root); 701 btrfs_set_ref_generation(leaf, ref, 702 op->generation); 703 btrfs_set_ref_objectid(leaf, ref, op->level); 704 btrfs_set_ref_num_refs(leaf, ref, 1); 705 op->del++; 706 } 707 708 /* 709 * using del to see when its ok to free up the 710 * pending_extent_op. 
In the case where we insert the 711 * last item on the list in order to help do batching 712 * we need to not free the extent op until we actually 713 * insert the extent_item 714 */ 715 if (op->del == 2) { 716 unlock_extent(&info->extent_ins, op->bytenr, 717 op->bytenr + op->num_bytes - 1, 718 GFP_NOFS); 719 cur = cur->next; 720 list_del_init(&op->list); 721 kfree(op); 722 if (cur != insert_list) 723 op = list_entry(cur, 724 struct pending_extent_op, 725 list); 726 } 727 } 728 btrfs_mark_buffer_dirty(leaf); 729 btrfs_release_path(extent_root, path); 730 731 /* 732 * Ok backref's and items usually go right next to eachother, 733 * but if we could only insert 1 item that means that we 734 * inserted on the end of a leaf, and we have no idea what may 735 * be on the next leaf so we just play it safe. In order to 736 * try and help this case we insert the last thing on our 737 * insert list so hopefully it will end up being the last 738 * thing on the leaf and everything else will be before it, 739 * which will let us insert a whole bunch of items at the same 740 * time. 
741 */ 742 if (ret == 1 && !last && (i + ret < total)) { 743 /* 744 * last: where we will pick up the next time around 745 * i: our current key to insert, will be total - 1 746 * cur: the current op we are screwing with 747 * op: duh 748 */ 749 last = i + ret; 750 i = total - 1; 751 cur = insert_list->prev; 752 op = list_entry(cur, struct pending_extent_op, list); 753 } else if (last) { 754 /* 755 * ok we successfully inserted the last item on the 756 * list, lets reset everything 757 * 758 * i: our current key to insert, so where we left off 759 * last time 760 * last: done with this 761 * cur: the op we are messing with 762 * op: duh 763 * total: since we inserted the last key, we need to 764 * decrement total so we dont overflow 765 */ 766 i = last; 767 last = 0; 768 total--; 769 if (i < total) { 770 cur = insert_list->next; 771 op = list_entry(cur, struct pending_extent_op, 772 list); 773 } 774 } else { 775 i += ret; 776 } 777 778 cond_resched(); 779 } 780 ret = 0; 781 kfree(keys); 782 kfree(data_size); 783 return ret; 784 } 785 786 static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, 787 struct btrfs_root *root, 788 struct btrfs_path *path, 789 u64 bytenr, u64 parent, 790 u64 ref_root, u64 ref_generation, 791 u64 owner_objectid) 792 { 793 struct btrfs_key key; 794 struct extent_buffer *leaf; 795 struct btrfs_extent_ref *ref; 796 u32 num_refs; 797 int ret; 798 799 key.objectid = bytenr; 800 key.type = BTRFS_EXTENT_REF_KEY; 801 key.offset = parent; 802 803 ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*ref)); 804 if (ret == 0) { 805 leaf = path->nodes[0]; 806 ref = btrfs_item_ptr(leaf, path->slots[0], 807 struct btrfs_extent_ref); 808 btrfs_set_ref_root(leaf, ref, ref_root); 809 btrfs_set_ref_generation(leaf, ref, ref_generation); 810 btrfs_set_ref_objectid(leaf, ref, owner_objectid); 811 btrfs_set_ref_num_refs(leaf, ref, 1); 812 } else if (ret == -EEXIST) { 813 u64 existing_owner; 814 BUG_ON(owner_objectid < 
BTRFS_FIRST_FREE_OBJECTID); 815 leaf = path->nodes[0]; 816 ref = btrfs_item_ptr(leaf, path->slots[0], 817 struct btrfs_extent_ref); 818 if (btrfs_ref_root(leaf, ref) != ref_root || 819 btrfs_ref_generation(leaf, ref) != ref_generation) { 820 ret = -EIO; 821 WARN_ON(1); 822 goto out; 823 } 824 825 num_refs = btrfs_ref_num_refs(leaf, ref); 826 BUG_ON(num_refs == 0); 827 btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); 828 829 existing_owner = btrfs_ref_objectid(leaf, ref); 830 if (existing_owner != owner_objectid && 831 existing_owner != BTRFS_MULTIPLE_OBJECTIDS) { 832 btrfs_set_ref_objectid(leaf, ref, 833 BTRFS_MULTIPLE_OBJECTIDS); 834 } 835 ret = 0; 836 } else { 837 goto out; 838 } 839 btrfs_mark_buffer_dirty(path->nodes[0]); 840 out: 841 btrfs_release_path(root, path); 842 return ret; 843 } 844 845 static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, 846 struct btrfs_root *root, 847 struct btrfs_path *path) 848 { 849 struct extent_buffer *leaf; 850 struct btrfs_extent_ref *ref; 851 u32 num_refs; 852 int ret = 0; 853 854 leaf = path->nodes[0]; 855 ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); 856 num_refs = btrfs_ref_num_refs(leaf, ref); 857 BUG_ON(num_refs == 0); 858 num_refs -= 1; 859 if (num_refs == 0) { 860 ret = btrfs_del_item(trans, root, path); 861 } else { 862 btrfs_set_ref_num_refs(leaf, ref, num_refs); 863 btrfs_mark_buffer_dirty(leaf); 864 } 865 btrfs_release_path(root, path); 866 return ret; 867 } 868 869 #ifdef BIO_RW_DISCARD 870 static void btrfs_issue_discard(struct block_device *bdev, 871 u64 start, u64 len) 872 { 873 blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_KERNEL); 874 } 875 #endif 876 877 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 878 u64 num_bytes) 879 { 880 #ifdef BIO_RW_DISCARD 881 int ret; 882 u64 map_length = num_bytes; 883 struct btrfs_multi_bio *multi = NULL; 884 885 /* Tell the block device(s) that the sectors can be discarded */ 886 ret = 
btrfs_map_block(&root->fs_info->mapping_tree, READ, 887 bytenr, &map_length, &multi, 0); 888 if (!ret) { 889 struct btrfs_bio_stripe *stripe = multi->stripes; 890 int i; 891 892 if (map_length > num_bytes) 893 map_length = num_bytes; 894 895 for (i = 0; i < multi->num_stripes; i++, stripe++) { 896 btrfs_issue_discard(stripe->dev->bdev, 897 stripe->physical, 898 map_length); 899 } 900 kfree(multi); 901 } 902 903 return ret; 904 #else 905 return 0; 906 #endif 907 } 908 909 static noinline int free_extents(struct btrfs_trans_handle *trans, 910 struct btrfs_root *extent_root, 911 struct list_head *del_list) 912 { 913 struct btrfs_fs_info *info = extent_root->fs_info; 914 struct btrfs_path *path; 915 struct btrfs_key key, found_key; 916 struct extent_buffer *leaf; 917 struct list_head *cur; 918 struct pending_extent_op *op; 919 struct btrfs_extent_item *ei; 920 int ret, num_to_del, extent_slot = 0, found_extent = 0; 921 u32 refs; 922 u64 bytes_freed = 0; 923 924 path = btrfs_alloc_path(); 925 if (!path) 926 return -ENOMEM; 927 path->reada = 1; 928 929 search: 930 /* search for the backref for the current ref we want to delete */ 931 cur = del_list->next; 932 op = list_entry(cur, struct pending_extent_op, list); 933 ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, 934 op->orig_parent, 935 extent_root->root_key.objectid, 936 op->orig_generation, op->level, 1); 937 if (ret) { 938 printk(KERN_ERR "btrfs unable to find backref byte nr %llu " 939 "root %llu gen %llu owner %u\n", 940 (unsigned long long)op->bytenr, 941 (unsigned long long)extent_root->root_key.objectid, 942 (unsigned long long)op->orig_generation, op->level); 943 btrfs_print_leaf(extent_root, path->nodes[0]); 944 WARN_ON(1); 945 goto out; 946 } 947 948 extent_slot = path->slots[0]; 949 num_to_del = 1; 950 found_extent = 0; 951 952 /* 953 * if we aren't the first item on the leaf we can move back one and see 954 * if our ref is right next to our extent item 955 */ 956 if (likely(extent_slot)) { 
957 extent_slot--; 958 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 959 extent_slot); 960 if (found_key.objectid == op->bytenr && 961 found_key.type == BTRFS_EXTENT_ITEM_KEY && 962 found_key.offset == op->num_bytes) { 963 num_to_del++; 964 found_extent = 1; 965 } 966 } 967 968 /* 969 * if we didn't find the extent we need to delete the backref and then 970 * search for the extent item key so we can update its ref count 971 */ 972 if (!found_extent) { 973 key.objectid = op->bytenr; 974 key.type = BTRFS_EXTENT_ITEM_KEY; 975 key.offset = op->num_bytes; 976 977 ret = remove_extent_backref(trans, extent_root, path); 978 BUG_ON(ret); 979 btrfs_release_path(extent_root, path); 980 ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); 981 BUG_ON(ret); 982 extent_slot = path->slots[0]; 983 } 984 985 /* this is where we update the ref count for the extent */ 986 leaf = path->nodes[0]; 987 ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); 988 refs = btrfs_extent_refs(leaf, ei); 989 BUG_ON(refs == 0); 990 refs--; 991 btrfs_set_extent_refs(leaf, ei, refs); 992 993 btrfs_mark_buffer_dirty(leaf); 994 995 /* 996 * This extent needs deleting. The reason cur_slot is extent_slot + 997 * num_to_del is because extent_slot points to the slot where the extent 998 * is, and if the backref was not right next to the extent we will be 999 * deleting at least 1 item, and will want to start searching at the 1000 * slot directly next to extent_slot. 
However if we did find the 1001 * backref next to the extent item them we will be deleting at least 2 1002 * items and will want to start searching directly after the ref slot 1003 */ 1004 if (!refs) { 1005 struct list_head *pos, *n, *end; 1006 int cur_slot = extent_slot+num_to_del; 1007 u64 super_used; 1008 u64 root_used; 1009 1010 path->slots[0] = extent_slot; 1011 bytes_freed = op->num_bytes; 1012 1013 mutex_lock(&info->pinned_mutex); 1014 ret = pin_down_bytes(trans, extent_root, op->bytenr, 1015 op->num_bytes, op->level >= 1016 BTRFS_FIRST_FREE_OBJECTID); 1017 mutex_unlock(&info->pinned_mutex); 1018 BUG_ON(ret < 0); 1019 op->del = ret; 1020 1021 /* 1022 * we need to see if we can delete multiple things at once, so 1023 * start looping through the list of extents we are wanting to 1024 * delete and see if their extent/backref's are right next to 1025 * eachother and the extents only have 1 ref 1026 */ 1027 for (pos = cur->next; pos != del_list; pos = pos->next) { 1028 struct pending_extent_op *tmp; 1029 1030 tmp = list_entry(pos, struct pending_extent_op, list); 1031 1032 /* we only want to delete extent+ref at this stage */ 1033 if (cur_slot >= btrfs_header_nritems(leaf) - 1) 1034 break; 1035 1036 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); 1037 if (found_key.objectid != tmp->bytenr || 1038 found_key.type != BTRFS_EXTENT_ITEM_KEY || 1039 found_key.offset != tmp->num_bytes) 1040 break; 1041 1042 /* check to make sure this extent only has one ref */ 1043 ei = btrfs_item_ptr(leaf, cur_slot, 1044 struct btrfs_extent_item); 1045 if (btrfs_extent_refs(leaf, ei) != 1) 1046 break; 1047 1048 btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); 1049 if (found_key.objectid != tmp->bytenr || 1050 found_key.type != BTRFS_EXTENT_REF_KEY || 1051 found_key.offset != tmp->orig_parent) 1052 break; 1053 1054 /* 1055 * the ref is right next to the extent, we can set the 1056 * ref count to 0 since we will delete them both now 1057 */ 1058 btrfs_set_extent_refs(leaf, ei, 0); 
1059 1060 /* pin down the bytes for this extent */ 1061 mutex_lock(&info->pinned_mutex); 1062 ret = pin_down_bytes(trans, extent_root, tmp->bytenr, 1063 tmp->num_bytes, tmp->level >= 1064 BTRFS_FIRST_FREE_OBJECTID); 1065 mutex_unlock(&info->pinned_mutex); 1066 BUG_ON(ret < 0); 1067 1068 /* 1069 * use the del field to tell if we need to go ahead and 1070 * free up the extent when we delete the item or not. 1071 */ 1072 tmp->del = ret; 1073 bytes_freed += tmp->num_bytes; 1074 1075 num_to_del += 2; 1076 cur_slot += 2; 1077 } 1078 end = pos; 1079 1080 /* update the free space counters */ 1081 spin_lock(&info->delalloc_lock); 1082 super_used = btrfs_super_bytes_used(&info->super_copy); 1083 btrfs_set_super_bytes_used(&info->super_copy, 1084 super_used - bytes_freed); 1085 1086 root_used = btrfs_root_used(&extent_root->root_item); 1087 btrfs_set_root_used(&extent_root->root_item, 1088 root_used - bytes_freed); 1089 spin_unlock(&info->delalloc_lock); 1090 1091 /* delete the items */ 1092 ret = btrfs_del_items(trans, extent_root, path, 1093 path->slots[0], num_to_del); 1094 BUG_ON(ret); 1095 1096 /* 1097 * loop through the extents we deleted and do the cleanup work 1098 * on them 1099 */ 1100 for (pos = cur, n = pos->next; pos != end; 1101 pos = n, n = pos->next) { 1102 struct pending_extent_op *tmp; 1103 tmp = list_entry(pos, struct pending_extent_op, list); 1104 1105 /* 1106 * remember tmp->del tells us wether or not we pinned 1107 * down the extent 1108 */ 1109 ret = update_block_group(trans, extent_root, 1110 tmp->bytenr, tmp->num_bytes, 0, 1111 tmp->del); 1112 BUG_ON(ret); 1113 1114 list_del_init(&tmp->list); 1115 unlock_extent(&info->extent_ins, tmp->bytenr, 1116 tmp->bytenr + tmp->num_bytes - 1, 1117 GFP_NOFS); 1118 kfree(tmp); 1119 } 1120 } else if (refs && found_extent) { 1121 /* 1122 * the ref and extent were right next to eachother, but the 1123 * extent still has a ref, so just free the backref and keep 1124 * going 1125 */ 1126 ret = 
remove_extent_backref(trans, extent_root, path); 1127 BUG_ON(ret); 1128 1129 list_del_init(&op->list); 1130 unlock_extent(&info->extent_ins, op->bytenr, 1131 op->bytenr + op->num_bytes - 1, GFP_NOFS); 1132 kfree(op); 1133 } else { 1134 /* 1135 * the extent has multiple refs and the backref we were looking 1136 * for was not right next to it, so just unlock and go next, 1137 * we're good to go 1138 */ 1139 list_del_init(&op->list); 1140 unlock_extent(&info->extent_ins, op->bytenr, 1141 op->bytenr + op->num_bytes - 1, GFP_NOFS); 1142 kfree(op); 1143 } 1144 1145 btrfs_release_path(extent_root, path); 1146 if (!list_empty(del_list)) 1147 goto search; 1148 1149 out: 1150 btrfs_free_path(path); 1151 return ret; 1152 } 1153 1154 static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1155 struct btrfs_root *root, u64 bytenr, 1156 u64 orig_parent, u64 parent, 1157 u64 orig_root, u64 ref_root, 1158 u64 orig_generation, u64 ref_generation, 1159 u64 owner_objectid) 1160 { 1161 int ret; 1162 struct btrfs_root *extent_root = root->fs_info->extent_root; 1163 struct btrfs_path *path; 1164 1165 if (root == root->fs_info->extent_root) { 1166 struct pending_extent_op *extent_op; 1167 u64 num_bytes; 1168 1169 BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); 1170 num_bytes = btrfs_level_size(root, (int)owner_objectid); 1171 mutex_lock(&root->fs_info->extent_ins_mutex); 1172 if (test_range_bit(&root->fs_info->extent_ins, bytenr, 1173 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { 1174 u64 priv; 1175 ret = get_state_private(&root->fs_info->extent_ins, 1176 bytenr, &priv); 1177 BUG_ON(ret); 1178 extent_op = (struct pending_extent_op *) 1179 (unsigned long)priv; 1180 BUG_ON(extent_op->parent != orig_parent); 1181 BUG_ON(extent_op->generation != orig_generation); 1182 1183 extent_op->parent = parent; 1184 extent_op->generation = ref_generation; 1185 } else { 1186 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 1187 BUG_ON(!extent_op); 1188 1189 extent_op->type = 
PENDING_BACKREF_UPDATE; 1190 extent_op->bytenr = bytenr; 1191 extent_op->num_bytes = num_bytes; 1192 extent_op->parent = parent; 1193 extent_op->orig_parent = orig_parent; 1194 extent_op->generation = ref_generation; 1195 extent_op->orig_generation = orig_generation; 1196 extent_op->level = (int)owner_objectid; 1197 INIT_LIST_HEAD(&extent_op->list); 1198 extent_op->del = 0; 1199 1200 set_extent_bits(&root->fs_info->extent_ins, 1201 bytenr, bytenr + num_bytes - 1, 1202 EXTENT_WRITEBACK, GFP_NOFS); 1203 set_state_private(&root->fs_info->extent_ins, 1204 bytenr, (unsigned long)extent_op); 1205 } 1206 mutex_unlock(&root->fs_info->extent_ins_mutex); 1207 return 0; 1208 } 1209 1210 path = btrfs_alloc_path(); 1211 if (!path) 1212 return -ENOMEM; 1213 ret = lookup_extent_backref(trans, extent_root, path, 1214 bytenr, orig_parent, orig_root, 1215 orig_generation, owner_objectid, 1); 1216 if (ret) 1217 goto out; 1218 ret = remove_extent_backref(trans, extent_root, path); 1219 if (ret) 1220 goto out; 1221 ret = insert_extent_backref(trans, extent_root, path, bytenr, 1222 parent, ref_root, ref_generation, 1223 owner_objectid); 1224 BUG_ON(ret); 1225 finish_current_insert(trans, extent_root, 0); 1226 del_pending_extents(trans, extent_root, 0); 1227 out: 1228 btrfs_free_path(path); 1229 return ret; 1230 } 1231 1232 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1233 struct btrfs_root *root, u64 bytenr, 1234 u64 orig_parent, u64 parent, 1235 u64 ref_root, u64 ref_generation, 1236 u64 owner_objectid) 1237 { 1238 int ret; 1239 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1240 owner_objectid < BTRFS_FIRST_FREE_OBJECTID) 1241 return 0; 1242 ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, 1243 parent, ref_root, ref_root, 1244 ref_generation, ref_generation, 1245 owner_objectid); 1246 return ret; 1247 } 1248 1249 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1250 struct btrfs_root *root, u64 bytenr, 1251 u64 orig_parent, u64 parent, 
1252 u64 orig_root, u64 ref_root, 1253 u64 orig_generation, u64 ref_generation, 1254 u64 owner_objectid) 1255 { 1256 struct btrfs_path *path; 1257 int ret; 1258 struct btrfs_key key; 1259 struct extent_buffer *l; 1260 struct btrfs_extent_item *item; 1261 u32 refs; 1262 1263 path = btrfs_alloc_path(); 1264 if (!path) 1265 return -ENOMEM; 1266 1267 path->reada = 1; 1268 key.objectid = bytenr; 1269 key.type = BTRFS_EXTENT_ITEM_KEY; 1270 key.offset = (u64)-1; 1271 1272 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 1273 0, 1); 1274 if (ret < 0) 1275 return ret; 1276 BUG_ON(ret == 0 || path->slots[0] == 0); 1277 1278 path->slots[0]--; 1279 l = path->nodes[0]; 1280 1281 btrfs_item_key_to_cpu(l, &key, path->slots[0]); 1282 if (key.objectid != bytenr) { 1283 btrfs_print_leaf(root->fs_info->extent_root, path->nodes[0]); 1284 printk(KERN_ERR "btrfs wanted %llu found %llu\n", 1285 (unsigned long long)bytenr, 1286 (unsigned long long)key.objectid); 1287 BUG(); 1288 } 1289 BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); 1290 1291 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1292 refs = btrfs_extent_refs(l, item); 1293 btrfs_set_extent_refs(l, item, refs + 1); 1294 btrfs_mark_buffer_dirty(path->nodes[0]); 1295 1296 btrfs_release_path(root->fs_info->extent_root, path); 1297 1298 path->reada = 1; 1299 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1300 path, bytenr, parent, 1301 ref_root, ref_generation, 1302 owner_objectid); 1303 BUG_ON(ret); 1304 finish_current_insert(trans, root->fs_info->extent_root, 0); 1305 del_pending_extents(trans, root->fs_info->extent_root, 0); 1306 1307 btrfs_free_path(path); 1308 return 0; 1309 } 1310 1311 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1312 struct btrfs_root *root, 1313 u64 bytenr, u64 num_bytes, u64 parent, 1314 u64 ref_root, u64 ref_generation, 1315 u64 owner_objectid) 1316 { 1317 int ret; 1318 if (ref_root == BTRFS_TREE_LOG_OBJECTID && 1319 owner_objectid < 
BTRFS_FIRST_FREE_OBJECTID) 1320 return 0; 1321 ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, 1322 0, ref_root, 0, ref_generation, 1323 owner_objectid); 1324 return ret; 1325 } 1326 1327 int btrfs_extent_post_op(struct btrfs_trans_handle *trans, 1328 struct btrfs_root *root) 1329 { 1330 u64 start; 1331 u64 end; 1332 int ret; 1333 1334 while(1) { 1335 finish_current_insert(trans, root->fs_info->extent_root, 1); 1336 del_pending_extents(trans, root->fs_info->extent_root, 1); 1337 1338 /* is there more work to do? */ 1339 ret = find_first_extent_bit(&root->fs_info->pending_del, 1340 0, &start, &end, EXTENT_WRITEBACK); 1341 if (!ret) 1342 continue; 1343 ret = find_first_extent_bit(&root->fs_info->extent_ins, 1344 0, &start, &end, EXTENT_WRITEBACK); 1345 if (!ret) 1346 continue; 1347 break; 1348 } 1349 return 0; 1350 } 1351 1352 int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, 1353 struct btrfs_root *root, u64 bytenr, 1354 u64 num_bytes, u32 *refs) 1355 { 1356 struct btrfs_path *path; 1357 int ret; 1358 struct btrfs_key key; 1359 struct extent_buffer *l; 1360 struct btrfs_extent_item *item; 1361 1362 WARN_ON(num_bytes < root->sectorsize); 1363 path = btrfs_alloc_path(); 1364 path->reada = 1; 1365 key.objectid = bytenr; 1366 key.offset = num_bytes; 1367 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 1368 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, 1369 0, 0); 1370 if (ret < 0) 1371 goto out; 1372 if (ret != 0) { 1373 btrfs_print_leaf(root, path->nodes[0]); 1374 printk(KERN_INFO "btrfs failed to find block number %llu\n", 1375 (unsigned long long)bytenr); 1376 BUG(); 1377 } 1378 l = path->nodes[0]; 1379 item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); 1380 *refs = btrfs_extent_refs(l, item); 1381 out: 1382 btrfs_free_path(path); 1383 return 0; 1384 } 1385 1386 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1387 struct btrfs_root *root, u64 objectid, u64 bytenr) 1388 { 1389 struct 
btrfs_root *extent_root = root->fs_info->extent_root; 1390 struct btrfs_path *path; 1391 struct extent_buffer *leaf; 1392 struct btrfs_extent_ref *ref_item; 1393 struct btrfs_key key; 1394 struct btrfs_key found_key; 1395 u64 ref_root; 1396 u64 last_snapshot; 1397 u32 nritems; 1398 int ret; 1399 1400 key.objectid = bytenr; 1401 key.offset = (u64)-1; 1402 key.type = BTRFS_EXTENT_ITEM_KEY; 1403 1404 path = btrfs_alloc_path(); 1405 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 1406 if (ret < 0) 1407 goto out; 1408 BUG_ON(ret == 0); 1409 1410 ret = -ENOENT; 1411 if (path->slots[0] == 0) 1412 goto out; 1413 1414 path->slots[0]--; 1415 leaf = path->nodes[0]; 1416 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1417 1418 if (found_key.objectid != bytenr || 1419 found_key.type != BTRFS_EXTENT_ITEM_KEY) 1420 goto out; 1421 1422 last_snapshot = btrfs_root_last_snapshot(&root->root_item); 1423 while (1) { 1424 leaf = path->nodes[0]; 1425 nritems = btrfs_header_nritems(leaf); 1426 if (path->slots[0] >= nritems) { 1427 ret = btrfs_next_leaf(extent_root, path); 1428 if (ret < 0) 1429 goto out; 1430 if (ret == 0) 1431 continue; 1432 break; 1433 } 1434 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 1435 if (found_key.objectid != bytenr) 1436 break; 1437 1438 if (found_key.type != BTRFS_EXTENT_REF_KEY) { 1439 path->slots[0]++; 1440 continue; 1441 } 1442 1443 ref_item = btrfs_item_ptr(leaf, path->slots[0], 1444 struct btrfs_extent_ref); 1445 ref_root = btrfs_ref_root(leaf, ref_item); 1446 if ((ref_root != root->root_key.objectid && 1447 ref_root != BTRFS_TREE_LOG_OBJECTID) || 1448 objectid != btrfs_ref_objectid(leaf, ref_item)) { 1449 ret = 1; 1450 goto out; 1451 } 1452 if (btrfs_ref_generation(leaf, ref_item) <= last_snapshot) { 1453 ret = 1; 1454 goto out; 1455 } 1456 1457 path->slots[0]++; 1458 } 1459 ret = 0; 1460 out: 1461 btrfs_free_path(path); 1462 return ret; 1463 } 1464 1465 int btrfs_cache_ref(struct btrfs_trans_handle *trans, struct 
btrfs_root *root, 1466 struct extent_buffer *buf, u32 nr_extents) 1467 { 1468 struct btrfs_key key; 1469 struct btrfs_file_extent_item *fi; 1470 u64 root_gen; 1471 u32 nritems; 1472 int i; 1473 int level; 1474 int ret = 0; 1475 int shared = 0; 1476 1477 if (!root->ref_cows) 1478 return 0; 1479 1480 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 1481 shared = 0; 1482 root_gen = root->root_key.offset; 1483 } else { 1484 shared = 1; 1485 root_gen = trans->transid - 1; 1486 } 1487 1488 level = btrfs_header_level(buf); 1489 nritems = btrfs_header_nritems(buf); 1490 1491 if (level == 0) { 1492 struct btrfs_leaf_ref *ref; 1493 struct btrfs_extent_info *info; 1494 1495 ref = btrfs_alloc_leaf_ref(root, nr_extents); 1496 if (!ref) { 1497 ret = -ENOMEM; 1498 goto out; 1499 } 1500 1501 ref->root_gen = root_gen; 1502 ref->bytenr = buf->start; 1503 ref->owner = btrfs_header_owner(buf); 1504 ref->generation = btrfs_header_generation(buf); 1505 ref->nritems = nr_extents; 1506 info = ref->extents; 1507 1508 for (i = 0; nr_extents > 0 && i < nritems; i++) { 1509 u64 disk_bytenr; 1510 btrfs_item_key_to_cpu(buf, &key, i); 1511 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1512 continue; 1513 fi = btrfs_item_ptr(buf, i, 1514 struct btrfs_file_extent_item); 1515 if (btrfs_file_extent_type(buf, fi) == 1516 BTRFS_FILE_EXTENT_INLINE) 1517 continue; 1518 disk_bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1519 if (disk_bytenr == 0) 1520 continue; 1521 1522 info->bytenr = disk_bytenr; 1523 info->num_bytes = 1524 btrfs_file_extent_disk_num_bytes(buf, fi); 1525 info->objectid = key.objectid; 1526 info->offset = key.offset; 1527 info++; 1528 } 1529 1530 ret = btrfs_add_leaf_ref(root, ref, shared); 1531 if (ret == -EEXIST && shared) { 1532 struct btrfs_leaf_ref *old; 1533 old = btrfs_lookup_leaf_ref(root, ref->bytenr); 1534 BUG_ON(!old); 1535 btrfs_remove_leaf_ref(root, old); 1536 btrfs_free_leaf_ref(root, old); 1537 ret = btrfs_add_leaf_ref(root, ref, shared); 1538 } 1539 
WARN_ON(ret); 1540 btrfs_free_leaf_ref(root, ref); 1541 } 1542 out: 1543 return ret; 1544 } 1545 1546 /* when a block goes through cow, we update the reference counts of 1547 * everything that block points to. The internal pointers of the block 1548 * can be in just about any order, and it is likely to have clusters of 1549 * things that are close together and clusters of things that are not. 1550 * 1551 * To help reduce the seeks that come with updating all of these reference 1552 * counts, sort them by byte number before actual updates are done. 1553 * 1554 * struct refsort is used to match byte number to slot in the btree block. 1555 * we sort based on the byte number and then use the slot to actually 1556 * find the item. 1557 * 1558 * struct refsort is smaller than strcut btrfs_item and smaller than 1559 * struct btrfs_key_ptr. Since we're currently limited to the page size 1560 * for a btree block, there's no way for a kmalloc of refsorts for a 1561 * single node to be bigger than a page. 
1562 */ 1563 struct refsort { 1564 u64 bytenr; 1565 u32 slot; 1566 }; 1567 1568 /* 1569 * for passing into sort() 1570 */ 1571 static int refsort_cmp(const void *a_void, const void *b_void) 1572 { 1573 const struct refsort *a = a_void; 1574 const struct refsort *b = b_void; 1575 1576 if (a->bytenr < b->bytenr) 1577 return -1; 1578 if (a->bytenr > b->bytenr) 1579 return 1; 1580 return 0; 1581 } 1582 1583 1584 noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, 1585 struct btrfs_root *root, 1586 struct extent_buffer *orig_buf, 1587 struct extent_buffer *buf, u32 *nr_extents) 1588 { 1589 u64 bytenr; 1590 u64 ref_root; 1591 u64 orig_root; 1592 u64 ref_generation; 1593 u64 orig_generation; 1594 struct refsort *sorted; 1595 u32 nritems; 1596 u32 nr_file_extents = 0; 1597 struct btrfs_key key; 1598 struct btrfs_file_extent_item *fi; 1599 int i; 1600 int level; 1601 int ret = 0; 1602 int faili = 0; 1603 int refi = 0; 1604 int slot; 1605 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 1606 u64, u64, u64, u64, u64, u64, u64, u64); 1607 1608 ref_root = btrfs_header_owner(buf); 1609 ref_generation = btrfs_header_generation(buf); 1610 orig_root = btrfs_header_owner(orig_buf); 1611 orig_generation = btrfs_header_generation(orig_buf); 1612 1613 nritems = btrfs_header_nritems(buf); 1614 level = btrfs_header_level(buf); 1615 1616 sorted = kmalloc(sizeof(struct refsort) * nritems, GFP_NOFS); 1617 BUG_ON(!sorted); 1618 1619 if (root->ref_cows) { 1620 process_func = __btrfs_inc_extent_ref; 1621 } else { 1622 if (level == 0 && 1623 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 1624 goto out; 1625 if (level != 0 && 1626 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 1627 goto out; 1628 process_func = __btrfs_update_extent_ref; 1629 } 1630 1631 /* 1632 * we make two passes through the items. In the first pass we 1633 * only record the byte number and slot. 
Then we sort based on 1634 * byte number and do the actual work based on the sorted results 1635 */ 1636 for (i = 0; i < nritems; i++) { 1637 cond_resched(); 1638 if (level == 0) { 1639 btrfs_item_key_to_cpu(buf, &key, i); 1640 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1641 continue; 1642 fi = btrfs_item_ptr(buf, i, 1643 struct btrfs_file_extent_item); 1644 if (btrfs_file_extent_type(buf, fi) == 1645 BTRFS_FILE_EXTENT_INLINE) 1646 continue; 1647 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1648 if (bytenr == 0) 1649 continue; 1650 1651 nr_file_extents++; 1652 sorted[refi].bytenr = bytenr; 1653 sorted[refi].slot = i; 1654 refi++; 1655 } else { 1656 bytenr = btrfs_node_blockptr(buf, i); 1657 sorted[refi].bytenr = bytenr; 1658 sorted[refi].slot = i; 1659 refi++; 1660 } 1661 } 1662 /* 1663 * if refi == 0, we didn't actually put anything into the sorted 1664 * array and we're done 1665 */ 1666 if (refi == 0) 1667 goto out; 1668 1669 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 1670 1671 for (i = 0; i < refi; i++) { 1672 cond_resched(); 1673 slot = sorted[i].slot; 1674 bytenr = sorted[i].bytenr; 1675 1676 if (level == 0) { 1677 btrfs_item_key_to_cpu(buf, &key, slot); 1678 1679 ret = process_func(trans, root, bytenr, 1680 orig_buf->start, buf->start, 1681 orig_root, ref_root, 1682 orig_generation, ref_generation, 1683 key.objectid); 1684 1685 if (ret) { 1686 faili = slot; 1687 WARN_ON(1); 1688 goto fail; 1689 } 1690 } else { 1691 ret = process_func(trans, root, bytenr, 1692 orig_buf->start, buf->start, 1693 orig_root, ref_root, 1694 orig_generation, ref_generation, 1695 level - 1); 1696 if (ret) { 1697 faili = slot; 1698 WARN_ON(1); 1699 goto fail; 1700 } 1701 } 1702 } 1703 out: 1704 kfree(sorted); 1705 if (nr_extents) { 1706 if (level == 0) 1707 *nr_extents = nr_file_extents; 1708 else 1709 *nr_extents = nritems; 1710 } 1711 return 0; 1712 fail: 1713 kfree(sorted); 1714 WARN_ON(1); 1715 return ret; 1716 } 1717 1718 int 
btrfs_update_ref(struct btrfs_trans_handle *trans, 1719 struct btrfs_root *root, struct extent_buffer *orig_buf, 1720 struct extent_buffer *buf, int start_slot, int nr) 1721 1722 { 1723 u64 bytenr; 1724 u64 ref_root; 1725 u64 orig_root; 1726 u64 ref_generation; 1727 u64 orig_generation; 1728 struct btrfs_key key; 1729 struct btrfs_file_extent_item *fi; 1730 int i; 1731 int ret; 1732 int slot; 1733 int level; 1734 1735 BUG_ON(start_slot < 0); 1736 BUG_ON(start_slot + nr > btrfs_header_nritems(buf)); 1737 1738 ref_root = btrfs_header_owner(buf); 1739 ref_generation = btrfs_header_generation(buf); 1740 orig_root = btrfs_header_owner(orig_buf); 1741 orig_generation = btrfs_header_generation(orig_buf); 1742 level = btrfs_header_level(buf); 1743 1744 if (!root->ref_cows) { 1745 if (level == 0 && 1746 root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) 1747 return 0; 1748 if (level != 0 && 1749 root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) 1750 return 0; 1751 } 1752 1753 for (i = 0, slot = start_slot; i < nr; i++, slot++) { 1754 cond_resched(); 1755 if (level == 0) { 1756 btrfs_item_key_to_cpu(buf, &key, slot); 1757 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 1758 continue; 1759 fi = btrfs_item_ptr(buf, slot, 1760 struct btrfs_file_extent_item); 1761 if (btrfs_file_extent_type(buf, fi) == 1762 BTRFS_FILE_EXTENT_INLINE) 1763 continue; 1764 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 1765 if (bytenr == 0) 1766 continue; 1767 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1768 orig_buf->start, buf->start, 1769 orig_root, ref_root, 1770 orig_generation, ref_generation, 1771 key.objectid); 1772 if (ret) 1773 goto fail; 1774 } else { 1775 bytenr = btrfs_node_blockptr(buf, slot); 1776 ret = __btrfs_update_extent_ref(trans, root, bytenr, 1777 orig_buf->start, buf->start, 1778 orig_root, ref_root, 1779 orig_generation, ref_generation, 1780 level - 1); 1781 if (ret) 1782 goto fail; 1783 } 1784 } 1785 return 0; 1786 fail: 1787 WARN_ON(1); 1788 return -1; 1789 
} 1790 1791 static int write_one_cache_group(struct btrfs_trans_handle *trans, 1792 struct btrfs_root *root, 1793 struct btrfs_path *path, 1794 struct btrfs_block_group_cache *cache) 1795 { 1796 int ret; 1797 int pending_ret; 1798 struct btrfs_root *extent_root = root->fs_info->extent_root; 1799 unsigned long bi; 1800 struct extent_buffer *leaf; 1801 1802 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 1803 if (ret < 0) 1804 goto fail; 1805 BUG_ON(ret); 1806 1807 leaf = path->nodes[0]; 1808 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 1809 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 1810 btrfs_mark_buffer_dirty(leaf); 1811 btrfs_release_path(extent_root, path); 1812 fail: 1813 finish_current_insert(trans, extent_root, 0); 1814 pending_ret = del_pending_extents(trans, extent_root, 0); 1815 if (ret) 1816 return ret; 1817 if (pending_ret) 1818 return pending_ret; 1819 return 0; 1820 1821 } 1822 1823 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 1824 struct btrfs_root *root) 1825 { 1826 struct btrfs_block_group_cache *cache, *entry; 1827 struct rb_node *n; 1828 int err = 0; 1829 int werr = 0; 1830 struct btrfs_path *path; 1831 u64 last = 0; 1832 1833 path = btrfs_alloc_path(); 1834 if (!path) 1835 return -ENOMEM; 1836 1837 while (1) { 1838 cache = NULL; 1839 spin_lock(&root->fs_info->block_group_cache_lock); 1840 for (n = rb_first(&root->fs_info->block_group_cache_tree); 1841 n; n = rb_next(n)) { 1842 entry = rb_entry(n, struct btrfs_block_group_cache, 1843 cache_node); 1844 if (entry->dirty) { 1845 cache = entry; 1846 break; 1847 } 1848 } 1849 spin_unlock(&root->fs_info->block_group_cache_lock); 1850 1851 if (!cache) 1852 break; 1853 1854 cache->dirty = 0; 1855 last += cache->key.offset; 1856 1857 err = write_one_cache_group(trans, root, 1858 path, cache); 1859 /* 1860 * if we fail to write the cache group, we want 1861 * to keep it marked dirty in hopes that a later 1862 * write will work 1863 */ 
		if (err) {
			/*
			 * NOTE(review): cache->dirty was already cleared
			 * above and is not re-set here, so despite the
			 * comment this group is NOT retried -- confirm
			 * against later fixes before relying on it.
			 */
			werr = err;
			continue;
		}
	}
	btrfs_free_path(path);
	return werr;
}

/*
 * Return 1 if the block group containing @bytenr is read-only (or no
 * block group is found), 0 otherwise.
 */
int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr)
{
	struct btrfs_block_group_cache *block_group;
	int readonly = 0;

	block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
	if (!block_group || block_group->ro)
		readonly = 1;
	if (block_group)
		put_block_group(block_group);
	return readonly;
}

/*
 * Find the space_info for @flags and add @total_bytes/@bytes_used to
 * its counters, allocating and registering a fresh space_info if none
 * exists yet.  *space_info receives the result.  Returns 0 or -ENOMEM.
 */
static int update_space_info(struct btrfs_fs_info *info, u64 flags,
			     u64 total_bytes, u64 bytes_used,
			     struct btrfs_space_info **space_info)
{
	struct btrfs_space_info *found;

	found = __find_space_info(info, flags);
	if (found) {
		spin_lock(&found->lock);
		found->total_bytes += total_bytes;
		found->bytes_used += bytes_used;
		/* more space arrived, so we are no longer full */
		found->full = 0;
		spin_unlock(&found->lock);
		*space_info = found;
		return 0;
	}
	found = kzalloc(sizeof(*found), GFP_NOFS);
	if (!found)
		return -ENOMEM;

	list_add(&found->list, &info->space_info);
	INIT_LIST_HEAD(&found->block_groups);
	init_rwsem(&found->groups_sem);
	spin_lock_init(&found->lock);
	found->flags = flags;
	found->total_bytes = total_bytes;
	found->bytes_used = bytes_used;
	found->bytes_pinned = 0;
	found->bytes_reserved = 0;
	found->bytes_readonly = 0;
	found->bytes_delalloc = 0;
	found->full = 0;
	found->force_alloc = 0;
	*space_info = found;
	return 0;
}

/*
 * Remember which RAID-style profile bits have been used for data,
 * metadata and system chunks so later allocations can reuse them.
 */
static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags)
{
	u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 |
				   BTRFS_BLOCK_GROUP_RAID1 |
				   BTRFS_BLOCK_GROUP_RAID10 |
				   BTRFS_BLOCK_GROUP_DUP);
	if (extra_flags) {
		if (flags & BTRFS_BLOCK_GROUP_DATA)
			fs_info->avail_data_alloc_bits |= extra_flags;
		if (flags & BTRFS_BLOCK_GROUP_METADATA)
			fs_info->avail_metadata_alloc_bits |= extra_flags;
		if (flags & BTRFS_BLOCK_GROUP_SYSTEM)
			fs_info->avail_system_alloc_bits |= extra_flags;
	}
}

/*
 * Mark @cache read-only and move its unused bytes into the space_info
 * bytes_readonly counter.  Both locks are needed: space_info lock
 * first, then the block group lock.
 */
static void set_block_group_readonly(struct btrfs_block_group_cache *cache)
{
	spin_lock(&cache->space_info->lock);
	spin_lock(&cache->lock);
	if (!cache->ro) {
		cache->space_info->bytes_readonly += cache->key.offset -
					btrfs_block_group_used(&cache->item);
		cache->ro = 1;
	}
	spin_unlock(&cache->lock);
	spin_unlock(&cache->space_info->lock);
}

/*
 * Strip profile bits that the current device count cannot satisfy and
 * resolve mutually exclusive combinations (DUP vs RAID1/10, RAID1 vs
 * RAID10, RAID0 vs the mirrored profiles).  Returns the reduced flags.
 */
u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags)
{
	u64 num_devices = root->fs_info->fs_devices->rw_devices;

	if (num_devices == 1)
		flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0);
	if (num_devices < 4)
		flags &= ~BTRFS_BLOCK_GROUP_RAID10;

	if ((flags & BTRFS_BLOCK_GROUP_DUP) &&
	    (flags & (BTRFS_BLOCK_GROUP_RAID1 |
		      BTRFS_BLOCK_GROUP_RAID10))) {
		flags &= ~BTRFS_BLOCK_GROUP_DUP;
	}

	if ((flags & BTRFS_BLOCK_GROUP_RAID1) &&
	    (flags & BTRFS_BLOCK_GROUP_RAID10)) {
		flags &= ~BTRFS_BLOCK_GROUP_RAID1;
	}

	if ((flags & BTRFS_BLOCK_GROUP_RAID0) &&
	    ((flags & BTRFS_BLOCK_GROUP_RAID1) |
	     (flags & BTRFS_BLOCK_GROUP_RAID10) |
	     (flags & BTRFS_BLOCK_GROUP_DUP)))
		flags &= ~BTRFS_BLOCK_GROUP_RAID0;
	return flags;
}

/*
 * Compute the allocation profile to use: data when @data is non-zero,
 * system chunks for the chunk root, metadata otherwise.
 */
static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data)
{
	struct btrfs_fs_info *info = root->fs_info;
	u64 alloc_profile;

	if (data) {
		alloc_profile = info->avail_data_alloc_bits &
			info->data_alloc_profile;
		data = BTRFS_BLOCK_GROUP_DATA | alloc_profile;
	} else if (root == root->fs_info->chunk_root) {
		alloc_profile = info->avail_system_alloc_bits &
			info->system_alloc_profile;
		data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile;
	} else {
		alloc_profile = info->avail_metadata_alloc_bits &
			info->metadata_alloc_profile;
		data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile;
	}

	return btrfs_reduce_alloc_profile(root, data);
}

/* cache the data space_info on the inode for later reservations */
void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode)
{
	u64 alloc_target;

	alloc_target = btrfs_get_alloc_profile(root, 1);
	BTRFS_I(inode)->space_info = __find_space_info(root->fs_info,
						       alloc_target);
}

/*
 * for now this just makes sure we have at least 5% of our metadata space free
 * for use.  Tries a chunk allocation first, then a transaction commit
 * (which releases pinned bytes), before giving up with -ENOSPC.
 */
int btrfs_check_metadata_free_space(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *meta_sinfo;
	u64 alloc_target, thresh;
	int committed = 0, ret;

	/* get the space info for where the metadata will live */
	alloc_target = btrfs_get_alloc_profile(root, 0);
	meta_sinfo = __find_space_info(info, alloc_target);

again:
	spin_lock(&meta_sinfo->lock);
	/* 80% threshold while growable, 95% once the space is full */
	if (!meta_sinfo->full)
		thresh = meta_sinfo->total_bytes * 80;
	else
		thresh = meta_sinfo->total_bytes * 95;

	do_div(thresh, 100);

	if (meta_sinfo->bytes_used + meta_sinfo->bytes_reserved +
	    meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly > thresh) {
		struct btrfs_trans_handle *trans;
		if (!meta_sinfo->full) {
			meta_sinfo->force_alloc = 1;
			spin_unlock(&meta_sinfo->lock);

			trans = btrfs_start_transaction(root, 1);
			if (!trans)
				return -ENOMEM;

			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     2 * 1024 * 1024, alloc_target, 0);
			btrfs_end_transaction(trans, root);
			goto again;
		}
		spin_unlock(&meta_sinfo->lock);

		/* committing frees pinned space; try exactly once */
		if (!committed) {
			committed = 1;
			trans = btrfs_join_transaction(root, 1);
			if (!trans)
				return -ENOMEM;
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				return ret;
			goto again;
		}
		return -ENOSPC;
	}
	spin_unlock(&meta_sinfo->lock);

	return 0;
}

/*
 * This will check the space that the inode allocates from to make sure we have
 * enough space for bytes.
 */
int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode,
				u64 bytes)
{
	struct btrfs_space_info *data_sinfo;
	int ret = 0, committed = 0;

	/* make sure bytes are sectorsize aligned */
	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	data_sinfo = BTRFS_I(inode)->space_info;
again:
	/* make sure we have enough space to handle the data first */
	spin_lock(&data_sinfo->lock);
	if (data_sinfo->total_bytes - data_sinfo->bytes_used -
	    data_sinfo->bytes_delalloc - data_sinfo->bytes_reserved -
	    data_sinfo->bytes_pinned - data_sinfo->bytes_readonly -
	    data_sinfo->bytes_may_use < bytes) {
		struct btrfs_trans_handle *trans;

		/*
		 * if we don't have enough free bytes in this space then we need
		 * to alloc a new chunk.
		 */
		if (!data_sinfo->full) {
			u64 alloc_target;

			data_sinfo->force_alloc = 1;
			spin_unlock(&data_sinfo->lock);

			alloc_target = btrfs_get_alloc_profile(root, 1);
			trans = btrfs_start_transaction(root, 1);
			if (!trans)
				return -ENOMEM;

			ret = do_chunk_alloc(trans, root->fs_info->extent_root,
					     bytes + 2 * 1024 * 1024,
					     alloc_target, 0);
			btrfs_end_transaction(trans, root);
			if (ret)
				return ret;
			goto again;
		}
		spin_unlock(&data_sinfo->lock);

		/* commit the current transaction and try again */
		if (!committed) {
			committed = 1;
			trans = btrfs_join_transaction(root, 1);
			if (!trans)
				return -ENOMEM;
			ret = btrfs_commit_transaction(trans, root);
			if (ret)
				return ret;
			goto again;
		}

		/*
		 * NOTE(review): the format string is missing separators
		 * before "%llu total" and after "may use", and the u64
		 * arguments are not cast to unsigned long long as the
		 * earlier printks in this file do -- worth fixing.
		 */
		printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes"
		       ", %llu bytes_used, %llu bytes_reserved, "
		       "%llu bytes_pinned, %llu bytes_readonly, %llu may use"
		       "%llu total\n", bytes, data_sinfo->bytes_delalloc,
		       data_sinfo->bytes_used, data_sinfo->bytes_reserved,
		       data_sinfo->bytes_pinned, data_sinfo->bytes_readonly,
		       data_sinfo->bytes_may_use, data_sinfo->total_bytes);
		return -ENOSPC;
	}
	/* reservation succeeded; account it on space_info and the inode */
	data_sinfo->bytes_may_use += bytes;
	BTRFS_I(inode)->reserved_bytes += bytes;
	spin_unlock(&data_sinfo->lock);

	return btrfs_check_metadata_free_space(root);
}

/*
 * if there was an error for whatever reason after calling
 * btrfs_check_data_free_space, call this so we can cleanup the counters.
 */
void btrfs_free_reserved_data_space(struct btrfs_root *root,
				    struct inode *inode, u64 bytes)
{
	struct btrfs_space_info *data_sinfo;

	/* make sure bytes are sectorsize aligned */
	bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1);

	data_sinfo = BTRFS_I(inode)->space_info;
	spin_lock(&data_sinfo->lock);
	data_sinfo->bytes_may_use -= bytes;
	BTRFS_I(inode)->reserved_bytes -= bytes;
	spin_unlock(&data_sinfo->lock);
}

/* called when we are adding a delalloc extent to the inode's io_tree */
void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode,
				  u64 bytes)
{
	struct btrfs_space_info *data_sinfo;

	/* get the space info for where this inode will be storing its data */
	data_sinfo = BTRFS_I(inode)->space_info;

	/* make sure we have enough space to handle the data first */
	spin_lock(&data_sinfo->lock);
	data_sinfo->bytes_delalloc += bytes;

	/*
	 * we are adding a delalloc extent without calling
	 * btrfs_check_data_free_space first.  This happens on a weird
	 * writepage condition, but shouldn't hurt our accounting
	 */
	if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) {
		data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes;
		BTRFS_I(inode)->reserved_bytes = 0;
	} else {
		data_sinfo->bytes_may_use -= bytes;
		BTRFS_I(inode)->reserved_bytes -= bytes;
	}

	spin_unlock(&data_sinfo->lock);
}

/* called when we are clearing a delalloc extent from the inode's io_tree */
void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode,
			       u64 bytes)
{
	struct btrfs_space_info *info;

	info = BTRFS_I(inode)->space_info;

	spin_lock(&info->lock);
	info->bytes_delalloc -= bytes;
	spin_unlock(&info->lock);
}

/*
 * Allocate a new chunk for the profile in @flags if usage warrants it.
 * Unless @force (or the space_info's force_alloc flag) is set, nothing
 * is done while committed + pinned + reserved + @alloc_bytes stays
 * under ~60% of the writable space.  Serialized by chunk_mutex.
 * Returns 0 or the error from btrfs_alloc_chunk(); on error the
 * space_info is marked full so we stop retrying.
 */
static int do_chunk_alloc(struct btrfs_trans_handle *trans,
			  struct btrfs_root *extent_root, u64 alloc_bytes,
			  u64 flags, int force)
{
	struct btrfs_space_info *space_info;
	u64 thresh;
	int ret = 0;

	mutex_lock(&extent_root->fs_info->chunk_mutex);

	flags = btrfs_reduce_alloc_profile(extent_root, flags);

	space_info = __find_space_info(extent_root->fs_info, flags);
	if (!space_info) {
		ret = update_space_info(extent_root->fs_info, flags,
					0, 0, &space_info);
		BUG_ON(ret);
	}
	BUG_ON(!space_info);

	spin_lock(&space_info->lock);
	if (space_info->force_alloc) {
		force = 1;
		space_info->force_alloc = 0;
	}
	if (space_info->full) {
		spin_unlock(&space_info->lock);
		goto out;
	}

	thresh = space_info->total_bytes - space_info->bytes_readonly;
	thresh = div_factor(thresh, 6);
	if (!force &&
	    (space_info->bytes_used + space_info->bytes_pinned +
	     space_info->bytes_reserved + alloc_bytes) < thresh) {
		spin_unlock(&space_info->lock);
		goto out;
	}
	spin_unlock(&space_info->lock);

	ret = btrfs_alloc_chunk(trans, extent_root, flags);
	if (ret)
		space_info->full = 1;
out:
	mutex_unlock(&extent_root->fs_info->chunk_mutex);
	return ret;
}

/*
 * Add (@alloc != 0) or remove bytes from the block group usage counters
 * covering [bytenr, bytenr + num_bytes).  The range may span several
 * block groups.  When freeing with @mark_free set, the space is also
 * discarded and returned to the free space cache.  Returns 0, or -1 if
 * no block group covers @bytenr.
 */
static int update_block_group(struct btrfs_trans_handle *trans,
			      struct btrfs_root *root,
			      u64 bytenr, u64 num_bytes, int alloc,
			      int mark_free)
{
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	u64 total = num_bytes;
	u64 old_val;
	u64 byte_in_group;

	while (total) {
		cache = btrfs_lookup_block_group(info, bytenr);
		if (!cache)
			return -1;
		byte_in_group = bytenr - cache->key.objectid;
		WARN_ON(byte_in_group > cache->key.offset);

		spin_lock(&cache->space_info->lock);
		spin_lock(&cache->lock);
		cache->dirty = 1;
		old_val = btrfs_block_group_used(&cache->item);
		/* clamp to the portion inside this block group */
		num_bytes = min(total, cache->key.offset - byte_in_group);
		if (alloc) {
			old_val += num_bytes;
			cache->space_info->bytes_used += num_bytes;
			if (cache->ro)
				cache->space_info->bytes_readonly -= num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
		} else {
			old_val -= num_bytes;
			cache->space_info->bytes_used -= num_bytes;
			if (cache->ro)
				cache->space_info->bytes_readonly += num_bytes;
			btrfs_set_block_group_used(&cache->item, old_val);
			spin_unlock(&cache->lock);
			spin_unlock(&cache->space_info->lock);
			if (mark_free) {
				int ret;

				ret = btrfs_discard_extent(root, bytenr,
							   num_bytes);
				WARN_ON(ret);

				ret = btrfs_add_free_space(cache, bytenr,
							   num_bytes);
				WARN_ON(ret);
			}
		}
		put_block_group(cache);
		total -= num_bytes;
		bytenr += num_bytes;
	}
	return 0;
}

/*
 * Return the logical start of the first block group at or after
 * @search_start, or 0 if there is none.
 */
static u64 first_logical_byte(struct btrfs_root *root, u64 search_start)
{
	struct btrfs_block_group_cache *cache;
	u64 bytenr;
2317 2318 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 2319 if (!cache) 2320 return 0; 2321 2322 bytenr = cache->key.objectid; 2323 put_block_group(cache); 2324 2325 return bytenr; 2326 } 2327 2328 int btrfs_update_pinned_extents(struct btrfs_root *root, 2329 u64 bytenr, u64 num, int pin) 2330 { 2331 u64 len; 2332 struct btrfs_block_group_cache *cache; 2333 struct btrfs_fs_info *fs_info = root->fs_info; 2334 2335 WARN_ON(!mutex_is_locked(&root->fs_info->pinned_mutex)); 2336 if (pin) { 2337 set_extent_dirty(&fs_info->pinned_extents, 2338 bytenr, bytenr + num - 1, GFP_NOFS); 2339 } else { 2340 clear_extent_dirty(&fs_info->pinned_extents, 2341 bytenr, bytenr + num - 1, GFP_NOFS); 2342 } 2343 while (num > 0) { 2344 cache = btrfs_lookup_block_group(fs_info, bytenr); 2345 BUG_ON(!cache); 2346 len = min(num, cache->key.offset - 2347 (bytenr - cache->key.objectid)); 2348 if (pin) { 2349 spin_lock(&cache->space_info->lock); 2350 spin_lock(&cache->lock); 2351 cache->pinned += len; 2352 cache->space_info->bytes_pinned += len; 2353 spin_unlock(&cache->lock); 2354 spin_unlock(&cache->space_info->lock); 2355 fs_info->total_pinned += len; 2356 } else { 2357 spin_lock(&cache->space_info->lock); 2358 spin_lock(&cache->lock); 2359 cache->pinned -= len; 2360 cache->space_info->bytes_pinned -= len; 2361 spin_unlock(&cache->lock); 2362 spin_unlock(&cache->space_info->lock); 2363 fs_info->total_pinned -= len; 2364 if (cache->cached) 2365 btrfs_add_free_space(cache, bytenr, len); 2366 } 2367 put_block_group(cache); 2368 bytenr += len; 2369 num -= len; 2370 } 2371 return 0; 2372 } 2373 2374 static int update_reserved_extents(struct btrfs_root *root, 2375 u64 bytenr, u64 num, int reserve) 2376 { 2377 u64 len; 2378 struct btrfs_block_group_cache *cache; 2379 struct btrfs_fs_info *fs_info = root->fs_info; 2380 2381 while (num > 0) { 2382 cache = btrfs_lookup_block_group(fs_info, bytenr); 2383 BUG_ON(!cache); 2384 len = min(num, cache->key.offset - 2385 (bytenr - 
cache->key.objectid)); 2386 2387 spin_lock(&cache->space_info->lock); 2388 spin_lock(&cache->lock); 2389 if (reserve) { 2390 cache->reserved += len; 2391 cache->space_info->bytes_reserved += len; 2392 } else { 2393 cache->reserved -= len; 2394 cache->space_info->bytes_reserved -= len; 2395 } 2396 spin_unlock(&cache->lock); 2397 spin_unlock(&cache->space_info->lock); 2398 put_block_group(cache); 2399 bytenr += len; 2400 num -= len; 2401 } 2402 return 0; 2403 } 2404 2405 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy) 2406 { 2407 u64 last = 0; 2408 u64 start; 2409 u64 end; 2410 struct extent_io_tree *pinned_extents = &root->fs_info->pinned_extents; 2411 int ret; 2412 2413 mutex_lock(&root->fs_info->pinned_mutex); 2414 while (1) { 2415 ret = find_first_extent_bit(pinned_extents, last, 2416 &start, &end, EXTENT_DIRTY); 2417 if (ret) 2418 break; 2419 set_extent_dirty(copy, start, end, GFP_NOFS); 2420 last = end + 1; 2421 } 2422 mutex_unlock(&root->fs_info->pinned_mutex); 2423 return 0; 2424 } 2425 2426 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2427 struct btrfs_root *root, 2428 struct extent_io_tree *unpin) 2429 { 2430 u64 start; 2431 u64 end; 2432 int ret; 2433 2434 mutex_lock(&root->fs_info->pinned_mutex); 2435 while (1) { 2436 ret = find_first_extent_bit(unpin, 0, &start, &end, 2437 EXTENT_DIRTY); 2438 if (ret) 2439 break; 2440 2441 ret = btrfs_discard_extent(root, start, end + 1 - start); 2442 2443 btrfs_update_pinned_extents(root, start, end + 1 - start, 0); 2444 clear_extent_dirty(unpin, start, end, GFP_NOFS); 2445 2446 if (need_resched()) { 2447 mutex_unlock(&root->fs_info->pinned_mutex); 2448 cond_resched(); 2449 mutex_lock(&root->fs_info->pinned_mutex); 2450 } 2451 } 2452 mutex_unlock(&root->fs_info->pinned_mutex); 2453 return ret; 2454 } 2455 2456 static int finish_current_insert(struct btrfs_trans_handle *trans, 2457 struct btrfs_root *extent_root, int all) 2458 { 2459 u64 start; 2460 u64 end; 2461 u64 priv; 
2462 u64 search = 0; 2463 struct btrfs_fs_info *info = extent_root->fs_info; 2464 struct btrfs_path *path; 2465 struct pending_extent_op *extent_op, *tmp; 2466 struct list_head insert_list, update_list; 2467 int ret; 2468 int num_inserts = 0, max_inserts, restart = 0; 2469 2470 path = btrfs_alloc_path(); 2471 INIT_LIST_HEAD(&insert_list); 2472 INIT_LIST_HEAD(&update_list); 2473 2474 max_inserts = extent_root->leafsize / 2475 (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + 2476 sizeof(struct btrfs_extent_ref) + 2477 sizeof(struct btrfs_extent_item)); 2478 again: 2479 mutex_lock(&info->extent_ins_mutex); 2480 while (1) { 2481 ret = find_first_extent_bit(&info->extent_ins, search, &start, 2482 &end, EXTENT_WRITEBACK); 2483 if (ret) { 2484 if (restart && !num_inserts && 2485 list_empty(&update_list)) { 2486 restart = 0; 2487 search = 0; 2488 continue; 2489 } 2490 break; 2491 } 2492 2493 ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); 2494 if (!ret) { 2495 if (all) 2496 restart = 1; 2497 search = end + 1; 2498 if (need_resched()) { 2499 mutex_unlock(&info->extent_ins_mutex); 2500 cond_resched(); 2501 mutex_lock(&info->extent_ins_mutex); 2502 } 2503 continue; 2504 } 2505 2506 ret = get_state_private(&info->extent_ins, start, &priv); 2507 BUG_ON(ret); 2508 extent_op = (struct pending_extent_op *)(unsigned long) priv; 2509 2510 if (extent_op->type == PENDING_EXTENT_INSERT) { 2511 num_inserts++; 2512 list_add_tail(&extent_op->list, &insert_list); 2513 search = end + 1; 2514 if (num_inserts == max_inserts) { 2515 restart = 1; 2516 break; 2517 } 2518 } else if (extent_op->type == PENDING_BACKREF_UPDATE) { 2519 list_add_tail(&extent_op->list, &update_list); 2520 search = end + 1; 2521 } else { 2522 BUG(); 2523 } 2524 } 2525 2526 /* 2527 * process the update list, clear the writeback bit for it, and if 2528 * somebody marked this thing for deletion then just unlock it and be 2529 * done, the free_extents will handle it 2530 */ 2531 
list_for_each_entry_safe(extent_op, tmp, &update_list, list) { 2532 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2533 extent_op->bytenr + extent_op->num_bytes - 1, 2534 EXTENT_WRITEBACK, GFP_NOFS); 2535 if (extent_op->del) { 2536 list_del_init(&extent_op->list); 2537 unlock_extent(&info->extent_ins, extent_op->bytenr, 2538 extent_op->bytenr + extent_op->num_bytes 2539 - 1, GFP_NOFS); 2540 kfree(extent_op); 2541 } 2542 } 2543 mutex_unlock(&info->extent_ins_mutex); 2544 2545 /* 2546 * still have things left on the update list, go ahead an update 2547 * everything 2548 */ 2549 if (!list_empty(&update_list)) { 2550 ret = update_backrefs(trans, extent_root, path, &update_list); 2551 BUG_ON(ret); 2552 2553 /* we may have COW'ed new blocks, so lets start over */ 2554 if (all) 2555 restart = 1; 2556 } 2557 2558 /* 2559 * if no inserts need to be done, but we skipped some extents and we 2560 * need to make sure everything is cleaned then reset everything and 2561 * go back to the beginning 2562 */ 2563 if (!num_inserts && restart) { 2564 search = 0; 2565 restart = 0; 2566 INIT_LIST_HEAD(&update_list); 2567 INIT_LIST_HEAD(&insert_list); 2568 goto again; 2569 } else if (!num_inserts) { 2570 goto out; 2571 } 2572 2573 /* 2574 * process the insert extents list. Again if we are deleting this 2575 * extent, then just unlock it, pin down the bytes if need be, and be 2576 * done with it. 
Saves us from having to actually insert the extent 2577 * into the tree and then subsequently come along and delete it 2578 */ 2579 mutex_lock(&info->extent_ins_mutex); 2580 list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { 2581 clear_extent_bits(&info->extent_ins, extent_op->bytenr, 2582 extent_op->bytenr + extent_op->num_bytes - 1, 2583 EXTENT_WRITEBACK, GFP_NOFS); 2584 if (extent_op->del) { 2585 u64 used; 2586 list_del_init(&extent_op->list); 2587 unlock_extent(&info->extent_ins, extent_op->bytenr, 2588 extent_op->bytenr + extent_op->num_bytes 2589 - 1, GFP_NOFS); 2590 2591 mutex_lock(&extent_root->fs_info->pinned_mutex); 2592 ret = pin_down_bytes(trans, extent_root, 2593 extent_op->bytenr, 2594 extent_op->num_bytes, 0); 2595 mutex_unlock(&extent_root->fs_info->pinned_mutex); 2596 2597 spin_lock(&info->delalloc_lock); 2598 used = btrfs_super_bytes_used(&info->super_copy); 2599 btrfs_set_super_bytes_used(&info->super_copy, 2600 used - extent_op->num_bytes); 2601 used = btrfs_root_used(&extent_root->root_item); 2602 btrfs_set_root_used(&extent_root->root_item, 2603 used - extent_op->num_bytes); 2604 spin_unlock(&info->delalloc_lock); 2605 2606 ret = update_block_group(trans, extent_root, 2607 extent_op->bytenr, 2608 extent_op->num_bytes, 2609 0, ret > 0); 2610 BUG_ON(ret); 2611 kfree(extent_op); 2612 num_inserts--; 2613 } 2614 } 2615 mutex_unlock(&info->extent_ins_mutex); 2616 2617 ret = insert_extents(trans, extent_root, path, &insert_list, 2618 num_inserts); 2619 BUG_ON(ret); 2620 2621 /* 2622 * if restart is set for whatever reason we need to go back and start 2623 * searching through the pending list again. 2624 * 2625 * We just inserted some extents, which could have resulted in new 2626 * blocks being allocated, which would result in new blocks needing 2627 * updates, so if all is set we _must_ restart to get the updated 2628 * blocks. 
2629 */ 2630 if (restart || all) { 2631 INIT_LIST_HEAD(&insert_list); 2632 INIT_LIST_HEAD(&update_list); 2633 search = 0; 2634 restart = 0; 2635 num_inserts = 0; 2636 goto again; 2637 } 2638 out: 2639 btrfs_free_path(path); 2640 return 0; 2641 } 2642 2643 static int pin_down_bytes(struct btrfs_trans_handle *trans, 2644 struct btrfs_root *root, 2645 u64 bytenr, u64 num_bytes, int is_data) 2646 { 2647 int err = 0; 2648 struct extent_buffer *buf; 2649 2650 if (is_data) 2651 goto pinit; 2652 2653 buf = btrfs_find_tree_block(root, bytenr, num_bytes); 2654 if (!buf) 2655 goto pinit; 2656 2657 /* we can reuse a block if it hasn't been written 2658 * and it is from this transaction. We can't 2659 * reuse anything from the tree log root because 2660 * it has tiny sub-transactions. 2661 */ 2662 if (btrfs_buffer_uptodate(buf, 0) && 2663 btrfs_try_tree_lock(buf)) { 2664 u64 header_owner = btrfs_header_owner(buf); 2665 u64 header_transid = btrfs_header_generation(buf); 2666 if (header_owner != BTRFS_TREE_LOG_OBJECTID && 2667 header_owner != BTRFS_TREE_RELOC_OBJECTID && 2668 header_transid == trans->transid && 2669 !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 2670 clean_tree_block(NULL, root, buf); 2671 btrfs_tree_unlock(buf); 2672 free_extent_buffer(buf); 2673 return 1; 2674 } 2675 btrfs_tree_unlock(buf); 2676 } 2677 free_extent_buffer(buf); 2678 pinit: 2679 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 2680 2681 BUG_ON(err < 0); 2682 return 0; 2683 } 2684 2685 /* 2686 * remove an extent from the root, returns 0 on success 2687 */ 2688 static int __free_extent(struct btrfs_trans_handle *trans, 2689 struct btrfs_root *root, 2690 u64 bytenr, u64 num_bytes, u64 parent, 2691 u64 root_objectid, u64 ref_generation, 2692 u64 owner_objectid, int pin, int mark_free) 2693 { 2694 struct btrfs_path *path; 2695 struct btrfs_key key; 2696 struct btrfs_fs_info *info = root->fs_info; 2697 struct btrfs_root *extent_root = info->extent_root; 2698 struct extent_buffer *leaf; 
2699 int ret; 2700 int extent_slot = 0; 2701 int found_extent = 0; 2702 int num_to_del = 1; 2703 struct btrfs_extent_item *ei; 2704 u32 refs; 2705 2706 key.objectid = bytenr; 2707 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 2708 key.offset = num_bytes; 2709 path = btrfs_alloc_path(); 2710 if (!path) 2711 return -ENOMEM; 2712 2713 path->reada = 1; 2714 ret = lookup_extent_backref(trans, extent_root, path, 2715 bytenr, parent, root_objectid, 2716 ref_generation, owner_objectid, 1); 2717 if (ret == 0) { 2718 struct btrfs_key found_key; 2719 extent_slot = path->slots[0]; 2720 while (extent_slot > 0) { 2721 extent_slot--; 2722 btrfs_item_key_to_cpu(path->nodes[0], &found_key, 2723 extent_slot); 2724 if (found_key.objectid != bytenr) 2725 break; 2726 if (found_key.type == BTRFS_EXTENT_ITEM_KEY && 2727 found_key.offset == num_bytes) { 2728 found_extent = 1; 2729 break; 2730 } 2731 if (path->slots[0] - extent_slot > 5) 2732 break; 2733 } 2734 if (!found_extent) { 2735 ret = remove_extent_backref(trans, extent_root, path); 2736 BUG_ON(ret); 2737 btrfs_release_path(extent_root, path); 2738 ret = btrfs_search_slot(trans, extent_root, 2739 &key, path, -1, 1); 2740 if (ret) { 2741 printk(KERN_ERR "umm, got %d back from search" 2742 ", was looking for %llu\n", ret, 2743 (unsigned long long)bytenr); 2744 btrfs_print_leaf(extent_root, path->nodes[0]); 2745 } 2746 BUG_ON(ret); 2747 extent_slot = path->slots[0]; 2748 } 2749 } else { 2750 btrfs_print_leaf(extent_root, path->nodes[0]); 2751 WARN_ON(1); 2752 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 2753 "root %llu gen %llu owner %llu\n", 2754 (unsigned long long)bytenr, 2755 (unsigned long long)root_objectid, 2756 (unsigned long long)ref_generation, 2757 (unsigned long long)owner_objectid); 2758 } 2759 2760 leaf = path->nodes[0]; 2761 ei = btrfs_item_ptr(leaf, extent_slot, 2762 struct btrfs_extent_item); 2763 refs = btrfs_extent_refs(leaf, ei); 2764 BUG_ON(refs == 0); 2765 refs -= 1; 2766 
btrfs_set_extent_refs(leaf, ei, refs); 2767 2768 btrfs_mark_buffer_dirty(leaf); 2769 2770 if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { 2771 struct btrfs_extent_ref *ref; 2772 ref = btrfs_item_ptr(leaf, path->slots[0], 2773 struct btrfs_extent_ref); 2774 BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); 2775 /* if the back ref and the extent are next to each other 2776 * they get deleted below in one shot 2777 */ 2778 path->slots[0] = extent_slot; 2779 num_to_del = 2; 2780 } else if (found_extent) { 2781 /* otherwise delete the extent back ref */ 2782 ret = remove_extent_backref(trans, extent_root, path); 2783 BUG_ON(ret); 2784 /* if refs are 0, we need to setup the path for deletion */ 2785 if (refs == 0) { 2786 btrfs_release_path(extent_root, path); 2787 ret = btrfs_search_slot(trans, extent_root, &key, path, 2788 -1, 1); 2789 BUG_ON(ret); 2790 } 2791 } 2792 2793 if (refs == 0) { 2794 u64 super_used; 2795 u64 root_used; 2796 2797 if (pin) { 2798 mutex_lock(&root->fs_info->pinned_mutex); 2799 ret = pin_down_bytes(trans, root, bytenr, num_bytes, 2800 owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); 2801 mutex_unlock(&root->fs_info->pinned_mutex); 2802 if (ret > 0) 2803 mark_free = 1; 2804 BUG_ON(ret < 0); 2805 } 2806 /* block accounting for super block */ 2807 spin_lock(&info->delalloc_lock); 2808 super_used = btrfs_super_bytes_used(&info->super_copy); 2809 btrfs_set_super_bytes_used(&info->super_copy, 2810 super_used - num_bytes); 2811 2812 /* block accounting for root item */ 2813 root_used = btrfs_root_used(&root->root_item); 2814 btrfs_set_root_used(&root->root_item, 2815 root_used - num_bytes); 2816 spin_unlock(&info->delalloc_lock); 2817 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 2818 num_to_del); 2819 BUG_ON(ret); 2820 btrfs_release_path(extent_root, path); 2821 2822 if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { 2823 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 2824 BUG_ON(ret); 2825 } 2826 2827 ret = 
update_block_group(trans, root, bytenr, num_bytes, 0, 2828 mark_free); 2829 BUG_ON(ret); 2830 } 2831 btrfs_free_path(path); 2832 finish_current_insert(trans, extent_root, 0); 2833 return ret; 2834 } 2835 2836 /* 2837 * find all the blocks marked as pending in the radix tree and remove 2838 * them from the extent map 2839 */ 2840 static int del_pending_extents(struct btrfs_trans_handle *trans, 2841 struct btrfs_root *extent_root, int all) 2842 { 2843 int ret; 2844 int err = 0; 2845 u64 start; 2846 u64 end; 2847 u64 priv; 2848 u64 search = 0; 2849 int nr = 0, skipped = 0; 2850 struct extent_io_tree *pending_del; 2851 struct extent_io_tree *extent_ins; 2852 struct pending_extent_op *extent_op; 2853 struct btrfs_fs_info *info = extent_root->fs_info; 2854 struct list_head delete_list; 2855 2856 INIT_LIST_HEAD(&delete_list); 2857 extent_ins = &extent_root->fs_info->extent_ins; 2858 pending_del = &extent_root->fs_info->pending_del; 2859 2860 again: 2861 mutex_lock(&info->extent_ins_mutex); 2862 while (1) { 2863 ret = find_first_extent_bit(pending_del, search, &start, &end, 2864 EXTENT_WRITEBACK); 2865 if (ret) { 2866 if (all && skipped && !nr) { 2867 search = 0; 2868 skipped = 0; 2869 continue; 2870 } 2871 mutex_unlock(&info->extent_ins_mutex); 2872 break; 2873 } 2874 2875 ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); 2876 if (!ret) { 2877 search = end+1; 2878 skipped = 1; 2879 2880 if (need_resched()) { 2881 mutex_unlock(&info->extent_ins_mutex); 2882 cond_resched(); 2883 mutex_lock(&info->extent_ins_mutex); 2884 } 2885 2886 continue; 2887 } 2888 BUG_ON(ret < 0); 2889 2890 ret = get_state_private(pending_del, start, &priv); 2891 BUG_ON(ret); 2892 extent_op = (struct pending_extent_op *)(unsigned long)priv; 2893 2894 clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, 2895 GFP_NOFS); 2896 if (!test_range_bit(extent_ins, start, end, 2897 EXTENT_WRITEBACK, 0)) { 2898 list_add_tail(&extent_op->list, &delete_list); 2899 nr++; 2900 } else { 2901 
kfree(extent_op); 2902 2903 ret = get_state_private(&info->extent_ins, start, 2904 &priv); 2905 BUG_ON(ret); 2906 extent_op = (struct pending_extent_op *) 2907 (unsigned long)priv; 2908 2909 clear_extent_bits(&info->extent_ins, start, end, 2910 EXTENT_WRITEBACK, GFP_NOFS); 2911 2912 if (extent_op->type == PENDING_BACKREF_UPDATE) { 2913 list_add_tail(&extent_op->list, &delete_list); 2914 search = end + 1; 2915 nr++; 2916 continue; 2917 } 2918 2919 mutex_lock(&extent_root->fs_info->pinned_mutex); 2920 ret = pin_down_bytes(trans, extent_root, start, 2921 end + 1 - start, 0); 2922 mutex_unlock(&extent_root->fs_info->pinned_mutex); 2923 2924 ret = update_block_group(trans, extent_root, start, 2925 end + 1 - start, 0, ret > 0); 2926 2927 unlock_extent(extent_ins, start, end, GFP_NOFS); 2928 BUG_ON(ret); 2929 kfree(extent_op); 2930 } 2931 if (ret) 2932 err = ret; 2933 2934 search = end + 1; 2935 2936 if (need_resched()) { 2937 mutex_unlock(&info->extent_ins_mutex); 2938 cond_resched(); 2939 mutex_lock(&info->extent_ins_mutex); 2940 } 2941 } 2942 2943 if (nr) { 2944 ret = free_extents(trans, extent_root, &delete_list); 2945 BUG_ON(ret); 2946 } 2947 2948 if (all && skipped) { 2949 INIT_LIST_HEAD(&delete_list); 2950 search = 0; 2951 nr = 0; 2952 goto again; 2953 } 2954 2955 if (!err) 2956 finish_current_insert(trans, extent_root, 0); 2957 return err; 2958 } 2959 2960 /* 2961 * remove an extent from the root, returns 0 on success 2962 */ 2963 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 2964 struct btrfs_root *root, 2965 u64 bytenr, u64 num_bytes, u64 parent, 2966 u64 root_objectid, u64 ref_generation, 2967 u64 owner_objectid, int pin) 2968 { 2969 struct btrfs_root *extent_root = root->fs_info->extent_root; 2970 int pending_ret; 2971 int ret; 2972 2973 WARN_ON(num_bytes < root->sectorsize); 2974 if (root == extent_root) { 2975 struct pending_extent_op *extent_op = NULL; 2976 2977 mutex_lock(&root->fs_info->extent_ins_mutex); 2978 if 
(test_range_bit(&root->fs_info->extent_ins, bytenr, 2979 bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { 2980 u64 priv; 2981 ret = get_state_private(&root->fs_info->extent_ins, 2982 bytenr, &priv); 2983 BUG_ON(ret); 2984 extent_op = (struct pending_extent_op *) 2985 (unsigned long)priv; 2986 2987 extent_op->del = 1; 2988 if (extent_op->type == PENDING_EXTENT_INSERT) { 2989 mutex_unlock(&root->fs_info->extent_ins_mutex); 2990 return 0; 2991 } 2992 } 2993 2994 if (extent_op) { 2995 ref_generation = extent_op->orig_generation; 2996 parent = extent_op->orig_parent; 2997 } 2998 2999 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 3000 BUG_ON(!extent_op); 3001 3002 extent_op->type = PENDING_EXTENT_DELETE; 3003 extent_op->bytenr = bytenr; 3004 extent_op->num_bytes = num_bytes; 3005 extent_op->parent = parent; 3006 extent_op->orig_parent = parent; 3007 extent_op->generation = ref_generation; 3008 extent_op->orig_generation = ref_generation; 3009 extent_op->level = (int)owner_objectid; 3010 INIT_LIST_HEAD(&extent_op->list); 3011 extent_op->del = 0; 3012 3013 set_extent_bits(&root->fs_info->pending_del, 3014 bytenr, bytenr + num_bytes - 1, 3015 EXTENT_WRITEBACK, GFP_NOFS); 3016 set_state_private(&root->fs_info->pending_del, 3017 bytenr, (unsigned long)extent_op); 3018 mutex_unlock(&root->fs_info->extent_ins_mutex); 3019 return 0; 3020 } 3021 /* if metadata always pin */ 3022 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 3023 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3024 mutex_lock(&root->fs_info->pinned_mutex); 3025 btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); 3026 mutex_unlock(&root->fs_info->pinned_mutex); 3027 update_reserved_extents(root, bytenr, num_bytes, 0); 3028 return 0; 3029 } 3030 pin = 1; 3031 } 3032 3033 /* if data pin when any transaction has committed this */ 3034 if (ref_generation != trans->transid) 3035 pin = 1; 3036 3037 ret = __free_extent(trans, root, bytenr, num_bytes, parent, 3038 root_objectid, ref_generation, 3039 
owner_objectid, pin, pin == 0); 3040 3041 finish_current_insert(trans, root->fs_info->extent_root, 0); 3042 pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); 3043 return ret ? ret : pending_ret; 3044 } 3045 3046 int btrfs_free_extent(struct btrfs_trans_handle *trans, 3047 struct btrfs_root *root, 3048 u64 bytenr, u64 num_bytes, u64 parent, 3049 u64 root_objectid, u64 ref_generation, 3050 u64 owner_objectid, int pin) 3051 { 3052 int ret; 3053 3054 ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, 3055 root_objectid, ref_generation, 3056 owner_objectid, pin); 3057 return ret; 3058 } 3059 3060 static u64 stripe_align(struct btrfs_root *root, u64 val) 3061 { 3062 u64 mask = ((u64)root->stripesize - 1); 3063 u64 ret = (val + mask) & ~mask; 3064 return ret; 3065 } 3066 3067 /* 3068 * walks the btree of allocated extents and find a hole of a given size. 3069 * The key ins is changed to record the hole: 3070 * ins->objectid == block start 3071 * ins->flags = BTRFS_EXTENT_ITEM_KEY 3072 * ins->offset == number of blocks 3073 * Any available blocks before search_start are skipped. 
3074 */ 3075 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 3076 struct btrfs_root *orig_root, 3077 u64 num_bytes, u64 empty_size, 3078 u64 search_start, u64 search_end, 3079 u64 hint_byte, struct btrfs_key *ins, 3080 u64 exclude_start, u64 exclude_nr, 3081 int data) 3082 { 3083 int ret = 0; 3084 struct btrfs_root *root = orig_root->fs_info->extent_root; 3085 u64 total_needed = num_bytes; 3086 u64 *last_ptr = NULL; 3087 u64 last_wanted = 0; 3088 struct btrfs_block_group_cache *block_group = NULL; 3089 int chunk_alloc_done = 0; 3090 int empty_cluster = 2 * 1024 * 1024; 3091 int allowed_chunk_alloc = 0; 3092 struct list_head *head = NULL, *cur = NULL; 3093 int loop = 0; 3094 int extra_loop = 0; 3095 struct btrfs_space_info *space_info; 3096 3097 WARN_ON(num_bytes < root->sectorsize); 3098 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 3099 ins->objectid = 0; 3100 ins->offset = 0; 3101 3102 if (orig_root->ref_cows || empty_size) 3103 allowed_chunk_alloc = 1; 3104 3105 if (data & BTRFS_BLOCK_GROUP_METADATA) { 3106 last_ptr = &root->fs_info->last_alloc; 3107 if (!btrfs_test_opt(root, SSD)) 3108 empty_cluster = 64 * 1024; 3109 } 3110 3111 if ((data & BTRFS_BLOCK_GROUP_DATA) && btrfs_test_opt(root, SSD)) 3112 last_ptr = &root->fs_info->last_data_alloc; 3113 3114 if (last_ptr) { 3115 if (*last_ptr) { 3116 hint_byte = *last_ptr; 3117 last_wanted = *last_ptr; 3118 } else 3119 empty_size += empty_cluster; 3120 } else { 3121 empty_cluster = 0; 3122 } 3123 search_start = max(search_start, first_logical_byte(root, 0)); 3124 search_start = max(search_start, hint_byte); 3125 3126 if (last_wanted && search_start != last_wanted) { 3127 last_wanted = 0; 3128 empty_size += empty_cluster; 3129 } 3130 3131 total_needed += empty_size; 3132 block_group = btrfs_lookup_block_group(root->fs_info, search_start); 3133 if (!block_group) 3134 block_group = btrfs_lookup_first_block_group(root->fs_info, 3135 search_start); 3136 space_info = __find_space_info(root->fs_info, 
data); 3137 3138 down_read(&space_info->groups_sem); 3139 while (1) { 3140 struct btrfs_free_space *free_space; 3141 /* 3142 * the only way this happens if our hint points to a block 3143 * group thats not of the proper type, while looping this 3144 * should never happen 3145 */ 3146 if (empty_size) 3147 extra_loop = 1; 3148 3149 if (!block_group) 3150 goto new_group_no_lock; 3151 3152 if (unlikely(!block_group->cached)) { 3153 mutex_lock(&block_group->cache_mutex); 3154 ret = cache_block_group(root, block_group); 3155 mutex_unlock(&block_group->cache_mutex); 3156 if (ret) 3157 break; 3158 } 3159 3160 mutex_lock(&block_group->alloc_mutex); 3161 if (unlikely(!block_group_bits(block_group, data))) 3162 goto new_group; 3163 3164 if (unlikely(block_group->ro)) 3165 goto new_group; 3166 3167 free_space = btrfs_find_free_space(block_group, search_start, 3168 total_needed); 3169 if (free_space) { 3170 u64 start = block_group->key.objectid; 3171 u64 end = block_group->key.objectid + 3172 block_group->key.offset; 3173 3174 search_start = stripe_align(root, free_space->offset); 3175 3176 /* move on to the next group */ 3177 if (search_start + num_bytes >= search_end) 3178 goto new_group; 3179 3180 /* move on to the next group */ 3181 if (search_start + num_bytes > end) 3182 goto new_group; 3183 3184 if (last_wanted && search_start != last_wanted) { 3185 total_needed += empty_cluster; 3186 empty_size += empty_cluster; 3187 last_wanted = 0; 3188 /* 3189 * if search_start is still in this block group 3190 * then we just re-search this block group 3191 */ 3192 if (search_start >= start && 3193 search_start < end) { 3194 mutex_unlock(&block_group->alloc_mutex); 3195 continue; 3196 } 3197 3198 /* else we go to the next block group */ 3199 goto new_group; 3200 } 3201 3202 if (exclude_nr > 0 && 3203 (search_start + num_bytes > exclude_start && 3204 search_start < exclude_start + exclude_nr)) { 3205 search_start = exclude_start + exclude_nr; 3206 /* 3207 * if search_start is still in 
this block group 3208 * then we just re-search this block group 3209 */ 3210 if (search_start >= start && 3211 search_start < end) { 3212 mutex_unlock(&block_group->alloc_mutex); 3213 last_wanted = 0; 3214 continue; 3215 } 3216 3217 /* else we go to the next block group */ 3218 goto new_group; 3219 } 3220 3221 ins->objectid = search_start; 3222 ins->offset = num_bytes; 3223 3224 btrfs_remove_free_space_lock(block_group, search_start, 3225 num_bytes); 3226 /* we are all good, lets return */ 3227 mutex_unlock(&block_group->alloc_mutex); 3228 break; 3229 } 3230 new_group: 3231 mutex_unlock(&block_group->alloc_mutex); 3232 put_block_group(block_group); 3233 block_group = NULL; 3234 new_group_no_lock: 3235 /* don't try to compare new allocations against the 3236 * last allocation any more 3237 */ 3238 last_wanted = 0; 3239 3240 /* 3241 * Here's how this works. 3242 * loop == 0: we were searching a block group via a hint 3243 * and didn't find anything, so we start at 3244 * the head of the block groups and keep searching 3245 * loop == 1: we're searching through all of the block groups 3246 * if we hit the head again we have searched 3247 * all of the block groups for this space and we 3248 * need to try and allocate, if we cant error out. 3249 * loop == 2: we allocated more space and are looping through 3250 * all of the block groups again. 3251 */ 3252 if (loop == 0) { 3253 head = &space_info->block_groups; 3254 cur = head->next; 3255 loop++; 3256 } else if (loop == 1 && cur == head) { 3257 int keep_going; 3258 3259 /* at this point we give up on the empty_size 3260 * allocations and just try to allocate the min 3261 * space. 3262 * 3263 * The extra_loop field was set if an empty_size 3264 * allocation was attempted above, and if this 3265 * is try we need to try the loop again without 3266 * the additional empty_size. 
3267 */ 3268 total_needed -= empty_size; 3269 empty_size = 0; 3270 keep_going = extra_loop; 3271 loop++; 3272 3273 if (allowed_chunk_alloc && !chunk_alloc_done) { 3274 up_read(&space_info->groups_sem); 3275 ret = do_chunk_alloc(trans, root, num_bytes + 3276 2 * 1024 * 1024, data, 1); 3277 down_read(&space_info->groups_sem); 3278 if (ret < 0) 3279 goto loop_check; 3280 head = &space_info->block_groups; 3281 /* 3282 * we've allocated a new chunk, keep 3283 * trying 3284 */ 3285 keep_going = 1; 3286 chunk_alloc_done = 1; 3287 } else if (!allowed_chunk_alloc) { 3288 space_info->force_alloc = 1; 3289 } 3290 loop_check: 3291 if (keep_going) { 3292 cur = head->next; 3293 extra_loop = 0; 3294 } else { 3295 break; 3296 } 3297 } else if (cur == head) { 3298 break; 3299 } 3300 3301 block_group = list_entry(cur, struct btrfs_block_group_cache, 3302 list); 3303 atomic_inc(&block_group->count); 3304 3305 search_start = block_group->key.objectid; 3306 cur = cur->next; 3307 } 3308 3309 /* we found what we needed */ 3310 if (ins->objectid) { 3311 if (!(data & BTRFS_BLOCK_GROUP_DATA)) 3312 trans->block_group = block_group->key.objectid; 3313 3314 if (last_ptr) 3315 *last_ptr = ins->objectid + ins->offset; 3316 ret = 0; 3317 } else if (!ret) { 3318 printk(KERN_ERR "btrfs searching for %llu bytes, " 3319 "num_bytes %llu, loop %d, allowed_alloc %d\n", 3320 (unsigned long long)total_needed, 3321 (unsigned long long)num_bytes, 3322 loop, allowed_chunk_alloc); 3323 ret = -ENOSPC; 3324 } 3325 if (block_group) 3326 put_block_group(block_group); 3327 3328 up_read(&space_info->groups_sem); 3329 return ret; 3330 } 3331 3332 static void dump_space_info(struct btrfs_space_info *info, u64 bytes) 3333 { 3334 struct btrfs_block_group_cache *cache; 3335 3336 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 3337 (unsigned long long)(info->total_bytes - info->bytes_used - 3338 info->bytes_pinned - info->bytes_reserved), 3339 (info->full) ? 
"" : "not "); 3340 printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," 3341 " may_use=%llu, used=%llu\n", info->total_bytes, 3342 info->bytes_pinned, info->bytes_delalloc, info->bytes_may_use, 3343 info->bytes_used); 3344 3345 down_read(&info->groups_sem); 3346 list_for_each_entry(cache, &info->block_groups, list) { 3347 spin_lock(&cache->lock); 3348 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 3349 "%llu pinned %llu reserved\n", 3350 (unsigned long long)cache->key.objectid, 3351 (unsigned long long)cache->key.offset, 3352 (unsigned long long)btrfs_block_group_used(&cache->item), 3353 (unsigned long long)cache->pinned, 3354 (unsigned long long)cache->reserved); 3355 btrfs_dump_free_space(cache, bytes); 3356 spin_unlock(&cache->lock); 3357 } 3358 up_read(&info->groups_sem); 3359 } 3360 3361 static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, 3362 struct btrfs_root *root, 3363 u64 num_bytes, u64 min_alloc_size, 3364 u64 empty_size, u64 hint_byte, 3365 u64 search_end, struct btrfs_key *ins, 3366 u64 data) 3367 { 3368 int ret; 3369 u64 search_start = 0; 3370 struct btrfs_fs_info *info = root->fs_info; 3371 3372 data = btrfs_get_alloc_profile(root, data); 3373 again: 3374 /* 3375 * the only place that sets empty_size is btrfs_realloc_node, which 3376 * is not called recursively on allocations 3377 */ 3378 if (empty_size || root->ref_cows) { 3379 if (!(data & BTRFS_BLOCK_GROUP_METADATA)) { 3380 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3381 2 * 1024 * 1024, 3382 BTRFS_BLOCK_GROUP_METADATA | 3383 (info->metadata_alloc_profile & 3384 info->avail_metadata_alloc_bits), 0); 3385 } 3386 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3387 num_bytes + 2 * 1024 * 1024, data, 0); 3388 } 3389 3390 WARN_ON(num_bytes < root->sectorsize); 3391 ret = find_free_extent(trans, root, num_bytes, empty_size, 3392 search_start, search_end, hint_byte, ins, 3393 trans->alloc_exclude_start, 3394 trans->alloc_exclude_nr, 
			       data);

	if (ret == -ENOSPC && num_bytes > min_alloc_size) {
		/* halve the request (sector aligned) and force a chunk
		 * allocation before retrying
		 */
		num_bytes = num_bytes >> 1;
		num_bytes = num_bytes & ~(root->sectorsize - 1);
		num_bytes = max(num_bytes, min_alloc_size);
		do_chunk_alloc(trans, root->fs_info->extent_root,
			       num_bytes, data, 1);
		goto again;
	}
	if (ret) {
		struct btrfs_space_info *sinfo;

		sinfo = __find_space_info(root->fs_info, data);
		printk(KERN_ERR "btrfs allocation failed flags %llu, "
		       "wanted %llu\n", (unsigned long long)data,
		       (unsigned long long)num_bytes);
		dump_space_info(sinfo, num_bytes);
		BUG();
	}

	return ret;
}

/*
 * return a still-reserved extent to the free space cache (discarding
 * it on disk) and drop the reservation taken at allocation time
 */
int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
{
	struct btrfs_block_group_cache *cache;
	int ret = 0;

	cache = btrfs_lookup_block_group(root->fs_info, start);
	if (!cache) {
		printk(KERN_ERR "Unable to find block group for %llu\n",
		       (unsigned long long)start);
		return -ENOSPC;
	}

	ret = btrfs_discard_extent(root, start, len);

	btrfs_add_free_space(cache, start, len);
	put_block_group(cache);
	update_reserved_extents(root, start, len, 0);

	return ret;
}

/*
 * reserve an extent of num_bytes and mark it reserved; the key
 * describing it is returned through ins
 */
int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
			 struct btrfs_root *root,
			 u64 num_bytes, u64 min_alloc_size,
			 u64 empty_size, u64 hint_byte,
			 u64 search_end, struct btrfs_key *ins,
			 u64 data)
{
	int ret;
	ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size,
				     empty_size, hint_byte, search_end, ins,
				     data);
	update_reserved_extents(root, ins->objectid, ins->offset, 1);
	return ret;
}

/*
 * insert the extent item and backref for an already reserved extent
 * (described by ins) and update the space accounting.  Insertions into
 * the extent tree itself are deferred through a pending_extent_op to
 * avoid recursion.
 */
static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root, u64 parent,
					 u64 root_objectid, u64 ref_generation,
					 u64 owner, struct btrfs_key *ins)
{
	int ret;
	int pending_ret;
	u64 super_used;
	u64 root_used;
	u64 num_bytes = ins->offset;
	u32 sizes[2];
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_root *extent_root = info->extent_root;
	struct btrfs_extent_item *extent_item;
	struct btrfs_extent_ref *ref;
	struct btrfs_path *path;
	struct btrfs_key keys[2];

	if (parent == 0)
		parent = ins->objectid;

	/* block accounting for super block */
	spin_lock(&info->delalloc_lock);
	super_used = btrfs_super_bytes_used(&info->super_copy);
	btrfs_set_super_bytes_used(&info->super_copy, super_used + num_bytes);

	/* block accounting for root item */
	root_used = btrfs_root_used(&root->root_item);
	btrfs_set_root_used(&root->root_item, root_used + num_bytes);
	spin_unlock(&info->delalloc_lock);

	if (root == extent_root) {
		/* allocating for the extent tree itself: queue the insert
		 * instead of recursing into the extent tree
		 */
		struct pending_extent_op *extent_op;

		extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS);
		BUG_ON(!extent_op);

		extent_op->type = PENDING_EXTENT_INSERT;
		extent_op->bytenr = ins->objectid;
		extent_op->num_bytes = ins->offset;
		extent_op->parent = parent;
		extent_op->orig_parent = 0;
		extent_op->generation = ref_generation;
		extent_op->orig_generation = 0;
		extent_op->level = (int)owner;
		INIT_LIST_HEAD(&extent_op->list);
		extent_op->del = 0;

		mutex_lock(&root->fs_info->extent_ins_mutex);
		set_extent_bits(&root->fs_info->extent_ins, ins->objectid,
				ins->objectid + ins->offset - 1,
				EXTENT_WRITEBACK, GFP_NOFS);
		set_state_private(&root->fs_info->extent_ins,
				  ins->objectid, (unsigned long)extent_op);
		mutex_unlock(&root->fs_info->extent_ins_mutex);
		goto update_block;
	}

	memcpy(&keys[0], ins, sizeof(*ins));
	keys[1].objectid = ins->objectid;
	keys[1].type = BTRFS_EXTENT_REF_KEY;
	keys[1].offset = parent;
	sizes[0] = sizeof(*extent_item);
	sizes[1] = sizeof(*ref);

	path = btrfs_alloc_path();
	BUG_ON(!path);
3521 3522 ret = btrfs_insert_empty_items(trans, extent_root, path, keys, 3523 sizes, 2); 3524 BUG_ON(ret); 3525 3526 extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], 3527 struct btrfs_extent_item); 3528 btrfs_set_extent_refs(path->nodes[0], extent_item, 1); 3529 ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, 3530 struct btrfs_extent_ref); 3531 3532 btrfs_set_ref_root(path->nodes[0], ref, root_objectid); 3533 btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); 3534 btrfs_set_ref_objectid(path->nodes[0], ref, owner); 3535 btrfs_set_ref_num_refs(path->nodes[0], ref, 1); 3536 3537 btrfs_mark_buffer_dirty(path->nodes[0]); 3538 3539 trans->alloc_exclude_start = 0; 3540 trans->alloc_exclude_nr = 0; 3541 btrfs_free_path(path); 3542 finish_current_insert(trans, extent_root, 0); 3543 pending_ret = del_pending_extents(trans, extent_root, 0); 3544 3545 if (ret) 3546 goto out; 3547 if (pending_ret) { 3548 ret = pending_ret; 3549 goto out; 3550 } 3551 3552 update_block: 3553 ret = update_block_group(trans, root, ins->objectid, 3554 ins->offset, 1, 0); 3555 if (ret) { 3556 printk(KERN_ERR "btrfs update block group failed for %llu " 3557 "%llu\n", (unsigned long long)ins->objectid, 3558 (unsigned long long)ins->offset); 3559 BUG(); 3560 } 3561 out: 3562 return ret; 3563 } 3564 3565 int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, 3566 struct btrfs_root *root, u64 parent, 3567 u64 root_objectid, u64 ref_generation, 3568 u64 owner, struct btrfs_key *ins) 3569 { 3570 int ret; 3571 3572 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) 3573 return 0; 3574 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3575 ref_generation, owner, ins); 3576 update_reserved_extents(root, ins->objectid, ins->offset, 0); 3577 return ret; 3578 } 3579 3580 /* 3581 * this is used by the tree logging recovery code. 
It records that 3582 * an extent has been allocated and makes sure to clear the free 3583 * space cache bits as well 3584 */ 3585 int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, 3586 struct btrfs_root *root, u64 parent, 3587 u64 root_objectid, u64 ref_generation, 3588 u64 owner, struct btrfs_key *ins) 3589 { 3590 int ret; 3591 struct btrfs_block_group_cache *block_group; 3592 3593 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 3594 mutex_lock(&block_group->cache_mutex); 3595 cache_block_group(root, block_group); 3596 mutex_unlock(&block_group->cache_mutex); 3597 3598 ret = btrfs_remove_free_space(block_group, ins->objectid, 3599 ins->offset); 3600 BUG_ON(ret); 3601 put_block_group(block_group); 3602 ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, 3603 ref_generation, owner, ins); 3604 return ret; 3605 } 3606 3607 /* 3608 * finds a free extent and does all the dirty work required for allocation 3609 * returns the key for the extent through ins, and a tree buffer for 3610 * the first block of the extent through buf. 3611 * 3612 * returns 0 if everything worked, non-zero otherwise. 
3613 */ 3614 int btrfs_alloc_extent(struct btrfs_trans_handle *trans, 3615 struct btrfs_root *root, 3616 u64 num_bytes, u64 parent, u64 min_alloc_size, 3617 u64 root_objectid, u64 ref_generation, 3618 u64 owner_objectid, u64 empty_size, u64 hint_byte, 3619 u64 search_end, struct btrfs_key *ins, u64 data) 3620 { 3621 int ret; 3622 3623 ret = __btrfs_reserve_extent(trans, root, num_bytes, 3624 min_alloc_size, empty_size, hint_byte, 3625 search_end, ins, data); 3626 BUG_ON(ret); 3627 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 3628 ret = __btrfs_alloc_reserved_extent(trans, root, parent, 3629 root_objectid, ref_generation, 3630 owner_objectid, ins); 3631 BUG_ON(ret); 3632 3633 } else { 3634 update_reserved_extents(root, ins->objectid, ins->offset, 1); 3635 } 3636 return ret; 3637 } 3638 3639 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 3640 struct btrfs_root *root, 3641 u64 bytenr, u32 blocksize, 3642 int level) 3643 { 3644 struct extent_buffer *buf; 3645 3646 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 3647 if (!buf) 3648 return ERR_PTR(-ENOMEM); 3649 btrfs_set_header_generation(buf, trans->transid); 3650 btrfs_set_buffer_lockdep_class(buf, level); 3651 btrfs_tree_lock(buf); 3652 clean_tree_block(trans, root, buf); 3653 3654 btrfs_set_lock_blocking(buf); 3655 btrfs_set_buffer_uptodate(buf); 3656 3657 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 3658 set_extent_dirty(&root->dirty_log_pages, buf->start, 3659 buf->start + buf->len - 1, GFP_NOFS); 3660 } else { 3661 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 3662 buf->start + buf->len - 1, GFP_NOFS); 3663 } 3664 trans->blocks_used++; 3665 /* this returns a buffer locked for blocking */ 3666 return buf; 3667 } 3668 3669 /* 3670 * helper function to allocate a block for a given tree 3671 * returns the tree buffer or NULL. 
3672 */ 3673 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 3674 struct btrfs_root *root, 3675 u32 blocksize, u64 parent, 3676 u64 root_objectid, 3677 u64 ref_generation, 3678 int level, 3679 u64 hint, 3680 u64 empty_size) 3681 { 3682 struct btrfs_key ins; 3683 int ret; 3684 struct extent_buffer *buf; 3685 3686 ret = btrfs_alloc_extent(trans, root, blocksize, parent, blocksize, 3687 root_objectid, ref_generation, level, 3688 empty_size, hint, (u64)-1, &ins, 0); 3689 if (ret) { 3690 BUG_ON(ret > 0); 3691 return ERR_PTR(ret); 3692 } 3693 3694 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 3695 blocksize, level); 3696 return buf; 3697 } 3698 3699 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 3700 struct btrfs_root *root, struct extent_buffer *leaf) 3701 { 3702 u64 leaf_owner; 3703 u64 leaf_generation; 3704 struct refsort *sorted; 3705 struct btrfs_key key; 3706 struct btrfs_file_extent_item *fi; 3707 int i; 3708 int nritems; 3709 int ret; 3710 int refi = 0; 3711 int slot; 3712 3713 BUG_ON(!btrfs_is_leaf(leaf)); 3714 nritems = btrfs_header_nritems(leaf); 3715 leaf_owner = btrfs_header_owner(leaf); 3716 leaf_generation = btrfs_header_generation(leaf); 3717 3718 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 3719 /* we do this loop twice. The first time we build a list 3720 * of the extents we have a reference on, then we sort the list 3721 * by bytenr. The second time around we actually do the 3722 * extent freeing. 
3723 */ 3724 for (i = 0; i < nritems; i++) { 3725 u64 disk_bytenr; 3726 cond_resched(); 3727 3728 btrfs_item_key_to_cpu(leaf, &key, i); 3729 3730 /* only extents have references, skip everything else */ 3731 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3732 continue; 3733 3734 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); 3735 3736 /* inline extents live in the btree, they don't have refs */ 3737 if (btrfs_file_extent_type(leaf, fi) == 3738 BTRFS_FILE_EXTENT_INLINE) 3739 continue; 3740 3741 disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); 3742 3743 /* holes don't have refs */ 3744 if (disk_bytenr == 0) 3745 continue; 3746 3747 sorted[refi].bytenr = disk_bytenr; 3748 sorted[refi].slot = i; 3749 refi++; 3750 } 3751 3752 if (refi == 0) 3753 goto out; 3754 3755 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 3756 3757 for (i = 0; i < refi; i++) { 3758 u64 disk_bytenr; 3759 3760 disk_bytenr = sorted[i].bytenr; 3761 slot = sorted[i].slot; 3762 3763 cond_resched(); 3764 3765 btrfs_item_key_to_cpu(leaf, &key, slot); 3766 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 3767 continue; 3768 3769 fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); 3770 3771 ret = __btrfs_free_extent(trans, root, disk_bytenr, 3772 btrfs_file_extent_disk_num_bytes(leaf, fi), 3773 leaf->start, leaf_owner, leaf_generation, 3774 key.objectid, 0); 3775 BUG_ON(ret); 3776 3777 atomic_inc(&root->fs_info->throttle_gen); 3778 wake_up(&root->fs_info->transaction_throttle); 3779 cond_resched(); 3780 } 3781 out: 3782 kfree(sorted); 3783 return 0; 3784 } 3785 3786 static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, 3787 struct btrfs_root *root, 3788 struct btrfs_leaf_ref *ref) 3789 { 3790 int i; 3791 int ret; 3792 struct btrfs_extent_info *info; 3793 struct refsort *sorted; 3794 3795 if (ref->nritems == 0) 3796 return 0; 3797 3798 sorted = kmalloc(sizeof(*sorted) * ref->nritems, GFP_NOFS); 3799 for (i = 0; i < ref->nritems; i++) { 
3800 sorted[i].bytenr = ref->extents[i].bytenr; 3801 sorted[i].slot = i; 3802 } 3803 sort(sorted, ref->nritems, sizeof(struct refsort), refsort_cmp, NULL); 3804 3805 /* 3806 * the items in the ref were sorted when the ref was inserted 3807 * into the ref cache, so this is already in order 3808 */ 3809 for (i = 0; i < ref->nritems; i++) { 3810 info = ref->extents + sorted[i].slot; 3811 ret = __btrfs_free_extent(trans, root, info->bytenr, 3812 info->num_bytes, ref->bytenr, 3813 ref->owner, ref->generation, 3814 info->objectid, 0); 3815 3816 atomic_inc(&root->fs_info->throttle_gen); 3817 wake_up(&root->fs_info->transaction_throttle); 3818 cond_resched(); 3819 3820 BUG_ON(ret); 3821 info++; 3822 } 3823 3824 kfree(sorted); 3825 return 0; 3826 } 3827 3828 static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, 3829 u64 len, u32 *refs) 3830 { 3831 int ret; 3832 3833 ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); 3834 BUG_ON(ret); 3835 3836 #if 0 /* some debugging code in case we see problems here */ 3837 /* if the refs count is one, it won't get increased again. But 3838 * if the ref count is > 1, someone may be decreasing it at 3839 * the same time we are. 3840 */ 3841 if (*refs != 1) { 3842 struct extent_buffer *eb = NULL; 3843 eb = btrfs_find_create_tree_block(root, start, len); 3844 if (eb) 3845 btrfs_tree_lock(eb); 3846 3847 mutex_lock(&root->fs_info->alloc_mutex); 3848 ret = lookup_extent_ref(NULL, root, start, len, refs); 3849 BUG_ON(ret); 3850 mutex_unlock(&root->fs_info->alloc_mutex); 3851 3852 if (eb) { 3853 btrfs_tree_unlock(eb); 3854 free_extent_buffer(eb); 3855 } 3856 if (*refs == 1) { 3857 printk(KERN_ERR "btrfs block %llu went down to one " 3858 "during drop_snap\n", (unsigned long long)start); 3859 } 3860 3861 } 3862 #endif 3863 3864 cond_resched(); 3865 return ret; 3866 } 3867 3868 /* 3869 * this is used while deleting old snapshots, and it drops the refs 3870 * on a whole subtree starting from a level 1 node. 
3871 * 3872 * The idea is to sort all the leaf pointers, and then drop the 3873 * ref on all the leaves in order. Most of the time the leaves 3874 * will have ref cache entries, so no leaf IOs will be required to 3875 * find the extents they have references on. 3876 * 3877 * For each leaf, any references it has are also dropped in order 3878 * 3879 * This ends up dropping the references in something close to optimal 3880 * order for reading and modifying the extent allocation tree. 3881 */ 3882 static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, 3883 struct btrfs_root *root, 3884 struct btrfs_path *path) 3885 { 3886 u64 bytenr; 3887 u64 root_owner; 3888 u64 root_gen; 3889 struct extent_buffer *eb = path->nodes[1]; 3890 struct extent_buffer *leaf; 3891 struct btrfs_leaf_ref *ref; 3892 struct refsort *sorted = NULL; 3893 int nritems = btrfs_header_nritems(eb); 3894 int ret; 3895 int i; 3896 int refi = 0; 3897 int slot = path->slots[1]; 3898 u32 blocksize = btrfs_level_size(root, 0); 3899 u32 refs; 3900 3901 if (nritems == 0) 3902 goto out; 3903 3904 root_owner = btrfs_header_owner(eb); 3905 root_gen = btrfs_header_generation(eb); 3906 sorted = kmalloc(sizeof(*sorted) * nritems, GFP_NOFS); 3907 3908 /* 3909 * step one, sort all the leaf pointers so we don't scribble 3910 * randomly into the extent allocation tree 3911 */ 3912 for (i = slot; i < nritems; i++) { 3913 sorted[refi].bytenr = btrfs_node_blockptr(eb, i); 3914 sorted[refi].slot = i; 3915 refi++; 3916 } 3917 3918 /* 3919 * nritems won't be zero, but if we're picking up drop_snapshot 3920 * after a crash, slot might be > 0, so double check things 3921 * just in case. 
3922 */ 3923 if (refi == 0) 3924 goto out; 3925 3926 sort(sorted, refi, sizeof(struct refsort), refsort_cmp, NULL); 3927 3928 /* 3929 * the first loop frees everything the leaves point to 3930 */ 3931 for (i = 0; i < refi; i++) { 3932 u64 ptr_gen; 3933 3934 bytenr = sorted[i].bytenr; 3935 3936 /* 3937 * check the reference count on this leaf. If it is > 1 3938 * we just decrement it below and don't update any 3939 * of the refs the leaf points to. 3940 */ 3941 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 3942 BUG_ON(ret); 3943 if (refs != 1) 3944 continue; 3945 3946 ptr_gen = btrfs_node_ptr_generation(eb, sorted[i].slot); 3947 3948 /* 3949 * the leaf only had one reference, which means the 3950 * only thing pointing to this leaf is the snapshot 3951 * we're deleting. It isn't possible for the reference 3952 * count to increase again later 3953 * 3954 * The reference cache is checked for the leaf, 3955 * and if found we'll be able to drop any refs held by 3956 * the leaf without needing to read it in. 3957 */ 3958 ref = btrfs_lookup_leaf_ref(root, bytenr); 3959 if (ref && ref->generation != ptr_gen) { 3960 btrfs_free_leaf_ref(root, ref); 3961 ref = NULL; 3962 } 3963 if (ref) { 3964 ret = cache_drop_leaf_ref(trans, root, ref); 3965 BUG_ON(ret); 3966 btrfs_remove_leaf_ref(root, ref); 3967 btrfs_free_leaf_ref(root, ref); 3968 } else { 3969 /* 3970 * the leaf wasn't in the reference cache, so 3971 * we have to read it. 3972 */ 3973 leaf = read_tree_block(root, bytenr, blocksize, 3974 ptr_gen); 3975 ret = btrfs_drop_leaf_ref(trans, root, leaf); 3976 BUG_ON(ret); 3977 free_extent_buffer(leaf); 3978 } 3979 atomic_inc(&root->fs_info->throttle_gen); 3980 wake_up(&root->fs_info->transaction_throttle); 3981 cond_resched(); 3982 } 3983 3984 /* 3985 * run through the loop again to free the refs on the leaves. 3986 * This is faster than doing it in the loop above because 3987 * the leaves are likely to be clustered together. 
We end up 3988 * working in nice chunks on the extent allocation tree. 3989 */ 3990 for (i = 0; i < refi; i++) { 3991 bytenr = sorted[i].bytenr; 3992 ret = __btrfs_free_extent(trans, root, bytenr, 3993 blocksize, eb->start, 3994 root_owner, root_gen, 0, 1); 3995 BUG_ON(ret); 3996 3997 atomic_inc(&root->fs_info->throttle_gen); 3998 wake_up(&root->fs_info->transaction_throttle); 3999 cond_resched(); 4000 } 4001 out: 4002 kfree(sorted); 4003 4004 /* 4005 * update the path to show we've processed the entire level 1 4006 * node. This will get saved into the root's drop_snapshot_progress 4007 * field so these drops are not repeated again if this transaction 4008 * commits. 4009 */ 4010 path->slots[1] = nritems; 4011 return 0; 4012 } 4013 4014 /* 4015 * helper function for drop_snapshot, this walks down the tree dropping ref 4016 * counts as it goes. 4017 */ 4018 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 4019 struct btrfs_root *root, 4020 struct btrfs_path *path, int *level) 4021 { 4022 u64 root_owner; 4023 u64 root_gen; 4024 u64 bytenr; 4025 u64 ptr_gen; 4026 struct extent_buffer *next; 4027 struct extent_buffer *cur; 4028 struct extent_buffer *parent; 4029 u32 blocksize; 4030 int ret; 4031 u32 refs; 4032 4033 WARN_ON(*level < 0); 4034 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4035 ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, 4036 path->nodes[*level]->len, &refs); 4037 BUG_ON(ret); 4038 if (refs > 1) 4039 goto out; 4040 4041 /* 4042 * walk down to the last node level and free all the leaves 4043 */ 4044 while (*level >= 0) { 4045 WARN_ON(*level < 0); 4046 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4047 cur = path->nodes[*level]; 4048 4049 if (btrfs_header_level(cur) != *level) 4050 WARN_ON(1); 4051 4052 if (path->slots[*level] >= 4053 btrfs_header_nritems(cur)) 4054 break; 4055 4056 /* the new code goes down to level 1 and does all the 4057 * leaves pointed to that node in bulk. So, this check 4058 * for level 0 will always be false. 
4059 * 4060 * But, the disk format allows the drop_snapshot_progress 4061 * field in the root to leave things in a state where 4062 * a leaf will need cleaning up here. If someone crashes 4063 * with the old code and then boots with the new code, 4064 * we might find a leaf here. 4065 */ 4066 if (*level == 0) { 4067 ret = btrfs_drop_leaf_ref(trans, root, cur); 4068 BUG_ON(ret); 4069 break; 4070 } 4071 4072 /* 4073 * once we get to level one, process the whole node 4074 * at once, including everything below it. 4075 */ 4076 if (*level == 1) { 4077 ret = drop_level_one_refs(trans, root, path); 4078 BUG_ON(ret); 4079 break; 4080 } 4081 4082 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4083 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4084 blocksize = btrfs_level_size(root, *level - 1); 4085 4086 ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); 4087 BUG_ON(ret); 4088 4089 /* 4090 * if there is more than one reference, we don't need 4091 * to read that node to drop any references it has. We 4092 * just drop the ref we hold on that node and move on to the 4093 * next slot in this level. 4094 */ 4095 if (refs != 1) { 4096 parent = path->nodes[*level]; 4097 root_owner = btrfs_header_owner(parent); 4098 root_gen = btrfs_header_generation(parent); 4099 path->slots[*level]++; 4100 4101 ret = __btrfs_free_extent(trans, root, bytenr, 4102 blocksize, parent->start, 4103 root_owner, root_gen, 4104 *level - 1, 1); 4105 BUG_ON(ret); 4106 4107 atomic_inc(&root->fs_info->throttle_gen); 4108 wake_up(&root->fs_info->transaction_throttle); 4109 cond_resched(); 4110 4111 continue; 4112 } 4113 4114 /* 4115 * we need to keep freeing things in the next level down. 
4116 * read the block and loop around to process it 4117 */ 4118 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4119 WARN_ON(*level <= 0); 4120 if (path->nodes[*level-1]) 4121 free_extent_buffer(path->nodes[*level-1]); 4122 path->nodes[*level-1] = next; 4123 *level = btrfs_header_level(next); 4124 path->slots[*level] = 0; 4125 cond_resched(); 4126 } 4127 out: 4128 WARN_ON(*level < 0); 4129 WARN_ON(*level >= BTRFS_MAX_LEVEL); 4130 4131 if (path->nodes[*level] == root->node) { 4132 parent = path->nodes[*level]; 4133 bytenr = path->nodes[*level]->start; 4134 } else { 4135 parent = path->nodes[*level + 1]; 4136 bytenr = btrfs_node_blockptr(parent, path->slots[*level + 1]); 4137 } 4138 4139 blocksize = btrfs_level_size(root, *level); 4140 root_owner = btrfs_header_owner(parent); 4141 root_gen = btrfs_header_generation(parent); 4142 4143 /* 4144 * cleanup and free the reference on the last node 4145 * we processed 4146 */ 4147 ret = __btrfs_free_extent(trans, root, bytenr, blocksize, 4148 parent->start, root_owner, root_gen, 4149 *level, 1); 4150 free_extent_buffer(path->nodes[*level]); 4151 path->nodes[*level] = NULL; 4152 4153 *level += 1; 4154 BUG_ON(ret); 4155 4156 cond_resched(); 4157 return 0; 4158 } 4159 4160 /* 4161 * helper function for drop_subtree, this function is similar to 4162 * walk_down_tree. The main difference is that it checks reference 4163 * counts while tree blocks are locked. 
4164 */ 4165 static noinline int walk_down_subtree(struct btrfs_trans_handle *trans, 4166 struct btrfs_root *root, 4167 struct btrfs_path *path, int *level) 4168 { 4169 struct extent_buffer *next; 4170 struct extent_buffer *cur; 4171 struct extent_buffer *parent; 4172 u64 bytenr; 4173 u64 ptr_gen; 4174 u32 blocksize; 4175 u32 refs; 4176 int ret; 4177 4178 cur = path->nodes[*level]; 4179 ret = btrfs_lookup_extent_ref(trans, root, cur->start, cur->len, 4180 &refs); 4181 BUG_ON(ret); 4182 if (refs > 1) 4183 goto out; 4184 4185 while (*level >= 0) { 4186 cur = path->nodes[*level]; 4187 if (*level == 0) { 4188 ret = btrfs_drop_leaf_ref(trans, root, cur); 4189 BUG_ON(ret); 4190 clean_tree_block(trans, root, cur); 4191 break; 4192 } 4193 if (path->slots[*level] >= btrfs_header_nritems(cur)) { 4194 clean_tree_block(trans, root, cur); 4195 break; 4196 } 4197 4198 bytenr = btrfs_node_blockptr(cur, path->slots[*level]); 4199 blocksize = btrfs_level_size(root, *level - 1); 4200 ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); 4201 4202 next = read_tree_block(root, bytenr, blocksize, ptr_gen); 4203 btrfs_tree_lock(next); 4204 btrfs_set_lock_blocking(next); 4205 4206 ret = btrfs_lookup_extent_ref(trans, root, bytenr, blocksize, 4207 &refs); 4208 BUG_ON(ret); 4209 if (refs > 1) { 4210 parent = path->nodes[*level]; 4211 ret = btrfs_free_extent(trans, root, bytenr, 4212 blocksize, parent->start, 4213 btrfs_header_owner(parent), 4214 btrfs_header_generation(parent), 4215 *level - 1, 1); 4216 BUG_ON(ret); 4217 path->slots[*level]++; 4218 btrfs_tree_unlock(next); 4219 free_extent_buffer(next); 4220 continue; 4221 } 4222 4223 *level = btrfs_header_level(next); 4224 path->nodes[*level] = next; 4225 path->slots[*level] = 0; 4226 path->locks[*level] = 1; 4227 cond_resched(); 4228 } 4229 out: 4230 parent = path->nodes[*level + 1]; 4231 bytenr = path->nodes[*level]->start; 4232 blocksize = path->nodes[*level]->len; 4233 4234 ret = btrfs_free_extent(trans, root, bytenr, 
blocksize, 4235 parent->start, btrfs_header_owner(parent), 4236 btrfs_header_generation(parent), *level, 1); 4237 BUG_ON(ret); 4238 4239 if (path->locks[*level]) { 4240 btrfs_tree_unlock(path->nodes[*level]); 4241 path->locks[*level] = 0; 4242 } 4243 free_extent_buffer(path->nodes[*level]); 4244 path->nodes[*level] = NULL; 4245 *level += 1; 4246 cond_resched(); 4247 return 0; 4248 } 4249 4250 /* 4251 * helper for dropping snapshots. This walks back up the tree in the path 4252 * to find the first node higher up where we haven't yet gone through 4253 * all the slots 4254 */ 4255 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 4256 struct btrfs_root *root, 4257 struct btrfs_path *path, 4258 int *level, int max_level) 4259 { 4260 u64 root_owner; 4261 u64 root_gen; 4262 struct btrfs_root_item *root_item = &root->root_item; 4263 int i; 4264 int slot; 4265 int ret; 4266 4267 for (i = *level; i < max_level && path->nodes[i]; i++) { 4268 slot = path->slots[i]; 4269 if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { 4270 struct extent_buffer *node; 4271 struct btrfs_disk_key disk_key; 4272 4273 /* 4274 * there is more work to do in this level. 
4275 * Update the drop_progress marker to reflect 4276 * the work we've done so far, and then bump 4277 * the slot number 4278 */ 4279 node = path->nodes[i]; 4280 path->slots[i]++; 4281 *level = i; 4282 WARN_ON(*level == 0); 4283 btrfs_node_key(node, &disk_key, path->slots[i]); 4284 memcpy(&root_item->drop_progress, 4285 &disk_key, sizeof(disk_key)); 4286 root_item->drop_level = i; 4287 return 0; 4288 } else { 4289 struct extent_buffer *parent; 4290 4291 /* 4292 * this whole node is done, free our reference 4293 * on it and go up one level 4294 */ 4295 if (path->nodes[*level] == root->node) 4296 parent = path->nodes[*level]; 4297 else 4298 parent = path->nodes[*level + 1]; 4299 4300 root_owner = btrfs_header_owner(parent); 4301 root_gen = btrfs_header_generation(parent); 4302 4303 clean_tree_block(trans, root, path->nodes[*level]); 4304 ret = btrfs_free_extent(trans, root, 4305 path->nodes[*level]->start, 4306 path->nodes[*level]->len, 4307 parent->start, root_owner, 4308 root_gen, *level, 1); 4309 BUG_ON(ret); 4310 if (path->locks[*level]) { 4311 btrfs_tree_unlock(path->nodes[*level]); 4312 path->locks[*level] = 0; 4313 } 4314 free_extent_buffer(path->nodes[*level]); 4315 path->nodes[*level] = NULL; 4316 *level = i + 1; 4317 } 4318 } 4319 return 1; 4320 } 4321 4322 /* 4323 * drop the reference count on the tree rooted at 'snap'. This traverses 4324 * the tree freeing any blocks that have a ref count of zero after being 4325 * decremented. 
4326 */ 4327 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root 4328 *root) 4329 { 4330 int ret = 0; 4331 int wret; 4332 int level; 4333 struct btrfs_path *path; 4334 int i; 4335 int orig_level; 4336 struct btrfs_root_item *root_item = &root->root_item; 4337 4338 WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); 4339 path = btrfs_alloc_path(); 4340 BUG_ON(!path); 4341 4342 level = btrfs_header_level(root->node); 4343 orig_level = level; 4344 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 4345 path->nodes[level] = root->node; 4346 extent_buffer_get(root->node); 4347 path->slots[level] = 0; 4348 } else { 4349 struct btrfs_key key; 4350 struct btrfs_disk_key found_key; 4351 struct extent_buffer *node; 4352 4353 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 4354 level = root_item->drop_level; 4355 path->lowest_level = level; 4356 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 4357 if (wret < 0) { 4358 ret = wret; 4359 goto out; 4360 } 4361 node = path->nodes[level]; 4362 btrfs_node_key(node, &found_key, path->slots[level]); 4363 WARN_ON(memcmp(&found_key, &root_item->drop_progress, 4364 sizeof(found_key))); 4365 /* 4366 * unlock our path, this is safe because only this 4367 * function is allowed to delete this snapshot 4368 */ 4369 for (i = 0; i < BTRFS_MAX_LEVEL; i++) { 4370 if (path->nodes[i] && path->locks[i]) { 4371 path->locks[i] = 0; 4372 btrfs_tree_unlock(path->nodes[i]); 4373 } 4374 } 4375 } 4376 while (1) { 4377 wret = walk_down_tree(trans, root, path, &level); 4378 if (wret > 0) 4379 break; 4380 if (wret < 0) 4381 ret = wret; 4382 4383 wret = walk_up_tree(trans, root, path, &level, 4384 BTRFS_MAX_LEVEL); 4385 if (wret > 0) 4386 break; 4387 if (wret < 0) 4388 ret = wret; 4389 if (trans->transaction->in_commit) { 4390 ret = -EAGAIN; 4391 break; 4392 } 4393 atomic_inc(&root->fs_info->throttle_gen); 4394 wake_up(&root->fs_info->transaction_throttle); 4395 } 4396 for (i = 0; i <= orig_level; i++) { 
4397 if (path->nodes[i]) { 4398 free_extent_buffer(path->nodes[i]); 4399 path->nodes[i] = NULL; 4400 } 4401 } 4402 out: 4403 btrfs_free_path(path); 4404 return ret; 4405 } 4406 4407 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 4408 struct btrfs_root *root, 4409 struct extent_buffer *node, 4410 struct extent_buffer *parent) 4411 { 4412 struct btrfs_path *path; 4413 int level; 4414 int parent_level; 4415 int ret = 0; 4416 int wret; 4417 4418 path = btrfs_alloc_path(); 4419 BUG_ON(!path); 4420 4421 BUG_ON(!btrfs_tree_locked(parent)); 4422 parent_level = btrfs_header_level(parent); 4423 extent_buffer_get(parent); 4424 path->nodes[parent_level] = parent; 4425 path->slots[parent_level] = btrfs_header_nritems(parent); 4426 4427 BUG_ON(!btrfs_tree_locked(node)); 4428 level = btrfs_header_level(node); 4429 extent_buffer_get(node); 4430 path->nodes[level] = node; 4431 path->slots[level] = 0; 4432 4433 while (1) { 4434 wret = walk_down_subtree(trans, root, path, &level); 4435 if (wret < 0) 4436 ret = wret; 4437 if (wret != 0) 4438 break; 4439 4440 wret = walk_up_tree(trans, root, path, &level, parent_level); 4441 if (wret < 0) 4442 ret = wret; 4443 if (wret != 0) 4444 break; 4445 } 4446 4447 btrfs_free_path(path); 4448 return ret; 4449 } 4450 4451 static unsigned long calc_ra(unsigned long start, unsigned long last, 4452 unsigned long nr) 4453 { 4454 return min(last, start + nr - 1); 4455 } 4456 4457 static noinline int relocate_inode_pages(struct inode *inode, u64 start, 4458 u64 len) 4459 { 4460 u64 page_start; 4461 u64 page_end; 4462 unsigned long first_index; 4463 unsigned long last_index; 4464 unsigned long i; 4465 struct page *page; 4466 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; 4467 struct file_ra_state *ra; 4468 struct btrfs_ordered_extent *ordered; 4469 unsigned int total_read = 0; 4470 unsigned int total_dirty = 0; 4471 int ret = 0; 4472 4473 ra = kzalloc(sizeof(*ra), GFP_NOFS); 4474 4475 mutex_lock(&inode->i_mutex); 4476 first_index = 
start >> PAGE_CACHE_SHIFT;
	last_index = (start + len - 1) >> PAGE_CACHE_SHIFT;

	/* make sure the dirty trick played by the caller works */
	ret = invalidate_inode_pages2_range(inode->i_mapping,
					    first_index, last_index);
	if (ret)
		goto out_unlock;

	file_ra_state_init(ra, inode->i_mapping);

	for (i = first_index ; i <= last_index; i++) {
		/* kick off readahead once per readahead window */
		if (total_read % ra->ra_pages == 0) {
			btrfs_force_ra(inode->i_mapping, ra, NULL, i,
				       calc_ra(i, last_index, ra->ra_pages));
		}
		total_read++;
again:
		/* the range must lie entirely inside i_size */
		if (((u64)i << PAGE_CACHE_SHIFT) > i_size_read(inode))
			BUG_ON(1);
		page = grab_cache_page(inode->i_mapping, i);
		if (!page) {
			ret = -ENOMEM;
			goto out_unlock;
		}
		if (!PageUptodate(page)) {
			btrfs_readpage(NULL, page);
			lock_page(page);
			if (!PageUptodate(page)) {
				unlock_page(page);
				page_cache_release(page);
				ret = -EIO;
				goto out_unlock;
			}
		}
		wait_on_page_writeback(page);

		page_start = (u64)page->index << PAGE_CACHE_SHIFT;
		page_end = page_start + PAGE_CACHE_SIZE - 1;
		lock_extent(io_tree, page_start, page_end, GFP_NOFS);

		/*
		 * if an ordered extent is still pending on this page, drop
		 * all our locks, wait for it to finish and retry the page
		 */
		ordered = btrfs_lookup_ordered_extent(inode, page_start);
		if (ordered) {
			unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
			unlock_page(page);
			page_cache_release(page);
			btrfs_start_ordered_extent(inode, ordered, 1);
			btrfs_put_ordered_extent(ordered);
			goto again;
		}
		set_page_extent_mapped(page);

		if (i == first_index)
			set_extent_bits(io_tree, page_start, page_end,
					EXTENT_BOUNDARY, GFP_NOFS);
		btrfs_set_extent_delalloc(inode, page_start, page_end);

		set_page_dirty(page);
		total_dirty++;

		unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
		unlock_page(page);
		page_cache_release(page);
	}

out_unlock:
	kfree(ra);
	mutex_unlock(&inode->i_mutex);
	balance_dirty_pages_ratelimited_nr(inode->i_mapping, total_dirty);
	return ret;
}

/*
 * Insert a pinned extent mapping covering 'extent_key' into the reloc
 * inode's extent tree, then dirty the corresponding pages so the data
 * gets written out at a new location.
 */
static noinline int relocate_data_extent(struct inode *reloc_inode,
					 struct btrfs_key *extent_key,
					 u64 offset)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct extent_map_tree *em_tree = &BTRFS_I(reloc_inode)->extent_tree;
	struct extent_map *em;
	u64 start = extent_key->objectid - offset;
	u64 end = start + extent_key->offset - 1;

	em = alloc_extent_map(GFP_NOFS);
	BUG_ON(!em || IS_ERR(em));

	em->start = start;
	em->len = extent_key->offset;
	em->block_len = extent_key->offset;
	em->block_start = extent_key->objectid;
	em->bdev = root->fs_info->fs_devices->latest_bdev;
	set_bit(EXTENT_FLAG_PINNED, &em->flags);

	/* setup extent map to cheat btrfs_readpage */
	lock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);
	while (1) {
		int ret;
		spin_lock(&em_tree->lock);
		ret = add_extent_mapping(em_tree, em);
		spin_unlock(&em_tree->lock);
		if (ret != -EEXIST) {
			/* mapping inserted (or hard error): drop our ref */
			free_extent_map(em);
			break;
		}
		/* an old mapping is in the way; drop it and retry */
		btrfs_drop_extent_cache(reloc_inode, start, end, 0);
	}
	unlock_extent(&BTRFS_I(reloc_inode)->io_tree, start, end, GFP_NOFS);

	return relocate_inode_pages(reloc_inode, start, extent_key->offset);
}

/*
 * State for walking the chain of back references from an extent up to
 * the tree root that owns it (see __next_ref_path below).
 */
struct btrfs_ref_path {
	u64 extent_start;
	/* block bytenr at each level of the current reference path */
	u64 nodes[BTRFS_MAX_LEVEL];
	u64 root_objectid;
	u64 root_generation;
	u64 owner_objectid;
	u32 num_refs;
	int lowest_level;
	int current_level;
	int shared_level;

	/* new location of the blocks along the path, filled by relocation */
	struct btrfs_key node_keys[BTRFS_MAX_LEVEL];
	u64 new_nodes[BTRFS_MAX_LEVEL];
};

/* in-memory copy of the fields of a btrfs_file_extent_item */
struct disk_extent {
	u64 ram_bytes;
	u64 disk_bytenr;
	u64 disk_num_bytes;
	u64 offset;
	u64 num_bytes;
	u8 compression;
	u8 encryption;
	u16 other_encoding;
};

/* return 1 for roots whose blocks are COW-only (not reference counted) */
static int is_cowonly_root(u64 root_objectid)
{
	if (root_objectid == BTRFS_ROOT_TREE_OBJECTID ||
	    root_objectid == BTRFS_EXTENT_TREE_OBJECTID ||
	    root_objectid == BTRFS_CHUNK_TREE_OBJECTID ||
	    root_objectid == BTRFS_DEV_TREE_OBJECTID ||
	    root_objectid == BTRFS_TREE_LOG_OBJECTID ||
	    root_objectid == BTRFS_CSUM_TREE_OBJECTID)
		return 1;
	return 0;
}

/*
 * Advance 'ref_path' to the next chain of back references leading from
 * ref_path->extent_start up to a tree root.  On the first call
 * (first_time != 0) the walk starts from the extent itself; subsequent
 * calls backtrack (walk_down) and then climb again (walk_up) to
 * enumerate further referencing roots.
 *
 * Returns 0 when a path to a root was found (root_objectid /
 * root_generation are filled in), 1 when there are no more paths, or a
 * negative errno.
 */
static noinline int __next_ref_path(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_ref_path *ref_path,
				    int first_time)
{
	struct extent_buffer *leaf;
	struct btrfs_path *path;
	struct btrfs_extent_ref *ref;
	struct btrfs_key key;
	struct btrfs_key found_key;
	u64 bytenr;
	u32 nritems;
	int level;
	int ret = 1;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	if (first_time) {
		ref_path->lowest_level = -1;
		ref_path->current_level = -1;
		ref_path->shared_level = -1;
		goto walk_up;
	}
walk_down:
	/*
	 * backtrack: descend the recorded path looking for a level where
	 * another (not yet visited) back reference exists.  level -1
	 * stands for the extent itself.
	 */
	level = ref_path->current_level - 1;
	while (level >= -1) {
		u64 parent;
		if (level < ref_path->lowest_level)
			break;

		if (level >= 0)
			bytenr = ref_path->nodes[level];
		else
			bytenr = ref_path->extent_start;
		BUG_ON(bytenr == 0);

		parent = ref_path->nodes[level + 1];
		ref_path->nodes[level + 1] = 0;
		ref_path->current_level = level;
		BUG_ON(parent == 0);

		/* search for the next ref after the one through 'parent' */
		key.objectid = bytenr;
		key.offset = parent + 1;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
		BUG_ON(ret == 0);

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				goto next;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid == bytenr &&
		    found_key.type == BTRFS_EXTENT_REF_KEY) {
			if (level < ref_path->shared_level)
				ref_path->shared_level = level;
			goto found;
		}
next:
		level--;
		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached lowest level */
	ret = 1;
	goto out;
walk_up:
	/* climb from current_level towards a tree root via back refs */
	level = ref_path->current_level;
	while (level < BTRFS_MAX_LEVEL - 1) {
		u64 ref_objectid;

		if (level >= 0)
			bytenr = ref_path->nodes[level];
		else
			bytenr = ref_path->extent_start;

		BUG_ON(bytenr == 0);

		key.objectid = bytenr;
		key.offset = 0;
		key.type = BTRFS_EXTENT_REF_KEY;

		ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 0);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(extent_root, path);
			if (ret < 0)
				goto out;
			if (ret > 0) {
				/* the extent was freed by someone */
				if (ref_path->lowest_level == level)
					goto out;
				btrfs_release_path(extent_root, path);
				goto walk_down;
			}
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		if (found_key.objectid != bytenr ||
		    found_key.type != BTRFS_EXTENT_REF_KEY) {
			/* the extent was freed by someone */
			if (ref_path->lowest_level == level) {
				ret = 1;
				goto out;
			}
			btrfs_release_path(extent_root, path);
			goto walk_down;
		}
found:
		ref = btrfs_item_ptr(leaf, path->slots[0],
				     struct btrfs_extent_ref);
		ref_objectid = btrfs_ref_objectid(leaf, ref);
		if (ref_objectid < BTRFS_FIRST_FREE_OBJECTID) {
			/* ref owner below FIRST_FREE encodes a tree level */
			if (first_time) {
				level = (int)ref_objectid;
				BUG_ON(level >= BTRFS_MAX_LEVEL);
				ref_path->lowest_level = level;
				ref_path->current_level = level;
				ref_path->nodes[level] = bytenr;
			} else {
				WARN_ON(ref_objectid != level);
			}
		} else {
			WARN_ON(level != -1);
		}
		first_time = 0;

		if (ref_path->lowest_level == level) {
			ref_path->owner_objectid = ref_objectid;
			ref_path->num_refs = btrfs_ref_num_refs(leaf, ref);
		}

		/*
		 * the block is tree root or the block isn't in reference
		 * counted tree.
		 */
		if (found_key.objectid == found_key.offset ||
		    is_cowonly_root(btrfs_ref_root(leaf, ref))) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			if (level < 0) {
				/* special reference from the tree log */
				ref_path->nodes[0] = found_key.offset;
				ref_path->current_level = 0;
			}
			ret = 0;
			goto out;
		}

		level++;
		BUG_ON(ref_path->nodes[level] != 0);
		ref_path->nodes[level] = found_key.offset;
		ref_path->current_level = level;

		/*
		 * the reference was created in the running transaction,
		 * no need to continue walking up.
		 */
		if (btrfs_ref_generation(leaf, ref) == trans->transid) {
			ref_path->root_objectid = btrfs_ref_root(leaf, ref);
			ref_path->root_generation =
				btrfs_ref_generation(leaf, ref);
			ret = 0;
			goto out;
		}

		btrfs_release_path(extent_root, path);
		cond_resched();
	}
	/* reached max tree level, but no tree root found. */
	BUG();
out:
	btrfs_free_path(path);
	return ret;
}

/* start a reference-path walk for the extent at 'extent_start' */
static int btrfs_first_ref_path(struct btrfs_trans_handle *trans,
				struct btrfs_root *extent_root,
				struct btrfs_ref_path *ref_path,
				u64 extent_start)
{
	memset(ref_path, 0, sizeof(*ref_path));
	ref_path->extent_start = extent_start;

	return __next_ref_path(trans, extent_root, ref_path, 1);
}

/* continue a walk previously started by btrfs_first_ref_path */
static int btrfs_next_ref_path(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root,
			       struct btrfs_ref_path *ref_path)
{
	return __next_ref_path(trans, extent_root, ref_path, 0);
}

/*
 * Collect the file extents of the reloc inode that cover 'extent_key'.
 * On success *extents/*nr_extents describe the new locations; the array
 * is (re)allocated here when the caller passed *extents == NULL.
 * Returns 0 on success, 1 if no_fragment is set but the extent is
 * fragmented, or a negative errno.
 */
static noinline int get_new_locations(struct inode *reloc_inode,
				      struct btrfs_key *extent_key,
				      u64 offset, int no_fragment,
				      struct disk_extent **extents,
				      int *nr_extents)
{
	struct btrfs_root *root = BTRFS_I(reloc_inode)->root;
	struct btrfs_path *path;
	struct btrfs_file_extent_item *fi;
	struct extent_buffer *leaf;
	struct disk_extent *exts = *extents;
	struct btrfs_key found_key;
	u64 cur_pos;
	u64 last_byte;
	u32 nritems;
	int nr = 0;
	int max = *nr_extents;
	int ret;

	WARN_ON(!no_fragment && *extents);
	if (!exts) {
		max = 1;
		exts = kmalloc(sizeof(*exts) * max, GFP_NOFS);
		if (!exts)
			return -ENOMEM;
	}

	path = btrfs_alloc_path();
	BUG_ON(!path);

	cur_pos = extent_key->objectid - offset;
	last_byte = extent_key->objectid + extent_key->offset;
	ret = btrfs_lookup_file_extent(NULL, root, path, reloc_inode->i_ino,
				       cur_pos, 0);
	if (ret < 0)
		goto out;
	if (ret > 0) {
		ret = -ENOENT;
		goto out;
	}

	while (1) {
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
		}

		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		/* stop at the first gap or foreign item */
		if (found_key.offset != cur_pos ||
		    found_key.type != BTRFS_EXTENT_DATA_KEY ||
		    found_key.objectid != reloc_inode->i_ino)
			break;

		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) !=
		    BTRFS_FILE_EXTENT_REG ||
		    btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			break;

		if (nr == max) {
			/*
			 * grow the result array.
			 * NOTE(review): kzalloc result is not checked
			 * before the memcpy — NULL deref on allocation
			 * failure.  krealloc-style handling would be safer.
			 */
			struct disk_extent *old = exts;
			max *= 2;
			exts = kzalloc(sizeof(*exts) * max, GFP_NOFS);
			memcpy(exts, old, sizeof(*exts) * nr);
			if (old != *extents)
				kfree(old);
		}

		exts[nr].disk_bytenr =
			btrfs_file_extent_disk_bytenr(leaf, fi);
		exts[nr].disk_num_bytes =
			btrfs_file_extent_disk_num_bytes(leaf, fi);
		exts[nr].offset = btrfs_file_extent_offset(leaf, fi);
		exts[nr].num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		exts[nr].ram_bytes = btrfs_file_extent_ram_bytes(leaf, fi);
		exts[nr].compression = btrfs_file_extent_compression(leaf, fi);
		exts[nr].encryption = btrfs_file_extent_encryption(leaf, fi);
		exts[nr].other_encoding = btrfs_file_extent_other_encoding(leaf,
									   fi);
		/* reloc inode extents are expected to be plain and full */
		BUG_ON(exts[nr].offset > 0);
		BUG_ON(exts[nr].compression || exts[nr].encryption);
		BUG_ON(exts[nr].num_bytes != exts[nr].disk_num_bytes);

		cur_pos += exts[nr].num_bytes;
		nr++;

		if (cur_pos + offset >= last_byte)
			break;

		if (no_fragment) {
			ret = 1;
			goto out;
		}
		path->slots[0]++;
	}

	BUG_ON(cur_pos + offset > last_byte);
	if (cur_pos + offset < last_byte) {
		ret = -ENOENT;
		goto out;
	}
	ret = 0;
out:
	btrfs_free_path(path);
	if (ret) {
		/* only free the array if we allocated it ourselves */
		if (exts != *extents)
			kfree(exts);
	} else {
		*extents = exts;
		*nr_extents = nr;
	}
	return ret;
}

/*
 * Rewrite every file extent item that points at extent_key->objectid so
 * it points at the relocated copy described by new_extents.  The walk is
 * bounded by ref_path->owner_objectid (the owning inode) unless the
 * extent is shared by multiple objectids.
 */
static noinline int replace_one_extent(struct btrfs_trans_handle *trans,
				       struct btrfs_root *root,
				       struct btrfs_path *path,
				       struct btrfs_key *extent_key,
				       struct btrfs_key *leaf_key,
				       struct btrfs_ref_path *ref_path,
				       struct disk_extent *new_extents,
				       int nr_extents)
{
	struct extent_buffer *leaf;
	struct btrfs_file_extent_item *fi;
	struct inode *inode = NULL;
	struct btrfs_key key;
	u64 lock_start = 0;
	u64 lock_end = 0;
	u64 num_bytes;
	u64 ext_offset;
	u64 search_end = (u64)-1;
	u32 nritems;
	int nr_scaned = 0;
	int extent_locked = 0;
	int extent_type;
	int ret;

	memcpy(&key, leaf_key, sizeof(key));
	if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
		/* clamp the starting key to the owner's EXTENT_DATA range */
		if (key.objectid < ref_path->owner_objectid ||
		    (key.objectid == ref_path->owner_objectid &&
		     key.type < BTRFS_EXTENT_DATA_KEY)) {
			key.objectid = ref_path->owner_objectid;
			key.type = BTRFS_EXTENT_DATA_KEY;
			key.offset = 0;
		}
	}

	while (1) {
		ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
		if (ret < 0)
			goto out;

		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
next:
		/* ret > 0 here means we re-entered via 'goto next' below */
		if (extent_locked && ret > 0) {
			/*
			 * the file extent item was modified by someone
			 * before the extent got locked.
			 */
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}

		if (path->slots[0] >= nritems) {
			if (++nr_scaned > 2)
				break;

			BUG_ON(extent_locked);
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret > 0)
				break;
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS) {
			if ((key.objectid > ref_path->owner_objectid) ||
			    (key.objectid == ref_path->owner_objectid &&
			     key.type > BTRFS_EXTENT_DATA_KEY) ||
			    key.offset >= search_end)
				break;
		}

		/* moved on to a different inode: drop the old one */
		if (inode && key.objectid != inode->i_ino) {
			BUG_ON(extent_locked);
			btrfs_release_path(root, path);
			mutex_unlock(&inode->i_mutex);
			iput(inode);
			inode = NULL;
			continue;
		}

		if (key.type != BTRFS_EXTENT_DATA_KEY) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}
		fi = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_file_extent_item);
		extent_type = btrfs_file_extent_type(leaf, fi);
		if ((extent_type != BTRFS_FILE_EXTENT_REG &&
		     extent_type != BTRFS_FILE_EXTENT_PREALLOC) ||
		    (btrfs_file_extent_disk_bytenr(leaf, fi) !=
		     extent_key->objectid)) {
			path->slots[0]++;
			ret = 1;
			goto next;
		}

		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);
		ext_offset = btrfs_file_extent_offset(leaf, fi);

		if (search_end == (u64)-1) {
			search_end = key.offset - ext_offset +
				btrfs_file_extent_ram_bytes(leaf, fi);
		}

		if (!extent_locked) {
			lock_start = key.offset;
			lock_end = lock_start + num_bytes - 1;
		} else {
			/* existing lock doesn't cover this item: re-lock */
			if (lock_start > key.offset ||
			    lock_end + 1 < key.offset + num_bytes) {
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				extent_locked = 0;
			}
		}

		if (!inode) {
			btrfs_release_path(root, path);

			/*
			 * NOTE(review): btrfs_iget_locked can return NULL
			 * on allocation failure; inode->i_state below would
			 * then oops.
			 */
			inode = btrfs_iget_locked(root->fs_info->sb,
						  key.objectid, root);
			if (inode->i_state & I_NEW) {
				BTRFS_I(inode)->root = root;
				BTRFS_I(inode)->location.objectid =
					key.objectid;
				BTRFS_I(inode)->location.type =
					BTRFS_INODE_ITEM_KEY;
				BTRFS_I(inode)->location.offset = 0;
				btrfs_read_locked_inode(inode);
				unlock_new_inode(inode);
			}
			/*
			 * some code call btrfs_commit_transaction while
			 * holding the i_mutex, so we can't use mutex_lock
			 * here.
			 */
			if (is_bad_inode(inode) ||
			    !mutex_trylock(&inode->i_mutex)) {
				iput(inode);
				inode = NULL;
				key.offset = (u64)-1;
				goto skip;
			}
		}

		if (!extent_locked) {
			struct btrfs_ordered_extent *ordered;

			btrfs_release_path(root, path);

			lock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				    lock_end, GFP_NOFS);
			ordered = btrfs_lookup_first_ordered_extent(inode,
								    lock_end);
			if (ordered &&
			    ordered->file_offset <= lock_end &&
			    ordered->file_offset + ordered->len > lock_start) {
				/* wait for in-flight IO, then rescan */
				unlock_extent(&BTRFS_I(inode)->io_tree,
					      lock_start, lock_end, GFP_NOFS);
				btrfs_start_ordered_extent(inode, ordered, 1);
				btrfs_put_ordered_extent(ordered);
				key.offset += num_bytes;
				goto skip;
			}
			if (ordered)
				btrfs_put_ordered_extent(ordered);

			extent_locked = 1;
			/* re-search: the path was released above */
			continue;
		}

		if (nr_extents == 1) {
			/* update extent pointer in place */
			btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[0].disk_bytenr);
			btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[0].disk_num_bytes);
			btrfs_mark_buffer_dirty(leaf);

			btrfs_drop_extent_cache(inode, key.offset,
						key.offset + num_bytes - 1, 0);

			ret = btrfs_inc_extent_ref(trans, root,
						new_extents[0].disk_bytenr,
						new_extents[0].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid,
						key.objectid);
			BUG_ON(ret);

			ret = btrfs_free_extent(trans, root,
						extent_key->objectid,
						extent_key->offset,
						leaf->start,
						btrfs_header_owner(leaf),
						btrfs_header_generation(leaf),
						key.objectid, 0);
			BUG_ON(ret);

			btrfs_release_path(root, path);
			key.offset += num_bytes;
		} else {
			/* multi-extent replacement is disabled */
			BUG_ON(1);
#if 0
			u64 alloc_hint;
			u64 extent_len;
			int i;
			/*
			 * drop old extent pointer at first, then insert the
			 * new pointers one bye one
			 */
			btrfs_release_path(root, path);
			ret = btrfs_drop_extents(trans, root, inode, key.offset,
						 key.offset + num_bytes,
						 key.offset, &alloc_hint);
			BUG_ON(ret);

			for (i = 0; i < nr_extents; i++) {
				if (ext_offset >= new_extents[i].num_bytes) {
					ext_offset -= new_extents[i].num_bytes;
					continue;
				}
				extent_len = min(new_extents[i].num_bytes -
						 ext_offset, num_bytes);

				ret = btrfs_insert_empty_item(trans, root,
							      path, &key,
							      sizeof(*fi));
				BUG_ON(ret);

				leaf = path->nodes[0];
				fi = btrfs_item_ptr(leaf, path->slots[0],
						struct btrfs_file_extent_item);
				btrfs_set_file_extent_generation(leaf, fi,
							trans->transid);
				btrfs_set_file_extent_type(leaf, fi,
							BTRFS_FILE_EXTENT_REG);
				btrfs_set_file_extent_disk_bytenr(leaf, fi,
						new_extents[i].disk_bytenr);
				btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						new_extents[i].disk_num_bytes);
				btrfs_set_file_extent_ram_bytes(leaf, fi,
						new_extents[i].ram_bytes);

				btrfs_set_file_extent_compression(leaf, fi,
						new_extents[i].compression);
				btrfs_set_file_extent_encryption(leaf, fi,
						new_extents[i].encryption);
				btrfs_set_file_extent_other_encoding(leaf, fi,
						new_extents[i].other_encoding);

				btrfs_set_file_extent_num_bytes(leaf, fi,
							extent_len);
				ext_offset += new_extents[i].offset;
				btrfs_set_file_extent_offset(leaf, fi,
							ext_offset);
				btrfs_mark_buffer_dirty(leaf);

				btrfs_drop_extent_cache(inode, key.offset,
						key.offset + extent_len - 1, 0);

				ret = btrfs_inc_extent_ref(trans, root,
						new_extents[i].disk_bytenr,
						new_extents[i].disk_num_bytes,
						leaf->start,
						root->root_key.objectid,
						trans->transid, key.objectid);
				BUG_ON(ret);
				btrfs_release_path(root, path);

				inode_add_bytes(inode, extent_len);

				ext_offset = 0;
				num_bytes -= extent_len;
				key.offset += extent_len;

				if (num_bytes == 0)
					break;
			}
			BUG_ON(i >= nr_extents);
#endif
		}

		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
			extent_locked = 0;
		}
skip:
		if (ref_path->owner_objectid != BTRFS_MULTIPLE_OBJECTIDS &&
		    key.offset >= search_end)
			break;

		cond_resched();
	}
	ret = 0;
out:
	btrfs_release_path(root, path);
	if (inode) {
		mutex_unlock(&inode->i_mutex);
		if (extent_locked) {
			unlock_extent(&BTRFS_I(inode)->io_tree, lock_start,
				      lock_end, GFP_NOFS);
		}
		iput(inode);
	}
	return ret;
}

/*
 * Copy the leaf-ref cache entry of 'orig_start' to the freshly COWed
 * buffer 'buf' so the reloc tree keeps accurate leaf reference data.
 */
int btrfs_reloc_tree_cache_ref(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root,
			       struct extent_buffer *buf, u64 orig_start)
{
	int level;
	int ret;

	BUG_ON(btrfs_header_generation(buf) != trans->transid);
	BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID);

	level = btrfs_header_level(buf);
	if (level == 0) {
		struct btrfs_leaf_ref *ref;
		struct btrfs_leaf_ref *orig_ref;

		orig_ref = btrfs_lookup_leaf_ref(root, orig_start);
		if (!orig_ref)
			return -ENOENT;

		ref = btrfs_alloc_leaf_ref(root, orig_ref->nritems);
		if (!ref) {
			btrfs_free_leaf_ref(root, orig_ref);
			return -ENOMEM;
		}
		ref->nritems = orig_ref->nritems;
		memcpy(ref->extents, orig_ref->extents,
		       sizeof(ref->extents[0]) * ref->nritems);

		btrfs_free_leaf_ref(root, orig_ref);

		ref->root_gen = trans->transid;
		ref->bytenr = buf->start;
		ref->owner = btrfs_header_owner(buf);
		ref->generation = btrfs_header_generation(buf);

		ret = btrfs_add_leaf_ref(root, ref, 0);
		WARN_ON(ret);
		btrfs_free_leaf_ref(root, ref);
	}
	return 0;
}

/*
 * Drop cached extent mappings in 'target_root' for every file extent
 * referenced by 'leaf', so stale mappings are not used after relocation.
 * NOTE(review): the 'group' parameter is unused here — presumably kept
 * for signature symmetry with replace_extents_in_leaf; confirm.
 */
static noinline int invalidate_extent_cache(struct btrfs_root *root,
					    struct extent_buffer *leaf,
					    struct btrfs_block_group_cache *group,
					    struct btrfs_root *target_root)
{
	struct btrfs_key key;
	struct inode *inode = NULL;
	struct btrfs_file_extent_item *fi;
	u64 num_bytes;
	u64 skip_objectid = 0;
	u32 nritems;
	u32 i;

	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (key.objectid == skip_objectid ||
		    key.type != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		if (btrfs_file_extent_disk_bytenr(leaf, fi) == 0)
			continue;
		if (!inode || inode->i_ino != key.objectid) {
			/* iput(NULL) is a no-op on the first iteration */
			iput(inode);
			inode = btrfs_ilookup(target_root->fs_info->sb,
					      key.objectid, target_root, 1);
		}
		if (!inode) {
			/* inode not in cache; skip all its items */
			skip_objectid = key.objectid;
			continue;
		}
		num_bytes = btrfs_file_extent_num_bytes(leaf, fi);

		lock_extent(&BTRFS_I(inode)->io_tree, key.offset,
			    key.offset + num_bytes - 1, GFP_NOFS);
		btrfs_drop_extent_cache(inode, key.offset,
					key.offset + num_bytes - 1, 1);
		unlock_extent(&BTRFS_I(inode)->io_tree, key.offset,
			      key.offset + num_bytes - 1, GFP_NOFS);
		cond_resched();
	}
	iput(inode);
	return 0;
}

/*
 * Rewrite every file extent item in 'leaf' that falls inside block group
 * 'group' to point at its relocated copy (looked up through reloc_inode),
 * updating the leaf-ref cache and extent references accordingly.
 */
static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct extent_buffer *leaf,
					struct btrfs_block_group_cache *group,
					struct inode *reloc_inode)
{
	struct btrfs_key key;
	struct btrfs_key extent_key;
	struct btrfs_file_extent_item *fi;
	struct btrfs_leaf_ref *ref;
	struct disk_extent *new_extent;
	u64 bytenr;
	u64 num_bytes;
	u32 nritems;
	u32 i;
	int ext_index;
	int nr_extent;
	int ret;

	new_extent = kmalloc(sizeof(*new_extent), GFP_NOFS);
	BUG_ON(!new_extent);

	ref = btrfs_lookup_leaf_ref(root, leaf->start);
	BUG_ON(!ref);

	ext_index = -1;
	nritems = btrfs_header_nritems(leaf);
	for (i = 0; i < nritems; i++) {
		btrfs_item_key_to_cpu(leaf, &key, i);
		if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY)
			continue;
		fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
		if (btrfs_file_extent_type(leaf, fi) ==
		    BTRFS_FILE_EXTENT_INLINE)
			continue;
		bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
		num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
		if (bytenr == 0)
			continue;

		ext_index++;
		/* only rewrite extents inside the block group being moved */
		if (bytenr >= group->key.objectid + group->key.offset ||
		    bytenr + num_bytes <= group->key.objectid)
			continue;

		extent_key.objectid = bytenr;
		extent_key.offset = num_bytes;
		extent_key.type = BTRFS_EXTENT_ITEM_KEY;
		nr_extent = 1;
		ret = get_new_locations(reloc_inode, &extent_key,
					group->key.objectid, 1,
					&new_extent, &nr_extent);
		if (ret > 0)
			continue;
		BUG_ON(ret < 0);

		BUG_ON(ref->extents[ext_index].bytenr != bytenr);
		BUG_ON(ref->extents[ext_index].num_bytes != num_bytes);
		ref->extents[ext_index].bytenr = new_extent->disk_bytenr;
		ref->extents[ext_index].num_bytes = new_extent->disk_num_bytes;

		btrfs_set_file_extent_disk_bytenr(leaf, fi,
						  new_extent->disk_bytenr);
		btrfs_set_file_extent_disk_num_bytes(leaf, fi,
						     new_extent->disk_num_bytes);
		btrfs_mark_buffer_dirty(leaf);

		ret = btrfs_inc_extent_ref(trans, root,
					   new_extent->disk_bytenr,
					   new_extent->disk_num_bytes,
					   leaf->start,
					   root->root_key.objectid,
					   trans->transid, key.objectid);
		BUG_ON(ret);
		ret = btrfs_free_extent(trans, root,
					bytenr, num_bytes, leaf->start,
					btrfs_header_owner(leaf),
					btrfs_header_generation(leaf),
					key.objectid, 0);
		BUG_ON(ret);
		cond_resched();
	}
	kfree(new_extent);
	BUG_ON(ext_index + 1 != ref->nritems);
	btrfs_free_leaf_ref(root, ref);
	return 0;
}

/*
 * Detach root's reloc tree, queue it on the dead_reloc_roots list and
 * persist its final root item so it can be dropped later.
 */
int btrfs_free_reloc_root(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	int ret;

	if (root->reloc_root) {
		reloc_root = root->reloc_root;
		root->reloc_root = NULL;
		list_add(&reloc_root->dead_list,
			 &root->fs_info->dead_reloc_roots);

		btrfs_set_root_bytenr(&reloc_root->root_item,
				      reloc_root->node->start);
		/*
		 * NOTE(review): this sets the level on 'root's item while
		 * the bytenr above is set on the reloc root's item — looks
		 * like it should be &reloc_root->root_item; confirm.
		 */
		btrfs_set_root_level(&root->root_item,
				     btrfs_header_level(reloc_root->node));
		memset(&reloc_root->root_item.drop_progress, 0,
		       sizeof(struct btrfs_disk_key));
		reloc_root->root_item.drop_level = 0;

		ret = btrfs_update_root(trans, root->fs_info->tree_root,
					&reloc_root->root_key,
					&reloc_root->root_item);
		BUG_ON(ret);
	}
	return 0;
}

/*
 * Drop every reloc root queued on fs_info->dead_reloc_roots, restarting
 * the transaction whenever btrfs_drop_snapshot returns -EAGAIN.
 */
int btrfs_drop_dead_reloc_roots(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *reloc_root;
	struct btrfs_root *prev_root = NULL;
	struct list_head dead_roots;
	int ret;
	unsigned long nr;

	INIT_LIST_HEAD(&dead_roots);
	list_splice_init(&root->fs_info->dead_reloc_roots, &dead_roots);

	while (!list_empty(&dead_roots)) {
		reloc_root = list_entry(dead_roots.prev,
					struct btrfs_root, dead_list);
		list_del_init(&reloc_root->dead_list);

		BUG_ON(reloc_root->commit_root != NULL);
		while (1) {
			trans = btrfs_join_transaction(root, 1);
			BUG_ON(!trans);

			mutex_lock(&root->fs_info->drop_mutex);
			ret = btrfs_drop_snapshot(trans, reloc_root);
			if (ret != -EAGAIN)
				break;
			/* commit in progress: end the trans and retry */
			mutex_unlock(&root->fs_info->drop_mutex);

			nr = trans->blocks_used;
			ret = btrfs_end_transaction(trans, root);
			BUG_ON(ret);
			btrfs_btree_balance_dirty(root, nr);
		}

		/* snapshot fully dropped; remove its root item */
		free_extent_buffer(reloc_root->node);

		ret = btrfs_del_root(trans, root->fs_info->tree_root,
				     &reloc_root->root_key);
		BUG_ON(ret);
		mutex_unlock(&root->fs_info->drop_mutex);

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, root);
		BUG_ON(ret);
		btrfs_btree_balance_dirty(root, nr);

		/* free the previous root one iteration late */
		kfree(prev_root);
		prev_root = reloc_root;
	}
	if (prev_root) {
		btrfs_remove_leaf_refs(prev_root, (u64)-1, 0);
		kfree(prev_root);
	}
	return 0;
}

/* queue 'root' itself for the dead-reloc-root processing above */
int btrfs_add_dead_reloc_root(struct btrfs_root *root)
{
	list_add(&root->dead_list, &root->fs_info->dead_reloc_roots);
	return 0;
}

/*
 * Mount-time cleanup: finish dropping any reloc trees left over from a
 * crash, then run orphan cleanup on the data reloc tree.
 */
int btrfs_cleanup_reloc_trees(struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct btrfs_trans_handle *trans;
	struct btrfs_key location;
	int found;
	int ret;

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = btrfs_find_dead_roots(root, BTRFS_TREE_RELOC_OBJECTID, NULL);
	BUG_ON(ret);
	found = !list_empty(&root->fs_info->dead_reloc_roots);
	mutex_unlock(&root->fs_info->tree_reloc_mutex);

	if (found) {
		/* committing drops the dead reloc roots found above */
		trans = btrfs_start_transaction(root, 1);
		BUG_ON(!trans);
		ret = btrfs_commit_transaction(trans, root);
		BUG_ON(ret);
	}

	location.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	location.offset = (u64)-1;
	location.type = BTRFS_ROOT_ITEM_KEY;

	reloc_root = btrfs_read_fs_root_no_name(root->fs_info, &location);
	BUG_ON(!reloc_root);
	btrfs_orphan_cleanup(reloc_root);
	return 0;
}

/*
 * Create the reloc tree for 'root' (a snapshot of root->commit_root
 * keyed by BTRFS_TREE_RELOC_OBJECTID) if it does not exist yet.
 */
static noinline int init_reloc_tree(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb;
	struct btrfs_root_item *root_item;
	struct btrfs_key root_key;
	int ret;

	BUG_ON(!root->ref_cows);
	if (root->reloc_root)
		return 0;

	root_item = kmalloc(sizeof(*root_item), GFP_NOFS);
	BUG_ON(!root_item);

	ret = btrfs_copy_root(trans, root, root->commit_root,
			      &eb, BTRFS_TREE_RELOC_OBJECTID);
	BUG_ON(ret);

	root_key.objectid = BTRFS_TREE_RELOC_OBJECTID;
	root_key.offset = root->root_key.objectid;
	root_key.type = BTRFS_ROOT_ITEM_KEY;

	/*
	 * NOTE(review): sizeof(root_item) is the size of the POINTER, so
	 * only a few bytes of the root item are copied — looks like it
	 * should be sizeof(*root_item); confirm against upstream.
	 */
	memcpy(root_item, &root->root_item, sizeof(root_item));
	btrfs_set_root_refs(root_item, 0);
	btrfs_set_root_bytenr(root_item, eb->start);
	btrfs_set_root_level(root_item, btrfs_header_level(eb));
	btrfs_set_root_generation(root_item, trans->transid);

	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_insert_root(trans, root->fs_info->tree_root,
				&root_key, root_item);
	BUG_ON(ret);
	kfree(root_item);

	reloc_root = btrfs_read_fs_root_no_radix(root->fs_info->tree_root,
						 &root_key);
	BUG_ON(!reloc_root);
	reloc_root->last_trans = trans->transid;
	reloc_root->commit_root = NULL;
	reloc_root->ref_tree = &root->fs_info->reloc_ref_tree;

	root->reloc_root = reloc_root;
	return 0;
}

/*
 * Core function of space balance.
 *
 * The idea is using reloc trees to relocate tree blocks in reference
 * counted roots. There is one reloc tree for each subvol, and all
 * reloc trees share same root key objectid. Reloc trees are snapshots
 * of the latest committed roots of subvols (root->commit_root).
 *
 * To relocate a tree block referenced by a subvol, there are two steps.
 * COW the block through subvol's reloc tree, then update block pointer
 * in the subvol to point to the new block. Since all reloc trees share
 * same root key objectid, doing special handing for tree blocks owned
 * by them is easy. Once a tree block has been COWed in one reloc tree,
 * we can use the resulting new block directly when the same block is
 * required to COW again through other reloc trees. By this way, relocated
 * tree blocks are shared between reloc trees, so they are also shared
 * between subvols.
 */
static noinline int relocate_one_path(struct btrfs_trans_handle *trans,
				      struct btrfs_root *root,
				      struct btrfs_path *path,
				      struct btrfs_key *first_key,
				      struct btrfs_ref_path *ref_path,
				      struct btrfs_block_group_cache *group,
				      struct inode *reloc_inode)
{
	struct btrfs_root *reloc_root;
	struct extent_buffer *eb = NULL;
	struct btrfs_key *keys;
	u64 *nodes;
	int level;
	int shared_level;
	int lowest_level = 0;
	int ret;

	/* metadata-owned paths stop at the owner's level, data paths at 0 */
	if (ref_path->owner_objectid < BTRFS_FIRST_FREE_OBJECTID)
		lowest_level = ref_path->owner_objectid;

	if (!root->ref_cows) {
		/*
		 * non-reference-counted root: a plain COWing search is
		 * enough to relocate the blocks on this path.
		 */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, root, first_key, path, 0, 1);
		BUG_ON(ret < 0);
		path->lowest_level = 0;
		btrfs_release_path(root, path);
		return 0;
	}

	mutex_lock(&root->fs_info->tree_reloc_mutex);
	ret = init_reloc_tree(trans, root);
	BUG_ON(ret);
	reloc_root = root->reloc_root;

	shared_level = ref_path->shared_level;
	ref_path->shared_level = BTRFS_MAX_LEVEL - 1;

	/* clear the cached keys/blocks above the level shared so far */
	keys = ref_path->node_keys;
	nodes = ref_path->new_nodes;
	memset(&keys[shared_level + 1], 0,
	       sizeof(*keys) * (BTRFS_MAX_LEVEL - shared_level - 1));
	memset(&nodes[shared_level + 1], 0,
	       sizeof(*nodes) *
	       (BTRFS_MAX_LEVEL - shared_level - 1));

	if (nodes[lowest_level] == 0) {
		/*
		 * no cached path yet: COW the path in the reloc tree and
		 * record the new block addresses and first keys per level.
		 */
		path->lowest_level = lowest_level;
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 1);
		BUG_ON(ret);
		for (level = lowest_level; level < BTRFS_MAX_LEVEL; level++) {
			eb = path->nodes[level];
			if (!eb || eb == reloc_root->node)
				break;
			nodes[level] = eb->start;
			if (level == 0)
				btrfs_item_key_to_cpu(eb, &keys[level], 0);
			else
				btrfs_node_key_to_cpu(eb, &keys[level], 0);
		}
		if (nodes[0] &&
		    ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/* leaf holds data extents: point them at new copies */
			eb = path->nodes[0];
			ret = replace_extents_in_leaf(trans, reloc_root, eb,
						      group, reloc_inode);
			BUG_ON(ret);
		}
		btrfs_release_path(reloc_root, path);
	} else {
		/* path already COWed in another pass: merge it in */
		ret = btrfs_merge_path(trans, reloc_root, keys, nodes,
				       lowest_level);
		BUG_ON(ret);
	}

	/*
	 * replace tree blocks in the fs tree with tree blocks in
	 * the reloc tree.
	 */
	ret = btrfs_merge_path(trans, root, keys, nodes, lowest_level);
	BUG_ON(ret < 0);

	if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
		/* invalidate stale extent cache entries for this leaf */
		ret = btrfs_search_slot(trans, reloc_root, first_key, path,
					0, 0);
		BUG_ON(ret);
		extent_buffer_get(path->nodes[0]);
		eb = path->nodes[0];
		btrfs_release_path(reloc_root, path);
		ret = invalidate_extent_cache(reloc_root, eb, group, root);
		BUG_ON(ret);
		free_extent_buffer(eb);
	}

	mutex_unlock(&root->fs_info->tree_reloc_mutex);
	path->lowest_level = 0;
	return 0;
}

/*
 * relocate one metadata path; thin wrapper around relocate_one_path
 * that also flushes pending extent ops when the extent root itself
 * was modified.
 */
static noinline int relocate_tree_block(struct btrfs_trans_handle *trans,
					struct btrfs_root *root,
					struct btrfs_path *path,
					struct btrfs_key *first_key,
					struct btrfs_ref_path *ref_path)
{
	int ret;

	ret = relocate_one_path(trans, root, path, first_key,
				ref_path, NULL, NULL);
	BUG_ON(ret);

	if (root == root->fs_info->extent_root)
		btrfs_extent_post_op(trans, root);

	return 0;
}

/*
 * delete the extent item at *extent_key (used for the bogus
 * objectid==0 extent); returns the search/delete result.
 */
static noinline int del_extent_zero(struct btrfs_trans_handle *trans,
				    struct btrfs_root *extent_root,
				    struct btrfs_path *path,
				    struct btrfs_key *extent_key)
{
	int ret;

	ret = btrfs_search_slot(trans, extent_root, extent_key, path, -1, 1);
	if (ret)
		goto out;
	ret = btrfs_del_item(trans, extent_root, path);
out:
	btrfs_release_path(extent_root, path);
	return ret;
}

/*
 * look up the root a reference path starts from. COW-only roots are
 * keyed with offset 0, reference counted subvol roots with (u64)-1.
 */
static noinline struct btrfs_root *read_ref_root(struct btrfs_fs_info *fs_info,
						struct btrfs_ref_path *ref_path)
{
	struct btrfs_key root_key;

	root_key.objectid = ref_path->root_objectid;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	if (is_cowonly_root(ref_path->root_objectid))
		root_key.offset = 0;
	else
		root_key.offset = (u64)-1;

	return btrfs_read_fs_root_no_name(fs_info, &root_key);
}

static noinline int
relocate_one_extent(struct btrfs_root *extent_root,
		    struct btrfs_path *path,
		    struct btrfs_key *extent_key,
		    struct btrfs_block_group_cache *group,
		    struct inode *reloc_inode, int pass)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *found_root;
	struct btrfs_ref_path *ref_path = NULL;
	struct disk_extent *new_extents = NULL;
	int nr_extents = 0;
	int loops;
	int ret;
	int level;
	struct btrfs_key first_key;
	u64 prev_block = 0;


	/*
	 * Relocate one extent: walk every reference path to the extent
	 * and rewrite each referencing root so it points at relocated
	 * copies. pass 0 copies data to the reloc inode, later passes
	 * update metadata references.
	 */
	trans = btrfs_start_transaction(extent_root, 1);
	BUG_ON(!trans);

	if (extent_key->objectid == 0) {
		/* bogus zero-objectid extent item: just delete it */
		ret = del_extent_zero(trans, extent_root, path, extent_key);
		goto out;
	}

	ref_path = kmalloc(sizeof(*ref_path), GFP_NOFS);
	if (!ref_path) {
		ret = -ENOMEM;
		goto out;
	}

	/* iterate over all reference paths to this extent */
	for (loops = 0; ; loops++) {
		if (loops == 0) {
			ret = btrfs_first_ref_path(trans, extent_root, ref_path,
						   extent_key->objectid);
		} else {
			ret = btrfs_next_ref_path(trans, extent_root, ref_path);
		}
		if (ret < 0)
			goto out;
		if (ret > 0)
			break;

		/* log and reloc trees are handled elsewhere */
		if (ref_path->root_objectid == BTRFS_TREE_LOG_OBJECTID ||
		    ref_path->root_objectid == BTRFS_TREE_RELOC_OBJECTID)
			continue;

		found_root = read_ref_root(extent_root->fs_info, ref_path);
		BUG_ON(!found_root);
		/*
		 * for reference counted tree, only process reference paths
		 * rooted at the latest committed root.
		 */
		if (found_root->ref_cows &&
		    ref_path->root_generation != found_root->root_key.offset)
			continue;

		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			if (pass == 0) {
				/*
				 * copy data extents to new locations
				 */
				u64 group_start = group->key.objectid;
				ret = relocate_data_extent(reloc_inode,
							   extent_key,
							   group_start);
				if (ret < 0)
					goto out;
				break;
			}
			level = 0;
		} else {
			level = ref_path->owner_objectid;
		}

		/*
		 * grab the first key of the block this path enters at;
		 * cached via prev_block since consecutive paths often
		 * share the same block.
		 */
		if (prev_block != ref_path->nodes[level]) {
			struct extent_buffer *eb;
			u64 block_start = ref_path->nodes[level];
			u64 block_size = btrfs_level_size(found_root, level);

			eb = read_tree_block(found_root, block_start,
					     block_size, 0);
			btrfs_tree_lock(eb);
			BUG_ON(level != btrfs_header_level(eb));

			if (level == 0)
				btrfs_item_key_to_cpu(eb, &first_key, 0);
			else
				btrfs_node_key_to_cpu(eb, &first_key, 0);

			btrfs_tree_unlock(eb);
			free_extent_buffer(eb);
			prev_block = block_start;
		}

		mutex_lock(&extent_root->fs_info->trans_mutex);
		btrfs_record_root_in_trans(found_root);
		mutex_unlock(&extent_root->fs_info->trans_mutex);
		if (ref_path->owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) {
			/*
			 * try to update data extent references while
			 * keeping metadata shared between snapshots.
			 */
			if (pass == 1) {
				ret = relocate_one_path(trans, found_root,
						path, &first_key, ref_path,
						group, reloc_inode);
				if (ret < 0)
					goto out;
				continue;
			}
			/*
			 * use fallback method to process the remaining
			 * references.
5902 */ 5903 if (!new_extents) { 5904 u64 group_start = group->key.objectid; 5905 new_extents = kmalloc(sizeof(*new_extents), 5906 GFP_NOFS); 5907 nr_extents = 1; 5908 ret = get_new_locations(reloc_inode, 5909 extent_key, 5910 group_start, 1, 5911 &new_extents, 5912 &nr_extents); 5913 if (ret) 5914 goto out; 5915 } 5916 ret = replace_one_extent(trans, found_root, 5917 path, extent_key, 5918 &first_key, ref_path, 5919 new_extents, nr_extents); 5920 } else { 5921 ret = relocate_tree_block(trans, found_root, path, 5922 &first_key, ref_path); 5923 } 5924 if (ret < 0) 5925 goto out; 5926 } 5927 ret = 0; 5928 out: 5929 btrfs_end_transaction(trans, extent_root); 5930 kfree(new_extents); 5931 kfree(ref_path); 5932 return ret; 5933 } 5934 5935 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 5936 { 5937 u64 num_devices; 5938 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 5939 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 5940 5941 num_devices = root->fs_info->fs_devices->rw_devices; 5942 if (num_devices == 1) { 5943 stripped |= BTRFS_BLOCK_GROUP_DUP; 5944 stripped = flags & ~stripped; 5945 5946 /* turn raid0 into single device chunks */ 5947 if (flags & BTRFS_BLOCK_GROUP_RAID0) 5948 return stripped; 5949 5950 /* turn mirroring into duplication */ 5951 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 5952 BTRFS_BLOCK_GROUP_RAID10)) 5953 return stripped | BTRFS_BLOCK_GROUP_DUP; 5954 return flags; 5955 } else { 5956 /* they already had raid on here, just return */ 5957 if (flags & stripped) 5958 return flags; 5959 5960 stripped |= BTRFS_BLOCK_GROUP_DUP; 5961 stripped = flags & ~stripped; 5962 5963 /* switch duplicated blocks with raid1 */ 5964 if (flags & BTRFS_BLOCK_GROUP_DUP) 5965 return stripped | BTRFS_BLOCK_GROUP_RAID1; 5966 5967 /* turn single device chunks into raid0 */ 5968 return stripped | BTRFS_BLOCK_GROUP_RAID0; 5969 } 5970 return flags; 5971 } 5972 5973 static int __alloc_chunk_for_shrink(struct btrfs_root *root, 5974 struct btrfs_block_group_cache 
				    *shrink_block_group,
				    int force)
{
	struct btrfs_trans_handle *trans;
	u64 new_alloc_flags;
	u64 calc;

	/*
	 * If the group being shrunk still holds data, pre-allocate a new
	 * chunk (with profile flags adjusted for the current device
	 * count) so its contents have somewhere to go.
	 */
	spin_lock(&shrink_block_group->lock);
	if (btrfs_block_group_used(&shrink_block_group->item) > 0) {
		spin_unlock(&shrink_block_group->lock);

		/* drop the spinlock while starting the transaction */
		trans = btrfs_start_transaction(root, 1);
		spin_lock(&shrink_block_group->lock);

		new_alloc_flags = update_block_group_flags(root,
						   shrink_block_group->flags);
		if (new_alloc_flags != shrink_block_group->flags) {
			/* profile changes: only the used bytes must move */
			calc =
			     btrfs_block_group_used(&shrink_block_group->item);
		} else {
			calc = shrink_block_group->key.offset;
		}
		spin_unlock(&shrink_block_group->lock);

		/* 2MB slack on top of the computed size */
		do_chunk_alloc(trans, root->fs_info->extent_root,
			       calc + 2 * 1024 * 1024, new_alloc_flags, force);

		btrfs_end_transaction(trans, root);
	} else
		spin_unlock(&shrink_block_group->lock);
	return 0;
}

/*
 * insert a bare orphan inode item of the given size (mode S_IFREG|0600,
 * compression disabled); used as the relocation staging inode.
 * Returns 0 or a negative errno from path alloc / item insertion.
 */
static int __insert_orphan_inode(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root,
				 u64 objectid, u64 size)
{
	struct btrfs_path *path;
	struct btrfs_inode_item *item;
	struct extent_buffer *leaf;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	ret = btrfs_insert_empty_inode(trans, root, path, objectid);
	if (ret)
		goto out;

	leaf = path->nodes[0];
	item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_inode_item);
	memset_extent_buffer(leaf, 0, (unsigned long)item, sizeof(*item));
	btrfs_set_inode_generation(leaf, item, 1);
	btrfs_set_inode_size(leaf, item, size);
	btrfs_set_inode_mode(leaf, item, S_IFREG | 0600);
	btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS);
	btrfs_mark_buffer_dirty(leaf);
	btrfs_release_path(root, path);
out:
	btrfs_free_path(path);
	return ret;
}

static noinline struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info,
					struct btrfs_block_group_cache *group)
{
	struct inode *inode = NULL;
	struct btrfs_trans_handle *trans;
	struct btrfs_root *root;
	struct btrfs_key root_key;
	u64 objectid = BTRFS_FIRST_FREE_OBJECTID;
	int err = 0;

	/*
	 * Create an orphan inode in the data reloc tree, covering the
	 * whole block group with a single (prealloc-style) file extent.
	 * Data from the group is relocated through this inode's address
	 * space. Returns the inode or an ERR_PTR.
	 */
	root_key.objectid = BTRFS_DATA_RELOC_TREE_OBJECTID;
	root_key.type = BTRFS_ROOT_ITEM_KEY;
	root_key.offset = (u64)-1;
	root = btrfs_read_fs_root_no_name(fs_info, &root_key);
	if (IS_ERR(root))
		return ERR_CAST(root);

	trans = btrfs_start_transaction(root, 1);
	BUG_ON(!trans);

	err = btrfs_find_free_objectid(trans, root, objectid, &objectid);
	if (err)
		goto out;

	err = __insert_orphan_inode(trans, root, objectid, group->key.offset);
	BUG_ON(err);

	/* one file extent spanning the entire group */
	err = btrfs_insert_file_extent(trans, root, objectid, 0, 0, 0,
				       group->key.offset, 0, group->key.offset,
				       0, 0, 0);
	BUG_ON(err);

	inode = btrfs_iget_locked(root->fs_info->sb, objectid, root);
	if (inode->i_state & I_NEW) {
		BTRFS_I(inode)->root = root;
		BTRFS_I(inode)->location.objectid = objectid;
		BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
		BTRFS_I(inode)->location.offset = 0;
		btrfs_read_locked_inode(inode);
		unlock_new_inode(inode);
		BUG_ON(is_bad_inode(inode));
	} else {
		/* we just created the inode; it cannot already be cached */
		BUG_ON(1);
	}
	/* remember the group start; used to map file offsets to bytenrs */
	BTRFS_I(inode)->index_cnt = group->key.objectid;

	err = btrfs_orphan_add(trans, inode);
out:
	btrfs_end_transaction(trans, root);
	if (err) {
		if (inode)
			iput(inode);
		inode = ERR_PTR(err);
	}
	return inode;
}

/*
 * clone checksums from the data reloc inode's range onto a newly
 * written ordered extent (file_pos/len) of another inode.
 */
int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len)
{

	struct btrfs_ordered_sum *sums;
	struct btrfs_sector_sum *sector_sum;
	struct btrfs_ordered_extent *ordered;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct list_head list;
	size_t offset;
	int ret;
	u64 disk_bytenr;

	INIT_LIST_HEAD(&list);

	ordered = btrfs_lookup_ordered_extent(inode, file_pos);
	BUG_ON(ordered->file_offset != file_pos || ordered->len != len);

	/*
	 * index_cnt holds the group start, so file offset + index_cnt is
	 * the old disk bytenr the csums were recorded against.
	 */
	disk_bytenr = file_pos + BTRFS_I(inode)->index_cnt;
	ret = btrfs_lookup_csums_range(root->fs_info->csum_root, disk_bytenr,
				       disk_bytenr + len - 1, &list);

	/* rebase each csum entry onto the new ordered extent's start */
	while (!list_empty(&list)) {
		sums = list_entry(list.next, struct btrfs_ordered_sum, list);
		list_del_init(&sums->list);

		sector_sum = sums->sums;
		sums->bytenr = ordered->start;

		offset = 0;
		while (offset < sums->len) {
			sector_sum->bytenr += ordered->start - disk_bytenr;
			sector_sum++;
			offset += root->sectorsize;
		}

		btrfs_add_ordered_sum(inode, ordered, sums);
	}
	btrfs_put_ordered_extent(ordered);
	return 0;
}

/*
 * Relocate all extents in the block group starting at group_start:
 * stage data through a reloc inode, walk the extent tree for extents
 * in the group's range and relocate each, iterating in passes until
 * nothing is left (see the loop after the "again" label below).
 */
int btrfs_relocate_block_group(struct btrfs_root *root, u64 group_start)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_path *path;
	struct btrfs_fs_info *info = root->fs_info;
	struct extent_buffer *leaf;
	struct inode *reloc_inode;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	u64 skipped;
	u64 cur_byte;
	u64 total_found;
	u32 nritems;
	int ret;
	int progress;
	int pass = 0;

	/* all extent items live in the extent root */
	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(info, group_start);
	BUG_ON(!block_group);

	printk(KERN_INFO "btrfs relocating block group %llu flags %llu\n",
	       (unsigned long long)block_group->key.objectid,
	       (unsigned long long)block_group->flags);

	path = btrfs_alloc_path();
	BUG_ON(!path);

	reloc_inode = create_reloc_inode(info, block_group);
	BUG_ON(IS_ERR(reloc_inode));

	/* make room elsewhere, then stop new allocations in this group */
	__alloc_chunk_for_shrink(root, block_group, 1);
	set_block_group_readonly(block_group);

	btrfs_start_delalloc_inodes(info->tree_root);
	btrfs_wait_ordered_extents(info->tree_root, 0);
again:
	skipped = 0;
	total_found = 0;
	progress = 0;
	key.objectid = block_group->key.objectid;
	key.offset = 0;
	key.type = 0;
	cur_byte = key.objectid;

	/* commit so everything relocated in the previous pass is on disk */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	mutex_lock(&root->fs_info->cleaner_mutex);
	btrfs_clean_old_snapshots(info->tree_root);
	btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1);
	mutex_unlock(&root->fs_info->cleaner_mutex);

	/* scan extent items inside the group's byte range */
	while (1) {
		ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
		if (ret < 0)
			goto out;
next:
		leaf = path->nodes[0];
		nritems = btrfs_header_nritems(leaf);
		if (path->slots[0] >= nritems) {
			ret = btrfs_next_leaf(root, path);
			if (ret < 0)
				goto out;
			if (ret == 1) {
				ret = 0;
				break;
			}
			leaf = path->nodes[0];
			nritems = btrfs_header_nritems(leaf);
		}

		btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);

		/* past the end of this block group: done with the pass */
		if (key.objectid >= block_group->key.objectid +
		    block_group->key.offset)
			break;

		if (progress && need_resched()) {
			btrfs_release_path(root, path);
			cond_resched();
			progress = 0;
			continue;
		}
		progress = 1;

		/* skip non-extent items and extents already processed */
		if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY ||
		    key.objectid + key.offset <= cur_byte) {
			path->slots[0]++;
			goto next;
		}

		total_found++;
		cur_byte = key.objectid + key.offset;
		btrfs_release_path(root, path);

		__alloc_chunk_for_shrink(root, block_group, 0);
		ret = relocate_one_extent(root, path, &key, block_group,
					  reloc_inode, pass);
		BUG_ON(ret < 0);
		if (ret > 0)
			skipped++;

		key.objectid = cur_byte;
		key.type = 0;
		key.offset = 0;
	}

	btrfs_release_path(root, path);

	if (pass == 0) {
		/* flush the staged data before the metadata passes */
		btrfs_wait_ordered_range(reloc_inode, 0, (u64)-1);
		invalidate_mapping_pages(reloc_inode->i_mapping, 0, -1);
	}

	if (total_found > 0) {
		printk(KERN_INFO "btrfs found %llu extents in pass %d\n",
		       (unsigned long long)total_found, pass);
		pass++;
		/*
		 * if every extent found was skipped for several passes,
		 * start over with a fresh reloc inode.
		 */
		if (total_found == skipped && pass > 2) {
			iput(reloc_inode);
			reloc_inode = create_reloc_inode(info, block_group);
			pass = 0;
		}
		goto again;
	}

	/* delete reloc_inode */
	iput(reloc_inode);

	/* unpin extents in this range */
	trans = btrfs_start_transaction(info->tree_root, 1);
	btrfs_commit_transaction(trans, info->tree_root);

	spin_lock(&block_group->lock);
	WARN_ON(block_group->pinned > 0);
	WARN_ON(block_group->reserved > 0);
	WARN_ON(btrfs_block_group_used(&block_group->item) > 0);
	spin_unlock(&block_group->lock);
	put_block_group(block_group);
	ret = 0;
out:
	btrfs_free_path(path);
	return ret;
}

/*
 * position the path at the first BLOCK_GROUP_ITEM at or after
 * key->objectid; returns 0 when found, -ENOENT past the end, or a
 * negative errno from the tree search.
 */
static int find_first_block_group(struct btrfs_root *root,
		struct btrfs_path *path, struct btrfs_key *key)
{
	int ret = 0;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;
	int slot;

	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	if (ret < 0)
		goto out;

	while (1) {
		slot = path->slots[0];
		leaf = path->nodes[0];
		if (slot >= btrfs_header_nritems(leaf)) {
			ret = btrfs_next_leaf(root, path);
			if (ret == 0)
				continue;
			if (ret < 0)
				goto out;
			break;
		}
		btrfs_item_key_to_cpu(leaf, &found_key, slot);

		if (found_key.objectid >= key->objectid &&
		    found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) {
			ret = 0;
			goto out;
		}
		path->slots[0]++;
	}
	ret = -ENOENT;
out:
	return ret;
}

/*
 * tear down the in-memory block group cache at unmount time.
 */
int btrfs_free_block_groups(struct btrfs_fs_info *info)
{
	struct btrfs_block_group_cache *block_group;
	struct rb_node *n;

	spin_lock(&info->block_group_cache_lock);
	/*
	 * pop groups off the rb tree one at a time; the cache lock is
	 * dropped around the per-group teardown since that takes other
	 * locks (groups_sem).
	 */
	while ((n = rb_last(&info->block_group_cache_tree)) != NULL) {
		block_group = rb_entry(n, struct btrfs_block_group_cache,
				       cache_node);
		rb_erase(&block_group->cache_node,
			 &info->block_group_cache_tree);
		spin_unlock(&info->block_group_cache_lock);

		btrfs_remove_free_space_cache(block_group);
		down_write(&block_group->space_info->groups_sem);
		list_del(&block_group->list);
		up_write(&block_group->space_info->groups_sem);

		/* at unmount the cache must hold the last reference */
		WARN_ON(atomic_read(&block_group->count) != 1);
		kfree(block_group);

		spin_lock(&info->block_group_cache_lock);
	}
	spin_unlock(&info->block_group_cache_lock);
	return 0;
}

/*
 * read all block group items from the extent tree at mount time and
 * build the in-memory block group / space-info caches.
 */
int btrfs_read_block_groups(struct btrfs_root *root)
{
	struct btrfs_path *path;
	int ret;
	struct btrfs_block_group_cache *cache;
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_space_info *space_info;
	struct btrfs_key key;
	struct btrfs_key found_key;
	struct extent_buffer *leaf;

	root = info->extent_root;
	key.objectid = 0;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	while (1) {
		ret = find_first_block_group(root, path, &key);
		if (ret > 0) {
			/* no more block group items: normal termination */
			ret = 0;
			goto error;
		}
		if (ret != 0)
			goto error;

		leaf = path->nodes[0];
		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
		cache = kzalloc(sizeof(*cache), GFP_NOFS);
		if (!cache) {
			ret = -ENOMEM;
			break;
		}

		atomic_set(&cache->count, 1);
		spin_lock_init(&cache->lock);
		mutex_init(&cache->alloc_mutex);
		mutex_init(&cache->cache_mutex);
		INIT_LIST_HEAD(&cache->list);
		read_extent_buffer(leaf, &cache->item,
				   btrfs_item_ptr_offset(leaf, path->slots[0]),
				   sizeof(cache->item));
		memcpy(&cache->key, &found_key, sizeof(found_key));

		/* advance the search key past this group */
		key.objectid =
			found_key.objectid + found_key.offset;
		btrfs_release_path(root, path);
		cache->flags = btrfs_block_group_flags(&cache->item);

		/* account this group against its space_info (created on demand) */
		ret = update_space_info(info, cache->flags, found_key.offset,
					btrfs_block_group_used(&cache->item),
					&space_info);
		BUG_ON(ret);
		cache->space_info = space_info;
		down_write(&space_info->groups_sem);
		list_add_tail(&cache->list, &space_info->block_groups);
		up_write(&space_info->groups_sem);

		ret = btrfs_add_block_group_cache(root->fs_info, cache);
		BUG_ON(ret);

		set_avail_alloc_bits(root->fs_info, cache->flags);
		if (btrfs_chunk_readonly(root, cache->key.objectid))
			set_block_group_readonly(cache);
	}
	ret = 0;
error:
	btrfs_free_path(path);
	return ret;
}

/*
 * create a new block group for a freshly allocated chunk: build the
 * in-memory cache entry, account it in space_info, and insert the
 * BLOCK_GROUP_ITEM into the extent tree.
 */
int btrfs_make_block_group(struct btrfs_trans_handle *trans,
			   struct btrfs_root *root, u64 bytes_used,
			   u64 type, u64 chunk_objectid, u64 chunk_offset,
			   u64 size)
{
	int ret;
	struct btrfs_root *extent_root;
	struct btrfs_block_group_cache *cache;

	extent_root = root->fs_info->extent_root;

	root->fs_info->last_trans_new_blockgroup = trans->transid;

	cache = kzalloc(sizeof(*cache), GFP_NOFS);
	if (!cache)
		return -ENOMEM;

	cache->key.objectid = chunk_offset;
	cache->key.offset = size;
	cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY;
	atomic_set(&cache->count, 1);
	spin_lock_init(&cache->lock);
	mutex_init(&cache->alloc_mutex);
	mutex_init(&cache->cache_mutex);
	INIT_LIST_HEAD(&cache->list);

	btrfs_set_block_group_used(&cache->item, bytes_used);
	btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid);
	cache->flags = type;
	btrfs_set_block_group_flags(&cache->item, type);

	ret = update_space_info(root->fs_info, cache->flags, size, bytes_used,
				&cache->space_info);
	BUG_ON(ret);
	down_write(&cache->space_info->groups_sem);
	list_add_tail(&cache->list, &cache->space_info->block_groups);
	up_write(&cache->space_info->groups_sem);

	ret = btrfs_add_block_group_cache(root->fs_info, cache);
	BUG_ON(ret);

	ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item,
				sizeof(cache->item));
	BUG_ON(ret);

	/* flush pending extent tree work generated by the insert */
	finish_current_insert(trans, extent_root, 0);
	ret = del_pending_extents(trans, extent_root, 0);
	BUG_ON(ret);
	set_avail_alloc_bits(extent_root->fs_info, type);

	return 0;
}

/*
 * remove an (empty, read-only) block group: unhook it from the cache
 * rb tree, free-space cache, space_info accounting, and delete its
 * BLOCK_GROUP_ITEM from the extent tree.
 */
int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root, u64 group_start)
{
	struct btrfs_path *path;
	struct btrfs_block_group_cache *block_group;
	struct btrfs_key key;
	int ret;

	root = root->fs_info->extent_root;

	block_group = btrfs_lookup_block_group(root->fs_info, group_start);
	BUG_ON(!block_group);
	BUG_ON(!block_group->ro);

	memcpy(&key, &block_group->key, sizeof(key));

	path = btrfs_alloc_path();
	BUG_ON(!path);

	spin_lock(&root->fs_info->block_group_cache_lock);
	rb_erase(&block_group->cache_node,
		 &root->fs_info->block_group_cache_tree);
	spin_unlock(&root->fs_info->block_group_cache_lock);
	btrfs_remove_free_space_cache(block_group);
	down_write(&block_group->space_info->groups_sem);
	list_del(&block_group->list);
	up_write(&block_group->space_info->groups_sem);

	spin_lock(&block_group->space_info->lock);
	block_group->space_info->total_bytes -= block_group->key.offset;
	block_group->space_info->bytes_readonly -= block_group->key.offset;
	spin_unlock(&block_group->space_info->lock);
	block_group->space_info->full = 0;

	/*
	 * drop two references on purpose: one taken by the lookup above
	 * and the one held by the (now removed) cache rb tree.
	 */
	put_block_group(block_group);
	put_block_group(block_group);

	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
	if (ret > 0)
		ret = -EIO;	/* the item must exist; "not found" is corruption */
	if (ret < 0)
		goto out;

	ret = btrfs_del_item(trans, root, path);
out:
	btrfs_free_path(path);
	return ret;
}