1 /* 2 * Copyright (C) 2007 Oracle. All rights reserved. 3 * 4 * This program is free software; you can redistribute it and/or 5 * modify it under the terms of the GNU General Public 6 * License v2 as published by the Free Software Foundation. 7 * 8 * This program is distributed in the hope that it will be useful, 9 * but WITHOUT ANY WARRANTY; without even the implied warranty of 10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 11 * General Public License for more details. 12 * 13 * You should have received a copy of the GNU General Public 14 * License along with this program; if not, write to the 15 * Free Software Foundation, Inc., 59 Temple Place - Suite 330, 16 * Boston, MA 021110-1307, USA. 17 */ 18 #include <linux/sched.h> 19 #include <linux/pagemap.h> 20 #include <linux/writeback.h> 21 #include <linux/blkdev.h> 22 #include <linux/sort.h> 23 #include <linux/rcupdate.h> 24 #include <linux/kthread.h> 25 #include <linux/slab.h> 26 #include "compat.h" 27 #include "hash.h" 28 #include "ctree.h" 29 #include "disk-io.h" 30 #include "print-tree.h" 31 #include "transaction.h" 32 #include "volumes.h" 33 #include "locking.h" 34 #include "free-space-cache.h" 35 36 /* control flags for do_chunk_alloc's force field 37 * CHUNK_ALLOC_NO_FORCE means to only allocate a chunk 38 * if we really need one. 39 * 40 * CHUNK_ALLOC_FORCE means it must try to allocate one 41 * 42 * CHUNK_ALLOC_LIMITED means to only try and allocate one 43 * if we have very few chunks already allocated. This is 44 * used as part of the clustering code to help make sure 45 * we have a good pool of storage to cluster in, without 46 * filling the FS with empty chunks 47 * 48 */ 49 enum { 50 CHUNK_ALLOC_NO_FORCE = 0, 51 CHUNK_ALLOC_FORCE = 1, 52 CHUNK_ALLOC_LIMITED = 2, 53 }; 54 55 static int update_block_group(struct btrfs_trans_handle *trans, 56 struct btrfs_root *root, 57 u64 bytenr, u64 num_bytes, int alloc); 58 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 59 struct btrfs_root *root, 60 u64 bytenr, u64 num_bytes, u64 parent, 61 u64 root_objectid, u64 owner_objectid, 62 u64 owner_offset, int refs_to_drop, 63 struct btrfs_delayed_extent_op *extra_op); 64 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 65 struct extent_buffer *leaf, 66 struct btrfs_extent_item *ei); 67 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 68 struct btrfs_root *root, 69 u64 parent, u64 root_objectid, 70 u64 flags, u64 owner, u64 offset, 71 struct btrfs_key *ins, int ref_mod); 72 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 73 struct btrfs_root *root, 74 u64 parent, u64 root_objectid, 75 u64 flags, struct btrfs_disk_key *key, 76 int level, struct btrfs_key *ins); 77 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 78 struct btrfs_root *extent_root, u64 alloc_bytes, 79 u64 flags, int force); 80 static int find_next_key(struct btrfs_path *path, int level, 81 struct btrfs_key *key); 82 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 83 int dump_block_groups); 84 85 static noinline int 86 block_group_cache_done(struct btrfs_block_group_cache *cache) 87 { 88 smp_mb(); 89 return cache->cached == BTRFS_CACHE_FINISHED; 90 } 91 92 static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) 93 { 94 return (cache->flags & bits) == bits; 95 } 96 97 static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) 98 { 99 atomic_inc(&cache->count); 100 } 101 102 void btrfs_put_block_group(struct btrfs_block_group_cache *cache) 103 { 104 if (atomic_dec_and_test(&cache->count)) { 105 WARN_ON(cache->pinned > 0); 106 WARN_ON(cache->reserved > 0); 107 WARN_ON(cache->reserved_pinned > 0); 108 kfree(cache->free_space_ctl); 109 kfree(cache); 110 } 111 } 112 113 /* 114 * this adds the block group to the fs_info rb tree for the block group 115 * cache 116 */ 117 static int btrfs_add_block_group_cache(struct btrfs_fs_info *info, 118 struct btrfs_block_group_cache *block_group) 119 { 120 struct rb_node **p; 121 struct rb_node *parent = NULL; 122 struct btrfs_block_group_cache *cache; 123 124 spin_lock(&info->block_group_cache_lock); 125 p = &info->block_group_cache_tree.rb_node; 126 127 while (*p) { 128 parent = *p; 129 cache = rb_entry(parent, struct btrfs_block_group_cache, 130 cache_node); 131 if (block_group->key.objectid < cache->key.objectid) { 132 p = &(*p)->rb_left; 133 } else if (block_group->key.objectid > cache->key.objectid) { 134 p = &(*p)->rb_right; 135 } else { 136 spin_unlock(&info->block_group_cache_lock); 137 return -EEXIST; 138 } 139 } 140 141 rb_link_node(&block_group->cache_node, parent, p); 142 rb_insert_color(&block_group->cache_node, 143 &info->block_group_cache_tree); 144 spin_unlock(&info->block_group_cache_lock); 145 146 return 0; 147 } 148 149 /* 150 * This will return the block group at or after bytenr if contains is 0, else 151 * it will return the block group that contains the bytenr 152 */ 153 static struct btrfs_block_group_cache * 154 block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr, 155 int contains) 156 { 157 struct btrfs_block_group_cache *cache, *ret = NULL; 158 struct rb_node *n; 159 u64 end, start; 160 161 spin_lock(&info->block_group_cache_lock); 162 n = info->block_group_cache_tree.rb_node; 163 164 while (n) { 165 cache = rb_entry(n, struct btrfs_block_group_cache, 166 cache_node); 167 end = cache->key.objectid + cache->key.offset - 1; 168 start = cache->key.objectid; 169 170 if (bytenr < start) { 171 if (!contains && (!ret || start < ret->key.objectid)) 172 ret = cache; 173 n = n->rb_left; 174 } else if (bytenr > start) { 175 if (contains && bytenr <= end) { 176 ret = cache; 177 break; 178 } 179 n = n->rb_right; 180 } else { 181 ret = cache; 182 break; 183 } 184 } 185 if (ret) 186 btrfs_get_block_group(ret); 187 spin_unlock(&info->block_group_cache_lock); 188 189 return ret; 190 } 191 192 static int add_excluded_extent(struct btrfs_root *root, 193 u64 start, u64 num_bytes) 194 { 195 u64 end = start + num_bytes - 1; 196 set_extent_bits(&root->fs_info->freed_extents[0], 197 start, end, EXTENT_UPTODATE, GFP_NOFS); 198 set_extent_bits(&root->fs_info->freed_extents[1], 199 start, end, EXTENT_UPTODATE, GFP_NOFS); 200 return 0; 201 } 202 203 static void free_excluded_extents(struct btrfs_root *root, 204 struct btrfs_block_group_cache *cache) 205 { 206 u64 start, end; 207 208 start = cache->key.objectid; 209 end = start + cache->key.offset - 1; 210 211 clear_extent_bits(&root->fs_info->freed_extents[0], 212 start, end, EXTENT_UPTODATE, GFP_NOFS); 213 clear_extent_bits(&root->fs_info->freed_extents[1], 214 start, end, EXTENT_UPTODATE, GFP_NOFS); 215 } 216 217 static int exclude_super_stripes(struct btrfs_root *root, 218 struct btrfs_block_group_cache *cache) 219 { 220 u64 bytenr; 221 u64 *logical; 222 int stripe_len; 223 int i, nr, ret; 224 225 if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) { 226 stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid; 227 cache->bytes_super += stripe_len; 228 ret = add_excluded_extent(root, cache->key.objectid, 229 stripe_len); 230 BUG_ON(ret); 231 } 232 233 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { 234 bytenr = btrfs_sb_offset(i); 235 ret = btrfs_rmap_block(&root->fs_info->mapping_tree, 236 cache->key.objectid, bytenr, 237 0, &logical, &nr, &stripe_len); 238 BUG_ON(ret); 239 240 while (nr--) { 241 cache->bytes_super += stripe_len; 242 ret = add_excluded_extent(root, logical[nr], 243 stripe_len); 244 BUG_ON(ret); 245 } 246 247 kfree(logical); 248 } 249 return 0; 250 } 251 252 static struct btrfs_caching_control * 253 get_caching_control(struct btrfs_block_group_cache *cache) 254 { 255 struct btrfs_caching_control *ctl; 256 257 spin_lock(&cache->lock); 258 if (cache->cached != BTRFS_CACHE_STARTED) { 259 spin_unlock(&cache->lock); 260 return NULL; 261 } 262 263 /* We're loading it the fast way, so we don't have a caching_ctl. */ 264 if (!cache->caching_ctl) { 265 spin_unlock(&cache->lock); 266 return NULL; 267 } 268 269 ctl = cache->caching_ctl; 270 atomic_inc(&ctl->count); 271 spin_unlock(&cache->lock); 272 return ctl; 273 } 274 275 static void put_caching_control(struct btrfs_caching_control *ctl) 276 { 277 if (atomic_dec_and_test(&ctl->count)) 278 kfree(ctl); 279 } 280 281 /* 282 * this is only called by cache_block_group, since we could have freed extents 283 * we need to check the pinned_extents for any extents that can't be used yet 284 * since their free space will be released as soon as the transaction commits. 285 */ 286 static u64 add_new_free_space(struct btrfs_block_group_cache *block_group, 287 struct btrfs_fs_info *info, u64 start, u64 end) 288 { 289 u64 extent_start, extent_end, size, total_added = 0; 290 int ret; 291 292 while (start < end) { 293 ret = find_first_extent_bit(info->pinned_extents, start, 294 &extent_start, &extent_end, 295 EXTENT_DIRTY | EXTENT_UPTODATE); 296 if (ret) 297 break; 298 299 if (extent_start <= start) { 300 start = extent_end + 1; 301 } else if (extent_start > start && extent_start < end) { 302 size = extent_start - start; 303 total_added += size; 304 ret = btrfs_add_free_space(block_group, start, 305 size); 306 BUG_ON(ret); 307 start = extent_end + 1; 308 } else { 309 break; 310 } 311 } 312 313 if (start < end) { 314 size = end - start; 315 total_added += size; 316 ret = btrfs_add_free_space(block_group, start, size); 317 BUG_ON(ret); 318 } 319 320 return total_added; 321 } 322 323 static noinline void caching_thread(struct btrfs_work *work) 324 { 325 struct btrfs_block_group_cache *block_group; 326 struct btrfs_fs_info *fs_info; 327 struct btrfs_caching_control *caching_ctl; 328 struct btrfs_root *extent_root; 329 struct btrfs_path *path; 330 struct extent_buffer *leaf; 331 struct btrfs_key key; 332 u64 total_found = 0; 333 u64 last = 0; 334 u32 nritems; 335 int ret = 0; 336 337 caching_ctl = container_of(work, struct btrfs_caching_control, work); 338 block_group = caching_ctl->block_group; 339 fs_info = block_group->fs_info; 340 extent_root = fs_info->extent_root; 341 342 path = btrfs_alloc_path(); 343 if (!path) 344 goto out; 345 346 last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); 347 348 /* 349 * We don't want to deadlock with somebody trying to allocate a new 350 * extent for the extent root while also trying to search the extent 351 * root to add free space. So we skip locking and search the commit 352 * root, since its read-only 353 */ 354 path->skip_locking = 1; 355 path->search_commit_root = 1; 356 path->reada = 1; 357 358 key.objectid = last; 359 key.offset = 0; 360 key.type = BTRFS_EXTENT_ITEM_KEY; 361 again: 362 mutex_lock(&caching_ctl->mutex); 363 /* need to make sure the commit_root doesn't disappear */ 364 down_read(&fs_info->extent_commit_sem); 365 366 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 367 if (ret < 0) 368 goto err; 369 370 leaf = path->nodes[0]; 371 nritems = btrfs_header_nritems(leaf); 372 373 while (1) { 374 if (btrfs_fs_closing(fs_info) > 1) { 375 last = (u64)-1; 376 break; 377 } 378 379 if (path->slots[0] < nritems) { 380 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 381 } else { 382 ret = find_next_key(path, 0, &key); 383 if (ret) 384 break; 385 386 if (need_resched() || 387 btrfs_next_leaf(extent_root, path)) { 388 caching_ctl->progress = last; 389 btrfs_release_path(path); 390 up_read(&fs_info->extent_commit_sem); 391 mutex_unlock(&caching_ctl->mutex); 392 cond_resched(); 393 goto again; 394 } 395 leaf = path->nodes[0]; 396 nritems = btrfs_header_nritems(leaf); 397 continue; 398 } 399 400 if (key.objectid < block_group->key.objectid) { 401 path->slots[0]++; 402 continue; 403 } 404 405 if (key.objectid >= block_group->key.objectid + 406 block_group->key.offset) 407 break; 408 409 if (key.type == BTRFS_EXTENT_ITEM_KEY) { 410 total_found += add_new_free_space(block_group, 411 fs_info, last, 412 key.objectid); 413 last = key.objectid + key.offset; 414 415 if (total_found > (1024 * 1024 * 2)) { 416 total_found = 0; 417 wake_up(&caching_ctl->wait); 418 } 419 } 420 path->slots[0]++; 421 } 422 ret = 0; 423 424 total_found += add_new_free_space(block_group, fs_info, last, 425 block_group->key.objectid + 426 block_group->key.offset); 427 caching_ctl->progress = (u64)-1; 428 429 spin_lock(&block_group->lock); 430 block_group->caching_ctl = NULL; 431 block_group->cached = BTRFS_CACHE_FINISHED; 432 spin_unlock(&block_group->lock); 433 434 err: 435 btrfs_free_path(path); 436 up_read(&fs_info->extent_commit_sem); 437 438 free_excluded_extents(extent_root, block_group); 439 440 mutex_unlock(&caching_ctl->mutex); 441 out: 442 wake_up(&caching_ctl->wait); 443 444 put_caching_control(caching_ctl); 445 btrfs_put_block_group(block_group); 446 } 447 448 static int cache_block_group(struct btrfs_block_group_cache *cache, 449 struct btrfs_trans_handle *trans, 450 struct btrfs_root *root, 451 int load_cache_only) 452 { 453 struct btrfs_fs_info *fs_info = cache->fs_info; 454 struct btrfs_caching_control *caching_ctl; 455 int ret = 0; 456 457 smp_mb(); 458 if (cache->cached != BTRFS_CACHE_NO) 459 return 0; 460 461 /* 462 * We can't do the read from on-disk cache during a commit since we need 463 * to have the normal tree locking. Also if we are currently trying to 464 * allocate blocks for the tree root we can't do the fast caching since 465 * we likely hold important locks. 466 */ 467 if (trans && (!trans->transaction->in_commit) && 468 (root && root != root->fs_info->tree_root)) { 469 spin_lock(&cache->lock); 470 if (cache->cached != BTRFS_CACHE_NO) { 471 spin_unlock(&cache->lock); 472 return 0; 473 } 474 cache->cached = BTRFS_CACHE_STARTED; 475 spin_unlock(&cache->lock); 476 477 ret = load_free_space_cache(fs_info, cache); 478 479 spin_lock(&cache->lock); 480 if (ret == 1) { 481 cache->cached = BTRFS_CACHE_FINISHED; 482 cache->last_byte_to_unpin = (u64)-1; 483 } else { 484 cache->cached = BTRFS_CACHE_NO; 485 } 486 spin_unlock(&cache->lock); 487 if (ret == 1) { 488 free_excluded_extents(fs_info->extent_root, cache); 489 return 0; 490 } 491 } 492 493 if (load_cache_only) 494 return 0; 495 496 caching_ctl = kzalloc(sizeof(*caching_ctl), GFP_NOFS); 497 BUG_ON(!caching_ctl); 498 499 INIT_LIST_HEAD(&caching_ctl->list); 500 mutex_init(&caching_ctl->mutex); 501 init_waitqueue_head(&caching_ctl->wait); 502 caching_ctl->block_group = cache; 503 caching_ctl->progress = cache->key.objectid; 504 /* one for caching kthread, one for caching block group list */ 505 atomic_set(&caching_ctl->count, 2); 506 caching_ctl->work.func = caching_thread; 507 508 spin_lock(&cache->lock); 509 if (cache->cached != BTRFS_CACHE_NO) { 510 spin_unlock(&cache->lock); 511 kfree(caching_ctl); 512 return 0; 513 } 514 cache->caching_ctl = caching_ctl; 515 cache->cached = BTRFS_CACHE_STARTED; 516 spin_unlock(&cache->lock); 517 518 down_write(&fs_info->extent_commit_sem); 519 list_add_tail(&caching_ctl->list, &fs_info->caching_block_groups); 520 up_write(&fs_info->extent_commit_sem); 521 522 btrfs_get_block_group(cache); 523 524 btrfs_queue_worker(&fs_info->caching_workers, &caching_ctl->work); 525 526 return ret; 527 } 528 529 /* 530 * return the block group that starts at or after bytenr 531 */ 532 static struct btrfs_block_group_cache * 533 btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr) 534 { 535 struct btrfs_block_group_cache *cache; 536 537 cache = block_group_cache_tree_search(info, bytenr, 0); 538 539 return cache; 540 } 541 542 /* 543 * return the block group that contains the given bytenr 544 */ 545 struct btrfs_block_group_cache *btrfs_lookup_block_group( 546 struct btrfs_fs_info *info, 547 u64 bytenr) 548 { 549 struct btrfs_block_group_cache *cache; 550 551 cache = block_group_cache_tree_search(info, bytenr, 1); 552 553 return cache; 554 } 555 556 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, 557 u64 flags) 558 { 559 struct list_head *head = &info->space_info; 560 struct btrfs_space_info *found; 561 562 flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | 563 BTRFS_BLOCK_GROUP_METADATA; 564 565 rcu_read_lock(); 566 list_for_each_entry_rcu(found, head, list) { 567 if (found->flags & flags) { 568 rcu_read_unlock(); 569 return found; 570 } 571 } 572 rcu_read_unlock(); 573 return NULL; 574 } 575 576 /* 577 * after adding space to the filesystem, we need to clear the full flags 578 * on all the space infos. 579 */ 580 void btrfs_clear_space_info_full(struct btrfs_fs_info *info) 581 { 582 struct list_head *head = &info->space_info; 583 struct btrfs_space_info *found; 584 585 rcu_read_lock(); 586 list_for_each_entry_rcu(found, head, list) 587 found->full = 0; 588 rcu_read_unlock(); 589 } 590 591 static u64 div_factor(u64 num, int factor) 592 { 593 if (factor == 10) 594 return num; 595 num *= factor; 596 do_div(num, 10); 597 return num; 598 } 599 600 static u64 div_factor_fine(u64 num, int factor) 601 { 602 if (factor == 100) 603 return num; 604 num *= factor; 605 do_div(num, 100); 606 return num; 607 } 608 609 u64 btrfs_find_block_group(struct btrfs_root *root, 610 u64 search_start, u64 search_hint, int owner) 611 { 612 struct btrfs_block_group_cache *cache; 613 u64 used; 614 u64 last = max(search_hint, search_start); 615 u64 group_start = 0; 616 int full_search = 0; 617 int factor = 9; 618 int wrapped = 0; 619 again: 620 while (1) { 621 cache = btrfs_lookup_first_block_group(root->fs_info, last); 622 if (!cache) 623 break; 624 625 spin_lock(&cache->lock); 626 last = cache->key.objectid + cache->key.offset; 627 used = btrfs_block_group_used(&cache->item); 628 629 if ((full_search || !cache->ro) && 630 block_group_bits(cache, BTRFS_BLOCK_GROUP_METADATA)) { 631 if (used + cache->pinned + cache->reserved < 632 div_factor(cache->key.offset, factor)) { 633 group_start = cache->key.objectid; 634 spin_unlock(&cache->lock); 635 btrfs_put_block_group(cache); 636 goto found; 637 } 638 } 639 spin_unlock(&cache->lock); 640 btrfs_put_block_group(cache); 641 cond_resched(); 642 } 643 if (!wrapped) { 644 last = search_start; 645 wrapped = 1; 646 goto again; 647 } 648 if (!full_search && factor < 10) { 649 last = search_start; 650 full_search = 1; 651 factor = 10; 652 goto again; 653 } 654 found: 655 return group_start; 656 } 657 658 /* simple helper to search for an existing extent at a given offset */ 659 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) 660 { 661 int ret; 662 struct btrfs_key key; 663 struct btrfs_path *path; 664 665 path = btrfs_alloc_path(); 666 if (!path) 667 return -ENOMEM; 668 669 key.objectid = start; 670 key.offset = len; 671 btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); 672 ret = btrfs_search_slot(NULL, root->fs_info->extent_root, &key, path, 673 0, 0); 674 btrfs_free_path(path); 675 return ret; 676 } 677 678 /* 679 * helper function to lookup reference count and flags of extent. 680 * 681 * the head node for delayed ref is used to store the sum of all the 682 * reference count modifications queued up in the rbtree. the head 683 * node may also store the extent flags to set. This way you can check 684 * to see what the reference count and extent flags would be if all of 685 * the delayed refs are not processed. 686 */ 687 int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, 688 struct btrfs_root *root, u64 bytenr, 689 u64 num_bytes, u64 *refs, u64 *flags) 690 { 691 struct btrfs_delayed_ref_head *head; 692 struct btrfs_delayed_ref_root *delayed_refs; 693 struct btrfs_path *path; 694 struct btrfs_extent_item *ei; 695 struct extent_buffer *leaf; 696 struct btrfs_key key; 697 u32 item_size; 698 u64 num_refs; 699 u64 extent_flags; 700 int ret; 701 702 path = btrfs_alloc_path(); 703 if (!path) 704 return -ENOMEM; 705 706 key.objectid = bytenr; 707 key.type = BTRFS_EXTENT_ITEM_KEY; 708 key.offset = num_bytes; 709 if (!trans) { 710 path->skip_locking = 1; 711 path->search_commit_root = 1; 712 } 713 again: 714 ret = btrfs_search_slot(trans, root->fs_info->extent_root, 715 &key, path, 0, 0); 716 if (ret < 0) 717 goto out_free; 718 719 if (ret == 0) { 720 leaf = path->nodes[0]; 721 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 722 if (item_size >= sizeof(*ei)) { 723 ei = btrfs_item_ptr(leaf, path->slots[0], 724 struct btrfs_extent_item); 725 num_refs = btrfs_extent_refs(leaf, ei); 726 extent_flags = btrfs_extent_flags(leaf, ei); 727 } else { 728 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 729 struct btrfs_extent_item_v0 *ei0; 730 BUG_ON(item_size != sizeof(*ei0)); 731 ei0 = btrfs_item_ptr(leaf, path->slots[0], 732 struct btrfs_extent_item_v0); 733 num_refs = btrfs_extent_refs_v0(leaf, ei0); 734 /* FIXME: this isn't correct for data */ 735 extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; 736 #else 737 BUG(); 738 #endif 739 } 740 BUG_ON(num_refs == 0); 741 } else { 742 num_refs = 0; 743 extent_flags = 0; 744 ret = 0; 745 } 746 747 if (!trans) 748 goto out; 749 750 delayed_refs = &trans->transaction->delayed_refs; 751 spin_lock(&delayed_refs->lock); 752 head = btrfs_find_delayed_ref_head(trans, bytenr); 753 if (head) { 754 if (!mutex_trylock(&head->mutex)) { 755 atomic_inc(&head->node.refs); 756 spin_unlock(&delayed_refs->lock); 757 758 btrfs_release_path(path); 759 760 /* 761 * Mutex was contended, block until it's released and try 762 * again 763 */ 764 mutex_lock(&head->mutex); 765 mutex_unlock(&head->mutex); 766 btrfs_put_delayed_ref(&head->node); 767 goto again; 768 } 769 if (head->extent_op && head->extent_op->update_flags) 770 extent_flags |= head->extent_op->flags_to_set; 771 else 772 BUG_ON(num_refs == 0); 773 774 num_refs += head->node.ref_mod; 775 mutex_unlock(&head->mutex); 776 } 777 spin_unlock(&delayed_refs->lock); 778 out: 779 WARN_ON(num_refs == 0); 780 if (refs) 781 *refs = num_refs; 782 if (flags) 783 *flags = extent_flags; 784 out_free: 785 btrfs_free_path(path); 786 return ret; 787 } 788 789 /* 790 * Back reference rules. Back refs have three main goals: 791 * 792 * 1) differentiate between all holders of references to an extent so that 793 * when a reference is dropped we can make sure it was a valid reference 794 * before freeing the extent. 795 * 796 * 2) Provide enough information to quickly find the holders of an extent 797 * if we notice a given block is corrupted or bad. 798 * 799 * 3) Make it easy to migrate blocks for FS shrinking or storage pool 800 * maintenance. This is actually the same as #2, but with a slightly 801 * different use case. 802 * 803 * There are two kinds of back refs. The implicit back refs is optimized 804 * for pointers in non-shared tree blocks. For a given pointer in a block, 805 * back refs of this kind provide information about the block's owner tree 806 * and the pointer's key. These information allow us to find the block by 807 * b-tree searching. The full back refs is for pointers in tree blocks not 808 * referenced by their owner trees. The location of tree block is recorded 809 * in the back refs. Actually the full back refs is generic, and can be 810 * used in all cases the implicit back refs is used. The major shortcoming 811 * of the full back refs is its overhead. Every time a tree block gets 812 * COWed, we have to update back refs entry for all pointers in it. 813 * 814 * For a newly allocated tree block, we use implicit back refs for 815 * pointers in it. This means most tree related operations only involve 816 * implicit back refs. For a tree block created in old transaction, the 817 * only way to drop a reference to it is COW it. So we can detect the 818 * event that tree block loses its owner tree's reference and do the 819 * back refs conversion. 820 * 821 * When a tree block is COW'd through a tree, there are four cases: 822 * 823 * The reference count of the block is one and the tree is the block's 824 * owner tree. Nothing to do in this case. 825 * 826 * The reference count of the block is one and the tree is not the 827 * block's owner tree. In this case, full back refs is used for pointers 828 * in the block. Remove these full back refs, add implicit back refs for 829 * every pointers in the new block. 830 * 831 * The reference count of the block is greater than one and the tree is 832 * the block's owner tree. In this case, implicit back refs is used for 833 * pointers in the block. Add full back refs for every pointers in the 834 * block, increase lower level extents' reference counts. The original 835 * implicit back refs are entailed to the new block. 836 * 837 * The reference count of the block is greater than one and the tree is 838 * not the block's owner tree. Add implicit back refs for every pointer in 839 * the new block, increase lower level extents' reference count. 840 * 841 * Back Reference Key composing: 842 * 843 * The key objectid corresponds to the first byte in the extent, 844 * The key type is used to differentiate between types of back refs. 845 * There are different meanings of the key offset for different types 846 * of back refs. 847 * 848 * File extents can be referenced by: 849 * 850 * - multiple snapshots, subvolumes, or different generations in one subvol 851 * - different files inside a single subvolume 852 * - different offsets inside a file (bookend extents in file.c) 853 * 854 * The extent ref structure for the implicit back refs has fields for: 855 * 856 * - Objectid of the subvolume root 857 * - objectid of the file holding the reference 858 * - original offset in the file 859 * - how many bookend extents 860 * 861 * The key offset for the implicit back refs is hash of the first 862 * three fields. 863 * 864 * The extent ref structure for the full back refs has field for: 865 * 866 * - number of pointers in the tree leaf 867 * 868 * The key offset for the implicit back refs is the first byte of 869 * the tree leaf 870 * 871 * When a file extent is allocated, The implicit back refs is used. 872 * the fields are filled in: 873 * 874 * (root_key.objectid, inode objectid, offset in file, 1) 875 * 876 * When a file extent is removed file truncation, we find the 877 * corresponding implicit back refs and check the following fields: 878 * 879 * (btrfs_header_owner(leaf), inode objectid, offset in file) 880 * 881 * Btree extents can be referenced by: 882 * 883 * - Different subvolumes 884 * 885 * Both the implicit back refs and the full back refs for tree blocks 886 * only consist of key. The key offset for the implicit back refs is 887 * objectid of block's owner tree. The key offset for the full back refs 888 * is the first byte of parent block. 889 * 890 * When implicit back refs is used, information about the lowest key and 891 * level of the tree block are required. These information are stored in 892 * tree block info structure. 893 */ 894 895 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 896 static int convert_extent_item_v0(struct btrfs_trans_handle *trans, 897 struct btrfs_root *root, 898 struct btrfs_path *path, 899 u64 owner, u32 extra_size) 900 { 901 struct btrfs_extent_item *item; 902 struct btrfs_extent_item_v0 *ei0; 903 struct btrfs_extent_ref_v0 *ref0; 904 struct btrfs_tree_block_info *bi; 905 struct extent_buffer *leaf; 906 struct btrfs_key key; 907 struct btrfs_key found_key; 908 u32 new_size = sizeof(*item); 909 u64 refs; 910 int ret; 911 912 leaf = path->nodes[0]; 913 BUG_ON(btrfs_item_size_nr(leaf, path->slots[0]) != sizeof(*ei0)); 914 915 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 916 ei0 = btrfs_item_ptr(leaf, path->slots[0], 917 struct btrfs_extent_item_v0); 918 refs = btrfs_extent_refs_v0(leaf, ei0); 919 920 if (owner == (u64)-1) { 921 while (1) { 922 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 923 ret = btrfs_next_leaf(root, path); 924 if (ret < 0) 925 return ret; 926 BUG_ON(ret > 0); 927 leaf = path->nodes[0]; 928 } 929 btrfs_item_key_to_cpu(leaf, &found_key, 930 path->slots[0]); 931 BUG_ON(key.objectid != found_key.objectid); 932 if (found_key.type != BTRFS_EXTENT_REF_V0_KEY) { 933 path->slots[0]++; 934 continue; 935 } 936 ref0 = btrfs_item_ptr(leaf, path->slots[0], 937 struct btrfs_extent_ref_v0); 938 owner = btrfs_ref_objectid_v0(leaf, ref0); 939 break; 940 } 941 } 942 btrfs_release_path(path); 943 944 if (owner < BTRFS_FIRST_FREE_OBJECTID) 945 new_size += sizeof(*bi); 946 947 new_size -= sizeof(*ei0); 948 ret = btrfs_search_slot(trans, root, &key, path, 949 new_size + extra_size, 1); 950 if (ret < 0) 951 return ret; 952 BUG_ON(ret); 953 954 ret = btrfs_extend_item(trans, root, path, new_size); 955 956 leaf = path->nodes[0]; 957 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 958 btrfs_set_extent_refs(leaf, item, refs); 959 /* FIXME: get real generation */ 960 btrfs_set_extent_generation(leaf, item, 0); 961 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 962 btrfs_set_extent_flags(leaf, item, 963 BTRFS_EXTENT_FLAG_TREE_BLOCK | 964 BTRFS_BLOCK_FLAG_FULL_BACKREF); 965 bi = (struct btrfs_tree_block_info *)(item + 1); 966 /* FIXME: get first key of the block */ 967 memset_extent_buffer(leaf, 0, (unsigned long)bi, sizeof(*bi)); 968 btrfs_set_tree_block_level(leaf, bi, (int)owner); 969 } else { 970 btrfs_set_extent_flags(leaf, item, BTRFS_EXTENT_FLAG_DATA); 971 } 972 btrfs_mark_buffer_dirty(leaf); 973 return 0; 974 } 975 #endif 976 977 static u64 hash_extent_data_ref(u64 root_objectid, u64 owner, u64 offset) 978 { 979 u32 high_crc = ~(u32)0; 980 u32 low_crc = ~(u32)0; 981 __le64 lenum; 982 983 lenum = cpu_to_le64(root_objectid); 984 high_crc = crc32c(high_crc, &lenum, sizeof(lenum)); 985 lenum = cpu_to_le64(owner); 986 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 987 lenum = cpu_to_le64(offset); 988 low_crc = crc32c(low_crc, &lenum, sizeof(lenum)); 989 990 return ((u64)high_crc << 31) ^ (u64)low_crc; 991 } 992 993 static u64 hash_extent_data_ref_item(struct extent_buffer *leaf, 994 struct btrfs_extent_data_ref *ref) 995 { 996 return hash_extent_data_ref(btrfs_extent_data_ref_root(leaf, ref), 997 btrfs_extent_data_ref_objectid(leaf, ref), 998 btrfs_extent_data_ref_offset(leaf, ref)); 999 } 1000 1001 static int match_extent_data_ref(struct extent_buffer *leaf, 1002 struct btrfs_extent_data_ref *ref, 1003 u64 root_objectid, u64 owner, u64 offset) 1004 { 1005 if (btrfs_extent_data_ref_root(leaf, ref) != root_objectid || 1006 btrfs_extent_data_ref_objectid(leaf, ref) != owner || 1007 btrfs_extent_data_ref_offset(leaf, ref) != offset) 1008 return 0; 1009 return 1; 1010 } 1011 1012 static noinline int lookup_extent_data_ref(struct btrfs_trans_handle *trans, 1013 struct btrfs_root *root, 1014 struct btrfs_path *path, 1015 u64 bytenr, u64 parent, 1016 u64 root_objectid, 1017 u64 owner, u64 offset) 1018 { 1019 struct btrfs_key key; 1020 struct btrfs_extent_data_ref *ref; 1021 struct extent_buffer *leaf; 1022 u32 nritems; 1023 int ret; 1024 int recow; 1025 int err = -ENOENT; 1026 1027 key.objectid = bytenr; 1028 if (parent) { 1029 key.type = BTRFS_SHARED_DATA_REF_KEY; 1030 key.offset = parent; 1031 } else { 1032 key.type = BTRFS_EXTENT_DATA_REF_KEY; 1033 key.offset = hash_extent_data_ref(root_objectid, 1034 owner, offset); 1035 } 1036 again: 1037 recow = 0; 1038 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1039 if (ret < 0) { 1040 err = ret; 1041 goto fail; 1042 } 1043 1044 if (parent) { 1045 if (!ret) 1046 return 0; 1047 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1048 key.type = BTRFS_EXTENT_REF_V0_KEY; 1049 btrfs_release_path(path); 1050 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1051 if (ret < 0) { 1052 err = ret; 1053 goto fail; 1054 } 1055 if (!ret) 1056 return 0; 1057 #endif 1058 goto fail; 1059 } 1060 1061 leaf = path->nodes[0]; 1062 nritems = btrfs_header_nritems(leaf); 1063 while (1) { 1064 if (path->slots[0] >= nritems) { 1065 ret = btrfs_next_leaf(root, path); 1066 if (ret < 0) 1067 err = ret; 1068 if (ret) 1069 goto fail; 1070 1071 leaf = path->nodes[0]; 1072 nritems = btrfs_header_nritems(leaf); 1073 recow = 1; 1074 } 1075 1076 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1077 if (key.objectid != bytenr || 1078 key.type != BTRFS_EXTENT_DATA_REF_KEY) 1079 goto fail; 1080 1081 ref = btrfs_item_ptr(leaf, path->slots[0], 1082 struct btrfs_extent_data_ref); 1083 1084 if (match_extent_data_ref(leaf, ref, root_objectid, 1085 owner, offset)) { 1086 if (recow) { 1087 btrfs_release_path(path); 1088 goto again; 1089 } 1090 err = 0; 1091 break; 1092 } 1093 path->slots[0]++; 1094 } 1095 fail: 1096 return err; 1097 } 1098 1099 static noinline int insert_extent_data_ref(struct btrfs_trans_handle *trans, 1100 struct btrfs_root *root, 1101 struct btrfs_path *path, 1102 u64 bytenr, u64 parent, 1103 u64 root_objectid, u64 owner, 1104 u64 offset, int refs_to_add) 1105 { 1106 struct btrfs_key key; 1107 struct extent_buffer *leaf; 1108 u32 size; 1109 u32 num_refs; 1110 int ret; 1111 1112 key.objectid = bytenr; 1113 if (parent) { 1114 key.type = BTRFS_SHARED_DATA_REF_KEY; 1115 key.offset = parent; 1116 size = sizeof(struct btrfs_shared_data_ref); 1117 } else { 1118 key.type = BTRFS_EXTENT_DATA_REF_KEY; 1119 key.offset = hash_extent_data_ref(root_objectid, 1120 owner, offset); 1121 size = sizeof(struct btrfs_extent_data_ref); 1122 } 1123 1124 ret = btrfs_insert_empty_item(trans, root, path, &key, size); 1125 if (ret && ret != -EEXIST) 1126 goto fail; 1127 1128 leaf = path->nodes[0]; 1129 if (parent) { 1130 struct btrfs_shared_data_ref *ref; 1131 ref = btrfs_item_ptr(leaf, path->slots[0], 1132 struct btrfs_shared_data_ref); 1133 if (ret == 0) { 1134 btrfs_set_shared_data_ref_count(leaf, ref, refs_to_add); 1135 } else { 1136 num_refs = btrfs_shared_data_ref_count(leaf, ref); 1137 num_refs += refs_to_add; 1138 btrfs_set_shared_data_ref_count(leaf, ref, num_refs); 1139 } 1140 } else { 1141 struct btrfs_extent_data_ref *ref; 1142 while (ret == -EEXIST) { 1143 ref = btrfs_item_ptr(leaf, path->slots[0], 1144 struct btrfs_extent_data_ref); 1145 if (match_extent_data_ref(leaf, ref, root_objectid, 1146 owner, offset)) 1147 break; 1148 btrfs_release_path(path); 1149 key.offset++; 1150 ret = btrfs_insert_empty_item(trans, root, path, &key, 1151 size); 1152 if (ret && ret != -EEXIST) 1153 goto fail; 1154 1155 leaf = path->nodes[0]; 1156 } 1157 ref = btrfs_item_ptr(leaf, path->slots[0], 1158 struct btrfs_extent_data_ref); 1159 if (ret == 0) { 1160 btrfs_set_extent_data_ref_root(leaf, ref, 1161 root_objectid); 1162 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 1163 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 1164 btrfs_set_extent_data_ref_count(leaf, ref, refs_to_add); 1165 } else { 1166 num_refs = btrfs_extent_data_ref_count(leaf, ref); 1167 num_refs += refs_to_add; 1168 btrfs_set_extent_data_ref_count(leaf, ref, num_refs); 1169 } 1170 } 1171 btrfs_mark_buffer_dirty(leaf); 1172 ret = 0; 1173 fail: 1174 btrfs_release_path(path); 1175 return ret; 1176 } 1177 1178 static noinline int remove_extent_data_ref(struct btrfs_trans_handle *trans, 1179 struct btrfs_root *root, 1180 struct btrfs_path *path, 1181 int refs_to_drop) 1182 { 1183 struct btrfs_key key; 1184 struct btrfs_extent_data_ref *ref1 = NULL; 1185 struct btrfs_shared_data_ref *ref2 = NULL; 1186 struct extent_buffer *leaf; 1187 u32 num_refs = 0; 1188 int ret = 0; 1189 1190 leaf = path->nodes[0]; 1191 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1192 1193 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1194 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1195 struct btrfs_extent_data_ref); 1196 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1197 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1198 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1199 struct btrfs_shared_data_ref); 1200 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1201 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1202 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1203 struct btrfs_extent_ref_v0 *ref0; 1204 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1205 struct btrfs_extent_ref_v0); 1206 num_refs = btrfs_ref_count_v0(leaf, ref0); 1207 #endif 1208 } else { 1209 BUG(); 1210 } 1211 1212 BUG_ON(num_refs < refs_to_drop); 1213 num_refs -= refs_to_drop; 1214 1215 if (num_refs == 0) { 1216 ret = btrfs_del_item(trans, root, path); 1217 } else { 1218 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) 1219 btrfs_set_extent_data_ref_count(leaf, ref1, num_refs); 1220 else if (key.type == BTRFS_SHARED_DATA_REF_KEY) 1221 btrfs_set_shared_data_ref_count(leaf, ref2, num_refs); 1222 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1223 else { 1224 struct btrfs_extent_ref_v0 *ref0; 1225 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1226 struct btrfs_extent_ref_v0); 1227 btrfs_set_ref_count_v0(leaf, ref0, num_refs); 1228 } 1229 #endif 1230 btrfs_mark_buffer_dirty(leaf); 1231 } 1232 return ret; 1233 } 1234 1235 static noinline u32 extent_data_ref_count(struct btrfs_root *root, 1236 struct btrfs_path *path, 1237 struct btrfs_extent_inline_ref *iref) 1238 { 1239 struct btrfs_key key; 1240 struct extent_buffer *leaf; 1241 struct btrfs_extent_data_ref *ref1; 1242 struct btrfs_shared_data_ref *ref2; 1243 u32 num_refs = 0; 1244 1245 leaf = path->nodes[0]; 1246 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 1247 if (iref) { 1248 if (btrfs_extent_inline_ref_type(leaf, iref) == 1249 BTRFS_EXTENT_DATA_REF_KEY) { 1250 ref1 = (struct btrfs_extent_data_ref *)(&iref->offset); 1251 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1252 } else { 1253 ref2 = (struct btrfs_shared_data_ref *)(iref + 1); 1254 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1255 } 1256 } else if (key.type == BTRFS_EXTENT_DATA_REF_KEY) { 1257 ref1 = btrfs_item_ptr(leaf, path->slots[0], 1258 struct btrfs_extent_data_ref); 1259 num_refs = btrfs_extent_data_ref_count(leaf, ref1); 1260 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) { 1261 ref2 = btrfs_item_ptr(leaf, path->slots[0], 1262 struct btrfs_shared_data_ref); 1263 num_refs = btrfs_shared_data_ref_count(leaf, ref2); 1264 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1265 } else if (key.type == BTRFS_EXTENT_REF_V0_KEY) { 1266 struct btrfs_extent_ref_v0 *ref0; 1267 ref0 = btrfs_item_ptr(leaf, path->slots[0], 1268 struct btrfs_extent_ref_v0); 1269 num_refs = btrfs_ref_count_v0(leaf, ref0); 1270 #endif 1271 } else { 1272 WARN_ON(1); 1273 } 1274 return num_refs; 1275 } 1276 1277 static noinline int lookup_tree_block_ref(struct btrfs_trans_handle *trans, 1278 struct btrfs_root *root, 1279 struct btrfs_path *path, 1280 u64 bytenr, u64 parent, 1281 u64 root_objectid) 1282 { 1283 struct btrfs_key key; 1284 int ret; 1285 1286 key.objectid = bytenr; 1287 if (parent) { 1288 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1289 key.offset = parent; 1290 } else { 1291 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1292 key.offset = root_objectid; 1293 } 1294 1295 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1296 if (ret > 0) 1297 ret = -ENOENT; 1298 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1299 if (ret == -ENOENT && parent) { 1300 btrfs_release_path(path); 1301 key.type = BTRFS_EXTENT_REF_V0_KEY; 1302 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 1303 if (ret > 0) 1304 ret = -ENOENT; 1305 } 1306 #endif 1307 return ret; 1308 } 1309 1310 static noinline int insert_tree_block_ref(struct btrfs_trans_handle *trans, 1311 struct btrfs_root *root, 1312 struct btrfs_path *path, 1313 u64 bytenr, u64 parent, 1314 u64 root_objectid) 1315 { 1316 struct btrfs_key key; 1317 int ret; 1318 1319 key.objectid = bytenr; 1320 if (parent) { 1321 key.type = BTRFS_SHARED_BLOCK_REF_KEY; 1322 key.offset = parent; 1323 } else { 1324 key.type = BTRFS_TREE_BLOCK_REF_KEY; 1325 key.offset = root_objectid; 1326 } 1327 1328 ret = btrfs_insert_empty_item(trans, root, path, &key, 0); 1329 btrfs_release_path(path); 1330 return ret; 1331 } 1332 1333 static inline int extent_ref_type(u64 parent, u64 owner) 1334 { 1335 int type; 1336 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1337 if (parent > 0) 1338 type = BTRFS_SHARED_BLOCK_REF_KEY; 1339 else 1340 type = BTRFS_TREE_BLOCK_REF_KEY; 1341 } else { 1342 if (parent > 0) 1343 type = BTRFS_SHARED_DATA_REF_KEY; 1344 else 1345 type = BTRFS_EXTENT_DATA_REF_KEY; 1346 } 1347 return type; 1348 } 1349 1350 static int find_next_key(struct btrfs_path *path, int level, 1351 struct btrfs_key *key) 1352 1353 { 1354 for (; level < BTRFS_MAX_LEVEL; level++) { 1355 if (!path->nodes[level]) 1356 break; 1357 if (path->slots[level] + 1 >= 1358 btrfs_header_nritems(path->nodes[level])) 1359 continue; 1360 if (level == 0) 1361 btrfs_item_key_to_cpu(path->nodes[level], key, 1362 path->slots[level] + 1); 1363 else 1364 btrfs_node_key_to_cpu(path->nodes[level], key, 1365 path->slots[level] + 1); 1366 return 0; 1367 } 1368 return 1; 1369 } 1370 1371 /* 1372 * look for inline back ref. if back ref is found, *ref_ret is set 1373 * to the address of inline back ref, and 0 is returned. 1374 * 1375 * if back ref isn't found, *ref_ret is set to the address where it 1376 * should be inserted, and -ENOENT is returned. 1377 * 1378 * if insert is true and there are too many inline back refs, the path 1379 * points to the extent item, and -EAGAIN is returned. 1380 * 1381 * NOTE: inline back refs are ordered in the same way that back ref 1382 * items in the tree are ordered. 1383 */ 1384 static noinline_for_stack 1385 int lookup_inline_extent_backref(struct btrfs_trans_handle *trans, 1386 struct btrfs_root *root, 1387 struct btrfs_path *path, 1388 struct btrfs_extent_inline_ref **ref_ret, 1389 u64 bytenr, u64 num_bytes, 1390 u64 parent, u64 root_objectid, 1391 u64 owner, u64 offset, int insert) 1392 { 1393 struct btrfs_key key; 1394 struct extent_buffer *leaf; 1395 struct btrfs_extent_item *ei; 1396 struct btrfs_extent_inline_ref *iref; 1397 u64 flags; 1398 u64 item_size; 1399 unsigned long ptr; 1400 unsigned long end; 1401 int extra_size; 1402 int type; 1403 int want; 1404 int ret; 1405 int err = 0; 1406 1407 key.objectid = bytenr; 1408 key.type = BTRFS_EXTENT_ITEM_KEY; 1409 key.offset = num_bytes; 1410 1411 want = extent_ref_type(parent, owner); 1412 if (insert) { 1413 extra_size = btrfs_extent_inline_ref_size(want); 1414 path->keep_locks = 1; 1415 } else 1416 extra_size = -1; 1417 ret = btrfs_search_slot(trans, root, &key, path, extra_size, 1); 1418 if (ret < 0) { 1419 err = ret; 1420 goto out; 1421 } 1422 BUG_ON(ret); 1423 1424 leaf = path->nodes[0]; 1425 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1426 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1427 if (item_size < sizeof(*ei)) { 1428 if (!insert) { 1429 err = -ENOENT; 1430 goto out; 1431 } 1432 ret = convert_extent_item_v0(trans, root, path, owner, 1433 extra_size); 1434 if (ret < 0) { 1435 err = ret; 1436 goto out; 1437 } 1438 leaf = path->nodes[0]; 1439 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1440 } 1441 #endif 1442 BUG_ON(item_size < sizeof(*ei)); 1443 1444 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1445 flags = btrfs_extent_flags(leaf, ei); 1446 1447 ptr = (unsigned long)(ei + 1); 1448 end = (unsigned long)ei + item_size; 1449 1450 if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 1451 ptr += sizeof(struct btrfs_tree_block_info); 1452 BUG_ON(ptr > end); 1453 } else { 1454 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_DATA)); 1455 } 1456 1457 err = -ENOENT; 1458 while (1) { 1459 if (ptr >= end) { 1460 WARN_ON(ptr > end); 1461 break; 1462 } 1463 iref = (struct btrfs_extent_inline_ref *)ptr; 1464 type = btrfs_extent_inline_ref_type(leaf, iref); 1465 if (want < type) 1466 break; 1467 if (want > type) { 1468 ptr += btrfs_extent_inline_ref_size(type); 1469 continue; 1470 } 1471 1472 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1473 struct btrfs_extent_data_ref *dref; 1474 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1475 if (match_extent_data_ref(leaf, dref, root_objectid, 1476 owner, offset)) { 1477 err = 0; 1478 break; 1479 } 1480 if (hash_extent_data_ref_item(leaf, dref) < 1481 hash_extent_data_ref(root_objectid, owner, offset)) 1482 break; 1483 } else { 1484 u64 ref_offset; 1485 ref_offset = btrfs_extent_inline_ref_offset(leaf, iref); 1486 if (parent > 0) { 1487 if (parent == ref_offset) { 1488 err = 0; 1489 break; 1490 } 1491 if (ref_offset < parent) 1492 break; 1493 } else { 1494 if (root_objectid == ref_offset) { 1495 err = 0; 1496 break; 1497 } 1498 if (ref_offset < root_objectid) 1499 break; 1500 } 1501 } 1502 ptr += btrfs_extent_inline_ref_size(type); 1503 } 1504 if (err == -ENOENT && insert) { 1505 if (item_size + extra_size >= 1506 BTRFS_MAX_EXTENT_ITEM_SIZE(root)) { 1507 err = -EAGAIN; 1508 goto out; 1509 } 1510 /* 1511 * To add new inline back ref, we have to make sure 1512 * there is no corresponding back ref item. 1513 * For simplicity, we just do not add new inline back 1514 * ref if there is any kind of item for this block 1515 */ 1516 if (find_next_key(path, 0, &key) == 0 && 1517 key.objectid == bytenr && 1518 key.type < BTRFS_BLOCK_GROUP_ITEM_KEY) { 1519 err = -EAGAIN; 1520 goto out; 1521 } 1522 } 1523 *ref_ret = (struct btrfs_extent_inline_ref *)ptr; 1524 out: 1525 if (insert) { 1526 path->keep_locks = 0; 1527 btrfs_unlock_up_safe(path, 1); 1528 } 1529 return err; 1530 } 1531 1532 /* 1533 * helper to add new inline back ref 1534 */ 1535 static noinline_for_stack 1536 int setup_inline_extent_backref(struct btrfs_trans_handle *trans, 1537 struct btrfs_root *root, 1538 struct btrfs_path *path, 1539 struct btrfs_extent_inline_ref *iref, 1540 u64 parent, u64 root_objectid, 1541 u64 owner, u64 offset, int refs_to_add, 1542 struct btrfs_delayed_extent_op *extent_op) 1543 { 1544 struct extent_buffer *leaf; 1545 struct btrfs_extent_item *ei; 1546 unsigned long ptr; 1547 unsigned long end; 1548 unsigned long item_offset; 1549 u64 refs; 1550 int size; 1551 int type; 1552 int ret; 1553 1554 leaf = path->nodes[0]; 1555 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1556 item_offset = (unsigned long)iref - (unsigned long)ei; 1557 1558 type = extent_ref_type(parent, owner); 1559 size = btrfs_extent_inline_ref_size(type); 1560 1561 ret = btrfs_extend_item(trans, root, path, size); 1562 1563 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1564 refs = btrfs_extent_refs(leaf, ei); 1565 refs += refs_to_add; 1566 btrfs_set_extent_refs(leaf, ei, refs); 1567 if (extent_op) 1568 __run_delayed_extent_op(extent_op, leaf, ei); 1569 1570 ptr = (unsigned long)ei + item_offset; 1571 end = (unsigned long)ei + btrfs_item_size_nr(leaf, path->slots[0]); 1572 if (ptr < end - size) 1573 memmove_extent_buffer(leaf, ptr + size, ptr, 1574 end - size - ptr); 1575 1576 iref = (struct btrfs_extent_inline_ref *)ptr; 1577 btrfs_set_extent_inline_ref_type(leaf, iref, type); 1578 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1579 struct btrfs_extent_data_ref *dref; 1580 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1581 btrfs_set_extent_data_ref_root(leaf, dref, root_objectid); 1582 btrfs_set_extent_data_ref_objectid(leaf, dref, owner); 1583 btrfs_set_extent_data_ref_offset(leaf, dref, offset); 1584 btrfs_set_extent_data_ref_count(leaf, dref, refs_to_add); 1585 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1586 struct btrfs_shared_data_ref *sref; 1587 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1588 btrfs_set_shared_data_ref_count(leaf, sref, refs_to_add); 1589 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1590 } else if (type == BTRFS_SHARED_BLOCK_REF_KEY) { 1591 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 1592 } else { 1593 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 1594 } 1595 btrfs_mark_buffer_dirty(leaf); 1596 return 0; 1597 } 1598 1599 static int lookup_extent_backref(struct btrfs_trans_handle *trans, 1600 struct btrfs_root *root, 1601 struct btrfs_path *path, 1602 struct btrfs_extent_inline_ref **ref_ret, 1603 u64 bytenr, u64 num_bytes, u64 parent, 1604 u64 root_objectid, u64 owner, u64 offset) 1605 { 1606 int ret; 1607 1608 ret = lookup_inline_extent_backref(trans, root, path, ref_ret, 1609 bytenr, num_bytes, parent, 1610 root_objectid, owner, offset, 0); 1611 if (ret != -ENOENT) 1612 return ret; 1613 1614 btrfs_release_path(path); 1615 *ref_ret = NULL; 1616 1617 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1618 ret = lookup_tree_block_ref(trans, root, path, bytenr, parent, 1619 root_objectid); 1620 } else { 1621 ret = lookup_extent_data_ref(trans, root, path, bytenr, parent, 1622 root_objectid, owner, offset); 1623 } 1624 return ret; 1625 } 1626 1627 /* 1628 * helper to update/remove inline back ref 1629 */ 1630 static noinline_for_stack 1631 int update_inline_extent_backref(struct btrfs_trans_handle *trans, 1632 struct btrfs_root *root, 1633 struct btrfs_path *path, 1634 struct btrfs_extent_inline_ref *iref, 1635 int refs_to_mod, 1636 struct btrfs_delayed_extent_op *extent_op) 1637 { 1638 struct extent_buffer *leaf; 1639 struct btrfs_extent_item *ei; 1640 struct btrfs_extent_data_ref *dref = NULL; 1641 struct btrfs_shared_data_ref *sref = NULL; 1642 unsigned long ptr; 1643 unsigned long end; 1644 u32 item_size; 1645 int size; 1646 int type; 1647 int ret; 1648 u64 refs; 1649 1650 leaf = path->nodes[0]; 1651 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1652 refs = btrfs_extent_refs(leaf, ei); 1653 WARN_ON(refs_to_mod < 0 && refs + refs_to_mod <= 0); 1654 refs += refs_to_mod; 1655 btrfs_set_extent_refs(leaf, ei, refs); 1656 if (extent_op) 1657 __run_delayed_extent_op(extent_op, leaf, ei); 1658 1659 type = btrfs_extent_inline_ref_type(leaf, iref); 1660 1661 if (type == BTRFS_EXTENT_DATA_REF_KEY) { 1662 dref = (struct btrfs_extent_data_ref *)(&iref->offset); 1663 refs = btrfs_extent_data_ref_count(leaf, dref); 1664 } else if (type == BTRFS_SHARED_DATA_REF_KEY) { 1665 sref = (struct btrfs_shared_data_ref *)(iref + 1); 1666 refs = btrfs_shared_data_ref_count(leaf, sref); 1667 } else { 1668 refs = 1; 1669 BUG_ON(refs_to_mod != -1); 1670 } 1671 1672 BUG_ON(refs_to_mod < 0 && refs < -refs_to_mod); 1673 refs += refs_to_mod; 1674 1675 if (refs > 0) { 1676 if (type == BTRFS_EXTENT_DATA_REF_KEY) 1677 btrfs_set_extent_data_ref_count(leaf, dref, refs); 1678 else 1679 btrfs_set_shared_data_ref_count(leaf, sref, refs); 1680 } else { 1681 size = btrfs_extent_inline_ref_size(type); 1682 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1683 ptr = (unsigned long)iref; 1684 end = (unsigned long)ei + item_size; 1685 if (ptr + size < end) 1686 memmove_extent_buffer(leaf, ptr, ptr + size, 1687 end - ptr - size); 1688 item_size -= size; 1689 ret = btrfs_truncate_item(trans, root, path, item_size, 1); 1690 } 1691 btrfs_mark_buffer_dirty(leaf); 1692 return 0; 1693 } 1694 1695 static noinline_for_stack 1696 int insert_inline_extent_backref(struct btrfs_trans_handle *trans, 1697 struct btrfs_root *root, 1698 struct btrfs_path *path, 1699 u64 bytenr, u64 num_bytes, u64 parent, 1700 u64 root_objectid, u64 owner, 1701 u64 offset, int refs_to_add, 1702 struct btrfs_delayed_extent_op *extent_op) 1703 { 1704 struct btrfs_extent_inline_ref *iref; 1705 int ret; 1706 1707 ret = lookup_inline_extent_backref(trans, root, path, &iref, 1708 bytenr, num_bytes, parent, 1709 root_objectid, owner, offset, 1); 1710 if (ret == 0) { 1711 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID); 1712 ret = update_inline_extent_backref(trans, root, path, iref, 1713 refs_to_add, extent_op); 1714 } else if (ret == -ENOENT) { 1715 ret = setup_inline_extent_backref(trans, root, path, iref, 1716 parent, root_objectid, 1717 owner, offset, refs_to_add, 1718 extent_op); 1719 } 1720 return ret; 1721 } 1722 1723 static int insert_extent_backref(struct btrfs_trans_handle *trans, 1724 struct btrfs_root *root, 1725 struct btrfs_path *path, 1726 u64 bytenr, u64 parent, u64 root_objectid, 1727 u64 owner, u64 offset, int refs_to_add) 1728 { 1729 int ret; 1730 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1731 BUG_ON(refs_to_add != 1); 1732 ret = insert_tree_block_ref(trans, root, path, bytenr, 1733 parent, root_objectid); 1734 } else { 1735 ret = insert_extent_data_ref(trans, root, path, bytenr, 1736 parent, root_objectid, 1737 owner, offset, refs_to_add); 1738 } 1739 return ret; 1740 } 1741 1742 static int remove_extent_backref(struct btrfs_trans_handle *trans, 1743 struct btrfs_root *root, 1744 struct btrfs_path *path, 1745 struct btrfs_extent_inline_ref *iref, 1746 int refs_to_drop, int is_data) 1747 { 1748 int ret; 1749 1750 BUG_ON(!is_data && refs_to_drop != 1); 1751 if (iref) { 1752 ret = update_inline_extent_backref(trans, root, path, iref, 1753 -refs_to_drop, NULL); 1754 } else if (is_data) { 1755 ret = remove_extent_data_ref(trans, root, path, refs_to_drop); 1756 } else { 1757 ret = btrfs_del_item(trans, root, path); 1758 } 1759 return ret; 1760 } 1761 1762 static int btrfs_issue_discard(struct block_device *bdev, 1763 u64 start, u64 len) 1764 { 1765 return blkdev_issue_discard(bdev, start >> 9, len >> 9, GFP_NOFS, 0); 1766 } 1767 1768 static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, 1769 u64 num_bytes, u64 *actual_bytes) 1770 { 1771 int ret; 1772 u64 discarded_bytes = 0; 1773 struct btrfs_multi_bio *multi = NULL; 1774 1775 1776 /* Tell the block device(s) that the sectors can be discarded */ 1777 ret = btrfs_map_block(&root->fs_info->mapping_tree, REQ_DISCARD, 1778 bytenr, &num_bytes, &multi, 0); 1779 if (!ret) { 1780 struct btrfs_bio_stripe *stripe = multi->stripes; 1781 int i; 1782 1783 1784 for (i = 0; i < multi->num_stripes; i++, stripe++) { 1785 ret = btrfs_issue_discard(stripe->dev->bdev, 1786 stripe->physical, 1787 stripe->length); 1788 if (!ret) 1789 discarded_bytes += stripe->length; 1790 else if (ret != -EOPNOTSUPP) 1791 break; 1792 } 1793 kfree(multi); 1794 } 1795 if (discarded_bytes && ret == -EOPNOTSUPP) 1796 ret = 0; 1797 1798 if (actual_bytes) 1799 *actual_bytes = discarded_bytes; 1800 1801 1802 return ret; 1803 } 1804 1805 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1806 struct btrfs_root *root, 1807 u64 bytenr, u64 num_bytes, u64 parent, 1808 u64 root_objectid, u64 owner, u64 offset) 1809 { 1810 int ret; 1811 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 1812 root_objectid == BTRFS_TREE_LOG_OBJECTID); 1813 1814 if (owner < BTRFS_FIRST_FREE_OBJECTID) { 1815 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 1816 parent, root_objectid, (int)owner, 1817 BTRFS_ADD_DELAYED_REF, NULL); 1818 } else { 1819 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 1820 parent, root_objectid, owner, offset, 1821 BTRFS_ADD_DELAYED_REF, NULL); 1822 } 1823 return ret; 1824 } 1825 1826 static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, 1827 struct btrfs_root *root, 1828 u64 bytenr, u64 num_bytes, 1829 u64 parent, u64 root_objectid, 1830 u64 owner, u64 offset, int refs_to_add, 1831 struct btrfs_delayed_extent_op *extent_op) 1832 { 1833 struct btrfs_path *path; 1834 struct extent_buffer *leaf; 1835 struct btrfs_extent_item *item; 1836 u64 refs; 1837 int ret; 1838 int err = 0; 1839 1840 path = btrfs_alloc_path(); 1841 if (!path) 1842 return -ENOMEM; 1843 1844 path->reada = 1; 1845 path->leave_spinning = 1; 1846 /* this will setup the path even if it fails to insert the back ref */ 1847 ret = insert_inline_extent_backref(trans, root->fs_info->extent_root, 1848 path, bytenr, num_bytes, parent, 1849 root_objectid, owner, offset, 1850 refs_to_add, extent_op); 1851 if (ret == 0) 1852 goto out; 1853 1854 if (ret != -EAGAIN) { 1855 err = ret; 1856 goto out; 1857 } 1858 1859 leaf = path->nodes[0]; 1860 item = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 1861 refs = btrfs_extent_refs(leaf, item); 1862 btrfs_set_extent_refs(leaf, item, refs + refs_to_add); 1863 if (extent_op) 1864 __run_delayed_extent_op(extent_op, leaf, item); 1865 1866 btrfs_mark_buffer_dirty(leaf); 1867 btrfs_release_path(path); 1868 1869 path->reada = 1; 1870 path->leave_spinning = 1; 1871 1872 /* now insert the actual backref */ 1873 ret = insert_extent_backref(trans, root->fs_info->extent_root, 1874 path, bytenr, parent, root_objectid, 1875 owner, offset, refs_to_add); 1876 BUG_ON(ret); 1877 out: 1878 btrfs_free_path(path); 1879 return err; 1880 } 1881 1882 static int run_delayed_data_ref(struct btrfs_trans_handle *trans, 1883 struct btrfs_root *root, 1884 struct btrfs_delayed_ref_node *node, 1885 struct btrfs_delayed_extent_op *extent_op, 1886 int insert_reserved) 1887 { 1888 int ret = 0; 1889 struct btrfs_delayed_data_ref *ref; 1890 struct btrfs_key ins; 1891 u64 parent = 0; 1892 u64 ref_root = 0; 1893 u64 flags = 0; 1894 1895 ins.objectid = node->bytenr; 1896 ins.offset = node->num_bytes; 1897 ins.type = BTRFS_EXTENT_ITEM_KEY; 1898 1899 ref = btrfs_delayed_node_to_data_ref(node); 1900 if (node->type == BTRFS_SHARED_DATA_REF_KEY) 1901 parent = ref->parent; 1902 else 1903 ref_root = ref->root; 1904 1905 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 1906 if (extent_op) { 1907 BUG_ON(extent_op->update_key); 1908 flags |= extent_op->flags_to_set; 1909 } 1910 ret = alloc_reserved_file_extent(trans, root, 1911 parent, ref_root, flags, 1912 ref->objectid, ref->offset, 1913 &ins, node->ref_mod); 1914 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 1915 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 1916 node->num_bytes, parent, 1917 ref_root, ref->objectid, 1918 ref->offset, node->ref_mod, 1919 extent_op); 1920 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 1921 ret = __btrfs_free_extent(trans, root, node->bytenr, 1922 node->num_bytes, parent, 1923 ref_root, ref->objectid, 1924 ref->offset, node->ref_mod, 1925 extent_op); 1926 } else { 1927 BUG(); 1928 } 1929 return ret; 1930 } 1931 1932 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op, 1933 struct extent_buffer *leaf, 1934 struct btrfs_extent_item *ei) 1935 { 1936 u64 flags = btrfs_extent_flags(leaf, ei); 1937 if (extent_op->update_flags) { 1938 flags |= extent_op->flags_to_set; 1939 btrfs_set_extent_flags(leaf, ei, flags); 1940 } 1941 1942 if (extent_op->update_key) { 1943 struct btrfs_tree_block_info *bi; 1944 BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); 1945 bi = (struct btrfs_tree_block_info *)(ei + 1); 1946 btrfs_set_tree_block_key(leaf, bi, &extent_op->key); 1947 } 1948 } 1949 1950 static int run_delayed_extent_op(struct btrfs_trans_handle *trans, 1951 struct btrfs_root *root, 1952 struct btrfs_delayed_ref_node *node, 1953 struct btrfs_delayed_extent_op *extent_op) 1954 { 1955 struct btrfs_key key; 1956 struct btrfs_path *path; 1957 struct btrfs_extent_item *ei; 1958 struct extent_buffer *leaf; 1959 u32 item_size; 1960 int ret; 1961 int err = 0; 1962 1963 path = btrfs_alloc_path(); 1964 if (!path) 1965 return -ENOMEM; 1966 1967 key.objectid = node->bytenr; 1968 key.type = BTRFS_EXTENT_ITEM_KEY; 1969 key.offset = node->num_bytes; 1970 1971 path->reada = 1; 1972 path->leave_spinning = 1; 1973 ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, 1974 path, 0, 1); 1975 if (ret < 0) { 1976 err = ret; 1977 goto out; 1978 } 1979 if (ret > 0) { 1980 err = -EIO; 1981 goto out; 1982 } 1983 1984 leaf = path->nodes[0]; 1985 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1986 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 1987 if (item_size < sizeof(*ei)) { 1988 ret = convert_extent_item_v0(trans, root->fs_info->extent_root, 1989 path, (u64)-1, 0); 1990 if (ret < 0) { 1991 err = ret; 1992 goto out; 1993 } 1994 leaf = path->nodes[0]; 1995 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 1996 } 1997 #endif 1998 BUG_ON(item_size < sizeof(*ei)); 1999 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2000 __run_delayed_extent_op(extent_op, leaf, ei); 2001 2002 btrfs_mark_buffer_dirty(leaf); 2003 out: 2004 btrfs_free_path(path); 2005 return err; 2006 } 2007 2008 static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, 2009 struct btrfs_root *root, 2010 struct btrfs_delayed_ref_node *node, 2011 struct btrfs_delayed_extent_op *extent_op, 2012 int insert_reserved) 2013 { 2014 int ret = 0; 2015 struct btrfs_delayed_tree_ref *ref; 2016 struct btrfs_key ins; 2017 u64 parent = 0; 2018 u64 ref_root = 0; 2019 2020 ins.objectid = node->bytenr; 2021 ins.offset = node->num_bytes; 2022 ins.type = BTRFS_EXTENT_ITEM_KEY; 2023 2024 ref = btrfs_delayed_node_to_tree_ref(node); 2025 if (node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2026 parent = ref->parent; 2027 else 2028 ref_root = ref->root; 2029 2030 BUG_ON(node->ref_mod != 1); 2031 if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) { 2032 BUG_ON(!extent_op || !extent_op->update_flags || 2033 !extent_op->update_key); 2034 ret = alloc_reserved_tree_block(trans, root, 2035 parent, ref_root, 2036 extent_op->flags_to_set, 2037 &extent_op->key, 2038 ref->level, &ins); 2039 } else if (node->action == BTRFS_ADD_DELAYED_REF) { 2040 ret = __btrfs_inc_extent_ref(trans, root, node->bytenr, 2041 node->num_bytes, parent, ref_root, 2042 ref->level, 0, 1, extent_op); 2043 } else if (node->action == BTRFS_DROP_DELAYED_REF) { 2044 ret = __btrfs_free_extent(trans, root, node->bytenr, 2045 node->num_bytes, parent, ref_root, 2046 ref->level, 0, 1, extent_op); 2047 } else { 2048 BUG(); 2049 } 2050 return ret; 2051 } 2052 2053 /* helper function to actually process a single delayed ref entry */ 2054 static int run_one_delayed_ref(struct btrfs_trans_handle *trans, 2055 struct btrfs_root *root, 2056 struct btrfs_delayed_ref_node *node, 2057 struct btrfs_delayed_extent_op *extent_op, 2058 int insert_reserved) 2059 { 2060 int ret; 2061 if (btrfs_delayed_ref_is_head(node)) { 2062 struct btrfs_delayed_ref_head *head; 2063 /* 2064 * we've hit the end of the chain and we were supposed 2065 * to insert this extent into the tree. But, it got 2066 * deleted before we ever needed to insert it, so all 2067 * we have to do is clean up the accounting 2068 */ 2069 BUG_ON(extent_op); 2070 head = btrfs_delayed_node_to_head(node); 2071 if (insert_reserved) { 2072 btrfs_pin_extent(root, node->bytenr, 2073 node->num_bytes, 1); 2074 if (head->is_data) { 2075 ret = btrfs_del_csums(trans, root, 2076 node->bytenr, 2077 node->num_bytes); 2078 BUG_ON(ret); 2079 } 2080 } 2081 mutex_unlock(&head->mutex); 2082 return 0; 2083 } 2084 2085 if (node->type == BTRFS_TREE_BLOCK_REF_KEY || 2086 node->type == BTRFS_SHARED_BLOCK_REF_KEY) 2087 ret = run_delayed_tree_ref(trans, root, node, extent_op, 2088 insert_reserved); 2089 else if (node->type == BTRFS_EXTENT_DATA_REF_KEY || 2090 node->type == BTRFS_SHARED_DATA_REF_KEY) 2091 ret = run_delayed_data_ref(trans, root, node, extent_op, 2092 insert_reserved); 2093 else 2094 BUG(); 2095 return ret; 2096 } 2097 2098 static noinline struct btrfs_delayed_ref_node * 2099 select_delayed_ref(struct btrfs_delayed_ref_head *head) 2100 { 2101 struct rb_node *node; 2102 struct btrfs_delayed_ref_node *ref; 2103 int action = BTRFS_ADD_DELAYED_REF; 2104 again: 2105 /* 2106 * select delayed ref of type BTRFS_ADD_DELAYED_REF first. 2107 * this prevents ref count from going down to zero when 2108 * there still are pending delayed ref. 2109 */ 2110 node = rb_prev(&head->node.rb_node); 2111 while (1) { 2112 if (!node) 2113 break; 2114 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2115 rb_node); 2116 if (ref->bytenr != head->node.bytenr) 2117 break; 2118 if (ref->action == action) 2119 return ref; 2120 node = rb_prev(node); 2121 } 2122 if (action == BTRFS_ADD_DELAYED_REF) { 2123 action = BTRFS_DROP_DELAYED_REF; 2124 goto again; 2125 } 2126 return NULL; 2127 } 2128 2129 static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, 2130 struct btrfs_root *root, 2131 struct list_head *cluster) 2132 { 2133 struct btrfs_delayed_ref_root *delayed_refs; 2134 struct btrfs_delayed_ref_node *ref; 2135 struct btrfs_delayed_ref_head *locked_ref = NULL; 2136 struct btrfs_delayed_extent_op *extent_op; 2137 int ret; 2138 int count = 0; 2139 int must_insert_reserved = 0; 2140 2141 delayed_refs = &trans->transaction->delayed_refs; 2142 while (1) { 2143 if (!locked_ref) { 2144 /* pick a new head ref from the cluster list */ 2145 if (list_empty(cluster)) 2146 break; 2147 2148 locked_ref = list_entry(cluster->next, 2149 struct btrfs_delayed_ref_head, cluster); 2150 2151 /* grab the lock that says we are going to process 2152 * all the refs for this head */ 2153 ret = btrfs_delayed_ref_lock(trans, locked_ref); 2154 2155 /* 2156 * we may have dropped the spin lock to get the head 2157 * mutex lock, and that might have given someone else 2158 * time to free the head. If that's true, it has been 2159 * removed from our list and we can move on. 2160 */ 2161 if (ret == -EAGAIN) { 2162 locked_ref = NULL; 2163 count++; 2164 continue; 2165 } 2166 } 2167 2168 /* 2169 * record the must insert reserved flag before we 2170 * drop the spin lock. 2171 */ 2172 must_insert_reserved = locked_ref->must_insert_reserved; 2173 locked_ref->must_insert_reserved = 0; 2174 2175 extent_op = locked_ref->extent_op; 2176 locked_ref->extent_op = NULL; 2177 2178 /* 2179 * locked_ref is the head node, so we have to go one 2180 * node back for any delayed ref updates 2181 */ 2182 ref = select_delayed_ref(locked_ref); 2183 if (!ref) { 2184 /* All delayed refs have been processed, Go ahead 2185 * and send the head node to run_one_delayed_ref, 2186 * so that any accounting fixes can happen 2187 */ 2188 ref = &locked_ref->node; 2189 2190 if (extent_op && must_insert_reserved) { 2191 kfree(extent_op); 2192 extent_op = NULL; 2193 } 2194 2195 if (extent_op) { 2196 spin_unlock(&delayed_refs->lock); 2197 2198 ret = run_delayed_extent_op(trans, root, 2199 ref, extent_op); 2200 BUG_ON(ret); 2201 kfree(extent_op); 2202 2203 cond_resched(); 2204 spin_lock(&delayed_refs->lock); 2205 continue; 2206 } 2207 2208 list_del_init(&locked_ref->cluster); 2209 locked_ref = NULL; 2210 } 2211 2212 ref->in_tree = 0; 2213 rb_erase(&ref->rb_node, &delayed_refs->root); 2214 delayed_refs->num_entries--; 2215 2216 spin_unlock(&delayed_refs->lock); 2217 2218 ret = run_one_delayed_ref(trans, root, ref, extent_op, 2219 must_insert_reserved); 2220 BUG_ON(ret); 2221 2222 btrfs_put_delayed_ref(ref); 2223 kfree(extent_op); 2224 count++; 2225 2226 cond_resched(); 2227 spin_lock(&delayed_refs->lock); 2228 } 2229 return count; 2230 } 2231 2232 /* 2233 * this starts processing the delayed reference count updates and 2234 * extent insertions we have queued up so far. count can be 2235 * 0, which means to process everything in the tree at the start 2236 * of the run (but not newly added entries), or it can be some target 2237 * number you'd like to process. 2238 */ 2239 int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, 2240 struct btrfs_root *root, unsigned long count) 2241 { 2242 struct rb_node *node; 2243 struct btrfs_delayed_ref_root *delayed_refs; 2244 struct btrfs_delayed_ref_node *ref; 2245 struct list_head cluster; 2246 int ret; 2247 int run_all = count == (unsigned long)-1; 2248 int run_most = 0; 2249 2250 if (root == root->fs_info->extent_root) 2251 root = root->fs_info->tree_root; 2252 2253 delayed_refs = &trans->transaction->delayed_refs; 2254 INIT_LIST_HEAD(&cluster); 2255 again: 2256 spin_lock(&delayed_refs->lock); 2257 if (count == 0) { 2258 count = delayed_refs->num_entries * 2; 2259 run_most = 1; 2260 } 2261 while (1) { 2262 if (!(run_all || run_most) && 2263 delayed_refs->num_heads_ready < 64) 2264 break; 2265 2266 /* 2267 * go find something we can process in the rbtree. We start at 2268 * the beginning of the tree, and then build a cluster 2269 * of refs to process starting at the first one we are able to 2270 * lock 2271 */ 2272 ret = btrfs_find_ref_cluster(trans, &cluster, 2273 delayed_refs->run_delayed_start); 2274 if (ret) 2275 break; 2276 2277 ret = run_clustered_refs(trans, root, &cluster); 2278 BUG_ON(ret < 0); 2279 2280 count -= min_t(unsigned long, ret, count); 2281 2282 if (count == 0) 2283 break; 2284 } 2285 2286 if (run_all) { 2287 node = rb_first(&delayed_refs->root); 2288 if (!node) 2289 goto out; 2290 count = (unsigned long)-1; 2291 2292 while (node) { 2293 ref = rb_entry(node, struct btrfs_delayed_ref_node, 2294 rb_node); 2295 if (btrfs_delayed_ref_is_head(ref)) { 2296 struct btrfs_delayed_ref_head *head; 2297 2298 head = btrfs_delayed_node_to_head(ref); 2299 atomic_inc(&ref->refs); 2300 2301 spin_unlock(&delayed_refs->lock); 2302 /* 2303 * Mutex was contended, block until it's 2304 * released and try again 2305 */ 2306 mutex_lock(&head->mutex); 2307 mutex_unlock(&head->mutex); 2308 2309 btrfs_put_delayed_ref(ref); 2310 cond_resched(); 2311 goto again; 2312 } 2313 node = rb_next(node); 2314 } 2315 spin_unlock(&delayed_refs->lock); 2316 schedule_timeout(1); 2317 goto again; 2318 } 2319 out: 2320 spin_unlock(&delayed_refs->lock); 2321 return 0; 2322 } 2323 2324 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans, 2325 struct btrfs_root *root, 2326 u64 bytenr, u64 num_bytes, u64 flags, 2327 int is_data) 2328 { 2329 struct btrfs_delayed_extent_op *extent_op; 2330 int ret; 2331 2332 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 2333 if (!extent_op) 2334 return -ENOMEM; 2335 2336 extent_op->flags_to_set = flags; 2337 extent_op->update_flags = 1; 2338 extent_op->update_key = 0; 2339 extent_op->is_data = is_data ? 1 : 0; 2340 2341 ret = btrfs_add_delayed_extent_op(trans, bytenr, num_bytes, extent_op); 2342 if (ret) 2343 kfree(extent_op); 2344 return ret; 2345 } 2346 2347 static noinline int check_delayed_ref(struct btrfs_trans_handle *trans, 2348 struct btrfs_root *root, 2349 struct btrfs_path *path, 2350 u64 objectid, u64 offset, u64 bytenr) 2351 { 2352 struct btrfs_delayed_ref_head *head; 2353 struct btrfs_delayed_ref_node *ref; 2354 struct btrfs_delayed_data_ref *data_ref; 2355 struct btrfs_delayed_ref_root *delayed_refs; 2356 struct rb_node *node; 2357 int ret = 0; 2358 2359 ret = -ENOENT; 2360 delayed_refs = &trans->transaction->delayed_refs; 2361 spin_lock(&delayed_refs->lock); 2362 head = btrfs_find_delayed_ref_head(trans, bytenr); 2363 if (!head) 2364 goto out; 2365 2366 if (!mutex_trylock(&head->mutex)) { 2367 atomic_inc(&head->node.refs); 2368 spin_unlock(&delayed_refs->lock); 2369 2370 btrfs_release_path(path); 2371 2372 /* 2373 * Mutex was contended, block until it's released and let 2374 * caller try again 2375 */ 2376 mutex_lock(&head->mutex); 2377 mutex_unlock(&head->mutex); 2378 btrfs_put_delayed_ref(&head->node); 2379 return -EAGAIN; 2380 } 2381 2382 node = rb_prev(&head->node.rb_node); 2383 if (!node) 2384 goto out_unlock; 2385 2386 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2387 2388 if (ref->bytenr != bytenr) 2389 goto out_unlock; 2390 2391 ret = 1; 2392 if (ref->type != BTRFS_EXTENT_DATA_REF_KEY) 2393 goto out_unlock; 2394 2395 data_ref = btrfs_delayed_node_to_data_ref(ref); 2396 2397 node = rb_prev(node); 2398 if (node) { 2399 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 2400 if (ref->bytenr == bytenr) 2401 goto out_unlock; 2402 } 2403 2404 if (data_ref->root != root->root_key.objectid || 2405 data_ref->objectid != objectid || data_ref->offset != offset) 2406 goto out_unlock; 2407 2408 ret = 0; 2409 out_unlock: 2410 mutex_unlock(&head->mutex); 2411 out: 2412 spin_unlock(&delayed_refs->lock); 2413 return ret; 2414 } 2415 2416 static noinline int check_committed_ref(struct btrfs_trans_handle *trans, 2417 struct btrfs_root *root, 2418 struct btrfs_path *path, 2419 u64 objectid, u64 offset, u64 bytenr) 2420 { 2421 struct btrfs_root *extent_root = root->fs_info->extent_root; 2422 struct extent_buffer *leaf; 2423 struct btrfs_extent_data_ref *ref; 2424 struct btrfs_extent_inline_ref *iref; 2425 struct btrfs_extent_item *ei; 2426 struct btrfs_key key; 2427 u32 item_size; 2428 int ret; 2429 2430 key.objectid = bytenr; 2431 key.offset = (u64)-1; 2432 key.type = BTRFS_EXTENT_ITEM_KEY; 2433 2434 ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0); 2435 if (ret < 0) 2436 goto out; 2437 BUG_ON(ret == 0); 2438 2439 ret = -ENOENT; 2440 if (path->slots[0] == 0) 2441 goto out; 2442 2443 path->slots[0]--; 2444 leaf = path->nodes[0]; 2445 btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); 2446 2447 if (key.objectid != bytenr || key.type != BTRFS_EXTENT_ITEM_KEY) 2448 goto out; 2449 2450 ret = 1; 2451 item_size = btrfs_item_size_nr(leaf, path->slots[0]); 2452 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 2453 if (item_size < sizeof(*ei)) { 2454 WARN_ON(item_size != sizeof(struct btrfs_extent_item_v0)); 2455 goto out; 2456 } 2457 #endif 2458 ei = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_item); 2459 2460 if (item_size != sizeof(*ei) + 2461 btrfs_extent_inline_ref_size(BTRFS_EXTENT_DATA_REF_KEY)) 2462 goto out; 2463 2464 if (btrfs_extent_generation(leaf, ei) <= 2465 btrfs_root_last_snapshot(&root->root_item)) 2466 goto out; 2467 2468 iref = (struct btrfs_extent_inline_ref *)(ei + 1); 2469 if (btrfs_extent_inline_ref_type(leaf, iref) != 2470 BTRFS_EXTENT_DATA_REF_KEY) 2471 goto out; 2472 2473 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 2474 if (btrfs_extent_refs(leaf, ei) != 2475 btrfs_extent_data_ref_count(leaf, ref) || 2476 btrfs_extent_data_ref_root(leaf, ref) != 2477 root->root_key.objectid || 2478 btrfs_extent_data_ref_objectid(leaf, ref) != objectid || 2479 btrfs_extent_data_ref_offset(leaf, ref) != offset) 2480 goto out; 2481 2482 ret = 0; 2483 out: 2484 return ret; 2485 } 2486 2487 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2488 struct btrfs_root *root, 2489 u64 objectid, u64 offset, u64 bytenr) 2490 { 2491 struct btrfs_path *path; 2492 int ret; 2493 int ret2; 2494 2495 path = btrfs_alloc_path(); 2496 if (!path) 2497 return -ENOENT; 2498 2499 do { 2500 ret = check_committed_ref(trans, root, path, objectid, 2501 offset, bytenr); 2502 if (ret && ret != -ENOENT) 2503 goto out; 2504 2505 ret2 = check_delayed_ref(trans, root, path, objectid, 2506 offset, bytenr); 2507 } while (ret2 == -EAGAIN); 2508 2509 if (ret2 && ret2 != -ENOENT) { 2510 ret = ret2; 2511 goto out; 2512 } 2513 2514 if (ret != -ENOENT || ret2 != -ENOENT) 2515 ret = 0; 2516 out: 2517 btrfs_free_path(path); 2518 if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) 2519 WARN_ON(ret > 0); 2520 return ret; 2521 } 2522 2523 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans, 2524 struct btrfs_root *root, 2525 struct extent_buffer *buf, 2526 int full_backref, int inc) 2527 { 2528 u64 bytenr; 2529 u64 num_bytes; 2530 u64 parent; 2531 u64 ref_root; 2532 u32 nritems; 2533 struct btrfs_key key; 2534 struct btrfs_file_extent_item *fi; 2535 int i; 2536 int level; 2537 int ret = 0; 2538 int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, 2539 u64, u64, u64, u64, u64, u64); 2540 2541 ref_root = btrfs_header_owner(buf); 2542 nritems = btrfs_header_nritems(buf); 2543 level = btrfs_header_level(buf); 2544 2545 if (!root->ref_cows && level == 0) 2546 return 0; 2547 2548 if (inc) 2549 process_func = btrfs_inc_extent_ref; 2550 else 2551 process_func = btrfs_free_extent; 2552 2553 if (full_backref) 2554 parent = buf->start; 2555 else 2556 parent = 0; 2557 2558 for (i = 0; i < nritems; i++) { 2559 if (level == 0) { 2560 btrfs_item_key_to_cpu(buf, &key, i); 2561 if (btrfs_key_type(&key) != BTRFS_EXTENT_DATA_KEY) 2562 continue; 2563 fi = btrfs_item_ptr(buf, i, 2564 struct btrfs_file_extent_item); 2565 if (btrfs_file_extent_type(buf, fi) == 2566 BTRFS_FILE_EXTENT_INLINE) 2567 continue; 2568 bytenr = btrfs_file_extent_disk_bytenr(buf, fi); 2569 if (bytenr == 0) 2570 continue; 2571 2572 num_bytes = btrfs_file_extent_disk_num_bytes(buf, fi); 2573 key.offset -= btrfs_file_extent_offset(buf, fi); 2574 ret = process_func(trans, root, bytenr, num_bytes, 2575 parent, ref_root, key.objectid, 2576 key.offset); 2577 if (ret) 2578 goto fail; 2579 } else { 2580 bytenr = btrfs_node_blockptr(buf, i); 2581 num_bytes = btrfs_level_size(root, level - 1); 2582 ret = process_func(trans, root, bytenr, num_bytes, 2583 parent, ref_root, level - 1, 0); 2584 if (ret) 2585 goto fail; 2586 } 2587 } 2588 return 0; 2589 fail: 2590 BUG(); 2591 return ret; 2592 } 2593 2594 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2595 struct extent_buffer *buf, int full_backref) 2596 { 2597 return __btrfs_mod_ref(trans, root, buf, full_backref, 1); 2598 } 2599 2600 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root, 2601 struct extent_buffer *buf, int full_backref) 2602 { 2603 return __btrfs_mod_ref(trans, root, buf, full_backref, 0); 2604 } 2605 2606 static int write_one_cache_group(struct btrfs_trans_handle *trans, 2607 struct btrfs_root *root, 2608 struct btrfs_path *path, 2609 struct btrfs_block_group_cache *cache) 2610 { 2611 int ret; 2612 struct btrfs_root *extent_root = root->fs_info->extent_root; 2613 unsigned long bi; 2614 struct extent_buffer *leaf; 2615 2616 ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1); 2617 if (ret < 0) 2618 goto fail; 2619 BUG_ON(ret); 2620 2621 leaf = path->nodes[0]; 2622 bi = btrfs_item_ptr_offset(leaf, path->slots[0]); 2623 write_extent_buffer(leaf, &cache->item, bi, sizeof(cache->item)); 2624 btrfs_mark_buffer_dirty(leaf); 2625 btrfs_release_path(path); 2626 fail: 2627 if (ret) 2628 return ret; 2629 return 0; 2630 2631 } 2632 2633 static struct btrfs_block_group_cache * 2634 next_block_group(struct btrfs_root *root, 2635 struct btrfs_block_group_cache *cache) 2636 { 2637 struct rb_node *node; 2638 spin_lock(&root->fs_info->block_group_cache_lock); 2639 node = rb_next(&cache->cache_node); 2640 btrfs_put_block_group(cache); 2641 if (node) { 2642 cache = rb_entry(node, struct btrfs_block_group_cache, 2643 cache_node); 2644 btrfs_get_block_group(cache); 2645 } else 2646 cache = NULL; 2647 spin_unlock(&root->fs_info->block_group_cache_lock); 2648 return cache; 2649 } 2650 2651 static int cache_save_setup(struct btrfs_block_group_cache *block_group, 2652 struct btrfs_trans_handle *trans, 2653 struct btrfs_path *path) 2654 { 2655 struct btrfs_root *root = block_group->fs_info->tree_root; 2656 struct inode *inode = NULL; 2657 u64 alloc_hint = 0; 2658 int dcs = BTRFS_DC_ERROR; 2659 int num_pages = 0; 2660 int retries = 0; 2661 int ret = 0; 2662 2663 /* 2664 * If this block group is smaller than 100 megs don't bother caching the 2665 * block group. 2666 */ 2667 if (block_group->key.offset < (100 * 1024 * 1024)) { 2668 spin_lock(&block_group->lock); 2669 block_group->disk_cache_state = BTRFS_DC_WRITTEN; 2670 spin_unlock(&block_group->lock); 2671 return 0; 2672 } 2673 2674 again: 2675 inode = lookup_free_space_inode(root, block_group, path); 2676 if (IS_ERR(inode) && PTR_ERR(inode) != -ENOENT) { 2677 ret = PTR_ERR(inode); 2678 btrfs_release_path(path); 2679 goto out; 2680 } 2681 2682 if (IS_ERR(inode)) { 2683 BUG_ON(retries); 2684 retries++; 2685 2686 if (block_group->ro) 2687 goto out_free; 2688 2689 ret = create_free_space_inode(root, trans, block_group, path); 2690 if (ret) 2691 goto out_free; 2692 goto again; 2693 } 2694 2695 /* 2696 * We want to set the generation to 0, that way if anything goes wrong 2697 * from here on out we know not to trust this cache when we load up next 2698 * time. 2699 */ 2700 BTRFS_I(inode)->generation = 0; 2701 ret = btrfs_update_inode(trans, root, inode); 2702 WARN_ON(ret); 2703 2704 if (i_size_read(inode) > 0) { 2705 ret = btrfs_truncate_free_space_cache(root, trans, path, 2706 inode); 2707 if (ret) 2708 goto out_put; 2709 } 2710 2711 spin_lock(&block_group->lock); 2712 if (block_group->cached != BTRFS_CACHE_FINISHED) { 2713 /* We're not cached, don't bother trying to write stuff out */ 2714 dcs = BTRFS_DC_WRITTEN; 2715 spin_unlock(&block_group->lock); 2716 goto out_put; 2717 } 2718 spin_unlock(&block_group->lock); 2719 2720 num_pages = (int)div64_u64(block_group->key.offset, 1024 * 1024 * 1024); 2721 if (!num_pages) 2722 num_pages = 1; 2723 2724 /* 2725 * Just to make absolutely sure we have enough space, we're going to 2726 * preallocate 12 pages worth of space for each block group. In 2727 * practice we ought to use at most 8, but we need extra space so we can 2728 * add our header and have a terminator between the extents and the 2729 * bitmaps. 2730 */ 2731 num_pages *= 16; 2732 num_pages *= PAGE_CACHE_SIZE; 2733 2734 ret = btrfs_check_data_free_space(inode, num_pages); 2735 if (ret) 2736 goto out_put; 2737 2738 ret = btrfs_prealloc_file_range_trans(inode, trans, 0, 0, num_pages, 2739 num_pages, num_pages, 2740 &alloc_hint); 2741 if (!ret) 2742 dcs = BTRFS_DC_SETUP; 2743 btrfs_free_reserved_data_space(inode, num_pages); 2744 out_put: 2745 iput(inode); 2746 out_free: 2747 btrfs_release_path(path); 2748 out: 2749 spin_lock(&block_group->lock); 2750 block_group->disk_cache_state = dcs; 2751 spin_unlock(&block_group->lock); 2752 2753 return ret; 2754 } 2755 2756 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans, 2757 struct btrfs_root *root) 2758 { 2759 struct btrfs_block_group_cache *cache; 2760 int err = 0; 2761 struct btrfs_path *path; 2762 u64 last = 0; 2763 2764 path = btrfs_alloc_path(); 2765 if (!path) 2766 return -ENOMEM; 2767 2768 again: 2769 while (1) { 2770 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2771 while (cache) { 2772 if (cache->disk_cache_state == BTRFS_DC_CLEAR) 2773 break; 2774 cache = next_block_group(root, cache); 2775 } 2776 if (!cache) { 2777 if (last == 0) 2778 break; 2779 last = 0; 2780 continue; 2781 } 2782 err = cache_save_setup(cache, trans, path); 2783 last = cache->key.objectid + cache->key.offset; 2784 btrfs_put_block_group(cache); 2785 } 2786 2787 while (1) { 2788 if (last == 0) { 2789 err = btrfs_run_delayed_refs(trans, root, 2790 (unsigned long)-1); 2791 BUG_ON(err); 2792 } 2793 2794 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2795 while (cache) { 2796 if (cache->disk_cache_state == BTRFS_DC_CLEAR) { 2797 btrfs_put_block_group(cache); 2798 goto again; 2799 } 2800 2801 if (cache->dirty) 2802 break; 2803 cache = next_block_group(root, cache); 2804 } 2805 if (!cache) { 2806 if (last == 0) 2807 break; 2808 last = 0; 2809 continue; 2810 } 2811 2812 if (cache->disk_cache_state == BTRFS_DC_SETUP) 2813 cache->disk_cache_state = BTRFS_DC_NEED_WRITE; 2814 cache->dirty = 0; 2815 last = cache->key.objectid + cache->key.offset; 2816 2817 err = write_one_cache_group(trans, root, path, cache); 2818 BUG_ON(err); 2819 btrfs_put_block_group(cache); 2820 } 2821 2822 while (1) { 2823 /* 2824 * I don't think this is needed since we're just marking our 2825 * preallocated extent as written, but just in case it can't 2826 * hurt. 2827 */ 2828 if (last == 0) { 2829 err = btrfs_run_delayed_refs(trans, root, 2830 (unsigned long)-1); 2831 BUG_ON(err); 2832 } 2833 2834 cache = btrfs_lookup_first_block_group(root->fs_info, last); 2835 while (cache) { 2836 /* 2837 * Really this shouldn't happen, but it could if we 2838 * couldn't write the entire preallocated extent and 2839 * splitting the extent resulted in a new block. 2840 */ 2841 if (cache->dirty) { 2842 btrfs_put_block_group(cache); 2843 goto again; 2844 } 2845 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2846 break; 2847 cache = next_block_group(root, cache); 2848 } 2849 if (!cache) { 2850 if (last == 0) 2851 break; 2852 last = 0; 2853 continue; 2854 } 2855 2856 btrfs_write_out_cache(root, trans, cache, path); 2857 2858 /* 2859 * If we didn't have an error then the cache state is still 2860 * NEED_WRITE, so we can set it to WRITTEN. 2861 */ 2862 if (cache->disk_cache_state == BTRFS_DC_NEED_WRITE) 2863 cache->disk_cache_state = BTRFS_DC_WRITTEN; 2864 last = cache->key.objectid + cache->key.offset; 2865 btrfs_put_block_group(cache); 2866 } 2867 2868 btrfs_free_path(path); 2869 return 0; 2870 } 2871 2872 int btrfs_extent_readonly(struct btrfs_root *root, u64 bytenr) 2873 { 2874 struct btrfs_block_group_cache *block_group; 2875 int readonly = 0; 2876 2877 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 2878 if (!block_group || block_group->ro) 2879 readonly = 1; 2880 if (block_group) 2881 btrfs_put_block_group(block_group); 2882 return readonly; 2883 } 2884 2885 static int update_space_info(struct btrfs_fs_info *info, u64 flags, 2886 u64 total_bytes, u64 bytes_used, 2887 struct btrfs_space_info **space_info) 2888 { 2889 struct btrfs_space_info *found; 2890 int i; 2891 int factor; 2892 2893 if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | 2894 BTRFS_BLOCK_GROUP_RAID10)) 2895 factor = 2; 2896 else 2897 factor = 1; 2898 2899 found = __find_space_info(info, flags); 2900 if (found) { 2901 spin_lock(&found->lock); 2902 found->total_bytes += total_bytes; 2903 found->disk_total += total_bytes * factor; 2904 found->bytes_used += bytes_used; 2905 found->disk_used += bytes_used * factor; 2906 found->full = 0; 2907 spin_unlock(&found->lock); 2908 *space_info = found; 2909 return 0; 2910 } 2911 found = kzalloc(sizeof(*found), GFP_NOFS); 2912 if (!found) 2913 return -ENOMEM; 2914 2915 for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) 2916 INIT_LIST_HEAD(&found->block_groups[i]); 2917 init_rwsem(&found->groups_sem); 2918 spin_lock_init(&found->lock); 2919 found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | 2920 BTRFS_BLOCK_GROUP_SYSTEM | 2921 BTRFS_BLOCK_GROUP_METADATA); 2922 found->total_bytes = total_bytes; 2923 found->disk_total = total_bytes * factor; 2924 found->bytes_used = bytes_used; 2925 found->disk_used = bytes_used * factor; 2926 found->bytes_pinned = 0; 2927 found->bytes_reserved = 0; 2928 found->bytes_readonly = 0; 2929 found->bytes_may_use = 0; 2930 found->full = 0; 2931 found->force_alloc = CHUNK_ALLOC_NO_FORCE; 2932 found->chunk_alloc = 0; 2933 found->flush = 0; 2934 init_waitqueue_head(&found->wait); 2935 *space_info = found; 2936 list_add_rcu(&found->list, &info->space_info); 2937 return 0; 2938 } 2939 2940 static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) 2941 { 2942 u64 extra_flags = flags & (BTRFS_BLOCK_GROUP_RAID0 | 2943 BTRFS_BLOCK_GROUP_RAID1 | 2944 BTRFS_BLOCK_GROUP_RAID10 | 2945 BTRFS_BLOCK_GROUP_DUP); 2946 if (extra_flags) { 2947 if (flags & BTRFS_BLOCK_GROUP_DATA) 2948 fs_info->avail_data_alloc_bits |= extra_flags; 2949 if (flags & BTRFS_BLOCK_GROUP_METADATA) 2950 fs_info->avail_metadata_alloc_bits |= extra_flags; 2951 if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2952 fs_info->avail_system_alloc_bits |= extra_flags; 2953 } 2954 } 2955 2956 u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) 2957 { 2958 /* 2959 * we add in the count of missing devices because we want 2960 * to make sure that any RAID levels on a degraded FS 2961 * continue to be honored. 2962 */ 2963 u64 num_devices = root->fs_info->fs_devices->rw_devices + 2964 root->fs_info->fs_devices->missing_devices; 2965 2966 if (num_devices == 1) 2967 flags &= ~(BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID0); 2968 if (num_devices < 4) 2969 flags &= ~BTRFS_BLOCK_GROUP_RAID10; 2970 2971 if ((flags & BTRFS_BLOCK_GROUP_DUP) && 2972 (flags & (BTRFS_BLOCK_GROUP_RAID1 | 2973 BTRFS_BLOCK_GROUP_RAID10))) { 2974 flags &= ~BTRFS_BLOCK_GROUP_DUP; 2975 } 2976 2977 if ((flags & BTRFS_BLOCK_GROUP_RAID1) && 2978 (flags & BTRFS_BLOCK_GROUP_RAID10)) { 2979 flags &= ~BTRFS_BLOCK_GROUP_RAID1; 2980 } 2981 2982 if ((flags & BTRFS_BLOCK_GROUP_RAID0) && 2983 ((flags & BTRFS_BLOCK_GROUP_RAID1) | 2984 (flags & BTRFS_BLOCK_GROUP_RAID10) | 2985 (flags & BTRFS_BLOCK_GROUP_DUP))) 2986 flags &= ~BTRFS_BLOCK_GROUP_RAID0; 2987 return flags; 2988 } 2989 2990 static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) 2991 { 2992 if (flags & BTRFS_BLOCK_GROUP_DATA) 2993 flags |= root->fs_info->avail_data_alloc_bits & 2994 root->fs_info->data_alloc_profile; 2995 else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) 2996 flags |= root->fs_info->avail_system_alloc_bits & 2997 root->fs_info->system_alloc_profile; 2998 else if (flags & BTRFS_BLOCK_GROUP_METADATA) 2999 flags |= root->fs_info->avail_metadata_alloc_bits & 3000 root->fs_info->metadata_alloc_profile; 3001 return btrfs_reduce_alloc_profile(root, flags); 3002 } 3003 3004 u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) 3005 { 3006 u64 flags; 3007 3008 if (data) 3009 flags = BTRFS_BLOCK_GROUP_DATA; 3010 else if (root == root->fs_info->chunk_root) 3011 flags = BTRFS_BLOCK_GROUP_SYSTEM; 3012 else 3013 flags = BTRFS_BLOCK_GROUP_METADATA; 3014 3015 return get_alloc_profile(root, flags); 3016 } 3017 3018 void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) 3019 { 3020 BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, 3021 BTRFS_BLOCK_GROUP_DATA); 3022 } 3023 3024 /* 3025 * This will check the space that the inode allocates from to make sure we have 3026 * enough space for bytes. 3027 */ 3028 int btrfs_check_data_free_space(struct inode *inode, u64 bytes) 3029 { 3030 struct btrfs_space_info *data_sinfo; 3031 struct btrfs_root *root = BTRFS_I(inode)->root; 3032 u64 used; 3033 int ret = 0, committed = 0, alloc_chunk = 1; 3034 3035 /* make sure bytes are sectorsize aligned */ 3036 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3037 3038 if (root == root->fs_info->tree_root || 3039 BTRFS_I(inode)->location.objectid == BTRFS_FREE_INO_OBJECTID) { 3040 alloc_chunk = 0; 3041 committed = 1; 3042 } 3043 3044 data_sinfo = BTRFS_I(inode)->space_info; 3045 if (!data_sinfo) 3046 goto alloc; 3047 3048 again: 3049 /* make sure we have enough space to handle the data first */ 3050 spin_lock(&data_sinfo->lock); 3051 used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + 3052 data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + 3053 data_sinfo->bytes_may_use; 3054 3055 if (used + bytes > data_sinfo->total_bytes) { 3056 struct btrfs_trans_handle *trans; 3057 3058 /* 3059 * if we don't have enough free bytes in this space then we need 3060 * to alloc a new chunk. 3061 */ 3062 if (!data_sinfo->full && alloc_chunk) { 3063 u64 alloc_target; 3064 3065 data_sinfo->force_alloc = CHUNK_ALLOC_FORCE; 3066 spin_unlock(&data_sinfo->lock); 3067 alloc: 3068 alloc_target = btrfs_get_alloc_profile(root, 1); 3069 trans = btrfs_join_transaction(root); 3070 if (IS_ERR(trans)) 3071 return PTR_ERR(trans); 3072 3073 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 3074 bytes + 2 * 1024 * 1024, 3075 alloc_target, 3076 CHUNK_ALLOC_NO_FORCE); 3077 btrfs_end_transaction(trans, root); 3078 if (ret < 0) { 3079 if (ret != -ENOSPC) 3080 return ret; 3081 else 3082 goto commit_trans; 3083 } 3084 3085 if (!data_sinfo) { 3086 btrfs_set_inode_space_info(root, inode); 3087 data_sinfo = BTRFS_I(inode)->space_info; 3088 } 3089 goto again; 3090 } 3091 3092 /* 3093 * If we have less pinned bytes than we want to allocate then 3094 * don't bother committing the transaction, it won't help us. 3095 */ 3096 if (data_sinfo->bytes_pinned < bytes) 3097 committed = 1; 3098 spin_unlock(&data_sinfo->lock); 3099 3100 /* commit the current transaction and try again */ 3101 commit_trans: 3102 if (!committed && 3103 !atomic_read(&root->fs_info->open_ioctl_trans)) { 3104 committed = 1; 3105 trans = btrfs_join_transaction(root); 3106 if (IS_ERR(trans)) 3107 return PTR_ERR(trans); 3108 ret = btrfs_commit_transaction(trans, root); 3109 if (ret) 3110 return ret; 3111 goto again; 3112 } 3113 3114 return -ENOSPC; 3115 } 3116 data_sinfo->bytes_may_use += bytes; 3117 BTRFS_I(inode)->reserved_bytes += bytes; 3118 spin_unlock(&data_sinfo->lock); 3119 3120 return 0; 3121 } 3122 3123 /* 3124 * called when we are clearing an delalloc extent from the 3125 * inode's io_tree or there was an error for whatever reason 3126 * after calling btrfs_check_data_free_space 3127 */ 3128 void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) 3129 { 3130 struct btrfs_root *root = BTRFS_I(inode)->root; 3131 struct btrfs_space_info *data_sinfo; 3132 3133 /* make sure bytes are sectorsize aligned */ 3134 bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 3135 3136 data_sinfo = BTRFS_I(inode)->space_info; 3137 spin_lock(&data_sinfo->lock); 3138 data_sinfo->bytes_may_use -= bytes; 3139 BTRFS_I(inode)->reserved_bytes -= bytes; 3140 spin_unlock(&data_sinfo->lock); 3141 } 3142 3143 static void force_metadata_allocation(struct btrfs_fs_info *info) 3144 { 3145 struct list_head *head = &info->space_info; 3146 struct btrfs_space_info *found; 3147 3148 rcu_read_lock(); 3149 list_for_each_entry_rcu(found, head, list) { 3150 if (found->flags & BTRFS_BLOCK_GROUP_METADATA) 3151 found->force_alloc = CHUNK_ALLOC_FORCE; 3152 } 3153 rcu_read_unlock(); 3154 } 3155 3156 static int should_alloc_chunk(struct btrfs_root *root, 3157 struct btrfs_space_info *sinfo, u64 alloc_bytes, 3158 int force) 3159 { 3160 u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; 3161 u64 num_allocated = sinfo->bytes_used + sinfo->bytes_reserved; 3162 u64 thresh; 3163 3164 if (force == CHUNK_ALLOC_FORCE) 3165 return 1; 3166 3167 /* 3168 * in limited mode, we want to have some free space up to 3169 * about 1% of the FS size. 3170 */ 3171 if (force == CHUNK_ALLOC_LIMITED) { 3172 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3173 thresh = max_t(u64, 64 * 1024 * 1024, 3174 div_factor_fine(thresh, 1)); 3175 3176 if (num_bytes - num_allocated < thresh) 3177 return 1; 3178 } 3179 3180 /* 3181 * we have two similar checks here, one based on percentage 3182 * and once based on a hard number of 256MB. The idea 3183 * is that if we have a good amount of free 3184 * room, don't allocate a chunk. A good mount is 3185 * less than 80% utilized of the chunks we have allocated, 3186 * or more than 256MB free 3187 */ 3188 if (num_allocated + alloc_bytes + 256 * 1024 * 1024 < num_bytes) 3189 return 0; 3190 3191 if (num_allocated + alloc_bytes < div_factor(num_bytes, 8)) 3192 return 0; 3193 3194 thresh = btrfs_super_total_bytes(&root->fs_info->super_copy); 3195 3196 /* 256MB or 5% of the FS */ 3197 thresh = max_t(u64, 256 * 1024 * 1024, div_factor_fine(thresh, 5)); 3198 3199 if (num_bytes > thresh && sinfo->bytes_used < div_factor(num_bytes, 3)) 3200 return 0; 3201 return 1; 3202 } 3203 3204 static int do_chunk_alloc(struct btrfs_trans_handle *trans, 3205 struct btrfs_root *extent_root, u64 alloc_bytes, 3206 u64 flags, int force) 3207 { 3208 struct btrfs_space_info *space_info; 3209 struct btrfs_fs_info *fs_info = extent_root->fs_info; 3210 int wait_for_alloc = 0; 3211 int ret = 0; 3212 3213 flags = btrfs_reduce_alloc_profile(extent_root, flags); 3214 3215 space_info = __find_space_info(extent_root->fs_info, flags); 3216 if (!space_info) { 3217 ret = update_space_info(extent_root->fs_info, flags, 3218 0, 0, &space_info); 3219 BUG_ON(ret); 3220 } 3221 BUG_ON(!space_info); 3222 3223 again: 3224 spin_lock(&space_info->lock); 3225 if (space_info->force_alloc) 3226 force = space_info->force_alloc; 3227 if (space_info->full) { 3228 spin_unlock(&space_info->lock); 3229 return 0; 3230 } 3231 3232 if (!should_alloc_chunk(extent_root, space_info, alloc_bytes, force)) { 3233 spin_unlock(&space_info->lock); 3234 return 0; 3235 } else if (space_info->chunk_alloc) { 3236 wait_for_alloc = 1; 3237 } else { 3238 space_info->chunk_alloc = 1; 3239 } 3240 3241 spin_unlock(&space_info->lock); 3242 3243 mutex_lock(&fs_info->chunk_mutex); 3244 3245 /* 3246 * The chunk_mutex is held throughout the entirety of a chunk 3247 * allocation, so once we've acquired the chunk_mutex we know that the 3248 * other guy is done and we need to recheck and see if we should 3249 * allocate. 3250 */ 3251 if (wait_for_alloc) { 3252 mutex_unlock(&fs_info->chunk_mutex); 3253 wait_for_alloc = 0; 3254 goto again; 3255 } 3256 3257 /* 3258 * If we have mixed data/metadata chunks we want to make sure we keep 3259 * allocating mixed chunks instead of individual chunks. 3260 */ 3261 if (btrfs_mixed_space_info(space_info)) 3262 flags |= (BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_METADATA); 3263 3264 /* 3265 * if we're doing a data chunk, go ahead and make sure that 3266 * we keep a reasonable number of metadata chunks allocated in the 3267 * FS as well. 3268 */ 3269 if (flags & BTRFS_BLOCK_GROUP_DATA && fs_info->metadata_ratio) { 3270 fs_info->data_chunk_allocations++; 3271 if (!(fs_info->data_chunk_allocations % 3272 fs_info->metadata_ratio)) 3273 force_metadata_allocation(fs_info); 3274 } 3275 3276 ret = btrfs_alloc_chunk(trans, extent_root, flags); 3277 if (ret < 0 && ret != -ENOSPC) 3278 goto out; 3279 3280 spin_lock(&space_info->lock); 3281 if (ret) 3282 space_info->full = 1; 3283 else 3284 ret = 1; 3285 3286 space_info->force_alloc = CHUNK_ALLOC_NO_FORCE; 3287 space_info->chunk_alloc = 0; 3288 spin_unlock(&space_info->lock); 3289 out: 3290 mutex_unlock(&extent_root->fs_info->chunk_mutex); 3291 return ret; 3292 } 3293 3294 /* 3295 * shrink metadata reservation for delalloc 3296 */ 3297 static int shrink_delalloc(struct btrfs_trans_handle *trans, 3298 struct btrfs_root *root, u64 to_reclaim, int sync) 3299 { 3300 struct btrfs_block_rsv *block_rsv; 3301 struct btrfs_space_info *space_info; 3302 u64 reserved; 3303 u64 max_reclaim; 3304 u64 reclaimed = 0; 3305 long time_left; 3306 int nr_pages = (2 * 1024 * 1024) >> PAGE_CACHE_SHIFT; 3307 int loops = 0; 3308 unsigned long progress; 3309 3310 block_rsv = &root->fs_info->delalloc_block_rsv; 3311 space_info = block_rsv->space_info; 3312 3313 smp_mb(); 3314 reserved = space_info->bytes_reserved; 3315 progress = space_info->reservation_progress; 3316 3317 if (reserved == 0) 3318 return 0; 3319 3320 smp_mb(); 3321 if (root->fs_info->delalloc_bytes == 0) { 3322 if (trans) 3323 return 0; 3324 btrfs_wait_ordered_extents(root, 0, 0); 3325 return 0; 3326 } 3327 3328 max_reclaim = min(reserved, to_reclaim); 3329 3330 while (loops < 1024) { 3331 /* have the flusher threads jump in and do some IO */ 3332 smp_mb(); 3333 nr_pages = min_t(unsigned long, nr_pages, 3334 root->fs_info->delalloc_bytes >> PAGE_CACHE_SHIFT); 3335 writeback_inodes_sb_nr_if_idle(root->fs_info->sb, nr_pages); 3336 3337 spin_lock(&space_info->lock); 3338 if (reserved > space_info->bytes_reserved) 3339 reclaimed += reserved - space_info->bytes_reserved; 3340 reserved = space_info->bytes_reserved; 3341 spin_unlock(&space_info->lock); 3342 3343 loops++; 3344 3345 if (reserved == 0 || reclaimed >= max_reclaim) 3346 break; 3347 3348 if (trans && trans->transaction->blocked) 3349 return -EAGAIN; 3350 3351 time_left = schedule_timeout_interruptible(1); 3352 3353 /* We were interrupted, exit */ 3354 if (time_left) 3355 break; 3356 3357 /* we've kicked the IO a few times, if anything has been freed, 3358 * exit. There is no sense in looping here for a long time 3359 * when we really need to commit the transaction, or there are 3360 * just too many writers without enough free space 3361 */ 3362 3363 if (loops > 3) { 3364 smp_mb(); 3365 if (progress != space_info->reservation_progress) 3366 break; 3367 } 3368 3369 } 3370 if (reclaimed >= to_reclaim && !trans) 3371 btrfs_wait_ordered_extents(root, 0, 0); 3372 return reclaimed >= to_reclaim; 3373 } 3374 3375 /* 3376 * Retries tells us how many times we've called reserve_metadata_bytes. The 3377 * idea is if this is the first call (retries == 0) then we will add to our 3378 * reserved count if we can't make the allocation in order to hold our place 3379 * while we go and try and free up space. That way for retries > 1 we don't try 3380 * and add space, we just check to see if the amount of unused space is >= the 3381 * total space, meaning that our reservation is valid. 3382 * 3383 * However if we don't intend to retry this reservation, pass -1 as retries so 3384 * that it short circuits this logic. 3385 */ 3386 static int reserve_metadata_bytes(struct btrfs_trans_handle *trans, 3387 struct btrfs_root *root, 3388 struct btrfs_block_rsv *block_rsv, 3389 u64 orig_bytes, int flush) 3390 { 3391 struct btrfs_space_info *space_info = block_rsv->space_info; 3392 u64 unused; 3393 u64 num_bytes = orig_bytes; 3394 int retries = 0; 3395 int ret = 0; 3396 bool committed = false; 3397 bool flushing = false; 3398 3399 again: 3400 ret = 0; 3401 spin_lock(&space_info->lock); 3402 /* 3403 * We only want to wait if somebody other than us is flushing and we are 3404 * actually alloed to flush. 3405 */ 3406 while (flush && !flushing && space_info->flush) { 3407 spin_unlock(&space_info->lock); 3408 /* 3409 * If we have a trans handle we can't wait because the flusher 3410 * may have to commit the transaction, which would mean we would 3411 * deadlock since we are waiting for the flusher to finish, but 3412 * hold the current transaction open. 3413 */ 3414 if (trans) 3415 return -EAGAIN; 3416 ret = wait_event_interruptible(space_info->wait, 3417 !space_info->flush); 3418 /* Must have been interrupted, return */ 3419 if (ret) 3420 return -EINTR; 3421 3422 spin_lock(&space_info->lock); 3423 } 3424 3425 ret = -ENOSPC; 3426 unused = space_info->bytes_used + space_info->bytes_reserved + 3427 space_info->bytes_pinned + space_info->bytes_readonly + 3428 space_info->bytes_may_use; 3429 3430 /* 3431 * The idea here is that we've not already over-reserved the block group 3432 * then we can go ahead and save our reservation first and then start 3433 * flushing if we need to. Otherwise if we've already overcommitted 3434 * lets start flushing stuff first and then come back and try to make 3435 * our reservation. 3436 */ 3437 if (unused <= space_info->total_bytes) { 3438 unused = space_info->total_bytes - unused; 3439 if (unused >= num_bytes) { 3440 space_info->bytes_reserved += orig_bytes; 3441 ret = 0; 3442 } else { 3443 /* 3444 * Ok set num_bytes to orig_bytes since we aren't 3445 * overocmmitted, this way we only try and reclaim what 3446 * we need. 3447 */ 3448 num_bytes = orig_bytes; 3449 } 3450 } else { 3451 /* 3452 * Ok we're over committed, set num_bytes to the overcommitted 3453 * amount plus the amount of bytes that we need for this 3454 * reservation. 3455 */ 3456 num_bytes = unused - space_info->total_bytes + 3457 (orig_bytes * (retries + 1)); 3458 } 3459 3460 /* 3461 * Couldn't make our reservation, save our place so while we're trying 3462 * to reclaim space we can actually use it instead of somebody else 3463 * stealing it from us. 3464 */ 3465 if (ret && flush) { 3466 flushing = true; 3467 space_info->flush = 1; 3468 } 3469 3470 spin_unlock(&space_info->lock); 3471 3472 if (!ret || !flush) 3473 goto out; 3474 3475 /* 3476 * We do synchronous shrinking since we don't actually unreserve 3477 * metadata until after the IO is completed. 3478 */ 3479 ret = shrink_delalloc(trans, root, num_bytes, 1); 3480 if (ret < 0) 3481 goto out; 3482 3483 ret = 0; 3484 3485 /* 3486 * So if we were overcommitted it's possible that somebody else flushed 3487 * out enough space and we simply didn't have enough space to reclaim, 3488 * so go back around and try again. 3489 */ 3490 if (retries < 2) { 3491 retries++; 3492 goto again; 3493 } 3494 3495 /* 3496 * Not enough space to be reclaimed, don't bother committing the 3497 * transaction. 3498 */ 3499 spin_lock(&space_info->lock); 3500 if (space_info->bytes_pinned < orig_bytes) 3501 ret = -ENOSPC; 3502 spin_unlock(&space_info->lock); 3503 if (ret) 3504 goto out; 3505 3506 ret = -EAGAIN; 3507 if (trans) 3508 goto out; 3509 3510 ret = -ENOSPC; 3511 if (committed) 3512 goto out; 3513 3514 trans = btrfs_join_transaction(root); 3515 if (IS_ERR(trans)) 3516 goto out; 3517 ret = btrfs_commit_transaction(trans, root); 3518 if (!ret) { 3519 trans = NULL; 3520 committed = true; 3521 goto again; 3522 } 3523 3524 out: 3525 if (flushing) { 3526 spin_lock(&space_info->lock); 3527 space_info->flush = 0; 3528 wake_up_all(&space_info->wait); 3529 spin_unlock(&space_info->lock); 3530 } 3531 return ret; 3532 } 3533 3534 static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, 3535 struct btrfs_root *root) 3536 { 3537 struct btrfs_block_rsv *block_rsv; 3538 if (root->ref_cows) 3539 block_rsv = trans->block_rsv; 3540 else 3541 block_rsv = root->block_rsv; 3542 3543 if (!block_rsv) 3544 block_rsv = &root->fs_info->empty_block_rsv; 3545 3546 return block_rsv; 3547 } 3548 3549 static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, 3550 u64 num_bytes) 3551 { 3552 int ret = -ENOSPC; 3553 spin_lock(&block_rsv->lock); 3554 if (block_rsv->reserved >= num_bytes) { 3555 block_rsv->reserved -= num_bytes; 3556 if (block_rsv->reserved < block_rsv->size) 3557 block_rsv->full = 0; 3558 ret = 0; 3559 } 3560 spin_unlock(&block_rsv->lock); 3561 return ret; 3562 } 3563 3564 static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, 3565 u64 num_bytes, int update_size) 3566 { 3567 spin_lock(&block_rsv->lock); 3568 block_rsv->reserved += num_bytes; 3569 if (update_size) 3570 block_rsv->size += num_bytes; 3571 else if (block_rsv->reserved >= block_rsv->size) 3572 block_rsv->full = 1; 3573 spin_unlock(&block_rsv->lock); 3574 } 3575 3576 static void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, 3577 struct btrfs_block_rsv *dest, u64 num_bytes) 3578 { 3579 struct btrfs_space_info *space_info = block_rsv->space_info; 3580 3581 spin_lock(&block_rsv->lock); 3582 if (num_bytes == (u64)-1) 3583 num_bytes = block_rsv->size; 3584 block_rsv->size -= num_bytes; 3585 if (block_rsv->reserved >= block_rsv->size) { 3586 num_bytes = block_rsv->reserved - block_rsv->size; 3587 block_rsv->reserved = block_rsv->size; 3588 block_rsv->full = 1; 3589 } else { 3590 num_bytes = 0; 3591 } 3592 spin_unlock(&block_rsv->lock); 3593 3594 if (num_bytes > 0) { 3595 if (dest) { 3596 spin_lock(&dest->lock); 3597 if (!dest->full) { 3598 u64 bytes_to_add; 3599 3600 bytes_to_add = dest->size - dest->reserved; 3601 bytes_to_add = min(num_bytes, bytes_to_add); 3602 dest->reserved += bytes_to_add; 3603 if (dest->reserved >= dest->size) 3604 dest->full = 1; 3605 num_bytes -= bytes_to_add; 3606 } 3607 spin_unlock(&dest->lock); 3608 } 3609 if (num_bytes) { 3610 spin_lock(&space_info->lock); 3611 space_info->bytes_reserved -= num_bytes; 3612 space_info->reservation_progress++; 3613 spin_unlock(&space_info->lock); 3614 } 3615 } 3616 } 3617 3618 static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, 3619 struct btrfs_block_rsv *dst, u64 num_bytes) 3620 { 3621 int ret; 3622 3623 ret = block_rsv_use_bytes(src, num_bytes); 3624 if (ret) 3625 return ret; 3626 3627 block_rsv_add_bytes(dst, num_bytes, 1); 3628 return 0; 3629 } 3630 3631 void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) 3632 { 3633 memset(rsv, 0, sizeof(*rsv)); 3634 spin_lock_init(&rsv->lock); 3635 atomic_set(&rsv->usage, 1); 3636 rsv->priority = 6; 3637 INIT_LIST_HEAD(&rsv->list); 3638 } 3639 3640 struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) 3641 { 3642 struct btrfs_block_rsv *block_rsv; 3643 struct btrfs_fs_info *fs_info = root->fs_info; 3644 3645 block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); 3646 if (!block_rsv) 3647 return NULL; 3648 3649 btrfs_init_block_rsv(block_rsv); 3650 block_rsv->space_info = __find_space_info(fs_info, 3651 BTRFS_BLOCK_GROUP_METADATA); 3652 return block_rsv; 3653 } 3654 3655 void btrfs_free_block_rsv(struct btrfs_root *root, 3656 struct btrfs_block_rsv *rsv) 3657 { 3658 if (rsv && atomic_dec_and_test(&rsv->usage)) { 3659 btrfs_block_rsv_release(root, rsv, (u64)-1); 3660 if (!rsv->durable) 3661 kfree(rsv); 3662 } 3663 } 3664 3665 /* 3666 * make the block_rsv struct be able to capture freed space. 3667 * the captured space will re-add to the the block_rsv struct 3668 * after transaction commit 3669 */ 3670 void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, 3671 struct btrfs_block_rsv *block_rsv) 3672 { 3673 block_rsv->durable = 1; 3674 mutex_lock(&fs_info->durable_block_rsv_mutex); 3675 list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); 3676 mutex_unlock(&fs_info->durable_block_rsv_mutex); 3677 } 3678 3679 int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, 3680 struct btrfs_root *root, 3681 struct btrfs_block_rsv *block_rsv, 3682 u64 num_bytes) 3683 { 3684 int ret; 3685 3686 if (num_bytes == 0) 3687 return 0; 3688 3689 ret = reserve_metadata_bytes(trans, root, block_rsv, num_bytes, 1); 3690 if (!ret) { 3691 block_rsv_add_bytes(block_rsv, num_bytes, 1); 3692 return 0; 3693 } 3694 3695 return ret; 3696 } 3697 3698 int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, 3699 struct btrfs_root *root, 3700 struct btrfs_block_rsv *block_rsv, 3701 u64 min_reserved, int min_factor) 3702 { 3703 u64 num_bytes = 0; 3704 int commit_trans = 0; 3705 int ret = -ENOSPC; 3706 3707 if (!block_rsv) 3708 return 0; 3709 3710 spin_lock(&block_rsv->lock); 3711 if (min_factor > 0) 3712 num_bytes = div_factor(block_rsv->size, min_factor); 3713 if (min_reserved > num_bytes) 3714 num_bytes = min_reserved; 3715 3716 if (block_rsv->reserved >= num_bytes) { 3717 ret = 0; 3718 } else { 3719 num_bytes -= block_rsv->reserved; 3720 if (block_rsv->durable && 3721 block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) 3722 commit_trans = 1; 3723 } 3724 spin_unlock(&block_rsv->lock); 3725 if (!ret) 3726 return 0; 3727 3728 if (block_rsv->refill_used) { 3729 ret = reserve_metadata_bytes(trans, root, block_rsv, 3730 num_bytes, 0); 3731 if (!ret) { 3732 block_rsv_add_bytes(block_rsv, num_bytes, 0); 3733 return 0; 3734 } 3735 } 3736 3737 if (commit_trans) { 3738 if (trans) 3739 return -EAGAIN; 3740 trans = btrfs_join_transaction(root); 3741 BUG_ON(IS_ERR(trans)); 3742 ret = btrfs_commit_transaction(trans, root); 3743 return 0; 3744 } 3745 3746 return -ENOSPC; 3747 } 3748 3749 int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, 3750 struct btrfs_block_rsv *dst_rsv, 3751 u64 num_bytes) 3752 { 3753 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3754 } 3755 3756 void btrfs_block_rsv_release(struct btrfs_root *root, 3757 struct btrfs_block_rsv *block_rsv, 3758 u64 num_bytes) 3759 { 3760 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 3761 if (global_rsv->full || global_rsv == block_rsv || 3762 block_rsv->space_info != global_rsv->space_info) 3763 global_rsv = NULL; 3764 block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); 3765 } 3766 3767 /* 3768 * helper to calculate size of global block reservation. 3769 * the desired value is sum of space used by extent tree, 3770 * checksum tree and root tree 3771 */ 3772 static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) 3773 { 3774 struct btrfs_space_info *sinfo; 3775 u64 num_bytes; 3776 u64 meta_used; 3777 u64 data_used; 3778 int csum_size = btrfs_super_csum_size(&fs_info->super_copy); 3779 3780 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); 3781 spin_lock(&sinfo->lock); 3782 data_used = sinfo->bytes_used; 3783 spin_unlock(&sinfo->lock); 3784 3785 sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3786 spin_lock(&sinfo->lock); 3787 if (sinfo->flags & BTRFS_BLOCK_GROUP_DATA) 3788 data_used = 0; 3789 meta_used = sinfo->bytes_used; 3790 spin_unlock(&sinfo->lock); 3791 3792 num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * 3793 csum_size * 2; 3794 num_bytes += div64_u64(data_used + meta_used, 50); 3795 3796 if (num_bytes * 3 > meta_used) 3797 num_bytes = div64_u64(meta_used, 3); 3798 3799 return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); 3800 } 3801 3802 static void update_global_block_rsv(struct btrfs_fs_info *fs_info) 3803 { 3804 struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; 3805 struct btrfs_space_info *sinfo = block_rsv->space_info; 3806 u64 num_bytes; 3807 3808 num_bytes = calc_global_metadata_size(fs_info); 3809 3810 spin_lock(&block_rsv->lock); 3811 spin_lock(&sinfo->lock); 3812 3813 block_rsv->size = num_bytes; 3814 3815 num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + 3816 sinfo->bytes_reserved + sinfo->bytes_readonly + 3817 sinfo->bytes_may_use; 3818 3819 if (sinfo->total_bytes > num_bytes) { 3820 num_bytes = sinfo->total_bytes - num_bytes; 3821 block_rsv->reserved += num_bytes; 3822 sinfo->bytes_reserved += num_bytes; 3823 } 3824 3825 if (block_rsv->reserved >= block_rsv->size) { 3826 num_bytes = block_rsv->reserved - block_rsv->size; 3827 sinfo->bytes_reserved -= num_bytes; 3828 sinfo->reservation_progress++; 3829 block_rsv->reserved = block_rsv->size; 3830 block_rsv->full = 1; 3831 } 3832 3833 spin_unlock(&sinfo->lock); 3834 spin_unlock(&block_rsv->lock); 3835 } 3836 3837 static void init_global_block_rsv(struct btrfs_fs_info *fs_info) 3838 { 3839 struct btrfs_space_info *space_info; 3840 3841 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); 3842 fs_info->chunk_block_rsv.space_info = space_info; 3843 fs_info->chunk_block_rsv.priority = 10; 3844 3845 space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); 3846 fs_info->global_block_rsv.space_info = space_info; 3847 fs_info->global_block_rsv.priority = 10; 3848 fs_info->global_block_rsv.refill_used = 1; 3849 fs_info->delalloc_block_rsv.space_info = space_info; 3850 fs_info->trans_block_rsv.space_info = space_info; 3851 fs_info->empty_block_rsv.space_info = space_info; 3852 fs_info->empty_block_rsv.priority = 10; 3853 3854 fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; 3855 fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; 3856 fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; 3857 fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; 3858 fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; 3859 3860 btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); 3861 3862 btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); 3863 3864 update_global_block_rsv(fs_info); 3865 } 3866 3867 static void release_global_block_rsv(struct btrfs_fs_info *fs_info) 3868 { 3869 block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); 3870 WARN_ON(fs_info->delalloc_block_rsv.size > 0); 3871 WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); 3872 WARN_ON(fs_info->trans_block_rsv.size > 0); 3873 WARN_ON(fs_info->trans_block_rsv.reserved > 0); 3874 WARN_ON(fs_info->chunk_block_rsv.size > 0); 3875 WARN_ON(fs_info->chunk_block_rsv.reserved > 0); 3876 } 3877 3878 int btrfs_truncate_reserve_metadata(struct btrfs_trans_handle *trans, 3879 struct btrfs_root *root, 3880 struct btrfs_block_rsv *rsv) 3881 { 3882 struct btrfs_block_rsv *trans_rsv = &root->fs_info->trans_block_rsv; 3883 u64 num_bytes; 3884 int ret; 3885 3886 /* 3887 * Truncate should be freeing data, but give us 2 items just in case it 3888 * needs to use some space. We may want to be smarter about this in the 3889 * future. 3890 */ 3891 num_bytes = btrfs_calc_trans_metadata_size(root, 2); 3892 3893 /* We already have enough bytes, just return */ 3894 if (rsv->reserved >= num_bytes) 3895 return 0; 3896 3897 num_bytes -= rsv->reserved; 3898 3899 /* 3900 * You should have reserved enough space before hand to do this, so this 3901 * should not fail. 3902 */ 3903 ret = block_rsv_migrate_bytes(trans_rsv, rsv, num_bytes); 3904 BUG_ON(ret); 3905 3906 return 0; 3907 } 3908 3909 void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, 3910 struct btrfs_root *root) 3911 { 3912 if (!trans->bytes_reserved) 3913 return; 3914 3915 BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); 3916 btrfs_block_rsv_release(root, trans->block_rsv, 3917 trans->bytes_reserved); 3918 trans->bytes_reserved = 0; 3919 } 3920 3921 int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, 3922 struct inode *inode) 3923 { 3924 struct btrfs_root *root = BTRFS_I(inode)->root; 3925 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3926 struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; 3927 3928 /* 3929 * We need to hold space in order to delete our orphan item once we've 3930 * added it, so this takes the reservation so we can release it later 3931 * when we are truly done with the orphan item. 3932 */ 3933 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3934 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3935 } 3936 3937 void btrfs_orphan_release_metadata(struct inode *inode) 3938 { 3939 struct btrfs_root *root = BTRFS_I(inode)->root; 3940 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 1); 3941 btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); 3942 } 3943 3944 int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, 3945 struct btrfs_pending_snapshot *pending) 3946 { 3947 struct btrfs_root *root = pending->root; 3948 struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); 3949 struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; 3950 /* 3951 * two for root back/forward refs, two for directory entries 3952 * and one for root of the snapshot. 3953 */ 3954 u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); 3955 dst_rsv->space_info = src_rsv->space_info; 3956 return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); 3957 } 3958 3959 static unsigned drop_outstanding_extent(struct inode *inode) 3960 { 3961 unsigned dropped_extents = 0; 3962 3963 spin_lock(&BTRFS_I(inode)->lock); 3964 BUG_ON(!BTRFS_I(inode)->outstanding_extents); 3965 BTRFS_I(inode)->outstanding_extents--; 3966 3967 /* 3968 * If we have more or the same amount of outsanding extents than we have 3969 * reserved then we need to leave the reserved extents count alone. 3970 */ 3971 if (BTRFS_I(inode)->outstanding_extents >= 3972 BTRFS_I(inode)->reserved_extents) 3973 goto out; 3974 3975 dropped_extents = BTRFS_I(inode)->reserved_extents - 3976 BTRFS_I(inode)->outstanding_extents; 3977 BTRFS_I(inode)->reserved_extents -= dropped_extents; 3978 out: 3979 spin_unlock(&BTRFS_I(inode)->lock); 3980 return dropped_extents; 3981 } 3982 3983 static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) 3984 { 3985 return num_bytes >>= 3; 3986 } 3987 3988 int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) 3989 { 3990 struct btrfs_root *root = BTRFS_I(inode)->root; 3991 struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; 3992 u64 to_reserve = 0; 3993 unsigned nr_extents = 0; 3994 int ret; 3995 3996 if (btrfs_transaction_in_commit(root->fs_info)) 3997 schedule_timeout(1); 3998 3999 num_bytes = ALIGN(num_bytes, root->sectorsize); 4000 4001 spin_lock(&BTRFS_I(inode)->lock); 4002 BTRFS_I(inode)->outstanding_extents++; 4003 4004 if (BTRFS_I(inode)->outstanding_extents > 4005 BTRFS_I(inode)->reserved_extents) { 4006 nr_extents = BTRFS_I(inode)->outstanding_extents - 4007 BTRFS_I(inode)->reserved_extents; 4008 BTRFS_I(inode)->reserved_extents += nr_extents; 4009 4010 to_reserve = btrfs_calc_trans_metadata_size(root, nr_extents); 4011 } 4012 spin_unlock(&BTRFS_I(inode)->lock); 4013 4014 to_reserve += calc_csum_metadata_size(inode, num_bytes); 4015 ret = reserve_metadata_bytes(NULL, root, block_rsv, to_reserve, 1); 4016 if (ret) { 4017 unsigned dropped; 4018 /* 4019 * We don't need the return value since our reservation failed, 4020 * we just need to clean up our counter. 4021 */ 4022 dropped = drop_outstanding_extent(inode); 4023 WARN_ON(dropped > 1); 4024 return ret; 4025 } 4026 4027 block_rsv_add_bytes(block_rsv, to_reserve, 1); 4028 4029 return 0; 4030 } 4031 4032 void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) 4033 { 4034 struct btrfs_root *root = BTRFS_I(inode)->root; 4035 u64 to_free = 0; 4036 unsigned dropped; 4037 4038 num_bytes = ALIGN(num_bytes, root->sectorsize); 4039 dropped = drop_outstanding_extent(inode); 4040 4041 to_free = calc_csum_metadata_size(inode, num_bytes); 4042 if (dropped > 0) 4043 to_free += btrfs_calc_trans_metadata_size(root, dropped); 4044 4045 btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, 4046 to_free); 4047 } 4048 4049 int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) 4050 { 4051 int ret; 4052 4053 ret = btrfs_check_data_free_space(inode, num_bytes); 4054 if (ret) 4055 return ret; 4056 4057 ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); 4058 if (ret) { 4059 btrfs_free_reserved_data_space(inode, num_bytes); 4060 return ret; 4061 } 4062 4063 return 0; 4064 } 4065 4066 void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) 4067 { 4068 btrfs_delalloc_release_metadata(inode, num_bytes); 4069 btrfs_free_reserved_data_space(inode, num_bytes); 4070 } 4071 4072 static int update_block_group(struct btrfs_trans_handle *trans, 4073 struct btrfs_root *root, 4074 u64 bytenr, u64 num_bytes, int alloc) 4075 { 4076 struct btrfs_block_group_cache *cache = NULL; 4077 struct btrfs_fs_info *info = root->fs_info; 4078 u64 total = num_bytes; 4079 u64 old_val; 4080 u64 byte_in_group; 4081 int factor; 4082 4083 /* block accounting for super block */ 4084 spin_lock(&info->delalloc_lock); 4085 old_val = btrfs_super_bytes_used(&info->super_copy); 4086 if (alloc) 4087 old_val += num_bytes; 4088 else 4089 old_val -= num_bytes; 4090 btrfs_set_super_bytes_used(&info->super_copy, old_val); 4091 spin_unlock(&info->delalloc_lock); 4092 4093 while (total) { 4094 cache = btrfs_lookup_block_group(info, bytenr); 4095 if (!cache) 4096 return -1; 4097 if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | 4098 BTRFS_BLOCK_GROUP_RAID1 | 4099 BTRFS_BLOCK_GROUP_RAID10)) 4100 factor = 2; 4101 else 4102 factor = 1; 4103 /* 4104 * If this block group has free space cache written out, we 4105 * need to make sure to load it if we are removing space. This 4106 * is because we need the unpinning stage to actually add the 4107 * space back to the block group, otherwise we will leak space. 4108 */ 4109 if (!alloc && cache->cached == BTRFS_CACHE_NO) 4110 cache_block_group(cache, trans, NULL, 1); 4111 4112 byte_in_group = bytenr - cache->key.objectid; 4113 WARN_ON(byte_in_group > cache->key.offset); 4114 4115 spin_lock(&cache->space_info->lock); 4116 spin_lock(&cache->lock); 4117 4118 if (btrfs_super_cache_generation(&info->super_copy) != 0 && 4119 cache->disk_cache_state < BTRFS_DC_CLEAR) 4120 cache->disk_cache_state = BTRFS_DC_CLEAR; 4121 4122 cache->dirty = 1; 4123 old_val = btrfs_block_group_used(&cache->item); 4124 num_bytes = min(total, cache->key.offset - byte_in_group); 4125 if (alloc) { 4126 old_val += num_bytes; 4127 btrfs_set_block_group_used(&cache->item, old_val); 4128 cache->reserved -= num_bytes; 4129 cache->space_info->bytes_reserved -= num_bytes; 4130 cache->space_info->reservation_progress++; 4131 cache->space_info->bytes_used += num_bytes; 4132 cache->space_info->disk_used += num_bytes * factor; 4133 spin_unlock(&cache->lock); 4134 spin_unlock(&cache->space_info->lock); 4135 } else { 4136 old_val -= num_bytes; 4137 btrfs_set_block_group_used(&cache->item, old_val); 4138 cache->pinned += num_bytes; 4139 cache->space_info->bytes_pinned += num_bytes; 4140 cache->space_info->bytes_used -= num_bytes; 4141 cache->space_info->disk_used -= num_bytes * factor; 4142 spin_unlock(&cache->lock); 4143 spin_unlock(&cache->space_info->lock); 4144 4145 set_extent_dirty(info->pinned_extents, 4146 bytenr, bytenr + num_bytes - 1, 4147 GFP_NOFS | __GFP_NOFAIL); 4148 } 4149 btrfs_put_block_group(cache); 4150 total -= num_bytes; 4151 bytenr += num_bytes; 4152 } 4153 return 0; 4154 } 4155 4156 static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) 4157 { 4158 struct btrfs_block_group_cache *cache; 4159 u64 bytenr; 4160 4161 cache = btrfs_lookup_first_block_group(root->fs_info, search_start); 4162 if (!cache) 4163 return 0; 4164 4165 bytenr = cache->key.objectid; 4166 btrfs_put_block_group(cache); 4167 4168 return bytenr; 4169 } 4170 4171 static int pin_down_extent(struct btrfs_root *root, 4172 struct btrfs_block_group_cache *cache, 4173 u64 bytenr, u64 num_bytes, int reserved) 4174 { 4175 spin_lock(&cache->space_info->lock); 4176 spin_lock(&cache->lock); 4177 cache->pinned += num_bytes; 4178 cache->space_info->bytes_pinned += num_bytes; 4179 if (reserved) { 4180 cache->reserved -= num_bytes; 4181 cache->space_info->bytes_reserved -= num_bytes; 4182 cache->space_info->reservation_progress++; 4183 } 4184 spin_unlock(&cache->lock); 4185 spin_unlock(&cache->space_info->lock); 4186 4187 set_extent_dirty(root->fs_info->pinned_extents, bytenr, 4188 bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); 4189 return 0; 4190 } 4191 4192 /* 4193 * this function must be called within transaction 4194 */ 4195 int btrfs_pin_extent(struct btrfs_root *root, 4196 u64 bytenr, u64 num_bytes, int reserved) 4197 { 4198 struct btrfs_block_group_cache *cache; 4199 4200 cache = btrfs_lookup_block_group(root->fs_info, bytenr); 4201 BUG_ON(!cache); 4202 4203 pin_down_extent(root, cache, bytenr, num_bytes, reserved); 4204 4205 btrfs_put_block_group(cache); 4206 return 0; 4207 } 4208 4209 /* 4210 * update size of reserved extents. this function may return -EAGAIN 4211 * if 'reserve' is true or 'sinfo' is false. 4212 */ 4213 int btrfs_update_reserved_bytes(struct btrfs_block_group_cache *cache, 4214 u64 num_bytes, int reserve, int sinfo) 4215 { 4216 int ret = 0; 4217 if (sinfo) { 4218 struct btrfs_space_info *space_info = cache->space_info; 4219 spin_lock(&space_info->lock); 4220 spin_lock(&cache->lock); 4221 if (reserve) { 4222 if (cache->ro) { 4223 ret = -EAGAIN; 4224 } else { 4225 cache->reserved += num_bytes; 4226 space_info->bytes_reserved += num_bytes; 4227 } 4228 } else { 4229 if (cache->ro) 4230 space_info->bytes_readonly += num_bytes; 4231 cache->reserved -= num_bytes; 4232 space_info->bytes_reserved -= num_bytes; 4233 space_info->reservation_progress++; 4234 } 4235 spin_unlock(&cache->lock); 4236 spin_unlock(&space_info->lock); 4237 } else { 4238 spin_lock(&cache->lock); 4239 if (cache->ro) { 4240 ret = -EAGAIN; 4241 } else { 4242 if (reserve) 4243 cache->reserved += num_bytes; 4244 else 4245 cache->reserved -= num_bytes; 4246 } 4247 spin_unlock(&cache->lock); 4248 } 4249 return ret; 4250 } 4251 4252 int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 4253 struct btrfs_root *root) 4254 { 4255 struct btrfs_fs_info *fs_info = root->fs_info; 4256 struct btrfs_caching_control *next; 4257 struct btrfs_caching_control *caching_ctl; 4258 struct btrfs_block_group_cache *cache; 4259 4260 down_write(&fs_info->extent_commit_sem); 4261 4262 list_for_each_entry_safe(caching_ctl, next, 4263 &fs_info->caching_block_groups, list) { 4264 cache = caching_ctl->block_group; 4265 if (block_group_cache_done(cache)) { 4266 cache->last_byte_to_unpin = (u64)-1; 4267 list_del_init(&caching_ctl->list); 4268 put_caching_control(caching_ctl); 4269 } else { 4270 cache->last_byte_to_unpin = caching_ctl->progress; 4271 } 4272 } 4273 4274 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4275 fs_info->pinned_extents = &fs_info->freed_extents[1]; 4276 else 4277 fs_info->pinned_extents = &fs_info->freed_extents[0]; 4278 4279 up_write(&fs_info->extent_commit_sem); 4280 4281 update_global_block_rsv(fs_info); 4282 return 0; 4283 } 4284 4285 static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 4286 { 4287 struct btrfs_fs_info *fs_info = root->fs_info; 4288 struct btrfs_block_group_cache *cache = NULL; 4289 u64 len; 4290 4291 while (start <= end) { 4292 if (!cache || 4293 start >= cache->key.objectid + cache->key.offset) { 4294 if (cache) 4295 btrfs_put_block_group(cache); 4296 cache = btrfs_lookup_block_group(fs_info, start); 4297 BUG_ON(!cache); 4298 } 4299 4300 len = cache->key.objectid + cache->key.offset - start; 4301 len = min(len, end + 1 - start); 4302 4303 if (start < cache->last_byte_to_unpin) { 4304 len = min(len, cache->last_byte_to_unpin - start); 4305 btrfs_add_free_space(cache, start, len); 4306 } 4307 4308 start += len; 4309 4310 spin_lock(&cache->space_info->lock); 4311 spin_lock(&cache->lock); 4312 cache->pinned -= len; 4313 cache->space_info->bytes_pinned -= len; 4314 if (cache->ro) { 4315 cache->space_info->bytes_readonly += len; 4316 } else if (cache->reserved_pinned > 0) { 4317 len = min(len, cache->reserved_pinned); 4318 cache->reserved_pinned -= len; 4319 cache->space_info->bytes_reserved += len; 4320 } 4321 spin_unlock(&cache->lock); 4322 spin_unlock(&cache->space_info->lock); 4323 } 4324 4325 if (cache) 4326 btrfs_put_block_group(cache); 4327 return 0; 4328 } 4329 4330 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 4331 struct btrfs_root *root) 4332 { 4333 struct btrfs_fs_info *fs_info = root->fs_info; 4334 struct extent_io_tree *unpin; 4335 struct btrfs_block_rsv *block_rsv; 4336 struct btrfs_block_rsv *next_rsv; 4337 u64 start; 4338 u64 end; 4339 int idx; 4340 int ret; 4341 4342 if (fs_info->pinned_extents == &fs_info->freed_extents[0]) 4343 unpin = &fs_info->freed_extents[1]; 4344 else 4345 unpin = &fs_info->freed_extents[0]; 4346 4347 while (1) { 4348 ret = find_first_extent_bit(unpin, 0, &start, &end, 4349 EXTENT_DIRTY); 4350 if (ret) 4351 break; 4352 4353 if (btrfs_test_opt(root, DISCARD)) 4354 ret = btrfs_discard_extent(root, start, 4355 end + 1 - start, NULL); 4356 4357 clear_extent_dirty(unpin, start, end, GFP_NOFS); 4358 unpin_extent_range(root, start, end); 4359 cond_resched(); 4360 } 4361 4362 mutex_lock(&fs_info->durable_block_rsv_mutex); 4363 list_for_each_entry_safe(block_rsv, next_rsv, 4364 &fs_info->durable_block_rsv_list, list) { 4365 4366 idx = trans->transid & 0x1; 4367 if (block_rsv->freed[idx] > 0) { 4368 block_rsv_add_bytes(block_rsv, 4369 block_rsv->freed[idx], 0); 4370 block_rsv->freed[idx] = 0; 4371 } 4372 if (atomic_read(&block_rsv->usage) == 0) { 4373 btrfs_block_rsv_release(root, block_rsv, (u64)-1); 4374 4375 if (block_rsv->freed[0] == 0 && 4376 block_rsv->freed[1] == 0) { 4377 list_del_init(&block_rsv->list); 4378 kfree(block_rsv); 4379 } 4380 } else { 4381 btrfs_block_rsv_release(root, block_rsv, 0); 4382 } 4383 } 4384 mutex_unlock(&fs_info->durable_block_rsv_mutex); 4385 4386 return 0; 4387 } 4388 4389 static int __btrfs_free_extent(struct btrfs_trans_handle *trans, 4390 struct btrfs_root *root, 4391 u64 bytenr, u64 num_bytes, u64 parent, 4392 u64 root_objectid, u64 owner_objectid, 4393 u64 owner_offset, int refs_to_drop, 4394 struct btrfs_delayed_extent_op *extent_op) 4395 { 4396 struct btrfs_key key; 4397 struct btrfs_path *path; 4398 struct btrfs_fs_info *info = root->fs_info; 4399 struct btrfs_root *extent_root = info->extent_root; 4400 struct extent_buffer *leaf; 4401 struct btrfs_extent_item *ei; 4402 struct btrfs_extent_inline_ref *iref; 4403 int ret; 4404 int is_data; 4405 int extent_slot = 0; 4406 int found_extent = 0; 4407 int num_to_del = 1; 4408 u32 item_size; 4409 u64 refs; 4410 4411 path = btrfs_alloc_path(); 4412 if (!path) 4413 return -ENOMEM; 4414 4415 path->reada = 1; 4416 path->leave_spinning = 1; 4417 4418 is_data = owner_objectid >= BTRFS_FIRST_FREE_OBJECTID; 4419 BUG_ON(!is_data && refs_to_drop != 1); 4420 4421 ret = lookup_extent_backref(trans, extent_root, path, &iref, 4422 bytenr, num_bytes, parent, 4423 root_objectid, owner_objectid, 4424 owner_offset); 4425 if (ret == 0) { 4426 extent_slot = path->slots[0]; 4427 while (extent_slot >= 0) { 4428 btrfs_item_key_to_cpu(path->nodes[0], &key, 4429 extent_slot); 4430 if (key.objectid != bytenr) 4431 break; 4432 if (key.type == BTRFS_EXTENT_ITEM_KEY && 4433 key.offset == num_bytes) { 4434 found_extent = 1; 4435 break; 4436 } 4437 if (path->slots[0] - extent_slot > 5) 4438 break; 4439 extent_slot--; 4440 } 4441 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4442 item_size = btrfs_item_size_nr(path->nodes[0], extent_slot); 4443 if (found_extent && item_size < sizeof(*ei)) 4444 found_extent = 0; 4445 #endif 4446 if (!found_extent) { 4447 BUG_ON(iref); 4448 ret = remove_extent_backref(trans, extent_root, path, 4449 NULL, refs_to_drop, 4450 is_data); 4451 BUG_ON(ret); 4452 btrfs_release_path(path); 4453 path->leave_spinning = 1; 4454 4455 key.objectid = bytenr; 4456 key.type = BTRFS_EXTENT_ITEM_KEY; 4457 key.offset = num_bytes; 4458 4459 ret = btrfs_search_slot(trans, extent_root, 4460 &key, path, -1, 1); 4461 if (ret) { 4462 printk(KERN_ERR "umm, got %d back from search" 4463 ", was looking for %llu\n", ret, 4464 (unsigned long long)bytenr); 4465 if (ret > 0) 4466 btrfs_print_leaf(extent_root, 4467 path->nodes[0]); 4468 } 4469 BUG_ON(ret); 4470 extent_slot = path->slots[0]; 4471 } 4472 } else { 4473 btrfs_print_leaf(extent_root, path->nodes[0]); 4474 WARN_ON(1); 4475 printk(KERN_ERR "btrfs unable to find ref byte nr %llu " 4476 "parent %llu root %llu owner %llu offset %llu\n", 4477 (unsigned long long)bytenr, 4478 (unsigned long long)parent, 4479 (unsigned long long)root_objectid, 4480 (unsigned long long)owner_objectid, 4481 (unsigned long long)owner_offset); 4482 } 4483 4484 leaf = path->nodes[0]; 4485 item_size = btrfs_item_size_nr(leaf, extent_slot); 4486 #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 4487 if (item_size < sizeof(*ei)) { 4488 BUG_ON(found_extent || extent_slot != path->slots[0]); 4489 ret = convert_extent_item_v0(trans, extent_root, path, 4490 owner_objectid, 0); 4491 BUG_ON(ret < 0); 4492 4493 btrfs_release_path(path); 4494 path->leave_spinning = 1; 4495 4496 key.objectid = bytenr; 4497 key.type = BTRFS_EXTENT_ITEM_KEY; 4498 key.offset = num_bytes; 4499 4500 ret = btrfs_search_slot(trans, extent_root, &key, path, 4501 -1, 1); 4502 if (ret) { 4503 printk(KERN_ERR "umm, got %d back from search" 4504 ", was looking for %llu\n", ret, 4505 (unsigned long long)bytenr); 4506 btrfs_print_leaf(extent_root, path->nodes[0]); 4507 } 4508 BUG_ON(ret); 4509 extent_slot = path->slots[0]; 4510 leaf = path->nodes[0]; 4511 item_size = btrfs_item_size_nr(leaf, extent_slot); 4512 } 4513 #endif 4514 BUG_ON(item_size < sizeof(*ei)); 4515 ei = btrfs_item_ptr(leaf, extent_slot, 4516 struct btrfs_extent_item); 4517 if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { 4518 struct btrfs_tree_block_info *bi; 4519 BUG_ON(item_size < sizeof(*ei) + sizeof(*bi)); 4520 bi = (struct btrfs_tree_block_info *)(ei + 1); 4521 WARN_ON(owner_objectid != btrfs_tree_block_level(leaf, bi)); 4522 } 4523 4524 refs = btrfs_extent_refs(leaf, ei); 4525 BUG_ON(refs < refs_to_drop); 4526 refs -= refs_to_drop; 4527 4528 if (refs > 0) { 4529 if (extent_op) 4530 __run_delayed_extent_op(extent_op, leaf, ei); 4531 /* 4532 * In the case of inline back ref, reference count will 4533 * be updated by remove_extent_backref 4534 */ 4535 if (iref) { 4536 BUG_ON(!found_extent); 4537 } else { 4538 btrfs_set_extent_refs(leaf, ei, refs); 4539 btrfs_mark_buffer_dirty(leaf); 4540 } 4541 if (found_extent) { 4542 ret = remove_extent_backref(trans, extent_root, path, 4543 iref, refs_to_drop, 4544 is_data); 4545 BUG_ON(ret); 4546 } 4547 } else { 4548 if (found_extent) { 4549 BUG_ON(is_data && refs_to_drop != 4550 extent_data_ref_count(root, path, iref)); 4551 if (iref) { 4552 BUG_ON(path->slots[0] != extent_slot); 4553 } else { 4554 BUG_ON(path->slots[0] != extent_slot + 1); 4555 path->slots[0] = extent_slot; 4556 num_to_del = 2; 4557 } 4558 } 4559 4560 ret = btrfs_del_items(trans, extent_root, path, path->slots[0], 4561 num_to_del); 4562 BUG_ON(ret); 4563 btrfs_release_path(path); 4564 4565 if (is_data) { 4566 ret = btrfs_del_csums(trans, root, bytenr, num_bytes); 4567 BUG_ON(ret); 4568 } else { 4569 invalidate_mapping_pages(info->btree_inode->i_mapping, 4570 bytenr >> PAGE_CACHE_SHIFT, 4571 (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); 4572 } 4573 4574 ret = update_block_group(trans, root, bytenr, num_bytes, 0); 4575 BUG_ON(ret); 4576 } 4577 btrfs_free_path(path); 4578 return ret; 4579 } 4580 4581 /* 4582 * when we free an block, it is possible (and likely) that we free the last 4583 * delayed ref for that extent as well. This searches the delayed ref tree for 4584 * a given extent, and if there are no other delayed refs to be processed, it 4585 * removes it from the tree. 4586 */ 4587 static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, 4588 struct btrfs_root *root, u64 bytenr) 4589 { 4590 struct btrfs_delayed_ref_head *head; 4591 struct btrfs_delayed_ref_root *delayed_refs; 4592 struct btrfs_delayed_ref_node *ref; 4593 struct rb_node *node; 4594 int ret = 0; 4595 4596 delayed_refs = &trans->transaction->delayed_refs; 4597 spin_lock(&delayed_refs->lock); 4598 head = btrfs_find_delayed_ref_head(trans, bytenr); 4599 if (!head) 4600 goto out; 4601 4602 node = rb_prev(&head->node.rb_node); 4603 if (!node) 4604 goto out; 4605 4606 ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); 4607 4608 /* there are still entries for this ref, we can't drop it */ 4609 if (ref->bytenr == bytenr) 4610 goto out; 4611 4612 if (head->extent_op) { 4613 if (!head->must_insert_reserved) 4614 goto out; 4615 kfree(head->extent_op); 4616 head->extent_op = NULL; 4617 } 4618 4619 /* 4620 * waiting for the lock here would deadlock. If someone else has it 4621 * locked they are already in the process of dropping it anyway 4622 */ 4623 if (!mutex_trylock(&head->mutex)) 4624 goto out; 4625 4626 /* 4627 * at this point we have a head with no other entries. Go 4628 * ahead and process it. 4629 */ 4630 head->node.in_tree = 0; 4631 rb_erase(&head->node.rb_node, &delayed_refs->root); 4632 4633 delayed_refs->num_entries--; 4634 4635 /* 4636 * we don't take a ref on the node because we're removing it from the 4637 * tree, so we just steal the ref the tree was holding. 4638 */ 4639 delayed_refs->num_heads--; 4640 if (list_empty(&head->cluster)) 4641 delayed_refs->num_heads_ready--; 4642 4643 list_del_init(&head->cluster); 4644 spin_unlock(&delayed_refs->lock); 4645 4646 BUG_ON(head->extent_op); 4647 if (head->must_insert_reserved) 4648 ret = 1; 4649 4650 mutex_unlock(&head->mutex); 4651 btrfs_put_delayed_ref(&head->node); 4652 return ret; 4653 out: 4654 spin_unlock(&delayed_refs->lock); 4655 return 0; 4656 } 4657 4658 void btrfs_free_tree_block(struct btrfs_trans_handle *trans, 4659 struct btrfs_root *root, 4660 struct extent_buffer *buf, 4661 u64 parent, int last_ref) 4662 { 4663 struct btrfs_block_rsv *block_rsv; 4664 struct btrfs_block_group_cache *cache = NULL; 4665 int ret; 4666 4667 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4668 ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, 4669 parent, root->root_key.objectid, 4670 btrfs_header_level(buf), 4671 BTRFS_DROP_DELAYED_REF, NULL); 4672 BUG_ON(ret); 4673 } 4674 4675 if (!last_ref) 4676 return; 4677 4678 block_rsv = get_block_rsv(trans, root); 4679 cache = btrfs_lookup_block_group(root->fs_info, buf->start); 4680 if (block_rsv->space_info != cache->space_info) 4681 goto out; 4682 4683 if (btrfs_header_generation(buf) == trans->transid) { 4684 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 4685 ret = check_ref_cleanup(trans, root, buf->start); 4686 if (!ret) 4687 goto pin; 4688 } 4689 4690 if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { 4691 pin_down_extent(root, cache, buf->start, buf->len, 1); 4692 goto pin; 4693 } 4694 4695 WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); 4696 4697 btrfs_add_free_space(cache, buf->start, buf->len); 4698 ret = btrfs_update_reserved_bytes(cache, buf->len, 0, 0); 4699 if (ret == -EAGAIN) { 4700 /* block group became read-only */ 4701 btrfs_update_reserved_bytes(cache, buf->len, 0, 1); 4702 goto out; 4703 } 4704 4705 ret = 1; 4706 spin_lock(&block_rsv->lock); 4707 if (block_rsv->reserved < block_rsv->size) { 4708 block_rsv->reserved += buf->len; 4709 ret = 0; 4710 } 4711 spin_unlock(&block_rsv->lock); 4712 4713 if (ret) { 4714 spin_lock(&cache->space_info->lock); 4715 cache->space_info->bytes_reserved -= buf->len; 4716 cache->space_info->reservation_progress++; 4717 spin_unlock(&cache->space_info->lock); 4718 } 4719 goto out; 4720 } 4721 pin: 4722 if (block_rsv->durable && !cache->ro) { 4723 ret = 0; 4724 spin_lock(&cache->lock); 4725 if (!cache->ro) { 4726 cache->reserved_pinned += buf->len; 4727 ret = 1; 4728 } 4729 spin_unlock(&cache->lock); 4730 4731 if (ret) { 4732 spin_lock(&block_rsv->lock); 4733 block_rsv->freed[trans->transid & 0x1] += buf->len; 4734 spin_unlock(&block_rsv->lock); 4735 } 4736 } 4737 out: 4738 /* 4739 * Deleting the buffer, clear the corrupt flag since it doesn't matter 4740 * anymore. 4741 */ 4742 clear_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags); 4743 btrfs_put_block_group(cache); 4744 } 4745 4746 int btrfs_free_extent(struct btrfs_trans_handle *trans, 4747 struct btrfs_root *root, 4748 u64 bytenr, u64 num_bytes, u64 parent, 4749 u64 root_objectid, u64 owner, u64 offset) 4750 { 4751 int ret; 4752 4753 /* 4754 * tree log blocks never actually go into the extent allocation 4755 * tree, just update pinning info and exit early. 4756 */ 4757 if (root_objectid == BTRFS_TREE_LOG_OBJECTID) { 4758 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 4759 /* unlocks the pinned mutex */ 4760 btrfs_pin_extent(root, bytenr, num_bytes, 1); 4761 ret = 0; 4762 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 4763 ret = btrfs_add_delayed_tree_ref(trans, bytenr, num_bytes, 4764 parent, root_objectid, (int)owner, 4765 BTRFS_DROP_DELAYED_REF, NULL); 4766 BUG_ON(ret); 4767 } else { 4768 ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, 4769 parent, root_objectid, owner, 4770 offset, BTRFS_DROP_DELAYED_REF, NULL); 4771 BUG_ON(ret); 4772 } 4773 return ret; 4774 } 4775 4776 static u64 stripe_align(struct btrfs_root *root, u64 val) 4777 { 4778 u64 mask = ((u64)root->stripesize - 1); 4779 u64 ret = (val + mask) & ~mask; 4780 return ret; 4781 } 4782 4783 /* 4784 * when we wait for progress in the block group caching, its because 4785 * our allocation attempt failed at least once. So, we must sleep 4786 * and let some progress happen before we try again. 4787 * 4788 * This function will sleep at least once waiting for new free space to 4789 * show up, and then it will check the block group free space numbers 4790 * for our min num_bytes. Another option is to have it go ahead 4791 * and look in the rbtree for a free extent of a given size, but this 4792 * is a good start. 4793 */ 4794 static noinline int 4795 wait_block_group_cache_progress(struct btrfs_block_group_cache *cache, 4796 u64 num_bytes) 4797 { 4798 struct btrfs_caching_control *caching_ctl; 4799 DEFINE_WAIT(wait); 4800 4801 caching_ctl = get_caching_control(cache); 4802 if (!caching_ctl) 4803 return 0; 4804 4805 wait_event(caching_ctl->wait, block_group_cache_done(cache) || 4806 (cache->free_space_ctl->free_space >= num_bytes)); 4807 4808 put_caching_control(caching_ctl); 4809 return 0; 4810 } 4811 4812 static noinline int 4813 wait_block_group_cache_done(struct btrfs_block_group_cache *cache) 4814 { 4815 struct btrfs_caching_control *caching_ctl; 4816 DEFINE_WAIT(wait); 4817 4818 caching_ctl = get_caching_control(cache); 4819 if (!caching_ctl) 4820 return 0; 4821 4822 wait_event(caching_ctl->wait, block_group_cache_done(cache)); 4823 4824 put_caching_control(caching_ctl); 4825 return 0; 4826 } 4827 4828 static int get_block_group_index(struct btrfs_block_group_cache *cache) 4829 { 4830 int index; 4831 if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) 4832 index = 0; 4833 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) 4834 index = 1; 4835 else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) 4836 index = 2; 4837 else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) 4838 index = 3; 4839 else 4840 index = 4; 4841 return index; 4842 } 4843 4844 enum btrfs_loop_type { 4845 LOOP_FIND_IDEAL = 0, 4846 LOOP_CACHING_NOWAIT = 1, 4847 LOOP_CACHING_WAIT = 2, 4848 LOOP_ALLOC_CHUNK = 3, 4849 LOOP_NO_EMPTY_SIZE = 4, 4850 }; 4851 4852 /* 4853 * walks the btree of allocated extents and find a hole of a given size. 4854 * The key ins is changed to record the hole: 4855 * ins->objectid == block start 4856 * ins->flags = BTRFS_EXTENT_ITEM_KEY 4857 * ins->offset == number of blocks 4858 * Any available blocks before search_start are skipped. 4859 */ 4860 static noinline int find_free_extent(struct btrfs_trans_handle *trans, 4861 struct btrfs_root *orig_root, 4862 u64 num_bytes, u64 empty_size, 4863 u64 search_start, u64 search_end, 4864 u64 hint_byte, struct btrfs_key *ins, 4865 u64 data) 4866 { 4867 int ret = 0; 4868 struct btrfs_root *root = orig_root->fs_info->extent_root; 4869 struct btrfs_free_cluster *last_ptr = NULL; 4870 struct btrfs_block_group_cache *block_group = NULL; 4871 int empty_cluster = 2 * 1024 * 1024; 4872 int allowed_chunk_alloc = 0; 4873 int done_chunk_alloc = 0; 4874 struct btrfs_space_info *space_info; 4875 int last_ptr_loop = 0; 4876 int loop = 0; 4877 int index = 0; 4878 bool found_uncached_bg = false; 4879 bool failed_cluster_refill = false; 4880 bool failed_alloc = false; 4881 bool use_cluster = true; 4882 u64 ideal_cache_percent = 0; 4883 u64 ideal_cache_offset = 0; 4884 4885 WARN_ON(num_bytes < root->sectorsize); 4886 btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY); 4887 ins->objectid = 0; 4888 ins->offset = 0; 4889 4890 space_info = __find_space_info(root->fs_info, data); 4891 if (!space_info) { 4892 printk(KERN_ERR "No space info for %llu\n", data); 4893 return -ENOSPC; 4894 } 4895 4896 /* 4897 * If the space info is for both data and metadata it means we have a 4898 * small filesystem and we can't use the clustering stuff. 4899 */ 4900 if (btrfs_mixed_space_info(space_info)) 4901 use_cluster = false; 4902 4903 if (orig_root->ref_cows || empty_size) 4904 allowed_chunk_alloc = 1; 4905 4906 if (data & BTRFS_BLOCK_GROUP_METADATA && use_cluster) { 4907 last_ptr = &root->fs_info->meta_alloc_cluster; 4908 if (!btrfs_test_opt(root, SSD)) 4909 empty_cluster = 64 * 1024; 4910 } 4911 4912 if ((data & BTRFS_BLOCK_GROUP_DATA) && use_cluster && 4913 btrfs_test_opt(root, SSD)) { 4914 last_ptr = &root->fs_info->data_alloc_cluster; 4915 } 4916 4917 if (last_ptr) { 4918 spin_lock(&last_ptr->lock); 4919 if (last_ptr->block_group) 4920 hint_byte = last_ptr->window_start; 4921 spin_unlock(&last_ptr->lock); 4922 } 4923 4924 search_start = max(search_start, first_logical_byte(root, 0)); 4925 search_start = max(search_start, hint_byte); 4926 4927 if (!last_ptr) 4928 empty_cluster = 0; 4929 4930 if (search_start == hint_byte) { 4931 ideal_cache: 4932 block_group = btrfs_lookup_block_group(root->fs_info, 4933 search_start); 4934 /* 4935 * we don't want to use the block group if it doesn't match our 4936 * allocation bits, or if its not cached. 4937 * 4938 * However if we are re-searching with an ideal block group 4939 * picked out then we don't care that the block group is cached. 4940 */ 4941 if (block_group && block_group_bits(block_group, data) && 4942 (block_group->cached != BTRFS_CACHE_NO || 4943 search_start == ideal_cache_offset)) { 4944 down_read(&space_info->groups_sem); 4945 if (list_empty(&block_group->list) || 4946 block_group->ro) { 4947 /* 4948 * someone is removing this block group, 4949 * we can't jump into the have_block_group 4950 * target because our list pointers are not 4951 * valid 4952 */ 4953 btrfs_put_block_group(block_group); 4954 up_read(&space_info->groups_sem); 4955 } else { 4956 index = get_block_group_index(block_group); 4957 goto have_block_group; 4958 } 4959 } else if (block_group) { 4960 btrfs_put_block_group(block_group); 4961 } 4962 } 4963 search: 4964 down_read(&space_info->groups_sem); 4965 list_for_each_entry(block_group, &space_info->block_groups[index], 4966 list) { 4967 u64 offset; 4968 int cached; 4969 4970 btrfs_get_block_group(block_group); 4971 search_start = block_group->key.objectid; 4972 4973 /* 4974 * this can happen if we end up cycling through all the 4975 * raid types, but we want to make sure we only allocate 4976 * for the proper type. 4977 */ 4978 if (!block_group_bits(block_group, data)) { 4979 u64 extra = BTRFS_BLOCK_GROUP_DUP | 4980 BTRFS_BLOCK_GROUP_RAID1 | 4981 BTRFS_BLOCK_GROUP_RAID10; 4982 4983 /* 4984 * if they asked for extra copies and this block group 4985 * doesn't provide them, bail. This does allow us to 4986 * fill raid0 from raid1. 4987 */ 4988 if ((data & extra) && !(block_group->flags & extra)) 4989 goto loop; 4990 } 4991 4992 have_block_group: 4993 if (unlikely(block_group->cached == BTRFS_CACHE_NO)) { 4994 u64 free_percent; 4995 4996 ret = cache_block_group(block_group, trans, 4997 orig_root, 1); 4998 if (block_group->cached == BTRFS_CACHE_FINISHED) 4999 goto have_block_group; 5000 5001 free_percent = btrfs_block_group_used(&block_group->item); 5002 free_percent *= 100; 5003 free_percent = div64_u64(free_percent, 5004 block_group->key.offset); 5005 free_percent = 100 - free_percent; 5006 if (free_percent > ideal_cache_percent && 5007 likely(!block_group->ro)) { 5008 ideal_cache_offset = block_group->key.objectid; 5009 ideal_cache_percent = free_percent; 5010 } 5011 5012 /* 5013 * The caching workers are limited to 2 threads, so we 5014 * can queue as much work as we care to. 5015 */ 5016 if (loop > LOOP_FIND_IDEAL) { 5017 ret = cache_block_group(block_group, trans, 5018 orig_root, 0); 5019 BUG_ON(ret); 5020 } 5021 found_uncached_bg = true; 5022 5023 /* 5024 * If loop is set for cached only, try the next block 5025 * group. 5026 */ 5027 if (loop == LOOP_FIND_IDEAL) 5028 goto loop; 5029 } 5030 5031 cached = block_group_cache_done(block_group); 5032 if (unlikely(!cached)) 5033 found_uncached_bg = true; 5034 5035 if (unlikely(block_group->ro)) 5036 goto loop; 5037 5038 spin_lock(&block_group->free_space_ctl->tree_lock); 5039 if (cached && 5040 block_group->free_space_ctl->free_space < 5041 num_bytes + empty_size) { 5042 spin_unlock(&block_group->free_space_ctl->tree_lock); 5043 goto loop; 5044 } 5045 spin_unlock(&block_group->free_space_ctl->tree_lock); 5046 5047 /* 5048 * Ok we want to try and use the cluster allocator, so lets look 5049 * there, unless we are on LOOP_NO_EMPTY_SIZE, since we will 5050 * have tried the cluster allocator plenty of times at this 5051 * point and not have found anything, so we are likely way too 5052 * fragmented for the clustering stuff to find anything, so lets 5053 * just skip it and let the allocator find whatever block it can 5054 * find 5055 */ 5056 if (last_ptr && loop < LOOP_NO_EMPTY_SIZE) { 5057 /* 5058 * the refill lock keeps out other 5059 * people trying to start a new cluster 5060 */ 5061 spin_lock(&last_ptr->refill_lock); 5062 if (last_ptr->block_group && 5063 (last_ptr->block_group->ro || 5064 !block_group_bits(last_ptr->block_group, data))) { 5065 offset = 0; 5066 goto refill_cluster; 5067 } 5068 5069 offset = btrfs_alloc_from_cluster(block_group, last_ptr, 5070 num_bytes, search_start); 5071 if (offset) { 5072 /* we have a block, we're done */ 5073 spin_unlock(&last_ptr->refill_lock); 5074 goto checks; 5075 } 5076 5077 spin_lock(&last_ptr->lock); 5078 /* 5079 * whoops, this cluster doesn't actually point to 5080 * this block group. Get a ref on the block 5081 * group is does point to and try again 5082 */ 5083 if (!last_ptr_loop && last_ptr->block_group && 5084 last_ptr->block_group != block_group && 5085 index <= 5086 get_block_group_index(last_ptr->block_group)) { 5087 5088 btrfs_put_block_group(block_group); 5089 block_group = last_ptr->block_group; 5090 btrfs_get_block_group(block_group); 5091 spin_unlock(&last_ptr->lock); 5092 spin_unlock(&last_ptr->refill_lock); 5093 5094 last_ptr_loop = 1; 5095 search_start = block_group->key.objectid; 5096 /* 5097 * we know this block group is properly 5098 * in the list because 5099 * btrfs_remove_block_group, drops the 5100 * cluster before it removes the block 5101 * group from the list 5102 */ 5103 goto have_block_group; 5104 } 5105 spin_unlock(&last_ptr->lock); 5106 refill_cluster: 5107 /* 5108 * this cluster didn't work out, free it and 5109 * start over 5110 */ 5111 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5112 5113 last_ptr_loop = 0; 5114 5115 /* allocate a cluster in this block group */ 5116 ret = btrfs_find_space_cluster(trans, root, 5117 block_group, last_ptr, 5118 offset, num_bytes, 5119 empty_cluster + empty_size); 5120 if (ret == 0) { 5121 /* 5122 * now pull our allocation out of this 5123 * cluster 5124 */ 5125 offset = btrfs_alloc_from_cluster(block_group, 5126 last_ptr, num_bytes, 5127 search_start); 5128 if (offset) { 5129 /* we found one, proceed */ 5130 spin_unlock(&last_ptr->refill_lock); 5131 goto checks; 5132 } 5133 } else if (!cached && loop > LOOP_CACHING_NOWAIT 5134 && !failed_cluster_refill) { 5135 spin_unlock(&last_ptr->refill_lock); 5136 5137 failed_cluster_refill = true; 5138 wait_block_group_cache_progress(block_group, 5139 num_bytes + empty_cluster + empty_size); 5140 goto have_block_group; 5141 } 5142 5143 /* 5144 * at this point we either didn't find a cluster 5145 * or we weren't able to allocate a block from our 5146 * cluster. Free the cluster we've been trying 5147 * to use, and go to the next block group 5148 */ 5149 btrfs_return_cluster_to_free_space(NULL, last_ptr); 5150 spin_unlock(&last_ptr->refill_lock); 5151 goto loop; 5152 } 5153 5154 offset = btrfs_find_space_for_alloc(block_group, search_start, 5155 num_bytes, empty_size); 5156 /* 5157 * If we didn't find a chunk, and we haven't failed on this 5158 * block group before, and this block group is in the middle of 5159 * caching and we are ok with waiting, then go ahead and wait 5160 * for progress to be made, and set failed_alloc to true. 5161 * 5162 * If failed_alloc is true then we've already waited on this 5163 * block group once and should move on to the next block group. 5164 */ 5165 if (!offset && !failed_alloc && !cached && 5166 loop > LOOP_CACHING_NOWAIT) { 5167 wait_block_group_cache_progress(block_group, 5168 num_bytes + empty_size); 5169 failed_alloc = true; 5170 goto have_block_group; 5171 } else if (!offset) { 5172 goto loop; 5173 } 5174 checks: 5175 search_start = stripe_align(root, offset); 5176 /* move on to the next group */ 5177 if (search_start + num_bytes >= search_end) { 5178 btrfs_add_free_space(block_group, offset, num_bytes); 5179 goto loop; 5180 } 5181 5182 /* move on to the next group */ 5183 if (search_start + num_bytes > 5184 block_group->key.objectid + block_group->key.offset) { 5185 btrfs_add_free_space(block_group, offset, num_bytes); 5186 goto loop; 5187 } 5188 5189 ins->objectid = search_start; 5190 ins->offset = num_bytes; 5191 5192 if (offset < search_start) 5193 btrfs_add_free_space(block_group, offset, 5194 search_start - offset); 5195 BUG_ON(offset > search_start); 5196 5197 ret = btrfs_update_reserved_bytes(block_group, num_bytes, 1, 5198 (data & BTRFS_BLOCK_GROUP_DATA)); 5199 if (ret == -EAGAIN) { 5200 btrfs_add_free_space(block_group, offset, num_bytes); 5201 goto loop; 5202 } 5203 5204 /* we are all good, lets return */ 5205 ins->objectid = search_start; 5206 ins->offset = num_bytes; 5207 5208 if (offset < search_start) 5209 btrfs_add_free_space(block_group, offset, 5210 search_start - offset); 5211 BUG_ON(offset > search_start); 5212 btrfs_put_block_group(block_group); 5213 break; 5214 loop: 5215 failed_cluster_refill = false; 5216 failed_alloc = false; 5217 BUG_ON(index != get_block_group_index(block_group)); 5218 btrfs_put_block_group(block_group); 5219 } 5220 up_read(&space_info->groups_sem); 5221 5222 if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) 5223 goto search; 5224 5225 /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for 5226 * for them to make caching progress. Also 5227 * determine the best possible bg to cache 5228 * LOOP_CACHING_NOWAIT, search partially cached block groups, kicking 5229 * caching kthreads as we move along 5230 * LOOP_CACHING_WAIT, search everything, and wait if our bg is caching 5231 * LOOP_ALLOC_CHUNK, force a chunk allocation and try again 5232 * LOOP_NO_EMPTY_SIZE, set empty_size and empty_cluster to 0 and try 5233 * again 5234 */ 5235 if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE) { 5236 index = 0; 5237 if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { 5238 found_uncached_bg = false; 5239 loop++; 5240 if (!ideal_cache_percent) 5241 goto search; 5242 5243 /* 5244 * 1 of the following 2 things have happened so far 5245 * 5246 * 1) We found an ideal block group for caching that 5247 * is mostly full and will cache quickly, so we might 5248 * as well wait for it. 5249 * 5250 * 2) We searched for cached only and we didn't find 5251 * anything, and we didn't start any caching kthreads 5252 * either, so chances are we will loop through and 5253 * start a couple caching kthreads, and then come back 5254 * around and just wait for them. This will be slower 5255 * because we will have 2 caching kthreads reading at 5256 * the same time when we could have just started one 5257 * and waited for it to get far enough to give us an 5258 * allocation, so go ahead and go to the wait caching 5259 * loop. 5260 */ 5261 loop = LOOP_CACHING_WAIT; 5262 search_start = ideal_cache_offset; 5263 ideal_cache_percent = 0; 5264 goto ideal_cache; 5265 } else if (loop == LOOP_FIND_IDEAL) { 5266 /* 5267 * Didn't find a uncached bg, wait on anything we find 5268 * next. 5269 */ 5270 loop = LOOP_CACHING_WAIT; 5271 goto search; 5272 } 5273 5274 loop++; 5275 5276 if (loop == LOOP_ALLOC_CHUNK) { 5277 if (allowed_chunk_alloc) { 5278 ret = do_chunk_alloc(trans, root, num_bytes + 5279 2 * 1024 * 1024, data, 5280 CHUNK_ALLOC_LIMITED); 5281 allowed_chunk_alloc = 0; 5282 if (ret == 1) 5283 done_chunk_alloc = 1; 5284 } else if (!done_chunk_alloc && 5285 space_info->force_alloc == 5286 CHUNK_ALLOC_NO_FORCE) { 5287 space_info->force_alloc = CHUNK_ALLOC_LIMITED; 5288 } 5289 5290 /* 5291 * We didn't allocate a chunk, go ahead and drop the 5292 * empty size and loop again. 5293 */ 5294 if (!done_chunk_alloc) 5295 loop = LOOP_NO_EMPTY_SIZE; 5296 } 5297 5298 if (loop == LOOP_NO_EMPTY_SIZE) { 5299 empty_size = 0; 5300 empty_cluster = 0; 5301 } 5302 5303 goto search; 5304 } else if (!ins->objectid) { 5305 ret = -ENOSPC; 5306 } else if (ins->objectid) { 5307 ret = 0; 5308 } 5309 5310 return ret; 5311 } 5312 5313 static void dump_space_info(struct btrfs_space_info *info, u64 bytes, 5314 int dump_block_groups) 5315 { 5316 struct btrfs_block_group_cache *cache; 5317 int index = 0; 5318 5319 spin_lock(&info->lock); 5320 printk(KERN_INFO "space_info has %llu free, is %sfull\n", 5321 (unsigned long long)(info->total_bytes - info->bytes_used - 5322 info->bytes_pinned - info->bytes_reserved - 5323 info->bytes_readonly), 5324 (info->full) ? "" : "not "); 5325 printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " 5326 "reserved=%llu, may_use=%llu, readonly=%llu\n", 5327 (unsigned long long)info->total_bytes, 5328 (unsigned long long)info->bytes_used, 5329 (unsigned long long)info->bytes_pinned, 5330 (unsigned long long)info->bytes_reserved, 5331 (unsigned long long)info->bytes_may_use, 5332 (unsigned long long)info->bytes_readonly); 5333 spin_unlock(&info->lock); 5334 5335 if (!dump_block_groups) 5336 return; 5337 5338 down_read(&info->groups_sem); 5339 again: 5340 list_for_each_entry(cache, &info->block_groups[index], list) { 5341 spin_lock(&cache->lock); 5342 printk(KERN_INFO "block group %llu has %llu bytes, %llu used " 5343 "%llu pinned %llu reserved\n", 5344 (unsigned long long)cache->key.objectid, 5345 (unsigned long long)cache->key.offset, 5346 (unsigned long long)btrfs_block_group_used(&cache->item), 5347 (unsigned long long)cache->pinned, 5348 (unsigned long long)cache->reserved); 5349 btrfs_dump_free_space(cache, bytes); 5350 spin_unlock(&cache->lock); 5351 } 5352 if (++index < BTRFS_NR_RAID_TYPES) 5353 goto again; 5354 up_read(&info->groups_sem); 5355 } 5356 5357 int btrfs_reserve_extent(struct btrfs_trans_handle *trans, 5358 struct btrfs_root *root, 5359 u64 num_bytes, u64 min_alloc_size, 5360 u64 empty_size, u64 hint_byte, 5361 u64 search_end, struct btrfs_key *ins, 5362 u64 data) 5363 { 5364 int ret; 5365 u64 search_start = 0; 5366 5367 data = btrfs_get_alloc_profile(root, data); 5368 again: 5369 /* 5370 * the only place that sets empty_size is btrfs_realloc_node, which 5371 * is not called recursively on allocations 5372 */ 5373 if (empty_size || root->ref_cows) 5374 ret = do_chunk_alloc(trans, root->fs_info->extent_root, 5375 num_bytes + 2 * 1024 * 1024, data, 5376 CHUNK_ALLOC_NO_FORCE); 5377 5378 WARN_ON(num_bytes < root->sectorsize); 5379 ret = find_free_extent(trans, root, num_bytes, empty_size, 5380 search_start, search_end, hint_byte, 5381 ins, data); 5382 5383 if (ret == -ENOSPC && num_bytes > min_alloc_size) { 5384 num_bytes = num_bytes >> 1; 5385 num_bytes = num_bytes & ~(root->sectorsize - 1); 5386 num_bytes = max(num_bytes, min_alloc_size); 5387 do_chunk_alloc(trans, root->fs_info->extent_root, 5388 num_bytes, data, CHUNK_ALLOC_FORCE); 5389 goto again; 5390 } 5391 if (ret == -ENOSPC && btrfs_test_opt(root, ENOSPC_DEBUG)) { 5392 struct btrfs_space_info *sinfo; 5393 5394 sinfo = __find_space_info(root->fs_info, data); 5395 printk(KERN_ERR "btrfs allocation failed flags %llu, " 5396 "wanted %llu\n", (unsigned long long)data, 5397 (unsigned long long)num_bytes); 5398 dump_space_info(sinfo, num_bytes, 1); 5399 } 5400 5401 trace_btrfs_reserved_extent_alloc(root, ins->objectid, ins->offset); 5402 5403 return ret; 5404 } 5405 5406 int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 5407 { 5408 struct btrfs_block_group_cache *cache; 5409 int ret = 0; 5410 5411 cache = btrfs_lookup_block_group(root->fs_info, start); 5412 if (!cache) { 5413 printk(KERN_ERR "Unable to find block group for %llu\n", 5414 (unsigned long long)start); 5415 return -ENOSPC; 5416 } 5417 5418 if (btrfs_test_opt(root, DISCARD)) 5419 ret = btrfs_discard_extent(root, start, len, NULL); 5420 5421 btrfs_add_free_space(cache, start, len); 5422 btrfs_update_reserved_bytes(cache, len, 0, 1); 5423 btrfs_put_block_group(cache); 5424 5425 trace_btrfs_reserved_extent_free(root, start, len); 5426 5427 return ret; 5428 } 5429 5430 static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5431 struct btrfs_root *root, 5432 u64 parent, u64 root_objectid, 5433 u64 flags, u64 owner, u64 offset, 5434 struct btrfs_key *ins, int ref_mod) 5435 { 5436 int ret; 5437 struct btrfs_fs_info *fs_info = root->fs_info; 5438 struct btrfs_extent_item *extent_item; 5439 struct btrfs_extent_inline_ref *iref; 5440 struct btrfs_path *path; 5441 struct extent_buffer *leaf; 5442 int type; 5443 u32 size; 5444 5445 if (parent > 0) 5446 type = BTRFS_SHARED_DATA_REF_KEY; 5447 else 5448 type = BTRFS_EXTENT_DATA_REF_KEY; 5449 5450 size = sizeof(*extent_item) + btrfs_extent_inline_ref_size(type); 5451 5452 path = btrfs_alloc_path(); 5453 if (!path) 5454 return -ENOMEM; 5455 5456 path->leave_spinning = 1; 5457 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5458 ins, size); 5459 BUG_ON(ret); 5460 5461 leaf = path->nodes[0]; 5462 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5463 struct btrfs_extent_item); 5464 btrfs_set_extent_refs(leaf, extent_item, ref_mod); 5465 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5466 btrfs_set_extent_flags(leaf, extent_item, 5467 flags | BTRFS_EXTENT_FLAG_DATA); 5468 5469 iref = (struct btrfs_extent_inline_ref *)(extent_item + 1); 5470 btrfs_set_extent_inline_ref_type(leaf, iref, type); 5471 if (parent > 0) { 5472 struct btrfs_shared_data_ref *ref; 5473 ref = (struct btrfs_shared_data_ref *)(iref + 1); 5474 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5475 btrfs_set_shared_data_ref_count(leaf, ref, ref_mod); 5476 } else { 5477 struct btrfs_extent_data_ref *ref; 5478 ref = (struct btrfs_extent_data_ref *)(&iref->offset); 5479 btrfs_set_extent_data_ref_root(leaf, ref, root_objectid); 5480 btrfs_set_extent_data_ref_objectid(leaf, ref, owner); 5481 btrfs_set_extent_data_ref_offset(leaf, ref, offset); 5482 btrfs_set_extent_data_ref_count(leaf, ref, ref_mod); 5483 } 5484 5485 btrfs_mark_buffer_dirty(path->nodes[0]); 5486 btrfs_free_path(path); 5487 5488 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5489 if (ret) { 5490 printk(KERN_ERR "btrfs update block group failed for %llu " 5491 "%llu\n", (unsigned long long)ins->objectid, 5492 (unsigned long long)ins->offset); 5493 BUG(); 5494 } 5495 return ret; 5496 } 5497 5498 static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, 5499 struct btrfs_root *root, 5500 u64 parent, u64 root_objectid, 5501 u64 flags, struct btrfs_disk_key *key, 5502 int level, struct btrfs_key *ins) 5503 { 5504 int ret; 5505 struct btrfs_fs_info *fs_info = root->fs_info; 5506 struct btrfs_extent_item *extent_item; 5507 struct btrfs_tree_block_info *block_info; 5508 struct btrfs_extent_inline_ref *iref; 5509 struct btrfs_path *path; 5510 struct extent_buffer *leaf; 5511 u32 size = sizeof(*extent_item) + sizeof(*block_info) + sizeof(*iref); 5512 5513 path = btrfs_alloc_path(); 5514 if (!path) 5515 return -ENOMEM; 5516 5517 path->leave_spinning = 1; 5518 ret = btrfs_insert_empty_item(trans, fs_info->extent_root, path, 5519 ins, size); 5520 BUG_ON(ret); 5521 5522 leaf = path->nodes[0]; 5523 extent_item = btrfs_item_ptr(leaf, path->slots[0], 5524 struct btrfs_extent_item); 5525 btrfs_set_extent_refs(leaf, extent_item, 1); 5526 btrfs_set_extent_generation(leaf, extent_item, trans->transid); 5527 btrfs_set_extent_flags(leaf, extent_item, 5528 flags | BTRFS_EXTENT_FLAG_TREE_BLOCK); 5529 block_info = (struct btrfs_tree_block_info *)(extent_item + 1); 5530 5531 btrfs_set_tree_block_key(leaf, block_info, key); 5532 btrfs_set_tree_block_level(leaf, block_info, level); 5533 5534 iref = (struct btrfs_extent_inline_ref *)(block_info + 1); 5535 if (parent > 0) { 5536 BUG_ON(!(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)); 5537 btrfs_set_extent_inline_ref_type(leaf, iref, 5538 BTRFS_SHARED_BLOCK_REF_KEY); 5539 btrfs_set_extent_inline_ref_offset(leaf, iref, parent); 5540 } else { 5541 btrfs_set_extent_inline_ref_type(leaf, iref, 5542 BTRFS_TREE_BLOCK_REF_KEY); 5543 btrfs_set_extent_inline_ref_offset(leaf, iref, root_objectid); 5544 } 5545 5546 btrfs_mark_buffer_dirty(leaf); 5547 btrfs_free_path(path); 5548 5549 ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); 5550 if (ret) { 5551 printk(KERN_ERR "btrfs update block group failed for %llu " 5552 "%llu\n", (unsigned long long)ins->objectid, 5553 (unsigned long long)ins->offset); 5554 BUG(); 5555 } 5556 return ret; 5557 } 5558 5559 int btrfs_alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5560 struct btrfs_root *root, 5561 u64 root_objectid, u64 owner, 5562 u64 offset, struct btrfs_key *ins) 5563 { 5564 int ret; 5565 5566 BUG_ON(root_objectid == BTRFS_TREE_LOG_OBJECTID); 5567 5568 ret = btrfs_add_delayed_data_ref(trans, ins->objectid, ins->offset, 5569 0, root_objectid, owner, offset, 5570 BTRFS_ADD_DELAYED_EXTENT, NULL); 5571 return ret; 5572 } 5573 5574 /* 5575 * this is used by the tree logging recovery code. It records that 5576 * an extent has been allocated and makes sure to clear the free 5577 * space cache bits as well 5578 */ 5579 int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, 5580 struct btrfs_root *root, 5581 u64 root_objectid, u64 owner, u64 offset, 5582 struct btrfs_key *ins) 5583 { 5584 int ret; 5585 struct btrfs_block_group_cache *block_group; 5586 struct btrfs_caching_control *caching_ctl; 5587 u64 start = ins->objectid; 5588 u64 num_bytes = ins->offset; 5589 5590 block_group = btrfs_lookup_block_group(root->fs_info, ins->objectid); 5591 cache_block_group(block_group, trans, NULL, 0); 5592 caching_ctl = get_caching_control(block_group); 5593 5594 if (!caching_ctl) { 5595 BUG_ON(!block_group_cache_done(block_group)); 5596 ret = btrfs_remove_free_space(block_group, start, num_bytes); 5597 BUG_ON(ret); 5598 } else { 5599 mutex_lock(&caching_ctl->mutex); 5600 5601 if (start >= caching_ctl->progress) { 5602 ret = add_excluded_extent(root, start, num_bytes); 5603 BUG_ON(ret); 5604 } else if (start + num_bytes <= caching_ctl->progress) { 5605 ret = btrfs_remove_free_space(block_group, 5606 start, num_bytes); 5607 BUG_ON(ret); 5608 } else { 5609 num_bytes = caching_ctl->progress - start; 5610 ret = btrfs_remove_free_space(block_group, 5611 start, num_bytes); 5612 BUG_ON(ret); 5613 5614 start = caching_ctl->progress; 5615 num_bytes = ins->objectid + ins->offset - 5616 caching_ctl->progress; 5617 ret = add_excluded_extent(root, start, num_bytes); 5618 BUG_ON(ret); 5619 } 5620 5621 mutex_unlock(&caching_ctl->mutex); 5622 put_caching_control(caching_ctl); 5623 } 5624 5625 ret = btrfs_update_reserved_bytes(block_group, ins->offset, 1, 1); 5626 BUG_ON(ret); 5627 btrfs_put_block_group(block_group); 5628 ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, 5629 0, owner, offset, ins, 1); 5630 return ret; 5631 } 5632 5633 struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, 5634 struct btrfs_root *root, 5635 u64 bytenr, u32 blocksize, 5636 int level) 5637 { 5638 struct extent_buffer *buf; 5639 5640 buf = btrfs_find_create_tree_block(root, bytenr, blocksize); 5641 if (!buf) 5642 return ERR_PTR(-ENOMEM); 5643 btrfs_set_header_generation(buf, trans->transid); 5644 btrfs_set_buffer_lockdep_class(root->root_key.objectid, buf, level); 5645 btrfs_tree_lock(buf); 5646 clean_tree_block(trans, root, buf); 5647 5648 btrfs_set_lock_blocking(buf); 5649 btrfs_set_buffer_uptodate(buf); 5650 5651 if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { 5652 /* 5653 * we allow two log transactions at a time, use different 5654 * EXENT bit to differentiate dirty pages. 5655 */ 5656 if (root->log_transid % 2 == 0) 5657 set_extent_dirty(&root->dirty_log_pages, buf->start, 5658 buf->start + buf->len - 1, GFP_NOFS); 5659 else 5660 set_extent_new(&root->dirty_log_pages, buf->start, 5661 buf->start + buf->len - 1, GFP_NOFS); 5662 } else { 5663 set_extent_dirty(&trans->transaction->dirty_pages, buf->start, 5664 buf->start + buf->len - 1, GFP_NOFS); 5665 } 5666 trans->blocks_used++; 5667 /* this returns a buffer locked for blocking */ 5668 return buf; 5669 } 5670 5671 static struct btrfs_block_rsv * 5672 use_block_rsv(struct btrfs_trans_handle *trans, 5673 struct btrfs_root *root, u32 blocksize) 5674 { 5675 struct btrfs_block_rsv *block_rsv; 5676 struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; 5677 int ret; 5678 5679 block_rsv = get_block_rsv(trans, root); 5680 5681 if (block_rsv->size == 0) { 5682 ret = reserve_metadata_bytes(trans, root, block_rsv, 5683 blocksize, 0); 5684 /* 5685 * If we couldn't reserve metadata bytes try and use some from 5686 * the global reserve. 5687 */ 5688 if (ret && block_rsv != global_rsv) { 5689 ret = block_rsv_use_bytes(global_rsv, blocksize); 5690 if (!ret) 5691 return global_rsv; 5692 return ERR_PTR(ret); 5693 } else if (ret) { 5694 return ERR_PTR(ret); 5695 } 5696 return block_rsv; 5697 } 5698 5699 ret = block_rsv_use_bytes(block_rsv, blocksize); 5700 if (!ret) 5701 return block_rsv; 5702 if (ret) { 5703 WARN_ON(1); 5704 ret = reserve_metadata_bytes(trans, root, block_rsv, blocksize, 5705 0); 5706 if (!ret) { 5707 spin_lock(&block_rsv->lock); 5708 block_rsv->size += blocksize; 5709 spin_unlock(&block_rsv->lock); 5710 return block_rsv; 5711 } else if (ret && block_rsv != global_rsv) { 5712 ret = block_rsv_use_bytes(global_rsv, blocksize); 5713 if (!ret) 5714 return global_rsv; 5715 } 5716 } 5717 5718 return ERR_PTR(-ENOSPC); 5719 } 5720 5721 static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) 5722 { 5723 block_rsv_add_bytes(block_rsv, blocksize, 0); 5724 block_rsv_release_bytes(block_rsv, NULL, 0); 5725 } 5726 5727 /* 5728 * finds a free extent and does all the dirty work required for allocation 5729 * returns the key for the extent through ins, and a tree buffer for 5730 * the first block of the extent through buf. 5731 * 5732 * returns the tree buffer or NULL. 5733 */ 5734 struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, 5735 struct btrfs_root *root, u32 blocksize, 5736 u64 parent, u64 root_objectid, 5737 struct btrfs_disk_key *key, int level, 5738 u64 hint, u64 empty_size) 5739 { 5740 struct btrfs_key ins; 5741 struct btrfs_block_rsv *block_rsv; 5742 struct extent_buffer *buf; 5743 u64 flags = 0; 5744 int ret; 5745 5746 5747 block_rsv = use_block_rsv(trans, root, blocksize); 5748 if (IS_ERR(block_rsv)) 5749 return ERR_CAST(block_rsv); 5750 5751 ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, 5752 empty_size, hint, (u64)-1, &ins, 0); 5753 if (ret) { 5754 unuse_block_rsv(block_rsv, blocksize); 5755 return ERR_PTR(ret); 5756 } 5757 5758 buf = btrfs_init_new_buffer(trans, root, ins.objectid, 5759 blocksize, level); 5760 BUG_ON(IS_ERR(buf)); 5761 5762 if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { 5763 if (parent == 0) 5764 parent = ins.objectid; 5765 flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 5766 } else 5767 BUG_ON(parent > 0); 5768 5769 if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { 5770 struct btrfs_delayed_extent_op *extent_op; 5771 extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); 5772 BUG_ON(!extent_op); 5773 if (key) 5774 memcpy(&extent_op->key, key, sizeof(extent_op->key)); 5775 else 5776 memset(&extent_op->key, 0, sizeof(extent_op->key)); 5777 extent_op->flags_to_set = flags; 5778 extent_op->update_key = 1; 5779 extent_op->update_flags = 1; 5780 extent_op->is_data = 0; 5781 5782 ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, 5783 ins.offset, parent, root_objectid, 5784 level, BTRFS_ADD_DELAYED_EXTENT, 5785 extent_op); 5786 BUG_ON(ret); 5787 } 5788 return buf; 5789 } 5790 5791 struct walk_control { 5792 u64 refs[BTRFS_MAX_LEVEL]; 5793 u64 flags[BTRFS_MAX_LEVEL]; 5794 struct btrfs_key update_progress; 5795 int stage; 5796 int level; 5797 int shared_level; 5798 int update_ref; 5799 int keep_locks; 5800 int reada_slot; 5801 int reada_count; 5802 }; 5803 5804 #define DROP_REFERENCE 1 5805 #define UPDATE_BACKREF 2 5806 5807 static noinline void reada_walk_down(struct btrfs_trans_handle *trans, 5808 struct btrfs_root *root, 5809 struct walk_control *wc, 5810 struct btrfs_path *path) 5811 { 5812 u64 bytenr; 5813 u64 generation; 5814 u64 refs; 5815 u64 flags; 5816 u32 nritems; 5817 u32 blocksize; 5818 struct btrfs_key key; 5819 struct extent_buffer *eb; 5820 int ret; 5821 int slot; 5822 int nread = 0; 5823 5824 if (path->slots[wc->level] < wc->reada_slot) { 5825 wc->reada_count = wc->reada_count * 2 / 3; 5826 wc->reada_count = max(wc->reada_count, 2); 5827 } else { 5828 wc->reada_count = wc->reada_count * 3 / 2; 5829 wc->reada_count = min_t(int, wc->reada_count, 5830 BTRFS_NODEPTRS_PER_BLOCK(root)); 5831 } 5832 5833 eb = path->nodes[wc->level]; 5834 nritems = btrfs_header_nritems(eb); 5835 blocksize = btrfs_level_size(root, wc->level - 1); 5836 5837 for (slot = path->slots[wc->level]; slot < nritems; slot++) { 5838 if (nread >= wc->reada_count) 5839 break; 5840 5841 cond_resched(); 5842 bytenr = btrfs_node_blockptr(eb, slot); 5843 generation = btrfs_node_ptr_generation(eb, slot); 5844 5845 if (slot == path->slots[wc->level]) 5846 goto reada; 5847 5848 if (wc->stage == UPDATE_BACKREF && 5849 generation <= root->root_key.offset) 5850 continue; 5851 5852 /* We don't lock the tree block, it's OK to be racy here */ 5853 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 5854 &refs, &flags); 5855 BUG_ON(ret); 5856 BUG_ON(refs == 0); 5857 5858 if (wc->stage == DROP_REFERENCE) { 5859 if (refs == 1) 5860 goto reada; 5861 5862 if (wc->level == 1 && 5863 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5864 continue; 5865 if (!wc->update_ref || 5866 generation <= root->root_key.offset) 5867 continue; 5868 btrfs_node_key_to_cpu(eb, &key, slot); 5869 ret = btrfs_comp_cpu_keys(&key, 5870 &wc->update_progress); 5871 if (ret < 0) 5872 continue; 5873 } else { 5874 if (wc->level == 1 && 5875 (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 5876 continue; 5877 } 5878 reada: 5879 ret = readahead_tree_block(root, bytenr, blocksize, 5880 generation); 5881 if (ret) 5882 break; 5883 nread++; 5884 } 5885 wc->reada_slot = slot; 5886 } 5887 5888 /* 5889 * hepler to process tree block while walking down the tree. 5890 * 5891 * when wc->stage == UPDATE_BACKREF, this function updates 5892 * back refs for pointers in the block. 5893 * 5894 * NOTE: return value 1 means we should stop walking down. 5895 */ 5896 static noinline int walk_down_proc(struct btrfs_trans_handle *trans, 5897 struct btrfs_root *root, 5898 struct btrfs_path *path, 5899 struct walk_control *wc, int lookup_info) 5900 { 5901 int level = wc->level; 5902 struct extent_buffer *eb = path->nodes[level]; 5903 u64 flag = BTRFS_BLOCK_FLAG_FULL_BACKREF; 5904 int ret; 5905 5906 if (wc->stage == UPDATE_BACKREF && 5907 btrfs_header_owner(eb) != root->root_key.objectid) 5908 return 1; 5909 5910 /* 5911 * when reference count of tree block is 1, it won't increase 5912 * again. once full backref flag is set, we never clear it. 5913 */ 5914 if (lookup_info && 5915 ((wc->stage == DROP_REFERENCE && wc->refs[level] != 1) || 5916 (wc->stage == UPDATE_BACKREF && !(wc->flags[level] & flag)))) { 5917 BUG_ON(!path->locks[level]); 5918 ret = btrfs_lookup_extent_info(trans, root, 5919 eb->start, eb->len, 5920 &wc->refs[level], 5921 &wc->flags[level]); 5922 BUG_ON(ret); 5923 BUG_ON(wc->refs[level] == 0); 5924 } 5925 5926 if (wc->stage == DROP_REFERENCE) { 5927 if (wc->refs[level] > 1) 5928 return 1; 5929 5930 if (path->locks[level] && !wc->keep_locks) { 5931 btrfs_tree_unlock_rw(eb, path->locks[level]); 5932 path->locks[level] = 0; 5933 } 5934 return 0; 5935 } 5936 5937 /* wc->stage == UPDATE_BACKREF */ 5938 if (!(wc->flags[level] & flag)) { 5939 BUG_ON(!path->locks[level]); 5940 ret = btrfs_inc_ref(trans, root, eb, 1); 5941 BUG_ON(ret); 5942 ret = btrfs_dec_ref(trans, root, eb, 0); 5943 BUG_ON(ret); 5944 ret = btrfs_set_disk_extent_flags(trans, root, eb->start, 5945 eb->len, flag, 0); 5946 BUG_ON(ret); 5947 wc->flags[level] |= flag; 5948 } 5949 5950 /* 5951 * the block is shared by multiple trees, so it's not good to 5952 * keep the tree lock 5953 */ 5954 if (path->locks[level] && level > 0) { 5955 btrfs_tree_unlock_rw(eb, path->locks[level]); 5956 path->locks[level] = 0; 5957 } 5958 return 0; 5959 } 5960 5961 /* 5962 * hepler to process tree block pointer. 5963 * 5964 * when wc->stage == DROP_REFERENCE, this function checks 5965 * reference count of the block pointed to. if the block 5966 * is shared and we need update back refs for the subtree 5967 * rooted at the block, this function changes wc->stage to 5968 * UPDATE_BACKREF. if the block is shared and there is no 5969 * need to update back, this function drops the reference 5970 * to the block. 5971 * 5972 * NOTE: return value 1 means we should stop walking down. 5973 */ 5974 static noinline int do_walk_down(struct btrfs_trans_handle *trans, 5975 struct btrfs_root *root, 5976 struct btrfs_path *path, 5977 struct walk_control *wc, int *lookup_info) 5978 { 5979 u64 bytenr; 5980 u64 generation; 5981 u64 parent; 5982 u32 blocksize; 5983 struct btrfs_key key; 5984 struct extent_buffer *next; 5985 int level = wc->level; 5986 int reada = 0; 5987 int ret = 0; 5988 5989 generation = btrfs_node_ptr_generation(path->nodes[level], 5990 path->slots[level]); 5991 /* 5992 * if the lower level block was created before the snapshot 5993 * was created, we know there is no need to update back refs 5994 * for the subtree 5995 */ 5996 if (wc->stage == UPDATE_BACKREF && 5997 generation <= root->root_key.offset) { 5998 *lookup_info = 1; 5999 return 1; 6000 } 6001 6002 bytenr = btrfs_node_blockptr(path->nodes[level], path->slots[level]); 6003 blocksize = btrfs_level_size(root, level - 1); 6004 6005 next = btrfs_find_tree_block(root, bytenr, blocksize); 6006 if (!next) { 6007 next = btrfs_find_create_tree_block(root, bytenr, blocksize); 6008 if (!next) 6009 return -ENOMEM; 6010 reada = 1; 6011 } 6012 btrfs_tree_lock(next); 6013 btrfs_set_lock_blocking(next); 6014 6015 ret = btrfs_lookup_extent_info(trans, root, bytenr, blocksize, 6016 &wc->refs[level - 1], 6017 &wc->flags[level - 1]); 6018 BUG_ON(ret); 6019 BUG_ON(wc->refs[level - 1] == 0); 6020 *lookup_info = 0; 6021 6022 if (wc->stage == DROP_REFERENCE) { 6023 if (wc->refs[level - 1] > 1) { 6024 if (level == 1 && 6025 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 6026 goto skip; 6027 6028 if (!wc->update_ref || 6029 generation <= root->root_key.offset) 6030 goto skip; 6031 6032 btrfs_node_key_to_cpu(path->nodes[level], &key, 6033 path->slots[level]); 6034 ret = btrfs_comp_cpu_keys(&key, &wc->update_progress); 6035 if (ret < 0) 6036 goto skip; 6037 6038 wc->stage = UPDATE_BACKREF; 6039 wc->shared_level = level - 1; 6040 } 6041 } else { 6042 if (level == 1 && 6043 (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF)) 6044 goto skip; 6045 } 6046 6047 if (!btrfs_buffer_uptodate(next, generation)) { 6048 btrfs_tree_unlock(next); 6049 free_extent_buffer(next); 6050 next = NULL; 6051 *lookup_info = 1; 6052 } 6053 6054 if (!next) { 6055 if (reada && level == 1) 6056 reada_walk_down(trans, root, wc, path); 6057 next = read_tree_block(root, bytenr, blocksize, generation); 6058 if (!next) 6059 return -EIO; 6060 btrfs_tree_lock(next); 6061 btrfs_set_lock_blocking(next); 6062 } 6063 6064 level--; 6065 BUG_ON(level != btrfs_header_level(next)); 6066 path->nodes[level] = next; 6067 path->slots[level] = 0; 6068 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 6069 wc->level = level; 6070 if (wc->level == 1) 6071 wc->reada_slot = 0; 6072 return 0; 6073 skip: 6074 wc->refs[level - 1] = 0; 6075 wc->flags[level - 1] = 0; 6076 if (wc->stage == DROP_REFERENCE) { 6077 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) { 6078 parent = path->nodes[level]->start; 6079 } else { 6080 BUG_ON(root->root_key.objectid != 6081 btrfs_header_owner(path->nodes[level])); 6082 parent = 0; 6083 } 6084 6085 ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent, 6086 root->root_key.objectid, level - 1, 0); 6087 BUG_ON(ret); 6088 } 6089 btrfs_tree_unlock(next); 6090 free_extent_buffer(next); 6091 *lookup_info = 1; 6092 return 1; 6093 } 6094 6095 /* 6096 * hepler to process tree block while walking up the tree. 6097 * 6098 * when wc->stage == DROP_REFERENCE, this function drops 6099 * reference count on the block. 6100 * 6101 * when wc->stage == UPDATE_BACKREF, this function changes 6102 * wc->stage back to DROP_REFERENCE if we changed wc->stage 6103 * to UPDATE_BACKREF previously while processing the block. 6104 * 6105 * NOTE: return value 1 means we should stop walking up. 6106 */ 6107 static noinline int walk_up_proc(struct btrfs_trans_handle *trans, 6108 struct btrfs_root *root, 6109 struct btrfs_path *path, 6110 struct walk_control *wc) 6111 { 6112 int ret; 6113 int level = wc->level; 6114 struct extent_buffer *eb = path->nodes[level]; 6115 u64 parent = 0; 6116 6117 if (wc->stage == UPDATE_BACKREF) { 6118 BUG_ON(wc->shared_level < level); 6119 if (level < wc->shared_level) 6120 goto out; 6121 6122 ret = find_next_key(path, level + 1, &wc->update_progress); 6123 if (ret > 0) 6124 wc->update_ref = 0; 6125 6126 wc->stage = DROP_REFERENCE; 6127 wc->shared_level = -1; 6128 path->slots[level] = 0; 6129 6130 /* 6131 * check reference count again if the block isn't locked. 6132 * we should start walking down the tree again if reference 6133 * count is one. 6134 */ 6135 if (!path->locks[level]) { 6136 BUG_ON(level == 0); 6137 btrfs_tree_lock(eb); 6138 btrfs_set_lock_blocking(eb); 6139 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 6140 6141 ret = btrfs_lookup_extent_info(trans, root, 6142 eb->start, eb->len, 6143 &wc->refs[level], 6144 &wc->flags[level]); 6145 BUG_ON(ret); 6146 BUG_ON(wc->refs[level] == 0); 6147 if (wc->refs[level] == 1) { 6148 btrfs_tree_unlock_rw(eb, path->locks[level]); 6149 return 1; 6150 } 6151 } 6152 } 6153 6154 /* wc->stage == DROP_REFERENCE */ 6155 BUG_ON(wc->refs[level] > 1 && !path->locks[level]); 6156 6157 if (wc->refs[level] == 1) { 6158 if (level == 0) { 6159 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6160 ret = btrfs_dec_ref(trans, root, eb, 1); 6161 else 6162 ret = btrfs_dec_ref(trans, root, eb, 0); 6163 BUG_ON(ret); 6164 } 6165 /* make block locked assertion in clean_tree_block happy */ 6166 if (!path->locks[level] && 6167 btrfs_header_generation(eb) == trans->transid) { 6168 btrfs_tree_lock(eb); 6169 btrfs_set_lock_blocking(eb); 6170 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 6171 } 6172 clean_tree_block(trans, root, eb); 6173 } 6174 6175 if (eb == root->node) { 6176 if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6177 parent = eb->start; 6178 else 6179 BUG_ON(root->root_key.objectid != 6180 btrfs_header_owner(eb)); 6181 } else { 6182 if (wc->flags[level + 1] & BTRFS_BLOCK_FLAG_FULL_BACKREF) 6183 parent = path->nodes[level + 1]->start; 6184 else 6185 BUG_ON(root->root_key.objectid != 6186 btrfs_header_owner(path->nodes[level + 1])); 6187 } 6188 6189 btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); 6190 out: 6191 wc->refs[level] = 0; 6192 wc->flags[level] = 0; 6193 return 0; 6194 } 6195 6196 static noinline int walk_down_tree(struct btrfs_trans_handle *trans, 6197 struct btrfs_root *root, 6198 struct btrfs_path *path, 6199 struct walk_control *wc) 6200 { 6201 int level = wc->level; 6202 int lookup_info = 1; 6203 int ret; 6204 6205 while (level >= 0) { 6206 ret = walk_down_proc(trans, root, path, wc, lookup_info); 6207 if (ret > 0) 6208 break; 6209 6210 if (level == 0) 6211 break; 6212 6213 if (path->slots[level] >= 6214 btrfs_header_nritems(path->nodes[level])) 6215 break; 6216 6217 ret = do_walk_down(trans, root, path, wc, &lookup_info); 6218 if (ret > 0) { 6219 path->slots[level]++; 6220 continue; 6221 } else if (ret < 0) 6222 return ret; 6223 level = wc->level; 6224 } 6225 return 0; 6226 } 6227 6228 static noinline int walk_up_tree(struct btrfs_trans_handle *trans, 6229 struct btrfs_root *root, 6230 struct btrfs_path *path, 6231 struct walk_control *wc, int max_level) 6232 { 6233 int level = wc->level; 6234 int ret; 6235 6236 path->slots[level] = btrfs_header_nritems(path->nodes[level]); 6237 while (level < max_level && path->nodes[level]) { 6238 wc->level = level; 6239 if (path->slots[level] + 1 < 6240 btrfs_header_nritems(path->nodes[level])) { 6241 path->slots[level]++; 6242 return 0; 6243 } else { 6244 ret = walk_up_proc(trans, root, path, wc); 6245 if (ret > 0) 6246 return 0; 6247 6248 if (path->locks[level]) { 6249 btrfs_tree_unlock_rw(path->nodes[level], 6250 path->locks[level]); 6251 path->locks[level] = 0; 6252 } 6253 free_extent_buffer(path->nodes[level]); 6254 path->nodes[level] = NULL; 6255 level++; 6256 } 6257 } 6258 return 1; 6259 } 6260 6261 /* 6262 * drop a subvolume tree. 6263 * 6264 * this function traverses the tree freeing any blocks that only 6265 * referenced by the tree. 6266 * 6267 * when a shared tree block is found. this function decreases its 6268 * reference count by one. if update_ref is true, this function 6269 * also make sure backrefs for the shared block and all lower level 6270 * blocks are properly updated. 6271 */ 6272 int btrfs_drop_snapshot(struct btrfs_root *root, 6273 struct btrfs_block_rsv *block_rsv, int update_ref) 6274 { 6275 struct btrfs_path *path; 6276 struct btrfs_trans_handle *trans; 6277 struct btrfs_root *tree_root = root->fs_info->tree_root; 6278 struct btrfs_root_item *root_item = &root->root_item; 6279 struct walk_control *wc; 6280 struct btrfs_key key; 6281 int err = 0; 6282 int ret; 6283 int level; 6284 6285 path = btrfs_alloc_path(); 6286 if (!path) 6287 return -ENOMEM; 6288 6289 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6290 if (!wc) { 6291 btrfs_free_path(path); 6292 return -ENOMEM; 6293 } 6294 6295 trans = btrfs_start_transaction(tree_root, 0); 6296 BUG_ON(IS_ERR(trans)); 6297 6298 if (block_rsv) 6299 trans->block_rsv = block_rsv; 6300 6301 if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { 6302 level = btrfs_header_level(root->node); 6303 path->nodes[level] = btrfs_lock_root_node(root); 6304 btrfs_set_lock_blocking(path->nodes[level]); 6305 path->slots[level] = 0; 6306 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 6307 memset(&wc->update_progress, 0, 6308 sizeof(wc->update_progress)); 6309 } else { 6310 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress); 6311 memcpy(&wc->update_progress, &key, 6312 sizeof(wc->update_progress)); 6313 6314 level = root_item->drop_level; 6315 BUG_ON(level == 0); 6316 path->lowest_level = level; 6317 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); 6318 path->lowest_level = 0; 6319 if (ret < 0) { 6320 err = ret; 6321 goto out; 6322 } 6323 WARN_ON(ret > 0); 6324 6325 /* 6326 * unlock our path, this is safe because only this 6327 * function is allowed to delete this snapshot 6328 */ 6329 btrfs_unlock_up_safe(path, 0); 6330 6331 level = btrfs_header_level(root->node); 6332 while (1) { 6333 btrfs_tree_lock(path->nodes[level]); 6334 btrfs_set_lock_blocking(path->nodes[level]); 6335 6336 ret = btrfs_lookup_extent_info(trans, root, 6337 path->nodes[level]->start, 6338 path->nodes[level]->len, 6339 &wc->refs[level], 6340 &wc->flags[level]); 6341 BUG_ON(ret); 6342 BUG_ON(wc->refs[level] == 0); 6343 6344 if (level == root_item->drop_level) 6345 break; 6346 6347 btrfs_tree_unlock(path->nodes[level]); 6348 WARN_ON(wc->refs[level] != 1); 6349 level--; 6350 } 6351 } 6352 6353 wc->level = level; 6354 wc->shared_level = -1; 6355 wc->stage = DROP_REFERENCE; 6356 wc->update_ref = update_ref; 6357 wc->keep_locks = 0; 6358 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 6359 6360 while (1) { 6361 ret = walk_down_tree(trans, root, path, wc); 6362 if (ret < 0) { 6363 err = ret; 6364 break; 6365 } 6366 6367 ret = walk_up_tree(trans, root, path, wc, BTRFS_MAX_LEVEL); 6368 if (ret < 0) { 6369 err = ret; 6370 break; 6371 } 6372 6373 if (ret > 0) { 6374 BUG_ON(wc->stage != DROP_REFERENCE); 6375 break; 6376 } 6377 6378 if (wc->stage == DROP_REFERENCE) { 6379 level = wc->level; 6380 btrfs_node_key(path->nodes[level], 6381 &root_item->drop_progress, 6382 path->slots[level]); 6383 root_item->drop_level = level; 6384 } 6385 6386 BUG_ON(wc->level == 0); 6387 if (btrfs_should_end_transaction(trans, tree_root)) { 6388 ret = btrfs_update_root(trans, tree_root, 6389 &root->root_key, 6390 root_item); 6391 BUG_ON(ret); 6392 6393 btrfs_end_transaction_throttle(trans, tree_root); 6394 trans = btrfs_start_transaction(tree_root, 0); 6395 BUG_ON(IS_ERR(trans)); 6396 if (block_rsv) 6397 trans->block_rsv = block_rsv; 6398 } 6399 } 6400 btrfs_release_path(path); 6401 BUG_ON(err); 6402 6403 ret = btrfs_del_root(trans, tree_root, &root->root_key); 6404 BUG_ON(ret); 6405 6406 if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { 6407 ret = btrfs_find_last_root(tree_root, root->root_key.objectid, 6408 NULL, NULL); 6409 BUG_ON(ret < 0); 6410 if (ret > 0) { 6411 /* if we fail to delete the orphan item this time 6412 * around, it'll get picked up the next time. 6413 * 6414 * The most common failure here is just -ENOENT. 6415 */ 6416 btrfs_del_orphan_item(trans, tree_root, 6417 root->root_key.objectid); 6418 } 6419 } 6420 6421 if (root->in_radix) { 6422 btrfs_free_fs_root(tree_root->fs_info, root); 6423 } else { 6424 free_extent_buffer(root->node); 6425 free_extent_buffer(root->commit_root); 6426 kfree(root); 6427 } 6428 out: 6429 btrfs_end_transaction_throttle(trans, tree_root); 6430 kfree(wc); 6431 btrfs_free_path(path); 6432 return err; 6433 } 6434 6435 /* 6436 * drop subtree rooted at tree block 'node'. 6437 * 6438 * NOTE: this function will unlock and release tree block 'node' 6439 */ 6440 int btrfs_drop_subtree(struct btrfs_trans_handle *trans, 6441 struct btrfs_root *root, 6442 struct extent_buffer *node, 6443 struct extent_buffer *parent) 6444 { 6445 struct btrfs_path *path; 6446 struct walk_control *wc; 6447 int level; 6448 int parent_level; 6449 int ret = 0; 6450 int wret; 6451 6452 BUG_ON(root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); 6453 6454 path = btrfs_alloc_path(); 6455 if (!path) 6456 return -ENOMEM; 6457 6458 wc = kzalloc(sizeof(*wc), GFP_NOFS); 6459 if (!wc) { 6460 btrfs_free_path(path); 6461 return -ENOMEM; 6462 } 6463 6464 btrfs_assert_tree_locked(parent); 6465 parent_level = btrfs_header_level(parent); 6466 extent_buffer_get(parent); 6467 path->nodes[parent_level] = parent; 6468 path->slots[parent_level] = btrfs_header_nritems(parent); 6469 6470 btrfs_assert_tree_locked(node); 6471 level = btrfs_header_level(node); 6472 path->nodes[level] = node; 6473 path->slots[level] = 0; 6474 path->locks[level] = BTRFS_WRITE_LOCK_BLOCKING; 6475 6476 wc->refs[parent_level] = 1; 6477 wc->flags[parent_level] = BTRFS_BLOCK_FLAG_FULL_BACKREF; 6478 wc->level = level; 6479 wc->shared_level = -1; 6480 wc->stage = DROP_REFERENCE; 6481 wc->update_ref = 0; 6482 wc->keep_locks = 1; 6483 wc->reada_count = BTRFS_NODEPTRS_PER_BLOCK(root); 6484 6485 while (1) { 6486 wret = walk_down_tree(trans, root, path, wc); 6487 if (wret < 0) { 6488 ret = wret; 6489 break; 6490 } 6491 6492 wret = walk_up_tree(trans, root, path, wc, parent_level); 6493 if (wret < 0) 6494 ret = wret; 6495 if (wret != 0) 6496 break; 6497 } 6498 6499 kfree(wc); 6500 btrfs_free_path(path); 6501 return ret; 6502 } 6503 6504 static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) 6505 { 6506 u64 num_devices; 6507 u64 stripped = BTRFS_BLOCK_GROUP_RAID0 | 6508 BTRFS_BLOCK_GROUP_RAID1 | BTRFS_BLOCK_GROUP_RAID10; 6509 6510 /* 6511 * we add in the count of missing devices because we want 6512 * to make sure that any RAID levels on a degraded FS 6513 * continue to be honored. 6514 */ 6515 num_devices = root->fs_info->fs_devices->rw_devices + 6516 root->fs_info->fs_devices->missing_devices; 6517 6518 if (num_devices == 1) { 6519 stripped |= BTRFS_BLOCK_GROUP_DUP; 6520 stripped = flags & ~stripped; 6521 6522 /* turn raid0 into single device chunks */ 6523 if (flags & BTRFS_BLOCK_GROUP_RAID0) 6524 return stripped; 6525 6526 /* turn mirroring into duplication */ 6527 if (flags & (BTRFS_BLOCK_GROUP_RAID1 | 6528 BTRFS_BLOCK_GROUP_RAID10)) 6529 return stripped | BTRFS_BLOCK_GROUP_DUP; 6530 return flags; 6531 } else { 6532 /* they already had raid on here, just return */ 6533 if (flags & stripped) 6534 return flags; 6535 6536 stripped |= BTRFS_BLOCK_GROUP_DUP; 6537 stripped = flags & ~stripped; 6538 6539 /* switch duplicated blocks with raid1 */ 6540 if (flags & BTRFS_BLOCK_GROUP_DUP) 6541 return stripped | BTRFS_BLOCK_GROUP_RAID1; 6542 6543 /* turn single device chunks into raid0 */ 6544 return stripped | BTRFS_BLOCK_GROUP_RAID0; 6545 } 6546 return flags; 6547 } 6548 6549 static int set_block_group_ro(struct btrfs_block_group_cache *cache, int force) 6550 { 6551 struct btrfs_space_info *sinfo = cache->space_info; 6552 u64 num_bytes; 6553 u64 min_allocable_bytes; 6554 int ret = -ENOSPC; 6555 6556 6557 /* 6558 * We need some metadata space and system metadata space for 6559 * allocating chunks in some corner cases until we force to set 6560 * it to be readonly. 6561 */ 6562 if ((sinfo->flags & 6563 (BTRFS_BLOCK_GROUP_SYSTEM | BTRFS_BLOCK_GROUP_METADATA)) && 6564 !force) 6565 min_allocable_bytes = 1 * 1024 * 1024; 6566 else 6567 min_allocable_bytes = 0; 6568 6569 spin_lock(&sinfo->lock); 6570 spin_lock(&cache->lock); 6571 6572 if (cache->ro) { 6573 ret = 0; 6574 goto out; 6575 } 6576 6577 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6578 cache->bytes_super - btrfs_block_group_used(&cache->item); 6579 6580 if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + 6581 sinfo->bytes_may_use + sinfo->bytes_readonly + 6582 cache->reserved_pinned + num_bytes + min_allocable_bytes <= 6583 sinfo->total_bytes) { 6584 sinfo->bytes_readonly += num_bytes; 6585 sinfo->bytes_reserved += cache->reserved_pinned; 6586 cache->reserved_pinned = 0; 6587 cache->ro = 1; 6588 ret = 0; 6589 } 6590 out: 6591 spin_unlock(&cache->lock); 6592 spin_unlock(&sinfo->lock); 6593 return ret; 6594 } 6595 6596 int btrfs_set_block_group_ro(struct btrfs_root *root, 6597 struct btrfs_block_group_cache *cache) 6598 6599 { 6600 struct btrfs_trans_handle *trans; 6601 u64 alloc_flags; 6602 int ret; 6603 6604 BUG_ON(cache->ro); 6605 6606 trans = btrfs_join_transaction(root); 6607 BUG_ON(IS_ERR(trans)); 6608 6609 alloc_flags = update_block_group_flags(root, cache->flags); 6610 if (alloc_flags != cache->flags) 6611 do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6612 CHUNK_ALLOC_FORCE); 6613 6614 ret = set_block_group_ro(cache, 0); 6615 if (!ret) 6616 goto out; 6617 alloc_flags = get_alloc_profile(root, cache->space_info->flags); 6618 ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6619 CHUNK_ALLOC_FORCE); 6620 if (ret < 0) 6621 goto out; 6622 ret = set_block_group_ro(cache, 0); 6623 out: 6624 btrfs_end_transaction(trans, root); 6625 return ret; 6626 } 6627 6628 int btrfs_force_chunk_alloc(struct btrfs_trans_handle *trans, 6629 struct btrfs_root *root, u64 type) 6630 { 6631 u64 alloc_flags = get_alloc_profile(root, type); 6632 return do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 6633 CHUNK_ALLOC_FORCE); 6634 } 6635 6636 /* 6637 * helper to account the unused space of all the readonly block group in the 6638 * list. takes mirrors into account. 6639 */ 6640 static u64 __btrfs_get_ro_block_group_free_space(struct list_head *groups_list) 6641 { 6642 struct btrfs_block_group_cache *block_group; 6643 u64 free_bytes = 0; 6644 int factor; 6645 6646 list_for_each_entry(block_group, groups_list, list) { 6647 spin_lock(&block_group->lock); 6648 6649 if (!block_group->ro) { 6650 spin_unlock(&block_group->lock); 6651 continue; 6652 } 6653 6654 if (block_group->flags & (BTRFS_BLOCK_GROUP_RAID1 | 6655 BTRFS_BLOCK_GROUP_RAID10 | 6656 BTRFS_BLOCK_GROUP_DUP)) 6657 factor = 2; 6658 else 6659 factor = 1; 6660 6661 free_bytes += (block_group->key.offset - 6662 btrfs_block_group_used(&block_group->item)) * 6663 factor; 6664 6665 spin_unlock(&block_group->lock); 6666 } 6667 6668 return free_bytes; 6669 } 6670 6671 /* 6672 * helper to account the unused space of all the readonly block group in the 6673 * space_info. takes mirrors into account. 6674 */ 6675 u64 btrfs_account_ro_block_groups_free_space(struct btrfs_space_info *sinfo) 6676 { 6677 int i; 6678 u64 free_bytes = 0; 6679 6680 spin_lock(&sinfo->lock); 6681 6682 for(i = 0; i < BTRFS_NR_RAID_TYPES; i++) 6683 if (!list_empty(&sinfo->block_groups[i])) 6684 free_bytes += __btrfs_get_ro_block_group_free_space( 6685 &sinfo->block_groups[i]); 6686 6687 spin_unlock(&sinfo->lock); 6688 6689 return free_bytes; 6690 } 6691 6692 int btrfs_set_block_group_rw(struct btrfs_root *root, 6693 struct btrfs_block_group_cache *cache) 6694 { 6695 struct btrfs_space_info *sinfo = cache->space_info; 6696 u64 num_bytes; 6697 6698 BUG_ON(!cache->ro); 6699 6700 spin_lock(&sinfo->lock); 6701 spin_lock(&cache->lock); 6702 num_bytes = cache->key.offset - cache->reserved - cache->pinned - 6703 cache->bytes_super - btrfs_block_group_used(&cache->item); 6704 sinfo->bytes_readonly -= num_bytes; 6705 cache->ro = 0; 6706 spin_unlock(&cache->lock); 6707 spin_unlock(&sinfo->lock); 6708 return 0; 6709 } 6710 6711 /* 6712 * checks to see if its even possible to relocate this block group. 6713 * 6714 * @return - -1 if it's not a good idea to relocate this block group, 0 if its 6715 * ok to go ahead and try. 6716 */ 6717 int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr) 6718 { 6719 struct btrfs_block_group_cache *block_group; 6720 struct btrfs_space_info *space_info; 6721 struct btrfs_fs_devices *fs_devices = root->fs_info->fs_devices; 6722 struct btrfs_device *device; 6723 int full = 0; 6724 int ret = 0; 6725 6726 block_group = btrfs_lookup_block_group(root->fs_info, bytenr); 6727 6728 /* odd, couldn't find the block group, leave it alone */ 6729 if (!block_group) 6730 return -1; 6731 6732 /* no bytes used, we're good */ 6733 if (!btrfs_block_group_used(&block_group->item)) 6734 goto out; 6735 6736 space_info = block_group->space_info; 6737 spin_lock(&space_info->lock); 6738 6739 full = space_info->full; 6740 6741 /* 6742 * if this is the last block group we have in this space, we can't 6743 * relocate it unless we're able to allocate a new chunk below. 6744 * 6745 * Otherwise, we need to make sure we have room in the space to handle 6746 * all of the extents from this block group. If we can, we're good 6747 */ 6748 if ((space_info->total_bytes != block_group->key.offset) && 6749 (space_info->bytes_used + space_info->bytes_reserved + 6750 space_info->bytes_pinned + space_info->bytes_readonly + 6751 btrfs_block_group_used(&block_group->item) < 6752 space_info->total_bytes)) { 6753 spin_unlock(&space_info->lock); 6754 goto out; 6755 } 6756 spin_unlock(&space_info->lock); 6757 6758 /* 6759 * ok we don't have enough space, but maybe we have free space on our 6760 * devices to allocate new chunks for relocation, so loop through our 6761 * alloc devices and guess if we have enough space. However, if we 6762 * were marked as full, then we know there aren't enough chunks, and we 6763 * can just return. 6764 */ 6765 ret = -1; 6766 if (full) 6767 goto out; 6768 6769 mutex_lock(&root->fs_info->chunk_mutex); 6770 list_for_each_entry(device, &fs_devices->alloc_list, dev_alloc_list) { 6771 u64 min_free = btrfs_block_group_used(&block_group->item); 6772 u64 dev_offset; 6773 6774 /* 6775 * check to make sure we can actually find a chunk with enough 6776 * space to fit our block group in. 6777 */ 6778 if (device->total_bytes > device->bytes_used + min_free) { 6779 ret = find_free_dev_extent(NULL, device, min_free, 6780 &dev_offset, NULL); 6781 if (!ret) 6782 break; 6783 ret = -1; 6784 } 6785 } 6786 mutex_unlock(&root->fs_info->chunk_mutex); 6787 out: 6788 btrfs_put_block_group(block_group); 6789 return ret; 6790 } 6791 6792 static int find_first_block_group(struct btrfs_root *root, 6793 struct btrfs_path *path, struct btrfs_key *key) 6794 { 6795 int ret = 0; 6796 struct btrfs_key found_key; 6797 struct extent_buffer *leaf; 6798 int slot; 6799 6800 ret = btrfs_search_slot(NULL, root, key, path, 0, 0); 6801 if (ret < 0) 6802 goto out; 6803 6804 while (1) { 6805 slot = path->slots[0]; 6806 leaf = path->nodes[0]; 6807 if (slot >= btrfs_header_nritems(leaf)) { 6808 ret = btrfs_next_leaf(root, path); 6809 if (ret == 0) 6810 continue; 6811 if (ret < 0) 6812 goto out; 6813 break; 6814 } 6815 btrfs_item_key_to_cpu(leaf, &found_key, slot); 6816 6817 if (found_key.objectid >= key->objectid && 6818 found_key.type == BTRFS_BLOCK_GROUP_ITEM_KEY) { 6819 ret = 0; 6820 goto out; 6821 } 6822 path->slots[0]++; 6823 } 6824 out: 6825 return ret; 6826 } 6827 6828 void btrfs_put_block_group_cache(struct btrfs_fs_info *info) 6829 { 6830 struct btrfs_block_group_cache *block_group; 6831 u64 last = 0; 6832 6833 while (1) { 6834 struct inode *inode; 6835 6836 block_group = btrfs_lookup_first_block_group(info, last); 6837 while (block_group) { 6838 spin_lock(&block_group->lock); 6839 if (block_group->iref) 6840 break; 6841 spin_unlock(&block_group->lock); 6842 block_group = next_block_group(info->tree_root, 6843 block_group); 6844 } 6845 if (!block_group) { 6846 if (last == 0) 6847 break; 6848 last = 0; 6849 continue; 6850 } 6851 6852 inode = block_group->inode; 6853 block_group->iref = 0; 6854 block_group->inode = NULL; 6855 spin_unlock(&block_group->lock); 6856 iput(inode); 6857 last = block_group->key.objectid + block_group->key.offset; 6858 btrfs_put_block_group(block_group); 6859 } 6860 } 6861 6862 int btrfs_free_block_groups(struct btrfs_fs_info *info) 6863 { 6864 struct btrfs_block_group_cache *block_group; 6865 struct btrfs_space_info *space_info; 6866 struct btrfs_caching_control *caching_ctl; 6867 struct rb_node *n; 6868 6869 down_write(&info->extent_commit_sem); 6870 while (!list_empty(&info->caching_block_groups)) { 6871 caching_ctl = list_entry(info->caching_block_groups.next, 6872 struct btrfs_caching_control, list); 6873 list_del(&caching_ctl->list); 6874 put_caching_control(caching_ctl); 6875 } 6876 up_write(&info->extent_commit_sem); 6877 6878 spin_lock(&info->block_group_cache_lock); 6879 while ((n = rb_last(&info->block_group_cache_tree)) != NULL) { 6880 block_group = rb_entry(n, struct btrfs_block_group_cache, 6881 cache_node); 6882 rb_erase(&block_group->cache_node, 6883 &info->block_group_cache_tree); 6884 spin_unlock(&info->block_group_cache_lock); 6885 6886 down_write(&block_group->space_info->groups_sem); 6887 list_del(&block_group->list); 6888 up_write(&block_group->space_info->groups_sem); 6889 6890 if (block_group->cached == BTRFS_CACHE_STARTED) 6891 wait_block_group_cache_done(block_group); 6892 6893 /* 6894 * We haven't cached this block group, which means we could 6895 * possibly have excluded extents on this block group. 6896 */ 6897 if (block_group->cached == BTRFS_CACHE_NO) 6898 free_excluded_extents(info->extent_root, block_group); 6899 6900 btrfs_remove_free_space_cache(block_group); 6901 btrfs_put_block_group(block_group); 6902 6903 spin_lock(&info->block_group_cache_lock); 6904 } 6905 spin_unlock(&info->block_group_cache_lock); 6906 6907 /* now that all the block groups are freed, go through and 6908 * free all the space_info structs. This is only called during 6909 * the final stages of unmount, and so we know nobody is 6910 * using them. We call synchronize_rcu() once before we start, 6911 * just to be on the safe side. 6912 */ 6913 synchronize_rcu(); 6914 6915 release_global_block_rsv(info); 6916 6917 while(!list_empty(&info->space_info)) { 6918 space_info = list_entry(info->space_info.next, 6919 struct btrfs_space_info, 6920 list); 6921 if (space_info->bytes_pinned > 0 || 6922 space_info->bytes_reserved > 0) { 6923 WARN_ON(1); 6924 dump_space_info(space_info, 0, 0); 6925 } 6926 list_del(&space_info->list); 6927 kfree(space_info); 6928 } 6929 return 0; 6930 } 6931 6932 static void __link_block_group(struct btrfs_space_info *space_info, 6933 struct btrfs_block_group_cache *cache) 6934 { 6935 int index = get_block_group_index(cache); 6936 6937 down_write(&space_info->groups_sem); 6938 list_add_tail(&cache->list, &space_info->block_groups[index]); 6939 up_write(&space_info->groups_sem); 6940 } 6941 6942 int btrfs_read_block_groups(struct btrfs_root *root) 6943 { 6944 struct btrfs_path *path; 6945 int ret; 6946 struct btrfs_block_group_cache *cache; 6947 struct btrfs_fs_info *info = root->fs_info; 6948 struct btrfs_space_info *space_info; 6949 struct btrfs_key key; 6950 struct btrfs_key found_key; 6951 struct extent_buffer *leaf; 6952 int need_clear = 0; 6953 u64 cache_gen; 6954 6955 root = info->extent_root; 6956 key.objectid = 0; 6957 key.offset = 0; 6958 btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY); 6959 path = btrfs_alloc_path(); 6960 if (!path) 6961 return -ENOMEM; 6962 path->reada = 1; 6963 6964 cache_gen = btrfs_super_cache_generation(&root->fs_info->super_copy); 6965 if (cache_gen != 0 && 6966 btrfs_super_generation(&root->fs_info->super_copy) != cache_gen) 6967 need_clear = 1; 6968 if (btrfs_test_opt(root, CLEAR_CACHE)) 6969 need_clear = 1; 6970 if (!btrfs_test_opt(root, SPACE_CACHE) && cache_gen) 6971 printk(KERN_INFO "btrfs: disk space caching is enabled\n"); 6972 6973 while (1) { 6974 ret = find_first_block_group(root, path, &key); 6975 if (ret > 0) 6976 break; 6977 if (ret != 0) 6978 goto error; 6979 leaf = path->nodes[0]; 6980 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]); 6981 cache = kzalloc(sizeof(*cache), GFP_NOFS); 6982 if (!cache) { 6983 ret = -ENOMEM; 6984 goto error; 6985 } 6986 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 6987 GFP_NOFS); 6988 if (!cache->free_space_ctl) { 6989 kfree(cache); 6990 ret = -ENOMEM; 6991 goto error; 6992 } 6993 6994 atomic_set(&cache->count, 1); 6995 spin_lock_init(&cache->lock); 6996 cache->fs_info = info; 6997 INIT_LIST_HEAD(&cache->list); 6998 INIT_LIST_HEAD(&cache->cluster_list); 6999 7000 if (need_clear) 7001 cache->disk_cache_state = BTRFS_DC_CLEAR; 7002 7003 read_extent_buffer(leaf, &cache->item, 7004 btrfs_item_ptr_offset(leaf, path->slots[0]), 7005 sizeof(cache->item)); 7006 memcpy(&cache->key, &found_key, sizeof(found_key)); 7007 7008 key.objectid = found_key.objectid + found_key.offset; 7009 btrfs_release_path(path); 7010 cache->flags = btrfs_block_group_flags(&cache->item); 7011 cache->sectorsize = root->sectorsize; 7012 7013 btrfs_init_free_space_ctl(cache); 7014 7015 /* 7016 * We need to exclude the super stripes now so that the space 7017 * info has super bytes accounted for, otherwise we'll think 7018 * we have more space than we actually do. 7019 */ 7020 exclude_super_stripes(root, cache); 7021 7022 /* 7023 * check for two cases, either we are full, and therefore 7024 * don't need to bother with the caching work since we won't 7025 * find any space, or we are empty, and we can just add all 7026 * the space in and be done with it. This saves us _alot_ of 7027 * time, particularly in the full case. 7028 */ 7029 if (found_key.offset == btrfs_block_group_used(&cache->item)) { 7030 cache->last_byte_to_unpin = (u64)-1; 7031 cache->cached = BTRFS_CACHE_FINISHED; 7032 free_excluded_extents(root, cache); 7033 } else if (btrfs_block_group_used(&cache->item) == 0) { 7034 cache->last_byte_to_unpin = (u64)-1; 7035 cache->cached = BTRFS_CACHE_FINISHED; 7036 add_new_free_space(cache, root->fs_info, 7037 found_key.objectid, 7038 found_key.objectid + 7039 found_key.offset); 7040 free_excluded_extents(root, cache); 7041 } 7042 7043 ret = update_space_info(info, cache->flags, found_key.offset, 7044 btrfs_block_group_used(&cache->item), 7045 &space_info); 7046 BUG_ON(ret); 7047 cache->space_info = space_info; 7048 spin_lock(&cache->space_info->lock); 7049 cache->space_info->bytes_readonly += cache->bytes_super; 7050 spin_unlock(&cache->space_info->lock); 7051 7052 __link_block_group(space_info, cache); 7053 7054 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7055 BUG_ON(ret); 7056 7057 set_avail_alloc_bits(root->fs_info, cache->flags); 7058 if (btrfs_chunk_readonly(root, cache->key.objectid)) 7059 set_block_group_ro(cache, 1); 7060 } 7061 7062 list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { 7063 if (!(get_alloc_profile(root, space_info->flags) & 7064 (BTRFS_BLOCK_GROUP_RAID10 | 7065 BTRFS_BLOCK_GROUP_RAID1 | 7066 BTRFS_BLOCK_GROUP_DUP))) 7067 continue; 7068 /* 7069 * avoid allocating from un-mirrored block group if there are 7070 * mirrored block groups. 7071 */ 7072 list_for_each_entry(cache, &space_info->block_groups[3], list) 7073 set_block_group_ro(cache, 1); 7074 list_for_each_entry(cache, &space_info->block_groups[4], list) 7075 set_block_group_ro(cache, 1); 7076 } 7077 7078 init_global_block_rsv(info); 7079 ret = 0; 7080 error: 7081 btrfs_free_path(path); 7082 return ret; 7083 } 7084 7085 int btrfs_make_block_group(struct btrfs_trans_handle *trans, 7086 struct btrfs_root *root, u64 bytes_used, 7087 u64 type, u64 chunk_objectid, u64 chunk_offset, 7088 u64 size) 7089 { 7090 int ret; 7091 struct btrfs_root *extent_root; 7092 struct btrfs_block_group_cache *cache; 7093 7094 extent_root = root->fs_info->extent_root; 7095 7096 root->fs_info->last_trans_log_full_commit = trans->transid; 7097 7098 cache = kzalloc(sizeof(*cache), GFP_NOFS); 7099 if (!cache) 7100 return -ENOMEM; 7101 cache->free_space_ctl = kzalloc(sizeof(*cache->free_space_ctl), 7102 GFP_NOFS); 7103 if (!cache->free_space_ctl) { 7104 kfree(cache); 7105 return -ENOMEM; 7106 } 7107 7108 cache->key.objectid = chunk_offset; 7109 cache->key.offset = size; 7110 cache->key.type = BTRFS_BLOCK_GROUP_ITEM_KEY; 7111 cache->sectorsize = root->sectorsize; 7112 cache->fs_info = root->fs_info; 7113 7114 atomic_set(&cache->count, 1); 7115 spin_lock_init(&cache->lock); 7116 INIT_LIST_HEAD(&cache->list); 7117 INIT_LIST_HEAD(&cache->cluster_list); 7118 7119 btrfs_init_free_space_ctl(cache); 7120 7121 btrfs_set_block_group_used(&cache->item, bytes_used); 7122 btrfs_set_block_group_chunk_objectid(&cache->item, chunk_objectid); 7123 cache->flags = type; 7124 btrfs_set_block_group_flags(&cache->item, type); 7125 7126 cache->last_byte_to_unpin = (u64)-1; 7127 cache->cached = BTRFS_CACHE_FINISHED; 7128 exclude_super_stripes(root, cache); 7129 7130 add_new_free_space(cache, root->fs_info, chunk_offset, 7131 chunk_offset + size); 7132 7133 free_excluded_extents(root, cache); 7134 7135 ret = update_space_info(root->fs_info, cache->flags, size, bytes_used, 7136 &cache->space_info); 7137 BUG_ON(ret); 7138 7139 spin_lock(&cache->space_info->lock); 7140 cache->space_info->bytes_readonly += cache->bytes_super; 7141 spin_unlock(&cache->space_info->lock); 7142 7143 __link_block_group(cache->space_info, cache); 7144 7145 ret = btrfs_add_block_group_cache(root->fs_info, cache); 7146 BUG_ON(ret); 7147 7148 ret = btrfs_insert_item(trans, extent_root, &cache->key, &cache->item, 7149 sizeof(cache->item)); 7150 BUG_ON(ret); 7151 7152 set_avail_alloc_bits(extent_root->fs_info, type); 7153 7154 return 0; 7155 } 7156 7157 int btrfs_remove_block_group(struct btrfs_trans_handle *trans, 7158 struct btrfs_root *root, u64 group_start) 7159 { 7160 struct btrfs_path *path; 7161 struct btrfs_block_group_cache *block_group; 7162 struct btrfs_free_cluster *cluster; 7163 struct btrfs_root *tree_root = root->fs_info->tree_root; 7164 struct btrfs_key key; 7165 struct inode *inode; 7166 int ret; 7167 int factor; 7168 7169 root = root->fs_info->extent_root; 7170 7171 block_group = btrfs_lookup_block_group(root->fs_info, group_start); 7172 BUG_ON(!block_group); 7173 BUG_ON(!block_group->ro); 7174 7175 /* 7176 * Free the reserved super bytes from this block group before 7177 * remove it. 7178 */ 7179 free_excluded_extents(root, block_group); 7180 7181 memcpy(&key, &block_group->key, sizeof(key)); 7182 if (block_group->flags & (BTRFS_BLOCK_GROUP_DUP | 7183 BTRFS_BLOCK_GROUP_RAID1 | 7184 BTRFS_BLOCK_GROUP_RAID10)) 7185 factor = 2; 7186 else 7187 factor = 1; 7188 7189 /* make sure this block group isn't part of an allocation cluster */ 7190 cluster = &root->fs_info->data_alloc_cluster; 7191 spin_lock(&cluster->refill_lock); 7192 btrfs_return_cluster_to_free_space(block_group, cluster); 7193 spin_unlock(&cluster->refill_lock); 7194 7195 /* 7196 * make sure this block group isn't part of a metadata 7197 * allocation cluster 7198 */ 7199 cluster = &root->fs_info->meta_alloc_cluster; 7200 spin_lock(&cluster->refill_lock); 7201 btrfs_return_cluster_to_free_space(block_group, cluster); 7202 spin_unlock(&cluster->refill_lock); 7203 7204 path = btrfs_alloc_path(); 7205 if (!path) { 7206 ret = -ENOMEM; 7207 goto out; 7208 } 7209 7210 inode = lookup_free_space_inode(root, block_group, path); 7211 if (!IS_ERR(inode)) { 7212 ret = btrfs_orphan_add(trans, inode); 7213 BUG_ON(ret); 7214 clear_nlink(inode); 7215 /* One for the block groups ref */ 7216 spin_lock(&block_group->lock); 7217 if (block_group->iref) { 7218 block_group->iref = 0; 7219 block_group->inode = NULL; 7220 spin_unlock(&block_group->lock); 7221 iput(inode); 7222 } else { 7223 spin_unlock(&block_group->lock); 7224 } 7225 /* One for our lookup ref */ 7226 iput(inode); 7227 } 7228 7229 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 7230 key.offset = block_group->key.objectid; 7231 key.type = 0; 7232 7233 ret = btrfs_search_slot(trans, tree_root, &key, path, -1, 1); 7234 if (ret < 0) 7235 goto out; 7236 if (ret > 0) 7237 btrfs_release_path(path); 7238 if (ret == 0) { 7239 ret = btrfs_del_item(trans, tree_root, path); 7240 if (ret) 7241 goto out; 7242 btrfs_release_path(path); 7243 } 7244 7245 spin_lock(&root->fs_info->block_group_cache_lock); 7246 rb_erase(&block_group->cache_node, 7247 &root->fs_info->block_group_cache_tree); 7248 spin_unlock(&root->fs_info->block_group_cache_lock); 7249 7250 down_write(&block_group->space_info->groups_sem); 7251 /* 7252 * we must use list_del_init so people can check to see if they 7253 * are still on the list after taking the semaphore 7254 */ 7255 list_del_init(&block_group->list); 7256 up_write(&block_group->space_info->groups_sem); 7257 7258 if (block_group->cached == BTRFS_CACHE_STARTED) 7259 wait_block_group_cache_done(block_group); 7260 7261 btrfs_remove_free_space_cache(block_group); 7262 7263 spin_lock(&block_group->space_info->lock); 7264 block_group->space_info->total_bytes -= block_group->key.offset; 7265 block_group->space_info->bytes_readonly -= block_group->key.offset; 7266 block_group->space_info->disk_total -= block_group->key.offset * factor; 7267 spin_unlock(&block_group->space_info->lock); 7268 7269 memcpy(&key, &block_group->key, sizeof(key)); 7270 7271 btrfs_clear_space_info_full(root->fs_info); 7272 7273 btrfs_put_block_group(block_group); 7274 btrfs_put_block_group(block_group); 7275 7276 ret = btrfs_search_slot(trans, root, &key, path, -1, 1); 7277 if (ret > 0) 7278 ret = -EIO; 7279 if (ret < 0) 7280 goto out; 7281 7282 ret = btrfs_del_item(trans, root, path); 7283 out: 7284 btrfs_free_path(path); 7285 return ret; 7286 } 7287 7288 int btrfs_init_space_info(struct btrfs_fs_info *fs_info) 7289 { 7290 struct btrfs_space_info *space_info; 7291 struct btrfs_super_block *disk_super; 7292 u64 features; 7293 u64 flags; 7294 int mixed = 0; 7295 int ret; 7296 7297 disk_super = &fs_info->super_copy; 7298 if (!btrfs_super_root(disk_super)) 7299 return 1; 7300 7301 features = btrfs_super_incompat_flags(disk_super); 7302 if (features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) 7303 mixed = 1; 7304 7305 flags = BTRFS_BLOCK_GROUP_SYSTEM; 7306 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7307 if (ret) 7308 goto out; 7309 7310 if (mixed) { 7311 flags = BTRFS_BLOCK_GROUP_METADATA | BTRFS_BLOCK_GROUP_DATA; 7312 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7313 } else { 7314 flags = BTRFS_BLOCK_GROUP_METADATA; 7315 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7316 if (ret) 7317 goto out; 7318 7319 flags = BTRFS_BLOCK_GROUP_DATA; 7320 ret = update_space_info(fs_info, flags, 0, 0, &space_info); 7321 } 7322 out: 7323 return ret; 7324 } 7325 7326 int btrfs_error_unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) 7327 { 7328 return unpin_extent_range(root, start, end); 7329 } 7330 7331 int btrfs_error_discard_extent(struct btrfs_root *root, u64 bytenr, 7332 u64 num_bytes, u64 *actual_bytes) 7333 { 7334 return btrfs_discard_extent(root, bytenr, num_bytes, actual_bytes); 7335 } 7336 7337 int btrfs_trim_fs(struct btrfs_root *root, struct fstrim_range *range) 7338 { 7339 struct btrfs_fs_info *fs_info = root->fs_info; 7340 struct btrfs_block_group_cache *cache = NULL; 7341 u64 group_trimmed; 7342 u64 start; 7343 u64 end; 7344 u64 trimmed = 0; 7345 int ret = 0; 7346 7347 cache = btrfs_lookup_block_group(fs_info, range->start); 7348 7349 while (cache) { 7350 if (cache->key.objectid >= (range->start + range->len)) { 7351 btrfs_put_block_group(cache); 7352 break; 7353 } 7354 7355 start = max(range->start, cache->key.objectid); 7356 end = min(range->start + range->len, 7357 cache->key.objectid + cache->key.offset); 7358 7359 if (end - start >= range->minlen) { 7360 if (!block_group_cache_done(cache)) { 7361 ret = cache_block_group(cache, NULL, root, 0); 7362 if (!ret) 7363 wait_block_group_cache_done(cache); 7364 } 7365 ret = btrfs_trim_block_group(cache, 7366 &group_trimmed, 7367 start, 7368 end, 7369 range->minlen); 7370 7371 trimmed += group_trimmed; 7372 if (ret) { 7373 btrfs_put_block_group(cache); 7374 break; 7375 } 7376 } 7377 7378 cache = next_block_group(fs_info->tree_root, cache); 7379 } 7380 7381 range->len = trimmed; 7382 return ret; 7383 } 7384