/*
 * Copyright (C) 2007 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"

#define BTRFS_ROOT_TRANS_TAG 0

static noinline void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(transaction->use_count == 0);
	transaction->use_count--;
	if (transaction->use_count == 0) {
		list_del_init(&transaction->list);
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;
	cur_trans = root->fs_info->running_transaction;
	if (!cur_trans) {
		cur_trans = kmem_cache_alloc(btrfs_transaction_cachep,
					     GFP_NOFS);
		BUG_ON(!cur_trans);
		root->fs_info->generation++;
		cur_trans->num_writers = 1;
		cur_trans->num_joined = 0;
		cur_trans->transid = root->fs_info->generation;
		init_waitqueue_head(&cur_trans->writer_wait);
		init_waitqueue_head(&cur_trans->commit_wait);
		cur_trans->in_commit = 0;
		cur_trans->blocked = 0;
		cur_trans->use_count = 1;
		cur_trans->commit_done = 0;
		cur_trans->start_time = get_seconds();

		cur_trans->delayed_refs.root.rb_node = NULL;
		cur_trans->delayed_refs.num_entries = 0;
		cur_trans->delayed_refs.num_heads_ready = 0;
		cur_trans->delayed_refs.num_heads = 0;
		cur_trans->delayed_refs.flushing = 0;
		cur_trans->delayed_refs.run_delayed_start = 0;
		spin_lock_init(&cur_trans->delayed_refs.lock);

		INIT_LIST_HEAD(&cur_trans->pending_snapshots);
		list_add_tail(&cur_trans->list, &root->fs_info->trans_list);
		extent_io_tree_init(&cur_trans->dirty_pages,
				    root->fs_info->btree_inode->i_mapping,
				    GFP_NOFS);
		spin_lock(&root->fs_info->new_trans_lock);
		root->fs_info->running_transaction = cur_trans;
		spin_unlock(&root->fs_info->new_trans_lock);
	} else {
		cur_trans->num_writers++;
		cur_trans->num_joined++;
	}

	return 0;
}
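
/*
 * Note on lifetimes: transactions are reference counted.  A rough sketch of
 * how the helpers in this file pair up (illustrative only):
 *
 *	start_transaction()		takes a reference (use_count++)
 *	__btrfs_end_transaction()	drops it via put_transaction()
 *
 * put_transaction() unlinks the struct from fs_info->trans_list and frees
 * it once the last reference is gone.
 */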

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction.  This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
static noinline int record_root_in_trans(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	if (root->ref_cows && root->last_trans < trans->transid) {
		WARN_ON(root == root->fs_info->extent_root);
		WARN_ON(root->root_item.refs == 0);
		WARN_ON(root->commit_root != root->node);

		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);
		root->last_trans = trans->transid;
		btrfs_init_reloc_root(trans, root);
	}
	return 0;
}

int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	if (!root->ref_cows)
		return 0;

	mutex_lock(&root->fs_info->trans_mutex);
	if (root->last_trans == trans->transid) {
		mutex_unlock(&root->fs_info->trans_mutex);
		return 0;
	}

	record_root_in_trans(trans, root);
	mutex_unlock(&root->fs_info->trans_mutex);
	return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		DEFINE_WAIT(wait);
		cur_trans->use_count++;
		while (1) {
			prepare_to_wait(&root->fs_info->transaction_wait,
					&wait, TASK_UNINTERRUPTIBLE);
			if (cur_trans->blocked) {
				mutex_unlock(&root->fs_info->trans_mutex);
				schedule();
				mutex_lock(&root->fs_info->trans_mutex);
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
			} else {
				finish_wait(&root->fs_info->transaction_wait,
					    &wait);
				break;
			}
		}
		put_transaction(cur_trans);
	}
}

static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
						    int num_blocks, int wait)
{
	struct btrfs_trans_handle *h =
		kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	int ret;

	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->log_root_recovering &&
	    ((wait == 1 && !root->fs_info->open_ioctl_trans) || wait == 2))
		wait_current_trans(root);
	ret = join_transaction(root);
	BUG_ON(ret);

	h->transid = root->fs_info->running_transaction->transid;
	h->transaction = root->fs_info->running_transaction;
	h->blocks_reserved = num_blocks;
	h->blocks_used = 0;
	h->block_group = 0;
	h->alloc_exclude_nr = 0;
	h->alloc_exclude_start = 0;
	h->delayed_ref_updates = 0;

	root->fs_info->running_transaction->use_count++;
	record_root_in_trans(h, root);
	mutex_unlock(&root->fs_info->trans_mutex);
	return h;
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_blocks)
{
	return start_transaction(root, num_blocks, 1);
}

struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root,
						  int num_blocks)
{
	return start_transaction(root, num_blocks, 0);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r,
							  int num_blocks)
{
	return start_transaction(r, num_blocks, 2);
}
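
/*
 * Typical caller pattern for the handle helpers above, as an illustrative
 * sketch only (error handling elided):
 *
 *	struct btrfs_trans_handle *trans;
 *
 *	trans = btrfs_start_transaction(root, 1);
 *	... modify btree items under this handle ...
 *	btrfs_end_transaction(trans, root);
 *
 * btrfs_join_transaction() works the same way, except it does not wait for
 * a blocked (committing) transaction before joining.
 */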

/* wait for a transaction commit to be fully complete */
static noinline int wait_for_commit(struct btrfs_root *root,
				    struct btrfs_transaction *commit)
{
	DEFINE_WAIT(wait);
	mutex_lock(&root->fs_info->trans_mutex);
	while (!commit->commit_done) {
		prepare_to_wait(&commit->commit_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		if (commit->commit_done)
			break;
		mutex_unlock(&root->fs_info->trans_mutex);
		schedule();
		mutex_lock(&root->fs_info->trans_mutex);
	}
	mutex_unlock(&root->fs_info->trans_mutex);
	finish_wait(&commit->commit_wait, &wait);
	return 0;
}

#if 0
/*
 * rate limit against the drop_snapshot code.  This helps to slow down new
 * operations if the drop_snapshot code isn't able to keep up.
 */
static void throttle_on_drops(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;
	int harder_count = 0;

harder:
	if (atomic_read(&info->throttles)) {
		DEFINE_WAIT(wait);
		int thr;
		thr = atomic_read(&info->throttle_gen);

		do {
			prepare_to_wait(&info->transaction_throttle,
					&wait, TASK_UNINTERRUPTIBLE);
			if (!atomic_read(&info->throttles)) {
				finish_wait(&info->transaction_throttle,
					    &wait);
				break;
			}
			schedule();
			finish_wait(&info->transaction_throttle, &wait);
		} while (thr == atomic_read(&info->throttle_gen));
		harder_count++;

		if (root->fs_info->total_ref_cache_size > 1 * 1024 * 1024 &&
		    harder_count < 2)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 5 * 1024 * 1024 &&
		    harder_count < 10)
			goto harder;

		if (root->fs_info->total_ref_cache_size > 10 * 1024 * 1024 &&
		    harder_count < 20)
			goto harder;
	}
}
#endif

void btrfs_throttle(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->trans_mutex);
	if (!root->fs_info->open_ioctl_trans)
		wait_current_trans(root);
	mutex_unlock(&root->fs_info->trans_mutex);
}

static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, int throttle)
{
	struct btrfs_transaction *cur_trans;
	struct btrfs_fs_info *info = root->fs_info;
	int count = 0;

	while (count < 4) {
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;

			/*
			 * do a full flush if the transaction is trying
			 * to close
			 */
			if (trans->transaction->delayed_refs.flushing)
				cur = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
	}

	mutex_lock(&info->trans_mutex);
	cur_trans = info->running_transaction;
	WARN_ON(cur_trans != trans->transaction);
	WARN_ON(cur_trans->num_writers < 1);
	cur_trans->num_writers--;

	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);
	mutex_unlock(&info->trans_mutex);
	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	return 0;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 0);
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees.  This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
					struct extent_io_tree *dirty_pages)
{
	int ret;
	int err = 0;
	int werr = 0;
	struct page *page;
	struct inode *btree_inode = root->fs_info->btree_inode;
	u64 start = 0;
	u64 end;
	unsigned long index;

	while (1) {
		ret = find_first_extent_bit(dirty_pages, start, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;
		while (start <= end) {
			cond_resched();

			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;

			btree_lock_page_hook(page);
			if (!page->mapping) {
				unlock_page(page);
				page_cache_release(page);
				continue;
			}

			if (PageWriteback(page)) {
				if (PageDirty(page))
					wait_on_page_writeback(page);
				else {
					unlock_page(page);
					page_cache_release(page);
					continue;
				}
			}
			err = write_one_page(page, 0);
			if (err)
				werr = err;
			page_cache_release(page);
		}
	}
	while (1) {
		ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
					    EXTENT_DIRTY);
		if (ret)
			break;

		clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
		while (start <= end) {
			index = start >> PAGE_CACHE_SHIFT;
			start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
			page = find_get_page(btree_inode->i_mapping, index);
			if (!page)
				continue;
			if (PageDirty(page)) {
				btree_lock_page_hook(page);
				wait_on_page_writeback(page);
				err = write_one_page(page, 0);
				if (err)
					werr = err;
			}
			wait_on_page_writeback(page);
			page_cache_release(page);
			cond_resched();
		}
	}
	if (err)
		werr = err;
	return werr;
}
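
/*
 * write and wait on all of the dirty btree blocks that belong to this
 * transaction.  When called without a transaction handle, fall back to
 * flushing the entire btree inode mapping instead.
 */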
int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					&trans->transaction->dirty_pages);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	btrfs_write_dirty_block_groups(trans, root);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start)
			break;

		btrfs_set_root_node(&root->root_item, root->node);
		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		BUG_ON(ret);
		btrfs_write_dirty_block_groups(trans, root);

		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
		BUG_ON(ret);
	}
	free_extent_buffer(root->commit_root);
	root->commit_root = btrfs_root_node(root);
	return 0;
}

/*
 * update all the cowonly tree roots on disk
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	eb = btrfs_lock_root_node(fs_info->tree_root);
	btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb);
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);

		update_cowonly_root(trans, root);

		ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
		BUG_ON(ret);
	}
	return 0;
}

/*
 * dead roots are old snapshots that need to be deleted.  This adds the root
 * to the list of dead roots so it can be deleted later.
 */
int btrfs_add_dead_root(struct btrfs_root *root)
{
	mutex_lock(&root->fs_info->trans_mutex);
	list_add(&root->root_list, &root->fs_info->dead_roots);
	mutex_unlock(&root->fs_info->trans_mutex);
	return 0;
}

/*
 * update all the fs roots (subvolumes and snapshots) on disk
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *gang[8];
	struct btrfs_fs_info *fs_info = root->fs_info;
	int i;
	int ret;
	int err = 0;

	while (1) {
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
				     (unsigned long)root->root_key.objectid,
				     BTRFS_ROOT_TRANS_TAG);

			btrfs_free_log(trans, root);
			btrfs_update_reloc_root(trans, root);

			if (root->commit_root != root->node) {
				free_extent_buffer(root->commit_root);
				root->commit_root = btrfs_root_node(root);
				btrfs_set_root_node(&root->root_item,
						    root->node);
			}

			err = btrfs_update_root(trans, fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			if (err)
				break;
		}
	}
	return err;
}
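
/*
 * commit_fs_roots() above is the consumer of the BTRFS_ROOT_TRANS_TAG set
 * by record_root_in_trans(): a reference counted root is tagged in the
 * fs_roots_radix tree the first time it is modified in a transaction, and
 * the gang lookup finds it again at commit time so its updated root item
 * can be written into the tree of tree roots.
 */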

/*
 * defrag a given btree.  If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	int ret;
	struct btrfs_trans_handle *trans;
	unsigned long nr;

	smp_mb();
	if (root->defrag_running)
		return 0;
	trans = btrfs_start_transaction(root, 1);
	while (1) {
		root->defrag_running = 1;
		ret = btrfs_defrag_leaves(trans, root, cacheonly);
		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		trans = btrfs_start_transaction(root, 1);
		if (root->fs_info->closing || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	smp_mb();
	btrfs_end_transaction(trans, root);
	return 0;
}

#if 0
/*
 * when dropping snapshots, we generate a ton of delayed refs, and it makes
 * sense not to join the transaction while it is trying to flush the current
 * queue of delayed refs out.
 *
 * This is used by the drop snapshot code only
 */
static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info)
{
	DEFINE_WAIT(wait);

	mutex_lock(&info->trans_mutex);
	while (info->running_transaction &&
	       info->running_transaction->delayed_refs.flushing) {
		prepare_to_wait(&info->transaction_wait, &wait,
				TASK_UNINTERRUPTIBLE);
		mutex_unlock(&info->trans_mutex);

		schedule();

		mutex_lock(&info->trans_mutex);
		finish_wait(&info->transaction_wait, &wait);
	}
	mutex_unlock(&info->trans_mutex);
	return 0;
}

/*
 * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on
 * all of them
 */
int btrfs_drop_dead_root(struct btrfs_root *root)
{
	struct btrfs_trans_handle *trans;
	struct btrfs_root *tree_root = root->fs_info->tree_root;
	unsigned long nr;
	int ret;

	while (1) {
		/*
		 * we don't want to jump in and create a bunch of
		 * delayed refs if the transaction is starting to close
		 */
		wait_transaction_pre_flush(tree_root->fs_info);
		trans = btrfs_start_transaction(tree_root, 1);

		/*
		 * we've joined a transaction, make sure it isn't
		 * closing right now
		 */
		if (trans->transaction->delayed_refs.flushing) {
			btrfs_end_transaction(trans, tree_root);
			continue;
		}

		ret = btrfs_drop_snapshot(trans, root);
		if (ret != -EAGAIN)
			break;

		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		if (ret)
			break;

		nr = trans->blocks_used;
		ret = btrfs_end_transaction(trans, tree_root);
		BUG_ON(ret);

		btrfs_btree_balance_dirty(tree_root, nr);
		cond_resched();
	}
	BUG_ON(ret);

	ret = btrfs_del_root(trans, tree_root, &root->root_key);
	BUG_ON(ret);

	nr = trans->blocks_used;
	ret = btrfs_end_transaction(trans, tree_root);
	BUG_ON(ret);

	free_extent_buffer(root->node);
	free_extent_buffer(root->commit_root);
	kfree(root);

	btrfs_btree_balance_dirty(tree_root, nr);
	return ret;
}
#endif
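
/*
 * Pending snapshots are queued on the running transaction by the snapshot
 * ioctl code, roughly along these lines (illustrative sketch only, the real
 * code lives outside this file):
 *
 *	pending->name   = copy of the snapshot name;
 *	pending->dentry = dentry the snapshot will appear under;
 *	pending->root   = root being snapshotted;
 *	list_add(&pending->list, &trans->transaction->pending_snapshots);
 *
 * create_pending_snapshot() below copies the root during the commit, and
 * finish_pending_snapshot() wires up the directory entries once the new
 * root is safely on disk.
 */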

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit.  This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	int ret;
	u64 objectid;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		ret = -ENOMEM;
		goto fail;
	}
	ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid);
	if (ret)
		goto fail;

	record_root_in_trans(trans, root);
	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));

	key.objectid = objectid;
	key.offset = 0;
	btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);

	old = btrfs_lock_root_node(root);
	btrfs_cow_block(trans, root, old, NULL, 0, &old);
	btrfs_set_lock_blocking(old);

	btrfs_copy_root(trans, root, old, &tmp, objectid);
	btrfs_tree_unlock(old);
	free_extent_buffer(old);

	btrfs_set_root_node(new_root_item, tmp);
	ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
				new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	memcpy(&pending->root_key, &key, sizeof(key));
fail:
	kfree(new_root_item);
	return ret;
}
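
/*
 * do the directory item, root backref and dentry work for a snapshot that
 * create_pending_snapshot() copied earlier in the commit.  This runs after
 * the commit has written the new root, so it joins a fresh transaction
 * against the parent root.
 */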
static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	int ret;
	int namelen;
	u64 index = 0;
	struct btrfs_trans_handle *trans;
	struct inode *parent_inode;
	struct inode *inode;
	struct btrfs_root *parent_root;

	parent_inode = pending->dentry->d_parent->d_inode;
	parent_root = BTRFS_I(parent_inode)->root;
	trans = btrfs_join_transaction(parent_root, 1);

	/*
	 * insert the directory item
	 */
	namelen = strlen(pending->name);
	ret = btrfs_set_inode_index(parent_inode, &index);
	ret = btrfs_insert_dir_item(trans, parent_root,
				    pending->name, namelen,
				    parent_inode->i_ino,
				    &pending->root_key, BTRFS_FT_DIR, index);

	if (ret)
		goto fail;

	btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2);
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	BUG_ON(ret);

	/* add the backref first */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 pending->root_key.objectid,
				 BTRFS_ROOT_BACKREF_KEY,
				 parent_root->root_key.objectid,
				 parent_inode->i_ino, index, pending->name,
				 namelen);

	BUG_ON(ret);

	/* now add the forward ref */
	ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root,
				 parent_root->root_key.objectid,
				 BTRFS_ROOT_REF_KEY,
				 pending->root_key.objectid,
				 parent_inode->i_ino, index, pending->name,
				 namelen);

	inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
	d_instantiate(pending->dentry, inode);
fail:
	btrfs_end_transaction(trans, fs_info->fs_root);
	return ret;
}

/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	list_for_each_entry(pending, head, list) {
		ret = create_pending_snapshot(trans, fs_info, pending);
		BUG_ON(ret);
	}
	return 0;
}

static noinline int finish_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;
	int ret;

	while (!list_empty(head)) {
		pending = list_entry(head->next,
				     struct btrfs_pending_snapshot, list);
		ret = finish_pending_snapshot(fs_info, pending);
		BUG_ON(ret);
		list_del(&pending->list);
		kfree(pending->name);
		kfree(pending);
	}
	return 0;
}

static void update_super_roots(struct btrfs_root *root)
{
	struct btrfs_root_item *root_item;
	struct btrfs_super_block *super;

	super = &root->fs_info->super_copy;

	root_item = &root->fs_info->chunk_root->root_item;
	super->chunk_root = root_item->bytenr;
	super->chunk_root_generation = root_item->generation;
	super->chunk_root_level = root_item->level;

	root_item = &root->fs_info->tree_root->root_item;
	super->root = root_item->bytenr;
	super->generation = root_item->generation;
	super->root_level = root_item->level;
}
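
/*
 * btrfs_commit_transaction() below is the heavyweight way to finish a
 * handle: unlike btrfs_end_transaction(), the caller does not get control
 * back until the trees modified in this transaction are consistent on disk
 * and the super block points at the new roots.  An illustrative sketch of a
 * caller that needs its changes to be durable:
 *
 *	trans = btrfs_start_transaction(root, 1);
 *	... metadata changes that must survive a crash ...
 *	ret = btrfs_commit_transaction(trans, root);
 */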
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	unsigned long timeout = 1;
	struct btrfs_transaction *cur_trans;
	struct btrfs_transaction *prev_trans = NULL;
	struct extent_io_tree *pinned_copy;
	DEFINE_WAIT(wait);
	int ret;
	int should_grow = 0;
	unsigned long now = get_seconds();
	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

	btrfs_run_ordered_operations(root, 0);

	/* make a pass through all the delayed refs we have so far
	 * any running procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	cur_trans = trans->transaction;
	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;

	ret = btrfs_run_delayed_refs(trans, root, 0);
	BUG_ON(ret);

	mutex_lock(&root->fs_info->trans_mutex);
	if (cur_trans->in_commit) {
		cur_trans->use_count++;
		mutex_unlock(&root->fs_info->trans_mutex);
		btrfs_end_transaction(trans, root);

		ret = wait_for_commit(root, cur_trans);
		BUG_ON(ret);

		mutex_lock(&root->fs_info->trans_mutex);
		put_transaction(cur_trans);
		mutex_unlock(&root->fs_info->trans_mutex);

		return 0;
	}

	pinned_copy = kmalloc(sizeof(*pinned_copy), GFP_NOFS);
	if (!pinned_copy)
		return -ENOMEM;

	extent_io_tree_init(pinned_copy,
			    root->fs_info->btree_inode->i_mapping, GFP_NOFS);

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			prev_trans->use_count++;
			mutex_unlock(&root->fs_info->trans_mutex);

			wait_for_commit(root, prev_trans);

			mutex_lock(&root->fs_info->trans_mutex);
			put_transaction(prev_trans);
		}
	}

	if (now < cur_trans->start_time || now - cur_trans->start_time < 1)
		should_grow = 1;

	do {
		int snap_pending = 0;
		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);
		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (cur_trans->num_writers > 1)
			timeout = MAX_SCHEDULE_TIMEOUT;
		else if (should_grow)
			timeout = 1;

		mutex_unlock(&root->fs_info->trans_mutex);

		if (flush_on_commit || snap_pending) {
			if (flush_on_commit)
				btrfs_start_delalloc_inodes(root);
			ret = btrfs_wait_ordered_extents(root, 1);
			BUG_ON(ret);
		}

		/*
		 * rename doesn't use btrfs_join_transaction, so once we
		 * set the transaction to blocked above, we aren't going
		 * to get any new ordered operations.  We can safely run
		 * it here and know for sure that nothing new will be added
		 * to the list
		 */
		btrfs_run_ordered_operations(root, 1);

		smp_mb();
		if (cur_trans->num_writers > 1 || should_grow)
			schedule_timeout(timeout);

		mutex_lock(&root->fs_info->trans_mutex);
		finish_wait(&cur_trans->writer_wait, &wait);
	} while (cur_trans->num_writers > 1 ||
		 (should_grow && cur_trans->num_joined != joined));

	ret = create_pending_snapshots(trans, root->fs_info);
	BUG_ON(ret);

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	WARN_ON(cur_trans != trans->transaction);

	/* commit_cowonly_roots is responsible for getting the
	 * various roots consistent with each other.  Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree.  So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in.  By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);

	ret = commit_fs_roots(trans, root);
	BUG_ON(ret);

	/* commit_fs_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	ret = commit_cowonly_roots(trans, root);
	BUG_ON(ret);

	cur_trans = root->fs_info->running_transaction;
	spin_lock(&root->fs_info->new_trans_lock);
	root->fs_info->running_transaction = NULL;
	spin_unlock(&root->fs_info->new_trans_lock);

	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
			    root->fs_info->tree_root->node);
	free_extent_buffer(root->fs_info->tree_root->commit_root);
	root->fs_info->tree_root->commit_root =
				btrfs_root_node(root->fs_info->tree_root);

	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
			    root->fs_info->chunk_root->node);
	free_extent_buffer(root->fs_info->chunk_root->commit_root);
	root->fs_info->chunk_root->commit_root =
				btrfs_root_node(root->fs_info->chunk_root);

	update_super_roots(root);

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(&root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(&root->fs_info->super_copy, 0);
	}

	memcpy(&root->fs_info->super_for_commit, &root->fs_info->super_copy,
	       sizeof(root->fs_info->super_copy));

	btrfs_copy_pinned(root, pinned_copy);

	trans->transaction->blocked = 0;

	wake_up(&root->fs_info->transaction_wait);

	mutex_unlock(&root->fs_info->trans_mutex);
	ret = btrfs_write_and_wait_transaction(trans, root);
	BUG_ON(ret);
	write_ctree_super(trans, root, 0);

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root, pinned_copy);
	kfree(pinned_copy);

	/* do the directory inserts of any pending snapshot creations */
	finish_pending_snapshots(trans, root->fs_info);

	mutex_lock(&root->fs_info->trans_mutex);

	cur_trans->commit_done = 1;

	root->fs_info->last_trans_committed = cur_trans->transid;
	wake_up(&cur_trans->commit_wait);

	put_transaction(cur_trans);
	put_transaction(cur_trans);

	mutex_unlock(&root->fs_info->trans_mutex);

	kmem_cache_free(btrfs_trans_handle_cachep, trans);
	return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for
 * deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	LIST_HEAD(list);
	struct btrfs_fs_info *fs_info = root->fs_info;

	mutex_lock(&fs_info->trans_mutex);
	list_splice_init(&fs_info->dead_roots, &list);
	mutex_unlock(&fs_info->trans_mutex);

	while (!list_empty(&list)) {
		root = list_entry(list.next, struct btrfs_root, root_list);
		list_del_init(&root->root_list);
		btrfs_drop_snapshot(root, 0);
	}
	return 0;
}