/*
 * Copyright (C) 2007 Oracle. All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License v2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/sched.h>
#include <linux/writeback.h>
#include <linux/pagemap.h>
#include <linux/blkdev.h>
#include <linux/uuid.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
#include "locking.h"
#include "tree-log.h"
#include "inode-map.h"
#include "volumes.h"

#define BTRFS_ROOT_TRANS_TAG 0

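/*
 * drop a reference on a transaction; the final put frees the struct and
 * returns it to the slab cache
 */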
void put_transaction(struct btrfs_transaction *transaction)
{
	WARN_ON(atomic_read(&transaction->use_count) == 0);
	if (atomic_dec_and_test(&transaction->use_count)) {
		BUG_ON(!list_empty(&transaction->list));
		WARN_ON(transaction->delayed_refs.root.rb_node);
		memset(transaction, 0, sizeof(*transaction));
		kmem_cache_free(btrfs_transaction_cachep, transaction);
	}
}

static noinline void switch_commit_root(struct btrfs_root *root)
{
	free_extent_buffer(root->commit_root);
	root->commit_root = btrfs_root_node(root);
}

/*
 * either allocate a new transaction or hop into the existing one
 */
static noinline int join_transaction(struct btrfs_root *root, int nofail)
{
	struct btrfs_transaction *cur_trans;
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&fs_info->trans_lock);
loop:
	/* The file system has been taken offline. No new transactions. */
	if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		spin_unlock(&fs_info->trans_lock);
		return -EROFS;
	}

	if (fs_info->trans_no_join) {
		if (!nofail) {
			spin_unlock(&fs_info->trans_lock);
			return -EBUSY;
		}
	}

	cur_trans = fs_info->running_transaction;
	if (cur_trans) {
		if (cur_trans->aborted) {
			spin_unlock(&fs_info->trans_lock);
			return cur_trans->aborted;
		}
		atomic_inc(&cur_trans->use_count);
		atomic_inc(&cur_trans->num_writers);
		cur_trans->num_joined++;
		spin_unlock(&fs_info->trans_lock);
		return 0;
	}
	spin_unlock(&fs_info->trans_lock);

	cur_trans = kmem_cache_alloc(btrfs_transaction_cachep, GFP_NOFS);
	if (!cur_trans)
		return -ENOMEM;

	spin_lock(&fs_info->trans_lock);
	if (fs_info->running_transaction) {
		/*
		 * someone started a transaction after we unlocked. Make sure
		 * to redo the trans_no_join checks above
		 */
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		cur_trans = fs_info->running_transaction;
		goto loop;
	} else if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		spin_unlock(&fs_info->trans_lock);
		kmem_cache_free(btrfs_transaction_cachep, cur_trans);
		return -EROFS;
	}

	atomic_set(&cur_trans->num_writers, 1);
	cur_trans->num_joined = 0;
	init_waitqueue_head(&cur_trans->writer_wait);
	init_waitqueue_head(&cur_trans->commit_wait);
	cur_trans->in_commit = 0;
	cur_trans->blocked = 0;
	/*
	 * One for this trans handle, one so it will live on until we
	 * commit the transaction.
	 */
	atomic_set(&cur_trans->use_count, 2);
	cur_trans->commit_done = 0;
	cur_trans->start_time = get_seconds();

	cur_trans->delayed_refs.root = RB_ROOT;
	cur_trans->delayed_refs.num_entries = 0;
	cur_trans->delayed_refs.num_heads_ready = 0;
	cur_trans->delayed_refs.num_heads = 0;
	cur_trans->delayed_refs.flushing = 0;
	cur_trans->delayed_refs.run_delayed_start = 0;

	/*
	 * although the tree mod log is per file system and not per transaction,
	 * the log must never go across transaction boundaries.
	 */
	smp_mb();
	if (!list_empty(&fs_info->tree_mod_seq_list)) {
		printk(KERN_ERR "btrfs: tree_mod_seq_list not empty when "
			"creating a fresh transaction\n");
		WARN_ON(1);
	}
	if (!RB_EMPTY_ROOT(&fs_info->tree_mod_log)) {
		printk(KERN_ERR "btrfs: tree_mod_log rb tree not empty when "
			"creating a fresh transaction\n");
		WARN_ON(1);
	}
	atomic_set(&fs_info->tree_mod_seq, 0);

	spin_lock_init(&cur_trans->commit_lock);
	spin_lock_init(&cur_trans->delayed_refs.lock);

	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
	list_add_tail(&cur_trans->list, &fs_info->trans_list);
	extent_io_tree_init(&cur_trans->dirty_pages,
			    fs_info->btree_inode->i_mapping);
	fs_info->generation++;
	cur_trans->transid = fs_info->generation;
	fs_info->running_transaction = cur_trans;
	cur_trans->aborted = 0;
	spin_unlock(&fs_info->trans_lock);

	return 0;
}

/*
 * this does all the record keeping required to make sure that a reference
 * counted root is properly recorded in a given transaction. This is required
 * to make sure the old root from before we joined the transaction is deleted
 * when the transaction commits
 */
static int record_root_in_trans(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	if (root->ref_cows && root->last_trans < trans->transid) {
		WARN_ON(root == root->fs_info->extent_root);
		WARN_ON(root->commit_root != root->node);

		/*
		 * see below for in_trans_setup usage rules
		 * we have the reloc mutex held now, so there
		 * is only one writer in this function
		 */
		root->in_trans_setup = 1;

		/* make sure readers find in_trans_setup before
		 * they find our root->last_trans update
		 */
		smp_wmb();

		spin_lock(&root->fs_info->fs_roots_radix_lock);
		if (root->last_trans == trans->transid) {
			spin_unlock(&root->fs_info->fs_roots_radix_lock);
			return 0;
		}
		radix_tree_tag_set(&root->fs_info->fs_roots_radix,
				   (unsigned long)root->root_key.objectid,
				   BTRFS_ROOT_TRANS_TAG);
		spin_unlock(&root->fs_info->fs_roots_radix_lock);
		root->last_trans = trans->transid;

		/* this is pretty tricky. We don't want to
		 * take the relocation lock in btrfs_record_root_in_trans
		 * unless we're really doing the first setup for this root in
		 * this transaction.
		 *
		 * Normally we'd use root->last_trans as a flag to decide
		 * if we want to take the expensive mutex.
		 *
		 * But, we have to set root->last_trans before we
		 * init the relocation root, otherwise, we trip over warnings
		 * in ctree.c. The solution used here is to flag ourselves
		 * with root->in_trans_setup. When this is 1, we're still
		 * fixing up the reloc trees and everyone must wait.
		 *
		 * When this is zero, they can trust root->last_trans and fly
		 * through btrfs_record_root_in_trans without having to take the
		 * lock. smp_wmb() makes sure that all the writes above are
		 * done before we pop in the zero below
		 */
		btrfs_init_reloc_root(trans, root);
		smp_wmb();
		root->in_trans_setup = 0;
	}
	return 0;
}


int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	if (!root->ref_cows)
		return 0;

	/*
	 * see record_root_in_trans for comments about in_trans_setup usage
	 * and barriers
	 */
	smp_rmb();
	if (root->last_trans == trans->transid &&
	    !root->in_trans_setup)
		return 0;

	mutex_lock(&root->fs_info->reloc_mutex);
	record_root_in_trans(trans, root);
	mutex_unlock(&root->fs_info->reloc_mutex);

	return 0;
}

/* wait for commit against the current transaction to become unblocked
 * when this is done, it is safe to start a new transaction, but the current
 * transaction might not be fully on disk.
 */
static void wait_current_trans(struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans;

	spin_lock(&root->fs_info->trans_lock);
	cur_trans = root->fs_info->running_transaction;
	if (cur_trans && cur_trans->blocked) {
		atomic_inc(&cur_trans->use_count);
		spin_unlock(&root->fs_info->trans_lock);

		wait_event(root->fs_info->transaction_wait,
			   !cur_trans->blocked);
		put_transaction(cur_trans);
	} else {
		spin_unlock(&root->fs_info->trans_lock);
	}
}

enum btrfs_trans_type {
	TRANS_START,
	TRANS_JOIN,
	TRANS_USERSPACE,
	TRANS_JOIN_NOLOCK,
};

static int may_wait_transaction(struct btrfs_root *root, int type)
{
	if (root->fs_info->log_root_recovering)
		return 0;

	if (type == TRANS_USERSPACE)
		return 1;

	if (type == TRANS_START &&
	    !atomic_read(&root->fs_info->open_ioctl_trans))
		return 1;

	return 0;
}

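/*
 * common helper behind the btrfs_*_transaction() variants below: reserve
 * metadata space for num_items tree operations (join callers pass 0 and
 * reserve nothing), then join or create the running transaction
 */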
static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root,
						    u64 num_items, int type)
{
	struct btrfs_trans_handle *h;
	struct btrfs_transaction *cur_trans;
	u64 num_bytes = 0;
	int ret;
	u64 qgroup_reserved = 0;

	if (root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR)
		return ERR_PTR(-EROFS);

	if (current->journal_info) {
		WARN_ON(type != TRANS_JOIN && type != TRANS_JOIN_NOLOCK);
		h = current->journal_info;
		h->use_count++;
		h->orig_rsv = h->block_rsv;
		h->block_rsv = NULL;
		goto got_it;
	}

	/*
	 * Do the reservation before we join the transaction so we can do all
	 * the appropriate flushing if need be.
	 */
	if (num_items > 0 && root != root->fs_info->chunk_root) {
		if (root->fs_info->quota_enabled &&
		    is_fstree(root->root_key.objectid)) {
			qgroup_reserved = num_items * root->leafsize;
			ret = btrfs_qgroup_reserve(root, qgroup_reserved);
			if (ret)
				return ERR_PTR(ret);
		}

		num_bytes = btrfs_calc_trans_metadata_size(root, num_items);
		ret = btrfs_block_rsv_add(root,
					  &root->fs_info->trans_block_rsv,
					  num_bytes);
		if (ret)
			return ERR_PTR(ret);
	}
again:
	h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
	if (!h)
		return ERR_PTR(-ENOMEM);

	if (may_wait_transaction(root, type))
		wait_current_trans(root);

	do {
		ret = join_transaction(root, type == TRANS_JOIN_NOLOCK);
		if (ret == -EBUSY)
			wait_current_trans(root);
	} while (ret == -EBUSY);

	if (ret < 0) {
		kmem_cache_free(btrfs_trans_handle_cachep, h);
		return ERR_PTR(ret);
	}

	cur_trans = root->fs_info->running_transaction;

	h->transid = cur_trans->transid;
	h->transaction = cur_trans;
	h->blocks_used = 0;
	h->bytes_reserved = 0;
	h->root = root;
	h->delayed_ref_updates = 0;
	h->use_count = 1;
	h->adding_csums = 0;
	h->block_rsv = NULL;
	h->orig_rsv = NULL;
	h->aborted = 0;
	h->qgroup_reserved = qgroup_reserved;
	h->delayed_ref_elem.seq = 0;
	INIT_LIST_HEAD(&h->qgroup_ref_list);

	smp_mb();
	if (cur_trans->blocked && may_wait_transaction(root, type)) {
		btrfs_commit_transaction(h, root);
		goto again;
	}

	if (num_bytes) {
		trace_btrfs_space_reservation(root->fs_info, "transaction",
					      h->transid, num_bytes, 1);
		h->block_rsv = &root->fs_info->trans_block_rsv;
		h->bytes_reserved = num_bytes;
	}

got_it:
	btrfs_record_root_in_trans(h, root);

	if (!current->journal_info && type != TRANS_USERSPACE)
		current->journal_info = h;
	return h;
}

struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
						   int num_items)
{
	return start_transaction(root, num_items, TRANS_START);
}
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_JOIN);
}

struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_JOIN_NOLOCK);
}

struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *root)
{
	return start_transaction(root, 0, TRANS_USERSPACE);
}

/* wait for a transaction commit to be fully complete */
static noinline void wait_for_commit(struct btrfs_root *root,
				     struct btrfs_transaction *commit)
{
	wait_event(commit->commit_wait, commit->commit_done);
}

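/*
 * wait for the commit of a given transid to finish; transid == 0 means
 * "whatever transaction is currently committing", if any
 */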
int btrfs_wait_for_commit(struct btrfs_root *root, u64 transid)
{
	struct btrfs_transaction *cur_trans = NULL, *t;
	int ret;

	ret = 0;
	if (transid) {
		if (transid <= root->fs_info->last_trans_committed)
			goto out;

		/* find specified transaction */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry(t, &root->fs_info->trans_list, list) {
			if (t->transid == transid) {
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
			if (t->transid > transid)
				break;
		}
		spin_unlock(&root->fs_info->trans_lock);
		ret = -EINVAL;
		if (!cur_trans)
			goto out;  /* bad transid */
	} else {
		/* find newest transaction that is committing | committed */
		spin_lock(&root->fs_info->trans_lock);
		list_for_each_entry_reverse(t, &root->fs_info->trans_list,
					    list) {
			if (t->in_commit) {
				if (t->commit_done)
					break;
				cur_trans = t;
				atomic_inc(&cur_trans->use_count);
				break;
			}
		}
		spin_unlock(&root->fs_info->trans_lock);
		if (!cur_trans)
			goto out;  /* nothing committing|committed */
	}

	wait_for_commit(root, cur_trans);

	put_transaction(cur_trans);
	ret = 0;
out:
	return ret;
}

void btrfs_throttle(struct btrfs_root *root)
{
	if (!atomic_read(&root->fs_info->open_ioctl_trans))
		wait_current_trans(root);
}

static int should_end_transaction(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	int ret;

	ret = btrfs_block_rsv_check(root, &root->fs_info->global_block_rsv, 5);
	return ret ? 1 : 0;
}

int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	int updates;
	int err;

	smp_mb();
	if (cur_trans->blocked || cur_trans->delayed_refs.flushing)
		return 1;

	updates = trans->delayed_ref_updates;
	trans->delayed_ref_updates = 0;
	if (updates) {
		err = btrfs_run_delayed_refs(trans, root, updates);
		if (err) /* Error code will also eval true */
			return err;
	}

	return should_end_transaction(trans, root);
}

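/*
 * common helper for ending a transaction handle. throttle selects the
 * heavier-weight behaviour (it may commit the transaction or run delayed
 * iputs); lock == 0 skips the blocked/should-end checks for nolock callers
 */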
static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root, int throttle, int lock)
{
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_fs_info *info = root->fs_info;
	int count = 0;
	int err = 0;

	if (--trans->use_count) {
		trans->block_rsv = trans->orig_rsv;
		return 0;
	}

	/*
	 * do the qgroup accounting as early as possible
	 */
	err = btrfs_delayed_refs_qgroup_accounting(trans, info);

	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
	/*
	 * the same root has to be passed to start_transaction and
	 * end_transaction. Subvolume quota depends on this.
	 */
	WARN_ON(trans->root != root);

	if (trans->qgroup_reserved) {
		btrfs_qgroup_free(root, trans->qgroup_reserved);
		trans->qgroup_reserved = 0;
	}

	while (count < 2) {
		unsigned long cur = trans->delayed_ref_updates;
		trans->delayed_ref_updates = 0;
		if (cur &&
		    trans->transaction->delayed_refs.num_heads_ready > 64) {
			trans->delayed_ref_updates = 0;
			btrfs_run_delayed_refs(trans, root, cur);
		} else {
			break;
		}
		count++;
	}
	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;

	if (lock && !atomic_read(&root->fs_info->open_ioctl_trans) &&
	    should_end_transaction(trans, root)) {
		trans->transaction->blocked = 1;
		smp_wmb();
	}

	if (lock && cur_trans->blocked && !cur_trans->in_commit) {
		if (throttle) {
			/*
			 * We may race with somebody else here so end up having
			 * to call end_transaction on ourselves again, so inc
			 * our use_count.
			 */
			trans->use_count++;
			return btrfs_commit_transaction(trans, root);
		} else {
			wake_up_process(info->transaction_kthread);
		}
	}

	WARN_ON(cur_trans != info->running_transaction);
	WARN_ON(atomic_read(&cur_trans->num_writers) < 1);
	atomic_dec(&cur_trans->num_writers);

	smp_mb();
	if (waitqueue_active(&cur_trans->writer_wait))
		wake_up(&cur_trans->writer_wait);
	put_transaction(cur_trans);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	if (throttle)
		btrfs_run_delayed_iputs(root);

	if (trans->aborted ||
	    root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) {
		err = -EIO;
	}
	assert_qgroups_uptodate(trans);

	memset(trans, 0, sizeof(*trans));
	kmem_cache_free(btrfs_trans_handle_cachep, trans);
	return err;
}

int btrfs_end_transaction(struct btrfs_trans_handle *trans,
			  struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 0, 1);
	if (ret)
		return ret;
	return 0;
}

int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 1, 1);
	if (ret)
		return ret;
	return 0;
}

int btrfs_end_transaction_nolock(struct btrfs_trans_handle *trans,
				 struct btrfs_root *root)
{
	int ret;

	ret = __btrfs_end_transaction(trans, root, 0, 0);
	if (ret)
		return ret;
	return 0;
}

int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
				struct btrfs_root *root)
{
	return __btrfs_end_transaction(trans, root, 1, 1);
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees. This is used to make sure all of
 * those extents are sent to disk but does not wait on them
 */
int btrfs_write_marked_extents(struct btrfs_root *root,
			       struct extent_io_tree *dirty_pages, int mark)
{
	int err = 0;
	int werr = 0;
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	u64 start = 0;
	u64 end;

	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
				      mark)) {
		convert_extent_bit(dirty_pages, start, end, EXTENT_NEED_WAIT, mark,
				   GFP_NOFS);
		err = filemap_fdatawrite_range(mapping, start, end);
		if (err)
			werr = err;
		cond_resched();
		start = end + 1;
	}
	if (err)
		werr = err;
	return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees. This is used to make sure all of
 * those extents are on disk for transaction or log commit. We wait
 * on all the pages and clear them from the dirty pages state tree
 */
int btrfs_wait_marked_extents(struct btrfs_root *root,
			      struct extent_io_tree *dirty_pages, int mark)
{
	int err = 0;
	int werr = 0;
	struct address_space *mapping = root->fs_info->btree_inode->i_mapping;
	u64 start = 0;
	u64 end;

	while (!find_first_extent_bit(dirty_pages, start, &start, &end,
				      EXTENT_NEED_WAIT)) {
		clear_extent_bits(dirty_pages, start, end, EXTENT_NEED_WAIT, GFP_NOFS);
		err = filemap_fdatawait_range(mapping, start, end);
		if (err)
			werr = err;
		cond_resched();
		start = end + 1;
	}
	if (err)
		werr = err;
	return werr;
}

/*
 * when btree blocks are allocated, they have some corresponding bits set for
 * them in one of two extent_io trees. This is used to make sure all of
 * those extents are on disk for transaction or log commit
 */
int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
				struct extent_io_tree *dirty_pages, int mark)
{
	int ret;
	int ret2;

	ret = btrfs_write_marked_extents(root, dirty_pages, mark);
	ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);

	if (ret)
		return ret;
	if (ret2)
		return ret2;
	return 0;
}

int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
				     struct btrfs_root *root)
{
	if (!trans || !trans->transaction) {
		struct inode *btree_inode;
		btree_inode = root->fs_info->btree_inode;
		return filemap_write_and_wait(btree_inode->i_mapping);
	}
	return btrfs_write_and_wait_marked_extents(root,
					   &trans->transaction->dirty_pages,
					   EXTENT_DIRTY);
}

/*
 * this is used to update the root pointer in the tree of tree roots.
 *
 * But, in the case of the extent allocation tree, updating the root
 * pointer may allocate blocks which may change the root of the extent
 * allocation tree.
 *
 * So, this loops and repeats and makes sure the cowonly root didn't
 * change while the root pointer was being updated in the metadata.
 */
static int update_cowonly_root(struct btrfs_trans_handle *trans,
			       struct btrfs_root *root)
{
	int ret;
	u64 old_root_bytenr;
	u64 old_root_used;
	struct btrfs_root *tree_root = root->fs_info->tree_root;

	old_root_used = btrfs_root_used(&root->root_item);
	btrfs_write_dirty_block_groups(trans, root);

	while (1) {
		old_root_bytenr = btrfs_root_bytenr(&root->root_item);
		if (old_root_bytenr == root->node->start &&
		    old_root_used == btrfs_root_used(&root->root_item))
			break;

		btrfs_set_root_node(&root->root_item, root->node);
		ret = btrfs_update_root(trans, tree_root,
					&root->root_key,
					&root->root_item);
		if (ret)
			return ret;

		old_root_used = btrfs_root_used(&root->root_item);
		ret = btrfs_write_dirty_block_groups(trans, root);
		if (ret)
			return ret;
	}

	if (root != root->fs_info->extent_root)
		switch_commit_root(root);

	return 0;
}

/*
 * update all the cowonly tree roots on disk
 *
 * The error handling in this function may not be obvious. Any of the
 * failures will cause the file system to go offline. We still need
 * to clean up the delayed refs.
 */
static noinline int commit_cowonly_roots(struct btrfs_trans_handle *trans,
					 struct btrfs_root *root)
{
	struct btrfs_fs_info *fs_info = root->fs_info;
	struct list_head *next;
	struct extent_buffer *eb;
	int ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret)
		return ret;

	eb = btrfs_lock_root_node(fs_info->tree_root);
	ret = btrfs_cow_block(trans, fs_info->tree_root, eb, NULL,
			      0, &eb);
	btrfs_tree_unlock(eb);
	free_extent_buffer(eb);

	if (ret)
		return ret;

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret)
		return ret;

	ret = btrfs_run_dev_stats(trans, root->fs_info);
	BUG_ON(ret);

	ret = btrfs_run_qgroups(trans, root->fs_info);
	BUG_ON(ret);

	/* run_qgroups might have added some more refs */
	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	BUG_ON(ret);

	while (!list_empty(&fs_info->dirty_cowonly_roots)) {
		next = fs_info->dirty_cowonly_roots.next;
		list_del_init(next);
		root = list_entry(next, struct btrfs_root, dirty_list);

		ret = update_cowonly_root(trans, root);
		if (ret)
			return ret;
	}

	down_write(&fs_info->extent_commit_sem);
	switch_commit_root(fs_info->extent_root);
	up_write(&fs_info->extent_commit_sem);

	return 0;
}

/*
 * dead roots are old snapshots that need to be deleted. This allocates
 * a dirty root struct and adds it into the list of dead roots that need to
 * be deleted
 */
int btrfs_add_dead_root(struct btrfs_root *root)
{
	spin_lock(&root->fs_info->trans_lock);
	list_add(&root->root_list, &root->fs_info->dead_roots);
	spin_unlock(&root->fs_info->trans_lock);
	return 0;
}

/*
 * update all the fs tree roots on disk
 */
static noinline int commit_fs_roots(struct btrfs_trans_handle *trans,
				    struct btrfs_root *root)
{
	struct btrfs_root *gang[8];
	struct btrfs_fs_info *fs_info = root->fs_info;
	int i;
	int ret;
	int err = 0;

	spin_lock(&fs_info->fs_roots_radix_lock);
	while (1) {
		ret = radix_tree_gang_lookup_tag(&fs_info->fs_roots_radix,
						 (void **)gang, 0,
						 ARRAY_SIZE(gang),
						 BTRFS_ROOT_TRANS_TAG);
		if (ret == 0)
			break;
		for (i = 0; i < ret; i++) {
			root = gang[i];
			radix_tree_tag_clear(&fs_info->fs_roots_radix,
				     (unsigned long)root->root_key.objectid,
				     BTRFS_ROOT_TRANS_TAG);
			spin_unlock(&fs_info->fs_roots_radix_lock);

			btrfs_free_log(trans, root);
			btrfs_update_reloc_root(trans, root);
			btrfs_orphan_commit_root(trans, root);

			btrfs_save_ino_cache(root, trans);

			/* see comments in should_cow_block() */
			root->force_cow = 0;
			smp_wmb();

			if (root->commit_root != root->node) {
				mutex_lock(&root->fs_commit_mutex);
				switch_commit_root(root);
				btrfs_unpin_free_ino(root);
				mutex_unlock(&root->fs_commit_mutex);

				btrfs_set_root_node(&root->root_item,
						    root->node);
			}

			err = btrfs_update_root(trans, fs_info->tree_root,
						&root->root_key,
						&root->root_item);
			spin_lock(&fs_info->fs_roots_radix_lock);
			if (err)
				break;
		}
	}
	spin_unlock(&fs_info->fs_roots_radix_lock);
	return err;
}

/*
 * defrag a given btree. If cacheonly == 1, this won't read from the disk,
 * otherwise every leaf in the btree is read and defragged.
 */
int btrfs_defrag_root(struct btrfs_root *root, int cacheonly)
{
	struct btrfs_fs_info *info = root->fs_info;
	struct btrfs_trans_handle *trans;
	int ret;
	unsigned long nr;

	if (xchg(&root->defrag_running, 1))
		return 0;

	while (1) {
		trans = btrfs_start_transaction(root, 0);
		if (IS_ERR(trans))
			return PTR_ERR(trans);

		ret = btrfs_defrag_leaves(trans, root, cacheonly);

		nr = trans->blocks_used;
		btrfs_end_transaction(trans, root);
		btrfs_btree_balance_dirty(info->tree_root, nr);
		cond_resched();

		if (btrfs_fs_closing(root->fs_info) || ret != -EAGAIN)
			break;
	}
	root->defrag_running = 0;
	return ret;
}

/*
 * new snapshots need to be created at a very specific time in the
 * transaction commit. This does the actual creation
 */
static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
				   struct btrfs_fs_info *fs_info,
				   struct btrfs_pending_snapshot *pending)
{
	struct btrfs_key key;
	struct btrfs_root_item *new_root_item;
	struct btrfs_root *tree_root = fs_info->tree_root;
	struct btrfs_root *root = pending->root;
	struct btrfs_root *parent_root;
	struct btrfs_block_rsv *rsv;
	struct inode *parent_inode;
	struct dentry *parent;
	struct dentry *dentry;
	struct extent_buffer *tmp;
	struct extent_buffer *old;
	struct timespec cur_time = CURRENT_TIME;
	int ret;
	u64 to_reserve = 0;
	u64 index = 0;
	u64 objectid;
	u64 root_flags;
	uuid_le new_uuid;

	rsv = trans->block_rsv;

	new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS);
	if (!new_root_item) {
		ret = pending->error = -ENOMEM;
		goto fail;
	}

	ret = btrfs_find_free_objectid(tree_root, &objectid);
	if (ret) {
		pending->error = ret;
		goto fail;
	}

	btrfs_reloc_pre_snapshot(trans, pending, &to_reserve);

	if (to_reserve > 0) {
		ret = btrfs_block_rsv_add_noflush(root, &pending->block_rsv,
						  to_reserve);
		if (ret) {
			pending->error = ret;
			goto fail;
		}
	}

	ret = btrfs_qgroup_inherit(trans, fs_info, root->root_key.objectid,
				   objectid, pending->inherit);
	kfree(pending->inherit);
	if (ret) {
		pending->error = ret;
		goto fail;
	}

	key.objectid = objectid;
	key.offset = (u64)-1;
	key.type = BTRFS_ROOT_ITEM_KEY;

	trans->block_rsv = &pending->block_rsv;

	dentry = pending->dentry;
	parent = dget_parent(dentry);
	parent_inode = parent->d_inode;
	parent_root = BTRFS_I(parent_inode)->root;
	record_root_in_trans(trans, parent_root);

	/*
	 * insert the directory item
	 */
	ret = btrfs_set_inode_index(parent_inode, &index);
	BUG_ON(ret); /* -ENOMEM */
	ret = btrfs_insert_dir_item(trans, parent_root,
				dentry->d_name.name, dentry->d_name.len,
				parent_inode, &key,
				BTRFS_FT_DIR, index);
	if (ret == -EEXIST) {
		pending->error = -EEXIST;
		dput(parent);
		goto fail;
	} else if (ret) {
		goto abort_trans_dput;
	}

	btrfs_i_size_write(parent_inode, parent_inode->i_size +
					 dentry->d_name.len * 2);
	ret = btrfs_update_inode(trans, parent_root, parent_inode);
	if (ret)
		goto abort_trans_dput;

	/*
	 * pull in the delayed directory update
	 * and the delayed inode item
	 * otherwise we corrupt the FS during
	 * snapshot
	 */
	ret = btrfs_run_delayed_items(trans, root);
	if (ret) { /* Transaction aborted */
		dput(parent);
		goto fail;
	}

	record_root_in_trans(trans, root);
	btrfs_set_root_last_snapshot(&root->root_item, trans->transid);
	memcpy(new_root_item, &root->root_item, sizeof(*new_root_item));
	btrfs_check_and_init_root_item(new_root_item);

	root_flags = btrfs_root_flags(new_root_item);
	if (pending->readonly)
		root_flags |= BTRFS_ROOT_SUBVOL_RDONLY;
	else
		root_flags &= ~BTRFS_ROOT_SUBVOL_RDONLY;
	btrfs_set_root_flags(new_root_item, root_flags);

	btrfs_set_root_generation_v2(new_root_item,
			trans->transid);
	uuid_le_gen(&new_uuid);
	memcpy(new_root_item->uuid, new_uuid.b, BTRFS_UUID_SIZE);
	memcpy(new_root_item->parent_uuid, root->root_item.uuid,
			BTRFS_UUID_SIZE);
	new_root_item->otime.sec = cpu_to_le64(cur_time.tv_sec);
	new_root_item->otime.nsec = cpu_to_le64(cur_time.tv_nsec);
	btrfs_set_root_otransid(new_root_item, trans->transid);
	memset(&new_root_item->stime, 0, sizeof(new_root_item->stime));
	memset(&new_root_item->rtime, 0, sizeof(new_root_item->rtime));
	btrfs_set_root_stransid(new_root_item, 0);
	btrfs_set_root_rtransid(new_root_item, 0);

	old = btrfs_lock_root_node(root);
	ret = btrfs_cow_block(trans, root, old, NULL, 0, &old);
	if (ret) {
		btrfs_tree_unlock(old);
		free_extent_buffer(old);
		goto abort_trans_dput;
	}

	btrfs_set_lock_blocking(old);

	ret = btrfs_copy_root(trans, root, old, &tmp, objectid);
	/* clean up in any case */
	btrfs_tree_unlock(old);
	free_extent_buffer(old);
	if (ret)
		goto abort_trans_dput;

	/* see comments in should_cow_block() */
	root->force_cow = 1;
	smp_wmb();

	btrfs_set_root_node(new_root_item, tmp);
	/* record when the snapshot was created in key.offset */
	key.offset = trans->transid;
	ret = btrfs_insert_root(trans, tree_root, &key, new_root_item);
	btrfs_tree_unlock(tmp);
	free_extent_buffer(tmp);
	if (ret)
		goto abort_trans_dput;

	/*
	 * insert root back/forward references
	 */
	ret = btrfs_add_root_ref(trans, tree_root, objectid,
				 parent_root->root_key.objectid,
				 btrfs_ino(parent_inode), index,
				 dentry->d_name.name, dentry->d_name.len);
	dput(parent);
	if (ret)
		goto fail;

	key.offset = (u64)-1;
	pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key);
	if (IS_ERR(pending->snap)) {
		ret = PTR_ERR(pending->snap);
		goto abort_trans;
	}

	ret = btrfs_reloc_post_snapshot(trans, pending);
	if (ret)
		goto abort_trans;
	ret = 0;
fail:
	kfree(new_root_item);
	trans->block_rsv = rsv;
	btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1);
	return ret;

abort_trans_dput:
	dput(parent);
abort_trans:
	btrfs_abort_transaction(trans, root, ret);
	goto fail;
}

/*
 * create all the snapshots we've scheduled for creation
 */
static noinline int create_pending_snapshots(struct btrfs_trans_handle *trans,
					     struct btrfs_fs_info *fs_info)
{
	struct btrfs_pending_snapshot *pending;
	struct list_head *head = &trans->transaction->pending_snapshots;

	list_for_each_entry(pending, head, list)
		create_pending_snapshot(trans, fs_info, pending);
	return 0;
}

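/*
 * copy the root pointers of the chunk tree and the tree of tree roots into
 * the in-memory super block that this commit will write out
 */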
static void update_super_roots(struct btrfs_root *root)
{
	struct btrfs_root_item *root_item;
	struct btrfs_super_block *super;

	super = root->fs_info->super_copy;

	root_item = &root->fs_info->chunk_root->root_item;
	super->chunk_root = root_item->bytenr;
	super->chunk_root_generation = root_item->generation;
	super->chunk_root_level = root_item->level;

	root_item = &root->fs_info->tree_root->root_item;
	super->root = root_item->bytenr;
	super->generation = root_item->generation;
	super->root_level = root_item->level;
	if (btrfs_test_opt(root, SPACE_CACHE))
		super->cache_generation = root_item->generation;
}

int btrfs_transaction_in_commit(struct btrfs_fs_info *info)
{
	int ret = 0;
	spin_lock(&info->trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->in_commit;
	spin_unlock(&info->trans_lock);
	return ret;
}

int btrfs_transaction_blocked(struct btrfs_fs_info *info)
{
	int ret = 0;
	spin_lock(&info->trans_lock);
	if (info->running_transaction)
		ret = info->running_transaction->blocked;
	spin_unlock(&info->trans_lock);
	return ret;
}

/*
 * wait for the current transaction commit to start and block subsequent
 * transaction joins
 */
static void wait_current_trans_commit_start(struct btrfs_root *root,
					    struct btrfs_transaction *trans)
{
	wait_event(root->fs_info->transaction_blocked_wait, trans->in_commit);
}

/*
 * wait for the current transaction to start and then become unblocked.
 * caller holds ref.
 */
static void wait_current_trans_commit_start_and_unblock(struct btrfs_root *root,
					 struct btrfs_transaction *trans)
{
	wait_event(root->fs_info->transaction_wait,
		   trans->commit_done || (trans->in_commit && !trans->blocked));
}

/*
 * commit transactions asynchronously. once btrfs_commit_transaction_async
 * returns, any subsequent transaction will not be allowed to join.
 */
struct btrfs_async_commit {
	struct btrfs_trans_handle *newtrans;
	struct btrfs_root *root;
	struct delayed_work work;
};

static void do_async_commit(struct work_struct *work)
{
	struct btrfs_async_commit *ac =
		container_of(work, struct btrfs_async_commit, work.work);

	btrfs_commit_transaction(ac->newtrans, ac->root);
	kfree(ac);
}

int btrfs_commit_transaction_async(struct btrfs_trans_handle *trans,
				   struct btrfs_root *root,
				   int wait_for_unblock)
{
	struct btrfs_async_commit *ac;
	struct btrfs_transaction *cur_trans;

	ac = kmalloc(sizeof(*ac), GFP_NOFS);
	if (!ac)
		return -ENOMEM;

	INIT_DELAYED_WORK(&ac->work, do_async_commit);
	ac->root = root;
	ac->newtrans = btrfs_join_transaction(root);
	if (IS_ERR(ac->newtrans)) {
		int err = PTR_ERR(ac->newtrans);
		kfree(ac);
		return err;
	}

	/* take transaction reference */
	cur_trans = trans->transaction;
	atomic_inc(&cur_trans->use_count);

	btrfs_end_transaction(trans, root);
	schedule_delayed_work(&ac->work, 0);

	/* wait for transaction to start and unblock */
	if (wait_for_unblock)
		wait_current_trans_commit_start_and_unblock(root, cur_trans);
	else
		wait_current_trans_commit_start(root, cur_trans);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	put_transaction(cur_trans);
	return 0;
}

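/*
 * error path of btrfs_commit_transaction: abort the transaction, unhook it
 * from the fs_info and drop our references to it
 */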
static void cleanup_transaction(struct btrfs_trans_handle *trans,
				struct btrfs_root *root, int err)
{
	struct btrfs_transaction *cur_trans = trans->transaction;

	WARN_ON(trans->use_count > 1);

	btrfs_abort_transaction(trans, root, err);

	spin_lock(&root->fs_info->trans_lock);
	list_del_init(&cur_trans->list);
	if (cur_trans == root->fs_info->running_transaction) {
		root->fs_info->running_transaction = NULL;
		root->fs_info->trans_no_join = 0;
	}
	spin_unlock(&root->fs_info->trans_lock);

	btrfs_cleanup_one_transaction(trans->transaction, root);

	put_transaction(cur_trans);
	put_transaction(cur_trans);

	trace_btrfs_transaction_commit(root);

	btrfs_scrub_continue(root);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	kmem_cache_free(btrfs_trans_handle_cachep, trans);
}

/*
 * btrfs_transaction state sequence:
 *    in_commit = 0, blocked = 0  (initial)
 *    in_commit = 1, blocked = 1
 *    blocked = 0
 *    commit_done = 1
 */
int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
			     struct btrfs_root *root)
{
	unsigned long joined = 0;
	struct btrfs_transaction *cur_trans = trans->transaction;
	struct btrfs_transaction *prev_trans = NULL;
	DEFINE_WAIT(wait);
	int ret = -EIO;
	int should_grow = 0;
	unsigned long now = get_seconds();
	int flush_on_commit = btrfs_test_opt(root, FLUSHONCOMMIT);

	btrfs_run_ordered_operations(root, 0);

	if (cur_trans->aborted)
		goto cleanup_transaction;

	/* make a pass through all the delayed refs we have so far
	 * any runnings procs may add more while we are here
	 */
	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (ret)
		goto cleanup_transaction;

	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;

	cur_trans = trans->transaction;

	/*
	 * set the flushing flag so procs in this transaction have to
	 * start sending their work down.
	 */
	cur_trans->delayed_refs.flushing = 1;

	ret = btrfs_run_delayed_refs(trans, root, 0);
	if (ret)
		goto cleanup_transaction;

	spin_lock(&cur_trans->commit_lock);
	if (cur_trans->in_commit) {
		spin_unlock(&cur_trans->commit_lock);
		atomic_inc(&cur_trans->use_count);
		ret = btrfs_end_transaction(trans, root);

		wait_for_commit(root, cur_trans);

		put_transaction(cur_trans);

		return ret;
	}

	trans->transaction->in_commit = 1;
	trans->transaction->blocked = 1;
	spin_unlock(&cur_trans->commit_lock);
	wake_up(&root->fs_info->transaction_blocked_wait);

	spin_lock(&root->fs_info->trans_lock);
	if (cur_trans->list.prev != &root->fs_info->trans_list) {
		prev_trans = list_entry(cur_trans->list.prev,
					struct btrfs_transaction, list);
		if (!prev_trans->commit_done) {
			atomic_inc(&prev_trans->use_count);
			spin_unlock(&root->fs_info->trans_lock);

			wait_for_commit(root, prev_trans);

			put_transaction(prev_trans);
		} else {
			spin_unlock(&root->fs_info->trans_lock);
		}
	} else {
		spin_unlock(&root->fs_info->trans_lock);
	}

	if (!btrfs_test_opt(root, SSD) &&
	    (now < cur_trans->start_time || now - cur_trans->start_time < 1))
		should_grow = 1;

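	/*
	 * wait for the other writers to finish; when should_grow is set we
	 * also keep looping while new writers keep joining, so that more
	 * work gets batched into this commit
	 */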
	do {
		int snap_pending = 0;

		joined = cur_trans->num_joined;
		if (!list_empty(&trans->transaction->pending_snapshots))
			snap_pending = 1;

		WARN_ON(cur_trans != trans->transaction);

		if (flush_on_commit || snap_pending) {
			btrfs_start_delalloc_inodes(root, 1);
			btrfs_wait_ordered_extents(root, 0, 1);
		}

		ret = btrfs_run_delayed_items(trans, root);
		if (ret)
			goto cleanup_transaction;

		/*
		 * running the delayed items may have added new refs. account
		 * them now so that they hinder processing of more delayed refs
		 * as little as possible.
		 */
		btrfs_delayed_refs_qgroup_accounting(trans, root->fs_info);

		/*
		 * rename doesn't use btrfs_join_transaction, so, once we
		 * set the transaction to blocked above, we aren't going
		 * to get any new ordered operations. We can safely run
		 * it here and know for sure that nothing new will be added
		 * to the list
		 */
		btrfs_run_ordered_operations(root, 1);

		prepare_to_wait(&cur_trans->writer_wait, &wait,
				TASK_UNINTERRUPTIBLE);

		if (atomic_read(&cur_trans->num_writers) > 1)
			schedule_timeout(MAX_SCHEDULE_TIMEOUT);
		else if (should_grow)
			schedule_timeout(1);

		finish_wait(&cur_trans->writer_wait, &wait);
	} while (atomic_read(&cur_trans->num_writers) > 1 ||
		 (should_grow && cur_trans->num_joined != joined));

	/*
	 * Ok now we need to make sure to block out any other joins while we
	 * commit the transaction. We could have started a join before setting
	 * no_join so make sure to wait for num_writers to == 1 again.
	 */
	spin_lock(&root->fs_info->trans_lock);
	root->fs_info->trans_no_join = 1;
	spin_unlock(&root->fs_info->trans_lock);
	wait_event(cur_trans->writer_wait,
		   atomic_read(&cur_trans->num_writers) == 1);

	/*
	 * the reloc mutex makes sure that we stop
	 * the balancing code from coming in and moving
	 * extents around in the middle of the commit
	 */
	mutex_lock(&root->fs_info->reloc_mutex);

	ret = btrfs_run_delayed_items(trans, root);
	if (ret) {
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	ret = create_pending_snapshots(trans, root->fs_info);
	if (ret) {
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1);
	if (ret) {
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	/*
	 * make sure none of the code above managed to slip in a
	 * delayed item
	 */
	btrfs_assert_delayed_root_empty(root);

	WARN_ON(cur_trans != trans->transaction);

	btrfs_scrub_pause(root);
	/* commit_cowonly_roots is responsible for getting the
	 * various roots consistent with each other. Every pointer
	 * in the tree of tree roots has to point to the most up to date
	 * root for every subvolume and other tree. So, we have to keep
	 * the tree logging code from jumping in and changing any
	 * of the trees.
	 *
	 * At this point in the commit, there can't be any tree-log
	 * writers, but a little lower down we drop the trans mutex
	 * and let new people in. By holding the tree_log_mutex
	 * from now until after the super is written, we avoid races
	 * with the tree-log code.
	 */
	mutex_lock(&root->fs_info->tree_log_mutex);

	ret = commit_fs_roots(trans, root);
	if (ret) {
		mutex_unlock(&root->fs_info->tree_log_mutex);
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	/* commit_fs_roots gets rid of all the tree log roots, it is now
	 * safe to free the root of tree log roots
	 */
	btrfs_free_log_root_tree(trans, root->fs_info);

	ret = commit_cowonly_roots(trans, root);
	if (ret) {
		mutex_unlock(&root->fs_info->tree_log_mutex);
		mutex_unlock(&root->fs_info->reloc_mutex);
		goto cleanup_transaction;
	}

	btrfs_prepare_extent_commit(trans, root);

	cur_trans = root->fs_info->running_transaction;

	btrfs_set_root_node(&root->fs_info->tree_root->root_item,
			    root->fs_info->tree_root->node);
	switch_commit_root(root->fs_info->tree_root);

	btrfs_set_root_node(&root->fs_info->chunk_root->root_item,
			    root->fs_info->chunk_root->node);
	switch_commit_root(root->fs_info->chunk_root);

	assert_qgroups_uptodate(trans);
	update_super_roots(root);

	if (!root->fs_info->log_root_recovering) {
		btrfs_set_super_log_root(root->fs_info->super_copy, 0);
		btrfs_set_super_log_root_level(root->fs_info->super_copy, 0);
	}

	memcpy(root->fs_info->super_for_commit, root->fs_info->super_copy,
	       sizeof(*root->fs_info->super_copy));

	trans->transaction->blocked = 0;
	spin_lock(&root->fs_info->trans_lock);
	root->fs_info->running_transaction = NULL;
	root->fs_info->trans_no_join = 0;
	spin_unlock(&root->fs_info->trans_lock);
	mutex_unlock(&root->fs_info->reloc_mutex);

	wake_up(&root->fs_info->transaction_wait);

	ret = btrfs_write_and_wait_transaction(trans, root);
	if (ret) {
		btrfs_error(root->fs_info, ret,
			    "Error while writing out transaction.");
		mutex_unlock(&root->fs_info->tree_log_mutex);
		goto cleanup_transaction;
	}

	ret = write_ctree_super(trans, root, 0);
	if (ret) {
		mutex_unlock(&root->fs_info->tree_log_mutex);
		goto cleanup_transaction;
	}

	/*
	 * the super is written, we can safely allow the tree-loggers
	 * to go about their business
	 */
	mutex_unlock(&root->fs_info->tree_log_mutex);

	btrfs_finish_extent_commit(trans, root);

	cur_trans->commit_done = 1;

	root->fs_info->last_trans_committed = cur_trans->transid;

	wake_up(&cur_trans->commit_wait);

	spin_lock(&root->fs_info->trans_lock);
	list_del_init(&cur_trans->list);
	spin_unlock(&root->fs_info->trans_lock);

	put_transaction(cur_trans);
	put_transaction(cur_trans);

	trace_btrfs_transaction_commit(root);

	btrfs_scrub_continue(root);

	if (current->journal_info == trans)
		current->journal_info = NULL;

	kmem_cache_free(btrfs_trans_handle_cachep, trans);

	if (current != root->fs_info->transaction_kthread)
		btrfs_run_delayed_iputs(root);

	return ret;

cleanup_transaction:
	btrfs_trans_release_metadata(trans, root);
	trans->block_rsv = NULL;
	btrfs_printk(root->fs_info, "Skipping commit of aborted transaction.\n");
//	WARN_ON(1);
	if (current->journal_info == trans)
		current->journal_info = NULL;
	cleanup_transaction(trans, root, ret);

	return ret;
}

/*
 * interface function to delete all the snapshots we have scheduled for deletion
 */
int btrfs_clean_old_snapshots(struct btrfs_root *root)
{
	LIST_HEAD(list);
	struct btrfs_fs_info *fs_info = root->fs_info;

	spin_lock(&fs_info->trans_lock);
	list_splice_init(&fs_info->dead_roots, &list);
	spin_unlock(&fs_info->trans_lock);

	while (!list_empty(&list)) {
		int ret;

		root = list_entry(list.next, struct btrfs_root, root_list);
		list_del(&root->root_list);

		btrfs_kill_all_delayed_nodes(root);

		if (btrfs_header_backref_rev(root->node) <
		    BTRFS_MIXED_BACKREF_REV)
			ret = btrfs_drop_snapshot(root, NULL, 0, 0);
		else
			ret = btrfs_drop_snapshot(root, NULL, 1, 0);
		BUG_ON(ret < 0);
	}
	return 0;
}