/*
 * linux/fs/jbd2/transaction.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Generic filesystem transaction handling code; part of the ext2fs
 * journaling system.
 *
 * This file manages transactions (compound commits managed by the
 * journaling code) and handles (individual atomic operations by the
 * filesystem).
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/hrtimer.h>
#include <linux/backing-dev.h>
#include <linux/module.h>

static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);

/*
 * jbd2_get_transaction: obtain a new transaction_t object.
 *
 * Simply allocate and initialise a new transaction.  Create it in
 * RUNNING state and add it to the current journal (which should not
 * have an existing running transaction: we only make a new transaction
 * once we have started to commit the old one).
 *
 * Preconditions:
 *	The journal MUST be locked.  We don't perform atomic mallocs on the
 *	new transaction and we can't block without protecting against other
 *	processes trying to touch the journal while it is in transition.
 *
 */

static transaction_t *
jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
{
	transaction->t_journal = journal;
	transaction->t_state = T_RUNNING;
	transaction->t_start_time = ktime_get();
	transaction->t_tid = journal->j_transaction_sequence++;
	transaction->t_expires = jiffies + journal->j_commit_interval;
	spin_lock_init(&transaction->t_handle_lock);
	atomic_set(&transaction->t_updates, 0);
	atomic_set(&transaction->t_outstanding_credits, 0);
	atomic_set(&transaction->t_handle_count, 0);
	INIT_LIST_HEAD(&transaction->t_inode_list);
	INIT_LIST_HEAD(&transaction->t_private_list);

	/* Set up the commit timer for the new transaction. */
	journal->j_commit_timer.expires = round_jiffies_up(transaction->t_expires);
	add_timer(&journal->j_commit_timer);

	J_ASSERT(journal->j_running_transaction == NULL);
	journal->j_running_transaction = transaction;
	transaction->t_max_wait = 0;
	transaction->t_start = jiffies;

	return transaction;
}

/*
 * Handle management.
 *
 * A handle_t is an object which represents a single atomic update to a
 * filesystem, and which tracks all of the modifications which form part
 * of that one update.
 */
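/*
 * For illustration: many concurrently running handles normally attach
 * to the same running transaction, which the commit code later writes
 * out as a single compound commit, roughly:
 *
 *	task A: jbd2_journal_start() --\
 *	task B: jbd2_journal_start() ----> transaction T (T_RUNNING)
 *	task C: jbd2_journal_start() --/
 *
 * and T is committed as one unit once all attached handles have
 * stopped or the commit interval expires.
 */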
/*
 * Update transaction's maximum wait time, if debugging is enabled.
 *
 * In order for t_max_wait to be reliable, it must be protected by a
 * lock.  But doing so will mean that start_this_handle() can not be
 * run in parallel on SMP systems, which limits our scalability.  So
 * unless debugging is enabled, we no longer update t_max_wait, which
 * means that maximum wait time reported by the jbd2_run_stats
 * tracepoint will always be zero.
 */
static inline void update_t_max_wait(transaction_t *transaction)
{
#ifdef CONFIG_JBD2_DEBUG
	unsigned long ts = jiffies;

	if (jbd2_journal_enable_debug &&
	    time_after(transaction->t_start, ts)) {
		ts = jbd2_time_diff(ts, transaction->t_start);
		spin_lock(&transaction->t_handle_lock);
		if (ts > transaction->t_max_wait)
			transaction->t_max_wait = ts;
		spin_unlock(&transaction->t_handle_lock);
	}
#endif
}

/*
 * start_this_handle: Given a handle, deal with any locking or stalling
 * needed to make sure that there is enough journal space for the handle
 * to begin.  Attach the handle to a transaction and set up the
 * transaction's buffer credits.
 */

static int start_this_handle(journal_t *journal, handle_t *handle,
			     int gfp_mask)
{
	transaction_t *transaction;
	int needed;
	int nblocks = handle->h_buffer_credits;
	transaction_t *new_transaction = NULL;

	if (nblocks > journal->j_max_transaction_buffers) {
		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
		       current->comm, nblocks,
		       journal->j_max_transaction_buffers);
		return -ENOSPC;
	}

alloc_transaction:
	if (!journal->j_running_transaction) {
		new_transaction = kzalloc(sizeof(*new_transaction), gfp_mask);
		if (!new_transaction) {
			/*
			 * If __GFP_FS is not present, then we may be
			 * being called from inside the fs writeback
			 * layer, so we MUST NOT fail.  Since
			 * __GFP_NOFAIL is going away, we will arrange
			 * to retry the allocation ourselves.
			 */
			if ((gfp_mask & __GFP_FS) == 0) {
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				goto alloc_transaction;
			}
			return -ENOMEM;
		}
	}

	jbd_debug(3, "New handle %p going live.\n", handle);

	/*
	 * We need to hold j_state_lock until t_updates has been incremented,
	 * for proper journal barrier handling
	 */
repeat:
	read_lock(&journal->j_state_lock);
	if (is_journal_aborted(journal) ||
	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
		read_unlock(&journal->j_state_lock);
		kfree(new_transaction);
		return -EROFS;
	}

	/* Wait on the journal's transaction barrier if necessary */
	if (journal->j_barrier_count) {
		read_unlock(&journal->j_state_lock);
		wait_event(journal->j_wait_transaction_locked,
				journal->j_barrier_count == 0);
		goto repeat;
	}

	if (!journal->j_running_transaction) {
		read_unlock(&journal->j_state_lock);
		if (!new_transaction)
			goto alloc_transaction;
		write_lock(&journal->j_state_lock);
		if (!journal->j_running_transaction) {
			jbd2_get_transaction(journal, new_transaction);
			new_transaction = NULL;
		}
		write_unlock(&journal->j_state_lock);
		goto repeat;
	}

	transaction = journal->j_running_transaction;

	/*
	 * If the current transaction is locked down for commit, wait for the
	 * lock to be released.
	 */
	if (transaction->t_state == T_LOCKED) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_transaction_locked,
					&wait, TASK_UNINTERRUPTIBLE);
		read_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_transaction_locked, &wait);
		goto repeat;
	}

	/*
	 * If there is not enough space left in the log to write all potential
	 * buffers requested by this operation, we need to stall pending a log
	 * checkpoint to free some more log space.
	 */
	needed = atomic_add_return(nblocks,
				   &transaction->t_outstanding_credits);

	if (needed > journal->j_max_transaction_buffers) {
		/*
		 * If the current transaction is already too large, then start
		 * to commit it: we can then go back and attach this handle to
		 * a new transaction.
		 */
		DEFINE_WAIT(wait);

		jbd_debug(2, "Handle %p starting new commit...\n", handle);
		atomic_sub(nblocks, &transaction->t_outstanding_credits);
		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
				TASK_UNINTERRUPTIBLE);
		__jbd2_log_start_commit(journal, transaction->t_tid);
		read_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_transaction_locked, &wait);
		goto repeat;
	}

	/*
	 * The commit code assumes that it can get enough log space
	 * without forcing a checkpoint.  This is *critical* for
	 * correctness: a checkpoint of a buffer which is also
	 * associated with a committing transaction creates a deadlock,
	 * so commit simply cannot force through checkpoints.
	 *
	 * We must therefore ensure the necessary space in the journal
	 * *before* starting to dirty potentially checkpointed buffers
	 * in the new transaction.
	 *
	 * The worst part is, any transaction currently committing can
	 * reduce the free space arbitrarily.  Be careful to account for
	 * those buffers when checkpointing.
	 */

	/*
	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
	 * a _lot_ of headroom: 1/4 of the journal plus the size of
	 * the committing transaction.  Really, we only need to give it
	 * committing_transaction->t_outstanding_credits plus "enough" for
	 * the log control blocks.
	 * Also, this test is inconsistent with the matching one in
	 * jbd2_journal_extend().
	 */
	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
		atomic_sub(nblocks, &transaction->t_outstanding_credits);
		read_unlock(&journal->j_state_lock);
		write_lock(&journal->j_state_lock);
		if (__jbd2_log_space_left(journal) < jbd_space_needed(journal))
			__jbd2_log_wait_for_space(journal);
		write_unlock(&journal->j_state_lock);
		goto repeat;
	}

	/* OK, account for the buffers that this operation expects to
	 * use and add the handle to the running transaction.
	 */
	update_t_max_wait(transaction);
	handle->h_transaction = transaction;
	atomic_inc(&transaction->t_updates);
	atomic_inc(&transaction->t_handle_count);
	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
		  handle, nblocks,
		  atomic_read(&transaction->t_outstanding_credits),
		  __jbd2_log_space_left(journal));
	read_unlock(&journal->j_state_lock);

	lock_map_acquire(&handle->h_lockdep_map);
	kfree(new_transaction);
	return 0;
}

static struct lock_class_key jbd2_handle_key;

/* Allocate a new handle.  This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
	if (!handle)
		return NULL;
	memset(handle, 0, sizeof(*handle));
	handle->h_buffer_credits = nblocks;
	handle->h_ref = 1;

	lockdep_init_map(&handle->h_lockdep_map, "jbd2_handle",
						&jbd2_handle_key, 0);

	return handle;
}
/**
 * handle_t *jbd2_journal_start() - Obtain a new handle.
 * @journal: Journal to start transaction on.
 * @nblocks: number of block buffers we might modify
 *
 * We make sure that the transaction can guarantee at least nblocks of
 * modified buffers in the log.  We block until the log can guarantee
 * that much space.
 *
 * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
 * Return a pointer to a newly allocated handle, or an ERR_PTR() value
 * on failure.
 */
handle_t *jbd2__journal_start(journal_t *journal, int nblocks, int gfp_mask)
{
	handle_t *handle = journal_current_handle();
	int err;

	if (!journal)
		return ERR_PTR(-EROFS);

	if (handle) {
		J_ASSERT(handle->h_transaction->t_journal == journal);
		handle->h_ref++;
		return handle;
	}

	handle = new_handle(nblocks);
	if (!handle)
		return ERR_PTR(-ENOMEM);

	current->journal_info = handle;

	err = start_this_handle(journal, handle, gfp_mask);
	if (err < 0) {
		jbd2_free_handle(handle);
		current->journal_info = NULL;
		handle = ERR_PTR(err);
	}
	return handle;
}
EXPORT_SYMBOL(jbd2__journal_start);


handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{
	return jbd2__journal_start(journal, nblocks, GFP_NOFS);
}
EXPORT_SYMBOL(jbd2_journal_start);
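/*
 * Illustrative sketch of a typical caller (names are the caller's own):
 * the handle is checked with IS_ERR(), since failure is reported via
 * ERR_PTR() rather than NULL, and every start is balanced by a stop:
 *
 *	handle_t *handle;
 *	int err;
 *
 *	handle = jbd2_journal_start(journal, needed_credits);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	... get write access, modify buffers, dirty metadata ...
 *	err = jbd2_journal_stop(handle);
 *
 * Note that a nested jbd2_journal_start() on the same journal only
 * bumps h_ref on the current handle, so start/stop calls must balance
 * exactly.
 */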
/**
 * int jbd2_journal_extend() - extend buffer credits.
 * @handle:  handle to 'extend'
 * @nblocks: nr blocks to try to extend by.
 *
 * Some transactions, such as large extends and truncates, can be done
 * atomically all at once or in several stages.  The operation requests
 * a credit for a number of buffer modifications in advance, but can
 * extend its credit if it needs more.
 *
 * jbd2_journal_extend tries to give the running handle more buffer credits.
 * It does not guarantee that allocation - this is a best-effort only.
 * The calling process MUST be able to deal cleanly with a failure to
 * extend here.
 *
 * Return 0 on success, non-zero on failure.
 *
 * return code < 0 implies an error
 * return code > 0 implies normal transaction-full status.
 */
int jbd2_journal_extend(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int result;
	int wanted;

	result = -EIO;
	if (is_handle_aborted(handle))
		goto out;

	result = 1;

	read_lock(&journal->j_state_lock);

	/* Don't extend a locked-down transaction! */
	if (handle->h_transaction->t_state != T_RUNNING) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction not running\n", handle, nblocks);
		goto error_out;
	}

	spin_lock(&transaction->t_handle_lock);
	wanted = atomic_read(&transaction->t_outstanding_credits) + nblocks;

	if (wanted > journal->j_max_transaction_buffers) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction too large\n", handle, nblocks);
		goto unlock;
	}

	if (wanted > __jbd2_log_space_left(journal)) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "insufficient log space\n", handle, nblocks);
		goto unlock;
	}

	handle->h_buffer_credits += nblocks;
	atomic_add(nblocks, &transaction->t_outstanding_credits);
	result = 0;

	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
unlock:
	spin_unlock(&transaction->t_handle_lock);
error_out:
	read_unlock(&journal->j_state_lock);
out:
	return result;
}


/**
 * int jbd2_journal_restart() - restart a handle.
 * @handle:  handle to restart
 * @nblocks: nr credits requested
 *
 * Restart a handle for a multi-transaction filesystem
 * operation.
 *
 * If the jbd2_journal_extend() call above fails to grant new buffer credits
 * to a running handle, a call to jbd2_journal_restart will commit the
 * handle's transaction so far and reattach the handle to a new
 * transaction capable of guaranteeing the requested number of
 * credits.
 */
int jbd2__journal_restart(handle_t *handle, int nblocks, int gfp_mask)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int ret;

	/* If we've had an abort of any type, don't even think about
	 * actually doing the restart! */
	if (is_handle_aborted(handle))
		return 0;

	/*
	 * First unlink the handle from its current transaction, and start the
	 * commit on that.
	 */
	J_ASSERT(atomic_read(&transaction->t_updates) > 0);
	J_ASSERT(journal_current_handle() == handle);

	read_lock(&journal->j_state_lock);
	spin_lock(&transaction->t_handle_lock);
	atomic_sub(handle->h_buffer_credits,
		   &transaction->t_outstanding_credits);
	if (atomic_dec_and_test(&transaction->t_updates))
		wake_up(&journal->j_wait_updates);
	spin_unlock(&transaction->t_handle_lock);

	jbd_debug(2, "restarting handle %p\n", handle);
	__jbd2_log_start_commit(journal, transaction->t_tid);
	read_unlock(&journal->j_state_lock);

	lock_map_release(&handle->h_lockdep_map);
	handle->h_buffer_credits = nblocks;
	ret = start_this_handle(journal, handle, gfp_mask);
	return ret;
}
EXPORT_SYMBOL(jbd2__journal_restart);


int jbd2_journal_restart(handle_t *handle, int nblocks)
{
	return jbd2__journal_restart(handle, nblocks, GFP_NOFS);
}
EXPORT_SYMBOL(jbd2_journal_restart);
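/*
 * Illustrative sketch: because extension is best-effort only, the usual
 * caller-side pattern is "try to extend, fall back to restart":
 *
 *	err = jbd2_journal_extend(handle, nblocks);
 *	if (err < 0)
 *		goto fail;	... real error ...
 *	if (err > 0)
 *		err = jbd2_journal_restart(handle, nblocks);
 *
 * After a restart the handle is attached to a *new* transaction, so any
 * buffer dirtied under the old handle may already be committing and
 * must be re-acquired with jbd2_journal_get_write_access() before it is
 * modified again.
 */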
/**
 * void jbd2_journal_lock_updates () - establish a transaction barrier.
 * @journal:  Journal to establish a barrier on.
 *
 * This locks out any further updates from being started, and blocks
 * until all existing updates have completed, returning only once the
 * journal is in a quiescent state with no updates running.
 *
 * The journal lock should not be held on entry.
 */
void jbd2_journal_lock_updates(journal_t *journal)
{
	DEFINE_WAIT(wait);

	write_lock(&journal->j_state_lock);
	++journal->j_barrier_count;

	/* Wait until there are no running updates */
	while (1) {
		transaction_t *transaction = journal->j_running_transaction;

		if (!transaction)
			break;

		spin_lock(&transaction->t_handle_lock);
		if (!atomic_read(&transaction->t_updates)) {
			spin_unlock(&transaction->t_handle_lock);
			break;
		}
		prepare_to_wait(&journal->j_wait_updates, &wait,
				TASK_UNINTERRUPTIBLE);
		spin_unlock(&transaction->t_handle_lock);
		write_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_updates, &wait);
		write_lock(&journal->j_state_lock);
	}
	write_unlock(&journal->j_state_lock);

	/*
	 * We have now established a barrier against other normal updates, but
	 * we also need to barrier against other jbd2_journal_lock_updates() calls
	 * to make sure that we serialise special journal-locked operations
	 * too.
	 */
	mutex_lock(&journal->j_barrier);
}

/**
 * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
 * @journal:  Journal to release the barrier on.
 *
 * Release a transaction barrier obtained with jbd2_journal_lock_updates().
 *
 * Should be called without the journal lock held.
 */
void jbd2_journal_unlock_updates (journal_t *journal)
{
	J_ASSERT(journal->j_barrier_count != 0);

	mutex_unlock(&journal->j_barrier);
	write_lock(&journal->j_state_lock);
	--journal->j_barrier_count;
	write_unlock(&journal->j_state_lock);
	wake_up(&journal->j_wait_transaction_locked);
}
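/*
 * Illustrative sketch: the barrier calls bracket operations that need a
 * quiescent journal, and must balance:
 *
 *	jbd2_journal_lock_updates(journal);
 *	... no new handles can start, no updates are running ...
 *	jbd2_journal_unlock_updates(journal);
 *
 * j_barrier serialises these special sections against each other, while
 * the raised j_barrier_count stalls start_this_handle() callers in the
 * meantime.
 */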
static void warn_dirty_buffer(struct buffer_head *bh)
{
	char b[BDEVNAME_SIZE];

	printk(KERN_WARNING
	       "JBD: Spotted dirty metadata buffer (dev = %s, blocknr = %llu). "
	       "There's a risk of filesystem corruption in case of system "
	       "crash.\n",
	       bdevname(bh->b_bdev, b), (unsigned long long)bh->b_blocknr);
}

/*
 * If the buffer is already part of the current transaction, then there
 * is nothing we need to do.  If it is already part of a prior
 * transaction which we are still committing to disk, then we need to
 * make sure that we do not overwrite the old copy: we do copy-out to
 * preserve the copy going to disk.  We also account the buffer against
 * the handle's metadata buffer credits (unless the buffer is already
 * part of the transaction, that is).
 *
 */
static int
do_get_write_access(handle_t *handle, struct journal_head *jh,
			int force_copy)
{
	struct buffer_head *bh;
	transaction_t *transaction;
	journal_t *journal;
	int error;
	char *frozen_buffer = NULL;
	int need_copy = 0;

	if (is_handle_aborted(handle))
		return -EROFS;

	transaction = handle->h_transaction;
	journal = transaction->t_journal;

	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);

	JBUFFER_TRACE(jh, "entry");
repeat:
	bh = jh2bh(jh);

	/* @@@ Need to check for errors here at some point. */

	lock_buffer(bh);
	jbd_lock_bh_state(bh);

	/* We now hold the buffer lock so it is safe to query the buffer
	 * state.  Is the buffer dirty?
	 *
	 * If so, there are two possibilities.  The buffer may be
	 * non-journaled, and undergoing a quite legitimate writeback.
	 * Otherwise, it is journaled, and we don't expect dirty buffers
	 * in that state (the buffers should be marked JBD_Dirty
	 * instead.)  So either the IO is being done under our own
	 * control and this is a bug, or it's a third party IO such as
	 * dump(8) (which may leave the buffer scheduled for read ---
	 * ie. locked but not dirty) or tune2fs (which may actually have
	 * the buffer dirtied, ugh.)  */

	if (buffer_dirty(bh)) {
		/*
		 * First question: is this buffer already part of the current
		 * transaction or the existing committing transaction?
		 */
		if (jh->b_transaction) {
			J_ASSERT_JH(jh,
				jh->b_transaction == transaction ||
				jh->b_transaction ==
					journal->j_committing_transaction);
			if (jh->b_next_transaction)
				J_ASSERT_JH(jh, jh->b_next_transaction ==
							transaction);
			warn_dirty_buffer(bh);
		}
		/*
		 * In any case we need to clean the dirty flag and we must
		 * do it under the buffer lock to be sure we don't race
		 * with running write-out.
		 */
		JBUFFER_TRACE(jh, "Journalling dirty buffer");
		clear_buffer_dirty(bh);
		set_buffer_jbddirty(bh);
	}

	unlock_buffer(bh);

	error = -EROFS;
	if (is_handle_aborted(handle)) {
		jbd_unlock_bh_state(bh);
		goto out;
	}
	error = 0;

	/*
	 * The buffer is already part of this transaction if b_transaction or
	 * b_next_transaction points to it
	 */
	if (jh->b_transaction == transaction ||
	    jh->b_next_transaction == transaction)
		goto done;

	/*
	 * this is the first time this transaction is touching this buffer,
	 * reset the modified flag
	 */
	jh->b_modified = 0;

	/*
	 * If there is already a copy-out version of this buffer, then we don't
	 * need to make another one
	 */
	if (jh->b_frozen_data) {
		JBUFFER_TRACE(jh, "has frozen data");
		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
		jh->b_next_transaction = transaction;
		goto done;
	}

	/* Is there data here we need to preserve? */

	if (jh->b_transaction && jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "owned by older transaction");
		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

		/* There is one case we have to be very careful about.
		 * If the committing transaction is currently writing
		 * this buffer out to disk and has NOT made a copy-out,
		 * then we cannot modify the buffer contents at all
		 * right now.  The essence of copy-out is that it is the
		 * extra copy, not the primary copy, which gets
		 * journaled.  If the primary copy is already going to
		 * disk then we cannot do copy-out here. */

		if (jh->b_jlist == BJ_Shadow) {
			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
			wait_queue_head_t *wqh;

			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);

			JBUFFER_TRACE(jh, "on shadow: sleep");
			jbd_unlock_bh_state(bh);
			/* commit wakes up all shadow buffers after IO */
			for ( ; ; ) {
				prepare_to_wait(wqh, &wait.wait,
						TASK_UNINTERRUPTIBLE);
				if (jh->b_jlist != BJ_Shadow)
					break;
				schedule();
			}
			finish_wait(wqh, &wait.wait);
			goto repeat;
		}
		/* Only do the copy if the currently-owning transaction
		 * still needs it.  If it is on the Forget list, the
		 * committing transaction is past that stage.  The
		 * buffer had better remain locked during the kmalloc,
		 * but that should be true --- we hold the journal lock
		 * still and the buffer is already on the BUF_JOURNAL
		 * list so won't be flushed.
		 *
		 * Subtle point, though: if this is a get_undo_access,
		 * then we will be relying on the frozen_data to contain
		 * the new value of the committed_data record after the
		 * transaction, so we HAVE to force the frozen_data copy
		 * in that case. */

		if (jh->b_jlist != BJ_Forget || force_copy) {
			JBUFFER_TRACE(jh, "generate frozen data");
			if (!frozen_buffer) {
				JBUFFER_TRACE(jh, "allocate memory for buffer");
				jbd_unlock_bh_state(bh);
				frozen_buffer =
					jbd2_alloc(jh2bh(jh)->b_size,
						   GFP_NOFS);
				if (!frozen_buffer) {
					printk(KERN_EMERG
					       "%s: OOM for frozen_buffer\n",
					       __func__);
					JBUFFER_TRACE(jh, "oom!");
					error = -ENOMEM;
					jbd_lock_bh_state(bh);
					goto done;
				}
				goto repeat;
			}
			jh->b_frozen_data = frozen_buffer;
			frozen_buffer = NULL;
			need_copy = 1;
		}
		jh->b_next_transaction = transaction;
	}


	/*
	 * Finally, if the buffer is not journaled right now, we need to make
	 * sure it doesn't get written to disk before the caller actually
	 * commits the new data
	 */
	if (!jh->b_transaction) {
		JBUFFER_TRACE(jh, "no transaction");
		J_ASSERT_JH(jh, !jh->b_next_transaction);
		jh->b_transaction = transaction;
		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		spin_lock(&journal->j_list_lock);
		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
		spin_unlock(&journal->j_list_lock);
	}

done:
	if (need_copy) {
		struct page *page;
		int offset;
		char *source;

		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
			    "Possible IO failure.\n");
		page = jh2bh(jh)->b_page;
		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
		source = kmap_atomic(page, KM_USER0);
		/* Fire data frozen trigger just before we copy the data */
		jbd2_buffer_frozen_trigger(jh, source + offset,
					   jh->b_triggers);
		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
		kunmap_atomic(source, KM_USER0);

		/*
		 * Now that the frozen data is saved off, we need to store
		 * any matching triggers.
		 */
		jh->b_frozen_triggers = jh->b_triggers;
	}
	jbd_unlock_bh_state(bh);

	/*
	 * If we are about to journal a buffer, then any revoke pending on it is
	 * no longer valid
	 */
	jbd2_journal_cancel_revoke(handle, jh);

out:
	if (unlikely(frozen_buffer))	/* It's usually NULL */
		jbd2_free(frozen_buffer, bh->b_size);

	JBUFFER_TRACE(jh, "exit");
	return error;
}
/**
 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
 * @handle: transaction to add buffer modifications to
 * @bh:     bh to be used for metadata writes
 *
 * Returns an error code or 0 on success.
 *
 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 * because we're write()ing a buffer which is also part of a shared mapping.
 */

int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
{
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
	int rc;

	/* We do not want to get caught playing with fields which the
	 * log thread also manipulates.  Make sure that the buffer
	 * completes any outstanding IO before proceeding. */
	rc = do_get_write_access(handle, jh, 0);
	jbd2_journal_put_journal_head(jh);
	return rc;
}
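/*
 * Illustrative sketch: write access must be declared *before* the
 * buffer is modified, and the modification announced afterwards with
 * jbd2_journal_dirty_metadata() (defined below):
 *
 *	err = jbd2_journal_get_write_access(handle, bh);
 *	if (err)
 *		goto out;
 *	... modify bh->b_data ...
 *	err = jbd2_journal_dirty_metadata(handle, bh);
 *
 * Declaring access first is what lets do_get_write_access() freeze the
 * committing transaction's copy of the data before the caller scribbles
 * on the primary copy.
 */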
/*
 * When the user wants to journal a newly created buffer_head
 * (ie. getblk() returned a new buffer and we are going to populate it
 * manually rather than reading off disk), then we need to keep the
 * buffer_head locked until it has been completely filled with new
 * data.  In this case, we should be able to make the assertion that
 * the bh is not already part of an existing transaction.
 *
 * The buffer should already be locked by the caller by this point.
 * There is no lock ranking violation: it was a newly created,
 * unlocked buffer beforehand. */

/**
 * int jbd2_journal_get_create_access () - notify intent to use newly created bh
 * @handle: transaction to add the new buffer to
 * @bh: new buffer.
 *
 * Call this if you create a new bh.
 */
int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
	int err;

	jbd_debug(5, "journal_head %p\n", jh);
	err = -EROFS;
	if (is_handle_aborted(handle))
		goto out;
	err = 0;

	JBUFFER_TRACE(jh, "entry");
	/*
	 * The buffer may already belong to this transaction due to pre-zeroing
	 * in the filesystem's new_block code.  It may also be on the previous,
	 * committing transaction's lists, but it HAS to be in Forget state in
	 * that case: the transaction must have deleted the buffer for it to be
	 * reused here.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);
	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
		jh->b_transaction == NULL ||
		(jh->b_transaction == journal->j_committing_transaction &&
			  jh->b_jlist == BJ_Forget)));

	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));

	if (jh->b_transaction == NULL) {
		/*
		 * Previous jbd2_journal_forget() could have left the buffer
		 * with jbddirty bit set because it was being committed.  When
		 * the commit finished, we've filed the buffer for
		 * checkpointing and marked it dirty.  Now we are reallocating
		 * the buffer so the transaction freeing it must have
		 * committed and so it's safe to clear the dirty bit.
		 */
		clear_buffer_dirty(jh2bh(jh));
		jh->b_transaction = transaction;

		/* first access by this transaction */
		jh->b_modified = 0;

		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
	} else if (jh->b_transaction == journal->j_committing_transaction) {
		/* first access by this transaction */
		jh->b_modified = 0;

		JBUFFER_TRACE(jh, "set next transaction");
		jh->b_next_transaction = transaction;
	}
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);

	/*
	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
	 * blocks which contain freed but then revoked metadata.  We need
	 * to cancel the revoke in case we end up freeing it yet again
	 * and then reallocating it as data - this would cause a second
	 * revoke, which hits an assertion error.
	 */
	JBUFFER_TRACE(jh, "cancelling revoke");
	jbd2_journal_cancel_revoke(handle, jh);
	jbd2_journal_put_journal_head(jh);
out:
	return err;
}

/**
 * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with
 *     non-rewindable consequences
 * @handle: transaction
 * @bh: buffer to undo
 *
 * Sometimes there is a need to distinguish between metadata which has
 * been committed to disk and that which has not.  The ext3fs code uses
 * this for freeing and allocating space, we have to make sure that we
 * do not reuse freed space until the deallocation has been committed,
 * since if we overwrote that space we would make the delete
 * un-rewindable in case of a crash.
 *
 * To deal with that, jbd2_journal_get_undo_access requests write access to a
 * buffer for parts of non-rewindable operations such as delete
 * operations on the bitmaps.  The journaling code must keep a copy of
 * the buffer's contents prior to the undo_access call until such time
 * as we know that the buffer has definitely been committed to disk.
 *
 * We never need to know which transaction the committed data is part
 * of, buffers touched here are guaranteed to be dirtied later and so
 * will be committed to a new transaction in due course, at which point
 * we can discard the old committed data pointer.
 *
 * Returns error number or 0 on success.
 */
int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
{
	int err;
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
	char *committed_data = NULL;

	JBUFFER_TRACE(jh, "entry");

	/*
	 * Do this first --- it can drop the journal lock, so we want to
	 * make sure that obtaining the committed_data is done
	 * atomically wrt. completion of any outstanding commits.
	 */
	err = do_get_write_access(handle, jh, 1);
	if (err)
		goto out;

repeat:
	if (!jh->b_committed_data) {
		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
		if (!committed_data) {
			printk(KERN_EMERG "%s: No memory for committed data\n",
				__func__);
			err = -ENOMEM;
			goto out;
		}
	}

	jbd_lock_bh_state(bh);
	if (!jh->b_committed_data) {
		/* Copy out the current buffer contents into the
		 * preserved, committed copy. */
		JBUFFER_TRACE(jh, "generate b_committed data");
		if (!committed_data) {
			jbd_unlock_bh_state(bh);
			goto repeat;
		}

		jh->b_committed_data = committed_data;
		committed_data = NULL;
		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
	}
	jbd_unlock_bh_state(bh);
out:
	jbd2_journal_put_journal_head(jh);
	if (unlikely(committed_data))
		jbd2_free(committed_data, bh->b_size);
	return err;
}
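/*
 * Illustrative sketch: undo access suits bitmap-style buffers where
 * allocation decisions must be made against the *committed* state.  A
 * block allocator might do, approximately:
 *
 *	err = jbd2_journal_get_undo_access(handle, bitmap_bh);
 *	if (err)
 *		goto out;
 *	... clear bits in bitmap_bh->b_data, consulting
 *	    bh2jh(bitmap_bh)->b_committed_data so that blocks whose
 *	    freeing has not yet committed are not handed out again ...
 *	err = jbd2_journal_dirty_metadata(handle, bitmap_bh);
 */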
/**
 * void jbd2_journal_set_triggers() - Add triggers for commit writeout
 * @bh: buffer to trigger on
 * @type: struct jbd2_buffer_trigger_type containing the trigger(s).
 *
 * Set any triggers on this journal_head.  This is always safe, because
 * triggers for a committing buffer will be saved off, and triggers for
 * a running transaction will match the buffer in that transaction.
 *
 * Call with NULL to clear the triggers.
 */
void jbd2_journal_set_triggers(struct buffer_head *bh,
			       struct jbd2_buffer_trigger_type *type)
{
	struct journal_head *jh = bh2jh(bh);

	jh->b_triggers = type;
}

void jbd2_buffer_frozen_trigger(struct journal_head *jh, void *mapped_data,
				struct jbd2_buffer_trigger_type *triggers)
{
	struct buffer_head *bh = jh2bh(jh);

	if (!triggers || !triggers->t_frozen)
		return;

	triggers->t_frozen(triggers, bh, mapped_data, bh->b_size);
}

void jbd2_buffer_abort_trigger(struct journal_head *jh,
			       struct jbd2_buffer_trigger_type *triggers)
{
	if (!triggers || !triggers->t_abort)
		return;

	triggers->t_abort(triggers, jh2bh(jh));
}
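/*
 * Illustrative sketch (the my_* names are hypothetical): a client that
 * needs to post-process buffer contents on their way to the log, such
 * as recomputing a checksum, supplies a trigger table and attaches it
 * to the buffer:
 *
 *	static void my_frozen(struct jbd2_buffer_trigger_type *type,
 *			      struct buffer_head *bh, void *mapped_data,
 *			      size_t size)
 *	{
 *		... recompute a checksum over mapped_data ...
 *	}
 *
 *	static struct jbd2_buffer_trigger_type my_triggers = {
 *		.t_frozen = my_frozen,
 *	};
 *
 *	jbd2_journal_set_triggers(bh, &my_triggers);
 */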
/**
 * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata
 * @handle: transaction to add buffer to.
 * @bh: buffer to mark
 *
 * mark dirty metadata which needs to be journaled as part of the current
 * transaction.
 *
 * The buffer is placed on the transaction's metadata list and is marked
 * as belonging to the transaction.
 *
 * Returns error number or 0 on success.
 *
 * Special care needs to be taken if the buffer already belongs to the
 * current committing transaction (in which case we should have frozen
 * data present for that commit).  In that case, we don't relink the
 * buffer: that only gets done when the old transaction finally
 * completes its commit.
 */
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = bh2jh(bh);

	jbd_debug(5, "journal_head %p\n", jh);
	JBUFFER_TRACE(jh, "entry");
	if (is_handle_aborted(handle))
		goto out;

	jbd_lock_bh_state(bh);

	if (jh->b_modified == 0) {
		/*
		 * This buffer has just been modified and is becoming
		 * part of the transaction.  This only needs to be done
		 * once per transaction -bzzz
		 */
		jh->b_modified = 1;
		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
		handle->h_buffer_credits--;
	}

	/*
	 * fastpath, to avoid expensive locking.  If this buffer is already
	 * on the running transaction's metadata list there is nothing to do.
	 * Nobody can take it off again because there is a handle open.
	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
	 * result in this test being false, so we go in and take the locks.
	 */
	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
		JBUFFER_TRACE(jh, "fastpath");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_running_transaction);
		goto out_unlock_bh;
	}

	set_buffer_jbddirty(bh);

	/*
	 * Metadata already on the current transaction list doesn't
	 * need to be filed.  Metadata on another transaction's list must
	 * be committing, and will be refiled once the commit completes:
	 * leave it alone for now.
	 */
	if (jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "already on other transaction");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);
		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
		/* And this case is illegal: we can't reuse another
		 * transaction's data buffer, ever. */
		goto out_unlock_bh;
	}

	/* That test should have eliminated the following case: */
	J_ASSERT_JH(jh, jh->b_frozen_data == NULL);

	JBUFFER_TRACE(jh, "file as BJ_Metadata");
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
	spin_unlock(&journal->j_list_lock);
out_unlock_bh:
	jbd_unlock_bh_state(bh);
out:
	JBUFFER_TRACE(jh, "exit");
	return 0;
}
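/*
 * Worked example of the credit accounting above: h_buffer_credits is
 * only consumed the *first* time a given buffer is dirtied within a
 * transaction (the jh->b_modified test), so a handle started with two
 * credits can legally do:
 *
 *	jbd2_journal_get_write_access(handle, bh1);
 *	jbd2_journal_dirty_metadata(handle, bh1);	... credit 1 ...
 *	jbd2_journal_get_write_access(handle, bh1);
 *	jbd2_journal_dirty_metadata(handle, bh1);	... free, bh1 counted ...
 *	jbd2_journal_get_write_access(handle, bh2);
 *	jbd2_journal_dirty_metadata(handle, bh2);	... credit 2 ...
 */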
/*
 * jbd2_journal_release_buffer: undo a get_write_access without any buffer
 * updates, if the update decided in the end that it didn't need access.
 *
 */
void
jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{
	BUFFER_TRACE(bh, "entry");
}

/**
 * int jbd2_journal_forget() - bforget() for potentially-journaled buffers.
 * @handle: transaction handle
 * @bh:     bh to 'forget'
 *
 * We can only do the bforget if there are no commits pending against the
 * buffer.  If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
 * bh may not be a journalled buffer at all - it may be a non-JBD
 * buffer which came off the hashtable.  Check for this.
 *
 * Decrements bh->b_count by one.
 *
 * Allow this call even if the handle has aborted --- it may be part of
 * the caller's cleanup after an abort.
 */
int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh;
	int drop_reserve = 0;
	int err = 0;
	int was_modified = 0;

	BUFFER_TRACE(bh, "entry");

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	if (!buffer_jbd(bh))
		goto not_jbd;
	jh = bh2jh(bh);

	/* Critical error: attempting to delete a bitmap buffer, maybe?
	 * Don't do any jbd operations, and return an error. */
	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
			 "inconsistent data on disk")) {
		err = -EIO;
		goto not_jbd;
	}

	/* keep track of whether or not this transaction modified us */
	was_modified = jh->b_modified;

	/*
	 * The buffer's going from the transaction, we must drop
	 * all references -bzzz
	 */
	jh->b_modified = 0;

	if (jh->b_transaction == handle->h_transaction) {
		J_ASSERT_JH(jh, !jh->b_frozen_data);

		/* If we are forgetting a buffer which is already part
		 * of this transaction, then we can just drop it from
		 * the transaction immediately. */
		clear_buffer_dirty(bh);
		clear_buffer_jbddirty(bh);

		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");

		/*
		 * we only want to drop a reference if this transaction
		 * modified the buffer
		 */
		if (was_modified)
			drop_reserve = 1;

		/*
		 * We are no longer going to journal this buffer.
		 * However, the commit of this transaction is still
		 * important to the buffer: the delete that we are now
		 * processing might obsolete an old log entry, so by
		 * committing, we can satisfy the buffer's checkpoint.
		 *
		 * So, if we have a checkpoint on the buffer, we should
		 * now refile the buffer on our BJ_Forget list so that
		 * we know to remove the checkpoint after we commit.
		 */

		if (jh->b_cp_transaction) {
			__jbd2_journal_temp_unlink_buffer(jh);
			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
		} else {
			__jbd2_journal_unfile_buffer(jh);
			jbd2_journal_remove_journal_head(bh);
			__brelse(bh);
			if (!buffer_jbd(bh)) {
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				__bforget(bh);
				goto drop;
			}
		}
	} else if (jh->b_transaction) {
		J_ASSERT_JH(jh, (jh->b_transaction ==
				 journal->j_committing_transaction));
		/* However, if the buffer is still owned by a prior
		 * (committing) transaction, we can't drop it yet... */
		JBUFFER_TRACE(jh, "belongs to older transaction");
		/* ... but we CAN drop it from the new transaction if we
		 * have also modified it since the original commit. */

		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction == transaction);
			jh->b_next_transaction = NULL;

			/*
			 * only drop a reference if this transaction modified
			 * the buffer
			 */
			if (was_modified)
				drop_reserve = 1;
		}
	}

not_jbd:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	__brelse(bh);
drop:
	if (drop_reserve) {
		/* no need to reserve log space for this block -bzzz */
		handle->h_buffer_credits++;
	}
	return err;
}
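/*
 * Illustrative sketch: a delete path hands a no-longer-needed metadata
 * buffer back to the journal instead of just marking it clean itself:
 *
 *	err = jbd2_journal_forget(handle, bh);	... consumes one b_count ref ...
 *
 * If this transaction dirtied the buffer, it is simply unfiled; if an
 * older committing transaction still owns it, it stays pinned (or is
 * queued on BJ_Forget) until that commit completes.
 */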
/**
 * int jbd2_journal_stop() - complete a transaction
 * @handle: transaction to complete.
 *
 * All done for a particular handle.
 *
 * There is not much action needed here.  We just return any remaining
 * buffer credits to the transaction and remove the handle.  The only
 * complication is that we need to start a commit operation if the
 * filesystem is marked for synchronous update.
 *
 * jbd2_journal_stop itself will not usually return an error, but it may
 * do so in unusual circumstances.  In particular, expect it to
 * return -EIO if a jbd2_journal_abort has been executed since the
 * transaction began.
 */
int jbd2_journal_stop(handle_t *handle)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int err, wait_for_commit = 0;
	tid_t tid;
	pid_t pid;

	J_ASSERT(journal_current_handle() == handle);

	if (is_handle_aborted(handle))
		err = -EIO;
	else {
		J_ASSERT(atomic_read(&transaction->t_updates) > 0);
		err = 0;
	}

	if (--handle->h_ref > 0) {
		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
			  handle->h_ref);
		return err;
	}

	jbd_debug(4, "Handle %p going down\n", handle);

	/*
	 * Implement synchronous transaction batching.  If the handle
	 * was synchronous, don't force a commit immediately.  Let's
	 * yield and let another thread piggyback onto this
	 * transaction.  Keep doing that while new threads continue to
	 * arrive.  It doesn't cost much - we're about to run a commit
	 * and sleep on IO anyway.  Speeds up many-threaded, many-dir
	 * operations by 30x or more...
	 *
	 * We try and optimize the sleep time against what the
	 * underlying disk can do, instead of having a static sleep
	 * time.  This is useful for the case where our storage is so
	 * fast that it is more optimal to go ahead and force a flush
	 * and wait for the transaction to be committed than it is to
	 * wait for an arbitrary amount of time for new writers to
	 * join the transaction.  We achieve this by measuring how
	 * long it takes to commit a transaction, and compare it with
	 * how long this transaction has been running, and if run time
	 * < commit time then we sleep for the delta and commit.  This
	 * greatly helps super fast disks that would see slowdowns as
	 * more threads started doing fsyncs.
	 *
	 * But don't do this if this process was the most recent one
	 * to perform a synchronous write.  We do this to detect the
	 * case where a single process is doing a stream of sync
	 * writes.  No point in waiting for joiners in that case.
	 */
	pid = current->pid;
	if (handle->h_sync && journal->j_last_sync_writer != pid) {
		u64 commit_time, trans_time;

		journal->j_last_sync_writer = pid;

		read_lock(&journal->j_state_lock);
		commit_time = journal->j_average_commit_time;
		read_unlock(&journal->j_state_lock);

		trans_time = ktime_to_ns(ktime_sub(ktime_get(),
						   transaction->t_start_time));

		commit_time = max_t(u64, commit_time,
				    1000*journal->j_min_batch_time);
		commit_time = min_t(u64, commit_time,
				    1000*journal->j_max_batch_time);

		if (trans_time < commit_time) {
			ktime_t expires = ktime_add_ns(ktime_get(),
						       commit_time);
			set_current_state(TASK_UNINTERRUPTIBLE);
			schedule_hrtimeout(&expires, HRTIMER_MODE_ABS);
		}
	}

	if (handle->h_sync)
		transaction->t_synchronous_commit = 1;
	current->journal_info = NULL;
	atomic_sub(handle->h_buffer_credits,
		   &transaction->t_outstanding_credits);

	/*
	 * If the handle is marked SYNC, we need to set another commit
	 * going!  We also want to force a commit if the current
	 * transaction is occupying too much of the log, or if the
	 * transaction is too old now.
	 */
	if (handle->h_sync ||
	    (atomic_read(&transaction->t_outstanding_credits) >
	     journal->j_max_transaction_buffers) ||
	    time_after_eq(jiffies, transaction->t_expires)) {
		/* Do this even for aborted journals: an abort still
		 * completes the commit thread, it just doesn't write
		 * anything to disk. */

		jbd_debug(2, "transaction too old, requesting commit for "
					"handle %p\n", handle);
		/* This is non-blocking */
		jbd2_log_start_commit(journal, transaction->t_tid);

		/*
		 * Special case: JBD2_SYNC synchronous updates require us
		 * to wait for the commit to complete.
		 */
		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
			wait_for_commit = 1;
	}

	/*
	 * Once we drop t_updates, if it goes to zero the transaction
	 * could start committing on us and eventually disappear.  So
	 * once we do this, we must not dereference the transaction
	 * pointer again.
	 */
	tid = transaction->t_tid;
	if (atomic_dec_and_test(&transaction->t_updates)) {
		wake_up(&journal->j_wait_updates);
		if (journal->j_barrier_count)
			wake_up(&journal->j_wait_transaction_locked);
	}

	if (wait_for_commit)
		err = jbd2_log_wait_commit(journal, tid);

	lock_map_release(&handle->h_lockdep_map);

	jbd2_free_handle(handle);
	return err;
}
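/*
 * Illustrative sketch: a caller that must have its update on stable
 * storage before returning (an fsync()-style path) marks the handle
 * synchronous before stopping it:
 *
 *	handle->h_sync = 1;
 *	err = jbd2_journal_stop(handle);	... waits for the commit ...
 *
 * This is exactly the trick jbd2_journal_force_commit() below plays
 * with an otherwise unused one-credit handle.
 */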
/**
 * int jbd2_journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * For synchronous operations: force any uncommitted transactions
 * to disk.  May seem kludgy, but it reuses all the handle batching
 * code in a very simple manner.
 */
int jbd2_journal_force_commit(journal_t *journal)
{
	handle_t *handle;
	int ret;

	handle = jbd2_journal_start(journal, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
	} else {
		handle->h_sync = 1;
		ret = jbd2_journal_stop(handle);
	}
	return ret;
}

/*
 *
 * List management code snippets: various functions for manipulating the
 * transaction buffer lists.
 *
 */

/*
 * Append a buffer to a transaction list, given the transaction's list head
 * pointer.
 *
 * j_list_lock is held.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 */

static inline void
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
{
	if (!*list) {
		jh->b_tnext = jh->b_tprev = jh;
		*list = jh;
	} else {
		/* Insert at the tail of the list to preserve order */
		struct journal_head *first = *list, *last = first->b_tprev;
		jh->b_tprev = last;
		jh->b_tnext = first;
		last->b_tnext = first->b_tprev = jh;
	}
}

/*
 * Remove a buffer from a transaction list, given the transaction's list
 * head pointer.
 *
 * Called with j_list_lock held, and the journal may not be locked.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 */

static inline void
__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
{
	if (*list == jh) {
		*list = jh->b_tnext;
		if (*list == jh)
			*list = NULL;
	}
	jh->b_tprev->b_tnext = jh->b_tnext;
	jh->b_tnext->b_tprev = jh->b_tprev;
}

/*
 * Remove a buffer from the appropriate transaction list.
 *
 * Note that this function can *change* the value of
 * bh->b_transaction->t_buffers, t_forget, t_iobuf_list, t_shadow_list,
 * t_log_list or t_reserved_list.  If the caller is holding onto a copy of one
 * of these pointers, it could go bad.  Generally the caller needs to re-read
 * the pointer from the transaction_t.
 *
 * Called under j_list_lock.  The journal may not be locked.
 */
void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
{
	struct journal_head **list = NULL;
	transaction_t *transaction;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	transaction = jh->b_transaction;
	if (transaction)
		assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	if (jh->b_jlist != BJ_None)
		J_ASSERT_JH(jh, transaction != NULL);

	switch (jh->b_jlist) {
	case BJ_None:
		return;
	case BJ_Metadata:
		transaction->t_nr_buffers--;
		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	}

	__blist_del_buffer(list, jh);
	jh->b_jlist = BJ_None;
	if (test_clear_buffer_jbddirty(bh))
		mark_buffer_dirty(bh);	/* Expose it to the VM */
}

void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
	__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = NULL;
}

void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_unfile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}

/*
 * Called from jbd2_journal_try_to_free_buffers().
 *
 * Called under jbd_lock_bh_state(bh)
 */
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
{
	struct journal_head *jh;

	jh = bh2jh(bh);

	if (buffer_locked(bh) || buffer_dirty(bh))
		goto out;

	if (jh->b_next_transaction != NULL)
		goto out;

	spin_lock(&journal->j_list_lock);
	if (jh->b_cp_transaction != NULL && jh->b_transaction == NULL) {
		/* written-back checkpointed metadata buffer */
		if (jh->b_jlist == BJ_None) {
			JBUFFER_TRACE(jh, "remove from checkpoint list");
			__jbd2_journal_remove_checkpoint(jh);
			jbd2_journal_remove_journal_head(bh);
			__brelse(bh);
		}
	}
	spin_unlock(&journal->j_list_lock);
out:
	return;
}
/**
 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
 * @page: to try and free
 * @gfp_mask: we use the mask to detect how hard we should try to release
 * buffers. If __GFP_WAIT and __GFP_FS are set, we wait for commit code to
 * release the buffers.
 *
 *
 * For all the buffers on this page,
 * if they are fully written out ordered data, move them onto BUF_CLEAN
 * so try_to_free_buffers() can reap them.
 *
 * This function returns non-zero if we wish try_to_free_buffers()
 * to be called. We do this if the page is releasable by try_to_free_buffers().
 * We also do it if the page has locked or dirty buffers and the caller wants
 * us to perform sync or async writeout.
 *
 * This complicates JBD locking somewhat.  We aren't protected by the
 * BKL here.  We wish to remove the buffer from its committing or
 * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
 *
 * This may *change* the value of transaction_t->t_datalist, so anyone
 * who looks at t_datalist needs to lock against this function.
 *
 * Even worse, someone may be doing a jbd2_journal_dirty_data on this
 * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
 * will come out of the lock with the buffer dirty, which makes it
 * ineligible for release here.
 *
 * Who else is affected by this?  hmm...  Really the only contender
 * is do_get_write_access() - it could be looking at the buffer while
 * journal_try_to_free_buffer() is changing its state.  But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction.  Yes?
 *
 * Return 0 on failure, 1 on success
 */
int jbd2_journal_try_to_free_buffers(journal_t *journal,
				struct page *page, gfp_t gfp_mask)
{
	struct buffer_head *head;
	struct buffer_head *bh;
	int ret = 0;

	J_ASSERT(PageLocked(page));

	head = page_buffers(page);
	bh = head;
	do {
		struct journal_head *jh;

		/*
		 * We take our own ref against the journal_head here to avoid
		 * having to add tons of locking around each instance of
		 * jbd2_journal_remove_journal_head() and
		 * jbd2_journal_put_journal_head().
		 */
		jh = jbd2_journal_grab_journal_head(bh);
		if (!jh)
			continue;

		jbd_lock_bh_state(bh);
		__journal_try_to_free_buffer(journal, bh);
		jbd2_journal_put_journal_head(jh);
		jbd_unlock_bh_state(bh);
		if (buffer_jbd(bh))
			goto busy;
	} while ((bh = bh->b_this_page) != head);

	ret = try_to_free_buffers(page);

busy:
	return ret;
}

/*
 * This buffer is no longer needed.  If it is on an older transaction's
 * checkpoint list we need to record it on this transaction's forget list
 * to pin this buffer (and hence its checkpointing transaction) down until
 * this transaction commits.  If the buffer isn't on a checkpoint list, we
 * release it.
 * Returns non-zero if JBD no longer has an interest in the buffer.
 *
 * Called under j_list_lock.
 *
 * Called under jbd_lock_bh_state(bh).
 */
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
	int may_free = 1;
	struct buffer_head *bh = jh2bh(jh);

	__jbd2_journal_unfile_buffer(jh);

	if (jh->b_cp_transaction) {
		JBUFFER_TRACE(jh, "on running+cp transaction");
		/*
		 * We don't want to write the buffer anymore, clear the
		 * bit so that we don't confuse checks in
		 * __journal_file_buffer
		 */
		clear_buffer_dirty(bh);
		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
		may_free = 0;
	} else {
		JBUFFER_TRACE(jh, "on running transaction");
		jbd2_journal_remove_journal_head(bh);
		__brelse(bh);
	}
	return may_free;
}
/*
 * jbd2_journal_invalidatepage
 *
 * This code is tricky.  It has a number of cases to deal with.
 *
 * There are two invariants which this code relies on:
 *
 * i_size must be updated on disk before we start calling invalidatepage on the
 * data.
 *
 * This is done in ext3 by defining an ext3_setattr method which
 * updates i_size before truncate gets going.  By maintaining this
 * invariant, we can be sure that it is safe to throw away any buffers
 * attached to the current transaction: once the transaction commits,
 * we know that the data will not be needed.
 *
 * Note however that we can *not* throw away data belonging to the
 * previous, committing transaction!
 *
 * Any disk blocks which *are* part of the previous, committing
 * transaction (and which therefore cannot be discarded immediately) are
 * not going to be reused in the new running transaction.
 *
 * The bitmap committed_data images guarantee this: any block which is
 * allocated in one transaction and removed in the next will be marked
 * as in-use in the committed_data bitmap, so cannot be reused until
 * the next transaction to delete the block commits.  This means that
 * leaving committing buffers dirty is quite safe: the disk blocks
 * cannot be reallocated to a different file and so buffer aliasing is
 * not possible.
 *
 *
 * The above applies mainly to ordered data mode.  In writeback mode we
 * don't make guarantees about the order in which data hits disk --- in
 * particular we don't guarantee that new dirty data is flushed before
 * transaction commit --- so it is always safe just to discard data
 * immediately in that mode.  --sct
 */

/*
 * The journal_unmap_buffer helper function returns zero if the buffer
 * concerned remains pinned as an anonymous buffer belonging to an older
 * transaction.
 *
 * We're outside-transaction here.  Either or both of j_running_transaction
 * and j_committing_transaction may be NULL.
 */
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
{
	transaction_t *transaction;
	struct journal_head *jh;
	int may_free = 1;
	int ret;

	BUFFER_TRACE(bh, "entry");

	/*
	 * It is safe to proceed here without the j_list_lock because the
	 * buffers cannot be stolen by try_to_free_buffers as long as we are
	 * holding the page lock. --sct
	 */

	if (!buffer_jbd(bh))
		goto zap_buffer_unlocked;

	/* OK, we have data buffer in journaled mode */
	write_lock(&journal->j_state_lock);
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	jh = jbd2_journal_grab_journal_head(bh);
	if (!jh)
		goto zap_buffer_no_jh;
	if (transaction == NULL) {
		/* First case: not on any transaction.  If it
		 * has no checkpoint link, then we can zap it:
		 * it's a writeback-mode buffer so we don't care
		 * if it hits disk safely. */
		if (!jh->b_cp_transaction) {
			JBUFFER_TRACE(jh, "not on any transaction: zap");
			goto zap_buffer;
		}

		if (!buffer_dirty(bh)) {
			/* bdflush has written it.  We can drop it now */
			goto zap_buffer;
		}

		/* OK, it must be in the journal but still not
		 * written fully to disk: it's metadata or
		 * journaled data... */

		if (journal->j_running_transaction) {
			/* ... and once the current transaction has
			 * committed, the buffer won't be needed any
			 * longer. */
			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
			ret = __dispose_buffer(jh,
					journal->j_running_transaction);
			jbd2_journal_put_journal_head(jh);
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			write_unlock(&journal->j_state_lock);
			return ret;
		} else {
			/* There is no currently-running transaction. So the
			 * orphan record which we wrote for this file must have
			 * passed into commit.  We must attach this buffer to
			 * the committing transaction, if it exists. */
			if (journal->j_committing_transaction) {
				JBUFFER_TRACE(jh, "give to committing trans");
				ret = __dispose_buffer(jh,
					journal->j_committing_transaction);
				jbd2_journal_put_journal_head(jh);
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				write_unlock(&journal->j_state_lock);
				return ret;
			} else {
				/* The orphan record's transaction has
				 * committed.  We can cleanse this buffer */
				clear_buffer_jbddirty(bh);
				goto zap_buffer;
			}
		}
	} else if (transaction == journal->j_committing_transaction) {
		JBUFFER_TRACE(jh, "on committing transaction");
		/*
		 * The buffer is committing, we simply cannot touch
		 * it.  So we just set j_next_transaction to the
		 * running transaction (if there is one) and mark
		 * buffer as freed so that commit code knows it should
		 * clear dirty bits when it is done with the buffer.
		 */
		set_buffer_freed(bh);
		if (journal->j_running_transaction && buffer_jbddirty(bh))
			jh->b_next_transaction = journal->j_running_transaction;
		jbd2_journal_put_journal_head(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		write_unlock(&journal->j_state_lock);
		return 0;
	} else {
		/* Good, the buffer belongs to the running transaction.
		 * We are writing our own transaction's data, not any
		 * previous one's, so it is safe to throw it away
		 * (remember that we expect the filesystem to have set
		 * i_size already for this truncate so recovery will not
		 * expose the disk blocks we are discarding here.) */
		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
		JBUFFER_TRACE(jh, "on running transaction");
		may_free = __dispose_buffer(jh, transaction);
	}

zap_buffer:
	jbd2_journal_put_journal_head(jh);
zap_buffer_no_jh:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	write_unlock(&journal->j_state_lock);
zap_buffer_unlocked:
	clear_buffer_dirty(bh);
	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	bh->b_bdev = NULL;
	return may_free;
}

/**
 * void jbd2_journal_invalidatepage()
 * @journal: journal to use for flush...
 * @page:    page to flush
 * @offset:  offset within the page from which to invalidate; buffers
 *	     starting at or beyond this offset are reaped
 *
 * Reap page buffers containing data after offset in page.
 */
void jbd2_journal_invalidatepage(journal_t *journal,
		      struct page *page,
		      unsigned long offset)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	int may_free = 1;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	/* We will potentially be playing with lists other than just the
	 * data lists (especially for journaled data mode), so be
	 * cautious in our locking. */

	head = bh = page_buffers(page);
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		if (offset <= curr_off) {
			/* This block is wholly outside the truncation point */
			lock_buffer(bh);
			may_free &= journal_unmap_buffer(journal, bh);
			unlock_buffer(bh);
		}
		curr_off = next_off;
		bh = next;

	} while (bh != head);

	if (!offset) {
		if (may_free && try_to_free_buffers(page))
			J_ASSERT(!page_has_buffers(page));
	}
}
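
/*
 * Illustrative sketch (not part of this file): jbd2_journal_invalidatepage()
 * is normally reached through a filesystem's ->invalidatepage()
 * address_space operation.  ext4's handler is roughly as follows
 * (it first clears PageChecked on a full invalidation, since the
 * pending journaled-data check becomes moot):
 *
 *	static void ext4_invalidatepage(struct page *page, unsigned long offset)
 *	{
 *		journal_t *journal = EXT4_JOURNAL(page->mapping->host);
 *
 *		if (offset == 0)
 *			ClearPageChecked(page);
 *
 *		if (journal)
 *			jbd2_journal_invalidatepage(journal, page, offset);
 *		else
 *			block_invalidatepage(page, offset);
 *	}
 */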

/*
 * File a buffer on the given transaction list.
 */
void __jbd2_journal_file_buffer(struct journal_head *jh,
			transaction_t *transaction, int jlist)
{
	struct journal_head **list = NULL;
	int was_dirty = 0;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_transaction == NULL);

	if (jh->b_transaction && jh->b_jlist == jlist)
		return;

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		/*
		 * For metadata buffers, we track dirty bit in buffer_jbddirty
		 * instead of buffer_dirty. We should not see a dirty bit set
		 * here because we clear it in do_get_write_access but e.g.
		 * tune2fs can modify the sb and set the dirty bit at any
		 * time, so we try to gracefully handle that.
		 */
		if (buffer_dirty(bh))
			warn_dirty_buffer(bh);
		if (test_clear_buffer_dirty(bh) ||
		    test_clear_buffer_jbddirty(bh))
			was_dirty = 1;
	}

	if (jh->b_transaction)
		__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = transaction;

	switch (jlist) {
	case BJ_None:
		J_ASSERT_JH(jh, !jh->b_committed_data);
		J_ASSERT_JH(jh, !jh->b_frozen_data);
		return;
	case BJ_Metadata:
		transaction->t_nr_buffers++;
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	}

	__blist_add_buffer(list, jh);
	jh->b_jlist = jlist;

	if (was_dirty)
		set_buffer_jbddirty(bh);
}

void jbd2_journal_file_buffer(struct journal_head *jh,
				transaction_t *transaction, int jlist)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&transaction->t_journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, transaction, jlist);
	spin_unlock(&transaction->t_journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}

/*
 * Remove a buffer from its current buffer list in preparation for
 * dropping it from its current transaction entirely.  If the buffer has
 * already started to be used by a subsequent transaction, refile the
 * buffer on that transaction's appropriate list (BJ_Forget, BJ_Metadata
 * or BJ_Reserved, depending on the buffer's state).
 *
 * Called under journal->j_list_lock
 *
 * Called under jbd_lock_bh_state(jh2bh(jh))
 */
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
	int was_dirty, jlist;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	if (jh->b_transaction)
		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);

	/* If the buffer is now unused, just drop it. */
	if (jh->b_next_transaction == NULL) {
		__jbd2_journal_unfile_buffer(jh);
		return;
	}

	/*
	 * It has been modified by a later transaction: add it to the new
	 * transaction's metadata list.
	 */

	was_dirty = test_clear_buffer_jbddirty(bh);
	__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = jh->b_next_transaction;
	jh->b_next_transaction = NULL;
	if (buffer_freed(bh))
		jlist = BJ_Forget;
	else if (jh->b_modified)
		jlist = BJ_Metadata;
	else
		jlist = BJ_Reserved;
	__jbd2_journal_file_buffer(jh, jh->b_transaction, jlist);
	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

	if (was_dirty)
		set_buffer_jbddirty(bh);
}
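
/*
 * Illustrative sketch (not part of this file): the main caller of these
 * refile helpers is the commit code in fs/jbd2/commit.c.  When the
 * committing transaction has finished with a buffer, it does, in
 * outline, something like
 *
 *	spin_lock(&journal->j_list_lock);
 *	while (commit_transaction->t_forget) {
 *		struct journal_head *jh = commit_transaction->t_forget;
 *		...
 *		__jbd2_journal_refile_buffer(jh);
 *		...
 *	}
 *	spin_unlock(&journal->j_list_lock);
 *
 * and __jbd2_journal_refile_buffer() then either drops the buffer
 * (b_next_transaction == NULL) or hands it over to the next running
 * transaction's lists, as implemented above.
 */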

/*
 * For the unlocked version of this call, also make sure that any
 * hanging journal_head is cleaned up if necessary.
 *
 * __jbd2_journal_refile_buffer is usually called as part of a single locked
 * operation on a buffer_head, in which the caller is probably going to
 * be hooking the journal_head onto other lists.  In that case it is up
 * to the caller to remove the journal_head if necessary.  For the
 * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
 * doing anything else to the buffer so we need to do the cleanup
 * ourselves to avoid a jh leak.
 *
 * *** The journal_head may be freed by this call! ***
 */
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	__jbd2_journal_refile_buffer(jh);
	jbd_unlock_bh_state(bh);
	jbd2_journal_remove_journal_head(bh);

	spin_unlock(&journal->j_list_lock);
	__brelse(bh);
}

/*
 * File an inode on the inode list of the handle's transaction.
 */
int jbd2_journal_file_inode(handle_t *handle, struct jbd2_inode *jinode)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;

	if (is_handle_aborted(handle))
		return -EIO;

	jbd_debug(4, "Adding inode %lu, tid:%d\n", jinode->i_vfs_inode->i_ino,
			transaction->t_tid);

	/*
	 * First check whether the inode isn't already on the transaction's
	 * lists without taking the lock.  Note that this check is safe
	 * without the lock as we cannot race with somebody removing the
	 * inode from the transaction.  The reason is that we remove the
	 * inode from the transaction only in journal_release_jbd_inode()
	 * and when we commit the transaction.  We are guarded from the
	 * first case by holding a reference to the inode.  We are safe
	 * against the second case because if jinode->i_transaction ==
	 * transaction, commit code cannot touch the transaction because
	 * we hold a reference to it, and if jinode->i_next_transaction ==
	 * transaction, commit code will only file the inode where we
	 * want it.
	 */
	if (jinode->i_transaction == transaction ||
	    jinode->i_next_transaction == transaction)
		return 0;

	spin_lock(&journal->j_list_lock);

	if (jinode->i_transaction == transaction ||
	    jinode->i_next_transaction == transaction)
		goto done;

	/* On some different transaction's list - should be
	 * the committing one */
	if (jinode->i_transaction) {
		J_ASSERT(jinode->i_next_transaction == NULL);
		J_ASSERT(jinode->i_transaction ==
					journal->j_committing_transaction);
		jinode->i_next_transaction = transaction;
		goto done;
	}
	/* Not on any transaction list... */
	J_ASSERT(!jinode->i_next_transaction);
	jinode->i_transaction = transaction;
	list_add(&jinode->i_list, &transaction->t_inode_list);
done:
	spin_unlock(&journal->j_list_lock);

	return 0;
}
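
/*
 * Illustrative sketch (not part of this file): an ordered-mode
 * filesystem calls jbd2_journal_file_inode() whenever it maps new
 * blocks into a file under a running handle, so that the inode's data
 * is written out before the transaction commits.  ext4 wraps it
 * roughly as:
 *
 *	static inline int ext4_jbd2_file_inode(handle_t *handle,
 *					       struct inode *inode)
 *	{
 *		if (ext4_should_order_data(inode))
 *			return jbd2_journal_file_inode(handle,
 *						       &EXT4_I(inode)->jinode);
 *		return 0;
 *	}
 */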

/*
 * File truncate and transaction commit interact with each other in a
 * non-trivial way.  If a transaction writing data block A is
 * committing, we cannot discard the data by truncate until we have
 * written them.  Otherwise if we crashed after the transaction with
 * write has committed but before the transaction with truncate has
 * committed, we could see stale data in block A.  This function is a
 * helper to solve this problem.  It starts writeout of the truncated
 * part in case it is in the committing transaction.
 *
 * Filesystem code must call this function when inode is journaled in
 * ordered mode before truncation happens and after the inode has been
 * placed on orphan list with the new inode size.  The second condition
 * avoids the race where someone writes new data and we start
 * committing the transaction after this function has been called but
 * before a transaction for truncate is started (and furthermore it
 * allows us to optimize the case where the addition to the orphan list
 * happens in the same transaction as the write --- we don't have to
 * write any data in such a case).
 */
int jbd2_journal_begin_ordered_truncate(journal_t *journal,
					struct jbd2_inode *jinode,
					loff_t new_size)
{
	transaction_t *inode_trans, *commit_trans;
	int ret = 0;

	/* This is a quick check to avoid locking if not necessary */
	if (!jinode->i_transaction)
		goto out;
	/* Locks are here just to force reading of recent values, it is
	 * enough that the transaction was not committing before we started
	 * a transaction adding the inode to orphan list */
	read_lock(&journal->j_state_lock);
	commit_trans = journal->j_committing_transaction;
	read_unlock(&journal->j_state_lock);
	spin_lock(&journal->j_list_lock);
	inode_trans = jinode->i_transaction;
	spin_unlock(&journal->j_list_lock);
	if (inode_trans == commit_trans) {
		ret = filemap_fdatawrite_range(jinode->i_vfs_inode->i_mapping,
			new_size, LLONG_MAX);
		if (ret)
			jbd2_journal_abort(journal, ret);
	}
out:
	return ret;
}
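
/*
 * Illustrative sketch (not part of this file): ext4 invokes the
 * function above from its truncate path, after the inode has been put
 * on the orphan list, via a small wrapper along these lines
 * (EXT4_JOURNAL() and EXT4_I() are ext4's accessors):
 *
 *	static int ext4_begin_ordered_truncate(struct inode *inode,
 *					       loff_t new_size)
 *	{
 *		return jbd2_journal_begin_ordered_truncate(
 *					EXT4_JOURNAL(inode),
 *					&EXT4_I(inode)->jinode, new_size);
 *	}
 */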