/*
 * linux/fs/jbd2/transaction.c
 *
 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
 *
 * Copyright 1998 Red Hat corp --- All Rights Reserved
 *
 * This file is part of the Linux kernel and is made available under
 * the terms of the GNU General Public License, version 2, or at your
 * option, any later version, incorporated herein by reference.
 *
 * Generic filesystem transaction handling code; part of the ext2fs
 * journaling system.
 *
 * This file manages transactions (compound commits managed by the
 * journaling code) and handles (individual atomic operations by the
 * filesystem).
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/jbd2.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/highmem.h>

static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);

/*
 * jbd2_get_transaction: obtain a new transaction_t object.
 *
 * Simply allocate and initialise a new transaction.  Create it in
 * RUNNING state and add it to the current journal (which should not
 * have an existing running transaction: we only make a new transaction
 * once we have started to commit the old one).
 *
 * Preconditions:
 *	The journal MUST be locked.  We don't perform atomic mallocs on the
 *	new transaction and we can't block without protecting against other
 *	processes trying to touch the journal while it is in transition.
 *
 * Called under j_state_lock
 */

static transaction_t *
jbd2_get_transaction(journal_t *journal, transaction_t *transaction)
{
	transaction->t_journal = journal;
	transaction->t_state = T_RUNNING;
	transaction->t_tid = journal->j_transaction_sequence++;
	transaction->t_expires = jiffies + journal->j_commit_interval;
	spin_lock_init(&transaction->t_handle_lock);

	/* Set up the commit timer for the new transaction. */
	journal->j_commit_timer.expires = transaction->t_expires;
	add_timer(&journal->j_commit_timer);

	J_ASSERT(journal->j_running_transaction == NULL);
	journal->j_running_transaction = transaction;

	return transaction;
}

/*
 * Handle management.
 *
 * A handle_t is an object which represents a single atomic update to a
 * filesystem, and which tracks all of the modifications which form part
 * of that one update.
 */
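
/*
 * Illustrative only (comment not in the original file): a filesystem
 * typically drives this API roughly as follows, for a hypothetical
 * update touching a single metadata buffer:
 *
 *	handle_t *handle = jbd2_journal_start(journal, 1);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *	err = jbd2_journal_get_write_access(handle, bh);
 *	if (!err) {
 *		... modify bh's data ...
 *		err = jbd2_journal_dirty_metadata(handle, bh);
 *	}
 *	jbd2_journal_stop(handle);
 *
 * Everything between the start and the stop becomes part of one atomic
 * update.
 */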

/*
 * start_this_handle: Given a handle, deal with any locking or stalling
 * needed to make sure that there is enough journal space for the handle
 * to begin.  Attach the handle to a transaction and set up the
 * transaction's buffer credits.
 */

static int start_this_handle(journal_t *journal, handle_t *handle)
{
	transaction_t *transaction;
	int needed;
	int nblocks = handle->h_buffer_credits;
	transaction_t *new_transaction = NULL;
	int ret = 0;

	if (nblocks > journal->j_max_transaction_buffers) {
		printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n",
		       current->comm, nblocks,
		       journal->j_max_transaction_buffers);
		ret = -ENOSPC;
		goto out;
	}

alloc_transaction:
	if (!journal->j_running_transaction) {
		new_transaction = kzalloc(sizeof(*new_transaction),
						GFP_NOFS|__GFP_NOFAIL);
		if (!new_transaction) {
			ret = -ENOMEM;
			goto out;
		}
	}

	jbd_debug(3, "New handle %p going live.\n", handle);

repeat:

	/*
	 * We need to hold j_state_lock until t_updates has been incremented,
	 * for proper journal barrier handling
	 */
	spin_lock(&journal->j_state_lock);
repeat_locked:
	if (is_journal_aborted(journal) ||
	    (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) {
		spin_unlock(&journal->j_state_lock);
		ret = -EROFS;
		goto out;
	}

	/* Wait on the journal's transaction barrier if necessary */
	if (journal->j_barrier_count) {
		spin_unlock(&journal->j_state_lock);
		wait_event(journal->j_wait_transaction_locked,
				journal->j_barrier_count == 0);
		goto repeat;
	}

	if (!journal->j_running_transaction) {
		if (!new_transaction) {
			spin_unlock(&journal->j_state_lock);
			goto alloc_transaction;
		}
		jbd2_get_transaction(journal, new_transaction);
		new_transaction = NULL;
	}

	transaction = journal->j_running_transaction;

	/*
	 * If the current transaction is locked down for commit, wait for the
	 * lock to be released.
	 */
	if (transaction->t_state == T_LOCKED) {
		DEFINE_WAIT(wait);

		prepare_to_wait(&journal->j_wait_transaction_locked,
					&wait, TASK_UNINTERRUPTIBLE);
		spin_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_transaction_locked, &wait);
		goto repeat;
	}

	/*
	 * If there is not enough space left in the log to write all potential
	 * buffers requested by this operation, we need to stall pending a log
	 * checkpoint to free some more log space.
	 */
	spin_lock(&transaction->t_handle_lock);
	needed = transaction->t_outstanding_credits + nblocks;

	if (needed > journal->j_max_transaction_buffers) {
		/*
		 * If the current transaction is already too large, then start
		 * to commit it: we can then go back and attach this handle to
		 * a new transaction.
		 */
		DEFINE_WAIT(wait);

		jbd_debug(2, "Handle %p starting new commit...\n", handle);
		spin_unlock(&transaction->t_handle_lock);
		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
				TASK_UNINTERRUPTIBLE);
		__jbd2_log_start_commit(journal, transaction->t_tid);
		spin_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_transaction_locked, &wait);
		goto repeat;
	}
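
	/*
	 * Worked example (illustrative, not from the original source):
	 * with j_max_transaction_buffers == 8192, a running transaction
	 * already carrying 8000 outstanding credits cannot admit a handle
	 * asking for 256 more (8256 > 8192), so the block above starts a
	 * commit and retries against the successor transaction.
	 */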

	/*
	 * The commit code assumes that it can get enough log space
	 * without forcing a checkpoint.  This is *critical* for
	 * correctness: a checkpoint of a buffer which is also
	 * associated with a committing transaction creates a deadlock,
	 * so commit simply cannot force through checkpoints.
	 *
	 * We must therefore ensure the necessary space in the journal
	 * *before* starting to dirty potentially checkpointed buffers
	 * in the new transaction.
	 *
	 * The worst part is, any transaction currently committing can
	 * reduce the free space arbitrarily.  Be careful to account for
	 * those buffers when checkpointing.
	 */

	/*
	 * @@@ AKPM: This seems rather over-defensive.  We're giving commit
	 * a _lot_ of headroom: 1/4 of the journal plus the size of
	 * the committing transaction.  Really, we only need to give it
	 * committing_transaction->t_outstanding_credits plus "enough" for
	 * the log control blocks.
	 * Also, this test is inconsistent with the matching one in
	 * jbd2_journal_extend().
	 */
	if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) {
		jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
		spin_unlock(&transaction->t_handle_lock);
		__jbd2_log_wait_for_space(journal);
		goto repeat_locked;
	}

	/* OK, account for the buffers that this operation expects to
	 * use and add the handle to the running transaction. */

	handle->h_transaction = transaction;
	transaction->t_outstanding_credits += nblocks;
	transaction->t_updates++;
	transaction->t_handle_count++;
	jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
		  handle, nblocks, transaction->t_outstanding_credits,
		  __jbd2_log_space_left(journal));
	spin_unlock(&transaction->t_handle_lock);
	spin_unlock(&journal->j_state_lock);
out:
	if (unlikely(new_transaction))		/* It's usually NULL */
		kfree(new_transaction);
	return ret;
}

/* Allocate a new handle.  This should probably be in a slab... */
static handle_t *new_handle(int nblocks)
{
	handle_t *handle = jbd2_alloc_handle(GFP_NOFS);
	if (!handle)
		return NULL;
	memset(handle, 0, sizeof(*handle));
	handle->h_buffer_credits = nblocks;
	handle->h_ref = 1;

	return handle;
}

/**
 * handle_t *jbd2_journal_start() - Obtain a new handle.
 * @journal: Journal to start transaction on.
 * @nblocks: number of block buffers we might modify
 *
 * We make sure that the transaction can guarantee at least nblocks of
 * modified buffers in the log.  We block until the log can guarantee
 * that much space.
 *
 * This function is visible to journal users (like ext3fs), so is not
 * called with the journal already locked.
 *
 * Return a pointer to a newly allocated handle, or NULL on failure
 */
handle_t *jbd2_journal_start(journal_t *journal, int nblocks)
{
	handle_t *handle = journal_current_handle();
	int err;

	if (!journal)
		return ERR_PTR(-EROFS);

	if (handle) {
		J_ASSERT(handle->h_transaction->t_journal == journal);
		handle->h_ref++;
		return handle;
	}

	handle = new_handle(nblocks);
	if (!handle)
		return ERR_PTR(-ENOMEM);

	current->journal_info = handle;

	err = start_this_handle(journal, handle);
	if (err < 0) {
		jbd2_free_handle(handle);
		current->journal_info = NULL;
		handle = ERR_PTR(err);
	}
	return handle;
}
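
/*
 * Illustrative note (not part of the original file): because
 * jbd2_journal_start() returns the task's existing handle with h_ref
 * bumped, handles nest cheaply.  Each jbd2_journal_start() must be
 * balanced by a jbd2_journal_stop(); only the outermost stop actually
 * releases the handle and its credits.
 */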

/**
 * int jbd2_journal_extend() - extend buffer credits.
 * @handle:  handle to 'extend'
 * @nblocks: nr blocks to try to extend by.
 *
 * Some transactions, such as large extends and truncates, can be done
 * atomically all at once or in several stages.  The operation requests
 * a credit for a number of buffer modifications in advance, but can
 * extend its credit if it needs more.
 *
 * jbd2_journal_extend tries to give the running handle more buffer credits.
 * It does not guarantee the allocation - this is best-effort only.
 * The calling process MUST be able to deal cleanly with a failure to
 * extend here.
 *
 * Return 0 on success, non-zero on failure.
 *
 * return code < 0 implies an error
 * return code > 0 implies normal transaction-full status.
 */
int jbd2_journal_extend(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int result;
	int wanted;

	result = -EIO;
	if (is_handle_aborted(handle))
		goto out;

	result = 1;

	spin_lock(&journal->j_state_lock);

	/* Don't extend a locked-down transaction! */
	if (handle->h_transaction->t_state != T_RUNNING) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction not running\n", handle, nblocks);
		goto error_out;
	}

	spin_lock(&transaction->t_handle_lock);
	wanted = transaction->t_outstanding_credits + nblocks;

	if (wanted > journal->j_max_transaction_buffers) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "transaction too large\n", handle, nblocks);
		goto unlock;
	}

	if (wanted > __jbd2_log_space_left(journal)) {
		jbd_debug(3, "denied handle %p %d blocks: "
			  "insufficient log space\n", handle, nblocks);
		goto unlock;
	}

	handle->h_buffer_credits += nblocks;
	transaction->t_outstanding_credits += nblocks;
	result = 0;

	jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
unlock:
	spin_unlock(&transaction->t_handle_lock);
error_out:
	spin_unlock(&journal->j_state_lock);
out:
	return result;
}
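
/*
 * Illustrative only (comment not in the original file): callers
 * typically combine jbd2_journal_extend() above with
 * jbd2_journal_restart() below as
 *
 *	if (jbd2_journal_extend(handle, needed) != 0)
 *		err = jbd2_journal_restart(handle, needed);
 *
 * i.e. try to stretch the current handle first, and fall back to
 * committing the transaction and reattaching the handle if that fails.
 */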

/**
 * int jbd2_journal_restart() - restart a handle.
 * @handle:  handle to restart
 * @nblocks: nr credits requested
 *
 * Restart a handle for a multi-transaction filesystem
 * operation.
 *
 * If the jbd2_journal_extend() call above fails to grant new buffer credits
 * to a running handle, a call to jbd2_journal_restart will commit the
 * handle's transaction so far and reattach the handle to a new
 * transaction capable of guaranteeing the requested number of
 * credits.
 */

int jbd2_journal_restart(handle_t *handle, int nblocks)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int ret;

	/* If we've had an abort of any type, don't even think about
	 * actually doing the restart! */
	if (is_handle_aborted(handle))
		return 0;

	/*
	 * First unlink the handle from its current transaction, and start the
	 * commit on that.
	 */
	J_ASSERT(transaction->t_updates > 0);
	J_ASSERT(journal_current_handle() == handle);

	spin_lock(&journal->j_state_lock);
	spin_lock(&transaction->t_handle_lock);
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;

	if (!transaction->t_updates)
		wake_up(&journal->j_wait_updates);
	spin_unlock(&transaction->t_handle_lock);

	jbd_debug(2, "restarting handle %p\n", handle);
	__jbd2_log_start_commit(journal, transaction->t_tid);
	spin_unlock(&journal->j_state_lock);

	handle->h_buffer_credits = nblocks;
	ret = start_this_handle(journal, handle);
	return ret;
}


/**
 * void jbd2_journal_lock_updates () - establish a transaction barrier.
 * @journal:  Journal to establish a barrier on.
 *
 * This locks out any further updates from being started, and blocks
 * until all existing updates have completed, returning only once the
 * journal is in a quiescent state with no updates running.
 *
 * The journal lock should not be held on entry.
 */
void jbd2_journal_lock_updates(journal_t *journal)
{
	DEFINE_WAIT(wait);

	spin_lock(&journal->j_state_lock);
	++journal->j_barrier_count;

	/* Wait until there are no running updates */
	while (1) {
		transaction_t *transaction = journal->j_running_transaction;

		if (!transaction)
			break;

		spin_lock(&transaction->t_handle_lock);
		if (!transaction->t_updates) {
			spin_unlock(&transaction->t_handle_lock);
			break;
		}
		prepare_to_wait(&journal->j_wait_updates, &wait,
				TASK_UNINTERRUPTIBLE);
		spin_unlock(&transaction->t_handle_lock);
		spin_unlock(&journal->j_state_lock);
		schedule();
		finish_wait(&journal->j_wait_updates, &wait);
		spin_lock(&journal->j_state_lock);
	}
	spin_unlock(&journal->j_state_lock);

	/*
	 * We have now established a barrier against other normal updates, but
	 * we also need to barrier against other jbd2_journal_lock_updates() calls
	 * to make sure that we serialise special journal-locked operations
	 * too.
	 */
	mutex_lock(&journal->j_barrier);
}

/**
 * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier
 * @journal:  Journal to release the barrier on.
 *
 * Release a transaction barrier obtained with jbd2_journal_lock_updates().
 *
 * Should be called without the journal lock held.
 */
void jbd2_journal_unlock_updates (journal_t *journal)
{
	J_ASSERT(journal->j_barrier_count != 0);

	mutex_unlock(&journal->j_barrier);
	spin_lock(&journal->j_state_lock);
	--journal->j_barrier_count;
	spin_unlock(&journal->j_state_lock);
	wake_up(&journal->j_wait_transaction_locked);
}
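
/*
 * Illustrative only (comment not in the original file): a typical
 * barrier user is filesystem freezing, roughly
 *
 *	jbd2_journal_lock_updates(journal);
 *	jbd2_journal_flush(journal);
 *	...		(journal is now empty and quiescent)
 *	jbd2_journal_unlock_updates(journal);
 *
 * The j_barrier mutex taken above also serialises such journal-wide
 * operations against each other.
 */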

/*
 * Report any unexpected dirty buffers which turn up.  Normally those
 * indicate an error, but they can occur if the user is running (say)
 * tune2fs to modify the live filesystem, so we need the option of
 * continuing as gracefully as possible.
 *
 * The caller should already hold the journal lock and
 * j_list_lock spinlock: most callers will need those anyway
 * in order to probe the buffer's journaling state safely.
 */
static void jbd_unexpected_dirty_buffer(struct journal_head *jh)
{
	int jlist;

	/* If this buffer is one which might reasonably be dirty
	 * --- ie. data, or not part of this journal --- then
	 * we're OK to leave it alone, but otherwise we need to
	 * move the dirty bit to the journal's own internal
	 * JBDDirty bit. */
	jlist = jh->b_jlist;

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		struct buffer_head *bh = jh2bh(jh);

		if (test_clear_buffer_dirty(bh))
			set_buffer_jbddirty(bh);
	}
}

/*
 * If the buffer is already part of the current transaction, then there
 * is nothing we need to do.  If it is already part of a prior
 * transaction which we are still committing to disk, then we need to
 * make sure that we do not overwrite the old copy: we do copy-out to
 * preserve the copy going to disk.  We also account the buffer against
 * the handle's metadata buffer credits (unless the buffer is already
 * part of the transaction, that is).
 *
 */
static int
do_get_write_access(handle_t *handle, struct journal_head *jh,
			int force_copy)
{
	struct buffer_head *bh;
	transaction_t *transaction;
	journal_t *journal;
	int error;
	char *frozen_buffer = NULL;
	int need_copy = 0;

	if (is_handle_aborted(handle))
		return -EROFS;

	transaction = handle->h_transaction;
	journal = transaction->t_journal;

	jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy);

	JBUFFER_TRACE(jh, "entry");
repeat:
	bh = jh2bh(jh);

	/* @@@ Need to check for errors here at some point. */

	lock_buffer(bh);
	jbd_lock_bh_state(bh);

	/* We now hold the buffer lock so it is safe to query the buffer
	 * state.  Is the buffer dirty?
	 *
	 * If so, there are two possibilities.  The buffer may be
	 * non-journaled, and undergoing a quite legitimate writeback.
	 * Otherwise, it is journaled, and we don't expect dirty buffers
	 * in that state (the buffers should be marked JBD_Dirty
	 * instead.)  So either the IO is being done under our own
	 * control and this is a bug, or it's a third party IO such as
	 * dump(8) (which may leave the buffer scheduled for read ---
	 * ie. locked but not dirty) or tune2fs (which may actually have
	 * the buffer dirtied, ugh.)  */

	if (buffer_dirty(bh)) {
		/*
		 * First question: is this buffer already part of the current
		 * transaction or the existing committing transaction?
		 */
		if (jh->b_transaction) {
			J_ASSERT_JH(jh,
				jh->b_transaction == transaction ||
				jh->b_transaction ==
					journal->j_committing_transaction);
			if (jh->b_next_transaction)
				J_ASSERT_JH(jh, jh->b_next_transaction ==
							transaction);
		}
		/*
		 * In any case we need to clean the dirty flag and we must
		 * do it under the buffer lock to be sure we don't race
		 * with running write-out.
		 */
		JBUFFER_TRACE(jh, "Unexpected dirty buffer");
		jbd_unexpected_dirty_buffer(jh);
	}

	unlock_buffer(bh);

	error = -EROFS;
	if (is_handle_aborted(handle)) {
		jbd_unlock_bh_state(bh);
		goto out;
	}
	error = 0;

	/*
	 * The buffer is already part of this transaction if b_transaction or
	 * b_next_transaction points to it
	 */
	if (jh->b_transaction == transaction ||
	    jh->b_next_transaction == transaction)
		goto done;

	/*
	 * If there is already a copy-out version of this buffer, then we don't
	 * need to make another one
	 */
	if (jh->b_frozen_data) {
		JBUFFER_TRACE(jh, "has frozen data");
		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
		jh->b_next_transaction = transaction;
		goto done;
	}

	/* Is there data here we need to preserve? */

	if (jh->b_transaction && jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "owned by older transaction");
		J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

		/* There is one case we have to be very careful about.
		 * If the committing transaction is currently writing
		 * this buffer out to disk and has NOT made a copy-out,
		 * then we cannot modify the buffer contents at all
		 * right now.  The essence of copy-out is that it is the
		 * extra copy, not the primary copy, which gets
		 * journaled.  If the primary copy is already going to
		 * disk then we cannot do copy-out here. */

		if (jh->b_jlist == BJ_Shadow) {
			DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow);
			wait_queue_head_t *wqh;

			wqh = bit_waitqueue(&bh->b_state, BH_Unshadow);

			JBUFFER_TRACE(jh, "on shadow: sleep");
			jbd_unlock_bh_state(bh);
			/* commit wakes up all shadow buffers after IO */
			for ( ; ; ) {
				prepare_to_wait(wqh, &wait.wait,
						TASK_UNINTERRUPTIBLE);
				if (jh->b_jlist != BJ_Shadow)
					break;
				schedule();
			}
			finish_wait(wqh, &wait.wait);
			goto repeat;
		}

		/* Only do the copy if the currently-owning transaction
		 * still needs it.  If it is on the Forget list, the
		 * committing transaction is past that stage.  The
		 * buffer had better remain locked during the kmalloc,
		 * but that should be true --- we hold the journal lock
		 * still and the buffer is already on the BUF_JOURNAL
		 * list so won't be flushed.
		 *
		 * Subtle point, though: if this is a get_undo_access,
		 * then we will be relying on the frozen_data to contain
		 * the new value of the committed_data record after the
		 * transaction, so we HAVE to force the frozen_data copy
		 * in that case. */
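
		/*
		 * Note on the allocation below (comment added for
		 * clarity, not in the original): jbd2_alloc() may sleep,
		 * so bh_state is dropped first and the whole access
		 * sequence is revalidated via "goto repeat" once the
		 * memory is in hand.
		 */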
		if (jh->b_jlist != BJ_Forget || force_copy) {
			JBUFFER_TRACE(jh, "generate frozen data");
			if (!frozen_buffer) {
				JBUFFER_TRACE(jh, "allocate memory for buffer");
				jbd_unlock_bh_state(bh);
				frozen_buffer =
					jbd2_alloc(jh2bh(jh)->b_size,
							 GFP_NOFS);
				if (!frozen_buffer) {
					printk(KERN_EMERG
					       "%s: OOM for frozen_buffer\n",
					       __FUNCTION__);
					JBUFFER_TRACE(jh, "oom!");
					error = -ENOMEM;
					jbd_lock_bh_state(bh);
					goto done;
				}
				goto repeat;
			}
			jh->b_frozen_data = frozen_buffer;
			frozen_buffer = NULL;
			need_copy = 1;
		}
		jh->b_next_transaction = transaction;
	}


	/*
	 * Finally, if the buffer is not journaled right now, we need to make
	 * sure it doesn't get written to disk before the caller actually
	 * commits the new data
	 */
	if (!jh->b_transaction) {
		JBUFFER_TRACE(jh, "no transaction");
		J_ASSERT_JH(jh, !jh->b_next_transaction);
		jh->b_transaction = transaction;
		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		spin_lock(&journal->j_list_lock);
		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
		spin_unlock(&journal->j_list_lock);
	}

done:
	if (need_copy) {
		struct page *page;
		int offset;
		char *source;

		J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)),
			    "Possible IO failure.\n");
		page = jh2bh(jh)->b_page;
		offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK;
		source = kmap_atomic(page, KM_USER0);
		memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size);
		kunmap_atomic(source, KM_USER0);
	}
	jbd_unlock_bh_state(bh);

	/*
	 * If we are about to journal a buffer, then any revoke pending on it is
	 * no longer valid
	 */
	jbd2_journal_cancel_revoke(handle, jh);

out:
	if (unlikely(frozen_buffer))	/* It's usually NULL */
		jbd2_free(frozen_buffer, bh->b_size);

	JBUFFER_TRACE(jh, "exit");
	return error;
}

/**
 * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
 * @handle: transaction to add buffer modifications to
 * @bh:     bh to be used for metadata writes
 * @credits: variable that will receive credits for the buffer
 *
 * Returns an error code or 0 on success.
 *
 * In full data journalling mode the buffer may be of type BJ_AsyncData,
 * because we're write()ing a buffer which is also part of a shared mapping.
 */

int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh)
{
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
	int rc;

	/* We do not want to get caught playing with fields which the
	 * log thread also manipulates.  Make sure that the buffer
	 * completes any outstanding IO before proceeding. */
	rc = do_get_write_access(handle, jh, 0);
	jbd2_journal_put_journal_head(jh);
	return rc;
}


/*
 * When the user wants to journal a newly created buffer_head
 * (ie. getblk() returned a new buffer and we are going to populate it
 * manually rather than reading off disk), then we need to keep the
 * buffer_head locked until it has been completely filled with new
 * data.  In this case, we should be able to make the assertion that
 * the bh is not already part of an existing transaction.
 *
 * The buffer should already be locked by the caller by this point.
 * There is no lock ranking violation: it was a newly created,
 * unlocked buffer beforehand. */
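
/*
 * Illustrative only (hypothetical caller, modelled on ext3_getblk()):
 *
 *	bh = sb_getblk(sb, blocknr);
 *	lock_buffer(bh);
 *	err = jbd2_journal_get_create_access(handle, bh);
 *	memset(bh->b_data, 0, bh->b_size);
 *	set_buffer_uptodate(bh);
 *	unlock_buffer(bh);
 *	err = jbd2_journal_dirty_metadata(handle, bh);
 */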

/**
 * int jbd2_journal_get_create_access () - notify intent to use newly created bh
 * @handle: transaction to add the new buffer to
 * @bh: new buffer.
 *
 * Call this if you create a new bh.
 */
int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
	int err;

	jbd_debug(5, "journal_head %p\n", jh);
	err = -EROFS;
	if (is_handle_aborted(handle))
		goto out;
	err = 0;

	JBUFFER_TRACE(jh, "entry");
	/*
	 * The buffer may already belong to this transaction due to pre-zeroing
	 * in the filesystem's new_block code.  It may also be on the previous,
	 * committing transaction's lists, but it HAS to be in Forget state in
	 * that case: the transaction must have deleted the buffer for it to be
	 * reused here.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);
	J_ASSERT_JH(jh, (jh->b_transaction == transaction ||
		jh->b_transaction == NULL ||
		(jh->b_transaction == journal->j_committing_transaction &&
			  jh->b_jlist == BJ_Forget)));

	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
	J_ASSERT_JH(jh, buffer_locked(jh2bh(jh)));

	if (jh->b_transaction == NULL) {
		jh->b_transaction = transaction;
		JBUFFER_TRACE(jh, "file as BJ_Reserved");
		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
	} else if (jh->b_transaction == journal->j_committing_transaction) {
		JBUFFER_TRACE(jh, "set next transaction");
		jh->b_next_transaction = transaction;
	}
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);

	/*
	 * akpm: I added this.  ext3_alloc_branch can pick up new indirect
	 * blocks which contain freed but then revoked metadata.  We need
	 * to cancel the revoke in case we end up freeing it yet again
	 * and then reallocating it as data - this would cause a second
	 * revoke, which hits an assertion error.
	 */
	JBUFFER_TRACE(jh, "cancelling revoke");
	jbd2_journal_cancel_revoke(handle, jh);
	jbd2_journal_put_journal_head(jh);
out:
	return err;
}
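
/*
 * Illustrative note (not in the original file): the undo mechanism
 * below is what the ext3/ext4 block allocator uses for bitmap buffers.
 * Freeing a block takes jbd2_journal_get_undo_access() on the bitmap,
 * and the allocator then consults b_committed_data so that a freed
 * block is not handed out again before the freeing transaction has
 * committed.
 */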

/**
 * int jbd2_journal_get_undo_access() -  Notify intent to modify metadata with
 *     non-rewindable consequences
 * @handle: transaction
 * @bh: buffer to undo
 * @credits: store the number of taken credits here (if not NULL)
 *
 * Sometimes there is a need to distinguish between metadata which has
 * been committed to disk and that which has not.  The ext3fs code uses
 * this for freeing and allocating space, we have to make sure that we
 * do not reuse freed space until the deallocation has been committed,
 * since if we overwrote that space we would make the delete
 * un-rewindable in case of a crash.
 *
 * To deal with that, jbd2_journal_get_undo_access requests write access to a
 * buffer for parts of non-rewindable operations such as delete
 * operations on the bitmaps.  The journaling code must keep a copy of
 * the buffer's contents prior to the undo_access call until such time
 * as we know that the buffer has definitely been committed to disk.
 *
 * We never need to know which transaction the committed data is part
 * of, buffers touched here are guaranteed to be dirtied later and so
 * will be committed to a new transaction in due course, at which point
 * we can discard the old committed data pointer.
 *
 * Returns error number or 0 on success.
 */
int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
{
	int err;
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
	char *committed_data = NULL;

	JBUFFER_TRACE(jh, "entry");

	/*
	 * Do this first --- it can drop the journal lock, so we want to
	 * make sure that obtaining the committed_data is done
	 * atomically wrt. completion of any outstanding commits.
	 */
	err = do_get_write_access(handle, jh, 1);
	if (err)
		goto out;

repeat:
	if (!jh->b_committed_data) {
		committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
		if (!committed_data) {
			printk(KERN_EMERG "%s: No memory for committed data\n",
				__FUNCTION__);
			err = -ENOMEM;
			goto out;
		}
	}

	jbd_lock_bh_state(bh);
	if (!jh->b_committed_data) {
		/* Copy out the current buffer contents into the
		 * preserved, committed copy. */
		JBUFFER_TRACE(jh, "generate b_committed data");
		if (!committed_data) {
			jbd_unlock_bh_state(bh);
			goto repeat;
		}

		jh->b_committed_data = committed_data;
		committed_data = NULL;
		memcpy(jh->b_committed_data, bh->b_data, bh->b_size);
	}
	jbd_unlock_bh_state(bh);
out:
	jbd2_journal_put_journal_head(jh);
	if (unlikely(committed_data))
		jbd2_free(committed_data, bh->b_size);
	return err;
}

/**
 * int jbd2_journal_dirty_data() -  mark a buffer as containing dirty data which
 *                             needs to be flushed before we can commit the
 *                             current transaction.
 * @handle: transaction
 * @bh: bufferhead to mark
 *
 * The buffer is placed on the transaction's data list and is marked as
 * belonging to the transaction.
 *
 * Returns error number or 0 on success.
 *
 * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage
 * by kswapd.
 */
int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh)
{
	journal_t *journal = handle->h_transaction->t_journal;
	int need_brelse = 0;
	struct journal_head *jh;

	if (is_handle_aborted(handle))
		return 0;

	jh = jbd2_journal_add_journal_head(bh);
	JBUFFER_TRACE(jh, "entry");

	/*
	 * The buffer could *already* be dirty.  Writeout can start
	 * at any time.
	 */
	jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid);

	/*
	 * What if the buffer is already part of a running transaction?
	 *
	 * There are two cases:
	 * 1) It is part of the current running transaction.  Refile it,
	 *    just in case we have allocated it as metadata, deallocated
	 *    it, then reallocated it as data.
	 * 2) It is part of the previous, still-committing transaction.
	 *    If all we want to do is to guarantee that the buffer will be
	 *    written to disk before this new transaction commits, then
	 *    being sure that the *previous* transaction has this same
	 *    property is sufficient for us!  Just leave it on its old
	 *    transaction.
	 *
	 * In case (2), the buffer must not already exist as metadata
	 * --- that would violate write ordering (a transaction is free
	 * to write its data at any point, even before the previous
	 * committing transaction has committed).  The caller must
	 * never, ever allow this to happen: there's nothing we can do
	 * about it in this layer.
	 */
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	/* Now that we have bh_state locked, are we really still mapped? */
	if (!buffer_mapped(bh)) {
		JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
		goto no_journal;
	}

	if (jh->b_transaction) {
		JBUFFER_TRACE(jh, "has transaction");
		if (jh->b_transaction != handle->h_transaction) {
			JBUFFER_TRACE(jh, "belongs to older transaction");
			J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);

			/* @@@ IS THIS TRUE  ? */
			/*
			 * Not any more.  Scenario: someone does a write()
			 * in data=journal mode.  The buffer's transaction has
			 * moved into commit.  Then someone does another
			 * write() to the file.  We do the frozen data copyout
			 * and set b_next_transaction to point to j_running_t.
			 * And while we're in that state, someone does a
			 * writepage() in an attempt to pageout the same area
			 * of the file via a shared mapping.  At present that
			 * calls jbd2_journal_dirty_data(), and we get right here.
			 * It may be too late to journal the data.  Simply
			 * falling through to the next test will suffice: the
			 * data will be dirty and will be checkpointed.  The
			 * ordering comments in the next comment block still
			 * apply.
			 */
			//J_ASSERT_JH(jh, jh->b_next_transaction == NULL);

			/*
			 * If we're journalling data, and this buffer was
			 * subject to a write(), it could be metadata, forget
			 * or shadow against the committing transaction.  Now,
			 * someone has dirtied the same darn page via a mapping
			 * and it is being writepage()'d.
			 * We *could* just steal the page from commit, with some
			 * fancy locking there.  Instead, we just skip it -
			 * don't tie the page's buffers to the new transaction
			 * at all.
			 * Implication: if we crash before the writepage() data
			 * is written into the filesystem, recovery will replay
			 * the write() data.
			 */
			if (jh->b_jlist != BJ_None &&
			    jh->b_jlist != BJ_SyncData &&
			    jh->b_jlist != BJ_Locked) {
				JBUFFER_TRACE(jh, "Not stealing");
				goto no_journal;
			}

			/*
			 * This buffer may be undergoing writeout in commit.  We
			 * can't return from here and let the caller dirty it
			 * again because that can cause the write-out loop in
			 * commit to never terminate.
			 */
			if (buffer_dirty(bh)) {
				get_bh(bh);
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				need_brelse = 1;
				sync_dirty_buffer(bh);
				jbd_lock_bh_state(bh);
				spin_lock(&journal->j_list_lock);
				/* Since we dropped the lock... */
				if (!buffer_mapped(bh)) {
					JBUFFER_TRACE(jh, "buffer got unmapped");
					goto no_journal;
				}
				/* The buffer may become locked again at any
				   time if it is redirtied */
			}

			/* journal_clean_data_list() may have got there first */
			if (jh->b_transaction != NULL) {
				JBUFFER_TRACE(jh, "unfile from commit");
				__jbd2_journal_temp_unlink_buffer(jh);
				/* It still points to the committing
				 * transaction; move it to this one so
				 * that the refile assert checks are
				 * happy. */
				jh->b_transaction = handle->h_transaction;
			}
			/* The buffer will be refiled below */

		}
		/*
		 * Special case --- the buffer might actually have been
		 * allocated and then immediately deallocated in the previous,
		 * committing transaction, so might still be left on that
		 * transaction's metadata lists.
		 */
		if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) {
			JBUFFER_TRACE(jh, "not on correct data list: unfile");
			J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow);
			__jbd2_journal_temp_unlink_buffer(jh);
			jh->b_transaction = handle->h_transaction;
			JBUFFER_TRACE(jh, "file as data");
			__jbd2_journal_file_buffer(jh, handle->h_transaction,
						BJ_SyncData);
		}
	} else {
		JBUFFER_TRACE(jh, "not on a transaction");
		__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData);
	}
no_journal:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	if (need_brelse) {
		BUFFER_TRACE(bh, "brelse");
		__brelse(bh);
	}
	JBUFFER_TRACE(jh, "exit");
	jbd2_journal_put_journal_head(jh);
	return 0;
}
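
/*
 * Illustrative note (not in the original file): a buffer consumes one
 * of the handle's credits only the first time it is dirtied within a
 * transaction (tracked via b_modified below), so a handle started with
 * nblocks == 3 may dirty at most three distinct buffers but may
 * re-dirty each of them for free.
 */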

/**
 * int jbd2_journal_dirty_metadata() -  mark a buffer as containing dirty metadata
 * @handle: transaction to add buffer to.
 * @bh: buffer to mark
 *
 * mark dirty metadata which needs to be journaled as part of the current
 * transaction.
 *
 * The buffer is placed on the transaction's metadata list and is marked
 * as belonging to the transaction.
 *
 * Returns error number or 0 on success.
 *
 * Special care needs to be taken if the buffer already belongs to the
 * current committing transaction (in which case we should have frozen
 * data present for that commit).  In that case, we don't relink the
 * buffer: that only gets done when the old transaction finally
 * completes its commit.
 */
int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh = bh2jh(bh);

	jbd_debug(5, "journal_head %p\n", jh);
	JBUFFER_TRACE(jh, "entry");
	if (is_handle_aborted(handle))
		goto out;

	jbd_lock_bh_state(bh);

	if (jh->b_modified == 0) {
		/*
		 * The buffer has been modified and is becoming part
		 * of the transaction.  This only needs to be done
		 * once per transaction -bzzz
		 */
		jh->b_modified = 1;
		J_ASSERT_JH(jh, handle->h_buffer_credits > 0);
		handle->h_buffer_credits--;
	}

	/*
	 * fastpath, to avoid expensive locking.  If this buffer is already
	 * on the running transaction's metadata list there is nothing to do.
	 * Nobody can take it off again because there is a handle open.
	 * I _think_ we're OK here with SMP barriers - a mistaken decision will
	 * result in this test being false, so we go in and take the locks.
	 */
	if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) {
		JBUFFER_TRACE(jh, "fastpath");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_running_transaction);
		goto out_unlock_bh;
	}

	set_buffer_jbddirty(bh);

	/*
	 * Metadata already on the current transaction list doesn't
	 * need to be filed.  Metadata on another transaction's list must
	 * be committing, and will be refiled once the commit completes:
	 * leave it alone for now.
	 */
	if (jh->b_transaction != transaction) {
		JBUFFER_TRACE(jh, "already on other transaction");
		J_ASSERT_JH(jh, jh->b_transaction ==
					journal->j_committing_transaction);
		J_ASSERT_JH(jh, jh->b_next_transaction == transaction);
		/* And this case is illegal: we can't reuse another
		 * transaction's data buffer, ever. */
		goto out_unlock_bh;
	}

	/* That test should have eliminated the following case: */
	J_ASSERT_JH(jh, jh->b_frozen_data == 0);

	JBUFFER_TRACE(jh, "file as BJ_Metadata");
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata);
	spin_unlock(&journal->j_list_lock);
out_unlock_bh:
	jbd_unlock_bh_state(bh);
out:
	JBUFFER_TRACE(jh, "exit");
	return 0;
}

/*
 * jbd2_journal_release_buffer: undo a get_write_access without any buffer
 * updates, if the update decided in the end that it didn't need access.
 *
 */
void
jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
{
	BUFFER_TRACE(bh, "entry");
}

/**
 * void jbd2_journal_forget() - bforget() for potentially-journaled buffers.
 * @handle: transaction handle
 * @bh:     bh to 'forget'
 *
 * We can only do the bforget if there are no commits pending against the
 * buffer.  If the buffer is dirty in the current running transaction we
 * can safely unlink it.
 *
 * bh may not be a journalled buffer at all - it may be a non-JBD
 * buffer which came off the hashtable.  Check for this.
 *
 * Decrements bh->b_count by one.
 *
 * Allow this call even if the handle has aborted --- it may be part of
 * the caller's cleanup after an abort.
 */
int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	struct journal_head *jh;
	int drop_reserve = 0;
	int err = 0;

	BUFFER_TRACE(bh, "entry");

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	if (!buffer_jbd(bh))
		goto not_jbd;
	jh = bh2jh(bh);

	/* Critical error: attempting to delete a bitmap buffer, maybe?
	 * Don't do any jbd operations, and return an error. */
	if (!J_EXPECT_JH(jh, !jh->b_committed_data,
			 "inconsistent data on disk")) {
		err = -EIO;
		goto not_jbd;
	}

	/*
	 * The buffer's going from the transaction, we must drop
	 * all references -bzzz
	 */
	jh->b_modified = 0;

	if (jh->b_transaction == handle->h_transaction) {
		J_ASSERT_JH(jh, !jh->b_frozen_data);

		/* If we are forgetting a buffer which is already part
		 * of this transaction, then we can just drop it from
		 * the transaction immediately. */
		clear_buffer_dirty(bh);
		clear_buffer_jbddirty(bh);

		JBUFFER_TRACE(jh, "belongs to current transaction: unfile");

		drop_reserve = 1;

		/*
		 * We are no longer going to journal this buffer.
		 * However, the commit of this transaction is still
		 * important to the buffer: the delete that we are now
		 * processing might obsolete an old log entry, so by
		 * committing, we can satisfy the buffer's checkpoint.
		 *
		 * So, if we have a checkpoint on the buffer, we should
		 * now refile the buffer on our BJ_Forget list so that
		 * we know to remove the checkpoint after we commit.
		 */

		if (jh->b_cp_transaction) {
			__jbd2_journal_temp_unlink_buffer(jh);
			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
		} else {
			__jbd2_journal_unfile_buffer(jh);
			jbd2_journal_remove_journal_head(bh);
			__brelse(bh);
			if (!buffer_jbd(bh)) {
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				__bforget(bh);
				goto drop;
			}
		}
	} else if (jh->b_transaction) {
		J_ASSERT_JH(jh, (jh->b_transaction ==
				 journal->j_committing_transaction));
		/* However, if the buffer is still owned by a prior
		 * (committing) transaction, we can't drop it yet... */
		JBUFFER_TRACE(jh, "belongs to older transaction");
		/* ... but we CAN drop it from the new transaction if we
		 * have also modified it since the original commit. */

		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction == transaction);
			jh->b_next_transaction = NULL;
			drop_reserve = 1;
		}
	}

not_jbd:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	__brelse(bh);
drop:
	if (drop_reserve) {
		/* no need to reserve log space for this block -bzzz */
		handle->h_buffer_credits++;
	}
	return err;
}
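
/*
 * Illustrative only (comment not in the original file): a caller that
 * must have its update on stable storage before returning (an
 * fsync-style path, say) sets h_sync before dropping the handle:
 *
 *	handle->h_sync = 1;
 *	err = jbd2_journal_stop(handle);
 *
 * which makes jbd2_journal_stop() below kick off a commit and wait for
 * it to complete.
 */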

/**
 * int jbd2_journal_stop() - complete a transaction
 * @handle: transaction to complete.
 *
 * All done for a particular handle.
 *
 * There is not much action needed here.  We just return any remaining
 * buffer credits to the transaction and remove the handle.  The only
 * complication is that we need to start a commit operation if the
 * filesystem is marked for synchronous update.
 *
 * jbd2_journal_stop itself will not usually return an error, but it may
 * do so in unusual circumstances.  In particular, expect it to
 * return -EIO if a jbd2_journal_abort has been executed since the
 * transaction began.
 */
int jbd2_journal_stop(handle_t *handle)
{
	transaction_t *transaction = handle->h_transaction;
	journal_t *journal = transaction->t_journal;
	int old_handle_count, err;
	pid_t pid;

	J_ASSERT(journal_current_handle() == handle);

	if (is_handle_aborted(handle))
		err = -EIO;
	else {
		J_ASSERT(transaction->t_updates > 0);
		err = 0;
	}

	if (--handle->h_ref > 0) {
		jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
			  handle->h_ref);
		return err;
	}

	jbd_debug(4, "Handle %p going down\n", handle);

	/*
	 * Implement synchronous transaction batching.  If the handle
	 * was synchronous, don't force a commit immediately.  Let's
	 * yield and let another thread piggyback onto this transaction.
	 * Keep doing that while new threads continue to arrive.
	 * It doesn't cost much - we're about to run a commit and sleep
	 * on IO anyway.  Speeds up many-threaded, many-dir operations
	 * by 30x or more...
	 *
	 * But don't do this if this process was the most recent one to
	 * perform a synchronous write.  We do this to detect the case where a
	 * single process is doing a stream of sync writes.  No point in waiting
	 * for joiners in that case.
	 */
	pid = current->pid;
	if (handle->h_sync && journal->j_last_sync_writer != pid) {
		journal->j_last_sync_writer = pid;
		do {
			old_handle_count = transaction->t_handle_count;
			schedule_timeout_uninterruptible(1);
		} while (old_handle_count != transaction->t_handle_count);
	}

	current->journal_info = NULL;
	spin_lock(&journal->j_state_lock);
	spin_lock(&transaction->t_handle_lock);
	transaction->t_outstanding_credits -= handle->h_buffer_credits;
	transaction->t_updates--;
	if (!transaction->t_updates) {
		wake_up(&journal->j_wait_updates);
		if (journal->j_barrier_count)
			wake_up(&journal->j_wait_transaction_locked);
	}

	/*
	 * If the handle is marked SYNC, we need to set another commit
	 * going!  We also want to force a commit if the current
	 * transaction is occupying too much of the log, or if the
	 * transaction is too old now.
	 */
	if (handle->h_sync ||
			transaction->t_outstanding_credits >
				journal->j_max_transaction_buffers ||
			time_after_eq(jiffies, transaction->t_expires)) {
		/* Do this even for aborted journals: an abort still
		 * completes the commit thread, it just doesn't write
		 * anything to disk. */
		tid_t tid = transaction->t_tid;

		spin_unlock(&transaction->t_handle_lock);
		jbd_debug(2, "transaction too old, requesting commit for "
					"handle %p\n", handle);
		/* This is non-blocking */
		__jbd2_log_start_commit(journal, transaction->t_tid);
		spin_unlock(&journal->j_state_lock);

		/*
		 * Special case: JBD2_SYNC synchronous updates require us
		 * to wait for the commit to complete.
		 */
		if (handle->h_sync && !(current->flags & PF_MEMALLOC))
			err = jbd2_log_wait_commit(journal, tid);
	} else {
		spin_unlock(&transaction->t_handle_lock);
		spin_unlock(&journal->j_state_lock);
	}

	jbd2_free_handle(handle);
	return err;
}

/**
 * int jbd2_journal_force_commit() - force any uncommitted transactions
 * @journal: journal to force
 *
 * For synchronous operations: force any uncommitted transactions
 * to disk.  May seem kludgy, but it reuses all the handle batching
 * code in a very simple manner.
 */
int jbd2_journal_force_commit(journal_t *journal)
{
	handle_t *handle;
	int ret;

	handle = jbd2_journal_start(journal, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
	} else {
		handle->h_sync = 1;
		ret = jbd2_journal_stop(handle);
	}
	return ret;
}

/*
 *
 * List management code snippets: various functions for manipulating the
 * transaction buffer lists.
 *
 */

/*
 * Append a buffer to a transaction list, given the transaction's list head
 * pointer.
 *
 * j_list_lock is held.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 */

static inline void
__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
{
	if (!*list) {
		jh->b_tnext = jh->b_tprev = jh;
		*list = jh;
	} else {
		/* Insert at the tail of the list to preserve order */
		struct journal_head *first = *list, *last = first->b_tprev;
		jh->b_tprev = last;
		jh->b_tnext = first;
		last->b_tnext = first->b_tprev = jh;
	}
}

/*
 * Remove a buffer from a transaction list, given the transaction's list
 * head pointer.
 *
 * Called with j_list_lock held, and the journal may not be locked.
 *
 * jbd_lock_bh_state(jh2bh(jh)) is held.
 */

static inline void
__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
{
	if (*list == jh) {
		*list = jh->b_tnext;
		if (*list == jh)
			*list = NULL;
	}
	jh->b_tprev->b_tnext = jh->b_tnext;
	jh->b_tnext->b_tprev = jh->b_tprev;
}

/*
 * Remove a buffer from the appropriate transaction list.
 *
 * Note that this function can *change* the value of
 * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
 * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list.  If the caller
 * is holding onto a copy of one of these pointers, it could go bad.
 * Generally the caller needs to re-read the pointer from the transaction_t.
 *
 * Called under j_list_lock.  The journal may not be locked.
 */
void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh)
{
	struct journal_head **list = NULL;
	transaction_t *transaction;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	transaction = jh->b_transaction;
	if (transaction)
		assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	if (jh->b_jlist != BJ_None)
		J_ASSERT_JH(jh, transaction != 0);

	switch (jh->b_jlist) {
	case BJ_None:
		return;
	case BJ_SyncData:
		list = &transaction->t_sync_datalist;
		break;
	case BJ_Metadata:
		transaction->t_nr_buffers--;
		J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0);
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	case BJ_Locked:
		list = &transaction->t_locked_list;
		break;
	}

	__blist_del_buffer(list, jh);
	jh->b_jlist = BJ_None;
	if (test_clear_buffer_jbddirty(bh))
		mark_buffer_dirty(bh);	/* Expose it to the VM */
}

void __jbd2_journal_unfile_buffer(struct journal_head *jh)
{
	__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = NULL;
}

void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_unfile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}
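
/*
 * Naming note (comment added for clarity, not in the original):
 * "temp_unlink" above takes a buffer off its transaction list but
 * leaves jh->b_transaction set, so it can be refiled; "unfile" also
 * clears jh->b_transaction, fully detaching the buffer from the
 * transaction.
 */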

/*
 * Called from jbd2_journal_try_to_free_buffers().
 *
 * Called under jbd_lock_bh_state(bh)
 */
static void
__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh)
{
	struct journal_head *jh;

	jh = bh2jh(bh);

	if (buffer_locked(bh) || buffer_dirty(bh))
		goto out;

	if (jh->b_next_transaction != 0)
		goto out;

	spin_lock(&journal->j_list_lock);
	if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) {
		if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) {
			/* A written-back ordered data buffer */
			JBUFFER_TRACE(jh, "release data");
			__jbd2_journal_unfile_buffer(jh);
			jbd2_journal_remove_journal_head(bh);
			__brelse(bh);
		}
	} else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) {
		/* written-back checkpointed metadata buffer */
		if (jh->b_jlist == BJ_None) {
			JBUFFER_TRACE(jh, "remove from checkpoint list");
			__jbd2_journal_remove_checkpoint(jh);
			jbd2_journal_remove_journal_head(bh);
			__brelse(bh);
		}
	}
	spin_unlock(&journal->j_list_lock);
out:
	return;
}
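
/*
 * Illustrative note (not in the original file):
 * jbd2_journal_try_to_free_buffers() below is typically wired up as
 * the filesystem's ->releasepage method, so it runs when the VM asks
 * whether a page's buffers can be reclaimed.
 */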

/**
 * int jbd2_journal_try_to_free_buffers() - try to free page buffers.
 * @journal: journal for operation
 * @page: to try and free
 * @unused_gfp_mask: unused
 *
 *
 * For all the buffers on this page,
 * if they are fully written out ordered data, move them onto BUF_CLEAN
 * so try_to_free_buffers() can reap them.
 *
 * This function returns non-zero if we wish try_to_free_buffers()
 * to be called. We do this if the page is releasable by try_to_free_buffers().
 * We also do it if the page has locked or dirty buffers and the caller wants
 * us to perform sync or async writeout.
 *
 * This complicates JBD locking somewhat.  We aren't protected by the
 * BKL here.  We wish to remove the buffer from its committing or
 * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer.
 *
 * This may *change* the value of transaction_t->t_datalist, so anyone
 * who looks at t_datalist needs to lock against this function.
 *
 * Even worse, someone may be doing a jbd2_journal_dirty_data on this
 * buffer.  So we need to lock against that.  jbd2_journal_dirty_data()
 * will come out of the lock with the buffer dirty, which makes it
 * ineligible for release here.
 *
 * Who else is affected by this?  hmm...  Really the only contender
 * is do_get_write_access() - it could be looking at the buffer while
 * journal_try_to_free_buffer() is changing its state.  But that
 * cannot happen because we never reallocate freed data as metadata
 * while the data is part of a transaction.  Yes?
 */
int jbd2_journal_try_to_free_buffers(journal_t *journal,
				struct page *page, gfp_t unused_gfp_mask)
{
	struct buffer_head *head;
	struct buffer_head *bh;
	int ret = 0;

	J_ASSERT(PageLocked(page));

	head = page_buffers(page);
	bh = head;
	do {
		struct journal_head *jh;

		/*
		 * We take our own ref against the journal_head here to avoid
		 * having to add tons of locking around each instance of
		 * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head().
		 */
		jh = jbd2_journal_grab_journal_head(bh);
		if (!jh)
			continue;

		jbd_lock_bh_state(bh);
		__journal_try_to_free_buffer(journal, bh);
		jbd2_journal_put_journal_head(jh);
		jbd_unlock_bh_state(bh);
		if (buffer_jbd(bh))
			goto busy;
	} while ((bh = bh->b_this_page) != head);
	ret = try_to_free_buffers(page);
busy:
	return ret;
}

/*
 * This buffer is no longer needed.  If it is on an older transaction's
 * checkpoint list we need to record it on this transaction's forget list
 * to pin this buffer (and hence its checkpointing transaction) down until
 * this transaction commits.  If the buffer isn't on a checkpoint list, we
 * release it.
 * Returns non-zero if JBD no longer has an interest in the buffer.
 *
 * Called under j_list_lock.
 *
 * Called under jbd_lock_bh_state(bh).
 */
static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction)
{
	int may_free = 1;
	struct buffer_head *bh = jh2bh(jh);

	__jbd2_journal_unfile_buffer(jh);

	if (jh->b_cp_transaction) {
		JBUFFER_TRACE(jh, "on running+cp transaction");
		__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
		clear_buffer_jbddirty(bh);
		may_free = 0;
	} else {
		JBUFFER_TRACE(jh, "on running transaction");
		jbd2_journal_remove_journal_head(bh);
		__brelse(bh);
	}
	return may_free;
}

/*
 * jbd2_journal_invalidatepage
 *
 * This code is tricky.  It has a number of cases to deal with.
 *
 * There are two invariants which this code relies on:
 *
 * i_size must be updated on disk before we start calling invalidatepage on the
 * data.
 *
 * This is done in ext3 by defining an ext3_setattr method which
 * updates i_size before truncate gets going.  By maintaining this
 * invariant, we can be sure that it is safe to throw away any buffers
 * attached to the current transaction: once the transaction commits,
 * we know that the data will not be needed.
 *
 * Note however that we can *not* throw away data belonging to the
 * previous, committing transaction!
 *
 * Any disk blocks which *are* part of the previous, committing
 * transaction (and which therefore cannot be discarded immediately) are
 * not going to be reused in the new running transaction
 *
 * The bitmap committed_data images guarantee this: any block which is
 * allocated in one transaction and removed in the next will be marked
 * as in-use in the committed_data bitmap, so cannot be reused until
 * the next transaction to delete the block commits.  This means that
 * leaving committing buffers dirty is quite safe: the disk blocks
 * cannot be reallocated to a different file and so buffer aliasing is
 * not possible.
 *
 *
 * The above applies mainly to ordered data mode.  In writeback mode we
 * don't make guarantees about the order in which data hits disk --- in
 * particular we don't guarantee that new dirty data is flushed before
 * transaction commit --- so it is always safe just to discard data
 * immediately in that mode.  --sct
 */
/*
 * jbd2_journal_invalidatepage
 *
 * This code is tricky. It has a number of cases to deal with.
 *
 * There are two invariants which this code relies on:
 *
 * i_size must be updated on disk before we start calling invalidatepage
 * on the data.
 *
 * This is done in ext4 by defining an ext4_setattr method which
 * updates i_size before truncate gets going. By maintaining this
 * invariant, we can be sure that it is safe to throw away any buffers
 * attached to the current transaction: once the transaction commits,
 * we know that the data will not be needed.
 *
 * Note however that we can *not* throw away data belonging to the
 * previous, committing transaction!
 *
 * Any disk blocks which *are* part of the previous, committing
 * transaction (and which therefore cannot be discarded immediately) are
 * not going to be reused in the new running transaction.
 *
 * The bitmap committed_data images guarantee this: any block which is
 * allocated in one transaction and removed in the next will be marked
 * as in-use in the committed_data bitmap, so cannot be reused until
 * the next transaction to delete the block commits. This means that
 * leaving committing buffers dirty is quite safe: the disk blocks
 * cannot be reallocated to a different file and so buffer aliasing is
 * not possible.
 *
 * The above applies mainly to ordered data mode. In writeback mode we
 * don't make guarantees about the order in which data hits disk --- in
 * particular we don't guarantee that new dirty data is flushed before
 * transaction commit --- so it is always safe just to discard data
 * immediately in that mode. --sct
 */

/*
 * The journal_unmap_buffer helper function returns zero if the buffer
 * concerned remains pinned as an anonymous buffer belonging to an older
 * transaction.
 *
 * We're outside-transaction here. Either or both of j_running_transaction
 * and j_committing_transaction may be NULL.
 */
static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh)
{
	transaction_t *transaction;
	struct journal_head *jh;
	int may_free = 1;
	int ret;

	BUFFER_TRACE(bh, "entry");

	/*
	 * It is safe to proceed here without the j_list_lock because the
	 * buffers cannot be stolen by try_to_free_buffers as long as we are
	 * holding the page lock. --sct
	 */

	if (!buffer_jbd(bh))
		goto zap_buffer_unlocked;

	spin_lock(&journal->j_state_lock);
	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	jh = jbd2_journal_grab_journal_head(bh);
	if (!jh)
		goto zap_buffer_no_jh;

	transaction = jh->b_transaction;
	if (transaction == NULL) {
		/* First case: not on any transaction. If it
		 * has no checkpoint link, then we can zap it:
		 * it's a writeback-mode buffer so we don't care
		 * if it hits disk safely. */
		if (!jh->b_cp_transaction) {
			JBUFFER_TRACE(jh, "not on any transaction: zap");
			goto zap_buffer;
		}

		if (!buffer_dirty(bh)) {
			/* bdflush has written it. We can drop it now */
			goto zap_buffer;
		}

		/* OK, it must be in the journal but still not
		 * written fully to disk: it's metadata or
		 * journaled data... */

		if (journal->j_running_transaction) {
			/* ... and once the current transaction has
			 * committed, the buffer won't be needed any
			 * longer. */
			JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget");
			ret = __dispose_buffer(jh,
					journal->j_running_transaction);
			jbd2_journal_put_journal_head(jh);
			spin_unlock(&journal->j_list_lock);
			jbd_unlock_bh_state(bh);
			spin_unlock(&journal->j_state_lock);
			return ret;
		} else {
			/* There is no currently-running transaction. So the
			 * orphan record which we wrote for this file must have
			 * passed into commit. We must attach this buffer to
			 * the committing transaction, if it exists. */
			if (journal->j_committing_transaction) {
				JBUFFER_TRACE(jh, "give to committing trans");
				ret = __dispose_buffer(jh,
					journal->j_committing_transaction);
				jbd2_journal_put_journal_head(jh);
				spin_unlock(&journal->j_list_lock);
				jbd_unlock_bh_state(bh);
				spin_unlock(&journal->j_state_lock);
				return ret;
			} else {
				/* The orphan record's transaction has
				 * committed. We can cleanse this buffer */
				clear_buffer_jbddirty(bh);
				goto zap_buffer;
			}
		}
	} else if (transaction == journal->j_committing_transaction) {
		JBUFFER_TRACE(jh, "on committing transaction");
		if (jh->b_jlist == BJ_Locked) {
			/*
			 * The buffer is on the committing transaction's locked
			 * list. We have the buffer locked, so I/O has
			 * completed. So we can nail the buffer now.
			 */
			may_free = __dispose_buffer(jh, transaction);
			goto zap_buffer;
		}
		/*
		 * If it is committing, we simply cannot touch it. We
		 * can remove its next_transaction pointer from the
		 * running transaction if that is set, but nothing
		 * else.
		 */
		set_buffer_freed(bh);
		if (jh->b_next_transaction) {
			J_ASSERT(jh->b_next_transaction ==
					journal->j_running_transaction);
			jh->b_next_transaction = NULL;
		}
		jbd2_journal_put_journal_head(jh);
		spin_unlock(&journal->j_list_lock);
		jbd_unlock_bh_state(bh);
		spin_unlock(&journal->j_state_lock);
		return 0;
	} else {
		/* Good, the buffer belongs to the running transaction.
		 * We are writing our own transaction's data, not any
		 * previous one's, so it is safe to throw it away
		 * (remember that we expect the filesystem to have set
		 * i_size already for this truncate so recovery will not
		 * expose the disk blocks we are discarding here.) */
		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
		JBUFFER_TRACE(jh, "on running transaction");
		may_free = __dispose_buffer(jh, transaction);
	}

zap_buffer:
	jbd2_journal_put_journal_head(jh);
zap_buffer_no_jh:
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(bh);
	spin_unlock(&journal->j_state_lock);
zap_buffer_unlocked:
	clear_buffer_dirty(bh);
	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
	clear_buffer_mapped(bh);
	clear_buffer_req(bh);
	clear_buffer_new(bh);
	bh->b_bdev = NULL;
	return may_free;
}
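/*
 * A sketch of the first invariant journal_unmap_buffer() relies on:
 * the filesystem must journal the new i_size before invalidatepage is
 * called on the truncated range.  example_setattr() is a hypothetical
 * name; ext4 implements this ordering in its own setattr method, and
 * the exact journalling steps are filesystem-specific.
 */
#if 0
static int example_setattr(struct inode *inode, struct iattr *attr)
{
	if (attr->ia_valid & ATTR_SIZE) {
		/*
		 * Record the new i_size in the running transaction
		 * *before* any pages are invalidated, so that replay
		 * after a crash never exposes discarded blocks.
		 */
		i_size_write(inode, attr->ia_size);
		/* ... journal the updated inode (get_write_access on
		 * the inode buffer, dirty it), then truncate pages ... */
	}
	return 0;
}
#endif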
/**
 * void jbd2_journal_invalidatepage()
 * @journal: journal to use for flush
 * @page: page to flush
 * @offset: start of the range to invalidate
 *
 * Reap page buffers containing data after offset in page.
 */
void jbd2_journal_invalidatepage(journal_t *journal,
		      struct page *page,
		      unsigned long offset)
{
	struct buffer_head *head, *bh, *next;
	unsigned int curr_off = 0;
	int may_free = 1;

	BUG_ON(!PageLocked(page));
	if (!page_has_buffers(page))
		return;

	/* We will potentially be playing with lists other than just the
	 * data lists (especially for journaled data mode), so be
	 * cautious in our locking. */

	head = bh = page_buffers(page);
	do {
		unsigned int next_off = curr_off + bh->b_size;
		next = bh->b_this_page;

		if (offset <= curr_off) {
			/* This block is wholly outside the truncation point */
			lock_buffer(bh);
			may_free &= journal_unmap_buffer(journal, bh);
			unlock_buffer(bh);
		}
		curr_off = next_off;
		bh = next;

	} while (bh != head);

	if (!offset) {
		if (may_free && try_to_free_buffers(page))
			J_ASSERT(!page_has_buffers(page));
	}
}
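/*
 * Usage sketch (hypothetical caller, not jbd2 code): a client
 * filesystem's ->invalidatepage address_space operation forwards to
 * jbd2_journal_invalidatepage() with the journal its truncate is
 * running under.  example_invalidatepage() and example_get_journal()
 * are illustrative names only.
 */
#if 0
static void example_invalidatepage(struct page *page, unsigned long offset)
{
	journal_t *journal = example_get_journal(page->mapping->host);

	jbd2_journal_invalidatepage(journal, page, offset);
}
#endif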
/*
 * File a buffer on the given transaction list.
 */
void __jbd2_journal_file_buffer(struct journal_head *jh,
			transaction_t *transaction, int jlist)
{
	struct journal_head **list = NULL;
	int was_dirty = 0;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	assert_spin_locked(&transaction->t_journal->j_list_lock);

	J_ASSERT_JH(jh, jh->b_jlist < BJ_Types);
	J_ASSERT_JH(jh, jh->b_transaction == transaction ||
				jh->b_transaction == NULL);

	if (jh->b_transaction && jh->b_jlist == jlist)
		return;

	/* The following list of buffer states needs to be consistent
	 * with __jbd_unexpected_dirty_buffer()'s handling of dirty
	 * state. */

	if (jlist == BJ_Metadata || jlist == BJ_Reserved ||
	    jlist == BJ_Shadow || jlist == BJ_Forget) {
		if (test_clear_buffer_dirty(bh) ||
		    test_clear_buffer_jbddirty(bh))
			was_dirty = 1;
	}

	if (jh->b_transaction)
		__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = transaction;

	switch (jlist) {
	case BJ_None:
		J_ASSERT_JH(jh, !jh->b_committed_data);
		J_ASSERT_JH(jh, !jh->b_frozen_data);
		return;
	case BJ_SyncData:
		list = &transaction->t_sync_datalist;
		break;
	case BJ_Metadata:
		transaction->t_nr_buffers++;
		list = &transaction->t_buffers;
		break;
	case BJ_Forget:
		list = &transaction->t_forget;
		break;
	case BJ_IO:
		list = &transaction->t_iobuf_list;
		break;
	case BJ_Shadow:
		list = &transaction->t_shadow_list;
		break;
	case BJ_LogCtl:
		list = &transaction->t_log_list;
		break;
	case BJ_Reserved:
		list = &transaction->t_reserved_list;
		break;
	case BJ_Locked:
		list = &transaction->t_locked_list;
		break;
	}

	__blist_add_buffer(list, jh);
	jh->b_jlist = jlist;

	if (was_dirty)
		set_buffer_jbddirty(bh);
}

void jbd2_journal_file_buffer(struct journal_head *jh,
				transaction_t *transaction, int jlist)
{
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&transaction->t_journal->j_list_lock);
	__jbd2_journal_file_buffer(jh, transaction, jlist);
	spin_unlock(&transaction->t_journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}

/*
 * Remove a buffer from its current buffer list in preparation for
 * dropping it from its current transaction entirely. If the buffer has
 * already started to be used by a subsequent transaction, refile the
 * buffer on that transaction's metadata list.
 *
 * Called under journal->j_list_lock
 *
 * Called under jbd_lock_bh_state(jh2bh(jh))
 */
void __jbd2_journal_refile_buffer(struct journal_head *jh)
{
	int was_dirty;
	struct buffer_head *bh = jh2bh(jh);

	J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh));
	if (jh->b_transaction)
		assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock);

	/* If the buffer is now unused, just drop it. */
	if (jh->b_next_transaction == NULL) {
		__jbd2_journal_unfile_buffer(jh);
		return;
	}

	/*
	 * It has been modified by a later transaction: add it to the new
	 * transaction's metadata list.
	 */

	was_dirty = test_clear_buffer_jbddirty(bh);
	__jbd2_journal_temp_unlink_buffer(jh);
	jh->b_transaction = jh->b_next_transaction;
	jh->b_next_transaction = NULL;
	__jbd2_journal_file_buffer(jh, jh->b_transaction,
				was_dirty ? BJ_Metadata : BJ_Reserved);
	J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING);

	if (was_dirty)
		set_buffer_jbddirty(bh);
}
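/*
 * Usage sketch: filing a freshly modified buffer onto the running
 * transaction's metadata list.  This is roughly the filing step that
 * jbd2_journal_dirty_metadata() performs internally; example_file()
 * is a hypothetical name, not a jbd2 entry point.
 */
#if 0
static void example_file(transaction_t *transaction, struct journal_head *jh)
{
	/* The locked wrapper takes the bh-state lock and j_list_lock
	 * itself, then delegates to __jbd2_journal_file_buffer(). */
	jbd2_journal_file_buffer(jh, transaction, BJ_Metadata);
}
#endif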
/*
 * For the unlocked version of this call, also make sure that any
 * hanging journal_head is cleaned up if necessary.
 *
 * __jbd2_journal_refile_buffer is usually called as part of a single locked
 * operation on a buffer_head, in which the caller is probably going to
 * be hooking the journal_head onto other lists. In that case it is up
 * to the caller to remove the journal_head if necessary. For the
 * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
 * doing anything else to the buffer so we need to do the cleanup
 * ourselves to avoid a jh leak.
 *
 * *** The journal_head may be freed by this call! ***
 */
void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
{
	struct buffer_head *bh = jh2bh(jh);

	jbd_lock_bh_state(bh);
	spin_lock(&journal->j_list_lock);

	__jbd2_journal_refile_buffer(jh);
	jbd_unlock_bh_state(bh);
	jbd2_journal_remove_journal_head(bh);

	spin_unlock(&journal->j_list_lock);
	__brelse(bh);
}
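/*
 * A sketch contrasting the two refile entry points above.  The locked
 * variant suits callers that already hold the locks across several
 * list operations (commit-path style) and manage the journal_head
 * lifetime themselves; the unlocked variant is for one-off callers.
 * example_refile_locked() and example_refile_unlocked() are
 * hypothetical names.
 */
#if 0
static void example_refile_locked(journal_t *journal, struct journal_head *jh)
{
	/* Caller holds both locks and remains responsible for any
	 * hanging journal_head afterwards. */
	jbd_lock_bh_state(jh2bh(jh));
	spin_lock(&journal->j_list_lock);
	__jbd2_journal_refile_buffer(jh);
	spin_unlock(&journal->j_list_lock);
	jbd_unlock_bh_state(jh2bh(jh));
}

static void example_refile_unlocked(journal_t *journal, struct journal_head *jh)
{
	/* One-shot: takes the locks and drops any hanging journal_head.
	 * jh must not be touched after this returns. */
	jbd2_journal_refile_buffer(journal, jh);
}
#endif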