1 /* 2 * linux/fs/jbd2/commit.c 3 * 4 * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 5 * 6 * Copyright 1998 Red Hat corp --- All Rights Reserved 7 * 8 * This file is part of the Linux kernel and is made available under 9 * the terms of the GNU General Public License, version 2, or at your 10 * option, any later version, incorporated herein by reference. 11 * 12 * Journal commit routines for the generic filesystem journaling code; 13 * part of the ext2fs journaling system. 14 */ 15 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/slab.h> 21 #include <linux/mm.h> 22 #include <linux/pagemap.h> 23 #include <linux/smp_lock.h> 24 25 /* 26 * Default IO end handler for temporary BJ_IO buffer_heads. 27 */ 28 static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate) 29 { 30 BUFFER_TRACE(bh, ""); 31 if (uptodate) 32 set_buffer_uptodate(bh); 33 else 34 clear_buffer_uptodate(bh); 35 unlock_buffer(bh); 36 } 37 38 /* 39 * When an ext3-ordered file is truncated, it is possible that many pages are 40 * not sucessfully freed, because they are attached to a committing transaction. 41 * After the transaction commits, these pages are left on the LRU, with no 42 * ->mapping, and with attached buffers. These pages are trivially reclaimable 43 * by the VM, but their apparent absence upsets the VM accounting, and it makes 44 * the numbers in /proc/meminfo look odd. 45 * 46 * So here, we have a buffer which has just come off the forget list. Look to 47 * see if we can strip all buffers from the backing page. 48 * 49 * Called under lock_journal(), and possibly under journal_datalist_lock. The 50 * caller provided us with a ref against the buffer, and we drop that here. 51 */ 52 static void release_buffer_page(struct buffer_head *bh) 53 { 54 struct page *page; 55 56 if (buffer_dirty(bh)) 57 goto nope; 58 if (atomic_read(&bh->b_count) != 1) 59 goto nope; 60 page = bh->b_page; 61 if (!page) 62 goto nope; 63 if (page->mapping) 64 goto nope; 65 66 /* OK, it's a truncated page */ 67 if (TestSetPageLocked(page)) 68 goto nope; 69 70 page_cache_get(page); 71 __brelse(bh); 72 try_to_free_buffers(page); 73 unlock_page(page); 74 page_cache_release(page); 75 return; 76 77 nope: 78 __brelse(bh); 79 } 80 81 /* 82 * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is 83 * held. For ranking reasons we must trylock. If we lose, schedule away and 84 * return 0. j_list_lock is dropped in this case. 85 */ 86 static int inverted_lock(journal_t *journal, struct buffer_head *bh) 87 { 88 if (!jbd_trylock_bh_state(bh)) { 89 spin_unlock(&journal->j_list_lock); 90 schedule(); 91 return 0; 92 } 93 return 1; 94 } 95 96 /* Done it all: now write the commit record. We should have 97 * cleaned up our previous buffers by now, so if we are in abort 98 * mode we can now just skip the rest of the journal write 99 * entirely. 100 * 101 * Returns 1 if the journal needs to be aborted or 0 on success 102 */ 103 static int journal_write_commit_record(journal_t *journal, 104 transaction_t *commit_transaction) 105 { 106 struct journal_head *descriptor; 107 struct buffer_head *bh; 108 int i, ret; 109 int barrier_done = 0; 110 111 if (is_journal_aborted(journal)) 112 return 0; 113 114 descriptor = jbd2_journal_get_descriptor_buffer(journal); 115 if (!descriptor) 116 return 1; 117 118 bh = jh2bh(descriptor); 119 120 /* AKPM: buglet - add `i' to tmp! */ 121 for (i = 0; i < bh->b_size; i += 512) { 122 journal_header_t *tmp = (journal_header_t*)bh->b_data; 123 tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 124 tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); 125 tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); 126 } 127 128 JBUFFER_TRACE(descriptor, "write commit block"); 129 set_buffer_dirty(bh); 130 if (journal->j_flags & JBD2_BARRIER) { 131 set_buffer_ordered(bh); 132 barrier_done = 1; 133 } 134 ret = sync_dirty_buffer(bh); 135 /* is it possible for another commit to fail at roughly 136 * the same time as this one? If so, we don't want to 137 * trust the barrier flag in the super, but instead want 138 * to remember if we sent a barrier request 139 */ 140 if (ret == -EOPNOTSUPP && barrier_done) { 141 char b[BDEVNAME_SIZE]; 142 143 printk(KERN_WARNING 144 "JBD: barrier-based sync failed on %s - " 145 "disabling barriers\n", 146 bdevname(journal->j_dev, b)); 147 spin_lock(&journal->j_state_lock); 148 journal->j_flags &= ~JBD2_BARRIER; 149 spin_unlock(&journal->j_state_lock); 150 151 /* And try again, without the barrier */ 152 clear_buffer_ordered(bh); 153 set_buffer_uptodate(bh); 154 set_buffer_dirty(bh); 155 ret = sync_dirty_buffer(bh); 156 } 157 put_bh(bh); /* One for getblk() */ 158 jbd2_journal_put_journal_head(descriptor); 159 160 return (ret == -EIO); 161 } 162 163 static void journal_do_submit_data(struct buffer_head **wbuf, int bufs) 164 { 165 int i; 166 167 for (i = 0; i < bufs; i++) { 168 wbuf[i]->b_end_io = end_buffer_write_sync; 169 /* We use-up our safety reference in submit_bh() */ 170 submit_bh(WRITE, wbuf[i]); 171 } 172 } 173 174 /* 175 * Submit all the data buffers to disk 176 */ 177 static void journal_submit_data_buffers(journal_t *journal, 178 transaction_t *commit_transaction) 179 { 180 struct journal_head *jh; 181 struct buffer_head *bh; 182 int locked; 183 int bufs = 0; 184 struct buffer_head **wbuf = journal->j_wbuf; 185 186 /* 187 * Whenever we unlock the journal and sleep, things can get added 188 * onto ->t_sync_datalist, so we have to keep looping back to 189 * write_out_data until we *know* that the list is empty. 190 * 191 * Cleanup any flushed data buffers from the data list. Even in 192 * abort mode, we want to flush this out as soon as possible. 193 */ 194 write_out_data: 195 cond_resched(); 196 spin_lock(&journal->j_list_lock); 197 198 while (commit_transaction->t_sync_datalist) { 199 jh = commit_transaction->t_sync_datalist; 200 bh = jh2bh(jh); 201 locked = 0; 202 203 /* Get reference just to make sure buffer does not disappear 204 * when we are forced to drop various locks */ 205 get_bh(bh); 206 /* If the buffer is dirty, we need to submit IO and hence 207 * we need the buffer lock. We try to lock the buffer without 208 * blocking. If we fail, we need to drop j_list_lock and do 209 * blocking lock_buffer(). 210 */ 211 if (buffer_dirty(bh)) { 212 if (test_set_buffer_locked(bh)) { 213 BUFFER_TRACE(bh, "needs blocking lock"); 214 spin_unlock(&journal->j_list_lock); 215 /* Write out all data to prevent deadlocks */ 216 journal_do_submit_data(wbuf, bufs); 217 bufs = 0; 218 lock_buffer(bh); 219 spin_lock(&journal->j_list_lock); 220 } 221 locked = 1; 222 } 223 /* We have to get bh_state lock. Again out of order, sigh. */ 224 if (!inverted_lock(journal, bh)) { 225 jbd_lock_bh_state(bh); 226 spin_lock(&journal->j_list_lock); 227 } 228 /* Someone already cleaned up the buffer? */ 229 if (!buffer_jbd(bh) 230 || jh->b_transaction != commit_transaction 231 || jh->b_jlist != BJ_SyncData) { 232 jbd_unlock_bh_state(bh); 233 if (locked) 234 unlock_buffer(bh); 235 BUFFER_TRACE(bh, "already cleaned up"); 236 put_bh(bh); 237 continue; 238 } 239 if (locked && test_clear_buffer_dirty(bh)) { 240 BUFFER_TRACE(bh, "needs writeout, adding to array"); 241 wbuf[bufs++] = bh; 242 __jbd2_journal_file_buffer(jh, commit_transaction, 243 BJ_Locked); 244 jbd_unlock_bh_state(bh); 245 if (bufs == journal->j_wbufsize) { 246 spin_unlock(&journal->j_list_lock); 247 journal_do_submit_data(wbuf, bufs); 248 bufs = 0; 249 goto write_out_data; 250 } 251 } 252 else { 253 BUFFER_TRACE(bh, "writeout complete: unfile"); 254 __jbd2_journal_unfile_buffer(jh); 255 jbd_unlock_bh_state(bh); 256 if (locked) 257 unlock_buffer(bh); 258 jbd2_journal_remove_journal_head(bh); 259 /* Once for our safety reference, once for 260 * jbd2_journal_remove_journal_head() */ 261 put_bh(bh); 262 put_bh(bh); 263 } 264 265 if (lock_need_resched(&journal->j_list_lock)) { 266 spin_unlock(&journal->j_list_lock); 267 goto write_out_data; 268 } 269 } 270 spin_unlock(&journal->j_list_lock); 271 journal_do_submit_data(wbuf, bufs); 272 } 273 274 static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, 275 unsigned long long block) 276 { 277 tag->t_blocknr = cpu_to_be32(block & (u32)~0); 278 if (tag_bytes > JBD_TAG_SIZE32) 279 tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); 280 } 281 282 /* 283 * jbd2_journal_commit_transaction 284 * 285 * The primary function for committing a transaction to the log. This 286 * function is called by the journal thread to begin a complete commit. 287 */ 288 void jbd2_journal_commit_transaction(journal_t *journal) 289 { 290 transaction_t *commit_transaction; 291 struct journal_head *jh, *new_jh, *descriptor; 292 struct buffer_head **wbuf = journal->j_wbuf; 293 int bufs; 294 int flags; 295 int err; 296 unsigned long long blocknr; 297 char *tagp = NULL; 298 journal_header_t *header; 299 journal_block_tag_t *tag = NULL; 300 int space_left = 0; 301 int first_tag = 0; 302 int tag_flag; 303 int i; 304 int tag_bytes = journal_tag_bytes(journal); 305 306 /* 307 * First job: lock down the current transaction and wait for 308 * all outstanding updates to complete. 309 */ 310 311 #ifdef COMMIT_STATS 312 spin_lock(&journal->j_list_lock); 313 summarise_journal_usage(journal); 314 spin_unlock(&journal->j_list_lock); 315 #endif 316 317 /* Do we need to erase the effects of a prior jbd2_journal_flush? */ 318 if (journal->j_flags & JBD2_FLUSHED) { 319 jbd_debug(3, "super block updated\n"); 320 jbd2_journal_update_superblock(journal, 1); 321 } else { 322 jbd_debug(3, "superblock not updated\n"); 323 } 324 325 J_ASSERT(journal->j_running_transaction != NULL); 326 J_ASSERT(journal->j_committing_transaction == NULL); 327 328 commit_transaction = journal->j_running_transaction; 329 J_ASSERT(commit_transaction->t_state == T_RUNNING); 330 331 jbd_debug(1, "JBD: starting commit of transaction %d\n", 332 commit_transaction->t_tid); 333 334 spin_lock(&journal->j_state_lock); 335 commit_transaction->t_state = T_LOCKED; 336 337 spin_lock(&commit_transaction->t_handle_lock); 338 while (commit_transaction->t_updates) { 339 DEFINE_WAIT(wait); 340 341 prepare_to_wait(&journal->j_wait_updates, &wait, 342 TASK_UNINTERRUPTIBLE); 343 if (commit_transaction->t_updates) { 344 spin_unlock(&commit_transaction->t_handle_lock); 345 spin_unlock(&journal->j_state_lock); 346 schedule(); 347 spin_lock(&journal->j_state_lock); 348 spin_lock(&commit_transaction->t_handle_lock); 349 } 350 finish_wait(&journal->j_wait_updates, &wait); 351 } 352 spin_unlock(&commit_transaction->t_handle_lock); 353 354 J_ASSERT (commit_transaction->t_outstanding_credits <= 355 journal->j_max_transaction_buffers); 356 357 /* 358 * First thing we are allowed to do is to discard any remaining 359 * BJ_Reserved buffers. Note, it is _not_ permissible to assume 360 * that there are no such buffers: if a large filesystem 361 * operation like a truncate needs to split itself over multiple 362 * transactions, then it may try to do a jbd2_journal_restart() while 363 * there are still BJ_Reserved buffers outstanding. These must 364 * be released cleanly from the current transaction. 365 * 366 * In this case, the filesystem must still reserve write access 367 * again before modifying the buffer in the new transaction, but 368 * we do not require it to remember exactly which old buffers it 369 * has reserved. This is consistent with the existing behaviour 370 * that multiple jbd2_journal_get_write_access() calls to the same 371 * buffer are perfectly permissable. 372 */ 373 while (commit_transaction->t_reserved_list) { 374 jh = commit_transaction->t_reserved_list; 375 JBUFFER_TRACE(jh, "reserved, unused: refile"); 376 /* 377 * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may 378 * leave undo-committed data. 379 */ 380 if (jh->b_committed_data) { 381 struct buffer_head *bh = jh2bh(jh); 382 383 jbd_lock_bh_state(bh); 384 jbd2_slab_free(jh->b_committed_data, bh->b_size); 385 jh->b_committed_data = NULL; 386 jbd_unlock_bh_state(bh); 387 } 388 jbd2_journal_refile_buffer(journal, jh); 389 } 390 391 /* 392 * Now try to drop any written-back buffers from the journal's 393 * checkpoint lists. We do this *before* commit because it potentially 394 * frees some memory 395 */ 396 spin_lock(&journal->j_list_lock); 397 __jbd2_journal_clean_checkpoint_list(journal); 398 spin_unlock(&journal->j_list_lock); 399 400 jbd_debug (3, "JBD: commit phase 1\n"); 401 402 /* 403 * Switch to a new revoke table. 404 */ 405 jbd2_journal_switch_revoke_table(journal); 406 407 commit_transaction->t_state = T_FLUSH; 408 journal->j_committing_transaction = commit_transaction; 409 journal->j_running_transaction = NULL; 410 commit_transaction->t_log_start = journal->j_head; 411 wake_up(&journal->j_wait_transaction_locked); 412 spin_unlock(&journal->j_state_lock); 413 414 jbd_debug (3, "JBD: commit phase 2\n"); 415 416 /* 417 * First, drop modified flag: all accesses to the buffers 418 * will be tracked for a new trasaction only -bzzz 419 */ 420 spin_lock(&journal->j_list_lock); 421 if (commit_transaction->t_buffers) { 422 new_jh = jh = commit_transaction->t_buffers->b_tnext; 423 do { 424 J_ASSERT_JH(new_jh, new_jh->b_modified == 1 || 425 new_jh->b_modified == 0); 426 new_jh->b_modified = 0; 427 new_jh = new_jh->b_tnext; 428 } while (new_jh != jh); 429 } 430 spin_unlock(&journal->j_list_lock); 431 432 /* 433 * Now start flushing things to disk, in the order they appear 434 * on the transaction lists. Data blocks go first. 435 */ 436 err = 0; 437 journal_submit_data_buffers(journal, commit_transaction); 438 439 /* 440 * Wait for all previously submitted IO to complete. 441 */ 442 spin_lock(&journal->j_list_lock); 443 while (commit_transaction->t_locked_list) { 444 struct buffer_head *bh; 445 446 jh = commit_transaction->t_locked_list->b_tprev; 447 bh = jh2bh(jh); 448 get_bh(bh); 449 if (buffer_locked(bh)) { 450 spin_unlock(&journal->j_list_lock); 451 wait_on_buffer(bh); 452 if (unlikely(!buffer_uptodate(bh))) 453 err = -EIO; 454 spin_lock(&journal->j_list_lock); 455 } 456 if (!inverted_lock(journal, bh)) { 457 put_bh(bh); 458 spin_lock(&journal->j_list_lock); 459 continue; 460 } 461 if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { 462 __jbd2_journal_unfile_buffer(jh); 463 jbd_unlock_bh_state(bh); 464 jbd2_journal_remove_journal_head(bh); 465 put_bh(bh); 466 } else { 467 jbd_unlock_bh_state(bh); 468 } 469 put_bh(bh); 470 cond_resched_lock(&journal->j_list_lock); 471 } 472 spin_unlock(&journal->j_list_lock); 473 474 if (err) 475 __jbd2_journal_abort_hard(journal); 476 477 jbd2_journal_write_revoke_records(journal, commit_transaction); 478 479 jbd_debug(3, "JBD: commit phase 2\n"); 480 481 /* 482 * If we found any dirty or locked buffers, then we should have 483 * looped back up to the write_out_data label. If there weren't 484 * any then journal_clean_data_list should have wiped the list 485 * clean by now, so check that it is in fact empty. 486 */ 487 J_ASSERT (commit_transaction->t_sync_datalist == NULL); 488 489 jbd_debug (3, "JBD: commit phase 3\n"); 490 491 /* 492 * Way to go: we have now written out all of the data for a 493 * transaction! Now comes the tricky part: we need to write out 494 * metadata. Loop over the transaction's entire buffer list: 495 */ 496 commit_transaction->t_state = T_COMMIT; 497 498 descriptor = NULL; 499 bufs = 0; 500 while (commit_transaction->t_buffers) { 501 502 /* Find the next buffer to be journaled... */ 503 504 jh = commit_transaction->t_buffers; 505 506 /* If we're in abort mode, we just un-journal the buffer and 507 release it for background writing. */ 508 509 if (is_journal_aborted(journal)) { 510 JBUFFER_TRACE(jh, "journal is aborting: refile"); 511 jbd2_journal_refile_buffer(journal, jh); 512 /* If that was the last one, we need to clean up 513 * any descriptor buffers which may have been 514 * already allocated, even if we are now 515 * aborting. */ 516 if (!commit_transaction->t_buffers) 517 goto start_journal_io; 518 continue; 519 } 520 521 /* Make sure we have a descriptor block in which to 522 record the metadata buffer. */ 523 524 if (!descriptor) { 525 struct buffer_head *bh; 526 527 J_ASSERT (bufs == 0); 528 529 jbd_debug(4, "JBD: get descriptor\n"); 530 531 descriptor = jbd2_journal_get_descriptor_buffer(journal); 532 if (!descriptor) { 533 __jbd2_journal_abort_hard(journal); 534 continue; 535 } 536 537 bh = jh2bh(descriptor); 538 jbd_debug(4, "JBD: got buffer %llu (%p)\n", 539 (unsigned long long)bh->b_blocknr, bh->b_data); 540 header = (journal_header_t *)&bh->b_data[0]; 541 header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); 542 header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); 543 header->h_sequence = cpu_to_be32(commit_transaction->t_tid); 544 545 tagp = &bh->b_data[sizeof(journal_header_t)]; 546 space_left = bh->b_size - sizeof(journal_header_t); 547 first_tag = 1; 548 set_buffer_jwrite(bh); 549 set_buffer_dirty(bh); 550 wbuf[bufs++] = bh; 551 552 /* Record it so that we can wait for IO 553 completion later */ 554 BUFFER_TRACE(bh, "ph3: file as descriptor"); 555 jbd2_journal_file_buffer(descriptor, commit_transaction, 556 BJ_LogCtl); 557 } 558 559 /* Where is the buffer to be written? */ 560 561 err = jbd2_journal_next_log_block(journal, &blocknr); 562 /* If the block mapping failed, just abandon the buffer 563 and repeat this loop: we'll fall into the 564 refile-on-abort condition above. */ 565 if (err) { 566 __jbd2_journal_abort_hard(journal); 567 continue; 568 } 569 570 /* 571 * start_this_handle() uses t_outstanding_credits to determine 572 * the free space in the log, but this counter is changed 573 * by jbd2_journal_next_log_block() also. 574 */ 575 commit_transaction->t_outstanding_credits--; 576 577 /* Bump b_count to prevent truncate from stumbling over 578 the shadowed buffer! @@@ This can go if we ever get 579 rid of the BJ_IO/BJ_Shadow pairing of buffers. */ 580 atomic_inc(&jh2bh(jh)->b_count); 581 582 /* Make a temporary IO buffer with which to write it out 583 (this will requeue both the metadata buffer and the 584 temporary IO buffer). new_bh goes on BJ_IO*/ 585 586 set_bit(BH_JWrite, &jh2bh(jh)->b_state); 587 /* 588 * akpm: jbd2_journal_write_metadata_buffer() sets 589 * new_bh->b_transaction to commit_transaction. 590 * We need to clean this up before we release new_bh 591 * (which is of type BJ_IO) 592 */ 593 JBUFFER_TRACE(jh, "ph3: write metadata"); 594 flags = jbd2_journal_write_metadata_buffer(commit_transaction, 595 jh, &new_jh, blocknr); 596 set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); 597 wbuf[bufs++] = jh2bh(new_jh); 598 599 /* Record the new block's tag in the current descriptor 600 buffer */ 601 602 tag_flag = 0; 603 if (flags & 1) 604 tag_flag |= JBD2_FLAG_ESCAPE; 605 if (!first_tag) 606 tag_flag |= JBD2_FLAG_SAME_UUID; 607 608 tag = (journal_block_tag_t *) tagp; 609 write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); 610 tag->t_flags = cpu_to_be32(tag_flag); 611 tagp += tag_bytes; 612 space_left -= tag_bytes; 613 614 if (first_tag) { 615 memcpy (tagp, journal->j_uuid, 16); 616 tagp += 16; 617 space_left -= 16; 618 first_tag = 0; 619 } 620 621 /* If there's no more to do, or if the descriptor is full, 622 let the IO rip! */ 623 624 if (bufs == journal->j_wbufsize || 625 commit_transaction->t_buffers == NULL || 626 space_left < tag_bytes + 16) { 627 628 jbd_debug(4, "JBD: Submit %d IOs\n", bufs); 629 630 /* Write an end-of-descriptor marker before 631 submitting the IOs. "tag" still points to 632 the last tag we set up. */ 633 634 tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); 635 636 start_journal_io: 637 for (i = 0; i < bufs; i++) { 638 struct buffer_head *bh = wbuf[i]; 639 lock_buffer(bh); 640 clear_buffer_dirty(bh); 641 set_buffer_uptodate(bh); 642 bh->b_end_io = journal_end_buffer_io_sync; 643 submit_bh(WRITE, bh); 644 } 645 cond_resched(); 646 647 /* Force a new descriptor to be generated next 648 time round the loop. */ 649 descriptor = NULL; 650 bufs = 0; 651 } 652 } 653 654 /* Lo and behold: we have just managed to send a transaction to 655 the log. Before we can commit it, wait for the IO so far to 656 complete. Control buffers being written are on the 657 transaction's t_log_list queue, and metadata buffers are on 658 the t_iobuf_list queue. 659 660 Wait for the buffers in reverse order. That way we are 661 less likely to be woken up until all IOs have completed, and 662 so we incur less scheduling load. 663 */ 664 665 jbd_debug(3, "JBD: commit phase 4\n"); 666 667 /* 668 * akpm: these are BJ_IO, and j_list_lock is not needed. 669 * See __journal_try_to_free_buffer. 670 */ 671 wait_for_iobuf: 672 while (commit_transaction->t_iobuf_list != NULL) { 673 struct buffer_head *bh; 674 675 jh = commit_transaction->t_iobuf_list->b_tprev; 676 bh = jh2bh(jh); 677 if (buffer_locked(bh)) { 678 wait_on_buffer(bh); 679 goto wait_for_iobuf; 680 } 681 if (cond_resched()) 682 goto wait_for_iobuf; 683 684 if (unlikely(!buffer_uptodate(bh))) 685 err = -EIO; 686 687 clear_buffer_jwrite(bh); 688 689 JBUFFER_TRACE(jh, "ph4: unfile after journal write"); 690 jbd2_journal_unfile_buffer(journal, jh); 691 692 /* 693 * ->t_iobuf_list should contain only dummy buffer_heads 694 * which were created by jbd2_journal_write_metadata_buffer(). 695 */ 696 BUFFER_TRACE(bh, "dumping temporary bh"); 697 jbd2_journal_put_journal_head(jh); 698 __brelse(bh); 699 J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); 700 free_buffer_head(bh); 701 702 /* We also have to unlock and free the corresponding 703 shadowed buffer */ 704 jh = commit_transaction->t_shadow_list->b_tprev; 705 bh = jh2bh(jh); 706 clear_bit(BH_JWrite, &bh->b_state); 707 J_ASSERT_BH(bh, buffer_jbddirty(bh)); 708 709 /* The metadata is now released for reuse, but we need 710 to remember it against this transaction so that when 711 we finally commit, we can do any checkpointing 712 required. */ 713 JBUFFER_TRACE(jh, "file as BJ_Forget"); 714 jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); 715 /* Wake up any transactions which were waiting for this 716 IO to complete */ 717 wake_up_bit(&bh->b_state, BH_Unshadow); 718 JBUFFER_TRACE(jh, "brelse shadowed buffer"); 719 __brelse(bh); 720 } 721 722 J_ASSERT (commit_transaction->t_shadow_list == NULL); 723 724 jbd_debug(3, "JBD: commit phase 5\n"); 725 726 /* Here we wait for the revoke record and descriptor record buffers */ 727 wait_for_ctlbuf: 728 while (commit_transaction->t_log_list != NULL) { 729 struct buffer_head *bh; 730 731 jh = commit_transaction->t_log_list->b_tprev; 732 bh = jh2bh(jh); 733 if (buffer_locked(bh)) { 734 wait_on_buffer(bh); 735 goto wait_for_ctlbuf; 736 } 737 if (cond_resched()) 738 goto wait_for_ctlbuf; 739 740 if (unlikely(!buffer_uptodate(bh))) 741 err = -EIO; 742 743 BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); 744 clear_buffer_jwrite(bh); 745 jbd2_journal_unfile_buffer(journal, jh); 746 jbd2_journal_put_journal_head(jh); 747 __brelse(bh); /* One for getblk */ 748 /* AKPM: bforget here */ 749 } 750 751 jbd_debug(3, "JBD: commit phase 6\n"); 752 753 if (journal_write_commit_record(journal, commit_transaction)) 754 err = -EIO; 755 756 if (err) 757 __jbd2_journal_abort_hard(journal); 758 759 /* End of a transaction! Finally, we can do checkpoint 760 processing: any buffers committed as a result of this 761 transaction can be removed from any checkpoint list it was on 762 before. */ 763 764 jbd_debug(3, "JBD: commit phase 7\n"); 765 766 J_ASSERT(commit_transaction->t_sync_datalist == NULL); 767 J_ASSERT(commit_transaction->t_buffers == NULL); 768 J_ASSERT(commit_transaction->t_checkpoint_list == NULL); 769 J_ASSERT(commit_transaction->t_iobuf_list == NULL); 770 J_ASSERT(commit_transaction->t_shadow_list == NULL); 771 J_ASSERT(commit_transaction->t_log_list == NULL); 772 773 restart_loop: 774 /* 775 * As there are other places (journal_unmap_buffer()) adding buffers 776 * to this list we have to be careful and hold the j_list_lock. 777 */ 778 spin_lock(&journal->j_list_lock); 779 while (commit_transaction->t_forget) { 780 transaction_t *cp_transaction; 781 struct buffer_head *bh; 782 783 jh = commit_transaction->t_forget; 784 spin_unlock(&journal->j_list_lock); 785 bh = jh2bh(jh); 786 jbd_lock_bh_state(bh); 787 J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || 788 jh->b_transaction == journal->j_running_transaction); 789 790 /* 791 * If there is undo-protected committed data against 792 * this buffer, then we can remove it now. If it is a 793 * buffer needing such protection, the old frozen_data 794 * field now points to a committed version of the 795 * buffer, so rotate that field to the new committed 796 * data. 797 * 798 * Otherwise, we can just throw away the frozen data now. 799 */ 800 if (jh->b_committed_data) { 801 jbd2_slab_free(jh->b_committed_data, bh->b_size); 802 jh->b_committed_data = NULL; 803 if (jh->b_frozen_data) { 804 jh->b_committed_data = jh->b_frozen_data; 805 jh->b_frozen_data = NULL; 806 } 807 } else if (jh->b_frozen_data) { 808 jbd2_slab_free(jh->b_frozen_data, bh->b_size); 809 jh->b_frozen_data = NULL; 810 } 811 812 spin_lock(&journal->j_list_lock); 813 cp_transaction = jh->b_cp_transaction; 814 if (cp_transaction) { 815 JBUFFER_TRACE(jh, "remove from old cp transaction"); 816 __jbd2_journal_remove_checkpoint(jh); 817 } 818 819 /* Only re-checkpoint the buffer_head if it is marked 820 * dirty. If the buffer was added to the BJ_Forget list 821 * by jbd2_journal_forget, it may no longer be dirty and 822 * there's no point in keeping a checkpoint record for 823 * it. */ 824 825 /* A buffer which has been freed while still being 826 * journaled by a previous transaction may end up still 827 * being dirty here, but we want to avoid writing back 828 * that buffer in the future now that the last use has 829 * been committed. That's not only a performance gain, 830 * it also stops aliasing problems if the buffer is left 831 * behind for writeback and gets reallocated for another 832 * use in a different page. */ 833 if (buffer_freed(bh)) { 834 clear_buffer_freed(bh); 835 clear_buffer_jbddirty(bh); 836 } 837 838 if (buffer_jbddirty(bh)) { 839 JBUFFER_TRACE(jh, "add to new checkpointing trans"); 840 __jbd2_journal_insert_checkpoint(jh, commit_transaction); 841 JBUFFER_TRACE(jh, "refile for checkpoint writeback"); 842 __jbd2_journal_refile_buffer(jh); 843 jbd_unlock_bh_state(bh); 844 } else { 845 J_ASSERT_BH(bh, !buffer_dirty(bh)); 846 /* The buffer on BJ_Forget list and not jbddirty means 847 * it has been freed by this transaction and hence it 848 * could not have been reallocated until this 849 * transaction has committed. *BUT* it could be 850 * reallocated once we have written all the data to 851 * disk and before we process the buffer on BJ_Forget 852 * list. */ 853 JBUFFER_TRACE(jh, "refile or unfile freed buffer"); 854 __jbd2_journal_refile_buffer(jh); 855 if (!jh->b_transaction) { 856 jbd_unlock_bh_state(bh); 857 /* needs a brelse */ 858 jbd2_journal_remove_journal_head(bh); 859 release_buffer_page(bh); 860 } else 861 jbd_unlock_bh_state(bh); 862 } 863 cond_resched_lock(&journal->j_list_lock); 864 } 865 spin_unlock(&journal->j_list_lock); 866 /* 867 * This is a bit sleazy. We borrow j_list_lock to protect 868 * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. 869 * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but 870 * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint 871 */ 872 spin_lock(&journal->j_state_lock); 873 spin_lock(&journal->j_list_lock); 874 /* 875 * Now recheck if some buffers did not get attached to the transaction 876 * while the lock was dropped... 877 */ 878 if (commit_transaction->t_forget) { 879 spin_unlock(&journal->j_list_lock); 880 spin_unlock(&journal->j_state_lock); 881 goto restart_loop; 882 } 883 884 /* Done with this transaction! */ 885 886 jbd_debug(3, "JBD: commit phase 8\n"); 887 888 J_ASSERT(commit_transaction->t_state == T_COMMIT); 889 890 commit_transaction->t_state = T_FINISHED; 891 J_ASSERT(commit_transaction == journal->j_committing_transaction); 892 journal->j_commit_sequence = commit_transaction->t_tid; 893 journal->j_committing_transaction = NULL; 894 spin_unlock(&journal->j_state_lock); 895 896 if (commit_transaction->t_checkpoint_list == NULL) { 897 __jbd2_journal_drop_transaction(journal, commit_transaction); 898 } else { 899 if (journal->j_checkpoint_transactions == NULL) { 900 journal->j_checkpoint_transactions = commit_transaction; 901 commit_transaction->t_cpnext = commit_transaction; 902 commit_transaction->t_cpprev = commit_transaction; 903 } else { 904 commit_transaction->t_cpnext = 905 journal->j_checkpoint_transactions; 906 commit_transaction->t_cpprev = 907 commit_transaction->t_cpnext->t_cpprev; 908 commit_transaction->t_cpnext->t_cpprev = 909 commit_transaction; 910 commit_transaction->t_cpprev->t_cpnext = 911 commit_transaction; 912 } 913 } 914 spin_unlock(&journal->j_list_lock); 915 916 jbd_debug(1, "JBD: commit %d complete, head %d\n", 917 journal->j_commit_sequence, journal->j_tail_sequence); 918 919 wake_up(&journal->j_wait_done_commit); 920 } 921