1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #include <linux/string_choices.h> 23 #endif 24 25 /* 26 * Maintain information about the progress of the recovery job, so that 27 * the different passes can carry information between them. 28 */ 29 struct recovery_info 30 { 31 tid_t start_transaction; 32 tid_t end_transaction; 33 unsigned long head_block; 34 35 int nr_replays; 36 int nr_revokes; 37 int nr_revoke_hits; 38 }; 39 40 static int do_one_pass(journal_t *journal, 41 struct recovery_info *info, enum passtype pass); 42 static int scan_revoke_records(journal_t *, struct buffer_head *, 43 tid_t, struct recovery_info *); 44 45 #ifdef __KERNEL__ 46 47 /* Release readahead buffers after use */ 48 static void journal_brelse_array(struct buffer_head *b[], int n) 49 { 50 while (--n >= 0) 51 brelse (b[n]); 52 } 53 54 55 /* 56 * When reading from the journal, we are going through the block device 57 * layer directly and so there is no readahead being done for us. We 58 * need to implement any readahead ourselves if we want it to happen at 59 * all. Recovery is basically one long sequential read, so make sure we 60 * do the IO in reasonably large chunks. 61 * 62 * This is not so critical that we need to be enormously clever about 63 * the readahead size, though. 128K is a purely arbitrary, good-enough 64 * fixed value. 
65 */ 66 67 #define MAXBUF 8 68 static int do_readahead(journal_t *journal, unsigned int start) 69 { 70 int err; 71 unsigned int max, nbufs, next; 72 unsigned long long blocknr; 73 struct buffer_head *bh; 74 75 struct buffer_head * bufs[MAXBUF]; 76 77 /* Do up to 128K of readahead */ 78 max = start + (128 * 1024 / journal->j_blocksize); 79 if (max > journal->j_total_len) 80 max = journal->j_total_len; 81 82 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 83 * a time to the block device IO layer. */ 84 85 nbufs = 0; 86 87 for (next = start; next < max; next++) { 88 err = jbd2_journal_bmap(journal, next, &blocknr); 89 90 if (err) { 91 printk(KERN_ERR "JBD2: bad block at offset %u\n", 92 next); 93 goto failed; 94 } 95 96 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 97 if (!bh) { 98 err = -ENOMEM; 99 goto failed; 100 } 101 102 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 103 bufs[nbufs++] = bh; 104 if (nbufs == MAXBUF) { 105 bh_readahead_batch(nbufs, bufs, 0); 106 journal_brelse_array(bufs, nbufs); 107 nbufs = 0; 108 } 109 } else 110 brelse(bh); 111 } 112 113 if (nbufs) 114 bh_readahead_batch(nbufs, bufs, 0); 115 err = 0; 116 117 failed: 118 if (nbufs) 119 journal_brelse_array(bufs, nbufs); 120 return err; 121 } 122 123 #endif /* __KERNEL__ */ 124 125 126 /* 127 * Read a block from the journal 128 */ 129 130 static int jread(struct buffer_head **bhp, journal_t *journal, 131 unsigned int offset) 132 { 133 int err; 134 unsigned long long blocknr; 135 struct buffer_head *bh; 136 137 *bhp = NULL; 138 139 if (offset >= journal->j_total_len) { 140 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 141 return -EFSCORRUPTED; 142 } 143 144 err = jbd2_journal_bmap(journal, offset, &blocknr); 145 146 if (err) { 147 printk(KERN_ERR "JBD2: bad block at offset %u\n", 148 offset); 149 return err; 150 } 151 152 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 153 if (!bh) 154 return -ENOMEM; 155 156 if (!buffer_uptodate(bh)) { 
157 /* 158 * If this is a brand new buffer, start readahead. 159 * Otherwise, we assume we are already reading it. 160 */ 161 bool need_readahead = !buffer_req(bh); 162 163 bh_read_nowait(bh, 0); 164 if (need_readahead) 165 do_readahead(journal, offset); 166 wait_on_buffer(bh); 167 } 168 169 if (!buffer_uptodate(bh)) { 170 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 171 offset); 172 brelse(bh); 173 return -EIO; 174 } 175 176 *bhp = bh; 177 return 0; 178 } 179 180 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 181 { 182 struct jbd2_journal_block_tail *tail; 183 __be32 provided; 184 __u32 calculated; 185 186 if (!jbd2_journal_has_csum_v2or3(j)) 187 return 1; 188 189 tail = (struct jbd2_journal_block_tail *)((char *)buf + 190 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 191 provided = tail->t_checksum; 192 tail->t_checksum = 0; 193 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 194 tail->t_checksum = provided; 195 196 return provided == cpu_to_be32(calculated); 197 } 198 199 /* 200 * Count the number of in-use tags in a journal descriptor block. 201 */ 202 203 static int count_tags(journal_t *journal, struct buffer_head *bh) 204 { 205 char * tagp; 206 journal_block_tag_t tag; 207 int nr = 0, size = journal->j_blocksize; 208 int tag_bytes = journal_tag_bytes(journal); 209 210 if (jbd2_journal_has_csum_v2or3(journal)) 211 size -= sizeof(struct jbd2_journal_block_tail); 212 213 tagp = &bh->b_data[sizeof(journal_header_t)]; 214 215 while ((tagp - bh->b_data + tag_bytes) <= size) { 216 memcpy(&tag, tagp, sizeof(tag)); 217 218 nr++; 219 tagp += tag_bytes; 220 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 221 tagp += 16; 222 223 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 224 break; 225 } 226 227 return nr; 228 } 229 230 231 /* Make sure we wrap around the log correctly! 
*/ 232 #define wrap(journal, var) \ 233 do { \ 234 if (var >= (journal)->j_last) \ 235 var -= ((journal)->j_last - (journal)->j_first); \ 236 } while (0) 237 238 static int fc_do_one_pass(journal_t *journal, 239 struct recovery_info *info, enum passtype pass) 240 { 241 unsigned int expected_commit_id = info->end_transaction; 242 unsigned long next_fc_block; 243 struct buffer_head *bh; 244 int err = 0; 245 246 next_fc_block = journal->j_fc_first; 247 if (!journal->j_fc_replay_callback) 248 return 0; 249 250 while (next_fc_block <= journal->j_fc_last) { 251 jbd2_debug(3, "Fast commit replay: next block %ld\n", 252 next_fc_block); 253 err = jread(&bh, journal, next_fc_block); 254 if (err) { 255 jbd2_debug(3, "Fast commit replay: read error\n"); 256 break; 257 } 258 259 err = journal->j_fc_replay_callback(journal, bh, pass, 260 next_fc_block - journal->j_fc_first, 261 expected_commit_id); 262 brelse(bh); 263 next_fc_block++; 264 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 265 break; 266 err = 0; 267 } 268 269 if (err) 270 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 271 272 return err; 273 } 274 275 /** 276 * jbd2_journal_recover - recovers a on-disk journal 277 * @journal: the journal to recover 278 * 279 * The primary function for recovering the log contents when mounting a 280 * journaled device. 281 * 282 * Recovery is done in three passes. In the first pass, we look for the 283 * end of the log. In the second, we assemble the list of revoke 284 * blocks. In the third and final pass, we replay any un-revoked blocks 285 * in the log. 286 */ 287 int jbd2_journal_recover(journal_t *journal) 288 { 289 int err, err2; 290 journal_superblock_t * sb; 291 292 struct recovery_info info; 293 294 memset(&info, 0, sizeof(info)); 295 sb = journal->j_superblock; 296 297 /* 298 * The journal superblock's s_start field (the current log head) 299 * is always zero if, and only if, the journal was cleanly 300 * unmounted. 
301 */ 302 if (!sb->s_start) { 303 jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n", 304 be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head)); 305 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 306 journal->j_head = be32_to_cpu(sb->s_head); 307 return 0; 308 } 309 310 err = do_one_pass(journal, &info, PASS_SCAN); 311 if (!err) 312 err = do_one_pass(journal, &info, PASS_REVOKE); 313 if (!err) 314 err = do_one_pass(journal, &info, PASS_REPLAY); 315 316 jbd2_debug(1, "JBD2: recovery, exit status %d, " 317 "recovered transactions %u to %u\n", 318 err, info.start_transaction, info.end_transaction); 319 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 320 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 321 322 /* Restart the log at the next transaction ID, thus invalidating 323 * any existing commit records in the log. */ 324 journal->j_transaction_sequence = ++info.end_transaction; 325 journal->j_head = info.head_block; 326 jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n", 327 journal->j_transaction_sequence, journal->j_head); 328 329 jbd2_journal_clear_revoke(journal); 330 err2 = sync_blockdev(journal->j_fs_dev); 331 if (!err) 332 err = err2; 333 err2 = jbd2_check_fs_dev_write_error(journal); 334 if (!err) 335 err = err2; 336 /* Make sure all replayed data is on permanent storage */ 337 if (journal->j_flags & JBD2_BARRIER) { 338 err2 = blkdev_issue_flush(journal->j_fs_dev); 339 if (!err) 340 err = err2; 341 } 342 return err; 343 } 344 345 /** 346 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 347 * @journal: journal to startup 348 * 349 * Locate any valid recovery information from the journal and set up the 350 * journal structures in memory to ignore it (presumably because the 351 * caller has evidence that it is out of date). 352 * This function doesn't appear to be exported.. 
 *
 * We perform one pass over the journal to allow us to tell the user how
 * much recovery information is being erased, and to let us initialise
 * the journal transaction sequence numbers to the next unused ID.
 *
 * Return: 0 on success, or the error reported by the scan pass.  In both
 * cases the in-memory sequence number and head are updated and j_tail is
 * reset to 0.
 */
int jbd2_journal_skip_recovery(journal_t *journal)
{
	int			err;

	struct recovery_info	info;

	memset (&info, 0, sizeof(info));

	err = do_one_pass(journal, &info, PASS_SCAN);

	if (err) {
		/* Scan failed: just step past the current ID and restart
		 * the log at its first block. */
		printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
		++journal->j_transaction_sequence;
		journal->j_head = journal->j_first;
	} else {
#ifdef CONFIG_JBD2_DEBUG
		int dropped = info.end_transaction -
			be32_to_cpu(journal->j_superblock->s_sequence);
		jbd2_debug(1,
			  "JBD2: ignoring %d transaction%s from the journal.\n",
			  dropped, str_plural(dropped));
#endif
		journal->j_transaction_sequence = ++info.end_transaction;
		journal->j_head = info.head_block;
	}

	journal->j_tail = 0;
	return err;
}

/*
 * Return the block number stored in @tag: the low 32 bits come from
 * t_blocknr, and when the journal has the 64bit feature the upper 32 bits
 * come from t_blocknr_high.
 */
static inline unsigned long long read_tag_block(journal_t *journal,
						journal_block_tag_t *tag)
{
	unsigned long long block = be32_to_cpu(tag->t_blocknr);
	if (jbd2_has_feature_64bit(journal))
		block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
	return block;
}

/*
 * calc_chksums calculates the checksums for the blocks described in the
 * descriptor block.
 *
 * The descriptor block @bh and then each of the log blocks it describes
 * are folded into *@crc32_sum with crc32_be().  *@next_log_block is
 * advanced (with wrap-around) past every block consumed.
 *
 * Returns 0 on success, or 1 if one of the described blocks could not be
 * read.
 */
static int calc_chksums(journal_t *journal, struct buffer_head *bh,
			unsigned long *next_log_block, __u32 *crc32_sum)
{
	int i, num_blks, err;
	unsigned long io_block;
	struct buffer_head *obh;

	num_blks = count_tags(journal, bh);
	/* Calculate checksum of the descriptor block. */
	*crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);

	for (i = 0; i < num_blks; i++) {
		io_block = (*next_log_block)++;
		wrap(journal, *next_log_block);
		err = jread(&obh, journal, io_block);
		if (err) {
			printk(KERN_ERR "JBD2: IO error %d recovering block "
				"%lu in log\n", err, io_block);
			return 1;
		} else {
			*crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
				     obh->b_size);
		}
		put_bh(obh);
	}
	return 0;
}

/*
 * Verify the checksum stored in h_chksum[0] of the commit block in @buf.
 *
 * Returns 1 if the journal does not use v2/v3 checksums or if the stored
 * value matches the checksum of the whole block, 0 otherwise.  The field
 * is zeroed in place while the block is summed and then restored.
 */
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
	struct commit_header *h;
	__be32 provided;
	__u32 calculated;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	h = buf;
	provided = h->h_chksum[0];
	h->h_chksum[0] = 0;
	calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	h->h_chksum[0] = provided;

	return provided == cpu_to_be32(calculated);
}

/*
 * Re-check a commit block checksum using only its header: the
 * commit_header bytes of @buf are copied into a zero-filled, block-sized
 * scratch buffer and the checksum is computed over that buffer instead.
 *
 * Returns true if the stored checksum matches this header-plus-zeroes
 * image, false if it does not or if the scratch buffer cannot be
 * allocated.  @buf itself is not modified.
 */
static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf)
{
	struct commit_header *h;
	__be32 provided;
	__u32 calculated;
	void *tmpbuf;

	tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL);
	if (!tmpbuf)
		return false;

	memcpy(tmpbuf, buf, sizeof(struct commit_header));
	h = tmpbuf;
	provided = h->h_chksum[0];
	h->h_chksum[0] = 0;
	calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize);
	kfree(tmpbuf);

	return provided == cpu_to_be32(calculated);
}

/*
 * Verify the per-block checksum recorded in a descriptor tag.
 *
 * The checksum covers the big-endian transaction @sequence followed by
 * the block contents in @buf.  With the csum3 feature it is compared
 * against the 32-bit field in @tag3; otherwise it is converted with
 * cpu_to_be16() and compared against the 16-bit field in @tag.
 *
 * Returns 1 if the journal does not use v2/v3 checksums or the checksum
 * matches, 0 otherwise.
 */
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
				      journal_block_tag3_t *tag3,
				      void *buf, __u32 sequence)
{
	__u32 csum32;
	__be32 seq;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	seq = cpu_to_be32(sequence);
	csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
	csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);

	if (jbd2_has_feature_csum3(j))
		return tag3->t_checksum == cpu_to_be32(csum32);
	else
		return tag->t_checksum == cpu_to_be16(csum32);
}

/*
 * Replay the data blocks described by one descriptor block.
 *
 * @bh is the descriptor block and *@next_log_block the log offset of the
 * first block it describes; the offset is advanced (with wrap-around)
 * once per tag.  For each tag the log block is read and, unless the
 * target block is revoked for @next_commit_ID or fails its tag checksum,
 * copied into a buffer on the filesystem device and marked dirty.
 *
 * Read and checksum failures are remembered and the walk continues with
 * the next tag; the most recent such error is returned at the end.
 * Failing to get a buffer for the target block aborts immediately with
 * -ENOMEM.  Returns 0 if every tag was processed without error.
 */
static __always_inline int jbd2_do_replay(journal_t *journal,
					  struct recovery_info *info,
					  struct buffer_head *bh,
					  unsigned long *next_log_block,
					  unsigned int next_commit_ID)
{
	char *tagp;
	int flags;
	int ret = 0;
	int tag_bytes = journal_tag_bytes(journal);
	int descr_csum_size = 0;
	unsigned long io_block;
	journal_block_tag_t tag;
	struct buffer_head *obh;
	struct buffer_head *nbh;

	if (jbd2_journal_has_csum_v2or3(journal))
		descr_csum_size = sizeof(struct jbd2_journal_block_tail);

	tagp = &bh->b_data[sizeof(journal_header_t)];
	while (tagp - bh->b_data + tag_bytes <=
	       journal->j_blocksize - descr_csum_size) {
		int err;

		/* flags is read before any jump to skip_write below, which
		 * uses it to step to the next tag. */
		memcpy(&tag, tagp, sizeof(tag));
		flags = be16_to_cpu(tag.t_flags);

		io_block = (*next_log_block)++;
		wrap(journal, *next_log_block);
		err = jread(&obh, journal, io_block);
		if (err) {
			/* Recover what we can, but report failure at the end. */
			ret = err;
			pr_err("JBD2: IO error %d recovering block %lu in log\n",
			      err, io_block);
		} else {
			unsigned long long blocknr;

			J_ASSERT(obh != NULL);
			blocknr = read_tag_block(journal, &tag);

			/* If the block has been revoked, then we're all done here. */
			if (jbd2_journal_test_revoke(journal, blocknr,
						     next_commit_ID)) {
				brelse(obh);
				++info->nr_revoke_hits;
				goto skip_write;
			}

			/* Look for block corruption */
			if (!jbd2_block_tag_csum_verify(journal, &tag,
						(journal_block_tag3_t *)tagp,
						obh->b_data, next_commit_ID)) {
				brelse(obh);
				ret = -EFSBADCRC;
				pr_err("JBD2: Invalid checksum recovering data block %llu in journal block %lu\n",
				      blocknr, io_block);
				goto skip_write;
			}

			/* Find a buffer for the new data being restored */
			nbh = __getblk(journal->j_fs_dev, blocknr,
				       journal->j_blocksize);
			if (nbh == NULL) {
				pr_err("JBD2: Out of memory during recovery.\n");
				brelse(obh);
				return -ENOMEM;
			}

			lock_buffer(nbh);
			memcpy(nbh->b_data, obh->b_data, journal->j_blocksize);
			/* An escaped block gets the journal magic number
			 * written back over its first four bytes. */
			if (flags & JBD2_FLAG_ESCAPE) {
				*((__be32 *)nbh->b_data) =
				cpu_to_be32(JBD2_MAGIC_NUMBER);
			}

			BUFFER_TRACE(nbh, "marking dirty");
			set_buffer_uptodate(nbh);
			mark_buffer_dirty(nbh);
			BUFFER_TRACE(nbh, "marking uptodate");
			++info->nr_replays;
			unlock_buffer(nbh);
			brelse(obh);
			brelse(nbh);
		}

skip_write:
		tagp += tag_bytes;
		/* A tag without SAME_UUID is followed by 16 extra bytes. */
		if (!(flags & JBD2_FLAG_SAME_UUID))
			tagp += 16;

		if (flags & JBD2_FLAG_LAST_TAG)
			break;
	}

	return ret;
}

/*
 * Walk the log once on behalf of one recovery pass.
 *
 * Starting from the sequence number and start block in the journal
 * superblock, each log block is read and dispatched on its block type:
 *
 *  - PASS_SCAN establishes info->start_transaction, info->end_transaction
 *    and info->head_block, verifying checksums along the way;
 *  - PASS_REVOKE feeds revoke blocks to scan_revoke_records();
 *  - PASS_REPLAY hands descriptor blocks to jbd2_do_replay().
 *
 * The non-scan passes stop once next_commit_ID reaches
 * info->end_transaction.  After the walk, the fast-commit area is
 * processed via fc_do_one_pass() when the fast_commit feature is set and
 * the pass is not PASS_REVOKE.
 *
 * Returns 0 on success.  Errors that abort the walk are returned directly
 * through the "failed" exit; errors that let the walk continue (replay
 * errors other than -ENOMEM, a pass ending at an unexpected transaction,
 * fast-commit errors) are returned through "success" at the end.
 */
static int do_one_pass(journal_t *journal,
			struct recovery_info *info, enum passtype pass)
{
	unsigned int		first_commit_ID, next_commit_ID;
	unsigned long		next_log_block, head_block;
	int			err, success = 0;
	journal_superblock_t *	sb;
	journal_header_t *	tmp;
	struct buffer_head	*bh = NULL;
	unsigned int		sequence;
	int			blocktype;
	__u32			crc32_sum = ~0; /* Transactional Checksums */
	bool			need_check_commit_time = false;
	__u64			last_trans_commit_time = 0, commit_time;

	/*
	 * First thing is to establish what we expect to find in the log
	 * (in terms of transaction IDs), and where (in terms of log
	 * block offsets): query the superblock.
	 */

	sb = journal->j_superblock;
	next_commit_ID = be32_to_cpu(sb->s_sequence);
	next_log_block = be32_to_cpu(sb->s_start);
	head_block = next_log_block;

	first_commit_ID = next_commit_ID;
	if (pass == PASS_SCAN)
		info->start_transaction = first_commit_ID;

	jbd2_debug(1, "Starting recovery pass %d\n", pass);

	/*
	 * Now we walk through the log, transaction by transaction,
	 * making sure that each transaction has a commit block in the
	 * expected place.  Each complete transaction gets replayed back
	 * into the main filesystem.
	 */

	while (1) {
		cond_resched();

		/* If we already know where to stop the log traversal,
		 * check right now that we haven't gone past the end of
		 * the log. */

		if (pass != PASS_SCAN)
			if (tid_geq(next_commit_ID, info->end_transaction))
				break;

		jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
			  next_commit_ID, next_log_block, journal->j_last);

		/* Skip over each chunk of the transaction looking
		 * either the next descriptor block or the final commit
		 * record. */

		jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
		/* Drop the block from the previous iteration before reading
		 * the next one. */
		brelse(bh);
		bh = NULL;
		err = jread(&bh, journal, next_log_block);
		if (err)
			goto failed;

		next_log_block++;
		wrap(journal, next_log_block);

		/* What kind of buffer is it?
		 *
		 * If it is a descriptor block, check that it has the
		 * expected sequence number.  Otherwise, we're all done
		 * here. */

		tmp = (journal_header_t *)bh->b_data;

		if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER))
			break;

		blocktype = be32_to_cpu(tmp->h_blocktype);
		sequence = be32_to_cpu(tmp->h_sequence);
		jbd2_debug(3, "Found magic %d, sequence %d\n",
			  blocktype, sequence);

		if (sequence != next_commit_ID)
			break;

		/* OK, we have a valid descriptor block which matches
		 * all of the sequence number checks.  What are we going
		 * to do with it?  That depends on the pass... */

		switch(blocktype) {
		case JBD2_DESCRIPTOR_BLOCK:
			/* Verify checksum first */
			if (!jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				/*
				 * PASS_SCAN can see stale blocks due to lazy
				 * journal init. Don't error out on those yet.
				 */
				if (pass != PASS_SCAN) {
					pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
					       next_log_block);
					err = -EFSBADCRC;
					goto failed;
				}
				need_check_commit_time = true;
				jbd2_debug(1,
					"invalid descriptor block found in %lu\n",
					next_log_block);
			}

			/* If it is a valid descriptor block, replay it
			 * in pass REPLAY; if journal_checksums enabled, then
			 * calculate checksums in PASS_SCAN, otherwise,
			 * just skip over the blocks it describes. */
			if (pass != PASS_REPLAY) {
				if (pass == PASS_SCAN &&
				    jbd2_has_feature_checksum(journal) &&
				    !info->end_transaction) {
					if (calc_chksums(journal, bh,
							&next_log_block,
							&crc32_sum))
						break;
					continue;
				}
				next_log_block += count_tags(journal, bh);
				wrap(journal, next_log_block);
				continue;
			}

			/*
			 * A descriptor block: we can now write all of the
			 * data blocks. Yay, useful work is finally getting
			 * done here!
			 */
			err = jbd2_do_replay(journal, info, bh, &next_log_block,
					     next_commit_ID);
			if (err) {
				/* Only -ENOMEM aborts the pass; other replay
				 * errors are reported once the walk ends. */
				if (err == -ENOMEM)
					goto failed;
				success = err;
			}

			continue;

		case JBD2_COMMIT_BLOCK:
			/* Outside PASS_SCAN a commit block only advances the
			 * expected transaction ID. */
			if (pass != PASS_SCAN) {
				next_commit_ID++;
				continue;
			}

			/*     How to differentiate between interrupted commit
			 *               and journal corruption ?
			 *
			 * {nth transaction}
			 *        Checksum Verification Failed
			 *			 |
			 *		 ____________________
			 *		|		     |
			 *	async_commit             sync_commit
			 *		|                    |
			 *		| GO TO NEXT    "Journal Corruption"
			 *		| TRANSACTION
			 *		|
			 * {(n+1)th transaction}
			 *		|
			 *	 _______|______________
			 *	|	 	      |
			 * Commit block found	Commit block not found
			 *      |		      |
			 * "Journal Corruption"       |
			 *		 _____________|_________
			 *     		|	           	|
			 *	nth trans corrupt	OR   nth trans
			 *	and (n+1)th interrupted     interrupted
			 *	before commit block
			 *      could reach the disk.
			 *	(Cannot find the difference in above
			 *	 mentioned conditions. Hence assume
			 *	 "Interrupted Commit".)
			 */
			commit_time = be64_to_cpu(
				((struct commit_header *)bh->b_data)->h_commit_sec);
			/*
			 * If need_check_commit_time is set, it means we are in
			 * PASS_SCAN and csum verify failed before. If
			 * commit_time is increasing, it's the same journal,
			 * otherwise it is stale journal block, just end this
			 * recovery.
			 */
			if (need_check_commit_time) {
				if (commit_time >= last_trans_commit_time) {
					pr_err("JBD2: Invalid checksum found in transaction %u\n",
					       next_commit_ID);
					err = -EFSBADCRC;
					goto failed;
				}
			ignore_crc_mismatch:
				/*
				 * It likely does not belong to same journal,
				 * just end this recovery with success.
				 */
				jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
					  next_commit_ID);
				goto done;
			}

			/*
			 * Found an expected commit block: if checksums
			 * are present, verify them in PASS_SCAN; else not
			 * much to do other than move on to the next sequence
			 * number.
			 */
			if (jbd2_has_feature_checksum(journal)) {
				struct commit_header *cbh =
					(struct commit_header *)bh->b_data;
				unsigned found_chksum =
					be32_to_cpu(cbh->h_chksum[0]);

				/* end_transaction is already set: record it
				 * as the failed commit and stop scanning. */
				if (info->end_transaction) {
					journal->j_failed_commit =
						info->end_transaction;
					break;
				}

				/* Neither checksum match nor unused? */
				if (!((crc32_sum == found_chksum &&
				       cbh->h_chksum_type ==
						JBD2_CRC32_CHKSUM &&
				       cbh->h_chksum_size ==
						JBD2_CRC32_CHKSUM_SIZE) ||
				      (cbh->h_chksum_type == 0 &&
				       cbh->h_chksum_size == 0 &&
				       found_chksum == 0)))
					goto chksum_error;

				/* Restart the running checksum for the next
				 * transaction. */
				crc32_sum = ~0;
				goto chksum_ok;
			}

			if (jbd2_commit_block_csum_verify(journal, bh->b_data))
				goto chksum_ok;

			if (jbd2_commit_block_csum_verify_partial(journal,
								  bh->b_data)) {
				pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n",
					  next_commit_ID, next_log_block);
				goto chksum_ok;
			}

chksum_error:
			/* A commit time older than the previous transaction's
			 * is handled as stale data, not as corruption. */
			if (commit_time < last_trans_commit_time)
				goto ignore_crc_mismatch;
			info->end_transaction = next_commit_ID;
			info->head_block = head_block;

			/* Without async_commit the scan stops here; with it,
			 * execution falls through to chksum_ok and continues. */
			if (!jbd2_has_feature_async_commit(journal)) {
				journal->j_failed_commit = next_commit_ID;
				break;
			}

chksum_ok:
			last_trans_commit_time = commit_time;
			head_block = next_log_block;
			next_commit_ID++;
			continue;

		case JBD2_REVOKE_BLOCK:
			/*
			 * Check revoke block crc in pass_scan, if csum verify
			 * failed, check commit block time later.
			 */
			if (pass == PASS_SCAN &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
					  next_log_block);
				need_check_commit_time = true;
			}

			/* If we aren't in the REVOKE pass, then we can
			 * just skip over this block. */
			if (pass != PASS_REVOKE)
				continue;

			err = scan_revoke_records(journal, bh,
						  next_commit_ID, info);
			if (err)
				goto failed;
			continue;

		default:
			jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
				  blocktype);
			goto done;
		}
	}

 done:
	brelse(bh);
	/*
	 * We broke out of the log scan loop: either we came to the
	 * known end of the log or we found an unexpected block in the
	 * log.  If the latter happened, then we know that the "current"
	 * transaction marks the end of the valid log.
	 */

	if (pass == PASS_SCAN) {
		/* Keep any values the checksum-error path already recorded. */
		if (!info->end_transaction)
			info->end_transaction = next_commit_ID;
		if (!info->head_block)
			info->head_block = head_block;
	} else {
		/* It's really bad news if different passes end up at
		 * different places (but possible due to IO errors). */
		if (info->end_transaction != next_commit_ID) {
			printk(KERN_ERR "JBD2: recovery pass %d ended at "
				"transaction %u, expected %u\n",
				pass, next_commit_ID, info->end_transaction);
			if (!success)
				success = -EIO;
		}
	}

	if (jbd2_has_feature_fast_commit(journal) &&  pass != PASS_REVOKE) {
		err = fc_do_one_pass(journal, info, pass);
		if (err)
			success = err;
	}

	return success;

 failed:
	brelse(bh);
	return err;
}

/* Scan a revoke record, marking all blocks mentioned as revoked.
*/ 924 925 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 926 tid_t sequence, struct recovery_info *info) 927 { 928 jbd2_journal_revoke_header_t *header; 929 int offset, max; 930 unsigned csum_size = 0; 931 __u32 rcount; 932 int record_len = 4; 933 934 header = (jbd2_journal_revoke_header_t *) bh->b_data; 935 offset = sizeof(jbd2_journal_revoke_header_t); 936 rcount = be32_to_cpu(header->r_count); 937 938 if (jbd2_journal_has_csum_v2or3(journal)) 939 csum_size = sizeof(struct jbd2_journal_block_tail); 940 if (rcount > journal->j_blocksize - csum_size) 941 return -EINVAL; 942 max = rcount; 943 944 if (jbd2_has_feature_64bit(journal)) 945 record_len = 8; 946 947 while (offset + record_len <= max) { 948 unsigned long long blocknr; 949 int err; 950 951 if (record_len == 4) 952 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 953 else 954 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 955 offset += record_len; 956 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 957 if (err) 958 return err; 959 ++info->nr_revokes; 960 } 961 return 0; 962 } 963