1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 unsigned long head_block; 33 34 int nr_replays; 35 int nr_revokes; 36 int nr_revoke_hits; 37 }; 38 39 static int do_one_pass(journal_t *journal, 40 struct recovery_info *info, enum passtype pass); 41 static int scan_revoke_records(journal_t *, struct buffer_head *, 42 tid_t, struct recovery_info *); 43 44 #ifdef __KERNEL__ 45 46 /* Release readahead buffers after use */ 47 static void journal_brelse_array(struct buffer_head *b[], int n) 48 { 49 while (--n >= 0) 50 brelse (b[n]); 51 } 52 53 54 /* 55 * When reading from the journal, we are going through the block device 56 * layer directly and so there is no readahead being done for us. We 57 * need to implement any readahead ourselves if we want it to happen at 58 * all. Recovery is basically one long sequential read, so make sure we 59 * do the IO in reasonably large chunks. 60 * 61 * This is not so critical that we need to be enormously clever about 62 * the readahead size, though. 128K is a purely arbitrary, good-enough 63 * fixed value. 
64 */ 65 66 #define MAXBUF 8 67 static int do_readahead(journal_t *journal, unsigned int start) 68 { 69 int err; 70 unsigned int max, nbufs, next; 71 unsigned long long blocknr; 72 struct buffer_head *bh; 73 74 struct buffer_head * bufs[MAXBUF]; 75 76 /* Do up to 128K of readahead */ 77 max = start + (128 * 1024 / journal->j_blocksize); 78 if (max > journal->j_total_len) 79 max = journal->j_total_len; 80 81 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 82 * a time to the block device IO layer. */ 83 84 nbufs = 0; 85 86 for (next = start; next < max; next++) { 87 err = jbd2_journal_bmap(journal, next, &blocknr); 88 89 if (err) { 90 printk(KERN_ERR "JBD2: bad block at offset %u\n", 91 next); 92 goto failed; 93 } 94 95 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 96 if (!bh) { 97 err = -ENOMEM; 98 goto failed; 99 } 100 101 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 102 bufs[nbufs++] = bh; 103 if (nbufs == MAXBUF) { 104 bh_readahead_batch(nbufs, bufs, 0); 105 journal_brelse_array(bufs, nbufs); 106 nbufs = 0; 107 } 108 } else 109 brelse(bh); 110 } 111 112 if (nbufs) 113 bh_readahead_batch(nbufs, bufs, 0); 114 err = 0; 115 116 failed: 117 if (nbufs) 118 journal_brelse_array(bufs, nbufs); 119 return err; 120 } 121 122 #endif /* __KERNEL__ */ 123 124 125 /* 126 * Read a block from the journal 127 */ 128 129 static int jread(struct buffer_head **bhp, journal_t *journal, 130 unsigned int offset) 131 { 132 int err; 133 unsigned long long blocknr; 134 struct buffer_head *bh; 135 136 *bhp = NULL; 137 138 if (offset >= journal->j_total_len) { 139 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 140 return -EFSCORRUPTED; 141 } 142 143 err = jbd2_journal_bmap(journal, offset, &blocknr); 144 145 if (err) { 146 printk(KERN_ERR "JBD2: bad block at offset %u\n", 147 offset); 148 return err; 149 } 150 151 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 152 if (!bh) 153 return -ENOMEM; 154 155 if (!buffer_uptodate(bh)) { 
156 /* 157 * If this is a brand new buffer, start readahead. 158 * Otherwise, we assume we are already reading it. 159 */ 160 bool need_readahead = !buffer_req(bh); 161 162 bh_read_nowait(bh, 0); 163 if (need_readahead) 164 do_readahead(journal, offset); 165 wait_on_buffer(bh); 166 } 167 168 if (!buffer_uptodate(bh)) { 169 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 170 offset); 171 brelse(bh); 172 return -EIO; 173 } 174 175 *bhp = bh; 176 return 0; 177 } 178 179 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 180 { 181 struct jbd2_journal_block_tail *tail; 182 __be32 provided; 183 __u32 calculated; 184 185 if (!jbd2_journal_has_csum_v2or3(j)) 186 return 1; 187 188 tail = (struct jbd2_journal_block_tail *)((char *)buf + 189 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 190 provided = tail->t_checksum; 191 tail->t_checksum = 0; 192 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 193 tail->t_checksum = provided; 194 195 return provided == cpu_to_be32(calculated); 196 } 197 198 /* 199 * Count the number of in-use tags in a journal descriptor block. 200 */ 201 202 static int count_tags(journal_t *journal, struct buffer_head *bh) 203 { 204 char * tagp; 205 journal_block_tag_t tag; 206 int nr = 0, size = journal->j_blocksize; 207 int tag_bytes = journal_tag_bytes(journal); 208 209 if (jbd2_journal_has_csum_v2or3(journal)) 210 size -= sizeof(struct jbd2_journal_block_tail); 211 212 tagp = &bh->b_data[sizeof(journal_header_t)]; 213 214 while ((tagp - bh->b_data + tag_bytes) <= size) { 215 memcpy(&tag, tagp, sizeof(tag)); 216 217 nr++; 218 tagp += tag_bytes; 219 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 220 tagp += 16; 221 222 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 223 break; 224 } 225 226 return nr; 227 } 228 229 230 /* Make sure we wrap around the log correctly! 
*/ 231 #define wrap(journal, var) \ 232 do { \ 233 if (var >= (journal)->j_last) \ 234 var -= ((journal)->j_last - (journal)->j_first); \ 235 } while (0) 236 237 static int fc_do_one_pass(journal_t *journal, 238 struct recovery_info *info, enum passtype pass) 239 { 240 unsigned int expected_commit_id = info->end_transaction; 241 unsigned long next_fc_block; 242 struct buffer_head *bh; 243 int err = 0; 244 245 next_fc_block = journal->j_fc_first; 246 if (!journal->j_fc_replay_callback) 247 return 0; 248 249 while (next_fc_block <= journal->j_fc_last) { 250 jbd2_debug(3, "Fast commit replay: next block %ld\n", 251 next_fc_block); 252 err = jread(&bh, journal, next_fc_block); 253 if (err) { 254 jbd2_debug(3, "Fast commit replay: read error\n"); 255 break; 256 } 257 258 err = journal->j_fc_replay_callback(journal, bh, pass, 259 next_fc_block - journal->j_fc_first, 260 expected_commit_id); 261 brelse(bh); 262 next_fc_block++; 263 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 264 break; 265 err = 0; 266 } 267 268 if (err) 269 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 270 271 return err; 272 } 273 274 /** 275 * jbd2_journal_recover - recovers a on-disk journal 276 * @journal: the journal to recover 277 * 278 * The primary function for recovering the log contents when mounting a 279 * journaled device. 280 * 281 * Recovery is done in three passes. In the first pass, we look for the 282 * end of the log. In the second, we assemble the list of revoke 283 * blocks. In the third and final pass, we replay any un-revoked blocks 284 * in the log. 285 */ 286 int jbd2_journal_recover(journal_t *journal) 287 { 288 int err, err2; 289 journal_superblock_t * sb; 290 291 struct recovery_info info; 292 293 memset(&info, 0, sizeof(info)); 294 sb = journal->j_superblock; 295 296 /* 297 * The journal superblock's s_start field (the current log head) 298 * is always zero if, and only if, the journal was cleanly 299 * unmounted. 
300 */ 301 if (!sb->s_start) { 302 jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n", 303 be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head)); 304 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 305 journal->j_head = be32_to_cpu(sb->s_head); 306 return 0; 307 } 308 309 err = do_one_pass(journal, &info, PASS_SCAN); 310 if (!err) 311 err = do_one_pass(journal, &info, PASS_REVOKE); 312 if (!err) 313 err = do_one_pass(journal, &info, PASS_REPLAY); 314 315 jbd2_debug(1, "JBD2: recovery, exit status %d, " 316 "recovered transactions %u to %u\n", 317 err, info.start_transaction, info.end_transaction); 318 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 319 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 320 321 /* Restart the log at the next transaction ID, thus invalidating 322 * any existing commit records in the log. */ 323 journal->j_transaction_sequence = ++info.end_transaction; 324 journal->j_head = info.head_block; 325 jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n", 326 journal->j_transaction_sequence, journal->j_head); 327 328 jbd2_journal_clear_revoke(journal); 329 err2 = sync_blockdev(journal->j_fs_dev); 330 if (!err) 331 err = err2; 332 err2 = jbd2_check_fs_dev_write_error(journal); 333 if (!err) 334 err = err2; 335 /* Make sure all replayed data is on permanent storage */ 336 if (journal->j_flags & JBD2_BARRIER) { 337 err2 = blkdev_issue_flush(journal->j_fs_dev); 338 if (!err) 339 err = err2; 340 } 341 return err; 342 } 343 344 /** 345 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 346 * @journal: journal to startup 347 * 348 * Locate any valid recovery information from the journal and set up the 349 * journal structures in memory to ignore it (presumably because the 350 * caller has evidence that it is out of date). 351 * This function doesn't appear to be exported.. 
352 * 353 * We perform one pass over the journal to allow us to tell the user how 354 * much recovery information is being erased, and to let us initialise 355 * the journal transaction sequence numbers to the next unused ID. 356 */ 357 int jbd2_journal_skip_recovery(journal_t *journal) 358 { 359 int err; 360 361 struct recovery_info info; 362 363 memset (&info, 0, sizeof(info)); 364 365 err = do_one_pass(journal, &info, PASS_SCAN); 366 367 if (err) { 368 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 369 ++journal->j_transaction_sequence; 370 journal->j_head = journal->j_first; 371 } else { 372 #ifdef CONFIG_JBD2_DEBUG 373 int dropped = info.end_transaction - 374 be32_to_cpu(journal->j_superblock->s_sequence); 375 jbd2_debug(1, 376 "JBD2: ignoring %d transaction%s from the journal.\n", 377 dropped, (dropped == 1) ? "" : "s"); 378 #endif 379 journal->j_transaction_sequence = ++info.end_transaction; 380 journal->j_head = info.head_block; 381 } 382 383 journal->j_tail = 0; 384 return err; 385 } 386 387 static inline unsigned long long read_tag_block(journal_t *journal, 388 journal_block_tag_t *tag) 389 { 390 unsigned long long block = be32_to_cpu(tag->t_blocknr); 391 if (jbd2_has_feature_64bit(journal)) 392 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 393 return block; 394 } 395 396 /* 397 * calc_chksums calculates the checksums for the blocks described in the 398 * descriptor block. 399 */ 400 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 401 unsigned long *next_log_block, __u32 *crc32_sum) 402 { 403 int i, num_blks, err; 404 unsigned long io_block; 405 struct buffer_head *obh; 406 407 num_blks = count_tags(journal, bh); 408 /* Calculate checksum of the descriptor block. 
*/ 409 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 410 411 for (i = 0; i < num_blks; i++) { 412 io_block = (*next_log_block)++; 413 wrap(journal, *next_log_block); 414 err = jread(&obh, journal, io_block); 415 if (err) { 416 printk(KERN_ERR "JBD2: IO error %d recovering block " 417 "%lu in log\n", err, io_block); 418 return 1; 419 } else { 420 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 421 obh->b_size); 422 } 423 put_bh(obh); 424 } 425 return 0; 426 } 427 428 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 429 { 430 struct commit_header *h; 431 __be32 provided; 432 __u32 calculated; 433 434 if (!jbd2_journal_has_csum_v2or3(j)) 435 return 1; 436 437 h = buf; 438 provided = h->h_chksum[0]; 439 h->h_chksum[0] = 0; 440 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 441 h->h_chksum[0] = provided; 442 443 return provided == cpu_to_be32(calculated); 444 } 445 446 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 447 journal_block_tag3_t *tag3, 448 void *buf, __u32 sequence) 449 { 450 __u32 csum32; 451 __be32 seq; 452 453 if (!jbd2_journal_has_csum_v2or3(j)) 454 return 1; 455 456 seq = cpu_to_be32(sequence); 457 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 458 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 459 460 if (jbd2_has_feature_csum3(j)) 461 return tag3->t_checksum == cpu_to_be32(csum32); 462 else 463 return tag->t_checksum == cpu_to_be16(csum32); 464 } 465 466 static int do_one_pass(journal_t *journal, 467 struct recovery_info *info, enum passtype pass) 468 { 469 unsigned int first_commit_ID, next_commit_ID; 470 unsigned long next_log_block, head_block; 471 int err, success = 0; 472 journal_superblock_t * sb; 473 journal_header_t * tmp; 474 struct buffer_head * bh; 475 unsigned int sequence; 476 int blocktype; 477 int tag_bytes = journal_tag_bytes(journal); 478 __u32 crc32_sum = ~0; /* Transactional Checksums */ 479 int descr_csum_size = 
0; 480 int block_error = 0; 481 bool need_check_commit_time = false; 482 __u64 last_trans_commit_time = 0, commit_time; 483 484 /* 485 * First thing is to establish what we expect to find in the log 486 * (in terms of transaction IDs), and where (in terms of log 487 * block offsets): query the superblock. 488 */ 489 490 sb = journal->j_superblock; 491 next_commit_ID = be32_to_cpu(sb->s_sequence); 492 next_log_block = be32_to_cpu(sb->s_start); 493 head_block = next_log_block; 494 495 first_commit_ID = next_commit_ID; 496 if (pass == PASS_SCAN) 497 info->start_transaction = first_commit_ID; 498 499 jbd2_debug(1, "Starting recovery pass %d\n", pass); 500 501 /* 502 * Now we walk through the log, transaction by transaction, 503 * making sure that each transaction has a commit block in the 504 * expected place. Each complete transaction gets replayed back 505 * into the main filesystem. 506 */ 507 508 while (1) { 509 int flags; 510 char * tagp; 511 journal_block_tag_t tag; 512 struct buffer_head * obh; 513 struct buffer_head * nbh; 514 515 cond_resched(); 516 517 /* If we already know where to stop the log traversal, 518 * check right now that we haven't gone past the end of 519 * the log. */ 520 521 if (pass != PASS_SCAN) 522 if (tid_geq(next_commit_ID, info->end_transaction)) 523 break; 524 525 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 526 next_commit_ID, next_log_block, journal->j_last); 527 528 /* Skip over each chunk of the transaction looking 529 * either the next descriptor block or the final commit 530 * record. */ 531 532 jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block); 533 err = jread(&bh, journal, next_log_block); 534 if (err) 535 goto failed; 536 537 next_log_block++; 538 wrap(journal, next_log_block); 539 540 /* What kind of buffer is it? 541 * 542 * If it is a descriptor block, check that it has the 543 * expected sequence number. Otherwise, we're all done 544 * here. 
*/ 545 546 tmp = (journal_header_t *)bh->b_data; 547 548 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 549 brelse(bh); 550 break; 551 } 552 553 blocktype = be32_to_cpu(tmp->h_blocktype); 554 sequence = be32_to_cpu(tmp->h_sequence); 555 jbd2_debug(3, "Found magic %d, sequence %d\n", 556 blocktype, sequence); 557 558 if (sequence != next_commit_ID) { 559 brelse(bh); 560 break; 561 } 562 563 /* OK, we have a valid descriptor block which matches 564 * all of the sequence number checks. What are we going 565 * to do with it? That depends on the pass... */ 566 567 switch(blocktype) { 568 case JBD2_DESCRIPTOR_BLOCK: 569 /* Verify checksum first */ 570 if (jbd2_journal_has_csum_v2or3(journal)) 571 descr_csum_size = 572 sizeof(struct jbd2_journal_block_tail); 573 if (descr_csum_size > 0 && 574 !jbd2_descriptor_block_csum_verify(journal, 575 bh->b_data)) { 576 /* 577 * PASS_SCAN can see stale blocks due to lazy 578 * journal init. Don't error out on those yet. 579 */ 580 if (pass != PASS_SCAN) { 581 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 582 next_log_block); 583 err = -EFSBADCRC; 584 brelse(bh); 585 goto failed; 586 } 587 need_check_commit_time = true; 588 jbd2_debug(1, 589 "invalid descriptor block found in %lu\n", 590 next_log_block); 591 } 592 593 /* If it is a valid descriptor block, replay it 594 * in pass REPLAY; if journal_checksums enabled, then 595 * calculate checksums in PASS_SCAN, otherwise, 596 * just skip over the blocks it describes. */ 597 if (pass != PASS_REPLAY) { 598 if (pass == PASS_SCAN && 599 jbd2_has_feature_checksum(journal) && 600 !need_check_commit_time && 601 !info->end_transaction) { 602 if (calc_chksums(journal, bh, 603 &next_log_block, 604 &crc32_sum)) { 605 put_bh(bh); 606 break; 607 } 608 put_bh(bh); 609 continue; 610 } 611 next_log_block += count_tags(journal, bh); 612 wrap(journal, next_log_block); 613 put_bh(bh); 614 continue; 615 } 616 617 /* A descriptor block: we can now write all of 618 * the data blocks. 
Yay, useful work is finally 619 * getting done here! */ 620 621 tagp = &bh->b_data[sizeof(journal_header_t)]; 622 while ((tagp - bh->b_data + tag_bytes) 623 <= journal->j_blocksize - descr_csum_size) { 624 unsigned long io_block; 625 626 memcpy(&tag, tagp, sizeof(tag)); 627 flags = be16_to_cpu(tag.t_flags); 628 629 io_block = next_log_block++; 630 wrap(journal, next_log_block); 631 err = jread(&obh, journal, io_block); 632 if (err) { 633 /* Recover what we can, but 634 * report failure at the end. */ 635 success = err; 636 printk(KERN_ERR 637 "JBD2: IO error %d recovering " 638 "block %lu in log\n", 639 err, io_block); 640 } else { 641 unsigned long long blocknr; 642 643 J_ASSERT(obh != NULL); 644 blocknr = read_tag_block(journal, 645 &tag); 646 647 /* If the block has been 648 * revoked, then we're all done 649 * here. */ 650 if (jbd2_journal_test_revoke 651 (journal, blocknr, 652 next_commit_ID)) { 653 brelse(obh); 654 ++info->nr_revoke_hits; 655 goto skip_write; 656 } 657 658 /* Look for block corruption */ 659 if (!jbd2_block_tag_csum_verify( 660 journal, &tag, (journal_block_tag3_t *)tagp, 661 obh->b_data, be32_to_cpu(tmp->h_sequence))) { 662 brelse(obh); 663 success = -EFSBADCRC; 664 printk(KERN_ERR "JBD2: Invalid " 665 "checksum recovering " 666 "data block %llu in " 667 "journal block %lu\n", 668 blocknr, io_block); 669 block_error = 1; 670 goto skip_write; 671 } 672 673 /* Find a buffer for the new 674 * data being restored */ 675 nbh = __getblk(journal->j_fs_dev, 676 blocknr, 677 journal->j_blocksize); 678 if (nbh == NULL) { 679 printk(KERN_ERR 680 "JBD2: Out of memory " 681 "during recovery.\n"); 682 err = -ENOMEM; 683 brelse(bh); 684 brelse(obh); 685 goto failed; 686 } 687 688 lock_buffer(nbh); 689 memcpy(nbh->b_data, obh->b_data, 690 journal->j_blocksize); 691 if (flags & JBD2_FLAG_ESCAPE) { 692 *((__be32 *)nbh->b_data) = 693 cpu_to_be32(JBD2_MAGIC_NUMBER); 694 } 695 696 BUFFER_TRACE(nbh, "marking dirty"); 697 set_buffer_uptodate(nbh); 698 
mark_buffer_dirty(nbh); 699 BUFFER_TRACE(nbh, "marking uptodate"); 700 ++info->nr_replays; 701 unlock_buffer(nbh); 702 brelse(obh); 703 brelse(nbh); 704 } 705 706 skip_write: 707 tagp += tag_bytes; 708 if (!(flags & JBD2_FLAG_SAME_UUID)) 709 tagp += 16; 710 711 if (flags & JBD2_FLAG_LAST_TAG) 712 break; 713 } 714 715 brelse(bh); 716 continue; 717 718 case JBD2_COMMIT_BLOCK: 719 /* How to differentiate between interrupted commit 720 * and journal corruption ? 721 * 722 * {nth transaction} 723 * Checksum Verification Failed 724 * | 725 * ____________________ 726 * | | 727 * async_commit sync_commit 728 * | | 729 * | GO TO NEXT "Journal Corruption" 730 * | TRANSACTION 731 * | 732 * {(n+1)th transanction} 733 * | 734 * _______|______________ 735 * | | 736 * Commit block found Commit block not found 737 * | | 738 * "Journal Corruption" | 739 * _____________|_________ 740 * | | 741 * nth trans corrupt OR nth trans 742 * and (n+1)th interrupted interrupted 743 * before commit block 744 * could reach the disk. 745 * (Cannot find the difference in above 746 * mentioned conditions. Hence assume 747 * "Interrupted Commit".) 748 */ 749 commit_time = be64_to_cpu( 750 ((struct commit_header *)bh->b_data)->h_commit_sec); 751 /* 752 * If need_check_commit_time is set, it means we are in 753 * PASS_SCAN and csum verify failed before. If 754 * commit_time is increasing, it's the same journal, 755 * otherwise it is stale journal block, just end this 756 * recovery. 757 */ 758 if (need_check_commit_time) { 759 if (commit_time >= last_trans_commit_time) { 760 pr_err("JBD2: Invalid checksum found in transaction %u\n", 761 next_commit_ID); 762 err = -EFSBADCRC; 763 brelse(bh); 764 goto failed; 765 } 766 ignore_crc_mismatch: 767 /* 768 * It likely does not belong to same journal, 769 * just end this recovery with success. 
770 */ 771 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 772 next_commit_ID); 773 brelse(bh); 774 goto done; 775 } 776 777 /* 778 * Found an expected commit block: if checksums 779 * are present, verify them in PASS_SCAN; else not 780 * much to do other than move on to the next sequence 781 * number. 782 */ 783 if (pass == PASS_SCAN && 784 jbd2_has_feature_checksum(journal)) { 785 struct commit_header *cbh = 786 (struct commit_header *)bh->b_data; 787 unsigned found_chksum = 788 be32_to_cpu(cbh->h_chksum[0]); 789 790 if (info->end_transaction) { 791 journal->j_failed_commit = 792 info->end_transaction; 793 brelse(bh); 794 break; 795 } 796 797 /* Neither checksum match nor unused? */ 798 if (!((crc32_sum == found_chksum && 799 cbh->h_chksum_type == 800 JBD2_CRC32_CHKSUM && 801 cbh->h_chksum_size == 802 JBD2_CRC32_CHKSUM_SIZE) || 803 (cbh->h_chksum_type == 0 && 804 cbh->h_chksum_size == 0 && 805 found_chksum == 0))) 806 goto chksum_error; 807 808 crc32_sum = ~0; 809 } 810 if (pass == PASS_SCAN && 811 !jbd2_commit_block_csum_verify(journal, 812 bh->b_data)) { 813 chksum_error: 814 if (commit_time < last_trans_commit_time) 815 goto ignore_crc_mismatch; 816 info->end_transaction = next_commit_ID; 817 info->head_block = head_block; 818 819 if (!jbd2_has_feature_async_commit(journal)) { 820 journal->j_failed_commit = 821 next_commit_ID; 822 brelse(bh); 823 break; 824 } 825 } 826 if (pass == PASS_SCAN) { 827 last_trans_commit_time = commit_time; 828 head_block = next_log_block; 829 } 830 brelse(bh); 831 next_commit_ID++; 832 continue; 833 834 case JBD2_REVOKE_BLOCK: 835 /* 836 * Check revoke block crc in pass_scan, if csum verify 837 * failed, check commit block time later. 
838 */ 839 if (pass == PASS_SCAN && 840 !jbd2_descriptor_block_csum_verify(journal, 841 bh->b_data)) { 842 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n", 843 next_log_block); 844 need_check_commit_time = true; 845 } 846 /* If we aren't in the REVOKE pass, then we can 847 * just skip over this block. */ 848 if (pass != PASS_REVOKE) { 849 brelse(bh); 850 continue; 851 } 852 853 err = scan_revoke_records(journal, bh, 854 next_commit_ID, info); 855 brelse(bh); 856 if (err) 857 goto failed; 858 continue; 859 860 default: 861 jbd2_debug(3, "Unrecognised magic %d, end of scan.\n", 862 blocktype); 863 brelse(bh); 864 goto done; 865 } 866 } 867 868 done: 869 /* 870 * We broke out of the log scan loop: either we came to the 871 * known end of the log or we found an unexpected block in the 872 * log. If the latter happened, then we know that the "current" 873 * transaction marks the end of the valid log. 874 */ 875 876 if (pass == PASS_SCAN) { 877 if (!info->end_transaction) 878 info->end_transaction = next_commit_ID; 879 if (!info->head_block) 880 info->head_block = head_block; 881 } else { 882 /* It's really bad news if different passes end up at 883 * different places (but possible due to IO errors). */ 884 if (info->end_transaction != next_commit_ID) { 885 printk(KERN_ERR "JBD2: recovery pass %d ended at " 886 "transaction %u, expected %u\n", 887 pass, next_commit_ID, info->end_transaction); 888 if (!success) 889 success = -EIO; 890 } 891 } 892 893 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 894 err = fc_do_one_pass(journal, info, pass); 895 if (err) 896 success = err; 897 } 898 899 if (block_error && success == 0) 900 success = -EIO; 901 return success; 902 903 failed: 904 return err; 905 } 906 907 /* Scan a revoke record, marking all blocks mentioned as revoked. 
*/

/*
 * Register every block number listed in the revoke block @bh as revoked
 * for transaction @sequence.
 *
 * Records start right after the revoke header and are 8 bytes wide on
 * journals with the 64bit feature, 4 bytes otherwise.  The header's
 * r_count field gives the offset at which the records end; a value larger
 * than the block size less the checksum tail is rejected.
 *
 * Returns 0 on success, -EINVAL for an out-of-range r_count, or the error
 * returned by jbd2_journal_set_revoke().  info->nr_revokes is bumped once
 * per record registered.
 */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
			       tid_t sequence, struct recovery_info *info)
{
	jbd2_journal_revoke_header_t *hdr =
		(jbd2_journal_revoke_header_t *) bh->b_data;
	__u32 used = be32_to_cpu(hdr->r_count);
	unsigned tail_len = 0;
	int rec_size = 4;
	int pos, end;

	if (jbd2_journal_has_csum_v2or3(journal))
		tail_len = sizeof(struct jbd2_journal_block_tail);
	if (used > journal->j_blocksize - tail_len)
		return -EINVAL;
	end = used;

	if (jbd2_has_feature_64bit(journal))
		rec_size = 8;

	for (pos = sizeof(jbd2_journal_revoke_header_t);
	     pos + rec_size <= end; pos += rec_size) {
		char *rec = bh->b_data + pos;
		unsigned long long blocknr;
		int err;

		if (rec_size == 4)
			blocknr = be32_to_cpu(*((__be32 *) rec));
		else
			blocknr = be64_to_cpu(*((__be64 *) rec));

		err = jbd2_journal_set_revoke(journal, blocknr, sequence);
		if (err)
			return err;
		++info->nr_revokes;
	}
	return 0;
}