1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * linux/fs/jbd2/recovery.c 4 * 5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 6 * 7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved 8 * 9 * Journal recovery routines for the generic filesystem journaling code; 10 * part of the ext2fs journaling system. 11 */ 12 13 #ifndef __KERNEL__ 14 #include "jfs_user.h" 15 #else 16 #include <linux/time.h> 17 #include <linux/fs.h> 18 #include <linux/jbd2.h> 19 #include <linux/errno.h> 20 #include <linux/crc32.h> 21 #include <linux/blkdev.h> 22 #endif 23 24 /* 25 * Maintain information about the progress of the recovery job, so that 26 * the different passes can carry information between them. 27 */ 28 struct recovery_info 29 { 30 tid_t start_transaction; 31 tid_t end_transaction; 32 33 int nr_replays; 34 int nr_revokes; 35 int nr_revoke_hits; 36 }; 37 38 static int do_one_pass(journal_t *journal, 39 struct recovery_info *info, enum passtype pass); 40 static int scan_revoke_records(journal_t *, struct buffer_head *, 41 tid_t, struct recovery_info *); 42 43 #ifdef __KERNEL__ 44 45 /* Release readahead buffers after use */ 46 static void journal_brelse_array(struct buffer_head *b[], int n) 47 { 48 while (--n >= 0) 49 brelse (b[n]); 50 } 51 52 53 /* 54 * When reading from the journal, we are going through the block device 55 * layer directly and so there is no readahead being done for us. We 56 * need to implement any readahead ourselves if we want it to happen at 57 * all. Recovery is basically one long sequential read, so make sure we 58 * do the IO in reasonably large chunks. 59 * 60 * This is not so critical that we need to be enormously clever about 61 * the readahead size, though. 128K is a purely arbitrary, good-enough 62 * fixed value. 63 */ 64 65 #define MAXBUF 8 66 static int do_readahead(journal_t *journal, unsigned int start) 67 { 68 int err; 69 unsigned int max, nbufs, next; 70 unsigned long long blocknr; 71 struct buffer_head *bh; 72 73 struct buffer_head * bufs[MAXBUF]; 74 75 /* Do up to 128K of readahead */ 76 max = start + (128 * 1024 / journal->j_blocksize); 77 if (max > journal->j_total_len) 78 max = journal->j_total_len; 79 80 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at 81 * a time to the block device IO layer. */ 82 83 nbufs = 0; 84 85 for (next = start; next < max; next++) { 86 err = jbd2_journal_bmap(journal, next, &blocknr); 87 88 if (err) { 89 printk(KERN_ERR "JBD2: bad block at offset %u\n", 90 next); 91 goto failed; 92 } 93 94 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 95 if (!bh) { 96 err = -ENOMEM; 97 goto failed; 98 } 99 100 if (!buffer_uptodate(bh) && !buffer_locked(bh)) { 101 bufs[nbufs++] = bh; 102 if (nbufs == MAXBUF) { 103 bh_readahead_batch(nbufs, bufs, 0); 104 journal_brelse_array(bufs, nbufs); 105 nbufs = 0; 106 } 107 } else 108 brelse(bh); 109 } 110 111 if (nbufs) 112 bh_readahead_batch(nbufs, bufs, 0); 113 err = 0; 114 115 failed: 116 if (nbufs) 117 journal_brelse_array(bufs, nbufs); 118 return err; 119 } 120 121 #endif /* __KERNEL__ */ 122 123 124 /* 125 * Read a block from the journal 126 */ 127 128 static int jread(struct buffer_head **bhp, journal_t *journal, 129 unsigned int offset) 130 { 131 int err; 132 unsigned long long blocknr; 133 struct buffer_head *bh; 134 135 *bhp = NULL; 136 137 if (offset >= journal->j_total_len) { 138 printk(KERN_ERR "JBD2: corrupted journal superblock\n"); 139 return -EFSCORRUPTED; 140 } 141 142 err = jbd2_journal_bmap(journal, offset, &blocknr); 143 144 if (err) { 145 printk(KERN_ERR "JBD2: bad block at offset %u\n", 146 offset); 147 return err; 148 } 149 150 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); 151 if (!bh) 152 return -ENOMEM; 153 154 if (!buffer_uptodate(bh)) { 155 /* 156 * If this is a brand new buffer, start readahead. 157 * Otherwise, we assume we are already reading it. 158 */ 159 bool need_readahead = !buffer_req(bh); 160 161 bh_read_nowait(bh, 0); 162 if (need_readahead) 163 do_readahead(journal, offset); 164 wait_on_buffer(bh); 165 } 166 167 if (!buffer_uptodate(bh)) { 168 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n", 169 offset); 170 brelse(bh); 171 return -EIO; 172 } 173 174 *bhp = bh; 175 return 0; 176 } 177 178 static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf) 179 { 180 struct jbd2_journal_block_tail *tail; 181 __be32 provided; 182 __u32 calculated; 183 184 if (!jbd2_journal_has_csum_v2or3(j)) 185 return 1; 186 187 tail = (struct jbd2_journal_block_tail *)((char *)buf + 188 j->j_blocksize - sizeof(struct jbd2_journal_block_tail)); 189 provided = tail->t_checksum; 190 tail->t_checksum = 0; 191 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 192 tail->t_checksum = provided; 193 194 return provided == cpu_to_be32(calculated); 195 } 196 197 /* 198 * Count the number of in-use tags in a journal descriptor block. 199 */ 200 201 static int count_tags(journal_t *journal, struct buffer_head *bh) 202 { 203 char * tagp; 204 journal_block_tag_t tag; 205 int nr = 0, size = journal->j_blocksize; 206 int tag_bytes = journal_tag_bytes(journal); 207 208 if (jbd2_journal_has_csum_v2or3(journal)) 209 size -= sizeof(struct jbd2_journal_block_tail); 210 211 tagp = &bh->b_data[sizeof(journal_header_t)]; 212 213 while ((tagp - bh->b_data + tag_bytes) <= size) { 214 memcpy(&tag, tagp, sizeof(tag)); 215 216 nr++; 217 tagp += tag_bytes; 218 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID))) 219 tagp += 16; 220 221 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG)) 222 break; 223 } 224 225 return nr; 226 } 227 228 229 /* Make sure we wrap around the log correctly! */ 230 #define wrap(journal, var) \ 231 do { \ 232 unsigned long _wrap_last = \ 233 jbd2_has_feature_fast_commit(journal) ? \ 234 (journal)->j_fc_last : (journal)->j_last; \ 235 \ 236 if (var >= _wrap_last) \ 237 var -= (_wrap_last - (journal)->j_first); \ 238 } while (0) 239 240 static int fc_do_one_pass(journal_t *journal, 241 struct recovery_info *info, enum passtype pass) 242 { 243 unsigned int expected_commit_id = info->end_transaction; 244 unsigned long next_fc_block; 245 struct buffer_head *bh; 246 int err = 0; 247 248 next_fc_block = journal->j_fc_first; 249 if (!journal->j_fc_replay_callback) 250 return 0; 251 252 while (next_fc_block <= journal->j_fc_last) { 253 jbd2_debug(3, "Fast commit replay: next block %ld\n", 254 next_fc_block); 255 err = jread(&bh, journal, next_fc_block); 256 if (err) { 257 jbd2_debug(3, "Fast commit replay: read error\n"); 258 break; 259 } 260 261 err = journal->j_fc_replay_callback(journal, bh, pass, 262 next_fc_block - journal->j_fc_first, 263 expected_commit_id); 264 brelse(bh); 265 next_fc_block++; 266 if (err < 0 || err == JBD2_FC_REPLAY_STOP) 267 break; 268 err = 0; 269 } 270 271 if (err) 272 jbd2_debug(3, "Fast commit replay failed, err = %d\n", err); 273 274 return err; 275 } 276 277 /** 278 * jbd2_journal_recover - recovers a on-disk journal 279 * @journal: the journal to recover 280 * 281 * The primary function for recovering the log contents when mounting a 282 * journaled device. 283 * 284 * Recovery is done in three passes. In the first pass, we look for the 285 * end of the log. In the second, we assemble the list of revoke 286 * blocks. In the third and final pass, we replay any un-revoked blocks 287 * in the log. 288 */ 289 int jbd2_journal_recover(journal_t *journal) 290 { 291 int err, err2; 292 journal_superblock_t * sb; 293 294 struct recovery_info info; 295 296 memset(&info, 0, sizeof(info)); 297 sb = journal->j_superblock; 298 299 /* 300 * The journal superblock's s_start field (the current log head) 301 * is always zero if, and only if, the journal was cleanly 302 * unmounted. 303 */ 304 305 if (!sb->s_start) { 306 jbd2_debug(1, "No recovery required, last transaction %d\n", 307 be32_to_cpu(sb->s_sequence)); 308 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1; 309 return 0; 310 } 311 312 err = do_one_pass(journal, &info, PASS_SCAN); 313 if (!err) 314 err = do_one_pass(journal, &info, PASS_REVOKE); 315 if (!err) 316 err = do_one_pass(journal, &info, PASS_REPLAY); 317 318 jbd2_debug(1, "JBD2: recovery, exit status %d, " 319 "recovered transactions %u to %u\n", 320 err, info.start_transaction, info.end_transaction); 321 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n", 322 info.nr_replays, info.nr_revoke_hits, info.nr_revokes); 323 324 /* Restart the log at the next transaction ID, thus invalidating 325 * any existing commit records in the log. */ 326 journal->j_transaction_sequence = ++info.end_transaction; 327 328 jbd2_journal_clear_revoke(journal); 329 err2 = sync_blockdev(journal->j_fs_dev); 330 if (!err) 331 err = err2; 332 /* Make sure all replayed data is on permanent storage */ 333 if (journal->j_flags & JBD2_BARRIER) { 334 err2 = blkdev_issue_flush(journal->j_fs_dev); 335 if (!err) 336 err = err2; 337 } 338 return err; 339 } 340 341 /** 342 * jbd2_journal_skip_recovery - Start journal and wipe exiting records 343 * @journal: journal to startup 344 * 345 * Locate any valid recovery information from the journal and set up the 346 * journal structures in memory to ignore it (presumably because the 347 * caller has evidence that it is out of date). 348 * This function doesn't appear to be exported.. 349 * 350 * We perform one pass over the journal to allow us to tell the user how 351 * much recovery information is being erased, and to let us initialise 352 * the journal transaction sequence numbers to the next unused ID. 353 */ 354 int jbd2_journal_skip_recovery(journal_t *journal) 355 { 356 int err; 357 358 struct recovery_info info; 359 360 memset (&info, 0, sizeof(info)); 361 362 err = do_one_pass(journal, &info, PASS_SCAN); 363 364 if (err) { 365 printk(KERN_ERR "JBD2: error %d scanning journal\n", err); 366 ++journal->j_transaction_sequence; 367 } else { 368 #ifdef CONFIG_JBD2_DEBUG 369 int dropped = info.end_transaction - 370 be32_to_cpu(journal->j_superblock->s_sequence); 371 jbd2_debug(1, 372 "JBD2: ignoring %d transaction%s from the journal.\n", 373 dropped, (dropped == 1) ? "" : "s"); 374 #endif 375 journal->j_transaction_sequence = ++info.end_transaction; 376 } 377 378 journal->j_tail = 0; 379 return err; 380 } 381 382 static inline unsigned long long read_tag_block(journal_t *journal, 383 journal_block_tag_t *tag) 384 { 385 unsigned long long block = be32_to_cpu(tag->t_blocknr); 386 if (jbd2_has_feature_64bit(journal)) 387 block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; 388 return block; 389 } 390 391 /* 392 * calc_chksums calculates the checksums for the blocks described in the 393 * descriptor block. 394 */ 395 static int calc_chksums(journal_t *journal, struct buffer_head *bh, 396 unsigned long *next_log_block, __u32 *crc32_sum) 397 { 398 int i, num_blks, err; 399 unsigned long io_block; 400 struct buffer_head *obh; 401 402 num_blks = count_tags(journal, bh); 403 /* Calculate checksum of the descriptor block. */ 404 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size); 405 406 for (i = 0; i < num_blks; i++) { 407 io_block = (*next_log_block)++; 408 wrap(journal, *next_log_block); 409 err = jread(&obh, journal, io_block); 410 if (err) { 411 printk(KERN_ERR "JBD2: IO error %d recovering block " 412 "%lu in log\n", err, io_block); 413 return 1; 414 } else { 415 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data, 416 obh->b_size); 417 } 418 put_bh(obh); 419 } 420 return 0; 421 } 422 423 static int jbd2_commit_block_csum_verify(journal_t *j, void *buf) 424 { 425 struct commit_header *h; 426 __be32 provided; 427 __u32 calculated; 428 429 if (!jbd2_journal_has_csum_v2or3(j)) 430 return 1; 431 432 h = buf; 433 provided = h->h_chksum[0]; 434 h->h_chksum[0] = 0; 435 calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize); 436 h->h_chksum[0] = provided; 437 438 return provided == cpu_to_be32(calculated); 439 } 440 441 static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag, 442 journal_block_tag3_t *tag3, 443 void *buf, __u32 sequence) 444 { 445 __u32 csum32; 446 __be32 seq; 447 448 if (!jbd2_journal_has_csum_v2or3(j)) 449 return 1; 450 451 seq = cpu_to_be32(sequence); 452 csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq)); 453 csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize); 454 455 if (jbd2_has_feature_csum3(j)) 456 return tag3->t_checksum == cpu_to_be32(csum32); 457 else 458 return tag->t_checksum == cpu_to_be16(csum32); 459 } 460 461 static int do_one_pass(journal_t *journal, 462 struct recovery_info *info, enum passtype pass) 463 { 464 unsigned int first_commit_ID, next_commit_ID; 465 unsigned long next_log_block; 466 int err, success = 0; 467 journal_superblock_t * sb; 468 journal_header_t * tmp; 469 struct buffer_head * bh; 470 unsigned int sequence; 471 int blocktype; 472 int tag_bytes = journal_tag_bytes(journal); 473 __u32 crc32_sum = ~0; /* Transactional Checksums */ 474 int descr_csum_size = 0; 475 int block_error = 0; 476 bool need_check_commit_time = false; 477 __u64 last_trans_commit_time = 0, commit_time; 478 479 /* 480 * First thing is to establish what we expect to find in the log 481 * (in terms of transaction IDs), and where (in terms of log 482 * block offsets): query the superblock. 483 */ 484 485 sb = journal->j_superblock; 486 next_commit_ID = be32_to_cpu(sb->s_sequence); 487 next_log_block = be32_to_cpu(sb->s_start); 488 489 first_commit_ID = next_commit_ID; 490 if (pass == PASS_SCAN) 491 info->start_transaction = first_commit_ID; 492 493 jbd2_debug(1, "Starting recovery pass %d\n", pass); 494 495 /* 496 * Now we walk through the log, transaction by transaction, 497 * making sure that each transaction has a commit block in the 498 * expected place. Each complete transaction gets replayed back 499 * into the main filesystem. 500 */ 501 502 while (1) { 503 int flags; 504 char * tagp; 505 journal_block_tag_t tag; 506 struct buffer_head * obh; 507 struct buffer_head * nbh; 508 509 cond_resched(); 510 511 /* If we already know where to stop the log traversal, 512 * check right now that we haven't gone past the end of 513 * the log. */ 514 515 if (pass != PASS_SCAN) 516 if (tid_geq(next_commit_ID, info->end_transaction)) 517 break; 518 519 jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", 520 next_commit_ID, next_log_block, 521 jbd2_has_feature_fast_commit(journal) ? 522 journal->j_fc_last : journal->j_last); 523 524 /* Skip over each chunk of the transaction looking 525 * either the next descriptor block or the final commit 526 * record. */ 527 528 jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block); 529 err = jread(&bh, journal, next_log_block); 530 if (err) 531 goto failed; 532 533 next_log_block++; 534 wrap(journal, next_log_block); 535 536 /* What kind of buffer is it? 537 * 538 * If it is a descriptor block, check that it has the 539 * expected sequence number. Otherwise, we're all done 540 * here. */ 541 542 tmp = (journal_header_t *)bh->b_data; 543 544 if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { 545 brelse(bh); 546 break; 547 } 548 549 blocktype = be32_to_cpu(tmp->h_blocktype); 550 sequence = be32_to_cpu(tmp->h_sequence); 551 jbd2_debug(3, "Found magic %d, sequence %d\n", 552 blocktype, sequence); 553 554 if (sequence != next_commit_ID) { 555 brelse(bh); 556 break; 557 } 558 559 /* OK, we have a valid descriptor block which matches 560 * all of the sequence number checks. What are we going 561 * to do with it? That depends on the pass... */ 562 563 switch(blocktype) { 564 case JBD2_DESCRIPTOR_BLOCK: 565 /* Verify checksum first */ 566 if (jbd2_journal_has_csum_v2or3(journal)) 567 descr_csum_size = 568 sizeof(struct jbd2_journal_block_tail); 569 if (descr_csum_size > 0 && 570 !jbd2_descriptor_block_csum_verify(journal, 571 bh->b_data)) { 572 /* 573 * PASS_SCAN can see stale blocks due to lazy 574 * journal init. Don't error out on those yet. 575 */ 576 if (pass != PASS_SCAN) { 577 pr_err("JBD2: Invalid checksum recovering block %lu in log\n", 578 next_log_block); 579 err = -EFSBADCRC; 580 brelse(bh); 581 goto failed; 582 } 583 need_check_commit_time = true; 584 jbd2_debug(1, 585 "invalid descriptor block found in %lu\n", 586 next_log_block); 587 } 588 589 /* If it is a valid descriptor block, replay it 590 * in pass REPLAY; if journal_checksums enabled, then 591 * calculate checksums in PASS_SCAN, otherwise, 592 * just skip over the blocks it describes. */ 593 if (pass != PASS_REPLAY) { 594 if (pass == PASS_SCAN && 595 jbd2_has_feature_checksum(journal) && 596 !need_check_commit_time && 597 !info->end_transaction) { 598 if (calc_chksums(journal, bh, 599 &next_log_block, 600 &crc32_sum)) { 601 put_bh(bh); 602 break; 603 } 604 put_bh(bh); 605 continue; 606 } 607 next_log_block += count_tags(journal, bh); 608 wrap(journal, next_log_block); 609 put_bh(bh); 610 continue; 611 } 612 613 /* A descriptor block: we can now write all of 614 * the data blocks. Yay, useful work is finally 615 * getting done here! */ 616 617 tagp = &bh->b_data[sizeof(journal_header_t)]; 618 while ((tagp - bh->b_data + tag_bytes) 619 <= journal->j_blocksize - descr_csum_size) { 620 unsigned long io_block; 621 622 memcpy(&tag, tagp, sizeof(tag)); 623 flags = be16_to_cpu(tag.t_flags); 624 625 io_block = next_log_block++; 626 wrap(journal, next_log_block); 627 err = jread(&obh, journal, io_block); 628 if (err) { 629 /* Recover what we can, but 630 * report failure at the end. */ 631 success = err; 632 printk(KERN_ERR 633 "JBD2: IO error %d recovering " 634 "block %ld in log\n", 635 err, io_block); 636 } else { 637 unsigned long long blocknr; 638 639 J_ASSERT(obh != NULL); 640 blocknr = read_tag_block(journal, 641 &tag); 642 643 /* If the block has been 644 * revoked, then we're all done 645 * here. */ 646 if (jbd2_journal_test_revoke 647 (journal, blocknr, 648 next_commit_ID)) { 649 brelse(obh); 650 ++info->nr_revoke_hits; 651 goto skip_write; 652 } 653 654 /* Look for block corruption */ 655 if (!jbd2_block_tag_csum_verify( 656 journal, &tag, (journal_block_tag3_t *)tagp, 657 obh->b_data, be32_to_cpu(tmp->h_sequence))) { 658 brelse(obh); 659 success = -EFSBADCRC; 660 printk(KERN_ERR "JBD2: Invalid " 661 "checksum recovering " 662 "data block %llu in " 663 "log\n", blocknr); 664 block_error = 1; 665 goto skip_write; 666 } 667 668 /* Find a buffer for the new 669 * data being restored */ 670 nbh = __getblk(journal->j_fs_dev, 671 blocknr, 672 journal->j_blocksize); 673 if (nbh == NULL) { 674 printk(KERN_ERR 675 "JBD2: Out of memory " 676 "during recovery.\n"); 677 err = -ENOMEM; 678 brelse(bh); 679 brelse(obh); 680 goto failed; 681 } 682 683 lock_buffer(nbh); 684 memcpy(nbh->b_data, obh->b_data, 685 journal->j_blocksize); 686 if (flags & JBD2_FLAG_ESCAPE) { 687 *((__be32 *)nbh->b_data) = 688 cpu_to_be32(JBD2_MAGIC_NUMBER); 689 } 690 691 BUFFER_TRACE(nbh, "marking dirty"); 692 set_buffer_uptodate(nbh); 693 mark_buffer_dirty(nbh); 694 BUFFER_TRACE(nbh, "marking uptodate"); 695 ++info->nr_replays; 696 unlock_buffer(nbh); 697 brelse(obh); 698 brelse(nbh); 699 } 700 701 skip_write: 702 tagp += tag_bytes; 703 if (!(flags & JBD2_FLAG_SAME_UUID)) 704 tagp += 16; 705 706 if (flags & JBD2_FLAG_LAST_TAG) 707 break; 708 } 709 710 brelse(bh); 711 continue; 712 713 case JBD2_COMMIT_BLOCK: 714 /* How to differentiate between interrupted commit 715 * and journal corruption ? 716 * 717 * {nth transaction} 718 * Checksum Verification Failed 719 * | 720 * ____________________ 721 * | | 722 * async_commit sync_commit 723 * | | 724 * | GO TO NEXT "Journal Corruption" 725 * | TRANSACTION 726 * | 727 * {(n+1)th transanction} 728 * | 729 * _______|______________ 730 * | | 731 * Commit block found Commit block not found 732 * | | 733 * "Journal Corruption" | 734 * _____________|_________ 735 * | | 736 * nth trans corrupt OR nth trans 737 * and (n+1)th interrupted interrupted 738 * before commit block 739 * could reach the disk. 740 * (Cannot find the difference in above 741 * mentioned conditions. Hence assume 742 * "Interrupted Commit".) 743 */ 744 commit_time = be64_to_cpu( 745 ((struct commit_header *)bh->b_data)->h_commit_sec); 746 /* 747 * If need_check_commit_time is set, it means we are in 748 * PASS_SCAN and csum verify failed before. If 749 * commit_time is increasing, it's the same journal, 750 * otherwise it is stale journal block, just end this 751 * recovery. 752 */ 753 if (need_check_commit_time) { 754 if (commit_time >= last_trans_commit_time) { 755 pr_err("JBD2: Invalid checksum found in transaction %u\n", 756 next_commit_ID); 757 err = -EFSBADCRC; 758 brelse(bh); 759 goto failed; 760 } 761 ignore_crc_mismatch: 762 /* 763 * It likely does not belong to same journal, 764 * just end this recovery with success. 765 */ 766 jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n", 767 next_commit_ID); 768 brelse(bh); 769 goto done; 770 } 771 772 /* 773 * Found an expected commit block: if checksums 774 * are present, verify them in PASS_SCAN; else not 775 * much to do other than move on to the next sequence 776 * number. 777 */ 778 if (pass == PASS_SCAN && 779 jbd2_has_feature_checksum(journal)) { 780 struct commit_header *cbh = 781 (struct commit_header *)bh->b_data; 782 unsigned found_chksum = 783 be32_to_cpu(cbh->h_chksum[0]); 784 785 if (info->end_transaction) { 786 journal->j_failed_commit = 787 info->end_transaction; 788 brelse(bh); 789 break; 790 } 791 792 /* Neither checksum match nor unused? */ 793 if (!((crc32_sum == found_chksum && 794 cbh->h_chksum_type == 795 JBD2_CRC32_CHKSUM && 796 cbh->h_chksum_size == 797 JBD2_CRC32_CHKSUM_SIZE) || 798 (cbh->h_chksum_type == 0 && 799 cbh->h_chksum_size == 0 && 800 found_chksum == 0))) 801 goto chksum_error; 802 803 crc32_sum = ~0; 804 } 805 if (pass == PASS_SCAN && 806 !jbd2_commit_block_csum_verify(journal, 807 bh->b_data)) { 808 chksum_error: 809 if (commit_time < last_trans_commit_time) 810 goto ignore_crc_mismatch; 811 info->end_transaction = next_commit_ID; 812 813 if (!jbd2_has_feature_async_commit(journal)) { 814 journal->j_failed_commit = 815 next_commit_ID; 816 brelse(bh); 817 break; 818 } 819 } 820 if (pass == PASS_SCAN) 821 last_trans_commit_time = commit_time; 822 brelse(bh); 823 next_commit_ID++; 824 continue; 825 826 case JBD2_REVOKE_BLOCK: 827 /* 828 * Check revoke block crc in pass_scan, if csum verify 829 * failed, check commit block time later. 830 */ 831 if (pass == PASS_SCAN && 832 !jbd2_descriptor_block_csum_verify(journal, 833 bh->b_data)) { 834 jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n", 835 next_log_block); 836 need_check_commit_time = true; 837 } 838 /* If we aren't in the REVOKE pass, then we can 839 * just skip over this block. */ 840 if (pass != PASS_REVOKE) { 841 brelse(bh); 842 continue; 843 } 844 845 err = scan_revoke_records(journal, bh, 846 next_commit_ID, info); 847 brelse(bh); 848 if (err) 849 goto failed; 850 continue; 851 852 default: 853 jbd2_debug(3, "Unrecognised magic %d, end of scan.\n", 854 blocktype); 855 brelse(bh); 856 goto done; 857 } 858 } 859 860 done: 861 /* 862 * We broke out of the log scan loop: either we came to the 863 * known end of the log or we found an unexpected block in the 864 * log. If the latter happened, then we know that the "current" 865 * transaction marks the end of the valid log. 866 */ 867 868 if (pass == PASS_SCAN) { 869 if (!info->end_transaction) 870 info->end_transaction = next_commit_ID; 871 } else { 872 /* It's really bad news if different passes end up at 873 * different places (but possible due to IO errors). */ 874 if (info->end_transaction != next_commit_ID) { 875 printk(KERN_ERR "JBD2: recovery pass %d ended at " 876 "transaction %u, expected %u\n", 877 pass, next_commit_ID, info->end_transaction); 878 if (!success) 879 success = -EIO; 880 } 881 } 882 883 if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) { 884 err = fc_do_one_pass(journal, info, pass); 885 if (err) 886 success = err; 887 } 888 889 if (block_error && success == 0) 890 success = -EIO; 891 return success; 892 893 failed: 894 return err; 895 } 896 897 /* Scan a revoke record, marking all blocks mentioned as revoked. */ 898 899 static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, 900 tid_t sequence, struct recovery_info *info) 901 { 902 jbd2_journal_revoke_header_t *header; 903 int offset, max; 904 unsigned csum_size = 0; 905 __u32 rcount; 906 int record_len = 4; 907 908 header = (jbd2_journal_revoke_header_t *) bh->b_data; 909 offset = sizeof(jbd2_journal_revoke_header_t); 910 rcount = be32_to_cpu(header->r_count); 911 912 if (jbd2_journal_has_csum_v2or3(journal)) 913 csum_size = sizeof(struct jbd2_journal_block_tail); 914 if (rcount > journal->j_blocksize - csum_size) 915 return -EINVAL; 916 max = rcount; 917 918 if (jbd2_has_feature_64bit(journal)) 919 record_len = 8; 920 921 while (offset + record_len <= max) { 922 unsigned long long blocknr; 923 int err; 924 925 if (record_len == 4) 926 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); 927 else 928 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); 929 offset += record_len; 930 err = jbd2_journal_set_revoke(journal, blocknr, sequence); 931 if (err) 932 return err; 933 ++info->nr_revokes; 934 } 935 return 0; 936 } 937