1 // SPDX-License-Identifier: GPL-2.0+
2 /*
3 * linux/fs/jbd2/recovery.c
4 *
5 * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
6 *
7 * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
8 *
9 * Journal recovery routines for the generic filesystem journaling code;
10 * part of the ext2fs journaling system.
11 */
12
13 #ifndef __KERNEL__
14 #include "jfs_user.h"
15 #else
16 #include <linux/time.h>
17 #include <linux/fs.h>
18 #include <linux/jbd2.h>
19 #include <linux/errno.h>
20 #include <linux/crc32.h>
21 #include <linux/blkdev.h>
22 #include <linux/string_choices.h>
23 #endif
24
25 /*
26 * Maintain information about the progress of the recovery job, so that
27 * the different passes can carry information between them.
28 */
29 struct recovery_info
30 {
31 tid_t start_transaction;
32 tid_t end_transaction;
33 unsigned long head_block;
34
35 int nr_replays;
36 int nr_revokes;
37 int nr_revoke_hits;
38 };
39
40 static int do_one_pass(journal_t *journal,
41 struct recovery_info *info, enum passtype pass);
42 static int scan_revoke_records(journal_t *, enum passtype, struct buffer_head *,
43 tid_t, struct recovery_info *);
44
45 #ifdef __KERNEL__
46
/*
 * Release the readahead buffers once they are no longer needed: calls
 * brelse() on the first @n entries of @b, from the last one down to the
 * first.
 */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
53
54
55 /*
56 * When reading from the journal, we are going through the block device
57 * layer directly and so there is no readahead being done for us. We
58 * need to implement any readahead ourselves if we want it to happen at
59 * all. Recovery is basically one long sequential read, so make sure we
60 * do the IO in reasonably large chunks.
61 *
62 * This is not so critical that we need to be enormously clever about
63 * the readahead size, though. 128K is a purely arbitrary, good-enough
64 * fixed value.
65 */
66
67 #define MAXBUF 8
do_readahead(journal_t * journal,unsigned int start)68 static void do_readahead(journal_t *journal, unsigned int start)
69 {
70 unsigned int max, nbufs, next;
71 unsigned long long blocknr;
72 struct buffer_head *bh;
73
74 struct buffer_head * bufs[MAXBUF];
75
76 /* Do up to 128K of readahead */
77 max = start + (128 * 1024 / journal->j_blocksize);
78 if (max > journal->j_total_len)
79 max = journal->j_total_len;
80
81 /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
82 * a time to the block device IO layer. */
83
84 nbufs = 0;
85
86 for (next = start; next < max; next++) {
87 int err = jbd2_journal_bmap(journal, next, &blocknr);
88
89 if (err) {
90 printk(KERN_ERR "JBD2: bad block at offset %u\n",
91 next);
92 goto failed;
93 }
94
95 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
96 if (!bh)
97 goto failed;
98
99 if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
100 bufs[nbufs++] = bh;
101 if (nbufs == MAXBUF) {
102 bh_readahead_batch(nbufs, bufs, 0);
103 journal_brelse_array(bufs, nbufs);
104 nbufs = 0;
105 }
106 } else
107 brelse(bh);
108 }
109
110 if (nbufs)
111 bh_readahead_batch(nbufs, bufs, 0);
112
113 failed:
114 if (nbufs)
115 journal_brelse_array(bufs, nbufs);
116 }
117
118 #endif /* __KERNEL__ */
119
120
121 /*
122 * Read a block from the journal
123 */
124
jread(struct buffer_head ** bhp,journal_t * journal,unsigned int offset)125 static int jread(struct buffer_head **bhp, journal_t *journal,
126 unsigned int offset)
127 {
128 int err;
129 unsigned long long blocknr;
130 struct buffer_head *bh;
131
132 *bhp = NULL;
133
134 if (offset >= journal->j_total_len) {
135 printk(KERN_ERR "JBD2: corrupted journal superblock\n");
136 return -EFSCORRUPTED;
137 }
138
139 err = jbd2_journal_bmap(journal, offset, &blocknr);
140
141 if (err) {
142 printk(KERN_ERR "JBD2: bad block at offset %u\n",
143 offset);
144 return err;
145 }
146
147 bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
148 if (!bh)
149 return -ENOMEM;
150
151 if (!buffer_uptodate(bh)) {
152 /*
153 * If this is a brand new buffer, start readahead.
154 * Otherwise, we assume we are already reading it.
155 */
156 bool need_readahead = !buffer_req(bh);
157
158 bh_read_nowait(bh, 0);
159 if (need_readahead)
160 do_readahead(journal, offset);
161 wait_on_buffer(bh);
162 }
163
164 if (!buffer_uptodate(bh)) {
165 printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
166 offset);
167 brelse(bh);
168 return -EIO;
169 }
170
171 *bhp = bh;
172 return 0;
173 }
174
/*
 * Verify the checksum kept in the block tail at the end of @buf.
 *
 * Returns 1 when v2/v3 checksums are not enabled on @j or when the stored
 * checksum matches the one computed over the whole block, 0 otherwise.
 * The checksum field is zeroed while the sum is computed and restored
 * before returning.
 */
static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
{
	struct jbd2_journal_block_tail *tail;
	__be32 stored;
	__u32 computed;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	/* The tail occupies the last bytes of the block. */
	tail = (struct jbd2_journal_block_tail *)
		((char *)buf + j->j_blocksize - sizeof(*tail));

	stored = tail->t_checksum;
	tail->t_checksum = 0;
	computed = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	tail->t_checksum = stored;

	return cpu_to_be32(computed) == stored;
}
193
194 /*
195 * Count the number of in-use tags in a journal descriptor block.
196 */
197
count_tags(journal_t * journal,struct buffer_head * bh)198 static int count_tags(journal_t *journal, struct buffer_head *bh)
199 {
200 char * tagp;
201 journal_block_tag_t tag;
202 int nr = 0, size = journal->j_blocksize;
203 int tag_bytes = journal_tag_bytes(journal);
204
205 if (jbd2_journal_has_csum_v2or3(journal))
206 size -= sizeof(struct jbd2_journal_block_tail);
207
208 tagp = &bh->b_data[sizeof(journal_header_t)];
209
210 while ((tagp - bh->b_data + tag_bytes) <= size) {
211 memcpy(&tag, tagp, sizeof(tag));
212
213 nr++;
214 tagp += tag_bytes;
215 if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
216 tagp += 16;
217
218 if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
219 break;
220 }
221
222 return nr;
223 }
224
225
/*
 * Make sure we wrap around the log correctly!
 *
 * If the log block number @var has reached (journal)->j_last, pull it back
 * by the length of the circular area (j_last - j_first).  @var must be a
 * modifiable lvalue; it is evaluated more than once, so it must not have
 * side effects.  Both arguments are parenthesized so that expressions
 * such as *ptr expand safely.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
232
/*
 * fc_do_one_pass - run one recovery pass over the fast-commit area.
 * @journal: journal being recovered
 * @info: recovery state; end_transaction is passed on as the expected
 *        commit ID
 * @pass: pass being run, handed through to the callback
 *
 * Reads every block from j_fc_first to j_fc_last and gives it to the
 * journal's j_fc_replay_callback together with its index inside the
 * fast-commit area.  The walk stops early on a read error, on a negative
 * callback result, or when the callback returns JBD2_FC_REPLAY_STOP.
 *
 * Returns 0 when no callback is registered or every block was consumed;
 * otherwise the jread() error or the callback's last return value.
 */
static int fc_do_one_pass(journal_t *journal,
			  struct recovery_info *info, enum passtype pass)
{
	unsigned int expected_commit_id = info->end_transaction;
	struct buffer_head *bh;
	unsigned long blk;
	int err = 0;

	if (!journal->j_fc_replay_callback)
		return 0;

	for (blk = journal->j_fc_first; blk <= journal->j_fc_last; blk++) {
		jbd2_debug(3, "Fast commit replay: next block %ld\n",
			  blk);

		err = jread(&bh, journal, blk);
		if (err) {
			jbd2_debug(3, "Fast commit replay: read error\n");
			break;
		}

		err = journal->j_fc_replay_callback(journal, bh, pass,
						    blk - journal->j_fc_first,
						    expected_commit_id);
		brelse(bh);
		if (err < 0 || err == JBD2_FC_REPLAY_STOP)
			break;

		/* Any other callback result means "carry on". */
		err = 0;
	}

	if (err)
		jbd2_debug(3, "Fast commit replay failed, err = %d\n", err);

	return err;
}
269
270 /**
271 * jbd2_journal_recover - recovers a on-disk journal
272 * @journal: the journal to recover
273 *
274 * The primary function for recovering the log contents when mounting a
275 * journaled device.
276 *
277 * Recovery is done in three passes. In the first pass, we look for the
278 * end of the log. In the second, we assemble the list of revoke
279 * blocks. In the third and final pass, we replay any un-revoked blocks
280 * in the log.
281 */
jbd2_journal_recover(journal_t * journal)282 int jbd2_journal_recover(journal_t *journal)
283 {
284 int err, err2;
285 struct recovery_info info;
286
287 memset(&info, 0, sizeof(info));
288
289 /*
290 * The journal superblock's s_start field (the current log head)
291 * is always zero if, and only if, the journal was cleanly
292 * unmounted. We use its in-memory version j_tail here because
293 * jbd2_journal_wipe() could have updated it without updating journal
294 * superblock.
295 */
296 if (!journal->j_tail) {
297 journal_superblock_t *sb = journal->j_superblock;
298
299 jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
300 be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
301 journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
302 journal->j_head = be32_to_cpu(sb->s_head);
303 return 0;
304 }
305
306 err = do_one_pass(journal, &info, PASS_SCAN);
307 if (!err)
308 err = do_one_pass(journal, &info, PASS_REVOKE);
309 if (!err)
310 err = do_one_pass(journal, &info, PASS_REPLAY);
311
312 jbd2_debug(1, "JBD2: recovery, exit status %d, "
313 "recovered transactions %u to %u\n",
314 err, info.start_transaction, info.end_transaction);
315 jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
316 info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
317
318 /* Restart the log at the next transaction ID, thus invalidating
319 * any existing commit records in the log. */
320 journal->j_transaction_sequence = ++info.end_transaction;
321 journal->j_head = info.head_block;
322 jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
323 journal->j_transaction_sequence, journal->j_head);
324
325 jbd2_journal_clear_revoke(journal);
326 /* Free revoke table allocated for replay */
327 if (journal->j_revoke != journal->j_revoke_table[0] &&
328 journal->j_revoke != journal->j_revoke_table[1]) {
329 jbd2_journal_destroy_revoke_table(journal->j_revoke);
330 journal->j_revoke = journal->j_revoke_table[1];
331 }
332 err2 = sync_blockdev(journal->j_fs_dev);
333 if (!err)
334 err = err2;
335 err2 = jbd2_check_fs_dev_write_error(journal);
336 if (!err)
337 err = err2;
338 /* Make sure all replayed data is on permanent storage */
339 if (journal->j_flags & JBD2_BARRIER) {
340 err2 = blkdev_issue_flush(journal->j_fs_dev);
341 if (!err)
342 err = err2;
343 }
344 return err;
345 }
346
347 /**
348 * jbd2_journal_skip_recovery - Start journal and wipe exiting records
349 * @journal: journal to startup
350 *
351 * Locate any valid recovery information from the journal and set up the
352 * journal structures in memory to ignore it (presumably because the
353 * caller has evidence that it is out of date).
354 * This function doesn't appear to be exported..
355 *
356 * We perform one pass over the journal to allow us to tell the user how
357 * much recovery information is being erased, and to let us initialise
358 * the journal transaction sequence numbers to the next unused ID.
359 */
jbd2_journal_skip_recovery(journal_t * journal)360 int jbd2_journal_skip_recovery(journal_t *journal)
361 {
362 int err;
363
364 struct recovery_info info;
365
366 memset (&info, 0, sizeof(info));
367
368 err = do_one_pass(journal, &info, PASS_SCAN);
369
370 if (err) {
371 printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
372 ++journal->j_transaction_sequence;
373 journal->j_head = journal->j_first;
374 } else {
375 #ifdef CONFIG_JBD2_DEBUG
376 int dropped = info.end_transaction -
377 be32_to_cpu(journal->j_superblock->s_sequence);
378 jbd2_debug(1,
379 "JBD2: ignoring %d transaction%s from the journal.\n",
380 dropped, str_plural(dropped));
381 #endif
382 journal->j_transaction_sequence = ++info.end_transaction;
383 journal->j_head = info.head_block;
384 }
385
386 journal->j_tail = 0;
387 return err;
388 }
389
/*
 * Return the filesystem block number stored in @tag.  The upper 32 bits
 * are only taken from the tag when the journal has the 64bit feature.
 */
static inline unsigned long long read_tag_block(journal_t *journal,
						journal_block_tag_t *tag)
{
	u64 low = be32_to_cpu(tag->t_blocknr);
	u64 high = 0;

	if (jbd2_has_feature_64bit(journal))
		high = be32_to_cpu(tag->t_blocknr_high);

	return (high << 32) | low;
}
398
399 /*
400 * calc_chksums calculates the checksums for the blocks described in the
401 * descriptor block.
402 */
calc_chksums(journal_t * journal,struct buffer_head * bh,unsigned long * next_log_block,__u32 * crc32_sum)403 static int calc_chksums(journal_t *journal, struct buffer_head *bh,
404 unsigned long *next_log_block, __u32 *crc32_sum)
405 {
406 int i, num_blks, err;
407 unsigned long io_block;
408 struct buffer_head *obh;
409
410 num_blks = count_tags(journal, bh);
411 /* Calculate checksum of the descriptor block. */
412 *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
413
414 for (i = 0; i < num_blks; i++) {
415 io_block = (*next_log_block)++;
416 wrap(journal, *next_log_block);
417 err = jread(&obh, journal, io_block);
418 if (err) {
419 printk(KERN_ERR "JBD2: IO error %d recovering block "
420 "%lu in log\n", err, io_block);
421 return 1;
422 } else {
423 *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
424 obh->b_size);
425 }
426 put_bh(obh);
427 }
428 return 0;
429 }
430
/*
 * Verify the checksum stored in h_chksum[0] of the commit block in @buf.
 *
 * Returns 1 when v2/v3 checksums are not enabled on @j or when the stored
 * value matches the checksum of the whole block, 0 otherwise.  The field
 * is zeroed for the computation and restored afterwards.
 */
static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
{
	struct commit_header *hdr = buf;
	__be32 stored;
	__u32 computed;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	stored = hdr->h_chksum[0];
	hdr->h_chksum[0] = 0;
	computed = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
	hdr->h_chksum[0] = stored;

	return cpu_to_be32(computed) == stored;
}
448
jbd2_commit_block_csum_verify_partial(journal_t * j,void * buf)449 static bool jbd2_commit_block_csum_verify_partial(journal_t *j, void *buf)
450 {
451 struct commit_header *h;
452 __be32 provided;
453 __u32 calculated;
454 void *tmpbuf;
455
456 tmpbuf = kzalloc(j->j_blocksize, GFP_KERNEL);
457 if (!tmpbuf)
458 return false;
459
460 memcpy(tmpbuf, buf, sizeof(struct commit_header));
461 h = tmpbuf;
462 provided = h->h_chksum[0];
463 h->h_chksum[0] = 0;
464 calculated = jbd2_chksum(j, j->j_csum_seed, tmpbuf, j->j_blocksize);
465 kfree(tmpbuf);
466
467 return provided == cpu_to_be32(calculated);
468 }
469
/*
 * Verify the per-block checksum recorded in a descriptor tag.
 *
 * The checksum covers the big-endian transaction @sequence followed by
 * the j_blocksize bytes at @buf.  With the csum3 feature the 32-bit value
 * in @tag3 is compared; otherwise the 16-bit field in @tag is compared
 * against the truncated checksum.
 *
 * Returns 1 when v2/v3 checksums are not enabled or the checksum
 * matches, 0 otherwise.
 */
static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
				      journal_block_tag3_t *tag3,
				      void *buf, __u32 sequence)
{
	__be32 be_seq;
	__u32 csum;

	if (!jbd2_journal_has_csum_v2or3(j))
		return 1;

	be_seq = cpu_to_be32(sequence);
	csum = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&be_seq, sizeof(be_seq));
	csum = jbd2_chksum(j, csum, buf, j->j_blocksize);

	return jbd2_has_feature_csum3(j) ?
		tag3->t_checksum == cpu_to_be32(csum) :
		tag->t_checksum == cpu_to_be16(csum);
}
489
jbd2_do_replay(journal_t * journal,struct recovery_info * info,struct buffer_head * bh,unsigned long * next_log_block,unsigned int next_commit_ID)490 static __always_inline int jbd2_do_replay(journal_t *journal,
491 struct recovery_info *info,
492 struct buffer_head *bh,
493 unsigned long *next_log_block,
494 unsigned int next_commit_ID)
495 {
496 char *tagp;
497 int flags;
498 int ret = 0;
499 int tag_bytes = journal_tag_bytes(journal);
500 int descr_csum_size = 0;
501 unsigned long io_block;
502 journal_block_tag_t tag;
503 struct buffer_head *obh;
504 struct buffer_head *nbh;
505
506 if (jbd2_journal_has_csum_v2or3(journal))
507 descr_csum_size = sizeof(struct jbd2_journal_block_tail);
508
509 tagp = &bh->b_data[sizeof(journal_header_t)];
510 while (tagp - bh->b_data + tag_bytes <=
511 journal->j_blocksize - descr_csum_size) {
512 int err;
513
514 memcpy(&tag, tagp, sizeof(tag));
515 flags = be16_to_cpu(tag.t_flags);
516
517 io_block = (*next_log_block)++;
518 wrap(journal, *next_log_block);
519 err = jread(&obh, journal, io_block);
520 if (err) {
521 /* Recover what we can, but report failure at the end. */
522 ret = err;
523 pr_err("JBD2: IO error %d recovering block %lu in log\n",
524 err, io_block);
525 } else {
526 unsigned long long blocknr;
527
528 J_ASSERT(obh != NULL);
529 blocknr = read_tag_block(journal, &tag);
530
531 /* If the block has been revoked, then we're all done here. */
532 if (jbd2_journal_test_revoke(journal, blocknr,
533 next_commit_ID)) {
534 brelse(obh);
535 ++info->nr_revoke_hits;
536 goto skip_write;
537 }
538
539 /* Look for block corruption */
540 if (!jbd2_block_tag_csum_verify(journal, &tag,
541 (journal_block_tag3_t *)tagp,
542 obh->b_data, next_commit_ID)) {
543 brelse(obh);
544 ret = -EFSBADCRC;
545 pr_err("JBD2: Invalid checksum recovering data block %llu in journal block %lu\n",
546 blocknr, io_block);
547 goto skip_write;
548 }
549
550 /* Find a buffer for the new data being restored */
551 nbh = __getblk(journal->j_fs_dev, blocknr,
552 journal->j_blocksize);
553 if (nbh == NULL) {
554 pr_err("JBD2: Out of memory during recovery.\n");
555 brelse(obh);
556 return -ENOMEM;
557 }
558
559 lock_buffer(nbh);
560 memcpy(nbh->b_data, obh->b_data, journal->j_blocksize);
561 if (flags & JBD2_FLAG_ESCAPE) {
562 *((__be32 *)nbh->b_data) =
563 cpu_to_be32(JBD2_MAGIC_NUMBER);
564 }
565
566 BUFFER_TRACE(nbh, "marking dirty");
567 set_buffer_uptodate(nbh);
568 mark_buffer_dirty(nbh);
569 BUFFER_TRACE(nbh, "marking uptodate");
570 ++info->nr_replays;
571 unlock_buffer(nbh);
572 brelse(obh);
573 brelse(nbh);
574 }
575
576 skip_write:
577 tagp += tag_bytes;
578 if (!(flags & JBD2_FLAG_SAME_UUID))
579 tagp += 16;
580
581 if (flags & JBD2_FLAG_LAST_TAG)
582 break;
583 }
584
585 return ret;
586 }
587
/*
 * do_one_pass - walk the log once on behalf of one recovery pass.
 * @journal: journal being recovered
 * @info: state carried between the passes
 * @pass: PASS_SCAN, PASS_REVOKE or PASS_REPLAY
 *
 * Starting at the superblock's s_start / s_sequence, the log is read
 * block by block for as long as every block carries the JBD2 magic and
 * the expected transaction ID:
 *
 *  - PASS_SCAN checks descriptor, revoke and commit block checksums,
 *    counts revoke records and records where the log ends in
 *    info->end_transaction and info->head_block;
 *  - PASS_REVOKE hands revoke blocks to scan_revoke_records(), possibly
 *    after switching to a larger revoke hash table;
 *  - PASS_REPLAY hands descriptor blocks to jbd2_do_replay().
 *
 * When the fast-commit feature is enabled, every pass except PASS_REVOKE
 * finishes by calling fc_do_one_pass().
 *
 * Returns 0 on success.  Failures that abort the walk are returned
 * directly; errors noted while the walk carries on (replay errors, a pass
 * ending at an unexpected transaction, the fc_do_one_pass() result) are
 * returned once the walk is over.
 */
static int do_one_pass(journal_t *journal,
			struct recovery_info *info, enum passtype pass)
{
	unsigned int		first_commit_ID, next_commit_ID;
	unsigned long		next_log_block, head_block;
	unsigned long		this_block;	/* block held in bh, for diagnostics */
	int			err, success = 0;
	journal_superblock_t *	sb;
	journal_header_t *	tmp;
	struct buffer_head	*bh = NULL;
	unsigned int		sequence;
	int			blocktype;
	__u32			crc32_sum = ~0; /* Transactional Checksums */
	bool			need_check_commit_time = false;
	__u64			last_trans_commit_time = 0, commit_time;

	/*
	 * First thing is to establish what we expect to find in the log
	 * (in terms of transaction IDs), and where (in terms of log
	 * block offsets): query the superblock.
	 */

	sb = journal->j_superblock;
	next_commit_ID = be32_to_cpu(sb->s_sequence);
	next_log_block = be32_to_cpu(sb->s_start);
	head_block = next_log_block;

	first_commit_ID = next_commit_ID;
	if (pass == PASS_SCAN)
		info->start_transaction = first_commit_ID;
	else if (pass == PASS_REVOKE) {
		/*
		 * Would the default revoke table have too long hash chains
		 * during replay?
		 */
		if (info->nr_revokes > JOURNAL_REVOKE_DEFAULT_HASH * 16) {
			unsigned int hash_size;

			/*
			 * Aim for average chain length of 8, limit at 1M
			 * entries to avoid problems with malicious
			 * filesystems.
			 */
			hash_size = min(roundup_pow_of_two(info->nr_revokes / 8),
					1U << 20);
			journal->j_revoke =
				jbd2_journal_init_revoke_table(hash_size);
			if (!journal->j_revoke) {
				printk(KERN_ERR
				       "JBD2: failed to allocate revoke table for replay with %u entries. "
				       "Journal replay may be slow.\n", hash_size);
				/* Fall back to one of the preallocated tables. */
				journal->j_revoke = journal->j_revoke_table[1];
			}
		}
	}

	jbd2_debug(1, "Starting recovery pass %d\n", pass);

	/*
	 * Now we walk through the log, transaction by transaction,
	 * making sure that each transaction has a commit block in the
	 * expected place.  Each complete transaction gets replayed back
	 * into the main filesystem.
	 *
	 * NOTE: a bare "break" inside the switch below only leaves the
	 * switch; since nothing follows the switch in the loop body, the
	 * loop then simply goes around again.  Only the breaks placed
	 * directly in the loop body, and the gotos, end the walk.
	 */

	while (1) {
		cond_resched();

		/* If we already know where to stop the log traversal,
		 * check right now that we haven't gone past the end of
		 * the log. */

		if (pass != PASS_SCAN)
			if (tid_geq(next_commit_ID, info->end_transaction))
				break;

		jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
			  next_commit_ID, next_log_block, journal->j_last);

		/* Skip over each chunk of the transaction looking
		 * either the next descriptor block or the final commit
		 * record. */

		jbd2_debug(3, "JBD2: checking block %lu\n", next_log_block);
		brelse(bh);
		bh = NULL;
		err = jread(&bh, journal, next_log_block);
		if (err)
			goto failed;

		/*
		 * Remember which block is in bh: next_log_block is about to
		 * move on, and diagnostics must name the block just read.
		 */
		this_block = next_log_block;
		next_log_block++;
		wrap(journal, next_log_block);

		/* What kind of buffer is it?
		 *
		 * If it is a descriptor block, check that it has the
		 * expected sequence number.  Otherwise, we're all done
		 * here. */

		tmp = (journal_header_t *)bh->b_data;

		if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER))
			break;

		blocktype = be32_to_cpu(tmp->h_blocktype);
		sequence = be32_to_cpu(tmp->h_sequence);
		jbd2_debug(3, "Found magic %d, sequence %u\n",
			  blocktype, sequence);

		if (sequence != next_commit_ID)
			break;

		/* OK, we have a valid descriptor block which matches
		 * all of the sequence number checks.  What are we going
		 * to do with it?  That depends on the pass... */

		switch(blocktype) {
		case JBD2_DESCRIPTOR_BLOCK:
			/* Verify checksum first */
			if (!jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				/*
				 * PASS_SCAN can see stale blocks due to lazy
				 * journal init. Don't error out on those yet.
				 */
				if (pass != PASS_SCAN) {
					pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
					       this_block);
					err = -EFSBADCRC;
					goto failed;
				}
				need_check_commit_time = true;
				jbd2_debug(1,
					"invalid descriptor block found in %lu\n",
					this_block);
			}

			/* If it is a valid descriptor block, replay it
			 * in pass REPLAY; if journal_checksums enabled, then
			 * calculate checksums in PASS_SCAN, otherwise,
			 * just skip over the blocks it describes. */
			if (pass != PASS_REPLAY) {
				if (pass == PASS_SCAN &&
				    jbd2_has_feature_checksum(journal) &&
				    !info->end_transaction) {
					if (calc_chksums(journal, bh,
							&next_log_block,
							&crc32_sum))
						break;
					continue;
				}
				next_log_block += count_tags(journal, bh);
				wrap(journal, next_log_block);
				continue;
			}

			/*
			 * A descriptor block: we can now write all of the
			 * data blocks.  Yay, useful work is finally getting
			 * done here!
			 */
			err = jbd2_do_replay(journal, info, bh, &next_log_block,
					     next_commit_ID);
			if (err) {
				/* Only running out of memory aborts the pass. */
				if (err == -ENOMEM)
					goto failed;
				success = err;
			}

			continue;

		case JBD2_COMMIT_BLOCK:
			if (pass != PASS_SCAN) {
				next_commit_ID++;
				continue;
			}

			/*     How to differentiate between interrupted commit
			 *               and journal corruption ?
			 *
			 * {nth transaction}
			 *        Checksum Verification Failed
			 *                       |
			 *               ____________________
			 *              |                    |
			 *      async_commit             sync_commit
			 *              |                    |
			 *              | GO TO NEXT    "Journal Corruption"
			 *              | TRANSACTION
			 *              |
			 * {(n+1)th transaction}
			 *              |
			 *       _______|______________
			 *      |                      |
			 * Commit block found   Commit block not found
			 *      |                      |
			 * "Journal Corruption"        |
			 *               ______________|_________
			 *              |                        |
			 *      nth trans corrupt       OR   nth trans
			 *      and (n+1)th interrupted     interrupted
			 *      before commit block
			 *      could reach the disk.
			 *      (Cannot find the difference in above
			 *       mentioned conditions. Hence assume
			 *       "Interrupted Commit".)
			 */
			commit_time = be64_to_cpu(
				((struct commit_header *)bh->b_data)->h_commit_sec);
			/*
			 * If need_check_commit_time is set, it means we are in
			 * PASS_SCAN and csum verify failed before. If
			 * commit_time is increasing, it's the same journal,
			 * otherwise it is stale journal block, just end this
			 * recovery.
			 */
			if (need_check_commit_time) {
				if (commit_time >= last_trans_commit_time) {
					pr_err("JBD2: Invalid checksum found in transaction %u\n",
					       next_commit_ID);
					err = -EFSBADCRC;
					goto failed;
				}
			ignore_crc_mismatch:
				/*
				 * It likely does not belong to same journal,
				 * just end this recovery with success.
				 */
				jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
					  next_commit_ID);
				goto done;
			}

			/*
			 * Found an expected commit block: if checksums
			 * are present, verify them in PASS_SCAN; else not
			 * much to do other than move on to the next sequence
			 * number.
			 */
			if (jbd2_has_feature_checksum(journal)) {
				struct commit_header *cbh =
					(struct commit_header *)bh->b_data;
				unsigned found_chksum =
					be32_to_cpu(cbh->h_chksum[0]);

				if (info->end_transaction) {
					journal->j_failed_commit =
						info->end_transaction;
					break;
				}

				/* Neither checksum match nor unused? */
				if (!((crc32_sum == found_chksum &&
				       cbh->h_chksum_type ==
						JBD2_CRC32_CHKSUM &&
				       cbh->h_chksum_size ==
						JBD2_CRC32_CHKSUM_SIZE) ||
				      (cbh->h_chksum_type == 0 &&
				       cbh->h_chksum_size == 0 &&
				       found_chksum == 0)))
					goto chksum_error;

				/* Start a fresh sum for the next transaction. */
				crc32_sum = ~0;
				goto chksum_ok;
			}

			if (jbd2_commit_block_csum_verify(journal, bh->b_data))
				goto chksum_ok;

			if (jbd2_commit_block_csum_verify_partial(journal,
								  bh->b_data)) {
				pr_notice("JBD2: Find incomplete commit block in transaction %u block %lu\n",
					  next_commit_ID, this_block);
				goto chksum_ok;
			}

		chksum_error:
			if (commit_time < last_trans_commit_time)
				goto ignore_crc_mismatch;
			info->end_transaction = next_commit_ID;
			info->head_block = head_block;

			if (!jbd2_has_feature_async_commit(journal)) {
				journal->j_failed_commit = next_commit_ID;
				break;
			}

		chksum_ok:
			last_trans_commit_time = commit_time;
			head_block = next_log_block;
			next_commit_ID++;
			continue;

		case JBD2_REVOKE_BLOCK:
			/*
			 * If we aren't in the SCAN or REVOKE pass, then we can
			 * just skip over this block.
			 */
			if (pass != PASS_REVOKE && pass != PASS_SCAN)
				continue;

			/*
			 * Check revoke block crc in pass_scan, if csum verify
			 * failed, check commit block time later.
			 */
			if (pass == PASS_SCAN &&
			    !jbd2_descriptor_block_csum_verify(journal,
							       bh->b_data)) {
				jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
					  this_block);
				need_check_commit_time = true;
			}

			err = scan_revoke_records(journal, pass, bh,
						  next_commit_ID, info);
			if (err)
				goto failed;
			continue;

		default:
			jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
				  blocktype);
			goto done;
		}
	}

 done:
	brelse(bh);
	/*
	 * We broke out of the log scan loop: either we came to the
	 * known end of the log or we found an unexpected block in the
	 * log.  If the latter happened, then we know that the "current"
	 * transaction marks the end of the valid log.
	 */

	if (pass == PASS_SCAN) {
		if (!info->end_transaction)
			info->end_transaction = next_commit_ID;
		if (!info->head_block)
			info->head_block = head_block;
	} else {
		/* It's really bad news if different passes end up at
		 * different places (but possible due to IO errors). */
		if (info->end_transaction != next_commit_ID) {
			printk(KERN_ERR "JBD2: recovery pass %d ended at "
				"transaction %u, expected %u\n",
				pass, next_commit_ID, info->end_transaction);
			if (!success)
				success = -EIO;
		}
	}

	if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
		err = fc_do_one_pass(journal, info, pass);
		if (err)
			success = err;
	}

	return success;

 failed:
	brelse(bh);
	return err;
}
951
952 /* Scan a revoke record, marking all blocks mentioned as revoked. */
953
scan_revoke_records(journal_t * journal,enum passtype pass,struct buffer_head * bh,tid_t sequence,struct recovery_info * info)954 static int scan_revoke_records(journal_t *journal, enum passtype pass,
955 struct buffer_head *bh, tid_t sequence,
956 struct recovery_info *info)
957 {
958 jbd2_journal_revoke_header_t *header;
959 int offset, max;
960 unsigned csum_size = 0;
961 __u32 rcount;
962 int record_len = 4;
963
964 header = (jbd2_journal_revoke_header_t *) bh->b_data;
965 offset = sizeof(jbd2_journal_revoke_header_t);
966 rcount = be32_to_cpu(header->r_count);
967
968 if (jbd2_journal_has_csum_v2or3(journal))
969 csum_size = sizeof(struct jbd2_journal_block_tail);
970 if (rcount > journal->j_blocksize - csum_size)
971 return -EINVAL;
972 max = rcount;
973
974 if (jbd2_has_feature_64bit(journal))
975 record_len = 8;
976
977 if (pass == PASS_SCAN) {
978 info->nr_revokes += (max - offset) / record_len;
979 return 0;
980 }
981
982 while (offset + record_len <= max) {
983 unsigned long long blocknr;
984 int err;
985
986 if (record_len == 4)
987 blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
988 else
989 blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
990 offset += record_len;
991 err = jbd2_journal_set_revoke(journal, blocknr, sequence);
992 if (err)
993 return err;
994 }
995 return 0;
996 }
997