1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "alloc_background.h"
5 #include "bkey_buf.h"
6 #include "btree_journal_iter.h"
7 #include "btree_node_scan.h"
8 #include "btree_update.h"
9 #include "btree_update_interior.h"
10 #include "btree_io.h"
11 #include "buckets.h"
12 #include "dirent.h"
13 #include "disk_accounting.h"
14 #include "errcode.h"
15 #include "error.h"
16 #include "journal_io.h"
17 #include "journal_reclaim.h"
18 #include "journal_seq_blacklist.h"
19 #include "logged_ops.h"
20 #include "move.h"
21 #include "namei.h"
22 #include "quota.h"
23 #include "rebalance.h"
24 #include "recovery.h"
25 #include "recovery_passes.h"
26 #include "replicas.h"
27 #include "sb-clean.h"
28 #include "sb-downgrade.h"
29 #include "snapshot.h"
30 #include "super-io.h"
31
32 #include <linux/sort.h>
33 #include <linux/stat.h>
34
35
bch2_btree_lost_data(struct bch_fs * c,enum btree_id btree)36 int bch2_btree_lost_data(struct bch_fs *c, enum btree_id btree)
37 {
38 u64 b = BIT_ULL(btree);
39 int ret = 0;
40
41 mutex_lock(&c->sb_lock);
42 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
43
44 if (!(c->sb.btrees_lost_data & b)) {
45 struct printbuf buf = PRINTBUF;
46 bch2_btree_id_to_text(&buf, btree);
47 bch_err(c, "flagging btree %s lost data", buf.buf);
48 printbuf_exit(&buf);
49 ext->btrees_lost_data |= cpu_to_le64(b);
50 }
51
52 /* Once we have runtime self healing for topology errors we won't need this: */
53 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_topology) ?: ret;
54
55 /* Btree node accounting will be off: */
56 __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
57 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
58
59 #ifdef CONFIG_BCACHEFS_DEBUG
60 /*
61 * These are much more minor, and don't need to be corrected right away,
62 * but in debug mode we want the next fsck run to be clean:
63 */
64 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_lrus) ?: ret;
65 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_backpointers_to_extents) ?: ret;
66 #endif
67
68 switch (btree) {
69 case BTREE_ID_alloc:
70 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
71
72 __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
73 __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
74 __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
75 __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent);
76 __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
77 __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
78 goto out;
79 case BTREE_ID_backpointers:
80 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_btree_backpointers) ?: ret;
81 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_extents_to_backpointers) ?: ret;
82 goto out;
83 case BTREE_ID_need_discard:
84 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
85 goto out;
86 case BTREE_ID_freespace:
87 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
88 goto out;
89 case BTREE_ID_bucket_gens:
90 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
91 goto out;
92 case BTREE_ID_lru:
93 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_alloc_info) ?: ret;
94 goto out;
95 case BTREE_ID_accounting:
96 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_check_allocations) ?: ret;
97 goto out;
98 default:
99 ret = bch2_run_explicit_recovery_pass_persistent_locked(c, BCH_RECOVERY_PASS_scan_for_btree_nodes) ?: ret;
100 goto out;
101 }
102 out:
103 bch2_write_super(c);
104 mutex_unlock(&c->sb_lock);
105
106 return ret;
107 }
108
kill_btree(struct bch_fs * c,enum btree_id btree)109 static void kill_btree(struct bch_fs *c, enum btree_id btree)
110 {
111 bch2_btree_id_root(c, btree)->alive = false;
112 bch2_shoot_down_journal_keys(c, btree, 0, BTREE_MAX_DEPTH, POS_MIN, SPOS_MAX);
113 }
114
115 /* for -o reconstruct_alloc: */
bch2_reconstruct_alloc(struct bch_fs * c)116 static void bch2_reconstruct_alloc(struct bch_fs *c)
117 {
118 bch2_journal_log_msg(c, "dropping alloc info");
119 bch_info(c, "dropping and reconstructing all alloc info");
120
121 mutex_lock(&c->sb_lock);
122 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
123
124 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
125 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_alloc_info, ext->recovery_passes_required);
126 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_lrus, ext->recovery_passes_required);
127 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_extents_to_backpointers, ext->recovery_passes_required);
128 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_alloc_to_lru_refs, ext->recovery_passes_required);
129
130 __set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_alloc_key, ext->errors_silent);
131 __set_bit_le64(BCH_FSCK_ERR_ptr_gen_newer_than_bucket_gen, ext->errors_silent);
132 __set_bit_le64(BCH_FSCK_ERR_stale_dirty_ptr, ext->errors_silent);
133
134 __set_bit_le64(BCH_FSCK_ERR_dev_usage_buckets_wrong, ext->errors_silent);
135 __set_bit_le64(BCH_FSCK_ERR_dev_usage_sectors_wrong, ext->errors_silent);
136 __set_bit_le64(BCH_FSCK_ERR_dev_usage_fragmented_wrong, ext->errors_silent);
137
138 __set_bit_le64(BCH_FSCK_ERR_fs_usage_btree_wrong, ext->errors_silent);
139 __set_bit_le64(BCH_FSCK_ERR_fs_usage_cached_wrong, ext->errors_silent);
140 __set_bit_le64(BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, ext->errors_silent);
141 __set_bit_le64(BCH_FSCK_ERR_fs_usage_replicas_wrong, ext->errors_silent);
142
143 __set_bit_le64(BCH_FSCK_ERR_alloc_key_to_missing_lru_entry, ext->errors_silent);
144
145 __set_bit_le64(BCH_FSCK_ERR_alloc_key_data_type_wrong, ext->errors_silent);
146 __set_bit_le64(BCH_FSCK_ERR_alloc_key_gen_wrong, ext->errors_silent);
147 __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
148 __set_bit_le64(BCH_FSCK_ERR_alloc_key_cached_sectors_wrong, ext->errors_silent);
149 __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_wrong, ext->errors_silent);
150 __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_redundancy_wrong, ext->errors_silent);
151 __set_bit_le64(BCH_FSCK_ERR_need_discard_key_wrong, ext->errors_silent);
152 __set_bit_le64(BCH_FSCK_ERR_freespace_key_wrong, ext->errors_silent);
153 __set_bit_le64(BCH_FSCK_ERR_bucket_gens_key_wrong, ext->errors_silent);
154 __set_bit_le64(BCH_FSCK_ERR_freespace_hole_missing, ext->errors_silent);
155 __set_bit_le64(BCH_FSCK_ERR_ptr_to_missing_backpointer, ext->errors_silent);
156 __set_bit_le64(BCH_FSCK_ERR_lru_entry_bad, ext->errors_silent);
157 __set_bit_le64(BCH_FSCK_ERR_accounting_mismatch, ext->errors_silent);
158 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
159
160 c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
161
162 bch2_write_super(c);
163 mutex_unlock(&c->sb_lock);
164
165 for (unsigned i = 0; i < btree_id_nr_alive(c); i++)
166 if (btree_id_is_alloc(i))
167 kill_btree(c, i);
168 }
169
170 /*
171 * Btree node pointers have a field to stack a pointer to the in memory btree
172 * node; we need to zero out this field when reading in btree nodes, or when
173 * reading in keys from the journal:
174 */
zero_out_btree_mem_ptr(struct journal_keys * keys)175 static void zero_out_btree_mem_ptr(struct journal_keys *keys)
176 {
177 darray_for_each(*keys, i)
178 if (i->k->k.type == KEY_TYPE_btree_ptr_v2)
179 bkey_i_to_btree_ptr_v2(i->k)->v.mem_ptr = 0;
180 }
181
182 /* journal replay: */
183
/*
 * Advance the journal replay position to @seq (clamped to
 * replay_journal_seq_end), dropping a journal pin for each sequence number
 * passed over.
 *
 * It is a BUG for @seq to be behind the current replay position.
 */
static void replay_now_at(struct journal *j, u64 seq)
{
	BUG_ON(seq < j->replay_journal_seq);

	seq = min(seq, j->replay_journal_seq_end);

	while (j->replay_journal_seq < seq) {
		/* Bump the position first, then release the pin we just left: */
		u64 finished = j->replay_journal_seq++;

		bch2_journal_pin_put(j, finished);
	}
}
193
bch2_journal_replay_accounting_key(struct btree_trans * trans,struct journal_key * k)194 static int bch2_journal_replay_accounting_key(struct btree_trans *trans,
195 struct journal_key *k)
196 {
197 struct btree_iter iter;
198 bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
199 BTREE_MAX_DEPTH, k->level,
200 BTREE_ITER_intent);
201 int ret = bch2_btree_iter_traverse(trans, &iter);
202 if (ret)
203 goto out;
204
205 struct bkey u;
206 struct bkey_s_c old = bch2_btree_path_peek_slot(btree_iter_path(trans, &iter), &u);
207
208 /* Has this delta already been applied to the btree? */
209 if (bversion_cmp(old.k->bversion, k->k->k.bversion) >= 0) {
210 ret = 0;
211 goto out;
212 }
213
214 struct bkey_i *new = k->k;
215 if (old.k->type == KEY_TYPE_accounting) {
216 new = bch2_bkey_make_mut_noupdate(trans, bkey_i_to_s_c(k->k));
217 ret = PTR_ERR_OR_ZERO(new);
218 if (ret)
219 goto out;
220
221 bch2_accounting_accumulate(bkey_i_to_accounting(new),
222 bkey_s_c_to_accounting(old));
223 }
224
225 trans->journal_res.seq = k->journal_seq;
226
227 ret = bch2_trans_update(trans, &iter, new, BTREE_TRIGGER_norun);
228 out:
229 bch2_trans_iter_exit(trans, &iter);
230 return ret;
231 }
232
bch2_journal_replay_key(struct btree_trans * trans,struct journal_key * k)233 static int bch2_journal_replay_key(struct btree_trans *trans,
234 struct journal_key *k)
235 {
236 struct btree_iter iter;
237 unsigned iter_flags =
238 BTREE_ITER_intent|
239 BTREE_ITER_not_extents;
240 unsigned update_flags = BTREE_TRIGGER_norun;
241 int ret;
242
243 if (k->overwritten)
244 return 0;
245
246 trans->journal_res.seq = k->journal_seq;
247
248 /*
249 * BTREE_UPDATE_key_cache_reclaim disables key cache lookup/update to
250 * keep the key cache coherent with the underlying btree. Nothing
251 * besides the allocator is doing updates yet so we don't need key cache
252 * coherency for non-alloc btrees, and key cache fills for snapshots
253 * btrees use BTREE_ITER_filter_snapshots, which isn't available until
254 * the snapshots recovery pass runs.
255 */
256 if (!k->level && k->btree_id == BTREE_ID_alloc)
257 iter_flags |= BTREE_ITER_cached;
258 else
259 update_flags |= BTREE_UPDATE_key_cache_reclaim;
260
261 bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
262 BTREE_MAX_DEPTH, k->level,
263 iter_flags);
264 ret = bch2_btree_iter_traverse(trans, &iter);
265 if (ret)
266 goto out;
267
268 struct btree_path *path = btree_iter_path(trans, &iter);
269 if (unlikely(!btree_path_node(path, k->level))) {
270 bch2_trans_iter_exit(trans, &iter);
271 bch2_trans_node_iter_init(trans, &iter, k->btree_id, k->k->k.p,
272 BTREE_MAX_DEPTH, 0, iter_flags);
273 ret = bch2_btree_iter_traverse(trans, &iter) ?:
274 bch2_btree_increase_depth(trans, iter.path, 0) ?:
275 -BCH_ERR_transaction_restart_nested;
276 goto out;
277 }
278
279 /* Must be checked with btree locked: */
280 if (k->overwritten)
281 goto out;
282
283 if (k->k->k.type == KEY_TYPE_accounting) {
284 ret = bch2_trans_update_buffered(trans, BTREE_ID_accounting, k->k);
285 goto out;
286 }
287
288 ret = bch2_trans_update(trans, &iter, k->k, update_flags);
289 out:
290 bch2_trans_iter_exit(trans, &iter);
291 return ret;
292 }
293
journal_sort_seq_cmp(const void * _l,const void * _r)294 static int journal_sort_seq_cmp(const void *_l, const void *_r)
295 {
296 const struct journal_key *l = *((const struct journal_key **)_l);
297 const struct journal_key *r = *((const struct journal_key **)_r);
298
299 /*
300 * Map 0 to U64_MAX, so that keys with journal_seq === 0 come last
301 *
302 * journal_seq == 0 means that the key comes from early repair, and
303 * should be inserted last so as to avoid overflowing the journal
304 */
305 return cmp_int(l->journal_seq - 1, r->journal_seq - 1);
306 }
307
bch2_journal_replay(struct bch_fs * c)308 int bch2_journal_replay(struct bch_fs *c)
309 {
310 struct journal_keys *keys = &c->journal_keys;
311 DARRAY(struct journal_key *) keys_sorted = { 0 };
312 struct journal *j = &c->journal;
313 u64 start_seq = c->journal_replay_seq_start;
314 u64 end_seq = c->journal_replay_seq_start;
315 struct btree_trans *trans = NULL;
316 bool immediate_flush = false;
317 int ret = 0;
318
319 if (keys->nr) {
320 ret = bch2_journal_log_msg(c, "Starting journal replay (%zu keys in entries %llu-%llu)",
321 keys->nr, start_seq, end_seq);
322 if (ret)
323 goto err;
324 }
325
326 BUG_ON(!atomic_read(&keys->ref));
327
328 move_gap(keys, keys->nr);
329 trans = bch2_trans_get(c);
330
331 /*
332 * Replay accounting keys first: we can't allow the write buffer to
333 * flush accounting keys until we're done
334 */
335 darray_for_each(*keys, k) {
336 if (!(k->k->k.type == KEY_TYPE_accounting && !k->allocated))
337 continue;
338
339 cond_resched();
340
341 ret = commit_do(trans, NULL, NULL,
342 BCH_TRANS_COMMIT_no_enospc|
343 BCH_TRANS_COMMIT_journal_reclaim|
344 BCH_TRANS_COMMIT_skip_accounting_apply|
345 BCH_TRANS_COMMIT_no_journal_res|
346 BCH_WATERMARK_reclaim,
347 bch2_journal_replay_accounting_key(trans, k));
348 if (bch2_fs_fatal_err_on(ret, c, "error replaying accounting; %s", bch2_err_str(ret)))
349 goto err;
350
351 k->overwritten = true;
352 }
353
354 set_bit(BCH_FS_accounting_replay_done, &c->flags);
355
356 /*
357 * First, attempt to replay keys in sorted order. This is more
358 * efficient - better locality of btree access - but some might fail if
359 * that would cause a journal deadlock.
360 */
361 darray_for_each(*keys, k) {
362 cond_resched();
363
364 /*
365 * k->allocated means the key wasn't read in from the journal,
366 * rather it was from early repair code
367 */
368 if (k->allocated)
369 immediate_flush = true;
370
371 /* Skip fastpath if we're low on space in the journal */
372 ret = c->journal.watermark ? -1 :
373 commit_do(trans, NULL, NULL,
374 BCH_TRANS_COMMIT_no_enospc|
375 BCH_TRANS_COMMIT_journal_reclaim|
376 BCH_TRANS_COMMIT_skip_accounting_apply|
377 (!k->allocated ? BCH_TRANS_COMMIT_no_journal_res : 0),
378 bch2_journal_replay_key(trans, k));
379 BUG_ON(!ret && !k->overwritten && k->k->k.type != KEY_TYPE_accounting);
380 if (ret) {
381 ret = darray_push(&keys_sorted, k);
382 if (ret)
383 goto err;
384 }
385 }
386
387 bch2_trans_unlock_long(trans);
388 /*
389 * Now, replay any remaining keys in the order in which they appear in
390 * the journal, unpinning those journal entries as we go:
391 */
392 sort(keys_sorted.data, keys_sorted.nr,
393 sizeof(keys_sorted.data[0]),
394 journal_sort_seq_cmp, NULL);
395
396 darray_for_each(keys_sorted, kp) {
397 cond_resched();
398
399 struct journal_key *k = *kp;
400
401 if (k->journal_seq)
402 replay_now_at(j, k->journal_seq);
403 else
404 replay_now_at(j, j->replay_journal_seq_end);
405
406 ret = commit_do(trans, NULL, NULL,
407 BCH_TRANS_COMMIT_no_enospc|
408 BCH_TRANS_COMMIT_skip_accounting_apply|
409 (!k->allocated
410 ? BCH_TRANS_COMMIT_no_journal_res|BCH_WATERMARK_reclaim
411 : 0),
412 bch2_journal_replay_key(trans, k));
413 if (ret) {
414 struct printbuf buf = PRINTBUF;
415 bch2_btree_id_level_to_text(&buf, k->btree_id, k->level);
416 bch_err_msg(c, ret, "while replaying key at %s:", buf.buf);
417 printbuf_exit(&buf);
418 goto err;
419 }
420
421 BUG_ON(k->btree_id != BTREE_ID_accounting && !k->overwritten);
422 }
423
424 /*
425 * We need to put our btree_trans before calling flush_all_pins(), since
426 * that will use a btree_trans internally
427 */
428 bch2_trans_put(trans);
429 trans = NULL;
430
431 if (!c->opts.retain_recovery_info &&
432 c->recovery_pass_done >= BCH_RECOVERY_PASS_journal_replay)
433 bch2_journal_keys_put_initial(c);
434
435 replay_now_at(j, j->replay_journal_seq_end);
436 j->replay_journal_seq = 0;
437
438 bch2_journal_set_replay_done(j);
439
440 /* if we did any repair, flush it immediately */
441 if (immediate_flush) {
442 bch2_journal_flush_all_pins(&c->journal);
443 ret = bch2_journal_meta(&c->journal);
444 }
445
446 if (keys->nr)
447 bch2_journal_log_msg(c, "journal replay finished");
448 err:
449 if (trans)
450 bch2_trans_put(trans);
451 darray_exit(&keys_sorted);
452 bch_err_fn(c, ret);
453 return ret;
454 }
455
456 /* journal replay early: */
457
journal_replay_entry_early(struct bch_fs * c,struct jset_entry * entry)458 static int journal_replay_entry_early(struct bch_fs *c,
459 struct jset_entry *entry)
460 {
461 int ret = 0;
462
463 switch (entry->type) {
464 case BCH_JSET_ENTRY_btree_root: {
465
466 if (unlikely(!entry->u64s))
467 return 0;
468
469 if (fsck_err_on(entry->btree_id >= BTREE_ID_NR_MAX,
470 c, invalid_btree_id,
471 "invalid btree id %u (max %u)",
472 entry->btree_id, BTREE_ID_NR_MAX))
473 return 0;
474
475 while (entry->btree_id >= c->btree_roots_extra.nr + BTREE_ID_NR) {
476 ret = darray_push(&c->btree_roots_extra, (struct btree_root) { NULL });
477 if (ret)
478 return ret;
479 }
480
481 struct btree_root *r = bch2_btree_id_root(c, entry->btree_id);
482
483 r->level = entry->level;
484 bkey_copy(&r->key, (struct bkey_i *) entry->start);
485 r->error = 0;
486 r->alive = true;
487 break;
488 }
489 case BCH_JSET_ENTRY_usage: {
490 struct jset_entry_usage *u =
491 container_of(entry, struct jset_entry_usage, entry);
492
493 switch (entry->btree_id) {
494 case BCH_FS_USAGE_key_version:
495 atomic64_set(&c->key_version, le64_to_cpu(u->v));
496 break;
497 }
498 break;
499 }
500 case BCH_JSET_ENTRY_blacklist: {
501 struct jset_entry_blacklist *bl_entry =
502 container_of(entry, struct jset_entry_blacklist, entry);
503
504 ret = bch2_journal_seq_blacklist_add(c,
505 le64_to_cpu(bl_entry->seq),
506 le64_to_cpu(bl_entry->seq) + 1);
507 break;
508 }
509 case BCH_JSET_ENTRY_blacklist_v2: {
510 struct jset_entry_blacklist_v2 *bl_entry =
511 container_of(entry, struct jset_entry_blacklist_v2, entry);
512
513 ret = bch2_journal_seq_blacklist_add(c,
514 le64_to_cpu(bl_entry->start),
515 le64_to_cpu(bl_entry->end) + 1);
516 break;
517 }
518 case BCH_JSET_ENTRY_clock: {
519 struct jset_entry_clock *clock =
520 container_of(entry, struct jset_entry_clock, entry);
521
522 atomic64_set(&c->io_clock[clock->rw].now, le64_to_cpu(clock->time));
523 }
524 }
525 fsck_err:
526 return ret;
527 }
528
journal_replay_early(struct bch_fs * c,struct bch_sb_field_clean * clean)529 static int journal_replay_early(struct bch_fs *c,
530 struct bch_sb_field_clean *clean)
531 {
532 if (clean) {
533 for (struct jset_entry *entry = clean->start;
534 entry != vstruct_end(&clean->field);
535 entry = vstruct_next(entry)) {
536 int ret = journal_replay_entry_early(c, entry);
537 if (ret)
538 return ret;
539 }
540 } else {
541 struct genradix_iter iter;
542 struct journal_replay *i, **_i;
543
544 genradix_for_each(&c->journal_entries, iter, _i) {
545 i = *_i;
546
547 if (journal_replay_ignore(i))
548 continue;
549
550 vstruct_for_each(&i->j, entry) {
551 int ret = journal_replay_entry_early(c, entry);
552 if (ret)
553 return ret;
554 }
555 }
556 }
557
558 return 0;
559 }
560
561 /* sb clean section: */
562
read_btree_roots(struct bch_fs * c)563 static int read_btree_roots(struct bch_fs *c)
564 {
565 struct printbuf buf = PRINTBUF;
566 int ret = 0;
567
568 for (unsigned i = 0; i < btree_id_nr_alive(c); i++) {
569 struct btree_root *r = bch2_btree_id_root(c, i);
570
571 if (!r->alive)
572 continue;
573
574 printbuf_reset(&buf);
575 bch2_btree_id_level_to_text(&buf, i, r->level);
576
577 if (mustfix_fsck_err_on((ret = r->error),
578 c, btree_root_bkey_invalid,
579 "invalid btree root %s",
580 buf.buf) ||
581 mustfix_fsck_err_on((ret = r->error = bch2_btree_root_read(c, i, &r->key, r->level)),
582 c, btree_root_read_error,
583 "error reading btree root %s: %s",
584 buf.buf, bch2_err_str(ret))) {
585 if (btree_id_is_alloc(i))
586 r->error = 0;
587
588 ret = bch2_btree_lost_data(c, i);
589 BUG_ON(ret);
590 }
591 }
592
593 for (unsigned i = 0; i < BTREE_ID_NR; i++) {
594 struct btree_root *r = bch2_btree_id_root(c, i);
595
596 if (!r->b && !r->error) {
597 r->alive = false;
598 r->level = 0;
599 bch2_btree_root_alloc_fake(c, i, 0);
600 }
601 }
602 fsck_err:
603 printbuf_exit(&buf);
604 return ret;
605 }
606
check_version_upgrade(struct bch_fs * c)607 static bool check_version_upgrade(struct bch_fs *c)
608 {
609 unsigned latest_version = bcachefs_metadata_version_current;
610 unsigned latest_compatible = min(latest_version,
611 bch2_latest_compatible_version(c->sb.version));
612 unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
613 unsigned new_version = 0;
614 bool ret = false;
615
616 if (old_version < bcachefs_metadata_required_upgrade_below) {
617 if (c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible ||
618 latest_compatible < bcachefs_metadata_required_upgrade_below)
619 new_version = latest_version;
620 else
621 new_version = latest_compatible;
622 } else {
623 switch (c->opts.version_upgrade) {
624 case BCH_VERSION_UPGRADE_compatible:
625 new_version = latest_compatible;
626 break;
627 case BCH_VERSION_UPGRADE_incompatible:
628 new_version = latest_version;
629 break;
630 case BCH_VERSION_UPGRADE_none:
631 new_version = min(old_version, latest_version);
632 break;
633 }
634 }
635
636 if (new_version > old_version) {
637 struct printbuf buf = PRINTBUF;
638
639 if (old_version < bcachefs_metadata_required_upgrade_below)
640 prt_str(&buf, "Version upgrade required:\n");
641
642 if (old_version != c->sb.version) {
643 prt_str(&buf, "Version upgrade from ");
644 bch2_version_to_text(&buf, c->sb.version_upgrade_complete);
645 prt_str(&buf, " to ");
646 bch2_version_to_text(&buf, c->sb.version);
647 prt_str(&buf, " incomplete\n");
648 }
649
650 prt_printf(&buf, "Doing %s version upgrade from ",
651 BCH_VERSION_MAJOR(old_version) != BCH_VERSION_MAJOR(new_version)
652 ? "incompatible" : "compatible");
653 bch2_version_to_text(&buf, old_version);
654 prt_str(&buf, " to ");
655 bch2_version_to_text(&buf, new_version);
656 prt_newline(&buf);
657
658 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
659 __le64 passes = ext->recovery_passes_required[0];
660 bch2_sb_set_upgrade(c, old_version, new_version);
661 passes = ext->recovery_passes_required[0] & ~passes;
662
663 if (passes) {
664 prt_str(&buf, " running recovery passes: ");
665 prt_bitflags(&buf, bch2_recovery_passes,
666 bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
667 }
668
669 bch_info(c, "%s", buf.buf);
670 printbuf_exit(&buf);
671
672 ret = true;
673 }
674
675 if (new_version > c->sb.version_incompat &&
676 c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible) {
677 struct printbuf buf = PRINTBUF;
678
679 prt_str(&buf, "Now allowing incompatible features up to ");
680 bch2_version_to_text(&buf, new_version);
681 prt_str(&buf, ", previously allowed up to ");
682 bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
683 prt_newline(&buf);
684
685 bch_info(c, "%s", buf.buf);
686 printbuf_exit(&buf);
687
688 ret = true;
689 }
690
691 if (ret)
692 bch2_sb_upgrade(c, new_version,
693 c->opts.version_upgrade == BCH_VERSION_UPGRADE_incompatible);
694
695 return ret;
696 }
697
bch2_fs_recovery(struct bch_fs * c)698 int bch2_fs_recovery(struct bch_fs *c)
699 {
700 struct bch_sb_field_clean *clean = NULL;
701 struct jset *last_journal_entry = NULL;
702 u64 last_seq = 0, blacklist_seq, journal_seq;
703 int ret = 0;
704
705 if (c->sb.clean) {
706 clean = bch2_read_superblock_clean(c);
707 ret = PTR_ERR_OR_ZERO(clean);
708 if (ret)
709 goto err;
710
711 bch_info(c, "recovering from clean shutdown, journal seq %llu",
712 le64_to_cpu(clean->journal_seq));
713 } else {
714 bch_info(c, "recovering from unclean shutdown");
715 }
716
717 if (!(c->sb.features & (1ULL << BCH_FEATURE_new_extent_overwrite))) {
718 bch_err(c, "feature new_extent_overwrite not set, filesystem no longer supported");
719 ret = -EINVAL;
720 goto err;
721 }
722
723 if (!c->sb.clean &&
724 !(c->sb.features & (1ULL << BCH_FEATURE_extents_above_btree_updates))) {
725 bch_err(c, "filesystem needs recovery from older version; run fsck from older bcachefs-tools to fix");
726 ret = -EINVAL;
727 goto err;
728 }
729
730 if (c->opts.norecovery) {
731 c->opts.recovery_pass_last = c->opts.recovery_pass_last
732 ? min(c->opts.recovery_pass_last, BCH_RECOVERY_PASS_snapshots_read)
733 : BCH_RECOVERY_PASS_snapshots_read;
734 c->opts.nochanges = true;
735 c->opts.read_only = true;
736 }
737
738 mutex_lock(&c->sb_lock);
739 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
740 bool write_sb = false;
741
742 if (BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb)) {
743 ext->recovery_passes_required[0] |=
744 cpu_to_le64(bch2_recovery_passes_to_stable(BIT_ULL(BCH_RECOVERY_PASS_check_topology)));
745 write_sb = true;
746 }
747
748 u64 sb_passes = bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
749 if (sb_passes) {
750 struct printbuf buf = PRINTBUF;
751 prt_str(&buf, "superblock requires following recovery passes to be run:\n ");
752 prt_bitflags(&buf, bch2_recovery_passes, sb_passes);
753 bch_info(c, "%s", buf.buf);
754 printbuf_exit(&buf);
755 }
756
757 if (bch2_check_version_downgrade(c)) {
758 struct printbuf buf = PRINTBUF;
759
760 prt_str(&buf, "Version downgrade required:");
761
762 __le64 passes = ext->recovery_passes_required[0];
763 bch2_sb_set_downgrade(c,
764 BCH_VERSION_MINOR(bcachefs_metadata_version_current),
765 BCH_VERSION_MINOR(c->sb.version));
766 passes = ext->recovery_passes_required[0] & ~passes;
767 if (passes) {
768 prt_str(&buf, "\n running recovery passes: ");
769 prt_bitflags(&buf, bch2_recovery_passes,
770 bch2_recovery_passes_from_stable(le64_to_cpu(passes)));
771 }
772
773 bch_info(c, "%s", buf.buf);
774 printbuf_exit(&buf);
775 write_sb = true;
776 }
777
778 if (check_version_upgrade(c))
779 write_sb = true;
780
781 c->opts.recovery_passes |= bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));
782
783 if (c->sb.version_upgrade_complete < bcachefs_metadata_version_autofix_errors) {
784 SET_BCH_SB_ERROR_ACTION(c->disk_sb.sb, BCH_ON_ERROR_fix_safe);
785 write_sb = true;
786 }
787
788 if (write_sb)
789 bch2_write_super(c);
790 mutex_unlock(&c->sb_lock);
791
792 if (c->opts.fsck)
793 set_bit(BCH_FS_fsck_running, &c->flags);
794 if (c->sb.clean)
795 set_bit(BCH_FS_clean_recovery, &c->flags);
796 set_bit(BCH_FS_recovery_running, &c->flags);
797
798 ret = bch2_blacklist_table_initialize(c);
799 if (ret) {
800 bch_err(c, "error initializing blacklist table");
801 goto err;
802 }
803
804 bch2_journal_pos_from_member_info_resume(c);
805
806 if (!c->sb.clean || c->opts.retain_recovery_info) {
807 struct genradix_iter iter;
808 struct journal_replay **i;
809
810 bch_verbose(c, "starting journal read");
811 ret = bch2_journal_read(c, &last_seq, &blacklist_seq, &journal_seq);
812 if (ret)
813 goto err;
814
815 /*
816 * note: cmd_list_journal needs the blacklist table fully up to date so
817 * it can asterisk ignored journal entries:
818 */
819 if (c->opts.read_journal_only)
820 goto out;
821
822 genradix_for_each_reverse(&c->journal_entries, iter, i)
823 if (!journal_replay_ignore(*i)) {
824 last_journal_entry = &(*i)->j;
825 break;
826 }
827
828 if (mustfix_fsck_err_on(c->sb.clean &&
829 last_journal_entry &&
830 !journal_entry_empty(last_journal_entry), c,
831 clean_but_journal_not_empty,
832 "filesystem marked clean but journal not empty")) {
833 c->sb.compat &= ~(1ULL << BCH_COMPAT_alloc_info);
834 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
835 c->sb.clean = false;
836 }
837
838 if (!last_journal_entry) {
839 fsck_err_on(!c->sb.clean, c,
840 dirty_but_no_journal_entries,
841 "no journal entries found");
842 if (clean)
843 goto use_clean;
844
845 genradix_for_each_reverse(&c->journal_entries, iter, i)
846 if (*i) {
847 last_journal_entry = &(*i)->j;
848 (*i)->ignore_blacklisted = false;
849 (*i)->ignore_not_dirty= false;
850 /*
851 * This was probably a NO_FLUSH entry,
852 * so last_seq was garbage - but we know
853 * we're only using a single journal
854 * entry, set it here:
855 */
856 (*i)->j.last_seq = (*i)->j.seq;
857 break;
858 }
859 }
860
861 ret = bch2_journal_keys_sort(c);
862 if (ret)
863 goto err;
864
865 if (c->sb.clean && last_journal_entry) {
866 ret = bch2_verify_superblock_clean(c, &clean,
867 last_journal_entry);
868 if (ret)
869 goto err;
870 }
871 } else {
872 use_clean:
873 if (!clean) {
874 bch_err(c, "no superblock clean section found");
875 ret = -BCH_ERR_fsck_repair_impossible;
876 goto err;
877
878 }
879 blacklist_seq = journal_seq = le64_to_cpu(clean->journal_seq) + 1;
880 }
881
882 c->journal_replay_seq_start = last_seq;
883 c->journal_replay_seq_end = blacklist_seq - 1;
884
885 zero_out_btree_mem_ptr(&c->journal_keys);
886
887 ret = journal_replay_early(c, clean);
888 if (ret)
889 goto err;
890
891 if (c->opts.reconstruct_alloc)
892 bch2_reconstruct_alloc(c);
893
894 /*
895 * After an unclean shutdown, skip then next few journal sequence
896 * numbers as they may have been referenced by btree writes that
897 * happened before their corresponding journal writes - those btree
898 * writes need to be ignored, by skipping and blacklisting the next few
899 * journal sequence numbers:
900 */
901 if (!c->sb.clean)
902 journal_seq += JOURNAL_BUF_NR * 4;
903
904 if (blacklist_seq != journal_seq) {
905 ret = bch2_journal_log_msg(c, "blacklisting entries %llu-%llu",
906 blacklist_seq, journal_seq) ?:
907 bch2_journal_seq_blacklist_add(c,
908 blacklist_seq, journal_seq);
909 if (ret) {
910 bch_err_msg(c, ret, "error creating new journal seq blacklist entry");
911 goto err;
912 }
913 }
914
915 ret = bch2_journal_log_msg(c, "starting journal at entry %llu, replaying %llu-%llu",
916 journal_seq, last_seq, blacklist_seq - 1) ?:
917 bch2_fs_journal_start(&c->journal, journal_seq);
918 if (ret)
919 goto err;
920
921 /*
922 * Skip past versions that might have possibly been used (as nonces),
923 * but hadn't had their pointers written:
924 */
925 if (c->sb.encryption_type && !c->sb.clean)
926 atomic64_add(1 << 16, &c->key_version);
927
928 ret = read_btree_roots(c);
929 if (ret)
930 goto err;
931
932 set_bit(BCH_FS_btree_running, &c->flags);
933
934 ret = bch2_sb_set_upgrade_extra(c);
935
936 ret = bch2_run_recovery_passes(c);
937 if (ret)
938 goto err;
939
940 /*
941 * Normally set by the appropriate recovery pass: when cleared, this
942 * indicates we're in early recovery and btree updates should be done by
943 * being applied to the journal replay keys. _Must_ be cleared before
944 * multithreaded use:
945 */
946 set_bit(BCH_FS_may_go_rw, &c->flags);
947 clear_bit(BCH_FS_fsck_running, &c->flags);
948 clear_bit(BCH_FS_recovery_running, &c->flags);
949
950 /* in case we don't run journal replay, i.e. norecovery mode */
951 set_bit(BCH_FS_accounting_replay_done, &c->flags);
952
953 bch2_async_btree_node_rewrites_flush(c);
954
955 /* fsync if we fixed errors */
956 if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
957 bch2_journal_flush_all_pins(&c->journal);
958 bch2_journal_meta(&c->journal);
959 }
960
961 /* If we fixed errors, verify that fs is actually clean now: */
962 if (IS_ENABLED(CONFIG_BCACHEFS_DEBUG) &&
963 test_bit(BCH_FS_errors_fixed, &c->flags) &&
964 !test_bit(BCH_FS_errors_not_fixed, &c->flags) &&
965 !test_bit(BCH_FS_error, &c->flags)) {
966 bch2_flush_fsck_errs(c);
967
968 bch_info(c, "Fixed errors, running fsck a second time to verify fs is clean");
969 clear_bit(BCH_FS_errors_fixed, &c->flags);
970
971 c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
972
973 ret = bch2_run_recovery_passes(c);
974 if (ret)
975 goto err;
976
977 if (test_bit(BCH_FS_errors_fixed, &c->flags) ||
978 test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
979 bch_err(c, "Second fsck run was not clean");
980 set_bit(BCH_FS_errors_not_fixed, &c->flags);
981 }
982
983 set_bit(BCH_FS_errors_fixed, &c->flags);
984 }
985
986 if (enabled_qtypes(c)) {
987 bch_verbose(c, "reading quotas");
988 ret = bch2_fs_quota_read(c);
989 if (ret)
990 goto err;
991 bch_verbose(c, "quotas done");
992 }
993
994 mutex_lock(&c->sb_lock);
995 ext = bch2_sb_field_get(c->disk_sb.sb, ext);
996 write_sb = false;
997
998 if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) != le16_to_cpu(c->disk_sb.sb->version)) {
999 SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, le16_to_cpu(c->disk_sb.sb->version));
1000 write_sb = true;
1001 }
1002
1003 if (!test_bit(BCH_FS_error, &c->flags) &&
1004 !(c->disk_sb.sb->compat[0] & cpu_to_le64(1ULL << BCH_COMPAT_alloc_info))) {
1005 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_alloc_info);
1006 write_sb = true;
1007 }
1008
1009 if (!test_bit(BCH_FS_error, &c->flags) &&
1010 !bch2_is_zero(ext->errors_silent, sizeof(ext->errors_silent))) {
1011 memset(ext->errors_silent, 0, sizeof(ext->errors_silent));
1012 write_sb = true;
1013 }
1014
1015 if (c->opts.fsck &&
1016 !test_bit(BCH_FS_error, &c->flags) &&
1017 c->recovery_pass_done == BCH_RECOVERY_PASS_NR - 1 &&
1018 ext->btrees_lost_data) {
1019 ext->btrees_lost_data = 0;
1020 write_sb = true;
1021 }
1022
1023 if (c->opts.fsck &&
1024 !test_bit(BCH_FS_error, &c->flags) &&
1025 !test_bit(BCH_FS_errors_not_fixed, &c->flags)) {
1026 SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 0);
1027 SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 0);
1028 write_sb = true;
1029 }
1030
1031 if (bch2_blacklist_entries_gc(c))
1032 write_sb = true;
1033
1034 if (write_sb)
1035 bch2_write_super(c);
1036 mutex_unlock(&c->sb_lock);
1037
1038 if (!(c->sb.compat & (1ULL << BCH_COMPAT_extents_above_btree_updates_done)) ||
1039 c->sb.version_min < bcachefs_metadata_version_btree_ptr_sectors_written) {
1040 struct bch_move_stats stats;
1041
1042 bch2_move_stats_init(&stats, "recovery");
1043
1044 struct printbuf buf = PRINTBUF;
1045 bch2_version_to_text(&buf, c->sb.version_min);
1046 bch_info(c, "scanning for old btree nodes: min_version %s", buf.buf);
1047 printbuf_exit(&buf);
1048
1049 ret = bch2_fs_read_write_early(c) ?:
1050 bch2_scan_old_btree_nodes(c, &stats);
1051 if (ret)
1052 goto err;
1053 bch_info(c, "scanning for old btree nodes done");
1054 }
1055
1056 ret = 0;
1057 out:
1058 bch2_flush_fsck_errs(c);
1059
1060 if (!c->opts.retain_recovery_info) {
1061 bch2_journal_keys_put_initial(c);
1062 bch2_find_btree_nodes_exit(&c->found_btree_nodes);
1063 }
1064 if (!IS_ERR(clean))
1065 kfree(clean);
1066
1067 if (!ret &&
1068 test_bit(BCH_FS_need_delete_dead_snapshots, &c->flags) &&
1069 !c->opts.nochanges) {
1070 bch2_fs_read_write_early(c);
1071 bch2_delete_dead_snapshots_async(c);
1072 }
1073
1074 bch_err_fn(c, ret);
1075 return ret;
1076 err:
1077 fsck_err:
1078 bch2_fs_emergency_read_only(c);
1079 goto out;
1080 }
1081
bch2_fs_initialize(struct bch_fs * c)1082 int bch2_fs_initialize(struct bch_fs *c)
1083 {
1084 struct bch_inode_unpacked root_inode, lostfound_inode;
1085 struct bkey_inode_buf packed_inode;
1086 struct qstr lostfound = QSTR("lost+found");
1087 struct bch_member *m;
1088 int ret;
1089
1090 bch_notice(c, "initializing new filesystem");
1091 set_bit(BCH_FS_new_fs, &c->flags);
1092
1093 mutex_lock(&c->sb_lock);
1094 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_extents_above_btree_updates_done);
1095 c->disk_sb.sb->compat[0] |= cpu_to_le64(1ULL << BCH_COMPAT_bformat_overflow_done);
1096
1097 bch2_check_version_downgrade(c);
1098
1099 if (c->opts.version_upgrade != BCH_VERSION_UPGRADE_none) {
1100 bch2_sb_upgrade(c, bcachefs_metadata_version_current, false);
1101 SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
1102 bch2_write_super(c);
1103 }
1104
1105 for_each_member_device(c, ca) {
1106 m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
1107 SET_BCH_MEMBER_FREESPACE_INITIALIZED(m, false);
1108 ca->mi = bch2_mi_to_cpu(m);
1109 }
1110
1111 bch2_write_super(c);
1112 mutex_unlock(&c->sb_lock);
1113
1114 set_bit(BCH_FS_btree_running, &c->flags);
1115 set_bit(BCH_FS_may_go_rw, &c->flags);
1116
1117 for (unsigned i = 0; i < BTREE_ID_NR; i++)
1118 bch2_btree_root_alloc_fake(c, i, 0);
1119
1120 ret = bch2_fs_journal_alloc(c);
1121 if (ret)
1122 goto err;
1123
1124 /*
1125 * journal_res_get() will crash if called before this has
1126 * set up the journal.pin FIFO and journal.cur pointer:
1127 */
1128 bch2_fs_journal_start(&c->journal, 1);
1129 set_bit(BCH_FS_accounting_replay_done, &c->flags);
1130 bch2_journal_set_replay_done(&c->journal);
1131
1132 ret = bch2_fs_read_write_early(c);
1133 if (ret)
1134 goto err;
1135
1136 for_each_member_device(c, ca) {
1137 ret = bch2_dev_usage_init(ca, false);
1138 if (ret) {
1139 bch2_dev_put(ca);
1140 goto err;
1141 }
1142 }
1143
1144 /*
1145 * Write out the superblock and journal buckets, now that we can do
1146 * btree updates
1147 */
1148 bch_verbose(c, "marking superblocks");
1149 ret = bch2_trans_mark_dev_sbs(c);
1150 bch_err_msg(c, ret, "marking superblocks");
1151 if (ret)
1152 goto err;
1153
1154 ret = bch2_fs_freespace_init(c);
1155 if (ret)
1156 goto err;
1157
1158 ret = bch2_initialize_subvolumes(c);
1159 if (ret)
1160 goto err;
1161
1162 bch_verbose(c, "reading snapshots table");
1163 ret = bch2_snapshots_read(c);
1164 if (ret)
1165 goto err;
1166 bch_verbose(c, "reading snapshots done");
1167
1168 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 0, NULL);
1169 root_inode.bi_inum = BCACHEFS_ROOT_INO;
1170 root_inode.bi_subvol = BCACHEFS_ROOT_SUBVOL;
1171 bch2_inode_pack(&packed_inode, &root_inode);
1172 packed_inode.inode.k.p.snapshot = U32_MAX;
1173
1174 ret = bch2_btree_insert(c, BTREE_ID_inodes, &packed_inode.inode.k_i, NULL, 0, 0);
1175 bch_err_msg(c, ret, "creating root directory");
1176 if (ret)
1177 goto err;
1178
1179 bch2_inode_init_early(c, &lostfound_inode);
1180
1181 ret = bch2_trans_commit_do(c, NULL, NULL, 0,
1182 bch2_create_trans(trans,
1183 BCACHEFS_ROOT_SUBVOL_INUM,
1184 &root_inode, &lostfound_inode,
1185 &lostfound,
1186 0, 0, S_IFDIR|0700, 0,
1187 NULL, NULL, (subvol_inum) { 0 }, 0));
1188 bch_err_msg(c, ret, "creating lost+found");
1189 if (ret)
1190 goto err;
1191
1192 c->recovery_pass_done = BCH_RECOVERY_PASS_NR - 1;
1193
1194 if (enabled_qtypes(c)) {
1195 ret = bch2_fs_quota_read(c);
1196 if (ret)
1197 goto err;
1198 }
1199
1200 ret = bch2_journal_flush(&c->journal);
1201 bch_err_msg(c, ret, "writing first journal entry");
1202 if (ret)
1203 goto err;
1204
1205 mutex_lock(&c->sb_lock);
1206 SET_BCH_SB_INITIALIZED(c->disk_sb.sb, true);
1207 SET_BCH_SB_CLEAN(c->disk_sb.sb, false);
1208
1209 bch2_write_super(c);
1210 mutex_unlock(&c->sb_lock);
1211
1212 c->curr_recovery_pass = BCH_RECOVERY_PASS_NR;
1213 return 0;
1214 err:
1215 bch_err_fn(c, ret);
1216 return ret;
1217 }
1218