xref: /linux/fs/bcachefs/recovery_passes.c (revision 4abcd80f23357808b0444d261ed08e5a77dbaa9a)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "alloc_background.h"
5 #include "backpointers.h"
6 #include "btree_gc.h"
7 #include "btree_node_scan.h"
8 #include "disk_accounting.h"
9 #include "ec.h"
10 #include "fsck.h"
11 #include "inode.h"
12 #include "journal.h"
13 #include "lru.h"
14 #include "logged_ops.h"
15 #include "rebalance.h"
16 #include "recovery.h"
17 #include "recovery_passes.h"
18 #include "snapshot.h"
19 #include "subvolume.h"
20 #include "super.h"
21 #include "super-io.h"
22 
23 const char * const bch2_recovery_passes[] = {
24 #define x(_fn, ...)	#_fn,
25 	BCH_RECOVERY_PASSES()
26 #undef x
27 	NULL
28 };
29 
/*
 * Fake recovery pass, so that scan_for_btree_nodes isn't 0.
 *
 * Does nothing and always returns 0.
 *
 * NOTE(review): presumably this occupies pass index 0 so that zero-filled
 * slots in the pass <-> stable id maps resolve to a harmless no-op - confirm
 * against the BCH_RECOVERY_PASSES() list.
 */
static int bch2_recovery_pass_empty(struct bch_fs *c)
{
	return 0;
}
35 
bch2_set_may_go_rw(struct bch_fs * c)36 static int bch2_set_may_go_rw(struct bch_fs *c)
37 {
38 	struct journal_keys *keys = &c->journal_keys;
39 
40 	/*
41 	 * After we go RW, the journal keys buffer can't be modified (except for
42 	 * setting journal_key->overwritten: it will be accessed by multiple
43 	 * threads
44 	 */
45 	move_gap(keys, keys->nr);
46 
47 	set_bit(BCH_FS_may_go_rw, &c->flags);
48 
49 	if (keys->nr || c->opts.fsck || !c->sb.clean || c->opts.recovery_passes)
50 		return bch2_fs_read_write_early(c);
51 	return 0;
52 }
53 
/* One entry of the recovery pass table: what to call, and when to call it. */
struct recovery_pass_fn {
	/* The pass itself; a nonzero return is treated as an error by callers */
	int		(*fn)(struct bch_fs *);
	/* Bitmask of PASS_* flags (PASS_ALWAYS, PASS_FSCK, PASS_SILENT, ...) */
	unsigned	when;
};
58 
59 static struct recovery_pass_fn recovery_pass_fns[] = {
60 #define x(_fn, _id, _when)	{ .fn = bch2_##_fn, .when = _when },
61 	BCH_RECOVERY_PASSES()
62 #undef x
63 };
64 
65 static const u8 passes_to_stable_map[] = {
66 #define x(n, id, ...)	[BCH_RECOVERY_PASS_##n] = BCH_RECOVERY_PASS_STABLE_##n,
67 	BCH_RECOVERY_PASSES()
68 #undef x
69 };
70 
bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)71 static enum bch_recovery_pass_stable bch2_recovery_pass_to_stable(enum bch_recovery_pass pass)
72 {
73 	return passes_to_stable_map[pass];
74 }
75 
/*
 * Convert a bitmask of in-memory pass numbers into a bitmask of stable pass
 * identifiers.
 *
 * @v: bitmask with bit N set for pass number N
 *
 * Bits of @v at or beyond the size of passes_to_stable_map[] are ignored.
 *
 * Returns the bitmask with each set bit remapped through
 * passes_to_stable_map[].
 */
u64 bch2_recovery_passes_to_stable(u64 v)
{
	u64 stable_mask = 0;
	unsigned pass = 0;

	while (pass < ARRAY_SIZE(passes_to_stable_map)) {
		if (v & BIT_ULL(pass))
			stable_mask |= BIT_ULL(passes_to_stable_map[pass]);
		pass++;
	}

	return stable_mask;
}
84 
/*
 * Convert a bitmask of stable pass identifiers into a bitmask of in-memory
 * pass numbers.
 *
 * @v: bitmask with bit N set for stable identifier N
 *
 * The lookup table is indexed by stable identifier; slots that no pass
 * initializes are zero, so a bit of @v landing on such a slot sets bit 0 of
 * the result. Bits of @v at or beyond the size of the table are ignored.
 *
 * Returns the remapped bitmask.
 */
u64 bch2_recovery_passes_from_stable(u64 v)
{
	static const u8 stable_to_pass[] = {
#define x(_name, _stable_id, ...)	[BCH_RECOVERY_PASS_STABLE_##_name] = BCH_RECOVERY_PASS_##_name,
	BCH_RECOVERY_PASSES()
#undef x
	};
	u64 pass_mask = 0;
	unsigned stable = 0;

	while (stable < ARRAY_SIZE(stable_to_pass)) {
		if (v & BIT_ULL(stable))
			pass_mask |= BIT_ULL(stable_to_pass[stable]);
		stable++;
	}

	return pass_mask;
}
99 
100 /*
101  * For when we need to rewind recovery passes and run a pass we skipped:
102  */
bch2_run_explicit_recovery_pass(struct bch_fs * c,enum bch_recovery_pass pass)103 int bch2_run_explicit_recovery_pass(struct bch_fs *c,
104 				    enum bch_recovery_pass pass)
105 {
106 	if (c->opts.recovery_passes & BIT_ULL(pass))
107 		return 0;
108 
109 	bch_info(c, "running explicit recovery pass %s (%u), currently at %s (%u)",
110 		 bch2_recovery_passes[pass], pass,
111 		 bch2_recovery_passes[c->curr_recovery_pass], c->curr_recovery_pass);
112 
113 	c->opts.recovery_passes |= BIT_ULL(pass);
114 
115 	if (c->curr_recovery_pass >= pass) {
116 		c->curr_recovery_pass = pass;
117 		c->recovery_passes_complete &= (1ULL << pass) >> 1;
118 		return -BCH_ERR_restart_recovery;
119 	} else {
120 		return 0;
121 	}
122 }
123 
bch2_run_explicit_recovery_pass_persistent(struct bch_fs * c,enum bch_recovery_pass pass)124 int bch2_run_explicit_recovery_pass_persistent(struct bch_fs *c,
125 					       enum bch_recovery_pass pass)
126 {
127 	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
128 
129 	mutex_lock(&c->sb_lock);
130 	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
131 
132 	if (!test_bit_le64(s, ext->recovery_passes_required)) {
133 		__set_bit_le64(s, ext->recovery_passes_required);
134 		bch2_write_super(c);
135 	}
136 	mutex_unlock(&c->sb_lock);
137 
138 	return bch2_run_explicit_recovery_pass(c, pass);
139 }
140 
bch2_clear_recovery_pass_required(struct bch_fs * c,enum bch_recovery_pass pass)141 static void bch2_clear_recovery_pass_required(struct bch_fs *c,
142 					      enum bch_recovery_pass pass)
143 {
144 	enum bch_recovery_pass_stable s = bch2_recovery_pass_to_stable(pass);
145 
146 	mutex_lock(&c->sb_lock);
147 	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
148 
149 	if (test_bit_le64(s, ext->recovery_passes_required)) {
150 		__clear_bit_le64(s, ext->recovery_passes_required);
151 		bch2_write_super(c);
152 	}
153 	mutex_unlock(&c->sb_lock);
154 }
155 
bch2_fsck_recovery_passes(void)156 u64 bch2_fsck_recovery_passes(void)
157 {
158 	u64 ret = 0;
159 
160 	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++)
161 		if (recovery_pass_fns[i].when & PASS_FSCK)
162 			ret |= BIT_ULL(i);
163 	return ret;
164 }
165 
should_run_recovery_pass(struct bch_fs * c,enum bch_recovery_pass pass)166 static bool should_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
167 {
168 	struct recovery_pass_fn *p = recovery_pass_fns + pass;
169 
170 	if (c->opts.recovery_passes_exclude & BIT_ULL(pass))
171 		return false;
172 	if (c->opts.recovery_passes & BIT_ULL(pass))
173 		return true;
174 	if ((p->when & PASS_FSCK) && c->opts.fsck)
175 		return true;
176 	if ((p->when & PASS_UNCLEAN) && !c->sb.clean)
177 		return true;
178 	if (p->when & PASS_ALWAYS)
179 		return true;
180 	return false;
181 }
182 
bch2_run_recovery_pass(struct bch_fs * c,enum bch_recovery_pass pass)183 static int bch2_run_recovery_pass(struct bch_fs *c, enum bch_recovery_pass pass)
184 {
185 	struct recovery_pass_fn *p = recovery_pass_fns + pass;
186 	int ret;
187 
188 	if (!(p->when & PASS_SILENT))
189 		bch2_print(c, KERN_INFO bch2_log_msg(c, "%s..."),
190 			   bch2_recovery_passes[pass]);
191 	ret = p->fn(c);
192 	if (ret)
193 		return ret;
194 	if (!(p->when & PASS_SILENT))
195 		bch2_print(c, KERN_CONT " done\n");
196 
197 	return 0;
198 }
199 
/*
 * Run every recovery pass flagged PASS_ONLINE, in table order, while holding
 * c->state_lock for read.
 *
 * A pass failing with an error that matches BCH_ERR_restart_recovery makes
 * the loop jump to c->curr_recovery_pass instead of aborting; any other error
 * stops the loop.
 *
 * Returns the result of the last pass that was run (0 if none ran).
 */
int bch2_run_online_recovery_passes(struct bch_fs *c)
{
	int ret = 0;

	down_read(&c->state_lock);

	for (unsigned i = 0; i < ARRAY_SIZE(recovery_pass_fns); i++) {
		struct recovery_pass_fn *p = recovery_pass_fns + i;

		if (!(p->when & PASS_ONLINE))
			continue;

		ret = bch2_run_recovery_pass(c, i);
		if (bch2_err_matches(ret, BCH_ERR_restart_recovery)) {
			/*
			 * NOTE(review): the for loop's i++ still runs after
			 * this continue, so iteration resumes at
			 * curr_recovery_pass + 1 rather than at
			 * curr_recovery_pass itself - confirm this is intended.
			 */
			i = c->curr_recovery_pass;
			continue;
		}
		if (ret)
			break;
	}

	up_read(&c->state_lock);

	return ret;
}
225 
bch2_run_recovery_passes(struct bch_fs * c)226 int bch2_run_recovery_passes(struct bch_fs *c)
227 {
228 	int ret = 0;
229 
230 	/*
231 	 * We can't allow set_may_go_rw to be excluded; that would cause us to
232 	 * use the journal replay keys for updates where it's not expected.
233 	 */
234 	c->opts.recovery_passes_exclude &= ~BCH_RECOVERY_PASS_set_may_go_rw;
235 
236 	while (c->curr_recovery_pass < ARRAY_SIZE(recovery_pass_fns)) {
237 		if (c->opts.recovery_pass_last &&
238 		    c->curr_recovery_pass > c->opts.recovery_pass_last)
239 			break;
240 
241 		if (should_run_recovery_pass(c, c->curr_recovery_pass)) {
242 			unsigned pass = c->curr_recovery_pass;
243 
244 			ret =   bch2_run_recovery_pass(c, c->curr_recovery_pass) ?:
245 				bch2_journal_flush(&c->journal);
246 			if (bch2_err_matches(ret, BCH_ERR_restart_recovery) ||
247 			    (ret && c->curr_recovery_pass < pass))
248 				continue;
249 			if (ret)
250 				break;
251 
252 			c->recovery_passes_complete |= BIT_ULL(c->curr_recovery_pass);
253 		}
254 
255 		c->recovery_pass_done = max(c->recovery_pass_done, c->curr_recovery_pass);
256 
257 		if (!test_bit(BCH_FS_error, &c->flags))
258 			bch2_clear_recovery_pass_required(c, c->curr_recovery_pass);
259 
260 		c->curr_recovery_pass++;
261 	}
262 
263 	return ret;
264 }
265