xref: /linux/fs/bcachefs/sb-downgrade.c (revision a9aaf1ff88a8cb99a1335c9eb76de637f0cf8c10)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Superblock section that contains a list of recovery passes to run when
5  * downgrading past a given version
6  */
7 
8 #include "bcachefs.h"
9 #include "darray.h"
10 #include "recovery_passes.h"
11 #include "sb-downgrade.h"
12 #include "sb-errors.h"
13 #include "super-io.h"
14 
/*
 * Pseudo recovery pass bit: table rows that set it request every fsck pass.
 * bch2_sb_set_upgrade() expands it via bch2_fsck_recovery_passes() and then
 * clears the bit before the mask is converted to its stable form.
 */
#define RECOVERY_PASS_ALL_FSCK		BIT_ULL(63)

/*
 * Upgrade and downgrade tables.
 *
 * Each row names a metadata version, the recovery passes that must run when
 * crossing that version, and (optionally) the fsck errors that are expected
 * and should be fixed silently while doing so:
 *
 *	x(version, recovery_passes, errors...)
 */
#define UPGRADE_TABLE()								\
	x(backpointers,			RECOVERY_PASS_ALL_FSCK)			\
	x(inode_v3,			RECOVERY_PASS_ALL_FSCK)			\
	x(unwritten_extents,		RECOVERY_PASS_ALL_FSCK)			\
	x(bucket_gens,								\
	  BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init) |				\
	  RECOVERY_PASS_ALL_FSCK)						\
	x(lru_v2,			RECOVERY_PASS_ALL_FSCK)			\
	x(fragmentation_lru,		RECOVERY_PASS_ALL_FSCK)			\
	x(no_bps_in_alloc_keys,		RECOVERY_PASS_ALL_FSCK)			\
	x(snapshot_trees,		RECOVERY_PASS_ALL_FSCK)			\
	x(snapshot_skiplists,							\
	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),				\
	  BCH_FSCK_ERR_snapshot_bad_depth,					\
	  BCH_FSCK_ERR_snapshot_bad_skiplist)					\
	x(deleted_inodes,							\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),				\
	  BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)			\
	x(rebalance_work,							\
	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))			\
	x(subvolume_fs_parent,							\
	  BIT_ULL(BCH_RECOVERY_PASS_check_dirents),				\
	  BCH_FSCK_ERR_subvol_fs_path_parent_wrong)				\
	x(btree_subvolume_children,						\
	  BIT_ULL(BCH_RECOVERY_PASS_check_subvols),				\
	  BCH_FSCK_ERR_subvol_children_not_set)					\
	x(mi_btree_bitmap,							\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),				\
	  BCH_FSCK_ERR_btree_bitmap_not_marked)					\
	x(disk_accounting_v2,							\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),				\
	  BCH_FSCK_ERR_bkey_version_in_future,					\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,					\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,					\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,				\
	  BCH_FSCK_ERR_accounting_mismatch)
65 
/*
 * Downgrade table, same row format as the upgrade table:
 *
 *	x(version, recovery_passes, errors...)
 *
 * The bucket_stripe_sectors row is intentionally empty here; its contents
 * depend on filesystem state and are filled in by downgrade_table_extra().
 */
#define DOWNGRADE_TABLE()							\
	x(bucket_stripe_sectors,	0)					\
	x(disk_accounting_v2,							\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),				\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,					\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,					\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,				\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,					\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,					\
	  BCH_FSCK_ERR_fs_usage_data_wrong,					\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,					\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,					\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,				\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,					\
	  BCH_FSCK_ERR_bkey_version_in_future)
83 
84 struct upgrade_downgrade_entry {
85 	u64		recovery_passes;
86 	u16		version;
87 	u16		nr_errors;
88 	const u16	*errors;
89 };
90 
91 #define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
92 UPGRADE_TABLE()
93 #undef x
94 
95 static const struct upgrade_downgrade_entry upgrade_table[] = {
96 #define x(ver, passes, ...) {					\
97 	.recovery_passes	= passes,			\
98 	.version		= bcachefs_metadata_version_##ver,\
99 	.nr_errors		= ARRAY_SIZE(upgrade_##ver##_errors),	\
100 	.errors			= upgrade_##ver##_errors,	\
101 },
102 UPGRADE_TABLE()
103 #undef x
104 };
105 
106 static int have_stripes(struct bch_fs *c)
107 {
108 	return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
109 }
110 
111 int bch2_sb_set_upgrade_extra(struct bch_fs *c)
112 {
113 	unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
114 	unsigned new_version = c->sb.version;
115 	bool write_sb = false;
116 	int ret = 0;
117 
118 	mutex_lock(&c->sb_lock);
119 	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
120 
121 	if (old_version <  bcachefs_metadata_version_bucket_stripe_sectors &&
122 	    new_version >= bcachefs_metadata_version_bucket_stripe_sectors &&
123 	    (ret = have_stripes(c) > 0)) {
124 		__set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
125 		__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
126 		__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent);
127 		write_sb = true;
128 	}
129 
130 	if (write_sb)
131 		bch2_write_super(c);
132 	mutex_unlock(&c->sb_lock);
133 
134 	return ret < 0 ? ret : 0;
135 }
136 
137 void bch2_sb_set_upgrade(struct bch_fs *c,
138 			 unsigned old_version,
139 			 unsigned new_version)
140 {
141 	lockdep_assert_held(&c->sb_lock);
142 
143 	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
144 
145 	for (const struct upgrade_downgrade_entry *i = upgrade_table;
146 	     i < upgrade_table + ARRAY_SIZE(upgrade_table);
147 	     i++)
148 		if (i->version > old_version && i->version <= new_version) {
149 			u64 passes = i->recovery_passes;
150 
151 			if (passes & RECOVERY_PASS_ALL_FSCK)
152 				passes |= bch2_fsck_recovery_passes();
153 			passes &= ~RECOVERY_PASS_ALL_FSCK;
154 
155 			ext->recovery_passes_required[0] |=
156 				cpu_to_le64(bch2_recovery_passes_to_stable(passes));
157 
158 			for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++)
159 				__set_bit_le64(*e, ext->errors_silent);
160 		}
161 }
162 
163 #define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
164 DOWNGRADE_TABLE()
165 #undef x
166 
167 static const struct upgrade_downgrade_entry downgrade_table[] = {
168 #define x(ver, passes, ...) {					\
169 	.recovery_passes	= passes,			\
170 	.version		= bcachefs_metadata_version_##ver,\
171 	.nr_errors		= ARRAY_SIZE(downgrade_##ver##_errors),	\
172 	.errors			= downgrade_##ver##_errors,	\
173 },
174 DOWNGRADE_TABLE()
175 #undef x
176 };
177 
178 static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
179 {
180 	struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table);
181 	unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
182 	int ret = 0;
183 
184 	unsigned nr_errors = le16_to_cpu(dst->nr_errors);
185 
186 	switch (le16_to_cpu(dst->version)) {
187 	case bcachefs_metadata_version_bucket_stripe_sectors:
188 		if (have_stripes(c)) {
189 			bytes += sizeof(dst->errors[0]) * 2;
190 
191 			ret = darray_make_room(table, bytes);
192 			if (ret)
193 				return ret;
194 
195 			/* open coded __set_bit_le64, as dst is packed and
196 			 * dst->recovery_passes is misaligned */
197 			unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations;
198 			dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64));
199 
200 			dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong);
201 		}
202 		break;
203 	}
204 
205 	dst->nr_errors = cpu_to_le16(nr_errors);
206 	return ret;
207 }
208 
209 static inline const struct bch_sb_field_downgrade_entry *
210 downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
211 {
212 	return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
213 }
214 
/*
 * Walk the entries of downgrade section @_d, declaring @_i as the cursor.
 *
 * Iteration stops at the first entry that starts at or past the end of the
 * field, whose fixed header does not fit in the field, or whose errors[]
 * array would run past the end of the field.
 */
#define for_each_downgrade_entry(_d, _i)						\
	for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;		\
	     (void *) _i < vstruct_end(&(_d)->field) &&					\
	     (void *) _i->errors <= vstruct_end(&(_d)->field) &&			\
	     (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);		\
	     _i = downgrade_entry_next_c(_i))
221 
222 static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
223 				      enum bch_validate_flags flags, struct printbuf *err)
224 {
225 	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
226 
227 	for (const struct bch_sb_field_downgrade_entry *i = e->entries;
228 	     (void *) i	< vstruct_end(&e->field);
229 	     i = downgrade_entry_next_c(i)) {
230 		/*
231 		 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
232 		 * section sizes are 8 byte aligned - an empty entry spanning
233 		 * the end of the section is allowed (and ignored):
234 		 */
235 		if ((void *) &i->errors[0] > vstruct_end(&e->field))
236 			break;
237 
238 		if (flags & BCH_VALIDATE_write &&
239 		    (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
240 			prt_printf(err, "downgrade entry overruns end of superblock section");
241 			return -BCH_ERR_invalid_sb_downgrade;
242 		}
243 
244 		if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
245 		    BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
246 			prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
247 				   BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
248 				   BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
249 			return -BCH_ERR_invalid_sb_downgrade;
250 		}
251 	}
252 
253 	return 0;
254 }
255 
256 static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
257 				      struct bch_sb_field *f)
258 {
259 	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
260 
261 	if (out->nr_tabstops <= 1)
262 		printbuf_tabstop_push(out, 16);
263 
264 	for_each_downgrade_entry(e, i) {
265 		prt_str(out, "version:\t");
266 		bch2_version_to_text(out, le16_to_cpu(i->version));
267 		prt_newline(out);
268 
269 		prt_str(out, "recovery passes:\t");
270 		prt_bitflags(out, bch2_recovery_passes,
271 			     bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
272 		prt_newline(out);
273 
274 		prt_str(out, "errors:\t");
275 		bool first = true;
276 		for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
277 			if (!first)
278 				prt_char(out, ',');
279 			first = false;
280 			unsigned e = le16_to_cpu(i->errors[j]);
281 			prt_str(out, e < BCH_SB_ERR_MAX ? bch2_sb_error_strs[e] : "(unknown)");
282 		}
283 		prt_newline(out);
284 	}
285 }
286 
287 const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
288 	.validate	= bch2_sb_downgrade_validate,
289 	.to_text	= bch2_sb_downgrade_to_text,
290 };
291 
292 int bch2_sb_downgrade_update(struct bch_fs *c)
293 {
294 	if (!test_bit(BCH_FS_btree_running, &c->flags))
295 		return 0;
296 
297 	darray_char table = {};
298 	int ret = 0;
299 
300 	for (const struct upgrade_downgrade_entry *src = downgrade_table;
301 	     src < downgrade_table + ARRAY_SIZE(downgrade_table);
302 	     src++) {
303 		if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
304 			continue;
305 
306 		struct bch_sb_field_downgrade_entry *dst;
307 		unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;
308 
309 		ret = darray_make_room(&table, bytes);
310 		if (ret)
311 			goto out;
312 
313 		dst = (void *) &darray_top(table);
314 		dst->version = cpu_to_le16(src->version);
315 		dst->recovery_passes[0]	= cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
316 		dst->recovery_passes[1]	= 0;
317 		dst->nr_errors		= cpu_to_le16(src->nr_errors);
318 		for (unsigned i = 0; i < src->nr_errors; i++)
319 			dst->errors[i] = cpu_to_le16(src->errors[i]);
320 
321 		downgrade_table_extra(c, &table);
322 
323 		if (!dst->recovery_passes[0] &&
324 		    !dst->recovery_passes[1] &&
325 		    !dst->nr_errors)
326 			continue;
327 
328 		table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
329 	}
330 
331 	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
332 
333 	unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));
334 
335 	if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
336 		goto out;
337 
338 	d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
339 	if (!d) {
340 		ret = -BCH_ERR_ENOSPC_sb_downgrade;
341 		goto out;
342 	}
343 
344 	memcpy(d->entries, table.data, table.nr);
345 	memset_u64s_tail(d->entries, 0, table.nr);
346 out:
347 	darray_exit(&table);
348 	return ret;
349 }
350 
351 void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
352 {
353 	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
354 	if (!d)
355 		return;
356 
357 	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
358 
359 	for_each_downgrade_entry(d, i) {
360 		unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
361 		if (new_minor < minor && minor <= old_minor) {
362 			ext->recovery_passes_required[0] |= i->recovery_passes[0];
363 			ext->recovery_passes_required[1] |= i->recovery_passes[1];
364 
365 			for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
366 				unsigned e = le16_to_cpu(i->errors[j]);
367 				if (e < BCH_SB_ERR_MAX)
368 					__set_bit(e, c->sb.errors_silent);
369 				if (e < sizeof(ext->errors_silent) * 8)
370 					__set_bit_le64(e, ext->errors_silent);
371 			}
372 		}
373 	}
374 }
375