xref: /linux/fs/bcachefs/sb-downgrade.c (revision 36df6f734a7ad69880c5262543165c47cb57169f)
// SPDX-License-Identifier: GPL-2.0

/*
 * Superblock section that contains a list of recovery passes to run when
 * downgrading past a given version
 */

#include "bcachefs.h"
#include "darray.h"
#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"

#define RECOVERY_PASS_ALL_FSCK		BIT_ULL(63)

/*
 * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
 *
 * x(version, recovery_passes, errors...)
 */
#define UPGRADE_TABLE()						\
	x(snapshot_2,						\
	  RECOVERY_PASS_ALL_FSCK,				\
	  BCH_FSCK_ERR_subvol_root_wrong_bi_subvol,		\
	  BCH_FSCK_ERR_subvol_not_master_and_not_snapshot)	\
	x(backpointers,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(inode_v3,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(unwritten_extents,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(bucket_gens,						\
	  BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|		\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(lru_v2,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(fragmentation_lru,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(no_bps_in_alloc_keys,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_trees,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_skiplists,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),		\
	  BCH_FSCK_ERR_snapshot_bad_depth,			\
	  BCH_FSCK_ERR_snapshot_bad_skiplist)			\
	x(deleted_inodes,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)	\
	x(rebalance_work,					\
	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))	\
	x(subvolume_fs_parent,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_dirents),		\
	  BCH_FSCK_ERR_subvol_fs_path_parent_wrong)		\
	x(btree_subvolume_children,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_subvols),		\
	  BCH_FSCK_ERR_subvol_children_not_set)			\
	x(mi_btree_bitmap,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_btree_bitmap_not_marked)			\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad,	\
	  BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(disk_accounting_inum,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(inode_has_child_snapshots,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_child_snapshots_wrong)		\
	x(backpointer_bucket_gen,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_backpointer_to_missing_ptr,		\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(disk_accounting_big_endian,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(cached_backpointers,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(stripe_backpointers,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(inode_has_case_insensitive,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_case_insensitive_not_set,	\
	  BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)

#define DOWNGRADE_TABLE()					\
	x(bucket_stripe_sectors,				\
	  0)							\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_accounting_replicas_not_marked,		\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(backpointer_bucket_gen,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_backpointer_bucket_offset_wrong,		\
	  BCH_FSCK_ERR_backpointer_to_missing_ptr,		\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(disk_accounting_big_endian,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)

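/*
 * In-memory form of one upgrade/downgrade table entry: the version it applies
 * to, the recovery passes to request, and the fsck errors to silence.
 */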
struct upgrade_downgrade_entry {
	u64		recovery_passes;
	u16		version;
	u16		nr_errors;
	const u16	*errors;
};

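/*
 * Expand UPGRADE_TABLE() twice: first to emit a per-version array of error
 * codes to silence, then to build upgrade_table[] pointing at those arrays.
 */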
#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
UPGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry upgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(upgrade_##ver##_errors),	\
	.errors			= upgrade_##ver##_errors,	\
},
UPGRADE_TABLE()
#undef x
};

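/* Nonzero if the stripes btree has a real (non-fake) root node. */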
static int have_stripes(struct bch_fs *c)
{
	if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
		return 0;

	return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
}

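/*
 * Extra upgrade work that depends on filesystem state: when upgrading across
 * bucket_stripe_sectors and stripes are present, request check_allocations
 * and silence the bucket sector counter errors it is expected to fix.
 */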
int bch2_sb_set_upgrade_extra(struct bch_fs *c)
{
	unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
	unsigned new_version = c->sb.version;
	bool write_sb = false;
	int ret = 0;

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (old_version <  bcachefs_metadata_version_bucket_stripe_sectors &&
	    new_version >= bcachefs_metadata_version_bucket_stripe_sectors &&
	    (ret = have_stripes(c) > 0)) {
		__set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent);
		write_sb = true;
	}

	if (write_sb)
		bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return ret < 0 ? ret : 0;
}

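/*
 * Apply upgrade_table[] for an old_version -> new_version upgrade: for every
 * entry in that range, request its recovery passes (RECOVERY_PASS_ALL_FSCK
 * expands to the full set of fsck passes) and mark its errors as silent in
 * the superblock ext field. Caller holds sb_lock.
 */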
void bch2_sb_set_upgrade(struct bch_fs *c,
			 unsigned old_version,
			 unsigned new_version)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	for (const struct upgrade_downgrade_entry *i = upgrade_table;
	     i < upgrade_table + ARRAY_SIZE(upgrade_table);
	     i++)
		if (i->version > old_version && i->version <= new_version) {
			u64 passes = i->recovery_passes;

			if (passes & RECOVERY_PASS_ALL_FSCK)
				passes |= bch2_fsck_recovery_passes();
			passes &= ~RECOVERY_PASS_ALL_FSCK;

			ext->recovery_passes_required[0] |=
				cpu_to_le64(bch2_recovery_passes_to_stable(passes));

			for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++)
				__set_bit_le64(*e, ext->errors_silent);
		}
}

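/* As above: expand DOWNGRADE_TABLE() into per-version error arrays and downgrade_table[]. */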
#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(downgrade_##ver##_errors),	\
	.errors			= downgrade_##ver##_errors,	\
},
DOWNGRADE_TABLE()
#undef x
};

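/*
 * Filesystem-dependent additions to the downgrade entry currently being built
 * at the top of @table: for bucket_stripe_sectors, if stripes exist, also
 * request check_allocations and silence alloc_key_dirty_sectors_wrong.
 */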
static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
{
	unsigned dst_offset = table->nr;
	struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table);
	unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	int ret = 0;

	unsigned nr_errors = le16_to_cpu(dst->nr_errors);

	switch (le16_to_cpu(dst->version)) {
	case bcachefs_metadata_version_bucket_stripe_sectors:
		if (have_stripes(c)) {
			bytes += sizeof(dst->errors[0]) * 2;

			ret = darray_make_room(table, bytes);
			if (ret)
				return ret;

			dst = (void *) &table->data[dst_offset];
			dst->nr_errors = cpu_to_le16(nr_errors + 1);

			/* open coded __set_bit_le64, as dst is packed and
			 * dst->recovery_passes is misaligned */
			unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations;
			dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64));

			dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong);
		}
		break;
	}

	return ret;
}

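/*
 * Downgrade entries are variable length - a header followed by a flexible
 * array of error codes - so advance by computing each entry's end; the
 * iterator also checks that each entry fits within the section.
 */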
static inline const struct bch_sb_field_downgrade_entry *
downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
{
	return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
}

#define for_each_downgrade_entry(_d, _i)						\
	for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;		\
	     (void *) _i	< vstruct_end(&(_d)->field) &&				\
	     (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&			\
	     (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);		\
	     _i = downgrade_entry_next_c(_i))

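/*
 * Validation: on write, no entry may overrun the end of the section; an empty
 * entry created by 8-byte section padding is ignored, and every entry must be
 * for the superblock's major version.
 */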
static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
				      enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);

	for (const struct bch_sb_field_downgrade_entry *i = e->entries;
	     (void *) i	< vstruct_end(&e->field);
	     i = downgrade_entry_next_c(i)) {
		/*
		 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
		 * section sizes are 8 byte aligned - an empty entry spanning
		 * the end of the section is allowed (and ignored):
		 */
		if ((void *) &i->errors[0] > vstruct_end(&e->field))
			break;

		if (flags & BCH_VALIDATE_write &&
		    (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
			prt_printf(err, "downgrade entry overruns end of superblock section");
			return -BCH_ERR_invalid_sb_downgrade;
		}

		if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
		    BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
			prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
				   BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
				   BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
			return -BCH_ERR_invalid_sb_downgrade;
		}
	}

	return 0;
}

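/* Print each downgrade entry: version, recovery passes and silenced errors. */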
static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
				      struct bch_sb_field *f)
{
	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);

	if (out->nr_tabstops <= 1)
		printbuf_tabstop_push(out, 16);

	for_each_downgrade_entry(e, i) {
		prt_str(out, "version:\t");
		bch2_version_to_text(out, le16_to_cpu(i->version));
		prt_newline(out);

		prt_str(out, "recovery passes:\t");
		prt_bitflags(out, bch2_recovery_passes,
			     bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
		prt_newline(out);

		prt_str(out, "errors:\t");
		bool first = true;
		for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
			if (!first)
				prt_char(out, ',');
			first = false;
			bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
		}
		prt_newline(out);
	}
}

const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
	.validate	= bch2_sb_downgrade_validate,
	.to_text	= bch2_sb_downgrade_to_text,
};

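/*
 * Rebuild the superblock downgrade section from downgrade_table[]: copy every
 * entry for the current major version (skipping entries below
 * version_incompat), add any filesystem-dependent extras, and store the
 * result in the downgrade field, resizing it if necessary.
 */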
int bch2_sb_downgrade_update(struct bch_fs *c)
{
	if (!test_bit(BCH_FS_btree_running, &c->flags))
		return 0;

	darray_char table = {};
	int ret = 0;

	for (const struct upgrade_downgrade_entry *src = downgrade_table;
	     src < downgrade_table + ARRAY_SIZE(downgrade_table);
	     src++) {
		if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
			continue;

		if (src->version < c->sb.version_incompat)
			continue;

		struct bch_sb_field_downgrade_entry *dst;
		unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;

		ret = darray_make_room(&table, bytes);
		if (ret)
			goto out;

		dst = (void *) &darray_top(table);
		dst->version = cpu_to_le16(src->version);
		dst->recovery_passes[0]	= cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
		dst->recovery_passes[1]	= 0;
		dst->nr_errors		= cpu_to_le16(src->nr_errors);
		for (unsigned i = 0; i < src->nr_errors; i++)
			dst->errors[i] = cpu_to_le16(src->errors[i]);

		ret = downgrade_table_extra(c, &table);
		if (ret)
			goto out;

		if (!dst->recovery_passes[0] &&
		    !dst->recovery_passes[1] &&
		    !dst->nr_errors)
			continue;

		table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	}

	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);

	unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));

	if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
		goto out;

	d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
	if (!d) {
		ret = bch_err_throw(c, ENOSPC_sb_downgrade);
		goto out;
	}

	memcpy(d->entries, table.data, table.nr);
	memset_u64s_tail(d->entries, 0, table.nr);
out:
	darray_exit(&table);
	return ret;
}

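/*
 * Apply downgrade entries whose minor version lies in (new_minor, old_minor]:
 * request their recovery passes and silence their errors, both in the
 * in-memory mask and in the superblock ext field.
 */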
void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
{
	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
	if (!d)
		return;

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	for_each_downgrade_entry(d, i) {
		unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
		if (new_minor < minor && minor <= old_minor) {
			ext->recovery_passes_required[0] |= i->recovery_passes[0];
			ext->recovery_passes_required[1] |= i->recovery_passes[1];

			for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
				unsigned e = le16_to_cpu(i->errors[j]);
				if (e < BCH_FSCK_ERR_MAX)
					__set_bit(e, c->sb.errors_silent);
				if (e < sizeof(ext->errors_silent) * 8)
					__set_bit_le64(e, ext->errors_silent);
			}
		}
	}
}
458