1 // SPDX-License-Identifier: GPL-2.0
2
3 /*
4 * Superblock section that contains a list of recovery passes to run when
5 * downgrading past a given version
6 */
7
8 #include "bcachefs.h"
9 #include "darray.h"
10 #include "recovery_passes.h"
11 #include "sb-downgrade.h"
12 #include "sb-errors.h"
13 #include "super-io.h"
14
/*
 * Sentinel bit in the recovery_passes word: expanded at runtime (in
 * bch2_sb_set_upgrade()) to the full set of fsck passes, then cleared.
 * Bit 63 is never used for a real recovery pass.
 */
#define RECOVERY_PASS_ALL_FSCK		BIT_ULL(63)

/*
 * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
 *
 * x(version, recovery_passes, errors...)
 *
 * Each entry names the metadata version being crossed, the recovery passes
 * that must run when crossing it, and the fsck error ids to silence while
 * those passes repair the filesystem.
 */
#define UPGRADE_TABLE()						\
	x(snapshot_2,						\
	  RECOVERY_PASS_ALL_FSCK,				\
	  BCH_FSCK_ERR_subvol_root_wrong_bi_subvol,		\
	  BCH_FSCK_ERR_subvol_not_master_and_not_snapshot)	\
	x(backpointers,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(inode_v3,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(unwritten_extents,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(bucket_gens,						\
	  BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|		\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(lru_v2,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(fragmentation_lru,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(no_bps_in_alloc_keys,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_trees,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_skiplists,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),		\
	  BCH_FSCK_ERR_snapshot_bad_depth,			\
	  BCH_FSCK_ERR_snapshot_bad_skiplist)			\
	x(deleted_inodes,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)	\
	x(rebalance_work,					\
	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))	\
	x(subvolume_fs_parent,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_dirents),		\
	  BCH_FSCK_ERR_subvol_fs_path_parent_wrong)		\
	x(btree_subvolume_children,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_subvols),		\
	  BCH_FSCK_ERR_subvol_children_not_set)			\
	x(mi_btree_bitmap,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_btree_bitmap_not_marked)			\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad,	\
	  BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(disk_accounting_inum,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(inode_has_child_snapshots,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_child_snapshots_wrong)		\
	x(backpointer_bucket_gen,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_backpointer_to_missing_ptr,		\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(disk_accounting_big_endian,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(cached_backpointers,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(stripe_backpointers,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(inode_has_case_insensitive,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_case_insensitive_not_set,	\
	  BCH_FSCK_ERR_inode_parent_has_case_insensitive_not_set)
108
/*
 * Same x(version, recovery_passes, errors...) format as UPGRADE_TABLE(), but
 * describing what must run when downgrading *past* the named version.  This
 * table is serialized into the superblock (bch2_sb_downgrade_update()) so an
 * older kernel can read it.
 */
#define DOWNGRADE_TABLE()					\
	x(bucket_stripe_sectors,				\
	  0)							\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_accounting_replicas_not_marked,		\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(backpointer_bucket_gen,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\
	  BCH_FSCK_ERR_backpointer_bucket_offset_wrong,		\
	  BCH_FSCK_ERR_backpointer_to_missing_ptr,		\
	  BCH_FSCK_ERR_ptr_to_missing_backpointer)		\
	x(disk_accounting_big_endian,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)
156
/* In-memory form of one UPGRADE_TABLE()/DOWNGRADE_TABLE() entry: */
struct upgrade_downgrade_entry {
	u64			recovery_passes;	/* BCH_RECOVERY_PASS_* bitmask (may include RECOVERY_PASS_ALL_FSCK) */
	u16			version;		/* bcachefs_metadata_version_* being crossed */
	u16			nr_errors;		/* length of @errors */
	const u16		*errors;		/* BCH_FSCK_ERR_* ids to silence */
};
163
/* Expand UPGRADE_TABLE() once to emit a per-version error-id array ... */
#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
UPGRADE_TABLE()
#undef x

/* ... and once more to build the table itself, in ascending version order: */
static const struct upgrade_downgrade_entry upgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(upgrade_##ver##_errors),	\
	.errors			= upgrade_##ver##_errors,	\
},
UPGRADE_TABLE()
#undef x
};
178
have_stripes(struct bch_fs * c)179 static int have_stripes(struct bch_fs *c)
180 {
181 if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
182 return 0;
183
184 return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
185 }
186
/*
 * Runtime-dependent upgrade work that the static UPGRADE_TABLE() can't
 * express: crossing bucket_stripe_sectors only needs check_allocations (and
 * the matching silenced errors) when the filesystem actually has stripes.
 *
 * Takes and releases c->sb_lock; writes the superblock if anything changed.
 * Returns 0 or a negative error.
 */
int bch2_sb_set_upgrade_extra(struct bch_fs *c)
{
	/* if an upgrade is in flight, compare against where it started: */
	unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
	unsigned new_version = c->sb.version;
	bool write_sb = false;
	int ret = 0;

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	/*
	 * NOTE(review): '>' binds tighter than '=', so ret is assigned the
	 * comparison result (0/1), not have_stripes()'s return.  Harmless
	 * while have_stripes() cannot return a negative error — confirm if
	 * that ever changes.
	 */
	if (old_version < bcachefs_metadata_version_bucket_stripe_sectors &&
	    new_version >= bcachefs_metadata_version_bucket_stripe_sectors &&
	    (ret = have_stripes(c) > 0)) {
		__set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent);
		write_sb = true;
	}

	if (write_sb)
		bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return ret < 0 ? ret : 0;
}
212
bch2_sb_set_upgrade(struct bch_fs * c,unsigned old_version,unsigned new_version)213 void bch2_sb_set_upgrade(struct bch_fs *c,
214 unsigned old_version,
215 unsigned new_version)
216 {
217 lockdep_assert_held(&c->sb_lock);
218
219 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
220
221 for (const struct upgrade_downgrade_entry *i = upgrade_table;
222 i < upgrade_table + ARRAY_SIZE(upgrade_table);
223 i++)
224 if (i->version > old_version && i->version <= new_version) {
225 u64 passes = i->recovery_passes;
226
227 if (passes & RECOVERY_PASS_ALL_FSCK)
228 passes |= bch2_fsck_recovery_passes();
229 passes &= ~RECOVERY_PASS_ALL_FSCK;
230
231 ext->recovery_passes_required[0] |=
232 cpu_to_le64(bch2_recovery_passes_to_stable(passes));
233
234 for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++)
235 __set_bit_le64(*e, ext->errors_silent);
236 }
237 }
238
/* Same two-pass expansion as the upgrade table, for DOWNGRADE_TABLE(): */
#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(downgrade_##ver##_errors),	\
	.errors			= downgrade_##ver##_errors,	\
},
DOWNGRADE_TABLE()
#undef x
};
253
/*
 * Amend the downgrade entry currently being staged at the top of @table with
 * runtime-dependent extras.  On entry, bch2_sb_downgrade_update() has written
 * the entry at darray_top() but has NOT yet advanced table->nr past it, so
 * dst_offset addresses the uncommitted entry.
 *
 * Returns 0 or a negative error from darray_make_room().
 */
static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
{
	unsigned dst_offset = table->nr;
	struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table);
	unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	int ret = 0;

	unsigned nr_errors = le16_to_cpu(dst->nr_errors);

	switch (le16_to_cpu(dst->version)) {
	case bcachefs_metadata_version_bucket_stripe_sectors:
		/* stripe sector counts in alloc keys only matter if stripes exist: */
		if (have_stripes(c)) {
			/*
			 * NOTE(review): room is reserved for two extra error
			 * slots but only one error id is appended below.
			 * Over-reservation is harmless (nr_errors governs the
			 * serialized size), but confirm a second id isn't
			 * missing.
			 */
			bytes += sizeof(dst->errors[0]) * 2;

			ret = darray_make_room(table, bytes);
			if (ret)
				return ret;

			/* darray_make_room() may have reallocated — recompute dst: */
			dst = (void *) &table->data[dst_offset];
			dst->nr_errors = cpu_to_le16(nr_errors + 1);

			/* open coded __set_bit_le64, as dst is packed and
			 * dst->recovery_passes is misaligned */
			unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations;
			dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64));

			dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong);
		}
		break;
	}

	return ret;
}
287
/*
 * Downgrade entries are variable length (header + nr_errors u16s); the next
 * entry starts immediately after this one's error list.
 */
static inline const struct bch_sb_field_downgrade_entry *
downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
{
	return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
}

/*
 * Iterate over entries that fit entirely within the section: stop as soon as
 * an entry header or its error list would run past vstruct_end() (a short
 * trailing span is padding, not an entry — see bch2_sb_downgrade_validate()).
 */
#define for_each_downgrade_entry(_d, _i)						\
	for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;	\
	     (void *) _i	< vstruct_end(&(_d)->field) &&			\
	     (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&		\
	     (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);	\
	     _i = downgrade_entry_next_c(_i))
300
bch2_sb_downgrade_validate(struct bch_sb * sb,struct bch_sb_field * f,enum bch_validate_flags flags,struct printbuf * err)301 static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
302 enum bch_validate_flags flags, struct printbuf *err)
303 {
304 struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
305
306 for (const struct bch_sb_field_downgrade_entry *i = e->entries;
307 (void *) i < vstruct_end(&e->field);
308 i = downgrade_entry_next_c(i)) {
309 /*
310 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
311 * section sizes are 8 byte aligned - an empty entry spanning
312 * the end of the section is allowed (and ignored):
313 */
314 if ((void *) &i->errors[0] > vstruct_end(&e->field))
315 break;
316
317 if (flags & BCH_VALIDATE_write &&
318 (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
319 prt_printf(err, "downgrade entry overruns end of superblock section");
320 return -BCH_ERR_invalid_sb_downgrade;
321 }
322
323 if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
324 BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
325 prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
326 BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
327 BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
328 return -BCH_ERR_invalid_sb_downgrade;
329 }
330 }
331
332 return 0;
333 }
334
bch2_sb_downgrade_to_text(struct printbuf * out,struct bch_sb * sb,struct bch_sb_field * f)335 static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
336 struct bch_sb_field *f)
337 {
338 struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);
339
340 if (out->nr_tabstops <= 1)
341 printbuf_tabstop_push(out, 16);
342
343 for_each_downgrade_entry(e, i) {
344 prt_str(out, "version:\t");
345 bch2_version_to_text(out, le16_to_cpu(i->version));
346 prt_newline(out);
347
348 prt_str(out, "recovery passes:\t");
349 prt_bitflags(out, bch2_recovery_passes,
350 bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
351 prt_newline(out);
352
353 prt_str(out, "errors:\t");
354 bool first = true;
355 for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
356 if (!first)
357 prt_char(out, ',');
358 first = false;
359 bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
360 }
361 prt_newline(out);
362 }
363 }
364
/* Hooks for the generic superblock-field machinery in super-io.c: */
const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
	.validate	= bch2_sb_downgrade_validate,
	.to_text	= bch2_sb_downgrade_to_text,
};
369
/*
 * (Re)build the superblock's downgrade section from downgrade_table, so that
 * an older kernel mounting this filesystem knows which recovery passes to run
 * and which errors to silence.  Caller must hold c->sb_lock (superblock is
 * resized/modified below).
 *
 * Returns 0 or a negative error (allocation failure, or no room in the sb).
 */
int bch2_sb_downgrade_update(struct bch_fs *c)
{
	/* need the btree roots to answer have_stripes() in the extras hook: */
	if (!test_bit(BCH_FS_btree_running, &c->flags))
		return 0;

	darray_char table = {};
	int ret = 0;

	for (const struct upgrade_downgrade_entry *src = downgrade_table;
	     src < downgrade_table + ARRAY_SIZE(downgrade_table);
	     src++) {
		/* serialized entries must match the sb's major version: */
		if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
			continue;

		/* can't downgrade below versions with incompat features in use: */
		if (src->version < c->sb.version_incompat)
			continue;

		struct bch_sb_field_downgrade_entry *dst;
		unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;

		ret = darray_make_room(&table, bytes);
		if (ret)
			goto out;

		/* stage the entry past table.nr; committed below once complete: */
		dst = (void *) &darray_top(table);
		dst->version = cpu_to_le16(src->version);
		dst->recovery_passes[0]	= cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
		dst->recovery_passes[1]	= 0;
		dst->nr_errors		= cpu_to_le16(src->nr_errors);
		for (unsigned i = 0; i < src->nr_errors; i++)
			dst->errors[i] = cpu_to_le16(src->errors[i]);

		/* may grow the entry (and realloc table.data — dst is dead after this): */
		ret = downgrade_table_extra(c, &table);
		if (ret)
			goto out;

		/* skip entries that ended up empty: */
		if (!dst->recovery_passes[0] &&
		    !dst->recovery_passes[1] &&
		    !dst->nr_errors)
			continue;

		/* commit: include the (possibly extended) entry in the darray: */
		table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	}

	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);

	unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));

	/* don't shrink an existing section that's already big enough: */
	if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
		goto out;

	d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
	if (!d) {
		ret = bch_err_throw(c, ENOSPC_sb_downgrade);
		goto out;
	}

	memcpy(d->entries, table.data, table.nr);
	/* zero the 8-byte-alignment padding after the last entry: */
	memset_u64s_tail(d->entries, 0, table.nr);
out:
	darray_exit(&table);
	return ret;
}
433
bch2_sb_set_downgrade(struct bch_fs * c,unsigned new_minor,unsigned old_minor)434 void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
435 {
436 struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
437 if (!d)
438 return;
439
440 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
441
442 for_each_downgrade_entry(d, i) {
443 unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
444 if (new_minor < minor && minor <= old_minor) {
445 ext->recovery_passes_required[0] |= i->recovery_passes[0];
446 ext->recovery_passes_required[1] |= i->recovery_passes[1];
447
448 for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
449 unsigned e = le16_to_cpu(i->errors[j]);
450 if (e < BCH_FSCK_ERR_MAX)
451 __set_bit(e, c->sb.errors_silent);
452 if (e < sizeof(ext->errors_silent) * 8)
453 __set_bit_le64(e, ext->errors_silent);
454 }
455 }
456 }
457 }
458