// SPDX-License-Identifier: GPL-2.0

/*
 * Superblock section that contains a list of recovery passes to run when
 * downgrading past a given version
 */

#include "bcachefs.h"
#include "darray.h"
#include "recovery_passes.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "super-io.h"

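/*
 * Pseudo-flag: expanded by bch2_sb_set_upgrade() into the full set of fsck
 * recovery passes (via bch2_fsck_recovery_passes()) before being written out.
 */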
#define RECOVERY_PASS_ALL_FSCK		BIT_ULL(63)

/*
 * Upgrade, downgrade tables - run certain recovery passes, fix certain errors
 *
 * x(version, recovery_passes, errors...)
 */
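/*
 * For example, the snapshot_skiplists entry expands - via the x() definitions
 * further down - into an error list plus a table entry along these lines
 * (illustrative only, not verbatim preprocessor output):
 *
 *	static const u16 upgrade_snapshot_skiplists_errors[] = {
 *		BCH_FSCK_ERR_snapshot_bad_depth,
 *		BCH_FSCK_ERR_snapshot_bad_skiplist,
 *	};
 *
 *	{
 *		.recovery_passes = BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),
 *		.version	 = bcachefs_metadata_version_snapshot_skiplists,
 *		.nr_errors	 = ARRAY_SIZE(upgrade_snapshot_skiplists_errors),
 *		.errors		 = upgrade_snapshot_skiplists_errors,
 *	},
 */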
#define UPGRADE_TABLE()						\
	x(backpointers,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(inode_v3,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(unwritten_extents,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(bucket_gens,						\
	  BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)|		\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(lru_v2,						\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(fragmentation_lru,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(no_bps_in_alloc_keys,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_trees,					\
	  RECOVERY_PASS_ALL_FSCK)				\
	x(snapshot_skiplists,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_snapshots),		\
	  BCH_FSCK_ERR_snapshot_bad_depth,			\
	  BCH_FSCK_ERR_snapshot_bad_skiplist)			\
	x(deleted_inodes,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list)	\
	x(rebalance_work,					\
	  BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance))	\
	x(subvolume_fs_parent,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_dirents),		\
	  BCH_FSCK_ERR_subvol_fs_path_parent_wrong)		\
	x(btree_subvolume_children,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_subvols),		\
	  BCH_FSCK_ERR_subvol_children_not_set)			\
	x(mi_btree_bitmap,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_btree_bitmap_not_marked)			\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_bkey_version_in_future,			\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_accounting_mismatch,			\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0,	\
	  BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad,	\
	  BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted,	\
	  BCH_FSCK_ERR_accounting_key_junk_at_end)		\
	x(disk_accounting_inum,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)			\
	x(inode_has_child_snapshots,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_inodes),		\
	  BCH_FSCK_ERR_inode_has_child_snapshots_wrong)

#define DOWNGRADE_TABLE()					\
	x(bucket_stripe_sectors,				\
	  0)							\
	x(disk_accounting_v2,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(disk_accounting_v3,					\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_dev_usage_buckets_wrong,			\
	  BCH_FSCK_ERR_dev_usage_sectors_wrong,			\
	  BCH_FSCK_ERR_dev_usage_fragmented_wrong,		\
	  BCH_FSCK_ERR_fs_usage_hidden_wrong,			\
	  BCH_FSCK_ERR_fs_usage_btree_wrong,			\
	  BCH_FSCK_ERR_fs_usage_data_wrong,			\
	  BCH_FSCK_ERR_fs_usage_cached_wrong,			\
	  BCH_FSCK_ERR_fs_usage_reserved_wrong,			\
	  BCH_FSCK_ERR_fs_usage_nr_inodes_wrong,		\
	  BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong,	\
	  BCH_FSCK_ERR_fs_usage_replicas_wrong,			\
	  BCH_FSCK_ERR_accounting_replicas_not_marked,		\
	  BCH_FSCK_ERR_bkey_version_in_future)			\
	x(rebalance_work_acct_fix,				\
	  BIT_ULL(BCH_RECOVERY_PASS_check_allocations),		\
	  BCH_FSCK_ERR_accounting_mismatch)

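/*
 * In-memory form of one table entry: the metadata version it applies to, the
 * recovery passes to schedule, and the fsck errors to silence when crossing
 * that version.
 */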
struct upgrade_downgrade_entry {
	u64		recovery_passes;
	u16		version;
	u16		nr_errors;
	const u16	*errors;
};

#define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ };
UPGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry upgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(upgrade_##ver##_errors),	\
	.errors			= upgrade_##ver##_errors,	\
},
UPGRADE_TABLE()
#undef x
};

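/* Does this filesystem have a real (non-fake) stripes btree root? */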
static int have_stripes(struct bch_fs *c)
{
	if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b))
		return 0;

	return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b);
}

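/*
 * Version-specific upgrade work that isn't expressed in UPGRADE_TABLE():
 * when crossing bucket_stripe_sectors on a filesystem that actually has
 * stripes, require a check_allocations pass and silence the alloc key
 * sector count errors that pass is expected to correct.
 */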
int bch2_sb_set_upgrade_extra(struct bch_fs *c)
{
	unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version;
	unsigned new_version = c->sb.version;
	bool write_sb = false;
	int ret = 0;

	mutex_lock(&c->sb_lock);
	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	if (old_version < bcachefs_metadata_version_bucket_stripe_sectors &&
	    new_version >= bcachefs_metadata_version_bucket_stripe_sectors &&
	    (ret = have_stripes(c) > 0)) {
		__set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent);
		__set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent);
		write_sb = true;
	}

	if (write_sb)
		bch2_write_super(c);
	mutex_unlock(&c->sb_lock);

	return ret < 0 ? ret : 0;
}

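/*
 * On upgrade, walk the upgrade table: for every entry with a version in
 * (old_version, new_version], mark its recovery passes as required and its
 * fsck errors as silent in the superblock's ext section.
 */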
void bch2_sb_set_upgrade(struct bch_fs *c,
			 unsigned old_version,
			 unsigned new_version)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	for (const struct upgrade_downgrade_entry *i = upgrade_table;
	     i < upgrade_table + ARRAY_SIZE(upgrade_table);
	     i++)
		if (i->version > old_version && i->version <= new_version) {
			u64 passes = i->recovery_passes;

			if (passes & RECOVERY_PASS_ALL_FSCK)
				passes |= bch2_fsck_recovery_passes();
			passes &= ~RECOVERY_PASS_ALL_FSCK;

			ext->recovery_passes_required[0] |=
				cpu_to_le64(bch2_recovery_passes_to_stable(passes));

			for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++)
				__set_bit_le64(*e, ext->errors_silent);
		}
}

#define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ };
DOWNGRADE_TABLE()
#undef x

static const struct upgrade_downgrade_entry downgrade_table[] = {
#define x(ver, passes, ...) {					\
	.recovery_passes	= passes,			\
	.version		= bcachefs_metadata_version_##ver,\
	.nr_errors		= ARRAY_SIZE(downgrade_##ver##_errors),	\
	.errors			= downgrade_##ver##_errors,	\
},
DOWNGRADE_TABLE()
#undef x
};

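/*
 * Add filesystem-dependent extras to the entry most recently staged in @table
 * (the entry at darray_top(); table->nr has not been advanced yet). Currently
 * this only covers bucket_stripe_sectors, which needs a check_allocations
 * pass and an extra silenced error only if the filesystem has stripes.
 */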
static int downgrade_table_extra(struct bch_fs *c, darray_char *table)
{
	struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table);
	unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	int ret = 0;

	unsigned nr_errors = le16_to_cpu(dst->nr_errors);

	switch (le16_to_cpu(dst->version)) {
	case bcachefs_metadata_version_bucket_stripe_sectors:
		if (have_stripes(c)) {
			bytes += sizeof(dst->errors[0]) * 2;

			ret = darray_make_room(table, bytes);
			if (ret)
				return ret;

			/* open coded __set_bit_le64, as dst is packed and
			 * dst->recovery_passes is misaligned */
			unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations;
			dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64));

			dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong);
		}
		break;
	}

	dst->nr_errors = cpu_to_le16(nr_errors);
	return ret;
}

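/* Entries are variable length; the next one starts right after errors[]. */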
static inline const struct bch_sb_field_downgrade_entry *
downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e)
{
	return (void *) &e->errors[le16_to_cpu(e->nr_errors)];
}

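/*
 * Iterate over downgrade entries, stopping before any entry that would read
 * past the end of the section (see the comment in the validate hook below).
 */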
#define for_each_downgrade_entry(_d, _i)					\
	for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries;	\
	     (void *) _i < vstruct_end(&(_d)->field) &&				\
	     (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) &&		\
	     (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field);	\
	     _i = downgrade_entry_next_c(_i))

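/*
 * Validate the on-disk downgrade section: entries must not overrun the
 * section, and must carry the same major version as the superblock.
 */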
static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f,
				      enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);

	for (const struct bch_sb_field_downgrade_entry *i = e->entries;
	     (void *) i < vstruct_end(&e->field);
	     i = downgrade_entry_next_c(i)) {
		/*
		 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but
		 * section sizes are 8 byte aligned - an empty entry spanning
		 * the end of the section is allowed (and ignored):
		 */
		if ((void *) &i->errors[0] > vstruct_end(&e->field))
			break;

		if (flags & BCH_VALIDATE_write &&
		    (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) {
			prt_printf(err, "downgrade entry overruns end of superblock section");
			return -BCH_ERR_invalid_sb_downgrade;
		}

		if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) !=
		    BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) {
			prt_printf(err, "downgrade entry with mismatched major version (%u != %u)",
				   BCH_VERSION_MAJOR(le16_to_cpu(i->version)),
				   BCH_VERSION_MAJOR(le16_to_cpu(sb->version)));
			return -BCH_ERR_invalid_sb_downgrade;
		}
	}

	return 0;
}

static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb,
				      struct bch_sb_field *f)
{
	struct bch_sb_field_downgrade *e = field_to_type(f, downgrade);

	if (out->nr_tabstops <= 1)
		printbuf_tabstop_push(out, 16);

	for_each_downgrade_entry(e, i) {
		prt_str(out, "version:\t");
		bch2_version_to_text(out, le16_to_cpu(i->version));
		prt_newline(out);

		prt_str(out, "recovery passes:\t");
		prt_bitflags(out, bch2_recovery_passes,
			     bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0])));
		prt_newline(out);

		prt_str(out, "errors:\t");
		bool first = true;
		for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
			if (!first)
				prt_char(out, ',');
			first = false;
			bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j]));
		}
		prt_newline(out);
	}
}

const struct bch_sb_field_ops bch_sb_field_ops_downgrade = {
	.validate	= bch2_sb_downgrade_validate,
	.to_text	= bch2_sb_downgrade_to_text,
};

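/*
 * (Re)generate the on-disk downgrade section from downgrade_table: one entry
 * per table row matching the current major version, plus any extra
 * filesystem-dependent bits from downgrade_table_extra(). Entries that end up
 * with no recovery passes and no errors are dropped.
 */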
int bch2_sb_downgrade_update(struct bch_fs *c)
{
	if (!test_bit(BCH_FS_btree_running, &c->flags))
		return 0;

	darray_char table = {};
	int ret = 0;

	for (const struct upgrade_downgrade_entry *src = downgrade_table;
	     src < downgrade_table + ARRAY_SIZE(downgrade_table);
	     src++) {
		if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
			continue;

		struct bch_sb_field_downgrade_entry *dst;
		unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors;

		ret = darray_make_room(&table, bytes);
		if (ret)
			goto out;

		dst = (void *) &darray_top(table);
		dst->version = cpu_to_le16(src->version);
		dst->recovery_passes[0]	= cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes));
		dst->recovery_passes[1]	= 0;
		dst->nr_errors		= cpu_to_le16(src->nr_errors);
		for (unsigned i = 0; i < src->nr_errors; i++)
			dst->errors[i] = cpu_to_le16(src->errors[i]);

		ret = downgrade_table_extra(c, &table);
		if (ret)
			goto out;

		if (!dst->recovery_passes[0] &&
		    !dst->recovery_passes[1] &&
		    !dst->nr_errors)
			continue;

		table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors);
	}

	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);

	unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64));

	if (d && le32_to_cpu(d->field.u64s) > sb_u64s)
		goto out;

	d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s);
	if (!d) {
		ret = -BCH_ERR_ENOSPC_sb_downgrade;
		goto out;
	}

	memcpy(d->entries, table.data, table.nr);
	memset_u64s_tail(d->entries, 0, table.nr);
out:
	darray_exit(&table);
	return ret;
}

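/*
 * When downgrading from old_minor to new_minor, apply every downgrade entry
 * in (new_minor, old_minor]: mark its recovery passes as required and its
 * errors as silent, both in memory and in the superblock's ext section.
 */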
void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor)
{
	struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade);
	if (!d)
		return;

	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);

	for_each_downgrade_entry(d, i) {
		unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version));
		if (new_minor < minor && minor <= old_minor) {
			ext->recovery_passes_required[0] |= i->recovery_passes[0];
			ext->recovery_passes_required[1] |= i->recovery_passes[1];

			for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) {
				unsigned e = le16_to_cpu(i->errors[j]);
				if (e < BCH_FSCK_ERR_MAX)
					__set_bit(e, c->sb.errors_silent);
				if (e < sizeof(ext->errors_silent) * 8)
					__set_bit_le64(e, ext->errors_silent);
			}
		}
	}
}