1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Superblock section that contains a list of recovery passes to run when 5 * downgrading past a given version 6 */ 7 8 #include "bcachefs.h" 9 #include "darray.h" 10 #include "recovery_passes.h" 11 #include "sb-downgrade.h" 12 #include "sb-errors.h" 13 #include "super-io.h" 14 15 #define RECOVERY_PASS_ALL_FSCK BIT_ULL(63) 16 17 /* 18 * Upgrade, downgrade tables - run certain recovery passes, fix certain errors 19 * 20 * x(version, recovery_passes, errors...) 21 */ 22 #define UPGRADE_TABLE() \ 23 x(backpointers, \ 24 RECOVERY_PASS_ALL_FSCK) \ 25 x(inode_v3, \ 26 RECOVERY_PASS_ALL_FSCK) \ 27 x(unwritten_extents, \ 28 RECOVERY_PASS_ALL_FSCK) \ 29 x(bucket_gens, \ 30 BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ 31 RECOVERY_PASS_ALL_FSCK) \ 32 x(lru_v2, \ 33 RECOVERY_PASS_ALL_FSCK) \ 34 x(fragmentation_lru, \ 35 RECOVERY_PASS_ALL_FSCK) \ 36 x(no_bps_in_alloc_keys, \ 37 RECOVERY_PASS_ALL_FSCK) \ 38 x(snapshot_trees, \ 39 RECOVERY_PASS_ALL_FSCK) \ 40 x(snapshot_skiplists, \ 41 BIT_ULL(BCH_RECOVERY_PASS_check_snapshots), \ 42 BCH_FSCK_ERR_snapshot_bad_depth, \ 43 BCH_FSCK_ERR_snapshot_bad_skiplist) \ 44 x(deleted_inodes, \ 45 BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ 46 BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \ 47 x(rebalance_work, \ 48 BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \ 49 x(subvolume_fs_parent, \ 50 BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \ 51 BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \ 52 x(btree_subvolume_children, \ 53 BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \ 54 BCH_FSCK_ERR_subvol_children_not_set) \ 55 x(mi_btree_bitmap, \ 56 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 57 BCH_FSCK_ERR_btree_bitmap_not_marked) \ 58 x(disk_accounting_v2, \ 59 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 60 BCH_FSCK_ERR_bkey_version_in_future, \ 61 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 62 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 63 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 64 BCH_FSCK_ERR_accounting_mismatch) \ 65 x(disk_accounting_v3, \ 66 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 67 BCH_FSCK_ERR_bkey_version_in_future, \ 68 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 69 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 70 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 71 BCH_FSCK_ERR_accounting_mismatch, \ 72 BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ 73 BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ 74 BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ 75 BCH_FSCK_ERR_accounting_key_junk_at_end) \ 76 x(disk_accounting_inum, \ 77 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 78 BCH_FSCK_ERR_accounting_mismatch) \ 79 x(rebalance_work_acct_fix, \ 80 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 81 BCH_FSCK_ERR_accounting_mismatch) \ 82 x(inode_has_child_snapshots, \ 83 BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ 84 BCH_FSCK_ERR_inode_has_child_snapshots_wrong) 85 86 #define DOWNGRADE_TABLE() \ 87 x(bucket_stripe_sectors, \ 88 0) \ 89 x(disk_accounting_v2, \ 90 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 91 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 92 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 93 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 94 BCH_FSCK_ERR_fs_usage_hidden_wrong, \ 95 BCH_FSCK_ERR_fs_usage_btree_wrong, \ 96 BCH_FSCK_ERR_fs_usage_data_wrong, \ 97 BCH_FSCK_ERR_fs_usage_cached_wrong, \ 98 BCH_FSCK_ERR_fs_usage_reserved_wrong, \ 99 BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ 100 BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ 101 BCH_FSCK_ERR_fs_usage_replicas_wrong, \ 102 BCH_FSCK_ERR_bkey_version_in_future) \ 103 x(disk_accounting_v3, \ 104 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 105 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 106 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 107 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 108 BCH_FSCK_ERR_fs_usage_hidden_wrong, \ 109 BCH_FSCK_ERR_fs_usage_btree_wrong, \ 110 BCH_FSCK_ERR_fs_usage_data_wrong, \ 111 BCH_FSCK_ERR_fs_usage_cached_wrong, \ 112 BCH_FSCK_ERR_fs_usage_reserved_wrong, \ 113 BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ 114 BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ 115 BCH_FSCK_ERR_fs_usage_replicas_wrong, \ 116 BCH_FSCK_ERR_accounting_replicas_not_marked, \ 117 BCH_FSCK_ERR_bkey_version_in_future) \ 118 x(rebalance_work_acct_fix, \ 119 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 120 BCH_FSCK_ERR_accounting_mismatch) 121 122 struct upgrade_downgrade_entry { 123 u64 recovery_passes; 124 u16 version; 125 u16 nr_errors; 126 const u16 *errors; 127 }; 128 129 #define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ }; 130 UPGRADE_TABLE() 131 #undef x 132 133 static const struct upgrade_downgrade_entry upgrade_table[] = { 134 #define x(ver, passes, ...) { \ 135 .recovery_passes = passes, \ 136 .version = bcachefs_metadata_version_##ver,\ 137 .nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \ 138 .errors = upgrade_##ver##_errors, \ 139 }, 140 UPGRADE_TABLE() 141 #undef x 142 }; 143 144 static int have_stripes(struct bch_fs *c) 145 { 146 if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b)) 147 return 0; 148 149 return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b); 150 } 151 152 int bch2_sb_set_upgrade_extra(struct bch_fs *c) 153 { 154 unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; 155 unsigned new_version = c->sb.version; 156 bool write_sb = false; 157 int ret = 0; 158 159 mutex_lock(&c->sb_lock); 160 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 161 162 if (old_version < bcachefs_metadata_version_bucket_stripe_sectors && 163 new_version >= bcachefs_metadata_version_bucket_stripe_sectors && 164 (ret = have_stripes(c) > 0)) { 165 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required); 166 __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); 167 __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent); 168 write_sb = true; 169 } 170 171 if (write_sb) 172 bch2_write_super(c); 173 mutex_unlock(&c->sb_lock); 174 175 return ret < 0 ? ret : 0; 176 } 177 178 void bch2_sb_set_upgrade(struct bch_fs *c, 179 unsigned old_version, 180 unsigned new_version) 181 { 182 lockdep_assert_held(&c->sb_lock); 183 184 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 185 186 for (const struct upgrade_downgrade_entry *i = upgrade_table; 187 i < upgrade_table + ARRAY_SIZE(upgrade_table); 188 i++) 189 if (i->version > old_version && i->version <= new_version) { 190 u64 passes = i->recovery_passes; 191 192 if (passes & RECOVERY_PASS_ALL_FSCK) 193 passes |= bch2_fsck_recovery_passes(); 194 passes &= ~RECOVERY_PASS_ALL_FSCK; 195 196 ext->recovery_passes_required[0] |= 197 cpu_to_le64(bch2_recovery_passes_to_stable(passes)); 198 199 for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++) 200 __set_bit_le64(*e, ext->errors_silent); 201 } 202 } 203 204 #define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ }; 205 DOWNGRADE_TABLE() 206 #undef x 207 208 static const struct upgrade_downgrade_entry downgrade_table[] = { 209 #define x(ver, passes, ...) { \ 210 .recovery_passes = passes, \ 211 .version = bcachefs_metadata_version_##ver,\ 212 .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \ 213 .errors = downgrade_##ver##_errors, \ 214 }, 215 DOWNGRADE_TABLE() 216 #undef x 217 }; 218 219 static int downgrade_table_extra(struct bch_fs *c, darray_char *table) 220 { 221 struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); 222 unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); 223 int ret = 0; 224 225 unsigned nr_errors = le16_to_cpu(dst->nr_errors); 226 227 switch (le16_to_cpu(dst->version)) { 228 case bcachefs_metadata_version_bucket_stripe_sectors: 229 if (have_stripes(c)) { 230 bytes += sizeof(dst->errors[0]) * 2; 231 232 ret = darray_make_room(table, bytes); 233 if (ret) 234 return ret; 235 236 /* open coded __set_bit_le64, as dst is packed and 237 * dst->recovery_passes is misaligned */ 238 unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; 239 dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64)); 240 241 dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong); 242 } 243 break; 244 } 245 246 dst->nr_errors = cpu_to_le16(nr_errors); 247 return ret; 248 } 249 250 static inline const struct bch_sb_field_downgrade_entry * 251 downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) 252 { 253 return (void *) &e->errors[le16_to_cpu(e->nr_errors)]; 254 } 255 256 #define for_each_downgrade_entry(_d, _i) \ 257 for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \ 258 (void *) _i < vstruct_end(&(_d)->field) && \ 259 (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) && \ 260 (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field); \ 261 _i = downgrade_entry_next_c(_i)) 262 263 static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, 264 enum bch_validate_flags flags, struct printbuf *err) 265 { 266 struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); 267 268 for (const struct bch_sb_field_downgrade_entry *i = e->entries; 269 (void *) i < vstruct_end(&e->field); 270 i = downgrade_entry_next_c(i)) { 271 /* 272 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but 273 * section sizes are 8 byte aligned - an empty entry spanning 274 * the end of the section is allowed (and ignored): 275 */ 276 if ((void *) &i->errors[0] > vstruct_end(&e->field)) 277 break; 278 279 if (flags & BCH_VALIDATE_write && 280 (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) { 281 prt_printf(err, "downgrade entry overruns end of superblock section"); 282 return -BCH_ERR_invalid_sb_downgrade; 283 } 284 285 if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) != 286 BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) { 287 prt_printf(err, "downgrade entry with mismatched major version (%u != %u)", 288 BCH_VERSION_MAJOR(le16_to_cpu(i->version)), 289 BCH_VERSION_MAJOR(le16_to_cpu(sb->version))); 290 return -BCH_ERR_invalid_sb_downgrade; 291 } 292 } 293 294 return 0; 295 } 296 297 static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb, 298 struct bch_sb_field *f) 299 { 300 struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); 301 302 if (out->nr_tabstops <= 1) 303 printbuf_tabstop_push(out, 16); 304 305 for_each_downgrade_entry(e, i) { 306 prt_str(out, "version:\t"); 307 bch2_version_to_text(out, le16_to_cpu(i->version)); 308 prt_newline(out); 309 310 prt_str(out, "recovery passes:\t"); 311 prt_bitflags(out, bch2_recovery_passes, 312 bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0]))); 313 prt_newline(out); 314 315 prt_str(out, "errors:\t"); 316 bool first = true; 317 for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { 318 if (!first) 319 prt_char(out, ','); 320 first = false; 321 bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j])); 322 } 323 prt_newline(out); 324 } 325 } 326 327 const struct bch_sb_field_ops bch_sb_field_ops_downgrade = { 328 .validate = bch2_sb_downgrade_validate, 329 .to_text = bch2_sb_downgrade_to_text, 330 }; 331 332 int bch2_sb_downgrade_update(struct bch_fs *c) 333 { 334 if (!test_bit(BCH_FS_btree_running, &c->flags)) 335 return 0; 336 337 darray_char table = {}; 338 int ret = 0; 339 340 for (const struct upgrade_downgrade_entry *src = downgrade_table; 341 src < downgrade_table + ARRAY_SIZE(downgrade_table); 342 src++) { 343 if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) 344 continue; 345 346 struct bch_sb_field_downgrade_entry *dst; 347 unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; 348 349 ret = darray_make_room(&table, bytes); 350 if (ret) 351 goto out; 352 353 dst = (void *) &darray_top(table); 354 dst->version = cpu_to_le16(src->version); 355 dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes)); 356 dst->recovery_passes[1] = 0; 357 dst->nr_errors = cpu_to_le16(src->nr_errors); 358 for (unsigned i = 0; i < src->nr_errors; i++) 359 dst->errors[i] = cpu_to_le16(src->errors[i]); 360 361 ret = downgrade_table_extra(c, &table); 362 if (ret) 363 goto out; 364 365 if (!dst->recovery_passes[0] && 366 !dst->recovery_passes[1] && 367 !dst->nr_errors) 368 continue; 369 370 table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); 371 } 372 373 struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); 374 375 unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); 376 377 if (d && le32_to_cpu(d->field.u64s) > sb_u64s) 378 goto out; 379 380 d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); 381 if (!d) { 382 ret = -BCH_ERR_ENOSPC_sb_downgrade; 383 goto out; 384 } 385 386 memcpy(d->entries, table.data, table.nr); 387 memset_u64s_tail(d->entries, 0, table.nr); 388 out: 389 darray_exit(&table); 390 return ret; 391 } 392 393 void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) 394 { 395 struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); 396 if (!d) 397 return; 398 399 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 400 401 for_each_downgrade_entry(d, i) { 402 unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version)); 403 if (new_minor < minor && minor <= old_minor) { 404 ext->recovery_passes_required[0] |= i->recovery_passes[0]; 405 ext->recovery_passes_required[1] |= i->recovery_passes[1]; 406 407 for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { 408 unsigned e = le16_to_cpu(i->errors[j]); 409 if (e < BCH_FSCK_ERR_MAX) 410 __set_bit(e, c->sb.errors_silent); 411 if (e < sizeof(ext->errors_silent) * 8) 412 __set_bit_le64(e, ext->errors_silent); 413 } 414 } 415 } 416 } 417