1 // SPDX-License-Identifier: GPL-2.0 2 3 /* 4 * Superblock section that contains a list of recovery passes to run when 5 * downgrading past a given version 6 */ 7 8 #include "bcachefs.h" 9 #include "darray.h" 10 #include "recovery_passes.h" 11 #include "sb-downgrade.h" 12 #include "sb-errors.h" 13 #include "super-io.h" 14 15 #define RECOVERY_PASS_ALL_FSCK BIT_ULL(63) 16 17 /* 18 * Upgrade, downgrade tables - run certain recovery passes, fix certain errors 19 * 20 * x(version, recovery_passes, errors...) 21 */ 22 #define UPGRADE_TABLE() \ 23 x(snapshot_2, \ 24 RECOVERY_PASS_ALL_FSCK, \ 25 BCH_FSCK_ERR_subvol_root_wrong_bi_subvol, \ 26 BCH_FSCK_ERR_subvol_not_master_and_not_snapshot) \ 27 x(backpointers, \ 28 RECOVERY_PASS_ALL_FSCK) \ 29 x(inode_v3, \ 30 RECOVERY_PASS_ALL_FSCK) \ 31 x(unwritten_extents, \ 32 RECOVERY_PASS_ALL_FSCK) \ 33 x(bucket_gens, \ 34 BIT_ULL(BCH_RECOVERY_PASS_bucket_gens_init)| \ 35 RECOVERY_PASS_ALL_FSCK) \ 36 x(lru_v2, \ 37 RECOVERY_PASS_ALL_FSCK) \ 38 x(fragmentation_lru, \ 39 RECOVERY_PASS_ALL_FSCK) \ 40 x(no_bps_in_alloc_keys, \ 41 RECOVERY_PASS_ALL_FSCK) \ 42 x(snapshot_trees, \ 43 RECOVERY_PASS_ALL_FSCK) \ 44 x(snapshot_skiplists, \ 45 BIT_ULL(BCH_RECOVERY_PASS_check_snapshots), \ 46 BCH_FSCK_ERR_snapshot_bad_depth, \ 47 BCH_FSCK_ERR_snapshot_bad_skiplist) \ 48 x(deleted_inodes, \ 49 BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ 50 BCH_FSCK_ERR_unlinked_inode_not_on_deleted_list) \ 51 x(rebalance_work, \ 52 BIT_ULL(BCH_RECOVERY_PASS_set_fs_needs_rebalance)) \ 53 x(subvolume_fs_parent, \ 54 BIT_ULL(BCH_RECOVERY_PASS_check_dirents), \ 55 BCH_FSCK_ERR_subvol_fs_path_parent_wrong) \ 56 x(btree_subvolume_children, \ 57 BIT_ULL(BCH_RECOVERY_PASS_check_subvols), \ 58 BCH_FSCK_ERR_subvol_children_not_set) \ 59 x(mi_btree_bitmap, \ 60 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 61 BCH_FSCK_ERR_btree_bitmap_not_marked) \ 62 x(disk_accounting_v2, \ 63 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 64 BCH_FSCK_ERR_bkey_version_in_future, \ 65 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 66 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 67 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 68 BCH_FSCK_ERR_accounting_mismatch) \ 69 x(disk_accounting_v3, \ 70 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 71 BCH_FSCK_ERR_bkey_version_in_future, \ 72 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 73 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 74 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 75 BCH_FSCK_ERR_accounting_mismatch, \ 76 BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ 77 BCH_FSCK_ERR_accounting_key_replicas_nr_required_bad, \ 78 BCH_FSCK_ERR_accounting_key_replicas_devs_unsorted, \ 79 BCH_FSCK_ERR_accounting_key_junk_at_end) \ 80 x(disk_accounting_inum, \ 81 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 82 BCH_FSCK_ERR_accounting_mismatch) \ 83 x(rebalance_work_acct_fix, \ 84 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 85 BCH_FSCK_ERR_accounting_mismatch) \ 86 x(inode_has_child_snapshots, \ 87 BIT_ULL(BCH_RECOVERY_PASS_check_inodes), \ 88 BCH_FSCK_ERR_inode_has_child_snapshots_wrong) \ 89 x(backpointer_bucket_gen, \ 90 BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ 91 BCH_FSCK_ERR_backpointer_to_missing_ptr, \ 92 BCH_FSCK_ERR_ptr_to_missing_backpointer) \ 93 x(disk_accounting_big_endian, \ 94 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 95 BCH_FSCK_ERR_accounting_mismatch, \ 96 BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ 97 BCH_FSCK_ERR_accounting_key_junk_at_end) \ 98 x(cached_backpointers, \ 99 BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ 100 BCH_FSCK_ERR_ptr_to_missing_backpointer) \ 101 x(stripe_backpointers, \ 102 BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ 103 BCH_FSCK_ERR_ptr_to_missing_backpointer) 104 105 #define DOWNGRADE_TABLE() \ 106 x(bucket_stripe_sectors, \ 107 0) \ 108 x(disk_accounting_v2, \ 109 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 110 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 111 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 112 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 113 BCH_FSCK_ERR_fs_usage_hidden_wrong, \ 114 BCH_FSCK_ERR_fs_usage_btree_wrong, \ 115 BCH_FSCK_ERR_fs_usage_data_wrong, \ 116 BCH_FSCK_ERR_fs_usage_cached_wrong, \ 117 BCH_FSCK_ERR_fs_usage_reserved_wrong, \ 118 BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ 119 BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ 120 BCH_FSCK_ERR_fs_usage_replicas_wrong, \ 121 BCH_FSCK_ERR_bkey_version_in_future) \ 122 x(disk_accounting_v3, \ 123 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 124 BCH_FSCK_ERR_dev_usage_buckets_wrong, \ 125 BCH_FSCK_ERR_dev_usage_sectors_wrong, \ 126 BCH_FSCK_ERR_dev_usage_fragmented_wrong, \ 127 BCH_FSCK_ERR_fs_usage_hidden_wrong, \ 128 BCH_FSCK_ERR_fs_usage_btree_wrong, \ 129 BCH_FSCK_ERR_fs_usage_data_wrong, \ 130 BCH_FSCK_ERR_fs_usage_cached_wrong, \ 131 BCH_FSCK_ERR_fs_usage_reserved_wrong, \ 132 BCH_FSCK_ERR_fs_usage_nr_inodes_wrong, \ 133 BCH_FSCK_ERR_fs_usage_persistent_reserved_wrong, \ 134 BCH_FSCK_ERR_fs_usage_replicas_wrong, \ 135 BCH_FSCK_ERR_accounting_replicas_not_marked, \ 136 BCH_FSCK_ERR_bkey_version_in_future) \ 137 x(rebalance_work_acct_fix, \ 138 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 139 BCH_FSCK_ERR_accounting_mismatch, \ 140 BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ 141 BCH_FSCK_ERR_accounting_key_junk_at_end) \ 142 x(backpointer_bucket_gen, \ 143 BIT_ULL(BCH_RECOVERY_PASS_check_extents_to_backpointers),\ 144 BCH_FSCK_ERR_backpointer_bucket_offset_wrong, \ 145 BCH_FSCK_ERR_backpointer_to_missing_ptr, \ 146 BCH_FSCK_ERR_ptr_to_missing_backpointer) \ 147 x(disk_accounting_big_endian, \ 148 BIT_ULL(BCH_RECOVERY_PASS_check_allocations), \ 149 BCH_FSCK_ERR_accounting_mismatch, \ 150 BCH_FSCK_ERR_accounting_key_replicas_nr_devs_0, \ 151 BCH_FSCK_ERR_accounting_key_junk_at_end) 152 153 struct upgrade_downgrade_entry { 154 u64 recovery_passes; 155 u16 version; 156 u16 nr_errors; 157 const u16 *errors; 158 }; 159 160 #define x(ver, passes, ...) static const u16 upgrade_##ver##_errors[] = { __VA_ARGS__ }; 161 UPGRADE_TABLE() 162 #undef x 163 164 static const struct upgrade_downgrade_entry upgrade_table[] = { 165 #define x(ver, passes, ...) { \ 166 .recovery_passes = passes, \ 167 .version = bcachefs_metadata_version_##ver,\ 168 .nr_errors = ARRAY_SIZE(upgrade_##ver##_errors), \ 169 .errors = upgrade_##ver##_errors, \ 170 }, 171 UPGRADE_TABLE() 172 #undef x 173 }; 174 175 static int have_stripes(struct bch_fs *c) 176 { 177 if (IS_ERR_OR_NULL(c->btree_roots_known[BTREE_ID_stripes].b)) 178 return 0; 179 180 return !btree_node_fake(c->btree_roots_known[BTREE_ID_stripes].b); 181 } 182 183 int bch2_sb_set_upgrade_extra(struct bch_fs *c) 184 { 185 unsigned old_version = c->sb.version_upgrade_complete ?: c->sb.version; 186 unsigned new_version = c->sb.version; 187 bool write_sb = false; 188 int ret = 0; 189 190 mutex_lock(&c->sb_lock); 191 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 192 193 if (old_version < bcachefs_metadata_version_bucket_stripe_sectors && 194 new_version >= bcachefs_metadata_version_bucket_stripe_sectors && 195 (ret = have_stripes(c) > 0)) { 196 __set_bit_le64(BCH_RECOVERY_PASS_STABLE_check_allocations, ext->recovery_passes_required); 197 __set_bit_le64(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong, ext->errors_silent); 198 __set_bit_le64(BCH_FSCK_ERR_alloc_key_stripe_sectors_wrong, ext->errors_silent); 199 write_sb = true; 200 } 201 202 if (write_sb) 203 bch2_write_super(c); 204 mutex_unlock(&c->sb_lock); 205 206 return ret < 0 ? ret : 0; 207 } 208 209 void bch2_sb_set_upgrade(struct bch_fs *c, 210 unsigned old_version, 211 unsigned new_version) 212 { 213 lockdep_assert_held(&c->sb_lock); 214 215 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 216 217 for (const struct upgrade_downgrade_entry *i = upgrade_table; 218 i < upgrade_table + ARRAY_SIZE(upgrade_table); 219 i++) 220 if (i->version > old_version && i->version <= new_version) { 221 u64 passes = i->recovery_passes; 222 223 if (passes & RECOVERY_PASS_ALL_FSCK) 224 passes |= bch2_fsck_recovery_passes(); 225 passes &= ~RECOVERY_PASS_ALL_FSCK; 226 227 ext->recovery_passes_required[0] |= 228 cpu_to_le64(bch2_recovery_passes_to_stable(passes)); 229 230 for (const u16 *e = i->errors; e < i->errors + i->nr_errors; e++) 231 __set_bit_le64(*e, ext->errors_silent); 232 } 233 } 234 235 #define x(ver, passes, ...) static const u16 downgrade_##ver##_errors[] = { __VA_ARGS__ }; 236 DOWNGRADE_TABLE() 237 #undef x 238 239 static const struct upgrade_downgrade_entry downgrade_table[] = { 240 #define x(ver, passes, ...) { \ 241 .recovery_passes = passes, \ 242 .version = bcachefs_metadata_version_##ver,\ 243 .nr_errors = ARRAY_SIZE(downgrade_##ver##_errors), \ 244 .errors = downgrade_##ver##_errors, \ 245 }, 246 DOWNGRADE_TABLE() 247 #undef x 248 }; 249 250 static int downgrade_table_extra(struct bch_fs *c, darray_char *table) 251 { 252 struct bch_sb_field_downgrade_entry *dst = (void *) &darray_top(*table); 253 unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); 254 int ret = 0; 255 256 unsigned nr_errors = le16_to_cpu(dst->nr_errors); 257 258 switch (le16_to_cpu(dst->version)) { 259 case bcachefs_metadata_version_bucket_stripe_sectors: 260 if (have_stripes(c)) { 261 bytes += sizeof(dst->errors[0]) * 2; 262 263 ret = darray_make_room(table, bytes); 264 if (ret) 265 return ret; 266 267 /* open coded __set_bit_le64, as dst is packed and 268 * dst->recovery_passes is misaligned */ 269 unsigned b = BCH_RECOVERY_PASS_STABLE_check_allocations; 270 dst->recovery_passes[b / 64] |= cpu_to_le64(BIT_ULL(b % 64)); 271 272 dst->errors[nr_errors++] = cpu_to_le16(BCH_FSCK_ERR_alloc_key_dirty_sectors_wrong); 273 } 274 break; 275 } 276 277 dst->nr_errors = cpu_to_le16(nr_errors); 278 return ret; 279 } 280 281 static inline const struct bch_sb_field_downgrade_entry * 282 downgrade_entry_next_c(const struct bch_sb_field_downgrade_entry *e) 283 { 284 return (void *) &e->errors[le16_to_cpu(e->nr_errors)]; 285 } 286 287 #define for_each_downgrade_entry(_d, _i) \ 288 for (const struct bch_sb_field_downgrade_entry *_i = (_d)->entries; \ 289 (void *) _i < vstruct_end(&(_d)->field) && \ 290 (void *) &_i->errors[0] <= vstruct_end(&(_d)->field) && \ 291 (void *) downgrade_entry_next_c(_i) <= vstruct_end(&(_d)->field); \ 292 _i = downgrade_entry_next_c(_i)) 293 294 static int bch2_sb_downgrade_validate(struct bch_sb *sb, struct bch_sb_field *f, 295 enum bch_validate_flags flags, struct printbuf *err) 296 { 297 struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); 298 299 for (const struct bch_sb_field_downgrade_entry *i = e->entries; 300 (void *) i < vstruct_end(&e->field); 301 i = downgrade_entry_next_c(i)) { 302 /* 303 * Careful: sb_field_downgrade_entry is only 2 byte aligned, but 304 * section sizes are 8 byte aligned - an empty entry spanning 305 * the end of the section is allowed (and ignored): 306 */ 307 if ((void *) &i->errors[0] > vstruct_end(&e->field)) 308 break; 309 310 if (flags & BCH_VALIDATE_write && 311 (void *) downgrade_entry_next_c(i) > vstruct_end(&e->field)) { 312 prt_printf(err, "downgrade entry overruns end of superblock section"); 313 return -BCH_ERR_invalid_sb_downgrade; 314 } 315 316 if (BCH_VERSION_MAJOR(le16_to_cpu(i->version)) != 317 BCH_VERSION_MAJOR(le16_to_cpu(sb->version))) { 318 prt_printf(err, "downgrade entry with mismatched major version (%u != %u)", 319 BCH_VERSION_MAJOR(le16_to_cpu(i->version)), 320 BCH_VERSION_MAJOR(le16_to_cpu(sb->version))); 321 return -BCH_ERR_invalid_sb_downgrade; 322 } 323 } 324 325 return 0; 326 } 327 328 static void bch2_sb_downgrade_to_text(struct printbuf *out, struct bch_sb *sb, 329 struct bch_sb_field *f) 330 { 331 struct bch_sb_field_downgrade *e = field_to_type(f, downgrade); 332 333 if (out->nr_tabstops <= 1) 334 printbuf_tabstop_push(out, 16); 335 336 for_each_downgrade_entry(e, i) { 337 prt_str(out, "version:\t"); 338 bch2_version_to_text(out, le16_to_cpu(i->version)); 339 prt_newline(out); 340 341 prt_str(out, "recovery passes:\t"); 342 prt_bitflags(out, bch2_recovery_passes, 343 bch2_recovery_passes_from_stable(le64_to_cpu(i->recovery_passes[0]))); 344 prt_newline(out); 345 346 prt_str(out, "errors:\t"); 347 bool first = true; 348 for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { 349 if (!first) 350 prt_char(out, ','); 351 first = false; 352 bch2_sb_error_id_to_text(out, le16_to_cpu(i->errors[j])); 353 } 354 prt_newline(out); 355 } 356 } 357 358 const struct bch_sb_field_ops bch_sb_field_ops_downgrade = { 359 .validate = bch2_sb_downgrade_validate, 360 .to_text = bch2_sb_downgrade_to_text, 361 }; 362 363 int bch2_sb_downgrade_update(struct bch_fs *c) 364 { 365 if (!test_bit(BCH_FS_btree_running, &c->flags)) 366 return 0; 367 368 darray_char table = {}; 369 int ret = 0; 370 371 for (const struct upgrade_downgrade_entry *src = downgrade_table; 372 src < downgrade_table + ARRAY_SIZE(downgrade_table); 373 src++) { 374 if (BCH_VERSION_MAJOR(src->version) != BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version))) 375 continue; 376 377 struct bch_sb_field_downgrade_entry *dst; 378 unsigned bytes = sizeof(*dst) + sizeof(dst->errors[0]) * src->nr_errors; 379 380 ret = darray_make_room(&table, bytes); 381 if (ret) 382 goto out; 383 384 dst = (void *) &darray_top(table); 385 dst->version = cpu_to_le16(src->version); 386 dst->recovery_passes[0] = cpu_to_le64(bch2_recovery_passes_to_stable(src->recovery_passes)); 387 dst->recovery_passes[1] = 0; 388 dst->nr_errors = cpu_to_le16(src->nr_errors); 389 for (unsigned i = 0; i < src->nr_errors; i++) 390 dst->errors[i] = cpu_to_le16(src->errors[i]); 391 392 ret = downgrade_table_extra(c, &table); 393 if (ret) 394 goto out; 395 396 if (!dst->recovery_passes[0] && 397 !dst->recovery_passes[1] && 398 !dst->nr_errors) 399 continue; 400 401 table.nr += sizeof(*dst) + sizeof(dst->errors[0]) * le16_to_cpu(dst->nr_errors); 402 } 403 404 struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); 405 406 unsigned sb_u64s = DIV_ROUND_UP(sizeof(*d) + table.nr, sizeof(u64)); 407 408 if (d && le32_to_cpu(d->field.u64s) > sb_u64s) 409 goto out; 410 411 d = bch2_sb_field_resize(&c->disk_sb, downgrade, sb_u64s); 412 if (!d) { 413 ret = -BCH_ERR_ENOSPC_sb_downgrade; 414 goto out; 415 } 416 417 memcpy(d->entries, table.data, table.nr); 418 memset_u64s_tail(d->entries, 0, table.nr); 419 out: 420 darray_exit(&table); 421 return ret; 422 } 423 424 void bch2_sb_set_downgrade(struct bch_fs *c, unsigned new_minor, unsigned old_minor) 425 { 426 struct bch_sb_field_downgrade *d = bch2_sb_field_get(c->disk_sb.sb, downgrade); 427 if (!d) 428 return; 429 430 struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext); 431 432 for_each_downgrade_entry(d, i) { 433 unsigned minor = BCH_VERSION_MINOR(le16_to_cpu(i->version)); 434 if (new_minor < minor && minor <= old_minor) { 435 ext->recovery_passes_required[0] |= i->recovery_passes[0]; 436 ext->recovery_passes_required[1] |= i->recovery_passes[1]; 437 438 for (unsigned j = 0; j < le16_to_cpu(i->nr_errors); j++) { 439 unsigned e = le16_to_cpu(i->errors[j]); 440 if (e < BCH_FSCK_ERR_MAX) 441 __set_bit(e, c->sb.errors_silent); 442 if (e < sizeof(ext->errors_silent) * 8) 443 __set_bit_le64(e, ext->errors_silent); 444 } 445 } 446 } 447 } 448