// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "checksum.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "recovery_passes.h"
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
#include "sb-counters.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "super-io.h"
#include "super.h"
#include "trace.h"
#include "vstructs.h"

#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/string_choices.h>

struct bch2_metadata_version {
	u16		version;
	const char	*name;
};

static const struct bch2_metadata_version bch2_metadata_versions[] = {
#define x(n, v) {		\
	.version = v,		\
	.name = #n,		\
},
	BCH_METADATA_VERSIONS()
#undef x
};

void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v)
{
	const char *str = "(unknown version)";

	for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
		if (bch2_metadata_versions[i].version == v) {
			str = bch2_metadata_versions[i].name;
			break;
		}

	prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str);
}

enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v)
{
	if (!BCH_VERSION_MAJOR(v))
		return v;

	for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
		if (bch2_metadata_versions[i].version > v &&
		    BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) ==
		    BCH_VERSION_MAJOR(v))
			v = bch2_metadata_versions[i].version;

	return v;
}

int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
{
	int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
		   version <= c->sb.version_incompat_allowed)
		? 0
		: -BCH_ERR_may_not_use_incompat_feature;

	if (!ret) {
		mutex_lock(&c->sb_lock);
		SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
			max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
		bch2_write_super(c);
		mutex_unlock(&c->sb_lock);
	}

	return ret;
}

const char * const bch2_sb_fields[] = {
#define x(name, nr)	#name,
	BCH_SB_FIELDS()
#undef x
	NULL
};

static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
				  enum bch_validate_flags, struct printbuf *);

struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb,
					  enum bch_sb_field_type type)
{
	/* XXX: need locking around superblock to access optional fields */

	vstruct_for_each(sb, f)
		if (le32_to_cpu(f->type) == type)
			return f;
	return NULL;
}

static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
						   struct bch_sb_field *f,
						   unsigned u64s)
{
	unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
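	/* total superblock size, in u64s, after resizing this field: */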
	unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;

	BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size);

	if (!f && !u64s) {
		/* nothing to do: */
	} else if (!f) {
		f = vstruct_last(sb->sb);
		memset(f, 0, sizeof(u64) * u64s);
		f->u64s = cpu_to_le32(u64s);
		f->type = 0;
	} else {
		void *src, *dst;

		src = vstruct_end(f);

		if (u64s) {
			f->u64s = cpu_to_le32(u64s);
			dst = vstruct_end(f);
		} else {
			dst = f;
		}

		memmove(dst, src, vstruct_end(sb->sb) - src);

		if (dst > src)
			memset(src, 0, dst - src);
	}

	sb->sb->u64s = cpu_to_le32(sb_u64s);

	return u64s ? f : NULL;
}

void bch2_sb_field_delete(struct bch_sb_handle *sb,
			  enum bch_sb_field_type type)
{
	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);

	if (f)
		__bch2_sb_field_resize(sb, f, 0);
}

/* Superblock realloc/free: */

void bch2_free_super(struct bch_sb_handle *sb)
{
	kfree(sb->bio);
	if (!IS_ERR_OR_NULL(sb->s_bdev_file))
		bdev_fput(sb->s_bdev_file);
	kfree(sb->holder);
	kfree(sb->sb_name);

	kfree(sb->sb);
	memset(sb, 0, sizeof(*sb));
}

int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
{
	size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
	size_t new_buffer_size;
	struct bch_sb *new_sb;
	struct bio *bio;

	if (sb->bdev)
		new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev));

	new_buffer_size = roundup_pow_of_two(new_bytes);

	if (sb->sb && sb->buffer_size >= new_buffer_size)
		return 0;

	if (sb->sb && sb->have_layout) {
		u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;

		if (new_bytes > max_bytes) {
			struct printbuf buf = PRINTBUF;

			prt_bdevname(&buf, sb->bdev);
			prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
			pr_err("%s", buf.buf);
			printbuf_exit(&buf);
			return -BCH_ERR_ENOSPC_sb;
		}
	}

	if (sb->buffer_size >= new_buffer_size && sb->sb)
		return 0;

	if (dynamic_fault("bcachefs:add:super_realloc"))
		return -BCH_ERR_ENOMEM_sb_realloc_injected;

	new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO);
	if (!new_sb)
		return -BCH_ERR_ENOMEM_sb_buf_realloc;

	sb->sb = new_sb;

	if (sb->have_bio) {
		unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size);

		bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
		if (!bio)
			return -BCH_ERR_ENOMEM_sb_bio_realloc;

		bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);

		kfree(sb->bio);
		sb->bio = bio;
	}

	sb->buffer_size = new_buffer_size;

	return 0;
}

struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
					     enum bch_sb_field_type type,
					     unsigned u64s)
{
	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
	ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
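	/* change in field size, in u64s: */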
	ssize_t d = -old_u64s + u64s;

	if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
		return NULL;

	if (sb->fs_sb) {
		struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);

		lockdep_assert_held(&c->sb_lock);

		/* XXX: we're not checking that offline devices have enough space */

		for_each_online_member(c, ca) {
			struct bch_sb_handle *dev_sb = &ca->disk_sb;

			if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
				percpu_ref_put(&ca->io_ref[READ]);
				return NULL;
			}
		}
	}

	f = bch2_sb_field_get_id(sb->sb, type);
	f = __bch2_sb_field_resize(sb, f, u64s);
	if (f)
		f->type = cpu_to_le32(type);
	return f;
}

struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
						  enum bch_sb_field_type type,
						  unsigned u64s)
{
	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);

	if (!f || le32_to_cpu(f->u64s) < u64s)
		f = bch2_sb_field_resize_id(sb, type, u64s);
	return f;
}

/* Superblock validate: */

static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
{
	u64 offset, prev_offset, max_sectors;
	unsigned i;

	BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);

	if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) &&
	    !uuid_equal(&layout->magic, &BCHFS_MAGIC)) {
		prt_printf(out, "Not a bcachefs superblock layout");
		return -BCH_ERR_invalid_sb_layout;
	}

	if (layout->layout_type != 0) {
		prt_printf(out, "Invalid superblock layout type %u",
			   layout->layout_type);
		return -BCH_ERR_invalid_sb_layout_type;
	}

	if (!layout->nr_superblocks) {
		prt_printf(out, "Invalid superblock layout: no superblocks");
		return -BCH_ERR_invalid_sb_layout_nr_superblocks;
	}

	if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) {
		prt_printf(out, "Invalid superblock layout: too many superblocks");
		return -BCH_ERR_invalid_sb_layout_nr_superblocks;
	}

	if (layout->sb_max_size_bits > BCH_SB_LAYOUT_SIZE_BITS_MAX) {
		prt_printf(out, "Invalid superblock layout: max_size_bits too high");
		return -BCH_ERR_invalid_sb_layout_sb_max_size_bits;
	}

	max_sectors = 1 << layout->sb_max_size_bits;

	prev_offset = le64_to_cpu(layout->sb_offset[0]);

	for (i = 1; i < layout->nr_superblocks; i++) {
		offset = le64_to_cpu(layout->sb_offset[i]);

		if (offset < prev_offset + max_sectors) {
			prt_printf(out, "Invalid superblock layout: superblocks overlap\n"
				   " (sb %u ends at %llu next starts at %llu",
				   i - 1, prev_offset + max_sectors, offset);
			return -BCH_ERR_invalid_sb_layout_superblocks_overlap;
		}
		prev_offset = offset;
	}

	return 0;
}

static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out)
{
	u16 version = le16_to_cpu(sb->version);
	u16 version_min = le16_to_cpu(sb->version_min);

	if (!bch2_version_compatible(version)) {
		prt_str(out, "Unsupported superblock version ");
		bch2_version_to_text(out, version);
		prt_str(out, " (min ");
		bch2_version_to_text(out, bcachefs_metadata_version_min);
		prt_str(out, ", max ");
		bch2_version_to_text(out, bcachefs_metadata_version_current);
		prt_str(out, ")");
		return -BCH_ERR_invalid_sb_version;
	}

	if (!bch2_version_compatible(version_min)) {
		prt_str(out, "Unsupported superblock version_min ");
		bch2_version_to_text(out, version_min);
		prt_str(out, " (min ");
		bch2_version_to_text(out, bcachefs_metadata_version_min);
		prt_str(out, ", max ");
		bch2_version_to_text(out, bcachefs_metadata_version_current);
		prt_str(out, ")");
		return -BCH_ERR_invalid_sb_version;
	}

	if (version_min > version) {
		prt_str(out, "Bad minimum version ");
		bch2_version_to_text(out, version_min);
		prt_str(out, ", greater than version field ");
		bch2_version_to_text(out, version);
		return -BCH_ERR_invalid_sb_version;
	}

	return 0;
}

int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
		     enum bch_validate_flags flags, struct printbuf *out)
{
	struct bch_sb_field_members_v1 *mi;
	enum bch_opt_id opt_id;
	int ret;

	ret = bch2_sb_compatible(sb, out);
	if (ret)
		return ret;

	u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR);
	unsigned incompat_bit = 0;
	if (incompat)
		incompat_bit = __ffs64(incompat);
	else if (sb->features[1])
		incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1]));

	if (incompat_bit) {
		prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)",
			   incompat_bit,
			   bch2_sb_features[BCH_FEATURE_NR - 1],
			   BCH_FEATURE_NR - 1);
		return -BCH_ERR_invalid_sb_features;
	}

	if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) ||
	    BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) {
		prt_str(out, "Filesystem has incompatible version ");
		bch2_version_to_text(out, le16_to_cpu(sb->version));
		prt_str(out, ", current version ");
		bch2_version_to_text(out, bcachefs_metadata_version_current);
		return -BCH_ERR_invalid_sb_features;
	}

	if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) {
		prt_printf(out, "Bad user UUID (got zeroes)");
		return -BCH_ERR_invalid_sb_uuid;
	}

	if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
		prt_printf(out, "Bad internal UUID (got zeroes)");
		return -BCH_ERR_invalid_sb_uuid;
	}

	if (!(flags & BCH_VALIDATE_write) &&
	    le64_to_cpu(sb->offset) != read_offset) {
		prt_printf(out, "Bad sb offset (got %llu, read from %llu)",
			   le64_to_cpu(sb->offset), read_offset);
		return -BCH_ERR_invalid_sb_offset;
	}

	if (!sb->nr_devices ||
	    sb->nr_devices > BCH_SB_MEMBERS_MAX) {
		prt_printf(out, "Bad number of member devices %u (max %u)",
			   sb->nr_devices, BCH_SB_MEMBERS_MAX);
		return -BCH_ERR_invalid_sb_too_many_members;
	}

	if (sb->dev_idx >= sb->nr_devices) {
		prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)",
			   sb->dev_idx, sb->nr_devices);
		return -BCH_ERR_invalid_sb_dev_idx;
	}

	if (!sb->time_precision ||
	    le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) {
		prt_printf(out, "Invalid time precision: %u (min 1, max %lu)",
			   le32_to_cpu(sb->time_precision), NSEC_PER_SEC);
		return -BCH_ERR_invalid_sb_time_precision;
	}

	/* old versions didn't know to downgrade this field */
	if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version))
		SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version));

	if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) {
		prt_printf(out, "Invalid version_incompat ");
		bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
		prt_str(out, " > incompat_allowed ");
		bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
		if (flags & BCH_VALIDATE_write)
			return -BCH_ERR_invalid_sb_version;
		else
			SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb));
	}

	if (!flags) {
		/*
		 * Been seeing a bug where these are getting inexplicably
		 * zeroed, so we're now validating them, but we have to be
		 * careful not to prevent people's filesystems from mounting:
		 */
		if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
			SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
		if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
			SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000);

		if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
			SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));

		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
		    !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
			SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);

		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2)
			SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);

		if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb))
			SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30);

		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_extent_flags &&
		    !BCH_SB_CSUM_ERR_RETRY_NR(sb))
			SET_BCH_SB_CSUM_ERR_RETRY_NR(sb, 3);
	}

#ifdef __KERNEL__
	if (!BCH_SB_SHARD_INUMS_NBITS(sb))
		SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus())));
#endif

	for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
		const struct bch_option *opt = bch2_opt_table + opt_id;

		if (opt->get_sb) {
			u64 v = bch2_opt_from_sb(sb, opt_id, -1);

			prt_printf(out, "Invalid option ");
			ret = bch2_opt_validate(opt, v, out);
			if (ret)
				return ret;

			printbuf_reset(out);
		}
	}

	/* validate layout */
	ret = validate_sb_layout(&sb->layout, out);
	if (ret)
		return ret;

	vstruct_for_each(sb, f) {
		if (!f->u64s) {
			prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)",
				   le32_to_cpu(f->type));
			return -BCH_ERR_invalid_sb_field_size;
		}

		if (vstruct_next(f) > vstruct_last(sb)) {
			prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)",
				   le32_to_cpu(f->type));
			return -BCH_ERR_invalid_sb_field_size;
		}
	}

	/* members must be validated first: */
	mi = bch2_sb_field_get(sb, members_v1);
	if (!mi) {
		prt_printf(out, "Invalid superblock: member info area missing");
		return -BCH_ERR_invalid_sb_members_missing;
	}

	ret = bch2_sb_field_validate(sb, &mi->field, flags, out);
	if (ret)
		return ret;

	vstruct_for_each(sb, f) {
		if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1)
			continue;

		ret = bch2_sb_field_validate(sb, f, flags, out);
		if (ret)
			return ret;
	}

	if ((flags & BCH_VALIDATE_write) &&
	    bch2_sb_member_get(sb, sb->dev_idx).seq != sb->seq) {
		prt_printf(out, "Invalid superblock: member seq %llu != sb seq %llu",
			   le64_to_cpu(bch2_sb_member_get(sb, sb->dev_idx).seq),
			   le64_to_cpu(sb->seq));
		return -BCH_ERR_invalid_sb_members_missing;
	}

	return 0;
}

/* device open: */

static unsigned long le_ulong_to_cpu(unsigned long v)
{
	return sizeof(unsigned long) == 8
		? le64_to_cpu(v)
		: le32_to_cpu(v);
}

static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
{
	BUG_ON(nr & (BITS_PER_TYPE(long) - 1));

	for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
		dst[i] = le_ulong_to_cpu(src[i]);
}

static void bch2_sb_update(struct bch_fs *c)
{
	struct bch_sb *src = c->disk_sb.sb;

	lockdep_assert_held(&c->sb_lock);

	c->sb.uuid = src->uuid;
	c->sb.user_uuid = src->user_uuid;
	c->sb.version = le16_to_cpu(src->version);
	c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src);
	c->sb.version_incompat_allowed
		= BCH_SB_VERSION_INCOMPAT_ALLOWED(src);
	c->sb.version_min = le16_to_cpu(src->version_min);
	c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src);
	c->sb.nr_devices = src->nr_devices;
	c->sb.clean = BCH_SB_CLEAN(src);
	c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);

	c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
	c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;

	/* XXX this is wrong, we need a 96 or 128 bit integer type */
	c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
				     c->sb.nsec_per_time_unit);
	c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);

	c->sb.features = le64_to_cpu(src->features[0]);
	c->sb.compat = le64_to_cpu(src->compat[0]);

	memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));

	struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
	if (ext) {
		le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
				    sizeof(c->sb.errors_silent) * 8);
		c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
	}

	for_each_member_device(c, ca) {
		struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
		ca->mi = bch2_mi_to_cpu(&m);
	}
}

static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
	struct bch_sb_field *src_f, *dst_f;
	struct bch_sb *dst = dst_handle->sb;
	unsigned i;

	dst->version = src->version;
	dst->version_min = src->version_min;
	dst->seq = src->seq;
	dst->uuid = src->uuid;
	dst->user_uuid = src->user_uuid;
	memcpy(dst->label, src->label, sizeof(dst->label));

	dst->block_size = src->block_size;
	dst->nr_devices = src->nr_devices;

	dst->time_base_lo = src->time_base_lo;
	dst->time_base_hi = src->time_base_hi;
	dst->time_precision = src->time_precision;
	dst->write_time = src->write_time;

	memcpy(dst->flags, src->flags, sizeof(dst->flags));
	memcpy(dst->features, src->features, sizeof(dst->features));
	memcpy(dst->compat, src->compat, sizeof(dst->compat));

	for (i = 0; i < BCH_SB_FIELD_NR; i++) {
		int d;

		if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
			continue;

		src_f = bch2_sb_field_get_id(src, i);
		dst_f = bch2_sb_field_get_id(dst, i);

		d = (src_f ? le32_to_cpu(src_f->u64s) : 0) -
		    (dst_f ? le32_to_cpu(dst_f->u64s) : 0);
		if (d > 0) {
			int ret = bch2_sb_realloc(dst_handle,
					le32_to_cpu(dst_handle->sb->u64s) + d);

			if (ret)
				return ret;

			dst = dst_handle->sb;
			dst_f = bch2_sb_field_get_id(dst, i);
		}

		dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
				src_f ? le32_to_cpu(src_f->u64s) : 0);

		if (src_f)
			memcpy(dst_f, src_f, vstruct_bytes(src_f));
	}

	return 0;
}

int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
{
	int ret;

	lockdep_assert_held(&c->sb_lock);

	ret = bch2_sb_realloc(&c->disk_sb, 0) ?:
		__copy_super(&c->disk_sb, src) ?:
		bch2_sb_replicas_to_cpu_replicas(c) ?:
		bch2_sb_disk_groups_to_cpu(c);
	if (ret)
		return ret;

	bch2_sb_update(c);
	return 0;
}

int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
{
	return __copy_super(&ca->disk_sb, c->disk_sb.sb);
}

/* read superblock: */

static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
{
	size_t bytes;
	int ret;
reread:
	bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
	sb->bio->bi_iter.bi_sector = offset;
	bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);

	ret = submit_bio_wait(sb->bio);
	if (ret) {
		prt_printf(err, "IO error: %i", ret);
		return ret;
	}

	if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
	    !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
		prt_str(err, "Not a bcachefs superblock (got magic ");
		pr_uuid(err, sb->sb->magic.b);
		prt_str(err, ")");
		return -BCH_ERR_invalid_sb_magic;
	}

	ret = bch2_sb_compatible(sb->sb, err);
	if (ret)
		return ret;

	bytes = vstruct_bytes(sb->sb);

	u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits);
	if (bytes > sb_size) {
		prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)",
			   bytes, sb_size);
		return -BCH_ERR_invalid_sb_too_big;
	}

	if (bytes > sb->buffer_size) {
		ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s));
		if (ret)
			return ret;
		goto reread;
	}

	enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
	if (csum_type >= BCH_CSUM_NR ||
	    bch2_csum_type_is_encryption(csum_type)) {
		prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
		return -BCH_ERR_invalid_sb_csum_type;
	}

	/* XXX: verify MACs */
	struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
	if (bch2_crc_cmp(csum, sb->sb->csum)) {
		bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
		return -BCH_ERR_invalid_sb_csum;
	}

	sb->seq = le64_to_cpu(sb->sb->seq);

	return 0;
}

static int __bch2_read_super(const char *path, struct bch_opts *opts,
			     struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
{
	u64 offset = opt_get(*opts, sb);
	struct bch_sb_layout layout;
	struct printbuf err = PRINTBUF;
	struct printbuf err2 = PRINTBUF;
	__le64 *i;
	int ret;
#ifndef __KERNEL__
retry:
#endif
	memset(sb, 0, sizeof(*sb));
	sb->mode = BLK_OPEN_READ;
	sb->have_bio = true;
	sb->holder = kzalloc(sizeof(*sb->holder), GFP_KERNEL);
	if (!sb->holder)
		return -ENOMEM;

	sb->sb_name = kstrdup(path, GFP_KERNEL);
	if (!sb->sb_name) {
		ret = -ENOMEM;
		prt_printf(&err, "error allocating memory for sb_name");
		goto err;
	}

#ifndef __KERNEL__
	if (opt_get(*opts, direct_io) == false)
		sb->mode |= BLK_OPEN_BUFFERED;
#endif

	if (!opt_get(*opts, noexcl))
		sb->mode |= BLK_OPEN_EXCL;

	if (!opt_get(*opts, nochanges))
		sb->mode |= BLK_OPEN_WRITE;

	sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
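	/* if the read-write open failed with -EACCES and we're mounting read-only, retry without write access: */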
	if (IS_ERR(sb->s_bdev_file) &&
	    PTR_ERR(sb->s_bdev_file) == -EACCES &&
	    opt_get(*opts, read_only)) {
		sb->mode &= ~BLK_OPEN_WRITE;

		sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
		if (!IS_ERR(sb->s_bdev_file))
			opt_set(*opts, nochanges, true);
	}

	if (IS_ERR(sb->s_bdev_file)) {
		ret = PTR_ERR(sb->s_bdev_file);
		prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret));
		goto err;
	}
	sb->bdev = file_bdev(sb->s_bdev_file);

	ret = bch2_sb_realloc(sb, 0);
	if (ret) {
		prt_printf(&err, "error allocating memory for superblock");
		goto err;
	}

	if (bch2_fs_init_fault("read_super")) {
		prt_printf(&err, "dynamic fault");
		ret = -EFAULT;
		goto err;
	}

	ret = read_one_super(sb, offset, &err);
	if (!ret)
		goto got_super;

	if (opt_defined(*opts, sb))
		goto err;

	prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
		   path, err.buf);
	if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
		bch2_print_opts(opts, KERN_INFO "%s", err2.buf);
	else
		bch2_print_opts(opts, KERN_ERR "%s", err2.buf);

	printbuf_exit(&err2);
	printbuf_reset(&err);

	/*
	 * Error reading primary superblock - read location of backup
	 * superblocks:
	 */
	bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
	sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
	/*
	 * use sb buffer to read layout, since sb buffer is page aligned but
	 * layout won't be:
	 */
	bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));

	ret = submit_bio_wait(sb->bio);
	if (ret) {
		prt_printf(&err, "IO error: %i", ret);
		goto err;
	}

	memcpy(&layout, sb->sb, sizeof(layout));
	ret = validate_sb_layout(&layout, &err);
	if (ret)
		goto err;

	for (i = layout.sb_offset;
	     i < layout.sb_offset + layout.nr_superblocks; i++) {
		offset = le64_to_cpu(*i);

		if (offset == opt_get(*opts, sb)) {
			ret = -BCH_ERR_invalid;
			continue;
		}

		ret = read_one_super(sb, offset, &err);
		if (!ret)
			goto got_super;
	}

	goto err;

got_super:
	if (le16_to_cpu(sb->sb->block_size) << 9 <
	    bdev_logical_block_size(sb->bdev) &&
	    opt_get(*opts, direct_io)) {
#ifndef __KERNEL__
		opt_set(*opts, direct_io, false);
		bch2_free_super(sb);
		goto retry;
#endif
		prt_printf(&err, "block size (%u) smaller than device block size (%u)",
			   le16_to_cpu(sb->sb->block_size) << 9,
			   bdev_logical_block_size(sb->bdev));
		ret = -BCH_ERR_block_size_too_small;
		goto err;
	}

	sb->have_layout = true;

	ret = bch2_sb_validate(sb->sb, offset, 0, &err);
	if (ret) {
		bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
				path, err.buf);
		goto err_no_print;
	}
out:
	printbuf_exit(&err);
	return ret;
err:
	bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
			path, err.buf);
err_no_print:
	bch2_free_super(sb);
	goto out;
}

int bch2_read_super(const char *path, struct bch_opts *opts,
		    struct bch_sb_handle *sb)
{
	return __bch2_read_super(path, opts, sb, false);
}

/* provide a silenced version for mount.bcachefs */

int bch2_read_super_silent(const char *path, struct bch_opts *opts,
			   struct bch_sb_handle *sb)
{
	return __bch2_read_super(path, opts, sb, true);
}

/* write superblock: */

static void write_super_endio(struct bio *bio)
{
	struct bch_dev *ca = bio->bi_private;

	bch2_account_io_success_fail(ca, bio_data_dir(bio), !bio->bi_status);

	/* XXX: return errors directly */

	if (bio->bi_status) {
		bch_err_dev_ratelimited(ca, "superblock %s error: %s",
					str_write_read(bio_data_dir(bio)),
					bch2_blk_status_to_str(bio->bi_status));
		ca->sb_write_error = 1;
	}

	closure_put(&ca->fs->sb_write);
	percpu_ref_put(&ca->io_ref[READ]);
}

static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
{
	struct bch_sb *sb = ca->disk_sb.sb;
	struct bio *bio = ca->disk_sb.bio;

	memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE);

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
	bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
	bio->bi_end_io = write_super_endio;
	bio->bi_private = ca;
	bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE);

	this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));

	percpu_ref_get(&ca->io_ref[READ]);
	closure_bio_submit(bio, &c->sb_write);
}

static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
{
	struct bch_sb *sb = ca->disk_sb.sb;
	struct bio *bio = ca->disk_sb.bio;

	sb->offset = sb->layout.sb_offset[idx];

	SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false));
	sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
				null_nonce(), sb);

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
	bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
	bio->bi_end_io = write_super_endio;
	bio->bi_private = ca;
	bch2_bio_map(bio, sb,
		     roundup((size_t) vstruct_bytes(sb),
			     bdev_logical_block_size(ca->disk_sb.bdev)));

	this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
		     bio_sectors(bio));

	percpu_ref_get(&ca->io_ref[READ]);
	closure_bio_submit(bio, &c->sb_write);
}

int bch2_write_super(struct bch_fs *c)
{
	struct closure *cl = &c->sb_write;
	struct printbuf err = PRINTBUF;
	unsigned sb = 0, nr_wrote;
	struct bch_devs_mask sb_written;
	bool wrote, can_mount_without_written, can_mount_with_written;
	unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
	DARRAY(struct bch_dev *) online_devices = {};
	int ret = 0;

	trace_and_count(c, write_super, c, _RET_IP_);

	if (c->opts.very_degraded)
		degraded_flags |= BCH_FORCE_IF_LOST;

	lockdep_assert_held(&c->sb_lock);

	closure_init_stack(cl);
	memset(&sb_written, 0, sizeof(sb_written));

	/*
	 * Note: we do writes to RO devices here, and we might want to change
	 * that in the future.
	 *
	 * For now, we expect to be able to call write_super() when we're not
	 * yet RW:
	 */
	for_each_online_member(c, ca) {
		ret = darray_push(&online_devices, ca);
		if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
			percpu_ref_put(&ca->io_ref[READ]);
			goto out;
		}
		percpu_ref_get(&ca->io_ref[READ]);
	}

	/* Make sure we're using the new magic numbers: */
	c->disk_sb.sb->magic = BCHFS_MAGIC;
	c->disk_sb.sb->layout.magic = BCHFS_MAGIC;

	le64_add_cpu(&c->disk_sb.sb->seq, 1);

	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
	darray_for_each(online_devices, ca)
		__bch2_members_v2_get_mut(mi, (*ca)->dev_idx)->seq = c->disk_sb.sb->seq;
	c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());

	if (test_bit(BCH_FS_error, &c->flags))
		SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
	if (test_bit(BCH_FS_topology_error, &c->flags))
		SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);

	SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);

	bch2_sb_counters_from_cpu(c);
	bch2_sb_members_from_cpu(c);
	bch2_sb_members_cpy_v2_v1(&c->disk_sb);
	bch2_sb_errors_from_cpu(c);
	bch2_sb_downgrade_update(c);

	darray_for_each(online_devices, ca)
		bch2_sb_from_fs(c, (*ca));

	darray_for_each(online_devices, ca) {
		printbuf_reset(&err);

		ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err);
		if (ret) {
			bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf);
			goto out;
		}
	}

	if (c->opts.nochanges)
		goto out;

	/*
	 * Defer writing the superblock until filesystem initialization is
	 * complete - don't write out a partly initialized superblock:
	 */
	if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
		goto out;

	if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
		struct printbuf buf = PRINTBUF;
		prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
		bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
		prt_str(&buf, " > ");
		bch2_version_to_text(&buf, bcachefs_metadata_version_current);
		prt_str(&buf, ")");
		bch2_fs_fatal_error(c, ": %s", buf.buf);
		printbuf_exit(&buf);
		return -BCH_ERR_sb_not_downgraded;
	}

	darray_for_each(online_devices, ca) {
		__set_bit((*ca)->dev_idx, sb_written.d);
		(*ca)->sb_write_error = 0;
	}

	darray_for_each(online_devices, ca)
		read_back_super(c, *ca);
	closure_sync(cl);

	darray_for_each(online_devices, cap) {
		struct bch_dev *ca = *cap;

		if (ca->sb_write_error)
			continue;

		if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
			struct printbuf buf = PRINTBUF;
			prt_char(&buf, ' ');
			prt_bdevname(&buf, ca->disk_sb.bdev);
			prt_printf(&buf,
				   ": Superblock write was silently dropped! (seq %llu expected %llu)",
				   le64_to_cpu(ca->sb_read_scratch->seq),
				   ca->disk_sb.seq);

			if (c->opts.errors != BCH_ON_ERROR_continue &&
			    c->opts.errors != BCH_ON_ERROR_fix_safe) {
				ret = -BCH_ERR_erofs_sb_err;
				bch2_fs_fatal_error(c, "%s", buf.buf);
			} else {
				bch_err(c, "%s", buf.buf);
			}

			printbuf_exit(&buf);
		}

		if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
			struct printbuf buf = PRINTBUF;
			prt_char(&buf, ' ');
			prt_bdevname(&buf, ca->disk_sb.bdev);
			prt_printf(&buf,
				   ": Superblock modified by another process (seq %llu expected %llu)",
				   le64_to_cpu(ca->sb_read_scratch->seq),
				   ca->disk_sb.seq);
			bch2_fs_fatal_error(c, "%s", buf.buf);
			printbuf_exit(&buf);
			ret = -BCH_ERR_erofs_sb_err;
		}
	}

	if (ret)
		goto out;

	do {
		wrote = false;
		darray_for_each(online_devices, cap) {
			struct bch_dev *ca = *cap;
			if (!ca->sb_write_error &&
			    sb < ca->disk_sb.sb->layout.nr_superblocks) {
				write_one_super(c, ca, sb);
				wrote = true;
			}
		}
		closure_sync(cl);
		sb++;
	} while (wrote);

	darray_for_each(online_devices, cap) {
		struct bch_dev *ca = *cap;
		if (ca->sb_write_error)
			__clear_bit(ca->dev_idx, sb_written.d);
		else
			ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
	}

	nr_wrote = dev_mask_nr(&sb_written);

	can_mount_with_written =
		bch2_have_enough_devs(c, sb_written, degraded_flags, false);

	for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
		sb_written.d[i] = ~sb_written.d[i];

	can_mount_without_written =
		bch2_have_enough_devs(c, sb_written, degraded_flags, false);

	/*
	 * If we would be able to mount _without_ the devices we successfully
	 * wrote superblocks to, we weren't able to write to enough devices:
	 *
	 * Exception: if we can mount without the successes because we haven't
	 * written anything (new filesystem), we continue if we'd be able to
	 * mount with the devices we did successfully write to:
	 */
	if (bch2_fs_fatal_err_on(!nr_wrote ||
				 !can_mount_with_written ||
				 (can_mount_without_written &&
				  !can_mount_with_written), c,
		": Unable to write superblock to sufficient devices (from %ps)",
		(void *) _RET_IP_))
		ret = -BCH_ERR_erofs_sb_err;
out:
	/* Make new options visible after they're persistent: */
	bch2_sb_update(c);
	darray_for_each(online_devices, ca)
		percpu_ref_put(&(*ca)->io_ref[READ]);
	darray_exit(&online_devices);
	printbuf_exit(&err);
	return ret;
}

void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
{
	mutex_lock(&c->sb_lock);
	if (!(c->sb.features & (1ULL << feat))) {
		c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);

		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}

/* Downgrade if superblock is at a higher version than currently supported: */
bool bch2_check_version_downgrade(struct bch_fs *c)
{
	bool ret = bcachefs_metadata_version_current < c->sb.version;

	lockdep_assert_held(&c->sb_lock);

	/*
	 * Downgrade, if superblock is at a higher version than currently
	 * supported:
	 *
	 * c->sb will be checked before we write the superblock, so update it as
	 * well:
	 */
	if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
		SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
	if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current)
		SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current);
	if (c->sb.version > bcachefs_metadata_version_current)
		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
	if (c->sb.version_min > bcachefs_metadata_version_current)
		c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
	c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
	return ret;
}

void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
{
	lockdep_assert_held(&c->sb_lock);

	if (BCH_VERSION_MAJOR(new_version) >
	    BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
		bch2_sb_field_resize(&c->disk_sb, downgrade, 0);

	c->disk_sb.sb->version = cpu_to_le16(new_version);

	if (incompat) {
		c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
		SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
			max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version));
	}
}

static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
				enum bch_validate_flags flags, struct printbuf *err)
{
	if (vstruct_bytes(f) < 88) {
		prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
		return -BCH_ERR_invalid_sb_ext;
	}

	return 0;
}

static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
				struct bch_sb_field *f)
{
	struct bch_sb_field_ext *e = field_to_type(f, ext);

	prt_printf(out, "Recovery passes required:\t");
	prt_bitflags(out, bch2_recovery_passes,
		     bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
	prt_newline(out);

	unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
	if (errors_silent) {
		le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);

		prt_printf(out, "Errors to silently fix:\t");
		prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent,
				    min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8));
		prt_newline(out);

		kfree(errors_silent);
	}

	prt_printf(out, "Btrees with missing data:\t");
	prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
	prt_newline(out);
}

static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
	.validate	= bch2_sb_ext_validate,
	.to_text	= bch2_sb_ext_to_text,
};

static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
#define x(f, nr)					\
	[BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
	BCH_SB_FIELDS()
#undef x
};

static const struct bch_sb_field_ops bch2_sb_field_null_ops;

static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type)
{
	return likely(type < ARRAY_SIZE(bch2_sb_field_ops))
		? bch2_sb_field_ops[type]
		: &bch2_sb_field_null_ops;
}

static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
				  enum bch_validate_flags flags, struct printbuf *err)
{
	unsigned type = le32_to_cpu(f->type);
	struct printbuf field_err = PRINTBUF;
	const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
	int ret;

	ret = ops->validate ? ops->validate(sb, f, flags, &field_err) : 0;
	if (ret) {
		prt_printf(err, "Invalid superblock section %s: %s",
			   bch2_sb_fields[type], field_err.buf);
		prt_newline(err);
		bch2_sb_field_to_text(err, sb, f);
	}

	printbuf_exit(&field_err);
	return ret;
}

void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
			     struct bch_sb_field *f)
{
	unsigned type = le32_to_cpu(f->type);
	const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);

	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	if (ops->to_text)
		ops->to_text(out, sb, f);
}

void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
			   struct bch_sb_field *f)
{
	unsigned type = le32_to_cpu(f->type);

	if (type < BCH_SB_FIELD_NR)
		prt_printf(out, "%s", bch2_sb_fields[type]);
	else
		prt_printf(out, "(unknown field %u)", type);

	prt_printf(out, " (size %zu):", vstruct_bytes(f));
	prt_newline(out);

	__bch2_sb_field_to_text(out, sb, f);
}

void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
{
	unsigned i;

	prt_printf(out, "Type: %u", l->layout_type);
	prt_newline(out);

	prt_str(out, "Superblock max size: ");
	prt_units_u64(out, 512 << l->sb_max_size_bits);
	prt_newline(out);

	prt_printf(out, "Nr superblocks: %u", l->nr_superblocks);
	prt_newline(out);

	prt_str(out, "Offsets: ");
	for (i = 0; i < l->nr_superblocks; i++) {
		if (i)
			prt_str(out, ", ");
		prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i]));
	}
	prt_newline(out);
}

void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
		     bool print_layout, unsigned fields)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 44);

	prt_printf(out, "External UUID:\t");
	pr_uuid(out, sb->user_uuid.b);
	prt_newline(out);

	prt_printf(out, "Internal UUID:\t");
	pr_uuid(out, sb->uuid.b);
	prt_newline(out);

	prt_printf(out, "Magic number:\t");
	pr_uuid(out, sb->magic.b);
	prt_newline(out);

	prt_printf(out, "Device index:\t%u\n", sb->dev_idx);

	prt_printf(out, "Label:\t");
	if (!strlen(sb->label))
		prt_printf(out, "(none)");
	else
		prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label);
	prt_newline(out);

	prt_printf(out, "Version:\t");
	bch2_version_to_text(out, le16_to_cpu(sb->version));
	prt_newline(out);

	prt_printf(out, "Incompatible features allowed:\t");
	bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
	prt_newline(out);

	prt_printf(out, "Incompatible features in use:\t");
	bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
	prt_newline(out);

	prt_printf(out, "Version upgrade complete:\t");
	bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
	prt_newline(out);

	prt_printf(out, "Oldest version on disk:\t");
	bch2_version_to_text(out, le16_to_cpu(sb->version_min));
	prt_newline(out);

	prt_printf(out, "Created:\t");
	if (sb->time_base_lo)
		bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
	else
		prt_printf(out, "(not set)");
	prt_newline(out);

	prt_printf(out, "Sequence number:\t");
	prt_printf(out, "%llu", le64_to_cpu(sb->seq));
	prt_newline(out);

	prt_printf(out, "Time of last write:\t");
	bch2_prt_datetime(out, le64_to_cpu(sb->write_time));
	prt_newline(out);

	prt_printf(out, "Superblock size:\t");
	prt_units_u64(out, vstruct_bytes(sb));
	prt_str(out, "/");
	prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits);
	prt_newline(out);

	prt_printf(out, "Clean:\t%llu\n", BCH_SB_CLEAN(sb));
	prt_printf(out, "Devices:\t%u\n", bch2_sb_nr_devices(sb));

	prt_printf(out, "Sections:\t");
	u64 fields_have = 0;
	vstruct_for_each(sb, f)
		fields_have |= 1 << le32_to_cpu(f->type);
	prt_bitflags(out, bch2_sb_fields, fields_have);
	prt_newline(out);

	prt_printf(out, "Features:\t");
	prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0]));
	prt_newline(out);

	prt_printf(out, "Compat features:\t");
	prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0]));
	prt_newline(out);

	prt_newline(out);
	prt_printf(out, "Options:");
	prt_newline(out);
	printbuf_indent_add(out, 2);
	{
		enum bch_opt_id id;

		for (id = 0; id < bch2_opts_nr; id++) {
			const struct bch_option *opt = bch2_opt_table + id;

			if (opt->get_sb) {
				u64 v = bch2_opt_from_sb(sb, id, -1);

				prt_printf(out, "%s:\t", opt->attr.name);
				bch2_opt_to_text(out, NULL, sb, opt, v,
						 OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST);
				prt_newline(out);
			}
		}
	}

	printbuf_indent_sub(out, 2);

	if (print_layout) {
		prt_newline(out);
		prt_printf(out, "layout:");
		prt_newline(out);
		printbuf_indent_add(out, 2);
		bch2_sb_layout_to_text(out, &sb->layout);
		printbuf_indent_sub(out, 2);
	}

	vstruct_for_each(sb, f)
		if (fields & (1 << le32_to_cpu(f->type))) {
			prt_newline(out);
			bch2_sb_field_to_text(out, sb, f);
		}
}