// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "checksum.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "journal.h"
#include "journal_sb.h"
#include "journal_seq_blacklist.h"
#include "recovery_passes.h"
#include "replicas.h"
#include "quota.h"
#include "sb-clean.h"
#include "sb-counters.h"
#include "sb-downgrade.h"
#include "sb-errors.h"
#include "sb-members.h"
#include "super-io.h"
#include "super.h"
#include "trace.h"
#include "vstructs.h"

#include <linux/backing-dev.h>
#include <linux/sort.h>
#include <linux/string_choices.h>

struct bch2_metadata_version {
	u16		version;
	const char	*name;
};

static const struct bch2_metadata_version bch2_metadata_versions[] = {
#define x(n, v) {		\
	.version = v,		\
	.name = #n,		\
},
	BCH_METADATA_VERSIONS()
#undef x
};

void bch2_version_to_text(struct printbuf *out, enum bcachefs_metadata_version v)
{
	const char *str = "(unknown version)";

	for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
		if (bch2_metadata_versions[i].version == v) {
			str = bch2_metadata_versions[i].name;
			break;
		}

	prt_printf(out, "%u.%u: %s", BCH_VERSION_MAJOR(v), BCH_VERSION_MINOR(v), str);
}

enum bcachefs_metadata_version bch2_latest_compatible_version(enum bcachefs_metadata_version v)
{
	if (!BCH_VERSION_MAJOR(v))
		return v;

	for (unsigned i = 0; i < ARRAY_SIZE(bch2_metadata_versions); i++)
		if (bch2_metadata_versions[i].version > v &&
		    BCH_VERSION_MAJOR(bch2_metadata_versions[i].version) ==
		    BCH_VERSION_MAJOR(v))
			v = bch2_metadata_versions[i].version;

	return v;
}

int bch2_set_version_incompat(struct bch_fs *c, enum bcachefs_metadata_version version)
{
	int ret = ((c->sb.features & BIT_ULL(BCH_FEATURE_incompat_version_field)) &&
		   version <= c->sb.version_incompat_allowed)
		? 0
		: -BCH_ERR_may_not_use_incompat_feature;

	mutex_lock(&c->sb_lock);
	if (!ret) {
		SET_BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb,
			max(BCH_SB_VERSION_INCOMPAT(c->disk_sb.sb), version));
		bch2_write_super(c);
	} else {
		darray_for_each(c->incompat_versions_requested, i)
			if (version == *i)
				goto out;

		darray_push(&c->incompat_versions_requested, version);
		struct printbuf buf = PRINTBUF;
		prt_str(&buf, "requested incompat feature ");
		bch2_version_to_text(&buf, version);
		prt_str(&buf, " currently not enabled, allowed up to ");
		bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
		prt_printf(&buf, "\n set version_upgrade=incompat to enable");

		bch_notice(c, "%s", buf.buf);
		printbuf_exit(&buf);
	}

out:
	mutex_unlock(&c->sb_lock);

	return ret;
}

const char * const bch2_sb_fields[] = {
#define x(name, nr)	#name,
	BCH_SB_FIELDS()
#undef x
	NULL
};

static int bch2_sb_field_validate(struct bch_sb *, struct bch_sb_field *,
				  enum bch_validate_flags, struct printbuf *);

struct bch_sb_field *bch2_sb_field_get_id(struct bch_sb *sb,
					  enum bch_sb_field_type type)
{
	/* XXX: need locking around superblock to access optional fields */

	vstruct_for_each(sb, f)
		if (le32_to_cpu(f->type) == type)
			return f;
	return NULL;
}
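
/*
 * Added commentary (not upstream text): callers normally go through the typed
 * wrappers rather than these _id helpers directly, e.g. under c->sb_lock:
 *
 *	struct bch_sb_field_ext *ext = bch2_sb_field_get(c->disk_sb.sb, ext);
 *	if (!ext)
 *		ext = bch2_sb_field_resize(&c->disk_sb, ext,
 *					   sizeof(*ext) / sizeof(u64));
 *
 * The wrapper names are taken from their uses later in this file; note that
 * field sizes are counted in u64s, not bytes.
 */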

static struct bch_sb_field *__bch2_sb_field_resize(struct bch_sb_handle *sb,
						   struct bch_sb_field *f,
						   unsigned u64s)
{
	unsigned old_u64s = f ? le32_to_cpu(f->u64s) : 0;
	unsigned sb_u64s = le32_to_cpu(sb->sb->u64s) + u64s - old_u64s;

	BUG_ON(__vstruct_bytes(struct bch_sb, sb_u64s) > sb->buffer_size);

	if (!f && !u64s) {
		/* nothing to do: */
	} else if (!f) {
		f = vstruct_last(sb->sb);
		memset(f, 0, sizeof(u64) * u64s);
		f->u64s = cpu_to_le32(u64s);
		f->type = 0;
	} else {
		void *src, *dst;

		src = vstruct_end(f);

		if (u64s) {
			f->u64s = cpu_to_le32(u64s);
			dst = vstruct_end(f);
		} else {
			dst = f;
		}

		memmove(dst, src, vstruct_end(sb->sb) - src);

		if (dst > src)
			memset(src, 0, dst - src);
	}

	sb->sb->u64s = cpu_to_le32(sb_u64s);

	return u64s ? f : NULL;
}

void bch2_sb_field_delete(struct bch_sb_handle *sb,
			  enum bch_sb_field_type type)
{
	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);

	if (f)
		__bch2_sb_field_resize(sb, f, 0);
}

/* Superblock realloc/free: */

void bch2_free_super(struct bch_sb_handle *sb)
{
	kfree(sb->bio);
	if (!IS_ERR_OR_NULL(sb->s_bdev_file))
		bdev_fput(sb->s_bdev_file);
	kfree(sb->holder);
	kfree(sb->sb_name);

	kfree(sb->sb);
	memset(sb, 0, sizeof(*sb));
}

int bch2_sb_realloc(struct bch_sb_handle *sb, unsigned u64s)
{
	size_t new_bytes = __vstruct_bytes(struct bch_sb, u64s);
	size_t new_buffer_size;
	struct bch_sb *new_sb;
	struct bio *bio;

	if (sb->bdev)
		new_bytes = max_t(size_t, new_bytes, bdev_logical_block_size(sb->bdev));

	new_buffer_size = roundup_pow_of_two(new_bytes);

	if (sb->sb && sb->buffer_size >= new_buffer_size)
		return 0;

	if (sb->sb && sb->have_layout) {
		u64 max_bytes = 512 << sb->sb->layout.sb_max_size_bits;

		if (new_bytes > max_bytes) {
			struct printbuf buf = PRINTBUF;

			prt_bdevname(&buf, sb->bdev);
			prt_printf(&buf, ": superblock too big: want %zu but have %llu", new_bytes, max_bytes);
			pr_err("%s", buf.buf);
			printbuf_exit(&buf);
			return -BCH_ERR_ENOSPC_sb;
		}
	}

	if (sb->buffer_size >= new_buffer_size && sb->sb)
		return 0;

	if (dynamic_fault("bcachefs:add:super_realloc"))
		return -BCH_ERR_ENOMEM_sb_realloc_injected;

	new_sb = krealloc(sb->sb, new_buffer_size, GFP_NOFS|__GFP_ZERO);
	if (!new_sb)
		return -BCH_ERR_ENOMEM_sb_buf_realloc;

	sb->sb = new_sb;

	if (sb->have_bio) {
		unsigned nr_bvecs = buf_pages(sb->sb, new_buffer_size);

		bio = bio_kmalloc(nr_bvecs, GFP_KERNEL);
		if (!bio)
			return -BCH_ERR_ENOMEM_sb_bio_realloc;

		bio_init(bio, NULL, bio->bi_inline_vecs, nr_bvecs, 0);

		kfree(sb->bio);
		sb->bio = bio;
	}

	sb->buffer_size = new_buffer_size;

	return 0;
}

struct bch_sb_field *bch2_sb_field_resize_id(struct bch_sb_handle *sb,
					     enum bch_sb_field_type type,
					     unsigned u64s)
{
	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);
	ssize_t old_u64s = f ? le32_to_cpu(f->u64s) : 0;
	ssize_t d = -old_u64s + u64s;

	if (bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s) + d))
		return NULL;

	if (sb->fs_sb) {
		struct bch_fs *c = container_of(sb, struct bch_fs, disk_sb);

		lockdep_assert_held(&c->sb_lock);

		/* XXX: we're not checking that offline devices have enough space */

		for_each_online_member(c, ca, BCH_DEV_READ_REF_sb_field_resize) {
			struct bch_sb_handle *dev_sb = &ca->disk_sb;

			if (bch2_sb_realloc(dev_sb, le32_to_cpu(dev_sb->sb->u64s) + d)) {
				enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_sb_field_resize);
				return NULL;
			}
		}
	}

	f = bch2_sb_field_get_id(sb->sb, type);
	f = __bch2_sb_field_resize(sb, f, u64s);
	if (f)
		f->type = cpu_to_le32(type);
	return f;
}

struct bch_sb_field *bch2_sb_field_get_minsize_id(struct bch_sb_handle *sb,
						  enum bch_sb_field_type type,
						  unsigned u64s)
{
	struct bch_sb_field *f = bch2_sb_field_get_id(sb->sb, type);

	if (!f || le32_to_cpu(f->u64s) < u64s)
		f = bch2_sb_field_resize_id(sb, type, u64s);
	return f;
}

/* Superblock validate: */

static int validate_sb_layout(struct bch_sb_layout *layout, struct printbuf *out)
{
	u64 offset, prev_offset, max_sectors;
	unsigned i;

	BUILD_BUG_ON(sizeof(struct bch_sb_layout) != 512);

	if (!uuid_equal(&layout->magic, &BCACHE_MAGIC) &&
	    !uuid_equal(&layout->magic, &BCHFS_MAGIC)) {
		prt_printf(out, "Not a bcachefs superblock layout");
		return -BCH_ERR_invalid_sb_layout;
	}

	if (layout->layout_type != 0) {
		prt_printf(out, "Invalid superblock layout type %u",
			   layout->layout_type);
		return -BCH_ERR_invalid_sb_layout_type;
	}

	if (!layout->nr_superblocks) {
		prt_printf(out, "Invalid superblock layout: no superblocks");
		return -BCH_ERR_invalid_sb_layout_nr_superblocks;
	}

	if (layout->nr_superblocks > ARRAY_SIZE(layout->sb_offset)) {
		prt_printf(out, "Invalid superblock layout: too many superblocks");
		return -BCH_ERR_invalid_sb_layout_nr_superblocks;
	}

	if (layout->sb_max_size_bits > BCH_SB_LAYOUT_SIZE_BITS_MAX) {
		prt_printf(out, "Invalid superblock layout: max_size_bits too high");
		return -BCH_ERR_invalid_sb_layout_sb_max_size_bits;
	}

	max_sectors = 1 << layout->sb_max_size_bits;

	prev_offset = le64_to_cpu(layout->sb_offset[0]);

	for (i = 1; i < layout->nr_superblocks; i++) {
		offset = le64_to_cpu(layout->sb_offset[i]);

		if (offset < prev_offset + max_sectors) {
			prt_printf(out, "Invalid superblock layout: superblocks overlap\n"
				   " (sb %u ends at %llu next starts at %llu)",
				   i - 1, prev_offset + max_sectors, offset);
			return -BCH_ERR_invalid_sb_layout_superblocks_overlap;
		}
		prev_offset = offset;
	}

	return 0;
}
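
/*
 * Added commentary (not upstream text), a worked example of the layout math
 * checked above: with sb_max_size_bits = 11 each superblock copy may grow to
 * 512 << 11 bytes = 1 MiB, i.e. max_sectors = 1 << 11 = 2048 sectors, so a
 * layout of sb_offset[] = { 8, 2056 } places the copies exactly back to back;
 * any second offset below 8 + 2048 would trip the overlap check.
 */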

static int bch2_sb_compatible(struct bch_sb *sb, struct printbuf *out)
{
	u16 version = le16_to_cpu(sb->version);
	u16 version_min = le16_to_cpu(sb->version_min);

	if (!bch2_version_compatible(version)) {
		prt_str(out, "Unsupported superblock version ");
		bch2_version_to_text(out, version);
		prt_str(out, " (min ");
		bch2_version_to_text(out, bcachefs_metadata_version_min);
		prt_str(out, ", max ");
		bch2_version_to_text(out, bcachefs_metadata_version_current);
		prt_str(out, ")");
		return -BCH_ERR_invalid_sb_version;
	}

	if (!bch2_version_compatible(version_min)) {
		prt_str(out, "Unsupported superblock version_min ");
		bch2_version_to_text(out, version_min);
		prt_str(out, " (min ");
		bch2_version_to_text(out, bcachefs_metadata_version_min);
		prt_str(out, ", max ");
		bch2_version_to_text(out, bcachefs_metadata_version_current);
		prt_str(out, ")");
		return -BCH_ERR_invalid_sb_version;
	}

	if (version_min > version) {
		prt_str(out, "Bad minimum version ");
		bch2_version_to_text(out, version_min);
		prt_str(out, ", greater than version field ");
		bch2_version_to_text(out, version);
		return -BCH_ERR_invalid_sb_version;
	}

	return 0;
}

int bch2_sb_validate(struct bch_sb *sb, u64 read_offset,
		     enum bch_validate_flags flags, struct printbuf *out)
{
	enum bch_opt_id opt_id;
	int ret;

	ret = bch2_sb_compatible(sb, out);
	if (ret)
		return ret;

	u64 incompat = le64_to_cpu(sb->features[0]) & (~0ULL << BCH_FEATURE_NR);
	unsigned incompat_bit = 0;
	if (incompat)
		incompat_bit = __ffs64(incompat);
	else if (sb->features[1])
		incompat_bit = 64 + __ffs64(le64_to_cpu(sb->features[1]));

	if (incompat_bit) {
		prt_printf(out, "Filesystem has incompatible feature bit %u, highest supported %s (%u)",
			   incompat_bit,
			   bch2_sb_features[BCH_FEATURE_NR - 1],
			   BCH_FEATURE_NR - 1);
		return -BCH_ERR_invalid_sb_features;
	}

	if (BCH_VERSION_MAJOR(le16_to_cpu(sb->version)) > BCH_VERSION_MAJOR(bcachefs_metadata_version_current) ||
	    BCH_SB_VERSION_INCOMPAT(sb) > bcachefs_metadata_version_current) {
		prt_str(out, "Filesystem has incompatible version ");
		bch2_version_to_text(out, le16_to_cpu(sb->version));
		prt_str(out, ", current version ");
		bch2_version_to_text(out, bcachefs_metadata_version_current);
		return -BCH_ERR_invalid_sb_features;
	}

	if (bch2_is_zero(sb->user_uuid.b, sizeof(sb->user_uuid))) {
		prt_printf(out, "Bad user UUID (got zeroes)");
		return -BCH_ERR_invalid_sb_uuid;
	}

	if (bch2_is_zero(sb->uuid.b, sizeof(sb->uuid))) {
		prt_printf(out, "Bad internal UUID (got zeroes)");
		return -BCH_ERR_invalid_sb_uuid;
	}

	if (!(flags & BCH_VALIDATE_write) &&
	    le64_to_cpu(sb->offset) != read_offset) {
		prt_printf(out, "Bad sb offset (got %llu, read from %llu)",
			   le64_to_cpu(sb->offset), read_offset);
		return -BCH_ERR_invalid_sb_offset;
	}

	if (!sb->nr_devices ||
	    sb->nr_devices > BCH_SB_MEMBERS_MAX) {
		prt_printf(out, "Bad number of member devices %u (max %u)",
			   sb->nr_devices, BCH_SB_MEMBERS_MAX);
		return -BCH_ERR_invalid_sb_too_many_members;
	}

	if (sb->dev_idx >= sb->nr_devices) {
		prt_printf(out, "Bad dev_idx (got %u, nr_devices %u)",
			   sb->dev_idx, sb->nr_devices);
		return -BCH_ERR_invalid_sb_dev_idx;
	}

	if (!sb->time_precision ||
	    le32_to_cpu(sb->time_precision) > NSEC_PER_SEC) {
		prt_printf(out, "Invalid time precision: %u (min 1, max %lu)",
			   le32_to_cpu(sb->time_precision), NSEC_PER_SEC);
		return -BCH_ERR_invalid_sb_time_precision;
	}

	/* old versions didn't know to downgrade this field */
	if (BCH_SB_VERSION_INCOMPAT_ALLOWED(sb) > le16_to_cpu(sb->version))
		SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, le16_to_cpu(sb->version));

	if (BCH_SB_VERSION_INCOMPAT(sb) > BCH_SB_VERSION_INCOMPAT_ALLOWED(sb)) {
		prt_printf(out, "Invalid version_incompat ");
		bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
		prt_str(out, " > incompat_allowed ");
		bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
		if (flags & BCH_VALIDATE_write)
			return -BCH_ERR_invalid_sb_version;
		else
			SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(sb, BCH_SB_VERSION_INCOMPAT(sb));
	}

	if (sb->nr_devices > 1)
		SET_BCH_SB_MULTI_DEVICE(sb, true);

	if (!flags) {
		/*
		 * Been seeing a bug where these are getting inexplicably
		 * zeroed, so we're now validating them, but we have to be
		 * careful not to prevent people's filesystems from mounting:
		 */
		if (!BCH_SB_JOURNAL_FLUSH_DELAY(sb))
			SET_BCH_SB_JOURNAL_FLUSH_DELAY(sb, 1000);
		if (!BCH_SB_JOURNAL_RECLAIM_DELAY(sb))
			SET_BCH_SB_JOURNAL_RECLAIM_DELAY(sb, 1000);

		if (!BCH_SB_VERSION_UPGRADE_COMPLETE(sb))
			SET_BCH_SB_VERSION_UPGRADE_COMPLETE(sb, le16_to_cpu(sb->version));

		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2 &&
		    !BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb))
			SET_BCH_SB_ALLOCATOR_STUCK_TIMEOUT(sb, 30);

		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_disk_accounting_v2)
			SET_BCH_SB_PROMOTE_WHOLE_EXTENTS(sb, true);

		if (!BCH_SB_WRITE_ERROR_TIMEOUT(sb))
			SET_BCH_SB_WRITE_ERROR_TIMEOUT(sb, 30);

		if (le16_to_cpu(sb->version) <= bcachefs_metadata_version_extent_flags &&
		    !BCH_SB_CSUM_ERR_RETRY_NR(sb))
			SET_BCH_SB_CSUM_ERR_RETRY_NR(sb, 3);
	}

#ifdef __KERNEL__
	if (!BCH_SB_SHARD_INUMS_NBITS(sb))
		SET_BCH_SB_SHARD_INUMS_NBITS(sb, ilog2(roundup_pow_of_two(num_online_cpus())));
#endif

	for (opt_id = 0; opt_id < bch2_opts_nr; opt_id++) {
		const struct bch_option *opt = bch2_opt_table + opt_id;

		if (opt->get_sb) {
			u64 v = bch2_opt_from_sb(sb, opt_id, -1);

			prt_printf(out, "Invalid option ");
			ret = bch2_opt_validate(opt, v, out);
			if (ret)
				return ret;

			printbuf_reset(out);
		}
	}

	/* validate layout */
	ret = validate_sb_layout(&sb->layout, out);
	if (ret)
		return ret;

	vstruct_for_each(sb, f) {
		if (!f->u64s) {
			prt_printf(out, "Invalid superblock: optional field with size 0 (type %u)",
				   le32_to_cpu(f->type));
			return -BCH_ERR_invalid_sb_field_size;
		}

		if (vstruct_next(f) > vstruct_last(sb)) {
			prt_printf(out, "Invalid superblock: optional field extends past end of superblock (type %u)",
				   le32_to_cpu(f->type));
			return -BCH_ERR_invalid_sb_field_size;
		}
	}

	struct bch_sb_field *mi =
		bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v2) ?:
		bch2_sb_field_get_id(sb, BCH_SB_FIELD_members_v1);

	/* members must be validated first: */
	if (!mi) {
		prt_printf(out, "Invalid superblock: member info area missing");
		return -BCH_ERR_invalid_sb_members_missing;
	}

	ret = bch2_sb_field_validate(sb, mi, flags, out);
	if (ret)
		return ret;

	vstruct_for_each(sb, f) {
		if (le32_to_cpu(f->type) == BCH_SB_FIELD_members_v1)
			continue;

		ret = bch2_sb_field_validate(sb, f, flags, out);
		if (ret)
			return ret;
	}

	if ((flags & BCH_VALIDATE_write) &&
	    bch2_sb_member_get(sb, sb->dev_idx).seq != sb->seq) {
		prt_printf(out, "Invalid superblock: member seq %llu != sb seq %llu",
			   le64_to_cpu(bch2_sb_member_get(sb, sb->dev_idx).seq),
			   le64_to_cpu(sb->seq));
		return -BCH_ERR_invalid_sb_members_missing;
	}

	return 0;
}

/* device open: */

static unsigned long le_ulong_to_cpu(unsigned long v)
{
	return sizeof(unsigned long) == 8
		? le64_to_cpu(v)
		: le32_to_cpu(v);
}

static void le_bitvector_to_cpu(unsigned long *dst, unsigned long *src, unsigned nr)
{
	BUG_ON(nr & (BITS_PER_TYPE(long) - 1));

	for (unsigned i = 0; i < BITS_TO_LONGS(nr); i++)
		dst[i] = le_ulong_to_cpu(src[i]);
}

static void bch2_sb_update(struct bch_fs *c)
{
	struct bch_sb *src = c->disk_sb.sb;

	lockdep_assert_held(&c->sb_lock);

	c->sb.uuid = src->uuid;
	c->sb.user_uuid = src->user_uuid;
	c->sb.version = le16_to_cpu(src->version);
	c->sb.version_incompat = BCH_SB_VERSION_INCOMPAT(src);
	c->sb.version_incompat_allowed
		= BCH_SB_VERSION_INCOMPAT_ALLOWED(src);
	c->sb.version_min = le16_to_cpu(src->version_min);
	c->sb.version_upgrade_complete = BCH_SB_VERSION_UPGRADE_COMPLETE(src);
	c->sb.nr_devices = src->nr_devices;
	c->sb.clean = BCH_SB_CLEAN(src);
	c->sb.encryption_type = BCH_SB_ENCRYPTION_TYPE(src);

	c->sb.nsec_per_time_unit = le32_to_cpu(src->time_precision);
	c->sb.time_units_per_sec = NSEC_PER_SEC / c->sb.nsec_per_time_unit;

	/* XXX this is wrong, we need a 96 or 128 bit integer type */
	c->sb.time_base_lo = div_u64(le64_to_cpu(src->time_base_lo),
				     c->sb.nsec_per_time_unit);
	c->sb.time_base_hi = le32_to_cpu(src->time_base_hi);

	c->sb.features = le64_to_cpu(src->features[0]);
	c->sb.compat = le64_to_cpu(src->compat[0]);
	c->sb.multi_device = BCH_SB_MULTI_DEVICE(src);

	memset(c->sb.errors_silent, 0, sizeof(c->sb.errors_silent));

	struct bch_sb_field_ext *ext = bch2_sb_field_get(src, ext);
	if (ext) {
		c->sb.recovery_passes_required =
			bch2_recovery_passes_from_stable(le64_to_cpu(ext->recovery_passes_required[0]));

		le_bitvector_to_cpu(c->sb.errors_silent, (void *) ext->errors_silent,
				    sizeof(c->sb.errors_silent) * 8);
		c->sb.btrees_lost_data = le64_to_cpu(ext->btrees_lost_data);
	}

	for_each_member_device(c, ca) {
		struct bch_member m = bch2_sb_member_get(src, ca->dev_idx);
		ca->mi = bch2_mi_to_cpu(&m);
	}
}
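
/*
 * Added commentary (not upstream text): bch2_sb_update() above refreshes the
 * unpacked native-endian summary in c->sb; __copy_super()/bch2_sb_from_fs()
 * below go the other way, replicating the filesystem superblock into each
 * member device's bch_sb_handle, as the write path does:
 *
 *	darray_for_each(online_devices, ca)
 *		bch2_sb_from_fs(c, *ca);
 */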

static int __copy_super(struct bch_sb_handle *dst_handle, struct bch_sb *src)
{
	struct bch_sb_field *src_f, *dst_f;
	struct bch_sb *dst = dst_handle->sb;
	unsigned i;

	dst->version = src->version;
	dst->version_min = src->version_min;
	dst->seq = src->seq;
	dst->uuid = src->uuid;
	dst->user_uuid = src->user_uuid;
	memcpy(dst->label, src->label, sizeof(dst->label));

	dst->block_size = src->block_size;
	dst->nr_devices = src->nr_devices;

	dst->time_base_lo = src->time_base_lo;
	dst->time_base_hi = src->time_base_hi;
	dst->time_precision = src->time_precision;
	dst->write_time = src->write_time;

	memcpy(dst->flags, src->flags, sizeof(dst->flags));
	memcpy(dst->features, src->features, sizeof(dst->features));
	memcpy(dst->compat, src->compat, sizeof(dst->compat));

	for (i = 0; i < BCH_SB_FIELD_NR; i++) {
		int d;

		if ((1U << i) & BCH_SINGLE_DEVICE_SB_FIELDS)
			continue;

		src_f = bch2_sb_field_get_id(src, i);
		dst_f = bch2_sb_field_get_id(dst, i);

		d = (src_f ? le32_to_cpu(src_f->u64s) : 0) -
		    (dst_f ? le32_to_cpu(dst_f->u64s) : 0);
		if (d > 0) {
			int ret = bch2_sb_realloc(dst_handle,
					le32_to_cpu(dst_handle->sb->u64s) + d);

			if (ret)
				return ret;

			dst = dst_handle->sb;
			dst_f = bch2_sb_field_get_id(dst, i);
		}

		dst_f = __bch2_sb_field_resize(dst_handle, dst_f,
				src_f ? le32_to_cpu(src_f->u64s) : 0);

		if (src_f)
			memcpy(dst_f, src_f, vstruct_bytes(src_f));
	}

	return 0;
}

int bch2_sb_to_fs(struct bch_fs *c, struct bch_sb *src)
{
	int ret;

	lockdep_assert_held(&c->sb_lock);

	ret = bch2_sb_realloc(&c->disk_sb, 0) ?:
		__copy_super(&c->disk_sb, src) ?:
		bch2_sb_replicas_to_cpu_replicas(c) ?:
		bch2_sb_disk_groups_to_cpu(c);
	if (ret)
		return ret;

	bch2_sb_update(c);
	return 0;
}

int bch2_sb_from_fs(struct bch_fs *c, struct bch_dev *ca)
{
	return __copy_super(&ca->disk_sb, c->disk_sb.sb);
}

/* read superblock: */

static int read_one_super(struct bch_sb_handle *sb, u64 offset, struct printbuf *err)
{
	size_t bytes;
	int ret;
reread:
	bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
	sb->bio->bi_iter.bi_sector = offset;
	bch2_bio_map(sb->bio, sb->sb, sb->buffer_size);

	ret = submit_bio_wait(sb->bio);
	if (ret) {
		prt_printf(err, "IO error: %i", ret);
		return ret;
	}

	if (!uuid_equal(&sb->sb->magic, &BCACHE_MAGIC) &&
	    !uuid_equal(&sb->sb->magic, &BCHFS_MAGIC)) {
		prt_str(err, "Not a bcachefs superblock (got magic ");
		pr_uuid(err, sb->sb->magic.b);
		prt_str(err, ")");
		return -BCH_ERR_invalid_sb_magic;
	}

	ret = bch2_sb_compatible(sb->sb, err);
	if (ret)
		return ret;

	bytes = vstruct_bytes(sb->sb);

	u64 sb_size = 512ULL << min(BCH_SB_LAYOUT_SIZE_BITS_MAX, sb->sb->layout.sb_max_size_bits);
	if (bytes > sb_size) {
		prt_printf(err, "Invalid superblock: too big (got %zu bytes, layout max %llu)",
			   bytes, sb_size);
		return -BCH_ERR_invalid_sb_too_big;
	}

	if (bytes > sb->buffer_size) {
		ret = bch2_sb_realloc(sb, le32_to_cpu(sb->sb->u64s));
		if (ret)
			return ret;
		goto reread;
	}

	enum bch_csum_type csum_type = BCH_SB_CSUM_TYPE(sb->sb);
	if (csum_type >= BCH_CSUM_NR ||
	    bch2_csum_type_is_encryption(csum_type)) {
		prt_printf(err, "unknown checksum type %llu", BCH_SB_CSUM_TYPE(sb->sb));
		return -BCH_ERR_invalid_sb_csum_type;
	}

	/* XXX: verify MACs */
	struct bch_csum csum = csum_vstruct(NULL, csum_type, null_nonce(), sb->sb);
	if (bch2_crc_cmp(csum, sb->sb->csum)) {
		bch2_csum_err_msg(err, csum_type, sb->sb->csum, csum);
		return -BCH_ERR_invalid_sb_csum;
	}

	sb->seq = le64_to_cpu(sb->sb->seq);

	return 0;
}
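
/*
 * Added commentary (not upstream text): read_one_super() above may loop - if
 * vstruct_bytes(sb->sb) reports a superblock larger than the current buffer,
 * it calls bch2_sb_realloc() with the just-read u64s count and jumps back to
 * 'reread', so the checksum check always runs over a complete copy.
 */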

static int __bch2_read_super(const char *path, struct bch_opts *opts,
			     struct bch_sb_handle *sb, bool ignore_notbchfs_msg)
{
	u64 offset = opt_get(*opts, sb);
	struct bch_sb_layout layout;
	struct printbuf err = PRINTBUF;
	struct printbuf err2 = PRINTBUF;
	__le64 *i;
	int ret;
#ifndef __KERNEL__
retry:
#endif
	memset(sb, 0, sizeof(*sb));
	sb->mode = BLK_OPEN_READ;
	sb->have_bio = true;
	sb->holder = kzalloc(sizeof(*sb->holder), GFP_KERNEL);
	if (!sb->holder)
		return -ENOMEM;

	sb->sb_name = kstrdup(path, GFP_KERNEL);
	if (!sb->sb_name) {
		ret = -ENOMEM;
		prt_printf(&err, "error allocating memory for sb_name");
		goto err;
	}

#ifndef __KERNEL__
	if (opt_get(*opts, direct_io) == false)
		sb->mode |= BLK_OPEN_BUFFERED;
#endif

	if (!opt_get(*opts, noexcl))
		sb->mode |= BLK_OPEN_EXCL;

	if (!opt_get(*opts, nochanges))
		sb->mode |= BLK_OPEN_WRITE;

	sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
	if (IS_ERR(sb->s_bdev_file) &&
	    PTR_ERR(sb->s_bdev_file) == -EACCES &&
	    opt_get(*opts, read_only)) {
		sb->mode &= ~BLK_OPEN_WRITE;

		sb->s_bdev_file = bdev_file_open_by_path(path, sb->mode, sb->holder, &bch2_sb_handle_bdev_ops);
		if (!IS_ERR(sb->s_bdev_file))
			opt_set(*opts, nochanges, true);
	}

	if (IS_ERR(sb->s_bdev_file)) {
		ret = PTR_ERR(sb->s_bdev_file);
		prt_printf(&err, "error opening %s: %s", path, bch2_err_str(ret));
		goto err;
	}
	sb->bdev = file_bdev(sb->s_bdev_file);

	ret = bch2_sb_realloc(sb, 0);
	if (ret) {
		prt_printf(&err, "error allocating memory for superblock");
		goto err;
	}

	if (bch2_fs_init_fault("read_super")) {
		prt_printf(&err, "dynamic fault");
		ret = -EFAULT;
		goto err;
	}

	ret = read_one_super(sb, offset, &err);
	if (!ret)
		goto got_super;

	if (opt_defined(*opts, sb))
		goto err;

	prt_printf(&err2, "bcachefs (%s): error reading default superblock: %s\n",
		   path, err.buf);
	if (ret == -BCH_ERR_invalid_sb_magic && ignore_notbchfs_msg)
		bch2_print_opts(opts, KERN_INFO "%s", err2.buf);
	else
		bch2_print_opts(opts, KERN_ERR "%s", err2.buf);

	printbuf_exit(&err2);
	printbuf_reset(&err);

	/*
	 * Error reading primary superblock - read location of backup
	 * superblocks:
	 */
	bio_reset(sb->bio, sb->bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
	sb->bio->bi_iter.bi_sector = BCH_SB_LAYOUT_SECTOR;
	/*
	 * use sb buffer to read layout, since sb buffer is page aligned but
	 * layout won't be:
	 */
	bch2_bio_map(sb->bio, sb->sb, sizeof(struct bch_sb_layout));

	ret = submit_bio_wait(sb->bio);
	if (ret) {
		prt_printf(&err, "IO error: %i", ret);
		goto err;
	}

	memcpy(&layout, sb->sb, sizeof(layout));
	ret = validate_sb_layout(&layout, &err);
	if (ret)
		goto err;

	for (i = layout.sb_offset;
	     i < layout.sb_offset + layout.nr_superblocks; i++) {
		offset = le64_to_cpu(*i);

		if (offset == opt_get(*opts, sb)) {
			ret = -BCH_ERR_invalid;
			continue;
		}

		ret = read_one_super(sb, offset, &err);
		if (!ret)
			goto got_super;
	}

	goto err;

got_super:
	if (le16_to_cpu(sb->sb->block_size) << 9 <
	    bdev_logical_block_size(sb->bdev) &&
	    opt_get(*opts, direct_io)) {
#ifndef __KERNEL__
		opt_set(*opts, direct_io, false);
		bch2_free_super(sb);
		goto retry;
#endif
		prt_printf(&err, "block size (%u) smaller than device block size (%u)",
			   le16_to_cpu(sb->sb->block_size) << 9,
			   bdev_logical_block_size(sb->bdev));
		ret = -BCH_ERR_block_size_too_small;
		goto err;
	}

	sb->have_layout = true;

	ret = bch2_sb_validate(sb->sb, offset, 0, &err);
	if (ret) {
		bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error validating superblock: %s\n",
				path, err.buf);
		goto err_no_print;
	}
out:
	printbuf_exit(&err);
	return ret;
err:
	bch2_print_opts(opts, KERN_ERR "bcachefs (%s): error reading superblock: %s\n",
			path, err.buf);
err_no_print:
	bch2_free_super(sb);
	goto out;
}

int bch2_read_super(const char *path, struct bch_opts *opts,
		    struct bch_sb_handle *sb)
{
	return __bch2_read_super(path, opts, sb, false);
}

/* provide a silenced version for mount.bcachefs */

int bch2_read_super_silent(const char *path, struct bch_opts *opts,
			   struct bch_sb_handle *sb)
{
	return __bch2_read_super(path, opts, sb, true);
}
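
/*
 * Added commentary (not upstream text): a minimal sketch of how the read side
 * pairs with bch2_free_super(), as the mount and device-add paths use it
 * (the device path here is illustrative):
 *
 *	struct bch_sb_handle sb_handle = {};
 *	int ret = bch2_read_super("/dev/sdX", &opts, &sb_handle);
 *	if (ret)
 *		return ret;
 *	...
 *	bch2_free_super(&sb_handle);
 */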

/* write superblock: */

static void write_super_endio(struct bio *bio)
{
	struct bch_dev *ca = bio->bi_private;

	bch2_account_io_success_fail(ca, bio_data_dir(bio), !bio->bi_status);

	/* XXX: return errors directly */

	if (bio->bi_status) {
		bch_err_dev_ratelimited(ca, "superblock %s error: %s",
					str_write_read(bio_data_dir(bio)),
					bch2_blk_status_to_str(bio->bi_status));
		ca->sb_write_error = 1;
	}

	closure_put(&ca->fs->sb_write);
	enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
}

static void read_back_super(struct bch_fs *c, struct bch_dev *ca)
{
	struct bch_sb *sb = ca->disk_sb.sb;
	struct bio *bio = ca->disk_sb.bio;

	memset(ca->sb_read_scratch, 0, BCH_SB_READ_SCRATCH_BUF_SIZE);

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_READ|REQ_SYNC|REQ_META);
	bio->bi_iter.bi_sector = le64_to_cpu(sb->layout.sb_offset[0]);
	bio->bi_end_io = write_super_endio;
	bio->bi_private = ca;
	bch2_bio_map(bio, ca->sb_read_scratch, BCH_SB_READ_SCRATCH_BUF_SIZE);

	this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_sb], bio_sectors(bio));

	enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
	closure_bio_submit(bio, &c->sb_write);
}

static void write_one_super(struct bch_fs *c, struct bch_dev *ca, unsigned idx)
{
	struct bch_sb *sb = ca->disk_sb.sb;
	struct bio *bio = ca->disk_sb.bio;

	sb->offset = sb->layout.sb_offset[idx];

	SET_BCH_SB_CSUM_TYPE(sb, bch2_csum_opt_to_type(c->opts.metadata_checksum, false));
	sb->csum = csum_vstruct(c, BCH_SB_CSUM_TYPE(sb),
				null_nonce(), sb);

	bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
	bio->bi_iter.bi_sector = le64_to_cpu(sb->offset);
	bio->bi_end_io = write_super_endio;
	bio->bi_private = ca;
	bch2_bio_map(bio, sb,
		     roundup((size_t) vstruct_bytes(sb),
			     bdev_logical_block_size(ca->disk_sb.bdev)));

	this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_sb],
		     bio_sectors(bio));

	enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
	closure_bio_submit(bio, &c->sb_write);
}

int bch2_write_super(struct bch_fs *c)
{
	struct closure *cl = &c->sb_write;
	struct printbuf err = PRINTBUF;
	unsigned sb = 0, nr_wrote;
	struct bch_devs_mask sb_written;
	bool wrote, can_mount_without_written, can_mount_with_written;
	unsigned degraded_flags = BCH_FORCE_IF_DEGRADED;
	DARRAY(struct bch_dev *) online_devices = {};
	int ret = 0;

	trace_and_count(c, write_super, c, _RET_IP_);

	if (c->opts.degraded == BCH_DEGRADED_very)
		degraded_flags |= BCH_FORCE_IF_LOST;

	lockdep_assert_held(&c->sb_lock);

	closure_init_stack(cl);
	memset(&sb_written, 0, sizeof(sb_written));

	/*
	 * Note: we do writes to RO devices here, and we might want to change
	 * that in the future.
	 *
	 * For now, we expect to be able to call write_super() when we're not
	 * yet RW:
	 */
	for_each_online_member(c, ca, BCH_DEV_READ_REF_write_super) {
		ret = darray_push(&online_devices, ca);
		if (bch2_fs_fatal_err_on(ret, c, "%s: error allocating online devices", __func__)) {
			enumerated_ref_put(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
			goto out;
		}
		enumerated_ref_get(&ca->io_ref[READ], BCH_DEV_READ_REF_write_super);
	}

	/* Make sure we're using the new magic numbers: */
	c->disk_sb.sb->magic = BCHFS_MAGIC;
	c->disk_sb.sb->layout.magic = BCHFS_MAGIC;

	le64_add_cpu(&c->disk_sb.sb->seq, 1);

	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
	darray_for_each(online_devices, ca)
		__bch2_members_v2_get_mut(mi, (*ca)->dev_idx)->seq = c->disk_sb.sb->seq;
	c->disk_sb.sb->write_time = cpu_to_le64(ktime_get_real_seconds());

	if (test_bit(BCH_FS_error, &c->flags))
		SET_BCH_SB_HAS_ERRORS(c->disk_sb.sb, 1);
	if (test_bit(BCH_FS_topology_error, &c->flags))
		SET_BCH_SB_HAS_TOPOLOGY_ERRORS(c->disk_sb.sb, 1);

	SET_BCH_SB_BIG_ENDIAN(c->disk_sb.sb, CPU_BIG_ENDIAN);

	bch2_sb_counters_from_cpu(c);
	bch2_sb_members_from_cpu(c);
	bch2_sb_members_cpy_v2_v1(&c->disk_sb);
	bch2_sb_errors_from_cpu(c);
	bch2_sb_downgrade_update(c);

	darray_for_each(online_devices, ca)
		bch2_sb_from_fs(c, (*ca));

	darray_for_each(online_devices, ca) {
		printbuf_reset(&err);

		ret = bch2_sb_validate((*ca)->disk_sb.sb, 0, BCH_VALIDATE_write, &err);
		if (ret) {
			bch2_fs_inconsistent(c, "sb invalid before write: %s", err.buf);
			goto out;
		}
	}

	if (c->opts.nochanges)
		goto out;

	/*
	 * Defer writing the superblock until filesystem initialization is
	 * complete - don't write out a partly initialized superblock:
	 */
	if (!BCH_SB_INITIALIZED(c->disk_sb.sb))
		goto out;

	if (le16_to_cpu(c->disk_sb.sb->version) > bcachefs_metadata_version_current) {
		struct printbuf buf = PRINTBUF;
		prt_printf(&buf, "attempting to write superblock that wasn't version downgraded (");
		bch2_version_to_text(&buf, le16_to_cpu(c->disk_sb.sb->version));
		prt_str(&buf, " > ");
		bch2_version_to_text(&buf, bcachefs_metadata_version_current);
		prt_str(&buf, ")");
		bch2_fs_fatal_error(c, ": %s", buf.buf);
		printbuf_exit(&buf);
		ret = bch_err_throw(c, sb_not_downgraded);
		goto out;
	}

	darray_for_each(online_devices, ca) {
		__set_bit((*ca)->dev_idx, sb_written.d);
		(*ca)->sb_write_error = 0;
	}

	darray_for_each(online_devices, ca)
		read_back_super(c, *ca);
	closure_sync(cl);

	darray_for_each(online_devices, cap) {
		struct bch_dev *ca = *cap;

		if (ca->sb_write_error)
			continue;

		if (le64_to_cpu(ca->sb_read_scratch->seq) < ca->disk_sb.seq) {
			struct printbuf buf = PRINTBUF;
			prt_char(&buf, ' ');
			prt_bdevname(&buf, ca->disk_sb.bdev);
			prt_printf(&buf,
				": Superblock write was silently dropped! (seq %llu expected %llu)",
				le64_to_cpu(ca->sb_read_scratch->seq),
				ca->disk_sb.seq);

			if (c->opts.errors != BCH_ON_ERROR_continue &&
			    c->opts.errors != BCH_ON_ERROR_fix_safe) {
				ret = bch_err_throw(c, erofs_sb_err);
				bch2_fs_fatal_error(c, "%s", buf.buf);
			} else {
				bch_err(c, "%s", buf.buf);
			}

			printbuf_exit(&buf);
		}

		if (le64_to_cpu(ca->sb_read_scratch->seq) > ca->disk_sb.seq) {
			struct printbuf buf = PRINTBUF;
			prt_char(&buf, ' ');
			prt_bdevname(&buf, ca->disk_sb.bdev);
			prt_printf(&buf,
				": Superblock modified by another process (seq %llu expected %llu)",
				le64_to_cpu(ca->sb_read_scratch->seq),
				ca->disk_sb.seq);
			bch2_fs_fatal_error(c, "%s", buf.buf);
			printbuf_exit(&buf);
			ret = bch_err_throw(c, erofs_sb_err);
		}
	}

	if (ret)
		goto out;

	do {
		wrote = false;
		darray_for_each(online_devices, cap) {
			struct bch_dev *ca = *cap;
			if (!ca->sb_write_error &&
			    sb < ca->disk_sb.sb->layout.nr_superblocks) {
				write_one_super(c, ca, sb);
				wrote = true;
			}
		}
		closure_sync(cl);
		sb++;
	} while (wrote);

	darray_for_each(online_devices, cap) {
		struct bch_dev *ca = *cap;
		if (ca->sb_write_error)
			__clear_bit(ca->dev_idx, sb_written.d);
		else
			ca->disk_sb.seq = le64_to_cpu(ca->disk_sb.sb->seq);
	}

	nr_wrote = dev_mask_nr(&sb_written);

	can_mount_with_written =
		bch2_have_enough_devs(c, sb_written, degraded_flags, false);

	for (unsigned i = 0; i < ARRAY_SIZE(sb_written.d); i++)
		sb_written.d[i] = ~sb_written.d[i];

	can_mount_without_written =
		bch2_have_enough_devs(c, sb_written, degraded_flags, false);

	/*
	 * If we would be able to mount _without_ the devices we successfully
	 * wrote superblocks to, we weren't able to write to enough devices:
	 *
	 * Exception: if we can mount without the successes because we haven't
	 * written anything (new filesystem), we continue if we'd be able to
	 * mount with the devices we did successfully write to:
	 */
	if (bch2_fs_fatal_err_on(!nr_wrote ||
				 !can_mount_with_written ||
				 (can_mount_without_written &&
				  !can_mount_with_written), c,
		": Unable to write superblock to sufficient devices (from %ps)",
		(void *) _RET_IP_))
		ret = bch_err_throw(c, erofs_sb_err);
out:
	/* Make new options visible after they're persistent: */
	bch2_sb_update(c);
	darray_for_each(online_devices, ca)
		enumerated_ref_put(&(*ca)->io_ref[READ], BCH_DEV_READ_REF_write_super);
	darray_exit(&online_devices);
	printbuf_exit(&err);
	return ret;
}

void __bch2_check_set_feature(struct bch_fs *c, unsigned feat)
{
	mutex_lock(&c->sb_lock);
	if (!(c->sb.features & (1ULL << feat))) {
		c->disk_sb.sb->features[0] |= cpu_to_le64(1ULL << feat);

		bch2_write_super(c);
	}
	mutex_unlock(&c->sb_lock);
}
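
/*
 * Added commentary (not upstream text): callers usually reach
 * __bch2_check_set_feature() through an unlocked wrapper in super-io.h that
 * first tests the cached bits, roughly:
 *
 *	if (!(c->sb.features & BIT_ULL(BCH_FEATURE_ec)))
 *		__bch2_check_set_feature(c, BCH_FEATURE_ec);
 *
 * so the common already-set case avoids taking c->sb_lock and rewriting the
 * superblock.
 */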

/* Downgrade if superblock is at a higher version than currently supported: */
bool bch2_check_version_downgrade(struct bch_fs *c)
{
	bool ret = bcachefs_metadata_version_current < c->sb.version;

	lockdep_assert_held(&c->sb_lock);

	/*
	 * Downgrade, if superblock is at a higher version than currently
	 * supported:
	 *
	 * c->sb will be checked before we write the superblock, so update it as
	 * well:
	 */
	if (BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb) > bcachefs_metadata_version_current)
		SET_BCH_SB_VERSION_UPGRADE_COMPLETE(c->disk_sb.sb, bcachefs_metadata_version_current);
	if (BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb) > bcachefs_metadata_version_current)
		SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb, bcachefs_metadata_version_current);
	if (c->sb.version > bcachefs_metadata_version_current)
		c->disk_sb.sb->version = cpu_to_le16(bcachefs_metadata_version_current);
	if (c->sb.version_min > bcachefs_metadata_version_current)
		c->disk_sb.sb->version_min = cpu_to_le16(bcachefs_metadata_version_current);
	c->disk_sb.sb->compat[0] &= cpu_to_le64((1ULL << BCH_COMPAT_NR) - 1);
	return ret;
}

void bch2_sb_upgrade(struct bch_fs *c, unsigned new_version, bool incompat)
{
	lockdep_assert_held(&c->sb_lock);

	if (BCH_VERSION_MAJOR(new_version) >
	    BCH_VERSION_MAJOR(le16_to_cpu(c->disk_sb.sb->version)))
		bch2_sb_field_resize(&c->disk_sb, downgrade, 0);

	c->disk_sb.sb->version = cpu_to_le16(new_version);

	if (incompat) {
		c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
		SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
			max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), new_version));
	}
}

void bch2_sb_upgrade_incompat(struct bch_fs *c)
{
	mutex_lock(&c->sb_lock);
	if (c->sb.version == c->sb.version_incompat_allowed)
		goto unlock;

	struct printbuf buf = PRINTBUF;

	prt_str(&buf, "Now allowing incompatible features up to ");
	bch2_version_to_text(&buf, c->sb.version);
	prt_str(&buf, ", previously allowed up to ");
	bch2_version_to_text(&buf, c->sb.version_incompat_allowed);
	prt_newline(&buf);

	bch_notice(c, "%s", buf.buf);
	printbuf_exit(&buf);

	c->disk_sb.sb->features[0] |= cpu_to_le64(BCH_SB_FEATURES_ALL);
	SET_BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb,
			max(BCH_SB_VERSION_INCOMPAT_ALLOWED(c->disk_sb.sb), c->sb.version));
	bch2_write_super(c);
unlock:
	mutex_unlock(&c->sb_lock);
}

static int bch2_sb_ext_validate(struct bch_sb *sb, struct bch_sb_field *f,
				enum bch_validate_flags flags, struct printbuf *err)
{
	if (vstruct_bytes(f) < 88) {
		prt_printf(err, "field too small (%zu < %u)", vstruct_bytes(f), 88);
		return -BCH_ERR_invalid_sb_ext;
	}

	return 0;
}

static void bch2_sb_ext_to_text(struct printbuf *out, struct bch_sb *sb,
				struct bch_sb_field *f)
{
	struct bch_sb_field_ext *e = field_to_type(f, ext);

	prt_printf(out, "Recovery passes required:\t");
	prt_bitflags(out, bch2_recovery_passes,
		     bch2_recovery_passes_from_stable(le64_to_cpu(e->recovery_passes_required[0])));
	prt_newline(out);

	unsigned long *errors_silent = kmalloc(sizeof(e->errors_silent), GFP_KERNEL);
	if (errors_silent) {
		le_bitvector_to_cpu(errors_silent, (void *) e->errors_silent, sizeof(e->errors_silent) * 8);

		prt_printf(out, "Errors to silently fix:\t");
		prt_bitflags_vector(out, bch2_sb_error_strs, errors_silent,
				    min(BCH_FSCK_ERR_MAX, sizeof(e->errors_silent) * 8));
		prt_newline(out);

		kfree(errors_silent);
	}

	prt_printf(out, "Btrees with missing data:\t");
	prt_bitflags(out, __bch2_btree_ids, le64_to_cpu(e->btrees_lost_data));
	prt_newline(out);
}

static const struct bch_sb_field_ops bch_sb_field_ops_ext = {
	.validate	= bch2_sb_ext_validate,
	.to_text	= bch2_sb_ext_to_text,
};

static const struct bch_sb_field_ops *bch2_sb_field_ops[] = {
#define x(f, nr)					\
	[BCH_SB_FIELD_##f] = &bch_sb_field_ops_##f,
	BCH_SB_FIELDS()
#undef x
};

static const struct bch_sb_field_ops bch2_sb_field_null_ops;

static const struct bch_sb_field_ops *bch2_sb_field_type_ops(unsigned type)
{
	return likely(type < ARRAY_SIZE(bch2_sb_field_ops))
		? bch2_sb_field_ops[type]
		: &bch2_sb_field_null_ops;
}

static int bch2_sb_field_validate(struct bch_sb *sb, struct bch_sb_field *f,
				  enum bch_validate_flags flags, struct printbuf *err)
{
	unsigned type = le32_to_cpu(f->type);
	struct printbuf field_err = PRINTBUF;
	const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);
	int ret;

	ret = ops->validate ? ops->validate(sb, f, flags, &field_err) : 0;
	if (ret) {
		prt_printf(err, "Invalid superblock section %s: %s",
			   bch2_sb_fields[type], field_err.buf);
		prt_newline(err);
		bch2_sb_field_to_text(err, sb, f);
	}

	printbuf_exit(&field_err);
	return ret;
}

void __bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
			     struct bch_sb_field *f)
{
	unsigned type = le32_to_cpu(f->type);
	const struct bch_sb_field_ops *ops = bch2_sb_field_type_ops(type);

	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 32);

	if (ops->to_text)
		ops->to_text(out, sb, f);
}

void bch2_sb_field_to_text(struct printbuf *out, struct bch_sb *sb,
			   struct bch_sb_field *f)
{
	unsigned type = le32_to_cpu(f->type);

	if (type < BCH_SB_FIELD_NR)
		prt_printf(out, "%s", bch2_sb_fields[type]);
	else
		prt_printf(out, "(unknown field %u)", type);

	prt_printf(out, " (size %zu):", vstruct_bytes(f));
	prt_newline(out);

	__bch2_sb_field_to_text(out, sb, f);
}

void bch2_sb_layout_to_text(struct printbuf *out, struct bch_sb_layout *l)
{
	unsigned i;

	prt_printf(out, "Type: %u", l->layout_type);
	prt_newline(out);

	prt_str(out, "Superblock max size: ");
	prt_units_u64(out, 512 << l->sb_max_size_bits);
	prt_newline(out);

	prt_printf(out, "Nr superblocks: %u", l->nr_superblocks);
	prt_newline(out);

	prt_str(out, "Offsets: ");
	for (i = 0; i < l->nr_superblocks; i++) {
		if (i)
			prt_str(out, ", ");
		prt_printf(out, "%llu", le64_to_cpu(l->sb_offset[i]));
	}
	prt_newline(out);
}

void bch2_sb_to_text(struct printbuf *out, struct bch_sb *sb,
		     bool print_layout, unsigned fields)
{
	if (!out->nr_tabstops)
		printbuf_tabstop_push(out, 44);

	prt_printf(out, "External UUID:\t");
	pr_uuid(out, sb->user_uuid.b);
	prt_newline(out);

	prt_printf(out, "Internal UUID:\t");
	pr_uuid(out, sb->uuid.b);
	prt_newline(out);

	prt_printf(out, "Magic number:\t");
	pr_uuid(out, sb->magic.b);
	prt_newline(out);

	prt_printf(out, "Device index:\t%u\n", sb->dev_idx);

	prt_printf(out, "Label:\t");
	if (!strlen(sb->label))
		prt_printf(out, "(none)");
	else
		prt_printf(out, "%.*s", (int) sizeof(sb->label), sb->label);
	prt_newline(out);

	prt_printf(out, "Version:\t");
	bch2_version_to_text(out, le16_to_cpu(sb->version));
	prt_newline(out);

	prt_printf(out, "Incompatible features allowed:\t");
	bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT_ALLOWED(sb));
	prt_newline(out);

	prt_printf(out, "Incompatible features in use:\t");
	bch2_version_to_text(out, BCH_SB_VERSION_INCOMPAT(sb));
	prt_newline(out);

	prt_printf(out, "Version upgrade complete:\t");
	bch2_version_to_text(out, BCH_SB_VERSION_UPGRADE_COMPLETE(sb));
	prt_newline(out);

	prt_printf(out, "Oldest version on disk:\t");
	bch2_version_to_text(out, le16_to_cpu(sb->version_min));
	prt_newline(out);

	prt_printf(out, "Created:\t");
	if (sb->time_base_lo)
		bch2_prt_datetime(out, div_u64(le64_to_cpu(sb->time_base_lo), NSEC_PER_SEC));
	else
		prt_printf(out, "(not set)");
	prt_newline(out);

	prt_printf(out, "Sequence number:\t");
	prt_printf(out, "%llu", le64_to_cpu(sb->seq));
	prt_newline(out);

	prt_printf(out, "Time of last write:\t");
	bch2_prt_datetime(out, le64_to_cpu(sb->write_time));
	prt_newline(out);

	prt_printf(out, "Superblock size:\t");
	prt_units_u64(out, vstruct_bytes(sb));
	prt_str(out, "/");
	prt_units_u64(out, 512ULL << sb->layout.sb_max_size_bits);
	prt_newline(out);

	prt_printf(out, "Clean:\t%llu\n", BCH_SB_CLEAN(sb));
	prt_printf(out, "Devices:\t%u\n", bch2_sb_nr_devices(sb));

	prt_printf(out, "Sections:\t");
	u64 fields_have = 0;
	vstruct_for_each(sb, f)
		fields_have |= 1 << le32_to_cpu(f->type);
	prt_bitflags(out, bch2_sb_fields, fields_have);
	prt_newline(out);

	prt_printf(out, "Features:\t");
	prt_bitflags(out, bch2_sb_features, le64_to_cpu(sb->features[0]));
	prt_newline(out);

	prt_printf(out, "Compat features:\t");
	prt_bitflags(out, bch2_sb_compat, le64_to_cpu(sb->compat[0]));
	prt_newline(out);

	prt_newline(out);
	prt_printf(out, "Options:");
	prt_newline(out);
	printbuf_indent_add(out, 2);
	{
		enum bch_opt_id id;

		for (id = 0; id < bch2_opts_nr; id++) {
			const struct bch_option *opt = bch2_opt_table + id;

			if (opt->get_sb) {
				u64 v = bch2_opt_from_sb(sb, id, -1);

				prt_printf(out, "%s:\t", opt->attr.name);
				bch2_opt_to_text(out, NULL, sb, opt, v,
						 OPT_HUMAN_READABLE|OPT_SHOW_FULL_LIST);
				prt_newline(out);
			}
		}
	}

	printbuf_indent_sub(out, 2);

	if (print_layout) {
		prt_newline(out);
		prt_printf(out, "layout:");
		prt_newline(out);
		printbuf_indent_add(out, 2);
		bch2_sb_layout_to_text(out, &sb->layout);
		printbuf_indent_sub(out, 2);
	}

	vstruct_for_each(sb, f)
		if (fields & (1 << le32_to_cpu(f->type))) {
			prt_newline(out);
			bch2_sb_field_to_text(out, sb, f);
		}
}