// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "btree_cache.h"
#include "disk_groups.h"
#include "error.h"
#include "opts.h"
#include "replicas.h"
#include "sb-members.h"
#include "super-io.h"

void bch2_dev_missing(struct bch_fs *c, unsigned dev)
{
	if (dev != BCH_SB_MEMBER_INVALID)
		bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
}

void bch2_dev_bucket_missing(struct bch_dev *ca, u64 bucket)
{
	bch2_fs_inconsistent(ca->fs,
		"pointer to nonexistent bucket %llu on device %s (valid range %u-%llu)",
		bucket, ca->name, ca->mi.first_bucket, ca->mi.nbuckets);
}

#define x(t, n, ...) [n] = #t,
static const char * const bch2_iops_measurements[] = {
	BCH_IOPS_MEASUREMENTS()
	NULL
};

char * const bch2_member_error_strs[] = {
	BCH_MEMBER_ERROR_TYPES()
	NULL
};
#undef x
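
/*
 * Illustrative note, not part of the build: the x() lists live in the
 * format headers; assuming a definition along the lines of
 *
 *	#define BCH_MEMBER_ERROR_TYPES()	x(read, 0) x(write, 1) x(checksum, 2)
 *
 * the x-macro above expands bch2_member_error_strs to
 *
 *	{ [0] = "read", [1] = "write", [2] = "checksum", NULL };
 *
 * i.e. a NULL-terminated name table indexed by the enum value.
 */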

/* Code for bch_sb_field_members_v1: */

struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
{
	return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
}

static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
{
	struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
	memset(&ret, 0, sizeof(ret));
	memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
	return ret;
}

static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
{
	return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
}

static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
{
	struct bch_member ret, *p = members_v1_get_mut(mi, i);
	memset(&ret, 0, sizeof(ret));
	memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
	return ret;
}

struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
{
	struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
	if (mi2)
		return members_v2_get(mi2, i);
	struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
	return members_v1_get(mi1, i);
}
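
/*
 * Sketch of the versioned entry layouts (the indexing formula is an
 * assumption based on __bch2_members_v2_get_mut() in sb-members.h):
 * v1 entries have a fixed stride of BCH_MEMBER_V1_BYTES, while v2
 * stores the per-entry stride in mi->member_bytes, so a superblock
 * written with a smaller struct bch_member can still be indexed:
 *
 *	entry(i) = (void *) mi->_members + i * le16_to_cpu(mi->member_bytes);
 *
 * The _get() helpers above copy min(stride, sizeof(struct bch_member))
 * and zero the remainder, so fields newer than the on-disk stride read
 * back as 0.
 */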

static int sb_members_v2_resize_entries(struct bch_fs *c)
{
	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

	if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
		unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
					      c->disk_sb.sb->nr_devices), 8);

		mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
		if (!mi)
			return -BCH_ERR_ENOSPC_sb_members_v2;

		/*
		 * Walk backwards so that widening an entry to the new stride
		 * never overwrites a not-yet-moved entry at the old stride:
		 */
		for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
			void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
			memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
			memset(dst + le16_to_cpu(mi->member_bytes),
			       0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
		}
		mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
	}
	return 0;
}

int bch2_sb_members_v2_init(struct bch_fs *c)
{
	struct bch_sb_field_members_v1 *mi1;
	struct bch_sb_field_members_v2 *mi2;

	if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
		mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
				DIV_ROUND_UP(sizeof(*mi2) +
					     sizeof(struct bch_member) * c->sb.nr_devices,
					     sizeof(u64)));
		if (!mi2)
			return -BCH_ERR_ENOSPC_sb_members_v2;

		mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
		memcpy(&mi2->_members[0], &mi1->_members[0],
		       BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
		memset(&mi2->pad[0], 0, sizeof(mi2->pad));
		mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
	}

	return sb_members_v2_resize_entries(c);
}

int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
{
	struct bch_sb_field_members_v1 *mi1;
	struct bch_sb_field_members_v2 *mi2;

	mi1 = bch2_sb_field_resize(disk_sb, members_v1,
			DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
				     disk_sb->sb->nr_devices, sizeof(u64)));
	if (!mi1)
		return -BCH_ERR_ENOSPC_sb_members;

	mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);

	for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
		memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);

	return 0;
}

static int validate_member(struct printbuf *err,
			   struct bch_member m,
			   struct bch_sb *sb,
			   int i)
{
	if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) {
		prt_printf(err, "device %u: too many buckets (got %llu, max %u)",
			   i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX);
		return -BCH_ERR_invalid_sb_members;
	}

	if (le64_to_cpu(m.nbuckets) -
	    le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
		prt_printf(err, "device %u: not enough buckets (got %llu, min %u)",
			   i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
		return -BCH_ERR_invalid_sb_members;
	}

	if (le16_to_cpu(m.bucket_size) <
	    le16_to_cpu(sb->block_size)) {
		prt_printf(err, "device %u: bucket size %u smaller than block size %u",
			   i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
		return -BCH_ERR_invalid_sb_members;
	}

	if (le16_to_cpu(m.bucket_size) <
	    BCH_SB_BTREE_NODE_SIZE(sb)) {
		prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
			   i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
		return -BCH_ERR_invalid_sb_members;
	}

	if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) {
		prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift);
		return -BCH_ERR_invalid_sb_members;
	}

	return 0;
}

static void member_to_text(struct printbuf *out,
			   struct bch_member m,
			   struct bch_sb_field_disk_groups *gi,
			   struct bch_sb *sb,
			   int i)
{
	unsigned data_have = bch2_sb_dev_has_data(sb, i);
	u64 bucket_size = le16_to_cpu(m.bucket_size);
	u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;

	if (!bch2_member_alive(&m))
		return;

	prt_printf(out, "Device:\t%u\n", i);

	printbuf_indent_add(out, 2);

	prt_printf(out, "Label:\t");
	if (BCH_MEMBER_GROUP(&m)) {
		unsigned idx = BCH_MEMBER_GROUP(&m) - 1;

		if (idx < disk_groups_nr(gi))
			prt_printf(out, "%s (%u)",
				   gi->entries[idx].label, idx);
		else
			prt_printf(out, "(bad disk labels section)");
	} else {
		prt_printf(out, "(none)");
	}
	prt_newline(out);

	prt_printf(out, "UUID:\t");
	pr_uuid(out, m.uuid.b);
	prt_newline(out);

	prt_printf(out, "Size:\t");
	prt_units_u64(out, device_size << 9);
	prt_newline(out);

	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
		prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i]));

	for (unsigned i = 0; i < BCH_IOPS_NR; i++)
		prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i]));

	prt_printf(out, "Bucket size:\t");
	prt_units_u64(out, bucket_size << 9);
	prt_newline(out);

	prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket));
	prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets));

	prt_printf(out, "Last mount:\t");
	if (m.last_mount)
		bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
	else
		prt_printf(out, "(never)");
	prt_newline(out);

	prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq));

	prt_printf(out, "State:\t%s\n",
		   BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
		   ? bch2_member_states[BCH_MEMBER_STATE(&m)]
		   : "unknown");

	prt_printf(out, "Data allowed:\t");
	if (BCH_MEMBER_DATA_ALLOWED(&m))
		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
	else
		prt_printf(out, "(none)");
	prt_newline(out);

	prt_printf(out, "Has data:\t");
	if (data_have)
		prt_bitflags(out, __bch2_data_types, data_have);
	else
		prt_printf(out, "(none)");
	prt_newline(out);

	prt_printf(out, "Btree allocated bitmap blocksize:\t");
	if (m.btree_bitmap_shift < 64)
		prt_units_u64(out, 1ULL << m.btree_bitmap_shift);
	else
		prt_printf(out, "(invalid shift %u)", m.btree_bitmap_shift);
	prt_newline(out);

	prt_printf(out, "Btree allocated bitmap:\t");
	bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64);
	prt_newline(out);

	/* Durability is stored biased by one, so 0 means unset (default of 1): */
	prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);

	prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m));
	prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m));

	printbuf_indent_sub(out, 2);
}

static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f,
				       enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
	unsigned i;

	if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
		prt_printf(err, "too many devices for section size");
		return -BCH_ERR_invalid_sb_members;
	}

	for (i = 0; i < sb->nr_devices; i++) {
		struct bch_member m = members_v1_get(mi, i);

		int ret = validate_member(err, m, sb, i);
		if (ret)
			return ret;
	}

	return 0;
}

static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
				       struct bch_sb_field *f)
{
	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
	unsigned i;

	for (i = 0; i < sb->nr_devices; i++)
		member_to_text(out, members_v1_get(mi, i), gi, sb, i);
}

const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
	.validate	= bch2_sb_members_v1_validate,
	.to_text	= bch2_sb_members_v1_to_text,
};

static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
				       struct bch_sb_field *f)
{
	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
	unsigned i;

	for (i = 0; i < sb->nr_devices; i++)
		member_to_text(out, members_v2_get(mi, i), gi, sb, i);
}

static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f,
				       enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
	size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
		(void *) mi;

	if (mi_bytes > vstruct_bytes(&mi->field)) {
		prt_printf(err, "section too small (%zu > %zu)",
			   mi_bytes, vstruct_bytes(&mi->field));
		return -BCH_ERR_invalid_sb_members;
	}

	for (unsigned i = 0; i < sb->nr_devices; i++) {
		int ret = validate_member(err, members_v2_get(mi, i), sb, i);
		if (ret)
			return ret;
	}

	return 0;
}

const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
	.validate	= bch2_sb_members_v2_validate,
	.to_text	= bch2_sb_members_v2_to_text,
};

void bch2_sb_members_from_cpu(struct bch_fs *c)
{
	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);

	rcu_read_lock();
	for_each_member_device_rcu(c, ca, NULL) {
		struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);

		for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
			m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
	}
	rcu_read_unlock();
}

void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;
	struct bch_member m;

	mutex_lock(&ca->fs->sb_lock);
	m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&ca->fs->sb_lock);

	printbuf_tabstop_push(out, 12);

	prt_str(out, "IO errors since filesystem creation");
	prt_newline(out);

	printbuf_indent_add(out, 2);
	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
		prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
	printbuf_indent_sub(out, 2);

	prt_str(out, "IO errors since ");
	bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
	prt_str(out, " ago");
	prt_newline(out);

	printbuf_indent_add(out, 2);
	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
		prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
			   atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
	printbuf_indent_sub(out, 2);
}

void bch2_dev_errors_reset(struct bch_dev *ca)
{
	struct bch_fs *c = ca->fs;
	struct bch_member *m;

	mutex_lock(&c->sb_lock);
	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());

	bch2_write_super(c);
	mutex_unlock(&c->sb_lock);
}

/*
 * Per member "range has btree nodes" bitmap:
 *
 * This is so that if we ever have to run the btree node scan to repair we don't
 * have to scan full devices:
 */
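
/*
 * Illustrative sketch (assumption: this matches
 * bch2_dev_btree_bitmap_marked_sectors() in sb-members.h): with
 * btree_bitmap_shift = s, bit i of btree_allocated_bitmap covers device
 * sectors [i << s, (i + 1) << s), so the whole 64-bit bitmap spans
 * 64ULL << s sectors. A range [start, start + len) is "marked" iff every
 * bit whose window intersects it is set:
 *
 *	for (u64 b = start >> s; b << s < start + len; b++)
 *		if (!(bitmap & BIT_ULL(b)))
 *			return false;
 *	return true;
 */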

bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
{
	bool ret = true;
	rcu_read_lock();
	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
		if (!ca)
			continue;

		if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) {
			ret = false;
			break;
		}
	}
	rcu_read_unlock();
	return ret;
}

static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
				u64 start, unsigned sectors)
{
	struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
	u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);

	u64 end = start + sectors;

	int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
	if (resize > 0) {
		u64 new_bitmap = 0;

		for (unsigned i = 0; i < 64; i++)
			if (bitmap & BIT_ULL(i))
				new_bitmap |= BIT_ULL(i >> resize);
		bitmap = new_bitmap;
		m->btree_bitmap_shift += resize;
	}

	BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
	BUG_ON(end > 64ULL << m->btree_bitmap_shift);

	for (unsigned bit = start >> m->btree_bitmap_shift;
	     (u64) bit << m->btree_bitmap_shift < end;
	     bit++)
		bitmap |= BIT_ULL(bit);

	m->btree_allocated_bitmap = cpu_to_le64(bitmap);
}
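
/*
 * Worked example of the resize above (illustrative): with
 * btree_bitmap_shift = 9 the bitmap covers 64 << 9 = 32768 sectors;
 * marking end = 65536 sectors gives
 *
 *	resize = ilog2(65536) - (9 + 6) = 1,
 *
 * so each old bit i collapses into bit i >> 1 and the per-bit
 * granularity doubles. The collapse only ever marks more space as "may
 * contain btree nodes", never less, which is the safe direction for
 * the btree node scan.
 */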

void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
{
	lockdep_assert_held(&c->sb_lock);

	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
		if (!bch2_member_exists(c->disk_sb.sb, ptr->dev))
			continue;

		__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
	}
}

unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
{
	unsigned nr = 0;

	for (unsigned i = 0; i < sb->nr_devices; i++)
		nr += bch2_member_exists((struct bch_sb *) sb, i);
	return nr;
}

int bch2_sb_member_alloc(struct bch_fs *c)
{
	unsigned dev_idx = c->sb.nr_devices;
	struct bch_sb_field_members_v2 *mi;
	unsigned nr_devices;
	unsigned u64s;
	int best = -1;
	u64 best_last_mount = 0;

	if (dev_idx < BCH_SB_MEMBERS_MAX)
		goto have_slot;

	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) {
		/* eventually BCH_SB_MEMBERS_MAX will be raised */
		if (dev_idx == BCH_SB_MEMBER_INVALID)
			continue;

		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
		if (bch2_member_alive(&m))
			continue;

		u64 last_mount = le64_to_cpu(m.last_mount);
		if (best < 0 || last_mount < best_last_mount) {
			best = dev_idx;
			best_last_mount = last_mount;
		}
	}
	if (best >= 0) {
		dev_idx = best;
		goto have_slot;
	}

	return -BCH_ERR_ENOSPC_sb_members;
have_slot:
	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);

	mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
	u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
			    le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));

	mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
	if (!mi)
		return -BCH_ERR_ENOSPC_sb_members;

	c->disk_sb.sb->nr_devices = nr_devices;
	return dev_idx;
}
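
/*
 * Usage sketch for bch2_sb_member_alloc() above (hypothetical caller;
 * the real device-add path lives elsewhere). Callers hold c->sb_lock:
 *
 *	int dev_idx = bch2_sb_member_alloc(c);
 *	if (dev_idx < 0)
 *		return dev_idx;
 *
 *	struct bch_member *m = bch2_members_v2_get_mut(c->disk_sb.sb, dev_idx);
 *	// ...initialize *m, then bch2_write_super(c)...
 *
 * When every slot below BCH_SB_MEMBERS_MAX has been used, the dead
 * member with the oldest last_mount is recycled.
 */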