1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_cache.h" 5 #include "disk_groups.h" 6 #include "error.h" 7 #include "opts.h" 8 #include "replicas.h" 9 #include "sb-members.h" 10 #include "super-io.h" 11 12 void bch2_dev_missing(struct bch_fs *c, unsigned dev) 13 { 14 if (dev != BCH_SB_MEMBER_INVALID) 15 bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); 16 } 17 18 void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) 19 { 20 bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); 21 } 22 23 #define x(t, n, ...) [n] = #t, 24 static const char * const bch2_iops_measurements[] = { 25 BCH_IOPS_MEASUREMENTS() 26 NULL 27 }; 28 29 char * const bch2_member_error_strs[] = { 30 BCH_MEMBER_ERROR_TYPES() 31 NULL 32 }; 33 #undef x 34 35 /* Code for bch_sb_field_members_v1: */ 36 37 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) 38 { 39 return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); 40 } 41 42 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) 43 { 44 struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); 45 memset(&ret, 0, sizeof(ret)); 46 memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); 47 return ret; 48 } 49 50 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) 51 { 52 return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); 53 } 54 55 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) 56 { 57 struct bch_member ret, *p = members_v1_get_mut(mi, i); 58 memset(&ret, 0, sizeof(ret)); 59 memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); 60 return ret; 61 } 62 63 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) 64 { 65 struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); 66 if (mi2) 67 return members_v2_get(mi2, i); 68 struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); 69 return members_v1_get(mi1, i); 70 } 71 72 static int sb_members_v2_resize_entries(struct bch_fs *c) 73 { 74 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 75 76 if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) { 77 unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) * 78 c->disk_sb.sb->nr_devices), 8); 79 80 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); 81 if (!mi) 82 return -BCH_ERR_ENOSPC_sb_members_v2; 83 84 for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { 85 void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); 86 memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); 87 memset(dst + le16_to_cpu(mi->member_bytes), 88 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); 89 } 90 mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); 91 } 92 return 0; 93 } 94 95 int bch2_sb_members_v2_init(struct bch_fs *c) 96 { 97 struct bch_sb_field_members_v1 *mi1; 98 struct bch_sb_field_members_v2 *mi2; 99 100 if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) { 101 mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2, 102 DIV_ROUND_UP(sizeof(*mi2) + 103 sizeof(struct bch_member) * c->sb.nr_devices, 104 sizeof(u64))); 105 mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1); 106 memcpy(&mi2->_members[0], &mi1->_members[0], 107 BCH_MEMBER_V1_BYTES * c->sb.nr_devices); 108 memset(&mi2->pad[0], 0, sizeof(mi2->pad)); 109 mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); 110 } 111 112 return sb_members_v2_resize_entries(c); 113 } 114 115 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) 116 { 117 struct bch_sb_field_members_v1 *mi1; 118 struct bch_sb_field_members_v2 *mi2; 119 120 mi1 = bch2_sb_field_resize(disk_sb, members_v1, 121 DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * 122 disk_sb->sb->nr_devices, sizeof(u64))); 123 if (!mi1) 124 return -BCH_ERR_ENOSPC_sb_members; 125 126 mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); 127 128 for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) 129 memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); 130 131 return 0; 132 } 133 134 static int validate_member(struct printbuf *err, 135 struct bch_member m, 136 struct bch_sb *sb, 137 int i) 138 { 139 if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) { 140 prt_printf(err, "device %u: too many buckets (got %llu, max %u)", 141 i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX); 142 return -BCH_ERR_invalid_sb_members; 143 } 144 145 if (le64_to_cpu(m.nbuckets) - 146 le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { 147 prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", 148 i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); 149 return -BCH_ERR_invalid_sb_members; 150 } 151 152 if (le16_to_cpu(m.bucket_size) < 153 le16_to_cpu(sb->block_size)) { 154 prt_printf(err, "device %u: bucket size %u smaller than block size %u", 155 i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); 156 return -BCH_ERR_invalid_sb_members; 157 } 158 159 if (le16_to_cpu(m.bucket_size) < 160 BCH_SB_BTREE_NODE_SIZE(sb)) { 161 prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", 162 i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); 163 return -BCH_ERR_invalid_sb_members; 164 } 165 166 if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) { 167 prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift); 168 return -BCH_ERR_invalid_sb_members; 169 } 170 171 return 0; 172 } 173 174 static void member_to_text(struct printbuf *out, 175 struct bch_member m, 176 struct bch_sb_field_disk_groups *gi, 177 struct bch_sb *sb, 178 int i) 179 { 180 unsigned data_have = bch2_sb_dev_has_data(sb, i); 181 u64 bucket_size = le16_to_cpu(m.bucket_size); 182 u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; 183 184 if (!bch2_member_alive(&m)) 185 return; 186 187 prt_printf(out, "Device:\t%u\n", i); 188 189 printbuf_indent_add(out, 2); 190 191 prt_printf(out, "Label:\t"); 192 if (BCH_MEMBER_GROUP(&m)) { 193 unsigned idx = BCH_MEMBER_GROUP(&m) - 1; 194 195 if (idx < disk_groups_nr(gi)) 196 prt_printf(out, "%s (%u)", 197 gi->entries[idx].label, idx); 198 else 199 prt_printf(out, "(bad disk labels section)"); 200 } else { 201 prt_printf(out, "(none)"); 202 } 203 prt_newline(out); 204 205 prt_printf(out, "UUID:\t"); 206 pr_uuid(out, m.uuid.b); 207 prt_newline(out); 208 209 prt_printf(out, "Size:\t"); 210 prt_units_u64(out, device_size << 9); 211 prt_newline(out); 212 213 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 214 prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i])); 215 216 for (unsigned i = 0; i < BCH_IOPS_NR; i++) 217 prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i])); 218 219 prt_printf(out, "Bucket size:\t"); 220 prt_units_u64(out, bucket_size << 9); 221 prt_newline(out); 222 223 prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket)); 224 prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets)); 225 226 prt_printf(out, "Last mount:\t"); 227 if (m.last_mount) 228 bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); 229 else 230 prt_printf(out, "(never)"); 231 prt_newline(out); 232 233 prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq)); 234 235 prt_printf(out, "State:\t%s\n", 236 BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR 237 ? bch2_member_states[BCH_MEMBER_STATE(&m)] 238 : "unknown"); 239 240 prt_printf(out, "Data allowed:\t"); 241 if (BCH_MEMBER_DATA_ALLOWED(&m)) 242 prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); 243 else 244 prt_printf(out, "(none)"); 245 prt_newline(out); 246 247 prt_printf(out, "Has data:\t"); 248 if (data_have) 249 prt_bitflags(out, __bch2_data_types, data_have); 250 else 251 prt_printf(out, "(none)"); 252 prt_newline(out); 253 254 prt_printf(out, "Btree allocated bitmap blocksize:\t"); 255 if (m.btree_bitmap_shift < 64) 256 prt_units_u64(out, 1ULL << m.btree_bitmap_shift); 257 else 258 prt_printf(out, "(invalid shift %u)", m.btree_bitmap_shift); 259 prt_newline(out); 260 261 prt_printf(out, "Btree allocated bitmap:\t"); 262 bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64); 263 prt_newline(out); 264 265 prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); 266 267 prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); 268 prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); 269 270 printbuf_indent_sub(out, 2); 271 } 272 273 static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f, 274 enum bch_validate_flags flags, struct printbuf *err) 275 { 276 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 277 unsigned i; 278 279 if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { 280 prt_printf(err, "too many devices for section size"); 281 return -BCH_ERR_invalid_sb_members; 282 } 283 284 for (i = 0; i < sb->nr_devices; i++) { 285 struct bch_member m = members_v1_get(mi, i); 286 287 int ret = validate_member(err, m, sb, i); 288 if (ret) 289 return ret; 290 } 291 292 return 0; 293 } 294 295 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, 296 struct bch_sb_field *f) 297 { 298 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 299 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 300 unsigned i; 301 302 for (i = 0; i < sb->nr_devices; i++) 303 member_to_text(out, members_v1_get(mi, i), gi, sb, i); 304 } 305 306 const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { 307 .validate = bch2_sb_members_v1_validate, 308 .to_text = bch2_sb_members_v1_to_text, 309 }; 310 311 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, 312 struct bch_sb_field *f) 313 { 314 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 315 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 316 unsigned i; 317 318 for (i = 0; i < sb->nr_devices; i++) 319 member_to_text(out, members_v2_get(mi, i), gi, sb, i); 320 } 321 322 static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f, 323 enum bch_validate_flags flags, struct printbuf *err) 324 { 325 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 326 size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - 327 (void *) mi; 328 329 if (mi_bytes > vstruct_bytes(&mi->field)) { 330 prt_printf(err, "section too small (%zu > %zu)", 331 mi_bytes, vstruct_bytes(&mi->field)); 332 return -BCH_ERR_invalid_sb_members; 333 } 334 335 for (unsigned i = 0; i < sb->nr_devices; i++) { 336 int ret = validate_member(err, members_v2_get(mi, i), sb, i); 337 if (ret) 338 return ret; 339 } 340 341 return 0; 342 } 343 344 const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { 345 .validate = bch2_sb_members_v2_validate, 346 .to_text = bch2_sb_members_v2_to_text, 347 }; 348 349 void bch2_sb_members_from_cpu(struct bch_fs *c) 350 { 351 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 352 353 rcu_read_lock(); 354 for_each_member_device_rcu(c, ca, NULL) { 355 struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx); 356 357 for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++) 358 m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); 359 } 360 rcu_read_unlock(); 361 } 362 363 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) 364 { 365 struct bch_fs *c = ca->fs; 366 struct bch_member m; 367 368 mutex_lock(&ca->fs->sb_lock); 369 m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); 370 mutex_unlock(&ca->fs->sb_lock); 371 372 printbuf_tabstop_push(out, 12); 373 374 prt_str(out, "IO errors since filesystem creation"); 375 prt_newline(out); 376 377 printbuf_indent_add(out, 2); 378 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 379 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); 380 printbuf_indent_sub(out, 2); 381 382 prt_str(out, "IO errors since "); 383 bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); 384 prt_str(out, " ago"); 385 prt_newline(out); 386 387 printbuf_indent_add(out, 2); 388 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 389 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], 390 atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); 391 printbuf_indent_sub(out, 2); 392 } 393 394 void bch2_dev_errors_reset(struct bch_dev *ca) 395 { 396 struct bch_fs *c = ca->fs; 397 struct bch_member *m; 398 399 mutex_lock(&c->sb_lock); 400 m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); 401 for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) 402 m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); 403 m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); 404 405 bch2_write_super(c); 406 mutex_unlock(&c->sb_lock); 407 } 408 409 /* 410 * Per member "range has btree nodes" bitmap: 411 * 412 * This is so that if we ever have to run the btree node scan to repair we don't 413 * have to scan full devices: 414 */ 415 416 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) 417 { 418 bool ret = true; 419 rcu_read_lock(); 420 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { 421 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); 422 if (!ca) 423 continue; 424 425 if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) { 426 ret = false; 427 break; 428 } 429 } 430 rcu_read_unlock(); 431 return ret; 432 } 433 434 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, 435 u64 start, unsigned sectors) 436 { 437 struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); 438 u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); 439 440 u64 end = start + sectors; 441 442 int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); 443 if (resize > 0) { 444 u64 new_bitmap = 0; 445 446 for (unsigned i = 0; i < 64; i++) 447 if (bitmap & BIT_ULL(i)) 448 new_bitmap |= BIT_ULL(i >> resize); 449 bitmap = new_bitmap; 450 m->btree_bitmap_shift += resize; 451 } 452 453 BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX); 454 BUG_ON(end > 64ULL << m->btree_bitmap_shift); 455 456 for (unsigned bit = start >> m->btree_bitmap_shift; 457 (u64) bit << m->btree_bitmap_shift < end; 458 bit++) 459 bitmap |= BIT_ULL(bit); 460 461 m->btree_allocated_bitmap = cpu_to_le64(bitmap); 462 } 463 464 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) 465 { 466 lockdep_assert_held(&c->sb_lock); 467 468 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 469 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { 470 if (!bch2_member_exists(c->disk_sb.sb, ptr->dev)) 471 continue; 472 473 __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); 474 } 475 } 476 477 unsigned bch2_sb_nr_devices(const struct bch_sb *sb) 478 { 479 unsigned nr = 0; 480 481 for (unsigned i = 0; i < sb->nr_devices; i++) 482 nr += bch2_member_exists((struct bch_sb *) sb, i); 483 return nr; 484 } 485 486 int bch2_sb_member_alloc(struct bch_fs *c) 487 { 488 unsigned dev_idx = c->sb.nr_devices; 489 struct bch_sb_field_members_v2 *mi; 490 unsigned nr_devices; 491 unsigned u64s; 492 int best = -1; 493 u64 best_last_mount = 0; 494 495 if (dev_idx < BCH_SB_MEMBERS_MAX) 496 goto have_slot; 497 498 for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) { 499 /* eventually BCH_SB_MEMBERS_MAX will be raised */ 500 if (dev_idx == BCH_SB_MEMBER_INVALID) 501 continue; 502 503 struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx); 504 if (bch2_member_alive(&m)) 505 continue; 506 507 u64 last_mount = le64_to_cpu(m.last_mount); 508 if (best < 0 || last_mount < best_last_mount) { 509 best = dev_idx; 510 best_last_mount = last_mount; 511 } 512 } 513 if (best >= 0) { 514 dev_idx = best; 515 goto have_slot; 516 } 517 518 return -BCH_ERR_ENOSPC_sb_members; 519 have_slot: 520 nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); 521 522 mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 523 u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) + 524 le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64)); 525 526 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); 527 if (!mi) 528 return -BCH_ERR_ENOSPC_sb_members; 529 530 c->disk_sb.sb->nr_devices = nr_devices; 531 return dev_idx; 532 } 533