1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_cache.h" 5 #include "disk_groups.h" 6 #include "error.h" 7 #include "opts.h" 8 #include "replicas.h" 9 #include "sb-members.h" 10 #include "super-io.h" 11 12 void bch2_dev_missing(struct bch_fs *c, unsigned dev) 13 { 14 if (dev != BCH_SB_MEMBER_INVALID) 15 bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); 16 } 17 18 void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) 19 { 20 bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); 21 } 22 23 #define x(t, n, ...) [n] = #t, 24 static const char * const bch2_iops_measurements[] = { 25 BCH_IOPS_MEASUREMENTS() 26 NULL 27 }; 28 29 char * const bch2_member_error_strs[] = { 30 BCH_MEMBER_ERROR_TYPES() 31 NULL 32 }; 33 #undef x 34 35 /* Code for bch_sb_field_members_v1: */ 36 37 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) 38 { 39 return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); 40 } 41 42 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) 43 { 44 struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); 45 memset(&ret, 0, sizeof(ret)); 46 memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); 47 return ret; 48 } 49 50 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) 51 { 52 return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); 53 } 54 55 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) 56 { 57 struct bch_member ret, *p = members_v1_get_mut(mi, i); 58 memset(&ret, 0, sizeof(ret)); 59 memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); 60 return ret; 61 } 62 63 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) 64 { 65 struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); 66 if (mi2) 67 return members_v2_get(mi2, i); 68 struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); 69 return members_v1_get(mi1, i); 70 } 71 72 static int sb_members_v2_resize_entries(struct bch_fs *c) 73 { 74 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 75 76 if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) { 77 unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) * 78 c->disk_sb.sb->nr_devices), 8); 79 80 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); 81 if (!mi) 82 return -BCH_ERR_ENOSPC_sb_members_v2; 83 84 for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { 85 void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); 86 memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); 87 memset(dst + le16_to_cpu(mi->member_bytes), 88 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); 89 } 90 mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); 91 } 92 return 0; 93 } 94 95 int bch2_sb_members_v2_init(struct bch_fs *c) 96 { 97 struct bch_sb_field_members_v1 *mi1; 98 struct bch_sb_field_members_v2 *mi2; 99 100 if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) { 101 mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2, 102 DIV_ROUND_UP(sizeof(*mi2) + 103 sizeof(struct bch_member) * c->sb.nr_devices, 104 sizeof(u64))); 105 mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1); 106 memcpy(&mi2->_members[0], &mi1->_members[0], 107 BCH_MEMBER_V1_BYTES * c->sb.nr_devices); 108 memset(&mi2->pad[0], 0, sizeof(mi2->pad)); 109 mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); 110 } 111 112 return sb_members_v2_resize_entries(c); 113 } 114 115 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) 116 { 117 struct bch_sb_field_members_v1 *mi1; 118 struct bch_sb_field_members_v2 *mi2; 119 120 mi1 = bch2_sb_field_resize(disk_sb, members_v1, 121 DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * 122 disk_sb->sb->nr_devices, sizeof(u64))); 123 if (!mi1) 124 return -BCH_ERR_ENOSPC_sb_members; 125 126 mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); 127 128 for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) 129 memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); 130 131 return 0; 132 } 133 134 static int validate_member(struct printbuf *err, 135 struct bch_member m, 136 struct bch_sb *sb, 137 int i) 138 { 139 if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) { 140 prt_printf(err, "device %u: too many buckets (got %llu, max %u)", 141 i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX); 142 return -BCH_ERR_invalid_sb_members; 143 } 144 145 if (le64_to_cpu(m.nbuckets) - 146 le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { 147 prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", 148 i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); 149 return -BCH_ERR_invalid_sb_members; 150 } 151 152 if (le16_to_cpu(m.bucket_size) < 153 le16_to_cpu(sb->block_size)) { 154 prt_printf(err, "device %u: bucket size %u smaller than block size %u", 155 i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); 156 return -BCH_ERR_invalid_sb_members; 157 } 158 159 if (le16_to_cpu(m.bucket_size) < 160 BCH_SB_BTREE_NODE_SIZE(sb)) { 161 prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", 162 i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); 163 return -BCH_ERR_invalid_sb_members; 164 } 165 166 return 0; 167 } 168 169 static void member_to_text(struct printbuf *out, 170 struct bch_member m, 171 struct bch_sb_field_disk_groups *gi, 172 struct bch_sb *sb, 173 int i) 174 { 175 unsigned data_have = bch2_sb_dev_has_data(sb, i); 176 u64 bucket_size = le16_to_cpu(m.bucket_size); 177 u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; 178 179 if (!bch2_member_alive(&m)) 180 return; 181 182 prt_printf(out, "Device:\t%u\n", i); 183 184 printbuf_indent_add(out, 2); 185 186 prt_printf(out, "Label:\t"); 187 if (BCH_MEMBER_GROUP(&m)) { 188 unsigned idx = BCH_MEMBER_GROUP(&m) - 1; 189 190 if (idx < disk_groups_nr(gi)) 191 prt_printf(out, "%s (%u)", 192 gi->entries[idx].label, idx); 193 else 194 prt_printf(out, "(bad disk labels section)"); 195 } else { 196 prt_printf(out, "(none)"); 197 } 198 prt_newline(out); 199 200 prt_printf(out, "UUID:\t"); 201 pr_uuid(out, m.uuid.b); 202 prt_newline(out); 203 204 prt_printf(out, "Size:\t"); 205 prt_units_u64(out, device_size << 9); 206 prt_newline(out); 207 208 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 209 prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i])); 210 211 for (unsigned i = 0; i < BCH_IOPS_NR; i++) 212 prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i])); 213 214 prt_printf(out, "Bucket size:\t"); 215 prt_units_u64(out, bucket_size << 9); 216 prt_newline(out); 217 218 prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket)); 219 prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets)); 220 221 prt_printf(out, "Last mount:\t"); 222 if (m.last_mount) 223 bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); 224 else 225 prt_printf(out, "(never)"); 226 prt_newline(out); 227 228 prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq)); 229 230 prt_printf(out, "State:\t%s\n", 231 BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR 232 ? bch2_member_states[BCH_MEMBER_STATE(&m)] 233 : "unknown"); 234 235 prt_printf(out, "Data allowed:\t"); 236 if (BCH_MEMBER_DATA_ALLOWED(&m)) 237 prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); 238 else 239 prt_printf(out, "(none)"); 240 prt_newline(out); 241 242 prt_printf(out, "Has data:\t"); 243 if (data_have) 244 prt_bitflags(out, __bch2_data_types, data_have); 245 else 246 prt_printf(out, "(none)"); 247 prt_newline(out); 248 249 prt_printf(out, "Btree allocated bitmap blocksize:\t"); 250 prt_units_u64(out, 1ULL << m.btree_bitmap_shift); 251 prt_newline(out); 252 253 prt_printf(out, "Btree allocated bitmap:\t"); 254 bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64); 255 prt_newline(out); 256 257 prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); 258 259 prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); 260 prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); 261 262 printbuf_indent_sub(out, 2); 263 } 264 265 static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f, 266 enum bch_validate_flags flags, struct printbuf *err) 267 { 268 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 269 unsigned i; 270 271 if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { 272 prt_printf(err, "too many devices for section size"); 273 return -BCH_ERR_invalid_sb_members; 274 } 275 276 for (i = 0; i < sb->nr_devices; i++) { 277 struct bch_member m = members_v1_get(mi, i); 278 279 int ret = validate_member(err, m, sb, i); 280 if (ret) 281 return ret; 282 } 283 284 return 0; 285 } 286 287 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, 288 struct bch_sb_field *f) 289 { 290 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 291 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 292 unsigned i; 293 294 for (i = 0; i < sb->nr_devices; i++) 295 member_to_text(out, members_v1_get(mi, i), gi, sb, i); 296 } 297 298 const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { 299 .validate = bch2_sb_members_v1_validate, 300 .to_text = bch2_sb_members_v1_to_text, 301 }; 302 303 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, 304 struct bch_sb_field *f) 305 { 306 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 307 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 308 unsigned i; 309 310 for (i = 0; i < sb->nr_devices; i++) 311 member_to_text(out, members_v2_get(mi, i), gi, sb, i); 312 } 313 314 static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f, 315 enum bch_validate_flags flags, struct printbuf *err) 316 { 317 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 318 size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - 319 (void *) mi; 320 321 if (mi_bytes > vstruct_bytes(&mi->field)) { 322 prt_printf(err, "section too small (%zu > %zu)", 323 mi_bytes, vstruct_bytes(&mi->field)); 324 return -BCH_ERR_invalid_sb_members; 325 } 326 327 for (unsigned i = 0; i < sb->nr_devices; i++) { 328 int ret = validate_member(err, members_v2_get(mi, i), sb, i); 329 if (ret) 330 return ret; 331 } 332 333 return 0; 334 } 335 336 const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { 337 .validate = bch2_sb_members_v2_validate, 338 .to_text = bch2_sb_members_v2_to_text, 339 }; 340 341 void bch2_sb_members_from_cpu(struct bch_fs *c) 342 { 343 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 344 345 rcu_read_lock(); 346 for_each_member_device_rcu(c, ca, NULL) { 347 struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx); 348 349 for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++) 350 m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); 351 } 352 rcu_read_unlock(); 353 } 354 355 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) 356 { 357 struct bch_fs *c = ca->fs; 358 struct bch_member m; 359 360 mutex_lock(&ca->fs->sb_lock); 361 m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); 362 mutex_unlock(&ca->fs->sb_lock); 363 364 printbuf_tabstop_push(out, 12); 365 366 prt_str(out, "IO errors since filesystem creation"); 367 prt_newline(out); 368 369 printbuf_indent_add(out, 2); 370 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 371 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); 372 printbuf_indent_sub(out, 2); 373 374 prt_str(out, "IO errors since "); 375 bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); 376 prt_str(out, " ago"); 377 prt_newline(out); 378 379 printbuf_indent_add(out, 2); 380 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 381 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], 382 atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); 383 printbuf_indent_sub(out, 2); 384 } 385 386 void bch2_dev_errors_reset(struct bch_dev *ca) 387 { 388 struct bch_fs *c = ca->fs; 389 struct bch_member *m; 390 391 mutex_lock(&c->sb_lock); 392 m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); 393 for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) 394 m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); 395 m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); 396 397 bch2_write_super(c); 398 mutex_unlock(&c->sb_lock); 399 } 400 401 /* 402 * Per member "range has btree nodes" bitmap: 403 * 404 * This is so that if we ever have to run the btree node scan to repair we don't 405 * have to scan full devices: 406 */ 407 408 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) 409 { 410 bool ret = true; 411 rcu_read_lock(); 412 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { 413 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); 414 if (!ca) 415 continue; 416 417 if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) { 418 ret = false; 419 break; 420 } 421 } 422 rcu_read_unlock(); 423 return ret; 424 } 425 426 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, 427 u64 start, unsigned sectors) 428 { 429 struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); 430 u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); 431 432 u64 end = start + sectors; 433 434 int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); 435 if (resize > 0) { 436 u64 new_bitmap = 0; 437 438 for (unsigned i = 0; i < 64; i++) 439 if (bitmap & BIT_ULL(i)) 440 new_bitmap |= BIT_ULL(i >> resize); 441 bitmap = new_bitmap; 442 m->btree_bitmap_shift += resize; 443 } 444 445 BUG_ON(m->btree_bitmap_shift > 57); 446 BUG_ON(end > 64ULL << m->btree_bitmap_shift); 447 448 for (unsigned bit = start >> m->btree_bitmap_shift; 449 (u64) bit << m->btree_bitmap_shift < end; 450 bit++) 451 bitmap |= BIT_ULL(bit); 452 453 m->btree_allocated_bitmap = cpu_to_le64(bitmap); 454 } 455 456 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) 457 { 458 lockdep_assert_held(&c->sb_lock); 459 460 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 461 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { 462 if (!bch2_member_exists(c->disk_sb.sb, ptr->dev)) 463 continue; 464 465 __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); 466 } 467 } 468 469 unsigned bch2_sb_nr_devices(const struct bch_sb *sb) 470 { 471 unsigned nr = 0; 472 473 for (unsigned i = 0; i < sb->nr_devices; i++) 474 nr += bch2_member_exists((struct bch_sb *) sb, i); 475 return nr; 476 } 477 478 int bch2_sb_member_alloc(struct bch_fs *c) 479 { 480 unsigned dev_idx = c->sb.nr_devices; 481 struct bch_sb_field_members_v2 *mi; 482 unsigned nr_devices; 483 unsigned u64s; 484 int best = -1; 485 u64 best_last_mount = 0; 486 487 if (dev_idx < BCH_SB_MEMBERS_MAX) 488 goto have_slot; 489 490 for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) { 491 /* eventually BCH_SB_MEMBERS_MAX will be raised */ 492 if (dev_idx == BCH_SB_MEMBER_INVALID) 493 continue; 494 495 struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx); 496 if (bch2_member_alive(&m)) 497 continue; 498 499 u64 last_mount = le64_to_cpu(m.last_mount); 500 if (best < 0 || last_mount < best_last_mount) { 501 best = dev_idx; 502 best_last_mount = last_mount; 503 } 504 } 505 if (best >= 0) { 506 dev_idx = best; 507 goto have_slot; 508 } 509 510 return -BCH_ERR_ENOSPC_sb_members; 511 have_slot: 512 nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices); 513 514 mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 515 u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) + 516 le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64)); 517 518 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); 519 if (!mi) 520 return -BCH_ERR_ENOSPC_sb_members; 521 522 c->disk_sb.sb->nr_devices = nr_devices; 523 return dev_idx; 524 } 525