1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_cache.h" 5 #include "disk_groups.h" 6 #include "opts.h" 7 #include "replicas.h" 8 #include "sb-members.h" 9 #include "super-io.h" 10 11 #define x(t, n, ...) [n] = #t, 12 static const char * const bch2_iops_measurements[] = { 13 BCH_IOPS_MEASUREMENTS() 14 NULL 15 }; 16 17 char * const bch2_member_error_strs[] = { 18 BCH_MEMBER_ERROR_TYPES() 19 NULL 20 }; 21 #undef x 22 23 /* Code for bch_sb_field_members_v1: */ 24 25 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) 26 { 27 return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); 28 } 29 30 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) 31 { 32 struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); 33 memset(&ret, 0, sizeof(ret)); 34 memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); 35 return ret; 36 } 37 38 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) 39 { 40 return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); 41 } 42 43 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) 44 { 45 struct bch_member ret, *p = members_v1_get_mut(mi, i); 46 memset(&ret, 0, sizeof(ret)); 47 memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); 48 return ret; 49 } 50 51 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) 52 { 53 struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); 54 if (mi2) 55 return members_v2_get(mi2, i); 56 struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); 57 return members_v1_get(mi1, i); 58 } 59 60 static int sb_members_v2_resize_entries(struct bch_fs *c) 61 { 62 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 63 64 if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) { 65 unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) * 66 c->disk_sb.sb->nr_devices), 8); 67 68 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); 69 if (!mi) 70 return -BCH_ERR_ENOSPC_sb_members_v2; 71 72 for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { 73 void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); 74 memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); 75 memset(dst + le16_to_cpu(mi->member_bytes), 76 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); 77 } 78 mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); 79 } 80 return 0; 81 } 82 83 int bch2_sb_members_v2_init(struct bch_fs *c) 84 { 85 struct bch_sb_field_members_v1 *mi1; 86 struct bch_sb_field_members_v2 *mi2; 87 88 if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) { 89 mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2, 90 DIV_ROUND_UP(sizeof(*mi2) + 91 sizeof(struct bch_member) * c->sb.nr_devices, 92 sizeof(u64))); 93 mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1); 94 memcpy(&mi2->_members[0], &mi1->_members[0], 95 BCH_MEMBER_V1_BYTES * c->sb.nr_devices); 96 memset(&mi2->pad[0], 0, sizeof(mi2->pad)); 97 mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); 98 } 99 100 return sb_members_v2_resize_entries(c); 101 } 102 103 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) 104 { 105 struct bch_sb_field_members_v1 *mi1; 106 struct bch_sb_field_members_v2 *mi2; 107 108 mi1 = bch2_sb_field_resize(disk_sb, members_v1, 109 DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * 110 disk_sb->sb->nr_devices, sizeof(u64))); 111 if (!mi1) 112 return -BCH_ERR_ENOSPC_sb_members; 113 114 mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); 115 116 for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) 117 memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); 118 119 return 0; 120 } 121 122 static int validate_member(struct printbuf *err, 123 struct bch_member m, 124 struct bch_sb *sb, 125 int i) 126 { 127 if (le64_to_cpu(m.nbuckets) > LONG_MAX) { 128 prt_printf(err, "device %u: too many buckets (got %llu, max %lu)", 129 i, le64_to_cpu(m.nbuckets), LONG_MAX); 130 return -BCH_ERR_invalid_sb_members; 131 } 132 133 if (le64_to_cpu(m.nbuckets) - 134 le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { 135 prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", 136 i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); 137 return -BCH_ERR_invalid_sb_members; 138 } 139 140 if (le16_to_cpu(m.bucket_size) < 141 le16_to_cpu(sb->block_size)) { 142 prt_printf(err, "device %u: bucket size %u smaller than block size %u", 143 i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); 144 return -BCH_ERR_invalid_sb_members; 145 } 146 147 if (le16_to_cpu(m.bucket_size) < 148 BCH_SB_BTREE_NODE_SIZE(sb)) { 149 prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", 150 i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); 151 return -BCH_ERR_invalid_sb_members; 152 } 153 154 return 0; 155 } 156 157 static void member_to_text(struct printbuf *out, 158 struct bch_member m, 159 struct bch_sb_field_disk_groups *gi, 160 struct bch_sb *sb, 161 int i) 162 { 163 unsigned data_have = bch2_sb_dev_has_data(sb, i); 164 u64 bucket_size = le16_to_cpu(m.bucket_size); 165 u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; 166 167 if (!bch2_member_exists(&m)) 168 return; 169 170 prt_printf(out, "Device:"); 171 prt_tab(out); 172 prt_printf(out, "%u", i); 173 prt_newline(out); 174 175 printbuf_indent_add(out, 2); 176 177 prt_printf(out, "Label:"); 178 prt_tab(out); 179 if (BCH_MEMBER_GROUP(&m)) { 180 unsigned idx = BCH_MEMBER_GROUP(&m) - 1; 181 182 if (idx < disk_groups_nr(gi)) 183 prt_printf(out, "%s (%u)", 184 gi->entries[idx].label, idx); 185 else 186 prt_printf(out, "(bad disk labels section)"); 187 } else { 188 prt_printf(out, "(none)"); 189 } 190 prt_newline(out); 191 192 prt_printf(out, "UUID:"); 193 prt_tab(out); 194 pr_uuid(out, m.uuid.b); 195 prt_newline(out); 196 197 prt_printf(out, "Size:"); 198 prt_tab(out); 199 prt_units_u64(out, device_size << 9); 200 prt_newline(out); 201 202 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { 203 prt_printf(out, "%s errors:", bch2_member_error_strs[i]); 204 prt_tab(out); 205 prt_u64(out, le64_to_cpu(m.errors[i])); 206 prt_newline(out); 207 } 208 209 for (unsigned i = 0; i < BCH_IOPS_NR; i++) { 210 prt_printf(out, "%s iops:", bch2_iops_measurements[i]); 211 prt_tab(out); 212 prt_printf(out, "%u", le32_to_cpu(m.iops[i])); 213 prt_newline(out); 214 } 215 216 prt_printf(out, "Bucket size:"); 217 prt_tab(out); 218 prt_units_u64(out, bucket_size << 9); 219 prt_newline(out); 220 221 prt_printf(out, "First bucket:"); 222 prt_tab(out); 223 prt_printf(out, "%u", le16_to_cpu(m.first_bucket)); 224 prt_newline(out); 225 226 prt_printf(out, "Buckets:"); 227 prt_tab(out); 228 prt_printf(out, "%llu", le64_to_cpu(m.nbuckets)); 229 prt_newline(out); 230 231 prt_printf(out, "Last mount:"); 232 prt_tab(out); 233 if (m.last_mount) 234 bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); 235 else 236 prt_printf(out, "(never)"); 237 prt_newline(out); 238 239 prt_printf(out, "Last superblock write:"); 240 prt_tab(out); 241 prt_u64(out, le64_to_cpu(m.seq)); 242 prt_newline(out); 243 244 prt_printf(out, "State:"); 245 prt_tab(out); 246 prt_printf(out, "%s", 247 BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR 248 ? bch2_member_states[BCH_MEMBER_STATE(&m)] 249 : "unknown"); 250 prt_newline(out); 251 252 prt_printf(out, "Data allowed:"); 253 prt_tab(out); 254 if (BCH_MEMBER_DATA_ALLOWED(&m)) 255 prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); 256 else 257 prt_printf(out, "(none)"); 258 prt_newline(out); 259 260 prt_printf(out, "Has data:"); 261 prt_tab(out); 262 if (data_have) 263 prt_bitflags(out, __bch2_data_types, data_have); 264 else 265 prt_printf(out, "(none)"); 266 prt_newline(out); 267 268 prt_str(out, "Durability:"); 269 prt_tab(out); 270 prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); 271 prt_newline(out); 272 273 prt_printf(out, "Discard:"); 274 prt_tab(out); 275 prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m)); 276 prt_newline(out); 277 278 prt_printf(out, "Freespace initialized:"); 279 prt_tab(out); 280 prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); 281 prt_newline(out); 282 283 printbuf_indent_sub(out, 2); 284 } 285 286 static int bch2_sb_members_v1_validate(struct bch_sb *sb, 287 struct bch_sb_field *f, 288 struct printbuf *err) 289 { 290 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 291 unsigned i; 292 293 if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { 294 prt_printf(err, "too many devices for section size"); 295 return -BCH_ERR_invalid_sb_members; 296 } 297 298 for (i = 0; i < sb->nr_devices; i++) { 299 struct bch_member m = members_v1_get(mi, i); 300 301 int ret = validate_member(err, m, sb, i); 302 if (ret) 303 return ret; 304 } 305 306 return 0; 307 } 308 309 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, 310 struct bch_sb_field *f) 311 { 312 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 313 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 314 unsigned i; 315 316 for (i = 0; i < sb->nr_devices; i++) 317 member_to_text(out, members_v1_get(mi, i), gi, sb, i); 318 } 319 320 const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { 321 .validate = bch2_sb_members_v1_validate, 322 .to_text = bch2_sb_members_v1_to_text, 323 }; 324 325 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, 326 struct bch_sb_field *f) 327 { 328 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 329 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 330 unsigned i; 331 332 for (i = 0; i < sb->nr_devices; i++) 333 member_to_text(out, members_v2_get(mi, i), gi, sb, i); 334 } 335 336 static int bch2_sb_members_v2_validate(struct bch_sb *sb, 337 struct bch_sb_field *f, 338 struct printbuf *err) 339 { 340 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 341 size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - 342 (void *) mi; 343 344 if (mi_bytes > vstruct_bytes(&mi->field)) { 345 prt_printf(err, "section too small (%zu > %zu)", 346 mi_bytes, vstruct_bytes(&mi->field)); 347 return -BCH_ERR_invalid_sb_members; 348 } 349 350 for (unsigned i = 0; i < sb->nr_devices; i++) { 351 int ret = validate_member(err, members_v2_get(mi, i), sb, i); 352 if (ret) 353 return ret; 354 } 355 356 return 0; 357 } 358 359 const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { 360 .validate = bch2_sb_members_v2_validate, 361 .to_text = bch2_sb_members_v2_to_text, 362 }; 363 364 void bch2_sb_members_from_cpu(struct bch_fs *c) 365 { 366 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 367 368 rcu_read_lock(); 369 for_each_member_device_rcu(c, ca, NULL) { 370 struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx); 371 372 for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++) 373 m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); 374 } 375 rcu_read_unlock(); 376 } 377 378 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) 379 { 380 struct bch_fs *c = ca->fs; 381 struct bch_member m; 382 383 mutex_lock(&ca->fs->sb_lock); 384 m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); 385 mutex_unlock(&ca->fs->sb_lock); 386 387 printbuf_tabstop_push(out, 12); 388 389 prt_str(out, "IO errors since filesystem creation"); 390 prt_newline(out); 391 392 printbuf_indent_add(out, 2); 393 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { 394 prt_printf(out, "%s:", bch2_member_error_strs[i]); 395 prt_tab(out); 396 prt_u64(out, atomic64_read(&ca->errors[i])); 397 prt_newline(out); 398 } 399 printbuf_indent_sub(out, 2); 400 401 prt_str(out, "IO errors since "); 402 bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); 403 prt_str(out, " ago"); 404 prt_newline(out); 405 406 printbuf_indent_add(out, 2); 407 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) { 408 prt_printf(out, "%s:", bch2_member_error_strs[i]); 409 prt_tab(out); 410 prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); 411 prt_newline(out); 412 } 413 printbuf_indent_sub(out, 2); 414 } 415 416 void bch2_dev_errors_reset(struct bch_dev *ca) 417 { 418 struct bch_fs *c = ca->fs; 419 struct bch_member *m; 420 421 mutex_lock(&c->sb_lock); 422 m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); 423 for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) 424 m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); 425 m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); 426 427 bch2_write_super(c); 428 mutex_unlock(&c->sb_lock); 429 } 430 431 /* 432 * Per member "range has btree nodes" bitmap: 433 * 434 * This is so that if we ever have to run the btree node scan to repair we don't 435 * have to scan full devices: 436 */ 437 438 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) 439 { 440 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) 441 if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev), 442 ptr->offset, btree_sectors(c))) 443 return false; 444 return true; 445 } 446 447 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, 448 u64 start, unsigned sectors) 449 { 450 struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); 451 u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); 452 453 u64 end = start + sectors; 454 455 int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); 456 if (resize > 0) { 457 u64 new_bitmap = 0; 458 459 for (unsigned i = 0; i < 64; i++) 460 if (bitmap & BIT_ULL(i)) 461 new_bitmap |= BIT_ULL(i >> resize); 462 bitmap = new_bitmap; 463 m->btree_bitmap_shift += resize; 464 } 465 466 for (unsigned bit = start >> m->btree_bitmap_shift; 467 (u64) bit << m->btree_bitmap_shift < end; 468 bit++) 469 bitmap |= BIT_ULL(bit); 470 471 m->btree_allocated_bitmap = cpu_to_le64(bitmap); 472 } 473 474 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) 475 { 476 lockdep_assert_held(&c->sb_lock); 477 478 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 479 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) 480 __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); 481 } 482