1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "btree_cache.h" 5 #include "disk_groups.h" 6 #include "error.h" 7 #include "opts.h" 8 #include "replicas.h" 9 #include "sb-members.h" 10 #include "super-io.h" 11 12 void bch2_dev_missing(struct bch_fs *c, unsigned dev) 13 { 14 bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev); 15 } 16 17 void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket) 18 { 19 bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset); 20 } 21 22 #define x(t, n, ...) [n] = #t, 23 static const char * const bch2_iops_measurements[] = { 24 BCH_IOPS_MEASUREMENTS() 25 NULL 26 }; 27 28 char * const bch2_member_error_strs[] = { 29 BCH_MEMBER_ERROR_TYPES() 30 NULL 31 }; 32 #undef x 33 34 /* Code for bch_sb_field_members_v1: */ 35 36 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i) 37 { 38 return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i); 39 } 40 41 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i) 42 { 43 struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i); 44 memset(&ret, 0, sizeof(ret)); 45 memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret))); 46 return ret; 47 } 48 49 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i) 50 { 51 return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES); 52 } 53 54 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i) 55 { 56 struct bch_member ret, *p = members_v1_get_mut(mi, i); 57 memset(&ret, 0, sizeof(ret)); 58 memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret))); 59 return ret; 60 } 61 62 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i) 63 { 64 struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2); 65 if (mi2) 66 return members_v2_get(mi2, i); 67 struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1); 68 return members_v1_get(mi1, i); 69 } 70 71 static int sb_members_v2_resize_entries(struct bch_fs *c) 72 { 73 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 74 75 if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) { 76 unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) * 77 c->disk_sb.sb->nr_devices), 8); 78 79 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s); 80 if (!mi) 81 return -BCH_ERR_ENOSPC_sb_members_v2; 82 83 for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) { 84 void *dst = (void *) mi->_members + (i * sizeof(struct bch_member)); 85 memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes)); 86 memset(dst + le16_to_cpu(mi->member_bytes), 87 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes))); 88 } 89 mi->member_bytes = cpu_to_le16(sizeof(struct bch_member)); 90 } 91 return 0; 92 } 93 94 int bch2_sb_members_v2_init(struct bch_fs *c) 95 { 96 struct bch_sb_field_members_v1 *mi1; 97 struct bch_sb_field_members_v2 *mi2; 98 99 if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) { 100 mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2, 101 DIV_ROUND_UP(sizeof(*mi2) + 102 sizeof(struct bch_member) * c->sb.nr_devices, 103 sizeof(u64))); 104 mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1); 105 memcpy(&mi2->_members[0], &mi1->_members[0], 106 BCH_MEMBER_V1_BYTES * c->sb.nr_devices); 107 memset(&mi2->pad[0], 0, sizeof(mi2->pad)); 108 mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES); 109 } 110 111 return sb_members_v2_resize_entries(c); 112 } 113 114 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb) 115 { 116 struct bch_sb_field_members_v1 *mi1; 117 struct bch_sb_field_members_v2 *mi2; 118 119 mi1 = bch2_sb_field_resize(disk_sb, members_v1, 120 DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES * 121 disk_sb->sb->nr_devices, sizeof(u64))); 122 if (!mi1) 123 return -BCH_ERR_ENOSPC_sb_members; 124 125 mi2 = bch2_sb_field_get(disk_sb->sb, members_v2); 126 127 for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++) 128 memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES); 129 130 return 0; 131 } 132 133 static int validate_member(struct printbuf *err, 134 struct bch_member m, 135 struct bch_sb *sb, 136 int i) 137 { 138 if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) { 139 prt_printf(err, "device %u: too many buckets (got %llu, max %u)", 140 i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX); 141 return -BCH_ERR_invalid_sb_members; 142 } 143 144 if (le64_to_cpu(m.nbuckets) - 145 le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) { 146 prt_printf(err, "device %u: not enough buckets (got %llu, max %u)", 147 i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS); 148 return -BCH_ERR_invalid_sb_members; 149 } 150 151 if (le16_to_cpu(m.bucket_size) < 152 le16_to_cpu(sb->block_size)) { 153 prt_printf(err, "device %u: bucket size %u smaller than block size %u", 154 i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size)); 155 return -BCH_ERR_invalid_sb_members; 156 } 157 158 if (le16_to_cpu(m.bucket_size) < 159 BCH_SB_BTREE_NODE_SIZE(sb)) { 160 prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu", 161 i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb)); 162 return -BCH_ERR_invalid_sb_members; 163 } 164 165 return 0; 166 } 167 168 static void member_to_text(struct printbuf *out, 169 struct bch_member m, 170 struct bch_sb_field_disk_groups *gi, 171 struct bch_sb *sb, 172 int i) 173 { 174 unsigned data_have = bch2_sb_dev_has_data(sb, i); 175 u64 bucket_size = le16_to_cpu(m.bucket_size); 176 u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size; 177 178 if (!bch2_member_alive(&m)) 179 return; 180 181 prt_printf(out, "Device:\t%u\n", i); 182 183 printbuf_indent_add(out, 2); 184 185 prt_printf(out, "Label:\t"); 186 if (BCH_MEMBER_GROUP(&m)) { 187 unsigned idx = BCH_MEMBER_GROUP(&m) - 1; 188 189 if (idx < disk_groups_nr(gi)) 190 prt_printf(out, "%s (%u)", 191 gi->entries[idx].label, idx); 192 else 193 prt_printf(out, "(bad disk labels section)"); 194 } else { 195 prt_printf(out, "(none)"); 196 } 197 prt_newline(out); 198 199 prt_printf(out, "UUID:\t"); 200 pr_uuid(out, m.uuid.b); 201 prt_newline(out); 202 203 prt_printf(out, "Size:\t"); 204 prt_units_u64(out, device_size << 9); 205 prt_newline(out); 206 207 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 208 prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i])); 209 210 for (unsigned i = 0; i < BCH_IOPS_NR; i++) 211 prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i])); 212 213 prt_printf(out, "Bucket size:\t"); 214 prt_units_u64(out, bucket_size << 9); 215 prt_newline(out); 216 217 prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket)); 218 prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets)); 219 220 prt_printf(out, "Last mount:\t"); 221 if (m.last_mount) 222 bch2_prt_datetime(out, le64_to_cpu(m.last_mount)); 223 else 224 prt_printf(out, "(never)"); 225 prt_newline(out); 226 227 prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq)); 228 229 prt_printf(out, "State:\t%s\n", 230 BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR 231 ? bch2_member_states[BCH_MEMBER_STATE(&m)] 232 : "unknown"); 233 234 prt_printf(out, "Data allowed:\t"); 235 if (BCH_MEMBER_DATA_ALLOWED(&m)) 236 prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m)); 237 else 238 prt_printf(out, "(none)"); 239 prt_newline(out); 240 241 prt_printf(out, "Has data:\t"); 242 if (data_have) 243 prt_bitflags(out, __bch2_data_types, data_have); 244 else 245 prt_printf(out, "(none)"); 246 prt_newline(out); 247 248 prt_printf(out, "Btree allocated bitmap blocksize:\t"); 249 prt_units_u64(out, 1ULL << m.btree_bitmap_shift); 250 prt_newline(out); 251 252 prt_printf(out, "Btree allocated bitmap:\t"); 253 bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64); 254 prt_newline(out); 255 256 prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1); 257 258 prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m)); 259 prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m)); 260 261 printbuf_indent_sub(out, 2); 262 } 263 264 static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f, 265 enum bch_validate_flags flags, struct printbuf *err) 266 { 267 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 268 unsigned i; 269 270 if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) { 271 prt_printf(err, "too many devices for section size"); 272 return -BCH_ERR_invalid_sb_members; 273 } 274 275 for (i = 0; i < sb->nr_devices; i++) { 276 struct bch_member m = members_v1_get(mi, i); 277 278 int ret = validate_member(err, m, sb, i); 279 if (ret) 280 return ret; 281 } 282 283 return 0; 284 } 285 286 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb, 287 struct bch_sb_field *f) 288 { 289 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1); 290 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 291 unsigned i; 292 293 for (i = 0; i < sb->nr_devices; i++) 294 member_to_text(out, members_v1_get(mi, i), gi, sb, i); 295 } 296 297 const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = { 298 .validate = bch2_sb_members_v1_validate, 299 .to_text = bch2_sb_members_v1_to_text, 300 }; 301 302 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb, 303 struct bch_sb_field *f) 304 { 305 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 306 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups); 307 unsigned i; 308 309 for (i = 0; i < sb->nr_devices; i++) 310 member_to_text(out, members_v2_get(mi, i), gi, sb, i); 311 } 312 313 static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f, 314 enum bch_validate_flags flags, struct printbuf *err) 315 { 316 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2); 317 size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) - 318 (void *) mi; 319 320 if (mi_bytes > vstruct_bytes(&mi->field)) { 321 prt_printf(err, "section too small (%zu > %zu)", 322 mi_bytes, vstruct_bytes(&mi->field)); 323 return -BCH_ERR_invalid_sb_members; 324 } 325 326 for (unsigned i = 0; i < sb->nr_devices; i++) { 327 int ret = validate_member(err, members_v2_get(mi, i), sb, i); 328 if (ret) 329 return ret; 330 } 331 332 return 0; 333 } 334 335 const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = { 336 .validate = bch2_sb_members_v2_validate, 337 .to_text = bch2_sb_members_v2_to_text, 338 }; 339 340 void bch2_sb_members_from_cpu(struct bch_fs *c) 341 { 342 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 343 344 rcu_read_lock(); 345 for_each_member_device_rcu(c, ca, NULL) { 346 struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx); 347 348 for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++) 349 m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e])); 350 } 351 rcu_read_unlock(); 352 } 353 354 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca) 355 { 356 struct bch_fs *c = ca->fs; 357 struct bch_member m; 358 359 mutex_lock(&ca->fs->sb_lock); 360 m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx); 361 mutex_unlock(&ca->fs->sb_lock); 362 363 printbuf_tabstop_push(out, 12); 364 365 prt_str(out, "IO errors since filesystem creation"); 366 prt_newline(out); 367 368 printbuf_indent_add(out, 2); 369 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 370 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i])); 371 printbuf_indent_sub(out, 2); 372 373 prt_str(out, "IO errors since "); 374 bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC); 375 prt_str(out, " ago"); 376 prt_newline(out); 377 378 printbuf_indent_add(out, 2); 379 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) 380 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], 381 atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i])); 382 printbuf_indent_sub(out, 2); 383 } 384 385 void bch2_dev_errors_reset(struct bch_dev *ca) 386 { 387 struct bch_fs *c = ca->fs; 388 struct bch_member *m; 389 390 mutex_lock(&c->sb_lock); 391 m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx); 392 for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++) 393 m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i])); 394 m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds()); 395 396 bch2_write_super(c); 397 mutex_unlock(&c->sb_lock); 398 } 399 400 /* 401 * Per member "range has btree nodes" bitmap: 402 * 403 * This is so that if we ever have to run the btree node scan to repair we don't 404 * have to scan full devices: 405 */ 406 407 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k) 408 { 409 bool ret = true; 410 rcu_read_lock(); 411 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { 412 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); 413 if (!ca) 414 continue; 415 416 if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) { 417 ret = false; 418 break; 419 } 420 } 421 rcu_read_unlock(); 422 return ret; 423 } 424 425 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev, 426 u64 start, unsigned sectors) 427 { 428 struct bch_member *m = __bch2_members_v2_get_mut(mi, dev); 429 u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap); 430 431 u64 end = start + sectors; 432 433 int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6); 434 if (resize > 0) { 435 u64 new_bitmap = 0; 436 437 for (unsigned i = 0; i < 64; i++) 438 if (bitmap & BIT_ULL(i)) 439 new_bitmap |= BIT_ULL(i >> resize); 440 bitmap = new_bitmap; 441 m->btree_bitmap_shift += resize; 442 } 443 444 BUG_ON(m->btree_bitmap_shift > 57); 445 BUG_ON(end > 64ULL << m->btree_bitmap_shift); 446 447 for (unsigned bit = start >> m->btree_bitmap_shift; 448 (u64) bit << m->btree_bitmap_shift < end; 449 bit++) 450 bitmap |= BIT_ULL(bit); 451 452 m->btree_allocated_bitmap = cpu_to_le64(bitmap); 453 } 454 455 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k) 456 { 457 lockdep_assert_held(&c->sb_lock); 458 459 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2); 460 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) { 461 if (!bch2_member_exists(c->disk_sb.sb, ptr->dev)) 462 continue; 463 464 __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c)); 465 } 466 } 467