1 // SPDX-License-Identifier: GPL-2.0
2
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "disk_groups.h"
6 #include "error.h"
7 #include "opts.h"
8 #include "replicas.h"
9 #include "sb-members.h"
10 #include "super-io.h"
11
bch2_dev_missing(struct bch_fs * c,unsigned dev)12 void bch2_dev_missing(struct bch_fs *c, unsigned dev)
13 {
14 if (dev != BCH_SB_MEMBER_INVALID)
15 bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
16 }
17
/*
 * Report a pointer to a bucket that does not exist as a filesystem
 * inconsistency.  The bucket is printed as "inode:offset" of @bucket.
 */
void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
{
	bch2_fs_inconsistent(c,
			     "pointer to nonexistent bucket %llu:%llu",
			     bucket.inode,
			     bucket.offset);
}
22
23 #define x(t, n, ...) [n] = #t,
24 static const char * const bch2_iops_measurements[] = {
25 BCH_IOPS_MEASUREMENTS()
26 NULL
27 };
28
29 char * const bch2_member_error_strs[] = {
30 BCH_MEMBER_ERROR_TYPES()
31 NULL
32 };
33 #undef x
34
/* Code for bch_sb_field_members_v1 and bch_sb_field_members_v2: */
36
bch2_members_v2_get_mut(struct bch_sb * sb,int i)37 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
38 {
39 return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
40 }
41
members_v2_get(struct bch_sb_field_members_v2 * mi,int i)42 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
43 {
44 struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
45 memset(&ret, 0, sizeof(ret));
46 memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
47 return ret;
48 }
49
members_v1_get_mut(struct bch_sb_field_members_v1 * mi,int i)50 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
51 {
52 return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
53 }
54
members_v1_get(struct bch_sb_field_members_v1 * mi,int i)55 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
56 {
57 struct bch_member ret, *p = members_v1_get_mut(mi, i);
58 memset(&ret, 0, sizeof(ret));
59 memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
60 return ret;
61 }
62
bch2_sb_member_get(struct bch_sb * sb,int i)63 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
64 {
65 struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
66 if (mi2)
67 return members_v2_get(mi2, i);
68 struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
69 return members_v1_get(mi1, i);
70 }
71
sb_members_v2_resize_entries(struct bch_fs * c)72 static int sb_members_v2_resize_entries(struct bch_fs *c)
73 {
74 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
75
76 if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
77 unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
78 c->disk_sb.sb->nr_devices), 8);
79
80 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
81 if (!mi)
82 return -BCH_ERR_ENOSPC_sb_members_v2;
83
84 for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
85 void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
86 memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
87 memset(dst + le16_to_cpu(mi->member_bytes),
88 0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
89 }
90 mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
91 }
92 return 0;
93 }
94
bch2_sb_members_v2_init(struct bch_fs * c)95 int bch2_sb_members_v2_init(struct bch_fs *c)
96 {
97 struct bch_sb_field_members_v1 *mi1;
98 struct bch_sb_field_members_v2 *mi2;
99
100 if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
101 mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
102 DIV_ROUND_UP(sizeof(*mi2) +
103 sizeof(struct bch_member) * c->sb.nr_devices,
104 sizeof(u64)));
105 mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
106 memcpy(&mi2->_members[0], &mi1->_members[0],
107 BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
108 memset(&mi2->pad[0], 0, sizeof(mi2->pad));
109 mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
110 }
111
112 return sb_members_v2_resize_entries(c);
113 }
114
bch2_sb_members_cpy_v2_v1(struct bch_sb_handle * disk_sb)115 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
116 {
117 struct bch_sb_field_members_v1 *mi1;
118 struct bch_sb_field_members_v2 *mi2;
119
120 mi1 = bch2_sb_field_resize(disk_sb, members_v1,
121 DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
122 disk_sb->sb->nr_devices, sizeof(u64)));
123 if (!mi1)
124 return -BCH_ERR_ENOSPC_sb_members;
125
126 mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
127
128 for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
129 memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
130
131 return 0;
132 }
133
validate_member(struct printbuf * err,struct bch_member m,struct bch_sb * sb,int i)134 static int validate_member(struct printbuf *err,
135 struct bch_member m,
136 struct bch_sb *sb,
137 int i)
138 {
139 if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) {
140 prt_printf(err, "device %u: too many buckets (got %llu, max %u)",
141 i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX);
142 return -BCH_ERR_invalid_sb_members;
143 }
144
145 if (le64_to_cpu(m.nbuckets) -
146 le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
147 prt_printf(err, "device %u: not enough buckets (got %llu, max %u)",
148 i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
149 return -BCH_ERR_invalid_sb_members;
150 }
151
152 if (le16_to_cpu(m.bucket_size) <
153 le16_to_cpu(sb->block_size)) {
154 prt_printf(err, "device %u: bucket size %u smaller than block size %u",
155 i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
156 return -BCH_ERR_invalid_sb_members;
157 }
158
159 if (le16_to_cpu(m.bucket_size) <
160 BCH_SB_BTREE_NODE_SIZE(sb)) {
161 prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
162 i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
163 return -BCH_ERR_invalid_sb_members;
164 }
165
166 return 0;
167 }
168
member_to_text(struct printbuf * out,struct bch_member m,struct bch_sb_field_disk_groups * gi,struct bch_sb * sb,int i)169 static void member_to_text(struct printbuf *out,
170 struct bch_member m,
171 struct bch_sb_field_disk_groups *gi,
172 struct bch_sb *sb,
173 int i)
174 {
175 unsigned data_have = bch2_sb_dev_has_data(sb, i);
176 u64 bucket_size = le16_to_cpu(m.bucket_size);
177 u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
178
179 if (!bch2_member_alive(&m))
180 return;
181
182 prt_printf(out, "Device:\t%u\n", i);
183
184 printbuf_indent_add(out, 2);
185
186 prt_printf(out, "Label:\t");
187 if (BCH_MEMBER_GROUP(&m)) {
188 unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
189
190 if (idx < disk_groups_nr(gi))
191 prt_printf(out, "%s (%u)",
192 gi->entries[idx].label, idx);
193 else
194 prt_printf(out, "(bad disk labels section)");
195 } else {
196 prt_printf(out, "(none)");
197 }
198 prt_newline(out);
199
200 prt_printf(out, "UUID:\t");
201 pr_uuid(out, m.uuid.b);
202 prt_newline(out);
203
204 prt_printf(out, "Size:\t");
205 prt_units_u64(out, device_size << 9);
206 prt_newline(out);
207
208 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
209 prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i]));
210
211 for (unsigned i = 0; i < BCH_IOPS_NR; i++)
212 prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i]));
213
214 prt_printf(out, "Bucket size:\t");
215 prt_units_u64(out, bucket_size << 9);
216 prt_newline(out);
217
218 prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket));
219 prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets));
220
221 prt_printf(out, "Last mount:\t");
222 if (m.last_mount)
223 bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
224 else
225 prt_printf(out, "(never)");
226 prt_newline(out);
227
228 prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq));
229
230 prt_printf(out, "State:\t%s\n",
231 BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
232 ? bch2_member_states[BCH_MEMBER_STATE(&m)]
233 : "unknown");
234
235 prt_printf(out, "Data allowed:\t");
236 if (BCH_MEMBER_DATA_ALLOWED(&m))
237 prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
238 else
239 prt_printf(out, "(none)");
240 prt_newline(out);
241
242 prt_printf(out, "Has data:\t");
243 if (data_have)
244 prt_bitflags(out, __bch2_data_types, data_have);
245 else
246 prt_printf(out, "(none)");
247 prt_newline(out);
248
249 prt_printf(out, "Btree allocated bitmap blocksize:\t");
250 prt_units_u64(out, 1ULL << m.btree_bitmap_shift);
251 prt_newline(out);
252
253 prt_printf(out, "Btree allocated bitmap:\t");
254 bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64);
255 prt_newline(out);
256
257 prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
258
259 prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m));
260 prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
261
262 printbuf_indent_sub(out, 2);
263 }
264
bch2_sb_members_v1_validate(struct bch_sb * sb,struct bch_sb_field * f,enum bch_validate_flags flags,struct printbuf * err)265 static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f,
266 enum bch_validate_flags flags, struct printbuf *err)
267 {
268 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
269 unsigned i;
270
271 if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
272 prt_printf(err, "too many devices for section size");
273 return -BCH_ERR_invalid_sb_members;
274 }
275
276 for (i = 0; i < sb->nr_devices; i++) {
277 struct bch_member m = members_v1_get(mi, i);
278
279 int ret = validate_member(err, m, sb, i);
280 if (ret)
281 return ret;
282 }
283
284 return 0;
285 }
286
bch2_sb_members_v1_to_text(struct printbuf * out,struct bch_sb * sb,struct bch_sb_field * f)287 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
288 struct bch_sb_field *f)
289 {
290 struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
291 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
292 unsigned i;
293
294 for (i = 0; i < sb->nr_devices; i++)
295 member_to_text(out, members_v1_get(mi, i), gi, sb, i);
296 }
297
298 const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
299 .validate = bch2_sb_members_v1_validate,
300 .to_text = bch2_sb_members_v1_to_text,
301 };
302
bch2_sb_members_v2_to_text(struct printbuf * out,struct bch_sb * sb,struct bch_sb_field * f)303 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
304 struct bch_sb_field *f)
305 {
306 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
307 struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
308 unsigned i;
309
310 for (i = 0; i < sb->nr_devices; i++)
311 member_to_text(out, members_v2_get(mi, i), gi, sb, i);
312 }
313
bch2_sb_members_v2_validate(struct bch_sb * sb,struct bch_sb_field * f,enum bch_validate_flags flags,struct printbuf * err)314 static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f,
315 enum bch_validate_flags flags, struct printbuf *err)
316 {
317 struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
318 size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
319 (void *) mi;
320
321 if (mi_bytes > vstruct_bytes(&mi->field)) {
322 prt_printf(err, "section too small (%zu > %zu)",
323 mi_bytes, vstruct_bytes(&mi->field));
324 return -BCH_ERR_invalid_sb_members;
325 }
326
327 for (unsigned i = 0; i < sb->nr_devices; i++) {
328 int ret = validate_member(err, members_v2_get(mi, i), sb, i);
329 if (ret)
330 return ret;
331 }
332
333 return 0;
334 }
335
336 const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
337 .validate = bch2_sb_members_v2_validate,
338 .to_text = bch2_sb_members_v2_to_text,
339 };
340
bch2_sb_members_from_cpu(struct bch_fs * c)341 void bch2_sb_members_from_cpu(struct bch_fs *c)
342 {
343 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
344
345 rcu_read_lock();
346 for_each_member_device_rcu(c, ca, NULL) {
347 struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
348
349 for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
350 m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
351 }
352 rcu_read_unlock();
353 }
354
bch2_dev_io_errors_to_text(struct printbuf * out,struct bch_dev * ca)355 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
356 {
357 struct bch_fs *c = ca->fs;
358 struct bch_member m;
359
360 mutex_lock(&ca->fs->sb_lock);
361 m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
362 mutex_unlock(&ca->fs->sb_lock);
363
364 printbuf_tabstop_push(out, 12);
365
366 prt_str(out, "IO errors since filesystem creation");
367 prt_newline(out);
368
369 printbuf_indent_add(out, 2);
370 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
371 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
372 printbuf_indent_sub(out, 2);
373
374 prt_str(out, "IO errors since ");
375 bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
376 prt_str(out, " ago");
377 prt_newline(out);
378
379 printbuf_indent_add(out, 2);
380 for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
381 prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
382 atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
383 printbuf_indent_sub(out, 2);
384 }
385
bch2_dev_errors_reset(struct bch_dev * ca)386 void bch2_dev_errors_reset(struct bch_dev *ca)
387 {
388 struct bch_fs *c = ca->fs;
389 struct bch_member *m;
390
391 mutex_lock(&c->sb_lock);
392 m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
393 for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
394 m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
395 m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
396
397 bch2_write_super(c);
398 mutex_unlock(&c->sb_lock);
399 }
400
401 /*
402 * Per member "range has btree nodes" bitmap:
403 *
404 * This is so that if we ever have to run the btree node scan to repair we don't
405 * have to scan full devices:
406 */
407
bch2_dev_btree_bitmap_marked(struct bch_fs * c,struct bkey_s_c k)408 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
409 {
410 bool ret = true;
411 rcu_read_lock();
412 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
413 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
414 if (!ca)
415 continue;
416
417 if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) {
418 ret = false;
419 break;
420 }
421 }
422 rcu_read_unlock();
423 return ret;
424 }
425
/*
 * Mark the sector range [@start, @start + @sectors) in the btree bitmap of
 * member @dev.
 *
 * The bitmap has 64 bits, each covering (1 << btree_bitmap_shift) sectors.
 * If the range ends beyond what the bitmap currently covers, the shift is
 * increased and the existing bits are folded down into the coarser
 * granularity before the new range is marked.
 */
static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
				u64 start, unsigned sectors)
{
	struct bch_member *member = __bch2_members_v2_get_mut(mi, dev);
	u64 bits = le64_to_cpu(member->btree_allocated_bitmap);
	u64 end = start + sectors;

	/*
	 * 64 bits of (1 << shift) sectors cover 1 << (shift + 6) sectors;
	 * grow_by is how many extra doublings are needed to reach end.
	 */
	int grow_by = ilog2(roundup_pow_of_two(end)) - (member->btree_bitmap_shift + 6);

	if (grow_by > 0) {
		u64 folded = 0;

		for (unsigned i = 0; i < 64; i++) {
			if (!(bits & BIT_ULL(i)))
				continue;
			folded |= BIT_ULL(i >> grow_by);
		}

		bits = folded;
		member->btree_bitmap_shift += grow_by;
	}

	BUG_ON(member->btree_bitmap_shift > 57);
	BUG_ON(end > 64ULL << member->btree_bitmap_shift);

	/* Set every bit whose covered range starts before end. */
	unsigned bit = start >> member->btree_bitmap_shift;

	while ((u64) bit << member->btree_bitmap_shift < end) {
		bits |= BIT_ULL(bit);
		bit++;
	}

	member->btree_allocated_bitmap = cpu_to_le64(bits);
}
455
bch2_dev_btree_bitmap_mark(struct bch_fs * c,struct bkey_s_c k)456 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
457 {
458 lockdep_assert_held(&c->sb_lock);
459
460 struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
461 bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
462 if (!bch2_member_exists(c->disk_sb.sb, ptr->dev))
463 continue;
464
465 __bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
466 }
467 }
468
bch2_sb_nr_devices(const struct bch_sb * sb)469 unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
470 {
471 unsigned nr = 0;
472
473 for (unsigned i = 0; i < sb->nr_devices; i++)
474 nr += bch2_member_exists((struct bch_sb *) sb, i);
475 return nr;
476 }
477
bch2_sb_member_alloc(struct bch_fs * c)478 int bch2_sb_member_alloc(struct bch_fs *c)
479 {
480 unsigned dev_idx = c->sb.nr_devices;
481 struct bch_sb_field_members_v2 *mi;
482 unsigned nr_devices;
483 unsigned u64s;
484 int best = -1;
485 u64 best_last_mount = 0;
486
487 if (dev_idx < BCH_SB_MEMBERS_MAX)
488 goto have_slot;
489
490 for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) {
491 /* eventually BCH_SB_MEMBERS_MAX will be raised */
492 if (dev_idx == BCH_SB_MEMBER_INVALID)
493 continue;
494
495 struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
496 if (bch2_member_alive(&m))
497 continue;
498
499 u64 last_mount = le64_to_cpu(m.last_mount);
500 if (best < 0 || last_mount < best_last_mount) {
501 best = dev_idx;
502 best_last_mount = last_mount;
503 }
504 }
505 if (best >= 0) {
506 dev_idx = best;
507 goto have_slot;
508 }
509
510 return -BCH_ERR_ENOSPC_sb_members;
511 have_slot:
512 nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
513
514 mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
515 u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
516 le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
517
518 mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
519 if (!mi)
520 return -BCH_ERR_ENOSPC_sb_members;
521
522 c->disk_sb.sb->nr_devices = nr_devices;
523 return dev_idx;
524 }
525