xref: /linux/fs/bcachefs/sb-members.c (revision 5bb6ba448fe3598a7668838942db1f008beb581b)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "disk_groups.h"
6 #include "error.h"
7 #include "opts.h"
8 #include "replicas.h"
9 #include "sb-members.h"
10 #include "super-io.h"
11 
12 void bch2_dev_missing(struct bch_fs *c, unsigned dev)
13 {
14 	if (dev != BCH_SB_MEMBER_INVALID)
15 		bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
16 }
17 
18 void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
19 {
20 	bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset);
21 }
22 
23 #define x(t, n, ...) [n] = #t,
24 static const char * const bch2_iops_measurements[] = {
25 	BCH_IOPS_MEASUREMENTS()
26 	NULL
27 };
28 
29 char * const bch2_member_error_strs[] = {
30 	BCH_MEMBER_ERROR_TYPES()
31 	NULL
32 };
33 #undef x
34 
/* Code for bch_sb_field_members_v1 and bch_sb_field_members_v2: */
36 
37 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
38 {
39 	return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
40 }
41 
42 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
43 {
44 	struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
45 	memset(&ret, 0, sizeof(ret));
46 	memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
47 	return ret;
48 }
49 
50 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
51 {
52 	return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
53 }
54 
55 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
56 {
57 	struct bch_member ret, *p = members_v1_get_mut(mi, i);
58 	memset(&ret, 0, sizeof(ret));
59 	memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
60 	return ret;
61 }
62 
63 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
64 {
65 	struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
66 	if (mi2)
67 		return members_v2_get(mi2, i);
68 	struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
69 	return members_v1_get(mi1, i);
70 }
71 
72 static int sb_members_v2_resize_entries(struct bch_fs *c)
73 {
74 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
75 
76 	if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
77 		unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
78 					      c->disk_sb.sb->nr_devices), 8);
79 
80 		mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
81 		if (!mi)
82 			return -BCH_ERR_ENOSPC_sb_members_v2;
83 
84 		for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
85 			void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
86 			memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
87 			memset(dst + le16_to_cpu(mi->member_bytes),
88 			       0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
89 		}
90 		mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
91 	}
92 	return 0;
93 }
94 
95 int bch2_sb_members_v2_init(struct bch_fs *c)
96 {
97 	struct bch_sb_field_members_v1 *mi1;
98 	struct bch_sb_field_members_v2 *mi2;
99 
100 	if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
101 		mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
102 				DIV_ROUND_UP(sizeof(*mi2) +
103 					     sizeof(struct bch_member) * c->sb.nr_devices,
104 					     sizeof(u64)));
105 		mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
106 		memcpy(&mi2->_members[0], &mi1->_members[0],
107 		       BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
108 		memset(&mi2->pad[0], 0, sizeof(mi2->pad));
109 		mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
110 	}
111 
112 	return sb_members_v2_resize_entries(c);
113 }
114 
115 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
116 {
117 	struct bch_sb_field_members_v1 *mi1;
118 	struct bch_sb_field_members_v2 *mi2;
119 
120 	mi1 = bch2_sb_field_resize(disk_sb, members_v1,
121 			DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
122 				     disk_sb->sb->nr_devices, sizeof(u64)));
123 	if (!mi1)
124 		return -BCH_ERR_ENOSPC_sb_members;
125 
126 	mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
127 
128 	for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
129 		memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
130 
131 	return 0;
132 }
133 
134 static int validate_member(struct printbuf *err,
135 			   struct bch_member m,
136 			   struct bch_sb *sb,
137 			   int i)
138 {
139 	if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) {
140 		prt_printf(err, "device %u: too many buckets (got %llu, max %u)",
141 			   i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX);
142 		return -BCH_ERR_invalid_sb_members;
143 	}
144 
145 	if (le64_to_cpu(m.nbuckets) -
146 	    le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
147 		prt_printf(err, "device %u: not enough buckets (got %llu, max %u)",
148 			   i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
149 		return -BCH_ERR_invalid_sb_members;
150 	}
151 
152 	if (le16_to_cpu(m.bucket_size) <
153 	    le16_to_cpu(sb->block_size)) {
154 		prt_printf(err, "device %u: bucket size %u smaller than block size %u",
155 			   i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
156 		return -BCH_ERR_invalid_sb_members;
157 	}
158 
159 	if (le16_to_cpu(m.bucket_size) <
160 	    BCH_SB_BTREE_NODE_SIZE(sb)) {
161 		prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
162 			   i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
163 		return -BCH_ERR_invalid_sb_members;
164 	}
165 
166 	if (m.btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX) {
167 		prt_printf(err, "device %u: invalid btree_bitmap_shift %u", i, m.btree_bitmap_shift);
168 		return -BCH_ERR_invalid_sb_members;
169 	}
170 
171 	return 0;
172 }
173 
174 static void member_to_text(struct printbuf *out,
175 			   struct bch_member m,
176 			   struct bch_sb_field_disk_groups *gi,
177 			   struct bch_sb *sb,
178 			   int i)
179 {
180 	unsigned data_have = bch2_sb_dev_has_data(sb, i);
181 	u64 bucket_size = le16_to_cpu(m.bucket_size);
182 	u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
183 
184 	if (!bch2_member_alive(&m))
185 		return;
186 
187 	prt_printf(out, "Device:\t%u\n", i);
188 
189 	printbuf_indent_add(out, 2);
190 
191 	prt_printf(out, "Label:\t");
192 	if (BCH_MEMBER_GROUP(&m)) {
193 		unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
194 
195 		if (idx < disk_groups_nr(gi))
196 			prt_printf(out, "%s (%u)",
197 				   gi->entries[idx].label, idx);
198 		else
199 			prt_printf(out, "(bad disk labels section)");
200 	} else {
201 		prt_printf(out, "(none)");
202 	}
203 	prt_newline(out);
204 
205 	prt_printf(out, "UUID:\t");
206 	pr_uuid(out, m.uuid.b);
207 	prt_newline(out);
208 
209 	prt_printf(out, "Size:\t");
210 	prt_units_u64(out, device_size << 9);
211 	prt_newline(out);
212 
213 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
214 		prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i]));
215 
216 	for (unsigned i = 0; i < BCH_IOPS_NR; i++)
217 		prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i]));
218 
219 	prt_printf(out, "Bucket size:\t");
220 	prt_units_u64(out, bucket_size << 9);
221 	prt_newline(out);
222 
223 	prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket));
224 	prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets));
225 
226 	prt_printf(out, "Last mount:\t");
227 	if (m.last_mount)
228 		bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
229 	else
230 		prt_printf(out, "(never)");
231 	prt_newline(out);
232 
233 	prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq));
234 
235 	prt_printf(out, "State:\t%s\n",
236 		   BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
237 		   ? bch2_member_states[BCH_MEMBER_STATE(&m)]
238 		   : "unknown");
239 
240 	prt_printf(out, "Data allowed:\t");
241 	if (BCH_MEMBER_DATA_ALLOWED(&m))
242 		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
243 	else
244 		prt_printf(out, "(none)");
245 	prt_newline(out);
246 
247 	prt_printf(out, "Has data:\t");
248 	if (data_have)
249 		prt_bitflags(out, __bch2_data_types, data_have);
250 	else
251 		prt_printf(out, "(none)");
252 	prt_newline(out);
253 
254 	prt_printf(out, "Btree allocated bitmap blocksize:\t");
255 	if (m.btree_bitmap_shift < 64)
256 		prt_units_u64(out, 1ULL << m.btree_bitmap_shift);
257 	else
258 		prt_printf(out, "(invalid shift %u)", m.btree_bitmap_shift);
259 	prt_newline(out);
260 
261 	prt_printf(out, "Btree allocated bitmap:\t");
262 	bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64);
263 	prt_newline(out);
264 
265 	prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
266 
267 	prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m));
268 	prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
269 
270 	printbuf_indent_sub(out, 2);
271 }
272 
273 static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f,
274 				enum bch_validate_flags flags, struct printbuf *err)
275 {
276 	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
277 	unsigned i;
278 
279 	if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
280 		prt_printf(err, "too many devices for section size");
281 		return -BCH_ERR_invalid_sb_members;
282 	}
283 
284 	for (i = 0; i < sb->nr_devices; i++) {
285 		struct bch_member m = members_v1_get(mi, i);
286 
287 		int ret = validate_member(err, m, sb, i);
288 		if (ret)
289 			return ret;
290 	}
291 
292 	return 0;
293 }
294 
295 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
296 				       struct bch_sb_field *f)
297 {
298 	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
299 	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
300 	unsigned i;
301 
302 	for (i = 0; i < sb->nr_devices; i++)
303 		member_to_text(out, members_v1_get(mi, i), gi, sb, i);
304 }
305 
306 const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
307 	.validate	= bch2_sb_members_v1_validate,
308 	.to_text	= bch2_sb_members_v1_to_text,
309 };
310 
311 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
312 				       struct bch_sb_field *f)
313 {
314 	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
315 	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
316 	unsigned i;
317 
318 	for (i = 0; i < sb->nr_devices; i++)
319 		member_to_text(out, members_v2_get(mi, i), gi, sb, i);
320 }
321 
322 static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f,
323 				enum bch_validate_flags flags, struct printbuf *err)
324 {
325 	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
326 	size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
327 		(void *) mi;
328 
329 	if (mi_bytes > vstruct_bytes(&mi->field)) {
330 		prt_printf(err, "section too small (%zu > %zu)",
331 			   mi_bytes, vstruct_bytes(&mi->field));
332 		return -BCH_ERR_invalid_sb_members;
333 	}
334 
335 	for (unsigned i = 0; i < sb->nr_devices; i++) {
336 		int ret = validate_member(err, members_v2_get(mi, i), sb, i);
337 		if (ret)
338 			return ret;
339 	}
340 
341 	return 0;
342 }
343 
344 const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
345 	.validate	= bch2_sb_members_v2_validate,
346 	.to_text	= bch2_sb_members_v2_to_text,
347 };
348 
349 void bch2_sb_members_from_cpu(struct bch_fs *c)
350 {
351 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
352 
353 	rcu_read_lock();
354 	for_each_member_device_rcu(c, ca, NULL) {
355 		struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
356 
357 		for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
358 			m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
359 	}
360 	rcu_read_unlock();
361 }
362 
363 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
364 {
365 	struct bch_fs *c = ca->fs;
366 	struct bch_member m;
367 
368 	mutex_lock(&ca->fs->sb_lock);
369 	m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
370 	mutex_unlock(&ca->fs->sb_lock);
371 
372 	printbuf_tabstop_push(out, 12);
373 
374 	prt_str(out, "IO errors since filesystem creation");
375 	prt_newline(out);
376 
377 	printbuf_indent_add(out, 2);
378 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
379 		prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
380 	printbuf_indent_sub(out, 2);
381 
382 	prt_str(out, "IO errors since ");
383 	bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
384 	prt_str(out, " ago");
385 	prt_newline(out);
386 
387 	printbuf_indent_add(out, 2);
388 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
389 		prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
390 			   atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
391 	printbuf_indent_sub(out, 2);
392 }
393 
394 void bch2_dev_errors_reset(struct bch_dev *ca)
395 {
396 	struct bch_fs *c = ca->fs;
397 	struct bch_member *m;
398 
399 	mutex_lock(&c->sb_lock);
400 	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
401 	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
402 		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
403 	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
404 
405 	bch2_write_super(c);
406 	mutex_unlock(&c->sb_lock);
407 }
408 
409 /*
410  * Per member "range has btree nodes" bitmap:
411  *
412  * This is so that if we ever have to run the btree node scan to repair we don't
413  * have to scan full devices:
414  */
415 
416 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
417 {
418 	bool ret = true;
419 	rcu_read_lock();
420 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
421 		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
422 		if (!ca)
423 			continue;
424 
425 		if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) {
426 			ret = false;
427 			break;
428 		}
429 	}
430 	rcu_read_unlock();
431 	return ret;
432 }
433 
434 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
435 				u64 start, unsigned sectors)
436 {
437 	struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
438 	u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
439 
440 	u64 end = start + sectors;
441 
442 	int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
443 	if (resize > 0) {
444 		u64 new_bitmap = 0;
445 
446 		for (unsigned i = 0; i < 64; i++)
447 			if (bitmap & BIT_ULL(i))
448 				new_bitmap |= BIT_ULL(i >> resize);
449 		bitmap = new_bitmap;
450 		m->btree_bitmap_shift += resize;
451 	}
452 
453 	BUG_ON(m->btree_bitmap_shift >= BCH_MI_BTREE_BITMAP_SHIFT_MAX);
454 	BUG_ON(end > 64ULL << m->btree_bitmap_shift);
455 
456 	for (unsigned bit = start >> m->btree_bitmap_shift;
457 	     (u64) bit << m->btree_bitmap_shift < end;
458 	     bit++)
459 		bitmap |= BIT_ULL(bit);
460 
461 	m->btree_allocated_bitmap = cpu_to_le64(bitmap);
462 }
463 
464 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
465 {
466 	lockdep_assert_held(&c->sb_lock);
467 
468 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
469 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
470 		if (!bch2_member_exists(c->disk_sb.sb, ptr->dev))
471 			continue;
472 
473 		__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
474 	}
475 }
476 
477 unsigned bch2_sb_nr_devices(const struct bch_sb *sb)
478 {
479 	unsigned nr = 0;
480 
481 	for (unsigned i = 0; i < sb->nr_devices; i++)
482 		nr += bch2_member_exists((struct bch_sb *) sb, i);
483 	return nr;
484 }
485 
486 int bch2_sb_member_alloc(struct bch_fs *c)
487 {
488 	unsigned dev_idx = c->sb.nr_devices;
489 	struct bch_sb_field_members_v2 *mi;
490 	unsigned nr_devices;
491 	unsigned u64s;
492 	int best = -1;
493 	u64 best_last_mount = 0;
494 
495 	if (dev_idx < BCH_SB_MEMBERS_MAX)
496 		goto have_slot;
497 
498 	for (dev_idx = 0; dev_idx < BCH_SB_MEMBERS_MAX; dev_idx++) {
499 		/* eventually BCH_SB_MEMBERS_MAX will be raised */
500 		if (dev_idx == BCH_SB_MEMBER_INVALID)
501 			continue;
502 
503 		struct bch_member m = bch2_sb_member_get(c->disk_sb.sb, dev_idx);
504 		if (bch2_member_alive(&m))
505 			continue;
506 
507 		u64 last_mount = le64_to_cpu(m.last_mount);
508 		if (best < 0 || last_mount < best_last_mount) {
509 			best = dev_idx;
510 			best_last_mount = last_mount;
511 		}
512 	}
513 	if (best >= 0) {
514 		dev_idx = best;
515 		goto have_slot;
516 	}
517 
518 	return -BCH_ERR_ENOSPC_sb_members;
519 have_slot:
520 	nr_devices = max_t(unsigned, dev_idx + 1, c->sb.nr_devices);
521 
522 	mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
523 	u64s = DIV_ROUND_UP(sizeof(struct bch_sb_field_members_v2) +
524 			    le16_to_cpu(mi->member_bytes) * nr_devices, sizeof(u64));
525 
526 	mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
527 	if (!mi)
528 		return -BCH_ERR_ENOSPC_sb_members;
529 
530 	c->disk_sb.sb->nr_devices = nr_devices;
531 	return dev_idx;
532 }
533