xref: /linux/fs/bcachefs/sb-members.c (revision 9c2f5b6eb8b7da05e13cde60c32e0a8b1f5873b0)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "disk_groups.h"
6 #include "opts.h"
7 #include "replicas.h"
8 #include "sb-members.h"
9 #include "super-io.h"
10 
/*
 * String tables built with a temporary x() macro: each x(t, n, ...) entry
 * becomes a designated initializer that stores the stringified name t at
 * index n.  The BCH_*() list macros are defined elsewhere; presumably each
 * expands to a sequence of x(...) invocations.
 */
#define x(t, n, ...) [n] = #t,
/* Names used when printing a member's iops[] entries. */
static const char * const bch2_iops_measurements[] = {
	BCH_IOPS_MEASUREMENTS()
	NULL
};

/* Names used when printing a member's error counters. */
char * const bch2_member_error_strs[] = {
	BCH_MEMBER_ERROR_TYPES()
	NULL
};
#undef x
22 
23 /* Code for bch_sb_field_members_v1: */
24 
25 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
26 {
27 	return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
28 }
29 
30 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
31 {
32 	struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
33 	memset(&ret, 0, sizeof(ret));
34 	memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
35 	return ret;
36 }
37 
38 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
39 {
40 	return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
41 }
42 
43 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
44 {
45 	struct bch_member ret, *p = members_v1_get_mut(mi, i);
46 	memset(&ret, 0, sizeof(ret));
47 	memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
48 	return ret;
49 }
50 
51 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
52 {
53 	struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
54 	if (mi2)
55 		return members_v2_get(mi2, i);
56 	struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
57 	return members_v1_get(mi1, i);
58 }
59 
60 static int sb_members_v2_resize_entries(struct bch_fs *c)
61 {
62 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
63 
64 	if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
65 		unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
66 					      c->disk_sb.sb->nr_devices), 8);
67 
68 		mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
69 		if (!mi)
70 			return -BCH_ERR_ENOSPC_sb_members_v2;
71 
72 		for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
73 			void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
74 			memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
75 			memset(dst + le16_to_cpu(mi->member_bytes),
76 			       0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
77 		}
78 		mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
79 	}
80 	return 0;
81 }
82 
83 int bch2_sb_members_v2_init(struct bch_fs *c)
84 {
85 	struct bch_sb_field_members_v1 *mi1;
86 	struct bch_sb_field_members_v2 *mi2;
87 
88 	if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
89 		mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
90 				DIV_ROUND_UP(sizeof(*mi2) +
91 					     sizeof(struct bch_member) * c->sb.nr_devices,
92 					     sizeof(u64)));
93 		mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
94 		memcpy(&mi2->_members[0], &mi1->_members[0],
95 		       BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
96 		memset(&mi2->pad[0], 0, sizeof(mi2->pad));
97 		mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
98 	}
99 
100 	return sb_members_v2_resize_entries(c);
101 }
102 
103 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
104 {
105 	struct bch_sb_field_members_v1 *mi1;
106 	struct bch_sb_field_members_v2 *mi2;
107 
108 	mi1 = bch2_sb_field_resize(disk_sb, members_v1,
109 			DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
110 				     disk_sb->sb->nr_devices, sizeof(u64)));
111 	if (!mi1)
112 		return -BCH_ERR_ENOSPC_sb_members;
113 
114 	mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
115 
116 	for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
117 		memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
118 
119 	return 0;
120 }
121 
122 static int validate_member(struct printbuf *err,
123 			   struct bch_member m,
124 			   struct bch_sb *sb,
125 			   int i)
126 {
127 	if (le64_to_cpu(m.nbuckets) > LONG_MAX) {
128 		prt_printf(err, "device %u: too many buckets (got %llu, max %lu)",
129 			   i, le64_to_cpu(m.nbuckets), LONG_MAX);
130 		return -BCH_ERR_invalid_sb_members;
131 	}
132 
133 	if (le64_to_cpu(m.nbuckets) -
134 	    le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
135 		prt_printf(err, "device %u: not enough buckets (got %llu, max %u)",
136 			   i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
137 		return -BCH_ERR_invalid_sb_members;
138 	}
139 
140 	if (le16_to_cpu(m.bucket_size) <
141 	    le16_to_cpu(sb->block_size)) {
142 		prt_printf(err, "device %u: bucket size %u smaller than block size %u",
143 			   i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
144 		return -BCH_ERR_invalid_sb_members;
145 	}
146 
147 	if (le16_to_cpu(m.bucket_size) <
148 	    BCH_SB_BTREE_NODE_SIZE(sb)) {
149 		prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
150 			   i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
151 		return -BCH_ERR_invalid_sb_members;
152 	}
153 
154 	return 0;
155 }
156 
157 static void member_to_text(struct printbuf *out,
158 			   struct bch_member m,
159 			   struct bch_sb_field_disk_groups *gi,
160 			   struct bch_sb *sb,
161 			   int i)
162 {
163 	unsigned data_have = bch2_sb_dev_has_data(sb, i);
164 	u64 bucket_size = le16_to_cpu(m.bucket_size);
165 	u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
166 
167 	if (!bch2_member_exists(&m))
168 		return;
169 
170 	prt_printf(out, "Device:");
171 	prt_tab(out);
172 	prt_printf(out, "%u", i);
173 	prt_newline(out);
174 
175 	printbuf_indent_add(out, 2);
176 
177 	prt_printf(out, "Label:");
178 	prt_tab(out);
179 	if (BCH_MEMBER_GROUP(&m)) {
180 		unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
181 
182 		if (idx < disk_groups_nr(gi))
183 			prt_printf(out, "%s (%u)",
184 				   gi->entries[idx].label, idx);
185 		else
186 			prt_printf(out, "(bad disk labels section)");
187 	} else {
188 		prt_printf(out, "(none)");
189 	}
190 	prt_newline(out);
191 
192 	prt_printf(out, "UUID:");
193 	prt_tab(out);
194 	pr_uuid(out, m.uuid.b);
195 	prt_newline(out);
196 
197 	prt_printf(out, "Size:");
198 	prt_tab(out);
199 	prt_units_u64(out, device_size << 9);
200 	prt_newline(out);
201 
202 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
203 		prt_printf(out, "%s errors:", bch2_member_error_strs[i]);
204 		prt_tab(out);
205 		prt_u64(out, le64_to_cpu(m.errors[i]));
206 		prt_newline(out);
207 	}
208 
209 	for (unsigned i = 0; i < BCH_IOPS_NR; i++) {
210 		prt_printf(out, "%s iops:", bch2_iops_measurements[i]);
211 		prt_tab(out);
212 		prt_printf(out, "%u", le32_to_cpu(m.iops[i]));
213 		prt_newline(out);
214 	}
215 
216 	prt_printf(out, "Bucket size:");
217 	prt_tab(out);
218 	prt_units_u64(out, bucket_size << 9);
219 	prt_newline(out);
220 
221 	prt_printf(out, "First bucket:");
222 	prt_tab(out);
223 	prt_printf(out, "%u", le16_to_cpu(m.first_bucket));
224 	prt_newline(out);
225 
226 	prt_printf(out, "Buckets:");
227 	prt_tab(out);
228 	prt_printf(out, "%llu", le64_to_cpu(m.nbuckets));
229 	prt_newline(out);
230 
231 	prt_printf(out, "Last mount:");
232 	prt_tab(out);
233 	if (m.last_mount)
234 		bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
235 	else
236 		prt_printf(out, "(never)");
237 	prt_newline(out);
238 
239 	prt_printf(out, "Last superblock write:");
240 	prt_tab(out);
241 	prt_u64(out, le64_to_cpu(m.seq));
242 	prt_newline(out);
243 
244 	prt_printf(out, "State:");
245 	prt_tab(out);
246 	prt_printf(out, "%s",
247 		   BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
248 		   ? bch2_member_states[BCH_MEMBER_STATE(&m)]
249 		   : "unknown");
250 	prt_newline(out);
251 
252 	prt_printf(out, "Data allowed:");
253 	prt_tab(out);
254 	if (BCH_MEMBER_DATA_ALLOWED(&m))
255 		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
256 	else
257 		prt_printf(out, "(none)");
258 	prt_newline(out);
259 
260 	prt_printf(out, "Has data:");
261 	prt_tab(out);
262 	if (data_have)
263 		prt_bitflags(out, __bch2_data_types, data_have);
264 	else
265 		prt_printf(out, "(none)");
266 	prt_newline(out);
267 
268 	prt_str(out, "Durability:");
269 	prt_tab(out);
270 	prt_printf(out, "%llu", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
271 	prt_newline(out);
272 
273 	prt_printf(out, "Discard:");
274 	prt_tab(out);
275 	prt_printf(out, "%llu", BCH_MEMBER_DISCARD(&m));
276 	prt_newline(out);
277 
278 	prt_printf(out, "Freespace initialized:");
279 	prt_tab(out);
280 	prt_printf(out, "%llu", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
281 	prt_newline(out);
282 
283 	printbuf_indent_sub(out, 2);
284 }
285 
286 static int bch2_sb_members_v1_validate(struct bch_sb *sb,
287 				    struct bch_sb_field *f,
288 				    struct printbuf *err)
289 {
290 	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
291 	unsigned i;
292 
293 	if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
294 		prt_printf(err, "too many devices for section size");
295 		return -BCH_ERR_invalid_sb_members;
296 	}
297 
298 	for (i = 0; i < sb->nr_devices; i++) {
299 		struct bch_member m = members_v1_get(mi, i);
300 
301 		int ret = validate_member(err, m, sb, i);
302 		if (ret)
303 			return ret;
304 	}
305 
306 	return 0;
307 }
308 
309 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
310 				       struct bch_sb_field *f)
311 {
312 	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
313 	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
314 	unsigned i;
315 
316 	for (i = 0; i < sb->nr_devices; i++)
317 		member_to_text(out, members_v1_get(mi, i), gi, sb, i);
318 }
319 
/* Superblock field operations (validate and to_text) for the members_v1 section. */
const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
	.validate	= bch2_sb_members_v1_validate,
	.to_text	= bch2_sb_members_v1_to_text,
};
324 
325 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
326 				       struct bch_sb_field *f)
327 {
328 	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
329 	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
330 	unsigned i;
331 
332 	for (i = 0; i < sb->nr_devices; i++)
333 		member_to_text(out, members_v2_get(mi, i), gi, sb, i);
334 }
335 
336 static int bch2_sb_members_v2_validate(struct bch_sb *sb,
337 				       struct bch_sb_field *f,
338 				       struct printbuf *err)
339 {
340 	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
341 	size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
342 		(void *) mi;
343 
344 	if (mi_bytes > vstruct_bytes(&mi->field)) {
345 		prt_printf(err, "section too small (%zu > %zu)",
346 			   mi_bytes, vstruct_bytes(&mi->field));
347 		return -BCH_ERR_invalid_sb_members;
348 	}
349 
350 	for (unsigned i = 0; i < sb->nr_devices; i++) {
351 		int ret = validate_member(err, members_v2_get(mi, i), sb, i);
352 		if (ret)
353 			return ret;
354 	}
355 
356 	return 0;
357 }
358 
/* Superblock field operations (validate and to_text) for the members_v2 section. */
const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
	.validate	= bch2_sb_members_v2_validate,
	.to_text	= bch2_sb_members_v2_to_text,
};
363 
364 void bch2_sb_members_from_cpu(struct bch_fs *c)
365 {
366 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
367 
368 	rcu_read_lock();
369 	for_each_member_device_rcu(c, ca, NULL) {
370 		struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
371 
372 		for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
373 			m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
374 	}
375 	rcu_read_unlock();
376 }
377 
378 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
379 {
380 	struct bch_fs *c = ca->fs;
381 	struct bch_member m;
382 
383 	mutex_lock(&ca->fs->sb_lock);
384 	m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
385 	mutex_unlock(&ca->fs->sb_lock);
386 
387 	printbuf_tabstop_push(out, 12);
388 
389 	prt_str(out, "IO errors since filesystem creation");
390 	prt_newline(out);
391 
392 	printbuf_indent_add(out, 2);
393 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
394 		prt_printf(out, "%s:", bch2_member_error_strs[i]);
395 		prt_tab(out);
396 		prt_u64(out, atomic64_read(&ca->errors[i]));
397 		prt_newline(out);
398 	}
399 	printbuf_indent_sub(out, 2);
400 
401 	prt_str(out, "IO errors since ");
402 	bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
403 	prt_str(out, " ago");
404 	prt_newline(out);
405 
406 	printbuf_indent_add(out, 2);
407 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++) {
408 		prt_printf(out, "%s:", bch2_member_error_strs[i]);
409 		prt_tab(out);
410 		prt_u64(out, atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
411 		prt_newline(out);
412 	}
413 	printbuf_indent_sub(out, 2);
414 }
415 
416 void bch2_dev_errors_reset(struct bch_dev *ca)
417 {
418 	struct bch_fs *c = ca->fs;
419 	struct bch_member *m;
420 
421 	mutex_lock(&c->sb_lock);
422 	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
423 	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
424 		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
425 	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
426 
427 	bch2_write_super(c);
428 	mutex_unlock(&c->sb_lock);
429 }
430 
431 /*
432  * Per member "range has btree nodes" bitmap:
433  *
434  * This is so that if we ever have to run the btree node scan to repair we don't
435  * have to scan full devices:
436  */
437 
438 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
439 {
440 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
441 		if (!bch2_dev_btree_bitmap_marked_sectors(bch_dev_bkey_exists(c, ptr->dev),
442 							  ptr->offset, btree_sectors(c)))
443 			return false;
444 	return true;
445 }
446 
447 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
448 				u64 start, unsigned sectors)
449 {
450 	struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
451 	u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
452 
453 	u64 end = start + sectors;
454 
455 	int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
456 	if (resize > 0) {
457 		u64 new_bitmap = 0;
458 
459 		for (unsigned i = 0; i < 64; i++)
460 			if (bitmap & BIT_ULL(i))
461 				new_bitmap |= BIT_ULL(i >> resize);
462 		bitmap = new_bitmap;
463 		m->btree_bitmap_shift += resize;
464 	}
465 
466 	for (unsigned bit = start >> m->btree_bitmap_shift;
467 	     (u64) bit << m->btree_bitmap_shift < end;
468 	     bit++)
469 		bitmap |= BIT_ULL(bit);
470 
471 	m->btree_allocated_bitmap = cpu_to_le64(bitmap);
472 }
473 
474 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
475 {
476 	lockdep_assert_held(&c->sb_lock);
477 
478 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
479 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr)
480 		__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
481 }
482