xref: /linux/fs/bcachefs/sb-members.c (revision 2a52ca7c98960aafb0eca9ef96b2d0c932171357)
1 // SPDX-License-Identifier: GPL-2.0
2 
3 #include "bcachefs.h"
4 #include "btree_cache.h"
5 #include "disk_groups.h"
6 #include "error.h"
7 #include "opts.h"
8 #include "replicas.h"
9 #include "sb-members.h"
10 #include "super-io.h"
11 
/*
 * Flag the filesystem as inconsistent because something referenced device
 * index @dev, which does not exist.
 */
void bch2_dev_missing(struct bch_fs *c, unsigned dev)
{
	bch2_fs_inconsistent(c,
			     "pointer to nonexistent device %u",
			     dev);
}
16 
17 void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
18 {
19 	bch2_fs_inconsistent(c, "pointer to nonexistent bucket %llu:%llu", bucket.inode, bucket.offset);
20 }
21 
/*
 * x-macro expansion: every x(t, n, ...) entry of the list macros below turns
 * into the designated initializer [n] = "t", so each table maps an entry's
 * numeric value to its name as a string.  A trailing NULL follows the entries
 * in both tables.
 */
#define x(t, n, ...) [n] = #t,
/* Names of the entries listed by BCH_IOPS_MEASUREMENTS() */
static const char * const bch2_iops_measurements[] = {
	BCH_IOPS_MEASUREMENTS()
	NULL
};

/* Names of the entries listed by BCH_MEMBER_ERROR_TYPES() */
char * const bch2_member_error_strs[] = {
	BCH_MEMBER_ERROR_TYPES()
	NULL
};
#undef x
33 
/* Code for bch_sb_field_members_v1 and bch_sb_field_members_v2: */
35 
36 struct bch_member *bch2_members_v2_get_mut(struct bch_sb *sb, int i)
37 {
38 	return __bch2_members_v2_get_mut(bch2_sb_field_get(sb, members_v2), i);
39 }
40 
41 static struct bch_member members_v2_get(struct bch_sb_field_members_v2 *mi, int i)
42 {
43 	struct bch_member ret, *p = __bch2_members_v2_get_mut(mi, i);
44 	memset(&ret, 0, sizeof(ret));
45 	memcpy(&ret, p, min_t(size_t, le16_to_cpu(mi->member_bytes), sizeof(ret)));
46 	return ret;
47 }
48 
49 static struct bch_member *members_v1_get_mut(struct bch_sb_field_members_v1 *mi, int i)
50 {
51 	return (void *) mi->_members + (i * BCH_MEMBER_V1_BYTES);
52 }
53 
54 static struct bch_member members_v1_get(struct bch_sb_field_members_v1 *mi, int i)
55 {
56 	struct bch_member ret, *p = members_v1_get_mut(mi, i);
57 	memset(&ret, 0, sizeof(ret));
58 	memcpy(&ret, p, min_t(size_t, BCH_MEMBER_V1_BYTES, sizeof(ret)));
59 	return ret;
60 }
61 
62 struct bch_member bch2_sb_member_get(struct bch_sb *sb, int i)
63 {
64 	struct bch_sb_field_members_v2 *mi2 = bch2_sb_field_get(sb, members_v2);
65 	if (mi2)
66 		return members_v2_get(mi2, i);
67 	struct bch_sb_field_members_v1 *mi1 = bch2_sb_field_get(sb, members_v1);
68 	return members_v1_get(mi1, i);
69 }
70 
71 static int sb_members_v2_resize_entries(struct bch_fs *c)
72 {
73 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
74 
75 	if (le16_to_cpu(mi->member_bytes) < sizeof(struct bch_member)) {
76 		unsigned u64s = DIV_ROUND_UP((sizeof(*mi) + sizeof(mi->_members[0]) *
77 					      c->disk_sb.sb->nr_devices), 8);
78 
79 		mi = bch2_sb_field_resize(&c->disk_sb, members_v2, u64s);
80 		if (!mi)
81 			return -BCH_ERR_ENOSPC_sb_members_v2;
82 
83 		for (int i = c->disk_sb.sb->nr_devices - 1; i >= 0; --i) {
84 			void *dst = (void *) mi->_members + (i * sizeof(struct bch_member));
85 			memmove(dst, __bch2_members_v2_get_mut(mi, i), le16_to_cpu(mi->member_bytes));
86 			memset(dst + le16_to_cpu(mi->member_bytes),
87 			       0, (sizeof(struct bch_member) - le16_to_cpu(mi->member_bytes)));
88 		}
89 		mi->member_bytes = cpu_to_le16(sizeof(struct bch_member));
90 	}
91 	return 0;
92 }
93 
94 int bch2_sb_members_v2_init(struct bch_fs *c)
95 {
96 	struct bch_sb_field_members_v1 *mi1;
97 	struct bch_sb_field_members_v2 *mi2;
98 
99 	if (!bch2_sb_field_get(c->disk_sb.sb, members_v2)) {
100 		mi2 = bch2_sb_field_resize(&c->disk_sb, members_v2,
101 				DIV_ROUND_UP(sizeof(*mi2) +
102 					     sizeof(struct bch_member) * c->sb.nr_devices,
103 					     sizeof(u64)));
104 		mi1 = bch2_sb_field_get(c->disk_sb.sb, members_v1);
105 		memcpy(&mi2->_members[0], &mi1->_members[0],
106 		       BCH_MEMBER_V1_BYTES * c->sb.nr_devices);
107 		memset(&mi2->pad[0], 0, sizeof(mi2->pad));
108 		mi2->member_bytes = cpu_to_le16(BCH_MEMBER_V1_BYTES);
109 	}
110 
111 	return sb_members_v2_resize_entries(c);
112 }
113 
114 int bch2_sb_members_cpy_v2_v1(struct bch_sb_handle *disk_sb)
115 {
116 	struct bch_sb_field_members_v1 *mi1;
117 	struct bch_sb_field_members_v2 *mi2;
118 
119 	mi1 = bch2_sb_field_resize(disk_sb, members_v1,
120 			DIV_ROUND_UP(sizeof(*mi1) + BCH_MEMBER_V1_BYTES *
121 				     disk_sb->sb->nr_devices, sizeof(u64)));
122 	if (!mi1)
123 		return -BCH_ERR_ENOSPC_sb_members;
124 
125 	mi2 = bch2_sb_field_get(disk_sb->sb, members_v2);
126 
127 	for (unsigned i = 0; i < disk_sb->sb->nr_devices; i++)
128 		memcpy(members_v1_get_mut(mi1, i), __bch2_members_v2_get_mut(mi2, i), BCH_MEMBER_V1_BYTES);
129 
130 	return 0;
131 }
132 
133 static int validate_member(struct printbuf *err,
134 			   struct bch_member m,
135 			   struct bch_sb *sb,
136 			   int i)
137 {
138 	if (le64_to_cpu(m.nbuckets) > BCH_MEMBER_NBUCKETS_MAX) {
139 		prt_printf(err, "device %u: too many buckets (got %llu, max %u)",
140 			   i, le64_to_cpu(m.nbuckets), BCH_MEMBER_NBUCKETS_MAX);
141 		return -BCH_ERR_invalid_sb_members;
142 	}
143 
144 	if (le64_to_cpu(m.nbuckets) -
145 	    le16_to_cpu(m.first_bucket) < BCH_MIN_NR_NBUCKETS) {
146 		prt_printf(err, "device %u: not enough buckets (got %llu, max %u)",
147 			   i, le64_to_cpu(m.nbuckets), BCH_MIN_NR_NBUCKETS);
148 		return -BCH_ERR_invalid_sb_members;
149 	}
150 
151 	if (le16_to_cpu(m.bucket_size) <
152 	    le16_to_cpu(sb->block_size)) {
153 		prt_printf(err, "device %u: bucket size %u smaller than block size %u",
154 			   i, le16_to_cpu(m.bucket_size), le16_to_cpu(sb->block_size));
155 		return -BCH_ERR_invalid_sb_members;
156 	}
157 
158 	if (le16_to_cpu(m.bucket_size) <
159 	    BCH_SB_BTREE_NODE_SIZE(sb)) {
160 		prt_printf(err, "device %u: bucket size %u smaller than btree node size %llu",
161 			   i, le16_to_cpu(m.bucket_size), BCH_SB_BTREE_NODE_SIZE(sb));
162 		return -BCH_ERR_invalid_sb_members;
163 	}
164 
165 	return 0;
166 }
167 
168 static void member_to_text(struct printbuf *out,
169 			   struct bch_member m,
170 			   struct bch_sb_field_disk_groups *gi,
171 			   struct bch_sb *sb,
172 			   int i)
173 {
174 	unsigned data_have = bch2_sb_dev_has_data(sb, i);
175 	u64 bucket_size = le16_to_cpu(m.bucket_size);
176 	u64 device_size = le64_to_cpu(m.nbuckets) * bucket_size;
177 
178 	if (!bch2_member_alive(&m))
179 		return;
180 
181 	prt_printf(out, "Device:\t%u\n", i);
182 
183 	printbuf_indent_add(out, 2);
184 
185 	prt_printf(out, "Label:\t");
186 	if (BCH_MEMBER_GROUP(&m)) {
187 		unsigned idx = BCH_MEMBER_GROUP(&m) - 1;
188 
189 		if (idx < disk_groups_nr(gi))
190 			prt_printf(out, "%s (%u)",
191 				   gi->entries[idx].label, idx);
192 		else
193 			prt_printf(out, "(bad disk labels section)");
194 	} else {
195 		prt_printf(out, "(none)");
196 	}
197 	prt_newline(out);
198 
199 	prt_printf(out, "UUID:\t");
200 	pr_uuid(out, m.uuid.b);
201 	prt_newline(out);
202 
203 	prt_printf(out, "Size:\t");
204 	prt_units_u64(out, device_size << 9);
205 	prt_newline(out);
206 
207 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
208 		prt_printf(out, "%s errors:\t%llu\n", bch2_member_error_strs[i], le64_to_cpu(m.errors[i]));
209 
210 	for (unsigned i = 0; i < BCH_IOPS_NR; i++)
211 		prt_printf(out, "%s iops:\t%u\n", bch2_iops_measurements[i], le32_to_cpu(m.iops[i]));
212 
213 	prt_printf(out, "Bucket size:\t");
214 	prt_units_u64(out, bucket_size << 9);
215 	prt_newline(out);
216 
217 	prt_printf(out, "First bucket:\t%u\n", le16_to_cpu(m.first_bucket));
218 	prt_printf(out, "Buckets:\t%llu\n", le64_to_cpu(m.nbuckets));
219 
220 	prt_printf(out, "Last mount:\t");
221 	if (m.last_mount)
222 		bch2_prt_datetime(out, le64_to_cpu(m.last_mount));
223 	else
224 		prt_printf(out, "(never)");
225 	prt_newline(out);
226 
227 	prt_printf(out, "Last superblock write:\t%llu\n", le64_to_cpu(m.seq));
228 
229 	prt_printf(out, "State:\t%s\n",
230 		   BCH_MEMBER_STATE(&m) < BCH_MEMBER_STATE_NR
231 		   ? bch2_member_states[BCH_MEMBER_STATE(&m)]
232 		   : "unknown");
233 
234 	prt_printf(out, "Data allowed:\t");
235 	if (BCH_MEMBER_DATA_ALLOWED(&m))
236 		prt_bitflags(out, __bch2_data_types, BCH_MEMBER_DATA_ALLOWED(&m));
237 	else
238 		prt_printf(out, "(none)");
239 	prt_newline(out);
240 
241 	prt_printf(out, "Has data:\t");
242 	if (data_have)
243 		prt_bitflags(out, __bch2_data_types, data_have);
244 	else
245 		prt_printf(out, "(none)");
246 	prt_newline(out);
247 
248 	prt_printf(out, "Btree allocated bitmap blocksize:\t");
249 	prt_units_u64(out, 1ULL << m.btree_bitmap_shift);
250 	prt_newline(out);
251 
252 	prt_printf(out, "Btree allocated bitmap:\t");
253 	bch2_prt_u64_base2_nbits(out, le64_to_cpu(m.btree_allocated_bitmap), 64);
254 	prt_newline(out);
255 
256 	prt_printf(out, "Durability:\t%llu\n", BCH_MEMBER_DURABILITY(&m) ? BCH_MEMBER_DURABILITY(&m) - 1 : 1);
257 
258 	prt_printf(out, "Discard:\t%llu\n", BCH_MEMBER_DISCARD(&m));
259 	prt_printf(out, "Freespace initialized:\t%llu\n", BCH_MEMBER_FREESPACE_INITIALIZED(&m));
260 
261 	printbuf_indent_sub(out, 2);
262 }
263 
264 static int bch2_sb_members_v1_validate(struct bch_sb *sb, struct bch_sb_field *f,
265 				enum bch_validate_flags flags, struct printbuf *err)
266 {
267 	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
268 	unsigned i;
269 
270 	if ((void *) members_v1_get_mut(mi, sb->nr_devices) > vstruct_end(&mi->field)) {
271 		prt_printf(err, "too many devices for section size");
272 		return -BCH_ERR_invalid_sb_members;
273 	}
274 
275 	for (i = 0; i < sb->nr_devices; i++) {
276 		struct bch_member m = members_v1_get(mi, i);
277 
278 		int ret = validate_member(err, m, sb, i);
279 		if (ret)
280 			return ret;
281 	}
282 
283 	return 0;
284 }
285 
286 static void bch2_sb_members_v1_to_text(struct printbuf *out, struct bch_sb *sb,
287 				       struct bch_sb_field *f)
288 {
289 	struct bch_sb_field_members_v1 *mi = field_to_type(f, members_v1);
290 	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
291 	unsigned i;
292 
293 	for (i = 0; i < sb->nr_devices; i++)
294 		member_to_text(out, members_v1_get(mi, i), gi, sb, i);
295 }
296 
/* Superblock field operations (validate, to_text) for the members_v1 section */
const struct bch_sb_field_ops bch_sb_field_ops_members_v1 = {
	.validate	= bch2_sb_members_v1_validate,
	.to_text	= bch2_sb_members_v1_to_text,
};
301 
302 static void bch2_sb_members_v2_to_text(struct printbuf *out, struct bch_sb *sb,
303 				       struct bch_sb_field *f)
304 {
305 	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
306 	struct bch_sb_field_disk_groups *gi = bch2_sb_field_get(sb, disk_groups);
307 	unsigned i;
308 
309 	for (i = 0; i < sb->nr_devices; i++)
310 		member_to_text(out, members_v2_get(mi, i), gi, sb, i);
311 }
312 
313 static int bch2_sb_members_v2_validate(struct bch_sb *sb, struct bch_sb_field *f,
314 				enum bch_validate_flags flags, struct printbuf *err)
315 {
316 	struct bch_sb_field_members_v2 *mi = field_to_type(f, members_v2);
317 	size_t mi_bytes = (void *) __bch2_members_v2_get_mut(mi, sb->nr_devices) -
318 		(void *) mi;
319 
320 	if (mi_bytes > vstruct_bytes(&mi->field)) {
321 		prt_printf(err, "section too small (%zu > %zu)",
322 			   mi_bytes, vstruct_bytes(&mi->field));
323 		return -BCH_ERR_invalid_sb_members;
324 	}
325 
326 	for (unsigned i = 0; i < sb->nr_devices; i++) {
327 		int ret = validate_member(err, members_v2_get(mi, i), sb, i);
328 		if (ret)
329 			return ret;
330 	}
331 
332 	return 0;
333 }
334 
/* Superblock field operations (validate, to_text) for the members_v2 section */
const struct bch_sb_field_ops bch_sb_field_ops_members_v2 = {
	.validate	= bch2_sb_members_v2_validate,
	.to_text	= bch2_sb_members_v2_to_text,
};
339 
340 void bch2_sb_members_from_cpu(struct bch_fs *c)
341 {
342 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
343 
344 	rcu_read_lock();
345 	for_each_member_device_rcu(c, ca, NULL) {
346 		struct bch_member *m = __bch2_members_v2_get_mut(mi, ca->dev_idx);
347 
348 		for (unsigned e = 0; e < BCH_MEMBER_ERROR_NR; e++)
349 			m->errors[e] = cpu_to_le64(atomic64_read(&ca->errors[e]));
350 	}
351 	rcu_read_unlock();
352 }
353 
354 void bch2_dev_io_errors_to_text(struct printbuf *out, struct bch_dev *ca)
355 {
356 	struct bch_fs *c = ca->fs;
357 	struct bch_member m;
358 
359 	mutex_lock(&ca->fs->sb_lock);
360 	m = bch2_sb_member_get(c->disk_sb.sb, ca->dev_idx);
361 	mutex_unlock(&ca->fs->sb_lock);
362 
363 	printbuf_tabstop_push(out, 12);
364 
365 	prt_str(out, "IO errors since filesystem creation");
366 	prt_newline(out);
367 
368 	printbuf_indent_add(out, 2);
369 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
370 		prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i], atomic64_read(&ca->errors[i]));
371 	printbuf_indent_sub(out, 2);
372 
373 	prt_str(out, "IO errors since ");
374 	bch2_pr_time_units(out, (ktime_get_real_seconds() - le64_to_cpu(m.errors_reset_time)) * NSEC_PER_SEC);
375 	prt_str(out, " ago");
376 	prt_newline(out);
377 
378 	printbuf_indent_add(out, 2);
379 	for (unsigned i = 0; i < BCH_MEMBER_ERROR_NR; i++)
380 		prt_printf(out, "%s:\t%llu\n", bch2_member_error_strs[i],
381 			   atomic64_read(&ca->errors[i]) - le64_to_cpu(m.errors_at_reset[i]));
382 	printbuf_indent_sub(out, 2);
383 }
384 
385 void bch2_dev_errors_reset(struct bch_dev *ca)
386 {
387 	struct bch_fs *c = ca->fs;
388 	struct bch_member *m;
389 
390 	mutex_lock(&c->sb_lock);
391 	m = bch2_members_v2_get_mut(c->disk_sb.sb, ca->dev_idx);
392 	for (unsigned i = 0; i < ARRAY_SIZE(m->errors_at_reset); i++)
393 		m->errors_at_reset[i] = cpu_to_le64(atomic64_read(&ca->errors[i]));
394 	m->errors_reset_time = cpu_to_le64(ktime_get_real_seconds());
395 
396 	bch2_write_super(c);
397 	mutex_unlock(&c->sb_lock);
398 }
399 
400 /*
401  * Per member "range has btree nodes" bitmap:
402  *
403  * This is so that if we ever have to run the btree node scan to repair we don't
404  * have to scan full devices:
405  */
406 
407 bool bch2_dev_btree_bitmap_marked(struct bch_fs *c, struct bkey_s_c k)
408 {
409 	bool ret = true;
410 	rcu_read_lock();
411 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
412 		struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev);
413 		if (!ca)
414 			continue;
415 
416 		if (!bch2_dev_btree_bitmap_marked_sectors(ca, ptr->offset, btree_sectors(c))) {
417 			ret = false;
418 			break;
419 		}
420 	}
421 	rcu_read_unlock();
422 	return ret;
423 }
424 
425 static void __bch2_dev_btree_bitmap_mark(struct bch_sb_field_members_v2 *mi, unsigned dev,
426 				u64 start, unsigned sectors)
427 {
428 	struct bch_member *m = __bch2_members_v2_get_mut(mi, dev);
429 	u64 bitmap = le64_to_cpu(m->btree_allocated_bitmap);
430 
431 	u64 end = start + sectors;
432 
433 	int resize = ilog2(roundup_pow_of_two(end)) - (m->btree_bitmap_shift + 6);
434 	if (resize > 0) {
435 		u64 new_bitmap = 0;
436 
437 		for (unsigned i = 0; i < 64; i++)
438 			if (bitmap & BIT_ULL(i))
439 				new_bitmap |= BIT_ULL(i >> resize);
440 		bitmap = new_bitmap;
441 		m->btree_bitmap_shift += resize;
442 	}
443 
444 	BUG_ON(m->btree_bitmap_shift > 57);
445 	BUG_ON(end > 64ULL << m->btree_bitmap_shift);
446 
447 	for (unsigned bit = start >> m->btree_bitmap_shift;
448 	     (u64) bit << m->btree_bitmap_shift < end;
449 	     bit++)
450 		bitmap |= BIT_ULL(bit);
451 
452 	m->btree_allocated_bitmap = cpu_to_le64(bitmap);
453 }
454 
455 void bch2_dev_btree_bitmap_mark(struct bch_fs *c, struct bkey_s_c k)
456 {
457 	lockdep_assert_held(&c->sb_lock);
458 
459 	struct bch_sb_field_members_v2 *mi = bch2_sb_field_get(c->disk_sb.sb, members_v2);
460 	bkey_for_each_ptr(bch2_bkey_ptrs_c(k), ptr) {
461 		if (!bch2_member_exists(c->disk_sb.sb, ptr->dev))
462 			continue;
463 
464 		__bch2_dev_btree_bitmap_mark(mi, ptr->dev, ptr->offset, btree_sectors(c));
465 	}
466 }
467