// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "disk_accounting.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;

	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

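/*
 * Debug-build sanity checks on a replicas entry: it must list at least one
 * device, nr_required must be consistent with nr_devs, and the device list
 * must be sorted strictly ascending (i.e. no duplicates).
 */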
static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (unsigned i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

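/*
 * The in-memory replicas table is kept in eytzinger (BFS) order so lookups
 * can binary search it cache-efficiently; entries are compared as raw bytes
 * over the full entry_size, which works because they're zero-padded.
 */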
static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry_v1 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

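/*
 * Validate a single entry against the superblock: it must be nonempty, have a
 * sane nr_required, and reference only devices that exist as members
 * (BCH_SB_MEMBER_INVALID slots are tolerated).
 */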
int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
				 struct bch_sb *sb,
				 struct printbuf *err)
{
	if (!r->nr_devs) {
		prt_printf(err, "no devices in entry ");
		goto bad;
	}

	if (r->nr_required > 1 &&
	    r->nr_required >= r->nr_devs) {
		prt_printf(err, "bad nr_required in entry ");
		goto bad;
	}

	for (unsigned i = 0; i < r->nr_devs; i++)
		if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
		    !bch2_member_exists(sb, r->devs[i])) {
			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
			goto bad;
		}

	return 0;
bad:
	bch2_replicas_entry_to_text(err, r);
	return -BCH_ERR_invalid_replicas_entry;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
			       struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_cpu_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
}

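/*
 * Build a replicas entry from an extent's pointers: cached pointers don't
 * count towards durability and are skipped, and if any pointer is erasure
 * coded, nr_required is dropped to 0 - redundancy then comes from the stripe
 * rather than from whole-extent replication.
 */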
static void extent_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry_v1 *r)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	r->nr_required = 1;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached)
			continue;

		if (!p.has_ec)
			replicas_entry_add_dev(r, p.ptr.dev);
		else
			r->nr_required = 0;
	}
}

static void stripe_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry_v1 *r)
{
	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
	const struct bch_extent_ptr *ptr;

	r->nr_required = s.v->nr_blocks - s.v->nr_redundant;

	for (ptr = s.v->ptrs;
	     ptr < s.v->ptrs + s.v->nr_blocks;
	     ptr++)
		replicas_entry_add_dev(r, ptr->dev);
}

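/*
 * Translate a bkey into the replicas entry describing the devices its data
 * lives on; key types that don't point at data leave the entry with
 * nr_devs == 0.
 */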
void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
			   struct bkey_s_c k)
{
	e->nr_devs = 0;

	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_btree_ptr_v2:
		e->data_type = BCH_DATA_btree;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		e->data_type = BCH_DATA_user;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_stripe:
		e->data_type = BCH_DATA_parity;
		stripe_to_replicas(k, e);
		break;
	}

	bch2_replicas_entry_sort(e);
}

void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
			      enum bch_data_type data_type,
			      struct bch_devs_list devs)
{
	BUG_ON(!data_type ||
	       data_type == BCH_DATA_sb ||
	       data_type >= BCH_DATA_NR);

	e->data_type	= data_type;
	e->nr_devs	= 0;
	e->nr_required	= 1;

	darray_for_each(devs, i)
		replicas_entry_add_dev(e, *i);

	bch2_replicas_entry_sort(e);
}

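/*
 * Return a copy of the table with new_entry added, widening entry_size if the
 * new entry is larger than the current width. On allocation failure the
 * returned table has a NULL .entries, which callers must check.
 */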
static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_fs *c,
		       struct bch_replicas_cpu *old,
		       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new = {
		.nr		= old->nr + 1,
		.entry_size	= max_t(unsigned, old->entry_size,
					replicas_entry_bytes(new_entry)),
	};

	new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
	if (!new.entries)
		return new;

	for (unsigned i = 0; i < old->nr; i++)
		memcpy(cpu_replicas_entry(&new, i),
		       cpu_replicas_entry(old, i),
		       old->entry_size);

	memcpy(cpu_replicas_entry(&new, old->nr),
	       new_entry,
	       replicas_entry_bytes(new_entry));

	bch2_cpu_replicas_sort(&new);
	return new;
}

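/*
 * Exact-match lookup via eytzinger binary search. A search entry wider than
 * the table's entry_size can't possibly be present, so that case fails fast.
 */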
static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry_v1 *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

#define entry_cmp(_l, _r)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry_v1 *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

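/*
 * An empty entry is trivially marked; otherwise the entry must be present in
 * the main table, and in the GC table too if a replicas GC is in flight.
 */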
bool bch2_replicas_marked_locked(struct bch_fs *c,
				 struct bch_replicas_entry_v1 *search)
{
	verify_replicas_entry(search);

	return !search->nr_devs ||
		(__replicas_has_entry(&c->replicas, search) &&
		 (likely((!c->replicas_gc.entries)) ||
		  __replicas_has_entry(&c->replicas_gc, search)));
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry_v1 *search)
{
	percpu_down_read(&c->mark_lock);
	bool ret = bch2_replicas_marked_locked(c, search);
	percpu_up_read(&c->mark_lock);

	return ret;
}

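/*
 * Slowpath for marking a new replicas entry: build the enlarged table(s) and
 * the new superblock field, write the superblock, and only then swap in the
 * in-memory copies - the on-disk superblock must never lag behind what the
 * in-memory tables claim is marked.
 */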
noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		swap(c->replicas, new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}

int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret = ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
	if (!ret)
		swap(c->replicas, c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

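/*
 * Start a GC pass over the data types in typemask: the GC table is seeded
 * with only the entries we want to preserve (types outside the mask, plus
 * unknown types); entries of the GC'd types survive only if they're re-marked
 * - via the slowpath above - before bch2_replicas_gc_end().
 */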
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry_v1 *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr		= 0;
	c->replicas_gc.entry_size	= 0;

	for_each_cpu_replicas_entry(&c->replicas, e) {
		/* Preserve unknown data types */
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}
	}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned nr;
	int ret = 0;

	bch2_accounting_mem_gc(c);
retry:
	nr		= READ_ONCE(c->replicas.nr);
	new.entry_size	= READ_ONCE(c->replicas.entry_size);
	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr			!= c->replicas.nr ||
	    new.entry_size	!= c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (unsigned i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		struct disk_accounting_pos k = {
			.type = BCH_DISK_ACCOUNTING_replicas,
		};

		unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
			      "embedded variable length struct");

		struct bpos p = disk_accounting_pos_to_bpos(&k);

		struct bch_accounting_mem *acc = &c->accounting;
		bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
					    accounting_pos_cmp, &p) >= acc->k.nr;

		if (e->data_type == BCH_DATA_journal || !kill)
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret = bch2_cpu_replicas_to_sb_replicas(c, &new);

	if (!ret)
		swap(c->replicas, new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v1 *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

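/*
 * v0 superblock entries lack the nr_required field, so each converted entry
 * needs one extra byte and gets nr_required = 1, the only value the v0 format
 * could express.
 */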
static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry_v1) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry_v1 *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type	= e->data_type;
		dst->nr_devs	= e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

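/*
 * Load the in-memory table from whichever replicas field the superblock has,
 * preferring the current format over v0.
 */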
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);
	swap(c->replicas, new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry_v1 *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
				    DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type	= src->data_type;
		dst->nr_devs	= src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

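/*
 * Write the in-memory table to the superblock: the more compact v0 format is
 * used when every entry has nr_required == 1, since that's all v0 can encode.
 * Whichever field gets written, the other version is deleted so the two never
 * coexist.
 */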
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry_v1 *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
				    DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_r(cpu_r->entries,
	       cpu_r->nr,
	       cpu_r->entry_size,
	       bch2_memcmp, NULL,
	       (void *)(size_t)cpu_r->entry_size);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry_v1 *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_replicas_validate,
	.to_text	= bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_replicas_v0_validate,
	.to_text	= bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */

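/*
 * Check every replicas entry against a mask of online devices: for each entry
 * that isn't fully available, work out whether data would merely be degraded
 * or actually lost, and allow it only if the corresponding BCH_FORCE_IF_*
 * flags were supplied. Entries whose missing devices are all marked failed
 * are skipped - losing those wouldn't change anything.
 */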
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry_v1 *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		rcu_read_lock();
		for (unsigned i = 0; i < e->nr_devs; i++) {
			nr_online += test_bit(e->devs[i], devs.d);

			struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
			nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
		}
		rcu_read_unlock();

		if (nr_online + nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}
	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

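/*
 * Return a bitmask of the data types (1U << BCH_DATA_*) that the superblock's
 * replicas entries say are present on the given device.
 */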
unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned data_has = 0;

	replicas	= bch2_sb_field_get(sb, replicas);
	replicas_v0	= bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry_v1 *r;

		for_each_replicas_entry(replicas, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	mutex_lock(&c->sb_lock);
	unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);
}