1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "buckets.h" 5 #include "disk_accounting.h" 6 #include "journal.h" 7 #include "replicas.h" 8 #include "super-io.h" 9 10 #include <linux/sort.h> 11 12 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, 13 struct bch_replicas_cpu *); 14 15 /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ 16 static int bch2_memcmp(const void *l, const void *r, const void *priv) 17 { 18 size_t size = (size_t) priv; 19 return memcmp(l, r, size); 20 } 21 22 /* Replicas tracking - in memory: */ 23 24 static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) 25 { 26 #ifdef CONFIG_BCACHEFS_DEBUG 27 BUG_ON(!e->nr_devs); 28 BUG_ON(e->nr_required > 1 && 29 e->nr_required >= e->nr_devs); 30 31 for (unsigned i = 0; i + 1 < e->nr_devs; i++) 32 BUG_ON(e->devs[i] >= e->devs[i + 1]); 33 #endif 34 } 35 36 void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) 37 { 38 bubble_sort(e->devs, e->nr_devs, u8_cmp); 39 } 40 41 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) 42 { 43 eytzinger0_sort_r(r->entries, r->nr, r->entry_size, 44 bch2_memcmp, NULL, (void *)(size_t)r->entry_size); 45 } 46 47 static void bch2_replicas_entry_v0_to_text(struct printbuf *out, 48 struct bch_replicas_entry_v0 *e) 49 { 50 bch2_prt_data_type(out, e->data_type); 51 52 prt_printf(out, ": %u [", e->nr_devs); 53 for (unsigned i = 0; i < e->nr_devs; i++) 54 prt_printf(out, i ? " %u" : "%u", e->devs[i]); 55 prt_printf(out, "]"); 56 } 57 58 void bch2_replicas_entry_to_text(struct printbuf *out, 59 struct bch_replicas_entry_v1 *e) 60 { 61 bch2_prt_data_type(out, e->data_type); 62 63 prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs); 64 for (unsigned i = 0; i < e->nr_devs; i++) 65 prt_printf(out, i ? " %u" : "%u", e->devs[i]); 66 prt_printf(out, "]"); 67 } 68 69 int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, 70 struct bch_sb *sb, 71 struct printbuf *err) 72 { 73 if (!r->nr_devs) { 74 prt_printf(err, "no devices in entry "); 75 goto bad; 76 } 77 78 if (r->nr_required > 1 && 79 r->nr_required >= r->nr_devs) { 80 prt_printf(err, "bad nr_required in entry "); 81 goto bad; 82 } 83 84 for (unsigned i = 0; i < r->nr_devs; i++) 85 if (r->devs[i] != BCH_SB_MEMBER_INVALID && 86 !bch2_member_exists(sb, r->devs[i])) { 87 prt_printf(err, "invalid device %u in entry ", r->devs[i]); 88 goto bad; 89 } 90 91 return 0; 92 bad: 93 bch2_replicas_entry_to_text(err, r); 94 return -BCH_ERR_invalid_replicas_entry; 95 } 96 97 void bch2_cpu_replicas_to_text(struct printbuf *out, 98 struct bch_replicas_cpu *r) 99 { 100 struct bch_replicas_entry_v1 *e; 101 bool first = true; 102 103 for_each_cpu_replicas_entry(r, e) { 104 if (!first) 105 prt_printf(out, " "); 106 first = false; 107 108 bch2_replicas_entry_to_text(out, e); 109 } 110 } 111 112 static void extent_to_replicas(struct bkey_s_c k, 113 struct bch_replicas_entry_v1 *r) 114 { 115 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 116 const union bch_extent_entry *entry; 117 struct extent_ptr_decoded p; 118 119 r->nr_required = 1; 120 121 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 122 if (p.ptr.cached) 123 continue; 124 125 if (!p.has_ec) 126 replicas_entry_add_dev(r, p.ptr.dev); 127 else 128 r->nr_required = 0; 129 } 130 } 131 132 static void stripe_to_replicas(struct bkey_s_c k, 133 struct bch_replicas_entry_v1 *r) 134 { 135 struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); 136 const struct bch_extent_ptr *ptr; 137 138 r->nr_required = s.v->nr_blocks - s.v->nr_redundant; 139 140 for (ptr = s.v->ptrs; 141 ptr < s.v->ptrs + s.v->nr_blocks; 142 ptr++) 143 replicas_entry_add_dev(r, ptr->dev); 144 } 145 146 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e, 147 struct bkey_s_c k) 148 { 149 e->nr_devs = 0; 150 151 switch (k.k->type) { 152 case KEY_TYPE_btree_ptr: 153 case KEY_TYPE_btree_ptr_v2: 154 e->data_type = BCH_DATA_btree; 155 extent_to_replicas(k, e); 156 break; 157 case KEY_TYPE_extent: 158 case KEY_TYPE_reflink_v: 159 e->data_type = BCH_DATA_user; 160 extent_to_replicas(k, e); 161 break; 162 case KEY_TYPE_stripe: 163 e->data_type = BCH_DATA_parity; 164 stripe_to_replicas(k, e); 165 break; 166 } 167 168 bch2_replicas_entry_sort(e); 169 } 170 171 void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, 172 enum bch_data_type data_type, 173 struct bch_devs_list devs) 174 { 175 BUG_ON(!data_type || 176 data_type == BCH_DATA_sb || 177 data_type >= BCH_DATA_NR); 178 179 e->data_type = data_type; 180 e->nr_devs = 0; 181 e->nr_required = 1; 182 183 darray_for_each(devs, i) 184 replicas_entry_add_dev(e, *i); 185 186 bch2_replicas_entry_sort(e); 187 } 188 189 static struct bch_replicas_cpu 190 cpu_replicas_add_entry(struct bch_fs *c, 191 struct bch_replicas_cpu *old, 192 struct bch_replicas_entry_v1 *new_entry) 193 { 194 struct bch_replicas_cpu new = { 195 .nr = old->nr + 1, 196 .entry_size = max_t(unsigned, old->entry_size, 197 replicas_entry_bytes(new_entry)), 198 }; 199 200 new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); 201 if (!new.entries) 202 return new; 203 204 for (unsigned i = 0; i < old->nr; i++) 205 memcpy(cpu_replicas_entry(&new, i), 206 cpu_replicas_entry(old, i), 207 old->entry_size); 208 209 memcpy(cpu_replicas_entry(&new, old->nr), 210 new_entry, 211 replicas_entry_bytes(new_entry)); 212 213 bch2_cpu_replicas_sort(&new); 214 return new; 215 } 216 217 static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, 218 struct bch_replicas_entry_v1 *search) 219 { 220 int idx, entry_size = replicas_entry_bytes(search); 221 222 if (unlikely(entry_size > r->entry_size)) 223 return -1; 224 225 #define entry_cmp(_l, _r) memcmp(_l, _r, entry_size) 226 idx = eytzinger0_find(r->entries, r->nr, r->entry_size, 227 entry_cmp, search); 228 #undef entry_cmp 229 230 return idx < r->nr ? idx : -1; 231 } 232 233 int bch2_replicas_entry_idx(struct bch_fs *c, 234 struct bch_replicas_entry_v1 *search) 235 { 236 bch2_replicas_entry_sort(search); 237 238 return __replicas_entry_idx(&c->replicas, search); 239 } 240 241 static bool __replicas_has_entry(struct bch_replicas_cpu *r, 242 struct bch_replicas_entry_v1 *search) 243 { 244 return __replicas_entry_idx(r, search) >= 0; 245 } 246 247 bool bch2_replicas_marked_locked(struct bch_fs *c, 248 struct bch_replicas_entry_v1 *search) 249 { 250 verify_replicas_entry(search); 251 252 return !search->nr_devs || 253 (__replicas_has_entry(&c->replicas, search) && 254 (likely((!c->replicas_gc.entries)) || 255 __replicas_has_entry(&c->replicas_gc, search))); 256 } 257 258 bool bch2_replicas_marked(struct bch_fs *c, 259 struct bch_replicas_entry_v1 *search) 260 { 261 percpu_down_read(&c->mark_lock); 262 bool ret = bch2_replicas_marked_locked(c, search); 263 percpu_up_read(&c->mark_lock); 264 265 return ret; 266 } 267 268 noinline 269 static int bch2_mark_replicas_slowpath(struct bch_fs *c, 270 struct bch_replicas_entry_v1 *new_entry) 271 { 272 struct bch_replicas_cpu new_r, new_gc; 273 int ret = 0; 274 275 verify_replicas_entry(new_entry); 276 277 memset(&new_r, 0, sizeof(new_r)); 278 memset(&new_gc, 0, sizeof(new_gc)); 279 280 mutex_lock(&c->sb_lock); 281 282 if (c->replicas_gc.entries && 283 !__replicas_has_entry(&c->replicas_gc, new_entry)) { 284 new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); 285 if (!new_gc.entries) { 286 ret = -BCH_ERR_ENOMEM_cpu_replicas; 287 goto err; 288 } 289 } 290 291 if (!__replicas_has_entry(&c->replicas, new_entry)) { 292 new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); 293 if (!new_r.entries) { 294 ret = -BCH_ERR_ENOMEM_cpu_replicas; 295 goto err; 296 } 297 298 ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); 299 if (ret) 300 goto err; 301 } 302 303 if (!new_r.entries && 304 !new_gc.entries) 305 goto out; 306 307 /* allocations done, now commit: */ 308 309 if (new_r.entries) 310 bch2_write_super(c); 311 312 /* don't update in memory replicas until changes are persistent */ 313 percpu_down_write(&c->mark_lock); 314 if (new_r.entries) 315 swap(c->replicas, new_r); 316 if (new_gc.entries) 317 swap(new_gc, c->replicas_gc); 318 percpu_up_write(&c->mark_lock); 319 out: 320 mutex_unlock(&c->sb_lock); 321 322 kfree(new_r.entries); 323 kfree(new_gc.entries); 324 325 return ret; 326 err: 327 bch_err_msg(c, ret, "adding replicas entry"); 328 goto out; 329 } 330 331 int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r) 332 { 333 return likely(bch2_replicas_marked(c, r)) 334 ? 0 : bch2_mark_replicas_slowpath(c, r); 335 } 336 337 /* 338 * Old replicas_gc mechanism: only used for journal replicas entries now, should 339 * die at some point: 340 */ 341 342 int bch2_replicas_gc_end(struct bch_fs *c, int ret) 343 { 344 lockdep_assert_held(&c->replicas_gc_lock); 345 346 mutex_lock(&c->sb_lock); 347 percpu_down_write(&c->mark_lock); 348 349 ret = ret ?: 350 bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); 351 if (!ret) 352 swap(c->replicas, c->replicas_gc); 353 354 kfree(c->replicas_gc.entries); 355 c->replicas_gc.entries = NULL; 356 357 percpu_up_write(&c->mark_lock); 358 359 if (!ret) 360 bch2_write_super(c); 361 362 mutex_unlock(&c->sb_lock); 363 364 return ret; 365 } 366 367 int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) 368 { 369 struct bch_replicas_entry_v1 *e; 370 unsigned i = 0; 371 372 lockdep_assert_held(&c->replicas_gc_lock); 373 374 mutex_lock(&c->sb_lock); 375 BUG_ON(c->replicas_gc.entries); 376 377 c->replicas_gc.nr = 0; 378 c->replicas_gc.entry_size = 0; 379 380 for_each_cpu_replicas_entry(&c->replicas, e) { 381 /* Preserve unknown data types */ 382 if (e->data_type >= BCH_DATA_NR || 383 !((1 << e->data_type) & typemask)) { 384 c->replicas_gc.nr++; 385 c->replicas_gc.entry_size = 386 max_t(unsigned, c->replicas_gc.entry_size, 387 replicas_entry_bytes(e)); 388 } 389 } 390 391 c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, 392 c->replicas_gc.entry_size, 393 GFP_KERNEL); 394 if (!c->replicas_gc.entries) { 395 mutex_unlock(&c->sb_lock); 396 bch_err(c, "error allocating c->replicas_gc"); 397 return -BCH_ERR_ENOMEM_replicas_gc; 398 } 399 400 for_each_cpu_replicas_entry(&c->replicas, e) 401 if (e->data_type >= BCH_DATA_NR || 402 !((1 << e->data_type) & typemask)) 403 memcpy(cpu_replicas_entry(&c->replicas_gc, i++), 404 e, c->replicas_gc.entry_size); 405 406 bch2_cpu_replicas_sort(&c->replicas_gc); 407 mutex_unlock(&c->sb_lock); 408 409 return 0; 410 } 411 412 /* 413 * New much simpler mechanism for clearing out unneeded replicas entries - drop 414 * replicas entries that have 0 sectors used. 415 * 416 * However, we don't track sector counts for journal usage, so this doesn't drop 417 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism 418 * is retained for that. 419 */ 420 int bch2_replicas_gc2(struct bch_fs *c) 421 { 422 struct bch_replicas_cpu new = { 0 }; 423 unsigned nr; 424 int ret = 0; 425 426 bch2_accounting_mem_gc(c); 427 retry: 428 nr = READ_ONCE(c->replicas.nr); 429 new.entry_size = READ_ONCE(c->replicas.entry_size); 430 new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); 431 if (!new.entries) { 432 bch_err(c, "error allocating c->replicas_gc"); 433 return -BCH_ERR_ENOMEM_replicas_gc; 434 } 435 436 mutex_lock(&c->sb_lock); 437 percpu_down_write(&c->mark_lock); 438 439 if (nr != c->replicas.nr || 440 new.entry_size != c->replicas.entry_size) { 441 percpu_up_write(&c->mark_lock); 442 mutex_unlock(&c->sb_lock); 443 kfree(new.entries); 444 goto retry; 445 } 446 447 for (unsigned i = 0; i < c->replicas.nr; i++) { 448 struct bch_replicas_entry_v1 *e = 449 cpu_replicas_entry(&c->replicas, i); 450 451 struct disk_accounting_pos k = { 452 .type = BCH_DISK_ACCOUNTING_replicas, 453 }; 454 455 unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e), 456 "embedded variable length struct"); 457 458 struct bpos p = disk_accounting_pos_to_bpos(&k); 459 460 struct bch_accounting_mem *acc = &c->accounting; 461 bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), 462 accounting_pos_cmp, &p) >= acc->k.nr; 463 464 if (e->data_type == BCH_DATA_journal || !kill) 465 memcpy(cpu_replicas_entry(&new, new.nr++), 466 e, new.entry_size); 467 } 468 469 bch2_cpu_replicas_sort(&new); 470 471 ret = bch2_cpu_replicas_to_sb_replicas(c, &new); 472 473 if (!ret) 474 swap(c->replicas, new); 475 476 kfree(new.entries); 477 478 percpu_up_write(&c->mark_lock); 479 480 if (!ret) 481 bch2_write_super(c); 482 483 mutex_unlock(&c->sb_lock); 484 485 return ret; 486 } 487 488 /* Replicas tracking - superblock: */ 489 490 static int 491 __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, 492 struct bch_replicas_cpu *cpu_r) 493 { 494 struct bch_replicas_entry_v1 *e, *dst; 495 unsigned nr = 0, entry_size = 0, idx = 0; 496 497 for_each_replicas_entry(sb_r, e) { 498 entry_size = max_t(unsigned, entry_size, 499 replicas_entry_bytes(e)); 500 nr++; 501 } 502 503 cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); 504 if (!cpu_r->entries) 505 return -BCH_ERR_ENOMEM_cpu_replicas; 506 507 cpu_r->nr = nr; 508 cpu_r->entry_size = entry_size; 509 510 for_each_replicas_entry(sb_r, e) { 511 dst = cpu_replicas_entry(cpu_r, idx++); 512 memcpy(dst, e, replicas_entry_bytes(e)); 513 bch2_replicas_entry_sort(dst); 514 } 515 516 return 0; 517 } 518 519 static int 520 __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, 521 struct bch_replicas_cpu *cpu_r) 522 { 523 struct bch_replicas_entry_v0 *e; 524 unsigned nr = 0, entry_size = 0, idx = 0; 525 526 for_each_replicas_entry(sb_r, e) { 527 entry_size = max_t(unsigned, entry_size, 528 replicas_entry_bytes(e)); 529 nr++; 530 } 531 532 entry_size += sizeof(struct bch_replicas_entry_v1) - 533 sizeof(struct bch_replicas_entry_v0); 534 535 cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); 536 if (!cpu_r->entries) 537 return -BCH_ERR_ENOMEM_cpu_replicas; 538 539 cpu_r->nr = nr; 540 cpu_r->entry_size = entry_size; 541 542 for_each_replicas_entry(sb_r, e) { 543 struct bch_replicas_entry_v1 *dst = 544 cpu_replicas_entry(cpu_r, idx++); 545 546 dst->data_type = e->data_type; 547 dst->nr_devs = e->nr_devs; 548 dst->nr_required = 1; 549 memcpy(dst->devs, e->devs, e->nr_devs); 550 bch2_replicas_entry_sort(dst); 551 } 552 553 return 0; 554 } 555 556 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) 557 { 558 struct bch_sb_field_replicas *sb_v1; 559 struct bch_sb_field_replicas_v0 *sb_v0; 560 struct bch_replicas_cpu new_r = { 0, 0, NULL }; 561 int ret = 0; 562 563 if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas))) 564 ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); 565 else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0))) 566 ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); 567 if (ret) 568 return ret; 569 570 bch2_cpu_replicas_sort(&new_r); 571 572 percpu_down_write(&c->mark_lock); 573 swap(c->replicas, new_r); 574 percpu_up_write(&c->mark_lock); 575 576 kfree(new_r.entries); 577 578 return 0; 579 } 580 581 static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, 582 struct bch_replicas_cpu *r) 583 { 584 struct bch_sb_field_replicas_v0 *sb_r; 585 struct bch_replicas_entry_v0 *dst; 586 struct bch_replicas_entry_v1 *src; 587 size_t bytes; 588 589 bytes = sizeof(struct bch_sb_field_replicas); 590 591 for_each_cpu_replicas_entry(r, src) 592 bytes += replicas_entry_bytes(src) - 1; 593 594 sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0, 595 DIV_ROUND_UP(bytes, sizeof(u64))); 596 if (!sb_r) 597 return -BCH_ERR_ENOSPC_sb_replicas; 598 599 bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); 600 sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0); 601 602 memset(&sb_r->entries, 0, 603 vstruct_end(&sb_r->field) - 604 (void *) &sb_r->entries); 605 606 dst = sb_r->entries; 607 for_each_cpu_replicas_entry(r, src) { 608 dst->data_type = src->data_type; 609 dst->nr_devs = src->nr_devs; 610 memcpy(dst->devs, src->devs, src->nr_devs); 611 612 dst = replicas_entry_next(dst); 613 614 BUG_ON((void *) dst > vstruct_end(&sb_r->field)); 615 } 616 617 return 0; 618 } 619 620 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, 621 struct bch_replicas_cpu *r) 622 { 623 struct bch_sb_field_replicas *sb_r; 624 struct bch_replicas_entry_v1 *dst, *src; 625 bool need_v1 = false; 626 size_t bytes; 627 628 bytes = sizeof(struct bch_sb_field_replicas); 629 630 for_each_cpu_replicas_entry(r, src) { 631 bytes += replicas_entry_bytes(src); 632 if (src->nr_required != 1) 633 need_v1 = true; 634 } 635 636 if (!need_v1) 637 return bch2_cpu_replicas_to_sb_replicas_v0(c, r); 638 639 sb_r = bch2_sb_field_resize(&c->disk_sb, replicas, 640 DIV_ROUND_UP(bytes, sizeof(u64))); 641 if (!sb_r) 642 return -BCH_ERR_ENOSPC_sb_replicas; 643 644 bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); 645 sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas); 646 647 memset(&sb_r->entries, 0, 648 vstruct_end(&sb_r->field) - 649 (void *) &sb_r->entries); 650 651 dst = sb_r->entries; 652 for_each_cpu_replicas_entry(r, src) { 653 memcpy(dst, src, replicas_entry_bytes(src)); 654 655 dst = replicas_entry_next(dst); 656 657 BUG_ON((void *) dst > vstruct_end(&sb_r->field)); 658 } 659 660 return 0; 661 } 662 663 static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, 664 struct bch_sb *sb, 665 struct printbuf *err) 666 { 667 unsigned i; 668 669 sort_r(cpu_r->entries, 670 cpu_r->nr, 671 cpu_r->entry_size, 672 bch2_memcmp, NULL, 673 (void *)(size_t)cpu_r->entry_size); 674 675 for (i = 0; i < cpu_r->nr; i++) { 676 struct bch_replicas_entry_v1 *e = 677 cpu_replicas_entry(cpu_r, i); 678 679 int ret = bch2_replicas_entry_validate(e, sb, err); 680 if (ret) 681 return ret; 682 683 if (i + 1 < cpu_r->nr) { 684 struct bch_replicas_entry_v1 *n = 685 cpu_replicas_entry(cpu_r, i + 1); 686 687 BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0); 688 689 if (!memcmp(e, n, cpu_r->entry_size)) { 690 prt_printf(err, "duplicate replicas entry "); 691 bch2_replicas_entry_to_text(err, e); 692 return -BCH_ERR_invalid_sb_replicas; 693 } 694 } 695 } 696 697 return 0; 698 } 699 700 static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f, 701 enum bch_validate_flags flags, struct printbuf *err) 702 { 703 struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); 704 struct bch_replicas_cpu cpu_r; 705 int ret; 706 707 ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r); 708 if (ret) 709 return ret; 710 711 ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); 712 kfree(cpu_r.entries); 713 return ret; 714 } 715 716 static void bch2_sb_replicas_to_text(struct printbuf *out, 717 struct bch_sb *sb, 718 struct bch_sb_field *f) 719 { 720 struct bch_sb_field_replicas *r = field_to_type(f, replicas); 721 struct bch_replicas_entry_v1 *e; 722 bool first = true; 723 724 for_each_replicas_entry(r, e) { 725 if (!first) 726 prt_printf(out, " "); 727 first = false; 728 729 bch2_replicas_entry_to_text(out, e); 730 } 731 prt_newline(out); 732 } 733 734 const struct bch_sb_field_ops bch_sb_field_ops_replicas = { 735 .validate = bch2_sb_replicas_validate, 736 .to_text = bch2_sb_replicas_to_text, 737 }; 738 739 static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f, 740 enum bch_validate_flags flags, struct printbuf *err) 741 { 742 struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); 743 struct bch_replicas_cpu cpu_r; 744 int ret; 745 746 ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r); 747 if (ret) 748 return ret; 749 750 ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); 751 kfree(cpu_r.entries); 752 return ret; 753 } 754 755 static void bch2_sb_replicas_v0_to_text(struct printbuf *out, 756 struct bch_sb *sb, 757 struct bch_sb_field *f) 758 { 759 struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); 760 struct bch_replicas_entry_v0 *e; 761 bool first = true; 762 763 for_each_replicas_entry(sb_r, e) { 764 if (!first) 765 prt_printf(out, " "); 766 first = false; 767 768 bch2_replicas_entry_v0_to_text(out, e); 769 } 770 prt_newline(out); 771 } 772 773 const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { 774 .validate = bch2_sb_replicas_v0_validate, 775 .to_text = bch2_sb_replicas_v0_to_text, 776 }; 777 778 /* Query replicas: */ 779 780 bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, 781 unsigned flags, bool print) 782 { 783 struct bch_replicas_entry_v1 *e; 784 bool ret = true; 785 786 percpu_down_read(&c->mark_lock); 787 for_each_cpu_replicas_entry(&c->replicas, e) { 788 unsigned nr_online = 0, nr_failed = 0, dflags = 0; 789 bool metadata = e->data_type < BCH_DATA_user; 790 791 if (e->data_type == BCH_DATA_cached) 792 continue; 793 794 rcu_read_lock(); 795 for (unsigned i = 0; i < e->nr_devs; i++) { 796 nr_online += test_bit(e->devs[i], devs.d); 797 798 struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]); 799 nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed; 800 } 801 rcu_read_unlock(); 802 803 if (nr_online + nr_failed == e->nr_devs) 804 continue; 805 806 if (nr_online < e->nr_required) 807 dflags |= metadata 808 ? BCH_FORCE_IF_METADATA_LOST 809 : BCH_FORCE_IF_DATA_LOST; 810 811 if (nr_online < e->nr_devs) 812 dflags |= metadata 813 ? BCH_FORCE_IF_METADATA_DEGRADED 814 : BCH_FORCE_IF_DATA_DEGRADED; 815 816 if (dflags & ~flags) { 817 if (print) { 818 struct printbuf buf = PRINTBUF; 819 820 bch2_replicas_entry_to_text(&buf, e); 821 bch_err(c, "insufficient devices online (%u) for replicas entry %s", 822 nr_online, buf.buf); 823 printbuf_exit(&buf); 824 } 825 ret = false; 826 break; 827 } 828 829 } 830 percpu_up_read(&c->mark_lock); 831 832 return ret; 833 } 834 835 unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) 836 { 837 struct bch_sb_field_replicas *replicas; 838 struct bch_sb_field_replicas_v0 *replicas_v0; 839 unsigned data_has = 0; 840 841 replicas = bch2_sb_field_get(sb, replicas); 842 replicas_v0 = bch2_sb_field_get(sb, replicas_v0); 843 844 if (replicas) { 845 struct bch_replicas_entry_v1 *r; 846 847 for_each_replicas_entry(replicas, r) { 848 if (r->data_type >= sizeof(data_has) * 8) 849 continue; 850 851 for (unsigned i = 0; i < r->nr_devs; i++) 852 if (r->devs[i] == dev) 853 data_has |= 1 << r->data_type; 854 } 855 856 } else if (replicas_v0) { 857 struct bch_replicas_entry_v0 *r; 858 859 for_each_replicas_entry_v0(replicas_v0, r) { 860 if (r->data_type >= sizeof(data_has) * 8) 861 continue; 862 863 for (unsigned i = 0; i < r->nr_devs; i++) 864 if (r->devs[i] == dev) 865 data_has |= 1 << r->data_type; 866 } 867 } 868 869 870 return data_has; 871 } 872 873 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) 874 { 875 mutex_lock(&c->sb_lock); 876 unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); 877 mutex_unlock(&c->sb_lock); 878 879 return ret; 880 } 881 882 void bch2_fs_replicas_exit(struct bch_fs *c) 883 { 884 kfree(c->replicas.entries); 885 kfree(c->replicas_gc.entries); 886 } 887