// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "disk_accounting.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;
	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (unsigned i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry_v1 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

static int bch2_replicas_entry_sb_validate(struct bch_replicas_entry_v1 *r,
					   struct bch_sb *sb,
					   struct printbuf *err)
{
	if (!r->nr_devs) {
		prt_printf(err, "no devices in entry ");
		goto bad;
	}

	if (r->nr_required > 1 &&
	    r->nr_required >= r->nr_devs) {
		prt_printf(err, "bad nr_required in entry ");
		goto bad;
	}

	for (unsigned i = 0; i < r->nr_devs; i++)
		if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
		    !bch2_member_exists(sb, r->devs[i])) {
			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
			goto bad;
		}

	return 0;
bad:
	bch2_replicas_entry_to_text(err, r);
	return -BCH_ERR_invalid_replicas_entry;
}

int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
				 struct bch_fs *c,
				 struct printbuf *err)
{
	if (!r->nr_devs) {
		prt_printf(err, "no devices in entry ");
		goto bad;
	}

	if (r->nr_required > 1 &&
	    r->nr_required >= r->nr_devs) {
		prt_printf(err, "bad nr_required in entry ");
		goto bad;
	}

	for (unsigned i = 0; i < r->nr_devs; i++)
		if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
		    !bch2_dev_exists(c, r->devs[i])) {
			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
			goto bad;
		}

	return 0;
bad:
	bch2_replicas_entry_to_text(err, r);
	return -BCH_ERR_invalid_replicas_entry;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
			       struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_cpu_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
}

static void extent_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry_v1 *r)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	r->nr_required = 1;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached)
			continue;

		if (!p.has_ec)
			replicas_entry_add_dev(r, p.ptr.dev);
		else
			r->nr_required = 0;
	}
}

static void stripe_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry_v1 *r)
{
	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
	const struct bch_extent_ptr *ptr;

	r->nr_required = s.v->nr_blocks - s.v->nr_redundant;

	for (ptr = s.v->ptrs;
	     ptr < s.v->ptrs + s.v->nr_blocks;
	     ptr++)
		replicas_entry_add_dev(r, ptr->dev);
}

void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
			   struct bkey_s_c k)
{
	e->nr_devs = 0;

	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_btree_ptr_v2:
		e->data_type = BCH_DATA_btree;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		e->data_type = BCH_DATA_user;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_stripe:
		e->data_type = BCH_DATA_parity;
		stripe_to_replicas(k, e);
		break;
	}

	bch2_replicas_entry_sort(e);
}

void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
			      enum bch_data_type data_type,
			      struct bch_devs_list devs)
{
	BUG_ON(!data_type ||
	       data_type == BCH_DATA_sb ||
	       data_type >= BCH_DATA_NR);

	e->data_type	= data_type;
	e->nr_devs	= 0;
	e->nr_required	= 1;

	darray_for_each(devs, i)
		replicas_entry_add_dev(e, *i);

	bch2_replicas_entry_sort(e);
}

static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_fs *c,
		       struct bch_replicas_cpu *old,
		       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new = {
		.nr		= old->nr + 1,
		.entry_size	= max_t(unsigned, old->entry_size,
					replicas_entry_bytes(new_entry)),
	};

	new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
	if (!new.entries)
		return new;

	for (unsigned i = 0; i < old->nr; i++)
		memcpy(cpu_replicas_entry(&new, i),
		       cpu_replicas_entry(old, i),
		       old->entry_size);

	memcpy(cpu_replicas_entry(&new, old->nr),
	       new_entry,
	       replicas_entry_bytes(new_entry));

	bch2_cpu_replicas_sort(&new);
	return new;
}

static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry_v1 *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

#define entry_cmp(_l, _r)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry_v1 *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked_locked(struct bch_fs *c,
				 struct bch_replicas_entry_v1 *search)
{
	verify_replicas_entry(search);

	return !search->nr_devs ||
		(__replicas_has_entry(&c->replicas, search) &&
		 (likely((!c->replicas_gc.entries)) ||
		  __replicas_has_entry(&c->replicas_gc, search)));
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry_v1 *search)
{
	percpu_down_read(&c->mark_lock);
	bool ret = bch2_replicas_marked_locked(c, search);
	percpu_up_read(&c->mark_lock);

	return ret;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		swap(c->replicas, new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}

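/*
 * Mark a replicas entry as in use. The common case is a lookup in the
 * in-memory table; the slowpath adds the entry to the in-memory table(s),
 * persisting it in the superblock before the in-memory copy is updated.
 */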
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret = ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
	if (!ret)
		swap(c->replicas, c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

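/*
 * Start a gc pass over replicas entries whose data types are in @typemask:
 * those entries are dropped from the gc copy, and entries still in use must
 * be re-marked (via bch2_mark_replicas()) before bch2_replicas_gc_end()
 * persists the gc copy and swaps it in.
 */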
int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry_v1 *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr		= 0;
	c->replicas_gc.entry_size	= 0;

	for_each_cpu_replicas_entry(&c->replicas, e) {
		/* Preserve unknown data types */
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}
	}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned nr;
	int ret = 0;

	bch2_accounting_mem_gc(c);
retry:
	nr		= READ_ONCE(c->replicas.nr);
	new.entry_size	= READ_ONCE(c->replicas.entry_size);
	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr			!= c->replicas.nr ||
	    new.entry_size	!= c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (unsigned i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		struct disk_accounting_pos k = {
			.type = BCH_DISK_ACCOUNTING_replicas,
		};

		unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
			      "embedded variable length struct");

		struct bpos p = disk_accounting_pos_to_bpos(&k);

		struct bch_accounting_mem *acc = &c->accounting;
		bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
					    accounting_pos_cmp, &p) >= acc->k.nr;

		if (e->data_type == BCH_DATA_journal || !kill)
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret = bch2_cpu_replicas_to_sb_replicas(c, &new);

	if (!ret)
		swap(c->replicas, new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

/* Replicas tracking - superblock: */

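/*
 * Decode a superblock replicas field into the in-memory representation:
 * entries are copied at a uniform entry_size (the largest entry seen) and
 * sorted. The v0 variant below also fills in nr_required = 1, which v0
 * entries don't carry.
 */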
static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v1 *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry_v1) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry_v1 *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type	= e->data_type;
		dst->nr_devs	= e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);
	swap(c->replicas, new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry_v1 *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type	= src->data_type;
		dst->nr_devs	= src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

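/*
 * Encode the in-memory table back into the superblock. If every entry has
 * nr_required == 1 the smaller v0 format is used; whichever format is
 * written, the other superblock field is deleted.
 */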
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry_v1 *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_r(cpu_r->entries,
	       cpu_r->nr,
	       cpu_r->entry_size,
	       bch2_memcmp, NULL,
	       (void *)(size_t)cpu_r->entry_size);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_sb_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry_v1 *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_replicas_validate,
	.to_text	= bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_replicas_v0_validate,
	.to_text	= bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */

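/*
 * Check that the given device mask is sufficient to read every replicas
 * entry, given the BCH_FORCE_IF_* flags the caller is willing to accept;
 * optionally log the first entry that can't be satisfied.
 */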
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry_v1 *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		rcu_read_lock();
		for (unsigned i = 0; i < e->nr_devs; i++) {
			if (e->devs[i] == BCH_SB_MEMBER_INVALID) {
				nr_failed++;
				continue;
			}

			nr_online += test_bit(e->devs[i], devs.d);

			struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
			nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
		}
		rcu_read_unlock();

		if (nr_online + nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}
	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned data_has = 0;

	replicas = bch2_sb_field_get(sb, replicas);
	replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry_v1 *r;

		for_each_replicas_entry(replicas, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}

	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	mutex_lock(&c->sb_lock);
	unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);
}