// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "disk_accounting.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
                                            struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
        size_t size = (size_t) priv;
        return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
        BUG_ON(!e->nr_devs);
        BUG_ON(e->nr_required > 1 &&
               e->nr_required >= e->nr_devs);

        for (unsigned i = 0; i + 1 < e->nr_devs; i++)
                BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
{
        bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
        eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
                          bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
                                           struct bch_replicas_entry_v0 *e)
{
        bch2_prt_data_type(out, e->data_type);

        prt_printf(out, ": %u [", e->nr_devs);
        for (unsigned i = 0; i < e->nr_devs; i++)
                prt_printf(out, i ? " %u" : "%u", e->devs[i]);
        prt_printf(out, "]");
}

void bch2_replicas_entry_to_text(struct printbuf *out,
                                 struct bch_replicas_entry_v1 *e)
{
        bch2_prt_data_type(out, e->data_type);

        prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
        for (unsigned i = 0; i < e->nr_devs; i++)
                prt_printf(out, i ? " %u" : "%u", e->devs[i]);
        prt_printf(out, "]");
}

static int bch2_replicas_entry_validate_locked(struct bch_replicas_entry_v1 *r,
                                               struct bch_sb *sb,
                                               struct printbuf *err)
{
        if (!r->nr_devs) {
                prt_printf(err, "no devices in entry ");
                goto bad;
        }

        if (r->nr_required > 1 &&
            r->nr_required >= r->nr_devs) {
                prt_printf(err, "bad nr_required in entry ");
                goto bad;
        }

        for (unsigned i = 0; i < r->nr_devs; i++)
                if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
                    !bch2_member_exists(sb, r->devs[i])) {
                        prt_printf(err, "invalid device %u in entry ", r->devs[i]);
                        goto bad;
                }

        return 0;
bad:
        bch2_replicas_entry_to_text(err, r);
        return -BCH_ERR_invalid_replicas_entry;
}

int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
                                 struct bch_fs *c,
                                 struct printbuf *err)
{
        mutex_lock(&c->sb_lock);
        int ret = bch2_replicas_entry_validate_locked(r, c->disk_sb.sb, err);
        mutex_unlock(&c->sb_lock);
        return ret;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
                               struct bch_replicas_cpu *r)
{
        struct bch_replicas_entry_v1 *e;
        bool first = true;

        for_each_cpu_replicas_entry(r, e) {
                if (!first)
                        prt_printf(out, " ");
                first = false;

                bch2_replicas_entry_to_text(out, e);
        }
}

static void extent_to_replicas(struct bkey_s_c k,
                               struct bch_replicas_entry_v1 *r)
{
        struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
        const union bch_extent_entry *entry;
        struct extent_ptr_decoded p;

        r->nr_required = 1;

        bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
                if (p.ptr.cached)
                        continue;

                if (!p.has_ec)
                        replicas_entry_add_dev(r, p.ptr.dev);
                else
                        r->nr_required = 0;
        }
}

static void stripe_to_replicas(struct bkey_s_c k,
                               struct bch_replicas_entry_v1 *r)
{
        struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
        const struct bch_extent_ptr *ptr;

        r->nr_required = s.v->nr_blocks - s.v->nr_redundant;

        for (ptr = s.v->ptrs;
             ptr < s.v->ptrs + s.v->nr_blocks;
             ptr++)
                replicas_entry_add_dev(r, ptr->dev);
}

void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
                           struct bkey_s_c k)
{
        e->nr_devs = 0;

        switch (k.k->type) {
        case KEY_TYPE_btree_ptr:
        case KEY_TYPE_btree_ptr_v2:
                e->data_type = BCH_DATA_btree;
                extent_to_replicas(k, e);
                break;
        case KEY_TYPE_extent:
        case KEY_TYPE_reflink_v:
                e->data_type = BCH_DATA_user;
                extent_to_replicas(k, e);
                break;
        case KEY_TYPE_stripe:
                e->data_type = BCH_DATA_parity;
                stripe_to_replicas(k, e);
                break;
        }

        bch2_replicas_entry_sort(e);
}

void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
                              enum bch_data_type data_type,
                              struct bch_devs_list devs)
{
        BUG_ON(!data_type ||
               data_type == BCH_DATA_sb ||
               data_type >= BCH_DATA_NR);

        e->data_type = data_type;
        e->nr_devs = 0;
        e->nr_required = 1;

        darray_for_each(devs, i)
                replicas_entry_add_dev(e, *i);

        bch2_replicas_entry_sort(e);
}
static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_fs *c,
                       struct bch_replicas_cpu *old,
                       struct bch_replicas_entry_v1 *new_entry)
{
        struct bch_replicas_cpu new = {
                .nr = old->nr + 1,
                .entry_size = max_t(unsigned, old->entry_size,
                                    replicas_entry_bytes(new_entry)),
        };

        new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
        if (!new.entries)
                return new;

        for (unsigned i = 0; i < old->nr; i++)
                memcpy(cpu_replicas_entry(&new, i),
                       cpu_replicas_entry(old, i),
                       old->entry_size);

        memcpy(cpu_replicas_entry(&new, old->nr),
               new_entry,
               replicas_entry_bytes(new_entry));

        bch2_cpu_replicas_sort(&new);
        return new;
}

static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
                                       struct bch_replicas_entry_v1 *search)
{
        int idx, entry_size = replicas_entry_bytes(search);

        if (unlikely(entry_size > r->entry_size))
                return -1;

#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
        idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
                              entry_cmp, search);
#undef entry_cmp

        return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
                            struct bch_replicas_entry_v1 *search)
{
        bch2_replicas_entry_sort(search);

        return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
                                 struct bch_replicas_entry_v1 *search)
{
        return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked_locked(struct bch_fs *c,
                                 struct bch_replicas_entry_v1 *search)
{
        verify_replicas_entry(search);

        return !search->nr_devs ||
                (__replicas_has_entry(&c->replicas, search) &&
                 (likely((!c->replicas_gc.entries)) ||
                  __replicas_has_entry(&c->replicas_gc, search)));
}

bool bch2_replicas_marked(struct bch_fs *c,
                          struct bch_replicas_entry_v1 *search)
{
        percpu_down_read(&c->mark_lock);
        bool ret = bch2_replicas_marked_locked(c, search);
        percpu_up_read(&c->mark_lock);

        return ret;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
                                       struct bch_replicas_entry_v1 *new_entry)
{
        struct bch_replicas_cpu new_r, new_gc;
        int ret = 0;

        verify_replicas_entry(new_entry);

        memset(&new_r, 0, sizeof(new_r));
        memset(&new_gc, 0, sizeof(new_gc));

        mutex_lock(&c->sb_lock);

        if (c->replicas_gc.entries &&
            !__replicas_has_entry(&c->replicas_gc, new_entry)) {
                new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
                if (!new_gc.entries) {
                        ret = -BCH_ERR_ENOMEM_cpu_replicas;
                        goto err;
                }
        }

        if (!__replicas_has_entry(&c->replicas, new_entry)) {
                new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
                if (!new_r.entries) {
                        ret = -BCH_ERR_ENOMEM_cpu_replicas;
                        goto err;
                }

                ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
                if (ret)
                        goto err;
        }

        if (!new_r.entries &&
            !new_gc.entries)
                goto out;

        /* allocations done, now commit: */

        if (new_r.entries)
                bch2_write_super(c);

        /* don't update in memory replicas until changes are persistent */
        percpu_down_write(&c->mark_lock);
        if (new_r.entries)
                swap(c->replicas, new_r);
        if (new_gc.entries)
                swap(new_gc, c->replicas_gc);
        percpu_up_write(&c->mark_lock);
out:
        mutex_unlock(&c->sb_lock);

        kfree(new_r.entries);
        kfree(new_gc.entries);

        return ret;
err:
        bch_err_msg(c, ret, "adding replicas entry");
        goto out;
}
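
/*
 * Ensure @r is marked in both the in-memory replicas table and the superblock.
 * The common case is that it already is, which only takes mark_lock for
 * reading; otherwise the slowpath above takes sb_lock, writes the updated
 * superblock replicas section, and only then swaps in the new in-memory table.
 */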
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
        return likely(bch2_replicas_marked(c, r))
                ? 0 : bch2_mark_replicas_slowpath(c, r);
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
        lockdep_assert_held(&c->replicas_gc_lock);

        mutex_lock(&c->sb_lock);
        percpu_down_write(&c->mark_lock);

        ret = ret ?:
                bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
        if (!ret)
                swap(c->replicas, c->replicas_gc);

        kfree(c->replicas_gc.entries);
        c->replicas_gc.entries = NULL;

        percpu_up_write(&c->mark_lock);

        if (!ret)
                bch2_write_super(c);

        mutex_unlock(&c->sb_lock);

        return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
        struct bch_replicas_entry_v1 *e;
        unsigned i = 0;

        lockdep_assert_held(&c->replicas_gc_lock);

        mutex_lock(&c->sb_lock);
        BUG_ON(c->replicas_gc.entries);

        c->replicas_gc.nr = 0;
        c->replicas_gc.entry_size = 0;

        for_each_cpu_replicas_entry(&c->replicas, e) {
                /* Preserve unknown data types */
                if (e->data_type >= BCH_DATA_NR ||
                    !((1 << e->data_type) & typemask)) {
                        c->replicas_gc.nr++;
                        c->replicas_gc.entry_size =
                                max_t(unsigned, c->replicas_gc.entry_size,
                                      replicas_entry_bytes(e));
                }
        }

        c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
                                         c->replicas_gc.entry_size,
                                         GFP_KERNEL);
        if (!c->replicas_gc.entries) {
                mutex_unlock(&c->sb_lock);
                bch_err(c, "error allocating c->replicas_gc");
                return -BCH_ERR_ENOMEM_replicas_gc;
        }

        for_each_cpu_replicas_entry(&c->replicas, e)
                if (e->data_type >= BCH_DATA_NR ||
                    !((1 << e->data_type) & typemask))
                        memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
                               e, c->replicas_gc.entry_size);

        bch2_cpu_replicas_sort(&c->replicas_gc);
        mutex_unlock(&c->sb_lock);

        return 0;
}

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
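
/*
 * The scratch table below is sized from c->replicas before sb_lock and
 * mark_lock are taken; if the table was resized in the meantime we drop the
 * locks, free the allocation and retry with the new size.
 */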
int bch2_replicas_gc2(struct bch_fs *c)
{
        struct bch_replicas_cpu new = { 0 };
        unsigned nr;
        int ret = 0;

        bch2_accounting_mem_gc(c);
retry:
        nr = READ_ONCE(c->replicas.nr);
        new.entry_size = READ_ONCE(c->replicas.entry_size);
        new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
        if (!new.entries) {
                bch_err(c, "error allocating c->replicas_gc");
                return -BCH_ERR_ENOMEM_replicas_gc;
        }

        mutex_lock(&c->sb_lock);
        percpu_down_write(&c->mark_lock);

        if (nr != c->replicas.nr ||
            new.entry_size != c->replicas.entry_size) {
                percpu_up_write(&c->mark_lock);
                mutex_unlock(&c->sb_lock);
                kfree(new.entries);
                goto retry;
        }

        for (unsigned i = 0; i < c->replicas.nr; i++) {
                struct bch_replicas_entry_v1 *e =
                        cpu_replicas_entry(&c->replicas, i);

                struct disk_accounting_pos k = {
                        .type = BCH_DISK_ACCOUNTING_replicas,
                };

                unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
                              "embedded variable length struct");

                struct bpos p = disk_accounting_pos_to_bpos(&k);

                struct bch_accounting_mem *acc = &c->accounting;
                bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
                                            accounting_pos_cmp, &p) >= acc->k.nr;

                if (e->data_type == BCH_DATA_journal || !kill)
                        memcpy(cpu_replicas_entry(&new, new.nr++),
                               e, new.entry_size);
        }

        bch2_cpu_replicas_sort(&new);

        ret = bch2_cpu_replicas_to_sb_replicas(c, &new);

        if (!ret)
                swap(c->replicas, new);

        kfree(new.entries);

        percpu_up_write(&c->mark_lock);

        if (!ret)
                bch2_write_super(c);

        mutex_unlock(&c->sb_lock);

        return ret;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
                                   struct bch_replicas_cpu *cpu_r)
{
        struct bch_replicas_entry_v1 *e, *dst;
        unsigned nr = 0, entry_size = 0, idx = 0;

        for_each_replicas_entry(sb_r, e) {
                entry_size = max_t(unsigned, entry_size,
                                   replicas_entry_bytes(e));
                nr++;
        }

        cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
        if (!cpu_r->entries)
                return -BCH_ERR_ENOMEM_cpu_replicas;

        cpu_r->nr = nr;
        cpu_r->entry_size = entry_size;

        for_each_replicas_entry(sb_r, e) {
                dst = cpu_replicas_entry(cpu_r, idx++);
                memcpy(dst, e, replicas_entry_bytes(e));
                bch2_replicas_entry_sort(dst);
        }

        return 0;
}

static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
                                      struct bch_replicas_cpu *cpu_r)
{
        struct bch_replicas_entry_v0 *e;
        unsigned nr = 0, entry_size = 0, idx = 0;

        for_each_replicas_entry(sb_r, e) {
                entry_size = max_t(unsigned, entry_size,
                                   replicas_entry_bytes(e));
                nr++;
        }

        entry_size += sizeof(struct bch_replicas_entry_v1) -
                sizeof(struct bch_replicas_entry_v0);

        cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
        if (!cpu_r->entries)
                return -BCH_ERR_ENOMEM_cpu_replicas;

        cpu_r->nr = nr;
        cpu_r->entry_size = entry_size;

        for_each_replicas_entry(sb_r, e) {
                struct bch_replicas_entry_v1 *dst =
                        cpu_replicas_entry(cpu_r, idx++);

                dst->data_type = e->data_type;
                dst->nr_devs = e->nr_devs;
                dst->nr_required = 1;
                memcpy(dst->devs, e->devs, e->nr_devs);
                bch2_replicas_entry_sort(dst);
        }

        return 0;
}
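
/*
 * Build the in-memory replicas table from the superblock, preferring the
 * current (v1) replicas field and falling back to the old v0 field; v0
 * entries have no nr_required, so the conversion above defaults it to 1.
 */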
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
        struct bch_sb_field_replicas *sb_v1;
        struct bch_sb_field_replicas_v0 *sb_v0;
        struct bch_replicas_cpu new_r = { 0, 0, NULL };
        int ret = 0;

        if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
                ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
        else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
                ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
        if (ret)
                return ret;

        bch2_cpu_replicas_sort(&new_r);

        percpu_down_write(&c->mark_lock);
        swap(c->replicas, new_r);
        percpu_up_write(&c->mark_lock);

        kfree(new_r.entries);

        return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
                                               struct bch_replicas_cpu *r)
{
        struct bch_sb_field_replicas_v0 *sb_r;
        struct bch_replicas_entry_v0 *dst;
        struct bch_replicas_entry_v1 *src;
        size_t bytes;

        bytes = sizeof(struct bch_sb_field_replicas);

        for_each_cpu_replicas_entry(r, src)
                bytes += replicas_entry_bytes(src) - 1;

        sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
                                    DIV_ROUND_UP(bytes, sizeof(u64)));
        if (!sb_r)
                return -BCH_ERR_ENOSPC_sb_replicas;

        bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
        sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

        memset(&sb_r->entries, 0,
               vstruct_end(&sb_r->field) -
               (void *) &sb_r->entries);

        dst = sb_r->entries;
        for_each_cpu_replicas_entry(r, src) {
                dst->data_type = src->data_type;
                dst->nr_devs = src->nr_devs;
                memcpy(dst->devs, src->devs, src->nr_devs);

                dst = replicas_entry_next(dst);

                BUG_ON((void *) dst > vstruct_end(&sb_r->field));
        }

        return 0;
}

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
                                            struct bch_replicas_cpu *r)
{
        struct bch_sb_field_replicas *sb_r;
        struct bch_replicas_entry_v1 *dst, *src;
        bool need_v1 = false;
        size_t bytes;

        bytes = sizeof(struct bch_sb_field_replicas);

        for_each_cpu_replicas_entry(r, src) {
                bytes += replicas_entry_bytes(src);
                if (src->nr_required != 1)
                        need_v1 = true;
        }

        if (!need_v1)
                return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

        sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
                                    DIV_ROUND_UP(bytes, sizeof(u64)));
        if (!sb_r)
                return -BCH_ERR_ENOSPC_sb_replicas;

        bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
        sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

        memset(&sb_r->entries, 0,
               vstruct_end(&sb_r->field) -
               (void *) &sb_r->entries);

        dst = sb_r->entries;
        for_each_cpu_replicas_entry(r, src) {
                memcpy(dst, src, replicas_entry_bytes(src));

                dst = replicas_entry_next(dst);

                BUG_ON((void *) dst > vstruct_end(&sb_r->field));
        }

        return 0;
}
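
/*
 * Sort the table first so duplicate entries end up adjacent, then validate
 * each entry against the superblock's member list and reject any duplicates.
 */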
static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
                                      struct bch_sb *sb,
                                      struct printbuf *err)
{
        unsigned i;

        sort_r(cpu_r->entries,
               cpu_r->nr,
               cpu_r->entry_size,
               bch2_memcmp, NULL,
               (void *)(size_t)cpu_r->entry_size);

        for (i = 0; i < cpu_r->nr; i++) {
                struct bch_replicas_entry_v1 *e =
                        cpu_replicas_entry(cpu_r, i);

                int ret = bch2_replicas_entry_validate_locked(e, sb, err);
                if (ret)
                        return ret;

                if (i + 1 < cpu_r->nr) {
                        struct bch_replicas_entry_v1 *n =
                                cpu_replicas_entry(cpu_r, i + 1);

                        BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

                        if (!memcmp(e, n, cpu_r->entry_size)) {
                                prt_printf(err, "duplicate replicas entry ");
                                bch2_replicas_entry_to_text(err, e);
                                return -BCH_ERR_invalid_sb_replicas;
                        }
                }
        }

        return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
                                     enum bch_validate_flags flags, struct printbuf *err)
{
        struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
        struct bch_replicas_cpu cpu_r;
        int ret;

        ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
        if (ret)
                return ret;

        ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
        kfree(cpu_r.entries);
        return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
                                     struct bch_sb *sb,
                                     struct bch_sb_field *f)
{
        struct bch_sb_field_replicas *r = field_to_type(f, replicas);
        struct bch_replicas_entry_v1 *e;
        bool first = true;

        for_each_replicas_entry(r, e) {
                if (!first)
                        prt_printf(out, " ");
                first = false;

                bch2_replicas_entry_to_text(out, e);
        }
        prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
        .validate = bch2_sb_replicas_validate,
        .to_text = bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
                                        enum bch_validate_flags flags, struct printbuf *err)
{
        struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
        struct bch_replicas_cpu cpu_r;
        int ret;

        ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
        if (ret)
                return ret;

        ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
        kfree(cpu_r.entries);
        return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
                                        struct bch_sb *sb,
                                        struct bch_sb_field *f)
{
        struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
        struct bch_replicas_entry_v0 *e;
        bool first = true;

        for_each_replicas_entry(sb_r, e) {
                if (!first)
                        prt_printf(out, " ");
                first = false;

                bch2_replicas_entry_v0_to_text(out, e);
        }
        prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
        .validate = bch2_sb_replicas_v0_validate,
        .to_text = bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */
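
/*
 * Check whether the devices in @devs are sufficient for every replicas entry
 * currently in use: returns false if some entry would be lost or degraded and
 * the corresponding BCH_FORCE_IF_{METADATA,DATA}_{LOST,DEGRADED} flag isn't
 * set in @flags.  E.g. for an entry printed as "user: 1/2 [0 1]" (assuming the
 * offline devices aren't marked failed), having only one of devices 0 and 1 in
 * @devs requires BCH_FORCE_IF_DATA_DEGRADED, and having neither requires
 * BCH_FORCE_IF_DATA_LOST as well.
 */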
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
                           unsigned flags, bool print)
{
        struct bch_replicas_entry_v1 *e;
        bool ret = true;

        percpu_down_read(&c->mark_lock);
        for_each_cpu_replicas_entry(&c->replicas, e) {
                unsigned nr_online = 0, nr_failed = 0, dflags = 0;
                bool metadata = e->data_type < BCH_DATA_user;

                if (e->data_type == BCH_DATA_cached)
                        continue;

                rcu_read_lock();
                for (unsigned i = 0; i < e->nr_devs; i++) {
                        nr_online += test_bit(e->devs[i], devs.d);

                        struct bch_dev *ca = bch2_dev_rcu_noerror(c, e->devs[i]);
                        nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
                }
                rcu_read_unlock();

                if (nr_online + nr_failed == e->nr_devs)
                        continue;

                if (nr_online < e->nr_required)
                        dflags |= metadata
                                ? BCH_FORCE_IF_METADATA_LOST
                                : BCH_FORCE_IF_DATA_LOST;

                if (nr_online < e->nr_devs)
                        dflags |= metadata
                                ? BCH_FORCE_IF_METADATA_DEGRADED
                                : BCH_FORCE_IF_DATA_DEGRADED;

                if (dflags & ~flags) {
                        if (print) {
                                struct printbuf buf = PRINTBUF;

                                bch2_replicas_entry_to_text(&buf, e);
                                bch_err(c, "insufficient devices online (%u) for replicas entry %s",
                                        nr_online, buf.buf);
                                printbuf_exit(&buf);
                        }
                        ret = false;
                        break;
                }
        }
        percpu_up_read(&c->mark_lock);

        return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
        struct bch_sb_field_replicas *replicas;
        struct bch_sb_field_replicas_v0 *replicas_v0;
        unsigned data_has = 0;

        replicas = bch2_sb_field_get(sb, replicas);
        replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

        if (replicas) {
                struct bch_replicas_entry_v1 *r;

                for_each_replicas_entry(replicas, r) {
                        if (r->data_type >= sizeof(data_has) * 8)
                                continue;

                        for (unsigned i = 0; i < r->nr_devs; i++)
                                if (r->devs[i] == dev)
                                        data_has |= 1 << r->data_type;
                }
        } else if (replicas_v0) {
                struct bch_replicas_entry_v0 *r;

                for_each_replicas_entry_v0(replicas_v0, r) {
                        if (r->data_type >= sizeof(data_has) * 8)
                                continue;

                        for (unsigned i = 0; i < r->nr_devs; i++)
                                if (r->devs[i] == dev)
                                        data_has |= 1 << r->data_type;
                }
        }

        return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
        mutex_lock(&c->sb_lock);
        unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
        mutex_unlock(&c->sb_lock);

        return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
        kfree(c->replicas.entries);
        kfree(c->replicas_gc.entries);
}