1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "buckets.h" 5 #include "disk_accounting.h" 6 #include "journal.h" 7 #include "replicas.h" 8 #include "super-io.h" 9 10 #include <linux/sort.h> 11 12 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *, 13 struct bch_replicas_cpu *); 14 15 /* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */ 16 static int bch2_memcmp(const void *l, const void *r, const void *priv) 17 { 18 size_t size = (size_t) priv; 19 return memcmp(l, r, size); 20 } 21 22 /* Replicas tracking - in memory: */ 23 24 static void verify_replicas_entry(struct bch_replicas_entry_v1 *e) 25 { 26 #ifdef CONFIG_BCACHEFS_DEBUG 27 BUG_ON(e->data_type >= BCH_DATA_NR); 28 BUG_ON(!e->nr_devs); 29 BUG_ON(e->nr_required > 1 && 30 e->nr_required >= e->nr_devs); 31 32 for (unsigned i = 0; i + 1 < e->nr_devs; i++) 33 BUG_ON(e->devs[i] >= e->devs[i + 1]); 34 #endif 35 } 36 37 void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e) 38 { 39 bubble_sort(e->devs, e->nr_devs, u8_cmp); 40 } 41 42 static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r) 43 { 44 eytzinger0_sort_r(r->entries, r->nr, r->entry_size, 45 bch2_memcmp, NULL, (void *)(size_t)r->entry_size); 46 } 47 48 static void bch2_replicas_entry_v0_to_text(struct printbuf *out, 49 struct bch_replicas_entry_v0 *e) 50 { 51 bch2_prt_data_type(out, e->data_type); 52 53 prt_printf(out, ": %u [", e->nr_devs); 54 for (unsigned i = 0; i < e->nr_devs; i++) 55 prt_printf(out, i ? " %u" : "%u", e->devs[i]); 56 prt_printf(out, "]"); 57 } 58 59 void bch2_replicas_entry_to_text(struct printbuf *out, 60 struct bch_replicas_entry_v1 *e) 61 { 62 bch2_prt_data_type(out, e->data_type); 63 64 prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs); 65 for (unsigned i = 0; i < e->nr_devs; i++) 66 prt_printf(out, i ? " %u" : "%u", e->devs[i]); 67 prt_printf(out, "]"); 68 } 69 70 int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, 71 struct bch_sb *sb, 72 struct printbuf *err) 73 { 74 if (!r->nr_devs) { 75 prt_printf(err, "no devices in entry "); 76 goto bad; 77 } 78 79 if (r->nr_required > 1 && 80 r->nr_required >= r->nr_devs) { 81 prt_printf(err, "bad nr_required in entry "); 82 goto bad; 83 } 84 85 for (unsigned i = 0; i < r->nr_devs; i++) 86 if (!bch2_member_exists(sb, r->devs[i])) { 87 prt_printf(err, "invalid device %u in entry ", r->devs[i]); 88 goto bad; 89 } 90 91 return 0; 92 bad: 93 bch2_replicas_entry_to_text(err, r); 94 return -BCH_ERR_invalid_replicas_entry; 95 } 96 97 void bch2_cpu_replicas_to_text(struct printbuf *out, 98 struct bch_replicas_cpu *r) 99 { 100 struct bch_replicas_entry_v1 *e; 101 bool first = true; 102 103 for_each_cpu_replicas_entry(r, e) { 104 if (!first) 105 prt_printf(out, " "); 106 first = false; 107 108 bch2_replicas_entry_to_text(out, e); 109 } 110 } 111 112 static void extent_to_replicas(struct bkey_s_c k, 113 struct bch_replicas_entry_v1 *r) 114 { 115 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 116 const union bch_extent_entry *entry; 117 struct extent_ptr_decoded p; 118 119 r->nr_required = 1; 120 121 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 122 if (p.ptr.cached) 123 continue; 124 125 if (!p.has_ec) 126 r->devs[r->nr_devs++] = p.ptr.dev; 127 else 128 r->nr_required = 0; 129 } 130 } 131 132 static void stripe_to_replicas(struct bkey_s_c k, 133 struct bch_replicas_entry_v1 *r) 134 { 135 struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); 136 const struct bch_extent_ptr *ptr; 137 138 r->nr_required = s.v->nr_blocks - s.v->nr_redundant; 139 140 for (ptr = s.v->ptrs; 141 ptr < s.v->ptrs + s.v->nr_blocks; 142 ptr++) 143 r->devs[r->nr_devs++] = ptr->dev; 144 } 145 146 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e, 147 struct bkey_s_c k) 148 { 149 e->nr_devs = 0; 150 151 switch (k.k->type) { 152 case KEY_TYPE_btree_ptr: 153 case KEY_TYPE_btree_ptr_v2: 154 e->data_type = BCH_DATA_btree; 155 extent_to_replicas(k, e); 156 break; 157 case KEY_TYPE_extent: 158 case KEY_TYPE_reflink_v: 159 e->data_type = BCH_DATA_user; 160 extent_to_replicas(k, e); 161 break; 162 case KEY_TYPE_stripe: 163 e->data_type = BCH_DATA_parity; 164 stripe_to_replicas(k, e); 165 break; 166 } 167 168 bch2_replicas_entry_sort(e); 169 } 170 171 void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, 172 enum bch_data_type data_type, 173 struct bch_devs_list devs) 174 { 175 BUG_ON(!data_type || 176 data_type == BCH_DATA_sb || 177 data_type >= BCH_DATA_NR); 178 179 e->data_type = data_type; 180 e->nr_devs = 0; 181 e->nr_required = 1; 182 183 darray_for_each(devs, i) 184 e->devs[e->nr_devs++] = *i; 185 186 bch2_replicas_entry_sort(e); 187 } 188 189 static struct bch_replicas_cpu 190 cpu_replicas_add_entry(struct bch_fs *c, 191 struct bch_replicas_cpu *old, 192 struct bch_replicas_entry_v1 *new_entry) 193 { 194 struct bch_replicas_cpu new = { 195 .nr = old->nr + 1, 196 .entry_size = max_t(unsigned, old->entry_size, 197 replicas_entry_bytes(new_entry)), 198 }; 199 200 new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); 201 if (!new.entries) 202 return new; 203 204 for (unsigned i = 0; i < old->nr; i++) 205 memcpy(cpu_replicas_entry(&new, i), 206 cpu_replicas_entry(old, i), 207 old->entry_size); 208 209 memcpy(cpu_replicas_entry(&new, old->nr), 210 new_entry, 211 replicas_entry_bytes(new_entry)); 212 213 bch2_cpu_replicas_sort(&new); 214 return new; 215 } 216 217 static inline int __replicas_entry_idx(struct bch_replicas_cpu *r, 218 struct bch_replicas_entry_v1 *search) 219 { 220 int idx, entry_size = replicas_entry_bytes(search); 221 222 if (unlikely(entry_size > r->entry_size)) 223 return -1; 224 225 #define entry_cmp(_l, _r) memcmp(_l, _r, entry_size) 226 idx = eytzinger0_find(r->entries, r->nr, r->entry_size, 227 entry_cmp, search); 228 #undef entry_cmp 229 230 return idx < r->nr ? idx : -1; 231 } 232 233 int bch2_replicas_entry_idx(struct bch_fs *c, 234 struct bch_replicas_entry_v1 *search) 235 { 236 bch2_replicas_entry_sort(search); 237 238 return __replicas_entry_idx(&c->replicas, search); 239 } 240 241 static bool __replicas_has_entry(struct bch_replicas_cpu *r, 242 struct bch_replicas_entry_v1 *search) 243 { 244 return __replicas_entry_idx(r, search) >= 0; 245 } 246 247 bool bch2_replicas_marked_locked(struct bch_fs *c, 248 struct bch_replicas_entry_v1 *search) 249 { 250 verify_replicas_entry(search); 251 252 return !search->nr_devs || 253 (__replicas_has_entry(&c->replicas, search) && 254 (likely((!c->replicas_gc.entries)) || 255 __replicas_has_entry(&c->replicas_gc, search))); 256 } 257 258 bool bch2_replicas_marked(struct bch_fs *c, 259 struct bch_replicas_entry_v1 *search) 260 { 261 percpu_down_read(&c->mark_lock); 262 bool ret = bch2_replicas_marked_locked(c, search); 263 percpu_up_read(&c->mark_lock); 264 265 return ret; 266 } 267 268 noinline 269 static int bch2_mark_replicas_slowpath(struct bch_fs *c, 270 struct bch_replicas_entry_v1 *new_entry) 271 { 272 struct bch_replicas_cpu new_r, new_gc; 273 int ret = 0; 274 275 verify_replicas_entry(new_entry); 276 277 memset(&new_r, 0, sizeof(new_r)); 278 memset(&new_gc, 0, sizeof(new_gc)); 279 280 mutex_lock(&c->sb_lock); 281 282 if (c->replicas_gc.entries && 283 !__replicas_has_entry(&c->replicas_gc, new_entry)) { 284 new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry); 285 if (!new_gc.entries) { 286 ret = -BCH_ERR_ENOMEM_cpu_replicas; 287 goto err; 288 } 289 } 290 291 if (!__replicas_has_entry(&c->replicas, new_entry)) { 292 new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry); 293 if (!new_r.entries) { 294 ret = -BCH_ERR_ENOMEM_cpu_replicas; 295 goto err; 296 } 297 298 ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r); 299 if (ret) 300 goto err; 301 } 302 303 if (!new_r.entries && 304 !new_gc.entries) 305 goto out; 306 307 /* allocations done, now commit: */ 308 309 if (new_r.entries) 310 bch2_write_super(c); 311 312 /* don't update in memory replicas until changes are persistent */ 313 percpu_down_write(&c->mark_lock); 314 if (new_r.entries) 315 swap(c->replicas, new_r); 316 if (new_gc.entries) 317 swap(new_gc, c->replicas_gc); 318 percpu_up_write(&c->mark_lock); 319 out: 320 mutex_unlock(&c->sb_lock); 321 322 kfree(new_r.entries); 323 kfree(new_gc.entries); 324 325 return ret; 326 err: 327 bch_err_msg(c, ret, "adding replicas entry"); 328 goto out; 329 } 330 331 int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r) 332 { 333 return likely(bch2_replicas_marked(c, r)) 334 ? 0 : bch2_mark_replicas_slowpath(c, r); 335 } 336 337 /* 338 * Old replicas_gc mechanism: only used for journal replicas entries now, should 339 * die at some point: 340 */ 341 342 int bch2_replicas_gc_end(struct bch_fs *c, int ret) 343 { 344 lockdep_assert_held(&c->replicas_gc_lock); 345 346 mutex_lock(&c->sb_lock); 347 percpu_down_write(&c->mark_lock); 348 349 ret = ret ?: 350 bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc); 351 if (!ret) 352 swap(c->replicas, c->replicas_gc); 353 354 kfree(c->replicas_gc.entries); 355 c->replicas_gc.entries = NULL; 356 357 percpu_up_write(&c->mark_lock); 358 359 if (!ret) 360 bch2_write_super(c); 361 362 mutex_unlock(&c->sb_lock); 363 364 return ret; 365 } 366 367 int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask) 368 { 369 struct bch_replicas_entry_v1 *e; 370 unsigned i = 0; 371 372 lockdep_assert_held(&c->replicas_gc_lock); 373 374 mutex_lock(&c->sb_lock); 375 BUG_ON(c->replicas_gc.entries); 376 377 c->replicas_gc.nr = 0; 378 c->replicas_gc.entry_size = 0; 379 380 for_each_cpu_replicas_entry(&c->replicas, e) { 381 /* Preserve unknown data types */ 382 if (e->data_type >= BCH_DATA_NR || 383 !((1 << e->data_type) & typemask)) { 384 c->replicas_gc.nr++; 385 c->replicas_gc.entry_size = 386 max_t(unsigned, c->replicas_gc.entry_size, 387 replicas_entry_bytes(e)); 388 } 389 } 390 391 c->replicas_gc.entries = kcalloc(c->replicas_gc.nr, 392 c->replicas_gc.entry_size, 393 GFP_KERNEL); 394 if (!c->replicas_gc.entries) { 395 mutex_unlock(&c->sb_lock); 396 bch_err(c, "error allocating c->replicas_gc"); 397 return -BCH_ERR_ENOMEM_replicas_gc; 398 } 399 400 for_each_cpu_replicas_entry(&c->replicas, e) 401 if (e->data_type >= BCH_DATA_NR || 402 !((1 << e->data_type) & typemask)) 403 memcpy(cpu_replicas_entry(&c->replicas_gc, i++), 404 e, c->replicas_gc.entry_size); 405 406 bch2_cpu_replicas_sort(&c->replicas_gc); 407 mutex_unlock(&c->sb_lock); 408 409 return 0; 410 } 411 412 /* 413 * New much simpler mechanism for clearing out unneeded replicas entries - drop 414 * replicas entries that have 0 sectors used. 415 * 416 * However, we don't track sector counts for journal usage, so this doesn't drop 417 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism 418 * is retained for that. 419 */ 420 int bch2_replicas_gc2(struct bch_fs *c) 421 { 422 struct bch_replicas_cpu new = { 0 }; 423 unsigned nr; 424 int ret = 0; 425 426 bch2_accounting_mem_gc(c); 427 retry: 428 nr = READ_ONCE(c->replicas.nr); 429 new.entry_size = READ_ONCE(c->replicas.entry_size); 430 new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL); 431 if (!new.entries) { 432 bch_err(c, "error allocating c->replicas_gc"); 433 return -BCH_ERR_ENOMEM_replicas_gc; 434 } 435 436 mutex_lock(&c->sb_lock); 437 percpu_down_write(&c->mark_lock); 438 439 if (nr != c->replicas.nr || 440 new.entry_size != c->replicas.entry_size) { 441 percpu_up_write(&c->mark_lock); 442 mutex_unlock(&c->sb_lock); 443 kfree(new.entries); 444 goto retry; 445 } 446 447 for (unsigned i = 0; i < c->replicas.nr; i++) { 448 struct bch_replicas_entry_v1 *e = 449 cpu_replicas_entry(&c->replicas, i); 450 451 struct disk_accounting_pos k = { 452 .type = BCH_DISK_ACCOUNTING_replicas, 453 }; 454 455 memcpy(&k.replicas, e, replicas_entry_bytes(e)); 456 457 struct bpos p = disk_accounting_pos_to_bpos(&k); 458 459 struct bch_accounting_mem *acc = &c->accounting; 460 bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]), 461 accounting_pos_cmp, &p) >= acc->k.nr; 462 463 if (e->data_type == BCH_DATA_journal || !kill) 464 memcpy(cpu_replicas_entry(&new, new.nr++), 465 e, new.entry_size); 466 } 467 468 bch2_cpu_replicas_sort(&new); 469 470 ret = bch2_cpu_replicas_to_sb_replicas(c, &new); 471 472 if (!ret) 473 swap(c->replicas, new); 474 475 kfree(new.entries); 476 477 percpu_up_write(&c->mark_lock); 478 479 if (!ret) 480 bch2_write_super(c); 481 482 mutex_unlock(&c->sb_lock); 483 484 return ret; 485 } 486 487 /* Replicas tracking - superblock: */ 488 489 static int 490 __bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r, 491 struct bch_replicas_cpu *cpu_r) 492 { 493 struct bch_replicas_entry_v1 *e, *dst; 494 unsigned nr = 0, entry_size = 0, idx = 0; 495 496 for_each_replicas_entry(sb_r, e) { 497 entry_size = max_t(unsigned, entry_size, 498 replicas_entry_bytes(e)); 499 nr++; 500 } 501 502 cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); 503 if (!cpu_r->entries) 504 return -BCH_ERR_ENOMEM_cpu_replicas; 505 506 cpu_r->nr = nr; 507 cpu_r->entry_size = entry_size; 508 509 for_each_replicas_entry(sb_r, e) { 510 dst = cpu_replicas_entry(cpu_r, idx++); 511 memcpy(dst, e, replicas_entry_bytes(e)); 512 bch2_replicas_entry_sort(dst); 513 } 514 515 return 0; 516 } 517 518 static int 519 __bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r, 520 struct bch_replicas_cpu *cpu_r) 521 { 522 struct bch_replicas_entry_v0 *e; 523 unsigned nr = 0, entry_size = 0, idx = 0; 524 525 for_each_replicas_entry(sb_r, e) { 526 entry_size = max_t(unsigned, entry_size, 527 replicas_entry_bytes(e)); 528 nr++; 529 } 530 531 entry_size += sizeof(struct bch_replicas_entry_v1) - 532 sizeof(struct bch_replicas_entry_v0); 533 534 cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL); 535 if (!cpu_r->entries) 536 return -BCH_ERR_ENOMEM_cpu_replicas; 537 538 cpu_r->nr = nr; 539 cpu_r->entry_size = entry_size; 540 541 for_each_replicas_entry(sb_r, e) { 542 struct bch_replicas_entry_v1 *dst = 543 cpu_replicas_entry(cpu_r, idx++); 544 545 dst->data_type = e->data_type; 546 dst->nr_devs = e->nr_devs; 547 dst->nr_required = 1; 548 memcpy(dst->devs, e->devs, e->nr_devs); 549 bch2_replicas_entry_sort(dst); 550 } 551 552 return 0; 553 } 554 555 int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c) 556 { 557 struct bch_sb_field_replicas *sb_v1; 558 struct bch_sb_field_replicas_v0 *sb_v0; 559 struct bch_replicas_cpu new_r = { 0, 0, NULL }; 560 int ret = 0; 561 562 if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas))) 563 ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r); 564 else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0))) 565 ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r); 566 if (ret) 567 return ret; 568 569 bch2_cpu_replicas_sort(&new_r); 570 571 percpu_down_write(&c->mark_lock); 572 swap(c->replicas, new_r); 573 percpu_up_write(&c->mark_lock); 574 575 kfree(new_r.entries); 576 577 return 0; 578 } 579 580 static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c, 581 struct bch_replicas_cpu *r) 582 { 583 struct bch_sb_field_replicas_v0 *sb_r; 584 struct bch_replicas_entry_v0 *dst; 585 struct bch_replicas_entry_v1 *src; 586 size_t bytes; 587 588 bytes = sizeof(struct bch_sb_field_replicas); 589 590 for_each_cpu_replicas_entry(r, src) 591 bytes += replicas_entry_bytes(src) - 1; 592 593 sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0, 594 DIV_ROUND_UP(bytes, sizeof(u64))); 595 if (!sb_r) 596 return -BCH_ERR_ENOSPC_sb_replicas; 597 598 bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas); 599 sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0); 600 601 memset(&sb_r->entries, 0, 602 vstruct_end(&sb_r->field) - 603 (void *) &sb_r->entries); 604 605 dst = sb_r->entries; 606 for_each_cpu_replicas_entry(r, src) { 607 dst->data_type = src->data_type; 608 dst->nr_devs = src->nr_devs; 609 memcpy(dst->devs, src->devs, src->nr_devs); 610 611 dst = replicas_entry_next(dst); 612 613 BUG_ON((void *) dst > vstruct_end(&sb_r->field)); 614 } 615 616 return 0; 617 } 618 619 static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c, 620 struct bch_replicas_cpu *r) 621 { 622 struct bch_sb_field_replicas *sb_r; 623 struct bch_replicas_entry_v1 *dst, *src; 624 bool need_v1 = false; 625 size_t bytes; 626 627 bytes = sizeof(struct bch_sb_field_replicas); 628 629 for_each_cpu_replicas_entry(r, src) { 630 bytes += replicas_entry_bytes(src); 631 if (src->nr_required != 1) 632 need_v1 = true; 633 } 634 635 if (!need_v1) 636 return bch2_cpu_replicas_to_sb_replicas_v0(c, r); 637 638 sb_r = bch2_sb_field_resize(&c->disk_sb, replicas, 639 DIV_ROUND_UP(bytes, sizeof(u64))); 640 if (!sb_r) 641 return -BCH_ERR_ENOSPC_sb_replicas; 642 643 bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0); 644 sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas); 645 646 memset(&sb_r->entries, 0, 647 vstruct_end(&sb_r->field) - 648 (void *) &sb_r->entries); 649 650 dst = sb_r->entries; 651 for_each_cpu_replicas_entry(r, src) { 652 memcpy(dst, src, replicas_entry_bytes(src)); 653 654 dst = replicas_entry_next(dst); 655 656 BUG_ON((void *) dst > vstruct_end(&sb_r->field)); 657 } 658 659 return 0; 660 } 661 662 static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r, 663 struct bch_sb *sb, 664 struct printbuf *err) 665 { 666 unsigned i; 667 668 sort_r(cpu_r->entries, 669 cpu_r->nr, 670 cpu_r->entry_size, 671 bch2_memcmp, NULL, 672 (void *)(size_t)cpu_r->entry_size); 673 674 for (i = 0; i < cpu_r->nr; i++) { 675 struct bch_replicas_entry_v1 *e = 676 cpu_replicas_entry(cpu_r, i); 677 678 int ret = bch2_replicas_entry_validate(e, sb, err); 679 if (ret) 680 return ret; 681 682 if (i + 1 < cpu_r->nr) { 683 struct bch_replicas_entry_v1 *n = 684 cpu_replicas_entry(cpu_r, i + 1); 685 686 BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0); 687 688 if (!memcmp(e, n, cpu_r->entry_size)) { 689 prt_printf(err, "duplicate replicas entry "); 690 bch2_replicas_entry_to_text(err, e); 691 return -BCH_ERR_invalid_sb_replicas; 692 } 693 } 694 } 695 696 return 0; 697 } 698 699 static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f, 700 enum bch_validate_flags flags, struct printbuf *err) 701 { 702 struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas); 703 struct bch_replicas_cpu cpu_r; 704 int ret; 705 706 ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r); 707 if (ret) 708 return ret; 709 710 ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); 711 kfree(cpu_r.entries); 712 return ret; 713 } 714 715 static void bch2_sb_replicas_to_text(struct printbuf *out, 716 struct bch_sb *sb, 717 struct bch_sb_field *f) 718 { 719 struct bch_sb_field_replicas *r = field_to_type(f, replicas); 720 struct bch_replicas_entry_v1 *e; 721 bool first = true; 722 723 for_each_replicas_entry(r, e) { 724 if (!first) 725 prt_printf(out, " "); 726 first = false; 727 728 bch2_replicas_entry_to_text(out, e); 729 } 730 prt_newline(out); 731 } 732 733 const struct bch_sb_field_ops bch_sb_field_ops_replicas = { 734 .validate = bch2_sb_replicas_validate, 735 .to_text = bch2_sb_replicas_to_text, 736 }; 737 738 static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f, 739 enum bch_validate_flags flags, struct printbuf *err) 740 { 741 struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); 742 struct bch_replicas_cpu cpu_r; 743 int ret; 744 745 ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r); 746 if (ret) 747 return ret; 748 749 ret = bch2_cpu_replicas_validate(&cpu_r, sb, err); 750 kfree(cpu_r.entries); 751 return ret; 752 } 753 754 static void bch2_sb_replicas_v0_to_text(struct printbuf *out, 755 struct bch_sb *sb, 756 struct bch_sb_field *f) 757 { 758 struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0); 759 struct bch_replicas_entry_v0 *e; 760 bool first = true; 761 762 for_each_replicas_entry(sb_r, e) { 763 if (!first) 764 prt_printf(out, " "); 765 first = false; 766 767 bch2_replicas_entry_v0_to_text(out, e); 768 } 769 prt_newline(out); 770 } 771 772 const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = { 773 .validate = bch2_sb_replicas_v0_validate, 774 .to_text = bch2_sb_replicas_v0_to_text, 775 }; 776 777 /* Query replicas: */ 778 779 bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs, 780 unsigned flags, bool print) 781 { 782 struct bch_replicas_entry_v1 *e; 783 bool ret = true; 784 785 percpu_down_read(&c->mark_lock); 786 for_each_cpu_replicas_entry(&c->replicas, e) { 787 unsigned nr_online = 0, nr_failed = 0, dflags = 0; 788 bool metadata = e->data_type < BCH_DATA_user; 789 790 if (e->data_type == BCH_DATA_cached) 791 continue; 792 793 rcu_read_lock(); 794 for (unsigned i = 0; i < e->nr_devs; i++) { 795 nr_online += test_bit(e->devs[i], devs.d); 796 797 struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]); 798 nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed; 799 } 800 rcu_read_unlock(); 801 802 if (nr_failed == e->nr_devs) 803 continue; 804 805 if (nr_online < e->nr_required) 806 dflags |= metadata 807 ? BCH_FORCE_IF_METADATA_LOST 808 : BCH_FORCE_IF_DATA_LOST; 809 810 if (nr_online < e->nr_devs) 811 dflags |= metadata 812 ? BCH_FORCE_IF_METADATA_DEGRADED 813 : BCH_FORCE_IF_DATA_DEGRADED; 814 815 if (dflags & ~flags) { 816 if (print) { 817 struct printbuf buf = PRINTBUF; 818 819 bch2_replicas_entry_to_text(&buf, e); 820 bch_err(c, "insufficient devices online (%u) for replicas entry %s", 821 nr_online, buf.buf); 822 printbuf_exit(&buf); 823 } 824 ret = false; 825 break; 826 } 827 828 } 829 percpu_up_read(&c->mark_lock); 830 831 return ret; 832 } 833 834 unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev) 835 { 836 struct bch_sb_field_replicas *replicas; 837 struct bch_sb_field_replicas_v0 *replicas_v0; 838 unsigned data_has = 0; 839 840 replicas = bch2_sb_field_get(sb, replicas); 841 replicas_v0 = bch2_sb_field_get(sb, replicas_v0); 842 843 if (replicas) { 844 struct bch_replicas_entry_v1 *r; 845 846 for_each_replicas_entry(replicas, r) { 847 if (r->data_type >= sizeof(data_has) * 8) 848 continue; 849 850 for (unsigned i = 0; i < r->nr_devs; i++) 851 if (r->devs[i] == dev) 852 data_has |= 1 << r->data_type; 853 } 854 855 } else if (replicas_v0) { 856 struct bch_replicas_entry_v0 *r; 857 858 for_each_replicas_entry_v0(replicas_v0, r) { 859 if (r->data_type >= sizeof(data_has) * 8) 860 continue; 861 862 for (unsigned i = 0; i < r->nr_devs; i++) 863 if (r->devs[i] == dev) 864 data_has |= 1 << r->data_type; 865 } 866 } 867 868 869 return data_has; 870 } 871 872 unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca) 873 { 874 mutex_lock(&c->sb_lock); 875 unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx); 876 mutex_unlock(&c->sb_lock); 877 878 return ret; 879 } 880 881 void bch2_fs_replicas_exit(struct bch_fs *c) 882 { 883 kfree(c->replicas.entries); 884 kfree(c->replicas_gc.entries); 885 } 886