// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "disk_accounting.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;
	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (unsigned i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

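/*
 * Device lists within an entry are kept sorted, so that whole entries can be
 * compared and deduplicated with memcmp():
 */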
" %u" : "%u", e->devs[i]); 66 prt_printf(out, "]"); 67 } 68 69 int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, 70 struct bch_sb *sb, 71 struct printbuf *err) 72 { 73 if (!r->nr_devs) { 74 prt_printf(err, "no devices in entry "); 75 goto bad; 76 } 77 78 if (r->nr_required > 1 && 79 r->nr_required >= r->nr_devs) { 80 prt_printf(err, "bad nr_required in entry "); 81 goto bad; 82 } 83 84 for (unsigned i = 0; i < r->nr_devs; i++) 85 if (!bch2_member_exists(sb, r->devs[i])) { 86 prt_printf(err, "invalid device %u in entry ", r->devs[i]); 87 goto bad; 88 } 89 90 return 0; 91 bad: 92 bch2_replicas_entry_to_text(err, r); 93 return -BCH_ERR_invalid_replicas_entry; 94 } 95 96 void bch2_cpu_replicas_to_text(struct printbuf *out, 97 struct bch_replicas_cpu *r) 98 { 99 struct bch_replicas_entry_v1 *e; 100 bool first = true; 101 102 for_each_cpu_replicas_entry(r, e) { 103 if (!first) 104 prt_printf(out, " "); 105 first = false; 106 107 bch2_replicas_entry_to_text(out, e); 108 } 109 } 110 111 static void extent_to_replicas(struct bkey_s_c k, 112 struct bch_replicas_entry_v1 *r) 113 { 114 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 115 const union bch_extent_entry *entry; 116 struct extent_ptr_decoded p; 117 118 r->nr_required = 1; 119 120 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 121 if (p.ptr.cached) 122 continue; 123 124 if (!p.has_ec) 125 r->devs[r->nr_devs++] = p.ptr.dev; 126 else 127 r->nr_required = 0; 128 } 129 } 130 131 static void stripe_to_replicas(struct bkey_s_c k, 132 struct bch_replicas_entry_v1 *r) 133 { 134 struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); 135 const struct bch_extent_ptr *ptr; 136 137 r->nr_required = s.v->nr_blocks - s.v->nr_redundant; 138 139 for (ptr = s.v->ptrs; 140 ptr < s.v->ptrs + s.v->nr_blocks; 141 ptr++) 142 r->devs[r->nr_devs++] = ptr->dev; 143 } 144 145 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e, 146 struct bkey_s_c k) 147 { 148 e->nr_devs = 0; 149 150 switch (k.k->type) { 151 case KEY_TYPE_btree_ptr: 152 case KEY_TYPE_btree_ptr_v2: 153 e->data_type = BCH_DATA_btree; 154 extent_to_replicas(k, e); 155 break; 156 case KEY_TYPE_extent: 157 case KEY_TYPE_reflink_v: 158 e->data_type = BCH_DATA_user; 159 extent_to_replicas(k, e); 160 break; 161 case KEY_TYPE_stripe: 162 e->data_type = BCH_DATA_parity; 163 stripe_to_replicas(k, e); 164 break; 165 } 166 167 bch2_replicas_entry_sort(e); 168 } 169 170 void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, 171 enum bch_data_type data_type, 172 struct bch_devs_list devs) 173 { 174 BUG_ON(!data_type || 175 data_type == BCH_DATA_sb || 176 data_type >= BCH_DATA_NR); 177 178 e->data_type = data_type; 179 e->nr_devs = 0; 180 e->nr_required = 1; 181 182 darray_for_each(devs, i) 183 e->devs[e->nr_devs++] = *i; 184 185 bch2_replicas_entry_sort(e); 186 } 187 188 static struct bch_replicas_cpu 189 cpu_replicas_add_entry(struct bch_fs *c, 190 struct bch_replicas_cpu *old, 191 struct bch_replicas_entry_v1 *new_entry) 192 { 193 struct bch_replicas_cpu new = { 194 .nr = old->nr + 1, 195 .entry_size = max_t(unsigned, old->entry_size, 196 replicas_entry_bytes(new_entry)), 197 }; 198 199 new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); 200 if (!new.entries) 201 return new; 202 203 for (unsigned i = 0; i < old->nr; i++) 204 memcpy(cpu_replicas_entry(&new, i), 205 cpu_replicas_entry(old, i), 206 old->entry_size); 207 208 memcpy(cpu_replicas_entry(&new, old->nr), 209 new_entry, 210 replicas_entry_bytes(new_entry)); 211 212 bch2_cpu_replicas_sort(&new); 213 return new; 214 } 215 216 
static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry_v1 *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

#define entry_cmp(_l, _r) memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry_v1 *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked_locked(struct bch_fs *c,
				 struct bch_replicas_entry_v1 *search)
{
	verify_replicas_entry(search);

	return !search->nr_devs ||
		(__replicas_has_entry(&c->replicas, search) &&
		 (likely((!c->replicas_gc.entries)) ||
		  __replicas_has_entry(&c->replicas_gc, search)));
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry_v1 *search)
{
	percpu_down_read(&c->mark_lock);
	bool ret = bch2_replicas_marked_locked(c, search);
	percpu_up_read(&c->mark_lock);

	return ret;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		swap(c->replicas, new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}

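/*
 * Mark a replicas entry as in use: the fast path only checks the in-memory
 * tables; the slowpath also adds the entry to the superblock and writes it out
 * before the in-memory copies are updated.
 */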
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret = ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
	if (!ret)
		swap(c->replicas, c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry_v1 *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr = 0;
	c->replicas_gc.entry_size = 0;

	for_each_cpu_replicas_entry(&c->replicas, e) {
		/* Preserve unknown data types */
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}
	}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned nr;
	int ret = 0;

	bch2_accounting_mem_gc(c);
retry:
	nr = READ_ONCE(c->replicas.nr);
	new.entry_size = READ_ONCE(c->replicas.entry_size);
	new.entries = kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr != c->replicas.nr ||
	    new.entry_size != c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (unsigned i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		struct disk_accounting_pos k = {
			.type = BCH_DISK_ACCOUNTING_replicas,
		};

		unsafe_memcpy(&k.replicas, e, replicas_entry_bytes(e),
			      "embedded variable length struct");

		struct bpos p = disk_accounting_pos_to_bpos(&k);

		struct bch_accounting_mem *acc = &c->accounting;
		bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
					    accounting_pos_cmp, &p) >= acc->k.nr;

		if (e->data_type == BCH_DATA_journal || !kill)
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret = bch2_cpu_replicas_to_sb_replicas(c, &new);

	if (!ret)
		swap(c->replicas, new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v1 *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr = nr;
	cpu_r->entry_size = entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry_v1) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr = nr;
	cpu_r->entry_size = entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry_v1 *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type = e->data_type;
		dst->nr_devs = e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

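/*
 * Load the superblock replicas section - the v1 field if present, otherwise
 * the v0 field - into the in-memory table:
 */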
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);
	swap(c->replicas, new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry_v1 *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
				    DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type = src->data_type;
		dst->nr_devs = src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry_v1 *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
				    DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

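/*
 * Validate replicas entries read from the superblock: every entry must be well
 * formed, and after sorting there must be no duplicates:
 */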
static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_r(cpu_r->entries,
	       cpu_r->nr,
	       cpu_r->entry_size,
	       bch2_memcmp, NULL,
	       (void *)(size_t)cpu_r->entry_size);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry_v1 *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate = bch2_sb_replicas_validate,
	.to_text = bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate = bch2_sb_replicas_v0_validate,
	.to_text = bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */

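/*
 * Check whether every replicas entry could still be satisfied with only the
 * devices in @devs online, given the BCH_FORCE_IF_* flags permitting degraded
 * or lost data:
 */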
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry_v1 *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		rcu_read_lock();
		for (unsigned i = 0; i < e->nr_devs; i++) {
			nr_online += test_bit(e->devs[i], devs.d);

			struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
			nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed;
		}
		rcu_read_unlock();

		if (nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}
	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned data_has = 0;

	replicas = bch2_sb_field_get(sb, replicas);
	replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry_v1 *r;

		for_each_replicas_entry(replicas, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	mutex_lock(&c->sb_lock);
	unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);
}