// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "disk_accounting.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;
	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (unsigned i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

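/*
 * Note: unlike the v0 format above, v1 entries also carry nr_required, so a
 * v1 entry prints as e.g. "user: 1/2 [0 3]" - data type, nr_required/nr_devs,
 * then the sorted device list.
 */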
" %u" : "%u", e->devs[i]); 66 prt_printf(out, "]"); 67 } 68 69 int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, 70 struct bch_sb *sb, 71 struct printbuf *err) 72 { 73 if (!r->nr_devs) { 74 prt_printf(err, "no devices in entry "); 75 goto bad; 76 } 77 78 if (r->nr_required > 1 && 79 r->nr_required >= r->nr_devs) { 80 prt_printf(err, "bad nr_required in entry "); 81 goto bad; 82 } 83 84 for (unsigned i = 0; i < r->nr_devs; i++) 85 if (!bch2_member_exists(sb, r->devs[i])) { 86 prt_printf(err, "invalid device %u in entry ", r->devs[i]); 87 goto bad; 88 } 89 90 return 0; 91 bad: 92 bch2_replicas_entry_to_text(err, r); 93 return -BCH_ERR_invalid_replicas_entry; 94 } 95 96 void bch2_cpu_replicas_to_text(struct printbuf *out, 97 struct bch_replicas_cpu *r) 98 { 99 struct bch_replicas_entry_v1 *e; 100 bool first = true; 101 102 for_each_cpu_replicas_entry(r, e) { 103 if (!first) 104 prt_printf(out, " "); 105 first = false; 106 107 bch2_replicas_entry_to_text(out, e); 108 } 109 } 110 111 static void extent_to_replicas(struct bkey_s_c k, 112 struct bch_replicas_entry_v1 *r) 113 { 114 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 115 const union bch_extent_entry *entry; 116 struct extent_ptr_decoded p; 117 118 r->nr_required = 1; 119 120 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 121 if (p.ptr.cached) 122 continue; 123 124 if (!p.has_ec) 125 r->devs[r->nr_devs++] = p.ptr.dev; 126 else 127 r->nr_required = 0; 128 } 129 } 130 131 static void stripe_to_replicas(struct bkey_s_c k, 132 struct bch_replicas_entry_v1 *r) 133 { 134 struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); 135 const struct bch_extent_ptr *ptr; 136 137 r->nr_required = s.v->nr_blocks - s.v->nr_redundant; 138 139 for (ptr = s.v->ptrs; 140 ptr < s.v->ptrs + s.v->nr_blocks; 141 ptr++) 142 r->devs[r->nr_devs++] = ptr->dev; 143 } 144 145 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e, 146 struct bkey_s_c k) 147 { 148 e->nr_devs = 0; 149 150 switch (k.k->type) { 151 case KEY_TYPE_btree_ptr: 152 case KEY_TYPE_btree_ptr_v2: 153 e->data_type = BCH_DATA_btree; 154 extent_to_replicas(k, e); 155 break; 156 case KEY_TYPE_extent: 157 case KEY_TYPE_reflink_v: 158 e->data_type = BCH_DATA_user; 159 extent_to_replicas(k, e); 160 break; 161 case KEY_TYPE_stripe: 162 e->data_type = BCH_DATA_parity; 163 stripe_to_replicas(k, e); 164 break; 165 } 166 167 bch2_replicas_entry_sort(e); 168 } 169 170 void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, 171 enum bch_data_type data_type, 172 struct bch_devs_list devs) 173 { 174 BUG_ON(!data_type || 175 data_type == BCH_DATA_sb || 176 data_type >= BCH_DATA_NR); 177 178 e->data_type = data_type; 179 e->nr_devs = 0; 180 e->nr_required = 1; 181 182 darray_for_each(devs, i) 183 e->devs[e->nr_devs++] = *i; 184 185 bch2_replicas_entry_sort(e); 186 } 187 188 static struct bch_replicas_cpu 189 cpu_replicas_add_entry(struct bch_fs *c, 190 struct bch_replicas_cpu *old, 191 struct bch_replicas_entry_v1 *new_entry) 192 { 193 struct bch_replicas_cpu new = { 194 .nr = old->nr + 1, 195 .entry_size = max_t(unsigned, old->entry_size, 196 replicas_entry_bytes(new_entry)), 197 }; 198 199 new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); 200 if (!new.entries) 201 return new; 202 203 for (unsigned i = 0; i < old->nr; i++) 204 memcpy(cpu_replicas_entry(&new, i), 205 cpu_replicas_entry(old, i), 206 old->entry_size); 207 208 memcpy(cpu_replicas_entry(&new, old->nr), 209 new_entry, 210 replicas_entry_bytes(new_entry)); 211 212 bch2_cpu_replicas_sort(&new); 213 return new; 214 } 215 216 
static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry_v1 *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

#define entry_cmp(_l, _r)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry_v1 *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked_locked(struct bch_fs *c,
				 struct bch_replicas_entry_v1 *search)
{
	verify_replicas_entry(search);

	return !search->nr_devs ||
		(__replicas_has_entry(&c->replicas, search) &&
		 (likely((!c->replicas_gc.entries)) ||
		  __replicas_has_entry(&c->replicas_gc, search)));
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry_v1 *search)
{
	percpu_down_read(&c->mark_lock);
	bool ret = bch2_replicas_marked_locked(c, search);
	percpu_up_read(&c->mark_lock);

	return ret;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		swap(c->replicas, new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}

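/*
 * Mark a replicas entry as existing: the common case is that it is already in
 * the in-memory table and this is just a lookup; otherwise the slowpath above
 * takes sb_lock, adds it to the superblock field (and to the gc table, if a gc
 * is in progress) and writes the superblock before updating the in-memory
 * tables.
 */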
int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret = ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc);
	if (!ret)
		swap(c->replicas, c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry_v1 *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr		= 0;
	c->replicas_gc.entry_size	= 0;

	for_each_cpu_replicas_entry(&c->replicas, e) {
		/* Preserve unknown data types */
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}
	}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (e->data_type >= BCH_DATA_NR ||
		    !((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
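/*
 * Roughly: snapshot the current table size, allocate a copy without holding
 * locks, then recheck under sb_lock/mark_lock (retrying if the table changed),
 * keeping only entries that still have a matching accounting key - plus
 * BCH_DATA_journal entries, per the above.
 */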
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned nr;
	int ret = 0;

	bch2_accounting_mem_gc(c);
retry:
	nr		= READ_ONCE(c->replicas.nr);
	new.entry_size	= READ_ONCE(c->replicas.entry_size);
	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr			!= c->replicas.nr ||
	    new.entry_size	!= c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (unsigned i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		struct disk_accounting_pos k = {
			.type = BCH_DISK_ACCOUNTING_replicas,
		};

		memcpy(&k.replicas, e, replicas_entry_bytes(e));

		struct bpos p = disk_accounting_pos_to_bpos(&k);

		struct bch_accounting_mem *acc = &c->accounting;
		bool kill = eytzinger0_find(acc->k.data, acc->k.nr, sizeof(acc->k.data[0]),
					    accounting_pos_cmp, &p) >= acc->k.nr;

		if (e->data_type == BCH_DATA_journal || !kill)
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret = bch2_cpu_replicas_to_sb_replicas(c, &new);

	if (!ret)
		swap(c->replicas, new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v1 *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry_v1) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry_v1 *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type	= e->data_type;
		dst->nr_devs	= e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

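/*
 * Rebuild the in-memory table from the superblock, preferring the current
 * replicas field and falling back to replicas_v0 on older superblocks.
 */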
int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);
	swap(c->replicas, new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry_v1 *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type	= src->data_type;
		dst->nr_devs	= src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry_v1 *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

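/*
 * Validate a table built from an on-disk superblock field: sort it, validate
 * each entry against the member list, and reject duplicate entries.
 */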
static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_r(cpu_r->entries,
	       cpu_r->nr,
	       cpu_r->entry_size,
	       bch2_memcmp, NULL,
	       (void *)(size_t)cpu_r->entry_size);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry_v1 *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_replicas_validate,
	.to_text	= bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					enum bch_validate_flags flags, struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_replicas_v0_validate,
	.to_text	= bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */

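/*
 * Check whether every replicas entry still has enough of its devices in @devs
 * online: @flags (BCH_FORCE_IF_*) selects which kinds of degraded/lost data
 * the caller is willing to tolerate, and with @print set the offending entry
 * is logged.
 */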
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry_v1 *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		rcu_read_lock();
		for (unsigned i = 0; i < e->nr_devs; i++) {
			nr_online += test_bit(e->devs[i], devs.d);

			struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
			nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed;
		}
		rcu_read_unlock();

		if (nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}
	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned data_has = 0;

	replicas = bch2_sb_field_get(sb, replicas);
	replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry_v1 *r;

		for_each_replicas_entry(replicas, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r) {
			if (r->data_type >= sizeof(data_has) * 8)
				continue;

			for (unsigned i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
		}
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	mutex_lock(&c->sb_lock);
	unsigned ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);
}