// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

#include <linux/sort.h>

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, const void *priv)
{
	size_t size = (size_t) priv;
	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned i;

	BUG_ON(e->data_type >= BCH_DATA_NR);
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort_r(r->entries, r->nr, r->entry_size,
			  bch2_memcmp, NULL, (void *)(size_t)r->entry_size);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry_v1 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
" %u" : "%u", e->devs[i]); 68 prt_printf(out, "]"); 69 } 70 71 int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r, 72 struct bch_sb *sb, 73 struct printbuf *err) 74 { 75 if (!r->nr_devs) { 76 prt_printf(err, "no devices in entry "); 77 goto bad; 78 } 79 80 if (r->nr_required > 1 && 81 r->nr_required >= r->nr_devs) { 82 prt_printf(err, "bad nr_required in entry "); 83 goto bad; 84 } 85 86 for (unsigned i = 0; i < r->nr_devs; i++) 87 if (!bch2_dev_exists(sb, r->devs[i])) { 88 prt_printf(err, "invalid device %u in entry ", r->devs[i]); 89 goto bad; 90 } 91 92 return 0; 93 bad: 94 bch2_replicas_entry_to_text(err, r); 95 return -BCH_ERR_invalid_replicas_entry; 96 } 97 98 void bch2_cpu_replicas_to_text(struct printbuf *out, 99 struct bch_replicas_cpu *r) 100 { 101 struct bch_replicas_entry_v1 *e; 102 bool first = true; 103 104 for_each_cpu_replicas_entry(r, e) { 105 if (!first) 106 prt_printf(out, " "); 107 first = false; 108 109 bch2_replicas_entry_to_text(out, e); 110 } 111 } 112 113 static void extent_to_replicas(struct bkey_s_c k, 114 struct bch_replicas_entry_v1 *r) 115 { 116 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 117 const union bch_extent_entry *entry; 118 struct extent_ptr_decoded p; 119 120 r->nr_required = 1; 121 122 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 123 if (p.ptr.cached) 124 continue; 125 126 if (!p.has_ec) 127 r->devs[r->nr_devs++] = p.ptr.dev; 128 else 129 r->nr_required = 0; 130 } 131 } 132 133 static void stripe_to_replicas(struct bkey_s_c k, 134 struct bch_replicas_entry_v1 *r) 135 { 136 struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k); 137 const struct bch_extent_ptr *ptr; 138 139 r->nr_required = s.v->nr_blocks - s.v->nr_redundant; 140 141 for (ptr = s.v->ptrs; 142 ptr < s.v->ptrs + s.v->nr_blocks; 143 ptr++) 144 r->devs[r->nr_devs++] = ptr->dev; 145 } 146 147 void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e, 148 struct bkey_s_c k) 149 { 150 e->nr_devs = 0; 151 152 switch (k.k->type) { 153 case KEY_TYPE_btree_ptr: 154 case KEY_TYPE_btree_ptr_v2: 155 e->data_type = BCH_DATA_btree; 156 extent_to_replicas(k, e); 157 break; 158 case KEY_TYPE_extent: 159 case KEY_TYPE_reflink_v: 160 e->data_type = BCH_DATA_user; 161 extent_to_replicas(k, e); 162 break; 163 case KEY_TYPE_stripe: 164 e->data_type = BCH_DATA_parity; 165 stripe_to_replicas(k, e); 166 break; 167 } 168 169 bch2_replicas_entry_sort(e); 170 } 171 172 void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e, 173 enum bch_data_type data_type, 174 struct bch_devs_list devs) 175 { 176 BUG_ON(!data_type || 177 data_type == BCH_DATA_sb || 178 data_type >= BCH_DATA_NR); 179 180 e->data_type = data_type; 181 e->nr_devs = 0; 182 e->nr_required = 1; 183 184 darray_for_each(devs, i) 185 e->devs[e->nr_devs++] = *i; 186 187 bch2_replicas_entry_sort(e); 188 } 189 190 static struct bch_replicas_cpu 191 cpu_replicas_add_entry(struct bch_fs *c, 192 struct bch_replicas_cpu *old, 193 struct bch_replicas_entry_v1 *new_entry) 194 { 195 unsigned i; 196 struct bch_replicas_cpu new = { 197 .nr = old->nr + 1, 198 .entry_size = max_t(unsigned, old->entry_size, 199 replicas_entry_bytes(new_entry)), 200 }; 201 202 for (i = 0; i < new_entry->nr_devs; i++) 203 BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i])); 204 205 BUG_ON(!new_entry->data_type); 206 verify_replicas_entry(new_entry); 207 208 new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL); 209 if (!new.entries) 210 return new; 211 212 for (i = 0; i < old->nr; i++) 213 memcpy(cpu_replicas_entry(&new, i), 214 cpu_replicas_entry(old, i), 215 
		memcpy(cpu_replicas_entry(&new, i),
		       cpu_replicas_entry(old, i),
		       old->entry_size);

	memcpy(cpu_replicas_entry(&new, old->nr),
	       new_entry,
	       replicas_entry_bytes(new_entry));

	bch2_cpu_replicas_sort(&new);
	return new;
}

static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry_v1 *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

	verify_replicas_entry(search);

#define entry_cmp(_l, _r)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry_v1 *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry_v1 *search)
{
	bool marked;

	if (!search->nr_devs)
		return true;

	verify_replicas_entry(search);

	percpu_down_read(&c->mark_lock);
	marked = __replicas_has_entry(&c->replicas, search) &&
		(likely(!c->replicas_gc.entries) ||
		 __replicas_has_entry(&c->replicas_gc, search));
	percpu_up_read(&c->mark_lock);

	return marked;
}

static void __replicas_table_update(struct bch_fs_usage *dst,
				    struct bch_replicas_cpu *dst_r,
				    struct bch_fs_usage *src,
				    struct bch_replicas_cpu *src_r)
{
	int src_idx, dst_idx;

	*dst = *src;

	for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
		if (!src->replicas[src_idx])
			continue;

		dst_idx = __replicas_entry_idx(dst_r,
				cpu_replicas_entry(src_r, src_idx));
		BUG_ON(dst_idx < 0);

		dst->replicas[dst_idx] = src->replicas[src_idx];
	}
}

static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
					 struct bch_replicas_cpu *dst_r,
					 struct bch_fs_usage __percpu *src_p,
					 struct bch_replicas_cpu *src_r)
{
	unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
	struct bch_fs_usage *dst, *src = (void *)
		bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr);

	preempt_disable();
	dst = this_cpu_ptr(dst_p);
	preempt_enable();

	__replicas_table_update(dst, dst_r, src, src_r);
}

/*
 * Resize filesystem accounting:
 */
static int replicas_table_update(struct bch_fs *c,
				 struct bch_replicas_cpu *new_r)
{
	struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
	struct bch_fs_usage_online *new_scratch = NULL;
	struct bch_fs_usage __percpu *new_gc = NULL;
	struct bch_fs_usage *new_base = NULL;
	unsigned i, bytes = sizeof(struct bch_fs_usage) +
		sizeof(u64) * new_r->nr;
	unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
		sizeof(u64) * new_r->nr;
	int ret = 0;

	memset(new_usage, 0, sizeof(new_usage));

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
					sizeof(u64), GFP_KERNEL)))
			goto err;

	if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
	    !(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
	    (c->usage_gc &&
	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
		goto err;

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		if (c->usage[i])
			__replicas_table_update_pcpu(new_usage[i], new_r,
						     c->usage[i], &c->replicas);
	if (c->usage_base)
		__replicas_table_update(new_base, new_r,
					c->usage_base, &c->replicas);
	if (c->usage_gc)
		__replicas_table_update_pcpu(new_gc, new_r,
					     c->usage_gc, &c->replicas);

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		swap(c->usage[i], new_usage[i]);
	swap(c->usage_base, new_base);
	swap(c->usage_scratch, new_scratch);
	swap(c->usage_gc, new_gc);
	swap(c->replicas, *new_r);
out:
	free_percpu(new_gc);
	kfree(new_scratch);
	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		free_percpu(new_usage[i]);
	kfree(new_base);
	return ret;
err:
	bch_err(c, "error updating replicas table: memory allocation failure");
	ret = -BCH_ERR_ENOMEM_replicas_table;
	goto out;
}

static unsigned reserve_journal_replicas(struct bch_fs *c,
					 struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry_v1 *e;
	unsigned journal_res_u64s = 0;

	/* nr_inodes: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* key_version: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* persistent_reserved: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
		BCH_REPLICAS_MAX;

	for_each_cpu_replicas_entry(r, e)
		journal_res_u64s +=
			DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
				     e->nr_devs, sizeof(u64));
	return journal_res_u64s;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;

		bch2_journal_entry_res_resize(&c->journal,
				&c->replicas_journal_res,
				reserve_journal_replicas(c, &new_r));
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		ret = replicas_table_update(c, &new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}

int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

/* replicas delta list: */

int bch2_replicas_delta_list_mark(struct bch_fs *c,
				  struct replicas_delta_list *r)
{
	struct replicas_delta *d = r->d;
	struct replicas_delta *top = (void *) r->d + r->used;
	int ret = 0;

	for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
		ret = bch2_mark_replicas(c, &d->r);
	return ret;
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret =   ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?:
		replicas_table_update(c, &c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry_v1 *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr = 0;
	c->replicas_gc.entry_size = 0;

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned i, nr;
	int ret = 0;

	bch2_journal_meta(&c->journal);
retry:
	nr		= READ_ONCE(c->replicas.nr);
	new.entry_size	= READ_ONCE(c->replicas.entry_size);
	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr != c->replicas.nr ||
	    new.entry_size != c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		if (e->data_type == BCH_DATA_journal ||
		    c->usage_base->replicas[i] ||
		    percpu_u64_get(&c->usage[0]->replicas[i]) ||
		    percpu_u64_get(&c->usage[1]->replicas[i]) ||
		    percpu_u64_get(&c->usage[2]->replicas[i]) ||
		    percpu_u64_get(&c->usage[3]->replicas[i]))
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret =   bch2_cpu_replicas_to_sb_replicas(c, &new) ?:
		replicas_table_update(c, &new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_set_usage(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *r,
			    u64 sectors)
{
	int ret, idx = bch2_replicas_entry_idx(c, r);

	if (idx < 0) {
		struct bch_replicas_cpu n;

		n = cpu_replicas_add_entry(c, &c->replicas, r);
		if (!n.entries)
			return -BCH_ERR_ENOMEM_cpu_replicas;

		ret = replicas_table_update(c, &n);
		if (ret)
			return ret;

		kfree(n.entries);

		idx = bch2_replicas_entry_idx(c, r);
		BUG_ON(idx < 0);
	}

	c->usage_base->replicas[idx] = sectors;

	return 0;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v1 *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr = nr;
	cpu_r->entry_size = entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry_v1) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr = nr;
	cpu_r->entry_size = entry_size;

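	/*
	 * Expand each v0 entry into a v1 entry; v0 entries have no
	 * nr_required field, so it defaults to 1:
	 */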
	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry_v1 *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type = e->data_type;
		dst->nr_devs = e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);

	ret = replicas_table_update(c, &new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry_v1 *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type = src->data_type;
		dst->nr_devs = src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry_v1 *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_r(cpu_r->entries,
	       cpu_r->nr,
	       cpu_r->entry_size,
	       bch2_memcmp, NULL,
	       (void *)(size_t)cpu_r->entry_size);

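	/*
	 * Entries are now sorted; validate each one and check that adjacent
	 * entries aren't duplicates:
	 */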
	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry_v1 *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_replicas_validate,
	.to_text	= bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_replicas_v0_validate,
	.to_text	= bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */

bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry_v1 *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		for (i = 0; i < e->nr_devs; i++) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);

			nr_online += test_bit(e->devs[i], devs.d);
			nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
		}

		if (nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}

	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned i, data_has = 0;

	replicas = bch2_sb_field_get(sb, replicas);
	replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry_v1 *r;

		for_each_replicas_entry(replicas, r)
			for (i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r)
			for (i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned ret;

	mutex_lock(&c->sb_lock);
	ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	unsigned i;

	kfree(c->usage_scratch);
	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		free_percpu(c->usage[i]);
	kfree(c->usage_base);
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);

	mempool_exit(&c->replicas_delta_pool);
}

int bch2_fs_replicas_init(struct bch_fs *c)
{
	bch2_journal_entry_res_resize(&c->journal,
			&c->replicas_journal_res,
			reserve_journal_replicas(c, &c->replicas));

	return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
					 REPLICAS_DELTA_LIST_MAX) ?:
		replicas_table_update(c, &c->replicas);
}