// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned i;

	BUG_ON(e->data_type >= BCH_DATA_NR);
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, memcmp, NULL);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	unsigned i;

	if (e->data_type < BCH_DATA_NR)
		prt_printf(out, "%s", bch2_data_types[e->data_type]);
	else
		prt_printf(out, "(invalid data type %u)", e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry *e)
{
	unsigned i;

	if (e->data_type < BCH_DATA_NR)
		prt_printf(out, "%s", bch2_data_types[e->data_type]);
	else
		prt_printf(out, "(invalid data type %u)", e->data_type);

	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
	for (i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

int bch2_replicas_entry_validate(struct bch_replicas_entry *r,
				 struct bch_sb *sb,
				 struct printbuf *err)
{
	if (!r->nr_devs) {
		prt_printf(err, "no devices in entry ");
		goto bad;
	}

	if (r->nr_required > 1 &&
	    r->nr_required >= r->nr_devs) {
		prt_printf(err, "bad nr_required in entry ");
		goto bad;
	}

	for (unsigned i = 0; i < r->nr_devs; i++)
		if (!bch2_dev_exists(sb, r->devs[i])) {
			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
			goto bad;
		}

	return 0;
bad:
	bch2_replicas_entry_to_text(err, r);
	return -BCH_ERR_invalid_replicas_entry;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
			       struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry *e;
	bool first = true;

	for_each_cpu_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
}

static void extent_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry *r)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	r->nr_required = 1;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached)
			continue;

		if (!p.has_ec)
			r->devs[r->nr_devs++] = p.ptr.dev;
		else
			r->nr_required = 0;
	}
}

static void stripe_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry *r)
{
	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
	const struct bch_extent_ptr *ptr;

	r->nr_required = s.v->nr_blocks - s.v->nr_redundant;

	for (ptr = s.v->ptrs;
	     ptr < s.v->ptrs + s.v->nr_blocks;
	     ptr++)
		r->devs[r->nr_devs++] = ptr->dev;
}
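
/*
 * How a key maps to a replicas entry (see the two helpers above): cached
 * pointers are skipped, and nr_required encodes how many of the listed
 * devices must be available for the data to be readable - 1 for ordinary
 * replicated extents, 0 when any pointer is erasure coded (redundancy is
 * then accounted by the stripe's own entry), and nr_blocks - nr_redundant
 * for a stripe key itself.
 */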

void bch2_bkey_to_replicas(struct bch_replicas_entry *e,
			   struct bkey_s_c k)
{
	e->nr_devs = 0;

	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_btree_ptr_v2:
		e->data_type = BCH_DATA_btree;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		e->data_type = BCH_DATA_user;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_stripe:
		e->data_type = BCH_DATA_parity;
		stripe_to_replicas(k, e);
		break;
	}

	bch2_replicas_entry_sort(e);
}

void bch2_devlist_to_replicas(struct bch_replicas_entry *e,
			      enum bch_data_type data_type,
			      struct bch_devs_list devs)
{
	unsigned i;

	BUG_ON(!data_type ||
	       data_type == BCH_DATA_sb ||
	       data_type >= BCH_DATA_NR);

	e->data_type	= data_type;
	e->nr_devs	= 0;
	e->nr_required	= 1;

	for (i = 0; i < devs.nr; i++)
		e->devs[e->nr_devs++] = devs.devs[i];

	bch2_replicas_entry_sort(e);
}

static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_fs *c,
		       struct bch_replicas_cpu *old,
		       struct bch_replicas_entry *new_entry)
{
	unsigned i;
	struct bch_replicas_cpu new = {
		.nr		= old->nr + 1,
		.entry_size	= max_t(unsigned, old->entry_size,
					replicas_entry_bytes(new_entry)),
	};

	for (i = 0; i < new_entry->nr_devs; i++)
		BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));

	BUG_ON(!new_entry->data_type);
	verify_replicas_entry(new_entry);

	new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
	if (!new.entries)
		return new;

	for (i = 0; i < old->nr; i++)
		memcpy(cpu_replicas_entry(&new, i),
		       cpu_replicas_entry(old, i),
		       old->entry_size);

	memcpy(cpu_replicas_entry(&new, old->nr),
	       new_entry,
	       replicas_entry_bytes(new_entry));

	bch2_cpu_replicas_sort(&new);
	return new;
}

static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

	verify_replicas_entry(search);

#define entry_cmp(_l, _r, size)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}
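
/*
 * The in-memory table is a flat array of fixed-size entries kept in
 * eytzinger order, so lookups are just eytzinger0_find() with memcmp().
 * Comparing only replicas_entry_bytes(search) bytes is safe because
 * nr_devs lives in the entry header - entries with different device
 * counts already differ there - and entries are allocated zeroed, so the
 * padding past each entry's devs[] compares consistently.
 */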

int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry *search)
{
	bool marked;

	if (!search->nr_devs)
		return true;

	verify_replicas_entry(search);

	percpu_down_read(&c->mark_lock);
	marked = __replicas_has_entry(&c->replicas, search) &&
		(likely((!c->replicas_gc.entries)) ||
		 __replicas_has_entry(&c->replicas_gc, search));
	percpu_up_read(&c->mark_lock);

	return marked;
}

static void __replicas_table_update(struct bch_fs_usage *dst,
				    struct bch_replicas_cpu *dst_r,
				    struct bch_fs_usage *src,
				    struct bch_replicas_cpu *src_r)
{
	int src_idx, dst_idx;

	*dst = *src;

	for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
		if (!src->replicas[src_idx])
			continue;

		dst_idx = __replicas_entry_idx(dst_r,
				cpu_replicas_entry(src_r, src_idx));
		BUG_ON(dst_idx < 0);

		dst->replicas[dst_idx] = src->replicas[src_idx];
	}
}

static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
					 struct bch_replicas_cpu *dst_r,
					 struct bch_fs_usage __percpu *src_p,
					 struct bch_replicas_cpu *src_r)
{
	unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
	struct bch_fs_usage *dst, *src = (void *)
		bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr);

	preempt_disable();
	dst = this_cpu_ptr(dst_p);
	preempt_enable();

	__replicas_table_update(dst, dst_r, src, src_r);
}
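
/*
 * struct bch_fs_usage ends in a flexible array of sector counts, one u64
 * per replicas entry, indexed by that entry's position in c->replicas,
 * e.g.:
 *
 *	sectors = fs_usage->replicas[bch2_replicas_entry_idx(c, e)];
 *
 * Adding or dropping an entry therefore means reallocating every usage
 * structure and remapping the counters to their new indices, which is
 * what replicas_table_update() below does using the helpers above.
 */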

/*
 * Resize filesystem accounting:
 */
static int replicas_table_update(struct bch_fs *c,
				 struct bch_replicas_cpu *new_r)
{
	struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
	struct bch_fs_usage_online *new_scratch = NULL;
	struct bch_fs_usage __percpu *new_gc = NULL;
	struct bch_fs_usage *new_base = NULL;
	unsigned i, bytes = sizeof(struct bch_fs_usage) +
		sizeof(u64) * new_r->nr;
	unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
		sizeof(u64) * new_r->nr;
	int ret = 0;

	memset(new_usage, 0, sizeof(new_usage));

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
					sizeof(u64), GFP_KERNEL)))
			goto err;

	if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
	    !(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
	    (c->usage_gc &&
	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
		goto err;

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		if (c->usage[i])
			__replicas_table_update_pcpu(new_usage[i], new_r,
						     c->usage[i], &c->replicas);
	if (c->usage_base)
		__replicas_table_update(new_base, new_r,
					c->usage_base, &c->replicas);
	if (c->usage_gc)
		__replicas_table_update_pcpu(new_gc, new_r,
					     c->usage_gc, &c->replicas);

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		swap(c->usage[i], new_usage[i]);
	swap(c->usage_base,	new_base);
	swap(c->usage_scratch,	new_scratch);
	swap(c->usage_gc,	new_gc);
	swap(c->replicas,	*new_r);
out:
	free_percpu(new_gc);
	kfree(new_scratch);
	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		free_percpu(new_usage[i]);
	kfree(new_base);
	return ret;
err:
	bch_err(c, "error updating replicas table: memory allocation failure");
	ret = -BCH_ERR_ENOMEM_replicas_table;
	goto out;
}

static unsigned reserve_journal_replicas(struct bch_fs *c,
					 struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry *e;
	unsigned journal_res_u64s = 0;

	/* nr_inodes: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* key_version: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* persistent_reserved: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
		BCH_REPLICAS_MAX;

	for_each_cpu_replicas_entry(r, e)
		journal_res_u64s +=
			DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
				     e->nr_devs, sizeof(u64));
	return journal_res_u64s;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;

		bch2_journal_entry_res_resize(&c->journal,
				&c->replicas_journal_res,
				reserve_journal_replicas(c, &new_r));
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		ret = replicas_table_update(c, &new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}
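
/*
 * Callers mark an entry before committing anything that references it, so
 * the superblock always describes every combination of devices that may
 * hold data. A rough sketch of a caller (illustrative, not verbatim):
 *
 *	struct bch_replicas_padded r;
 *
 *	bch2_devlist_to_replicas(&r.e, BCH_DATA_user, devs);
 *	ret = bch2_mark_replicas(c, &r.e);
 *	if (ret)
 *		return ret;
 */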

int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

/* replicas delta list: */

int bch2_replicas_delta_list_mark(struct bch_fs *c,
				  struct replicas_delta_list *r)
{
	struct replicas_delta *d = r->d;
	struct replicas_delta *top = (void *) r->d + r->used;
	int ret = 0;

	for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
		ret = bch2_mark_replicas(c, &d->r);
	return ret;
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now, should
 * die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret =   ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?:
		replicas_table_update(c, &c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr		= 0;
	c->replicas_gc.entry_size	= 0;

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}
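
/*
 * Rough usage sketch for the old mechanism (illustrative; the real caller
 * lives in the journal code):
 *
 *	mutex_lock(&c->replicas_gc_lock);
 *	bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
 *
 *	... re-mark every journal replicas entry still in use with
 *	    bch2_mark_replicas(); while replicas_gc is active the slowpath
 *	    also adds the entry to it ...
 *
 *	ret = bch2_replicas_gc_end(c, ret);
 *	mutex_unlock(&c->replicas_gc_lock);
 *
 * Entries matching the typemask that were never re-marked are dropped when
 * gc_end() installs the new table.
 */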

/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't drop
 * any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end) mechanism
 * is retained for that.
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned i, nr;
	int ret = 0;

	bch2_journal_meta(&c->journal);
retry:
	nr		= READ_ONCE(c->replicas.nr);
	new.entry_size	= READ_ONCE(c->replicas.entry_size);
	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr			!= c->replicas.nr ||
	    new.entry_size	!= c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry *e =
			cpu_replicas_entry(&c->replicas, i);

		if (e->data_type == BCH_DATA_journal ||
		    c->usage_base->replicas[i] ||
		    percpu_u64_get(&c->usage[0]->replicas[i]) ||
		    percpu_u64_get(&c->usage[1]->replicas[i]) ||
		    percpu_u64_get(&c->usage[2]->replicas[i]) ||
		    percpu_u64_get(&c->usage[3]->replicas[i]))
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret =   bch2_cpu_replicas_to_sb_replicas(c, &new) ?:
		replicas_table_update(c, &new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_set_usage(struct bch_fs *c,
			    struct bch_replicas_entry *r,
			    u64 sectors)
{
	int ret, idx = bch2_replicas_entry_idx(c, r);

	if (idx < 0) {
		struct bch_replicas_cpu n;

		n = cpu_replicas_add_entry(c, &c->replicas, r);
		if (!n.entries)
			return -BCH_ERR_ENOMEM_cpu_replicas;

		ret = replicas_table_update(c, &n);
		if (ret)
			return ret;

		kfree(n.entries);

		idx = bch2_replicas_entry_idx(c, r);
		BUG_ON(idx < 0);
	}

	c->usage_base->replicas[idx] = sectors;

	return 0;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}
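
/*
 * bch_replicas_entry_v0 is the older on-disk format, identical except that
 * it lacks the nr_required field: the conversion below grows entry_size by
 * the difference and defaults nr_required to 1.
 */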

static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type	= e->data_type;
		dst->nr_devs	= e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);

	ret = replicas_table_update(c, &new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type	= src->data_type;
		dst->nr_devs	= src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}
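
/*
 * Only one of the two superblock fields is kept at a time: when every entry
 * has nr_required == 1 we write the smaller v0 field and delete the current
 * one, otherwise the reverse.
 */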

static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_cmp_size(cpu_r->entries,
		      cpu_r->nr,
		      cpu_r->entry_size,
		      memcmp, NULL);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_replicas_validate,
	.to_text	= bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_replicas_v0_validate,
	.to_text	= bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */
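
/*
 * For each replicas entry, data is "degraded" if fewer than nr_devs of its
 * devices are online and "lost" if fewer than nr_required are; proceeding
 * anyway requires the corresponding BCH_FORCE_IF_*_DEGRADED/_LOST flag,
 * which is what bch2_have_enough_devs() checks below. Entries whose devices
 * have all been marked failed are skipped entirely.
 */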

bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		for (i = 0; i < e->nr_devs; i++) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);

			nr_online += test_bit(e->devs[i], devs.d);
			nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
		}

		if (nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}

	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned i, data_has = 0;

	replicas = bch2_sb_field_get(sb, replicas);
	replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry *r;

		for_each_replicas_entry(replicas, r)
			for (i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r)
			for (i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned ret;

	mutex_lock(&c->sb_lock);
	ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	unsigned i;

	kfree(c->usage_scratch);
	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		free_percpu(c->usage[i]);
	kfree(c->usage_base);
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);

	mempool_exit(&c->replicas_delta_pool);
}

int bch2_fs_replicas_init(struct bch_fs *c)
{
	bch2_journal_entry_res_resize(&c->journal,
			&c->replicas_journal_res,
			reserve_journal_replicas(c, &c->replicas));

	return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
					 REPLICAS_DELTA_LIST_MAX) ?:
		replicas_table_update(c, &c->replicas);
}