// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "buckets.h"
#include "journal.h"
#include "replicas.h"
#include "super-io.h"

static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *,
					    struct bch_replicas_cpu *);

/* Some (buggy!) compilers don't allow memcmp to be passed as a pointer */
static int bch2_memcmp(const void *l, const void *r, size_t size)
{
	return memcmp(l, r, size);
}

/* Replicas tracking - in memory: */

static void verify_replicas_entry(struct bch_replicas_entry_v1 *e)
{
#ifdef CONFIG_BCACHEFS_DEBUG
	unsigned i;

	BUG_ON(e->data_type >= BCH_DATA_NR);
	BUG_ON(!e->nr_devs);
	BUG_ON(e->nr_required > 1 &&
	       e->nr_required >= e->nr_devs);

	for (i = 0; i + 1 < e->nr_devs; i++)
		BUG_ON(e->devs[i] >= e->devs[i + 1]);
#endif
}

void bch2_replicas_entry_sort(struct bch_replicas_entry_v1 *e)
{
	bubble_sort(e->devs, e->nr_devs, u8_cmp);
}

static void bch2_cpu_replicas_sort(struct bch_replicas_cpu *r)
{
	eytzinger0_sort(r->entries, r->nr, r->entry_size, bch2_memcmp, NULL);
}

static void bch2_replicas_entry_v0_to_text(struct printbuf *out,
					   struct bch_replicas_entry_v0 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u [", e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

void bch2_replicas_entry_to_text(struct printbuf *out,
				 struct bch_replicas_entry_v1 *e)
{
	bch2_prt_data_type(out, e->data_type);

	prt_printf(out, ": %u/%u [", e->nr_required, e->nr_devs);
	for (unsigned i = 0; i < e->nr_devs; i++)
		prt_printf(out, i ? " %u" : "%u", e->devs[i]);
	prt_printf(out, "]");
}

int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
				 struct bch_sb *sb,
				 struct printbuf *err)
{
	if (!r->nr_devs) {
		prt_printf(err, "no devices in entry ");
		goto bad;
	}

	if (r->nr_required > 1 &&
	    r->nr_required >= r->nr_devs) {
		prt_printf(err, "bad nr_required in entry ");
		goto bad;
	}

	for (unsigned i = 0; i < r->nr_devs; i++)
		if (!bch2_dev_exists(sb, r->devs[i])) {
			prt_printf(err, "invalid device %u in entry ", r->devs[i]);
			goto bad;
		}

	return 0;
bad:
	bch2_replicas_entry_to_text(err, r);
	return -BCH_ERR_invalid_replicas_entry;
}

void bch2_cpu_replicas_to_text(struct printbuf *out,
			       struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_cpu_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
}

static void extent_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry_v1 *r)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	r->nr_required	= 1;

	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		if (p.ptr.cached)
			continue;

		if (!p.has_ec)
			r->devs[r->nr_devs++] = p.ptr.dev;
		else
			r->nr_required = 0;
	}
}

static void stripe_to_replicas(struct bkey_s_c k,
			       struct bch_replicas_entry_v1 *r)
{
	struct bkey_s_c_stripe s = bkey_s_c_to_stripe(k);
	const struct bch_extent_ptr *ptr;

	r->nr_required	= s.v->nr_blocks - s.v->nr_redundant;

	for (ptr = s.v->ptrs;
	     ptr < s.v->ptrs + s.v->nr_blocks;
	     ptr++)
		r->devs[r->nr_devs++] = ptr->dev;
}

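/*
 * Derive the replicas entry describing a key: each non-cached pointer
 * contributes one device, the data type comes from the key type, and erasure
 * coded extents get nr_required = 0 since the stripe provides the redundancy.
 *
 * Illustrative sketch of a typical caller (assumes the padded stack wrapper,
 * which leaves room in devs[] for the maximum number of pointers):
 *
 *	struct bch_replicas_padded r;
 *
 *	bch2_bkey_to_replicas(&r.e, k);
 *	ret = bch2_mark_replicas(c, &r.e);
 */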
void bch2_bkey_to_replicas(struct bch_replicas_entry_v1 *e,
			   struct bkey_s_c k)
{
	e->nr_devs = 0;

	switch (k.k->type) {
	case KEY_TYPE_btree_ptr:
	case KEY_TYPE_btree_ptr_v2:
		e->data_type = BCH_DATA_btree;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_extent:
	case KEY_TYPE_reflink_v:
		e->data_type = BCH_DATA_user;
		extent_to_replicas(k, e);
		break;
	case KEY_TYPE_stripe:
		e->data_type = BCH_DATA_parity;
		stripe_to_replicas(k, e);
		break;
	}

	bch2_replicas_entry_sort(e);
}

void bch2_devlist_to_replicas(struct bch_replicas_entry_v1 *e,
			      enum bch_data_type data_type,
			      struct bch_devs_list devs)
{
	BUG_ON(!data_type ||
	       data_type == BCH_DATA_sb ||
	       data_type >= BCH_DATA_NR);

	e->data_type	= data_type;
	e->nr_devs	= 0;
	e->nr_required	= 1;

	darray_for_each(devs, i)
		e->devs[e->nr_devs++] = *i;

	bch2_replicas_entry_sort(e);
}

static struct bch_replicas_cpu
cpu_replicas_add_entry(struct bch_fs *c,
		       struct bch_replicas_cpu *old,
		       struct bch_replicas_entry_v1 *new_entry)
{
	unsigned i;
	struct bch_replicas_cpu new = {
		.nr		= old->nr + 1,
		.entry_size	= max_t(unsigned, old->entry_size,
					replicas_entry_bytes(new_entry)),
	};

	for (i = 0; i < new_entry->nr_devs; i++)
		BUG_ON(!bch2_dev_exists2(c, new_entry->devs[i]));

	BUG_ON(!new_entry->data_type);
	verify_replicas_entry(new_entry);

	new.entries = kcalloc(new.nr, new.entry_size, GFP_KERNEL);
	if (!new.entries)
		return new;

	for (i = 0; i < old->nr; i++)
		memcpy(cpu_replicas_entry(&new, i),
		       cpu_replicas_entry(old, i),
		       old->entry_size);

	memcpy(cpu_replicas_entry(&new, old->nr),
	       new_entry,
	       replicas_entry_bytes(new_entry));

	bch2_cpu_replicas_sort(&new);
	return new;
}

static inline int __replicas_entry_idx(struct bch_replicas_cpu *r,
				       struct bch_replicas_entry_v1 *search)
{
	int idx, entry_size = replicas_entry_bytes(search);

	if (unlikely(entry_size > r->entry_size))
		return -1;

	verify_replicas_entry(search);

#define entry_cmp(_l, _r, size)	memcmp(_l, _r, entry_size)
	idx = eytzinger0_find(r->entries, r->nr, r->entry_size,
			      entry_cmp, search);
#undef entry_cmp

	return idx < r->nr ? idx : -1;
}

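/*
 * Look up an entry's index in the in-memory table; this index is also the
 * entry's slot in the bch_fs_usage replicas[] arrays.  The table is kept in
 * eytzinger (cache friendly binary search) order, so the search key's device
 * list has to be sorted first - hence bch2_replicas_entry_idx() sorts the
 * search key before calling __replicas_entry_idx().
 */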
int bch2_replicas_entry_idx(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *search)
{
	bch2_replicas_entry_sort(search);

	return __replicas_entry_idx(&c->replicas, search);
}

static bool __replicas_has_entry(struct bch_replicas_cpu *r,
				 struct bch_replicas_entry_v1 *search)
{
	return __replicas_entry_idx(r, search) >= 0;
}

bool bch2_replicas_marked(struct bch_fs *c,
			  struct bch_replicas_entry_v1 *search)
{
	bool marked;

	if (!search->nr_devs)
		return true;

	verify_replicas_entry(search);

	percpu_down_read(&c->mark_lock);
	marked = __replicas_has_entry(&c->replicas, search) &&
		(likely((!c->replicas_gc.entries)) ||
		 __replicas_has_entry(&c->replicas_gc, search));
	percpu_up_read(&c->mark_lock);

	return marked;
}

static void __replicas_table_update(struct bch_fs_usage *dst,
				    struct bch_replicas_cpu *dst_r,
				    struct bch_fs_usage *src,
				    struct bch_replicas_cpu *src_r)
{
	int src_idx, dst_idx;

	*dst = *src;

	for (src_idx = 0; src_idx < src_r->nr; src_idx++) {
		if (!src->replicas[src_idx])
			continue;

		dst_idx = __replicas_entry_idx(dst_r,
				cpu_replicas_entry(src_r, src_idx));
		BUG_ON(dst_idx < 0);

		dst->replicas[dst_idx] = src->replicas[src_idx];
	}
}

static void __replicas_table_update_pcpu(struct bch_fs_usage __percpu *dst_p,
					 struct bch_replicas_cpu *dst_r,
					 struct bch_fs_usage __percpu *src_p,
					 struct bch_replicas_cpu *src_r)
{
	unsigned src_nr = sizeof(struct bch_fs_usage) / sizeof(u64) + src_r->nr;
	struct bch_fs_usage *dst, *src = (void *)
		bch2_acc_percpu_u64s((u64 __percpu *) src_p, src_nr);

	preempt_disable();
	dst = this_cpu_ptr(dst_p);
	preempt_enable();

	__replicas_table_update(dst, dst_r, src, src_r);
}

/*
 * Resize filesystem accounting:
 */
static int replicas_table_update(struct bch_fs *c,
				 struct bch_replicas_cpu *new_r)
{
	struct bch_fs_usage __percpu *new_usage[JOURNAL_BUF_NR];
	struct bch_fs_usage_online *new_scratch = NULL;
	struct bch_fs_usage __percpu *new_gc = NULL;
	struct bch_fs_usage *new_base = NULL;
	unsigned i, bytes = sizeof(struct bch_fs_usage) +
		sizeof(u64) * new_r->nr;
	unsigned scratch_bytes = sizeof(struct bch_fs_usage_online) +
		sizeof(u64) * new_r->nr;
	int ret = 0;

	memset(new_usage, 0, sizeof(new_usage));

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		if (!(new_usage[i] = __alloc_percpu_gfp(bytes,
					sizeof(u64), GFP_KERNEL)))
			goto err;

	if (!(new_base = kzalloc(bytes, GFP_KERNEL)) ||
	    !(new_scratch = kmalloc(scratch_bytes, GFP_KERNEL)) ||
	    (c->usage_gc &&
	     !(new_gc = __alloc_percpu_gfp(bytes, sizeof(u64), GFP_KERNEL))))
		goto err;

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		if (c->usage[i])
			__replicas_table_update_pcpu(new_usage[i], new_r,
						     c->usage[i], &c->replicas);
	if (c->usage_base)
		__replicas_table_update(new_base, new_r,
					c->usage_base, &c->replicas);
	if (c->usage_gc)
		__replicas_table_update_pcpu(new_gc, new_r,
					     c->usage_gc, &c->replicas);

	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		swap(c->usage[i], new_usage[i]);
	swap(c->usage_base,	new_base);
	swap(c->usage_scratch,	new_scratch);
	swap(c->usage_gc,	new_gc);
	swap(c->replicas,	*new_r);
out:
	free_percpu(new_gc);
	kfree(new_scratch);
	for (i = 0; i < ARRAY_SIZE(new_usage); i++)
		free_percpu(new_usage[i]);
	kfree(new_base);
	return ret;
err:
	bch_err(c, "error updating replicas table: memory allocation failure");
	ret = -BCH_ERR_ENOMEM_replicas_table;
	goto out;
}

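/*
 * Size the journal reservation needed to write out usage for every replicas
 * entry: one jset_entry_usage each for nr_inodes and key_version,
 * BCH_REPLICAS_MAX persistent_reserved entries, plus one jset_entry_data_usage
 * (which embeds the entry's device list) per replicas entry, all rounded up to
 * whole u64s.
 */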
static unsigned reserve_journal_replicas(struct bch_fs *c,
					 struct bch_replicas_cpu *r)
{
	struct bch_replicas_entry_v1 *e;
	unsigned journal_res_u64s = 0;

	/* nr_inodes: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* key_version: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64));

	/* persistent_reserved: */
	journal_res_u64s +=
		DIV_ROUND_UP(sizeof(struct jset_entry_usage), sizeof(u64)) *
		BCH_REPLICAS_MAX;

	for_each_cpu_replicas_entry(r, e)
		journal_res_u64s +=
			DIV_ROUND_UP(sizeof(struct jset_entry_data_usage) +
				     e->nr_devs, sizeof(u64));
	return journal_res_u64s;
}

noinline
static int bch2_mark_replicas_slowpath(struct bch_fs *c,
				       struct bch_replicas_entry_v1 *new_entry)
{
	struct bch_replicas_cpu new_r, new_gc;
	int ret = 0;

	verify_replicas_entry(new_entry);

	memset(&new_r, 0, sizeof(new_r));
	memset(&new_gc, 0, sizeof(new_gc));

	mutex_lock(&c->sb_lock);

	if (c->replicas_gc.entries &&
	    !__replicas_has_entry(&c->replicas_gc, new_entry)) {
		new_gc = cpu_replicas_add_entry(c, &c->replicas_gc, new_entry);
		if (!new_gc.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}
	}

	if (!__replicas_has_entry(&c->replicas, new_entry)) {
		new_r = cpu_replicas_add_entry(c, &c->replicas, new_entry);
		if (!new_r.entries) {
			ret = -BCH_ERR_ENOMEM_cpu_replicas;
			goto err;
		}

		ret = bch2_cpu_replicas_to_sb_replicas(c, &new_r);
		if (ret)
			goto err;

		bch2_journal_entry_res_resize(&c->journal,
				&c->replicas_journal_res,
				reserve_journal_replicas(c, &new_r));
	}

	if (!new_r.entries &&
	    !new_gc.entries)
		goto out;

	/* allocations done, now commit: */

	if (new_r.entries)
		bch2_write_super(c);

	/* don't update in memory replicas until changes are persistent */
	percpu_down_write(&c->mark_lock);
	if (new_r.entries)
		ret = replicas_table_update(c, &new_r);
	if (new_gc.entries)
		swap(new_gc, c->replicas_gc);
	percpu_up_write(&c->mark_lock);
out:
	mutex_unlock(&c->sb_lock);

	kfree(new_r.entries);
	kfree(new_gc.entries);

	return ret;
err:
	bch_err_msg(c, ret, "adding replicas entry");
	goto out;
}

int bch2_mark_replicas(struct bch_fs *c, struct bch_replicas_entry_v1 *r)
{
	return likely(bch2_replicas_marked(c, r))
		? 0 : bch2_mark_replicas_slowpath(c, r);
}

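/*
 * bch2_mark_replicas() is cheap when the entry already exists; the slowpath
 * only runs the first time a new combination of devices shows up, since it
 * has to update the superblock and resize the usage tables.
 *
 * Illustrative sketch (assumed caller, e.g. recording where a journal write
 * landed):
 *
 *	struct bch_replicas_padded r;
 *
 *	bch2_devlist_to_replicas(&r.e, BCH_DATA_journal, devs);
 *	ret = bch2_mark_replicas(c, &r.e);
 */
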
/* replicas delta list: */

int bch2_replicas_delta_list_mark(struct bch_fs *c,
				  struct replicas_delta_list *r)
{
	struct replicas_delta *d = r->d;
	struct replicas_delta *top = (void *) r->d + r->used;
	int ret = 0;

	for (d = r->d; !ret && d != top; d = replicas_delta_next(d))
		ret = bch2_mark_replicas(c, &d->r);
	return ret;
}

/*
 * Old replicas_gc mechanism: only used for journal replicas entries now,
 * should die at some point:
 */

int bch2_replicas_gc_end(struct bch_fs *c, int ret)
{
	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	ret =   ret ?:
		bch2_cpu_replicas_to_sb_replicas(c, &c->replicas_gc) ?:
		replicas_table_update(c, &c->replicas_gc);

	kfree(c->replicas_gc.entries);
	c->replicas_gc.entries = NULL;

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_gc_start(struct bch_fs *c, unsigned typemask)
{
	struct bch_replicas_entry_v1 *e;
	unsigned i = 0;

	lockdep_assert_held(&c->replicas_gc_lock);

	mutex_lock(&c->sb_lock);
	BUG_ON(c->replicas_gc.entries);

	c->replicas_gc.nr		= 0;
	c->replicas_gc.entry_size	= 0;

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask)) {
			c->replicas_gc.nr++;
			c->replicas_gc.entry_size =
				max_t(unsigned, c->replicas_gc.entry_size,
				      replicas_entry_bytes(e));
		}

	c->replicas_gc.entries = kcalloc(c->replicas_gc.nr,
					 c->replicas_gc.entry_size,
					 GFP_KERNEL);
	if (!c->replicas_gc.entries) {
		mutex_unlock(&c->sb_lock);
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	for_each_cpu_replicas_entry(&c->replicas, e)
		if (!((1 << e->data_type) & typemask))
			memcpy(cpu_replicas_entry(&c->replicas_gc, i++),
			       e, c->replicas_gc.entry_size);

	bch2_cpu_replicas_sort(&c->replicas_gc);
	mutex_unlock(&c->sb_lock);

	return 0;
}

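/*
 * Illustrative sketch of the gc_start/gc_end protocol above (assumed caller,
 * not part of this file): to rebuild the journal replicas entries, take
 * replicas_gc_lock, start gc with a mask of the types being rebuilt, re-mark
 * everything still in use (bch2_mark_replicas() adds to replicas_gc via the
 * slowpath while gc is running), then commit:
 *
 *	mutex_lock(&c->replicas_gc_lock);
 *	ret = bch2_replicas_gc_start(c, 1 << BCH_DATA_journal);
 *
 *	... bch2_mark_replicas() for each entry still referenced ...
 *
 *	ret = bch2_replicas_gc_end(c, ret);
 *	mutex_unlock(&c->replicas_gc_lock);
 */
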
/*
 * New much simpler mechanism for clearing out unneeded replicas entries - drop
 * replicas entries that have 0 sectors used.
 *
 * However, we don't track sector counts for journal usage, so this doesn't
 * drop any BCH_DATA_journal entries; the old bch2_replicas_gc_(start|end)
 * mechanism is retained for that.
 */
int bch2_replicas_gc2(struct bch_fs *c)
{
	struct bch_replicas_cpu new = { 0 };
	unsigned i, nr;
	int ret = 0;

	bch2_journal_meta(&c->journal);
retry:
	nr		= READ_ONCE(c->replicas.nr);
	new.entry_size	= READ_ONCE(c->replicas.entry_size);
	new.entries	= kcalloc(nr, new.entry_size, GFP_KERNEL);
	if (!new.entries) {
		bch_err(c, "error allocating c->replicas_gc");
		return -BCH_ERR_ENOMEM_replicas_gc;
	}

	mutex_lock(&c->sb_lock);
	percpu_down_write(&c->mark_lock);

	if (nr			!= c->replicas.nr ||
	    new.entry_size	!= c->replicas.entry_size) {
		percpu_up_write(&c->mark_lock);
		mutex_unlock(&c->sb_lock);
		kfree(new.entries);
		goto retry;
	}

	for (i = 0; i < c->replicas.nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(&c->replicas, i);

		if (e->data_type == BCH_DATA_journal ||
		    c->usage_base->replicas[i] ||
		    percpu_u64_get(&c->usage[0]->replicas[i]) ||
		    percpu_u64_get(&c->usage[1]->replicas[i]) ||
		    percpu_u64_get(&c->usage[2]->replicas[i]) ||
		    percpu_u64_get(&c->usage[3]->replicas[i]))
			memcpy(cpu_replicas_entry(&new, new.nr++),
			       e, new.entry_size);
	}

	bch2_cpu_replicas_sort(&new);

	ret =   bch2_cpu_replicas_to_sb_replicas(c, &new) ?:
		replicas_table_update(c, &new);

	kfree(new.entries);

	percpu_up_write(&c->mark_lock);

	if (!ret)
		bch2_write_super(c);

	mutex_unlock(&c->sb_lock);

	return ret;
}

int bch2_replicas_set_usage(struct bch_fs *c,
			    struct bch_replicas_entry_v1 *r,
			    u64 sectors)
{
	int ret, idx = bch2_replicas_entry_idx(c, r);

	if (idx < 0) {
		struct bch_replicas_cpu n;

		n = cpu_replicas_add_entry(c, &c->replicas, r);
		if (!n.entries)
			return -BCH_ERR_ENOMEM_cpu_replicas;

		ret = replicas_table_update(c, &n);
		if (ret)
			return ret;

		kfree(n.entries);

		idx = bch2_replicas_entry_idx(c, r);
		BUG_ON(ret < 0);
	}

	c->usage_base->replicas[idx] = sectors;

	return 0;
}

/* Replicas tracking - superblock: */

static int
__bch2_sb_replicas_to_cpu_replicas(struct bch_sb_field_replicas *sb_r,
				   struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v1 *e, *dst;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		dst = cpu_replicas_entry(cpu_r, idx++);
		memcpy(dst, e, replicas_entry_bytes(e));
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

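/*
 * v0 replicas entries have no nr_required field; when converting to the
 * in-memory (v1) layout, entry_size is grown by the difference between the
 * two structs and nr_required defaults to 1.
 */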
static int
__bch2_sb_replicas_v0_to_cpu_replicas(struct bch_sb_field_replicas_v0 *sb_r,
				      struct bch_replicas_cpu *cpu_r)
{
	struct bch_replicas_entry_v0 *e;
	unsigned nr = 0, entry_size = 0, idx = 0;

	for_each_replicas_entry(sb_r, e) {
		entry_size = max_t(unsigned, entry_size,
				   replicas_entry_bytes(e));
		nr++;
	}

	entry_size += sizeof(struct bch_replicas_entry_v1) -
		sizeof(struct bch_replicas_entry_v0);

	cpu_r->entries = kcalloc(nr, entry_size, GFP_KERNEL);
	if (!cpu_r->entries)
		return -BCH_ERR_ENOMEM_cpu_replicas;

	cpu_r->nr		= nr;
	cpu_r->entry_size	= entry_size;

	for_each_replicas_entry(sb_r, e) {
		struct bch_replicas_entry_v1 *dst =
			cpu_replicas_entry(cpu_r, idx++);

		dst->data_type	= e->data_type;
		dst->nr_devs	= e->nr_devs;
		dst->nr_required = 1;
		memcpy(dst->devs, e->devs, e->nr_devs);
		bch2_replicas_entry_sort(dst);
	}

	return 0;
}

int bch2_sb_replicas_to_cpu_replicas(struct bch_fs *c)
{
	struct bch_sb_field_replicas *sb_v1;
	struct bch_sb_field_replicas_v0 *sb_v0;
	struct bch_replicas_cpu new_r = { 0, 0, NULL };
	int ret = 0;

	if ((sb_v1 = bch2_sb_field_get(c->disk_sb.sb, replicas)))
		ret = __bch2_sb_replicas_to_cpu_replicas(sb_v1, &new_r);
	else if ((sb_v0 = bch2_sb_field_get(c->disk_sb.sb, replicas_v0)))
		ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_v0, &new_r);
	if (ret)
		return ret;

	bch2_cpu_replicas_sort(&new_r);

	percpu_down_write(&c->mark_lock);

	ret = replicas_table_update(c, &new_r);
	percpu_up_write(&c->mark_lock);

	kfree(new_r.entries);

	return 0;
}

static int bch2_cpu_replicas_to_sb_replicas_v0(struct bch_fs *c,
					       struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas_v0 *sb_r;
	struct bch_replicas_entry_v0 *dst;
	struct bch_replicas_entry_v1 *src;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src)
		bytes += replicas_entry_bytes(src) - 1;

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas_v0,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas_v0);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		dst->data_type	= src->data_type;
		dst->nr_devs	= src->nr_devs;
		memcpy(dst->devs, src->devs, src->nr_devs);

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

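/*
 * Write the in-memory table back to the superblock.  The compact v0 encoding
 * (no nr_required byte) is used when every entry has nr_required == 1;
 * otherwise the v1 field is written.  Whichever field is written, the other
 * is deleted, so only one of the two is ever present.
 */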
static int bch2_cpu_replicas_to_sb_replicas(struct bch_fs *c,
					    struct bch_replicas_cpu *r)
{
	struct bch_sb_field_replicas *sb_r;
	struct bch_replicas_entry_v1 *dst, *src;
	bool need_v1 = false;
	size_t bytes;

	bytes = sizeof(struct bch_sb_field_replicas);

	for_each_cpu_replicas_entry(r, src) {
		bytes += replicas_entry_bytes(src);
		if (src->nr_required != 1)
			need_v1 = true;
	}

	if (!need_v1)
		return bch2_cpu_replicas_to_sb_replicas_v0(c, r);

	sb_r = bch2_sb_field_resize(&c->disk_sb, replicas,
			DIV_ROUND_UP(bytes, sizeof(u64)));
	if (!sb_r)
		return -BCH_ERR_ENOSPC_sb_replicas;

	bch2_sb_field_delete(&c->disk_sb, BCH_SB_FIELD_replicas_v0);
	sb_r = bch2_sb_field_get(c->disk_sb.sb, replicas);

	memset(&sb_r->entries, 0,
	       vstruct_end(&sb_r->field) -
	       (void *) &sb_r->entries);

	dst = sb_r->entries;
	for_each_cpu_replicas_entry(r, src) {
		memcpy(dst, src, replicas_entry_bytes(src));

		dst = replicas_entry_next(dst);

		BUG_ON((void *) dst > vstruct_end(&sb_r->field));
	}

	return 0;
}

static int bch2_cpu_replicas_validate(struct bch_replicas_cpu *cpu_r,
				      struct bch_sb *sb,
				      struct printbuf *err)
{
	unsigned i;

	sort_cmp_size(cpu_r->entries,
		      cpu_r->nr,
		      cpu_r->entry_size,
		      bch2_memcmp, NULL);

	for (i = 0; i < cpu_r->nr; i++) {
		struct bch_replicas_entry_v1 *e =
			cpu_replicas_entry(cpu_r, i);

		int ret = bch2_replicas_entry_validate(e, sb, err);
		if (ret)
			return ret;

		if (i + 1 < cpu_r->nr) {
			struct bch_replicas_entry_v1 *n =
				cpu_replicas_entry(cpu_r, i + 1);

			BUG_ON(memcmp(e, n, cpu_r->entry_size) > 0);

			if (!memcmp(e, n, cpu_r->entry_size)) {
				prt_printf(err, "duplicate replicas entry ");
				bch2_replicas_entry_to_text(err, e);
				return -BCH_ERR_invalid_sb_replicas;
			}
		}
	}

	return 0;
}

static int bch2_sb_replicas_validate(struct bch_sb *sb, struct bch_sb_field *f,
				     struct printbuf *err)
{
	struct bch_sb_field_replicas *sb_r = field_to_type(f, replicas);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_to_text(struct printbuf *out,
				     struct bch_sb *sb,
				     struct bch_sb_field *f)
{
	struct bch_sb_field_replicas *r = field_to_type(f, replicas);
	struct bch_replicas_entry_v1 *e;
	bool first = true;

	for_each_replicas_entry(r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas = {
	.validate	= bch2_sb_replicas_validate,
	.to_text	= bch2_sb_replicas_to_text,
};

static int bch2_sb_replicas_v0_validate(struct bch_sb *sb, struct bch_sb_field *f,
					struct printbuf *err)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_cpu cpu_r;
	int ret;

	ret = __bch2_sb_replicas_v0_to_cpu_replicas(sb_r, &cpu_r);
	if (ret)
		return ret;

	ret = bch2_cpu_replicas_validate(&cpu_r, sb, err);
	kfree(cpu_r.entries);
	return ret;
}

static void bch2_sb_replicas_v0_to_text(struct printbuf *out,
					struct bch_sb *sb,
					struct bch_sb_field *f)
{
	struct bch_sb_field_replicas_v0 *sb_r = field_to_type(f, replicas_v0);
	struct bch_replicas_entry_v0 *e;
	bool first = true;

	for_each_replicas_entry(sb_r, e) {
		if (!first)
			prt_printf(out, " ");
		first = false;

		bch2_replicas_entry_v0_to_text(out, e);
	}
	prt_newline(out);
}

const struct bch_sb_field_ops bch_sb_field_ops_replicas_v0 = {
	.validate	= bch2_sb_replicas_v0_validate,
	.to_text	= bch2_sb_replicas_v0_to_text,
};

/* Query replicas: */

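/*
 * Check whether the filesystem is usable with the given set of online
 * devices: for each replicas entry, having fewer than nr_devs online means
 * degraded and fewer than nr_required means lost, and we only proceed if the
 * corresponding BCH_FORCE_IF_* flags were passed.  Entries whose devices have
 * all been marked failed are skipped.
 */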
bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
			   unsigned flags, bool print)
{
	struct bch_replicas_entry_v1 *e;
	bool ret = true;

	percpu_down_read(&c->mark_lock);
	for_each_cpu_replicas_entry(&c->replicas, e) {
		unsigned i, nr_online = 0, nr_failed = 0, dflags = 0;
		bool metadata = e->data_type < BCH_DATA_user;

		if (e->data_type == BCH_DATA_cached)
			continue;

		for (i = 0; i < e->nr_devs; i++) {
			struct bch_dev *ca = bch_dev_bkey_exists(c, e->devs[i]);

			nr_online += test_bit(e->devs[i], devs.d);
			nr_failed += ca->mi.state == BCH_MEMBER_STATE_failed;
		}

		if (nr_failed == e->nr_devs)
			continue;

		if (nr_online < e->nr_required)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_LOST
				: BCH_FORCE_IF_DATA_LOST;

		if (nr_online < e->nr_devs)
			dflags |= metadata
				? BCH_FORCE_IF_METADATA_DEGRADED
				: BCH_FORCE_IF_DATA_DEGRADED;

		if (dflags & ~flags) {
			if (print) {
				struct printbuf buf = PRINTBUF;

				bch2_replicas_entry_to_text(&buf, e);
				bch_err(c, "insufficient devices online (%u) for replicas entry %s",
					nr_online, buf.buf);
				printbuf_exit(&buf);
			}
			ret = false;
			break;
		}
	}
	percpu_up_read(&c->mark_lock);

	return ret;
}

unsigned bch2_sb_dev_has_data(struct bch_sb *sb, unsigned dev)
{
	struct bch_sb_field_replicas *replicas;
	struct bch_sb_field_replicas_v0 *replicas_v0;
	unsigned i, data_has = 0;

	replicas = bch2_sb_field_get(sb, replicas);
	replicas_v0 = bch2_sb_field_get(sb, replicas_v0);

	if (replicas) {
		struct bch_replicas_entry_v1 *r;

		for_each_replicas_entry(replicas, r)
			for (i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
	} else if (replicas_v0) {
		struct bch_replicas_entry_v0 *r;

		for_each_replicas_entry_v0(replicas_v0, r)
			for (i = 0; i < r->nr_devs; i++)
				if (r->devs[i] == dev)
					data_has |= 1 << r->data_type;
	}

	return data_has;
}

unsigned bch2_dev_has_data(struct bch_fs *c, struct bch_dev *ca)
{
	unsigned ret;

	mutex_lock(&c->sb_lock);
	ret = bch2_sb_dev_has_data(c->disk_sb.sb, ca->dev_idx);
	mutex_unlock(&c->sb_lock);

	return ret;
}

void bch2_fs_replicas_exit(struct bch_fs *c)
{
	unsigned i;

	kfree(c->usage_scratch);
	for (i = 0; i < ARRAY_SIZE(c->usage); i++)
		free_percpu(c->usage[i]);
	kfree(c->usage_base);
	kfree(c->replicas.entries);
	kfree(c->replicas_gc.entries);

	mempool_exit(&c->replicas_delta_pool);
}

int bch2_fs_replicas_init(struct bch_fs *c)
{
	bch2_journal_entry_res_resize(&c->journal,
			&c->replicas_journal_res,
			reserve_journal_replicas(c, &c->replicas));

	return mempool_init_kmalloc_pool(&c->replicas_delta_pool, 1,
					 REPLICAS_DELTA_LIST_MAX) ?:
		replicas_table_update(c, &c->replicas);
}