1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Code for manipulating bucket marks for garbage collection. 4 * 5 * Copyright 2014 Datera, Inc. 6 */ 7 8 #include "bcachefs.h" 9 #include "alloc_background.h" 10 #include "backpointers.h" 11 #include "bset.h" 12 #include "btree_gc.h" 13 #include "btree_update.h" 14 #include "buckets.h" 15 #include "buckets_waiting_for_journal.h" 16 #include "disk_accounting.h" 17 #include "ec.h" 18 #include "error.h" 19 #include "inode.h" 20 #include "movinggc.h" 21 #include "recovery.h" 22 #include "reflink.h" 23 #include "replicas.h" 24 #include "subvolume.h" 25 #include "trace.h" 26 27 #include <linux/preempt.h> 28 29 void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) 30 { 31 memset(usage, 0, sizeof(*usage)); 32 acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); 33 } 34 35 static u64 reserve_factor(u64 r) 36 { 37 return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); 38 } 39 40 static struct bch_fs_usage_short 41 __bch2_fs_usage_read_short(struct bch_fs *c) 42 { 43 struct bch_fs_usage_short ret; 44 u64 data, reserved; 45 46 ret.capacity = c->capacity - 47 percpu_u64_get(&c->usage->hidden); 48 49 data = percpu_u64_get(&c->usage->data) + 50 percpu_u64_get(&c->usage->btree); 51 reserved = percpu_u64_get(&c->usage->reserved) + 52 percpu_u64_get(c->online_reserved); 53 54 ret.used = min(ret.capacity, data + reserve_factor(reserved)); 55 ret.free = ret.capacity - ret.used; 56 57 ret.nr_inodes = percpu_u64_get(&c->usage->nr_inodes); 58 59 return ret; 60 } 61 62 struct bch_fs_usage_short 63 bch2_fs_usage_read_short(struct bch_fs *c) 64 { 65 struct bch_fs_usage_short ret; 66 67 percpu_down_read(&c->mark_lock); 68 ret = __bch2_fs_usage_read_short(c); 69 percpu_up_read(&c->mark_lock); 70 71 return ret; 72 } 73 74 void bch2_dev_usage_to_text(struct printbuf *out, 75 struct bch_dev *ca, 76 struct bch_dev_usage *usage) 77 { 78 prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); 79 80 for (unsigned i = 0; i < BCH_DATA_NR; i++) { 81 bch2_prt_data_type(out, i); 82 prt_printf(out, "\t%llu\r%llu\r%llu\r\n", 83 usage->d[i].buckets, 84 usage->d[i].sectors, 85 usage->d[i].fragmented); 86 } 87 88 prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); 89 } 90 91 static int bch2_check_fix_ptr(struct btree_trans *trans, 92 struct bkey_s_c k, 93 struct extent_ptr_decoded p, 94 const union bch_extent_entry *entry, 95 bool *do_update) 96 { 97 struct bch_fs *c = trans->c; 98 struct printbuf buf = PRINTBUF; 99 int ret = 0; 100 101 struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); 102 if (!ca) { 103 if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID, 104 trans, ptr_to_invalid_device, 105 "pointer to missing device %u\n" 106 "while marking %s", 107 p.ptr.dev, 108 (printbuf_reset(&buf), 109 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 110 *do_update = true; 111 return 0; 112 } 113 114 struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); 115 if (!g) { 116 if (fsck_err(trans, ptr_to_invalid_device, 117 "pointer to invalid bucket on device %u\n" 118 "while marking %s", 119 p.ptr.dev, 120 (printbuf_reset(&buf), 121 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 122 *do_update = true; 123 goto out; 124 } 125 126 enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); 127 128 if (fsck_err_on(!g->gen_valid, 129 trans, ptr_to_missing_alloc_key, 130 "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" 131 "while marking %s", 132 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 133 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 134 p.ptr.gen, 135 (printbuf_reset(&buf), 136 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 137 if (!p.ptr.cached) { 138 g->gen_valid = true; 139 g->gen = p.ptr.gen; 140 } else { 141 *do_update = true; 142 } 143 } 144 145 if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, 146 trans, ptr_gen_newer_than_bucket_gen, 147 "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" 148 "while marking %s", 149 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 150 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 151 p.ptr.gen, g->gen, 152 (printbuf_reset(&buf), 153 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 154 if (!p.ptr.cached && 155 (g->data_type != BCH_DATA_btree || 156 data_type == BCH_DATA_btree)) { 157 g->gen_valid = true; 158 g->gen = p.ptr.gen; 159 g->data_type = 0; 160 g->stripe_sectors = 0; 161 g->dirty_sectors = 0; 162 g->cached_sectors = 0; 163 } else { 164 *do_update = true; 165 } 166 } 167 168 if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, 169 trans, ptr_gen_newer_than_bucket_gen, 170 "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" 171 "while marking %s", 172 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, 173 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 174 p.ptr.gen, 175 (printbuf_reset(&buf), 176 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 177 *do_update = true; 178 179 if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0, 180 trans, stale_dirty_ptr, 181 "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" 182 "while marking %s", 183 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 184 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 185 p.ptr.gen, g->gen, 186 (printbuf_reset(&buf), 187 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 188 *do_update = true; 189 190 if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) 191 goto out; 192 193 if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), 194 trans, ptr_bucket_data_type_mismatch, 195 "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" 196 "while marking %s", 197 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, 198 bch2_data_type_str(g->data_type), 199 bch2_data_type_str(data_type), 200 (printbuf_reset(&buf), 201 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 202 if (data_type == BCH_DATA_btree) { 203 g->gen_valid = true; 204 g->gen = p.ptr.gen; 205 g->data_type = data_type; 206 g->stripe_sectors = 0; 207 g->dirty_sectors = 0; 208 g->cached_sectors = 0; 209 } else { 210 *do_update = true; 211 } 212 } 213 214 if (p.has_ec) { 215 struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); 216 217 if (fsck_err_on(!m || !m->alive, 218 trans, ptr_to_missing_stripe, 219 "pointer to nonexistent stripe %llu\n" 220 "while marking %s", 221 (u64) p.ec.idx, 222 (printbuf_reset(&buf), 223 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 224 *do_update = true; 225 226 if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), 227 trans, ptr_to_incorrect_stripe, 228 "pointer does not match stripe %llu\n" 229 "while marking %s", 230 (u64) p.ec.idx, 231 (printbuf_reset(&buf), 232 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 233 *do_update = true; 234 } 235 out: 236 fsck_err: 237 bch2_dev_put(ca); 238 printbuf_exit(&buf); 239 return ret; 240 } 241 242 int bch2_check_fix_ptrs(struct btree_trans *trans, 243 enum btree_id btree, unsigned level, struct bkey_s_c k, 244 enum btree_iter_update_trigger_flags flags) 245 { 246 struct bch_fs *c = trans->c; 247 struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(k); 248 const union bch_extent_entry *entry_c; 249 struct extent_ptr_decoded p = { 0 }; 250 bool do_update = false; 251 struct printbuf buf = PRINTBUF; 252 int ret = 0; 253 254 percpu_down_read(&c->mark_lock); 255 256 bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { 257 ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); 258 if (ret) 259 goto err; 260 } 261 262 if (do_update) { 263 if (flags & BTREE_TRIGGER_is_root) { 264 bch_err(c, "cannot update btree roots yet"); 265 ret = -EINVAL; 266 goto err; 267 } 268 269 struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); 270 ret = PTR_ERR_OR_ZERO(new); 271 if (ret) 272 goto err; 273 274 rcu_read_lock(); 275 bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_rcu(c, ptr->dev)); 276 rcu_read_unlock(); 277 278 if (level) { 279 /* 280 * We don't want to drop btree node pointers - if the 281 * btree node isn't there anymore, the read path will 282 * sort it out: 283 */ 284 struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); 285 rcu_read_lock(); 286 bkey_for_each_ptr(ptrs, ptr) { 287 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); 288 struct bucket *g = PTR_GC_BUCKET(ca, ptr); 289 290 ptr->gen = g->gen; 291 } 292 rcu_read_unlock(); 293 } else { 294 struct bkey_ptrs ptrs; 295 union bch_extent_entry *entry; 296 297 rcu_read_lock(); 298 restart_drop_ptrs: 299 ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); 300 bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { 301 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 302 struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); 303 enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry); 304 305 if ((p.ptr.cached && 306 (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) || 307 (!p.ptr.cached && 308 gen_cmp(p.ptr.gen, g->gen) < 0) || 309 gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX || 310 (g->data_type && 311 g->data_type != data_type)) { 312 bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr); 313 goto restart_drop_ptrs; 314 } 315 } 316 rcu_read_unlock(); 317 again: 318 ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); 319 bkey_extent_entry_for_each(ptrs, entry) { 320 if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) { 321 struct gc_stripe *m = genradix_ptr(&c->gc_stripes, 322 entry->stripe_ptr.idx); 323 union bch_extent_entry *next_ptr; 324 325 bkey_extent_entry_for_each_from(ptrs, next_ptr, entry) 326 if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr) 327 goto found; 328 next_ptr = NULL; 329 found: 330 if (!next_ptr) { 331 bch_err(c, "aieee, found stripe ptr with no data ptr"); 332 continue; 333 } 334 335 if (!m || !m->alive || 336 !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block], 337 &next_ptr->ptr, 338 m->sectors)) { 339 bch2_bkey_extent_entry_drop(new, entry); 340 goto again; 341 } 342 } 343 } 344 } 345 346 if (0) { 347 printbuf_reset(&buf); 348 bch2_bkey_val_to_text(&buf, c, k); 349 bch_info(c, "updated %s", buf.buf); 350 351 printbuf_reset(&buf); 352 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); 353 bch_info(c, "new key %s", buf.buf); 354 } 355 356 percpu_up_read(&c->mark_lock); 357 struct btree_iter iter; 358 bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, 359 BTREE_ITER_intent|BTREE_ITER_all_snapshots); 360 ret = bch2_btree_iter_traverse(&iter) ?: 361 bch2_trans_update(trans, &iter, new, 362 BTREE_UPDATE_internal_snapshot_node| 363 BTREE_TRIGGER_norun); 364 bch2_trans_iter_exit(trans, &iter); 365 percpu_down_read(&c->mark_lock); 366 367 if (ret) 368 goto err; 369 370 if (level) 371 bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); 372 } 373 err: 374 percpu_up_read(&c->mark_lock); 375 printbuf_exit(&buf); 376 return ret; 377 } 378 379 int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, 380 struct bkey_s_c k, 381 const struct bch_extent_ptr *ptr, 382 s64 sectors, enum bch_data_type ptr_data_type, 383 u8 b_gen, u8 bucket_data_type, 384 u32 *bucket_sectors) 385 { 386 struct bch_fs *c = trans->c; 387 size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); 388 struct printbuf buf = PRINTBUF; 389 bool inserting = sectors > 0; 390 int ret = 0; 391 392 BUG_ON(!sectors); 393 394 if (gen_after(ptr->gen, b_gen)) { 395 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 396 ptr_gen_newer_than_bucket_gen, 397 "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" 398 "while marking %s", 399 ptr->dev, bucket_nr, b_gen, 400 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 401 ptr->gen, 402 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 403 if (inserting) 404 goto err; 405 goto out; 406 } 407 408 if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { 409 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 410 ptr_too_stale, 411 "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" 412 "while marking %s", 413 ptr->dev, bucket_nr, b_gen, 414 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 415 ptr->gen, 416 (printbuf_reset(&buf), 417 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 418 if (inserting) 419 goto err; 420 goto out; 421 } 422 423 if (b_gen != ptr->gen && ptr->cached) { 424 ret = 1; 425 goto out; 426 } 427 428 if (b_gen != ptr->gen) { 429 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 430 stale_dirty_ptr, 431 "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" 432 "while marking %s", 433 ptr->dev, bucket_nr, b_gen, 434 bucket_gen_get(ca, bucket_nr), 435 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 436 ptr->gen, 437 (printbuf_reset(&buf), 438 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 439 if (inserting) 440 goto err; 441 goto out; 442 } 443 444 if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { 445 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 446 ptr_bucket_data_type_mismatch, 447 "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" 448 "while marking %s", 449 ptr->dev, bucket_nr, b_gen, 450 bch2_data_type_str(bucket_data_type), 451 bch2_data_type_str(ptr_data_type), 452 (printbuf_reset(&buf), 453 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 454 if (inserting) 455 goto err; 456 goto out; 457 } 458 459 if ((u64) *bucket_sectors + sectors > U32_MAX) { 460 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 461 bucket_sector_count_overflow, 462 "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" 463 "while marking %s", 464 ptr->dev, bucket_nr, b_gen, 465 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 466 *bucket_sectors, sectors, 467 (printbuf_reset(&buf), 468 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 469 if (inserting) 470 goto err; 471 sectors = -*bucket_sectors; 472 } 473 474 *bucket_sectors += sectors; 475 out: 476 printbuf_exit(&buf); 477 return ret; 478 err: 479 bch2_dump_trans_updates(trans); 480 ret = -EIO; 481 goto out; 482 } 483 484 void bch2_trans_account_disk_usage_change(struct btree_trans *trans) 485 { 486 struct bch_fs *c = trans->c; 487 u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; 488 static int warned_disk_usage = 0; 489 bool warn = false; 490 491 percpu_down_read(&c->mark_lock); 492 struct bch_fs_usage_base *src = &trans->fs_usage_delta; 493 494 s64 added = src->btree + src->data + src->reserved; 495 496 /* 497 * Not allowed to reduce sectors_available except by getting a 498 * reservation: 499 */ 500 s64 should_not_have_added = added - (s64) disk_res_sectors; 501 if (unlikely(should_not_have_added > 0)) { 502 u64 old, new; 503 504 old = atomic64_read(&c->sectors_available); 505 do { 506 new = max_t(s64, 0, old - should_not_have_added); 507 } while (!atomic64_try_cmpxchg(&c->sectors_available, 508 &old, new)); 509 510 added -= should_not_have_added; 511 warn = true; 512 } 513 514 if (added > 0) { 515 trans->disk_res->sectors -= added; 516 this_cpu_sub(*c->online_reserved, added); 517 } 518 519 preempt_disable(); 520 struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); 521 acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); 522 preempt_enable(); 523 percpu_up_read(&c->mark_lock); 524 525 if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) 526 bch2_trans_inconsistent(trans, 527 "disk usage increased %lli more than %llu sectors reserved)", 528 should_not_have_added, disk_res_sectors); 529 } 530 531 /* KEY_TYPE_extent: */ 532 533 static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, 534 struct bkey_s_c k, 535 const struct extent_ptr_decoded *p, 536 s64 sectors, enum bch_data_type ptr_data_type, 537 struct bch_alloc_v4 *a) 538 { 539 u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : 540 !p->ptr.cached ? &a->dirty_sectors : 541 &a->cached_sectors; 542 int ret = bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type, 543 a->gen, a->data_type, dst_sectors); 544 545 if (ret) 546 return ret; 547 548 alloc_data_type_set(a, ptr_data_type); 549 return 0; 550 } 551 552 static int bch2_trigger_pointer(struct btree_trans *trans, 553 enum btree_id btree_id, unsigned level, 554 struct bkey_s_c k, struct extent_ptr_decoded p, 555 const union bch_extent_entry *entry, 556 s64 *sectors, 557 enum btree_iter_update_trigger_flags flags) 558 { 559 bool insert = !(flags & BTREE_TRIGGER_overwrite); 560 struct printbuf buf = PRINTBUF; 561 int ret = 0; 562 563 struct bch_fs *c = trans->c; 564 struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); 565 if (unlikely(!ca)) { 566 if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID) 567 ret = -EIO; 568 goto err; 569 } 570 571 struct bpos bucket; 572 struct bch_backpointer bp; 573 bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp); 574 *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len); 575 576 if (flags & BTREE_TRIGGER_transactional) { 577 struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); 578 ret = PTR_ERR_OR_ZERO(a) ?: 579 __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v); 580 if (ret) 581 goto err; 582 583 if (!p.ptr.cached) { 584 ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, k, insert); 585 if (ret) 586 goto err; 587 } 588 } 589 590 if (flags & BTREE_TRIGGER_gc) { 591 percpu_down_read(&c->mark_lock); 592 struct bucket *g = gc_bucket(ca, bucket.offset); 593 if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", 594 p.ptr.dev, 595 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 596 ret = -EIO; 597 goto err_unlock; 598 } 599 600 bucket_lock(g); 601 struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; 602 ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new); 603 alloc_to_bucket(g, new); 604 bucket_unlock(g); 605 err_unlock: 606 percpu_up_read(&c->mark_lock); 607 608 if (!ret) 609 ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); 610 } 611 err: 612 bch2_dev_put(ca); 613 printbuf_exit(&buf); 614 return ret; 615 } 616 617 static int bch2_trigger_stripe_ptr(struct btree_trans *trans, 618 struct bkey_s_c k, 619 struct extent_ptr_decoded p, 620 enum bch_data_type data_type, 621 s64 sectors, 622 enum btree_iter_update_trigger_flags flags) 623 { 624 if (flags & BTREE_TRIGGER_transactional) { 625 struct btree_iter iter; 626 struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, 627 BTREE_ID_stripes, POS(0, p.ec.idx), 628 BTREE_ITER_with_updates, stripe); 629 int ret = PTR_ERR_OR_ZERO(s); 630 if (unlikely(ret)) { 631 bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, 632 "pointer to nonexistent stripe %llu", 633 (u64) p.ec.idx); 634 goto err; 635 } 636 637 if (!bch2_ptr_matches_stripe(&s->v, p)) { 638 bch2_trans_inconsistent(trans, 639 "stripe pointer doesn't match stripe %llu", 640 (u64) p.ec.idx); 641 ret = -EIO; 642 goto err; 643 } 644 645 stripe_blockcount_set(&s->v, p.ec.block, 646 stripe_blockcount_get(&s->v, p.ec.block) + 647 sectors); 648 649 struct disk_accounting_pos acc = { 650 .type = BCH_DISK_ACCOUNTING_replicas, 651 }; 652 bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); 653 acc.replicas.data_type = data_type; 654 ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); 655 err: 656 bch2_trans_iter_exit(trans, &iter); 657 return ret; 658 } 659 660 if (flags & BTREE_TRIGGER_gc) { 661 struct bch_fs *c = trans->c; 662 663 struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL); 664 if (!m) { 665 bch_err(c, "error allocating memory for gc_stripes, idx %llu", 666 (u64) p.ec.idx); 667 return -BCH_ERR_ENOMEM_mark_stripe_ptr; 668 } 669 670 mutex_lock(&c->ec_stripes_heap_lock); 671 672 if (!m || !m->alive) { 673 mutex_unlock(&c->ec_stripes_heap_lock); 674 struct printbuf buf = PRINTBUF; 675 bch2_bkey_val_to_text(&buf, c, k); 676 bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s", 677 (u64) p.ec.idx, buf.buf); 678 printbuf_exit(&buf); 679 bch2_inconsistent_error(c); 680 return -EIO; 681 } 682 683 m->block_sectors[p.ec.block] += sectors; 684 685 struct disk_accounting_pos acc = { 686 .type = BCH_DISK_ACCOUNTING_replicas, 687 }; 688 memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); 689 mutex_unlock(&c->ec_stripes_heap_lock); 690 691 acc.replicas.data_type = data_type; 692 int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, true); 693 if (ret) 694 return ret; 695 } 696 697 return 0; 698 } 699 700 static int __trigger_extent(struct btree_trans *trans, 701 enum btree_id btree_id, unsigned level, 702 struct bkey_s_c k, 703 enum btree_iter_update_trigger_flags flags, 704 s64 *replicas_sectors) 705 { 706 bool gc = flags & BTREE_TRIGGER_gc; 707 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 708 const union bch_extent_entry *entry; 709 struct extent_ptr_decoded p; 710 enum bch_data_type data_type = bkey_is_btree_ptr(k.k) 711 ? BCH_DATA_btree 712 : BCH_DATA_user; 713 int ret = 0; 714 715 struct disk_accounting_pos acc_replicas_key = { 716 .type = BCH_DISK_ACCOUNTING_replicas, 717 .replicas.data_type = data_type, 718 .replicas.nr_devs = 0, 719 .replicas.nr_required = 1, 720 }; 721 722 struct disk_accounting_pos acct_compression_key = { 723 .type = BCH_DISK_ACCOUNTING_compression, 724 }; 725 u64 compression_acct[3] = { 1, 0, 0 }; 726 727 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 728 s64 disk_sectors = 0; 729 ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); 730 if (ret < 0) 731 return ret; 732 733 bool stale = ret > 0; 734 735 if (p.ptr.cached && stale) 736 continue; 737 738 if (p.ptr.cached) { 739 ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc); 740 if (ret) 741 return ret; 742 } else if (!p.has_ec) { 743 *replicas_sectors += disk_sectors; 744 acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev; 745 } else { 746 ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); 747 if (ret) 748 return ret; 749 750 /* 751 * There may be other dirty pointers in this extent, but 752 * if so they're not required for mounting if we have an 753 * erasure coded pointer in this extent: 754 */ 755 acc_replicas_key.replicas.nr_required = 0; 756 } 757 758 if (acct_compression_key.compression.type && 759 acct_compression_key.compression.type != p.crc.compression_type) { 760 if (flags & BTREE_TRIGGER_overwrite) 761 bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); 762 763 ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct, 764 ARRAY_SIZE(compression_acct), gc); 765 if (ret) 766 return ret; 767 768 compression_acct[0] = 1; 769 compression_acct[1] = 0; 770 compression_acct[2] = 0; 771 } 772 773 acct_compression_key.compression.type = p.crc.compression_type; 774 if (p.crc.compression_type) { 775 compression_acct[1] += p.crc.uncompressed_size; 776 compression_acct[2] += p.crc.compressed_size; 777 } 778 } 779 780 if (acc_replicas_key.replicas.nr_devs) { 781 ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc); 782 if (ret) 783 return ret; 784 } 785 786 if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) { 787 struct disk_accounting_pos acc_snapshot_key = { 788 .type = BCH_DISK_ACCOUNTING_snapshot, 789 .snapshot.id = k.k->p.snapshot, 790 }; 791 ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc); 792 if (ret) 793 return ret; 794 } 795 796 if (acct_compression_key.compression.type) { 797 if (flags & BTREE_TRIGGER_overwrite) 798 bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); 799 800 ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct, 801 ARRAY_SIZE(compression_acct), gc); 802 if (ret) 803 return ret; 804 } 805 806 if (level) { 807 struct disk_accounting_pos acc_btree_key = { 808 .type = BCH_DISK_ACCOUNTING_btree, 809 .btree.id = btree_id, 810 }; 811 ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc); 812 if (ret) 813 return ret; 814 } else { 815 bool insert = !(flags & BTREE_TRIGGER_overwrite); 816 struct disk_accounting_pos acc_inum_key = { 817 .type = BCH_DISK_ACCOUNTING_inum, 818 .inum.inum = k.k->p.inode, 819 }; 820 s64 v[3] = { 821 insert ? 1 : -1, 822 insert ? k.k->size : -((s64) k.k->size), 823 *replicas_sectors, 824 }; 825 ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); 826 if (ret) 827 return ret; 828 } 829 830 return 0; 831 } 832 833 int bch2_trigger_extent(struct btree_trans *trans, 834 enum btree_id btree, unsigned level, 835 struct bkey_s_c old, struct bkey_s new, 836 enum btree_iter_update_trigger_flags flags) 837 { 838 struct bch_fs *c = trans->c; 839 struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); 840 struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); 841 unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; 842 unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start; 843 844 if (unlikely(flags & BTREE_TRIGGER_check_repair)) 845 return bch2_check_fix_ptrs(trans, btree, level, new.s_c, flags); 846 847 /* if pointers aren't changing - nothing to do: */ 848 if (new_ptrs_bytes == old_ptrs_bytes && 849 !memcmp(new_ptrs.start, 850 old_ptrs.start, 851 new_ptrs_bytes)) 852 return 0; 853 854 if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { 855 s64 old_replicas_sectors = 0, new_replicas_sectors = 0; 856 857 if (old.k->type) { 858 int ret = __trigger_extent(trans, btree, level, old, 859 flags & ~BTREE_TRIGGER_insert, 860 &old_replicas_sectors); 861 if (ret) 862 return ret; 863 } 864 865 if (new.k->type) { 866 int ret = __trigger_extent(trans, btree, level, new.s_c, 867 flags & ~BTREE_TRIGGER_overwrite, 868 &new_replicas_sectors); 869 if (ret) 870 return ret; 871 } 872 873 int need_rebalance_delta = 0; 874 s64 need_rebalance_sectors_delta = 0; 875 876 s64 s = bch2_bkey_sectors_need_rebalance(c, old); 877 need_rebalance_delta -= s != 0; 878 need_rebalance_sectors_delta -= s; 879 880 s = bch2_bkey_sectors_need_rebalance(c, new.s_c); 881 need_rebalance_delta += s != 0; 882 need_rebalance_sectors_delta += s; 883 884 if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { 885 int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, 886 new.k->p, need_rebalance_delta > 0); 887 if (ret) 888 return ret; 889 } 890 891 if (need_rebalance_sectors_delta) { 892 struct disk_accounting_pos acc = { 893 .type = BCH_DISK_ACCOUNTING_rebalance_work, 894 }; 895 int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1, 896 flags & BTREE_TRIGGER_gc); 897 if (ret) 898 return ret; 899 } 900 } 901 902 return 0; 903 } 904 905 /* KEY_TYPE_reservation */ 906 907 static int __trigger_reservation(struct btree_trans *trans, 908 enum btree_id btree_id, unsigned level, struct bkey_s_c k, 909 enum btree_iter_update_trigger_flags flags) 910 { 911 if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { 912 s64 sectors = k.k->size; 913 914 if (flags & BTREE_TRIGGER_overwrite) 915 sectors = -sectors; 916 917 struct disk_accounting_pos acc = { 918 .type = BCH_DISK_ACCOUNTING_persistent_reserved, 919 .persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas, 920 }; 921 922 return bch2_disk_accounting_mod(trans, &acc, §ors, 1, flags & BTREE_TRIGGER_gc); 923 } 924 925 return 0; 926 } 927 928 int bch2_trigger_reservation(struct btree_trans *trans, 929 enum btree_id btree_id, unsigned level, 930 struct bkey_s_c old, struct bkey_s new, 931 enum btree_iter_update_trigger_flags flags) 932 { 933 return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags); 934 } 935 936 /* Mark superblocks: */ 937 938 static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, 939 struct bch_dev *ca, u64 b, 940 enum bch_data_type type, 941 unsigned sectors) 942 { 943 struct btree_iter iter; 944 int ret = 0; 945 946 struct bkey_i_alloc_v4 *a = 947 bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(ca->dev_idx, b)); 948 if (IS_ERR(a)) 949 return PTR_ERR(a); 950 951 if (a->v.data_type && type && a->v.data_type != type) { 952 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 953 bucket_metadata_type_mismatch, 954 "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" 955 "while marking %s", 956 iter.pos.inode, iter.pos.offset, a->v.gen, 957 bch2_data_type_str(a->v.data_type), 958 bch2_data_type_str(type), 959 bch2_data_type_str(type)); 960 ret = -EIO; 961 goto err; 962 } 963 964 if (a->v.data_type != type || 965 a->v.dirty_sectors != sectors) { 966 a->v.data_type = type; 967 a->v.dirty_sectors = sectors; 968 ret = bch2_trans_update(trans, &iter, &a->k_i, 0); 969 } 970 err: 971 bch2_trans_iter_exit(trans, &iter); 972 return ret; 973 } 974 975 static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, 976 u64 b, enum bch_data_type data_type, unsigned sectors, 977 enum btree_iter_update_trigger_flags flags) 978 { 979 struct bch_fs *c = trans->c; 980 int ret = 0; 981 982 percpu_down_read(&c->mark_lock); 983 struct bucket *g = gc_bucket(ca, b); 984 if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", 985 ca->dev_idx, bch2_data_type_str(data_type))) 986 goto err_unlock; 987 988 bucket_lock(g); 989 struct bch_alloc_v4 old = bucket_m_to_alloc(*g); 990 991 if (bch2_fs_inconsistent_on(g->data_type && 992 g->data_type != data_type, c, 993 "different types of data in same bucket: %s, %s", 994 bch2_data_type_str(g->data_type), 995 bch2_data_type_str(data_type))) 996 goto err; 997 998 if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, 999 "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size", 1000 ca->dev_idx, b, g->gen, 1001 bch2_data_type_str(g->data_type ?: data_type), 1002 g->dirty_sectors, sectors)) 1003 goto err; 1004 1005 g->data_type = data_type; 1006 g->dirty_sectors += sectors; 1007 struct bch_alloc_v4 new = bucket_m_to_alloc(*g); 1008 bucket_unlock(g); 1009 percpu_up_read(&c->mark_lock); 1010 ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); 1011 return ret; 1012 err: 1013 bucket_unlock(g); 1014 err_unlock: 1015 percpu_up_read(&c->mark_lock); 1016 return -EIO; 1017 } 1018 1019 int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, 1020 struct bch_dev *ca, u64 b, 1021 enum bch_data_type type, unsigned sectors, 1022 enum btree_iter_update_trigger_flags flags) 1023 { 1024 BUG_ON(type != BCH_DATA_free && 1025 type != BCH_DATA_sb && 1026 type != BCH_DATA_journal); 1027 1028 /* 1029 * Backup superblock might be past the end of our normal usable space: 1030 */ 1031 if (b >= ca->mi.nbuckets) 1032 return 0; 1033 1034 if (flags & BTREE_TRIGGER_gc) 1035 return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags); 1036 else if (flags & BTREE_TRIGGER_transactional) 1037 return commit_do(trans, NULL, NULL, 0, 1038 __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); 1039 else 1040 BUG(); 1041 } 1042 1043 static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, 1044 struct bch_dev *ca, u64 start, u64 end, 1045 enum bch_data_type type, u64 *bucket, unsigned *bucket_sectors, 1046 enum btree_iter_update_trigger_flags flags) 1047 { 1048 do { 1049 u64 b = sector_to_bucket(ca, start); 1050 unsigned sectors = 1051 min_t(u64, bucket_to_sector(ca, b + 1), end) - start; 1052 1053 if (b != *bucket && *bucket_sectors) { 1054 int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket, 1055 type, *bucket_sectors, flags); 1056 if (ret) 1057 return ret; 1058 1059 *bucket_sectors = 0; 1060 } 1061 1062 *bucket = b; 1063 *bucket_sectors += sectors; 1064 start += sectors; 1065 } while (start < end); 1066 1067 return 0; 1068 } 1069 1070 static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, 1071 enum btree_iter_update_trigger_flags flags) 1072 { 1073 struct bch_fs *c = trans->c; 1074 1075 mutex_lock(&c->sb_lock); 1076 struct bch_sb_layout layout = ca->disk_sb.sb->layout; 1077 mutex_unlock(&c->sb_lock); 1078 1079 u64 bucket = 0; 1080 unsigned i, bucket_sectors = 0; 1081 int ret; 1082 1083 for (i = 0; i < layout.nr_superblocks; i++) { 1084 u64 offset = le64_to_cpu(layout.sb_offset[i]); 1085 1086 if (offset == BCH_SB_SECTOR) { 1087 ret = bch2_trans_mark_metadata_sectors(trans, ca, 1088 0, BCH_SB_SECTOR, 1089 BCH_DATA_sb, &bucket, &bucket_sectors, flags); 1090 if (ret) 1091 return ret; 1092 } 1093 1094 ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, 1095 offset + (1 << layout.sb_max_size_bits), 1096 BCH_DATA_sb, &bucket, &bucket_sectors, flags); 1097 if (ret) 1098 return ret; 1099 } 1100 1101 if (bucket_sectors) { 1102 ret = bch2_trans_mark_metadata_bucket(trans, ca, 1103 bucket, BCH_DATA_sb, bucket_sectors, flags); 1104 if (ret) 1105 return ret; 1106 } 1107 1108 for (i = 0; i < ca->journal.nr; i++) { 1109 ret = bch2_trans_mark_metadata_bucket(trans, ca, 1110 ca->journal.buckets[i], 1111 BCH_DATA_journal, ca->mi.bucket_size, flags); 1112 if (ret) 1113 return ret; 1114 } 1115 1116 return 0; 1117 } 1118 1119 int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, 1120 enum btree_iter_update_trigger_flags flags) 1121 { 1122 int ret = bch2_trans_run(c, 1123 __bch2_trans_mark_dev_sb(trans, ca, flags)); 1124 bch_err_fn(c, ret); 1125 return ret; 1126 } 1127 1128 int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, 1129 enum btree_iter_update_trigger_flags flags) 1130 { 1131 for_each_online_member(c, ca) { 1132 int ret = bch2_trans_mark_dev_sb(c, ca, flags); 1133 if (ret) { 1134 percpu_ref_put(&ca->io_ref); 1135 return ret; 1136 } 1137 } 1138 1139 return 0; 1140 } 1141 1142 int bch2_trans_mark_dev_sbs(struct bch_fs *c) 1143 { 1144 return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional); 1145 } 1146 1147 /* Disk reservations: */ 1148 1149 #define SECTORS_CACHE 1024 1150 1151 int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, 1152 u64 sectors, int flags) 1153 { 1154 struct bch_fs_pcpu *pcpu; 1155 u64 old, get; 1156 s64 sectors_available; 1157 int ret; 1158 1159 percpu_down_read(&c->mark_lock); 1160 preempt_disable(); 1161 pcpu = this_cpu_ptr(c->pcpu); 1162 1163 if (sectors <= pcpu->sectors_available) 1164 goto out; 1165 1166 old = atomic64_read(&c->sectors_available); 1167 do { 1168 get = min((u64) sectors + SECTORS_CACHE, old); 1169 1170 if (get < sectors) { 1171 preempt_enable(); 1172 goto recalculate; 1173 } 1174 } while (!atomic64_try_cmpxchg(&c->sectors_available, 1175 &old, old - get)); 1176 1177 pcpu->sectors_available += get; 1178 1179 out: 1180 pcpu->sectors_available -= sectors; 1181 this_cpu_add(*c->online_reserved, sectors); 1182 res->sectors += sectors; 1183 1184 preempt_enable(); 1185 percpu_up_read(&c->mark_lock); 1186 return 0; 1187 1188 recalculate: 1189 mutex_lock(&c->sectors_available_lock); 1190 1191 percpu_u64_set(&c->pcpu->sectors_available, 0); 1192 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); 1193 1194 if (sectors <= sectors_available || 1195 (flags & BCH_DISK_RESERVATION_NOFAIL)) { 1196 atomic64_set(&c->sectors_available, 1197 max_t(s64, 0, sectors_available - sectors)); 1198 this_cpu_add(*c->online_reserved, sectors); 1199 res->sectors += sectors; 1200 ret = 0; 1201 } else { 1202 atomic64_set(&c->sectors_available, sectors_available); 1203 ret = -BCH_ERR_ENOSPC_disk_reservation; 1204 } 1205 1206 mutex_unlock(&c->sectors_available_lock); 1207 percpu_up_read(&c->mark_lock); 1208 1209 return ret; 1210 } 1211 1212 /* Startup/shutdown: */ 1213 1214 void bch2_buckets_nouse_free(struct bch_fs *c) 1215 { 1216 for_each_member_device(c, ca) { 1217 kvfree_rcu_mightsleep(ca->buckets_nouse); 1218 ca->buckets_nouse = NULL; 1219 } 1220 } 1221 1222 int bch2_buckets_nouse_alloc(struct bch_fs *c) 1223 { 1224 for_each_member_device(c, ca) { 1225 BUG_ON(ca->buckets_nouse); 1226 1227 ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * 1228 sizeof(unsigned long), 1229 GFP_KERNEL|__GFP_ZERO); 1230 if (!ca->buckets_nouse) { 1231 bch2_dev_put(ca); 1232 return -BCH_ERR_ENOMEM_buckets_nouse; 1233 } 1234 } 1235 1236 return 0; 1237 } 1238 1239 static void bucket_gens_free_rcu(struct rcu_head *rcu) 1240 { 1241 struct bucket_gens *buckets = 1242 container_of(rcu, struct bucket_gens, rcu); 1243 1244 kvfree(buckets); 1245 } 1246 1247 int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) 1248 { 1249 struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL; 1250 bool resize = ca->bucket_gens != NULL; 1251 int ret; 1252 1253 BUG_ON(resize && ca->buckets_nouse); 1254 1255 if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets, 1256 GFP_KERNEL|__GFP_ZERO))) { 1257 ret = -BCH_ERR_ENOMEM_bucket_gens; 1258 goto err; 1259 } 1260 1261 bucket_gens->first_bucket = ca->mi.first_bucket; 1262 bucket_gens->nbuckets = nbuckets; 1263 bucket_gens->nbuckets_minus_first = 1264 bucket_gens->nbuckets - bucket_gens->first_bucket; 1265 1266 if (resize) { 1267 down_write(&ca->bucket_lock); 1268 percpu_down_write(&c->mark_lock); 1269 } 1270 1271 old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); 1272 1273 if (resize) { 1274 size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); 1275 1276 memcpy(bucket_gens->b, 1277 old_bucket_gens->b, 1278 n); 1279 } 1280 1281 rcu_assign_pointer(ca->bucket_gens, bucket_gens); 1282 bucket_gens = old_bucket_gens; 1283 1284 nbuckets = ca->mi.nbuckets; 1285 1286 if (resize) { 1287 percpu_up_write(&c->mark_lock); 1288 up_write(&ca->bucket_lock); 1289 } 1290 1291 ret = 0; 1292 err: 1293 if (bucket_gens) 1294 call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu); 1295 1296 return ret; 1297 } 1298 1299 void bch2_dev_buckets_free(struct bch_dev *ca) 1300 { 1301 kvfree(ca->buckets_nouse); 1302 kvfree(rcu_dereference_protected(ca->bucket_gens, 1)); 1303 free_percpu(ca->usage); 1304 } 1305 1306 int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) 1307 { 1308 ca->usage = alloc_percpu(struct bch_dev_usage); 1309 if (!ca->usage) 1310 return -BCH_ERR_ENOMEM_usage_init; 1311 1312 return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets); 1313 } 1314