1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Code for manipulating bucket marks for garbage collection. 4 * 5 * Copyright 2014 Datera, Inc. 6 */ 7 8 #include "bcachefs.h" 9 #include "alloc_background.h" 10 #include "backpointers.h" 11 #include "bset.h" 12 #include "btree_gc.h" 13 #include "btree_update.h" 14 #include "buckets.h" 15 #include "buckets_waiting_for_journal.h" 16 #include "disk_accounting.h" 17 #include "ec.h" 18 #include "error.h" 19 #include "inode.h" 20 #include "movinggc.h" 21 #include "recovery.h" 22 #include "reflink.h" 23 #include "replicas.h" 24 #include "subvolume.h" 25 #include "trace.h" 26 27 #include <linux/preempt.h> 28 29 void bch2_dev_usage_read_fast(struct bch_dev *ca, struct bch_dev_usage *usage) 30 { 31 memset(usage, 0, sizeof(*usage)); 32 acc_u64s_percpu((u64 *) usage, (u64 __percpu *) ca->usage, dev_usage_u64s()); 33 } 34 35 static u64 reserve_factor(u64 r) 36 { 37 return r + (round_up(r, (1 << RESERVE_FACTOR)) >> RESERVE_FACTOR); 38 } 39 40 static struct bch_fs_usage_short 41 __bch2_fs_usage_read_short(struct bch_fs *c) 42 { 43 struct bch_fs_usage_short ret; 44 u64 data, reserved; 45 46 ret.capacity = c->capacity - 47 percpu_u64_get(&c->usage->hidden); 48 49 data = percpu_u64_get(&c->usage->data) + 50 percpu_u64_get(&c->usage->btree); 51 reserved = percpu_u64_get(&c->usage->reserved) + 52 percpu_u64_get(c->online_reserved); 53 54 ret.used = min(ret.capacity, data + reserve_factor(reserved)); 55 ret.free = ret.capacity - ret.used; 56 57 ret.nr_inodes = percpu_u64_get(&c->usage->nr_inodes); 58 59 return ret; 60 } 61 62 struct bch_fs_usage_short 63 bch2_fs_usage_read_short(struct bch_fs *c) 64 { 65 struct bch_fs_usage_short ret; 66 67 percpu_down_read(&c->mark_lock); 68 ret = __bch2_fs_usage_read_short(c); 69 percpu_up_read(&c->mark_lock); 70 71 return ret; 72 } 73 74 void bch2_dev_usage_to_text(struct printbuf *out, 75 struct bch_dev *ca, 76 struct bch_dev_usage *usage) 77 { 78 prt_printf(out, "\tbuckets\rsectors\rfragmented\r\n"); 79 80 for (unsigned i = 0; i < BCH_DATA_NR; i++) { 81 bch2_prt_data_type(out, i); 82 prt_printf(out, "\t%llu\r%llu\r%llu\r\n", 83 usage->d[i].buckets, 84 usage->d[i].sectors, 85 usage->d[i].fragmented); 86 } 87 88 prt_printf(out, "capacity\t%llu\r\n", ca->mi.nbuckets); 89 } 90 91 static int bch2_check_fix_ptr(struct btree_trans *trans, 92 struct bkey_s_c k, 93 struct extent_ptr_decoded p, 94 const union bch_extent_entry *entry, 95 bool *do_update) 96 { 97 struct bch_fs *c = trans->c; 98 struct printbuf buf = PRINTBUF; 99 int ret = 0; 100 101 struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); 102 if (!ca) { 103 if (fsck_err(trans, ptr_to_invalid_device, 104 "pointer to missing device %u\n" 105 "while marking %s", 106 p.ptr.dev, 107 (printbuf_reset(&buf), 108 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 109 *do_update = true; 110 return 0; 111 } 112 113 struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); 114 if (!g) { 115 if (fsck_err(trans, ptr_to_invalid_device, 116 "pointer to invalid bucket on device %u\n" 117 "while marking %s", 118 p.ptr.dev, 119 (printbuf_reset(&buf), 120 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 121 *do_update = true; 122 goto out; 123 } 124 125 enum bch_data_type data_type = bch2_bkey_ptr_data_type(k, p, entry); 126 127 if (fsck_err_on(!g->gen_valid, 128 trans, ptr_to_missing_alloc_key, 129 "bucket %u:%zu data type %s ptr gen %u missing in alloc btree\n" 130 "while marking %s", 131 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 132 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 133 p.ptr.gen, 134 (printbuf_reset(&buf), 135 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 136 if (!p.ptr.cached) { 137 g->gen_valid = true; 138 g->gen = p.ptr.gen; 139 } else { 140 *do_update = true; 141 } 142 } 143 144 if (fsck_err_on(gen_cmp(p.ptr.gen, g->gen) > 0, 145 trans, ptr_gen_newer_than_bucket_gen, 146 "bucket %u:%zu data type %s ptr gen in the future: %u > %u\n" 147 "while marking %s", 148 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 149 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 150 p.ptr.gen, g->gen, 151 (printbuf_reset(&buf), 152 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 153 if (!p.ptr.cached && 154 (g->data_type != BCH_DATA_btree || 155 data_type == BCH_DATA_btree)) { 156 g->gen_valid = true; 157 g->gen = p.ptr.gen; 158 g->data_type = 0; 159 g->stripe_sectors = 0; 160 g->dirty_sectors = 0; 161 g->cached_sectors = 0; 162 } else { 163 *do_update = true; 164 } 165 } 166 167 if (fsck_err_on(gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX, 168 trans, ptr_gen_newer_than_bucket_gen, 169 "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" 170 "while marking %s", 171 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, 172 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 173 p.ptr.gen, 174 (printbuf_reset(&buf), 175 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 176 *do_update = true; 177 178 if (fsck_err_on(!p.ptr.cached && gen_cmp(p.ptr.gen, g->gen) < 0, 179 trans, stale_dirty_ptr, 180 "bucket %u:%zu data type %s stale dirty ptr: %u < %u\n" 181 "while marking %s", 182 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), 183 bch2_data_type_str(ptr_data_type(k.k, &p.ptr)), 184 p.ptr.gen, g->gen, 185 (printbuf_reset(&buf), 186 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 187 *do_update = true; 188 189 if (data_type != BCH_DATA_btree && p.ptr.gen != g->gen) 190 goto out; 191 192 if (fsck_err_on(bucket_data_type_mismatch(g->data_type, data_type), 193 trans, ptr_bucket_data_type_mismatch, 194 "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" 195 "while marking %s", 196 p.ptr.dev, PTR_BUCKET_NR(ca, &p.ptr), g->gen, 197 bch2_data_type_str(g->data_type), 198 bch2_data_type_str(data_type), 199 (printbuf_reset(&buf), 200 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 201 if (data_type == BCH_DATA_btree) { 202 g->gen_valid = true; 203 g->gen = p.ptr.gen; 204 g->data_type = data_type; 205 g->stripe_sectors = 0; 206 g->dirty_sectors = 0; 207 g->cached_sectors = 0; 208 } else { 209 *do_update = true; 210 } 211 } 212 213 if (p.has_ec) { 214 struct gc_stripe *m = genradix_ptr(&c->gc_stripes, p.ec.idx); 215 216 if (fsck_err_on(!m || !m->alive, 217 trans, ptr_to_missing_stripe, 218 "pointer to nonexistent stripe %llu\n" 219 "while marking %s", 220 (u64) p.ec.idx, 221 (printbuf_reset(&buf), 222 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 223 *do_update = true; 224 225 if (fsck_err_on(m && m->alive && !bch2_ptr_matches_stripe_m(m, p), 226 trans, ptr_to_incorrect_stripe, 227 "pointer does not match stripe %llu\n" 228 "while marking %s", 229 (u64) p.ec.idx, 230 (printbuf_reset(&buf), 231 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 232 *do_update = true; 233 } 234 out: 235 fsck_err: 236 bch2_dev_put(ca); 237 printbuf_exit(&buf); 238 return ret; 239 } 240 241 int bch2_check_fix_ptrs(struct btree_trans *trans, 242 enum btree_id btree, unsigned level, struct bkey_s_c k, 243 enum btree_iter_update_trigger_flags flags) 244 { 245 struct bch_fs *c = trans->c; 246 struct bkey_ptrs_c ptrs_c = bch2_bkey_ptrs_c(k); 247 const union bch_extent_entry *entry_c; 248 struct extent_ptr_decoded p = { 0 }; 249 bool do_update = false; 250 struct printbuf buf = PRINTBUF; 251 int ret = 0; 252 253 percpu_down_read(&c->mark_lock); 254 255 bkey_for_each_ptr_decode(k.k, ptrs_c, p, entry_c) { 256 ret = bch2_check_fix_ptr(trans, k, p, entry_c, &do_update); 257 if (ret) 258 goto err; 259 } 260 261 if (do_update) { 262 if (flags & BTREE_TRIGGER_is_root) { 263 bch_err(c, "cannot update btree roots yet"); 264 ret = -EINVAL; 265 goto err; 266 } 267 268 struct bkey_i *new = bch2_bkey_make_mut_noupdate(trans, k); 269 ret = PTR_ERR_OR_ZERO(new); 270 if (ret) 271 goto err; 272 273 rcu_read_lock(); 274 bch2_bkey_drop_ptrs(bkey_i_to_s(new), ptr, !bch2_dev_rcu(c, ptr->dev)); 275 rcu_read_unlock(); 276 277 if (level) { 278 /* 279 * We don't want to drop btree node pointers - if the 280 * btree node isn't there anymore, the read path will 281 * sort it out: 282 */ 283 struct bkey_ptrs ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); 284 rcu_read_lock(); 285 bkey_for_each_ptr(ptrs, ptr) { 286 struct bch_dev *ca = bch2_dev_rcu(c, ptr->dev); 287 struct bucket *g = PTR_GC_BUCKET(ca, ptr); 288 289 ptr->gen = g->gen; 290 } 291 rcu_read_unlock(); 292 } else { 293 struct bkey_ptrs ptrs; 294 union bch_extent_entry *entry; 295 296 rcu_read_lock(); 297 restart_drop_ptrs: 298 ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); 299 bkey_for_each_ptr_decode(bkey_i_to_s(new).k, ptrs, p, entry) { 300 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 301 struct bucket *g = PTR_GC_BUCKET(ca, &p.ptr); 302 enum bch_data_type data_type = bch2_bkey_ptr_data_type(bkey_i_to_s_c(new), p, entry); 303 304 if ((p.ptr.cached && 305 (!g->gen_valid || gen_cmp(p.ptr.gen, g->gen) > 0)) || 306 (!p.ptr.cached && 307 gen_cmp(p.ptr.gen, g->gen) < 0) || 308 gen_cmp(g->gen, p.ptr.gen) > BUCKET_GC_GEN_MAX || 309 (g->data_type && 310 g->data_type != data_type)) { 311 bch2_bkey_drop_ptr(bkey_i_to_s(new), &entry->ptr); 312 goto restart_drop_ptrs; 313 } 314 } 315 rcu_read_unlock(); 316 again: 317 ptrs = bch2_bkey_ptrs(bkey_i_to_s(new)); 318 bkey_extent_entry_for_each(ptrs, entry) { 319 if (extent_entry_type(entry) == BCH_EXTENT_ENTRY_stripe_ptr) { 320 struct gc_stripe *m = genradix_ptr(&c->gc_stripes, 321 entry->stripe_ptr.idx); 322 union bch_extent_entry *next_ptr; 323 324 bkey_extent_entry_for_each_from(ptrs, next_ptr, entry) 325 if (extent_entry_type(next_ptr) == BCH_EXTENT_ENTRY_ptr) 326 goto found; 327 next_ptr = NULL; 328 found: 329 if (!next_ptr) { 330 bch_err(c, "aieee, found stripe ptr with no data ptr"); 331 continue; 332 } 333 334 if (!m || !m->alive || 335 !__bch2_ptr_matches_stripe(&m->ptrs[entry->stripe_ptr.block], 336 &next_ptr->ptr, 337 m->sectors)) { 338 bch2_bkey_extent_entry_drop(new, entry); 339 goto again; 340 } 341 } 342 } 343 } 344 345 if (0) { 346 printbuf_reset(&buf); 347 bch2_bkey_val_to_text(&buf, c, k); 348 bch_info(c, "updated %s", buf.buf); 349 350 printbuf_reset(&buf); 351 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(new)); 352 bch_info(c, "new key %s", buf.buf); 353 } 354 355 percpu_up_read(&c->mark_lock); 356 struct btree_iter iter; 357 bch2_trans_node_iter_init(trans, &iter, btree, new->k.p, 0, level, 358 BTREE_ITER_intent|BTREE_ITER_all_snapshots); 359 ret = bch2_btree_iter_traverse(&iter) ?: 360 bch2_trans_update(trans, &iter, new, 361 BTREE_UPDATE_internal_snapshot_node| 362 BTREE_TRIGGER_norun); 363 bch2_trans_iter_exit(trans, &iter); 364 percpu_down_read(&c->mark_lock); 365 366 if (ret) 367 goto err; 368 369 if (level) 370 bch2_btree_node_update_key_early(trans, btree, level - 1, k, new); 371 } 372 err: 373 percpu_up_read(&c->mark_lock); 374 printbuf_exit(&buf); 375 return ret; 376 } 377 378 int bch2_bucket_ref_update(struct btree_trans *trans, struct bch_dev *ca, 379 struct bkey_s_c k, 380 const struct bch_extent_ptr *ptr, 381 s64 sectors, enum bch_data_type ptr_data_type, 382 u8 b_gen, u8 bucket_data_type, 383 u32 *bucket_sectors) 384 { 385 struct bch_fs *c = trans->c; 386 size_t bucket_nr = PTR_BUCKET_NR(ca, ptr); 387 struct printbuf buf = PRINTBUF; 388 bool inserting = sectors > 0; 389 int ret = 0; 390 391 BUG_ON(!sectors); 392 393 if (gen_after(ptr->gen, b_gen)) { 394 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 395 ptr_gen_newer_than_bucket_gen, 396 "bucket %u:%zu gen %u data type %s: ptr gen %u newer than bucket gen\n" 397 "while marking %s", 398 ptr->dev, bucket_nr, b_gen, 399 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 400 ptr->gen, 401 (bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 402 if (inserting) 403 goto err; 404 goto out; 405 } 406 407 if (gen_cmp(b_gen, ptr->gen) > BUCKET_GC_GEN_MAX) { 408 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 409 ptr_too_stale, 410 "bucket %u:%zu gen %u data type %s: ptr gen %u too stale\n" 411 "while marking %s", 412 ptr->dev, bucket_nr, b_gen, 413 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 414 ptr->gen, 415 (printbuf_reset(&buf), 416 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 417 if (inserting) 418 goto err; 419 goto out; 420 } 421 422 if (b_gen != ptr->gen && ptr->cached) { 423 ret = 1; 424 goto out; 425 } 426 427 if (b_gen != ptr->gen) { 428 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 429 stale_dirty_ptr, 430 "bucket %u:%zu gen %u (mem gen %u) data type %s: stale dirty ptr (gen %u)\n" 431 "while marking %s", 432 ptr->dev, bucket_nr, b_gen, 433 bucket_gen_get(ca, bucket_nr), 434 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 435 ptr->gen, 436 (printbuf_reset(&buf), 437 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 438 if (inserting) 439 goto err; 440 goto out; 441 } 442 443 if (bucket_data_type_mismatch(bucket_data_type, ptr_data_type)) { 444 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 445 ptr_bucket_data_type_mismatch, 446 "bucket %u:%zu gen %u different types of data in same bucket: %s, %s\n" 447 "while marking %s", 448 ptr->dev, bucket_nr, b_gen, 449 bch2_data_type_str(bucket_data_type), 450 bch2_data_type_str(ptr_data_type), 451 (printbuf_reset(&buf), 452 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 453 if (inserting) 454 goto err; 455 goto out; 456 } 457 458 if ((u64) *bucket_sectors + sectors > U32_MAX) { 459 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 460 bucket_sector_count_overflow, 461 "bucket %u:%zu gen %u data type %s sector count overflow: %u + %lli > U32_MAX\n" 462 "while marking %s", 463 ptr->dev, bucket_nr, b_gen, 464 bch2_data_type_str(bucket_data_type ?: ptr_data_type), 465 *bucket_sectors, sectors, 466 (printbuf_reset(&buf), 467 bch2_bkey_val_to_text(&buf, c, k), buf.buf)); 468 if (inserting) 469 goto err; 470 sectors = -*bucket_sectors; 471 } 472 473 *bucket_sectors += sectors; 474 out: 475 printbuf_exit(&buf); 476 return ret; 477 err: 478 bch2_dump_trans_updates(trans); 479 ret = -EIO; 480 goto out; 481 } 482 483 void bch2_trans_account_disk_usage_change(struct btree_trans *trans) 484 { 485 struct bch_fs *c = trans->c; 486 u64 disk_res_sectors = trans->disk_res ? trans->disk_res->sectors : 0; 487 static int warned_disk_usage = 0; 488 bool warn = false; 489 490 percpu_down_read(&c->mark_lock); 491 struct bch_fs_usage_base *src = &trans->fs_usage_delta; 492 493 s64 added = src->btree + src->data + src->reserved; 494 495 /* 496 * Not allowed to reduce sectors_available except by getting a 497 * reservation: 498 */ 499 s64 should_not_have_added = added - (s64) disk_res_sectors; 500 if (unlikely(should_not_have_added > 0)) { 501 u64 old, new; 502 503 old = atomic64_read(&c->sectors_available); 504 do { 505 new = max_t(s64, 0, old - should_not_have_added); 506 } while (!atomic64_try_cmpxchg(&c->sectors_available, 507 &old, new)); 508 509 added -= should_not_have_added; 510 warn = true; 511 } 512 513 if (added > 0) { 514 trans->disk_res->sectors -= added; 515 this_cpu_sub(*c->online_reserved, added); 516 } 517 518 preempt_disable(); 519 struct bch_fs_usage_base *dst = this_cpu_ptr(c->usage); 520 acc_u64s((u64 *) dst, (u64 *) src, sizeof(*src) / sizeof(u64)); 521 preempt_enable(); 522 percpu_up_read(&c->mark_lock); 523 524 if (unlikely(warn) && !xchg(&warned_disk_usage, 1)) 525 bch2_trans_inconsistent(trans, 526 "disk usage increased %lli more than %llu sectors reserved)", 527 should_not_have_added, disk_res_sectors); 528 } 529 530 /* KEY_TYPE_extent: */ 531 532 static int __mark_pointer(struct btree_trans *trans, struct bch_dev *ca, 533 struct bkey_s_c k, 534 const struct extent_ptr_decoded *p, 535 s64 sectors, enum bch_data_type ptr_data_type, 536 struct bch_alloc_v4 *a) 537 { 538 u32 *dst_sectors = p->has_ec ? &a->stripe_sectors : 539 !p->ptr.cached ? &a->dirty_sectors : 540 &a->cached_sectors; 541 int ret = bch2_bucket_ref_update(trans, ca, k, &p->ptr, sectors, ptr_data_type, 542 a->gen, a->data_type, dst_sectors); 543 544 if (ret) 545 return ret; 546 547 alloc_data_type_set(a, ptr_data_type); 548 return 0; 549 } 550 551 static int bch2_trigger_pointer(struct btree_trans *trans, 552 enum btree_id btree_id, unsigned level, 553 struct bkey_s_c k, struct extent_ptr_decoded p, 554 const union bch_extent_entry *entry, 555 s64 *sectors, 556 enum btree_iter_update_trigger_flags flags) 557 { 558 bool insert = !(flags & BTREE_TRIGGER_overwrite); 559 struct printbuf buf = PRINTBUF; 560 int ret = 0; 561 562 struct bch_fs *c = trans->c; 563 struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev); 564 if (unlikely(!ca)) { 565 if (insert) 566 ret = -EIO; 567 goto err; 568 } 569 570 struct bpos bucket; 571 struct bch_backpointer bp; 572 bch2_extent_ptr_to_bp(trans->c, ca, btree_id, level, k, p, entry, &bucket, &bp); 573 *sectors = insert ? bp.bucket_len : -((s64) bp.bucket_len); 574 575 if (flags & BTREE_TRIGGER_transactional) { 576 struct bkey_i_alloc_v4 *a = bch2_trans_start_alloc_update(trans, bucket, 0); 577 ret = PTR_ERR_OR_ZERO(a) ?: 578 __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &a->v); 579 if (ret) 580 goto err; 581 582 if (!p.ptr.cached) { 583 ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, k, insert); 584 if (ret) 585 goto err; 586 } 587 } 588 589 if (flags & BTREE_TRIGGER_gc) { 590 percpu_down_read(&c->mark_lock); 591 struct bucket *g = gc_bucket(ca, bucket.offset); 592 if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u\n %s", 593 p.ptr.dev, 594 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 595 ret = -EIO; 596 goto err_unlock; 597 } 598 599 bucket_lock(g); 600 struct bch_alloc_v4 old = bucket_m_to_alloc(*g), new = old; 601 ret = __mark_pointer(trans, ca, k, &p, *sectors, bp.data_type, &new); 602 alloc_to_bucket(g, new); 603 bucket_unlock(g); 604 err_unlock: 605 percpu_up_read(&c->mark_lock); 606 607 if (!ret) 608 ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); 609 } 610 err: 611 bch2_dev_put(ca); 612 printbuf_exit(&buf); 613 return ret; 614 } 615 616 static int bch2_trigger_stripe_ptr(struct btree_trans *trans, 617 struct bkey_s_c k, 618 struct extent_ptr_decoded p, 619 enum bch_data_type data_type, 620 s64 sectors, 621 enum btree_iter_update_trigger_flags flags) 622 { 623 if (flags & BTREE_TRIGGER_transactional) { 624 struct btree_iter iter; 625 struct bkey_i_stripe *s = bch2_bkey_get_mut_typed(trans, &iter, 626 BTREE_ID_stripes, POS(0, p.ec.idx), 627 BTREE_ITER_with_updates, stripe); 628 int ret = PTR_ERR_OR_ZERO(s); 629 if (unlikely(ret)) { 630 bch2_trans_inconsistent_on(bch2_err_matches(ret, ENOENT), trans, 631 "pointer to nonexistent stripe %llu", 632 (u64) p.ec.idx); 633 goto err; 634 } 635 636 if (!bch2_ptr_matches_stripe(&s->v, p)) { 637 bch2_trans_inconsistent(trans, 638 "stripe pointer doesn't match stripe %llu", 639 (u64) p.ec.idx); 640 ret = -EIO; 641 goto err; 642 } 643 644 stripe_blockcount_set(&s->v, p.ec.block, 645 stripe_blockcount_get(&s->v, p.ec.block) + 646 sectors); 647 648 struct disk_accounting_pos acc = { 649 .type = BCH_DISK_ACCOUNTING_replicas, 650 }; 651 bch2_bkey_to_replicas(&acc.replicas, bkey_i_to_s_c(&s->k_i)); 652 acc.replicas.data_type = data_type; 653 ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, false); 654 err: 655 bch2_trans_iter_exit(trans, &iter); 656 return ret; 657 } 658 659 if (flags & BTREE_TRIGGER_gc) { 660 struct bch_fs *c = trans->c; 661 662 struct gc_stripe *m = genradix_ptr_alloc(&c->gc_stripes, p.ec.idx, GFP_KERNEL); 663 if (!m) { 664 bch_err(c, "error allocating memory for gc_stripes, idx %llu", 665 (u64) p.ec.idx); 666 return -BCH_ERR_ENOMEM_mark_stripe_ptr; 667 } 668 669 mutex_lock(&c->ec_stripes_heap_lock); 670 671 if (!m || !m->alive) { 672 mutex_unlock(&c->ec_stripes_heap_lock); 673 struct printbuf buf = PRINTBUF; 674 bch2_bkey_val_to_text(&buf, c, k); 675 bch_err_ratelimited(c, "pointer to nonexistent stripe %llu\n while marking %s", 676 (u64) p.ec.idx, buf.buf); 677 printbuf_exit(&buf); 678 bch2_inconsistent_error(c); 679 return -EIO; 680 } 681 682 m->block_sectors[p.ec.block] += sectors; 683 684 struct disk_accounting_pos acc = { 685 .type = BCH_DISK_ACCOUNTING_replicas, 686 }; 687 memcpy(&acc.replicas, &m->r.e, replicas_entry_bytes(&m->r.e)); 688 mutex_unlock(&c->ec_stripes_heap_lock); 689 690 acc.replicas.data_type = data_type; 691 int ret = bch2_disk_accounting_mod(trans, &acc, §ors, 1, true); 692 if (ret) 693 return ret; 694 } 695 696 return 0; 697 } 698 699 static int __trigger_extent(struct btree_trans *trans, 700 enum btree_id btree_id, unsigned level, 701 struct bkey_s_c k, 702 enum btree_iter_update_trigger_flags flags, 703 s64 *replicas_sectors) 704 { 705 bool gc = flags & BTREE_TRIGGER_gc; 706 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 707 const union bch_extent_entry *entry; 708 struct extent_ptr_decoded p; 709 enum bch_data_type data_type = bkey_is_btree_ptr(k.k) 710 ? BCH_DATA_btree 711 : BCH_DATA_user; 712 int ret = 0; 713 714 struct disk_accounting_pos acc_replicas_key = { 715 .type = BCH_DISK_ACCOUNTING_replicas, 716 .replicas.data_type = data_type, 717 .replicas.nr_devs = 0, 718 .replicas.nr_required = 1, 719 }; 720 721 struct disk_accounting_pos acct_compression_key = { 722 .type = BCH_DISK_ACCOUNTING_compression, 723 }; 724 u64 compression_acct[3] = { 1, 0, 0 }; 725 726 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 727 s64 disk_sectors = 0; 728 ret = bch2_trigger_pointer(trans, btree_id, level, k, p, entry, &disk_sectors, flags); 729 if (ret < 0) 730 return ret; 731 732 bool stale = ret > 0; 733 734 if (p.ptr.cached && stale) 735 continue; 736 737 if (p.ptr.cached) { 738 ret = bch2_mod_dev_cached_sectors(trans, p.ptr.dev, disk_sectors, gc); 739 if (ret) 740 return ret; 741 } else if (!p.has_ec) { 742 *replicas_sectors += disk_sectors; 743 acc_replicas_key.replicas.devs[acc_replicas_key.replicas.nr_devs++] = p.ptr.dev; 744 } else { 745 ret = bch2_trigger_stripe_ptr(trans, k, p, data_type, disk_sectors, flags); 746 if (ret) 747 return ret; 748 749 /* 750 * There may be other dirty pointers in this extent, but 751 * if so they're not required for mounting if we have an 752 * erasure coded pointer in this extent: 753 */ 754 acc_replicas_key.replicas.nr_required = 0; 755 } 756 757 if (acct_compression_key.compression.type && 758 acct_compression_key.compression.type != p.crc.compression_type) { 759 if (flags & BTREE_TRIGGER_overwrite) 760 bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); 761 762 ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct, 763 ARRAY_SIZE(compression_acct), gc); 764 if (ret) 765 return ret; 766 767 compression_acct[0] = 1; 768 compression_acct[1] = 0; 769 compression_acct[2] = 0; 770 } 771 772 acct_compression_key.compression.type = p.crc.compression_type; 773 if (p.crc.compression_type) { 774 compression_acct[1] += p.crc.uncompressed_size; 775 compression_acct[2] += p.crc.compressed_size; 776 } 777 } 778 779 if (acc_replicas_key.replicas.nr_devs) { 780 ret = bch2_disk_accounting_mod(trans, &acc_replicas_key, replicas_sectors, 1, gc); 781 if (ret) 782 return ret; 783 } 784 785 if (acc_replicas_key.replicas.nr_devs && !level && k.k->p.snapshot) { 786 struct disk_accounting_pos acc_snapshot_key = { 787 .type = BCH_DISK_ACCOUNTING_snapshot, 788 .snapshot.id = k.k->p.snapshot, 789 }; 790 ret = bch2_disk_accounting_mod(trans, &acc_snapshot_key, replicas_sectors, 1, gc); 791 if (ret) 792 return ret; 793 } 794 795 if (acct_compression_key.compression.type) { 796 if (flags & BTREE_TRIGGER_overwrite) 797 bch2_u64s_neg(compression_acct, ARRAY_SIZE(compression_acct)); 798 799 ret = bch2_disk_accounting_mod(trans, &acct_compression_key, compression_acct, 800 ARRAY_SIZE(compression_acct), gc); 801 if (ret) 802 return ret; 803 } 804 805 if (level) { 806 struct disk_accounting_pos acc_btree_key = { 807 .type = BCH_DISK_ACCOUNTING_btree, 808 .btree.id = btree_id, 809 }; 810 ret = bch2_disk_accounting_mod(trans, &acc_btree_key, replicas_sectors, 1, gc); 811 if (ret) 812 return ret; 813 } else { 814 bool insert = !(flags & BTREE_TRIGGER_overwrite); 815 struct disk_accounting_pos acc_inum_key = { 816 .type = BCH_DISK_ACCOUNTING_inum, 817 .inum.inum = k.k->p.inode, 818 }; 819 s64 v[3] = { 820 insert ? 1 : -1, 821 insert ? k.k->size : -((s64) k.k->size), 822 *replicas_sectors, 823 }; 824 ret = bch2_disk_accounting_mod(trans, &acc_inum_key, v, ARRAY_SIZE(v), gc); 825 if (ret) 826 return ret; 827 } 828 829 return 0; 830 } 831 832 int bch2_trigger_extent(struct btree_trans *trans, 833 enum btree_id btree, unsigned level, 834 struct bkey_s_c old, struct bkey_s new, 835 enum btree_iter_update_trigger_flags flags) 836 { 837 struct bch_fs *c = trans->c; 838 struct bkey_ptrs_c new_ptrs = bch2_bkey_ptrs_c(new.s_c); 839 struct bkey_ptrs_c old_ptrs = bch2_bkey_ptrs_c(old); 840 unsigned new_ptrs_bytes = (void *) new_ptrs.end - (void *) new_ptrs.start; 841 unsigned old_ptrs_bytes = (void *) old_ptrs.end - (void *) old_ptrs.start; 842 843 if (unlikely(flags & BTREE_TRIGGER_check_repair)) 844 return bch2_check_fix_ptrs(trans, btree, level, new.s_c, flags); 845 846 /* if pointers aren't changing - nothing to do: */ 847 if (new_ptrs_bytes == old_ptrs_bytes && 848 !memcmp(new_ptrs.start, 849 old_ptrs.start, 850 new_ptrs_bytes)) 851 return 0; 852 853 if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { 854 s64 old_replicas_sectors = 0, new_replicas_sectors = 0; 855 856 if (old.k->type) { 857 int ret = __trigger_extent(trans, btree, level, old, 858 flags & ~BTREE_TRIGGER_insert, 859 &old_replicas_sectors); 860 if (ret) 861 return ret; 862 } 863 864 if (new.k->type) { 865 int ret = __trigger_extent(trans, btree, level, new.s_c, 866 flags & ~BTREE_TRIGGER_overwrite, 867 &new_replicas_sectors); 868 if (ret) 869 return ret; 870 } 871 872 int need_rebalance_delta = 0; 873 s64 need_rebalance_sectors_delta = 0; 874 875 s64 s = bch2_bkey_sectors_need_rebalance(c, old); 876 need_rebalance_delta -= s != 0; 877 need_rebalance_sectors_delta -= s; 878 879 s = bch2_bkey_sectors_need_rebalance(c, new.s_c); 880 need_rebalance_delta += s != 0; 881 need_rebalance_sectors_delta += s; 882 883 if ((flags & BTREE_TRIGGER_transactional) && need_rebalance_delta) { 884 int ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_rebalance_work, 885 new.k->p, need_rebalance_delta > 0); 886 if (ret) 887 return ret; 888 } 889 890 if (need_rebalance_sectors_delta) { 891 struct disk_accounting_pos acc = { 892 .type = BCH_DISK_ACCOUNTING_rebalance_work, 893 }; 894 int ret = bch2_disk_accounting_mod(trans, &acc, &need_rebalance_sectors_delta, 1, 895 flags & BTREE_TRIGGER_gc); 896 if (ret) 897 return ret; 898 } 899 } 900 901 return 0; 902 } 903 904 /* KEY_TYPE_reservation */ 905 906 static int __trigger_reservation(struct btree_trans *trans, 907 enum btree_id btree_id, unsigned level, struct bkey_s_c k, 908 enum btree_iter_update_trigger_flags flags) 909 { 910 if (flags & (BTREE_TRIGGER_transactional|BTREE_TRIGGER_gc)) { 911 s64 sectors = k.k->size; 912 913 if (flags & BTREE_TRIGGER_overwrite) 914 sectors = -sectors; 915 916 struct disk_accounting_pos acc = { 917 .type = BCH_DISK_ACCOUNTING_persistent_reserved, 918 .persistent_reserved.nr_replicas = bkey_s_c_to_reservation(k).v->nr_replicas, 919 }; 920 921 return bch2_disk_accounting_mod(trans, &acc, §ors, 1, flags & BTREE_TRIGGER_gc); 922 } 923 924 return 0; 925 } 926 927 int bch2_trigger_reservation(struct btree_trans *trans, 928 enum btree_id btree_id, unsigned level, 929 struct bkey_s_c old, struct bkey_s new, 930 enum btree_iter_update_trigger_flags flags) 931 { 932 return trigger_run_overwrite_then_insert(__trigger_reservation, trans, btree_id, level, old, new, flags); 933 } 934 935 /* Mark superblocks: */ 936 937 static int __bch2_trans_mark_metadata_bucket(struct btree_trans *trans, 938 struct bch_dev *ca, u64 b, 939 enum bch_data_type type, 940 unsigned sectors) 941 { 942 struct btree_iter iter; 943 int ret = 0; 944 945 struct bkey_i_alloc_v4 *a = 946 bch2_trans_start_alloc_update_noupdate(trans, &iter, POS(ca->dev_idx, b)); 947 if (IS_ERR(a)) 948 return PTR_ERR(a); 949 950 if (a->v.data_type && type && a->v.data_type != type) { 951 bch2_fsck_err(trans, FSCK_CAN_IGNORE|FSCK_NEED_FSCK, 952 bucket_metadata_type_mismatch, 953 "bucket %llu:%llu gen %u different types of data in same bucket: %s, %s\n" 954 "while marking %s", 955 iter.pos.inode, iter.pos.offset, a->v.gen, 956 bch2_data_type_str(a->v.data_type), 957 bch2_data_type_str(type), 958 bch2_data_type_str(type)); 959 ret = -EIO; 960 goto err; 961 } 962 963 if (a->v.data_type != type || 964 a->v.dirty_sectors != sectors) { 965 a->v.data_type = type; 966 a->v.dirty_sectors = sectors; 967 ret = bch2_trans_update(trans, &iter, &a->k_i, 0); 968 } 969 err: 970 bch2_trans_iter_exit(trans, &iter); 971 return ret; 972 } 973 974 static int bch2_mark_metadata_bucket(struct btree_trans *trans, struct bch_dev *ca, 975 u64 b, enum bch_data_type data_type, unsigned sectors, 976 enum btree_iter_update_trigger_flags flags) 977 { 978 struct bch_fs *c = trans->c; 979 int ret = 0; 980 981 percpu_down_read(&c->mark_lock); 982 struct bucket *g = gc_bucket(ca, b); 983 if (bch2_fs_inconsistent_on(!g, c, "reference to invalid bucket on device %u when marking metadata type %s", 984 ca->dev_idx, bch2_data_type_str(data_type))) 985 goto err_unlock; 986 987 bucket_lock(g); 988 struct bch_alloc_v4 old = bucket_m_to_alloc(*g); 989 990 if (bch2_fs_inconsistent_on(g->data_type && 991 g->data_type != data_type, c, 992 "different types of data in same bucket: %s, %s", 993 bch2_data_type_str(g->data_type), 994 bch2_data_type_str(data_type))) 995 goto err; 996 997 if (bch2_fs_inconsistent_on((u64) g->dirty_sectors + sectors > ca->mi.bucket_size, c, 998 "bucket %u:%llu gen %u data type %s sector count overflow: %u + %u > bucket size", 999 ca->dev_idx, b, g->gen, 1000 bch2_data_type_str(g->data_type ?: data_type), 1001 g->dirty_sectors, sectors)) 1002 goto err; 1003 1004 g->data_type = data_type; 1005 g->dirty_sectors += sectors; 1006 struct bch_alloc_v4 new = bucket_m_to_alloc(*g); 1007 bucket_unlock(g); 1008 percpu_up_read(&c->mark_lock); 1009 ret = bch2_alloc_key_to_dev_counters(trans, ca, &old, &new, flags); 1010 return ret; 1011 err: 1012 bucket_unlock(g); 1013 err_unlock: 1014 percpu_up_read(&c->mark_lock); 1015 return -EIO; 1016 } 1017 1018 int bch2_trans_mark_metadata_bucket(struct btree_trans *trans, 1019 struct bch_dev *ca, u64 b, 1020 enum bch_data_type type, unsigned sectors, 1021 enum btree_iter_update_trigger_flags flags) 1022 { 1023 BUG_ON(type != BCH_DATA_free && 1024 type != BCH_DATA_sb && 1025 type != BCH_DATA_journal); 1026 1027 /* 1028 * Backup superblock might be past the end of our normal usable space: 1029 */ 1030 if (b >= ca->mi.nbuckets) 1031 return 0; 1032 1033 if (flags & BTREE_TRIGGER_gc) 1034 return bch2_mark_metadata_bucket(trans, ca, b, type, sectors, flags); 1035 else if (flags & BTREE_TRIGGER_transactional) 1036 return commit_do(trans, NULL, NULL, 0, 1037 __bch2_trans_mark_metadata_bucket(trans, ca, b, type, sectors)); 1038 else 1039 BUG(); 1040 } 1041 1042 static int bch2_trans_mark_metadata_sectors(struct btree_trans *trans, 1043 struct bch_dev *ca, u64 start, u64 end, 1044 enum bch_data_type type, u64 *bucket, unsigned *bucket_sectors, 1045 enum btree_iter_update_trigger_flags flags) 1046 { 1047 do { 1048 u64 b = sector_to_bucket(ca, start); 1049 unsigned sectors = 1050 min_t(u64, bucket_to_sector(ca, b + 1), end) - start; 1051 1052 if (b != *bucket && *bucket_sectors) { 1053 int ret = bch2_trans_mark_metadata_bucket(trans, ca, *bucket, 1054 type, *bucket_sectors, flags); 1055 if (ret) 1056 return ret; 1057 1058 *bucket_sectors = 0; 1059 } 1060 1061 *bucket = b; 1062 *bucket_sectors += sectors; 1063 start += sectors; 1064 } while (start < end); 1065 1066 return 0; 1067 } 1068 1069 static int __bch2_trans_mark_dev_sb(struct btree_trans *trans, struct bch_dev *ca, 1070 enum btree_iter_update_trigger_flags flags) 1071 { 1072 struct bch_fs *c = trans->c; 1073 1074 mutex_lock(&c->sb_lock); 1075 struct bch_sb_layout layout = ca->disk_sb.sb->layout; 1076 mutex_unlock(&c->sb_lock); 1077 1078 u64 bucket = 0; 1079 unsigned i, bucket_sectors = 0; 1080 int ret; 1081 1082 for (i = 0; i < layout.nr_superblocks; i++) { 1083 u64 offset = le64_to_cpu(layout.sb_offset[i]); 1084 1085 if (offset == BCH_SB_SECTOR) { 1086 ret = bch2_trans_mark_metadata_sectors(trans, ca, 1087 0, BCH_SB_SECTOR, 1088 BCH_DATA_sb, &bucket, &bucket_sectors, flags); 1089 if (ret) 1090 return ret; 1091 } 1092 1093 ret = bch2_trans_mark_metadata_sectors(trans, ca, offset, 1094 offset + (1 << layout.sb_max_size_bits), 1095 BCH_DATA_sb, &bucket, &bucket_sectors, flags); 1096 if (ret) 1097 return ret; 1098 } 1099 1100 if (bucket_sectors) { 1101 ret = bch2_trans_mark_metadata_bucket(trans, ca, 1102 bucket, BCH_DATA_sb, bucket_sectors, flags); 1103 if (ret) 1104 return ret; 1105 } 1106 1107 for (i = 0; i < ca->journal.nr; i++) { 1108 ret = bch2_trans_mark_metadata_bucket(trans, ca, 1109 ca->journal.buckets[i], 1110 BCH_DATA_journal, ca->mi.bucket_size, flags); 1111 if (ret) 1112 return ret; 1113 } 1114 1115 return 0; 1116 } 1117 1118 int bch2_trans_mark_dev_sb(struct bch_fs *c, struct bch_dev *ca, 1119 enum btree_iter_update_trigger_flags flags) 1120 { 1121 int ret = bch2_trans_run(c, 1122 __bch2_trans_mark_dev_sb(trans, ca, flags)); 1123 bch_err_fn(c, ret); 1124 return ret; 1125 } 1126 1127 int bch2_trans_mark_dev_sbs_flags(struct bch_fs *c, 1128 enum btree_iter_update_trigger_flags flags) 1129 { 1130 for_each_online_member(c, ca) { 1131 int ret = bch2_trans_mark_dev_sb(c, ca, flags); 1132 if (ret) { 1133 percpu_ref_put(&ca->io_ref); 1134 return ret; 1135 } 1136 } 1137 1138 return 0; 1139 } 1140 1141 int bch2_trans_mark_dev_sbs(struct bch_fs *c) 1142 { 1143 return bch2_trans_mark_dev_sbs_flags(c, BTREE_TRIGGER_transactional); 1144 } 1145 1146 /* Disk reservations: */ 1147 1148 #define SECTORS_CACHE 1024 1149 1150 int __bch2_disk_reservation_add(struct bch_fs *c, struct disk_reservation *res, 1151 u64 sectors, int flags) 1152 { 1153 struct bch_fs_pcpu *pcpu; 1154 u64 old, get; 1155 s64 sectors_available; 1156 int ret; 1157 1158 percpu_down_read(&c->mark_lock); 1159 preempt_disable(); 1160 pcpu = this_cpu_ptr(c->pcpu); 1161 1162 if (sectors <= pcpu->sectors_available) 1163 goto out; 1164 1165 old = atomic64_read(&c->sectors_available); 1166 do { 1167 get = min((u64) sectors + SECTORS_CACHE, old); 1168 1169 if (get < sectors) { 1170 preempt_enable(); 1171 goto recalculate; 1172 } 1173 } while (!atomic64_try_cmpxchg(&c->sectors_available, 1174 &old, old - get)); 1175 1176 pcpu->sectors_available += get; 1177 1178 out: 1179 pcpu->sectors_available -= sectors; 1180 this_cpu_add(*c->online_reserved, sectors); 1181 res->sectors += sectors; 1182 1183 preempt_enable(); 1184 percpu_up_read(&c->mark_lock); 1185 return 0; 1186 1187 recalculate: 1188 mutex_lock(&c->sectors_available_lock); 1189 1190 percpu_u64_set(&c->pcpu->sectors_available, 0); 1191 sectors_available = avail_factor(__bch2_fs_usage_read_short(c).free); 1192 1193 if (sectors <= sectors_available || 1194 (flags & BCH_DISK_RESERVATION_NOFAIL)) { 1195 atomic64_set(&c->sectors_available, 1196 max_t(s64, 0, sectors_available - sectors)); 1197 this_cpu_add(*c->online_reserved, sectors); 1198 res->sectors += sectors; 1199 ret = 0; 1200 } else { 1201 atomic64_set(&c->sectors_available, sectors_available); 1202 ret = -BCH_ERR_ENOSPC_disk_reservation; 1203 } 1204 1205 mutex_unlock(&c->sectors_available_lock); 1206 percpu_up_read(&c->mark_lock); 1207 1208 return ret; 1209 } 1210 1211 /* Startup/shutdown: */ 1212 1213 void bch2_buckets_nouse_free(struct bch_fs *c) 1214 { 1215 for_each_member_device(c, ca) { 1216 kvfree_rcu_mightsleep(ca->buckets_nouse); 1217 ca->buckets_nouse = NULL; 1218 } 1219 } 1220 1221 int bch2_buckets_nouse_alloc(struct bch_fs *c) 1222 { 1223 for_each_member_device(c, ca) { 1224 BUG_ON(ca->buckets_nouse); 1225 1226 ca->buckets_nouse = kvmalloc(BITS_TO_LONGS(ca->mi.nbuckets) * 1227 sizeof(unsigned long), 1228 GFP_KERNEL|__GFP_ZERO); 1229 if (!ca->buckets_nouse) { 1230 bch2_dev_put(ca); 1231 return -BCH_ERR_ENOMEM_buckets_nouse; 1232 } 1233 } 1234 1235 return 0; 1236 } 1237 1238 static void bucket_gens_free_rcu(struct rcu_head *rcu) 1239 { 1240 struct bucket_gens *buckets = 1241 container_of(rcu, struct bucket_gens, rcu); 1242 1243 kvfree(buckets); 1244 } 1245 1246 int bch2_dev_buckets_resize(struct bch_fs *c, struct bch_dev *ca, u64 nbuckets) 1247 { 1248 struct bucket_gens *bucket_gens = NULL, *old_bucket_gens = NULL; 1249 bool resize = ca->bucket_gens != NULL; 1250 int ret; 1251 1252 BUG_ON(resize && ca->buckets_nouse); 1253 1254 if (!(bucket_gens = kvmalloc(sizeof(struct bucket_gens) + nbuckets, 1255 GFP_KERNEL|__GFP_ZERO))) { 1256 ret = -BCH_ERR_ENOMEM_bucket_gens; 1257 goto err; 1258 } 1259 1260 bucket_gens->first_bucket = ca->mi.first_bucket; 1261 bucket_gens->nbuckets = nbuckets; 1262 bucket_gens->nbuckets_minus_first = 1263 bucket_gens->nbuckets - bucket_gens->first_bucket; 1264 1265 if (resize) { 1266 down_write(&ca->bucket_lock); 1267 percpu_down_write(&c->mark_lock); 1268 } 1269 1270 old_bucket_gens = rcu_dereference_protected(ca->bucket_gens, 1); 1271 1272 if (resize) { 1273 size_t n = min(bucket_gens->nbuckets, old_bucket_gens->nbuckets); 1274 1275 memcpy(bucket_gens->b, 1276 old_bucket_gens->b, 1277 n); 1278 } 1279 1280 rcu_assign_pointer(ca->bucket_gens, bucket_gens); 1281 bucket_gens = old_bucket_gens; 1282 1283 nbuckets = ca->mi.nbuckets; 1284 1285 if (resize) { 1286 percpu_up_write(&c->mark_lock); 1287 up_write(&ca->bucket_lock); 1288 } 1289 1290 ret = 0; 1291 err: 1292 if (bucket_gens) 1293 call_rcu(&bucket_gens->rcu, bucket_gens_free_rcu); 1294 1295 return ret; 1296 } 1297 1298 void bch2_dev_buckets_free(struct bch_dev *ca) 1299 { 1300 kvfree(ca->buckets_nouse); 1301 kvfree(rcu_dereference_protected(ca->bucket_gens, 1)); 1302 free_percpu(ca->usage); 1303 } 1304 1305 int bch2_dev_buckets_alloc(struct bch_fs *c, struct bch_dev *ca) 1306 { 1307 ca->usage = alloc_percpu(struct bch_dev_usage); 1308 if (!ca->usage) 1309 return -BCH_ERR_ENOMEM_usage_init; 1310 1311 return bch2_dev_buckets_resize(c, ca, ca->mi.nbuckets); 1312 } 1313