// SPDX-License-Identifier: GPL-2.0
/*
 * Some low level IO code, and hacks for various block layer limitations
 *
 * Copyright 2010, 2011 Kent Overstreet <kent.overstreet@gmail.com>
 * Copyright 2012 Google, Inc.
 */

#include "bcachefs.h"
#include "alloc_background.h"
#include "alloc_foreground.h"
#include "btree_update.h"
#include "buckets.h"
#include "checksum.h"
#include "clock.h"
#include "compress.h"
#include "data_update.h"
#include "disk_groups.h"
#include "ec.h"
#include "error.h"
#include "io_read.h"
#include "io_misc.h"
#include "io_write.h"
#include "reflink.h"
#include "subvolume.h"
#include "trace.h"

#include <linux/sched/mm.h>

#ifndef CONFIG_BCACHEFS_NO_LATENCY_ACCT

static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
	const struct bch_devs_mask *devs;
	unsigned d, nr = 0, total = 0;
	u64 now = local_clock(), last;
	s64 congested;
	struct bch_dev *ca;

	if (!target)
		return false;

	rcu_read_lock();
	devs = bch2_target_to_mask(c, target) ?:
		&c->rw_devs[BCH_DATA_user];

	for_each_set_bit(d, devs->d, BCH_SB_MEMBERS_MAX) {
		ca = rcu_dereference(c->devs[d]);
		if (!ca)
			continue;

		congested = atomic_read(&ca->congested);
		last = READ_ONCE(ca->congested_last);
		if (time_after64(now, last))
			congested -= (now - last) >> 12;

		total += max(congested, 0LL);
		nr++;
	}
	rcu_read_unlock();

	return bch2_rand_range(nr * CONGESTED_MAX) < total;
}

#else

static bool bch2_target_congested(struct bch_fs *c, u16 target)
{
	return false;
}

#endif

/* Cache promotion on read */

struct promote_op {
	struct rcu_head		rcu;
	u64			start_time;

	struct rhash_head	hash;
	struct bpos		pos;

	struct data_update	write;
	struct bio_vec		bi_inline_vecs[]; /* must be last */
};

static const struct rhashtable_params bch_promote_params = {
	.head_offset		= offsetof(struct promote_op, hash),
	.key_offset		= offsetof(struct promote_op, pos),
	.key_len		= sizeof(struct bpos),
	.automatic_shrinking	= true,
};

static inline bool have_io_error(struct bch_io_failures *failed)
{
	return failed && failed->nr;
}

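/*
 * Decide whether a read should also be written out to the promote target.
 * When called from the IO error recovery path (have_io_error()) the usual
 * checks are skipped: we only refuse if a promote to the same position is
 * already in flight.
 */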
static inline int should_promote(struct bch_fs *c, struct bkey_s_c k,
				  struct bpos pos,
				  struct bch_io_opts opts,
				  unsigned flags,
				  struct bch_io_failures *failed)
{
	if (!have_io_error(failed)) {
		BUG_ON(!opts.promote_target);

		if (!(flags & BCH_READ_MAY_PROMOTE))
			return -BCH_ERR_nopromote_may_not;

		if (bch2_bkey_has_target(c, k, opts.promote_target))
			return -BCH_ERR_nopromote_already_promoted;

		if (bkey_extent_is_unwritten(k))
			return -BCH_ERR_nopromote_unwritten;

		if (bch2_target_congested(c, opts.promote_target))
			return -BCH_ERR_nopromote_congested;
	}

	if (rhashtable_lookup_fast(&c->promote_table, &pos,
				   bch_promote_params))
		return -BCH_ERR_nopromote_in_flight;

	return 0;
}

static void promote_free(struct bch_fs *c, struct promote_op *op)
{
	int ret;

	bch2_data_update_exit(&op->write);

	ret = rhashtable_remove_fast(&c->promote_table, &op->hash,
				     bch_promote_params);
	BUG_ON(ret);
	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
	kfree_rcu(op, rcu);
}

static void promote_done(struct bch_write_op *wop)
{
	struct promote_op *op =
		container_of(wop, struct promote_op, write.op);
	struct bch_fs *c = op->write.op.c;

	bch2_time_stats_update(&c->times[BCH_TIME_data_promote],
			       op->start_time);
	promote_free(c, op);
}

static void promote_start(struct promote_op *op, struct bch_read_bio *rbio)
{
	struct bio *bio = &op->write.op.wbio.bio;

	trace_and_count(op->write.op.c, read_promote, &rbio->bio);

	/* we now own pages: */
	BUG_ON(!rbio->bounce);
	BUG_ON(rbio->bio.bi_vcnt > bio->bi_max_vecs);

	memcpy(bio->bi_io_vec, rbio->bio.bi_io_vec,
	       sizeof(struct bio_vec) * rbio->bio.bi_vcnt);
	swap(bio->bi_vcnt, rbio->bio.bi_vcnt);

	bch2_data_update_read_done(&op->write, rbio->pick.crc);
}

static struct promote_op *__promote_alloc(struct btree_trans *trans,
					  enum btree_id btree_id,
					  struct bkey_s_c k,
					  struct bpos pos,
					  struct extent_ptr_decoded *pick,
					  struct bch_io_opts opts,
					  unsigned sectors,
					  struct bch_read_bio **rbio,
					  struct bch_io_failures *failed)
{
	struct bch_fs *c = trans->c;
	struct promote_op *op = NULL;
	struct bio *bio;
	unsigned pages = DIV_ROUND_UP(sectors, PAGE_SECTORS);
	int ret;

	if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_promote))
		return ERR_PTR(-BCH_ERR_nopromote_no_writes);

	op = kzalloc(struct_size(op, bi_inline_vecs, pages), GFP_KERNEL);
	if (!op) {
		ret = -BCH_ERR_nopromote_enomem;
		goto err;
	}

	op->start_time = local_clock();
	op->pos = pos;

	/*
	 * We don't use the mempool here because extents that aren't
	 * checksummed or compressed can be too big for the mempool:
	 */
	*rbio = kzalloc(sizeof(struct bch_read_bio) +
			sizeof(struct bio_vec) * pages,
			GFP_KERNEL);
	if (!*rbio) {
		ret = -BCH_ERR_nopromote_enomem;
		goto err;
	}

	rbio_init(&(*rbio)->bio, opts);
	bio_init(&(*rbio)->bio, NULL, (*rbio)->bio.bi_inline_vecs, pages, 0);

	if (bch2_bio_alloc_pages(&(*rbio)->bio, sectors << 9, GFP_KERNEL)) {
		ret = -BCH_ERR_nopromote_enomem;
		goto err;
	}

	(*rbio)->bounce		= true;
	(*rbio)->split		= true;
	(*rbio)->kmalloc	= true;

	if (rhashtable_lookup_insert_fast(&c->promote_table, &op->hash,
					  bch_promote_params)) {
		ret = -BCH_ERR_nopromote_in_flight;
		goto err;
	}

	bio = &op->write.op.wbio.bio;
	bio_init(bio, NULL, bio->bi_inline_vecs, pages, 0);

	struct data_update_opts update_opts = {};

	if (!have_io_error(failed)) {
		update_opts.target = opts.promote_target;
		update_opts.extra_replicas = 1;
		update_opts.write_flags = BCH_WRITE_ALLOC_NOWAIT|BCH_WRITE_CACHED;
	} else {
		update_opts.target = opts.foreground_target;

		struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
		unsigned ptr_bit = 1;
		bkey_for_each_ptr(ptrs, ptr) {
			if (bch2_dev_io_failures(failed, ptr->dev))
				update_opts.rewrite_ptrs |= ptr_bit;
			ptr_bit <<= 1;
		}
	}

	ret = bch2_data_update_init(trans, NULL, NULL, &op->write,
			writepoint_hashed((unsigned long) current),
			opts,
			update_opts,
			btree_id, k);
	/*
	 * possible errors: -BCH_ERR_nocow_lock_blocked,
	 * -BCH_ERR_ENOSPC_disk_reservation:
	 */
	if (ret) {
		BUG_ON(rhashtable_remove_fast(&c->promote_table, &op->hash,
					      bch_promote_params));
		goto err;
	}

	op->write.op.end_io = promote_done;

	return op;
err:
	if (*rbio)
		bio_free_pages(&(*rbio)->bio);
	kfree(*rbio);
	*rbio = NULL;
	/* We may have added to the rhashtable and thus need rcu freeing: */
	kfree_rcu(op, rcu);
	bch2_write_ref_put(c, BCH_WRITE_REF_promote);
	return ERR_PTR(ret);
}

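/*
 * promote_alloc() decides whether this read should be promoted (or, in the
 * IO error recovery path, rewritten), and if so sets up the background data
 * update. It may force the read to cover the full extent and to bounce, via
 * *read_full and *bounce.
 */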
noinline
static struct promote_op *promote_alloc(struct btree_trans *trans,
					struct bvec_iter iter,
					struct bkey_s_c k,
					struct extent_ptr_decoded *pick,
					struct bch_io_opts opts,
					unsigned flags,
					struct bch_read_bio **rbio,
					bool *bounce,
					bool *read_full,
					struct bch_io_failures *failed)
{
	struct bch_fs *c = trans->c;
	/*
	 * if failed != NULL we're not actually doing a promote, we're
	 * recovering from an io/checksum error
	 */
	bool promote_full = (have_io_error(failed) ||
			     *read_full ||
			     READ_ONCE(c->opts.promote_whole_extents));
	/* data might have to be decompressed in the write path: */
	unsigned sectors = promote_full
		? max(pick->crc.compressed_size, pick->crc.live_size)
		: bvec_iter_sectors(iter);
	struct bpos pos = promote_full
		? bkey_start_pos(k.k)
		: POS(k.k->p.inode, iter.bi_sector);
	struct promote_op *promote;
	int ret;

	ret = should_promote(c, k, pos, opts, flags, failed);
	if (ret)
		goto nopromote;

	promote = __promote_alloc(trans,
				  k.k->type == KEY_TYPE_reflink_v
				  ? BTREE_ID_reflink
				  : BTREE_ID_extents,
				  k, pos, pick, opts, sectors, rbio, failed);
	ret = PTR_ERR_OR_ZERO(promote);
	if (ret)
		goto nopromote;

	*bounce		= true;
	*read_full	= promote_full;
	return promote;
nopromote:
	trace_read_nopromote(c, ret);
	return NULL;
}

/* Read */

static int bch2_read_err_msg_trans(struct btree_trans *trans, struct printbuf *out,
				   struct bch_read_bio *rbio, struct bpos read_pos)
{
	return bch2_inum_offset_err_msg_trans(trans, out,
		(subvol_inum) { rbio->subvol, read_pos.inode },
		read_pos.offset << 9);
}

static void bch2_read_err_msg(struct bch_fs *c, struct printbuf *out,
			      struct bch_read_bio *rbio, struct bpos read_pos)
{
	bch2_trans_run(c, bch2_read_err_msg_trans(trans, out, rbio, read_pos));
}

#define READ_RETRY_AVOID	1
#define READ_RETRY		2
#define READ_ERR		3

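/*
 * The context values are ordered: bch2_rbio_punt() runs work inline when the
 * rbio is already in a context at least as unbound as the one requested, and
 * only otherwise punts to the given workqueue.
 */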
enum rbio_context {
	RBIO_CONTEXT_NULL,
	RBIO_CONTEXT_HIGHPRI,
	RBIO_CONTEXT_UNBOUND,
};

static inline struct bch_read_bio *
bch2_rbio_parent(struct bch_read_bio *rbio)
{
	return rbio->split ? rbio->parent : rbio;
}

__always_inline
static void bch2_rbio_punt(struct bch_read_bio *rbio, work_func_t fn,
			   enum rbio_context context,
			   struct workqueue_struct *wq)
{
	if (context <= rbio->context) {
		fn(&rbio->work);
	} else {
		rbio->work.func		= fn;
		rbio->context		= context;
		queue_work(wq, &rbio->work);
	}
}

static inline struct bch_read_bio *bch2_rbio_free(struct bch_read_bio *rbio)
{
	BUG_ON(rbio->bounce && !rbio->split);

	if (rbio->promote)
		promote_free(rbio->c, rbio->promote);
	rbio->promote = NULL;

	if (rbio->bounce)
		bch2_bio_free_pages_pool(rbio->c, &rbio->bio);

	if (rbio->split) {
		struct bch_read_bio *parent = rbio->parent;

		if (rbio->kmalloc)
			kfree(rbio);
		else
			bio_put(&rbio->bio);

		rbio = parent;
	}

	return rbio;
}

/*
 * Only called on a top level bch_read_bio to complete an entire read request,
 * not a split:
 */
static void bch2_rbio_done(struct bch_read_bio *rbio)
{
	if (rbio->start_time)
		bch2_time_stats_update(&rbio->c->times[BCH_TIME_data_read],
				       rbio->start_time);
	bio_endio(&rbio->bio);
}

static void bch2_read_retry_nodecode(struct bch_fs *c, struct bch_read_bio *rbio,
				     struct bvec_iter bvec_iter,
				     struct bch_io_failures *failed,
				     unsigned flags)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_buf sk;
	struct bkey_s_c k;
	int ret;

	flags &= ~BCH_READ_LAST_FRAGMENT;
	flags |= BCH_READ_MUST_CLONE;

	bch2_bkey_buf_init(&sk);

	bch2_trans_iter_init(trans, &iter, rbio->data_btree,
			     rbio->read_pos, BTREE_ITER_slots);
retry:
	bch2_trans_begin(trans);
	rbio->bio.bi_status = 0;

	ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
	if (ret)
		goto err;

	bch2_bkey_buf_reassemble(&sk, c, k);
	k = bkey_i_to_s_c(sk.k);

	if (!bch2_bkey_matches_ptr(c, k,
				   rbio->pick.ptr,
				   rbio->data_pos.offset -
				   rbio->pick.crc.offset)) {
		/* extent we wanted to read no longer exists: */
		rbio->hole = true;
		goto out;
	}

	ret = __bch2_read_extent(trans, rbio, bvec_iter,
				 rbio->read_pos,
				 rbio->data_btree,
				 k, 0, failed, flags);
	if (ret == READ_RETRY)
		goto retry;
	if (ret)
		goto err;
out:
	bch2_rbio_done(rbio);
	bch2_trans_iter_exit(trans, &iter);
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&sk, c);
	return;
err:
	rbio->bio.bi_status = BLK_STS_IOERR;
	goto out;
}

static void bch2_rbio_retry(struct work_struct *work)
{
	struct bch_read_bio *rbio =
		container_of(work, struct bch_read_bio, work);
	struct bch_fs *c	= rbio->c;
	struct bvec_iter iter	= rbio->bvec_iter;
	unsigned flags		= rbio->flags;
	subvol_inum inum = {
		.subvol = rbio->subvol,
		.inum	= rbio->read_pos.inode,
	};
	struct bch_io_failures failed = { .nr = 0 };

	trace_and_count(c, read_retry, &rbio->bio);

	if (rbio->retry == READ_RETRY_AVOID)
		bch2_mark_io_failure(&failed, &rbio->pick);

	rbio->bio.bi_status = 0;

	rbio = bch2_rbio_free(rbio);

	flags |= BCH_READ_IN_RETRY;
	flags &= ~BCH_READ_MAY_PROMOTE;

	if (flags & BCH_READ_NODECODE) {
		bch2_read_retry_nodecode(c, rbio, iter, &failed, flags);
	} else {
		flags &= ~BCH_READ_LAST_FRAGMENT;
		flags |= BCH_READ_MUST_CLONE;

		__bch2_read(c, rbio, iter, inum, &failed, flags);
	}
}

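/*
 * bch2_rbio_error() records how a failed read should be handled: in the
 * synchronous retry path we only set rbio->retry, READ_ERR completes the
 * request with the given status, and anything else punts to the retry
 * worker.
 */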
static void bch2_rbio_error(struct bch_read_bio *rbio, int retry,
			    blk_status_t error)
{
	rbio->retry = retry;

	if (rbio->flags & BCH_READ_IN_RETRY)
		return;

	if (retry == READ_ERR) {
		rbio = bch2_rbio_free(rbio);

		rbio->bio.bi_status = error;
		bch2_rbio_done(rbio);
	} else {
		bch2_rbio_punt(rbio, bch2_rbio_retry,
			       RBIO_CONTEXT_UNBOUND, system_unbound_wq);
	}
}

static void bch2_read_io_err(struct work_struct *work)
{
	struct bch_read_bio *rbio =
		container_of(work, struct bch_read_bio, work);
	struct bio *bio = &rbio->bio;
	struct bch_fs *c	= rbio->c;
	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
	struct printbuf buf = PRINTBUF;

	bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
	prt_printf(&buf, "data read error: %s", bch2_blk_status_to_str(bio->bi_status));

	if (ca) {
		bch2_io_error(ca, BCH_MEMBER_ERROR_read);
		bch_err_ratelimited(ca, "%s", buf.buf);
	} else {
		bch_err_ratelimited(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);
	bch2_rbio_error(rbio, READ_RETRY_AVOID, bio->bi_status);
}

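/*
 * Checksum "narrowing": we just read and verified data checksummed over a
 * larger region than the extent currently references; re-checksum just the
 * referenced range and update the extent so future reads don't need to read
 * (and checksum) the extra data. Quietly gives up if the extent has changed
 * under us or the existing checksum fails to verify.
 */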
static int __bch2_rbio_narrow_crcs(struct btree_trans *trans,
				   struct bch_read_bio *rbio)
{
	struct bch_fs *c = rbio->c;
	u64 data_offset = rbio->data_pos.offset - rbio->pick.crc.offset;
	struct bch_extent_crc_unpacked new_crc;
	struct btree_iter iter;
	struct bkey_i *new;
	struct bkey_s_c k;
	int ret = 0;

	if (crc_is_compressed(rbio->pick.crc))
		return 0;

	k = bch2_bkey_get_iter(trans, &iter, rbio->data_btree, rbio->data_pos,
			       BTREE_ITER_slots|BTREE_ITER_intent);
	if ((ret = bkey_err(k)))
		goto out;

	if (bversion_cmp(k.k->bversion, rbio->version) ||
	    !bch2_bkey_matches_ptr(c, k, rbio->pick.ptr, data_offset))
		goto out;

	/* Extent was merged? */
	if (bkey_start_offset(k.k) < data_offset ||
	    k.k->p.offset > data_offset + rbio->pick.crc.uncompressed_size)
		goto out;

	if (bch2_rechecksum_bio(c, &rbio->bio, rbio->version,
			rbio->pick.crc, NULL, &new_crc,
			bkey_start_offset(k.k) - data_offset, k.k->size,
			rbio->pick.crc.csum_type)) {
		bch_err(c, "error verifying existing checksum while narrowing checksum (memory corruption?)");
		ret = 0;
		goto out;
	}

	/*
	 * going to be temporarily appending another checksum entry:
	 */
	new = bch2_trans_kmalloc(trans, bkey_bytes(k.k) +
				 sizeof(struct bch_extent_crc128));
	if ((ret = PTR_ERR_OR_ZERO(new)))
		goto out;

	bkey_reassemble(new, k);

	if (!bch2_bkey_narrow_crcs(new, new_crc))
		goto out;

	ret = bch2_trans_update(trans, &iter, new,
				BTREE_UPDATE_internal_snapshot_node);
out:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static noinline void bch2_rbio_narrow_crcs(struct bch_read_bio *rbio)
{
	bch2_trans_commit_do(rbio->c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			     __bch2_rbio_narrow_crcs(trans, rbio));
}

static void bch2_read_csum_err(struct work_struct *work)
{
	struct bch_read_bio *rbio =
		container_of(work, struct bch_read_bio, work);
	struct bch_fs *c = rbio->c;
	struct bio *src = &rbio->bio;
	struct bch_extent_crc_unpacked crc = rbio->pick.crc;
	struct nonce nonce = extent_nonce(rbio->version, crc);
	struct bch_csum csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
	struct printbuf buf = PRINTBUF;

	bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
	prt_str(&buf, "data ");
	bch2_csum_err_msg(&buf, crc.csum_type, rbio->pick.crc.csum, csum);

	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
	if (ca) {
		bch2_io_error(ca, BCH_MEMBER_ERROR_checksum);
		bch_err_ratelimited(ca, "%s", buf.buf);
	} else {
		bch_err_ratelimited(c, "%s", buf.buf);
	}

	bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
	printbuf_exit(&buf);
}

static void bch2_read_decompress_err(struct work_struct *work)
{
	struct bch_read_bio *rbio =
		container_of(work, struct bch_read_bio, work);
	struct bch_fs *c = rbio->c;
	struct printbuf buf = PRINTBUF;

	bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
	prt_str(&buf, "decompression error");

	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
	if (ca)
		bch_err_ratelimited(ca, "%s", buf.buf);
	else
		bch_err_ratelimited(c, "%s", buf.buf);

	bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
	printbuf_exit(&buf);
}

static void bch2_read_decrypt_err(struct work_struct *work)
{
	struct bch_read_bio *rbio =
		container_of(work, struct bch_read_bio, work);
	struct bch_fs *c = rbio->c;
	struct printbuf buf = PRINTBUF;

	bch2_read_err_msg(c, &buf, rbio, rbio->read_pos);
	prt_str(&buf, "decrypt error");

	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
	if (ca)
		bch_err_ratelimited(ca, "%s", buf.buf);
	else
		bch_err_ratelimited(c, "%s", buf.buf);

	bch2_rbio_error(rbio, READ_ERR, BLK_STS_IOERR);
	printbuf_exit(&buf);
}

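/*
 * Post-IO half of the read path: verify the checksum, optionally narrow the
 * extent's crcs, decrypt and/or decompress, copy bounced data back to the
 * parent bio, and hand the data off to the promote write if one was set up.
 */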
/* Inner part that may run in process context */
static void __bch2_read_endio(struct work_struct *work)
{
	struct bch_read_bio *rbio =
		container_of(work, struct bch_read_bio, work);
	struct bch_fs *c	= rbio->c;
	struct bio *src		= &rbio->bio;
	struct bio *dst		= &bch2_rbio_parent(rbio)->bio;
	struct bvec_iter dst_iter = rbio->bvec_iter;
	struct bch_extent_crc_unpacked crc = rbio->pick.crc;
	struct nonce nonce = extent_nonce(rbio->version, crc);
	unsigned nofs_flags;
	struct bch_csum csum;
	int ret;

	nofs_flags = memalloc_nofs_save();

	/* Reset iterator for checksumming and copying bounced data: */
	if (rbio->bounce) {
		src->bi_iter.bi_size		= crc.compressed_size << 9;
		src->bi_iter.bi_idx		= 0;
		src->bi_iter.bi_bvec_done	= 0;
	} else {
		src->bi_iter			= rbio->bvec_iter;
	}

	csum = bch2_checksum_bio(c, crc.csum_type, nonce, src);
	if (bch2_crc_cmp(csum, rbio->pick.crc.csum) && !c->opts.no_data_io)
		goto csum_err;

	/*
	 * XXX
	 * We need to rework the narrow_crcs path to deliver the read completion
	 * first, and then punt to a different workqueue, otherwise we're
	 * holding up reads while doing btree updates which is bad for memory
	 * reclaim.
	 */
	if (unlikely(rbio->narrow_crcs))
		bch2_rbio_narrow_crcs(rbio);

	if (rbio->flags & BCH_READ_NODECODE)
		goto nodecode;

	/* Adjust crc to point to subset of data we want: */
	crc.offset     += rbio->offset_into_extent;
	crc.live_size	= bvec_iter_sectors(rbio->bvec_iter);

	if (crc_is_compressed(crc)) {
		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
		if (ret)
			goto decrypt_err;

		if (bch2_bio_uncompress(c, src, dst, dst_iter, crc) &&
		    !c->opts.no_data_io)
			goto decompression_err;
	} else {
		/* don't need to decrypt the entire bio: */
		nonce = nonce_add(nonce, crc.offset << 9);
		bio_advance(src, crc.offset << 9);

		BUG_ON(src->bi_iter.bi_size < dst_iter.bi_size);
		src->bi_iter.bi_size = dst_iter.bi_size;

		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
		if (ret)
			goto decrypt_err;

		if (rbio->bounce) {
			struct bvec_iter src_iter = src->bi_iter;

			bio_copy_data_iter(dst, &dst_iter, src, &src_iter);
		}
	}

	if (rbio->promote) {
		/*
		 * Re encrypt data we decrypted, so it's consistent with
		 * rbio->crc:
		 */
		ret = bch2_encrypt_bio(c, crc.csum_type, nonce, src);
		if (ret)
			goto decrypt_err;

		promote_start(rbio->promote, rbio);
		rbio->promote = NULL;
	}
nodecode:
	if (likely(!(rbio->flags & BCH_READ_IN_RETRY))) {
		rbio = bch2_rbio_free(rbio);
		bch2_rbio_done(rbio);
	}
out:
	memalloc_nofs_restore(nofs_flags);
	return;
csum_err:
	/*
	 * Checksum error: if the bio wasn't bounced, we may have been
	 * reading into buffers owned by userspace (that userspace can
	 * scribble over) - retry the read, bouncing it this time:
	 */
	if (!rbio->bounce && (rbio->flags & BCH_READ_USER_MAPPED)) {
		rbio->flags |= BCH_READ_MUST_BOUNCE;
		bch2_rbio_error(rbio, READ_RETRY, BLK_STS_IOERR);
		goto out;
	}

	bch2_rbio_punt(rbio, bch2_read_csum_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
	goto out;
decompression_err:
	bch2_rbio_punt(rbio, bch2_read_decompress_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
	goto out;
decrypt_err:
	bch2_rbio_punt(rbio, bch2_read_decrypt_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
	goto out;
}

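/*
 * bio completion handler: account latency and drop the device ref, retry if
 * we raced with the bucket being reused, then punt __bch2_read_endio() to a
 * context appropriate for the remaining work (checksum only -> highpri,
 * decrypt/decompress/narrow crcs/promote -> unbound).
 */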
static void bch2_read_endio(struct bio *bio)
{
	struct bch_read_bio *rbio =
		container_of(bio, struct bch_read_bio, bio);
	struct bch_fs *c	= rbio->c;
	struct bch_dev *ca = rbio->have_ioref ? bch2_dev_have_ref(c, rbio->pick.ptr.dev) : NULL;
	struct workqueue_struct *wq = NULL;
	enum rbio_context context = RBIO_CONTEXT_NULL;

	if (rbio->have_ioref) {
		bch2_latency_acct(ca, rbio->submit_time, READ);
		percpu_ref_put(&ca->io_ref);
	}

	if (!rbio->split)
		rbio->bio.bi_end_io = rbio->end_io;

	if (unlikely(bio->bi_status)) {
		bch2_rbio_punt(rbio, bch2_read_io_err, RBIO_CONTEXT_UNBOUND, system_unbound_wq);
		return;
	}

	if (((rbio->flags & BCH_READ_RETRY_IF_STALE) && race_fault()) ||
	    (ca && dev_ptr_stale(ca, &rbio->pick.ptr))) {
		trace_and_count(c, read_reuse_race, &rbio->bio);

		if (rbio->flags & BCH_READ_RETRY_IF_STALE)
			bch2_rbio_error(rbio, READ_RETRY, BLK_STS_AGAIN);
		else
			bch2_rbio_error(rbio, READ_ERR, BLK_STS_AGAIN);
		return;
	}

	if (rbio->narrow_crcs ||
	    rbio->promote ||
	    crc_is_compressed(rbio->pick.crc) ||
	    bch2_csum_type_is_encryption(rbio->pick.crc.csum_type))
		context = RBIO_CONTEXT_UNBOUND, wq = system_unbound_wq;
	else if (rbio->pick.crc.csum_type)
		context = RBIO_CONTEXT_HIGHPRI, wq = system_highpri_wq;

	bch2_rbio_punt(rbio, __bch2_read_endio, context, wq);
}

static noinline void read_from_stale_dirty_pointer(struct btree_trans *trans,
						   struct bch_dev *ca,
						   struct bkey_s_c k,
						   struct bch_extent_ptr ptr)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct printbuf buf = PRINTBUF;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_alloc,
			     PTR_BUCKET_POS(ca, &ptr),
			     BTREE_ITER_cached);

	int gen = bucket_gen_get(ca, iter.pos.offset);
	if (gen >= 0) {
		prt_printf(&buf, "Attempting to read from stale dirty pointer:\n");
		printbuf_indent_add(&buf, 2);

		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);

		prt_printf(&buf, "memory gen: %u", gen);

		ret = lockrestart_do(trans, bkey_err(k = bch2_btree_iter_peek_slot(&iter)));
		if (!ret) {
			prt_newline(&buf);
			bch2_bkey_val_to_text(&buf, c, k);
		}
	} else {
		prt_printf(&buf, "Attempting to read from invalid bucket %llu:%llu:\n",
			   iter.pos.inode, iter.pos.offset);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "first bucket %u nbuckets %llu\n",
			   ca->mi.first_bucket, ca->mi.nbuckets);

		bch2_bkey_val_to_text(&buf, c, k);
		prt_newline(&buf);
	}

	bch2_fs_inconsistent(c, "%s", buf.buf);

	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
}

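/*
 * Read a single (possibly partial) extent: pick a device, decide whether we
 * need to bounce, clone or promote, build the bch_read_bio and submit it.
 * In the BCH_READ_IN_RETRY path the read is done synchronously and the
 * return value is 0, READ_RETRY or READ_ERR.
 */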
int __bch2_read_extent(struct btree_trans *trans, struct bch_read_bio *orig,
		       struct bvec_iter iter, struct bpos read_pos,
		       enum btree_id data_btree, struct bkey_s_c k,
		       unsigned offset_into_extent,
		       struct bch_io_failures *failed, unsigned flags)
{
	struct bch_fs *c = trans->c;
	struct extent_ptr_decoded pick;
	struct bch_read_bio *rbio = NULL;
	struct promote_op *promote = NULL;
	bool bounce = false, read_full = false, narrow_crcs = false;
	struct bpos data_pos = bkey_start_pos(k.k);
	int pick_ret;

	if (bkey_extent_is_inline_data(k.k)) {
		unsigned bytes = min_t(unsigned, iter.bi_size,
				       bkey_inline_data_bytes(k.k));

		swap(iter.bi_size, bytes);
		memcpy_to_bio(&orig->bio, iter, bkey_inline_data_p(k));
		swap(iter.bi_size, bytes);
		bio_advance_iter(&orig->bio, &iter, bytes);
		zero_fill_bio_iter(&orig->bio, iter);
		goto out_read_done;
	}
retry_pick:
	pick_ret = bch2_bkey_pick_read_device(c, k, failed, &pick);

	/* hole or reservation - just zero fill: */
	if (!pick_ret)
		goto hole;

	if (unlikely(pick_ret < 0)) {
		struct printbuf buf = PRINTBUF;
		bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
		prt_printf(&buf, "no device to read from: %s\n  ", bch2_err_str(pick_ret));
		bch2_bkey_val_to_text(&buf, c, k);

		bch_err_ratelimited(c, "%s", buf.buf);
		printbuf_exit(&buf);
		goto err;
	}

	if (unlikely(bch2_csum_type_is_encryption(pick.crc.csum_type)) && !c->chacha20) {
		struct printbuf buf = PRINTBUF;
		bch2_read_err_msg_trans(trans, &buf, orig, read_pos);
		prt_printf(&buf, "attempting to read encrypted data without encryption key\n  ");
		bch2_bkey_val_to_text(&buf, c, k);

		bch_err_ratelimited(c, "%s", buf.buf);
		printbuf_exit(&buf);
		goto err;
	}

	struct bch_dev *ca = bch2_dev_get_ioref(c, pick.ptr.dev, READ);

	/*
	 * Stale dirty pointers are treated as IO errors, but @failed isn't
	 * allocated unless we're in the retry path - so if we're not in the
	 * retry path, don't check here, it'll be caught in bch2_read_endio()
	 * and we'll end up in the retry path:
	 */
	if ((flags & BCH_READ_IN_RETRY) &&
	    !pick.ptr.cached &&
	    ca &&
	    unlikely(dev_ptr_stale(ca, &pick.ptr))) {
		read_from_stale_dirty_pointer(trans, ca, k, pick.ptr);
		bch2_mark_io_failure(failed, &pick);
		percpu_ref_put(&ca->io_ref);
		goto retry_pick;
	}

	/*
	 * Unlock the iterator while the btree node's lock is still in
	 * cache, before doing the IO:
	 */
	bch2_trans_unlock(trans);

	if (flags & BCH_READ_NODECODE) {
		/*
		 * can happen if we retry, and the extent we were going to read
		 * has been merged in the meantime:
		 */
		if (pick.crc.compressed_size > orig->bio.bi_vcnt * PAGE_SECTORS) {
			if (ca)
				percpu_ref_put(&ca->io_ref);
			goto hole;
		}

		iter.bi_size	= pick.crc.compressed_size << 9;
		goto get_bio;
	}

	if (!(flags & BCH_READ_LAST_FRAGMENT) ||
	    bio_flagged(&orig->bio, BIO_CHAIN))
		flags |= BCH_READ_MUST_CLONE;

	narrow_crcs = !(flags & BCH_READ_IN_RETRY) &&
		bch2_can_narrow_extent_crcs(k, pick.crc);

	if (narrow_crcs && (flags & BCH_READ_USER_MAPPED))
		flags |= BCH_READ_MUST_BOUNCE;

	EBUG_ON(offset_into_extent + bvec_iter_sectors(iter) > k.k->size);

	if (crc_is_compressed(pick.crc) ||
	    (pick.crc.csum_type != BCH_CSUM_none &&
	     (bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
	      (bch2_csum_type_is_encryption(pick.crc.csum_type) &&
	       (flags & BCH_READ_USER_MAPPED)) ||
	      (flags & BCH_READ_MUST_BOUNCE)))) {
		read_full = true;
		bounce = true;
	}

	if (orig->opts.promote_target || have_io_error(failed))
		promote = promote_alloc(trans, iter, k, &pick, orig->opts, flags,
					&rbio, &bounce, &read_full, failed);

	if (!read_full) {
		EBUG_ON(crc_is_compressed(pick.crc));
		EBUG_ON(pick.crc.csum_type &&
			(bvec_iter_sectors(iter) != pick.crc.uncompressed_size ||
			 bvec_iter_sectors(iter) != pick.crc.live_size ||
			 pick.crc.offset ||
			 offset_into_extent));

		data_pos.offset += offset_into_extent;
		pick.ptr.offset += pick.crc.offset +
			offset_into_extent;
		offset_into_extent		= 0;
		pick.crc.compressed_size	= bvec_iter_sectors(iter);
		pick.crc.uncompressed_size	= bvec_iter_sectors(iter);
		pick.crc.offset			= 0;
		pick.crc.live_size		= bvec_iter_sectors(iter);
	}
get_bio:
	if (rbio) {
		/*
		 * promote already allocated bounce rbio:
		 * promote needs to allocate a bio big enough for uncompressing
		 * data in the write path, but we're not going to use it all
		 * here:
		 */
		EBUG_ON(rbio->bio.bi_iter.bi_size <
			pick.crc.compressed_size << 9);
		rbio->bio.bi_iter.bi_size =
			pick.crc.compressed_size << 9;
	} else if (bounce) {
		unsigned sectors = pick.crc.compressed_size;

		rbio = rbio_init(bio_alloc_bioset(NULL,
						  DIV_ROUND_UP(sectors, PAGE_SECTORS),
						  0,
						  GFP_NOFS,
						  &c->bio_read_split),
				 orig->opts);

		bch2_bio_alloc_pages_pool(c, &rbio->bio, sectors << 9);
		rbio->bounce	= true;
		rbio->split	= true;
	} else if (flags & BCH_READ_MUST_CLONE) {
		/*
		 * Have to clone if there were any splits, due to error
		 * reporting issues (if a split errored, and retrying didn't
		 * work, when it reports the error to its parent (us) we don't
		 * know if the error was from our bio, and we should retry, or
		 * from the whole bio, in which case we don't want to retry and
		 * lose the error)
		 */
		rbio = rbio_init(bio_alloc_clone(NULL, &orig->bio, GFP_NOFS,
						 &c->bio_read_split),
				 orig->opts);
		rbio->bio.bi_iter = iter;
		rbio->split	= true;
	} else {
		rbio = orig;
		rbio->bio.bi_iter = iter;
		EBUG_ON(bio_flagged(&rbio->bio, BIO_CHAIN));
	}

	EBUG_ON(bio_sectors(&rbio->bio) != pick.crc.compressed_size);

	rbio->c			= c;
	rbio->submit_time	= local_clock();
	if (rbio->split)
		rbio->parent	= orig;
	else
		rbio->end_io	= orig->bio.bi_end_io;
	rbio->bvec_iter		= iter;
	rbio->offset_into_extent= offset_into_extent;
	rbio->flags		= flags;
	rbio->have_ioref	= ca != NULL;
	rbio->narrow_crcs	= narrow_crcs;
	rbio->hole		= 0;
	rbio->retry		= 0;
	rbio->context		= 0;
	/* XXX: only initialize this if needed */
	rbio->devs_have		= bch2_bkey_devs(k);
	rbio->pick		= pick;
	rbio->subvol		= orig->subvol;
	rbio->read_pos		= read_pos;
	rbio->data_btree	= data_btree;
	rbio->data_pos		= data_pos;
	rbio->version		= k.k->bversion;
	rbio->promote		= promote;
	INIT_WORK(&rbio->work, NULL);

	if (flags & BCH_READ_NODECODE)
		orig->pick = pick;

	rbio->bio.bi_opf	= orig->bio.bi_opf;
	rbio->bio.bi_iter.bi_sector = pick.ptr.offset;
	rbio->bio.bi_end_io	= bch2_read_endio;

	if (rbio->bounce)
		trace_and_count(c, read_bounce, &rbio->bio);

	this_cpu_add(c->counters[BCH_COUNTER_io_read], bio_sectors(&rbio->bio));
	bch2_increment_clock(c, bio_sectors(&rbio->bio), READ);

	/*
	 * If it's being moved internally, we don't want to flag it as a cache
	 * hit:
	 */
	if (ca && pick.ptr.cached && !(flags & BCH_READ_NODECODE))
		bch2_bucket_io_time_reset(trans, pick.ptr.dev,
					  PTR_BUCKET_NR(ca, &pick.ptr), READ);

	if (!(flags & (BCH_READ_IN_RETRY|BCH_READ_LAST_FRAGMENT))) {
		bio_inc_remaining(&orig->bio);
		trace_and_count(c, read_split, &orig->bio);
	}

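	/*
	 * A nonzero pick.idx sends us down the erasure coding reconstruct
	 * path instead of reading directly from the device:
	 */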
	if (!rbio->pick.idx) {
		if (unlikely(!rbio->have_ioref)) {
			struct printbuf buf = PRINTBUF;
			bch2_read_err_msg_trans(trans, &buf, rbio, read_pos);
			prt_printf(&buf, "no device to read from:\n  ");
			bch2_bkey_val_to_text(&buf, c, k);

			bch_err_ratelimited(c, "%s", buf.buf);
			printbuf_exit(&buf);

			bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
			goto out;
		}

		this_cpu_add(ca->io_done->sectors[READ][BCH_DATA_user],
			     bio_sectors(&rbio->bio));
		bio_set_dev(&rbio->bio, ca->disk_sb.bdev);

		if (unlikely(c->opts.no_data_io)) {
			if (likely(!(flags & BCH_READ_IN_RETRY)))
				bio_endio(&rbio->bio);
		} else {
			if (likely(!(flags & BCH_READ_IN_RETRY)))
				submit_bio(&rbio->bio);
			else
				submit_bio_wait(&rbio->bio);
		}

		/*
		 * We just submitted IO which may block, we expect relock fail
		 * events and shouldn't count them:
		 */
		trans->notrace_relock_fail = true;
	} else {
		/* Attempting reconstruct read: */
		if (bch2_ec_read_extent(trans, rbio, k)) {
			bch2_rbio_error(rbio, READ_RETRY_AVOID, BLK_STS_IOERR);
			goto out;
		}

		if (likely(!(flags & BCH_READ_IN_RETRY)))
			bio_endio(&rbio->bio);
	}
out:
	if (likely(!(flags & BCH_READ_IN_RETRY))) {
		return 0;
	} else {
		int ret;

		rbio->context = RBIO_CONTEXT_UNBOUND;
		bch2_read_endio(&rbio->bio);

		ret = rbio->retry;
		rbio = bch2_rbio_free(rbio);

		if (ret == READ_RETRY_AVOID) {
			bch2_mark_io_failure(failed, &pick);
			ret = READ_RETRY;
		}

		if (!ret)
			goto out_read_done;

		return ret;
	}

err:
	if (flags & BCH_READ_IN_RETRY)
		return READ_ERR;

	orig->bio.bi_status = BLK_STS_IOERR;
	goto out_read_done;

hole:
	/*
	 * won't normally happen in the BCH_READ_NODECODE
	 * (bch2_move_extent()) path, but if we retry and the extent we wanted
	 * to read no longer exists we have to signal that:
	 */
	if (flags & BCH_READ_NODECODE)
		orig->hole = true;

	zero_fill_bio_iter(&orig->bio, iter);
out_read_done:
	if (flags & BCH_READ_LAST_FRAGMENT)
		bch2_rbio_done(orig);
	return 0;
}

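/*
 * Top of the read path: walk the extents overlapping the request, resolving
 * indirect (reflink) extents as we go, and issue a read for each fragment
 * until the whole bvec_iter has been submitted.
 */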
void __bch2_read(struct bch_fs *c, struct bch_read_bio *rbio,
		 struct bvec_iter bvec_iter, subvol_inum inum,
		 struct bch_io_failures *failed, unsigned flags)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct btree_iter iter;
	struct bkey_buf sk;
	struct bkey_s_c k;
	int ret;

	BUG_ON(flags & BCH_READ_NODECODE);

	bch2_bkey_buf_init(&sk);
	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents,
			     POS(inum.inum, bvec_iter.bi_sector),
			     BTREE_ITER_slots);

	while (1) {
		enum btree_id data_btree = BTREE_ID_extents;

		bch2_trans_begin(trans);

		u32 snapshot;
		ret = bch2_subvolume_get_snapshot(trans, inum.subvol, &snapshot);
		if (ret)
			goto err;

		bch2_btree_iter_set_snapshot(&iter, snapshot);

		bch2_btree_iter_set_pos(&iter,
				POS(inum.inum, bvec_iter.bi_sector));

		k = bch2_btree_iter_peek_slot(&iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		s64 offset_into_extent = iter.pos.offset -
			bkey_start_offset(k.k);
		unsigned sectors = k.k->size - offset_into_extent;

		bch2_bkey_buf_reassemble(&sk, c, k);

		ret = bch2_read_indirect_extent(trans, &data_btree,
					&offset_into_extent, &sk);
		if (ret)
			goto err;

		k = bkey_i_to_s_c(sk.k);

		/*
		 * With indirect extents, the amount of data to read is the min
		 * of the original extent and the indirect extent:
		 */
		sectors = min_t(unsigned, sectors, k.k->size - offset_into_extent);

		unsigned bytes = min(sectors, bvec_iter_sectors(bvec_iter)) << 9;
		swap(bvec_iter.bi_size, bytes);

		if (bvec_iter.bi_size == bytes)
			flags |= BCH_READ_LAST_FRAGMENT;

		ret = __bch2_read_extent(trans, rbio, bvec_iter, iter.pos,
					 data_btree, k,
					 offset_into_extent, failed, flags);
		if (ret)
			goto err;

		if (flags & BCH_READ_LAST_FRAGMENT)
			break;

		swap(bvec_iter.bi_size, bytes);
		bio_advance_iter(&rbio->bio, &bvec_iter, bytes);
err:
		if (ret &&
		    !bch2_err_matches(ret, BCH_ERR_transaction_restart) &&
		    ret != READ_RETRY &&
		    ret != READ_RETRY_AVOID)
			break;
	}

	bch2_trans_iter_exit(trans, &iter);

	if (ret) {
		struct printbuf buf = PRINTBUF;
		bch2_inum_offset_err_msg_trans(trans, &buf, inum, bvec_iter.bi_sector << 9);
		prt_printf(&buf, "read error %i from btree lookup", ret);
		bch_err_ratelimited(c, "%s", buf.buf);
		printbuf_exit(&buf);

		rbio->bio.bi_status = BLK_STS_IOERR;
		bch2_rbio_done(rbio);
	}

	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&sk, c);
}

void bch2_fs_io_read_exit(struct bch_fs *c)
{
	if (c->promote_table.tbl)
		rhashtable_destroy(&c->promote_table);
	bioset_exit(&c->bio_read_split);
	bioset_exit(&c->bio_read);
}

int bch2_fs_io_read_init(struct bch_fs *c)
{
	if (bioset_init(&c->bio_read, 1, offsetof(struct bch_read_bio, bio),
			BIOSET_NEED_BVECS))
		return -BCH_ERR_ENOMEM_bio_read_init;

	if (bioset_init(&c->bio_read_split, 1, offsetof(struct bch_read_bio, bio),
			BIOSET_NEED_BVECS))
		return -BCH_ERR_ENOMEM_bio_read_split_init;

	if (rhashtable_init(&c->promote_table, &bch_promote_params))
		return -BCH_ERR_ENOMEM_promote_table_init;

	return 0;
}