1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "error.h" 13 14 #include <linux/mm.h> 15 16 static bool extent_matches_bp(struct bch_fs *c, 17 enum btree_id btree_id, unsigned level, 18 struct bkey_s_c k, 19 struct bpos bucket, 20 struct bch_backpointer bp) 21 { 22 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 23 const union bch_extent_entry *entry; 24 struct extent_ptr_decoded p; 25 26 rcu_read_lock(); 27 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 28 struct bpos bucket2; 29 struct bch_backpointer bp2; 30 31 if (p.ptr.cached) 32 continue; 33 34 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 35 if (!ca) 36 continue; 37 38 bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); 39 if (bpos_eq(bucket, bucket2) && 40 !memcmp(&bp, &bp2, sizeof(bp))) { 41 rcu_read_unlock(); 42 return true; 43 } 44 } 45 rcu_read_unlock(); 46 47 return false; 48 } 49 50 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, 51 enum bch_validate_flags flags) 52 { 53 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 54 55 rcu_read_lock(); 56 struct bch_dev *ca = bch2_dev_rcu(c, bp.k->p.inode); 57 if (!ca) { 58 /* these will be caught by fsck */ 59 rcu_read_unlock(); 60 return 0; 61 } 62 63 struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); 64 struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); 65 rcu_read_unlock(); 66 int ret = 0; 67 68 bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || 69 !bpos_eq(bp.k->p, bp_pos), 70 c, backpointer_bucket_offset_wrong, 71 "backpointer bucket_offset wrong"); 72 fsck_err: 73 return ret; 74 } 75 76 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) 77 { 78 prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", 79 bch2_btree_id_str(bp->btree_id), 80 bp->level, 81 (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), 82 (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 83 bp->bucket_len); 84 bch2_bpos_to_text(out, bp->pos); 85 } 86 87 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 88 { 89 rcu_read_lock(); 90 struct bch_dev *ca = bch2_dev_rcu(c, k.k->p.inode); 91 if (ca) { 92 struct bpos bucket = bp_pos_to_bucket(ca, k.k->p); 93 rcu_read_unlock(); 94 prt_str(out, "bucket="); 95 bch2_bpos_to_text(out, bucket); 96 prt_str(out, " "); 97 } else { 98 rcu_read_unlock(); 99 } 100 101 bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); 102 } 103 104 void bch2_backpointer_swab(struct bkey_s k) 105 { 106 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 107 108 bp.v->bucket_offset = swab40(bp.v->bucket_offset); 109 bp.v->bucket_len = swab32(bp.v->bucket_len); 110 bch2_bpos_swab(&bp.v->pos); 111 } 112 113 static noinline int backpointer_mod_err(struct btree_trans *trans, 114 struct bch_backpointer bp, 115 struct bkey_s_c bp_k, 116 struct bkey_s_c orig_k, 117 bool insert) 118 { 119 struct bch_fs *c = trans->c; 120 struct printbuf buf = PRINTBUF; 121 122 if (insert) { 123 prt_printf(&buf, "existing backpointer found when inserting "); 124 bch2_backpointer_to_text(&buf, &bp); 125 prt_newline(&buf); 126 printbuf_indent_add(&buf, 2); 127 128 prt_printf(&buf, "found "); 129 bch2_bkey_val_to_text(&buf, c, bp_k); 130 prt_newline(&buf); 131 132 prt_printf(&buf, "for "); 133 bch2_bkey_val_to_text(&buf, c, orig_k); 134 135 bch_err(c, "%s", buf.buf); 136 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 137 prt_printf(&buf, "backpointer not found when deleting\n"); 138 printbuf_indent_add(&buf, 2); 139 140 prt_printf(&buf, "searching for "); 141 bch2_backpointer_to_text(&buf, &bp); 142 prt_newline(&buf); 143 144 prt_printf(&buf, "got "); 145 bch2_bkey_val_to_text(&buf, c, bp_k); 146 prt_newline(&buf); 147 148 prt_printf(&buf, "for "); 149 bch2_bkey_val_to_text(&buf, c, orig_k); 150 151 bch_err(c, "%s", buf.buf); 152 } 153 154 printbuf_exit(&buf); 155 156 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 157 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 158 } else { 159 return 0; 160 } 161 } 162 163 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 164 struct bch_dev *ca, 165 struct bpos bucket, 166 struct bch_backpointer bp, 167 struct bkey_s_c orig_k, 168 bool insert) 169 { 170 struct btree_iter bp_iter; 171 struct bkey_s_c k; 172 struct bkey_i_backpointer *bp_k; 173 int ret; 174 175 bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); 176 ret = PTR_ERR_OR_ZERO(bp_k); 177 if (ret) 178 return ret; 179 180 bkey_backpointer_init(&bp_k->k_i); 181 bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); 182 bp_k->v = bp; 183 184 if (!insert) { 185 bp_k->k.type = KEY_TYPE_deleted; 186 set_bkey_val_u64s(&bp_k->k, 0); 187 } 188 189 k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 190 bp_k->k.p, 191 BTREE_ITER_intent| 192 BTREE_ITER_slots| 193 BTREE_ITER_with_updates); 194 ret = bkey_err(k); 195 if (ret) 196 goto err; 197 198 if (insert 199 ? k.k->type 200 : (k.k->type != KEY_TYPE_backpointer || 201 memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { 202 ret = backpointer_mod_err(trans, bp, k, orig_k, insert); 203 if (ret) 204 goto err; 205 } 206 207 ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); 208 err: 209 bch2_trans_iter_exit(trans, &bp_iter); 210 return ret; 211 } 212 213 /* 214 * Find the next backpointer >= *bp_offset: 215 */ 216 int bch2_get_next_backpointer(struct btree_trans *trans, 217 struct bch_dev *ca, 218 struct bpos bucket, int gen, 219 struct bpos *bp_pos, 220 struct bch_backpointer *bp, 221 unsigned iter_flags) 222 { 223 struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0); 224 struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; 225 struct bkey_s_c k; 226 int ret = 0; 227 228 if (bpos_ge(*bp_pos, bp_end_pos)) 229 goto done; 230 231 if (gen >= 0) { 232 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 233 bucket, BTREE_ITER_cached|iter_flags); 234 ret = bkey_err(k); 235 if (ret) 236 goto out; 237 238 if (k.k->type != KEY_TYPE_alloc_v4 || 239 bkey_s_c_to_alloc_v4(k).v->gen != gen) 240 goto done; 241 } 242 243 *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0)); 244 245 for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, 246 *bp_pos, iter_flags, k, ret) { 247 if (bpos_ge(k.k->p, bp_end_pos)) 248 break; 249 250 *bp_pos = k.k->p; 251 *bp = *bkey_s_c_to_backpointer(k).v; 252 goto out; 253 } 254 done: 255 *bp_pos = SPOS_MAX; 256 out: 257 bch2_trans_iter_exit(trans, &bp_iter); 258 bch2_trans_iter_exit(trans, &alloc_iter); 259 return ret; 260 } 261 262 static void backpointer_not_found(struct btree_trans *trans, 263 struct bpos bp_pos, 264 struct bch_backpointer bp, 265 struct bkey_s_c k) 266 { 267 struct bch_fs *c = trans->c; 268 struct printbuf buf = PRINTBUF; 269 270 /* 271 * If we're using the btree write buffer, the backpointer we were 272 * looking at may have already been deleted - failure to find what it 273 * pointed to is not an error: 274 */ 275 if (likely(!bch2_backpointers_no_use_write_buffer)) 276 return; 277 278 struct bpos bucket; 279 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 280 return; 281 282 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 283 bp.level ? "btree node" : "extent"); 284 prt_printf(&buf, "bucket: "); 285 bch2_bpos_to_text(&buf, bucket); 286 prt_printf(&buf, "\n "); 287 288 prt_printf(&buf, "backpointer pos: "); 289 bch2_bpos_to_text(&buf, bp_pos); 290 prt_printf(&buf, "\n "); 291 292 bch2_backpointer_to_text(&buf, &bp); 293 prt_printf(&buf, "\n "); 294 bch2_bkey_val_to_text(&buf, c, k); 295 if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) 296 bch_err_ratelimited(c, "%s", buf.buf); 297 else 298 bch2_trans_inconsistent(trans, "%s", buf.buf); 299 300 printbuf_exit(&buf); 301 } 302 303 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 304 struct btree_iter *iter, 305 struct bpos bp_pos, 306 struct bch_backpointer bp, 307 unsigned iter_flags) 308 { 309 if (likely(!bp.level)) { 310 struct bch_fs *c = trans->c; 311 312 struct bpos bucket; 313 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 314 return bkey_s_c_err(-EIO); 315 316 bch2_trans_node_iter_init(trans, iter, 317 bp.btree_id, 318 bp.pos, 319 0, 0, 320 iter_flags); 321 struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); 322 if (bkey_err(k)) { 323 bch2_trans_iter_exit(trans, iter); 324 return k; 325 } 326 327 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) 328 return k; 329 330 bch2_trans_iter_exit(trans, iter); 331 backpointer_not_found(trans, bp_pos, bp, k); 332 return bkey_s_c_null; 333 } else { 334 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); 335 336 if (IS_ERR_OR_NULL(b)) { 337 bch2_trans_iter_exit(trans, iter); 338 return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; 339 } 340 return bkey_i_to_s_c(&b->key); 341 } 342 } 343 344 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 345 struct btree_iter *iter, 346 struct bpos bp_pos, 347 struct bch_backpointer bp) 348 { 349 struct bch_fs *c = trans->c; 350 351 BUG_ON(!bp.level); 352 353 struct bpos bucket; 354 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 355 return ERR_PTR(-EIO); 356 357 bch2_trans_node_iter_init(trans, iter, 358 bp.btree_id, 359 bp.pos, 360 0, 361 bp.level - 1, 362 0); 363 struct btree *b = bch2_btree_iter_peek_node(iter); 364 if (IS_ERR_OR_NULL(b)) 365 goto err; 366 367 BUG_ON(b->c.level != bp.level - 1); 368 369 if (extent_matches_bp(c, bp.btree_id, bp.level, 370 bkey_i_to_s_c(&b->key), 371 bucket, bp)) 372 return b; 373 374 if (btree_node_will_make_reachable(b)) { 375 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 376 } else { 377 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); 378 b = NULL; 379 } 380 err: 381 bch2_trans_iter_exit(trans, iter); 382 return b; 383 } 384 385 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, 386 struct bkey_s_c k) 387 { 388 struct bch_fs *c = trans->c; 389 struct btree_iter alloc_iter = { NULL }; 390 struct bkey_s_c alloc_k; 391 struct printbuf buf = PRINTBUF; 392 int ret = 0; 393 394 struct bpos bucket; 395 if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { 396 if (fsck_err(trans, backpointer_to_missing_device, 397 "backpointer for missing device:\n%s", 398 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 399 ret = bch2_btree_delete_at(trans, bp_iter, 0); 400 goto out; 401 } 402 403 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0); 404 ret = bkey_err(alloc_k); 405 if (ret) 406 goto out; 407 408 if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, 409 trans, backpointer_to_missing_alloc, 410 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 411 alloc_iter.pos.inode, alloc_iter.pos.offset, 412 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 413 ret = bch2_btree_delete_at(trans, bp_iter, 0); 414 goto out; 415 } 416 out: 417 fsck_err: 418 bch2_trans_iter_exit(trans, &alloc_iter); 419 printbuf_exit(&buf); 420 return ret; 421 } 422 423 /* verify that every backpointer has a corresponding alloc key */ 424 int bch2_check_btree_backpointers(struct bch_fs *c) 425 { 426 int ret = bch2_trans_run(c, 427 for_each_btree_key_commit(trans, iter, 428 BTREE_ID_backpointers, POS_MIN, 0, k, 429 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 430 bch2_check_btree_backpointer(trans, &iter, k))); 431 bch_err_fn(c, ret); 432 return ret; 433 } 434 435 struct extents_to_bp_state { 436 struct bpos bucket_start; 437 struct bpos bucket_end; 438 struct bkey_buf last_flushed; 439 }; 440 441 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 442 struct bkey_s_c extent, unsigned dev) 443 { 444 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 445 int ret = PTR_ERR_OR_ZERO(n); 446 if (ret) 447 return ret; 448 449 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 450 return bch2_btree_insert_trans(trans, btree, n, 0); 451 } 452 453 static int check_extent_checksum(struct btree_trans *trans, 454 enum btree_id btree, struct bkey_s_c extent, 455 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 456 { 457 struct bch_fs *c = trans->c; 458 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 459 const union bch_extent_entry *entry; 460 struct extent_ptr_decoded p; 461 struct printbuf buf = PRINTBUF; 462 void *data_buf = NULL; 463 struct bio *bio = NULL; 464 size_t bytes; 465 int ret = 0; 466 467 if (bkey_is_btree_ptr(extent.k)) 468 return false; 469 470 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 471 if (p.ptr.dev == dev) 472 goto found; 473 BUG(); 474 found: 475 if (!p.crc.csum_type) 476 return false; 477 478 bytes = p.crc.compressed_size << 9; 479 480 struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ); 481 if (!ca) 482 return false; 483 484 data_buf = kvmalloc(bytes, GFP_KERNEL); 485 if (!data_buf) { 486 ret = -ENOMEM; 487 goto err; 488 } 489 490 bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); 491 bio->bi_iter.bi_sector = p.ptr.offset; 492 bch2_bio_map(bio, data_buf, bytes); 493 ret = submit_bio_wait(bio); 494 if (ret) 495 goto err; 496 497 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 498 prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); 499 bch2_bkey_val_to_text(&buf, c, extent); 500 prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); 501 bch2_bkey_val_to_text(&buf, c, extent2); 502 503 struct nonce nonce = extent_nonce(extent.k->version, p.crc); 504 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 505 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 506 trans, dup_backpointer_to_bad_csum_extent, 507 "%s", buf.buf)) 508 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 509 fsck_err: 510 err: 511 if (bio) 512 bio_put(bio); 513 kvfree(data_buf); 514 percpu_ref_put(&ca->io_ref); 515 printbuf_exit(&buf); 516 return ret; 517 } 518 519 static int check_bp_exists(struct btree_trans *trans, 520 struct extents_to_bp_state *s, 521 struct bpos bucket, 522 struct bch_backpointer bp, 523 struct bkey_s_c orig_k) 524 { 525 struct bch_fs *c = trans->c; 526 struct btree_iter bp_iter = {}; 527 struct btree_iter other_extent_iter = {}; 528 struct printbuf buf = PRINTBUF; 529 struct bkey_s_c bp_k; 530 int ret = 0; 531 532 struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); 533 if (!ca) { 534 prt_str(&buf, "extent for nonexistent device:bucket "); 535 bch2_bpos_to_text(&buf, bucket); 536 prt_str(&buf, "\n "); 537 bch2_bkey_val_to_text(&buf, c, orig_k); 538 bch_err(c, "%s", buf.buf); 539 ret = -BCH_ERR_fsck_repair_unimplemented; 540 goto err; 541 } 542 543 if (bpos_lt(bucket, s->bucket_start) || 544 bpos_gt(bucket, s->bucket_end)) 545 goto out; 546 547 bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 548 bucket_pos_to_bp(ca, bucket, bp.bucket_offset), 549 0); 550 ret = bkey_err(bp_k); 551 if (ret) 552 goto err; 553 554 if (bp_k.k->type != KEY_TYPE_backpointer || 555 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 556 ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 557 if (ret) 558 goto err; 559 560 goto check_existing_bp; 561 } 562 out: 563 err: 564 fsck_err: 565 bch2_trans_iter_exit(trans, &other_extent_iter); 566 bch2_trans_iter_exit(trans, &bp_iter); 567 bch2_dev_put(ca); 568 printbuf_exit(&buf); 569 return ret; 570 check_existing_bp: 571 /* Do we have a backpointer for a different extent? */ 572 if (bp_k.k->type != KEY_TYPE_backpointer) 573 goto missing; 574 575 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; 576 577 struct bkey_s_c other_extent = 578 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); 579 ret = bkey_err(other_extent); 580 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 581 ret = 0; 582 if (ret) 583 goto err; 584 585 if (!other_extent.k) 586 goto missing; 587 588 if (bch2_extents_match(orig_k, other_extent)) { 589 printbuf_reset(&buf); 590 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 591 bch2_bkey_val_to_text(&buf, c, orig_k); 592 prt_str(&buf, "\n "); 593 bch2_bkey_val_to_text(&buf, c, other_extent); 594 bch_err(c, "%s", buf.buf); 595 596 if (other_extent.k->size <= orig_k.k->size) { 597 ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); 598 if (ret) 599 goto err; 600 goto out; 601 } else { 602 ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); 603 if (ret) 604 goto err; 605 goto missing; 606 } 607 } 608 609 ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); 610 if (ret < 0) 611 goto err; 612 if (ret) { 613 ret = 0; 614 goto missing; 615 } 616 617 ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); 618 if (ret < 0) 619 goto err; 620 if (ret) { 621 ret = 0; 622 goto out; 623 } 624 625 printbuf_reset(&buf); 626 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); 627 bch2_bkey_val_to_text(&buf, c, orig_k); 628 prt_str(&buf, "\n "); 629 bch2_bkey_val_to_text(&buf, c, other_extent); 630 bch_err(c, "%s", buf.buf); 631 ret = -BCH_ERR_fsck_repair_unimplemented; 632 goto err; 633 missing: 634 printbuf_reset(&buf); 635 prt_printf(&buf, "missing backpointer for btree=%s l=%u ", 636 bch2_btree_id_str(bp.btree_id), bp.level); 637 bch2_bkey_val_to_text(&buf, c, orig_k); 638 prt_printf(&buf, "\n got: "); 639 bch2_bkey_val_to_text(&buf, c, bp_k); 640 641 struct bkey_i_backpointer n_bp_k; 642 bkey_backpointer_init(&n_bp_k.k_i); 643 n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); 644 n_bp_k.v = bp; 645 prt_printf(&buf, "\n want: "); 646 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); 647 648 if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) 649 ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true); 650 651 goto out; 652 } 653 654 static int check_extent_to_backpointers(struct btree_trans *trans, 655 struct extents_to_bp_state *s, 656 enum btree_id btree, unsigned level, 657 struct bkey_s_c k) 658 { 659 struct bch_fs *c = trans->c; 660 struct bkey_ptrs_c ptrs; 661 const union bch_extent_entry *entry; 662 struct extent_ptr_decoded p; 663 int ret; 664 665 ptrs = bch2_bkey_ptrs_c(k); 666 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 667 struct bpos bucket_pos = POS_MIN; 668 struct bch_backpointer bp; 669 670 if (p.ptr.cached) 671 continue; 672 673 rcu_read_lock(); 674 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 675 if (ca) 676 bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); 677 rcu_read_unlock(); 678 679 if (!ca) 680 continue; 681 682 ret = check_bp_exists(trans, s, bucket_pos, bp, k); 683 if (ret) 684 return ret; 685 } 686 687 return 0; 688 } 689 690 static int check_btree_root_to_backpointers(struct btree_trans *trans, 691 struct extents_to_bp_state *s, 692 enum btree_id btree_id, 693 int *level) 694 { 695 struct bch_fs *c = trans->c; 696 struct btree_iter iter; 697 struct btree *b; 698 struct bkey_s_c k; 699 int ret; 700 retry: 701 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 702 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 703 b = bch2_btree_iter_peek_node(&iter); 704 ret = PTR_ERR_OR_ZERO(b); 705 if (ret) 706 goto err; 707 708 if (b != btree_node_root(c, b)) { 709 bch2_trans_iter_exit(trans, &iter); 710 goto retry; 711 } 712 713 *level = b->c.level; 714 715 k = bkey_i_to_s_c(&b->key); 716 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 717 err: 718 bch2_trans_iter_exit(trans, &iter); 719 return ret; 720 } 721 722 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 723 { 724 return (struct bbpos) { 725 .btree = bp.btree_id, 726 .pos = bp.pos, 727 }; 728 } 729 730 static u64 mem_may_pin_bytes(struct bch_fs *c) 731 { 732 struct sysinfo i; 733 si_meminfo(&i); 734 735 u64 mem_bytes = i.totalram * i.mem_unit; 736 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 737 } 738 739 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 740 { 741 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 742 } 743 744 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 745 u64 btree_leaf_mask, 746 u64 btree_interior_mask, 747 struct bbpos start, struct bbpos *end) 748 { 749 struct bch_fs *c = trans->c; 750 s64 mem_may_pin = mem_may_pin_bytes(c); 751 int ret = 0; 752 753 btree_interior_mask |= btree_leaf_mask; 754 755 c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask; 756 c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask; 757 c->btree_cache.pinned_nodes_start = start; 758 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 759 760 for (enum btree_id btree = start.btree; 761 btree < BTREE_ID_NR && !ret; 762 btree++) { 763 unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; 764 765 if (!(BIT_ULL(btree) & btree_leaf_mask) && 766 !(BIT_ULL(btree) & btree_interior_mask)) 767 continue; 768 769 ret = __for_each_btree_node(trans, iter, btree, 770 btree == start.btree ? start.pos : POS_MIN, 771 0, depth, BTREE_ITER_prefetch, b, ({ 772 mem_may_pin -= btree_buf_bytes(b); 773 if (mem_may_pin <= 0) { 774 c->btree_cache.pinned_nodes_end = *end = 775 BBPOS(btree, b->key.k.p); 776 break; 777 } 778 0; 779 })); 780 } 781 782 return ret; 783 } 784 785 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 786 struct extents_to_bp_state *s) 787 { 788 struct bch_fs *c = trans->c; 789 int ret = 0; 790 791 for (enum btree_id btree_id = 0; 792 btree_id < btree_id_nr_alive(c); 793 btree_id++) { 794 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 795 796 ret = commit_do(trans, NULL, NULL, 797 BCH_TRANS_COMMIT_no_enospc, 798 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 799 if (ret) 800 return ret; 801 802 while (level >= depth) { 803 struct btree_iter iter; 804 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, 805 BTREE_ITER_prefetch); 806 807 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 808 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 809 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 810 })); 811 if (ret) 812 return ret; 813 814 --level; 815 } 816 } 817 818 return 0; 819 } 820 821 int bch2_check_extents_to_backpointers(struct bch_fs *c) 822 { 823 struct btree_trans *trans = bch2_trans_get(c); 824 struct extents_to_bp_state s = { .bucket_start = POS_MIN }; 825 int ret; 826 827 bch2_bkey_buf_init(&s.last_flushed); 828 bkey_init(&s.last_flushed.k->k); 829 830 while (1) { 831 struct bbpos end; 832 ret = bch2_get_btree_in_memory_pos(trans, 833 BIT_ULL(BTREE_ID_backpointers), 834 BIT_ULL(BTREE_ID_backpointers), 835 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); 836 if (ret) 837 break; 838 839 s.bucket_end = end.pos; 840 841 if ( bpos_eq(s.bucket_start, POS_MIN) && 842 !bpos_eq(s.bucket_end, SPOS_MAX)) 843 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 844 __func__, btree_nodes_fit_in_ram(c)); 845 846 if (!bpos_eq(s.bucket_start, POS_MIN) || 847 !bpos_eq(s.bucket_end, SPOS_MAX)) { 848 struct printbuf buf = PRINTBUF; 849 850 prt_str(&buf, "check_extents_to_backpointers(): "); 851 bch2_bpos_to_text(&buf, s.bucket_start); 852 prt_str(&buf, "-"); 853 bch2_bpos_to_text(&buf, s.bucket_end); 854 855 bch_verbose(c, "%s", buf.buf); 856 printbuf_exit(&buf); 857 } 858 859 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 860 if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) 861 break; 862 863 s.bucket_start = bpos_successor(s.bucket_end); 864 } 865 bch2_trans_put(trans); 866 bch2_bkey_buf_exit(&s.last_flushed, c); 867 868 c->btree_cache.pinned_nodes_leaf_mask = 0; 869 c->btree_cache.pinned_nodes_interior_mask = 0; 870 871 bch_err_fn(c, ret); 872 return ret; 873 } 874 875 static int check_one_backpointer(struct btree_trans *trans, 876 struct bbpos start, 877 struct bbpos end, 878 struct bkey_s_c_backpointer bp, 879 struct bkey_buf *last_flushed) 880 { 881 struct bch_fs *c = trans->c; 882 struct btree_iter iter; 883 struct bbpos pos = bp_to_bbpos(*bp.v); 884 struct bkey_s_c k; 885 struct printbuf buf = PRINTBUF; 886 int ret; 887 888 if (bbpos_cmp(pos, start) < 0 || 889 bbpos_cmp(pos, end) > 0) 890 return 0; 891 892 k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); 893 ret = bkey_err(k); 894 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 895 return 0; 896 if (ret) 897 return ret; 898 899 if (!k.k) { 900 ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); 901 if (ret) 902 goto out; 903 904 if (fsck_err(trans, backpointer_to_missing_ptr, 905 "backpointer for missing %s\n %s", 906 bp.v->level ? "btree node" : "extent", 907 (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 908 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 909 goto out; 910 } 911 } 912 out: 913 fsck_err: 914 bch2_trans_iter_exit(trans, &iter); 915 printbuf_exit(&buf); 916 return ret; 917 } 918 919 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 920 struct bbpos start, 921 struct bbpos end) 922 { 923 struct bkey_buf last_flushed; 924 925 bch2_bkey_buf_init(&last_flushed); 926 bkey_init(&last_flushed.k->k); 927 928 int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 929 POS_MIN, BTREE_ITER_prefetch, k, 930 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 931 check_one_backpointer(trans, start, end, 932 bkey_s_c_to_backpointer(k), 933 &last_flushed)); 934 935 bch2_bkey_buf_exit(&last_flushed, trans->c); 936 return ret; 937 } 938 939 int bch2_check_backpointers_to_extents(struct bch_fs *c) 940 { 941 struct btree_trans *trans = bch2_trans_get(c); 942 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 943 int ret; 944 945 while (1) { 946 ret = bch2_get_btree_in_memory_pos(trans, 947 BIT_ULL(BTREE_ID_extents)| 948 BIT_ULL(BTREE_ID_reflink), 949 ~0, 950 start, &end); 951 if (ret) 952 break; 953 954 if (!bbpos_cmp(start, BBPOS_MIN) && 955 bbpos_cmp(end, BBPOS_MAX)) 956 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 957 __func__, btree_nodes_fit_in_ram(c)); 958 959 if (bbpos_cmp(start, BBPOS_MIN) || 960 bbpos_cmp(end, BBPOS_MAX)) { 961 struct printbuf buf = PRINTBUF; 962 963 prt_str(&buf, "check_backpointers_to_extents(): "); 964 bch2_bbpos_to_text(&buf, start); 965 prt_str(&buf, "-"); 966 bch2_bbpos_to_text(&buf, end); 967 968 bch_verbose(c, "%s", buf.buf); 969 printbuf_exit(&buf); 970 } 971 972 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 973 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 974 break; 975 976 start = bbpos_successor(end); 977 } 978 bch2_trans_put(trans); 979 980 c->btree_cache.pinned_nodes_leaf_mask = 0; 981 c->btree_cache.pinned_nodes_interior_mask = 0; 982 983 bch_err_fn(c, ret); 984 return ret; 985 } 986