// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "error.h"

#include <linux/mm.h>

static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bpos bucket,
			      struct bch_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	rcu_read_lock();
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bch_backpointer bp2;

		if (p.ptr.cached)
			continue;

		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
		if (!ca)
			continue;

		bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2);
		if (bpos_eq(bucket, bucket2) &&
		    !memcmp(&bp, &bp2, sizeof(bp))) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}

int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bch_validate_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu(c, bp.k->p.inode);
	if (!ca) {
		/* these will be caught by fsck */
		rcu_read_unlock();
		return 0;
	}

	struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
	struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
	rcu_read_unlock();
	int ret = 0;

	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bp_pos),
			 c, err,
			 backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong");
fsck_err:
	return ret;
}

void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
		   bch2_btree_id_str(bp->btree_id),
		   bp->level,
		   (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}

void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu(c, k.k->p.inode);
	if (ca) {
		struct bpos bucket = bp_pos_to_bucket(ca, k.k->p);
		rcu_read_unlock();
		prt_str(out, "bucket=");
		bch2_bpos_to_text(out, bucket);
		prt_str(out, " ");
	} else {
		rcu_read_unlock();
	}

	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}

void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_offset	= swab40(bp.v->bucket_offset);
	bp.v->bucket_len	= swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}
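
/*
 * Report a backpointer update that didn't match the expected btree state:
 * an insert that collided with an existing backpointer, or a delete that
 * found nothing (or the wrong key) to delete.
 */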
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting\n");
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers)
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	else
		return 0;
}
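
/*
 * Update a backpointer without going through the btree write buffer: the
 * existing key is read back and checked against the expected value before
 * the update is applied.
 */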
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
				struct bch_dev *ca,
				struct bpos bucket,
				struct bch_backpointer bp,
				struct bkey_s_c orig_k,
				bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
	bp_k->v = bp;

	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_intent|
			       BTREE_ITER_slots|
			       BTREE_ITER_with_updates);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (insert
	    ? k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

/*
 * Find the next backpointer >= *bp_pos:
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bch_dev *ca,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_cached|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, k);
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}

/*
 * Look up the extent or btree node a backpointer points to; returns
 * bkey_s_c_null if it no longer exists (see backpointer_not_found()).
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		struct bch_fs *c = trans->c;

		struct bpos bucket;
		if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
			return bkey_s_c_err(-EIO);

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}

struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;

	BUG_ON(!bp.level);

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
		return ERR_PTR(-EIO);

	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	struct btree *b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

/*
 * Check one backpointer key: delete it if it points to a device or alloc
 * key that doesn't exist.
 */
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
		if (fsck_err(trans, backpointer_to_missing_device,
			     "backpointer for missing device:\n%s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4,
			trans, backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}
/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

struct extents_to_bp_state {
	struct bpos	bucket_start;
	struct bpos	bucket_end;
	struct bkey_buf	last_flushed;
};

static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
	if (!ca)
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			trans, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}
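
/*
 * Check that @orig_k has a backpointer in @bucket matching @bp; if the slot
 * instead holds a backpointer to a different extent, decide which extent is
 * stale (duplicate versions of the same extent, or a bad checksum) before
 * repairing.
 */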
static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bpos bucket,
			   struct bch_backpointer bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter bp_iter = {};
	struct btree_iter other_extent_iter = {};
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c bp_k;
	int ret = 0;

	struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
	if (!ca) {
		prt_str(&buf, "extent for nonexistent device:bucket ");
		bch2_bpos_to_text(&buf, bucket);
		prt_str(&buf, "\n ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		bch_err(c, "%s", buf.buf);
		ret = -BCH_ERR_fsck_repair_unimplemented;
		goto err;
	}

	if (bpos_lt(bucket, s->bucket_start) ||
	    bpos_gt(bucket, s->bucket_end))
		goto out;

	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
				  bucket_pos_to_bp(ca, bucket, bp.bucket_offset),
				  0);
	ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed);
		if (ret)
			goto err;

		goto check_existing_bp;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &other_extent_iter);
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_dev_put(ca);
	printbuf_exit(&buf);
	return ret;
check_existing_bp:
	/* Do we have a backpointer for a different extent? */
	if (bp_k.k->type != KEY_TYPE_backpointer)
		goto missing;

	struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;

	struct bkey_s_c other_extent =
		bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
	ret = bkey_err(other_extent);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		ret = 0;
	if (ret)
		goto err;

	if (!other_extent.k)
		goto missing;

	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_str(&buf, "\n ");
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

		if (other_extent.k->size <= orig_k.k->size) {
			ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
			if (ret)
				goto err;
			goto out;
		} else {
			ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
			if (ret)
				goto err;
			goto missing;
		}
	}

	ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto missing;
	}

	ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		ret = 0;
		goto out;
	}

	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_str(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = -BCH_ERR_fsck_repair_unimplemented;
	goto err;
missing:
	printbuf_reset(&buf);
	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
		   bch2_btree_id_str(bp.btree_id), bp.level);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\n got: ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	struct bkey_i_backpointer n_bp_k;
	bkey_backpointer_init(&n_bp_k.k_i);
	n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
	n_bp_k.v = bp;
	prt_printf(&buf, "\n want: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));

	if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true);

	goto out;
}
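
/* Check each non-cached pointer in @k for a matching backpointer: */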
static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	int ret;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos = POS_MIN;
		struct bch_backpointer bp;

		if (p.ptr.cached)
			continue;

		rcu_read_lock();
		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
		if (ca)
			bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp);
		rcu_read_unlock();

		if (!ca)
			continue;

		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
		if (ret)
			return ret;
	}

	return 0;
}

static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_leaf_mask		= btree_leaf_mask;
	c->btree_cache.pinned_nodes_interior_mask	= btree_interior_mask;
	c->btree_cache.pinned_nodes_start		= start;
	c->btree_cache.pinned_nodes_end			= *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1;
		struct btree_iter iter;
		struct btree *b;

		if (!(BIT_ULL(btree) & btree_leaf_mask) &&
		    !(BIT_ULL(btree) & btree_interior_mask))
			continue;

		bch2_trans_begin(trans);

		__for_each_btree_node(trans, iter, btree,
				      btree == start.btree ? start.pos : POS_MIN,
				      0, depth, BTREE_ITER_prefetch, b, ret) {
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
		}
		bch2_trans_iter_exit(trans, &iter);
	}

	return ret;
}
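
/*
 * One pass of the extents -> backpointers check: walks every extent and
 * btree pointer, but only checks backpointers that land in the currently
 * pinned bucket range [s->bucket_start, s->bucket_end]:
 */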
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
						  BTREE_ITER_prefetch);

			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			}));
			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		ret = bch2_get_btree_in_memory_pos(trans,
				BIT_ULL(BTREE_ID_backpointers),
				BIT_ULL(BTREE_ID_backpointers),
				BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}

static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bkey_buf *last_flushed)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	if (!k.k) {
		ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed);
		if (ret)
			goto out;

		if (fsck_err(trans, backpointer_to_missing_ptr,
			     "backpointer for missing %s\n %s",
			     bp.v->level ? "btree node" : "extent",
			     (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
			ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
			goto out;
		}
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}
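
/*
 * One pass of the backpointers -> extents check: check_one_backpointer()
 * only looks at backpointers whose targets fall within [start, end], the
 * range of btree positions currently pinned in memory:
 */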
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bkey_buf last_flushed;

	bch2_bkey_buf_init(&last_flushed);
	bkey_init(&last_flushed.k->k);

	int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
				  POS_MIN, BTREE_ITER_prefetch, k,
				  NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_one_backpointer(trans, start, end,
					      bkey_s_c_to_backpointer(k),
					      &last_flushed));

	bch2_bkey_buf_exit(&last_flushed, trans->c);
	return ret;
}

int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_extents)|
						   BIT_ULL(BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		     bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}