1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "disk_accounting.h" 13 #include "error.h" 14 15 #include <linux/mm.h> 16 17 static bool extent_matches_bp(struct bch_fs *c, 18 enum btree_id btree_id, unsigned level, 19 struct bkey_s_c k, 20 struct bpos bucket, 21 struct bch_backpointer bp) 22 { 23 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 24 const union bch_extent_entry *entry; 25 struct extent_ptr_decoded p; 26 27 rcu_read_lock(); 28 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 29 struct bpos bucket2; 30 struct bch_backpointer bp2; 31 32 if (p.ptr.cached) 33 continue; 34 35 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 36 if (!ca) 37 continue; 38 39 bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); 40 if (bpos_eq(bucket, bucket2) && 41 !memcmp(&bp, &bp2, sizeof(bp))) { 42 rcu_read_unlock(); 43 return true; 44 } 45 } 46 rcu_read_unlock(); 47 48 return false; 49 } 50 51 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, 52 enum bch_validate_flags flags) 53 { 54 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 55 56 rcu_read_lock(); 57 struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); 58 if (!ca) { 59 /* these will be caught by fsck */ 60 rcu_read_unlock(); 61 return 0; 62 } 63 64 struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); 65 struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); 66 rcu_read_unlock(); 67 int ret = 0; 68 69 bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || 70 !bpos_eq(bp.k->p, bp_pos), 71 c, backpointer_bucket_offset_wrong, 72 "backpointer bucket_offset wrong"); 73 fsck_err: 74 return ret; 75 } 76 77 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) 78 { 79 prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", 80 bch2_btree_id_str(bp->btree_id), 81 bp->level, 82 (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), 83 (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 84 bp->bucket_len); 85 bch2_bpos_to_text(out, bp->pos); 86 } 87 88 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 89 { 90 rcu_read_lock(); 91 struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode); 92 if (ca) { 93 struct bpos bucket = bp_pos_to_bucket(ca, k.k->p); 94 rcu_read_unlock(); 95 prt_str(out, "bucket="); 96 bch2_bpos_to_text(out, bucket); 97 prt_str(out, " "); 98 } else { 99 rcu_read_unlock(); 100 } 101 102 bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); 103 } 104 105 void bch2_backpointer_swab(struct bkey_s k) 106 { 107 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 108 109 bp.v->bucket_offset = swab40(bp.v->bucket_offset); 110 bp.v->bucket_len = swab32(bp.v->bucket_len); 111 bch2_bpos_swab(&bp.v->pos); 112 } 113 114 static noinline int backpointer_mod_err(struct btree_trans *trans, 115 struct bch_backpointer bp, 116 struct bkey_s_c bp_k, 117 struct bkey_s_c orig_k, 118 bool insert) 119 { 120 struct bch_fs *c = trans->c; 121 struct printbuf buf = PRINTBUF; 122 123 if (insert) { 124 prt_printf(&buf, "existing backpointer found when inserting "); 125 bch2_backpointer_to_text(&buf, &bp); 126 prt_newline(&buf); 127 printbuf_indent_add(&buf, 2); 128 129 prt_printf(&buf, "found "); 130 bch2_bkey_val_to_text(&buf, c, bp_k); 131 prt_newline(&buf); 132 133 prt_printf(&buf, "for "); 134 bch2_bkey_val_to_text(&buf, c, orig_k); 135 136 bch_err(c, "%s", buf.buf); 137 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 138 prt_printf(&buf, "backpointer not found when deleting\n"); 139 printbuf_indent_add(&buf, 2); 140 141 prt_printf(&buf, "searching for "); 142 bch2_backpointer_to_text(&buf, &bp); 143 prt_newline(&buf); 144 145 prt_printf(&buf, "got "); 146 bch2_bkey_val_to_text(&buf, c, bp_k); 147 prt_newline(&buf); 148 149 prt_printf(&buf, "for "); 150 bch2_bkey_val_to_text(&buf, c, orig_k); 151 152 bch_err(c, "%s", buf.buf); 153 } 154 155 printbuf_exit(&buf); 156 157 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 158 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 159 } else { 160 return 0; 161 } 162 } 163 164 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 165 struct bch_dev *ca, 166 struct bpos bucket, 167 struct bch_backpointer bp, 168 struct bkey_s_c orig_k, 169 bool insert) 170 { 171 struct btree_iter bp_iter; 172 struct bkey_s_c k; 173 struct bkey_i_backpointer *bp_k; 174 int ret; 175 176 bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); 177 ret = PTR_ERR_OR_ZERO(bp_k); 178 if (ret) 179 return ret; 180 181 bkey_backpointer_init(&bp_k->k_i); 182 bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); 183 bp_k->v = bp; 184 185 if (!insert) { 186 bp_k->k.type = KEY_TYPE_deleted; 187 set_bkey_val_u64s(&bp_k->k, 0); 188 } 189 190 k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 191 bp_k->k.p, 192 BTREE_ITER_intent| 193 BTREE_ITER_slots| 194 BTREE_ITER_with_updates); 195 ret = bkey_err(k); 196 if (ret) 197 goto err; 198 199 if (insert 200 ? k.k->type 201 : (k.k->type != KEY_TYPE_backpointer || 202 memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { 203 ret = backpointer_mod_err(trans, bp, k, orig_k, insert); 204 if (ret) 205 goto err; 206 } 207 208 ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); 209 err: 210 bch2_trans_iter_exit(trans, &bp_iter); 211 return ret; 212 } 213 214 /* 215 * Find the next backpointer >= *bp_offset: 216 */ 217 int bch2_get_next_backpointer(struct btree_trans *trans, 218 struct bch_dev *ca, 219 struct bpos bucket, int gen, 220 struct bpos *bp_pos, 221 struct bch_backpointer *bp, 222 unsigned iter_flags) 223 { 224 struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0); 225 struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; 226 struct bkey_s_c k; 227 int ret = 0; 228 229 if (bpos_ge(*bp_pos, bp_end_pos)) 230 goto done; 231 232 if (gen >= 0) { 233 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 234 bucket, BTREE_ITER_cached|iter_flags); 235 ret = bkey_err(k); 236 if (ret) 237 goto out; 238 239 if (k.k->type != KEY_TYPE_alloc_v4 || 240 bkey_s_c_to_alloc_v4(k).v->gen != gen) 241 goto done; 242 } 243 244 *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0)); 245 246 for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, 247 *bp_pos, iter_flags, k, ret) { 248 if (bpos_ge(k.k->p, bp_end_pos)) 249 break; 250 251 *bp_pos = k.k->p; 252 *bp = *bkey_s_c_to_backpointer(k).v; 253 goto out; 254 } 255 done: 256 *bp_pos = SPOS_MAX; 257 out: 258 bch2_trans_iter_exit(trans, &bp_iter); 259 bch2_trans_iter_exit(trans, &alloc_iter); 260 return ret; 261 } 262 263 static void backpointer_not_found(struct btree_trans *trans, 264 struct bpos bp_pos, 265 struct bch_backpointer bp, 266 struct bkey_s_c k) 267 { 268 struct bch_fs *c = trans->c; 269 struct printbuf buf = PRINTBUF; 270 271 /* 272 * If we're using the btree write buffer, the backpointer we were 273 * looking at may have already been deleted - failure to find what it 274 * pointed to is not an error: 275 */ 276 if (likely(!bch2_backpointers_no_use_write_buffer)) 277 return; 278 279 struct bpos bucket; 280 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 281 return; 282 283 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 284 bp.level ? "btree node" : "extent"); 285 prt_printf(&buf, "bucket: "); 286 bch2_bpos_to_text(&buf, bucket); 287 prt_printf(&buf, "\n "); 288 289 prt_printf(&buf, "backpointer pos: "); 290 bch2_bpos_to_text(&buf, bp_pos); 291 prt_printf(&buf, "\n "); 292 293 bch2_backpointer_to_text(&buf, &bp); 294 prt_printf(&buf, "\n "); 295 bch2_bkey_val_to_text(&buf, c, k); 296 if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) 297 bch_err_ratelimited(c, "%s", buf.buf); 298 else 299 bch2_trans_inconsistent(trans, "%s", buf.buf); 300 301 printbuf_exit(&buf); 302 } 303 304 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 305 struct btree_iter *iter, 306 struct bpos bp_pos, 307 struct bch_backpointer bp, 308 unsigned iter_flags) 309 { 310 if (likely(!bp.level)) { 311 struct bch_fs *c = trans->c; 312 313 struct bpos bucket; 314 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 315 return bkey_s_c_err(-EIO); 316 317 bch2_trans_node_iter_init(trans, iter, 318 bp.btree_id, 319 bp.pos, 320 0, 0, 321 iter_flags); 322 struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); 323 if (bkey_err(k)) { 324 bch2_trans_iter_exit(trans, iter); 325 return k; 326 } 327 328 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) 329 return k; 330 331 bch2_trans_iter_exit(trans, iter); 332 backpointer_not_found(trans, bp_pos, bp, k); 333 return bkey_s_c_null; 334 } else { 335 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); 336 337 if (IS_ERR_OR_NULL(b)) { 338 bch2_trans_iter_exit(trans, iter); 339 return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; 340 } 341 return bkey_i_to_s_c(&b->key); 342 } 343 } 344 345 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 346 struct btree_iter *iter, 347 struct bpos bp_pos, 348 struct bch_backpointer bp) 349 { 350 struct bch_fs *c = trans->c; 351 352 BUG_ON(!bp.level); 353 354 struct bpos bucket; 355 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 356 return ERR_PTR(-EIO); 357 358 bch2_trans_node_iter_init(trans, iter, 359 bp.btree_id, 360 bp.pos, 361 0, 362 bp.level - 1, 363 0); 364 struct btree *b = bch2_btree_iter_peek_node(iter); 365 if (IS_ERR_OR_NULL(b)) 366 goto err; 367 368 BUG_ON(b->c.level != bp.level - 1); 369 370 if (extent_matches_bp(c, bp.btree_id, bp.level, 371 bkey_i_to_s_c(&b->key), 372 bucket, bp)) 373 return b; 374 375 if (btree_node_will_make_reachable(b)) { 376 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 377 } else { 378 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); 379 b = NULL; 380 } 381 err: 382 bch2_trans_iter_exit(trans, iter); 383 return b; 384 } 385 386 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, 387 struct bkey_s_c k) 388 { 389 struct bch_fs *c = trans->c; 390 struct btree_iter alloc_iter = { NULL }; 391 struct bkey_s_c alloc_k; 392 struct printbuf buf = PRINTBUF; 393 int ret = 0; 394 395 struct bpos bucket; 396 if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { 397 if (fsck_err(trans, backpointer_to_missing_device, 398 "backpointer for missing device:\n%s", 399 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 400 ret = bch2_btree_delete_at(trans, bp_iter, 0); 401 goto out; 402 } 403 404 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0); 405 ret = bkey_err(alloc_k); 406 if (ret) 407 goto out; 408 409 if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, 410 trans, backpointer_to_missing_alloc, 411 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 412 alloc_iter.pos.inode, alloc_iter.pos.offset, 413 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 414 ret = bch2_btree_delete_at(trans, bp_iter, 0); 415 goto out; 416 } 417 out: 418 fsck_err: 419 bch2_trans_iter_exit(trans, &alloc_iter); 420 printbuf_exit(&buf); 421 return ret; 422 } 423 424 /* verify that every backpointer has a corresponding alloc key */ 425 int bch2_check_btree_backpointers(struct bch_fs *c) 426 { 427 int ret = bch2_trans_run(c, 428 for_each_btree_key_commit(trans, iter, 429 BTREE_ID_backpointers, POS_MIN, 0, k, 430 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 431 bch2_check_btree_backpointer(trans, &iter, k))); 432 bch_err_fn(c, ret); 433 return ret; 434 } 435 436 struct extents_to_bp_state { 437 struct bpos bucket_start; 438 struct bpos bucket_end; 439 struct bkey_buf last_flushed; 440 }; 441 442 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 443 struct bkey_s_c extent, unsigned dev) 444 { 445 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 446 int ret = PTR_ERR_OR_ZERO(n); 447 if (ret) 448 return ret; 449 450 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 451 return bch2_btree_insert_trans(trans, btree, n, 0); 452 } 453 454 static int check_extent_checksum(struct btree_trans *trans, 455 enum btree_id btree, struct bkey_s_c extent, 456 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 457 { 458 struct bch_fs *c = trans->c; 459 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 460 const union bch_extent_entry *entry; 461 struct extent_ptr_decoded p; 462 struct printbuf buf = PRINTBUF; 463 void *data_buf = NULL; 464 struct bio *bio = NULL; 465 size_t bytes; 466 int ret = 0; 467 468 if (bkey_is_btree_ptr(extent.k)) 469 return false; 470 471 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 472 if (p.ptr.dev == dev) 473 goto found; 474 BUG(); 475 found: 476 if (!p.crc.csum_type) 477 return false; 478 479 bytes = p.crc.compressed_size << 9; 480 481 struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ); 482 if (!ca) 483 return false; 484 485 data_buf = kvmalloc(bytes, GFP_KERNEL); 486 if (!data_buf) { 487 ret = -ENOMEM; 488 goto err; 489 } 490 491 bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); 492 bio->bi_iter.bi_sector = p.ptr.offset; 493 bch2_bio_map(bio, data_buf, bytes); 494 ret = submit_bio_wait(bio); 495 if (ret) 496 goto err; 497 498 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 499 prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); 500 bch2_bkey_val_to_text(&buf, c, extent); 501 prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); 502 bch2_bkey_val_to_text(&buf, c, extent2); 503 504 struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); 505 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 506 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 507 trans, dup_backpointer_to_bad_csum_extent, 508 "%s", buf.buf)) 509 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 510 fsck_err: 511 err: 512 if (bio) 513 bio_put(bio); 514 kvfree(data_buf); 515 percpu_ref_put(&ca->io_ref); 516 printbuf_exit(&buf); 517 return ret; 518 } 519 520 static int check_bp_exists(struct btree_trans *trans, 521 struct extents_to_bp_state *s, 522 struct bpos bucket, 523 struct bch_backpointer bp, 524 struct bkey_s_c orig_k) 525 { 526 struct bch_fs *c = trans->c; 527 struct btree_iter bp_iter = {}; 528 struct btree_iter other_extent_iter = {}; 529 struct printbuf buf = PRINTBUF; 530 struct bkey_s_c bp_k; 531 int ret = 0; 532 533 struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); 534 if (!ca) { 535 prt_str(&buf, "extent for nonexistent device:bucket "); 536 bch2_bpos_to_text(&buf, bucket); 537 prt_str(&buf, "\n "); 538 bch2_bkey_val_to_text(&buf, c, orig_k); 539 bch_err(c, "%s", buf.buf); 540 ret = -BCH_ERR_fsck_repair_unimplemented; 541 goto err; 542 } 543 544 if (bpos_lt(bucket, s->bucket_start) || 545 bpos_gt(bucket, s->bucket_end)) 546 goto out; 547 548 bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 549 bucket_pos_to_bp(ca, bucket, bp.bucket_offset), 550 0); 551 ret = bkey_err(bp_k); 552 if (ret) 553 goto err; 554 555 if (bp_k.k->type != KEY_TYPE_backpointer || 556 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 557 ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 558 if (ret) 559 goto err; 560 561 goto check_existing_bp; 562 } 563 out: 564 err: 565 fsck_err: 566 bch2_trans_iter_exit(trans, &other_extent_iter); 567 bch2_trans_iter_exit(trans, &bp_iter); 568 bch2_dev_put(ca); 569 printbuf_exit(&buf); 570 return ret; 571 check_existing_bp: 572 /* Do we have a backpointer for a different extent? */ 573 if (bp_k.k->type != KEY_TYPE_backpointer) 574 goto missing; 575 576 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; 577 578 struct bkey_s_c other_extent = 579 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); 580 ret = bkey_err(other_extent); 581 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 582 ret = 0; 583 if (ret) 584 goto err; 585 586 if (!other_extent.k) 587 goto missing; 588 589 if (bch2_extents_match(orig_k, other_extent)) { 590 printbuf_reset(&buf); 591 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 592 bch2_bkey_val_to_text(&buf, c, orig_k); 593 prt_str(&buf, "\n "); 594 bch2_bkey_val_to_text(&buf, c, other_extent); 595 bch_err(c, "%s", buf.buf); 596 597 if (other_extent.k->size <= orig_k.k->size) { 598 ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); 599 if (ret) 600 goto err; 601 goto out; 602 } else { 603 ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); 604 if (ret) 605 goto err; 606 goto missing; 607 } 608 } 609 610 ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); 611 if (ret < 0) 612 goto err; 613 if (ret) { 614 ret = 0; 615 goto missing; 616 } 617 618 ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); 619 if (ret < 0) 620 goto err; 621 if (ret) { 622 ret = 0; 623 goto out; 624 } 625 626 printbuf_reset(&buf); 627 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); 628 bch2_bkey_val_to_text(&buf, c, orig_k); 629 prt_str(&buf, "\n "); 630 bch2_bkey_val_to_text(&buf, c, other_extent); 631 bch_err(c, "%s", buf.buf); 632 ret = -BCH_ERR_fsck_repair_unimplemented; 633 goto err; 634 missing: 635 printbuf_reset(&buf); 636 prt_printf(&buf, "missing backpointer for btree=%s l=%u ", 637 bch2_btree_id_str(bp.btree_id), bp.level); 638 bch2_bkey_val_to_text(&buf, c, orig_k); 639 prt_printf(&buf, "\n got: "); 640 bch2_bkey_val_to_text(&buf, c, bp_k); 641 642 struct bkey_i_backpointer n_bp_k; 643 bkey_backpointer_init(&n_bp_k.k_i); 644 n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); 645 n_bp_k.v = bp; 646 prt_printf(&buf, "\n want: "); 647 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); 648 649 if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) 650 ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true); 651 652 goto out; 653 } 654 655 static int check_extent_to_backpointers(struct btree_trans *trans, 656 struct extents_to_bp_state *s, 657 enum btree_id btree, unsigned level, 658 struct bkey_s_c k) 659 { 660 struct bch_fs *c = trans->c; 661 struct bkey_ptrs_c ptrs; 662 const union bch_extent_entry *entry; 663 struct extent_ptr_decoded p; 664 int ret; 665 666 ptrs = bch2_bkey_ptrs_c(k); 667 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 668 struct bpos bucket_pos = POS_MIN; 669 struct bch_backpointer bp; 670 671 if (p.ptr.cached) 672 continue; 673 674 rcu_read_lock(); 675 struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); 676 if (ca) 677 bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); 678 rcu_read_unlock(); 679 680 if (!ca) 681 continue; 682 683 ret = check_bp_exists(trans, s, bucket_pos, bp, k); 684 if (ret) 685 return ret; 686 } 687 688 return 0; 689 } 690 691 static int check_btree_root_to_backpointers(struct btree_trans *trans, 692 struct extents_to_bp_state *s, 693 enum btree_id btree_id, 694 int *level) 695 { 696 struct bch_fs *c = trans->c; 697 struct btree_iter iter; 698 struct btree *b; 699 struct bkey_s_c k; 700 int ret; 701 retry: 702 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 703 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 704 b = bch2_btree_iter_peek_node(&iter); 705 ret = PTR_ERR_OR_ZERO(b); 706 if (ret) 707 goto err; 708 709 if (b != btree_node_root(c, b)) { 710 bch2_trans_iter_exit(trans, &iter); 711 goto retry; 712 } 713 714 *level = b->c.level; 715 716 k = bkey_i_to_s_c(&b->key); 717 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 718 err: 719 bch2_trans_iter_exit(trans, &iter); 720 return ret; 721 } 722 723 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 724 { 725 return (struct bbpos) { 726 .btree = bp.btree_id, 727 .pos = bp.pos, 728 }; 729 } 730 731 static u64 mem_may_pin_bytes(struct bch_fs *c) 732 { 733 struct sysinfo i; 734 si_meminfo(&i); 735 736 u64 mem_bytes = i.totalram * i.mem_unit; 737 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 738 } 739 740 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 741 { 742 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 743 } 744 745 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 746 u64 btree_leaf_mask, 747 u64 btree_interior_mask, 748 struct bbpos start, struct bbpos *end) 749 { 750 struct bch_fs *c = trans->c; 751 s64 mem_may_pin = mem_may_pin_bytes(c); 752 int ret = 0; 753 754 bch2_btree_cache_unpin(c); 755 756 btree_interior_mask |= btree_leaf_mask; 757 758 c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask; 759 c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask; 760 c->btree_cache.pinned_nodes_start = start; 761 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 762 763 for (enum btree_id btree = start.btree; 764 btree < BTREE_ID_NR && !ret; 765 btree++) { 766 unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; 767 768 if (!(BIT_ULL(btree) & btree_leaf_mask) && 769 !(BIT_ULL(btree) & btree_interior_mask)) 770 continue; 771 772 ret = __for_each_btree_node(trans, iter, btree, 773 btree == start.btree ? start.pos : POS_MIN, 774 0, depth, BTREE_ITER_prefetch, b, ({ 775 mem_may_pin -= btree_buf_bytes(b); 776 if (mem_may_pin <= 0) { 777 c->btree_cache.pinned_nodes_end = *end = 778 BBPOS(btree, b->key.k.p); 779 break; 780 } 781 bch2_node_pin(c, b); 782 0; 783 })); 784 } 785 786 return ret; 787 } 788 789 struct progress_indicator_state { 790 unsigned long next_print; 791 u64 nodes_seen; 792 u64 nodes_total; 793 struct btree *last_node; 794 }; 795 796 static inline void progress_init(struct progress_indicator_state *s, 797 struct bch_fs *c, 798 u64 btree_id_mask) 799 { 800 memset(s, 0, sizeof(*s)); 801 802 s->next_print = jiffies + HZ * 10; 803 804 for (unsigned i = 0; i < BTREE_ID_NR; i++) { 805 if (!(btree_id_mask & BIT_ULL(i))) 806 continue; 807 808 struct disk_accounting_pos acc = { 809 .type = BCH_DISK_ACCOUNTING_btree, 810 .btree.id = i, 811 }; 812 813 u64 v; 814 bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); 815 s->nodes_total += div64_ul(v, btree_sectors(c)); 816 } 817 } 818 819 static inline bool progress_update_p(struct progress_indicator_state *s) 820 { 821 bool ret = time_after_eq(jiffies, s->next_print); 822 823 if (ret) 824 s->next_print = jiffies + HZ * 10; 825 return ret; 826 } 827 828 static void progress_update_iter(struct btree_trans *trans, 829 struct progress_indicator_state *s, 830 struct btree_iter *iter, 831 const char *msg) 832 { 833 struct bch_fs *c = trans->c; 834 struct btree *b = path_l(btree_iter_path(trans, iter))->b; 835 836 s->nodes_seen += b != s->last_node; 837 s->last_node = b; 838 839 if (progress_update_p(s)) { 840 struct printbuf buf = PRINTBUF; 841 unsigned percent = s->nodes_total 842 ? div64_u64(s->nodes_seen * 100, s->nodes_total) 843 : 0; 844 845 prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ", 846 msg, percent, s->nodes_seen, s->nodes_total); 847 bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); 848 849 bch_info(c, "%s", buf.buf); 850 printbuf_exit(&buf); 851 } 852 } 853 854 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 855 struct extents_to_bp_state *s) 856 { 857 struct bch_fs *c = trans->c; 858 struct progress_indicator_state progress; 859 int ret = 0; 860 861 progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink)); 862 863 for (enum btree_id btree_id = 0; 864 btree_id < btree_id_nr_alive(c); 865 btree_id++) { 866 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 867 868 ret = commit_do(trans, NULL, NULL, 869 BCH_TRANS_COMMIT_no_enospc, 870 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 871 if (ret) 872 return ret; 873 874 while (level >= depth) { 875 struct btree_iter iter; 876 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, 877 BTREE_ITER_prefetch); 878 879 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 880 progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); 881 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 882 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 883 })); 884 if (ret) 885 return ret; 886 887 --level; 888 } 889 } 890 891 return 0; 892 } 893 894 int bch2_check_extents_to_backpointers(struct bch_fs *c) 895 { 896 struct btree_trans *trans = bch2_trans_get(c); 897 struct extents_to_bp_state s = { .bucket_start = POS_MIN }; 898 int ret; 899 900 bch2_bkey_buf_init(&s.last_flushed); 901 bkey_init(&s.last_flushed.k->k); 902 903 while (1) { 904 struct bbpos end; 905 ret = bch2_get_btree_in_memory_pos(trans, 906 BIT_ULL(BTREE_ID_backpointers), 907 BIT_ULL(BTREE_ID_backpointers), 908 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); 909 if (ret) 910 break; 911 912 s.bucket_end = end.pos; 913 914 if ( bpos_eq(s.bucket_start, POS_MIN) && 915 !bpos_eq(s.bucket_end, SPOS_MAX)) 916 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 917 __func__, btree_nodes_fit_in_ram(c)); 918 919 if (!bpos_eq(s.bucket_start, POS_MIN) || 920 !bpos_eq(s.bucket_end, SPOS_MAX)) { 921 struct printbuf buf = PRINTBUF; 922 923 prt_str(&buf, "check_extents_to_backpointers(): "); 924 bch2_bpos_to_text(&buf, s.bucket_start); 925 prt_str(&buf, "-"); 926 bch2_bpos_to_text(&buf, s.bucket_end); 927 928 bch_verbose(c, "%s", buf.buf); 929 printbuf_exit(&buf); 930 } 931 932 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 933 if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) 934 break; 935 936 s.bucket_start = bpos_successor(s.bucket_end); 937 } 938 bch2_trans_put(trans); 939 bch2_bkey_buf_exit(&s.last_flushed, c); 940 941 bch2_btree_cache_unpin(c); 942 943 bch_err_fn(c, ret); 944 return ret; 945 } 946 947 static int check_one_backpointer(struct btree_trans *trans, 948 struct bbpos start, 949 struct bbpos end, 950 struct bkey_s_c_backpointer bp, 951 struct bkey_buf *last_flushed) 952 { 953 struct bch_fs *c = trans->c; 954 struct btree_iter iter; 955 struct bbpos pos = bp_to_bbpos(*bp.v); 956 struct bkey_s_c k; 957 struct printbuf buf = PRINTBUF; 958 int ret; 959 960 if (bbpos_cmp(pos, start) < 0 || 961 bbpos_cmp(pos, end) > 0) 962 return 0; 963 964 k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); 965 ret = bkey_err(k); 966 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 967 return 0; 968 if (ret) 969 return ret; 970 971 if (!k.k) { 972 ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); 973 if (ret) 974 goto out; 975 976 if (fsck_err(trans, backpointer_to_missing_ptr, 977 "backpointer for missing %s\n %s", 978 bp.v->level ? "btree node" : "extent", 979 (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 980 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 981 goto out; 982 } 983 } 984 out: 985 fsck_err: 986 bch2_trans_iter_exit(trans, &iter); 987 printbuf_exit(&buf); 988 return ret; 989 } 990 991 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 992 struct bbpos start, 993 struct bbpos end) 994 { 995 struct bch_fs *c = trans->c; 996 struct bkey_buf last_flushed; 997 struct progress_indicator_state progress; 998 999 bch2_bkey_buf_init(&last_flushed); 1000 bkey_init(&last_flushed.k->k); 1001 progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); 1002 1003 int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 1004 POS_MIN, BTREE_ITER_prefetch, k, 1005 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 1006 progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); 1007 check_one_backpointer(trans, start, end, 1008 bkey_s_c_to_backpointer(k), 1009 &last_flushed); 1010 })); 1011 1012 bch2_bkey_buf_exit(&last_flushed, c); 1013 return ret; 1014 } 1015 1016 int bch2_check_backpointers_to_extents(struct bch_fs *c) 1017 { 1018 struct btree_trans *trans = bch2_trans_get(c); 1019 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 1020 int ret; 1021 1022 while (1) { 1023 ret = bch2_get_btree_in_memory_pos(trans, 1024 BIT_ULL(BTREE_ID_extents)| 1025 BIT_ULL(BTREE_ID_reflink), 1026 ~0, 1027 start, &end); 1028 if (ret) 1029 break; 1030 1031 if (!bbpos_cmp(start, BBPOS_MIN) && 1032 bbpos_cmp(end, BBPOS_MAX)) 1033 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 1034 __func__, btree_nodes_fit_in_ram(c)); 1035 1036 if (bbpos_cmp(start, BBPOS_MIN) || 1037 bbpos_cmp(end, BBPOS_MAX)) { 1038 struct printbuf buf = PRINTBUF; 1039 1040 prt_str(&buf, "check_backpointers_to_extents(): "); 1041 bch2_bbpos_to_text(&buf, start); 1042 prt_str(&buf, "-"); 1043 bch2_bbpos_to_text(&buf, end); 1044 1045 bch_verbose(c, "%s", buf.buf); 1046 printbuf_exit(&buf); 1047 } 1048 1049 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 1050 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 1051 break; 1052 1053 start = bbpos_successor(end); 1054 } 1055 bch2_trans_put(trans); 1056 1057 bch2_btree_cache_unpin(c); 1058 1059 bch_err_fn(c, ret); 1060 return ret; 1061 } 1062