1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "disk_accounting.h" 13 #include "error.h" 14 15 #include <linux/mm.h> 16 17 static bool extent_matches_bp(struct bch_fs *c, 18 enum btree_id btree_id, unsigned level, 19 struct bkey_s_c k, 20 struct bpos bucket, 21 struct bch_backpointer bp) 22 { 23 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 24 const union bch_extent_entry *entry; 25 struct extent_ptr_decoded p; 26 27 rcu_read_lock(); 28 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 29 struct bpos bucket2; 30 struct bch_backpointer bp2; 31 32 if (p.ptr.cached) 33 continue; 34 35 struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev); 36 if (!ca) 37 continue; 38 39 bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2); 40 if (bpos_eq(bucket, bucket2) && 41 !memcmp(&bp, &bp2, sizeof(bp))) { 42 rcu_read_unlock(); 43 return true; 44 } 45 } 46 rcu_read_unlock(); 47 48 return false; 49 } 50 51 int bch2_backpointer_validate(struct bch_fs *c, struct bkey_s_c k, 52 enum bch_validate_flags flags) 53 { 54 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 55 int ret = 0; 56 57 bkey_fsck_err_on(bp.v->level > BTREE_MAX_DEPTH, 58 c, backpointer_level_bad, 59 "backpointer level bad: %u >= %u", 60 bp.v->level, BTREE_MAX_DEPTH); 61 62 rcu_read_lock(); 63 struct bch_dev *ca = bch2_dev_rcu_noerror(c, bp.k->p.inode); 64 if (!ca) { 65 /* these will be caught by fsck */ 66 rcu_read_unlock(); 67 return 0; 68 } 69 70 struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p); 71 struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset); 72 rcu_read_unlock(); 73 74 bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || 75 !bpos_eq(bp.k->p, bp_pos), 76 c, backpointer_bucket_offset_wrong, 77 "backpointer bucket_offset wrong"); 78 fsck_err: 79 return ret; 80 } 81 82 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) 83 { 84 prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", 85 bch2_btree_id_str(bp->btree_id), 86 bp->level, 87 (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), 88 (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 89 bp->bucket_len); 90 bch2_bpos_to_text(out, bp->pos); 91 } 92 93 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 94 { 95 rcu_read_lock(); 96 struct bch_dev *ca = bch2_dev_rcu_noerror(c, k.k->p.inode); 97 if (ca) { 98 struct bpos bucket = bp_pos_to_bucket(ca, k.k->p); 99 rcu_read_unlock(); 100 prt_str(out, "bucket="); 101 bch2_bpos_to_text(out, bucket); 102 prt_str(out, " "); 103 } else { 104 rcu_read_unlock(); 105 } 106 107 bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); 108 } 109 110 void bch2_backpointer_swab(struct bkey_s k) 111 { 112 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 113 114 bp.v->bucket_offset = swab40(bp.v->bucket_offset); 115 bp.v->bucket_len = swab32(bp.v->bucket_len); 116 bch2_bpos_swab(&bp.v->pos); 117 } 118 119 static noinline int backpointer_mod_err(struct btree_trans *trans, 120 struct bch_backpointer bp, 121 struct bkey_s_c bp_k, 122 struct bkey_s_c orig_k, 123 bool insert) 124 { 125 struct bch_fs *c = trans->c; 126 struct printbuf buf = PRINTBUF; 127 128 if (insert) { 129 prt_printf(&buf, "existing backpointer found when inserting "); 130 bch2_backpointer_to_text(&buf, &bp); 131 prt_newline(&buf); 132 printbuf_indent_add(&buf, 2); 133 134 prt_printf(&buf, "found "); 135 bch2_bkey_val_to_text(&buf, c, bp_k); 136 prt_newline(&buf); 137 138 prt_printf(&buf, "for "); 139 bch2_bkey_val_to_text(&buf, c, orig_k); 140 141 bch_err(c, "%s", buf.buf); 142 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 143 prt_printf(&buf, "backpointer not found when deleting\n"); 144 printbuf_indent_add(&buf, 2); 145 146 prt_printf(&buf, "searching for "); 147 bch2_backpointer_to_text(&buf, &bp); 148 prt_newline(&buf); 149 150 prt_printf(&buf, "got "); 151 bch2_bkey_val_to_text(&buf, c, bp_k); 152 prt_newline(&buf); 153 154 prt_printf(&buf, "for "); 155 bch2_bkey_val_to_text(&buf, c, orig_k); 156 157 bch_err(c, "%s", buf.buf); 158 } 159 160 printbuf_exit(&buf); 161 162 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 163 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 164 } else { 165 return 0; 166 } 167 } 168 169 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 170 struct bch_dev *ca, 171 struct bpos bucket, 172 struct bch_backpointer bp, 173 struct bkey_s_c orig_k, 174 bool insert) 175 { 176 struct btree_iter bp_iter; 177 struct bkey_s_c k; 178 struct bkey_i_backpointer *bp_k; 179 int ret; 180 181 bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); 182 ret = PTR_ERR_OR_ZERO(bp_k); 183 if (ret) 184 return ret; 185 186 bkey_backpointer_init(&bp_k->k_i); 187 bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); 188 bp_k->v = bp; 189 190 if (!insert) { 191 bp_k->k.type = KEY_TYPE_deleted; 192 set_bkey_val_u64s(&bp_k->k, 0); 193 } 194 195 k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 196 bp_k->k.p, 197 BTREE_ITER_intent| 198 BTREE_ITER_slots| 199 BTREE_ITER_with_updates); 200 ret = bkey_err(k); 201 if (ret) 202 goto err; 203 204 if (insert 205 ? k.k->type 206 : (k.k->type != KEY_TYPE_backpointer || 207 memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { 208 ret = backpointer_mod_err(trans, bp, k, orig_k, insert); 209 if (ret) 210 goto err; 211 } 212 213 ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); 214 err: 215 bch2_trans_iter_exit(trans, &bp_iter); 216 return ret; 217 } 218 219 /* 220 * Find the next backpointer >= *bp_offset: 221 */ 222 int bch2_get_next_backpointer(struct btree_trans *trans, 223 struct bch_dev *ca, 224 struct bpos bucket, int gen, 225 struct bpos *bp_pos, 226 struct bch_backpointer *bp, 227 unsigned iter_flags) 228 { 229 struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0); 230 struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; 231 struct bkey_s_c k; 232 int ret = 0; 233 234 if (bpos_ge(*bp_pos, bp_end_pos)) 235 goto done; 236 237 if (gen >= 0) { 238 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 239 bucket, BTREE_ITER_cached|iter_flags); 240 ret = bkey_err(k); 241 if (ret) 242 goto out; 243 244 if (k.k->type != KEY_TYPE_alloc_v4 || 245 bkey_s_c_to_alloc_v4(k).v->gen != gen) 246 goto done; 247 } 248 249 *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0)); 250 251 for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, 252 *bp_pos, iter_flags, k, ret) { 253 if (bpos_ge(k.k->p, bp_end_pos)) 254 break; 255 256 *bp_pos = k.k->p; 257 *bp = *bkey_s_c_to_backpointer(k).v; 258 goto out; 259 } 260 done: 261 *bp_pos = SPOS_MAX; 262 out: 263 bch2_trans_iter_exit(trans, &bp_iter); 264 bch2_trans_iter_exit(trans, &alloc_iter); 265 return ret; 266 } 267 268 static void backpointer_not_found(struct btree_trans *trans, 269 struct bpos bp_pos, 270 struct bch_backpointer bp, 271 struct bkey_s_c k) 272 { 273 struct bch_fs *c = trans->c; 274 struct printbuf buf = PRINTBUF; 275 276 /* 277 * If we're using the btree write buffer, the backpointer we were 278 * looking at may have already been deleted - failure to find what it 279 * pointed to is not an error: 280 */ 281 if (likely(!bch2_backpointers_no_use_write_buffer)) 282 return; 283 284 struct bpos bucket; 285 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 286 return; 287 288 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 289 bp.level ? "btree node" : "extent"); 290 prt_printf(&buf, "bucket: "); 291 bch2_bpos_to_text(&buf, bucket); 292 prt_printf(&buf, "\n "); 293 294 prt_printf(&buf, "backpointer pos: "); 295 bch2_bpos_to_text(&buf, bp_pos); 296 prt_printf(&buf, "\n "); 297 298 bch2_backpointer_to_text(&buf, &bp); 299 prt_printf(&buf, "\n "); 300 bch2_bkey_val_to_text(&buf, c, k); 301 if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) 302 bch_err_ratelimited(c, "%s", buf.buf); 303 else 304 bch2_trans_inconsistent(trans, "%s", buf.buf); 305 306 printbuf_exit(&buf); 307 } 308 309 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 310 struct btree_iter *iter, 311 struct bpos bp_pos, 312 struct bch_backpointer bp, 313 unsigned iter_flags) 314 { 315 if (likely(!bp.level)) { 316 struct bch_fs *c = trans->c; 317 318 struct bpos bucket; 319 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 320 return bkey_s_c_err(-EIO); 321 322 bch2_trans_node_iter_init(trans, iter, 323 bp.btree_id, 324 bp.pos, 325 0, 0, 326 iter_flags); 327 struct bkey_s_c k = bch2_btree_iter_peek_slot(iter); 328 if (bkey_err(k)) { 329 bch2_trans_iter_exit(trans, iter); 330 return k; 331 } 332 333 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) 334 return k; 335 336 bch2_trans_iter_exit(trans, iter); 337 backpointer_not_found(trans, bp_pos, bp, k); 338 return bkey_s_c_null; 339 } else { 340 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); 341 342 if (IS_ERR_OR_NULL(b)) { 343 bch2_trans_iter_exit(trans, iter); 344 return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; 345 } 346 return bkey_i_to_s_c(&b->key); 347 } 348 } 349 350 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 351 struct btree_iter *iter, 352 struct bpos bp_pos, 353 struct bch_backpointer bp) 354 { 355 struct bch_fs *c = trans->c; 356 357 BUG_ON(!bp.level); 358 359 struct bpos bucket; 360 if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket)) 361 return ERR_PTR(-EIO); 362 363 bch2_trans_node_iter_init(trans, iter, 364 bp.btree_id, 365 bp.pos, 366 0, 367 bp.level - 1, 368 0); 369 struct btree *b = bch2_btree_iter_peek_node(iter); 370 if (IS_ERR_OR_NULL(b)) 371 goto err; 372 373 BUG_ON(b->c.level != bp.level - 1); 374 375 if (extent_matches_bp(c, bp.btree_id, bp.level, 376 bkey_i_to_s_c(&b->key), 377 bucket, bp)) 378 return b; 379 380 if (btree_node_will_make_reachable(b)) { 381 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 382 } else { 383 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); 384 b = NULL; 385 } 386 err: 387 bch2_trans_iter_exit(trans, iter); 388 return b; 389 } 390 391 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, 392 struct bkey_s_c k) 393 { 394 struct bch_fs *c = trans->c; 395 struct btree_iter alloc_iter = { NULL }; 396 struct bkey_s_c alloc_k; 397 struct printbuf buf = PRINTBUF; 398 int ret = 0; 399 400 struct bpos bucket; 401 if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) { 402 if (fsck_err(trans, backpointer_to_missing_device, 403 "backpointer for missing device:\n%s", 404 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 405 ret = bch2_btree_delete_at(trans, bp_iter, 0); 406 goto out; 407 } 408 409 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0); 410 ret = bkey_err(alloc_k); 411 if (ret) 412 goto out; 413 414 if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, 415 trans, backpointer_to_missing_alloc, 416 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 417 alloc_iter.pos.inode, alloc_iter.pos.offset, 418 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 419 ret = bch2_btree_delete_at(trans, bp_iter, 0); 420 goto out; 421 } 422 out: 423 fsck_err: 424 bch2_trans_iter_exit(trans, &alloc_iter); 425 printbuf_exit(&buf); 426 return ret; 427 } 428 429 /* verify that every backpointer has a corresponding alloc key */ 430 int bch2_check_btree_backpointers(struct bch_fs *c) 431 { 432 int ret = bch2_trans_run(c, 433 for_each_btree_key_commit(trans, iter, 434 BTREE_ID_backpointers, POS_MIN, 0, k, 435 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 436 bch2_check_btree_backpointer(trans, &iter, k))); 437 bch_err_fn(c, ret); 438 return ret; 439 } 440 441 struct extents_to_bp_state { 442 struct bpos bucket_start; 443 struct bpos bucket_end; 444 struct bkey_buf last_flushed; 445 }; 446 447 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 448 struct bkey_s_c extent, unsigned dev) 449 { 450 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 451 int ret = PTR_ERR_OR_ZERO(n); 452 if (ret) 453 return ret; 454 455 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 456 return bch2_btree_insert_trans(trans, btree, n, 0); 457 } 458 459 static int check_extent_checksum(struct btree_trans *trans, 460 enum btree_id btree, struct bkey_s_c extent, 461 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 462 { 463 struct bch_fs *c = trans->c; 464 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 465 const union bch_extent_entry *entry; 466 struct extent_ptr_decoded p; 467 struct printbuf buf = PRINTBUF; 468 void *data_buf = NULL; 469 struct bio *bio = NULL; 470 size_t bytes; 471 int ret = 0; 472 473 if (bkey_is_btree_ptr(extent.k)) 474 return false; 475 476 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 477 if (p.ptr.dev == dev) 478 goto found; 479 BUG(); 480 found: 481 if (!p.crc.csum_type) 482 return false; 483 484 bytes = p.crc.compressed_size << 9; 485 486 struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ); 487 if (!ca) 488 return false; 489 490 data_buf = kvmalloc(bytes, GFP_KERNEL); 491 if (!data_buf) { 492 ret = -ENOMEM; 493 goto err; 494 } 495 496 bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); 497 bio->bi_iter.bi_sector = p.ptr.offset; 498 bch2_bio_map(bio, data_buf, bytes); 499 ret = submit_bio_wait(bio); 500 if (ret) 501 goto err; 502 503 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 504 prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); 505 bch2_bkey_val_to_text(&buf, c, extent); 506 prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); 507 bch2_bkey_val_to_text(&buf, c, extent2); 508 509 struct nonce nonce = extent_nonce(extent.k->bversion, p.crc); 510 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 511 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 512 trans, dup_backpointer_to_bad_csum_extent, 513 "%s", buf.buf)) 514 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 515 fsck_err: 516 err: 517 if (bio) 518 bio_put(bio); 519 kvfree(data_buf); 520 percpu_ref_put(&ca->io_ref); 521 printbuf_exit(&buf); 522 return ret; 523 } 524 525 static int check_bp_exists(struct btree_trans *trans, 526 struct extents_to_bp_state *s, 527 struct bpos bucket, 528 struct bch_backpointer bp, 529 struct bkey_s_c orig_k) 530 { 531 struct bch_fs *c = trans->c; 532 struct btree_iter bp_iter = {}; 533 struct btree_iter other_extent_iter = {}; 534 struct printbuf buf = PRINTBUF; 535 struct bkey_s_c bp_k; 536 int ret = 0; 537 538 struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket); 539 if (!ca) { 540 prt_str(&buf, "extent for nonexistent device:bucket "); 541 bch2_bpos_to_text(&buf, bucket); 542 prt_str(&buf, "\n "); 543 bch2_bkey_val_to_text(&buf, c, orig_k); 544 bch_err(c, "%s", buf.buf); 545 ret = -BCH_ERR_fsck_repair_unimplemented; 546 goto err; 547 } 548 549 if (bpos_lt(bucket, s->bucket_start) || 550 bpos_gt(bucket, s->bucket_end)) 551 goto out; 552 553 bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 554 bucket_pos_to_bp(ca, bucket, bp.bucket_offset), 555 0); 556 ret = bkey_err(bp_k); 557 if (ret) 558 goto err; 559 560 if (bp_k.k->type != KEY_TYPE_backpointer || 561 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 562 ret = bch2_btree_write_buffer_maybe_flush(trans, orig_k, &s->last_flushed); 563 if (ret) 564 goto err; 565 566 goto check_existing_bp; 567 } 568 out: 569 err: 570 fsck_err: 571 bch2_trans_iter_exit(trans, &other_extent_iter); 572 bch2_trans_iter_exit(trans, &bp_iter); 573 bch2_dev_put(ca); 574 printbuf_exit(&buf); 575 return ret; 576 check_existing_bp: 577 /* Do we have a backpointer for a different extent? */ 578 if (bp_k.k->type != KEY_TYPE_backpointer) 579 goto missing; 580 581 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; 582 583 struct bkey_s_c other_extent = 584 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); 585 ret = bkey_err(other_extent); 586 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 587 ret = 0; 588 if (ret) 589 goto err; 590 591 if (!other_extent.k) 592 goto missing; 593 594 if (bch2_extents_match(orig_k, other_extent)) { 595 printbuf_reset(&buf); 596 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 597 bch2_bkey_val_to_text(&buf, c, orig_k); 598 prt_str(&buf, "\n "); 599 bch2_bkey_val_to_text(&buf, c, other_extent); 600 bch_err(c, "%s", buf.buf); 601 602 if (other_extent.k->size <= orig_k.k->size) { 603 ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); 604 if (ret) 605 goto err; 606 goto out; 607 } else { 608 ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); 609 if (ret) 610 goto err; 611 goto missing; 612 } 613 } 614 615 ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); 616 if (ret < 0) 617 goto err; 618 if (ret) { 619 ret = 0; 620 goto missing; 621 } 622 623 ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); 624 if (ret < 0) 625 goto err; 626 if (ret) { 627 ret = 0; 628 goto out; 629 } 630 631 printbuf_reset(&buf); 632 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); 633 bch2_bkey_val_to_text(&buf, c, orig_k); 634 prt_str(&buf, "\n "); 635 bch2_bkey_val_to_text(&buf, c, other_extent); 636 bch_err(c, "%s", buf.buf); 637 ret = -BCH_ERR_fsck_repair_unimplemented; 638 goto err; 639 missing: 640 printbuf_reset(&buf); 641 prt_printf(&buf, "missing backpointer for btree=%s l=%u ", 642 bch2_btree_id_str(bp.btree_id), bp.level); 643 bch2_bkey_val_to_text(&buf, c, orig_k); 644 prt_printf(&buf, "\n got: "); 645 bch2_bkey_val_to_text(&buf, c, bp_k); 646 647 struct bkey_i_backpointer n_bp_k; 648 bkey_backpointer_init(&n_bp_k.k_i); 649 n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset); 650 n_bp_k.v = bp; 651 prt_printf(&buf, "\n want: "); 652 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); 653 654 if (fsck_err(trans, ptr_to_missing_backpointer, "%s", buf.buf)) 655 ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true); 656 657 goto out; 658 } 659 660 static int check_extent_to_backpointers(struct btree_trans *trans, 661 struct extents_to_bp_state *s, 662 enum btree_id btree, unsigned level, 663 struct bkey_s_c k) 664 { 665 struct bch_fs *c = trans->c; 666 struct bkey_ptrs_c ptrs; 667 const union bch_extent_entry *entry; 668 struct extent_ptr_decoded p; 669 int ret; 670 671 ptrs = bch2_bkey_ptrs_c(k); 672 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 673 struct bpos bucket_pos = POS_MIN; 674 struct bch_backpointer bp; 675 676 if (p.ptr.cached) 677 continue; 678 679 rcu_read_lock(); 680 struct bch_dev *ca = bch2_dev_rcu_noerror(c, p.ptr.dev); 681 if (ca) 682 bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp); 683 rcu_read_unlock(); 684 685 if (!ca) 686 continue; 687 688 ret = check_bp_exists(trans, s, bucket_pos, bp, k); 689 if (ret) 690 return ret; 691 } 692 693 return 0; 694 } 695 696 static int check_btree_root_to_backpointers(struct btree_trans *trans, 697 struct extents_to_bp_state *s, 698 enum btree_id btree_id, 699 int *level) 700 { 701 struct bch_fs *c = trans->c; 702 struct btree_iter iter; 703 struct btree *b; 704 struct bkey_s_c k; 705 int ret; 706 retry: 707 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 708 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 709 b = bch2_btree_iter_peek_node(&iter); 710 ret = PTR_ERR_OR_ZERO(b); 711 if (ret) 712 goto err; 713 714 if (b != btree_node_root(c, b)) { 715 bch2_trans_iter_exit(trans, &iter); 716 goto retry; 717 } 718 719 *level = b->c.level; 720 721 k = bkey_i_to_s_c(&b->key); 722 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 723 err: 724 bch2_trans_iter_exit(trans, &iter); 725 return ret; 726 } 727 728 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 729 { 730 return (struct bbpos) { 731 .btree = bp.btree_id, 732 .pos = bp.pos, 733 }; 734 } 735 736 static u64 mem_may_pin_bytes(struct bch_fs *c) 737 { 738 struct sysinfo i; 739 si_meminfo(&i); 740 741 u64 mem_bytes = i.totalram * i.mem_unit; 742 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 743 } 744 745 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 746 { 747 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 748 } 749 750 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 751 u64 btree_leaf_mask, 752 u64 btree_interior_mask, 753 struct bbpos start, struct bbpos *end) 754 { 755 struct bch_fs *c = trans->c; 756 s64 mem_may_pin = mem_may_pin_bytes(c); 757 int ret = 0; 758 759 bch2_btree_cache_unpin(c); 760 761 btree_interior_mask |= btree_leaf_mask; 762 763 c->btree_cache.pinned_nodes_mask[0] = btree_leaf_mask; 764 c->btree_cache.pinned_nodes_mask[1] = btree_interior_mask; 765 c->btree_cache.pinned_nodes_start = start; 766 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 767 768 for (enum btree_id btree = start.btree; 769 btree < BTREE_ID_NR && !ret; 770 btree++) { 771 unsigned depth = (BIT_ULL(btree) & btree_leaf_mask) ? 0 : 1; 772 773 if (!(BIT_ULL(btree) & btree_leaf_mask) && 774 !(BIT_ULL(btree) & btree_interior_mask)) 775 continue; 776 777 ret = __for_each_btree_node(trans, iter, btree, 778 btree == start.btree ? start.pos : POS_MIN, 779 0, depth, BTREE_ITER_prefetch, b, ({ 780 mem_may_pin -= btree_buf_bytes(b); 781 if (mem_may_pin <= 0) { 782 c->btree_cache.pinned_nodes_end = *end = 783 BBPOS(btree, b->key.k.p); 784 break; 785 } 786 bch2_node_pin(c, b); 787 0; 788 })); 789 } 790 791 return ret; 792 } 793 794 struct progress_indicator_state { 795 unsigned long next_print; 796 u64 nodes_seen; 797 u64 nodes_total; 798 struct btree *last_node; 799 }; 800 801 static inline void progress_init(struct progress_indicator_state *s, 802 struct bch_fs *c, 803 u64 btree_id_mask) 804 { 805 memset(s, 0, sizeof(*s)); 806 807 s->next_print = jiffies + HZ * 10; 808 809 for (unsigned i = 0; i < BTREE_ID_NR; i++) { 810 if (!(btree_id_mask & BIT_ULL(i))) 811 continue; 812 813 struct disk_accounting_pos acc = { 814 .type = BCH_DISK_ACCOUNTING_btree, 815 .btree.id = i, 816 }; 817 818 u64 v; 819 bch2_accounting_mem_read(c, disk_accounting_pos_to_bpos(&acc), &v, 1); 820 s->nodes_total += div64_ul(v, btree_sectors(c)); 821 } 822 } 823 824 static inline bool progress_update_p(struct progress_indicator_state *s) 825 { 826 bool ret = time_after_eq(jiffies, s->next_print); 827 828 if (ret) 829 s->next_print = jiffies + HZ * 10; 830 return ret; 831 } 832 833 static void progress_update_iter(struct btree_trans *trans, 834 struct progress_indicator_state *s, 835 struct btree_iter *iter, 836 const char *msg) 837 { 838 struct bch_fs *c = trans->c; 839 struct btree *b = path_l(btree_iter_path(trans, iter))->b; 840 841 s->nodes_seen += b != s->last_node; 842 s->last_node = b; 843 844 if (progress_update_p(s)) { 845 struct printbuf buf = PRINTBUF; 846 unsigned percent = s->nodes_total 847 ? div64_u64(s->nodes_seen * 100, s->nodes_total) 848 : 0; 849 850 prt_printf(&buf, "%s: %d%%, done %llu/%llu nodes, at ", 851 msg, percent, s->nodes_seen, s->nodes_total); 852 bch2_bbpos_to_text(&buf, BBPOS(iter->btree_id, iter->pos)); 853 854 bch_info(c, "%s", buf.buf); 855 printbuf_exit(&buf); 856 } 857 } 858 859 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 860 struct extents_to_bp_state *s) 861 { 862 struct bch_fs *c = trans->c; 863 struct progress_indicator_state progress; 864 int ret = 0; 865 866 progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_extents)|BIT_ULL(BTREE_ID_reflink)); 867 868 for (enum btree_id btree_id = 0; 869 btree_id < btree_id_nr_alive(c); 870 btree_id++) { 871 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 872 873 ret = commit_do(trans, NULL, NULL, 874 BCH_TRANS_COMMIT_no_enospc, 875 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 876 if (ret) 877 return ret; 878 879 while (level >= depth) { 880 struct btree_iter iter; 881 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level, 882 BTREE_ITER_prefetch); 883 884 ret = for_each_btree_key_continue(trans, iter, 0, k, ({ 885 progress_update_iter(trans, &progress, &iter, "extents_to_backpointers"); 886 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 887 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 888 })); 889 if (ret) 890 return ret; 891 892 --level; 893 } 894 } 895 896 return 0; 897 } 898 899 int bch2_check_extents_to_backpointers(struct bch_fs *c) 900 { 901 struct btree_trans *trans = bch2_trans_get(c); 902 struct extents_to_bp_state s = { .bucket_start = POS_MIN }; 903 int ret; 904 905 bch2_bkey_buf_init(&s.last_flushed); 906 bkey_init(&s.last_flushed.k->k); 907 908 while (1) { 909 struct bbpos end; 910 ret = bch2_get_btree_in_memory_pos(trans, 911 BIT_ULL(BTREE_ID_backpointers), 912 BIT_ULL(BTREE_ID_backpointers), 913 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); 914 if (ret) 915 break; 916 917 s.bucket_end = end.pos; 918 919 if ( bpos_eq(s.bucket_start, POS_MIN) && 920 !bpos_eq(s.bucket_end, SPOS_MAX)) 921 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 922 __func__, btree_nodes_fit_in_ram(c)); 923 924 if (!bpos_eq(s.bucket_start, POS_MIN) || 925 !bpos_eq(s.bucket_end, SPOS_MAX)) { 926 struct printbuf buf = PRINTBUF; 927 928 prt_str(&buf, "check_extents_to_backpointers(): "); 929 bch2_bpos_to_text(&buf, s.bucket_start); 930 prt_str(&buf, "-"); 931 bch2_bpos_to_text(&buf, s.bucket_end); 932 933 bch_verbose(c, "%s", buf.buf); 934 printbuf_exit(&buf); 935 } 936 937 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 938 if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) 939 break; 940 941 s.bucket_start = bpos_successor(s.bucket_end); 942 } 943 bch2_trans_put(trans); 944 bch2_bkey_buf_exit(&s.last_flushed, c); 945 946 bch2_btree_cache_unpin(c); 947 948 bch_err_fn(c, ret); 949 return ret; 950 } 951 952 static int check_one_backpointer(struct btree_trans *trans, 953 struct bbpos start, 954 struct bbpos end, 955 struct bkey_s_c bp_k, 956 struct bkey_buf *last_flushed) 957 { 958 if (bp_k.k->type != KEY_TYPE_backpointer) 959 return 0; 960 961 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(bp_k); 962 struct bch_fs *c = trans->c; 963 struct btree_iter iter; 964 struct bbpos pos = bp_to_bbpos(*bp.v); 965 struct bkey_s_c k; 966 struct printbuf buf = PRINTBUF; 967 int ret; 968 969 if (bbpos_cmp(pos, start) < 0 || 970 bbpos_cmp(pos, end) > 0) 971 return 0; 972 973 k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); 974 ret = bkey_err(k); 975 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 976 return 0; 977 if (ret) 978 return ret; 979 980 if (!k.k) { 981 ret = bch2_btree_write_buffer_maybe_flush(trans, bp.s_c, last_flushed); 982 if (ret) 983 goto out; 984 985 if (fsck_err(trans, backpointer_to_missing_ptr, 986 "backpointer for missing %s\n %s", 987 bp.v->level ? "btree node" : "extent", 988 (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 989 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 990 goto out; 991 } 992 } 993 out: 994 fsck_err: 995 bch2_trans_iter_exit(trans, &iter); 996 printbuf_exit(&buf); 997 return ret; 998 } 999 1000 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 1001 struct bbpos start, 1002 struct bbpos end) 1003 { 1004 struct bch_fs *c = trans->c; 1005 struct bkey_buf last_flushed; 1006 struct progress_indicator_state progress; 1007 1008 bch2_bkey_buf_init(&last_flushed); 1009 bkey_init(&last_flushed.k->k); 1010 progress_init(&progress, trans->c, BIT_ULL(BTREE_ID_backpointers)); 1011 1012 int ret = for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 1013 POS_MIN, BTREE_ITER_prefetch, k, 1014 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 1015 progress_update_iter(trans, &progress, &iter, "backpointers_to_extents"); 1016 check_one_backpointer(trans, start, end, k, &last_flushed); 1017 })); 1018 1019 bch2_bkey_buf_exit(&last_flushed, c); 1020 return ret; 1021 } 1022 1023 int bch2_check_backpointers_to_extents(struct bch_fs *c) 1024 { 1025 struct btree_trans *trans = bch2_trans_get(c); 1026 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 1027 int ret; 1028 1029 while (1) { 1030 ret = bch2_get_btree_in_memory_pos(trans, 1031 BIT_ULL(BTREE_ID_extents)| 1032 BIT_ULL(BTREE_ID_reflink), 1033 ~0, 1034 start, &end); 1035 if (ret) 1036 break; 1037 1038 if (!bbpos_cmp(start, BBPOS_MIN) && 1039 bbpos_cmp(end, BBPOS_MAX)) 1040 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 1041 __func__, btree_nodes_fit_in_ram(c)); 1042 1043 if (bbpos_cmp(start, BBPOS_MIN) || 1044 bbpos_cmp(end, BBPOS_MAX)) { 1045 struct printbuf buf = PRINTBUF; 1046 1047 prt_str(&buf, "check_backpointers_to_extents(): "); 1048 bch2_bbpos_to_text(&buf, start); 1049 prt_str(&buf, "-"); 1050 bch2_bbpos_to_text(&buf, end); 1051 1052 bch_verbose(c, "%s", buf.buf); 1053 printbuf_exit(&buf); 1054 } 1055 1056 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 1057 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 1058 break; 1059 1060 start = bbpos_successor(end); 1061 } 1062 bch2_trans_put(trans); 1063 1064 bch2_btree_cache_unpin(c); 1065 1066 bch_err_fn(c, ret); 1067 return ret; 1068 } 1069