1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "error.h" 13 14 #include <linux/mm.h> 15 16 static bool extent_matches_bp(struct bch_fs *c, 17 enum btree_id btree_id, unsigned level, 18 struct bkey_s_c k, 19 struct bpos bucket, 20 struct bch_backpointer bp) 21 { 22 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 23 const union bch_extent_entry *entry; 24 struct extent_ptr_decoded p; 25 26 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 27 struct bpos bucket2; 28 struct bch_backpointer bp2; 29 30 if (p.ptr.cached) 31 continue; 32 33 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2); 34 if (bpos_eq(bucket, bucket2) && 35 !memcmp(&bp, &bp2, sizeof(bp))) 36 return true; 37 } 38 39 return false; 40 } 41 42 int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, 43 enum bkey_invalid_flags flags, 44 struct printbuf *err) 45 { 46 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 47 48 /* these will be caught by fsck */ 49 if (!bch2_dev_exists2(c, bp.k->p.inode)) 50 return 0; 51 52 struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); 53 int ret = 0; 54 55 bkey_fsck_err_on(!bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), 56 c, err, 57 backpointer_pos_wrong, 58 "backpointer at wrong pos"); 59 fsck_err: 60 return ret; 61 } 62 63 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) 64 { 65 prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", 66 bch2_btree_id_str(bp->btree_id), 67 bp->level, 68 (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), 69 (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 70 bp->bucket_len); 71 bch2_bpos_to_text(out, bp->pos); 72 } 73 74 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 75 { 76 if (bch2_dev_exists2(c, k.k->p.inode)) { 77 prt_str(out, "bucket="); 78 bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); 79 prt_str(out, " "); 80 } 81 82 bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); 83 } 84 85 void bch2_backpointer_swab(struct bkey_s k) 86 { 87 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 88 89 bp.v->bucket_offset = swab40(bp.v->bucket_offset); 90 bp.v->bucket_len = swab32(bp.v->bucket_len); 91 bch2_bpos_swab(&bp.v->pos); 92 } 93 94 static noinline int backpointer_mod_err(struct btree_trans *trans, 95 struct bch_backpointer bp, 96 struct bkey_s_c bp_k, 97 struct bkey_s_c orig_k, 98 bool insert) 99 { 100 struct bch_fs *c = trans->c; 101 struct printbuf buf = PRINTBUF; 102 103 if (insert) { 104 prt_printf(&buf, "existing backpointer found when inserting "); 105 bch2_backpointer_to_text(&buf, &bp); 106 prt_newline(&buf); 107 printbuf_indent_add(&buf, 2); 108 109 prt_printf(&buf, "found "); 110 bch2_bkey_val_to_text(&buf, c, bp_k); 111 prt_newline(&buf); 112 113 prt_printf(&buf, "for "); 114 bch2_bkey_val_to_text(&buf, c, orig_k); 115 116 bch_err(c, "%s", buf.buf); 117 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 118 prt_printf(&buf, "backpointer not found when deleting"); 119 prt_newline(&buf); 120 printbuf_indent_add(&buf, 2); 121 122 prt_printf(&buf, "searching for "); 123 bch2_backpointer_to_text(&buf, &bp); 124 prt_newline(&buf); 125 126 prt_printf(&buf, "got "); 127 bch2_bkey_val_to_text(&buf, c, bp_k); 128 prt_newline(&buf); 129 130 prt_printf(&buf, "for "); 131 bch2_bkey_val_to_text(&buf, c, orig_k); 132 133 bch_err(c, "%s", buf.buf); 134 } 135 136 printbuf_exit(&buf); 137 138 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 139 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 140 } else { 141 return 0; 142 } 143 } 144 145 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 146 struct bpos bucket, 147 struct bch_backpointer bp, 148 struct bkey_s_c orig_k, 149 bool insert) 150 { 151 struct btree_iter bp_iter; 152 struct bkey_s_c k; 153 struct bkey_i_backpointer *bp_k; 154 int ret; 155 156 bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); 157 ret = PTR_ERR_OR_ZERO(bp_k); 158 if (ret) 159 return ret; 160 161 bkey_backpointer_init(&bp_k->k_i); 162 bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); 163 bp_k->v = bp; 164 165 if (!insert) { 166 bp_k->k.type = KEY_TYPE_deleted; 167 set_bkey_val_u64s(&bp_k->k, 0); 168 } 169 170 k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 171 bp_k->k.p, 172 BTREE_ITER_INTENT| 173 BTREE_ITER_SLOTS| 174 BTREE_ITER_WITH_UPDATES); 175 ret = bkey_err(k); 176 if (ret) 177 goto err; 178 179 if (insert 180 ? k.k->type 181 : (k.k->type != KEY_TYPE_backpointer || 182 memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { 183 ret = backpointer_mod_err(trans, bp, k, orig_k, insert); 184 if (ret) 185 goto err; 186 } 187 188 ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); 189 err: 190 bch2_trans_iter_exit(trans, &bp_iter); 191 return ret; 192 } 193 194 /* 195 * Find the next backpointer >= *bp_offset: 196 */ 197 int bch2_get_next_backpointer(struct btree_trans *trans, 198 struct bpos bucket, int gen, 199 struct bpos *bp_pos, 200 struct bch_backpointer *bp, 201 unsigned iter_flags) 202 { 203 struct bch_fs *c = trans->c; 204 struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0); 205 struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; 206 struct bkey_s_c k; 207 int ret = 0; 208 209 if (bpos_ge(*bp_pos, bp_end_pos)) 210 goto done; 211 212 if (gen >= 0) { 213 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 214 bucket, BTREE_ITER_CACHED|iter_flags); 215 ret = bkey_err(k); 216 if (ret) 217 goto out; 218 219 if (k.k->type != KEY_TYPE_alloc_v4 || 220 bkey_s_c_to_alloc_v4(k).v->gen != gen) 221 goto done; 222 } 223 224 *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0)); 225 226 for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, 227 *bp_pos, iter_flags, k, ret) { 228 if (bpos_ge(k.k->p, bp_end_pos)) 229 break; 230 231 *bp_pos = k.k->p; 232 *bp = *bkey_s_c_to_backpointer(k).v; 233 goto out; 234 } 235 done: 236 *bp_pos = SPOS_MAX; 237 out: 238 bch2_trans_iter_exit(trans, &bp_iter); 239 bch2_trans_iter_exit(trans, &alloc_iter); 240 return ret; 241 } 242 243 static void backpointer_not_found(struct btree_trans *trans, 244 struct bpos bp_pos, 245 struct bch_backpointer bp, 246 struct bkey_s_c k) 247 { 248 struct bch_fs *c = trans->c; 249 struct printbuf buf = PRINTBUF; 250 struct bpos bucket = bp_pos_to_bucket(c, bp_pos); 251 252 /* 253 * If we're using the btree write buffer, the backpointer we were 254 * looking at may have already been deleted - failure to find what it 255 * pointed to is not an error: 256 */ 257 if (likely(!bch2_backpointers_no_use_write_buffer)) 258 return; 259 260 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 261 bp.level ? "btree node" : "extent"); 262 prt_printf(&buf, "bucket: "); 263 bch2_bpos_to_text(&buf, bucket); 264 prt_printf(&buf, "\n "); 265 266 prt_printf(&buf, "backpointer pos: "); 267 bch2_bpos_to_text(&buf, bp_pos); 268 prt_printf(&buf, "\n "); 269 270 bch2_backpointer_to_text(&buf, &bp); 271 prt_printf(&buf, "\n "); 272 bch2_bkey_val_to_text(&buf, c, k); 273 if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) 274 bch_err_ratelimited(c, "%s", buf.buf); 275 else 276 bch2_trans_inconsistent(trans, "%s", buf.buf); 277 278 printbuf_exit(&buf); 279 } 280 281 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 282 struct btree_iter *iter, 283 struct bpos bp_pos, 284 struct bch_backpointer bp, 285 unsigned iter_flags) 286 { 287 if (likely(!bp.level)) { 288 struct bch_fs *c = trans->c; 289 struct bpos bucket = bp_pos_to_bucket(c, bp_pos); 290 struct bkey_s_c k; 291 292 bch2_trans_node_iter_init(trans, iter, 293 bp.btree_id, 294 bp.pos, 295 0, 0, 296 iter_flags); 297 k = bch2_btree_iter_peek_slot(iter); 298 if (bkey_err(k)) { 299 bch2_trans_iter_exit(trans, iter); 300 return k; 301 } 302 303 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) 304 return k; 305 306 bch2_trans_iter_exit(trans, iter); 307 backpointer_not_found(trans, bp_pos, bp, k); 308 return bkey_s_c_null; 309 } else { 310 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); 311 312 if (IS_ERR_OR_NULL(b)) { 313 bch2_trans_iter_exit(trans, iter); 314 return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; 315 } 316 return bkey_i_to_s_c(&b->key); 317 } 318 } 319 320 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 321 struct btree_iter *iter, 322 struct bpos bp_pos, 323 struct bch_backpointer bp) 324 { 325 struct bch_fs *c = trans->c; 326 struct bpos bucket = bp_pos_to_bucket(c, bp_pos); 327 struct btree *b; 328 329 BUG_ON(!bp.level); 330 331 bch2_trans_node_iter_init(trans, iter, 332 bp.btree_id, 333 bp.pos, 334 0, 335 bp.level - 1, 336 0); 337 b = bch2_btree_iter_peek_node(iter); 338 if (IS_ERR_OR_NULL(b)) 339 goto err; 340 341 BUG_ON(b->c.level != bp.level - 1); 342 343 if (extent_matches_bp(c, bp.btree_id, bp.level, 344 bkey_i_to_s_c(&b->key), 345 bucket, bp)) 346 return b; 347 348 if (btree_node_will_make_reachable(b)) { 349 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 350 } else { 351 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); 352 b = NULL; 353 } 354 err: 355 bch2_trans_iter_exit(trans, iter); 356 return b; 357 } 358 359 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, 360 struct bkey_s_c k) 361 { 362 struct bch_fs *c = trans->c; 363 struct btree_iter alloc_iter = { NULL }; 364 struct bkey_s_c alloc_k; 365 struct printbuf buf = PRINTBUF; 366 int ret = 0; 367 368 if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, 369 backpointer_to_missing_device, 370 "backpointer for missing device:\n%s", 371 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 372 ret = bch2_btree_delete_at(trans, bp_iter, 0); 373 goto out; 374 } 375 376 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 377 bp_pos_to_bucket(c, k.k->p), 0); 378 ret = bkey_err(alloc_k); 379 if (ret) 380 goto out; 381 382 if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, 383 backpointer_to_missing_alloc, 384 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 385 alloc_iter.pos.inode, alloc_iter.pos.offset, 386 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 387 ret = bch2_btree_delete_at(trans, bp_iter, 0); 388 goto out; 389 } 390 out: 391 fsck_err: 392 bch2_trans_iter_exit(trans, &alloc_iter); 393 printbuf_exit(&buf); 394 return ret; 395 } 396 397 /* verify that every backpointer has a corresponding alloc key */ 398 int bch2_check_btree_backpointers(struct bch_fs *c) 399 { 400 int ret = bch2_trans_run(c, 401 for_each_btree_key_commit(trans, iter, 402 BTREE_ID_backpointers, POS_MIN, 0, k, 403 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 404 bch2_check_btree_backpointer(trans, &iter, k))); 405 bch_err_fn(c, ret); 406 return ret; 407 } 408 409 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 410 { 411 return bpos_eq(l.k->p, r.k->p) && 412 bkey_bytes(l.k) == bkey_bytes(r.k) && 413 !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 414 } 415 416 struct extents_to_bp_state { 417 struct bpos bucket_start; 418 struct bpos bucket_end; 419 struct bkey_buf last_flushed; 420 }; 421 422 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 423 struct bkey_s_c extent, unsigned dev) 424 { 425 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 426 int ret = PTR_ERR_OR_ZERO(n); 427 if (ret) 428 return ret; 429 430 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 431 return bch2_btree_insert_trans(trans, btree, n, 0); 432 } 433 434 static int check_extent_checksum(struct btree_trans *trans, 435 enum btree_id btree, struct bkey_s_c extent, 436 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 437 { 438 struct bch_fs *c = trans->c; 439 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 440 const union bch_extent_entry *entry; 441 struct extent_ptr_decoded p; 442 struct printbuf buf = PRINTBUF; 443 void *data_buf = NULL; 444 struct bio *bio = NULL; 445 size_t bytes; 446 int ret = 0; 447 448 if (bkey_is_btree_ptr(extent.k)) 449 return false; 450 451 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 452 if (p.ptr.dev == dev) 453 goto found; 454 BUG(); 455 found: 456 if (!p.crc.csum_type) 457 return false; 458 459 bytes = p.crc.compressed_size << 9; 460 461 struct bch_dev *ca = bch_dev_bkey_exists(c, dev); 462 if (!bch2_dev_get_ioref(ca, READ)) 463 return false; 464 465 data_buf = kvmalloc(bytes, GFP_KERNEL); 466 if (!data_buf) { 467 ret = -ENOMEM; 468 goto err; 469 } 470 471 bio = bio_alloc(ca->disk_sb.bdev, 1, REQ_OP_READ, GFP_KERNEL); 472 bio->bi_iter.bi_sector = p.ptr.offset; 473 bch2_bio_map(bio, data_buf, bytes); 474 ret = submit_bio_wait(bio); 475 if (ret) 476 goto err; 477 478 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 479 prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); 480 bch2_bkey_val_to_text(&buf, c, extent); 481 prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); 482 bch2_bkey_val_to_text(&buf, c, extent2); 483 484 struct nonce nonce = extent_nonce(extent.k->version, p.crc); 485 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 486 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 487 c, dup_backpointer_to_bad_csum_extent, 488 "%s", buf.buf)) 489 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 490 fsck_err: 491 err: 492 if (bio) 493 bio_put(bio); 494 kvfree(data_buf); 495 percpu_ref_put(&ca->io_ref); 496 printbuf_exit(&buf); 497 return ret; 498 } 499 500 static int check_bp_exists(struct btree_trans *trans, 501 struct extents_to_bp_state *s, 502 struct bpos bucket, 503 struct bch_backpointer bp, 504 struct bkey_s_c orig_k) 505 { 506 struct bch_fs *c = trans->c; 507 struct btree_iter bp_iter = {}; 508 struct btree_iter other_extent_iter = {}; 509 struct printbuf buf = PRINTBUF; 510 struct bkey_s_c bp_k; 511 struct bkey_buf tmp; 512 int ret; 513 514 bch2_bkey_buf_init(&tmp); 515 516 if (!bch2_dev_bucket_exists(c, bucket)) { 517 prt_str(&buf, "extent for nonexistent device:bucket "); 518 bch2_bpos_to_text(&buf, bucket); 519 prt_str(&buf, "\n "); 520 bch2_bkey_val_to_text(&buf, c, orig_k); 521 bch_err(c, "%s", buf.buf); 522 return -BCH_ERR_fsck_repair_unimplemented; 523 } 524 525 if (bpos_lt(bucket, s->bucket_start) || 526 bpos_gt(bucket, s->bucket_end)) 527 return 0; 528 529 bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 530 bucket_pos_to_bp(c, bucket, bp.bucket_offset), 531 0); 532 ret = bkey_err(bp_k); 533 if (ret) 534 goto err; 535 536 if (bp_k.k->type != KEY_TYPE_backpointer || 537 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 538 bch2_bkey_buf_reassemble(&tmp, c, orig_k); 539 540 if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { 541 if (bp.level) { 542 bch2_trans_unlock(trans); 543 bch2_btree_interior_updates_flush(c); 544 } 545 546 ret = bch2_btree_write_buffer_flush_sync(trans); 547 if (ret) 548 goto err; 549 550 bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); 551 ret = -BCH_ERR_transaction_restart_write_buffer_flush; 552 goto out; 553 } 554 555 goto check_existing_bp; 556 } 557 out: 558 err: 559 fsck_err: 560 bch2_trans_iter_exit(trans, &other_extent_iter); 561 bch2_trans_iter_exit(trans, &bp_iter); 562 bch2_bkey_buf_exit(&tmp, c); 563 printbuf_exit(&buf); 564 return ret; 565 check_existing_bp: 566 /* Do we have a backpointer for a different extent? */ 567 if (bp_k.k->type != KEY_TYPE_backpointer) 568 goto missing; 569 570 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; 571 572 struct bkey_s_c other_extent = 573 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); 574 ret = bkey_err(other_extent); 575 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 576 ret = 0; 577 if (ret) 578 goto err; 579 580 if (!other_extent.k) 581 goto missing; 582 583 if (bch2_extents_match(orig_k, other_extent)) { 584 printbuf_reset(&buf); 585 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 586 bch2_bkey_val_to_text(&buf, c, orig_k); 587 prt_str(&buf, "\n "); 588 bch2_bkey_val_to_text(&buf, c, other_extent); 589 bch_err(c, "%s", buf.buf); 590 591 if (other_extent.k->size <= orig_k.k->size) { 592 ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); 593 if (ret) 594 goto err; 595 goto out; 596 } else { 597 ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); 598 if (ret) 599 goto err; 600 goto missing; 601 } 602 } 603 604 ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); 605 if (ret < 0) 606 goto err; 607 if (ret) { 608 ret = 0; 609 goto missing; 610 } 611 612 ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); 613 if (ret < 0) 614 goto err; 615 if (ret) { 616 ret = 0; 617 goto out; 618 } 619 620 printbuf_reset(&buf); 621 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); 622 bch2_bkey_val_to_text(&buf, c, orig_k); 623 prt_str(&buf, "\n "); 624 bch2_bkey_val_to_text(&buf, c, other_extent); 625 bch_err(c, "%s", buf.buf); 626 ret = -BCH_ERR_fsck_repair_unimplemented; 627 goto err; 628 missing: 629 printbuf_reset(&buf); 630 prt_printf(&buf, "missing backpointer for btree=%s l=%u ", 631 bch2_btree_id_str(bp.btree_id), bp.level); 632 bch2_bkey_val_to_text(&buf, c, orig_k); 633 prt_printf(&buf, "\n got: "); 634 bch2_bkey_val_to_text(&buf, c, bp_k); 635 636 struct bkey_i_backpointer n_bp_k; 637 bkey_backpointer_init(&n_bp_k.k_i); 638 n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); 639 n_bp_k.v = bp; 640 prt_printf(&buf, "\n want: "); 641 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); 642 643 if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) 644 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); 645 646 goto out; 647 } 648 649 static int check_extent_to_backpointers(struct btree_trans *trans, 650 struct extents_to_bp_state *s, 651 enum btree_id btree, unsigned level, 652 struct bkey_s_c k) 653 { 654 struct bch_fs *c = trans->c; 655 struct bkey_ptrs_c ptrs; 656 const union bch_extent_entry *entry; 657 struct extent_ptr_decoded p; 658 int ret; 659 660 ptrs = bch2_bkey_ptrs_c(k); 661 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 662 struct bpos bucket_pos; 663 struct bch_backpointer bp; 664 665 if (p.ptr.cached) 666 continue; 667 668 bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp); 669 670 ret = check_bp_exists(trans, s, bucket_pos, bp, k); 671 if (ret) 672 return ret; 673 } 674 675 return 0; 676 } 677 678 static int check_btree_root_to_backpointers(struct btree_trans *trans, 679 struct extents_to_bp_state *s, 680 enum btree_id btree_id, 681 int *level) 682 { 683 struct bch_fs *c = trans->c; 684 struct btree_iter iter; 685 struct btree *b; 686 struct bkey_s_c k; 687 int ret; 688 retry: 689 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 690 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 691 b = bch2_btree_iter_peek_node(&iter); 692 ret = PTR_ERR_OR_ZERO(b); 693 if (ret) 694 goto err; 695 696 if (b != btree_node_root(c, b)) { 697 bch2_trans_iter_exit(trans, &iter); 698 goto retry; 699 } 700 701 *level = b->c.level; 702 703 k = bkey_i_to_s_c(&b->key); 704 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 705 err: 706 bch2_trans_iter_exit(trans, &iter); 707 return ret; 708 } 709 710 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 711 { 712 return (struct bbpos) { 713 .btree = bp.btree_id, 714 .pos = bp.pos, 715 }; 716 } 717 718 static u64 mem_may_pin_bytes(struct bch_fs *c) 719 { 720 struct sysinfo i; 721 si_meminfo(&i); 722 723 u64 mem_bytes = i.totalram * i.mem_unit; 724 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 725 } 726 727 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 728 { 729 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 730 } 731 732 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 733 u64 btree_leaf_mask, 734 u64 btree_interior_mask, 735 struct bbpos start, struct bbpos *end) 736 { 737 struct bch_fs *c = trans->c; 738 s64 mem_may_pin = mem_may_pin_bytes(c); 739 int ret = 0; 740 741 btree_interior_mask |= btree_leaf_mask; 742 743 c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask; 744 c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask; 745 c->btree_cache.pinned_nodes_start = start; 746 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 747 748 for (enum btree_id btree = start.btree; 749 btree < BTREE_ID_NR && !ret; 750 btree++) { 751 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1; 752 struct btree_iter iter; 753 struct btree *b; 754 755 if (!((1U << btree) & btree_leaf_mask) && 756 !((1U << btree) & btree_interior_mask)) 757 continue; 758 759 __for_each_btree_node(trans, iter, btree, 760 btree == start.btree ? start.pos : POS_MIN, 761 0, depth, BTREE_ITER_PREFETCH, b, ret) { 762 mem_may_pin -= btree_buf_bytes(b); 763 if (mem_may_pin <= 0) { 764 c->btree_cache.pinned_nodes_end = *end = 765 BBPOS(btree, b->key.k.p); 766 bch2_trans_iter_exit(trans, &iter); 767 return 0; 768 } 769 } 770 bch2_trans_iter_exit(trans, &iter); 771 } 772 773 return ret; 774 } 775 776 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 777 struct extents_to_bp_state *s) 778 { 779 struct bch_fs *c = trans->c; 780 int ret = 0; 781 782 for (enum btree_id btree_id = 0; 783 btree_id < btree_id_nr_alive(c); 784 btree_id++) { 785 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 786 787 ret = commit_do(trans, NULL, NULL, 788 BCH_TRANS_COMMIT_no_enospc, 789 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 790 if (ret) 791 return ret; 792 793 while (level >= depth) { 794 struct btree_iter iter; 795 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, 796 level, 797 BTREE_ITER_PREFETCH); 798 while (1) { 799 bch2_trans_begin(trans); 800 801 struct bkey_s_c k = bch2_btree_iter_peek(&iter); 802 if (!k.k) 803 break; 804 ret = bkey_err(k) ?: 805 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 806 bch2_trans_commit(trans, NULL, NULL, 807 BCH_TRANS_COMMIT_no_enospc); 808 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 809 ret = 0; 810 continue; 811 } 812 if (ret) 813 break; 814 if (bpos_eq(iter.pos, SPOS_MAX)) 815 break; 816 bch2_btree_iter_advance(&iter); 817 } 818 bch2_trans_iter_exit(trans, &iter); 819 820 if (ret) 821 return ret; 822 823 --level; 824 } 825 } 826 827 return 0; 828 } 829 830 int bch2_check_extents_to_backpointers(struct bch_fs *c) 831 { 832 struct btree_trans *trans = bch2_trans_get(c); 833 struct extents_to_bp_state s = { .bucket_start = POS_MIN }; 834 int ret; 835 836 bch2_bkey_buf_init(&s.last_flushed); 837 bkey_init(&s.last_flushed.k->k); 838 839 while (1) { 840 struct bbpos end; 841 ret = bch2_get_btree_in_memory_pos(trans, 842 BIT_ULL(BTREE_ID_backpointers), 843 BIT_ULL(BTREE_ID_backpointers), 844 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); 845 if (ret) 846 break; 847 848 s.bucket_end = end.pos; 849 850 if ( bpos_eq(s.bucket_start, POS_MIN) && 851 !bpos_eq(s.bucket_end, SPOS_MAX)) 852 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 853 __func__, btree_nodes_fit_in_ram(c)); 854 855 if (!bpos_eq(s.bucket_start, POS_MIN) || 856 !bpos_eq(s.bucket_end, SPOS_MAX)) { 857 struct printbuf buf = PRINTBUF; 858 859 prt_str(&buf, "check_extents_to_backpointers(): "); 860 bch2_bpos_to_text(&buf, s.bucket_start); 861 prt_str(&buf, "-"); 862 bch2_bpos_to_text(&buf, s.bucket_end); 863 864 bch_verbose(c, "%s", buf.buf); 865 printbuf_exit(&buf); 866 } 867 868 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 869 if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) 870 break; 871 872 s.bucket_start = bpos_successor(s.bucket_end); 873 } 874 bch2_trans_put(trans); 875 bch2_bkey_buf_exit(&s.last_flushed, c); 876 877 c->btree_cache.pinned_nodes_leaf_mask = 0; 878 c->btree_cache.pinned_nodes_interior_mask = 0; 879 880 bch_err_fn(c, ret); 881 return ret; 882 } 883 884 static int check_one_backpointer(struct btree_trans *trans, 885 struct bbpos start, 886 struct bbpos end, 887 struct bkey_s_c_backpointer bp, 888 struct bpos *last_flushed_pos) 889 { 890 struct bch_fs *c = trans->c; 891 struct btree_iter iter; 892 struct bbpos pos = bp_to_bbpos(*bp.v); 893 struct bkey_s_c k; 894 struct printbuf buf = PRINTBUF; 895 int ret; 896 897 if (bbpos_cmp(pos, start) < 0 || 898 bbpos_cmp(pos, end) > 0) 899 return 0; 900 901 k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); 902 ret = bkey_err(k); 903 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 904 return 0; 905 if (ret) 906 return ret; 907 908 if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { 909 *last_flushed_pos = bp.k->p; 910 ret = bch2_btree_write_buffer_flush_sync(trans) ?: 911 -BCH_ERR_transaction_restart_write_buffer_flush; 912 goto out; 913 } 914 915 if (fsck_err_on(!k.k, c, 916 backpointer_to_missing_ptr, 917 "backpointer for missing %s\n %s", 918 bp.v->level ? "btree node" : "extent", 919 (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 920 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 921 goto out; 922 } 923 out: 924 fsck_err: 925 bch2_trans_iter_exit(trans, &iter); 926 printbuf_exit(&buf); 927 return ret; 928 } 929 930 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 931 struct bbpos start, 932 struct bbpos end) 933 { 934 struct bpos last_flushed_pos = SPOS_MAX; 935 936 return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 937 POS_MIN, BTREE_ITER_PREFETCH, k, 938 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 939 check_one_backpointer(trans, start, end, 940 bkey_s_c_to_backpointer(k), 941 &last_flushed_pos)); 942 } 943 944 int bch2_check_backpointers_to_extents(struct bch_fs *c) 945 { 946 struct btree_trans *trans = bch2_trans_get(c); 947 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 948 int ret; 949 950 while (1) { 951 ret = bch2_get_btree_in_memory_pos(trans, 952 (1U << BTREE_ID_extents)| 953 (1U << BTREE_ID_reflink), 954 ~0, 955 start, &end); 956 if (ret) 957 break; 958 959 if (!bbpos_cmp(start, BBPOS_MIN) && 960 bbpos_cmp(end, BBPOS_MAX)) 961 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 962 __func__, btree_nodes_fit_in_ram(c)); 963 964 if (bbpos_cmp(start, BBPOS_MIN) || 965 bbpos_cmp(end, BBPOS_MAX)) { 966 struct printbuf buf = PRINTBUF; 967 968 prt_str(&buf, "check_backpointers_to_extents(): "); 969 bch2_bbpos_to_text(&buf, start); 970 prt_str(&buf, "-"); 971 bch2_bbpos_to_text(&buf, end); 972 973 bch_verbose(c, "%s", buf.buf); 974 printbuf_exit(&buf); 975 } 976 977 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 978 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 979 break; 980 981 start = bbpos_successor(end); 982 } 983 bch2_trans_put(trans); 984 985 c->btree_cache.pinned_nodes_leaf_mask = 0; 986 c->btree_cache.pinned_nodes_interior_mask = 0; 987 988 bch_err_fn(c, ret); 989 return ret; 990 } 991