1 // SPDX-License-Identifier: GPL-2.0 2 #include "bcachefs.h" 3 #include "bbpos.h" 4 #include "alloc_background.h" 5 #include "backpointers.h" 6 #include "bkey_buf.h" 7 #include "btree_cache.h" 8 #include "btree_update.h" 9 #include "btree_update_interior.h" 10 #include "btree_write_buffer.h" 11 #include "checksum.h" 12 #include "error.h" 13 14 #include <linux/mm.h> 15 16 static bool extent_matches_bp(struct bch_fs *c, 17 enum btree_id btree_id, unsigned level, 18 struct bkey_s_c k, 19 struct bpos bucket, 20 struct bch_backpointer bp) 21 { 22 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 23 const union bch_extent_entry *entry; 24 struct extent_ptr_decoded p; 25 26 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 27 struct bpos bucket2; 28 struct bch_backpointer bp2; 29 30 if (p.ptr.cached) 31 continue; 32 33 bch2_extent_ptr_to_bp(c, btree_id, level, k, p, entry, &bucket2, &bp2); 34 if (bpos_eq(bucket, bucket2) && 35 !memcmp(&bp, &bp2, sizeof(bp))) 36 return true; 37 } 38 39 return false; 40 } 41 42 int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k, 43 enum bkey_invalid_flags flags, 44 struct printbuf *err) 45 { 46 struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k); 47 48 /* these will be caught by fsck */ 49 if (!bch2_dev_exists2(c, bp.k->p.inode)) 50 return 0; 51 52 struct bch_dev *ca = bch_dev_bkey_exists(c, bp.k->p.inode); 53 struct bpos bucket = bp_pos_to_bucket(c, bp.k->p); 54 int ret = 0; 55 56 bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size || 57 !bpos_eq(bp.k->p, bucket_pos_to_bp(c, bucket, bp.v->bucket_offset)), 58 c, err, 59 backpointer_bucket_offset_wrong, 60 "backpointer bucket_offset wrong"); 61 fsck_err: 62 return ret; 63 } 64 65 void bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp) 66 { 67 prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=", 68 bch2_btree_id_str(bp->btree_id), 69 bp->level, 70 (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT), 71 (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT), 72 bp->bucket_len); 73 bch2_bpos_to_text(out, bp->pos); 74 } 75 76 void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k) 77 { 78 if (bch2_dev_exists2(c, k.k->p.inode)) { 79 prt_str(out, "bucket="); 80 bch2_bpos_to_text(out, bp_pos_to_bucket(c, k.k->p)); 81 prt_str(out, " "); 82 } 83 84 bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v); 85 } 86 87 void bch2_backpointer_swab(struct bkey_s k) 88 { 89 struct bkey_s_backpointer bp = bkey_s_to_backpointer(k); 90 91 bp.v->bucket_offset = swab40(bp.v->bucket_offset); 92 bp.v->bucket_len = swab32(bp.v->bucket_len); 93 bch2_bpos_swab(&bp.v->pos); 94 } 95 96 static noinline int backpointer_mod_err(struct btree_trans *trans, 97 struct bch_backpointer bp, 98 struct bkey_s_c bp_k, 99 struct bkey_s_c orig_k, 100 bool insert) 101 { 102 struct bch_fs *c = trans->c; 103 struct printbuf buf = PRINTBUF; 104 105 if (insert) { 106 prt_printf(&buf, "existing backpointer found when inserting "); 107 bch2_backpointer_to_text(&buf, &bp); 108 prt_newline(&buf); 109 printbuf_indent_add(&buf, 2); 110 111 prt_printf(&buf, "found "); 112 bch2_bkey_val_to_text(&buf, c, bp_k); 113 prt_newline(&buf); 114 115 prt_printf(&buf, "for "); 116 bch2_bkey_val_to_text(&buf, c, orig_k); 117 118 bch_err(c, "%s", buf.buf); 119 } else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 120 prt_printf(&buf, "backpointer not found when deleting"); 121 prt_newline(&buf); 122 printbuf_indent_add(&buf, 2); 123 124 prt_printf(&buf, "searching for "); 125 bch2_backpointer_to_text(&buf, &bp); 126 prt_newline(&buf); 127 128 prt_printf(&buf, "got "); 129 bch2_bkey_val_to_text(&buf, c, bp_k); 130 prt_newline(&buf); 131 132 prt_printf(&buf, "for "); 133 bch2_bkey_val_to_text(&buf, c, orig_k); 134 135 bch_err(c, "%s", buf.buf); 136 } 137 138 printbuf_exit(&buf); 139 140 if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) { 141 return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0; 142 } else { 143 return 0; 144 } 145 } 146 147 int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans, 148 struct bpos bucket, 149 struct bch_backpointer bp, 150 struct bkey_s_c orig_k, 151 bool insert) 152 { 153 struct btree_iter bp_iter; 154 struct bkey_s_c k; 155 struct bkey_i_backpointer *bp_k; 156 int ret; 157 158 bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer)); 159 ret = PTR_ERR_OR_ZERO(bp_k); 160 if (ret) 161 return ret; 162 163 bkey_backpointer_init(&bp_k->k_i); 164 bp_k->k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); 165 bp_k->v = bp; 166 167 if (!insert) { 168 bp_k->k.type = KEY_TYPE_deleted; 169 set_bkey_val_u64s(&bp_k->k, 0); 170 } 171 172 k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 173 bp_k->k.p, 174 BTREE_ITER_INTENT| 175 BTREE_ITER_SLOTS| 176 BTREE_ITER_WITH_UPDATES); 177 ret = bkey_err(k); 178 if (ret) 179 goto err; 180 181 if (insert 182 ? k.k->type 183 : (k.k->type != KEY_TYPE_backpointer || 184 memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) { 185 ret = backpointer_mod_err(trans, bp, k, orig_k, insert); 186 if (ret) 187 goto err; 188 } 189 190 ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0); 191 err: 192 bch2_trans_iter_exit(trans, &bp_iter); 193 return ret; 194 } 195 196 /* 197 * Find the next backpointer >= *bp_offset: 198 */ 199 int bch2_get_next_backpointer(struct btree_trans *trans, 200 struct bpos bucket, int gen, 201 struct bpos *bp_pos, 202 struct bch_backpointer *bp, 203 unsigned iter_flags) 204 { 205 struct bch_fs *c = trans->c; 206 struct bpos bp_end_pos = bucket_pos_to_bp(c, bpos_nosnap_successor(bucket), 0); 207 struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL }; 208 struct bkey_s_c k; 209 int ret = 0; 210 211 if (bpos_ge(*bp_pos, bp_end_pos)) 212 goto done; 213 214 if (gen >= 0) { 215 k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 216 bucket, BTREE_ITER_CACHED|iter_flags); 217 ret = bkey_err(k); 218 if (ret) 219 goto out; 220 221 if (k.k->type != KEY_TYPE_alloc_v4 || 222 bkey_s_c_to_alloc_v4(k).v->gen != gen) 223 goto done; 224 } 225 226 *bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(c, bucket, 0)); 227 228 for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers, 229 *bp_pos, iter_flags, k, ret) { 230 if (bpos_ge(k.k->p, bp_end_pos)) 231 break; 232 233 *bp_pos = k.k->p; 234 *bp = *bkey_s_c_to_backpointer(k).v; 235 goto out; 236 } 237 done: 238 *bp_pos = SPOS_MAX; 239 out: 240 bch2_trans_iter_exit(trans, &bp_iter); 241 bch2_trans_iter_exit(trans, &alloc_iter); 242 return ret; 243 } 244 245 static void backpointer_not_found(struct btree_trans *trans, 246 struct bpos bp_pos, 247 struct bch_backpointer bp, 248 struct bkey_s_c k) 249 { 250 struct bch_fs *c = trans->c; 251 struct printbuf buf = PRINTBUF; 252 struct bpos bucket = bp_pos_to_bucket(c, bp_pos); 253 254 /* 255 * If we're using the btree write buffer, the backpointer we were 256 * looking at may have already been deleted - failure to find what it 257 * pointed to is not an error: 258 */ 259 if (likely(!bch2_backpointers_no_use_write_buffer)) 260 return; 261 262 prt_printf(&buf, "backpointer doesn't match %s it points to:\n ", 263 bp.level ? "btree node" : "extent"); 264 prt_printf(&buf, "bucket: "); 265 bch2_bpos_to_text(&buf, bucket); 266 prt_printf(&buf, "\n "); 267 268 prt_printf(&buf, "backpointer pos: "); 269 bch2_bpos_to_text(&buf, bp_pos); 270 prt_printf(&buf, "\n "); 271 272 bch2_backpointer_to_text(&buf, &bp); 273 prt_printf(&buf, "\n "); 274 bch2_bkey_val_to_text(&buf, c, k); 275 if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers) 276 bch_err_ratelimited(c, "%s", buf.buf); 277 else 278 bch2_trans_inconsistent(trans, "%s", buf.buf); 279 280 printbuf_exit(&buf); 281 } 282 283 struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans, 284 struct btree_iter *iter, 285 struct bpos bp_pos, 286 struct bch_backpointer bp, 287 unsigned iter_flags) 288 { 289 if (likely(!bp.level)) { 290 struct bch_fs *c = trans->c; 291 struct bpos bucket = bp_pos_to_bucket(c, bp_pos); 292 struct bkey_s_c k; 293 294 bch2_trans_node_iter_init(trans, iter, 295 bp.btree_id, 296 bp.pos, 297 0, 0, 298 iter_flags); 299 k = bch2_btree_iter_peek_slot(iter); 300 if (bkey_err(k)) { 301 bch2_trans_iter_exit(trans, iter); 302 return k; 303 } 304 305 if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp)) 306 return k; 307 308 bch2_trans_iter_exit(trans, iter); 309 backpointer_not_found(trans, bp_pos, bp, k); 310 return bkey_s_c_null; 311 } else { 312 struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp); 313 314 if (IS_ERR_OR_NULL(b)) { 315 bch2_trans_iter_exit(trans, iter); 316 return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null; 317 } 318 return bkey_i_to_s_c(&b->key); 319 } 320 } 321 322 struct btree *bch2_backpointer_get_node(struct btree_trans *trans, 323 struct btree_iter *iter, 324 struct bpos bp_pos, 325 struct bch_backpointer bp) 326 { 327 struct bch_fs *c = trans->c; 328 struct bpos bucket = bp_pos_to_bucket(c, bp_pos); 329 struct btree *b; 330 331 BUG_ON(!bp.level); 332 333 bch2_trans_node_iter_init(trans, iter, 334 bp.btree_id, 335 bp.pos, 336 0, 337 bp.level - 1, 338 0); 339 b = bch2_btree_iter_peek_node(iter); 340 if (IS_ERR_OR_NULL(b)) 341 goto err; 342 343 BUG_ON(b->c.level != bp.level - 1); 344 345 if (extent_matches_bp(c, bp.btree_id, bp.level, 346 bkey_i_to_s_c(&b->key), 347 bucket, bp)) 348 return b; 349 350 if (btree_node_will_make_reachable(b)) { 351 b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node); 352 } else { 353 backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key)); 354 b = NULL; 355 } 356 err: 357 bch2_trans_iter_exit(trans, iter); 358 return b; 359 } 360 361 static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter, 362 struct bkey_s_c k) 363 { 364 struct bch_fs *c = trans->c; 365 struct btree_iter alloc_iter = { NULL }; 366 struct bkey_s_c alloc_k; 367 struct printbuf buf = PRINTBUF; 368 int ret = 0; 369 370 if (fsck_err_on(!bch2_dev_exists2(c, k.k->p.inode), c, 371 backpointer_to_missing_device, 372 "backpointer for missing device:\n%s", 373 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 374 ret = bch2_btree_delete_at(trans, bp_iter, 0); 375 goto out; 376 } 377 378 alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, 379 bp_pos_to_bucket(c, k.k->p), 0); 380 ret = bkey_err(alloc_k); 381 if (ret) 382 goto out; 383 384 if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c, 385 backpointer_to_missing_alloc, 386 "backpointer for nonexistent alloc key: %llu:%llu:0\n%s", 387 alloc_iter.pos.inode, alloc_iter.pos.offset, 388 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 389 ret = bch2_btree_delete_at(trans, bp_iter, 0); 390 goto out; 391 } 392 out: 393 fsck_err: 394 bch2_trans_iter_exit(trans, &alloc_iter); 395 printbuf_exit(&buf); 396 return ret; 397 } 398 399 /* verify that every backpointer has a corresponding alloc key */ 400 int bch2_check_btree_backpointers(struct bch_fs *c) 401 { 402 int ret = bch2_trans_run(c, 403 for_each_btree_key_commit(trans, iter, 404 BTREE_ID_backpointers, POS_MIN, 0, k, 405 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 406 bch2_check_btree_backpointer(trans, &iter, k))); 407 bch_err_fn(c, ret); 408 return ret; 409 } 410 411 static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r) 412 { 413 return bpos_eq(l.k->p, r.k->p) && 414 bkey_bytes(l.k) == bkey_bytes(r.k) && 415 !memcmp(l.v, r.v, bkey_val_bytes(l.k)); 416 } 417 418 struct extents_to_bp_state { 419 struct bpos bucket_start; 420 struct bpos bucket_end; 421 struct bkey_buf last_flushed; 422 }; 423 424 static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree, 425 struct bkey_s_c extent, unsigned dev) 426 { 427 struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent); 428 int ret = PTR_ERR_OR_ZERO(n); 429 if (ret) 430 return ret; 431 432 bch2_bkey_drop_device(bkey_i_to_s(n), dev); 433 return bch2_btree_insert_trans(trans, btree, n, 0); 434 } 435 436 static int check_extent_checksum(struct btree_trans *trans, 437 enum btree_id btree, struct bkey_s_c extent, 438 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev) 439 { 440 struct bch_fs *c = trans->c; 441 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent); 442 const union bch_extent_entry *entry; 443 struct extent_ptr_decoded p; 444 struct printbuf buf = PRINTBUF; 445 void *data_buf = NULL; 446 struct bio *bio = NULL; 447 size_t bytes; 448 int ret = 0; 449 450 if (bkey_is_btree_ptr(extent.k)) 451 return false; 452 453 bkey_for_each_ptr_decode(extent.k, ptrs, p, entry) 454 if (p.ptr.dev == dev) 455 goto found; 456 BUG(); 457 found: 458 if (!p.crc.csum_type) 459 return false; 460 461 bytes = p.crc.compressed_size << 9; 462 463 struct bch_dev *ca = bch_dev_bkey_exists(c, dev); 464 if (!bch2_dev_get_ioref(ca, READ)) 465 return false; 466 467 data_buf = kvmalloc(bytes, GFP_KERNEL); 468 if (!data_buf) { 469 ret = -ENOMEM; 470 goto err; 471 } 472 473 bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL); 474 bio->bi_iter.bi_sector = p.ptr.offset; 475 bch2_bio_map(bio, data_buf, bytes); 476 ret = submit_bio_wait(bio); 477 if (ret) 478 goto err; 479 480 prt_str(&buf, "extents pointing to same space, but first extent checksum bad:"); 481 prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree)); 482 bch2_bkey_val_to_text(&buf, c, extent); 483 prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree)); 484 bch2_bkey_val_to_text(&buf, c, extent2); 485 486 struct nonce nonce = extent_nonce(extent.k->version, p.crc); 487 struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes); 488 if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum), 489 c, dup_backpointer_to_bad_csum_extent, 490 "%s", buf.buf)) 491 ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1; 492 fsck_err: 493 err: 494 if (bio) 495 bio_put(bio); 496 kvfree(data_buf); 497 percpu_ref_put(&ca->io_ref); 498 printbuf_exit(&buf); 499 return ret; 500 } 501 502 static int check_bp_exists(struct btree_trans *trans, 503 struct extents_to_bp_state *s, 504 struct bpos bucket, 505 struct bch_backpointer bp, 506 struct bkey_s_c orig_k) 507 { 508 struct bch_fs *c = trans->c; 509 struct btree_iter bp_iter = {}; 510 struct btree_iter other_extent_iter = {}; 511 struct printbuf buf = PRINTBUF; 512 struct bkey_s_c bp_k; 513 struct bkey_buf tmp; 514 int ret; 515 516 bch2_bkey_buf_init(&tmp); 517 518 if (!bch2_dev_bucket_exists(c, bucket)) { 519 prt_str(&buf, "extent for nonexistent device:bucket "); 520 bch2_bpos_to_text(&buf, bucket); 521 prt_str(&buf, "\n "); 522 bch2_bkey_val_to_text(&buf, c, orig_k); 523 bch_err(c, "%s", buf.buf); 524 return -BCH_ERR_fsck_repair_unimplemented; 525 } 526 527 if (bpos_lt(bucket, s->bucket_start) || 528 bpos_gt(bucket, s->bucket_end)) 529 return 0; 530 531 bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers, 532 bucket_pos_to_bp(c, bucket, bp.bucket_offset), 533 0); 534 ret = bkey_err(bp_k); 535 if (ret) 536 goto err; 537 538 if (bp_k.k->type != KEY_TYPE_backpointer || 539 memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) { 540 bch2_bkey_buf_reassemble(&tmp, c, orig_k); 541 542 if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) { 543 if (bp.level) { 544 bch2_trans_unlock(trans); 545 bch2_btree_interior_updates_flush(c); 546 } 547 548 ret = bch2_btree_write_buffer_flush_sync(trans); 549 if (ret) 550 goto err; 551 552 bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k); 553 ret = -BCH_ERR_transaction_restart_write_buffer_flush; 554 goto out; 555 } 556 557 goto check_existing_bp; 558 } 559 out: 560 err: 561 fsck_err: 562 bch2_trans_iter_exit(trans, &other_extent_iter); 563 bch2_trans_iter_exit(trans, &bp_iter); 564 bch2_bkey_buf_exit(&tmp, c); 565 printbuf_exit(&buf); 566 return ret; 567 check_existing_bp: 568 /* Do we have a backpointer for a different extent? */ 569 if (bp_k.k->type != KEY_TYPE_backpointer) 570 goto missing; 571 572 struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v; 573 574 struct bkey_s_c other_extent = 575 bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0); 576 ret = bkey_err(other_extent); 577 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 578 ret = 0; 579 if (ret) 580 goto err; 581 582 if (!other_extent.k) 583 goto missing; 584 585 if (bch2_extents_match(orig_k, other_extent)) { 586 printbuf_reset(&buf); 587 prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n "); 588 bch2_bkey_val_to_text(&buf, c, orig_k); 589 prt_str(&buf, "\n "); 590 bch2_bkey_val_to_text(&buf, c, other_extent); 591 bch_err(c, "%s", buf.buf); 592 593 if (other_extent.k->size <= orig_k.k->size) { 594 ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode); 595 if (ret) 596 goto err; 597 goto out; 598 } else { 599 ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode); 600 if (ret) 601 goto err; 602 goto missing; 603 } 604 } 605 606 ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode); 607 if (ret < 0) 608 goto err; 609 if (ret) { 610 ret = 0; 611 goto missing; 612 } 613 614 ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode); 615 if (ret < 0) 616 goto err; 617 if (ret) { 618 ret = 0; 619 goto out; 620 } 621 622 printbuf_reset(&buf); 623 prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode); 624 bch2_bkey_val_to_text(&buf, c, orig_k); 625 prt_str(&buf, "\n "); 626 bch2_bkey_val_to_text(&buf, c, other_extent); 627 bch_err(c, "%s", buf.buf); 628 ret = -BCH_ERR_fsck_repair_unimplemented; 629 goto err; 630 missing: 631 printbuf_reset(&buf); 632 prt_printf(&buf, "missing backpointer for btree=%s l=%u ", 633 bch2_btree_id_str(bp.btree_id), bp.level); 634 bch2_bkey_val_to_text(&buf, c, orig_k); 635 prt_printf(&buf, "\n got: "); 636 bch2_bkey_val_to_text(&buf, c, bp_k); 637 638 struct bkey_i_backpointer n_bp_k; 639 bkey_backpointer_init(&n_bp_k.k_i); 640 n_bp_k.k.p = bucket_pos_to_bp(trans->c, bucket, bp.bucket_offset); 641 n_bp_k.v = bp; 642 prt_printf(&buf, "\n want: "); 643 bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i)); 644 645 if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf)) 646 ret = bch2_bucket_backpointer_mod(trans, bucket, bp, orig_k, true); 647 648 goto out; 649 } 650 651 static int check_extent_to_backpointers(struct btree_trans *trans, 652 struct extents_to_bp_state *s, 653 enum btree_id btree, unsigned level, 654 struct bkey_s_c k) 655 { 656 struct bch_fs *c = trans->c; 657 struct bkey_ptrs_c ptrs; 658 const union bch_extent_entry *entry; 659 struct extent_ptr_decoded p; 660 int ret; 661 662 ptrs = bch2_bkey_ptrs_c(k); 663 bkey_for_each_ptr_decode(k.k, ptrs, p, entry) { 664 struct bpos bucket_pos; 665 struct bch_backpointer bp; 666 667 if (p.ptr.cached) 668 continue; 669 670 bch2_extent_ptr_to_bp(c, btree, level, k, p, entry, &bucket_pos, &bp); 671 672 ret = check_bp_exists(trans, s, bucket_pos, bp, k); 673 if (ret) 674 return ret; 675 } 676 677 return 0; 678 } 679 680 static int check_btree_root_to_backpointers(struct btree_trans *trans, 681 struct extents_to_bp_state *s, 682 enum btree_id btree_id, 683 int *level) 684 { 685 struct bch_fs *c = trans->c; 686 struct btree_iter iter; 687 struct btree *b; 688 struct bkey_s_c k; 689 int ret; 690 retry: 691 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 692 0, bch2_btree_id_root(c, btree_id)->b->c.level, 0); 693 b = bch2_btree_iter_peek_node(&iter); 694 ret = PTR_ERR_OR_ZERO(b); 695 if (ret) 696 goto err; 697 698 if (b != btree_node_root(c, b)) { 699 bch2_trans_iter_exit(trans, &iter); 700 goto retry; 701 } 702 703 *level = b->c.level; 704 705 k = bkey_i_to_s_c(&b->key); 706 ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k); 707 err: 708 bch2_trans_iter_exit(trans, &iter); 709 return ret; 710 } 711 712 static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp) 713 { 714 return (struct bbpos) { 715 .btree = bp.btree_id, 716 .pos = bp.pos, 717 }; 718 } 719 720 static u64 mem_may_pin_bytes(struct bch_fs *c) 721 { 722 struct sysinfo i; 723 si_meminfo(&i); 724 725 u64 mem_bytes = i.totalram * i.mem_unit; 726 return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100); 727 } 728 729 static size_t btree_nodes_fit_in_ram(struct bch_fs *c) 730 { 731 return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size); 732 } 733 734 static int bch2_get_btree_in_memory_pos(struct btree_trans *trans, 735 u64 btree_leaf_mask, 736 u64 btree_interior_mask, 737 struct bbpos start, struct bbpos *end) 738 { 739 struct bch_fs *c = trans->c; 740 s64 mem_may_pin = mem_may_pin_bytes(c); 741 int ret = 0; 742 743 btree_interior_mask |= btree_leaf_mask; 744 745 c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask; 746 c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask; 747 c->btree_cache.pinned_nodes_start = start; 748 c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX; 749 750 for (enum btree_id btree = start.btree; 751 btree < BTREE_ID_NR && !ret; 752 btree++) { 753 unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1; 754 struct btree_iter iter; 755 struct btree *b; 756 757 if (!((1U << btree) & btree_leaf_mask) && 758 !((1U << btree) & btree_interior_mask)) 759 continue; 760 761 __for_each_btree_node(trans, iter, btree, 762 btree == start.btree ? start.pos : POS_MIN, 763 0, depth, BTREE_ITER_PREFETCH, b, ret) { 764 mem_may_pin -= btree_buf_bytes(b); 765 if (mem_may_pin <= 0) { 766 c->btree_cache.pinned_nodes_end = *end = 767 BBPOS(btree, b->key.k.p); 768 bch2_trans_iter_exit(trans, &iter); 769 return 0; 770 } 771 } 772 bch2_trans_iter_exit(trans, &iter); 773 } 774 775 return ret; 776 } 777 778 static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans, 779 struct extents_to_bp_state *s) 780 { 781 struct bch_fs *c = trans->c; 782 int ret = 0; 783 784 for (enum btree_id btree_id = 0; 785 btree_id < btree_id_nr_alive(c); 786 btree_id++) { 787 int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1; 788 789 ret = commit_do(trans, NULL, NULL, 790 BCH_TRANS_COMMIT_no_enospc, 791 check_btree_root_to_backpointers(trans, s, btree_id, &level)); 792 if (ret) 793 return ret; 794 795 while (level >= depth) { 796 struct btree_iter iter; 797 bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, 798 level, 799 BTREE_ITER_PREFETCH); 800 while (1) { 801 bch2_trans_begin(trans); 802 803 struct bkey_s_c k = bch2_btree_iter_peek(&iter); 804 if (!k.k) 805 break; 806 ret = bkey_err(k) ?: 807 check_extent_to_backpointers(trans, s, btree_id, level, k) ?: 808 bch2_trans_commit(trans, NULL, NULL, 809 BCH_TRANS_COMMIT_no_enospc); 810 if (bch2_err_matches(ret, BCH_ERR_transaction_restart)) { 811 ret = 0; 812 continue; 813 } 814 if (ret) 815 break; 816 if (bpos_eq(iter.pos, SPOS_MAX)) 817 break; 818 bch2_btree_iter_advance(&iter); 819 } 820 bch2_trans_iter_exit(trans, &iter); 821 822 if (ret) 823 return ret; 824 825 --level; 826 } 827 } 828 829 return 0; 830 } 831 832 int bch2_check_extents_to_backpointers(struct bch_fs *c) 833 { 834 struct btree_trans *trans = bch2_trans_get(c); 835 struct extents_to_bp_state s = { .bucket_start = POS_MIN }; 836 int ret; 837 838 bch2_bkey_buf_init(&s.last_flushed); 839 bkey_init(&s.last_flushed.k->k); 840 841 while (1) { 842 struct bbpos end; 843 ret = bch2_get_btree_in_memory_pos(trans, 844 BIT_ULL(BTREE_ID_backpointers), 845 BIT_ULL(BTREE_ID_backpointers), 846 BBPOS(BTREE_ID_backpointers, s.bucket_start), &end); 847 if (ret) 848 break; 849 850 s.bucket_end = end.pos; 851 852 if ( bpos_eq(s.bucket_start, POS_MIN) && 853 !bpos_eq(s.bucket_end, SPOS_MAX)) 854 bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass", 855 __func__, btree_nodes_fit_in_ram(c)); 856 857 if (!bpos_eq(s.bucket_start, POS_MIN) || 858 !bpos_eq(s.bucket_end, SPOS_MAX)) { 859 struct printbuf buf = PRINTBUF; 860 861 prt_str(&buf, "check_extents_to_backpointers(): "); 862 bch2_bpos_to_text(&buf, s.bucket_start); 863 prt_str(&buf, "-"); 864 bch2_bpos_to_text(&buf, s.bucket_end); 865 866 bch_verbose(c, "%s", buf.buf); 867 printbuf_exit(&buf); 868 } 869 870 ret = bch2_check_extents_to_backpointers_pass(trans, &s); 871 if (ret || bpos_eq(s.bucket_end, SPOS_MAX)) 872 break; 873 874 s.bucket_start = bpos_successor(s.bucket_end); 875 } 876 bch2_trans_put(trans); 877 bch2_bkey_buf_exit(&s.last_flushed, c); 878 879 c->btree_cache.pinned_nodes_leaf_mask = 0; 880 c->btree_cache.pinned_nodes_interior_mask = 0; 881 882 bch_err_fn(c, ret); 883 return ret; 884 } 885 886 static int check_one_backpointer(struct btree_trans *trans, 887 struct bbpos start, 888 struct bbpos end, 889 struct bkey_s_c_backpointer bp, 890 struct bpos *last_flushed_pos) 891 { 892 struct bch_fs *c = trans->c; 893 struct btree_iter iter; 894 struct bbpos pos = bp_to_bbpos(*bp.v); 895 struct bkey_s_c k; 896 struct printbuf buf = PRINTBUF; 897 int ret; 898 899 if (bbpos_cmp(pos, start) < 0 || 900 bbpos_cmp(pos, end) > 0) 901 return 0; 902 903 k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0); 904 ret = bkey_err(k); 905 if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node) 906 return 0; 907 if (ret) 908 return ret; 909 910 if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) { 911 *last_flushed_pos = bp.k->p; 912 ret = bch2_btree_write_buffer_flush_sync(trans) ?: 913 -BCH_ERR_transaction_restart_write_buffer_flush; 914 goto out; 915 } 916 917 if (fsck_err_on(!k.k, c, 918 backpointer_to_missing_ptr, 919 "backpointer for missing %s\n %s", 920 bp.v->level ? "btree node" : "extent", 921 (bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) { 922 ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p); 923 goto out; 924 } 925 out: 926 fsck_err: 927 bch2_trans_iter_exit(trans, &iter); 928 printbuf_exit(&buf); 929 return ret; 930 } 931 932 static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans, 933 struct bbpos start, 934 struct bbpos end) 935 { 936 struct bpos last_flushed_pos = SPOS_MAX; 937 938 return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers, 939 POS_MIN, BTREE_ITER_PREFETCH, k, 940 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 941 check_one_backpointer(trans, start, end, 942 bkey_s_c_to_backpointer(k), 943 &last_flushed_pos)); 944 } 945 946 int bch2_check_backpointers_to_extents(struct bch_fs *c) 947 { 948 struct btree_trans *trans = bch2_trans_get(c); 949 struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end; 950 int ret; 951 952 while (1) { 953 ret = bch2_get_btree_in_memory_pos(trans, 954 (1U << BTREE_ID_extents)| 955 (1U << BTREE_ID_reflink), 956 ~0, 957 start, &end); 958 if (ret) 959 break; 960 961 if (!bbpos_cmp(start, BBPOS_MIN) && 962 bbpos_cmp(end, BBPOS_MAX)) 963 bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass", 964 __func__, btree_nodes_fit_in_ram(c)); 965 966 if (bbpos_cmp(start, BBPOS_MIN) || 967 bbpos_cmp(end, BBPOS_MAX)) { 968 struct printbuf buf = PRINTBUF; 969 970 prt_str(&buf, "check_backpointers_to_extents(): "); 971 bch2_bbpos_to_text(&buf, start); 972 prt_str(&buf, "-"); 973 bch2_bbpos_to_text(&buf, end); 974 975 bch_verbose(c, "%s", buf.buf); 976 printbuf_exit(&buf); 977 } 978 979 ret = bch2_check_backpointers_to_extents_pass(trans, start, end); 980 if (ret || !bbpos_cmp(end, BBPOS_MAX)) 981 break; 982 983 start = bbpos_successor(end); 984 } 985 bch2_trans_put(trans); 986 987 c->btree_cache.pinned_nodes_leaf_mask = 0; 988 c->btree_cache.pinned_nodes_interior_mask = 0; 989 990 bch_err_fn(c, ret); 991 return ret; 992 } 993