// SPDX-License-Identifier: GPL-2.0
#include "bcachefs.h"
#include "bbpos.h"
#include "alloc_background.h"
#include "backpointers.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "btree_update_interior.h"
#include "btree_write_buffer.h"
#include "checksum.h"
#include "error.h"

#include <linux/mm.h>

/*
 * Check whether extent/btree-node key @k has a non-cached pointer whose
 * recomputed backpointer matches (@bucket, @bp) exactly; used to verify
 * that a backpointer still refers to the key it was created for.
 *
 * Devices are looked up under RCU; pointers to missing devices are skipped.
 */
static bool extent_matches_bp(struct bch_fs *c,
			      enum btree_id btree_id, unsigned level,
			      struct bkey_s_c k,
			      struct bpos bucket,
			      struct bch_backpointer bp)
{
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;

	rcu_read_lock();
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket2;
		struct bch_backpointer bp2;

		/* cached pointers don't get backpointers: */
		if (p.ptr.cached)
			continue;

		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
		if (!ca)
			continue;

		/* recompute what this pointer's backpointer should be: */
		bch2_extent_ptr_to_bp(c, ca, btree_id, level, k, p, entry, &bucket2, &bp2);
		if (bpos_eq(bucket, bucket2) &&
		    !memcmp(&bp, &bp2, sizeof(bp))) {
			rcu_read_unlock();
			return true;
		}
	}
	rcu_read_unlock();

	return false;
}

/*
 * bkey validation hook for KEY_TYPE_backpointer: verifies that the key's
 * position agrees with the bucket/offset encoded in the value.
 *
 * Returns 0 on success; on failure an fsck error is emitted via @err.
 */
int bch2_backpointer_invalid(struct bch_fs *c, struct bkey_s_c k,
			     enum bch_validate_flags flags,
			     struct printbuf *err)
{
	struct bkey_s_c_backpointer bp = bkey_s_c_to_backpointer(k);

	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu(c, bp.k->p.inode);
	if (!ca) {
		/* these will be caught by fsck */
		rcu_read_unlock();
		return 0;
	}

	struct bpos bucket = bp_pos_to_bucket(ca, bp.k->p);
	struct bpos bp_pos = bucket_pos_to_bp_noerror(ca, bucket, bp.v->bucket_offset);
	rcu_read_unlock();
	int ret = 0;

	/*
	 * NOTE(review): @ca was obtained under RCU but ca->mi.bucket_size is
	 * read below, after rcu_read_unlock() - confirm the device cannot go
	 * away here, or hoist this check above the unlock.
	 */
	bkey_fsck_err_on((bp.v->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT) >= ca->mi.bucket_size ||
			 !bpos_eq(bp.k->p, bp_pos),
			 c, err,
			 backpointer_bucket_offset_wrong,
			 "backpointer bucket_offset wrong");
fsck_err:
	return ret;
}

/* Print a bch_backpointer value: btree id, level, bucket offset, length, pos. */
void
bch2_backpointer_to_text(struct printbuf *out, const struct bch_backpointer *bp)
{
	/* bucket_offset packs a sector offset plus a sub-sector fraction: */
	prt_printf(out, "btree=%s l=%u offset=%llu:%u len=%u pos=",
		   bch2_btree_id_str(bp->btree_id),
		   bp->level,
		   (u64) (bp->bucket_offset >> MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   (u32) bp->bucket_offset & ~(~0U << MAX_EXTENT_COMPRESS_RATIO_SHIFT),
		   bp->bucket_len);
	bch2_bpos_to_text(out, bp->pos);
}

/*
 * Print a full backpointer bkey: the bucket it lives in (when the device
 * still exists), then the value.
 */
void bch2_backpointer_k_to_text(struct printbuf *out, struct bch_fs *c, struct bkey_s_c k)
{
	rcu_read_lock();
	struct bch_dev *ca = bch2_dev_rcu(c, k.k->p.inode);
	if (ca) {
		struct bpos bucket = bp_pos_to_bucket(ca, k.k->p);
		rcu_read_unlock();
		prt_str(out, "bucket=");
		bch2_bpos_to_text(out, bucket);
		prt_str(out, " ");
	} else {
		rcu_read_unlock();
	}

	bch2_backpointer_to_text(out, bkey_s_c_to_backpointer(k).v);
}

/* Byte-swap a backpointer value (cross-endian filesystem support). */
void bch2_backpointer_swab(struct bkey_s k)
{
	struct bkey_s_backpointer bp = bkey_s_to_backpointer(k);

	bp.v->bucket_offset = swab40(bp.v->bucket_offset);
	bp.v->bucket_len = swab32(bp.v->bucket_len);
	bch2_bpos_swab(&bp.v->pos);
}

/*
 * Report an inconsistency hit while modifying a backpointer: on insert,
 * an existing backpointer occupied the slot; on delete, the backpointer
 * to remove wasn't there.
 *
 * Delete mismatches are only reported once the
 * check_extents_to_backpointers recovery pass has completed - before
 * that, the backpointers btree may legitimately be out of sync.
 */
static noinline int backpointer_mod_err(struct btree_trans *trans,
					struct bch_backpointer bp,
					struct bkey_s_c bp_k,
					struct bkey_s_c orig_k,
					bool insert)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	if (insert) {
		prt_printf(&buf, "existing backpointer found when inserting ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "found ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	} else if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		prt_printf(&buf, "backpointer not found when deleting\n");
		printbuf_indent_add(&buf, 2);

		prt_printf(&buf, "searching for ");
		bch2_backpointer_to_text(&buf, &bp);
		prt_newline(&buf);

		prt_printf(&buf, "got ");
		bch2_bkey_val_to_text(&buf, c, bp_k);
		prt_newline(&buf);

		prt_printf(&buf, "for ");
		bch2_bkey_val_to_text(&buf, c, orig_k);

		bch_err(c, "%s", buf.buf);
	}

	printbuf_exit(&buf);

	if (c->curr_recovery_pass > BCH_RECOVERY_PASS_check_extents_to_backpointers) {
		/*
		 * NOTE(review): error codes elsewhere in this file are
		 * returned negated (-BCH_ERR_*); confirm the missing '-'
		 * here is intentional.
		 */
		return bch2_inconsistent_error(c) ? BCH_ERR_erofs_unfixed_errors : 0;
	} else {
		return 0;
	}
}

/*
 * Insert or delete the backpointer for (@bucket, @bp) directly in the
 * backpointers btree, bypassing the btree write buffer.
 *
 * The existing key at the backpointer's position is read first, and an
 * inconsistency is reported if it isn't what the operation expects.
 */
int bch2_bucket_backpointer_mod_nowritebuffer(struct btree_trans *trans,
					      struct bch_dev *ca,
					      struct bpos bucket,
					      struct bch_backpointer bp,
					      struct bkey_s_c orig_k,
					      bool insert)
{
	struct btree_iter bp_iter;
	struct bkey_s_c k;
	struct bkey_i_backpointer *bp_k;
	int ret;

	bp_k = bch2_trans_kmalloc_nomemzero(trans, sizeof(struct bkey_i_backpointer));
	ret = PTR_ERR_OR_ZERO(bp_k);
	if (ret)
		return ret;

	bkey_backpointer_init(&bp_k->k_i);
	bp_k->k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
	bp_k->v = bp;

	/* deletion is expressed as inserting a whiteout: */
	if (!insert) {
		bp_k->k.type = KEY_TYPE_deleted;
		set_bkey_val_u64s(&bp_k->k, 0);
	}

	k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
			       bp_k->k.p,
			       BTREE_ITER_intent|
			       BTREE_ITER_slots|
			       BTREE_ITER_with_updates);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/*
	 * Inserting: the slot should be empty;
	 * deleting: it should hold exactly the backpointer we expect:
	 */
	if (insert
	    ?
	      k.k->type
	    : (k.k->type != KEY_TYPE_backpointer ||
	       memcmp(bkey_s_c_to_backpointer(k).v, &bp, sizeof(bp)))) {
		ret = backpointer_mod_err(trans, bp, k, orig_k, insert);
		if (ret)
			goto err;
	}

	ret = bch2_trans_update(trans, &bp_iter, &bp_k->k_i, 0);
err:
	bch2_trans_iter_exit(trans, &bp_iter);
	return ret;
}

/*
 * Find the next backpointer >= *bp_offset:
 */
int bch2_get_next_backpointer(struct btree_trans *trans,
			      struct bch_dev *ca,
			      struct bpos bucket, int gen,
			      struct bpos *bp_pos,
			      struct bch_backpointer *bp,
			      unsigned iter_flags)
{
	struct bpos bp_end_pos = bucket_pos_to_bp(ca, bpos_nosnap_successor(bucket), 0);
	struct btree_iter alloc_iter = { NULL }, bp_iter = { NULL };
	struct bkey_s_c k;
	int ret = 0;

	if (bpos_ge(*bp_pos, bp_end_pos))
		goto done;

	/*
	 * When a bucket generation is supplied (gen >= 0), verify it still
	 * matches the alloc key - otherwise the bucket has been reused and
	 * its backpointers are no longer interesting:
	 */
	if (gen >= 0) {
		k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc,
				       bucket, BTREE_ITER_cached|iter_flags);
		ret = bkey_err(k);
		if (ret)
			goto out;

		if (k.k->type != KEY_TYPE_alloc_v4 ||
		    bkey_s_c_to_alloc_v4(k).v->gen != gen)
			goto done;
	}

	*bp_pos = bpos_max(*bp_pos, bucket_pos_to_bp(ca, bucket, 0));

	for_each_btree_key_norestart(trans, bp_iter, BTREE_ID_backpointers,
				     *bp_pos, iter_flags, k, ret) {
		if (bpos_ge(k.k->p, bp_end_pos))
			break;

		*bp_pos = k.k->p;
		*bp = *bkey_s_c_to_backpointer(k).v;
		goto out;
	}
done:
	/* no more backpointers for this bucket: */
	*bp_pos = SPOS_MAX;
out:
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_trans_iter_exit(trans, &alloc_iter);
	return ret;
}

/*
 * Report that a backpointer didn't resolve to a matching key. Only
 * reported when the write buffer is disabled; otherwise the backpointer
 * may simply have been deleted already and silence is correct.
 */
static void backpointer_not_found(struct btree_trans *trans,
				  struct bpos bp_pos,
				  struct bch_backpointer bp,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	/*
	 * If we're using the btree write buffer, the backpointer we were
	 * looking at may have already been deleted - failure to find what it
	 * pointed to is not an error:
	 */
	if (likely(!bch2_backpointers_no_use_write_buffer))
		return;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
		return;

	prt_printf(&buf, "backpointer doesn't match %s it points to:\n ",
		   bp.level ? "btree node" : "extent");
	prt_printf(&buf, "bucket: ");
	bch2_bpos_to_text(&buf, bucket);
	prt_printf(&buf, "\n ");

	prt_printf(&buf, "backpointer pos: ");
	bch2_bpos_to_text(&buf, bp_pos);
	prt_printf(&buf, "\n ");

	bch2_backpointer_to_text(&buf, &bp);
	prt_printf(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, k);
	if (c->curr_recovery_pass >= BCH_RECOVERY_PASS_check_extents_to_backpointers)
		bch_err_ratelimited(c, "%s", buf.buf);
	else
		bch2_trans_inconsistent(trans, "%s", buf.buf);

	printbuf_exit(&buf);
}

/*
 * Resolve a backpointer to the extent (level 0) or btree node key
 * (level > 0) it points to.
 *
 * On success, returns the key with @iter pointing at it (caller exits the
 * iter); returns bkey_s_c_null when the target wasn't found, which is not
 * an error when the write buffer is in use - see backpointer_not_found().
 */
struct bkey_s_c bch2_backpointer_get_key(struct btree_trans *trans,
					 struct btree_iter *iter,
					 struct bpos bp_pos,
					 struct bch_backpointer bp,
					 unsigned iter_flags)
{
	if (likely(!bp.level)) {
		struct bch_fs *c = trans->c;

		struct bpos bucket;
		if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
			return bkey_s_c_err(-EIO);

		bch2_trans_node_iter_init(trans, iter,
					  bp.btree_id,
					  bp.pos,
					  0, 0,
					  iter_flags);
		struct bkey_s_c k = bch2_btree_iter_peek_slot(iter);
		if (bkey_err(k)) {
			bch2_trans_iter_exit(trans, iter);
			return k;
		}

		if (k.k && extent_matches_bp(c, bp.btree_id, bp.level, k, bucket, bp))
			return k;

		bch2_trans_iter_exit(trans, iter);
		backpointer_not_found(trans, bp_pos, bp, k);
		return bkey_s_c_null;
	} else {
		/* backpointer is to an interior btree node: */
		struct btree *b = bch2_backpointer_get_node(trans, iter, bp_pos, bp);

		if (IS_ERR_OR_NULL(b)) {
			bch2_trans_iter_exit(trans, iter);
			return IS_ERR(b) ? bkey_s_c_err(PTR_ERR(b)) : bkey_s_c_null;
		}
		return bkey_i_to_s_c(&b->key);
	}
}

/*
 * Resolve a backpointer with level > 0 to the btree node it points to.
 *
 * Returns the node with @iter pointing at it; NULL if the node wasn't
 * found; -BCH_ERR_backpointer_to_overwritten_btree_node if the node is
 * being replaced by an in-flight interior update.
 */
struct btree *bch2_backpointer_get_node(struct btree_trans *trans,
					struct btree_iter *iter,
					struct bpos bp_pos,
					struct bch_backpointer bp)
{
	struct bch_fs *c = trans->c;

	BUG_ON(!bp.level);

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev(c, bp_pos, &bucket))
		return ERR_PTR(-EIO);

	/* iterate at level - 1 so peek_node() returns the node itself: */
	bch2_trans_node_iter_init(trans, iter,
				  bp.btree_id,
				  bp.pos,
				  0,
				  bp.level - 1,
				  0);
	struct btree *b = bch2_btree_iter_peek_node(iter);
	if (IS_ERR_OR_NULL(b))
		goto err;

	BUG_ON(b->c.level != bp.level - 1);

	if (extent_matches_bp(c, bp.btree_id, bp.level,
			      bkey_i_to_s_c(&b->key),
			      bucket, bp))
		return b;

	if (btree_node_will_make_reachable(b)) {
		b = ERR_PTR(-BCH_ERR_backpointer_to_overwritten_btree_node);
	} else {
		backpointer_not_found(trans, bp_pos, bp, bkey_i_to_s_c(&b->key));
		b = NULL;
	}
err:
	bch2_trans_iter_exit(trans, iter);
	return b;
}

/*
 * fsck helper: delete backpointers whose device or alloc key no longer
 * exists.
 */
static int bch2_check_btree_backpointer(struct btree_trans *trans, struct btree_iter *bp_iter,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter alloc_iter = { NULL };
	struct bkey_s_c alloc_k;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	struct bpos bucket;
	if (!bp_pos_to_bucket_nodev_noerror(c, k.k->p, &bucket)) {
		if (fsck_err(c, backpointer_to_missing_device,
			     "backpointer for missing device:\n%s",
			     (bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
			ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}

	alloc_k = bch2_bkey_get_iter(trans, &alloc_iter, BTREE_ID_alloc, bucket, 0);
	ret = bkey_err(alloc_k);
	if (ret)
		goto out;

	if (fsck_err_on(alloc_k.k->type != KEY_TYPE_alloc_v4, c,
			backpointer_to_missing_alloc,
			"backpointer for nonexistent alloc key: %llu:%llu:0\n%s",
			alloc_iter.pos.inode, alloc_iter.pos.offset,
			(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
		ret = bch2_btree_delete_at(trans, bp_iter, 0);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &alloc_iter);
	printbuf_exit(&buf);
	return ret;
}

/* verify that every backpointer has a corresponding alloc key */
int bch2_check_btree_backpointers(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
			BTREE_ID_backpointers, POS_MIN, 0, k,
			NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		  bch2_check_btree_backpointer(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

/* Full equality: same position, same size, same value bytes. */
static inline bool bkey_and_val_eq(struct bkey_s_c l, struct bkey_s_c r)
{
	return bpos_eq(l.k->p, r.k->p) &&
		bkey_bytes(l.k) == bkey_bytes(r.k) &&
		!memcmp(l.v, r.v, bkey_val_bytes(l.k));
}

/*
 * Per-pass state for extents -> backpointers checking: the bucket range
 * being checked this pass, plus the last key we flushed the write buffer
 * for (to avoid flushing repeatedly for the same key).
 */
struct extents_to_bp_state {
	struct bpos	bucket_start;
	struct bpos	bucket_end;
	struct bkey_buf	last_flushed;
};

/* Rewrite @extent in @btree with its pointer to device @dev removed. */
static int drop_dev_and_update(struct btree_trans *trans, enum btree_id btree,
			       struct bkey_s_c extent, unsigned dev)
{
	struct bkey_i *n = bch2_bkey_make_mut_noupdate(trans, extent);
	int ret = PTR_ERR_OR_ZERO(n);
	if (ret)
		return ret;

	bch2_bkey_drop_device(bkey_i_to_s(n), dev);
	return bch2_btree_insert_trans(trans, btree, n, 0);
}

/*
 * Two extents claim the same space on @dev: read the data @extent points
 * to and verify its checksum.
 *
 * Returns > 0 if the checksum was bad (and the bad pointer was dropped),
 * 0 if it matched or couldn't be checked, negative on error.
 */
static int check_extent_checksum(struct btree_trans *trans,
				 enum btree_id btree, struct bkey_s_c extent,
				 enum btree_id o_btree, struct bkey_s_c extent2, unsigned dev)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(extent);
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	struct printbuf buf = PRINTBUF;
	void *data_buf = NULL;
	struct bio *bio = NULL;
	size_t bytes;
	int ret = 0;

	if (bkey_is_btree_ptr(extent.k))
		return false;

	bkey_for_each_ptr_decode(extent.k, ptrs, p, entry)
		if (p.ptr.dev == dev)
			goto found;
	BUG();
found:
	/* no checksum to verify: */
	if (!p.crc.csum_type)
		return false;

	bytes = p.crc.compressed_size << 9;

	struct bch_dev *ca = bch2_dev_get_ioref(c, dev, READ);
	if (!ca)
		return false;

	data_buf = kvmalloc(bytes, GFP_KERNEL);
	if (!data_buf) {
		ret = -ENOMEM;
		goto err;
	}

	bio = bio_alloc(ca->disk_sb.bdev, buf_pages(data_buf, bytes), REQ_OP_READ, GFP_KERNEL);
	bio->bi_iter.bi_sector = p.ptr.offset;
	bch2_bio_map(bio, data_buf, bytes);
	ret = submit_bio_wait(bio);
	if (ret)
		goto err;

	prt_str(&buf, "extents pointing to same space, but first extent checksum bad:");
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(btree));
	bch2_bkey_val_to_text(&buf, c, extent);
	prt_printf(&buf, "\n %s ", bch2_btree_id_str(o_btree));
	bch2_bkey_val_to_text(&buf, c, extent2);

	struct nonce nonce = extent_nonce(extent.k->version, p.crc);
	struct bch_csum csum = bch2_checksum(c, p.crc.csum_type, nonce, data_buf, bytes);
	if (fsck_err_on(bch2_crc_cmp(csum, p.crc.csum),
			c, dup_backpointer_to_bad_csum_extent,
			"%s", buf.buf))
		ret = drop_dev_and_update(trans, btree, extent, dev) ?: 1;
fsck_err:
err:
	if (bio)
		bio_put(bio);
	kvfree(data_buf);
	percpu_ref_put(&ca->io_ref);
	printbuf_exit(&buf);
	return ret;
}

/*
 * Verify that the backpointer for (@bucket, @bp) exists and matches; if
 * not, either repair it or resolve the conflict with whatever extent the
 * existing backpointer refers to.
 *
 * May return -BCH_ERR_transaction_restart_write_buffer_flush: the write
 * buffer had to be flushed before the check could be trusted, and the
 * caller must retry.
 */
static int check_bp_exists(struct btree_trans *trans,
			   struct extents_to_bp_state *s,
			   struct bpos bucket,
			   struct bch_backpointer bp,
			   struct bkey_s_c orig_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter bp_iter = {};
	struct btree_iter other_extent_iter = {};
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c bp_k;
	struct bkey_buf tmp;
	int ret = 0;

	bch2_bkey_buf_init(&tmp);

	struct bch_dev *ca = bch2_dev_bucket_tryget(c, bucket);
	if (!ca) {
		prt_str(&buf, "extent for nonexistent device:bucket ");
		bch2_bpos_to_text(&buf, bucket);
		prt_str(&buf, "\n ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		bch_err(c, "%s", buf.buf);
		ret = -BCH_ERR_fsck_repair_unimplemented;
		/*
		 * NOTE(review): this path reaches bch2_dev_put() below with
		 * ca == NULL - assumes bch2_dev_put() tolerates NULL; confirm.
		 */
		goto err;
	}

	/* outside the bucket range being checked this pass: */
	if (bpos_lt(bucket, s->bucket_start) ||
	    bpos_gt(bucket, s->bucket_end))
		goto out;

	bp_k = bch2_bkey_get_iter(trans, &bp_iter, BTREE_ID_backpointers,
				  bucket_pos_to_bp(ca, bucket, bp.bucket_offset),
				  0);
	ret = bkey_err(bp_k);
	if (ret)
		goto err;

	if (bp_k.k->type != KEY_TYPE_backpointer ||
	    memcmp(bkey_s_c_to_backpointer(bp_k).v, &bp, sizeof(bp))) {
		bch2_bkey_buf_reassemble(&tmp, c, orig_k);

		/*
		 * Mismatch may just mean the write buffer hasn't been
		 * flushed; flush once per key (tracked via last_flushed)
		 * and restart before treating it as real:
		 */
		if (!bkey_and_val_eq(orig_k, bkey_i_to_s_c(s->last_flushed.k))) {
			if (bp.level) {
				bch2_trans_unlock(trans);
				bch2_btree_interior_updates_flush(c);
			}

			ret = bch2_btree_write_buffer_flush_sync(trans);
			if (ret)
				goto err;

			bch2_bkey_buf_copy(&s->last_flushed, c, tmp.k);
			ret = -BCH_ERR_transaction_restart_write_buffer_flush;
			goto out;
		}

		goto check_existing_bp;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &other_extent_iter);
	bch2_trans_iter_exit(trans, &bp_iter);
	bch2_bkey_buf_exit(&tmp, c);
	bch2_dev_put(ca);
	printbuf_exit(&buf);
	return ret;
check_existing_bp:
	/* Do we have a backpointer for a different extent? */
	if (bp_k.k->type != KEY_TYPE_backpointer)
		goto missing;

	struct bch_backpointer other_bp = *bkey_s_c_to_backpointer(bp_k).v;

	struct bkey_s_c other_extent =
		bch2_backpointer_get_key(trans, &other_extent_iter, bp_k.k->p, other_bp, 0);
	ret = bkey_err(other_extent);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		ret = 0;
	if (ret)
		goto err;

	if (!other_extent.k)
		goto missing;

	/* same logical extent twice - keep the larger version: */
	if (bch2_extents_match(orig_k, other_extent)) {
		printbuf_reset(&buf);
		prt_printf(&buf, "duplicate versions of same extent, deleting smaller\n ");
		bch2_bkey_val_to_text(&buf, c, orig_k);
		prt_str(&buf, "\n ");
		bch2_bkey_val_to_text(&buf, c, other_extent);
		bch_err(c, "%s", buf.buf);

		if (other_extent.k->size <= orig_k.k->size) {
			ret = drop_dev_and_update(trans, other_bp.btree_id, other_extent, bucket.inode);
			if (ret)
				goto err;
			goto out;
		} else {
			ret = drop_dev_and_update(trans, bp.btree_id, orig_k, bucket.inode);
			if (ret)
				goto err;
			goto missing;
		}
	}

	/* distinct extents claiming the same space: checksum both sides */
	ret = check_extent_checksum(trans, other_bp.btree_id, other_extent, bp.btree_id, orig_k, bucket.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		/* the other extent was bad and dropped; ours wins */
		ret = 0;
		goto missing;
	}

	ret = check_extent_checksum(trans, bp.btree_id, orig_k, other_bp.btree_id, other_extent, bucket.inode);
	if (ret < 0)
		goto err;
	if (ret) {
		/* our extent was bad and dropped; keep the other */
		ret = 0;
		goto out;
	}

	/* both checksums good - can't decide automatically: */
	printbuf_reset(&buf);
	prt_printf(&buf, "duplicate extents pointing to same space on dev %llu\n ", bucket.inode);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_str(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, other_extent);
	bch_err(c, "%s", buf.buf);
	ret = -BCH_ERR_fsck_repair_unimplemented;
	goto err;
missing:
	printbuf_reset(&buf);
	prt_printf(&buf, "missing backpointer for btree=%s l=%u ",
		   bch2_btree_id_str(bp.btree_id), bp.level);
	bch2_bkey_val_to_text(&buf, c, orig_k);
	prt_printf(&buf, "\n got: ");
	bch2_bkey_val_to_text(&buf, c, bp_k);

	struct bkey_i_backpointer n_bp_k;
	bkey_backpointer_init(&n_bp_k.k_i);
	n_bp_k.k.p = bucket_pos_to_bp(ca, bucket, bp.bucket_offset);
	n_bp_k.v = bp;
	prt_printf(&buf, "\n want: ");
	bch2_bkey_val_to_text(&buf, c, bkey_i_to_s_c(&n_bp_k.k_i));

	if (fsck_err(c, ptr_to_missing_backpointer, "%s", buf.buf))
		ret = bch2_bucket_backpointer_mod(trans, ca, bucket, bp, orig_k, true);

	goto out;
}

/*
 * Check that every non-cached pointer in @k has a matching backpointer.
 */
static int check_extent_to_backpointers(struct btree_trans *trans,
					struct extents_to_bp_state *s,
					enum btree_id btree, unsigned level,
					struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs;
	const union bch_extent_entry *entry;
	struct extent_ptr_decoded p;
	int ret;

	ptrs = bch2_bkey_ptrs_c(k);
	bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
		struct bpos bucket_pos = POS_MIN;
		struct bch_backpointer bp;

		if (p.ptr.cached)
			continue;

		/* compute the expected backpointer under RCU: */
		rcu_read_lock();
		struct bch_dev *ca = bch2_dev_rcu(c, p.ptr.dev);
		if (ca)
			bch2_extent_ptr_to_bp(c, ca, btree, level, k, p, entry, &bucket_pos, &bp);
		rcu_read_unlock();

		if (!ca)
			continue;

		ret = check_bp_exists(trans, s, bucket_pos, bp, k);
		if (ret)
			return ret;
	}

	return 0;
}

/*
 * Check the root node's key of @btree_id against the backpointers btree,
 * and report the root's level back via @level for the caller's
 * level-by-level walk. Retries if the root moved underneath us.
 */
static int check_btree_root_to_backpointers(struct btree_trans *trans,
					    struct extents_to_bp_state *s,
					    enum btree_id btree_id,
					    int *level)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct btree *b;
	struct bkey_s_c k;
	int ret;
retry:
	bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN,
				  0, bch2_btree_id_root(c, btree_id)->b->c.level, 0);
	b = bch2_btree_iter_peek_node(&iter);
	ret = PTR_ERR_OR_ZERO(b);
	if (ret)
		goto err;

	/* root was replaced while we weren't looking - start over: */
	if (b != btree_node_root(c, b)) {
		bch2_trans_iter_exit(trans, &iter);
		goto retry;
	}

	*level = b->c.level;

	k = bkey_i_to_s_c(&b->key);
	/* the root's own key lives at level + 1 conceptually: */
	ret = check_extent_to_backpointers(trans, s, btree_id, b->c.level + 1, k);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* Convert a backpointer's target to a btree+pos pair for range checks. */
static inline struct bbpos bp_to_bbpos(struct bch_backpointer bp)
{
	return (struct bbpos) {
		.btree	= bp.btree_id,
		.pos	= bp.pos,
	};
}

/* Bytes of btree nodes fsck may pin, as a percentage of total ram. */
static u64 mem_may_pin_bytes(struct bch_fs *c)
{
	struct sysinfo i;
	si_meminfo(&i);

	u64 mem_bytes = i.totalram * i.mem_unit;
	return div_u64(mem_bytes * c->opts.fsck_memory_usage_percent, 100);
}

/* How many btree nodes fit within the pinnable memory budget. */
static size_t btree_nodes_fit_in_ram(struct bch_fs *c)
{
	return div_u64(mem_may_pin_bytes(c), c->opts.btree_node_size);
}

/*
 * Walk the selected btrees from @start, accumulating node sizes until the
 * memory budget is exhausted; *end is set to the last position whose
 * nodes fit (BBPOS_MAX if everything fits). The range is recorded in
 * btree_cache.pinned_nodes_* so those nodes stay cached.
 */
static int bch2_get_btree_in_memory_pos(struct btree_trans *trans,
					u64 btree_leaf_mask,
					u64 btree_interior_mask,
					struct bbpos start, struct bbpos *end)
{
	struct bch_fs *c = trans->c;
	s64 mem_may_pin = mem_may_pin_bytes(c);
	int ret = 0;

	btree_interior_mask |= btree_leaf_mask;

	c->btree_cache.pinned_nodes_leaf_mask = btree_leaf_mask;
	c->btree_cache.pinned_nodes_interior_mask = btree_interior_mask;
	c->btree_cache.pinned_nodes_start = start;
	c->btree_cache.pinned_nodes_end = *end = BBPOS_MAX;

	for (enum btree_id btree = start.btree;
	     btree < BTREE_ID_NR && !ret;
	     btree++) {
		/* depth 0 includes leaves; depth 1 is interior-only: */
		unsigned depth = ((1U << btree) & btree_leaf_mask) ? 0 : 1;
		struct btree_iter iter;
		struct btree *b;

		if (!((1U << btree) & btree_leaf_mask) &&
		    !((1U << btree) & btree_interior_mask))
			continue;

		__for_each_btree_node(trans, iter, btree,
				      btree == start.btree ? start.pos : POS_MIN,
				      0, depth, BTREE_ITER_prefetch, b, ret) {
			mem_may_pin -= btree_buf_bytes(b);
			if (mem_may_pin <= 0) {
				c->btree_cache.pinned_nodes_end = *end =
					BBPOS(btree, b->key.k.p);
				bch2_trans_iter_exit(trans, &iter);
				return 0;
			}
		}
		bch2_trans_iter_exit(trans, &iter);
	}

	return ret;
}

/*
 * One extents -> backpointers pass: for every btree, walk each level from
 * the root down and check that every pointer has a backpointer within the
 * pass's bucket range.
 */
static int bch2_check_extents_to_backpointers_pass(struct btree_trans *trans,
						   struct extents_to_bp_state *s)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	for (enum btree_id btree_id = 0;
	     btree_id < btree_id_nr_alive(c);
	     btree_id++) {
		/* btrees without extent-style keys only have interior pointers: */
		int level, depth = btree_type_has_ptrs(btree_id) ? 0 : 1;

		ret = commit_do(trans, NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
				check_btree_root_to_backpointers(trans, s, btree_id, &level));
		if (ret)
			return ret;

		while (level >= depth) {
			struct btree_iter iter;
			bch2_trans_node_iter_init(trans, &iter, btree_id, POS_MIN, 0, level,
						  BTREE_ITER_prefetch);

			ret = for_each_btree_key_continue(trans, iter, 0, k, ({
				check_extent_to_backpointers(trans, s, btree_id, level, k) ?:
				bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
			}));
			if (ret)
				return ret;

			--level;
		}
	}

	return 0;
}

/*
 * fsck entry point: verify every extent/btree node pointer has a
 * backpointer. Runs in one or more passes, each covering as large a
 * bucket range as the backpointers btree can be pinned in memory for.
 */
int bch2_check_extents_to_backpointers(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct extents_to_bp_state s = { .bucket_start = POS_MIN };
	int ret;

	bch2_bkey_buf_init(&s.last_flushed);
	bkey_init(&s.last_flushed.k->k);

	while (1) {
		struct bbpos end;
		ret = bch2_get_btree_in_memory_pos(trans,
						   BIT_ULL(BTREE_ID_backpointers),
						   BIT_ULL(BTREE_ID_backpointers),
						   BBPOS(BTREE_ID_backpointers, s.bucket_start), &end);
		if (ret)
			break;

		s.bucket_end = end.pos;

		if ( bpos_eq(s.bucket_start, POS_MIN) &&
		    !bpos_eq(s.bucket_end, SPOS_MAX))
			bch_verbose(c, "%s(): alloc info does not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (!bpos_eq(s.bucket_start, POS_MIN) ||
		    !bpos_eq(s.bucket_end, SPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_extents_to_backpointers(): ");
			bch2_bpos_to_text(&buf, s.bucket_start);
			prt_str(&buf, "-");
			bch2_bpos_to_text(&buf, s.bucket_end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_extents_to_backpointers_pass(trans, &s);
		if (ret || bpos_eq(s.bucket_end, SPOS_MAX))
			break;

		s.bucket_start = bpos_successor(s.bucket_end);
	}
	bch2_trans_put(trans);
	bch2_bkey_buf_exit(&s.last_flushed, c);

	/* release the node pins set up by bch2_get_btree_in_memory_pos(): */
	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check that one backpointer (whose target falls in [start, end])
 * resolves to a real extent/btree node; flush the write buffer once per
 * position before deleting a dangling backpointer.
 */
static int check_one_backpointer(struct btree_trans *trans,
				 struct bbpos start,
				 struct bbpos end,
				 struct bkey_s_c_backpointer bp,
				 struct bpos *last_flushed_pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bbpos pos = bp_to_bbpos(*bp.v);
	struct bkey_s_c k;
	struct printbuf buf = PRINTBUF;
	int ret;

	if (bbpos_cmp(pos, start) < 0 ||
	    bbpos_cmp(pos, end) > 0)
		return 0;

	k = bch2_backpointer_get_key(trans, &iter, bp.k->p, *bp.v, 0);
	ret = bkey_err(k);
	if (ret == -BCH_ERR_backpointer_to_overwritten_btree_node)
		return 0;
	if (ret)
		return ret;

	/* target missing: flush the write buffer and retry once first */
	if (!k.k && !bpos_eq(*last_flushed_pos, bp.k->p)) {
		*last_flushed_pos = bp.k->p;
		ret = bch2_btree_write_buffer_flush_sync(trans) ?:
			-BCH_ERR_transaction_restart_write_buffer_flush;
		goto out;
	}

	if (fsck_err_on(!k.k, c,
			backpointer_to_missing_ptr,
			"backpointer for missing %s\n %s",
			bp.v->level ? "btree node" : "extent",
			(bch2_bkey_val_to_text(&buf, c, bp.s_c), buf.buf))) {
		ret = bch2_btree_delete_at_buffered(trans, BTREE_ID_backpointers, bp.k->p);
		goto out;
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
}

/* One backpointers -> extents pass over targets in [start, end]. */
static int bch2_check_backpointers_to_extents_pass(struct btree_trans *trans,
						   struct bbpos start,
						   struct bbpos end)
{
	struct bpos last_flushed_pos = SPOS_MAX;

	return for_each_btree_key_commit(trans, iter, BTREE_ID_backpointers,
					 POS_MIN, BTREE_ITER_prefetch, k,
					 NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_one_backpointer(trans, start, end,
				      bkey_s_c_to_backpointer(k),
				      &last_flushed_pos));
}

/*
 * fsck entry point: verify every backpointer points at a real extent or
 * btree node. Like the forward check, runs in passes sized to how much
 * of the extents/reflink btrees can be pinned in memory.
 */
int bch2_check_backpointers_to_extents(struct bch_fs *c)
{
	struct btree_trans *trans = bch2_trans_get(c);
	struct bbpos start = (struct bbpos) { .btree = 0, .pos = POS_MIN, }, end;
	int ret;

	while (1) {
		ret = bch2_get_btree_in_memory_pos(trans,
						   (1U << BTREE_ID_extents)|
						   (1U << BTREE_ID_reflink),
						   ~0,
						   start, &end);
		if (ret)
			break;

		if (!bbpos_cmp(start, BBPOS_MIN) &&
		     bbpos_cmp(end, BBPOS_MAX))
			bch_verbose(c, "%s(): extents do not fit in ram, running in multiple passes with %zu nodes per pass",
				    __func__, btree_nodes_fit_in_ram(c));

		if (bbpos_cmp(start, BBPOS_MIN) ||
		    bbpos_cmp(end, BBPOS_MAX)) {
			struct printbuf buf = PRINTBUF;

			prt_str(&buf, "check_backpointers_to_extents(): ");
			bch2_bbpos_to_text(&buf, start);
			prt_str(&buf, "-");
			bch2_bbpos_to_text(&buf, end);

			bch_verbose(c, "%s", buf.buf);
			printbuf_exit(&buf);
		}

		ret = bch2_check_backpointers_to_extents_pass(trans, start, end);
		if (ret || !bbpos_cmp(end, BBPOS_MAX))
			break;

		start = bbpos_successor(end);
	}
	bch2_trans_put(trans);

	/* release the node pins set up by bch2_get_btree_in_memory_pos(): */
	c->btree_cache.pinned_nodes_leaf_mask = 0;
	c->btree_cache.pinned_nodes_interior_mask = 0;

	bch_err_fn(c, ret);
	return ret;
}