1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "bkey_buf.h" 5 #include "btree_cache.h" 6 #include "btree_update.h" 7 #include "buckets.h" 8 #include "darray.h" 9 #include "dirent.h" 10 #include "error.h" 11 #include "fs-common.h" 12 #include "fsck.h" 13 #include "inode.h" 14 #include "keylist.h" 15 #include "recovery.h" 16 #include "snapshot.h" 17 #include "super.h" 18 #include "xattr.h" 19 20 #include <linux/bsearch.h> 21 #include <linux/dcache.h> /* struct qstr */ 22 23 /* 24 * XXX: this is handling transaction restarts without returning 25 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: 26 */ 27 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, 28 u32 snapshot) 29 { 30 u64 sectors = 0; 31 32 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, 33 SPOS(inum, 0, snapshot), 34 POS(inum, U64_MAX), 35 0, k, ({ 36 if (bkey_extent_is_allocation(k.k)) 37 sectors += k.k->size; 38 0; 39 })); 40 41 return ret ?: sectors; 42 } 43 44 static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, 45 u32 snapshot) 46 { 47 u64 subdirs = 0; 48 49 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, 50 SPOS(inum, 0, snapshot), 51 POS(inum, U64_MAX), 52 0, k, ({ 53 if (k.k->type == KEY_TYPE_dirent && 54 bkey_s_c_to_dirent(k).v->d_type == DT_DIR) 55 subdirs++; 56 0; 57 })); 58 59 return ret ?: subdirs; 60 } 61 62 static int subvol_lookup(struct btree_trans *trans, u32 subvol, 63 u32 *snapshot, u64 *inum) 64 { 65 struct bch_subvolume s; 66 int ret; 67 68 ret = bch2_subvolume_get(trans, subvol, false, 0, &s); 69 70 *snapshot = le32_to_cpu(s.snapshot); 71 *inum = le64_to_cpu(s.inode); 72 return ret; 73 } 74 75 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, 76 struct bch_inode_unpacked *inode) 77 { 78 struct btree_iter iter; 79 struct bkey_s_c k; 80 int ret; 81 82 bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes, 83 POS(0, inode_nr), 84 BTREE_ITER_ALL_SNAPSHOTS); 85 k = bch2_btree_iter_peek(&iter); 86 ret = bkey_err(k); 87 if (ret) 88 goto err; 89 90 if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) { 91 ret = -BCH_ERR_ENOENT_inode; 92 goto err; 93 } 94 95 ret = bch2_inode_unpack(k, inode); 96 err: 97 bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); 98 bch2_trans_iter_exit(trans, &iter); 99 return ret; 100 } 101 102 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, 103 struct bch_inode_unpacked *inode, 104 u32 *snapshot) 105 { 106 struct btree_iter iter; 107 struct bkey_s_c k; 108 int ret; 109 110 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 111 SPOS(0, inode_nr, *snapshot), 0); 112 ret = bkey_err(k); 113 if (ret) 114 goto err; 115 116 ret = bkey_is_inode(k.k) 117 ? bch2_inode_unpack(k, inode) 118 : -BCH_ERR_ENOENT_inode; 119 if (!ret) 120 *snapshot = iter.pos.snapshot; 121 err: 122 bch2_trans_iter_exit(trans, &iter); 123 return ret; 124 } 125 126 static int lookup_dirent_in_snapshot(struct btree_trans *trans, 127 struct bch_hash_info hash_info, 128 subvol_inum dir, struct qstr *name, 129 u64 *target, unsigned *type, u32 snapshot) 130 { 131 struct btree_iter iter; 132 struct bkey_s_c_dirent d; 133 int ret = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, 134 &hash_info, dir, name, 0, snapshot); 135 if (ret) 136 return ret; 137 138 d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); 139 *target = le64_to_cpu(d.v->d_inum); 140 *type = d.v->d_type; 141 bch2_trans_iter_exit(trans, &iter); 142 return 0; 143 } 144 145 static int __remove_dirent(struct btree_trans *trans, struct bpos pos) 146 { 147 struct bch_fs *c = trans->c; 148 struct btree_iter iter; 149 struct bch_inode_unpacked dir_inode; 150 struct bch_hash_info dir_hash_info; 151 int ret; 152 153 ret = lookup_first_inode(trans, pos.inode, &dir_inode); 154 if (ret) 155 goto err; 156 157 dir_hash_info = bch2_hash_info_init(c, &dir_inode); 158 159 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_INTENT); 160 161 ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc, 162 &dir_hash_info, &iter, 163 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 164 bch2_trans_iter_exit(trans, &iter); 165 err: 166 bch_err_fn(c, ret); 167 return ret; 168 } 169 170 /* Get lost+found, create if it doesn't exist: */ 171 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, 172 struct bch_inode_unpacked *lostfound) 173 { 174 struct bch_fs *c = trans->c; 175 struct qstr lostfound_str = QSTR("lost+found"); 176 u64 inum = 0; 177 unsigned d_type = 0; 178 int ret; 179 180 struct bch_snapshot_tree st; 181 ret = bch2_snapshot_tree_lookup(trans, 182 bch2_snapshot_tree(c, snapshot), &st); 183 if (ret) 184 return ret; 185 186 subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; 187 u32 subvol_snapshot; 188 189 ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol), 190 &subvol_snapshot, &root_inum.inum); 191 bch_err_msg(c, ret, "looking up root subvol"); 192 if (ret) 193 return ret; 194 195 struct bch_inode_unpacked root_inode; 196 struct bch_hash_info root_hash_info; 197 u32 root_inode_snapshot = snapshot; 198 ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); 199 bch_err_msg(c, ret, "looking up root inode"); 200 if (ret) 201 return ret; 202 203 root_hash_info = bch2_hash_info_init(c, &root_inode); 204 205 ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, 206 &lostfound_str, &inum, &d_type, snapshot); 207 if (bch2_err_matches(ret, ENOENT)) 208 goto create_lostfound; 209 210 bch_err_fn(c, ret); 211 if (ret) 212 return ret; 213 214 if (d_type != DT_DIR) { 215 bch_err(c, "error looking up lost+found: not a directory"); 216 return -BCH_ERR_ENOENT_not_directory; 217 } 218 219 /* 220 * The bch2_check_dirents pass has already run, dangling dirents 221 * shouldn't exist here: 222 */ 223 ret = lookup_inode(trans, inum, lostfound, &snapshot); 224 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", 225 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); 226 return ret; 227 228 create_lostfound: 229 /* 230 * XXX: we could have a nicer log message here if we had a nice way to 231 * walk backpointers to print a path 232 */ 233 bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); 234 235 u64 now = bch2_current_time(c); 236 struct btree_iter lostfound_iter = { NULL }; 237 u64 cpu = raw_smp_processor_id(); 238 239 bch2_inode_init_early(c, lostfound); 240 bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); 241 lostfound->bi_dir = root_inode.bi_inum; 242 243 root_inode.bi_nlink++; 244 245 ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); 246 if (ret) 247 goto err; 248 249 bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); 250 ret = bch2_btree_iter_traverse(&lostfound_iter); 251 if (ret) 252 goto err; 253 254 ret = bch2_dirent_create_snapshot(trans, 255 0, root_inode.bi_inum, snapshot, &root_hash_info, 256 mode_to_type(lostfound->bi_mode), 257 &lostfound_str, 258 lostfound->bi_inum, 259 &lostfound->bi_dir_offset, 260 BCH_HASH_SET_MUST_CREATE) ?: 261 bch2_inode_write_flags(trans, &lostfound_iter, lostfound, 262 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 263 err: 264 bch_err_msg(c, ret, "creating lost+found"); 265 bch2_trans_iter_exit(trans, &lostfound_iter); 266 return ret; 267 } 268 269 static int reattach_inode(struct btree_trans *trans, 270 struct bch_inode_unpacked *inode, 271 u32 inode_snapshot) 272 { 273 struct bch_hash_info dir_hash; 274 struct bch_inode_unpacked lostfound; 275 char name_buf[20]; 276 struct qstr name; 277 u64 dir_offset = 0; 278 u32 dirent_snapshot = inode_snapshot; 279 int ret; 280 281 if (inode->bi_subvol) { 282 inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; 283 284 u64 root_inum; 285 ret = subvol_lookup(trans, inode->bi_parent_subvol, 286 &dirent_snapshot, &root_inum); 287 if (ret) 288 return ret; 289 290 snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol); 291 } else { 292 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); 293 } 294 295 ret = lookup_lostfound(trans, dirent_snapshot, &lostfound); 296 if (ret) 297 return ret; 298 299 if (S_ISDIR(inode->bi_mode)) { 300 lostfound.bi_nlink++; 301 302 ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); 303 if (ret) 304 return ret; 305 } 306 307 dir_hash = bch2_hash_info_init(trans->c, &lostfound); 308 309 name = (struct qstr) QSTR(name_buf); 310 311 ret = bch2_dirent_create_snapshot(trans, 312 inode->bi_parent_subvol, lostfound.bi_inum, 313 dirent_snapshot, 314 &dir_hash, 315 inode_d_type(inode), 316 &name, 317 inode->bi_subvol ?: inode->bi_inum, 318 &dir_offset, 319 BCH_HASH_SET_MUST_CREATE); 320 if (ret) 321 return ret; 322 323 inode->bi_dir = lostfound.bi_inum; 324 inode->bi_dir_offset = dir_offset; 325 326 return __bch2_fsck_write_inode(trans, inode, inode_snapshot); 327 } 328 329 static int remove_backpointer(struct btree_trans *trans, 330 struct bch_inode_unpacked *inode) 331 { 332 struct btree_iter iter; 333 struct bkey_s_c_dirent d; 334 int ret; 335 336 d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, 337 POS(inode->bi_dir, inode->bi_dir_offset), 0, 338 dirent); 339 ret = bkey_err(d) ?: 340 __remove_dirent(trans, d.k->p); 341 bch2_trans_iter_exit(trans, &iter); 342 return ret; 343 } 344 345 static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) 346 { 347 struct bch_fs *c = trans->c; 348 349 struct bch_inode_unpacked inode; 350 int ret = bch2_inode_find_by_inum_trans(trans, 351 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 352 &inode); 353 if (ret) 354 return ret; 355 356 ret = remove_backpointer(trans, &inode); 357 bch_err_msg(c, ret, "removing dirent"); 358 if (ret) 359 return ret; 360 361 ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); 362 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 363 return ret; 364 } 365 366 struct snapshots_seen_entry { 367 u32 id; 368 u32 equiv; 369 }; 370 371 struct snapshots_seen { 372 struct bpos pos; 373 DARRAY(struct snapshots_seen_entry) ids; 374 }; 375 376 static inline void snapshots_seen_exit(struct snapshots_seen *s) 377 { 378 darray_exit(&s->ids); 379 } 380 381 static inline void snapshots_seen_init(struct snapshots_seen *s) 382 { 383 memset(s, 0, sizeof(*s)); 384 } 385 386 static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) 387 { 388 struct snapshots_seen_entry *i, n = { 389 .id = id, 390 .equiv = bch2_snapshot_equiv(c, id), 391 }; 392 int ret = 0; 393 394 __darray_for_each(s->ids, i) { 395 if (i->id == id) 396 return 0; 397 if (i->id > id) 398 break; 399 } 400 401 ret = darray_insert_item(&s->ids, i - s->ids.data, n); 402 if (ret) 403 bch_err(c, "error reallocating snapshots_seen table (size %zu)", 404 s->ids.size); 405 return ret; 406 } 407 408 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, 409 enum btree_id btree_id, struct bpos pos) 410 { 411 struct snapshots_seen_entry n = { 412 .id = pos.snapshot, 413 .equiv = bch2_snapshot_equiv(c, pos.snapshot), 414 }; 415 int ret = 0; 416 417 if (!bkey_eq(s->pos, pos)) 418 s->ids.nr = 0; 419 420 s->pos = pos; 421 s->pos.snapshot = n.equiv; 422 423 darray_for_each(s->ids, i) { 424 if (i->id == n.id) 425 return 0; 426 427 /* 428 * We currently don't rigorously track for snapshot cleanup 429 * needing to be run, so it shouldn't be a fsck error yet: 430 */ 431 if (i->equiv == n.equiv) { 432 bch_err(c, "snapshot deletion did not finish:\n" 433 " duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n", 434 bch2_btree_id_str(btree_id), 435 pos.inode, pos.offset, 436 i->id, n.id, n.equiv); 437 set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags); 438 return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots); 439 } 440 } 441 442 ret = darray_push(&s->ids, n); 443 if (ret) 444 bch_err(c, "error reallocating snapshots_seen table (size %zu)", 445 s->ids.size); 446 return ret; 447 } 448 449 /** 450 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, 451 * and @ancestor hasn't been overwritten in @seen 452 * 453 * @c: filesystem handle 454 * @seen: list of snapshot ids already seen at current position 455 * @id: descendent snapshot id 456 * @ancestor: ancestor snapshot id 457 * 458 * Returns: whether key in @ancestor snapshot is visible in @id snapshot 459 */ 460 static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, 461 u32 id, u32 ancestor) 462 { 463 ssize_t i; 464 465 EBUG_ON(id > ancestor); 466 EBUG_ON(!bch2_snapshot_is_equiv(c, id)); 467 EBUG_ON(!bch2_snapshot_is_equiv(c, ancestor)); 468 469 /* @ancestor should be the snapshot most recently added to @seen */ 470 EBUG_ON(ancestor != seen->pos.snapshot); 471 EBUG_ON(ancestor != seen->ids.data[seen->ids.nr - 1].equiv); 472 473 if (id == ancestor) 474 return true; 475 476 if (!bch2_snapshot_is_ancestor(c, id, ancestor)) 477 return false; 478 479 /* 480 * We know that @id is a descendant of @ancestor, we're checking if 481 * we've seen a key that overwrote @ancestor - i.e. also a descendent of 482 * @ascestor and with @id as a descendent. 483 * 484 * But we already know that we're scanning IDs between @id and @ancestor 485 * numerically, since snapshot ID lists are kept sorted, so if we find 486 * an id that's an ancestor of @id we're done: 487 */ 488 489 for (i = seen->ids.nr - 2; 490 i >= 0 && seen->ids.data[i].equiv >= id; 491 --i) 492 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv)) 493 return false; 494 495 return true; 496 } 497 498 /** 499 * ref_visible - given a key with snapshot id @src that points to a key with 500 * snapshot id @dst, test whether there is some snapshot in which @dst is 501 * visible. 502 * 503 * @c: filesystem handle 504 * @s: list of snapshot IDs already seen at @src 505 * @src: snapshot ID of src key 506 * @dst: snapshot ID of dst key 507 * Returns: true if there is some snapshot in which @dst is visible 508 * 509 * Assumes we're visiting @src keys in natural key order 510 */ 511 static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, 512 u32 src, u32 dst) 513 { 514 return dst <= src 515 ? key_visible_in_snapshot(c, s, dst, src) 516 : bch2_snapshot_is_ancestor(c, src, dst); 517 } 518 519 static int ref_visible2(struct bch_fs *c, 520 u32 src, struct snapshots_seen *src_seen, 521 u32 dst, struct snapshots_seen *dst_seen) 522 { 523 src = bch2_snapshot_equiv(c, src); 524 dst = bch2_snapshot_equiv(c, dst); 525 526 if (dst > src) { 527 swap(dst, src); 528 swap(dst_seen, src_seen); 529 } 530 return key_visible_in_snapshot(c, src_seen, dst, src); 531 } 532 533 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ 534 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ 535 (_i)->snapshot <= (_snapshot); _i++) \ 536 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) 537 538 struct inode_walker_entry { 539 struct bch_inode_unpacked inode; 540 u32 snapshot; 541 bool seen_this_pos; 542 u64 count; 543 }; 544 545 struct inode_walker { 546 bool first_this_inode; 547 bool recalculate_sums; 548 struct bpos last_pos; 549 550 DARRAY(struct inode_walker_entry) inodes; 551 }; 552 553 static void inode_walker_exit(struct inode_walker *w) 554 { 555 darray_exit(&w->inodes); 556 } 557 558 static struct inode_walker inode_walker_init(void) 559 { 560 return (struct inode_walker) { 0, }; 561 } 562 563 static int add_inode(struct bch_fs *c, struct inode_walker *w, 564 struct bkey_s_c inode) 565 { 566 struct bch_inode_unpacked u; 567 568 BUG_ON(bch2_inode_unpack(inode, &u)); 569 570 return darray_push(&w->inodes, ((struct inode_walker_entry) { 571 .inode = u, 572 .snapshot = bch2_snapshot_equiv(c, inode.k->p.snapshot), 573 })); 574 } 575 576 static int get_inodes_all_snapshots(struct btree_trans *trans, 577 struct inode_walker *w, u64 inum) 578 { 579 struct bch_fs *c = trans->c; 580 struct btree_iter iter; 581 struct bkey_s_c k; 582 int ret; 583 584 w->recalculate_sums = false; 585 w->inodes.nr = 0; 586 587 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 588 BTREE_ITER_ALL_SNAPSHOTS, k, ret) { 589 if (k.k->p.offset != inum) 590 break; 591 592 if (bkey_is_inode(k.k)) 593 add_inode(c, w, k); 594 } 595 bch2_trans_iter_exit(trans, &iter); 596 597 if (ret) 598 return ret; 599 600 w->first_this_inode = true; 601 return 0; 602 } 603 604 static struct inode_walker_entry * 605 lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k) 606 { 607 bool is_whiteout = k.k->type == KEY_TYPE_whiteout; 608 u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); 609 610 struct inode_walker_entry *i; 611 __darray_for_each(w->inodes, i) 612 if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot)) 613 goto found; 614 615 return NULL; 616 found: 617 BUG_ON(snapshot > i->snapshot); 618 619 if (snapshot != i->snapshot && !is_whiteout) { 620 struct inode_walker_entry new = *i; 621 622 new.snapshot = snapshot; 623 new.count = 0; 624 625 struct printbuf buf = PRINTBUF; 626 bch2_bkey_val_to_text(&buf, c, k); 627 628 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" 629 "unexpected because we should always update the inode when we update a key in that inode\n" 630 "%s", 631 w->last_pos.inode, snapshot, i->snapshot, buf.buf); 632 printbuf_exit(&buf); 633 634 while (i > w->inodes.data && i[-1].snapshot > snapshot) 635 --i; 636 637 size_t pos = i - w->inodes.data; 638 int ret = darray_insert_item(&w->inodes, pos, new); 639 if (ret) 640 return ERR_PTR(ret); 641 642 i = w->inodes.data + pos; 643 } 644 645 return i; 646 } 647 648 static struct inode_walker_entry *walk_inode(struct btree_trans *trans, 649 struct inode_walker *w, 650 struct bkey_s_c k) 651 { 652 if (w->last_pos.inode != k.k->p.inode) { 653 int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); 654 if (ret) 655 return ERR_PTR(ret); 656 } else if (bkey_cmp(w->last_pos, k.k->p)) { 657 darray_for_each(w->inodes, i) 658 i->seen_this_pos = false; 659 } 660 661 w->last_pos = k.k->p; 662 663 return lookup_inode_for_snapshot(trans->c, w, k); 664 } 665 666 static int __get_visible_inodes(struct btree_trans *trans, 667 struct inode_walker *w, 668 struct snapshots_seen *s, 669 u64 inum) 670 { 671 struct bch_fs *c = trans->c; 672 struct btree_iter iter; 673 struct bkey_s_c k; 674 int ret; 675 676 w->inodes.nr = 0; 677 678 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 679 BTREE_ITER_ALL_SNAPSHOTS, k, ret) { 680 u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot); 681 682 if (k.k->p.offset != inum) 683 break; 684 685 if (!ref_visible(c, s, s->pos.snapshot, equiv)) 686 continue; 687 688 if (bkey_is_inode(k.k)) 689 add_inode(c, w, k); 690 691 if (equiv >= s->pos.snapshot) 692 break; 693 } 694 bch2_trans_iter_exit(trans, &iter); 695 696 return ret; 697 } 698 699 static int check_key_has_snapshot(struct btree_trans *trans, 700 struct btree_iter *iter, 701 struct bkey_s_c k) 702 { 703 struct bch_fs *c = trans->c; 704 struct printbuf buf = PRINTBUF; 705 int ret = 0; 706 707 if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c, 708 bkey_in_missing_snapshot, 709 "key in missing snapshot: %s", 710 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 711 ret = bch2_btree_delete_at(trans, iter, 712 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1; 713 fsck_err: 714 printbuf_exit(&buf); 715 return ret; 716 } 717 718 static int hash_redo_key(struct btree_trans *trans, 719 const struct bch_hash_desc desc, 720 struct bch_hash_info *hash_info, 721 struct btree_iter *k_iter, struct bkey_s_c k) 722 { 723 struct bkey_i *delete; 724 struct bkey_i *tmp; 725 726 delete = bch2_trans_kmalloc(trans, sizeof(*delete)); 727 if (IS_ERR(delete)) 728 return PTR_ERR(delete); 729 730 tmp = bch2_bkey_make_mut_noupdate(trans, k); 731 if (IS_ERR(tmp)) 732 return PTR_ERR(tmp); 733 734 bkey_init(&delete->k); 735 delete->k.p = k_iter->pos; 736 return bch2_btree_iter_traverse(k_iter) ?: 737 bch2_trans_update(trans, k_iter, delete, 0) ?: 738 bch2_hash_set_in_snapshot(trans, desc, hash_info, 739 (subvol_inum) { 0, k.k->p.inode }, 740 k.k->p.snapshot, tmp, 741 BCH_HASH_SET_MUST_CREATE, 742 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 743 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 744 } 745 746 static int hash_check_key(struct btree_trans *trans, 747 const struct bch_hash_desc desc, 748 struct bch_hash_info *hash_info, 749 struct btree_iter *k_iter, struct bkey_s_c hash_k) 750 { 751 struct bch_fs *c = trans->c; 752 struct btree_iter iter = { NULL }; 753 struct printbuf buf = PRINTBUF; 754 struct bkey_s_c k; 755 u64 hash; 756 int ret = 0; 757 758 if (hash_k.k->type != desc.key_type) 759 return 0; 760 761 hash = desc.hash_bkey(hash_info, hash_k); 762 763 if (likely(hash == hash_k.k->p.offset)) 764 return 0; 765 766 if (hash_k.k->p.offset < hash) 767 goto bad_hash; 768 769 for_each_btree_key_norestart(trans, iter, desc.btree_id, 770 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), 771 BTREE_ITER_SLOTS, k, ret) { 772 if (bkey_eq(k.k->p, hash_k.k->p)) 773 break; 774 775 if (fsck_err_on(k.k->type == desc.key_type && 776 !desc.cmp_bkey(k, hash_k), c, 777 hash_table_key_duplicate, 778 "duplicate hash table keys:\n%s", 779 (printbuf_reset(&buf), 780 bch2_bkey_val_to_text(&buf, c, hash_k), 781 buf.buf))) { 782 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; 783 break; 784 } 785 786 if (bkey_deleted(k.k)) { 787 bch2_trans_iter_exit(trans, &iter); 788 goto bad_hash; 789 } 790 } 791 out: 792 bch2_trans_iter_exit(trans, &iter); 793 printbuf_exit(&buf); 794 return ret; 795 bad_hash: 796 if (fsck_err(c, hash_table_key_wrong_offset, 797 "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", 798 bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, 799 (printbuf_reset(&buf), 800 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { 801 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); 802 bch_err_fn(c, ret); 803 if (ret) 804 return ret; 805 ret = -BCH_ERR_transaction_restart_nested; 806 } 807 fsck_err: 808 goto out; 809 } 810 811 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, 812 struct btree_iter *iter, 813 struct bpos pos) 814 { 815 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); 816 } 817 818 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, 819 struct btree_iter *iter, 820 struct bch_inode_unpacked *inode, 821 u32 *snapshot) 822 { 823 if (inode->bi_subvol) { 824 u64 inum; 825 int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum); 826 if (ret) 827 return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) }); 828 } 829 830 return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot)); 831 } 832 833 static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, 834 struct bkey_s_c_dirent d) 835 { 836 return inode->bi_dir == d.k->p.inode && 837 inode->bi_dir_offset == d.k->p.offset; 838 } 839 840 static bool dirent_points_to_inode(struct bkey_s_c_dirent d, 841 struct bch_inode_unpacked *inode) 842 { 843 return d.v->d_type == DT_SUBVOL 844 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 845 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 846 } 847 848 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 849 { 850 struct btree_iter iter; 851 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 852 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 853 bch2_trans_iter_exit(trans, &iter); 854 return ret; 855 } 856 857 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 858 struct bch_inode_unpacked *inode, 859 u32 inode_snapshot, bool *write_inode) 860 { 861 struct bch_fs *c = trans->c; 862 struct printbuf buf = PRINTBUF; 863 864 struct btree_iter dirent_iter = {}; 865 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 866 int ret = bkey_err(d); 867 if (ret && !bch2_err_matches(ret, ENOENT)) 868 return ret; 869 870 if (fsck_err_on(ret, 871 c, inode_points_to_missing_dirent, 872 "inode points to missing dirent\n%s", 873 (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || 874 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 875 c, inode_points_to_wrong_dirent, 876 "inode points to dirent that does not point back:\n%s", 877 (bch2_bkey_val_to_text(&buf, c, inode_k), 878 prt_newline(&buf), 879 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 880 /* 881 * We just clear the backpointer fields for now. If we find a 882 * dirent that points to this inode in check_dirents(), we'll 883 * update it then; then when we get to check_path() if the 884 * backpointer is still 0 we'll reattach it. 885 */ 886 inode->bi_dir = 0; 887 inode->bi_dir_offset = 0; 888 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 889 *write_inode = true; 890 } 891 892 ret = 0; 893 fsck_err: 894 bch2_trans_iter_exit(trans, &dirent_iter); 895 printbuf_exit(&buf); 896 bch_err_fn(c, ret); 897 return ret; 898 } 899 900 static int check_inode(struct btree_trans *trans, 901 struct btree_iter *iter, 902 struct bkey_s_c k, 903 struct bch_inode_unpacked *prev, 904 struct snapshots_seen *s, 905 bool full) 906 { 907 struct bch_fs *c = trans->c; 908 struct bch_inode_unpacked u; 909 bool do_update = false; 910 int ret; 911 912 ret = check_key_has_snapshot(trans, iter, k); 913 if (ret < 0) 914 goto err; 915 if (ret) 916 return 0; 917 918 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 919 if (ret) 920 goto err; 921 922 if (!bkey_is_inode(k.k)) 923 return 0; 924 925 BUG_ON(bch2_inode_unpack(k, &u)); 926 927 if (!full && 928 !(u.bi_flags & (BCH_INODE_i_size_dirty| 929 BCH_INODE_i_sectors_dirty| 930 BCH_INODE_unlinked))) 931 return 0; 932 933 if (prev->bi_inum != u.bi_inum) 934 *prev = u; 935 936 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 937 inode_d_type(prev) != inode_d_type(&u), 938 c, inode_snapshot_mismatch, 939 "inodes in different snapshots don't match")) { 940 bch_err(c, "repair not implemented yet"); 941 return -BCH_ERR_fsck_repair_unimplemented; 942 } 943 944 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 945 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 946 struct bpos new_min_pos; 947 948 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 949 if (ret) 950 goto err; 951 952 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 953 954 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 955 956 bch_err_msg(c, ret, "in fsck updating inode"); 957 if (ret) 958 return ret; 959 960 if (!bpos_eq(new_min_pos, POS_MIN)) 961 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 962 return 0; 963 } 964 965 if (u.bi_flags & BCH_INODE_unlinked) { 966 ret = check_inode_deleted_list(trans, k.k->p); 967 if (ret < 0) 968 return ret; 969 970 fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list, 971 "inode %llu:%u unlinked, but not on deleted list", 972 u.bi_inum, k.k->p.snapshot); 973 ret = 0; 974 } 975 976 if (u.bi_flags & BCH_INODE_unlinked && 977 (!c->sb.clean || 978 fsck_err(c, inode_unlinked_but_clean, 979 "filesystem marked clean, but inode %llu unlinked", 980 u.bi_inum))) { 981 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); 982 bch_err_msg(c, ret, "in fsck deleting inode"); 983 return ret; 984 } 985 986 if (u.bi_flags & BCH_INODE_i_size_dirty && 987 (!c->sb.clean || 988 fsck_err(c, inode_i_size_dirty_but_clean, 989 "filesystem marked clean, but inode %llu has i_size dirty", 990 u.bi_inum))) { 991 bch_verbose(c, "truncating inode %llu", u.bi_inum); 992 993 /* 994 * XXX: need to truncate partial blocks too here - or ideally 995 * just switch units to bytes and that issue goes away 996 */ 997 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, 998 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, 999 iter->pos.snapshot), 1000 POS(u.bi_inum, U64_MAX), 1001 0, NULL); 1002 bch_err_msg(c, ret, "in fsck truncating inode"); 1003 if (ret) 1004 return ret; 1005 1006 /* 1007 * We truncated without our normal sector accounting hook, just 1008 * make sure we recalculate it: 1009 */ 1010 u.bi_flags |= BCH_INODE_i_sectors_dirty; 1011 1012 u.bi_flags &= ~BCH_INODE_i_size_dirty; 1013 do_update = true; 1014 } 1015 1016 if (u.bi_flags & BCH_INODE_i_sectors_dirty && 1017 (!c->sb.clean || 1018 fsck_err(c, inode_i_sectors_dirty_but_clean, 1019 "filesystem marked clean, but inode %llu has i_sectors dirty", 1020 u.bi_inum))) { 1021 s64 sectors; 1022 1023 bch_verbose(c, "recounting sectors for inode %llu", 1024 u.bi_inum); 1025 1026 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); 1027 if (sectors < 0) { 1028 bch_err_msg(c, sectors, "in fsck recounting inode sectors"); 1029 return sectors; 1030 } 1031 1032 u.bi_sectors = sectors; 1033 u.bi_flags &= ~BCH_INODE_i_sectors_dirty; 1034 do_update = true; 1035 } 1036 1037 if (u.bi_flags & BCH_INODE_backptr_untrusted) { 1038 u.bi_dir = 0; 1039 u.bi_dir_offset = 0; 1040 u.bi_flags &= ~BCH_INODE_backptr_untrusted; 1041 do_update = true; 1042 } 1043 1044 if (u.bi_dir || u.bi_dir_offset) { 1045 ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update); 1046 if (ret) 1047 goto err; 1048 } 1049 1050 if (fsck_err_on(u.bi_parent_subvol && 1051 (u.bi_subvol == 0 || 1052 u.bi_subvol == BCACHEFS_ROOT_SUBVOL), 1053 c, inode_bi_parent_nonzero, 1054 "inode %llu:%u has subvol %u but nonzero parent subvol %u", 1055 u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) { 1056 u.bi_parent_subvol = 0; 1057 do_update = true; 1058 } 1059 1060 if (u.bi_subvol) { 1061 struct bch_subvolume s; 1062 1063 ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); 1064 if (ret && !bch2_err_matches(ret, ENOENT)) 1065 goto err; 1066 1067 if (fsck_err_on(ret, 1068 c, inode_bi_subvol_missing, 1069 "inode %llu:%u bi_subvol points to missing subvolume %u", 1070 u.bi_inum, k.k->p.snapshot, u.bi_subvol) || 1071 fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum || 1072 !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot), 1073 k.k->p.snapshot), 1074 c, inode_bi_subvol_wrong, 1075 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u", 1076 u.bi_inum, k.k->p.snapshot, u.bi_subvol, 1077 le64_to_cpu(s.inode), 1078 le32_to_cpu(s.snapshot))) { 1079 u.bi_subvol = 0; 1080 u.bi_parent_subvol = 0; 1081 do_update = true; 1082 } 1083 } 1084 1085 if (do_update) { 1086 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1087 bch_err_msg(c, ret, "in fsck updating inode"); 1088 if (ret) 1089 return ret; 1090 } 1091 err: 1092 fsck_err: 1093 bch_err_fn(c, ret); 1094 return ret; 1095 } 1096 1097 int bch2_check_inodes(struct bch_fs *c) 1098 { 1099 bool full = c->opts.fsck; 1100 struct bch_inode_unpacked prev = { 0 }; 1101 struct snapshots_seen s; 1102 1103 snapshots_seen_init(&s); 1104 1105 int ret = bch2_trans_run(c, 1106 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 1107 POS_MIN, 1108 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1109 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1110 check_inode(trans, &iter, k, &prev, &s, full))); 1111 1112 snapshots_seen_exit(&s); 1113 bch_err_fn(c, ret); 1114 return ret; 1115 } 1116 1117 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1118 { 1119 struct bch_fs *c = trans->c; 1120 u32 restart_count = trans->restart_count; 1121 int ret = 0; 1122 s64 count2; 1123 1124 darray_for_each(w->inodes, i) { 1125 if (i->inode.bi_sectors == i->count) 1126 continue; 1127 1128 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1129 1130 if (w->recalculate_sums) 1131 i->count = count2; 1132 1133 if (i->count != count2) { 1134 bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1135 w->last_pos.inode, i->snapshot, i->count, count2); 1136 return -BCH_ERR_internal_fsck_err; 1137 } 1138 1139 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1140 c, inode_i_sectors_wrong, 1141 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1142 w->last_pos.inode, i->snapshot, 1143 i->inode.bi_sectors, i->count)) { 1144 i->inode.bi_sectors = i->count; 1145 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1146 if (ret) 1147 break; 1148 } 1149 } 1150 fsck_err: 1151 bch_err_fn(c, ret); 1152 return ret ?: trans_was_restarted(trans, restart_count); 1153 } 1154 1155 struct extent_end { 1156 u32 snapshot; 1157 u64 offset; 1158 struct snapshots_seen seen; 1159 }; 1160 1161 struct extent_ends { 1162 struct bpos last_pos; 1163 DARRAY(struct extent_end) e; 1164 }; 1165 1166 static void extent_ends_reset(struct extent_ends *extent_ends) 1167 { 1168 darray_for_each(extent_ends->e, i) 1169 snapshots_seen_exit(&i->seen); 1170 extent_ends->e.nr = 0; 1171 } 1172 1173 static void extent_ends_exit(struct extent_ends *extent_ends) 1174 { 1175 extent_ends_reset(extent_ends); 1176 darray_exit(&extent_ends->e); 1177 } 1178 1179 static void extent_ends_init(struct extent_ends *extent_ends) 1180 { 1181 memset(extent_ends, 0, sizeof(*extent_ends)); 1182 } 1183 1184 static int extent_ends_at(struct bch_fs *c, 1185 struct extent_ends *extent_ends, 1186 struct snapshots_seen *seen, 1187 struct bkey_s_c k) 1188 { 1189 struct extent_end *i, n = (struct extent_end) { 1190 .offset = k.k->p.offset, 1191 .snapshot = k.k->p.snapshot, 1192 .seen = *seen, 1193 }; 1194 1195 n.seen.ids.data = kmemdup(seen->ids.data, 1196 sizeof(seen->ids.data[0]) * seen->ids.size, 1197 GFP_KERNEL); 1198 if (!n.seen.ids.data) 1199 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1200 1201 __darray_for_each(extent_ends->e, i) { 1202 if (i->snapshot == k.k->p.snapshot) { 1203 snapshots_seen_exit(&i->seen); 1204 *i = n; 1205 return 0; 1206 } 1207 1208 if (i->snapshot >= k.k->p.snapshot) 1209 break; 1210 } 1211 1212 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1213 } 1214 1215 static int overlapping_extents_found(struct btree_trans *trans, 1216 enum btree_id btree, 1217 struct bpos pos1, struct snapshots_seen *pos1_seen, 1218 struct bkey pos2, 1219 bool *fixed, 1220 struct extent_end *extent_end) 1221 { 1222 struct bch_fs *c = trans->c; 1223 struct printbuf buf = PRINTBUF; 1224 struct btree_iter iter1, iter2 = { NULL }; 1225 struct bkey_s_c k1, k2; 1226 int ret; 1227 1228 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1229 1230 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1231 BTREE_ITER_ALL_SNAPSHOTS| 1232 BTREE_ITER_NOT_EXTENTS); 1233 k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); 1234 ret = bkey_err(k1); 1235 if (ret) 1236 goto err; 1237 1238 prt_str(&buf, "\n "); 1239 bch2_bkey_val_to_text(&buf, c, k1); 1240 1241 if (!bpos_eq(pos1, k1.k->p)) { 1242 prt_str(&buf, "\n wanted\n "); 1243 bch2_bpos_to_text(&buf, pos1); 1244 prt_str(&buf, "\n "); 1245 bch2_bkey_to_text(&buf, &pos2); 1246 1247 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1248 __func__, buf.buf); 1249 ret = -BCH_ERR_internal_fsck_err; 1250 goto err; 1251 } 1252 1253 bch2_trans_copy_iter(&iter2, &iter1); 1254 1255 while (1) { 1256 bch2_btree_iter_advance(&iter2); 1257 1258 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1259 ret = bkey_err(k2); 1260 if (ret) 1261 goto err; 1262 1263 if (bpos_ge(k2.k->p, pos2.p)) 1264 break; 1265 } 1266 1267 prt_str(&buf, "\n "); 1268 bch2_bkey_val_to_text(&buf, c, k2); 1269 1270 if (bpos_gt(k2.k->p, pos2.p) || 1271 pos2.size != k2.k->size) { 1272 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1273 __func__, buf.buf); 1274 ret = -BCH_ERR_internal_fsck_err; 1275 goto err; 1276 } 1277 1278 prt_printf(&buf, "\n overwriting %s extent", 1279 pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); 1280 1281 if (fsck_err(c, extent_overlapping, 1282 "overlapping extents%s", buf.buf)) { 1283 struct btree_iter *old_iter = &iter1; 1284 struct disk_reservation res = { 0 }; 1285 1286 if (pos1.snapshot < pos2.p.snapshot) { 1287 old_iter = &iter2; 1288 swap(k1, k2); 1289 } 1290 1291 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1292 1293 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1294 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, 1295 k1, k2) ?: 1296 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1297 bch2_disk_reservation_put(c, &res); 1298 1299 if (ret) 1300 goto err; 1301 1302 *fixed = true; 1303 1304 if (pos1.snapshot == pos2.p.snapshot) { 1305 /* 1306 * We overwrote the first extent, and did the overwrite 1307 * in the same snapshot: 1308 */ 1309 extent_end->offset = bkey_start_offset(&pos2); 1310 } else if (pos1.snapshot > pos2.p.snapshot) { 1311 /* 1312 * We overwrote the first extent in pos2's snapshot: 1313 */ 1314 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1315 } else { 1316 /* 1317 * We overwrote the second extent - restart 1318 * check_extent() from the top: 1319 */ 1320 ret = -BCH_ERR_transaction_restart_nested; 1321 } 1322 } 1323 fsck_err: 1324 err: 1325 bch2_trans_iter_exit(trans, &iter2); 1326 bch2_trans_iter_exit(trans, &iter1); 1327 printbuf_exit(&buf); 1328 return ret; 1329 } 1330 1331 static int check_overlapping_extents(struct btree_trans *trans, 1332 struct snapshots_seen *seen, 1333 struct extent_ends *extent_ends, 1334 struct bkey_s_c k, 1335 u32 equiv, 1336 struct btree_iter *iter, 1337 bool *fixed) 1338 { 1339 struct bch_fs *c = trans->c; 1340 int ret = 0; 1341 1342 /* transaction restart, running again */ 1343 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1344 return 0; 1345 1346 if (extent_ends->last_pos.inode != k.k->p.inode) 1347 extent_ends_reset(extent_ends); 1348 1349 darray_for_each(extent_ends->e, i) { 1350 if (i->offset <= bkey_start_offset(k.k)) 1351 continue; 1352 1353 if (!ref_visible2(c, 1354 k.k->p.snapshot, seen, 1355 i->snapshot, &i->seen)) 1356 continue; 1357 1358 ret = overlapping_extents_found(trans, iter->btree_id, 1359 SPOS(iter->pos.inode, 1360 i->offset, 1361 i->snapshot), 1362 &i->seen, 1363 *k.k, fixed, i); 1364 if (ret) 1365 goto err; 1366 } 1367 1368 ret = extent_ends_at(c, extent_ends, seen, k); 1369 if (ret) 1370 goto err; 1371 1372 extent_ends->last_pos = k.k->p; 1373 err: 1374 return ret; 1375 } 1376 1377 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1378 struct bkey_s_c k) 1379 { 1380 struct bch_fs *c = trans->c; 1381 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1382 struct bch_extent_crc_unpacked crc; 1383 const union bch_extent_entry *i; 1384 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1385 1386 bkey_for_each_crc(k.k, ptrs, crc, i) 1387 if (crc_is_encoded(crc) && 1388 crc.uncompressed_size > encoded_extent_max_sectors) { 1389 struct printbuf buf = PRINTBUF; 1390 1391 bch2_bkey_val_to_text(&buf, c, k); 1392 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1393 printbuf_exit(&buf); 1394 } 1395 1396 return 0; 1397 } 1398 1399 static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1400 struct bkey_s_c k, 1401 struct inode_walker *inode, 1402 struct snapshots_seen *s, 1403 struct extent_ends *extent_ends) 1404 { 1405 struct bch_fs *c = trans->c; 1406 struct inode_walker_entry *i; 1407 struct printbuf buf = PRINTBUF; 1408 struct bpos equiv = k.k->p; 1409 int ret = 0; 1410 1411 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); 1412 1413 ret = check_key_has_snapshot(trans, iter, k); 1414 if (ret) { 1415 ret = ret < 0 ? ret : 0; 1416 goto out; 1417 } 1418 1419 if (inode->last_pos.inode != k.k->p.inode) { 1420 ret = check_i_sectors(trans, inode); 1421 if (ret) 1422 goto err; 1423 } 1424 1425 i = walk_inode(trans, inode, k); 1426 ret = PTR_ERR_OR_ZERO(i); 1427 if (ret) 1428 goto err; 1429 1430 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1431 if (ret) 1432 goto err; 1433 1434 if (k.k->type != KEY_TYPE_whiteout) { 1435 if (fsck_err_on(!i, c, extent_in_missing_inode, 1436 "extent in missing inode:\n %s", 1437 (printbuf_reset(&buf), 1438 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1439 goto delete; 1440 1441 if (fsck_err_on(i && 1442 !S_ISREG(i->inode.bi_mode) && 1443 !S_ISLNK(i->inode.bi_mode), 1444 c, extent_in_non_reg_inode, 1445 "extent in non regular inode mode %o:\n %s", 1446 i->inode.bi_mode, 1447 (printbuf_reset(&buf), 1448 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1449 goto delete; 1450 1451 ret = check_overlapping_extents(trans, s, extent_ends, k, 1452 equiv.snapshot, iter, 1453 &inode->recalculate_sums); 1454 if (ret) 1455 goto err; 1456 } 1457 1458 /* 1459 * Check inodes in reverse order, from oldest snapshots to newest, 1460 * starting from the inode that matches this extent's snapshot. If we 1461 * didn't have one, iterate over all inodes: 1462 */ 1463 if (!i) 1464 i = inode->inodes.data + inode->inodes.nr - 1; 1465 1466 for (; 1467 inode->inodes.data && i >= inode->inodes.data; 1468 --i) { 1469 if (i->snapshot > equiv.snapshot || 1470 !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot)) 1471 continue; 1472 1473 if (k.k->type != KEY_TYPE_whiteout) { 1474 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && 1475 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1476 !bkey_extent_is_reservation(k), 1477 c, extent_past_end_of_inode, 1478 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1479 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1480 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1481 struct btree_iter iter2; 1482 1483 bch2_trans_copy_iter(&iter2, iter); 1484 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1485 ret = bch2_btree_iter_traverse(&iter2) ?: 1486 bch2_btree_delete_at(trans, &iter2, 1487 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1488 bch2_trans_iter_exit(trans, &iter2); 1489 if (ret) 1490 goto err; 1491 1492 iter->k.type = KEY_TYPE_whiteout; 1493 } 1494 1495 if (bkey_extent_is_allocation(k.k)) 1496 i->count += k.k->size; 1497 } 1498 1499 i->seen_this_pos = true; 1500 } 1501 out: 1502 err: 1503 fsck_err: 1504 printbuf_exit(&buf); 1505 bch_err_fn(c, ret); 1506 return ret; 1507 delete: 1508 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1509 goto out; 1510 } 1511 1512 /* 1513 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1514 * that i_size an i_sectors are consistent 1515 */ 1516 int bch2_check_extents(struct bch_fs *c) 1517 { 1518 struct inode_walker w = inode_walker_init(); 1519 struct snapshots_seen s; 1520 struct extent_ends extent_ends; 1521 struct disk_reservation res = { 0 }; 1522 1523 snapshots_seen_init(&s); 1524 extent_ends_init(&extent_ends); 1525 1526 int ret = bch2_trans_run(c, 1527 for_each_btree_key_commit(trans, iter, BTREE_ID_extents, 1528 POS(BCACHEFS_ROOT_INO, 0), 1529 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1530 &res, NULL, 1531 BCH_TRANS_COMMIT_no_enospc, ({ 1532 bch2_disk_reservation_put(c, &res); 1533 check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: 1534 check_extent_overbig(trans, &iter, k); 1535 })) ?: 1536 check_i_sectors(trans, &w)); 1537 1538 bch2_disk_reservation_put(c, &res); 1539 extent_ends_exit(&extent_ends); 1540 inode_walker_exit(&w); 1541 snapshots_seen_exit(&s); 1542 1543 bch_err_fn(c, ret); 1544 return ret; 1545 } 1546 1547 int bch2_check_indirect_extents(struct bch_fs *c) 1548 { 1549 struct disk_reservation res = { 0 }; 1550 1551 int ret = bch2_trans_run(c, 1552 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1553 POS_MIN, 1554 BTREE_ITER_PREFETCH, k, 1555 &res, NULL, 1556 BCH_TRANS_COMMIT_no_enospc, ({ 1557 bch2_disk_reservation_put(c, &res); 1558 check_extent_overbig(trans, &iter, k); 1559 }))); 1560 1561 bch2_disk_reservation_put(c, &res); 1562 bch_err_fn(c, ret); 1563 return ret; 1564 } 1565 1566 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) 1567 { 1568 struct bch_fs *c = trans->c; 1569 u32 restart_count = trans->restart_count; 1570 int ret = 0; 1571 s64 count2; 1572 1573 darray_for_each(w->inodes, i) { 1574 if (i->inode.bi_nlink == i->count) 1575 continue; 1576 1577 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1578 if (count2 < 0) 1579 return count2; 1580 1581 if (i->count != count2) { 1582 bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", 1583 i->count, count2); 1584 i->count = count2; 1585 if (i->inode.bi_nlink == i->count) 1586 continue; 1587 } 1588 1589 if (fsck_err_on(i->inode.bi_nlink != i->count, 1590 c, inode_dir_wrong_nlink, 1591 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1592 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1593 i->inode.bi_nlink = i->count; 1594 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1595 if (ret) 1596 break; 1597 } 1598 } 1599 fsck_err: 1600 bch_err_fn(c, ret); 1601 return ret ?: trans_was_restarted(trans, restart_count); 1602 } 1603 1604 static int check_dirent_inode_dirent(struct btree_trans *trans, 1605 struct btree_iter *iter, 1606 struct bkey_s_c_dirent d, 1607 struct bch_inode_unpacked *target, 1608 u32 target_snapshot) 1609 { 1610 struct bch_fs *c = trans->c; 1611 struct printbuf buf = PRINTBUF; 1612 int ret = 0; 1613 1614 if (inode_points_to_dirent(target, d)) 1615 return 0; 1616 1617 if (!target->bi_dir && 1618 !target->bi_dir_offset) { 1619 target->bi_dir = d.k->p.inode; 1620 target->bi_dir_offset = d.k->p.offset; 1621 return __bch2_fsck_write_inode(trans, target, target_snapshot); 1622 } 1623 1624 struct btree_iter bp_iter = { NULL }; 1625 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1626 SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1627 ret = bkey_err(bp_dirent); 1628 if (ret && !bch2_err_matches(ret, ENOENT)) 1629 goto err; 1630 1631 bool backpointer_exists = !ret; 1632 ret = 0; 1633 1634 if (fsck_err_on(!backpointer_exists, 1635 c, inode_wrong_backpointer, 1636 "inode %llu:%u has wrong backpointer:\n" 1637 "got %llu:%llu\n" 1638 "should be %llu:%llu", 1639 target->bi_inum, target_snapshot, 1640 target->bi_dir, 1641 target->bi_dir_offset, 1642 d.k->p.inode, 1643 d.k->p.offset)) { 1644 target->bi_dir = d.k->p.inode; 1645 target->bi_dir_offset = d.k->p.offset; 1646 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1647 goto out; 1648 } 1649 1650 bch2_bkey_val_to_text(&buf, c, d.s_c); 1651 prt_newline(&buf); 1652 if (backpointer_exists) 1653 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 1654 1655 if (fsck_err_on(backpointer_exists && 1656 (S_ISDIR(target->bi_mode) || 1657 target->bi_subvol), 1658 c, inode_dir_multiple_links, 1659 "%s %llu:%u with multiple links\n%s", 1660 S_ISDIR(target->bi_mode) ? "directory" : "subvolume", 1661 target->bi_inum, target_snapshot, buf.buf)) { 1662 ret = __remove_dirent(trans, d.k->p); 1663 goto out; 1664 } 1665 1666 /* 1667 * hardlinked file with nlink 0: 1668 * We're just adjusting nlink here so check_nlinks() will pick 1669 * it up, it ignores inodes with nlink 0 1670 */ 1671 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1672 c, inode_multiple_links_but_nlink_0, 1673 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1674 target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1675 target->bi_nlink++; 1676 target->bi_flags &= ~BCH_INODE_unlinked; 1677 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1678 if (ret) 1679 goto err; 1680 } 1681 out: 1682 err: 1683 fsck_err: 1684 bch2_trans_iter_exit(trans, &bp_iter); 1685 printbuf_exit(&buf); 1686 bch_err_fn(c, ret); 1687 return ret; 1688 } 1689 1690 static int check_dirent_target(struct btree_trans *trans, 1691 struct btree_iter *iter, 1692 struct bkey_s_c_dirent d, 1693 struct bch_inode_unpacked *target, 1694 u32 target_snapshot) 1695 { 1696 struct bch_fs *c = trans->c; 1697 struct bkey_i_dirent *n; 1698 struct printbuf buf = PRINTBUF; 1699 int ret = 0; 1700 1701 ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1702 if (ret) 1703 goto err; 1704 1705 if (fsck_err_on(d.v->d_type != inode_d_type(target), 1706 c, dirent_d_type_wrong, 1707 "incorrect d_type: got %s, should be %s:\n%s", 1708 bch2_d_type_str(d.v->d_type), 1709 bch2_d_type_str(inode_d_type(target)), 1710 (printbuf_reset(&buf), 1711 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1712 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 1713 ret = PTR_ERR_OR_ZERO(n); 1714 if (ret) 1715 goto err; 1716 1717 bkey_reassemble(&n->k_i, d.s_c); 1718 n->v.d_type = inode_d_type(target); 1719 if (n->v.d_type == DT_SUBVOL) { 1720 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 1721 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1722 } else { 1723 n->v.d_inum = cpu_to_le64(target->bi_inum); 1724 } 1725 1726 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1727 if (ret) 1728 goto err; 1729 1730 d = dirent_i_to_s_c(n); 1731 } 1732 err: 1733 fsck_err: 1734 printbuf_exit(&buf); 1735 bch_err_fn(c, ret); 1736 return ret; 1737 } 1738 1739 /* find a subvolume that's a descendent of @snapshot: */ 1740 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1741 { 1742 struct btree_iter iter; 1743 struct bkey_s_c k; 1744 int ret; 1745 1746 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1747 if (k.k->type != KEY_TYPE_subvolume) 1748 continue; 1749 1750 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1751 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1752 bch2_trans_iter_exit(trans, &iter); 1753 *subvolid = k.k->p.offset; 1754 goto found; 1755 } 1756 } 1757 if (!ret) 1758 ret = -ENOENT; 1759 found: 1760 bch2_trans_iter_exit(trans, &iter); 1761 return ret; 1762 } 1763 1764 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1765 struct bkey_s_c_dirent d) 1766 { 1767 struct bch_fs *c = trans->c; 1768 struct btree_iter subvol_iter = {}; 1769 struct bch_inode_unpacked subvol_root; 1770 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1771 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1772 u32 parent_snapshot; 1773 u64 parent_inum; 1774 struct printbuf buf = PRINTBUF; 1775 int ret = 0; 1776 1777 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1778 if (ret && !bch2_err_matches(ret, ENOENT)) 1779 return ret; 1780 1781 if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, 1782 "dirent parent_subvol points to missing subvolume\n%s", 1783 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1784 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1785 c, dirent_not_visible_in_parent_subvol, 1786 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1787 parent_snapshot, 1788 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1789 u32 new_parent_subvol; 1790 ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1791 if (ret) 1792 goto err; 1793 1794 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1795 ret = PTR_ERR_OR_ZERO(new_dirent); 1796 if (ret) 1797 goto err; 1798 1799 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1800 } 1801 1802 struct bkey_s_c_subvolume s = 1803 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1804 BTREE_ID_subvolumes, POS(0, target_subvol), 1805 0, subvolume); 1806 ret = bkey_err(s.s_c); 1807 if (ret && !bch2_err_matches(ret, ENOENT)) 1808 return ret; 1809 1810 if (ret) { 1811 if (fsck_err(c, dirent_to_missing_subvol, 1812 "dirent points to missing subvolume\n%s", 1813 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1814 return __remove_dirent(trans, d.k->p); 1815 ret = 0; 1816 goto out; 1817 } 1818 1819 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 1820 c, subvol_fs_path_parent_wrong, 1821 "subvol with wrong fs_path_parent, should be be %u\n%s", 1822 parent_subvol, 1823 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 1824 struct bkey_i_subvolume *n = 1825 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 1826 ret = PTR_ERR_OR_ZERO(n); 1827 if (ret) 1828 goto err; 1829 1830 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 1831 } 1832 1833 u64 target_inum = le64_to_cpu(s.v->inode); 1834 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 1835 1836 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 1837 if (ret && !bch2_err_matches(ret, ENOENT)) 1838 return ret; 1839 1840 if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol, 1841 c, inode_bi_parent_wrong, 1842 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 1843 target_inum, 1844 subvol_root.bi_parent_subvol, parent_subvol)) { 1845 subvol_root.bi_parent_subvol = parent_subvol; 1846 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 1847 if (ret) 1848 return ret; 1849 } 1850 1851 ret = check_dirent_target(trans, iter, d, &subvol_root, 1852 target_snapshot); 1853 if (ret) 1854 return ret; 1855 out: 1856 err: 1857 fsck_err: 1858 bch2_trans_iter_exit(trans, &subvol_iter); 1859 printbuf_exit(&buf); 1860 return ret; 1861 } 1862 1863 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 1864 struct bkey_s_c k, 1865 struct bch_hash_info *hash_info, 1866 struct inode_walker *dir, 1867 struct inode_walker *target, 1868 struct snapshots_seen *s) 1869 { 1870 struct bch_fs *c = trans->c; 1871 struct bkey_s_c_dirent d; 1872 struct inode_walker_entry *i; 1873 struct printbuf buf = PRINTBUF; 1874 struct bpos equiv; 1875 int ret = 0; 1876 1877 ret = check_key_has_snapshot(trans, iter, k); 1878 if (ret) { 1879 ret = ret < 0 ? ret : 0; 1880 goto out; 1881 } 1882 1883 equiv = k.k->p; 1884 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); 1885 1886 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1887 if (ret) 1888 goto err; 1889 1890 if (k.k->type == KEY_TYPE_whiteout) 1891 goto out; 1892 1893 if (dir->last_pos.inode != k.k->p.inode) { 1894 ret = check_subdir_count(trans, dir); 1895 if (ret) 1896 goto err; 1897 } 1898 1899 BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); 1900 1901 i = walk_inode(trans, dir, k); 1902 ret = PTR_ERR_OR_ZERO(i); 1903 if (ret < 0) 1904 goto err; 1905 1906 if (dir->first_this_inode && dir->inodes.nr) 1907 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); 1908 dir->first_this_inode = false; 1909 1910 if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, 1911 "dirent in nonexisting directory:\n%s", 1912 (printbuf_reset(&buf), 1913 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1914 ret = bch2_btree_delete_at(trans, iter, 1915 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1916 goto out; 1917 } 1918 1919 if (!i) 1920 goto out; 1921 1922 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), 1923 c, dirent_in_non_dir_inode, 1924 "dirent in non directory inode type %s:\n%s", 1925 bch2_d_type_str(inode_d_type(&i->inode)), 1926 (printbuf_reset(&buf), 1927 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1928 ret = bch2_btree_delete_at(trans, iter, 0); 1929 goto out; 1930 } 1931 1932 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 1933 if (ret < 0) 1934 goto err; 1935 if (ret) { 1936 /* dirent has been deleted */ 1937 ret = 0; 1938 goto out; 1939 } 1940 1941 if (k.k->type != KEY_TYPE_dirent) 1942 goto out; 1943 1944 d = bkey_s_c_to_dirent(k); 1945 1946 if (d.v->d_type == DT_SUBVOL) { 1947 ret = check_dirent_to_subvol(trans, iter, d); 1948 if (ret) 1949 goto err; 1950 } else { 1951 ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); 1952 if (ret) 1953 goto err; 1954 1955 if (fsck_err_on(!target->inodes.nr, 1956 c, dirent_to_missing_inode, 1957 "dirent points to missing inode: (equiv %u)\n%s", 1958 equiv.snapshot, 1959 (printbuf_reset(&buf), 1960 bch2_bkey_val_to_text(&buf, c, k), 1961 buf.buf))) { 1962 ret = __remove_dirent(trans, d.k->p); 1963 if (ret) 1964 goto err; 1965 } 1966 1967 darray_for_each(target->inodes, i) { 1968 ret = check_dirent_target(trans, iter, d, 1969 &i->inode, i->snapshot); 1970 if (ret) 1971 goto err; 1972 } 1973 1974 if (d.v->d_type == DT_DIR) 1975 for_each_visible_inode(c, s, dir, equiv.snapshot, i) 1976 i->count++; 1977 } 1978 out: 1979 err: 1980 fsck_err: 1981 printbuf_exit(&buf); 1982 bch_err_fn(c, ret); 1983 return ret; 1984 } 1985 1986 /* 1987 * Walk dirents: verify that they all have a corresponding S_ISDIR inode, 1988 * validate d_type 1989 */ 1990 int bch2_check_dirents(struct bch_fs *c) 1991 { 1992 struct inode_walker dir = inode_walker_init(); 1993 struct inode_walker target = inode_walker_init(); 1994 struct snapshots_seen s; 1995 struct bch_hash_info hash_info; 1996 1997 snapshots_seen_init(&s); 1998 1999 int ret = bch2_trans_run(c, 2000 for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, 2001 POS(BCACHEFS_ROOT_INO, 0), 2002 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, 2003 k, 2004 NULL, NULL, 2005 BCH_TRANS_COMMIT_no_enospc, 2006 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s))); 2007 2008 snapshots_seen_exit(&s); 2009 inode_walker_exit(&dir); 2010 inode_walker_exit(&target); 2011 bch_err_fn(c, ret); 2012 return ret; 2013 } 2014 2015 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, 2016 struct bkey_s_c k, 2017 struct bch_hash_info *hash_info, 2018 struct inode_walker *inode) 2019 { 2020 struct bch_fs *c = trans->c; 2021 struct inode_walker_entry *i; 2022 int ret; 2023 2024 ret = check_key_has_snapshot(trans, iter, k); 2025 if (ret) 2026 return ret; 2027 2028 i = walk_inode(trans, inode, k); 2029 ret = PTR_ERR_OR_ZERO(i); 2030 if (ret) 2031 return ret; 2032 2033 if (inode->first_this_inode && inode->inodes.nr) 2034 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); 2035 inode->first_this_inode = false; 2036 2037 if (fsck_err_on(!i, c, xattr_in_missing_inode, 2038 "xattr for missing inode %llu", 2039 k.k->p.inode)) 2040 return bch2_btree_delete_at(trans, iter, 0); 2041 2042 if (!i) 2043 return 0; 2044 2045 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2046 fsck_err: 2047 bch_err_fn(c, ret); 2048 return ret; 2049 } 2050 2051 /* 2052 * Walk xattrs: verify that they all have a corresponding inode 2053 */ 2054 int bch2_check_xattrs(struct bch_fs *c) 2055 { 2056 struct inode_walker inode = inode_walker_init(); 2057 struct bch_hash_info hash_info; 2058 int ret = 0; 2059 2060 ret = bch2_trans_run(c, 2061 for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, 2062 POS(BCACHEFS_ROOT_INO, 0), 2063 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, 2064 k, 2065 NULL, NULL, 2066 BCH_TRANS_COMMIT_no_enospc, 2067 check_xattr(trans, &iter, k, &hash_info, &inode))); 2068 bch_err_fn(c, ret); 2069 return ret; 2070 } 2071 2072 static int check_root_trans(struct btree_trans *trans) 2073 { 2074 struct bch_fs *c = trans->c; 2075 struct bch_inode_unpacked root_inode; 2076 u32 snapshot; 2077 u64 inum; 2078 int ret; 2079 2080 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); 2081 if (ret && !bch2_err_matches(ret, ENOENT)) 2082 return ret; 2083 2084 if (mustfix_fsck_err_on(ret, c, root_subvol_missing, 2085 "root subvol missing")) { 2086 struct bkey_i_subvolume root_subvol; 2087 2088 snapshot = U32_MAX; 2089 inum = BCACHEFS_ROOT_INO; 2090 2091 bkey_subvolume_init(&root_subvol.k_i); 2092 root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL; 2093 root_subvol.v.flags = 0; 2094 root_subvol.v.snapshot = cpu_to_le32(snapshot); 2095 root_subvol.v.inode = cpu_to_le64(inum); 2096 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol.k_i, 0); 2097 bch_err_msg(c, ret, "writing root subvol"); 2098 if (ret) 2099 goto err; 2100 } 2101 2102 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2103 if (ret && !bch2_err_matches(ret, ENOENT)) 2104 return ret; 2105 2106 if (mustfix_fsck_err_on(ret, c, root_dir_missing, 2107 "root directory missing") || 2108 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), 2109 c, root_inode_not_dir, 2110 "root inode not a directory")) { 2111 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2112 0, NULL); 2113 root_inode.bi_inum = inum; 2114 2115 ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2116 bch_err_msg(c, ret, "writing root inode"); 2117 } 2118 err: 2119 fsck_err: 2120 return ret; 2121 } 2122 2123 /* Get root directory, create if it doesn't exist: */ 2124 int bch2_check_root(struct bch_fs *c) 2125 { 2126 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2127 check_root_trans(trans)); 2128 bch_err_fn(c, ret); 2129 return ret; 2130 } 2131 2132 typedef DARRAY(u32) darray_u32; 2133 2134 static bool darray_u32_has(darray_u32 *d, u32 v) 2135 { 2136 darray_for_each(*d, i) 2137 if (*i == v) 2138 return true; 2139 return false; 2140 } 2141 2142 /* 2143 * We've checked that inode backpointers point to valid dirents; here, it's 2144 * sufficient to check that the subvolume root has a dirent: 2145 */ 2146 static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) 2147 { 2148 struct bch_inode_unpacked inode; 2149 int ret = bch2_inode_find_by_inum_trans(trans, 2150 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 2151 &inode); 2152 if (ret) 2153 return ret; 2154 2155 return inode.bi_dir != 0; 2156 } 2157 2158 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 2159 { 2160 struct bch_fs *c = trans->c; 2161 struct btree_iter parent_iter = {}; 2162 darray_u32 subvol_path = {}; 2163 struct printbuf buf = PRINTBUF; 2164 int ret = 0; 2165 2166 if (k.k->type != KEY_TYPE_subvolume) 2167 return 0; 2168 2169 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { 2170 ret = darray_push(&subvol_path, k.k->p.offset); 2171 if (ret) 2172 goto err; 2173 2174 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 2175 2176 ret = subvol_has_dirent(trans, s); 2177 if (ret < 0) 2178 break; 2179 2180 if (fsck_err_on(!ret, 2181 c, subvol_unreachable, 2182 "unreachable subvolume %s", 2183 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2184 buf.buf))) { 2185 ret = reattach_subvol(trans, s); 2186 break; 2187 } 2188 2189 u32 parent = le32_to_cpu(s.v->fs_path_parent); 2190 2191 if (darray_u32_has(&subvol_path, parent)) { 2192 if (fsck_err(c, subvol_loop, "subvolume loop")) 2193 ret = reattach_subvol(trans, s); 2194 break; 2195 } 2196 2197 bch2_trans_iter_exit(trans, &parent_iter); 2198 bch2_trans_iter_init(trans, &parent_iter, 2199 BTREE_ID_subvolumes, POS(0, parent), 0); 2200 k = bch2_btree_iter_peek_slot(&parent_iter); 2201 ret = bkey_err(k); 2202 if (ret) 2203 goto err; 2204 2205 if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, 2206 c, subvol_unreachable, 2207 "unreachable subvolume %s", 2208 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2209 buf.buf))) { 2210 ret = reattach_subvol(trans, s); 2211 break; 2212 } 2213 } 2214 fsck_err: 2215 err: 2216 printbuf_exit(&buf); 2217 darray_exit(&subvol_path); 2218 bch2_trans_iter_exit(trans, &parent_iter); 2219 return ret; 2220 } 2221 2222 int bch2_check_subvolume_structure(struct bch_fs *c) 2223 { 2224 int ret = bch2_trans_run(c, 2225 for_each_btree_key_commit(trans, iter, 2226 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k, 2227 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2228 check_subvol_path(trans, &iter, k))); 2229 bch_err_fn(c, ret); 2230 return ret; 2231 } 2232 2233 struct pathbuf_entry { 2234 u64 inum; 2235 u32 snapshot; 2236 }; 2237 2238 typedef DARRAY(struct pathbuf_entry) pathbuf; 2239 2240 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) 2241 { 2242 darray_for_each(*p, i) 2243 if (i->inum == inum && 2244 i->snapshot == snapshot) 2245 return true; 2246 return false; 2247 } 2248 2249 /* 2250 * Check that a given inode is reachable from its subvolume root - we already 2251 * verified subvolume connectivity: 2252 * 2253 * XXX: we should also be verifying that inodes are in the right subvolumes 2254 */ 2255 static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) 2256 { 2257 struct bch_fs *c = trans->c; 2258 struct btree_iter inode_iter = {}; 2259 struct bch_inode_unpacked inode; 2260 struct printbuf buf = PRINTBUF; 2261 u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot); 2262 int ret = 0; 2263 2264 p->nr = 0; 2265 2266 BUG_ON(bch2_inode_unpack(inode_k, &inode)); 2267 2268 while (!inode.bi_subvol) { 2269 struct btree_iter dirent_iter; 2270 struct bkey_s_c_dirent d; 2271 u32 parent_snapshot = snapshot; 2272 2273 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2274 ret = bkey_err(d.s_c); 2275 if (ret && !bch2_err_matches(ret, ENOENT)) 2276 break; 2277 2278 if (!ret && !dirent_points_to_inode(d, &inode)) { 2279 bch2_trans_iter_exit(trans, &dirent_iter); 2280 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2281 } 2282 2283 if (bch2_err_matches(ret, ENOENT)) { 2284 ret = 0; 2285 if (fsck_err(c, inode_unreachable, 2286 "unreachable inode\n%s", 2287 (printbuf_reset(&buf), 2288 bch2_bkey_val_to_text(&buf, c, inode_k), 2289 buf.buf))) 2290 ret = reattach_inode(trans, &inode, snapshot); 2291 goto out; 2292 } 2293 2294 bch2_trans_iter_exit(trans, &dirent_iter); 2295 2296 if (!S_ISDIR(inode.bi_mode)) 2297 break; 2298 2299 ret = darray_push(p, ((struct pathbuf_entry) { 2300 .inum = inode.bi_inum, 2301 .snapshot = snapshot, 2302 })); 2303 if (ret) 2304 return ret; 2305 2306 snapshot = parent_snapshot; 2307 2308 bch2_trans_iter_exit(trans, &inode_iter); 2309 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2310 SPOS(0, inode.bi_dir, snapshot), 0); 2311 ret = bkey_err(inode_k) ?: 2312 !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode 2313 : bch2_inode_unpack(inode_k, &inode); 2314 if (ret) { 2315 /* Should have been caught in dirents pass */ 2316 if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2317 bch_err(c, "error looking up parent directory: %i", ret); 2318 break; 2319 } 2320 2321 snapshot = inode_k.k->p.snapshot; 2322 2323 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2324 /* XXX print path */ 2325 bch_err(c, "directory structure loop"); 2326 2327 darray_for_each(*p, i) 2328 pr_err("%llu:%u", i->inum, i->snapshot); 2329 pr_err("%llu:%u", inode.bi_inum, snapshot); 2330 2331 if (fsck_err(c, dir_loop, "directory structure loop")) { 2332 ret = remove_backpointer(trans, &inode); 2333 bch_err_msg(c, ret, "removing dirent"); 2334 if (ret) 2335 break; 2336 2337 ret = reattach_inode(trans, &inode, snapshot); 2338 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2339 } 2340 break; 2341 } 2342 } 2343 out: 2344 fsck_err: 2345 bch2_trans_iter_exit(trans, &inode_iter); 2346 printbuf_exit(&buf); 2347 bch_err_fn(c, ret); 2348 return ret; 2349 } 2350 2351 /* 2352 * Check for unreachable inodes, as well as loops in the directory structure: 2353 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2354 * unreachable: 2355 */ 2356 int bch2_check_directory_structure(struct bch_fs *c) 2357 { 2358 pathbuf path = { 0, }; 2359 int ret; 2360 2361 ret = bch2_trans_run(c, 2362 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2363 BTREE_ITER_INTENT| 2364 BTREE_ITER_PREFETCH| 2365 BTREE_ITER_ALL_SNAPSHOTS, k, 2366 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2367 if (!bkey_is_inode(k.k)) 2368 continue; 2369 2370 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2371 continue; 2372 2373 check_path(trans, &path, k); 2374 }))); 2375 darray_exit(&path); 2376 2377 bch_err_fn(c, ret); 2378 return ret; 2379 } 2380 2381 struct nlink_table { 2382 size_t nr; 2383 size_t size; 2384 2385 struct nlink { 2386 u64 inum; 2387 u32 snapshot; 2388 u32 count; 2389 } *d; 2390 }; 2391 2392 static int add_nlink(struct bch_fs *c, struct nlink_table *t, 2393 u64 inum, u32 snapshot) 2394 { 2395 if (t->nr == t->size) { 2396 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2397 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2398 2399 if (!d) { 2400 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2401 new_size); 2402 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2403 } 2404 2405 if (t->d) 2406 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2407 kvfree(t->d); 2408 2409 t->d = d; 2410 t->size = new_size; 2411 } 2412 2413 2414 t->d[t->nr++] = (struct nlink) { 2415 .inum = inum, 2416 .snapshot = snapshot, 2417 }; 2418 2419 return 0; 2420 } 2421 2422 static int nlink_cmp(const void *_l, const void *_r) 2423 { 2424 const struct nlink *l = _l; 2425 const struct nlink *r = _r; 2426 2427 return cmp_int(l->inum, r->inum); 2428 } 2429 2430 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2431 struct nlink_table *links, 2432 u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2433 { 2434 struct nlink *link, key = { 2435 .inum = inum, .snapshot = U32_MAX, 2436 }; 2437 2438 if (inum < range_start || inum >= range_end) 2439 return; 2440 2441 link = __inline_bsearch(&key, links->d, links->nr, 2442 sizeof(links->d[0]), nlink_cmp); 2443 if (!link) 2444 return; 2445 2446 while (link > links->d && link[0].inum == link[-1].inum) 2447 --link; 2448 2449 for (; link < links->d + links->nr && link->inum == inum; link++) 2450 if (ref_visible(c, s, snapshot, link->snapshot)) { 2451 link->count++; 2452 if (link->snapshot >= snapshot) 2453 break; 2454 } 2455 } 2456 2457 noinline_for_stack 2458 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2459 struct nlink_table *t, 2460 u64 start, u64 *end) 2461 { 2462 int ret = bch2_trans_run(c, 2463 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2464 POS(0, start), 2465 BTREE_ITER_INTENT| 2466 BTREE_ITER_PREFETCH| 2467 BTREE_ITER_ALL_SNAPSHOTS, k, ({ 2468 if (!bkey_is_inode(k.k)) 2469 continue; 2470 2471 /* Should never fail, checked by bch2_inode_invalid: */ 2472 struct bch_inode_unpacked u; 2473 BUG_ON(bch2_inode_unpack(k, &u)); 2474 2475 /* 2476 * Backpointer and directory structure checks are sufficient for 2477 * directories, since they can't have hardlinks: 2478 */ 2479 if (S_ISDIR(u.bi_mode)) 2480 continue; 2481 2482 if (!u.bi_nlink) 2483 continue; 2484 2485 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2486 if (ret) { 2487 *end = k.k->p.offset; 2488 ret = 0; 2489 break; 2490 } 2491 0; 2492 }))); 2493 2494 bch_err_fn(c, ret); 2495 return ret; 2496 } 2497 2498 noinline_for_stack 2499 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2500 u64 range_start, u64 range_end) 2501 { 2502 struct snapshots_seen s; 2503 2504 snapshots_seen_init(&s); 2505 2506 int ret = bch2_trans_run(c, 2507 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2508 BTREE_ITER_INTENT| 2509 BTREE_ITER_PREFETCH| 2510 BTREE_ITER_ALL_SNAPSHOTS, k, ({ 2511 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2512 if (ret) 2513 break; 2514 2515 if (k.k->type == KEY_TYPE_dirent) { 2516 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2517 2518 if (d.v->d_type != DT_DIR && 2519 d.v->d_type != DT_SUBVOL) 2520 inc_link(c, &s, links, range_start, range_end, 2521 le64_to_cpu(d.v->d_inum), 2522 bch2_snapshot_equiv(c, d.k->p.snapshot)); 2523 } 2524 0; 2525 }))); 2526 2527 snapshots_seen_exit(&s); 2528 2529 bch_err_fn(c, ret); 2530 return ret; 2531 } 2532 2533 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2534 struct bkey_s_c k, 2535 struct nlink_table *links, 2536 size_t *idx, u64 range_end) 2537 { 2538 struct bch_fs *c = trans->c; 2539 struct bch_inode_unpacked u; 2540 struct nlink *link = &links->d[*idx]; 2541 int ret = 0; 2542 2543 if (k.k->p.offset >= range_end) 2544 return 1; 2545 2546 if (!bkey_is_inode(k.k)) 2547 return 0; 2548 2549 BUG_ON(bch2_inode_unpack(k, &u)); 2550 2551 if (S_ISDIR(u.bi_mode)) 2552 return 0; 2553 2554 if (!u.bi_nlink) 2555 return 0; 2556 2557 while ((cmp_int(link->inum, k.k->p.offset) ?: 2558 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2559 BUG_ON(*idx == links->nr); 2560 link = &links->d[++*idx]; 2561 } 2562 2563 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2564 c, inode_wrong_nlink, 2565 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2566 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2567 bch2_inode_nlink_get(&u), link->count)) { 2568 bch2_inode_nlink_set(&u, link->count); 2569 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2570 } 2571 fsck_err: 2572 return ret; 2573 } 2574 2575 noinline_for_stack 2576 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2577 struct nlink_table *links, 2578 u64 range_start, u64 range_end) 2579 { 2580 size_t idx = 0; 2581 2582 int ret = bch2_trans_run(c, 2583 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2584 POS(0, range_start), 2585 BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 2586 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2587 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2588 if (ret < 0) { 2589 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2590 return ret; 2591 } 2592 2593 return 0; 2594 } 2595 2596 int bch2_check_nlinks(struct bch_fs *c) 2597 { 2598 struct nlink_table links = { 0 }; 2599 u64 this_iter_range_start, next_iter_range_start = 0; 2600 int ret = 0; 2601 2602 do { 2603 this_iter_range_start = next_iter_range_start; 2604 next_iter_range_start = U64_MAX; 2605 2606 ret = check_nlinks_find_hardlinks(c, &links, 2607 this_iter_range_start, 2608 &next_iter_range_start); 2609 2610 ret = check_nlinks_walk_dirents(c, &links, 2611 this_iter_range_start, 2612 next_iter_range_start); 2613 if (ret) 2614 break; 2615 2616 ret = check_nlinks_update_hardlinks(c, &links, 2617 this_iter_range_start, 2618 next_iter_range_start); 2619 if (ret) 2620 break; 2621 2622 links.nr = 0; 2623 } while (next_iter_range_start != U64_MAX); 2624 2625 kvfree(links.d); 2626 bch_err_fn(c, ret); 2627 return ret; 2628 } 2629 2630 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2631 struct bkey_s_c k) 2632 { 2633 struct bkey_s_c_reflink_p p; 2634 struct bkey_i_reflink_p *u; 2635 2636 if (k.k->type != KEY_TYPE_reflink_p) 2637 return 0; 2638 2639 p = bkey_s_c_to_reflink_p(k); 2640 2641 if (!p.v->front_pad && !p.v->back_pad) 2642 return 0; 2643 2644 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2645 int ret = PTR_ERR_OR_ZERO(u); 2646 if (ret) 2647 return ret; 2648 2649 bkey_reassemble(&u->k_i, k); 2650 u->v.front_pad = 0; 2651 u->v.back_pad = 0; 2652 2653 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN); 2654 } 2655 2656 int bch2_fix_reflink_p(struct bch_fs *c) 2657 { 2658 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2659 return 0; 2660 2661 int ret = bch2_trans_run(c, 2662 for_each_btree_key_commit(trans, iter, 2663 BTREE_ID_extents, POS_MIN, 2664 BTREE_ITER_INTENT|BTREE_ITER_PREFETCH| 2665 BTREE_ITER_ALL_SNAPSHOTS, k, 2666 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2667 fix_reflink_p_key(trans, &iter, k))); 2668 bch_err_fn(c, ret); 2669 return ret; 2670 } 2671