// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "darray.h"
#include "dirent.h"
#include "error.h"
#include "fs-common.h"
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"

#include <linux/bsearch.h>
#include <linux/dcache.h> /* struct qstr */

/*
 * XXX: this is handling transaction restarts without returning
 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
 */
/* Sum allocated extent sizes for @inum in @snapshot, or a negative error. */
static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
				    u32 snapshot)
{
	u64 sectors = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (bkey_extent_is_allocation(k.k))
			sectors += k.k->size;
		0;
	}));

	return ret ?: sectors;
}

/* Count DT_DIR dirents in directory @inum in @snapshot, or a negative error. */
static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
			      u32 snapshot)
{
	u64 subdirs = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (k.k->type == KEY_TYPE_dirent &&
		    bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
			subdirs++;
		0;
	}));

	return ret ?: subdirs;
}

/*
 * Look up a subvolume and return its snapshot and root inode number.
 *
 * NOTE(review): *snapshot/*inum are assigned from @s even when the lookup
 * failed, in which case they read indeterminate stack data; callers must
 * check the return value before using the outputs — confirm.
 */
static int subvol_lookup(struct btree_trans *trans, u32 subvol,
			 u32 *snapshot, u64 *inum)
{
	struct bch_subvolume s;
	int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);

	*snapshot = le32_to_cpu(s.snapshot);
	*inum = le64_to_cpu(s.inode);
	return ret;
}

/*
 * Unpack the first inode key found at @inode_nr, in any snapshot;
 * returns -BCH_ERR_ENOENT_inode if none exists.
 */
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inode_nr)
			break;
		if (!bkey_is_inode(k.k))
			continue;
		ret = bch2_inode_unpack(k, inode);
		goto found;
	}
	ret = -BCH_ERR_ENOENT_inode;
found:
	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Look up inode @inode_nr as visible from *snapshot; on success *snapshot is
 * updated to the snapshot the inode key was actually found in.
 */
static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
			struct bch_inode_unpacked *inode,
			u32 *snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inode_nr, *snapshot), 0);
	ret = bkey_err(k);
	if (ret)
		goto err;

	ret = bkey_is_inode(k.k)
		? bch2_inode_unpack(k, inode)
		: -BCH_ERR_ENOENT_inode;
	if (!ret)
		*snapshot = iter.pos.snapshot;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Hash-lookup dirent @name in directory @dir within @snapshot, returning its
 * target inode number and d_type.
 */
static int lookup_dirent_in_snapshot(struct btree_trans *trans,
			   struct bch_hash_info hash_info,
			   subvol_inum dir, struct qstr *name,
			   u64 *target, unsigned *type, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
							 &hash_info, dir, name, 0, snapshot);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
	*target = le64_to_cpu(d.v->d_inum);
	*type = d.v->d_type;
	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/*
 * Delete the dirent at @pos, using the parent directory's hash info so the
 * hash table stays consistent.
 */
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bch_inode_unpacked dir_inode;
	struct bch_hash_info dir_hash_info;
	int ret;

	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
	if (ret)
		goto err;

	dir_hash_info = bch2_hash_info_init(c, &dir_inode);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);

	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				    &dir_hash_info, &iter,
				    BTREE_UPDATE_internal_snapshot_node);
	bch2_trans_iter_exit(trans, &iter);
err:
	bch_err_fn(c, ret);
	return ret;
}

/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
			    struct bch_inode_unpacked *lostfound,
			    u64 reattaching_inum)
{
	struct bch_fs *c = trans->c;
	struct qstr lostfound_str = QSTR("lost+found");
	u64 inum = 0;
	unsigned d_type = 0;
	int ret;

	struct bch_snapshot_tree st;
	ret = bch2_snapshot_tree_lookup(trans,
					bch2_snapshot_tree(c, snapshot), &st);
	if (ret)
		return ret;

	subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };

	struct bch_subvolume subvol;
	ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
				 false, 0, &subvol);
	bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
		    le32_to_cpu(st.master_subvol), snapshot);
	if (ret)
		return ret;

	/* Subvol has no root inode recorded: repair it to point at the inode
	 * we're reattaching (inner @subvol intentionally shadows the outer): */
	if (!subvol.inode) {
		struct btree_iter iter;
		struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
				BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
				0, subvolume);
		ret = PTR_ERR_OR_ZERO(subvol);
		if (ret)
			return ret;

		subvol->v.inode = cpu_to_le64(reattaching_inum);
		bch2_trans_iter_exit(trans, &iter);
	}

	root_inum.inum = le64_to_cpu(subvol.inode);

	struct bch_inode_unpacked root_inode;
	struct bch_hash_info root_hash_info;
	u32 root_inode_snapshot = snapshot;
	ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
	bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
		    root_inum.inum, le32_to_cpu(st.master_subvol));
	if (ret)
		return ret;

	root_hash_info = bch2_hash_info_init(c, &root_inode);

	ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
					&lostfound_str, &inum, &d_type, snapshot);
	if (bch2_err_matches(ret, ENOENT))
		goto create_lostfound;

	bch_err_fn(c, ret);
	if (ret)
		return ret;

	if (d_type != DT_DIR) {
		bch_err(c, "error looking up lost+found: not a directory");
		return -BCH_ERR_ENOENT_not_directory;
	}

	/*
	 * The bch2_check_dirents pass has already run, dangling dirents
	 * shouldn't exist here:
	 */
	ret = lookup_inode(trans, inum, lostfound, &snapshot);
	bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
		    inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
	return ret;

create_lostfound:
	/*
	 * XXX: we could have a nicer log message here if we had a nice way to
	 * walk backpointers to print a path
	 */
	bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot));

	u64 now = bch2_current_time(c);
	struct btree_iter lostfound_iter = { NULL };
	u64 cpu = raw_smp_processor_id();

	bch2_inode_init_early(c, lostfound);
	bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
	lostfound->bi_dir = root_inode.bi_inum;

	root_inode.bi_nlink++;

	ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
	if (ret)
		goto err;

	bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
	ret = bch2_btree_iter_traverse(&lostfound_iter);
	if (ret)
		goto err;

	ret =   bch2_dirent_create_snapshot(trans,
				0, root_inode.bi_inum, snapshot, &root_hash_info,
				mode_to_type(lostfound->bi_mode),
				&lostfound_str,
				lostfound->bi_inum,
				&lostfound->bi_dir_offset,
				STR_HASH_must_create) ?:
		bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
				       BTREE_UPDATE_internal_snapshot_node);
err:
	bch_err_msg(c, ret, "creating lost+found");
	bch2_trans_iter_exit(trans, &lostfound_iter);
	return ret;
}

/*
 * Link an orphaned inode (or subvolume root) into lost+found, updating its
 * backpointer fields and, for directories, lost+found's link count.
 */
static int reattach_inode(struct btree_trans *trans,
			  struct bch_inode_unpacked *inode,
			  u32 inode_snapshot)
{
	struct bch_fs *c = trans->c;
	struct bch_hash_info dir_hash;
	struct bch_inode_unpacked lostfound;
	char name_buf[20];
	struct qstr name;
	u64 dir_offset = 0;
	u32 dirent_snapshot = inode_snapshot;
	int ret;

	if (inode->bi_subvol) {
		inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;

		u64 root_inum;
		ret = subvol_lookup(trans, inode->bi_parent_subvol,
				    &dirent_snapshot, &root_inum);
		if (ret)
			return ret;

		snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
	}

	ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
	if (ret)
		return ret;

	if (S_ISDIR(inode->bi_mode)) {
		lostfound.bi_nlink++;

		ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
		if (ret)
			return ret;
	}

	dir_hash = bch2_hash_info_init(c, &lostfound);

	name = (struct qstr) QSTR(name_buf);

	ret = bch2_dirent_create_snapshot(trans,
				inode->bi_parent_subvol, lostfound.bi_inum,
				dirent_snapshot,
				&dir_hash,
				inode_d_type(inode),
				&name,
				inode->bi_subvol ?: inode->bi_inum,
				&dir_offset,
				STR_HASH_must_create);
	if (ret) {
		bch_err_msg(c, ret, "error creating dirent");
		return ret;
	}

	inode->bi_dir = lostfound.bi_inum;
	inode->bi_dir_offset = dir_offset;

	return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
}

/* Remove the dirent that @inode's backpointer (bi_dir/bi_dir_offset) names. */
static int remove_backpointer(struct btree_trans *trans,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c_dirent d;
	int ret;

	d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
				     POS(inode->bi_dir, inode->bi_dir_offset), 0,
				     dirent);
	ret =   bkey_err(d) ?:
		__remove_dirent(trans, d.k->p);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/* Detach subvolume @s from its current dirent and reattach it in lost+found. */
static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s)
{
	struct bch_fs *c = trans->c;

	struct bch_inode_unpacked inode;
	int ret = bch2_inode_find_by_inum_trans(trans,
				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
				&inode);
	if (ret)
		return ret;

	ret = remove_backpointer(trans, &inode);
	bch_err_msg(c, ret, "removing dirent");
	if (ret)
		return ret;

	ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot));
	bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
	return ret;
}

/*
 * Recreate a missing subvolume key for @subvolid/@snapshotid; if @inum is 0,
 * also create a root directory inode for it first.
 */
static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
{
	struct bch_fs *c = trans->c;

	if (!bch2_snapshot_is_leaf(c, snapshotid)) {
		bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
		return -BCH_ERR_fsck_repair_unimplemented;
	}

	/*
	 * If inum isn't set, that means we're being called from check_dirents,
	 * not check_inodes - the root of this subvolume doesn't exist or we
	 * would have found it there:
	 */
	if (!inum) {
		struct btree_iter inode_iter = {};
		struct bch_inode_unpacked new_inode;
		u64 cpu = raw_smp_processor_id();

		bch2_inode_init_early(c, &new_inode);
		bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);

		new_inode.bi_subvol = subvolid;

		int ret =  bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
			   bch2_btree_iter_traverse(&inode_iter) ?:
			   bch2_inode_write(trans, &inode_iter, &new_inode);
		bch2_trans_iter_exit(trans, &inode_iter);
		if (ret)
			return ret;

		inum = new_inode.bi_inum;
	}

	bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);

	struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
	int ret = PTR_ERR_OR_ZERO(new_subvol);
	if (ret)
		return ret;

	bkey_subvolume_init(&new_subvol->k_i);
	new_subvol->k.p.offset	= subvolid;
	new_subvol->v.snapshot	= cpu_to_le32(snapshotid);
	new_subvol->v.inode	= cpu_to_le64(inum);
	ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
	if (ret)
		return ret;

	/* Point the snapshot back at the subvolume: */
	struct btree_iter iter;
	struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshots, POS(0, snapshotid),
			0, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
	if (ret)
		return ret;

	u32 snapshot_tree = le32_to_cpu(s->v.tree);

	s->v.subvol = cpu_to_le32(subvolid);
	SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
	bch2_trans_iter_exit(trans, &iter);

	/* If the snapshot tree has no master subvol, make this one master: */
	struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
			0, snapshot_tree);
	ret = PTR_ERR_OR_ZERO(st);
	bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
	if (ret)
		return ret;

	if (!st->v.master_subvol)
		st->v.master_subvol = cpu_to_le32(subvolid);

	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/*
 * Recreate a missing inode for keys found in @btree: mode/size are inferred
 * from the btree the keys live in (extents -> regular file sized by the last
 * extent, dirents -> directory, xattrs -> regular file).
 */
static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 snapshot, u64 inum)
{
	struct bch_fs *c = trans->c;
	unsigned i_mode = S_IFREG;
	u64 i_size = 0;

	switch (btree) {
	case BTREE_ID_extents: {
		struct btree_iter iter = {};

		bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
		struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
		bch2_trans_iter_exit(trans, &iter);
		int ret = bkey_err(k);
		if (ret)
			return ret;

		i_size = k.k->p.offset << 9;
		break;
	}
	case BTREE_ID_dirents:
		i_mode = S_IFDIR;
		break;
	case BTREE_ID_xattrs:
		break;
	default:
		BUG();
	}

	struct bch_inode_unpacked new_inode;
	bch2_inode_init_early(c, &new_inode);
	bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL);
	new_inode.bi_size = i_size;
	new_inode.bi_inum = inum;

	return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
}

/* Sorted list of snapshot IDs seen at the current key position. */
struct snapshots_seen {
	struct bpos			pos;
	snapshot_id_list		ids;
};

static inline void snapshots_seen_exit(struct snapshots_seen *s)
{
	darray_exit(&s->ids);
}

static inline void snapshots_seen_init(struct snapshots_seen *s)
{
	memset(s, 0, sizeof(*s));
}

/* Insert @id into the sorted seen-list, keeping it duplicate-free. */
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
	u32 *i;
	__darray_for_each(s->ids, i) {
		if (*i == id)
			return 0;
		if (*i > id)
			break;
	}

	int ret = darray_insert_item(&s->ids, i - s->ids.data, id);
	if (ret)
		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
			s->ids.size);
	return ret;
}

/* Record that we're now at @pos; reset the seen-list when position changes. */
static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
				 enum btree_id btree_id, struct bpos pos)
{
	if (!bkey_eq(s->pos, pos))
		s->ids.nr = 0;
	s->pos = pos;

	return snapshot_list_add_nodup(c, &s->ids, pos.snapshot);
}

/**
 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
 * and @ancestor hasn't been overwritten in @seen
 *
 * @c:		filesystem handle
 * @seen:	list of snapshot ids already seen at current position
 * @id:		descendent snapshot id
 * @ancestor:	ancestor snapshot id
 *
 * Returns:	whether key in @ancestor snapshot is visible in @id snapshot
 */
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
				    u32 id, u32 ancestor)
{
	ssize_t i;

	EBUG_ON(id > ancestor);

	/* @ancestor should be the snapshot most recently added to @seen */
	EBUG_ON(ancestor != seen->pos.snapshot);
	EBUG_ON(ancestor != darray_last(seen->ids));

	if (id == ancestor)
		return true;

	if (!bch2_snapshot_is_ancestor(c, id, ancestor))
		return false;

	/*
	 * We know that @id is a descendant of @ancestor, we're checking if
	 * we've seen a key that overwrote @ancestor - i.e. also a descendent of
	 * @ascestor and with @id as a descendent.
	 *
	 * But we already know that we're scanning IDs between @id and @ancestor
	 * numerically, since snapshot ID lists are kept sorted, so if we find
	 * an id that's an ancestor of @id we're done:
	 */

	for (i = seen->ids.nr - 2;
	     i >= 0 && seen->ids.data[i] >= id;
	     --i)
		if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]))
			return false;

	return true;
}

/**
 * ref_visible - given a key with snapshot id @src that points to a key with
 * snapshot id @dst, test whether there is some snapshot in which @dst is
 * visible.
 *
 * @c:		filesystem handle
 * @s:		list of snapshot IDs already seen at @src
 * @src:	snapshot ID of src key
 * @dst:	snapshot ID of dst key
 * Returns:	true if there is some snapshot in which @dst is visible
 *
 * Assumes we're visiting @src keys in natural key order
 */
static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
			u32 src, u32 dst)
{
	return dst <= src
		? key_visible_in_snapshot(c, s, dst, src)
		: bch2_snapshot_is_ancestor(c, src, dst);
}

/* Like ref_visible(), but with seen-lists for both keys; normalizes order. */
static int ref_visible2(struct bch_fs *c,
			u32 src, struct snapshots_seen *src_seen,
			u32 dst, struct snapshots_seen *dst_seen)
{
	if (dst > src) {
		swap(dst, src);
		swap(dst_seen, src_seen);
	}
	return key_visible_in_snapshot(c, src_seen, dst, src);
}

/* Iterate inode-walker entries whose snapshot is visible from @_snapshot. */
#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)				\
	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&	\
	     (_i)->snapshot <= (_snapshot); _i++)					\
		if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))

struct inode_walker_entry {
	struct bch_inode_unpacked inode;
	u32			snapshot;	/* snapshot this version lives in */
	bool			seen_this_pos;
	u64			count;		/* running sector/subdir count */
};

/* Caches all snapshot versions of the inode at the current walk position. */
struct inode_walker {
	bool				first_this_inode;
	bool				recalculate_sums;
	struct bpos			last_pos;

	DARRAY(struct inode_walker_entry) inodes;
};

static void inode_walker_exit(struct inode_walker *w)
{
	darray_exit(&w->inodes);
}

static struct inode_walker inode_walker_init(void)
{
	return (struct inode_walker) { 0, };
}

/* Unpack @inode and append it to the walker's list. */
static int add_inode(struct bch_fs *c, struct inode_walker *w,
		     struct bkey_s_c inode)
{
	struct bch_inode_unpacked u;

	BUG_ON(bch2_inode_unpack(inode, &u));

	return darray_push(&w->inodes, ((struct inode_walker_entry) {
		.inode		= u,
		.snapshot	= inode.k->p.snapshot,
	}));
}

/* Repopulate @w with every snapshot version of inode @inum. */
static int get_inodes_all_snapshots(struct btree_trans *trans,
				    struct inode_walker *w, u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->recalculate_sums = false;
	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	w->first_this_inode = true;
	return 0;
}

/*
 * Find the walker entry for the inode version that @k belongs to; if @k is in
 * a strictly newer snapshot than the inode (and isn't a whiteout), insert a
 * new entry for that snapshot so per-snapshot counts stay accurate.
 */
static struct inode_walker_entry *
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
{
	bool is_whiteout = k.k->type == KEY_TYPE_whiteout;

	struct inode_walker_entry *i;
	__darray_for_each(w->inodes, i)
		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
			goto found;

	return NULL;
found:
	BUG_ON(k.k->p.snapshot > i->snapshot);

	if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
		struct inode_walker_entry new = *i;

		new.snapshot = k.k->p.snapshot;
		new.count = 0;

		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, k);

		bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
			 "unexpected because we should always update the inode when we update a key in that inode\n"
			 "%s",
			 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
		printbuf_exit(&buf);

		while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
			--i;

		size_t pos = i - w->inodes.data;
		int ret = darray_insert_item(&w->inodes, pos, new);
		if (ret)
			return ERR_PTR(ret);

		i = w->inodes.data + pos;
	}

	return i;
}

/* Advance the walker to @k's inode/position, then look up the matching entry. */
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
					     struct inode_walker *w,
					     struct bkey_s_c k)
{
	if (w->last_pos.inode != k.k->p.inode) {
		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
		if (ret)
			return ERR_PTR(ret);
	} else if (bkey_cmp(w->last_pos, k.k->p)) {
		darray_for_each(w->inodes, i)
			i->seen_this_pos = false;
	}

	w->last_pos = k.k->p;

	return lookup_inode_for_snapshot(trans->c, w, k);
}

/* Populate @w with versions of inode @inum visible from the snapshots in @s. */
static int get_visible_inodes(struct btree_trans *trans,
			      struct inode_walker *w,
			      struct snapshots_seen *s,
			      u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
			continue;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);

		if (k.k->p.snapshot >= s->pos.snapshot)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

/*
 * Re-insert a hash-table key at its correct hashed slot: delete it at the
 * current (wrong) position and re-add via the hash code, then commit.
 */
static int hash_redo_key(struct btree_trans *trans,
			 const struct bch_hash_desc desc,
			 struct bch_hash_info *hash_info,
			 struct btree_iter *k_iter, struct bkey_s_c k)
{
	struct bkey_i *delete;
	struct bkey_i *tmp;

	delete = bch2_trans_kmalloc(trans, sizeof(*delete));
	if (IS_ERR(delete))
		return PTR_ERR(delete);

	tmp = bch2_bkey_make_mut_noupdate(trans, k);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	bkey_init(&delete->k);
	delete->k.p = k_iter->pos;
	return  bch2_btree_iter_traverse(k_iter) ?:
		bch2_trans_update(trans, k_iter, delete, 0) ?:
		bch2_hash_set_in_snapshot(trans, desc, hash_info,
				       (subvol_inum) { 0, k.k->p.inode },
				       k.k->p.snapshot, tmp,
				       STR_HASH_must_create|
				       BTREE_UPDATE_internal_snapshot_node) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

/*
 * Verify @hash_k sits at (or is reachable from) its hashed offset; repair
 * duplicates and wrong-offset keys. Returns >0 when a duplicate was deleted,
 * -BCH_ERR_transaction_restart_nested after a redo.
 */
static int hash_check_key(struct btree_trans *trans,
			  const struct bch_hash_desc desc,
			  struct bch_hash_info *hash_info,
			  struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c k;
	u64 hash;
	int ret = 0;

	if (hash_k.k->type != desc.key_type)
		return 0;

	hash = desc.hash_bkey(hash_info, hash_k);

	if (likely(hash == hash_k.k->p.offset))
		return 0;

	if (hash_k.k->p.offset < hash)
		goto bad_hash;

	/* Scan forward from the hashed slot for the key, a duplicate, or a hole: */
	for_each_btree_key_norestart(trans, iter, desc.btree_id,
				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
				     BTREE_ITER_slots, k, ret) {
		if (bkey_eq(k.k->p, hash_k.k->p))
			break;

		if (fsck_err_on(k.k->type == desc.key_type &&
				!desc.cmp_bkey(k, hash_k),
				trans, hash_table_key_duplicate,
				"duplicate hash table keys:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, hash_k),
				 buf.buf))) {
			ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
			break;
		}

		if (bkey_deleted(k.k)) {
			bch2_trans_iter_exit(trans, &iter);
			goto bad_hash;
		}
	}
out:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
bad_hash:
	if (fsck_err(trans, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
		     bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
		     (printbuf_reset(&buf),
		      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
		ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
		bch_err_fn(c, ret);
		if (ret)
			return ret;
		ret = -BCH_ERR_transaction_restart_nested;
	}
fsck_err:
	goto out;
}

static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
						struct btree_iter *iter,
						struct bpos pos)
{
	return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
}

/*
 * Fetch the dirent that @inode's backpointer names; for subvolume roots the
 * dirent lives in the parent subvol's snapshot, so *snapshot is updated.
 */
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
					       struct btree_iter *iter,
					       struct bch_inode_unpacked *inode,
					       u32 *snapshot)
{
	if (inode->bi_subvol) {
		u64 inum;
		int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
		if (ret)
			return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
	}

	return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
}

static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, 899 struct bkey_s_c_dirent d) 900 { 901 return inode->bi_dir == d.k->p.inode && 902 inode->bi_dir_offset == d.k->p.offset; 903 } 904 905 static bool dirent_points_to_inode(struct bkey_s_c_dirent d, 906 struct bch_inode_unpacked *inode) 907 { 908 return d.v->d_type == DT_SUBVOL 909 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 910 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 911 } 912 913 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 914 { 915 struct btree_iter iter; 916 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 917 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 918 bch2_trans_iter_exit(trans, &iter); 919 return ret; 920 } 921 922 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 923 struct bch_inode_unpacked *inode, 924 u32 inode_snapshot, bool *write_inode) 925 { 926 struct bch_fs *c = trans->c; 927 struct printbuf buf = PRINTBUF; 928 929 struct btree_iter dirent_iter = {}; 930 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 931 int ret = bkey_err(d); 932 if (ret && !bch2_err_matches(ret, ENOENT)) 933 return ret; 934 935 if (fsck_err_on(ret, 936 trans, inode_points_to_missing_dirent, 937 "inode points to missing dirent\n%s", 938 (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || 939 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 940 trans, inode_points_to_wrong_dirent, 941 "inode points to dirent that does not point back:\n%s", 942 (bch2_bkey_val_to_text(&buf, c, inode_k), 943 prt_newline(&buf), 944 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 945 /* 946 * We just clear the backpointer fields for now. If we find a 947 * dirent that points to this inode in check_dirents(), we'll 948 * update it then; then when we get to check_path() if the 949 * backpointer is still 0 we'll reattach it. 
950 */ 951 inode->bi_dir = 0; 952 inode->bi_dir_offset = 0; 953 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 954 *write_inode = true; 955 } 956 957 ret = 0; 958 fsck_err: 959 bch2_trans_iter_exit(trans, &dirent_iter); 960 printbuf_exit(&buf); 961 bch_err_fn(c, ret); 962 return ret; 963 } 964 965 static int check_inode(struct btree_trans *trans, 966 struct btree_iter *iter, 967 struct bkey_s_c k, 968 struct bch_inode_unpacked *prev, 969 struct snapshots_seen *s, 970 bool full) 971 { 972 struct bch_fs *c = trans->c; 973 struct bch_inode_unpacked u; 974 bool do_update = false; 975 int ret; 976 977 ret = bch2_check_key_has_snapshot(trans, iter, k); 978 if (ret < 0) 979 goto err; 980 if (ret) 981 return 0; 982 983 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 984 if (ret) 985 goto err; 986 987 if (!bkey_is_inode(k.k)) 988 return 0; 989 990 BUG_ON(bch2_inode_unpack(k, &u)); 991 992 if (!full && 993 !(u.bi_flags & (BCH_INODE_i_size_dirty| 994 BCH_INODE_i_sectors_dirty| 995 BCH_INODE_unlinked))) 996 return 0; 997 998 if (prev->bi_inum != u.bi_inum) 999 *prev = u; 1000 1001 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 1002 inode_d_type(prev) != inode_d_type(&u), 1003 trans, inode_snapshot_mismatch, 1004 "inodes in different snapshots don't match")) { 1005 bch_err(c, "repair not implemented yet"); 1006 return -BCH_ERR_fsck_repair_unimplemented; 1007 } 1008 1009 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 1010 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 1011 struct bpos new_min_pos; 1012 1013 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 1014 if (ret) 1015 goto err; 1016 1017 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1018 1019 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1020 1021 bch_err_msg(c, ret, "in fsck updating inode"); 1022 if (ret) 1023 return ret; 1024 1025 if (!bpos_eq(new_min_pos, POS_MIN)) 1026 
		bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos));
		return 0;
	}

	/* Unlinked inodes should be on the deleted list: */
	if (u.bi_flags & BCH_INODE_unlinked) {
		ret = check_inode_deleted_list(trans, k.k->p);
		if (ret < 0)
			return ret;

		fsck_err_on(!ret,
			    trans, unlinked_inode_not_on_deleted_list,
			    "inode %llu:%u unlinked, but not on deleted list",
			    u.bi_inum, k.k->p.snapshot);
		ret = 0;
	}

	/*
	 * On an unclean shutdown an unlinked inode is expected; only ask the
	 * user (fsck_err) when the superblock claims a clean shutdown:
	 */
	if (u.bi_flags & BCH_INODE_unlinked &&
	    (!c->sb.clean ||
	     fsck_err(trans, inode_unlinked_but_clean,
		      "filesystem marked clean, but inode %llu unlinked",
		      u.bi_inum))) {
		ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
		bch_err_msg(c, ret, "in fsck deleting inode");
		return ret;
	}

	if (u.bi_flags & BCH_INODE_i_size_dirty &&
	    (!c->sb.clean ||
	     fsck_err(trans, inode_i_size_dirty_but_clean,
		      "filesystem marked clean, but inode %llu has i_size dirty",
		      u.bi_inum))) {
		bch_verbose(c, "truncating inode %llu", u.bi_inum);

		/*
		 * XXX: need to truncate partial blocks too here - or ideally
		 * just switch units to bytes and that issue goes away
		 */
		ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
				SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
				     iter->pos.snapshot),
				POS(u.bi_inum, U64_MAX),
				0, NULL);
		bch_err_msg(c, ret, "in fsck truncating inode");
		if (ret)
			return ret;

		/*
		 * We truncated without our normal sector accounting hook, just
		 * make sure we recalculate it:
		 */
		u.bi_flags |= BCH_INODE_i_sectors_dirty;

		u.bi_flags &= ~BCH_INODE_i_size_dirty;
		do_update = true;
	}

	if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
	    (!c->sb.clean ||
	     fsck_err(trans, inode_i_sectors_dirty_but_clean,
		      "filesystem marked clean, but inode %llu has i_sectors dirty",
		      u.bi_inum))) {
		s64 sectors;

		bch_verbose(c, "recounting sectors for inode %llu",
			    u.bi_inum);

		sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
		if (sectors < 0) {
			bch_err_msg(c, sectors, "in fsck recounting inode sectors");
			return sectors;
		}

		u.bi_sectors = sectors;
		u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
		do_update = true;
	}

	/* Untrusted backpointer: clear it and let check_dirents rebuild it: */
	if (u.bi_flags & BCH_INODE_backptr_untrusted) {
		u.bi_dir = 0;
		u.bi_dir_offset = 0;
		u.bi_flags &= ~BCH_INODE_backptr_untrusted;
		do_update = true;
	}

	if (u.bi_dir || u.bi_dir_offset) {
		ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
		if (ret)
			goto err;
	}

	if (fsck_err_on(u.bi_parent_subvol &&
			(u.bi_subvol == 0 ||
			 u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
			trans, inode_bi_parent_nonzero,
			"inode %llu:%u has subvol %u but nonzero parent subvol %u",
			u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
		u.bi_parent_subvol = 0;
		do_update = true;
	}

	if (u.bi_subvol) {
		struct bch_subvolume s;

		ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s);
		if (ret && !bch2_err_matches(ret, ENOENT))
			goto err;

		/* Subvolume btree was lost - reconstruct from this inode: */
		if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
			ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
			goto do_update;
		}

		if (fsck_err_on(ret,
				trans, inode_bi_subvol_missing,
				"inode %llu:%u bi_subvol points to missing subvolume %u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
		    fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
				!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
							   k.k->p.snapshot),
				trans, inode_bi_subvol_wrong,
				"inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol,
				le64_to_cpu(s.inode),
				le32_to_cpu(s.snapshot))) {
			u.bi_subvol = 0;
			u.bi_parent_subvol = 0;
			do_update = true;
		}
	}
do_update:
	if (do_update) {
		ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
		bch_err_msg(c, ret, "in fsck updating inode");
		if (ret)
			return ret;
	}
err:
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk the inodes btree across all snapshots, running check_inode() on each
 * key and committing any repairs.
 */
int bch2_check_inodes(struct bch_fs *c)
{
	bool full = c->opts.fsck;
	struct bch_inode_unpacked prev = { 0 };
	struct snapshots_seen s;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
				POS_MIN,
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_inode(trans, &iter, k, &prev, &s, full)));

	snapshots_seen_exit(&s);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Which inode modes may legitimately own keys in @btree: extents belong to
 * regular files and symlinks, dirents to directories, xattrs to anything.
 */
static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode)
{
	switch (btree) {
	case BTREE_ID_extents:
		return S_ISREG(mode) || S_ISLNK(mode);
	case BTREE_ID_dirents:
		return S_ISDIR(mode);
	case BTREE_ID_xattrs:
		return true;
	default:
		BUG();
	}
}

/*
 * Verify that key @k has a matching inode (@i, from the inode walker) of an
 * appropriate mode; delete the key if not.
 */
static int check_key_has_inode(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct inode_walker *inode,
			       struct inode_walker_entry *i,
			       struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		return ret;

	if (k.k->type == KEY_TYPE_whiteout)
		goto out;

	/* Inodes btree was lost - reconstruct the inode, then rewind: */
	if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
		ret =   reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
		if (ret)
			goto err;

		inode->last_pos.inode--;
		ret = -BCH_ERR_transaction_restart_nested;
		goto err;
	}

	if (fsck_err_on(!i,
			trans, key_in_missing_inode,
			"key 
in missing inode:\n %s",
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
		goto delete;

	if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode),
			trans, key_in_wrong_inode_type,
			"key for wrong inode mode %o:\n %s",
			i->inode.bi_mode,
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
		goto delete;
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
delete:
	ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node);
	goto out;
}

/*
 * Verify the i_sectors we accumulated while walking extents against each
 * inode, rewriting the inode when they disagree.
 */
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	struct bch_fs *c = trans->c;
	int ret = 0;
	s64 count2;

	darray_for_each(w->inodes, i) {
		if (i->inode.bi_sectors == i->count)
			continue;

		/*
		 * NOTE(review): unlike check_subdir_count_notnested(), a
		 * negative (error) return from bch2_count_inode_sectors() is
		 * not checked here before use - confirm whether that's
		 * intentional.
		 */
		count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);

		if (w->recalculate_sums)
			i->count = count2;

		if (i->count != count2) {
			bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
					    w->last_pos.inode, i->snapshot, i->count, count2);
			return -BCH_ERR_internal_fsck_err;
		}

		if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
				trans, inode_i_sectors_wrong,
				"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
				w->last_pos.inode, i->snapshot,
				i->inode.bi_sectors, i->count)) {
			i->inode.bi_sectors = i->count;
			ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
			if (ret)
				break;
		}
	}
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

/* As above, but also reports whether the transaction was restarted: */
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;
	return check_i_sectors_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}

/* End position of an extent we've seen, per snapshot: */
struct extent_end {
	u32 snapshot;
	u64 offset;
	struct snapshots_seen seen;
};

/* Extent ends seen for the current inode, used for overlap detection: */
struct extent_ends {
	struct bpos last_pos;
	DARRAY(struct extent_end) e;
};

static void extent_ends_reset(struct extent_ends *extent_ends)
{
	darray_for_each(extent_ends->e, i)
		snapshots_seen_exit(&i->seen);
	extent_ends->e.nr = 0;
}

static void extent_ends_exit(struct extent_ends *extent_ends)
{
	extent_ends_reset(extent_ends);
	darray_exit(&extent_ends->e);
}

static void extent_ends_init(struct extent_ends *extent_ends)
{
	memset(extent_ends, 0, sizeof(*extent_ends));
}

/*
 * Record the end position of @k, replacing any previous entry for the same
 * snapshot; entries are kept sorted by snapshot.
 */
static int extent_ends_at(struct bch_fs *c,
			  struct extent_ends *extent_ends,
			  struct snapshots_seen *seen,
			  struct bkey_s_c k)
{
	struct extent_end *i, n = (struct extent_end) {
		.offset		= k.k->p.offset,
		.snapshot	= k.k->p.snapshot,
		.seen		= *seen,
	};

	/* The entry owns its own copy of the seen-snapshots list: */
	n.seen.ids.data = kmemdup(seen->ids.data,
				  sizeof(seen->ids.data[0]) * seen->ids.size,
				  GFP_KERNEL);
	if (!n.seen.ids.data)
		return -BCH_ERR_ENOMEM_fsck_extent_ends_at;

	__darray_for_each(extent_ends->e, i) {
		if (i->snapshot == k.k->p.snapshot) {
			snapshots_seen_exit(&i->seen);
			*i = n;
			return 0;
		}

		if (i->snapshot >= k.k->p.snapshot)
			break;
	}

	return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n);
}

/*
 * Repair a detected overlap: re-find both extents, then overwrite whichever
 * one is in the older snapshot.
 */
static int overlapping_extents_found(struct btree_trans *trans,
				     enum btree_id btree,
				     struct bpos pos1, struct snapshots_seen *pos1_seen,
				     struct bkey pos2,
				     bool *fixed,
				     struct extent_end *extent_end)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct btree_iter iter1, iter2 = { NULL };
	struct bkey_s_c k1, k2;
	int ret;

	/* Caller guarantees pos1 sorts after the start of pos2: */
	BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2)));

	bch2_trans_iter_init(trans, &iter1, btree, pos1,
			     BTREE_ITER_all_snapshots|
			     BTREE_ITER_not_extents);
	k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX));
	ret = bkey_err(k1);
	if (ret)
		goto err;

	prt_str(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, k1);

	/* The first extent must still be exactly where we saw it: */
	if (!bpos_eq(pos1, k1.k->p)) {
		prt_str(&buf, "\n wanted\n ");
		bch2_bpos_to_text(&buf, pos1);
		prt_str(&buf, "\n ");
		bch2_bkey_to_text(&buf, &pos2);

		bch_err(c, "%s: error finding first overlapping extent when repairing, got%s",
			__func__, buf.buf);
		ret = -BCH_ERR_internal_fsck_err;
		goto err;
	}

	bch2_trans_copy_iter(&iter2, &iter1);

	/* Advance a second iterator until we reach the second extent: */
	while (1) {
		bch2_btree_iter_advance(&iter2);

		k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX));
		ret = bkey_err(k2);
		if (ret)
			goto err;

		if (bpos_ge(k2.k->p, pos2.p))
			break;
	}

	prt_str(&buf, "\n ");
	bch2_bkey_val_to_text(&buf, c, k2);

	if (bpos_gt(k2.k->p, pos2.p) ||
	    pos2.size != k2.k->size) {
		bch_err(c, "%s: error finding seconding overlapping extent when repairing%s",
			__func__, buf.buf);
		ret = -BCH_ERR_internal_fsck_err;
		goto err;
	}

	prt_printf(&buf, "\n overwriting %s extent",
		   pos1.snapshot >= pos2.p.snapshot ? "first" : "second");

	if (fsck_err(trans, extent_overlapping,
		     "overlapping extents%s", buf.buf)) {
		struct btree_iter *old_iter = &iter1;
		struct disk_reservation res = { 0 };

		/* The extent in the older snapshot is the one overwritten: */
		if (pos1.snapshot < pos2.p.snapshot) {
			old_iter = &iter2;
			swap(k1, k2);
		}

		trans->extra_disk_res += bch2_bkey_sectors_compressed(k2);

		ret =   bch2_trans_update_extent_overwrite(trans, old_iter,
				BTREE_UPDATE_internal_snapshot_node,
				k1, k2) ?:
			bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc);
		bch2_disk_reservation_put(c, &res);

		if (ret)
			goto err;

		*fixed = true;

		if (pos1.snapshot == pos2.p.snapshot) {
			/*
			 * We overwrote the first extent, and did the overwrite
			 * in the same snapshot:
			 */
			extent_end->offset = bkey_start_offset(&pos2);
		} else if (pos1.snapshot > pos2.p.snapshot) {
			/*
			 * We overwrote the first extent in pos2's snapshot:
			 */
			ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot);
		} else {
			/*
			 * We overwrote the second extent - restart
			 * check_extent() from the top:
			 */
			ret = -BCH_ERR_transaction_restart_nested;
		}
	}
fsck_err:
err:
	bch2_trans_iter_exit(trans, &iter2);
	bch2_trans_iter_exit(trans, &iter1);
	printbuf_exit(&buf);
	return ret;
}

/*
 * Compare @k against the recorded end positions of previously seen extents,
 * repairing any overlap that's visible in the same snapshot.
 */
static int check_overlapping_extents(struct btree_trans *trans,
				     struct snapshots_seen *seen,
				     struct extent_ends *extent_ends,
				     struct bkey_s_c k,
				     struct btree_iter *iter,
				     bool *fixed)
{
	struct bch_fs *c = trans->c;
	int ret = 0;

	/* transaction restart, running again */
	if (bpos_eq(extent_ends->last_pos, k.k->p))
		return 0;

	if (extent_ends->last_pos.inode != k.k->p.inode)
		extent_ends_reset(extent_ends);

	darray_for_each(extent_ends->e, i) {
		if (i->offset <= bkey_start_offset(k.k))
			continue;

		if (!ref_visible2(c,
				  k.k->p.snapshot, seen,
				  i->snapshot, &i->seen))
			continue;

		ret = overlapping_extents_found(trans, iter->btree_id,
						SPOS(iter->pos.inode,
						     i->offset,
						     i->snapshot),
						&i->seen,
						*k.k, fixed, i);
		if (ret)
			goto err;
	}

	extent_ends->last_pos = k.k->p;
err:
	return ret;
}

/*
 * Report (but don't repair) encoded extents larger than the configured
 * encoded_extent_max.
 */
static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
				struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct bch_extent_crc_unpacked crc;
	const union bch_extent_entry *i;
	unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;

	bkey_for_each_crc(k.k, ptrs, crc, i)
		if (crc_is_encoded(crc) &&
		    crc.uncompressed_size > encoded_extent_max_sectors) {
			struct printbuf buf = PRINTBUF;

			bch2_bkey_val_to_text(&buf, c, k);
			bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
			printbuf_exit(&buf);
		}

	return 0;
}

/*
 * Check one extent: snapshot validity, owning inode, overlaps with previous
 * extents, i_size, and accumulate i_sectors.
 */
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
			struct bkey_s_c k,
			struct inode_walker *inode,
			struct snapshots_seen *s,
			struct extent_ends *extent_ends)
{
	struct bch_fs *c = trans->c;
	struct inode_walker_entry *i;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret) {
		/* positive return: key was deleted, not an error */
		ret = ret < 0 ? ret : 0;
		goto out;
	}

	/* Moved on to a new inode - finalize i_sectors for the previous one: */
	if (inode->last_pos.inode != k.k->p.inode) {
		ret = check_i_sectors(trans, inode);
		if (ret)
			goto err;
	}

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	i = walk_inode(trans, inode, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		goto err;

	ret = check_key_has_inode(trans, iter, inode, i, k);
	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_whiteout) {
		ret = check_overlapping_extents(trans, s, extent_ends, k, iter,
						&inode->recalculate_sums);
		if (ret)
			goto err;
	}

	/*
	 * Check inodes in reverse order, from oldest snapshots to newest,
	 * starting from the inode that matches this extent's snapshot. If we
	 * didn't have one, iterate over all inodes:
	 */
	if (!i)
		i = &darray_last(inode->inodes);

	for (;
	     inode->inodes.data && i >= inode->inodes.data;
	     --i) {
		if (i->snapshot > k.k->p.snapshot ||
		    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
			continue;

		if (k.k->type != KEY_TYPE_whiteout) {
			if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
					k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
					!bkey_extent_is_reservation(k),
					trans, extent_past_end_of_inode,
					"extent type past end of inode %llu:%u, i_size %llu\n %s",
					i->inode.bi_inum, i->snapshot, i->inode.bi_size,
					(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
				struct btree_iter iter2;

				bch2_trans_copy_iter(&iter2, iter);
				bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
				ret =   bch2_btree_iter_traverse(&iter2) ?:
					bch2_btree_delete_at(trans, &iter2,
						BTREE_UPDATE_internal_snapshot_node);
				bch2_trans_iter_exit(trans, &iter2);
				if (ret)
					goto err;

				iter->k.type = KEY_TYPE_whiteout;
			}

			if (bkey_extent_is_allocation(k.k))
				i->count += k.k->size;
		}

		i->seen_this_pos = true;
	}

	if (k.k->type != KEY_TYPE_whiteout) {
		ret = extent_ends_at(c, extent_ends, s, k);
		if (ret)
			goto err;
	}
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
 * that i_size and i_sectors are consistent
 */
int bch2_check_extents(struct bch_fs *c)
{
	struct inode_walker w = inode_walker_init();
	struct snapshots_seen s;
	struct extent_ends extent_ends;
	struct disk_reservation res = { 0 };

	snapshots_seen_init(&s);
	extent_ends_init(&extent_ends);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				&res, NULL,
				BCH_TRANS_COMMIT_no_enospc, ({
			bch2_disk_reservation_put(c, &res);
			check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
			check_extent_overbig(trans, &iter, k);
		})) ?:
		check_i_sectors_notnested(trans, &w));

	bch2_disk_reservation_put(c, &res);
	extent_ends_exit(&extent_ends);
	inode_walker_exit(&w);
	snapshots_seen_exit(&s);

	bch_err_fn(c, ret);
	return ret;
}

/* Walk the reflink btree, checking for overbig encoded extents: */
int bch2_check_indirect_extents(struct bch_fs *c)
{
	struct disk_reservation res = { 0 };

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
				POS_MIN,
				BTREE_ITER_prefetch, k,
				&res, NULL,
				BCH_TRANS_COMMIT_no_enospc, ({
			bch2_disk_reservation_put(c, &res);
			check_extent_overbig(trans, &iter, k);
		})));

	bch2_disk_reservation_put(c, &res);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Verify the subdirectory counts we accumulated while walking dirents against
 * each directory inode's i_nlink, rewriting the inode when they disagree.
 */
static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	struct bch_fs *c =
 trans->c;
	int ret = 0;
	s64 count2;

	darray_for_each(w->inodes, i) {
		if (i->inode.bi_nlink == i->count)
			continue;

		count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
		if (count2 < 0)
			return count2;

		if (i->count != count2) {
			bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
					    w->last_pos.inode, i->snapshot, i->count, count2);
			i->count = count2;
			if (i->inode.bi_nlink == i->count)
				continue;
		}

		if (fsck_err_on(i->inode.bi_nlink != i->count,
				trans, inode_dir_wrong_nlink,
				"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
				w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
			i->inode.bi_nlink = i->count;
			ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
			if (ret)
				break;
		}
	}
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

/* As above, but also reports whether the transaction was restarted: */
static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;
	return check_subdir_count_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}

/*
 * Verify that @target's backpointer (bi_dir/bi_dir_offset) points back at
 * dirent @d, repairing the inode or removing the dirent as appropriate.
 */
noinline_for_stack
static int check_dirent_inode_dirent(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c_dirent d,
				     struct bch_inode_unpacked *target,
				     u32 target_snapshot)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (inode_points_to_dirent(target, d))
		return 0;

	if (bch2_inode_should_have_bp(target) &&
	    !fsck_err(trans, inode_wrong_backpointer,
		      "dirent points to inode that does not point back:\n %s",
		      (bch2_bkey_val_to_text(&buf, c, d.s_c),
		       prt_printf(&buf, "\n "),
		       bch2_inode_unpacked_to_text(&buf, target),
		       buf.buf)))
		goto out_noiter;

	/* No backpointer at all - just set it to this dirent: */
	if (!target->bi_dir &&
	    !target->bi_dir_offset) {
		target->bi_dir		= d.k->p.inode;
		target->bi_dir_offset	= d.k->p.offset;
		return __bch2_fsck_write_inode(trans, target, target_snapshot);
	}

	/* Does the dirent the backpointer names actually exist? */
	struct btree_iter bp_iter = { NULL };
	struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
			      SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
	ret = bkey_err(bp_dirent);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	bool backpointer_exists = !ret;
	ret = 0;

	if (fsck_err_on(!backpointer_exists,
			trans, inode_wrong_backpointer,
			"inode %llu:%u has wrong backpointer:\n"
			"got %llu:%llu\n"
			"should be %llu:%llu",
			target->bi_inum, target_snapshot,
			target->bi_dir,
			target->bi_dir_offset,
			d.k->p.inode,
			d.k->p.offset)) {
		target->bi_dir		= d.k->p.inode;
		target->bi_dir_offset	= d.k->p.offset;
		ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
		goto out;
	}

	bch2_bkey_val_to_text(&buf, c, d.s_c);
	prt_newline(&buf);
	if (backpointer_exists)
		bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);

	/* Directories and subvolumes may only have a single link: */
	if (fsck_err_on(backpointer_exists &&
			(S_ISDIR(target->bi_mode) ||
			 target->bi_subvol),
			trans, inode_dir_multiple_links,
			"%s %llu:%u with multiple links\n%s",
			S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
			target->bi_inum, target_snapshot, buf.buf)) {
		ret = __remove_dirent(trans, d.k->p);
		goto out;
	}

	/*
	 * hardlinked file with nlink 0:
	 * We're just adjusting nlink here so check_nlinks() will pick
	 * it up, it ignores inodes with nlink 0
	 */
	if (fsck_err_on(backpointer_exists && !target->bi_nlink,
			trans, inode_multiple_links_but_nlink_0,
			"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
			target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
		target->bi_nlink++;
		target->bi_flags &= ~BCH_INODE_unlinked;
		ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
		if (ret)
			goto err;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
out_noiter:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check dirent @d against its target inode: backpointer consistency, then
 * d_type; rewrites the dirent if d_type is wrong.
 */
noinline_for_stack
static int check_dirent_target(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct bkey_s_c_dirent d,
			       struct bch_inode_unpacked *target,
			       u32 target_snapshot)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_dirent *n;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
	if (ret)
		goto err;

	if (fsck_err_on(d.v->d_type != inode_d_type(target),
			trans, dirent_d_type_wrong,
			"incorrect d_type: got %s, should be %s:\n%s",
			bch2_d_type_str(d.v->d_type),
			bch2_d_type_str(inode_d_type(target)),
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
		n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			goto err;

		bkey_reassemble(&n->k_i, d.s_c);
		n->v.d_type = inode_d_type(target);
		if (n->v.d_type == DT_SUBVOL) {
			n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1864 } else { 1865 n->v.d_inum = cpu_to_le64(target->bi_inum); 1866 } 1867 1868 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1869 if (ret) 1870 goto err; 1871 1872 d = dirent_i_to_s_c(n); 1873 } 1874 err: 1875 fsck_err: 1876 printbuf_exit(&buf); 1877 bch_err_fn(c, ret); 1878 return ret; 1879 } 1880 1881 /* find a subvolume that's a descendent of @snapshot: */ 1882 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1883 { 1884 struct btree_iter iter; 1885 struct bkey_s_c k; 1886 int ret; 1887 1888 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1889 if (k.k->type != KEY_TYPE_subvolume) 1890 continue; 1891 1892 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1893 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1894 bch2_trans_iter_exit(trans, &iter); 1895 *subvolid = k.k->p.offset; 1896 goto found; 1897 } 1898 } 1899 if (!ret) 1900 ret = -ENOENT; 1901 found: 1902 bch2_trans_iter_exit(trans, &iter); 1903 return ret; 1904 } 1905 1906 noinline_for_stack 1907 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1908 struct bkey_s_c_dirent d) 1909 { 1910 struct bch_fs *c = trans->c; 1911 struct btree_iter subvol_iter = {}; 1912 struct bch_inode_unpacked subvol_root; 1913 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1914 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1915 u32 parent_snapshot; 1916 u32 new_parent_subvol = 0; 1917 u64 parent_inum; 1918 struct printbuf buf = PRINTBUF; 1919 int ret = 0; 1920 1921 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1922 if (ret && !bch2_err_matches(ret, ENOENT)) 1923 return ret; 1924 1925 if (ret || 1926 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 1927 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1928 if (ret2 && 
!bch2_err_matches(ret, ENOENT)) 1929 return ret2; 1930 } 1931 1932 if (ret && 1933 !new_parent_subvol && 1934 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1935 /* 1936 * Couldn't find a subvol for dirent's snapshot - but we lost 1937 * subvols, so we need to reconstruct: 1938 */ 1939 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 1940 if (ret) 1941 return ret; 1942 1943 parent_snapshot = d.k->p.snapshot; 1944 } 1945 1946 if (fsck_err_on(ret, 1947 trans, dirent_to_missing_parent_subvol, 1948 "dirent parent_subvol points to missing subvolume\n%s", 1949 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1950 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1951 trans, dirent_not_visible_in_parent_subvol, 1952 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1953 parent_snapshot, 1954 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1955 if (!new_parent_subvol) { 1956 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 1957 return -BCH_ERR_fsck_repair_unimplemented; 1958 } 1959 1960 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1961 ret = PTR_ERR_OR_ZERO(new_dirent); 1962 if (ret) 1963 goto err; 1964 1965 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1966 } 1967 1968 struct bkey_s_c_subvolume s = 1969 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1970 BTREE_ID_subvolumes, POS(0, target_subvol), 1971 0, subvolume); 1972 ret = bkey_err(s.s_c); 1973 if (ret && !bch2_err_matches(ret, ENOENT)) 1974 return ret; 1975 1976 if (ret) { 1977 if (fsck_err(trans, dirent_to_missing_subvol, 1978 "dirent points to missing subvolume\n%s", 1979 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1980 return __remove_dirent(trans, d.k->p); 1981 ret = 0; 1982 goto out; 1983 } 1984 1985 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 1986 trans, subvol_fs_path_parent_wrong, 1987 
"subvol with wrong fs_path_parent, should be be %u\n%s", 1988 parent_subvol, 1989 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 1990 struct bkey_i_subvolume *n = 1991 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 1992 ret = PTR_ERR_OR_ZERO(n); 1993 if (ret) 1994 goto err; 1995 1996 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 1997 } 1998 1999 u64 target_inum = le64_to_cpu(s.v->inode); 2000 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 2001 2002 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 2003 if (ret && !bch2_err_matches(ret, ENOENT)) 2004 goto err; 2005 2006 if (ret) { 2007 bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); 2008 ret = -BCH_ERR_fsck_repair_unimplemented; 2009 ret = 0; 2010 goto err; 2011 } 2012 2013 if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, 2014 trans, inode_bi_parent_wrong, 2015 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 2016 target_inum, 2017 subvol_root.bi_parent_subvol, parent_subvol)) { 2018 subvol_root.bi_parent_subvol = parent_subvol; 2019 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 2020 if (ret) 2021 goto err; 2022 } 2023 2024 ret = check_dirent_target(trans, iter, d, &subvol_root, 2025 target_snapshot); 2026 if (ret) 2027 goto err; 2028 out: 2029 err: 2030 fsck_err: 2031 bch2_trans_iter_exit(trans, &subvol_iter); 2032 printbuf_exit(&buf); 2033 return ret; 2034 } 2035 2036 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 2037 struct bkey_s_c k, 2038 struct bch_hash_info *hash_info, 2039 struct inode_walker *dir, 2040 struct inode_walker *target, 2041 struct snapshots_seen *s) 2042 { 2043 struct bch_fs *c = trans->c; 2044 struct inode_walker_entry *i; 2045 struct printbuf buf = PRINTBUF; 2046 int ret = 0; 2047 2048 ret = bch2_check_key_has_snapshot(trans, iter, k); 2049 if (ret) { 2050 ret = ret < 0 ? 
 ret : 0;
		goto out;
	}

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	if (k.k->type == KEY_TYPE_whiteout)
		goto out;

	/* Moved on to a new directory - finalize the previous one's counts: */
	if (dir->last_pos.inode != k.k->p.inode) {
		ret = check_subdir_count(trans, dir);
		if (ret)
			goto err;
	}

	i = walk_inode(trans, dir, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret < 0)
		goto err;

	ret = check_key_has_inode(trans, iter, dir, i, k);
	if (ret)
		goto err;

	if (!i)
		goto out;

	if (dir->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &i->inode);
	dir->first_this_inode = false;

	ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
	if (ret < 0)
		goto err;
	if (ret) {
		/* dirent has been deleted */
		ret = 0;
		goto out;
	}

	if (k.k->type != KEY_TYPE_dirent)
		goto out;

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

	if (d.v->d_type == DT_SUBVOL) {
		ret = check_dirent_to_subvol(trans, iter, d);
		if (ret)
			goto err;
	} else {
		ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
		if (ret)
			goto err;

		if (fsck_err_on(!target->inodes.nr,
				trans, dirent_to_missing_inode,
				"dirent points to missing inode:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, k),
				 buf.buf))) {
			ret = __remove_dirent(trans, d.k->p);
			if (ret)
				goto err;
		}

		/* Check against every visible version of the target inode: */
		darray_for_each(target->inodes, i) {
			ret = check_dirent_target(trans, iter, d,
						  &i->inode, i->snapshot);
			if (ret)
				goto err;
		}

		/* Accumulate subdirectory counts for check_subdir_count(): */
		if (d.v->d_type == DT_DIR)
			for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
				i->count++;
	}
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
 * validate d_type
 */
int bch2_check_dirents(struct bch_fs *c)
{
	struct inode_walker dir = inode_walker_init();
	struct inode_walker target = inode_walker_init();
	struct snapshots_seen s;
	struct bch_hash_info hash_info;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
			check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
		check_subdir_count_notnested(trans, &dir));

	snapshots_seen_exit(&s);
	inode_walker_exit(&dir);
	inode_walker_exit(&target);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check one xattr: snapshot validity, owning inode, and hash table
 * consistency.
 */
static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
		       struct bkey_s_c k,
		       struct bch_hash_info *hash_info,
		       struct inode_walker *inode)
{
	struct bch_fs *c = trans->c;
	struct inode_walker_entry *i;
	int ret;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	i = walk_inode(trans, inode, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		return ret;

	ret = check_key_has_inode(trans, iter, inode, i, k);
	if (ret)
		return ret;

	if (!i)
		return 0;

	if (inode->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &i->inode);
	inode->first_this_inode = false;

	ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk xattrs: verify that they all have a corresponding inode
 */
int bch2_check_xattrs(struct bch_fs *c)
{
	struct inode_walker inode = inode_walker_init();
	struct bch_hash_info hash_info;
	int ret = 0;

	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
			check_xattr(trans, &iter, k, &hash_info, &inode)));
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Verify that the root subvolume and root directory inode exist, recreating
 * either if missing.
 */
static int check_root_trans(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked root_inode;
	u32 snapshot;
	u64 inum;
	int ret;

	ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (mustfix_fsck_err_on(ret, trans, root_subvol_missing,
				"root subvol missing")) {
		struct bkey_i_subvolume *root_subvol =
			bch2_trans_kmalloc(trans, sizeof(*root_subvol));
		ret = PTR_ERR_OR_ZERO(root_subvol);
		if (ret)
			goto err;

		snapshot	= U32_MAX;
		inum		= BCACHEFS_ROOT_INO;

		bkey_subvolume_init(&root_subvol->k_i);
		root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
		root_subvol->v.flags	= 0;
		root_subvol->v.snapshot	= cpu_to_le32(snapshot);
		root_subvol->v.inode	= cpu_to_le64(inum);
		ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
		bch_err_msg(c, ret, "writing root subvol");
		if (ret)
			goto err;
	}

	ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (mustfix_fsck_err_on(ret,
				trans, root_dir_missing,
				"root directory missing") ||
	    mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode),
				trans, root_inode_not_dir,
				"root inode not a directory")) {
		bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
				0, NULL);
		root_inode.bi_inum = inum;

		ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot);
		bch_err_msg(c, ret, "writing root inode");
	}
err:
fsck_err:
	return ret;
}

/* Get root directory, create if it doesn't exist: */
int bch2_check_root(struct bch_fs *c)
{
	int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_root_trans(trans));
	bch_err_fn(c, ret);
	return ret;
}

typedef DARRAY(u32) darray_u32;

/* Linear membership test; subvol paths are expected to be short: */
static bool darray_u32_has(darray_u32 *d, u32 v)
{
	darray_for_each(*d, i)
		if (*i == v)
			return true;
	return false;
}

/*
 * We've checked that inode backpointers point to valid dirents; here, it's
 * sufficient to check that the subvolume root has a dirent:
 */
static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s)
{
	struct bch_inode_unpacked inode;
	int ret = bch2_inode_find_by_inum_trans(trans,
				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
				&inode);
	if (ret)
		return ret;

	return inode.bi_dir != 0;
}

/*
 * Walk a subvolume's fs_path_parent chain up towards the root subvolume,
 * checking reachability and detecting loops.
 */
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter parent_iter = {};
	darray_u32 subvol_path = {};
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (k.k->type != KEY_TYPE_subvolume)
		return 0;

	while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
		ret = darray_push(&subvol_path, k.k->p.offset);
		if (ret)
			goto err;

		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);

		ret = subvol_has_dirent(trans, s);
		if (ret < 0)
			break;

		if (fsck_err_on(!ret,
				trans, subvol_unreachable,
				"unreachable subvolume %s",
				(bch2_bkey_val_to_text(&buf, c, s.s_c),
				 buf.buf))) {
			ret = reattach_subvol(trans, s);
			break;
		}

		u32 parent = le32_to_cpu(s.v->fs_path_parent);

		if (darray_u32_has(&subvol_path, parent)) {
			/*
			 * NOTE(review): this fsck_err() takes c where the
			 * others in this file take trans - confirm whether
			 * that's intentional.
			 */
			if (fsck_err(c, subvol_loop, "subvolume
loop")) 2349 ret = reattach_subvol(trans, s); 2350 break; 2351 } 2352 2353 bch2_trans_iter_exit(trans, &parent_iter); 2354 bch2_trans_iter_init(trans, &parent_iter, 2355 BTREE_ID_subvolumes, POS(0, parent), 0); 2356 k = bch2_btree_iter_peek_slot(&parent_iter); 2357 ret = bkey_err(k); 2358 if (ret) 2359 goto err; 2360 2361 if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, 2362 trans, subvol_unreachable, 2363 "unreachable subvolume %s", 2364 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2365 buf.buf))) { 2366 ret = reattach_subvol(trans, s); 2367 break; 2368 } 2369 } 2370 fsck_err: 2371 err: 2372 printbuf_exit(&buf); 2373 darray_exit(&subvol_path); 2374 bch2_trans_iter_exit(trans, &parent_iter); 2375 return ret; 2376 } 2377 2378 int bch2_check_subvolume_structure(struct bch_fs *c) 2379 { 2380 int ret = bch2_trans_run(c, 2381 for_each_btree_key_commit(trans, iter, 2382 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, 2383 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2384 check_subvol_path(trans, &iter, k))); 2385 bch_err_fn(c, ret); 2386 return ret; 2387 } 2388 2389 struct pathbuf_entry { 2390 u64 inum; 2391 u32 snapshot; 2392 }; 2393 2394 typedef DARRAY(struct pathbuf_entry) pathbuf; 2395 2396 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) 2397 { 2398 darray_for_each(*p, i) 2399 if (i->inum == inum && 2400 i->snapshot == snapshot) 2401 return true; 2402 return false; 2403 } 2404 2405 /* 2406 * Check that a given inode is reachable from its subvolume root - we already 2407 * verified subvolume connectivity: 2408 * 2409 * XXX: we should also be verifying that inodes are in the right subvolumes 2410 */ 2411 static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) 2412 { 2413 struct bch_fs *c = trans->c; 2414 struct btree_iter inode_iter = {}; 2415 struct bch_inode_unpacked inode; 2416 struct printbuf buf = PRINTBUF; 2417 u32 snapshot = inode_k.k->p.snapshot; 2418 int ret = 0; 2419 2420 p->nr = 0; 2421 2422 
BUG_ON(bch2_inode_unpack(inode_k, &inode)); 2423 2424 while (!inode.bi_subvol) { 2425 struct btree_iter dirent_iter; 2426 struct bkey_s_c_dirent d; 2427 u32 parent_snapshot = snapshot; 2428 2429 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2430 ret = bkey_err(d.s_c); 2431 if (ret && !bch2_err_matches(ret, ENOENT)) 2432 break; 2433 2434 if (!ret && !dirent_points_to_inode(d, &inode)) { 2435 bch2_trans_iter_exit(trans, &dirent_iter); 2436 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2437 } 2438 2439 if (bch2_err_matches(ret, ENOENT)) { 2440 ret = 0; 2441 if (fsck_err(trans, inode_unreachable, 2442 "unreachable inode\n%s", 2443 (printbuf_reset(&buf), 2444 bch2_bkey_val_to_text(&buf, c, inode_k), 2445 buf.buf))) 2446 ret = reattach_inode(trans, &inode, snapshot); 2447 goto out; 2448 } 2449 2450 bch2_trans_iter_exit(trans, &dirent_iter); 2451 2452 if (!S_ISDIR(inode.bi_mode)) 2453 break; 2454 2455 ret = darray_push(p, ((struct pathbuf_entry) { 2456 .inum = inode.bi_inum, 2457 .snapshot = snapshot, 2458 })); 2459 if (ret) 2460 return ret; 2461 2462 snapshot = parent_snapshot; 2463 2464 bch2_trans_iter_exit(trans, &inode_iter); 2465 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2466 SPOS(0, inode.bi_dir, snapshot), 0); 2467 ret = bkey_err(inode_k) ?: 2468 !bkey_is_inode(inode_k.k) ? 
-BCH_ERR_ENOENT_inode 2469 : bch2_inode_unpack(inode_k, &inode); 2470 if (ret) { 2471 /* Should have been caught in dirents pass */ 2472 if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2473 bch_err(c, "error looking up parent directory: %i", ret); 2474 break; 2475 } 2476 2477 snapshot = inode_k.k->p.snapshot; 2478 2479 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2480 /* XXX print path */ 2481 bch_err(c, "directory structure loop"); 2482 2483 darray_for_each(*p, i) 2484 pr_err("%llu:%u", i->inum, i->snapshot); 2485 pr_err("%llu:%u", inode.bi_inum, snapshot); 2486 2487 if (fsck_err(trans, dir_loop, "directory structure loop")) { 2488 ret = remove_backpointer(trans, &inode); 2489 bch_err_msg(c, ret, "removing dirent"); 2490 if (ret) 2491 break; 2492 2493 ret = reattach_inode(trans, &inode, snapshot); 2494 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2495 } 2496 break; 2497 } 2498 } 2499 out: 2500 fsck_err: 2501 bch2_trans_iter_exit(trans, &inode_iter); 2502 printbuf_exit(&buf); 2503 bch_err_fn(c, ret); 2504 return ret; 2505 } 2506 2507 /* 2508 * Check for unreachable inodes, as well as loops in the directory structure: 2509 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2510 * unreachable: 2511 */ 2512 int bch2_check_directory_structure(struct bch_fs *c) 2513 { 2514 pathbuf path = { 0, }; 2515 int ret; 2516 2517 ret = bch2_trans_run(c, 2518 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2519 BTREE_ITER_intent| 2520 BTREE_ITER_prefetch| 2521 BTREE_ITER_all_snapshots, k, 2522 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2523 if (!bkey_is_inode(k.k)) 2524 continue; 2525 2526 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2527 continue; 2528 2529 check_path(trans, &path, k); 2530 }))); 2531 darray_exit(&path); 2532 2533 bch_err_fn(c, ret); 2534 return ret; 2535 } 2536 2537 struct nlink_table { 2538 size_t nr; 2539 size_t size; 2540 2541 struct nlink { 2542 u64 inum; 2543 u32 snapshot; 2544 
u32 count; 2545 } *d; 2546 }; 2547 2548 static int add_nlink(struct bch_fs *c, struct nlink_table *t, 2549 u64 inum, u32 snapshot) 2550 { 2551 if (t->nr == t->size) { 2552 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2553 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2554 2555 if (!d) { 2556 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2557 new_size); 2558 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2559 } 2560 2561 if (t->d) 2562 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2563 kvfree(t->d); 2564 2565 t->d = d; 2566 t->size = new_size; 2567 } 2568 2569 2570 t->d[t->nr++] = (struct nlink) { 2571 .inum = inum, 2572 .snapshot = snapshot, 2573 }; 2574 2575 return 0; 2576 } 2577 2578 static int nlink_cmp(const void *_l, const void *_r) 2579 { 2580 const struct nlink *l = _l; 2581 const struct nlink *r = _r; 2582 2583 return cmp_int(l->inum, r->inum); 2584 } 2585 2586 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2587 struct nlink_table *links, 2588 u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2589 { 2590 struct nlink *link, key = { 2591 .inum = inum, .snapshot = U32_MAX, 2592 }; 2593 2594 if (inum < range_start || inum >= range_end) 2595 return; 2596 2597 link = __inline_bsearch(&key, links->d, links->nr, 2598 sizeof(links->d[0]), nlink_cmp); 2599 if (!link) 2600 return; 2601 2602 while (link > links->d && link[0].inum == link[-1].inum) 2603 --link; 2604 2605 for (; link < links->d + links->nr && link->inum == inum; link++) 2606 if (ref_visible(c, s, snapshot, link->snapshot)) { 2607 link->count++; 2608 if (link->snapshot >= snapshot) 2609 break; 2610 } 2611 } 2612 2613 noinline_for_stack 2614 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2615 struct nlink_table *t, 2616 u64 start, u64 *end) 2617 { 2618 int ret = bch2_trans_run(c, 2619 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2620 POS(0, start), 2621 BTREE_ITER_intent| 2622 BTREE_ITER_prefetch| 2623 BTREE_ITER_all_snapshots, 
k, ({ 2624 if (!bkey_is_inode(k.k)) 2625 continue; 2626 2627 /* Should never fail, checked by bch2_inode_invalid: */ 2628 struct bch_inode_unpacked u; 2629 BUG_ON(bch2_inode_unpack(k, &u)); 2630 2631 /* 2632 * Backpointer and directory structure checks are sufficient for 2633 * directories, since they can't have hardlinks: 2634 */ 2635 if (S_ISDIR(u.bi_mode)) 2636 continue; 2637 2638 if (!u.bi_nlink) 2639 continue; 2640 2641 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2642 if (ret) { 2643 *end = k.k->p.offset; 2644 ret = 0; 2645 break; 2646 } 2647 0; 2648 }))); 2649 2650 bch_err_fn(c, ret); 2651 return ret; 2652 } 2653 2654 noinline_for_stack 2655 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2656 u64 range_start, u64 range_end) 2657 { 2658 struct snapshots_seen s; 2659 2660 snapshots_seen_init(&s); 2661 2662 int ret = bch2_trans_run(c, 2663 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2664 BTREE_ITER_intent| 2665 BTREE_ITER_prefetch| 2666 BTREE_ITER_all_snapshots, k, ({ 2667 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2668 if (ret) 2669 break; 2670 2671 if (k.k->type == KEY_TYPE_dirent) { 2672 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2673 2674 if (d.v->d_type != DT_DIR && 2675 d.v->d_type != DT_SUBVOL) 2676 inc_link(c, &s, links, range_start, range_end, 2677 le64_to_cpu(d.v->d_inum), d.k->p.snapshot); 2678 } 2679 0; 2680 }))); 2681 2682 snapshots_seen_exit(&s); 2683 2684 bch_err_fn(c, ret); 2685 return ret; 2686 } 2687 2688 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2689 struct bkey_s_c k, 2690 struct nlink_table *links, 2691 size_t *idx, u64 range_end) 2692 { 2693 struct bch_inode_unpacked u; 2694 struct nlink *link = &links->d[*idx]; 2695 int ret = 0; 2696 2697 if (k.k->p.offset >= range_end) 2698 return 1; 2699 2700 if (!bkey_is_inode(k.k)) 2701 return 0; 2702 2703 BUG_ON(bch2_inode_unpack(k, &u)); 2704 2705 if (S_ISDIR(u.bi_mode)) 
2706 return 0; 2707 2708 if (!u.bi_nlink) 2709 return 0; 2710 2711 while ((cmp_int(link->inum, k.k->p.offset) ?: 2712 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2713 BUG_ON(*idx == links->nr); 2714 link = &links->d[++*idx]; 2715 } 2716 2717 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2718 trans, inode_wrong_nlink, 2719 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2720 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2721 bch2_inode_nlink_get(&u), link->count)) { 2722 bch2_inode_nlink_set(&u, link->count); 2723 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2724 } 2725 fsck_err: 2726 return ret; 2727 } 2728 2729 noinline_for_stack 2730 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2731 struct nlink_table *links, 2732 u64 range_start, u64 range_end) 2733 { 2734 size_t idx = 0; 2735 2736 int ret = bch2_trans_run(c, 2737 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2738 POS(0, range_start), 2739 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 2740 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2741 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2742 if (ret < 0) { 2743 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2744 return ret; 2745 } 2746 2747 return 0; 2748 } 2749 2750 int bch2_check_nlinks(struct bch_fs *c) 2751 { 2752 struct nlink_table links = { 0 }; 2753 u64 this_iter_range_start, next_iter_range_start = 0; 2754 int ret = 0; 2755 2756 do { 2757 this_iter_range_start = next_iter_range_start; 2758 next_iter_range_start = U64_MAX; 2759 2760 ret = check_nlinks_find_hardlinks(c, &links, 2761 this_iter_range_start, 2762 &next_iter_range_start); 2763 2764 ret = check_nlinks_walk_dirents(c, &links, 2765 this_iter_range_start, 2766 next_iter_range_start); 2767 if (ret) 2768 break; 2769 2770 ret = check_nlinks_update_hardlinks(c, &links, 2771 this_iter_range_start, 2772 next_iter_range_start); 2773 if (ret) 2774 break; 2775 2776 links.nr = 0; 
2777 } while (next_iter_range_start != U64_MAX); 2778 2779 kvfree(links.d); 2780 bch_err_fn(c, ret); 2781 return ret; 2782 } 2783 2784 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2785 struct bkey_s_c k) 2786 { 2787 struct bkey_s_c_reflink_p p; 2788 struct bkey_i_reflink_p *u; 2789 2790 if (k.k->type != KEY_TYPE_reflink_p) 2791 return 0; 2792 2793 p = bkey_s_c_to_reflink_p(k); 2794 2795 if (!p.v->front_pad && !p.v->back_pad) 2796 return 0; 2797 2798 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2799 int ret = PTR_ERR_OR_ZERO(u); 2800 if (ret) 2801 return ret; 2802 2803 bkey_reassemble(&u->k_i, k); 2804 u->v.front_pad = 0; 2805 u->v.back_pad = 0; 2806 2807 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 2808 } 2809 2810 int bch2_fix_reflink_p(struct bch_fs *c) 2811 { 2812 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2813 return 0; 2814 2815 int ret = bch2_trans_run(c, 2816 for_each_btree_key_commit(trans, iter, 2817 BTREE_ID_extents, POS_MIN, 2818 BTREE_ITER_intent|BTREE_ITER_prefetch| 2819 BTREE_ITER_all_snapshots, k, 2820 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2821 fix_reflink_p_key(trans, &iter, k))); 2822 bch_err_fn(c, ret); 2823 return ret; 2824 } 2825