1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "bkey_buf.h" 5 #include "btree_cache.h" 6 #include "btree_update.h" 7 #include "buckets.h" 8 #include "darray.h" 9 #include "dirent.h" 10 #include "error.h" 11 #include "fs-common.h" 12 #include "fsck.h" 13 #include "inode.h" 14 #include "keylist.h" 15 #include "recovery_passes.h" 16 #include "snapshot.h" 17 #include "super.h" 18 #include "xattr.h" 19 20 #include <linux/bsearch.h> 21 #include <linux/dcache.h> /* struct qstr */ 22 23 /* 24 * XXX: this is handling transaction restarts without returning 25 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: 26 */ 27 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, 28 u32 snapshot) 29 { 30 u64 sectors = 0; 31 32 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, 33 SPOS(inum, 0, snapshot), 34 POS(inum, U64_MAX), 35 0, k, ({ 36 if (bkey_extent_is_allocation(k.k)) 37 sectors += k.k->size; 38 0; 39 })); 40 41 return ret ?: sectors; 42 } 43 44 static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, 45 u32 snapshot) 46 { 47 u64 subdirs = 0; 48 49 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, 50 SPOS(inum, 0, snapshot), 51 POS(inum, U64_MAX), 52 0, k, ({ 53 if (k.k->type == KEY_TYPE_dirent && 54 bkey_s_c_to_dirent(k).v->d_type == DT_DIR) 55 subdirs++; 56 0; 57 })); 58 59 return ret ?: subdirs; 60 } 61 62 static int subvol_lookup(struct btree_trans *trans, u32 subvol, 63 u32 *snapshot, u64 *inum) 64 { 65 struct bch_subvolume s; 66 int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); 67 68 *snapshot = le32_to_cpu(s.snapshot); 69 *inum = le64_to_cpu(s.inode); 70 return ret; 71 } 72 73 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, 74 struct bch_inode_unpacked *inode) 75 { 76 struct btree_iter iter; 77 struct bkey_s_c k; 78 int ret; 79 80 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr), 81 
BTREE_ITER_all_snapshots, k, ret) { 82 if (k.k->p.offset != inode_nr) 83 break; 84 if (!bkey_is_inode(k.k)) 85 continue; 86 ret = bch2_inode_unpack(k, inode); 87 goto found; 88 } 89 ret = -BCH_ERR_ENOENT_inode; 90 found: 91 bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); 92 bch2_trans_iter_exit(trans, &iter); 93 return ret; 94 } 95 96 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, 97 struct bch_inode_unpacked *inode, 98 u32 *snapshot) 99 { 100 struct btree_iter iter; 101 struct bkey_s_c k; 102 int ret; 103 104 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 105 SPOS(0, inode_nr, *snapshot), 0); 106 ret = bkey_err(k); 107 if (ret) 108 goto err; 109 110 ret = bkey_is_inode(k.k) 111 ? bch2_inode_unpack(k, inode) 112 : -BCH_ERR_ENOENT_inode; 113 if (!ret) 114 *snapshot = iter.pos.snapshot; 115 err: 116 bch2_trans_iter_exit(trans, &iter); 117 return ret; 118 } 119 120 static int lookup_dirent_in_snapshot(struct btree_trans *trans, 121 struct bch_hash_info hash_info, 122 subvol_inum dir, struct qstr *name, 123 u64 *target, unsigned *type, u32 snapshot) 124 { 125 struct btree_iter iter; 126 struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, 127 &hash_info, dir, name, 0, snapshot); 128 int ret = bkey_err(k); 129 if (ret) 130 return ret; 131 132 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); 133 *target = le64_to_cpu(d.v->d_inum); 134 *type = d.v->d_type; 135 bch2_trans_iter_exit(trans, &iter); 136 return 0; 137 } 138 139 static int __remove_dirent(struct btree_trans *trans, struct bpos pos) 140 { 141 struct bch_fs *c = trans->c; 142 struct btree_iter iter; 143 struct bch_inode_unpacked dir_inode; 144 struct bch_hash_info dir_hash_info; 145 int ret; 146 147 ret = lookup_first_inode(trans, pos.inode, &dir_inode); 148 if (ret) 149 goto err; 150 151 dir_hash_info = bch2_hash_info_init(c, &dir_inode); 152 153 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, 
BTREE_ITER_intent); 154 155 ret = bch2_btree_iter_traverse(&iter) ?: 156 bch2_hash_delete_at(trans, bch2_dirent_hash_desc, 157 &dir_hash_info, &iter, 158 BTREE_UPDATE_internal_snapshot_node); 159 bch2_trans_iter_exit(trans, &iter); 160 err: 161 bch_err_fn(c, ret); 162 return ret; 163 } 164 165 /* Get lost+found, create if it doesn't exist: */ 166 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, 167 struct bch_inode_unpacked *lostfound, 168 u64 reattaching_inum) 169 { 170 struct bch_fs *c = trans->c; 171 struct qstr lostfound_str = QSTR("lost+found"); 172 u64 inum = 0; 173 unsigned d_type = 0; 174 int ret; 175 176 struct bch_snapshot_tree st; 177 ret = bch2_snapshot_tree_lookup(trans, 178 bch2_snapshot_tree(c, snapshot), &st); 179 if (ret) 180 return ret; 181 182 subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; 183 184 struct bch_subvolume subvol; 185 ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), 186 false, 0, &subvol); 187 bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", 188 le32_to_cpu(st.master_subvol), snapshot); 189 if (ret) 190 return ret; 191 192 if (!subvol.inode) { 193 struct btree_iter iter; 194 struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, 195 BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), 196 0, subvolume); 197 ret = PTR_ERR_OR_ZERO(subvol); 198 if (ret) 199 return ret; 200 201 subvol->v.inode = cpu_to_le64(reattaching_inum); 202 bch2_trans_iter_exit(trans, &iter); 203 } 204 205 root_inum.inum = le64_to_cpu(subvol.inode); 206 207 struct bch_inode_unpacked root_inode; 208 struct bch_hash_info root_hash_info; 209 u32 root_inode_snapshot = snapshot; 210 ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); 211 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", 212 root_inum.inum, le32_to_cpu(st.master_subvol)); 213 if (ret) 214 return ret; 215 216 root_hash_info = bch2_hash_info_init(c, &root_inode); 217 218 
ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, 219 &lostfound_str, &inum, &d_type, snapshot); 220 if (bch2_err_matches(ret, ENOENT)) 221 goto create_lostfound; 222 223 bch_err_fn(c, ret); 224 if (ret) 225 return ret; 226 227 if (d_type != DT_DIR) { 228 bch_err(c, "error looking up lost+found: not a directory"); 229 return -BCH_ERR_ENOENT_not_directory; 230 } 231 232 /* 233 * The bch2_check_dirents pass has already run, dangling dirents 234 * shouldn't exist here: 235 */ 236 ret = lookup_inode(trans, inum, lostfound, &snapshot); 237 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", 238 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); 239 return ret; 240 241 create_lostfound: 242 /* 243 * XXX: we could have a nicer log message here if we had a nice way to 244 * walk backpointers to print a path 245 */ 246 bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); 247 248 u64 now = bch2_current_time(c); 249 struct btree_iter lostfound_iter = { NULL }; 250 u64 cpu = raw_smp_processor_id(); 251 252 bch2_inode_init_early(c, lostfound); 253 bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); 254 lostfound->bi_dir = root_inode.bi_inum; 255 256 root_inode.bi_nlink++; 257 258 ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); 259 if (ret) 260 goto err; 261 262 bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); 263 ret = bch2_btree_iter_traverse(&lostfound_iter); 264 if (ret) 265 goto err; 266 267 ret = bch2_dirent_create_snapshot(trans, 268 0, root_inode.bi_inum, snapshot, &root_hash_info, 269 mode_to_type(lostfound->bi_mode), 270 &lostfound_str, 271 lostfound->bi_inum, 272 &lostfound->bi_dir_offset, 273 STR_HASH_must_create) ?: 274 bch2_inode_write_flags(trans, &lostfound_iter, lostfound, 275 BTREE_UPDATE_internal_snapshot_node); 276 err: 277 bch_err_msg(c, ret, "creating lost+found"); 278 bch2_trans_iter_exit(trans, 
&lostfound_iter); 279 return ret; 280 } 281 282 static int reattach_inode(struct btree_trans *trans, 283 struct bch_inode_unpacked *inode, 284 u32 inode_snapshot) 285 { 286 struct bch_hash_info dir_hash; 287 struct bch_inode_unpacked lostfound; 288 char name_buf[20]; 289 struct qstr name; 290 u64 dir_offset = 0; 291 u32 dirent_snapshot = inode_snapshot; 292 int ret; 293 294 if (inode->bi_subvol) { 295 inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; 296 297 u64 root_inum; 298 ret = subvol_lookup(trans, inode->bi_parent_subvol, 299 &dirent_snapshot, &root_inum); 300 if (ret) 301 return ret; 302 303 snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol); 304 } else { 305 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); 306 } 307 308 ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); 309 if (ret) 310 return ret; 311 312 if (S_ISDIR(inode->bi_mode)) { 313 lostfound.bi_nlink++; 314 315 ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); 316 if (ret) 317 return ret; 318 } 319 320 dir_hash = bch2_hash_info_init(trans->c, &lostfound); 321 322 name = (struct qstr) QSTR(name_buf); 323 324 ret = bch2_dirent_create_snapshot(trans, 325 inode->bi_parent_subvol, lostfound.bi_inum, 326 dirent_snapshot, 327 &dir_hash, 328 inode_d_type(inode), 329 &name, 330 inode->bi_subvol ?: inode->bi_inum, 331 &dir_offset, 332 STR_HASH_must_create); 333 if (ret) 334 return ret; 335 336 inode->bi_dir = lostfound.bi_inum; 337 inode->bi_dir_offset = dir_offset; 338 339 return __bch2_fsck_write_inode(trans, inode, inode_snapshot); 340 } 341 342 static int remove_backpointer(struct btree_trans *trans, 343 struct bch_inode_unpacked *inode) 344 { 345 struct btree_iter iter; 346 struct bkey_s_c_dirent d; 347 int ret; 348 349 d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, 350 POS(inode->bi_dir, inode->bi_dir_offset), 0, 351 dirent); 352 ret = bkey_err(d) ?: 353 __remove_dirent(trans, d.k->p); 354 bch2_trans_iter_exit(trans, &iter); 
355 return ret; 356 } 357 358 static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) 359 { 360 struct bch_fs *c = trans->c; 361 362 struct bch_inode_unpacked inode; 363 int ret = bch2_inode_find_by_inum_trans(trans, 364 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 365 &inode); 366 if (ret) 367 return ret; 368 369 ret = remove_backpointer(trans, &inode); 370 bch_err_msg(c, ret, "removing dirent"); 371 if (ret) 372 return ret; 373 374 ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); 375 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 376 return ret; 377 } 378 379 static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) 380 { 381 struct bch_fs *c = trans->c; 382 383 if (!bch2_snapshot_is_leaf(c, snapshotid)) { 384 bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); 385 return -BCH_ERR_fsck_repair_unimplemented; 386 } 387 388 /* 389 * If inum isn't set, that means we're being called from check_dirents, 390 * not check_inodes - the root of this subvolume doesn't exist or we 391 * would have found it there: 392 */ 393 if (!inum) { 394 struct btree_iter inode_iter = {}; 395 struct bch_inode_unpacked new_inode; 396 u64 cpu = raw_smp_processor_id(); 397 398 bch2_inode_init_early(c, &new_inode); 399 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); 400 401 new_inode.bi_subvol = subvolid; 402 403 int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: 404 bch2_btree_iter_traverse(&inode_iter) ?: 405 bch2_inode_write(trans, &inode_iter, &new_inode); 406 bch2_trans_iter_exit(trans, &inode_iter); 407 if (ret) 408 return ret; 409 410 inum = new_inode.bi_inum; 411 } 412 413 bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); 414 415 struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); 416 int ret = PTR_ERR_OR_ZERO(new_subvol); 417 if 
(ret) 418 return ret; 419 420 bkey_subvolume_init(&new_subvol->k_i); 421 new_subvol->k.p.offset = subvolid; 422 new_subvol->v.snapshot = cpu_to_le32(snapshotid); 423 new_subvol->v.inode = cpu_to_le64(inum); 424 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); 425 if (ret) 426 return ret; 427 428 struct btree_iter iter; 429 struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, 430 BTREE_ID_snapshots, POS(0, snapshotid), 431 0, snapshot); 432 ret = PTR_ERR_OR_ZERO(s); 433 bch_err_msg(c, ret, "getting snapshot %u", snapshotid); 434 if (ret) 435 return ret; 436 437 u32 snapshot_tree = le32_to_cpu(s->v.tree); 438 439 s->v.subvol = cpu_to_le32(subvolid); 440 SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); 441 bch2_trans_iter_exit(trans, &iter); 442 443 struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, 444 BTREE_ID_snapshot_trees, POS(0, snapshot_tree), 445 0, snapshot_tree); 446 ret = PTR_ERR_OR_ZERO(st); 447 bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree); 448 if (ret) 449 return ret; 450 451 if (!st->v.master_subvol) 452 st->v.master_subvol = cpu_to_le32(subvolid); 453 454 bch2_trans_iter_exit(trans, &iter); 455 return 0; 456 } 457 458 static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 snapshot, u64 inum) 459 { 460 struct bch_fs *c = trans->c; 461 unsigned i_mode = S_IFREG; 462 u64 i_size = 0; 463 464 switch (btree) { 465 case BTREE_ID_extents: { 466 struct btree_iter iter = {}; 467 468 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); 469 struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); 470 bch2_trans_iter_exit(trans, &iter); 471 int ret = bkey_err(k); 472 if (ret) 473 return ret; 474 475 i_size = k.k->p.offset << 9; 476 break; 477 } 478 case BTREE_ID_dirents: 479 i_mode = S_IFDIR; 480 break; 481 case BTREE_ID_xattrs: 482 break; 483 default: 484 BUG(); 485 } 486 487 struct bch_inode_unpacked new_inode; 488 
bch2_inode_init_early(c, &new_inode); 489 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); 490 new_inode.bi_size = i_size; 491 new_inode.bi_inum = inum; 492 493 return __bch2_fsck_write_inode(trans, &new_inode, snapshot); 494 } 495 496 struct snapshots_seen { 497 struct bpos pos; 498 snapshot_id_list ids; 499 }; 500 501 static inline void snapshots_seen_exit(struct snapshots_seen *s) 502 { 503 darray_exit(&s->ids); 504 } 505 506 static inline void snapshots_seen_init(struct snapshots_seen *s) 507 { 508 memset(s, 0, sizeof(*s)); 509 } 510 511 static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) 512 { 513 u32 *i; 514 __darray_for_each(s->ids, i) { 515 if (*i == id) 516 return 0; 517 if (*i > id) 518 break; 519 } 520 521 int ret = darray_insert_item(&s->ids, i - s->ids.data, id); 522 if (ret) 523 bch_err(c, "error reallocating snapshots_seen table (size %zu)", 524 s->ids.size); 525 return ret; 526 } 527 528 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, 529 enum btree_id btree_id, struct bpos pos) 530 { 531 if (!bkey_eq(s->pos, pos)) 532 s->ids.nr = 0; 533 s->pos = pos; 534 535 return snapshot_list_add_nodup(c, &s->ids, pos.snapshot); 536 } 537 538 /** 539 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, 540 * and @ancestor hasn't been overwritten in @seen 541 * 542 * @c: filesystem handle 543 * @seen: list of snapshot ids already seen at current position 544 * @id: descendent snapshot id 545 * @ancestor: ancestor snapshot id 546 * 547 * Returns: whether key in @ancestor snapshot is visible in @id snapshot 548 */ 549 static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, 550 u32 id, u32 ancestor) 551 { 552 ssize_t i; 553 554 EBUG_ON(id > ancestor); 555 556 /* @ancestor should be the snapshot most recently added to @seen */ 557 EBUG_ON(ancestor != seen->pos.snapshot); 558 EBUG_ON(ancestor != 
darray_last(seen->ids)); 559 560 if (id == ancestor) 561 return true; 562 563 if (!bch2_snapshot_is_ancestor(c, id, ancestor)) 564 return false; 565 566 /* 567 * We know that @id is a descendant of @ancestor, we're checking if 568 * we've seen a key that overwrote @ancestor - i.e. also a descendent of 569 * @ascestor and with @id as a descendent. 570 * 571 * But we already know that we're scanning IDs between @id and @ancestor 572 * numerically, since snapshot ID lists are kept sorted, so if we find 573 * an id that's an ancestor of @id we're done: 574 */ 575 576 for (i = seen->ids.nr - 2; 577 i >= 0 && seen->ids.data[i] >= id; 578 --i) 579 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i])) 580 return false; 581 582 return true; 583 } 584 585 /** 586 * ref_visible - given a key with snapshot id @src that points to a key with 587 * snapshot id @dst, test whether there is some snapshot in which @dst is 588 * visible. 589 * 590 * @c: filesystem handle 591 * @s: list of snapshot IDs already seen at @src 592 * @src: snapshot ID of src key 593 * @dst: snapshot ID of dst key 594 * Returns: true if there is some snapshot in which @dst is visible 595 * 596 * Assumes we're visiting @src keys in natural key order 597 */ 598 static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, 599 u32 src, u32 dst) 600 { 601 return dst <= src 602 ? 
key_visible_in_snapshot(c, s, dst, src) 603 : bch2_snapshot_is_ancestor(c, src, dst); 604 } 605 606 static int ref_visible2(struct bch_fs *c, 607 u32 src, struct snapshots_seen *src_seen, 608 u32 dst, struct snapshots_seen *dst_seen) 609 { 610 if (dst > src) { 611 swap(dst, src); 612 swap(dst_seen, src_seen); 613 } 614 return key_visible_in_snapshot(c, src_seen, dst, src); 615 } 616 617 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ 618 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ 619 (_i)->snapshot <= (_snapshot); _i++) \ 620 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) 621 622 struct inode_walker_entry { 623 struct bch_inode_unpacked inode; 624 u32 snapshot; 625 bool seen_this_pos; 626 u64 count; 627 }; 628 629 struct inode_walker { 630 bool first_this_inode; 631 bool recalculate_sums; 632 struct bpos last_pos; 633 634 DARRAY(struct inode_walker_entry) inodes; 635 }; 636 637 static void inode_walker_exit(struct inode_walker *w) 638 { 639 darray_exit(&w->inodes); 640 } 641 642 static struct inode_walker inode_walker_init(void) 643 { 644 return (struct inode_walker) { 0, }; 645 } 646 647 static int add_inode(struct bch_fs *c, struct inode_walker *w, 648 struct bkey_s_c inode) 649 { 650 struct bch_inode_unpacked u; 651 652 BUG_ON(bch2_inode_unpack(inode, &u)); 653 654 return darray_push(&w->inodes, ((struct inode_walker_entry) { 655 .inode = u, 656 .snapshot = inode.k->p.snapshot, 657 })); 658 } 659 660 static int get_inodes_all_snapshots(struct btree_trans *trans, 661 struct inode_walker *w, u64 inum) 662 { 663 struct bch_fs *c = trans->c; 664 struct btree_iter iter; 665 struct bkey_s_c k; 666 int ret; 667 668 w->recalculate_sums = false; 669 w->inodes.nr = 0; 670 671 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 672 BTREE_ITER_all_snapshots, k, ret) { 673 if (k.k->p.offset != inum) 674 break; 675 676 if (bkey_is_inode(k.k)) 677 add_inode(c, w, k); 678 } 679 
bch2_trans_iter_exit(trans, &iter); 680 681 if (ret) 682 return ret; 683 684 w->first_this_inode = true; 685 return 0; 686 } 687 688 static struct inode_walker_entry * 689 lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k) 690 { 691 bool is_whiteout = k.k->type == KEY_TYPE_whiteout; 692 693 struct inode_walker_entry *i; 694 __darray_for_each(w->inodes, i) 695 if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot)) 696 goto found; 697 698 return NULL; 699 found: 700 BUG_ON(k.k->p.snapshot > i->snapshot); 701 702 if (k.k->p.snapshot != i->snapshot && !is_whiteout) { 703 struct inode_walker_entry new = *i; 704 705 new.snapshot = k.k->p.snapshot; 706 new.count = 0; 707 708 struct printbuf buf = PRINTBUF; 709 bch2_bkey_val_to_text(&buf, c, k); 710 711 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" 712 "unexpected because we should always update the inode when we update a key in that inode\n" 713 "%s", 714 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf); 715 printbuf_exit(&buf); 716 717 while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot) 718 --i; 719 720 size_t pos = i - w->inodes.data; 721 int ret = darray_insert_item(&w->inodes, pos, new); 722 if (ret) 723 return ERR_PTR(ret); 724 725 i = w->inodes.data + pos; 726 } 727 728 return i; 729 } 730 731 static struct inode_walker_entry *walk_inode(struct btree_trans *trans, 732 struct inode_walker *w, 733 struct bkey_s_c k) 734 { 735 if (w->last_pos.inode != k.k->p.inode) { 736 int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); 737 if (ret) 738 return ERR_PTR(ret); 739 } else if (bkey_cmp(w->last_pos, k.k->p)) { 740 darray_for_each(w->inodes, i) 741 i->seen_this_pos = false; 742 } 743 744 w->last_pos = k.k->p; 745 746 return lookup_inode_for_snapshot(trans->c, w, k); 747 } 748 749 static int get_visible_inodes(struct btree_trans *trans, 750 struct inode_walker *w, 751 struct snapshots_seen *s, 752 u64 inum) 753 { 754 
struct bch_fs *c = trans->c; 755 struct btree_iter iter; 756 struct bkey_s_c k; 757 int ret; 758 759 w->inodes.nr = 0; 760 761 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 762 BTREE_ITER_all_snapshots, k, ret) { 763 if (k.k->p.offset != inum) 764 break; 765 766 if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) 767 continue; 768 769 if (bkey_is_inode(k.k)) 770 add_inode(c, w, k); 771 772 if (k.k->p.snapshot >= s->pos.snapshot) 773 break; 774 } 775 bch2_trans_iter_exit(trans, &iter); 776 777 return ret; 778 } 779 780 static int hash_redo_key(struct btree_trans *trans, 781 const struct bch_hash_desc desc, 782 struct bch_hash_info *hash_info, 783 struct btree_iter *k_iter, struct bkey_s_c k) 784 { 785 struct bkey_i *delete; 786 struct bkey_i *tmp; 787 788 delete = bch2_trans_kmalloc(trans, sizeof(*delete)); 789 if (IS_ERR(delete)) 790 return PTR_ERR(delete); 791 792 tmp = bch2_bkey_make_mut_noupdate(trans, k); 793 if (IS_ERR(tmp)) 794 return PTR_ERR(tmp); 795 796 bkey_init(&delete->k); 797 delete->k.p = k_iter->pos; 798 return bch2_btree_iter_traverse(k_iter) ?: 799 bch2_trans_update(trans, k_iter, delete, 0) ?: 800 bch2_hash_set_in_snapshot(trans, desc, hash_info, 801 (subvol_inum) { 0, k.k->p.inode }, 802 k.k->p.snapshot, tmp, 803 STR_HASH_must_create| 804 BTREE_UPDATE_internal_snapshot_node) ?: 805 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 806 } 807 808 static int hash_check_key(struct btree_trans *trans, 809 const struct bch_hash_desc desc, 810 struct bch_hash_info *hash_info, 811 struct btree_iter *k_iter, struct bkey_s_c hash_k) 812 { 813 struct bch_fs *c = trans->c; 814 struct btree_iter iter = { NULL }; 815 struct printbuf buf = PRINTBUF; 816 struct bkey_s_c k; 817 u64 hash; 818 int ret = 0; 819 820 if (hash_k.k->type != desc.key_type) 821 return 0; 822 823 hash = desc.hash_bkey(hash_info, hash_k); 824 825 if (likely(hash == hash_k.k->p.offset)) 826 return 0; 827 828 if (hash_k.k->p.offset < hash) 829 
goto bad_hash; 830 831 for_each_btree_key_norestart(trans, iter, desc.btree_id, 832 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), 833 BTREE_ITER_slots, k, ret) { 834 if (bkey_eq(k.k->p, hash_k.k->p)) 835 break; 836 837 if (fsck_err_on(k.k->type == desc.key_type && 838 !desc.cmp_bkey(k, hash_k), c, 839 hash_table_key_duplicate, 840 "duplicate hash table keys:\n%s", 841 (printbuf_reset(&buf), 842 bch2_bkey_val_to_text(&buf, c, hash_k), 843 buf.buf))) { 844 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; 845 break; 846 } 847 848 if (bkey_deleted(k.k)) { 849 bch2_trans_iter_exit(trans, &iter); 850 goto bad_hash; 851 } 852 } 853 out: 854 bch2_trans_iter_exit(trans, &iter); 855 printbuf_exit(&buf); 856 return ret; 857 bad_hash: 858 if (fsck_err(c, hash_table_key_wrong_offset, 859 "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", 860 bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, 861 (printbuf_reset(&buf), 862 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { 863 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); 864 bch_err_fn(c, ret); 865 if (ret) 866 return ret; 867 ret = -BCH_ERR_transaction_restart_nested; 868 } 869 fsck_err: 870 goto out; 871 } 872 873 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, 874 struct btree_iter *iter, 875 struct bpos pos) 876 { 877 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); 878 } 879 880 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, 881 struct btree_iter *iter, 882 struct bch_inode_unpacked *inode, 883 u32 *snapshot) 884 { 885 if (inode->bi_subvol) { 886 u64 inum; 887 int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum); 888 if (ret) 889 return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) }); 890 } 891 892 return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot)); 893 } 894 895 static bool 
inode_points_to_dirent(struct bch_inode_unpacked *inode, 896 struct bkey_s_c_dirent d) 897 { 898 return inode->bi_dir == d.k->p.inode && 899 inode->bi_dir_offset == d.k->p.offset; 900 } 901 902 static bool dirent_points_to_inode(struct bkey_s_c_dirent d, 903 struct bch_inode_unpacked *inode) 904 { 905 return d.v->d_type == DT_SUBVOL 906 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 907 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 908 } 909 910 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 911 { 912 struct btree_iter iter; 913 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 914 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 915 bch2_trans_iter_exit(trans, &iter); 916 return ret; 917 } 918 919 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 920 struct bch_inode_unpacked *inode, 921 u32 inode_snapshot, bool *write_inode) 922 { 923 struct bch_fs *c = trans->c; 924 struct printbuf buf = PRINTBUF; 925 926 struct btree_iter dirent_iter = {}; 927 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 928 int ret = bkey_err(d); 929 if (ret && !bch2_err_matches(ret, ENOENT)) 930 return ret; 931 932 if (fsck_err_on(ret, 933 c, inode_points_to_missing_dirent, 934 "inode points to missing dirent\n%s", 935 (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || 936 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 937 c, inode_points_to_wrong_dirent, 938 "inode points to dirent that does not point back:\n%s", 939 (bch2_bkey_val_to_text(&buf, c, inode_k), 940 prt_newline(&buf), 941 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 942 /* 943 * We just clear the backpointer fields for now. If we find a 944 * dirent that points to this inode in check_dirents(), we'll 945 * update it then; then when we get to check_path() if the 946 * backpointer is still 0 we'll reattach it. 
947 */ 948 inode->bi_dir = 0; 949 inode->bi_dir_offset = 0; 950 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 951 *write_inode = true; 952 } 953 954 ret = 0; 955 fsck_err: 956 bch2_trans_iter_exit(trans, &dirent_iter); 957 printbuf_exit(&buf); 958 bch_err_fn(c, ret); 959 return ret; 960 } 961 962 static int check_inode(struct btree_trans *trans, 963 struct btree_iter *iter, 964 struct bkey_s_c k, 965 struct bch_inode_unpacked *prev, 966 struct snapshots_seen *s, 967 bool full) 968 { 969 struct bch_fs *c = trans->c; 970 struct bch_inode_unpacked u; 971 bool do_update = false; 972 int ret; 973 974 ret = bch2_check_key_has_snapshot(trans, iter, k); 975 if (ret < 0) 976 goto err; 977 if (ret) 978 return 0; 979 980 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 981 if (ret) 982 goto err; 983 984 if (!bkey_is_inode(k.k)) 985 return 0; 986 987 BUG_ON(bch2_inode_unpack(k, &u)); 988 989 if (!full && 990 !(u.bi_flags & (BCH_INODE_i_size_dirty| 991 BCH_INODE_i_sectors_dirty| 992 BCH_INODE_unlinked))) 993 return 0; 994 995 if (prev->bi_inum != u.bi_inum) 996 *prev = u; 997 998 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 999 inode_d_type(prev) != inode_d_type(&u), 1000 c, inode_snapshot_mismatch, 1001 "inodes in different snapshots don't match")) { 1002 bch_err(c, "repair not implemented yet"); 1003 return -BCH_ERR_fsck_repair_unimplemented; 1004 } 1005 1006 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 1007 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 1008 struct bpos new_min_pos; 1009 1010 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 1011 if (ret) 1012 goto err; 1013 1014 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1015 1016 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1017 1018 bch_err_msg(c, ret, "in fsck updating inode"); 1019 if (ret) 1020 return ret; 1021 1022 if (!bpos_eq(new_min_pos, POS_MIN)) 1023 bch2_btree_iter_set_pos(iter, 
			bpos_predecessor(new_min_pos));
		return 0;
	}

	/*
	 * Unlinked inodes must be on the deleted list so they get reaped;
	 * repair missing list membership:
	 */
	if (u.bi_flags & BCH_INODE_unlinked) {
		ret = check_inode_deleted_list(trans, k.k->p);
		if (ret < 0)
			return ret;

		fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list,
			    "inode %llu:%u unlinked, but not on deleted list",
			    u.bi_inum, k.k->p.snapshot);
		ret = 0;
	}

	/*
	 * An unlinked inode is expected after an unclean shutdown; only
	 * prompt (fsck_err) before deleting if the fs was marked clean:
	 */
	if (u.bi_flags & BCH_INODE_unlinked &&
	    (!c->sb.clean ||
	     fsck_err(c, inode_unlinked_but_clean,
		      "filesystem marked clean, but inode %llu unlinked",
		      u.bi_inum))) {
		ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
		bch_err_msg(c, ret, "in fsck deleting inode");
		return ret;
	}

	/* i_size_dirty: an interrupted truncate - finish deleting extents: */
	if (u.bi_flags & BCH_INODE_i_size_dirty &&
	    (!c->sb.clean ||
	     fsck_err(c, inode_i_size_dirty_but_clean,
		      "filesystem marked clean, but inode %llu has i_size dirty",
		      u.bi_inum))) {
		bch_verbose(c, "truncating inode %llu", u.bi_inum);

		/*
		 * XXX: need to truncate partial blocks too here - or ideally
		 * just switch units to bytes and that issue goes away
		 */
		ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
				SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
				     iter->pos.snapshot),
				POS(u.bi_inum, U64_MAX),
				0, NULL);
		bch_err_msg(c, ret, "in fsck truncating inode");
		if (ret)
			return ret;

		/*
		 * We truncated without our normal sector accounting hook, just
		 * make sure we recalculate it:
		 */
		u.bi_flags |= BCH_INODE_i_sectors_dirty;

		u.bi_flags &= ~BCH_INODE_i_size_dirty;
		do_update = true;
	}

	/* i_sectors_dirty: recount allocated sectors from the extents btree: */
	if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
	    (!c->sb.clean ||
	     fsck_err(c, inode_i_sectors_dirty_but_clean,
		      "filesystem marked clean, but inode %llu has i_sectors dirty",
		      u.bi_inum))) {
		s64 sectors;

		bch_verbose(c, "recounting sectors for inode %llu",
			    u.bi_inum);

		sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
		if (sectors < 0) {
			bch_err_msg(c, sectors, "in fsck recounting inode sectors");
			return sectors;
		}

		u.bi_sectors = sectors;
		u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
		do_update = true;
	}

	/* Untrusted backpointer: clear it; the dirents pass reconstructs it: */
	if (u.bi_flags & BCH_INODE_backptr_untrusted) {
		u.bi_dir = 0;
		u.bi_dir_offset = 0;
		u.bi_flags &= ~BCH_INODE_backptr_untrusted;
		do_update = true;
	}

	if (u.bi_dir || u.bi_dir_offset) {
		ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
		if (ret)
			goto err;
	}

	/* Only subvolume root inodes may carry a parent subvol: */
	if (fsck_err_on(u.bi_parent_subvol &&
			(u.bi_subvol == 0 ||
			 u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
			c, inode_bi_parent_nonzero,
			"inode %llu:%u has subvol %u but nonzero parent subvol %u",
			u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
		u.bi_parent_subvol = 0;
		do_update = true;
	}

	/*
	 * Subvolume root: bi_subvol must point at a subvolume that points
	 * back at this inode, in an ancestor snapshot:
	 */
	if (u.bi_subvol) {
		struct bch_subvolume s;

		ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s);
		if (ret && !bch2_err_matches(ret, ENOENT))
			goto err;

		if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
			ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
			goto do_update;
		}

		if (fsck_err_on(ret,
				c, inode_bi_subvol_missing,
				"inode %llu:%u bi_subvol points to missing subvolume %u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
		    fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
				!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
							   k.k->p.snapshot),
				c, inode_bi_subvol_wrong,
				"inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol,
				le64_to_cpu(s.inode),
				le32_to_cpu(s.snapshot))) {
			u.bi_subvol = 0;
			u.bi_parent_subvol = 0;
			do_update = true;
		}
	}
do_update:
	if (do_update) {
		ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
		bch_err_msg(c, ret, "in fsck updating inode");
		if (ret)
			return ret;
	}
err:
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk all inodes in all snapshots, checking/repairing each one via
 * check_inode():
 */
int bch2_check_inodes(struct bch_fs *c)
{
	bool full = c->opts.fsck;
	struct bch_inode_unpacked prev = { 0 };
	struct snapshots_seen s;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
				POS_MIN,
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_inode(trans, &iter, k, &prev, &s, full)));

	snapshots_seen_exit(&s);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Which inode types may own keys in @btree: extents belong to regular files
 * and symlinks, dirents to directories, xattrs to any inode type:
 */
static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode)
{
	switch (btree) {
	case BTREE_ID_extents:
		return S_ISREG(mode) || S_ISLNK(mode);
	case BTREE_ID_dirents:
		return S_ISDIR(mode);
	case BTREE_ID_xattrs:
		return true;
	default:
		BUG();
	}
}

/*
 * Verify that key @k belongs to a live inode of a matching type (@i, from the
 * inode walker); delete the key if not, or reconstruct the inode when the
 * inodes btree was lost:
 */
static int check_key_has_inode(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct inode_walker *inode,
			       struct inode_walker_entry *i,
			       struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		return ret;

	if (k.k->type == KEY_TYPE_whiteout)
		goto out;

	/*
	 * Inodes btree was lost: recreate the inode instead of deleting the
	 * key, then restart this pass:
	 */
	if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) {
		ret =   reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?:
			bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
		if (ret)
			goto err;

		inode->last_pos.inode--;
		ret = -BCH_ERR_transaction_restart_nested;
		goto err;
	}

	if (fsck_err_on(!i, c, key_in_missing_inode,
			"key in missing inode:\n %s",
			(printbuf_reset(&buf),
1227 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1228 goto delete; 1229 1230 if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), 1231 c, key_in_wrong_inode_type, 1232 "key for wrong inode mode %o:\n %s", 1233 i->inode.bi_mode, 1234 (printbuf_reset(&buf), 1235 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1236 goto delete; 1237 out: 1238 err: 1239 fsck_err: 1240 printbuf_exit(&buf); 1241 bch_err_fn(c, ret); 1242 return ret; 1243 delete: 1244 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); 1245 goto out; 1246 } 1247 1248 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) 1249 { 1250 struct bch_fs *c = trans->c; 1251 int ret = 0; 1252 s64 count2; 1253 1254 darray_for_each(w->inodes, i) { 1255 if (i->inode.bi_sectors == i->count) 1256 continue; 1257 1258 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1259 1260 if (w->recalculate_sums) 1261 i->count = count2; 1262 1263 if (i->count != count2) { 1264 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1265 w->last_pos.inode, i->snapshot, i->count, count2); 1266 return -BCH_ERR_internal_fsck_err; 1267 } 1268 1269 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1270 c, inode_i_sectors_wrong, 1271 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1272 w->last_pos.inode, i->snapshot, 1273 i->inode.bi_sectors, i->count)) { 1274 i->inode.bi_sectors = i->count; 1275 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1276 if (ret) 1277 break; 1278 } 1279 } 1280 fsck_err: 1281 bch_err_fn(c, ret); 1282 return ret; 1283 } 1284 1285 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1286 { 1287 u32 restart_count = trans->restart_count; 1288 return check_i_sectors_notnested(trans, w) ?: 1289 trans_was_restarted(trans, restart_count); 1290 } 1291 1292 struct extent_end { 1293 u32 snapshot; 1294 u64 
offset; 1295 struct snapshots_seen seen; 1296 }; 1297 1298 struct extent_ends { 1299 struct bpos last_pos; 1300 DARRAY(struct extent_end) e; 1301 }; 1302 1303 static void extent_ends_reset(struct extent_ends *extent_ends) 1304 { 1305 darray_for_each(extent_ends->e, i) 1306 snapshots_seen_exit(&i->seen); 1307 extent_ends->e.nr = 0; 1308 } 1309 1310 static void extent_ends_exit(struct extent_ends *extent_ends) 1311 { 1312 extent_ends_reset(extent_ends); 1313 darray_exit(&extent_ends->e); 1314 } 1315 1316 static void extent_ends_init(struct extent_ends *extent_ends) 1317 { 1318 memset(extent_ends, 0, sizeof(*extent_ends)); 1319 } 1320 1321 static int extent_ends_at(struct bch_fs *c, 1322 struct extent_ends *extent_ends, 1323 struct snapshots_seen *seen, 1324 struct bkey_s_c k) 1325 { 1326 struct extent_end *i, n = (struct extent_end) { 1327 .offset = k.k->p.offset, 1328 .snapshot = k.k->p.snapshot, 1329 .seen = *seen, 1330 }; 1331 1332 n.seen.ids.data = kmemdup(seen->ids.data, 1333 sizeof(seen->ids.data[0]) * seen->ids.size, 1334 GFP_KERNEL); 1335 if (!n.seen.ids.data) 1336 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1337 1338 __darray_for_each(extent_ends->e, i) { 1339 if (i->snapshot == k.k->p.snapshot) { 1340 snapshots_seen_exit(&i->seen); 1341 *i = n; 1342 return 0; 1343 } 1344 1345 if (i->snapshot >= k.k->p.snapshot) 1346 break; 1347 } 1348 1349 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1350 } 1351 1352 static int overlapping_extents_found(struct btree_trans *trans, 1353 enum btree_id btree, 1354 struct bpos pos1, struct snapshots_seen *pos1_seen, 1355 struct bkey pos2, 1356 bool *fixed, 1357 struct extent_end *extent_end) 1358 { 1359 struct bch_fs *c = trans->c; 1360 struct printbuf buf = PRINTBUF; 1361 struct btree_iter iter1, iter2 = { NULL }; 1362 struct bkey_s_c k1, k2; 1363 int ret; 1364 1365 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1366 1367 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1368 BTREE_ITER_all_snapshots| 
1369 BTREE_ITER_not_extents); 1370 k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); 1371 ret = bkey_err(k1); 1372 if (ret) 1373 goto err; 1374 1375 prt_str(&buf, "\n "); 1376 bch2_bkey_val_to_text(&buf, c, k1); 1377 1378 if (!bpos_eq(pos1, k1.k->p)) { 1379 prt_str(&buf, "\n wanted\n "); 1380 bch2_bpos_to_text(&buf, pos1); 1381 prt_str(&buf, "\n "); 1382 bch2_bkey_to_text(&buf, &pos2); 1383 1384 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1385 __func__, buf.buf); 1386 ret = -BCH_ERR_internal_fsck_err; 1387 goto err; 1388 } 1389 1390 bch2_trans_copy_iter(&iter2, &iter1); 1391 1392 while (1) { 1393 bch2_btree_iter_advance(&iter2); 1394 1395 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1396 ret = bkey_err(k2); 1397 if (ret) 1398 goto err; 1399 1400 if (bpos_ge(k2.k->p, pos2.p)) 1401 break; 1402 } 1403 1404 prt_str(&buf, "\n "); 1405 bch2_bkey_val_to_text(&buf, c, k2); 1406 1407 if (bpos_gt(k2.k->p, pos2.p) || 1408 pos2.size != k2.k->size) { 1409 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1410 __func__, buf.buf); 1411 ret = -BCH_ERR_internal_fsck_err; 1412 goto err; 1413 } 1414 1415 prt_printf(&buf, "\n overwriting %s extent", 1416 pos1.snapshot >= pos2.p.snapshot ? 
"first" : "second"); 1417 1418 if (fsck_err(c, extent_overlapping, 1419 "overlapping extents%s", buf.buf)) { 1420 struct btree_iter *old_iter = &iter1; 1421 struct disk_reservation res = { 0 }; 1422 1423 if (pos1.snapshot < pos2.p.snapshot) { 1424 old_iter = &iter2; 1425 swap(k1, k2); 1426 } 1427 1428 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1429 1430 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1431 BTREE_UPDATE_internal_snapshot_node, 1432 k1, k2) ?: 1433 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1434 bch2_disk_reservation_put(c, &res); 1435 1436 if (ret) 1437 goto err; 1438 1439 *fixed = true; 1440 1441 if (pos1.snapshot == pos2.p.snapshot) { 1442 /* 1443 * We overwrote the first extent, and did the overwrite 1444 * in the same snapshot: 1445 */ 1446 extent_end->offset = bkey_start_offset(&pos2); 1447 } else if (pos1.snapshot > pos2.p.snapshot) { 1448 /* 1449 * We overwrote the first extent in pos2's snapshot: 1450 */ 1451 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1452 } else { 1453 /* 1454 * We overwrote the second extent - restart 1455 * check_extent() from the top: 1456 */ 1457 ret = -BCH_ERR_transaction_restart_nested; 1458 } 1459 } 1460 fsck_err: 1461 err: 1462 bch2_trans_iter_exit(trans, &iter2); 1463 bch2_trans_iter_exit(trans, &iter1); 1464 printbuf_exit(&buf); 1465 return ret; 1466 } 1467 1468 static int check_overlapping_extents(struct btree_trans *trans, 1469 struct snapshots_seen *seen, 1470 struct extent_ends *extent_ends, 1471 struct bkey_s_c k, 1472 struct btree_iter *iter, 1473 bool *fixed) 1474 { 1475 struct bch_fs *c = trans->c; 1476 int ret = 0; 1477 1478 /* transaction restart, running again */ 1479 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1480 return 0; 1481 1482 if (extent_ends->last_pos.inode != k.k->p.inode) 1483 extent_ends_reset(extent_ends); 1484 1485 darray_for_each(extent_ends->e, i) { 1486 if (i->offset <= bkey_start_offset(k.k)) 1487 continue; 1488 
		if (!ref_visible2(c,
				  k.k->p.snapshot, seen,
				  i->snapshot, &i->seen))
			continue;

		ret = overlapping_extents_found(trans, iter->btree_id,
						SPOS(iter->pos.inode,
						     i->offset,
						     i->snapshot),
						&i->seen,
						*k.k, fixed, i);
		if (ret)
			goto err;
	}

	extent_ends->last_pos = k.k->p;
err:
	return ret;
}

/*
 * Report (without repairing) encoded extents larger than the
 * encoded_extent_max option allows:
 */
static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter,
				struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
	struct bch_extent_crc_unpacked crc;
	const union bch_extent_entry *i;
	unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9;

	bkey_for_each_crc(k.k, ptrs, crc, i)
		if (crc_is_encoded(crc) &&
		    crc.uncompressed_size > encoded_extent_max_sectors) {
			struct printbuf buf = PRINTBUF;

			bch2_bkey_val_to_text(&buf, c, k);
			bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf);
			printbuf_exit(&buf);
		}

	return 0;
}

/*
 * Check one extent key: it must have a matching inode, not overlap other
 * extents, and not extend past i_size; its size is also accumulated per
 * inode version for the later i_sectors check:
 */
static int check_extent(struct btree_trans *trans, struct btree_iter *iter,
			struct bkey_s_c k,
			struct inode_walker *inode,
			struct snapshots_seen *s,
			struct extent_ends *extent_ends)
{
	struct bch_fs *c = trans->c;
	struct inode_walker_entry *i;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret) {
		/* positive return: key was handled/deleted - not an error */
		ret = ret < 0 ? ret : 0;
		goto out;
	}

	/* Moved on to a new inode: finish i_sectors checks for the last one: */
	if (inode->last_pos.inode != k.k->p.inode) {
		ret = check_i_sectors(trans, inode);
		if (ret)
			goto err;
	}

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	i = walk_inode(trans, inode, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		goto err;

	ret = check_key_has_inode(trans, iter, inode, i, k);
	if (ret)
		goto err;

	if (k.k->type != KEY_TYPE_whiteout) {
		ret = check_overlapping_extents(trans, s, extent_ends, k, iter,
						&inode->recalculate_sums);
		if (ret)
			goto err;
	}

	/*
	 * Check inodes in reverse order, from oldest snapshots to newest,
	 * starting from the inode that matches this extent's snapshot. If we
	 * didn't have one, iterate over all inodes:
	 */
	if (!i)
		i = &darray_last(inode->inodes);

	for (;
	     inode->inodes.data && i >= inode->inodes.data;
	     --i) {
		if (i->snapshot > k.k->p.snapshot ||
		    !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot))
			continue;

		if (k.k->type != KEY_TYPE_whiteout) {
			/* Extent past i_size (and not a reservation): delete it: */
			if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) &&
					k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 &&
					!bkey_extent_is_reservation(k),
					c, extent_past_end_of_inode,
					"extent type past end of inode %llu:%u, i_size %llu\n %s",
					i->inode.bi_inum, i->snapshot, i->inode.bi_size,
					(bch2_bkey_val_to_text(&buf, c, k), buf.buf))) {
				struct btree_iter iter2;

				bch2_trans_copy_iter(&iter2, iter);
				bch2_btree_iter_set_snapshot(&iter2, i->snapshot);
				ret =   bch2_btree_iter_traverse(&iter2) ?:
					bch2_btree_delete_at(trans, &iter2,
						BTREE_UPDATE_internal_snapshot_node);
				bch2_trans_iter_exit(trans, &iter2);
				if (ret)
					goto err;

				iter->k.type = KEY_TYPE_whiteout;
			}

			if (bkey_extent_is_allocation(k.k))
				i->count += k.k->size;
		}

		i->seen_this_pos = true;
	}

	if (k.k->type != KEY_TYPE_whiteout) {
		ret = extent_ends_at(c, extent_ends, s, k);
		if (ret)
			goto err;
	}
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk extents: verify that extents have a corresponding S_ISREG inode, and
 * that i_size and i_sectors are consistent
 */
int bch2_check_extents(struct bch_fs *c)
{
	struct inode_walker w = inode_walker_init();
	struct snapshots_seen s;
	struct extent_ends extent_ends;
	struct disk_reservation res = { 0 };

	snapshots_seen_init(&s);
	extent_ends_init(&extent_ends);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_extents,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				&res, NULL,
				BCH_TRANS_COMMIT_no_enospc, ({
			bch2_disk_reservation_put(c, &res);
			check_extent(trans, &iter, k, &w, &s, &extent_ends) ?:
			check_extent_overbig(trans, &iter, k);
		})) ?:
		check_i_sectors_notnested(trans, &w));

	bch2_disk_reservation_put(c, &res);
	extent_ends_exit(&extent_ends);
	inode_walker_exit(&w);
	snapshots_seen_exit(&s);

	bch_err_fn(c, ret);
	return ret;
}

/* Walk the reflink btree, checking for overbig encoded extents: */
int bch2_check_indirect_extents(struct bch_fs *c)
{
	struct disk_reservation res = { 0 };

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_reflink,
				POS_MIN,
				BTREE_ITER_prefetch, k,
				&res, NULL,
				BCH_TRANS_COMMIT_no_enospc, ({
			bch2_disk_reservation_put(c, &res);
			check_extent_overbig(trans, &iter, k);
		})));

	bch2_disk_reservation_put(c, &res);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Verify directory i_nlink (subdirectory count) for each inode version the
 * walker saw against the count accumulated from dirents:
 */
static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	struct bch_fs *c =
		trans->c;
	int ret = 0;
	s64 count2;

	darray_for_each(w->inodes, i) {
		if (i->inode.bi_nlink == i->count)
			continue;

		count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot);
		if (count2 < 0)
			return count2;

		if (i->count != count2) {
			bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu",
					    w->last_pos.inode, i->snapshot, i->count, count2);
			i->count = count2;
			if (i->inode.bi_nlink == i->count)
				continue;
		}

		if (fsck_err_on(i->inode.bi_nlink != i->count,
				c, inode_dir_wrong_nlink,
				"directory %llu:%u with wrong i_nlink: got %u, should be %llu",
				w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) {
			i->inode.bi_nlink = i->count;
			ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
			if (ret)
				break;
		}
	}
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

/* As above, but reports a transaction restart if one occurred: */
static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;
	return check_subdir_count_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}

/*
 * Check that dirent @d and its target inode point at each other: repairs a
 * missing/wrong inode backpointer, removes extra links to directories and
 * subvolumes, and bumps nlink on hardlinked files with i_nlink 0:
 */
noinline_for_stack
static int check_dirent_inode_dirent(struct btree_trans *trans,
				     struct btree_iter *iter,
				     struct bkey_s_c_dirent d,
				     struct bch_inode_unpacked *target,
				     u32 target_snapshot)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	/* Already consistent - nothing to do: */
	if (inode_points_to_dirent(target, d))
		return 0;

	if (bch2_inode_should_have_bp(target) &&
	    !fsck_err(c, inode_wrong_backpointer,
		      "dirent points to inode that does not point back:\n %s",
		      (bch2_bkey_val_to_text(&buf, c, d.s_c),
		       prt_printf(&buf, "\n "),
		       bch2_inode_unpacked_to_text(&buf, target),
		       buf.buf)))
		goto out_noiter;

	/* No backpointer at all: just point it at this dirent: */
	if (!target->bi_dir &&
	    !target->bi_dir_offset) {
		target->bi_dir		= d.k->p.inode;
		target->bi_dir_offset	= d.k->p.offset;
		return __bch2_fsck_write_inode(trans, target, target_snapshot);
	}

	/* Backpointer set but doesn't match @d: look up what it points at: */
	struct btree_iter bp_iter = { NULL };
	struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter,
			      SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot));
	ret = bkey_err(bp_dirent);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	bool backpointer_exists = !ret;
	ret = 0;

	/* Backpointer points at nothing: repoint it at this dirent: */
	if (fsck_err_on(!backpointer_exists,
			c, inode_wrong_backpointer,
			"inode %llu:%u has wrong backpointer:\n"
			"got %llu:%llu\n"
			"should be %llu:%llu",
			target->bi_inum, target_snapshot,
			target->bi_dir,
			target->bi_dir_offset,
			d.k->p.inode,
			d.k->p.offset)) {
		target->bi_dir		= d.k->p.inode;
		target->bi_dir_offset	= d.k->p.offset;
		ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
		goto out;
	}

	bch2_bkey_val_to_text(&buf, c, d.s_c);
	prt_newline(&buf);
	if (backpointer_exists)
		bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c);

	/* Directories and subvolumes may only have a single link: */
	if (fsck_err_on(backpointer_exists &&
			(S_ISDIR(target->bi_mode) ||
			 target->bi_subvol),
			c, inode_dir_multiple_links,
			"%s %llu:%u with multiple links\n%s",
			S_ISDIR(target->bi_mode) ? "directory" : "subvolume",
			target->bi_inum, target_snapshot, buf.buf)) {
		ret = __remove_dirent(trans, d.k->p);
		goto out;
	}

	/*
	 * hardlinked file with nlink 0:
	 * We're just adjusting nlink here so check_nlinks() will pick
	 * it up, it ignores inodes with nlink 0
	 */
	if (fsck_err_on(backpointer_exists && !target->bi_nlink,
			c, inode_multiple_links_but_nlink_0,
			"inode %llu:%u type %s has multiple links but i_nlink 0\n%s",
			target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) {
		target->bi_nlink++;
		target->bi_flags &= ~BCH_INODE_unlinked;
		ret = __bch2_fsck_write_inode(trans, target, target_snapshot);
		if (ret)
			goto err;
	}
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &bp_iter);
out_noiter:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check that dirent @d agrees with @target: backpointer agreement (above)
 * plus d_type; rewrites the dirent on a d_type mismatch:
 */
noinline_for_stack
static int check_dirent_target(struct btree_trans *trans,
			       struct btree_iter *iter,
			       struct bkey_s_c_dirent d,
			       struct bch_inode_unpacked *target,
			       u32 target_snapshot)
{
	struct bch_fs *c = trans->c;
	struct bkey_i_dirent *n;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot);
	if (ret)
		goto err;

	if (fsck_err_on(d.v->d_type != inode_d_type(target),
			c, dirent_d_type_wrong,
			"incorrect d_type: got %s, should be %s:\n%s",
			bch2_d_type_str(d.v->d_type),
			bch2_d_type_str(inode_d_type(target)),
			(printbuf_reset(&buf),
			 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
		n = bch2_trans_kmalloc(trans, bkey_bytes(d.k));
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			goto err;

		bkey_reassemble(&n->k_i, d.s_c);
		n->v.d_type = inode_d_type(target);
		if (n->v.d_type == DT_SUBVOL) {
			n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol);
			n->v.d_child_subvol
= cpu_to_le32(target->bi_subvol); 1859 } else { 1860 n->v.d_inum = cpu_to_le64(target->bi_inum); 1861 } 1862 1863 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1864 if (ret) 1865 goto err; 1866 1867 d = dirent_i_to_s_c(n); 1868 } 1869 err: 1870 fsck_err: 1871 printbuf_exit(&buf); 1872 bch_err_fn(c, ret); 1873 return ret; 1874 } 1875 1876 /* find a subvolume that's a descendent of @snapshot: */ 1877 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1878 { 1879 struct btree_iter iter; 1880 struct bkey_s_c k; 1881 int ret; 1882 1883 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1884 if (k.k->type != KEY_TYPE_subvolume) 1885 continue; 1886 1887 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1888 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1889 bch2_trans_iter_exit(trans, &iter); 1890 *subvolid = k.k->p.offset; 1891 goto found; 1892 } 1893 } 1894 if (!ret) 1895 ret = -ENOENT; 1896 found: 1897 bch2_trans_iter_exit(trans, &iter); 1898 return ret; 1899 } 1900 1901 noinline_for_stack 1902 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1903 struct bkey_s_c_dirent d) 1904 { 1905 struct bch_fs *c = trans->c; 1906 struct btree_iter subvol_iter = {}; 1907 struct bch_inode_unpacked subvol_root; 1908 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1909 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1910 u32 parent_snapshot; 1911 u32 new_parent_subvol = 0; 1912 u64 parent_inum; 1913 struct printbuf buf = PRINTBUF; 1914 int ret = 0; 1915 1916 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1917 if (ret && !bch2_err_matches(ret, ENOENT)) 1918 return ret; 1919 1920 if (ret || 1921 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 1922 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1923 if (ret2 && !bch2_err_matches(ret, 
ENOENT)) 1924 return ret2; 1925 } 1926 1927 if (ret && 1928 !new_parent_subvol && 1929 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1930 /* 1931 * Couldn't find a subvol for dirent's snapshot - but we lost 1932 * subvols, so we need to reconstruct: 1933 */ 1934 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 1935 if (ret) 1936 return ret; 1937 1938 parent_snapshot = d.k->p.snapshot; 1939 } 1940 1941 if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, 1942 "dirent parent_subvol points to missing subvolume\n%s", 1943 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1944 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1945 c, dirent_not_visible_in_parent_subvol, 1946 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1947 parent_snapshot, 1948 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1949 if (!new_parent_subvol) { 1950 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 1951 return -BCH_ERR_fsck_repair_unimplemented; 1952 } 1953 1954 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1955 ret = PTR_ERR_OR_ZERO(new_dirent); 1956 if (ret) 1957 goto err; 1958 1959 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1960 } 1961 1962 struct bkey_s_c_subvolume s = 1963 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1964 BTREE_ID_subvolumes, POS(0, target_subvol), 1965 0, subvolume); 1966 ret = bkey_err(s.s_c); 1967 if (ret && !bch2_err_matches(ret, ENOENT)) 1968 return ret; 1969 1970 if (ret) { 1971 if (fsck_err(c, dirent_to_missing_subvol, 1972 "dirent points to missing subvolume\n%s", 1973 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1974 return __remove_dirent(trans, d.k->p); 1975 ret = 0; 1976 goto out; 1977 } 1978 1979 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 1980 c, subvol_fs_path_parent_wrong, 1981 "subvol with wrong fs_path_parent, should be be 
%u\n%s", 1982 parent_subvol, 1983 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 1984 struct bkey_i_subvolume *n = 1985 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 1986 ret = PTR_ERR_OR_ZERO(n); 1987 if (ret) 1988 goto err; 1989 1990 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 1991 } 1992 1993 u64 target_inum = le64_to_cpu(s.v->inode); 1994 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 1995 1996 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 1997 if (ret && !bch2_err_matches(ret, ENOENT)) 1998 goto err; 1999 2000 if (ret) { 2001 bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); 2002 ret = -BCH_ERR_fsck_repair_unimplemented; 2003 ret = 0; 2004 goto err; 2005 } 2006 2007 if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, 2008 c, inode_bi_parent_wrong, 2009 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 2010 target_inum, 2011 subvol_root.bi_parent_subvol, parent_subvol)) { 2012 subvol_root.bi_parent_subvol = parent_subvol; 2013 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 2014 if (ret) 2015 goto err; 2016 } 2017 2018 ret = check_dirent_target(trans, iter, d, &subvol_root, 2019 target_snapshot); 2020 if (ret) 2021 goto err; 2022 out: 2023 err: 2024 fsck_err: 2025 bch2_trans_iter_exit(trans, &subvol_iter); 2026 printbuf_exit(&buf); 2027 return ret; 2028 } 2029 2030 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 2031 struct bkey_s_c k, 2032 struct bch_hash_info *hash_info, 2033 struct inode_walker *dir, 2034 struct inode_walker *target, 2035 struct snapshots_seen *s) 2036 { 2037 struct bch_fs *c = trans->c; 2038 struct inode_walker_entry *i; 2039 struct printbuf buf = PRINTBUF; 2040 int ret = 0; 2041 2042 ret = bch2_check_key_has_snapshot(trans, iter, k); 2043 if (ret) { 2044 ret = ret < 0 ? 
			ret : 0;
		goto out;
	}

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	if (k.k->type == KEY_TYPE_whiteout)
		goto out;

	/* Moved on to a new directory: finish subdir counts for the last one: */
	if (dir->last_pos.inode != k.k->p.inode) {
		ret = check_subdir_count(trans, dir);
		if (ret)
			goto err;
	}

	i = walk_inode(trans, dir, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret < 0)
		goto err;

	ret = check_key_has_inode(trans, iter, dir, i, k);
	if (ret)
		goto err;

	if (!i)
		goto out;

	if (dir->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &i->inode);
	dir->first_this_inode = false;

	ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k);
	if (ret < 0)
		goto err;
	if (ret) {
		/* dirent has been deleted */
		ret = 0;
		goto out;
	}

	if (k.k->type != KEY_TYPE_dirent)
		goto out;

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

	if (d.v->d_type == DT_SUBVOL) {
		ret = check_dirent_to_subvol(trans, iter, d);
		if (ret)
			goto err;
	} else {
		ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
		if (ret)
			goto err;

		if (fsck_err_on(!target->inodes.nr,
				c, dirent_to_missing_inode,
				"dirent points to missing inode:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, k),
				 buf.buf))) {
			ret = __remove_dirent(trans, d.k->p);
			if (ret)
				goto err;
		}

		darray_for_each(target->inodes, i) {
			ret = check_dirent_target(trans, iter, d,
						  &i->inode, i->snapshot);
			if (ret)
				goto err;
		}

		/* Accumulate subdirectory counts for the i_nlink check: */
		if (d.v->d_type == DT_DIR)
			for_each_visible_inode(c, s, dir, d.k->p.snapshot, i)
				i->count++;
	}
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
 * validate d_type
 */
int bch2_check_dirents(struct bch_fs *c)
{
	struct inode_walker dir = inode_walker_init();
	struct inode_walker target = inode_walker_init();
	struct snapshots_seen s;
	struct bch_hash_info hash_info;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_dirents,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
			check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
		check_subdir_count_notnested(trans, &dir));

	snapshots_seen_exit(&s);
	inode_walker_exit(&dir);
	inode_walker_exit(&target);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check one xattr: snapshot validity, a matching inode (any type), and hash
 * table placement:
 */
static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
		       struct bkey_s_c k,
		       struct bch_hash_info *hash_info,
		       struct inode_walker *inode)
{
	struct bch_fs *c = trans->c;
	struct inode_walker_entry *i;
	int ret;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret < 0)
		return ret;
	if (ret)
		return 0;

	i = walk_inode(trans, inode, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		return ret;

	ret = check_key_has_inode(trans, iter, inode, i, k);
	if (ret)
		return ret;

	if (!i)
		return 0;

	if (inode->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &i->inode);
	inode->first_this_inode = false;

	ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk xattrs: verify that they all have a corresponding inode
 */
int bch2_check_xattrs(struct bch_fs *c)
{
	struct inode_walker inode = inode_walker_init();
	struct bch_hash_info hash_info;
	int ret = 0;

	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
			check_xattr(trans, &iter, k, &hash_info, &inode)));
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Verify the root subvolume and the root directory inode exist, creating
 * them if missing:
 */
static int check_root_trans(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked root_inode;
	u32 snapshot;
	u64 inum;
	int ret;

	ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (mustfix_fsck_err_on(ret, c, root_subvol_missing,
				"root subvol missing")) {
		struct bkey_i_subvolume *root_subvol =
			bch2_trans_kmalloc(trans, sizeof(*root_subvol));
		ret = PTR_ERR_OR_ZERO(root_subvol);
		if (ret)
			goto err;

		snapshot	= U32_MAX;
		inum		= BCACHEFS_ROOT_INO;

		bkey_subvolume_init(&root_subvol->k_i);
		root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL;
		root_subvol->v.flags	= 0;
		root_subvol->v.snapshot	= cpu_to_le32(snapshot);
		root_subvol->v.inode	= cpu_to_le64(inum);
		ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
		bch_err_msg(c, ret, "writing root subvol");
		if (ret)
			goto err;
	}

	ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (mustfix_fsck_err_on(ret, c, root_dir_missing,
				"root directory missing") ||
	    mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode),
				c, root_inode_not_dir,
				"root inode not a directory")) {
		bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
				0, NULL);
		root_inode.bi_inum = inum;

		ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot);
		bch_err_msg(c, ret, "writing root inode");
	}
err:
fsck_err:
	return ret;
}

/* Get root directory, create if it doesn't exist: */
int bch2_check_root(struct bch_fs *c)
{
	int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_root_trans(trans));
	bch_err_fn(c, ret);
	return ret;
}

typedef DARRAY(u32) darray_u32;

/* Linear membership test on a small u32 darray: */
static bool darray_u32_has(darray_u32 *d, u32 v)
{
	darray_for_each(*d, i)
		if (*i == v)
			return true;
	return false;
}

/*
 * We've checked that inode backpointers point to valid dirents; here, it's
 * sufficient to check that the subvolume root has a dirent:
 */
static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s)
{
	struct bch_inode_unpacked inode;
	int ret = bch2_inode_find_by_inum_trans(trans,
				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
				&inode);
	if (ret)
		return ret;

	return inode.bi_dir != 0;
}

/*
 * Walk up the fs_path_parent chain from @k to the root subvolume, reattaching
 * unreachable subvolumes and breaking subvolume loops:
 */
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter parent_iter = {};
	darray_u32 subvol_path = {};
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (k.k->type != KEY_TYPE_subvolume)
		return 0;

	while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
		ret = darray_push(&subvol_path, k.k->p.offset);
		if (ret)
			goto err;

		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);

		ret = subvol_has_dirent(trans, s);
		if (ret < 0)
			break;

		if (fsck_err_on(!ret,
				c, subvol_unreachable,
				"unreachable subvolume %s",
				(bch2_bkey_val_to_text(&buf, c, s.s_c),
				 buf.buf))) {
			ret = reattach_subvol(trans, s);
			break;
		}

		u32 parent = le32_to_cpu(s.v->fs_path_parent);

		/* Parent already on the path we walked: cycle detected: */
		if (darray_u32_has(&subvol_path, parent)) {
			if (fsck_err(c, subvol_loop, "subvolume loop"))
				ret = reattach_subvol(trans, s);
			break;
		}

		bch2_trans_iter_exit(trans, &parent_iter);
		bch2_trans_iter_init(trans, &parent_iter,
				     BTREE_ID_subvolumes, POS(0, parent), 0);
		k = bch2_btree_iter_peek_slot(&parent_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (fsck_err_on(k.k->type != KEY_TYPE_subvolume,
				c, subvol_unreachable,
				"unreachable subvolume %s",
				(bch2_bkey_val_to_text(&buf, c, s.s_c),
				 buf.buf))) {
			ret = reattach_subvol(trans, s);
			break;
		}
	}
fsck_err:
err:
	printbuf_exit(&buf);
	darray_exit(&subvol_path);
	bch2_trans_iter_exit(trans, &parent_iter);
	return ret;
}

/* Verify every subvolume is reachable from the root subvolume: */
int bch2_check_subvolume_structure(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_subvol_path(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

struct pathbuf_entry {
	u64	inum;
	u32	snapshot;
};

typedef DARRAY(struct pathbuf_entry) pathbuf;

/* Has (inum, snapshot) already been visited on this path? */
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
	darray_for_each(*p, i)
		if (i->inum	== inum &&
		    i->snapshot	== snapshot)
			return true;
	return false;
}

/*
 * Check that a given inode is reachable from its subvolume root - we already
 * verified subvolume connectivity:
 *
 * XXX: we should also be verifying that inodes are in the right subvolumes
 */
static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter inode_iter = {};
	struct bch_inode_unpacked inode;
	struct printbuf buf = PRINTBUF;
	u32 snapshot = inode_k.k->p.snapshot;
	int ret = 0;

	p->nr = 0;

	BUG_ON(bch2_inode_unpack(inode_k, &inode));
2417 while (!inode.bi_subvol) { 2418 struct btree_iter dirent_iter; 2419 struct bkey_s_c_dirent d; 2420 u32 parent_snapshot = snapshot; 2421 2422 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2423 ret = bkey_err(d.s_c); 2424 if (ret && !bch2_err_matches(ret, ENOENT)) 2425 break; 2426 2427 if (!ret && !dirent_points_to_inode(d, &inode)) { 2428 bch2_trans_iter_exit(trans, &dirent_iter); 2429 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2430 } 2431 2432 if (bch2_err_matches(ret, ENOENT)) { 2433 ret = 0; 2434 if (fsck_err(c, inode_unreachable, 2435 "unreachable inode\n%s", 2436 (printbuf_reset(&buf), 2437 bch2_bkey_val_to_text(&buf, c, inode_k), 2438 buf.buf))) 2439 ret = reattach_inode(trans, &inode, snapshot); 2440 goto out; 2441 } 2442 2443 bch2_trans_iter_exit(trans, &dirent_iter); 2444 2445 if (!S_ISDIR(inode.bi_mode)) 2446 break; 2447 2448 ret = darray_push(p, ((struct pathbuf_entry) { 2449 .inum = inode.bi_inum, 2450 .snapshot = snapshot, 2451 })); 2452 if (ret) 2453 return ret; 2454 2455 snapshot = parent_snapshot; 2456 2457 bch2_trans_iter_exit(trans, &inode_iter); 2458 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2459 SPOS(0, inode.bi_dir, snapshot), 0); 2460 ret = bkey_err(inode_k) ?: 2461 !bkey_is_inode(inode_k.k) ? 
-BCH_ERR_ENOENT_inode 2462 : bch2_inode_unpack(inode_k, &inode); 2463 if (ret) { 2464 /* Should have been caught in dirents pass */ 2465 if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2466 bch_err(c, "error looking up parent directory: %i", ret); 2467 break; 2468 } 2469 2470 snapshot = inode_k.k->p.snapshot; 2471 2472 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2473 /* XXX print path */ 2474 bch_err(c, "directory structure loop"); 2475 2476 darray_for_each(*p, i) 2477 pr_err("%llu:%u", i->inum, i->snapshot); 2478 pr_err("%llu:%u", inode.bi_inum, snapshot); 2479 2480 if (fsck_err(c, dir_loop, "directory structure loop")) { 2481 ret = remove_backpointer(trans, &inode); 2482 bch_err_msg(c, ret, "removing dirent"); 2483 if (ret) 2484 break; 2485 2486 ret = reattach_inode(trans, &inode, snapshot); 2487 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2488 } 2489 break; 2490 } 2491 } 2492 out: 2493 fsck_err: 2494 bch2_trans_iter_exit(trans, &inode_iter); 2495 printbuf_exit(&buf); 2496 bch_err_fn(c, ret); 2497 return ret; 2498 } 2499 2500 /* 2501 * Check for unreachable inodes, as well as loops in the directory structure: 2502 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2503 * unreachable: 2504 */ 2505 int bch2_check_directory_structure(struct bch_fs *c) 2506 { 2507 pathbuf path = { 0, }; 2508 int ret; 2509 2510 ret = bch2_trans_run(c, 2511 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2512 BTREE_ITER_intent| 2513 BTREE_ITER_prefetch| 2514 BTREE_ITER_all_snapshots, k, 2515 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2516 if (!bkey_is_inode(k.k)) 2517 continue; 2518 2519 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2520 continue; 2521 2522 check_path(trans, &path, k); 2523 }))); 2524 darray_exit(&path); 2525 2526 bch_err_fn(c, ret); 2527 return ret; 2528 } 2529 2530 struct nlink_table { 2531 size_t nr; 2532 size_t size; 2533 2534 struct nlink { 2535 u64 inum; 2536 u32 snapshot; 2537 u32 
count; 2538 } *d; 2539 }; 2540 2541 static int add_nlink(struct bch_fs *c, struct nlink_table *t, 2542 u64 inum, u32 snapshot) 2543 { 2544 if (t->nr == t->size) { 2545 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2546 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2547 2548 if (!d) { 2549 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2550 new_size); 2551 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2552 } 2553 2554 if (t->d) 2555 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2556 kvfree(t->d); 2557 2558 t->d = d; 2559 t->size = new_size; 2560 } 2561 2562 2563 t->d[t->nr++] = (struct nlink) { 2564 .inum = inum, 2565 .snapshot = snapshot, 2566 }; 2567 2568 return 0; 2569 } 2570 2571 static int nlink_cmp(const void *_l, const void *_r) 2572 { 2573 const struct nlink *l = _l; 2574 const struct nlink *r = _r; 2575 2576 return cmp_int(l->inum, r->inum); 2577 } 2578 2579 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2580 struct nlink_table *links, 2581 u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2582 { 2583 struct nlink *link, key = { 2584 .inum = inum, .snapshot = U32_MAX, 2585 }; 2586 2587 if (inum < range_start || inum >= range_end) 2588 return; 2589 2590 link = __inline_bsearch(&key, links->d, links->nr, 2591 sizeof(links->d[0]), nlink_cmp); 2592 if (!link) 2593 return; 2594 2595 while (link > links->d && link[0].inum == link[-1].inum) 2596 --link; 2597 2598 for (; link < links->d + links->nr && link->inum == inum; link++) 2599 if (ref_visible(c, s, snapshot, link->snapshot)) { 2600 link->count++; 2601 if (link->snapshot >= snapshot) 2602 break; 2603 } 2604 } 2605 2606 noinline_for_stack 2607 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2608 struct nlink_table *t, 2609 u64 start, u64 *end) 2610 { 2611 int ret = bch2_trans_run(c, 2612 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2613 POS(0, start), 2614 BTREE_ITER_intent| 2615 BTREE_ITER_prefetch| 2616 BTREE_ITER_all_snapshots, k, 
({ 2617 if (!bkey_is_inode(k.k)) 2618 continue; 2619 2620 /* Should never fail, checked by bch2_inode_invalid: */ 2621 struct bch_inode_unpacked u; 2622 BUG_ON(bch2_inode_unpack(k, &u)); 2623 2624 /* 2625 * Backpointer and directory structure checks are sufficient for 2626 * directories, since they can't have hardlinks: 2627 */ 2628 if (S_ISDIR(u.bi_mode)) 2629 continue; 2630 2631 if (!u.bi_nlink) 2632 continue; 2633 2634 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2635 if (ret) { 2636 *end = k.k->p.offset; 2637 ret = 0; 2638 break; 2639 } 2640 0; 2641 }))); 2642 2643 bch_err_fn(c, ret); 2644 return ret; 2645 } 2646 2647 noinline_for_stack 2648 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2649 u64 range_start, u64 range_end) 2650 { 2651 struct snapshots_seen s; 2652 2653 snapshots_seen_init(&s); 2654 2655 int ret = bch2_trans_run(c, 2656 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2657 BTREE_ITER_intent| 2658 BTREE_ITER_prefetch| 2659 BTREE_ITER_all_snapshots, k, ({ 2660 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2661 if (ret) 2662 break; 2663 2664 if (k.k->type == KEY_TYPE_dirent) { 2665 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2666 2667 if (d.v->d_type != DT_DIR && 2668 d.v->d_type != DT_SUBVOL) 2669 inc_link(c, &s, links, range_start, range_end, 2670 le64_to_cpu(d.v->d_inum), d.k->p.snapshot); 2671 } 2672 0; 2673 }))); 2674 2675 snapshots_seen_exit(&s); 2676 2677 bch_err_fn(c, ret); 2678 return ret; 2679 } 2680 2681 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2682 struct bkey_s_c k, 2683 struct nlink_table *links, 2684 size_t *idx, u64 range_end) 2685 { 2686 struct bch_fs *c = trans->c; 2687 struct bch_inode_unpacked u; 2688 struct nlink *link = &links->d[*idx]; 2689 int ret = 0; 2690 2691 if (k.k->p.offset >= range_end) 2692 return 1; 2693 2694 if (!bkey_is_inode(k.k)) 2695 return 0; 2696 2697 BUG_ON(bch2_inode_unpack(k, &u)); 
2698 2699 if (S_ISDIR(u.bi_mode)) 2700 return 0; 2701 2702 if (!u.bi_nlink) 2703 return 0; 2704 2705 while ((cmp_int(link->inum, k.k->p.offset) ?: 2706 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2707 BUG_ON(*idx == links->nr); 2708 link = &links->d[++*idx]; 2709 } 2710 2711 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2712 c, inode_wrong_nlink, 2713 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2714 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2715 bch2_inode_nlink_get(&u), link->count)) { 2716 bch2_inode_nlink_set(&u, link->count); 2717 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2718 } 2719 fsck_err: 2720 return ret; 2721 } 2722 2723 noinline_for_stack 2724 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2725 struct nlink_table *links, 2726 u64 range_start, u64 range_end) 2727 { 2728 size_t idx = 0; 2729 2730 int ret = bch2_trans_run(c, 2731 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2732 POS(0, range_start), 2733 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 2734 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2735 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2736 if (ret < 0) { 2737 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2738 return ret; 2739 } 2740 2741 return 0; 2742 } 2743 2744 int bch2_check_nlinks(struct bch_fs *c) 2745 { 2746 struct nlink_table links = { 0 }; 2747 u64 this_iter_range_start, next_iter_range_start = 0; 2748 int ret = 0; 2749 2750 do { 2751 this_iter_range_start = next_iter_range_start; 2752 next_iter_range_start = U64_MAX; 2753 2754 ret = check_nlinks_find_hardlinks(c, &links, 2755 this_iter_range_start, 2756 &next_iter_range_start); 2757 2758 ret = check_nlinks_walk_dirents(c, &links, 2759 this_iter_range_start, 2760 next_iter_range_start); 2761 if (ret) 2762 break; 2763 2764 ret = check_nlinks_update_hardlinks(c, &links, 2765 this_iter_range_start, 2766 next_iter_range_start); 2767 if (ret) 2768 
break; 2769 2770 links.nr = 0; 2771 } while (next_iter_range_start != U64_MAX); 2772 2773 kvfree(links.d); 2774 bch_err_fn(c, ret); 2775 return ret; 2776 } 2777 2778 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2779 struct bkey_s_c k) 2780 { 2781 struct bkey_s_c_reflink_p p; 2782 struct bkey_i_reflink_p *u; 2783 2784 if (k.k->type != KEY_TYPE_reflink_p) 2785 return 0; 2786 2787 p = bkey_s_c_to_reflink_p(k); 2788 2789 if (!p.v->front_pad && !p.v->back_pad) 2790 return 0; 2791 2792 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2793 int ret = PTR_ERR_OR_ZERO(u); 2794 if (ret) 2795 return ret; 2796 2797 bkey_reassemble(&u->k_i, k); 2798 u->v.front_pad = 0; 2799 u->v.back_pad = 0; 2800 2801 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 2802 } 2803 2804 int bch2_fix_reflink_p(struct bch_fs *c) 2805 { 2806 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2807 return 0; 2808 2809 int ret = bch2_trans_run(c, 2810 for_each_btree_key_commit(trans, iter, 2811 BTREE_ID_extents, POS_MIN, 2812 BTREE_ITER_intent|BTREE_ITER_prefetch| 2813 BTREE_ITER_all_snapshots, k, 2814 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2815 fix_reflink_p_key(trans, &iter, k))); 2816 bch_err_fn(c, ret); 2817 return ret; 2818 } 2819