// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "darray.h"
#include "dirent.h"
#include "error.h"
#include "fs.h"
#include "fs-common.h"
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"

#include <linux/bsearch.h>
#include <linux/dcache.h> /* struct qstr */

/*
 * Sum the sizes (in sectors) of all allocated extents belonging to @inum in
 * @snapshot. Returns the sector count, or a negative error code.
 *
 * XXX: this is handling transaction restarts without returning
 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
 */
static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
				    u32 snapshot)
{
	u64 sectors = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (bkey_extent_is_allocation(k.k))
			sectors += k.k->size;
		0;
	}));

	return ret ?: sectors;
}

/*
 * Count dirents in directory @inum (in @snapshot) that point to
 * subdirectories. Returns the count, or a negative error code.
 */
static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
			      u32 snapshot)
{
	u64 subdirs = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (k.k->type == KEY_TYPE_dirent &&
		    bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
			subdirs++;
		0;
	}));

	return ret ?: subdirs;
}

/*
 * Look up subvolume @subvol and return its snapshot ID and root inode number.
 *
 * Note: *snapshot and *inum are written unconditionally from @s; they are
 * only meaningful when the return value is 0.
 */
static int subvol_lookup(struct btree_trans *trans, u32 subvol,
			 u32 *snapshot, u64 *inum)
{
	struct bch_subvolume s;
	int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);

	*snapshot = le32_to_cpu(s.snapshot);
	*inum = le64_to_cpu(s.inode);
	return ret;
}

/*
 * Find the first inode key (in any snapshot) with number @inode_nr and unpack
 * it into @inode. Returns -BCH_ERR_ENOENT_inode if none exists.
 */
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inode_nr)
			break;
		if (!bkey_is_inode(k.k))
			continue;
		ret = bch2_inode_unpack(k, inode);
		goto found;
	}
	ret = -BCH_ERR_ENOENT_inode;
found:
	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Look up inode @inode_nr as visible in *@snapshot; on success *@snapshot is
 * updated to the snapshot the key was actually found in (which may be an
 * ancestor of the one requested).
 */
static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
			struct bch_inode_unpacked *inode,
			u32 *snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inode_nr, *snapshot), 0);
	ret = bkey_err(k);
	if (ret)
		goto err;

	ret = bkey_is_inode(k.k)
		? bch2_inode_unpack(k, inode)
		: -BCH_ERR_ENOENT_inode;
	if (!ret)
		*snapshot = iter.pos.snapshot;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Hash-lookup dirent @name in directory @dir within @snapshot; on success
 * returns the target inode number and d_type through @target/@type.
 */
static int lookup_dirent_in_snapshot(struct btree_trans *trans,
			   struct bch_hash_info hash_info,
			   subvol_inum dir, struct qstr *name,
			   u64 *target, unsigned *type, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
							 &hash_info, dir, name, 0, snapshot);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
	*target = le64_to_cpu(d.v->d_inum);
	*type = d.v->d_type;
	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/*
 * Delete the dirent at @pos via the hash-table delete path; the directory's
 * hash info is reconstructed from the first visible inode with that number.
 */
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bch_inode_unpacked dir_inode;
	struct bch_hash_info dir_hash_info;
	int ret;

	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
	if (ret)
		goto err;

	dir_hash_info = bch2_hash_info_init(c, &dir_inode);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);

	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				    &dir_hash_info, &iter,
				    BTREE_UPDATE_internal_snapshot_node);
	bch2_trans_iter_exit(trans, &iter);
err:
	bch_err_fn(c, ret);
	return ret;
}

/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
			    struct bch_inode_unpacked *lostfound,
			    u64 reattaching_inum)
{
	struct bch_fs *c = trans->c;
	struct qstr lostfound_str = QSTR("lost+found");
	u64 inum = 0;
	unsigned d_type = 0;
	int ret;

	/* Find the snapshot tree, and from it the master subvolume: */
	struct bch_snapshot_tree st;
	ret = bch2_snapshot_tree_lookup(trans,
					bch2_snapshot_tree(c, snapshot), &st);
	if (ret)
		return ret;

	subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };

	struct bch_subvolume subvol;
	ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
				 false, 0, &subvol);
	bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
		    le32_to_cpu(st.master_subvol), snapshot);
	if (ret)
		return ret;

	/*
	 * If the subvolume's root inode field is unset, repair it with the
	 * inode we're currently reattaching:
	 */
	if (!subvol.inode) {
		struct btree_iter iter;
		struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
				BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
				0, subvolume);
		ret = PTR_ERR_OR_ZERO(subvol);
		if (ret)
			return ret;

		subvol->v.inode = cpu_to_le64(reattaching_inum);
		bch2_trans_iter_exit(trans, &iter);
	}

	root_inum.inum = le64_to_cpu(subvol.inode);

	struct bch_inode_unpacked root_inode;
	struct bch_hash_info root_hash_info;
	u32 root_inode_snapshot = snapshot;
	ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
	bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
		    root_inum.inum, le32_to_cpu(st.master_subvol));
	if (ret)
		return ret;

	root_hash_info = bch2_hash_info_init(c, &root_inode);

	ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
					&lostfound_str, &inum, &d_type, snapshot);
	if (bch2_err_matches(ret, ENOENT))
		goto create_lostfound;

	bch_err_fn(c, ret);
	if (ret)
		return ret;

	if (d_type != DT_DIR) {
		bch_err(c, "error looking up lost+found: not a directory");
		return -BCH_ERR_ENOENT_not_directory;
	}

	/*
	 * The bch2_check_dirents pass has already run, dangling dirents
	 * shouldn't exist here:
	 */
	ret = lookup_inode(trans, inum, lostfound, &snapshot);
	bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
		    inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
	return ret;

create_lostfound:
	/*
	 * XXX: we could have a nicer log message here if we had a nice way to
	 * walk backpointers to print a path
	 */
	bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot));

	u64 now = bch2_current_time(c);
	struct btree_iter lostfound_iter = { NULL };
	u64 cpu = raw_smp_processor_id();

	bch2_inode_init_early(c, lostfound);
	bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
	lostfound->bi_dir = root_inode.bi_inum;

	/* lost+found is a new subdirectory of the root: */
	root_inode.bi_nlink++;

	ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
	if (ret)
		goto err;

	bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
	ret = bch2_btree_iter_traverse(&lostfound_iter);
	if (ret)
		goto err;

	ret =   bch2_dirent_create_snapshot(trans,
				0, root_inode.bi_inum, snapshot, &root_hash_info,
				mode_to_type(lostfound->bi_mode),
				&lostfound_str,
				lostfound->bi_inum,
				&lostfound->bi_dir_offset,
				STR_HASH_must_create) ?:
		bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
				       BTREE_UPDATE_internal_snapshot_node);
err:
	bch_err_msg(c, ret, "creating lost+found");
	bch2_trans_iter_exit(trans, &lostfound_iter);
	return ret;
}

/*
 * Reattach an unreachable inode under lost+found, naming the new dirent
 * "<inum>" (or "subvol-<subvolid>" for a subvolume root), and update the
 * inode's backpointer fields to point at the new dirent.
 */
static int reattach_inode(struct btree_trans *trans,
			  struct bch_inode_unpacked *inode,
			  u32 inode_snapshot)
{
	struct bch_fs *c = trans->c;
	struct bch_hash_info dir_hash;
	struct bch_inode_unpacked lostfound;
	char name_buf[20];
	struct qstr name;
	u64 dir_offset = 0;
	u32 dirent_snapshot = inode_snapshot;
	int ret;

	if (inode->bi_subvol) {
		inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;

		u64 root_inum;
		ret = subvol_lookup(trans, inode->bi_parent_subvol,
				    &dirent_snapshot, &root_inum);
		if (ret)
			return ret;

		snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
	}

	ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
	if (ret)
		return ret;

	/* Directories bump lost+found's link count: */
	if (S_ISDIR(inode->bi_mode)) {
		lostfound.bi_nlink++;

		ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
		if (ret)
			return ret;
	}

	dir_hash = bch2_hash_info_init(c, &lostfound);

	name = (struct qstr) QSTR(name_buf);

	ret = bch2_dirent_create_snapshot(trans,
				inode->bi_parent_subvol, lostfound.bi_inum,
				dirent_snapshot,
				&dir_hash,
				inode_d_type(inode),
				&name,
				inode->bi_subvol ?: inode->bi_inum,
				&dir_offset,
				STR_HASH_must_create);
	if (ret) {
		bch_err_msg(c, ret, "error creating dirent");
		return ret;
	}

	inode->bi_dir		= lostfound.bi_inum;
	inode->bi_dir_offset	= dir_offset;

	return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
}

/*
 * Remove the dirent that @inode's backpointer fields (bi_dir,
 * bi_dir_offset) point at.
 */
static int remove_backpointer(struct btree_trans *trans,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c_dirent d;
	int ret;

	d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
				     POS(inode->bi_dir, inode->bi_dir_offset), 0,
				     dirent);
	ret =   bkey_err(d) ?:
		__remove_dirent(trans, d.k->p);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Reattach subvolume @s under lost+found: drop the root inode's old
 * backpointer dirent, then create a new one.
 */
static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s)
{
	struct bch_fs *c = trans->c;

	struct bch_inode_unpacked inode;
	int ret = bch2_inode_find_by_inum_trans(trans,
				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
				&inode);
	if (ret)
		return ret;

	ret = remove_backpointer(trans, &inode);
	bch_err_msg(c, ret, "removing dirent");
	if (ret)
		return ret;

	ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot));
	bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
	return ret;
}

/*
 * Recreate a missing subvolume key for (@snapshotid, @subvolid), creating a
 * new root inode if @inum is 0, and repair the corresponding snapshot and
 * snapshot_tree keys to reference it.
 */
static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
{
	struct bch_fs *c = trans->c;

	if (!bch2_snapshot_is_leaf(c, snapshotid)) {
		bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
		return -BCH_ERR_fsck_repair_unimplemented;
	}

	/*
	 * If inum isn't set, that means we're being called from check_dirents,
	 * not check_inodes - the root of this subvolume doesn't exist or we
	 * would have found it there:
	 */
	if (!inum) {
		struct btree_iter inode_iter = {};
		struct bch_inode_unpacked new_inode;
		u64 cpu = raw_smp_processor_id();

		bch2_inode_init_early(c, &new_inode);
		bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);

		new_inode.bi_subvol = subvolid;

		int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
			  bch2_btree_iter_traverse(&inode_iter) ?:
			  bch2_inode_write(trans, &inode_iter, &new_inode);
		bch2_trans_iter_exit(trans, &inode_iter);
		if (ret)
			return ret;

		inum = new_inode.bi_inum;
	}

	bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);

	struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
	int ret = PTR_ERR_OR_ZERO(new_subvol);
	if (ret)
		return ret;

	bkey_subvolume_init(&new_subvol->k_i);
	new_subvol->k.p.offset	= subvolid;
	new_subvol->v.snapshot	= cpu_to_le32(snapshotid);
	new_subvol->v.inode	= cpu_to_le64(inum);
	ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
	if (ret)
		return ret;

	/* Mark the snapshot as owned by this subvolume: */
	struct btree_iter iter;
	struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshots, POS(0, snapshotid),
			0, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
	if (ret)
		return ret;

	u32 snapshot_tree = le32_to_cpu(s->v.tree);

	s->v.subvol = cpu_to_le32(subvolid);
	SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
	bch2_trans_iter_exit(trans, &iter);

	/* If the snapshot tree has no master subvolume, this becomes it: */
	struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
			0, snapshot_tree);
	ret = PTR_ERR_OR_ZERO(st);
	bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
	if (ret)
		return ret;

	if (!st->v.master_subvol)
		st->v.master_subvol = cpu_to_le32(subvolid);

	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/*
 * Recreate a missing inode for keys found in @btree: regular file for
 * extents (i_size from the last extent), directory for dirents, regular
 * file for xattrs.
 */
static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 snapshot, u64 inum)
{
	struct bch_fs *c = trans->c;
	unsigned i_mode = S_IFREG;
	u64 i_size = 0;

	switch (btree) {
	case BTREE_ID_extents: {
		struct btree_iter iter = {};

		bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
		struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
		bch2_trans_iter_exit(trans, &iter);
		int ret = bkey_err(k);
		if (ret)
			return ret;

		/* i_size from the end of the last extent, sectors to bytes: */
		i_size = k.k->p.offset << 9;
		break;
	}
	case BTREE_ID_dirents:
		i_mode = S_IFDIR;
		break;
	case BTREE_ID_xattrs:
		break;
	default:
		BUG();
	}

	struct bch_inode_unpacked new_inode;
	bch2_inode_init_early(c, &new_inode);
	bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL);
	new_inode.bi_size = i_size;
	new_inode.bi_inum = inum;

	return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
}

/*
 * Tracks the snapshot IDs of keys already seen at the current btree
 * position; @ids is kept sorted (snapshot IDs are visited in order).
 */
struct snapshots_seen {
	struct bpos			pos;
	snapshot_id_list		ids;
};

static inline void snapshots_seen_exit(struct snapshots_seen *s)
{
	darray_exit(&s->ids);
}

static inline void snapshots_seen_init(struct snapshots_seen *s)
{
	memset(s, 0, sizeof(*s));
}

/* Insert @id into the sorted list, keeping it sorted; no-op if present: */
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
	u32 *i;
	__darray_for_each(s->ids, i) {
		if (*i == id)
			return 0;
		if (*i > id)
			break;
	}

	int ret = darray_insert_item(&s->ids, i - s->ids.data, id);
	if (ret)
		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
			s->ids.size);
	return ret;
}

/*
 * Record that we've seen a key at @pos; resets the seen-list when we move to
 * a new position.
 */
static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
				 enum btree_id btree_id, struct bpos pos)
{
	if (!bkey_eq(s->pos, pos))
		s->ids.nr = 0;
	s->pos = pos;

	return snapshot_list_add_nodup(c, &s->ids, pos.snapshot);
}

/**
 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
 * and @ancestor hasn't been overwritten in @seen
 *
 * @c:		filesystem handle
 * @seen:	list of snapshot ids already seen at current position
 * @id:		descendent snapshot id
 * @ancestor:	ancestor snapshot id
 *
 * Returns:	whether key in @ancestor snapshot is visible in @id snapshot
 */
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
				    u32 id, u32 ancestor)
{
	ssize_t i;

	EBUG_ON(id > ancestor);

	/* @ancestor should be the snapshot most recently added to @seen */
	EBUG_ON(ancestor != seen->pos.snapshot);
	EBUG_ON(ancestor != darray_last(seen->ids));

	if (id == ancestor)
		return true;

	if (!bch2_snapshot_is_ancestor(c, id, ancestor))
		return false;

	/*
	 * We know that @id is a descendant of @ancestor, we're checking if
	 * we've seen a key that overwrote @ancestor - i.e. also a descendent of
	 * @ancestor and with @id as a descendent.
	 *
	 * But we already know that we're scanning IDs between @id and @ancestor
	 * numerically, since snapshot ID lists are kept sorted, so if we find
	 * an id that's an ancestor of @id we're done:
	 */

	for (i = seen->ids.nr - 2;
	     i >= 0 && seen->ids.data[i] >= id;
	     --i)
		if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]))
			return false;

	return true;
}

/**
 * ref_visible - given a key with snapshot id @src that points to a key with
 * snapshot id @dst, test whether there is some snapshot in which @dst is
 * visible.
 *
 * @c:		filesystem handle
 * @s:		list of snapshot IDs already seen at @src
 * @src:	snapshot ID of src key
 * @dst:	snapshot ID of dst key
 * Returns:	true if there is some snapshot in which @dst is visible
 *
 * Assumes we're visiting @src keys in natural key order
 */
static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
			u32 src, u32 dst)
{
	return dst <= src
		? key_visible_in_snapshot(c, s, dst, src)
		: bch2_snapshot_is_ancestor(c, src, dst);
}

/* As ref_visible(), but with seen-lists for both sides: */
static int ref_visible2(struct bch_fs *c,
			u32 src, struct snapshots_seen *src_seen,
			u32 dst, struct snapshots_seen *dst_seen)
{
	if (dst > src) {
		swap(dst, src);
		swap(dst_seen, src_seen);
	}
	return key_visible_in_snapshot(c, src_seen, dst, src);
}

#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)				\
	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&	\
	     (_i)->snapshot <= (_snapshot); _i++)					\
		if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))

struct inode_walker_entry {
	struct bch_inode_unpacked inode;
	u32			snapshot;
	bool			seen_this_pos;
	u64			count;		/* accumulated per-snapshot tally (e.g. sectors/subdirs) */
};

/*
 * Walks all snapshot versions of each inode as we iterate keys in inode
 * order; @inodes holds one entry per snapshot version of @last_pos.inode.
 */
struct inode_walker {
	bool				first_this_inode;
	bool				recalculate_sums;
	struct bpos			last_pos;

	DARRAY(struct inode_walker_entry) inodes;
};

static void inode_walker_exit(struct inode_walker *w)
{
	darray_exit(&w->inodes);
}

static struct inode_walker inode_walker_init(void)
{
	return (struct inode_walker) { 0, };
}

/* Unpack @inode and append it to the walker's list: */
static int add_inode(struct bch_fs *c, struct inode_walker *w,
		     struct bkey_s_c inode)
{
	struct bch_inode_unpacked u;

	BUG_ON(bch2_inode_unpack(inode, &u));

	return darray_push(&w->inodes, ((struct inode_walker_entry) {
		.inode		= u,
		.snapshot	= inode.k->p.snapshot,
	}));
}

/* Load every snapshot version of inode @inum into @w: */
static int get_inodes_all_snapshots(struct btree_trans *trans,
				    struct inode_walker *w, u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->recalculate_sums = false;
	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	w->first_this_inode = true;
	return 0;
}

/*
 * Find the walker entry whose snapshot @k is visible in; if @k is in a
 * strictly newer snapshot than the inode version we have (and isn't a
 * whiteout), insert a new entry for @k's snapshot, keeping the list sorted.
 */
static struct inode_walker_entry *
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
{
	bool is_whiteout = k.k->type == KEY_TYPE_whiteout;

	struct inode_walker_entry *i;
	__darray_for_each(w->inodes, i)
		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
			goto found;

	return NULL;
found:
	BUG_ON(k.k->p.snapshot > i->snapshot);

	if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
		struct inode_walker_entry new = *i;

		new.snapshot	= k.k->p.snapshot;
		new.count	= 0;

		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, k);

		bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
			 "unexpected because we should always update the inode when we update a key in that inode\n"
			 "%s",
			 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
		printbuf_exit(&buf);

		/* Keep @inodes sorted by snapshot ID: */
		while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
			--i;

		size_t pos = i - w->inodes.data;
		int ret = darray_insert_item(&w->inodes, pos, new);
		if (ret)
			return ERR_PTR(ret);

		i = w->inodes.data + pos;
	}

	return i;
}

/*
 * Advance the walker to key @k: reload the inode list when we move to a new
 * inode number, and return the entry @k is visible in (NULL if none).
 */
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
					     struct inode_walker *w,
					     struct bkey_s_c k)
{
	if (w->last_pos.inode != k.k->p.inode) {
		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
		if (ret)
			return ERR_PTR(ret);
	} else if (bkey_cmp(w->last_pos, k.k->p)) {
		darray_for_each(w->inodes, i)
			i->seen_this_pos = false;
	}

	w->last_pos = k.k->p;

	return lookup_inode_for_snapshot(trans->c, w, k);
}

/*
 * Collect into @w the versions of inode @inum visible from the position
 * recorded in @s (used for checking references, e.g. from dirents).
 */
static int get_visible_inodes(struct btree_trans *trans,
			      struct inode_walker *w,
			      struct snapshots_seen *s,
			      u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
			continue;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);

		if (k.k->p.snapshot >= s->pos.snapshot)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

/*
 * Re-insert a hash-table key at its correct hashed offset: delete the key at
 * its current (wrong) position and re-add it through the hash code, then
 * commit.
 */
static int hash_redo_key(struct btree_trans *trans,
			 const struct bch_hash_desc desc,
			 struct bch_hash_info *hash_info,
			 struct btree_iter *k_iter, struct bkey_s_c k)
{
	struct bkey_i *delete;
	struct bkey_i *tmp;

	delete = bch2_trans_kmalloc(trans, sizeof(*delete));
	if (IS_ERR(delete))
		return PTR_ERR(delete);

	tmp = bch2_bkey_make_mut_noupdate(trans, k);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	bkey_init(&delete->k);
	delete->k.p = k_iter->pos;
	return  bch2_btree_iter_traverse(k_iter) ?:
		bch2_trans_update(trans, k_iter, delete, 0) ?:
		bch2_hash_set_in_snapshot(trans, desc, hash_info,
				       (subvol_inum) { 0, k.k->p.inode },
				       k.k->p.snapshot, tmp,
				       STR_HASH_must_create|
				       BTREE_UPDATE_internal_snapshot_node) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

/*
 * Verify that hash-table key @hash_k is reachable from its hash: scan from
 * the hashed slot to the key's position, deleting duplicates, and rehash the
 * key if it's at the wrong offset.
 */
static int hash_check_key(struct btree_trans *trans,
			  const struct bch_hash_desc desc,
			  struct bch_hash_info *hash_info,
			  struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c k;
	u64 hash;
	int ret = 0;

	if (hash_k.k->type != desc.key_type)
		return 0;

	hash = desc.hash_bkey(hash_info, hash_k);

	if (likely(hash == hash_k.k->p.offset))
		return 0;

	if (hash_k.k->p.offset < hash)
		goto bad_hash;

	for_each_btree_key_norestart(trans, iter, desc.btree_id,
				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
				     BTREE_ITER_slots, k, ret) {
		if (bkey_eq(k.k->p, hash_k.k->p))
			break;

		if (fsck_err_on(k.k->type == desc.key_type &&
				!desc.cmp_bkey(k, hash_k),
				trans, hash_table_key_duplicate,
				"duplicate hash table keys:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, hash_k),
				 buf.buf))) {
			ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
			break;
		}

		/* A hole before reaching the key means it's unreachable: */
		if (bkey_deleted(k.k)) {
			bch2_trans_iter_exit(trans, &iter);
			goto bad_hash;
		}
	}
out:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
bad_hash:
	if (fsck_err(trans, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
		     bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
		     (printbuf_reset(&buf),
		      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
		ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
		bch_err_fn(c, ret);
		if (ret)
			return ret;
		ret = -BCH_ERR_transaction_restart_nested;
	}
fsck_err:
	goto out;
}

static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
						struct btree_iter *iter,
						struct bpos pos)
{
	return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
}

/*
 * Fetch the dirent that @inode's backpointer fields reference; for
 * subvolume roots the snapshot is resolved via the parent subvolume.
 */
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
					       struct btree_iter *iter,
					       struct bch_inode_unpacked *inode,
					       u32 *snapshot)
{
	if (inode->bi_subvol) {
		u64 inum;
		int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
		if (ret)
			return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
	}

	return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
}

static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
				   struct bkey_s_c_dirent d)
{
	return inode->bi_dir == d.k->p.inode &&
		inode->bi_dir_offset == d.k->p.offset;
}

static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
				   struct bch_inode_unpacked *inode)
{
	return d.v->d_type == DT_SUBVOL
		? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol
		: le64_to_cpu(d.v->d_inum) == inode->bi_inum;
}

/*
 * Returns 1 if @p is on the deleted-inodes list, 0 if not, or a negative
 * error code.
 */
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0);
	int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set;
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Verify that the dirent @inode's backpointer fields reference exists and
 * points back at the inode; if not, clear the backpointer fields and set
 * *write_inode so the caller rewrites the inode.
 */
static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k,
				    struct bch_inode_unpacked *inode,
				    u32 inode_snapshot, bool *write_inode)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	struct btree_iter dirent_iter = {};
	struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
	int ret = bkey_err(d);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (fsck_err_on(ret,
			trans, inode_points_to_missing_dirent,
			"inode points to missing dirent\n%s",
			(bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) ||
	    fsck_err_on(!ret && !dirent_points_to_inode(d, inode),
			trans, inode_points_to_wrong_dirent,
			"inode points to dirent that does not point back:\n%s",
			(bch2_bkey_val_to_text(&buf, c, inode_k),
			 prt_newline(&buf),
			 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) {
		/*
		 * We just clear the backpointer fields for now. If we find a
		 * dirent that points to this inode in check_dirents(), we'll
		 * update it then; then when we get to check_path() if the
		 * backpointer is still 0 we'll reattach it.
		 */
		inode->bi_dir = 0;
		inode->bi_dir_offset = 0;
		inode->bi_flags &= ~BCH_INODE_backptr_untrusted;
		*write_inode = true;
	}

	ret = 0;
fsck_err:
	bch2_trans_iter_exit(trans, &dirent_iter);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Returns true if the inode at @p is currently open (in the VFS inode hash)
 * - or if we can't tell due to snapshot tree corruption, in which case we
 * conservatively report it open so it isn't deleted.
 */
static bool bch2_inode_open(struct bch_fs *c, struct bpos p)
{
	subvol_inum inum = {
		.subvol = snapshot_t(c, p.snapshot)->subvol,
		.inum	= p.offset,
	};

	/* snapshot tree corruption, can't safely delete */
	if (!inum.subvol) {
		bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot);
		return true;
	}

	return __bch2_inode_hash_find(c, inum) != NULL;
}

/*
 * Per-key worker for the check_inodes pass: validates the inode key's
 * snapshot, cross-checks against the previous snapshot version, the
 * deleted-inodes list, and the dirty/unlinked flags.
 */
static int check_inode(struct btree_trans *trans,
		       struct btree_iter *iter,
		       struct bkey_s_c k,
		       struct bch_inode_unpacked *prev,
		       struct snapshots_seen *s,
		       bool full)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked u;
	bool do_update = false;
	int ret;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret < 0)
		goto err;
	if (ret)
		return 0;

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	if (!bkey_is_inode(k.k))
		return 0;

	BUG_ON(bch2_inode_unpack(k, &u));

	if (!full &&
	    !(u.bi_flags & (BCH_INODE_i_size_dirty|
			    BCH_INODE_i_sectors_dirty|
			    BCH_INODE_unlinked)))
		return 0;

	if (prev->bi_inum != u.bi_inum)
		*prev = u;

	if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed ||
			inode_d_type(prev) != inode_d_type(&u),
			trans, inode_snapshot_mismatch,
			"inodes in different snapshots don't match")) {
		bch_err(c, "repair not implemented yet");
		return -BCH_ERR_fsck_repair_unimplemented;
	}

	if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) &&
	    bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) {
struct bpos new_min_pos; 1029 1030 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 1031 if (ret) 1032 goto err; 1033 1034 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1035 1036 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1037 1038 bch_err_msg(c, ret, "in fsck updating inode"); 1039 if (ret) 1040 return ret; 1041 1042 if (!bpos_eq(new_min_pos, POS_MIN)) 1043 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 1044 return 0; 1045 } 1046 1047 if (u.bi_flags & BCH_INODE_unlinked) { 1048 ret = check_inode_deleted_list(trans, k.k->p); 1049 if (ret < 0) 1050 return ret; 1051 1052 fsck_err_on(!ret, 1053 trans, unlinked_inode_not_on_deleted_list, 1054 "inode %llu:%u unlinked, but not on deleted list", 1055 u.bi_inum, k.k->p.snapshot); 1056 ret = 0; 1057 } 1058 1059 if (u.bi_flags & BCH_INODE_unlinked && 1060 !bch2_inode_open(c, k.k->p) && 1061 (!c->sb.clean || 1062 fsck_err(trans, inode_unlinked_but_clean, 1063 "filesystem marked clean, but inode %llu unlinked", 1064 u.bi_inum))) { 1065 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); 1066 bch_err_msg(c, ret, "in fsck deleting inode"); 1067 return ret; 1068 } 1069 1070 if (u.bi_flags & BCH_INODE_i_size_dirty && 1071 (!c->sb.clean || 1072 fsck_err(trans, inode_i_size_dirty_but_clean, 1073 "filesystem marked clean, but inode %llu has i_size dirty", 1074 u.bi_inum))) { 1075 bch_verbose(c, "truncating inode %llu", u.bi_inum); 1076 1077 /* 1078 * XXX: need to truncate partial blocks too here - or ideally 1079 * just switch units to bytes and that issue goes away 1080 */ 1081 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, 1082 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, 1083 iter->pos.snapshot), 1084 POS(u.bi_inum, U64_MAX), 1085 0, NULL); 1086 bch_err_msg(c, ret, "in fsck truncating inode"); 1087 if (ret) 1088 return ret; 1089 1090 /* 1091 * We truncated without our normal sector accounting hook, just 1092 
* make sure we recalculate it: 1093 */ 1094 u.bi_flags |= BCH_INODE_i_sectors_dirty; 1095 1096 u.bi_flags &= ~BCH_INODE_i_size_dirty; 1097 do_update = true; 1098 } 1099 1100 if (u.bi_flags & BCH_INODE_i_sectors_dirty && 1101 (!c->sb.clean || 1102 fsck_err(trans, inode_i_sectors_dirty_but_clean, 1103 "filesystem marked clean, but inode %llu has i_sectors dirty", 1104 u.bi_inum))) { 1105 s64 sectors; 1106 1107 bch_verbose(c, "recounting sectors for inode %llu", 1108 u.bi_inum); 1109 1110 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); 1111 if (sectors < 0) { 1112 bch_err_msg(c, sectors, "in fsck recounting inode sectors"); 1113 return sectors; 1114 } 1115 1116 u.bi_sectors = sectors; 1117 u.bi_flags &= ~BCH_INODE_i_sectors_dirty; 1118 do_update = true; 1119 } 1120 1121 if (u.bi_flags & BCH_INODE_backptr_untrusted) { 1122 u.bi_dir = 0; 1123 u.bi_dir_offset = 0; 1124 u.bi_flags &= ~BCH_INODE_backptr_untrusted; 1125 do_update = true; 1126 } 1127 1128 if (u.bi_dir || u.bi_dir_offset) { 1129 ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update); 1130 if (ret) 1131 goto err; 1132 } 1133 1134 if (fsck_err_on(u.bi_parent_subvol && 1135 (u.bi_subvol == 0 || 1136 u.bi_subvol == BCACHEFS_ROOT_SUBVOL), 1137 trans, inode_bi_parent_nonzero, 1138 "inode %llu:%u has subvol %u but nonzero parent subvol %u", 1139 u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) { 1140 u.bi_parent_subvol = 0; 1141 do_update = true; 1142 } 1143 1144 if (u.bi_subvol) { 1145 struct bch_subvolume s; 1146 1147 ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); 1148 if (ret && !bch2_err_matches(ret, ENOENT)) 1149 goto err; 1150 1151 if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1152 ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum); 1153 goto do_update; 1154 } 1155 1156 if (fsck_err_on(ret, 1157 trans, inode_bi_subvol_missing, 1158 "inode %llu:%u bi_subvol points to missing subvolume %u", 1159 
u.bi_inum, k.k->p.snapshot, u.bi_subvol) || 1160 fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum || 1161 !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot), 1162 k.k->p.snapshot), 1163 trans, inode_bi_subvol_wrong, 1164 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u", 1165 u.bi_inum, k.k->p.snapshot, u.bi_subvol, 1166 le64_to_cpu(s.inode), 1167 le32_to_cpu(s.snapshot))) { 1168 u.bi_subvol = 0; 1169 u.bi_parent_subvol = 0; 1170 do_update = true; 1171 } 1172 } 1173 do_update: 1174 if (do_update) { 1175 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1176 bch_err_msg(c, ret, "in fsck updating inode"); 1177 if (ret) 1178 return ret; 1179 } 1180 err: 1181 fsck_err: 1182 bch_err_fn(c, ret); 1183 return ret; 1184 } 1185 1186 int bch2_check_inodes(struct bch_fs *c) 1187 { 1188 bool full = c->opts.fsck; 1189 struct bch_inode_unpacked prev = { 0 }; 1190 struct snapshots_seen s; 1191 1192 snapshots_seen_init(&s); 1193 1194 int ret = bch2_trans_run(c, 1195 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 1196 POS_MIN, 1197 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1198 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1199 check_inode(trans, &iter, k, &prev, &s, full))); 1200 1201 snapshots_seen_exit(&s); 1202 bch_err_fn(c, ret); 1203 return ret; 1204 } 1205 1206 static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) 1207 { 1208 switch (btree) { 1209 case BTREE_ID_extents: 1210 return S_ISREG(mode) || S_ISLNK(mode); 1211 case BTREE_ID_dirents: 1212 return S_ISDIR(mode); 1213 case BTREE_ID_xattrs: 1214 return true; 1215 default: 1216 BUG(); 1217 } 1218 } 1219 1220 static int check_key_has_inode(struct btree_trans *trans, 1221 struct btree_iter *iter, 1222 struct inode_walker *inode, 1223 struct inode_walker_entry *i, 1224 struct bkey_s_c k) 1225 { 1226 struct bch_fs *c = trans->c; 1227 struct printbuf buf = PRINTBUF; 1228 int ret = PTR_ERR_OR_ZERO(i); 1229 if (ret) 1230 return ret; 1231 1232 if (k.k->type == 
KEY_TYPE_whiteout) 1233 goto out; 1234 1235 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 1236 ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: 1237 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 1238 if (ret) 1239 goto err; 1240 1241 inode->last_pos.inode--; 1242 ret = -BCH_ERR_transaction_restart_nested; 1243 goto err; 1244 } 1245 1246 if (fsck_err_on(!i, 1247 trans, key_in_missing_inode, 1248 "key in missing inode:\n %s", 1249 (printbuf_reset(&buf), 1250 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1251 goto delete; 1252 1253 if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), 1254 trans, key_in_wrong_inode_type, 1255 "key for wrong inode mode %o:\n %s", 1256 i->inode.bi_mode, 1257 (printbuf_reset(&buf), 1258 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1259 goto delete; 1260 out: 1261 err: 1262 fsck_err: 1263 printbuf_exit(&buf); 1264 bch_err_fn(c, ret); 1265 return ret; 1266 delete: 1267 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); 1268 goto out; 1269 } 1270 1271 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) 1272 { 1273 struct bch_fs *c = trans->c; 1274 int ret = 0; 1275 s64 count2; 1276 1277 darray_for_each(w->inodes, i) { 1278 if (i->inode.bi_sectors == i->count) 1279 continue; 1280 1281 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1282 1283 if (w->recalculate_sums) 1284 i->count = count2; 1285 1286 if (i->count != count2) { 1287 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1288 w->last_pos.inode, i->snapshot, i->count, count2); 1289 return -BCH_ERR_internal_fsck_err; 1290 } 1291 1292 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1293 trans, inode_i_sectors_wrong, 1294 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1295 w->last_pos.inode, i->snapshot, 1296 
i->inode.bi_sectors, i->count)) { 1297 i->inode.bi_sectors = i->count; 1298 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1299 if (ret) 1300 break; 1301 } 1302 } 1303 fsck_err: 1304 bch_err_fn(c, ret); 1305 return ret; 1306 } 1307 1308 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1309 { 1310 u32 restart_count = trans->restart_count; 1311 return check_i_sectors_notnested(trans, w) ?: 1312 trans_was_restarted(trans, restart_count); 1313 } 1314 1315 struct extent_end { 1316 u32 snapshot; 1317 u64 offset; 1318 struct snapshots_seen seen; 1319 }; 1320 1321 struct extent_ends { 1322 struct bpos last_pos; 1323 DARRAY(struct extent_end) e; 1324 }; 1325 1326 static void extent_ends_reset(struct extent_ends *extent_ends) 1327 { 1328 darray_for_each(extent_ends->e, i) 1329 snapshots_seen_exit(&i->seen); 1330 extent_ends->e.nr = 0; 1331 } 1332 1333 static void extent_ends_exit(struct extent_ends *extent_ends) 1334 { 1335 extent_ends_reset(extent_ends); 1336 darray_exit(&extent_ends->e); 1337 } 1338 1339 static void extent_ends_init(struct extent_ends *extent_ends) 1340 { 1341 memset(extent_ends, 0, sizeof(*extent_ends)); 1342 } 1343 1344 static int extent_ends_at(struct bch_fs *c, 1345 struct extent_ends *extent_ends, 1346 struct snapshots_seen *seen, 1347 struct bkey_s_c k) 1348 { 1349 struct extent_end *i, n = (struct extent_end) { 1350 .offset = k.k->p.offset, 1351 .snapshot = k.k->p.snapshot, 1352 .seen = *seen, 1353 }; 1354 1355 n.seen.ids.data = kmemdup(seen->ids.data, 1356 sizeof(seen->ids.data[0]) * seen->ids.size, 1357 GFP_KERNEL); 1358 if (!n.seen.ids.data) 1359 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1360 1361 __darray_for_each(extent_ends->e, i) { 1362 if (i->snapshot == k.k->p.snapshot) { 1363 snapshots_seen_exit(&i->seen); 1364 *i = n; 1365 return 0; 1366 } 1367 1368 if (i->snapshot >= k.k->p.snapshot) 1369 break; 1370 } 1371 1372 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1373 } 1374 
1375 static int overlapping_extents_found(struct btree_trans *trans, 1376 enum btree_id btree, 1377 struct bpos pos1, struct snapshots_seen *pos1_seen, 1378 struct bkey pos2, 1379 bool *fixed, 1380 struct extent_end *extent_end) 1381 { 1382 struct bch_fs *c = trans->c; 1383 struct printbuf buf = PRINTBUF; 1384 struct btree_iter iter1, iter2 = { NULL }; 1385 struct bkey_s_c k1, k2; 1386 int ret; 1387 1388 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1389 1390 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1391 BTREE_ITER_all_snapshots| 1392 BTREE_ITER_not_extents); 1393 k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); 1394 ret = bkey_err(k1); 1395 if (ret) 1396 goto err; 1397 1398 prt_str(&buf, "\n "); 1399 bch2_bkey_val_to_text(&buf, c, k1); 1400 1401 if (!bpos_eq(pos1, k1.k->p)) { 1402 prt_str(&buf, "\n wanted\n "); 1403 bch2_bpos_to_text(&buf, pos1); 1404 prt_str(&buf, "\n "); 1405 bch2_bkey_to_text(&buf, &pos2); 1406 1407 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1408 __func__, buf.buf); 1409 ret = -BCH_ERR_internal_fsck_err; 1410 goto err; 1411 } 1412 1413 bch2_trans_copy_iter(&iter2, &iter1); 1414 1415 while (1) { 1416 bch2_btree_iter_advance(&iter2); 1417 1418 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1419 ret = bkey_err(k2); 1420 if (ret) 1421 goto err; 1422 1423 if (bpos_ge(k2.k->p, pos2.p)) 1424 break; 1425 } 1426 1427 prt_str(&buf, "\n "); 1428 bch2_bkey_val_to_text(&buf, c, k2); 1429 1430 if (bpos_gt(k2.k->p, pos2.p) || 1431 pos2.size != k2.k->size) { 1432 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1433 __func__, buf.buf); 1434 ret = -BCH_ERR_internal_fsck_err; 1435 goto err; 1436 } 1437 1438 prt_printf(&buf, "\n overwriting %s extent", 1439 pos1.snapshot >= pos2.p.snapshot ? 
"first" : "second"); 1440 1441 if (fsck_err(trans, extent_overlapping, 1442 "overlapping extents%s", buf.buf)) { 1443 struct btree_iter *old_iter = &iter1; 1444 struct disk_reservation res = { 0 }; 1445 1446 if (pos1.snapshot < pos2.p.snapshot) { 1447 old_iter = &iter2; 1448 swap(k1, k2); 1449 } 1450 1451 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1452 1453 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1454 BTREE_UPDATE_internal_snapshot_node, 1455 k1, k2) ?: 1456 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1457 bch2_disk_reservation_put(c, &res); 1458 1459 if (ret) 1460 goto err; 1461 1462 *fixed = true; 1463 1464 if (pos1.snapshot == pos2.p.snapshot) { 1465 /* 1466 * We overwrote the first extent, and did the overwrite 1467 * in the same snapshot: 1468 */ 1469 extent_end->offset = bkey_start_offset(&pos2); 1470 } else if (pos1.snapshot > pos2.p.snapshot) { 1471 /* 1472 * We overwrote the first extent in pos2's snapshot: 1473 */ 1474 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1475 } else { 1476 /* 1477 * We overwrote the second extent - restart 1478 * check_extent() from the top: 1479 */ 1480 ret = -BCH_ERR_transaction_restart_nested; 1481 } 1482 } 1483 fsck_err: 1484 err: 1485 bch2_trans_iter_exit(trans, &iter2); 1486 bch2_trans_iter_exit(trans, &iter1); 1487 printbuf_exit(&buf); 1488 return ret; 1489 } 1490 1491 static int check_overlapping_extents(struct btree_trans *trans, 1492 struct snapshots_seen *seen, 1493 struct extent_ends *extent_ends, 1494 struct bkey_s_c k, 1495 struct btree_iter *iter, 1496 bool *fixed) 1497 { 1498 struct bch_fs *c = trans->c; 1499 int ret = 0; 1500 1501 /* transaction restart, running again */ 1502 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1503 return 0; 1504 1505 if (extent_ends->last_pos.inode != k.k->p.inode) 1506 extent_ends_reset(extent_ends); 1507 1508 darray_for_each(extent_ends->e, i) { 1509 if (i->offset <= bkey_start_offset(k.k)) 1510 continue; 1511 
1512 if (!ref_visible2(c, 1513 k.k->p.snapshot, seen, 1514 i->snapshot, &i->seen)) 1515 continue; 1516 1517 ret = overlapping_extents_found(trans, iter->btree_id, 1518 SPOS(iter->pos.inode, 1519 i->offset, 1520 i->snapshot), 1521 &i->seen, 1522 *k.k, fixed, i); 1523 if (ret) 1524 goto err; 1525 } 1526 1527 extent_ends->last_pos = k.k->p; 1528 err: 1529 return ret; 1530 } 1531 1532 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1533 struct bkey_s_c k) 1534 { 1535 struct bch_fs *c = trans->c; 1536 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1537 struct bch_extent_crc_unpacked crc; 1538 const union bch_extent_entry *i; 1539 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1540 1541 bkey_for_each_crc(k.k, ptrs, crc, i) 1542 if (crc_is_encoded(crc) && 1543 crc.uncompressed_size > encoded_extent_max_sectors) { 1544 struct printbuf buf = PRINTBUF; 1545 1546 bch2_bkey_val_to_text(&buf, c, k); 1547 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1548 printbuf_exit(&buf); 1549 } 1550 1551 return 0; 1552 } 1553 1554 static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1555 struct bkey_s_c k, 1556 struct inode_walker *inode, 1557 struct snapshots_seen *s, 1558 struct extent_ends *extent_ends) 1559 { 1560 struct bch_fs *c = trans->c; 1561 struct inode_walker_entry *i; 1562 struct printbuf buf = PRINTBUF; 1563 int ret = 0; 1564 1565 ret = bch2_check_key_has_snapshot(trans, iter, k); 1566 if (ret) { 1567 ret = ret < 0 ? 
ret : 0; 1568 goto out; 1569 } 1570 1571 if (inode->last_pos.inode != k.k->p.inode) { 1572 ret = check_i_sectors(trans, inode); 1573 if (ret) 1574 goto err; 1575 } 1576 1577 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1578 if (ret) 1579 goto err; 1580 1581 i = walk_inode(trans, inode, k); 1582 ret = PTR_ERR_OR_ZERO(i); 1583 if (ret) 1584 goto err; 1585 1586 ret = check_key_has_inode(trans, iter, inode, i, k); 1587 if (ret) 1588 goto err; 1589 1590 if (k.k->type != KEY_TYPE_whiteout) { 1591 ret = check_overlapping_extents(trans, s, extent_ends, k, iter, 1592 &inode->recalculate_sums); 1593 if (ret) 1594 goto err; 1595 } 1596 1597 /* 1598 * Check inodes in reverse order, from oldest snapshots to newest, 1599 * starting from the inode that matches this extent's snapshot. If we 1600 * didn't have one, iterate over all inodes: 1601 */ 1602 if (!i) 1603 i = &darray_last(inode->inodes); 1604 1605 for (; 1606 inode->inodes.data && i >= inode->inodes.data; 1607 --i) { 1608 if (i->snapshot > k.k->p.snapshot || 1609 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) 1610 continue; 1611 1612 if (k.k->type != KEY_TYPE_whiteout) { 1613 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && 1614 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1615 !bkey_extent_is_reservation(k), 1616 trans, extent_past_end_of_inode, 1617 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1618 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1619 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1620 struct btree_iter iter2; 1621 1622 bch2_trans_copy_iter(&iter2, iter); 1623 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1624 ret = bch2_btree_iter_traverse(&iter2) ?: 1625 bch2_btree_delete_at(trans, &iter2, 1626 BTREE_UPDATE_internal_snapshot_node); 1627 bch2_trans_iter_exit(trans, &iter2); 1628 if (ret) 1629 goto err; 1630 1631 iter->k.type = KEY_TYPE_whiteout; 1632 } 1633 1634 if (bkey_extent_is_allocation(k.k)) 1635 i->count += 
k.k->size; 1636 } 1637 1638 i->seen_this_pos = true; 1639 } 1640 1641 if (k.k->type != KEY_TYPE_whiteout) { 1642 ret = extent_ends_at(c, extent_ends, s, k); 1643 if (ret) 1644 goto err; 1645 } 1646 out: 1647 err: 1648 fsck_err: 1649 printbuf_exit(&buf); 1650 bch_err_fn(c, ret); 1651 return ret; 1652 } 1653 1654 /* 1655 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1656 * that i_size an i_sectors are consistent 1657 */ 1658 int bch2_check_extents(struct bch_fs *c) 1659 { 1660 struct inode_walker w = inode_walker_init(); 1661 struct snapshots_seen s; 1662 struct extent_ends extent_ends; 1663 struct disk_reservation res = { 0 }; 1664 1665 snapshots_seen_init(&s); 1666 extent_ends_init(&extent_ends); 1667 1668 int ret = bch2_trans_run(c, 1669 for_each_btree_key_commit(trans, iter, BTREE_ID_extents, 1670 POS(BCACHEFS_ROOT_INO, 0), 1671 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1672 &res, NULL, 1673 BCH_TRANS_COMMIT_no_enospc, ({ 1674 bch2_disk_reservation_put(c, &res); 1675 check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: 1676 check_extent_overbig(trans, &iter, k); 1677 })) ?: 1678 check_i_sectors_notnested(trans, &w)); 1679 1680 bch2_disk_reservation_put(c, &res); 1681 extent_ends_exit(&extent_ends); 1682 inode_walker_exit(&w); 1683 snapshots_seen_exit(&s); 1684 1685 bch_err_fn(c, ret); 1686 return ret; 1687 } 1688 1689 int bch2_check_indirect_extents(struct bch_fs *c) 1690 { 1691 struct disk_reservation res = { 0 }; 1692 1693 int ret = bch2_trans_run(c, 1694 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1695 POS_MIN, 1696 BTREE_ITER_prefetch, k, 1697 &res, NULL, 1698 BCH_TRANS_COMMIT_no_enospc, ({ 1699 bch2_disk_reservation_put(c, &res); 1700 check_extent_overbig(trans, &iter, k); 1701 }))); 1702 1703 bch2_disk_reservation_put(c, &res); 1704 bch_err_fn(c, ret); 1705 return ret; 1706 } 1707 1708 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) 1709 { 1710 struct bch_fs *c = 
trans->c; 1711 int ret = 0; 1712 s64 count2; 1713 1714 darray_for_each(w->inodes, i) { 1715 if (i->inode.bi_nlink == i->count) 1716 continue; 1717 1718 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1719 if (count2 < 0) 1720 return count2; 1721 1722 if (i->count != count2) { 1723 bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", 1724 w->last_pos.inode, i->snapshot, i->count, count2); 1725 i->count = count2; 1726 if (i->inode.bi_nlink == i->count) 1727 continue; 1728 } 1729 1730 if (fsck_err_on(i->inode.bi_nlink != i->count, 1731 trans, inode_dir_wrong_nlink, 1732 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1733 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1734 i->inode.bi_nlink = i->count; 1735 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1736 if (ret) 1737 break; 1738 } 1739 } 1740 fsck_err: 1741 bch_err_fn(c, ret); 1742 return ret; 1743 } 1744 1745 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) 1746 { 1747 u32 restart_count = trans->restart_count; 1748 return check_subdir_count_notnested(trans, w) ?: 1749 trans_was_restarted(trans, restart_count); 1750 } 1751 1752 noinline_for_stack 1753 static int check_dirent_inode_dirent(struct btree_trans *trans, 1754 struct btree_iter *iter, 1755 struct bkey_s_c_dirent d, 1756 struct bch_inode_unpacked *target, 1757 u32 target_snapshot) 1758 { 1759 struct bch_fs *c = trans->c; 1760 struct printbuf buf = PRINTBUF; 1761 int ret = 0; 1762 1763 if (inode_points_to_dirent(target, d)) 1764 return 0; 1765 1766 if (bch2_inode_should_have_bp(target) && 1767 !fsck_err(trans, inode_wrong_backpointer, 1768 "dirent points to inode that does not point back:\n %s", 1769 (bch2_bkey_val_to_text(&buf, c, d.s_c), 1770 prt_printf(&buf, "\n "), 1771 bch2_inode_unpacked_to_text(&buf, target), 1772 buf.buf))) 1773 goto out_noiter; 1774 1775 if (!target->bi_dir && 1776 
!target->bi_dir_offset) { 1777 target->bi_dir = d.k->p.inode; 1778 target->bi_dir_offset = d.k->p.offset; 1779 return __bch2_fsck_write_inode(trans, target, target_snapshot); 1780 } 1781 1782 struct btree_iter bp_iter = { NULL }; 1783 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1784 SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1785 ret = bkey_err(bp_dirent); 1786 if (ret && !bch2_err_matches(ret, ENOENT)) 1787 goto err; 1788 1789 bool backpointer_exists = !ret; 1790 ret = 0; 1791 1792 if (fsck_err_on(!backpointer_exists, 1793 trans, inode_wrong_backpointer, 1794 "inode %llu:%u has wrong backpointer:\n" 1795 "got %llu:%llu\n" 1796 "should be %llu:%llu", 1797 target->bi_inum, target_snapshot, 1798 target->bi_dir, 1799 target->bi_dir_offset, 1800 d.k->p.inode, 1801 d.k->p.offset)) { 1802 target->bi_dir = d.k->p.inode; 1803 target->bi_dir_offset = d.k->p.offset; 1804 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1805 goto out; 1806 } 1807 1808 bch2_bkey_val_to_text(&buf, c, d.s_c); 1809 prt_newline(&buf); 1810 if (backpointer_exists) 1811 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 1812 1813 if (fsck_err_on(backpointer_exists && 1814 (S_ISDIR(target->bi_mode) || 1815 target->bi_subvol), 1816 trans, inode_dir_multiple_links, 1817 "%s %llu:%u with multiple links\n%s", 1818 S_ISDIR(target->bi_mode) ? 
"directory" : "subvolume", 1819 target->bi_inum, target_snapshot, buf.buf)) { 1820 ret = __remove_dirent(trans, d.k->p); 1821 goto out; 1822 } 1823 1824 /* 1825 * hardlinked file with nlink 0: 1826 * We're just adjusting nlink here so check_nlinks() will pick 1827 * it up, it ignores inodes with nlink 0 1828 */ 1829 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1830 trans, inode_multiple_links_but_nlink_0, 1831 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1832 target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1833 target->bi_nlink++; 1834 target->bi_flags &= ~BCH_INODE_unlinked; 1835 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1836 if (ret) 1837 goto err; 1838 } 1839 out: 1840 err: 1841 fsck_err: 1842 bch2_trans_iter_exit(trans, &bp_iter); 1843 out_noiter: 1844 printbuf_exit(&buf); 1845 bch_err_fn(c, ret); 1846 return ret; 1847 } 1848 1849 noinline_for_stack 1850 static int check_dirent_target(struct btree_trans *trans, 1851 struct btree_iter *iter, 1852 struct bkey_s_c_dirent d, 1853 struct bch_inode_unpacked *target, 1854 u32 target_snapshot) 1855 { 1856 struct bch_fs *c = trans->c; 1857 struct bkey_i_dirent *n; 1858 struct printbuf buf = PRINTBUF; 1859 int ret = 0; 1860 1861 ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1862 if (ret) 1863 goto err; 1864 1865 if (fsck_err_on(d.v->d_type != inode_d_type(target), 1866 trans, dirent_d_type_wrong, 1867 "incorrect d_type: got %s, should be %s:\n%s", 1868 bch2_d_type_str(d.v->d_type), 1869 bch2_d_type_str(inode_d_type(target)), 1870 (printbuf_reset(&buf), 1871 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1872 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 1873 ret = PTR_ERR_OR_ZERO(n); 1874 if (ret) 1875 goto err; 1876 1877 bkey_reassemble(&n->k_i, d.s_c); 1878 n->v.d_type = inode_d_type(target); 1879 if (n->v.d_type == DT_SUBVOL) { 1880 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 1881 
n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1882 } else { 1883 n->v.d_inum = cpu_to_le64(target->bi_inum); 1884 } 1885 1886 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1887 if (ret) 1888 goto err; 1889 1890 d = dirent_i_to_s_c(n); 1891 } 1892 err: 1893 fsck_err: 1894 printbuf_exit(&buf); 1895 bch_err_fn(c, ret); 1896 return ret; 1897 } 1898 1899 /* find a subvolume that's a descendent of @snapshot: */ 1900 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1901 { 1902 struct btree_iter iter; 1903 struct bkey_s_c k; 1904 int ret; 1905 1906 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1907 if (k.k->type != KEY_TYPE_subvolume) 1908 continue; 1909 1910 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1911 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1912 bch2_trans_iter_exit(trans, &iter); 1913 *subvolid = k.k->p.offset; 1914 goto found; 1915 } 1916 } 1917 if (!ret) 1918 ret = -ENOENT; 1919 found: 1920 bch2_trans_iter_exit(trans, &iter); 1921 return ret; 1922 } 1923 1924 noinline_for_stack 1925 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1926 struct bkey_s_c_dirent d) 1927 { 1928 struct bch_fs *c = trans->c; 1929 struct btree_iter subvol_iter = {}; 1930 struct bch_inode_unpacked subvol_root; 1931 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1932 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1933 u32 parent_snapshot; 1934 u32 new_parent_subvol = 0; 1935 u64 parent_inum; 1936 struct printbuf buf = PRINTBUF; 1937 int ret = 0; 1938 1939 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1940 if (ret && !bch2_err_matches(ret, ENOENT)) 1941 return ret; 1942 1943 if (ret || 1944 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 1945 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1946 if (ret2 && 
!bch2_err_matches(ret, ENOENT)) 1947 return ret2; 1948 } 1949 1950 if (ret && 1951 !new_parent_subvol && 1952 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1953 /* 1954 * Couldn't find a subvol for dirent's snapshot - but we lost 1955 * subvols, so we need to reconstruct: 1956 */ 1957 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 1958 if (ret) 1959 return ret; 1960 1961 parent_snapshot = d.k->p.snapshot; 1962 } 1963 1964 if (fsck_err_on(ret, 1965 trans, dirent_to_missing_parent_subvol, 1966 "dirent parent_subvol points to missing subvolume\n%s", 1967 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1968 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1969 trans, dirent_not_visible_in_parent_subvol, 1970 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1971 parent_snapshot, 1972 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1973 if (!new_parent_subvol) { 1974 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 1975 return -BCH_ERR_fsck_repair_unimplemented; 1976 } 1977 1978 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1979 ret = PTR_ERR_OR_ZERO(new_dirent); 1980 if (ret) 1981 goto err; 1982 1983 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1984 } 1985 1986 struct bkey_s_c_subvolume s = 1987 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1988 BTREE_ID_subvolumes, POS(0, target_subvol), 1989 0, subvolume); 1990 ret = bkey_err(s.s_c); 1991 if (ret && !bch2_err_matches(ret, ENOENT)) 1992 return ret; 1993 1994 if (ret) { 1995 if (fsck_err(trans, dirent_to_missing_subvol, 1996 "dirent points to missing subvolume\n%s", 1997 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1998 return __remove_dirent(trans, d.k->p); 1999 ret = 0; 2000 goto out; 2001 } 2002 2003 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 2004 trans, subvol_fs_path_parent_wrong, 2005 
"subvol with wrong fs_path_parent, should be be %u\n%s", 2006 parent_subvol, 2007 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 2008 struct bkey_i_subvolume *n = 2009 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 2010 ret = PTR_ERR_OR_ZERO(n); 2011 if (ret) 2012 goto err; 2013 2014 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 2015 } 2016 2017 u64 target_inum = le64_to_cpu(s.v->inode); 2018 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 2019 2020 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 2021 if (ret && !bch2_err_matches(ret, ENOENT)) 2022 goto err; 2023 2024 if (ret) { 2025 bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); 2026 ret = -BCH_ERR_fsck_repair_unimplemented; 2027 goto err; 2028 } 2029 2030 if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, 2031 trans, inode_bi_parent_wrong, 2032 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 2033 target_inum, 2034 subvol_root.bi_parent_subvol, parent_subvol)) { 2035 subvol_root.bi_parent_subvol = parent_subvol; 2036 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 2037 if (ret) 2038 goto err; 2039 } 2040 2041 ret = check_dirent_target(trans, iter, d, &subvol_root, 2042 target_snapshot); 2043 if (ret) 2044 goto err; 2045 out: 2046 err: 2047 fsck_err: 2048 bch2_trans_iter_exit(trans, &subvol_iter); 2049 printbuf_exit(&buf); 2050 return ret; 2051 } 2052 2053 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 2054 struct bkey_s_c k, 2055 struct bch_hash_info *hash_info, 2056 struct inode_walker *dir, 2057 struct inode_walker *target, 2058 struct snapshots_seen *s) 2059 { 2060 struct bch_fs *c = trans->c; 2061 struct inode_walker_entry *i; 2062 struct printbuf buf = PRINTBUF; 2063 int ret = 0; 2064 2065 ret = bch2_check_key_has_snapshot(trans, iter, k); 2066 if (ret) { 2067 ret = ret < 0 ? 
ret : 0; 2068 goto out; 2069 } 2070 2071 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 2072 if (ret) 2073 goto err; 2074 2075 if (k.k->type == KEY_TYPE_whiteout) 2076 goto out; 2077 2078 if (dir->last_pos.inode != k.k->p.inode) { 2079 ret = check_subdir_count(trans, dir); 2080 if (ret) 2081 goto err; 2082 } 2083 2084 i = walk_inode(trans, dir, k); 2085 ret = PTR_ERR_OR_ZERO(i); 2086 if (ret < 0) 2087 goto err; 2088 2089 ret = check_key_has_inode(trans, iter, dir, i, k); 2090 if (ret) 2091 goto err; 2092 2093 if (!i) 2094 goto out; 2095 2096 if (dir->first_this_inode) 2097 *hash_info = bch2_hash_info_init(c, &i->inode); 2098 dir->first_this_inode = false; 2099 2100 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 2101 if (ret < 0) 2102 goto err; 2103 if (ret) { 2104 /* dirent has been deleted */ 2105 ret = 0; 2106 goto out; 2107 } 2108 2109 if (k.k->type != KEY_TYPE_dirent) 2110 goto out; 2111 2112 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2113 2114 if (d.v->d_type == DT_SUBVOL) { 2115 ret = check_dirent_to_subvol(trans, iter, d); 2116 if (ret) 2117 goto err; 2118 } else { 2119 ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); 2120 if (ret) 2121 goto err; 2122 2123 if (fsck_err_on(!target->inodes.nr, 2124 trans, dirent_to_missing_inode, 2125 "dirent points to missing inode:\n%s", 2126 (printbuf_reset(&buf), 2127 bch2_bkey_val_to_text(&buf, c, k), 2128 buf.buf))) { 2129 ret = __remove_dirent(trans, d.k->p); 2130 if (ret) 2131 goto err; 2132 } 2133 2134 darray_for_each(target->inodes, i) { 2135 ret = check_dirent_target(trans, iter, d, 2136 &i->inode, i->snapshot); 2137 if (ret) 2138 goto err; 2139 } 2140 2141 if (d.v->d_type == DT_DIR) 2142 for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) 2143 i->count++; 2144 } 2145 out: 2146 err: 2147 fsck_err: 2148 printbuf_exit(&buf); 2149 bch_err_fn(c, ret); 2150 return ret; 2151 } 2152 2153 /* 2154 * Walk dirents: verify that they all have a corresponding 
S_ISDIR inode, 2155 * validate d_type 2156 */ 2157 int bch2_check_dirents(struct bch_fs *c) 2158 { 2159 struct inode_walker dir = inode_walker_init(); 2160 struct inode_walker target = inode_walker_init(); 2161 struct snapshots_seen s; 2162 struct bch_hash_info hash_info; 2163 2164 snapshots_seen_init(&s); 2165 2166 int ret = bch2_trans_run(c, 2167 for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, 2168 POS(BCACHEFS_ROOT_INO, 0), 2169 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2170 k, 2171 NULL, NULL, 2172 BCH_TRANS_COMMIT_no_enospc, 2173 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: 2174 check_subdir_count_notnested(trans, &dir)); 2175 2176 snapshots_seen_exit(&s); 2177 inode_walker_exit(&dir); 2178 inode_walker_exit(&target); 2179 bch_err_fn(c, ret); 2180 return ret; 2181 } 2182 2183 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, 2184 struct bkey_s_c k, 2185 struct bch_hash_info *hash_info, 2186 struct inode_walker *inode) 2187 { 2188 struct bch_fs *c = trans->c; 2189 struct inode_walker_entry *i; 2190 int ret; 2191 2192 ret = bch2_check_key_has_snapshot(trans, iter, k); 2193 if (ret < 0) 2194 return ret; 2195 if (ret) 2196 return 0; 2197 2198 i = walk_inode(trans, inode, k); 2199 ret = PTR_ERR_OR_ZERO(i); 2200 if (ret) 2201 return ret; 2202 2203 ret = check_key_has_inode(trans, iter, inode, i, k); 2204 if (ret) 2205 return ret; 2206 2207 if (!i) 2208 return 0; 2209 2210 if (inode->first_this_inode) 2211 *hash_info = bch2_hash_info_init(c, &i->inode); 2212 inode->first_this_inode = false; 2213 2214 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2215 bch_err_fn(c, ret); 2216 return ret; 2217 } 2218 2219 /* 2220 * Walk xattrs: verify that they all have a corresponding inode 2221 */ 2222 int bch2_check_xattrs(struct bch_fs *c) 2223 { 2224 struct inode_walker inode = inode_walker_init(); 2225 struct bch_hash_info hash_info; 2226 int ret = 0; 2227 2228 ret = bch2_trans_run(c, 2229 
for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, 2230 POS(BCACHEFS_ROOT_INO, 0), 2231 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2232 k, 2233 NULL, NULL, 2234 BCH_TRANS_COMMIT_no_enospc, 2235 check_xattr(trans, &iter, k, &hash_info, &inode))); 2236 2237 inode_walker_exit(&inode); 2238 bch_err_fn(c, ret); 2239 return ret; 2240 } 2241 2242 static int check_root_trans(struct btree_trans *trans) 2243 { 2244 struct bch_fs *c = trans->c; 2245 struct bch_inode_unpacked root_inode; 2246 u32 snapshot; 2247 u64 inum; 2248 int ret; 2249 2250 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); 2251 if (ret && !bch2_err_matches(ret, ENOENT)) 2252 return ret; 2253 2254 if (mustfix_fsck_err_on(ret, trans, root_subvol_missing, 2255 "root subvol missing")) { 2256 struct bkey_i_subvolume *root_subvol = 2257 bch2_trans_kmalloc(trans, sizeof(*root_subvol)); 2258 ret = PTR_ERR_OR_ZERO(root_subvol); 2259 if (ret) 2260 goto err; 2261 2262 snapshot = U32_MAX; 2263 inum = BCACHEFS_ROOT_INO; 2264 2265 bkey_subvolume_init(&root_subvol->k_i); 2266 root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL; 2267 root_subvol->v.flags = 0; 2268 root_subvol->v.snapshot = cpu_to_le32(snapshot); 2269 root_subvol->v.inode = cpu_to_le64(inum); 2270 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0); 2271 bch_err_msg(c, ret, "writing root subvol"); 2272 if (ret) 2273 goto err; 2274 } 2275 2276 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2277 if (ret && !bch2_err_matches(ret, ENOENT)) 2278 return ret; 2279 2280 if (mustfix_fsck_err_on(ret, 2281 trans, root_dir_missing, 2282 "root directory missing") || 2283 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), 2284 trans, root_inode_not_dir, 2285 "root inode not a directory")) { 2286 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2287 0, NULL); 2288 root_inode.bi_inum = inum; 2289 2290 ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2291 bch_err_msg(c, ret, "writing root 
inode"); 2292 } 2293 err: 2294 fsck_err: 2295 return ret; 2296 } 2297 2298 /* Get root directory, create if it doesn't exist: */ 2299 int bch2_check_root(struct bch_fs *c) 2300 { 2301 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2302 check_root_trans(trans)); 2303 bch_err_fn(c, ret); 2304 return ret; 2305 } 2306 2307 typedef DARRAY(u32) darray_u32; 2308 2309 static bool darray_u32_has(darray_u32 *d, u32 v) 2310 { 2311 darray_for_each(*d, i) 2312 if (*i == v) 2313 return true; 2314 return false; 2315 } 2316 2317 /* 2318 * We've checked that inode backpointers point to valid dirents; here, it's 2319 * sufficient to check that the subvolume root has a dirent: 2320 */ 2321 static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) 2322 { 2323 struct bch_inode_unpacked inode; 2324 int ret = bch2_inode_find_by_inum_trans(trans, 2325 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 2326 &inode); 2327 if (ret) 2328 return ret; 2329 2330 return inode.bi_dir != 0; 2331 } 2332 2333 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 2334 { 2335 struct bch_fs *c = trans->c; 2336 struct btree_iter parent_iter = {}; 2337 darray_u32 subvol_path = {}; 2338 struct printbuf buf = PRINTBUF; 2339 int ret = 0; 2340 2341 if (k.k->type != KEY_TYPE_subvolume) 2342 return 0; 2343 2344 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { 2345 ret = darray_push(&subvol_path, k.k->p.offset); 2346 if (ret) 2347 goto err; 2348 2349 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 2350 2351 ret = subvol_has_dirent(trans, s); 2352 if (ret < 0) 2353 break; 2354 2355 if (fsck_err_on(!ret, 2356 trans, subvol_unreachable, 2357 "unreachable subvolume %s", 2358 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2359 buf.buf))) { 2360 ret = reattach_subvol(trans, s); 2361 break; 2362 } 2363 2364 u32 parent = le32_to_cpu(s.v->fs_path_parent); 2365 2366 if (darray_u32_has(&subvol_path, parent)) { 2367 if 
(fsck_err(c, subvol_loop, "subvolume loop")) 2368 ret = reattach_subvol(trans, s); 2369 break; 2370 } 2371 2372 bch2_trans_iter_exit(trans, &parent_iter); 2373 bch2_trans_iter_init(trans, &parent_iter, 2374 BTREE_ID_subvolumes, POS(0, parent), 0); 2375 k = bch2_btree_iter_peek_slot(&parent_iter); 2376 ret = bkey_err(k); 2377 if (ret) 2378 goto err; 2379 2380 if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, 2381 trans, subvol_unreachable, 2382 "unreachable subvolume %s", 2383 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2384 buf.buf))) { 2385 ret = reattach_subvol(trans, s); 2386 break; 2387 } 2388 } 2389 fsck_err: 2390 err: 2391 printbuf_exit(&buf); 2392 darray_exit(&subvol_path); 2393 bch2_trans_iter_exit(trans, &parent_iter); 2394 return ret; 2395 } 2396 2397 int bch2_check_subvolume_structure(struct bch_fs *c) 2398 { 2399 int ret = bch2_trans_run(c, 2400 for_each_btree_key_commit(trans, iter, 2401 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, 2402 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2403 check_subvol_path(trans, &iter, k))); 2404 bch_err_fn(c, ret); 2405 return ret; 2406 } 2407 2408 struct pathbuf_entry { 2409 u64 inum; 2410 u32 snapshot; 2411 }; 2412 2413 typedef DARRAY(struct pathbuf_entry) pathbuf; 2414 2415 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) 2416 { 2417 darray_for_each(*p, i) 2418 if (i->inum == inum && 2419 i->snapshot == snapshot) 2420 return true; 2421 return false; 2422 } 2423 2424 /* 2425 * Check that a given inode is reachable from its subvolume root - we already 2426 * verified subvolume connectivity: 2427 * 2428 * XXX: we should also be verifying that inodes are in the right subvolumes 2429 */ 2430 static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) 2431 { 2432 struct bch_fs *c = trans->c; 2433 struct btree_iter inode_iter = {}; 2434 struct bch_inode_unpacked inode; 2435 struct printbuf buf = PRINTBUF; 2436 u32 snapshot = inode_k.k->p.snapshot; 2437 int ret = 0; 2438 2439 p->nr = 0; 
2440 2441 BUG_ON(bch2_inode_unpack(inode_k, &inode)); 2442 2443 while (!inode.bi_subvol) { 2444 struct btree_iter dirent_iter; 2445 struct bkey_s_c_dirent d; 2446 u32 parent_snapshot = snapshot; 2447 2448 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2449 ret = bkey_err(d.s_c); 2450 if (ret && !bch2_err_matches(ret, ENOENT)) 2451 break; 2452 2453 if (!ret && !dirent_points_to_inode(d, &inode)) { 2454 bch2_trans_iter_exit(trans, &dirent_iter); 2455 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2456 } 2457 2458 if (bch2_err_matches(ret, ENOENT)) { 2459 ret = 0; 2460 if (fsck_err(trans, inode_unreachable, 2461 "unreachable inode\n%s", 2462 (printbuf_reset(&buf), 2463 bch2_bkey_val_to_text(&buf, c, inode_k), 2464 buf.buf))) 2465 ret = reattach_inode(trans, &inode, snapshot); 2466 goto out; 2467 } 2468 2469 bch2_trans_iter_exit(trans, &dirent_iter); 2470 2471 if (!S_ISDIR(inode.bi_mode)) 2472 break; 2473 2474 ret = darray_push(p, ((struct pathbuf_entry) { 2475 .inum = inode.bi_inum, 2476 .snapshot = snapshot, 2477 })); 2478 if (ret) 2479 return ret; 2480 2481 snapshot = parent_snapshot; 2482 2483 bch2_trans_iter_exit(trans, &inode_iter); 2484 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2485 SPOS(0, inode.bi_dir, snapshot), 0); 2486 ret = bkey_err(inode_k) ?: 2487 !bkey_is_inode(inode_k.k) ? 
-BCH_ERR_ENOENT_inode 2488 : bch2_inode_unpack(inode_k, &inode); 2489 if (ret) { 2490 /* Should have been caught in dirents pass */ 2491 bch_err_msg(c, ret, "error looking up parent directory"); 2492 break; 2493 } 2494 2495 snapshot = inode_k.k->p.snapshot; 2496 2497 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2498 /* XXX print path */ 2499 bch_err(c, "directory structure loop"); 2500 2501 darray_for_each(*p, i) 2502 pr_err("%llu:%u", i->inum, i->snapshot); 2503 pr_err("%llu:%u", inode.bi_inum, snapshot); 2504 2505 if (fsck_err(trans, dir_loop, "directory structure loop")) { 2506 ret = remove_backpointer(trans, &inode); 2507 bch_err_msg(c, ret, "removing dirent"); 2508 if (ret) 2509 break; 2510 2511 ret = reattach_inode(trans, &inode, snapshot); 2512 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2513 } 2514 break; 2515 } 2516 } 2517 out: 2518 fsck_err: 2519 bch2_trans_iter_exit(trans, &inode_iter); 2520 printbuf_exit(&buf); 2521 bch_err_fn(c, ret); 2522 return ret; 2523 } 2524 2525 /* 2526 * Check for unreachable inodes, as well as loops in the directory structure: 2527 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2528 * unreachable: 2529 */ 2530 int bch2_check_directory_structure(struct bch_fs *c) 2531 { 2532 pathbuf path = { 0, }; 2533 int ret; 2534 2535 ret = bch2_trans_run(c, 2536 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2537 BTREE_ITER_intent| 2538 BTREE_ITER_prefetch| 2539 BTREE_ITER_all_snapshots, k, 2540 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2541 if (!bkey_is_inode(k.k)) 2542 continue; 2543 2544 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2545 continue; 2546 2547 check_path(trans, &path, k); 2548 }))); 2549 darray_exit(&path); 2550 2551 bch_err_fn(c, ret); 2552 return ret; 2553 } 2554 2555 struct nlink_table { 2556 size_t nr; 2557 size_t size; 2558 2559 struct nlink { 2560 u64 inum; 2561 u32 snapshot; 2562 u32 count; 2563 } *d; 2564 }; 2565 2566 static int 
add_nlink(struct bch_fs *c, struct nlink_table *t, 2567 u64 inum, u32 snapshot) 2568 { 2569 if (t->nr == t->size) { 2570 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2571 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2572 2573 if (!d) { 2574 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2575 new_size); 2576 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2577 } 2578 2579 if (t->d) 2580 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2581 kvfree(t->d); 2582 2583 t->d = d; 2584 t->size = new_size; 2585 } 2586 2587 2588 t->d[t->nr++] = (struct nlink) { 2589 .inum = inum, 2590 .snapshot = snapshot, 2591 }; 2592 2593 return 0; 2594 } 2595 2596 static int nlink_cmp(const void *_l, const void *_r) 2597 { 2598 const struct nlink *l = _l; 2599 const struct nlink *r = _r; 2600 2601 return cmp_int(l->inum, r->inum); 2602 } 2603 2604 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2605 struct nlink_table *links, 2606 u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2607 { 2608 struct nlink *link, key = { 2609 .inum = inum, .snapshot = U32_MAX, 2610 }; 2611 2612 if (inum < range_start || inum >= range_end) 2613 return; 2614 2615 link = __inline_bsearch(&key, links->d, links->nr, 2616 sizeof(links->d[0]), nlink_cmp); 2617 if (!link) 2618 return; 2619 2620 while (link > links->d && link[0].inum == link[-1].inum) 2621 --link; 2622 2623 for (; link < links->d + links->nr && link->inum == inum; link++) 2624 if (ref_visible(c, s, snapshot, link->snapshot)) { 2625 link->count++; 2626 if (link->snapshot >= snapshot) 2627 break; 2628 } 2629 } 2630 2631 noinline_for_stack 2632 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2633 struct nlink_table *t, 2634 u64 start, u64 *end) 2635 { 2636 int ret = bch2_trans_run(c, 2637 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2638 POS(0, start), 2639 BTREE_ITER_intent| 2640 BTREE_ITER_prefetch| 2641 BTREE_ITER_all_snapshots, k, ({ 2642 if (!bkey_is_inode(k.k)) 2643 continue; 
2644 2645 /* Should never fail, checked by bch2_inode_invalid: */ 2646 struct bch_inode_unpacked u; 2647 BUG_ON(bch2_inode_unpack(k, &u)); 2648 2649 /* 2650 * Backpointer and directory structure checks are sufficient for 2651 * directories, since they can't have hardlinks: 2652 */ 2653 if (S_ISDIR(u.bi_mode)) 2654 continue; 2655 2656 if (!u.bi_nlink) 2657 continue; 2658 2659 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2660 if (ret) { 2661 *end = k.k->p.offset; 2662 ret = 0; 2663 break; 2664 } 2665 0; 2666 }))); 2667 2668 bch_err_fn(c, ret); 2669 return ret; 2670 } 2671 2672 noinline_for_stack 2673 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2674 u64 range_start, u64 range_end) 2675 { 2676 struct snapshots_seen s; 2677 2678 snapshots_seen_init(&s); 2679 2680 int ret = bch2_trans_run(c, 2681 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2682 BTREE_ITER_intent| 2683 BTREE_ITER_prefetch| 2684 BTREE_ITER_all_snapshots, k, ({ 2685 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2686 if (ret) 2687 break; 2688 2689 if (k.k->type == KEY_TYPE_dirent) { 2690 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2691 2692 if (d.v->d_type != DT_DIR && 2693 d.v->d_type != DT_SUBVOL) 2694 inc_link(c, &s, links, range_start, range_end, 2695 le64_to_cpu(d.v->d_inum), d.k->p.snapshot); 2696 } 2697 0; 2698 }))); 2699 2700 snapshots_seen_exit(&s); 2701 2702 bch_err_fn(c, ret); 2703 return ret; 2704 } 2705 2706 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2707 struct bkey_s_c k, 2708 struct nlink_table *links, 2709 size_t *idx, u64 range_end) 2710 { 2711 struct bch_inode_unpacked u; 2712 struct nlink *link = &links->d[*idx]; 2713 int ret = 0; 2714 2715 if (k.k->p.offset >= range_end) 2716 return 1; 2717 2718 if (!bkey_is_inode(k.k)) 2719 return 0; 2720 2721 BUG_ON(bch2_inode_unpack(k, &u)); 2722 2723 if (S_ISDIR(u.bi_mode)) 2724 return 0; 2725 2726 if (!u.bi_nlink) 2727 
return 0; 2728 2729 while ((cmp_int(link->inum, k.k->p.offset) ?: 2730 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2731 BUG_ON(*idx == links->nr); 2732 link = &links->d[++*idx]; 2733 } 2734 2735 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2736 trans, inode_wrong_nlink, 2737 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2738 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2739 bch2_inode_nlink_get(&u), link->count)) { 2740 bch2_inode_nlink_set(&u, link->count); 2741 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2742 } 2743 fsck_err: 2744 return ret; 2745 } 2746 2747 noinline_for_stack 2748 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2749 struct nlink_table *links, 2750 u64 range_start, u64 range_end) 2751 { 2752 size_t idx = 0; 2753 2754 int ret = bch2_trans_run(c, 2755 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2756 POS(0, range_start), 2757 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 2758 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2759 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2760 if (ret < 0) { 2761 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2762 return ret; 2763 } 2764 2765 return 0; 2766 } 2767 2768 int bch2_check_nlinks(struct bch_fs *c) 2769 { 2770 struct nlink_table links = { 0 }; 2771 u64 this_iter_range_start, next_iter_range_start = 0; 2772 int ret = 0; 2773 2774 do { 2775 this_iter_range_start = next_iter_range_start; 2776 next_iter_range_start = U64_MAX; 2777 2778 ret = check_nlinks_find_hardlinks(c, &links, 2779 this_iter_range_start, 2780 &next_iter_range_start); 2781 2782 ret = check_nlinks_walk_dirents(c, &links, 2783 this_iter_range_start, 2784 next_iter_range_start); 2785 if (ret) 2786 break; 2787 2788 ret = check_nlinks_update_hardlinks(c, &links, 2789 this_iter_range_start, 2790 next_iter_range_start); 2791 if (ret) 2792 break; 2793 2794 links.nr = 0; 2795 } while (next_iter_range_start != U64_MAX); 
2796 2797 kvfree(links.d); 2798 bch_err_fn(c, ret); 2799 return ret; 2800 } 2801 2802 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2803 struct bkey_s_c k) 2804 { 2805 struct bkey_s_c_reflink_p p; 2806 struct bkey_i_reflink_p *u; 2807 2808 if (k.k->type != KEY_TYPE_reflink_p) 2809 return 0; 2810 2811 p = bkey_s_c_to_reflink_p(k); 2812 2813 if (!p.v->front_pad && !p.v->back_pad) 2814 return 0; 2815 2816 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2817 int ret = PTR_ERR_OR_ZERO(u); 2818 if (ret) 2819 return ret; 2820 2821 bkey_reassemble(&u->k_i, k); 2822 u->v.front_pad = 0; 2823 u->v.back_pad = 0; 2824 2825 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 2826 } 2827 2828 int bch2_fix_reflink_p(struct bch_fs *c) 2829 { 2830 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2831 return 0; 2832 2833 int ret = bch2_trans_run(c, 2834 for_each_btree_key_commit(trans, iter, 2835 BTREE_ID_extents, POS_MIN, 2836 BTREE_ITER_intent|BTREE_ITER_prefetch| 2837 BTREE_ITER_all_snapshots, k, 2838 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2839 fix_reflink_p_key(trans, &iter, k))); 2840 bch_err_fn(c, ret); 2841 return ret; 2842 } 2843