1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "bkey_buf.h" 5 #include "btree_cache.h" 6 #include "btree_update.h" 7 #include "buckets.h" 8 #include "darray.h" 9 #include "dirent.h" 10 #include "error.h" 11 #include "fs-common.h" 12 #include "fsck.h" 13 #include "inode.h" 14 #include "keylist.h" 15 #include "recovery_passes.h" 16 #include "snapshot.h" 17 #include "super.h" 18 #include "xattr.h" 19 20 #include <linux/bsearch.h> 21 #include <linux/dcache.h> /* struct qstr */ 22 23 /* 24 * XXX: this is handling transaction restarts without returning 25 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: 26 */ 27 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, 28 u32 snapshot) 29 { 30 u64 sectors = 0; 31 32 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, 33 SPOS(inum, 0, snapshot), 34 POS(inum, U64_MAX), 35 0, k, ({ 36 if (bkey_extent_is_allocation(k.k)) 37 sectors += k.k->size; 38 0; 39 })); 40 41 return ret ?: sectors; 42 } 43 44 static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, 45 u32 snapshot) 46 { 47 u64 subdirs = 0; 48 49 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, 50 SPOS(inum, 0, snapshot), 51 POS(inum, U64_MAX), 52 0, k, ({ 53 if (k.k->type == KEY_TYPE_dirent && 54 bkey_s_c_to_dirent(k).v->d_type == DT_DIR) 55 subdirs++; 56 0; 57 })); 58 59 return ret ?: subdirs; 60 } 61 62 static int subvol_lookup(struct btree_trans *trans, u32 subvol, 63 u32 *snapshot, u64 *inum) 64 { 65 struct bch_subvolume s; 66 int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); 67 68 *snapshot = le32_to_cpu(s.snapshot); 69 *inum = le64_to_cpu(s.inode); 70 return ret; 71 } 72 73 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, 74 struct bch_inode_unpacked *inode) 75 { 76 struct btree_iter iter; 77 struct bkey_s_c k; 78 int ret; 79 80 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr), 81 BTREE_ITER_all_snapshots, k, ret) { 82 if (k.k->p.offset != inode_nr) 83 break; 84 if (!bkey_is_inode(k.k)) 85 continue; 86 ret = bch2_inode_unpack(k, inode); 87 goto found; 88 } 89 ret = -BCH_ERR_ENOENT_inode; 90 found: 91 bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); 92 bch2_trans_iter_exit(trans, &iter); 93 return ret; 94 } 95 96 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, 97 struct bch_inode_unpacked *inode, 98 u32 *snapshot) 99 { 100 struct btree_iter iter; 101 struct bkey_s_c k; 102 int ret; 103 104 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 105 SPOS(0, inode_nr, *snapshot), 0); 106 ret = bkey_err(k); 107 if (ret) 108 goto err; 109 110 ret = bkey_is_inode(k.k) 111 ? bch2_inode_unpack(k, inode) 112 : -BCH_ERR_ENOENT_inode; 113 if (!ret) 114 *snapshot = iter.pos.snapshot; 115 err: 116 bch2_trans_iter_exit(trans, &iter); 117 return ret; 118 } 119 120 static int lookup_dirent_in_snapshot(struct btree_trans *trans, 121 struct bch_hash_info hash_info, 122 subvol_inum dir, struct qstr *name, 123 u64 *target, unsigned *type, u32 snapshot) 124 { 125 struct btree_iter iter; 126 struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, 127 &hash_info, dir, name, 0, snapshot); 128 int ret = bkey_err(k); 129 if (ret) 130 return ret; 131 132 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); 133 *target = le64_to_cpu(d.v->d_inum); 134 *type = d.v->d_type; 135 bch2_trans_iter_exit(trans, &iter); 136 return 0; 137 } 138 139 static int __remove_dirent(struct btree_trans *trans, struct bpos pos) 140 { 141 struct bch_fs *c = trans->c; 142 struct btree_iter iter; 143 struct bch_inode_unpacked dir_inode; 144 struct bch_hash_info dir_hash_info; 145 int ret; 146 147 ret = lookup_first_inode(trans, pos.inode, &dir_inode); 148 if (ret) 149 goto err; 150 151 dir_hash_info = bch2_hash_info_init(c, &dir_inode); 152 153 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent); 154 155 ret = bch2_btree_iter_traverse(&iter) ?: 156 bch2_hash_delete_at(trans, bch2_dirent_hash_desc, 157 &dir_hash_info, &iter, 158 BTREE_UPDATE_internal_snapshot_node); 159 bch2_trans_iter_exit(trans, &iter); 160 err: 161 bch_err_fn(c, ret); 162 return ret; 163 } 164 165 /* Get lost+found, create if it doesn't exist: */ 166 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, 167 struct bch_inode_unpacked *lostfound, 168 u64 reattaching_inum) 169 { 170 struct bch_fs *c = trans->c; 171 struct qstr lostfound_str = QSTR("lost+found"); 172 u64 inum = 0; 173 unsigned d_type = 0; 174 int ret; 175 176 struct bch_snapshot_tree st; 177 ret = bch2_snapshot_tree_lookup(trans, 178 bch2_snapshot_tree(c, snapshot), &st); 179 if (ret) 180 return ret; 181 182 subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; 183 184 struct bch_subvolume subvol; 185 ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), 186 false, 0, &subvol); 187 bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", 188 le32_to_cpu(st.master_subvol), snapshot); 189 if (ret) 190 return ret; 191 192 if (!subvol.inode) { 193 struct btree_iter iter; 194 struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, 195 BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), 196 0, subvolume); 197 ret = PTR_ERR_OR_ZERO(subvol); 198 if (ret) 199 return ret; 200 201 subvol->v.inode = cpu_to_le64(reattaching_inum); 202 bch2_trans_iter_exit(trans, &iter); 203 } 204 205 root_inum.inum = le64_to_cpu(subvol.inode); 206 207 struct bch_inode_unpacked root_inode; 208 struct bch_hash_info root_hash_info; 209 u32 root_inode_snapshot = snapshot; 210 ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); 211 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", 212 root_inum.inum, le32_to_cpu(st.master_subvol)); 213 if (ret) 214 return ret; 215 216 root_hash_info = bch2_hash_info_init(c, &root_inode); 217 218 ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, 219 &lostfound_str, &inum, &d_type, snapshot); 220 if (bch2_err_matches(ret, ENOENT)) 221 goto create_lostfound; 222 223 bch_err_fn(c, ret); 224 if (ret) 225 return ret; 226 227 if (d_type != DT_DIR) { 228 bch_err(c, "error looking up lost+found: not a directory"); 229 return -BCH_ERR_ENOENT_not_directory; 230 } 231 232 /* 233 * The bch2_check_dirents pass has already run, dangling dirents 234 * shouldn't exist here: 235 */ 236 ret = lookup_inode(trans, inum, lostfound, &snapshot); 237 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", 238 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); 239 return ret; 240 241 create_lostfound: 242 /* 243 * XXX: we could have a nicer log message here if we had a nice way to 244 * walk backpointers to print a path 245 */ 246 bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); 247 248 u64 now = bch2_current_time(c); 249 struct btree_iter lostfound_iter = { NULL }; 250 u64 cpu = raw_smp_processor_id(); 251 252 bch2_inode_init_early(c, lostfound); 253 bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); 254 lostfound->bi_dir = root_inode.bi_inum; 255 256 root_inode.bi_nlink++; 257 258 ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); 259 if (ret) 260 goto err; 261 262 bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); 263 ret = bch2_btree_iter_traverse(&lostfound_iter); 264 if (ret) 265 goto err; 266 267 ret = bch2_dirent_create_snapshot(trans, 268 0, root_inode.bi_inum, snapshot, &root_hash_info, 269 mode_to_type(lostfound->bi_mode), 270 &lostfound_str, 271 lostfound->bi_inum, 272 &lostfound->bi_dir_offset, 273 STR_HASH_must_create) ?: 274 bch2_inode_write_flags(trans, &lostfound_iter, lostfound, 275 BTREE_UPDATE_internal_snapshot_node); 276 err: 277 bch_err_msg(c, ret, "creating lost+found"); 278 bch2_trans_iter_exit(trans, &lostfound_iter); 279 return ret; 280 } 281 282 static int reattach_inode(struct btree_trans *trans, 283 struct bch_inode_unpacked *inode, 284 u32 inode_snapshot) 285 { 286 struct bch_hash_info dir_hash; 287 struct bch_inode_unpacked lostfound; 288 char name_buf[20]; 289 struct qstr name; 290 u64 dir_offset = 0; 291 u32 dirent_snapshot = inode_snapshot; 292 int ret; 293 294 if (inode->bi_subvol) { 295 inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; 296 297 u64 root_inum; 298 ret = subvol_lookup(trans, inode->bi_parent_subvol, 299 &dirent_snapshot, &root_inum); 300 if (ret) 301 return ret; 302 303 snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol); 304 } else { 305 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); 306 } 307 308 ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); 309 if (ret) 310 return ret; 311 312 if (S_ISDIR(inode->bi_mode)) { 313 lostfound.bi_nlink++; 314 315 ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); 316 if (ret) 317 return ret; 318 } 319 320 dir_hash = bch2_hash_info_init(trans->c, &lostfound); 321 322 name = (struct qstr) QSTR(name_buf); 323 324 ret = bch2_dirent_create_snapshot(trans, 325 inode->bi_parent_subvol, lostfound.bi_inum, 326 dirent_snapshot, 327 &dir_hash, 328 inode_d_type(inode), 329 &name, 330 inode->bi_subvol ?: inode->bi_inum, 331 &dir_offset, 332 STR_HASH_must_create); 333 if (ret) 334 return ret; 335 336 inode->bi_dir = lostfound.bi_inum; 337 inode->bi_dir_offset = dir_offset; 338 339 return __bch2_fsck_write_inode(trans, inode, inode_snapshot); 340 } 341 342 static int remove_backpointer(struct btree_trans *trans, 343 struct bch_inode_unpacked *inode) 344 { 345 struct btree_iter iter; 346 struct bkey_s_c_dirent d; 347 int ret; 348 349 d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, 350 POS(inode->bi_dir, inode->bi_dir_offset), 0, 351 dirent); 352 ret = bkey_err(d) ?: 353 __remove_dirent(trans, d.k->p); 354 bch2_trans_iter_exit(trans, &iter); 355 return ret; 356 } 357 358 static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) 359 { 360 struct bch_fs *c = trans->c; 361 362 struct bch_inode_unpacked inode; 363 int ret = bch2_inode_find_by_inum_trans(trans, 364 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 365 &inode); 366 if (ret) 367 return ret; 368 369 ret = remove_backpointer(trans, &inode); 370 bch_err_msg(c, ret, "removing dirent"); 371 if (ret) 372 return ret; 373 374 ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); 375 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 376 return ret; 377 } 378 379 static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) 380 { 381 struct bch_fs *c = trans->c; 382 383 if (!bch2_snapshot_is_leaf(c, snapshotid)) { 384 bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); 385 return -BCH_ERR_fsck_repair_unimplemented; 386 } 387 388 /* 389 * If inum isn't set, that means we're being called from check_dirents, 390 * not check_inodes - the root of this subvolume doesn't exist or we 391 * would have found it there: 392 */ 393 if (!inum) { 394 struct btree_iter inode_iter = {}; 395 struct bch_inode_unpacked new_inode; 396 u64 cpu = raw_smp_processor_id(); 397 398 bch2_inode_init_early(c, &new_inode); 399 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); 400 401 new_inode.bi_subvol = subvolid; 402 403 int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: 404 bch2_btree_iter_traverse(&inode_iter) ?: 405 bch2_inode_write(trans, &inode_iter, &new_inode); 406 bch2_trans_iter_exit(trans, &inode_iter); 407 if (ret) 408 return ret; 409 410 inum = new_inode.bi_inum; 411 } 412 413 bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); 414 415 struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); 416 int ret = PTR_ERR_OR_ZERO(new_subvol); 417 if (ret) 418 return ret; 419 420 bkey_subvolume_init(&new_subvol->k_i); 421 new_subvol->k.p.offset = subvolid; 422 new_subvol->v.snapshot = cpu_to_le32(snapshotid); 423 new_subvol->v.inode = cpu_to_le64(inum); 424 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); 425 if (ret) 426 return ret; 427 428 struct btree_iter iter; 429 struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, 430 BTREE_ID_snapshots, POS(0, snapshotid), 431 0, snapshot); 432 ret = PTR_ERR_OR_ZERO(s); 433 bch_err_msg(c, ret, "getting snapshot %u", snapshotid); 434 if (ret) 435 return ret; 436 437 u32 snapshot_tree = le32_to_cpu(s->v.tree); 438 439 s->v.subvol = cpu_to_le32(subvolid); 440 SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); 441 bch2_trans_iter_exit(trans, &iter); 442 443 struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, 444 BTREE_ID_snapshot_trees, POS(0, snapshot_tree), 445 0, snapshot_tree); 446 ret = PTR_ERR_OR_ZERO(st); 447 bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree); 448 if (ret) 449 return ret; 450 451 if (!st->v.master_subvol) 452 st->v.master_subvol = cpu_to_le32(subvolid); 453 454 bch2_trans_iter_exit(trans, &iter); 455 return 0; 456 } 457 458 static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode) 459 { 460 struct bch_fs *c = trans->c; 461 struct bch_inode_unpacked new_inode; 462 463 bch2_inode_init_early(c, &new_inode); 464 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL); 465 new_inode.bi_size = size; 466 new_inode.bi_inum = inum; 467 468 return __bch2_fsck_write_inode(trans, &new_inode, snapshot); 469 } 470 471 static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum) 472 { 473 struct btree_iter iter = {}; 474 475 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); 476 struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); 477 bch2_trans_iter_exit(trans, &iter); 478 int ret = bkey_err(k); 479 if (ret) 480 return ret; 481 482 return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG); 483 } 484 485 struct snapshots_seen { 486 struct bpos pos; 487 snapshot_id_list ids; 488 }; 489 490 static inline void snapshots_seen_exit(struct snapshots_seen *s) 491 { 492 darray_exit(&s->ids); 493 } 494 495 static inline void snapshots_seen_init(struct snapshots_seen *s) 496 { 497 memset(s, 0, sizeof(*s)); 498 } 499 500 static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) 501 { 502 u32 *i; 503 __darray_for_each(s->ids, i) { 504 if (*i == id) 505 return 0; 506 if (*i > id) 507 break; 508 } 509 510 int ret = darray_insert_item(&s->ids, i - s->ids.data, id); 511 if (ret) 512 bch_err(c, "error reallocating snapshots_seen table (size %zu)", 513 s->ids.size); 514 return ret; 515 } 516 517 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, 518 enum btree_id btree_id, struct bpos pos) 519 { 520 if (!bkey_eq(s->pos, pos)) 521 s->ids.nr = 0; 522 s->pos = pos; 523 524 return snapshot_list_add_nodup(c, &s->ids, pos.snapshot); 525 } 526 527 /** 528 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, 529 * and @ancestor hasn't been overwritten in @seen 530 * 531 * @c: filesystem handle 532 * @seen: list of snapshot ids already seen at current position 533 * @id: descendent snapshot id 534 * @ancestor: ancestor snapshot id 535 * 536 * Returns: whether key in @ancestor snapshot is visible in @id snapshot 537 */ 538 static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, 539 u32 id, u32 ancestor) 540 { 541 ssize_t i; 542 543 EBUG_ON(id > ancestor); 544 545 /* @ancestor should be the snapshot most recently added to @seen */ 546 EBUG_ON(ancestor != seen->pos.snapshot); 547 EBUG_ON(ancestor != darray_last(seen->ids)); 548 549 if (id == ancestor) 550 return true; 551 552 if (!bch2_snapshot_is_ancestor(c, id, ancestor)) 553 return false; 554 555 /* 556 * We know that @id is a descendant of @ancestor, we're checking if 557 * we've seen a key that overwrote @ancestor - i.e. also a descendent of 558 * @ascestor and with @id as a descendent. 559 * 560 * But we already know that we're scanning IDs between @id and @ancestor 561 * numerically, since snapshot ID lists are kept sorted, so if we find 562 * an id that's an ancestor of @id we're done: 563 */ 564 565 for (i = seen->ids.nr - 2; 566 i >= 0 && seen->ids.data[i] >= id; 567 --i) 568 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i])) 569 return false; 570 571 return true; 572 } 573 574 /** 575 * ref_visible - given a key with snapshot id @src that points to a key with 576 * snapshot id @dst, test whether there is some snapshot in which @dst is 577 * visible. 578 * 579 * @c: filesystem handle 580 * @s: list of snapshot IDs already seen at @src 581 * @src: snapshot ID of src key 582 * @dst: snapshot ID of dst key 583 * Returns: true if there is some snapshot in which @dst is visible 584 * 585 * Assumes we're visiting @src keys in natural key order 586 */ 587 static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, 588 u32 src, u32 dst) 589 { 590 return dst <= src 591 ? key_visible_in_snapshot(c, s, dst, src) 592 : bch2_snapshot_is_ancestor(c, src, dst); 593 } 594 595 static int ref_visible2(struct bch_fs *c, 596 u32 src, struct snapshots_seen *src_seen, 597 u32 dst, struct snapshots_seen *dst_seen) 598 { 599 if (dst > src) { 600 swap(dst, src); 601 swap(dst_seen, src_seen); 602 } 603 return key_visible_in_snapshot(c, src_seen, dst, src); 604 } 605 606 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ 607 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ 608 (_i)->snapshot <= (_snapshot); _i++) \ 609 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) 610 611 struct inode_walker_entry { 612 struct bch_inode_unpacked inode; 613 u32 snapshot; 614 bool seen_this_pos; 615 u64 count; 616 }; 617 618 struct inode_walker { 619 bool first_this_inode; 620 bool recalculate_sums; 621 struct bpos last_pos; 622 623 DARRAY(struct inode_walker_entry) inodes; 624 }; 625 626 static void inode_walker_exit(struct inode_walker *w) 627 { 628 darray_exit(&w->inodes); 629 } 630 631 static struct inode_walker inode_walker_init(void) 632 { 633 return (struct inode_walker) { 0, }; 634 } 635 636 static int add_inode(struct bch_fs *c, struct inode_walker *w, 637 struct bkey_s_c inode) 638 { 639 struct bch_inode_unpacked u; 640 641 BUG_ON(bch2_inode_unpack(inode, &u)); 642 643 return darray_push(&w->inodes, ((struct inode_walker_entry) { 644 .inode = u, 645 .snapshot = inode.k->p.snapshot, 646 })); 647 } 648 649 static int get_inodes_all_snapshots(struct btree_trans *trans, 650 struct inode_walker *w, u64 inum) 651 { 652 struct bch_fs *c = trans->c; 653 struct btree_iter iter; 654 struct bkey_s_c k; 655 int ret; 656 657 w->recalculate_sums = false; 658 w->inodes.nr = 0; 659 660 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 661 BTREE_ITER_all_snapshots, k, ret) { 662 if (k.k->p.offset != inum) 663 break; 664 665 if (bkey_is_inode(k.k)) 666 add_inode(c, w, k); 667 } 668 bch2_trans_iter_exit(trans, &iter); 669 670 if (ret) 671 return ret; 672 673 w->first_this_inode = true; 674 return 0; 675 } 676 677 static struct inode_walker_entry * 678 lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k) 679 { 680 bool is_whiteout = k.k->type == KEY_TYPE_whiteout; 681 682 struct inode_walker_entry *i; 683 __darray_for_each(w->inodes, i) 684 if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot)) 685 goto found; 686 687 return NULL; 688 found: 689 BUG_ON(k.k->p.snapshot > i->snapshot); 690 691 if (k.k->p.snapshot != i->snapshot && !is_whiteout) { 692 struct inode_walker_entry new = *i; 693 694 new.snapshot = k.k->p.snapshot; 695 new.count = 0; 696 697 struct printbuf buf = PRINTBUF; 698 bch2_bkey_val_to_text(&buf, c, k); 699 700 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" 701 "unexpected because we should always update the inode when we update a key in that inode\n" 702 "%s", 703 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf); 704 printbuf_exit(&buf); 705 706 while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot) 707 --i; 708 709 size_t pos = i - w->inodes.data; 710 int ret = darray_insert_item(&w->inodes, pos, new); 711 if (ret) 712 return ERR_PTR(ret); 713 714 i = w->inodes.data + pos; 715 } 716 717 return i; 718 } 719 720 static struct inode_walker_entry *walk_inode(struct btree_trans *trans, 721 struct inode_walker *w, 722 struct bkey_s_c k) 723 { 724 if (w->last_pos.inode != k.k->p.inode) { 725 int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); 726 if (ret) 727 return ERR_PTR(ret); 728 } else if (bkey_cmp(w->last_pos, k.k->p)) { 729 darray_for_each(w->inodes, i) 730 i->seen_this_pos = false; 731 } 732 733 w->last_pos = k.k->p; 734 735 return lookup_inode_for_snapshot(trans->c, w, k); 736 } 737 738 static int get_visible_inodes(struct btree_trans *trans, 739 struct inode_walker *w, 740 struct snapshots_seen *s, 741 u64 inum) 742 { 743 struct bch_fs *c = trans->c; 744 struct btree_iter iter; 745 struct bkey_s_c k; 746 int ret; 747 748 w->inodes.nr = 0; 749 750 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 751 BTREE_ITER_all_snapshots, k, ret) { 752 if (k.k->p.offset != inum) 753 break; 754 755 if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) 756 continue; 757 758 if (bkey_is_inode(k.k)) 759 add_inode(c, w, k); 760 761 if (k.k->p.snapshot >= s->pos.snapshot) 762 break; 763 } 764 bch2_trans_iter_exit(trans, &iter); 765 766 return ret; 767 } 768 769 static int hash_redo_key(struct btree_trans *trans, 770 const struct bch_hash_desc desc, 771 struct bch_hash_info *hash_info, 772 struct btree_iter *k_iter, struct bkey_s_c k) 773 { 774 struct bkey_i *delete; 775 struct bkey_i *tmp; 776 777 delete = bch2_trans_kmalloc(trans, sizeof(*delete)); 778 if (IS_ERR(delete)) 779 return PTR_ERR(delete); 780 781 tmp = bch2_bkey_make_mut_noupdate(trans, k); 782 if (IS_ERR(tmp)) 783 return PTR_ERR(tmp); 784 785 bkey_init(&delete->k); 786 delete->k.p = k_iter->pos; 787 return bch2_btree_iter_traverse(k_iter) ?: 788 bch2_trans_update(trans, k_iter, delete, 0) ?: 789 bch2_hash_set_in_snapshot(trans, desc, hash_info, 790 (subvol_inum) { 0, k.k->p.inode }, 791 k.k->p.snapshot, tmp, 792 STR_HASH_must_create| 793 BTREE_UPDATE_internal_snapshot_node) ?: 794 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 795 } 796 797 static int hash_check_key(struct btree_trans *trans, 798 const struct bch_hash_desc desc, 799 struct bch_hash_info *hash_info, 800 struct btree_iter *k_iter, struct bkey_s_c hash_k) 801 { 802 struct bch_fs *c = trans->c; 803 struct btree_iter iter = { NULL }; 804 struct printbuf buf = PRINTBUF; 805 struct bkey_s_c k; 806 u64 hash; 807 int ret = 0; 808 809 if (hash_k.k->type != desc.key_type) 810 return 0; 811 812 hash = desc.hash_bkey(hash_info, hash_k); 813 814 if (likely(hash == hash_k.k->p.offset)) 815 return 0; 816 817 if (hash_k.k->p.offset < hash) 818 goto bad_hash; 819 820 for_each_btree_key_norestart(trans, iter, desc.btree_id, 821 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), 822 BTREE_ITER_slots, k, ret) { 823 if (bkey_eq(k.k->p, hash_k.k->p)) 824 break; 825 826 if (fsck_err_on(k.k->type == desc.key_type && 827 !desc.cmp_bkey(k, hash_k), c, 828 hash_table_key_duplicate, 829 "duplicate hash table keys:\n%s", 830 (printbuf_reset(&buf), 831 bch2_bkey_val_to_text(&buf, c, hash_k), 832 buf.buf))) { 833 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; 834 break; 835 } 836 837 if (bkey_deleted(k.k)) { 838 bch2_trans_iter_exit(trans, &iter); 839 goto bad_hash; 840 } 841 } 842 out: 843 bch2_trans_iter_exit(trans, &iter); 844 printbuf_exit(&buf); 845 return ret; 846 bad_hash: 847 if (fsck_err(c, hash_table_key_wrong_offset, 848 "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", 849 bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, 850 (printbuf_reset(&buf), 851 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { 852 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); 853 bch_err_fn(c, ret); 854 if (ret) 855 return ret; 856 ret = -BCH_ERR_transaction_restart_nested; 857 } 858 fsck_err: 859 goto out; 860 } 861 862 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, 863 struct btree_iter *iter, 864 struct bpos pos) 865 { 866 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); 867 } 868 869 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, 870 struct btree_iter *iter, 871 struct bch_inode_unpacked *inode, 872 u32 *snapshot) 873 { 874 if (inode->bi_subvol) { 875 u64 inum; 876 int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum); 877 if (ret) 878 return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) }); 879 } 880 881 return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot)); 882 } 883 884 static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, 885 struct bkey_s_c_dirent d) 886 { 887 return inode->bi_dir == d.k->p.inode && 888 inode->bi_dir_offset == d.k->p.offset; 889 } 890 891 static bool dirent_points_to_inode(struct bkey_s_c_dirent d, 892 struct bch_inode_unpacked *inode) 893 { 894 return d.v->d_type == DT_SUBVOL 895 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 896 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 897 } 898 899 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 900 { 901 struct btree_iter iter; 902 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 903 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 904 bch2_trans_iter_exit(trans, &iter); 905 return ret; 906 } 907 908 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 909 struct bch_inode_unpacked *inode, 910 u32 inode_snapshot, bool *write_inode) 911 { 912 struct bch_fs *c = trans->c; 913 struct printbuf buf = PRINTBUF; 914 915 struct btree_iter dirent_iter = {}; 916 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 917 int ret = bkey_err(d); 918 if (ret && !bch2_err_matches(ret, ENOENT)) 919 return ret; 920 921 if (fsck_err_on(ret, 922 c, inode_points_to_missing_dirent, 923 "inode points to missing dirent\n%s", 924 (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || 925 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 926 c, inode_points_to_wrong_dirent, 927 "inode points to dirent that does not point back:\n%s", 928 (bch2_bkey_val_to_text(&buf, c, inode_k), 929 prt_newline(&buf), 930 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 931 /* 932 * We just clear the backpointer fields for now. If we find a 933 * dirent that points to this inode in check_dirents(), we'll 934 * update it then; then when we get to check_path() if the 935 * backpointer is still 0 we'll reattach it. 936 */ 937 inode->bi_dir = 0; 938 inode->bi_dir_offset = 0; 939 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 940 *write_inode = true; 941 } 942 943 ret = 0; 944 fsck_err: 945 bch2_trans_iter_exit(trans, &dirent_iter); 946 printbuf_exit(&buf); 947 bch_err_fn(c, ret); 948 return ret; 949 } 950 951 static int check_inode(struct btree_trans *trans, 952 struct btree_iter *iter, 953 struct bkey_s_c k, 954 struct bch_inode_unpacked *prev, 955 struct snapshots_seen *s, 956 bool full) 957 { 958 struct bch_fs *c = trans->c; 959 struct bch_inode_unpacked u; 960 bool do_update = false; 961 int ret; 962 963 ret = bch2_check_key_has_snapshot(trans, iter, k); 964 if (ret < 0) 965 goto err; 966 if (ret) 967 return 0; 968 969 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 970 if (ret) 971 goto err; 972 973 if (!bkey_is_inode(k.k)) 974 return 0; 975 976 BUG_ON(bch2_inode_unpack(k, &u)); 977 978 if (!full && 979 !(u.bi_flags & (BCH_INODE_i_size_dirty| 980 BCH_INODE_i_sectors_dirty| 981 BCH_INODE_unlinked))) 982 return 0; 983 984 if (prev->bi_inum != u.bi_inum) 985 *prev = u; 986 987 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 988 inode_d_type(prev) != inode_d_type(&u), 989 c, inode_snapshot_mismatch, 990 "inodes in different snapshots don't match")) { 991 bch_err(c, "repair not implemented yet"); 992 return -BCH_ERR_fsck_repair_unimplemented; 993 } 994 995 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 996 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 997 struct bpos new_min_pos; 998 999 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 1000 if (ret) 1001 goto err; 1002 1003 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1004 1005 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1006 1007 bch_err_msg(c, ret, "in fsck updating inode"); 1008 if (ret) 1009 return ret; 1010 1011 if (!bpos_eq(new_min_pos, POS_MIN)) 1012 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 1013 return 0; 1014 } 1015 1016 if (u.bi_flags & BCH_INODE_unlinked) { 1017 ret = check_inode_deleted_list(trans, k.k->p); 1018 if (ret < 0) 1019 return ret; 1020 1021 fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list, 1022 "inode %llu:%u unlinked, but not on deleted list", 1023 u.bi_inum, k.k->p.snapshot); 1024 ret = 0; 1025 } 1026 1027 if (u.bi_flags & BCH_INODE_unlinked && 1028 (!c->sb.clean || 1029 fsck_err(c, inode_unlinked_but_clean, 1030 "filesystem marked clean, but inode %llu unlinked", 1031 u.bi_inum))) { 1032 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); 1033 bch_err_msg(c, ret, "in fsck deleting inode"); 1034 return ret; 1035 } 1036 1037 if (u.bi_flags & BCH_INODE_i_size_dirty && 1038 (!c->sb.clean || 1039 fsck_err(c, inode_i_size_dirty_but_clean, 1040 "filesystem marked clean, but inode %llu has i_size dirty", 1041 u.bi_inum))) { 1042 bch_verbose(c, "truncating inode %llu", u.bi_inum); 1043 1044 /* 1045 * XXX: need to truncate partial blocks too here - or ideally 1046 * just switch units to bytes and that issue goes away 1047 */ 1048 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, 1049 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, 1050 iter->pos.snapshot), 1051 POS(u.bi_inum, U64_MAX), 1052 0, NULL); 1053 bch_err_msg(c, ret, "in fsck truncating inode"); 1054 if (ret) 1055 return ret; 1056 1057 /* 1058 * We truncated without our normal sector accounting hook, just 1059 * make sure we recalculate it: 1060 */ 1061 u.bi_flags |= BCH_INODE_i_sectors_dirty; 1062 1063 u.bi_flags &= ~BCH_INODE_i_size_dirty; 1064 do_update = true; 1065 } 1066 1067 if (u.bi_flags & BCH_INODE_i_sectors_dirty && 1068 (!c->sb.clean || 1069 fsck_err(c, inode_i_sectors_dirty_but_clean, 1070 "filesystem marked clean, but inode %llu has i_sectors dirty", 1071 u.bi_inum))) { 1072 s64 sectors; 1073 1074 bch_verbose(c, "recounting sectors for inode %llu", 1075 u.bi_inum); 1076 1077 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); 1078 if (sectors < 0) { 1079 bch_err_msg(c, sectors, "in fsck recounting inode sectors"); 1080 return sectors; 1081 } 1082 1083 u.bi_sectors = sectors; 1084 u.bi_flags &= ~BCH_INODE_i_sectors_dirty; 1085 do_update = true; 1086 } 1087 1088 if (u.bi_flags & BCH_INODE_backptr_untrusted) { 1089 u.bi_dir = 0; 1090 u.bi_dir_offset = 0; 1091 u.bi_flags &= ~BCH_INODE_backptr_untrusted; 1092 do_update = true; 1093 } 1094 1095 if (u.bi_dir || u.bi_dir_offset) { 1096 ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update); 1097 if (ret) 1098 goto err; 1099 } 1100 1101 if (fsck_err_on(u.bi_parent_subvol && 1102 (u.bi_subvol == 0 || 1103 u.bi_subvol == BCACHEFS_ROOT_SUBVOL), 1104 c, inode_bi_parent_nonzero, 1105 "inode %llu:%u has subvol %u but nonzero parent subvol %u", 1106 u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) { 1107 u.bi_parent_subvol = 0; 1108 do_update = true; 1109 } 1110 1111 if (u.bi_subvol) { 1112 struct bch_subvolume s; 1113 1114 ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); 1115 if (ret && !bch2_err_matches(ret, ENOENT)) 1116 goto err; 1117 1118 if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1119 ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum); 1120 goto do_update; 1121 } 1122 1123 if (fsck_err_on(ret, 1124 c, inode_bi_subvol_missing, 1125 "inode %llu:%u bi_subvol points to missing subvolume %u", 1126 u.bi_inum, k.k->p.snapshot, u.bi_subvol) || 1127 fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum || 1128 !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot), 1129 k.k->p.snapshot), 1130 c, inode_bi_subvol_wrong, 1131 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u", 1132 u.bi_inum, k.k->p.snapshot, u.bi_subvol, 1133 le64_to_cpu(s.inode), 1134 le32_to_cpu(s.snapshot))) { 1135 u.bi_subvol = 0; 1136 u.bi_parent_subvol = 0; 1137 do_update = true; 1138 } 1139 } 1140 do_update: 1141 if (do_update) { 1142 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1143 bch_err_msg(c, ret, "in fsck updating inode"); 1144 if (ret) 1145 return ret; 1146 } 1147 err: 1148 fsck_err: 1149 bch_err_fn(c, ret); 1150 return ret; 1151 } 1152 1153 int bch2_check_inodes(struct bch_fs *c) 1154 { 1155 bool full = c->opts.fsck; 1156 struct bch_inode_unpacked prev = { 0 }; 1157 struct snapshots_seen s; 1158 1159 snapshots_seen_init(&s); 1160 1161 int ret = bch2_trans_run(c, 1162 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 1163 POS_MIN, 1164 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1165 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1166 check_inode(trans, &iter, k, &prev, &s, full))); 1167 1168 snapshots_seen_exit(&s); 1169 bch_err_fn(c, ret); 1170 return ret; 1171 } 1172 1173 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) 1174 { 1175 struct bch_fs *c = trans->c; 1176 int ret = 0; 1177 s64 count2; 1178 1179 darray_for_each(w->inodes, i) { 1180 if (i->inode.bi_sectors == i->count) 1181 continue; 1182 1183 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1184 1185 if (w->recalculate_sums) 1186 i->count = count2; 1187 1188 if (i->count != count2) { 1189 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1190 w->last_pos.inode, i->snapshot, i->count, count2); 1191 return -BCH_ERR_internal_fsck_err; 1192 } 1193 1194 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1195 c, inode_i_sectors_wrong, 1196 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1197 w->last_pos.inode, i->snapshot, 1198 i->inode.bi_sectors, i->count)) { 1199 i->inode.bi_sectors = i->count; 1200 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1201 if (ret) 1202 break; 1203 } 1204 } 1205 fsck_err: 1206 bch_err_fn(c, ret); 1207 return ret; 1208 } 1209 1210 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1211 { 1212 u32 restart_count = trans->restart_count; 1213 return check_i_sectors_notnested(trans, w) ?: 1214 trans_was_restarted(trans, restart_count); 1215 } 1216 1217 struct extent_end { 1218 u32 snapshot; 1219 u64 offset; 1220 struct snapshots_seen seen; 1221 }; 1222 1223 struct extent_ends { 1224 struct bpos last_pos; 1225 DARRAY(struct extent_end) e; 1226 }; 1227 1228 static void extent_ends_reset(struct extent_ends *extent_ends) 1229 { 1230 darray_for_each(extent_ends->e, i) 1231 snapshots_seen_exit(&i->seen); 1232 extent_ends->e.nr = 0; 1233 } 1234 1235 static void extent_ends_exit(struct extent_ends *extent_ends) 1236 { 1237 extent_ends_reset(extent_ends); 1238 darray_exit(&extent_ends->e); 1239 } 1240 1241 static void extent_ends_init(struct extent_ends *extent_ends) 1242 { 1243 memset(extent_ends, 0, sizeof(*extent_ends)); 1244 } 1245 1246 static int extent_ends_at(struct bch_fs *c, 1247 struct extent_ends *extent_ends, 1248 struct snapshots_seen *seen, 1249 struct bkey_s_c k) 1250 { 1251 struct extent_end *i, n = (struct extent_end) { 1252 .offset = k.k->p.offset, 1253 .snapshot = k.k->p.snapshot, 1254 .seen = *seen, 1255 }; 1256 1257 n.seen.ids.data = kmemdup(seen->ids.data, 1258 sizeof(seen->ids.data[0]) * seen->ids.size, 1259 GFP_KERNEL); 1260 if (!n.seen.ids.data) 1261 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1262 1263 __darray_for_each(extent_ends->e, i) { 1264 if (i->snapshot == k.k->p.snapshot) { 1265 snapshots_seen_exit(&i->seen); 1266 *i = n; 1267 return 0; 1268 } 1269 1270 if (i->snapshot >= k.k->p.snapshot) 1271 break; 1272 } 1273 1274 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1275 } 1276 1277 static int overlapping_extents_found(struct btree_trans *trans, 1278 enum btree_id btree, 1279 struct bpos pos1, struct snapshots_seen *pos1_seen, 1280 struct bkey pos2, 1281 bool *fixed, 1282 struct extent_end *extent_end) 1283 { 1284 struct bch_fs *c = trans->c; 1285 struct printbuf buf = PRINTBUF; 1286 struct btree_iter iter1, iter2 = { NULL }; 1287 struct bkey_s_c k1, k2; 1288 int ret; 1289 1290 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1291 1292 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1293 BTREE_ITER_all_snapshots| 1294 BTREE_ITER_not_extents); 1295 k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); 1296 ret = bkey_err(k1); 1297 if (ret) 1298 goto err; 1299 1300 prt_str(&buf, "\n "); 1301 bch2_bkey_val_to_text(&buf, c, k1); 1302 1303 if (!bpos_eq(pos1, k1.k->p)) { 1304 prt_str(&buf, "\n wanted\n "); 1305 bch2_bpos_to_text(&buf, pos1); 1306 prt_str(&buf, "\n "); 1307 bch2_bkey_to_text(&buf, &pos2); 1308 1309 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1310 __func__, buf.buf); 1311 ret = -BCH_ERR_internal_fsck_err; 1312 goto err; 1313 } 1314 1315 bch2_trans_copy_iter(&iter2, &iter1); 1316 1317 while (1) { 1318 bch2_btree_iter_advance(&iter2); 1319 1320 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1321 ret = bkey_err(k2); 1322 if (ret) 1323 goto err; 1324 1325 if (bpos_ge(k2.k->p, pos2.p)) 1326 break; 1327 } 1328 1329 prt_str(&buf, "\n "); 1330 bch2_bkey_val_to_text(&buf, c, k2); 1331 1332 if (bpos_gt(k2.k->p, pos2.p) || 1333 pos2.size != k2.k->size) { 1334 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1335 __func__, buf.buf); 1336 ret = -BCH_ERR_internal_fsck_err; 1337 goto err; 1338 } 1339 1340 prt_printf(&buf, "\n overwriting %s extent", 1341 pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); 1342 1343 if (fsck_err(c, extent_overlapping, 1344 "overlapping extents%s", buf.buf)) { 1345 struct btree_iter *old_iter = &iter1; 1346 struct disk_reservation res = { 0 }; 1347 1348 if (pos1.snapshot < pos2.p.snapshot) { 1349 old_iter = &iter2; 1350 swap(k1, k2); 1351 } 1352 1353 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1354 1355 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1356 BTREE_UPDATE_internal_snapshot_node, 1357 k1, k2) ?: 1358 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1359 bch2_disk_reservation_put(c, &res); 1360 1361 if (ret) 1362 goto err; 1363 1364 *fixed = true; 1365 1366 if (pos1.snapshot == pos2.p.snapshot) { 1367 /* 1368 * We overwrote the first extent, and did the overwrite 1369 * in the same snapshot: 1370 */ 1371 extent_end->offset = bkey_start_offset(&pos2); 1372 } else if (pos1.snapshot > pos2.p.snapshot) { 1373 /* 1374 * We overwrote the first extent in pos2's snapshot: 1375 */ 1376 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1377 } else { 1378 /* 1379 * We overwrote the second extent - restart 1380 * check_extent() from the top: 1381 */ 1382 ret = -BCH_ERR_transaction_restart_nested; 1383 } 1384 } 1385 fsck_err: 1386 err: 1387 bch2_trans_iter_exit(trans, &iter2); 1388 bch2_trans_iter_exit(trans, &iter1); 1389 printbuf_exit(&buf); 1390 return ret; 1391 } 1392 1393 static int check_overlapping_extents(struct btree_trans *trans, 1394 struct snapshots_seen *seen, 1395 struct extent_ends *extent_ends, 1396 struct bkey_s_c k, 1397 struct btree_iter *iter, 1398 bool *fixed) 1399 { 1400 struct bch_fs *c = trans->c; 1401 int ret = 0; 1402 1403 /* transaction restart, running again */ 1404 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1405 return 0; 1406 1407 if (extent_ends->last_pos.inode != k.k->p.inode) 1408 extent_ends_reset(extent_ends); 1409 1410 darray_for_each(extent_ends->e, i) { 1411 if (i->offset <= bkey_start_offset(k.k)) 1412 continue; 1413 1414 if (!ref_visible2(c, 1415 k.k->p.snapshot, seen, 1416 i->snapshot, &i->seen)) 1417 continue; 1418 1419 ret = overlapping_extents_found(trans, iter->btree_id, 1420 SPOS(iter->pos.inode, 1421 i->offset, 1422 i->snapshot), 1423 &i->seen, 1424 *k.k, fixed, i); 1425 if (ret) 1426 goto err; 1427 } 1428 1429 extent_ends->last_pos = k.k->p; 1430 err: 1431 return ret; 1432 } 1433 1434 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1435 struct bkey_s_c k) 1436 { 1437 struct bch_fs *c = trans->c; 1438 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1439 struct bch_extent_crc_unpacked crc; 1440 const union bch_extent_entry *i; 1441 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1442 1443 bkey_for_each_crc(k.k, ptrs, crc, i) 1444 if (crc_is_encoded(crc) && 1445 crc.uncompressed_size > encoded_extent_max_sectors) { 1446 struct printbuf buf = PRINTBUF; 1447 1448 bch2_bkey_val_to_text(&buf, c, k); 1449 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1450 printbuf_exit(&buf); 1451 } 1452 1453 return 0; 1454 } 1455 1456 static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1457 struct bkey_s_c k, 1458 struct inode_walker *inode, 1459 struct snapshots_seen *s, 1460 struct extent_ends *extent_ends) 1461 { 1462 struct bch_fs *c = trans->c; 1463 struct inode_walker_entry *i; 1464 struct printbuf buf = PRINTBUF; 1465 int ret = 0; 1466 1467 ret = bch2_check_key_has_snapshot(trans, iter, k); 1468 if (ret) { 1469 ret = ret < 0 ? ret : 0; 1470 goto out; 1471 } 1472 1473 if (inode->last_pos.inode != k.k->p.inode) { 1474 ret = check_i_sectors(trans, inode); 1475 if (ret) 1476 goto err; 1477 } 1478 1479 i = walk_inode(trans, inode, k); 1480 ret = PTR_ERR_OR_ZERO(i); 1481 if (ret) 1482 goto err; 1483 1484 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1485 if (ret) 1486 goto err; 1487 1488 if (k.k->type != KEY_TYPE_whiteout) { 1489 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 1490 ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?: 1491 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 1492 if (ret) 1493 goto err; 1494 1495 inode->last_pos.inode--; 1496 ret = -BCH_ERR_transaction_restart_nested; 1497 goto err; 1498 } 1499 1500 if (fsck_err_on(!i, c, extent_in_missing_inode, 1501 "extent in missing inode:\n %s", 1502 (printbuf_reset(&buf), 1503 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1504 goto delete; 1505 1506 if (fsck_err_on(i && 1507 !S_ISREG(i->inode.bi_mode) && 1508 !S_ISLNK(i->inode.bi_mode), 1509 c, extent_in_non_reg_inode, 1510 "extent in non regular inode mode %o:\n %s", 1511 i->inode.bi_mode, 1512 (printbuf_reset(&buf), 1513 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1514 goto delete; 1515 1516 ret = check_overlapping_extents(trans, s, extent_ends, k, iter, 1517 &inode->recalculate_sums); 1518 if (ret) 1519 goto err; 1520 } 1521 1522 /* 1523 * Check inodes in reverse order, from oldest snapshots to newest, 1524 * starting from the inode that matches this extent's snapshot. If we 1525 * didn't have one, iterate over all inodes: 1526 */ 1527 if (!i) 1528 i = inode->inodes.data + inode->inodes.nr - 1; 1529 1530 for (; 1531 inode->inodes.data && i >= inode->inodes.data; 1532 --i) { 1533 if (i->snapshot > k.k->p.snapshot || 1534 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) 1535 continue; 1536 1537 if (k.k->type != KEY_TYPE_whiteout) { 1538 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && 1539 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1540 !bkey_extent_is_reservation(k), 1541 c, extent_past_end_of_inode, 1542 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1543 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1544 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1545 struct btree_iter iter2; 1546 1547 bch2_trans_copy_iter(&iter2, iter); 1548 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1549 ret = bch2_btree_iter_traverse(&iter2) ?: 1550 bch2_btree_delete_at(trans, &iter2, 1551 BTREE_UPDATE_internal_snapshot_node); 1552 bch2_trans_iter_exit(trans, &iter2); 1553 if (ret) 1554 goto err; 1555 1556 iter->k.type = KEY_TYPE_whiteout; 1557 } 1558 1559 if (bkey_extent_is_allocation(k.k)) 1560 i->count += k.k->size; 1561 } 1562 1563 i->seen_this_pos = true; 1564 } 1565 1566 if (k.k->type != KEY_TYPE_whiteout) { 1567 ret = extent_ends_at(c, extent_ends, s, k); 1568 if (ret) 1569 goto err; 1570 } 1571 out: 1572 err: 1573 fsck_err: 1574 printbuf_exit(&buf); 1575 bch_err_fn(c, ret); 1576 return ret; 1577 delete: 1578 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); 1579 goto out; 1580 } 1581 1582 /* 1583 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1584 * that i_size an i_sectors are consistent 1585 */ 1586 int bch2_check_extents(struct bch_fs *c) 1587 { 1588 struct inode_walker w = inode_walker_init(); 1589 struct snapshots_seen s; 1590 struct extent_ends extent_ends; 1591 struct disk_reservation res = { 0 }; 1592 1593 snapshots_seen_init(&s); 1594 extent_ends_init(&extent_ends); 1595 1596 int ret = bch2_trans_run(c, 1597 for_each_btree_key_commit(trans, iter, BTREE_ID_extents, 1598 POS(BCACHEFS_ROOT_INO, 0), 1599 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1600 &res, NULL, 1601 BCH_TRANS_COMMIT_no_enospc, ({ 1602 bch2_disk_reservation_put(c, &res); 1603 check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: 1604 check_extent_overbig(trans, &iter, k); 1605 })) ?: 1606 check_i_sectors_notnested(trans, &w)); 1607 1608 bch2_disk_reservation_put(c, &res); 1609 extent_ends_exit(&extent_ends); 1610 inode_walker_exit(&w); 1611 snapshots_seen_exit(&s); 1612 1613 bch_err_fn(c, ret); 1614 return ret; 1615 } 1616 1617 int bch2_check_indirect_extents(struct bch_fs *c) 1618 { 1619 struct disk_reservation res = { 0 }; 1620 1621 int ret = bch2_trans_run(c, 1622 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1623 POS_MIN, 1624 BTREE_ITER_prefetch, k, 1625 &res, NULL, 1626 BCH_TRANS_COMMIT_no_enospc, ({ 1627 bch2_disk_reservation_put(c, &res); 1628 check_extent_overbig(trans, &iter, k); 1629 }))); 1630 1631 bch2_disk_reservation_put(c, &res); 1632 bch_err_fn(c, ret); 1633 return ret; 1634 } 1635 1636 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) 1637 { 1638 struct bch_fs *c = trans->c; 1639 int ret = 0; 1640 s64 count2; 1641 1642 darray_for_each(w->inodes, i) { 1643 if (i->inode.bi_nlink == i->count) 1644 continue; 1645 1646 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1647 if (count2 < 0) 1648 return count2; 1649 1650 if (i->count != count2) { 1651 bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", 1652 w->last_pos.inode, i->snapshot, i->count, count2); 1653 i->count = count2; 1654 if (i->inode.bi_nlink == i->count) 1655 continue; 1656 } 1657 1658 if (fsck_err_on(i->inode.bi_nlink != i->count, 1659 c, inode_dir_wrong_nlink, 1660 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1661 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1662 i->inode.bi_nlink = i->count; 1663 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1664 if (ret) 1665 break; 1666 } 1667 } 1668 fsck_err: 1669 bch_err_fn(c, ret); 1670 return ret; 1671 } 1672 1673 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) 1674 { 1675 u32 restart_count = trans->restart_count; 1676 return check_subdir_count_notnested(trans, w) ?: 1677 trans_was_restarted(trans, restart_count); 1678 } 1679 1680 noinline_for_stack 1681 static int check_dirent_inode_dirent(struct btree_trans *trans, 1682 struct btree_iter *iter, 1683 struct bkey_s_c_dirent d, 1684 struct bch_inode_unpacked *target, 1685 u32 target_snapshot) 1686 { 1687 struct bch_fs *c = trans->c; 1688 struct printbuf buf = PRINTBUF; 1689 int ret = 0; 1690 1691 if (inode_points_to_dirent(target, d)) 1692 return 0; 1693 1694 if (bch2_inode_should_have_bp(target) && 1695 !fsck_err(c, inode_wrong_backpointer, 1696 "dirent points to inode that does not point back:\n %s", 1697 (bch2_bkey_val_to_text(&buf, c, d.s_c), 1698 prt_printf(&buf, "\n "), 1699 bch2_inode_unpacked_to_text(&buf, target), 1700 buf.buf))) 1701 goto out_noiter; 1702 1703 if (!target->bi_dir && 1704 !target->bi_dir_offset) { 1705 target->bi_dir = d.k->p.inode; 1706 target->bi_dir_offset = d.k->p.offset; 1707 return __bch2_fsck_write_inode(trans, target, target_snapshot); 1708 } 1709 1710 struct btree_iter bp_iter = { NULL }; 1711 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1712 SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1713 ret = bkey_err(bp_dirent); 1714 if (ret && !bch2_err_matches(ret, ENOENT)) 1715 goto err; 1716 1717 bool backpointer_exists = !ret; 1718 ret = 0; 1719 1720 if (fsck_err_on(!backpointer_exists, 1721 c, inode_wrong_backpointer, 1722 "inode %llu:%u has wrong backpointer:\n" 1723 "got %llu:%llu\n" 1724 "should be %llu:%llu", 1725 target->bi_inum, target_snapshot, 1726 target->bi_dir, 1727 target->bi_dir_offset, 1728 d.k->p.inode, 1729 d.k->p.offset)) { 1730 target->bi_dir = d.k->p.inode; 1731 target->bi_dir_offset = d.k->p.offset; 1732 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1733 goto out; 1734 } 1735 1736 bch2_bkey_val_to_text(&buf, c, d.s_c); 1737 prt_newline(&buf); 1738 if (backpointer_exists) 1739 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 1740 1741 if (fsck_err_on(backpointer_exists && 1742 (S_ISDIR(target->bi_mode) || 1743 target->bi_subvol), 1744 c, inode_dir_multiple_links, 1745 "%s %llu:%u with multiple links\n%s", 1746 S_ISDIR(target->bi_mode) ? "directory" : "subvolume", 1747 target->bi_inum, target_snapshot, buf.buf)) { 1748 ret = __remove_dirent(trans, d.k->p); 1749 goto out; 1750 } 1751 1752 /* 1753 * hardlinked file with nlink 0: 1754 * We're just adjusting nlink here so check_nlinks() will pick 1755 * it up, it ignores inodes with nlink 0 1756 */ 1757 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1758 c, inode_multiple_links_but_nlink_0, 1759 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1760 target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1761 target->bi_nlink++; 1762 target->bi_flags &= ~BCH_INODE_unlinked; 1763 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1764 if (ret) 1765 goto err; 1766 } 1767 out: 1768 err: 1769 fsck_err: 1770 bch2_trans_iter_exit(trans, &bp_iter); 1771 out_noiter: 1772 printbuf_exit(&buf); 1773 bch_err_fn(c, ret); 1774 return ret; 1775 } 1776 1777 noinline_for_stack 1778 static int check_dirent_target(struct btree_trans *trans, 1779 struct btree_iter *iter, 1780 struct bkey_s_c_dirent d, 1781 struct bch_inode_unpacked *target, 1782 u32 target_snapshot) 1783 { 1784 struct bch_fs *c = trans->c; 1785 struct bkey_i_dirent *n; 1786 struct printbuf buf = PRINTBUF; 1787 int ret = 0; 1788 1789 ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1790 if (ret) 1791 goto err; 1792 1793 if (fsck_err_on(d.v->d_type != inode_d_type(target), 1794 c, dirent_d_type_wrong, 1795 "incorrect d_type: got %s, should be %s:\n%s", 1796 bch2_d_type_str(d.v->d_type), 1797 bch2_d_type_str(inode_d_type(target)), 1798 (printbuf_reset(&buf), 1799 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1800 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 1801 ret = PTR_ERR_OR_ZERO(n); 1802 if (ret) 1803 goto err; 1804 1805 bkey_reassemble(&n->k_i, d.s_c); 1806 n->v.d_type = inode_d_type(target); 1807 if (n->v.d_type == DT_SUBVOL) { 1808 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 1809 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1810 } else { 1811 n->v.d_inum = cpu_to_le64(target->bi_inum); 1812 } 1813 1814 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1815 if (ret) 1816 goto err; 1817 1818 d = dirent_i_to_s_c(n); 1819 } 1820 err: 1821 fsck_err: 1822 printbuf_exit(&buf); 1823 bch_err_fn(c, ret); 1824 return ret; 1825 } 1826 1827 /* find a subvolume that's a descendent of @snapshot: */ 1828 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1829 { 1830 struct btree_iter iter; 1831 struct bkey_s_c k; 1832 int ret; 1833 1834 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1835 if (k.k->type != KEY_TYPE_subvolume) 1836 continue; 1837 1838 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1839 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1840 bch2_trans_iter_exit(trans, &iter); 1841 *subvolid = k.k->p.offset; 1842 goto found; 1843 } 1844 } 1845 if (!ret) 1846 ret = -ENOENT; 1847 found: 1848 bch2_trans_iter_exit(trans, &iter); 1849 return ret; 1850 } 1851 1852 noinline_for_stack 1853 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1854 struct bkey_s_c_dirent d) 1855 { 1856 struct bch_fs *c = trans->c; 1857 struct btree_iter subvol_iter = {}; 1858 struct bch_inode_unpacked subvol_root; 1859 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1860 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1861 u32 parent_snapshot; 1862 u32 new_parent_subvol = 0; 1863 u64 parent_inum; 1864 struct printbuf buf = PRINTBUF; 1865 int ret = 0; 1866 1867 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1868 if (ret && !bch2_err_matches(ret, ENOENT)) 1869 return ret; 1870 1871 if (ret || 1872 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 1873 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1874 if (ret2 && !bch2_err_matches(ret, ENOENT)) 1875 return ret2; 1876 } 1877 1878 if (ret && 1879 !new_parent_subvol && 1880 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1881 /* 1882 * Couldn't find a subvol for dirent's snapshot - but we lost 1883 * subvols, so we need to reconstruct: 1884 */ 1885 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 1886 if (ret) 1887 return ret; 1888 1889 parent_snapshot = d.k->p.snapshot; 1890 } 1891 1892 if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, 1893 "dirent parent_subvol points to missing subvolume\n%s", 1894 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1895 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1896 c, dirent_not_visible_in_parent_subvol, 1897 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1898 parent_snapshot, 1899 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1900 if (!new_parent_subvol) { 1901 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 1902 return -BCH_ERR_fsck_repair_unimplemented; 1903 } 1904 1905 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1906 ret = PTR_ERR_OR_ZERO(new_dirent); 1907 if (ret) 1908 goto err; 1909 1910 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1911 } 1912 1913 struct bkey_s_c_subvolume s = 1914 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1915 BTREE_ID_subvolumes, POS(0, target_subvol), 1916 0, subvolume); 1917 ret = bkey_err(s.s_c); 1918 if (ret && !bch2_err_matches(ret, ENOENT)) 1919 return ret; 1920 1921 if (ret) { 1922 if (fsck_err(c, dirent_to_missing_subvol, 1923 "dirent points to missing subvolume\n%s", 1924 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1925 return __remove_dirent(trans, d.k->p); 1926 ret = 0; 1927 goto out; 1928 } 1929 1930 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 1931 c, subvol_fs_path_parent_wrong, 1932 "subvol with wrong fs_path_parent, should be be %u\n%s", 1933 parent_subvol, 1934 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 1935 struct bkey_i_subvolume *n = 1936 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 1937 ret = PTR_ERR_OR_ZERO(n); 1938 if (ret) 1939 goto err; 1940 1941 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 1942 } 1943 1944 u64 target_inum = le64_to_cpu(s.v->inode); 1945 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 1946 1947 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 1948 if (ret && !bch2_err_matches(ret, ENOENT)) 1949 goto err; 1950 1951 if (ret) { 1952 bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); 1953 ret = -BCH_ERR_fsck_repair_unimplemented; 1954 ret = 0; 1955 goto err; 1956 } 1957 1958 if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, 1959 c, inode_bi_parent_wrong, 1960 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 1961 target_inum, 1962 subvol_root.bi_parent_subvol, parent_subvol)) { 1963 subvol_root.bi_parent_subvol = parent_subvol; 1964 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 1965 if (ret) 1966 goto err; 1967 } 1968 1969 ret = check_dirent_target(trans, iter, d, &subvol_root, 1970 target_snapshot); 1971 if (ret) 1972 goto err; 1973 out: 1974 err: 1975 fsck_err: 1976 bch2_trans_iter_exit(trans, &subvol_iter); 1977 printbuf_exit(&buf); 1978 return ret; 1979 } 1980 1981 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 1982 struct bkey_s_c k, 1983 struct bch_hash_info *hash_info, 1984 struct inode_walker *dir, 1985 struct inode_walker *target, 1986 struct snapshots_seen *s) 1987 { 1988 struct bch_fs *c = trans->c; 1989 struct inode_walker_entry *i; 1990 struct printbuf buf = PRINTBUF; 1991 int ret = 0; 1992 1993 ret = bch2_check_key_has_snapshot(trans, iter, k); 1994 if (ret) { 1995 ret = ret < 0 ? ret : 0; 1996 goto out; 1997 } 1998 1999 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 2000 if (ret) 2001 goto err; 2002 2003 if (k.k->type == KEY_TYPE_whiteout) 2004 goto out; 2005 2006 if (dir->last_pos.inode != k.k->p.inode) { 2007 ret = check_subdir_count(trans, dir); 2008 if (ret) 2009 goto err; 2010 } 2011 2012 BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); 2013 2014 i = walk_inode(trans, dir, k); 2015 ret = PTR_ERR_OR_ZERO(i); 2016 if (ret < 0) 2017 goto err; 2018 2019 if (dir->first_this_inode && dir->inodes.nr) 2020 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); 2021 dir->first_this_inode = false; 2022 2023 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 2024 ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?: 2025 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 2026 if (ret) 2027 goto err; 2028 2029 dir->last_pos.inode--; 2030 ret = -BCH_ERR_transaction_restart_nested; 2031 goto err; 2032 } 2033 2034 if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, 2035 "dirent in nonexisting directory:\n%s", 2036 (printbuf_reset(&buf), 2037 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 2038 ret = bch2_btree_delete_at(trans, iter, 2039 BTREE_UPDATE_internal_snapshot_node); 2040 goto out; 2041 } 2042 2043 if (!i) 2044 goto out; 2045 2046 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), 2047 c, dirent_in_non_dir_inode, 2048 "dirent in non directory inode type %s:\n%s", 2049 bch2_d_type_str(inode_d_type(&i->inode)), 2050 (printbuf_reset(&buf), 2051 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 2052 ret = bch2_btree_delete_at(trans, iter, 0); 2053 goto out; 2054 } 2055 2056 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 2057 if (ret < 0) 2058 goto err; 2059 if (ret) { 2060 /* dirent has been deleted */ 2061 ret = 0; 2062 goto out; 2063 } 2064 2065 if (k.k->type != KEY_TYPE_dirent) 2066 goto out; 2067 2068 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2069 2070 if (d.v->d_type == DT_SUBVOL) { 2071 ret = check_dirent_to_subvol(trans, iter, d); 2072 if (ret) 2073 goto err; 2074 } else { 2075 ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); 2076 if (ret) 2077 goto err; 2078 2079 if (fsck_err_on(!target->inodes.nr, 2080 c, dirent_to_missing_inode, 2081 "dirent points to missing inode:\n%s", 2082 (printbuf_reset(&buf), 2083 bch2_bkey_val_to_text(&buf, c, k), 2084 buf.buf))) { 2085 ret = __remove_dirent(trans, d.k->p); 2086 if (ret) 2087 goto err; 2088 } 2089 2090 darray_for_each(target->inodes, i) { 2091 ret = check_dirent_target(trans, iter, d, 2092 &i->inode, i->snapshot); 2093 if (ret) 2094 goto err; 2095 } 2096 2097 if (d.v->d_type == DT_DIR) 2098 for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) 2099 i->count++; 2100 } 2101 out: 2102 err: 2103 fsck_err: 2104 printbuf_exit(&buf); 2105 bch_err_fn(c, ret); 2106 return ret; 2107 } 2108 2109 /* 2110 * Walk dirents: verify that they all have a corresponding S_ISDIR inode, 2111 * validate d_type 2112 */ 2113 int bch2_check_dirents(struct bch_fs *c) 2114 { 2115 struct inode_walker dir = inode_walker_init(); 2116 struct inode_walker target = inode_walker_init(); 2117 struct snapshots_seen s; 2118 struct bch_hash_info hash_info; 2119 2120 snapshots_seen_init(&s); 2121 2122 int ret = bch2_trans_run(c, 2123 for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, 2124 POS(BCACHEFS_ROOT_INO, 0), 2125 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2126 k, 2127 NULL, NULL, 2128 BCH_TRANS_COMMIT_no_enospc, 2129 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: 2130 check_subdir_count_notnested(trans, &dir)); 2131 2132 snapshots_seen_exit(&s); 2133 inode_walker_exit(&dir); 2134 inode_walker_exit(&target); 2135 bch_err_fn(c, ret); 2136 return ret; 2137 } 2138 2139 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, 2140 struct bkey_s_c k, 2141 struct bch_hash_info *hash_info, 2142 struct inode_walker *inode) 2143 { 2144 struct bch_fs *c = trans->c; 2145 struct inode_walker_entry *i; 2146 int ret; 2147 2148 ret = bch2_check_key_has_snapshot(trans, iter, k); 2149 if (ret < 0) 2150 return ret; 2151 if (ret) 2152 return 0; 2153 2154 i = walk_inode(trans, inode, k); 2155 ret = PTR_ERR_OR_ZERO(i); 2156 if (ret) 2157 return ret; 2158 2159 if (inode->first_this_inode && inode->inodes.nr) 2160 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); 2161 inode->first_this_inode = false; 2162 2163 if (fsck_err_on(!i, c, xattr_in_missing_inode, 2164 "xattr for missing inode %llu", 2165 k.k->p.inode)) 2166 return bch2_btree_delete_at(trans, iter, 0); 2167 2168 if (!i) 2169 return 0; 2170 2171 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2172 fsck_err: 2173 bch_err_fn(c, ret); 2174 return ret; 2175 } 2176 2177 /* 2178 * Walk xattrs: verify that they all have a corresponding inode 2179 */ 2180 int bch2_check_xattrs(struct bch_fs *c) 2181 { 2182 struct inode_walker inode = inode_walker_init(); 2183 struct bch_hash_info hash_info; 2184 int ret = 0; 2185 2186 ret = bch2_trans_run(c, 2187 for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, 2188 POS(BCACHEFS_ROOT_INO, 0), 2189 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2190 k, 2191 NULL, NULL, 2192 BCH_TRANS_COMMIT_no_enospc, 2193 check_xattr(trans, &iter, k, &hash_info, &inode))); 2194 bch_err_fn(c, ret); 2195 return ret; 2196 } 2197 2198 static int check_root_trans(struct btree_trans *trans) 2199 { 2200 struct bch_fs *c = trans->c; 2201 struct bch_inode_unpacked root_inode; 2202 u32 snapshot; 2203 u64 inum; 2204 int ret; 2205 2206 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); 2207 if (ret && !bch2_err_matches(ret, ENOENT)) 2208 return ret; 2209 2210 if (mustfix_fsck_err_on(ret, c, root_subvol_missing, 2211 "root subvol missing")) { 2212 struct bkey_i_subvolume *root_subvol = 2213 bch2_trans_kmalloc(trans, sizeof(*root_subvol)); 2214 ret = PTR_ERR_OR_ZERO(root_subvol); 2215 if (ret) 2216 goto err; 2217 2218 snapshot = U32_MAX; 2219 inum = BCACHEFS_ROOT_INO; 2220 2221 bkey_subvolume_init(&root_subvol->k_i); 2222 root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL; 2223 root_subvol->v.flags = 0; 2224 root_subvol->v.snapshot = cpu_to_le32(snapshot); 2225 root_subvol->v.inode = cpu_to_le64(inum); 2226 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0); 2227 bch_err_msg(c, ret, "writing root subvol"); 2228 if (ret) 2229 goto err; 2230 } 2231 2232 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2233 if (ret && !bch2_err_matches(ret, ENOENT)) 2234 return ret; 2235 2236 if (mustfix_fsck_err_on(ret, c, root_dir_missing, 2237 "root directory missing") || 2238 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), 2239 c, root_inode_not_dir, 2240 "root inode not a directory")) { 2241 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2242 0, NULL); 2243 root_inode.bi_inum = inum; 2244 2245 ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2246 bch_err_msg(c, ret, "writing root inode"); 2247 } 2248 err: 2249 fsck_err: 2250 return ret; 2251 } 2252 2253 /* Get root directory, create if it doesn't exist: */ 2254 int bch2_check_root(struct bch_fs *c) 2255 { 2256 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2257 check_root_trans(trans)); 2258 bch_err_fn(c, ret); 2259 return ret; 2260 } 2261 2262 typedef DARRAY(u32) darray_u32; 2263 2264 static bool darray_u32_has(darray_u32 *d, u32 v) 2265 { 2266 darray_for_each(*d, i) 2267 if (*i == v) 2268 return true; 2269 return false; 2270 } 2271 2272 /* 2273 * We've checked that inode backpointers point to valid dirents; here, it's 2274 * sufficient to check that the subvolume root has a dirent: 2275 */ 2276 static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) 2277 { 2278 struct bch_inode_unpacked inode; 2279 int ret = bch2_inode_find_by_inum_trans(trans, 2280 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 2281 &inode); 2282 if (ret) 2283 return ret; 2284 2285 return inode.bi_dir != 0; 2286 } 2287 2288 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 2289 { 2290 struct bch_fs *c = trans->c; 2291 struct btree_iter parent_iter = {}; 2292 darray_u32 subvol_path = {}; 2293 struct printbuf buf = PRINTBUF; 2294 int ret = 0; 2295 2296 if (k.k->type != KEY_TYPE_subvolume) 2297 return 0; 2298 2299 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { 2300 ret = darray_push(&subvol_path, k.k->p.offset); 2301 if (ret) 2302 goto err; 2303 2304 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 2305 2306 ret = subvol_has_dirent(trans, s); 2307 if (ret < 0) 2308 break; 2309 2310 if (fsck_err_on(!ret, 2311 c, subvol_unreachable, 2312 "unreachable subvolume %s", 2313 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2314 buf.buf))) { 2315 ret = reattach_subvol(trans, s); 2316 break; 2317 } 2318 2319 u32 parent = le32_to_cpu(s.v->fs_path_parent); 2320 2321 if (darray_u32_has(&subvol_path, parent)) { 2322 if (fsck_err(c, subvol_loop, "subvolume loop")) 2323 ret = reattach_subvol(trans, s); 2324 break; 2325 } 2326 2327 bch2_trans_iter_exit(trans, &parent_iter); 2328 bch2_trans_iter_init(trans, &parent_iter, 2329 BTREE_ID_subvolumes, POS(0, parent), 0); 2330 k = bch2_btree_iter_peek_slot(&parent_iter); 2331 ret = bkey_err(k); 2332 if (ret) 2333 goto err; 2334 2335 if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, 2336 c, subvol_unreachable, 2337 "unreachable subvolume %s", 2338 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2339 buf.buf))) { 2340 ret = reattach_subvol(trans, s); 2341 break; 2342 } 2343 } 2344 fsck_err: 2345 err: 2346 printbuf_exit(&buf); 2347 darray_exit(&subvol_path); 2348 bch2_trans_iter_exit(trans, &parent_iter); 2349 return ret; 2350 } 2351 2352 int bch2_check_subvolume_structure(struct bch_fs *c) 2353 { 2354 int ret = bch2_trans_run(c, 2355 for_each_btree_key_commit(trans, iter, 2356 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, 2357 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2358 check_subvol_path(trans, &iter, k))); 2359 bch_err_fn(c, ret); 2360 return ret; 2361 } 2362 2363 struct pathbuf_entry { 2364 u64 inum; 2365 u32 snapshot; 2366 }; 2367 2368 typedef DARRAY(struct pathbuf_entry) pathbuf; 2369 2370 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) 2371 { 2372 darray_for_each(*p, i) 2373 if (i->inum == inum && 2374 i->snapshot == snapshot) 2375 return true; 2376 return false; 2377 } 2378 2379 /* 2380 * Check that a given inode is reachable from its subvolume root - we already 2381 * verified subvolume connectivity: 2382 * 2383 * XXX: we should also be verifying that inodes are in the right subvolumes 2384 */ 2385 static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) 2386 { 2387 struct bch_fs *c = trans->c; 2388 struct btree_iter inode_iter = {}; 2389 struct bch_inode_unpacked inode; 2390 struct printbuf buf = PRINTBUF; 2391 u32 snapshot = inode_k.k->p.snapshot; 2392 int ret = 0; 2393 2394 p->nr = 0; 2395 2396 BUG_ON(bch2_inode_unpack(inode_k, &inode)); 2397 2398 while (!inode.bi_subvol) { 2399 struct btree_iter dirent_iter; 2400 struct bkey_s_c_dirent d; 2401 u32 parent_snapshot = snapshot; 2402 2403 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2404 ret = bkey_err(d.s_c); 2405 if (ret && !bch2_err_matches(ret, ENOENT)) 2406 break; 2407 2408 if (!ret && !dirent_points_to_inode(d, &inode)) { 2409 bch2_trans_iter_exit(trans, &dirent_iter); 2410 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2411 } 2412 2413 if (bch2_err_matches(ret, ENOENT)) { 2414 ret = 0; 2415 if (fsck_err(c, inode_unreachable, 2416 "unreachable inode\n%s", 2417 (printbuf_reset(&buf), 2418 bch2_bkey_val_to_text(&buf, c, inode_k), 2419 buf.buf))) 2420 ret = reattach_inode(trans, &inode, snapshot); 2421 goto out; 2422 } 2423 2424 bch2_trans_iter_exit(trans, &dirent_iter); 2425 2426 if (!S_ISDIR(inode.bi_mode)) 2427 break; 2428 2429 ret = darray_push(p, ((struct pathbuf_entry) { 2430 .inum = inode.bi_inum, 2431 .snapshot = snapshot, 2432 })); 2433 if (ret) 2434 return ret; 2435 2436 snapshot = parent_snapshot; 2437 2438 bch2_trans_iter_exit(trans, &inode_iter); 2439 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2440 SPOS(0, inode.bi_dir, snapshot), 0); 2441 ret = bkey_err(inode_k) ?: 2442 !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode 2443 : bch2_inode_unpack(inode_k, &inode); 2444 if (ret) { 2445 /* Should have been caught in dirents pass */ 2446 if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2447 bch_err(c, "error looking up parent directory: %i", ret); 2448 break; 2449 } 2450 2451 snapshot = inode_k.k->p.snapshot; 2452 2453 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2454 /* XXX print path */ 2455 bch_err(c, "directory structure loop"); 2456 2457 darray_for_each(*p, i) 2458 pr_err("%llu:%u", i->inum, i->snapshot); 2459 pr_err("%llu:%u", inode.bi_inum, snapshot); 2460 2461 if (fsck_err(c, dir_loop, "directory structure loop")) { 2462 ret = remove_backpointer(trans, &inode); 2463 bch_err_msg(c, ret, "removing dirent"); 2464 if (ret) 2465 break; 2466 2467 ret = reattach_inode(trans, &inode, snapshot); 2468 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2469 } 2470 break; 2471 } 2472 } 2473 out: 2474 fsck_err: 2475 bch2_trans_iter_exit(trans, &inode_iter); 2476 printbuf_exit(&buf); 2477 bch_err_fn(c, ret); 2478 return ret; 2479 } 2480 2481 /* 2482 * Check for unreachable inodes, as well as loops in the directory structure: 2483 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2484 * unreachable: 2485 */ 2486 int bch2_check_directory_structure(struct bch_fs *c) 2487 { 2488 pathbuf path = { 0, }; 2489 int ret; 2490 2491 ret = bch2_trans_run(c, 2492 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2493 BTREE_ITER_intent| 2494 BTREE_ITER_prefetch| 2495 BTREE_ITER_all_snapshots, k, 2496 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2497 if (!bkey_is_inode(k.k)) 2498 continue; 2499 2500 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2501 continue; 2502 2503 check_path(trans, &path, k); 2504 }))); 2505 darray_exit(&path); 2506 2507 bch_err_fn(c, ret); 2508 return ret; 2509 } 2510 2511 struct nlink_table { 2512 size_t nr; 2513 size_t size; 2514 2515 struct nlink { 2516 u64 inum; 2517 u32 snapshot; 2518 u32 count; 2519 } *d; 2520 }; 2521 2522 static int add_nlink(struct bch_fs *c, struct nlink_table *t, 2523 u64 inum, u32 snapshot) 2524 { 2525 if (t->nr == t->size) { 2526 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2527 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2528 2529 if (!d) { 2530 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2531 new_size); 2532 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2533 } 2534 2535 if (t->d) 2536 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2537 kvfree(t->d); 2538 2539 t->d = d; 2540 t->size = new_size; 2541 } 2542 2543 2544 t->d[t->nr++] = (struct nlink) { 2545 .inum = inum, 2546 .snapshot = snapshot, 2547 }; 2548 2549 return 0; 2550 } 2551 2552 static int nlink_cmp(const void *_l, const void *_r) 2553 { 2554 const struct nlink *l = _l; 2555 const struct nlink *r = _r; 2556 2557 return cmp_int(l->inum, r->inum); 2558 } 2559 2560 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2561 struct nlink_table *links, 2562 u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2563 { 2564 struct nlink *link, key = { 2565 .inum = inum, .snapshot = U32_MAX, 2566 }; 2567 2568 if (inum < range_start || inum >= range_end) 2569 return; 2570 2571 link = __inline_bsearch(&key, links->d, links->nr, 2572 sizeof(links->d[0]), nlink_cmp); 2573 if (!link) 2574 return; 2575 2576 while (link > links->d && link[0].inum == link[-1].inum) 2577 --link; 2578 2579 for (; link < links->d + links->nr && link->inum == inum; link++) 2580 if (ref_visible(c, s, snapshot, link->snapshot)) { 2581 link->count++; 2582 if (link->snapshot >= snapshot) 2583 break; 2584 } 2585 } 2586 2587 noinline_for_stack 2588 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2589 struct nlink_table *t, 2590 u64 start, u64 *end) 2591 { 2592 int ret = bch2_trans_run(c, 2593 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2594 POS(0, start), 2595 BTREE_ITER_intent| 2596 BTREE_ITER_prefetch| 2597 BTREE_ITER_all_snapshots, k, ({ 2598 if (!bkey_is_inode(k.k)) 2599 continue; 2600 2601 /* Should never fail, checked by bch2_inode_invalid: */ 2602 struct bch_inode_unpacked u; 2603 BUG_ON(bch2_inode_unpack(k, &u)); 2604 2605 /* 2606 * Backpointer and directory structure checks are sufficient for 2607 * directories, since they can't have hardlinks: 2608 */ 2609 if (S_ISDIR(u.bi_mode)) 2610 continue; 2611 2612 if (!u.bi_nlink) 2613 continue; 2614 2615 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2616 if (ret) { 2617 *end = k.k->p.offset; 2618 ret = 0; 2619 break; 2620 } 2621 0; 2622 }))); 2623 2624 bch_err_fn(c, ret); 2625 return ret; 2626 } 2627 2628 noinline_for_stack 2629 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2630 u64 range_start, u64 range_end) 2631 { 2632 struct snapshots_seen s; 2633 2634 snapshots_seen_init(&s); 2635 2636 int ret = bch2_trans_run(c, 2637 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2638 BTREE_ITER_intent| 2639 BTREE_ITER_prefetch| 2640 BTREE_ITER_all_snapshots, k, ({ 2641 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2642 if (ret) 2643 break; 2644 2645 if (k.k->type == KEY_TYPE_dirent) { 2646 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2647 2648 if (d.v->d_type != DT_DIR && 2649 d.v->d_type != DT_SUBVOL) 2650 inc_link(c, &s, links, range_start, range_end, 2651 le64_to_cpu(d.v->d_inum), d.k->p.snapshot); 2652 } 2653 0; 2654 }))); 2655 2656 snapshots_seen_exit(&s); 2657 2658 bch_err_fn(c, ret); 2659 return ret; 2660 } 2661 2662 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2663 struct bkey_s_c k, 2664 struct nlink_table *links, 2665 size_t *idx, u64 range_end) 2666 { 2667 struct bch_fs *c = trans->c; 2668 struct bch_inode_unpacked u; 2669 struct nlink *link = &links->d[*idx]; 2670 int ret = 0; 2671 2672 if (k.k->p.offset >= range_end) 2673 return 1; 2674 2675 if (!bkey_is_inode(k.k)) 2676 return 0; 2677 2678 BUG_ON(bch2_inode_unpack(k, &u)); 2679 2680 if (S_ISDIR(u.bi_mode)) 2681 return 0; 2682 2683 if (!u.bi_nlink) 2684 return 0; 2685 2686 while ((cmp_int(link->inum, k.k->p.offset) ?: 2687 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2688 BUG_ON(*idx == links->nr); 2689 link = &links->d[++*idx]; 2690 } 2691 2692 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2693 c, inode_wrong_nlink, 2694 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2695 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2696 bch2_inode_nlink_get(&u), link->count)) { 2697 bch2_inode_nlink_set(&u, link->count); 2698 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2699 } 2700 fsck_err: 2701 return ret; 2702 } 2703 2704 noinline_for_stack 2705 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2706 struct nlink_table *links, 2707 u64 range_start, u64 range_end) 2708 { 2709 size_t idx = 0; 2710 2711 int ret = bch2_trans_run(c, 2712 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2713 POS(0, range_start), 2714 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 2715 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2716 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2717 if (ret < 0) { 2718 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2719 return ret; 2720 } 2721 2722 return 0; 2723 } 2724 2725 int bch2_check_nlinks(struct bch_fs *c) 2726 { 2727 struct nlink_table links = { 0 }; 2728 u64 this_iter_range_start, next_iter_range_start = 0; 2729 int ret = 0; 2730 2731 do { 2732 this_iter_range_start = next_iter_range_start; 2733 next_iter_range_start = U64_MAX; 2734 2735 ret = check_nlinks_find_hardlinks(c, &links, 2736 this_iter_range_start, 2737 &next_iter_range_start); 2738 2739 ret = check_nlinks_walk_dirents(c, &links, 2740 this_iter_range_start, 2741 next_iter_range_start); 2742 if (ret) 2743 break; 2744 2745 ret = check_nlinks_update_hardlinks(c, &links, 2746 this_iter_range_start, 2747 next_iter_range_start); 2748 if (ret) 2749 break; 2750 2751 links.nr = 0; 2752 } while (next_iter_range_start != U64_MAX); 2753 2754 kvfree(links.d); 2755 bch_err_fn(c, ret); 2756 return ret; 2757 } 2758 2759 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2760 struct bkey_s_c k) 2761 { 2762 struct bkey_s_c_reflink_p p; 2763 struct bkey_i_reflink_p *u; 2764 2765 if (k.k->type != KEY_TYPE_reflink_p) 2766 return 0; 2767 2768 p = bkey_s_c_to_reflink_p(k); 2769 2770 if (!p.v->front_pad && !p.v->back_pad) 2771 return 0; 2772 2773 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2774 int ret = PTR_ERR_OR_ZERO(u); 2775 if (ret) 2776 return ret; 2777 2778 bkey_reassemble(&u->k_i, k); 2779 u->v.front_pad = 0; 2780 u->v.back_pad = 0; 2781 2782 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 2783 } 2784 2785 int bch2_fix_reflink_p(struct bch_fs *c) 2786 { 2787 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2788 return 0; 2789 2790 int ret = bch2_trans_run(c, 2791 for_each_btree_key_commit(trans, iter, 2792 BTREE_ID_extents, POS_MIN, 2793 BTREE_ITER_intent|BTREE_ITER_prefetch| 2794 BTREE_ITER_all_snapshots, k, 2795 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2796 fix_reflink_p_key(trans, &iter, k))); 2797 bch_err_fn(c, ret); 2798 return ret; 2799 } 2800