1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "bkey_buf.h" 5 #include "btree_cache.h" 6 #include "btree_update.h" 7 #include "buckets.h" 8 #include "darray.h" 9 #include "dirent.h" 10 #include "error.h" 11 #include "fs-common.h" 12 #include "fsck.h" 13 #include "inode.h" 14 #include "keylist.h" 15 #include "recovery_passes.h" 16 #include "snapshot.h" 17 #include "super.h" 18 #include "xattr.h" 19 20 #include <linux/bsearch.h> 21 #include <linux/dcache.h> /* struct qstr */ 22 23 /* 24 * XXX: this is handling transaction restarts without returning 25 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore: 26 */ 27 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, 28 u32 snapshot) 29 { 30 u64 sectors = 0; 31 32 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents, 33 SPOS(inum, 0, snapshot), 34 POS(inum, U64_MAX), 35 0, k, ({ 36 if (bkey_extent_is_allocation(k.k)) 37 sectors += k.k->size; 38 0; 39 })); 40 41 return ret ?: sectors; 42 } 43 44 static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, 45 u32 snapshot) 46 { 47 u64 subdirs = 0; 48 49 int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents, 50 SPOS(inum, 0, snapshot), 51 POS(inum, U64_MAX), 52 0, k, ({ 53 if (k.k->type == KEY_TYPE_dirent && 54 bkey_s_c_to_dirent(k).v->d_type == DT_DIR) 55 subdirs++; 56 0; 57 })); 58 59 return ret ?: subdirs; 60 } 61 62 static int subvol_lookup(struct btree_trans *trans, u32 subvol, 63 u32 *snapshot, u64 *inum) 64 { 65 struct bch_subvolume s; 66 int ret = bch2_subvolume_get(trans, subvol, false, 0, &s); 67 68 *snapshot = le32_to_cpu(s.snapshot); 69 *inum = le64_to_cpu(s.inode); 70 return ret; 71 } 72 73 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, 74 struct bch_inode_unpacked *inode) 75 { 76 struct btree_iter iter; 77 struct bkey_s_c k; 78 int ret; 79 80 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr), 81 BTREE_ITER_all_snapshots, k, ret) { 82 if (k.k->p.offset != inode_nr) 83 break; 84 if (!bkey_is_inode(k.k)) 85 continue; 86 ret = bch2_inode_unpack(k, inode); 87 goto found; 88 } 89 ret = -BCH_ERR_ENOENT_inode; 90 found: 91 bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); 92 bch2_trans_iter_exit(trans, &iter); 93 return ret; 94 } 95 96 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, 97 struct bch_inode_unpacked *inode, 98 u32 *snapshot) 99 { 100 struct btree_iter iter; 101 struct bkey_s_c k; 102 int ret; 103 104 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 105 SPOS(0, inode_nr, *snapshot), 0); 106 ret = bkey_err(k); 107 if (ret) 108 goto err; 109 110 ret = bkey_is_inode(k.k) 111 ? bch2_inode_unpack(k, inode) 112 : -BCH_ERR_ENOENT_inode; 113 if (!ret) 114 *snapshot = iter.pos.snapshot; 115 err: 116 bch2_trans_iter_exit(trans, &iter); 117 return ret; 118 } 119 120 static int lookup_dirent_in_snapshot(struct btree_trans *trans, 121 struct bch_hash_info hash_info, 122 subvol_inum dir, struct qstr *name, 123 u64 *target, unsigned *type, u32 snapshot) 124 { 125 struct btree_iter iter; 126 struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, 127 &hash_info, dir, name, 0, snapshot); 128 int ret = bkey_err(k); 129 if (ret) 130 return ret; 131 132 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter)); 133 *target = le64_to_cpu(d.v->d_inum); 134 *type = d.v->d_type; 135 bch2_trans_iter_exit(trans, &iter); 136 return 0; 137 } 138 139 static int __remove_dirent(struct btree_trans *trans, struct bpos pos) 140 { 141 struct bch_fs *c = trans->c; 142 struct btree_iter iter; 143 struct bch_inode_unpacked dir_inode; 144 struct bch_hash_info dir_hash_info; 145 int ret; 146 147 ret = lookup_first_inode(trans, pos.inode, &dir_inode); 148 if (ret) 149 goto err; 150 151 dir_hash_info = bch2_hash_info_init(c, &dir_inode); 152 153 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent); 154 155 ret = bch2_btree_iter_traverse(&iter) ?: 156 bch2_hash_delete_at(trans, bch2_dirent_hash_desc, 157 &dir_hash_info, &iter, 158 BTREE_UPDATE_internal_snapshot_node); 159 bch2_trans_iter_exit(trans, &iter); 160 err: 161 bch_err_fn(c, ret); 162 return ret; 163 } 164 165 /* Get lost+found, create if it doesn't exist: */ 166 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, 167 struct bch_inode_unpacked *lostfound, 168 u64 reattaching_inum) 169 { 170 struct bch_fs *c = trans->c; 171 struct qstr lostfound_str = QSTR("lost+found"); 172 u64 inum = 0; 173 unsigned d_type = 0; 174 int ret; 175 176 struct bch_snapshot_tree st; 177 ret = bch2_snapshot_tree_lookup(trans, 178 bch2_snapshot_tree(c, snapshot), &st); 179 if (ret) 180 return ret; 181 182 subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) }; 183 184 struct bch_subvolume subvol; 185 ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol), 186 false, 0, &subvol); 187 bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u", 188 le32_to_cpu(st.master_subvol), snapshot); 189 if (ret) 190 return ret; 191 192 if (!subvol.inode) { 193 struct btree_iter iter; 194 struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, 195 BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)), 196 0, subvolume); 197 ret = PTR_ERR_OR_ZERO(subvol); 198 if (ret) 199 return ret; 200 201 subvol->v.inode = cpu_to_le64(reattaching_inum); 202 bch2_trans_iter_exit(trans, &iter); 203 } 204 205 root_inum.inum = le64_to_cpu(subvol.inode); 206 207 struct bch_inode_unpacked root_inode; 208 struct bch_hash_info root_hash_info; 209 u32 root_inode_snapshot = snapshot; 210 ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot); 211 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", 212 root_inum.inum, le32_to_cpu(st.master_subvol)); 213 if (ret) 214 return ret; 215 216 root_hash_info = bch2_hash_info_init(c, &root_inode); 217 218 ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, 219 &lostfound_str, &inum, &d_type, snapshot); 220 if (bch2_err_matches(ret, ENOENT)) 221 goto create_lostfound; 222 223 bch_err_fn(c, ret); 224 if (ret) 225 return ret; 226 227 if (d_type != DT_DIR) { 228 bch_err(c, "error looking up lost+found: not a directory"); 229 return -BCH_ERR_ENOENT_not_directory; 230 } 231 232 /* 233 * The bch2_check_dirents pass has already run, dangling dirents 234 * shouldn't exist here: 235 */ 236 ret = lookup_inode(trans, inum, lostfound, &snapshot); 237 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", 238 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); 239 return ret; 240 241 create_lostfound: 242 /* 243 * XXX: we could have a nicer log message here if we had a nice way to 244 * walk backpointers to print a path 245 */ 246 bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot)); 247 248 u64 now = bch2_current_time(c); 249 struct btree_iter lostfound_iter = { NULL }; 250 u64 cpu = raw_smp_processor_id(); 251 252 bch2_inode_init_early(c, lostfound); 253 bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); 254 lostfound->bi_dir = root_inode.bi_inum; 255 256 root_inode.bi_nlink++; 257 258 ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); 259 if (ret) 260 goto err; 261 262 bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); 263 ret = bch2_btree_iter_traverse(&lostfound_iter); 264 if (ret) 265 goto err; 266 267 ret = bch2_dirent_create_snapshot(trans, 268 0, root_inode.bi_inum, snapshot, &root_hash_info, 269 mode_to_type(lostfound->bi_mode), 270 &lostfound_str, 271 lostfound->bi_inum, 272 &lostfound->bi_dir_offset, 273 STR_HASH_must_create) ?: 274 bch2_inode_write_flags(trans, &lostfound_iter, lostfound, 275 BTREE_UPDATE_internal_snapshot_node); 276 err: 277 bch_err_msg(c, ret, "creating lost+found"); 278 bch2_trans_iter_exit(trans, &lostfound_iter); 279 return ret; 280 } 281 282 static int reattach_inode(struct btree_trans *trans, 283 struct bch_inode_unpacked *inode, 284 u32 inode_snapshot) 285 { 286 struct bch_hash_info dir_hash; 287 struct bch_inode_unpacked lostfound; 288 char name_buf[20]; 289 struct qstr name; 290 u64 dir_offset = 0; 291 u32 dirent_snapshot = inode_snapshot; 292 int ret; 293 294 if (inode->bi_subvol) { 295 inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; 296 297 u64 root_inum; 298 ret = subvol_lookup(trans, inode->bi_parent_subvol, 299 &dirent_snapshot, &root_inum); 300 if (ret) 301 return ret; 302 303 snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol); 304 } else { 305 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); 306 } 307 308 ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); 309 if (ret) 310 return ret; 311 312 if (S_ISDIR(inode->bi_mode)) { 313 lostfound.bi_nlink++; 314 315 ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX); 316 if (ret) 317 return ret; 318 } 319 320 dir_hash = bch2_hash_info_init(trans->c, &lostfound); 321 322 name = (struct qstr) QSTR(name_buf); 323 324 ret = bch2_dirent_create_snapshot(trans, 325 inode->bi_parent_subvol, lostfound.bi_inum, 326 dirent_snapshot, 327 &dir_hash, 328 inode_d_type(inode), 329 &name, 330 inode->bi_subvol ?: inode->bi_inum, 331 &dir_offset, 332 STR_HASH_must_create); 333 if (ret) 334 return ret; 335 336 inode->bi_dir = lostfound.bi_inum; 337 inode->bi_dir_offset = dir_offset; 338 339 return __bch2_fsck_write_inode(trans, inode, inode_snapshot); 340 } 341 342 static int remove_backpointer(struct btree_trans *trans, 343 struct bch_inode_unpacked *inode) 344 { 345 struct btree_iter iter; 346 struct bkey_s_c_dirent d; 347 int ret; 348 349 d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents, 350 POS(inode->bi_dir, inode->bi_dir_offset), 0, 351 dirent); 352 ret = bkey_err(d) ?: 353 __remove_dirent(trans, d.k->p); 354 bch2_trans_iter_exit(trans, &iter); 355 return ret; 356 } 357 358 static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) 359 { 360 struct bch_fs *c = trans->c; 361 362 struct bch_inode_unpacked inode; 363 int ret = bch2_inode_find_by_inum_trans(trans, 364 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 365 &inode); 366 if (ret) 367 return ret; 368 369 ret = remove_backpointer(trans, &inode); 370 bch_err_msg(c, ret, "removing dirent"); 371 if (ret) 372 return ret; 373 374 ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot)); 375 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 376 return ret; 377 } 378 379 static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) 380 { 381 struct bch_fs *c = trans->c; 382 383 if (!bch2_snapshot_is_leaf(c, snapshotid)) { 384 bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); 385 return -BCH_ERR_fsck_repair_unimplemented; 386 } 387 388 /* 389 * If inum isn't set, that means we're being called from check_dirents, 390 * not check_inodes - the root of this subvolume doesn't exist or we 391 * would have found it there: 392 */ 393 if (!inum) { 394 struct btree_iter inode_iter = {}; 395 struct bch_inode_unpacked new_inode; 396 u64 cpu = raw_smp_processor_id(); 397 398 bch2_inode_init_early(c, &new_inode); 399 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); 400 401 new_inode.bi_subvol = subvolid; 402 403 int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: 404 bch2_btree_iter_traverse(&inode_iter) ?: 405 bch2_inode_write(trans, &inode_iter, &new_inode); 406 bch2_trans_iter_exit(trans, &inode_iter); 407 if (ret) 408 return ret; 409 410 inum = new_inode.bi_inum; 411 } 412 413 bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); 414 415 struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); 416 int ret = PTR_ERR_OR_ZERO(new_subvol); 417 if (ret) 418 return ret; 419 420 bkey_subvolume_init(&new_subvol->k_i); 421 new_subvol->k.p.offset = subvolid; 422 new_subvol->v.snapshot = cpu_to_le32(snapshotid); 423 new_subvol->v.inode = cpu_to_le64(inum); 424 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); 425 if (ret) 426 return ret; 427 428 struct btree_iter iter; 429 struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, 430 BTREE_ID_snapshots, POS(0, snapshotid), 431 0, snapshot); 432 ret = PTR_ERR_OR_ZERO(s); 433 bch_err_msg(c, ret, "getting snapshot %u", snapshotid); 434 if (ret) 435 return ret; 436 437 u32 snapshot_tree = le32_to_cpu(s->v.tree); 438 439 s->v.subvol = cpu_to_le32(subvolid); 440 SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); 441 bch2_trans_iter_exit(trans, &iter); 442 443 struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, 444 BTREE_ID_snapshot_trees, POS(0, snapshot_tree), 445 0, snapshot_tree); 446 ret = PTR_ERR_OR_ZERO(st); 447 bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree); 448 if (ret) 449 return ret; 450 451 if (!st->v.master_subvol) 452 st->v.master_subvol = cpu_to_le32(subvolid); 453 454 bch2_trans_iter_exit(trans, &iter); 455 return 0; 456 } 457 458 static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode) 459 { 460 struct bch_fs *c = trans->c; 461 struct bch_inode_unpacked new_inode; 462 463 bch2_inode_init_early(c, &new_inode); 464 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL); 465 new_inode.bi_size = size; 466 new_inode.bi_inum = inum; 467 468 return __bch2_fsck_write_inode(trans, &new_inode, snapshot); 469 } 470 471 static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum) 472 { 473 struct btree_iter iter = {}; 474 475 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); 476 struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter); 477 bch2_trans_iter_exit(trans, &iter); 478 int ret = bkey_err(k); 479 if (ret) 480 return ret; 481 482 return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG); 483 } 484 485 struct snapshots_seen { 486 struct bpos pos; 487 snapshot_id_list ids; 488 }; 489 490 static inline void snapshots_seen_exit(struct snapshots_seen *s) 491 { 492 darray_exit(&s->ids); 493 } 494 495 static inline void snapshots_seen_init(struct snapshots_seen *s) 496 { 497 memset(s, 0, sizeof(*s)); 498 } 499 500 static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) 501 { 502 u32 *i; 503 __darray_for_each(s->ids, i) { 504 if (*i == id) 505 return 0; 506 if (*i > id) 507 break; 508 } 509 510 int ret = darray_insert_item(&s->ids, i - s->ids.data, id); 511 if (ret) 512 bch_err(c, "error reallocating snapshots_seen table (size %zu)", 513 s->ids.size); 514 return ret; 515 } 516 517 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, 518 enum btree_id btree_id, struct bpos pos) 519 { 520 if (!bkey_eq(s->pos, pos)) 521 s->ids.nr = 0; 522 s->pos = pos; 523 524 return snapshot_list_add_nodup(c, &s->ids, pos.snapshot); 525 } 526 527 /** 528 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, 529 * and @ancestor hasn't been overwritten in @seen 530 * 531 * @c: filesystem handle 532 * @seen: list of snapshot ids already seen at current position 533 * @id: descendent snapshot id 534 * @ancestor: ancestor snapshot id 535 * 536 * Returns: whether key in @ancestor snapshot is visible in @id snapshot 537 */ 538 static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, 539 u32 id, u32 ancestor) 540 { 541 ssize_t i; 542 543 EBUG_ON(id > ancestor); 544 545 /* @ancestor should be the snapshot most recently added to @seen */ 546 EBUG_ON(ancestor != seen->pos.snapshot); 547 EBUG_ON(ancestor != darray_last(seen->ids)); 548 549 if (id == ancestor) 550 return true; 551 552 if (!bch2_snapshot_is_ancestor(c, id, ancestor)) 553 return false; 554 555 /* 556 * We know that @id is a descendant of @ancestor, we're checking if 557 * we've seen a key that overwrote @ancestor - i.e. also a descendent of 558 * @ascestor and with @id as a descendent. 559 * 560 * But we already know that we're scanning IDs between @id and @ancestor 561 * numerically, since snapshot ID lists are kept sorted, so if we find 562 * an id that's an ancestor of @id we're done: 563 */ 564 565 for (i = seen->ids.nr - 2; 566 i >= 0 && seen->ids.data[i] >= id; 567 --i) 568 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i])) 569 return false; 570 571 return true; 572 } 573 574 /** 575 * ref_visible - given a key with snapshot id @src that points to a key with 576 * snapshot id @dst, test whether there is some snapshot in which @dst is 577 * visible. 578 * 579 * @c: filesystem handle 580 * @s: list of snapshot IDs already seen at @src 581 * @src: snapshot ID of src key 582 * @dst: snapshot ID of dst key 583 * Returns: true if there is some snapshot in which @dst is visible 584 * 585 * Assumes we're visiting @src keys in natural key order 586 */ 587 static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, 588 u32 src, u32 dst) 589 { 590 return dst <= src 591 ? key_visible_in_snapshot(c, s, dst, src) 592 : bch2_snapshot_is_ancestor(c, src, dst); 593 } 594 595 static int ref_visible2(struct bch_fs *c, 596 u32 src, struct snapshots_seen *src_seen, 597 u32 dst, struct snapshots_seen *dst_seen) 598 { 599 if (dst > src) { 600 swap(dst, src); 601 swap(dst_seen, src_seen); 602 } 603 return key_visible_in_snapshot(c, src_seen, dst, src); 604 } 605 606 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ 607 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ 608 (_i)->snapshot <= (_snapshot); _i++) \ 609 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) 610 611 struct inode_walker_entry { 612 struct bch_inode_unpacked inode; 613 u32 snapshot; 614 bool seen_this_pos; 615 u64 count; 616 }; 617 618 struct inode_walker { 619 bool first_this_inode; 620 bool recalculate_sums; 621 struct bpos last_pos; 622 623 DARRAY(struct inode_walker_entry) inodes; 624 }; 625 626 static void inode_walker_exit(struct inode_walker *w) 627 { 628 darray_exit(&w->inodes); 629 } 630 631 static struct inode_walker inode_walker_init(void) 632 { 633 return (struct inode_walker) { 0, }; 634 } 635 636 static int add_inode(struct bch_fs *c, struct inode_walker *w, 637 struct bkey_s_c inode) 638 { 639 struct bch_inode_unpacked u; 640 641 BUG_ON(bch2_inode_unpack(inode, &u)); 642 643 return darray_push(&w->inodes, ((struct inode_walker_entry) { 644 .inode = u, 645 .snapshot = inode.k->p.snapshot, 646 })); 647 } 648 649 static int get_inodes_all_snapshots(struct btree_trans *trans, 650 struct inode_walker *w, u64 inum) 651 { 652 struct bch_fs *c = trans->c; 653 struct btree_iter iter; 654 struct bkey_s_c k; 655 int ret; 656 657 w->recalculate_sums = false; 658 w->inodes.nr = 0; 659 660 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 661 BTREE_ITER_all_snapshots, k, ret) { 662 if (k.k->p.offset != inum) 663 break; 664 665 if (bkey_is_inode(k.k)) 666 add_inode(c, w, k); 667 } 668 bch2_trans_iter_exit(trans, &iter); 669 670 if (ret) 671 return ret; 672 673 w->first_this_inode = true; 674 return 0; 675 } 676 677 static struct inode_walker_entry * 678 lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k) 679 { 680 bool is_whiteout = k.k->type == KEY_TYPE_whiteout; 681 682 struct inode_walker_entry *i; 683 __darray_for_each(w->inodes, i) 684 if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot)) 685 goto found; 686 687 return NULL; 688 found: 689 BUG_ON(k.k->p.snapshot > i->snapshot); 690 691 if (k.k->p.snapshot != i->snapshot && !is_whiteout) { 692 struct inode_walker_entry new = *i; 693 694 new.snapshot = k.k->p.snapshot; 695 new.count = 0; 696 697 struct printbuf buf = PRINTBUF; 698 bch2_bkey_val_to_text(&buf, c, k); 699 700 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" 701 "unexpected because we should always update the inode when we update a key in that inode\n" 702 "%s", 703 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf); 704 printbuf_exit(&buf); 705 706 while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot) 707 --i; 708 709 size_t pos = i - w->inodes.data; 710 int ret = darray_insert_item(&w->inodes, pos, new); 711 if (ret) 712 return ERR_PTR(ret); 713 714 i = w->inodes.data + pos; 715 } 716 717 return i; 718 } 719 720 static struct inode_walker_entry *walk_inode(struct btree_trans *trans, 721 struct inode_walker *w, 722 struct bkey_s_c k) 723 { 724 if (w->last_pos.inode != k.k->p.inode) { 725 int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); 726 if (ret) 727 return ERR_PTR(ret); 728 } else if (bkey_cmp(w->last_pos, k.k->p)) { 729 darray_for_each(w->inodes, i) 730 i->seen_this_pos = false; 731 } 732 733 w->last_pos = k.k->p; 734 735 return lookup_inode_for_snapshot(trans->c, w, k); 736 } 737 738 static int get_visible_inodes(struct btree_trans *trans, 739 struct inode_walker *w, 740 struct snapshots_seen *s, 741 u64 inum) 742 { 743 struct bch_fs *c = trans->c; 744 struct btree_iter iter; 745 struct bkey_s_c k; 746 int ret; 747 748 w->inodes.nr = 0; 749 750 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 751 BTREE_ITER_all_snapshots, k, ret) { 752 if (k.k->p.offset != inum) 753 break; 754 755 if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) 756 continue; 757 758 if (bkey_is_inode(k.k)) 759 add_inode(c, w, k); 760 761 if (k.k->p.snapshot >= s->pos.snapshot) 762 break; 763 } 764 bch2_trans_iter_exit(trans, &iter); 765 766 return ret; 767 } 768 769 static int hash_redo_key(struct btree_trans *trans, 770 const struct bch_hash_desc desc, 771 struct bch_hash_info *hash_info, 772 struct btree_iter *k_iter, struct bkey_s_c k) 773 { 774 struct bkey_i *delete; 775 struct bkey_i *tmp; 776 777 delete = bch2_trans_kmalloc(trans, sizeof(*delete)); 778 if (IS_ERR(delete)) 779 return PTR_ERR(delete); 780 781 tmp = bch2_bkey_make_mut_noupdate(trans, k); 782 if (IS_ERR(tmp)) 783 return PTR_ERR(tmp); 784 785 bkey_init(&delete->k); 786 delete->k.p = k_iter->pos; 787 return bch2_btree_iter_traverse(k_iter) ?: 788 bch2_trans_update(trans, k_iter, delete, 0) ?: 789 bch2_hash_set_in_snapshot(trans, desc, hash_info, 790 (subvol_inum) { 0, k.k->p.inode }, 791 k.k->p.snapshot, tmp, 792 STR_HASH_must_create| 793 BTREE_UPDATE_internal_snapshot_node) ?: 794 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 795 } 796 797 static int hash_check_key(struct btree_trans *trans, 798 const struct bch_hash_desc desc, 799 struct bch_hash_info *hash_info, 800 struct btree_iter *k_iter, struct bkey_s_c hash_k) 801 { 802 struct bch_fs *c = trans->c; 803 struct btree_iter iter = { NULL }; 804 struct printbuf buf = PRINTBUF; 805 struct bkey_s_c k; 806 u64 hash; 807 int ret = 0; 808 809 if (hash_k.k->type != desc.key_type) 810 return 0; 811 812 hash = desc.hash_bkey(hash_info, hash_k); 813 814 if (likely(hash == hash_k.k->p.offset)) 815 return 0; 816 817 if (hash_k.k->p.offset < hash) 818 goto bad_hash; 819 820 for_each_btree_key_norestart(trans, iter, desc.btree_id, 821 SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot), 822 BTREE_ITER_slots, k, ret) { 823 if (bkey_eq(k.k->p, hash_k.k->p)) 824 break; 825 826 if (fsck_err_on(k.k->type == desc.key_type && 827 !desc.cmp_bkey(k, hash_k), c, 828 hash_table_key_duplicate, 829 "duplicate hash table keys:\n%s", 830 (printbuf_reset(&buf), 831 bch2_bkey_val_to_text(&buf, c, hash_k), 832 buf.buf))) { 833 ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1; 834 break; 835 } 836 837 if (bkey_deleted(k.k)) { 838 bch2_trans_iter_exit(trans, &iter); 839 goto bad_hash; 840 } 841 } 842 out: 843 bch2_trans_iter_exit(trans, &iter); 844 printbuf_exit(&buf); 845 return ret; 846 bad_hash: 847 if (fsck_err(c, hash_table_key_wrong_offset, 848 "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s", 849 bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash, 850 (printbuf_reset(&buf), 851 bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) { 852 ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k); 853 bch_err_fn(c, ret); 854 if (ret) 855 return ret; 856 ret = -BCH_ERR_transaction_restart_nested; 857 } 858 fsck_err: 859 goto out; 860 } 861 862 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, 863 struct btree_iter *iter, 864 struct bpos pos) 865 { 866 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); 867 } 868 869 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, 870 struct btree_iter *iter, 871 struct bch_inode_unpacked *inode, 872 u32 *snapshot) 873 { 874 if (inode->bi_subvol) { 875 u64 inum; 876 int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum); 877 if (ret) 878 return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) }); 879 } 880 881 return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot)); 882 } 883 884 static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, 885 struct bkey_s_c_dirent d) 886 { 887 return inode->bi_dir == d.k->p.inode && 888 inode->bi_dir_offset == d.k->p.offset; 889 } 890 891 static bool dirent_points_to_inode(struct bkey_s_c_dirent d, 892 struct bch_inode_unpacked *inode) 893 { 894 return d.v->d_type == DT_SUBVOL 895 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 896 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 897 } 898 899 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 900 { 901 struct btree_iter iter; 902 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 903 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 904 bch2_trans_iter_exit(trans, &iter); 905 return ret; 906 } 907 908 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 909 struct bch_inode_unpacked *inode, 910 u32 inode_snapshot, bool *write_inode) 911 { 912 struct bch_fs *c = trans->c; 913 struct printbuf buf = PRINTBUF; 914 915 struct btree_iter dirent_iter = {}; 916 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 917 int ret = bkey_err(d); 918 if (ret && !bch2_err_matches(ret, ENOENT)) 919 return ret; 920 921 if (fsck_err_on(ret, 922 c, inode_points_to_missing_dirent, 923 "inode points to missing dirent\n%s", 924 (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || 925 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 926 c, inode_points_to_wrong_dirent, 927 "inode points to dirent that does not point back:\n%s", 928 (bch2_bkey_val_to_text(&buf, c, inode_k), 929 prt_newline(&buf), 930 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 931 /* 932 * We just clear the backpointer fields for now. If we find a 933 * dirent that points to this inode in check_dirents(), we'll 934 * update it then; then when we get to check_path() if the 935 * backpointer is still 0 we'll reattach it. 936 */ 937 inode->bi_dir = 0; 938 inode->bi_dir_offset = 0; 939 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 940 *write_inode = true; 941 } 942 943 ret = 0; 944 fsck_err: 945 bch2_trans_iter_exit(trans, &dirent_iter); 946 printbuf_exit(&buf); 947 bch_err_fn(c, ret); 948 return ret; 949 } 950 951 static int check_inode(struct btree_trans *trans, 952 struct btree_iter *iter, 953 struct bkey_s_c k, 954 struct bch_inode_unpacked *prev, 955 struct snapshots_seen *s, 956 bool full) 957 { 958 struct bch_fs *c = trans->c; 959 struct bch_inode_unpacked u; 960 bool do_update = false; 961 int ret; 962 963 ret = bch2_check_key_has_snapshot(trans, iter, k); 964 if (ret < 0) 965 goto err; 966 if (ret) 967 return 0; 968 969 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 970 if (ret) 971 goto err; 972 973 if (!bkey_is_inode(k.k)) 974 return 0; 975 976 BUG_ON(bch2_inode_unpack(k, &u)); 977 978 if (!full && 979 !(u.bi_flags & (BCH_INODE_i_size_dirty| 980 BCH_INODE_i_sectors_dirty| 981 BCH_INODE_unlinked))) 982 return 0; 983 984 if (prev->bi_inum != u.bi_inum) 985 *prev = u; 986 987 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 988 inode_d_type(prev) != inode_d_type(&u), 989 c, inode_snapshot_mismatch, 990 "inodes in different snapshots don't match")) { 991 bch_err(c, "repair not implemented yet"); 992 return -BCH_ERR_fsck_repair_unimplemented; 993 } 994 995 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 996 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 997 struct bpos new_min_pos; 998 999 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 1000 if (ret) 1001 goto err; 1002 1003 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1004 1005 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1006 1007 bch_err_msg(c, ret, "in fsck updating inode"); 1008 if (ret) 1009 return ret; 1010 1011 if (!bpos_eq(new_min_pos, POS_MIN)) 1012 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 1013 return 0; 1014 } 1015 1016 if (u.bi_flags & BCH_INODE_unlinked) { 1017 ret = check_inode_deleted_list(trans, k.k->p); 1018 if (ret < 0) 1019 return ret; 1020 1021 fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list, 1022 "inode %llu:%u unlinked, but not on deleted list", 1023 u.bi_inum, k.k->p.snapshot); 1024 ret = 0; 1025 } 1026 1027 if (u.bi_flags & BCH_INODE_unlinked && 1028 (!c->sb.clean || 1029 fsck_err(c, inode_unlinked_but_clean, 1030 "filesystem marked clean, but inode %llu unlinked", 1031 u.bi_inum))) { 1032 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); 1033 bch_err_msg(c, ret, "in fsck deleting inode"); 1034 return ret; 1035 } 1036 1037 if (u.bi_flags & BCH_INODE_i_size_dirty && 1038 (!c->sb.clean || 1039 fsck_err(c, inode_i_size_dirty_but_clean, 1040 "filesystem marked clean, but inode %llu has i_size dirty", 1041 u.bi_inum))) { 1042 bch_verbose(c, "truncating inode %llu", u.bi_inum); 1043 1044 /* 1045 * XXX: need to truncate partial blocks too here - or ideally 1046 * just switch units to bytes and that issue goes away 1047 */ 1048 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, 1049 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, 1050 iter->pos.snapshot), 1051 POS(u.bi_inum, U64_MAX), 1052 0, NULL); 1053 bch_err_msg(c, ret, "in fsck truncating inode"); 1054 if (ret) 1055 return ret; 1056 1057 /* 1058 * We truncated without our normal sector accounting hook, just 1059 * make sure we recalculate it: 1060 */ 1061 u.bi_flags |= BCH_INODE_i_sectors_dirty; 1062 1063 u.bi_flags &= ~BCH_INODE_i_size_dirty; 1064 do_update = true; 1065 } 1066 1067 if (u.bi_flags & BCH_INODE_i_sectors_dirty && 1068 (!c->sb.clean || 1069 fsck_err(c, inode_i_sectors_dirty_but_clean, 1070 "filesystem marked clean, but inode %llu has i_sectors dirty", 1071 u.bi_inum))) { 1072 s64 sectors; 1073 1074 bch_verbose(c, "recounting sectors for inode %llu", 1075 u.bi_inum); 1076 1077 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); 1078 if (sectors < 0) { 1079 bch_err_msg(c, sectors, "in fsck recounting inode sectors"); 1080 return sectors; 1081 } 1082 1083 u.bi_sectors = sectors; 1084 u.bi_flags &= ~BCH_INODE_i_sectors_dirty; 1085 do_update = true; 1086 } 1087 1088 if (u.bi_flags & BCH_INODE_backptr_untrusted) { 1089 u.bi_dir = 0; 1090 u.bi_dir_offset = 0; 1091 u.bi_flags &= ~BCH_INODE_backptr_untrusted; 1092 do_update = true; 1093 } 1094 1095 if (u.bi_dir || u.bi_dir_offset) { 1096 ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update); 1097 if (ret) 1098 goto err; 1099 } 1100 1101 if (fsck_err_on(u.bi_parent_subvol && 1102 (u.bi_subvol == 0 || 1103 u.bi_subvol == BCACHEFS_ROOT_SUBVOL), 1104 c, inode_bi_parent_nonzero, 1105 "inode %llu:%u has subvol %u but nonzero parent subvol %u", 1106 u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) { 1107 u.bi_parent_subvol = 0; 1108 do_update = true; 1109 } 1110 1111 if (u.bi_subvol) { 1112 struct bch_subvolume s; 1113 1114 ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); 1115 if (ret && !bch2_err_matches(ret, ENOENT)) 1116 goto err; 1117 1118 if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1119 ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum); 1120 goto do_update; 1121 } 1122 1123 if (fsck_err_on(ret, 1124 c, inode_bi_subvol_missing, 1125 "inode %llu:%u bi_subvol points to missing subvolume %u", 1126 u.bi_inum, k.k->p.snapshot, u.bi_subvol) || 1127 fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum || 1128 !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot), 1129 k.k->p.snapshot), 1130 c, inode_bi_subvol_wrong, 1131 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u", 1132 u.bi_inum, k.k->p.snapshot, u.bi_subvol, 1133 le64_to_cpu(s.inode), 1134 le32_to_cpu(s.snapshot))) { 1135 u.bi_subvol = 0; 1136 u.bi_parent_subvol = 0; 1137 do_update = true; 1138 } 1139 } 1140 do_update: 1141 if (do_update) { 1142 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1143 bch_err_msg(c, ret, "in fsck updating inode"); 1144 if (ret) 1145 return ret; 1146 } 1147 err: 1148 fsck_err: 1149 bch_err_fn(c, ret); 1150 return ret; 1151 } 1152 1153 int bch2_check_inodes(struct bch_fs *c) 1154 { 1155 bool full = c->opts.fsck; 1156 struct bch_inode_unpacked prev = { 0 }; 1157 struct snapshots_seen s; 1158 1159 snapshots_seen_init(&s); 1160 1161 int ret = bch2_trans_run(c, 1162 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 1163 POS_MIN, 1164 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1165 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1166 check_inode(trans, &iter, k, &prev, &s, full))); 1167 1168 snapshots_seen_exit(&s); 1169 bch_err_fn(c, ret); 1170 return ret; 1171 } 1172 1173 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) 1174 { 1175 struct bch_fs *c = trans->c; 1176 int ret = 0; 1177 s64 count2; 1178 1179 darray_for_each(w->inodes, i) { 1180 if (i->inode.bi_sectors == i->count) 1181 continue; 1182 1183 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1184 1185 if (w->recalculate_sums) 1186 i->count = count2; 1187 1188 if (i->count != count2) { 1189 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1190 w->last_pos.inode, i->snapshot, i->count, count2); 1191 return -BCH_ERR_internal_fsck_err; 1192 } 1193 1194 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1195 c, inode_i_sectors_wrong, 1196 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1197 w->last_pos.inode, i->snapshot, 1198 i->inode.bi_sectors, i->count)) { 1199 i->inode.bi_sectors = i->count; 1200 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1201 if (ret) 1202 break; 1203 } 1204 } 1205 fsck_err: 1206 bch_err_fn(c, ret); 1207 return ret; 1208 } 1209 1210 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1211 { 1212 u32 restart_count = trans->restart_count; 1213 return check_i_sectors_notnested(trans, w) ?: 1214 trans_was_restarted(trans, restart_count); 1215 } 1216 1217 struct extent_end { 1218 u32 snapshot; 1219 u64 offset; 1220 struct snapshots_seen seen; 1221 }; 1222 1223 struct extent_ends { 1224 struct bpos last_pos; 1225 DARRAY(struct extent_end) e; 1226 }; 1227 1228 static void extent_ends_reset(struct extent_ends *extent_ends) 1229 { 1230 darray_for_each(extent_ends->e, i) 1231 snapshots_seen_exit(&i->seen); 1232 extent_ends->e.nr = 0; 1233 } 1234 1235 static void extent_ends_exit(struct extent_ends *extent_ends) 1236 { 1237 extent_ends_reset(extent_ends); 1238 darray_exit(&extent_ends->e); 1239 } 1240 1241 static void extent_ends_init(struct extent_ends *extent_ends) 1242 { 1243 memset(extent_ends, 0, sizeof(*extent_ends)); 1244 } 1245 1246 static int extent_ends_at(struct bch_fs *c, 1247 struct extent_ends *extent_ends, 1248 struct snapshots_seen *seen, 1249 struct bkey_s_c k) 1250 { 1251 struct extent_end *i, n = (struct extent_end) { 1252 .offset = k.k->p.offset, 1253 .snapshot = k.k->p.snapshot, 1254 .seen = *seen, 1255 }; 1256 1257 n.seen.ids.data = kmemdup(seen->ids.data, 1258 sizeof(seen->ids.data[0]) * seen->ids.size, 1259 GFP_KERNEL); 1260 if (!n.seen.ids.data) 1261 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1262 1263 __darray_for_each(extent_ends->e, i) { 1264 if (i->snapshot == k.k->p.snapshot) { 1265 snapshots_seen_exit(&i->seen); 1266 *i = n; 1267 return 0; 1268 } 1269 1270 if (i->snapshot >= k.k->p.snapshot) 1271 break; 1272 } 1273 1274 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1275 } 1276 1277 static int overlapping_extents_found(struct btree_trans *trans, 1278 enum btree_id btree, 1279 struct bpos pos1, struct snapshots_seen *pos1_seen, 1280 struct bkey pos2, 1281 bool *fixed, 1282 struct extent_end *extent_end) 1283 { 1284 struct bch_fs *c = trans->c; 1285 struct printbuf buf = PRINTBUF; 1286 struct btree_iter iter1, iter2 = { NULL }; 1287 struct bkey_s_c k1, k2; 1288 int ret; 1289 1290 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1291 1292 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1293 BTREE_ITER_all_snapshots| 1294 BTREE_ITER_not_extents); 1295 k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); 1296 ret = bkey_err(k1); 1297 if (ret) 1298 goto err; 1299 1300 prt_str(&buf, "\n "); 1301 bch2_bkey_val_to_text(&buf, c, k1); 1302 1303 if (!bpos_eq(pos1, k1.k->p)) { 1304 prt_str(&buf, "\n wanted\n "); 1305 bch2_bpos_to_text(&buf, pos1); 1306 prt_str(&buf, "\n "); 1307 bch2_bkey_to_text(&buf, &pos2); 1308 1309 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1310 __func__, buf.buf); 1311 ret = -BCH_ERR_internal_fsck_err; 1312 goto err; 1313 } 1314 1315 bch2_trans_copy_iter(&iter2, &iter1); 1316 1317 while (1) { 1318 bch2_btree_iter_advance(&iter2); 1319 1320 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1321 ret = bkey_err(k2); 1322 if (ret) 1323 goto err; 1324 1325 if (bpos_ge(k2.k->p, pos2.p)) 1326 break; 1327 } 1328 1329 prt_str(&buf, "\n "); 1330 bch2_bkey_val_to_text(&buf, c, k2); 1331 1332 if (bpos_gt(k2.k->p, pos2.p) || 1333 pos2.size != k2.k->size) { 1334 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1335 __func__, buf.buf); 1336 ret = -BCH_ERR_internal_fsck_err; 1337 goto err; 1338 } 1339 1340 prt_printf(&buf, "\n overwriting %s extent", 1341 pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); 1342 1343 if (fsck_err(c, extent_overlapping, 1344 "overlapping extents%s", buf.buf)) { 1345 struct btree_iter *old_iter = &iter1; 1346 struct disk_reservation res = { 0 }; 1347 1348 if (pos1.snapshot < pos2.p.snapshot) { 1349 old_iter = &iter2; 1350 swap(k1, k2); 1351 } 1352 1353 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1354 1355 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1356 BTREE_UPDATE_internal_snapshot_node, 1357 k1, k2) ?: 1358 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1359 bch2_disk_reservation_put(c, &res); 1360 1361 if (ret) 1362 goto err; 1363 1364 *fixed = true; 1365 1366 if (pos1.snapshot == pos2.p.snapshot) { 1367 /* 1368 * We overwrote the first extent, and did the overwrite 1369 * in the same snapshot: 1370 */ 1371 extent_end->offset = bkey_start_offset(&pos2); 1372 } else if (pos1.snapshot > pos2.p.snapshot) { 1373 /* 1374 * We overwrote the first extent in pos2's snapshot: 1375 */ 1376 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1377 } else { 1378 /* 1379 * We overwrote the second extent - restart 1380 * check_extent() from the top: 1381 */ 1382 ret = -BCH_ERR_transaction_restart_nested; 1383 } 1384 } 1385 fsck_err: 1386 err: 1387 bch2_trans_iter_exit(trans, &iter2); 1388 bch2_trans_iter_exit(trans, &iter1); 1389 printbuf_exit(&buf); 1390 return ret; 1391 } 1392 1393 static int check_overlapping_extents(struct btree_trans *trans, 1394 struct snapshots_seen *seen, 1395 struct extent_ends *extent_ends, 1396 struct bkey_s_c k, 1397 struct btree_iter *iter, 1398 bool *fixed) 1399 { 1400 struct bch_fs *c = trans->c; 1401 int ret = 0; 1402 1403 /* transaction restart, running again */ 1404 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1405 return 0; 1406 1407 if (extent_ends->last_pos.inode != k.k->p.inode) 1408 extent_ends_reset(extent_ends); 1409 1410 darray_for_each(extent_ends->e, i) { 1411 if (i->offset <= bkey_start_offset(k.k)) 1412 continue; 1413 1414 if (!ref_visible2(c, 1415 k.k->p.snapshot, seen, 1416 i->snapshot, &i->seen)) 1417 continue; 1418 1419 ret = overlapping_extents_found(trans, iter->btree_id, 1420 SPOS(iter->pos.inode, 1421 i->offset, 1422 i->snapshot), 1423 &i->seen, 1424 *k.k, fixed, i); 1425 if (ret) 1426 goto err; 1427 } 1428 1429 extent_ends->last_pos = k.k->p; 1430 err: 1431 return ret; 1432 } 1433 1434 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1435 struct bkey_s_c k) 1436 { 1437 struct bch_fs *c = trans->c; 1438 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1439 struct bch_extent_crc_unpacked crc; 1440 const union bch_extent_entry *i; 1441 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1442 1443 bkey_for_each_crc(k.k, ptrs, crc, i) 1444 if (crc_is_encoded(crc) && 1445 crc.uncompressed_size > encoded_extent_max_sectors) { 1446 struct printbuf buf = PRINTBUF; 1447 1448 bch2_bkey_val_to_text(&buf, c, k); 1449 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1450 printbuf_exit(&buf); 1451 } 1452 1453 return 0; 1454 } 1455 1456 static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1457 struct bkey_s_c k, 1458 struct inode_walker *inode, 1459 struct snapshots_seen *s, 1460 struct extent_ends *extent_ends) 1461 { 1462 struct bch_fs *c = trans->c; 1463 struct inode_walker_entry *i; 1464 struct printbuf buf = PRINTBUF; 1465 int ret = 0; 1466 1467 ret = bch2_check_key_has_snapshot(trans, iter, k); 1468 if (ret) { 1469 ret = ret < 0 ? ret : 0; 1470 goto out; 1471 } 1472 1473 if (inode->last_pos.inode != k.k->p.inode) { 1474 ret = check_i_sectors(trans, inode); 1475 if (ret) 1476 goto err; 1477 } 1478 1479 i = walk_inode(trans, inode, k); 1480 ret = PTR_ERR_OR_ZERO(i); 1481 if (ret) 1482 goto err; 1483 1484 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1485 if (ret) 1486 goto err; 1487 1488 if (k.k->type != KEY_TYPE_whiteout) { 1489 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 1490 ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?: 1491 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 1492 if (ret) 1493 goto err; 1494 1495 inode->last_pos.inode--; 1496 ret = -BCH_ERR_transaction_restart_nested; 1497 goto err; 1498 } 1499 1500 if (fsck_err_on(!i, c, extent_in_missing_inode, 1501 "extent in missing inode:\n %s", 1502 (printbuf_reset(&buf), 1503 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1504 goto delete; 1505 1506 if (fsck_err_on(i && 1507 !S_ISREG(i->inode.bi_mode) && 1508 !S_ISLNK(i->inode.bi_mode), 1509 c, extent_in_non_reg_inode, 1510 "extent in non regular inode mode %o:\n %s", 1511 i->inode.bi_mode, 1512 (printbuf_reset(&buf), 1513 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1514 goto delete; 1515 1516 ret = check_overlapping_extents(trans, s, extent_ends, k, iter, 1517 &inode->recalculate_sums); 1518 if (ret) 1519 goto err; 1520 } 1521 1522 /* 1523 * Check inodes in reverse order, from oldest snapshots to newest, 1524 * starting from the inode that matches this extent's snapshot. If we 1525 * didn't have one, iterate over all inodes: 1526 */ 1527 if (!i) 1528 i = inode->inodes.data + inode->inodes.nr - 1; 1529 1530 for (; 1531 inode->inodes.data && i >= inode->inodes.data; 1532 --i) { 1533 if (i->snapshot > k.k->p.snapshot || 1534 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) 1535 continue; 1536 1537 if (k.k->type != KEY_TYPE_whiteout) { 1538 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && 1539 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1540 !bkey_extent_is_reservation(k), 1541 c, extent_past_end_of_inode, 1542 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1543 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1544 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1545 struct btree_iter iter2; 1546 1547 bch2_trans_copy_iter(&iter2, iter); 1548 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1549 ret = bch2_btree_iter_traverse(&iter2) ?: 1550 bch2_btree_delete_at(trans, &iter2, 1551 BTREE_UPDATE_internal_snapshot_node); 1552 bch2_trans_iter_exit(trans, &iter2); 1553 if (ret) 1554 goto err; 1555 1556 iter->k.type = KEY_TYPE_whiteout; 1557 } 1558 1559 if (bkey_extent_is_allocation(k.k)) 1560 i->count += k.k->size; 1561 } 1562 1563 i->seen_this_pos = true; 1564 } 1565 1566 if (k.k->type != KEY_TYPE_whiteout) { 1567 ret = extent_ends_at(c, extent_ends, s, k); 1568 if (ret) 1569 goto err; 1570 } 1571 out: 1572 err: 1573 fsck_err: 1574 printbuf_exit(&buf); 1575 bch_err_fn(c, ret); 1576 return ret; 1577 delete: 1578 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); 1579 goto out; 1580 } 1581 1582 /* 1583 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1584 * that i_size an i_sectors are consistent 1585 */ 1586 int bch2_check_extents(struct bch_fs *c) 1587 { 1588 struct inode_walker w = inode_walker_init(); 1589 struct snapshots_seen s; 1590 struct extent_ends extent_ends; 1591 struct disk_reservation res = { 0 }; 1592 1593 snapshots_seen_init(&s); 1594 extent_ends_init(&extent_ends); 1595 1596 int ret = bch2_trans_run(c, 1597 for_each_btree_key_commit(trans, iter, BTREE_ID_extents, 1598 POS(BCACHEFS_ROOT_INO, 0), 1599 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1600 &res, NULL, 1601 BCH_TRANS_COMMIT_no_enospc, ({ 1602 bch2_disk_reservation_put(c, &res); 1603 check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: 1604 check_extent_overbig(trans, &iter, k); 1605 })) ?: 1606 check_i_sectors_notnested(trans, &w)); 1607 1608 bch2_disk_reservation_put(c, &res); 1609 extent_ends_exit(&extent_ends); 1610 inode_walker_exit(&w); 1611 snapshots_seen_exit(&s); 1612 1613 bch_err_fn(c, ret); 1614 return ret; 1615 } 1616 1617 int bch2_check_indirect_extents(struct bch_fs *c) 1618 { 1619 struct disk_reservation res = { 0 }; 1620 1621 int ret = bch2_trans_run(c, 1622 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1623 POS_MIN, 1624 BTREE_ITER_prefetch, k, 1625 &res, NULL, 1626 BCH_TRANS_COMMIT_no_enospc, ({ 1627 bch2_disk_reservation_put(c, &res); 1628 check_extent_overbig(trans, &iter, k); 1629 }))); 1630 1631 bch2_disk_reservation_put(c, &res); 1632 bch_err_fn(c, ret); 1633 return ret; 1634 } 1635 1636 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) 1637 { 1638 struct bch_fs *c = trans->c; 1639 int ret = 0; 1640 s64 count2; 1641 1642 darray_for_each(w->inodes, i) { 1643 if (i->inode.bi_nlink == i->count) 1644 continue; 1645 1646 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1647 if (count2 < 0) 1648 return count2; 1649 1650 if (i->count != count2) { 1651 bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", 1652 w->last_pos.inode, i->snapshot, i->count, count2); 1653 i->count = count2; 1654 if (i->inode.bi_nlink == i->count) 1655 continue; 1656 } 1657 1658 if (fsck_err_on(i->inode.bi_nlink != i->count, 1659 c, inode_dir_wrong_nlink, 1660 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1661 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1662 i->inode.bi_nlink = i->count; 1663 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1664 if (ret) 1665 break; 1666 } 1667 } 1668 fsck_err: 1669 bch_err_fn(c, ret); 1670 return ret; 1671 } 1672 1673 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) 1674 { 1675 u32 restart_count = trans->restart_count; 1676 return check_subdir_count_notnested(trans, w) ?: 1677 trans_was_restarted(trans, restart_count); 1678 } 1679 1680 static int check_dirent_inode_dirent(struct btree_trans *trans, 1681 struct btree_iter *iter, 1682 struct bkey_s_c_dirent d, 1683 struct bch_inode_unpacked *target, 1684 u32 target_snapshot) 1685 { 1686 struct bch_fs *c = trans->c; 1687 struct printbuf buf = PRINTBUF; 1688 int ret = 0; 1689 1690 if (inode_points_to_dirent(target, d)) 1691 return 0; 1692 1693 if (bch2_inode_should_have_bp(target) && 1694 !fsck_err(c, inode_wrong_backpointer, 1695 "dirent points to inode that does not point back:\n %s", 1696 (bch2_bkey_val_to_text(&buf, c, d.s_c), 1697 prt_printf(&buf, "\n "), 1698 bch2_inode_unpacked_to_text(&buf, target), 1699 buf.buf))) 1700 goto out_noiter; 1701 1702 if (!target->bi_dir && 1703 !target->bi_dir_offset) { 1704 target->bi_dir = d.k->p.inode; 1705 target->bi_dir_offset = d.k->p.offset; 1706 return __bch2_fsck_write_inode(trans, target, target_snapshot); 1707 } 1708 1709 struct btree_iter bp_iter = { NULL }; 1710 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1711 SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1712 ret = bkey_err(bp_dirent); 1713 if (ret && !bch2_err_matches(ret, ENOENT)) 1714 goto err; 1715 1716 bool backpointer_exists = !ret; 1717 ret = 0; 1718 1719 if (fsck_err_on(!backpointer_exists, 1720 c, inode_wrong_backpointer, 1721 "inode %llu:%u has wrong backpointer:\n" 1722 "got %llu:%llu\n" 1723 "should be %llu:%llu", 1724 target->bi_inum, target_snapshot, 1725 target->bi_dir, 1726 target->bi_dir_offset, 1727 d.k->p.inode, 1728 d.k->p.offset)) { 1729 target->bi_dir = d.k->p.inode; 1730 target->bi_dir_offset = d.k->p.offset; 1731 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1732 goto out; 1733 } 1734 1735 bch2_bkey_val_to_text(&buf, c, d.s_c); 1736 prt_newline(&buf); 1737 if (backpointer_exists) 1738 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 1739 1740 if (fsck_err_on(backpointer_exists && 1741 (S_ISDIR(target->bi_mode) || 1742 target->bi_subvol), 1743 c, inode_dir_multiple_links, 1744 "%s %llu:%u with multiple links\n%s", 1745 S_ISDIR(target->bi_mode) ? "directory" : "subvolume", 1746 target->bi_inum, target_snapshot, buf.buf)) { 1747 ret = __remove_dirent(trans, d.k->p); 1748 goto out; 1749 } 1750 1751 /* 1752 * hardlinked file with nlink 0: 1753 * We're just adjusting nlink here so check_nlinks() will pick 1754 * it up, it ignores inodes with nlink 0 1755 */ 1756 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1757 c, inode_multiple_links_but_nlink_0, 1758 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1759 target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1760 target->bi_nlink++; 1761 target->bi_flags &= ~BCH_INODE_unlinked; 1762 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1763 if (ret) 1764 goto err; 1765 } 1766 out: 1767 err: 1768 fsck_err: 1769 bch2_trans_iter_exit(trans, &bp_iter); 1770 out_noiter: 1771 printbuf_exit(&buf); 1772 bch_err_fn(c, ret); 1773 return ret; 1774 } 1775 1776 static int check_dirent_target(struct btree_trans *trans, 1777 struct btree_iter *iter, 1778 struct bkey_s_c_dirent d, 1779 struct bch_inode_unpacked *target, 1780 u32 target_snapshot) 1781 { 1782 struct bch_fs *c = trans->c; 1783 struct bkey_i_dirent *n; 1784 struct printbuf buf = PRINTBUF; 1785 int ret = 0; 1786 1787 ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1788 if (ret) 1789 goto err; 1790 1791 if (fsck_err_on(d.v->d_type != inode_d_type(target), 1792 c, dirent_d_type_wrong, 1793 "incorrect d_type: got %s, should be %s:\n%s", 1794 bch2_d_type_str(d.v->d_type), 1795 bch2_d_type_str(inode_d_type(target)), 1796 (printbuf_reset(&buf), 1797 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1798 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 1799 ret = PTR_ERR_OR_ZERO(n); 1800 if (ret) 1801 goto err; 1802 1803 bkey_reassemble(&n->k_i, d.s_c); 1804 n->v.d_type = inode_d_type(target); 1805 if (n->v.d_type == DT_SUBVOL) { 1806 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 1807 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1808 } else { 1809 n->v.d_inum = cpu_to_le64(target->bi_inum); 1810 } 1811 1812 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1813 if (ret) 1814 goto err; 1815 1816 d = dirent_i_to_s_c(n); 1817 } 1818 err: 1819 fsck_err: 1820 printbuf_exit(&buf); 1821 bch_err_fn(c, ret); 1822 return ret; 1823 } 1824 1825 /* find a subvolume that's a descendent of @snapshot: */ 1826 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1827 { 1828 struct btree_iter iter; 1829 struct bkey_s_c k; 1830 int ret; 1831 1832 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1833 if (k.k->type != KEY_TYPE_subvolume) 1834 continue; 1835 1836 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1837 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1838 bch2_trans_iter_exit(trans, &iter); 1839 *subvolid = k.k->p.offset; 1840 goto found; 1841 } 1842 } 1843 if (!ret) 1844 ret = -ENOENT; 1845 found: 1846 bch2_trans_iter_exit(trans, &iter); 1847 return ret; 1848 } 1849 1850 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1851 struct bkey_s_c_dirent d) 1852 { 1853 struct bch_fs *c = trans->c; 1854 struct btree_iter subvol_iter = {}; 1855 struct bch_inode_unpacked subvol_root; 1856 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1857 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1858 u32 parent_snapshot; 1859 u32 new_parent_subvol = 0; 1860 u64 parent_inum; 1861 struct printbuf buf = PRINTBUF; 1862 int ret = 0; 1863 1864 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1865 if (ret && !bch2_err_matches(ret, ENOENT)) 1866 return ret; 1867 1868 if (ret || 1869 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 1870 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1871 if (ret2 && !bch2_err_matches(ret, ENOENT)) 1872 return ret2; 1873 } 1874 1875 if (ret && 1876 !new_parent_subvol && 1877 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1878 /* 1879 * Couldn't find a subvol for dirent's snapshot - but we lost 1880 * subvols, so we need to reconstruct: 1881 */ 1882 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 1883 if (ret) 1884 return ret; 1885 1886 parent_snapshot = d.k->p.snapshot; 1887 } 1888 1889 if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, 1890 "dirent parent_subvol points to missing subvolume\n%s", 1891 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1892 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1893 c, dirent_not_visible_in_parent_subvol, 1894 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1895 parent_snapshot, 1896 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1897 if (!new_parent_subvol) { 1898 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 1899 return -BCH_ERR_fsck_repair_unimplemented; 1900 } 1901 1902 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1903 ret = PTR_ERR_OR_ZERO(new_dirent); 1904 if (ret) 1905 goto err; 1906 1907 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1908 } 1909 1910 struct bkey_s_c_subvolume s = 1911 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1912 BTREE_ID_subvolumes, POS(0, target_subvol), 1913 0, subvolume); 1914 ret = bkey_err(s.s_c); 1915 if (ret && !bch2_err_matches(ret, ENOENT)) 1916 return ret; 1917 1918 if (ret) { 1919 if (fsck_err(c, dirent_to_missing_subvol, 1920 "dirent points to missing subvolume\n%s", 1921 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1922 return __remove_dirent(trans, d.k->p); 1923 ret = 0; 1924 goto out; 1925 } 1926 1927 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 1928 c, subvol_fs_path_parent_wrong, 1929 "subvol with wrong fs_path_parent, should be be %u\n%s", 1930 parent_subvol, 1931 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 1932 struct bkey_i_subvolume *n = 1933 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 1934 ret = PTR_ERR_OR_ZERO(n); 1935 if (ret) 1936 goto err; 1937 1938 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 1939 } 1940 1941 u64 target_inum = le64_to_cpu(s.v->inode); 1942 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 1943 1944 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 1945 if (ret && !bch2_err_matches(ret, ENOENT)) 1946 goto err; 1947 1948 if (ret) { 1949 bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum); 1950 ret = -BCH_ERR_fsck_repair_unimplemented; 1951 ret = 0; 1952 goto err; 1953 } 1954 1955 if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol, 1956 c, inode_bi_parent_wrong, 1957 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 1958 target_inum, 1959 subvol_root.bi_parent_subvol, parent_subvol)) { 1960 subvol_root.bi_parent_subvol = parent_subvol; 1961 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 1962 if (ret) 1963 goto err; 1964 } 1965 1966 ret = check_dirent_target(trans, iter, d, &subvol_root, 1967 target_snapshot); 1968 if (ret) 1969 goto err; 1970 out: 1971 err: 1972 fsck_err: 1973 bch2_trans_iter_exit(trans, &subvol_iter); 1974 printbuf_exit(&buf); 1975 return ret; 1976 } 1977 1978 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 1979 struct bkey_s_c k, 1980 struct bch_hash_info *hash_info, 1981 struct inode_walker *dir, 1982 struct inode_walker *target, 1983 struct snapshots_seen *s) 1984 { 1985 struct bch_fs *c = trans->c; 1986 struct inode_walker_entry *i; 1987 struct printbuf buf = PRINTBUF; 1988 int ret = 0; 1989 1990 ret = bch2_check_key_has_snapshot(trans, iter, k); 1991 if (ret) { 1992 ret = ret < 0 ? ret : 0; 1993 goto out; 1994 } 1995 1996 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1997 if (ret) 1998 goto err; 1999 2000 if (k.k->type == KEY_TYPE_whiteout) 2001 goto out; 2002 2003 if (dir->last_pos.inode != k.k->p.inode) { 2004 ret = check_subdir_count(trans, dir); 2005 if (ret) 2006 goto err; 2007 } 2008 2009 BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); 2010 2011 i = walk_inode(trans, dir, k); 2012 ret = PTR_ERR_OR_ZERO(i); 2013 if (ret < 0) 2014 goto err; 2015 2016 if (dir->first_this_inode && dir->inodes.nr) 2017 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); 2018 dir->first_this_inode = false; 2019 2020 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 2021 ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?: 2022 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 2023 if (ret) 2024 goto err; 2025 2026 dir->last_pos.inode--; 2027 ret = -BCH_ERR_transaction_restart_nested; 2028 goto err; 2029 } 2030 2031 if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, 2032 "dirent in nonexisting directory:\n%s", 2033 (printbuf_reset(&buf), 2034 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 2035 ret = bch2_btree_delete_at(trans, iter, 2036 BTREE_UPDATE_internal_snapshot_node); 2037 goto out; 2038 } 2039 2040 if (!i) 2041 goto out; 2042 2043 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), 2044 c, dirent_in_non_dir_inode, 2045 "dirent in non directory inode type %s:\n%s", 2046 bch2_d_type_str(inode_d_type(&i->inode)), 2047 (printbuf_reset(&buf), 2048 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 2049 ret = bch2_btree_delete_at(trans, iter, 0); 2050 goto out; 2051 } 2052 2053 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 2054 if (ret < 0) 2055 goto err; 2056 if (ret) { 2057 /* dirent has been deleted */ 2058 ret = 0; 2059 goto out; 2060 } 2061 2062 if (k.k->type != KEY_TYPE_dirent) 2063 goto out; 2064 2065 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2066 2067 if (d.v->d_type == DT_SUBVOL) { 2068 ret = check_dirent_to_subvol(trans, iter, d); 2069 if (ret) 2070 goto err; 2071 } else { 2072 ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); 2073 if (ret) 2074 goto err; 2075 2076 if (fsck_err_on(!target->inodes.nr, 2077 c, dirent_to_missing_inode, 2078 "dirent points to missing inode:\n%s", 2079 (printbuf_reset(&buf), 2080 bch2_bkey_val_to_text(&buf, c, k), 2081 buf.buf))) { 2082 ret = __remove_dirent(trans, d.k->p); 2083 if (ret) 2084 goto err; 2085 } 2086 2087 darray_for_each(target->inodes, i) { 2088 ret = check_dirent_target(trans, iter, d, 2089 &i->inode, i->snapshot); 2090 if (ret) 2091 goto err; 2092 } 2093 2094 if (d.v->d_type == DT_DIR) 2095 for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) 2096 i->count++; 2097 } 2098 out: 2099 err: 2100 fsck_err: 2101 printbuf_exit(&buf); 2102 bch_err_fn(c, ret); 2103 return ret; 2104 } 2105 2106 /* 2107 * Walk dirents: verify that they all have a corresponding S_ISDIR inode, 2108 * validate d_type 2109 */ 2110 int bch2_check_dirents(struct bch_fs *c) 2111 { 2112 struct inode_walker dir = inode_walker_init(); 2113 struct inode_walker target = inode_walker_init(); 2114 struct snapshots_seen s; 2115 struct bch_hash_info hash_info; 2116 2117 snapshots_seen_init(&s); 2118 2119 int ret = bch2_trans_run(c, 2120 for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, 2121 POS(BCACHEFS_ROOT_INO, 0), 2122 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2123 k, 2124 NULL, NULL, 2125 BCH_TRANS_COMMIT_no_enospc, 2126 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: 2127 check_subdir_count_notnested(trans, &dir)); 2128 2129 snapshots_seen_exit(&s); 2130 inode_walker_exit(&dir); 2131 inode_walker_exit(&target); 2132 bch_err_fn(c, ret); 2133 return ret; 2134 } 2135 2136 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, 2137 struct bkey_s_c k, 2138 struct bch_hash_info *hash_info, 2139 struct inode_walker *inode) 2140 { 2141 struct bch_fs *c = trans->c; 2142 struct inode_walker_entry *i; 2143 int ret; 2144 2145 ret = bch2_check_key_has_snapshot(trans, iter, k); 2146 if (ret < 0) 2147 return ret; 2148 if (ret) 2149 return 0; 2150 2151 i = walk_inode(trans, inode, k); 2152 ret = PTR_ERR_OR_ZERO(i); 2153 if (ret) 2154 return ret; 2155 2156 if (inode->first_this_inode && inode->inodes.nr) 2157 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); 2158 inode->first_this_inode = false; 2159 2160 if (fsck_err_on(!i, c, xattr_in_missing_inode, 2161 "xattr for missing inode %llu", 2162 k.k->p.inode)) 2163 return bch2_btree_delete_at(trans, iter, 0); 2164 2165 if (!i) 2166 return 0; 2167 2168 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2169 fsck_err: 2170 bch_err_fn(c, ret); 2171 return ret; 2172 } 2173 2174 /* 2175 * Walk xattrs: verify that they all have a corresponding inode 2176 */ 2177 int bch2_check_xattrs(struct bch_fs *c) 2178 { 2179 struct inode_walker inode = inode_walker_init(); 2180 struct bch_hash_info hash_info; 2181 int ret = 0; 2182 2183 ret = bch2_trans_run(c, 2184 for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, 2185 POS(BCACHEFS_ROOT_INO, 0), 2186 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2187 k, 2188 NULL, NULL, 2189 BCH_TRANS_COMMIT_no_enospc, 2190 check_xattr(trans, &iter, k, &hash_info, &inode))); 2191 bch_err_fn(c, ret); 2192 return ret; 2193 } 2194 2195 static int check_root_trans(struct btree_trans *trans) 2196 { 2197 struct bch_fs *c = trans->c; 2198 struct bch_inode_unpacked root_inode; 2199 u32 snapshot; 2200 u64 inum; 2201 int ret; 2202 2203 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); 2204 if (ret && !bch2_err_matches(ret, ENOENT)) 2205 return ret; 2206 2207 if (mustfix_fsck_err_on(ret, c, root_subvol_missing, 2208 "root subvol missing")) { 2209 struct bkey_i_subvolume *root_subvol = 2210 bch2_trans_kmalloc(trans, sizeof(*root_subvol)); 2211 ret = PTR_ERR_OR_ZERO(root_subvol); 2212 if (ret) 2213 goto err; 2214 2215 snapshot = U32_MAX; 2216 inum = BCACHEFS_ROOT_INO; 2217 2218 bkey_subvolume_init(&root_subvol->k_i); 2219 root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL; 2220 root_subvol->v.flags = 0; 2221 root_subvol->v.snapshot = cpu_to_le32(snapshot); 2222 root_subvol->v.inode = cpu_to_le64(inum); 2223 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0); 2224 bch_err_msg(c, ret, "writing root subvol"); 2225 if (ret) 2226 goto err; 2227 } 2228 2229 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2230 if (ret && !bch2_err_matches(ret, ENOENT)) 2231 return ret; 2232 2233 if (mustfix_fsck_err_on(ret, c, root_dir_missing, 2234 "root directory missing") || 2235 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), 2236 c, root_inode_not_dir, 2237 "root inode not a directory")) { 2238 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2239 0, NULL); 2240 root_inode.bi_inum = inum; 2241 2242 ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2243 bch_err_msg(c, ret, "writing root inode"); 2244 } 2245 err: 2246 fsck_err: 2247 return ret; 2248 } 2249 2250 /* Get root directory, create if it doesn't exist: */ 2251 int bch2_check_root(struct bch_fs *c) 2252 { 2253 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2254 check_root_trans(trans)); 2255 bch_err_fn(c, ret); 2256 return ret; 2257 } 2258 2259 typedef DARRAY(u32) darray_u32; 2260 2261 static bool darray_u32_has(darray_u32 *d, u32 v) 2262 { 2263 darray_for_each(*d, i) 2264 if (*i == v) 2265 return true; 2266 return false; 2267 } 2268 2269 /* 2270 * We've checked that inode backpointers point to valid dirents; here, it's 2271 * sufficient to check that the subvolume root has a dirent: 2272 */ 2273 static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) 2274 { 2275 struct bch_inode_unpacked inode; 2276 int ret = bch2_inode_find_by_inum_trans(trans, 2277 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 2278 &inode); 2279 if (ret) 2280 return ret; 2281 2282 return inode.bi_dir != 0; 2283 } 2284 2285 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 2286 { 2287 struct bch_fs *c = trans->c; 2288 struct btree_iter parent_iter = {}; 2289 darray_u32 subvol_path = {}; 2290 struct printbuf buf = PRINTBUF; 2291 int ret = 0; 2292 2293 if (k.k->type != KEY_TYPE_subvolume) 2294 return 0; 2295 2296 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { 2297 ret = darray_push(&subvol_path, k.k->p.offset); 2298 if (ret) 2299 goto err; 2300 2301 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 2302 2303 ret = subvol_has_dirent(trans, s); 2304 if (ret < 0) 2305 break; 2306 2307 if (fsck_err_on(!ret, 2308 c, subvol_unreachable, 2309 "unreachable subvolume %s", 2310 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2311 buf.buf))) { 2312 ret = reattach_subvol(trans, s); 2313 break; 2314 } 2315 2316 u32 parent = le32_to_cpu(s.v->fs_path_parent); 2317 2318 if (darray_u32_has(&subvol_path, parent)) { 2319 if (fsck_err(c, subvol_loop, "subvolume loop")) 2320 ret = reattach_subvol(trans, s); 2321 break; 2322 } 2323 2324 bch2_trans_iter_exit(trans, &parent_iter); 2325 bch2_trans_iter_init(trans, &parent_iter, 2326 BTREE_ID_subvolumes, POS(0, parent), 0); 2327 k = bch2_btree_iter_peek_slot(&parent_iter); 2328 ret = bkey_err(k); 2329 if (ret) 2330 goto err; 2331 2332 if (fsck_err_on(k.k->type != KEY_TYPE_subvolume, 2333 c, subvol_unreachable, 2334 "unreachable subvolume %s", 2335 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2336 buf.buf))) { 2337 ret = reattach_subvol(trans, s); 2338 break; 2339 } 2340 } 2341 fsck_err: 2342 err: 2343 printbuf_exit(&buf); 2344 darray_exit(&subvol_path); 2345 bch2_trans_iter_exit(trans, &parent_iter); 2346 return ret; 2347 } 2348 2349 int bch2_check_subvolume_structure(struct bch_fs *c) 2350 { 2351 int ret = bch2_trans_run(c, 2352 for_each_btree_key_commit(trans, iter, 2353 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, 2354 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2355 check_subvol_path(trans, &iter, k))); 2356 bch_err_fn(c, ret); 2357 return ret; 2358 } 2359 2360 struct pathbuf_entry { 2361 u64 inum; 2362 u32 snapshot; 2363 }; 2364 2365 typedef DARRAY(struct pathbuf_entry) pathbuf; 2366 2367 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) 2368 { 2369 darray_for_each(*p, i) 2370 if (i->inum == inum && 2371 i->snapshot == snapshot) 2372 return true; 2373 return false; 2374 } 2375 2376 /* 2377 * Check that a given inode is reachable from its subvolume root - we already 2378 * verified subvolume connectivity: 2379 * 2380 * XXX: we should also be verifying that inodes are in the right subvolumes 2381 */ 2382 static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) 2383 { 2384 struct bch_fs *c = trans->c; 2385 struct btree_iter inode_iter = {}; 2386 struct bch_inode_unpacked inode; 2387 struct printbuf buf = PRINTBUF; 2388 u32 snapshot = inode_k.k->p.snapshot; 2389 int ret = 0; 2390 2391 p->nr = 0; 2392 2393 BUG_ON(bch2_inode_unpack(inode_k, &inode)); 2394 2395 while (!inode.bi_subvol) { 2396 struct btree_iter dirent_iter; 2397 struct bkey_s_c_dirent d; 2398 u32 parent_snapshot = snapshot; 2399 2400 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2401 ret = bkey_err(d.s_c); 2402 if (ret && !bch2_err_matches(ret, ENOENT)) 2403 break; 2404 2405 if (!ret && !dirent_points_to_inode(d, &inode)) { 2406 bch2_trans_iter_exit(trans, &dirent_iter); 2407 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2408 } 2409 2410 if (bch2_err_matches(ret, ENOENT)) { 2411 ret = 0; 2412 if (fsck_err(c, inode_unreachable, 2413 "unreachable inode\n%s", 2414 (printbuf_reset(&buf), 2415 bch2_bkey_val_to_text(&buf, c, inode_k), 2416 buf.buf))) 2417 ret = reattach_inode(trans, &inode, snapshot); 2418 goto out; 2419 } 2420 2421 bch2_trans_iter_exit(trans, &dirent_iter); 2422 2423 if (!S_ISDIR(inode.bi_mode)) 2424 break; 2425 2426 ret = darray_push(p, ((struct pathbuf_entry) { 2427 .inum = inode.bi_inum, 2428 .snapshot = snapshot, 2429 })); 2430 if (ret) 2431 return ret; 2432 2433 snapshot = parent_snapshot; 2434 2435 bch2_trans_iter_exit(trans, &inode_iter); 2436 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2437 SPOS(0, inode.bi_dir, snapshot), 0); 2438 ret = bkey_err(inode_k) ?: 2439 !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode 2440 : bch2_inode_unpack(inode_k, &inode); 2441 if (ret) { 2442 /* Should have been caught in dirents pass */ 2443 if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2444 bch_err(c, "error looking up parent directory: %i", ret); 2445 break; 2446 } 2447 2448 snapshot = inode_k.k->p.snapshot; 2449 2450 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2451 /* XXX print path */ 2452 bch_err(c, "directory structure loop"); 2453 2454 darray_for_each(*p, i) 2455 pr_err("%llu:%u", i->inum, i->snapshot); 2456 pr_err("%llu:%u", inode.bi_inum, snapshot); 2457 2458 if (fsck_err(c, dir_loop, "directory structure loop")) { 2459 ret = remove_backpointer(trans, &inode); 2460 bch_err_msg(c, ret, "removing dirent"); 2461 if (ret) 2462 break; 2463 2464 ret = reattach_inode(trans, &inode, snapshot); 2465 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2466 } 2467 break; 2468 } 2469 } 2470 out: 2471 fsck_err: 2472 bch2_trans_iter_exit(trans, &inode_iter); 2473 printbuf_exit(&buf); 2474 bch_err_fn(c, ret); 2475 return ret; 2476 } 2477 2478 /* 2479 * Check for unreachable inodes, as well as loops in the directory structure: 2480 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2481 * unreachable: 2482 */ 2483 int bch2_check_directory_structure(struct bch_fs *c) 2484 { 2485 pathbuf path = { 0, }; 2486 int ret; 2487 2488 ret = bch2_trans_run(c, 2489 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2490 BTREE_ITER_intent| 2491 BTREE_ITER_prefetch| 2492 BTREE_ITER_all_snapshots, k, 2493 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2494 if (!bkey_is_inode(k.k)) 2495 continue; 2496 2497 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2498 continue; 2499 2500 check_path(trans, &path, k); 2501 }))); 2502 darray_exit(&path); 2503 2504 bch_err_fn(c, ret); 2505 return ret; 2506 } 2507 2508 struct nlink_table { 2509 size_t nr; 2510 size_t size; 2511 2512 struct nlink { 2513 u64 inum; 2514 u32 snapshot; 2515 u32 count; 2516 } *d; 2517 }; 2518 2519 static int add_nlink(struct bch_fs *c, struct nlink_table *t, 2520 u64 inum, u32 snapshot) 2521 { 2522 if (t->nr == t->size) { 2523 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2524 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2525 2526 if (!d) { 2527 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2528 new_size); 2529 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2530 } 2531 2532 if (t->d) 2533 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2534 kvfree(t->d); 2535 2536 t->d = d; 2537 t->size = new_size; 2538 } 2539 2540 2541 t->d[t->nr++] = (struct nlink) { 2542 .inum = inum, 2543 .snapshot = snapshot, 2544 }; 2545 2546 return 0; 2547 } 2548 2549 static int nlink_cmp(const void *_l, const void *_r) 2550 { 2551 const struct nlink *l = _l; 2552 const struct nlink *r = _r; 2553 2554 return cmp_int(l->inum, r->inum); 2555 } 2556 2557 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2558 struct nlink_table *links, 2559 u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2560 { 2561 struct nlink *link, key = { 2562 .inum = inum, .snapshot = U32_MAX, 2563 }; 2564 2565 if (inum < range_start || inum >= range_end) 2566 return; 2567 2568 link = __inline_bsearch(&key, links->d, links->nr, 2569 sizeof(links->d[0]), nlink_cmp); 2570 if (!link) 2571 return; 2572 2573 while (link > links->d && link[0].inum == link[-1].inum) 2574 --link; 2575 2576 for (; link < links->d + links->nr && link->inum == inum; link++) 2577 if (ref_visible(c, s, snapshot, link->snapshot)) { 2578 link->count++; 2579 if (link->snapshot >= snapshot) 2580 break; 2581 } 2582 } 2583 2584 noinline_for_stack 2585 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2586 struct nlink_table *t, 2587 u64 start, u64 *end) 2588 { 2589 int ret = bch2_trans_run(c, 2590 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2591 POS(0, start), 2592 BTREE_ITER_intent| 2593 BTREE_ITER_prefetch| 2594 BTREE_ITER_all_snapshots, k, ({ 2595 if (!bkey_is_inode(k.k)) 2596 continue; 2597 2598 /* Should never fail, checked by bch2_inode_invalid: */ 2599 struct bch_inode_unpacked u; 2600 BUG_ON(bch2_inode_unpack(k, &u)); 2601 2602 /* 2603 * Backpointer and directory structure checks are sufficient for 2604 * directories, since they can't have hardlinks: 2605 */ 2606 if (S_ISDIR(u.bi_mode)) 2607 continue; 2608 2609 if (!u.bi_nlink) 2610 continue; 2611 2612 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2613 if (ret) { 2614 *end = k.k->p.offset; 2615 ret = 0; 2616 break; 2617 } 2618 0; 2619 }))); 2620 2621 bch_err_fn(c, ret); 2622 return ret; 2623 } 2624 2625 noinline_for_stack 2626 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2627 u64 range_start, u64 range_end) 2628 { 2629 struct snapshots_seen s; 2630 2631 snapshots_seen_init(&s); 2632 2633 int ret = bch2_trans_run(c, 2634 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2635 BTREE_ITER_intent| 2636 BTREE_ITER_prefetch| 2637 BTREE_ITER_all_snapshots, k, ({ 2638 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2639 if (ret) 2640 break; 2641 2642 if (k.k->type == KEY_TYPE_dirent) { 2643 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2644 2645 if (d.v->d_type != DT_DIR && 2646 d.v->d_type != DT_SUBVOL) 2647 inc_link(c, &s, links, range_start, range_end, 2648 le64_to_cpu(d.v->d_inum), d.k->p.snapshot); 2649 } 2650 0; 2651 }))); 2652 2653 snapshots_seen_exit(&s); 2654 2655 bch_err_fn(c, ret); 2656 return ret; 2657 } 2658 2659 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2660 struct bkey_s_c k, 2661 struct nlink_table *links, 2662 size_t *idx, u64 range_end) 2663 { 2664 struct bch_fs *c = trans->c; 2665 struct bch_inode_unpacked u; 2666 struct nlink *link = &links->d[*idx]; 2667 int ret = 0; 2668 2669 if (k.k->p.offset >= range_end) 2670 return 1; 2671 2672 if (!bkey_is_inode(k.k)) 2673 return 0; 2674 2675 BUG_ON(bch2_inode_unpack(k, &u)); 2676 2677 if (S_ISDIR(u.bi_mode)) 2678 return 0; 2679 2680 if (!u.bi_nlink) 2681 return 0; 2682 2683 while ((cmp_int(link->inum, k.k->p.offset) ?: 2684 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2685 BUG_ON(*idx == links->nr); 2686 link = &links->d[++*idx]; 2687 } 2688 2689 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2690 c, inode_wrong_nlink, 2691 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2692 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2693 bch2_inode_nlink_get(&u), link->count)) { 2694 bch2_inode_nlink_set(&u, link->count); 2695 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2696 } 2697 fsck_err: 2698 return ret; 2699 } 2700 2701 noinline_for_stack 2702 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2703 struct nlink_table *links, 2704 u64 range_start, u64 range_end) 2705 { 2706 size_t idx = 0; 2707 2708 int ret = bch2_trans_run(c, 2709 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2710 POS(0, range_start), 2711 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 2712 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2713 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2714 if (ret < 0) { 2715 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2716 return ret; 2717 } 2718 2719 return 0; 2720 } 2721 2722 int bch2_check_nlinks(struct bch_fs *c) 2723 { 2724 struct nlink_table links = { 0 }; 2725 u64 this_iter_range_start, next_iter_range_start = 0; 2726 int ret = 0; 2727 2728 do { 2729 this_iter_range_start = next_iter_range_start; 2730 next_iter_range_start = U64_MAX; 2731 2732 ret = check_nlinks_find_hardlinks(c, &links, 2733 this_iter_range_start, 2734 &next_iter_range_start); 2735 2736 ret = check_nlinks_walk_dirents(c, &links, 2737 this_iter_range_start, 2738 next_iter_range_start); 2739 if (ret) 2740 break; 2741 2742 ret = check_nlinks_update_hardlinks(c, &links, 2743 this_iter_range_start, 2744 next_iter_range_start); 2745 if (ret) 2746 break; 2747 2748 links.nr = 0; 2749 } while (next_iter_range_start != U64_MAX); 2750 2751 kvfree(links.d); 2752 bch_err_fn(c, ret); 2753 return ret; 2754 } 2755 2756 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2757 struct bkey_s_c k) 2758 { 2759 struct bkey_s_c_reflink_p p; 2760 struct bkey_i_reflink_p *u; 2761 2762 if (k.k->type != KEY_TYPE_reflink_p) 2763 return 0; 2764 2765 p = bkey_s_c_to_reflink_p(k); 2766 2767 if (!p.v->front_pad && !p.v->back_pad) 2768 return 0; 2769 2770 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2771 int ret = PTR_ERR_OR_ZERO(u); 2772 if (ret) 2773 return ret; 2774 2775 bkey_reassemble(&u->k_i, k); 2776 u->v.front_pad = 0; 2777 u->v.back_pad = 0; 2778 2779 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 2780 } 2781 2782 int bch2_fix_reflink_p(struct bch_fs *c) 2783 { 2784 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2785 return 0; 2786 2787 int ret = bch2_trans_run(c, 2788 for_each_btree_key_commit(trans, iter, 2789 BTREE_ID_extents, POS_MIN, 2790 BTREE_ITER_intent|BTREE_ITER_prefetch| 2791 BTREE_ITER_all_snapshots, k, 2792 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2793 fix_reflink_p_key(trans, &iter, k))); 2794 bch_err_fn(c, ret); 2795 return ret; 2796 } 2797