// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "darray.h"
#include "dirent.h"
#include "error.h"
#include "fs-common.h"
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
#include "recovery_passes.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"

#include <linux/bsearch.h>
#include <linux/dcache.h> /* struct qstr */

/*
 * Sum the sizes (in sectors) of all allocated extents of @inum visible in
 * @snapshot.
 *
 * XXX: this is handling transaction restarts without returning
 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
 */
static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
				    u32 snapshot)
{
	u64 sectors = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (bkey_extent_is_allocation(k.k))
			sectors += k.k->size;
		0;
	}));

	/* negative error code, or the sector count on success */
	return ret ?: sectors;
}

/* Count subdirectory dirents of directory @inum visible in @snapshot. */
static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
			      u32 snapshot)
{
	u64 subdirs = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (k.k->type == KEY_TYPE_dirent &&
		    bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
			subdirs++;
		0;
	}));

	return ret ?: subdirs;
}

/*
 * Look up subvolume @subvol, returning its snapshot ID and root inode number.
 * NOTE(review): *snapshot/*inum are written even when the lookup fails, from a
 * possibly-uninitialized struct — callers must check the return value first.
 */
static int subvol_lookup(struct btree_trans *trans, u32 subvol,
			 u32 *snapshot, u64 *inum)
{
	struct bch_subvolume s;
	int ret = bch2_subvolume_get(trans, subvol, false, 0, &s);

	*snapshot = le32_to_cpu(s.snapshot);
	*inum = le64_to_cpu(s.inode);
	return ret;
}

/*
 * Fetch the first inode key with number @inode_nr in any snapshot
 * (iterates BTREE_ITER_all_snapshots and takes the first match).
 */
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
			     POS(0, inode_nr),
			     BTREE_ITER_all_snapshots);
	k = bch2_btree_iter_peek(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
		ret = -BCH_ERR_ENOENT_inode;
		goto err;
	}

	ret = bch2_inode_unpack(k, inode);
err:
	/* bch_err_msg() only logs when ret is an error */
	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Fetch inode @inode_nr as seen from *snapshot; on success *snapshot is
 * updated to the snapshot the inode key was actually found in.
 */
static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
			struct bch_inode_unpacked *inode,
			u32 *snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inode_nr, *snapshot), 0);
	ret = bkey_err(k);
	if (ret)
		goto err;

	ret = bkey_is_inode(k.k)
		? bch2_inode_unpack(k, inode)
		: -BCH_ERR_ENOENT_inode;
	if (!ret)
		*snapshot = iter.pos.snapshot;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Hash-lookup dirent @name in directory @dir within @snapshot; returns the
 * target inum and d_type.
 * NOTE(review): the slot re-read is cast to a dirent without a key-type
 * check — relies on the hash lookup having positioned on a dirent; confirm.
 */
static int lookup_dirent_in_snapshot(struct btree_trans *trans,
			   struct bch_hash_info hash_info,
			   subvol_inum dir, struct qstr *name,
			   u64 *target, unsigned *type, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
							 &hash_info, dir, name, 0, snapshot);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
	*target = le64_to_cpu(d.v->d_inum);
	*type = d.v->d_type;
	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/*
 * Delete the dirent at @pos, using the hash info of the first inode found at
 * pos.inode (any snapshot) so the hash-table delete is performed correctly.
 */
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bch_inode_unpacked dir_inode;
	struct bch_hash_info dir_hash_info;
	int ret;

	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
	if (ret)
		goto err;

	dir_hash_info = bch2_hash_info_init(c, &dir_inode);

	bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent);

	ret =   bch2_btree_iter_traverse(&iter) ?:
		bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				    &dir_hash_info, &iter,
				    BTREE_UPDATE_internal_snapshot_node);
	bch2_trans_iter_exit(trans, &iter);
err:
	bch_err_fn(c, ret);
	return ret;
}
/*
 * Get lost+found, create if it doesn't exist:
 *
 * Walks up to the snapshot tree's master subvolume's root directory, looks up
 * "lost+found" there, and creates it (mode 0700) if missing.
 * @reattaching_inum is used to repair a subvolume whose root inode field is
 * unset.
 */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
			    struct bch_inode_unpacked *lostfound,
			    u64 reattaching_inum)
{
	struct bch_fs *c = trans->c;
	struct qstr lostfound_str = QSTR("lost+found");
	u64 inum = 0;
	unsigned d_type = 0;
	int ret;

	struct bch_snapshot_tree st;
	ret = bch2_snapshot_tree_lookup(trans,
					bch2_snapshot_tree(c, snapshot), &st);
	if (ret)
		return ret;

	subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };

	struct bch_subvolume subvol;
	ret = bch2_subvolume_get(trans, le32_to_cpu(st.master_subvol),
				 false, 0, &subvol);
	bch_err_msg(c, ret, "looking up root subvol %u for snapshot %u",
		    le32_to_cpu(st.master_subvol), snapshot);
	if (ret)
		return ret;

	if (!subvol.inode) {
		/*
		 * Subvolume's root inode field is unset: repair it to point at
		 * the inode being reattached.  (Inner @subvol intentionally
		 * shadows the unpacked copy above.)
		 */
		struct btree_iter iter;
		struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter,
				BTREE_ID_subvolumes, POS(0, le32_to_cpu(st.master_subvol)),
				0, subvolume);
		ret = PTR_ERR_OR_ZERO(subvol);
		if (ret)
			return ret;

		subvol->v.inode = cpu_to_le64(reattaching_inum);
		bch2_trans_iter_exit(trans, &iter);
	}

	root_inum.inum = le64_to_cpu(subvol.inode);

	struct bch_inode_unpacked root_inode;
	struct bch_hash_info root_hash_info;
	u32 root_inode_snapshot = snapshot;
	ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
	bch_err_msg(c, ret, "looking up root inode %llu for subvol %u",
		    root_inum.inum, le32_to_cpu(st.master_subvol));
	if (ret)
		return ret;

	root_hash_info = bch2_hash_info_init(c, &root_inode);

	ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
					&lostfound_str, &inum, &d_type, snapshot);
	if (bch2_err_matches(ret, ENOENT))
		goto create_lostfound;

	bch_err_fn(c, ret);
	if (ret)
		return ret;

	if (d_type != DT_DIR) {
		bch_err(c, "error looking up lost+found: not a directory");
		return -BCH_ERR_ENOENT_not_directory;
	}

	/*
	 * The bch2_check_dirents pass has already run, dangling dirents
	 * shouldn't exist here:
	 */
	ret = lookup_inode(trans, inum, lostfound, &snapshot);
	bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
		    inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
	return ret;

create_lostfound:
	/*
	 * XXX: we could have a nicer log message here if we had a nice way to
	 * walk backpointers to print a path
	 */
	bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot));

	u64 now = bch2_current_time(c);
	struct btree_iter lostfound_iter = { NULL };
	u64 cpu = raw_smp_processor_id();

	bch2_inode_init_early(c, lostfound);
	bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
	lostfound->bi_dir = root_inode.bi_inum;

	/* new directory bumps the parent's link count */
	root_inode.bi_nlink++;

	ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
	if (ret)
		goto err;

	bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
	ret = bch2_btree_iter_traverse(&lostfound_iter);
	if (ret)
		goto err;

	ret =   bch2_dirent_create_snapshot(trans,
				0, root_inode.bi_inum, snapshot, &root_hash_info,
				mode_to_type(lostfound->bi_mode),
				&lostfound_str,
				lostfound->bi_inum,
				&lostfound->bi_dir_offset,
				STR_HASH_must_create) ?:
		bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
				       BTREE_UPDATE_internal_snapshot_node);
err:
	bch_err_msg(c, ret, "creating lost+found");
	bch2_trans_iter_exit(trans, &lostfound_iter);
	return ret;
}

/*
 * Reattach an orphaned inode (or subvolume root) into lost+found, creating a
 * new dirent named after the inode/subvolume number and updating the inode's
 * backpointer fields.
 */
static int reattach_inode(struct btree_trans *trans,
			  struct bch_inode_unpacked *inode,
			  u32 inode_snapshot)
{
	struct bch_hash_info dir_hash;
	struct bch_inode_unpacked lostfound;
	char name_buf[20];
	struct qstr name;
	u64 dir_offset = 0;
	u32 dirent_snapshot = inode_snapshot;
	int ret;

	if (inode->bi_subvol) {
		inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;

		u64 root_inum;
		ret = subvol_lookup(trans, inode->bi_parent_subvol,
				    &dirent_snapshot, &root_inum);
		if (ret)
			return ret;

		snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
	}

	ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum);
	if (ret)
		return ret;

	if (S_ISDIR(inode->bi_mode)) {
		/* reattached directory bumps lost+found's link count */
		lostfound.bi_nlink++;

		ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
		if (ret)
			return ret;
	}

	dir_hash = bch2_hash_info_init(trans->c, &lostfound);

	name = (struct qstr) QSTR(name_buf);

	ret = bch2_dirent_create_snapshot(trans,
				inode->bi_parent_subvol, lostfound.bi_inum,
				dirent_snapshot,
				&dir_hash,
				inode_d_type(inode),
				&name,
				inode->bi_subvol ?: inode->bi_inum,
				&dir_offset,
				STR_HASH_must_create);
	if (ret)
		return ret;

	inode->bi_dir = lostfound.bi_inum;
	inode->bi_dir_offset = dir_offset;

	return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
}

/* Delete the dirent that @inode's backpointer fields point at. */
static int remove_backpointer(struct btree_trans *trans,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c_dirent d;
	int ret;

	d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
				     POS(inode->bi_dir, inode->bi_dir_offset), 0,
				     dirent);
	ret =   bkey_err(d) ?:
		__remove_dirent(trans, d.k->p);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}
/*
 * Reattach subvolume @s to lost+found: removes the stale dirent pointing at
 * its root inode, then reattaches the root inode.
 */
static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s)
{
	struct bch_fs *c = trans->c;

	struct bch_inode_unpacked inode;
	int ret = bch2_inode_find_by_inum_trans(trans,
				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
				&inode);
	if (ret)
		return ret;

	ret = remove_backpointer(trans, &inode);
	bch_err_msg(c, ret, "removing dirent");
	if (ret)
		return ret;

	ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot));
	bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
	return ret;
}

/*
 * Recreate a missing subvolume key for @subvolid at @snapshotid, creating a
 * new root inode if @inum is 0, and repairing the snapshot and snapshot-tree
 * keys to point back at the subvolume.
 */
static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum)
{
	struct bch_fs *c = trans->c;

	if (!bch2_snapshot_is_leaf(c, snapshotid)) {
		bch_err(c, "need to reconstruct subvol, but have interior node snapshot");
		return -BCH_ERR_fsck_repair_unimplemented;
	}

	/*
	 * If inum isn't set, that means we're being called from check_dirents,
	 * not check_inodes - the root of this subvolume doesn't exist or we
	 * would have found it there:
	 */
	if (!inum) {
		struct btree_iter inode_iter = {};
		struct bch_inode_unpacked new_inode;
		u64 cpu = raw_smp_processor_id();

		bch2_inode_init_early(c, &new_inode);
		bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL);

		new_inode.bi_subvol = subvolid;

		int ret =   bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?:
			    bch2_btree_iter_traverse(&inode_iter) ?:
			    bch2_inode_write(trans, &inode_iter, &new_inode);
		bch2_trans_iter_exit(trans, &inode_iter);
		if (ret)
			return ret;

		inum = new_inode.bi_inum;
	}

	bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum);

	struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol));
	int ret = PTR_ERR_OR_ZERO(new_subvol);
	if (ret)
		return ret;

	bkey_subvolume_init(&new_subvol->k_i);
	new_subvol->k.p.offset	= subvolid;
	new_subvol->v.snapshot	= cpu_to_le32(snapshotid);
	new_subvol->v.inode	= cpu_to_le64(inum);
	ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0);
	if (ret)
		return ret;

	/* point the snapshot key back at the reconstructed subvolume */
	struct btree_iter iter;
	struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshots, POS(0, snapshotid),
			0, snapshot);
	ret = PTR_ERR_OR_ZERO(s);
	bch_err_msg(c, ret, "getting snapshot %u", snapshotid);
	if (ret)
		return ret;

	u32 snapshot_tree = le32_to_cpu(s->v.tree);

	s->v.subvol = cpu_to_le32(subvolid);
	SET_BCH_SNAPSHOT_SUBVOL(&s->v, true);
	bch2_trans_iter_exit(trans, &iter);

	/* ensure the snapshot tree has a master subvolume */
	struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter,
			BTREE_ID_snapshot_trees, POS(0, snapshot_tree),
			0, snapshot_tree);
	ret = PTR_ERR_OR_ZERO(st);
	bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree);
	if (ret)
		return ret;

	if (!st->v.master_subvol)
		st->v.master_subvol = cpu_to_le32(subvolid);

	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/* Write a new inode key for @inum in @snapshot with the given size and mode. */
static int reconstruct_inode(struct btree_trans *trans, u32 snapshot, u64 inum, u64 size, unsigned mode)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked new_inode;

	bch2_inode_init_early(c, &new_inode);
	bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, mode|0755, 0, NULL);
	new_inode.bi_size = size;
	new_inode.bi_inum = inum;

	return __bch2_fsck_write_inode(trans, &new_inode, snapshot);
}

/*
 * Reconstruct a missing regular-file inode for @inum, sizing it from the last
 * extent found in @snapshot (end offset in sectors, shifted to bytes).
 */
static int reconstruct_reg_inode(struct btree_trans *trans, u32 snapshot, u64 inum)
{
	struct btree_iter iter = {};

	bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0);
	struct bkey_s_c k = bch2_btree_iter_peek_prev(&iter);
	bch2_trans_iter_exit(trans, &iter);
	int ret = bkey_err(k);
	if (ret)
		return ret;

	return reconstruct_inode(trans, snapshot, inum, k.k->p.offset << 9, S_IFREG);
}
/*
 * Tracks the snapshot IDs of keys already visited at the current btree
 * position, in order, for visibility checks across snapshots.
 */
struct snapshots_seen {
	struct bpos			pos;
	snapshot_id_list		ids;
};

static inline void snapshots_seen_exit(struct snapshots_seen *s)
{
	darray_exit(&s->ids);
}

static inline void snapshots_seen_init(struct snapshots_seen *s)
{
	memset(s, 0, sizeof(*s));
}

/* Insert @id into the sorted ID list, keeping it sorted and deduplicated. */
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
	u32 *i;
	__darray_for_each(s->ids, i) {
		if (*i == id)
			return 0;
		if (*i > id)
			break;
	}

	int ret = darray_insert_item(&s->ids, i - s->ids.data, id);
	if (ret)
		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
			s->ids.size);
	return ret;
}

/*
 * Record that we've seen @pos; resets the seen-ID list when the position
 * (other than the snapshot field) changes.
 */
static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
				 enum btree_id btree_id, struct bpos pos)
{
	if (!bkey_eq(s->pos, pos))
		s->ids.nr = 0;
	s->pos = pos;

	return snapshot_list_add_nodup(c, &s->ids, pos.snapshot);
}

/**
 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
 * and @ancestor hasn't been overwritten in @seen
 *
 * @c:		filesystem handle
 * @seen:	list of snapshot ids already seen at current position
 * @id:		descendent snapshot id
 * @ancestor:	ancestor snapshot id
 *
 * Returns:	whether key in @ancestor snapshot is visible in @id snapshot
 */
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
				    u32 id, u32 ancestor)
{
	ssize_t i;

	EBUG_ON(id > ancestor);

	/* @ancestor should be the snapshot most recently added to @seen */
	EBUG_ON(ancestor != seen->pos.snapshot);
	EBUG_ON(ancestor != darray_last(seen->ids));

	if (id == ancestor)
		return true;

	if (!bch2_snapshot_is_ancestor(c, id, ancestor))
		return false;

	/*
	 * We know that @id is a descendant of @ancestor, we're checking if
	 * we've seen a key that overwrote @ancestor - i.e. also a descendent of
	 * @ancestor and with @id as a descendent.
	 *
	 * But we already know that we're scanning IDs between @id and @ancestor
	 * numerically, since snapshot ID lists are kept sorted, so if we find
	 * an id that's an ancestor of @id we're done:
	 */

	for (i = seen->ids.nr - 2;
	     i >= 0 && seen->ids.data[i] >= id;
	     --i)
		if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i]))
			return false;

	return true;
}

/**
 * ref_visible - given a key with snapshot id @src that points to a key with
 * snapshot id @dst, test whether there is some snapshot in which @dst is
 * visible.
 *
 * @c:		filesystem handle
 * @s:		list of snapshot IDs already seen at @src
 * @src:	snapshot ID of src key
 * @dst:	snapshot ID of dst key
 * Returns:	true if there is some snapshot in which @dst is visible
 *
 * Assumes we're visiting @src keys in natural key order
 */
static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
			u32 src, u32 dst)
{
	return dst <= src
		? key_visible_in_snapshot(c, s, dst, src)
		: bch2_snapshot_is_ancestor(c, src, dst);
}

/* Symmetric variant of ref_visible() with seen lists for both sides. */
static int ref_visible2(struct bch_fs *c,
			u32 src, struct snapshots_seen *src_seen,
			u32 dst, struct snapshots_seen *dst_seen)
{
	if (dst > src) {
		swap(dst, src);
		swap(dst_seen, src_seen);
	}
	return key_visible_in_snapshot(c, src_seen, dst, src);
}

#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)				\
	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&	\
	     (_i)->snapshot <= (_snapshot); _i++)					\
		if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))

/* One inode version (per snapshot) tracked by an inode_walker. */
struct inode_walker_entry {
	struct bch_inode_unpacked inode;
	u32			snapshot;
	bool			seen_this_pos;
	u64			count;
};

/* Walks all snapshot versions of each inode as fsck scans a btree in order. */
struct inode_walker {
	bool				first_this_inode;
	bool				recalculate_sums;
	struct bpos			last_pos;

	DARRAY(struct inode_walker_entry) inodes;
};

static void inode_walker_exit(struct inode_walker *w)
{
	darray_exit(&w->inodes);
}

static struct inode_walker inode_walker_init(void)
{
	return (struct inode_walker) { 0, };
}

/* Unpack @inode and append it to the walker's list. */
static int add_inode(struct bch_fs *c, struct inode_walker *w,
		     struct bkey_s_c inode)
{
	struct bch_inode_unpacked u;

	BUG_ON(bch2_inode_unpack(inode, &u));

	return darray_push(&w->inodes, ((struct inode_walker_entry) {
		.inode		= u,
		.snapshot	= inode.k->p.snapshot,
	}));
}

/* Refill the walker with every snapshot version of inode @inum. */
static int get_inodes_all_snapshots(struct btree_trans *trans,
				    struct inode_walker *w, u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->recalculate_sums = false;
	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	w->first_this_inode = true;
	return 0;
}
/*
 * Find the walker entry whose snapshot is visible from @k's snapshot; if the
 * key is in a strictly newer (descendant) snapshot than the inode version we
 * have, insert a synthesized entry for that snapshot.
 */
static struct inode_walker_entry *
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
{
	bool is_whiteout = k.k->type == KEY_TYPE_whiteout;

	struct inode_walker_entry *i;
	__darray_for_each(w->inodes, i)
		if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot))
			goto found;

	return NULL;
found:
	BUG_ON(k.k->p.snapshot > i->snapshot);

	if (k.k->p.snapshot != i->snapshot && !is_whiteout) {
		struct inode_walker_entry new = *i;

		new.snapshot = k.k->p.snapshot;
		new.count = 0;

		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, k);

		bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
			 "unexpected because we should always update the inode when we update a key in that inode\n"
			 "%s",
			 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf);
		printbuf_exit(&buf);

		/* keep the list sorted by snapshot ID */
		while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot)
			--i;

		size_t pos = i - w->inodes.data;
		int ret = darray_insert_item(&w->inodes, pos, new);
		if (ret)
			return ERR_PTR(ret);

		i = w->inodes.data + pos;
	}

	return i;
}

/*
 * Advance the walker to key @k, reloading the inode list when the inode
 * number changes, then return the matching entry (or NULL / ERR_PTR).
 */
static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
					     struct inode_walker *w,
					     struct bkey_s_c k)
{
	if (w->last_pos.inode != k.k->p.inode) {
		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
		if (ret)
			return ERR_PTR(ret);
	} else if (bkey_cmp(w->last_pos, k.k->p)) {
		darray_for_each(w->inodes, i)
			i->seen_this_pos = false;
	}

	w->last_pos = k.k->p;

	return lookup_inode_for_snapshot(trans->c, w, k);
}

/*
 * Fill @w with the versions of inode @inum that are visible from the
 * referring key's snapshot context tracked in @s.
 */
static int get_visible_inodes(struct btree_trans *trans,
			      struct inode_walker *w,
			      struct snapshots_seen *s,
			      u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot))
			continue;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);

		if (k.k->p.snapshot >= s->pos.snapshot)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

/*
 * Delete @k if its snapshot no longer exists.
 * Returns 1 if the key was deleted, 0 if fine, negative on error.
 */
static int check_key_has_snapshot(struct btree_trans *trans,
				  struct btree_iter *iter,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
				bkey_in_missing_snapshot,
				"key in missing snapshot: %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
		ret = bch2_btree_delete_at(trans, iter,
					   BTREE_UPDATE_internal_snapshot_node) ?: 1;
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

/* Delete @k and re-insert it at its correct hash-table slot. */
static int hash_redo_key(struct btree_trans *trans,
			 const struct bch_hash_desc desc,
			 struct bch_hash_info *hash_info,
			 struct btree_iter *k_iter, struct bkey_s_c k)
{
	struct bkey_i *delete;
	struct bkey_i *tmp;

	delete = bch2_trans_kmalloc(trans, sizeof(*delete));
	if (IS_ERR(delete))
		return PTR_ERR(delete);

	tmp = bch2_bkey_make_mut_noupdate(trans, k);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	bkey_init(&delete->k);
	delete->k.p = k_iter->pos;
	return  bch2_btree_iter_traverse(k_iter) ?:
		bch2_trans_update(trans, k_iter, delete, 0) ?:
		bch2_hash_set_in_snapshot(trans, desc, hash_info,
				       (subvol_inum) { 0, k.k->p.inode },
				       k.k->p.snapshot, tmp,
				       STR_HASH_must_create|
				       BTREE_UPDATE_internal_snapshot_node) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

/*
 * Verify @hash_k is reachable at its hashed slot; repair duplicates and
 * keys stored at the wrong offset.
 */
static int hash_check_key(struct btree_trans *trans,
			  const struct bch_hash_desc desc,
			  struct bch_hash_info *hash_info,
			  struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c k;
	u64 hash;
	int ret = 0;

	if (hash_k.k->type != desc.key_type)
		return 0;

	hash = desc.hash_bkey(hash_info, hash_k);

	if (likely(hash == hash_k.k->p.offset))
		return 0;

	if (hash_k.k->p.offset < hash)
		goto bad_hash;

	/* scan forward from the hashed slot looking for @hash_k */
	for_each_btree_key_norestart(trans, iter, desc.btree_id,
				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
				     BTREE_ITER_slots, k, ret) {
		if (bkey_eq(k.k->p, hash_k.k->p))
			break;

		if (fsck_err_on(k.k->type == desc.key_type &&
				!desc.cmp_bkey(k, hash_k), c,
				hash_table_key_duplicate,
				"duplicate hash table keys:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, hash_k),
				 buf.buf))) {
			ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
			break;
		}

		if (bkey_deleted(k.k)) {
			/* hole before we found the key: chain is broken */
			bch2_trans_iter_exit(trans, &iter);
			goto bad_hash;
		}
	}
out:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
bad_hash:
	if (fsck_err(c, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
		     bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
		     (printbuf_reset(&buf),
		      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
		ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
		bch_err_fn(c, ret);
		if (ret)
			return ret;
		ret = -BCH_ERR_transaction_restart_nested;
	}
fsck_err:
	goto out;
}

/* Fetch the dirent at @pos, typed. */
static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
						struct btree_iter *iter,
						struct bpos pos)
{
	return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
}
bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); 890 } 891 892 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, 893 struct btree_iter *iter, 894 struct bch_inode_unpacked *inode, 895 u32 *snapshot) 896 { 897 if (inode->bi_subvol) { 898 u64 inum; 899 int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum); 900 if (ret) 901 return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) }); 902 } 903 904 return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot)); 905 } 906 907 static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, 908 struct bkey_s_c_dirent d) 909 { 910 return inode->bi_dir == d.k->p.inode && 911 inode->bi_dir_offset == d.k->p.offset; 912 } 913 914 static bool dirent_points_to_inode(struct bkey_s_c_dirent d, 915 struct bch_inode_unpacked *inode) 916 { 917 return d.v->d_type == DT_SUBVOL 918 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 919 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 920 } 921 922 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 923 { 924 struct btree_iter iter; 925 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 926 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 927 bch2_trans_iter_exit(trans, &iter); 928 return ret; 929 } 930 931 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 932 struct bch_inode_unpacked *inode, 933 u32 inode_snapshot, bool *write_inode) 934 { 935 struct bch_fs *c = trans->c; 936 struct printbuf buf = PRINTBUF; 937 938 struct btree_iter dirent_iter = {}; 939 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 940 int ret = bkey_err(d); 941 if (ret && !bch2_err_matches(ret, ENOENT)) 942 return ret; 943 944 if (fsck_err_on(ret, 945 c, inode_points_to_missing_dirent, 946 "inode points to missing dirent\n%s", 947 (bch2_bkey_val_to_text(&buf, c, inode_k), 
buf.buf)) || 948 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 949 c, inode_points_to_wrong_dirent, 950 "inode points to dirent that does not point back:\n%s", 951 (bch2_bkey_val_to_text(&buf, c, inode_k), 952 prt_newline(&buf), 953 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 954 /* 955 * We just clear the backpointer fields for now. If we find a 956 * dirent that points to this inode in check_dirents(), we'll 957 * update it then; then when we get to check_path() if the 958 * backpointer is still 0 we'll reattach it. 959 */ 960 inode->bi_dir = 0; 961 inode->bi_dir_offset = 0; 962 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 963 *write_inode = true; 964 } 965 966 ret = 0; 967 fsck_err: 968 bch2_trans_iter_exit(trans, &dirent_iter); 969 printbuf_exit(&buf); 970 bch_err_fn(c, ret); 971 return ret; 972 } 973 974 static int check_inode(struct btree_trans *trans, 975 struct btree_iter *iter, 976 struct bkey_s_c k, 977 struct bch_inode_unpacked *prev, 978 struct snapshots_seen *s, 979 bool full) 980 { 981 struct bch_fs *c = trans->c; 982 struct bch_inode_unpacked u; 983 bool do_update = false; 984 int ret; 985 986 ret = check_key_has_snapshot(trans, iter, k); 987 if (ret < 0) 988 goto err; 989 if (ret) 990 return 0; 991 992 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 993 if (ret) 994 goto err; 995 996 if (!bkey_is_inode(k.k)) 997 return 0; 998 999 BUG_ON(bch2_inode_unpack(k, &u)); 1000 1001 if (!full && 1002 !(u.bi_flags & (BCH_INODE_i_size_dirty| 1003 BCH_INODE_i_sectors_dirty| 1004 BCH_INODE_unlinked))) 1005 return 0; 1006 1007 if (prev->bi_inum != u.bi_inum) 1008 *prev = u; 1009 1010 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 1011 inode_d_type(prev) != inode_d_type(&u), 1012 c, inode_snapshot_mismatch, 1013 "inodes in different snapshots don't match")) { 1014 bch_err(c, "repair not implemented yet"); 1015 return -BCH_ERR_fsck_repair_unimplemented; 1016 } 1017 1018 if ((u.bi_flags & 
(BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 1019 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 1020 struct bpos new_min_pos; 1021 1022 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 1023 if (ret) 1024 goto err; 1025 1026 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 1027 1028 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1029 1030 bch_err_msg(c, ret, "in fsck updating inode"); 1031 if (ret) 1032 return ret; 1033 1034 if (!bpos_eq(new_min_pos, POS_MIN)) 1035 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 1036 return 0; 1037 } 1038 1039 if (u.bi_flags & BCH_INODE_unlinked) { 1040 ret = check_inode_deleted_list(trans, k.k->p); 1041 if (ret < 0) 1042 return ret; 1043 1044 fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list, 1045 "inode %llu:%u unlinked, but not on deleted list", 1046 u.bi_inum, k.k->p.snapshot); 1047 ret = 0; 1048 } 1049 1050 if (u.bi_flags & BCH_INODE_unlinked && 1051 (!c->sb.clean || 1052 fsck_err(c, inode_unlinked_but_clean, 1053 "filesystem marked clean, but inode %llu unlinked", 1054 u.bi_inum))) { 1055 ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot); 1056 bch_err_msg(c, ret, "in fsck deleting inode"); 1057 return ret; 1058 } 1059 1060 if (u.bi_flags & BCH_INODE_i_size_dirty && 1061 (!c->sb.clean || 1062 fsck_err(c, inode_i_size_dirty_but_clean, 1063 "filesystem marked clean, but inode %llu has i_size dirty", 1064 u.bi_inum))) { 1065 bch_verbose(c, "truncating inode %llu", u.bi_inum); 1066 1067 /* 1068 * XXX: need to truncate partial blocks too here - or ideally 1069 * just switch units to bytes and that issue goes away 1070 */ 1071 ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents, 1072 SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9, 1073 iter->pos.snapshot), 1074 POS(u.bi_inum, U64_MAX), 1075 0, NULL); 1076 bch_err_msg(c, ret, "in fsck truncating inode"); 1077 if (ret) 1078 return ret; 1079 1080 /* 
1081 * We truncated without our normal sector accounting hook, just 1082 * make sure we recalculate it: 1083 */ 1084 u.bi_flags |= BCH_INODE_i_sectors_dirty; 1085 1086 u.bi_flags &= ~BCH_INODE_i_size_dirty; 1087 do_update = true; 1088 } 1089 1090 if (u.bi_flags & BCH_INODE_i_sectors_dirty && 1091 (!c->sb.clean || 1092 fsck_err(c, inode_i_sectors_dirty_but_clean, 1093 "filesystem marked clean, but inode %llu has i_sectors dirty", 1094 u.bi_inum))) { 1095 s64 sectors; 1096 1097 bch_verbose(c, "recounting sectors for inode %llu", 1098 u.bi_inum); 1099 1100 sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot); 1101 if (sectors < 0) { 1102 bch_err_msg(c, sectors, "in fsck recounting inode sectors"); 1103 return sectors; 1104 } 1105 1106 u.bi_sectors = sectors; 1107 u.bi_flags &= ~BCH_INODE_i_sectors_dirty; 1108 do_update = true; 1109 } 1110 1111 if (u.bi_flags & BCH_INODE_backptr_untrusted) { 1112 u.bi_dir = 0; 1113 u.bi_dir_offset = 0; 1114 u.bi_flags &= ~BCH_INODE_backptr_untrusted; 1115 do_update = true; 1116 } 1117 1118 if (u.bi_dir || u.bi_dir_offset) { 1119 ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update); 1120 if (ret) 1121 goto err; 1122 } 1123 1124 if (fsck_err_on(u.bi_parent_subvol && 1125 (u.bi_subvol == 0 || 1126 u.bi_subvol == BCACHEFS_ROOT_SUBVOL), 1127 c, inode_bi_parent_nonzero, 1128 "inode %llu:%u has subvol %u but nonzero parent subvol %u", 1129 u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) { 1130 u.bi_parent_subvol = 0; 1131 do_update = true; 1132 } 1133 1134 if (u.bi_subvol) { 1135 struct bch_subvolume s; 1136 1137 ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s); 1138 if (ret && !bch2_err_matches(ret, ENOENT)) 1139 goto err; 1140 1141 if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1142 ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum); 1143 goto do_update; 1144 } 1145 1146 if (fsck_err_on(ret, 1147 c, inode_bi_subvol_missing, 1148 
"inode %llu:%u bi_subvol points to missing subvolume %u", 1149 u.bi_inum, k.k->p.snapshot, u.bi_subvol) || 1150 fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum || 1151 !bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot), 1152 k.k->p.snapshot), 1153 c, inode_bi_subvol_wrong, 1154 "inode %llu:%u points to subvol %u, but subvol points to %llu:%u", 1155 u.bi_inum, k.k->p.snapshot, u.bi_subvol, 1156 le64_to_cpu(s.inode), 1157 le32_to_cpu(s.snapshot))) { 1158 u.bi_subvol = 0; 1159 u.bi_parent_subvol = 0; 1160 do_update = true; 1161 } 1162 } 1163 do_update: 1164 if (do_update) { 1165 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 1166 bch_err_msg(c, ret, "in fsck updating inode"); 1167 if (ret) 1168 return ret; 1169 } 1170 err: 1171 fsck_err: 1172 bch_err_fn(c, ret); 1173 return ret; 1174 } 1175 1176 int bch2_check_inodes(struct bch_fs *c) 1177 { 1178 bool full = c->opts.fsck; 1179 struct bch_inode_unpacked prev = { 0 }; 1180 struct snapshots_seen s; 1181 1182 snapshots_seen_init(&s); 1183 1184 int ret = bch2_trans_run(c, 1185 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 1186 POS_MIN, 1187 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1188 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1189 check_inode(trans, &iter, k, &prev, &s, full))); 1190 1191 snapshots_seen_exit(&s); 1192 bch_err_fn(c, ret); 1193 return ret; 1194 } 1195 1196 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) 1197 { 1198 struct bch_fs *c = trans->c; 1199 int ret = 0; 1200 s64 count2; 1201 1202 darray_for_each(w->inodes, i) { 1203 if (i->inode.bi_sectors == i->count) 1204 continue; 1205 1206 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1207 1208 if (w->recalculate_sums) 1209 i->count = count2; 1210 1211 if (i->count != count2) { 1212 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1213 w->last_pos.inode, i->snapshot, i->count, count2); 1214 return 
-BCH_ERR_internal_fsck_err; 1215 } 1216 1217 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1218 c, inode_i_sectors_wrong, 1219 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1220 w->last_pos.inode, i->snapshot, 1221 i->inode.bi_sectors, i->count)) { 1222 i->inode.bi_sectors = i->count; 1223 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1224 if (ret) 1225 break; 1226 } 1227 } 1228 fsck_err: 1229 bch_err_fn(c, ret); 1230 return ret; 1231 } 1232 1233 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1234 { 1235 u32 restart_count = trans->restart_count; 1236 return check_i_sectors_notnested(trans, w) ?: 1237 trans_was_restarted(trans, restart_count); 1238 } 1239 1240 struct extent_end { 1241 u32 snapshot; 1242 u64 offset; 1243 struct snapshots_seen seen; 1244 }; 1245 1246 struct extent_ends { 1247 struct bpos last_pos; 1248 DARRAY(struct extent_end) e; 1249 }; 1250 1251 static void extent_ends_reset(struct extent_ends *extent_ends) 1252 { 1253 darray_for_each(extent_ends->e, i) 1254 snapshots_seen_exit(&i->seen); 1255 extent_ends->e.nr = 0; 1256 } 1257 1258 static void extent_ends_exit(struct extent_ends *extent_ends) 1259 { 1260 extent_ends_reset(extent_ends); 1261 darray_exit(&extent_ends->e); 1262 } 1263 1264 static void extent_ends_init(struct extent_ends *extent_ends) 1265 { 1266 memset(extent_ends, 0, sizeof(*extent_ends)); 1267 } 1268 1269 static int extent_ends_at(struct bch_fs *c, 1270 struct extent_ends *extent_ends, 1271 struct snapshots_seen *seen, 1272 struct bkey_s_c k) 1273 { 1274 struct extent_end *i, n = (struct extent_end) { 1275 .offset = k.k->p.offset, 1276 .snapshot = k.k->p.snapshot, 1277 .seen = *seen, 1278 }; 1279 1280 n.seen.ids.data = kmemdup(seen->ids.data, 1281 sizeof(seen->ids.data[0]) * seen->ids.size, 1282 GFP_KERNEL); 1283 if (!n.seen.ids.data) 1284 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1285 1286 __darray_for_each(extent_ends->e, i) { 1287 if 
(i->snapshot == k.k->p.snapshot) { 1288 snapshots_seen_exit(&i->seen); 1289 *i = n; 1290 return 0; 1291 } 1292 1293 if (i->snapshot >= k.k->p.snapshot) 1294 break; 1295 } 1296 1297 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1298 } 1299 1300 static int overlapping_extents_found(struct btree_trans *trans, 1301 enum btree_id btree, 1302 struct bpos pos1, struct snapshots_seen *pos1_seen, 1303 struct bkey pos2, 1304 bool *fixed, 1305 struct extent_end *extent_end) 1306 { 1307 struct bch_fs *c = trans->c; 1308 struct printbuf buf = PRINTBUF; 1309 struct btree_iter iter1, iter2 = { NULL }; 1310 struct bkey_s_c k1, k2; 1311 int ret; 1312 1313 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1314 1315 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1316 BTREE_ITER_all_snapshots| 1317 BTREE_ITER_not_extents); 1318 k1 = bch2_btree_iter_peek_upto(&iter1, POS(pos1.inode, U64_MAX)); 1319 ret = bkey_err(k1); 1320 if (ret) 1321 goto err; 1322 1323 prt_str(&buf, "\n "); 1324 bch2_bkey_val_to_text(&buf, c, k1); 1325 1326 if (!bpos_eq(pos1, k1.k->p)) { 1327 prt_str(&buf, "\n wanted\n "); 1328 bch2_bpos_to_text(&buf, pos1); 1329 prt_str(&buf, "\n "); 1330 bch2_bkey_to_text(&buf, &pos2); 1331 1332 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1333 __func__, buf.buf); 1334 ret = -BCH_ERR_internal_fsck_err; 1335 goto err; 1336 } 1337 1338 bch2_trans_copy_iter(&iter2, &iter1); 1339 1340 while (1) { 1341 bch2_btree_iter_advance(&iter2); 1342 1343 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1344 ret = bkey_err(k2); 1345 if (ret) 1346 goto err; 1347 1348 if (bpos_ge(k2.k->p, pos2.p)) 1349 break; 1350 } 1351 1352 prt_str(&buf, "\n "); 1353 bch2_bkey_val_to_text(&buf, c, k2); 1354 1355 if (bpos_gt(k2.k->p, pos2.p) || 1356 pos2.size != k2.k->size) { 1357 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1358 __func__, buf.buf); 1359 ret = -BCH_ERR_internal_fsck_err; 1360 goto err; 1361 
} 1362 1363 prt_printf(&buf, "\n overwriting %s extent", 1364 pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); 1365 1366 if (fsck_err(c, extent_overlapping, 1367 "overlapping extents%s", buf.buf)) { 1368 struct btree_iter *old_iter = &iter1; 1369 struct disk_reservation res = { 0 }; 1370 1371 if (pos1.snapshot < pos2.p.snapshot) { 1372 old_iter = &iter2; 1373 swap(k1, k2); 1374 } 1375 1376 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1377 1378 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1379 BTREE_UPDATE_internal_snapshot_node, 1380 k1, k2) ?: 1381 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1382 bch2_disk_reservation_put(c, &res); 1383 1384 if (ret) 1385 goto err; 1386 1387 *fixed = true; 1388 1389 if (pos1.snapshot == pos2.p.snapshot) { 1390 /* 1391 * We overwrote the first extent, and did the overwrite 1392 * in the same snapshot: 1393 */ 1394 extent_end->offset = bkey_start_offset(&pos2); 1395 } else if (pos1.snapshot > pos2.p.snapshot) { 1396 /* 1397 * We overwrote the first extent in pos2's snapshot: 1398 */ 1399 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1400 } else { 1401 /* 1402 * We overwrote the second extent - restart 1403 * check_extent() from the top: 1404 */ 1405 ret = -BCH_ERR_transaction_restart_nested; 1406 } 1407 } 1408 fsck_err: 1409 err: 1410 bch2_trans_iter_exit(trans, &iter2); 1411 bch2_trans_iter_exit(trans, &iter1); 1412 printbuf_exit(&buf); 1413 return ret; 1414 } 1415 1416 static int check_overlapping_extents(struct btree_trans *trans, 1417 struct snapshots_seen *seen, 1418 struct extent_ends *extent_ends, 1419 struct bkey_s_c k, 1420 struct btree_iter *iter, 1421 bool *fixed) 1422 { 1423 struct bch_fs *c = trans->c; 1424 int ret = 0; 1425 1426 /* transaction restart, running again */ 1427 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1428 return 0; 1429 1430 if (extent_ends->last_pos.inode != k.k->p.inode) 1431 extent_ends_reset(extent_ends); 1432 1433 
darray_for_each(extent_ends->e, i) { 1434 if (i->offset <= bkey_start_offset(k.k)) 1435 continue; 1436 1437 if (!ref_visible2(c, 1438 k.k->p.snapshot, seen, 1439 i->snapshot, &i->seen)) 1440 continue; 1441 1442 ret = overlapping_extents_found(trans, iter->btree_id, 1443 SPOS(iter->pos.inode, 1444 i->offset, 1445 i->snapshot), 1446 &i->seen, 1447 *k.k, fixed, i); 1448 if (ret) 1449 goto err; 1450 } 1451 1452 extent_ends->last_pos = k.k->p; 1453 err: 1454 return ret; 1455 } 1456 1457 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1458 struct bkey_s_c k) 1459 { 1460 struct bch_fs *c = trans->c; 1461 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1462 struct bch_extent_crc_unpacked crc; 1463 const union bch_extent_entry *i; 1464 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1465 1466 bkey_for_each_crc(k.k, ptrs, crc, i) 1467 if (crc_is_encoded(crc) && 1468 crc.uncompressed_size > encoded_extent_max_sectors) { 1469 struct printbuf buf = PRINTBUF; 1470 1471 bch2_bkey_val_to_text(&buf, c, k); 1472 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1473 printbuf_exit(&buf); 1474 } 1475 1476 return 0; 1477 } 1478 1479 static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1480 struct bkey_s_c k, 1481 struct inode_walker *inode, 1482 struct snapshots_seen *s, 1483 struct extent_ends *extent_ends) 1484 { 1485 struct bch_fs *c = trans->c; 1486 struct inode_walker_entry *i; 1487 struct printbuf buf = PRINTBUF; 1488 int ret = 0; 1489 1490 ret = check_key_has_snapshot(trans, iter, k); 1491 if (ret) { 1492 ret = ret < 0 ? 
ret : 0; 1493 goto out; 1494 } 1495 1496 if (inode->last_pos.inode != k.k->p.inode) { 1497 ret = check_i_sectors(trans, inode); 1498 if (ret) 1499 goto err; 1500 } 1501 1502 i = walk_inode(trans, inode, k); 1503 ret = PTR_ERR_OR_ZERO(i); 1504 if (ret) 1505 goto err; 1506 1507 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1508 if (ret) 1509 goto err; 1510 1511 if (k.k->type != KEY_TYPE_whiteout) { 1512 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 1513 ret = reconstruct_reg_inode(trans, k.k->p.snapshot, k.k->p.inode) ?: 1514 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 1515 if (ret) 1516 goto err; 1517 1518 inode->last_pos.inode--; 1519 ret = -BCH_ERR_transaction_restart_nested; 1520 goto err; 1521 } 1522 1523 if (fsck_err_on(!i, c, extent_in_missing_inode, 1524 "extent in missing inode:\n %s", 1525 (printbuf_reset(&buf), 1526 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1527 goto delete; 1528 1529 if (fsck_err_on(i && 1530 !S_ISREG(i->inode.bi_mode) && 1531 !S_ISLNK(i->inode.bi_mode), 1532 c, extent_in_non_reg_inode, 1533 "extent in non regular inode mode %o:\n %s", 1534 i->inode.bi_mode, 1535 (printbuf_reset(&buf), 1536 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1537 goto delete; 1538 1539 ret = check_overlapping_extents(trans, s, extent_ends, k, iter, 1540 &inode->recalculate_sums); 1541 if (ret) 1542 goto err; 1543 } 1544 1545 /* 1546 * Check inodes in reverse order, from oldest snapshots to newest, 1547 * starting from the inode that matches this extent's snapshot. 
If we 1548 * didn't have one, iterate over all inodes: 1549 */ 1550 if (!i) 1551 i = inode->inodes.data + inode->inodes.nr - 1; 1552 1553 for (; 1554 inode->inodes.data && i >= inode->inodes.data; 1555 --i) { 1556 if (i->snapshot > k.k->p.snapshot || 1557 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) 1558 continue; 1559 1560 if (k.k->type != KEY_TYPE_whiteout) { 1561 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && 1562 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1563 !bkey_extent_is_reservation(k), 1564 c, extent_past_end_of_inode, 1565 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1566 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1567 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1568 struct btree_iter iter2; 1569 1570 bch2_trans_copy_iter(&iter2, iter); 1571 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1572 ret = bch2_btree_iter_traverse(&iter2) ?: 1573 bch2_btree_delete_at(trans, &iter2, 1574 BTREE_UPDATE_internal_snapshot_node); 1575 bch2_trans_iter_exit(trans, &iter2); 1576 if (ret) 1577 goto err; 1578 1579 iter->k.type = KEY_TYPE_whiteout; 1580 } 1581 1582 if (bkey_extent_is_allocation(k.k)) 1583 i->count += k.k->size; 1584 } 1585 1586 i->seen_this_pos = true; 1587 } 1588 1589 if (k.k->type != KEY_TYPE_whiteout) { 1590 ret = extent_ends_at(c, extent_ends, s, k); 1591 if (ret) 1592 goto err; 1593 } 1594 out: 1595 err: 1596 fsck_err: 1597 printbuf_exit(&buf); 1598 bch_err_fn(c, ret); 1599 return ret; 1600 delete: 1601 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); 1602 goto out; 1603 } 1604 1605 /* 1606 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1607 * that i_size an i_sectors are consistent 1608 */ 1609 int bch2_check_extents(struct bch_fs *c) 1610 { 1611 struct inode_walker w = inode_walker_init(); 1612 struct snapshots_seen s; 1613 struct extent_ends extent_ends; 1614 struct disk_reservation res = { 0 }; 1615 1616 
snapshots_seen_init(&s); 1617 extent_ends_init(&extent_ends); 1618 1619 int ret = bch2_trans_run(c, 1620 for_each_btree_key_commit(trans, iter, BTREE_ID_extents, 1621 POS(BCACHEFS_ROOT_INO, 0), 1622 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1623 &res, NULL, 1624 BCH_TRANS_COMMIT_no_enospc, ({ 1625 bch2_disk_reservation_put(c, &res); 1626 check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: 1627 check_extent_overbig(trans, &iter, k); 1628 })) ?: 1629 check_i_sectors_notnested(trans, &w)); 1630 1631 bch2_disk_reservation_put(c, &res); 1632 extent_ends_exit(&extent_ends); 1633 inode_walker_exit(&w); 1634 snapshots_seen_exit(&s); 1635 1636 bch_err_fn(c, ret); 1637 return ret; 1638 } 1639 1640 int bch2_check_indirect_extents(struct bch_fs *c) 1641 { 1642 struct disk_reservation res = { 0 }; 1643 1644 int ret = bch2_trans_run(c, 1645 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1646 POS_MIN, 1647 BTREE_ITER_prefetch, k, 1648 &res, NULL, 1649 BCH_TRANS_COMMIT_no_enospc, ({ 1650 bch2_disk_reservation_put(c, &res); 1651 check_extent_overbig(trans, &iter, k); 1652 }))); 1653 1654 bch2_disk_reservation_put(c, &res); 1655 bch_err_fn(c, ret); 1656 return ret; 1657 } 1658 1659 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) 1660 { 1661 struct bch_fs *c = trans->c; 1662 int ret = 0; 1663 s64 count2; 1664 1665 darray_for_each(w->inodes, i) { 1666 if (i->inode.bi_nlink == i->count) 1667 continue; 1668 1669 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1670 if (count2 < 0) 1671 return count2; 1672 1673 if (i->count != count2) { 1674 bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", 1675 w->last_pos.inode, i->snapshot, i->count, count2); 1676 i->count = count2; 1677 if (i->inode.bi_nlink == i->count) 1678 continue; 1679 } 1680 1681 if (fsck_err_on(i->inode.bi_nlink != i->count, 1682 c, inode_dir_wrong_nlink, 1683 "directory %llu:%u with wrong 
i_nlink: got %u, should be %llu", 1684 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1685 i->inode.bi_nlink = i->count; 1686 ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1687 if (ret) 1688 break; 1689 } 1690 } 1691 fsck_err: 1692 bch_err_fn(c, ret); 1693 return ret; 1694 } 1695 1696 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) 1697 { 1698 u32 restart_count = trans->restart_count; 1699 return check_subdir_count_notnested(trans, w) ?: 1700 trans_was_restarted(trans, restart_count); 1701 } 1702 1703 static int check_dirent_inode_dirent(struct btree_trans *trans, 1704 struct btree_iter *iter, 1705 struct bkey_s_c_dirent d, 1706 struct bch_inode_unpacked *target, 1707 u32 target_snapshot) 1708 { 1709 struct bch_fs *c = trans->c; 1710 struct printbuf buf = PRINTBUF; 1711 int ret = 0; 1712 1713 if (inode_points_to_dirent(target, d)) 1714 return 0; 1715 1716 if (bch2_inode_should_have_bp(target) && 1717 !fsck_err(c, inode_wrong_backpointer, 1718 "dirent points to inode that does not point back:\n %s", 1719 (bch2_bkey_val_to_text(&buf, c, d.s_c), 1720 prt_printf(&buf, "\n "), 1721 bch2_inode_unpacked_to_text(&buf, target), 1722 buf.buf))) 1723 goto out_noiter; 1724 1725 if (!target->bi_dir && 1726 !target->bi_dir_offset) { 1727 target->bi_dir = d.k->p.inode; 1728 target->bi_dir_offset = d.k->p.offset; 1729 return __bch2_fsck_write_inode(trans, target, target_snapshot); 1730 } 1731 1732 struct btree_iter bp_iter = { NULL }; 1733 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1734 SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1735 ret = bkey_err(bp_dirent); 1736 if (ret && !bch2_err_matches(ret, ENOENT)) 1737 goto err; 1738 1739 bool backpointer_exists = !ret; 1740 ret = 0; 1741 1742 if (fsck_err_on(!backpointer_exists, 1743 c, inode_wrong_backpointer, 1744 "inode %llu:%u has wrong backpointer:\n" 1745 "got %llu:%llu\n" 1746 "should be %llu:%llu", 1747 target->bi_inum, 
target_snapshot, 1748 target->bi_dir, 1749 target->bi_dir_offset, 1750 d.k->p.inode, 1751 d.k->p.offset)) { 1752 target->bi_dir = d.k->p.inode; 1753 target->bi_dir_offset = d.k->p.offset; 1754 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1755 goto out; 1756 } 1757 1758 bch2_bkey_val_to_text(&buf, c, d.s_c); 1759 prt_newline(&buf); 1760 if (backpointer_exists) 1761 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 1762 1763 if (fsck_err_on(backpointer_exists && 1764 (S_ISDIR(target->bi_mode) || 1765 target->bi_subvol), 1766 c, inode_dir_multiple_links, 1767 "%s %llu:%u with multiple links\n%s", 1768 S_ISDIR(target->bi_mode) ? "directory" : "subvolume", 1769 target->bi_inum, target_snapshot, buf.buf)) { 1770 ret = __remove_dirent(trans, d.k->p); 1771 goto out; 1772 } 1773 1774 /* 1775 * hardlinked file with nlink 0: 1776 * We're just adjusting nlink here so check_nlinks() will pick 1777 * it up, it ignores inodes with nlink 0 1778 */ 1779 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1780 c, inode_multiple_links_but_nlink_0, 1781 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1782 target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1783 target->bi_nlink++; 1784 target->bi_flags &= ~BCH_INODE_unlinked; 1785 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1786 if (ret) 1787 goto err; 1788 } 1789 out: 1790 err: 1791 fsck_err: 1792 bch2_trans_iter_exit(trans, &bp_iter); 1793 out_noiter: 1794 printbuf_exit(&buf); 1795 bch_err_fn(c, ret); 1796 return ret; 1797 } 1798 1799 static int check_dirent_target(struct btree_trans *trans, 1800 struct btree_iter *iter, 1801 struct bkey_s_c_dirent d, 1802 struct bch_inode_unpacked *target, 1803 u32 target_snapshot) 1804 { 1805 struct bch_fs *c = trans->c; 1806 struct bkey_i_dirent *n; 1807 struct printbuf buf = PRINTBUF; 1808 int ret = 0; 1809 1810 ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1811 if (ret) 1812 goto err; 1813 1814 
if (fsck_err_on(d.v->d_type != inode_d_type(target), 1815 c, dirent_d_type_wrong, 1816 "incorrect d_type: got %s, should be %s:\n%s", 1817 bch2_d_type_str(d.v->d_type), 1818 bch2_d_type_str(inode_d_type(target)), 1819 (printbuf_reset(&buf), 1820 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1821 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 1822 ret = PTR_ERR_OR_ZERO(n); 1823 if (ret) 1824 goto err; 1825 1826 bkey_reassemble(&n->k_i, d.s_c); 1827 n->v.d_type = inode_d_type(target); 1828 if (n->v.d_type == DT_SUBVOL) { 1829 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 1830 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1831 } else { 1832 n->v.d_inum = cpu_to_le64(target->bi_inum); 1833 } 1834 1835 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1836 if (ret) 1837 goto err; 1838 1839 d = dirent_i_to_s_c(n); 1840 } 1841 err: 1842 fsck_err: 1843 printbuf_exit(&buf); 1844 bch_err_fn(c, ret); 1845 return ret; 1846 } 1847 1848 /* find a subvolume that's a descendent of @snapshot: */ 1849 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1850 { 1851 struct btree_iter iter; 1852 struct bkey_s_c k; 1853 int ret; 1854 1855 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1856 if (k.k->type != KEY_TYPE_subvolume) 1857 continue; 1858 1859 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1860 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1861 bch2_trans_iter_exit(trans, &iter); 1862 *subvolid = k.k->p.offset; 1863 goto found; 1864 } 1865 } 1866 if (!ret) 1867 ret = -ENOENT; 1868 found: 1869 bch2_trans_iter_exit(trans, &iter); 1870 return ret; 1871 } 1872 1873 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1874 struct bkey_s_c_dirent d) 1875 { 1876 struct bch_fs *c = trans->c; 1877 struct btree_iter subvol_iter = {}; 1878 struct bch_inode_unpacked subvol_root; 1879 u32 parent_subvol = 
le32_to_cpu(d.v->d_parent_subvol); 1880 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1881 u32 parent_snapshot; 1882 u32 new_parent_subvol = 0; 1883 u64 parent_inum; 1884 struct printbuf buf = PRINTBUF; 1885 int ret = 0; 1886 1887 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1888 if (ret && !bch2_err_matches(ret, ENOENT)) 1889 return ret; 1890 1891 if (ret || 1892 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 1893 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1894 if (ret2 && !bch2_err_matches(ret, ENOENT)) 1895 return ret2; 1896 } 1897 1898 if (ret && 1899 !new_parent_subvol && 1900 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 1901 /* 1902 * Couldn't find a subvol for dirent's snapshot - but we lost 1903 * subvols, so we need to reconstruct: 1904 */ 1905 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 1906 if (ret) 1907 return ret; 1908 1909 parent_snapshot = d.k->p.snapshot; 1910 } 1911 1912 if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, 1913 "dirent parent_subvol points to missing subvolume\n%s", 1914 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1915 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1916 c, dirent_not_visible_in_parent_subvol, 1917 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1918 parent_snapshot, 1919 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1920 if (!new_parent_subvol) { 1921 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 1922 return -BCH_ERR_fsck_repair_unimplemented; 1923 } 1924 1925 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1926 ret = PTR_ERR_OR_ZERO(new_dirent); 1927 if (ret) 1928 goto err; 1929 1930 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1931 } 1932 1933 struct bkey_s_c_subvolume s = 1934 
bch2_bkey_get_iter_typed(trans, &subvol_iter,
				 BTREE_ID_subvolumes, POS(0, target_subvol),
				 0, subvolume);
	ret = bkey_err(s.s_c);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	/* Dirent points to a subvolume that doesn't exist: delete it */
	if (ret) {
		if (fsck_err(c, dirent_to_missing_subvol,
			     "dirent points to missing subvolume\n%s",
			     (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)))
			/*
			 * NOTE(review): returning directly skips the
			 * printbuf_exit()/iter-exit cleanup at err: and
			 * appears to leak @buf — confirm and route through
			 * the cleanup labels instead.
			 */
			return __remove_dirent(trans, d.k->p);
		ret = 0;
		goto out;
	}

	/* NOTE(review): "should be be" typo in this user-visible message */
	if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol,
			c, subvol_fs_path_parent_wrong,
			"subvol with wrong fs_path_parent, should be be %u\n%s",
			parent_subvol,
			(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
		struct bkey_i_subvolume *n =
			bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			goto err;

		n->v.fs_path_parent = cpu_to_le32(parent_subvol);
	}

	u64 target_inum = le64_to_cpu(s.v->inode);
	u32 target_snapshot = le32_to_cpu(s.v->snapshot);

	/* Check that the subvolume's root inode exists and points back: */
	ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	if (ret) {
		bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
		/*
		 * NOTE(review): dead store — the next line immediately
		 * overwrites this with 0, so the error is logged but the
		 * missing root inode is left unrepaired and unreported.
		 * Confirm whether -BCH_ERR_fsck_repair_unimplemented was
		 * meant to be returned here.
		 */
		ret = -BCH_ERR_fsck_repair_unimplemented;
		ret = 0;
		goto err;
	}

	/* NOTE(review): @ret is always 0 here, so the !ret term is redundant */
	if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
			c, inode_bi_parent_wrong,
			"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
			target_inum,
			subvol_root.bi_parent_subvol, parent_subvol)) {
		subvol_root.bi_parent_subvol = parent_subvol;
		ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot);
		if (ret)
			goto err;
	}

	ret = check_dirent_target(trans, iter, d, &subvol_root,
				  target_snapshot);
	if (ret)
		goto err;
out:
err:
fsck_err:
bch2_trans_iter_exit(trans, &subvol_iter); 1997 printbuf_exit(&buf); 1998 return ret; 1999 } 2000 2001 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 2002 struct bkey_s_c k, 2003 struct bch_hash_info *hash_info, 2004 struct inode_walker *dir, 2005 struct inode_walker *target, 2006 struct snapshots_seen *s) 2007 { 2008 struct bch_fs *c = trans->c; 2009 struct inode_walker_entry *i; 2010 struct printbuf buf = PRINTBUF; 2011 int ret = 0; 2012 2013 ret = check_key_has_snapshot(trans, iter, k); 2014 if (ret) { 2015 ret = ret < 0 ? ret : 0; 2016 goto out; 2017 } 2018 2019 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 2020 if (ret) 2021 goto err; 2022 2023 if (k.k->type == KEY_TYPE_whiteout) 2024 goto out; 2025 2026 if (dir->last_pos.inode != k.k->p.inode) { 2027 ret = check_subdir_count(trans, dir); 2028 if (ret) 2029 goto err; 2030 } 2031 2032 BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); 2033 2034 i = walk_inode(trans, dir, k); 2035 ret = PTR_ERR_OR_ZERO(i); 2036 if (ret < 0) 2037 goto err; 2038 2039 if (dir->first_this_inode && dir->inodes.nr) 2040 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); 2041 dir->first_this_inode = false; 2042 2043 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 2044 ret = reconstruct_inode(trans, k.k->p.snapshot, k.k->p.inode, 0, S_IFDIR) ?: 2045 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 2046 if (ret) 2047 goto err; 2048 2049 dir->last_pos.inode--; 2050 ret = -BCH_ERR_transaction_restart_nested; 2051 goto err; 2052 } 2053 2054 if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, 2055 "dirent in nonexisting directory:\n%s", 2056 (printbuf_reset(&buf), 2057 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 2058 ret = bch2_btree_delete_at(trans, iter, 2059 BTREE_UPDATE_internal_snapshot_node); 2060 goto out; 2061 } 2062 2063 if (!i) 2064 goto out; 2065 2066 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), 2067 c, dirent_in_non_dir_inode, 2068 
"dirent in non directory inode type %s:\n%s", 2069 bch2_d_type_str(inode_d_type(&i->inode)), 2070 (printbuf_reset(&buf), 2071 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 2072 ret = bch2_btree_delete_at(trans, iter, 0); 2073 goto out; 2074 } 2075 2076 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 2077 if (ret < 0) 2078 goto err; 2079 if (ret) { 2080 /* dirent has been deleted */ 2081 ret = 0; 2082 goto out; 2083 } 2084 2085 if (k.k->type != KEY_TYPE_dirent) 2086 goto out; 2087 2088 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2089 2090 if (d.v->d_type == DT_SUBVOL) { 2091 ret = check_dirent_to_subvol(trans, iter, d); 2092 if (ret) 2093 goto err; 2094 } else { 2095 ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); 2096 if (ret) 2097 goto err; 2098 2099 if (fsck_err_on(!target->inodes.nr, 2100 c, dirent_to_missing_inode, 2101 "dirent points to missing inode:\n%s", 2102 (printbuf_reset(&buf), 2103 bch2_bkey_val_to_text(&buf, c, k), 2104 buf.buf))) { 2105 ret = __remove_dirent(trans, d.k->p); 2106 if (ret) 2107 goto err; 2108 } 2109 2110 darray_for_each(target->inodes, i) { 2111 ret = check_dirent_target(trans, iter, d, 2112 &i->inode, i->snapshot); 2113 if (ret) 2114 goto err; 2115 } 2116 2117 if (d.v->d_type == DT_DIR) 2118 for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) 2119 i->count++; 2120 } 2121 out: 2122 err: 2123 fsck_err: 2124 printbuf_exit(&buf); 2125 bch_err_fn(c, ret); 2126 return ret; 2127 } 2128 2129 /* 2130 * Walk dirents: verify that they all have a corresponding S_ISDIR inode, 2131 * validate d_type 2132 */ 2133 int bch2_check_dirents(struct bch_fs *c) 2134 { 2135 struct inode_walker dir = inode_walker_init(); 2136 struct inode_walker target = inode_walker_init(); 2137 struct snapshots_seen s; 2138 struct bch_hash_info hash_info; 2139 2140 snapshots_seen_init(&s); 2141 2142 int ret = bch2_trans_run(c, 2143 for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, 2144 POS(BCACHEFS_ROOT_INO, 0), 
2145 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2146 k, 2147 NULL, NULL, 2148 BCH_TRANS_COMMIT_no_enospc, 2149 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: 2150 check_subdir_count_notnested(trans, &dir)); 2151 2152 snapshots_seen_exit(&s); 2153 inode_walker_exit(&dir); 2154 inode_walker_exit(&target); 2155 bch_err_fn(c, ret); 2156 return ret; 2157 } 2158 2159 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, 2160 struct bkey_s_c k, 2161 struct bch_hash_info *hash_info, 2162 struct inode_walker *inode) 2163 { 2164 struct bch_fs *c = trans->c; 2165 struct inode_walker_entry *i; 2166 int ret; 2167 2168 ret = check_key_has_snapshot(trans, iter, k); 2169 if (ret < 0) 2170 return ret; 2171 if (ret) 2172 return 0; 2173 2174 i = walk_inode(trans, inode, k); 2175 ret = PTR_ERR_OR_ZERO(i); 2176 if (ret) 2177 return ret; 2178 2179 if (inode->first_this_inode && inode->inodes.nr) 2180 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); 2181 inode->first_this_inode = false; 2182 2183 if (fsck_err_on(!i, c, xattr_in_missing_inode, 2184 "xattr for missing inode %llu", 2185 k.k->p.inode)) 2186 return bch2_btree_delete_at(trans, iter, 0); 2187 2188 if (!i) 2189 return 0; 2190 2191 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2192 fsck_err: 2193 bch_err_fn(c, ret); 2194 return ret; 2195 } 2196 2197 /* 2198 * Walk xattrs: verify that they all have a corresponding inode 2199 */ 2200 int bch2_check_xattrs(struct bch_fs *c) 2201 { 2202 struct inode_walker inode = inode_walker_init(); 2203 struct bch_hash_info hash_info; 2204 int ret = 0; 2205 2206 ret = bch2_trans_run(c, 2207 for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, 2208 POS(BCACHEFS_ROOT_INO, 0), 2209 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, 2210 k, 2211 NULL, NULL, 2212 BCH_TRANS_COMMIT_no_enospc, 2213 check_xattr(trans, &iter, k, &hash_info, &inode))); 2214 bch_err_fn(c, ret); 2215 return ret; 2216 } 2217 2218 static int 
check_root_trans(struct btree_trans *trans) 2219 { 2220 struct bch_fs *c = trans->c; 2221 struct bch_inode_unpacked root_inode; 2222 u32 snapshot; 2223 u64 inum; 2224 int ret; 2225 2226 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); 2227 if (ret && !bch2_err_matches(ret, ENOENT)) 2228 return ret; 2229 2230 if (mustfix_fsck_err_on(ret, c, root_subvol_missing, 2231 "root subvol missing")) { 2232 struct bkey_i_subvolume *root_subvol = 2233 bch2_trans_kmalloc(trans, sizeof(*root_subvol)); 2234 ret = PTR_ERR_OR_ZERO(root_subvol); 2235 if (ret) 2236 goto err; 2237 2238 snapshot = U32_MAX; 2239 inum = BCACHEFS_ROOT_INO; 2240 2241 bkey_subvolume_init(&root_subvol->k_i); 2242 root_subvol->k.p.offset = BCACHEFS_ROOT_SUBVOL; 2243 root_subvol->v.flags = 0; 2244 root_subvol->v.snapshot = cpu_to_le32(snapshot); 2245 root_subvol->v.inode = cpu_to_le64(inum); 2246 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0); 2247 bch_err_msg(c, ret, "writing root subvol"); 2248 if (ret) 2249 goto err; 2250 } 2251 2252 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2253 if (ret && !bch2_err_matches(ret, ENOENT)) 2254 return ret; 2255 2256 if (mustfix_fsck_err_on(ret, c, root_dir_missing, 2257 "root directory missing") || 2258 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), 2259 c, root_inode_not_dir, 2260 "root inode not a directory")) { 2261 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2262 0, NULL); 2263 root_inode.bi_inum = inum; 2264 2265 ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2266 bch_err_msg(c, ret, "writing root inode"); 2267 } 2268 err: 2269 fsck_err: 2270 return ret; 2271 } 2272 2273 /* Get root directory, create if it doesn't exist: */ 2274 int bch2_check_root(struct bch_fs *c) 2275 { 2276 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2277 check_root_trans(trans)); 2278 bch_err_fn(c, ret); 2279 return ret; 2280 } 2281 2282 typedef DARRAY(u32) darray_u32; 2283 
2284 static bool darray_u32_has(darray_u32 *d, u32 v) 2285 { 2286 darray_for_each(*d, i) 2287 if (*i == v) 2288 return true; 2289 return false; 2290 } 2291 2292 /* 2293 * We've checked that inode backpointers point to valid dirents; here, it's 2294 * sufficient to check that the subvolume root has a dirent: 2295 */ 2296 static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) 2297 { 2298 struct bch_inode_unpacked inode; 2299 int ret = bch2_inode_find_by_inum_trans(trans, 2300 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 2301 &inode); 2302 if (ret) 2303 return ret; 2304 2305 return inode.bi_dir != 0; 2306 } 2307 2308 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 2309 { 2310 struct bch_fs *c = trans->c; 2311 struct btree_iter parent_iter = {}; 2312 darray_u32 subvol_path = {}; 2313 struct printbuf buf = PRINTBUF; 2314 int ret = 0; 2315 2316 if (k.k->type != KEY_TYPE_subvolume) 2317 return 0; 2318 2319 while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) { 2320 ret = darray_push(&subvol_path, k.k->p.offset); 2321 if (ret) 2322 goto err; 2323 2324 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 2325 2326 ret = subvol_has_dirent(trans, s); 2327 if (ret < 0) 2328 break; 2329 2330 if (fsck_err_on(!ret, 2331 c, subvol_unreachable, 2332 "unreachable subvolume %s", 2333 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2334 buf.buf))) { 2335 ret = reattach_subvol(trans, s); 2336 break; 2337 } 2338 2339 u32 parent = le32_to_cpu(s.v->fs_path_parent); 2340 2341 if (darray_u32_has(&subvol_path, parent)) { 2342 if (fsck_err(c, subvol_loop, "subvolume loop")) 2343 ret = reattach_subvol(trans, s); 2344 break; 2345 } 2346 2347 bch2_trans_iter_exit(trans, &parent_iter); 2348 bch2_trans_iter_init(trans, &parent_iter, 2349 BTREE_ID_subvolumes, POS(0, parent), 0); 2350 k = bch2_btree_iter_peek_slot(&parent_iter); 2351 ret = bkey_err(k); 2352 if (ret) 2353 goto err; 2354 2355 if (fsck_err_on(k.k->type 
!= KEY_TYPE_subvolume, 2356 c, subvol_unreachable, 2357 "unreachable subvolume %s", 2358 (bch2_bkey_val_to_text(&buf, c, s.s_c), 2359 buf.buf))) { 2360 ret = reattach_subvol(trans, s); 2361 break; 2362 } 2363 } 2364 fsck_err: 2365 err: 2366 printbuf_exit(&buf); 2367 darray_exit(&subvol_path); 2368 bch2_trans_iter_exit(trans, &parent_iter); 2369 return ret; 2370 } 2371 2372 int bch2_check_subvolume_structure(struct bch_fs *c) 2373 { 2374 int ret = bch2_trans_run(c, 2375 for_each_btree_key_commit(trans, iter, 2376 BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k, 2377 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2378 check_subvol_path(trans, &iter, k))); 2379 bch_err_fn(c, ret); 2380 return ret; 2381 } 2382 2383 struct pathbuf_entry { 2384 u64 inum; 2385 u32 snapshot; 2386 }; 2387 2388 typedef DARRAY(struct pathbuf_entry) pathbuf; 2389 2390 static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot) 2391 { 2392 darray_for_each(*p, i) 2393 if (i->inum == inum && 2394 i->snapshot == snapshot) 2395 return true; 2396 return false; 2397 } 2398 2399 /* 2400 * Check that a given inode is reachable from its subvolume root - we already 2401 * verified subvolume connectivity: 2402 * 2403 * XXX: we should also be verifying that inodes are in the right subvolumes 2404 */ 2405 static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k) 2406 { 2407 struct bch_fs *c = trans->c; 2408 struct btree_iter inode_iter = {}; 2409 struct bch_inode_unpacked inode; 2410 struct printbuf buf = PRINTBUF; 2411 u32 snapshot = inode_k.k->p.snapshot; 2412 int ret = 0; 2413 2414 p->nr = 0; 2415 2416 BUG_ON(bch2_inode_unpack(inode_k, &inode)); 2417 2418 while (!inode.bi_subvol) { 2419 struct btree_iter dirent_iter; 2420 struct bkey_s_c_dirent d; 2421 u32 parent_snapshot = snapshot; 2422 2423 d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot); 2424 ret = bkey_err(d.s_c); 2425 if (ret && !bch2_err_matches(ret, ENOENT)) 2426 break; 2427 2428 if (!ret && 
!dirent_points_to_inode(d, &inode)) { 2429 bch2_trans_iter_exit(trans, &dirent_iter); 2430 ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 2431 } 2432 2433 if (bch2_err_matches(ret, ENOENT)) { 2434 ret = 0; 2435 if (fsck_err(c, inode_unreachable, 2436 "unreachable inode\n%s", 2437 (printbuf_reset(&buf), 2438 bch2_bkey_val_to_text(&buf, c, inode_k), 2439 buf.buf))) 2440 ret = reattach_inode(trans, &inode, snapshot); 2441 goto out; 2442 } 2443 2444 bch2_trans_iter_exit(trans, &dirent_iter); 2445 2446 if (!S_ISDIR(inode.bi_mode)) 2447 break; 2448 2449 ret = darray_push(p, ((struct pathbuf_entry) { 2450 .inum = inode.bi_inum, 2451 .snapshot = snapshot, 2452 })); 2453 if (ret) 2454 return ret; 2455 2456 snapshot = parent_snapshot; 2457 2458 bch2_trans_iter_exit(trans, &inode_iter); 2459 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2460 SPOS(0, inode.bi_dir, snapshot), 0); 2461 ret = bkey_err(inode_k) ?: 2462 !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode 2463 : bch2_inode_unpack(inode_k, &inode); 2464 if (ret) { 2465 /* Should have been caught in dirents pass */ 2466 if (!bch2_err_matches(ret, BCH_ERR_transaction_restart)) 2467 bch_err(c, "error looking up parent directory: %i", ret); 2468 break; 2469 } 2470 2471 snapshot = inode_k.k->p.snapshot; 2472 2473 if (path_is_dup(p, inode.bi_inum, snapshot)) { 2474 /* XXX print path */ 2475 bch_err(c, "directory structure loop"); 2476 2477 darray_for_each(*p, i) 2478 pr_err("%llu:%u", i->inum, i->snapshot); 2479 pr_err("%llu:%u", inode.bi_inum, snapshot); 2480 2481 if (fsck_err(c, dir_loop, "directory structure loop")) { 2482 ret = remove_backpointer(trans, &inode); 2483 bch_err_msg(c, ret, "removing dirent"); 2484 if (ret) 2485 break; 2486 2487 ret = reattach_inode(trans, &inode, snapshot); 2488 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2489 } 2490 break; 2491 } 2492 } 2493 out: 2494 fsck_err: 2495 bch2_trans_iter_exit(trans, &inode_iter); 2496 printbuf_exit(&buf); 2497 
bch_err_fn(c, ret); 2498 return ret; 2499 } 2500 2501 /* 2502 * Check for unreachable inodes, as well as loops in the directory structure: 2503 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's 2504 * unreachable: 2505 */ 2506 int bch2_check_directory_structure(struct bch_fs *c) 2507 { 2508 pathbuf path = { 0, }; 2509 int ret; 2510 2511 ret = bch2_trans_run(c, 2512 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN, 2513 BTREE_ITER_intent| 2514 BTREE_ITER_prefetch| 2515 BTREE_ITER_all_snapshots, k, 2516 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({ 2517 if (!bkey_is_inode(k.k)) 2518 continue; 2519 2520 if (bch2_inode_flags(k) & BCH_INODE_unlinked) 2521 continue; 2522 2523 check_path(trans, &path, k); 2524 }))); 2525 darray_exit(&path); 2526 2527 bch_err_fn(c, ret); 2528 return ret; 2529 } 2530 2531 struct nlink_table { 2532 size_t nr; 2533 size_t size; 2534 2535 struct nlink { 2536 u64 inum; 2537 u32 snapshot; 2538 u32 count; 2539 } *d; 2540 }; 2541 2542 static int add_nlink(struct bch_fs *c, struct nlink_table *t, 2543 u64 inum, u32 snapshot) 2544 { 2545 if (t->nr == t->size) { 2546 size_t new_size = max_t(size_t, 128UL, t->size * 2); 2547 void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL); 2548 2549 if (!d) { 2550 bch_err(c, "fsck: error allocating memory for nlink_table, size %zu", 2551 new_size); 2552 return -BCH_ERR_ENOMEM_fsck_add_nlink; 2553 } 2554 2555 if (t->d) 2556 memcpy(d, t->d, t->size * sizeof(t->d[0])); 2557 kvfree(t->d); 2558 2559 t->d = d; 2560 t->size = new_size; 2561 } 2562 2563 2564 t->d[t->nr++] = (struct nlink) { 2565 .inum = inum, 2566 .snapshot = snapshot, 2567 }; 2568 2569 return 0; 2570 } 2571 2572 static int nlink_cmp(const void *_l, const void *_r) 2573 { 2574 const struct nlink *l = _l; 2575 const struct nlink *r = _r; 2576 2577 return cmp_int(l->inum, r->inum); 2578 } 2579 2580 static void inc_link(struct bch_fs *c, struct snapshots_seen *s, 2581 struct nlink_table *links, 2582 
u64 range_start, u64 range_end, u64 inum, u32 snapshot) 2583 { 2584 struct nlink *link, key = { 2585 .inum = inum, .snapshot = U32_MAX, 2586 }; 2587 2588 if (inum < range_start || inum >= range_end) 2589 return; 2590 2591 link = __inline_bsearch(&key, links->d, links->nr, 2592 sizeof(links->d[0]), nlink_cmp); 2593 if (!link) 2594 return; 2595 2596 while (link > links->d && link[0].inum == link[-1].inum) 2597 --link; 2598 2599 for (; link < links->d + links->nr && link->inum == inum; link++) 2600 if (ref_visible(c, s, snapshot, link->snapshot)) { 2601 link->count++; 2602 if (link->snapshot >= snapshot) 2603 break; 2604 } 2605 } 2606 2607 noinline_for_stack 2608 static int check_nlinks_find_hardlinks(struct bch_fs *c, 2609 struct nlink_table *t, 2610 u64 start, u64 *end) 2611 { 2612 int ret = bch2_trans_run(c, 2613 for_each_btree_key(trans, iter, BTREE_ID_inodes, 2614 POS(0, start), 2615 BTREE_ITER_intent| 2616 BTREE_ITER_prefetch| 2617 BTREE_ITER_all_snapshots, k, ({ 2618 if (!bkey_is_inode(k.k)) 2619 continue; 2620 2621 /* Should never fail, checked by bch2_inode_invalid: */ 2622 struct bch_inode_unpacked u; 2623 BUG_ON(bch2_inode_unpack(k, &u)); 2624 2625 /* 2626 * Backpointer and directory structure checks are sufficient for 2627 * directories, since they can't have hardlinks: 2628 */ 2629 if (S_ISDIR(u.bi_mode)) 2630 continue; 2631 2632 if (!u.bi_nlink) 2633 continue; 2634 2635 ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot); 2636 if (ret) { 2637 *end = k.k->p.offset; 2638 ret = 0; 2639 break; 2640 } 2641 0; 2642 }))); 2643 2644 bch_err_fn(c, ret); 2645 return ret; 2646 } 2647 2648 noinline_for_stack 2649 static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links, 2650 u64 range_start, u64 range_end) 2651 { 2652 struct snapshots_seen s; 2653 2654 snapshots_seen_init(&s); 2655 2656 int ret = bch2_trans_run(c, 2657 for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN, 2658 BTREE_ITER_intent| 2659 BTREE_ITER_prefetch| 2660 
BTREE_ITER_all_snapshots, k, ({ 2661 ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p); 2662 if (ret) 2663 break; 2664 2665 if (k.k->type == KEY_TYPE_dirent) { 2666 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 2667 2668 if (d.v->d_type != DT_DIR && 2669 d.v->d_type != DT_SUBVOL) 2670 inc_link(c, &s, links, range_start, range_end, 2671 le64_to_cpu(d.v->d_inum), d.k->p.snapshot); 2672 } 2673 0; 2674 }))); 2675 2676 snapshots_seen_exit(&s); 2677 2678 bch_err_fn(c, ret); 2679 return ret; 2680 } 2681 2682 static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter, 2683 struct bkey_s_c k, 2684 struct nlink_table *links, 2685 size_t *idx, u64 range_end) 2686 { 2687 struct bch_fs *c = trans->c; 2688 struct bch_inode_unpacked u; 2689 struct nlink *link = &links->d[*idx]; 2690 int ret = 0; 2691 2692 if (k.k->p.offset >= range_end) 2693 return 1; 2694 2695 if (!bkey_is_inode(k.k)) 2696 return 0; 2697 2698 BUG_ON(bch2_inode_unpack(k, &u)); 2699 2700 if (S_ISDIR(u.bi_mode)) 2701 return 0; 2702 2703 if (!u.bi_nlink) 2704 return 0; 2705 2706 while ((cmp_int(link->inum, k.k->p.offset) ?: 2707 cmp_int(link->snapshot, k.k->p.snapshot)) < 0) { 2708 BUG_ON(*idx == links->nr); 2709 link = &links->d[++*idx]; 2710 } 2711 2712 if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count, 2713 c, inode_wrong_nlink, 2714 "inode %llu type %s has wrong i_nlink (%u, should be %u)", 2715 u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)], 2716 bch2_inode_nlink_get(&u), link->count)) { 2717 bch2_inode_nlink_set(&u, link->count); 2718 ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot); 2719 } 2720 fsck_err: 2721 return ret; 2722 } 2723 2724 noinline_for_stack 2725 static int check_nlinks_update_hardlinks(struct bch_fs *c, 2726 struct nlink_table *links, 2727 u64 range_start, u64 range_end) 2728 { 2729 size_t idx = 0; 2730 2731 int ret = bch2_trans_run(c, 2732 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 2733 POS(0, range_start), 2734 
BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 2735 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2736 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 2737 if (ret < 0) { 2738 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 2739 return ret; 2740 } 2741 2742 return 0; 2743 } 2744 2745 int bch2_check_nlinks(struct bch_fs *c) 2746 { 2747 struct nlink_table links = { 0 }; 2748 u64 this_iter_range_start, next_iter_range_start = 0; 2749 int ret = 0; 2750 2751 do { 2752 this_iter_range_start = next_iter_range_start; 2753 next_iter_range_start = U64_MAX; 2754 2755 ret = check_nlinks_find_hardlinks(c, &links, 2756 this_iter_range_start, 2757 &next_iter_range_start); 2758 2759 ret = check_nlinks_walk_dirents(c, &links, 2760 this_iter_range_start, 2761 next_iter_range_start); 2762 if (ret) 2763 break; 2764 2765 ret = check_nlinks_update_hardlinks(c, &links, 2766 this_iter_range_start, 2767 next_iter_range_start); 2768 if (ret) 2769 break; 2770 2771 links.nr = 0; 2772 } while (next_iter_range_start != U64_MAX); 2773 2774 kvfree(links.d); 2775 bch_err_fn(c, ret); 2776 return ret; 2777 } 2778 2779 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 2780 struct bkey_s_c k) 2781 { 2782 struct bkey_s_c_reflink_p p; 2783 struct bkey_i_reflink_p *u; 2784 2785 if (k.k->type != KEY_TYPE_reflink_p) 2786 return 0; 2787 2788 p = bkey_s_c_to_reflink_p(k); 2789 2790 if (!p.v->front_pad && !p.v->back_pad) 2791 return 0; 2792 2793 u = bch2_trans_kmalloc(trans, sizeof(*u)); 2794 int ret = PTR_ERR_OR_ZERO(u); 2795 if (ret) 2796 return ret; 2797 2798 bkey_reassemble(&u->k_i, k); 2799 u->v.front_pad = 0; 2800 u->v.back_pad = 0; 2801 2802 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 2803 } 2804 2805 int bch2_fix_reflink_p(struct bch_fs *c) 2806 { 2807 if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix) 2808 return 0; 2809 2810 int ret = bch2_trans_run(c, 2811 
for_each_btree_key_commit(trans, iter, 2812 BTREE_ID_extents, POS_MIN, 2813 BTREE_ITER_intent|BTREE_ITER_prefetch| 2814 BTREE_ITER_all_snapshots, k, 2815 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2816 fix_reflink_p_key(trans, &iter, k))); 2817 bch_err_fn(c, ret); 2818 return ret; 2819 } 2820