// SPDX-License-Identifier: GPL-2.0

#include "bcachefs.h"
#include "bkey_buf.h"
#include "btree_cache.h"
#include "btree_update.h"
#include "buckets.h"
#include "darray.h"
#include "dirent.h"
#include "error.h"
#include "fs-common.h"
#include "fsck.h"
#include "inode.h"
#include "keylist.h"
#include "recovery.h"
#include "snapshot.h"
#include "super.h"
#include "xattr.h"

#include <linux/bsearch.h>
#include <linux/dcache.h> /* struct qstr */

/*
 * XXX: this is handling transaction restarts without returning
 * -BCH_ERR_transaction_restart_nested, this is not how we do things anymore:
 */

/*
 * Sum the allocated sectors of all extents of @inum, as seen from @snapshot.
 * Returns the sector count, or a negative error code.
 */
static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum,
				    u32 snapshot)
{
	u64 sectors = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_extents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (bkey_extent_is_allocation(k.k))
			sectors += k.k->size;
		0;
	}));

	/* negative error code, else the (non-negative) count: */
	return ret ?: sectors;
}

/*
 * Count the subdirectory dirents of directory @inum visible in @snapshot;
 * used for validating a directory's link count.
 */
static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum,
			      u32 snapshot)
{
	u64 subdirs = 0;

	int ret = for_each_btree_key_upto(trans, iter, BTREE_ID_dirents,
				SPOS(inum, 0, snapshot),
				POS(inum, U64_MAX),
				0, k, ({
		if (k.k->type == KEY_TYPE_dirent &&
		    bkey_s_c_to_dirent(k).v->d_type == DT_DIR)
			subdirs++;
		0;
	}));

	return ret ?: subdirs;
}

/*
 * Resolve subvolume @subvol to its snapshot ID and root inode number.
 *
 * NOTE(review): *snapshot and *inum are written from @s even when
 * bch2_subvolume_get() failed, so callers must check the return code before
 * trusting the outputs.
 */
static int subvol_lookup(struct btree_trans *trans, u32 subvol,
			 u32 *snapshot, u64 *inum)
{
	struct bch_subvolume s;
	int ret;

	ret = bch2_subvolume_get(trans, subvol, false, 0, &s);

	*snapshot = le32_to_cpu(s.snapshot);
	*inum = le64_to_cpu(s.inode);
	return ret;
}

/*
 * Fetch the first version (lowest snapshot ID at this position) of inode
 * @inode_nr, in any snapshot.  Returns -BCH_ERR_ENOENT_inode if no version
 * exists.
 */
static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	bch2_trans_iter_init(trans, &iter, BTREE_ID_inodes,
			     POS(0, inode_nr),
			     BTREE_ITER_ALL_SNAPSHOTS);
	k = bch2_btree_iter_peek(&iter);
	ret = bkey_err(k);
	if (ret)
		goto err;

	/* peek may land past @inode_nr if no version of it exists: */
	if (!k.k || !bkey_eq(k.k->p, POS(0, inode_nr))) {
		ret = -BCH_ERR_ENOENT_inode;
		goto err;
	}

	ret = bch2_inode_unpack(k, inode);
err:
	bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Look up inode @inode_nr as seen from *@snapshot; on success *@snapshot is
 * updated to the snapshot the inode was actually found in (possibly an
 * ancestor of the one requested).
 */
static int lookup_inode(struct btree_trans *trans, u64 inode_nr,
			struct bch_inode_unpacked *inode,
			u32 *snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
			       SPOS(0, inode_nr, *snapshot), 0);
	ret = bkey_err(k);
	if (ret)
		goto err;

	ret = bkey_is_inode(k.k)
		? bch2_inode_unpack(k, inode)
		: -BCH_ERR_ENOENT_inode;
	if (!ret)
		*snapshot = iter.pos.snapshot;
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Hash-table lookup of @name in directory @dir, restricted to @snapshot;
 * on success returns the dirent's target inum and d_type via out params.
 */
static int lookup_dirent_in_snapshot(struct btree_trans *trans,
				     struct bch_hash_info hash_info,
				     subvol_inum dir, struct qstr *name,
				     u64 *target, unsigned *type, u32 snapshot)
{
	struct btree_iter iter;
	struct bkey_s_c_dirent d;
	int ret = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc,
					       &hash_info, dir, name, 0, snapshot);
	if (ret)
		return ret;

	d = bkey_s_c_to_dirent(bch2_btree_iter_peek_slot(&iter));
	*target = le64_to_cpu(d.v->d_inum);
	*type = d.v->d_type;
	bch2_trans_iter_exit(trans, &iter);
	return 0;
}

/*
 * Delete the dirent at @pos: we need the directory inode first, to
 * reconstruct the hash info used by the dirent hash table.
 */
static int __remove_dirent(struct btree_trans *trans, struct bpos pos)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bch_inode_unpacked dir_inode;
	struct bch_hash_info dir_hash_info;
	int ret;

	ret = lookup_first_inode(trans, pos.inode, &dir_inode);
	if (ret)
		goto err;

	dir_hash_info = bch2_hash_info_init(c, &dir_inode);

	bch2_trans_iter_init(trans, &iter,
			     BTREE_ID_dirents, pos, BTREE_ITER_INTENT);

	ret = bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
				  &dir_hash_info, &iter,
				  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
	bch2_trans_iter_exit(trans, &iter);
err:
	bch_err_fn(c, ret);
	return ret;
}

/* Get lost+found, create if it doesn't exist: */
static int lookup_lostfound(struct btree_trans *trans, u32 snapshot,
			    struct bch_inode_unpacked *lostfound)
{
	struct bch_fs *c = trans->c;
	struct qstr lostfound_str = QSTR("lost+found");
	u64 inum = 0;
	unsigned d_type = 0;
	int ret;

	/* lost+found lives in the root of the snapshot tree's master subvol: */
	struct bch_snapshot_tree st;
	ret = bch2_snapshot_tree_lookup(trans,
					bch2_snapshot_tree(c, snapshot), &st);
	if (ret)
		return ret;

	subvol_inum root_inum = { .subvol = le32_to_cpu(st.master_subvol) };
	u32 subvol_snapshot;

	ret = subvol_lookup(trans, le32_to_cpu(st.master_subvol),
			    &subvol_snapshot, &root_inum.inum);
	bch_err_msg(c, ret, "looking up root subvol");
	if (ret)
		return ret;

	struct bch_inode_unpacked root_inode;
	struct bch_hash_info root_hash_info;
	u32 root_inode_snapshot = snapshot;
	ret = lookup_inode(trans, root_inum.inum, &root_inode, &root_inode_snapshot);
	bch_err_msg(c, ret, "looking up root inode");
	if (ret)
		return ret;

	root_hash_info = bch2_hash_info_init(c, &root_inode);

	ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum,
					&lostfound_str, &inum, &d_type, snapshot);
	if (bch2_err_matches(ret, ENOENT))
		goto create_lostfound;

	bch_err_fn(c, ret);
	if (ret)
		return ret;

	if (d_type != DT_DIR) {
		bch_err(c, "error looking up lost+found: not a directory");
		return -BCH_ERR_ENOENT_not_directory;
	}

	/*
	 * The bch2_check_dirents pass has already run, dangling dirents
	 * shouldn't exist here:
	 */
	ret = lookup_inode(trans, inum, lostfound, &snapshot);
	bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)",
		    inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot));
	return ret;

create_lostfound:
	/*
	 * XXX: we could have a nicer log message here if we had a nice way to
	 * walk backpointers to print a path
	 */
	bch_notice(c, "creating lost+found in snapshot %u", le32_to_cpu(st.root_snapshot));

	u64 now = bch2_current_time(c);
	struct btree_iter lostfound_iter = { NULL };
	u64 cpu = raw_smp_processor_id();

	bch2_inode_init_early(c, lostfound);
	bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode);
	lostfound->bi_dir = root_inode.bi_inum;

	/*
	 * NOTE(review): root_inode.bi_nlink is bumped but root_inode is not
	 * written back here - presumably the nlink pass fixes it up; confirm.
	 */
	root_inode.bi_nlink++;

	ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu);
	if (ret)
		goto err;

	bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot);
	ret = bch2_btree_iter_traverse(&lostfound_iter);
	if (ret)
		goto err;

	ret = bch2_dirent_create_snapshot(trans,
				0, root_inode.bi_inum, snapshot, &root_hash_info,
				mode_to_type(lostfound->bi_mode),
				&lostfound_str,
				lostfound->bi_inum,
				&lostfound->bi_dir_offset,
				BCH_HASH_SET_MUST_CREATE) ?:
		bch2_inode_write_flags(trans, &lostfound_iter, lostfound,
				       BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE);
err:
	bch_err_msg(c, ret, "creating lost+found");
	bch2_trans_iter_exit(trans, &lostfound_iter);
	return ret;
}

/*
 * Link an orphaned inode (or subvolume root) into lost+found, updating its
 * backpointer fields to the new dirent.
 */
static int reattach_inode(struct btree_trans *trans,
			  struct bch_inode_unpacked *inode,
			  u32 inode_snapshot)
{
	struct bch_hash_info dir_hash;
	struct bch_inode_unpacked lostfound;
	char name_buf[20];
	struct qstr name;
	u64 dir_offset = 0;
	u32 dirent_snapshot = inode_snapshot;
	int ret;

	if (inode->bi_subvol) {
		/* subvolume root: reparent to the root subvolume: */
		inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL;

		u64 root_inum;
		ret = subvol_lookup(trans, inode->bi_parent_subvol,
				    &dirent_snapshot, &root_inum);
		if (ret)
			return ret;

		snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol);
	} else {
		snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum);
	}

	ret = lookup_lostfound(trans, dirent_snapshot, &lostfound);
	if (ret)
		return ret;

	if (S_ISDIR(inode->bi_mode)) {
		/* new subdirectory: bump lost+found's link count: */
		lostfound.bi_nlink++;

		ret = __bch2_fsck_write_inode(trans, &lostfound, U32_MAX);
		if (ret)
			return ret;
	}

	dir_hash = bch2_hash_info_init(trans->c, &lostfound);

	name = (struct qstr) QSTR(name_buf);

	ret = bch2_dirent_create_snapshot(trans,
				inode->bi_parent_subvol, lostfound.bi_inum,
				dirent_snapshot,
				&dir_hash,
				inode_d_type(inode),
				&name,
				inode->bi_subvol ?: inode->bi_inum,
				&dir_offset,
				BCH_HASH_SET_MUST_CREATE);
	if (ret)
		return ret;

	/* point the inode's backpointer at the new dirent: */
	inode->bi_dir		= lostfound.bi_inum;
	inode->bi_dir_offset	= dir_offset;

	return __bch2_fsck_write_inode(trans, inode, inode_snapshot);
}

/* Delete the dirent an inode's backpointer fields point at. */
static int remove_backpointer(struct btree_trans *trans,
			      struct bch_inode_unpacked *inode)
{
	struct btree_iter iter;
	struct bkey_s_c_dirent d;
	int ret;

	d = bch2_bkey_get_iter_typed(trans, &iter, BTREE_ID_dirents,
				     POS(inode->bi_dir, inode->bi_dir_offset), 0,
				     dirent);
	ret =   bkey_err(d) ?:
		__remove_dirent(trans, d.k->p);
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Reattach subvolume @s to lost+found: drop its old dirent (if any) and
 * create a fresh one via reattach_inode().
 */
static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s)
{
	struct bch_fs *c = trans->c;

	struct bch_inode_unpacked inode;
	int ret = bch2_inode_find_by_inum_trans(trans,
				(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
				&inode);
	if (ret)
		return ret;

	ret = remove_backpointer(trans, &inode);
	bch_err_msg(c, ret, "removing dirent");
	if (ret)
		return ret;

	ret = reattach_inode(trans, &inode, le32_to_cpu(s.v->snapshot));
	bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
	return ret;
}

/* A snapshot ID paired with its equivalence-class representative: */
struct snapshots_seen_entry {
	u32 id;
	u32 equiv;
};

/*
 * Sorted list of snapshot IDs in which keys have been seen at the current
 * btree position - used for key-visibility decisions across snapshots.
 */
struct snapshots_seen {
	struct bpos pos;
	DARRAY(struct snapshots_seen_entry) ids;
};

static inline void snapshots_seen_exit(struct snapshots_seen *s)
{
	darray_exit(&s->ids);
}

static inline void snapshots_seen_init(struct snapshots_seen *s)
{
	memset(s, 0, sizeof(*s));
}

/* Insert @id keeping s->ids sorted; no-op if already present. */
static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id)
{
	struct snapshots_seen_entry *i, n = {
		.id	= id,
		.equiv	= bch2_snapshot_equiv(c, id),
	};
	int ret = 0;

	__darray_for_each(s->ids, i) {
		if (i->id == id)
			return 0;
		if (i->id > id)
			break;
	}

	ret = darray_insert_item(&s->ids, i - s->ids.data, n);
	if (ret)
		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
			s->ids.size);
	return ret;
}

/*
 * Record that a key was seen at @pos in snapshot pos.snapshot; resets the
 * list when the position changes.  Duplicate equivalence classes indicate
 * unfinished snapshot deletion and schedule the cleanup pass.
 */
static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s,
				 enum btree_id btree_id, struct bpos pos)
{
	struct snapshots_seen_entry n = {
		.id	= pos.snapshot,
		.equiv	= bch2_snapshot_equiv(c, pos.snapshot),
	};
	int ret = 0;

	if (!bkey_eq(s->pos, pos))
		s->ids.nr = 0;

	s->pos = pos;
	s->pos.snapshot = n.equiv;

	darray_for_each(s->ids, i) {
		if (i->id == n.id)
			return 0;

		/*
		 * We currently don't rigorously track for snapshot cleanup
		 * needing to be run, so it shouldn't be a fsck error yet:
		 */
		if (i->equiv == n.equiv) {
			bch_err(c, "snapshot deletion did not finish:\n"
				" duplicate keys in btree %s at %llu:%llu snapshots %u, %u (equiv %u)\n",
				bch2_btree_id_str(btree_id),
				pos.inode, pos.offset,
				i->id, n.id, n.equiv);
			set_bit(BCH_FS_need_delete_dead_snapshots, &c->flags);
			return bch2_run_explicit_recovery_pass(c, BCH_RECOVERY_PASS_delete_dead_snapshots);
		}
	}

	ret = darray_push(&s->ids, n);
	if (ret)
		bch_err(c, "error reallocating snapshots_seen table (size %zu)",
			s->ids.size);
	return ret;
}

/**
 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor,
 * and @ancestor hasn't been overwritten in @seen
 *
 * @c:		filesystem handle
 * @seen:	list of snapshot ids already seen at current position
 * @id:		descendent snapshot id
 * @ancestor:	ancestor snapshot id
 *
 * Returns:	whether key in @ancestor snapshot is visible in @id snapshot
 */
static bool key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen,
				    u32 id, u32 ancestor)
{
	ssize_t i;

	EBUG_ON(id > ancestor);
	EBUG_ON(!bch2_snapshot_is_equiv(c, id));
	EBUG_ON(!bch2_snapshot_is_equiv(c, ancestor));

	/* @ancestor should be the snapshot most recently added to @seen */
	EBUG_ON(ancestor != seen->pos.snapshot);
	EBUG_ON(ancestor != seen->ids.data[seen->ids.nr - 1].equiv);

	if (id == ancestor)
		return true;

	if (!bch2_snapshot_is_ancestor(c, id, ancestor))
		return false;

	/*
	 * We know that @id is a descendant of @ancestor, we're checking if
	 * we've seen a key that overwrote @ancestor - i.e. also a descendent of
	 * @ancestor and with @id as a descendent.
	 *
	 * But we already know that we're scanning IDs between @id and @ancestor
	 * numerically, since snapshot ID lists are kept sorted, so if we find
	 * an id that's an ancestor of @id we're done:
	 */

	for (i = seen->ids.nr - 2;
	     i >= 0 && seen->ids.data[i].equiv >= id;
	     --i)
		if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i].equiv))
			return false;

	return true;
}

/**
 * ref_visible - given a key with snapshot id @src that points to a key with
 * snapshot id @dst, test whether there is some snapshot in which @dst is
 * visible.
 *
 * @c:		filesystem handle
 * @s:		list of snapshot IDs already seen at @src
 * @src:	snapshot ID of src key
 * @dst:	snapshot ID of dst key
 * Returns:	true if there is some snapshot in which @dst is visible
 *
 * Assumes we're visiting @src keys in natural key order
 */
static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s,
			u32 src, u32 dst)
{
	return dst <= src
		? key_visible_in_snapshot(c, s, dst, src)
		: bch2_snapshot_is_ancestor(c, src, dst);
}

/* Symmetric variant of ref_visible() for two seen-lists: */
static int ref_visible2(struct bch_fs *c,
			u32 src, struct snapshots_seen *src_seen,
			u32 dst, struct snapshots_seen *dst_seen)
{
	src = bch2_snapshot_equiv(c, src);
	dst = bch2_snapshot_equiv(c, dst);

	if (dst > src) {
		swap(dst, src);
		swap(dst_seen, src_seen);
	}
	return key_visible_in_snapshot(c, src_seen, dst, src);
}

#define for_each_visible_inode(_c, _s, _w, _snapshot, _i)				\
	for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr &&	\
	     (_i)->snapshot <= (_snapshot); _i++)					\
		if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot))

/* One version of an inode, in one snapshot: */
struct inode_walker_entry {
	struct bch_inode_unpacked inode;
	u32			snapshot;
	bool			seen_this_pos;
	u64			count;
};

/* All versions of the inode at the current btree position: */
struct inode_walker {
	bool				first_this_inode;
	bool				recalculate_sums;
	struct bpos			last_pos;

	DARRAY(struct inode_walker_entry) inodes;
};

static void inode_walker_exit(struct inode_walker *w)
{
	darray_exit(&w->inodes);
}

static struct inode_walker inode_walker_init(void)
{
	return (struct inode_walker) { 0, };
}

/* Unpack @inode and append it to @w (keyed by its equiv snapshot): */
static int add_inode(struct bch_fs *c, struct inode_walker *w,
		     struct bkey_s_c inode)
{
	struct bch_inode_unpacked u;

	BUG_ON(bch2_inode_unpack(inode, &u));

	return darray_push(&w->inodes, ((struct inode_walker_entry) {
		.inode		= u,
		.snapshot	= bch2_snapshot_equiv(c, inode.k->p.snapshot),
	}));
}

/* Reload @w with every snapshot version of inode @inum. */
static int get_inodes_all_snapshots(struct btree_trans *trans,
				    struct inode_walker *w, u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->recalculate_sums = false;
	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
		if (k.k->p.offset != inum)
			break;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);
	}
	bch2_trans_iter_exit(trans, &iter);

	if (ret)
		return ret;

	w->first_this_inode = true;
	return 0;
}

/*
 * Find the inode version that key @k belongs to: the first entry whose
 * snapshot is an ancestor of (or equal to) @k's snapshot.  If the key is in
 * a strictly newer snapshot than the inode (which shouldn't happen, since
 * updating a key updates the inode too), synthesize an entry for @k's
 * snapshot so per-snapshot accounting still works.
 */
static struct inode_walker_entry *
lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k)
{
	bool is_whiteout = k.k->type == KEY_TYPE_whiteout;
	u32 snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot);

	struct inode_walker_entry *i;
	__darray_for_each(w->inodes, i)
		if (bch2_snapshot_is_ancestor(c, snapshot, i->snapshot))
			goto found;

	return NULL;
found:
	BUG_ON(snapshot > i->snapshot);

	if (snapshot != i->snapshot && !is_whiteout) {
		struct inode_walker_entry new = *i;

		new.snapshot = snapshot;
		new.count = 0;

		struct printbuf buf = PRINTBUF;
		bch2_bkey_val_to_text(&buf, c, k);

		bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n"
			 "unexpected because we should always update the inode when we update a key in that inode\n"
			 "%s",
			 w->last_pos.inode, snapshot, i->snapshot, buf.buf);
		printbuf_exit(&buf);

		/* insert in sorted (by snapshot) position: */
		while (i > w->inodes.data && i[-1].snapshot > snapshot)
			--i;

		size_t pos = i - w->inodes.data;
		int ret = darray_insert_item(&w->inodes, pos, new);
		if (ret)
			return ERR_PTR(ret);

		i = w->inodes.data + pos;
	}

	return i;
}

static struct inode_walker_entry *walk_inode(struct btree_trans *trans,
					     struct inode_walker *w,
					     struct bkey_s_c k)
{
	/* new inode number: reload all versions; new position: reset flags: */
	if (w->last_pos.inode != k.k->p.inode) {
		int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode);
		if (ret)
			return ERR_PTR(ret);
	} else if (bkey_cmp(w->last_pos, k.k->p)) {
		darray_for_each(w->inodes, i)
			i->seen_this_pos = false;
	}

	w->last_pos = k.k->p;

	return lookup_inode_for_snapshot(trans->c, w, k);
}

/*
 * Load into @w the inode versions visible from the snapshot at @s->pos -
 * i.e. versions that some key seen at @s->pos could belong to.
 */
static int __get_visible_inodes(struct btree_trans *trans,
				struct inode_walker *w,
				struct snapshots_seen *s,
				u64 inum)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret;

	w->inodes.nr = 0;

	for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum),
				     BTREE_ITER_ALL_SNAPSHOTS, k, ret) {
		u32 equiv = bch2_snapshot_equiv(c, k.k->p.snapshot);

		if (k.k->p.offset != inum)
			break;

		if (!ref_visible(c, s, s->pos.snapshot, equiv))
			continue;

		if (bkey_is_inode(k.k))
			add_inode(c, w, k);

		if (equiv >= s->pos.snapshot)
			break;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

/*
 * Delete keys in snapshots that no longer exist.  Returns >0 if the key was
 * deleted (caller should skip it), <0 on error, 0 if the key is fine.
 */
static int check_key_has_snapshot(struct btree_trans *trans,
				  struct btree_iter *iter,
				  struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (mustfix_fsck_err_on(!bch2_snapshot_equiv(c, k.k->p.snapshot), c,
				bkey_in_missing_snapshot,
				"key in missing snapshot: %s",
				(bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
		ret = bch2_btree_delete_at(trans, iter,
					   BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?: 1;
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

/*
 * Rehash a key that's at the wrong slot for its hash: delete it at the old
 * position and re-insert at the correct one, in one commit.
 */
static int hash_redo_key(struct btree_trans *trans,
			 const struct bch_hash_desc desc,
			 struct bch_hash_info *hash_info,
			 struct btree_iter *k_iter, struct bkey_s_c k)
{
	struct bkey_i *delete;
	struct bkey_i *tmp;

	delete = bch2_trans_kmalloc(trans, sizeof(*delete));
	if
	    (IS_ERR(delete))
		return PTR_ERR(delete);

	tmp = bch2_bkey_make_mut_noupdate(trans, k);
	if (IS_ERR(tmp))
		return PTR_ERR(tmp);

	bkey_init(&delete->k);
	delete->k.p = k_iter->pos;
	return  bch2_btree_iter_traverse(k_iter) ?:
		bch2_trans_update(trans, k_iter, delete, 0) ?:
		bch2_hash_set_in_snapshot(trans, desc, hash_info,
					  (subvol_inum) { 0, k.k->p.inode },
					  k.k->p.snapshot, tmp,
					  BCH_HASH_SET_MUST_CREATE,
					  BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE) ?:
		bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
}

/*
 * Verify @hash_k sits on a valid probe chain from its hash: walk slots from
 * the hashed position to the key; a hole before reaching it means the key is
 * misplaced and gets rehashed; an identical key found first is a duplicate
 * and is deleted.
 */
static int hash_check_key(struct btree_trans *trans,
			  const struct bch_hash_desc desc,
			  struct bch_hash_info *hash_info,
			  struct btree_iter *k_iter, struct bkey_s_c hash_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter = { NULL };
	struct printbuf buf = PRINTBUF;
	struct bkey_s_c k;
	u64 hash;
	int ret = 0;

	if (hash_k.k->type != desc.key_type)
		return 0;

	hash = desc.hash_bkey(hash_info, hash_k);

	if (likely(hash == hash_k.k->p.offset))
		return 0;

	if (hash_k.k->p.offset < hash)
		goto bad_hash;

	for_each_btree_key_norestart(trans, iter, desc.btree_id,
				     SPOS(hash_k.k->p.inode, hash, hash_k.k->p.snapshot),
				     BTREE_ITER_SLOTS, k, ret) {
		if (bkey_eq(k.k->p, hash_k.k->p))
			break;

		if (fsck_err_on(k.k->type == desc.key_type &&
				!desc.cmp_bkey(k, hash_k), c,
				hash_table_key_duplicate,
				"duplicate hash table keys:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, hash_k),
				 buf.buf))) {
			ret = bch2_hash_delete_at(trans, desc, hash_info, k_iter, 0) ?: 1;
			break;
		}

		if (bkey_deleted(k.k)) {
			bch2_trans_iter_exit(trans, &iter);
			goto bad_hash;
		}
	}
out:
	bch2_trans_iter_exit(trans, &iter);
	printbuf_exit(&buf);
	return ret;
bad_hash:
	if (fsck_err(c, hash_table_key_wrong_offset,
		     "hash table key at wrong offset: btree %s inode %llu offset %llu, hashed to %llu\n%s",
		     bch2_btree_id_str(desc.btree_id), hash_k.k->p.inode, hash_k.k->p.offset, hash,
		     (printbuf_reset(&buf),
		      bch2_bkey_val_to_text(&buf, c, hash_k), buf.buf))) {
		ret = hash_redo_key(trans, desc, hash_info, k_iter, hash_k);
		bch_err_fn(c, ret);
		if (ret)
			return ret;
		/* rehash committed - restart so the caller re-scans: */
		ret = -BCH_ERR_transaction_restart_nested;
	}
fsck_err:
	goto out;
}

static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans,
						struct btree_iter *iter,
						struct bpos pos)
{
	return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent);
}

/* Fetch the dirent an inode's backpointer fields reference: */
static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans,
					       struct btree_iter *iter,
					       struct bch_inode_unpacked *inode,
					       u32 *snapshot)
{
	if (inode->bi_subvol) {
		/* subvolume roots: the dirent lives in the parent subvol: */
		u64 inum;
		int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
		if (ret)
			return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
	}

	return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
}

static bool inode_points_to_dirent(struct bch_inode_unpacked *inode,
				   struct bkey_s_c_dirent d)
{
	return inode->bi_dir		== d.k->p.inode &&
	       inode->bi_dir_offset	== d.k->p.offset;
}

static bool dirent_points_to_inode(struct bkey_s_c_dirent d,
				   struct bch_inode_unpacked *inode)
{
	return d.v->d_type == DT_SUBVOL
		?
le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 845 : le64_to_cpu(d.v->d_inum) == inode->bi_inum; 846 } 847 848 static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p) 849 { 850 struct btree_iter iter; 851 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0); 852 int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set; 853 bch2_trans_iter_exit(trans, &iter); 854 return ret; 855 } 856 857 static int check_inode_dirent_inode(struct btree_trans *trans, struct bkey_s_c inode_k, 858 struct bch_inode_unpacked *inode, 859 u32 inode_snapshot, bool *write_inode) 860 { 861 struct bch_fs *c = trans->c; 862 struct printbuf buf = PRINTBUF; 863 864 struct btree_iter dirent_iter = {}; 865 struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot); 866 int ret = bkey_err(d); 867 if (ret && !bch2_err_matches(ret, ENOENT)) 868 return ret; 869 870 if (fsck_err_on(ret, 871 c, inode_points_to_missing_dirent, 872 "inode points to missing dirent\n%s", 873 (bch2_bkey_val_to_text(&buf, c, inode_k), buf.buf)) || 874 fsck_err_on(!ret && !dirent_points_to_inode(d, inode), 875 c, inode_points_to_wrong_dirent, 876 "inode points to dirent that does not point back:\n%s", 877 (bch2_bkey_val_to_text(&buf, c, inode_k), 878 prt_newline(&buf), 879 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 880 /* 881 * We just clear the backpointer fields for now. If we find a 882 * dirent that points to this inode in check_dirents(), we'll 883 * update it then; then when we get to check_path() if the 884 * backpointer is still 0 we'll reattach it. 
885 */ 886 inode->bi_dir = 0; 887 inode->bi_dir_offset = 0; 888 inode->bi_flags &= ~BCH_INODE_backptr_untrusted; 889 *write_inode = true; 890 } 891 892 ret = 0; 893 fsck_err: 894 bch2_trans_iter_exit(trans, &dirent_iter); 895 printbuf_exit(&buf); 896 bch_err_fn(c, ret); 897 return ret; 898 } 899 900 static int check_inode(struct btree_trans *trans, 901 struct btree_iter *iter, 902 struct bkey_s_c k, 903 struct bch_inode_unpacked *prev, 904 struct snapshots_seen *s, 905 bool full) 906 { 907 struct bch_fs *c = trans->c; 908 struct bch_inode_unpacked u; 909 bool do_update = false; 910 int ret; 911 912 ret = check_key_has_snapshot(trans, iter, k); 913 if (ret < 0) 914 goto err; 915 if (ret) 916 return 0; 917 918 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 919 if (ret) 920 goto err; 921 922 if (!bkey_is_inode(k.k)) 923 return 0; 924 925 BUG_ON(bch2_inode_unpack(k, &u)); 926 927 if (!full && 928 !(u.bi_flags & (BCH_INODE_i_size_dirty| 929 BCH_INODE_i_sectors_dirty| 930 BCH_INODE_unlinked))) 931 return 0; 932 933 if (prev->bi_inum != u.bi_inum) 934 *prev = u; 935 936 if (fsck_err_on(prev->bi_hash_seed != u.bi_hash_seed || 937 inode_d_type(prev) != inode_d_type(&u), 938 c, inode_snapshot_mismatch, 939 "inodes in different snapshots don't match")) { 940 bch_err(c, "repair not implemented yet"); 941 return -BCH_ERR_fsck_repair_unimplemented; 942 } 943 944 if ((u.bi_flags & (BCH_INODE_i_size_dirty|BCH_INODE_unlinked)) && 945 bch2_key_has_snapshot_overwrites(trans, BTREE_ID_inodes, k.k->p)) { 946 struct bpos new_min_pos; 947 948 ret = bch2_propagate_key_to_snapshot_leaves(trans, iter->btree_id, k, &new_min_pos); 949 if (ret) 950 goto err; 951 952 u.bi_flags &= ~BCH_INODE_i_size_dirty|BCH_INODE_unlinked; 953 954 ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot); 955 956 bch_err_msg(c, ret, "in fsck updating inode"); 957 if (ret) 958 return ret; 959 960 if (!bpos_eq(new_min_pos, POS_MIN)) 961 bch2_btree_iter_set_pos(iter, bpos_predecessor(new_min_pos)); 
		return 0;
	}

	if (u.bi_flags & BCH_INODE_unlinked) {
		ret = check_inode_deleted_list(trans, k.k->p);
		if (ret < 0)
			return ret;

		fsck_err_on(!ret, c, unlinked_inode_not_on_deleted_list,
			    "inode %llu:%u unlinked, but not on deleted list",
			    u.bi_inum, k.k->p.snapshot);
		ret = 0;
	}

	if (u.bi_flags & BCH_INODE_unlinked &&
	    (!c->sb.clean ||
	     fsck_err(c, inode_unlinked_but_clean,
		      "filesystem marked clean, but inode %llu unlinked",
		      u.bi_inum))) {
		ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
		bch_err_msg(c, ret, "in fsck deleting inode");
		return ret;
	}

	if (u.bi_flags & BCH_INODE_i_size_dirty &&
	    (!c->sb.clean ||
	     fsck_err(c, inode_i_size_dirty_but_clean,
		      "filesystem marked clean, but inode %llu has i_size dirty",
		      u.bi_inum))) {
		bch_verbose(c, "truncating inode %llu", u.bi_inum);

		/*
		 * XXX: need to truncate partial blocks too here - or ideally
		 * just switch units to bytes and that issue goes away
		 */
		ret = bch2_btree_delete_range_trans(trans, BTREE_ID_extents,
				SPOS(u.bi_inum, round_up(u.bi_size, block_bytes(c)) >> 9,
				     iter->pos.snapshot),
				POS(u.bi_inum, U64_MAX),
				0, NULL);
		bch_err_msg(c, ret, "in fsck truncating inode");
		if (ret)
			return ret;

		/*
		 * We truncated without our normal sector accounting hook, just
		 * make sure we recalculate it:
		 */
		u.bi_flags |= BCH_INODE_i_sectors_dirty;

		u.bi_flags &= ~BCH_INODE_i_size_dirty;
		do_update = true;
	}

	if (u.bi_flags & BCH_INODE_i_sectors_dirty &&
	    (!c->sb.clean ||
	     fsck_err(c, inode_i_sectors_dirty_but_clean,
		      "filesystem marked clean, but inode %llu has i_sectors dirty",
		      u.bi_inum))) {
		s64 sectors;

		bch_verbose(c, "recounting sectors for inode %llu",
			    u.bi_inum);

		sectors = bch2_count_inode_sectors(trans, u.bi_inum, iter->pos.snapshot);
		if (sectors < 0) {
			bch_err_msg(c, sectors, "in fsck recounting inode sectors");
			return sectors;
		}

		u.bi_sectors = sectors;
		u.bi_flags &= ~BCH_INODE_i_sectors_dirty;
		do_update = true;
	}

	if (u.bi_flags & BCH_INODE_backptr_untrusted) {
		u.bi_dir = 0;
		u.bi_dir_offset = 0;
		u.bi_flags &= ~BCH_INODE_backptr_untrusted;
		do_update = true;
	}

	if (u.bi_dir || u.bi_dir_offset) {
		ret = check_inode_dirent_inode(trans, k, &u, k.k->p.snapshot, &do_update);
		if (ret)
			goto err;
	}

	/*
	 * NOTE(review): the condition fires when bi_parent_subvol is set on a
	 * non-subvolume-root inode, but the message wording ("has subvol %u
	 * but nonzero parent subvol") reads oddly - confirm message intent.
	 */
	if (fsck_err_on(u.bi_parent_subvol &&
			(u.bi_subvol == 0 ||
			 u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
			c, inode_bi_parent_nonzero,
			"inode %llu:%u has subvol %u but nonzero parent subvol %u",
			u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
		u.bi_parent_subvol = 0;
		do_update = true;
	}

	if (u.bi_subvol) {
		struct bch_subvolume s;

		ret = bch2_subvolume_get(trans, u.bi_subvol, false, 0, &s);
		if (ret && !bch2_err_matches(ret, ENOENT))
			goto err;

		if (fsck_err_on(ret,
				c, inode_bi_subvol_missing,
				"inode %llu:%u bi_subvol points to missing subvolume %u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
		    fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
				!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
							   k.k->p.snapshot),
				c, inode_bi_subvol_wrong,
				"inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol,
				le64_to_cpu(s.inode),
				le32_to_cpu(s.snapshot))) {
			u.bi_subvol = 0;
			u.bi_parent_subvol = 0;
			do_update = true;
		}
	}

	if (do_update) {
		ret = __bch2_fsck_write_inode(trans, &u, iter->pos.snapshot);
		bch_err_msg(c, ret, "in fsck updating inode");
		if (ret)
			return ret;
	}
err:
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

int bch2_check_inodes(struct
bch_fs *c)
{
	/* without -o fsck we only visit inodes flagged as needing work: */
	bool full = c->opts.fsck;
	struct bch_inode_unpacked prev = { 0 };
	struct snapshots_seen s;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
				POS_MIN,
				BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_inode(trans, &iter, k, &prev, &s, full)));

	snapshots_seen_exit(&s);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Compare each inode version's accumulated extent count against bi_sectors,
 * re-counting to double check ourselves, and repair mismatches.
 */
static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w)
{
	struct bch_fs *c = trans->c;
	int ret = 0;
	s64 count2;

	darray_for_each(w->inodes, i) {
		if (i->inode.bi_sectors == i->count)
			continue;

		count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot);

		if (w->recalculate_sums)
			i->count = count2;

		/* our own accounting disagrees with a fresh count - a fsck bug: */
		if (i->count != count2) {
			bch_err(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu",
				w->last_pos.inode, i->snapshot, i->count, count2);
			return -BCH_ERR_internal_fsck_err;
		}

		if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty),
				c, inode_i_sectors_wrong,
				"inode %llu:%u has incorrect i_sectors: got %llu, should be %llu",
				w->last_pos.inode, i->snapshot,
				i->inode.bi_sectors, i->count)) {
			i->inode.bi_sectors = i->count;
			ret = bch2_fsck_write_inode(trans, &i->inode, i->snapshot);
			if (ret)
				break;
		}
	}
fsck_err:
	bch_err_fn(c, ret);
	return ret;
}

/* As above, but reporting transaction restarts to the caller: */
static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w)
{
	u32 restart_count = trans->restart_count;
	return check_i_sectors_notnested(trans, w) ?:
		trans_was_restarted(trans, restart_count);
}

/* End position of an extent previously seen in @snapshot: */
struct extent_end {
	u32			snapshot;
	u64			offset;
	struct snapshots_seen	seen;
};

struct extent_ends
{ 1168 struct bpos last_pos; 1169 DARRAY(struct extent_end) e; 1170 }; 1171 1172 static void extent_ends_reset(struct extent_ends *extent_ends) 1173 { 1174 darray_for_each(extent_ends->e, i) 1175 snapshots_seen_exit(&i->seen); 1176 extent_ends->e.nr = 0; 1177 } 1178 1179 static void extent_ends_exit(struct extent_ends *extent_ends) 1180 { 1181 extent_ends_reset(extent_ends); 1182 darray_exit(&extent_ends->e); 1183 } 1184 1185 static void extent_ends_init(struct extent_ends *extent_ends) 1186 { 1187 memset(extent_ends, 0, sizeof(*extent_ends)); 1188 } 1189 1190 static int extent_ends_at(struct bch_fs *c, 1191 struct extent_ends *extent_ends, 1192 struct snapshots_seen *seen, 1193 struct bkey_s_c k) 1194 { 1195 struct extent_end *i, n = (struct extent_end) { 1196 .offset = k.k->p.offset, 1197 .snapshot = k.k->p.snapshot, 1198 .seen = *seen, 1199 }; 1200 1201 n.seen.ids.data = kmemdup(seen->ids.data, 1202 sizeof(seen->ids.data[0]) * seen->ids.size, 1203 GFP_KERNEL); 1204 if (!n.seen.ids.data) 1205 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1206 1207 __darray_for_each(extent_ends->e, i) { 1208 if (i->snapshot == k.k->p.snapshot) { 1209 snapshots_seen_exit(&i->seen); 1210 *i = n; 1211 return 0; 1212 } 1213 1214 if (i->snapshot >= k.k->p.snapshot) 1215 break; 1216 } 1217 1218 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1219 } 1220 1221 static int overlapping_extents_found(struct btree_trans *trans, 1222 enum btree_id btree, 1223 struct bpos pos1, struct snapshots_seen *pos1_seen, 1224 struct bkey pos2, 1225 bool *fixed, 1226 struct extent_end *extent_end) 1227 { 1228 struct bch_fs *c = trans->c; 1229 struct printbuf buf = PRINTBUF; 1230 struct btree_iter iter1, iter2 = { NULL }; 1231 struct bkey_s_c k1, k2; 1232 int ret; 1233 1234 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1235 1236 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1237 BTREE_ITER_ALL_SNAPSHOTS| 1238 BTREE_ITER_NOT_EXTENTS); 1239 k1 = bch2_btree_iter_peek_upto(&iter1, 
POS(pos1.inode, U64_MAX)); 1240 ret = bkey_err(k1); 1241 if (ret) 1242 goto err; 1243 1244 prt_str(&buf, "\n "); 1245 bch2_bkey_val_to_text(&buf, c, k1); 1246 1247 if (!bpos_eq(pos1, k1.k->p)) { 1248 prt_str(&buf, "\n wanted\n "); 1249 bch2_bpos_to_text(&buf, pos1); 1250 prt_str(&buf, "\n "); 1251 bch2_bkey_to_text(&buf, &pos2); 1252 1253 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1254 __func__, buf.buf); 1255 ret = -BCH_ERR_internal_fsck_err; 1256 goto err; 1257 } 1258 1259 bch2_trans_copy_iter(&iter2, &iter1); 1260 1261 while (1) { 1262 bch2_btree_iter_advance(&iter2); 1263 1264 k2 = bch2_btree_iter_peek_upto(&iter2, POS(pos1.inode, U64_MAX)); 1265 ret = bkey_err(k2); 1266 if (ret) 1267 goto err; 1268 1269 if (bpos_ge(k2.k->p, pos2.p)) 1270 break; 1271 } 1272 1273 prt_str(&buf, "\n "); 1274 bch2_bkey_val_to_text(&buf, c, k2); 1275 1276 if (bpos_gt(k2.k->p, pos2.p) || 1277 pos2.size != k2.k->size) { 1278 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1279 __func__, buf.buf); 1280 ret = -BCH_ERR_internal_fsck_err; 1281 goto err; 1282 } 1283 1284 prt_printf(&buf, "\n overwriting %s extent", 1285 pos1.snapshot >= pos2.p.snapshot ? 
"first" : "second"); 1286 1287 if (fsck_err(c, extent_overlapping, 1288 "overlapping extents%s", buf.buf)) { 1289 struct btree_iter *old_iter = &iter1; 1290 struct disk_reservation res = { 0 }; 1291 1292 if (pos1.snapshot < pos2.p.snapshot) { 1293 old_iter = &iter2; 1294 swap(k1, k2); 1295 } 1296 1297 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1298 1299 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1300 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE, 1301 k1, k2) ?: 1302 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1303 bch2_disk_reservation_put(c, &res); 1304 1305 if (ret) 1306 goto err; 1307 1308 *fixed = true; 1309 1310 if (pos1.snapshot == pos2.p.snapshot) { 1311 /* 1312 * We overwrote the first extent, and did the overwrite 1313 * in the same snapshot: 1314 */ 1315 extent_end->offset = bkey_start_offset(&pos2); 1316 } else if (pos1.snapshot > pos2.p.snapshot) { 1317 /* 1318 * We overwrote the first extent in pos2's snapshot: 1319 */ 1320 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1321 } else { 1322 /* 1323 * We overwrote the second extent - restart 1324 * check_extent() from the top: 1325 */ 1326 ret = -BCH_ERR_transaction_restart_nested; 1327 } 1328 } 1329 fsck_err: 1330 err: 1331 bch2_trans_iter_exit(trans, &iter2); 1332 bch2_trans_iter_exit(trans, &iter1); 1333 printbuf_exit(&buf); 1334 return ret; 1335 } 1336 1337 static int check_overlapping_extents(struct btree_trans *trans, 1338 struct snapshots_seen *seen, 1339 struct extent_ends *extent_ends, 1340 struct bkey_s_c k, 1341 u32 equiv, 1342 struct btree_iter *iter, 1343 bool *fixed) 1344 { 1345 struct bch_fs *c = trans->c; 1346 int ret = 0; 1347 1348 /* transaction restart, running again */ 1349 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1350 return 0; 1351 1352 if (extent_ends->last_pos.inode != k.k->p.inode) 1353 extent_ends_reset(extent_ends); 1354 1355 darray_for_each(extent_ends->e, i) { 1356 if (i->offset <= bkey_start_offset(k.k)) 1357 
continue; 1358 1359 if (!ref_visible2(c, 1360 k.k->p.snapshot, seen, 1361 i->snapshot, &i->seen)) 1362 continue; 1363 1364 ret = overlapping_extents_found(trans, iter->btree_id, 1365 SPOS(iter->pos.inode, 1366 i->offset, 1367 i->snapshot), 1368 &i->seen, 1369 *k.k, fixed, i); 1370 if (ret) 1371 goto err; 1372 } 1373 1374 ret = extent_ends_at(c, extent_ends, seen, k); 1375 if (ret) 1376 goto err; 1377 1378 extent_ends->last_pos = k.k->p; 1379 err: 1380 return ret; 1381 } 1382 1383 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1384 struct bkey_s_c k) 1385 { 1386 struct bch_fs *c = trans->c; 1387 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1388 struct bch_extent_crc_unpacked crc; 1389 const union bch_extent_entry *i; 1390 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1391 1392 bkey_for_each_crc(k.k, ptrs, crc, i) 1393 if (crc_is_encoded(crc) && 1394 crc.uncompressed_size > encoded_extent_max_sectors) { 1395 struct printbuf buf = PRINTBUF; 1396 1397 bch2_bkey_val_to_text(&buf, c, k); 1398 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1399 printbuf_exit(&buf); 1400 } 1401 1402 return 0; 1403 } 1404 1405 static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1406 struct bkey_s_c k, 1407 struct inode_walker *inode, 1408 struct snapshots_seen *s, 1409 struct extent_ends *extent_ends) 1410 { 1411 struct bch_fs *c = trans->c; 1412 struct inode_walker_entry *i; 1413 struct printbuf buf = PRINTBUF; 1414 struct bpos equiv = k.k->p; 1415 int ret = 0; 1416 1417 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); 1418 1419 ret = check_key_has_snapshot(trans, iter, k); 1420 if (ret) { 1421 ret = ret < 0 ? 
ret : 0; 1422 goto out; 1423 } 1424 1425 if (inode->last_pos.inode != k.k->p.inode) { 1426 ret = check_i_sectors(trans, inode); 1427 if (ret) 1428 goto err; 1429 } 1430 1431 i = walk_inode(trans, inode, k); 1432 ret = PTR_ERR_OR_ZERO(i); 1433 if (ret) 1434 goto err; 1435 1436 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1437 if (ret) 1438 goto err; 1439 1440 if (k.k->type != KEY_TYPE_whiteout) { 1441 if (fsck_err_on(!i, c, extent_in_missing_inode, 1442 "extent in missing inode:\n %s", 1443 (printbuf_reset(&buf), 1444 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1445 goto delete; 1446 1447 if (fsck_err_on(i && 1448 !S_ISREG(i->inode.bi_mode) && 1449 !S_ISLNK(i->inode.bi_mode), 1450 c, extent_in_non_reg_inode, 1451 "extent in non regular inode mode %o:\n %s", 1452 i->inode.bi_mode, 1453 (printbuf_reset(&buf), 1454 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1455 goto delete; 1456 1457 ret = check_overlapping_extents(trans, s, extent_ends, k, 1458 equiv.snapshot, iter, 1459 &inode->recalculate_sums); 1460 if (ret) 1461 goto err; 1462 } 1463 1464 /* 1465 * Check inodes in reverse order, from oldest snapshots to newest, 1466 * starting from the inode that matches this extent's snapshot. 
If we 1467 * didn't have one, iterate over all inodes: 1468 */ 1469 if (!i) 1470 i = inode->inodes.data + inode->inodes.nr - 1; 1471 1472 for (; 1473 inode->inodes.data && i >= inode->inodes.data; 1474 --i) { 1475 if (i->snapshot > equiv.snapshot || 1476 !key_visible_in_snapshot(c, s, i->snapshot, equiv.snapshot)) 1477 continue; 1478 1479 if (k.k->type != KEY_TYPE_whiteout) { 1480 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_size_dirty) && 1481 k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1482 !bkey_extent_is_reservation(k), 1483 c, extent_past_end_of_inode, 1484 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1485 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1486 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1487 struct btree_iter iter2; 1488 1489 bch2_trans_copy_iter(&iter2, iter); 1490 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1491 ret = bch2_btree_iter_traverse(&iter2) ?: 1492 bch2_btree_delete_at(trans, &iter2, 1493 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1494 bch2_trans_iter_exit(trans, &iter2); 1495 if (ret) 1496 goto err; 1497 1498 iter->k.type = KEY_TYPE_whiteout; 1499 } 1500 1501 if (bkey_extent_is_allocation(k.k)) 1502 i->count += k.k->size; 1503 } 1504 1505 i->seen_this_pos = true; 1506 } 1507 out: 1508 err: 1509 fsck_err: 1510 printbuf_exit(&buf); 1511 bch_err_fn(c, ret); 1512 return ret; 1513 delete: 1514 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1515 goto out; 1516 } 1517 1518 /* 1519 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1520 * that i_size an i_sectors are consistent 1521 */ 1522 int bch2_check_extents(struct bch_fs *c) 1523 { 1524 struct inode_walker w = inode_walker_init(); 1525 struct snapshots_seen s; 1526 struct extent_ends extent_ends; 1527 struct disk_reservation res = { 0 }; 1528 1529 snapshots_seen_init(&s); 1530 extent_ends_init(&extent_ends); 1531 1532 int ret = bch2_trans_run(c, 1533 
for_each_btree_key_commit(trans, iter, BTREE_ID_extents, 1534 POS(BCACHEFS_ROOT_INO, 0), 1535 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k, 1536 &res, NULL, 1537 BCH_TRANS_COMMIT_no_enospc, ({ 1538 bch2_disk_reservation_put(c, &res); 1539 check_extent(trans, &iter, k, &w, &s, &extent_ends) ?: 1540 check_extent_overbig(trans, &iter, k); 1541 })) ?: 1542 check_i_sectors_notnested(trans, &w)); 1543 1544 bch2_disk_reservation_put(c, &res); 1545 extent_ends_exit(&extent_ends); 1546 inode_walker_exit(&w); 1547 snapshots_seen_exit(&s); 1548 1549 bch_err_fn(c, ret); 1550 return ret; 1551 } 1552 1553 int bch2_check_indirect_extents(struct bch_fs *c) 1554 { 1555 struct disk_reservation res = { 0 }; 1556 1557 int ret = bch2_trans_run(c, 1558 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1559 POS_MIN, 1560 BTREE_ITER_PREFETCH, k, 1561 &res, NULL, 1562 BCH_TRANS_COMMIT_no_enospc, ({ 1563 bch2_disk_reservation_put(c, &res); 1564 check_extent_overbig(trans, &iter, k); 1565 }))); 1566 1567 bch2_disk_reservation_put(c, &res); 1568 bch_err_fn(c, ret); 1569 return ret; 1570 } 1571 1572 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) 1573 { 1574 struct bch_fs *c = trans->c; 1575 int ret = 0; 1576 s64 count2; 1577 1578 darray_for_each(w->inodes, i) { 1579 if (i->inode.bi_nlink == i->count) 1580 continue; 1581 1582 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1583 if (count2 < 0) 1584 return count2; 1585 1586 if (i->count != count2) { 1587 bch_err(c, "fsck counted subdirectories wrong: got %llu should be %llu", 1588 i->count, count2); 1589 i->count = count2; 1590 if (i->inode.bi_nlink == i->count) 1591 continue; 1592 } 1593 1594 if (fsck_err_on(i->inode.bi_nlink != i->count, 1595 c, inode_dir_wrong_nlink, 1596 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1597 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1598 i->inode.bi_nlink = i->count; 1599 ret = 
bch2_fsck_write_inode(trans, &i->inode, i->snapshot); 1600 if (ret) 1601 break; 1602 } 1603 } 1604 fsck_err: 1605 bch_err_fn(c, ret); 1606 return ret; 1607 } 1608 1609 static int check_subdir_count(struct btree_trans *trans, struct inode_walker *w) 1610 { 1611 u32 restart_count = trans->restart_count; 1612 return check_subdir_count_notnested(trans, w) ?: 1613 trans_was_restarted(trans, restart_count); 1614 } 1615 1616 static int check_dirent_inode_dirent(struct btree_trans *trans, 1617 struct btree_iter *iter, 1618 struct bkey_s_c_dirent d, 1619 struct bch_inode_unpacked *target, 1620 u32 target_snapshot) 1621 { 1622 struct bch_fs *c = trans->c; 1623 struct printbuf buf = PRINTBUF; 1624 int ret = 0; 1625 1626 if (inode_points_to_dirent(target, d)) 1627 return 0; 1628 1629 if (!target->bi_dir && 1630 !target->bi_dir_offset) { 1631 target->bi_dir = d.k->p.inode; 1632 target->bi_dir_offset = d.k->p.offset; 1633 return __bch2_fsck_write_inode(trans, target, target_snapshot); 1634 } 1635 1636 struct btree_iter bp_iter = { NULL }; 1637 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 1638 SPOS(target->bi_dir, target->bi_dir_offset, target_snapshot)); 1639 ret = bkey_err(bp_dirent); 1640 if (ret && !bch2_err_matches(ret, ENOENT)) 1641 goto err; 1642 1643 bool backpointer_exists = !ret; 1644 ret = 0; 1645 1646 if (fsck_err_on(!backpointer_exists, 1647 c, inode_wrong_backpointer, 1648 "inode %llu:%u has wrong backpointer:\n" 1649 "got %llu:%llu\n" 1650 "should be %llu:%llu", 1651 target->bi_inum, target_snapshot, 1652 target->bi_dir, 1653 target->bi_dir_offset, 1654 d.k->p.inode, 1655 d.k->p.offset)) { 1656 target->bi_dir = d.k->p.inode; 1657 target->bi_dir_offset = d.k->p.offset; 1658 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1659 goto out; 1660 } 1661 1662 bch2_bkey_val_to_text(&buf, c, d.s_c); 1663 prt_newline(&buf); 1664 if (backpointer_exists) 1665 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 1666 1667 if 
(fsck_err_on(backpointer_exists && 1668 (S_ISDIR(target->bi_mode) || 1669 target->bi_subvol), 1670 c, inode_dir_multiple_links, 1671 "%s %llu:%u with multiple links\n%s", 1672 S_ISDIR(target->bi_mode) ? "directory" : "subvolume", 1673 target->bi_inum, target_snapshot, buf.buf)) { 1674 ret = __remove_dirent(trans, d.k->p); 1675 goto out; 1676 } 1677 1678 /* 1679 * hardlinked file with nlink 0: 1680 * We're just adjusting nlink here so check_nlinks() will pick 1681 * it up, it ignores inodes with nlink 0 1682 */ 1683 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 1684 c, inode_multiple_links_but_nlink_0, 1685 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 1686 target->bi_inum, target_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 1687 target->bi_nlink++; 1688 target->bi_flags &= ~BCH_INODE_unlinked; 1689 ret = __bch2_fsck_write_inode(trans, target, target_snapshot); 1690 if (ret) 1691 goto err; 1692 } 1693 out: 1694 err: 1695 fsck_err: 1696 bch2_trans_iter_exit(trans, &bp_iter); 1697 printbuf_exit(&buf); 1698 bch_err_fn(c, ret); 1699 return ret; 1700 } 1701 1702 static int check_dirent_target(struct btree_trans *trans, 1703 struct btree_iter *iter, 1704 struct bkey_s_c_dirent d, 1705 struct bch_inode_unpacked *target, 1706 u32 target_snapshot) 1707 { 1708 struct bch_fs *c = trans->c; 1709 struct bkey_i_dirent *n; 1710 struct printbuf buf = PRINTBUF; 1711 int ret = 0; 1712 1713 ret = check_dirent_inode_dirent(trans, iter, d, target, target_snapshot); 1714 if (ret) 1715 goto err; 1716 1717 if (fsck_err_on(d.v->d_type != inode_d_type(target), 1718 c, dirent_d_type_wrong, 1719 "incorrect d_type: got %s, should be %s:\n%s", 1720 bch2_d_type_str(d.v->d_type), 1721 bch2_d_type_str(inode_d_type(target)), 1722 (printbuf_reset(&buf), 1723 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1724 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 1725 ret = PTR_ERR_OR_ZERO(n); 1726 if (ret) 1727 goto err; 1728 1729 bkey_reassemble(&n->k_i, d.s_c); 1730 
n->v.d_type = inode_d_type(target); 1731 if (n->v.d_type == DT_SUBVOL) { 1732 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 1733 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 1734 } else { 1735 n->v.d_inum = cpu_to_le64(target->bi_inum); 1736 } 1737 1738 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 1739 if (ret) 1740 goto err; 1741 1742 d = dirent_i_to_s_c(n); 1743 } 1744 err: 1745 fsck_err: 1746 printbuf_exit(&buf); 1747 bch_err_fn(c, ret); 1748 return ret; 1749 } 1750 1751 /* find a subvolume that's a descendent of @snapshot: */ 1752 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 1753 { 1754 struct btree_iter iter; 1755 struct bkey_s_c k; 1756 int ret; 1757 1758 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 1759 if (k.k->type != KEY_TYPE_subvolume) 1760 continue; 1761 1762 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 1763 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 1764 bch2_trans_iter_exit(trans, &iter); 1765 *subvolid = k.k->p.offset; 1766 goto found; 1767 } 1768 } 1769 if (!ret) 1770 ret = -ENOENT; 1771 found: 1772 bch2_trans_iter_exit(trans, &iter); 1773 return ret; 1774 } 1775 1776 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 1777 struct bkey_s_c_dirent d) 1778 { 1779 struct bch_fs *c = trans->c; 1780 struct btree_iter subvol_iter = {}; 1781 struct bch_inode_unpacked subvol_root; 1782 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 1783 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 1784 u32 parent_snapshot; 1785 u64 parent_inum; 1786 struct printbuf buf = PRINTBUF; 1787 int ret = 0; 1788 1789 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 1790 if (ret && !bch2_err_matches(ret, ENOENT)) 1791 return ret; 1792 1793 if (fsck_err_on(ret, c, dirent_to_missing_parent_subvol, 1794 "dirent parent_subvol points to missing 
subvolume\n%s", 1795 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 1796 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 1797 c, dirent_not_visible_in_parent_subvol, 1798 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 1799 parent_snapshot, 1800 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 1801 u32 new_parent_subvol; 1802 ret = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 1803 if (ret) 1804 goto err; 1805 1806 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 1807 ret = PTR_ERR_OR_ZERO(new_dirent); 1808 if (ret) 1809 goto err; 1810 1811 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 1812 } 1813 1814 struct bkey_s_c_subvolume s = 1815 bch2_bkey_get_iter_typed(trans, &subvol_iter, 1816 BTREE_ID_subvolumes, POS(0, target_subvol), 1817 0, subvolume); 1818 ret = bkey_err(s.s_c); 1819 if (ret && !bch2_err_matches(ret, ENOENT)) 1820 return ret; 1821 1822 if (ret) { 1823 if (fsck_err(c, dirent_to_missing_subvol, 1824 "dirent points to missing subvolume\n%s", 1825 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 1826 return __remove_dirent(trans, d.k->p); 1827 ret = 0; 1828 goto out; 1829 } 1830 1831 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 1832 c, subvol_fs_path_parent_wrong, 1833 "subvol with wrong fs_path_parent, should be be %u\n%s", 1834 parent_subvol, 1835 (bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) { 1836 struct bkey_i_subvolume *n = 1837 bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume); 1838 ret = PTR_ERR_OR_ZERO(n); 1839 if (ret) 1840 goto err; 1841 1842 n->v.fs_path_parent = cpu_to_le32(parent_subvol); 1843 } 1844 1845 u64 target_inum = le64_to_cpu(s.v->inode); 1846 u32 target_snapshot = le32_to_cpu(s.v->snapshot); 1847 1848 ret = lookup_inode(trans, target_inum, &subvol_root, &target_snapshot); 1849 if (ret && !bch2_err_matches(ret, ENOENT)) 1850 
return ret; 1851 1852 if (fsck_err_on(parent_subvol != subvol_root.bi_parent_subvol, 1853 c, inode_bi_parent_wrong, 1854 "subvol root %llu has wrong bi_parent_subvol: got %u, should be %u", 1855 target_inum, 1856 subvol_root.bi_parent_subvol, parent_subvol)) { 1857 subvol_root.bi_parent_subvol = parent_subvol; 1858 ret = __bch2_fsck_write_inode(trans, &subvol_root, target_snapshot); 1859 if (ret) 1860 return ret; 1861 } 1862 1863 ret = check_dirent_target(trans, iter, d, &subvol_root, 1864 target_snapshot); 1865 if (ret) 1866 return ret; 1867 out: 1868 err: 1869 fsck_err: 1870 bch2_trans_iter_exit(trans, &subvol_iter); 1871 printbuf_exit(&buf); 1872 return ret; 1873 } 1874 1875 static int check_dirent(struct btree_trans *trans, struct btree_iter *iter, 1876 struct bkey_s_c k, 1877 struct bch_hash_info *hash_info, 1878 struct inode_walker *dir, 1879 struct inode_walker *target, 1880 struct snapshots_seen *s) 1881 { 1882 struct bch_fs *c = trans->c; 1883 struct bkey_s_c_dirent d; 1884 struct inode_walker_entry *i; 1885 struct printbuf buf = PRINTBUF; 1886 struct bpos equiv; 1887 int ret = 0; 1888 1889 ret = check_key_has_snapshot(trans, iter, k); 1890 if (ret) { 1891 ret = ret < 0 ? 
ret : 0; 1892 goto out; 1893 } 1894 1895 equiv = k.k->p; 1896 equiv.snapshot = bch2_snapshot_equiv(c, k.k->p.snapshot); 1897 1898 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1899 if (ret) 1900 goto err; 1901 1902 if (k.k->type == KEY_TYPE_whiteout) 1903 goto out; 1904 1905 if (dir->last_pos.inode != k.k->p.inode) { 1906 ret = check_subdir_count(trans, dir); 1907 if (ret) 1908 goto err; 1909 } 1910 1911 BUG_ON(!btree_iter_path(trans, iter)->should_be_locked); 1912 1913 i = walk_inode(trans, dir, k); 1914 ret = PTR_ERR_OR_ZERO(i); 1915 if (ret < 0) 1916 goto err; 1917 1918 if (dir->first_this_inode && dir->inodes.nr) 1919 *hash_info = bch2_hash_info_init(c, &dir->inodes.data[0].inode); 1920 dir->first_this_inode = false; 1921 1922 if (fsck_err_on(!i, c, dirent_in_missing_dir_inode, 1923 "dirent in nonexisting directory:\n%s", 1924 (printbuf_reset(&buf), 1925 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1926 ret = bch2_btree_delete_at(trans, iter, 1927 BTREE_UPDATE_INTERNAL_SNAPSHOT_NODE); 1928 goto out; 1929 } 1930 1931 if (!i) 1932 goto out; 1933 1934 if (fsck_err_on(!S_ISDIR(i->inode.bi_mode), 1935 c, dirent_in_non_dir_inode, 1936 "dirent in non directory inode type %s:\n%s", 1937 bch2_d_type_str(inode_d_type(&i->inode)), 1938 (printbuf_reset(&buf), 1939 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1940 ret = bch2_btree_delete_at(trans, iter, 0); 1941 goto out; 1942 } 1943 1944 ret = hash_check_key(trans, bch2_dirent_hash_desc, hash_info, iter, k); 1945 if (ret < 0) 1946 goto err; 1947 if (ret) { 1948 /* dirent has been deleted */ 1949 ret = 0; 1950 goto out; 1951 } 1952 1953 if (k.k->type != KEY_TYPE_dirent) 1954 goto out; 1955 1956 d = bkey_s_c_to_dirent(k); 1957 1958 if (d.v->d_type == DT_SUBVOL) { 1959 ret = check_dirent_to_subvol(trans, iter, d); 1960 if (ret) 1961 goto err; 1962 } else { 1963 ret = __get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum)); 1964 if (ret) 1965 goto err; 1966 1967 if (fsck_err_on(!target->inodes.nr, 
1968 c, dirent_to_missing_inode, 1969 "dirent points to missing inode: (equiv %u)\n%s", 1970 equiv.snapshot, 1971 (printbuf_reset(&buf), 1972 bch2_bkey_val_to_text(&buf, c, k), 1973 buf.buf))) { 1974 ret = __remove_dirent(trans, d.k->p); 1975 if (ret) 1976 goto err; 1977 } 1978 1979 darray_for_each(target->inodes, i) { 1980 ret = check_dirent_target(trans, iter, d, 1981 &i->inode, i->snapshot); 1982 if (ret) 1983 goto err; 1984 } 1985 1986 if (d.v->d_type == DT_DIR) 1987 for_each_visible_inode(c, s, dir, equiv.snapshot, i) 1988 i->count++; 1989 } 1990 out: 1991 err: 1992 fsck_err: 1993 printbuf_exit(&buf); 1994 bch_err_fn(c, ret); 1995 return ret; 1996 } 1997 1998 /* 1999 * Walk dirents: verify that they all have a corresponding S_ISDIR inode, 2000 * validate d_type 2001 */ 2002 int bch2_check_dirents(struct bch_fs *c) 2003 { 2004 struct inode_walker dir = inode_walker_init(); 2005 struct inode_walker target = inode_walker_init(); 2006 struct snapshots_seen s; 2007 struct bch_hash_info hash_info; 2008 2009 snapshots_seen_init(&s); 2010 2011 int ret = bch2_trans_run(c, 2012 for_each_btree_key_commit(trans, iter, BTREE_ID_dirents, 2013 POS(BCACHEFS_ROOT_INO, 0), 2014 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, 2015 k, 2016 NULL, NULL, 2017 BCH_TRANS_COMMIT_no_enospc, 2018 check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?: 2019 check_subdir_count_notnested(trans, &dir)); 2020 2021 snapshots_seen_exit(&s); 2022 inode_walker_exit(&dir); 2023 inode_walker_exit(&target); 2024 bch_err_fn(c, ret); 2025 return ret; 2026 } 2027 2028 static int check_xattr(struct btree_trans *trans, struct btree_iter *iter, 2029 struct bkey_s_c k, 2030 struct bch_hash_info *hash_info, 2031 struct inode_walker *inode) 2032 { 2033 struct bch_fs *c = trans->c; 2034 struct inode_walker_entry *i; 2035 int ret; 2036 2037 ret = check_key_has_snapshot(trans, iter, k); 2038 if (ret < 0) 2039 return ret; 2040 if (ret) 2041 return 0; 2042 2043 i = walk_inode(trans, inode, k); 2044 ret = 
PTR_ERR_OR_ZERO(i); 2045 if (ret) 2046 return ret; 2047 2048 if (inode->first_this_inode && inode->inodes.nr) 2049 *hash_info = bch2_hash_info_init(c, &inode->inodes.data[0].inode); 2050 inode->first_this_inode = false; 2051 2052 if (fsck_err_on(!i, c, xattr_in_missing_inode, 2053 "xattr for missing inode %llu", 2054 k.k->p.inode)) 2055 return bch2_btree_delete_at(trans, iter, 0); 2056 2057 if (!i) 2058 return 0; 2059 2060 ret = hash_check_key(trans, bch2_xattr_hash_desc, hash_info, iter, k); 2061 fsck_err: 2062 bch_err_fn(c, ret); 2063 return ret; 2064 } 2065 2066 /* 2067 * Walk xattrs: verify that they all have a corresponding inode 2068 */ 2069 int bch2_check_xattrs(struct bch_fs *c) 2070 { 2071 struct inode_walker inode = inode_walker_init(); 2072 struct bch_hash_info hash_info; 2073 int ret = 0; 2074 2075 ret = bch2_trans_run(c, 2076 for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs, 2077 POS(BCACHEFS_ROOT_INO, 0), 2078 BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, 2079 k, 2080 NULL, NULL, 2081 BCH_TRANS_COMMIT_no_enospc, 2082 check_xattr(trans, &iter, k, &hash_info, &inode))); 2083 bch_err_fn(c, ret); 2084 return ret; 2085 } 2086 2087 static int check_root_trans(struct btree_trans *trans) 2088 { 2089 struct bch_fs *c = trans->c; 2090 struct bch_inode_unpacked root_inode; 2091 u32 snapshot; 2092 u64 inum; 2093 int ret; 2094 2095 ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum); 2096 if (ret && !bch2_err_matches(ret, ENOENT)) 2097 return ret; 2098 2099 if (mustfix_fsck_err_on(ret, c, root_subvol_missing, 2100 "root subvol missing")) { 2101 struct bkey_i_subvolume root_subvol; 2102 2103 snapshot = U32_MAX; 2104 inum = BCACHEFS_ROOT_INO; 2105 2106 bkey_subvolume_init(&root_subvol.k_i); 2107 root_subvol.k.p.offset = BCACHEFS_ROOT_SUBVOL; 2108 root_subvol.v.flags = 0; 2109 root_subvol.v.snapshot = cpu_to_le32(snapshot); 2110 root_subvol.v.inode = cpu_to_le64(inum); 2111 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, 
&root_subvol.k_i, 0); 2112 bch_err_msg(c, ret, "writing root subvol"); 2113 if (ret) 2114 goto err; 2115 } 2116 2117 ret = lookup_inode(trans, BCACHEFS_ROOT_INO, &root_inode, &snapshot); 2118 if (ret && !bch2_err_matches(ret, ENOENT)) 2119 return ret; 2120 2121 if (mustfix_fsck_err_on(ret, c, root_dir_missing, 2122 "root directory missing") || 2123 mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode), 2124 c, root_inode_not_dir, 2125 "root inode not a directory")) { 2126 bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755, 2127 0, NULL); 2128 root_inode.bi_inum = inum; 2129 2130 ret = __bch2_fsck_write_inode(trans, &root_inode, snapshot); 2131 bch_err_msg(c, ret, "writing root inode"); 2132 } 2133 err: 2134 fsck_err: 2135 return ret; 2136 } 2137 2138 /* Get root directory, create if it doesn't exist: */ 2139 int bch2_check_root(struct bch_fs *c) 2140 { 2141 int ret = bch2_trans_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 2142 check_root_trans(trans)); 2143 bch_err_fn(c, ret); 2144 return ret; 2145 } 2146 2147 typedef DARRAY(u32) darray_u32; 2148 2149 static bool darray_u32_has(darray_u32 *d, u32 v) 2150 { 2151 darray_for_each(*d, i) 2152 if (*i == v) 2153 return true; 2154 return false; 2155 } 2156 2157 /* 2158 * We've checked that inode backpointers point to valid dirents; here, it's 2159 * sufficient to check that the subvolume root has a dirent: 2160 */ 2161 static int subvol_has_dirent(struct btree_trans *trans, struct bkey_s_c_subvolume s) 2162 { 2163 struct bch_inode_unpacked inode; 2164 int ret = bch2_inode_find_by_inum_trans(trans, 2165 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 2166 &inode); 2167 if (ret) 2168 return ret; 2169 2170 return inode.bi_dir != 0; 2171 } 2172 2173 static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k) 2174 { 2175 struct bch_fs *c = trans->c; 2176 struct btree_iter parent_iter = {}; 2177 darray_u32 subvol_path = {}; 2178 struct printbuf buf = PRINTBUF; 2179 int ret = 0; 2180 
	/*
	 * NOTE(review): this is the tail of check_subvol_path(); its signature
	 * and local declarations (trans, c, k, ret, buf, subvol_path,
	 * parent_iter) are above this chunk.
	 */
	if (k.k->type != KEY_TYPE_subvolume)
		return 0;

	/* Walk fs_path_parent pointers up to the root subvolume: */
	while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
		/* Remember this subvolume so we can detect loops below: */
		ret = darray_push(&subvol_path, k.k->p.offset);
		if (ret)
			goto err;

		struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k);

		ret = subvol_has_dirent(trans, s);
		if (ret < 0)
			break;

		/* No dirent pointing at this subvolume -> it's unreachable: */
		if (fsck_err_on(!ret,
				c, subvol_unreachable,
				"unreachable subvolume %s",
				(bch2_bkey_val_to_text(&buf, c, s.s_c),
				 buf.buf))) {
			ret = reattach_subvol(trans, s);
			break;
		}

		u32 parent = le32_to_cpu(s.v->fs_path_parent);

		/* Already visited this parent -> cycle in the subvolume tree: */
		if (darray_u32_has(&subvol_path, parent)) {
			if (fsck_err(c, subvol_loop, "subvolume loop"))
				ret = reattach_subvol(trans, s);
			break;
		}

		/* Re-point the iterator at the parent subvolume and continue: */
		bch2_trans_iter_exit(trans, &parent_iter);
		bch2_trans_iter_init(trans, &parent_iter,
				     BTREE_ID_subvolumes, POS(0, parent), 0);
		k = bch2_btree_iter_peek_slot(&parent_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		/* Parent slot isn't a subvolume key -> dangling parent pointer: */
		if (fsck_err_on(k.k->type != KEY_TYPE_subvolume,
				c, subvol_unreachable,
				"unreachable subvolume %s",
				(bch2_bkey_val_to_text(&buf, c, s.s_c),
				 buf.buf))) {
			ret = reattach_subvol(trans, s);
			break;
		}
	}
fsck_err:
err:
	printbuf_exit(&buf);
	darray_exit(&subvol_path);
	bch2_trans_iter_exit(trans, &parent_iter);
	return ret;
}

/*
 * Walk every subvolume and verify it is reachable from the root subvolume
 * (check_subvol_path() per key), committing fixes as it goes.
 */
int bch2_check_subvolume_structure(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_PREFETCH, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_subvol_path(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

/* One ancestor on the path from an inode up to its subvolume root: */
struct pathbuf_entry {
	u64	inum;		/* inode number of the ancestor directory */
	u32	snapshot;	/* snapshot ID it was seen in */
};

typedef DARRAY(struct pathbuf_entry) pathbuf;

/* Has (inum, snapshot) already been visited on this path? (loop detection) */
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
	darray_for_each(*p, i)
		if (i->inum == inum &&
		    i->snapshot == snapshot)
			return true;
	return false;
}

/*
 * Check that a given inode is reachable from its subvolume root - we already
 * verified subvolume connectivity:
 *
 * XXX: we should also be verifying that inodes are in the right subvolumes
 */
static int check_path(struct btree_trans *trans, pathbuf *p, struct bkey_s_c inode_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter inode_iter = {};
	struct bch_inode_unpacked inode;
	struct printbuf buf = PRINTBUF;
	u32 snapshot = bch2_snapshot_equiv(c, inode_k.k->p.snapshot);
	int ret = 0;

	/* The path buffer is reused across calls; start each walk empty: */
	p->nr = 0;

	BUG_ON(bch2_inode_unpack(inode_k, &inode));

	/* Walk backpointers upward until we hit a subvolume root: */
	while (!inode.bi_subvol) {
		struct btree_iter dirent_iter;
		struct bkey_s_c_dirent d;
		u32 parent_snapshot = snapshot;

		d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
		ret = bkey_err(d.s_c);
		if (ret && !bch2_err_matches(ret, ENOENT))
			break;

		/* Backpointer exists but names a different inode: treat as missing */
		if (!ret && !dirent_points_to_inode(d, &inode)) {
			bch2_trans_iter_exit(trans, &dirent_iter);
			ret = -BCH_ERR_ENOENT_dirent_doesnt_match_inode;
		}

		/* No (valid) dirent pointing at this inode -> unreachable: */
		if (bch2_err_matches(ret, ENOENT)) {
			ret = 0;
			if (fsck_err(c, inode_unreachable,
				     "unreachable inode\n%s",
				     (printbuf_reset(&buf),
				      bch2_bkey_val_to_text(&buf, c, inode_k),
				      buf.buf)))
				ret = reattach_inode(trans, &inode, snapshot);
			goto out;
		}

		bch2_trans_iter_exit(trans, &dirent_iter);

		if (!S_ISDIR(inode.bi_mode))
			break;

		ret = darray_push(p, ((struct pathbuf_entry) {
			.inum		= inode.bi_inum,
			.snapshot	= snapshot,
		}));
		/*
		 * NOTE(review): this early return bypasses the cleanup at
		 * out:/fsck_err: — inode_iter may still hold an iterator from
		 * a previous loop pass, and buf is not exited; confirm whether
		 * this should be a goto out instead.
		 */
		if (ret)
			return ret;

		snapshot = parent_snapshot;

		/* Look up the parent directory inode and continue from there: */
		bch2_trans_iter_exit(trans, &inode_iter);
		inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes,
					     SPOS(0, inode.bi_dir, snapshot), 0);
		ret =   bkey_err(inode_k) ?:
			!bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode
			: bch2_inode_unpack(inode_k, &inode);
		if (ret) {
			/* Should have been caught in dirents pass */
			if (!bch2_err_matches(ret, BCH_ERR_transaction_restart))
				bch_err(c, "error looking up parent directory: %i", ret);
			break;
		}

		snapshot = inode_k.k->p.snapshot;

		/* Seeing the same (inum, snapshot) twice means a directory loop: */
		if (path_is_dup(p, inode.bi_inum, snapshot)) {
			/* XXX print path */
			bch_err(c, "directory structure loop");

			darray_for_each(*p, i)
				pr_err("%llu:%u", i->inum, i->snapshot);
			pr_err("%llu:%u", inode.bi_inum, snapshot);

			if (fsck_err(c, dir_loop, "directory structure loop")) {
				/* Break the loop, then reattach under lost+found: */
				ret = remove_backpointer(trans, &inode);
				bch_err_msg(c, ret, "removing dirent");
				if (ret)
					break;

				ret = reattach_inode(trans, &inode, snapshot);
				bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum);
			}
			break;
		}
	}
out:
fsck_err:
	bch2_trans_iter_exit(trans, &inode_iter);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check for unreachable inodes, as well as loops in the directory structure:
 * After bch2_check_dirents(), if an inode backpointer doesn't exist that means it's
 * unreachable:
 */
int bch2_check_directory_structure(struct bch_fs *c)
{
	pathbuf path = { 0, };
	int ret;

	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
					  BTREE_ITER_INTENT|
					  BTREE_ITER_PREFETCH|
					  BTREE_ITER_ALL_SNAPSHOTS, k,
					  NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
			if (!bkey_is_inode(k.k))
				continue;

			/* Unlinked inodes are legitimately unreachable; skip: */
			if (bch2_inode_flags(k) & BCH_INODE_unlinked)
				continue;

			check_path(trans, &path, k);
		})));
	darray_exit(&path);

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Sorted table of candidate hardlinked inodes and the link counts we
 * observe for them while walking dirents:
 */
struct nlink_table {
	size_t nr;	/* entries in use */
	size_t size;	/* allocated capacity */

	struct nlink {
		u64	inum;
		u32	snapshot;
		u32	count;	/* dirents seen referencing this inode */
	} *d;
};

/*
 * Append (inum, snapshot) to the table, growing it (doubling, min 128)
 * as needed.  Returns 0 or -BCH_ERR_ENOMEM_fsck_add_nlink.
 */
static int add_nlink(struct bch_fs *c, struct nlink_table *t,
		     u64 inum, u32 snapshot)
{
	if (t->nr == t->size) {
		size_t new_size = max_t(size_t, 128UL, t->size * 2);
		void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);

		if (!d) {
			bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
				new_size);
			return -BCH_ERR_ENOMEM_fsck_add_nlink;
		}

		/* Manual grow-and-copy (not krealloc) to keep the old table on failure: */
		if (t->d)
			memcpy(d, t->d, t->size * sizeof(t->d[0]));
		kvfree(t->d);

		t->d = d;
		t->size = new_size;
	}


	/* count starts at 0; it's filled in by the dirents walk: */
	t->d[t->nr++] = (struct nlink) {
		.inum		= inum,
		.snapshot	= snapshot,
	};

	return 0;
}

/* Order nlink entries by inode number only (snapshots of one inum stay adjacent): */
static int nlink_cmp(const void *_l, const void *_r)
{
	const struct nlink *l = _l;
	const struct nlink *r = _r;

	return cmp_int(l->inum, r->inum);
}

/*
 * Bump the link count for inum as seen from @snapshot, if inum falls inside
 * the [range_start, range_end) window currently held in @links.
 */
static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
		     struct nlink_table *links,
		     u64 range_start, u64 range_end, u64 inum, u32 snapshot)
{
	/* snapshot = U32_MAX so bsearch lands within this inum's run: */
	struct nlink *link, key = {
		.inum = inum, .snapshot = U32_MAX,
	};

	if (inum < range_start || inum >= range_end)
		return;

	link = __inline_bsearch(&key, links->d, links->nr,
				sizeof(links->d[0]), nlink_cmp);
	if (!link)
		return;

	/* Rewind to the first entry with this inum (cmp ignores snapshot): */
	while (link > links->d && link[0].inum == link[-1].inum)
		--link;

	for (; link < links->d + links->nr && link->inum == inum; link++)
		if (ref_visible(c, s, snapshot, link->snapshot)) {
			link->count++;
			if (link->snapshot >= snapshot)
				break;
		}
}

/*
 * Pass 1 of check_nlinks: collect every possibly-hardlinked inode (non-dir,
 * nonzero nlink) starting at @start into @t.  If the table fills up, stop
 * early and record where the next batch should resume via *end.
 */
noinline_for_stack
static int check_nlinks_find_hardlinks(struct bch_fs *c,
				       struct nlink_table *t,
				       u64 start, u64 *end)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_inodes,
				   POS(0, start),
				   BTREE_ITER_INTENT|
				   BTREE_ITER_PREFETCH|
				   BTREE_ITER_ALL_SNAPSHOTS, k, ({
			if (!bkey_is_inode(k.k))
				continue;

			/* Should never fail, checked by bch2_inode_invalid: */
			struct bch_inode_unpacked u;
			BUG_ON(bch2_inode_unpack(k, &u));

			/*
			 * Backpointer and directory structure checks are sufficient for
			 * directories, since they can't have hardlinks:
			 */
			if (S_ISDIR(u.bi_mode))
				continue;

			if (!u.bi_nlink)
				continue;

			/* Table full (allocation failed): resume from here next batch */
			ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
			if (ret) {
				*end = k.k->p.offset;
				ret = 0;
				break;
			}
			0;
		})));

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Pass 2 of check_nlinks: walk all dirents and count, per snapshot, how many
 * links actually point at each inode collected in pass 1.
 */
noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
				     u64 range_start, u64 range_end)
{
	struct snapshots_seen s;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
				   BTREE_ITER_INTENT|
				   BTREE_ITER_PREFETCH|
				   BTREE_ITER_ALL_SNAPSHOTS, k, ({
			ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
			if (ret)
				break;

			if (k.k->type == KEY_TYPE_dirent) {
				struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

				/* dirs/subvols can't be hardlinked; only count the rest: */
				if (d.v->d_type != DT_DIR &&
				    d.v->d_type != DT_SUBVOL)
					inc_link(c, &s, links, range_start, range_end,
						 le64_to_cpu(d.v->d_inum),
						 bch2_snapshot_equiv(c, d.k->p.snapshot));
			}
			0;
		})));

	snapshots_seen_exit(&s);

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Per-inode step of pass 3: compare the inode's stored nlink against the
 * observed count in @links and rewrite it if they differ.  Returns 1 to
 * signal end-of-range to the caller's loop, 0/negative otherwise.
 * *idx advances monotonically in lockstep with the inode walk.
 */
static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
				     struct bkey_s_c k,
				     struct nlink_table *links,
				     size_t *idx, u64 range_end)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked u;
	struct nlink *link = &links->d[*idx];
	int ret = 0;

	if (k.k->p.offset >= range_end)
		return 1;

	if (!bkey_is_inode(k.k))
		return 0;

	BUG_ON(bch2_inode_unpack(k, &u));

	/* Same filters as pass 1 so table and walk stay in sync: */
	if (S_ISDIR(u.bi_mode))
		return 0;

	if (!u.bi_nlink)
		return 0;

	/* Advance to this inode's entry; both sides are sorted: */
	while ((cmp_int(link->inum, k.k->p.offset) ?:
		cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
		BUG_ON(*idx == links->nr);
		link = &links->d[++*idx];
	}

	if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count,
			c, inode_wrong_nlink,
			"inode %llu type %s has wrong i_nlink (%u, should be %u)",
			u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
			bch2_inode_nlink_get(&u), link->count)) {
		bch2_inode_nlink_set(&u, link->count);
		ret = __bch2_fsck_write_inode(trans, &u, k.k->p.snapshot);
	}
fsck_err:
	return ret;
}

/*
 * Pass 3 of check_nlinks: re-walk the inode range and fix up any inode whose
 * stored nlink disagrees with the observed count.
 */
noinline_for_stack
static int check_nlinks_update_hardlinks(struct bch_fs *c,
			       struct nlink_table *links,
			       u64 range_start, u64 range_end)
{
	size_t idx = 0;

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
				POS(0, range_start),
				BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|BTREE_ITER_ALL_SNAPSHOTS, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end)));
	/* ret == 1 just means end-of-range from update_inode; not an error */
	if (ret < 0) {
		bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret));
		return ret;
	}

	return 0;
}

/*
 * Verify/repair inode link counts in batches: collect candidate hardlinks,
 * count dirent references, then rewrite mismatched inodes.  Batching (the
 * range_start/next loop) bounds the nlink table's memory use.
 */
int bch2_check_nlinks(struct bch_fs *c)
{
	struct nlink_table links = { 0 };
	u64 this_iter_range_start, next_iter_range_start = 0;
	int ret = 0;

	do {
		this_iter_range_start = next_iter_range_start;
		next_iter_range_start = U64_MAX;

		ret = check_nlinks_find_hardlinks(c, &links,
						  this_iter_range_start,
						  &next_iter_range_start);

		/*
		 * NOTE(review): ret from check_nlinks_find_hardlinks is
		 * overwritten here without being checked — looks like a
		 * missing `if (ret) break;`; confirm intended.
		 */
		ret = check_nlinks_walk_dirents(c, &links,
						this_iter_range_start,
						next_iter_range_start);
		if (ret)
			break;

		ret = check_nlinks_update_hardlinks(c, &links,
						    this_iter_range_start,
						    next_iter_range_start);
		if (ret)
			break;

		/* Reuse the allocation for the next batch: */
		links.nr = 0;
	} while (next_iter_range_start != U64_MAX);

	kvfree(links.d);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Rewrite a reflink_p key with its front/back pad fields cleared
 * (repair for the pre-reflink_p_fix on-disk format).  No-op for other
 * key types or already-clean keys.
 */
static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter,
			     struct bkey_s_c k)
{
	struct bkey_s_c_reflink_p p;
	struct bkey_i_reflink_p *u;

	if (k.k->type != KEY_TYPE_reflink_p)
		return 0;

	p = bkey_s_c_to_reflink_p(k);

	if (!p.v->front_pad && !p.v->back_pad)
		return 0;

	u = bch2_trans_kmalloc(trans, sizeof(*u));
	int ret = PTR_ERR_OR_ZERO(u);
	if (ret)
		return ret;

	/* Copy the key wholesale, then zero just the pad fields: */
	bkey_reassemble(&u->k_i, k);
	u->v.front_pad	= 0;
	u->v.back_pad	= 0;

	/* NORUN: don't re-run triggers for what is purely a format fixup */
	return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_NORUN);
}

/*
 * One-shot repair pass for reflink_p padding; skipped entirely once the
 * superblock version says the fix is already on disk.
 */
int bch2_fix_reflink_p(struct bch_fs *c)
{
	if (c->sb.version >= bcachefs_metadata_version_reflink_p_fix)
		return 0;

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_extents, POS_MIN,
				BTREE_ITER_INTENT|BTREE_ITER_PREFETCH|
				BTREE_ITER_ALL_SNAPSHOTS, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			fix_reflink_p_key(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}