1 // SPDX-License-Identifier: GPL-2.0 2 3 #include "bcachefs.h" 4 #include "bcachefs_ioctl.h" 5 #include "bkey_buf.h" 6 #include "btree_cache.h" 7 #include "btree_update.h" 8 #include "buckets.h" 9 #include "darray.h" 10 #include "dirent.h" 11 #include "error.h" 12 #include "fs.h" 13 #include "fs-common.h" 14 #include "fsck.h" 15 #include "inode.h" 16 #include "keylist.h" 17 #include "recovery_passes.h" 18 #include "snapshot.h" 19 #include "super.h" 20 #include "thread_with_file.h" 21 #include "xattr.h" 22 23 #include <linux/bsearch.h> 24 #include <linux/dcache.h> /* struct qstr */ 25 26 static bool inode_points_to_dirent(struct bch_inode_unpacked *inode, 27 struct bkey_s_c_dirent d) 28 { 29 return inode->bi_dir == d.k->p.inode && 30 inode->bi_dir_offset == d.k->p.offset; 31 } 32 33 static int dirent_points_to_inode_nowarn(struct bkey_s_c_dirent d, 34 struct bch_inode_unpacked *inode) 35 { 36 if (d.v->d_type == DT_SUBVOL 37 ? le32_to_cpu(d.v->d_child_subvol) == inode->bi_subvol 38 : le64_to_cpu(d.v->d_inum) == inode->bi_inum) 39 return 0; 40 return -BCH_ERR_ENOENT_dirent_doesnt_match_inode; 41 } 42 43 static void dirent_inode_mismatch_msg(struct printbuf *out, 44 struct bch_fs *c, 45 struct bkey_s_c_dirent dirent, 46 struct bch_inode_unpacked *inode) 47 { 48 prt_str(out, "inode points to dirent that does not point back:"); 49 prt_newline(out); 50 bch2_bkey_val_to_text(out, c, dirent.s_c); 51 prt_newline(out); 52 bch2_inode_unpacked_to_text(out, inode); 53 } 54 55 static int dirent_points_to_inode(struct bch_fs *c, 56 struct bkey_s_c_dirent dirent, 57 struct bch_inode_unpacked *inode) 58 { 59 int ret = dirent_points_to_inode_nowarn(dirent, inode); 60 if (ret) { 61 struct printbuf buf = PRINTBUF; 62 dirent_inode_mismatch_msg(&buf, c, dirent, inode); 63 bch_warn(c, "%s", buf.buf); 64 printbuf_exit(&buf); 65 } 66 return ret; 67 } 68 69 /* 70 * XXX: this is handling transaction restarts without returning 71 * -BCH_ERR_transaction_restart_nested, this is not how we do 
things anymore: 72 */ 73 static s64 bch2_count_inode_sectors(struct btree_trans *trans, u64 inum, 74 u32 snapshot) 75 { 76 u64 sectors = 0; 77 78 int ret = for_each_btree_key_max(trans, iter, BTREE_ID_extents, 79 SPOS(inum, 0, snapshot), 80 POS(inum, U64_MAX), 81 0, k, ({ 82 if (bkey_extent_is_allocation(k.k)) 83 sectors += k.k->size; 84 0; 85 })); 86 87 return ret ?: sectors; 88 } 89 90 static s64 bch2_count_subdirs(struct btree_trans *trans, u64 inum, 91 u32 snapshot) 92 { 93 u64 subdirs = 0; 94 95 int ret = for_each_btree_key_max(trans, iter, BTREE_ID_dirents, 96 SPOS(inum, 0, snapshot), 97 POS(inum, U64_MAX), 98 0, k, ({ 99 if (k.k->type == KEY_TYPE_dirent && 100 bkey_s_c_to_dirent(k).v->d_type == DT_DIR) 101 subdirs++; 102 0; 103 })); 104 105 return ret ?: subdirs; 106 } 107 108 static int subvol_lookup(struct btree_trans *trans, u32 subvol, 109 u32 *snapshot, u64 *inum) 110 { 111 struct bch_subvolume s; 112 int ret = bch2_subvolume_get(trans, subvol, false, &s); 113 114 *snapshot = le32_to_cpu(s.snapshot); 115 *inum = le64_to_cpu(s.inode); 116 return ret; 117 } 118 119 static int lookup_first_inode(struct btree_trans *trans, u64 inode_nr, 120 struct bch_inode_unpacked *inode) 121 { 122 struct btree_iter iter; 123 struct bkey_s_c k; 124 int ret; 125 126 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inode_nr), 127 BTREE_ITER_all_snapshots, k, ret) { 128 if (k.k->p.offset != inode_nr) 129 break; 130 if (!bkey_is_inode(k.k)) 131 continue; 132 ret = bch2_inode_unpack(k, inode); 133 goto found; 134 } 135 ret = -BCH_ERR_ENOENT_inode; 136 found: 137 bch_err_msg(trans->c, ret, "fetching inode %llu", inode_nr); 138 bch2_trans_iter_exit(trans, &iter); 139 return ret; 140 } 141 142 static int lookup_inode(struct btree_trans *trans, u64 inode_nr, u32 snapshot, 143 struct bch_inode_unpacked *inode) 144 { 145 struct btree_iter iter; 146 struct bkey_s_c k; 147 int ret; 148 149 k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes, 150 SPOS(0, inode_nr, 
snapshot), 0); 151 ret = bkey_err(k); 152 if (ret) 153 goto err; 154 155 ret = bkey_is_inode(k.k) 156 ? bch2_inode_unpack(k, inode) 157 : -BCH_ERR_ENOENT_inode; 158 err: 159 bch2_trans_iter_exit(trans, &iter); 160 return ret; 161 } 162 163 static int lookup_dirent_in_snapshot(struct btree_trans *trans, 164 struct bch_hash_info hash_info, 165 subvol_inum dir, struct qstr *name, 166 u64 *target, unsigned *type, u32 snapshot) 167 { 168 struct btree_iter iter; 169 struct bkey_s_c k = bch2_hash_lookup_in_snapshot(trans, &iter, bch2_dirent_hash_desc, 170 &hash_info, dir, name, 0, snapshot); 171 int ret = bkey_err(k); 172 if (ret) 173 return ret; 174 175 struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k); 176 *target = le64_to_cpu(d.v->d_inum); 177 *type = d.v->d_type; 178 bch2_trans_iter_exit(trans, &iter); 179 return 0; 180 } 181 182 static int __remove_dirent(struct btree_trans *trans, struct bpos pos) 183 { 184 struct bch_fs *c = trans->c; 185 struct btree_iter iter; 186 struct bch_inode_unpacked dir_inode; 187 struct bch_hash_info dir_hash_info; 188 int ret; 189 190 ret = lookup_first_inode(trans, pos.inode, &dir_inode); 191 if (ret) 192 goto err; 193 194 dir_hash_info = bch2_hash_info_init(c, &dir_inode); 195 196 bch2_trans_iter_init(trans, &iter, BTREE_ID_dirents, pos, BTREE_ITER_intent); 197 198 ret = bch2_btree_iter_traverse(&iter) ?: 199 bch2_hash_delete_at(trans, bch2_dirent_hash_desc, 200 &dir_hash_info, &iter, 201 BTREE_UPDATE_internal_snapshot_node); 202 bch2_trans_iter_exit(trans, &iter); 203 err: 204 bch_err_fn(c, ret); 205 return ret; 206 } 207 208 /* 209 * Find any subvolume associated with a tree of snapshots 210 * We can't rely on master_subvol - it might have been deleted. 
211 */ 212 static int find_snapshot_tree_subvol(struct btree_trans *trans, 213 u32 tree_id, u32 *subvol) 214 { 215 struct btree_iter iter; 216 struct bkey_s_c k; 217 int ret; 218 219 for_each_btree_key_norestart(trans, iter, BTREE_ID_snapshots, POS_MIN, 0, k, ret) { 220 if (k.k->type != KEY_TYPE_snapshot) 221 continue; 222 223 struct bkey_s_c_snapshot s = bkey_s_c_to_snapshot(k); 224 if (le32_to_cpu(s.v->tree) != tree_id) 225 continue; 226 227 if (s.v->subvol) { 228 *subvol = le32_to_cpu(s.v->subvol); 229 goto found; 230 } 231 } 232 ret = -BCH_ERR_ENOENT_no_snapshot_tree_subvol; 233 found: 234 bch2_trans_iter_exit(trans, &iter); 235 return ret; 236 } 237 238 /* Get lost+found, create if it doesn't exist: */ 239 static int lookup_lostfound(struct btree_trans *trans, u32 snapshot, 240 struct bch_inode_unpacked *lostfound, 241 u64 reattaching_inum) 242 { 243 struct bch_fs *c = trans->c; 244 struct qstr lostfound_str = QSTR("lost+found"); 245 struct btree_iter lostfound_iter = { NULL }; 246 u64 inum = 0; 247 unsigned d_type = 0; 248 int ret; 249 250 struct bch_snapshot_tree st; 251 ret = bch2_snapshot_tree_lookup(trans, 252 bch2_snapshot_tree(c, snapshot), &st); 253 if (ret) 254 return ret; 255 256 u32 subvolid; 257 ret = find_snapshot_tree_subvol(trans, 258 bch2_snapshot_tree(c, snapshot), &subvolid); 259 bch_err_msg(c, ret, "finding subvol associated with snapshot tree %u", 260 bch2_snapshot_tree(c, snapshot)); 261 if (ret) 262 return ret; 263 264 struct bch_subvolume subvol; 265 ret = bch2_subvolume_get(trans, subvolid, false, &subvol); 266 bch_err_msg(c, ret, "looking up subvol %u for snapshot %u", subvolid, snapshot); 267 if (ret) 268 return ret; 269 270 if (!subvol.inode) { 271 struct btree_iter iter; 272 struct bkey_i_subvolume *subvol = bch2_bkey_get_mut_typed(trans, &iter, 273 BTREE_ID_subvolumes, POS(0, subvolid), 274 0, subvolume); 275 ret = PTR_ERR_OR_ZERO(subvol); 276 if (ret) 277 return ret; 278 279 subvol->v.inode = cpu_to_le64(reattaching_inum); 280 
bch2_trans_iter_exit(trans, &iter); 281 } 282 283 subvol_inum root_inum = { 284 .subvol = subvolid, 285 .inum = le64_to_cpu(subvol.inode) 286 }; 287 288 struct bch_inode_unpacked root_inode; 289 struct bch_hash_info root_hash_info; 290 ret = lookup_inode(trans, root_inum.inum, snapshot, &root_inode); 291 bch_err_msg(c, ret, "looking up root inode %llu for subvol %u", 292 root_inum.inum, subvolid); 293 if (ret) 294 return ret; 295 296 root_hash_info = bch2_hash_info_init(c, &root_inode); 297 298 ret = lookup_dirent_in_snapshot(trans, root_hash_info, root_inum, 299 &lostfound_str, &inum, &d_type, snapshot); 300 if (bch2_err_matches(ret, ENOENT)) 301 goto create_lostfound; 302 303 bch_err_fn(c, ret); 304 if (ret) 305 return ret; 306 307 if (d_type != DT_DIR) { 308 bch_err(c, "error looking up lost+found: not a directory"); 309 return -BCH_ERR_ENOENT_not_directory; 310 } 311 312 /* 313 * The bch2_check_dirents pass has already run, dangling dirents 314 * shouldn't exist here: 315 */ 316 ret = lookup_inode(trans, inum, snapshot, lostfound); 317 bch_err_msg(c, ret, "looking up lost+found %llu:%u in (root inode %llu, snapshot root %u)", 318 inum, snapshot, root_inum.inum, bch2_snapshot_root(c, snapshot)); 319 return ret; 320 321 create_lostfound: 322 /* 323 * we always create lost+found in the root snapshot; we don't want 324 * different branches of the snapshot tree to have different lost+found 325 */ 326 snapshot = le32_to_cpu(st.root_snapshot); 327 /* 328 * XXX: we could have a nicer log message here if we had a nice way to 329 * walk backpointers to print a path 330 */ 331 struct printbuf path = PRINTBUF; 332 ret = bch2_inum_to_path(trans, root_inum, &path); 333 if (ret) 334 goto err; 335 336 bch_notice(c, "creating %s/lost+found in subvol %llu snapshot %u", 337 path.buf, root_inum.subvol, snapshot); 338 printbuf_exit(&path); 339 340 u64 now = bch2_current_time(c); 341 u64 cpu = raw_smp_processor_id(); 342 343 bch2_inode_init_early(c, lostfound); 344 
bch2_inode_init_late(lostfound, now, 0, 0, S_IFDIR|0700, 0, &root_inode); 345 lostfound->bi_dir = root_inode.bi_inum; 346 lostfound->bi_snapshot = le32_to_cpu(st.root_snapshot); 347 348 root_inode.bi_nlink++; 349 350 ret = bch2_inode_create(trans, &lostfound_iter, lostfound, snapshot, cpu); 351 if (ret) 352 goto err; 353 354 bch2_btree_iter_set_snapshot(&lostfound_iter, snapshot); 355 ret = bch2_btree_iter_traverse(&lostfound_iter); 356 if (ret) 357 goto err; 358 359 ret = bch2_dirent_create_snapshot(trans, 360 0, root_inode.bi_inum, snapshot, &root_hash_info, 361 mode_to_type(lostfound->bi_mode), 362 &lostfound_str, 363 lostfound->bi_inum, 364 &lostfound->bi_dir_offset, 365 STR_HASH_must_create) ?: 366 bch2_inode_write_flags(trans, &lostfound_iter, lostfound, 367 BTREE_UPDATE_internal_snapshot_node); 368 err: 369 bch_err_msg(c, ret, "creating lost+found"); 370 bch2_trans_iter_exit(trans, &lostfound_iter); 371 return ret; 372 } 373 374 static inline bool inode_should_reattach(struct bch_inode_unpacked *inode) 375 { 376 if (inode->bi_inum == BCACHEFS_ROOT_INO && 377 inode->bi_subvol == BCACHEFS_ROOT_SUBVOL) 378 return false; 379 380 return !inode->bi_dir && !(inode->bi_flags & BCH_INODE_unlinked); 381 } 382 383 static int maybe_delete_dirent(struct btree_trans *trans, struct bpos d_pos, u32 snapshot) 384 { 385 struct btree_iter iter; 386 struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_dirents, 387 SPOS(d_pos.inode, d_pos.offset, snapshot), 388 BTREE_ITER_intent| 389 BTREE_ITER_with_updates); 390 int ret = bkey_err(k); 391 if (ret) 392 return ret; 393 394 if (bpos_eq(k.k->p, d_pos)) { 395 /* 396 * delet_at() doesn't work because the update path doesn't 397 * internally use BTREE_ITER_with_updates yet 398 */ 399 struct bkey_i *k = bch2_trans_kmalloc(trans, sizeof(*k)); 400 ret = PTR_ERR_OR_ZERO(k); 401 if (ret) 402 goto err; 403 404 bkey_init(&k->k); 405 k->k.type = KEY_TYPE_whiteout; 406 k->k.p = iter.pos; 407 ret = bch2_trans_update(trans, &iter, k, 
BTREE_UPDATE_internal_snapshot_node); 408 } 409 err: 410 bch2_trans_iter_exit(trans, &iter); 411 return ret; 412 } 413 414 static int reattach_inode(struct btree_trans *trans, struct bch_inode_unpacked *inode) 415 { 416 struct bch_fs *c = trans->c; 417 struct bch_inode_unpacked lostfound; 418 char name_buf[20]; 419 int ret; 420 421 u32 dirent_snapshot = inode->bi_snapshot; 422 if (inode->bi_subvol) { 423 inode->bi_parent_subvol = BCACHEFS_ROOT_SUBVOL; 424 425 u64 root_inum; 426 ret = subvol_lookup(trans, inode->bi_parent_subvol, 427 &dirent_snapshot, &root_inum); 428 if (ret) 429 return ret; 430 431 snprintf(name_buf, sizeof(name_buf), "subvol-%u", inode->bi_subvol); 432 } else { 433 snprintf(name_buf, sizeof(name_buf), "%llu", inode->bi_inum); 434 } 435 436 ret = lookup_lostfound(trans, dirent_snapshot, &lostfound, inode->bi_inum); 437 if (ret) 438 return ret; 439 440 lostfound.bi_nlink += S_ISDIR(inode->bi_mode); 441 442 /* ensure lost+found inode is also present in inode snapshot */ 443 if (!inode->bi_subvol) { 444 BUG_ON(!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, lostfound.bi_snapshot)); 445 lostfound.bi_snapshot = inode->bi_snapshot; 446 } 447 448 ret = __bch2_fsck_write_inode(trans, &lostfound); 449 if (ret) 450 return ret; 451 452 struct bch_hash_info dir_hash = bch2_hash_info_init(c, &lostfound); 453 struct qstr name = QSTR(name_buf); 454 455 inode->bi_dir = lostfound.bi_inum; 456 457 ret = bch2_dirent_create_snapshot(trans, 458 inode->bi_parent_subvol, lostfound.bi_inum, 459 dirent_snapshot, 460 &dir_hash, 461 inode_d_type(inode), 462 &name, 463 inode->bi_subvol ?: inode->bi_inum, 464 &inode->bi_dir_offset, 465 STR_HASH_must_create); 466 if (ret) { 467 bch_err_msg(c, ret, "error creating dirent"); 468 return ret; 469 } 470 471 ret = __bch2_fsck_write_inode(trans, inode); 472 if (ret) 473 return ret; 474 475 /* 476 * Fix up inodes in child snapshots: if they should also be reattached 477 * update the backpointer field, if they should not be we need 
to emit 478 * whiteouts for the dirent we just created. 479 */ 480 if (!inode->bi_subvol && bch2_snapshot_is_leaf(c, inode->bi_snapshot) <= 0) { 481 snapshot_id_list whiteouts_done; 482 struct btree_iter iter; 483 struct bkey_s_c k; 484 485 darray_init(&whiteouts_done); 486 487 for_each_btree_key_reverse_norestart(trans, iter, 488 BTREE_ID_inodes, SPOS(0, inode->bi_inum, inode->bi_snapshot - 1), 489 BTREE_ITER_all_snapshots|BTREE_ITER_intent, k, ret) { 490 if (k.k->p.offset != inode->bi_inum) 491 break; 492 493 if (!bkey_is_inode(k.k) || 494 !bch2_snapshot_is_ancestor(c, k.k->p.snapshot, inode->bi_snapshot) || 495 snapshot_list_has_ancestor(c, &whiteouts_done, k.k->p.snapshot)) 496 continue; 497 498 struct bch_inode_unpacked child_inode; 499 ret = bch2_inode_unpack(k, &child_inode); 500 if (ret) 501 break; 502 503 if (!inode_should_reattach(&child_inode)) { 504 ret = maybe_delete_dirent(trans, 505 SPOS(lostfound.bi_inum, inode->bi_dir_offset, 506 dirent_snapshot), 507 k.k->p.snapshot); 508 if (ret) 509 break; 510 511 ret = snapshot_list_add(c, &whiteouts_done, k.k->p.snapshot); 512 if (ret) 513 break; 514 } else { 515 iter.snapshot = k.k->p.snapshot; 516 child_inode.bi_dir = inode->bi_dir; 517 child_inode.bi_dir_offset = inode->bi_dir_offset; 518 519 ret = bch2_inode_write_flags(trans, &iter, &child_inode, 520 BTREE_UPDATE_internal_snapshot_node); 521 if (ret) 522 break; 523 } 524 } 525 darray_exit(&whiteouts_done); 526 bch2_trans_iter_exit(trans, &iter); 527 } 528 529 return ret; 530 } 531 532 static struct bkey_s_c_dirent dirent_get_by_pos(struct btree_trans *trans, 533 struct btree_iter *iter, 534 struct bpos pos) 535 { 536 return bch2_bkey_get_iter_typed(trans, iter, BTREE_ID_dirents, pos, 0, dirent); 537 } 538 539 static int remove_backpointer(struct btree_trans *trans, 540 struct bch_inode_unpacked *inode) 541 { 542 if (!inode->bi_dir) 543 return 0; 544 545 struct bch_fs *c = trans->c; 546 struct btree_iter iter; 547 struct bkey_s_c_dirent d = 
dirent_get_by_pos(trans, &iter, 548 SPOS(inode->bi_dir, inode->bi_dir_offset, inode->bi_snapshot)); 549 int ret = bkey_err(d) ?: 550 dirent_points_to_inode(c, d, inode) ?: 551 __remove_dirent(trans, d.k->p); 552 bch2_trans_iter_exit(trans, &iter); 553 return ret; 554 } 555 556 static int reattach_subvol(struct btree_trans *trans, struct bkey_s_c_subvolume s) 557 { 558 struct bch_fs *c = trans->c; 559 560 struct bch_inode_unpacked inode; 561 int ret = bch2_inode_find_by_inum_trans(trans, 562 (subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) }, 563 &inode); 564 if (ret) 565 return ret; 566 567 ret = remove_backpointer(trans, &inode); 568 if (!bch2_err_matches(ret, ENOENT)) 569 bch_err_msg(c, ret, "removing dirent"); 570 if (ret) 571 return ret; 572 573 ret = reattach_inode(trans, &inode); 574 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 575 return ret; 576 } 577 578 static int reconstruct_subvol(struct btree_trans *trans, u32 snapshotid, u32 subvolid, u64 inum) 579 { 580 struct bch_fs *c = trans->c; 581 582 if (!bch2_snapshot_is_leaf(c, snapshotid)) { 583 bch_err(c, "need to reconstruct subvol, but have interior node snapshot"); 584 return -BCH_ERR_fsck_repair_unimplemented; 585 } 586 587 /* 588 * If inum isn't set, that means we're being called from check_dirents, 589 * not check_inodes - the root of this subvolume doesn't exist or we 590 * would have found it there: 591 */ 592 if (!inum) { 593 struct btree_iter inode_iter = {}; 594 struct bch_inode_unpacked new_inode; 595 u64 cpu = raw_smp_processor_id(); 596 597 bch2_inode_init_early(c, &new_inode); 598 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, S_IFDIR|0755, 0, NULL); 599 600 new_inode.bi_subvol = subvolid; 601 602 int ret = bch2_inode_create(trans, &inode_iter, &new_inode, snapshotid, cpu) ?: 603 bch2_btree_iter_traverse(&inode_iter) ?: 604 bch2_inode_write(trans, &inode_iter, &new_inode); 605 bch2_trans_iter_exit(trans, &inode_iter); 606 if (ret) 607 return ret; 608 609 
inum = new_inode.bi_inum; 610 } 611 612 bch_info(c, "reconstructing subvol %u with root inode %llu", subvolid, inum); 613 614 struct bkey_i_subvolume *new_subvol = bch2_trans_kmalloc(trans, sizeof(*new_subvol)); 615 int ret = PTR_ERR_OR_ZERO(new_subvol); 616 if (ret) 617 return ret; 618 619 bkey_subvolume_init(&new_subvol->k_i); 620 new_subvol->k.p.offset = subvolid; 621 new_subvol->v.snapshot = cpu_to_le32(snapshotid); 622 new_subvol->v.inode = cpu_to_le64(inum); 623 ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &new_subvol->k_i, 0); 624 if (ret) 625 return ret; 626 627 struct btree_iter iter; 628 struct bkey_i_snapshot *s = bch2_bkey_get_mut_typed(trans, &iter, 629 BTREE_ID_snapshots, POS(0, snapshotid), 630 0, snapshot); 631 ret = PTR_ERR_OR_ZERO(s); 632 bch_err_msg(c, ret, "getting snapshot %u", snapshotid); 633 if (ret) 634 return ret; 635 636 u32 snapshot_tree = le32_to_cpu(s->v.tree); 637 638 s->v.subvol = cpu_to_le32(subvolid); 639 SET_BCH_SNAPSHOT_SUBVOL(&s->v, true); 640 bch2_trans_iter_exit(trans, &iter); 641 642 struct bkey_i_snapshot_tree *st = bch2_bkey_get_mut_typed(trans, &iter, 643 BTREE_ID_snapshot_trees, POS(0, snapshot_tree), 644 0, snapshot_tree); 645 ret = PTR_ERR_OR_ZERO(st); 646 bch_err_msg(c, ret, "getting snapshot tree %u", snapshot_tree); 647 if (ret) 648 return ret; 649 650 if (!st->v.master_subvol) 651 st->v.master_subvol = cpu_to_le32(subvolid); 652 653 bch2_trans_iter_exit(trans, &iter); 654 return 0; 655 } 656 657 static int reconstruct_inode(struct btree_trans *trans, enum btree_id btree, u32 snapshot, u64 inum) 658 { 659 struct bch_fs *c = trans->c; 660 unsigned i_mode = S_IFREG; 661 u64 i_size = 0; 662 663 switch (btree) { 664 case BTREE_ID_extents: { 665 struct btree_iter iter = {}; 666 667 bch2_trans_iter_init(trans, &iter, BTREE_ID_extents, SPOS(inum, U64_MAX, snapshot), 0); 668 struct bkey_s_c k = bch2_btree_iter_peek_prev_min(&iter, POS(inum, 0)); 669 bch2_trans_iter_exit(trans, &iter); 670 int ret = bkey_err(k); 
671 if (ret) 672 return ret; 673 674 i_size = k.k->p.offset << 9; 675 break; 676 } 677 case BTREE_ID_dirents: 678 i_mode = S_IFDIR; 679 break; 680 case BTREE_ID_xattrs: 681 break; 682 default: 683 BUG(); 684 } 685 686 struct bch_inode_unpacked new_inode; 687 bch2_inode_init_early(c, &new_inode); 688 bch2_inode_init_late(&new_inode, bch2_current_time(c), 0, 0, i_mode|0600, 0, NULL); 689 new_inode.bi_size = i_size; 690 new_inode.bi_inum = inum; 691 new_inode.bi_snapshot = snapshot; 692 693 return __bch2_fsck_write_inode(trans, &new_inode); 694 } 695 696 struct snapshots_seen { 697 struct bpos pos; 698 snapshot_id_list ids; 699 }; 700 701 static inline void snapshots_seen_exit(struct snapshots_seen *s) 702 { 703 darray_exit(&s->ids); 704 } 705 706 static inline void snapshots_seen_init(struct snapshots_seen *s) 707 { 708 memset(s, 0, sizeof(*s)); 709 } 710 711 static int snapshots_seen_add_inorder(struct bch_fs *c, struct snapshots_seen *s, u32 id) 712 { 713 u32 *i; 714 __darray_for_each(s->ids, i) { 715 if (*i == id) 716 return 0; 717 if (*i > id) 718 break; 719 } 720 721 int ret = darray_insert_item(&s->ids, i - s->ids.data, id); 722 if (ret) 723 bch_err(c, "error reallocating snapshots_seen table (size %zu)", 724 s->ids.size); 725 return ret; 726 } 727 728 static int snapshots_seen_update(struct bch_fs *c, struct snapshots_seen *s, 729 enum btree_id btree_id, struct bpos pos) 730 { 731 if (!bkey_eq(s->pos, pos)) 732 s->ids.nr = 0; 733 s->pos = pos; 734 735 return snapshot_list_add_nodup(c, &s->ids, pos.snapshot); 736 } 737 738 /** 739 * key_visible_in_snapshot - returns true if @id is a descendent of @ancestor, 740 * and @ancestor hasn't been overwritten in @seen 741 * 742 * @c: filesystem handle 743 * @seen: list of snapshot ids already seen at current position 744 * @id: descendent snapshot id 745 * @ancestor: ancestor snapshot id 746 * 747 * Returns: whether key in @ancestor snapshot is visible in @id snapshot 748 */ 749 static bool 
key_visible_in_snapshot(struct bch_fs *c, struct snapshots_seen *seen, 750 u32 id, u32 ancestor) 751 { 752 ssize_t i; 753 754 EBUG_ON(id > ancestor); 755 756 /* @ancestor should be the snapshot most recently added to @seen */ 757 EBUG_ON(ancestor != seen->pos.snapshot); 758 EBUG_ON(ancestor != darray_last(seen->ids)); 759 760 if (id == ancestor) 761 return true; 762 763 if (!bch2_snapshot_is_ancestor(c, id, ancestor)) 764 return false; 765 766 /* 767 * We know that @id is a descendant of @ancestor, we're checking if 768 * we've seen a key that overwrote @ancestor - i.e. also a descendent of 769 * @ascestor and with @id as a descendent. 770 * 771 * But we already know that we're scanning IDs between @id and @ancestor 772 * numerically, since snapshot ID lists are kept sorted, so if we find 773 * an id that's an ancestor of @id we're done: 774 */ 775 776 for (i = seen->ids.nr - 2; 777 i >= 0 && seen->ids.data[i] >= id; 778 --i) 779 if (bch2_snapshot_is_ancestor(c, id, seen->ids.data[i])) 780 return false; 781 782 return true; 783 } 784 785 /** 786 * ref_visible - given a key with snapshot id @src that points to a key with 787 * snapshot id @dst, test whether there is some snapshot in which @dst is 788 * visible. 789 * 790 * @c: filesystem handle 791 * @s: list of snapshot IDs already seen at @src 792 * @src: snapshot ID of src key 793 * @dst: snapshot ID of dst key 794 * Returns: true if there is some snapshot in which @dst is visible 795 * 796 * Assumes we're visiting @src keys in natural key order 797 */ 798 static bool ref_visible(struct bch_fs *c, struct snapshots_seen *s, 799 u32 src, u32 dst) 800 { 801 return dst <= src 802 ? 
key_visible_in_snapshot(c, s, dst, src) 803 : bch2_snapshot_is_ancestor(c, src, dst); 804 } 805 806 static int ref_visible2(struct bch_fs *c, 807 u32 src, struct snapshots_seen *src_seen, 808 u32 dst, struct snapshots_seen *dst_seen) 809 { 810 if (dst > src) { 811 swap(dst, src); 812 swap(dst_seen, src_seen); 813 } 814 return key_visible_in_snapshot(c, src_seen, dst, src); 815 } 816 817 #define for_each_visible_inode(_c, _s, _w, _snapshot, _i) \ 818 for (_i = (_w)->inodes.data; _i < (_w)->inodes.data + (_w)->inodes.nr && \ 819 (_i)->snapshot <= (_snapshot); _i++) \ 820 if (key_visible_in_snapshot(_c, _s, _i->snapshot, _snapshot)) 821 822 struct inode_walker_entry { 823 struct bch_inode_unpacked inode; 824 u32 snapshot; 825 u64 count; 826 u64 i_size; 827 }; 828 829 struct inode_walker { 830 bool first_this_inode; 831 bool have_inodes; 832 bool recalculate_sums; 833 struct bpos last_pos; 834 835 DARRAY(struct inode_walker_entry) inodes; 836 snapshot_id_list deletes; 837 }; 838 839 static void inode_walker_exit(struct inode_walker *w) 840 { 841 darray_exit(&w->inodes); 842 darray_exit(&w->deletes); 843 } 844 845 static struct inode_walker inode_walker_init(void) 846 { 847 return (struct inode_walker) { 0, }; 848 } 849 850 static int add_inode(struct bch_fs *c, struct inode_walker *w, 851 struct bkey_s_c inode) 852 { 853 struct bch_inode_unpacked u; 854 855 return bch2_inode_unpack(inode, &u) ?: 856 darray_push(&w->inodes, ((struct inode_walker_entry) { 857 .inode = u, 858 .snapshot = inode.k->p.snapshot, 859 })); 860 } 861 862 static int get_inodes_all_snapshots(struct btree_trans *trans, 863 struct inode_walker *w, u64 inum) 864 { 865 struct bch_fs *c = trans->c; 866 struct btree_iter iter; 867 struct bkey_s_c k; 868 int ret; 869 870 /* 871 * We no longer have inodes for w->last_pos; clear this to avoid 872 * screwing up check_i_sectors/check_subdir_count if we take a 873 * transaction restart here: 874 */ 875 w->have_inodes = false; 876 w->recalculate_sums = false; 
877 w->inodes.nr = 0; 878 879 for_each_btree_key_norestart(trans, iter, BTREE_ID_inodes, POS(0, inum), 880 BTREE_ITER_all_snapshots, k, ret) { 881 if (k.k->p.offset != inum) 882 break; 883 884 if (bkey_is_inode(k.k)) 885 add_inode(c, w, k); 886 } 887 bch2_trans_iter_exit(trans, &iter); 888 889 if (ret) 890 return ret; 891 892 w->first_this_inode = true; 893 w->have_inodes = true; 894 return 0; 895 } 896 897 static struct inode_walker_entry * 898 lookup_inode_for_snapshot(struct bch_fs *c, struct inode_walker *w, struct bkey_s_c k) 899 { 900 bool is_whiteout = k.k->type == KEY_TYPE_whiteout; 901 902 struct inode_walker_entry *i; 903 __darray_for_each(w->inodes, i) 904 if (bch2_snapshot_is_ancestor(c, k.k->p.snapshot, i->snapshot)) 905 goto found; 906 907 return NULL; 908 found: 909 BUG_ON(k.k->p.snapshot > i->snapshot); 910 911 if (k.k->p.snapshot != i->snapshot && !is_whiteout) { 912 struct inode_walker_entry new = *i; 913 914 new.snapshot = k.k->p.snapshot; 915 new.count = 0; 916 new.i_size = 0; 917 918 struct printbuf buf = PRINTBUF; 919 bch2_bkey_val_to_text(&buf, c, k); 920 921 bch_info(c, "have key for inode %llu:%u but have inode in ancestor snapshot %u\n" 922 "unexpected because we should always update the inode when we update a key in that inode\n" 923 "%s", 924 w->last_pos.inode, k.k->p.snapshot, i->snapshot, buf.buf); 925 printbuf_exit(&buf); 926 927 while (i > w->inodes.data && i[-1].snapshot > k.k->p.snapshot) 928 --i; 929 930 size_t pos = i - w->inodes.data; 931 int ret = darray_insert_item(&w->inodes, pos, new); 932 if (ret) 933 return ERR_PTR(ret); 934 935 i = w->inodes.data + pos; 936 } 937 938 return i; 939 } 940 941 static struct inode_walker_entry *walk_inode(struct btree_trans *trans, 942 struct inode_walker *w, 943 struct bkey_s_c k) 944 { 945 if (w->last_pos.inode != k.k->p.inode) { 946 int ret = get_inodes_all_snapshots(trans, w, k.k->p.inode); 947 if (ret) 948 return ERR_PTR(ret); 949 } 950 951 w->last_pos = k.k->p; 952 953 return 
lookup_inode_for_snapshot(trans->c, w, k); 954 } 955 956 static int get_visible_inodes(struct btree_trans *trans, 957 struct inode_walker *w, 958 struct snapshots_seen *s, 959 u64 inum) 960 { 961 struct bch_fs *c = trans->c; 962 struct btree_iter iter; 963 struct bkey_s_c k; 964 int ret; 965 966 w->inodes.nr = 0; 967 w->deletes.nr = 0; 968 969 for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes, SPOS(0, inum, s->pos.snapshot), 970 BTREE_ITER_all_snapshots, k, ret) { 971 if (k.k->p.offset != inum) 972 break; 973 974 if (!ref_visible(c, s, s->pos.snapshot, k.k->p.snapshot)) 975 continue; 976 977 if (snapshot_list_has_ancestor(c, &w->deletes, k.k->p.snapshot)) 978 continue; 979 980 ret = bkey_is_inode(k.k) 981 ? add_inode(c, w, k) 982 : snapshot_list_add(c, &w->deletes, k.k->p.snapshot); 983 if (ret) 984 break; 985 } 986 bch2_trans_iter_exit(trans, &iter); 987 988 return ret; 989 } 990 991 /* 992 * Prefer to delete the first one, since that will be the one at the wrong 993 * offset: 994 * return value: 0 -> delete k1, 1 -> delete k2 995 */ 996 int bch2_fsck_update_backpointers(struct btree_trans *trans, 997 struct snapshots_seen *s, 998 const struct bch_hash_desc desc, 999 struct bch_hash_info *hash_info, 1000 struct bkey_i *new) 1001 { 1002 if (new->k.type != KEY_TYPE_dirent) 1003 return 0; 1004 1005 struct bkey_i_dirent *d = bkey_i_to_dirent(new); 1006 struct inode_walker target = inode_walker_init(); 1007 int ret = 0; 1008 1009 if (d->v.d_type == DT_SUBVOL) { 1010 BUG(); 1011 } else { 1012 ret = get_visible_inodes(trans, &target, s, le64_to_cpu(d->v.d_inum)); 1013 if (ret) 1014 goto err; 1015 1016 darray_for_each(target.inodes, i) { 1017 i->inode.bi_dir_offset = d->k.p.offset; 1018 ret = __bch2_fsck_write_inode(trans, &i->inode); 1019 if (ret) 1020 goto err; 1021 } 1022 } 1023 err: 1024 inode_walker_exit(&target); 1025 return ret; 1026 } 1027 1028 static struct bkey_s_c_dirent inode_get_dirent(struct btree_trans *trans, 1029 struct btree_iter *iter, 
struct bch_inode_unpacked *inode,
				      u32 *snapshot)
{
	if (inode->bi_subvol) {
		u64 inum;
		/* Subvol roots: resolve the parent subvol to get the snapshot the dirent lives in */
		int ret = subvol_lookup(trans, inode->bi_parent_subvol, snapshot, &inum);
		if (ret)
			return ((struct bkey_s_c_dirent) { .k = ERR_PTR(ret) });
	}

	return dirent_get_by_pos(trans, iter, SPOS(inode->bi_dir, inode->bi_dir_offset, *snapshot));
}

/*
 * Check whether @p is on the deleted-inodes btree.
 *
 * Returns 1 if present (KEY_TYPE_set), 0 if absent, or a negative error code.
 */
static int check_inode_deleted_list(struct btree_trans *trans, struct bpos p)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_deleted_inodes, p, 0);
	int ret = bkey_err(k) ?: k.k->type == KEY_TYPE_set;
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Verify that the dirent an inode's backpointer (bi_dir, bi_dir_offset) names
 * actually exists and points back at this inode; if not, clear the
 * backpointer fields and ask the caller (via *write_inode) to rewrite the
 * inode.
 */
static int check_inode_dirent_inode(struct btree_trans *trans,
				    struct bch_inode_unpacked *inode,
				    bool *write_inode)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;

	u32 inode_snapshot = inode->bi_snapshot;
	struct btree_iter dirent_iter = {};
	struct bkey_s_c_dirent d = inode_get_dirent(trans, &dirent_iter, inode, &inode_snapshot);
	int ret = bkey_err(d);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (fsck_err_on(ret,
			trans, inode_points_to_missing_dirent,
			"inode points to missing dirent\n%s",
			(bch2_inode_unpacked_to_text(&buf, inode), buf.buf)) ||
	    fsck_err_on(!ret && dirent_points_to_inode_nowarn(d, inode),
			trans, inode_points_to_wrong_dirent,
			"%s",
			(printbuf_reset(&buf),
			 dirent_inode_mismatch_msg(&buf, c, d, inode),
			 buf.buf))) {
		/*
		 * We just clear the backpointer fields for now. If we find a
		 * dirent that points to this inode in check_dirents(), we'll
		 * update it then; then when we get to check_path() if the
		 * backpointer is still 0 we'll reattach it.
		 */
		inode->bi_dir = 0;
		inode->bi_dir_offset = 0;
		*write_inode = true;
	}

	ret = 0;
fsck_err:
	bch2_trans_iter_exit(trans, &dirent_iter);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Find the oldest version of inode @inum across all snapshots (the one with
 * the highest snapshot ID, found by iterating backwards from U32_MAX) and
 * unpack it into @root.  BUG()s if no inode exists at all — callers only use
 * this for inode numbers they have already seen.
 */
static int get_snapshot_root_inode(struct btree_trans *trans,
				   struct bch_inode_unpacked *root,
				   u64 inum)
{
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	for_each_btree_key_reverse_norestart(trans, iter, BTREE_ID_inodes,
					     SPOS(0, inum, U32_MAX),
					     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inum)
			break;
		if (bkey_is_inode(k.k))
			goto found_root;
	}
	if (ret)
		goto err;
	BUG();
found_root:
	ret = bch2_inode_unpack(k, root);
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Main per-inode-key fsck pass: validates snapshot, hash seed consistency
 * across snapshot versions, dirent backpointers, unlinked/empty-dir state,
 * the has_child_snapshot flag, deleted-list membership, subvolume
 * cross-references and bi_journal_seq.  Repairs are accumulated in @u and
 * written back once at the end if anything changed.
 *
 * @snapshot_root caches the oldest snapshot version of the current inode
 * number across calls; @s tracks snapshots seen so far in this pass.
 */
static int check_inode(struct btree_trans *trans,
		       struct btree_iter *iter,
		       struct bkey_s_c k,
		       struct bch_inode_unpacked *snapshot_root,
		       struct snapshots_seen *s)
{
	struct bch_fs *c = trans->c;
	struct printbuf buf = PRINTBUF;
	struct bch_inode_unpacked u;
	bool do_update = false;
	int ret;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret < 0)
		goto err;
	if (ret)
		return 0;

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	if (!bkey_is_inode(k.k))
		return 0;

	ret = bch2_inode_unpack(k, &u);
	if (ret)
		goto err;

	/* Refresh the cached snapshot root when we move to a new inode number */
	if (snapshot_root->bi_inum != u.bi_inum) {
		ret = get_snapshot_root_inode(trans, snapshot_root, u.bi_inum);
		if (ret)
			goto err;
	}

	/* All snapshot versions of an inode must share hash seed/type */
	if (fsck_err_on(u.bi_hash_seed != snapshot_root->bi_hash_seed ||
			INODE_STR_HASH(&u) != INODE_STR_HASH(snapshot_root),
			trans, inode_snapshot_mismatch,
			"inode hash info in different snapshots don't match")) {
		u.bi_hash_seed = snapshot_root->bi_hash_seed;
		SET_INODE_STR_HASH(&u, INODE_STR_HASH(snapshot_root));
		do_update = true;
	}

	if (u.bi_dir || u.bi_dir_offset) {
		ret = check_inode_dirent_inode(trans, &u, &do_update);
		if (ret)
			goto err;
	}

	if (fsck_err_on(u.bi_dir && (u.bi_flags & BCH_INODE_unlinked),
			trans, inode_unlinked_but_has_dirent,
			"inode unlinked but has dirent\n%s",
			(printbuf_reset(&buf),
			 bch2_inode_unpacked_to_text(&buf, &u),
			 buf.buf))) {
		u.bi_flags &= ~BCH_INODE_unlinked;
		do_update = true;
	}

	if (S_ISDIR(u.bi_mode) && (u.bi_flags & BCH_INODE_unlinked)) {
		/* Check for this early so that check_unreachable_inode() will reattach it */

		ret = bch2_empty_dir_snapshot(trans, k.k->p.offset, 0, k.k->p.snapshot);
		if (ret && ret != -BCH_ERR_ENOTEMPTY_dir_not_empty)
			goto err;

		fsck_err_on(ret, trans, inode_dir_unlinked_but_not_empty,
			    "dir unlinked but not empty\n%s",
			    (printbuf_reset(&buf),
			     bch2_inode_unpacked_to_text(&buf, &u),
			     buf.buf));
		u.bi_flags &= ~BCH_INODE_unlinked;
		do_update = true;
		ret = 0;
	}

	ret = bch2_inode_has_child_snapshots(trans, k.k->p);
	if (ret < 0)
		goto err;

	if (fsck_err_on(ret != !!(u.bi_flags & BCH_INODE_has_child_snapshot),
			trans, inode_has_child_snapshots_wrong,
			"inode has_child_snapshots flag wrong (should be %u)\n%s",
			ret,
			(printbuf_reset(&buf),
			 bch2_inode_unpacked_to_text(&buf, &u),
			 buf.buf))) {
		if (ret)
			u.bi_flags |= BCH_INODE_has_child_snapshot;
		else
			u.bi_flags &= ~BCH_INODE_has_child_snapshot;
		do_update = true;
	}
	ret = 0;

	if ((u.bi_flags & BCH_INODE_unlinked) &&
	    !(u.bi_flags & BCH_INODE_has_child_snapshot)) {
		if (!test_bit(BCH_FS_started, &c->flags)) {
			/*
			 * If we're not in online fsck, don't delete unlinked
			 * inodes, just make sure they're on the deleted list.
			 *
			 * They might be referred to by a logged operation -
			 * i.e. we might have crashed in the middle of a
			 * truncate on an unlinked but open file - so we want to
			 * let the delete_dead_inodes kill it after resuming
			 * logged ops.
			 */
			ret = check_inode_deleted_list(trans, k.k->p);
			if (ret < 0)
				goto err_noprint;

			fsck_err_on(!ret,
				    trans, unlinked_inode_not_on_deleted_list,
				    "inode %llu:%u unlinked, but not on deleted list",
				    u.bi_inum, k.k->p.snapshot);

			ret = bch2_btree_bit_mod_buffered(trans, BTREE_ID_deleted_inodes, k.k->p, 1);
			if (ret)
				goto err;
		} else {
			ret = bch2_inode_or_descendents_is_open(trans, k.k->p);
			if (ret < 0)
				goto err;

			if (fsck_err_on(!ret,
					trans, inode_unlinked_and_not_open,
					"inode %llu:%u unlinked and not open",
					u.bi_inum, u.bi_snapshot)) {
				ret = bch2_inode_rm_snapshot(trans, u.bi_inum, iter->pos.snapshot);
				bch_err_msg(c, ret, "in fsck deleting inode");
				goto err_noprint;
			}
			ret = 0;
		}
	}

	if (fsck_err_on(u.bi_parent_subvol &&
			(u.bi_subvol == 0 ||
			 u.bi_subvol == BCACHEFS_ROOT_SUBVOL),
			trans, inode_bi_parent_nonzero,
			"inode %llu:%u has subvol %u but nonzero parent subvol %u",
			u.bi_inum, k.k->p.snapshot, u.bi_subvol, u.bi_parent_subvol)) {
		u.bi_parent_subvol = 0;
		do_update = true;
	}

	if (u.bi_subvol) {
		/* NOTE: shadows the snapshots_seen parameter @s in this scope */
		struct bch_subvolume s;

		ret = bch2_subvolume_get(trans, u.bi_subvol, false, &s);
		if (ret && !bch2_err_matches(ret, ENOENT))
			goto err;

		if (ret && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) {
			ret = reconstruct_subvol(trans, k.k->p.snapshot, u.bi_subvol, u.bi_inum);
			goto do_update;
		}

		if (fsck_err_on(ret,
				trans, inode_bi_subvol_missing,
				"inode %llu:%u bi_subvol points to missing subvolume %u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol) ||
		    fsck_err_on(le64_to_cpu(s.inode) != u.bi_inum ||
				!bch2_snapshot_is_ancestor(c, le32_to_cpu(s.snapshot),
							   k.k->p.snapshot),
				trans, inode_bi_subvol_wrong,
				"inode %llu:%u points to subvol %u, but subvol points to %llu:%u",
				u.bi_inum, k.k->p.snapshot, u.bi_subvol,
				le64_to_cpu(s.inode),
				le32_to_cpu(s.snapshot))) {
			u.bi_subvol = 0;
			u.bi_parent_subvol = 0;
			do_update = true;
		}
	}

	if (fsck_err_on(u.bi_journal_seq > journal_cur_seq(&c->journal),
			trans, inode_journal_seq_in_future,
			"inode journal seq in future (currently at %llu)\n%s",
			journal_cur_seq(&c->journal),
			(printbuf_reset(&buf),
			 bch2_inode_unpacked_to_text(&buf, &u),
			 buf.buf))) {
		u.bi_journal_seq = journal_cur_seq(&c->journal);
		do_update = true;
	}
do_update:
	if (do_update) {
		ret = __bch2_fsck_write_inode(trans, &u);
		bch_err_msg(c, ret, "in fsck updating inode");
		if (ret)
			goto err_noprint;
	}
err:
fsck_err:
	bch_err_fn(c, ret);
err_noprint:
	printbuf_exit(&buf);
	return ret;
}

/* Walk every inode key in every snapshot and run check_inode() on it. */
int bch2_check_inodes(struct bch_fs *c)
{
	struct bch_inode_unpacked snapshot_root = {};
	struct snapshots_seen s;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes,
				POS_MIN,
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_inode(trans, &iter, k, &snapshot_root, &s)));

	snapshots_seen_exit(&s);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk ancestor snapshot versions of *inode and replace it with the oldest
 * version that still needs reattaching, so the reattach happens at the right
 * point in the snapshot tree.
 */
static int find_oldest_inode_needs_reattach(struct btree_trans *trans,
					    struct bch_inode_unpacked *inode)
{
	struct bch_fs *c = trans->c;
	struct btree_iter iter;
	struct bkey_s_c k;
	int ret = 0;

	/*
	 * We look for inodes to reattach in natural key order, leaves first,
	 * but we should do the reattach at the oldest version that needs to be
	 * reattached:
	 */
	for_each_btree_key_norestart(trans, iter,
				     BTREE_ID_inodes,
				     SPOS(0, inode->bi_inum, inode->bi_snapshot + 1),
				     BTREE_ITER_all_snapshots, k, ret) {
		if (k.k->p.offset != inode->bi_inum)
			break;

		if (!bch2_snapshot_is_ancestor(c, inode->bi_snapshot, k.k->p.snapshot))
			continue;

		if (!bkey_is_inode(k.k))
			break;

		struct bch_inode_unpacked parent_inode;
		ret = bch2_inode_unpack(k, &parent_inode);
		if (ret)
			break;

		if (!inode_should_reattach(&parent_inode))
			break;

		*inode = parent_inode;
	}
	bch2_trans_iter_exit(trans, &iter);

	return ret;
}

/*
 * If @k is an inode that should be reattached (reachable from no dirent),
 * find the oldest snapshot version needing reattach and reattach it to
 * lost+found.
 */
static int check_unreachable_inode(struct btree_trans *trans,
				   struct btree_iter *iter,
				   struct bkey_s_c k)
{
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (!bkey_is_inode(k.k))
		return 0;

	struct bch_inode_unpacked inode;
	ret = bch2_inode_unpack(k, &inode);
	if (ret)
		return ret;

	if (!inode_should_reattach(&inode))
		return 0;

	ret = find_oldest_inode_needs_reattach(trans, &inode);
	if (ret)
		return ret;

	if (fsck_err(trans, inode_unreachable,
		     "unreachable inode:\n%s",
		     (bch2_inode_unpacked_to_text(&buf, &inode),
		      buf.buf)))
		ret = reattach_inode(trans, &inode);
fsck_err:
	printbuf_exit(&buf);
	return ret;
}

/*
 * Reattach unreachable (but not unlinked) inodes
 *
 * Run after check_inodes() and check_dirents(), so we know that inode
 * backpointer fields point to valid dirents, and every inode that has a dirent
 * that points to it has its backpointer field set - so we're just looking for
 * non-unlinked inodes without backpointers:
 *
 * XXX: this is racy w.r.t.
hardlink removal in online fsck 1425 */ 1426 int bch2_check_unreachable_inodes(struct bch_fs *c) 1427 { 1428 int ret = bch2_trans_run(c, 1429 for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 1430 POS_MIN, 1431 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 1432 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 1433 check_unreachable_inode(trans, &iter, k))); 1434 bch_err_fn(c, ret); 1435 return ret; 1436 } 1437 1438 static inline bool btree_matches_i_mode(enum btree_id btree, unsigned mode) 1439 { 1440 switch (btree) { 1441 case BTREE_ID_extents: 1442 return S_ISREG(mode) || S_ISLNK(mode); 1443 case BTREE_ID_dirents: 1444 return S_ISDIR(mode); 1445 case BTREE_ID_xattrs: 1446 return true; 1447 default: 1448 BUG(); 1449 } 1450 } 1451 1452 static int check_key_has_inode(struct btree_trans *trans, 1453 struct btree_iter *iter, 1454 struct inode_walker *inode, 1455 struct inode_walker_entry *i, 1456 struct bkey_s_c k) 1457 { 1458 struct bch_fs *c = trans->c; 1459 struct printbuf buf = PRINTBUF; 1460 int ret = PTR_ERR_OR_ZERO(i); 1461 if (ret) 1462 return ret; 1463 1464 if (k.k->type == KEY_TYPE_whiteout) 1465 goto out; 1466 1467 if (!i && (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_inodes))) { 1468 ret = reconstruct_inode(trans, iter->btree_id, k.k->p.snapshot, k.k->p.inode) ?: 1469 bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc); 1470 if (ret) 1471 goto err; 1472 1473 inode->last_pos.inode--; 1474 ret = -BCH_ERR_transaction_restart_nested; 1475 goto err; 1476 } 1477 1478 if (fsck_err_on(!i, 1479 trans, key_in_missing_inode, 1480 "key in missing inode:\n %s", 1481 (printbuf_reset(&buf), 1482 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1483 goto delete; 1484 1485 if (fsck_err_on(i && !btree_matches_i_mode(iter->btree_id, i->inode.bi_mode), 1486 trans, key_in_wrong_inode_type, 1487 "key for wrong inode mode %o:\n %s", 1488 i->inode.bi_mode, 1489 (printbuf_reset(&buf), 1490 bch2_bkey_val_to_text(&buf, c, k), buf.buf))) 1491 goto delete; 1492 out: 1493 err: 
1494 fsck_err: 1495 printbuf_exit(&buf); 1496 bch_err_fn(c, ret); 1497 return ret; 1498 delete: 1499 ret = bch2_btree_delete_at(trans, iter, BTREE_UPDATE_internal_snapshot_node); 1500 goto out; 1501 } 1502 1503 static int check_i_sectors_notnested(struct btree_trans *trans, struct inode_walker *w) 1504 { 1505 struct bch_fs *c = trans->c; 1506 int ret = 0; 1507 s64 count2; 1508 1509 darray_for_each(w->inodes, i) { 1510 if (i->inode.bi_sectors == i->count) 1511 continue; 1512 1513 count2 = bch2_count_inode_sectors(trans, w->last_pos.inode, i->snapshot); 1514 1515 if (w->recalculate_sums) 1516 i->count = count2; 1517 1518 if (i->count != count2) { 1519 bch_err_ratelimited(c, "fsck counted i_sectors wrong for inode %llu:%u: got %llu should be %llu", 1520 w->last_pos.inode, i->snapshot, i->count, count2); 1521 i->count = count2; 1522 } 1523 1524 if (fsck_err_on(!(i->inode.bi_flags & BCH_INODE_i_sectors_dirty), 1525 trans, inode_i_sectors_wrong, 1526 "inode %llu:%u has incorrect i_sectors: got %llu, should be %llu", 1527 w->last_pos.inode, i->snapshot, 1528 i->inode.bi_sectors, i->count)) { 1529 i->inode.bi_sectors = i->count; 1530 ret = bch2_fsck_write_inode(trans, &i->inode); 1531 if (ret) 1532 break; 1533 } 1534 } 1535 fsck_err: 1536 bch_err_fn(c, ret); 1537 return ret; 1538 } 1539 1540 static int check_i_sectors(struct btree_trans *trans, struct inode_walker *w) 1541 { 1542 u32 restart_count = trans->restart_count; 1543 return check_i_sectors_notnested(trans, w) ?: 1544 trans_was_restarted(trans, restart_count); 1545 } 1546 1547 struct extent_end { 1548 u32 snapshot; 1549 u64 offset; 1550 struct snapshots_seen seen; 1551 }; 1552 1553 struct extent_ends { 1554 struct bpos last_pos; 1555 DARRAY(struct extent_end) e; 1556 }; 1557 1558 static void extent_ends_reset(struct extent_ends *extent_ends) 1559 { 1560 darray_for_each(extent_ends->e, i) 1561 snapshots_seen_exit(&i->seen); 1562 extent_ends->e.nr = 0; 1563 } 1564 1565 static void extent_ends_exit(struct extent_ends 
*extent_ends) 1566 { 1567 extent_ends_reset(extent_ends); 1568 darray_exit(&extent_ends->e); 1569 } 1570 1571 static void extent_ends_init(struct extent_ends *extent_ends) 1572 { 1573 memset(extent_ends, 0, sizeof(*extent_ends)); 1574 } 1575 1576 static int extent_ends_at(struct bch_fs *c, 1577 struct extent_ends *extent_ends, 1578 struct snapshots_seen *seen, 1579 struct bkey_s_c k) 1580 { 1581 struct extent_end *i, n = (struct extent_end) { 1582 .offset = k.k->p.offset, 1583 .snapshot = k.k->p.snapshot, 1584 .seen = *seen, 1585 }; 1586 1587 n.seen.ids.data = kmemdup(seen->ids.data, 1588 sizeof(seen->ids.data[0]) * seen->ids.size, 1589 GFP_KERNEL); 1590 if (!n.seen.ids.data) 1591 return -BCH_ERR_ENOMEM_fsck_extent_ends_at; 1592 1593 __darray_for_each(extent_ends->e, i) { 1594 if (i->snapshot == k.k->p.snapshot) { 1595 snapshots_seen_exit(&i->seen); 1596 *i = n; 1597 return 0; 1598 } 1599 1600 if (i->snapshot >= k.k->p.snapshot) 1601 break; 1602 } 1603 1604 return darray_insert_item(&extent_ends->e, i - extent_ends->e.data, n); 1605 } 1606 1607 static int overlapping_extents_found(struct btree_trans *trans, 1608 enum btree_id btree, 1609 struct bpos pos1, struct snapshots_seen *pos1_seen, 1610 struct bkey pos2, 1611 bool *fixed, 1612 struct extent_end *extent_end) 1613 { 1614 struct bch_fs *c = trans->c; 1615 struct printbuf buf = PRINTBUF; 1616 struct btree_iter iter1, iter2 = { NULL }; 1617 struct bkey_s_c k1, k2; 1618 int ret; 1619 1620 BUG_ON(bkey_le(pos1, bkey_start_pos(&pos2))); 1621 1622 bch2_trans_iter_init(trans, &iter1, btree, pos1, 1623 BTREE_ITER_all_snapshots| 1624 BTREE_ITER_not_extents); 1625 k1 = bch2_btree_iter_peek_max(&iter1, POS(pos1.inode, U64_MAX)); 1626 ret = bkey_err(k1); 1627 if (ret) 1628 goto err; 1629 1630 prt_str(&buf, "\n "); 1631 bch2_bkey_val_to_text(&buf, c, k1); 1632 1633 if (!bpos_eq(pos1, k1.k->p)) { 1634 prt_str(&buf, "\n wanted\n "); 1635 bch2_bpos_to_text(&buf, pos1); 1636 prt_str(&buf, "\n "); 1637 bch2_bkey_to_text(&buf, 
&pos2); 1638 1639 bch_err(c, "%s: error finding first overlapping extent when repairing, got%s", 1640 __func__, buf.buf); 1641 ret = -BCH_ERR_internal_fsck_err; 1642 goto err; 1643 } 1644 1645 bch2_trans_copy_iter(&iter2, &iter1); 1646 1647 while (1) { 1648 bch2_btree_iter_advance(&iter2); 1649 1650 k2 = bch2_btree_iter_peek_max(&iter2, POS(pos1.inode, U64_MAX)); 1651 ret = bkey_err(k2); 1652 if (ret) 1653 goto err; 1654 1655 if (bpos_ge(k2.k->p, pos2.p)) 1656 break; 1657 } 1658 1659 prt_str(&buf, "\n "); 1660 bch2_bkey_val_to_text(&buf, c, k2); 1661 1662 if (bpos_gt(k2.k->p, pos2.p) || 1663 pos2.size != k2.k->size) { 1664 bch_err(c, "%s: error finding seconding overlapping extent when repairing%s", 1665 __func__, buf.buf); 1666 ret = -BCH_ERR_internal_fsck_err; 1667 goto err; 1668 } 1669 1670 prt_printf(&buf, "\n overwriting %s extent", 1671 pos1.snapshot >= pos2.p.snapshot ? "first" : "second"); 1672 1673 if (fsck_err(trans, extent_overlapping, 1674 "overlapping extents%s", buf.buf)) { 1675 struct btree_iter *old_iter = &iter1; 1676 struct disk_reservation res = { 0 }; 1677 1678 if (pos1.snapshot < pos2.p.snapshot) { 1679 old_iter = &iter2; 1680 swap(k1, k2); 1681 } 1682 1683 trans->extra_disk_res += bch2_bkey_sectors_compressed(k2); 1684 1685 ret = bch2_trans_update_extent_overwrite(trans, old_iter, 1686 BTREE_UPDATE_internal_snapshot_node, 1687 k1, k2) ?: 1688 bch2_trans_commit(trans, &res, NULL, BCH_TRANS_COMMIT_no_enospc); 1689 bch2_disk_reservation_put(c, &res); 1690 1691 if (ret) 1692 goto err; 1693 1694 *fixed = true; 1695 1696 if (pos1.snapshot == pos2.p.snapshot) { 1697 /* 1698 * We overwrote the first extent, and did the overwrite 1699 * in the same snapshot: 1700 */ 1701 extent_end->offset = bkey_start_offset(&pos2); 1702 } else if (pos1.snapshot > pos2.p.snapshot) { 1703 /* 1704 * We overwrote the first extent in pos2's snapshot: 1705 */ 1706 ret = snapshots_seen_add_inorder(c, pos1_seen, pos2.p.snapshot); 1707 } else { 1708 /* 1709 * We overwrote the 
second extent - restart 1710 * check_extent() from the top: 1711 */ 1712 ret = -BCH_ERR_transaction_restart_nested; 1713 } 1714 } 1715 fsck_err: 1716 err: 1717 bch2_trans_iter_exit(trans, &iter2); 1718 bch2_trans_iter_exit(trans, &iter1); 1719 printbuf_exit(&buf); 1720 return ret; 1721 } 1722 1723 static int check_overlapping_extents(struct btree_trans *trans, 1724 struct snapshots_seen *seen, 1725 struct extent_ends *extent_ends, 1726 struct bkey_s_c k, 1727 struct btree_iter *iter, 1728 bool *fixed) 1729 { 1730 struct bch_fs *c = trans->c; 1731 int ret = 0; 1732 1733 /* transaction restart, running again */ 1734 if (bpos_eq(extent_ends->last_pos, k.k->p)) 1735 return 0; 1736 1737 if (extent_ends->last_pos.inode != k.k->p.inode) 1738 extent_ends_reset(extent_ends); 1739 1740 darray_for_each(extent_ends->e, i) { 1741 if (i->offset <= bkey_start_offset(k.k)) 1742 continue; 1743 1744 if (!ref_visible2(c, 1745 k.k->p.snapshot, seen, 1746 i->snapshot, &i->seen)) 1747 continue; 1748 1749 ret = overlapping_extents_found(trans, iter->btree_id, 1750 SPOS(iter->pos.inode, 1751 i->offset, 1752 i->snapshot), 1753 &i->seen, 1754 *k.k, fixed, i); 1755 if (ret) 1756 goto err; 1757 } 1758 1759 extent_ends->last_pos = k.k->p; 1760 err: 1761 return ret; 1762 } 1763 1764 static int check_extent_overbig(struct btree_trans *trans, struct btree_iter *iter, 1765 struct bkey_s_c k) 1766 { 1767 struct bch_fs *c = trans->c; 1768 struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k); 1769 struct bch_extent_crc_unpacked crc; 1770 const union bch_extent_entry *i; 1771 unsigned encoded_extent_max_sectors = c->opts.encoded_extent_max >> 9; 1772 1773 bkey_for_each_crc(k.k, ptrs, crc, i) 1774 if (crc_is_encoded(crc) && 1775 crc.uncompressed_size > encoded_extent_max_sectors) { 1776 struct printbuf buf = PRINTBUF; 1777 1778 bch2_bkey_val_to_text(&buf, c, k); 1779 bch_err(c, "overbig encoded extent, please report this:\n %s", buf.buf); 1780 printbuf_exit(&buf); 1781 } 1782 1783 return 0; 1784 } 1785 1786 
static int check_extent(struct btree_trans *trans, struct btree_iter *iter, 1787 struct bkey_s_c k, 1788 struct inode_walker *inode, 1789 struct snapshots_seen *s, 1790 struct extent_ends *extent_ends, 1791 struct disk_reservation *res) 1792 { 1793 struct bch_fs *c = trans->c; 1794 struct printbuf buf = PRINTBUF; 1795 int ret = 0; 1796 1797 ret = bch2_check_key_has_snapshot(trans, iter, k); 1798 if (ret) { 1799 ret = ret < 0 ? ret : 0; 1800 goto out; 1801 } 1802 1803 if (inode->last_pos.inode != k.k->p.inode && inode->have_inodes) { 1804 ret = check_i_sectors(trans, inode); 1805 if (ret) 1806 goto err; 1807 } 1808 1809 ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p); 1810 if (ret) 1811 goto err; 1812 1813 struct inode_walker_entry *extent_i = walk_inode(trans, inode, k); 1814 ret = PTR_ERR_OR_ZERO(extent_i); 1815 if (ret) 1816 goto err; 1817 1818 ret = check_key_has_inode(trans, iter, inode, extent_i, k); 1819 if (ret) 1820 goto err; 1821 1822 if (k.k->type != KEY_TYPE_whiteout) { 1823 ret = check_overlapping_extents(trans, s, extent_ends, k, iter, 1824 &inode->recalculate_sums); 1825 if (ret) 1826 goto err; 1827 1828 /* 1829 * Check inodes in reverse order, from oldest snapshots to 1830 * newest, starting from the inode that matches this extent's 1831 * snapshot. 
If we didn't have one, iterate over all inodes: 1832 */ 1833 for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); 1834 inode->inodes.data && i >= inode->inodes.data; 1835 --i) { 1836 if (i->snapshot > k.k->p.snapshot || 1837 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) 1838 continue; 1839 1840 if (fsck_err_on(k.k->p.offset > round_up(i->inode.bi_size, block_bytes(c)) >> 9 && 1841 !bkey_extent_is_reservation(k), 1842 trans, extent_past_end_of_inode, 1843 "extent type past end of inode %llu:%u, i_size %llu\n %s", 1844 i->inode.bi_inum, i->snapshot, i->inode.bi_size, 1845 (bch2_bkey_val_to_text(&buf, c, k), buf.buf))) { 1846 struct btree_iter iter2; 1847 1848 bch2_trans_copy_iter(&iter2, iter); 1849 bch2_btree_iter_set_snapshot(&iter2, i->snapshot); 1850 ret = bch2_btree_iter_traverse(&iter2) ?: 1851 bch2_btree_delete_at(trans, &iter2, 1852 BTREE_UPDATE_internal_snapshot_node); 1853 bch2_trans_iter_exit(trans, &iter2); 1854 if (ret) 1855 goto err; 1856 1857 iter->k.type = KEY_TYPE_whiteout; 1858 break; 1859 } 1860 } 1861 } 1862 1863 ret = bch2_trans_commit(trans, res, NULL, BCH_TRANS_COMMIT_no_enospc); 1864 if (ret) 1865 goto err; 1866 1867 if (bkey_extent_is_allocation(k.k)) { 1868 for (struct inode_walker_entry *i = extent_i ?: &darray_last(inode->inodes); 1869 inode->inodes.data && i >= inode->inodes.data; 1870 --i) { 1871 if (i->snapshot > k.k->p.snapshot || 1872 !key_visible_in_snapshot(c, s, i->snapshot, k.k->p.snapshot)) 1873 continue; 1874 1875 i->count += k.k->size; 1876 } 1877 } 1878 1879 if (k.k->type != KEY_TYPE_whiteout) { 1880 ret = extent_ends_at(c, extent_ends, s, k); 1881 if (ret) 1882 goto err; 1883 } 1884 out: 1885 err: 1886 fsck_err: 1887 printbuf_exit(&buf); 1888 bch_err_fn(c, ret); 1889 return ret; 1890 } 1891 1892 /* 1893 * Walk extents: verify that extents have a corresponding S_ISREG inode, and 1894 * that i_size an i_sectors are consistent 1895 */ 1896 int bch2_check_extents(struct bch_fs *c) 1897 { 
1898 struct inode_walker w = inode_walker_init(); 1899 struct snapshots_seen s; 1900 struct extent_ends extent_ends; 1901 struct disk_reservation res = { 0 }; 1902 1903 snapshots_seen_init(&s); 1904 extent_ends_init(&extent_ends); 1905 1906 int ret = bch2_trans_run(c, 1907 for_each_btree_key(trans, iter, BTREE_ID_extents, 1908 POS(BCACHEFS_ROOT_INO, 0), 1909 BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, ({ 1910 bch2_disk_reservation_put(c, &res); 1911 check_extent(trans, &iter, k, &w, &s, &extent_ends, &res) ?: 1912 check_extent_overbig(trans, &iter, k); 1913 })) ?: 1914 check_i_sectors_notnested(trans, &w)); 1915 1916 bch2_disk_reservation_put(c, &res); 1917 extent_ends_exit(&extent_ends); 1918 inode_walker_exit(&w); 1919 snapshots_seen_exit(&s); 1920 1921 bch_err_fn(c, ret); 1922 return ret; 1923 } 1924 1925 int bch2_check_indirect_extents(struct bch_fs *c) 1926 { 1927 struct disk_reservation res = { 0 }; 1928 1929 int ret = bch2_trans_run(c, 1930 for_each_btree_key_commit(trans, iter, BTREE_ID_reflink, 1931 POS_MIN, 1932 BTREE_ITER_prefetch, k, 1933 &res, NULL, 1934 BCH_TRANS_COMMIT_no_enospc, ({ 1935 bch2_disk_reservation_put(c, &res); 1936 check_extent_overbig(trans, &iter, k); 1937 }))); 1938 1939 bch2_disk_reservation_put(c, &res); 1940 bch_err_fn(c, ret); 1941 return ret; 1942 } 1943 1944 static int check_subdir_count_notnested(struct btree_trans *trans, struct inode_walker *w) 1945 { 1946 struct bch_fs *c = trans->c; 1947 int ret = 0; 1948 s64 count2; 1949 1950 darray_for_each(w->inodes, i) { 1951 if (i->inode.bi_nlink == i->count) 1952 continue; 1953 1954 count2 = bch2_count_subdirs(trans, w->last_pos.inode, i->snapshot); 1955 if (count2 < 0) 1956 return count2; 1957 1958 if (i->count != count2) { 1959 bch_err_ratelimited(c, "fsck counted subdirectories wrong for inum %llu:%u: got %llu should be %llu", 1960 w->last_pos.inode, i->snapshot, i->count, count2); 1961 i->count = count2; 1962 if (i->inode.bi_nlink == i->count) 1963 continue; 1964 } 1965 1966 
if (fsck_err_on(i->inode.bi_nlink != i->count, 1967 trans, inode_dir_wrong_nlink, 1968 "directory %llu:%u with wrong i_nlink: got %u, should be %llu", 1969 w->last_pos.inode, i->snapshot, i->inode.bi_nlink, i->count)) { 1970 i->inode.bi_nlink = i->count; 1971 ret = bch2_fsck_write_inode(trans, &i->inode); 1972 if (ret) 1973 break; 1974 } 1975 } 1976 fsck_err: 1977 bch_err_fn(c, ret); 1978 return ret; 1979 } 1980 1981 static int check_subdir_dirents_count(struct btree_trans *trans, struct inode_walker *w) 1982 { 1983 u32 restart_count = trans->restart_count; 1984 return check_subdir_count_notnested(trans, w) ?: 1985 trans_was_restarted(trans, restart_count); 1986 } 1987 1988 noinline_for_stack 1989 static int check_dirent_inode_dirent(struct btree_trans *trans, 1990 struct btree_iter *iter, 1991 struct bkey_s_c_dirent d, 1992 struct bch_inode_unpacked *target) 1993 { 1994 struct bch_fs *c = trans->c; 1995 struct printbuf buf = PRINTBUF; 1996 struct btree_iter bp_iter = { NULL }; 1997 int ret = 0; 1998 1999 if (inode_points_to_dirent(target, d)) 2000 return 0; 2001 2002 if (!target->bi_dir && 2003 !target->bi_dir_offset) { 2004 fsck_err_on(S_ISDIR(target->bi_mode), 2005 trans, inode_dir_missing_backpointer, 2006 "directory with missing backpointer\n%s", 2007 (printbuf_reset(&buf), 2008 bch2_bkey_val_to_text(&buf, c, d.s_c), 2009 prt_printf(&buf, "\n"), 2010 bch2_inode_unpacked_to_text(&buf, target), 2011 buf.buf)); 2012 2013 fsck_err_on(target->bi_flags & BCH_INODE_unlinked, 2014 trans, inode_unlinked_but_has_dirent, 2015 "inode unlinked but has dirent\n%s", 2016 (printbuf_reset(&buf), 2017 bch2_bkey_val_to_text(&buf, c, d.s_c), 2018 prt_printf(&buf, "\n"), 2019 bch2_inode_unpacked_to_text(&buf, target), 2020 buf.buf)); 2021 2022 target->bi_flags &= ~BCH_INODE_unlinked; 2023 target->bi_dir = d.k->p.inode; 2024 target->bi_dir_offset = d.k->p.offset; 2025 return __bch2_fsck_write_inode(trans, target); 2026 } 2027 2028 if (bch2_inode_should_have_single_bp(target) && 
2029 !fsck_err(trans, inode_wrong_backpointer, 2030 "dirent points to inode that does not point back:\n %s", 2031 (bch2_bkey_val_to_text(&buf, c, d.s_c), 2032 prt_printf(&buf, "\n "), 2033 bch2_inode_unpacked_to_text(&buf, target), 2034 buf.buf))) 2035 goto err; 2036 2037 struct bkey_s_c_dirent bp_dirent = dirent_get_by_pos(trans, &bp_iter, 2038 SPOS(target->bi_dir, target->bi_dir_offset, target->bi_snapshot)); 2039 ret = bkey_err(bp_dirent); 2040 if (ret && !bch2_err_matches(ret, ENOENT)) 2041 goto err; 2042 2043 bool backpointer_exists = !ret; 2044 ret = 0; 2045 2046 if (fsck_err_on(!backpointer_exists, 2047 trans, inode_wrong_backpointer, 2048 "inode %llu:%u has wrong backpointer:\n" 2049 "got %llu:%llu\n" 2050 "should be %llu:%llu", 2051 target->bi_inum, target->bi_snapshot, 2052 target->bi_dir, 2053 target->bi_dir_offset, 2054 d.k->p.inode, 2055 d.k->p.offset)) { 2056 target->bi_dir = d.k->p.inode; 2057 target->bi_dir_offset = d.k->p.offset; 2058 ret = __bch2_fsck_write_inode(trans, target); 2059 goto out; 2060 } 2061 2062 bch2_bkey_val_to_text(&buf, c, d.s_c); 2063 prt_newline(&buf); 2064 if (backpointer_exists) 2065 bch2_bkey_val_to_text(&buf, c, bp_dirent.s_c); 2066 2067 if (fsck_err_on(backpointer_exists && 2068 (S_ISDIR(target->bi_mode) || 2069 target->bi_subvol), 2070 trans, inode_dir_multiple_links, 2071 "%s %llu:%u with multiple links\n%s", 2072 S_ISDIR(target->bi_mode) ? 
"directory" : "subvolume", 2073 target->bi_inum, target->bi_snapshot, buf.buf)) { 2074 ret = __remove_dirent(trans, d.k->p); 2075 goto out; 2076 } 2077 2078 /* 2079 * hardlinked file with nlink 0: 2080 * We're just adjusting nlink here so check_nlinks() will pick 2081 * it up, it ignores inodes with nlink 0 2082 */ 2083 if (fsck_err_on(backpointer_exists && !target->bi_nlink, 2084 trans, inode_multiple_links_but_nlink_0, 2085 "inode %llu:%u type %s has multiple links but i_nlink 0\n%s", 2086 target->bi_inum, target->bi_snapshot, bch2_d_types[d.v->d_type], buf.buf)) { 2087 target->bi_nlink++; 2088 target->bi_flags &= ~BCH_INODE_unlinked; 2089 ret = __bch2_fsck_write_inode(trans, target); 2090 if (ret) 2091 goto err; 2092 } 2093 out: 2094 err: 2095 fsck_err: 2096 bch2_trans_iter_exit(trans, &bp_iter); 2097 printbuf_exit(&buf); 2098 bch_err_fn(c, ret); 2099 return ret; 2100 } 2101 2102 noinline_for_stack 2103 static int check_dirent_target(struct btree_trans *trans, 2104 struct btree_iter *iter, 2105 struct bkey_s_c_dirent d, 2106 struct bch_inode_unpacked *target) 2107 { 2108 struct bch_fs *c = trans->c; 2109 struct bkey_i_dirent *n; 2110 struct printbuf buf = PRINTBUF; 2111 int ret = 0; 2112 2113 ret = check_dirent_inode_dirent(trans, iter, d, target); 2114 if (ret) 2115 goto err; 2116 2117 if (fsck_err_on(d.v->d_type != inode_d_type(target), 2118 trans, dirent_d_type_wrong, 2119 "incorrect d_type: got %s, should be %s:\n%s", 2120 bch2_d_type_str(d.v->d_type), 2121 bch2_d_type_str(inode_d_type(target)), 2122 (printbuf_reset(&buf), 2123 bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 2124 n = bch2_trans_kmalloc(trans, bkey_bytes(d.k)); 2125 ret = PTR_ERR_OR_ZERO(n); 2126 if (ret) 2127 goto err; 2128 2129 bkey_reassemble(&n->k_i, d.s_c); 2130 n->v.d_type = inode_d_type(target); 2131 if (n->v.d_type == DT_SUBVOL) { 2132 n->v.d_parent_subvol = cpu_to_le32(target->bi_parent_subvol); 2133 n->v.d_child_subvol = cpu_to_le32(target->bi_subvol); 2134 } else { 2135 
n->v.d_inum = cpu_to_le64(target->bi_inum); 2136 } 2137 2138 ret = bch2_trans_update(trans, iter, &n->k_i, 0); 2139 if (ret) 2140 goto err; 2141 2142 d = dirent_i_to_s_c(n); 2143 } 2144 err: 2145 fsck_err: 2146 printbuf_exit(&buf); 2147 bch_err_fn(c, ret); 2148 return ret; 2149 } 2150 2151 /* find a subvolume that's a descendent of @snapshot: */ 2152 static int find_snapshot_subvol(struct btree_trans *trans, u32 snapshot, u32 *subvolid) 2153 { 2154 struct btree_iter iter; 2155 struct bkey_s_c k; 2156 int ret; 2157 2158 for_each_btree_key_norestart(trans, iter, BTREE_ID_subvolumes, POS_MIN, 0, k, ret) { 2159 if (k.k->type != KEY_TYPE_subvolume) 2160 continue; 2161 2162 struct bkey_s_c_subvolume s = bkey_s_c_to_subvolume(k); 2163 if (bch2_snapshot_is_ancestor(trans->c, le32_to_cpu(s.v->snapshot), snapshot)) { 2164 bch2_trans_iter_exit(trans, &iter); 2165 *subvolid = k.k->p.offset; 2166 goto found; 2167 } 2168 } 2169 if (!ret) 2170 ret = -ENOENT; 2171 found: 2172 bch2_trans_iter_exit(trans, &iter); 2173 return ret; 2174 } 2175 2176 noinline_for_stack 2177 static int check_dirent_to_subvol(struct btree_trans *trans, struct btree_iter *iter, 2178 struct bkey_s_c_dirent d) 2179 { 2180 struct bch_fs *c = trans->c; 2181 struct btree_iter subvol_iter = {}; 2182 struct bch_inode_unpacked subvol_root; 2183 u32 parent_subvol = le32_to_cpu(d.v->d_parent_subvol); 2184 u32 target_subvol = le32_to_cpu(d.v->d_child_subvol); 2185 u32 parent_snapshot; 2186 u32 new_parent_subvol = 0; 2187 u64 parent_inum; 2188 struct printbuf buf = PRINTBUF; 2189 int ret = 0; 2190 2191 ret = subvol_lookup(trans, parent_subvol, &parent_snapshot, &parent_inum); 2192 if (ret && !bch2_err_matches(ret, ENOENT)) 2193 return ret; 2194 2195 if (ret || 2196 (!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot))) { 2197 int ret2 = find_snapshot_subvol(trans, d.k->p.snapshot, &new_parent_subvol); 2198 if (ret2 && !bch2_err_matches(ret, ENOENT)) 2199 return ret2; 2200 } 2201 2202 if (ret && 
2203 !new_parent_subvol && 2204 (c->sb.btrees_lost_data & BIT_ULL(BTREE_ID_subvolumes))) { 2205 /* 2206 * Couldn't find a subvol for dirent's snapshot - but we lost 2207 * subvols, so we need to reconstruct: 2208 */ 2209 ret = reconstruct_subvol(trans, d.k->p.snapshot, parent_subvol, 0); 2210 if (ret) 2211 return ret; 2212 2213 parent_snapshot = d.k->p.snapshot; 2214 } 2215 2216 if (fsck_err_on(ret, 2217 trans, dirent_to_missing_parent_subvol, 2218 "dirent parent_subvol points to missing subvolume\n%s", 2219 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf)) || 2220 fsck_err_on(!ret && !bch2_snapshot_is_ancestor(c, parent_snapshot, d.k->p.snapshot), 2221 trans, dirent_not_visible_in_parent_subvol, 2222 "dirent not visible in parent_subvol (not an ancestor of subvol snap %u)\n%s", 2223 parent_snapshot, 2224 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) { 2225 if (!new_parent_subvol) { 2226 bch_err(c, "could not find a subvol for snapshot %u", d.k->p.snapshot); 2227 return -BCH_ERR_fsck_repair_unimplemented; 2228 } 2229 2230 struct bkey_i_dirent *new_dirent = bch2_bkey_make_mut_typed(trans, iter, &d.s_c, 0, dirent); 2231 ret = PTR_ERR_OR_ZERO(new_dirent); 2232 if (ret) 2233 goto err; 2234 2235 new_dirent->v.d_parent_subvol = cpu_to_le32(new_parent_subvol); 2236 } 2237 2238 struct bkey_s_c_subvolume s = 2239 bch2_bkey_get_iter_typed(trans, &subvol_iter, 2240 BTREE_ID_subvolumes, POS(0, target_subvol), 2241 0, subvolume); 2242 ret = bkey_err(s.s_c); 2243 if (ret && !bch2_err_matches(ret, ENOENT)) 2244 return ret; 2245 2246 if (ret) { 2247 if (fsck_err(trans, dirent_to_missing_subvol, 2248 "dirent points to missing subvolume\n%s", 2249 (bch2_bkey_val_to_text(&buf, c, d.s_c), buf.buf))) 2250 return __remove_dirent(trans, d.k->p); 2251 ret = 0; 2252 goto out; 2253 } 2254 2255 if (fsck_err_on(le32_to_cpu(s.v->fs_path_parent) != parent_subvol, 2256 trans, subvol_fs_path_parent_wrong, 2257 "subvol with wrong fs_path_parent, should be be %u\n%s", 2258 parent_subvol, 2259 
			(bch2_bkey_val_to_text(&buf, c, s.s_c), buf.buf))) {
		/* Point the subvolume's fs_path_parent back at this dirent's parent: */
		struct bkey_i_subvolume *n =
			bch2_bkey_make_mut_typed(trans, &subvol_iter, &s.s_c, 0, subvolume);
		ret = PTR_ERR_OR_ZERO(n);
		if (ret)
			goto err;

		n->v.fs_path_parent = cpu_to_le32(parent_subvol);
	}

	u64 target_inum = le64_to_cpu(s.v->inode);
	u32 target_snapshot = le32_to_cpu(s.v->snapshot);

	/* The subvolume's root inode must exist: */
	ret = lookup_inode(trans, target_inum, target_snapshot, &subvol_root);
	if (ret && !bch2_err_matches(ret, ENOENT))
		goto err;

	if (ret) {
		bch_err(c, "subvol %u points to missing inode root %llu", target_subvol, target_inum);
		ret = -BCH_ERR_fsck_repair_unimplemented;
		goto err;
	}

	/* ret is 0 on this path, so the !ret term below is redundant but harmless */
	if (fsck_err_on(!ret && parent_subvol != subvol_root.bi_parent_subvol,
			trans, inode_bi_parent_wrong,
			"subvol root %llu has wrong bi_parent_subvol: got %u, should be %u",
			target_inum,
			subvol_root.bi_parent_subvol, parent_subvol)) {
		subvol_root.bi_parent_subvol = parent_subvol;
		subvol_root.bi_snapshot = le32_to_cpu(s.v->snapshot);
		ret = __bch2_fsck_write_inode(trans, &subvol_root);
		if (ret)
			goto err;
	}

	ret = check_dirent_target(trans, iter, d, &subvol_root);
	if (ret)
		goto err;
out:
err:
fsck_err:
	bch2_trans_iter_exit(trans, &subvol_iter);
	printbuf_exit(&buf);
	return ret;
}

/*
 * Check a single dirent key: snapshot validity, hash placement, and that it
 * points at a live inode (or subvolume).  Per-directory subdir/size counts
 * are accumulated into @dir for the later count check.
 */
static int check_dirent(struct btree_trans *trans, struct btree_iter *iter,
			struct bkey_s_c k,
			struct bch_hash_info *hash_info,
			struct inode_walker *dir,
			struct inode_walker *target,
			struct snapshots_seen *s)
{
	struct bch_fs *c = trans->c;
	struct inode_walker_entry *i;
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	ret = bch2_check_key_has_snapshot(trans, iter, k);
	if (ret) {
		/* positive return means the key was deleted, not an error */
		ret = ret < 0 ?
/* continuation of check_dirent() from the previous chunk */
		      ret : 0;
		goto out;
	}

	ret = snapshots_seen_update(c, s, iter->btree_id, k.k->p);
	if (ret)
		goto err;

	if (k.k->type == KEY_TYPE_whiteout)
		goto out;

	/* Moving to a new directory: verify the previous one's subdir counts */
	if (dir->last_pos.inode != k.k->p.inode && dir->have_inodes) {
		ret = check_subdir_dirents_count(trans, dir);
		if (ret)
			goto err;
	}

	i = walk_inode(trans, dir, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret < 0)
		goto err;

	ret = check_key_has_inode(trans, iter, dir, i, k);
	if (ret)
		goto err;

	if (!i)
		goto out;

	/* Hash info is per-directory; (re)compute it on the first key of each */
	if (dir->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &i->inode);
	dir->first_this_inode = false;

	ret = bch2_str_hash_check_key(trans, s, &bch2_dirent_hash_desc, hash_info, iter, k);
	if (ret < 0)
		goto err;
	if (ret) {
		/* dirent has been deleted */
		ret = 0;
		goto out;
	}

	if (k.k->type != KEY_TYPE_dirent)
		goto out;

	struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

	if (d.v->d_type == DT_SUBVOL) {
		ret = check_dirent_to_subvol(trans, iter, d);
		if (ret)
			goto err;
	} else {
		/* Collect the inode versions visible from this dirent's snapshot */
		ret = get_visible_inodes(trans, target, s, le64_to_cpu(d.v->d_inum));
		if (ret)
			goto err;

		if (fsck_err_on(!target->inodes.nr,
				trans, dirent_to_missing_inode,
				"dirent points to missing inode:\n%s",
				(printbuf_reset(&buf),
				 bch2_bkey_val_to_text(&buf, c, k),
				 buf.buf))) {
			ret = __remove_dirent(trans, d.k->p);
			if (ret)
				goto err;
		}

		darray_for_each(target->inodes, i) {
			ret = check_dirent_target(trans, iter, d, &i->inode);
			if (ret)
				goto err;
		}

		/* Dirents pointing at inode versions overwritten in a snapshot: */
		darray_for_each(target->deletes, i)
			if (fsck_err_on(!snapshot_list_has_id(&s->ids, *i),
					trans, dirent_to_overwritten_inode,
					"dirent points to inode overwritten in snapshot %u:\n%s",
					*i,
					(printbuf_reset(&buf),
					 bch2_bkey_val_to_text(&buf, c, k),
					 buf.buf))) {
				/* Delete the stale dirent in that snapshot only */
				struct btree_iter delete_iter;
				bch2_trans_iter_init(trans, &delete_iter,
						     BTREE_ID_dirents,
						     SPOS(k.k->p.inode, k.k->p.offset, *i),
						     BTREE_ITER_intent);
				ret = bch2_btree_iter_traverse(&delete_iter) ?:
					bch2_hash_delete_at(trans, bch2_dirent_hash_desc,
							    hash_info,
							    &delete_iter,
							    BTREE_UPDATE_internal_snapshot_node);
				bch2_trans_iter_exit(trans, &delete_iter);
				if (ret)
					goto err;

			}
	}

	ret = bch2_trans_commit(trans, NULL, NULL, BCH_TRANS_COMMIT_no_enospc);
	if (ret)
		goto err;

	/* Accumulate subdir count and dirent size into each visible parent: */
	for_each_visible_inode(c, s, dir, d.k->p.snapshot, i) {
		if (d.v->d_type == DT_DIR)
			i->count++;
		i->i_size += bkey_bytes(d.k);
	}
out:
err:
fsck_err:
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk dirents: verify that they all have a corresponding S_ISDIR inode,
 * validate d_type
 */
int bch2_check_dirents(struct bch_fs *c)
{
	struct inode_walker dir = inode_walker_init();
	struct inode_walker target = inode_walker_init();
	struct snapshots_seen s;
	struct bch_hash_info hash_info;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_dirents,
				   POS(BCACHEFS_ROOT_INO, 0),
				   BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k,
			check_dirent(trans, &iter, k, &hash_info, &dir, &target, &s)) ?:
		check_subdir_count_notnested(trans, &dir));

	snapshots_seen_exit(&s);
	inode_walker_exit(&dir);
	inode_walker_exit(&target);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check one xattr key: snapshot validity, owning inode exists, and correct
 * hash placement.
 */
static int check_xattr(struct btree_trans *trans, struct btree_iter *iter,
		       struct bkey_s_c k,
		       struct bch_hash_info *hash_info,
		       struct inode_walker *inode)
{
	struct bch_fs *c = trans->c;
	struct inode_walker_entry *i;
	int ret;

	ret =
/* continuation of check_xattr() from the previous chunk */
	      bch2_check_key_has_snapshot(trans, iter, k);
	if (ret < 0)
		return ret;
	if (ret)
		/* positive return: key was deleted, nothing more to check */
		return 0;

	i = walk_inode(trans, inode, k);
	ret = PTR_ERR_OR_ZERO(i);
	if (ret)
		return ret;

	ret = check_key_has_inode(trans, iter, inode, i, k);
	if (ret)
		return ret;

	if (!i)
		return 0;

	/* Hash info is per-inode; (re)compute on the first key of each inode */
	if (inode->first_this_inode)
		*hash_info = bch2_hash_info_init(c, &i->inode);
	inode->first_this_inode = false;

	ret = bch2_str_hash_check_key(trans, NULL, &bch2_xattr_hash_desc, hash_info, iter, k);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Walk xattrs: verify that they all have a corresponding inode
 */
int bch2_check_xattrs(struct bch_fs *c)
{
	struct inode_walker inode = inode_walker_init();
	struct bch_hash_info hash_info;
	int ret = 0;

	ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_xattrs,
				POS(BCACHEFS_ROOT_INO, 0),
				BTREE_ITER_prefetch|BTREE_ITER_all_snapshots,
				k,
				NULL, NULL,
				BCH_TRANS_COMMIT_no_enospc,
			check_xattr(trans, &iter, k, &hash_info, &inode)));

	inode_walker_exit(&inode);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Verify (and recreate if needed) the root subvolume and root directory
 * inode.  Runs inside a single transaction commit.
 */
static int check_root_trans(struct btree_trans *trans)
{
	struct bch_fs *c = trans->c;
	struct bch_inode_unpacked root_inode;
	u32 snapshot;
	u64 inum;
	int ret;

	ret = subvol_lookup(trans, BCACHEFS_ROOT_SUBVOL, &snapshot, &inum);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	if (mustfix_fsck_err_on(ret, trans, root_subvol_missing,
				"root subvol missing")) {
		/* Recreate the root subvolume pointing at the root inode: */
		struct bkey_i_subvolume *root_subvol =
			bch2_trans_kmalloc(trans, sizeof(*root_subvol));
		ret = PTR_ERR_OR_ZERO(root_subvol);
		if (ret)
			goto err;

		snapshot = U32_MAX;
		inum = BCACHEFS_ROOT_INO;

		bkey_subvolume_init(&root_subvol->k_i);
		root_subvol->k.p.offset =
/* continuation of check_root_trans() from the previous chunk */
					  BCACHEFS_ROOT_SUBVOL;
		root_subvol->v.flags = 0;
		root_subvol->v.snapshot = cpu_to_le32(snapshot);
		root_subvol->v.inode = cpu_to_le64(inum);
		ret = bch2_btree_insert_trans(trans, BTREE_ID_subvolumes, &root_subvol->k_i, 0);
		bch_err_msg(c, ret, "writing root subvol");
		if (ret)
			goto err;
	}

	ret = lookup_inode(trans, BCACHEFS_ROOT_INO, snapshot, &root_inode);
	if (ret && !bch2_err_matches(ret, ENOENT))
		return ret;

	/* short-circuit: bi_mode only read when lookup succeeded (ret == 0) */
	if (mustfix_fsck_err_on(ret,
				trans, root_dir_missing,
				"root directory missing") ||
	    mustfix_fsck_err_on(!S_ISDIR(root_inode.bi_mode),
				trans, root_inode_not_dir,
				"root inode not a directory")) {
		bch2_inode_init(c, &root_inode, 0, 0, S_IFDIR|0755,
				0, NULL);
		root_inode.bi_inum = inum;
		root_inode.bi_snapshot = snapshot;

		ret = __bch2_fsck_write_inode(trans, &root_inode);
		bch_err_msg(c, ret, "writing root inode");
	}
err:
fsck_err:
	return ret;
}

/* Get root directory, create if it doesn't exist: */
int bch2_check_root(struct bch_fs *c)
{
	int ret = bch2_trans_commit_do(c, NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
		check_root_trans(trans));
	bch_err_fn(c, ret);
	return ret;
}

typedef DARRAY(u32) darray_u32;

/* Linear membership test on a small u32 darray */
static bool darray_u32_has(darray_u32 *d, u32 v)
{
	darray_for_each(*d, i)
		if (*i == v)
			return true;
	return false;
}

/*
 * Walk a subvolume's fs_path_parent chain up to the root subvolume,
 * detecting loops and unreachable subvolumes; reattaches strays.
 */
static int check_subvol_path(struct btree_trans *trans, struct btree_iter *iter, struct bkey_s_c k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter parent_iter = {};
	darray_u32 subvol_path = {};
	struct printbuf buf = PRINTBUF;
	int ret = 0;

	if (k.k->type != KEY_TYPE_subvolume)
		return 0;

	while (k.k->p.offset != BCACHEFS_ROOT_SUBVOL) {
		ret = darray_push(&subvol_path, k.k->p.offset);
		if (ret)
			goto err;

		struct bkey_s_c_subvolume s =
/* continuation of check_subvol_path() from the previous chunk */
			bkey_s_c_to_subvolume(k);

		/* Subvol root inode must exist; handled elsewhere if not */
		struct bch_inode_unpacked subvol_root;
		ret = bch2_inode_find_by_inum_trans(trans,
					(subvol_inum) { s.k->p.offset, le64_to_cpu(s.v->inode) },
					&subvol_root);
		if (ret)
			break;

		u32 parent = le32_to_cpu(s.v->fs_path_parent);

		if (darray_u32_has(&subvol_path, parent)) {
			/* NOTE(review): other fsck_err calls in this file pass
			 * trans as the first argument; confirm the macro accepts
			 * a bch_fs pointer here */
			if (fsck_err(c, subvol_loop, "subvolume loop"))
				ret = reattach_subvol(trans, s);
			break;
		}

		bch2_trans_iter_exit(trans, &parent_iter);
		bch2_trans_iter_init(trans, &parent_iter,
				     BTREE_ID_subvolumes, POS(0, parent), 0);
		k = bch2_btree_iter_peek_slot(&parent_iter);
		ret = bkey_err(k);
		if (ret)
			goto err;

		if (fsck_err_on(k.k->type != KEY_TYPE_subvolume,
				trans, subvol_unreachable,
				"unreachable subvolume %s",
				(bch2_bkey_val_to_text(&buf, c, s.s_c),
				 buf.buf))) {
			ret = reattach_subvol(trans, s);
			break;
		}
	}
fsck_err:
err:
	printbuf_exit(&buf);
	darray_exit(&subvol_path);
	bch2_trans_iter_exit(trans, &parent_iter);
	return ret;
}

/* Verify every subvolume's fs_path_parent chain reaches the root subvol */
int bch2_check_subvolume_structure(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_subvolumes, POS_MIN, BTREE_ITER_prefetch, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			check_subvol_path(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

/* One step of an inode's path to the root, for loop detection */
struct pathbuf_entry {
	u64	inum;
	u32	snapshot;
};

typedef DARRAY(struct pathbuf_entry) pathbuf;

/* Rewrite a single inode's bi_depth and commit */
static int bch2_bi_depth_renumber_one(struct btree_trans *trans, struct pathbuf_entry *p,
				      u32 new_depth)
{
	struct btree_iter iter;
	struct bkey_s_c k = bch2_bkey_get_iter(trans, &iter, BTREE_ID_inodes,
					       SPOS(0, p->inum, p->snapshot), 0);

	struct bch_inode_unpacked inode;
	int ret = bkey_err(k) ?:
		!bkey_is_inode(k.k) ?
/* continuation of bch2_bi_depth_renumber_one() from the previous chunk */
			-BCH_ERR_ENOENT_inode
		: bch2_inode_unpack(k, &inode);
	if (ret)
		goto err;

	/* Only rewrite when the depth actually changes */
	if (inode.bi_depth != new_depth) {
		inode.bi_depth = new_depth;
		ret = __bch2_fsck_write_inode(trans, &inode) ?:
			bch2_trans_commit(trans, NULL, NULL, 0);
	}
err:
	bch2_trans_iter_exit(trans, &iter);
	return ret;
}

/*
 * Renumber bi_depth along a path, deepest entry first, each inode one
 * deeper than its parent.
 */
static int bch2_bi_depth_renumber(struct btree_trans *trans, pathbuf *path, u32 new_bi_depth)
{
	u32 restart_count = trans->restart_count;
	int ret = 0;

	darray_for_each_reverse(*path, i) {
		ret = nested_lockrestart_do(trans,
				bch2_bi_depth_renumber_one(trans, i, new_bi_depth));
		bch_err_fn(trans->c, ret);
		if (ret)
			break;

		new_bi_depth++;
	}

	return ret ?: trans_was_restarted(trans, restart_count);
}

/* Have we already visited this inum:snapshot on the current walk? */
static bool path_is_dup(pathbuf *p, u64 inum, u32 snapshot)
{
	darray_for_each(*p, i)
		if (i->inum == inum &&
		    i->snapshot == snapshot)
			return true;
	return false;
}

/*
 * Walk from @inode_k up through bi_dir backpointers to a subvolume root,
 * detecting directory structure loops and fixing bi_depth along the way.
 */
static int check_path_loop(struct btree_trans *trans, struct bkey_s_c inode_k)
{
	struct bch_fs *c = trans->c;
	struct btree_iter inode_iter = {};
	pathbuf path = {};
	struct printbuf buf = PRINTBUF;
	u32 snapshot = inode_k.k->p.snapshot;
	bool redo_bi_depth = false;
	u32 min_bi_depth = U32_MAX;
	int ret = 0;

	struct bch_inode_unpacked inode;
	ret = bch2_inode_unpack(inode_k, &inode);
	if (ret)
		return ret;

	while (!inode.bi_subvol) {
		struct btree_iter dirent_iter;
		struct bkey_s_c_dirent d;
		u32 parent_snapshot = snapshot;

		d = inode_get_dirent(trans, &dirent_iter, &inode, &parent_snapshot);
		ret = bkey_err(d.s_c);
		if (ret && !bch2_err_matches(ret, ENOENT))
			goto out;

		if (!ret && (ret = dirent_points_to_inode(c, d, &inode)))
			bch2_trans_iter_exit(trans, &dirent_iter);

		if (bch2_err_matches(ret, ENOENT)) {
printbuf_reset(&buf); 2754 bch2_bkey_val_to_text(&buf, c, inode_k); 2755 bch_err(c, "unreachable inode in check_directory_structure: %s\n%s", 2756 bch2_err_str(ret), buf.buf); 2757 goto out; 2758 } 2759 2760 bch2_trans_iter_exit(trans, &dirent_iter); 2761 2762 ret = darray_push(&path, ((struct pathbuf_entry) { 2763 .inum = inode.bi_inum, 2764 .snapshot = snapshot, 2765 })); 2766 if (ret) 2767 return ret; 2768 2769 snapshot = parent_snapshot; 2770 2771 bch2_trans_iter_exit(trans, &inode_iter); 2772 inode_k = bch2_bkey_get_iter(trans, &inode_iter, BTREE_ID_inodes, 2773 SPOS(0, inode.bi_dir, snapshot), 0); 2774 2775 struct bch_inode_unpacked parent_inode; 2776 ret = bkey_err(inode_k) ?: 2777 !bkey_is_inode(inode_k.k) ? -BCH_ERR_ENOENT_inode 2778 : bch2_inode_unpack(inode_k, &parent_inode); 2779 if (ret) { 2780 /* Should have been caught in dirents pass */ 2781 bch_err_msg(c, ret, "error looking up parent directory"); 2782 goto out; 2783 } 2784 2785 min_bi_depth = parent_inode.bi_depth; 2786 2787 if (parent_inode.bi_depth < inode.bi_depth && 2788 min_bi_depth < U16_MAX) 2789 break; 2790 2791 inode = parent_inode; 2792 snapshot = inode_k.k->p.snapshot; 2793 redo_bi_depth = true; 2794 2795 if (path_is_dup(&path, inode.bi_inum, snapshot)) { 2796 /* XXX print path */ 2797 bch_err(c, "directory structure loop"); 2798 2799 darray_for_each(path, i) 2800 pr_err("%llu:%u", i->inum, i->snapshot); 2801 pr_err("%llu:%u", inode.bi_inum, snapshot); 2802 2803 if (fsck_err(trans, dir_loop, "directory structure loop")) { 2804 ret = remove_backpointer(trans, &inode); 2805 bch_err_msg(c, ret, "removing dirent"); 2806 if (ret) 2807 break; 2808 2809 ret = reattach_inode(trans, &inode); 2810 bch_err_msg(c, ret, "reattaching inode %llu", inode.bi_inum); 2811 } 2812 2813 goto out; 2814 } 2815 } 2816 2817 if (inode.bi_subvol) 2818 min_bi_depth = 0; 2819 2820 if (redo_bi_depth) 2821 ret = bch2_bi_depth_renumber(trans, &path, min_bi_depth); 2822 out: 2823 fsck_err: 2824 
/* tail of check_path_loop(): common cleanup for all exit paths */
	bch2_trans_iter_exit(trans, &inode_iter);
	darray_exit(&path);
	printbuf_exit(&buf);
	bch_err_fn(c, ret);
	return ret;
}

/*
 * Check for loops in the directory structure: all other connectivity issues
 * have been fixed by prior passes
 */
int bch2_check_directory_structure(struct bch_fs *c)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, POS_MIN,
					  BTREE_ITER_intent|
					  BTREE_ITER_prefetch|
					  BTREE_ITER_all_snapshots, k,
					  NULL, NULL, BCH_TRANS_COMMIT_no_enospc, ({
			if (!S_ISDIR(bkey_inode_mode(k)))
				continue;

			if (bch2_inode_flags(k) & BCH_INODE_unlinked)
				continue;

			check_path_loop(trans, k);
		})));

	bch_err_fn(c, ret);
	return ret;
}

/* Growable, sorted-by-inum table of candidate hardlinked inodes */
struct nlink_table {
	size_t		nr;
	size_t		size;

	struct nlink {
		u64	inum;
		u32	snapshot;
		u32	count;
	}		*d;
};

/* Append an entry, doubling the (kvmalloc'd) table when full */
static int add_nlink(struct bch_fs *c, struct nlink_table *t,
		     u64 inum, u32 snapshot)
{
	if (t->nr == t->size) {
		size_t new_size = max_t(size_t, 128UL, t->size * 2);
		void *d = kvmalloc_array(new_size, sizeof(t->d[0]), GFP_KERNEL);

		if (!d) {
			bch_err(c, "fsck: error allocating memory for nlink_table, size %zu",
				new_size);
			return -BCH_ERR_ENOMEM_fsck_add_nlink;
		}

		if (t->d)
			memcpy(d, t->d, t->size * sizeof(t->d[0]));
		kvfree(t->d);

		t->d = d;
		t->size = new_size;
	}


	t->d[t->nr++] = (struct nlink) {
		.inum		= inum,
		.snapshot	= snapshot,
	};

	return 0;
}

/* Order nlink entries by inum only; snapshots of one inum stay adjacent */
static int nlink_cmp(const void *_l, const void *_r)
{
	const struct nlink *l = _l;
	const struct nlink *r = _r;

	return cmp_int(l->inum, r->inum);
}

static void inc_link(struct bch_fs *c, struct snapshots_seen *s,
		     struct nlink_table *links,
		     u64 range_start, u64 range_end, u64
/*
 * continuation of inc_link(): bump the link count of the table entry for
 * @inum whose snapshot is visible from @snapshot
 */
		     inum, u32 snapshot)
{
	struct nlink *link, key = {
		.inum = inum, .snapshot = U32_MAX,
	};

	if (inum < range_start || inum >= range_end)
		return;

	link = __inline_bsearch(&key, links->d, links->nr,
				sizeof(links->d[0]), nlink_cmp);
	if (!link)
		return;

	/* bsearch may land anywhere in a run of equal inums; rewind to first */
	while (link > links->d && link[0].inum == link[-1].inum)
		--link;

	for (; link < links->d + links->nr && link->inum == inum; link++)
		if (ref_visible(c, s, snapshot, link->snapshot)) {
			link->count++;
			if (link->snapshot >= snapshot)
				break;
		}
}

/*
 * Pass 1: collect all inodes that could be hardlinked (regular files with
 * nonzero bi_nlink) into @t.  If the table fills, *end records where to
 * resume and the caller re-runs over the next range.
 */
noinline_for_stack
static int check_nlinks_find_hardlinks(struct bch_fs *c,
				       struct nlink_table *t,
				       u64 start, u64 *end)
{
	int ret = bch2_trans_run(c,
		for_each_btree_key(trans, iter, BTREE_ID_inodes,
				   POS(0, start),
				   BTREE_ITER_intent|
				   BTREE_ITER_prefetch|
				   BTREE_ITER_all_snapshots, k, ({
			if (!bkey_is_inode(k.k))
				continue;

			/* Should never fail, checked by bch2_inode_invalid: */
			struct bch_inode_unpacked u;
			/* _ret3 is the loop macro's error slot — presumably
			 * declared by for_each_btree_key; verify in btree_iter.h */
			_ret3 = bch2_inode_unpack(k, &u);
			if (_ret3)
				break;

			/*
			 * Backpointer and directory structure checks are sufficient for
			 * directories, since they can't have hardlinks:
			 */
			if (S_ISDIR(u.bi_mode))
				continue;

			/*
			 * Previous passes ensured that bi_nlink is nonzero if
			 * it had multiple hardlinks:
			 */
			if (!u.bi_nlink)
				continue;

			ret = add_nlink(c, t, k.k->p.offset, k.k->p.snapshot);
			if (ret) {
				/* Table full: remember resume point, not an error */
				*end = k.k->p.offset;
				ret = 0;
				break;
			}
			0;
		})));

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Pass 2: walk all dirents and count the links to each inode in @links
 * within [range_start, range_end).
 */
noinline_for_stack
static int check_nlinks_walk_dirents(struct bch_fs *c, struct nlink_table *links,
				     u64 range_start, u64 range_end)
{
	struct snapshots_seen s;

	snapshots_seen_init(&s);

	int ret = bch2_trans_run(c,
/* continuation of check_nlinks_walk_dirents() from the previous chunk */
		for_each_btree_key(trans, iter, BTREE_ID_dirents, POS_MIN,
				   BTREE_ITER_intent|
				   BTREE_ITER_prefetch|
				   BTREE_ITER_all_snapshots, k, ({
			ret = snapshots_seen_update(c, &s, iter.btree_id, k.k->p);
			if (ret)
				break;

			if (k.k->type == KEY_TYPE_dirent) {
				struct bkey_s_c_dirent d = bkey_s_c_to_dirent(k);

				/* Directories and subvols can't be hardlinked */
				if (d.v->d_type != DT_DIR &&
				    d.v->d_type != DT_SUBVOL)
					inc_link(c, &s, links, range_start, range_end,
						 le64_to_cpu(d.v->d_inum), d.k->p.snapshot);
			}
			0;
		})));

	snapshots_seen_exit(&s);

	bch_err_fn(c, ret);
	return ret;
}

/*
 * Pass 3 helper: compare one inode's stored nlink against the counted value
 * in @links and rewrite it if they differ.  Returns 1 to stop iteration at
 * the end of the range.
 */
static int check_nlinks_update_inode(struct btree_trans *trans, struct btree_iter *iter,
				     struct bkey_s_c k,
				     struct nlink_table *links,
				     size_t *idx, u64 range_end)
{
	struct bch_inode_unpacked u;
	struct nlink *link = &links->d[*idx];
	int ret = 0;

	if (k.k->p.offset >= range_end)
		return 1;

	if (!bkey_is_inode(k.k))
		return 0;

	ret = bch2_inode_unpack(k, &u);
	if (ret)
		return ret;

	if (S_ISDIR(u.bi_mode))
		return 0;

	if (!u.bi_nlink)
		return 0;

	/* Advance through the table to this inode; both are in key order */
	while ((cmp_int(link->inum, k.k->p.offset) ?:
		cmp_int(link->snapshot, k.k->p.snapshot)) < 0) {
		BUG_ON(*idx == links->nr);
		link = &links->d[++*idx];
	}

	if (fsck_err_on(bch2_inode_nlink_get(&u) != link->count,
			trans, inode_wrong_nlink,
			"inode %llu type %s has wrong i_nlink (%u, should be %u)",
			u.bi_inum, bch2_d_types[mode_to_type(u.bi_mode)],
			bch2_inode_nlink_get(&u), link->count)) {
		bch2_inode_nlink_set(&u, link->count);
		ret = __bch2_fsck_write_inode(trans, &u);
	}
fsck_err:
	return ret;
}

/* Pass 3: apply the counted link totals back to the inodes in the range */
noinline_for_stack
static int check_nlinks_update_hardlinks(struct bch_fs *c,
					 struct nlink_table *links,
					 u64 range_start, u64 range_end)
{
	size_t idx = 0;

	int ret = bch2_trans_run(c,
for_each_btree_key_commit(trans, iter, BTREE_ID_inodes, 3065 POS(0, range_start), 3066 BTREE_ITER_intent|BTREE_ITER_prefetch|BTREE_ITER_all_snapshots, k, 3067 NULL, NULL, BCH_TRANS_COMMIT_no_enospc, 3068 check_nlinks_update_inode(trans, &iter, k, links, &idx, range_end))); 3069 if (ret < 0) { 3070 bch_err(c, "error in fsck walking inodes: %s", bch2_err_str(ret)); 3071 return ret; 3072 } 3073 3074 return 0; 3075 } 3076 3077 int bch2_check_nlinks(struct bch_fs *c) 3078 { 3079 struct nlink_table links = { 0 }; 3080 u64 this_iter_range_start, next_iter_range_start = 0; 3081 int ret = 0; 3082 3083 do { 3084 this_iter_range_start = next_iter_range_start; 3085 next_iter_range_start = U64_MAX; 3086 3087 ret = check_nlinks_find_hardlinks(c, &links, 3088 this_iter_range_start, 3089 &next_iter_range_start); 3090 3091 ret = check_nlinks_walk_dirents(c, &links, 3092 this_iter_range_start, 3093 next_iter_range_start); 3094 if (ret) 3095 break; 3096 3097 ret = check_nlinks_update_hardlinks(c, &links, 3098 this_iter_range_start, 3099 next_iter_range_start); 3100 if (ret) 3101 break; 3102 3103 links.nr = 0; 3104 } while (next_iter_range_start != U64_MAX); 3105 3106 kvfree(links.d); 3107 bch_err_fn(c, ret); 3108 return ret; 3109 } 3110 3111 static int fix_reflink_p_key(struct btree_trans *trans, struct btree_iter *iter, 3112 struct bkey_s_c k) 3113 { 3114 struct bkey_s_c_reflink_p p; 3115 struct bkey_i_reflink_p *u; 3116 3117 if (k.k->type != KEY_TYPE_reflink_p) 3118 return 0; 3119 3120 p = bkey_s_c_to_reflink_p(k); 3121 3122 if (!p.v->front_pad && !p.v->back_pad) 3123 return 0; 3124 3125 u = bch2_trans_kmalloc(trans, sizeof(*u)); 3126 int ret = PTR_ERR_OR_ZERO(u); 3127 if (ret) 3128 return ret; 3129 3130 bkey_reassemble(&u->k_i, k); 3131 u->v.front_pad = 0; 3132 u->v.back_pad = 0; 3133 3134 return bch2_trans_update(trans, iter, &u->k_i, BTREE_TRIGGER_norun); 3135 } 3136 3137 int bch2_fix_reflink_p(struct bch_fs *c) 3138 { 3139 if (c->sb.version >= 
/* continuation of bch2_fix_reflink_p(): skip on already-fixed versions */
	    bcachefs_metadata_version_reflink_p_fix)
		return 0;

	int ret = bch2_trans_run(c,
		for_each_btree_key_commit(trans, iter,
				BTREE_ID_extents, POS_MIN,
				BTREE_ITER_intent|BTREE_ITER_prefetch|
				BTREE_ITER_all_snapshots, k,
				NULL, NULL, BCH_TRANS_COMMIT_no_enospc,
			fix_reflink_p_key(trans, &iter, k)));
	bch_err_fn(c, ret);
	return ret;
}

#ifndef NO_BCACHEFS_CHARDEV

/* State for a userspace-driven fsck running on a kthread with stdio */
struct fsck_thread {
	struct thread_with_stdio thr;
	struct bch_fs	*c;
	struct bch_opts	opts;
};

static void bch2_fsck_thread_exit(struct thread_with_stdio *_thr)
{
	struct fsck_thread *thr = container_of(_thr, struct fsck_thread, thr);
	kfree(thr);
}

/*
 * Offline fsck worker: start the (nostart-opened) filesystem, which runs
 * recovery/fsck, then report results.  Return value encodes fsck exit bits
 * (1 = errors fixed, 4 = errors remain).
 */
static int bch2_fsck_offline_thread_fn(struct thread_with_stdio *stdio)
{
	struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
	struct bch_fs *c = thr->c;

	/* thr->c may be an ERR_PTR from bch2_fs_open() */
	int ret = PTR_ERR_OR_ZERO(c);
	if (ret)
		return ret;

	ret = bch2_fs_start(thr->c);
	if (ret)
		goto err;

	if (test_bit(BCH_FS_errors_fixed, &c->flags)) {
		bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: errors fixed\n", c->name);
		ret |= 1;
	}
	if (test_bit(BCH_FS_error, &c->flags)) {
		bch2_stdio_redirect_printf(&stdio->stdio, false, "%s: still has errors\n", c->name);
		ret |= 4;
	}
err:
	bch2_fs_stop(c);
	return ret;
}

static const struct thread_with_stdio_ops bch2_offline_fsck_ops = {
	.exit		= bch2_fsck_thread_exit,
	.fn		= bch2_fsck_offline_thread_fn,
};

/*
 * ioctl: run fsck on an offline (not mounted) filesystem given its device
 * paths; output streams back to userspace via thread_with_stdio.
 */
long bch2_ioctl_fsck_offline(struct bch_ioctl_fsck_offline __user *user_arg)
{
	struct bch_ioctl_fsck_offline arg;
	struct fsck_thread *thr = NULL;
	darray_str(devs) = {};
	long ret = 0;

	if (copy_from_user(&arg, user_arg, sizeof(arg)))
		return -EFAULT;

	if (arg.flags)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
/* continuation of bch2_ioctl_fsck_offline() from the previous chunk */
		return -EPERM;

	/* Copy the userspace array of device-path pointers, then each string: */
	for (size_t i = 0; i < arg.nr_devs; i++) {
		u64 dev_u64;
		ret = copy_from_user_errcode(&dev_u64, &user_arg->devs[i], sizeof(u64));
		if (ret)
			goto err;

		char *dev_str = strndup_user((char __user *)(unsigned long) dev_u64, PATH_MAX);
		ret = PTR_ERR_OR_ZERO(dev_str);
		if (ret)
			goto err;

		ret = darray_push(&devs, dev_str);
		if (ret) {
			kfree(dev_str);
			goto err;
		}
	}

	thr = kzalloc(sizeof(*thr), GFP_KERNEL);
	if (!thr) {
		ret = -ENOMEM;
		goto err;
	}

	thr->opts = bch2_opts_empty();

	if (arg.opts) {
		char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);
		ret =   PTR_ERR_OR_ZERO(optstr) ?:
			bch2_parse_mount_opts(NULL, &thr->opts, NULL, optstr);
		if (!IS_ERR(optstr))
			kfree(optstr);

		if (ret)
			goto err;
	}

	/* Redirect the fs's output to this thread's stdio, open read-only */
	opt_set(thr->opts, stdio, (u64)(unsigned long)&thr->thr.stdio);
	opt_set(thr->opts, read_only, 1);
	opt_set(thr->opts, ratelimit_errors, 0);

	/* We need request_key() to be called before we punt to kthread: */
	opt_set(thr->opts, nostart, true);

	bch2_thread_with_stdio_init(&thr->thr, &bch2_offline_fsck_ops);

	thr->c = bch2_fs_open(devs.data, arg.nr_devs, thr->opts);

	/* Never panic on errors found while running fsck: */
	if (!IS_ERR(thr->c) &&
	    thr->c->opts.errors == BCH_ON_ERROR_panic)
		thr->c->opts.errors = BCH_ON_ERROR_ro;

	/* thread fn handles an ERR_PTR in thr->c; thr now owns itself */
	ret = __bch2_run_thread_with_stdio(&thr->thr);
out:
	darray_for_each(devs, i)
		kfree(*i);
	darray_exit(&devs);
	return ret;
err:
	if (thr)
		bch2_fsck_thread_exit(&thr->thr);
	pr_err("ret %s", bch2_err_str(ret));
	goto out;
}

/*
 * Online fsck worker: re-run the recovery/fsck passes on a mounted
 * filesystem, temporarily overriding fix_errors in c->opts.
 */
static int bch2_fsck_online_thread_fn(struct thread_with_stdio *stdio)
{
	struct fsck_thread *thr = container_of(stdio, struct fsck_thread, thr);
	struct bch_fs *c = thr->c;

	c->stdio_filter = current;
	c->stdio = &thr->thr.stdio;
	/* continuation of bch2_fsck_online_thread_fn() from the previous chunk */

	/*
	 * XXX: can we figure out a way to do this without mucking with c->opts?
	 */
	unsigned old_fix_errors = c->opts.fix_errors;
	if (opt_defined(thr->opts, fix_errors))
		c->opts.fix_errors = thr->opts.fix_errors;
	else
		c->opts.fix_errors = FSCK_FIX_ask;

	c->opts.fsck = true;
	set_bit(BCH_FS_fsck_running, &c->flags);

	c->curr_recovery_pass = BCH_RECOVERY_PASS_check_alloc_info;
	int ret = bch2_run_online_recovery_passes(c);

	clear_bit(BCH_FS_fsck_running, &c->flags);
	bch_err_fn(c, ret);

	/* Restore state taken over at thread start */
	c->stdio = NULL;
	c->stdio_filter = NULL;
	c->opts.fix_errors = old_fix_errors;

	/* Release resources acquired in bch2_ioctl_fsck_online() */
	up(&c->online_fsck_mutex);
	bch2_ro_ref_put(c);
	return ret;
}

static const struct thread_with_stdio_ops bch2_online_fsck_ops = {
	.exit		= bch2_fsck_thread_exit,
	.fn		= bch2_fsck_online_thread_fn,
};

/*
 * ioctl: run fsck on a mounted filesystem.  Takes a ro ref and the
 * online_fsck_mutex; both are released by the worker thread on success,
 * or by the error path below.
 */
long bch2_ioctl_fsck_online(struct bch_fs *c, struct bch_ioctl_fsck_online arg)
{
	struct fsck_thread *thr = NULL;
	long ret = 0;

	if (arg.flags)
		return -EINVAL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (!bch2_ro_ref_tryget(c))
		return -EROFS;

	/* Only one online fsck at a time: */
	if (down_trylock(&c->online_fsck_mutex)) {
		bch2_ro_ref_put(c);
		return -EAGAIN;
	}

	thr = kzalloc(sizeof(*thr), GFP_KERNEL);
	if (!thr) {
		ret = -ENOMEM;
		goto err;
	}

	thr->c = c;
	thr->opts = bch2_opts_empty();

	if (arg.opts) {
		char *optstr = strndup_user((char __user *)(unsigned long) arg.opts, 1 << 16);

		ret =   PTR_ERR_OR_ZERO(optstr) ?:
			bch2_parse_mount_opts(c, &thr->opts, NULL, optstr);
		if (!IS_ERR(optstr))
			kfree(optstr);

		if (ret)
			goto err;
	}

	ret = bch2_run_thread_with_stdio(&thr->thr, &bch2_online_fsck_ops);
err:
	if (ret < 0) {
		bch_err_fn(c, ret);
		if (thr)
			bch2_fsck_thread_exit(&thr->thr);
		/* Thread never started: release what the worker would have */
		up(&c->online_fsck_mutex);
		bch2_ro_ref_put(c);
	}
	return ret;
}

#endif /* NO_BCACHEFS_CHARDEV */