1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_icache.h" 16 #include "xfs_iwalk.h" 17 #include "xfs_ialloc.h" 18 #include "xfs_dir2.h" 19 #include "xfs_dir2_priv.h" 20 #include "xfs_ag.h" 21 #include "scrub/scrub.h" 22 #include "scrub/common.h" 23 #include "scrub/repair.h" 24 #include "scrub/xfile.h" 25 #include "scrub/xfarray.h" 26 #include "scrub/iscan.h" 27 #include "scrub/nlinks.h" 28 #include "scrub/trace.h" 29 #include "scrub/readdir.h" 30 #include "scrub/tempfile.h" 31 32 /* 33 * Live Inode Link Count Checking 34 * ============================== 35 * 36 * Inode link counts are "summary" metadata, in the sense that they are 37 * computed as the number of directory entries referencing each file on the 38 * filesystem. Therefore, we compute the correct link counts by creating a 39 * shadow link count structure and walking every inode. 40 */ 41 42 /* Set us up to scrub inode link counts. */ 43 int 44 xchk_setup_nlinks( 45 struct xfs_scrub *sc) 46 { 47 xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); 48 49 sc->buf = kzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS); 50 if (!sc->buf) 51 return -ENOMEM; 52 53 return xchk_setup_fs(sc); 54 } 55 56 /* 57 * Part 1: Collecting file link counts. For each file, we create a shadow link 58 * counting structure, then walk the entire directory tree, incrementing parent 59 * and child link counts for each directory entry seen. 60 * 61 * To avoid false corruption reports in part 2, any failure in this part must 62 * set the INCOMPLETE flag even when a negative errno is returned. This care 63 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, 64 * ECANCELED) that are absorbed into a scrub state flag update by 65 * xchk_*_process_error. Scrub and repair share the same incore data 66 * structures, so the INCOMPLETE flag is critical to prevent a repair based on 67 * insufficient information. 68 * 69 * Because we are scanning a live filesystem, it's possible that another thread 70 * will try to update the link counts for an inode that we've already scanned. 71 * This will cause our counts to be incorrect. Therefore, we hook all 72 * directory entry updates because that is when link count updates occur. By 73 * shadowing transaction updates in this manner, live nlink check can ensure by 74 * locking the inode and the shadow structure that its own copies are not out 75 * of date. Because the hook code runs in a different process context from the 76 * scrub code and the scrub state flags are not accessed atomically, failures 77 * in the hook code must abort the iscan and the scrubber must notice the 78 * aborted scan and set the incomplete flag. 79 * 80 * Note that we use jump labels and srcu notifier hooks to minimize the 81 * overhead when live nlinks is /not/ running. Locking order for nlink 82 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock. 83 */ 84 85 /* 86 * Add a delta to an nlink counter, clamping the value to U32_MAX. Because 87 * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results 88 * even if we lose some precision. 89 */ 90 static inline void 91 careful_add( 92 xfs_nlink_t *nlinkp, 93 int delta) 94 { 95 uint64_t new_value = (uint64_t)(*nlinkp) + delta; 96 97 BUILD_BUG_ON(XFS_MAXLINK > U32_MAX); 98 *nlinkp = min_t(uint64_t, new_value, U32_MAX); 99 } 100 101 /* Update incore link count information. Caller must hold the nlinks lock. */ 102 STATIC int 103 xchk_nlinks_update_incore( 104 struct xchk_nlink_ctrs *xnc, 105 xfs_ino_t ino, 106 int parents_delta, 107 int backrefs_delta, 108 int children_delta) 109 { 110 struct xchk_nlink nl; 111 int error; 112 113 if (!xnc->nlinks) 114 return 0; 115 116 error = xfarray_load_sparse(xnc->nlinks, ino, &nl); 117 if (error) 118 return error; 119 120 trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta, 121 backrefs_delta, children_delta); 122 123 careful_add(&nl.parents, parents_delta); 124 careful_add(&nl.backrefs, backrefs_delta); 125 careful_add(&nl.children, children_delta); 126 127 nl.flags |= XCHK_NLINK_WRITTEN; 128 error = xfarray_store(xnc->nlinks, ino, &nl); 129 if (error == -EFBIG) { 130 /* 131 * EFBIG means we tried to store data at too high a byte offset 132 * in the sparse array. IOWs, we cannot complete the check and 133 * must notify userspace that the check was incomplete. 134 */ 135 error = -ECANCELED; 136 } 137 return error; 138 } 139 140 /* 141 * Apply a link count change from the regular filesystem into our shadow link 142 * count structure based on a directory update in progress. 143 */ 144 STATIC int 145 xchk_nlinks_live_update( 146 struct notifier_block *nb, 147 unsigned long action, 148 void *data) 149 { 150 struct xfs_dir_update_params *p = data; 151 struct xchk_nlink_ctrs *xnc; 152 int error; 153 154 xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb); 155 156 /* 157 * Ignore temporary directories being used to stage dir repairs, since 158 * we don't bump the link counts of the children. 159 */ 160 if (xrep_is_tempfile(p->dp)) 161 return NOTIFY_DONE; 162 163 trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino, 164 p->delta, p->name->name, p->name->len); 165 166 /* 167 * If we've already scanned @dp, update the number of parents that link 168 * to @ip. If @ip is a subdirectory, update the number of child links 169 * going out of @dp. 170 */ 171 if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) { 172 mutex_lock(&xnc->lock); 173 error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta, 174 0, 0); 175 if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode)) 176 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, 177 0, p->delta); 178 mutex_unlock(&xnc->lock); 179 if (error) 180 goto out_abort; 181 } 182 183 /* 184 * If @ip is a subdirectory and we've already scanned it, update the 185 * number of backrefs pointing to @dp. 186 */ 187 if (S_ISDIR(VFS_IC(p->ip)->i_mode) && 188 xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) { 189 mutex_lock(&xnc->lock); 190 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, 191 p->delta, 0); 192 mutex_unlock(&xnc->lock); 193 if (error) 194 goto out_abort; 195 } 196 197 return NOTIFY_DONE; 198 199 out_abort: 200 xchk_iscan_abort(&xnc->collect_iscan); 201 return NOTIFY_DONE; 202 } 203 204 /* Bump the observed link count for the inode referenced by this entry. */ 205 STATIC int 206 xchk_nlinks_collect_dirent( 207 struct xfs_scrub *sc, 208 struct xfs_inode *dp, 209 xfs_dir2_dataptr_t dapos, 210 const struct xfs_name *name, 211 xfs_ino_t ino, 212 void *priv) 213 { 214 struct xchk_nlink_ctrs *xnc = priv; 215 bool dot = false, dotdot = false; 216 int error; 217 218 /* Does this name make sense? */ 219 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) { 220 error = -ECANCELED; 221 goto out_abort; 222 } 223 224 if (name->len == 1 && name->name[0] == '.') 225 dot = true; 226 else if (name->len == 2 && name->name[0] == '.' && 227 name->name[1] == '.') 228 dotdot = true; 229 230 /* Don't accept a '.' entry that points somewhere else. */ 231 if (dot && ino != dp->i_ino) { 232 error = -ECANCELED; 233 goto out_abort; 234 } 235 236 /* Don't accept an invalid inode number. */ 237 if (!xfs_verify_dir_ino(sc->mp, ino)) { 238 error = -ECANCELED; 239 goto out_abort; 240 } 241 242 /* Update the shadow link counts if we haven't already failed. */ 243 244 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 245 error = -ECANCELED; 246 goto out_incomplete; 247 } 248 249 trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name); 250 251 mutex_lock(&xnc->lock); 252 253 /* 254 * If this is a dotdot entry, it is a back link from dp to ino. How 255 * we handle this depends on whether or not dp is the root directory. 256 * 257 * The root directory is its own parent, so we pretend the dotdot entry 258 * establishes the "parent" of the root directory. Increment the 259 * number of parents of the root directory. 260 * 261 * Otherwise, increment the number of backrefs pointing back to ino. 262 */ 263 if (dotdot) { 264 if (dp == sc->mp->m_rootip) 265 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 266 else 267 error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); 268 if (error) 269 goto out_unlock; 270 } 271 272 /* 273 * If this dirent is a forward link from dp to ino, increment the 274 * number of parents linking into ino. 275 */ 276 if (!dot && !dotdot) { 277 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 278 if (error) 279 goto out_unlock; 280 } 281 282 /* 283 * If this dirent is a forward link to a subdirectory, increment the 284 * number of child links of dp. 285 */ 286 if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) { 287 error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1); 288 if (error) 289 goto out_unlock; 290 } 291 292 mutex_unlock(&xnc->lock); 293 return 0; 294 295 out_unlock: 296 mutex_unlock(&xnc->lock); 297 out_abort: 298 xchk_iscan_abort(&xnc->collect_iscan); 299 out_incomplete: 300 xchk_set_incomplete(sc); 301 return error; 302 } 303 304 /* Walk a directory to bump the observed link counts of the children. */ 305 STATIC int 306 xchk_nlinks_collect_dir( 307 struct xchk_nlink_ctrs *xnc, 308 struct xfs_inode *dp) 309 { 310 struct xfs_scrub *sc = xnc->sc; 311 unsigned int lock_mode; 312 int error = 0; 313 314 /* 315 * Ignore temporary directories being used to stage dir repairs, since 316 * we don't bump the link counts of the children. 317 */ 318 if (xrep_is_tempfile(dp)) 319 return 0; 320 321 /* Prevent anyone from changing this directory while we walk it. */ 322 xfs_ilock(dp, XFS_IOLOCK_SHARED); 323 lock_mode = xfs_ilock_data_map_shared(dp); 324 325 /* 326 * The dotdot entry of an unlinked directory still points to the last 327 * parent, but the parent no longer links to this directory. Skip the 328 * directory to avoid overcounting. 329 */ 330 if (VFS_I(dp)->i_nlink == 0) 331 goto out_unlock; 332 333 /* 334 * We cannot count file links if the directory looks as though it has 335 * been zapped by the inode record repair code. 336 */ 337 if (xchk_dir_looks_zapped(dp)) { 338 error = -EBUSY; 339 goto out_abort; 340 } 341 342 error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc); 343 if (error == -ECANCELED) { 344 error = 0; 345 goto out_unlock; 346 } 347 if (error) 348 goto out_abort; 349 350 xchk_iscan_mark_visited(&xnc->collect_iscan, dp); 351 goto out_unlock; 352 353 out_abort: 354 xchk_set_incomplete(sc); 355 xchk_iscan_abort(&xnc->collect_iscan); 356 out_unlock: 357 xfs_iunlock(dp, lock_mode); 358 xfs_iunlock(dp, XFS_IOLOCK_SHARED); 359 return error; 360 } 361 362 /* If this looks like a valid pointer, count it. */ 363 static inline int 364 xchk_nlinks_collect_metafile( 365 struct xchk_nlink_ctrs *xnc, 366 xfs_ino_t ino) 367 { 368 if (!xfs_verify_ino(xnc->sc->mp, ino)) 369 return 0; 370 371 trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino); 372 return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 373 } 374 375 /* Bump the link counts of metadata files rooted in the superblock. */ 376 STATIC int 377 xchk_nlinks_collect_metafiles( 378 struct xchk_nlink_ctrs *xnc) 379 { 380 struct xfs_mount *mp = xnc->sc->mp; 381 int error = -ECANCELED; 382 383 384 if (xchk_iscan_aborted(&xnc->collect_iscan)) 385 goto out_incomplete; 386 387 mutex_lock(&xnc->lock); 388 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino); 389 if (error) 390 goto out_abort; 391 392 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino); 393 if (error) 394 goto out_abort; 395 396 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino); 397 if (error) 398 goto out_abort; 399 400 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino); 401 if (error) 402 goto out_abort; 403 404 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino); 405 if (error) 406 goto out_abort; 407 mutex_unlock(&xnc->lock); 408 409 return 0; 410 411 out_abort: 412 mutex_unlock(&xnc->lock); 413 xchk_iscan_abort(&xnc->collect_iscan); 414 out_incomplete: 415 xchk_set_incomplete(xnc->sc); 416 return error; 417 } 418 419 /* Advance the collection scan cursor for this non-directory file. */ 420 static inline int 421 xchk_nlinks_collect_file( 422 struct xchk_nlink_ctrs *xnc, 423 struct xfs_inode *ip) 424 { 425 xfs_ilock(ip, XFS_IOLOCK_SHARED); 426 xchk_iscan_mark_visited(&xnc->collect_iscan, ip); 427 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 428 return 0; 429 } 430 431 /* Walk all directories and count inode links. */ 432 STATIC int 433 xchk_nlinks_collect( 434 struct xchk_nlink_ctrs *xnc) 435 { 436 struct xfs_scrub *sc = xnc->sc; 437 struct xfs_inode *ip; 438 int error; 439 440 /* Count the rt and quota files that are rooted in the superblock. */ 441 error = xchk_nlinks_collect_metafiles(xnc); 442 if (error) 443 return error; 444 445 /* 446 * Set up for a potentially lengthy filesystem scan by reducing our 447 * transaction resource usage for the duration. Specifically: 448 * 449 * Cancel the transaction to release the log grant space while we scan 450 * the filesystem. 451 * 452 * Create a new empty transaction to eliminate the possibility of the 453 * inode scan deadlocking on cyclical metadata. 454 * 455 * We pass the empty transaction to the file scanning function to avoid 456 * repeatedly cycling empty transactions. This can be done even though 457 * we take the IOLOCK to quiesce the file because empty transactions 458 * do not take sb_internal. 459 */ 460 xchk_trans_cancel(sc); 461 error = xchk_trans_alloc_empty(sc); 462 if (error) 463 return error; 464 465 while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) { 466 if (S_ISDIR(VFS_I(ip)->i_mode)) 467 error = xchk_nlinks_collect_dir(xnc, ip); 468 else 469 error = xchk_nlinks_collect_file(xnc, ip); 470 xchk_irele(sc, ip); 471 if (error) 472 break; 473 474 if (xchk_should_terminate(sc, &error)) 475 break; 476 } 477 xchk_iscan_iter_finish(&xnc->collect_iscan); 478 if (error) { 479 xchk_set_incomplete(sc); 480 /* 481 * If we couldn't grab an inode that was busy with a state 482 * change, change the error code so that we exit to userspace 483 * as quickly as possible. 484 */ 485 if (error == -EBUSY) 486 return -ECANCELED; 487 return error; 488 } 489 490 /* 491 * Switch out for a real transaction in preparation for building a new 492 * tree. 493 */ 494 xchk_trans_cancel(sc); 495 return xchk_setup_fs(sc); 496 } 497 498 /* 499 * Part 2: Comparing file link counters. Walk each inode and compare the link 500 * counts against our shadow information; and then walk each shadow link count 501 * structure (that wasn't covered in the first part), comparing it against the 502 * file. 503 */ 504 505 /* Read the observed link count for comparison with the actual inode. */ 506 STATIC int 507 xchk_nlinks_comparison_read( 508 struct xchk_nlink_ctrs *xnc, 509 xfs_ino_t ino, 510 struct xchk_nlink *obs) 511 { 512 struct xchk_nlink nl; 513 int error; 514 515 error = xfarray_load_sparse(xnc->nlinks, ino, &nl); 516 if (error) 517 return error; 518 519 nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN); 520 521 error = xfarray_store(xnc->nlinks, ino, &nl); 522 if (error == -EFBIG) { 523 /* 524 * EFBIG means we tried to store data at too high a byte offset 525 * in the sparse array. IOWs, we cannot complete the check and 526 * must notify userspace that the check was incomplete. This 527 * shouldn't really happen outside of the collection phase. 528 */ 529 xchk_set_incomplete(xnc->sc); 530 return -ECANCELED; 531 } 532 if (error) 533 return error; 534 535 /* Copy the counters, but do not expose the internal state. */ 536 obs->parents = nl.parents; 537 obs->backrefs = nl.backrefs; 538 obs->children = nl.children; 539 obs->flags = 0; 540 return 0; 541 } 542 543 /* Check our link count against an inode. */ 544 STATIC int 545 xchk_nlinks_compare_inode( 546 struct xchk_nlink_ctrs *xnc, 547 struct xfs_inode *ip) 548 { 549 struct xchk_nlink obs; 550 struct xfs_scrub *sc = xnc->sc; 551 uint64_t total_links; 552 unsigned int actual_nlink; 553 int error; 554 555 /* 556 * Ignore temporary files being used to stage repairs, since we assume 557 * they're correct for non-directories, and the directory repair code 558 * doesn't bump the link counts for the children. 559 */ 560 if (xrep_is_tempfile(ip)) 561 return 0; 562 563 xfs_ilock(ip, XFS_ILOCK_SHARED); 564 mutex_lock(&xnc->lock); 565 566 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 567 xchk_set_incomplete(xnc->sc); 568 error = -ECANCELED; 569 goto out_scanlock; 570 } 571 572 error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs); 573 if (error) 574 goto out_scanlock; 575 576 /* 577 * If we don't have ftype to get an accurate count of the subdirectory 578 * entries in this directory, take advantage of the fact that on a 579 * consistent ftype=0 filesystem, the number of subdirectory 580 * backreferences (dotdot entries) pointing towards this directory 581 * should be equal to the number of subdirectory entries in the 582 * directory. 583 */ 584 if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode)) 585 obs.children = obs.backrefs; 586 587 total_links = xchk_nlink_total(ip, &obs); 588 actual_nlink = VFS_I(ip)->i_nlink; 589 590 trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs); 591 592 /* 593 * If we found so many parents that we'd overflow i_nlink, we must flag 594 * this as a corruption. The VFS won't let users increase the link 595 * count, but it will let them decrease it. 596 */ 597 if (total_links > XFS_MAXLINK) { 598 xchk_ino_set_corrupt(sc, ip->i_ino); 599 goto out_corrupt; 600 } 601 602 /* Link counts should match. */ 603 if (total_links != actual_nlink) { 604 xchk_ino_set_corrupt(sc, ip->i_ino); 605 goto out_corrupt; 606 } 607 608 if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) { 609 /* 610 * The collection phase ignores directories with zero link 611 * count, so we ignore them here too. 612 * 613 * The number of subdirectory backreferences (dotdot entries) 614 * pointing towards this directory should be equal to the 615 * number of subdirectory entries in the directory. 616 */ 617 if (obs.children != obs.backrefs) 618 xchk_ino_xref_set_corrupt(sc, ip->i_ino); 619 } else { 620 /* 621 * Non-directories and unlinked directories should not have 622 * back references. 623 */ 624 if (obs.backrefs != 0) { 625 xchk_ino_set_corrupt(sc, ip->i_ino); 626 goto out_corrupt; 627 } 628 629 /* 630 * Non-directories and unlinked directories should not have 631 * children. 632 */ 633 if (obs.children != 0) { 634 xchk_ino_set_corrupt(sc, ip->i_ino); 635 goto out_corrupt; 636 } 637 } 638 639 if (ip == sc->mp->m_rootip) { 640 /* 641 * For the root of a directory tree, both the '.' and '..' 642 * entries should point to the root directory. The dotdot 643 * entry is counted as a parent of the root /and/ a backref of 644 * the root directory. 645 */ 646 if (obs.parents != 1) { 647 xchk_ino_set_corrupt(sc, ip->i_ino); 648 goto out_corrupt; 649 } 650 } else if (actual_nlink > 0) { 651 /* 652 * Linked files that are not the root directory should have at 653 * least one parent. 654 */ 655 if (obs.parents == 0) { 656 xchk_ino_set_corrupt(sc, ip->i_ino); 657 goto out_corrupt; 658 } 659 } 660 661 out_corrupt: 662 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 663 error = -ECANCELED; 664 out_scanlock: 665 mutex_unlock(&xnc->lock); 666 xfs_iunlock(ip, XFS_ILOCK_SHARED); 667 return error; 668 } 669 670 /* 671 * Check our link count against an inode that wasn't checked previously. This 672 * is intended to catch directories with dangling links, though we could be 673 * racing with inode allocation in other threads. 674 */ 675 STATIC int 676 xchk_nlinks_compare_inum( 677 struct xchk_nlink_ctrs *xnc, 678 xfs_ino_t ino) 679 { 680 struct xchk_nlink obs; 681 struct xfs_mount *mp = xnc->sc->mp; 682 struct xfs_trans *tp = xnc->sc->tp; 683 struct xfs_buf *agi_bp; 684 struct xfs_inode *ip; 685 int error; 686 687 /* 688 * The first iget failed, so try again with the variant that returns 689 * either an incore inode or the AGI buffer. If the function returns 690 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we 691 * can guarantee that the inode won't be allocated while we check for 692 * a zero link count in the observed link count data. 693 */ 694 error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip); 695 if (!error) { 696 /* Actually got an inode, so use the inode compare. */ 697 error = xchk_nlinks_compare_inode(xnc, ip); 698 xchk_irele(xnc->sc, ip); 699 return error; 700 } 701 if (error == -ENOENT || error == -EINVAL) { 702 /* No inode was found. Check for zero link count below. */ 703 error = 0; 704 } 705 if (error) 706 goto out_agi; 707 708 /* Ensure that we have protected against inode allocation/freeing. */ 709 if (agi_bp == NULL) { 710 ASSERT(agi_bp != NULL); 711 xchk_set_incomplete(xnc->sc); 712 return -ECANCELED; 713 } 714 715 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 716 xchk_set_incomplete(xnc->sc); 717 error = -ECANCELED; 718 goto out_agi; 719 } 720 721 mutex_lock(&xnc->lock); 722 error = xchk_nlinks_comparison_read(xnc, ino, &obs); 723 if (error) 724 goto out_scanlock; 725 726 trace_xchk_nlinks_check_zero(mp, ino, &obs); 727 728 /* 729 * If we can't grab the inode, the link count had better be zero. We 730 * still hold the AGI to prevent inode allocation/freeing. 731 */ 732 if (xchk_nlink_total(NULL, &obs) != 0) { 733 xchk_ino_set_corrupt(xnc->sc, ino); 734 error = -ECANCELED; 735 } 736 737 out_scanlock: 738 mutex_unlock(&xnc->lock); 739 out_agi: 740 if (agi_bp) 741 xfs_trans_brelse(tp, agi_bp); 742 return error; 743 } 744 745 /* 746 * Try to visit every inode in the filesystem to compare the link count. Move 747 * on if we can't grab an inode, since we'll revisit unchecked nlink records in 748 * the second part. 749 */ 750 static int 751 xchk_nlinks_compare_iter( 752 struct xchk_nlink_ctrs *xnc, 753 struct xfs_inode **ipp) 754 { 755 int error; 756 757 do { 758 error = xchk_iscan_iter(&xnc->compare_iscan, ipp); 759 } while (error == -EBUSY); 760 761 return error; 762 } 763 764 /* Compare the link counts we observed against the live information. */ 765 STATIC int 766 xchk_nlinks_compare( 767 struct xchk_nlink_ctrs *xnc) 768 { 769 struct xchk_nlink nl; 770 struct xfs_scrub *sc = xnc->sc; 771 struct xfs_inode *ip; 772 xfarray_idx_t cur = XFARRAY_CURSOR_INIT; 773 int error; 774 775 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 776 return 0; 777 778 /* 779 * Create a new empty transaction so that we can advance the iscan 780 * cursor without deadlocking if the inobt has a cycle and push on the 781 * inactivation workqueue. 782 */ 783 xchk_trans_cancel(sc); 784 error = xchk_trans_alloc_empty(sc); 785 if (error) 786 return error; 787 788 /* 789 * Use the inobt to walk all allocated inodes to compare the link 790 * counts. Inodes skipped by _compare_iter will be tried again in the 791 * next phase of the scan. 792 */ 793 xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan); 794 while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) { 795 error = xchk_nlinks_compare_inode(xnc, ip); 796 xchk_iscan_mark_visited(&xnc->compare_iscan, ip); 797 xchk_irele(sc, ip); 798 if (error) 799 break; 800 801 if (xchk_should_terminate(sc, &error)) 802 break; 803 } 804 xchk_iscan_iter_finish(&xnc->compare_iscan); 805 xchk_iscan_teardown(&xnc->compare_iscan); 806 if (error) 807 return error; 808 809 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 810 return 0; 811 812 /* 813 * Walk all the non-null nlink observations that weren't checked in the 814 * previous step. 815 */ 816 mutex_lock(&xnc->lock); 817 while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) { 818 xfs_ino_t ino = cur - 1; 819 820 if (nl.flags & XCHK_NLINK_COMPARE_SCANNED) 821 continue; 822 823 mutex_unlock(&xnc->lock); 824 825 error = xchk_nlinks_compare_inum(xnc, ino); 826 if (error) 827 return error; 828 829 if (xchk_should_terminate(xnc->sc, &error)) 830 return error; 831 832 mutex_lock(&xnc->lock); 833 } 834 mutex_unlock(&xnc->lock); 835 836 return error; 837 } 838 839 /* Tear down everything associated with a nlinks check. */ 840 static void 841 xchk_nlinks_teardown_scan( 842 void *priv) 843 { 844 struct xchk_nlink_ctrs *xnc = priv; 845 846 /* Discourage any hook functions that might be running. */ 847 xchk_iscan_abort(&xnc->collect_iscan); 848 849 xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); 850 851 xfarray_destroy(xnc->nlinks); 852 xnc->nlinks = NULL; 853 854 xchk_iscan_teardown(&xnc->collect_iscan); 855 mutex_destroy(&xnc->lock); 856 xnc->sc = NULL; 857 } 858 859 /* 860 * Scan all inodes in the entire filesystem to generate link count data. If 861 * the scan is successful, the counts will be left alive for a repair. If any 862 * error occurs, we'll tear everything down. 863 */ 864 STATIC int 865 xchk_nlinks_setup_scan( 866 struct xfs_scrub *sc, 867 struct xchk_nlink_ctrs *xnc) 868 { 869 struct xfs_mount *mp = sc->mp; 870 char *descr; 871 unsigned long long max_inos; 872 xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; 873 xfs_agino_t first_agino, last_agino; 874 int error; 875 876 ASSERT(xnc->sc == NULL); 877 xnc->sc = sc; 878 879 mutex_init(&xnc->lock); 880 881 /* Retry iget every tenth of a second for up to 30 seconds. */ 882 xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan); 883 884 /* 885 * Set up enough space to store an nlink record for the highest 886 * possible inode number in this system. 887 */ 888 xfs_agino_range(mp, last_agno, &first_agino, &last_agino); 889 max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; 890 descr = xchk_xfile_descr(sc, "file link counts"); 891 error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), 892 sizeof(struct xchk_nlink), &xnc->nlinks); 893 kfree(descr); 894 if (error) 895 goto out_teardown; 896 897 /* 898 * Hook into the directory entry code so that we can capture updates to 899 * file link counts. The hook only triggers for inodes that were 900 * already scanned, and the scanner thread takes each inode's ILOCK, 901 * which means that any in-progress inode updates will finish before we 902 * can scan the inode. 903 */ 904 ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); 905 xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update); 906 error = xfs_dir_hook_add(mp, &xnc->dhook); 907 if (error) 908 goto out_teardown; 909 910 /* Use deferred cleanup to pass the inode link count data to repair. */ 911 sc->buf_cleanup = xchk_nlinks_teardown_scan; 912 return 0; 913 914 out_teardown: 915 xchk_nlinks_teardown_scan(xnc); 916 return error; 917 } 918 919 /* Scrub the link count of all inodes on the filesystem. */ 920 int 921 xchk_nlinks( 922 struct xfs_scrub *sc) 923 { 924 struct xchk_nlink_ctrs *xnc = sc->buf; 925 int error = 0; 926 927 /* Set ourselves up to check link counts on the live filesystem. */ 928 error = xchk_nlinks_setup_scan(sc, xnc); 929 if (error) 930 return error; 931 932 /* Walk all inodes, picking up link count information. */ 933 error = xchk_nlinks_collect(xnc); 934 if (!xchk_xref_process_error(sc, 0, 0, &error)) 935 return error; 936 937 /* Fail fast if we're not playing with a full dataset. */ 938 if (xchk_iscan_aborted(&xnc->collect_iscan)) 939 xchk_set_incomplete(sc); 940 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) 941 return 0; 942 943 /* Compare link counts. */ 944 error = xchk_nlinks_compare(xnc); 945 if (!xchk_xref_process_error(sc, 0, 0, &error)) 946 return error; 947 948 /* Check one last time for an incomplete dataset. */ 949 if (xchk_iscan_aborted(&xnc->collect_iscan)) 950 xchk_set_incomplete(sc); 951 952 return 0; 953 } 954