// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_icache.h"
#include "xfs_iwalk.h"
#include "xfs_ialloc.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ag.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/iscan.h"
#include "scrub/orphanage.h"
#include "scrub/nlinks.h"
#include "scrub/trace.h"
#include "scrub/readdir.h"
#include "scrub/tempfile.h"

/*
 * Live Inode Link Count Checking
 * ==============================
 *
 * Inode link counts are "summary" metadata, in the sense that they are
 * computed as the number of directory entries referencing each file on the
 * filesystem. Therefore, we compute the correct link counts by creating a
 * shadow link count structure and walking every inode.
 */

/*
 * Set us up to scrub inode link counts.
 *
 * Enables the XCHK_FSGATES_DIRENTS gate, runs the repair-side setup when
 * xchk_could_repair() says a repair might follow, allocates the zeroed
 * xchk_nlink_ctrs context and publishes it through sc->buf, then finishes
 * with xchk_setup_fs().
 *
 * Returns 0 or a negative errno; -ENOMEM if the context cannot be allocated.
 */
int
xchk_setup_nlinks(
	struct xfs_scrub	*sc)
{
	struct xchk_nlink_ctrs	*xnc;
	int			error;

	xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS);

	if (xchk_could_repair(sc)) {
		error = xrep_setup_nlinks(sc);
		if (error)
			return error;
	}

	xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS);
	if (!xnc)
		return -ENOMEM;
	/* Point the context's xname at the context's own name buffer. */
	xnc->xname.name = xnc->namebuf;
	xnc->sc = sc;
	sc->buf = xnc;

	return xchk_setup_fs(sc);
}

/*
 * Part 1: Collecting file link counts. For each file, we create a shadow link
 * counting structure, then walk the entire directory tree, incrementing parent
 * and child link counts for each directory entry seen.
 *
 * To avoid false corruption reports in part 2, any failure in this part must
 * set the INCOMPLETE flag even when a negative errno is returned. This care
 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
 * ECANCELED) that are absorbed into a scrub state flag update by
 * xchk_*_process_error. Scrub and repair share the same incore data
 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
 * insufficient information.
 *
 * Because we are scanning a live filesystem, it's possible that another thread
 * will try to update the link counts for an inode that we've already scanned.
 * This will cause our counts to be incorrect. Therefore, we hook all
 * directory entry updates because that is when link count updates occur. By
 * shadowing transaction updates in this manner, live nlink check can ensure by
 * locking the inode and the shadow structure that its own copies are not out
 * of date. Because the hook code runs in a different process context from the
 * scrub code and the scrub state flags are not accessed atomically, failures
 * in the hook code must abort the iscan and the scrubber must notice the
 * aborted scan and set the incomplete flag.
 *
 * Note that we use jump labels and srcu notifier hooks to minimize the
 * overhead when live nlinks is /not/ running. Locking order for nlink
 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
 */

/*
 * Add a delta to an nlink counter, clamping the value to U32_MAX. Because
 * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
 * even if we lose some precision.
 *
 * The sum is computed in 64 bits before being clamped, so a positive delta
 * cannot wrap the counter.
 *
 * NOTE(review): @delta is signed and is converted to uint64_t for the
 * addition. Assuming xfs_nlink_t is an unsigned type no wider than 32 bits
 * (TODO confirm), a negative delta whose magnitude exceeds the current
 * counter wraps in the 64-bit sum and is then clamped to U32_MAX, not to 0.
 * Presumably that is acceptable because such a counter is already
 * inconsistent -- confirm.
 */
static inline void
careful_add(
	xfs_nlink_t	*nlinkp,
	int		delta)
{
	uint64_t	new_value = (uint64_t)(*nlinkp) + delta;

	BUILD_BUG_ON(XFS_MAXLINK > U32_MAX);
	*nlinkp = min_t(uint64_t, new_value, U32_MAX);
}

/*
 * Update incore link count information. Caller must hold the nlinks lock.
 *
 * Loads the shadow record for @ino from the sparse array, applies
 * @parents_delta, @backrefs_delta and @children_delta to the respective
 * counters, marks the record XCHK_NLINK_WRITTEN and stores it back.
 *
 * Returns 0 without doing anything if xnc->nlinks is NULL (the teardown
 * function in this file resets it to NULL after destroying the array).
 * Otherwise returns 0 or a negative errno, with -EFBIG from the store
 * translated to -ECANCELED.
 */
STATIC int
xchk_nlinks_update_incore(
	struct xchk_nlink_ctrs	*xnc,
	xfs_ino_t		ino,
	int			parents_delta,
	int			backrefs_delta,
	int			children_delta)
{
	struct xchk_nlink	nl;
	int			error;

	if (!xnc->nlinks)
		return 0;

	error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
	if (error)
		return error;

	trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta,
			backrefs_delta, children_delta);

	careful_add(&nl.parents, parents_delta);
	careful_add(&nl.backrefs, backrefs_delta);
	careful_add(&nl.children, children_delta);

	nl.flags |= XCHK_NLINK_WRITTEN;
	error = xfarray_store(xnc->nlinks, ino, &nl);
	if (error == -EFBIG) {
		/*
		 * EFBIG means we tried to store data at too high a byte offset
		 * in the sparse array. IOWs, we cannot complete the check and
		 * must notify userspace that the check was incomplete.
		 */
		error = -ECANCELED;
	}
	return error;
}

/*
 * Apply a link count change from the regular filesystem into our shadow link
 * count structure based on a directory update in progress.
 *
 * This is the callback installed on the directory update hook by
 * xchk_nlinks_setup_scan(). @nb is embedded in the xchk_nlink_ctrs and
 * @data is the xfs_dir_update_params describing the update.
 *
 * Always returns NOTIFY_DONE. A failed counter update is not returned to the
 * notifier chain; instead the collection iscan is aborted so that the
 * scrubber can notice and flag the check as incomplete.
 */
STATIC int
xchk_nlinks_live_update(
	struct notifier_block		*nb,
	unsigned long			action,
	void				*data)
{
	struct xfs_dir_update_params	*p = data;
	struct xchk_nlink_ctrs		*xnc;
	int				error;

	xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb);

	/*
	 * Ignore temporary directories being used to stage dir repairs, since
	 * we don't bump the link counts of the children.
	 */
	if (xrep_is_tempfile(p->dp))
		return NOTIFY_DONE;

	trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino,
			p->delta, p->name->name, p->name->len);

	/*
	 * If we've already scanned @dp, update the number of parents that link
	 * to @ip. If @ip is a subdirectory, update the number of child links
	 * going out of @dp.
	 */
	if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) {
		mutex_lock(&xnc->lock);
		error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta,
				0, 0);
		if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode))
			error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
					0, p->delta);
		mutex_unlock(&xnc->lock);
		if (error)
			goto out_abort;
	}

	/*
	 * If @ip is a subdirectory and we've already scanned it, update the
	 * number of backrefs pointing to @dp.
	 */
	if (S_ISDIR(VFS_IC(p->ip)->i_mode) &&
	    xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) {
		mutex_lock(&xnc->lock);
		error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0,
				p->delta, 0);
		mutex_unlock(&xnc->lock);
		if (error)
			goto out_abort;
	}

	return NOTIFY_DONE;

out_abort:
	xchk_iscan_abort(&xnc->collect_iscan);
	return NOTIFY_DONE;
}

/*
 * Bump the observed link count for the inode referenced by this entry.
 *
 * This is the per-entry callback that xchk_nlinks_collect_dir() hands to
 * xchk_dir_walk(); @priv is the xchk_nlink_ctrs.
 *
 * Returns 0 on success. Every failure returns a negative errno after marking
 * the scrub incomplete. All failure paths also abort the collection scan,
 * except the one taken when the scan was already found to be aborted.
 */
STATIC int
xchk_nlinks_collect_dirent(
	struct xfs_scrub	*sc,
	struct xfs_inode	*dp,
	xfs_dir2_dataptr_t	dapos,
	const struct xfs_name	*name,
	xfs_ino_t		ino,
	void			*priv)
{
	struct xchk_nlink_ctrs	*xnc = priv;
	bool			dot = false, dotdot = false;
	int			error;

	/* Does this name make sense? */
	if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) {
		error = -ECANCELED;
		goto out_abort;
	}

	if (name->len == 1 && name->name[0] == '.')
		dot = true;
	else if (name->len == 2 && name->name[0] == '.' &&
				   name->name[1] == '.')
		dotdot = true;

	/* Don't accept a '.' entry that points somewhere else. */
	if (dot && ino != dp->i_ino) {
		error = -ECANCELED;
		goto out_abort;
	}

	/* Don't accept an invalid inode number. */
	if (!xfs_verify_dir_ino(sc->mp, ino)) {
		error = -ECANCELED;
		goto out_abort;
	}

	/* Update the shadow link counts if we haven't already failed. */

	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
		error = -ECANCELED;
		goto out_incomplete;
	}

	trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name);

	mutex_lock(&xnc->lock);

	/*
	 * If this is a dotdot entry, it is a back link from dp to ino. How
	 * we handle this depends on whether or not dp is the root directory.
	 *
	 * The root directory is its own parent, so we pretend the dotdot entry
	 * establishes the "parent" of the root directory. Increment the
	 * number of parents of the root directory.
	 *
	 * Otherwise, increment the number of backrefs pointing back to ino.
	 */
	if (dotdot) {
		if (dp == sc->mp->m_rootip)
			error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
		else
			error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0);
		if (error)
			goto out_unlock;
	}

	/*
	 * If this dirent is a forward link from dp to ino, increment the
	 * number of parents linking into ino.
	 */
	if (!dot && !dotdot) {
		error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
		if (error)
			goto out_unlock;
	}

	/*
	 * If this dirent is a forward link to a subdirectory, increment the
	 * number of child links of dp.
	 */
	if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) {
		error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1);
		if (error)
			goto out_unlock;
	}

	mutex_unlock(&xnc->lock);
	return 0;

out_unlock:
	mutex_unlock(&xnc->lock);
out_abort:
	xchk_iscan_abort(&xnc->collect_iscan);
out_incomplete:
	xchk_set_incomplete(sc);
	return error;
}

/*
 * Walk a directory to bump the observed link counts of the children.
 *
 * Returns 0 if the directory was walked successfully, was skipped (repair
 * tempfile or zero link count), or if the walk returned -ECANCELED. Any other
 * failure marks the scrub incomplete, aborts the collection scan and returns
 * the negative errno. The directory is marked visited in the collection scan
 * only after a walk that returned no error.
 */
STATIC int
xchk_nlinks_collect_dir(
	struct xchk_nlink_ctrs	*xnc,
	struct xfs_inode	*dp)
{
	struct xfs_scrub	*sc = xnc->sc;
	unsigned int		lock_mode;
	int			error = 0;

	/*
	 * Ignore temporary directories being used to stage dir repairs, since
	 * we don't bump the link counts of the children.
	 */
	if (xrep_is_tempfile(dp))
		return 0;

	/* Prevent anyone from changing this directory while we walk it. */
	xfs_ilock(dp, XFS_IOLOCK_SHARED);
	lock_mode = xfs_ilock_data_map_shared(dp);

	/*
	 * The dotdot entry of an unlinked directory still points to the last
	 * parent, but the parent no longer links to this directory. Skip the
	 * directory to avoid overcounting.
	 */
	if (VFS_I(dp)->i_nlink == 0)
		goto out_unlock;

	/*
	 * We cannot count file links if the directory looks as though it has
	 * been zapped by the inode record repair code.
	 */
	if (xchk_dir_looks_zapped(dp)) {
		error = -EBUSY;
		goto out_abort;
	}

	error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc);
	if (error == -ECANCELED) {
		/*
		 * The dirent callback in this file has already set the
		 * incomplete flag on every path where it returns -ECANCELED,
		 * so don't propagate the error.
		 */
		error = 0;
		goto out_unlock;
	}
	if (error)
		goto out_abort;

	xchk_iscan_mark_visited(&xnc->collect_iscan, dp);
	goto out_unlock;

out_abort:
	xchk_set_incomplete(sc);
	xchk_iscan_abort(&xnc->collect_iscan);
out_unlock:
	/* Drop the locks in the reverse of the order they were taken. */
	xfs_iunlock(dp, lock_mode);
	xfs_iunlock(dp, XFS_IOLOCK_SHARED);
	return error;
}

/*
 * If this looks like a valid pointer, count it.
 *
 * Inode numbers that fail xfs_verify_ino() are skipped and 0 is returned;
 * otherwise one parent link is added to @ino's shadow record. The caller in
 * this file holds xnc->lock around the call.
 */
static inline int
xchk_nlinks_collect_metafile(
	struct xchk_nlink_ctrs	*xnc,
	xfs_ino_t		ino)
{
	if (!xfs_verify_ino(xnc->sc->mp, ino))
		return 0;

	trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino);
	return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0);
}

/*
 * Bump the link counts of metadata files rooted in the superblock.
 *
 * Counts the sb_rbmino, sb_rsumino, sb_uquotino, sb_gquotino and sb_pquotino
 * inodes. Returns 0 on success. On failure the scrub is marked incomplete
 * and a negative errno is returned; -ECANCELED if the collection scan had
 * already been aborted. A failed counter update also aborts the scan.
 */
STATIC int
xchk_nlinks_collect_metafiles(
	struct xchk_nlink_ctrs	*xnc)
{
	struct xfs_mount	*mp = xnc->sc->mp;
	int			error = -ECANCELED;


	if (xchk_iscan_aborted(&xnc->collect_iscan))
		goto out_incomplete;

	mutex_lock(&xnc->lock);
	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino);
	if (error)
		goto out_abort;

	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino);
	if (error)
		goto out_abort;

	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino);
	if (error)
		goto out_abort;

	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino);
	if (error)
		goto out_abort;

	error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino);
	if (error)
		goto out_abort;
	mutex_unlock(&xnc->lock);

	return 0;

out_abort:
	mutex_unlock(&xnc->lock);
	xchk_iscan_abort(&xnc->collect_iscan);
out_incomplete:
	xchk_set_incomplete(xnc->sc);
	return error;
}

/*
 * Advance the collection scan cursor for this non-directory file.
 *
 * The file is marked visited while its IOLOCK is held shared. Always
 * returns 0.
 */
static inline int
xchk_nlinks_collect_file(
	struct xchk_nlink_ctrs	*xnc,
	struct xfs_inode	*ip)
{
	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	xchk_iscan_mark_visited(&xnc->collect_iscan, ip);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return 0;
}

/*
 * Walk all directories and count inode links.
 *
 * Returns 0 on success or a negative errno. Any error from the inode walk
 * marks the scrub incomplete, and -EBUSY is reported as -ECANCELED.
 */
STATIC int
xchk_nlinks_collect(
	struct xchk_nlink_ctrs	*xnc)
{
	struct xfs_scrub	*sc = xnc->sc;
	struct xfs_inode	*ip;
	int			error;

	/* Count the rt and quota files that are rooted in the superblock. */
	error = xchk_nlinks_collect_metafiles(xnc);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration. Specifically:
	 *
	 * Cancel the transaction to release the log grant space while we scan
	 * the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to avoid
	 * repeatedly cycling empty transactions. This can be done even though
	 * we take the IOLOCK to quiesce the file because empty transactions
	 * do not take sb_internal.
	 */
	xchk_trans_cancel(sc);
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	/* The loop runs for as long as the iterator returns 1 with an inode. */
	while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) {
		if (S_ISDIR(VFS_I(ip)->i_mode))
			error = xchk_nlinks_collect_dir(xnc, ip);
		else
			error = xchk_nlinks_collect_file(xnc, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&xnc->collect_iscan);
	if (error) {
		xchk_set_incomplete(sc);
		/*
		 * If we couldn't grab an inode that was busy with a state
		 * change, change the error code so that we exit to userspace
		 * as quickly as possible.
		 */
		if (error == -EBUSY)
			return -ECANCELED;
		return error;
	}

	/*
	 * Switch out for a real transaction in preparation for building a new
	 * tree.
	 */
	xchk_trans_cancel(sc);
	return xchk_setup_fs(sc);
}

/*
 * Part 2: Comparing file link counters. Walk each inode and compare the link
 * counts against our shadow information; and then walk each shadow link count
 * structure (that wasn't covered in the first part), comparing it against the
 * file.
 */

/*
 * Read the observed link count for comparison with the actual inode.
 *
 * As a side effect the stored record for @ino is flagged
 * XCHK_NLINK_COMPARE_SCANNED (and XCHK_NLINK_WRITTEN) so that the second
 * comparison pass can skip it. Only the three counters are copied to @obs;
 * obs->flags is always set to 0. Both callers in this file hold xnc->lock.
 *
 * Returns 0 or a negative errno. -EFBIG from the store marks the scrub
 * incomplete and is reported as -ECANCELED.
 */
STATIC int
xchk_nlinks_comparison_read(
	struct xchk_nlink_ctrs	*xnc,
	xfs_ino_t		ino,
	struct xchk_nlink	*obs)
{
	struct xchk_nlink	nl;
	int			error;

	error = xfarray_load_sparse(xnc->nlinks, ino, &nl);
	if (error)
		return error;

	nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN);

	error = xfarray_store(xnc->nlinks, ino, &nl);
	if (error == -EFBIG) {
		/*
		 * EFBIG means we tried to store data at too high a byte offset
		 * in the sparse array. IOWs, we cannot complete the check and
		 * must notify userspace that the check was incomplete. This
		 * shouldn't really happen outside of the collection phase.
		 */
		xchk_set_incomplete(xnc->sc);
		return -ECANCELED;
	}
	if (error)
		return error;

	/* Copy the counters, but do not expose the internal state. */
	obs->parents = nl.parents;
	obs->backrefs = nl.backrefs;
	obs->children = nl.children;
	obs->flags = 0;
	return 0;
}

/*
 * Check our link count against an inode.
 *
 * Takes the inode's ILOCK shared and then xnc->lock, matching the locking
 * order documented for nlink observations earlier in this file.
 *
 * Returns 0 if the scrub's CORRUPT flag is not set afterwards, -ECANCELED if
 * it is set or if the collection scan was aborted (the latter also marks the
 * scrub incomplete), or another negative errno from reading the observation.
 * Repair tempfiles are skipped and return 0.
 */
STATIC int
xchk_nlinks_compare_inode(
	struct xchk_nlink_ctrs	*xnc,
	struct xfs_inode	*ip)
{
	struct xchk_nlink	obs;
	struct xfs_scrub	*sc = xnc->sc;
	uint64_t		total_links;
	unsigned int		actual_nlink;
	int			error;

	/*
	 * Ignore temporary files being used to stage repairs, since we assume
	 * they're correct for non-directories, and the directory repair code
	 * doesn't bump the link counts for the children.
	 */
	if (xrep_is_tempfile(ip))
		return 0;

	xfs_ilock(ip, XFS_ILOCK_SHARED);
	mutex_lock(&xnc->lock);

	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
		xchk_set_incomplete(xnc->sc);
		error = -ECANCELED;
		goto out_scanlock;
	}

	error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs);
	if (error)
		goto out_scanlock;

	/*
	 * If we don't have ftype to get an accurate count of the subdirectory
	 * entries in this directory, take advantage of the fact that on a
	 * consistent ftype=0 filesystem, the number of subdirectory
	 * backreferences (dotdot entries) pointing towards this directory
	 * should be equal to the number of subdirectory entries in the
	 * directory.
	 */
	if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode))
		obs.children = obs.backrefs;

	total_links = xchk_nlink_total(ip, &obs);
	actual_nlink = VFS_I(ip)->i_nlink;

	trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs);

	/*
	 * If we found so many parents that we'd overflow i_nlink, we must flag
	 * this as a corruption. The VFS won't let users increase the link
	 * count, but it will let them decrease it.
	 */
	if (total_links > XFS_NLINK_PINNED) {
		xchk_ino_set_corrupt(sc, ip->i_ino);
		goto out_corrupt;
	} else if (total_links > XFS_MAXLINK) {
		xchk_ino_set_warning(sc, ip->i_ino);
	}

	/* Link counts should match. */
	if (total_links != actual_nlink) {
		xchk_ino_set_corrupt(sc, ip->i_ino);
		goto out_corrupt;
	}

	if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) {
		/*
		 * The collection phase ignores directories with zero link
		 * count, so we ignore them here too.
		 *
		 * The number of subdirectory backreferences (dotdot entries)
		 * pointing towards this directory should be equal to the
		 * number of subdirectory entries in the directory.
		 */
		if (obs.children != obs.backrefs)
			xchk_ino_xref_set_corrupt(sc, ip->i_ino);
	} else {
		/*
		 * Non-directories and unlinked directories should not have
		 * back references.
		 */
		if (obs.backrefs != 0) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}

		/*
		 * Non-directories and unlinked directories should not have
		 * children.
		 */
		if (obs.children != 0) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}
	}

	if (ip == sc->mp->m_rootip) {
		/*
		 * For the root of a directory tree, both the '.' and '..'
		 * entries should point to the root directory. The dotdot
		 * entry is counted as a parent of the root /and/ a backref of
		 * the root directory.
		 *
		 * NOTE(review): xchk_nlinks_collect_dirent() in this file
		 * only bumps the parent count for the root directory's dotdot
		 * entry, not the backref count, so the "and a backref" part
		 * of the statement above looks stale -- confirm.
		 */
		if (obs.parents != 1) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}
	} else if (actual_nlink > 0) {
		/*
		 * Linked files that are not the root directory should have at
		 * least one parent.
		 */
		if (obs.parents == 0) {
			xchk_ino_set_corrupt(sc, ip->i_ino);
			goto out_corrupt;
		}
	}

out_corrupt:
	/* Stop the comparison as soon as corruption has been recorded. */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		error = -ECANCELED;
out_scanlock:
	mutex_unlock(&xnc->lock);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	return error;
}

/*
 * Check our link count against an inode that wasn't checked previously. This
 * is intended to catch directories with dangling links, though we could be
 * racing with inode allocation in other threads.
 *
 * Returns 0 if nothing was flagged, -ECANCELED if corruption was flagged or
 * the check could not be completed, or another negative errno.
 */
STATIC int
xchk_nlinks_compare_inum(
	struct xchk_nlink_ctrs	*xnc,
	xfs_ino_t		ino)
{
	struct xchk_nlink	obs;
	struct xfs_mount	*mp = xnc->sc->mp;
	struct xfs_trans	*tp = xnc->sc->tp;
	struct xfs_buf		*agi_bp;
	struct xfs_inode	*ip;
	int			error;

	/*
	 * The first iget failed, so try again with the variant that returns
	 * either an incore inode or the AGI buffer. If the function returns
	 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we
	 * can guarantee that the inode won't be allocated while we check for
	 * a zero link count in the observed link count data.
	 */
	error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip);
	if (!error) {
		/* Actually got an inode, so use the inode compare. */
		error = xchk_nlinks_compare_inode(xnc, ip);
		xchk_irele(xnc->sc, ip);
		return error;
	}
	if (error == -ENOENT || error == -EINVAL) {
		/* No inode was found. Check for zero link count below. */
		error = 0;
	}
	if (error)
		goto out_agi;

	/* Ensure that we have protected against inode allocation/freeing. */
	if (agi_bp == NULL) {
		ASSERT(agi_bp != NULL);
		xchk_set_incomplete(xnc->sc);
		return -ECANCELED;
	}

	if (xchk_iscan_aborted(&xnc->collect_iscan)) {
		xchk_set_incomplete(xnc->sc);
		error = -ECANCELED;
		goto out_agi;
	}

	mutex_lock(&xnc->lock);
	error = xchk_nlinks_comparison_read(xnc, ino, &obs);
	if (error)
		goto out_scanlock;

	trace_xchk_nlinks_check_zero(mp, ino, &obs);

	/*
	 * If we can't grab the inode, the link count had better be zero. We
	 * still hold the AGI to prevent inode allocation/freeing.
	 */
	if (xchk_nlink_total(NULL, &obs) != 0) {
		xchk_ino_set_corrupt(xnc->sc, ino);
		error = -ECANCELED;
	}

out_scanlock:
	mutex_unlock(&xnc->lock);
out_agi:
	if (agi_bp)
		xfs_trans_brelse(tp, agi_bp);
	return error;
}

/*
 * Try to visit every inode in the filesystem to compare the link count. Move
 * on if we can't grab an inode, since we'll revisit unchecked nlink records in
 * the second part.
 *
 * Keeps calling the comparison scan iterator for as long as it returns
 * -EBUSY and returns the first other result.
 */
static int
xchk_nlinks_compare_iter(
	struct xchk_nlink_ctrs	*xnc,
	struct xfs_inode	**ipp)
{
	int			error;

	do {
		error = xchk_iscan_iter(&xnc->compare_iscan, ipp);
	} while (error == -EBUSY);

	return error;
}

/*
 * Compare the link counts we observed against the live information.
 *
 * Returns 0 immediately if the scrub is already flagged CORRUPT, both on
 * entry and again after the inode walk. Otherwise returns 0 or the first
 * negative errno from either comparison pass.
 */
STATIC int
xchk_nlinks_compare(
	struct xchk_nlink_ctrs	*xnc)
{
	struct xchk_nlink	nl;
	struct xfs_scrub	*sc = xnc->sc;
	struct xfs_inode	*ip;
	xfarray_idx_t		cur = XFARRAY_CURSOR_INIT;
	int			error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/*
	 * Create a new empty transaction so that we can advance the iscan
	 * cursor without deadlocking if the inobt has a cycle and push on the
	 * inactivation workqueue.
	 */
	xchk_trans_cancel(sc);
	error = xchk_trans_alloc_empty(sc);
	if (error)
		return error;

	/*
	 * Use the inobt to walk all allocated inodes to compare the link
	 * counts. Inodes skipped by _compare_iter will be tried again in the
	 * next phase of the scan.
	 */
	xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan);
	while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) {
		error = xchk_nlinks_compare_inode(xnc, ip);
		xchk_iscan_mark_visited(&xnc->compare_iscan, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&xnc->compare_iscan);
	xchk_iscan_teardown(&xnc->compare_iscan);
	if (error)
		return error;

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/*
	 * Walk all the non-null nlink observations that weren't checked in the
	 * previous step.
	 */
	mutex_lock(&xnc->lock);
	while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) {
		/*
		 * NOTE(review): presumably xfarray_iter leaves the cursor one
		 * past the record it just returned, hence the -1 -- confirm
		 * against the xfarray implementation.
		 */
		xfs_ino_t	ino = cur - 1;

		if (nl.flags & XCHK_NLINK_COMPARE_SCANNED)
			continue;

		/*
		 * Drop the nlinks lock before comparing; the comparison
		 * helper takes xnc->lock itself. Note that the early returns
		 * below leave with the lock already released.
		 */
		mutex_unlock(&xnc->lock);

		error = xchk_nlinks_compare_inum(xnc, ino);
		if (error)
			return error;

		if (xchk_should_terminate(xnc->sc, &error))
			return error;

		mutex_lock(&xnc->lock);
	}
	mutex_unlock(&xnc->lock);

	return error;
}

/*
 * Tear down everything associated with a nlinks check.
 *
 * @priv is the xchk_nlink_ctrs. This function is installed as
 * sc->buf_cleanup by xchk_nlinks_setup_scan() and is also called directly
 * from that function's error path.
 */
static void
xchk_nlinks_teardown_scan(
	void			*priv)
{
	struct xchk_nlink_ctrs	*xnc = priv;

	/* Discourage any hook functions that might be running. */
	xchk_iscan_abort(&xnc->collect_iscan);

	xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook);

	xfarray_destroy(xnc->nlinks);
	/* A NULL array makes xchk_nlinks_update_incore() a no-op. */
	xnc->nlinks = NULL;

	xchk_iscan_teardown(&xnc->collect_iscan);
	mutex_destroy(&xnc->lock);
	xnc->sc = NULL;
}

/*
 * Scan all inodes in the entire filesystem to generate link count data. If
 * the scan is successful, the counts will be left alive for a repair. If any
 * error occurs, we'll tear everything down.
 *
 * Initializes the nlinks lock, starts the collection iscan, creates the
 * sparse array of shadow link counts and installs the directory update hook.
 * Returns 0 or a negative errno.
 */
STATIC int
xchk_nlinks_setup_scan(
	struct xfs_scrub	*sc,
	struct xchk_nlink_ctrs	*xnc)
{
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	unsigned long long	max_inos;
	xfs_agnumber_t		last_agno = mp->m_sb.sb_agcount - 1;
	xfs_agino_t		first_agino, last_agino;
	int			error;

	mutex_init(&xnc->lock);

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan);

	/*
	 * Set up enough space to store an nlink record for the highest
	 * possible inode number in this system.
	 */
	xfs_agino_range(mp, last_agno, &first_agino, &last_agino);
	max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1;
	descr = xchk_xfile_descr(sc, "file link counts");
	error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos),
			sizeof(struct xchk_nlink), &xnc->nlinks);
	kfree(descr);
	if (error)
		goto out_teardown;

	/*
	 * Hook into the directory entry code so that we can capture updates to
	 * file link counts. The hook only triggers for inodes that were
	 * already scanned, and the scanner thread takes each inode's ILOCK,
	 * which means that any in-progress inode updates will finish before we
	 * can scan the inode.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_DIRENTS);
	xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update);
	error = xfs_dir_hook_add(mp, &xnc->dhook);
	if (error)
		goto out_teardown;

	/* Use deferred cleanup to pass the inode link count data to repair. */
	sc->buf_cleanup = xchk_nlinks_teardown_scan;
	return 0;

out_teardown:
	xchk_nlinks_teardown_scan(xnc);
	return error;
}

/*
 * Scrub the link count of all inodes on the filesystem.
 *
 * Runs the collection phase and then, unless the dataset is incomplete, the
 * comparison phase. Returns 0 once the phases have run or been skipped, or a
 * negative errno from setup or from xchk_xref_process_error().
 */
int
xchk_nlinks(
	struct xfs_scrub	*sc)
{
	struct xchk_nlink_ctrs	*xnc = sc->buf;
	int			error = 0;

	/* Set ourselves up to check link counts on the live filesystem. */
	error = xchk_nlinks_setup_scan(sc, xnc);
	if (error)
		return error;

	/* Walk all inodes, picking up link count information. */
	error = xchk_nlinks_collect(xnc);
	if (!xchk_xref_process_error(sc, 0, 0, &error))
		return error;

	/* Fail fast if we're not playing with a full dataset. */
	if (xchk_iscan_aborted(&xnc->collect_iscan))
		xchk_set_incomplete(sc);
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE)
		return 0;

	/* Compare link counts. */
	error = xchk_nlinks_compare(xnc);
	if (!xchk_xref_process_error(sc, 0, 0, &error))
		return error;

	/* Check one last time for an incomplete dataset. */
	if (xchk_iscan_aborted(&xnc->collect_iscan))
		xchk_set_incomplete(sc);

	return 0;
}