1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_icache.h" 16 #include "xfs_iwalk.h" 17 #include "xfs_ialloc.h" 18 #include "xfs_dir2.h" 19 #include "xfs_dir2_priv.h" 20 #include "xfs_ag.h" 21 #include "scrub/scrub.h" 22 #include "scrub/common.h" 23 #include "scrub/repair.h" 24 #include "scrub/xfile.h" 25 #include "scrub/xfarray.h" 26 #include "scrub/iscan.h" 27 #include "scrub/nlinks.h" 28 #include "scrub/trace.h" 29 #include "scrub/readdir.h" 30 31 /* 32 * Live Inode Link Count Checking 33 * ============================== 34 * 35 * Inode link counts are "summary" metadata, in the sense that they are 36 * computed as the number of directory entries referencing each file on the 37 * filesystem. Therefore, we compute the correct link counts by creating a 38 * shadow link count structure and walking every inode. 39 */ 40 41 /* Set us up to scrub inode link counts. */ 42 int 43 xchk_setup_nlinks( 44 struct xfs_scrub *sc) 45 { 46 xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); 47 48 sc->buf = kzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS); 49 if (!sc->buf) 50 return -ENOMEM; 51 52 return xchk_setup_fs(sc); 53 } 54 55 /* 56 * Part 1: Collecting file link counts. For each file, we create a shadow link 57 * counting structure, then walk the entire directory tree, incrementing parent 58 * and child link counts for each directory entry seen. 59 * 60 * To avoid false corruption reports in part 2, any failure in this part must 61 * set the INCOMPLETE flag even when a negative errno is returned. This care 62 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED, 63 * ECANCELED) that are absorbed into a scrub state flag update by 64 * xchk_*_process_error. Scrub and repair share the same incore data 65 * structures, so the INCOMPLETE flag is critical to prevent a repair based on 66 * insufficient information. 67 * 68 * Because we are scanning a live filesystem, it's possible that another thread 69 * will try to update the link counts for an inode that we've already scanned. 70 * This will cause our counts to be incorrect. Therefore, we hook all 71 * directory entry updates because that is when link count updates occur. By 72 * shadowing transaction updates in this manner, live nlink check can ensure by 73 * locking the inode and the shadow structure that its own copies are not out 74 * of date. Because the hook code runs in a different process context from the 75 * scrub code and the scrub state flags are not accessed atomically, failures 76 * in the hook code must abort the iscan and the scrubber must notice the 77 * aborted scan and set the incomplete flag. 78 * 79 * Note that we use jump labels and srcu notifier hooks to minimize the 80 * overhead when live nlinks is /not/ running. Locking order for nlink 81 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock. 82 */ 83 84 /* 85 * Add a delta to an nlink counter, clamping the value to U32_MAX. Because 86 * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results 87 * even if we lose some precision. 88 */ 89 static inline void 90 careful_add( 91 xfs_nlink_t *nlinkp, 92 int delta) 93 { 94 uint64_t new_value = (uint64_t)(*nlinkp) + delta; 95 96 BUILD_BUG_ON(XFS_MAXLINK > U32_MAX); 97 *nlinkp = min_t(uint64_t, new_value, U32_MAX); 98 } 99 100 /* Update incore link count information. Caller must hold the nlinks lock. */ 101 STATIC int 102 xchk_nlinks_update_incore( 103 struct xchk_nlink_ctrs *xnc, 104 xfs_ino_t ino, 105 int parents_delta, 106 int backrefs_delta, 107 int children_delta) 108 { 109 struct xchk_nlink nl; 110 int error; 111 112 if (!xnc->nlinks) 113 return 0; 114 115 error = xfarray_load_sparse(xnc->nlinks, ino, &nl); 116 if (error) 117 return error; 118 119 trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta, 120 backrefs_delta, children_delta); 121 122 careful_add(&nl.parents, parents_delta); 123 careful_add(&nl.backrefs, backrefs_delta); 124 careful_add(&nl.children, children_delta); 125 126 nl.flags |= XCHK_NLINK_WRITTEN; 127 error = xfarray_store(xnc->nlinks, ino, &nl); 128 if (error == -EFBIG) { 129 /* 130 * EFBIG means we tried to store data at too high a byte offset 131 * in the sparse array. IOWs, we cannot complete the check and 132 * must notify userspace that the check was incomplete. 133 */ 134 error = -ECANCELED; 135 } 136 return error; 137 } 138 139 /* 140 * Apply a link count change from the regular filesystem into our shadow link 141 * count structure based on a directory update in progress. 142 */ 143 STATIC int 144 xchk_nlinks_live_update( 145 struct notifier_block *nb, 146 unsigned long action, 147 void *data) 148 { 149 struct xfs_dir_update_params *p = data; 150 struct xchk_nlink_ctrs *xnc; 151 int error; 152 153 xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb); 154 155 trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino, 156 p->delta, p->name->name, p->name->len); 157 158 /* 159 * If we've already scanned @dp, update the number of parents that link 160 * to @ip. If @ip is a subdirectory, update the number of child links 161 * going out of @dp. 162 */ 163 if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) { 164 mutex_lock(&xnc->lock); 165 error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta, 166 0, 0); 167 if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode)) 168 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, 169 0, p->delta); 170 mutex_unlock(&xnc->lock); 171 if (error) 172 goto out_abort; 173 } 174 175 /* 176 * If @ip is a subdirectory and we've already scanned it, update the 177 * number of backrefs pointing to @dp. 178 */ 179 if (S_ISDIR(VFS_IC(p->ip)->i_mode) && 180 xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) { 181 mutex_lock(&xnc->lock); 182 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, 183 p->delta, 0); 184 mutex_unlock(&xnc->lock); 185 if (error) 186 goto out_abort; 187 } 188 189 return NOTIFY_DONE; 190 191 out_abort: 192 xchk_iscan_abort(&xnc->collect_iscan); 193 return NOTIFY_DONE; 194 } 195 196 /* Bump the observed link count for the inode referenced by this entry. */ 197 STATIC int 198 xchk_nlinks_collect_dirent( 199 struct xfs_scrub *sc, 200 struct xfs_inode *dp, 201 xfs_dir2_dataptr_t dapos, 202 const struct xfs_name *name, 203 xfs_ino_t ino, 204 void *priv) 205 { 206 struct xchk_nlink_ctrs *xnc = priv; 207 bool dot = false, dotdot = false; 208 int error; 209 210 /* Does this name make sense? */ 211 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) { 212 error = -ECANCELED; 213 goto out_abort; 214 } 215 216 if (name->len == 1 && name->name[0] == '.') 217 dot = true; 218 else if (name->len == 2 && name->name[0] == '.' && 219 name->name[1] == '.') 220 dotdot = true; 221 222 /* Don't accept a '.' entry that points somewhere else. */ 223 if (dot && ino != dp->i_ino) { 224 error = -ECANCELED; 225 goto out_abort; 226 } 227 228 /* Don't accept an invalid inode number. */ 229 if (!xfs_verify_dir_ino(sc->mp, ino)) { 230 error = -ECANCELED; 231 goto out_abort; 232 } 233 234 /* Update the shadow link counts if we haven't already failed. */ 235 236 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 237 error = -ECANCELED; 238 goto out_incomplete; 239 } 240 241 trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name); 242 243 mutex_lock(&xnc->lock); 244 245 /* 246 * If this is a dotdot entry, it is a back link from dp to ino. How 247 * we handle this depends on whether or not dp is the root directory. 248 * 249 * The root directory is its own parent, so we pretend the dotdot entry 250 * establishes the "parent" of the root directory. Increment the 251 * number of parents of the root directory. 252 * 253 * Otherwise, increment the number of backrefs pointing back to ino. 254 */ 255 if (dotdot) { 256 if (dp == sc->mp->m_rootip) 257 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 258 else 259 error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); 260 if (error) 261 goto out_unlock; 262 } 263 264 /* 265 * If this dirent is a forward link from dp to ino, increment the 266 * number of parents linking into ino. 267 */ 268 if (!dot && !dotdot) { 269 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 270 if (error) 271 goto out_unlock; 272 } 273 274 /* 275 * If this dirent is a forward link to a subdirectory, increment the 276 * number of child links of dp. 277 */ 278 if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) { 279 error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1); 280 if (error) 281 goto out_unlock; 282 } 283 284 mutex_unlock(&xnc->lock); 285 return 0; 286 287 out_unlock: 288 mutex_unlock(&xnc->lock); 289 out_abort: 290 xchk_iscan_abort(&xnc->collect_iscan); 291 out_incomplete: 292 xchk_set_incomplete(sc); 293 return error; 294 } 295 296 /* Walk a directory to bump the observed link counts of the children. */ 297 STATIC int 298 xchk_nlinks_collect_dir( 299 struct xchk_nlink_ctrs *xnc, 300 struct xfs_inode *dp) 301 { 302 struct xfs_scrub *sc = xnc->sc; 303 unsigned int lock_mode; 304 int error = 0; 305 306 /* Prevent anyone from changing this directory while we walk it. */ 307 xfs_ilock(dp, XFS_IOLOCK_SHARED); 308 lock_mode = xfs_ilock_data_map_shared(dp); 309 310 /* 311 * The dotdot entry of an unlinked directory still points to the last 312 * parent, but the parent no longer links to this directory. Skip the 313 * directory to avoid overcounting. 314 */ 315 if (VFS_I(dp)->i_nlink == 0) 316 goto out_unlock; 317 318 /* 319 * We cannot count file links if the directory looks as though it has 320 * been zapped by the inode record repair code. 321 */ 322 if (xchk_dir_looks_zapped(dp)) { 323 error = -EBUSY; 324 goto out_abort; 325 } 326 327 error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc); 328 if (error == -ECANCELED) { 329 error = 0; 330 goto out_unlock; 331 } 332 if (error) 333 goto out_abort; 334 335 xchk_iscan_mark_visited(&xnc->collect_iscan, dp); 336 goto out_unlock; 337 338 out_abort: 339 xchk_set_incomplete(sc); 340 xchk_iscan_abort(&xnc->collect_iscan); 341 out_unlock: 342 xfs_iunlock(dp, lock_mode); 343 xfs_iunlock(dp, XFS_IOLOCK_SHARED); 344 return error; 345 } 346 347 /* If this looks like a valid pointer, count it. */ 348 static inline int 349 xchk_nlinks_collect_metafile( 350 struct xchk_nlink_ctrs *xnc, 351 xfs_ino_t ino) 352 { 353 if (!xfs_verify_ino(xnc->sc->mp, ino)) 354 return 0; 355 356 trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino); 357 return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 358 } 359 360 /* Bump the link counts of metadata files rooted in the superblock. */ 361 STATIC int 362 xchk_nlinks_collect_metafiles( 363 struct xchk_nlink_ctrs *xnc) 364 { 365 struct xfs_mount *mp = xnc->sc->mp; 366 int error = -ECANCELED; 367 368 369 if (xchk_iscan_aborted(&xnc->collect_iscan)) 370 goto out_incomplete; 371 372 mutex_lock(&xnc->lock); 373 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino); 374 if (error) 375 goto out_abort; 376 377 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino); 378 if (error) 379 goto out_abort; 380 381 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino); 382 if (error) 383 goto out_abort; 384 385 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino); 386 if (error) 387 goto out_abort; 388 389 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino); 390 if (error) 391 goto out_abort; 392 mutex_unlock(&xnc->lock); 393 394 return 0; 395 396 out_abort: 397 mutex_unlock(&xnc->lock); 398 xchk_iscan_abort(&xnc->collect_iscan); 399 out_incomplete: 400 xchk_set_incomplete(xnc->sc); 401 return error; 402 } 403 404 /* Advance the collection scan cursor for this non-directory file. */ 405 static inline int 406 xchk_nlinks_collect_file( 407 struct xchk_nlink_ctrs *xnc, 408 struct xfs_inode *ip) 409 { 410 xfs_ilock(ip, XFS_IOLOCK_SHARED); 411 xchk_iscan_mark_visited(&xnc->collect_iscan, ip); 412 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 413 return 0; 414 } 415 416 /* Walk all directories and count inode links. */ 417 STATIC int 418 xchk_nlinks_collect( 419 struct xchk_nlink_ctrs *xnc) 420 { 421 struct xfs_scrub *sc = xnc->sc; 422 struct xfs_inode *ip; 423 int error; 424 425 /* Count the rt and quota files that are rooted in the superblock. */ 426 error = xchk_nlinks_collect_metafiles(xnc); 427 if (error) 428 return error; 429 430 /* 431 * Set up for a potentially lengthy filesystem scan by reducing our 432 * transaction resource usage for the duration. Specifically: 433 * 434 * Cancel the transaction to release the log grant space while we scan 435 * the filesystem. 436 * 437 * Create a new empty transaction to eliminate the possibility of the 438 * inode scan deadlocking on cyclical metadata. 439 * 440 * We pass the empty transaction to the file scanning function to avoid 441 * repeatedly cycling empty transactions. This can be done even though 442 * we take the IOLOCK to quiesce the file because empty transactions 443 * do not take sb_internal. 444 */ 445 xchk_trans_cancel(sc); 446 error = xchk_trans_alloc_empty(sc); 447 if (error) 448 return error; 449 450 while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) { 451 if (S_ISDIR(VFS_I(ip)->i_mode)) 452 error = xchk_nlinks_collect_dir(xnc, ip); 453 else 454 error = xchk_nlinks_collect_file(xnc, ip); 455 xchk_irele(sc, ip); 456 if (error) 457 break; 458 459 if (xchk_should_terminate(sc, &error)) 460 break; 461 } 462 xchk_iscan_iter_finish(&xnc->collect_iscan); 463 if (error) { 464 xchk_set_incomplete(sc); 465 /* 466 * If we couldn't grab an inode that was busy with a state 467 * change, change the error code so that we exit to userspace 468 * as quickly as possible. 469 */ 470 if (error == -EBUSY) 471 return -ECANCELED; 472 return error; 473 } 474 475 /* 476 * Switch out for a real transaction in preparation for building a new 477 * tree. 478 */ 479 xchk_trans_cancel(sc); 480 return xchk_setup_fs(sc); 481 } 482 483 /* 484 * Part 2: Comparing file link counters. Walk each inode and compare the link 485 * counts against our shadow information; and then walk each shadow link count 486 * structure (that wasn't covered in the first part), comparing it against the 487 * file. 488 */ 489 490 /* Read the observed link count for comparison with the actual inode. */ 491 STATIC int 492 xchk_nlinks_comparison_read( 493 struct xchk_nlink_ctrs *xnc, 494 xfs_ino_t ino, 495 struct xchk_nlink *obs) 496 { 497 struct xchk_nlink nl; 498 int error; 499 500 error = xfarray_load_sparse(xnc->nlinks, ino, &nl); 501 if (error) 502 return error; 503 504 nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN); 505 506 error = xfarray_store(xnc->nlinks, ino, &nl); 507 if (error == -EFBIG) { 508 /* 509 * EFBIG means we tried to store data at too high a byte offset 510 * in the sparse array. IOWs, we cannot complete the check and 511 * must notify userspace that the check was incomplete. This 512 * shouldn't really happen outside of the collection phase. 513 */ 514 xchk_set_incomplete(xnc->sc); 515 return -ECANCELED; 516 } 517 if (error) 518 return error; 519 520 /* Copy the counters, but do not expose the internal state. */ 521 obs->parents = nl.parents; 522 obs->backrefs = nl.backrefs; 523 obs->children = nl.children; 524 obs->flags = 0; 525 return 0; 526 } 527 528 /* Check our link count against an inode. */ 529 STATIC int 530 xchk_nlinks_compare_inode( 531 struct xchk_nlink_ctrs *xnc, 532 struct xfs_inode *ip) 533 { 534 struct xchk_nlink obs; 535 struct xfs_scrub *sc = xnc->sc; 536 uint64_t total_links; 537 unsigned int actual_nlink; 538 int error; 539 540 xfs_ilock(ip, XFS_ILOCK_SHARED); 541 mutex_lock(&xnc->lock); 542 543 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 544 xchk_set_incomplete(xnc->sc); 545 error = -ECANCELED; 546 goto out_scanlock; 547 } 548 549 error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs); 550 if (error) 551 goto out_scanlock; 552 553 /* 554 * If we don't have ftype to get an accurate count of the subdirectory 555 * entries in this directory, take advantage of the fact that on a 556 * consistent ftype=0 filesystem, the number of subdirectory 557 * backreferences (dotdot entries) pointing towards this directory 558 * should be equal to the number of subdirectory entries in the 559 * directory. 560 */ 561 if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode)) 562 obs.children = obs.backrefs; 563 564 total_links = xchk_nlink_total(ip, &obs); 565 actual_nlink = VFS_I(ip)->i_nlink; 566 567 trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs); 568 569 /* 570 * If we found so many parents that we'd overflow i_nlink, we must flag 571 * this as a corruption. The VFS won't let users increase the link 572 * count, but it will let them decrease it. 573 */ 574 if (total_links > XFS_MAXLINK) { 575 xchk_ino_set_corrupt(sc, ip->i_ino); 576 goto out_corrupt; 577 } 578 579 /* Link counts should match. */ 580 if (total_links != actual_nlink) { 581 xchk_ino_set_corrupt(sc, ip->i_ino); 582 goto out_corrupt; 583 } 584 585 if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) { 586 /* 587 * The collection phase ignores directories with zero link 588 * count, so we ignore them here too. 589 * 590 * The number of subdirectory backreferences (dotdot entries) 591 * pointing towards this directory should be equal to the 592 * number of subdirectory entries in the directory. 593 */ 594 if (obs.children != obs.backrefs) 595 xchk_ino_xref_set_corrupt(sc, ip->i_ino); 596 } else { 597 /* 598 * Non-directories and unlinked directories should not have 599 * back references. 600 */ 601 if (obs.backrefs != 0) { 602 xchk_ino_set_corrupt(sc, ip->i_ino); 603 goto out_corrupt; 604 } 605 606 /* 607 * Non-directories and unlinked directories should not have 608 * children. 609 */ 610 if (obs.children != 0) { 611 xchk_ino_set_corrupt(sc, ip->i_ino); 612 goto out_corrupt; 613 } 614 } 615 616 if (ip == sc->mp->m_rootip) { 617 /* 618 * For the root of a directory tree, both the '.' and '..' 619 * entries should point to the root directory. The dotdot 620 * entry is counted as a parent of the root /and/ a backref of 621 * the root directory. 622 */ 623 if (obs.parents != 1) { 624 xchk_ino_set_corrupt(sc, ip->i_ino); 625 goto out_corrupt; 626 } 627 } else if (actual_nlink > 0) { 628 /* 629 * Linked files that are not the root directory should have at 630 * least one parent. 631 */ 632 if (obs.parents == 0) { 633 xchk_ino_set_corrupt(sc, ip->i_ino); 634 goto out_corrupt; 635 } 636 } 637 638 out_corrupt: 639 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 640 error = -ECANCELED; 641 out_scanlock: 642 mutex_unlock(&xnc->lock); 643 xfs_iunlock(ip, XFS_ILOCK_SHARED); 644 return error; 645 } 646 647 /* 648 * Check our link count against an inode that wasn't checked previously. This 649 * is intended to catch directories with dangling links, though we could be 650 * racing with inode allocation in other threads. 651 */ 652 STATIC int 653 xchk_nlinks_compare_inum( 654 struct xchk_nlink_ctrs *xnc, 655 xfs_ino_t ino) 656 { 657 struct xchk_nlink obs; 658 struct xfs_mount *mp = xnc->sc->mp; 659 struct xfs_trans *tp = xnc->sc->tp; 660 struct xfs_buf *agi_bp; 661 struct xfs_inode *ip; 662 int error; 663 664 /* 665 * The first iget failed, so try again with the variant that returns 666 * either an incore inode or the AGI buffer. If the function returns 667 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we 668 * can guarantee that the inode won't be allocated while we check for 669 * a zero link count in the observed link count data. 670 */ 671 error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip); 672 if (!error) { 673 /* Actually got an inode, so use the inode compare. */ 674 error = xchk_nlinks_compare_inode(xnc, ip); 675 xchk_irele(xnc->sc, ip); 676 return error; 677 } 678 if (error == -ENOENT || error == -EINVAL) { 679 /* No inode was found. Check for zero link count below. */ 680 error = 0; 681 } 682 if (error) 683 goto out_agi; 684 685 /* Ensure that we have protected against inode allocation/freeing. */ 686 if (agi_bp == NULL) { 687 ASSERT(agi_bp != NULL); 688 xchk_set_incomplete(xnc->sc); 689 return -ECANCELED; 690 } 691 692 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 693 xchk_set_incomplete(xnc->sc); 694 error = -ECANCELED; 695 goto out_agi; 696 } 697 698 mutex_lock(&xnc->lock); 699 error = xchk_nlinks_comparison_read(xnc, ino, &obs); 700 if (error) 701 goto out_scanlock; 702 703 trace_xchk_nlinks_check_zero(mp, ino, &obs); 704 705 /* 706 * If we can't grab the inode, the link count had better be zero. We 707 * still hold the AGI to prevent inode allocation/freeing. 708 */ 709 if (xchk_nlink_total(NULL, &obs) != 0) { 710 xchk_ino_set_corrupt(xnc->sc, ino); 711 error = -ECANCELED; 712 } 713 714 out_scanlock: 715 mutex_unlock(&xnc->lock); 716 out_agi: 717 if (agi_bp) 718 xfs_trans_brelse(tp, agi_bp); 719 return error; 720 } 721 722 /* 723 * Try to visit every inode in the filesystem to compare the link count. Move 724 * on if we can't grab an inode, since we'll revisit unchecked nlink records in 725 * the second part. 726 */ 727 static int 728 xchk_nlinks_compare_iter( 729 struct xchk_nlink_ctrs *xnc, 730 struct xfs_inode **ipp) 731 { 732 int error; 733 734 do { 735 error = xchk_iscan_iter(&xnc->compare_iscan, ipp); 736 } while (error == -EBUSY); 737 738 return error; 739 } 740 741 /* Compare the link counts we observed against the live information. */ 742 STATIC int 743 xchk_nlinks_compare( 744 struct xchk_nlink_ctrs *xnc) 745 { 746 struct xchk_nlink nl; 747 struct xfs_scrub *sc = xnc->sc; 748 struct xfs_inode *ip; 749 xfarray_idx_t cur = XFARRAY_CURSOR_INIT; 750 int error; 751 752 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 753 return 0; 754 755 /* 756 * Create a new empty transaction so that we can advance the iscan 757 * cursor without deadlocking if the inobt has a cycle and push on the 758 * inactivation workqueue. 759 */ 760 xchk_trans_cancel(sc); 761 error = xchk_trans_alloc_empty(sc); 762 if (error) 763 return error; 764 765 /* 766 * Use the inobt to walk all allocated inodes to compare the link 767 * counts. Inodes skipped by _compare_iter will be tried again in the 768 * next phase of the scan. 769 */ 770 xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan); 771 while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) { 772 error = xchk_nlinks_compare_inode(xnc, ip); 773 xchk_iscan_mark_visited(&xnc->compare_iscan, ip); 774 xchk_irele(sc, ip); 775 if (error) 776 break; 777 778 if (xchk_should_terminate(sc, &error)) 779 break; 780 } 781 xchk_iscan_iter_finish(&xnc->compare_iscan); 782 xchk_iscan_teardown(&xnc->compare_iscan); 783 if (error) 784 return error; 785 786 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 787 return 0; 788 789 /* 790 * Walk all the non-null nlink observations that weren't checked in the 791 * previous step. 792 */ 793 mutex_lock(&xnc->lock); 794 while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) { 795 xfs_ino_t ino = cur - 1; 796 797 if (nl.flags & XCHK_NLINK_COMPARE_SCANNED) 798 continue; 799 800 mutex_unlock(&xnc->lock); 801 802 error = xchk_nlinks_compare_inum(xnc, ino); 803 if (error) 804 return error; 805 806 if (xchk_should_terminate(xnc->sc, &error)) 807 return error; 808 809 mutex_lock(&xnc->lock); 810 } 811 mutex_unlock(&xnc->lock); 812 813 return error; 814 } 815 816 /* Tear down everything associated with a nlinks check. */ 817 static void 818 xchk_nlinks_teardown_scan( 819 void *priv) 820 { 821 struct xchk_nlink_ctrs *xnc = priv; 822 823 /* Discourage any hook functions that might be running. */ 824 xchk_iscan_abort(&xnc->collect_iscan); 825 826 xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); 827 828 xfarray_destroy(xnc->nlinks); 829 xnc->nlinks = NULL; 830 831 xchk_iscan_teardown(&xnc->collect_iscan); 832 mutex_destroy(&xnc->lock); 833 xnc->sc = NULL; 834 } 835 836 /* 837 * Scan all inodes in the entire filesystem to generate link count data. If 838 * the scan is successful, the counts will be left alive for a repair. If any 839 * error occurs, we'll tear everything down. 840 */ 841 STATIC int 842 xchk_nlinks_setup_scan( 843 struct xfs_scrub *sc, 844 struct xchk_nlink_ctrs *xnc) 845 { 846 struct xfs_mount *mp = sc->mp; 847 char *descr; 848 unsigned long long max_inos; 849 xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; 850 xfs_agino_t first_agino, last_agino; 851 int error; 852 853 ASSERT(xnc->sc == NULL); 854 xnc->sc = sc; 855 856 mutex_init(&xnc->lock); 857 858 /* Retry iget every tenth of a second for up to 30 seconds. */ 859 xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan); 860 861 /* 862 * Set up enough space to store an nlink record for the highest 863 * possible inode number in this system. 864 */ 865 xfs_agino_range(mp, last_agno, &first_agino, &last_agino); 866 max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; 867 descr = xchk_xfile_descr(sc, "file link counts"); 868 error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), 869 sizeof(struct xchk_nlink), &xnc->nlinks); 870 kfree(descr); 871 if (error) 872 goto out_teardown; 873 874 /* 875 * Hook into the directory entry code so that we can capture updates to 876 * file link counts. The hook only triggers for inodes that were 877 * already scanned, and the scanner thread takes each inode's ILOCK, 878 * which means that any in-progress inode updates will finish before we 879 * can scan the inode. 880 */ 881 ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); 882 xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update); 883 error = xfs_dir_hook_add(mp, &xnc->dhook); 884 if (error) 885 goto out_teardown; 886 887 /* Use deferred cleanup to pass the inode link count data to repair. */ 888 sc->buf_cleanup = xchk_nlinks_teardown_scan; 889 return 0; 890 891 out_teardown: 892 xchk_nlinks_teardown_scan(xnc); 893 return error; 894 } 895 896 /* Scrub the link count of all inodes on the filesystem. */ 897 int 898 xchk_nlinks( 899 struct xfs_scrub *sc) 900 { 901 struct xchk_nlink_ctrs *xnc = sc->buf; 902 int error = 0; 903 904 /* Set ourselves up to check link counts on the live filesystem. */ 905 error = xchk_nlinks_setup_scan(sc, xnc); 906 if (error) 907 return error; 908 909 /* Walk all inodes, picking up link count information. */ 910 error = xchk_nlinks_collect(xnc); 911 if (!xchk_xref_process_error(sc, 0, 0, &error)) 912 return error; 913 914 /* Fail fast if we're not playing with a full dataset. */ 915 if (xchk_iscan_aborted(&xnc->collect_iscan)) 916 xchk_set_incomplete(sc); 917 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) 918 return 0; 919 920 /* Compare link counts. */ 921 error = xchk_nlinks_compare(xnc); 922 if (!xchk_xref_process_error(sc, 0, 0, &error)) 923 return error; 924 925 /* Check one last time for an incomplete dataset. */ 926 if (xchk_iscan_aborted(&xnc->collect_iscan)) 927 xchk_set_incomplete(sc); 928 929 return 0; 930 } 931