1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_icache.h" 16 #include "xfs_iwalk.h" 17 #include "xfs_ialloc.h" 18 #include "xfs_dir2.h" 19 #include "xfs_dir2_priv.h" 20 #include "xfs_ag.h" 21 #include "xfs_parent.h" 22 #include "scrub/scrub.h" 23 #include "scrub/common.h" 24 #include "scrub/repair.h" 25 #include "scrub/xfile.h" 26 #include "scrub/xfarray.h" 27 #include "scrub/iscan.h" 28 #include "scrub/orphanage.h" 29 #include "scrub/nlinks.h" 30 #include "scrub/trace.h" 31 #include "scrub/readdir.h" 32 #include "scrub/tempfile.h" 33 #include "scrub/listxattr.h" 34 35 /* 36 * Live Inode Link Count Checking 37 * ============================== 38 * 39 * Inode link counts are "summary" metadata, in the sense that they are 40 * computed as the number of directory entries referencing each file on the 41 * filesystem. Therefore, we compute the correct link counts by creating a 42 * shadow link count structure and walking every inode. 43 */ 44 45 /* Set us up to scrub inode link counts. */ 46 int 47 xchk_setup_nlinks( 48 struct xfs_scrub *sc) 49 { 50 struct xchk_nlink_ctrs *xnc; 51 int error; 52 53 xchk_fsgates_enable(sc, XCHK_FSGATES_DIRENTS); 54 55 if (xchk_could_repair(sc)) { 56 error = xrep_setup_nlinks(sc); 57 if (error) 58 return error; 59 } 60 61 xnc = kvzalloc(sizeof(struct xchk_nlink_ctrs), XCHK_GFP_FLAGS); 62 if (!xnc) 63 return -ENOMEM; 64 xnc->xname.name = xnc->namebuf; 65 xnc->sc = sc; 66 sc->buf = xnc; 67 68 return xchk_setup_fs(sc); 69 } 70 71 /* 72 * Part 1: Collecting file link counts. 
 * For each file, we create a shadow link counting structure, then walk the
 * entire directory tree, incrementing parent and child link counts for each
 * directory entry seen.
 *
 * To avoid false corruption reports in part 2, any failure in this part must
 * set the INCOMPLETE flag even when a negative errno is returned.  This care
 * must be taken with certain errno values (i.e. EFSBADCRC, EFSCORRUPTED,
 * ECANCELED) that are absorbed into a scrub state flag update by
 * xchk_*_process_error.  Scrub and repair share the same incore data
 * structures, so the INCOMPLETE flag is critical to prevent a repair based on
 * insufficient information.
 *
 * Because we are scanning a live filesystem, it's possible that another thread
 * will try to update the link counts for an inode that we've already scanned.
 * This will cause our counts to be incorrect.  Therefore, we hook all
 * directory entry updates because that is when link count updates occur.  By
 * shadowing transaction updates in this manner, live nlink check can ensure by
 * locking the inode and the shadow structure that its own copies are not out
 * of date.  Because the hook code runs in a different process context from the
 * scrub code and the scrub state flags are not accessed atomically, failures
 * in the hook code must abort the iscan and the scrubber must notice the
 * aborted scan and set the incomplete flag.
 *
 * Note that we use jump labels and srcu notifier hooks to minimize the
 * overhead when live nlinks is /not/ running.  Locking order for nlink
 * observations is inode ILOCK -> iscan_lock/xchk_nlink_ctrs lock.
 */

/*
 * Add a delta to an nlink counter, clamping the value to U32_MAX.  Because
 * XFS_MAXLINK < U32_MAX, the checking code will produce the correct results
 * even if we lose some precision.
104 */ 105 static inline void 106 careful_add( 107 xfs_nlink_t *nlinkp, 108 int delta) 109 { 110 uint64_t new_value = (uint64_t)(*nlinkp) + delta; 111 112 BUILD_BUG_ON(XFS_MAXLINK > U32_MAX); 113 *nlinkp = min_t(uint64_t, new_value, U32_MAX); 114 } 115 116 /* Update incore link count information. Caller must hold the nlinks lock. */ 117 STATIC int 118 xchk_nlinks_update_incore( 119 struct xchk_nlink_ctrs *xnc, 120 xfs_ino_t ino, 121 int parents_delta, 122 int backrefs_delta, 123 int children_delta) 124 { 125 struct xchk_nlink nl; 126 int error; 127 128 if (!xnc->nlinks) 129 return 0; 130 131 error = xfarray_load_sparse(xnc->nlinks, ino, &nl); 132 if (error) 133 return error; 134 135 trace_xchk_nlinks_update_incore(xnc->sc->mp, ino, &nl, parents_delta, 136 backrefs_delta, children_delta); 137 138 careful_add(&nl.parents, parents_delta); 139 careful_add(&nl.backrefs, backrefs_delta); 140 careful_add(&nl.children, children_delta); 141 142 nl.flags |= XCHK_NLINK_WRITTEN; 143 error = xfarray_store(xnc->nlinks, ino, &nl); 144 if (error == -EFBIG) { 145 /* 146 * EFBIG means we tried to store data at too high a byte offset 147 * in the sparse array. IOWs, we cannot complete the check and 148 * must notify userspace that the check was incomplete. 149 */ 150 error = -ECANCELED; 151 } 152 return error; 153 } 154 155 /* 156 * Apply a link count change from the regular filesystem into our shadow link 157 * count structure based on a directory update in progress. 158 */ 159 STATIC int 160 xchk_nlinks_live_update( 161 struct notifier_block *nb, 162 unsigned long action, 163 void *data) 164 { 165 struct xfs_dir_update_params *p = data; 166 struct xchk_nlink_ctrs *xnc; 167 int error; 168 169 xnc = container_of(nb, struct xchk_nlink_ctrs, dhook.dirent_hook.nb); 170 171 /* 172 * Ignore temporary directories being used to stage dir repairs, since 173 * we don't bump the link counts of the children. 
174 */ 175 if (xrep_is_tempfile(p->dp)) 176 return NOTIFY_DONE; 177 178 trace_xchk_nlinks_live_update(xnc->sc->mp, p->dp, action, p->ip->i_ino, 179 p->delta, p->name->name, p->name->len); 180 181 /* 182 * If we've already scanned @dp, update the number of parents that link 183 * to @ip. If @ip is a subdirectory, update the number of child links 184 * going out of @dp. 185 */ 186 if (xchk_iscan_want_live_update(&xnc->collect_iscan, p->dp->i_ino)) { 187 mutex_lock(&xnc->lock); 188 error = xchk_nlinks_update_incore(xnc, p->ip->i_ino, p->delta, 189 0, 0); 190 if (!error && S_ISDIR(VFS_IC(p->ip)->i_mode)) 191 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, 192 0, p->delta); 193 mutex_unlock(&xnc->lock); 194 if (error) 195 goto out_abort; 196 } 197 198 /* 199 * If @ip is a subdirectory and we've already scanned it, update the 200 * number of backrefs pointing to @dp. 201 */ 202 if (S_ISDIR(VFS_IC(p->ip)->i_mode) && 203 xchk_iscan_want_live_update(&xnc->collect_iscan, p->ip->i_ino)) { 204 mutex_lock(&xnc->lock); 205 error = xchk_nlinks_update_incore(xnc, p->dp->i_ino, 0, 206 p->delta, 0); 207 mutex_unlock(&xnc->lock); 208 if (error) 209 goto out_abort; 210 } 211 212 return NOTIFY_DONE; 213 214 out_abort: 215 xchk_iscan_abort(&xnc->collect_iscan); 216 return NOTIFY_DONE; 217 } 218 219 /* Bump the observed link count for the inode referenced by this entry. */ 220 STATIC int 221 xchk_nlinks_collect_dirent( 222 struct xfs_scrub *sc, 223 struct xfs_inode *dp, 224 xfs_dir2_dataptr_t dapos, 225 const struct xfs_name *name, 226 xfs_ino_t ino, 227 void *priv) 228 { 229 struct xchk_nlink_ctrs *xnc = priv; 230 bool dot = false, dotdot = false; 231 int error; 232 233 /* Does this name make sense? */ 234 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) { 235 error = -ECANCELED; 236 goto out_abort; 237 } 238 239 if (name->len == 1 && name->name[0] == '.') 240 dot = true; 241 else if (name->len == 2 && name->name[0] == '.' 
&& 242 name->name[1] == '.') 243 dotdot = true; 244 245 /* Don't accept a '.' entry that points somewhere else. */ 246 if (dot && ino != dp->i_ino) { 247 error = -ECANCELED; 248 goto out_abort; 249 } 250 251 /* Don't accept an invalid inode number. */ 252 if (!xfs_verify_dir_ino(sc->mp, ino)) { 253 error = -ECANCELED; 254 goto out_abort; 255 } 256 257 /* Update the shadow link counts if we haven't already failed. */ 258 259 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 260 error = -ECANCELED; 261 goto out_incomplete; 262 } 263 264 trace_xchk_nlinks_collect_dirent(sc->mp, dp, ino, name); 265 266 mutex_lock(&xnc->lock); 267 268 /* 269 * If this is a dotdot entry, it is a back link from dp to ino. How 270 * we handle this depends on whether or not dp is the root directory. 271 * 272 * The root directory is its own parent, so we pretend the dotdot entry 273 * establishes the "parent" of the root directory. Increment the 274 * number of parents of the root directory. 275 * 276 * Otherwise, increment the number of backrefs pointing back to ino. 277 * 278 * If the filesystem has parent pointers, we walk the pptrs to 279 * determine the backref count. 280 */ 281 if (dotdot) { 282 if (xchk_inode_is_dirtree_root(dp)) 283 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 284 else if (!xfs_has_parent(sc->mp)) 285 error = xchk_nlinks_update_incore(xnc, ino, 0, 1, 0); 286 else 287 error = 0; 288 if (error) 289 goto out_unlock; 290 } 291 292 /* 293 * If this dirent is a forward link from dp to ino, increment the 294 * number of parents linking into ino. 295 */ 296 if (!dot && !dotdot) { 297 error = xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 298 if (error) 299 goto out_unlock; 300 } 301 302 /* 303 * If this dirent is a forward link to a subdirectory, increment the 304 * number of child links of dp. 
305 */ 306 if (!dot && !dotdot && name->type == XFS_DIR3_FT_DIR) { 307 error = xchk_nlinks_update_incore(xnc, dp->i_ino, 0, 0, 1); 308 if (error) 309 goto out_unlock; 310 } 311 312 mutex_unlock(&xnc->lock); 313 return 0; 314 315 out_unlock: 316 mutex_unlock(&xnc->lock); 317 out_abort: 318 xchk_iscan_abort(&xnc->collect_iscan); 319 out_incomplete: 320 xchk_set_incomplete(sc); 321 return error; 322 } 323 324 /* Bump the backref count for the inode referenced by this parent pointer. */ 325 STATIC int 326 xchk_nlinks_collect_pptr( 327 struct xfs_scrub *sc, 328 struct xfs_inode *ip, 329 unsigned int attr_flags, 330 const unsigned char *name, 331 unsigned int namelen, 332 const void *value, 333 unsigned int valuelen, 334 void *priv) 335 { 336 struct xfs_name xname = { 337 .name = name, 338 .len = namelen, 339 }; 340 struct xchk_nlink_ctrs *xnc = priv; 341 const struct xfs_parent_rec *pptr_rec = value; 342 xfs_ino_t parent_ino; 343 int error; 344 345 /* Update the shadow link counts if we haven't already failed. 
*/ 346 347 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 348 error = -ECANCELED; 349 goto out_incomplete; 350 } 351 352 if (!(attr_flags & XFS_ATTR_PARENT)) 353 return 0; 354 355 error = xfs_parent_from_attr(sc->mp, attr_flags, name, namelen, value, 356 valuelen, &parent_ino, NULL); 357 if (error) 358 return error; 359 360 trace_xchk_nlinks_collect_pptr(sc->mp, ip, &xname, pptr_rec); 361 362 mutex_lock(&xnc->lock); 363 364 error = xchk_nlinks_update_incore(xnc, parent_ino, 0, 1, 0); 365 if (error) 366 goto out_unlock; 367 368 mutex_unlock(&xnc->lock); 369 return 0; 370 371 out_unlock: 372 mutex_unlock(&xnc->lock); 373 xchk_iscan_abort(&xnc->collect_iscan); 374 out_incomplete: 375 xchk_set_incomplete(sc); 376 return error; 377 } 378 379 static uint 380 xchk_nlinks_ilock_dir( 381 struct xfs_inode *ip) 382 { 383 uint lock_mode = XFS_ILOCK_SHARED; 384 385 /* 386 * We're going to scan the directory entries, so we must be ready to 387 * pull the data fork mappings into memory if they aren't already. 388 */ 389 if (xfs_need_iread_extents(&ip->i_df)) 390 lock_mode = XFS_ILOCK_EXCL; 391 392 /* 393 * We're going to scan the parent pointers, so we must be ready to 394 * pull the attr fork mappings into memory if they aren't already. 395 */ 396 if (xfs_has_parent(ip->i_mount) && xfs_inode_has_attr_fork(ip) && 397 xfs_need_iread_extents(&ip->i_af)) 398 lock_mode = XFS_ILOCK_EXCL; 399 400 /* 401 * Take the IOLOCK so that other threads cannot start a directory 402 * update while we're scanning. 403 */ 404 lock_mode |= XFS_IOLOCK_SHARED; 405 xfs_ilock(ip, lock_mode); 406 return lock_mode; 407 } 408 409 /* Walk a directory to bump the observed link counts of the children. 
*/ 410 STATIC int 411 xchk_nlinks_collect_dir( 412 struct xchk_nlink_ctrs *xnc, 413 struct xfs_inode *dp) 414 { 415 struct xfs_scrub *sc = xnc->sc; 416 unsigned int lock_mode; 417 int error = 0; 418 419 /* 420 * Ignore temporary directories being used to stage dir repairs, since 421 * we don't bump the link counts of the children. 422 */ 423 if (xrep_is_tempfile(dp)) 424 return 0; 425 426 /* Prevent anyone from changing this directory while we walk it. */ 427 lock_mode = xchk_nlinks_ilock_dir(dp); 428 429 /* 430 * The dotdot entry of an unlinked directory still points to the last 431 * parent, but the parent no longer links to this directory. Skip the 432 * directory to avoid overcounting. 433 */ 434 if (VFS_I(dp)->i_nlink == 0) 435 goto out_unlock; 436 437 /* 438 * We cannot count file links if the directory looks as though it has 439 * been zapped by the inode record repair code. 440 */ 441 if (xchk_dir_looks_zapped(dp)) { 442 error = -EBUSY; 443 goto out_abort; 444 } 445 446 error = xchk_dir_walk(sc, dp, xchk_nlinks_collect_dirent, xnc); 447 if (error == -ECANCELED) { 448 error = 0; 449 goto out_unlock; 450 } 451 if (error) 452 goto out_abort; 453 454 /* Walk the parent pointers to get real backref counts. */ 455 if (xfs_has_parent(sc->mp)) { 456 /* 457 * If the extended attributes look as though they has been 458 * zapped by the inode record repair code, we cannot scan for 459 * parent pointers. 
460 */ 461 if (xchk_pptr_looks_zapped(dp)) { 462 error = -EBUSY; 463 goto out_unlock; 464 } 465 466 error = xchk_xattr_walk(sc, dp, xchk_nlinks_collect_pptr, NULL, 467 xnc); 468 if (error == -ECANCELED) { 469 error = 0; 470 goto out_unlock; 471 } 472 if (error) 473 goto out_abort; 474 } 475 476 xchk_iscan_mark_visited(&xnc->collect_iscan, dp); 477 goto out_unlock; 478 479 out_abort: 480 xchk_set_incomplete(sc); 481 xchk_iscan_abort(&xnc->collect_iscan); 482 out_unlock: 483 xfs_iunlock(dp, lock_mode); 484 return error; 485 } 486 487 /* If this looks like a valid pointer, count it. */ 488 static inline int 489 xchk_nlinks_collect_metafile( 490 struct xchk_nlink_ctrs *xnc, 491 xfs_ino_t ino) 492 { 493 if (!xfs_verify_ino(xnc->sc->mp, ino)) 494 return 0; 495 496 trace_xchk_nlinks_collect_metafile(xnc->sc->mp, ino); 497 return xchk_nlinks_update_incore(xnc, ino, 1, 0, 0); 498 } 499 500 /* Bump the link counts of metadata files rooted in the superblock. */ 501 STATIC int 502 xchk_nlinks_collect_metafiles( 503 struct xchk_nlink_ctrs *xnc) 504 { 505 struct xfs_mount *mp = xnc->sc->mp; 506 int error = -ECANCELED; 507 508 509 if (xchk_iscan_aborted(&xnc->collect_iscan)) 510 goto out_incomplete; 511 512 mutex_lock(&xnc->lock); 513 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rbmino); 514 if (error) 515 goto out_abort; 516 517 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_rsumino); 518 if (error) 519 goto out_abort; 520 521 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_uquotino); 522 if (error) 523 goto out_abort; 524 525 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_gquotino); 526 if (error) 527 goto out_abort; 528 529 error = xchk_nlinks_collect_metafile(xnc, mp->m_sb.sb_pquotino); 530 if (error) 531 goto out_abort; 532 mutex_unlock(&xnc->lock); 533 534 return 0; 535 536 out_abort: 537 mutex_unlock(&xnc->lock); 538 xchk_iscan_abort(&xnc->collect_iscan); 539 out_incomplete: 540 xchk_set_incomplete(xnc->sc); 541 return error; 542 } 543 544 /* 
Advance the collection scan cursor for this non-directory file. */ 545 static inline int 546 xchk_nlinks_collect_file( 547 struct xchk_nlink_ctrs *xnc, 548 struct xfs_inode *ip) 549 { 550 xfs_ilock(ip, XFS_IOLOCK_SHARED); 551 xchk_iscan_mark_visited(&xnc->collect_iscan, ip); 552 xfs_iunlock(ip, XFS_IOLOCK_SHARED); 553 return 0; 554 } 555 556 /* Walk all directories and count inode links. */ 557 STATIC int 558 xchk_nlinks_collect( 559 struct xchk_nlink_ctrs *xnc) 560 { 561 struct xfs_scrub *sc = xnc->sc; 562 struct xfs_inode *ip; 563 int error; 564 565 /* Count the rt and quota files that are rooted in the superblock. */ 566 error = xchk_nlinks_collect_metafiles(xnc); 567 if (error) 568 return error; 569 570 /* 571 * Set up for a potentially lengthy filesystem scan by reducing our 572 * transaction resource usage for the duration. Specifically: 573 * 574 * Cancel the transaction to release the log grant space while we scan 575 * the filesystem. 576 * 577 * Create a new empty transaction to eliminate the possibility of the 578 * inode scan deadlocking on cyclical metadata. 579 * 580 * We pass the empty transaction to the file scanning function to avoid 581 * repeatedly cycling empty transactions. This can be done even though 582 * we take the IOLOCK to quiesce the file because empty transactions 583 * do not take sb_internal. 584 */ 585 xchk_trans_cancel(sc); 586 xchk_trans_alloc_empty(sc); 587 588 while ((error = xchk_iscan_iter(&xnc->collect_iscan, &ip)) == 1) { 589 if (S_ISDIR(VFS_I(ip)->i_mode)) 590 error = xchk_nlinks_collect_dir(xnc, ip); 591 else 592 error = xchk_nlinks_collect_file(xnc, ip); 593 xchk_irele(sc, ip); 594 if (error) 595 break; 596 597 if (xchk_should_terminate(sc, &error)) 598 break; 599 } 600 xchk_iscan_iter_finish(&xnc->collect_iscan); 601 if (error) { 602 xchk_set_incomplete(sc); 603 /* 604 * If we couldn't grab an inode that was busy with a state 605 * change, change the error code so that we exit to userspace 606 * as quickly as possible. 
607 */ 608 if (error == -EBUSY) 609 return -ECANCELED; 610 return error; 611 } 612 613 /* 614 * Switch out for a real transaction in preparation for building a new 615 * tree. 616 */ 617 xchk_trans_cancel(sc); 618 return xchk_setup_fs(sc); 619 } 620 621 /* 622 * Part 2: Comparing file link counters. Walk each inode and compare the link 623 * counts against our shadow information; and then walk each shadow link count 624 * structure (that wasn't covered in the first part), comparing it against the 625 * file. 626 */ 627 628 /* Read the observed link count for comparison with the actual inode. */ 629 STATIC int 630 xchk_nlinks_comparison_read( 631 struct xchk_nlink_ctrs *xnc, 632 xfs_ino_t ino, 633 struct xchk_nlink *obs) 634 { 635 struct xchk_nlink nl; 636 int error; 637 638 error = xfarray_load_sparse(xnc->nlinks, ino, &nl); 639 if (error) 640 return error; 641 642 nl.flags |= (XCHK_NLINK_COMPARE_SCANNED | XCHK_NLINK_WRITTEN); 643 644 error = xfarray_store(xnc->nlinks, ino, &nl); 645 if (error == -EFBIG) { 646 /* 647 * EFBIG means we tried to store data at too high a byte offset 648 * in the sparse array. IOWs, we cannot complete the check and 649 * must notify userspace that the check was incomplete. This 650 * shouldn't really happen outside of the collection phase. 651 */ 652 xchk_set_incomplete(xnc->sc); 653 return -ECANCELED; 654 } 655 if (error) 656 return error; 657 658 /* Copy the counters, but do not expose the internal state. */ 659 obs->parents = nl.parents; 660 obs->backrefs = nl.backrefs; 661 obs->children = nl.children; 662 obs->flags = 0; 663 return 0; 664 } 665 666 /* Check our link count against an inode. 
*/ 667 STATIC int 668 xchk_nlinks_compare_inode( 669 struct xchk_nlink_ctrs *xnc, 670 struct xfs_inode *ip) 671 { 672 struct xchk_nlink obs; 673 struct xfs_scrub *sc = xnc->sc; 674 uint64_t total_links; 675 unsigned int actual_nlink; 676 int error; 677 678 /* 679 * Ignore temporary files being used to stage repairs, since we assume 680 * they're correct for non-directories, and the directory repair code 681 * doesn't bump the link counts for the children. 682 */ 683 if (xrep_is_tempfile(ip)) 684 return 0; 685 686 xfs_ilock(ip, XFS_ILOCK_SHARED); 687 mutex_lock(&xnc->lock); 688 689 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 690 xchk_set_incomplete(xnc->sc); 691 error = -ECANCELED; 692 goto out_scanlock; 693 } 694 695 error = xchk_nlinks_comparison_read(xnc, ip->i_ino, &obs); 696 if (error) 697 goto out_scanlock; 698 699 /* 700 * If we don't have ftype to get an accurate count of the subdirectory 701 * entries in this directory, take advantage of the fact that on a 702 * consistent ftype=0 filesystem, the number of subdirectory 703 * backreferences (dotdot entries) pointing towards this directory 704 * should be equal to the number of subdirectory entries in the 705 * directory. 706 */ 707 if (!xfs_has_ftype(sc->mp) && S_ISDIR(VFS_I(ip)->i_mode)) 708 obs.children = obs.backrefs; 709 710 total_links = xchk_nlink_total(ip, &obs); 711 actual_nlink = VFS_I(ip)->i_nlink; 712 713 trace_xchk_nlinks_compare_inode(sc->mp, ip, &obs); 714 715 /* 716 * If we found so many parents that we'd overflow i_nlink, we must flag 717 * this as a corruption. The VFS won't let users increase the link 718 * count, but it will let them decrease it. 719 */ 720 if (total_links > XFS_NLINK_PINNED) { 721 xchk_ino_set_corrupt(sc, ip->i_ino); 722 goto out_corrupt; 723 } else if (total_links > XFS_MAXLINK) { 724 xchk_ino_set_warning(sc, ip->i_ino); 725 } 726 727 /* Link counts should match. 
*/ 728 if (total_links != actual_nlink) { 729 xchk_ino_set_corrupt(sc, ip->i_ino); 730 goto out_corrupt; 731 } 732 733 if (S_ISDIR(VFS_I(ip)->i_mode) && actual_nlink > 0) { 734 /* 735 * The collection phase ignores directories with zero link 736 * count, so we ignore them here too. 737 * 738 * The number of subdirectory backreferences (dotdot entries) 739 * pointing towards this directory should be equal to the 740 * number of subdirectory entries in the directory. 741 */ 742 if (obs.children != obs.backrefs) 743 xchk_ino_xref_set_corrupt(sc, ip->i_ino); 744 } else { 745 /* 746 * Non-directories and unlinked directories should not have 747 * back references. 748 */ 749 if (obs.backrefs != 0) { 750 xchk_ino_set_corrupt(sc, ip->i_ino); 751 goto out_corrupt; 752 } 753 754 /* 755 * Non-directories and unlinked directories should not have 756 * children. 757 */ 758 if (obs.children != 0) { 759 xchk_ino_set_corrupt(sc, ip->i_ino); 760 goto out_corrupt; 761 } 762 } 763 764 if (xchk_inode_is_dirtree_root(ip)) { 765 /* 766 * For the root of a directory tree, both the '.' and '..' 767 * entries should point to the root directory. The dotdot 768 * entry is counted as a parent of the root /and/ a backref of 769 * the root directory. 770 */ 771 if (obs.parents != 1) { 772 xchk_ino_set_corrupt(sc, ip->i_ino); 773 goto out_corrupt; 774 } 775 } else if (actual_nlink > 0) { 776 /* 777 * Linked files that are not the root directory should have at 778 * least one parent. 779 */ 780 if (obs.parents == 0) { 781 xchk_ino_set_corrupt(sc, ip->i_ino); 782 goto out_corrupt; 783 } 784 } 785 786 out_corrupt: 787 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 788 error = -ECANCELED; 789 out_scanlock: 790 mutex_unlock(&xnc->lock); 791 xfs_iunlock(ip, XFS_ILOCK_SHARED); 792 return error; 793 } 794 795 /* 796 * Check our link count against an inode that wasn't checked previously. 
This 797 * is intended to catch directories with dangling links, though we could be 798 * racing with inode allocation in other threads. 799 */ 800 STATIC int 801 xchk_nlinks_compare_inum( 802 struct xchk_nlink_ctrs *xnc, 803 xfs_ino_t ino) 804 { 805 struct xchk_nlink obs; 806 struct xfs_mount *mp = xnc->sc->mp; 807 struct xfs_trans *tp = xnc->sc->tp; 808 struct xfs_buf *agi_bp; 809 struct xfs_inode *ip; 810 int error; 811 812 /* 813 * The first iget failed, so try again with the variant that returns 814 * either an incore inode or the AGI buffer. If the function returns 815 * EINVAL/ENOENT, it should have passed us the AGI buffer so that we 816 * can guarantee that the inode won't be allocated while we check for 817 * a zero link count in the observed link count data. 818 */ 819 error = xchk_iget_agi(xnc->sc, ino, &agi_bp, &ip); 820 if (!error) { 821 /* Actually got an inode, so use the inode compare. */ 822 error = xchk_nlinks_compare_inode(xnc, ip); 823 xchk_irele(xnc->sc, ip); 824 return error; 825 } 826 if (error == -ENOENT || error == -EINVAL) { 827 /* No inode was found. Check for zero link count below. */ 828 error = 0; 829 } 830 if (error) 831 goto out_agi; 832 833 /* Ensure that we have protected against inode allocation/freeing. */ 834 if (agi_bp == NULL) { 835 ASSERT(agi_bp != NULL); 836 xchk_set_incomplete(xnc->sc); 837 return -ECANCELED; 838 } 839 840 if (xchk_iscan_aborted(&xnc->collect_iscan)) { 841 xchk_set_incomplete(xnc->sc); 842 error = -ECANCELED; 843 goto out_agi; 844 } 845 846 mutex_lock(&xnc->lock); 847 error = xchk_nlinks_comparison_read(xnc, ino, &obs); 848 if (error) 849 goto out_scanlock; 850 851 trace_xchk_nlinks_check_zero(mp, ino, &obs); 852 853 /* 854 * If we can't grab the inode, the link count had better be zero. We 855 * still hold the AGI to prevent inode allocation/freeing. 
856 */ 857 if (xchk_nlink_total(NULL, &obs) != 0) { 858 xchk_ino_set_corrupt(xnc->sc, ino); 859 error = -ECANCELED; 860 } 861 862 out_scanlock: 863 mutex_unlock(&xnc->lock); 864 out_agi: 865 if (agi_bp) 866 xfs_trans_brelse(tp, agi_bp); 867 return error; 868 } 869 870 /* 871 * Try to visit every inode in the filesystem to compare the link count. Move 872 * on if we can't grab an inode, since we'll revisit unchecked nlink records in 873 * the second part. 874 */ 875 static int 876 xchk_nlinks_compare_iter( 877 struct xchk_nlink_ctrs *xnc, 878 struct xfs_inode **ipp) 879 { 880 int error; 881 882 do { 883 error = xchk_iscan_iter(&xnc->compare_iscan, ipp); 884 } while (error == -EBUSY); 885 886 return error; 887 } 888 889 /* Compare the link counts we observed against the live information. */ 890 STATIC int 891 xchk_nlinks_compare( 892 struct xchk_nlink_ctrs *xnc) 893 { 894 struct xchk_nlink nl; 895 struct xfs_scrub *sc = xnc->sc; 896 struct xfs_inode *ip; 897 xfarray_idx_t cur = XFARRAY_CURSOR_INIT; 898 int error; 899 900 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 901 return 0; 902 903 /* 904 * Create a new empty transaction so that we can advance the iscan 905 * cursor without deadlocking if the inobt has a cycle and push on the 906 * inactivation workqueue. 907 */ 908 xchk_trans_cancel(sc); 909 xchk_trans_alloc_empty(sc); 910 911 /* 912 * Use the inobt to walk all allocated inodes to compare the link 913 * counts. Inodes skipped by _compare_iter will be tried again in the 914 * next phase of the scan. 
915 */ 916 xchk_iscan_start(sc, 0, 0, &xnc->compare_iscan); 917 while ((error = xchk_nlinks_compare_iter(xnc, &ip)) == 1) { 918 error = xchk_nlinks_compare_inode(xnc, ip); 919 xchk_iscan_mark_visited(&xnc->compare_iscan, ip); 920 xchk_irele(sc, ip); 921 if (error) 922 break; 923 924 if (xchk_should_terminate(sc, &error)) 925 break; 926 } 927 xchk_iscan_iter_finish(&xnc->compare_iscan); 928 xchk_iscan_teardown(&xnc->compare_iscan); 929 if (error) 930 return error; 931 932 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 933 return 0; 934 935 /* 936 * Walk all the non-null nlink observations that weren't checked in the 937 * previous step. 938 */ 939 mutex_lock(&xnc->lock); 940 while ((error = xfarray_iter(xnc->nlinks, &cur, &nl)) == 1) { 941 xfs_ino_t ino = cur - 1; 942 943 if (nl.flags & XCHK_NLINK_COMPARE_SCANNED) 944 continue; 945 946 mutex_unlock(&xnc->lock); 947 948 error = xchk_nlinks_compare_inum(xnc, ino); 949 if (error) 950 return error; 951 952 if (xchk_should_terminate(xnc->sc, &error)) 953 return error; 954 955 mutex_lock(&xnc->lock); 956 } 957 mutex_unlock(&xnc->lock); 958 959 return error; 960 } 961 962 /* Tear down everything associated with a nlinks check. */ 963 static void 964 xchk_nlinks_teardown_scan( 965 void *priv) 966 { 967 struct xchk_nlink_ctrs *xnc = priv; 968 969 /* Discourage any hook functions that might be running. */ 970 xchk_iscan_abort(&xnc->collect_iscan); 971 972 xfs_dir_hook_del(xnc->sc->mp, &xnc->dhook); 973 974 xfarray_destroy(xnc->nlinks); 975 xnc->nlinks = NULL; 976 977 xchk_iscan_teardown(&xnc->collect_iscan); 978 mutex_destroy(&xnc->lock); 979 xnc->sc = NULL; 980 } 981 982 /* 983 * Scan all inodes in the entire filesystem to generate link count data. If 984 * the scan is successful, the counts will be left alive for a repair. If any 985 * error occurs, we'll tear everything down. 
986 */ 987 STATIC int 988 xchk_nlinks_setup_scan( 989 struct xfs_scrub *sc, 990 struct xchk_nlink_ctrs *xnc) 991 { 992 struct xfs_mount *mp = sc->mp; 993 char *descr; 994 unsigned long long max_inos; 995 xfs_agnumber_t last_agno = mp->m_sb.sb_agcount - 1; 996 xfs_agino_t first_agino, last_agino; 997 int error; 998 999 mutex_init(&xnc->lock); 1000 1001 /* Retry iget every tenth of a second for up to 30 seconds. */ 1002 xchk_iscan_start(sc, 30000, 100, &xnc->collect_iscan); 1003 1004 /* 1005 * Set up enough space to store an nlink record for the highest 1006 * possible inode number in this system. 1007 */ 1008 xfs_agino_range(mp, last_agno, &first_agino, &last_agino); 1009 max_inos = XFS_AGINO_TO_INO(mp, last_agno, last_agino) + 1; 1010 descr = xchk_xfile_descr(sc, "file link counts"); 1011 error = xfarray_create(descr, min(XFS_MAXINUMBER + 1, max_inos), 1012 sizeof(struct xchk_nlink), &xnc->nlinks); 1013 kfree(descr); 1014 if (error) 1015 goto out_teardown; 1016 1017 /* 1018 * Hook into the directory entry code so that we can capture updates to 1019 * file link counts. The hook only triggers for inodes that were 1020 * already scanned, and the scanner thread takes each inode's ILOCK, 1021 * which means that any in-progress inode updates will finish before we 1022 * can scan the inode. 1023 */ 1024 ASSERT(sc->flags & XCHK_FSGATES_DIRENTS); 1025 xfs_dir_hook_setup(&xnc->dhook, xchk_nlinks_live_update); 1026 error = xfs_dir_hook_add(mp, &xnc->dhook); 1027 if (error) 1028 goto out_teardown; 1029 1030 /* Use deferred cleanup to pass the inode link count data to repair. */ 1031 sc->buf_cleanup = xchk_nlinks_teardown_scan; 1032 return 0; 1033 1034 out_teardown: 1035 xchk_nlinks_teardown_scan(xnc); 1036 return error; 1037 } 1038 1039 /* Scrub the link count of all inodes on the filesystem. 
*/ 1040 int 1041 xchk_nlinks( 1042 struct xfs_scrub *sc) 1043 { 1044 struct xchk_nlink_ctrs *xnc = sc->buf; 1045 int error = 0; 1046 1047 /* Set ourselves up to check link counts on the live filesystem. */ 1048 error = xchk_nlinks_setup_scan(sc, xnc); 1049 if (error) 1050 return error; 1051 1052 /* Walk all inodes, picking up link count information. */ 1053 error = xchk_nlinks_collect(xnc); 1054 if (!xchk_xref_process_error(sc, 0, 0, &error)) 1055 return error; 1056 1057 /* Fail fast if we're not playing with a full dataset. */ 1058 if (xchk_iscan_aborted(&xnc->collect_iscan)) 1059 xchk_set_incomplete(sc); 1060 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_INCOMPLETE) 1061 return 0; 1062 1063 /* Compare link counts. */ 1064 error = xchk_nlinks_compare(xnc); 1065 if (!xchk_xref_process_error(sc, 0, 0, &error)) 1066 return error; 1067 1068 /* Check one last time for an incomplete dataset. */ 1069 if (xchk_iscan_aborted(&xnc->collect_iscan)) 1070 xchk_set_incomplete(sc); 1071 1072 return 0; 1073 } 1074