1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_log_format.h" 14 #include "xfs_trans.h" 15 #include "xfs_ag.h" 16 #include "xfs_inode.h" 17 #include "xfs_ialloc.h" 18 #include "xfs_icache.h" 19 #include "xfs_da_format.h" 20 #include "xfs_reflink.h" 21 #include "xfs_rmap.h" 22 #include "xfs_bmap_util.h" 23 #include "xfs_rtbitmap.h" 24 #include "scrub/scrub.h" 25 #include "scrub/common.h" 26 #include "scrub/btree.h" 27 #include "scrub/trace.h" 28 #include "scrub/repair.h" 29 30 /* Prepare the attached inode for scrubbing. */ 31 static inline int 32 xchk_prepare_iscrub( 33 struct xfs_scrub *sc) 34 { 35 int error; 36 37 xchk_ilock(sc, XFS_IOLOCK_EXCL); 38 39 error = xchk_trans_alloc(sc, 0); 40 if (error) 41 return error; 42 43 error = xchk_ino_dqattach(sc); 44 if (error) 45 return error; 46 47 xchk_ilock(sc, XFS_ILOCK_EXCL); 48 return 0; 49 } 50 51 /* Install this scrub-by-handle inode and prepare it for scrubbing. */ 52 static inline int 53 xchk_install_handle_iscrub( 54 struct xfs_scrub *sc, 55 struct xfs_inode *ip) 56 { 57 int error; 58 59 error = xchk_install_handle_inode(sc, ip); 60 if (error) 61 return error; 62 63 /* 64 * Don't allow scrubbing by handle of any non-directory inode records 65 * in the metadata directory tree. We don't know if any of the scans 66 * launched by this scrubber will end up indirectly trying to lock this 67 * file. 68 * 69 * Scrubbers of inode-rooted metadata files (e.g. quota files) will 70 * attach all the resources needed to scrub the inode and call 71 * xchk_inode directly. Userspace cannot call this directly. 72 */ 73 if (xfs_is_metadir_inode(ip) && !S_ISDIR(VFS_I(ip)->i_mode)) { 74 xchk_irele(sc, ip); 75 sc->ip = NULL; 76 return -ENOENT; 77 } 78 79 return xchk_prepare_iscrub(sc); 80 } 81 82 /* 83 * Grab total control of the inode metadata. In the best case, we grab the 84 * incore inode and take all locks on it. If the incore inode cannot be 85 * constructed due to corruption problems, lock the AGI so that we can single 86 * step the loading process to fix everything that can go wrong. 87 */ 88 int 89 xchk_setup_inode( 90 struct xfs_scrub *sc) 91 { 92 struct xfs_imap imap; 93 struct xfs_inode *ip; 94 struct xfs_mount *mp = sc->mp; 95 struct xfs_inode *ip_in = XFS_I(file_inode(sc->file)); 96 struct xfs_buf *agi_bp; 97 struct xfs_perag *pag; 98 xfs_agnumber_t agno = XFS_INO_TO_AGNO(mp, sc->sm->sm_ino); 99 int error; 100 101 if (xchk_need_intent_drain(sc)) 102 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 103 104 /* We want to scan the opened inode, so lock it and exit. */ 105 if (sc->sm->sm_ino == 0 || sc->sm->sm_ino == ip_in->i_ino) { 106 error = xchk_install_live_inode(sc, ip_in); 107 if (error) 108 return error; 109 110 return xchk_prepare_iscrub(sc); 111 } 112 113 /* 114 * On pre-metadir filesystems, reject internal metadata files. For 115 * metadir filesystems, limited scrubbing of any file in the metadata 116 * directory tree by handle is allowed, because that is the only way to 117 * validate the lack of parent pointers in the sb-root metadata inodes. 118 */ 119 if (!xfs_has_metadir(mp) && xfs_is_sb_inum(mp, sc->sm->sm_ino)) 120 return -ENOENT; 121 /* Reject obviously bad inode numbers. */ 122 if (!xfs_verify_ino(sc->mp, sc->sm->sm_ino)) 123 return -ENOENT; 124 125 /* Try a safe untrusted iget. */ 126 error = xchk_iget_safe(sc, sc->sm->sm_ino, &ip); 127 if (!error) 128 return xchk_install_handle_iscrub(sc, ip); 129 if (error == -ENOENT) 130 return error; 131 if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL) 132 goto out_error; 133 134 /* 135 * EINVAL with IGET_UNTRUSTED probably means one of several things: 136 * userspace gave us an inode number that doesn't correspond to fs 137 * space; the inode btree lacks a record for this inode; or there is 138 * a record, and it says this inode is free. 139 * 140 * EFSCORRUPTED/EFSBADCRC could mean that the inode was mappable, but 141 * some other metadata corruption (e.g. inode forks) prevented 142 * instantiation of the incore inode. Or it could mean the inobt is 143 * corrupt. 144 * 145 * We want to look up this inode in the inobt directly to distinguish 146 * three different scenarios: (1) the inobt says the inode is free, 147 * in which case there's nothing to do; (2) the inobt is corrupt so we 148 * should flag the corruption and exit to userspace to let it fix the 149 * inobt; and (3) the inobt says the inode is allocated, but loading it 150 * failed due to corruption. 151 * 152 * Allocate a transaction and grab the AGI to prevent inobt activity in 153 * this AG. Retry the iget in case someone allocated a new inode after 154 * the first iget failed. 155 */ 156 error = xchk_trans_alloc(sc, 0); 157 if (error) 158 goto out_error; 159 160 error = xchk_iget_agi(sc, sc->sm->sm_ino, &agi_bp, &ip); 161 if (error == 0) { 162 /* Actually got the incore inode, so install it and proceed. */ 163 xchk_trans_cancel(sc); 164 return xchk_install_handle_iscrub(sc, ip); 165 } 166 if (error == -ENOENT) 167 goto out_gone; 168 if (error != -EFSCORRUPTED && error != -EFSBADCRC && error != -EINVAL) 169 goto out_cancel; 170 171 /* Ensure that we have protected against inode allocation/freeing. */ 172 if (agi_bp == NULL) { 173 ASSERT(agi_bp != NULL); 174 error = -ECANCELED; 175 goto out_cancel; 176 } 177 178 /* 179 * Untrusted iget failed a second time. Let's try an inobt lookup. 180 * If the inobt doesn't think this is an allocated inode then we'll 181 * return ENOENT to signal that the check can be skipped. 182 * 183 * If the lookup signals corruption, we'll mark this inode corrupt and 184 * exit to userspace. There's little chance of fixing anything until 185 * the inobt is straightened out, but there's nothing we can do here. 186 * 187 * If the lookup encounters a runtime error, exit to userspace. 188 */ 189 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, sc->sm->sm_ino)); 190 if (!pag) { 191 error = -EFSCORRUPTED; 192 goto out_cancel; 193 } 194 195 error = xfs_imap(pag, sc->tp, sc->sm->sm_ino, &imap, 196 XFS_IGET_UNTRUSTED); 197 xfs_perag_put(pag); 198 if (error == -EINVAL || error == -ENOENT) 199 goto out_gone; 200 if (error) 201 goto out_cancel; 202 203 /* 204 * The lookup succeeded. Chances are the ondisk inode is corrupt and 205 * preventing iget from reading it. Retain the scrub transaction and 206 * the AGI buffer to prevent anyone from allocating or freeing inodes. 207 * This ensures that we preserve the inconsistency between the inobt 208 * saying the inode is allocated and the icache being unable to load 209 * the inode until we can flag the corruption in xchk_inode. The 210 * scrub function has to note the corruption, since we're not really 211 * supposed to do that from the setup function. Save the mapping to 212 * make repairs to the ondisk inode buffer. 213 */ 214 if (xchk_could_repair(sc)) 215 xrep_setup_inode(sc, &imap); 216 return 0; 217 218 out_cancel: 219 xchk_trans_cancel(sc); 220 out_error: 221 trace_xchk_op_error(sc, agno, XFS_INO_TO_AGBNO(mp, sc->sm->sm_ino), 222 error, __return_address); 223 return error; 224 out_gone: 225 /* The file is gone, so there's nothing to check. */ 226 xchk_trans_cancel(sc); 227 return -ENOENT; 228 } 229 230 /* Inode core */ 231 232 /* Validate di_extsize hint. */ 233 STATIC void 234 xchk_inode_extsize( 235 struct xfs_scrub *sc, 236 struct xfs_dinode *dip, 237 xfs_ino_t ino, 238 uint16_t mode, 239 uint16_t flags) 240 { 241 xfs_failaddr_t fa; 242 uint32_t value = be32_to_cpu(dip->di_extsize); 243 244 fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags); 245 if (fa) 246 xchk_ino_set_corrupt(sc, ino); 247 248 /* 249 * XFS allows a sysadmin to change the rt extent size when adding a rt 250 * section to a filesystem after formatting. If there are any 251 * directories with extszinherit and rtinherit set, the hint could 252 * become misaligned with the new rextsize. The verifier doesn't check 253 * this, because we allow rtinherit directories even without an rt 254 * device. Flag this as an administrative warning since we will clean 255 * this up eventually. 256 */ 257 if ((flags & XFS_DIFLAG_RTINHERIT) && 258 (flags & XFS_DIFLAG_EXTSZINHERIT) && 259 xfs_extlen_to_rtxmod(sc->mp, value) > 0) 260 xchk_ino_set_warning(sc, ino); 261 } 262 263 /* Validate di_cowextsize hint. */ 264 STATIC void 265 xchk_inode_cowextsize( 266 struct xfs_scrub *sc, 267 struct xfs_dinode *dip, 268 xfs_ino_t ino, 269 uint16_t mode, 270 uint16_t flags, 271 uint64_t flags2) 272 { 273 xfs_failaddr_t fa; 274 uint32_t value = be32_to_cpu(dip->di_cowextsize); 275 276 fa = xfs_inode_validate_cowextsize(sc->mp, value, mode, flags, flags2); 277 if (fa) 278 xchk_ino_set_corrupt(sc, ino); 279 280 /* 281 * XFS allows a sysadmin to change the rt extent size when adding a rt 282 * section to a filesystem after formatting. If there are any 283 * directories with cowextsize and rtinherit set, the hint could become 284 * misaligned with the new rextsize. The verifier doesn't check this, 285 * because we allow rtinherit directories even without an rt device. 286 * Flag this as an administrative warning since we will clean this up 287 * eventually. 288 */ 289 if ((flags & XFS_DIFLAG_RTINHERIT) && 290 (flags2 & XFS_DIFLAG2_COWEXTSIZE) && 291 value % sc->mp->m_sb.sb_rextsize > 0) 292 xchk_ino_set_warning(sc, ino); 293 } 294 295 /* Make sure the di_flags make sense for the inode. */ 296 STATIC void 297 xchk_inode_flags( 298 struct xfs_scrub *sc, 299 struct xfs_dinode *dip, 300 xfs_ino_t ino, 301 uint16_t mode, 302 uint16_t flags) 303 { 304 struct xfs_mount *mp = sc->mp; 305 306 /* di_flags are all taken, last bit cannot be used */ 307 if (flags & ~XFS_DIFLAG_ANY) 308 goto bad; 309 310 /* rt flags require rt device */ 311 if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 312 goto bad; 313 314 /* new rt bitmap flag only valid for rbmino */ 315 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 316 goto bad; 317 318 /* directory-only flags */ 319 if ((flags & (XFS_DIFLAG_RTINHERIT | 320 XFS_DIFLAG_EXTSZINHERIT | 321 XFS_DIFLAG_PROJINHERIT | 322 XFS_DIFLAG_NOSYMLINKS)) && 323 !S_ISDIR(mode)) 324 goto bad; 325 326 /* file-only flags */ 327 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 328 !S_ISREG(mode)) 329 goto bad; 330 331 /* filestreams and rt make no sense */ 332 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 333 goto bad; 334 335 return; 336 bad: 337 xchk_ino_set_corrupt(sc, ino); 338 } 339 340 /* Make sure the di_flags2 make sense for the inode. */ 341 STATIC void 342 xchk_inode_flags2( 343 struct xfs_scrub *sc, 344 struct xfs_dinode *dip, 345 xfs_ino_t ino, 346 uint16_t mode, 347 uint16_t flags, 348 uint64_t flags2) 349 { 350 struct xfs_mount *mp = sc->mp; 351 352 /* Unknown di_flags2 could be from a future kernel */ 353 if (flags2 & ~XFS_DIFLAG2_ANY) 354 xchk_ino_set_warning(sc, ino); 355 356 /* reflink flag requires reflink feature */ 357 if ((flags2 & XFS_DIFLAG2_REFLINK) && 358 !xfs_has_reflink(mp)) 359 goto bad; 360 361 /* cowextsize flag is checked w.r.t. mode separately */ 362 363 /* file/dir-only flags */ 364 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 365 goto bad; 366 367 /* file-only flags */ 368 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 369 goto bad; 370 371 /* realtime and reflink don't always go together */ 372 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK) && 373 !xfs_has_rtreflink(mp)) 374 goto bad; 375 376 /* no bigtime iflag without the bigtime feature */ 377 if (xfs_dinode_has_bigtime(dip) && !xfs_has_bigtime(mp)) 378 goto bad; 379 380 /* no large extent counts without the filesystem feature */ 381 if ((flags2 & XFS_DIFLAG2_NREXT64) && !xfs_has_large_extent_counts(mp)) 382 goto bad; 383 384 return; 385 bad: 386 xchk_ino_set_corrupt(sc, ino); 387 } 388 389 static inline void 390 xchk_dinode_nsec( 391 struct xfs_scrub *sc, 392 xfs_ino_t ino, 393 struct xfs_dinode *dip, 394 const xfs_timestamp_t ts) 395 { 396 struct timespec64 tv; 397 398 tv = xfs_inode_from_disk_ts(dip, ts); 399 if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) 400 xchk_ino_set_corrupt(sc, ino); 401 } 402 403 /* Scrub all the ondisk inode fields. */ 404 STATIC void 405 xchk_dinode( 406 struct xfs_scrub *sc, 407 struct xfs_dinode *dip, 408 xfs_ino_t ino) 409 { 410 struct xfs_mount *mp = sc->mp; 411 size_t fork_recs; 412 unsigned long long isize; 413 uint64_t flags2; 414 xfs_extnum_t nextents; 415 xfs_extnum_t naextents; 416 prid_t prid; 417 uint16_t flags; 418 uint16_t mode; 419 420 flags = be16_to_cpu(dip->di_flags); 421 if (dip->di_version >= 3) 422 flags2 = be64_to_cpu(dip->di_flags2); 423 else 424 flags2 = 0; 425 426 /* di_mode */ 427 mode = be16_to_cpu(dip->di_mode); 428 switch (mode & S_IFMT) { 429 case S_IFLNK: 430 case S_IFREG: 431 case S_IFDIR: 432 case S_IFCHR: 433 case S_IFBLK: 434 case S_IFIFO: 435 case S_IFSOCK: 436 /* mode is recognized */ 437 break; 438 default: 439 xchk_ino_set_corrupt(sc, ino); 440 break; 441 } 442 443 /* v1/v2 fields */ 444 switch (dip->di_version) { 445 case 1: 446 /* 447 * We autoconvert v1 inodes into v2 inodes on writeout, 448 * so just mark this inode for preening. 449 */ 450 xchk_ino_set_preen(sc, ino); 451 prid = 0; 452 break; 453 case 2: 454 case 3: 455 if (xfs_dinode_is_metadir(dip)) { 456 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) 457 xchk_ino_set_corrupt(sc, ino); 458 } else { 459 if (dip->di_metatype != 0) 460 xchk_ino_set_corrupt(sc, ino); 461 } 462 463 if (dip->di_mode == 0 && sc->ip) 464 xchk_ino_set_corrupt(sc, ino); 465 466 if (dip->di_projid_hi != 0 && 467 !xfs_has_projid32(mp)) 468 xchk_ino_set_corrupt(sc, ino); 469 470 prid = be16_to_cpu(dip->di_projid_lo); 471 break; 472 default: 473 xchk_ino_set_corrupt(sc, ino); 474 return; 475 } 476 477 if (xfs_has_projid32(mp)) 478 prid |= (prid_t)be16_to_cpu(dip->di_projid_hi) << 16; 479 480 /* 481 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 482 * userspace could have created that. 483 */ 484 if (dip->di_uid == cpu_to_be32(-1U) || 485 dip->di_gid == cpu_to_be32(-1U)) 486 xchk_ino_set_warning(sc, ino); 487 488 /* 489 * project id of -1 isn't supposed to be valid, but the kernel didn't 490 * always validate that. 491 */ 492 if (prid == -1U) 493 xchk_ino_set_warning(sc, ino); 494 495 /* di_format */ 496 switch (dip->di_format) { 497 case XFS_DINODE_FMT_DEV: 498 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 499 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 500 xchk_ino_set_corrupt(sc, ino); 501 break; 502 case XFS_DINODE_FMT_LOCAL: 503 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 504 xchk_ino_set_corrupt(sc, ino); 505 break; 506 case XFS_DINODE_FMT_EXTENTS: 507 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 508 xchk_ino_set_corrupt(sc, ino); 509 break; 510 case XFS_DINODE_FMT_BTREE: 511 if (!S_ISREG(mode) && !S_ISDIR(mode)) 512 xchk_ino_set_corrupt(sc, ino); 513 break; 514 case XFS_DINODE_FMT_META_BTREE: 515 if (!S_ISREG(mode)) 516 xchk_ino_set_corrupt(sc, ino); 517 break; 518 case XFS_DINODE_FMT_UUID: 519 default: 520 xchk_ino_set_corrupt(sc, ino); 521 break; 522 } 523 524 /* di_[amc]time.nsec */ 525 xchk_dinode_nsec(sc, ino, dip, dip->di_atime); 526 xchk_dinode_nsec(sc, ino, dip, dip->di_mtime); 527 xchk_dinode_nsec(sc, ino, dip, dip->di_ctime); 528 529 /* 530 * di_size. xfs_dinode_verify checks for things that screw up 531 * the VFS such as the upper bit being set and zero-length 532 * symlinks/directories, but we can do more here. 533 */ 534 isize = be64_to_cpu(dip->di_size); 535 if (isize & (1ULL << 63)) 536 xchk_ino_set_corrupt(sc, ino); 537 538 /* Devices, fifos, and sockets must have zero size */ 539 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 540 xchk_ino_set_corrupt(sc, ino); 541 542 /* Directories can't be larger than the data section size (32G) */ 543 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 544 xchk_ino_set_corrupt(sc, ino); 545 546 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 547 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 548 xchk_ino_set_corrupt(sc, ino); 549 550 /* 551 * Warn if the running kernel can't handle the kinds of offsets 552 * needed to deal with the file size. In other words, if the 553 * pagecache can't cache all the blocks in this file due to 554 * overly large offsets, flag the inode for admin review. 555 */ 556 if (isize > mp->m_super->s_maxbytes) 557 xchk_ino_set_warning(sc, ino); 558 559 /* di_nblocks */ 560 if (flags2 & XFS_DIFLAG2_REFLINK) { 561 ; /* nblocks can exceed dblocks */ 562 } else if (flags & XFS_DIFLAG_REALTIME) { 563 /* 564 * nblocks is the sum of data extents (in the rtdev), 565 * attr extents (in the datadev), and both forks' bmbt 566 * blocks (in the datadev). This clumsy check is the 567 * best we can do without cross-referencing with the 568 * inode forks. 569 */ 570 if (be64_to_cpu(dip->di_nblocks) >= 571 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 572 xchk_ino_set_corrupt(sc, ino); 573 } else { 574 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 575 xchk_ino_set_corrupt(sc, ino); 576 } 577 578 xchk_inode_flags(sc, dip, ino, mode, flags); 579 580 xchk_inode_extsize(sc, dip, ino, mode, flags); 581 582 nextents = xfs_dfork_data_extents(dip); 583 naextents = xfs_dfork_attr_extents(dip); 584 585 /* di_nextents */ 586 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 587 switch (dip->di_format) { 588 case XFS_DINODE_FMT_EXTENTS: 589 if (nextents > fork_recs) 590 xchk_ino_set_corrupt(sc, ino); 591 break; 592 case XFS_DINODE_FMT_BTREE: 593 if (nextents <= fork_recs) 594 xchk_ino_set_corrupt(sc, ino); 595 break; 596 default: 597 if (nextents != 0) 598 xchk_ino_set_corrupt(sc, ino); 599 break; 600 } 601 602 /* di_forkoff */ 603 if (XFS_DFORK_BOFF(dip) >= mp->m_sb.sb_inodesize) 604 xchk_ino_set_corrupt(sc, ino); 605 if (naextents != 0 && dip->di_forkoff == 0) 606 xchk_ino_set_corrupt(sc, ino); 607 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 608 xchk_ino_set_corrupt(sc, ino); 609 610 /* di_aformat */ 611 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 612 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 613 dip->di_aformat != XFS_DINODE_FMT_BTREE) 614 xchk_ino_set_corrupt(sc, ino); 615 616 /* di_anextents */ 617 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 618 switch (dip->di_aformat) { 619 case XFS_DINODE_FMT_EXTENTS: 620 if (naextents > fork_recs) 621 xchk_ino_set_corrupt(sc, ino); 622 break; 623 case XFS_DINODE_FMT_BTREE: 624 if (naextents <= fork_recs) 625 xchk_ino_set_corrupt(sc, ino); 626 break; 627 default: 628 if (naextents != 0) 629 xchk_ino_set_corrupt(sc, ino); 630 } 631 632 if (dip->di_version >= 3) { 633 xchk_dinode_nsec(sc, ino, dip, dip->di_crtime); 634 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 635 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 636 flags2); 637 } 638 } 639 640 /* 641 * Make sure the finobt doesn't think this inode is free. 642 * We don't have to check the inobt ourselves because we got the inode via 643 * IGET_UNTRUSTED, which checks the inobt for us. 644 */ 645 static void 646 xchk_inode_xref_finobt( 647 struct xfs_scrub *sc, 648 xfs_ino_t ino) 649 { 650 struct xfs_inobt_rec_incore rec; 651 xfs_agino_t agino; 652 int has_record; 653 int error; 654 655 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 656 return; 657 658 agino = XFS_INO_TO_AGINO(sc->mp, ino); 659 660 /* 661 * Try to get the finobt record. If we can't get it, then we're 662 * in good shape. 663 */ 664 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 665 &has_record); 666 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 667 !has_record) 668 return; 669 670 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 671 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 672 !has_record) 673 return; 674 675 /* 676 * Otherwise, make sure this record either doesn't cover this inode, 677 * or that it does but it's marked present. 678 */ 679 if (rec.ir_startino > agino || 680 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 681 return; 682 683 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 684 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 685 } 686 687 /* Cross reference the inode fields with the forks. */ 688 STATIC void 689 xchk_inode_xref_bmap( 690 struct xfs_scrub *sc, 691 struct xfs_dinode *dip) 692 { 693 xfs_extnum_t nextents; 694 xfs_filblks_t count; 695 xfs_filblks_t acount; 696 int error; 697 698 if (xchk_skip_xref(sc->sm)) 699 return; 700 701 /* Walk all the extents to check nextents/naextents/nblocks. */ 702 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count); 703 if (!xchk_should_check_xref(sc, &error, NULL)) 704 return; 705 if (nextents < xfs_dfork_data_extents(dip)) 706 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 707 708 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, &acount); 709 if (!xchk_should_check_xref(sc, &error, NULL)) 710 return; 711 if (nextents != xfs_dfork_attr_extents(dip)) 712 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 713 714 /* Check nblocks against the inode. */ 715 if (count + acount != be64_to_cpu(dip->di_nblocks)) 716 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 717 } 718 719 /* Cross-reference with the other btrees. */ 720 STATIC void 721 xchk_inode_xref( 722 struct xfs_scrub *sc, 723 xfs_ino_t ino, 724 struct xfs_dinode *dip) 725 { 726 xfs_agnumber_t agno; 727 xfs_agblock_t agbno; 728 int error; 729 730 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 731 return; 732 733 agno = XFS_INO_TO_AGNO(sc->mp, ino); 734 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 735 736 error = xchk_ag_init_existing(sc, agno, &sc->sa); 737 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 738 goto out_free; 739 740 xchk_xref_is_used_space(sc, agbno, 1); 741 xchk_inode_xref_finobt(sc, ino); 742 xchk_xref_is_only_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 743 xchk_xref_is_not_shared(sc, agbno, 1); 744 xchk_xref_is_not_cow_staging(sc, agbno, 1); 745 xchk_inode_xref_bmap(sc, dip); 746 747 out_free: 748 xchk_ag_free(sc, &sc->sa); 749 } 750 751 /* 752 * If the reflink iflag disagrees with a scan for shared data fork extents, 753 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 754 * any shared extents). We already checked for reflink iflag set on a non 755 * reflink filesystem. 756 */ 757 static void 758 xchk_inode_check_reflink_iflag( 759 struct xfs_scrub *sc, 760 xfs_ino_t ino) 761 { 762 struct xfs_mount *mp = sc->mp; 763 bool has_shared; 764 int error; 765 766 if (!xfs_has_reflink(mp)) 767 return; 768 769 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 770 &has_shared); 771 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 772 XFS_INO_TO_AGBNO(mp, ino), &error)) 773 return; 774 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 775 xchk_ino_set_preen(sc, ino); 776 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 777 xchk_ino_set_corrupt(sc, ino); 778 } 779 780 /* 781 * If this inode has zero link count, it must be on the unlinked list. If 782 * it has nonzero link count, it must not be on the unlinked list. 783 */ 784 STATIC void 785 xchk_inode_check_unlinked( 786 struct xfs_scrub *sc) 787 { 788 if (VFS_I(sc->ip)->i_nlink == 0) { 789 if (!xfs_inode_on_unlinked_list(sc->ip)) 790 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 791 } else { 792 if (xfs_inode_on_unlinked_list(sc->ip)) 793 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 794 } 795 } 796 797 /* Scrub an inode. */ 798 int 799 xchk_inode( 800 struct xfs_scrub *sc) 801 { 802 struct xfs_dinode di; 803 int error = 0; 804 805 /* 806 * If sc->ip is NULL, that means that the setup function called 807 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 808 * and a NULL inode, so flag the corruption error and return. 809 */ 810 if (!sc->ip) { 811 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 812 return 0; 813 } 814 815 /* Scrub the inode core. */ 816 xfs_inode_to_disk(sc->ip, &di, 0); 817 xchk_dinode(sc, &di, sc->ip->i_ino); 818 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 819 goto out; 820 821 /* 822 * Look for discrepancies between file's data blocks and the reflink 823 * iflag. We already checked the iflag against the file mode when 824 * we scrubbed the dinode. 825 */ 826 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 827 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 828 829 xchk_inode_check_unlinked(sc); 830 831 xchk_inode_xref(sc, sc->ip->i_ino, &di); 832 out: 833 return error; 834 } 835