1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * Copyright (C) 2017 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_log_format.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_da_format.h" 17 #include "xfs_reflink.h" 18 #include "xfs_rmap.h" 19 #include "xfs_bmap_util.h" 20 #include "scrub/scrub.h" 21 #include "scrub/common.h" 22 #include "scrub/btree.h" 23 24 /* 25 * Grab total control of the inode metadata. It doesn't matter here if 26 * the file data is still changing; exclusive access to the metadata is 27 * the goal. 28 */ 29 int 30 xchk_setup_inode( 31 struct xfs_scrub *sc) 32 { 33 int error; 34 35 /* 36 * Try to get the inode. If the verifiers fail, we try again 37 * in raw mode. 38 */ 39 error = xchk_get_inode(sc); 40 switch (error) { 41 case 0: 42 break; 43 case -EFSCORRUPTED: 44 case -EFSBADCRC: 45 return xchk_trans_alloc(sc, 0); 46 default: 47 return error; 48 } 49 50 /* Got the inode, lock it and we're ready to go. */ 51 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 52 xfs_ilock(sc->ip, sc->ilock_flags); 53 error = xchk_trans_alloc(sc, 0); 54 if (error) 55 goto out; 56 sc->ilock_flags |= XFS_ILOCK_EXCL; 57 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 58 59 out: 60 /* scrub teardown will unlock and release the inode for us */ 61 return error; 62 } 63 64 /* Inode core */ 65 66 /* Validate di_extsize hint. */ 67 STATIC void 68 xchk_inode_extsize( 69 struct xfs_scrub *sc, 70 struct xfs_dinode *dip, 71 xfs_ino_t ino, 72 uint16_t mode, 73 uint16_t flags) 74 { 75 xfs_failaddr_t fa; 76 77 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 78 mode, flags); 79 if (fa) 80 xchk_ino_set_corrupt(sc, ino); 81 } 82 83 /* 84 * Validate di_cowextsize hint. 85 * 86 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 87 * These functions must be kept in sync with each other. 88 */ 89 STATIC void 90 xchk_inode_cowextsize( 91 struct xfs_scrub *sc, 92 struct xfs_dinode *dip, 93 xfs_ino_t ino, 94 uint16_t mode, 95 uint16_t flags, 96 uint64_t flags2) 97 { 98 xfs_failaddr_t fa; 99 100 fa = xfs_inode_validate_cowextsize(sc->mp, 101 be32_to_cpu(dip->di_cowextsize), mode, flags, 102 flags2); 103 if (fa) 104 xchk_ino_set_corrupt(sc, ino); 105 } 106 107 /* Make sure the di_flags make sense for the inode. */ 108 STATIC void 109 xchk_inode_flags( 110 struct xfs_scrub *sc, 111 struct xfs_dinode *dip, 112 xfs_ino_t ino, 113 uint16_t mode, 114 uint16_t flags) 115 { 116 struct xfs_mount *mp = sc->mp; 117 118 /* di_flags are all taken, last bit cannot be used */ 119 if (flags & ~XFS_DIFLAG_ANY) 120 goto bad; 121 122 /* rt flags require rt device */ 123 if ((flags & XFS_DIFLAG_REALTIME) && !mp->m_rtdev_targp) 124 goto bad; 125 126 /* new rt bitmap flag only valid for rbmino */ 127 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 128 goto bad; 129 130 /* directory-only flags */ 131 if ((flags & (XFS_DIFLAG_RTINHERIT | 132 XFS_DIFLAG_EXTSZINHERIT | 133 XFS_DIFLAG_PROJINHERIT | 134 XFS_DIFLAG_NOSYMLINKS)) && 135 !S_ISDIR(mode)) 136 goto bad; 137 138 /* file-only flags */ 139 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 140 !S_ISREG(mode)) 141 goto bad; 142 143 /* filestreams and rt make no sense */ 144 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 145 goto bad; 146 147 return; 148 bad: 149 xchk_ino_set_corrupt(sc, ino); 150 } 151 152 /* Make sure the di_flags2 make sense for the inode. */ 153 STATIC void 154 xchk_inode_flags2( 155 struct xfs_scrub *sc, 156 struct xfs_dinode *dip, 157 xfs_ino_t ino, 158 uint16_t mode, 159 uint16_t flags, 160 uint64_t flags2) 161 { 162 struct xfs_mount *mp = sc->mp; 163 164 /* Unknown di_flags2 could be from a future kernel */ 165 if (flags2 & ~XFS_DIFLAG2_ANY) 166 xchk_ino_set_warning(sc, ino); 167 168 /* reflink flag requires reflink feature */ 169 if ((flags2 & XFS_DIFLAG2_REFLINK) && 170 !xfs_sb_version_hasreflink(&mp->m_sb)) 171 goto bad; 172 173 /* cowextsize flag is checked w.r.t. mode separately */ 174 175 /* file/dir-only flags */ 176 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 177 goto bad; 178 179 /* file-only flags */ 180 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 181 goto bad; 182 183 /* realtime and reflink make no sense, currently */ 184 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 185 goto bad; 186 187 /* no bigtime iflag without the bigtime feature */ 188 if (xfs_dinode_has_bigtime(dip) && 189 !xfs_sb_version_hasbigtime(&mp->m_sb)) 190 goto bad; 191 192 return; 193 bad: 194 xchk_ino_set_corrupt(sc, ino); 195 } 196 197 static inline void 198 xchk_dinode_nsec( 199 struct xfs_scrub *sc, 200 xfs_ino_t ino, 201 struct xfs_dinode *dip, 202 const xfs_timestamp_t ts) 203 { 204 struct timespec64 tv; 205 206 tv = xfs_inode_from_disk_ts(dip, ts); 207 if (tv.tv_nsec < 0 || tv.tv_nsec >= NSEC_PER_SEC) 208 xchk_ino_set_corrupt(sc, ino); 209 } 210 211 /* Scrub all the ondisk inode fields. */ 212 STATIC void 213 xchk_dinode( 214 struct xfs_scrub *sc, 215 struct xfs_dinode *dip, 216 xfs_ino_t ino) 217 { 218 struct xfs_mount *mp = sc->mp; 219 size_t fork_recs; 220 unsigned long long isize; 221 uint64_t flags2; 222 uint32_t nextents; 223 uint16_t flags; 224 uint16_t mode; 225 226 flags = be16_to_cpu(dip->di_flags); 227 if (dip->di_version >= 3) 228 flags2 = be64_to_cpu(dip->di_flags2); 229 else 230 flags2 = 0; 231 232 /* di_mode */ 233 mode = be16_to_cpu(dip->di_mode); 234 switch (mode & S_IFMT) { 235 case S_IFLNK: 236 case S_IFREG: 237 case S_IFDIR: 238 case S_IFCHR: 239 case S_IFBLK: 240 case S_IFIFO: 241 case S_IFSOCK: 242 /* mode is recognized */ 243 break; 244 default: 245 xchk_ino_set_corrupt(sc, ino); 246 break; 247 } 248 249 /* v1/v2 fields */ 250 switch (dip->di_version) { 251 case 1: 252 /* 253 * We autoconvert v1 inodes into v2 inodes on writeout, 254 * so just mark this inode for preening. 255 */ 256 xchk_ino_set_preen(sc, ino); 257 break; 258 case 2: 259 case 3: 260 if (dip->di_onlink != 0) 261 xchk_ino_set_corrupt(sc, ino); 262 263 if (dip->di_mode == 0 && sc->ip) 264 xchk_ino_set_corrupt(sc, ino); 265 266 if (dip->di_projid_hi != 0 && 267 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 268 xchk_ino_set_corrupt(sc, ino); 269 break; 270 default: 271 xchk_ino_set_corrupt(sc, ino); 272 return; 273 } 274 275 /* 276 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 277 * userspace could have created that. 278 */ 279 if (dip->di_uid == cpu_to_be32(-1U) || 280 dip->di_gid == cpu_to_be32(-1U)) 281 xchk_ino_set_warning(sc, ino); 282 283 /* di_format */ 284 switch (dip->di_format) { 285 case XFS_DINODE_FMT_DEV: 286 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 287 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 288 xchk_ino_set_corrupt(sc, ino); 289 break; 290 case XFS_DINODE_FMT_LOCAL: 291 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 292 xchk_ino_set_corrupt(sc, ino); 293 break; 294 case XFS_DINODE_FMT_EXTENTS: 295 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 296 xchk_ino_set_corrupt(sc, ino); 297 break; 298 case XFS_DINODE_FMT_BTREE: 299 if (!S_ISREG(mode) && !S_ISDIR(mode)) 300 xchk_ino_set_corrupt(sc, ino); 301 break; 302 case XFS_DINODE_FMT_UUID: 303 default: 304 xchk_ino_set_corrupt(sc, ino); 305 break; 306 } 307 308 /* di_[amc]time.nsec */ 309 xchk_dinode_nsec(sc, ino, dip, dip->di_atime); 310 xchk_dinode_nsec(sc, ino, dip, dip->di_mtime); 311 xchk_dinode_nsec(sc, ino, dip, dip->di_ctime); 312 313 /* 314 * di_size. xfs_dinode_verify checks for things that screw up 315 * the VFS such as the upper bit being set and zero-length 316 * symlinks/directories, but we can do more here. 317 */ 318 isize = be64_to_cpu(dip->di_size); 319 if (isize & (1ULL << 63)) 320 xchk_ino_set_corrupt(sc, ino); 321 322 /* Devices, fifos, and sockets must have zero size */ 323 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 324 xchk_ino_set_corrupt(sc, ino); 325 326 /* Directories can't be larger than the data section size (32G) */ 327 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 328 xchk_ino_set_corrupt(sc, ino); 329 330 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 331 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 332 xchk_ino_set_corrupt(sc, ino); 333 334 /* 335 * Warn if the running kernel can't handle the kinds of offsets 336 * needed to deal with the file size. In other words, if the 337 * pagecache can't cache all the blocks in this file due to 338 * overly large offsets, flag the inode for admin review. 339 */ 340 if (isize >= mp->m_super->s_maxbytes) 341 xchk_ino_set_warning(sc, ino); 342 343 /* di_nblocks */ 344 if (flags2 & XFS_DIFLAG2_REFLINK) { 345 ; /* nblocks can exceed dblocks */ 346 } else if (flags & XFS_DIFLAG_REALTIME) { 347 /* 348 * nblocks is the sum of data extents (in the rtdev), 349 * attr extents (in the datadev), and both forks' bmbt 350 * blocks (in the datadev). This clumsy check is the 351 * best we can do without cross-referencing with the 352 * inode forks. 353 */ 354 if (be64_to_cpu(dip->di_nblocks) >= 355 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 356 xchk_ino_set_corrupt(sc, ino); 357 } else { 358 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 359 xchk_ino_set_corrupt(sc, ino); 360 } 361 362 xchk_inode_flags(sc, dip, ino, mode, flags); 363 364 xchk_inode_extsize(sc, dip, ino, mode, flags); 365 366 /* di_nextents */ 367 nextents = be32_to_cpu(dip->di_nextents); 368 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 369 switch (dip->di_format) { 370 case XFS_DINODE_FMT_EXTENTS: 371 if (nextents > fork_recs) 372 xchk_ino_set_corrupt(sc, ino); 373 break; 374 case XFS_DINODE_FMT_BTREE: 375 if (nextents <= fork_recs) 376 xchk_ino_set_corrupt(sc, ino); 377 break; 378 default: 379 if (nextents != 0) 380 xchk_ino_set_corrupt(sc, ino); 381 break; 382 } 383 384 /* di_forkoff */ 385 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 386 xchk_ino_set_corrupt(sc, ino); 387 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 388 xchk_ino_set_corrupt(sc, ino); 389 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 390 xchk_ino_set_corrupt(sc, ino); 391 392 /* di_aformat */ 393 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 394 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 395 dip->di_aformat != XFS_DINODE_FMT_BTREE) 396 xchk_ino_set_corrupt(sc, ino); 397 398 /* di_anextents */ 399 nextents = be16_to_cpu(dip->di_anextents); 400 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 401 switch (dip->di_aformat) { 402 case XFS_DINODE_FMT_EXTENTS: 403 if (nextents > fork_recs) 404 xchk_ino_set_corrupt(sc, ino); 405 break; 406 case XFS_DINODE_FMT_BTREE: 407 if (nextents <= fork_recs) 408 xchk_ino_set_corrupt(sc, ino); 409 break; 410 default: 411 if (nextents != 0) 412 xchk_ino_set_corrupt(sc, ino); 413 } 414 415 if (dip->di_version >= 3) { 416 xchk_dinode_nsec(sc, ino, dip, dip->di_crtime); 417 xchk_inode_flags2(sc, dip, ino, mode, flags, flags2); 418 xchk_inode_cowextsize(sc, dip, ino, mode, flags, 419 flags2); 420 } 421 } 422 423 /* 424 * Make sure the finobt doesn't think this inode is free. 425 * We don't have to check the inobt ourselves because we got the inode via 426 * IGET_UNTRUSTED, which checks the inobt for us. 427 */ 428 static void 429 xchk_inode_xref_finobt( 430 struct xfs_scrub *sc, 431 xfs_ino_t ino) 432 { 433 struct xfs_inobt_rec_incore rec; 434 xfs_agino_t agino; 435 int has_record; 436 int error; 437 438 if (!sc->sa.fino_cur || xchk_skip_xref(sc->sm)) 439 return; 440 441 agino = XFS_INO_TO_AGINO(sc->mp, ino); 442 443 /* 444 * Try to get the finobt record. If we can't get it, then we're 445 * in good shape. 446 */ 447 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 448 &has_record); 449 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 450 !has_record) 451 return; 452 453 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 454 if (!xchk_should_check_xref(sc, &error, &sc->sa.fino_cur) || 455 !has_record) 456 return; 457 458 /* 459 * Otherwise, make sure this record either doesn't cover this inode, 460 * or that it does but it's marked present. 461 */ 462 if (rec.ir_startino > agino || 463 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 464 return; 465 466 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 467 xchk_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 468 } 469 470 /* Cross reference the inode fields with the forks. */ 471 STATIC void 472 xchk_inode_xref_bmap( 473 struct xfs_scrub *sc, 474 struct xfs_dinode *dip) 475 { 476 xfs_extnum_t nextents; 477 xfs_filblks_t count; 478 xfs_filblks_t acount; 479 int error; 480 481 if (xchk_skip_xref(sc->sm)) 482 return; 483 484 /* Walk all the extents to check nextents/naextents/nblocks. */ 485 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 486 &nextents, &count); 487 if (!xchk_should_check_xref(sc, &error, NULL)) 488 return; 489 if (nextents < be32_to_cpu(dip->di_nextents)) 490 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 491 492 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 493 &nextents, &acount); 494 if (!xchk_should_check_xref(sc, &error, NULL)) 495 return; 496 if (nextents != be16_to_cpu(dip->di_anextents)) 497 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 498 499 /* Check nblocks against the inode. */ 500 if (count + acount != be64_to_cpu(dip->di_nblocks)) 501 xchk_ino_xref_set_corrupt(sc, sc->ip->i_ino); 502 } 503 504 /* Cross-reference with the other btrees. */ 505 STATIC void 506 xchk_inode_xref( 507 struct xfs_scrub *sc, 508 xfs_ino_t ino, 509 struct xfs_dinode *dip) 510 { 511 xfs_agnumber_t agno; 512 xfs_agblock_t agbno; 513 int error; 514 515 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 516 return; 517 518 agno = XFS_INO_TO_AGNO(sc->mp, ino); 519 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 520 521 error = xchk_ag_init(sc, agno, &sc->sa); 522 if (!xchk_xref_process_error(sc, agno, agbno, &error)) 523 return; 524 525 xchk_xref_is_used_space(sc, agbno, 1); 526 xchk_inode_xref_finobt(sc, ino); 527 xchk_xref_is_owned_by(sc, agbno, 1, &XFS_RMAP_OINFO_INODES); 528 xchk_xref_is_not_shared(sc, agbno, 1); 529 xchk_inode_xref_bmap(sc, dip); 530 531 xchk_ag_free(sc, &sc->sa); 532 } 533 534 /* 535 * If the reflink iflag disagrees with a scan for shared data fork extents, 536 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 537 * any shared extents). We already checked for reflink iflag set on a non 538 * reflink filesystem. 539 */ 540 static void 541 xchk_inode_check_reflink_iflag( 542 struct xfs_scrub *sc, 543 xfs_ino_t ino) 544 { 545 struct xfs_mount *mp = sc->mp; 546 bool has_shared; 547 int error; 548 549 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 550 return; 551 552 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 553 &has_shared); 554 if (!xchk_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 555 XFS_INO_TO_AGBNO(mp, ino), &error)) 556 return; 557 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 558 xchk_ino_set_preen(sc, ino); 559 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 560 xchk_ino_set_corrupt(sc, ino); 561 } 562 563 /* Scrub an inode. */ 564 int 565 xchk_inode( 566 struct xfs_scrub *sc) 567 { 568 struct xfs_dinode di; 569 int error = 0; 570 571 /* 572 * If sc->ip is NULL, that means that the setup function called 573 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 574 * and a NULL inode, so flag the corruption error and return. 575 */ 576 if (!sc->ip) { 577 xchk_ino_set_corrupt(sc, sc->sm->sm_ino); 578 return 0; 579 } 580 581 /* Scrub the inode core. */ 582 xfs_inode_to_disk(sc->ip, &di, 0); 583 xchk_dinode(sc, &di, sc->ip->i_ino); 584 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 585 goto out; 586 587 /* 588 * Look for discrepancies between file's data blocks and the reflink 589 * iflag. We already checked the iflag against the file mode when 590 * we scrubbed the dinode. 591 */ 592 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 593 xchk_inode_check_reflink_iflag(sc, sc->ip->i_ino); 594 595 xchk_inode_xref(sc, sc->ip->i_ino, &di); 596 out: 597 return error; 598 } 599