1 /* 2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_btree.h" 28 #include "xfs_bit.h" 29 #include "xfs_log_format.h" 30 #include "xfs_trans.h" 31 #include "xfs_sb.h" 32 #include "xfs_inode.h" 33 #include "xfs_icache.h" 34 #include "xfs_inode_buf.h" 35 #include "xfs_inode_fork.h" 36 #include "xfs_ialloc.h" 37 #include "xfs_da_format.h" 38 #include "xfs_reflink.h" 39 #include "xfs_rmap.h" 40 #include "xfs_bmap.h" 41 #include "xfs_bmap_util.h" 42 #include "scrub/xfs_scrub.h" 43 #include "scrub/scrub.h" 44 #include "scrub/common.h" 45 #include "scrub/btree.h" 46 #include "scrub/trace.h" 47 48 /* 49 * Grab total control of the inode metadata. It doesn't matter here if 50 * the file data is still changing; exclusive access to the metadata is 51 * the goal. 52 */ 53 int 54 xfs_scrub_setup_inode( 55 struct xfs_scrub_context *sc, 56 struct xfs_inode *ip) 57 { 58 int error; 59 60 /* 61 * Try to get the inode. If the verifiers fail, we try again 62 * in raw mode. 63 */ 64 error = xfs_scrub_get_inode(sc, ip); 65 switch (error) { 66 case 0: 67 break; 68 case -EFSCORRUPTED: 69 case -EFSBADCRC: 70 return xfs_scrub_trans_alloc(sc, 0); 71 default: 72 return error; 73 } 74 75 /* Got the inode, lock it and we're ready to go. */ 76 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 77 xfs_ilock(sc->ip, sc->ilock_flags); 78 error = xfs_scrub_trans_alloc(sc, 0); 79 if (error) 80 goto out; 81 sc->ilock_flags |= XFS_ILOCK_EXCL; 82 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 83 84 out: 85 /* scrub teardown will unlock and release the inode for us */ 86 return error; 87 } 88 89 /* Inode core */ 90 91 /* Validate di_extsize hint. */ 92 STATIC void 93 xfs_scrub_inode_extsize( 94 struct xfs_scrub_context *sc, 95 struct xfs_dinode *dip, 96 xfs_ino_t ino, 97 uint16_t mode, 98 uint16_t flags) 99 { 100 xfs_failaddr_t fa; 101 102 fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize), 103 mode, flags); 104 if (fa) 105 xfs_scrub_ino_set_corrupt(sc, ino); 106 } 107 108 /* 109 * Validate di_cowextsize hint. 110 * 111 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 112 * These functions must be kept in sync with each other. 113 */ 114 STATIC void 115 xfs_scrub_inode_cowextsize( 116 struct xfs_scrub_context *sc, 117 struct xfs_dinode *dip, 118 xfs_ino_t ino, 119 uint16_t mode, 120 uint16_t flags, 121 uint64_t flags2) 122 { 123 xfs_failaddr_t fa; 124 125 fa = xfs_inode_validate_cowextsize(sc->mp, 126 be32_to_cpu(dip->di_cowextsize), mode, flags, 127 flags2); 128 if (fa) 129 xfs_scrub_ino_set_corrupt(sc, ino); 130 } 131 132 /* Make sure the di_flags make sense for the inode. */ 133 STATIC void 134 xfs_scrub_inode_flags( 135 struct xfs_scrub_context *sc, 136 struct xfs_dinode *dip, 137 xfs_ino_t ino, 138 uint16_t mode, 139 uint16_t flags) 140 { 141 struct xfs_mount *mp = sc->mp; 142 143 if (flags & ~XFS_DIFLAG_ANY) 144 goto bad; 145 146 /* rt flags require rt device */ 147 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 148 !mp->m_rtdev_targp) 149 goto bad; 150 151 /* new rt bitmap flag only valid for rbmino */ 152 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 153 goto bad; 154 155 /* directory-only flags */ 156 if ((flags & (XFS_DIFLAG_RTINHERIT | 157 XFS_DIFLAG_EXTSZINHERIT | 158 XFS_DIFLAG_PROJINHERIT | 159 XFS_DIFLAG_NOSYMLINKS)) && 160 !S_ISDIR(mode)) 161 goto bad; 162 163 /* file-only flags */ 164 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 165 !S_ISREG(mode)) 166 goto bad; 167 168 /* filestreams and rt make no sense */ 169 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 170 goto bad; 171 172 return; 173 bad: 174 xfs_scrub_ino_set_corrupt(sc, ino); 175 } 176 177 /* Make sure the di_flags2 make sense for the inode. */ 178 STATIC void 179 xfs_scrub_inode_flags2( 180 struct xfs_scrub_context *sc, 181 struct xfs_dinode *dip, 182 xfs_ino_t ino, 183 uint16_t mode, 184 uint16_t flags, 185 uint64_t flags2) 186 { 187 struct xfs_mount *mp = sc->mp; 188 189 if (flags2 & ~XFS_DIFLAG2_ANY) 190 goto bad; 191 192 /* reflink flag requires reflink feature */ 193 if ((flags2 & XFS_DIFLAG2_REFLINK) && 194 !xfs_sb_version_hasreflink(&mp->m_sb)) 195 goto bad; 196 197 /* cowextsize flag is checked w.r.t. mode separately */ 198 199 /* file/dir-only flags */ 200 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 201 goto bad; 202 203 /* file-only flags */ 204 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 205 goto bad; 206 207 /* realtime and reflink make no sense, currently */ 208 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 209 goto bad; 210 211 /* dax and reflink make no sense, currently */ 212 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 213 goto bad; 214 215 return; 216 bad: 217 xfs_scrub_ino_set_corrupt(sc, ino); 218 } 219 220 /* Scrub all the ondisk inode fields. */ 221 STATIC void 222 xfs_scrub_dinode( 223 struct xfs_scrub_context *sc, 224 struct xfs_dinode *dip, 225 xfs_ino_t ino) 226 { 227 struct xfs_mount *mp = sc->mp; 228 size_t fork_recs; 229 unsigned long long isize; 230 uint64_t flags2; 231 uint32_t nextents; 232 uint16_t flags; 233 uint16_t mode; 234 235 flags = be16_to_cpu(dip->di_flags); 236 if (dip->di_version >= 3) 237 flags2 = be64_to_cpu(dip->di_flags2); 238 else 239 flags2 = 0; 240 241 /* di_mode */ 242 mode = be16_to_cpu(dip->di_mode); 243 switch (mode & S_IFMT) { 244 case S_IFLNK: 245 case S_IFREG: 246 case S_IFDIR: 247 case S_IFCHR: 248 case S_IFBLK: 249 case S_IFIFO: 250 case S_IFSOCK: 251 /* mode is recognized */ 252 break; 253 default: 254 xfs_scrub_ino_set_corrupt(sc, ino); 255 break; 256 } 257 258 /* v1/v2 fields */ 259 switch (dip->di_version) { 260 case 1: 261 /* 262 * We autoconvert v1 inodes into v2 inodes on writeout, 263 * so just mark this inode for preening. 264 */ 265 xfs_scrub_ino_set_preen(sc, ino); 266 break; 267 case 2: 268 case 3: 269 if (dip->di_onlink != 0) 270 xfs_scrub_ino_set_corrupt(sc, ino); 271 272 if (dip->di_mode == 0 && sc->ip) 273 xfs_scrub_ino_set_corrupt(sc, ino); 274 275 if (dip->di_projid_hi != 0 && 276 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 277 xfs_scrub_ino_set_corrupt(sc, ino); 278 break; 279 default: 280 xfs_scrub_ino_set_corrupt(sc, ino); 281 return; 282 } 283 284 /* 285 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 286 * userspace could have created that. 287 */ 288 if (dip->di_uid == cpu_to_be32(-1U) || 289 dip->di_gid == cpu_to_be32(-1U)) 290 xfs_scrub_ino_set_warning(sc, ino); 291 292 /* di_format */ 293 switch (dip->di_format) { 294 case XFS_DINODE_FMT_DEV: 295 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 296 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 297 xfs_scrub_ino_set_corrupt(sc, ino); 298 break; 299 case XFS_DINODE_FMT_LOCAL: 300 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 301 xfs_scrub_ino_set_corrupt(sc, ino); 302 break; 303 case XFS_DINODE_FMT_EXTENTS: 304 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 305 xfs_scrub_ino_set_corrupt(sc, ino); 306 break; 307 case XFS_DINODE_FMT_BTREE: 308 if (!S_ISREG(mode) && !S_ISDIR(mode)) 309 xfs_scrub_ino_set_corrupt(sc, ino); 310 break; 311 case XFS_DINODE_FMT_UUID: 312 default: 313 xfs_scrub_ino_set_corrupt(sc, ino); 314 break; 315 } 316 317 /* di_[amc]time.nsec */ 318 if (be32_to_cpu(dip->di_atime.t_nsec) >= NSEC_PER_SEC) 319 xfs_scrub_ino_set_corrupt(sc, ino); 320 if (be32_to_cpu(dip->di_mtime.t_nsec) >= NSEC_PER_SEC) 321 xfs_scrub_ino_set_corrupt(sc, ino); 322 if (be32_to_cpu(dip->di_ctime.t_nsec) >= NSEC_PER_SEC) 323 xfs_scrub_ino_set_corrupt(sc, ino); 324 325 /* 326 * di_size. xfs_dinode_verify checks for things that screw up 327 * the VFS such as the upper bit being set and zero-length 328 * symlinks/directories, but we can do more here. 329 */ 330 isize = be64_to_cpu(dip->di_size); 331 if (isize & (1ULL << 63)) 332 xfs_scrub_ino_set_corrupt(sc, ino); 333 334 /* Devices, fifos, and sockets must have zero size */ 335 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 336 xfs_scrub_ino_set_corrupt(sc, ino); 337 338 /* Directories can't be larger than the data section size (32G) */ 339 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 340 xfs_scrub_ino_set_corrupt(sc, ino); 341 342 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 343 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 344 xfs_scrub_ino_set_corrupt(sc, ino); 345 346 /* 347 * Warn if the running kernel can't handle the kinds of offsets 348 * needed to deal with the file size. In other words, if the 349 * pagecache can't cache all the blocks in this file due to 350 * overly large offsets, flag the inode for admin review. 351 */ 352 if (isize >= mp->m_super->s_maxbytes) 353 xfs_scrub_ino_set_warning(sc, ino); 354 355 /* di_nblocks */ 356 if (flags2 & XFS_DIFLAG2_REFLINK) { 357 ; /* nblocks can exceed dblocks */ 358 } else if (flags & XFS_DIFLAG_REALTIME) { 359 /* 360 * nblocks is the sum of data extents (in the rtdev), 361 * attr extents (in the datadev), and both forks' bmbt 362 * blocks (in the datadev). This clumsy check is the 363 * best we can do without cross-referencing with the 364 * inode forks. 365 */ 366 if (be64_to_cpu(dip->di_nblocks) >= 367 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 368 xfs_scrub_ino_set_corrupt(sc, ino); 369 } else { 370 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 371 xfs_scrub_ino_set_corrupt(sc, ino); 372 } 373 374 xfs_scrub_inode_flags(sc, dip, ino, mode, flags); 375 376 xfs_scrub_inode_extsize(sc, dip, ino, mode, flags); 377 378 /* di_nextents */ 379 nextents = be32_to_cpu(dip->di_nextents); 380 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 381 switch (dip->di_format) { 382 case XFS_DINODE_FMT_EXTENTS: 383 if (nextents > fork_recs) 384 xfs_scrub_ino_set_corrupt(sc, ino); 385 break; 386 case XFS_DINODE_FMT_BTREE: 387 if (nextents <= fork_recs) 388 xfs_scrub_ino_set_corrupt(sc, ino); 389 break; 390 default: 391 if (nextents != 0) 392 xfs_scrub_ino_set_corrupt(sc, ino); 393 break; 394 } 395 396 /* di_forkoff */ 397 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 398 xfs_scrub_ino_set_corrupt(sc, ino); 399 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 400 xfs_scrub_ino_set_corrupt(sc, ino); 401 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 402 xfs_scrub_ino_set_corrupt(sc, ino); 403 404 /* di_aformat */ 405 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 406 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 407 dip->di_aformat != XFS_DINODE_FMT_BTREE) 408 xfs_scrub_ino_set_corrupt(sc, ino); 409 410 /* di_anextents */ 411 nextents = be16_to_cpu(dip->di_anextents); 412 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 413 switch (dip->di_aformat) { 414 case XFS_DINODE_FMT_EXTENTS: 415 if (nextents > fork_recs) 416 xfs_scrub_ino_set_corrupt(sc, ino); 417 break; 418 case XFS_DINODE_FMT_BTREE: 419 if (nextents <= fork_recs) 420 xfs_scrub_ino_set_corrupt(sc, ino); 421 break; 422 default: 423 if (nextents != 0) 424 xfs_scrub_ino_set_corrupt(sc, ino); 425 } 426 427 if (dip->di_version >= 3) { 428 if (be32_to_cpu(dip->di_crtime.t_nsec) >= NSEC_PER_SEC) 429 xfs_scrub_ino_set_corrupt(sc, ino); 430 xfs_scrub_inode_flags2(sc, dip, ino, mode, flags, flags2); 431 xfs_scrub_inode_cowextsize(sc, dip, ino, mode, flags, 432 flags2); 433 } 434 } 435 436 /* 437 * Make sure the finobt doesn't think this inode is free. 438 * We don't have to check the inobt ourselves because we got the inode via 439 * IGET_UNTRUSTED, which checks the inobt for us. 440 */ 441 static void 442 xfs_scrub_inode_xref_finobt( 443 struct xfs_scrub_context *sc, 444 xfs_ino_t ino) 445 { 446 struct xfs_inobt_rec_incore rec; 447 xfs_agino_t agino; 448 int has_record; 449 int error; 450 451 if (!sc->sa.fino_cur || xfs_scrub_skip_xref(sc->sm)) 452 return; 453 454 agino = XFS_INO_TO_AGINO(sc->mp, ino); 455 456 /* 457 * Try to get the finobt record. If we can't get it, then we're 458 * in good shape. 459 */ 460 error = xfs_inobt_lookup(sc->sa.fino_cur, agino, XFS_LOOKUP_LE, 461 &has_record); 462 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 463 !has_record) 464 return; 465 466 error = xfs_inobt_get_rec(sc->sa.fino_cur, &rec, &has_record); 467 if (!xfs_scrub_should_check_xref(sc, &error, &sc->sa.fino_cur) || 468 !has_record) 469 return; 470 471 /* 472 * Otherwise, make sure this record either doesn't cover this inode, 473 * or that it does but it's marked present. 474 */ 475 if (rec.ir_startino > agino || 476 rec.ir_startino + XFS_INODES_PER_CHUNK <= agino) 477 return; 478 479 if (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)) 480 xfs_scrub_btree_xref_set_corrupt(sc, sc->sa.fino_cur, 0); 481 } 482 483 /* Cross reference the inode fields with the forks. */ 484 STATIC void 485 xfs_scrub_inode_xref_bmap( 486 struct xfs_scrub_context *sc, 487 struct xfs_dinode *dip) 488 { 489 xfs_extnum_t nextents; 490 xfs_filblks_t count; 491 xfs_filblks_t acount; 492 int error; 493 494 if (xfs_scrub_skip_xref(sc->sm)) 495 return; 496 497 /* Walk all the extents to check nextents/naextents/nblocks. */ 498 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 499 &nextents, &count); 500 if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 501 return; 502 if (nextents < be32_to_cpu(dip->di_nextents)) 503 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); 504 505 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 506 &nextents, &acount); 507 if (!xfs_scrub_should_check_xref(sc, &error, NULL)) 508 return; 509 if (nextents != be16_to_cpu(dip->di_anextents)) 510 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); 511 512 /* Check nblocks against the inode. */ 513 if (count + acount != be64_to_cpu(dip->di_nblocks)) 514 xfs_scrub_ino_xref_set_corrupt(sc, sc->ip->i_ino); 515 } 516 517 /* Cross-reference with the other btrees. */ 518 STATIC void 519 xfs_scrub_inode_xref( 520 struct xfs_scrub_context *sc, 521 xfs_ino_t ino, 522 struct xfs_dinode *dip) 523 { 524 struct xfs_owner_info oinfo; 525 xfs_agnumber_t agno; 526 xfs_agblock_t agbno; 527 int error; 528 529 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 530 return; 531 532 agno = XFS_INO_TO_AGNO(sc->mp, ino); 533 agbno = XFS_INO_TO_AGBNO(sc->mp, ino); 534 535 error = xfs_scrub_ag_init(sc, agno, &sc->sa); 536 if (!xfs_scrub_xref_process_error(sc, agno, agbno, &error)) 537 return; 538 539 xfs_scrub_xref_is_used_space(sc, agbno, 1); 540 xfs_scrub_inode_xref_finobt(sc, ino); 541 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_INODES); 542 xfs_scrub_xref_is_owned_by(sc, agbno, 1, &oinfo); 543 xfs_scrub_xref_is_not_shared(sc, agbno, 1); 544 xfs_scrub_inode_xref_bmap(sc, dip); 545 546 xfs_scrub_ag_free(sc, &sc->sa); 547 } 548 549 /* 550 * If the reflink iflag disagrees with a scan for shared data fork extents, 551 * either flag an error (shared extents w/ no flag) or a preen (flag set w/o 552 * any shared extents). We already checked for reflink iflag set on a non 553 * reflink filesystem. 554 */ 555 static void 556 xfs_scrub_inode_check_reflink_iflag( 557 struct xfs_scrub_context *sc, 558 xfs_ino_t ino) 559 { 560 struct xfs_mount *mp = sc->mp; 561 bool has_shared; 562 int error; 563 564 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 565 return; 566 567 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 568 &has_shared); 569 if (!xfs_scrub_xref_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 570 XFS_INO_TO_AGBNO(mp, ino), &error)) 571 return; 572 if (xfs_is_reflink_inode(sc->ip) && !has_shared) 573 xfs_scrub_ino_set_preen(sc, ino); 574 else if (!xfs_is_reflink_inode(sc->ip) && has_shared) 575 xfs_scrub_ino_set_corrupt(sc, ino); 576 } 577 578 /* Scrub an inode. */ 579 int 580 xfs_scrub_inode( 581 struct xfs_scrub_context *sc) 582 { 583 struct xfs_dinode di; 584 int error = 0; 585 586 /* 587 * If sc->ip is NULL, that means that the setup function called 588 * xfs_iget to look up the inode. xfs_iget returned a EFSCORRUPTED 589 * and a NULL inode, so flag the corruption error and return. 590 */ 591 if (!sc->ip) { 592 xfs_scrub_ino_set_corrupt(sc, sc->sm->sm_ino); 593 return 0; 594 } 595 596 /* Scrub the inode core. */ 597 xfs_inode_to_disk(sc->ip, &di, 0); 598 xfs_scrub_dinode(sc, &di, sc->ip->i_ino); 599 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 600 goto out; 601 602 /* 603 * Look for discrepancies between file's data blocks and the reflink 604 * iflag. We already checked the iflag against the file mode when 605 * we scrubbed the dinode. 606 */ 607 if (S_ISREG(VFS_I(sc->ip)->i_mode)) 608 xfs_scrub_inode_check_reflink_iflag(sc, sc->ip->i_ino); 609 610 xfs_scrub_inode_xref(sc, sc->ip->i_ino, &di); 611 out: 612 return error; 613 } 614