1 /* 2 * Copyright (C) 2017 Oracle. All Rights Reserved. 3 * 4 * Author: Darrick J. Wong <darrick.wong@oracle.com> 5 * 6 * This program is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU General Public License 8 * as published by the Free Software Foundation; either version 2 9 * of the License, or (at your option) any later version. 10 * 11 * This program is distributed in the hope that it would be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write the Free Software Foundation, 18 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA. 19 */ 20 #include "xfs.h" 21 #include "xfs_fs.h" 22 #include "xfs_shared.h" 23 #include "xfs_format.h" 24 #include "xfs_trans_resv.h" 25 #include "xfs_mount.h" 26 #include "xfs_defer.h" 27 #include "xfs_btree.h" 28 #include "xfs_bit.h" 29 #include "xfs_log_format.h" 30 #include "xfs_trans.h" 31 #include "xfs_sb.h" 32 #include "xfs_inode.h" 33 #include "xfs_icache.h" 34 #include "xfs_inode_buf.h" 35 #include "xfs_inode_fork.h" 36 #include "xfs_ialloc.h" 37 #include "xfs_da_format.h" 38 #include "xfs_reflink.h" 39 #include "scrub/xfs_scrub.h" 40 #include "scrub/scrub.h" 41 #include "scrub/common.h" 42 #include "scrub/trace.h" 43 44 /* 45 * Grab total control of the inode metadata. It doesn't matter here if 46 * the file data is still changing; exclusive access to the metadata is 47 * the goal. 48 */ 49 int 50 xfs_scrub_setup_inode( 51 struct xfs_scrub_context *sc, 52 struct xfs_inode *ip) 53 { 54 struct xfs_mount *mp = sc->mp; 55 int error; 56 57 /* 58 * Try to get the inode. If the verifiers fail, we try again 59 * in raw mode. 60 */ 61 error = xfs_scrub_get_inode(sc, ip); 62 switch (error) { 63 case 0: 64 break; 65 case -EFSCORRUPTED: 66 case -EFSBADCRC: 67 return 0; 68 default: 69 return error; 70 } 71 72 /* Got the inode, lock it and we're ready to go. */ 73 sc->ilock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL; 74 xfs_ilock(sc->ip, sc->ilock_flags); 75 error = xfs_scrub_trans_alloc(sc->sm, mp, &sc->tp); 76 if (error) 77 goto out; 78 sc->ilock_flags |= XFS_ILOCK_EXCL; 79 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 80 81 out: 82 /* scrub teardown will unlock and release the inode for us */ 83 return error; 84 } 85 86 /* Inode core */ 87 88 /* 89 * Validate di_extsize hint. 90 * 91 * The rules are documented at xfs_ioctl_setattr_check_extsize(). 92 * These functions must be kept in sync with each other. 93 */ 94 STATIC void 95 xfs_scrub_inode_extsize( 96 struct xfs_scrub_context *sc, 97 struct xfs_buf *bp, 98 struct xfs_dinode *dip, 99 xfs_ino_t ino, 100 uint16_t mode, 101 uint16_t flags) 102 { 103 struct xfs_mount *mp = sc->mp; 104 bool rt_flag; 105 bool hint_flag; 106 bool inherit_flag; 107 uint32_t extsize; 108 uint32_t extsize_bytes; 109 uint32_t blocksize_bytes; 110 111 rt_flag = (flags & XFS_DIFLAG_REALTIME); 112 hint_flag = (flags & XFS_DIFLAG_EXTSIZE); 113 inherit_flag = (flags & XFS_DIFLAG_EXTSZINHERIT); 114 extsize = be32_to_cpu(dip->di_extsize); 115 extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); 116 117 if (rt_flag) 118 blocksize_bytes = mp->m_sb.sb_rextsize << mp->m_sb.sb_blocklog; 119 else 120 blocksize_bytes = mp->m_sb.sb_blocksize; 121 122 if ((hint_flag || inherit_flag) && !(S_ISDIR(mode) || S_ISREG(mode))) 123 goto bad; 124 125 if (hint_flag && !S_ISREG(mode)) 126 goto bad; 127 128 if (inherit_flag && !S_ISDIR(mode)) 129 goto bad; 130 131 if ((hint_flag || inherit_flag) && extsize == 0) 132 goto bad; 133 134 if (!(hint_flag || inherit_flag) && extsize != 0) 135 goto bad; 136 137 if (extsize_bytes % blocksize_bytes) 138 goto bad; 139 140 if (extsize > MAXEXTLEN) 141 goto bad; 142 143 if (!rt_flag && extsize > mp->m_sb.sb_agblocks / 2) 144 goto bad; 145 146 return; 147 bad: 148 xfs_scrub_ino_set_corrupt(sc, ino, bp); 149 } 150 151 /* 152 * Validate di_cowextsize hint. 153 * 154 * The rules are documented at xfs_ioctl_setattr_check_cowextsize(). 155 * These functions must be kept in sync with each other. 156 */ 157 STATIC void 158 xfs_scrub_inode_cowextsize( 159 struct xfs_scrub_context *sc, 160 struct xfs_buf *bp, 161 struct xfs_dinode *dip, 162 xfs_ino_t ino, 163 uint16_t mode, 164 uint16_t flags, 165 uint64_t flags2) 166 { 167 struct xfs_mount *mp = sc->mp; 168 bool rt_flag; 169 bool hint_flag; 170 uint32_t extsize; 171 uint32_t extsize_bytes; 172 173 rt_flag = (flags & XFS_DIFLAG_REALTIME); 174 hint_flag = (flags2 & XFS_DIFLAG2_COWEXTSIZE); 175 extsize = be32_to_cpu(dip->di_cowextsize); 176 extsize_bytes = XFS_FSB_TO_B(sc->mp, extsize); 177 178 if (hint_flag && !xfs_sb_version_hasreflink(&mp->m_sb)) 179 goto bad; 180 181 if (hint_flag && !(S_ISDIR(mode) || S_ISREG(mode))) 182 goto bad; 183 184 if (hint_flag && extsize == 0) 185 goto bad; 186 187 if (!hint_flag && extsize != 0) 188 goto bad; 189 190 if (hint_flag && rt_flag) 191 goto bad; 192 193 if (extsize_bytes % mp->m_sb.sb_blocksize) 194 goto bad; 195 196 if (extsize > MAXEXTLEN) 197 goto bad; 198 199 if (extsize > mp->m_sb.sb_agblocks / 2) 200 goto bad; 201 202 return; 203 bad: 204 xfs_scrub_ino_set_corrupt(sc, ino, bp); 205 } 206 207 /* Make sure the di_flags make sense for the inode. */ 208 STATIC void 209 xfs_scrub_inode_flags( 210 struct xfs_scrub_context *sc, 211 struct xfs_buf *bp, 212 struct xfs_dinode *dip, 213 xfs_ino_t ino, 214 uint16_t mode, 215 uint16_t flags) 216 { 217 struct xfs_mount *mp = sc->mp; 218 219 if (flags & ~XFS_DIFLAG_ANY) 220 goto bad; 221 222 /* rt flags require rt device */ 223 if ((flags & (XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT)) && 224 !mp->m_rtdev_targp) 225 goto bad; 226 227 /* new rt bitmap flag only valid for rbmino */ 228 if ((flags & XFS_DIFLAG_NEWRTBM) && ino != mp->m_sb.sb_rbmino) 229 goto bad; 230 231 /* directory-only flags */ 232 if ((flags & (XFS_DIFLAG_RTINHERIT | 233 XFS_DIFLAG_EXTSZINHERIT | 234 XFS_DIFLAG_PROJINHERIT | 235 XFS_DIFLAG_NOSYMLINKS)) && 236 !S_ISDIR(mode)) 237 goto bad; 238 239 /* file-only flags */ 240 if ((flags & (XFS_DIFLAG_REALTIME | FS_XFLAG_EXTSIZE)) && 241 !S_ISREG(mode)) 242 goto bad; 243 244 /* filestreams and rt make no sense */ 245 if ((flags & XFS_DIFLAG_FILESTREAM) && (flags & XFS_DIFLAG_REALTIME)) 246 goto bad; 247 248 return; 249 bad: 250 xfs_scrub_ino_set_corrupt(sc, ino, bp); 251 } 252 253 /* Make sure the di_flags2 make sense for the inode. */ 254 STATIC void 255 xfs_scrub_inode_flags2( 256 struct xfs_scrub_context *sc, 257 struct xfs_buf *bp, 258 struct xfs_dinode *dip, 259 xfs_ino_t ino, 260 uint16_t mode, 261 uint16_t flags, 262 uint64_t flags2) 263 { 264 struct xfs_mount *mp = sc->mp; 265 266 if (flags2 & ~XFS_DIFLAG2_ANY) 267 goto bad; 268 269 /* reflink flag requires reflink feature */ 270 if ((flags2 & XFS_DIFLAG2_REFLINK) && 271 !xfs_sb_version_hasreflink(&mp->m_sb)) 272 goto bad; 273 274 /* cowextsize flag is checked w.r.t. mode separately */ 275 276 /* file/dir-only flags */ 277 if ((flags2 & XFS_DIFLAG2_DAX) && !(S_ISREG(mode) || S_ISDIR(mode))) 278 goto bad; 279 280 /* file-only flags */ 281 if ((flags2 & XFS_DIFLAG2_REFLINK) && !S_ISREG(mode)) 282 goto bad; 283 284 /* realtime and reflink make no sense, currently */ 285 if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) 286 goto bad; 287 288 /* dax and reflink make no sense, currently */ 289 if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) 290 goto bad; 291 292 return; 293 bad: 294 xfs_scrub_ino_set_corrupt(sc, ino, bp); 295 } 296 297 /* Scrub all the ondisk inode fields. */ 298 STATIC void 299 xfs_scrub_dinode( 300 struct xfs_scrub_context *sc, 301 struct xfs_buf *bp, 302 struct xfs_dinode *dip, 303 xfs_ino_t ino) 304 { 305 struct xfs_mount *mp = sc->mp; 306 size_t fork_recs; 307 unsigned long long isize; 308 uint64_t flags2; 309 uint32_t nextents; 310 uint16_t flags; 311 uint16_t mode; 312 313 flags = be16_to_cpu(dip->di_flags); 314 if (dip->di_version >= 3) 315 flags2 = be64_to_cpu(dip->di_flags2); 316 else 317 flags2 = 0; 318 319 /* di_mode */ 320 mode = be16_to_cpu(dip->di_mode); 321 switch (mode & S_IFMT) { 322 case S_IFLNK: 323 case S_IFREG: 324 case S_IFDIR: 325 case S_IFCHR: 326 case S_IFBLK: 327 case S_IFIFO: 328 case S_IFSOCK: 329 /* mode is recognized */ 330 break; 331 default: 332 xfs_scrub_ino_set_corrupt(sc, ino, bp); 333 break; 334 } 335 336 /* v1/v2 fields */ 337 switch (dip->di_version) { 338 case 1: 339 /* 340 * We autoconvert v1 inodes into v2 inodes on writeout, 341 * so just mark this inode for preening. 342 */ 343 xfs_scrub_ino_set_preen(sc, ino, bp); 344 break; 345 case 2: 346 case 3: 347 if (dip->di_onlink != 0) 348 xfs_scrub_ino_set_corrupt(sc, ino, bp); 349 350 if (dip->di_mode == 0 && sc->ip) 351 xfs_scrub_ino_set_corrupt(sc, ino, bp); 352 353 if (dip->di_projid_hi != 0 && 354 !xfs_sb_version_hasprojid32bit(&mp->m_sb)) 355 xfs_scrub_ino_set_corrupt(sc, ino, bp); 356 break; 357 default: 358 xfs_scrub_ino_set_corrupt(sc, ino, bp); 359 return; 360 } 361 362 /* 363 * di_uid/di_gid -- -1 isn't invalid, but there's no way that 364 * userspace could have created that. 365 */ 366 if (dip->di_uid == cpu_to_be32(-1U) || 367 dip->di_gid == cpu_to_be32(-1U)) 368 xfs_scrub_ino_set_warning(sc, ino, bp); 369 370 /* di_format */ 371 switch (dip->di_format) { 372 case XFS_DINODE_FMT_DEV: 373 if (!S_ISCHR(mode) && !S_ISBLK(mode) && 374 !S_ISFIFO(mode) && !S_ISSOCK(mode)) 375 xfs_scrub_ino_set_corrupt(sc, ino, bp); 376 break; 377 case XFS_DINODE_FMT_LOCAL: 378 if (!S_ISDIR(mode) && !S_ISLNK(mode)) 379 xfs_scrub_ino_set_corrupt(sc, ino, bp); 380 break; 381 case XFS_DINODE_FMT_EXTENTS: 382 if (!S_ISREG(mode) && !S_ISDIR(mode) && !S_ISLNK(mode)) 383 xfs_scrub_ino_set_corrupt(sc, ino, bp); 384 break; 385 case XFS_DINODE_FMT_BTREE: 386 if (!S_ISREG(mode) && !S_ISDIR(mode)) 387 xfs_scrub_ino_set_corrupt(sc, ino, bp); 388 break; 389 case XFS_DINODE_FMT_UUID: 390 default: 391 xfs_scrub_ino_set_corrupt(sc, ino, bp); 392 break; 393 } 394 395 /* 396 * di_size. xfs_dinode_verify checks for things that screw up 397 * the VFS such as the upper bit being set and zero-length 398 * symlinks/directories, but we can do more here. 399 */ 400 isize = be64_to_cpu(dip->di_size); 401 if (isize & (1ULL << 63)) 402 xfs_scrub_ino_set_corrupt(sc, ino, bp); 403 404 /* Devices, fifos, and sockets must have zero size */ 405 if (!S_ISDIR(mode) && !S_ISREG(mode) && !S_ISLNK(mode) && isize != 0) 406 xfs_scrub_ino_set_corrupt(sc, ino, bp); 407 408 /* Directories can't be larger than the data section size (32G) */ 409 if (S_ISDIR(mode) && (isize == 0 || isize >= XFS_DIR2_SPACE_SIZE)) 410 xfs_scrub_ino_set_corrupt(sc, ino, bp); 411 412 /* Symlinks can't be larger than SYMLINK_MAXLEN */ 413 if (S_ISLNK(mode) && (isize == 0 || isize >= XFS_SYMLINK_MAXLEN)) 414 xfs_scrub_ino_set_corrupt(sc, ino, bp); 415 416 /* 417 * Warn if the running kernel can't handle the kinds of offsets 418 * needed to deal with the file size. In other words, if the 419 * pagecache can't cache all the blocks in this file due to 420 * overly large offsets, flag the inode for admin review. 421 */ 422 if (isize >= mp->m_super->s_maxbytes) 423 xfs_scrub_ino_set_warning(sc, ino, bp); 424 425 /* di_nblocks */ 426 if (flags2 & XFS_DIFLAG2_REFLINK) { 427 ; /* nblocks can exceed dblocks */ 428 } else if (flags & XFS_DIFLAG_REALTIME) { 429 /* 430 * nblocks is the sum of data extents (in the rtdev), 431 * attr extents (in the datadev), and both forks' bmbt 432 * blocks (in the datadev). This clumsy check is the 433 * best we can do without cross-referencing with the 434 * inode forks. 435 */ 436 if (be64_to_cpu(dip->di_nblocks) >= 437 mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks) 438 xfs_scrub_ino_set_corrupt(sc, ino, bp); 439 } else { 440 if (be64_to_cpu(dip->di_nblocks) >= mp->m_sb.sb_dblocks) 441 xfs_scrub_ino_set_corrupt(sc, ino, bp); 442 } 443 444 xfs_scrub_inode_flags(sc, bp, dip, ino, mode, flags); 445 446 xfs_scrub_inode_extsize(sc, bp, dip, ino, mode, flags); 447 448 /* di_nextents */ 449 nextents = be32_to_cpu(dip->di_nextents); 450 fork_recs = XFS_DFORK_DSIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 451 switch (dip->di_format) { 452 case XFS_DINODE_FMT_EXTENTS: 453 if (nextents > fork_recs) 454 xfs_scrub_ino_set_corrupt(sc, ino, bp); 455 break; 456 case XFS_DINODE_FMT_BTREE: 457 if (nextents <= fork_recs) 458 xfs_scrub_ino_set_corrupt(sc, ino, bp); 459 break; 460 default: 461 if (nextents != 0) 462 xfs_scrub_ino_set_corrupt(sc, ino, bp); 463 break; 464 } 465 466 /* di_forkoff */ 467 if (XFS_DFORK_APTR(dip) >= (char *)dip + mp->m_sb.sb_inodesize) 468 xfs_scrub_ino_set_corrupt(sc, ino, bp); 469 if (dip->di_anextents != 0 && dip->di_forkoff == 0) 470 xfs_scrub_ino_set_corrupt(sc, ino, bp); 471 if (dip->di_forkoff == 0 && dip->di_aformat != XFS_DINODE_FMT_EXTENTS) 472 xfs_scrub_ino_set_corrupt(sc, ino, bp); 473 474 /* di_aformat */ 475 if (dip->di_aformat != XFS_DINODE_FMT_LOCAL && 476 dip->di_aformat != XFS_DINODE_FMT_EXTENTS && 477 dip->di_aformat != XFS_DINODE_FMT_BTREE) 478 xfs_scrub_ino_set_corrupt(sc, ino, bp); 479 480 /* di_anextents */ 481 nextents = be16_to_cpu(dip->di_anextents); 482 fork_recs = XFS_DFORK_ASIZE(dip, mp) / sizeof(struct xfs_bmbt_rec); 483 switch (dip->di_aformat) { 484 case XFS_DINODE_FMT_EXTENTS: 485 if (nextents > fork_recs) 486 xfs_scrub_ino_set_corrupt(sc, ino, bp); 487 break; 488 case XFS_DINODE_FMT_BTREE: 489 if (nextents <= fork_recs) 490 xfs_scrub_ino_set_corrupt(sc, ino, bp); 491 break; 492 default: 493 if (nextents != 0) 494 xfs_scrub_ino_set_corrupt(sc, ino, bp); 495 } 496 497 if (dip->di_version >= 3) { 498 xfs_scrub_inode_flags2(sc, bp, dip, ino, mode, flags, flags2); 499 xfs_scrub_inode_cowextsize(sc, bp, dip, ino, mode, flags, 500 flags2); 501 } 502 } 503 504 /* Map and read a raw inode. */ 505 STATIC int 506 xfs_scrub_inode_map_raw( 507 struct xfs_scrub_context *sc, 508 xfs_ino_t ino, 509 struct xfs_buf **bpp, 510 struct xfs_dinode **dipp) 511 { 512 struct xfs_imap imap; 513 struct xfs_mount *mp = sc->mp; 514 struct xfs_buf *bp = NULL; 515 struct xfs_dinode *dip; 516 int error; 517 518 error = xfs_imap(mp, sc->tp, ino, &imap, XFS_IGET_UNTRUSTED); 519 if (error == -EINVAL) { 520 /* 521 * Inode could have gotten deleted out from under us; 522 * just forget about it. 523 */ 524 error = -ENOENT; 525 goto out; 526 } 527 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 528 XFS_INO_TO_AGBNO(mp, ino), &error)) 529 goto out; 530 531 error = xfs_trans_read_buf(mp, sc->tp, mp->m_ddev_targp, 532 imap.im_blkno, imap.im_len, XBF_UNMAPPED, &bp, 533 NULL); 534 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 535 XFS_INO_TO_AGBNO(mp, ino), &error)) 536 goto out; 537 538 /* 539 * Is this really an inode? We disabled verifiers in the above 540 * xfs_trans_read_buf call because the inode buffer verifier 541 * fails on /any/ inode record in the inode cluster with a bad 542 * magic or version number, not just the one that we're 543 * checking. Therefore, grab the buffer unconditionally, attach 544 * the inode verifiers by hand, and run the inode verifier only 545 * on the one inode we want. 546 */ 547 bp->b_ops = &xfs_inode_buf_ops; 548 dip = xfs_buf_offset(bp, imap.im_boffset); 549 if (!xfs_dinode_verify(mp, ino, dip) || 550 !xfs_dinode_good_version(mp, dip->di_version)) { 551 xfs_scrub_ino_set_corrupt(sc, ino, bp); 552 goto out_buf; 553 } 554 555 /* ...and is it the one we asked for? */ 556 if (be32_to_cpu(dip->di_gen) != sc->sm->sm_gen) { 557 error = -ENOENT; 558 goto out_buf; 559 } 560 561 *dipp = dip; 562 *bpp = bp; 563 out: 564 return error; 565 out_buf: 566 xfs_trans_brelse(sc->tp, bp); 567 return error; 568 } 569 570 /* Scrub an inode. */ 571 int 572 xfs_scrub_inode( 573 struct xfs_scrub_context *sc) 574 { 575 struct xfs_dinode di; 576 struct xfs_mount *mp = sc->mp; 577 struct xfs_buf *bp = NULL; 578 struct xfs_dinode *dip; 579 xfs_ino_t ino; 580 581 bool has_shared; 582 int error = 0; 583 584 /* Did we get the in-core inode, or are we doing this manually? */ 585 if (sc->ip) { 586 ino = sc->ip->i_ino; 587 xfs_inode_to_disk(sc->ip, &di, 0); 588 dip = &di; 589 } else { 590 /* Map & read inode. */ 591 ino = sc->sm->sm_ino; 592 error = xfs_scrub_inode_map_raw(sc, ino, &bp, &dip); 593 if (error || !bp) 594 goto out; 595 } 596 597 xfs_scrub_dinode(sc, bp, dip, ino); 598 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 599 goto out; 600 601 /* Now let's do the things that require a live inode. */ 602 if (!sc->ip) 603 goto out; 604 605 /* 606 * Does this inode have the reflink flag set but no shared extents? 607 * Set the preening flag if this is the case. 608 */ 609 if (xfs_is_reflink_inode(sc->ip)) { 610 error = xfs_reflink_inode_has_shared_extents(sc->tp, sc->ip, 611 &has_shared); 612 if (!xfs_scrub_process_error(sc, XFS_INO_TO_AGNO(mp, ino), 613 XFS_INO_TO_AGBNO(mp, ino), &error)) 614 goto out; 615 if (!has_shared) 616 xfs_scrub_ino_set_preen(sc, ino, bp); 617 } 618 619 out: 620 if (bp) 621 xfs_trans_brelse(sc->tp, bp); 622 return error; 623 } 624