1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_icache.h" 20 #include "xfs_inode_buf.h" 21 #include "xfs_inode_fork.h" 22 #include "xfs_ialloc.h" 23 #include "xfs_da_format.h" 24 #include "xfs_reflink.h" 25 #include "xfs_alloc.h" 26 #include "xfs_rmap.h" 27 #include "xfs_rmap_btree.h" 28 #include "xfs_bmap.h" 29 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_util.h" 31 #include "xfs_dir2.h" 32 #include "xfs_dir2_priv.h" 33 #include "xfs_quota_defs.h" 34 #include "xfs_quota.h" 35 #include "xfs_ag.h" 36 #include "xfs_rtbitmap.h" 37 #include "xfs_attr_leaf.h" 38 #include "xfs_log_priv.h" 39 #include "xfs_health.h" 40 #include "scrub/xfs_scrub.h" 41 #include "scrub/scrub.h" 42 #include "scrub/common.h" 43 #include "scrub/btree.h" 44 #include "scrub/trace.h" 45 #include "scrub/repair.h" 46 47 /* 48 * Inode Record Repair 49 * =================== 50 * 51 * Roughly speaking, inode problems can be classified based on whether or not 52 * they trip the dinode verifiers. If those trip, then we won't be able to 53 * xfs_iget ourselves the inode. 54 * 55 * Therefore, the xrep_dinode_* functions fix anything that will cause the 56 * inode buffer verifier or the dinode verifier to fail. The xrep_inode_* functions 57 * fix things on live incore inodes. The inode repair functions make decisions 58 * with security and usability implications when reviving a file: 59 * 60 * - Files with zero di_mode or a garbage di_mode are converted to regular file 61 * that only root can read. 
This file may not actually contain user data, 62 * if the file was not previously a regular file. Setuid and setgid bits 63 * are cleared. 64 * 65 * - Zero-size directories can be truncated to look empty. It is necessary to 66 * run the bmapbtd and directory repair functions to fully rebuild the 67 * directory. 68 * 69 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary 70 * to run the bmapbtd and symlink repair functions to salvage the symlink. 71 * 72 * - Invalid extent size hints will be removed. 73 * 74 * - Quotacheck will be scheduled if we repaired an inode that was so badly 75 * damaged that the ondisk inode had to be rebuilt. 76 * 77 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero. 78 * Setuid and setgid bits are cleared. 79 * 80 * - Data and attr forks are reset to extents format with zero extents if the 81 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta 82 * repair functions to recover the space mapping. 83 * 84 * - ACLs will not be recovered if the attr fork is zapped or the extended 85 * attribute structure itself requires salvaging. 86 * 87 * - If the attr fork is zapped, the user and group ids are reset to root and 88 * the setuid and setgid bits are removed. 89 */ 90 91 /* 92 * All the information we need to repair the ondisk inode if we can't iget the 93 * incore inode. We don't allocate this buffer unless we're going to perform 94 * a repair to the ondisk inode cluster buffer. 95 */ 96 struct xrep_inode { 97 /* Inode mapping that we saved from the initial lookup attempt. */ 98 struct xfs_imap imap; 99 100 struct xfs_scrub *sc; 101 102 /* Blocks in use on the data device by data extents or bmbt blocks. */ 103 xfs_rfsblock_t data_blocks; 104 105 /* Blocks in use on the rt device. */ 106 xfs_rfsblock_t rt_blocks; 107 108 /* Blocks in use by the attr fork. */ 109 xfs_rfsblock_t attr_blocks; 110 111 /* Number of data device extents for the data fork. 
*/ 112 xfs_extnum_t data_extents; 113 114 /* 115 * Number of realtime device extents for the data fork. If 116 * data_extents and rt_extents indicate that the data fork has extents 117 * on both devices, we'll just back away slowly. 118 */ 119 xfs_extnum_t rt_extents; 120 121 /* Number of (data device) extents for the attr fork. */ 122 xfs_aextnum_t attr_extents; 123 124 /* Sick state to set after zapping parts of the inode. */ 125 unsigned int ino_sick_mask; 126 127 /* Must we remove all access from this file? */ 128 bool zap_acls; 129 }; 130 131 /* 132 * Setup function for inode repair. @imap contains the ondisk inode mapping 133 * information so that we can correct the ondisk inode cluster buffer if 134 * necessary to make iget work. 135 */ 136 int 137 xrep_setup_inode( 138 struct xfs_scrub *sc, 139 const struct xfs_imap *imap) 140 { 141 struct xrep_inode *ri; 142 143 sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS); 144 if (!sc->buf) 145 return -ENOMEM; 146 147 ri = sc->buf; 148 memcpy(&ri->imap, imap, sizeof(struct xfs_imap)); 149 ri->sc = sc; 150 return 0; 151 } 152 153 /* 154 * Make sure this ondisk inode can pass the inode buffer verifier. This is 155 * not the same as the dinode verifier. 
156 */ 157 STATIC void 158 xrep_dinode_buf_core( 159 struct xfs_scrub *sc, 160 struct xfs_buf *bp, 161 unsigned int ioffset) 162 { 163 struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset); 164 struct xfs_trans *tp = sc->tp; 165 struct xfs_mount *mp = sc->mp; 166 xfs_agino_t agino; 167 bool crc_ok = false; 168 bool magic_ok = false; 169 bool unlinked_ok = false; 170 171 agino = be32_to_cpu(dip->di_next_unlinked); 172 173 if (xfs_verify_agino_or_null(bp->b_pag, agino)) 174 unlinked_ok = true; 175 176 if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 177 xfs_dinode_good_version(mp, dip->di_version)) 178 magic_ok = true; 179 180 if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 181 XFS_DINODE_CRC_OFF)) 182 crc_ok = true; 183 184 if (magic_ok && unlinked_ok && crc_ok) 185 return; 186 187 if (!magic_ok) { 188 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 189 dip->di_version = 3; 190 } 191 if (!unlinked_ok) 192 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 193 xfs_dinode_calc_crc(mp, dip); 194 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); 195 xfs_trans_log_buf(tp, bp, ioffset, 196 ioffset + sizeof(struct xfs_dinode) - 1); 197 } 198 199 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */ 200 STATIC void 201 xrep_dinode_buf( 202 struct xfs_scrub *sc, 203 struct xfs_buf *bp) 204 { 205 struct xfs_mount *mp = sc->mp; 206 int i; 207 int ni; 208 209 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 210 for (i = 0; i < ni; i++) 211 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog); 212 } 213 214 /* Reinitialize things that never change in an inode. 
*/ 215 STATIC void 216 xrep_dinode_header( 217 struct xfs_scrub *sc, 218 struct xfs_dinode *dip) 219 { 220 trace_xrep_dinode_header(sc, dip); 221 222 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 223 if (!xfs_dinode_good_version(sc->mp, dip->di_version)) 224 dip->di_version = 3; 225 dip->di_ino = cpu_to_be64(sc->sm->sm_ino); 226 uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid); 227 dip->di_gen = cpu_to_be32(sc->sm->sm_gen); 228 } 229 230 /* Turn di_mode into /something/ recognizable. */ 231 STATIC void 232 xrep_dinode_mode( 233 struct xrep_inode *ri, 234 struct xfs_dinode *dip) 235 { 236 struct xfs_scrub *sc = ri->sc; 237 uint16_t mode = be16_to_cpu(dip->di_mode); 238 239 trace_xrep_dinode_mode(sc, dip); 240 241 if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) 242 return; 243 244 /* bad mode, so we set it to a file that only root can read */ 245 mode = S_IFREG; 246 dip->di_mode = cpu_to_be16(mode); 247 dip->di_uid = 0; 248 dip->di_gid = 0; 249 ri->zap_acls = true; 250 } 251 252 /* Fix any conflicting flags that the verifiers complain about. */ 253 STATIC void 254 xrep_dinode_flags( 255 struct xfs_scrub *sc, 256 struct xfs_dinode *dip, 257 bool isrt) 258 { 259 struct xfs_mount *mp = sc->mp; 260 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 261 uint16_t flags = be16_to_cpu(dip->di_flags); 262 uint16_t mode = be16_to_cpu(dip->di_mode); 263 264 trace_xrep_dinode_flags(sc, dip); 265 266 if (isrt) 267 flags |= XFS_DIFLAG_REALTIME; 268 else 269 flags &= ~XFS_DIFLAG_REALTIME; 270 271 /* 272 * For regular files on a reflink filesystem, set the REFLINK flag to 273 * protect shared extents. A later stage will actually check those 274 * extents and clear the flag if possible. 
275 */ 276 if (xfs_has_reflink(mp) && S_ISREG(mode)) 277 flags2 |= XFS_DIFLAG2_REFLINK; 278 else 279 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE); 280 if (flags & XFS_DIFLAG_REALTIME) 281 flags2 &= ~XFS_DIFLAG2_REFLINK; 282 if (!xfs_has_bigtime(mp)) 283 flags2 &= ~XFS_DIFLAG2_BIGTIME; 284 if (!xfs_has_large_extent_counts(mp)) 285 flags2 &= ~XFS_DIFLAG2_NREXT64; 286 if (flags2 & XFS_DIFLAG2_NREXT64) 287 dip->di_nrext64_pad = 0; 288 else if (dip->di_version >= 3) 289 dip->di_v3_pad = 0; 290 dip->di_flags = cpu_to_be16(flags); 291 dip->di_flags2 = cpu_to_be64(flags2); 292 } 293 294 /* 295 * Blow out symlink; now it points nowhere. We don't have to worry about 296 * incore state because this inode is failing the verifiers. 297 */ 298 STATIC void 299 xrep_dinode_zap_symlink( 300 struct xrep_inode *ri, 301 struct xfs_dinode *dip) 302 { 303 struct xfs_scrub *sc = ri->sc; 304 char *p; 305 306 trace_xrep_dinode_zap_symlink(sc, dip); 307 308 dip->di_format = XFS_DINODE_FMT_LOCAL; 309 dip->di_size = cpu_to_be64(1); 310 p = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 311 *p = '?'; 312 ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED; 313 } 314 315 /* 316 * Blow out dir, make the parent point to the root. In the future repair will 317 * reconstruct this directory for us. Note that there's no in-core directory 318 * inode because the sf verifier tripped, so we don't have to worry about the 319 * dentry cache. 
320 */ 321 STATIC void 322 xrep_dinode_zap_dir( 323 struct xrep_inode *ri, 324 struct xfs_dinode *dip) 325 { 326 struct xfs_scrub *sc = ri->sc; 327 struct xfs_mount *mp = sc->mp; 328 struct xfs_dir2_sf_hdr *sfp; 329 int i8count; 330 331 trace_xrep_dinode_zap_dir(sc, dip); 332 333 dip->di_format = XFS_DINODE_FMT_LOCAL; 334 i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM; 335 sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 336 sfp->count = 0; 337 sfp->i8count = i8count; 338 xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino); 339 dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count)); 340 ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED; 341 } 342 343 /* Make sure we don't have a garbage file size. */ 344 STATIC void 345 xrep_dinode_size( 346 struct xrep_inode *ri, 347 struct xfs_dinode *dip) 348 { 349 struct xfs_scrub *sc = ri->sc; 350 uint64_t size = be64_to_cpu(dip->di_size); 351 uint16_t mode = be16_to_cpu(dip->di_mode); 352 353 trace_xrep_dinode_size(sc, dip); 354 355 switch (mode & S_IFMT) { 356 case S_IFIFO: 357 case S_IFCHR: 358 case S_IFBLK: 359 case S_IFSOCK: 360 /* di_size can't be nonzero for special files */ 361 dip->di_size = 0; 362 break; 363 case S_IFREG: 364 /* Regular files can't be larger than 2^63-1 bytes. */ 365 dip->di_size = cpu_to_be64(size & ~(1ULL << 63)); 366 break; 367 case S_IFLNK: 368 /* 369 * Truncate ridiculously oversized symlinks. If the size is 370 * zero, reset it to point to the current directory. Both of 371 * these conditions trigger dinode verifier errors, so there 372 * is no in-core state to reset. 373 */ 374 if (size > XFS_SYMLINK_MAXLEN) 375 dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN); 376 else if (size == 0) 377 xrep_dinode_zap_symlink(ri, dip); 378 break; 379 case S_IFDIR: 380 /* 381 * Directories can't have a size larger than 32G. If the size 382 * is zero, reset it to an empty directory. Both of these 383 * conditions trigger dinode verifier errors, so there is no 384 * in-core state to reset. 
385 */ 386 if (size > XFS_DIR2_SPACE_SIZE) 387 dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE); 388 else if (size == 0) 389 xrep_dinode_zap_dir(ri, dip); 390 break; 391 } 392 } 393 394 /* Fix extent size hints. */ 395 STATIC void 396 xrep_dinode_extsize_hints( 397 struct xfs_scrub *sc, 398 struct xfs_dinode *dip) 399 { 400 struct xfs_mount *mp = sc->mp; 401 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 402 uint16_t flags = be16_to_cpu(dip->di_flags); 403 uint16_t mode = be16_to_cpu(dip->di_mode); 404 405 xfs_failaddr_t fa; 406 407 trace_xrep_dinode_extsize_hints(sc, dip); 408 409 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 410 mode, flags); 411 if (fa) { 412 dip->di_extsize = 0; 413 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE | 414 XFS_DIFLAG_EXTSZINHERIT); 415 } 416 417 if (dip->di_version < 3) 418 return; 419 420 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 421 mode, flags, flags2); 422 if (fa) { 423 dip->di_cowextsize = 0; 424 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE); 425 } 426 } 427 428 /* Count extents and blocks for an inode given an rmap. */ 429 STATIC int 430 xrep_dinode_walk_rmap( 431 struct xfs_btree_cur *cur, 432 const struct xfs_rmap_irec *rec, 433 void *priv) 434 { 435 struct xrep_inode *ri = priv; 436 int error = 0; 437 438 if (xchk_should_terminate(ri->sc, &error)) 439 return error; 440 441 /* We only care about this inode. */ 442 if (rec->rm_owner != ri->sc->sm->sm_ino) 443 return 0; 444 445 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) { 446 ri->attr_blocks += rec->rm_blockcount; 447 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 448 ri->attr_extents++; 449 450 return 0; 451 } 452 453 ri->data_blocks += rec->rm_blockcount; 454 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 455 ri->data_extents++; 456 457 return 0; 458 } 459 460 /* Count extents and blocks for an inode from all AG rmap data. 
*/ 461 STATIC int 462 xrep_dinode_count_ag_rmaps( 463 struct xrep_inode *ri, 464 struct xfs_perag *pag) 465 { 466 struct xfs_btree_cur *cur; 467 struct xfs_buf *agf; 468 int error; 469 470 error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf); 471 if (error) 472 return error; 473 474 cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag); 475 error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri); 476 xfs_btree_del_cursor(cur, error); 477 xfs_trans_brelse(ri->sc->tp, agf); 478 return error; 479 } 480 481 /* Count extents and blocks for a given inode from all rmap data. */ 482 STATIC int 483 xrep_dinode_count_rmaps( 484 struct xrep_inode *ri) 485 { 486 struct xfs_perag *pag; 487 xfs_agnumber_t agno; 488 int error; 489 490 if (!xfs_has_rmapbt(ri->sc->mp) || xfs_has_realtime(ri->sc->mp)) 491 return -EOPNOTSUPP; 492 493 for_each_perag(ri->sc->mp, agno, pag) { 494 error = xrep_dinode_count_ag_rmaps(ri, pag); 495 if (error) { 496 xfs_perag_rele(pag); 497 return error; 498 } 499 } 500 501 /* Can't have extents on both the rt and the data device. */ 502 if (ri->data_extents && ri->rt_extents) 503 return -EFSCORRUPTED; 504 505 trace_xrep_dinode_count_rmaps(ri->sc, 506 ri->data_blocks, ri->rt_blocks, ri->attr_blocks, 507 ri->data_extents, ri->rt_extents, ri->attr_extents); 508 return 0; 509 } 510 511 /* Return true if this extents-format ifork looks like garbage. 
*/ 512 STATIC bool 513 xrep_dinode_bad_extents_fork( 514 struct xfs_scrub *sc, 515 struct xfs_dinode *dip, 516 unsigned int dfork_size, 517 int whichfork) 518 { 519 struct xfs_bmbt_irec new; 520 struct xfs_bmbt_rec *dp; 521 xfs_extnum_t nex; 522 bool isrt; 523 unsigned int i; 524 525 nex = xfs_dfork_nextents(dip, whichfork); 526 if (nex > dfork_size / sizeof(struct xfs_bmbt_rec)) 527 return true; 528 529 dp = XFS_DFORK_PTR(dip, whichfork); 530 531 isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME); 532 for (i = 0; i < nex; i++, dp++) { 533 xfs_failaddr_t fa; 534 535 xfs_bmbt_disk_get_all(dp, &new); 536 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork, 537 &new); 538 if (fa) 539 return true; 540 } 541 542 return false; 543 } 544 545 /* Return true if this btree-format ifork looks like garbage. */ 546 STATIC bool 547 xrep_dinode_bad_bmbt_fork( 548 struct xfs_scrub *sc, 549 struct xfs_dinode *dip, 550 unsigned int dfork_size, 551 int whichfork) 552 { 553 struct xfs_bmdr_block *dfp; 554 xfs_extnum_t nex; 555 unsigned int i; 556 unsigned int dmxr; 557 unsigned int nrecs; 558 unsigned int level; 559 560 nex = xfs_dfork_nextents(dip, whichfork); 561 if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec)) 562 return true; 563 564 if (dfork_size < sizeof(struct xfs_bmdr_block)) 565 return true; 566 567 dfp = XFS_DFORK_PTR(dip, whichfork); 568 nrecs = be16_to_cpu(dfp->bb_numrecs); 569 level = be16_to_cpu(dfp->bb_level); 570 571 if (nrecs == 0 || XFS_BMDR_SPACE_CALC(nrecs) > dfork_size) 572 return true; 573 if (level == 0 || level >= XFS_BM_MAXLEVELS(sc->mp, whichfork)) 574 return true; 575 576 dmxr = xfs_bmdr_maxrecs(dfork_size, 0); 577 for (i = 1; i <= nrecs; i++) { 578 struct xfs_bmbt_key *fkp; 579 xfs_bmbt_ptr_t *fpp; 580 xfs_fileoff_t fileoff; 581 xfs_fsblock_t fsbno; 582 583 fkp = XFS_BMDR_KEY_ADDR(dfp, i); 584 fileoff = be64_to_cpu(fkp->br_startoff); 585 if (!xfs_verify_fileoff(sc->mp, fileoff)) 586 return true; 587 588 fpp = XFS_BMDR_PTR_ADDR(dfp, i, dmxr); 
589 fsbno = be64_to_cpu(*fpp); 590 if (!xfs_verify_fsbno(sc->mp, fsbno)) 591 return true; 592 } 593 594 return false; 595 } 596 597 /* 598 * Check the data fork for things that will fail the ifork verifiers or the 599 * ifork formatters. 600 */ 601 STATIC bool 602 xrep_dinode_check_dfork( 603 struct xfs_scrub *sc, 604 struct xfs_dinode *dip, 605 uint16_t mode) 606 { 607 void *dfork_ptr; 608 int64_t data_size; 609 unsigned int fmt; 610 unsigned int dfork_size; 611 612 /* 613 * Verifier functions take signed int64_t, so check for bogus negative 614 * values first. 615 */ 616 data_size = be64_to_cpu(dip->di_size); 617 if (data_size < 0) 618 return true; 619 620 fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK); 621 switch (mode & S_IFMT) { 622 case S_IFIFO: 623 case S_IFCHR: 624 case S_IFBLK: 625 case S_IFSOCK: 626 if (fmt != XFS_DINODE_FMT_DEV) 627 return true; 628 break; 629 case S_IFREG: 630 if (fmt == XFS_DINODE_FMT_LOCAL) 631 return true; 632 fallthrough; 633 case S_IFLNK: 634 case S_IFDIR: 635 switch (fmt) { 636 case XFS_DINODE_FMT_LOCAL: 637 case XFS_DINODE_FMT_EXTENTS: 638 case XFS_DINODE_FMT_BTREE: 639 break; 640 default: 641 return true; 642 } 643 break; 644 default: 645 return true; 646 } 647 648 dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK); 649 dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 650 651 switch (fmt) { 652 case XFS_DINODE_FMT_DEV: 653 break; 654 case XFS_DINODE_FMT_LOCAL: 655 /* dir/symlink structure cannot be larger than the fork */ 656 if (data_size > dfork_size) 657 return true; 658 /* directory structure must pass verification. */ 659 if (S_ISDIR(mode) && 660 xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL) 661 return true; 662 /* symlink structure must pass verification. 
*/ 663 if (S_ISLNK(mode) && 664 xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL) 665 return true; 666 break; 667 case XFS_DINODE_FMT_EXTENTS: 668 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size, 669 XFS_DATA_FORK)) 670 return true; 671 break; 672 case XFS_DINODE_FMT_BTREE: 673 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size, 674 XFS_DATA_FORK)) 675 return true; 676 break; 677 default: 678 return true; 679 } 680 681 return false; 682 } 683 684 static void 685 xrep_dinode_set_data_nextents( 686 struct xfs_dinode *dip, 687 xfs_extnum_t nextents) 688 { 689 if (xfs_dinode_has_large_extent_counts(dip)) 690 dip->di_big_nextents = cpu_to_be64(nextents); 691 else 692 dip->di_nextents = cpu_to_be32(nextents); 693 } 694 695 static void 696 xrep_dinode_set_attr_nextents( 697 struct xfs_dinode *dip, 698 xfs_extnum_t nextents) 699 { 700 if (xfs_dinode_has_large_extent_counts(dip)) 701 dip->di_big_anextents = cpu_to_be32(nextents); 702 else 703 dip->di_anextents = cpu_to_be16(nextents); 704 } 705 706 /* Reset the data fork to something sane. */ 707 STATIC void 708 xrep_dinode_zap_dfork( 709 struct xrep_inode *ri, 710 struct xfs_dinode *dip, 711 uint16_t mode) 712 { 713 struct xfs_scrub *sc = ri->sc; 714 715 trace_xrep_dinode_zap_dfork(sc, dip); 716 717 ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED; 718 719 xrep_dinode_set_data_nextents(dip, 0); 720 ri->data_blocks = 0; 721 ri->rt_blocks = 0; 722 723 /* Special files always get reset to DEV */ 724 switch (mode & S_IFMT) { 725 case S_IFIFO: 726 case S_IFCHR: 727 case S_IFBLK: 728 case S_IFSOCK: 729 dip->di_format = XFS_DINODE_FMT_DEV; 730 dip->di_size = 0; 731 return; 732 } 733 734 /* 735 * If we have data extents, reset to an empty map and hope the user 736 * will run the bmapbtd checker next. 737 */ 738 if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) { 739 dip->di_format = XFS_DINODE_FMT_EXTENTS; 740 return; 741 } 742 743 /* Otherwise, reset the local format to the minimum. 
*/ 744 switch (mode & S_IFMT) { 745 case S_IFLNK: 746 xrep_dinode_zap_symlink(ri, dip); 747 break; 748 case S_IFDIR: 749 xrep_dinode_zap_dir(ri, dip); 750 break; 751 } 752 } 753 754 /* 755 * Check the attr fork for things that will fail the ifork verifiers or the 756 * ifork formatters. 757 */ 758 STATIC bool 759 xrep_dinode_check_afork( 760 struct xfs_scrub *sc, 761 struct xfs_dinode *dip) 762 { 763 struct xfs_attr_sf_hdr *afork_ptr; 764 size_t attr_size; 765 unsigned int afork_size; 766 767 if (XFS_DFORK_BOFF(dip) == 0) 768 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS || 769 xfs_dfork_attr_extents(dip) != 0; 770 771 afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 772 afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 773 774 switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) { 775 case XFS_DINODE_FMT_LOCAL: 776 /* Fork has to be large enough to extract the xattr size. */ 777 if (afork_size < sizeof(struct xfs_attr_sf_hdr)) 778 return true; 779 780 /* xattr structure cannot be larger than the fork */ 781 attr_size = be16_to_cpu(afork_ptr->totsize); 782 if (attr_size > afork_size) 783 return true; 784 785 /* xattr structure must pass verification. */ 786 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL; 787 case XFS_DINODE_FMT_EXTENTS: 788 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size, 789 XFS_ATTR_FORK)) 790 return true; 791 break; 792 case XFS_DINODE_FMT_BTREE: 793 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size, 794 XFS_ATTR_FORK)) 795 return true; 796 break; 797 default: 798 return true; 799 } 800 801 return false; 802 } 803 804 /* 805 * Reset the attr fork to empty. Since the attr fork could have contained 806 * ACLs, make the file readable only by root. 
807 */ 808 STATIC void 809 xrep_dinode_zap_afork( 810 struct xrep_inode *ri, 811 struct xfs_dinode *dip, 812 uint16_t mode) 813 { 814 struct xfs_scrub *sc = ri->sc; 815 816 trace_xrep_dinode_zap_afork(sc, dip); 817 818 ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED; 819 820 dip->di_aformat = XFS_DINODE_FMT_EXTENTS; 821 xrep_dinode_set_attr_nextents(dip, 0); 822 ri->attr_blocks = 0; 823 824 /* 825 * If the data fork is in btree format, removing the attr fork entirely 826 * might cause verifier failures if the next level down in the bmbt 827 * could now fit in the data fork area. 828 */ 829 if (dip->di_format != XFS_DINODE_FMT_BTREE) 830 dip->di_forkoff = 0; 831 dip->di_mode = cpu_to_be16(mode & ~0777); 832 dip->di_uid = 0; 833 dip->di_gid = 0; 834 } 835 836 /* Make sure the fork offset is a sensible value. */ 837 STATIC void 838 xrep_dinode_ensure_forkoff( 839 struct xrep_inode *ri, 840 struct xfs_dinode *dip, 841 uint16_t mode) 842 { 843 struct xfs_bmdr_block *bmdr; 844 struct xfs_scrub *sc = ri->sc; 845 xfs_extnum_t attr_extents, data_extents; 846 size_t bmdr_minsz = XFS_BMDR_SPACE_CALC(1); 847 unsigned int lit_sz = XFS_LITINO(sc->mp); 848 unsigned int afork_min, dfork_min; 849 850 trace_xrep_dinode_ensure_forkoff(sc, dip); 851 852 /* 853 * Before calling this function, xrep_dinode_core ensured that both 854 * forks actually fit inside their respective literal areas. If this 855 * was not the case, the fork was reset to FMT_EXTENTS with zero 856 * records. If the rmapbt scan found attr or data fork blocks, this 857 * will be noted in the dinode_stats, and we must leave enough room 858 * for the bmap repair code to reconstruct the mapping structure. 859 * 860 * First, compute the minimum space required for the attr fork. 861 */ 862 switch (dip->di_aformat) { 863 case XFS_DINODE_FMT_LOCAL: 864 /* 865 * If we still have a shortform xattr structure at all, that 866 * means the attr fork area was exactly large enough to fit 867 * the sf structure. 
868 */ 869 afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 870 break; 871 case XFS_DINODE_FMT_EXTENTS: 872 attr_extents = xfs_dfork_attr_extents(dip); 873 if (attr_extents) { 874 /* 875 * We must maintain sufficient space to hold the entire 876 * extent map array in the data fork. Note that we 877 * previously zapped the fork if it had no chance of 878 * fitting in the inode. 879 */ 880 afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents; 881 } else if (ri->attr_extents > 0) { 882 /* 883 * The attr fork thinks it has zero extents, but we 884 * found some xattr extents. We need to leave enough 885 * empty space here so that the incore attr fork will 886 * get created (and hence trigger the attr fork bmap 887 * repairer). 888 */ 889 afork_min = bmdr_minsz; 890 } else { 891 /* No extents on disk or found in rmapbt. */ 892 afork_min = 0; 893 } 894 break; 895 case XFS_DINODE_FMT_BTREE: 896 /* Must have space for btree header and key/pointers. */ 897 bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 898 afork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); 899 break; 900 default: 901 /* We should never see any other formats. */ 902 afork_min = 0; 903 break; 904 } 905 906 /* Compute the minimum space required for the data fork. */ 907 switch (dip->di_format) { 908 case XFS_DINODE_FMT_DEV: 909 dfork_min = sizeof(__be32); 910 break; 911 case XFS_DINODE_FMT_UUID: 912 dfork_min = sizeof(uuid_t); 913 break; 914 case XFS_DINODE_FMT_LOCAL: 915 /* 916 * If we still have a shortform data fork at all, that means 917 * the data fork area was large enough to fit whatever was in 918 * there. 919 */ 920 dfork_min = be64_to_cpu(dip->di_size); 921 break; 922 case XFS_DINODE_FMT_EXTENTS: 923 data_extents = xfs_dfork_data_extents(dip); 924 if (data_extents) { 925 /* 926 * We must maintain sufficient space to hold the entire 927 * extent map array in the data fork. Note that we 928 * previously zapped the fork if it had no chance of 929 * fitting in the inode. 
930 */ 931 dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents; 932 } else if (ri->data_extents > 0 || ri->rt_extents > 0) { 933 /* 934 * The data fork thinks it has zero extents, but we 935 * found some data extents. We need to leave enough 936 * empty space here so that the data fork bmap repair 937 * will recover the mappings. 938 */ 939 dfork_min = bmdr_minsz; 940 } else { 941 /* No extents on disk or found in rmapbt. */ 942 dfork_min = 0; 943 } 944 break; 945 case XFS_DINODE_FMT_BTREE: 946 /* Must have space for btree header and key/pointers. */ 947 bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 948 dfork_min = XFS_BMAP_BROOT_SPACE(sc->mp, bmdr); 949 break; 950 default: 951 dfork_min = 0; 952 break; 953 } 954 955 /* 956 * Round all values up to the nearest 8 bytes, because that is the 957 * precision of di_forkoff. 958 */ 959 afork_min = roundup(afork_min, 8); 960 dfork_min = roundup(dfork_min, 8); 961 bmdr_minsz = roundup(bmdr_minsz, 8); 962 963 ASSERT(dfork_min <= lit_sz); 964 ASSERT(afork_min <= lit_sz); 965 966 /* 967 * If the data fork was zapped and we don't have enough space for the 968 * recovery fork, move the attr fork up. 969 */ 970 if (dip->di_format == XFS_DINODE_FMT_EXTENTS && 971 xfs_dfork_data_extents(dip) == 0 && 972 (ri->data_extents > 0 || ri->rt_extents > 0) && 973 bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) { 974 if (bmdr_minsz + afork_min > lit_sz) { 975 /* 976 * The attr for and the stub fork we need to recover 977 * the data fork won't both fit. Zap the attr fork. 978 */ 979 xrep_dinode_zap_afork(ri, dip, mode); 980 afork_min = bmdr_minsz; 981 } else { 982 void *before, *after; 983 984 /* Otherwise, just slide the attr fork up. */ 985 before = XFS_DFORK_APTR(dip); 986 dip->di_forkoff = bmdr_minsz >> 3; 987 after = XFS_DFORK_APTR(dip); 988 memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp)); 989 } 990 } 991 992 /* 993 * If the attr fork was zapped and we don't have enough space for the 994 * recovery fork, move the attr fork down. 
*/
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}

/*
 * Zap the data/attr forks if we spot anything that isn't going to pass the
 * ifork verifiers or the ifork formatters, because we need to get the inode
 * into good enough shape that the higher level repair functions can run.
 *
 * @ri:  repair context; ri->zap_acls preselects the attr fork for zapping.
 * @dip: the ondisk inode being repaired in place.
 *
 * After any zapping, the fork offset is revalidated through
 * xrep_dinode_ensure_forkoff, and di_nblocks is cleared if neither fork
 * reports any extents.
 */
STATIC void
xrep_dinode_zap_forks(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip)
{
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		data_extents;
	xfs_extnum_t		attr_extents;
	xfs_filblks_t		nblocks;
	uint16_t		mode;
	bool			zap_datafork = false;
	bool			zap_attrfork = ri->zap_acls;

	trace_xrep_dinode_zap_forks(sc, dip);

	mode = be16_to_cpu(dip->di_mode);

	data_extents = xfs_dfork_data_extents(dip);
	attr_extents = xfs_dfork_attr_extents(dip);
	nblocks = be64_to_cpu(dip->di_nblocks);

	/*
	 * Inode counters don't make sense?  A fork whose extent count exceeds
	 * di_nblocks gets zapped; if only the combined extent count exceeds
	 * di_nblocks we can't tell which fork is wrong, so zap both.
	 */
	if (data_extents > nblocks)
		zap_datafork = true;
	if (attr_extents > nblocks)
		zap_attrfork = true;
	if (data_extents + attr_extents > nblocks)
		zap_datafork = zap_attrfork = true;

	/* Only run the per-fork checks on forks not already condemned. */
	if (!zap_datafork)
		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
	if (!zap_attrfork)
		zap_attrfork = xrep_dinode_check_afork(sc, dip);

	/* Zap whatever's bad. */
	if (zap_attrfork)
		xrep_dinode_zap_afork(ri, dip, mode);
	if (zap_datafork)
		xrep_dinode_zap_dfork(ri, dip, mode);
	xrep_dinode_ensure_forkoff(ri, dip, mode);

	/*
	 * Zero di_nblocks if we don't have any extents at all to satisfy the
	 * buffer verifier.  The extent counts are reread here because the
	 * zap helpers above may have changed them.
	 */
	data_extents = xfs_dfork_data_extents(dip);
	attr_extents = xfs_dfork_attr_extents(dip);
	if (data_extents + attr_extents == 0)
		dip->di_nblocks = 0;
}

/*
 * Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget.
 *
 * @ri: repair context holding the saved inode mapping (ri->imap) and the
 *      scrub context (ri->sc).
 *
 * Reads the inode cluster buffer, repairs the ondisk inode in place, logs the
 * inode's byte range, tries to iget the inode, and commits the transaction.
 * If both the commit and the iget succeed, a new transaction is allocated,
 * dquots are attached, and the ILOCK is taken.
 *
 * Returns 0 on success or the error of the first step that failed.  An iget
 * failure is only reported after the buffer changes have been committed.
 */
STATIC int
xrep_dinode_core(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	xfs_ino_t		ino = sc->sm->sm_ino;
	int			error;
	int			iget_error;

	/* Figure out what this inode had mapped in both forks. */
	error = xrep_dinode_count_rmaps(ri);
	if (error)
		return error;

	/* Read the inode cluster buffer. */
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
			NULL);
	if (error)
		return error;

	/* Make sure we can pass the inode buffer verifier. */
	xrep_dinode_buf(sc, bp);
	bp->b_ops = &xfs_inode_buf_ops;

	/* Fix everything the verifier will complain about. */
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
	xrep_dinode_header(sc, dip);
	xrep_dinode_mode(ri, dip);
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
	xrep_dinode_size(ri, dip);
	xrep_dinode_extsize_hints(sc, dip);
	xrep_dinode_zap_forks(ri, dip);

	/*
	 * Write out the inode.  Only the bytes belonging to this inode
	 * (im_boffset through im_boffset + sb_inodesize - 1) are logged.
	 */
	trace_xrep_dinode_fixed(sc, dip);
	xfs_dinode_calc_crc(sc->mp, dip);
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);

	/*
	 * In theory, we've fixed the ondisk inode record enough that we should
	 * be able to load the inode into the cache.  Try to iget that inode
	 * now while we hold the AGI and the inode cluster buffer and take the
	 * IOLOCK so that we can continue with repairs without anyone else
	 * accessing the inode.  If iget fails, we still need to commit the
	 * changes.
	 */
	iget_error = xchk_iget(sc, ino, &sc->ip);
	if (!iget_error)
		xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
	 * we've been holding since scrub setup.  From here on out, repairs
	 * deal only with the cached inode.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	/* The fixes are committed, but the inode still could not be loaded. */
	if (iget_error)
		return iget_error;

	/* Start a fresh transaction for the incore inode repairs. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	error = xrep_ino_dqattach(sc);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	/* Record any sickness noted during the ondisk repair, if there is any. */
	if (ri->ino_sick_mask)
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
	return 0;
}

/* Fix everything xfs_dinode_verify cares about.
*/ 1162 STATIC int 1163 xrep_dinode_problems( 1164 struct xrep_inode *ri) 1165 { 1166 struct xfs_scrub *sc = ri->sc; 1167 int error; 1168 1169 error = xrep_dinode_core(ri); 1170 if (error) 1171 return error; 1172 1173 /* We had to fix a totally busted inode, schedule quotacheck. */ 1174 if (XFS_IS_UQUOTA_ON(sc->mp)) 1175 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1176 if (XFS_IS_GQUOTA_ON(sc->mp)) 1177 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1178 if (XFS_IS_PQUOTA_ON(sc->mp)) 1179 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1180 1181 return 0; 1182 } 1183 1184 /* 1185 * Fix problems that the verifiers don't care about. In general these are 1186 * errors that don't cause problems elsewhere in the kernel that we can easily 1187 * detect, so we don't check them all that rigorously. 1188 */ 1189 1190 /* Make sure block and extent counts are ok. */ 1191 STATIC int 1192 xrep_inode_blockcounts( 1193 struct xfs_scrub *sc) 1194 { 1195 struct xfs_ifork *ifp; 1196 xfs_filblks_t count; 1197 xfs_filblks_t acount; 1198 xfs_extnum_t nextents; 1199 int error; 1200 1201 trace_xrep_inode_blockcounts(sc); 1202 1203 /* Set data fork counters from the data fork mappings. */ 1204 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_DATA_FORK, 1205 &nextents, &count); 1206 if (error) 1207 return error; 1208 if (xfs_is_reflink_inode(sc->ip)) { 1209 /* 1210 * data fork blockcount can exceed physical storage if a user 1211 * reflinks the same block over and over again. 1212 */ 1213 ; 1214 } else if (XFS_IS_REALTIME_INODE(sc->ip)) { 1215 if (count >= sc->mp->m_sb.sb_rblocks) 1216 return -EFSCORRUPTED; 1217 } else { 1218 if (count >= sc->mp->m_sb.sb_dblocks) 1219 return -EFSCORRUPTED; 1220 } 1221 error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents); 1222 if (error) 1223 return error; 1224 sc->ip->i_df.if_nextents = nextents; 1225 1226 /* Set attr fork counters from the attr fork mappings. 
*/ 1227 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); 1228 if (ifp) { 1229 error = xfs_bmap_count_blocks(sc->tp, sc->ip, XFS_ATTR_FORK, 1230 &nextents, &acount); 1231 if (error) 1232 return error; 1233 if (count >= sc->mp->m_sb.sb_dblocks) 1234 return -EFSCORRUPTED; 1235 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK, 1236 nextents); 1237 if (error) 1238 return error; 1239 ifp->if_nextents = nextents; 1240 } else { 1241 acount = 0; 1242 } 1243 1244 sc->ip->i_nblocks = count + acount; 1245 return 0; 1246 } 1247 1248 /* Check for invalid uid/gid/prid. */ 1249 STATIC void 1250 xrep_inode_ids( 1251 struct xfs_scrub *sc) 1252 { 1253 bool dirty = false; 1254 1255 trace_xrep_inode_ids(sc); 1256 1257 if (!uid_valid(VFS_I(sc->ip)->i_uid)) { 1258 i_uid_write(VFS_I(sc->ip), 0); 1259 dirty = true; 1260 if (XFS_IS_UQUOTA_ON(sc->mp)) 1261 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1262 } 1263 1264 if (!gid_valid(VFS_I(sc->ip)->i_gid)) { 1265 i_gid_write(VFS_I(sc->ip), 0); 1266 dirty = true; 1267 if (XFS_IS_GQUOTA_ON(sc->mp)) 1268 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1269 } 1270 1271 if (sc->ip->i_projid == -1U) { 1272 sc->ip->i_projid = 0; 1273 dirty = true; 1274 if (XFS_IS_PQUOTA_ON(sc->mp)) 1275 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1276 } 1277 1278 /* strip setuid/setgid if we touched any of the ids */ 1279 if (dirty) 1280 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID); 1281 } 1282 1283 static inline void 1284 xrep_clamp_timestamp( 1285 struct xfs_inode *ip, 1286 struct timespec64 *ts) 1287 { 1288 ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC); 1289 *ts = timestamp_truncate(*ts, VFS_I(ip)); 1290 } 1291 1292 /* Nanosecond counters can't have more than 1 billion. 
*/ 1293 STATIC void 1294 xrep_inode_timestamps( 1295 struct xfs_inode *ip) 1296 { 1297 struct timespec64 tstamp; 1298 struct inode *inode = VFS_I(ip); 1299 1300 tstamp = inode_get_atime(inode); 1301 xrep_clamp_timestamp(ip, &tstamp); 1302 inode_set_atime_to_ts(inode, tstamp); 1303 1304 tstamp = inode_get_mtime(inode); 1305 xrep_clamp_timestamp(ip, &tstamp); 1306 inode_set_mtime_to_ts(inode, tstamp); 1307 1308 tstamp = inode_get_ctime(inode); 1309 xrep_clamp_timestamp(ip, &tstamp); 1310 inode_set_ctime_to_ts(inode, tstamp); 1311 1312 xrep_clamp_timestamp(ip, &ip->i_crtime); 1313 } 1314 1315 /* Fix inode flags that don't make sense together. */ 1316 STATIC void 1317 xrep_inode_flags( 1318 struct xfs_scrub *sc) 1319 { 1320 uint16_t mode; 1321 1322 trace_xrep_inode_flags(sc); 1323 1324 mode = VFS_I(sc->ip)->i_mode; 1325 1326 /* Clear junk flags */ 1327 if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY) 1328 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY; 1329 1330 /* NEWRTBM only applies to realtime bitmaps */ 1331 if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino) 1332 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM; 1333 else 1334 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM; 1335 1336 /* These only make sense for directories. */ 1337 if (!S_ISDIR(mode)) 1338 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT | 1339 XFS_DIFLAG_EXTSZINHERIT | 1340 XFS_DIFLAG_PROJINHERIT | 1341 XFS_DIFLAG_NOSYMLINKS); 1342 1343 /* These only make sense for files. */ 1344 if (!S_ISREG(mode)) 1345 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME | 1346 XFS_DIFLAG_EXTSIZE); 1347 1348 /* These only make sense for non-rt files. */ 1349 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) 1350 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM; 1351 1352 /* Immutable and append only? Drop the append. */ 1353 if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) && 1354 (sc->ip->i_diflags & XFS_DIFLAG_APPEND)) 1355 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND; 1356 1357 /* Clear junk flags. 
*/ 1358 if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY) 1359 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY; 1360 1361 /* No reflink flag unless we support it and it's a file. */ 1362 if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode)) 1363 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 1364 1365 /* DAX only applies to files and dirs. */ 1366 if (!(S_ISREG(mode) || S_ISDIR(mode))) 1367 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; 1368 1369 /* No reflink files on the realtime device. */ 1370 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) 1371 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 1372 } 1373 1374 /* 1375 * Fix size problems with block/node format directories. If we fail to find 1376 * the extent list, just bail out and let the bmapbtd repair functions clean 1377 * up that mess. 1378 */ 1379 STATIC void 1380 xrep_inode_blockdir_size( 1381 struct xfs_scrub *sc) 1382 { 1383 struct xfs_iext_cursor icur; 1384 struct xfs_bmbt_irec got; 1385 struct xfs_ifork *ifp; 1386 xfs_fileoff_t off; 1387 int error; 1388 1389 trace_xrep_inode_blockdir_size(sc); 1390 1391 error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK); 1392 if (error) 1393 return; 1394 1395 /* Find the last block before 32G; this is the dir size. */ 1396 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1397 off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE); 1398 if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) { 1399 /* zero-extents directory? */ 1400 return; 1401 } 1402 1403 off = got.br_startoff + got.br_blockcount; 1404 sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE, 1405 XFS_FSB_TO_B(sc->mp, off)); 1406 } 1407 1408 /* Fix size problems with short format directories. 
*/ 1409 STATIC void 1410 xrep_inode_sfdir_size( 1411 struct xfs_scrub *sc) 1412 { 1413 struct xfs_ifork *ifp; 1414 1415 trace_xrep_inode_sfdir_size(sc); 1416 1417 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1418 sc->ip->i_disk_size = ifp->if_bytes; 1419 } 1420 1421 /* 1422 * Fix any irregularities in a directory inode's size now that we can iterate 1423 * extent maps and access other regular inode data. 1424 */ 1425 STATIC void 1426 xrep_inode_dir_size( 1427 struct xfs_scrub *sc) 1428 { 1429 trace_xrep_inode_dir_size(sc); 1430 1431 switch (sc->ip->i_df.if_format) { 1432 case XFS_DINODE_FMT_EXTENTS: 1433 case XFS_DINODE_FMT_BTREE: 1434 xrep_inode_blockdir_size(sc); 1435 break; 1436 case XFS_DINODE_FMT_LOCAL: 1437 xrep_inode_sfdir_size(sc); 1438 break; 1439 } 1440 } 1441 1442 /* Fix extent size hint problems. */ 1443 STATIC void 1444 xrep_inode_extsize( 1445 struct xfs_scrub *sc) 1446 { 1447 /* Fix misaligned extent size hints on a directory. */ 1448 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1449 (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && 1450 xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) { 1451 sc->ip->i_extsize = 0; 1452 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT; 1453 } 1454 } 1455 1456 /* Fix any irregularities in an inode that the verifiers don't catch. */ 1457 STATIC int 1458 xrep_inode_problems( 1459 struct xfs_scrub *sc) 1460 { 1461 int error; 1462 1463 error = xrep_inode_blockcounts(sc); 1464 if (error) 1465 return error; 1466 xrep_inode_timestamps(sc->ip); 1467 xrep_inode_flags(sc); 1468 xrep_inode_ids(sc); 1469 /* 1470 * We can now do a better job fixing the size of a directory now that 1471 * we can scan the data fork extents than we could in xrep_dinode_size. 
1472 */ 1473 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 1474 xrep_inode_dir_size(sc); 1475 xrep_inode_extsize(sc); 1476 1477 trace_xrep_inode_fixed(sc); 1478 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); 1479 return xrep_roll_trans(sc); 1480 } 1481 1482 /* Repair an inode's fields. */ 1483 int 1484 xrep_inode( 1485 struct xfs_scrub *sc) 1486 { 1487 int error = 0; 1488 1489 /* 1490 * No inode? That means we failed the _iget verifiers. Repair all 1491 * the things that the inode verifiers care about, then retry _iget. 1492 */ 1493 if (!sc->ip) { 1494 struct xrep_inode *ri = sc->buf; 1495 1496 ASSERT(ri != NULL); 1497 1498 error = xrep_dinode_problems(ri); 1499 if (error) 1500 return error; 1501 1502 /* By this point we had better have a working incore inode. */ 1503 if (!sc->ip) 1504 return -EFSCORRUPTED; 1505 } 1506 1507 xfs_trans_ijoin(sc->tp, sc->ip, 0); 1508 1509 /* If we found corruption of any kind, try to fix it. */ 1510 if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) || 1511 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) { 1512 error = xrep_inode_problems(sc); 1513 if (error) 1514 return error; 1515 } 1516 1517 /* See if we can clear the reflink flag. */ 1518 if (xfs_is_reflink_inode(sc->ip)) { 1519 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); 1520 if (error) 1521 return error; 1522 } 1523 1524 return xrep_defer_finish(sc); 1525 } 1526