1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_icache.h" 20 #include "xfs_inode_buf.h" 21 #include "xfs_inode_fork.h" 22 #include "xfs_ialloc.h" 23 #include "xfs_da_format.h" 24 #include "xfs_reflink.h" 25 #include "xfs_alloc.h" 26 #include "xfs_rmap.h" 27 #include "xfs_rmap_btree.h" 28 #include "xfs_bmap.h" 29 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_util.h" 31 #include "xfs_dir2.h" 32 #include "xfs_dir2_priv.h" 33 #include "xfs_quota_defs.h" 34 #include "xfs_quota.h" 35 #include "xfs_ag.h" 36 #include "xfs_rtbitmap.h" 37 #include "xfs_attr_leaf.h" 38 #include "xfs_log_priv.h" 39 #include "xfs_health.h" 40 #include "xfs_symlink_remote.h" 41 #include "xfs_rtgroup.h" 42 #include "xfs_rtrmap_btree.h" 43 #include "xfs_rtrefcount_btree.h" 44 #include "scrub/xfs_scrub.h" 45 #include "scrub/scrub.h" 46 #include "scrub/common.h" 47 #include "scrub/btree.h" 48 #include "scrub/trace.h" 49 #include "scrub/repair.h" 50 #include "scrub/iscan.h" 51 #include "scrub/readdir.h" 52 #include "scrub/tempfile.h" 53 54 /* 55 * Inode Record Repair 56 * =================== 57 * 58 * Roughly speaking, inode problems can be classified based on whether or not 59 * they trip the dinode verifiers. If those trip, then we won't be able to 60 * xfs_iget ourselves the inode. 61 * 62 * Therefore, the xrep_dinode_* functions fix anything that will cause the 63 * inode buffer verifier or the dinode verifier. The xrep_inode_* functions 64 * fix things on live incore inodes. 
The inode repair functions make decisions 65 * with security and usability implications when reviving a file: 66 * 67 * - Files with zero di_mode or a garbage di_mode are converted to regular file 68 * that only root can read. This file may not actually contain user data, 69 * if the file was not previously a regular file. Setuid and setgid bits 70 * are cleared. 71 * 72 * - Zero-size directories can be truncated to look empty. It is necessary to 73 * run the bmapbtd and directory repair functions to fully rebuild the 74 * directory. 75 * 76 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary 77 * to run the bmapbtd and symlink repair functions to salvage the symlink. 78 * 79 * - Invalid extent size hints will be removed. 80 * 81 * - Quotacheck will be scheduled if we repaired an inode that was so badly 82 * damaged that the ondisk inode had to be rebuilt. 83 * 84 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero. 85 * Setuid and setgid bits are cleared. 86 * 87 * - Data and attr forks are reset to extents format with zero extents if the 88 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta 89 * repair functions to recover the space mapping. 90 * 91 * - ACLs will not be recovered if the attr fork is zapped or the extended 92 * attribute structure itself requires salvaging. 93 * 94 * - If the attr fork is zapped, the user and group ids are reset to root and 95 * the setuid and setgid bits are removed. 96 */ 97 98 /* 99 * All the information we need to repair the ondisk inode if we can't iget the 100 * incore inode. We don't allocate this buffer unless we're going to perform 101 * a repair to the ondisk inode cluster buffer. 102 */ 103 struct xrep_inode { 104 /* Inode mapping that we saved from the initial lookup attempt. */ 105 struct xfs_imap imap; 106 107 struct xfs_scrub *sc; 108 109 /* Blocks in use on the data device by data extents or bmbt blocks. 
*/ 110 xfs_rfsblock_t data_blocks; 111 112 /* Blocks in use on the rt device. */ 113 xfs_rfsblock_t rt_blocks; 114 115 /* Blocks in use by the attr fork. */ 116 xfs_rfsblock_t attr_blocks; 117 118 /* Number of data device extents for the data fork. */ 119 xfs_extnum_t data_extents; 120 121 /* 122 * Number of realtime device extents for the data fork. If 123 * data_extents and rt_extents indicate that the data fork has extents 124 * on both devices, we'll just back away slowly. 125 */ 126 xfs_extnum_t rt_extents; 127 128 /* Number of (data device) extents for the attr fork. */ 129 xfs_aextnum_t attr_extents; 130 131 /* Sick state to set after zapping parts of the inode. */ 132 unsigned int ino_sick_mask; 133 134 /* Must we remove all access from this file? */ 135 bool zap_acls; 136 137 /* Inode scanner to see if we can find the ftype from dirents */ 138 struct xchk_iscan ftype_iscan; 139 uint8_t alleged_ftype; 140 }; 141 142 /* 143 * Setup function for inode repair. @imap contains the ondisk inode mapping 144 * information so that we can correct the ondisk inode cluster buffer if 145 * necessary to make iget work. 146 */ 147 int 148 xrep_setup_inode( 149 struct xfs_scrub *sc, 150 const struct xfs_imap *imap) 151 { 152 struct xrep_inode *ri; 153 154 sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS); 155 if (!sc->buf) 156 return -ENOMEM; 157 158 ri = sc->buf; 159 memcpy(&ri->imap, imap, sizeof(struct xfs_imap)); 160 ri->sc = sc; 161 return 0; 162 } 163 164 /* 165 * Make sure this ondisk inode can pass the inode buffer verifier. This is 166 * not the same as the dinode verifier. 
167 */ 168 STATIC void 169 xrep_dinode_buf_core( 170 struct xfs_scrub *sc, 171 struct xfs_buf *bp, 172 unsigned int ioffset) 173 { 174 struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset); 175 struct xfs_trans *tp = sc->tp; 176 struct xfs_mount *mp = sc->mp; 177 xfs_agino_t agino; 178 bool crc_ok = false; 179 bool magic_ok = false; 180 bool unlinked_ok = false; 181 182 agino = be32_to_cpu(dip->di_next_unlinked); 183 184 if (xfs_verify_agino_or_null(bp->b_pag, agino)) 185 unlinked_ok = true; 186 187 if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 188 xfs_dinode_good_version(mp, dip->di_version)) 189 magic_ok = true; 190 191 if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 192 XFS_DINODE_CRC_OFF)) 193 crc_ok = true; 194 195 if (magic_ok && unlinked_ok && crc_ok) 196 return; 197 198 if (!magic_ok) { 199 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 200 dip->di_version = 3; 201 } 202 if (!unlinked_ok) 203 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 204 xfs_dinode_calc_crc(mp, dip); 205 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); 206 xfs_trans_log_buf(tp, bp, ioffset, 207 ioffset + sizeof(struct xfs_dinode) - 1); 208 } 209 210 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */ 211 STATIC void 212 xrep_dinode_buf( 213 struct xfs_scrub *sc, 214 struct xfs_buf *bp) 215 { 216 struct xfs_mount *mp = sc->mp; 217 int i; 218 int ni; 219 220 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 221 for (i = 0; i < ni; i++) 222 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog); 223 } 224 225 /* Reinitialize things that never change in an inode. 
*/ 226 STATIC void 227 xrep_dinode_header( 228 struct xfs_scrub *sc, 229 struct xfs_dinode *dip) 230 { 231 trace_xrep_dinode_header(sc, dip); 232 233 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 234 if (!xfs_dinode_good_version(sc->mp, dip->di_version)) 235 dip->di_version = 3; 236 dip->di_ino = cpu_to_be64(sc->sm->sm_ino); 237 uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid); 238 dip->di_gen = cpu_to_be32(sc->sm->sm_gen); 239 } 240 241 /* 242 * If this directory entry points to the scrub target inode, then the directory 243 * we're scanning is the parent of the scrub target inode. 244 */ 245 STATIC int 246 xrep_dinode_findmode_dirent( 247 struct xfs_scrub *sc, 248 struct xfs_inode *dp, 249 xfs_dir2_dataptr_t dapos, 250 const struct xfs_name *name, 251 xfs_ino_t ino, 252 void *priv) 253 { 254 struct xrep_inode *ri = priv; 255 int error = 0; 256 257 if (xchk_should_terminate(ri->sc, &error)) 258 return error; 259 260 if (ino != sc->sm->sm_ino) 261 return 0; 262 263 /* Ignore garbage directory entry names. */ 264 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) 265 return -EFSCORRUPTED; 266 267 /* Don't pick up dot or dotdot entries; we only want child dirents. */ 268 if (xfs_dir2_samename(name, &xfs_name_dotdot) || 269 xfs_dir2_samename(name, &xfs_name_dot)) 270 return 0; 271 272 /* 273 * Uhoh, more than one parent for this inode and they don't agree on 274 * the file type? 275 */ 276 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN && 277 ri->alleged_ftype != name->type) { 278 trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type, 279 ri->alleged_ftype); 280 return -EFSCORRUPTED; 281 } 282 283 /* We found a potential parent; remember the ftype. */ 284 trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type); 285 ri->alleged_ftype = name->type; 286 return 0; 287 } 288 289 /* Try to lock a directory, or wait a jiffy. 
*/ 290 static inline int 291 xrep_dinode_ilock_nowait( 292 struct xfs_inode *dp, 293 unsigned int lock_mode) 294 { 295 if (xfs_ilock_nowait(dp, lock_mode)) 296 return true; 297 298 schedule_timeout_killable(1); 299 return false; 300 } 301 302 /* 303 * Try to lock a directory to look for ftype hints. Since we already hold the 304 * AGI buffer, we cannot block waiting for the ILOCK because rename can take 305 * the ILOCK and then try to lock AGIs. 306 */ 307 STATIC int 308 xrep_dinode_trylock_directory( 309 struct xrep_inode *ri, 310 struct xfs_inode *dp, 311 unsigned int *lock_modep) 312 { 313 unsigned long deadline = jiffies + msecs_to_jiffies(30000); 314 unsigned int lock_mode; 315 int error = 0; 316 317 do { 318 if (xchk_should_terminate(ri->sc, &error)) 319 return error; 320 321 if (xfs_need_iread_extents(&dp->i_df)) 322 lock_mode = XFS_ILOCK_EXCL; 323 else 324 lock_mode = XFS_ILOCK_SHARED; 325 326 if (xrep_dinode_ilock_nowait(dp, lock_mode)) { 327 *lock_modep = lock_mode; 328 return 0; 329 } 330 } while (!time_is_before_jiffies(deadline)); 331 return -EBUSY; 332 } 333 334 /* 335 * If this is a directory, walk the dirents looking for any that point to the 336 * scrub target inode. 337 */ 338 STATIC int 339 xrep_dinode_findmode_walk_directory( 340 struct xrep_inode *ri, 341 struct xfs_inode *dp) 342 { 343 struct xfs_scrub *sc = ri->sc; 344 unsigned int lock_mode; 345 int error = 0; 346 347 /* Ignore temporary repair directories. */ 348 if (xrep_is_tempfile(dp)) 349 return 0; 350 351 /* 352 * Scan the directory to see if there it contains an entry pointing to 353 * the directory that we are repairing. 354 */ 355 error = xrep_dinode_trylock_directory(ri, dp, &lock_mode); 356 if (error) 357 return error; 358 359 /* 360 * If this directory is known to be sick, we cannot scan it reliably 361 * and must abort. 
362 */ 363 if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE | 364 XFS_SICK_INO_BMBTD | 365 XFS_SICK_INO_DIR)) { 366 error = -EFSCORRUPTED; 367 goto out_unlock; 368 } 369 370 /* 371 * We cannot complete our parent pointer scan if a directory looks as 372 * though it has been zapped by the inode record repair code. 373 */ 374 if (xchk_dir_looks_zapped(dp)) { 375 error = -EBUSY; 376 goto out_unlock; 377 } 378 379 error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri); 380 if (error) 381 goto out_unlock; 382 383 out_unlock: 384 xfs_iunlock(dp, lock_mode); 385 return error; 386 } 387 388 /* 389 * Try to find the mode of the inode being repaired by looking for directories 390 * that point down to this file. 391 */ 392 STATIC int 393 xrep_dinode_find_mode( 394 struct xrep_inode *ri, 395 uint16_t *mode) 396 { 397 struct xfs_scrub *sc = ri->sc; 398 struct xfs_inode *dp; 399 int error; 400 401 /* No ftype means we have no other metadata to consult. */ 402 if (!xfs_has_ftype(sc->mp)) { 403 *mode = S_IFREG; 404 return 0; 405 } 406 407 /* 408 * Scan all directories for parents that might point down to this 409 * inode. Skip the inode being repaired during the scan since it 410 * cannot be its own parent. Note that we still hold the AGI locked 411 * so there's a real possibility that _iscan_iter can return EBUSY. 
412 */ 413 xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan); 414 xchk_iscan_set_agi_trylock(&ri->ftype_iscan); 415 ri->ftype_iscan.skip_ino = sc->sm->sm_ino; 416 ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN; 417 while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) { 418 if (S_ISDIR(VFS_I(dp)->i_mode)) 419 error = xrep_dinode_findmode_walk_directory(ri, dp); 420 xchk_iscan_mark_visited(&ri->ftype_iscan, dp); 421 xchk_irele(sc, dp); 422 if (error < 0) 423 break; 424 if (xchk_should_terminate(sc, &error)) 425 break; 426 } 427 xchk_iscan_iter_finish(&ri->ftype_iscan); 428 xchk_iscan_teardown(&ri->ftype_iscan); 429 430 if (error == -EBUSY) { 431 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) { 432 /* 433 * If we got an EBUSY after finding at least one 434 * dirent, that means the scan found an inode on the 435 * inactivation list and could not open it. Accept the 436 * alleged ftype and install a new mode below. 437 */ 438 error = 0; 439 } else if (!(sc->flags & XCHK_TRY_HARDER)) { 440 /* 441 * Otherwise, retry the operation one time to see if 442 * the reason for the delay is an inode from the same 443 * cluster buffer waiting on the inactivation list. 444 */ 445 error = -EDEADLOCK; 446 } 447 } 448 if (error) 449 return error; 450 451 /* 452 * Convert the discovered ftype into the file mode. If all else fails, 453 * return S_IFREG. 454 */ 455 switch (ri->alleged_ftype) { 456 case XFS_DIR3_FT_DIR: 457 *mode = S_IFDIR; 458 break; 459 case XFS_DIR3_FT_WHT: 460 case XFS_DIR3_FT_CHRDEV: 461 *mode = S_IFCHR; 462 break; 463 case XFS_DIR3_FT_BLKDEV: 464 *mode = S_IFBLK; 465 break; 466 case XFS_DIR3_FT_FIFO: 467 *mode = S_IFIFO; 468 break; 469 case XFS_DIR3_FT_SOCK: 470 *mode = S_IFSOCK; 471 break; 472 case XFS_DIR3_FT_SYMLINK: 473 *mode = S_IFLNK; 474 break; 475 default: 476 *mode = S_IFREG; 477 break; 478 } 479 return 0; 480 } 481 482 /* Turn di_mode into /something/ recognizable. Returns true if we succeed. 
*/ 483 STATIC int 484 xrep_dinode_mode( 485 struct xrep_inode *ri, 486 struct xfs_dinode *dip) 487 { 488 struct xfs_scrub *sc = ri->sc; 489 uint16_t mode = be16_to_cpu(dip->di_mode); 490 int error; 491 492 trace_xrep_dinode_mode(sc, dip); 493 494 if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) 495 return 0; 496 497 /* Try to fix the mode. If we cannot, then leave everything alone. */ 498 error = xrep_dinode_find_mode(ri, &mode); 499 switch (error) { 500 case -EINTR: 501 case -EBUSY: 502 case -EDEADLOCK: 503 /* temporary failure or fatal signal */ 504 return error; 505 case 0: 506 /* found mode */ 507 break; 508 default: 509 /* some other error, assume S_IFREG */ 510 mode = S_IFREG; 511 break; 512 } 513 514 /* bad mode, so we set it to a file that only root can read */ 515 dip->di_mode = cpu_to_be16(mode); 516 dip->di_uid = 0; 517 dip->di_gid = 0; 518 ri->zap_acls = true; 519 return 0; 520 } 521 522 /* Fix unused link count fields having nonzero values. */ 523 STATIC void 524 xrep_dinode_nlinks( 525 struct xfs_dinode *dip) 526 { 527 if (dip->di_version < 2) { 528 dip->di_nlink = 0; 529 return; 530 } 531 532 if (xfs_dinode_is_metadir(dip)) { 533 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) 534 dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN); 535 } else { 536 dip->di_metatype = 0; 537 } 538 } 539 540 /* Fix any conflicting flags that the verifiers complain about. */ 541 STATIC void 542 xrep_dinode_flags( 543 struct xfs_scrub *sc, 544 struct xfs_dinode *dip, 545 bool isrt) 546 { 547 struct xfs_mount *mp = sc->mp; 548 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 549 uint16_t flags = be16_to_cpu(dip->di_flags); 550 uint16_t mode = be16_to_cpu(dip->di_mode); 551 552 trace_xrep_dinode_flags(sc, dip); 553 554 if (isrt) 555 flags |= XFS_DIFLAG_REALTIME; 556 else 557 flags &= ~XFS_DIFLAG_REALTIME; 558 559 /* 560 * For regular files on a reflink filesystem, set the REFLINK flag to 561 * protect shared extents. 
A later stage will actually check those 562 * extents and clear the flag if possible. 563 */ 564 if (xfs_has_reflink(mp) && S_ISREG(mode)) 565 flags2 |= XFS_DIFLAG2_REFLINK; 566 else 567 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE); 568 if (!xfs_has_bigtime(mp)) 569 flags2 &= ~XFS_DIFLAG2_BIGTIME; 570 if (!xfs_has_large_extent_counts(mp)) 571 flags2 &= ~XFS_DIFLAG2_NREXT64; 572 if (flags2 & XFS_DIFLAG2_NREXT64) 573 dip->di_nrext64_pad = 0; 574 else if (dip->di_version >= 3) 575 dip->di_v3_pad = 0; 576 577 if (flags2 & XFS_DIFLAG2_METADATA) { 578 xfs_failaddr_t fa; 579 580 fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags, 581 flags2); 582 if (fa) 583 flags2 &= ~XFS_DIFLAG2_METADATA; 584 } 585 586 dip->di_flags = cpu_to_be16(flags); 587 dip->di_flags2 = cpu_to_be64(flags2); 588 } 589 590 /* 591 * Blow out symlink; now it points nowhere. We don't have to worry about 592 * incore state because this inode is failing the verifiers. 593 */ 594 STATIC void 595 xrep_dinode_zap_symlink( 596 struct xrep_inode *ri, 597 struct xfs_dinode *dip) 598 { 599 struct xfs_scrub *sc = ri->sc; 600 char *p; 601 602 trace_xrep_dinode_zap_symlink(sc, dip); 603 604 dip->di_format = XFS_DINODE_FMT_LOCAL; 605 dip->di_size = cpu_to_be64(1); 606 p = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 607 *p = '?'; 608 ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED; 609 } 610 611 /* 612 * Blow out dir, make the parent point to the root. In the future repair will 613 * reconstruct this directory for us. Note that there's no in-core directory 614 * inode because the sf verifier tripped, so we don't have to worry about the 615 * dentry cache. 
616 */ 617 STATIC void 618 xrep_dinode_zap_dir( 619 struct xrep_inode *ri, 620 struct xfs_dinode *dip) 621 { 622 struct xfs_scrub *sc = ri->sc; 623 struct xfs_mount *mp = sc->mp; 624 struct xfs_dir2_sf_hdr *sfp; 625 int i8count; 626 627 trace_xrep_dinode_zap_dir(sc, dip); 628 629 dip->di_format = XFS_DINODE_FMT_LOCAL; 630 i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM; 631 sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 632 sfp->count = 0; 633 sfp->i8count = i8count; 634 xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino); 635 dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count)); 636 ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED; 637 } 638 639 /* Make sure we don't have a garbage file size. */ 640 STATIC void 641 xrep_dinode_size( 642 struct xrep_inode *ri, 643 struct xfs_dinode *dip) 644 { 645 struct xfs_scrub *sc = ri->sc; 646 uint64_t size = be64_to_cpu(dip->di_size); 647 uint16_t mode = be16_to_cpu(dip->di_mode); 648 649 trace_xrep_dinode_size(sc, dip); 650 651 switch (mode & S_IFMT) { 652 case S_IFIFO: 653 case S_IFCHR: 654 case S_IFBLK: 655 case S_IFSOCK: 656 /* di_size can't be nonzero for special files */ 657 dip->di_size = 0; 658 break; 659 case S_IFREG: 660 /* Regular files can't be larger than 2^63-1 bytes. */ 661 dip->di_size = cpu_to_be64(size & ~(1ULL << 63)); 662 break; 663 case S_IFLNK: 664 /* 665 * Truncate ridiculously oversized symlinks. If the size is 666 * zero, reset it to point to the current directory. Both of 667 * these conditions trigger dinode verifier errors, so there 668 * is no in-core state to reset. 669 */ 670 if (size > XFS_SYMLINK_MAXLEN) 671 dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN); 672 else if (size == 0) 673 xrep_dinode_zap_symlink(ri, dip); 674 break; 675 case S_IFDIR: 676 /* 677 * Directories can't have a size larger than 32G. If the size 678 * is zero, reset it to an empty directory. Both of these 679 * conditions trigger dinode verifier errors, so there is no 680 * in-core state to reset. 
681 */ 682 if (size > XFS_DIR2_SPACE_SIZE) 683 dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE); 684 else if (size == 0) 685 xrep_dinode_zap_dir(ri, dip); 686 break; 687 } 688 } 689 690 /* Fix extent size hints. */ 691 STATIC void 692 xrep_dinode_extsize_hints( 693 struct xfs_scrub *sc, 694 struct xfs_dinode *dip) 695 { 696 struct xfs_mount *mp = sc->mp; 697 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 698 uint16_t flags = be16_to_cpu(dip->di_flags); 699 uint16_t mode = be16_to_cpu(dip->di_mode); 700 701 xfs_failaddr_t fa; 702 703 trace_xrep_dinode_extsize_hints(sc, dip); 704 705 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 706 mode, flags); 707 if (fa) { 708 dip->di_extsize = 0; 709 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE | 710 XFS_DIFLAG_EXTSZINHERIT); 711 } 712 713 if (dip->di_version < 3) 714 return; 715 716 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 717 mode, flags, flags2); 718 if (fa) { 719 dip->di_cowextsize = 0; 720 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE); 721 } 722 } 723 724 /* Count extents and blocks for an inode given an rmap. */ 725 STATIC int 726 xrep_dinode_walk_rmap( 727 struct xfs_btree_cur *cur, 728 const struct xfs_rmap_irec *rec, 729 void *priv) 730 { 731 struct xrep_inode *ri = priv; 732 int error = 0; 733 734 if (xchk_should_terminate(ri->sc, &error)) 735 return error; 736 737 /* We only care about this inode. */ 738 if (rec->rm_owner != ri->sc->sm->sm_ino) 739 return 0; 740 741 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) { 742 ri->attr_blocks += rec->rm_blockcount; 743 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 744 ri->attr_extents++; 745 746 return 0; 747 } 748 749 ri->data_blocks += rec->rm_blockcount; 750 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 751 ri->data_extents++; 752 753 return 0; 754 } 755 756 /* Count extents and blocks for an inode from all AG rmap data. 
*/ 757 STATIC int 758 xrep_dinode_count_ag_rmaps( 759 struct xrep_inode *ri, 760 struct xfs_perag *pag) 761 { 762 struct xfs_btree_cur *cur; 763 struct xfs_buf *agf; 764 int error; 765 766 error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf); 767 if (error) 768 return error; 769 770 cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag); 771 error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri); 772 xfs_btree_del_cursor(cur, error); 773 xfs_trans_brelse(ri->sc->tp, agf); 774 return error; 775 } 776 777 /* Count extents and blocks for an inode given an rt rmap. */ 778 STATIC int 779 xrep_dinode_walk_rtrmap( 780 struct xfs_btree_cur *cur, 781 const struct xfs_rmap_irec *rec, 782 void *priv) 783 { 784 struct xrep_inode *ri = priv; 785 int error = 0; 786 787 if (xchk_should_terminate(ri->sc, &error)) 788 return error; 789 790 /* We only care about this inode. */ 791 if (rec->rm_owner != ri->sc->sm->sm_ino) 792 return 0; 793 794 if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) 795 return -EFSCORRUPTED; 796 797 ri->rt_blocks += rec->rm_blockcount; 798 ri->rt_extents++; 799 return 0; 800 } 801 802 /* Count extents and blocks for an inode from all realtime rmap data. */ 803 STATIC int 804 xrep_dinode_count_rtgroup_rmaps( 805 struct xrep_inode *ri, 806 struct xfs_rtgroup *rtg) 807 { 808 struct xfs_scrub *sc = ri->sc; 809 int error; 810 811 error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP); 812 if (error) 813 return error; 814 815 error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap, 816 ri); 817 xchk_rtgroup_btcur_free(&sc->sr); 818 xchk_rtgroup_free(sc, &sc->sr); 819 return error; 820 } 821 822 /* Count extents and blocks for a given inode from all rmap data. 
*/ 823 STATIC int 824 xrep_dinode_count_rmaps( 825 struct xrep_inode *ri) 826 { 827 struct xfs_perag *pag = NULL; 828 struct xfs_rtgroup *rtg = NULL; 829 int error; 830 831 if (!xfs_has_rmapbt(ri->sc->mp)) 832 return -EOPNOTSUPP; 833 834 while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) { 835 error = xrep_dinode_count_rtgroup_rmaps(ri, rtg); 836 if (error) { 837 xfs_rtgroup_rele(rtg); 838 return error; 839 } 840 } 841 842 while ((pag = xfs_perag_next(ri->sc->mp, pag))) { 843 error = xrep_dinode_count_ag_rmaps(ri, pag); 844 if (error) { 845 xfs_perag_rele(pag); 846 return error; 847 } 848 } 849 850 /* Can't have extents on both the rt and the data device. */ 851 if (ri->data_extents && ri->rt_extents) 852 return -EFSCORRUPTED; 853 854 trace_xrep_dinode_count_rmaps(ri->sc, 855 ri->data_blocks, ri->rt_blocks, ri->attr_blocks, 856 ri->data_extents, ri->rt_extents, ri->attr_extents); 857 return 0; 858 } 859 860 /* Return true if this extents-format ifork looks like garbage. */ 861 STATIC bool 862 xrep_dinode_bad_extents_fork( 863 struct xfs_scrub *sc, 864 struct xfs_dinode *dip, 865 unsigned int dfork_size, 866 int whichfork) 867 { 868 struct xfs_bmbt_irec new; 869 struct xfs_bmbt_rec *dp; 870 xfs_extnum_t nex; 871 bool isrt; 872 unsigned int i; 873 874 nex = xfs_dfork_nextents(dip, whichfork); 875 if (nex > dfork_size / sizeof(struct xfs_bmbt_rec)) 876 return true; 877 878 dp = XFS_DFORK_PTR(dip, whichfork); 879 880 isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME); 881 for (i = 0; i < nex; i++, dp++) { 882 xfs_failaddr_t fa; 883 884 xfs_bmbt_disk_get_all(dp, &new); 885 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork, 886 &new); 887 if (fa) 888 return true; 889 } 890 891 return false; 892 } 893 894 /* Return true if this btree-format ifork looks like garbage. 
*/
/*
 * The fork is rejected when the extent count would have fit in the fork as
 * plain extent records, when the fork cannot hold a bmdr root header, when the
 * root's record count or level is out of range, or when any key offset or
 * child pointer in the root fails verification.
 */
STATIC bool
xrep_dinode_bad_bmbt_fork(
	struct xfs_scrub	*sc,
	struct xfs_dinode	*dip,
	unsigned int		dfork_size,
	int			whichfork)
{
	struct xfs_bmdr_block	*root;
	xfs_extnum_t		nextents;
	unsigned int		maxrecs;
	unsigned int		numrecs;
	unsigned int		level;
	unsigned int		i;

	/* So few extents that they would have fit as plain records. */
	nextents = xfs_dfork_nextents(dip, whichfork);
	if (nextents <= dfork_size / sizeof(struct xfs_bmbt_rec))
		return true;

	if (dfork_size < sizeof(struct xfs_bmdr_block))
		return true;

	root = XFS_DFORK_PTR(dip, whichfork);
	numrecs = be16_to_cpu(root->bb_numrecs);
	level = be16_to_cpu(root->bb_level);

	if (numrecs == 0)
		return true;
	if (xfs_bmdr_space_calc(numrecs) > dfork_size)
		return true;

	if (level == 0)
		return true;
	if (level >= XFS_BM_MAXLEVELS(sc->mp, whichfork))
		return true;

	/* Records in the root are addressed starting from index 1. */
	maxrecs = xfs_bmdr_maxrecs(dfork_size, 0);
	for (i = 1; i <= numrecs; i++) {
		struct xfs_bmbt_key	*key = xfs_bmdr_key_addr(root, i);
		xfs_bmbt_ptr_t		*ptr;

		if (!xfs_verify_fileoff(sc->mp, be64_to_cpu(key->br_startoff)))
			return true;

		ptr = xfs_bmdr_ptr_addr(root, i, maxrecs);
		if (!xfs_verify_fsbno(sc->mp, be64_to_cpu(*ptr)))
			return true;
	}

	return false;
}

/* Return true if this rmap-format ifork looks like garbage.
*/ 947 STATIC bool 948 xrep_dinode_bad_rtrmapbt_fork( 949 struct xfs_scrub *sc, 950 struct xfs_dinode *dip, 951 unsigned int dfork_size) 952 { 953 struct xfs_rtrmap_root *dfp; 954 unsigned int nrecs; 955 unsigned int level; 956 957 if (dfork_size < sizeof(struct xfs_rtrmap_root)) 958 return true; 959 960 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 961 nrecs = be16_to_cpu(dfp->bb_numrecs); 962 level = be16_to_cpu(dfp->bb_level); 963 964 if (level > sc->mp->m_rtrmap_maxlevels) 965 return true; 966 if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size) 967 return true; 968 if (level > 0 && nrecs == 0) 969 return true; 970 971 return false; 972 } 973 974 /* Return true if this refcount-format ifork looks like garbage. */ 975 STATIC bool 976 xrep_dinode_bad_rtrefcountbt_fork( 977 struct xfs_scrub *sc, 978 struct xfs_dinode *dip, 979 unsigned int dfork_size) 980 { 981 struct xfs_rtrefcount_root *dfp; 982 unsigned int nrecs; 983 unsigned int level; 984 985 if (dfork_size < sizeof(struct xfs_rtrefcount_root)) 986 return true; 987 988 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 989 nrecs = be16_to_cpu(dfp->bb_numrecs); 990 level = be16_to_cpu(dfp->bb_level); 991 992 if (level > sc->mp->m_rtrefc_maxlevels) 993 return true; 994 if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size) 995 return true; 996 if (level > 0 && nrecs == 0) 997 return true; 998 999 return false; 1000 } 1001 1002 /* Check a metadata-btree fork. 
*/ 1003 STATIC bool 1004 xrep_dinode_bad_metabt_fork( 1005 struct xfs_scrub *sc, 1006 struct xfs_dinode *dip, 1007 unsigned int dfork_size, 1008 int whichfork) 1009 { 1010 if (whichfork != XFS_DATA_FORK) 1011 return true; 1012 1013 switch (be16_to_cpu(dip->di_metatype)) { 1014 case XFS_METAFILE_RTRMAP: 1015 return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size); 1016 case XFS_METAFILE_RTREFCOUNT: 1017 return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size); 1018 default: 1019 return true; 1020 } 1021 1022 return false; 1023 } 1024 1025 /* 1026 * Check the data fork for things that will fail the ifork verifiers or the 1027 * ifork formatters. 1028 */ 1029 STATIC bool 1030 xrep_dinode_check_dfork( 1031 struct xfs_scrub *sc, 1032 struct xfs_dinode *dip, 1033 uint16_t mode) 1034 { 1035 void *dfork_ptr; 1036 int64_t data_size; 1037 unsigned int fmt; 1038 unsigned int dfork_size; 1039 1040 /* 1041 * Verifier functions take signed int64_t, so check for bogus negative 1042 * values first. 
1043 */ 1044 data_size = be64_to_cpu(dip->di_size); 1045 if (data_size < 0) 1046 return true; 1047 1048 fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK); 1049 switch (mode & S_IFMT) { 1050 case S_IFIFO: 1051 case S_IFCHR: 1052 case S_IFBLK: 1053 case S_IFSOCK: 1054 if (fmt != XFS_DINODE_FMT_DEV) 1055 return true; 1056 break; 1057 case S_IFREG: 1058 if (fmt == XFS_DINODE_FMT_LOCAL) 1059 return true; 1060 fallthrough; 1061 case S_IFLNK: 1062 case S_IFDIR: 1063 switch (fmt) { 1064 case XFS_DINODE_FMT_LOCAL: 1065 case XFS_DINODE_FMT_EXTENTS: 1066 case XFS_DINODE_FMT_BTREE: 1067 break; 1068 default: 1069 return true; 1070 } 1071 break; 1072 default: 1073 return true; 1074 } 1075 1076 dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK); 1077 dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1078 1079 switch (fmt) { 1080 case XFS_DINODE_FMT_DEV: 1081 break; 1082 case XFS_DINODE_FMT_LOCAL: 1083 /* dir/symlink structure cannot be larger than the fork */ 1084 if (data_size > dfork_size) 1085 return true; 1086 /* directory structure must pass verification. */ 1087 if (S_ISDIR(mode) && 1088 xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL) 1089 return true; 1090 /* symlink structure must pass verification. 
*/ 1091 if (S_ISLNK(mode) && 1092 xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL) 1093 return true; 1094 break; 1095 case XFS_DINODE_FMT_EXTENTS: 1096 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size, 1097 XFS_DATA_FORK)) 1098 return true; 1099 break; 1100 case XFS_DINODE_FMT_BTREE: 1101 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size, 1102 XFS_DATA_FORK)) 1103 return true; 1104 break; 1105 case XFS_DINODE_FMT_META_BTREE: 1106 if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size, 1107 XFS_DATA_FORK)) 1108 return true; 1109 break; 1110 default: 1111 return true; 1112 } 1113 1114 return false; 1115 } 1116 1117 static void 1118 xrep_dinode_set_data_nextents( 1119 struct xfs_dinode *dip, 1120 xfs_extnum_t nextents) 1121 { 1122 if (xfs_dinode_has_large_extent_counts(dip)) 1123 dip->di_big_nextents = cpu_to_be64(nextents); 1124 else 1125 dip->di_nextents = cpu_to_be32(nextents); 1126 } 1127 1128 static void 1129 xrep_dinode_set_attr_nextents( 1130 struct xfs_dinode *dip, 1131 xfs_extnum_t nextents) 1132 { 1133 if (xfs_dinode_has_large_extent_counts(dip)) 1134 dip->di_big_anextents = cpu_to_be32(nextents); 1135 else 1136 dip->di_anextents = cpu_to_be16(nextents); 1137 } 1138 1139 /* Reset the data fork to something sane. */ 1140 STATIC void 1141 xrep_dinode_zap_dfork( 1142 struct xrep_inode *ri, 1143 struct xfs_dinode *dip, 1144 uint16_t mode) 1145 { 1146 struct xfs_scrub *sc = ri->sc; 1147 1148 trace_xrep_dinode_zap_dfork(sc, dip); 1149 1150 ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED; 1151 1152 xrep_dinode_set_data_nextents(dip, 0); 1153 ri->data_blocks = 0; 1154 ri->rt_blocks = 0; 1155 1156 /* Special files always get reset to DEV */ 1157 switch (mode & S_IFMT) { 1158 case S_IFIFO: 1159 case S_IFCHR: 1160 case S_IFBLK: 1161 case S_IFSOCK: 1162 dip->di_format = XFS_DINODE_FMT_DEV; 1163 dip->di_size = 0; 1164 return; 1165 } 1166 1167 /* 1168 * If we have data extents, reset to an empty map and hope the user 1169 * will run the bmapbtd checker next. 
1170 */ 1171 if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) { 1172 dip->di_format = XFS_DINODE_FMT_EXTENTS; 1173 return; 1174 } 1175 1176 /* Otherwise, reset the local format to the minimum. */ 1177 switch (mode & S_IFMT) { 1178 case S_IFLNK: 1179 xrep_dinode_zap_symlink(ri, dip); 1180 break; 1181 case S_IFDIR: 1182 xrep_dinode_zap_dir(ri, dip); 1183 break; 1184 } 1185 } 1186 1187 /* 1188 * Check the attr fork for things that will fail the ifork verifiers or the 1189 * ifork formatters. 1190 */ 1191 STATIC bool 1192 xrep_dinode_check_afork( 1193 struct xfs_scrub *sc, 1194 struct xfs_dinode *dip) 1195 { 1196 struct xfs_attr_sf_hdr *afork_ptr; 1197 size_t attr_size; 1198 unsigned int afork_size; 1199 1200 if (XFS_DFORK_BOFF(dip) == 0) 1201 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS || 1202 xfs_dfork_attr_extents(dip) != 0; 1203 1204 afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 1205 afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 1206 1207 switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) { 1208 case XFS_DINODE_FMT_LOCAL: 1209 /* Fork has to be large enough to extract the xattr size. */ 1210 if (afork_size < sizeof(struct xfs_attr_sf_hdr)) 1211 return true; 1212 1213 /* xattr structure cannot be larger than the fork */ 1214 attr_size = be16_to_cpu(afork_ptr->totsize); 1215 if (attr_size > afork_size) 1216 return true; 1217 1218 /* xattr structure must pass verification. 
*/ 1219 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL; 1220 case XFS_DINODE_FMT_EXTENTS: 1221 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size, 1222 XFS_ATTR_FORK)) 1223 return true; 1224 break; 1225 case XFS_DINODE_FMT_BTREE: 1226 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size, 1227 XFS_ATTR_FORK)) 1228 return true; 1229 break; 1230 case XFS_DINODE_FMT_META_BTREE: 1231 if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size, 1232 XFS_ATTR_FORK)) 1233 return true; 1234 break; 1235 default: 1236 return true; 1237 } 1238 1239 return false; 1240 } 1241 1242 /* 1243 * Reset the attr fork to empty. Since the attr fork could have contained 1244 * ACLs, make the file readable only by root. 1245 */ 1246 STATIC void 1247 xrep_dinode_zap_afork( 1248 struct xrep_inode *ri, 1249 struct xfs_dinode *dip, 1250 uint16_t mode) 1251 { 1252 struct xfs_scrub *sc = ri->sc; 1253 1254 trace_xrep_dinode_zap_afork(sc, dip); 1255 1256 ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED; 1257 1258 dip->di_aformat = XFS_DINODE_FMT_EXTENTS; 1259 xrep_dinode_set_attr_nextents(dip, 0); 1260 ri->attr_blocks = 0; 1261 1262 /* 1263 * If the data fork is in btree format, removing the attr fork entirely 1264 * might cause verifier failures if the next level down in the bmbt 1265 * could now fit in the data fork area. 1266 */ 1267 if (dip->di_format != XFS_DINODE_FMT_BTREE) 1268 dip->di_forkoff = 0; 1269 dip->di_mode = cpu_to_be16(mode & ~0777); 1270 dip->di_uid = 0; 1271 dip->di_gid = 0; 1272 } 1273 1274 /* Make sure the fork offset is a sensible value. 
*/
STATIC void
xrep_dinode_ensure_forkoff(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip,
	uint16_t		mode)
{
	struct xfs_bmdr_block	*bmdr;
	struct xfs_rtrmap_root	*rmdr;
	struct xfs_rtrefcount_root *rcdr;
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		attr_extents, data_extents;
	size_t			bmdr_minsz = xfs_bmdr_space_calc(1);
	unsigned int		lit_sz = XFS_LITINO(sc->mp);
	unsigned int		afork_min, dfork_min;

	trace_xrep_dinode_ensure_forkoff(sc, dip);

	/*
	 * Before calling this function, xrep_dinode_core ensured that both
	 * forks actually fit inside their respective literal areas.  If this
	 * was not the case, the fork was reset to FMT_EXTENTS with zero
	 * records.  If the rmapbt scan found attr or data fork blocks, this
	 * will be noted in the dinode_stats, and we must leave enough room
	 * for the bmap repair code to reconstruct the mapping structure.
	 *
	 * First, compute the minimum space required for the attr fork.
	 */
	switch (dip->di_aformat) {
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform xattr structure at all, that
		 * means the attr fork area was exactly large enough to fit
		 * the sf structure.
		 */
		afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		attr_extents = xfs_dfork_attr_extents(dip);
		if (attr_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the attr fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents;
		} else if (ri->attr_extents > 0) {
			/*
			 * The attr fork thinks it has zero extents, but we
			 * found some xattr extents.  We need to leave enough
			 * empty space here so that the incore attr fork will
			 * get created (and hence trigger the attr fork bmap
			 * repairer).
			 */
			afork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			afork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK);
		afork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	default:
		/* We should never see any other formats. */
		afork_min = 0;
		break;
	}

	/* Compute the minimum space required for the data fork. */
	switch (dip->di_format) {
	case XFS_DINODE_FMT_DEV:
		dfork_min = sizeof(__be32);
		break;
	case XFS_DINODE_FMT_UUID:
		dfork_min = sizeof(uuid_t);
		break;
	case XFS_DINODE_FMT_LOCAL:
		/*
		 * If we still have a shortform data fork at all, that means
		 * the data fork area was large enough to fit whatever was in
		 * there.
		 */
		dfork_min = be64_to_cpu(dip->di_size);
		break;
	case XFS_DINODE_FMT_EXTENTS:
		data_extents = xfs_dfork_data_extents(dip);
		if (data_extents) {
			/*
			 * We must maintain sufficient space to hold the entire
			 * extent map array in the data fork.  Note that we
			 * previously zapped the fork if it had no chance of
			 * fitting in the inode.
			 */
			dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents;
		} else if (ri->data_extents > 0 || ri->rt_extents > 0) {
			/*
			 * The data fork thinks it has zero extents, but we
			 * found some data extents.  We need to leave enough
			 * empty space here so that the data fork bmap repair
			 * will recover the mappings.
			 */
			dfork_min = bmdr_minsz;
		} else {
			/* No extents on disk or found in rmapbt. */
			dfork_min = 0;
		}
		break;
	case XFS_DINODE_FMT_BTREE:
		/* Must have space for btree header and key/pointers. */
		bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
		dfork_min = xfs_bmap_broot_space(sc->mp, bmdr);
		break;
	case XFS_DINODE_FMT_META_BTREE:
		/* Metadata btree roots are sized according to the metafile type. */
		switch (be16_to_cpu(dip->di_metatype)) {
		case XFS_METAFILE_RTRMAP:
			rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
			dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr);
			break;
		case XFS_METAFILE_RTREFCOUNT:
			rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK);
			dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr);
			break;
		default:
			dfork_min = 0;
			break;
		}
		break;
	default:
		dfork_min = 0;
		break;
	}

	/*
	 * Round all values up to the nearest 8 bytes, because that is the
	 * precision of di_forkoff.
	 */
	afork_min = roundup(afork_min, 8);
	dfork_min = roundup(dfork_min, 8);
	bmdr_minsz = roundup(bmdr_minsz, 8);

	ASSERT(dfork_min <= lit_sz);
	ASSERT(afork_min <= lit_sz);

	/*
	 * If the data fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork up.
	 */
	if (dip->di_format == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_data_extents(dip) == 0 &&
	    (ri->data_extents > 0 || ri->rt_extents > 0) &&
	    bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) {
		if (bmdr_minsz + afork_min > lit_sz) {
			/*
			 * The attr fork and the stub fork we need to recover
			 * the data fork won't both fit.  Zap the attr fork.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
			afork_min = bmdr_minsz;
		} else {
			void	*before, *after;

			/*
			 * Otherwise, just slide the attr fork up.  The old and
			 * new attr fork areas may overlap, hence memmove.
			 */
			before = XFS_DFORK_APTR(dip);
			dip->di_forkoff = bmdr_minsz >> 3;
			after = XFS_DFORK_APTR(dip);
			memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp));
		}
	}

	/*
	 * If the attr fork was zapped and we don't have enough space for the
	 * recovery fork, move the attr fork down.
	 */
	if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS &&
	    xfs_dfork_attr_extents(dip) == 0 &&
	    ri->attr_extents > 0 &&
	    bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) {
		if (dip->di_format == XFS_DINODE_FMT_BTREE) {
			/*
			 * If the data fork is in btree format then we can't
			 * adjust forkoff because that runs the risk of
			 * violating the extents/btree format transition rules.
			 */
		} else if (bmdr_minsz + dfork_min > lit_sz) {
			/*
			 * If we can't move the attr fork, too bad, we lose the
			 * attr fork and leak its blocks.
			 */
			xrep_dinode_zap_afork(ri, dip, mode);
		} else {
			/*
			 * Otherwise, just slide the attr fork down.  The attr
			 * fork is empty, so we don't have any old contents to
			 * move here.
			 */
			dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3;
		}
	}
}

/*
 * Zap the data/attr forks if we spot anything that isn't going to pass the
 * ifork verifiers or the ifork formatters, because we need to get the inode
 * into good enough shape that the higher level repair functions can run.
 */
STATIC void
xrep_dinode_zap_forks(
	struct xrep_inode	*ri,
	struct xfs_dinode	*dip)
{
	struct xfs_scrub	*sc = ri->sc;
	xfs_extnum_t		data_extents;
	xfs_extnum_t		attr_extents;
	xfs_filblks_t		nblocks;
	uint16_t		mode;
	bool			zap_datafork = false;
	bool			zap_attrfork = ri->zap_acls;

	trace_xrep_dinode_zap_forks(sc, dip);

	/* Sample the mode once; both zap helpers are handed this value. */
	mode = be16_to_cpu(dip->di_mode);

	data_extents = xfs_dfork_data_extents(dip);
	attr_extents = xfs_dfork_attr_extents(dip);
	nblocks = be64_to_cpu(dip->di_nblocks);

	/* Inode counters don't make sense? */
	if (data_extents > nblocks)
		zap_datafork = true;
	if (attr_extents > nblocks)
		zap_attrfork = true;
	if (data_extents + attr_extents > nblocks)
		zap_datafork = zap_attrfork = true;

	/* Only inspect the fork contents if the counters looked plausible. */
	if (!zap_datafork)
		zap_datafork = xrep_dinode_check_dfork(sc, dip, mode);
	if (!zap_attrfork)
		zap_attrfork = xrep_dinode_check_afork(sc, dip);

	/* Zap whatever's bad. */
	if (zap_attrfork)
		xrep_dinode_zap_afork(ri, dip, mode);
	if (zap_datafork)
		xrep_dinode_zap_dfork(ri, dip, mode);
	xrep_dinode_ensure_forkoff(ri, dip, mode);

	/*
	 * Zero di_nblocks if we don't have any extents at all to satisfy the
	 * buffer verifier.
	 */
	data_extents = xfs_dfork_data_extents(dip);
	attr_extents = xfs_dfork_attr_extents(dip);
	if (data_extents + attr_extents == 0)
		dip->di_nblocks = 0;
}

/*
 * Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget.
 *
 * Returns 0 with sc->ip set and the inode's IOLOCK and ILOCK taken if the
 * repaired inode could be loaded; otherwise returns a negative errno.  The
 * buffer changes are committed even if the mode repair or the iget fails.
 */
STATIC int
xrep_dinode_core(
	struct xrep_inode	*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_buf		*bp;
	struct xfs_dinode	*dip;
	xfs_ino_t		ino = sc->sm->sm_ino;
	int			error;
	int			iget_error;

	/* Figure out what this inode had mapped in both forks. */
	error = xrep_dinode_count_rmaps(ri);
	if (error)
		return error;

	/* Read the inode cluster buffer. */
	error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp,
			ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp,
			NULL);
	if (error)
		return error;

	/* Make sure we can pass the inode buffer verifier. */
	xrep_dinode_buf(sc, bp);
	bp->b_ops = &xfs_inode_buf_ops;

	/* Fix everything the verifier will complain about. */
	dip = xfs_buf_offset(bp, ri->imap.im_boffset);
	xrep_dinode_header(sc, dip);
	iget_error = xrep_dinode_mode(ri, dip);
	if (iget_error)
		goto write;
	xrep_dinode_nlinks(dip);
	xrep_dinode_flags(sc, dip, ri->rt_extents > 0);
	xrep_dinode_size(ri, dip);
	xrep_dinode_extsize_hints(sc, dip);
	xrep_dinode_zap_forks(ri, dip);

write:
	/* Write out the inode. */
	trace_xrep_dinode_fixed(sc, dip);
	xfs_dinode_calc_crc(sc->mp, dip);
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF);
	xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset,
			ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1);

	/*
	 * In theory, we've fixed the ondisk inode record enough that we should
	 * be able to load the inode into the cache.  Try to iget that inode
	 * now while we hold the AGI and the inode cluster buffer and take the
	 * IOLOCK so that we can continue with repairs without anyone else
	 * accessing the inode.  If iget fails, we still need to commit the
	 * changes.
	 */
	if (!iget_error)
		iget_error = xchk_iget(sc, ino, &sc->ip);
	if (!iget_error)
		xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * Commit the inode cluster buffer updates and drop the AGI buffer that
	 * we've been holding since scrub setup.  From here on out, repairs
	 * deal only with the cached inode.
	 */
	error = xrep_trans_commit(sc);
	if (error)
		return error;

	/* A commit failure takes precedence over a mode or iget failure. */
	if (iget_error)
		return iget_error;

	error = xchk_trans_alloc(sc, 0);
	if (error)
		return error;

	error = xrep_ino_dqattach(sc);
	if (error)
		return error;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
	/* Record any forks we zapped on the incore inode. */
	if (ri->ino_sick_mask)
		xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask);
	return 0;
}

/* Fix everything xfs_dinode_verify cares about.
*/ 1622 STATIC int 1623 xrep_dinode_problems( 1624 struct xrep_inode *ri) 1625 { 1626 struct xfs_scrub *sc = ri->sc; 1627 int error; 1628 1629 error = xrep_dinode_core(ri); 1630 if (error) 1631 return error; 1632 1633 /* We had to fix a totally busted inode, schedule quotacheck. */ 1634 if (XFS_IS_UQUOTA_ON(sc->mp)) 1635 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1636 if (XFS_IS_GQUOTA_ON(sc->mp)) 1637 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1638 if (XFS_IS_PQUOTA_ON(sc->mp)) 1639 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1640 1641 return 0; 1642 } 1643 1644 /* 1645 * Fix problems that the verifiers don't care about. In general these are 1646 * errors that don't cause problems elsewhere in the kernel that we can easily 1647 * detect, so we don't check them all that rigorously. 1648 */ 1649 1650 /* Make sure block and extent counts are ok. */ 1651 STATIC int 1652 xrep_inode_blockcounts( 1653 struct xfs_scrub *sc) 1654 { 1655 struct xfs_ifork *ifp; 1656 xfs_filblks_t count; 1657 xfs_filblks_t acount; 1658 xfs_extnum_t nextents; 1659 int error; 1660 1661 trace_xrep_inode_blockcounts(sc); 1662 1663 /* Set data fork counters from the data fork mappings. */ 1664 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count); 1665 if (error) 1666 return error; 1667 if (xfs_is_reflink_inode(sc->ip)) { 1668 /* 1669 * data fork blockcount can exceed physical storage if a user 1670 * reflinks the same block over and over again. 1671 */ 1672 ; 1673 } else if (XFS_IS_REALTIME_INODE(sc->ip)) { 1674 if (count >= sc->mp->m_sb.sb_rblocks) 1675 return -EFSCORRUPTED; 1676 } else { 1677 if (count >= sc->mp->m_sb.sb_dblocks) 1678 return -EFSCORRUPTED; 1679 } 1680 error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents); 1681 if (error) 1682 return error; 1683 sc->ip->i_df.if_nextents = nextents; 1684 1685 /* Set attr fork counters from the attr fork mappings. 
*/ 1686 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); 1687 if (ifp) { 1688 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, 1689 &acount); 1690 if (error) 1691 return error; 1692 if (count >= sc->mp->m_sb.sb_dblocks) 1693 return -EFSCORRUPTED; 1694 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK, 1695 nextents); 1696 if (error) 1697 return error; 1698 ifp->if_nextents = nextents; 1699 } else { 1700 acount = 0; 1701 } 1702 1703 sc->ip->i_nblocks = count + acount; 1704 return 0; 1705 } 1706 1707 /* Check for invalid uid/gid/prid. */ 1708 STATIC void 1709 xrep_inode_ids( 1710 struct xfs_scrub *sc) 1711 { 1712 bool dirty = false; 1713 1714 trace_xrep_inode_ids(sc); 1715 1716 if (!uid_valid(VFS_I(sc->ip)->i_uid)) { 1717 i_uid_write(VFS_I(sc->ip), 0); 1718 dirty = true; 1719 if (XFS_IS_UQUOTA_ON(sc->mp)) 1720 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1721 } 1722 1723 if (!gid_valid(VFS_I(sc->ip)->i_gid)) { 1724 i_gid_write(VFS_I(sc->ip), 0); 1725 dirty = true; 1726 if (XFS_IS_GQUOTA_ON(sc->mp)) 1727 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1728 } 1729 1730 if (sc->ip->i_projid == -1U) { 1731 sc->ip->i_projid = 0; 1732 dirty = true; 1733 if (XFS_IS_PQUOTA_ON(sc->mp)) 1734 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1735 } 1736 1737 /* strip setuid/setgid if we touched any of the ids */ 1738 if (dirty) 1739 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID); 1740 } 1741 1742 static inline void 1743 xrep_clamp_timestamp( 1744 struct xfs_inode *ip, 1745 struct timespec64 *ts) 1746 { 1747 ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC); 1748 *ts = timestamp_truncate(*ts, VFS_I(ip)); 1749 } 1750 1751 /* Nanosecond counters can't have more than 1 billion. 
*/ 1752 STATIC void 1753 xrep_inode_timestamps( 1754 struct xfs_inode *ip) 1755 { 1756 struct timespec64 tstamp; 1757 struct inode *inode = VFS_I(ip); 1758 1759 tstamp = inode_get_atime(inode); 1760 xrep_clamp_timestamp(ip, &tstamp); 1761 inode_set_atime_to_ts(inode, tstamp); 1762 1763 tstamp = inode_get_mtime(inode); 1764 xrep_clamp_timestamp(ip, &tstamp); 1765 inode_set_mtime_to_ts(inode, tstamp); 1766 1767 tstamp = inode_get_ctime(inode); 1768 xrep_clamp_timestamp(ip, &tstamp); 1769 inode_set_ctime_to_ts(inode, tstamp); 1770 1771 xrep_clamp_timestamp(ip, &ip->i_crtime); 1772 } 1773 1774 /* Fix inode flags that don't make sense together. */ 1775 STATIC void 1776 xrep_inode_flags( 1777 struct xfs_scrub *sc) 1778 { 1779 uint16_t mode; 1780 1781 trace_xrep_inode_flags(sc); 1782 1783 mode = VFS_I(sc->ip)->i_mode; 1784 1785 /* Clear junk flags */ 1786 if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY) 1787 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY; 1788 1789 /* NEWRTBM only applies to realtime bitmaps */ 1790 if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino) 1791 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM; 1792 else 1793 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM; 1794 1795 /* These only make sense for directories. */ 1796 if (!S_ISDIR(mode)) 1797 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT | 1798 XFS_DIFLAG_EXTSZINHERIT | 1799 XFS_DIFLAG_PROJINHERIT | 1800 XFS_DIFLAG_NOSYMLINKS); 1801 1802 /* These only make sense for files. */ 1803 if (!S_ISREG(mode)) 1804 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME | 1805 XFS_DIFLAG_EXTSIZE); 1806 1807 /* These only make sense for non-rt files. */ 1808 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) 1809 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM; 1810 1811 /* Immutable and append only? Drop the append. */ 1812 if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) && 1813 (sc->ip->i_diflags & XFS_DIFLAG_APPEND)) 1814 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND; 1815 1816 /* Clear junk flags. 
*/ 1817 if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY) 1818 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY; 1819 1820 /* No reflink flag unless we support it and it's a file. */ 1821 if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode)) 1822 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 1823 1824 /* DAX only applies to files and dirs. */ 1825 if (!(S_ISREG(mode) || S_ISDIR(mode))) 1826 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; 1827 } 1828 1829 /* 1830 * Fix size problems with block/node format directories. If we fail to find 1831 * the extent list, just bail out and let the bmapbtd repair functions clean 1832 * up that mess. 1833 */ 1834 STATIC void 1835 xrep_inode_blockdir_size( 1836 struct xfs_scrub *sc) 1837 { 1838 struct xfs_iext_cursor icur; 1839 struct xfs_bmbt_irec got; 1840 struct xfs_ifork *ifp; 1841 xfs_fileoff_t off; 1842 int error; 1843 1844 trace_xrep_inode_blockdir_size(sc); 1845 1846 error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK); 1847 if (error) 1848 return; 1849 1850 /* Find the last block before 32G; this is the dir size. */ 1851 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1852 off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE); 1853 if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) { 1854 /* zero-extents directory? */ 1855 return; 1856 } 1857 1858 off = got.br_startoff + got.br_blockcount; 1859 sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE, 1860 XFS_FSB_TO_B(sc->mp, off)); 1861 } 1862 1863 /* Fix size problems with short format directories. */ 1864 STATIC void 1865 xrep_inode_sfdir_size( 1866 struct xfs_scrub *sc) 1867 { 1868 struct xfs_ifork *ifp; 1869 1870 trace_xrep_inode_sfdir_size(sc); 1871 1872 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1873 sc->ip->i_disk_size = ifp->if_bytes; 1874 } 1875 1876 /* 1877 * Fix any irregularities in a directory inode's size now that we can iterate 1878 * extent maps and access other regular inode data. 
1879 */ 1880 STATIC void 1881 xrep_inode_dir_size( 1882 struct xfs_scrub *sc) 1883 { 1884 trace_xrep_inode_dir_size(sc); 1885 1886 switch (sc->ip->i_df.if_format) { 1887 case XFS_DINODE_FMT_EXTENTS: 1888 case XFS_DINODE_FMT_BTREE: 1889 xrep_inode_blockdir_size(sc); 1890 break; 1891 case XFS_DINODE_FMT_LOCAL: 1892 xrep_inode_sfdir_size(sc); 1893 break; 1894 } 1895 } 1896 1897 /* Fix extent size hint problems. */ 1898 STATIC void 1899 xrep_inode_extsize( 1900 struct xfs_scrub *sc) 1901 { 1902 /* Fix misaligned extent size hints on a directory. */ 1903 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1904 (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && 1905 xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) { 1906 sc->ip->i_extsize = 0; 1907 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT; 1908 } 1909 } 1910 1911 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */ 1912 STATIC int 1913 xrep_inode_pptr( 1914 struct xfs_scrub *sc) 1915 { 1916 struct xfs_mount *mp = sc->mp; 1917 struct xfs_inode *ip = sc->ip; 1918 struct inode *inode = VFS_I(ip); 1919 1920 if (!xfs_has_parent(mp)) 1921 return 0; 1922 1923 /* 1924 * Unlinked inodes that cannot be added to the directory tree will not 1925 * have a parent pointer. 1926 */ 1927 if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) 1928 return 0; 1929 1930 /* Children of the superblock do not have parent pointers. */ 1931 if (xchk_inode_is_sb_rooted(ip)) 1932 return 0; 1933 1934 /* Inode already has an attr fork; no further work possible here. */ 1935 if (xfs_inode_has_attr_fork(ip)) 1936 return 0; 1937 1938 return xfs_bmap_add_attrfork(sc->tp, ip, 1939 sizeof(struct xfs_attr_sf_hdr), true); 1940 } 1941 1942 /* Fix COW extent size hint problems. */ 1943 STATIC void 1944 xrep_inode_cowextsize( 1945 struct xfs_scrub *sc) 1946 { 1947 /* Fix misaligned CoW extent size hints on a directory. 
*/ 1948 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1949 (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) && 1950 sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) { 1951 sc->ip->i_cowextsize = 0; 1952 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 1953 } 1954 } 1955 1956 /* Fix any irregularities in an inode that the verifiers don't catch. */ 1957 STATIC int 1958 xrep_inode_problems( 1959 struct xfs_scrub *sc) 1960 { 1961 int error; 1962 1963 error = xrep_inode_blockcounts(sc); 1964 if (error) 1965 return error; 1966 error = xrep_inode_pptr(sc); 1967 if (error) 1968 return error; 1969 xrep_inode_timestamps(sc->ip); 1970 xrep_inode_flags(sc); 1971 xrep_inode_ids(sc); 1972 /* 1973 * We can now do a better job fixing the size of a directory now that 1974 * we can scan the data fork extents than we could in xrep_dinode_size. 1975 */ 1976 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 1977 xrep_inode_dir_size(sc); 1978 xrep_inode_extsize(sc); 1979 xrep_inode_cowextsize(sc); 1980 1981 trace_xrep_inode_fixed(sc); 1982 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); 1983 return xrep_roll_trans(sc); 1984 } 1985 1986 /* 1987 * Make sure this inode's unlinked list pointers are consistent with its 1988 * link count. 1989 */ 1990 STATIC int 1991 xrep_inode_unlinked( 1992 struct xfs_scrub *sc) 1993 { 1994 unsigned int nlink = VFS_I(sc->ip)->i_nlink; 1995 int error; 1996 1997 /* 1998 * If this inode is linked from the directory tree and on the unlinked 1999 * list, remove it from the unlinked list. 2000 */ 2001 if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) { 2002 struct xfs_perag *pag; 2003 int error; 2004 2005 pag = xfs_perag_get(sc->mp, 2006 XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino)); 2007 error = xfs_iunlink_remove(sc->tp, pag, sc->ip); 2008 xfs_perag_put(pag); 2009 if (error) 2010 return error; 2011 } 2012 2013 /* 2014 * If this inode is not linked from the directory tree yet not on the 2015 * unlinked list, put it on the unlinked list. 
2016 */ 2017 if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) { 2018 error = xfs_iunlink(sc->tp, sc->ip); 2019 if (error) 2020 return error; 2021 } 2022 2023 return 0; 2024 } 2025 2026 /* Repair an inode's fields. */ 2027 int 2028 xrep_inode( 2029 struct xfs_scrub *sc) 2030 { 2031 int error = 0; 2032 2033 /* 2034 * No inode? That means we failed the _iget verifiers. Repair all 2035 * the things that the inode verifiers care about, then retry _iget. 2036 */ 2037 if (!sc->ip) { 2038 struct xrep_inode *ri = sc->buf; 2039 2040 ASSERT(ri != NULL); 2041 2042 error = xrep_dinode_problems(ri); 2043 if (error == -EBUSY) { 2044 /* 2045 * Directory scan to recover inode mode encountered a 2046 * busy inode, so we did not continue repairing things. 2047 */ 2048 return 0; 2049 } 2050 if (error) 2051 return error; 2052 2053 /* By this point we had better have a working incore inode. */ 2054 if (!sc->ip) 2055 return -EFSCORRUPTED; 2056 } 2057 2058 xfs_trans_ijoin(sc->tp, sc->ip, 0); 2059 2060 /* If we found corruption of any kind, try to fix it. */ 2061 if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) || 2062 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) { 2063 error = xrep_inode_problems(sc); 2064 if (error) 2065 return error; 2066 } 2067 2068 /* See if we can clear the reflink flag. */ 2069 if (xfs_is_reflink_inode(sc->ip)) { 2070 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); 2071 if (error) 2072 return error; 2073 } 2074 2075 /* Reconnect incore unlinked list */ 2076 error = xrep_inode_unlinked(sc); 2077 if (error) 2078 return error; 2079 2080 return xrep_defer_finish(sc); 2081 } 2082