1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_bit.h" 15 #include "xfs_log_format.h" 16 #include "xfs_trans.h" 17 #include "xfs_sb.h" 18 #include "xfs_inode.h" 19 #include "xfs_icache.h" 20 #include "xfs_inode_buf.h" 21 #include "xfs_inode_fork.h" 22 #include "xfs_ialloc.h" 23 #include "xfs_da_format.h" 24 #include "xfs_reflink.h" 25 #include "xfs_alloc.h" 26 #include "xfs_rmap.h" 27 #include "xfs_rmap_btree.h" 28 #include "xfs_bmap.h" 29 #include "xfs_bmap_btree.h" 30 #include "xfs_bmap_util.h" 31 #include "xfs_dir2.h" 32 #include "xfs_dir2_priv.h" 33 #include "xfs_quota_defs.h" 34 #include "xfs_quota.h" 35 #include "xfs_ag.h" 36 #include "xfs_rtbitmap.h" 37 #include "xfs_attr_leaf.h" 38 #include "xfs_log_priv.h" 39 #include "xfs_health.h" 40 #include "xfs_symlink_remote.h" 41 #include "xfs_rtgroup.h" 42 #include "xfs_rtrmap_btree.h" 43 #include "xfs_rtrefcount_btree.h" 44 #include "scrub/xfs_scrub.h" 45 #include "scrub/scrub.h" 46 #include "scrub/common.h" 47 #include "scrub/btree.h" 48 #include "scrub/trace.h" 49 #include "scrub/repair.h" 50 #include "scrub/iscan.h" 51 #include "scrub/readdir.h" 52 #include "scrub/tempfile.h" 53 54 /* 55 * Inode Record Repair 56 * =================== 57 * 58 * Roughly speaking, inode problems can be classified based on whether or not 59 * they trip the dinode verifiers. If those trip, then we won't be able to 60 * xfs_iget ourselves the inode. 61 * 62 * Therefore, the xrep_dinode_* functions fix anything that will cause the 63 * inode buffer verifier or the dinode verifier. The xrep_inode_* functions 64 * fix things on live incore inodes. 
The inode repair functions make decisions 65 * with security and usability implications when reviving a file: 66 * 67 * - Files with zero di_mode or a garbage di_mode are converted to regular file 68 * that only root can read. This file may not actually contain user data, 69 * if the file was not previously a regular file. Setuid and setgid bits 70 * are cleared. 71 * 72 * - Zero-size directories can be truncated to look empty. It is necessary to 73 * run the bmapbtd and directory repair functions to fully rebuild the 74 * directory. 75 * 76 * - Zero-size symbolic link targets can be truncated to '?'. It is necessary 77 * to run the bmapbtd and symlink repair functions to salvage the symlink. 78 * 79 * - Invalid extent size hints will be removed. 80 * 81 * - Quotacheck will be scheduled if we repaired an inode that was so badly 82 * damaged that the ondisk inode had to be rebuilt. 83 * 84 * - Invalid user, group, or project IDs (aka -1U) will be reset to zero. 85 * Setuid and setgid bits are cleared. 86 * 87 * - Data and attr forks are reset to extents format with zero extents if the 88 * fork data is inconsistent. It is necessary to run the bmapbtd or bmapbta 89 * repair functions to recover the space mapping. 90 * 91 * - ACLs will not be recovered if the attr fork is zapped or the extended 92 * attribute structure itself requires salvaging. 93 * 94 * - If the attr fork is zapped, the user and group ids are reset to root and 95 * the setuid and setgid bits are removed. 96 */ 97 98 /* 99 * All the information we need to repair the ondisk inode if we can't iget the 100 * incore inode. We don't allocate this buffer unless we're going to perform 101 * a repair to the ondisk inode cluster buffer. 102 */ 103 struct xrep_inode { 104 /* Inode mapping that we saved from the initial lookup attempt. */ 105 struct xfs_imap imap; 106 107 struct xfs_scrub *sc; 108 109 /* Blocks in use on the data device by data extents or bmbt blocks. 
*/ 110 xfs_rfsblock_t data_blocks; 111 112 /* Blocks in use on the rt device. */ 113 xfs_rfsblock_t rt_blocks; 114 115 /* Blocks in use by the attr fork. */ 116 xfs_rfsblock_t attr_blocks; 117 118 /* Number of data device extents for the data fork. */ 119 xfs_extnum_t data_extents; 120 121 /* 122 * Number of realtime device extents for the data fork. If 123 * data_extents and rt_extents indicate that the data fork has extents 124 * on both devices, we'll just back away slowly. 125 */ 126 xfs_extnum_t rt_extents; 127 128 /* Number of (data device) extents for the attr fork. */ 129 xfs_aextnum_t attr_extents; 130 131 /* Sick state to set after zapping parts of the inode. */ 132 unsigned int ino_sick_mask; 133 134 /* Must we remove all access from this file? */ 135 bool zap_acls; 136 137 /* Inode scanner to see if we can find the ftype from dirents */ 138 struct xchk_iscan ftype_iscan; 139 uint8_t alleged_ftype; 140 }; 141 142 /* 143 * Setup function for inode repair. @imap contains the ondisk inode mapping 144 * information so that we can correct the ondisk inode cluster buffer if 145 * necessary to make iget work. 146 */ 147 int 148 xrep_setup_inode( 149 struct xfs_scrub *sc, 150 const struct xfs_imap *imap) 151 { 152 struct xrep_inode *ri; 153 154 sc->buf = kzalloc(sizeof(struct xrep_inode), XCHK_GFP_FLAGS); 155 if (!sc->buf) 156 return -ENOMEM; 157 158 ri = sc->buf; 159 memcpy(&ri->imap, imap, sizeof(struct xfs_imap)); 160 ri->sc = sc; 161 return 0; 162 } 163 164 /* 165 * Make sure this ondisk inode can pass the inode buffer verifier. This is 166 * not the same as the dinode verifier. 
167 */ 168 STATIC void 169 xrep_dinode_buf_core( 170 struct xfs_scrub *sc, 171 struct xfs_buf *bp, 172 unsigned int ioffset) 173 { 174 struct xfs_dinode *dip = xfs_buf_offset(bp, ioffset); 175 struct xfs_trans *tp = sc->tp; 176 struct xfs_mount *mp = sc->mp; 177 xfs_agino_t agino; 178 bool crc_ok = false; 179 bool magic_ok = false; 180 bool unlinked_ok = false; 181 182 agino = be32_to_cpu(dip->di_next_unlinked); 183 184 if (xfs_verify_agino_or_null(bp->b_pag, agino)) 185 unlinked_ok = true; 186 187 if (dip->di_magic == cpu_to_be16(XFS_DINODE_MAGIC) && 188 xfs_dinode_good_version(mp, dip->di_version)) 189 magic_ok = true; 190 191 if (xfs_verify_cksum((char *)dip, mp->m_sb.sb_inodesize, 192 XFS_DINODE_CRC_OFF)) 193 crc_ok = true; 194 195 if (magic_ok && unlinked_ok && crc_ok) 196 return; 197 198 if (!magic_ok) { 199 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 200 dip->di_version = 3; 201 } 202 if (!unlinked_ok) 203 dip->di_next_unlinked = cpu_to_be32(NULLAGINO); 204 xfs_dinode_calc_crc(mp, dip); 205 xfs_trans_buf_set_type(tp, bp, XFS_BLFT_DINO_BUF); 206 xfs_trans_log_buf(tp, bp, ioffset, 207 ioffset + sizeof(struct xfs_dinode) - 1); 208 } 209 210 /* Make sure this inode cluster buffer can pass the inode buffer verifier. */ 211 STATIC void 212 xrep_dinode_buf( 213 struct xfs_scrub *sc, 214 struct xfs_buf *bp) 215 { 216 struct xfs_mount *mp = sc->mp; 217 int i; 218 int ni; 219 220 ni = XFS_BB_TO_FSB(mp, bp->b_length) * mp->m_sb.sb_inopblock; 221 for (i = 0; i < ni; i++) 222 xrep_dinode_buf_core(sc, bp, i << mp->m_sb.sb_inodelog); 223 } 224 225 /* Reinitialize things that never change in an inode. 
*/ 226 STATIC void 227 xrep_dinode_header( 228 struct xfs_scrub *sc, 229 struct xfs_dinode *dip) 230 { 231 trace_xrep_dinode_header(sc, dip); 232 233 dip->di_magic = cpu_to_be16(XFS_DINODE_MAGIC); 234 if (!xfs_dinode_good_version(sc->mp, dip->di_version)) 235 dip->di_version = 3; 236 dip->di_ino = cpu_to_be64(sc->sm->sm_ino); 237 uuid_copy(&dip->di_uuid, &sc->mp->m_sb.sb_meta_uuid); 238 dip->di_gen = cpu_to_be32(sc->sm->sm_gen); 239 } 240 241 /* 242 * If this directory entry points to the scrub target inode, then the directory 243 * we're scanning is the parent of the scrub target inode. 244 */ 245 STATIC int 246 xrep_dinode_findmode_dirent( 247 struct xfs_scrub *sc, 248 struct xfs_inode *dp, 249 xfs_dir2_dataptr_t dapos, 250 const struct xfs_name *name, 251 xfs_ino_t ino, 252 void *priv) 253 { 254 struct xrep_inode *ri = priv; 255 int error = 0; 256 257 if (xchk_should_terminate(ri->sc, &error)) 258 return error; 259 260 if (ino != sc->sm->sm_ino) 261 return 0; 262 263 /* Ignore garbage directory entry names. */ 264 if (name->len == 0 || !xfs_dir2_namecheck(name->name, name->len)) 265 return -EFSCORRUPTED; 266 267 /* Don't pick up dot or dotdot entries; we only want child dirents. */ 268 if (xfs_dir2_samename(name, &xfs_name_dotdot) || 269 xfs_dir2_samename(name, &xfs_name_dot)) 270 return 0; 271 272 /* 273 * Uhoh, more than one parent for this inode and they don't agree on 274 * the file type? 275 */ 276 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN && 277 ri->alleged_ftype != name->type) { 278 trace_xrep_dinode_findmode_dirent_inval(ri->sc, dp, name->type, 279 ri->alleged_ftype); 280 return -EFSCORRUPTED; 281 } 282 283 /* We found a potential parent; remember the ftype. */ 284 trace_xrep_dinode_findmode_dirent(ri->sc, dp, name->type); 285 ri->alleged_ftype = name->type; 286 return 0; 287 } 288 289 /* Try to lock a directory, or wait a jiffy. 
*/ 290 static inline int 291 xrep_dinode_ilock_nowait( 292 struct xfs_inode *dp, 293 unsigned int lock_mode) 294 { 295 if (xfs_ilock_nowait(dp, lock_mode)) 296 return true; 297 298 schedule_timeout_killable(1); 299 return false; 300 } 301 302 /* 303 * Try to lock a directory to look for ftype hints. Since we already hold the 304 * AGI buffer, we cannot block waiting for the ILOCK because rename can take 305 * the ILOCK and then try to lock AGIs. 306 */ 307 STATIC int 308 xrep_dinode_trylock_directory( 309 struct xrep_inode *ri, 310 struct xfs_inode *dp, 311 unsigned int *lock_modep) 312 { 313 unsigned long deadline = jiffies + msecs_to_jiffies(30000); 314 unsigned int lock_mode; 315 int error = 0; 316 317 do { 318 if (xchk_should_terminate(ri->sc, &error)) 319 return error; 320 321 if (xfs_need_iread_extents(&dp->i_df)) 322 lock_mode = XFS_ILOCK_EXCL; 323 else 324 lock_mode = XFS_ILOCK_SHARED; 325 326 if (xrep_dinode_ilock_nowait(dp, lock_mode)) { 327 *lock_modep = lock_mode; 328 return 0; 329 } 330 } while (!time_is_before_jiffies(deadline)); 331 return -EBUSY; 332 } 333 334 /* 335 * If this is a directory, walk the dirents looking for any that point to the 336 * scrub target inode. 337 */ 338 STATIC int 339 xrep_dinode_findmode_walk_directory( 340 struct xrep_inode *ri, 341 struct xfs_inode *dp) 342 { 343 struct xfs_scrub *sc = ri->sc; 344 unsigned int lock_mode; 345 int error = 0; 346 347 /* Ignore temporary repair directories. */ 348 if (xrep_is_tempfile(dp)) 349 return 0; 350 351 /* 352 * Scan the directory to see if there it contains an entry pointing to 353 * the directory that we are repairing. 354 */ 355 error = xrep_dinode_trylock_directory(ri, dp, &lock_mode); 356 if (error) 357 return error; 358 359 /* 360 * If this directory is known to be sick, we cannot scan it reliably 361 * and must abort. 
362 */ 363 if (xfs_inode_has_sickness(dp, XFS_SICK_INO_CORE | 364 XFS_SICK_INO_BMBTD | 365 XFS_SICK_INO_DIR)) { 366 error = -EFSCORRUPTED; 367 goto out_unlock; 368 } 369 370 /* 371 * We cannot complete our parent pointer scan if a directory looks as 372 * though it has been zapped by the inode record repair code. 373 */ 374 if (xchk_dir_looks_zapped(dp)) { 375 error = -EBUSY; 376 goto out_unlock; 377 } 378 379 error = xchk_dir_walk(sc, dp, xrep_dinode_findmode_dirent, ri); 380 if (error) 381 goto out_unlock; 382 383 out_unlock: 384 xfs_iunlock(dp, lock_mode); 385 return error; 386 } 387 388 /* 389 * Try to find the mode of the inode being repaired by looking for directories 390 * that point down to this file. 391 */ 392 STATIC int 393 xrep_dinode_find_mode( 394 struct xrep_inode *ri, 395 uint16_t *mode) 396 { 397 struct xfs_scrub *sc = ri->sc; 398 struct xfs_inode *dp; 399 int error; 400 401 /* No ftype means we have no other metadata to consult. */ 402 if (!xfs_has_ftype(sc->mp)) { 403 *mode = S_IFREG; 404 return 0; 405 } 406 407 /* 408 * Scan all directories for parents that might point down to this 409 * inode. Skip the inode being repaired during the scan since it 410 * cannot be its own parent. Note that we still hold the AGI locked 411 * so there's a real possibility that _iscan_iter can return EBUSY. 
412 */ 413 xchk_iscan_start(sc, 5000, 100, &ri->ftype_iscan); 414 xchk_iscan_set_agi_trylock(&ri->ftype_iscan); 415 ri->ftype_iscan.skip_ino = sc->sm->sm_ino; 416 ri->alleged_ftype = XFS_DIR3_FT_UNKNOWN; 417 while ((error = xchk_iscan_iter(&ri->ftype_iscan, &dp)) == 1) { 418 if (S_ISDIR(VFS_I(dp)->i_mode)) 419 error = xrep_dinode_findmode_walk_directory(ri, dp); 420 xchk_iscan_mark_visited(&ri->ftype_iscan, dp); 421 xchk_irele(sc, dp); 422 if (error < 0) 423 break; 424 if (xchk_should_terminate(sc, &error)) 425 break; 426 } 427 xchk_iscan_iter_finish(&ri->ftype_iscan); 428 xchk_iscan_teardown(&ri->ftype_iscan); 429 430 if (error == -EBUSY) { 431 if (ri->alleged_ftype != XFS_DIR3_FT_UNKNOWN) { 432 /* 433 * If we got an EBUSY after finding at least one 434 * dirent, that means the scan found an inode on the 435 * inactivation list and could not open it. Accept the 436 * alleged ftype and install a new mode below. 437 */ 438 error = 0; 439 } else if (!(sc->flags & XCHK_TRY_HARDER)) { 440 /* 441 * Otherwise, retry the operation one time to see if 442 * the reason for the delay is an inode from the same 443 * cluster buffer waiting on the inactivation list. 444 */ 445 error = -EDEADLOCK; 446 } 447 } 448 if (error) 449 return error; 450 451 /* 452 * Convert the discovered ftype into the file mode. If all else fails, 453 * return S_IFREG. 454 */ 455 switch (ri->alleged_ftype) { 456 case XFS_DIR3_FT_DIR: 457 *mode = S_IFDIR; 458 break; 459 case XFS_DIR3_FT_WHT: 460 case XFS_DIR3_FT_CHRDEV: 461 *mode = S_IFCHR; 462 break; 463 case XFS_DIR3_FT_BLKDEV: 464 *mode = S_IFBLK; 465 break; 466 case XFS_DIR3_FT_FIFO: 467 *mode = S_IFIFO; 468 break; 469 case XFS_DIR3_FT_SOCK: 470 *mode = S_IFSOCK; 471 break; 472 case XFS_DIR3_FT_SYMLINK: 473 *mode = S_IFLNK; 474 break; 475 default: 476 *mode = S_IFREG; 477 break; 478 } 479 return 0; 480 } 481 482 /* Turn di_mode into /something/ recognizable. Returns true if we succeed. 
*/ 483 STATIC int 484 xrep_dinode_mode( 485 struct xrep_inode *ri, 486 struct xfs_dinode *dip) 487 { 488 struct xfs_scrub *sc = ri->sc; 489 uint16_t mode = be16_to_cpu(dip->di_mode); 490 int error; 491 492 trace_xrep_dinode_mode(sc, dip); 493 494 if (mode == 0 || xfs_mode_to_ftype(mode) != XFS_DIR3_FT_UNKNOWN) 495 return 0; 496 497 /* Try to fix the mode. If we cannot, then leave everything alone. */ 498 error = xrep_dinode_find_mode(ri, &mode); 499 switch (error) { 500 case -EINTR: 501 case -EBUSY: 502 case -EDEADLOCK: 503 /* temporary failure or fatal signal */ 504 return error; 505 case 0: 506 /* found mode */ 507 break; 508 default: 509 /* some other error, assume S_IFREG */ 510 mode = S_IFREG; 511 break; 512 } 513 514 /* bad mode, so we set it to a file that only root can read */ 515 dip->di_mode = cpu_to_be16(mode); 516 dip->di_uid = 0; 517 dip->di_gid = 0; 518 ri->zap_acls = true; 519 return 0; 520 } 521 522 /* Fix unused link count fields having nonzero values. */ 523 STATIC void 524 xrep_dinode_nlinks( 525 struct xfs_dinode *dip) 526 { 527 if (dip->di_version < 2) { 528 dip->di_nlink = 0; 529 return; 530 } 531 532 if (xfs_dinode_is_metadir(dip)) { 533 if (be16_to_cpu(dip->di_metatype) >= XFS_METAFILE_MAX) 534 dip->di_metatype = cpu_to_be16(XFS_METAFILE_UNKNOWN); 535 } else { 536 dip->di_metatype = 0; 537 } 538 } 539 540 /* Fix any conflicting flags that the verifiers complain about. */ 541 STATIC void 542 xrep_dinode_flags( 543 struct xfs_scrub *sc, 544 struct xfs_dinode *dip, 545 bool isrt) 546 { 547 struct xfs_mount *mp = sc->mp; 548 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 549 uint16_t flags = be16_to_cpu(dip->di_flags); 550 uint16_t mode = be16_to_cpu(dip->di_mode); 551 552 trace_xrep_dinode_flags(sc, dip); 553 554 if (isrt) 555 flags |= XFS_DIFLAG_REALTIME; 556 else 557 flags &= ~XFS_DIFLAG_REALTIME; 558 559 /* 560 * For regular files on a reflink filesystem, set the REFLINK flag to 561 * protect shared extents. 
A later stage will actually check those 562 * extents and clear the flag if possible. 563 */ 564 if (xfs_has_reflink(mp) && S_ISREG(mode)) 565 flags2 |= XFS_DIFLAG2_REFLINK; 566 else 567 flags2 &= ~(XFS_DIFLAG2_REFLINK | XFS_DIFLAG2_COWEXTSIZE); 568 if (!xfs_has_bigtime(mp)) 569 flags2 &= ~XFS_DIFLAG2_BIGTIME; 570 if (!xfs_has_large_extent_counts(mp)) 571 flags2 &= ~XFS_DIFLAG2_NREXT64; 572 if (flags2 & XFS_DIFLAG2_NREXT64) 573 dip->di_nrext64_pad = 0; 574 else if (dip->di_version >= 3) 575 dip->di_v3_pad = 0; 576 577 if (flags2 & XFS_DIFLAG2_METADATA) { 578 xfs_failaddr_t fa; 579 580 fa = xfs_dinode_verify_metadir(sc->mp, dip, mode, flags, 581 flags2); 582 if (fa) 583 flags2 &= ~XFS_DIFLAG2_METADATA; 584 } 585 586 dip->di_flags = cpu_to_be16(flags); 587 dip->di_flags2 = cpu_to_be64(flags2); 588 } 589 590 /* 591 * Blow out symlink; now it points nowhere. We don't have to worry about 592 * incore state because this inode is failing the verifiers. 593 */ 594 STATIC void 595 xrep_dinode_zap_symlink( 596 struct xrep_inode *ri, 597 struct xfs_dinode *dip) 598 { 599 struct xfs_scrub *sc = ri->sc; 600 char *p; 601 602 trace_xrep_dinode_zap_symlink(sc, dip); 603 604 dip->di_format = XFS_DINODE_FMT_LOCAL; 605 dip->di_size = cpu_to_be64(1); 606 p = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 607 *p = '?'; 608 ri->ino_sick_mask |= XFS_SICK_INO_SYMLINK_ZAPPED; 609 } 610 611 /* 612 * Blow out dir, make the parent point to the root. In the future repair will 613 * reconstruct this directory for us. Note that there's no in-core directory 614 * inode because the sf verifier tripped, so we don't have to worry about the 615 * dentry cache. 
616 */ 617 STATIC void 618 xrep_dinode_zap_dir( 619 struct xrep_inode *ri, 620 struct xfs_dinode *dip) 621 { 622 struct xfs_scrub *sc = ri->sc; 623 struct xfs_mount *mp = sc->mp; 624 struct xfs_dir2_sf_hdr *sfp; 625 int i8count; 626 627 trace_xrep_dinode_zap_dir(sc, dip); 628 629 dip->di_format = XFS_DINODE_FMT_LOCAL; 630 i8count = mp->m_sb.sb_rootino > XFS_DIR2_MAX_SHORT_INUM; 631 sfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 632 sfp->count = 0; 633 sfp->i8count = i8count; 634 xfs_dir2_sf_put_parent_ino(sfp, mp->m_sb.sb_rootino); 635 dip->di_size = cpu_to_be64(xfs_dir2_sf_hdr_size(i8count)); 636 ri->ino_sick_mask |= XFS_SICK_INO_DIR_ZAPPED; 637 } 638 639 /* Make sure we don't have a garbage file size. */ 640 STATIC void 641 xrep_dinode_size( 642 struct xrep_inode *ri, 643 struct xfs_dinode *dip) 644 { 645 struct xfs_scrub *sc = ri->sc; 646 uint64_t size = be64_to_cpu(dip->di_size); 647 uint16_t mode = be16_to_cpu(dip->di_mode); 648 649 trace_xrep_dinode_size(sc, dip); 650 651 switch (mode & S_IFMT) { 652 case S_IFIFO: 653 case S_IFCHR: 654 case S_IFBLK: 655 case S_IFSOCK: 656 /* di_size can't be nonzero for special files */ 657 dip->di_size = 0; 658 break; 659 case S_IFREG: 660 /* Regular files can't be larger than 2^63-1 bytes. */ 661 dip->di_size = cpu_to_be64(size & ~(1ULL << 63)); 662 break; 663 case S_IFLNK: 664 /* 665 * Truncate ridiculously oversized symlinks. If the size is 666 * zero, reset it to point to the current directory. Both of 667 * these conditions trigger dinode verifier errors, so there 668 * is no in-core state to reset. 669 */ 670 if (size > XFS_SYMLINK_MAXLEN) 671 dip->di_size = cpu_to_be64(XFS_SYMLINK_MAXLEN); 672 else if (size == 0) 673 xrep_dinode_zap_symlink(ri, dip); 674 break; 675 case S_IFDIR: 676 /* 677 * Directories can't have a size larger than 32G. If the size 678 * is zero, reset it to an empty directory. Both of these 679 * conditions trigger dinode verifier errors, so there is no 680 * in-core state to reset. 
681 */ 682 if (size > XFS_DIR2_SPACE_SIZE) 683 dip->di_size = cpu_to_be64(XFS_DIR2_SPACE_SIZE); 684 else if (size == 0) 685 xrep_dinode_zap_dir(ri, dip); 686 break; 687 } 688 } 689 690 /* Fix extent size hints. */ 691 STATIC void 692 xrep_dinode_extsize_hints( 693 struct xfs_scrub *sc, 694 struct xfs_dinode *dip) 695 { 696 struct xfs_mount *mp = sc->mp; 697 uint64_t flags2 = be64_to_cpu(dip->di_flags2); 698 uint16_t flags = be16_to_cpu(dip->di_flags); 699 uint16_t mode = be16_to_cpu(dip->di_mode); 700 701 xfs_failaddr_t fa; 702 703 trace_xrep_dinode_extsize_hints(sc, dip); 704 705 fa = xfs_inode_validate_extsize(mp, be32_to_cpu(dip->di_extsize), 706 mode, flags); 707 if (fa) { 708 dip->di_extsize = 0; 709 dip->di_flags &= ~cpu_to_be16(XFS_DIFLAG_EXTSIZE | 710 XFS_DIFLAG_EXTSZINHERIT); 711 } 712 713 if (dip->di_version < 3) 714 return; 715 716 fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), 717 mode, flags, flags2); 718 if (fa) { 719 dip->di_cowextsize = 0; 720 dip->di_flags2 &= ~cpu_to_be64(XFS_DIFLAG2_COWEXTSIZE); 721 } 722 } 723 724 /* Count extents and blocks for an inode given an rmap. */ 725 STATIC int 726 xrep_dinode_walk_rmap( 727 struct xfs_btree_cur *cur, 728 const struct xfs_rmap_irec *rec, 729 void *priv) 730 { 731 struct xrep_inode *ri = priv; 732 int error = 0; 733 734 if (xchk_should_terminate(ri->sc, &error)) 735 return error; 736 737 /* We only care about this inode. */ 738 if (rec->rm_owner != ri->sc->sm->sm_ino) 739 return 0; 740 741 if (rec->rm_flags & XFS_RMAP_ATTR_FORK) { 742 ri->attr_blocks += rec->rm_blockcount; 743 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 744 ri->attr_extents++; 745 746 return 0; 747 } 748 749 ri->data_blocks += rec->rm_blockcount; 750 if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK)) 751 ri->data_extents++; 752 753 return 0; 754 } 755 756 /* Count extents and blocks for an inode from all AG rmap data. 
*/ 757 STATIC int 758 xrep_dinode_count_ag_rmaps( 759 struct xrep_inode *ri, 760 struct xfs_perag *pag) 761 { 762 struct xfs_btree_cur *cur; 763 struct xfs_buf *agf; 764 int error; 765 766 error = xfs_alloc_read_agf(pag, ri->sc->tp, 0, &agf); 767 if (error) 768 return error; 769 770 cur = xfs_rmapbt_init_cursor(ri->sc->mp, ri->sc->tp, agf, pag); 771 error = xfs_rmap_query_all(cur, xrep_dinode_walk_rmap, ri); 772 xfs_btree_del_cursor(cur, error); 773 xfs_trans_brelse(ri->sc->tp, agf); 774 return error; 775 } 776 777 /* Count extents and blocks for an inode given an rt rmap. */ 778 STATIC int 779 xrep_dinode_walk_rtrmap( 780 struct xfs_btree_cur *cur, 781 const struct xfs_rmap_irec *rec, 782 void *priv) 783 { 784 struct xrep_inode *ri = priv; 785 int error = 0; 786 787 if (xchk_should_terminate(ri->sc, &error)) 788 return error; 789 790 /* We only care about this inode. */ 791 if (rec->rm_owner != ri->sc->sm->sm_ino) 792 return 0; 793 794 if (rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) 795 return -EFSCORRUPTED; 796 797 ri->rt_blocks += rec->rm_blockcount; 798 ri->rt_extents++; 799 return 0; 800 } 801 802 /* Count extents and blocks for an inode from all realtime rmap data. */ 803 STATIC int 804 xrep_dinode_count_rtgroup_rmaps( 805 struct xrep_inode *ri, 806 struct xfs_rtgroup *rtg) 807 { 808 struct xfs_scrub *sc = ri->sc; 809 int error; 810 811 error = xrep_rtgroup_init(sc, rtg, &sc->sr, XFS_RTGLOCK_RMAP); 812 if (error) 813 return error; 814 815 error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_dinode_walk_rtrmap, 816 ri); 817 xchk_rtgroup_btcur_free(&sc->sr); 818 xchk_rtgroup_free(sc, &sc->sr); 819 return error; 820 } 821 822 /* Count extents and blocks for a given inode from all rmap data. 
*/ 823 STATIC int 824 xrep_dinode_count_rmaps( 825 struct xrep_inode *ri) 826 { 827 struct xfs_perag *pag = NULL; 828 struct xfs_rtgroup *rtg = NULL; 829 int error; 830 831 if (!xfs_has_rmapbt(ri->sc->mp)) 832 return -EOPNOTSUPP; 833 834 while ((rtg = xfs_rtgroup_next(ri->sc->mp, rtg))) { 835 error = xrep_dinode_count_rtgroup_rmaps(ri, rtg); 836 if (error) { 837 xfs_rtgroup_rele(rtg); 838 return error; 839 } 840 } 841 842 while ((pag = xfs_perag_next(ri->sc->mp, pag))) { 843 error = xrep_dinode_count_ag_rmaps(ri, pag); 844 if (error) { 845 xfs_perag_rele(pag); 846 return error; 847 } 848 } 849 850 /* Can't have extents on both the rt and the data device. */ 851 if (ri->data_extents && ri->rt_extents) 852 return -EFSCORRUPTED; 853 854 trace_xrep_dinode_count_rmaps(ri->sc, 855 ri->data_blocks, ri->rt_blocks, ri->attr_blocks, 856 ri->data_extents, ri->rt_extents, ri->attr_extents); 857 return 0; 858 } 859 860 /* Return true if this extents-format ifork looks like garbage. */ 861 STATIC bool 862 xrep_dinode_bad_extents_fork( 863 struct xfs_scrub *sc, 864 struct xfs_dinode *dip, 865 unsigned int dfork_size, 866 int whichfork) 867 { 868 struct xfs_bmbt_irec new; 869 struct xfs_bmbt_rec *dp; 870 xfs_extnum_t nex; 871 bool isrt; 872 unsigned int i; 873 874 nex = xfs_dfork_nextents(dip, whichfork); 875 if (nex > dfork_size / sizeof(struct xfs_bmbt_rec)) 876 return true; 877 878 dp = XFS_DFORK_PTR(dip, whichfork); 879 880 isrt = dip->di_flags & cpu_to_be16(XFS_DIFLAG_REALTIME); 881 for (i = 0; i < nex; i++, dp++) { 882 xfs_failaddr_t fa; 883 884 xfs_bmbt_disk_get_all(dp, &new); 885 fa = xfs_bmap_validate_extent_raw(sc->mp, isrt, whichfork, 886 &new); 887 if (fa) 888 return true; 889 } 890 891 return false; 892 } 893 894 /* Return true if this btree-format ifork looks like garbage. 
*/
STATIC bool
xrep_dinode_bad_bmbt_fork(
	struct xfs_scrub	*sc,
	struct xfs_dinode	*dip,
	unsigned int		dfork_size,
	int			whichfork)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_bmdr_block	*root;
	xfs_extnum_t		nex;
	unsigned int		maxrecs;
	unsigned int		nrecs;
	unsigned int		level;
	unsigned int		idx;

	/* An extent count that fits in the fork is treated as garbage here. */
	nex = xfs_dfork_nextents(dip, whichfork);
	if (nex <= dfork_size / sizeof(struct xfs_bmbt_rec))
		return true;

	/* The fork must at least be able to hold the root block header. */
	if (dfork_size < sizeof(struct xfs_bmdr_block))
		return true;

	root = XFS_DFORK_PTR(dip, whichfork);
	nrecs = be16_to_cpu(root->bb_numrecs);
	level = be16_to_cpu(root->bb_level);

	/* The root needs at least one record and must fit in the fork. */
	if (nrecs == 0)
		return true;
	if (xfs_bmdr_space_calc(nrecs) > dfork_size)
		return true;

	/* The root level must be nonzero and below the maximum height. */
	if (level == 0)
		return true;
	if (level >= XFS_BM_MAXLEVELS(mp, whichfork))
		return true;

	/* Check the key and the pointer of each record (1-based index). */
	maxrecs = xfs_bmdr_maxrecs(dfork_size, 0);
	for (idx = 1; idx <= nrecs; idx++) {
		struct xfs_bmbt_key	*key;
		xfs_bmbt_ptr_t		*ptr;

		key = xfs_bmdr_key_addr(root, idx);
		if (!xfs_verify_fileoff(mp, be64_to_cpu(key->br_startoff)))
			return true;

		ptr = xfs_bmdr_ptr_addr(root, idx, maxrecs);
		if (!xfs_verify_fsbno(mp, be64_to_cpu(*ptr)))
			return true;
	}

	return false;
}

/* Return true if this rmap-format ifork looks like garbage.
*/ 947 STATIC bool 948 xrep_dinode_bad_rtrmapbt_fork( 949 struct xfs_scrub *sc, 950 struct xfs_dinode *dip, 951 unsigned int dfork_size) 952 { 953 struct xfs_rtrmap_root *dfp; 954 unsigned int nrecs; 955 unsigned int level; 956 957 if (dfork_size < sizeof(struct xfs_rtrmap_root)) 958 return true; 959 960 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 961 nrecs = be16_to_cpu(dfp->bb_numrecs); 962 level = be16_to_cpu(dfp->bb_level); 963 964 if (level > sc->mp->m_rtrmap_maxlevels) 965 return true; 966 if (xfs_rtrmap_droot_space_calc(level, nrecs) > dfork_size) 967 return true; 968 if (level > 0 && nrecs == 0) 969 return true; 970 971 return false; 972 } 973 974 /* Return true if this refcount-format ifork looks like garbage. */ 975 STATIC bool 976 xrep_dinode_bad_rtrefcountbt_fork( 977 struct xfs_scrub *sc, 978 struct xfs_dinode *dip, 979 unsigned int dfork_size) 980 { 981 struct xfs_rtrefcount_root *dfp; 982 unsigned int nrecs; 983 unsigned int level; 984 985 if (dfork_size < sizeof(struct xfs_rtrefcount_root)) 986 return true; 987 988 dfp = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 989 nrecs = be16_to_cpu(dfp->bb_numrecs); 990 level = be16_to_cpu(dfp->bb_level); 991 992 if (level > sc->mp->m_rtrefc_maxlevels) 993 return true; 994 if (xfs_rtrefcount_droot_space_calc(level, nrecs) > dfork_size) 995 return true; 996 if (level > 0 && nrecs == 0) 997 return true; 998 999 return false; 1000 } 1001 1002 /* Check a metadata-btree fork. 
*/ 1003 STATIC bool 1004 xrep_dinode_bad_metabt_fork( 1005 struct xfs_scrub *sc, 1006 struct xfs_dinode *dip, 1007 unsigned int dfork_size, 1008 int whichfork) 1009 { 1010 if (whichfork != XFS_DATA_FORK) 1011 return true; 1012 1013 switch (be16_to_cpu(dip->di_metatype)) { 1014 case XFS_METAFILE_RTRMAP: 1015 return xrep_dinode_bad_rtrmapbt_fork(sc, dip, dfork_size); 1016 case XFS_METAFILE_RTREFCOUNT: 1017 return xrep_dinode_bad_rtrefcountbt_fork(sc, dip, dfork_size); 1018 default: 1019 return true; 1020 } 1021 1022 return false; 1023 } 1024 1025 /* 1026 * Check the data fork for things that will fail the ifork verifiers or the 1027 * ifork formatters. 1028 */ 1029 STATIC bool 1030 xrep_dinode_check_dfork( 1031 struct xfs_scrub *sc, 1032 struct xfs_dinode *dip, 1033 uint16_t mode) 1034 { 1035 void *dfork_ptr; 1036 int64_t data_size; 1037 unsigned int fmt; 1038 unsigned int dfork_size; 1039 1040 /* 1041 * Verifier functions take signed int64_t, so check for bogus negative 1042 * values first. 
1043 */ 1044 data_size = be64_to_cpu(dip->di_size); 1045 if (data_size < 0) 1046 return true; 1047 1048 fmt = XFS_DFORK_FORMAT(dip, XFS_DATA_FORK); 1049 switch (mode & S_IFMT) { 1050 case S_IFIFO: 1051 case S_IFCHR: 1052 case S_IFBLK: 1053 case S_IFSOCK: 1054 if (fmt != XFS_DINODE_FMT_DEV) 1055 return true; 1056 break; 1057 case S_IFREG: 1058 switch (fmt) { 1059 case XFS_DINODE_FMT_LOCAL: 1060 return true; 1061 case XFS_DINODE_FMT_EXTENTS: 1062 case XFS_DINODE_FMT_BTREE: 1063 case XFS_DINODE_FMT_META_BTREE: 1064 break; 1065 default: 1066 return true; 1067 } 1068 break; 1069 case S_IFLNK: 1070 case S_IFDIR: 1071 switch (fmt) { 1072 case XFS_DINODE_FMT_LOCAL: 1073 case XFS_DINODE_FMT_EXTENTS: 1074 case XFS_DINODE_FMT_BTREE: 1075 break; 1076 default: 1077 return true; 1078 } 1079 break; 1080 default: 1081 return true; 1082 } 1083 1084 dfork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_DATA_FORK); 1085 dfork_ptr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1086 1087 switch (fmt) { 1088 case XFS_DINODE_FMT_DEV: 1089 break; 1090 case XFS_DINODE_FMT_LOCAL: 1091 /* dir/symlink structure cannot be larger than the fork */ 1092 if (data_size > dfork_size) 1093 return true; 1094 /* directory structure must pass verification. */ 1095 if (S_ISDIR(mode) && 1096 xfs_dir2_sf_verify(sc->mp, dfork_ptr, data_size) != NULL) 1097 return true; 1098 /* symlink structure must pass verification. 
*/ 1099 if (S_ISLNK(mode) && 1100 xfs_symlink_shortform_verify(dfork_ptr, data_size) != NULL) 1101 return true; 1102 break; 1103 case XFS_DINODE_FMT_EXTENTS: 1104 if (xrep_dinode_bad_extents_fork(sc, dip, dfork_size, 1105 XFS_DATA_FORK)) 1106 return true; 1107 break; 1108 case XFS_DINODE_FMT_BTREE: 1109 if (xrep_dinode_bad_bmbt_fork(sc, dip, dfork_size, 1110 XFS_DATA_FORK)) 1111 return true; 1112 break; 1113 case XFS_DINODE_FMT_META_BTREE: 1114 if (xrep_dinode_bad_metabt_fork(sc, dip, dfork_size, 1115 XFS_DATA_FORK)) 1116 return true; 1117 break; 1118 default: 1119 return true; 1120 } 1121 1122 return false; 1123 } 1124 1125 static void 1126 xrep_dinode_set_data_nextents( 1127 struct xfs_dinode *dip, 1128 xfs_extnum_t nextents) 1129 { 1130 if (xfs_dinode_has_large_extent_counts(dip)) 1131 dip->di_big_nextents = cpu_to_be64(nextents); 1132 else 1133 dip->di_nextents = cpu_to_be32(nextents); 1134 } 1135 1136 static void 1137 xrep_dinode_set_attr_nextents( 1138 struct xfs_dinode *dip, 1139 xfs_extnum_t nextents) 1140 { 1141 if (xfs_dinode_has_large_extent_counts(dip)) 1142 dip->di_big_anextents = cpu_to_be32(nextents); 1143 else 1144 dip->di_anextents = cpu_to_be16(nextents); 1145 } 1146 1147 /* Reset the data fork to something sane. */ 1148 STATIC void 1149 xrep_dinode_zap_dfork( 1150 struct xrep_inode *ri, 1151 struct xfs_dinode *dip, 1152 uint16_t mode) 1153 { 1154 struct xfs_scrub *sc = ri->sc; 1155 1156 trace_xrep_dinode_zap_dfork(sc, dip); 1157 1158 ri->ino_sick_mask |= XFS_SICK_INO_BMBTD_ZAPPED; 1159 1160 xrep_dinode_set_data_nextents(dip, 0); 1161 ri->data_blocks = 0; 1162 ri->rt_blocks = 0; 1163 1164 /* Special files always get reset to DEV */ 1165 switch (mode & S_IFMT) { 1166 case S_IFIFO: 1167 case S_IFCHR: 1168 case S_IFBLK: 1169 case S_IFSOCK: 1170 dip->di_format = XFS_DINODE_FMT_DEV; 1171 dip->di_size = 0; 1172 return; 1173 } 1174 1175 /* 1176 * If we have data extents, reset to an empty map and hope the user 1177 * will run the bmapbtd checker next. 
1178 */ 1179 if (ri->data_extents || ri->rt_extents || S_ISREG(mode)) { 1180 dip->di_format = XFS_DINODE_FMT_EXTENTS; 1181 return; 1182 } 1183 1184 /* Otherwise, reset the local format to the minimum. */ 1185 switch (mode & S_IFMT) { 1186 case S_IFLNK: 1187 xrep_dinode_zap_symlink(ri, dip); 1188 break; 1189 case S_IFDIR: 1190 xrep_dinode_zap_dir(ri, dip); 1191 break; 1192 } 1193 } 1194 1195 /* 1196 * Check the attr fork for things that will fail the ifork verifiers or the 1197 * ifork formatters. 1198 */ 1199 STATIC bool 1200 xrep_dinode_check_afork( 1201 struct xfs_scrub *sc, 1202 struct xfs_dinode *dip) 1203 { 1204 struct xfs_attr_sf_hdr *afork_ptr; 1205 size_t attr_size; 1206 unsigned int afork_size; 1207 1208 if (XFS_DFORK_BOFF(dip) == 0) 1209 return dip->di_aformat != XFS_DINODE_FMT_EXTENTS || 1210 xfs_dfork_attr_extents(dip) != 0; 1211 1212 afork_size = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 1213 afork_ptr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 1214 1215 switch (XFS_DFORK_FORMAT(dip, XFS_ATTR_FORK)) { 1216 case XFS_DINODE_FMT_LOCAL: 1217 /* Fork has to be large enough to extract the xattr size. */ 1218 if (afork_size < sizeof(struct xfs_attr_sf_hdr)) 1219 return true; 1220 1221 /* xattr structure cannot be larger than the fork */ 1222 attr_size = be16_to_cpu(afork_ptr->totsize); 1223 if (attr_size > afork_size) 1224 return true; 1225 1226 /* xattr structure must pass verification. 
*/ 1227 return xfs_attr_shortform_verify(afork_ptr, attr_size) != NULL; 1228 case XFS_DINODE_FMT_EXTENTS: 1229 if (xrep_dinode_bad_extents_fork(sc, dip, afork_size, 1230 XFS_ATTR_FORK)) 1231 return true; 1232 break; 1233 case XFS_DINODE_FMT_BTREE: 1234 if (xrep_dinode_bad_bmbt_fork(sc, dip, afork_size, 1235 XFS_ATTR_FORK)) 1236 return true; 1237 break; 1238 case XFS_DINODE_FMT_META_BTREE: 1239 if (xrep_dinode_bad_metabt_fork(sc, dip, afork_size, 1240 XFS_ATTR_FORK)) 1241 return true; 1242 break; 1243 default: 1244 return true; 1245 } 1246 1247 return false; 1248 } 1249 1250 /* 1251 * Reset the attr fork to empty. Since the attr fork could have contained 1252 * ACLs, make the file readable only by root. 1253 */ 1254 STATIC void 1255 xrep_dinode_zap_afork( 1256 struct xrep_inode *ri, 1257 struct xfs_dinode *dip, 1258 uint16_t mode) 1259 { 1260 struct xfs_scrub *sc = ri->sc; 1261 1262 trace_xrep_dinode_zap_afork(sc, dip); 1263 1264 ri->ino_sick_mask |= XFS_SICK_INO_BMBTA_ZAPPED; 1265 1266 dip->di_aformat = XFS_DINODE_FMT_EXTENTS; 1267 xrep_dinode_set_attr_nextents(dip, 0); 1268 ri->attr_blocks = 0; 1269 1270 /* 1271 * If the data fork is in btree format, removing the attr fork entirely 1272 * might cause verifier failures if the next level down in the bmbt 1273 * could now fit in the data fork area. 1274 */ 1275 if (dip->di_format != XFS_DINODE_FMT_BTREE) 1276 dip->di_forkoff = 0; 1277 dip->di_mode = cpu_to_be16(mode & ~0777); 1278 dip->di_uid = 0; 1279 dip->di_gid = 0; 1280 } 1281 1282 /* Make sure the fork offset is a sensible value. 
*/ 1283 STATIC void 1284 xrep_dinode_ensure_forkoff( 1285 struct xrep_inode *ri, 1286 struct xfs_dinode *dip, 1287 uint16_t mode) 1288 { 1289 struct xfs_bmdr_block *bmdr; 1290 struct xfs_rtrmap_root *rmdr; 1291 struct xfs_rtrefcount_root *rcdr; 1292 struct xfs_scrub *sc = ri->sc; 1293 xfs_extnum_t attr_extents, data_extents; 1294 size_t bmdr_minsz = xfs_bmdr_space_calc(1); 1295 unsigned int lit_sz = XFS_LITINO(sc->mp); 1296 unsigned int afork_min, dfork_min; 1297 1298 trace_xrep_dinode_ensure_forkoff(sc, dip); 1299 1300 /* 1301 * Before calling this function, xrep_dinode_core ensured that both 1302 * forks actually fit inside their respective literal areas. If this 1303 * was not the case, the fork was reset to FMT_EXTENTS with zero 1304 * records. If the rmapbt scan found attr or data fork blocks, this 1305 * will be noted in the dinode_stats, and we must leave enough room 1306 * for the bmap repair code to reconstruct the mapping structure. 1307 * 1308 * First, compute the minimum space required for the attr fork. 1309 */ 1310 switch (dip->di_aformat) { 1311 case XFS_DINODE_FMT_LOCAL: 1312 /* 1313 * If we still have a shortform xattr structure at all, that 1314 * means the attr fork area was exactly large enough to fit 1315 * the sf structure. 1316 */ 1317 afork_min = XFS_DFORK_SIZE(dip, sc->mp, XFS_ATTR_FORK); 1318 break; 1319 case XFS_DINODE_FMT_EXTENTS: 1320 attr_extents = xfs_dfork_attr_extents(dip); 1321 if (attr_extents) { 1322 /* 1323 * We must maintain sufficient space to hold the entire 1324 * extent map array in the data fork. Note that we 1325 * previously zapped the fork if it had no chance of 1326 * fitting in the inode. 1327 */ 1328 afork_min = sizeof(struct xfs_bmbt_rec) * attr_extents; 1329 } else if (ri->attr_extents > 0) { 1330 /* 1331 * The attr fork thinks it has zero extents, but we 1332 * found some xattr extents. 
We need to leave enough 1333 * empty space here so that the incore attr fork will 1334 * get created (and hence trigger the attr fork bmap 1335 * repairer). 1336 */ 1337 afork_min = bmdr_minsz; 1338 } else { 1339 /* No extents on disk or found in rmapbt. */ 1340 afork_min = 0; 1341 } 1342 break; 1343 case XFS_DINODE_FMT_BTREE: 1344 /* Must have space for btree header and key/pointers. */ 1345 bmdr = XFS_DFORK_PTR(dip, XFS_ATTR_FORK); 1346 afork_min = xfs_bmap_broot_space(sc->mp, bmdr); 1347 break; 1348 default: 1349 /* We should never see any other formats. */ 1350 afork_min = 0; 1351 break; 1352 } 1353 1354 /* Compute the minimum space required for the data fork. */ 1355 switch (dip->di_format) { 1356 case XFS_DINODE_FMT_DEV: 1357 dfork_min = sizeof(__be32); 1358 break; 1359 case XFS_DINODE_FMT_UUID: 1360 dfork_min = sizeof(uuid_t); 1361 break; 1362 case XFS_DINODE_FMT_LOCAL: 1363 /* 1364 * If we still have a shortform data fork at all, that means 1365 * the data fork area was large enough to fit whatever was in 1366 * there. 1367 */ 1368 dfork_min = be64_to_cpu(dip->di_size); 1369 break; 1370 case XFS_DINODE_FMT_EXTENTS: 1371 data_extents = xfs_dfork_data_extents(dip); 1372 if (data_extents) { 1373 /* 1374 * We must maintain sufficient space to hold the entire 1375 * extent map array in the data fork. Note that we 1376 * previously zapped the fork if it had no chance of 1377 * fitting in the inode. 1378 */ 1379 dfork_min = sizeof(struct xfs_bmbt_rec) * data_extents; 1380 } else if (ri->data_extents > 0 || ri->rt_extents > 0) { 1381 /* 1382 * The data fork thinks it has zero extents, but we 1383 * found some data extents. We need to leave enough 1384 * empty space here so that the data fork bmap repair 1385 * will recover the mappings. 1386 */ 1387 dfork_min = bmdr_minsz; 1388 } else { 1389 /* No extents on disk or found in rmapbt. */ 1390 dfork_min = 0; 1391 } 1392 break; 1393 case XFS_DINODE_FMT_BTREE: 1394 /* Must have space for btree header and key/pointers. 
*/ 1395 bmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1396 dfork_min = xfs_bmap_broot_space(sc->mp, bmdr); 1397 break; 1398 case XFS_DINODE_FMT_META_BTREE: 1399 switch (be16_to_cpu(dip->di_metatype)) { 1400 case XFS_METAFILE_RTRMAP: 1401 rmdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1402 dfork_min = xfs_rtrmap_broot_space(sc->mp, rmdr); 1403 break; 1404 case XFS_METAFILE_RTREFCOUNT: 1405 rcdr = XFS_DFORK_PTR(dip, XFS_DATA_FORK); 1406 dfork_min = xfs_rtrefcount_broot_space(sc->mp, rcdr); 1407 break; 1408 default: 1409 dfork_min = 0; 1410 break; 1411 } 1412 break; 1413 default: 1414 dfork_min = 0; 1415 break; 1416 } 1417 1418 /* 1419 * Round all values up to the nearest 8 bytes, because that is the 1420 * precision of di_forkoff. 1421 */ 1422 afork_min = roundup(afork_min, 8); 1423 dfork_min = roundup(dfork_min, 8); 1424 bmdr_minsz = roundup(bmdr_minsz, 8); 1425 1426 ASSERT(dfork_min <= lit_sz); 1427 ASSERT(afork_min <= lit_sz); 1428 1429 /* 1430 * If the data fork was zapped and we don't have enough space for the 1431 * recovery fork, move the attr fork up. 1432 */ 1433 if (dip->di_format == XFS_DINODE_FMT_EXTENTS && 1434 xfs_dfork_data_extents(dip) == 0 && 1435 (ri->data_extents > 0 || ri->rt_extents > 0) && 1436 bmdr_minsz > XFS_DFORK_DSIZE(dip, sc->mp)) { 1437 if (bmdr_minsz + afork_min > lit_sz) { 1438 /* 1439 * The attr for and the stub fork we need to recover 1440 * the data fork won't both fit. Zap the attr fork. 1441 */ 1442 xrep_dinode_zap_afork(ri, dip, mode); 1443 afork_min = bmdr_minsz; 1444 } else { 1445 void *before, *after; 1446 1447 /* Otherwise, just slide the attr fork up. */ 1448 before = XFS_DFORK_APTR(dip); 1449 dip->di_forkoff = bmdr_minsz >> 3; 1450 after = XFS_DFORK_APTR(dip); 1451 memmove(after, before, XFS_DFORK_ASIZE(dip, sc->mp)); 1452 } 1453 } 1454 1455 /* 1456 * If the attr fork was zapped and we don't have enough space for the 1457 * recovery fork, move the attr fork down. 
1458 */ 1459 if (dip->di_aformat == XFS_DINODE_FMT_EXTENTS && 1460 xfs_dfork_attr_extents(dip) == 0 && 1461 ri->attr_extents > 0 && 1462 bmdr_minsz > XFS_DFORK_ASIZE(dip, sc->mp)) { 1463 if (dip->di_format == XFS_DINODE_FMT_BTREE) { 1464 /* 1465 * If the data fork is in btree format then we can't 1466 * adjust forkoff because that runs the risk of 1467 * violating the extents/btree format transition rules. 1468 */ 1469 } else if (bmdr_minsz + dfork_min > lit_sz) { 1470 /* 1471 * If we can't move the attr fork, too bad, we lose the 1472 * attr fork and leak its blocks. 1473 */ 1474 xrep_dinode_zap_afork(ri, dip, mode); 1475 } else { 1476 /* 1477 * Otherwise, just slide the attr fork down. The attr 1478 * fork is empty, so we don't have any old contents to 1479 * move here. 1480 */ 1481 dip->di_forkoff = (lit_sz - bmdr_minsz) >> 3; 1482 } 1483 } 1484 } 1485 1486 /* 1487 * Zap the data/attr forks if we spot anything that isn't going to pass the 1488 * ifork verifiers or the ifork formatters, because we need to get the inode 1489 * into good enough shape that the higher level repair functions can run. 1490 */ 1491 STATIC void 1492 xrep_dinode_zap_forks( 1493 struct xrep_inode *ri, 1494 struct xfs_dinode *dip) 1495 { 1496 struct xfs_scrub *sc = ri->sc; 1497 xfs_extnum_t data_extents; 1498 xfs_extnum_t attr_extents; 1499 xfs_filblks_t nblocks; 1500 uint16_t mode; 1501 bool zap_datafork = false; 1502 bool zap_attrfork = ri->zap_acls; 1503 1504 trace_xrep_dinode_zap_forks(sc, dip); 1505 1506 mode = be16_to_cpu(dip->di_mode); 1507 1508 data_extents = xfs_dfork_data_extents(dip); 1509 attr_extents = xfs_dfork_attr_extents(dip); 1510 nblocks = be64_to_cpu(dip->di_nblocks); 1511 1512 /* Inode counters don't make sense? 
*/ 1513 if (data_extents > nblocks) 1514 zap_datafork = true; 1515 if (attr_extents > nblocks) 1516 zap_attrfork = true; 1517 if (data_extents + attr_extents > nblocks) 1518 zap_datafork = zap_attrfork = true; 1519 1520 if (!zap_datafork) 1521 zap_datafork = xrep_dinode_check_dfork(sc, dip, mode); 1522 if (!zap_attrfork) 1523 zap_attrfork = xrep_dinode_check_afork(sc, dip); 1524 1525 /* Zap whatever's bad. */ 1526 if (zap_attrfork) 1527 xrep_dinode_zap_afork(ri, dip, mode); 1528 if (zap_datafork) 1529 xrep_dinode_zap_dfork(ri, dip, mode); 1530 xrep_dinode_ensure_forkoff(ri, dip, mode); 1531 1532 /* 1533 * Zero di_nblocks if we don't have any extents at all to satisfy the 1534 * buffer verifier. 1535 */ 1536 data_extents = xfs_dfork_data_extents(dip); 1537 attr_extents = xfs_dfork_attr_extents(dip); 1538 if (data_extents + attr_extents == 0) 1539 dip->di_nblocks = 0; 1540 } 1541 1542 /* Inode didn't pass dinode verifiers, so fix the raw buffer and retry iget. */ 1543 STATIC int 1544 xrep_dinode_core( 1545 struct xrep_inode *ri) 1546 { 1547 struct xfs_scrub *sc = ri->sc; 1548 struct xfs_buf *bp; 1549 struct xfs_dinode *dip; 1550 xfs_ino_t ino = sc->sm->sm_ino; 1551 int error; 1552 int iget_error; 1553 1554 /* Figure out what this inode had mapped in both forks. */ 1555 error = xrep_dinode_count_rmaps(ri); 1556 if (error) 1557 return error; 1558 1559 /* Read the inode cluster buffer. */ 1560 error = xfs_trans_read_buf(sc->mp, sc->tp, sc->mp->m_ddev_targp, 1561 ri->imap.im_blkno, ri->imap.im_len, XBF_UNMAPPED, &bp, 1562 NULL); 1563 if (error) 1564 return error; 1565 1566 /* Make sure we can pass the inode buffer verifier. */ 1567 xrep_dinode_buf(sc, bp); 1568 bp->b_ops = &xfs_inode_buf_ops; 1569 1570 /* Fix everything the verifier will complain about. 
*/ 1571 dip = xfs_buf_offset(bp, ri->imap.im_boffset); 1572 xrep_dinode_header(sc, dip); 1573 iget_error = xrep_dinode_mode(ri, dip); 1574 if (iget_error) 1575 goto write; 1576 xrep_dinode_nlinks(dip); 1577 xrep_dinode_flags(sc, dip, ri->rt_extents > 0); 1578 xrep_dinode_size(ri, dip); 1579 xrep_dinode_extsize_hints(sc, dip); 1580 xrep_dinode_zap_forks(ri, dip); 1581 1582 write: 1583 /* Write out the inode. */ 1584 trace_xrep_dinode_fixed(sc, dip); 1585 xfs_dinode_calc_crc(sc->mp, dip); 1586 xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_DINO_BUF); 1587 xfs_trans_log_buf(sc->tp, bp, ri->imap.im_boffset, 1588 ri->imap.im_boffset + sc->mp->m_sb.sb_inodesize - 1); 1589 1590 /* 1591 * In theory, we've fixed the ondisk inode record enough that we should 1592 * be able to load the inode into the cache. Try to iget that inode 1593 * now while we hold the AGI and the inode cluster buffer and take the 1594 * IOLOCK so that we can continue with repairs without anyone else 1595 * accessing the inode. If iget fails, we still need to commit the 1596 * changes. 1597 */ 1598 if (!iget_error) 1599 iget_error = xchk_iget(sc, ino, &sc->ip); 1600 if (!iget_error) 1601 xchk_ilock(sc, XFS_IOLOCK_EXCL); 1602 1603 /* 1604 * Commit the inode cluster buffer updates and drop the AGI buffer that 1605 * we've been holding since scrub setup. From here on out, repairs 1606 * deal only with the cached inode. 1607 */ 1608 error = xrep_trans_commit(sc); 1609 if (error) 1610 return error; 1611 1612 if (iget_error) 1613 return iget_error; 1614 1615 error = xchk_trans_alloc(sc, 0); 1616 if (error) 1617 return error; 1618 1619 error = xrep_ino_dqattach(sc); 1620 if (error) 1621 return error; 1622 1623 xchk_ilock(sc, XFS_ILOCK_EXCL); 1624 if (ri->ino_sick_mask) 1625 xfs_inode_mark_sick(sc->ip, ri->ino_sick_mask); 1626 return 0; 1627 } 1628 1629 /* Fix everything xfs_dinode_verify cares about. 
*/ 1630 STATIC int 1631 xrep_dinode_problems( 1632 struct xrep_inode *ri) 1633 { 1634 struct xfs_scrub *sc = ri->sc; 1635 int error; 1636 1637 error = xrep_dinode_core(ri); 1638 if (error) 1639 return error; 1640 1641 /* We had to fix a totally busted inode, schedule quotacheck. */ 1642 if (XFS_IS_UQUOTA_ON(sc->mp)) 1643 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1644 if (XFS_IS_GQUOTA_ON(sc->mp)) 1645 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1646 if (XFS_IS_PQUOTA_ON(sc->mp)) 1647 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1648 1649 return 0; 1650 } 1651 1652 /* 1653 * Fix problems that the verifiers don't care about. In general these are 1654 * errors that don't cause problems elsewhere in the kernel that we can easily 1655 * detect, so we don't check them all that rigorously. 1656 */ 1657 1658 /* Make sure block and extent counts are ok. */ 1659 STATIC int 1660 xrep_inode_blockcounts( 1661 struct xfs_scrub *sc) 1662 { 1663 struct xfs_ifork *ifp; 1664 xfs_filblks_t count; 1665 xfs_filblks_t acount; 1666 xfs_extnum_t nextents; 1667 int error; 1668 1669 trace_xrep_inode_blockcounts(sc); 1670 1671 /* Set data fork counters from the data fork mappings. */ 1672 error = xchk_inode_count_blocks(sc, XFS_DATA_FORK, &nextents, &count); 1673 if (error) 1674 return error; 1675 if (xfs_is_reflink_inode(sc->ip)) { 1676 /* 1677 * data fork blockcount can exceed physical storage if a user 1678 * reflinks the same block over and over again. 1679 */ 1680 ; 1681 } else if (XFS_IS_REALTIME_INODE(sc->ip)) { 1682 if (count >= sc->mp->m_sb.sb_rblocks) 1683 return -EFSCORRUPTED; 1684 } else { 1685 if (count >= sc->mp->m_sb.sb_dblocks) 1686 return -EFSCORRUPTED; 1687 } 1688 error = xrep_ino_ensure_extent_count(sc, XFS_DATA_FORK, nextents); 1689 if (error) 1690 return error; 1691 sc->ip->i_df.if_nextents = nextents; 1692 1693 /* Set attr fork counters from the attr fork mappings. 
*/ 1694 ifp = xfs_ifork_ptr(sc->ip, XFS_ATTR_FORK); 1695 if (ifp) { 1696 error = xchk_inode_count_blocks(sc, XFS_ATTR_FORK, &nextents, 1697 &acount); 1698 if (error) 1699 return error; 1700 if (count >= sc->mp->m_sb.sb_dblocks) 1701 return -EFSCORRUPTED; 1702 error = xrep_ino_ensure_extent_count(sc, XFS_ATTR_FORK, 1703 nextents); 1704 if (error) 1705 return error; 1706 ifp->if_nextents = nextents; 1707 } else { 1708 acount = 0; 1709 } 1710 1711 sc->ip->i_nblocks = count + acount; 1712 return 0; 1713 } 1714 1715 /* Check for invalid uid/gid/prid. */ 1716 STATIC void 1717 xrep_inode_ids( 1718 struct xfs_scrub *sc) 1719 { 1720 bool dirty = false; 1721 1722 trace_xrep_inode_ids(sc); 1723 1724 if (!uid_valid(VFS_I(sc->ip)->i_uid)) { 1725 i_uid_write(VFS_I(sc->ip), 0); 1726 dirty = true; 1727 if (XFS_IS_UQUOTA_ON(sc->mp)) 1728 xrep_force_quotacheck(sc, XFS_DQTYPE_USER); 1729 } 1730 1731 if (!gid_valid(VFS_I(sc->ip)->i_gid)) { 1732 i_gid_write(VFS_I(sc->ip), 0); 1733 dirty = true; 1734 if (XFS_IS_GQUOTA_ON(sc->mp)) 1735 xrep_force_quotacheck(sc, XFS_DQTYPE_GROUP); 1736 } 1737 1738 if (sc->ip->i_projid == -1U) { 1739 sc->ip->i_projid = 0; 1740 dirty = true; 1741 if (XFS_IS_PQUOTA_ON(sc->mp)) 1742 xrep_force_quotacheck(sc, XFS_DQTYPE_PROJ); 1743 } 1744 1745 /* strip setuid/setgid if we touched any of the ids */ 1746 if (dirty) 1747 VFS_I(sc->ip)->i_mode &= ~(S_ISUID | S_ISGID); 1748 } 1749 1750 static inline void 1751 xrep_clamp_timestamp( 1752 struct xfs_inode *ip, 1753 struct timespec64 *ts) 1754 { 1755 ts->tv_nsec = clamp_t(long, ts->tv_nsec, 0, NSEC_PER_SEC); 1756 *ts = timestamp_truncate(*ts, VFS_I(ip)); 1757 } 1758 1759 /* Nanosecond counters can't have more than 1 billion. 
*/ 1760 STATIC void 1761 xrep_inode_timestamps( 1762 struct xfs_inode *ip) 1763 { 1764 struct timespec64 tstamp; 1765 struct inode *inode = VFS_I(ip); 1766 1767 tstamp = inode_get_atime(inode); 1768 xrep_clamp_timestamp(ip, &tstamp); 1769 inode_set_atime_to_ts(inode, tstamp); 1770 1771 tstamp = inode_get_mtime(inode); 1772 xrep_clamp_timestamp(ip, &tstamp); 1773 inode_set_mtime_to_ts(inode, tstamp); 1774 1775 tstamp = inode_get_ctime(inode); 1776 xrep_clamp_timestamp(ip, &tstamp); 1777 inode_set_ctime_to_ts(inode, tstamp); 1778 1779 xrep_clamp_timestamp(ip, &ip->i_crtime); 1780 } 1781 1782 /* Fix inode flags that don't make sense together. */ 1783 STATIC void 1784 xrep_inode_flags( 1785 struct xfs_scrub *sc) 1786 { 1787 uint16_t mode; 1788 1789 trace_xrep_inode_flags(sc); 1790 1791 mode = VFS_I(sc->ip)->i_mode; 1792 1793 /* Clear junk flags */ 1794 if (sc->ip->i_diflags & ~XFS_DIFLAG_ANY) 1795 sc->ip->i_diflags &= ~XFS_DIFLAG_ANY; 1796 1797 /* NEWRTBM only applies to realtime bitmaps */ 1798 if (sc->ip->i_ino == sc->mp->m_sb.sb_rbmino) 1799 sc->ip->i_diflags |= XFS_DIFLAG_NEWRTBM; 1800 else 1801 sc->ip->i_diflags &= ~XFS_DIFLAG_NEWRTBM; 1802 1803 /* These only make sense for directories. */ 1804 if (!S_ISDIR(mode)) 1805 sc->ip->i_diflags &= ~(XFS_DIFLAG_RTINHERIT | 1806 XFS_DIFLAG_EXTSZINHERIT | 1807 XFS_DIFLAG_PROJINHERIT | 1808 XFS_DIFLAG_NOSYMLINKS); 1809 1810 /* These only make sense for files. */ 1811 if (!S_ISREG(mode)) 1812 sc->ip->i_diflags &= ~(XFS_DIFLAG_REALTIME | 1813 XFS_DIFLAG_EXTSIZE); 1814 1815 /* These only make sense for non-rt files. */ 1816 if (sc->ip->i_diflags & XFS_DIFLAG_REALTIME) 1817 sc->ip->i_diflags &= ~XFS_DIFLAG_FILESTREAM; 1818 1819 /* Immutable and append only? Drop the append. */ 1820 if ((sc->ip->i_diflags & XFS_DIFLAG_IMMUTABLE) && 1821 (sc->ip->i_diflags & XFS_DIFLAG_APPEND)) 1822 sc->ip->i_diflags &= ~XFS_DIFLAG_APPEND; 1823 1824 /* Clear junk flags. 
*/ 1825 if (sc->ip->i_diflags2 & ~XFS_DIFLAG2_ANY) 1826 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_ANY; 1827 1828 /* No reflink flag unless we support it and it's a file. */ 1829 if (!xfs_has_reflink(sc->mp) || !S_ISREG(mode)) 1830 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK; 1831 1832 /* DAX only applies to files and dirs. */ 1833 if (!(S_ISREG(mode) || S_ISDIR(mode))) 1834 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_DAX; 1835 } 1836 1837 /* 1838 * Fix size problems with block/node format directories. If we fail to find 1839 * the extent list, just bail out and let the bmapbtd repair functions clean 1840 * up that mess. 1841 */ 1842 STATIC void 1843 xrep_inode_blockdir_size( 1844 struct xfs_scrub *sc) 1845 { 1846 struct xfs_iext_cursor icur; 1847 struct xfs_bmbt_irec got; 1848 struct xfs_ifork *ifp; 1849 xfs_fileoff_t off; 1850 int error; 1851 1852 trace_xrep_inode_blockdir_size(sc); 1853 1854 error = xfs_iread_extents(sc->tp, sc->ip, XFS_DATA_FORK); 1855 if (error) 1856 return; 1857 1858 /* Find the last block before 32G; this is the dir size. */ 1859 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1860 off = XFS_B_TO_FSB(sc->mp, XFS_DIR2_SPACE_SIZE); 1861 if (!xfs_iext_lookup_extent_before(sc->ip, ifp, &off, &icur, &got)) { 1862 /* zero-extents directory? */ 1863 return; 1864 } 1865 1866 off = got.br_startoff + got.br_blockcount; 1867 sc->ip->i_disk_size = min_t(loff_t, XFS_DIR2_SPACE_SIZE, 1868 XFS_FSB_TO_B(sc->mp, off)); 1869 } 1870 1871 /* Fix size problems with short format directories. */ 1872 STATIC void 1873 xrep_inode_sfdir_size( 1874 struct xfs_scrub *sc) 1875 { 1876 struct xfs_ifork *ifp; 1877 1878 trace_xrep_inode_sfdir_size(sc); 1879 1880 ifp = xfs_ifork_ptr(sc->ip, XFS_DATA_FORK); 1881 sc->ip->i_disk_size = ifp->if_bytes; 1882 } 1883 1884 /* 1885 * Fix any irregularities in a directory inode's size now that we can iterate 1886 * extent maps and access other regular inode data. 
1887 */ 1888 STATIC void 1889 xrep_inode_dir_size( 1890 struct xfs_scrub *sc) 1891 { 1892 trace_xrep_inode_dir_size(sc); 1893 1894 switch (sc->ip->i_df.if_format) { 1895 case XFS_DINODE_FMT_EXTENTS: 1896 case XFS_DINODE_FMT_BTREE: 1897 xrep_inode_blockdir_size(sc); 1898 break; 1899 case XFS_DINODE_FMT_LOCAL: 1900 xrep_inode_sfdir_size(sc); 1901 break; 1902 } 1903 } 1904 1905 /* Fix extent size hint problems. */ 1906 STATIC void 1907 xrep_inode_extsize( 1908 struct xfs_scrub *sc) 1909 { 1910 /* Fix misaligned extent size hints on a directory. */ 1911 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1912 (sc->ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) && 1913 xfs_extlen_to_rtxmod(sc->mp, sc->ip->i_extsize) > 0) { 1914 sc->ip->i_extsize = 0; 1915 sc->ip->i_diflags &= ~XFS_DIFLAG_EXTSZINHERIT; 1916 } 1917 } 1918 1919 /* Ensure this file has an attr fork if it needs to hold a parent pointer. */ 1920 STATIC int 1921 xrep_inode_pptr( 1922 struct xfs_scrub *sc) 1923 { 1924 struct xfs_mount *mp = sc->mp; 1925 struct xfs_inode *ip = sc->ip; 1926 struct inode *inode = VFS_I(ip); 1927 1928 if (!xfs_has_parent(mp)) 1929 return 0; 1930 1931 /* 1932 * Unlinked inodes that cannot be added to the directory tree will not 1933 * have a parent pointer. 1934 */ 1935 if (inode->i_nlink == 0 && !(inode->i_state & I_LINKABLE)) 1936 return 0; 1937 1938 /* Children of the superblock do not have parent pointers. */ 1939 if (xchk_inode_is_sb_rooted(ip)) 1940 return 0; 1941 1942 /* Inode already has an attr fork; no further work possible here. */ 1943 if (xfs_inode_has_attr_fork(ip)) 1944 return 0; 1945 1946 return xfs_bmap_add_attrfork(sc->tp, ip, 1947 sizeof(struct xfs_attr_sf_hdr), true); 1948 } 1949 1950 /* Fix COW extent size hint problems. */ 1951 STATIC void 1952 xrep_inode_cowextsize( 1953 struct xfs_scrub *sc) 1954 { 1955 /* Fix misaligned CoW extent size hints on a directory. 
*/ 1956 if ((sc->ip->i_diflags & XFS_DIFLAG_RTINHERIT) && 1957 (sc->ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE) && 1958 sc->ip->i_extsize % sc->mp->m_sb.sb_rextsize > 0) { 1959 sc->ip->i_cowextsize = 0; 1960 sc->ip->i_diflags2 &= ~XFS_DIFLAG2_COWEXTSIZE; 1961 } 1962 } 1963 1964 /* Fix any irregularities in an inode that the verifiers don't catch. */ 1965 STATIC int 1966 xrep_inode_problems( 1967 struct xfs_scrub *sc) 1968 { 1969 int error; 1970 1971 error = xrep_inode_blockcounts(sc); 1972 if (error) 1973 return error; 1974 error = xrep_inode_pptr(sc); 1975 if (error) 1976 return error; 1977 xrep_inode_timestamps(sc->ip); 1978 xrep_inode_flags(sc); 1979 xrep_inode_ids(sc); 1980 /* 1981 * We can now do a better job fixing the size of a directory now that 1982 * we can scan the data fork extents than we could in xrep_dinode_size. 1983 */ 1984 if (S_ISDIR(VFS_I(sc->ip)->i_mode)) 1985 xrep_inode_dir_size(sc); 1986 xrep_inode_extsize(sc); 1987 xrep_inode_cowextsize(sc); 1988 1989 trace_xrep_inode_fixed(sc); 1990 xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); 1991 return xrep_roll_trans(sc); 1992 } 1993 1994 /* 1995 * Make sure this inode's unlinked list pointers are consistent with its 1996 * link count. 1997 */ 1998 STATIC int 1999 xrep_inode_unlinked( 2000 struct xfs_scrub *sc) 2001 { 2002 unsigned int nlink = VFS_I(sc->ip)->i_nlink; 2003 int error; 2004 2005 /* 2006 * If this inode is linked from the directory tree and on the unlinked 2007 * list, remove it from the unlinked list. 2008 */ 2009 if (nlink > 0 && xfs_inode_on_unlinked_list(sc->ip)) { 2010 struct xfs_perag *pag; 2011 int error; 2012 2013 pag = xfs_perag_get(sc->mp, 2014 XFS_INO_TO_AGNO(sc->mp, sc->ip->i_ino)); 2015 error = xfs_iunlink_remove(sc->tp, pag, sc->ip); 2016 xfs_perag_put(pag); 2017 if (error) 2018 return error; 2019 } 2020 2021 /* 2022 * If this inode is not linked from the directory tree yet not on the 2023 * unlinked list, put it on the unlinked list. 
2024 */ 2025 if (nlink == 0 && !xfs_inode_on_unlinked_list(sc->ip)) { 2026 error = xfs_iunlink(sc->tp, sc->ip); 2027 if (error) 2028 return error; 2029 } 2030 2031 return 0; 2032 } 2033 2034 /* Repair an inode's fields. */ 2035 int 2036 xrep_inode( 2037 struct xfs_scrub *sc) 2038 { 2039 int error = 0; 2040 2041 /* 2042 * No inode? That means we failed the _iget verifiers. Repair all 2043 * the things that the inode verifiers care about, then retry _iget. 2044 */ 2045 if (!sc->ip) { 2046 struct xrep_inode *ri = sc->buf; 2047 2048 ASSERT(ri != NULL); 2049 2050 error = xrep_dinode_problems(ri); 2051 if (error == -EBUSY) { 2052 /* 2053 * Directory scan to recover inode mode encountered a 2054 * busy inode, so we did not continue repairing things. 2055 */ 2056 return 0; 2057 } 2058 if (error) 2059 return error; 2060 2061 /* By this point we had better have a working incore inode. */ 2062 if (!sc->ip) 2063 return -EFSCORRUPTED; 2064 } 2065 2066 xfs_trans_ijoin(sc->tp, sc->ip, 0); 2067 2068 /* If we found corruption of any kind, try to fix it. */ 2069 if ((sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) || 2070 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_XCORRUPT)) { 2071 error = xrep_inode_problems(sc); 2072 if (error) 2073 return error; 2074 } 2075 2076 /* See if we can clear the reflink flag. */ 2077 if (xfs_is_reflink_inode(sc->ip)) { 2078 error = xfs_reflink_clear_inode_flag(sc->ip, &sc->tp); 2079 if (error) 2080 return error; 2081 } 2082 2083 /* Reconnect incore unlinked list */ 2084 error = xrep_inode_unlinked(sc); 2085 if (error) 2086 return error; 2087 2088 return xrep_defer_finish(sc); 2089 } 2090