// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2017-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtgroup.h"
#include "xfs_health.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/health.h"
#include "xfs_ag.h"

/* Set us up with an inode's bmap. */
int
xchk_setup_inode_bmap(
	struct xfs_scrub	*sc)
{
	int			error;

	if (xchk_need_intent_drain(sc))
		xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN);

	error = xchk_iget_for_scrubbing(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_IOLOCK_EXCL);

	/*
	 * We don't want any ephemeral data/cow fork updates sitting around
	 * while we inspect block mappings, so wait for directio to finish
	 * and flush dirty data if we have delalloc reservations.
	 *
	 * This only applies to regular files, and is skipped when the attr
	 * fork mappings (BMBTA) are the scrub target.
	 */
	if (S_ISREG(VFS_I(sc->ip)->i_mode) &&
	    sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) {
		struct address_space	*mapping = VFS_I(sc->ip)->i_mapping;
		bool			is_repair = xchk_could_repair(sc);

		xchk_ilock(sc, XFS_MMAPLOCK_EXCL);

		/* Break all our leases, we're going to mess with things. */
		if (is_repair) {
			error = xfs_break_layouts(VFS_I(sc->ip),
					&sc->ilock_flags, BREAK_WRITE);
			if (error)
				goto out;
		}

		inode_dio_wait(VFS_I(sc->ip));

		/*
		 * Try to flush all incore state to disk before we examine the
		 * space mappings for the data fork.  Leave accumulated errors
		 * in the mapping for the writer threads to consume.
		 *
		 * On ENOSPC or EIO writeback errors, we continue into the
		 * extent mapping checks because write failures do not
		 * necessarily imply anything about the correctness of the file
		 * metadata.  The metadata and the file data could be on
		 * completely separate devices; a media failure might only
		 * affect a subset of the disk, etc.  We can handle delalloc
		 * extents in the scrubber, so leaving them in memory is fine.
		 */
		error = filemap_fdatawrite(mapping);
		if (!error)
			error = filemap_fdatawait_keep_errors(mapping);
		if (error && (error != -ENOSPC && error != -EIO))
			goto out;

		/* Drop the page cache if we're repairing block mappings. */
		if (is_repair) {
			error = invalidate_inode_pages2(
					VFS_I(sc->ip)->i_mapping);
			if (error)
				goto out;
		}

	}

	/* Got the inode, lock it and we're ready to go. */
	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out;

	error = xchk_ino_dqattach(sc);
	if (error)
		goto out;

	xchk_ilock(sc, XFS_ILOCK_EXCL);
out:
	/* scrub teardown will unlock and release the inode */
	return error;
}

/*
 * Inode fork block mapping (BMBT) scrubber.
 * More complex than the others because we have to scrub
 * all the extents regardless of whether or not the fork
 * is in btree format.
 */

struct xchk_bmap_info {
	struct xfs_scrub	*sc;

	/* Incore extent tree cursor */
	struct xfs_iext_cursor	icur;

	/* Previous fork mapping that we examined */
	struct xfs_bmbt_irec	prev_rec;

	/* Is this a realtime fork? */
	bool			is_rt;

	/* May mappings point to shared space? */
	bool			is_shared;

	/* Was the incore extent tree loaded? */
	bool			was_loaded;

	/* Which inode fork are we checking? */
	int			whichfork;
};

/* Look for a corresponding rmap for this irec.
 */
/*
 * Returns true and fills out @rmap when a reverse mapping was found for this
 * extent; a missing rmap is flagged as cross-referencing corruption.  Returns
 * false (without flagging) if the lookup itself could not be performed.
 */
static inline bool
xchk_bmap_get_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno,
	uint64_t		owner,
	struct xfs_rmap_irec	*rmap)
{
	xfs_fileoff_t		offset;
	unsigned int		rflags = 0;
	int			has_rmap;
	int			error;

	if (info->whichfork == XFS_ATTR_FORK)
		rflags |= XFS_RMAP_ATTR_FORK;
	if (irec->br_state == XFS_EXT_UNWRITTEN)
		rflags |= XFS_RMAP_UNWRITTEN;

	/*
	 * CoW staging extents are owned (on disk) by the refcountbt, so
	 * their rmaps do not have offsets.
	 */
	if (info->whichfork == XFS_COW_FORK)
		offset = 0;
	else
		offset = irec->br_startoff;

	/*
	 * If the caller thinks this could be a shared bmbt extent (IOWs,
	 * any data fork extent of a reflink inode) then we have to use the
	 * range rmap lookup to make sure we get the correct owner/offset.
	 */
	if (info->is_shared) {
		error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	} else {
		error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno,
				owner, offset, rflags, rmap, &has_rmap);
	}
	if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur))
		return false;

	if (!has_rmap)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
			irec->br_startoff);
	return has_rmap;
}

/* Make sure that we have rmapbt records for this data/attr fork extent. */
STATIC void
xchk_bmap_xref_rmap(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = info->sc->ip->i_ino;

	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * The rmap must be an exact match for this incore file mapping record,
	 * which may have arisen from multiple ondisk records.
	 */
	if (rmap.rm_startblock != agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Widen to 64 bits so the endpoint sum cannot wrap. */
	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end != agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the logical offsets. */
	if (rmap.rm_offset != irec->br_startoff)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount;
	if (rmap_end != irec->br_startoff + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * Check for discrepancies between the unwritten flag in the irec and
	 * the rmap.  Note that the (in-memory) CoW fork distinguishes between
	 * unwritten and written extents, but we don't track that in the rmap
	 * records because the blocks are owned (on-disk) by the refcountbt,
	 * which doesn't track unwritten state.
	 */
	if (!!(irec->br_state == XFS_EXT_UNWRITTEN) !=
	    !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	if (!!(info->whichfork == XFS_ATTR_FORK) !=
	    !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK))
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}

/* Make sure that we have rmapbt records for this COW fork extent. */
STATIC void
xchk_bmap_xref_rmap_cow(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec,
	xfs_agblock_t		agbno)
{
	struct xfs_rmap_irec	rmap;
	unsigned long long	rmap_end;
	uint64_t		owner = XFS_RMAP_OWN_COW;

	if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm))
		return;

	/* Find the rmap record for this irec. */
	if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap))
		return;

	/*
	 * CoW staging extents are owned by the refcount btree, so the rmap
	 * can start before and end after the physical space allocated to this
	 * mapping.  There are no offsets to check.
	 */
	if (rmap.rm_startblock > agbno)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount;
	if (rmap_end < agbno + irec->br_blockcount)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Check the owner */
	if (rmap.rm_owner != owner)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/*
	 * No flags allowed.  Note that the (in-memory) CoW fork distinguishes
	 * between unwritten and written extents, but we don't track that in
	 * the rmap records because the blocks are owned (on-disk) by the
	 * refcountbt, which doesn't track unwritten state.
	 */
	if (rmap.rm_flags & XFS_RMAP_ATTR_FORK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (rmap.rm_flags & XFS_RMAP_UNWRITTEN)
		xchk_fblock_xref_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}

/* Cross-reference a single rtdev extent record. */
STATIC void
xchk_bmap_rt_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	int			error;

	error = xchk_rtgroup_init_existing(info->sc,
			xfs_rtb_to_rgno(ip->i_mount, irec->br_startblock),
			&info->sc->sr);
	if (!xchk_fblock_process_error(info->sc, info->whichfork,
			irec->br_startoff, &error))
		return;

	xchk_rtgroup_lock(&info->sc->sr, XCHK_RTGLOCK_ALL);
	xchk_xref_is_used_rt_space(info->sc, irec->br_startblock,
			irec->br_blockcount);

	xchk_rtgroup_free(info->sc, &info->sc->sr);
}

/* Cross-reference a single datadev extent record.
 */
STATIC void
xchk_bmap_iextent_xref(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_owner_info	oinfo;
	struct xfs_mount	*mp = info->sc->mp;
	xfs_agnumber_t		agno;
	xfs_agblock_t		agbno;
	xfs_extlen_t		len;
	int			error;

	agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock);
	agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock);
	len = irec->br_blockcount;

	error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa);
	if (!xchk_fblock_process_error(info->sc, info->whichfork,
			irec->br_startoff, &error))
		goto out_free;

	xchk_xref_is_used_space(info->sc, agbno, len);
	xchk_xref_is_not_inode_chunk(info->sc, agbno, len);
	switch (info->whichfork) {
	case XFS_DATA_FORK:
		xchk_bmap_xref_rmap(info, irec, agbno);
		/*
		 * Reflink inodes may share data fork blocks, so the
		 * sole-ownership and not-shared checks only apply when the
		 * inode is not reflinked.
		 */
		if (!xfs_is_reflink_inode(info->sc->ip)) {
			xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
					info->whichfork, irec->br_startoff);
			xchk_xref_is_only_owned_by(info->sc, agbno,
					irec->br_blockcount, &oinfo);
			xchk_xref_is_not_shared(info->sc, agbno,
					irec->br_blockcount);
		}
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_ATTR_FORK:
		xchk_bmap_xref_rmap(info, irec, agbno);
		xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino,
				info->whichfork, irec->br_startoff);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&oinfo);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		break;
	case XFS_COW_FORK:
		xchk_bmap_xref_rmap_cow(info, irec, agbno);
		xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount,
				&XFS_RMAP_OINFO_COW);
		xchk_xref_is_cow_staging(info->sc, agbno,
				irec->br_blockcount);
		xchk_xref_is_not_shared(info->sc, agbno,
				irec->br_blockcount);
		break;
	}

out_free:
	xchk_ag_free(info->sc, &info->sc->sa);
}

/*
 * Directories and attr forks should never have blocks that can't be addressed
 * by a xfs_dablk_t.
 */
STATIC void
xchk_bmap_dirattr_extent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		off;

	if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK)
		return;

	if (!xfs_verify_dablk(mp, irec->br_startoff))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* The last block of the mapping must be addressable too. */
	off = irec->br_startoff + irec->br_blockcount - 1;
	if (!xfs_verify_dablk(mp, off))
		xchk_fblock_set_corrupt(info->sc, info->whichfork, off);
}

/* Scrub a single extent record. */
STATIC void
xchk_bmap_iextent(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = info->sc->mp;

	/*
	 * Check for out-of-order extents.  This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->prev_rec.br_startoff +
			info->prev_rec.br_blockcount)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	xchk_bmap_dirattr_extent(ip, info, irec);

	/* Make sure the extent points to a valid place. */
	if (info->is_rt &&
	    !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
	if (!info->is_rt &&
	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* We don't allow unwritten extents on attr forks. */
	if (irec->br_state == XFS_EXT_UNWRITTEN &&
	    info->whichfork == XFS_ATTR_FORK)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Skip the cross-referencing if corruption has already been found. */
	if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return;

	if (info->is_rt)
		xchk_bmap_rt_iextent_xref(ip, info, irec);
	else
		xchk_bmap_iextent_xref(ip, info, irec);
}

/* Scrub a bmbt record. */
STATIC int
xchk_bmapbt_rec(
	struct xchk_btree	*bs,
	const union xfs_btree_rec *rec)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_bmbt_irec	iext_irec;
	struct xfs_iext_cursor	icur;
	struct xchk_bmap_info	*info = bs->private;
	struct xfs_inode	*ip = bs->cur->bc_ino.ip;
	struct xfs_buf		*bp = NULL;
	struct xfs_btree_block	*block;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, info->whichfork);
	uint64_t		owner;
	int			i;

	/*
	 * Check the owners of the btree blocks up to the level below
	 * the root since the verifiers don't do that.
	 */
	if (xfs_has_crc(bs->cur->bc_mp) &&
	    bs->cur->bc_levels[0].ptr == 1) {
		for (i = 0; i < bs->cur->bc_nlevels - 1; i++) {
			block = xfs_btree_get_block(bs->cur, i, &bp);
			owner = be64_to_cpu(block->bb_u.l.bb_owner);
			if (owner != ip->i_ino)
				xchk_fblock_set_corrupt(bs->sc,
						info->whichfork, 0);
		}
	}

	/*
	 * Check that the incore extent tree contains an extent that matches
	 * this one exactly.  We validate those cached bmaps later, so we don't
	 * need to check them here.  If the incore extent tree was just loaded
	 * from disk by the scrubber, we assume that its contents match what's
	 * on disk (we still hold the ILOCK) and skip the equivalence check.
	 */
	if (!info->was_loaded)
		return 0;

	xfs_bmbt_disk_get_all(&rec->bmbt, &irec);
	if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) {
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
		return 0;
	}

	/* The incore mapping must match the ondisk record field for field. */
	if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur,
				&iext_irec) ||
	    irec.br_startoff != iext_irec.br_startoff ||
	    irec.br_startblock != iext_irec.br_startblock ||
	    irec.br_blockcount != iext_irec.br_blockcount ||
	    irec.br_state != iext_irec.br_state)
		xchk_fblock_set_corrupt(bs->sc, info->whichfork,
				irec.br_startoff);
	return 0;
}

/* Scan the btree records. */
STATIC int
xchk_bmap_btree(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xchk_bmap_info	*info)
{
	struct xfs_owner_info	oinfo;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_btree_cur	*cur;
	int			error;

	/* Load the incore bmap cache if it's not loaded. */
	info->was_loaded = !xfs_need_iread_extents(ifp);

	error = xfs_iread_extents(sc->tp, ip, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		goto out;

	/* Check the btree structure. */
	cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork);
	xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork);
	error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info);
	xfs_btree_del_cursor(cur, error);
out:
	return error;
}

/* Context passed to xchk_bmap_check_rmap via xfs_rmap_query_all. */
struct xchk_bmap_check_rmap_info {
	struct xfs_scrub	*sc;
	int			whichfork;
	struct xfs_iext_cursor	icur;
};

/* Can we find bmaps that fit this rmap?
 */
STATIC int
xchk_bmap_check_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xfs_bmbt_irec		irec;
	struct xfs_rmap_irec		check_rec;
	struct xchk_bmap_check_rmap_info	*sbcri = priv;
	struct xfs_ifork		*ifp;
	struct xfs_scrub		*sc = sbcri->sc;
	bool				have_map;

	/* Is this even the right fork? */
	if (rec->rm_owner != sc->ip->i_ino)
		return 0;
	if ((sbcri->whichfork == XFS_ATTR_FORK) ^
	    !!(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return 0;

	/* Now look up the bmbt record. */
	ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork);
	if (!ifp) {
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
		goto out;
	}
	have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset,
			&sbcri->icur, &irec);
	if (!have_map)
		xchk_fblock_set_corrupt(sc, sbcri->whichfork,
				rec->rm_offset);
	/*
	 * bmap extent record lengths are constrained to 2^21 blocks in length
	 * because of space constraints in the on-disk metadata structure.
	 * However, rmap extent record lengths are constrained only by AG
	 * length, so we have to loop through the bmbt to make sure that the
	 * entire rmap is covered by bmbt records.
	 */
	check_rec = *rec;
	while (have_map) {
		if (irec.br_startoff != check_rec.rm_offset)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_startblock !=
		    xfs_agbno_to_fsb(to_perag(cur->bc_group),
				check_rec.rm_startblock))
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (irec.br_blockcount > check_rec.rm_blockcount)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
		if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
			break;
		/* Advance to the portion of the rmap not yet covered. */
		check_rec.rm_startblock += irec.br_blockcount;
		check_rec.rm_offset += irec.br_blockcount;
		check_rec.rm_blockcount -= irec.br_blockcount;
		if (check_rec.rm_blockcount == 0)
			break;
		have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec);
		if (!have_map)
			xchk_fblock_set_corrupt(sc, sbcri->whichfork,
					check_rec.rm_offset);
	}

out:
	/*
	 * Stop the rmapbt walk once corruption is noted; the query caller
	 * treats -ECANCELED as a clean early exit.
	 */
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return -ECANCELED;
	return 0;
}

/* Make sure each rmap has a corresponding bmbt entry. */
STATIC int
xchk_bmap_check_ag_rmaps(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xfs_perag	*pag)
{
	struct xchk_bmap_check_rmap_info	sbcri;
	struct xfs_btree_cur	*cur;
	struct xfs_buf		*agf;
	int			error;

	error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf);
	if (error)
		return error;

	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag);

	sbcri.sc = sc;
	sbcri.whichfork = whichfork;
	error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri);
	if (error == -ECANCELED)
		error = 0;

	xfs_btree_del_cursor(cur, error);
	xfs_trans_brelse(sc->tp, agf);
	return error;
}

/*
 * Decide if we want to scan the reverse mappings to determine if the attr
 * fork /really/ has zero space mappings.
 */
STATIC bool
xchk_bmap_check_empty_attrfork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*ifp = &ip->i_af;

	/*
	 * If the dinode repair found a bad attr fork, it will reset the fork
	 * to extents format with zero records and wait for this scrubber
	 * to reconstruct the block mappings.  If the fork is not in this
	 * state, then the fork cannot have been zapped.
	 */
	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
		return false;

	/*
	 * Files can have an attr fork in EXTENTS format with zero records for
	 * several reasons:
	 *
	 * a) an attr set created a fork but ran out of space
	 * b) attr replace deleted an old attr but failed during the set step
	 * c) the data fork was in btree format when all attrs were deleted, so
	 *    the fork was left in place
	 * d) the inode repair code zapped the fork
	 *
	 * Only in case (d) do we want to scan the rmapbt to see if we need to
	 * rebuild the attr fork.  The fork zap code clears all DAC permission
	 * bits and zeroes the uid and gid, so avoid the scan if any of those
	 * three conditions are not met.
	 */
	if ((VFS_I(ip)->i_mode & 0777) != 0)
		return false;
	if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID))
		return false;
	if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID))
		return false;

	return true;
}

/*
 * Decide if we want to scan the reverse mappings to determine if the data
 * fork /really/ has zero space mappings.
 */
STATIC bool
xchk_bmap_check_empty_datafork(
	struct xfs_inode	*ip)
{
	struct xfs_ifork	*ifp = &ip->i_df;

	/* Don't support realtime rmap checks yet. */
	if (XFS_IS_REALTIME_INODE(ip))
		return false;

	/*
	 * If the dinode repair found a bad data fork, it will reset the fork
	 * to extents format with zero records and wait for this scrubber
	 * to reconstruct the block mappings.  If the fork is not in this
	 * state, then the fork cannot have been zapped.
	 */
	if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0)
		return false;

	/*
	 * If we encounter an empty data fork along with evidence that the fork
	 * might not really be empty, we need to scan the reverse mappings to
	 * decide if we're going to rebuild the fork.  Data forks with nonzero
	 * file size are scanned.
	 */
	return i_size_read(VFS_I(ip)) != 0;
}

/*
 * Decide if we want to walk every rmap btree in the fs to make sure that each
 * rmap for this file fork has corresponding bmbt entries.
 */
static bool
xchk_bmap_want_check_rmaps(
	struct xchk_bmap_info	*info)
{
	struct xfs_scrub	*sc = info->sc;

	if (!xfs_has_rmapbt(sc->mp))
		return false;
	if (info->whichfork == XFS_COW_FORK)
		return false;
	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return false;

	if (info->whichfork == XFS_ATTR_FORK)
		return xchk_bmap_check_empty_attrfork(sc->ip);

	return xchk_bmap_check_empty_datafork(sc->ip);
}

/* Make sure each rmap has a corresponding bmbt entry. */
STATIC int
xchk_bmap_check_rmaps(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_perag	*pag = NULL;
	int			error;

	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag);
		if (error ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) {
			/* Release the perag reference before bailing out. */
			xfs_perag_rele(pag);
			return error;
		}
	}

	return 0;
}

/* Scrub a delalloc reservation from the incore extent map tree. */
STATIC void
xchk_bmap_iextent_delalloc(
	struct xfs_inode	*ip,
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_mount	*mp = info->sc->mp;

	/*
	 * Check for out-of-order extents.  This record could have come
	 * from the incore list, for which there is no ordering check.
	 */
	if (irec->br_startoff < info->prev_rec.br_startoff +
			info->prev_rec.br_blockcount)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);

	/* Make sure the extent points to a valid place. */
	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
}

/* Decide if this individual fork mapping is ok. */
static bool
xchk_bmap_iext_mapping(
	struct xchk_bmap_info		*info,
	const struct xfs_bmbt_irec	*irec)
{
	/* There should never be a "hole" extent in either extent list. */
	if (irec->br_startblock == HOLESTARTBLOCK)
		return false;
	if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN)
		return false;
	return true;
}

/* Are these two mappings contiguous with each other? */
static inline bool
xchk_are_bmaps_contiguous(
	const struct xchk_bmap_info	*info,
	const struct xfs_bmbt_irec	*b1,
	const struct xfs_bmbt_irec	*b2)
{
	struct xfs_mount		*mp = info->sc->mp;

	/* Don't try to combine unallocated mappings. */
	if (!xfs_bmap_is_real_extent(b1))
		return false;
	if (!xfs_bmap_is_real_extent(b2))
		return false;

	/* Does b2 come right after b1 in the logical and physical range? */
	if (b1->br_startoff + b1->br_blockcount != b2->br_startoff)
		return false;
	if (b1->br_startblock + b1->br_blockcount != b2->br_startblock)
		return false;
	if (b1->br_state != b2->br_state)
		return false;

	/*
	 * Don't combine bmaps that would cross rtgroup boundaries.  This is a
	 * valid state, but if combined they will fail rtb extent checks.
	 */
	if (info->is_rt && xfs_has_rtgroups(mp)) {
		if (xfs_rtb_to_rgno(mp, b1->br_startblock) !=
		    xfs_rtb_to_rgno(mp, b2->br_startblock))
			return false;
	}

	return true;
}

/*
 * Walk the incore extent records, accumulating consecutive contiguous records
 * into a single incore mapping.  Returns true if @irec has been set to a
 * mapping or false if there are no more mappings.  Caller must ensure that
 * @info.icur is zeroed before the first call.
 */
static bool
xchk_bmap_iext_iter(
	struct xchk_bmap_info	*info,
	struct xfs_bmbt_irec	*irec)
{
	struct xfs_bmbt_irec	got;
	struct xfs_ifork	*ifp;
	unsigned int		nr = 0;	/* number of records merged into @irec */

	ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork);

	/* Advance to the next iextent record and check the mapping. */
	xfs_iext_next(ifp, &info->icur);
	if (!xfs_iext_get_extent(ifp, &info->icur, irec))
		return false;

	if (!xchk_bmap_iext_mapping(info, irec)) {
		xchk_fblock_set_corrupt(info->sc, info->whichfork,
				irec->br_startoff);
		return false;
	}
	nr++;

	/*
	 * Iterate subsequent iextent records and merge them with the one
	 * that we just read, if possible.
	 */
	while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) {
		if (!xchk_are_bmaps_contiguous(info, irec, &got))
			break;

		if (!xchk_bmap_iext_mapping(info, &got)) {
			xchk_fblock_set_corrupt(info->sc, info->whichfork,
					got.br_startoff);
			return false;
		}
		nr++;

		irec->br_blockcount += got.br_blockcount;
		xfs_iext_next(ifp, &info->icur);
	}

	/*
	 * If the merged mapping could be expressed with fewer bmbt records
	 * than we actually found, notify the user that this fork could be
	 * optimized.  CoW forks only exist in memory so we ignore them.
	 */
	if (nr > 1 && info->whichfork != XFS_COW_FORK &&
	    howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr)
		xchk_ino_set_preen(info->sc, info->sc->ip->i_ino);

	return true;
}

/*
 * Scrub an inode fork's block mappings.
 *
 * First we scan every record in every btree block, if applicable.
 * Then we unconditionally scan the incore extent cache.
 */
STATIC int
xchk_bmap(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_bmbt_irec	irec;
	struct xchk_bmap_info	info = { NULL };
	struct xfs_mount	*mp = sc->mp;
	struct xfs_inode	*ip = sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	xfs_fileoff_t		endoff;
	int			error = 0;

	/* Non-existent forks can be ignored. */
	if (!ifp)
		return -ENOENT;

	info.is_rt = xfs_ifork_is_realtime(ip, whichfork);
	info.whichfork = whichfork;
	info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip);
	info.sc = sc;

	switch (whichfork) {
	case XFS_COW_FORK:
		/* No CoW forks on non-reflink filesystems. */
		if (!xfs_has_reflink(mp)) {
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
			return 0;
		}
		break;
	case XFS_ATTR_FORK:
		/*
		 * "attr" means that an attr fork was created at some point in
		 * the life of this filesystem.  "attr2" means that inodes have
		 * variable-sized data/attr fork areas.  Hence we only check
		 * attr here.
		 */
		if (!xfs_has_attr(mp))
			xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		break;
	default:
		ASSERT(whichfork == XFS_DATA_FORK);
		break;
	}

	/* Check the fork values */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
		/* No mappings to check.
		 */
		if (whichfork == XFS_COW_FORK)
			xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	case XFS_DINODE_FMT_EXTENTS:
		break;
	case XFS_DINODE_FMT_BTREE:
		if (whichfork == XFS_COW_FORK) {
			xchk_fblock_set_corrupt(sc, whichfork, 0);
			return 0;
		}

		error = xchk_bmap_btree(sc, whichfork, &info);
		if (error)
			return error;
		break;
	default:
		xchk_fblock_set_corrupt(sc, whichfork, 0);
		return 0;
	}

	if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
		return 0;

	/* Find the offset of the last extent in the mapping. */
	error = xfs_bmap_last_offset(ip, &endoff, whichfork);
	if (!xchk_fblock_process_error(sc, whichfork, 0, &error))
		return error;

	/*
	 * Scrub extent records.  We use a special iterator function here that
	 * combines adjacent mappings if they are logically and physically
	 * contiguous.  For large allocations that require multiple bmbt
	 * records, this reduces the number of cross-referencing calls, which
	 * reduces runtime.  Cross referencing with the rmap is simpler because
	 * the rmap must match the combined mapping exactly.
	 */
	while (xchk_bmap_iext_iter(&info, &irec)) {
		if (xchk_should_terminate(sc, &error) ||
		    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
			return 0;

		/* Mappings must not extend past the last-extent offset. */
		if (irec.br_startoff >= endoff) {
			xchk_fblock_set_corrupt(sc, whichfork,
					irec.br_startoff);
			return 0;
		}

		/* Delalloc reservations have no physical extent to check. */
		if (isnullstartblock(irec.br_startblock))
			xchk_bmap_iextent_delalloc(ip, &info, &irec);
		else
			xchk_bmap_iextent(ip, &info, &irec);
		memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec));
	}

	if (xchk_bmap_want_check_rmaps(&info)) {
		error = xchk_bmap_check_rmaps(sc, whichfork);
		if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error))
			return error;
	}

	return 0;
}

/* Scrub an inode's data fork. */
int
xchk_bmap_data(
	struct xfs_scrub	*sc)
{
	int			error;

	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	error = xchk_bmap(sc, XFS_DATA_FORK);
	if (error)
		return error;

	/* If the data fork is clean, it is clearly not zapped. */
	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED);
	return 0;
}

/* Scrub an inode's attr fork. */
int
xchk_bmap_attr(
	struct xfs_scrub	*sc)
{
	int			error;

	/*
	 * If the attr fork has been zapped, it's possible that forkoff was
	 * reset to zero and hence sc->ip->i_afp is NULL.  We don't want the
	 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok,
	 * so short circuit that logic by setting the corruption flag and
	 * returning immediately.
	 */
	if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) {
		xchk_ino_set_corrupt(sc, sc->ip->i_ino);
		return 0;
	}

	error = xchk_bmap(sc, XFS_ATTR_FORK);
	if (error)
		return error;

	/* If the attr fork is clean, it is clearly not zapped. */
	xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED);
	return 0;
}

/* Scrub an inode's CoW fork. */
int
xchk_bmap_cow(
	struct xfs_scrub	*sc)
{
	return xchk_bmap(sc, XFS_COW_FORK);
}