1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_bit.h" 14 #include "xfs_log_format.h" 15 #include "xfs_trans.h" 16 #include "xfs_inode.h" 17 #include "xfs_alloc.h" 18 #include "xfs_bmap.h" 19 #include "xfs_bmap_btree.h" 20 #include "xfs_rmap.h" 21 #include "xfs_rmap_btree.h" 22 #include "xfs_health.h" 23 #include "scrub/scrub.h" 24 #include "scrub/common.h" 25 #include "scrub/btree.h" 26 #include "scrub/health.h" 27 #include "xfs_ag.h" 28 29 /* Set us up with an inode's bmap. */ 30 int 31 xchk_setup_inode_bmap( 32 struct xfs_scrub *sc) 33 { 34 int error; 35 36 if (xchk_need_intent_drain(sc)) 37 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 38 39 error = xchk_iget_for_scrubbing(sc); 40 if (error) 41 goto out; 42 43 xchk_ilock(sc, XFS_IOLOCK_EXCL); 44 45 /* 46 * We don't want any ephemeral data/cow fork updates sitting around 47 * while we inspect block mappings, so wait for directio to finish 48 * and flush dirty data if we have delalloc reservations. 49 */ 50 if (S_ISREG(VFS_I(sc->ip)->i_mode) && 51 sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { 52 struct address_space *mapping = VFS_I(sc->ip)->i_mapping; 53 bool is_repair = xchk_could_repair(sc); 54 55 xchk_ilock(sc, XFS_MMAPLOCK_EXCL); 56 57 /* Break all our leases, we're going to mess with things. */ 58 if (is_repair) { 59 error = xfs_break_layouts(VFS_I(sc->ip), 60 &sc->ilock_flags, BREAK_WRITE); 61 if (error) 62 goto out; 63 } 64 65 inode_dio_wait(VFS_I(sc->ip)); 66 67 /* 68 * Try to flush all incore state to disk before we examine the 69 * space mappings for the data fork. Leave accumulated errors 70 * in the mapping for the writer threads to consume. 71 * 72 * On ENOSPC or EIO writeback errors, we continue into the 73 * extent mapping checks because write failures do not 74 * necessarily imply anything about the correctness of the file 75 * metadata. The metadata and the file data could be on 76 * completely separate devices; a media failure might only 77 * affect a subset of the disk, etc. We can handle delalloc 78 * extents in the scrubber, so leaving them in memory is fine. 79 */ 80 error = filemap_fdatawrite(mapping); 81 if (!error) 82 error = filemap_fdatawait_keep_errors(mapping); 83 if (error && (error != -ENOSPC && error != -EIO)) 84 goto out; 85 86 /* Drop the page cache if we're repairing block mappings. */ 87 if (is_repair) { 88 error = invalidate_inode_pages2( 89 VFS_I(sc->ip)->i_mapping); 90 if (error) 91 goto out; 92 } 93 94 } 95 96 /* Got the inode, lock it and we're ready to go. */ 97 error = xchk_trans_alloc(sc, 0); 98 if (error) 99 goto out; 100 101 error = xchk_ino_dqattach(sc); 102 if (error) 103 goto out; 104 105 xchk_ilock(sc, XFS_ILOCK_EXCL); 106 out: 107 /* scrub teardown will unlock and release the inode */ 108 return error; 109 } 110 111 /* 112 * Inode fork block mapping (BMBT) scrubber. 113 * More complex than the others because we have to scrub 114 * all the extents regardless of whether or not the fork 115 * is in btree format. 116 */ 117 118 struct xchk_bmap_info { 119 struct xfs_scrub *sc; 120 121 /* Incore extent tree cursor */ 122 struct xfs_iext_cursor icur; 123 124 /* Previous fork mapping that we examined */ 125 struct xfs_bmbt_irec prev_rec; 126 127 /* Is this a realtime fork? */ 128 bool is_rt; 129 130 /* May mappings point to shared space? */ 131 bool is_shared; 132 133 /* Was the incore extent tree loaded? */ 134 bool was_loaded; 135 136 /* Which inode fork are we checking? */ 137 int whichfork; 138 }; 139 140 /* Look for a corresponding rmap for this irec. */ 141 static inline bool 142 xchk_bmap_get_rmap( 143 struct xchk_bmap_info *info, 144 struct xfs_bmbt_irec *irec, 145 xfs_agblock_t agbno, 146 uint64_t owner, 147 struct xfs_rmap_irec *rmap) 148 { 149 xfs_fileoff_t offset; 150 unsigned int rflags = 0; 151 int has_rmap; 152 int error; 153 154 if (info->whichfork == XFS_ATTR_FORK) 155 rflags |= XFS_RMAP_ATTR_FORK; 156 if (irec->br_state == XFS_EXT_UNWRITTEN) 157 rflags |= XFS_RMAP_UNWRITTEN; 158 159 /* 160 * CoW staging extents are owned (on disk) by the refcountbt, so 161 * their rmaps do not have offsets. 162 */ 163 if (info->whichfork == XFS_COW_FORK) 164 offset = 0; 165 else 166 offset = irec->br_startoff; 167 168 /* 169 * If the caller thinks this could be a shared bmbt extent (IOWs, 170 * any data fork extent of a reflink inode) then we have to use the 171 * range rmap lookup to make sure we get the correct owner/offset. 172 */ 173 if (info->is_shared) { 174 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, 175 owner, offset, rflags, rmap, &has_rmap); 176 } else { 177 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 178 owner, offset, rflags, rmap, &has_rmap); 179 } 180 if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) 181 return false; 182 183 if (!has_rmap) 184 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 185 irec->br_startoff); 186 return has_rmap; 187 } 188 189 /* Make sure that we have rmapbt records for this data/attr fork extent. */ 190 STATIC void 191 xchk_bmap_xref_rmap( 192 struct xchk_bmap_info *info, 193 struct xfs_bmbt_irec *irec, 194 xfs_agblock_t agbno) 195 { 196 struct xfs_rmap_irec rmap; 197 unsigned long long rmap_end; 198 uint64_t owner = info->sc->ip->i_ino; 199 200 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 201 return; 202 203 /* Find the rmap record for this irec. */ 204 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 205 return; 206 207 /* 208 * The rmap must be an exact match for this incore file mapping record, 209 * which may have arisen from multiple ondisk records. 210 */ 211 if (rmap.rm_startblock != agbno) 212 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 213 irec->br_startoff); 214 215 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 216 if (rmap_end != agbno + irec->br_blockcount) 217 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 218 irec->br_startoff); 219 220 /* Check the logical offsets. */ 221 if (rmap.rm_offset != irec->br_startoff) 222 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 223 irec->br_startoff); 224 225 rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount; 226 if (rmap_end != irec->br_startoff + irec->br_blockcount) 227 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 228 irec->br_startoff); 229 230 /* Check the owner */ 231 if (rmap.rm_owner != owner) 232 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 233 irec->br_startoff); 234 235 /* 236 * Check for discrepancies between the unwritten flag in the irec and 237 * the rmap. Note that the (in-memory) CoW fork distinguishes between 238 * unwritten and written extents, but we don't track that in the rmap 239 * records because the blocks are owned (on-disk) by the refcountbt, 240 * which doesn't track unwritten state. 241 */ 242 if (!!(irec->br_state == XFS_EXT_UNWRITTEN) != 243 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) 244 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 245 irec->br_startoff); 246 247 if (!!(info->whichfork == XFS_ATTR_FORK) != 248 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) 249 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 250 irec->br_startoff); 251 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 252 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 253 irec->br_startoff); 254 } 255 256 /* Make sure that we have rmapbt records for this COW fork extent. */ 257 STATIC void 258 xchk_bmap_xref_rmap_cow( 259 struct xchk_bmap_info *info, 260 struct xfs_bmbt_irec *irec, 261 xfs_agblock_t agbno) 262 { 263 struct xfs_rmap_irec rmap; 264 unsigned long long rmap_end; 265 uint64_t owner = XFS_RMAP_OWN_COW; 266 267 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 268 return; 269 270 /* Find the rmap record for this irec. */ 271 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 272 return; 273 274 /* 275 * CoW staging extents are owned by the refcount btree, so the rmap 276 * can start before and end after the physical space allocated to this 277 * mapping. There are no offsets to check. 278 */ 279 if (rmap.rm_startblock > agbno) 280 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 281 irec->br_startoff); 282 283 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 284 if (rmap_end < agbno + irec->br_blockcount) 285 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 286 irec->br_startoff); 287 288 /* Check the owner */ 289 if (rmap.rm_owner != owner) 290 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 291 irec->br_startoff); 292 293 /* 294 * No flags allowed. Note that the (in-memory) CoW fork distinguishes 295 * between unwritten and written extents, but we don't track that in 296 * the rmap records because the blocks are owned (on-disk) by the 297 * refcountbt, which doesn't track unwritten state. 298 */ 299 if (rmap.rm_flags & XFS_RMAP_ATTR_FORK) 300 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 301 irec->br_startoff); 302 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 303 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 304 irec->br_startoff); 305 if (rmap.rm_flags & XFS_RMAP_UNWRITTEN) 306 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 307 irec->br_startoff); 308 } 309 310 /* Cross-reference a single rtdev extent record. */ 311 STATIC void 312 xchk_bmap_rt_iextent_xref( 313 struct xfs_inode *ip, 314 struct xchk_bmap_info *info, 315 struct xfs_bmbt_irec *irec) 316 { 317 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, 318 irec->br_blockcount); 319 } 320 321 /* Cross-reference a single datadev extent record. */ 322 STATIC void 323 xchk_bmap_iextent_xref( 324 struct xfs_inode *ip, 325 struct xchk_bmap_info *info, 326 struct xfs_bmbt_irec *irec) 327 { 328 struct xfs_owner_info oinfo; 329 struct xfs_mount *mp = info->sc->mp; 330 xfs_agnumber_t agno; 331 xfs_agblock_t agbno; 332 xfs_extlen_t len; 333 int error; 334 335 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); 336 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); 337 len = irec->br_blockcount; 338 339 error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa); 340 if (!xchk_fblock_process_error(info->sc, info->whichfork, 341 irec->br_startoff, &error)) 342 goto out_free; 343 344 xchk_xref_is_used_space(info->sc, agbno, len); 345 xchk_xref_is_not_inode_chunk(info->sc, agbno, len); 346 switch (info->whichfork) { 347 case XFS_DATA_FORK: 348 xchk_bmap_xref_rmap(info, irec, agbno); 349 if (!xfs_is_reflink_inode(info->sc->ip)) { 350 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 351 info->whichfork, irec->br_startoff); 352 xchk_xref_is_only_owned_by(info->sc, agbno, 353 irec->br_blockcount, &oinfo); 354 xchk_xref_is_not_shared(info->sc, agbno, 355 irec->br_blockcount); 356 } 357 xchk_xref_is_not_cow_staging(info->sc, agbno, 358 irec->br_blockcount); 359 break; 360 case XFS_ATTR_FORK: 361 xchk_bmap_xref_rmap(info, irec, agbno); 362 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 363 info->whichfork, irec->br_startoff); 364 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 365 &oinfo); 366 xchk_xref_is_not_shared(info->sc, agbno, 367 irec->br_blockcount); 368 xchk_xref_is_not_cow_staging(info->sc, agbno, 369 irec->br_blockcount); 370 break; 371 case XFS_COW_FORK: 372 xchk_bmap_xref_rmap_cow(info, irec, agbno); 373 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 374 &XFS_RMAP_OINFO_COW); 375 xchk_xref_is_cow_staging(info->sc, agbno, 376 irec->br_blockcount); 377 xchk_xref_is_not_shared(info->sc, agbno, 378 irec->br_blockcount); 379 break; 380 } 381 382 out_free: 383 xchk_ag_free(info->sc, &info->sc->sa); 384 } 385 386 /* 387 * Directories and attr forks should never have blocks that can't be addressed 388 * by a xfs_dablk_t. 389 */ 390 STATIC void 391 xchk_bmap_dirattr_extent( 392 struct xfs_inode *ip, 393 struct xchk_bmap_info *info, 394 struct xfs_bmbt_irec *irec) 395 { 396 struct xfs_mount *mp = ip->i_mount; 397 xfs_fileoff_t off; 398 399 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK) 400 return; 401 402 if (!xfs_verify_dablk(mp, irec->br_startoff)) 403 xchk_fblock_set_corrupt(info->sc, info->whichfork, 404 irec->br_startoff); 405 406 off = irec->br_startoff + irec->br_blockcount - 1; 407 if (!xfs_verify_dablk(mp, off)) 408 xchk_fblock_set_corrupt(info->sc, info->whichfork, off); 409 } 410 411 /* Scrub a single extent record. */ 412 STATIC void 413 xchk_bmap_iextent( 414 struct xfs_inode *ip, 415 struct xchk_bmap_info *info, 416 struct xfs_bmbt_irec *irec) 417 { 418 struct xfs_mount *mp = info->sc->mp; 419 420 /* 421 * Check for out-of-order extents. This record could have come 422 * from the incore list, for which there is no ordering check. 423 */ 424 if (irec->br_startoff < info->prev_rec.br_startoff + 425 info->prev_rec.br_blockcount) 426 xchk_fblock_set_corrupt(info->sc, info->whichfork, 427 irec->br_startoff); 428 429 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 430 xchk_fblock_set_corrupt(info->sc, info->whichfork, 431 irec->br_startoff); 432 433 xchk_bmap_dirattr_extent(ip, info, irec); 434 435 /* Make sure the extent points to a valid place. */ 436 if (info->is_rt && 437 !xfs_verify_rtbext(mp, irec->br_startblock, irec->br_blockcount)) 438 xchk_fblock_set_corrupt(info->sc, info->whichfork, 439 irec->br_startoff); 440 if (!info->is_rt && 441 !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) 442 xchk_fblock_set_corrupt(info->sc, info->whichfork, 443 irec->br_startoff); 444 445 /* We don't allow unwritten extents on attr forks. */ 446 if (irec->br_state == XFS_EXT_UNWRITTEN && 447 info->whichfork == XFS_ATTR_FORK) 448 xchk_fblock_set_corrupt(info->sc, info->whichfork, 449 irec->br_startoff); 450 451 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 452 return; 453 454 if (info->is_rt) 455 xchk_bmap_rt_iextent_xref(ip, info, irec); 456 else 457 xchk_bmap_iextent_xref(ip, info, irec); 458 } 459 460 /* Scrub a bmbt record. */ 461 STATIC int 462 xchk_bmapbt_rec( 463 struct xchk_btree *bs, 464 const union xfs_btree_rec *rec) 465 { 466 struct xfs_bmbt_irec irec; 467 struct xfs_bmbt_irec iext_irec; 468 struct xfs_iext_cursor icur; 469 struct xchk_bmap_info *info = bs->private; 470 struct xfs_inode *ip = bs->cur->bc_ino.ip; 471 struct xfs_buf *bp = NULL; 472 struct xfs_btree_block *block; 473 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork); 474 uint64_t owner; 475 int i; 476 477 /* 478 * Check the owners of the btree blocks up to the level below 479 * the root since the verifiers don't do that. 480 */ 481 if (xfs_has_crc(bs->cur->bc_mp) && 482 bs->cur->bc_levels[0].ptr == 1) { 483 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { 484 block = xfs_btree_get_block(bs->cur, i, &bp); 485 owner = be64_to_cpu(block->bb_u.l.bb_owner); 486 if (owner != ip->i_ino) 487 xchk_fblock_set_corrupt(bs->sc, 488 info->whichfork, 0); 489 } 490 } 491 492 /* 493 * Check that the incore extent tree contains an extent that matches 494 * this one exactly. We validate those cached bmaps later, so we don't 495 * need to check them here. If the incore extent tree was just loaded 496 * from disk by the scrubber, we assume that its contents match what's 497 * on disk (we still hold the ILOCK) and skip the equivalence check. 498 */ 499 if (!info->was_loaded) 500 return 0; 501 502 xfs_bmbt_disk_get_all(&rec->bmbt, &irec); 503 if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) { 504 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 505 irec.br_startoff); 506 return 0; 507 } 508 509 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, 510 &iext_irec) || 511 irec.br_startoff != iext_irec.br_startoff || 512 irec.br_startblock != iext_irec.br_startblock || 513 irec.br_blockcount != iext_irec.br_blockcount || 514 irec.br_state != iext_irec.br_state) 515 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 516 irec.br_startoff); 517 return 0; 518 } 519 520 /* Scan the btree records. */ 521 STATIC int 522 xchk_bmap_btree( 523 struct xfs_scrub *sc, 524 int whichfork, 525 struct xchk_bmap_info *info) 526 { 527 struct xfs_owner_info oinfo; 528 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork); 529 struct xfs_mount *mp = sc->mp; 530 struct xfs_inode *ip = sc->ip; 531 struct xfs_btree_cur *cur; 532 int error; 533 534 /* Load the incore bmap cache if it's not loaded. */ 535 info->was_loaded = !xfs_need_iread_extents(ifp); 536 537 error = xfs_iread_extents(sc->tp, ip, whichfork); 538 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 539 goto out; 540 541 /* Check the btree structure. */ 542 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); 543 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 544 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); 545 xfs_btree_del_cursor(cur, error); 546 out: 547 return error; 548 } 549 550 struct xchk_bmap_check_rmap_info { 551 struct xfs_scrub *sc; 552 int whichfork; 553 struct xfs_iext_cursor icur; 554 }; 555 556 /* Can we find bmaps that fit this rmap? */ 557 STATIC int 558 xchk_bmap_check_rmap( 559 struct xfs_btree_cur *cur, 560 const struct xfs_rmap_irec *rec, 561 void *priv) 562 { 563 struct xfs_bmbt_irec irec; 564 struct xfs_rmap_irec check_rec; 565 struct xchk_bmap_check_rmap_info *sbcri = priv; 566 struct xfs_ifork *ifp; 567 struct xfs_scrub *sc = sbcri->sc; 568 bool have_map; 569 570 /* Is this even the right fork? */ 571 if (rec->rm_owner != sc->ip->i_ino) 572 return 0; 573 if ((sbcri->whichfork == XFS_ATTR_FORK) ^ 574 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) 575 return 0; 576 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) 577 return 0; 578 579 /* Now look up the bmbt record. */ 580 ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork); 581 if (!ifp) { 582 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 583 rec->rm_offset); 584 goto out; 585 } 586 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, 587 &sbcri->icur, &irec); 588 if (!have_map) 589 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 590 rec->rm_offset); 591 /* 592 * bmap extent record lengths are constrained to 2^21 blocks in length 593 * because of space constraints in the on-disk metadata structure. 594 * However, rmap extent record lengths are constrained only by AG 595 * length, so we have to loop through the bmbt to make sure that the 596 * entire rmap is covered by bmbt records. 597 */ 598 check_rec = *rec; 599 while (have_map) { 600 if (irec.br_startoff != check_rec.rm_offset) 601 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 602 check_rec.rm_offset); 603 if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, 604 cur->bc_ag.pag->pag_agno, 605 check_rec.rm_startblock)) 606 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 607 check_rec.rm_offset); 608 if (irec.br_blockcount > check_rec.rm_blockcount) 609 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 610 check_rec.rm_offset); 611 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 612 break; 613 check_rec.rm_startblock += irec.br_blockcount; 614 check_rec.rm_offset += irec.br_blockcount; 615 check_rec.rm_blockcount -= irec.br_blockcount; 616 if (check_rec.rm_blockcount == 0) 617 break; 618 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); 619 if (!have_map) 620 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 621 check_rec.rm_offset); 622 } 623 624 out: 625 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 626 return -ECANCELED; 627 return 0; 628 } 629 630 /* Make sure each rmap has a corresponding bmbt entry. */ 631 STATIC int 632 xchk_bmap_check_ag_rmaps( 633 struct xfs_scrub *sc, 634 int whichfork, 635 struct xfs_perag *pag) 636 { 637 struct xchk_bmap_check_rmap_info sbcri; 638 struct xfs_btree_cur *cur; 639 struct xfs_buf *agf; 640 int error; 641 642 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf); 643 if (error) 644 return error; 645 646 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag); 647 648 sbcri.sc = sc; 649 sbcri.whichfork = whichfork; 650 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); 651 if (error == -ECANCELED) 652 error = 0; 653 654 xfs_btree_del_cursor(cur, error); 655 xfs_trans_brelse(sc->tp, agf); 656 return error; 657 } 658 659 /* 660 * Decide if we want to scan the reverse mappings to determine if the attr 661 * fork /really/ has zero space mappings. 662 */ 663 STATIC bool 664 xchk_bmap_check_empty_attrfork( 665 struct xfs_inode *ip) 666 { 667 struct xfs_ifork *ifp = &ip->i_af; 668 669 /* 670 * If the dinode repair found a bad attr fork, it will reset the fork 671 * to extents format with zero records and wait for the this scrubber 672 * to reconstruct the block mappings. If the fork is not in this 673 * state, then the fork cannot have been zapped. 674 */ 675 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) 676 return false; 677 678 /* 679 * Files can have an attr fork in EXTENTS format with zero records for 680 * several reasons: 681 * 682 * a) an attr set created a fork but ran out of space 683 * b) attr replace deleted an old attr but failed during the set step 684 * c) the data fork was in btree format when all attrs were deleted, so 685 * the fork was left in place 686 * d) the inode repair code zapped the fork 687 * 688 * Only in case (d) do we want to scan the rmapbt to see if we need to 689 * rebuild the attr fork. The fork zap code clears all DAC permission 690 * bits and zeroes the uid and gid, so avoid the scan if any of those 691 * three conditions are not met. 692 */ 693 if ((VFS_I(ip)->i_mode & 0777) != 0) 694 return false; 695 if (!uid_eq(VFS_I(ip)->i_uid, GLOBAL_ROOT_UID)) 696 return false; 697 if (!gid_eq(VFS_I(ip)->i_gid, GLOBAL_ROOT_GID)) 698 return false; 699 700 return true; 701 } 702 703 /* 704 * Decide if we want to scan the reverse mappings to determine if the data 705 * fork /really/ has zero space mappings. 706 */ 707 STATIC bool 708 xchk_bmap_check_empty_datafork( 709 struct xfs_inode *ip) 710 { 711 struct xfs_ifork *ifp = &ip->i_df; 712 713 /* Don't support realtime rmap checks yet. */ 714 if (XFS_IS_REALTIME_INODE(ip)) 715 return false; 716 717 /* 718 * If the dinode repair found a bad data fork, it will reset the fork 719 * to extents format with zero records and wait for the this scrubber 720 * to reconstruct the block mappings. If the fork is not in this 721 * state, then the fork cannot have been zapped. 722 */ 723 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS || ifp->if_nextents != 0) 724 return false; 725 726 /* 727 * If we encounter an empty data fork along with evidence that the fork 728 * might not really be empty, we need to scan the reverse mappings to 729 * decide if we're going to rebuild the fork. Data forks with nonzero 730 * file size are scanned. 731 */ 732 return i_size_read(VFS_I(ip)) != 0; 733 } 734 735 /* 736 * Decide if we want to walk every rmap btree in the fs to make sure that each 737 * rmap for this file fork has corresponding bmbt entries. 738 */ 739 static bool 740 xchk_bmap_want_check_rmaps( 741 struct xchk_bmap_info *info) 742 { 743 struct xfs_scrub *sc = info->sc; 744 745 if (!xfs_has_rmapbt(sc->mp)) 746 return false; 747 if (info->whichfork == XFS_COW_FORK) 748 return false; 749 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 750 return false; 751 752 if (info->whichfork == XFS_ATTR_FORK) 753 return xchk_bmap_check_empty_attrfork(sc->ip); 754 755 return xchk_bmap_check_empty_datafork(sc->ip); 756 } 757 758 /* Make sure each rmap has a corresponding bmbt entry. */ 759 STATIC int 760 xchk_bmap_check_rmaps( 761 struct xfs_scrub *sc, 762 int whichfork) 763 { 764 struct xfs_perag *pag; 765 xfs_agnumber_t agno; 766 int error; 767 768 for_each_perag(sc->mp, agno, pag) { 769 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); 770 if (error || 771 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { 772 xfs_perag_rele(pag); 773 return error; 774 } 775 } 776 777 return 0; 778 } 779 780 /* Scrub a delalloc reservation from the incore extent map tree. */ 781 STATIC void 782 xchk_bmap_iextent_delalloc( 783 struct xfs_inode *ip, 784 struct xchk_bmap_info *info, 785 struct xfs_bmbt_irec *irec) 786 { 787 struct xfs_mount *mp = info->sc->mp; 788 789 /* 790 * Check for out-of-order extents. This record could have come 791 * from the incore list, for which there is no ordering check. 792 */ 793 if (irec->br_startoff < info->prev_rec.br_startoff + 794 info->prev_rec.br_blockcount) 795 xchk_fblock_set_corrupt(info->sc, info->whichfork, 796 irec->br_startoff); 797 798 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 799 xchk_fblock_set_corrupt(info->sc, info->whichfork, 800 irec->br_startoff); 801 802 /* Make sure the extent points to a valid place. */ 803 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 804 xchk_fblock_set_corrupt(info->sc, info->whichfork, 805 irec->br_startoff); 806 } 807 808 /* Decide if this individual fork mapping is ok. */ 809 static bool 810 xchk_bmap_iext_mapping( 811 struct xchk_bmap_info *info, 812 const struct xfs_bmbt_irec *irec) 813 { 814 /* There should never be a "hole" extent in either extent list. */ 815 if (irec->br_startblock == HOLESTARTBLOCK) 816 return false; 817 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 818 return false; 819 return true; 820 } 821 822 /* Are these two mappings contiguous with each other? */ 823 static inline bool 824 xchk_are_bmaps_contiguous( 825 const struct xfs_bmbt_irec *b1, 826 const struct xfs_bmbt_irec *b2) 827 { 828 /* Don't try to combine unallocated mappings. */ 829 if (!xfs_bmap_is_real_extent(b1)) 830 return false; 831 if (!xfs_bmap_is_real_extent(b2)) 832 return false; 833 834 /* Does b2 come right after b1 in the logical and physical range? */ 835 if (b1->br_startoff + b1->br_blockcount != b2->br_startoff) 836 return false; 837 if (b1->br_startblock + b1->br_blockcount != b2->br_startblock) 838 return false; 839 if (b1->br_state != b2->br_state) 840 return false; 841 return true; 842 } 843 844 /* 845 * Walk the incore extent records, accumulating consecutive contiguous records 846 * into a single incore mapping. Returns true if @irec has been set to a 847 * mapping or false if there are no more mappings. Caller must ensure that 848 * @info.icur is zeroed before the first call. 849 */ 850 static bool 851 xchk_bmap_iext_iter( 852 struct xchk_bmap_info *info, 853 struct xfs_bmbt_irec *irec) 854 { 855 struct xfs_bmbt_irec got; 856 struct xfs_ifork *ifp; 857 unsigned int nr = 0; 858 859 ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); 860 861 /* Advance to the next iextent record and check the mapping. */ 862 xfs_iext_next(ifp, &info->icur); 863 if (!xfs_iext_get_extent(ifp, &info->icur, irec)) 864 return false; 865 866 if (!xchk_bmap_iext_mapping(info, irec)) { 867 xchk_fblock_set_corrupt(info->sc, info->whichfork, 868 irec->br_startoff); 869 return false; 870 } 871 nr++; 872 873 /* 874 * Iterate subsequent iextent records and merge them with the one 875 * that we just read, if possible. 876 */ 877 while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) { 878 if (!xchk_are_bmaps_contiguous(irec, &got)) 879 break; 880 881 if (!xchk_bmap_iext_mapping(info, &got)) { 882 xchk_fblock_set_corrupt(info->sc, info->whichfork, 883 got.br_startoff); 884 return false; 885 } 886 nr++; 887 888 irec->br_blockcount += got.br_blockcount; 889 xfs_iext_next(ifp, &info->icur); 890 } 891 892 /* 893 * If the merged mapping could be expressed with fewer bmbt records 894 * than we actually found, notify the user that this fork could be 895 * optimized. CoW forks only exist in memory so we ignore them. 896 */ 897 if (nr > 1 && info->whichfork != XFS_COW_FORK && 898 howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr) 899 xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); 900 901 return true; 902 } 903 904 /* 905 * Scrub an inode fork's block mappings. 906 * 907 * First we scan every record in every btree block, if applicable. 908 * Then we unconditionally scan the incore extent cache. 909 */ 910 STATIC int 911 xchk_bmap( 912 struct xfs_scrub *sc, 913 int whichfork) 914 { 915 struct xfs_bmbt_irec irec; 916 struct xchk_bmap_info info = { NULL }; 917 struct xfs_mount *mp = sc->mp; 918 struct xfs_inode *ip = sc->ip; 919 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 920 xfs_fileoff_t endoff; 921 int error = 0; 922 923 /* Non-existent forks can be ignored. */ 924 if (!ifp) 925 return -ENOENT; 926 927 info.is_rt = xfs_ifork_is_realtime(ip, whichfork); 928 info.whichfork = whichfork; 929 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); 930 info.sc = sc; 931 932 switch (whichfork) { 933 case XFS_COW_FORK: 934 /* No CoW forks on non-reflink filesystems. */ 935 if (!xfs_has_reflink(mp)) { 936 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 937 return 0; 938 } 939 break; 940 case XFS_ATTR_FORK: 941 if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) 942 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 943 break; 944 default: 945 ASSERT(whichfork == XFS_DATA_FORK); 946 break; 947 } 948 949 /* Check the fork values */ 950 switch (ifp->if_format) { 951 case XFS_DINODE_FMT_UUID: 952 case XFS_DINODE_FMT_DEV: 953 case XFS_DINODE_FMT_LOCAL: 954 /* No mappings to check. */ 955 if (whichfork == XFS_COW_FORK) 956 xchk_fblock_set_corrupt(sc, whichfork, 0); 957 return 0; 958 case XFS_DINODE_FMT_EXTENTS: 959 break; 960 case XFS_DINODE_FMT_BTREE: 961 if (whichfork == XFS_COW_FORK) { 962 xchk_fblock_set_corrupt(sc, whichfork, 0); 963 return 0; 964 } 965 966 error = xchk_bmap_btree(sc, whichfork, &info); 967 if (error) 968 return error; 969 break; 970 default: 971 xchk_fblock_set_corrupt(sc, whichfork, 0); 972 return 0; 973 } 974 975 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 976 return 0; 977 978 /* Find the offset of the last extent in the mapping. */ 979 error = xfs_bmap_last_offset(ip, &endoff, whichfork); 980 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 981 return error; 982 983 /* 984 * Scrub extent records. We use a special iterator function here that 985 * combines adjacent mappings if they are logically and physically 986 * contiguous. For large allocations that require multiple bmbt 987 * records, this reduces the number of cross-referencing calls, which 988 * reduces runtime. Cross referencing with the rmap is simpler because 989 * the rmap must match the combined mapping exactly. 990 */ 991 while (xchk_bmap_iext_iter(&info, &irec)) { 992 if (xchk_should_terminate(sc, &error) || 993 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 994 return 0; 995 996 if (irec.br_startoff >= endoff) { 997 xchk_fblock_set_corrupt(sc, whichfork, 998 irec.br_startoff); 999 return 0; 1000 } 1001 1002 if (isnullstartblock(irec.br_startblock)) 1003 xchk_bmap_iextent_delalloc(ip, &info, &irec); 1004 else 1005 xchk_bmap_iextent(ip, &info, &irec); 1006 memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec)); 1007 } 1008 1009 if (xchk_bmap_want_check_rmaps(&info)) { 1010 error = xchk_bmap_check_rmaps(sc, whichfork); 1011 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) 1012 return error; 1013 } 1014 1015 return 0; 1016 } 1017 1018 /* Scrub an inode's data fork. */ 1019 int 1020 xchk_bmap_data( 1021 struct xfs_scrub *sc) 1022 { 1023 int error; 1024 1025 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTD_ZAPPED)) { 1026 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 1027 return 0; 1028 } 1029 1030 error = xchk_bmap(sc, XFS_DATA_FORK); 1031 if (error) 1032 return error; 1033 1034 /* If the data fork is clean, it is clearly not zapped. */ 1035 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTD_ZAPPED); 1036 return 0; 1037 } 1038 1039 /* Scrub an inode's attr fork. */ 1040 int 1041 xchk_bmap_attr( 1042 struct xfs_scrub *sc) 1043 { 1044 int error; 1045 1046 /* 1047 * If the attr fork has been zapped, it's possible that forkoff was 1048 * reset to zero and hence sc->ip->i_afp is NULL. We don't want the 1049 * NULL ifp check in xchk_bmap to conclude that the attr fork is ok, 1050 * so short circuit that logic by setting the corruption flag and 1051 * returning immediately. 1052 */ 1053 if (xchk_file_looks_zapped(sc, XFS_SICK_INO_BMBTA_ZAPPED)) { 1054 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 1055 return 0; 1056 } 1057 1058 error = xchk_bmap(sc, XFS_ATTR_FORK); 1059 if (error) 1060 return error; 1061 1062 /* If the attr fork is clean, it is clearly not zapped. */ 1063 xchk_mark_healthy_if_clean(sc, XFS_SICK_INO_BMBTA_ZAPPED); 1064 return 0; 1065 } 1066 1067 /* Scrub an inode's CoW fork. */ 1068 int 1069 xchk_bmap_cow( 1070 struct xfs_scrub *sc) 1071 { 1072 return xchk_bmap(sc, XFS_COW_FORK); 1073 } 1074