1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_btree.h" 13 #include "xfs_bit.h" 14 #include "xfs_log_format.h" 15 #include "xfs_trans.h" 16 #include "xfs_inode.h" 17 #include "xfs_alloc.h" 18 #include "xfs_bmap.h" 19 #include "xfs_bmap_btree.h" 20 #include "xfs_rmap.h" 21 #include "xfs_rmap_btree.h" 22 #include "scrub/scrub.h" 23 #include "scrub/common.h" 24 #include "scrub/btree.h" 25 #include "xfs_ag.h" 26 27 /* Set us up with an inode's bmap. */ 28 int 29 xchk_setup_inode_bmap( 30 struct xfs_scrub *sc) 31 { 32 int error; 33 34 if (xchk_need_intent_drain(sc)) 35 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 36 37 error = xchk_iget_for_scrubbing(sc); 38 if (error) 39 goto out; 40 41 sc->ilock_flags = XFS_IOLOCK_EXCL; 42 xfs_ilock(sc->ip, XFS_IOLOCK_EXCL); 43 44 /* 45 * We don't want any ephemeral data/cow fork updates sitting around 46 * while we inspect block mappings, so wait for directio to finish 47 * and flush dirty data if we have delalloc reservations. 48 */ 49 if (S_ISREG(VFS_I(sc->ip)->i_mode) && 50 sc->sm->sm_type != XFS_SCRUB_TYPE_BMBTA) { 51 struct address_space *mapping = VFS_I(sc->ip)->i_mapping; 52 53 sc->ilock_flags |= XFS_MMAPLOCK_EXCL; 54 xfs_ilock(sc->ip, XFS_MMAPLOCK_EXCL); 55 56 inode_dio_wait(VFS_I(sc->ip)); 57 58 /* 59 * Try to flush all incore state to disk before we examine the 60 * space mappings for the data fork. Leave accumulated errors 61 * in the mapping for the writer threads to consume. 62 * 63 * On ENOSPC or EIO writeback errors, we continue into the 64 * extent mapping checks because write failures do not 65 * necessarily imply anything about the correctness of the file 66 * metadata. The metadata and the file data could be on 67 * completely separate devices; a media failure might only 68 * affect a subset of the disk, etc. We can handle delalloc 69 * extents in the scrubber, so leaving them in memory is fine. 70 */ 71 error = filemap_fdatawrite(mapping); 72 if (!error) 73 error = filemap_fdatawait_keep_errors(mapping); 74 if (error && (error != -ENOSPC && error != -EIO)) 75 goto out; 76 } 77 78 /* Got the inode, lock it and we're ready to go. */ 79 error = xchk_trans_alloc(sc, 0); 80 if (error) 81 goto out; 82 sc->ilock_flags |= XFS_ILOCK_EXCL; 83 xfs_ilock(sc->ip, XFS_ILOCK_EXCL); 84 85 out: 86 /* scrub teardown will unlock and release the inode */ 87 return error; 88 } 89 90 /* 91 * Inode fork block mapping (BMBT) scrubber. 92 * More complex than the others because we have to scrub 93 * all the extents regardless of whether or not the fork 94 * is in btree format. 95 */ 96 97 struct xchk_bmap_info { 98 struct xfs_scrub *sc; 99 100 /* Incore extent tree cursor */ 101 struct xfs_iext_cursor icur; 102 103 /* Previous fork mapping that we examined */ 104 struct xfs_bmbt_irec prev_rec; 105 106 /* Is this a realtime fork? */ 107 bool is_rt; 108 109 /* May mappings point to shared space? */ 110 bool is_shared; 111 112 /* Was the incore extent tree loaded? */ 113 bool was_loaded; 114 115 /* Which inode fork are we checking? */ 116 int whichfork; 117 }; 118 119 /* Look for a corresponding rmap for this irec. */ 120 static inline bool 121 xchk_bmap_get_rmap( 122 struct xchk_bmap_info *info, 123 struct xfs_bmbt_irec *irec, 124 xfs_agblock_t agbno, 125 uint64_t owner, 126 struct xfs_rmap_irec *rmap) 127 { 128 xfs_fileoff_t offset; 129 unsigned int rflags = 0; 130 int has_rmap; 131 int error; 132 133 if (info->whichfork == XFS_ATTR_FORK) 134 rflags |= XFS_RMAP_ATTR_FORK; 135 if (irec->br_state == XFS_EXT_UNWRITTEN) 136 rflags |= XFS_RMAP_UNWRITTEN; 137 138 /* 139 * CoW staging extents are owned (on disk) by the refcountbt, so 140 * their rmaps do not have offsets. 141 */ 142 if (info->whichfork == XFS_COW_FORK) 143 offset = 0; 144 else 145 offset = irec->br_startoff; 146 147 /* 148 * If the caller thinks this could be a shared bmbt extent (IOWs, 149 * any data fork extent of a reflink inode) then we have to use the 150 * range rmap lookup to make sure we get the correct owner/offset. 151 */ 152 if (info->is_shared) { 153 error = xfs_rmap_lookup_le_range(info->sc->sa.rmap_cur, agbno, 154 owner, offset, rflags, rmap, &has_rmap); 155 } else { 156 error = xfs_rmap_lookup_le(info->sc->sa.rmap_cur, agbno, 157 owner, offset, rflags, rmap, &has_rmap); 158 } 159 if (!xchk_should_check_xref(info->sc, &error, &info->sc->sa.rmap_cur)) 160 return false; 161 162 if (!has_rmap) 163 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 164 irec->br_startoff); 165 return has_rmap; 166 } 167 168 /* Make sure that we have rmapbt records for this data/attr fork extent. */ 169 STATIC void 170 xchk_bmap_xref_rmap( 171 struct xchk_bmap_info *info, 172 struct xfs_bmbt_irec *irec, 173 xfs_agblock_t agbno) 174 { 175 struct xfs_rmap_irec rmap; 176 unsigned long long rmap_end; 177 uint64_t owner = info->sc->ip->i_ino; 178 179 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 180 return; 181 182 /* Find the rmap record for this irec. */ 183 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 184 return; 185 186 /* 187 * The rmap must be an exact match for this incore file mapping record, 188 * which may have arisen from multiple ondisk records. 189 */ 190 if (rmap.rm_startblock != agbno) 191 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 192 irec->br_startoff); 193 194 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 195 if (rmap_end != agbno + irec->br_blockcount) 196 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 197 irec->br_startoff); 198 199 /* Check the logical offsets. */ 200 if (rmap.rm_offset != irec->br_startoff) 201 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 202 irec->br_startoff); 203 204 rmap_end = (unsigned long long)rmap.rm_offset + rmap.rm_blockcount; 205 if (rmap_end != irec->br_startoff + irec->br_blockcount) 206 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 207 irec->br_startoff); 208 209 /* Check the owner */ 210 if (rmap.rm_owner != owner) 211 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 212 irec->br_startoff); 213 214 /* 215 * Check for discrepancies between the unwritten flag in the irec and 216 * the rmap. Note that the (in-memory) CoW fork distinguishes between 217 * unwritten and written extents, but we don't track that in the rmap 218 * records because the blocks are owned (on-disk) by the refcountbt, 219 * which doesn't track unwritten state. 220 */ 221 if (!!(irec->br_state == XFS_EXT_UNWRITTEN) != 222 !!(rmap.rm_flags & XFS_RMAP_UNWRITTEN)) 223 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 224 irec->br_startoff); 225 226 if (!!(info->whichfork == XFS_ATTR_FORK) != 227 !!(rmap.rm_flags & XFS_RMAP_ATTR_FORK)) 228 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 229 irec->br_startoff); 230 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 231 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 232 irec->br_startoff); 233 } 234 235 /* Make sure that we have rmapbt records for this COW fork extent. */ 236 STATIC void 237 xchk_bmap_xref_rmap_cow( 238 struct xchk_bmap_info *info, 239 struct xfs_bmbt_irec *irec, 240 xfs_agblock_t agbno) 241 { 242 struct xfs_rmap_irec rmap; 243 unsigned long long rmap_end; 244 uint64_t owner = XFS_RMAP_OWN_COW; 245 246 if (!info->sc->sa.rmap_cur || xchk_skip_xref(info->sc->sm)) 247 return; 248 249 /* Find the rmap record for this irec. */ 250 if (!xchk_bmap_get_rmap(info, irec, agbno, owner, &rmap)) 251 return; 252 253 /* 254 * CoW staging extents are owned by the refcount btree, so the rmap 255 * can start before and end after the physical space allocated to this 256 * mapping. There are no offsets to check. 257 */ 258 if (rmap.rm_startblock > agbno) 259 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 260 irec->br_startoff); 261 262 rmap_end = (unsigned long long)rmap.rm_startblock + rmap.rm_blockcount; 263 if (rmap_end < agbno + irec->br_blockcount) 264 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 265 irec->br_startoff); 266 267 /* Check the owner */ 268 if (rmap.rm_owner != owner) 269 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 270 irec->br_startoff); 271 272 /* 273 * No flags allowed. Note that the (in-memory) CoW fork distinguishes 274 * between unwritten and written extents, but we don't track that in 275 * the rmap records because the blocks are owned (on-disk) by the 276 * refcountbt, which doesn't track unwritten state. 277 */ 278 if (rmap.rm_flags & XFS_RMAP_ATTR_FORK) 279 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 280 irec->br_startoff); 281 if (rmap.rm_flags & XFS_RMAP_BMBT_BLOCK) 282 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 283 irec->br_startoff); 284 if (rmap.rm_flags & XFS_RMAP_UNWRITTEN) 285 xchk_fblock_xref_set_corrupt(info->sc, info->whichfork, 286 irec->br_startoff); 287 } 288 289 /* Cross-reference a single rtdev extent record. */ 290 STATIC void 291 xchk_bmap_rt_iextent_xref( 292 struct xfs_inode *ip, 293 struct xchk_bmap_info *info, 294 struct xfs_bmbt_irec *irec) 295 { 296 xchk_xref_is_used_rt_space(info->sc, irec->br_startblock, 297 irec->br_blockcount); 298 } 299 300 /* Cross-reference a single datadev extent record. */ 301 STATIC void 302 xchk_bmap_iextent_xref( 303 struct xfs_inode *ip, 304 struct xchk_bmap_info *info, 305 struct xfs_bmbt_irec *irec) 306 { 307 struct xfs_owner_info oinfo; 308 struct xfs_mount *mp = info->sc->mp; 309 xfs_agnumber_t agno; 310 xfs_agblock_t agbno; 311 xfs_extlen_t len; 312 int error; 313 314 agno = XFS_FSB_TO_AGNO(mp, irec->br_startblock); 315 agbno = XFS_FSB_TO_AGBNO(mp, irec->br_startblock); 316 len = irec->br_blockcount; 317 318 error = xchk_ag_init_existing(info->sc, agno, &info->sc->sa); 319 if (!xchk_fblock_process_error(info->sc, info->whichfork, 320 irec->br_startoff, &error)) 321 goto out_free; 322 323 xchk_xref_is_used_space(info->sc, agbno, len); 324 xchk_xref_is_not_inode_chunk(info->sc, agbno, len); 325 switch (info->whichfork) { 326 case XFS_DATA_FORK: 327 xchk_bmap_xref_rmap(info, irec, agbno); 328 if (!xfs_is_reflink_inode(info->sc->ip)) { 329 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 330 info->whichfork, irec->br_startoff); 331 xchk_xref_is_only_owned_by(info->sc, agbno, 332 irec->br_blockcount, &oinfo); 333 xchk_xref_is_not_shared(info->sc, agbno, 334 irec->br_blockcount); 335 } 336 xchk_xref_is_not_cow_staging(info->sc, agbno, 337 irec->br_blockcount); 338 break; 339 case XFS_ATTR_FORK: 340 xchk_bmap_xref_rmap(info, irec, agbno); 341 xfs_rmap_ino_owner(&oinfo, info->sc->ip->i_ino, 342 info->whichfork, irec->br_startoff); 343 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 344 &oinfo); 345 xchk_xref_is_not_shared(info->sc, agbno, 346 irec->br_blockcount); 347 xchk_xref_is_not_cow_staging(info->sc, agbno, 348 irec->br_blockcount); 349 break; 350 case XFS_COW_FORK: 351 xchk_bmap_xref_rmap_cow(info, irec, agbno); 352 xchk_xref_is_only_owned_by(info->sc, agbno, irec->br_blockcount, 353 &XFS_RMAP_OINFO_COW); 354 xchk_xref_is_cow_staging(info->sc, agbno, 355 irec->br_blockcount); 356 xchk_xref_is_not_shared(info->sc, agbno, 357 irec->br_blockcount); 358 break; 359 } 360 361 out_free: 362 xchk_ag_free(info->sc, &info->sc->sa); 363 } 364 365 /* 366 * Directories and attr forks should never have blocks that can't be addressed 367 * by a xfs_dablk_t. 368 */ 369 STATIC void 370 xchk_bmap_dirattr_extent( 371 struct xfs_inode *ip, 372 struct xchk_bmap_info *info, 373 struct xfs_bmbt_irec *irec) 374 { 375 struct xfs_mount *mp = ip->i_mount; 376 xfs_fileoff_t off; 377 378 if (!S_ISDIR(VFS_I(ip)->i_mode) && info->whichfork != XFS_ATTR_FORK) 379 return; 380 381 if (!xfs_verify_dablk(mp, irec->br_startoff)) 382 xchk_fblock_set_corrupt(info->sc, info->whichfork, 383 irec->br_startoff); 384 385 off = irec->br_startoff + irec->br_blockcount - 1; 386 if (!xfs_verify_dablk(mp, off)) 387 xchk_fblock_set_corrupt(info->sc, info->whichfork, off); 388 } 389 390 /* Scrub a single extent record. */ 391 STATIC void 392 xchk_bmap_iextent( 393 struct xfs_inode *ip, 394 struct xchk_bmap_info *info, 395 struct xfs_bmbt_irec *irec) 396 { 397 struct xfs_mount *mp = info->sc->mp; 398 399 /* 400 * Check for out-of-order extents. This record could have come 401 * from the incore list, for which there is no ordering check. 402 */ 403 if (irec->br_startoff < info->prev_rec.br_startoff + 404 info->prev_rec.br_blockcount) 405 xchk_fblock_set_corrupt(info->sc, info->whichfork, 406 irec->br_startoff); 407 408 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 409 xchk_fblock_set_corrupt(info->sc, info->whichfork, 410 irec->br_startoff); 411 412 xchk_bmap_dirattr_extent(ip, info, irec); 413 414 /* Make sure the extent points to a valid place. */ 415 if (info->is_rt && 416 !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) 417 xchk_fblock_set_corrupt(info->sc, info->whichfork, 418 irec->br_startoff); 419 if (!info->is_rt && 420 !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) 421 xchk_fblock_set_corrupt(info->sc, info->whichfork, 422 irec->br_startoff); 423 424 /* We don't allow unwritten extents on attr forks. */ 425 if (irec->br_state == XFS_EXT_UNWRITTEN && 426 info->whichfork == XFS_ATTR_FORK) 427 xchk_fblock_set_corrupt(info->sc, info->whichfork, 428 irec->br_startoff); 429 430 if (info->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 431 return; 432 433 if (info->is_rt) 434 xchk_bmap_rt_iextent_xref(ip, info, irec); 435 else 436 xchk_bmap_iextent_xref(ip, info, irec); 437 } 438 439 /* Scrub a bmbt record. */ 440 STATIC int 441 xchk_bmapbt_rec( 442 struct xchk_btree *bs, 443 const union xfs_btree_rec *rec) 444 { 445 struct xfs_bmbt_irec irec; 446 struct xfs_bmbt_irec iext_irec; 447 struct xfs_iext_cursor icur; 448 struct xchk_bmap_info *info = bs->private; 449 struct xfs_inode *ip = bs->cur->bc_ino.ip; 450 struct xfs_buf *bp = NULL; 451 struct xfs_btree_block *block; 452 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, info->whichfork); 453 uint64_t owner; 454 int i; 455 456 /* 457 * Check the owners of the btree blocks up to the level below 458 * the root since the verifiers don't do that. 459 */ 460 if (xfs_has_crc(bs->cur->bc_mp) && 461 bs->cur->bc_levels[0].ptr == 1) { 462 for (i = 0; i < bs->cur->bc_nlevels - 1; i++) { 463 block = xfs_btree_get_block(bs->cur, i, &bp); 464 owner = be64_to_cpu(block->bb_u.l.bb_owner); 465 if (owner != ip->i_ino) 466 xchk_fblock_set_corrupt(bs->sc, 467 info->whichfork, 0); 468 } 469 } 470 471 /* 472 * Check that the incore extent tree contains an extent that matches 473 * this one exactly. We validate those cached bmaps later, so we don't 474 * need to check them here. If the incore extent tree was just loaded 475 * from disk by the scrubber, we assume that its contents match what's 476 * on disk (we still hold the ILOCK) and skip the equivalence check. 477 */ 478 if (!info->was_loaded) 479 return 0; 480 481 xfs_bmbt_disk_get_all(&rec->bmbt, &irec); 482 if (xfs_bmap_validate_extent(ip, info->whichfork, &irec) != NULL) { 483 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 484 irec.br_startoff); 485 return 0; 486 } 487 488 if (!xfs_iext_lookup_extent(ip, ifp, irec.br_startoff, &icur, 489 &iext_irec) || 490 irec.br_startoff != iext_irec.br_startoff || 491 irec.br_startblock != iext_irec.br_startblock || 492 irec.br_blockcount != iext_irec.br_blockcount || 493 irec.br_state != iext_irec.br_state) 494 xchk_fblock_set_corrupt(bs->sc, info->whichfork, 495 irec.br_startoff); 496 return 0; 497 } 498 499 /* Scan the btree records. */ 500 STATIC int 501 xchk_bmap_btree( 502 struct xfs_scrub *sc, 503 int whichfork, 504 struct xchk_bmap_info *info) 505 { 506 struct xfs_owner_info oinfo; 507 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, whichfork); 508 struct xfs_mount *mp = sc->mp; 509 struct xfs_inode *ip = sc->ip; 510 struct xfs_btree_cur *cur; 511 int error; 512 513 /* Load the incore bmap cache if it's not loaded. */ 514 info->was_loaded = !xfs_need_iread_extents(ifp); 515 516 error = xfs_iread_extents(sc->tp, ip, whichfork); 517 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 518 goto out; 519 520 /* Check the btree structure. */ 521 cur = xfs_bmbt_init_cursor(mp, sc->tp, ip, whichfork); 522 xfs_rmap_ino_bmbt_owner(&oinfo, ip->i_ino, whichfork); 523 error = xchk_btree(sc, cur, xchk_bmapbt_rec, &oinfo, info); 524 xfs_btree_del_cursor(cur, error); 525 out: 526 return error; 527 } 528 529 struct xchk_bmap_check_rmap_info { 530 struct xfs_scrub *sc; 531 int whichfork; 532 struct xfs_iext_cursor icur; 533 }; 534 535 /* Can we find bmaps that fit this rmap? */ 536 STATIC int 537 xchk_bmap_check_rmap( 538 struct xfs_btree_cur *cur, 539 const struct xfs_rmap_irec *rec, 540 void *priv) 541 { 542 struct xfs_bmbt_irec irec; 543 struct xfs_rmap_irec check_rec; 544 struct xchk_bmap_check_rmap_info *sbcri = priv; 545 struct xfs_ifork *ifp; 546 struct xfs_scrub *sc = sbcri->sc; 547 bool have_map; 548 549 /* Is this even the right fork? */ 550 if (rec->rm_owner != sc->ip->i_ino) 551 return 0; 552 if ((sbcri->whichfork == XFS_ATTR_FORK) ^ 553 !!(rec->rm_flags & XFS_RMAP_ATTR_FORK)) 554 return 0; 555 if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) 556 return 0; 557 558 /* Now look up the bmbt record. */ 559 ifp = xfs_ifork_ptr(sc->ip, sbcri->whichfork); 560 if (!ifp) { 561 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 562 rec->rm_offset); 563 goto out; 564 } 565 have_map = xfs_iext_lookup_extent(sc->ip, ifp, rec->rm_offset, 566 &sbcri->icur, &irec); 567 if (!have_map) 568 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 569 rec->rm_offset); 570 /* 571 * bmap extent record lengths are constrained to 2^21 blocks in length 572 * because of space constraints in the on-disk metadata structure. 573 * However, rmap extent record lengths are constrained only by AG 574 * length, so we have to loop through the bmbt to make sure that the 575 * entire rmap is covered by bmbt records. 576 */ 577 check_rec = *rec; 578 while (have_map) { 579 if (irec.br_startoff != check_rec.rm_offset) 580 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 581 check_rec.rm_offset); 582 if (irec.br_startblock != XFS_AGB_TO_FSB(sc->mp, 583 cur->bc_ag.pag->pag_agno, 584 check_rec.rm_startblock)) 585 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 586 check_rec.rm_offset); 587 if (irec.br_blockcount > check_rec.rm_blockcount) 588 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 589 check_rec.rm_offset); 590 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 591 break; 592 check_rec.rm_startblock += irec.br_blockcount; 593 check_rec.rm_offset += irec.br_blockcount; 594 check_rec.rm_blockcount -= irec.br_blockcount; 595 if (check_rec.rm_blockcount == 0) 596 break; 597 have_map = xfs_iext_next_extent(ifp, &sbcri->icur, &irec); 598 if (!have_map) 599 xchk_fblock_set_corrupt(sc, sbcri->whichfork, 600 check_rec.rm_offset); 601 } 602 603 out: 604 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 605 return -ECANCELED; 606 return 0; 607 } 608 609 /* Make sure each rmap has a corresponding bmbt entry. */ 610 STATIC int 611 xchk_bmap_check_ag_rmaps( 612 struct xfs_scrub *sc, 613 int whichfork, 614 struct xfs_perag *pag) 615 { 616 struct xchk_bmap_check_rmap_info sbcri; 617 struct xfs_btree_cur *cur; 618 struct xfs_buf *agf; 619 int error; 620 621 error = xfs_alloc_read_agf(pag, sc->tp, 0, &agf); 622 if (error) 623 return error; 624 625 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, pag); 626 627 sbcri.sc = sc; 628 sbcri.whichfork = whichfork; 629 error = xfs_rmap_query_all(cur, xchk_bmap_check_rmap, &sbcri); 630 if (error == -ECANCELED) 631 error = 0; 632 633 xfs_btree_del_cursor(cur, error); 634 xfs_trans_brelse(sc->tp, agf); 635 return error; 636 } 637 638 /* 639 * Decide if we want to walk every rmap btree in the fs to make sure that each 640 * rmap for this file fork has corresponding bmbt entries. 641 */ 642 static bool 643 xchk_bmap_want_check_rmaps( 644 struct xchk_bmap_info *info) 645 { 646 struct xfs_scrub *sc = info->sc; 647 struct xfs_ifork *ifp; 648 649 if (!xfs_has_rmapbt(sc->mp)) 650 return false; 651 if (info->whichfork == XFS_COW_FORK) 652 return false; 653 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 654 return false; 655 656 /* Don't support realtime rmap checks yet. */ 657 if (info->is_rt) 658 return false; 659 660 /* 661 * The inode repair code zaps broken inode forks by resetting them back 662 * to EXTENTS format and zero extent records. If we encounter a fork 663 * in this state along with evidence that the fork isn't supposed to be 664 * empty, we need to scan the reverse mappings to decide if we're going 665 * to rebuild the fork. Data forks with nonzero file size are scanned. 666 * xattr forks are never empty of content, so they are always scanned. 667 */ 668 ifp = xfs_ifork_ptr(sc->ip, info->whichfork); 669 if (ifp->if_format == XFS_DINODE_FMT_EXTENTS && ifp->if_nextents == 0) { 670 if (info->whichfork == XFS_DATA_FORK && 671 i_size_read(VFS_I(sc->ip)) == 0) 672 return false; 673 674 return true; 675 } 676 677 return false; 678 } 679 680 /* Make sure each rmap has a corresponding bmbt entry. */ 681 STATIC int 682 xchk_bmap_check_rmaps( 683 struct xfs_scrub *sc, 684 int whichfork) 685 { 686 struct xfs_perag *pag; 687 xfs_agnumber_t agno; 688 int error; 689 690 for_each_perag(sc->mp, agno, pag) { 691 error = xchk_bmap_check_ag_rmaps(sc, whichfork, pag); 692 if (error || 693 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) { 694 xfs_perag_rele(pag); 695 return error; 696 } 697 } 698 699 return 0; 700 } 701 702 /* Scrub a delalloc reservation from the incore extent map tree. */ 703 STATIC void 704 xchk_bmap_iextent_delalloc( 705 struct xfs_inode *ip, 706 struct xchk_bmap_info *info, 707 struct xfs_bmbt_irec *irec) 708 { 709 struct xfs_mount *mp = info->sc->mp; 710 711 /* 712 * Check for out-of-order extents. This record could have come 713 * from the incore list, for which there is no ordering check. 714 */ 715 if (irec->br_startoff < info->prev_rec.br_startoff + 716 info->prev_rec.br_blockcount) 717 xchk_fblock_set_corrupt(info->sc, info->whichfork, 718 irec->br_startoff); 719 720 if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) 721 xchk_fblock_set_corrupt(info->sc, info->whichfork, 722 irec->br_startoff); 723 724 /* Make sure the extent points to a valid place. */ 725 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 726 xchk_fblock_set_corrupt(info->sc, info->whichfork, 727 irec->br_startoff); 728 } 729 730 /* Decide if this individual fork mapping is ok. */ 731 static bool 732 xchk_bmap_iext_mapping( 733 struct xchk_bmap_info *info, 734 const struct xfs_bmbt_irec *irec) 735 { 736 /* There should never be a "hole" extent in either extent list. */ 737 if (irec->br_startblock == HOLESTARTBLOCK) 738 return false; 739 if (irec->br_blockcount > XFS_MAX_BMBT_EXTLEN) 740 return false; 741 return true; 742 } 743 744 /* Are these two mappings contiguous with each other? */ 745 static inline bool 746 xchk_are_bmaps_contiguous( 747 const struct xfs_bmbt_irec *b1, 748 const struct xfs_bmbt_irec *b2) 749 { 750 /* Don't try to combine unallocated mappings. */ 751 if (!xfs_bmap_is_real_extent(b1)) 752 return false; 753 if (!xfs_bmap_is_real_extent(b2)) 754 return false; 755 756 /* Does b2 come right after b1 in the logical and physical range? */ 757 if (b1->br_startoff + b1->br_blockcount != b2->br_startoff) 758 return false; 759 if (b1->br_startblock + b1->br_blockcount != b2->br_startblock) 760 return false; 761 if (b1->br_state != b2->br_state) 762 return false; 763 return true; 764 } 765 766 /* 767 * Walk the incore extent records, accumulating consecutive contiguous records 768 * into a single incore mapping. Returns true if @irec has been set to a 769 * mapping or false if there are no more mappings. Caller must ensure that 770 * @info.icur is zeroed before the first call. 771 */ 772 static bool 773 xchk_bmap_iext_iter( 774 struct xchk_bmap_info *info, 775 struct xfs_bmbt_irec *irec) 776 { 777 struct xfs_bmbt_irec got; 778 struct xfs_ifork *ifp; 779 unsigned int nr = 0; 780 781 ifp = xfs_ifork_ptr(info->sc->ip, info->whichfork); 782 783 /* Advance to the next iextent record and check the mapping. */ 784 xfs_iext_next(ifp, &info->icur); 785 if (!xfs_iext_get_extent(ifp, &info->icur, irec)) 786 return false; 787 788 if (!xchk_bmap_iext_mapping(info, irec)) { 789 xchk_fblock_set_corrupt(info->sc, info->whichfork, 790 irec->br_startoff); 791 return false; 792 } 793 nr++; 794 795 /* 796 * Iterate subsequent iextent records and merge them with the one 797 * that we just read, if possible. 798 */ 799 while (xfs_iext_peek_next_extent(ifp, &info->icur, &got)) { 800 if (!xchk_are_bmaps_contiguous(irec, &got)) 801 break; 802 803 if (!xchk_bmap_iext_mapping(info, &got)) { 804 xchk_fblock_set_corrupt(info->sc, info->whichfork, 805 got.br_startoff); 806 return false; 807 } 808 nr++; 809 810 irec->br_blockcount += got.br_blockcount; 811 xfs_iext_next(ifp, &info->icur); 812 } 813 814 /* 815 * If the merged mapping could be expressed with fewer bmbt records 816 * than we actually found, notify the user that this fork could be 817 * optimized. CoW forks only exist in memory so we ignore them. 818 */ 819 if (nr > 1 && info->whichfork != XFS_COW_FORK && 820 howmany_64(irec->br_blockcount, XFS_MAX_BMBT_EXTLEN) < nr) 821 xchk_ino_set_preen(info->sc, info->sc->ip->i_ino); 822 823 return true; 824 } 825 826 /* 827 * Scrub an inode fork's block mappings. 828 * 829 * First we scan every record in every btree block, if applicable. 830 * Then we unconditionally scan the incore extent cache. 831 */ 832 STATIC int 833 xchk_bmap( 834 struct xfs_scrub *sc, 835 int whichfork) 836 { 837 struct xfs_bmbt_irec irec; 838 struct xchk_bmap_info info = { NULL }; 839 struct xfs_mount *mp = sc->mp; 840 struct xfs_inode *ip = sc->ip; 841 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, whichfork); 842 xfs_fileoff_t endoff; 843 int error = 0; 844 845 /* Non-existent forks can be ignored. */ 846 if (!ifp) 847 goto out; 848 849 info.is_rt = whichfork == XFS_DATA_FORK && XFS_IS_REALTIME_INODE(ip); 850 info.whichfork = whichfork; 851 info.is_shared = whichfork == XFS_DATA_FORK && xfs_is_reflink_inode(ip); 852 info.sc = sc; 853 854 switch (whichfork) { 855 case XFS_COW_FORK: 856 /* No CoW forks on non-reflink inodes/filesystems. */ 857 if (!xfs_is_reflink_inode(ip)) { 858 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 859 goto out; 860 } 861 break; 862 case XFS_ATTR_FORK: 863 if (!xfs_has_attr(mp) && !xfs_has_attr2(mp)) 864 xchk_ino_set_corrupt(sc, sc->ip->i_ino); 865 break; 866 default: 867 ASSERT(whichfork == XFS_DATA_FORK); 868 break; 869 } 870 871 /* Check the fork values */ 872 switch (ifp->if_format) { 873 case XFS_DINODE_FMT_UUID: 874 case XFS_DINODE_FMT_DEV: 875 case XFS_DINODE_FMT_LOCAL: 876 /* No mappings to check. */ 877 if (whichfork == XFS_COW_FORK) 878 xchk_fblock_set_corrupt(sc, whichfork, 0); 879 goto out; 880 case XFS_DINODE_FMT_EXTENTS: 881 break; 882 case XFS_DINODE_FMT_BTREE: 883 if (whichfork == XFS_COW_FORK) { 884 xchk_fblock_set_corrupt(sc, whichfork, 0); 885 goto out; 886 } 887 888 error = xchk_bmap_btree(sc, whichfork, &info); 889 if (error) 890 goto out; 891 break; 892 default: 893 xchk_fblock_set_corrupt(sc, whichfork, 0); 894 goto out; 895 } 896 897 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 898 goto out; 899 900 /* Find the offset of the last extent in the mapping. */ 901 error = xfs_bmap_last_offset(ip, &endoff, whichfork); 902 if (!xchk_fblock_process_error(sc, whichfork, 0, &error)) 903 goto out; 904 905 /* 906 * Scrub extent records. We use a special iterator function here that 907 * combines adjacent mappings if they are logically and physically 908 * contiguous. For large allocations that require multiple bmbt 909 * records, this reduces the number of cross-referencing calls, which 910 * reduces runtime. Cross referencing with the rmap is simpler because 911 * the rmap must match the combined mapping exactly. 912 */ 913 while (xchk_bmap_iext_iter(&info, &irec)) { 914 if (xchk_should_terminate(sc, &error) || 915 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 916 goto out; 917 918 if (irec.br_startoff >= endoff) { 919 xchk_fblock_set_corrupt(sc, whichfork, 920 irec.br_startoff); 921 goto out; 922 } 923 924 if (isnullstartblock(irec.br_startblock)) 925 xchk_bmap_iextent_delalloc(ip, &info, &irec); 926 else 927 xchk_bmap_iextent(ip, &info, &irec); 928 memcpy(&info.prev_rec, &irec, sizeof(struct xfs_bmbt_irec)); 929 } 930 931 if (xchk_bmap_want_check_rmaps(&info)) { 932 error = xchk_bmap_check_rmaps(sc, whichfork); 933 if (!xchk_fblock_xref_process_error(sc, whichfork, 0, &error)) 934 goto out; 935 } 936 out: 937 return error; 938 } 939 940 /* Scrub an inode's data fork. */ 941 int 942 xchk_bmap_data( 943 struct xfs_scrub *sc) 944 { 945 return xchk_bmap(sc, XFS_DATA_FORK); 946 } 947 948 /* Scrub an inode's attr fork. */ 949 int 950 xchk_bmap_attr( 951 struct xfs_scrub *sc) 952 { 953 return xchk_bmap(sc, XFS_ATTR_FORK); 954 } 955 956 /* Scrub an inode's CoW fork. */ 957 int 958 xchk_bmap_cow( 959 struct xfs_scrub *sc) 960 { 961 if (!xfs_is_reflink_inode(sc->ip)) 962 return -ENOENT; 963 964 return xchk_bmap(sc, XFS_COW_FORK); 965 } 966