// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_rtalloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_reflink.h"
#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Fork Block Mapping (BMBT) Repair
 * ======================================
 *
 * Gather all the rmap records for the inode and fork we're fixing, reset the
 * incore fork, then recreate the btree.
 */

enum reflink_scan_state {
	RLS_IRRELEVANT = -1,	/* not applicable to this file */
	RLS_UNKNOWN,		/* shared extent scans required */
	RLS_SET_IFLAG,		/* iflag must be set */
};

struct xrep_bmap {
	/* Old bmbt blocks */
	struct xfsb_bitmap	old_bmbt_blocks;

	/* New fork. */
	struct xrep_newbt	new_bmapbt;

	/* List of new bmap records. */
	struct xfarray		*bmap_records;

	struct xfs_scrub	*sc;

	/* How many blocks did we find allocated to this file? */
	xfs_rfsblock_t		nblocks;

	/* How many bmbt blocks did we find for this fork? */
	xfs_rfsblock_t		old_bmbt_block_count;

	/* get_records()'s position in the bmap record array. */
	xfarray_idx_t		array_cur;

	/* How many real (non-hole, non-delalloc) mappings do we have? */
	uint64_t		real_mappings;

	/* Which fork are we fixing? */
	int			whichfork;

	/* Will the REFLINK flag be set when the repair is over? */
	enum reflink_scan_state reflink_scan;

	/* Do we allow unwritten extents? */
	bool			allow_unwritten;
};

/* Is this space extent shared?  Flag the inode if it is. */
STATIC int
xrep_bmap_discover_shared(
	struct xrep_bmap	*rb,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*cur;
	xfs_agblock_t		agbno;
	xfs_agblock_t		fbno;
	xfs_extlen_t		flen;
	int			error;

	if (XFS_IS_REALTIME_INODE(sc->ip)) {
		agbno = xfs_rtb_to_rgbno(sc->mp, startblock);
		cur = sc->sr.refc_cur;
	} else {
		agbno = XFS_FSB_TO_AGBNO(sc->mp, startblock);
		cur = sc->sa.refc_cur;
	}
	error = xfs_refcount_find_shared(cur, agbno, blockcount, &fbno, &flen,
			false);
	if (error)
		return error;

	if (fbno != NULLAGBLOCK)
		rb->reflink_scan = RLS_SET_IFLAG;

	return 0;
}

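/*
 * Summary of the reflink scan state machine, as implemented in this file
 * (descriptive note only): xrep_bmap_init_reflink_scan() picks the starting
 * state (RLS_IRRELEVANT, RLS_SET_IFLAG, or RLS_UNKNOWN); the rmap walk
 * promotes RLS_UNKNOWN to RLS_SET_IFLAG through xrep_bmap_discover_shared()
 * as soon as any written extent turns out to be shared; and
 * xrep_bmap_reset_counters() sets XFS_DIFLAG2_REFLINK on the inode if the
 * final state is RLS_SET_IFLAG.
 */
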
/* Remember this reverse-mapping as a series of bmap records. */
STATIC int
xrep_bmap_from_rmap(
	struct xrep_bmap	*rb,
	xfs_fileoff_t		startoff,
	xfs_fsblock_t		startblock,
	xfs_filblks_t		blockcount,
	bool			unwritten)
{
	struct xfs_bmbt_irec	irec = {
		.br_startoff	= startoff,
		.br_startblock	= startblock,
		.br_state	= unwritten ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM,
	};
	struct xfs_bmbt_rec	rbe;
	struct xfs_scrub	*sc = rb->sc;
	int			error = 0;

	/*
	 * If we're repairing the data fork of a non-reflinked regular file on
	 * a reflink filesystem, we need to figure out if this space extent is
	 * shared.
	 */
	if (rb->reflink_scan == RLS_UNKNOWN && !unwritten) {
		error = xrep_bmap_discover_shared(rb, startblock, blockcount);
		if (error)
			return error;
	}

	do {
		xfs_failaddr_t	fa;

		irec.br_blockcount = min_t(xfs_filblks_t, blockcount,
				XFS_MAX_BMBT_EXTLEN);

		fa = xfs_bmap_validate_extent(sc->ip, rb->whichfork, &irec);
		if (fa)
			return -EFSCORRUPTED;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(sc->ip, rb->whichfork, &irec);

		if (xchk_should_terminate(sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;

		rb->real_mappings++;

		irec.br_startblock += irec.br_blockcount;
		irec.br_startoff += irec.br_blockcount;
		blockcount -= irec.br_blockcount;
	} while (blockcount > 0);

	return 0;
}

/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_fork_rmap(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec)
{
	struct xfs_scrub	*sc = rb->sc;
	enum xbtree_recpacking	outcome;
	int			error;

	/*
	 * Data extents for rt files are never stored on the data device, but
	 * everything else (xattrs, bmbt blocks) can be.
	 */
	if (XFS_IS_REALTIME_INODE(sc->ip) &&
	    !(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)))
		return -EFSCORRUPTED;

	/* Check that this is within the AG. */
	if (!xfs_verify_agbext(to_perag(cur->bc_group), rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check the file offset range. */
	if (!(rec->rm_flags & XFS_RMAP_BMBT_BLOCK) &&
	    !xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* No contradictory flags. */
	if ((rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK)) &&
	    (rec->rm_flags & XFS_RMAP_UNWRITTEN))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, rec->rm_startblock,
			rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	/* Must not be an inode chunk. */
	error = xfs_ialloc_has_inodes_at_extent(sc->sa.ino_cur,
			rec->rm_startblock, rec->rm_blockcount, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}

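/*
 * Worked example of the chunking in xrep_bmap_from_rmap() above
 * (illustrative only): a single rmap record covering
 * 2 * XFS_MAX_BMBT_EXTLEN + 1 blocks is emitted as three bmbt records of
 * lengths XFS_MAX_BMBT_EXTLEN, XFS_MAX_BMBT_EXTLEN, and 1, each advancing
 * br_startoff and br_startblock by the previous record's length.
 */
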
/* Record extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	xfs_fsblock_t		fsbno;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_fork_rmap(rb, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	/* Reject unwritten extents if we don't allow those. */
	if ((rec->rm_flags & XFS_RMAP_UNWRITTEN) && !rb->allow_unwritten)
		return -EFSCORRUPTED;

	fsbno = xfs_agbno_to_fsb(to_perag(cur->bc_group), rec->rm_startblock);

	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK) {
		rb->old_bmbt_block_count += rec->rm_blockcount;
		return xfsb_bitmap_set(&rb->old_bmbt_blocks, fsbno,
				rec->rm_blockcount);
	}

	return xrep_bmap_from_rmap(rb, rec->rm_offset, fsbno,
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/*
 * Compare two block mapping records.  We want to sort in order of increasing
 * file offset.
 */
static int
xrep_bmap_extent_cmp(
	const void		*a,
	const void		*b)
{
	const struct xfs_bmbt_rec	*ba = a;
	const struct xfs_bmbt_rec	*bb = b;
	xfs_fileoff_t		ao = xfs_bmbt_disk_get_startoff(ba);
	xfs_fileoff_t		bo = xfs_bmbt_disk_get_startoff(bb);

	if (ao > bo)
		return 1;
	else if (ao < bo)
		return -1;
	return 0;
}

/*
 * Sort the bmap extents by fork offset or else the records will be in the
 * wrong order.  Ensure there are no overlaps in the file offset ranges.
 */
STATIC int
xrep_bmap_sort_records(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	xfs_fileoff_t		next_off = 0;
	xfarray_idx_t		array_cur;
	int			error;

	error = xfarray_sort(rb->bmap_records, xrep_bmap_extent_cmp,
			XFARRAY_SORT_KILLABLE);
	if (error)
		return error;

	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		if (irec.br_startoff < next_off)
			return -EFSCORRUPTED;

		next_off = irec.br_startoff + irec.br_blockcount;
	}

	return 0;
}

/* Scan one AG for reverse mappings that we can turn into extent maps. */
STATIC int
xrep_bmap_scan_ag(
	struct xrep_bmap	*rb,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_bmap_walk_rmap, rb);
	xchk_ag_free(sc, &sc->sa);
	return error;
}

#ifdef CONFIG_XFS_RT
/* Check for any obvious errors or conflicts in the file mapping. */
STATIC int
xrep_bmap_check_rtfork_rmap(
	struct xfs_scrub	*sc,
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec)
{
	/* xattr extents are never stored on realtime devices */
	if (rec->rm_flags & XFS_RMAP_ATTR_FORK)
		return -EFSCORRUPTED;

	/* bmbt blocks are never stored on realtime devices */
	if (rec->rm_flags & XFS_RMAP_BMBT_BLOCK)
		return -EFSCORRUPTED;

	/* Data extents for non-rt files are never stored on the rt device. */
	if (!XFS_IS_REALTIME_INODE(sc->ip))
		return -EFSCORRUPTED;

	/* Check the file offsets and physical extents. */
	if (!xfs_verify_fileext(sc->mp, rec->rm_offset, rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Check that this is within the rtgroup. */
	if (!xfs_verify_rgbext(to_rtg(cur->bc_group), rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Record realtime extents that belong to this inode's fork. */
STATIC int
xrep_bmap_walk_rtrmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec	*rec,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;
	int			error = 0;

	if (xchk_should_terminate(rb->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rb->sc->ip->i_ino)
		return 0;

	error = xrep_bmap_check_rtfork_rmap(rb->sc, cur, rec);
	if (error)
		return error;

	/*
	 * Record all blocks allocated to this file even if the extent isn't
	 * for the fork we're rebuilding so that we can reset di_nblocks later.
	 */
	rb->nblocks += rec->rm_blockcount;

	/* If this rmap isn't for the fork we want, we're done. */
	if (rb->whichfork == XFS_DATA_FORK &&
	    (rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;
	if (rb->whichfork == XFS_ATTR_FORK &&
	    !(rec->rm_flags & XFS_RMAP_ATTR_FORK))
		return 0;

	return xrep_bmap_from_rmap(rb, rec->rm_offset,
			xfs_rgbno_to_rtb(to_rtg(cur->bc_group),
				rec->rm_startblock),
			rec->rm_blockcount,
			rec->rm_flags & XFS_RMAP_UNWRITTEN);
}

/* Scan the realtime reverse mappings to build the new extent map. */
STATIC int
xrep_bmap_scan_rtgroup(
	struct xrep_bmap	*rb,
	struct xfs_rtgroup	*rtg)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	if (!xfs_has_rtrmapbt(sc->mp))
		return 0;

	error = xrep_rtgroup_init(sc, rtg, &sc->sr,
			XFS_RTGLOCK_RMAP |
			XFS_RTGLOCK_REFCOUNT |
			XFS_RTGLOCK_BITMAP_SHARED);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_bmap_walk_rtrmap, rb);
	xchk_rtgroup_btcur_free(&sc->sr);
	xchk_rtgroup_free(sc, &sc->sr);
	return error;
}
#else
static inline int
xrep_bmap_scan_rtgroup(struct xrep_bmap *rb, struct xfs_rtgroup *rtg)
{
	return -EFSCORRUPTED;
}
#endif

/* Find the delalloc extents from the old incore extent tree. */
STATIC int
xrep_bmap_find_delalloc(
	struct xrep_bmap	*rb)
{
	struct xfs_bmbt_irec	irec;
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_rec	rbe;
	struct xfs_inode	*ip = rb->sc->ip;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, rb->whichfork);
	int			error = 0;

	/*
	 * Skip this scan if we don't expect to find delayed allocation
	 * reservations in this fork.
	 */
	if (rb->whichfork == XFS_ATTR_FORK || ip->i_delayed_blks == 0)
		return 0;

	for_each_xfs_iext(ifp, &icur, &irec) {
		if (!isnullstartblock(irec.br_startblock))
			continue;

		xfs_bmbt_disk_set_all(&rbe, &irec);

		trace_xrep_bmap_found(ip, rb->whichfork, &irec);

		if (xchk_should_terminate(rb->sc, &error))
			return error;

		error = xfarray_append(rb->bmap_records, &rbe);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Collect block mappings for this fork of this inode and decide if we have
 * enough space to rebuild.  Caller is responsible for cleaning up the list if
 * anything goes wrong.
 */
STATIC int
xrep_bmap_find_mappings(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_perag	*pag = NULL;
	int			error = 0;

	/*
	 * Iterate the rtrmaps for extents.  Metadata files never have content
	 * on the realtime device, so there's no need to scan them.
	 */
	if (!xfs_is_metadir_inode(sc->ip)) {
		struct xfs_rtgroup	*rtg = NULL;

		while ((rtg = xfs_rtgroup_next(sc->mp, rtg))) {
			error = xrep_bmap_scan_rtgroup(rb, rtg);
			if (error) {
				xfs_rtgroup_rele(rtg);
				return error;
			}
		}
	}

	/* Iterate the rmaps for extents. */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_bmap_scan_ag(rb, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	return xrep_bmap_find_delalloc(rb);
}

/* Retrieve real extent mappings for bulk loading the bmap btree. */
STATIC int
xrep_bmap_get_records(
	struct xfs_btree_cur	*cur,
	unsigned int		idx,
	struct xfs_btree_block	*block,
	unsigned int		nr_wanted,
	void			*priv)
{
	struct xfs_bmbt_rec	rec;
	struct xfs_bmbt_irec	*irec = &cur->bc_rec.b;
	struct xrep_bmap	*rb = priv;
	union xfs_btree_rec	*block_rec;
	unsigned int		loaded;
	int			error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		do {
			error = xfarray_load(rb->bmap_records, rb->array_cur++,
					&rec);
			if (error)
				return error;

			xfs_bmbt_disk_get_all(&rec, irec);
		} while (isnullstartblock(irec->br_startblock));

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new btree blocks to the bulk loader. */
STATIC int
xrep_bmap_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_bmap	*rb = priv;

	return xrep_newbt_claim_block(cur, &rb->new_bmapbt, ptr);
}

/* Figure out how much space we need to create the incore btree root block. */
STATIC size_t
xrep_bmap_iroot_size(
	struct xfs_btree_cur	*cur,
	unsigned int		level,
	unsigned int		nr_this_level,
	void			*priv)
{
	ASSERT(level > 0);

	return xfs_bmap_broot_space_calc(cur->bc_mp, nr_this_level);
}

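/*
 * Note on how the bulk loader consumes the record array (hedged sketch of
 * the behavior implemented above, not an additional interface): delalloc
 * mappings (those with isnullstartblock() startblocks) are kept in
 * bmap_records so that xrep_bmap_extents_load() can rebuild the incore
 * extent tree, but xrep_bmap_get_records() skips them because they have no
 * ondisk bmbt representation.  Roughly:
 *
 *	real mapping     -> copied into the new bmbt block
 *	delalloc mapping -> skipped here, loaded only into the iext tree
 */
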
/* Update the inode counters. */
STATIC int
xrep_bmap_reset_counters(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int64_t			delta;

	if (rb->reflink_scan == RLS_SET_IFLAG)
		sc->ip->i_diflags2 |= XFS_DIFLAG2_REFLINK;

	/*
	 * Update the inode block counts to reflect the extents we found in
	 * the rmapbt.
	 */
	delta = ifake->if_blocks - rb->old_bmbt_block_count;
	sc->ip->i_nblocks = rb->nblocks + delta;
	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);

	/*
	 * Adjust the quota counts by the difference in size between the old
	 * and new bmbt.
	 */
	xfs_trans_mod_dquot_byino(sc->tp, sc->ip, XFS_TRANS_DQ_BCOUNT, delta);
	return 0;
}

/*
 * Create a new iext tree and load it with block mappings.  If the inode is
 * in extents format, that's all we need to do to commit the new mappings.
 * If it is in btree format, this takes care of preloading the incore tree.
 */
STATIC int
xrep_bmap_extents_load(
	struct xrep_bmap	*rb)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	irec;
	struct xfs_ifork	*ifp = rb->new_bmapbt.ifake.if_fork;
	xfarray_idx_t		array_cur;
	int			error;

	ASSERT(ifp->if_bytes == 0);

	/* Add all the mappings (incl. delalloc) to the incore extent tree. */
	xfs_iext_first(ifp, &icur);
	foreach_xfarray_idx(rb->bmap_records, array_cur) {
		struct xfs_bmbt_rec	rec;

		error = xfarray_load(rb->bmap_records, array_cur, &rec);
		if (error)
			return error;

		xfs_bmbt_disk_get_all(&rec, &irec);

		xfs_iext_insert_raw(ifp, &icur, &irec);
		if (!isnullstartblock(irec.br_startblock))
			ifp->if_nextents++;

		xfs_iext_next(ifp, &icur);
	}

	return xrep_ino_ensure_extent_count(rb->sc, rb->whichfork,
			ifp->if_nextents);
}

/*
 * Reserve new btree blocks, bulk load the bmap records into the ondisk btree,
 * and load the incore extent tree.
 */
STATIC int
xrep_bmap_btree_load(
	struct xrep_bmap	*rb,
	struct xfs_btree_cur	*bmap_cur)
{
	struct xfs_scrub	*sc = rb->sc;
	int			error;

	/* Compute how many blocks we'll need. */
	error = xfs_btree_bload_compute_geometry(bmap_cur,
			&rb->new_bmapbt.bload, rb->real_mappings);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire
	 * bmap from the number of extents we found, and pump up our
	 * transaction to have sufficient block reservation.  We're allowed
	 * to exceed file quota to repair inconsistent metadata.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, sc->ip,
			rb->new_bmapbt.bload.nr_blocks, 0, true);
	if (error)
		return error;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rb->new_bmapbt,
			rb->new_bmapbt.bload.nr_blocks);
	if (error)
		return error;

	/* Add all observed bmap records. */
	rb->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(bmap_cur, &rb->new_bmapbt.bload, rb);
	if (error)
		return error;

	/*
	 * Load the new bmap records into the new incore extent tree to
	 * preserve delalloc reservations for regular files.  The directory
	 * code loads the extent tree during xfs_dir_open and assumes
	 * thereafter that it remains loaded, so we must not violate that
	 * assumption.
	 */
	return xrep_bmap_extents_load(rb);
}

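/*
 * Worked example for the block count adjustment in
 * xrep_bmap_reset_counters() above (illustrative numbers, not from a real
 * filesystem): if the rmap walk counted nblocks = 1000 blocks for this
 * inode, of which old_bmbt_block_count = 5 were old bmbt blocks, and the
 * newly staged btree occupies ifake->if_blocks = 3 blocks, then
 * delta = 3 - 5 = -2, i_nblocks becomes 998, and the quota block count is
 * likewise reduced by 2.
 */
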
/*
 * Use the collected bmap information to stage a new bmap fork.  If this is
 * successful we'll return with the new fork information logged to the repair
 * transaction but not yet committed.  The caller must ensure that the inode
 * is joined to the transaction; the inode will be joined to a clean
 * transaction when the function returns.
 */
STATIC int
xrep_bmap_build_new_fork(
	struct xrep_bmap	*rb)
{
	struct xfs_owner_info	oinfo;
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_btree_cur	*bmap_cur;
	struct xbtree_ifakeroot	*ifake = &rb->new_bmapbt.ifake;
	int			error;

	error = xrep_bmap_sort_records(rb);
	if (error)
		return error;

	/*
	 * Prepare to construct the new fork by initializing the new btree
	 * structure and creating a fake ifork in the ifakeroot structure.
	 */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	error = xrep_newbt_init_inode(&rb->new_bmapbt, sc, rb->whichfork,
			&oinfo);
	if (error)
		return error;

	rb->new_bmapbt.bload.get_records = xrep_bmap_get_records;
	rb->new_bmapbt.bload.claim_block = xrep_bmap_claim_block;
	rb->new_bmapbt.bload.iroot_size = xrep_bmap_iroot_size;

	/*
	 * Allocate a new bmap btree cursor for reloading an inode block
	 * mapping data structure.
	 */
	bmap_cur = xfs_bmbt_init_cursor(sc->mp, NULL, sc->ip, XFS_STAGING_FORK);
	xfs_btree_stage_ifakeroot(bmap_cur, ifake);

	/*
	 * Figure out the size and format of the new fork, then fill it with
	 * all the bmap records we've found.  Join the inode to the
	 * transaction so that we can roll the transaction while holding the
	 * inode locked.
	 */
	if (rb->real_mappings <= XFS_IFORK_MAXEXT(sc->ip, rb->whichfork)) {
		ifake->if_fork->if_format = XFS_DINODE_FMT_EXTENTS;
		error = xrep_bmap_extents_load(rb);
	} else {
		ifake->if_fork->if_format = XFS_DINODE_FMT_BTREE;
		error = xrep_bmap_btree_load(rb, bmap_cur);
	}
	if (error)
		goto err_cur;

	/*
	 * Install the new fork in the inode.  After this point the old
	 * mapping data are no longer accessible and the new tree is live.
	 * We delete the cursor immediately after committing the staged root
	 * because the staged fork might be in extents format.
	 */
	xfs_bmbt_commit_staged_btree(bmap_cur, sc->tp, rb->whichfork);
	xfs_btree_del_cursor(bmap_cur, 0);

	/* Reset the inode counters now that we've changed the fork. */
	error = xrep_bmap_reset_counters(rb);
	if (error)
		goto err_newbt;

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rb->new_bmapbt);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_cur:
	if (bmap_cur)
		xfs_btree_del_cursor(bmap_cur, error);
err_newbt:
	xrep_newbt_cancel(&rb->new_bmapbt);
	return error;
}

/*
 * Now that we've logged the new inode btree, invalidate all of the old
 * blocks and free them, if there were any.
 */
STATIC int
xrep_bmap_remove_old_tree(
	struct xrep_bmap	*rb)
{
	struct xfs_scrub	*sc = rb->sc;
	struct xfs_owner_info	oinfo;

	/* Free the old bmbt blocks if they're not in use. */
	xfs_rmap_ino_bmbt_owner(&oinfo, sc->ip->i_ino, rb->whichfork);
	return xrep_reap_fsblocks(sc, &rb->old_bmbt_blocks, &oinfo);
}

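/*
 * Note on the format decision in xrep_bmap_build_new_fork() above: if the
 * number of real mappings fits in the inode fork's inline extent area
 * (XFS_IFORK_MAXEXT), the fork is staged in extents format and only the
 * incore iext tree is loaded; otherwise the fork is staged in btree format
 * and xrep_bmap_btree_load() bulk loads an ondisk bmbt as well.
 */
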
/* Check for garbage inputs.  Returns -ECANCELED if there's nothing to do. */
STATIC int
xrep_bmap_check_inputs(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(whichfork == XFS_DATA_FORK || whichfork == XFS_ATTR_FORK);

	if (!xfs_has_rmapbt(sc->mp))
		return -EOPNOTSUPP;

	/* No fork means nothing to rebuild. */
	if (!ifp)
		return -ECANCELED;

	/*
	 * We only know how to repair extent mappings, which is to say that
	 * we only support extents and btree fork format.  Repairs to a
	 * local format fork require a higher level repair function, so we
	 * do not have any work to do here.
	 */
	switch (ifp->if_format) {
	case XFS_DINODE_FMT_DEV:
	case XFS_DINODE_FMT_LOCAL:
	case XFS_DINODE_FMT_UUID:
	case XFS_DINODE_FMT_META_BTREE:
		return -ECANCELED;
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	default:
		return -EFSCORRUPTED;
	}

	if (whichfork == XFS_ATTR_FORK)
		return 0;

	/* Only files, symlinks, and directories get to have data forks. */
	switch (VFS_I(sc->ip)->i_mode & S_IFMT) {
	case S_IFREG:
	case S_IFDIR:
	case S_IFLNK:
		/* ok */
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

/* Set up the initial state of the reflink scan. */
static inline enum reflink_scan_state
xrep_bmap_init_reflink_scan(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	/* cannot share on non-reflink filesystem */
	if (!xfs_has_reflink(sc->mp))
		return RLS_IRRELEVANT;

	/* preserve flag if it's already set */
	if (xfs_is_reflink_inode(sc->ip))
		return RLS_SET_IFLAG;

	/* can only share regular files */
	if (!S_ISREG(VFS_I(sc->ip)->i_mode))
		return RLS_IRRELEVANT;

	/* cannot share attr fork extents */
	if (whichfork != XFS_DATA_FORK)
		return RLS_IRRELEVANT;

	return RLS_UNKNOWN;
}

/* Repair an inode fork. */
int
xrep_bmap(
	struct xfs_scrub	*sc,
	int			whichfork,
	bool			allow_unwritten)
{
	struct xrep_bmap	*rb;
	char			*descr;
	xfs_extnum_t		max_bmbt_recs;
	bool			large_extcount;
	int			error = 0;

	error = xrep_bmap_check_inputs(sc, whichfork);
	if (error == -ECANCELED)
		return 0;
	if (error)
		return error;

	rb = kzalloc(sizeof(struct xrep_bmap), XCHK_GFP_FLAGS);
	if (!rb)
		return -ENOMEM;
	rb->sc = sc;
	rb->whichfork = whichfork;
	rb->reflink_scan = xrep_bmap_init_reflink_scan(sc, whichfork);
	rb->allow_unwritten = allow_unwritten;

	/* Set up enough storage to handle the max records for this fork. */
	large_extcount = xfs_has_large_extent_counts(sc->mp);
	max_bmbt_recs = xfs_iext_max_nextents(large_extcount, whichfork);
	descr = xchk_xfile_ino_descr(sc, "%s fork mapping records",
			whichfork == XFS_DATA_FORK ? "data" : "attr");
	error = xfarray_create(descr, max_bmbt_recs,
			sizeof(struct xfs_bmbt_rec), &rb->bmap_records);
	kfree(descr);
	if (error)
		goto out_rb;

	/* Collect all reverse mappings for this fork's extents. */
	xfsb_bitmap_init(&rb->old_bmbt_blocks);
	error = xrep_bmap_find_mappings(rb);
	if (error)
		goto out_bitmap;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the bmap information. */
	error = xrep_bmap_build_new_fork(rb);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_bmap_remove_old_tree(rb);
	if (error)
		goto out_bitmap;

out_bitmap:
	xfsb_bitmap_destroy(&rb->old_bmbt_blocks);
	xfarray_destroy(rb->bmap_records);
out_rb:
	kfree(rb);
	return error;
}

/* Repair an inode's data fork. */
int
xrep_bmap_data(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_DATA_FORK, true);
}

/* Repair an inode's attr fork. */
int
xrep_bmap_attr(
	struct xfs_scrub	*sc)
{
	return xrep_bmap(sc, XFS_ATTR_FORK, false);
}

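/*
 * Hedged sketch only: the allow_unwritten parameter exists so that a caller
 * can treat unwritten extents as corruption, as xrep_bmap_attr() does above.
 * A hypothetical wrapper for a stricter data fork rebuild (the name below is
 * an assumption for illustration, not part of this interface) would follow
 * the same pattern as the two helpers above:
 */
#if 0
STATIC int
xrep_bmap_data_strict(
	struct xfs_scrub	*sc)
{
	/* Rebuild the data fork, rejecting unwritten extents. */
	return xrep_bmap(sc, XFS_DATA_FORK, false);
}
#endif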