1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2020-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs_platform.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_btree_staging.h" 15 #include "xfs_buf_mem.h" 16 #include "xfs_btree_mem.h" 17 #include "xfs_bit.h" 18 #include "xfs_log_format.h" 19 #include "xfs_trans.h" 20 #include "xfs_sb.h" 21 #include "xfs_alloc.h" 22 #include "xfs_rmap.h" 23 #include "xfs_rmap_btree.h" 24 #include "xfs_rtrmap_btree.h" 25 #include "xfs_inode.h" 26 #include "xfs_icache.h" 27 #include "xfs_bmap.h" 28 #include "xfs_bmap_btree.h" 29 #include "xfs_quota.h" 30 #include "xfs_rtalloc.h" 31 #include "xfs_ag.h" 32 #include "xfs_rtgroup.h" 33 #include "xfs_refcount.h" 34 #include "scrub/xfs_scrub.h" 35 #include "scrub/scrub.h" 36 #include "scrub/common.h" 37 #include "scrub/btree.h" 38 #include "scrub/trace.h" 39 #include "scrub/repair.h" 40 #include "scrub/bitmap.h" 41 #include "scrub/fsb_bitmap.h" 42 #include "scrub/rgb_bitmap.h" 43 #include "scrub/xfile.h" 44 #include "scrub/xfarray.h" 45 #include "scrub/iscan.h" 46 #include "scrub/newbt.h" 47 #include "scrub/reap.h" 48 49 /* 50 * Realtime Reverse Mapping Btree Repair 51 * ===================================== 52 * 53 * This isn't quite as difficult as repairing the rmap btree on the data 54 * device, since we only store the data fork extents of realtime files on the 55 * realtime device. We still have to freeze the filesystem and stop the 56 * background threads like we do for the rmap repair, but we only have to scan 57 * realtime inodes. 58 * 59 * Collecting entries for the new realtime rmap btree is easy -- all we have 60 * to do is generate rtrmap entries from the data fork mappings of all realtime 61 * files in the filesystem. 
 * We then scan the rmap btrees of the data device
 * looking for extents belonging to the old btree and note them in a bitmap.
 *
 * To rebuild the realtime rmap btree, we bulk-load the collected mappings into
 * a new btree cursor and atomically swap that into the realtime inode.  Then
 * we can free the blocks from the old btree.
 *
 * We use the 'xrep_rtrmap' prefix for all the rmap functions.
 */

/* Context for collecting rmaps */
struct xrep_rtrmap {
	/* new rtrmapbt information */
	struct xrep_newbt	new_btree;

	/* lock for the xfbtree and xfile */
	struct mutex		lock;

	/* rmap records generated from primary metadata */
	struct xfbtree		rtrmap_btree;

	struct xfs_scrub	*sc;

	/* bitmap of old rtrmapbt blocks */
	struct xfsb_bitmap	old_rtrmapbt_blocks;

	/* Hooks into rtrmap update code. */
	struct xfs_rmap_hook	rhook;

	/* inode scan cursor */
	struct xchk_iscan	iscan;

	/* in-memory btree cursor for the ->get_blocks walk */
	struct xfs_btree_cur	*mcur;

	/* Number of records we're staging in the new btree. */
	uint64_t		nr_records;
};

/*
 * Set us up to repair rt reverse mapping btrees.
 *
 * Allocates the repair context and hangs it off sc->buf; presumably the
 * scrub framework frees sc->buf during teardown -- confirm against the
 * scrub core before changing ownership here.
 */
int
xrep_setup_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr;
	int			error;

	/* Live-update hooks below require the rmap fsgate to be enabled. */
	xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP);

	error = xrep_setup_xfbtree(sc, "realtime reverse mapping records");
	if (error)
		return error;

	rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS);
	if (!rr)
		return -ENOMEM;

	rr->sc = sc;
	sc->buf = rr;
	return 0;
}

/*
 * Make sure there's nothing funny about this mapping: the record must pass
 * the generic rtrmap record checks, and it must not overlap free space.
 * Returns 0 or -EFSCORRUPTED.
 */
STATIC int
xrep_rtrmap_check_mapping(
	struct xfs_scrub	*sc,
	const struct xfs_rmap_irec *rec)
{
	if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL)
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	return xrep_require_rtext_inuse(sc, rec->rm_startblock,
			rec->rm_blockcount);
}

/*
 * Store a reverse-mapping record in the in-memory (xfbtree) shadow btree.
 *
 * The mutex serializes us against the live-update hook, which also inserts
 * into rr->rtrmap_btree.  On any failure we abort the inode scan: once the
 * shadow btree is missing a record it can never be trusted for the rebuild.
 */
static inline int
xrep_rtrmap_stash(
	struct xrep_rtrmap	*rr,
	xfs_rgblock_t		startblock,
	xfs_extlen_t		blockcount,
	uint64_t		owner,
	uint64_t		offset,
	unsigned int		flags)
{
	struct xfs_rmap_irec	rmap = {
		.rm_startblock	= startblock,
		.rm_blockcount	= blockcount,
		.rm_owner	= owner,
		.rm_offset	= offset,
		.rm_flags	= flags,
	};
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_btree_cur	*mcur;
	int			error = 0;

	if (xchk_should_terminate(sc, &error))
		return error;

	/* A prior failure (e.g. in the hook) already poisoned the scan. */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	trace_xrep_rtrmap_found(sc->mp, &rmap);

	/* Add entry to in-memory btree. */
	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree);
	error = xfs_rmap_map_raw(mcur, &rmap);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp);
	if (error)
		goto out_abort;

	mutex_unlock(&rr->lock);
	return 0;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp);
out_abort:
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
	return error;
}

/* Finding all file and bmbt extents. */

/* Context for accumulating rmaps for an inode fork. */
struct xrep_rtrmap_ifork {
	/*
	 * Accumulate rmap data here to turn multiple adjacent bmaps into a
	 * single rmap.
	 */
	struct xfs_rmap_irec	accum;

	struct xrep_rtrmap	*rr;
};

/* Stash an rmap that we accumulated while walking an inode fork.
 */
STATIC int
xrep_rtrmap_stash_accumulated(
	struct xrep_rtrmap_ifork	*rf)
{
	/* Nothing accumulated yet (or already flushed) -- nothing to do. */
	if (rf->accum.rm_blockcount == 0)
		return 0;

	return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock,
			rf->accum.rm_blockcount, rf->accum.rm_owner,
			rf->accum.rm_offset, rf->accum.rm_flags);
}

/*
 * Accumulate a bmbt record.  Physically and logically contiguous mappings
 * with identical unwritten state are merged into the accumulator; anything
 * else flushes the accumulator and starts a new run.  The final run is
 * flushed by the caller via xrep_rtrmap_stash_accumulated.
 */
STATIC int
xrep_rtrmap_visit_bmbt(
	struct xfs_btree_cur	*cur,
	struct xfs_bmbt_irec	*rec,
	void			*priv)
{
	struct xrep_rtrmap_ifork	*rf = priv;
	struct xfs_rmap_irec	*accum = &rf->accum;
	struct xfs_mount	*mp = rf->rr->sc->mp;
	xfs_rgblock_t		rgbno;
	unsigned int		rmap_flags = 0;
	int			error;

	/* Ignore mappings that belong to some other realtime group. */
	if (xfs_rtb_to_rgno(mp, rec->br_startblock) !=
	    rtg_rgno(rf->rr->sc->sr.rtg))
		return 0;

	if (rec->br_state == XFS_EXT_UNWRITTEN)
		rmap_flags |= XFS_RMAP_UNWRITTEN;

	/* If this bmap is adjacent to the previous one, just add it. */
	rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock);
	if (accum->rm_blockcount > 0 &&
	    rec->br_startoff == accum->rm_offset + accum->rm_blockcount &&
	    rgbno == accum->rm_startblock + accum->rm_blockcount &&
	    rmap_flags == accum->rm_flags) {
		accum->rm_blockcount += rec->br_blockcount;
		return 0;
	}

	/* Otherwise stash the old rmap and start accumulating a new one. */
	error = xrep_rtrmap_stash_accumulated(rf);
	if (error)
		return error;

	accum->rm_startblock = rgbno;
	accum->rm_blockcount = rec->br_blockcount;
	accum->rm_offset = rec->br_startoff;
	accum->rm_flags = rmap_flags;
	return 0;
}

/*
 * Iterate the block mapping btree to collect rmap records for anything in
 * this fork that maps to the rt volume.  Sets @mappings_done to true if
 * we've scanned the block mappings in this fork; if it stays false, the
 * caller must scan the incore extent cache instead.
 */
STATIC int
xrep_rtrmap_scan_bmbt(
	struct xrep_rtrmap_ifork	*rf,
	struct xfs_inode	*ip,
	bool			*mappings_done)
{
	struct xrep_rtrmap	*rr = rf->rr;
	struct xfs_btree_cur	*cur;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	*mappings_done = false;

	/*
	 * If the incore extent cache is already loaded, we'll just use the
	 * incore extent scanner to record mappings.  Don't bother walking the
	 * ondisk extent tree.
	 */
	if (!xfs_need_iread_extents(ifp))
		return 0;

	/* Accumulate all the mappings in the bmap btree. */
	cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK);
	error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf);
	xfs_btree_del_cursor(cur, error);
	if (error)
		return error;

	/* Stash any remaining accumulated rmaps and exit. */
	*mappings_done = true;
	return xrep_rtrmap_stash_accumulated(rf);
}

/*
 * Iterate the in-core extent cache to collect rmap records for anything in
 * this fork that matches the realtime group.
 */
STATIC int
xrep_rtrmap_scan_iext(
	struct xrep_rtrmap_ifork	*rf,
	struct xfs_ifork	*ifp)
{
	struct xfs_bmbt_irec	rec;
	struct xfs_iext_cursor	icur;
	int			error;

	for_each_xfs_iext(ifp, &icur, &rec) {
		/* Delalloc reservations have no physical blocks yet; skip. */
		if (isnullstartblock(rec.br_startblock))
			continue;
		error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf);
		if (error)
			return error;
	}

	/* Flush whatever is left in the accumulator. */
	return xrep_rtrmap_stash_accumulated(rf);
}

/* Find all the extents on the realtime device mapped by an inode fork.
 */
STATIC int
xrep_rtrmap_scan_dfork(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	struct xrep_rtrmap_ifork	rf = {
		.accum		= { .rm_owner = ip->i_ino, },
		.rr		= rr,
	};
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK);
	int			error = 0;

	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
		bool		mappings_done;

		/*
		 * Scan the bmbt for mappings.  If the incore extent tree is
		 * loaded, we want to scan the cached mappings since that's
		 * faster when the extent counts are very high.
		 */
		error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done);
		if (error || mappings_done)
			return error;
	} else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) {
		/* realtime data forks should only be extents or btree */
		return -EFSCORRUPTED;
	}

	/* Scan incore extent cache. */
	return xrep_rtrmap_scan_iext(&rf, ifp);
}

/*
 * Record reverse mappings for a file.  Takes the data-fork mapping lock
 * shared for the duration of the fork walk.
 */
STATIC int
xrep_rtrmap_scan_inode(
	struct xrep_rtrmap	*rr,
	struct xfs_inode	*ip)
{
	unsigned int		lock_mode;
	int			error = 0;

	/* Skip the rt rmap btree inode. */
	if (rr->sc->ip == ip)
		return 0;

	lock_mode = xfs_ilock_data_map_shared(ip);

	/* Check the data fork if it's on the realtime device. */
	if (XFS_IS_REALTIME_INODE(ip)) {
		error = xrep_rtrmap_scan_dfork(rr, ip);
		if (error)
			goto out_unlock;
	}

	/* Mark visited only after a fully successful scan of this inode. */
	xchk_iscan_mark_visited(&rr->iscan, ip);
out_unlock:
	xfs_iunlock(ip, lock_mode);
	return error;
}

/*
 * Record extents that belong to the realtime rmap inode.  These are the
 * blocks of the *old* rtrmap btree on the data device; we collect them so
 * they can be reaped after the new btree is committed.
 */
STATIC int
xrep_rtrmap_walk_rmap(
	struct xfs_btree_cur	*cur,
	const struct xfs_rmap_irec *rec,
	void			*priv)
{
	struct xrep_rtrmap	*rr = priv;
	int			error = 0;

	if (xchk_should_terminate(rr->sc, &error))
		return error;

	/* Skip extents which are not owned by this inode and fork. */
	if (rec->rm_owner != rr->sc->ip->i_ino)
		return 0;

	error = xrep_check_ino_btree_mapping(rr->sc, rec);
	if (error)
		return error;

	/* Convert the per-AG block number to a global fsblock for the map. */
	return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks,
			xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock),
			rec->rm_blockcount);
}

/* Scan one AG for reverse mappings for the realtime rmap btree. */
STATIC int
xrep_rtrmap_scan_ag(
	struct xrep_rtrmap	*rr,
	struct xfs_perag	*pag)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	error = xrep_ag_init(sc, pag, &sc->sa);
	if (error)
		return error;

	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr);
	xchk_ag_free(sc, &sc->sa);
	return error;
}

/* Per-walk context for xrep_rtrmap_stash_run. */
struct xrep_rtrmap_stash_run {
	struct xrep_rtrmap	*rr;
	/* rmap owner code to stamp onto every emitted record */
	uint64_t		owner;
};

/* Bitmap-walk callback: stash one run of blocks as a single rmap record. */
static int
xrep_rtrmap_stash_run(
	uint32_t		start,
	uint32_t		len,
	void			*priv)
{
	struct xrep_rtrmap_stash_run	*rsr = priv;
	struct xrep_rtrmap	*rr = rsr->rr;
	xfs_rgblock_t		rgbno = start;

	return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0);
}

/*
 * Emit rmaps for every extent of bits set in the bitmap.  Caller must ensure
 * that the ranges are in units of rt group blocks (the bitmap positions are
 * interpreted as xfs_rgblock_t).
 */
STATIC int
xrep_rtrmap_stash_bitmap(
	struct xrep_rtrmap	*rr,
	struct xrgb_bitmap	*bitmap,
	const struct xfs_owner_info	*oinfo)
{
	struct xrep_rtrmap_stash_run	rsr = {
		.rr		= rr,
		.owner		= oinfo->oi_owner,
	};

	return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr);
}

/* Record a CoW staging extent.
*/ 462 STATIC int 463 xrep_rtrmap_walk_cowblocks( 464 struct xfs_btree_cur *cur, 465 const struct xfs_refcount_irec *irec, 466 void *priv) 467 { 468 struct xrgb_bitmap *bitmap = priv; 469 470 if (!xfs_refcount_check_domain(irec) || 471 irec->rc_domain != XFS_REFC_DOMAIN_COW) 472 return -EFSCORRUPTED; 473 474 return xrgb_bitmap_set(bitmap, irec->rc_startblock, 475 irec->rc_blockcount); 476 } 477 478 /* 479 * Collect rmaps for the blocks containing the refcount btree, and all CoW 480 * staging extents. 481 */ 482 STATIC int 483 xrep_rtrmap_find_refcount_rmaps( 484 struct xrep_rtrmap *rr) 485 { 486 struct xrgb_bitmap cow_blocks; /* COWBIT */ 487 struct xfs_refcount_irec low = { 488 .rc_startblock = 0, 489 .rc_domain = XFS_REFC_DOMAIN_COW, 490 }; 491 struct xfs_refcount_irec high = { 492 .rc_startblock = -1U, 493 .rc_domain = XFS_REFC_DOMAIN_COW, 494 }; 495 struct xfs_scrub *sc = rr->sc; 496 int error; 497 498 if (!xfs_has_rtreflink(sc->mp)) 499 return 0; 500 501 xrgb_bitmap_init(&cow_blocks); 502 503 /* Collect rmaps for CoW staging extents. */ 504 error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high, 505 xrep_rtrmap_walk_cowblocks, &cow_blocks); 506 if (error) 507 goto out_bitmap; 508 509 /* Generate rmaps for everything. */ 510 error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW); 511 if (error) 512 goto out_bitmap; 513 514 out_bitmap: 515 xrgb_bitmap_destroy(&cow_blocks); 516 return error; 517 } 518 519 /* Count and check all collected records. */ 520 STATIC int 521 xrep_rtrmap_check_record( 522 struct xfs_btree_cur *cur, 523 const struct xfs_rmap_irec *rec, 524 void *priv) 525 { 526 struct xrep_rtrmap *rr = priv; 527 int error; 528 529 error = xrep_rtrmap_check_mapping(rr->sc, rec); 530 if (error) 531 return error; 532 533 rr->nr_records++; 534 return 0; 535 } 536 537 /* Generate all the reverse-mappings for the realtime device. 
 */
STATIC int
xrep_rtrmap_find_rmaps(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_perag	*pag = NULL;
	struct xfs_inode	*ip;
	struct xfs_btree_cur	*mcur;
	int			error;

	/*
	 * Generate rmaps for the realtime superblock.  Only rtgroup 0 hosts
	 * the rt superblock, which occupies the first rt extent.
	 */
	if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) {
		error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize,
				XFS_RMAP_OWN_FS, 0, 0);
		if (error)
			return error;
	}

	/* Find CoW staging extents. */
	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xrep_rtrmap_find_refcount_rmaps(rr);
	xchk_rtgroup_btcur_free(&sc->sr);
	if (error)
		return error;

	/*
	 * Set up for a potentially lengthy filesystem scan by reducing our
	 * transaction resource usage for the duration.  Specifically:
	 *
	 * Unlock the realtime metadata inodes and cancel the transaction to
	 * release the log grant space while we scan the filesystem.
	 *
	 * Create a new empty transaction to eliminate the possibility of the
	 * inode scan deadlocking on cyclical metadata.
	 *
	 * We pass the empty transaction to the file scanning function to
	 * avoid repeatedly cycling empty transactions.  This can be done
	 * even though we take the IOLOCK to quiesce the file because empty
	 * transactions do not take sb_internal.
	 */
	xchk_trans_cancel(sc);
	xchk_rtgroup_unlock(&sc->sr);
	xchk_trans_alloc_empty(sc);

	/* xchk_iscan_iter returns 1 while there are more inodes to visit. */
	while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) {
		error = xrep_rtrmap_scan_inode(rr, ip);
		xchk_irele(sc, ip);
		if (error)
			break;

		if (xchk_should_terminate(sc, &error))
			break;
	}
	xchk_iscan_iter_finish(&rr->iscan);
	if (error)
		return error;

	/*
	 * Switch out for a real transaction and lock the RT metadata in
	 * preparation for building a new tree.
	 */
	xchk_trans_cancel(sc);
	error = xchk_setup_rt(sc);
	if (error)
		return error;
	error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL);
	if (error)
		return error;

	/*
	 * If a hook failed to update the in-memory btree, we lack the data to
	 * continue the repair.
	 */
	if (xchk_iscan_aborted(&rr->iscan))
		return -EFSCORRUPTED;

	/*
	 * Scan for old rtrmap blocks.  Note that the old btree lives on the
	 * data device, so we walk every AG's rmap btree; release the perag
	 * reference ourselves if we bail out mid-iteration.
	 */
	while ((pag = xfs_perag_next(sc->mp, pag))) {
		error = xrep_rtrmap_scan_ag(rr, pag);
		if (error) {
			xfs_perag_rele(pag);
			return error;
		}
	}

	/*
	 * Now that we have everything locked again, we need to count the
	 * number of rmap records stashed in the btree.  This should reflect
	 * all actively-owned rt files in the filesystem.  At the same time,
	 * check all our records before we start building a new btree, which
	 * requires the rtbitmap lock.
	 */
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree);
	rr->nr_records = 0;
	error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr);
	xfs_btree_del_cursor(mcur, error);

	return error;
}

/* Building the new rtrmap btree. */

/* Retrieve rtrmapbt data for bulk load.
*/ 641 STATIC int 642 xrep_rtrmap_get_records( 643 struct xfs_btree_cur *cur, 644 unsigned int idx, 645 struct xfs_btree_block *block, 646 unsigned int nr_wanted, 647 void *priv) 648 { 649 struct xrep_rtrmap *rr = priv; 650 union xfs_btree_rec *block_rec; 651 unsigned int loaded; 652 int error; 653 654 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { 655 int stat = 0; 656 657 error = xfs_btree_increment(rr->mcur, 0, &stat); 658 if (error) 659 return error; 660 if (!stat) 661 return -EFSCORRUPTED; 662 663 error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat); 664 if (error) 665 return error; 666 if (!stat) 667 return -EFSCORRUPTED; 668 669 block_rec = xfs_btree_rec_addr(cur, idx, block); 670 cur->bc_ops->init_rec_from_cur(cur, block_rec); 671 } 672 673 return loaded; 674 } 675 676 /* Feed one of the new btree blocks to the bulk loader. */ 677 STATIC int 678 xrep_rtrmap_claim_block( 679 struct xfs_btree_cur *cur, 680 union xfs_btree_ptr *ptr, 681 void *priv) 682 { 683 struct xrep_rtrmap *rr = priv; 684 685 return xrep_newbt_claim_block(cur, &rr->new_btree, ptr); 686 } 687 688 /* Figure out how much space we need to create the incore btree root block. */ 689 STATIC size_t 690 xrep_rtrmap_iroot_size( 691 struct xfs_btree_cur *cur, 692 unsigned int level, 693 unsigned int nr_this_level, 694 void *priv) 695 { 696 return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level); 697 } 698 699 /* 700 * Use the collected rmap information to stage a new rmap btree. If this is 701 * successful we'll return with the new btree root information logged to the 702 * repair transaction but not yet committed. This implements section (III) 703 * above. 
 */
STATIC int
xrep_rtrmap_build_new_tree(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	struct xfs_btree_cur	*rmap_cur;
	int			error;

	/*
	 * Prepare to construct the new btree by reserving disk space for the
	 * new btree and setting up all the accounting information we'll need
	 * to root the new btree while it's under construction and before we
	 * attach it to the realtime rmapbt inode.
	 */
	error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc);
	if (error)
		return error;

	rr->new_btree.bload.get_records = xrep_rtrmap_get_records;
	rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block;
	rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size;

	rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg);
	xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake);

	/* Compute how many blocks we'll need for the rmaps collected. */
	error = xfs_btree_bload_compute_geometry(rmap_cur,
			&rr->new_btree.bload, rr->nr_records);
	if (error)
		goto err_cur;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_cur;

	/*
	 * Guess how many blocks we're going to need to rebuild an entire
	 * rtrmapbt from the number of extents we found, and pump up our
	 * transaction to have sufficient block reservation.  We're allowed
	 * to exceed quota to repair inconsistent metadata, though this is
	 * unlikely.
	 */
	error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg),
			rr->new_btree.bload.nr_blocks, 0, true);
	if (error)
		goto err_cur;

	/* Reserve the space we'll need for the new btree. */
	error = xrep_newbt_alloc_blocks(&rr->new_btree,
			rr->new_btree.bload.nr_blocks);
	if (error)
		goto err_cur;

	/*
	 * Create a cursor to the in-memory btree so that we can bulk load
	 * the new btree.
	 */
	rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree);
	error = xfs_btree_goto_left_edge(rr->mcur);
	if (error)
		goto err_mcur;

	/* Add all observed rmap records. */
	rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE;
	error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr);
	if (error)
		goto err_mcur;

	/*
	 * Install the new rtrmap btree in the inode.  After this point the
	 * old btree is no longer accessible, the new tree is live, and we
	 * can delete the cursor.
	 */
	xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp);
	xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks);
	xfs_btree_del_cursor(rmap_cur, 0);
	xfs_btree_del_cursor(rr->mcur, 0);
	rr->mcur = NULL;

	/*
	 * Now that we've written the new btree to disk, we don't need to keep
	 * updating the in-memory btree.  Abort the scan to stop live updates.
	 */
	xchk_iscan_abort(&rr->iscan);

	/* Dispose of any unused blocks and the accounting information. */
	error = xrep_newbt_commit(&rr->new_btree);
	if (error)
		return error;

	return xrep_roll_trans(sc);

err_mcur:
	xfs_btree_del_cursor(rr->mcur, error);
err_cur:
	xfs_btree_del_cursor(rmap_cur, error);
	xrep_newbt_cancel(&rr->new_btree);
	return error;
}

/* Reaping the old btree. */

/*
 * Decide whether a live rmap update should be replayed into our shadow
 * btree.  Non-inode owners (CoW, FS metadata) were captured before the
 * inode scan started, so those updates are always wanted; inode owners are
 * wanted only once the scanner has visited them.
 */
static inline bool
xrep_rtrmapbt_want_live_update(
	struct xchk_iscan	*iscan,
	const struct xfs_owner_info	*oi)
{
	if (xchk_iscan_aborted(iscan))
		return false;

	/*
	 * We scanned the CoW staging extents before we started the iscan, so
	 * we need all the updates.
	 */
	if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner))
		return true;

	/* Ignore updates to files that the scanner hasn't visited yet. */
	return xchk_iscan_want_live_update(iscan, oi->oi_owner);
}

/*
 * Apply a rtrmapbt update from the regular filesystem into our shadow btree.
 * We're running from the thread that owns the rtrmap ILOCK and is generating
 * the update, so we must be careful about which parts of the struct
 * xrep_rtrmap that we change.
 */
static int
xrep_rtrmapbt_live_update(
	struct notifier_block	*nb,
	unsigned long		action,
	void			*data)
{
	struct xfs_rmap_update_params	*p = data;
	struct xrep_rtrmap	*rr;
	struct xfs_mount	*mp;
	struct xfs_btree_cur	*mcur;
	struct xfs_trans	*tp;
	int			error;

	rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb);
	mp = rr->sc->mp;

	if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo))
		goto out_unlock;

	trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p);

	tp = xfs_trans_alloc_empty(mp);

	mutex_lock(&rr->lock);
	mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree);
	error = __xfs_rmap_finish_intent(mcur, action, p->startblock,
			p->blockcount, &p->oinfo, p->unwritten);
	xfs_btree_del_cursor(mcur, error);
	if (error)
		goto out_cancel;

	error = xfbtree_trans_commit(&rr->rtrmap_btree, tp);
	if (error)
		goto out_cancel;

	/*
	 * The empty transaction carried no on-disk changes of its own, so
	 * cancel it now that the xfbtree buffers have been committed.
	 */
	xfs_trans_cancel(tp);
	mutex_unlock(&rr->lock);
	return NOTIFY_DONE;

out_cancel:
	xfbtree_trans_cancel(&rr->rtrmap_btree, tp);
	xfs_trans_cancel(tp);
	/* The shadow btree is now incomplete; poison the scan. */
	xchk_iscan_abort(&rr->iscan);
	mutex_unlock(&rr->lock);
out_unlock:
	/* Notifier chains always proceed; errors are recorded via the abort. */
	return NOTIFY_DONE;
}

/* Set up the filesystem scan components.
 */
STATIC int
xrep_rtrmap_setup_scan(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;
	int			error;

	mutex_init(&rr->lock);
	xfsb_bitmap_init(&rr->old_rtrmapbt_blocks);

	/* Set up some storage */
	error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp,
			rtg_rgno(sc->sr.rtg));
	if (error)
		goto out_bitmap;

	/* Retry iget every tenth of a second for up to 30 seconds. */
	xchk_iscan_start(sc, 30000, 100, &rr->iscan);

	/*
	 * Hook into live rtrmap operations so that we can update our
	 * in-memory btree to reflect live changes on the filesystem.  Since
	 * we drop the rtrmap ILOCK to scan all the inodes, we need this
	 * piece to avoid installing a stale btree.
	 */
	ASSERT(sc->flags & XCHK_FSGATES_RMAP);
	xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update);
	error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook);
	if (error)
		goto out_iscan;
	return 0;

out_iscan:
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
out_bitmap:
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
	return error;
}

/*
 * Tear down scan components.  The iscan is aborted *before* the hook is
 * removed so that a racing live update bails out instead of touching the
 * xfbtree we're about to destroy.
 */
STATIC void
xrep_rtrmap_teardown(
	struct xrep_rtrmap	*rr)
{
	struct xfs_scrub	*sc = rr->sc;

	xchk_iscan_abort(&rr->iscan);
	xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook);
	xchk_iscan_teardown(&rr->iscan);
	xfbtree_destroy(&rr->rtrmap_btree);
	xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks);
	mutex_destroy(&rr->lock);
}

/*
 * Repair the realtime rmap btree.  Top-level driver: fix the rmap inode's
 * forks, collect rmaps from primary metadata, build and install a new
 * btree, then reap the old btree's blocks.
 */
int
xrep_rtrmapbt(
	struct xfs_scrub	*sc)
{
	struct xrep_rtrmap	*rr = sc->buf;
	int			error;

	/* Make sure any problems with the fork are fixed. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	error = xrep_rtrmap_setup_scan(rr);
	if (error)
		return error;

	/* Collect rmaps for realtime files. */
	error = xrep_rtrmap_find_rmaps(rr);
	if (error)
		goto out_records;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Rebuild the rtrmap information. */
	error = xrep_rtrmap_build_new_tree(rr);
	if (error)
		goto out_records;

	/*
	 * Free all the extents that were allocated to the former rtrmapbt
	 * and aren't cross-linked with something else.
	 */
	error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks);
	if (error)
		goto out_records;

out_records:
	xrep_rtrmap_teardown(rr);
	return error;
}