1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2020-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_btree_staging.h" 15 #include "xfs_buf_mem.h" 16 #include "xfs_btree_mem.h" 17 #include "xfs_bit.h" 18 #include "xfs_log_format.h" 19 #include "xfs_trans.h" 20 #include "xfs_sb.h" 21 #include "xfs_alloc.h" 22 #include "xfs_rmap.h" 23 #include "xfs_rmap_btree.h" 24 #include "xfs_rtrmap_btree.h" 25 #include "xfs_inode.h" 26 #include "xfs_icache.h" 27 #include "xfs_bmap.h" 28 #include "xfs_bmap_btree.h" 29 #include "xfs_quota.h" 30 #include "xfs_rtalloc.h" 31 #include "xfs_ag.h" 32 #include "xfs_rtgroup.h" 33 #include "xfs_refcount.h" 34 #include "scrub/xfs_scrub.h" 35 #include "scrub/scrub.h" 36 #include "scrub/common.h" 37 #include "scrub/btree.h" 38 #include "scrub/trace.h" 39 #include "scrub/repair.h" 40 #include "scrub/bitmap.h" 41 #include "scrub/fsb_bitmap.h" 42 #include "scrub/rgb_bitmap.h" 43 #include "scrub/xfile.h" 44 #include "scrub/xfarray.h" 45 #include "scrub/iscan.h" 46 #include "scrub/newbt.h" 47 #include "scrub/reap.h" 48 49 /* 50 * Realtime Reverse Mapping Btree Repair 51 * ===================================== 52 * 53 * This isn't quite as difficult as repairing the rmap btree on the data 54 * device, since we only store the data fork extents of realtime files on the 55 * realtime device. We still have to freeze the filesystem and stop the 56 * background threads like we do for the rmap repair, but we only have to scan 57 * realtime inodes. 58 * 59 * Collecting entries for the new realtime rmap btree is easy -- all we have 60 * to do is generate rtrmap entries from the data fork mappings of all realtime 61 * files in the filesystem. We then scan the rmap btrees of the data device 62 * looking for extents belonging to the old btree and note them in a bitmap. 63 * 64 * To rebuild the realtime rmap btree, we bulk-load the collected mappings into 65 * a new btree cursor and atomically swap that into the realtime inode. Then 66 * we can free the blocks from the old btree. 67 * 68 * We use the 'xrep_rtrmap' prefix for all the rmap functions. 69 */ 70 71 /* Context for collecting rmaps */ 72 struct xrep_rtrmap { 73 /* new rtrmapbt information */ 74 struct xrep_newbt new_btree; 75 76 /* lock for the xfbtree and xfile */ 77 struct mutex lock; 78 79 /* rmap records generated from primary metadata */ 80 struct xfbtree rtrmap_btree; 81 82 struct xfs_scrub *sc; 83 84 /* bitmap of old rtrmapbt blocks */ 85 struct xfsb_bitmap old_rtrmapbt_blocks; 86 87 /* Hooks into rtrmap update code. */ 88 struct xfs_rmap_hook rhook; 89 90 /* inode scan cursor */ 91 struct xchk_iscan iscan; 92 93 /* in-memory btree cursor for the ->get_blocks walk */ 94 struct xfs_btree_cur *mcur; 95 96 /* Number of records we're staging in the new btree. */ 97 uint64_t nr_records; 98 }; 99 100 /* Set us up to repair rt reverse mapping btrees. */ 101 int 102 xrep_setup_rtrmapbt( 103 struct xfs_scrub *sc) 104 { 105 struct xrep_rtrmap *rr; 106 char *descr; 107 int error; 108 109 xchk_fsgates_enable(sc, XCHK_FSGATES_RMAP); 110 111 descr = xchk_xfile_rtgroup_descr(sc, "reverse mapping records"); 112 error = xrep_setup_xfbtree(sc, descr); 113 kfree(descr); 114 if (error) 115 return error; 116 117 rr = kzalloc(sizeof(struct xrep_rtrmap), XCHK_GFP_FLAGS); 118 if (!rr) 119 return -ENOMEM; 120 121 rr->sc = sc; 122 sc->buf = rr; 123 return 0; 124 } 125 126 /* Make sure there's nothing funny about this mapping. */ 127 STATIC int 128 xrep_rtrmap_check_mapping( 129 struct xfs_scrub *sc, 130 const struct xfs_rmap_irec *rec) 131 { 132 if (xfs_rtrmap_check_irec(sc->sr.rtg, rec) != NULL) 133 return -EFSCORRUPTED; 134 135 /* Make sure this isn't free space. */ 136 return xrep_require_rtext_inuse(sc, rec->rm_startblock, 137 rec->rm_blockcount); 138 } 139 140 /* Store a reverse-mapping record. */ 141 static inline int 142 xrep_rtrmap_stash( 143 struct xrep_rtrmap *rr, 144 xfs_rgblock_t startblock, 145 xfs_extlen_t blockcount, 146 uint64_t owner, 147 uint64_t offset, 148 unsigned int flags) 149 { 150 struct xfs_rmap_irec rmap = { 151 .rm_startblock = startblock, 152 .rm_blockcount = blockcount, 153 .rm_owner = owner, 154 .rm_offset = offset, 155 .rm_flags = flags, 156 }; 157 struct xfs_scrub *sc = rr->sc; 158 struct xfs_btree_cur *mcur; 159 int error = 0; 160 161 if (xchk_should_terminate(sc, &error)) 162 return error; 163 164 if (xchk_iscan_aborted(&rr->iscan)) 165 return -EFSCORRUPTED; 166 167 trace_xrep_rtrmap_found(sc->mp, &rmap); 168 169 /* Add entry to in-memory btree. */ 170 mutex_lock(&rr->lock); 171 mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, sc->tp, &rr->rtrmap_btree); 172 error = xfs_rmap_map_raw(mcur, &rmap); 173 xfs_btree_del_cursor(mcur, error); 174 if (error) 175 goto out_cancel; 176 177 error = xfbtree_trans_commit(&rr->rtrmap_btree, sc->tp); 178 if (error) 179 goto out_abort; 180 181 mutex_unlock(&rr->lock); 182 return 0; 183 184 out_cancel: 185 xfbtree_trans_cancel(&rr->rtrmap_btree, sc->tp); 186 out_abort: 187 xchk_iscan_abort(&rr->iscan); 188 mutex_unlock(&rr->lock); 189 return error; 190 } 191 192 /* Finding all file and bmbt extents. */ 193 194 /* Context for accumulating rmaps for an inode fork. */ 195 struct xrep_rtrmap_ifork { 196 /* 197 * Accumulate rmap data here to turn multiple adjacent bmaps into a 198 * single rmap. 199 */ 200 struct xfs_rmap_irec accum; 201 202 struct xrep_rtrmap *rr; 203 }; 204 205 /* Stash an rmap that we accumulated while walking an inode fork. */ 206 STATIC int 207 xrep_rtrmap_stash_accumulated( 208 struct xrep_rtrmap_ifork *rf) 209 { 210 if (rf->accum.rm_blockcount == 0) 211 return 0; 212 213 return xrep_rtrmap_stash(rf->rr, rf->accum.rm_startblock, 214 rf->accum.rm_blockcount, rf->accum.rm_owner, 215 rf->accum.rm_offset, rf->accum.rm_flags); 216 } 217 218 /* Accumulate a bmbt record. */ 219 STATIC int 220 xrep_rtrmap_visit_bmbt( 221 struct xfs_btree_cur *cur, 222 struct xfs_bmbt_irec *rec, 223 void *priv) 224 { 225 struct xrep_rtrmap_ifork *rf = priv; 226 struct xfs_rmap_irec *accum = &rf->accum; 227 struct xfs_mount *mp = rf->rr->sc->mp; 228 xfs_rgblock_t rgbno; 229 unsigned int rmap_flags = 0; 230 int error; 231 232 if (xfs_rtb_to_rgno(mp, rec->br_startblock) != 233 rtg_rgno(rf->rr->sc->sr.rtg)) 234 return 0; 235 236 if (rec->br_state == XFS_EXT_UNWRITTEN) 237 rmap_flags |= XFS_RMAP_UNWRITTEN; 238 239 /* If this bmap is adjacent to the previous one, just add it. */ 240 rgbno = xfs_rtb_to_rgbno(mp, rec->br_startblock); 241 if (accum->rm_blockcount > 0 && 242 rec->br_startoff == accum->rm_offset + accum->rm_blockcount && 243 rgbno == accum->rm_startblock + accum->rm_blockcount && 244 rmap_flags == accum->rm_flags) { 245 accum->rm_blockcount += rec->br_blockcount; 246 return 0; 247 } 248 249 /* Otherwise stash the old rmap and start accumulating a new one. */ 250 error = xrep_rtrmap_stash_accumulated(rf); 251 if (error) 252 return error; 253 254 accum->rm_startblock = rgbno; 255 accum->rm_blockcount = rec->br_blockcount; 256 accum->rm_offset = rec->br_startoff; 257 accum->rm_flags = rmap_flags; 258 return 0; 259 } 260 261 /* 262 * Iterate the block mapping btree to collect rmap records for anything in this 263 * fork that maps to the rt volume. Sets @mappings_done to true if we've 264 * scanned the block mappings in this fork. 265 */ 266 STATIC int 267 xrep_rtrmap_scan_bmbt( 268 struct xrep_rtrmap_ifork *rf, 269 struct xfs_inode *ip, 270 bool *mappings_done) 271 { 272 struct xrep_rtrmap *rr = rf->rr; 273 struct xfs_btree_cur *cur; 274 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); 275 int error = 0; 276 277 *mappings_done = false; 278 279 /* 280 * If the incore extent cache is already loaded, we'll just use the 281 * incore extent scanner to record mappings. Don't bother walking the 282 * ondisk extent tree. 283 */ 284 if (!xfs_need_iread_extents(ifp)) 285 return 0; 286 287 /* Accumulate all the mappings in the bmap btree. */ 288 cur = xfs_bmbt_init_cursor(rr->sc->mp, rr->sc->tp, ip, XFS_DATA_FORK); 289 error = xfs_bmap_query_all(cur, xrep_rtrmap_visit_bmbt, rf); 290 xfs_btree_del_cursor(cur, error); 291 if (error) 292 return error; 293 294 /* Stash any remaining accumulated rmaps and exit. */ 295 *mappings_done = true; 296 return xrep_rtrmap_stash_accumulated(rf); 297 } 298 299 /* 300 * Iterate the in-core extent cache to collect rmap records for anything in 301 * this fork that matches the AG. 302 */ 303 STATIC int 304 xrep_rtrmap_scan_iext( 305 struct xrep_rtrmap_ifork *rf, 306 struct xfs_ifork *ifp) 307 { 308 struct xfs_bmbt_irec rec; 309 struct xfs_iext_cursor icur; 310 int error; 311 312 for_each_xfs_iext(ifp, &icur, &rec) { 313 if (isnullstartblock(rec.br_startblock)) 314 continue; 315 error = xrep_rtrmap_visit_bmbt(NULL, &rec, rf); 316 if (error) 317 return error; 318 } 319 320 return xrep_rtrmap_stash_accumulated(rf); 321 } 322 323 /* Find all the extents on the realtime device mapped by an inode fork. */ 324 STATIC int 325 xrep_rtrmap_scan_dfork( 326 struct xrep_rtrmap *rr, 327 struct xfs_inode *ip) 328 { 329 struct xrep_rtrmap_ifork rf = { 330 .accum = { .rm_owner = ip->i_ino, }, 331 .rr = rr, 332 }; 333 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_DATA_FORK); 334 int error = 0; 335 336 if (ifp->if_format == XFS_DINODE_FMT_BTREE) { 337 bool mappings_done; 338 339 /* 340 * Scan the bmbt for mappings. If the incore extent tree is 341 * loaded, we want to scan the cached mappings since that's 342 * faster when the extent counts are very high. 343 */ 344 error = xrep_rtrmap_scan_bmbt(&rf, ip, &mappings_done); 345 if (error || mappings_done) 346 return error; 347 } else if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) { 348 /* realtime data forks should only be extents or btree */ 349 return -EFSCORRUPTED; 350 } 351 352 /* Scan incore extent cache. */ 353 return xrep_rtrmap_scan_iext(&rf, ifp); 354 } 355 356 /* Record reverse mappings for a file. */ 357 STATIC int 358 xrep_rtrmap_scan_inode( 359 struct xrep_rtrmap *rr, 360 struct xfs_inode *ip) 361 { 362 unsigned int lock_mode; 363 int error = 0; 364 365 /* Skip the rt rmap btree inode. */ 366 if (rr->sc->ip == ip) 367 return 0; 368 369 lock_mode = xfs_ilock_data_map_shared(ip); 370 371 /* Check the data fork if it's on the realtime device. */ 372 if (XFS_IS_REALTIME_INODE(ip)) { 373 error = xrep_rtrmap_scan_dfork(rr, ip); 374 if (error) 375 goto out_unlock; 376 } 377 378 xchk_iscan_mark_visited(&rr->iscan, ip); 379 out_unlock: 380 xfs_iunlock(ip, lock_mode); 381 return error; 382 } 383 384 /* Record extents that belong to the realtime rmap inode. */ 385 STATIC int 386 xrep_rtrmap_walk_rmap( 387 struct xfs_btree_cur *cur, 388 const struct xfs_rmap_irec *rec, 389 void *priv) 390 { 391 struct xrep_rtrmap *rr = priv; 392 int error = 0; 393 394 if (xchk_should_terminate(rr->sc, &error)) 395 return error; 396 397 /* Skip extents which are not owned by this inode and fork. */ 398 if (rec->rm_owner != rr->sc->ip->i_ino) 399 return 0; 400 401 error = xrep_check_ino_btree_mapping(rr->sc, rec); 402 if (error) 403 return error; 404 405 return xfsb_bitmap_set(&rr->old_rtrmapbt_blocks, 406 xfs_gbno_to_fsb(cur->bc_group, rec->rm_startblock), 407 rec->rm_blockcount); 408 } 409 410 /* Scan one AG for reverse mappings for the realtime rmap btree. */ 411 STATIC int 412 xrep_rtrmap_scan_ag( 413 struct xrep_rtrmap *rr, 414 struct xfs_perag *pag) 415 { 416 struct xfs_scrub *sc = rr->sc; 417 int error; 418 419 error = xrep_ag_init(sc, pag, &sc->sa); 420 if (error) 421 return error; 422 423 error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_rtrmap_walk_rmap, rr); 424 xchk_ag_free(sc, &sc->sa); 425 return error; 426 } 427 428 struct xrep_rtrmap_stash_run { 429 struct xrep_rtrmap *rr; 430 uint64_t owner; 431 }; 432 433 static int 434 xrep_rtrmap_stash_run( 435 uint32_t start, 436 uint32_t len, 437 void *priv) 438 { 439 struct xrep_rtrmap_stash_run *rsr = priv; 440 struct xrep_rtrmap *rr = rsr->rr; 441 xfs_rgblock_t rgbno = start; 442 443 return xrep_rtrmap_stash(rr, rgbno, len, rsr->owner, 0, 0); 444 } 445 446 /* 447 * Emit rmaps for every extent of bits set in the bitmap. Caller must ensure 448 * that the ranges are in units of FS blocks. 449 */ 450 STATIC int 451 xrep_rtrmap_stash_bitmap( 452 struct xrep_rtrmap *rr, 453 struct xrgb_bitmap *bitmap, 454 const struct xfs_owner_info *oinfo) 455 { 456 struct xrep_rtrmap_stash_run rsr = { 457 .rr = rr, 458 .owner = oinfo->oi_owner, 459 }; 460 461 return xrgb_bitmap_walk(bitmap, xrep_rtrmap_stash_run, &rsr); 462 } 463 464 /* Record a CoW staging extent. */ 465 STATIC int 466 xrep_rtrmap_walk_cowblocks( 467 struct xfs_btree_cur *cur, 468 const struct xfs_refcount_irec *irec, 469 void *priv) 470 { 471 struct xrgb_bitmap *bitmap = priv; 472 473 if (!xfs_refcount_check_domain(irec) || 474 irec->rc_domain != XFS_REFC_DOMAIN_COW) 475 return -EFSCORRUPTED; 476 477 return xrgb_bitmap_set(bitmap, irec->rc_startblock, 478 irec->rc_blockcount); 479 } 480 481 /* 482 * Collect rmaps for the blocks containing the refcount btree, and all CoW 483 * staging extents. 484 */ 485 STATIC int 486 xrep_rtrmap_find_refcount_rmaps( 487 struct xrep_rtrmap *rr) 488 { 489 struct xrgb_bitmap cow_blocks; /* COWBIT */ 490 struct xfs_refcount_irec low = { 491 .rc_startblock = 0, 492 .rc_domain = XFS_REFC_DOMAIN_COW, 493 }; 494 struct xfs_refcount_irec high = { 495 .rc_startblock = -1U, 496 .rc_domain = XFS_REFC_DOMAIN_COW, 497 }; 498 struct xfs_scrub *sc = rr->sc; 499 int error; 500 501 if (!xfs_has_rtreflink(sc->mp)) 502 return 0; 503 504 xrgb_bitmap_init(&cow_blocks); 505 506 /* Collect rmaps for CoW staging extents. */ 507 error = xfs_refcount_query_range(sc->sr.refc_cur, &low, &high, 508 xrep_rtrmap_walk_cowblocks, &cow_blocks); 509 if (error) 510 goto out_bitmap; 511 512 /* Generate rmaps for everything. */ 513 error = xrep_rtrmap_stash_bitmap(rr, &cow_blocks, &XFS_RMAP_OINFO_COW); 514 if (error) 515 goto out_bitmap; 516 517 out_bitmap: 518 xrgb_bitmap_destroy(&cow_blocks); 519 return error; 520 } 521 522 /* Count and check all collected records. */ 523 STATIC int 524 xrep_rtrmap_check_record( 525 struct xfs_btree_cur *cur, 526 const struct xfs_rmap_irec *rec, 527 void *priv) 528 { 529 struct xrep_rtrmap *rr = priv; 530 int error; 531 532 error = xrep_rtrmap_check_mapping(rr->sc, rec); 533 if (error) 534 return error; 535 536 rr->nr_records++; 537 return 0; 538 } 539 540 /* Generate all the reverse-mappings for the realtime device. */ 541 STATIC int 542 xrep_rtrmap_find_rmaps( 543 struct xrep_rtrmap *rr) 544 { 545 struct xfs_scrub *sc = rr->sc; 546 struct xfs_perag *pag = NULL; 547 struct xfs_inode *ip; 548 struct xfs_btree_cur *mcur; 549 int error; 550 551 /* Generate rmaps for the realtime superblock */ 552 if (xfs_has_rtsb(sc->mp) && rtg_rgno(rr->sc->sr.rtg) == 0) { 553 error = xrep_rtrmap_stash(rr, 0, sc->mp->m_sb.sb_rextsize, 554 XFS_RMAP_OWN_FS, 0, 0); 555 if (error) 556 return error; 557 } 558 559 /* Find CoW staging extents. */ 560 xrep_rtgroup_btcur_init(sc, &sc->sr); 561 error = xrep_rtrmap_find_refcount_rmaps(rr); 562 xchk_rtgroup_btcur_free(&sc->sr); 563 if (error) 564 return error; 565 566 /* 567 * Set up for a potentially lengthy filesystem scan by reducing our 568 * transaction resource usage for the duration. Specifically: 569 * 570 * Unlock the realtime metadata inodes and cancel the transaction to 571 * release the log grant space while we scan the filesystem. 572 * 573 * Create a new empty transaction to eliminate the possibility of the 574 * inode scan deadlocking on cyclical metadata. 575 * 576 * We pass the empty transaction to the file scanning function to avoid 577 * repeatedly cycling empty transactions. This can be done even though 578 * we take the IOLOCK to quiesce the file because empty transactions 579 * do not take sb_internal. 580 */ 581 xchk_trans_cancel(sc); 582 xchk_rtgroup_unlock(&sc->sr); 583 error = xchk_trans_alloc_empty(sc); 584 if (error) 585 return error; 586 587 while ((error = xchk_iscan_iter(&rr->iscan, &ip)) == 1) { 588 error = xrep_rtrmap_scan_inode(rr, ip); 589 xchk_irele(sc, ip); 590 if (error) 591 break; 592 593 if (xchk_should_terminate(sc, &error)) 594 break; 595 } 596 xchk_iscan_iter_finish(&rr->iscan); 597 if (error) 598 return error; 599 600 /* 601 * Switch out for a real transaction and lock the RT metadata in 602 * preparation for building a new tree. 603 */ 604 xchk_trans_cancel(sc); 605 error = xchk_setup_rt(sc); 606 if (error) 607 return error; 608 error = xchk_rtgroup_lock(sc, &sc->sr, XCHK_RTGLOCK_ALL); 609 if (error) 610 return error; 611 612 /* 613 * If a hook failed to update the in-memory btree, we lack the data to 614 * continue the repair. 615 */ 616 if (xchk_iscan_aborted(&rr->iscan)) 617 return -EFSCORRUPTED; 618 619 /* Scan for old rtrmap blocks. */ 620 while ((pag = xfs_perag_next(sc->mp, pag))) { 621 error = xrep_rtrmap_scan_ag(rr, pag); 622 if (error) { 623 xfs_perag_rele(pag); 624 return error; 625 } 626 } 627 628 /* 629 * Now that we have everything locked again, we need to count the 630 * number of rmap records stashed in the btree. This should reflect 631 * all actively-owned rt files in the filesystem. At the same time, 632 * check all our records before we start building a new btree, which 633 * requires the rtbitmap lock. 634 */ 635 mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, NULL, &rr->rtrmap_btree); 636 rr->nr_records = 0; 637 error = xfs_rmap_query_all(mcur, xrep_rtrmap_check_record, rr); 638 xfs_btree_del_cursor(mcur, error); 639 640 return error; 641 } 642 643 /* Building the new rtrmap btree. */ 644 645 /* Retrieve rtrmapbt data for bulk load. */ 646 STATIC int 647 xrep_rtrmap_get_records( 648 struct xfs_btree_cur *cur, 649 unsigned int idx, 650 struct xfs_btree_block *block, 651 unsigned int nr_wanted, 652 void *priv) 653 { 654 struct xrep_rtrmap *rr = priv; 655 union xfs_btree_rec *block_rec; 656 unsigned int loaded; 657 int error; 658 659 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { 660 int stat = 0; 661 662 error = xfs_btree_increment(rr->mcur, 0, &stat); 663 if (error) 664 return error; 665 if (!stat) 666 return -EFSCORRUPTED; 667 668 error = xfs_rmap_get_rec(rr->mcur, &cur->bc_rec.r, &stat); 669 if (error) 670 return error; 671 if (!stat) 672 return -EFSCORRUPTED; 673 674 block_rec = xfs_btree_rec_addr(cur, idx, block); 675 cur->bc_ops->init_rec_from_cur(cur, block_rec); 676 } 677 678 return loaded; 679 } 680 681 /* Feed one of the new btree blocks to the bulk loader. */ 682 STATIC int 683 xrep_rtrmap_claim_block( 684 struct xfs_btree_cur *cur, 685 union xfs_btree_ptr *ptr, 686 void *priv) 687 { 688 struct xrep_rtrmap *rr = priv; 689 690 return xrep_newbt_claim_block(cur, &rr->new_btree, ptr); 691 } 692 693 /* Figure out how much space we need to create the incore btree root block. */ 694 STATIC size_t 695 xrep_rtrmap_iroot_size( 696 struct xfs_btree_cur *cur, 697 unsigned int level, 698 unsigned int nr_this_level, 699 void *priv) 700 { 701 return xfs_rtrmap_broot_space_calc(cur->bc_mp, level, nr_this_level); 702 } 703 704 /* 705 * Use the collected rmap information to stage a new rmap btree. If this is 706 * successful we'll return with the new btree root information logged to the 707 * repair transaction but not yet committed. This implements section (III) 708 * above. 709 */ 710 STATIC int 711 xrep_rtrmap_build_new_tree( 712 struct xrep_rtrmap *rr) 713 { 714 struct xfs_scrub *sc = rr->sc; 715 struct xfs_rtgroup *rtg = sc->sr.rtg; 716 struct xfs_btree_cur *rmap_cur; 717 int error; 718 719 /* 720 * Prepare to construct the new btree by reserving disk space for the 721 * new btree and setting up all the accounting information we'll need 722 * to root the new btree while it's under construction and before we 723 * attach it to the realtime rmapbt inode. 724 */ 725 error = xrep_newbt_init_metadir_inode(&rr->new_btree, sc); 726 if (error) 727 return error; 728 729 rr->new_btree.bload.get_records = xrep_rtrmap_get_records; 730 rr->new_btree.bload.claim_block = xrep_rtrmap_claim_block; 731 rr->new_btree.bload.iroot_size = xrep_rtrmap_iroot_size; 732 733 rmap_cur = xfs_rtrmapbt_init_cursor(NULL, rtg); 734 xfs_btree_stage_ifakeroot(rmap_cur, &rr->new_btree.ifake); 735 736 /* Compute how many blocks we'll need for the rmaps collected. */ 737 error = xfs_btree_bload_compute_geometry(rmap_cur, 738 &rr->new_btree.bload, rr->nr_records); 739 if (error) 740 goto err_cur; 741 742 /* Last chance to abort before we start committing fixes. */ 743 if (xchk_should_terminate(sc, &error)) 744 goto err_cur; 745 746 /* 747 * Guess how many blocks we're going to need to rebuild an entire 748 * rtrmapbt from the number of extents we found, and pump up our 749 * transaction to have sufficient block reservation. We're allowed 750 * to exceed quota to repair inconsistent metadata, though this is 751 * unlikely. 752 */ 753 error = xfs_trans_reserve_more_inode(sc->tp, rtg_rmap(rtg), 754 rr->new_btree.bload.nr_blocks, 0, true); 755 if (error) 756 goto err_cur; 757 758 /* Reserve the space we'll need for the new btree. */ 759 error = xrep_newbt_alloc_blocks(&rr->new_btree, 760 rr->new_btree.bload.nr_blocks); 761 if (error) 762 goto err_cur; 763 764 /* 765 * Create a cursor to the in-memory btree so that we can bulk load the 766 * new btree. 767 */ 768 rr->mcur = xfs_rtrmapbt_mem_cursor(sc->sr.rtg, NULL, &rr->rtrmap_btree); 769 error = xfs_btree_goto_left_edge(rr->mcur); 770 if (error) 771 goto err_mcur; 772 773 /* Add all observed rmap records. */ 774 rr->new_btree.ifake.if_fork->if_format = XFS_DINODE_FMT_META_BTREE; 775 error = xfs_btree_bload(rmap_cur, &rr->new_btree.bload, rr); 776 if (error) 777 goto err_mcur; 778 779 /* 780 * Install the new rtrmap btree in the inode. After this point the old 781 * btree is no longer accessible, the new tree is live, and we can 782 * delete the cursor. 783 */ 784 xfs_rtrmapbt_commit_staged_btree(rmap_cur, sc->tp); 785 xrep_inode_set_nblocks(rr->sc, rr->new_btree.ifake.if_blocks); 786 xfs_btree_del_cursor(rmap_cur, 0); 787 xfs_btree_del_cursor(rr->mcur, 0); 788 rr->mcur = NULL; 789 790 /* 791 * Now that we've written the new btree to disk, we don't need to keep 792 * updating the in-memory btree. Abort the scan to stop live updates. 793 */ 794 xchk_iscan_abort(&rr->iscan); 795 796 /* Dispose of any unused blocks and the accounting information. */ 797 error = xrep_newbt_commit(&rr->new_btree); 798 if (error) 799 return error; 800 801 return xrep_roll_trans(sc); 802 803 err_mcur: 804 xfs_btree_del_cursor(rr->mcur, error); 805 err_cur: 806 xfs_btree_del_cursor(rmap_cur, error); 807 xrep_newbt_cancel(&rr->new_btree); 808 return error; 809 } 810 811 /* Reaping the old btree. */ 812 813 /* Reap the old rtrmapbt blocks. */ 814 STATIC int 815 xrep_rtrmap_remove_old_tree( 816 struct xrep_rtrmap *rr) 817 { 818 int error; 819 820 /* 821 * Free all the extents that were allocated to the former rtrmapbt and 822 * aren't cross-linked with something else. 823 */ 824 error = xrep_reap_metadir_fsblocks(rr->sc, &rr->old_rtrmapbt_blocks); 825 if (error) 826 return error; 827 828 /* 829 * Ensure the proper reservation for the rtrmap inode so that we don't 830 * fail to expand the new btree. 831 */ 832 return xrep_reset_metafile_resv(rr->sc); 833 } 834 835 static inline bool 836 xrep_rtrmapbt_want_live_update( 837 struct xchk_iscan *iscan, 838 const struct xfs_owner_info *oi) 839 { 840 if (xchk_iscan_aborted(iscan)) 841 return false; 842 843 /* 844 * We scanned the CoW staging extents before we started the iscan, so 845 * we need all the updates. 846 */ 847 if (XFS_RMAP_NON_INODE_OWNER(oi->oi_owner)) 848 return true; 849 850 /* Ignore updates to files that the scanner hasn't visited yet. */ 851 return xchk_iscan_want_live_update(iscan, oi->oi_owner); 852 } 853 854 /* 855 * Apply a rtrmapbt update from the regular filesystem into our shadow btree. 856 * We're running from the thread that owns the rtrmap ILOCK and is generating 857 * the update, so we must be careful about which parts of the struct 858 * xrep_rtrmap that we change. 859 */ 860 static int 861 xrep_rtrmapbt_live_update( 862 struct notifier_block *nb, 863 unsigned long action, 864 void *data) 865 { 866 struct xfs_rmap_update_params *p = data; 867 struct xrep_rtrmap *rr; 868 struct xfs_mount *mp; 869 struct xfs_btree_cur *mcur; 870 struct xfs_trans *tp; 871 void *txcookie; 872 int error; 873 874 rr = container_of(nb, struct xrep_rtrmap, rhook.rmap_hook.nb); 875 mp = rr->sc->mp; 876 877 if (!xrep_rtrmapbt_want_live_update(&rr->iscan, &p->oinfo)) 878 goto out_unlock; 879 880 trace_xrep_rmap_live_update(rtg_group(rr->sc->sr.rtg), action, p); 881 882 error = xrep_trans_alloc_hook_dummy(mp, &txcookie, &tp); 883 if (error) 884 goto out_abort; 885 886 mutex_lock(&rr->lock); 887 mcur = xfs_rtrmapbt_mem_cursor(rr->sc->sr.rtg, tp, &rr->rtrmap_btree); 888 error = __xfs_rmap_finish_intent(mcur, action, p->startblock, 889 p->blockcount, &p->oinfo, p->unwritten); 890 xfs_btree_del_cursor(mcur, error); 891 if (error) 892 goto out_cancel; 893 894 error = xfbtree_trans_commit(&rr->rtrmap_btree, tp); 895 if (error) 896 goto out_cancel; 897 898 xrep_trans_cancel_hook_dummy(&txcookie, tp); 899 mutex_unlock(&rr->lock); 900 return NOTIFY_DONE; 901 902 out_cancel: 903 xfbtree_trans_cancel(&rr->rtrmap_btree, tp); 904 xrep_trans_cancel_hook_dummy(&txcookie, tp); 905 out_abort: 906 xchk_iscan_abort(&rr->iscan); 907 mutex_unlock(&rr->lock); 908 out_unlock: 909 return NOTIFY_DONE; 910 } 911 912 /* Set up the filesystem scan components. */ 913 STATIC int 914 xrep_rtrmap_setup_scan( 915 struct xrep_rtrmap *rr) 916 { 917 struct xfs_scrub *sc = rr->sc; 918 int error; 919 920 mutex_init(&rr->lock); 921 xfsb_bitmap_init(&rr->old_rtrmapbt_blocks); 922 923 /* Set up some storage */ 924 error = xfs_rtrmapbt_mem_init(sc->mp, &rr->rtrmap_btree, sc->xmbtp, 925 rtg_rgno(sc->sr.rtg)); 926 if (error) 927 goto out_bitmap; 928 929 /* Retry iget every tenth of a second for up to 30 seconds. */ 930 xchk_iscan_start(sc, 30000, 100, &rr->iscan); 931 932 /* 933 * Hook into live rtrmap operations so that we can update our in-memory 934 * btree to reflect live changes on the filesystem. Since we drop the 935 * rtrmap ILOCK to scan all the inodes, we need this piece to avoid 936 * installing a stale btree. 937 */ 938 ASSERT(sc->flags & XCHK_FSGATES_RMAP); 939 xfs_rmap_hook_setup(&rr->rhook, xrep_rtrmapbt_live_update); 940 error = xfs_rmap_hook_add(rtg_group(sc->sr.rtg), &rr->rhook); 941 if (error) 942 goto out_iscan; 943 return 0; 944 945 out_iscan: 946 xchk_iscan_teardown(&rr->iscan); 947 xfbtree_destroy(&rr->rtrmap_btree); 948 out_bitmap: 949 xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks); 950 mutex_destroy(&rr->lock); 951 return error; 952 } 953 954 /* Tear down scan components. */ 955 STATIC void 956 xrep_rtrmap_teardown( 957 struct xrep_rtrmap *rr) 958 { 959 struct xfs_scrub *sc = rr->sc; 960 961 xchk_iscan_abort(&rr->iscan); 962 xfs_rmap_hook_del(rtg_group(sc->sr.rtg), &rr->rhook); 963 xchk_iscan_teardown(&rr->iscan); 964 xfbtree_destroy(&rr->rtrmap_btree); 965 xfsb_bitmap_destroy(&rr->old_rtrmapbt_blocks); 966 mutex_destroy(&rr->lock); 967 } 968 969 /* Repair the realtime rmap btree. */ 970 int 971 xrep_rtrmapbt( 972 struct xfs_scrub *sc) 973 { 974 struct xrep_rtrmap *rr = sc->buf; 975 int error; 976 977 /* Make sure any problems with the fork are fixed. */ 978 error = xrep_metadata_inode_forks(sc); 979 if (error) 980 return error; 981 982 error = xrep_rtrmap_setup_scan(rr); 983 if (error) 984 return error; 985 986 /* Collect rmaps for realtime files. */ 987 error = xrep_rtrmap_find_rmaps(rr); 988 if (error) 989 goto out_records; 990 991 xfs_trans_ijoin(sc->tp, sc->ip, 0); 992 993 /* Rebuild the rtrmap information. */ 994 error = xrep_rtrmap_build_new_tree(rr); 995 if (error) 996 goto out_records; 997 998 /* Kill the old tree. */ 999 error = xrep_rtrmap_remove_old_tree(rr); 1000 if (error) 1001 goto out_records; 1002 1003 out_records: 1004 xrep_rtrmap_teardown(rr); 1005 return error; 1006 } 1007