| repair.c (86a464179cef7185ad9e540d51063e7f196e55ba) | repair.c (e06ef14b9f8eb5edab8c466680818d436eefdff0) |
|---|---|
| 1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6#include "xfs.h" 7#include "xfs_fs.h" 8#include "xfs_shared.h" --- 307 unchanged lines hidden (view full) --- 316 * 317 * For rmapbt reconstructions we must use different tactics for extent 318 * collection. First we iterate all primary metadata (this excludes the old 319 * rmapbt, obviously) to generate new rmap records. The gaps in the rmap 320 * records are collected as bitmap. The bnobt records are collected as 321 * sublist. As with the other btrees we subtract sublist from bitmap, and the 322 * result (since the rmapbt lives in the free space) are the blocks from the 323 * old rmapbt. | 1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6#include "xfs.h" 7#include "xfs_fs.h" 8#include "xfs_shared.h" --- 307 unchanged lines hidden (view full) --- 316 * 317 * For rmapbt reconstructions we must use different tactics for extent 318 * collection. First we iterate all primary metadata (this excludes the old 319 * rmapbt, obviously) to generate new rmap records. The gaps in the rmap 320 * records are collected as bitmap. The bnobt records are collected as 321 * sublist. As with the other btrees we subtract sublist from bitmap, and the 322 * result (since the rmapbt lives in the free space) are the blocks from the 323 * old rmapbt. |
| 324 * 325 * Disposal of Blocks from Old per-AG Btrees 326 * 327 * Now that we've constructed a new btree to replace the damaged one, we want 328 * to dispose of the blocks that (we think) the old btree was using. 329 * Previously, we used the rmapbt to collect the extents (bitmap) with the 330 * rmap owner corresponding to the tree we rebuilt, collected extents for any 331 * blocks with the same rmap owner that are owned by another data structure 332 * (sublist), and subtracted sublist from bitmap. In theory the extents 333 * remaining in bitmap are the old btree's blocks. 334 * 335 * Unfortunately, it's possible that the btree was crosslinked with other 336 * blocks on disk. The rmap data can tell us if there are multiple owners, so 337 * if the rmapbt says there is an owner of this block other than @oinfo, then 338 * the block is crosslinked. Remove the reverse mapping and continue. 339 * 340 * If there is one rmap record, we can free the block, which removes the 341 * reverse mapping but doesn't add the block to the free space. Our repair 342 * strategy is to hope the other metadata objects crosslinked on this block 343 * will be rebuilt (atop different blocks), thereby removing all the cross 344 * links. 345 * 346 * If there are no rmap records at all, we also free the block. If the btree 347 * being rebuilt lives in the free space (bnobt/cntbt/rmapbt) then there isn't 348 * supposed to be a rmap record and everything is ok. For other btrees there 349 * had to have been an rmap entry for the block to have ended up on @bitmap, 350 * so if it's gone now there's something wrong and the fs will shut down. 351 * 352 * Note: If there are multiple rmap records with only the same rmap owner as 353 * the btree we're trying to rebuild and the block is indeed owned by another 354 * data structure with the same rmap owner, then the block will be in sublist 355 * and therefore doesn't need disposal. If there are multiple rmap records 356 * with only the same rmap owner but the block is not owned by something with 357 * the same rmap owner, the block will be freed. 358 * 359 * The caller is responsible for locking the AG headers for the entire rebuild 360 * operation so that nothing else can sneak in and change the AG state while 361 * we're not looking. We also assume that the caller already invalidated any 362 * buffers associated with @bitmap. | |
| 363 */ 364 | 324 */ 325 |
| 365static int 366xrep_invalidate_block( 367 uint64_t fsbno, 368 void *priv) 369{ 370 struct xfs_scrub *sc = priv; 371 struct xfs_buf *bp; 372 int error; 373 374 /* Skip AG headers and post-EOFS blocks */ 375 if (!xfs_verify_fsbno(sc->mp, fsbno)) 376 return 0; 377 378 error = xfs_buf_incore(sc->mp->m_ddev_targp, 379 XFS_FSB_TO_DADDR(sc->mp, fsbno), 380 XFS_FSB_TO_BB(sc->mp, 1), XBF_TRYLOCK, &bp); 381 if (error) 382 return 0; 383 384 xfs_trans_bjoin(sc->tp, bp); 385 xfs_trans_binval(sc->tp, bp); 386 return 0; 387} 388 389/* 390 * Invalidate buffers for per-AG btree blocks we're dumping. This function 391 * is not intended for use with file data repairs; we have bunmapi for that. 392 */ 393int 394xrep_invalidate_blocks( 395 struct xfs_scrub *sc, 396 struct xbitmap *bitmap) 397{ 398 /* 399 * For each block in each extent, see if there's an incore buffer for 400 * exactly that block; if so, invalidate it. The buffer cache only 401 * lets us look for one buffer at a time, so we have to look one block 402 * at a time. Avoid invalidating AG headers and post-EOFS blocks 403 * because we never own those; and if we can't TRYLOCK the buffer we 404 * assume it's owned by someone else. 405 */ 406 return xbitmap_walk_bits(bitmap, xrep_invalidate_block, sc); 407} 408 | |
| 409/* Ensure the freelist is the correct size. */ 410int 411xrep_fix_freelist( 412 struct xfs_scrub *sc, 413 bool can_shrink) 414{ 415 struct xfs_alloc_arg args = {0}; 416 417 args.mp = sc->mp; 418 args.tp = sc->tp; 419 args.agno = sc->sa.pag->pag_agno; 420 args.alignment = 1; 421 args.pag = sc->sa.pag; 422 423 return xfs_alloc_fix_freelist(&args, 424 can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK); 425} 426 | 326/* Ensure the freelist is the correct size. */ 327int 328xrep_fix_freelist( 329 struct xfs_scrub *sc, 330 bool can_shrink) 331{ 332 struct xfs_alloc_arg args = {0}; 333 334 args.mp = sc->mp; 335 args.tp = sc->tp; 336 args.agno = sc->sa.pag->pag_agno; 337 args.alignment = 1; 338 args.pag = sc->sa.pag; 339 340 return xfs_alloc_fix_freelist(&args, 341 can_shrink ? 0 : XFS_ALLOC_FLAG_NOSHRINK); 342} 343 |
| 427/* Information about reaping extents after a repair. */ 428struct xrep_reap_state { 429 struct xfs_scrub *sc; 430 431 /* Reverse mapping owner and metadata reservation type. */ 432 const struct xfs_owner_info *oinfo; 433 enum xfs_ag_resv_type resv; 434}; 435 | |
| 436/* | 344/* |
| 437 * Put a block back on the AGFL. 438 */ 439STATIC int 440xrep_put_freelist( 441 struct xfs_scrub *sc, 442 xfs_agblock_t agbno) 443{ 444 struct xfs_buf *agfl_bp; 445 int error; 446 447 /* Make sure there's space on the freelist. */ 448 error = xrep_fix_freelist(sc, true); 449 if (error) 450 return error; 451 452 /* 453 * Since we're "freeing" a lost block onto the AGFL, we have to 454 * create an rmap for the block prior to merging it or else other 455 * parts will break. 456 */ 457 error = xfs_rmap_alloc(sc->tp, sc->sa.agf_bp, sc->sa.pag, agbno, 1, 458 &XFS_RMAP_OINFO_AG); 459 if (error) 460 return error; 461 462 /* Put the block on the AGFL. */ 463 error = xfs_alloc_read_agfl(sc->sa.pag, sc->tp, &agfl_bp); 464 if (error) 465 return error; 466 467 error = xfs_alloc_put_freelist(sc->sa.pag, sc->tp, sc->sa.agf_bp, 468 agfl_bp, agbno, 0); 469 if (error) 470 return error; 471 xfs_extent_busy_insert(sc->tp, sc->sa.pag, agbno, 1, 472 XFS_EXTENT_BUSY_SKIP_DISCARD); 473 474 return 0; 475} 476 477/* Dispose of a single block. */ 478STATIC int 479xrep_reap_block( 480 uint64_t fsbno, 481 void *priv) 482{ 483 struct xrep_reap_state *rs = priv; 484 struct xfs_scrub *sc = rs->sc; 485 struct xfs_btree_cur *cur; 486 struct xfs_buf *agf_bp = NULL; 487 xfs_agblock_t agbno; 488 bool has_other_rmap; 489 int error; 490 491 ASSERT(sc->ip != NULL || 492 XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno); 493 trace_xrep_dispose_btree_extent(sc->mp, 494 XFS_FSB_TO_AGNO(sc->mp, fsbno), 495 XFS_FSB_TO_AGBNO(sc->mp, fsbno), 1); 496 497 agbno = XFS_FSB_TO_AGBNO(sc->mp, fsbno); 498 ASSERT(XFS_FSB_TO_AGNO(sc->mp, fsbno) == sc->sa.pag->pag_agno); 499 500 /* 501 * If we are repairing per-inode metadata, we need to read in the AGF 502 * buffer. Otherwise, we're repairing a per-AG structure, so reuse 503 * the AGF buffer that the setup functions already grabbed. 504 */ 505 if (sc->ip) { 506 error = xfs_alloc_read_agf(sc->sa.pag, sc->tp, 0, &agf_bp); 507 if (error) 508 return error; 509 } else { 510 agf_bp = sc->sa.agf_bp; 511 } 512 cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf_bp, sc->sa.pag); 513 514 /* Can we find any other rmappings? */ 515 error = xfs_rmap_has_other_keys(cur, agbno, 1, rs->oinfo, 516 &has_other_rmap); 517 xfs_btree_del_cursor(cur, error); 518 if (error) 519 goto out_free; 520 521 /* 522 * If there are other rmappings, this block is cross linked and must 523 * not be freed. Remove the reverse mapping and move on. Otherwise, 524 * we were the only owner of the block, so free the extent, which will 525 * also remove the rmap. 526 * 527 * XXX: XFS doesn't support detecting the case where a single block 528 * metadata structure is crosslinked with a multi-block structure 529 * because the buffer cache doesn't detect aliasing problems, so we 530 * can't fix 100% of crosslinking problems (yet). The verifiers will 531 * blow on writeout, the filesystem will shut down, and the admin gets 532 * to run xfs_repair. 533 */ 534 if (has_other_rmap) 535 error = xfs_rmap_free(sc->tp, agf_bp, sc->sa.pag, agbno, 536 1, rs->oinfo); 537 else if (rs->resv == XFS_AG_RESV_AGFL) 538 error = xrep_put_freelist(sc, agbno); 539 else 540 error = xfs_free_extent(sc->tp, sc->sa.pag, agbno, 1, rs->oinfo, 541 rs->resv); 542 if (agf_bp != sc->sa.agf_bp) 543 xfs_trans_brelse(sc->tp, agf_bp); 544 if (error) 545 return error; 546 547 if (sc->ip) 548 return xfs_trans_roll_inode(&sc->tp, sc->ip); 549 return xrep_roll_ag_trans(sc); 550 551out_free: 552 if (agf_bp != sc->sa.agf_bp) 553 xfs_trans_brelse(sc->tp, agf_bp); 554 return error; 555} 556 557/* Dispose of every block of every extent in the bitmap. */ 558int 559xrep_reap_extents( 560 struct xfs_scrub *sc, 561 struct xbitmap *bitmap, 562 const struct xfs_owner_info *oinfo, 563 enum xfs_ag_resv_type type) 564{ 565 struct xrep_reap_state rs = { 566 .sc = sc, 567 .oinfo = oinfo, 568 .resv = type, 569 }; 570 571 ASSERT(xfs_has_rmapbt(sc->mp)); 572 573 return xbitmap_walk_bits(bitmap, xrep_reap_block, &rs); 574} 575 576/* | |
| 577 * Finding per-AG Btree Roots for AGF/AGI Reconstruction 578 * 579 * If the AGF or AGI become slightly corrupted, it may be necessary to rebuild 580 * the AG headers by using the rmap data to rummage through the AG looking for 581 * btree roots. This is not guaranteed to work if the AG is heavily damaged 582 * or the rmap data are corrupt. 583 * 584 * Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL --- 324 unchanged lines hidden --- | 345 * Finding per-AG Btree Roots for AGF/AGI Reconstruction 346 * 347 * If the AGF or AGI become slightly corrupted, it may be necessary to rebuild 348 * the AG headers by using the rmap data to rummage through the AG looking for 349 * btree roots. This is not guaranteed to work if the AG is heavily damaged 350 * or the rmap data are corrupt. 351 * 352 * Callers of xrep_find_ag_btree_roots must lock the AGF and AGFL --- 324 unchanged lines hidden --- |