1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_btree_staging.h" 15 #include "xfs_bit.h" 16 #include "xfs_log_format.h" 17 #include "xfs_trans.h" 18 #include "xfs_sb.h" 19 #include "xfs_inode.h" 20 #include "xfs_alloc.h" 21 #include "xfs_ialloc.h" 22 #include "xfs_ialloc_btree.h" 23 #include "xfs_icache.h" 24 #include "xfs_rmap.h" 25 #include "xfs_rmap_btree.h" 26 #include "xfs_log.h" 27 #include "xfs_trans_priv.h" 28 #include "xfs_error.h" 29 #include "xfs_health.h" 30 #include "xfs_ag.h" 31 #include "scrub/xfs_scrub.h" 32 #include "scrub/scrub.h" 33 #include "scrub/common.h" 34 #include "scrub/btree.h" 35 #include "scrub/trace.h" 36 #include "scrub/repair.h" 37 #include "scrub/bitmap.h" 38 #include "scrub/agb_bitmap.h" 39 #include "scrub/xfile.h" 40 #include "scrub/xfarray.h" 41 #include "scrub/newbt.h" 42 #include "scrub/reap.h" 43 44 /* 45 * Inode Btree Repair 46 * ================== 47 * 48 * A quick refresher of inode btrees on a v5 filesystem: 49 * 50 * - Inode records are read into memory in units of 'inode clusters'. However 51 * many inodes fit in a cluster buffer is the smallest number of inodes that 52 * can be allocated or freed. Clusters are never smaller than one fs block 53 * though they can span multiple blocks. The size (in fs blocks) is 54 * computed with xfs_icluster_size_fsb(). The fs block alignment of a 55 * cluster is computed with xfs_ialloc_cluster_alignment(). 56 * 57 * - Each inode btree record can describe a single 'inode chunk'. The chunk 58 * size is defined to be 64 inodes. If sparse inodes are enabled, every 59 * inobt record must be aligned to the chunk size; if not, every record must 60 * be aligned to the start of a cluster. It is possible to construct an XFS 61 * geometry where one inobt record maps to multiple inode clusters; it is 62 * also possible to construct a geometry where multiple inobt records map to 63 * different parts of one inode cluster. 64 * 65 * - If sparse inodes are not enabled, the smallest unit of allocation for 66 * inode records is enough to contain one inode chunk's worth of inodes. 67 * 68 * - If sparse inodes are enabled, the holemask field will be active. Each 69 * bit of the holemask represents 4 potential inodes; if set, the 70 * corresponding space does *not* contain inodes and must be left alone. 71 * Clusters cannot be smaller than 4 inodes. The smallest unit of allocation 72 * of inode records is one inode cluster. 73 * 74 * So what's the rebuild algorithm? 75 * 76 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT 77 * records. The OWN_INOBT records are the old inode btree blocks and will be 78 * cleared out after we've rebuilt the tree. Each possible inode cluster 79 * within an OWN_INODES record will be read in; for each possible inobt record 80 * associated with that cluster, compute the freemask calculated from the 81 * i_mode data in the inode chunk. For sparse inodes the holemask will be 82 * calculated by creating the properly aligned inobt record and punching out 83 * any chunk that's missing. Inode allocations and frees grab the AGI first, 84 * so repair protects itself from concurrent access by locking the AGI. 85 * 86 * Once we've reconstructed all the inode records, we can create new inode 87 * btree roots and reload the btrees. We rebuild both inode trees at the same 88 * time because they have the same rmap owner and it would be more complex to 89 * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT 90 * blocks it owns. We have all the data we need to build both, so dump 91 * everything and start over. 92 * 93 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once. 94 */ 95 96 struct xrep_ibt { 97 /* Record under construction. */ 98 struct xfs_inobt_rec_incore rie; 99 100 /* new inobt information */ 101 struct xrep_newbt new_inobt; 102 103 /* new finobt information */ 104 struct xrep_newbt new_finobt; 105 106 /* Old inode btree blocks we found in the rmap. */ 107 struct xagb_bitmap old_iallocbt_blocks; 108 109 /* Reconstructed inode records. */ 110 struct xfarray *inode_records; 111 112 struct xfs_scrub *sc; 113 114 /* Number of inodes assigned disk space. */ 115 unsigned int icount; 116 117 /* Number of inodes in use. */ 118 unsigned int iused; 119 120 /* Number of finobt records needed. */ 121 unsigned int finobt_recs; 122 123 /* get_records()'s position in the inode record array. */ 124 xfarray_idx_t array_cur; 125 }; 126 127 /* 128 * Is this inode in use? If the inode is in memory we can tell from i_mode, 129 * otherwise we have to check di_mode in the on-disk buffer. We only care 130 * that the high (i.e. non-permission) bits of _mode are zero. This should be 131 * safe because repair keeps all AG headers locked until the end, and process 132 * trying to perform an inode allocation/free must lock the AGI. 133 * 134 * @cluster_ag_base is the inode offset of the cluster within the AG. 135 * @cluster_bp is the cluster buffer. 136 * @cluster_index is the inode offset within the inode cluster. 137 */ 138 STATIC int 139 xrep_ibt_check_ifree( 140 struct xrep_ibt *ri, 141 xfs_agino_t cluster_ag_base, 142 struct xfs_buf *cluster_bp, 143 unsigned int cluster_index, 144 bool *inuse) 145 { 146 struct xfs_scrub *sc = ri->sc; 147 struct xfs_mount *mp = sc->mp; 148 struct xfs_dinode *dip; 149 xfs_ino_t fsino; 150 xfs_agino_t agino; 151 xfs_agnumber_t agno = ri->sc->sa.pag->pag_agno; 152 unsigned int cluster_buf_base; 153 unsigned int offset; 154 int error; 155 156 agino = cluster_ag_base + cluster_index; 157 fsino = XFS_AGINO_TO_INO(mp, agno, agino); 158 159 /* Inode uncached or half assembled, read disk buffer */ 160 cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base); 161 offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize; 162 if (offset >= BBTOB(cluster_bp->b_length)) 163 return -EFSCORRUPTED; 164 dip = xfs_buf_offset(cluster_bp, offset); 165 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) 166 return -EFSCORRUPTED; 167 168 if (dip->di_version >= 3 && be64_to_cpu(dip->di_ino) != fsino) 169 return -EFSCORRUPTED; 170 171 /* Will the in-core inode tell us if it's in use? */ 172 error = xchk_inode_is_allocated(sc, agino, inuse); 173 if (!error) 174 return 0; 175 176 *inuse = dip->di_mode != 0; 177 return 0; 178 } 179 180 /* Stash the accumulated inobt record for rebuilding. */ 181 STATIC int 182 xrep_ibt_stash( 183 struct xrep_ibt *ri) 184 { 185 int error = 0; 186 187 if (xchk_should_terminate(ri->sc, &error)) 188 return error; 189 190 ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie); 191 if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL) 192 return -EFSCORRUPTED; 193 194 if (ri->rie.ir_freecount > 0) 195 ri->finobt_recs++; 196 197 trace_xrep_ibt_found(ri->sc->mp, ri->sc->sa.pag->pag_agno, &ri->rie); 198 199 error = xfarray_append(ri->inode_records, &ri->rie); 200 if (error) 201 return error; 202 203 ri->rie.ir_startino = NULLAGINO; 204 return 0; 205 } 206 207 /* 208 * Given an extent of inodes and an inode cluster buffer, calculate the 209 * location of the corresponding inobt record (creating it if necessary), 210 * then update the parts of the holemask and freemask of that record that 211 * correspond to the inode extent we were given. 212 * 213 * @cluster_ir_startino is the AG inode number of an inobt record that we're 214 * proposing to create for this inode cluster. If sparse inodes are enabled, 215 * we must round down to a chunk boundary to find the actual sparse record. 216 * @cluster_bp is the buffer of the inode cluster. 217 * @nr_inodes is the number of inodes to check from the cluster. 218 */ 219 STATIC int 220 xrep_ibt_cluster_record( 221 struct xrep_ibt *ri, 222 xfs_agino_t cluster_ir_startino, 223 struct xfs_buf *cluster_bp, 224 unsigned int nr_inodes) 225 { 226 struct xfs_scrub *sc = ri->sc; 227 struct xfs_mount *mp = sc->mp; 228 xfs_agino_t ir_startino; 229 unsigned int cluster_base; 230 unsigned int cluster_index; 231 int error = 0; 232 233 ir_startino = cluster_ir_startino; 234 if (xfs_has_sparseinodes(mp)) 235 ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK); 236 cluster_base = cluster_ir_startino - ir_startino; 237 238 /* 239 * If the accumulated inobt record doesn't map this cluster, add it to 240 * the list and reset it. 241 */ 242 if (ri->rie.ir_startino != NULLAGINO && 243 ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) { 244 error = xrep_ibt_stash(ri); 245 if (error) 246 return error; 247 } 248 249 if (ri->rie.ir_startino == NULLAGINO) { 250 ri->rie.ir_startino = ir_startino; 251 ri->rie.ir_free = XFS_INOBT_ALL_FREE; 252 ri->rie.ir_holemask = 0xFFFF; 253 ri->rie.ir_count = 0; 254 } 255 256 /* Record the whole cluster. */ 257 ri->icount += nr_inodes; 258 ri->rie.ir_count += nr_inodes; 259 ri->rie.ir_holemask &= ~xfs_inobt_maskn( 260 cluster_base / XFS_INODES_PER_HOLEMASK_BIT, 261 nr_inodes / XFS_INODES_PER_HOLEMASK_BIT); 262 263 /* Which inodes within this cluster are free? */ 264 for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) { 265 bool inuse = false; 266 267 error = xrep_ibt_check_ifree(ri, cluster_ir_startino, 268 cluster_bp, cluster_index, &inuse); 269 if (error) 270 return error; 271 if (!inuse) 272 continue; 273 ri->iused++; 274 ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base + 275 cluster_index); 276 } 277 return 0; 278 } 279 280 /* 281 * For each inode cluster covering the physical extent recorded by the rmapbt, 282 * we must calculate the properly aligned startino of that cluster, then 283 * iterate each cluster to fill in used and filled masks appropriately. We 284 * then use the (startino, used, filled) information to construct the 285 * appropriate inode records. 286 */ 287 STATIC int 288 xrep_ibt_process_cluster( 289 struct xrep_ibt *ri, 290 xfs_agblock_t cluster_bno) 291 { 292 struct xfs_imap imap; 293 struct xfs_buf *cluster_bp; 294 struct xfs_scrub *sc = ri->sc; 295 struct xfs_mount *mp = sc->mp; 296 struct xfs_ino_geometry *igeo = M_IGEO(mp); 297 xfs_agino_t cluster_ag_base; 298 xfs_agino_t irec_index; 299 unsigned int nr_inodes; 300 int error; 301 302 nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster, 303 XFS_INODES_PER_CHUNK); 304 305 /* 306 * Grab the inode cluster buffer. This is safe to do with a broken 307 * inobt because imap_to_bp directly maps the buffer without touching 308 * either inode btree. 309 */ 310 imap.im_blkno = XFS_AGB_TO_DADDR(mp, sc->sa.pag->pag_agno, cluster_bno); 311 imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster); 312 imap.im_boffset = 0; 313 error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp); 314 if (error) 315 return error; 316 317 /* 318 * Record the contents of each possible inobt record mapping this 319 * cluster. 320 */ 321 cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno); 322 for (irec_index = 0; 323 irec_index < igeo->inodes_per_cluster; 324 irec_index += XFS_INODES_PER_CHUNK) { 325 error = xrep_ibt_cluster_record(ri, 326 cluster_ag_base + irec_index, cluster_bp, 327 nr_inodes); 328 if (error) 329 break; 330 331 } 332 333 xfs_trans_brelse(sc->tp, cluster_bp); 334 return error; 335 } 336 337 /* Check for any obvious conflicts in the inode chunk extent. */ 338 STATIC int 339 xrep_ibt_check_inode_ext( 340 struct xfs_scrub *sc, 341 xfs_agblock_t agbno, 342 xfs_extlen_t len) 343 { 344 struct xfs_mount *mp = sc->mp; 345 struct xfs_ino_geometry *igeo = M_IGEO(mp); 346 xfs_agino_t agino; 347 enum xbtree_recpacking outcome; 348 int error; 349 350 /* Inode records must be within the AG. */ 351 if (!xfs_verify_agbext(sc->sa.pag, agbno, len)) 352 return -EFSCORRUPTED; 353 354 /* The entire record must align to the inode cluster size. */ 355 if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) || 356 !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster)) 357 return -EFSCORRUPTED; 358 359 /* 360 * The entire record must also adhere to the inode cluster alignment 361 * size if sparse inodes are not enabled. 362 */ 363 if (!xfs_has_sparseinodes(mp) && 364 (!IS_ALIGNED(agbno, igeo->cluster_align) || 365 !IS_ALIGNED(agbno + len, igeo->cluster_align))) 366 return -EFSCORRUPTED; 367 368 /* 369 * On a sparse inode fs, this cluster could be part of a sparse chunk. 370 * Sparse clusters must be aligned to sparse chunk alignment. 371 */ 372 if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align && 373 (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) || 374 !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align))) 375 return -EFSCORRUPTED; 376 377 /* Make sure the entire range of blocks are valid AG inodes. */ 378 agino = XFS_AGB_TO_AGINO(mp, agbno); 379 if (!xfs_verify_agino(sc->sa.pag, agino)) 380 return -EFSCORRUPTED; 381 382 agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1; 383 if (!xfs_verify_agino(sc->sa.pag, agino)) 384 return -EFSCORRUPTED; 385 386 /* Make sure this isn't free space. */ 387 error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome); 388 if (error) 389 return error; 390 if (outcome != XBTREE_RECPACKING_EMPTY) 391 return -EFSCORRUPTED; 392 393 return 0; 394 } 395 396 /* Found a fragment of the old inode btrees; dispose of them later. */ 397 STATIC int 398 xrep_ibt_record_old_btree_blocks( 399 struct xrep_ibt *ri, 400 const struct xfs_rmap_irec *rec) 401 { 402 if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock, 403 rec->rm_blockcount)) 404 return -EFSCORRUPTED; 405 406 return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock, 407 rec->rm_blockcount); 408 } 409 410 /* Record extents that belong to inode cluster blocks. */ 411 STATIC int 412 xrep_ibt_record_inode_blocks( 413 struct xrep_ibt *ri, 414 const struct xfs_rmap_irec *rec) 415 { 416 struct xfs_mount *mp = ri->sc->mp; 417 struct xfs_ino_geometry *igeo = M_IGEO(mp); 418 xfs_agblock_t cluster_base; 419 int error; 420 421 error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock, 422 rec->rm_blockcount); 423 if (error) 424 return error; 425 426 trace_xrep_ibt_walk_rmap(mp, ri->sc->sa.pag->pag_agno, 427 rec->rm_startblock, rec->rm_blockcount, rec->rm_owner, 428 rec->rm_offset, rec->rm_flags); 429 430 /* 431 * Record the free/hole masks for each inode cluster that could be 432 * mapped by this rmap record. 433 */ 434 for (cluster_base = 0; 435 cluster_base < rec->rm_blockcount; 436 cluster_base += igeo->blocks_per_cluster) { 437 error = xrep_ibt_process_cluster(ri, 438 rec->rm_startblock + cluster_base); 439 if (error) 440 return error; 441 } 442 443 return 0; 444 } 445 446 STATIC int 447 xrep_ibt_walk_rmap( 448 struct xfs_btree_cur *cur, 449 const struct xfs_rmap_irec *rec, 450 void *priv) 451 { 452 struct xrep_ibt *ri = priv; 453 int error = 0; 454 455 if (xchk_should_terminate(ri->sc, &error)) 456 return error; 457 458 switch (rec->rm_owner) { 459 case XFS_RMAP_OWN_INOBT: 460 return xrep_ibt_record_old_btree_blocks(ri, rec); 461 case XFS_RMAP_OWN_INODES: 462 return xrep_ibt_record_inode_blocks(ri, rec); 463 } 464 return 0; 465 } 466 467 /* 468 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode 469 * btrees (OWN_INOBT). Figure out if we have enough free space to reconstruct 470 * the inode btrees. The caller must clean up the lists if anything goes 471 * wrong. 472 */ 473 STATIC int 474 xrep_ibt_find_inodes( 475 struct xrep_ibt *ri) 476 { 477 struct xfs_scrub *sc = ri->sc; 478 int error; 479 480 ri->rie.ir_startino = NULLAGINO; 481 482 /* Collect all reverse mappings for inode blocks. */ 483 xrep_ag_btcur_init(sc, &sc->sa); 484 error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri); 485 xchk_ag_btcur_free(&sc->sa); 486 if (error) 487 return error; 488 489 /* If we have a record ready to go, add it to the array. */ 490 if (ri->rie.ir_startino != NULLAGINO) 491 return xrep_ibt_stash(ri); 492 493 return 0; 494 } 495 496 /* Update the AGI counters. */ 497 STATIC int 498 xrep_ibt_reset_counters( 499 struct xrep_ibt *ri) 500 { 501 struct xfs_scrub *sc = ri->sc; 502 struct xfs_agi *agi = sc->sa.agi_bp->b_addr; 503 unsigned int freecount = ri->icount - ri->iused; 504 505 /* Trigger inode count recalculation */ 506 xfs_force_summary_recalc(sc->mp); 507 508 /* 509 * The AGI header contains extra information related to the inode 510 * btrees, so we must update those fields here. 511 */ 512 agi->agi_count = cpu_to_be32(ri->icount); 513 agi->agi_freecount = cpu_to_be32(freecount); 514 xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, 515 XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 516 517 /* Reinitialize with the values we just logged. */ 518 return xrep_reinit_pagi(sc); 519 } 520 521 /* Retrieve finobt data for bulk load. */ 522 STATIC int 523 xrep_fibt_get_records( 524 struct xfs_btree_cur *cur, 525 unsigned int idx, 526 struct xfs_btree_block *block, 527 unsigned int nr_wanted, 528 void *priv) 529 { 530 struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i; 531 struct xrep_ibt *ri = priv; 532 union xfs_btree_rec *block_rec; 533 unsigned int loaded; 534 int error; 535 536 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { 537 do { 538 error = xfarray_load(ri->inode_records, 539 ri->array_cur++, irec); 540 } while (error == 0 && xfs_inobt_rec_freecount(irec) == 0); 541 if (error) 542 return error; 543 544 block_rec = xfs_btree_rec_addr(cur, idx, block); 545 cur->bc_ops->init_rec_from_cur(cur, block_rec); 546 } 547 548 return loaded; 549 } 550 551 /* Retrieve inobt data for bulk load. */ 552 STATIC int 553 xrep_ibt_get_records( 554 struct xfs_btree_cur *cur, 555 unsigned int idx, 556 struct xfs_btree_block *block, 557 unsigned int nr_wanted, 558 void *priv) 559 { 560 struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i; 561 struct xrep_ibt *ri = priv; 562 union xfs_btree_rec *block_rec; 563 unsigned int loaded; 564 int error; 565 566 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { 567 error = xfarray_load(ri->inode_records, ri->array_cur++, irec); 568 if (error) 569 return error; 570 571 block_rec = xfs_btree_rec_addr(cur, idx, block); 572 cur->bc_ops->init_rec_from_cur(cur, block_rec); 573 } 574 575 return loaded; 576 } 577 578 /* Feed one of the new inobt blocks to the bulk loader. */ 579 STATIC int 580 xrep_ibt_claim_block( 581 struct xfs_btree_cur *cur, 582 union xfs_btree_ptr *ptr, 583 void *priv) 584 { 585 struct xrep_ibt *ri = priv; 586 587 return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr); 588 } 589 590 /* Feed one of the new finobt blocks to the bulk loader. */ 591 STATIC int 592 xrep_fibt_claim_block( 593 struct xfs_btree_cur *cur, 594 union xfs_btree_ptr *ptr, 595 void *priv) 596 { 597 struct xrep_ibt *ri = priv; 598 599 return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr); 600 } 601 602 /* Make sure the records do not overlap in inumber address space. */ 603 STATIC int 604 xrep_ibt_check_overlap( 605 struct xrep_ibt *ri) 606 { 607 struct xfs_inobt_rec_incore irec; 608 xfarray_idx_t cur; 609 xfs_agino_t next_agino = 0; 610 int error = 0; 611 612 foreach_xfarray_idx(ri->inode_records, cur) { 613 if (xchk_should_terminate(ri->sc, &error)) 614 return error; 615 616 error = xfarray_load(ri->inode_records, cur, &irec); 617 if (error) 618 return error; 619 620 if (irec.ir_startino < next_agino) 621 return -EFSCORRUPTED; 622 623 next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK; 624 } 625 626 return error; 627 } 628 629 /* Build new inode btrees and dispose of the old one. */ 630 STATIC int 631 xrep_ibt_build_new_trees( 632 struct xrep_ibt *ri) 633 { 634 struct xfs_scrub *sc = ri->sc; 635 struct xfs_btree_cur *ino_cur; 636 struct xfs_btree_cur *fino_cur = NULL; 637 xfs_fsblock_t fsbno; 638 bool need_finobt; 639 int error; 640 641 need_finobt = xfs_has_finobt(sc->mp); 642 643 /* 644 * Create new btrees for staging all the inobt records we collected 645 * earlier. The records were collected in order of increasing agino, 646 * so we do not have to sort them. Ensure there are no overlapping 647 * records. 648 */ 649 error = xrep_ibt_check_overlap(ri); 650 if (error) 651 return error; 652 653 /* 654 * The new inode btrees will not be rooted in the AGI until we've 655 * successfully rebuilt the tree. 656 * 657 * Start by setting up the inobt staging cursor. 658 */ 659 fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, 660 XFS_IBT_BLOCK(sc->mp)), 661 xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, fsbno, 662 XFS_AG_RESV_NONE); 663 ri->new_inobt.bload.claim_block = xrep_ibt_claim_block; 664 ri->new_inobt.bload.get_records = xrep_ibt_get_records; 665 666 ino_cur = xfs_inobt_init_cursor(sc->sa.pag, NULL, NULL); 667 xfs_btree_stage_afakeroot(ino_cur, &ri->new_inobt.afake); 668 error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload, 669 xfarray_length(ri->inode_records)); 670 if (error) 671 goto err_inocur; 672 673 /* Set up finobt staging cursor. */ 674 if (need_finobt) { 675 enum xfs_ag_resv_type resv = XFS_AG_RESV_METADATA; 676 677 if (sc->mp->m_finobt_nores) 678 resv = XFS_AG_RESV_NONE; 679 680 fsbno = XFS_AGB_TO_FSB(sc->mp, sc->sa.pag->pag_agno, 681 XFS_FIBT_BLOCK(sc->mp)), 682 xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT, 683 fsbno, resv); 684 ri->new_finobt.bload.claim_block = xrep_fibt_claim_block; 685 ri->new_finobt.bload.get_records = xrep_fibt_get_records; 686 687 fino_cur = xfs_finobt_init_cursor(sc->sa.pag, NULL, NULL); 688 xfs_btree_stage_afakeroot(fino_cur, &ri->new_finobt.afake); 689 error = xfs_btree_bload_compute_geometry(fino_cur, 690 &ri->new_finobt.bload, ri->finobt_recs); 691 if (error) 692 goto err_finocur; 693 } 694 695 /* Last chance to abort before we start committing fixes. */ 696 if (xchk_should_terminate(sc, &error)) 697 goto err_finocur; 698 699 /* Reserve all the space we need to build the new btrees. */ 700 error = xrep_newbt_alloc_blocks(&ri->new_inobt, 701 ri->new_inobt.bload.nr_blocks); 702 if (error) 703 goto err_finocur; 704 705 if (need_finobt) { 706 error = xrep_newbt_alloc_blocks(&ri->new_finobt, 707 ri->new_finobt.bload.nr_blocks); 708 if (error) 709 goto err_finocur; 710 } 711 712 /* Add all inobt records. */ 713 ri->array_cur = XFARRAY_CURSOR_INIT; 714 error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri); 715 if (error) 716 goto err_finocur; 717 718 /* Add all finobt records. */ 719 if (need_finobt) { 720 ri->array_cur = XFARRAY_CURSOR_INIT; 721 error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri); 722 if (error) 723 goto err_finocur; 724 } 725 726 /* 727 * Install the new btrees in the AG header. After this point the old 728 * btrees are no longer accessible and the new trees are live. 729 */ 730 xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp); 731 xfs_btree_del_cursor(ino_cur, 0); 732 733 if (fino_cur) { 734 xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp); 735 xfs_btree_del_cursor(fino_cur, 0); 736 } 737 738 /* Reset the AGI counters now that we've changed the inode roots. */ 739 error = xrep_ibt_reset_counters(ri); 740 if (error) 741 goto err_finobt; 742 743 /* Free unused blocks and bitmap. */ 744 if (need_finobt) { 745 error = xrep_newbt_commit(&ri->new_finobt); 746 if (error) 747 goto err_inobt; 748 } 749 error = xrep_newbt_commit(&ri->new_inobt); 750 if (error) 751 return error; 752 753 return xrep_roll_ag_trans(sc); 754 755 err_finocur: 756 if (need_finobt) 757 xfs_btree_del_cursor(fino_cur, error); 758 err_inocur: 759 xfs_btree_del_cursor(ino_cur, error); 760 err_finobt: 761 if (need_finobt) 762 xrep_newbt_cancel(&ri->new_finobt); 763 err_inobt: 764 xrep_newbt_cancel(&ri->new_inobt); 765 return error; 766 } 767 768 /* 769 * Now that we've logged the roots of the new btrees, invalidate all of the 770 * old blocks and free them. 771 */ 772 STATIC int 773 xrep_ibt_remove_old_trees( 774 struct xrep_ibt *ri) 775 { 776 struct xfs_scrub *sc = ri->sc; 777 int error; 778 779 /* 780 * Free the old inode btree blocks if they're not in use. It's ok to 781 * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG 782 * reservation because we reset the reservation before releasing the 783 * AGI and AGF header buffer locks. 784 */ 785 error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks, 786 &XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE); 787 if (error) 788 return error; 789 790 /* 791 * If the finobt is enabled and has a per-AG reservation, make sure we 792 * reinitialize the per-AG reservations. 793 */ 794 if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores) 795 sc->flags |= XREP_RESET_PERAG_RESV; 796 797 return 0; 798 } 799 800 /* Repair both inode btrees. */ 801 int 802 xrep_iallocbt( 803 struct xfs_scrub *sc) 804 { 805 struct xrep_ibt *ri; 806 struct xfs_mount *mp = sc->mp; 807 char *descr; 808 xfs_agino_t first_agino, last_agino; 809 int error = 0; 810 811 /* We require the rmapbt to rebuild anything. */ 812 if (!xfs_has_rmapbt(mp)) 813 return -EOPNOTSUPP; 814 815 ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS); 816 if (!ri) 817 return -ENOMEM; 818 ri->sc = sc; 819 820 /* We rebuild both inode btrees. */ 821 sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT; 822 823 /* Set up enough storage to handle an AG with nothing but inodes. */ 824 xfs_agino_range(mp, sc->sa.pag->pag_agno, &first_agino, &last_agino); 825 last_agino /= XFS_INODES_PER_CHUNK; 826 descr = xchk_xfile_ag_descr(sc, "inode index records"); 827 error = xfarray_create(descr, last_agino, 828 sizeof(struct xfs_inobt_rec_incore), 829 &ri->inode_records); 830 kfree(descr); 831 if (error) 832 goto out_ri; 833 834 /* Collect the inode data and find the old btree blocks. */ 835 xagb_bitmap_init(&ri->old_iallocbt_blocks); 836 error = xrep_ibt_find_inodes(ri); 837 if (error) 838 goto out_bitmap; 839 840 /* Rebuild the inode indexes. */ 841 error = xrep_ibt_build_new_trees(ri); 842 if (error) 843 goto out_bitmap; 844 845 /* Kill the old tree. */ 846 error = xrep_ibt_remove_old_trees(ri); 847 if (error) 848 goto out_bitmap; 849 850 out_bitmap: 851 xagb_bitmap_destroy(&ri->old_iallocbt_blocks); 852 xfarray_destroy(ri->inode_records); 853 out_ri: 854 kfree(ri); 855 return error; 856 } 857 858 /* Make sure both btrees are ok after we've rebuilt them. */ 859 int 860 xrep_revalidate_iallocbt( 861 struct xfs_scrub *sc) 862 { 863 __u32 old_type = sc->sm->sm_type; 864 int error; 865 866 /* 867 * We must update sm_type temporarily so that the tree-to-tree cross 868 * reference checks will work in the correct direction, and also so 869 * that tracing will report correctly if there are more errors. 870 */ 871 sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT; 872 error = xchk_iallocbt(sc); 873 if (error) 874 goto out; 875 876 if (xfs_has_finobt(sc->mp)) { 877 sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT; 878 error = xchk_iallocbt(sc); 879 } 880 881 out: 882 sc->sm->sm_type = old_type; 883 return error; 884 } 885