1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2018-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs_platform.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_defer.h" 13 #include "xfs_btree.h" 14 #include "xfs_btree_staging.h" 15 #include "xfs_bit.h" 16 #include "xfs_log_format.h" 17 #include "xfs_trans.h" 18 #include "xfs_sb.h" 19 #include "xfs_inode.h" 20 #include "xfs_alloc.h" 21 #include "xfs_ialloc.h" 22 #include "xfs_ialloc_btree.h" 23 #include "xfs_icache.h" 24 #include "xfs_rmap.h" 25 #include "xfs_rmap_btree.h" 26 #include "xfs_log.h" 27 #include "xfs_trans_priv.h" 28 #include "xfs_error.h" 29 #include "xfs_health.h" 30 #include "xfs_ag.h" 31 #include "scrub/xfs_scrub.h" 32 #include "scrub/scrub.h" 33 #include "scrub/common.h" 34 #include "scrub/btree.h" 35 #include "scrub/trace.h" 36 #include "scrub/repair.h" 37 #include "scrub/bitmap.h" 38 #include "scrub/agb_bitmap.h" 39 #include "scrub/xfile.h" 40 #include "scrub/xfarray.h" 41 #include "scrub/newbt.h" 42 #include "scrub/reap.h" 43 44 /* 45 * Inode Btree Repair 46 * ================== 47 * 48 * A quick refresher of inode btrees on a v5 filesystem: 49 * 50 * - Inode records are read into memory in units of 'inode clusters'. However 51 * many inodes fit in a cluster buffer is the smallest number of inodes that 52 * can be allocated or freed. Clusters are never smaller than one fs block 53 * though they can span multiple blocks. The size (in fs blocks) is 54 * computed with xfs_icluster_size_fsb(). The fs block alignment of a 55 * cluster is computed with xfs_ialloc_cluster_alignment(). 56 * 57 * - Each inode btree record can describe a single 'inode chunk'. The chunk 58 * size is defined to be 64 inodes. If sparse inodes are enabled, every 59 * inobt record must be aligned to the chunk size; if not, every record must 60 * be aligned to the start of a cluster. It is possible to construct an XFS 61 * geometry where one inobt record maps to multiple inode clusters; it is 62 * also possible to construct a geometry where multiple inobt records map to 63 * different parts of one inode cluster. 64 * 65 * - If sparse inodes are not enabled, the smallest unit of allocation for 66 * inode records is enough to contain one inode chunk's worth of inodes. 67 * 68 * - If sparse inodes are enabled, the holemask field will be active. Each 69 * bit of the holemask represents 4 potential inodes; if set, the 70 * corresponding space does *not* contain inodes and must be left alone. 71 * Clusters cannot be smaller than 4 inodes. The smallest unit of allocation 72 * of inode records is one inode cluster. 73 * 74 * So what's the rebuild algorithm? 75 * 76 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT 77 * records. The OWN_INOBT records are the old inode btree blocks and will be 78 * cleared out after we've rebuilt the tree. Each possible inode cluster 79 * within an OWN_INODES record will be read in; for each possible inobt record 80 * associated with that cluster, compute the freemask calculated from the 81 * i_mode data in the inode chunk. For sparse inodes the holemask will be 82 * calculated by creating the properly aligned inobt record and punching out 83 * any chunk that's missing. Inode allocations and frees grab the AGI first, 84 * so repair protects itself from concurrent access by locking the AGI. 85 * 86 * Once we've reconstructed all the inode records, we can create new inode 87 * btree roots and reload the btrees. We rebuild both inode trees at the same 88 * time because they have the same rmap owner and it would be more complex to 89 * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT 90 * blocks it owns. We have all the data we need to build both, so dump 91 * everything and start over. 92 * 93 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once. 94 */ 95 96 struct xrep_ibt { 97 /* Record under construction. */ 98 struct xfs_inobt_rec_incore rie; 99 100 /* new inobt information */ 101 struct xrep_newbt new_inobt; 102 103 /* new finobt information */ 104 struct xrep_newbt new_finobt; 105 106 /* Old inode btree blocks we found in the rmap. */ 107 struct xagb_bitmap old_iallocbt_blocks; 108 109 /* Reconstructed inode records. */ 110 struct xfarray *inode_records; 111 112 struct xfs_scrub *sc; 113 114 /* Number of inodes assigned disk space. */ 115 unsigned int icount; 116 117 /* Number of inodes in use. */ 118 unsigned int iused; 119 120 /* Number of finobt records needed. */ 121 unsigned int finobt_recs; 122 123 /* get_records()'s position in the inode record array. */ 124 xfarray_idx_t array_cur; 125 }; 126 127 /* 128 * Is this inode in use? If the inode is in memory we can tell from i_mode, 129 * otherwise we have to check di_mode in the on-disk buffer. We only care 130 * that the high (i.e. non-permission) bits of _mode are zero. This should be 131 * safe because repair keeps all AG headers locked until the end, and process 132 * trying to perform an inode allocation/free must lock the AGI. 133 * 134 * @cluster_ag_base is the inode offset of the cluster within the AG. 135 * @cluster_bp is the cluster buffer. 136 * @cluster_index is the inode offset within the inode cluster. 137 */ 138 STATIC int 139 xrep_ibt_check_ifree( 140 struct xrep_ibt *ri, 141 xfs_agino_t cluster_ag_base, 142 struct xfs_buf *cluster_bp, 143 unsigned int cluster_index, 144 bool *inuse) 145 { 146 struct xfs_scrub *sc = ri->sc; 147 struct xfs_mount *mp = sc->mp; 148 struct xfs_dinode *dip; 149 xfs_agino_t agino; 150 unsigned int cluster_buf_base; 151 unsigned int offset; 152 int error; 153 154 agino = cluster_ag_base + cluster_index; 155 156 /* Inode uncached or half assembled, read disk buffer */ 157 cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base); 158 offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize; 159 if (offset >= BBTOB(cluster_bp->b_length)) 160 return -EFSCORRUPTED; 161 dip = xfs_buf_offset(cluster_bp, offset); 162 if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC) 163 return -EFSCORRUPTED; 164 165 if (dip->di_version >= 3 && 166 be64_to_cpu(dip->di_ino) != xfs_agino_to_ino(ri->sc->sa.pag, agino)) 167 return -EFSCORRUPTED; 168 169 /* Will the in-core inode tell us if it's in use? */ 170 error = xchk_inode_is_allocated(sc, agino, inuse); 171 if (!error) 172 return 0; 173 174 *inuse = dip->di_mode != 0; 175 return 0; 176 } 177 178 /* Stash the accumulated inobt record for rebuilding. */ 179 STATIC int 180 xrep_ibt_stash( 181 struct xrep_ibt *ri) 182 { 183 int error = 0; 184 185 if (xchk_should_terminate(ri->sc, &error)) 186 return error; 187 188 ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie); 189 if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL) 190 return -EFSCORRUPTED; 191 192 if (ri->rie.ir_freecount > 0) 193 ri->finobt_recs++; 194 195 trace_xrep_ibt_found(ri->sc->sa.pag, &ri->rie); 196 197 error = xfarray_append(ri->inode_records, &ri->rie); 198 if (error) 199 return error; 200 201 ri->rie.ir_startino = NULLAGINO; 202 return 0; 203 } 204 205 /* 206 * Given an extent of inodes and an inode cluster buffer, calculate the 207 * location of the corresponding inobt record (creating it if necessary), 208 * then update the parts of the holemask and freemask of that record that 209 * correspond to the inode extent we were given. 210 * 211 * @cluster_ir_startino is the AG inode number of an inobt record that we're 212 * proposing to create for this inode cluster. If sparse inodes are enabled, 213 * we must round down to a chunk boundary to find the actual sparse record. 214 * @cluster_bp is the buffer of the inode cluster. 215 * @nr_inodes is the number of inodes to check from the cluster. 216 */ 217 STATIC int 218 xrep_ibt_cluster_record( 219 struct xrep_ibt *ri, 220 xfs_agino_t cluster_ir_startino, 221 struct xfs_buf *cluster_bp, 222 unsigned int nr_inodes) 223 { 224 struct xfs_scrub *sc = ri->sc; 225 struct xfs_mount *mp = sc->mp; 226 xfs_agino_t ir_startino; 227 unsigned int cluster_base; 228 unsigned int cluster_index; 229 int error = 0; 230 231 ir_startino = cluster_ir_startino; 232 if (xfs_has_sparseinodes(mp)) 233 ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK); 234 cluster_base = cluster_ir_startino - ir_startino; 235 236 /* 237 * If the accumulated inobt record doesn't map this cluster, add it to 238 * the list and reset it. 239 */ 240 if (ri->rie.ir_startino != NULLAGINO && 241 ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) { 242 error = xrep_ibt_stash(ri); 243 if (error) 244 return error; 245 } 246 247 if (ri->rie.ir_startino == NULLAGINO) { 248 ri->rie.ir_startino = ir_startino; 249 ri->rie.ir_free = XFS_INOBT_ALL_FREE; 250 ri->rie.ir_holemask = 0xFFFF; 251 ri->rie.ir_count = 0; 252 } 253 254 /* Record the whole cluster. */ 255 ri->icount += nr_inodes; 256 ri->rie.ir_count += nr_inodes; 257 ri->rie.ir_holemask &= ~xfs_inobt_maskn( 258 cluster_base / XFS_INODES_PER_HOLEMASK_BIT, 259 nr_inodes / XFS_INODES_PER_HOLEMASK_BIT); 260 261 /* Which inodes within this cluster are free? */ 262 for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) { 263 bool inuse = false; 264 265 error = xrep_ibt_check_ifree(ri, cluster_ir_startino, 266 cluster_bp, cluster_index, &inuse); 267 if (error) 268 return error; 269 if (!inuse) 270 continue; 271 ri->iused++; 272 ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base + 273 cluster_index); 274 } 275 return 0; 276 } 277 278 /* 279 * For each inode cluster covering the physical extent recorded by the rmapbt, 280 * we must calculate the properly aligned startino of that cluster, then 281 * iterate each cluster to fill in used and filled masks appropriately. We 282 * then use the (startino, used, filled) information to construct the 283 * appropriate inode records. 284 */ 285 STATIC int 286 xrep_ibt_process_cluster( 287 struct xrep_ibt *ri, 288 xfs_agblock_t cluster_bno) 289 { 290 struct xfs_buf *cluster_bp; 291 struct xfs_scrub *sc = ri->sc; 292 struct xfs_mount *mp = sc->mp; 293 struct xfs_ino_geometry *igeo = M_IGEO(mp); 294 xfs_agino_t cluster_ag_base; 295 xfs_agino_t irec_index; 296 unsigned int nr_inodes; 297 int error; 298 299 nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster, 300 XFS_INODES_PER_CHUNK); 301 302 /* 303 * Grab the inode cluster buffer. This is safe to do with a broken 304 * inobt because imap_to_bp directly maps the buffer without touching 305 * either inode btree. 306 */ 307 error = xfs_read_icluster(sc->sa.pag, sc->tp, cluster_bno, 308 &cluster_bp); 309 if (error) 310 return error; 311 312 /* 313 * Record the contents of each possible inobt record mapping this 314 * cluster. 315 */ 316 cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno); 317 for (irec_index = 0; 318 irec_index < igeo->inodes_per_cluster; 319 irec_index += XFS_INODES_PER_CHUNK) { 320 error = xrep_ibt_cluster_record(ri, 321 cluster_ag_base + irec_index, cluster_bp, 322 nr_inodes); 323 if (error) 324 break; 325 326 } 327 328 xfs_trans_brelse(sc->tp, cluster_bp); 329 return error; 330 } 331 332 /* Check for any obvious conflicts in the inode chunk extent. */ 333 STATIC int 334 xrep_ibt_check_inode_ext( 335 struct xfs_scrub *sc, 336 xfs_agblock_t agbno, 337 xfs_extlen_t len) 338 { 339 struct xfs_mount *mp = sc->mp; 340 struct xfs_ino_geometry *igeo = M_IGEO(mp); 341 xfs_agino_t agino; 342 enum xbtree_recpacking outcome; 343 int error; 344 345 /* Inode records must be within the AG. */ 346 if (!xfs_verify_agbext(sc->sa.pag, agbno, len)) 347 return -EFSCORRUPTED; 348 349 /* The entire record must align to the inode cluster size. */ 350 if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) || 351 !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster)) 352 return -EFSCORRUPTED; 353 354 /* 355 * The entire record must also adhere to the inode cluster alignment 356 * size if sparse inodes are not enabled. 357 */ 358 if (!xfs_has_sparseinodes(mp) && 359 (!IS_ALIGNED(agbno, igeo->cluster_align) || 360 !IS_ALIGNED(agbno + len, igeo->cluster_align))) 361 return -EFSCORRUPTED; 362 363 /* 364 * On a sparse inode fs, this cluster could be part of a sparse chunk. 365 * Sparse clusters must be aligned to sparse chunk alignment. 366 */ 367 if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align && 368 (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) || 369 !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align))) 370 return -EFSCORRUPTED; 371 372 /* Make sure the entire range of blocks are valid AG inodes. */ 373 agino = XFS_AGB_TO_AGINO(mp, agbno); 374 if (!xfs_verify_agino(sc->sa.pag, agino)) 375 return -EFSCORRUPTED; 376 377 agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1; 378 if (!xfs_verify_agino(sc->sa.pag, agino)) 379 return -EFSCORRUPTED; 380 381 /* Make sure this isn't free space. */ 382 error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome); 383 if (error) 384 return error; 385 if (outcome != XBTREE_RECPACKING_EMPTY) 386 return -EFSCORRUPTED; 387 388 return 0; 389 } 390 391 /* Found a fragment of the old inode btrees; dispose of them later. */ 392 STATIC int 393 xrep_ibt_record_old_btree_blocks( 394 struct xrep_ibt *ri, 395 const struct xfs_rmap_irec *rec) 396 { 397 if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock, 398 rec->rm_blockcount)) 399 return -EFSCORRUPTED; 400 401 return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock, 402 rec->rm_blockcount); 403 } 404 405 /* Record extents that belong to inode cluster blocks. */ 406 STATIC int 407 xrep_ibt_record_inode_blocks( 408 struct xrep_ibt *ri, 409 const struct xfs_rmap_irec *rec) 410 { 411 struct xfs_mount *mp = ri->sc->mp; 412 struct xfs_ino_geometry *igeo = M_IGEO(mp); 413 xfs_agblock_t cluster_base; 414 int error; 415 416 error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock, 417 rec->rm_blockcount); 418 if (error) 419 return error; 420 421 trace_xrep_ibt_walk_rmap(ri->sc->sa.pag, rec); 422 423 /* 424 * Record the free/hole masks for each inode cluster that could be 425 * mapped by this rmap record. 426 */ 427 for (cluster_base = 0; 428 cluster_base < rec->rm_blockcount; 429 cluster_base += igeo->blocks_per_cluster) { 430 error = xrep_ibt_process_cluster(ri, 431 rec->rm_startblock + cluster_base); 432 if (error) 433 return error; 434 } 435 436 return 0; 437 } 438 439 STATIC int 440 xrep_ibt_walk_rmap( 441 struct xfs_btree_cur *cur, 442 const struct xfs_rmap_irec *rec, 443 void *priv) 444 { 445 struct xrep_ibt *ri = priv; 446 int error = 0; 447 448 if (xchk_should_terminate(ri->sc, &error)) 449 return error; 450 451 switch (rec->rm_owner) { 452 case XFS_RMAP_OWN_INOBT: 453 return xrep_ibt_record_old_btree_blocks(ri, rec); 454 case XFS_RMAP_OWN_INODES: 455 return xrep_ibt_record_inode_blocks(ri, rec); 456 } 457 return 0; 458 } 459 460 /* 461 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode 462 * btrees (OWN_INOBT). Figure out if we have enough free space to reconstruct 463 * the inode btrees. The caller must clean up the lists if anything goes 464 * wrong. 465 */ 466 STATIC int 467 xrep_ibt_find_inodes( 468 struct xrep_ibt *ri) 469 { 470 struct xfs_scrub *sc = ri->sc; 471 int error; 472 473 ri->rie.ir_startino = NULLAGINO; 474 475 /* Collect all reverse mappings for inode blocks. */ 476 xrep_ag_btcur_init(sc, &sc->sa); 477 error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri); 478 xchk_ag_btcur_free(&sc->sa); 479 if (error) 480 return error; 481 482 /* If we have a record ready to go, add it to the array. */ 483 if (ri->rie.ir_startino != NULLAGINO) 484 return xrep_ibt_stash(ri); 485 486 return 0; 487 } 488 489 /* Update the AGI counters. */ 490 STATIC int 491 xrep_ibt_reset_counters( 492 struct xrep_ibt *ri) 493 { 494 struct xfs_scrub *sc = ri->sc; 495 struct xfs_agi *agi = sc->sa.agi_bp->b_addr; 496 unsigned int freecount = ri->icount - ri->iused; 497 498 /* Trigger inode count recalculation */ 499 xfs_force_summary_recalc(sc->mp); 500 501 /* 502 * The AGI header contains extra information related to the inode 503 * btrees, so we must update those fields here. 504 */ 505 agi->agi_count = cpu_to_be32(ri->icount); 506 agi->agi_freecount = cpu_to_be32(freecount); 507 xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp, 508 XFS_AGI_COUNT | XFS_AGI_FREECOUNT); 509 510 /* Reinitialize with the values we just logged. */ 511 return xrep_reinit_pagi(sc); 512 } 513 514 /* Retrieve finobt data for bulk load. */ 515 STATIC int 516 xrep_fibt_get_records( 517 struct xfs_btree_cur *cur, 518 unsigned int idx, 519 struct xfs_btree_block *block, 520 unsigned int nr_wanted, 521 void *priv) 522 { 523 struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i; 524 struct xrep_ibt *ri = priv; 525 union xfs_btree_rec *block_rec; 526 unsigned int loaded; 527 int error; 528 529 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { 530 do { 531 error = xfarray_load(ri->inode_records, 532 ri->array_cur++, irec); 533 } while (error == 0 && xfs_inobt_rec_freecount(irec) == 0); 534 if (error) 535 return error; 536 537 block_rec = xfs_btree_rec_addr(cur, idx, block); 538 cur->bc_ops->init_rec_from_cur(cur, block_rec); 539 } 540 541 return loaded; 542 } 543 544 /* Retrieve inobt data for bulk load. */ 545 STATIC int 546 xrep_ibt_get_records( 547 struct xfs_btree_cur *cur, 548 unsigned int idx, 549 struct xfs_btree_block *block, 550 unsigned int nr_wanted, 551 void *priv) 552 { 553 struct xfs_inobt_rec_incore *irec = &cur->bc_rec.i; 554 struct xrep_ibt *ri = priv; 555 union xfs_btree_rec *block_rec; 556 unsigned int loaded; 557 int error; 558 559 for (loaded = 0; loaded < nr_wanted; loaded++, idx++) { 560 error = xfarray_load(ri->inode_records, ri->array_cur++, irec); 561 if (error) 562 return error; 563 564 block_rec = xfs_btree_rec_addr(cur, idx, block); 565 cur->bc_ops->init_rec_from_cur(cur, block_rec); 566 } 567 568 return loaded; 569 } 570 571 /* Feed one of the new inobt blocks to the bulk loader. */ 572 STATIC int 573 xrep_ibt_claim_block( 574 struct xfs_btree_cur *cur, 575 union xfs_btree_ptr *ptr, 576 void *priv) 577 { 578 struct xrep_ibt *ri = priv; 579 580 return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr); 581 } 582 583 /* Feed one of the new finobt blocks to the bulk loader. */ 584 STATIC int 585 xrep_fibt_claim_block( 586 struct xfs_btree_cur *cur, 587 union xfs_btree_ptr *ptr, 588 void *priv) 589 { 590 struct xrep_ibt *ri = priv; 591 592 return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr); 593 } 594 595 /* Make sure the records do not overlap in inumber address space. */ 596 STATIC int 597 xrep_ibt_check_overlap( 598 struct xrep_ibt *ri) 599 { 600 struct xfs_inobt_rec_incore irec; 601 xfarray_idx_t cur; 602 xfs_agino_t next_agino = 0; 603 int error = 0; 604 605 foreach_xfarray_idx(ri->inode_records, cur) { 606 if (xchk_should_terminate(ri->sc, &error)) 607 return error; 608 609 error = xfarray_load(ri->inode_records, cur, &irec); 610 if (error) 611 return error; 612 613 if (irec.ir_startino < next_agino) 614 return -EFSCORRUPTED; 615 616 next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK; 617 } 618 619 return error; 620 } 621 622 /* Build new inode btrees and dispose of the old one. */ 623 STATIC int 624 xrep_ibt_build_new_trees( 625 struct xrep_ibt *ri) 626 { 627 struct xfs_scrub *sc = ri->sc; 628 struct xfs_btree_cur *ino_cur; 629 struct xfs_btree_cur *fino_cur = NULL; 630 bool need_finobt; 631 int error; 632 633 need_finobt = xfs_has_finobt(sc->mp); 634 635 /* 636 * Create new btrees for staging all the inobt records we collected 637 * earlier. The records were collected in order of increasing agino, 638 * so we do not have to sort them. Ensure there are no overlapping 639 * records. 640 */ 641 error = xrep_ibt_check_overlap(ri); 642 if (error) 643 return error; 644 645 /* 646 * The new inode btrees will not be rooted in the AGI until we've 647 * successfully rebuilt the tree. 648 * 649 * Start by setting up the inobt staging cursor. 650 */ 651 xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT, 652 xfs_agbno_to_fsb(sc->sa.pag, XFS_IBT_BLOCK(sc->mp)), 653 XFS_AG_RESV_NONE); 654 ri->new_inobt.bload.claim_block = xrep_ibt_claim_block; 655 ri->new_inobt.bload.get_records = xrep_ibt_get_records; 656 657 ino_cur = xfs_inobt_init_cursor(sc->sa.pag, NULL, NULL); 658 xfs_btree_stage_afakeroot(ino_cur, &ri->new_inobt.afake); 659 error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload, 660 xfarray_length(ri->inode_records)); 661 if (error) 662 goto err_inocur; 663 664 /* Set up finobt staging cursor. */ 665 if (need_finobt) { 666 enum xfs_ag_resv_type resv = XFS_AG_RESV_METADATA; 667 668 if (sc->mp->m_finobt_nores) 669 resv = XFS_AG_RESV_NONE; 670 671 xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT, 672 xfs_agbno_to_fsb(sc->sa.pag, XFS_FIBT_BLOCK(sc->mp)), 673 resv); 674 ri->new_finobt.bload.claim_block = xrep_fibt_claim_block; 675 ri->new_finobt.bload.get_records = xrep_fibt_get_records; 676 677 fino_cur = xfs_finobt_init_cursor(sc->sa.pag, NULL, NULL); 678 xfs_btree_stage_afakeroot(fino_cur, &ri->new_finobt.afake); 679 error = xfs_btree_bload_compute_geometry(fino_cur, 680 &ri->new_finobt.bload, ri->finobt_recs); 681 if (error) 682 goto err_finocur; 683 } 684 685 /* Last chance to abort before we start committing fixes. */ 686 if (xchk_should_terminate(sc, &error)) 687 goto err_finocur; 688 689 /* Reserve all the space we need to build the new btrees. */ 690 error = xrep_newbt_alloc_blocks(&ri->new_inobt, 691 ri->new_inobt.bload.nr_blocks); 692 if (error) 693 goto err_finocur; 694 695 if (need_finobt) { 696 error = xrep_newbt_alloc_blocks(&ri->new_finobt, 697 ri->new_finobt.bload.nr_blocks); 698 if (error) 699 goto err_finocur; 700 } 701 702 /* Add all inobt records. */ 703 ri->array_cur = XFARRAY_CURSOR_INIT; 704 error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri); 705 if (error) 706 goto err_finocur; 707 708 /* Add all finobt records. */ 709 if (need_finobt) { 710 ri->array_cur = XFARRAY_CURSOR_INIT; 711 error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri); 712 if (error) 713 goto err_finocur; 714 } 715 716 /* 717 * Install the new btrees in the AG header. After this point the old 718 * btrees are no longer accessible and the new trees are live. 719 */ 720 xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp); 721 xfs_btree_del_cursor(ino_cur, 0); 722 723 if (fino_cur) { 724 xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp); 725 xfs_btree_del_cursor(fino_cur, 0); 726 } 727 728 /* Reset the AGI counters now that we've changed the inode roots. */ 729 error = xrep_ibt_reset_counters(ri); 730 if (error) 731 goto err_finobt; 732 733 /* Free unused blocks and bitmap. */ 734 if (need_finobt) { 735 error = xrep_newbt_commit(&ri->new_finobt); 736 if (error) 737 goto err_inobt; 738 } 739 error = xrep_newbt_commit(&ri->new_inobt); 740 if (error) 741 return error; 742 743 return xrep_roll_ag_trans(sc); 744 745 err_finocur: 746 if (need_finobt) 747 xfs_btree_del_cursor(fino_cur, error); 748 err_inocur: 749 xfs_btree_del_cursor(ino_cur, error); 750 err_finobt: 751 if (need_finobt) 752 xrep_newbt_cancel(&ri->new_finobt); 753 err_inobt: 754 xrep_newbt_cancel(&ri->new_inobt); 755 return error; 756 } 757 758 /* 759 * Now that we've logged the roots of the new btrees, invalidate all of the 760 * old blocks and free them. 761 */ 762 STATIC int 763 xrep_ibt_remove_old_trees( 764 struct xrep_ibt *ri) 765 { 766 struct xfs_scrub *sc = ri->sc; 767 int error; 768 769 /* 770 * Free the old inode btree blocks if they're not in use. It's ok to 771 * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG 772 * reservation because we reset the reservation before releasing the 773 * AGI and AGF header buffer locks. 774 */ 775 error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks, 776 &XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE); 777 if (error) 778 return error; 779 780 /* 781 * If the finobt is enabled and has a per-AG reservation, make sure we 782 * reinitialize the per-AG reservations. 783 */ 784 if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores) 785 sc->flags |= XREP_RESET_PERAG_RESV; 786 787 return 0; 788 } 789 790 /* Repair both inode btrees. */ 791 int 792 xrep_iallocbt( 793 struct xfs_scrub *sc) 794 { 795 struct xrep_ibt *ri; 796 struct xfs_mount *mp = sc->mp; 797 xfs_agino_t first_agino, last_agino; 798 int error = 0; 799 800 /* We require the rmapbt to rebuild anything. */ 801 if (!xfs_has_rmapbt(mp)) 802 return -EOPNOTSUPP; 803 804 ri = kzalloc_obj(struct xrep_ibt, XCHK_GFP_FLAGS); 805 if (!ri) 806 return -ENOMEM; 807 ri->sc = sc; 808 809 /* We rebuild both inode btrees. */ 810 sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT; 811 812 /* Set up enough storage to handle an AG with nothing but inodes. */ 813 xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino); 814 last_agino /= XFS_INODES_PER_CHUNK; 815 error = xfarray_create("inode index records", last_agino, 816 sizeof(struct xfs_inobt_rec_incore), 817 &ri->inode_records); 818 if (error) 819 goto out_ri; 820 821 /* Collect the inode data and find the old btree blocks. */ 822 xagb_bitmap_init(&ri->old_iallocbt_blocks); 823 error = xrep_ibt_find_inodes(ri); 824 if (error) 825 goto out_bitmap; 826 827 /* Rebuild the inode indexes. */ 828 error = xrep_ibt_build_new_trees(ri); 829 if (error) 830 goto out_bitmap; 831 832 /* Kill the old tree. */ 833 error = xrep_ibt_remove_old_trees(ri); 834 if (error) 835 goto out_bitmap; 836 837 out_bitmap: 838 xagb_bitmap_destroy(&ri->old_iallocbt_blocks); 839 xfarray_destroy(ri->inode_records); 840 out_ri: 841 kfree(ri); 842 return error; 843 } 844 845 /* Make sure both btrees are ok after we've rebuilt them. */ 846 int 847 xrep_revalidate_iallocbt( 848 struct xfs_scrub *sc) 849 { 850 __u32 old_type = sc->sm->sm_type; 851 int error; 852 853 /* 854 * We must update sm_type temporarily so that the tree-to-tree cross 855 * reference checks will work in the correct direction, and also so 856 * that tracing will report correctly if there are more errors. 857 */ 858 sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT; 859 error = xchk_iallocbt(sc); 860 if (error) 861 goto out; 862 863 /* 864 * If the inobt is still corrupt, we've failed to repair the filesystem 865 * and should just bail out. 866 * 867 * If the inobt fails cross-examination with the finobt, the scan will 868 * free the finobt cursor, so we need to mark the repair incomplete 869 * and avoid walking off the end of the NULL finobt cursor. 870 */ 871 if (!xfs_has_finobt(sc->mp) || 872 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 873 goto out; 874 875 sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT; 876 if (!sc->sa.fino_cur) { 877 xchk_set_incomplete(sc); 878 goto out; 879 } 880 error = xchk_iallocbt(sc); 881 882 out: 883 sc->sm->sm_type = old_type; 884 return error; 885 } 886