// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2018-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_btree_staging.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_inode.h"
#include "xfs_alloc.h"
#include "xfs_ialloc.h"
#include "xfs_ialloc_btree.h"
#include "xfs_icache.h"
#include "xfs_rmap.h"
#include "xfs_rmap_btree.h"
#include "xfs_log.h"
#include "xfs_trans_priv.h"
#include "xfs_error.h"
#include "xfs_health.h"
#include "xfs_ag.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/agb_bitmap.h"
#include "scrub/xfile.h"
#include "scrub/xfarray.h"
#include "scrub/newbt.h"
#include "scrub/reap.h"

/*
 * Inode Btree Repair
 * ==================
 *
 * A quick refresher of inode btrees on a v5 filesystem:
 *
 * - Inode records are read into memory in units of 'inode clusters'.  However
 *   many inodes fit in a cluster buffer is the smallest number of inodes that
 *   can be allocated or freed.  Clusters are never smaller than one fs block
 *   though they can span multiple blocks.  The size (in fs blocks) is
 *   computed with xfs_icluster_size_fsb().  The fs block alignment of a
 *   cluster is computed with xfs_ialloc_cluster_alignment().
 *
 * - Each inode btree record can describe a single 'inode chunk'.  The chunk
 *   size is defined to be 64 inodes.  If sparse inodes are enabled, every
 *   inobt record must be aligned to the chunk size; if not, every record must
 *   be aligned to the start of a cluster.  It is possible to construct an XFS
 *   geometry where one inobt record maps to multiple inode clusters; it is
 *   also possible to construct a geometry where multiple inobt records map to
 *   different parts of one inode cluster.
 *
 * - If sparse inodes are not enabled, the smallest unit of allocation for
 *   inode records is enough to contain one inode chunk's worth of inodes.
 *
 * - If sparse inodes are enabled, the holemask field will be active.  Each
 *   bit of the holemask represents 4 potential inodes; if set, the
 *   corresponding space does *not* contain inodes and must be left alone.
 *   Clusters cannot be smaller than 4 inodes.  The smallest unit of
 *   allocation of inode records is one inode cluster.
 *
 * So what's the rebuild algorithm?
 *
 * Iterate the reverse mapping records looking for OWN_INODES and OWN_INOBT
 * records.  The OWN_INOBT records are the old inode btree blocks and will be
 * cleared out after we've rebuilt the tree.  Each possible inode cluster
 * within an OWN_INODES record will be read in; for each possible inobt record
 * associated with that cluster, compute the freemask calculated from the
 * i_mode data in the inode chunk.  For sparse inodes the holemask will be
 * calculated by creating the properly aligned inobt record and punching out
 * any chunk that's missing.  Inode allocations and frees grab the AGI first,
 * so repair protects itself from concurrent access by locking the AGI.
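 *
 * As a worked example (hypothetical geometry, not any particular mkfs
 * default): with 512-byte inodes and 8192-byte cluster buffers, each cluster
 * holds 16 inodes, so one 64-inode chunk spans four clusters and one inobt
 * record maps to four cluster reads.  Conversely, a geometry whose clusters
 * hold more than 64 inodes maps several inobt records onto different parts
 * of a single cluster buffer.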
 *
 * Once we've reconstructed all the inode records, we can create new inode
 * btree roots and reload the btrees.  We rebuild both inode trees at the same
 * time because they have the same rmap owner and it would be more complex to
 * figure out if the other tree isn't in need of a rebuild and which OWN_INOBT
 * blocks it owns.  We have all the data we need to build both, so dump
 * everything and start over.
 *
 * We use the prefix 'xrep_ibt' because we rebuild both inode btrees at once.
 */

struct xrep_ibt {
	/* Record under construction. */
	struct xfs_inobt_rec_incore	rie;

	/* new inobt information */
	struct xrep_newbt		new_inobt;

	/* new finobt information */
	struct xrep_newbt		new_finobt;

	/* Old inode btree blocks we found in the rmap. */
	struct xagb_bitmap		old_iallocbt_blocks;

	/* Reconstructed inode records. */
	struct xfarray			*inode_records;

	struct xfs_scrub		*sc;

	/* Number of inodes assigned disk space. */
	unsigned int			icount;

	/* Number of inodes in use. */
	unsigned int			iused;

	/* Number of finobt records needed. */
	unsigned int			finobt_recs;

	/* get_records()'s position in the inode record array. */
	xfarray_idx_t			array_cur;
};

/*
 * Is this inode in use?  If the inode is in memory we can tell from i_mode,
 * otherwise we have to check di_mode in the on-disk buffer.  We only care
 * that the high (i.e. non-permission) bits of _mode are zero.  This should be
 * safe because repair keeps all AG headers locked until the end, and any
 * process trying to perform an inode allocation/free must lock the AGI.
 *
 * @cluster_ag_base is the inode offset of the cluster within the AG.
 * @cluster_bp is the cluster buffer.
 * @cluster_index is the inode offset within the inode cluster.
 */
STATIC int
xrep_ibt_check_ifree(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ag_base,
	struct xfs_buf		*cluster_bp,
	unsigned int		cluster_index,
	bool			*inuse)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_dinode	*dip;
	xfs_agino_t		agino;
	unsigned int		cluster_buf_base;
	unsigned int		offset;
	int			error;

	agino = cluster_ag_base + cluster_index;

	/* Inode uncached or half assembled, read disk buffer */
	cluster_buf_base = XFS_INO_TO_OFFSET(mp, cluster_ag_base);
	offset = (cluster_buf_base + cluster_index) * mp->m_sb.sb_inodesize;
	if (offset >= BBTOB(cluster_bp->b_length))
		return -EFSCORRUPTED;
	dip = xfs_buf_offset(cluster_bp, offset);
	if (be16_to_cpu(dip->di_magic) != XFS_DINODE_MAGIC)
		return -EFSCORRUPTED;

	if (dip->di_version >= 3 &&
	    be64_to_cpu(dip->di_ino) != xfs_agino_to_ino(ri->sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Will the in-core inode tell us if it's in use? */
	error = xchk_inode_is_allocated(sc, agino, inuse);
	if (!error)
		return 0;

	*inuse = dip->di_mode != 0;
	return 0;
}
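/*
 * A worked example of the offset math in xrep_ibt_check_ifree(), using
 * hypothetical numbers: with 512-byte inodes, cluster_buf_base 0, and
 * cluster_index 5, the on-disk inode lives at byte offset 5 * 512 = 2560
 * within the cluster buffer.  Any offset at or past the buffer length is
 * treated as corruption rather than read.
 */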
/* Stash the accumulated inobt record for rebuilding. */
STATIC int
xrep_ibt_stash(
	struct xrep_ibt		*ri)
{
	int			error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	ri->rie.ir_freecount = xfs_inobt_rec_freecount(&ri->rie);
	if (xfs_inobt_check_irec(ri->sc->sa.pag, &ri->rie) != NULL)
		return -EFSCORRUPTED;

	if (ri->rie.ir_freecount > 0)
		ri->finobt_recs++;

	trace_xrep_ibt_found(ri->sc->sa.pag, &ri->rie);

	error = xfarray_append(ri->inode_records, &ri->rie);
	if (error)
		return error;

	ri->rie.ir_startino = NULLAGINO;
	return 0;
}

/*
 * Given an extent of inodes and an inode cluster buffer, calculate the
 * location of the corresponding inobt record (creating it if necessary),
 * then update the parts of the holemask and freemask of that record that
 * correspond to the inode extent we were given.
 *
 * @cluster_ir_startino is the AG inode number of an inobt record that we're
 * proposing to create for this inode cluster.  If sparse inodes are enabled,
 * we must round down to a chunk boundary to find the actual sparse record.
 * @cluster_bp is the buffer of the inode cluster.
 * @nr_inodes is the number of inodes to check from the cluster.
 */
STATIC int
xrep_ibt_cluster_record(
	struct xrep_ibt		*ri,
	xfs_agino_t		cluster_ir_startino,
	struct xfs_buf		*cluster_bp,
	unsigned int		nr_inodes)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	xfs_agino_t		ir_startino;
	unsigned int		cluster_base;
	unsigned int		cluster_index;
	int			error = 0;

	ir_startino = cluster_ir_startino;
	if (xfs_has_sparseinodes(mp))
		ir_startino = rounddown(ir_startino, XFS_INODES_PER_CHUNK);
	cluster_base = cluster_ir_startino - ir_startino;

	/*
	 * If the accumulated inobt record doesn't map this cluster, add it to
	 * the list and reset it.
	 */
	if (ri->rie.ir_startino != NULLAGINO &&
	    ri->rie.ir_startino + XFS_INODES_PER_CHUNK <= ir_startino) {
		error = xrep_ibt_stash(ri);
		if (error)
			return error;
	}

	if (ri->rie.ir_startino == NULLAGINO) {
		ri->rie.ir_startino = ir_startino;
		ri->rie.ir_free = XFS_INOBT_ALL_FREE;
		ri->rie.ir_holemask = 0xFFFF;
		ri->rie.ir_count = 0;
	}

	/* Record the whole cluster. */
	ri->icount += nr_inodes;
	ri->rie.ir_count += nr_inodes;
	ri->rie.ir_holemask &= ~xfs_inobt_maskn(
				cluster_base / XFS_INODES_PER_HOLEMASK_BIT,
				nr_inodes / XFS_INODES_PER_HOLEMASK_BIT);

	/* Which inodes within this cluster are free? */
	for (cluster_index = 0; cluster_index < nr_inodes; cluster_index++) {
		bool		inuse = false;

		error = xrep_ibt_check_ifree(ri, cluster_ir_startino,
				cluster_bp, cluster_index, &inuse);
		if (error)
			return error;
		if (!inuse)
			continue;
		ri->iused++;
		ri->rie.ir_free &= ~XFS_INOBT_MASK(cluster_base +
						   cluster_index);
	}
	return 0;
}
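/*
 * To illustrate the holemask math above with hypothetical numbers: each
 * holemask bit covers XFS_INODES_PER_HOLEMASK_BIT (4) inodes, so a cluster
 * of 16 inodes at cluster_base 16 clears holemask bits 16/4 = 4 through
 * 4 + 16/4 - 1 = 7, marking that quarter of the chunk as populated.  The
 * freemask starts all-free, and bits are cleared one inode at a time as
 * in-use inodes are discovered.
 */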
/*
 * For each inode cluster covering the physical extent recorded by the rmapbt,
 * we must calculate the properly aligned startino of that cluster, then
 * iterate each cluster to fill in used and filled masks appropriately.  We
 * then use the (startino, used, filled) information to construct the
 * appropriate inode records.
 */
STATIC int
xrep_ibt_process_cluster(
	struct xrep_ibt		*ri,
	xfs_agblock_t		cluster_bno)
{
	struct xfs_imap		imap;
	struct xfs_buf		*cluster_bp;
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		cluster_ag_base;
	xfs_agino_t		irec_index;
	unsigned int		nr_inodes;
	int			error;

	nr_inodes = min_t(unsigned int, igeo->inodes_per_cluster,
			XFS_INODES_PER_CHUNK);

	/*
	 * Grab the inode cluster buffer.  This is safe to do with a broken
	 * inobt because imap_to_bp directly maps the buffer without touching
	 * either inode btree.
	 */
	imap.im_blkno = xfs_agbno_to_daddr(sc->sa.pag, cluster_bno);
	imap.im_len = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
	imap.im_boffset = 0;
	error = xfs_imap_to_bp(mp, sc->tp, &imap, &cluster_bp);
	if (error)
		return error;

	/*
	 * Record the contents of each possible inobt record mapping this
	 * cluster.
	 */
	cluster_ag_base = XFS_AGB_TO_AGINO(mp, cluster_bno);
	for (irec_index = 0;
	     irec_index < igeo->inodes_per_cluster;
	     irec_index += XFS_INODES_PER_CHUNK) {
		error = xrep_ibt_cluster_record(ri,
				cluster_ag_base + irec_index, cluster_bp,
				nr_inodes);
		if (error)
			break;
	}

	xfs_trans_brelse(sc->tp, cluster_bp);
	return error;
}

/* Check for any obvious conflicts in the inode chunk extent. */
STATIC int
xrep_ibt_check_inode_ext(
	struct xfs_scrub	*sc,
	xfs_agblock_t		agbno,
	xfs_extlen_t		len)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agino_t		agino;
	enum xbtree_recpacking	outcome;
	int			error;

	/* Inode records must be within the AG. */
	if (!xfs_verify_agbext(sc->sa.pag, agbno, len))
		return -EFSCORRUPTED;

	/* The entire record must align to the inode cluster size. */
	if (!IS_ALIGNED(agbno, igeo->blocks_per_cluster) ||
	    !IS_ALIGNED(agbno + len, igeo->blocks_per_cluster))
		return -EFSCORRUPTED;

	/*
	 * The entire record must also adhere to the inode cluster alignment
	 * size if sparse inodes are not enabled.
	 */
	if (!xfs_has_sparseinodes(mp) &&
	    (!IS_ALIGNED(agbno, igeo->cluster_align) ||
	     !IS_ALIGNED(agbno + len, igeo->cluster_align)))
		return -EFSCORRUPTED;

	/*
	 * On a sparse inode fs, this cluster could be part of a sparse chunk.
	 * Sparse clusters must be aligned to sparse chunk alignment.
	 */
	if (xfs_has_sparseinodes(mp) && mp->m_sb.sb_spino_align &&
	    (!IS_ALIGNED(agbno, mp->m_sb.sb_spino_align) ||
	     !IS_ALIGNED(agbno + len, mp->m_sb.sb_spino_align)))
		return -EFSCORRUPTED;

	/* Make sure the entire range of blocks maps to valid AG inodes. */
	agino = XFS_AGB_TO_AGINO(mp, agbno);
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	agino = XFS_AGB_TO_AGINO(mp, agbno + len) - 1;
	if (!xfs_verify_agino(sc->sa.pag, agino))
		return -EFSCORRUPTED;

	/* Make sure this isn't free space. */
	error = xfs_alloc_has_records(sc->sa.bno_cur, agbno, len, &outcome);
	if (error)
		return error;
	if (outcome != XBTREE_RECPACKING_EMPTY)
		return -EFSCORRUPTED;

	return 0;
}
/* Found a fragment of the old inode btrees; dispose of them later. */
STATIC int
xrep_ibt_record_old_btree_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	if (!xfs_verify_agbext(ri->sc->sa.pag, rec->rm_startblock,
				rec->rm_blockcount))
		return -EFSCORRUPTED;

	return xagb_bitmap_set(&ri->old_iallocbt_blocks, rec->rm_startblock,
			rec->rm_blockcount);
}

/* Record extents that belong to inode cluster blocks. */
STATIC int
xrep_ibt_record_inode_blocks(
	struct xrep_ibt			*ri,
	const struct xfs_rmap_irec	*rec)
{
	struct xfs_mount		*mp = ri->sc->mp;
	struct xfs_ino_geometry		*igeo = M_IGEO(mp);
	xfs_agblock_t			cluster_base;
	int				error;

	error = xrep_ibt_check_inode_ext(ri->sc, rec->rm_startblock,
			rec->rm_blockcount);
	if (error)
		return error;

	trace_xrep_ibt_walk_rmap(ri->sc->sa.pag, rec);

	/*
	 * Record the free/hole masks for each inode cluster that could be
	 * mapped by this rmap record.
	 */
	for (cluster_base = 0;
	     cluster_base < rec->rm_blockcount;
	     cluster_base += igeo->blocks_per_cluster) {
		error = xrep_ibt_process_cluster(ri,
				rec->rm_startblock + cluster_base);
		if (error)
			return error;
	}

	return 0;
}

/* Sort each reverse mapping into the old-btree or inode-cluster bucket. */
STATIC int
xrep_ibt_walk_rmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xrep_ibt			*ri = priv;
	int				error = 0;

	if (xchk_should_terminate(ri->sc, &error))
		return error;

	switch (rec->rm_owner) {
	case XFS_RMAP_OWN_INOBT:
		return xrep_ibt_record_old_btree_blocks(ri, rec);
	case XFS_RMAP_OWN_INODES:
		return xrep_ibt_record_inode_blocks(ri, rec);
	}
	return 0;
}

/*
 * Iterate all reverse mappings to find the inodes (OWN_INODES) and the inode
 * btrees (OWN_INOBT).  Figure out if we have enough free space to reconstruct
 * the inode btrees.  The caller must clean up the lists if anything goes
 * wrong.
 */
STATIC int
xrep_ibt_find_inodes(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	ri->rie.ir_startino = NULLAGINO;

	/* Collect all reverse mappings for inode blocks. */
	xrep_ag_btcur_init(sc, &sc->sa);
	error = xfs_rmap_query_all(sc->sa.rmap_cur, xrep_ibt_walk_rmap, ri);
	xchk_ag_btcur_free(&sc->sa);
	if (error)
		return error;

	/* If we have a record ready to go, add it to the array. */
	if (ri->rie.ir_startino != NULLAGINO)
		return xrep_ibt_stash(ri);

	return 0;
}

/* Update the AGI counters. */
STATIC int
xrep_ibt_reset_counters(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_agi		*agi = sc->sa.agi_bp->b_addr;
	unsigned int		freecount = ri->icount - ri->iused;

	/* Trigger inode count recalculation */
	xfs_force_summary_recalc(sc->mp);

	/*
	 * The AGI header contains extra information related to the inode
	 * btrees, so we must update those fields here.
	 */
	agi->agi_count = cpu_to_be32(ri->icount);
	agi->agi_freecount = cpu_to_be32(freecount);
	xfs_ialloc_log_agi(sc->tp, sc->sa.agi_bp,
			   XFS_AGI_COUNT | XFS_AGI_FREECOUNT);

	/* Reinitialize with the values we just logged. */
	return xrep_reinit_pagi(sc);
}
/* Retrieve finobt data for bulk load. */
STATIC int
xrep_fibt_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
	struct xrep_ibt			*ri = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		/* Skip staged records that have no free inodes. */
		do {
			error = xfarray_load(ri->inode_records,
					ri->array_cur++, irec);
		} while (error == 0 && xfs_inobt_rec_freecount(irec) == 0);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Retrieve inobt data for bulk load. */
STATIC int
xrep_ibt_get_records(
	struct xfs_btree_cur		*cur,
	unsigned int			idx,
	struct xfs_btree_block		*block,
	unsigned int			nr_wanted,
	void				*priv)
{
	struct xfs_inobt_rec_incore	*irec = &cur->bc_rec.i;
	struct xrep_ibt			*ri = priv;
	union xfs_btree_rec		*block_rec;
	unsigned int			loaded;
	int				error;

	for (loaded = 0; loaded < nr_wanted; loaded++, idx++) {
		error = xfarray_load(ri->inode_records, ri->array_cur++, irec);
		if (error)
			return error;

		block_rec = xfs_btree_rec_addr(cur, idx, block);
		cur->bc_ops->init_rec_from_cur(cur, block_rec);
	}

	return loaded;
}

/* Feed one of the new inobt blocks to the bulk loader. */
STATIC int
xrep_ibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_inobt, ptr);
}

/* Feed one of the new finobt blocks to the bulk loader. */
STATIC int
xrep_fibt_claim_block(
	struct xfs_btree_cur	*cur,
	union xfs_btree_ptr	*ptr,
	void			*priv)
{
	struct xrep_ibt		*ri = priv;

	return xrep_newbt_claim_block(cur, &ri->new_finobt, ptr);
}

/* Make sure the records do not overlap in inumber address space. */
STATIC int
xrep_ibt_check_overlap(
	struct xrep_ibt			*ri)
{
	struct xfs_inobt_rec_incore	irec;
	xfarray_idx_t			cur;
	xfs_agino_t			next_agino = 0;
	int				error = 0;

	foreach_xfarray_idx(ri->inode_records, cur) {
		if (xchk_should_terminate(ri->sc, &error))
			return error;

		error = xfarray_load(ri->inode_records, cur, &irec);
		if (error)
			return error;

		if (irec.ir_startino < next_agino)
			return -EFSCORRUPTED;

		next_agino = irec.ir_startino + XFS_INODES_PER_CHUNK;
	}

	return error;
}
/* Build new inode btrees and dispose of the old ones. */
STATIC int
xrep_ibt_build_new_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	struct xfs_btree_cur	*ino_cur;
	struct xfs_btree_cur	*fino_cur = NULL;
	bool			need_finobt;
	int			error;

	need_finobt = xfs_has_finobt(sc->mp);

	/*
	 * Create new btrees for staging all the inobt records we collected
	 * earlier.  The records were collected in order of increasing agino,
	 * so we do not have to sort them.  Ensure there are no overlapping
	 * records.
	 */
	error = xrep_ibt_check_overlap(ri);
	if (error)
		return error;

	/*
	 * The new inode btrees will not be rooted in the AGI until we've
	 * successfully rebuilt the tree.
	 *
	 * Start by setting up the inobt staging cursor.
	 */
	xrep_newbt_init_ag(&ri->new_inobt, sc, &XFS_RMAP_OINFO_INOBT,
			xfs_agbno_to_fsb(sc->sa.pag, XFS_IBT_BLOCK(sc->mp)),
			XFS_AG_RESV_NONE);
	ri->new_inobt.bload.claim_block = xrep_ibt_claim_block;
	ri->new_inobt.bload.get_records = xrep_ibt_get_records;

	ino_cur = xfs_inobt_init_cursor(sc->sa.pag, NULL, NULL);
	xfs_btree_stage_afakeroot(ino_cur, &ri->new_inobt.afake);
	error = xfs_btree_bload_compute_geometry(ino_cur, &ri->new_inobt.bload,
			xfarray_length(ri->inode_records));
	if (error)
		goto err_inocur;

	/* Set up finobt staging cursor. */
	if (need_finobt) {
		enum xfs_ag_resv_type	resv = XFS_AG_RESV_METADATA;

		if (sc->mp->m_finobt_nores)
			resv = XFS_AG_RESV_NONE;

		xrep_newbt_init_ag(&ri->new_finobt, sc, &XFS_RMAP_OINFO_INOBT,
				xfs_agbno_to_fsb(sc->sa.pag,
						 XFS_FIBT_BLOCK(sc->mp)),
				resv);
		ri->new_finobt.bload.claim_block = xrep_fibt_claim_block;
		ri->new_finobt.bload.get_records = xrep_fibt_get_records;

		fino_cur = xfs_finobt_init_cursor(sc->sa.pag, NULL, NULL);
		xfs_btree_stage_afakeroot(fino_cur, &ri->new_finobt.afake);
		error = xfs_btree_bload_compute_geometry(fino_cur,
				&ri->new_finobt.bload, ri->finobt_recs);
		if (error)
			goto err_finocur;
	}

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		goto err_finocur;

	/* Reserve all the space we need to build the new btrees. */
	error = xrep_newbt_alloc_blocks(&ri->new_inobt,
			ri->new_inobt.bload.nr_blocks);
	if (error)
		goto err_finocur;

	if (need_finobt) {
		error = xrep_newbt_alloc_blocks(&ri->new_finobt,
				ri->new_finobt.bload.nr_blocks);
		if (error)
			goto err_finocur;
	}

	/* Add all inobt records. */
	ri->array_cur = XFARRAY_CURSOR_INIT;
	error = xfs_btree_bload(ino_cur, &ri->new_inobt.bload, ri);
	if (error)
		goto err_finocur;

	/* Add all finobt records. */
	if (need_finobt) {
		ri->array_cur = XFARRAY_CURSOR_INIT;
		error = xfs_btree_bload(fino_cur, &ri->new_finobt.bload, ri);
		if (error)
			goto err_finocur;
	}

	/*
	 * Install the new btrees in the AG header.  After this point the old
	 * btrees are no longer accessible and the new trees are live.
	 */
	xfs_inobt_commit_staged_btree(ino_cur, sc->tp, sc->sa.agi_bp);
	xfs_btree_del_cursor(ino_cur, 0);

	if (fino_cur) {
		xfs_inobt_commit_staged_btree(fino_cur, sc->tp, sc->sa.agi_bp);
		xfs_btree_del_cursor(fino_cur, 0);
	}

	/* Reset the AGI counters now that we've changed the inode roots. */
	error = xrep_ibt_reset_counters(ri);
	if (error)
		goto err_finobt;

	/* Free unused blocks and bitmap. */
	if (need_finobt) {
		error = xrep_newbt_commit(&ri->new_finobt);
		if (error)
			goto err_inobt;
	}
	error = xrep_newbt_commit(&ri->new_inobt);
	if (error)
		return error;

	return xrep_roll_ag_trans(sc);

err_finocur:
	if (need_finobt)
		xfs_btree_del_cursor(fino_cur, error);
err_inocur:
	xfs_btree_del_cursor(ino_cur, error);
err_finobt:
	if (need_finobt)
		xrep_newbt_cancel(&ri->new_finobt);
err_inobt:
	xrep_newbt_cancel(&ri->new_inobt);
	return error;
}
/*
 * Now that we've logged the roots of the new btrees, invalidate all of the
 * old blocks and free them.
 */
STATIC int
xrep_ibt_remove_old_trees(
	struct xrep_ibt		*ri)
{
	struct xfs_scrub	*sc = ri->sc;
	int			error;

	/*
	 * Free the old inode btree blocks if they're not in use.  It's ok to
	 * reap with XFS_AG_RESV_NONE even if the finobt had a per-AG
	 * reservation because we reset the reservation before releasing the
	 * AGI and AGF header buffer locks.
	 */
	error = xrep_reap_agblocks(sc, &ri->old_iallocbt_blocks,
			&XFS_RMAP_OINFO_INOBT, XFS_AG_RESV_NONE);
	if (error)
		return error;

	/*
	 * If the finobt is enabled and has a per-AG reservation, make sure we
	 * reinitialize the per-AG reservations.
	 */
	if (xfs_has_finobt(sc->mp) && !sc->mp->m_finobt_nores)
		sc->flags |= XREP_RESET_PERAG_RESV;

	return 0;
}

/* Repair both inode btrees. */
int
xrep_iallocbt(
	struct xfs_scrub	*sc)
{
	struct xrep_ibt		*ri;
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	xfs_agino_t		first_agino, last_agino;
	int			error = 0;

	/* We require the rmapbt to rebuild anything. */
	if (!xfs_has_rmapbt(mp))
		return -EOPNOTSUPP;

	ri = kzalloc(sizeof(struct xrep_ibt), XCHK_GFP_FLAGS);
	if (!ri)
		return -ENOMEM;
	ri->sc = sc;

	/* We rebuild both inode btrees. */
	sc->sick_mask = XFS_SICK_AG_INOBT | XFS_SICK_AG_FINOBT;

	/* Set up enough storage to handle an AG with nothing but inodes. */
	xfs_agino_range(mp, pag_agno(sc->sa.pag), &first_agino, &last_agino);
	last_agino /= XFS_INODES_PER_CHUNK;
	descr = xchk_xfile_ag_descr(sc, "inode index records");
	error = xfarray_create(descr, last_agino,
			sizeof(struct xfs_inobt_rec_incore),
			&ri->inode_records);
	kfree(descr);
	if (error)
		goto out_ri;

	/* Collect the inode data and find the old btree blocks. */
	xagb_bitmap_init(&ri->old_iallocbt_blocks);
	error = xrep_ibt_find_inodes(ri);
	if (error)
		goto out_bitmap;

	/* Rebuild the inode indexes. */
	error = xrep_ibt_build_new_trees(ri);
	if (error)
		goto out_bitmap;

	/* Kill the old tree. */
	error = xrep_ibt_remove_old_trees(ri);

out_bitmap:
	xagb_bitmap_destroy(&ri->old_iallocbt_blocks);
	xfarray_destroy(ri->inode_records);
out_ri:
	kfree(ri);
	return error;
}

/* Make sure both btrees are ok after we've rebuilt them. */
int
xrep_revalidate_iallocbt(
	struct xfs_scrub	*sc)
{
	__u32			old_type = sc->sm->sm_type;
	int			error;

	/*
	 * We must update sm_type temporarily so that the tree-to-tree cross
	 * reference checks will work in the correct direction, and also so
	 * that tracing will report correctly if there are more errors.
	 */
	sc->sm->sm_type = XFS_SCRUB_TYPE_INOBT;
	error = xchk_iallocbt(sc);
	if (error)
		goto out;

	if (xfs_has_finobt(sc->mp)) {
		sc->sm->sm_type = XFS_SCRUB_TYPE_FINOBT;
		error = xchk_iallocbt(sc);
	}

out:
	sc->sm->sm_type = old_type;
	return error;
}