1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs_platform.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_dir2.h" 16 #include "xfs_dir2_priv.h" 17 #include "xfs_attr_leaf.h" 18 #include "scrub/scrub.h" 19 #include "scrub/common.h" 20 #include "scrub/trace.h" 21 #include "scrub/dabtree.h" 22 23 /* Directory/Attribute Btree */ 24 25 /* 26 * Check for da btree operation errors. See the section about handling 27 * operational errors in common.c. 28 */ 29 bool 30 xchk_da_process_error( 31 struct xchk_da_btree *ds, 32 int level, 33 int *error) 34 { 35 struct xfs_scrub *sc = ds->sc; 36 37 if (*error == 0) 38 return true; 39 40 switch (*error) { 41 case -EDEADLOCK: 42 case -ECHRNG: 43 /* Used to restart an op with deadlock avoidance. */ 44 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 45 break; 46 case -EFSBADCRC: 47 case -EFSCORRUPTED: 48 case -EIO: 49 case -ENODATA: 50 /* Note the badness but don't abort. */ 51 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 52 *error = 0; 53 fallthrough; 54 default: 55 trace_xchk_file_op_error(sc, ds->dargs.whichfork, 56 xfs_dir2_da_to_db(ds->dargs.geo, 57 ds->state->path.blk[level].blkno), 58 *error, __return_address); 59 break; 60 } 61 return false; 62 } 63 64 /* 65 * Check for da btree corruption. See the section about handling 66 * operational errors in common.c. 67 */ 68 void 69 xchk_da_set_corrupt( 70 struct xchk_da_btree *ds, 71 int level) 72 { 73 struct xfs_scrub *sc = ds->sc; 74 75 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 76 77 trace_xchk_fblock_error(sc, ds->dargs.whichfork, 78 xfs_dir2_da_to_db(ds->dargs.geo, 79 ds->state->path.blk[level].blkno), 80 __return_address); 81 } 82 83 /* Flag a da btree node in need of optimization. */ 84 void 85 xchk_da_set_preen( 86 struct xchk_da_btree *ds, 87 int level) 88 { 89 struct xfs_scrub *sc = ds->sc; 90 91 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 92 trace_xchk_fblock_preen(sc, ds->dargs.whichfork, 93 xfs_dir2_da_to_db(ds->dargs.geo, 94 ds->state->path.blk[level].blkno), 95 __return_address); 96 } 97 98 /* Find an entry at a certain level in a da btree. */ 99 static struct xfs_da_node_entry * 100 xchk_da_btree_node_entry( 101 struct xchk_da_btree *ds, 102 int level) 103 { 104 struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; 105 struct xfs_da3_icnode_hdr hdr; 106 107 ASSERT(blk->magic == XFS_DA_NODE_MAGIC); 108 109 xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); 110 return hdr.btree + blk->index; 111 } 112 113 /* Scrub a da btree hash (key). */ 114 int 115 xchk_da_btree_hash( 116 struct xchk_da_btree *ds, 117 int level, 118 __be32 *hashp) 119 { 120 struct xfs_da_node_entry *entry; 121 xfs_dahash_t hash; 122 xfs_dahash_t parent_hash; 123 124 /* Is this hash in order? */ 125 hash = be32_to_cpu(*hashp); 126 if (hash < ds->hashes[level]) 127 xchk_da_set_corrupt(ds, level); 128 ds->hashes[level] = hash; 129 130 if (level == 0) 131 return 0; 132 133 /* Is this hash no larger than the parent hash? */ 134 entry = xchk_da_btree_node_entry(ds, level - 1); 135 parent_hash = be32_to_cpu(entry->hashval); 136 if (parent_hash < hash) 137 xchk_da_set_corrupt(ds, level); 138 139 return 0; 140 } 141 142 /* 143 * Check a da btree pointer. Returns true if it's ok to use this 144 * pointer. 145 */ 146 STATIC bool 147 xchk_da_btree_ptr_ok( 148 struct xchk_da_btree *ds, 149 int level, 150 xfs_dablk_t blkno) 151 { 152 if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { 153 xchk_da_set_corrupt(ds, level); 154 return false; 155 } 156 157 return true; 158 } 159 160 /* 161 * The da btree scrubber can handle leaf1 blocks as a degenerate 162 * form of leafn blocks. Since the regular da code doesn't handle 163 * leaf1, we must multiplex the verifiers. 164 */ 165 static void 166 xchk_da_btree_read_verify( 167 struct xfs_buf *bp) 168 { 169 struct xfs_da_blkinfo *info = bp->b_addr; 170 171 switch (be16_to_cpu(info->magic)) { 172 case XFS_DIR2_LEAF1_MAGIC: 173 case XFS_DIR3_LEAF1_MAGIC: 174 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 175 bp->b_ops->verify_read(bp); 176 return; 177 default: 178 /* 179 * xfs_da3_node_buf_ops already know how to handle 180 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 181 */ 182 bp->b_ops = &xfs_da3_node_buf_ops; 183 bp->b_ops->verify_read(bp); 184 return; 185 } 186 } 187 static void 188 xchk_da_btree_write_verify( 189 struct xfs_buf *bp) 190 { 191 struct xfs_da_blkinfo *info = bp->b_addr; 192 193 switch (be16_to_cpu(info->magic)) { 194 case XFS_DIR2_LEAF1_MAGIC: 195 case XFS_DIR3_LEAF1_MAGIC: 196 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 197 bp->b_ops->verify_write(bp); 198 return; 199 default: 200 /* 201 * xfs_da3_node_buf_ops already know how to handle 202 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 203 */ 204 bp->b_ops = &xfs_da3_node_buf_ops; 205 bp->b_ops->verify_write(bp); 206 return; 207 } 208 } 209 static void * 210 xchk_da_btree_verify( 211 struct xfs_buf *bp) 212 { 213 struct xfs_da_blkinfo *info = bp->b_addr; 214 215 switch (be16_to_cpu(info->magic)) { 216 case XFS_DIR2_LEAF1_MAGIC: 217 case XFS_DIR3_LEAF1_MAGIC: 218 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 219 return bp->b_ops->verify_struct(bp); 220 default: 221 bp->b_ops = &xfs_da3_node_buf_ops; 222 return bp->b_ops->verify_struct(bp); 223 } 224 } 225 226 static const struct xfs_buf_ops xchk_da_btree_buf_ops = { 227 .name = "xchk_da_btree", 228 .verify_read = xchk_da_btree_read_verify, 229 .verify_write = xchk_da_btree_write_verify, 230 .verify_struct = xchk_da_btree_verify, 231 }; 232 233 /* Check a block's sibling. */ 234 STATIC int 235 xchk_da_btree_block_check_sibling( 236 struct xchk_da_btree *ds, 237 int level, 238 int direction, 239 xfs_dablk_t sibling) 240 { 241 struct xfs_da_state_path *path = &ds->state->path; 242 struct xfs_da_state_path *altpath = &ds->state->altpath; 243 int retval; 244 int plevel; 245 int error; 246 247 memcpy(altpath, path, sizeof(ds->state->altpath)); 248 249 /* 250 * If the pointer is null, we shouldn't be able to move the upper 251 * level pointer anywhere. 252 */ 253 if (sibling == 0) { 254 error = xfs_da3_path_shift(ds->state, altpath, direction, 255 false, &retval); 256 if (error == 0 && retval == 0) 257 xchk_da_set_corrupt(ds, level); 258 error = 0; 259 goto out; 260 } 261 262 /* Move the alternate cursor one block in the direction given. */ 263 error = xfs_da3_path_shift(ds->state, altpath, direction, false, 264 &retval); 265 if (!xchk_da_process_error(ds, level, &error)) 266 goto out; 267 if (retval) { 268 xchk_da_set_corrupt(ds, level); 269 goto out; 270 } 271 if (altpath->blk[level].bp) 272 xchk_buffer_recheck(ds->sc, altpath->blk[level].bp); 273 274 /* Compare upper level pointer to sibling pointer. */ 275 if (altpath->blk[level].blkno != sibling) 276 xchk_da_set_corrupt(ds, level); 277 278 out: 279 /* Free all buffers in the altpath that aren't referenced from path. */ 280 for (plevel = 0; plevel < altpath->active; plevel++) { 281 if (altpath->blk[plevel].bp == NULL || 282 (plevel < path->active && 283 altpath->blk[plevel].bp == path->blk[plevel].bp)) 284 continue; 285 286 xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp); 287 altpath->blk[plevel].bp = NULL; 288 } 289 290 return error; 291 } 292 293 /* Check a block's sibling pointers. */ 294 STATIC int 295 xchk_da_btree_block_check_siblings( 296 struct xchk_da_btree *ds, 297 int level, 298 struct xfs_da_blkinfo *hdr) 299 { 300 xfs_dablk_t forw; 301 xfs_dablk_t back; 302 int error = 0; 303 304 forw = be32_to_cpu(hdr->forw); 305 back = be32_to_cpu(hdr->back); 306 307 /* Top level blocks should not have sibling pointers. */ 308 if (level == 0) { 309 if (forw != 0 || back != 0) 310 xchk_da_set_corrupt(ds, level); 311 return 0; 312 } 313 314 /* 315 * Check back (left) and forw (right) pointers. These functions 316 * absorb error codes for us. 317 */ 318 error = xchk_da_btree_block_check_sibling(ds, level, 0, back); 319 if (error) 320 goto out; 321 error = xchk_da_btree_block_check_sibling(ds, level, 1, forw); 322 323 out: 324 memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); 325 return error; 326 } 327 328 /* Load a dir/attribute block from a btree. */ 329 STATIC int 330 xchk_da_btree_block( 331 struct xchk_da_btree *ds, 332 int level, 333 xfs_dablk_t blkno) 334 { 335 struct xfs_da_state_blk *blk; 336 struct xfs_da_intnode *node; 337 struct xfs_da_node_entry *btree; 338 struct xfs_da3_blkinfo *hdr3; 339 struct xfs_da_args *dargs = &ds->dargs; 340 struct xfs_inode *ip = ds->dargs.dp; 341 xfs_failaddr_t fa; 342 xfs_ino_t owner; 343 int *pmaxrecs; 344 struct xfs_da3_icnode_hdr nodehdr; 345 int error = 0; 346 347 blk = &ds->state->path.blk[level]; 348 ds->state->path.active = level + 1; 349 350 /* Release old block. */ 351 if (blk->bp) { 352 xfs_trans_brelse(dargs->trans, blk->bp); 353 blk->bp = NULL; 354 } 355 356 /* Check the pointer. */ 357 blk->blkno = blkno; 358 if (!xchk_da_btree_ptr_ok(ds, level, blkno)) 359 goto out_nobuf; 360 361 /* Read the buffer. */ 362 error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, 363 XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork, 364 &xchk_da_btree_buf_ops); 365 if (!xchk_da_process_error(ds, level, &error)) 366 goto out_nobuf; 367 if (blk->bp) 368 xchk_buffer_recheck(ds->sc, blk->bp); 369 370 /* 371 * We didn't find a dir btree root block, which means that 372 * there's no LEAF1/LEAFN tree (at least not where it's supposed 373 * to be), so jump out now. 374 */ 375 if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && 376 blk->bp == NULL) 377 goto out_nobuf; 378 379 /* It's /not/ ok for attr trees not to have a da btree. */ 380 if (blk->bp == NULL) { 381 xchk_da_set_corrupt(ds, level); 382 goto out_nobuf; 383 } 384 385 hdr3 = blk->bp->b_addr; 386 blk->magic = be16_to_cpu(hdr3->hdr.magic); 387 pmaxrecs = &ds->maxrecs[level]; 388 389 /* We only started zeroing the header on v5 filesystems. */ 390 if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad) 391 xchk_da_set_corrupt(ds, level); 392 393 /* Check the owner. */ 394 if (xfs_has_crc(ip->i_mount)) { 395 owner = be64_to_cpu(hdr3->owner); 396 if (owner != ip->i_ino) 397 xchk_da_set_corrupt(ds, level); 398 } 399 400 /* Check the siblings. */ 401 error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr); 402 if (error) 403 goto out; 404 405 /* Interpret the buffer. */ 406 switch (blk->magic) { 407 case XFS_ATTR_LEAF_MAGIC: 408 case XFS_ATTR3_LEAF_MAGIC: 409 xfs_trans_buf_set_type(dargs->trans, blk->bp, 410 XFS_BLFT_ATTR_LEAF_BUF); 411 blk->magic = XFS_ATTR_LEAF_MAGIC; 412 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); 413 if (ds->tree_level != 0) 414 xchk_da_set_corrupt(ds, level); 415 break; 416 case XFS_DIR2_LEAFN_MAGIC: 417 case XFS_DIR3_LEAFN_MAGIC: 418 xfs_trans_buf_set_type(dargs->trans, blk->bp, 419 XFS_BLFT_DIR_LEAFN_BUF); 420 blk->magic = XFS_DIR2_LEAFN_MAGIC; 421 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); 422 if (ds->tree_level != 0) 423 xchk_da_set_corrupt(ds, level); 424 break; 425 case XFS_DIR2_LEAF1_MAGIC: 426 case XFS_DIR3_LEAF1_MAGIC: 427 xfs_trans_buf_set_type(dargs->trans, blk->bp, 428 XFS_BLFT_DIR_LEAF1_BUF); 429 blk->magic = XFS_DIR2_LEAF1_MAGIC; 430 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); 431 if (ds->tree_level != 0) 432 xchk_da_set_corrupt(ds, level); 433 break; 434 case XFS_DA_NODE_MAGIC: 435 case XFS_DA3_NODE_MAGIC: 436 xfs_trans_buf_set_type(dargs->trans, blk->bp, 437 XFS_BLFT_DA_NODE_BUF); 438 blk->magic = XFS_DA_NODE_MAGIC; 439 node = blk->bp->b_addr; 440 xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node); 441 btree = nodehdr.btree; 442 *pmaxrecs = nodehdr.count; 443 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); 444 if (level == 0) { 445 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { 446 xchk_da_set_corrupt(ds, level); 447 goto out_freebp; 448 } 449 ds->tree_level = nodehdr.level; 450 } else { 451 if (ds->tree_level != nodehdr.level) { 452 xchk_da_set_corrupt(ds, level); 453 goto out_freebp; 454 } 455 } 456 457 /* XXX: Check hdr3.pad32 once we know how to fix it. */ 458 break; 459 default: 460 xchk_da_set_corrupt(ds, level); 461 goto out_freebp; 462 } 463 464 fa = xfs_da3_header_check(blk->bp, dargs->owner); 465 if (fa) { 466 xchk_da_set_corrupt(ds, level); 467 goto out_freebp; 468 } 469 470 /* 471 * If we've been handed a block that is below the dabtree root, does 472 * its hashval match what the parent block expected to see? 473 */ 474 if (level > 0) { 475 struct xfs_da_node_entry *key; 476 477 key = xchk_da_btree_node_entry(ds, level - 1); 478 if (be32_to_cpu(key->hashval) != blk->hashval) { 479 xchk_da_set_corrupt(ds, level); 480 goto out_freebp; 481 } 482 } 483 484 out: 485 return error; 486 out_freebp: 487 xfs_trans_brelse(dargs->trans, blk->bp); 488 blk->bp = NULL; 489 out_nobuf: 490 blk->blkno = 0; 491 return error; 492 } 493 494 /* Visit all nodes and leaves of a da btree. */ 495 int 496 xchk_da_btree( 497 struct xfs_scrub *sc, 498 int whichfork, 499 xchk_da_btree_rec_fn scrub_fn, 500 void *private) 501 { 502 struct xchk_da_btree *ds; 503 struct xfs_mount *mp = sc->mp; 504 struct xfs_da_state_blk *blks; 505 struct xfs_da_node_entry *key; 506 xfs_dablk_t blkno; 507 int level; 508 int error; 509 510 /* Skip short format data structures; no btree to scan. */ 511 if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork))) 512 return 0; 513 514 /* Set up initial da state. */ 515 ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS); 516 if (!ds) 517 return -ENOMEM; 518 ds->dargs.dp = sc->ip; 519 ds->dargs.whichfork = whichfork; 520 ds->dargs.trans = sc->tp; 521 ds->dargs.op_flags = XFS_DA_OP_OKNOENT; 522 ds->dargs.owner = sc->ip->i_ino; 523 ds->state = xfs_da_state_alloc(&ds->dargs); 524 ds->sc = sc; 525 ds->private = private; 526 if (whichfork == XFS_ATTR_FORK) { 527 ds->dargs.geo = mp->m_attr_geo; 528 ds->lowest = 0; 529 ds->highest = 0; 530 } else { 531 ds->dargs.geo = mp->m_dir_geo; 532 ds->lowest = ds->dargs.geo->leafblk; 533 ds->highest = ds->dargs.geo->freeblk; 534 } 535 blkno = ds->lowest; 536 level = 0; 537 538 /* Find the root of the da tree, if present. */ 539 blks = ds->state->path.blk; 540 error = xchk_da_btree_block(ds, level, blkno); 541 if (error) 542 goto out_state; 543 /* 544 * We didn't find a block at ds->lowest, which means that there's 545 * no LEAF1/LEAFN tree (at least not where it's supposed to be), 546 * so jump out now. 547 */ 548 if (blks[level].bp == NULL) 549 goto out_state; 550 551 blks[level].index = 0; 552 while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { 553 /* Handle leaf block. */ 554 if (blks[level].magic != XFS_DA_NODE_MAGIC) { 555 /* End of leaf, pop back towards the root. */ 556 if (blks[level].index >= ds->maxrecs[level]) { 557 if (level > 0) 558 blks[level - 1].index++; 559 ds->tree_level++; 560 level--; 561 continue; 562 } 563 564 /* Dispatch record scrubbing. */ 565 error = scrub_fn(ds, level); 566 if (error) 567 break; 568 if (xchk_should_terminate(sc, &error) || 569 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 570 break; 571 572 blks[level].index++; 573 continue; 574 } 575 576 577 /* End of node, pop back towards the root. */ 578 if (blks[level].index >= ds->maxrecs[level]) { 579 if (level > 0) 580 blks[level - 1].index++; 581 ds->tree_level++; 582 level--; 583 continue; 584 } 585 586 /* Hashes in order for scrub? */ 587 key = xchk_da_btree_node_entry(ds, level); 588 error = xchk_da_btree_hash(ds, level, &key->hashval); 589 if (error) 590 goto out; 591 592 /* Drill another level deeper. */ 593 blkno = be32_to_cpu(key->before); 594 level++; 595 if (level >= XFS_DA_NODE_MAXDEPTH) { 596 /* Too deep! */ 597 xchk_da_set_corrupt(ds, level - 1); 598 break; 599 } 600 ds->tree_level--; 601 error = xchk_da_btree_block(ds, level, blkno); 602 if (error) 603 goto out; 604 if (blks[level].bp == NULL) 605 goto out; 606 607 blks[level].index = 0; 608 } 609 610 out: 611 /* Release all the buffers we're tracking. */ 612 for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { 613 if (blks[level].bp == NULL) 614 continue; 615 xfs_trans_brelse(sc->tp, blks[level].bp); 616 blks[level].bp = NULL; 617 } 618 619 out_state: 620 xfs_da_state_free(ds->state); 621 kfree(ds); 622 return error; 623 } 624