1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_dir2.h" 16 #include "xfs_dir2_priv.h" 17 #include "xfs_attr_leaf.h" 18 #include "scrub/scrub.h" 19 #include "scrub/common.h" 20 #include "scrub/trace.h" 21 #include "scrub/dabtree.h" 22 23 /* Directory/Attribute Btree */ 24 25 /* 26 * Check for da btree operation errors. See the section about handling 27 * operational errors in common.c. 28 */ 29 bool 30 xchk_da_process_error( 31 struct xchk_da_btree *ds, 32 int level, 33 int *error) 34 { 35 struct xfs_scrub *sc = ds->sc; 36 37 if (*error == 0) 38 return true; 39 40 switch (*error) { 41 case -EDEADLOCK: 42 case -ECHRNG: 43 /* Used to restart an op with deadlock avoidance. */ 44 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 45 break; 46 case -EFSBADCRC: 47 case -EFSCORRUPTED: 48 /* Note the badness but don't abort. */ 49 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 50 *error = 0; 51 fallthrough; 52 default: 53 trace_xchk_file_op_error(sc, ds->dargs.whichfork, 54 xfs_dir2_da_to_db(ds->dargs.geo, 55 ds->state->path.blk[level].blkno), 56 *error, __return_address); 57 break; 58 } 59 return false; 60 } 61 62 /* 63 * Check for da btree corruption. See the section about handling 64 * operational errors in common.c. 65 */ 66 void 67 xchk_da_set_corrupt( 68 struct xchk_da_btree *ds, 69 int level) 70 { 71 struct xfs_scrub *sc = ds->sc; 72 73 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 74 75 trace_xchk_fblock_error(sc, ds->dargs.whichfork, 76 xfs_dir2_da_to_db(ds->dargs.geo, 77 ds->state->path.blk[level].blkno), 78 __return_address); 79 } 80 81 /* Flag a da btree node in need of optimization. */ 82 void 83 xchk_da_set_preen( 84 struct xchk_da_btree *ds, 85 int level) 86 { 87 struct xfs_scrub *sc = ds->sc; 88 89 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_PREEN; 90 trace_xchk_fblock_preen(sc, ds->dargs.whichfork, 91 xfs_dir2_da_to_db(ds->dargs.geo, 92 ds->state->path.blk[level].blkno), 93 __return_address); 94 } 95 96 /* Find an entry at a certain level in a da btree. */ 97 static struct xfs_da_node_entry * 98 xchk_da_btree_node_entry( 99 struct xchk_da_btree *ds, 100 int level) 101 { 102 struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; 103 struct xfs_da3_icnode_hdr hdr; 104 105 ASSERT(blk->magic == XFS_DA_NODE_MAGIC); 106 107 xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); 108 return hdr.btree + blk->index; 109 } 110 111 /* Scrub a da btree hash (key). */ 112 int 113 xchk_da_btree_hash( 114 struct xchk_da_btree *ds, 115 int level, 116 __be32 *hashp) 117 { 118 struct xfs_da_node_entry *entry; 119 xfs_dahash_t hash; 120 xfs_dahash_t parent_hash; 121 122 /* Is this hash in order? */ 123 hash = be32_to_cpu(*hashp); 124 if (hash < ds->hashes[level]) 125 xchk_da_set_corrupt(ds, level); 126 ds->hashes[level] = hash; 127 128 if (level == 0) 129 return 0; 130 131 /* Is this hash no larger than the parent hash? */ 132 entry = xchk_da_btree_node_entry(ds, level - 1); 133 parent_hash = be32_to_cpu(entry->hashval); 134 if (parent_hash < hash) 135 xchk_da_set_corrupt(ds, level); 136 137 return 0; 138 } 139 140 /* 141 * Check a da btree pointer. Returns true if it's ok to use this 142 * pointer. 143 */ 144 STATIC bool 145 xchk_da_btree_ptr_ok( 146 struct xchk_da_btree *ds, 147 int level, 148 xfs_dablk_t blkno) 149 { 150 if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { 151 xchk_da_set_corrupt(ds, level); 152 return false; 153 } 154 155 return true; 156 } 157 158 /* 159 * The da btree scrubber can handle leaf1 blocks as a degenerate 160 * form of leafn blocks. Since the regular da code doesn't handle 161 * leaf1, we must multiplex the verifiers. 162 */ 163 static void 164 xchk_da_btree_read_verify( 165 struct xfs_buf *bp) 166 { 167 struct xfs_da_blkinfo *info = bp->b_addr; 168 169 switch (be16_to_cpu(info->magic)) { 170 case XFS_DIR2_LEAF1_MAGIC: 171 case XFS_DIR3_LEAF1_MAGIC: 172 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 173 bp->b_ops->verify_read(bp); 174 return; 175 default: 176 /* 177 * xfs_da3_node_buf_ops already know how to handle 178 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 179 */ 180 bp->b_ops = &xfs_da3_node_buf_ops; 181 bp->b_ops->verify_read(bp); 182 return; 183 } 184 } 185 static void 186 xchk_da_btree_write_verify( 187 struct xfs_buf *bp) 188 { 189 struct xfs_da_blkinfo *info = bp->b_addr; 190 191 switch (be16_to_cpu(info->magic)) { 192 case XFS_DIR2_LEAF1_MAGIC: 193 case XFS_DIR3_LEAF1_MAGIC: 194 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 195 bp->b_ops->verify_write(bp); 196 return; 197 default: 198 /* 199 * xfs_da3_node_buf_ops already know how to handle 200 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 201 */ 202 bp->b_ops = &xfs_da3_node_buf_ops; 203 bp->b_ops->verify_write(bp); 204 return; 205 } 206 } 207 static void * 208 xchk_da_btree_verify( 209 struct xfs_buf *bp) 210 { 211 struct xfs_da_blkinfo *info = bp->b_addr; 212 213 switch (be16_to_cpu(info->magic)) { 214 case XFS_DIR2_LEAF1_MAGIC: 215 case XFS_DIR3_LEAF1_MAGIC: 216 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 217 return bp->b_ops->verify_struct(bp); 218 default: 219 bp->b_ops = &xfs_da3_node_buf_ops; 220 return bp->b_ops->verify_struct(bp); 221 } 222 } 223 224 static const struct xfs_buf_ops xchk_da_btree_buf_ops = { 225 .name = "xchk_da_btree", 226 .verify_read = xchk_da_btree_read_verify, 227 .verify_write = xchk_da_btree_write_verify, 228 .verify_struct = xchk_da_btree_verify, 229 }; 230 231 /* Check a block's sibling. */ 232 STATIC int 233 xchk_da_btree_block_check_sibling( 234 struct xchk_da_btree *ds, 235 int level, 236 int direction, 237 xfs_dablk_t sibling) 238 { 239 struct xfs_da_state_path *path = &ds->state->path; 240 struct xfs_da_state_path *altpath = &ds->state->altpath; 241 int retval; 242 int plevel; 243 int error; 244 245 memcpy(altpath, path, sizeof(ds->state->altpath)); 246 247 /* 248 * If the pointer is null, we shouldn't be able to move the upper 249 * level pointer anywhere. 250 */ 251 if (sibling == 0) { 252 error = xfs_da3_path_shift(ds->state, altpath, direction, 253 false, &retval); 254 if (error == 0 && retval == 0) 255 xchk_da_set_corrupt(ds, level); 256 error = 0; 257 goto out; 258 } 259 260 /* Move the alternate cursor one block in the direction given. */ 261 error = xfs_da3_path_shift(ds->state, altpath, direction, false, 262 &retval); 263 if (!xchk_da_process_error(ds, level, &error)) 264 goto out; 265 if (retval) { 266 xchk_da_set_corrupt(ds, level); 267 goto out; 268 } 269 if (altpath->blk[level].bp) 270 xchk_buffer_recheck(ds->sc, altpath->blk[level].bp); 271 272 /* Compare upper level pointer to sibling pointer. */ 273 if (altpath->blk[level].blkno != sibling) 274 xchk_da_set_corrupt(ds, level); 275 276 out: 277 /* Free all buffers in the altpath that aren't referenced from path. */ 278 for (plevel = 0; plevel < altpath->active; plevel++) { 279 if (altpath->blk[plevel].bp == NULL || 280 (plevel < path->active && 281 altpath->blk[plevel].bp == path->blk[plevel].bp)) 282 continue; 283 284 xfs_trans_brelse(ds->dargs.trans, altpath->blk[plevel].bp); 285 altpath->blk[plevel].bp = NULL; 286 } 287 288 return error; 289 } 290 291 /* Check a block's sibling pointers. */ 292 STATIC int 293 xchk_da_btree_block_check_siblings( 294 struct xchk_da_btree *ds, 295 int level, 296 struct xfs_da_blkinfo *hdr) 297 { 298 xfs_dablk_t forw; 299 xfs_dablk_t back; 300 int error = 0; 301 302 forw = be32_to_cpu(hdr->forw); 303 back = be32_to_cpu(hdr->back); 304 305 /* Top level blocks should not have sibling pointers. */ 306 if (level == 0) { 307 if (forw != 0 || back != 0) 308 xchk_da_set_corrupt(ds, level); 309 return 0; 310 } 311 312 /* 313 * Check back (left) and forw (right) pointers. These functions 314 * absorb error codes for us. 315 */ 316 error = xchk_da_btree_block_check_sibling(ds, level, 0, back); 317 if (error) 318 goto out; 319 error = xchk_da_btree_block_check_sibling(ds, level, 1, forw); 320 321 out: 322 memset(&ds->state->altpath, 0, sizeof(ds->state->altpath)); 323 return error; 324 } 325 326 /* Load a dir/attribute block from a btree. */ 327 STATIC int 328 xchk_da_btree_block( 329 struct xchk_da_btree *ds, 330 int level, 331 xfs_dablk_t blkno) 332 { 333 struct xfs_da_state_blk *blk; 334 struct xfs_da_intnode *node; 335 struct xfs_da_node_entry *btree; 336 struct xfs_da3_blkinfo *hdr3; 337 struct xfs_da_args *dargs = &ds->dargs; 338 struct xfs_inode *ip = ds->dargs.dp; 339 xfs_failaddr_t fa; 340 xfs_ino_t owner; 341 int *pmaxrecs; 342 struct xfs_da3_icnode_hdr nodehdr; 343 int error = 0; 344 345 blk = &ds->state->path.blk[level]; 346 ds->state->path.active = level + 1; 347 348 /* Release old block. */ 349 if (blk->bp) { 350 xfs_trans_brelse(dargs->trans, blk->bp); 351 blk->bp = NULL; 352 } 353 354 /* Check the pointer. */ 355 blk->blkno = blkno; 356 if (!xchk_da_btree_ptr_ok(ds, level, blkno)) 357 goto out_nobuf; 358 359 /* Read the buffer. */ 360 error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno, 361 XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork, 362 &xchk_da_btree_buf_ops); 363 if (!xchk_da_process_error(ds, level, &error)) 364 goto out_nobuf; 365 if (blk->bp) 366 xchk_buffer_recheck(ds->sc, blk->bp); 367 368 /* 369 * We didn't find a dir btree root block, which means that 370 * there's no LEAF1/LEAFN tree (at least not where it's supposed 371 * to be), so jump out now. 372 */ 373 if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 && 374 blk->bp == NULL) 375 goto out_nobuf; 376 377 /* It's /not/ ok for attr trees not to have a da btree. */ 378 if (blk->bp == NULL) { 379 xchk_da_set_corrupt(ds, level); 380 goto out_nobuf; 381 } 382 383 hdr3 = blk->bp->b_addr; 384 blk->magic = be16_to_cpu(hdr3->hdr.magic); 385 pmaxrecs = &ds->maxrecs[level]; 386 387 /* We only started zeroing the header on v5 filesystems. */ 388 if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad) 389 xchk_da_set_corrupt(ds, level); 390 391 /* Check the owner. */ 392 if (xfs_has_crc(ip->i_mount)) { 393 owner = be64_to_cpu(hdr3->owner); 394 if (owner != ip->i_ino) 395 xchk_da_set_corrupt(ds, level); 396 } 397 398 /* Check the siblings. */ 399 error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr); 400 if (error) 401 goto out; 402 403 /* Interpret the buffer. */ 404 switch (blk->magic) { 405 case XFS_ATTR_LEAF_MAGIC: 406 case XFS_ATTR3_LEAF_MAGIC: 407 xfs_trans_buf_set_type(dargs->trans, blk->bp, 408 XFS_BLFT_ATTR_LEAF_BUF); 409 blk->magic = XFS_ATTR_LEAF_MAGIC; 410 blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs); 411 if (ds->tree_level != 0) 412 xchk_da_set_corrupt(ds, level); 413 break; 414 case XFS_DIR2_LEAFN_MAGIC: 415 case XFS_DIR3_LEAFN_MAGIC: 416 xfs_trans_buf_set_type(dargs->trans, blk->bp, 417 XFS_BLFT_DIR_LEAFN_BUF); 418 blk->magic = XFS_DIR2_LEAFN_MAGIC; 419 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); 420 if (ds->tree_level != 0) 421 xchk_da_set_corrupt(ds, level); 422 break; 423 case XFS_DIR2_LEAF1_MAGIC: 424 case XFS_DIR3_LEAF1_MAGIC: 425 xfs_trans_buf_set_type(dargs->trans, blk->bp, 426 XFS_BLFT_DIR_LEAF1_BUF); 427 blk->magic = XFS_DIR2_LEAF1_MAGIC; 428 blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs); 429 if (ds->tree_level != 0) 430 xchk_da_set_corrupt(ds, level); 431 break; 432 case XFS_DA_NODE_MAGIC: 433 case XFS_DA3_NODE_MAGIC: 434 xfs_trans_buf_set_type(dargs->trans, blk->bp, 435 XFS_BLFT_DA_NODE_BUF); 436 blk->magic = XFS_DA_NODE_MAGIC; 437 node = blk->bp->b_addr; 438 xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node); 439 btree = nodehdr.btree; 440 *pmaxrecs = nodehdr.count; 441 blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval); 442 if (level == 0) { 443 if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) { 444 xchk_da_set_corrupt(ds, level); 445 goto out_freebp; 446 } 447 ds->tree_level = nodehdr.level; 448 } else { 449 if (ds->tree_level != nodehdr.level) { 450 xchk_da_set_corrupt(ds, level); 451 goto out_freebp; 452 } 453 } 454 455 /* XXX: Check hdr3.pad32 once we know how to fix it. */ 456 break; 457 default: 458 xchk_da_set_corrupt(ds, level); 459 goto out_freebp; 460 } 461 462 fa = xfs_da3_header_check(blk->bp, dargs->owner); 463 if (fa) { 464 xchk_da_set_corrupt(ds, level); 465 goto out_freebp; 466 } 467 468 /* 469 * If we've been handed a block that is below the dabtree root, does 470 * its hashval match what the parent block expected to see? 471 */ 472 if (level > 0) { 473 struct xfs_da_node_entry *key; 474 475 key = xchk_da_btree_node_entry(ds, level - 1); 476 if (be32_to_cpu(key->hashval) != blk->hashval) { 477 xchk_da_set_corrupt(ds, level); 478 goto out_freebp; 479 } 480 } 481 482 out: 483 return error; 484 out_freebp: 485 xfs_trans_brelse(dargs->trans, blk->bp); 486 blk->bp = NULL; 487 out_nobuf: 488 blk->blkno = 0; 489 return error; 490 } 491 492 /* Visit all nodes and leaves of a da btree. */ 493 int 494 xchk_da_btree( 495 struct xfs_scrub *sc, 496 int whichfork, 497 xchk_da_btree_rec_fn scrub_fn, 498 void *private) 499 { 500 struct xchk_da_btree *ds; 501 struct xfs_mount *mp = sc->mp; 502 struct xfs_da_state_blk *blks; 503 struct xfs_da_node_entry *key; 504 xfs_dablk_t blkno; 505 int level; 506 int error; 507 508 /* Skip short format data structures; no btree to scan. */ 509 if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork))) 510 return 0; 511 512 /* Set up initial da state. */ 513 ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS); 514 if (!ds) 515 return -ENOMEM; 516 ds->dargs.dp = sc->ip; 517 ds->dargs.whichfork = whichfork; 518 ds->dargs.trans = sc->tp; 519 ds->dargs.op_flags = XFS_DA_OP_OKNOENT; 520 ds->dargs.owner = sc->ip->i_ino; 521 ds->state = xfs_da_state_alloc(&ds->dargs); 522 ds->sc = sc; 523 ds->private = private; 524 if (whichfork == XFS_ATTR_FORK) { 525 ds->dargs.geo = mp->m_attr_geo; 526 ds->lowest = 0; 527 ds->highest = 0; 528 } else { 529 ds->dargs.geo = mp->m_dir_geo; 530 ds->lowest = ds->dargs.geo->leafblk; 531 ds->highest = ds->dargs.geo->freeblk; 532 } 533 blkno = ds->lowest; 534 level = 0; 535 536 /* Find the root of the da tree, if present. */ 537 blks = ds->state->path.blk; 538 error = xchk_da_btree_block(ds, level, blkno); 539 if (error) 540 goto out_state; 541 /* 542 * We didn't find a block at ds->lowest, which means that there's 543 * no LEAF1/LEAFN tree (at least not where it's supposed to be), 544 * so jump out now. 545 */ 546 if (blks[level].bp == NULL) 547 goto out_state; 548 549 blks[level].index = 0; 550 while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) { 551 /* Handle leaf block. */ 552 if (blks[level].magic != XFS_DA_NODE_MAGIC) { 553 /* End of leaf, pop back towards the root. */ 554 if (blks[level].index >= ds->maxrecs[level]) { 555 if (level > 0) 556 blks[level - 1].index++; 557 ds->tree_level++; 558 level--; 559 continue; 560 } 561 562 /* Dispatch record scrubbing. */ 563 error = scrub_fn(ds, level); 564 if (error) 565 break; 566 if (xchk_should_terminate(sc, &error) || 567 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 568 break; 569 570 blks[level].index++; 571 continue; 572 } 573 574 575 /* End of node, pop back towards the root. */ 576 if (blks[level].index >= ds->maxrecs[level]) { 577 if (level > 0) 578 blks[level - 1].index++; 579 ds->tree_level++; 580 level--; 581 continue; 582 } 583 584 /* Hashes in order for scrub? */ 585 key = xchk_da_btree_node_entry(ds, level); 586 error = xchk_da_btree_hash(ds, level, &key->hashval); 587 if (error) 588 goto out; 589 590 /* Drill another level deeper. */ 591 blkno = be32_to_cpu(key->before); 592 level++; 593 if (level >= XFS_DA_NODE_MAXDEPTH) { 594 /* Too deep! */ 595 xchk_da_set_corrupt(ds, level - 1); 596 break; 597 } 598 ds->tree_level--; 599 error = xchk_da_btree_block(ds, level, blkno); 600 if (error) 601 goto out; 602 if (blks[level].bp == NULL) 603 goto out; 604 605 blks[level].index = 0; 606 } 607 608 out: 609 /* Release all the buffers we're tracking. */ 610 for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) { 611 if (blks[level].bp == NULL) 612 continue; 613 xfs_trans_brelse(sc->tp, blks[level].bp); 614 blks[level].bp = NULL; 615 } 616 617 out_state: 618 xfs_da_state_free(ds->state); 619 kfree(ds); 620 return error; 621 } 622