1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_dir2.h" 16 #include "xfs_dir2_priv.h" 17 #include "xfs_attr_leaf.h" 18 #include "scrub/scrub.h" 19 #include "scrub/common.h" 20 #include "scrub/trace.h" 21 #include "scrub/dabtree.h" 22 23 /* Directory/Attribute Btree */ 24 25 /* 26 * Check for da btree operation errors. See the section about handling 27 * operational errors in common.c. 28 */ 29 bool 30 xchk_da_process_error( 31 struct xchk_da_btree *ds, 32 int level, 33 int *error) 34 { 35 struct xfs_scrub *sc = ds->sc; 36 37 if (*error == 0) 38 return true; 39 40 switch (*error) { 41 case -EDEADLOCK: 42 case -ECHRNG: 43 /* Used to restart an op with deadlock avoidance. */ 44 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 45 break; 46 case -EFSBADCRC: 47 case -EFSCORRUPTED: 48 /* Note the badness but don't abort. */ 49 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 50 *error = 0; 51 fallthrough; 52 default: 53 trace_xchk_file_op_error(sc, ds->dargs.whichfork, 54 xfs_dir2_da_to_db(ds->dargs.geo, 55 ds->state->path.blk[level].blkno), 56 *error, __return_address); 57 break; 58 } 59 return false; 60 } 61 62 /* 63 * Check for da btree corruption. See the section about handling 64 * operational errors in common.c. 
65 */ 66 void 67 xchk_da_set_corrupt( 68 struct xchk_da_btree *ds, 69 int level) 70 { 71 struct xfs_scrub *sc = ds->sc; 72 73 sc->sm->sm_flags |= XFS_SCRUB_OFLAG_CORRUPT; 74 75 trace_xchk_fblock_error(sc, ds->dargs.whichfork, 76 xfs_dir2_da_to_db(ds->dargs.geo, 77 ds->state->path.blk[level].blkno), 78 __return_address); 79 } 80 81 static struct xfs_da_node_entry * 82 xchk_da_btree_node_entry( 83 struct xchk_da_btree *ds, 84 int level) 85 { 86 struct xfs_da_state_blk *blk = &ds->state->path.blk[level]; 87 struct xfs_da3_icnode_hdr hdr; 88 89 ASSERT(blk->magic == XFS_DA_NODE_MAGIC); 90 91 xfs_da3_node_hdr_from_disk(ds->sc->mp, &hdr, blk->bp->b_addr); 92 return hdr.btree + blk->index; 93 } 94 95 /* Scrub a da btree hash (key). */ 96 int 97 xchk_da_btree_hash( 98 struct xchk_da_btree *ds, 99 int level, 100 __be32 *hashp) 101 { 102 struct xfs_da_node_entry *entry; 103 xfs_dahash_t hash; 104 xfs_dahash_t parent_hash; 105 106 /* Is this hash in order? */ 107 hash = be32_to_cpu(*hashp); 108 if (hash < ds->hashes[level]) 109 xchk_da_set_corrupt(ds, level); 110 ds->hashes[level] = hash; 111 112 if (level == 0) 113 return 0; 114 115 /* Is this hash no larger than the parent hash? */ 116 entry = xchk_da_btree_node_entry(ds, level - 1); 117 parent_hash = be32_to_cpu(entry->hashval); 118 if (parent_hash < hash) 119 xchk_da_set_corrupt(ds, level); 120 121 return 0; 122 } 123 124 /* 125 * Check a da btree pointer. Returns true if it's ok to use this 126 * pointer. 127 */ 128 STATIC bool 129 xchk_da_btree_ptr_ok( 130 struct xchk_da_btree *ds, 131 int level, 132 xfs_dablk_t blkno) 133 { 134 if (blkno < ds->lowest || (ds->highest != 0 && blkno >= ds->highest)) { 135 xchk_da_set_corrupt(ds, level); 136 return false; 137 } 138 139 return true; 140 } 141 142 /* 143 * The da btree scrubber can handle leaf1 blocks as a degenerate 144 * form of leafn blocks. Since the regular da code doesn't handle 145 * leaf1, we must multiplex the verifiers. 
146 */ 147 static void 148 xchk_da_btree_read_verify( 149 struct xfs_buf *bp) 150 { 151 struct xfs_da_blkinfo *info = bp->b_addr; 152 153 switch (be16_to_cpu(info->magic)) { 154 case XFS_DIR2_LEAF1_MAGIC: 155 case XFS_DIR3_LEAF1_MAGIC: 156 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 157 bp->b_ops->verify_read(bp); 158 return; 159 default: 160 /* 161 * xfs_da3_node_buf_ops already know how to handle 162 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 163 */ 164 bp->b_ops = &xfs_da3_node_buf_ops; 165 bp->b_ops->verify_read(bp); 166 return; 167 } 168 } 169 static void 170 xchk_da_btree_write_verify( 171 struct xfs_buf *bp) 172 { 173 struct xfs_da_blkinfo *info = bp->b_addr; 174 175 switch (be16_to_cpu(info->magic)) { 176 case XFS_DIR2_LEAF1_MAGIC: 177 case XFS_DIR3_LEAF1_MAGIC: 178 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 179 bp->b_ops->verify_write(bp); 180 return; 181 default: 182 /* 183 * xfs_da3_node_buf_ops already know how to handle 184 * DA*_NODE, ATTR*_LEAF, and DIR*_LEAFN blocks. 185 */ 186 bp->b_ops = &xfs_da3_node_buf_ops; 187 bp->b_ops->verify_write(bp); 188 return; 189 } 190 } 191 static void * 192 xchk_da_btree_verify( 193 struct xfs_buf *bp) 194 { 195 struct xfs_da_blkinfo *info = bp->b_addr; 196 197 switch (be16_to_cpu(info->magic)) { 198 case XFS_DIR2_LEAF1_MAGIC: 199 case XFS_DIR3_LEAF1_MAGIC: 200 bp->b_ops = &xfs_dir3_leaf1_buf_ops; 201 return bp->b_ops->verify_struct(bp); 202 default: 203 bp->b_ops = &xfs_da3_node_buf_ops; 204 return bp->b_ops->verify_struct(bp); 205 } 206 } 207 208 static const struct xfs_buf_ops xchk_da_btree_buf_ops = { 209 .name = "xchk_da_btree", 210 .verify_read = xchk_da_btree_read_verify, 211 .verify_write = xchk_da_btree_write_verify, 212 .verify_struct = xchk_da_btree_verify, 213 }; 214 215 /* Check a block's sibling. 
*/
/*
 * Shift a scratch copy of the scrub path one block in @direction and
 * compare where it lands with the @sibling pointer read from the block
 * at @level.  A null @sibling must not be shiftable; a non-null one must
 * match the block number the shift arrives at.  Buffers that only the
 * scratch path holds are released before returning.
 *
 * Returns zero, or whatever xchk_da_process_error leaves in the error
 * code after a failed shift toward a non-null sibling.
 */
STATIC int
xchk_da_btree_block_check_sibling(
	struct xchk_da_btree		*ds,
	int				level,
	int				direction,
	xfs_dablk_t			sibling)
{
	struct xfs_da_state_path	*path = &ds->state->path;
	struct xfs_da_state_path	*altpath = &ds->state->altpath;
	struct xfs_da_state_blk		*ablk;
	int				shift_res;
	int				i;
	int				error;

	/* Work on a copy of the cursor so that the real path stays put. */
	memcpy(altpath, path, sizeof(*altpath));

	/* Move the alternate cursor one block in the direction given. */
	error = xfs_da3_path_shift(ds->state, altpath, direction, false,
			&shift_res);

	if (sibling == 0) {
		/*
		 * If the pointer is null, we shouldn't be able to move the
		 * upper level pointer anywhere.  A failed shift is not
		 * reported to the caller.
		 */
		if (!error && !shift_res)
			xchk_da_set_corrupt(ds, level);
		error = 0;
	} else if (xchk_da_process_error(ds, level, &error)) {
		if (shift_res) {
			/* The sibling pointer is set but we could not move. */
			xchk_da_set_corrupt(ds, level);
		} else {
			ablk = &altpath->blk[level];
			if (ablk->bp)
				xchk_buffer_recheck(ds->sc, ablk->bp);

			/* Compare upper level pointer to sibling pointer. */
			if (ablk->blkno != sibling)
				xchk_da_set_corrupt(ds, level);
		}
	}

	/* Free all buffers in the altpath that aren't referenced from path. */
	for (i = 0; i < altpath->active; i++) {
		ablk = &altpath->blk[i];

		if (!ablk->bp)
			continue;
		if (i < path->active && ablk->bp == path->blk[i].bp)
			continue;

		xfs_trans_brelse(ds->dargs.trans, ablk->bp);
		ablk->bp = NULL;
	}

	return error;
}

/* Check a block's sibling pointers.
*/
/*
 * Validate the forw/back pointers in @hdr, the header of the block at
 * @level.  Root blocks must have neither; other blocks have each pointer
 * checked by xchk_da_btree_block_check_sibling.  The scratch altpath is
 * zeroed again before returning from the non-root case.
 *
 * Returns zero or the error returned by the per-sibling check.
 */
STATIC int
xchk_da_btree_block_check_siblings(
	struct xchk_da_btree	*ds,
	int			level,
	struct xfs_da_blkinfo	*hdr)
{
	xfs_dablk_t		forw;
	xfs_dablk_t		back;
	int			error = 0;

	forw = be32_to_cpu(hdr->forw);
	back = be32_to_cpu(hdr->back);

	/* Top level blocks should not have sibling pointers. */
	if (level == 0) {
		if (forw != 0 || back != 0)
			xchk_da_set_corrupt(ds, level);
		return 0;
	}

	/*
	 * Check back (left) and forw (right) pointers.  These functions
	 * absorb error codes for us.  Direction 0 walks toward the back
	 * sibling and direction 1 toward the forw sibling.
	 */
	error = xchk_da_btree_block_check_sibling(ds, level, 0, back);
	if (error)
		goto out;
	error = xchk_da_btree_block_check_sibling(ds, level, 1, forw);

out:
	/* Wipe the scratch path so no stale state survives this check. */
	memset(&ds->state->altpath, 0, sizeof(ds->state->altpath));
	return error;
}

/*
 * Load a dir/attribute block from a btree.
 *
 * Reads da block @blkno into slot @level of the scrub path, releasing
 * whatever buffer previously occupied that slot, and checks the block
 * header: pad, owner (on CRC-enabled filesystems), sibling pointers,
 * magic, tree level and, below the root, agreement between the block's
 * last hash and the parent's key.
 *
 * When the function returns through the "out" label, blk->bp is whatever
 * was read, blk->magic has been collapsed to the non-v3 magic for its
 * block type (if the magic switch was reached) and blk->hashval and
 * ds->maxrecs[level] describe the block.  When it returns through
 * out_freebp/out_nobuf, blk->bp is NULL and blk->blkno is reset to zero;
 * callers in this file test blk->bp to tell the two outcomes apart.
 *
 * Returns zero or a negative errno.  Corruption is reported through the
 * scrub flags rather than through the return value.
 */
STATIC int
xchk_da_btree_block(
	struct xchk_da_btree		*ds,
	int				level,
	xfs_dablk_t			blkno)
{
	struct xfs_da_state_blk		*blk;
	struct xfs_da_intnode		*node;
	struct xfs_da_node_entry	*btree;
	struct xfs_da3_blkinfo		*hdr3;
	struct xfs_da_args		*dargs = &ds->dargs;
	struct xfs_inode		*ip = ds->dargs.dp;
	xfs_ino_t			owner;
	int				*pmaxrecs;
	struct xfs_da3_icnode_hdr	nodehdr;
	int				error = 0;

	/* The path is now exactly level + 1 blocks deep. */
	blk = &ds->state->path.blk[level];
	ds->state->path.active = level + 1;

	/* Release old block. */
	if (blk->bp) {
		xfs_trans_brelse(dargs->trans, blk->bp);
		blk->bp = NULL;
	}

	/* Check the pointer. */
	blk->blkno = blkno;
	if (!xchk_da_btree_ptr_ok(ds, level, blkno))
		goto out_nobuf;

	/*
	 * Read the buffer.  The scrub-specific buf ops are passed in so that
	 * LEAF1 blocks can be read as well; XFS_DABUF_MAP_HOLE_OK presumably
	 * lets a hole come back as a NULL buffer instead of an error, which
	 * the NULL checks below rely on.
	 */
	error = xfs_da_read_buf(dargs->trans, dargs->dp, blk->blkno,
			XFS_DABUF_MAP_HOLE_OK, &blk->bp, dargs->whichfork,
			&xchk_da_btree_buf_ops);
	if (!xchk_da_process_error(ds, level, &error))
		goto out_nobuf;
	if (blk->bp)
		xchk_buffer_recheck(ds->sc, blk->bp);

	/*
	 * We didn't find a dir btree root block, which means that
	 * there's no LEAF1/LEAFN tree (at least not where it's supposed
	 * to be), so jump out now.
	 */
	if (ds->dargs.whichfork == XFS_DATA_FORK && level == 0 &&
			blk->bp == NULL)
		goto out_nobuf;

	/* It's /not/ ok for attr trees not to have a da btree. */
	if (blk->bp == NULL) {
		xchk_da_set_corrupt(ds, level);
		goto out_nobuf;
	}

	hdr3 = blk->bp->b_addr;
	blk->magic = be16_to_cpu(hdr3->hdr.magic);
	pmaxrecs = &ds->maxrecs[level];

	/* We only started zeroing the header on v5 filesystems. */
	if (xfs_has_crc(ds->sc->mp) && hdr3->hdr.pad)
		xchk_da_set_corrupt(ds, level);

	/* Check the owner. */
	if (xfs_has_crc(ip->i_mount)) {
		owner = be64_to_cpu(hdr3->owner);
		if (owner != ip->i_ino)
			xchk_da_set_corrupt(ds, level);
	}

	/*
	 * Check the siblings.  On error we leave through "out", so the
	 * buffer stays attached to the path slot.
	 */
	error = xchk_da_btree_block_check_siblings(ds, level, &hdr3->hdr);
	if (error)
		goto out;

	/*
	 * Interpret the buffer.  Each case collapses the v2/v3 magic pair to
	 * the v2 value so that later code only has one magic to test for.
	 * Leaf blocks are only valid when ds->tree_level has reached zero.
	 * pmaxrecs is handed to the lasthash helpers, which presumably store
	 * the record count through it.
	 */
	switch (blk->magic) {
	case XFS_ATTR_LEAF_MAGIC:
	case XFS_ATTR3_LEAF_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_ATTR_LEAF_BUF);
		blk->magic = XFS_ATTR_LEAF_MAGIC;
		blk->hashval = xfs_attr_leaf_lasthash(blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DIR2_LEAFN_MAGIC:
	case XFS_DIR3_LEAFN_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DIR_LEAFN_BUF);
		blk->magic = XFS_DIR2_LEAFN_MAGIC;
		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DIR2_LEAF1_MAGIC:
	case XFS_DIR3_LEAF1_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DIR_LEAF1_BUF);
		blk->magic = XFS_DIR2_LEAF1_MAGIC;
		blk->hashval = xfs_dir2_leaf_lasthash(ip, blk->bp, pmaxrecs);
		if (ds->tree_level != 0)
			xchk_da_set_corrupt(ds, level);
		break;
	case XFS_DA_NODE_MAGIC:
	case XFS_DA3_NODE_MAGIC:
		xfs_trans_buf_set_type(dargs->trans, blk->bp,
				XFS_BLFT_DA_NODE_BUF);
		blk->magic = XFS_DA_NODE_MAGIC;
		node = blk->bp->b_addr;
		xfs_da3_node_hdr_from_disk(ip->i_mount, &nodehdr, node);
		btree = nodehdr.btree;
		*pmaxrecs = nodehdr.count;
		/* The last entry's hashval is the largest key in this node. */
		blk->hashval = be32_to_cpu(btree[*pmaxrecs - 1].hashval);
		if (level == 0) {
			/* The root's level seeds the expected tree height. */
			if (nodehdr.level >= XFS_DA_NODE_MAXDEPTH) {
				xchk_da_set_corrupt(ds, level);
				goto out_freebp;
			}
			ds->tree_level = nodehdr.level;
		} else {
			/* Interior nodes must sit at the level we expect. */
			if (ds->tree_level != nodehdr.level) {
				xchk_da_set_corrupt(ds, level);
				goto out_freebp;
			}
		}

		/* XXX: Check hdr3.pad32 once we know how to fix it. */
		break;
	default:
		/* Unrecognised magic; this is not a da btree block. */
		xchk_da_set_corrupt(ds, level);
		goto out_freebp;
	}

	/*
	 * If we've been handed a block that is below the dabtree root, does
	 * its hashval match what the parent block expected to see?
	 */
	if (level > 0) {
		struct xfs_da_node_entry	*key;

		key = xchk_da_btree_node_entry(ds, level - 1);
		if (be32_to_cpu(key->hashval) != blk->hashval) {
			xchk_da_set_corrupt(ds, level);
			goto out_freebp;
		}
	}

out:
	return error;
out_freebp:
	/* Drop the buffer so callers see blk->bp == NULL and stop here. */
	xfs_trans_brelse(dargs->trans, blk->bp);
	blk->bp = NULL;
out_nobuf:
	blk->blkno = 0;
	return error;
}

/*
 * Visit all nodes and leaves of a da btree.
 *
 * Walks the da btree in fork @whichfork of sc->ip depth first, checking
 * each block as it is loaded and calling @scrub_fn(ds, level) once for
 * every record of every leaf block.  @private is stored in ds->private
 * for @scrub_fn to use.  The walk stops early when @scrub_fn fails, when
 * xchk_should_terminate says so, or once corruption has been flagged.
 *
 * Returns zero (including when the fork has no extents or no root block
 * is found), -ENOMEM if the scrub context cannot be allocated, or the
 * error that ended the walk.
 */
int
xchk_da_btree(
	struct xfs_scrub		*sc,
	int				whichfork,
	xchk_da_btree_rec_fn		scrub_fn,
	void				*private)
{
	struct xchk_da_btree		*ds;
	struct xfs_mount		*mp = sc->mp;
	struct xfs_da_state_blk		*blks;
	struct xfs_da_node_entry	*key;
	xfs_dablk_t			blkno;
	int				level;
	int				error;

	/* Skip short format data structures; no btree to scan. */
	if (!xfs_ifork_has_extents(xfs_ifork_ptr(sc->ip, whichfork)))
		return 0;

	/* Set up initial da state. */
	ds = kzalloc(sizeof(struct xchk_da_btree), XCHK_GFP_FLAGS);
	if (!ds)
		return -ENOMEM;
	ds->dargs.dp = sc->ip;
	ds->dargs.whichfork = whichfork;
	ds->dargs.trans = sc->tp;
	ds->dargs.op_flags = XFS_DA_OP_OKNOENT;
	ds->state = xfs_da_state_alloc(&ds->dargs);
	ds->sc = sc;
	ds->private = private;
	/*
	 * Block range that btree pointers may legally reference.  A zero
	 * ds->highest disables the upper bound check in
	 * xchk_da_btree_ptr_ok, so attr forks are only bounded from below.
	 */
	if (whichfork == XFS_ATTR_FORK) {
		ds->dargs.geo = mp->m_attr_geo;
		ds->lowest = 0;
		ds->highest = 0;
	} else {
		ds->dargs.geo = mp->m_dir_geo;
		ds->lowest = ds->dargs.geo->leafblk;
		ds->highest = ds->dargs.geo->freeblk;
	}
	blkno = ds->lowest;
	level = 0;

	/* Find the root of the da tree, if present. */
	blks = ds->state->path.blk;
	error = xchk_da_btree_block(ds, level, blkno);
	if (error)
		goto out_state;
	/*
	 * We didn't find a block at ds->lowest, which means that there's
	 * no LEAF1/LEAFN tree (at least not where it's supposed to be),
	 * so jump out now.
	 */
	if (blks[level].bp == NULL)
		goto out_state;

	/*
	 * Iterative depth-first walk.  blks[level].index is the cursor into
	 * the block at each level.  ds->tree_level mirrors the node level
	 * expected at the current depth: it is decremented on the way down
	 * and incremented on the way back up.  Popping above the root makes
	 * level negative, which ends the loop.
	 */
	blks[level].index = 0;
	while (level >= 0 && level < XFS_DA_NODE_MAXDEPTH) {
		/* Handle leaf block. */
		if (blks[level].magic != XFS_DA_NODE_MAGIC) {
			/* End of leaf, pop back towards the root. */
			if (blks[level].index >= ds->maxrecs[level]) {
				if (level > 0)
					blks[level - 1].index++;
				ds->tree_level++;
				level--;
				continue;
			}

			/* Dispatch record scrubbing. */
			error = scrub_fn(ds, level);
			if (error)
				break;
			if (xchk_should_terminate(sc, &error) ||
			    (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT))
				break;

			blks[level].index++;
			continue;
		}


		/* End of node, pop back towards the root. */
		if (blks[level].index >= ds->maxrecs[level]) {
			if (level > 0)
				blks[level - 1].index++;
			ds->tree_level++;
			level--;
			continue;
		}

		/* Hashes in order for scrub? */
		key = xchk_da_btree_node_entry(ds, level);
		error = xchk_da_btree_hash(ds, level, &key->hashval);
		if (error)
			goto out;

		/* Drill another level deeper. */
		blkno = be32_to_cpu(key->before);
		level++;
		if (level >= XFS_DA_NODE_MAXDEPTH) {
			/* Too deep! */
			xchk_da_set_corrupt(ds, level - 1);
			break;
		}
		ds->tree_level--;
		error = xchk_da_btree_block(ds, level, blkno);
		if (error)
			goto out;
		/* The child was rejected or missing; nothing more to walk. */
		if (blks[level].bp == NULL)
			goto out;

		blks[level].index = 0;
	}

out:
	/* Release all the buffers we're tracking. */
	for (level = 0; level < XFS_DA_NODE_MAXDEPTH; level++) {
		if (blks[level].bp == NULL)
			continue;
		xfs_trans_brelse(sc->tp, blks[level].bp);
		blks[level].bp = NULL;
	}

out_state:
	xfs_da_state_free(ds->state);
	kfree(ds);
	return error;
}