1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs_platform.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_inode.h" 13 #include "xfs_btree.h" 14 #include "scrub/scrub.h" 15 #include "scrub/common.h" 16 #include "scrub/btree.h" 17 #include "scrub/trace.h" 18 19 /* btree scrubbing */ 20 21 /* 22 * Check for btree operation errors. See the section about handling 23 * operational errors in common.c. 24 */ 25 static bool 26 __xchk_btree_process_error( 27 struct xfs_scrub *sc, 28 struct xfs_btree_cur *cur, 29 int level, 30 int *error, 31 __u32 errflag, 32 void *ret_ip) 33 { 34 if (*error == 0) 35 return true; 36 37 switch (*error) { 38 case -EDEADLOCK: 39 case -ECHRNG: 40 /* Used to restart an op with deadlock avoidance. */ 41 trace_xchk_deadlock_retry(sc->ip, sc->sm, *error); 42 break; 43 case -EFSBADCRC: 44 case -EFSCORRUPTED: 45 case -EIO: 46 case -ENODATA: 47 /* Note the badness but don't abort. */ 48 sc->sm->sm_flags |= errflag; 49 *error = 0; 50 fallthrough; 51 default: 52 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) 53 trace_xchk_ifork_btree_op_error(sc, cur, level, 54 *error, ret_ip); 55 else 56 trace_xchk_btree_op_error(sc, cur, level, 57 *error, ret_ip); 58 break; 59 } 60 return false; 61 } 62 63 bool 64 xchk_btree_process_error( 65 struct xfs_scrub *sc, 66 struct xfs_btree_cur *cur, 67 int level, 68 int *error) 69 { 70 return __xchk_btree_process_error(sc, cur, level, error, 71 XFS_SCRUB_OFLAG_CORRUPT, __return_address); 72 } 73 74 bool 75 xchk_btree_xref_process_error( 76 struct xfs_scrub *sc, 77 struct xfs_btree_cur *cur, 78 int level, 79 int *error) 80 { 81 return __xchk_btree_process_error(sc, cur, level, error, 82 XFS_SCRUB_OFLAG_XFAIL, __return_address); 83 } 84 85 /* Record btree block corruption. */ 86 static void 87 __xchk_btree_set_corrupt( 88 struct xfs_scrub *sc, 89 struct xfs_btree_cur *cur, 90 int level, 91 __u32 errflag, 92 void *ret_ip) 93 { 94 sc->sm->sm_flags |= errflag; 95 96 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE) 97 trace_xchk_ifork_btree_error(sc, cur, level, 98 ret_ip); 99 else 100 trace_xchk_btree_error(sc, cur, level, 101 ret_ip); 102 } 103 104 void 105 xchk_btree_set_corrupt( 106 struct xfs_scrub *sc, 107 struct xfs_btree_cur *cur, 108 int level) 109 { 110 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_CORRUPT, 111 __return_address); 112 } 113 114 void 115 xchk_btree_xref_set_corrupt( 116 struct xfs_scrub *sc, 117 struct xfs_btree_cur *cur, 118 int level) 119 { 120 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_XCORRUPT, 121 __return_address); 122 } 123 124 void 125 xchk_btree_set_preen( 126 struct xfs_scrub *sc, 127 struct xfs_btree_cur *cur, 128 int level) 129 { 130 __xchk_btree_set_corrupt(sc, cur, level, XFS_SCRUB_OFLAG_PREEN, 131 __return_address); 132 } 133 134 /* 135 * Make sure this record is in order and doesn't stray outside of the parent 136 * keys. 137 */ 138 STATIC void 139 xchk_btree_rec( 140 struct xchk_btree *bs) 141 { 142 struct xfs_btree_cur *cur = bs->cur; 143 union xfs_btree_rec *rec; 144 union xfs_btree_key key; 145 union xfs_btree_key hkey; 146 union xfs_btree_key *keyp; 147 struct xfs_btree_block *block; 148 struct xfs_btree_block *keyblock; 149 struct xfs_buf *bp; 150 151 block = xfs_btree_get_block(cur, 0, &bp); 152 rec = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, block); 153 154 trace_xchk_btree_rec(bs->sc, cur, 0); 155 156 /* Are all records across all record blocks in order? */ 157 if (bs->lastrec_valid && 158 !cur->bc_ops->recs_inorder(cur, &bs->lastrec, rec)) 159 xchk_btree_set_corrupt(bs->sc, cur, 0); 160 memcpy(&bs->lastrec, rec, cur->bc_ops->rec_len); 161 bs->lastrec_valid = true; 162 163 if (cur->bc_nlevels == 1) 164 return; 165 166 /* Is low_key(rec) at least as large as the parent low key? */ 167 cur->bc_ops->init_key_from_rec(&key, rec); 168 keyblock = xfs_btree_get_block(cur, 1, &bp); 169 keyp = xfs_btree_key_addr(cur, cur->bc_levels[1].ptr, keyblock); 170 if (xfs_btree_keycmp_lt(cur, &key, keyp)) 171 xchk_btree_set_corrupt(bs->sc, cur, 1); 172 173 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) 174 return; 175 176 /* Is high_key(rec) no larger than the parent high key? */ 177 cur->bc_ops->init_high_key_from_rec(&hkey, rec); 178 keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[1].ptr, keyblock); 179 if (xfs_btree_keycmp_lt(cur, keyp, &hkey)) 180 xchk_btree_set_corrupt(bs->sc, cur, 1); 181 } 182 183 /* 184 * Make sure this key is in order and doesn't stray outside of the parent 185 * keys. 186 */ 187 STATIC void 188 xchk_btree_key( 189 struct xchk_btree *bs, 190 int level) 191 { 192 struct xfs_btree_cur *cur = bs->cur; 193 union xfs_btree_key *key; 194 union xfs_btree_key *keyp; 195 struct xfs_btree_block *block; 196 struct xfs_btree_block *keyblock; 197 struct xfs_buf *bp; 198 199 block = xfs_btree_get_block(cur, level, &bp); 200 key = xfs_btree_key_addr(cur, cur->bc_levels[level].ptr, block); 201 202 trace_xchk_btree_key(bs->sc, cur, level); 203 204 /* Are all low keys across all node blocks in order? */ 205 if (bs->lastkey[level - 1].valid && 206 !cur->bc_ops->keys_inorder(cur, &bs->lastkey[level - 1].key, key)) 207 xchk_btree_set_corrupt(bs->sc, cur, level); 208 memcpy(&bs->lastkey[level - 1].key, key, cur->bc_ops->key_len); 209 bs->lastkey[level - 1].valid = true; 210 211 if (level + 1 >= cur->bc_nlevels) 212 return; 213 214 /* Is this block's low key at least as large as the parent low key? */ 215 keyblock = xfs_btree_get_block(cur, level + 1, &bp); 216 keyp = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, keyblock); 217 if (xfs_btree_keycmp_lt(cur, key, keyp)) 218 xchk_btree_set_corrupt(bs->sc, cur, level); 219 220 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) 221 return; 222 223 /* Is this block's high key no larger than the parent high key? */ 224 key = xfs_btree_high_key_addr(cur, cur->bc_levels[level].ptr, block); 225 keyp = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, 226 keyblock); 227 if (xfs_btree_keycmp_lt(cur, keyp, key)) 228 xchk_btree_set_corrupt(bs->sc, cur, level); 229 } 230 231 /* 232 * Check a btree pointer. Returns true if it's ok to use this pointer. 233 * Callers do not need to set the corrupt flag. 234 */ 235 static bool 236 xchk_btree_ptr_ok( 237 struct xchk_btree *bs, 238 int level, 239 union xfs_btree_ptr *ptr) 240 { 241 /* A btree rooted in an inode has no block pointer to the root. */ 242 if (bs->cur->bc_ops->type == XFS_BTREE_TYPE_INODE && 243 level == bs->cur->bc_nlevels) 244 return true; 245 246 /* Otherwise, check the pointers. */ 247 if (__xfs_btree_check_ptr(bs->cur, ptr, 0, level)) { 248 xchk_btree_set_corrupt(bs->sc, bs->cur, level); 249 return false; 250 } 251 252 return true; 253 } 254 255 /* Check that a btree block's sibling matches what we expect it. */ 256 STATIC int 257 xchk_btree_block_check_sibling( 258 struct xchk_btree *bs, 259 int level, 260 int direction, 261 union xfs_btree_ptr *sibling) 262 { 263 struct xfs_btree_cur *cur = bs->cur; 264 struct xfs_btree_block *pblock; 265 struct xfs_buf *pbp; 266 struct xfs_btree_cur *ncur = NULL; 267 union xfs_btree_ptr *pp; 268 int success; 269 int error; 270 271 error = xfs_btree_dup_cursor(cur, &ncur); 272 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error) || 273 !ncur) 274 return error; 275 276 /* 277 * If the pointer is null, we shouldn't be able to move the upper 278 * level pointer anywhere. 279 */ 280 if (xfs_btree_ptr_is_null(cur, sibling)) { 281 if (direction > 0) 282 error = xfs_btree_increment(ncur, level + 1, &success); 283 else 284 error = xfs_btree_decrement(ncur, level + 1, &success); 285 if (error == 0 && success) 286 xchk_btree_set_corrupt(bs->sc, cur, level); 287 error = 0; 288 goto out; 289 } 290 291 /* Increment upper level pointer. */ 292 if (direction > 0) 293 error = xfs_btree_increment(ncur, level + 1, &success); 294 else 295 error = xfs_btree_decrement(ncur, level + 1, &success); 296 if (!xchk_btree_process_error(bs->sc, cur, level + 1, &error)) 297 goto out; 298 if (!success) { 299 xchk_btree_set_corrupt(bs->sc, cur, level + 1); 300 goto out; 301 } 302 303 /* Compare upper level pointer to sibling pointer. */ 304 pblock = xfs_btree_get_block(ncur, level + 1, &pbp); 305 pp = xfs_btree_ptr_addr(ncur, ncur->bc_levels[level + 1].ptr, pblock); 306 if (!xchk_btree_ptr_ok(bs, level + 1, pp)) 307 goto out; 308 if (pbp) 309 xchk_buffer_recheck(bs->sc, pbp); 310 311 if (xfs_btree_cmp_two_ptrs(cur, pp, sibling)) 312 xchk_btree_set_corrupt(bs->sc, cur, level); 313 out: 314 xfs_btree_del_cursor(ncur, XFS_BTREE_ERROR); 315 return error; 316 } 317 318 /* Check the siblings of a btree block. */ 319 STATIC int 320 xchk_btree_block_check_siblings( 321 struct xchk_btree *bs, 322 struct xfs_btree_block *block) 323 { 324 struct xfs_btree_cur *cur = bs->cur; 325 union xfs_btree_ptr leftsib; 326 union xfs_btree_ptr rightsib; 327 int level; 328 int error = 0; 329 330 xfs_btree_get_sibling(cur, block, &leftsib, XFS_BB_LEFTSIB); 331 xfs_btree_get_sibling(cur, block, &rightsib, XFS_BB_RIGHTSIB); 332 level = xfs_btree_get_level(block); 333 334 /* Root block should never have siblings. */ 335 if (level == cur->bc_nlevels - 1) { 336 if (!xfs_btree_ptr_is_null(cur, &leftsib) || 337 !xfs_btree_ptr_is_null(cur, &rightsib)) 338 xchk_btree_set_corrupt(bs->sc, cur, level); 339 goto out; 340 } 341 342 /* 343 * Does the left & right sibling pointers match the adjacent 344 * parent level pointers? 345 * (These function absorbs error codes for us.) 346 */ 347 error = xchk_btree_block_check_sibling(bs, level, -1, &leftsib); 348 if (error) 349 return error; 350 error = xchk_btree_block_check_sibling(bs, level, 1, &rightsib); 351 if (error) 352 return error; 353 out: 354 return error; 355 } 356 357 struct check_owner { 358 struct list_head list; 359 xfs_daddr_t daddr; 360 int level; 361 }; 362 363 /* 364 * Make sure this btree block isn't in the free list and that there's 365 * an rmap record for it. 366 */ 367 STATIC int 368 xchk_btree_check_block_owner( 369 struct xchk_btree *bs, 370 int level, 371 xfs_daddr_t daddr) 372 { 373 xfs_agnumber_t agno; 374 xfs_agblock_t agbno; 375 bool is_bnobt, is_rmapbt; 376 bool init_sa; 377 int error = 0; 378 379 if (!bs->cur) 380 return 0; 381 382 is_bnobt = xfs_btree_is_bno(bs->cur->bc_ops); 383 is_rmapbt = xfs_btree_is_rmap(bs->cur->bc_ops); 384 agno = xfs_daddr_to_agno(bs->cur->bc_mp, daddr); 385 agbno = xfs_daddr_to_agbno(bs->cur->bc_mp, daddr); 386 387 /* 388 * If the btree being examined is not itself a per-AG btree, initialize 389 * sc->sa so that we can check for the presence of an ownership record 390 * in the rmap btree for the AG containing the block. 391 */ 392 init_sa = bs->cur->bc_ops->type != XFS_BTREE_TYPE_AG; 393 if (init_sa) { 394 error = xchk_ag_init_existing(bs->sc, agno, &bs->sc->sa); 395 if (!xchk_btree_xref_process_error(bs->sc, bs->cur, 396 level, &error)) 397 goto out_free; 398 } 399 400 xchk_xref_is_used_space(bs->sc, agbno, 1); 401 /* 402 * The bnobt scrubber aliases bs->cur to bs->sc->sa.bno_cur, so we 403 * have to nullify it (to shut down further block owner checks) if 404 * self-xref encounters problems. 405 */ 406 if (!bs->sc->sa.bno_cur && is_bnobt) 407 bs->cur = NULL; 408 409 xchk_xref_is_only_owned_by(bs->sc, agbno, 1, bs->oinfo); 410 if (!bs->sc->sa.rmap_cur && is_rmapbt) 411 bs->cur = NULL; 412 413 out_free: 414 if (init_sa) 415 xchk_ag_free(bs->sc, &bs->sc->sa); 416 417 return error; 418 } 419 420 /* Check the owner of a btree block. */ 421 STATIC int 422 xchk_btree_check_owner( 423 struct xchk_btree *bs, 424 int level, 425 struct xfs_buf *bp) 426 { 427 struct xfs_btree_cur *cur = bs->cur; 428 429 /* 430 * In theory, xfs_btree_get_block should only give us a null buffer 431 * pointer for the root of a root-in-inode btree type, but we need 432 * to check defensively here in case the cursor state is also screwed 433 * up. 434 */ 435 if (bp == NULL) { 436 if (cur->bc_ops->type != XFS_BTREE_TYPE_INODE) 437 xchk_btree_set_corrupt(bs->sc, bs->cur, level); 438 return 0; 439 } 440 441 /* 442 * We want to cross-reference each btree block with the bnobt 443 * and the rmapbt. We cannot cross-reference the bnobt or 444 * rmapbt while scanning the bnobt or rmapbt, respectively, 445 * because we cannot alter the cursor and we'd prefer not to 446 * duplicate cursors. Therefore, save the buffer daddr for 447 * later scanning. 448 */ 449 if (xfs_btree_is_bno(cur->bc_ops) || xfs_btree_is_rmap(cur->bc_ops)) { 450 struct check_owner *co; 451 452 co = kmalloc(sizeof(struct check_owner), XCHK_GFP_FLAGS); 453 if (!co) 454 return -ENOMEM; 455 456 INIT_LIST_HEAD(&co->list); 457 co->level = level; 458 co->daddr = xfs_buf_daddr(bp); 459 list_add_tail(&co->list, &bs->to_check); 460 return 0; 461 } 462 463 return xchk_btree_check_block_owner(bs, level, xfs_buf_daddr(bp)); 464 } 465 466 /* Decide if we want to check minrecs of a btree block in the inode root. */ 467 static inline bool 468 xchk_btree_check_iroot_minrecs( 469 struct xchk_btree *bs) 470 { 471 /* 472 * xfs_bmap_add_attrfork_btree had an implementation bug wherein it 473 * would miscalculate the space required for the data fork bmbt root 474 * when adding an attr fork, and promote the iroot contents to an 475 * external block unnecessarily. This went unnoticed for many years 476 * until scrub found filesystems in this state. Inode rooted btrees are 477 * not supposed to have immediate child blocks that are small enough 478 * that the contents could fit in the inode root, but we can't fail 479 * existing filesystems, so instead we disable the check for data fork 480 * bmap btrees when there's an attr fork. 481 */ 482 if (xfs_btree_is_bmap(bs->cur->bc_ops) && 483 bs->cur->bc_ino.whichfork == XFS_DATA_FORK && 484 xfs_inode_has_attr_fork(bs->sc->ip)) 485 return false; 486 487 return true; 488 } 489 490 /* 491 * Check that this btree block has at least minrecs records or is one of the 492 * special blocks that don't require that. 493 */ 494 STATIC void 495 xchk_btree_check_minrecs( 496 struct xchk_btree *bs, 497 int level, 498 struct xfs_btree_block *block) 499 { 500 struct xfs_btree_cur *cur = bs->cur; 501 unsigned int root_level = cur->bc_nlevels - 1; 502 unsigned int numrecs = be16_to_cpu(block->bb_numrecs); 503 504 /* More records than minrecs means the block is ok. */ 505 if (numrecs >= cur->bc_ops->get_minrecs(cur, level)) 506 return; 507 508 /* 509 * For btrees rooted in the inode, it's possible that the root block 510 * contents spilled into a regular ondisk block because there wasn't 511 * enough space in the inode root. The number of records in that 512 * child block might be less than the standard minrecs, but that's ok 513 * provided that there's only one direct child of the root. 514 */ 515 if (cur->bc_ops->type == XFS_BTREE_TYPE_INODE && 516 level == cur->bc_nlevels - 2) { 517 struct xfs_btree_block *root_block; 518 struct xfs_buf *root_bp; 519 int root_maxrecs; 520 521 root_block = xfs_btree_get_block(cur, root_level, &root_bp); 522 root_maxrecs = cur->bc_ops->get_dmaxrecs(cur, root_level); 523 if (xchk_btree_check_iroot_minrecs(bs) && 524 (be16_to_cpu(root_block->bb_numrecs) != 1 || 525 numrecs <= root_maxrecs)) 526 xchk_btree_set_corrupt(bs->sc, cur, level); 527 return; 528 } 529 530 /* 531 * Otherwise, only the root level is allowed to have fewer than minrecs 532 * records or keyptrs. 533 */ 534 if (level < root_level) 535 xchk_btree_set_corrupt(bs->sc, cur, level); 536 } 537 538 /* 539 * If this btree block has a parent, make sure that the parent's keys capture 540 * the keyspace contained in this block. 541 */ 542 STATIC void 543 xchk_btree_block_check_keys( 544 struct xchk_btree *bs, 545 int level, 546 struct xfs_btree_block *block) 547 { 548 union xfs_btree_key block_key; 549 union xfs_btree_key *block_high_key; 550 union xfs_btree_key *parent_low_key, *parent_high_key; 551 struct xfs_btree_cur *cur = bs->cur; 552 struct xfs_btree_block *parent_block; 553 struct xfs_buf *bp; 554 555 if (level == cur->bc_nlevels - 1) 556 return; 557 558 xfs_btree_get_keys(cur, block, &block_key); 559 560 /* Make sure the low key of this block matches the parent. */ 561 parent_block = xfs_btree_get_block(cur, level + 1, &bp); 562 parent_low_key = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, 563 parent_block); 564 if (xfs_btree_keycmp_ne(cur, &block_key, parent_low_key)) { 565 xchk_btree_set_corrupt(bs->sc, bs->cur, level); 566 return; 567 } 568 569 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) 570 return; 571 572 /* Make sure the high key of this block matches the parent. */ 573 parent_high_key = xfs_btree_high_key_addr(cur, 574 cur->bc_levels[level + 1].ptr, parent_block); 575 block_high_key = xfs_btree_high_key_from_key(cur, &block_key); 576 if (xfs_btree_keycmp_ne(cur, block_high_key, parent_high_key)) 577 xchk_btree_set_corrupt(bs->sc, bs->cur, level); 578 } 579 580 /* 581 * Grab and scrub a btree block given a btree pointer. Returns block 582 * and buffer pointers (if applicable) if they're ok to use. 583 */ 584 STATIC int 585 xchk_btree_get_block( 586 struct xchk_btree *bs, 587 int level, 588 union xfs_btree_ptr *pp, 589 struct xfs_btree_block **pblock, 590 struct xfs_buf **pbp) 591 { 592 int error; 593 594 *pblock = NULL; 595 *pbp = NULL; 596 597 error = xfs_btree_lookup_get_block(bs->cur, level, pp, pblock); 598 if (!xchk_btree_process_error(bs->sc, bs->cur, level, &error) || 599 !*pblock) 600 return error; 601 602 xfs_btree_get_block(bs->cur, level, pbp); 603 if (__xfs_btree_check_block(bs->cur, *pblock, level, *pbp)) { 604 xchk_btree_set_corrupt(bs->sc, bs->cur, level); 605 return 0; 606 } 607 if (*pbp) 608 xchk_buffer_recheck(bs->sc, *pbp); 609 610 xchk_btree_check_minrecs(bs, level, *pblock); 611 612 /* 613 * Check the block's owner; this function absorbs error codes 614 * for us. 615 */ 616 error = xchk_btree_check_owner(bs, level, *pbp); 617 if (error) 618 return error; 619 620 /* 621 * Check the block's siblings; this function absorbs error codes 622 * for us. 623 */ 624 error = xchk_btree_block_check_siblings(bs, *pblock); 625 if (error) 626 return error; 627 628 xchk_btree_block_check_keys(bs, level, *pblock); 629 return 0; 630 } 631 632 /* 633 * Check that the low and high keys of this block match the keys stored 634 * in the parent block. 635 */ 636 STATIC void 637 xchk_btree_block_keys( 638 struct xchk_btree *bs, 639 int level, 640 struct xfs_btree_block *block) 641 { 642 union xfs_btree_key block_keys; 643 struct xfs_btree_cur *cur = bs->cur; 644 union xfs_btree_key *high_bk; 645 union xfs_btree_key *parent_keys; 646 union xfs_btree_key *high_pk; 647 struct xfs_btree_block *parent_block; 648 struct xfs_buf *bp; 649 650 if (level >= cur->bc_nlevels - 1) 651 return; 652 653 /* Calculate the keys for this block. */ 654 xfs_btree_get_keys(cur, block, &block_keys); 655 656 /* Obtain the parent's copy of the keys for this block. */ 657 parent_block = xfs_btree_get_block(cur, level + 1, &bp); 658 parent_keys = xfs_btree_key_addr(cur, cur->bc_levels[level + 1].ptr, 659 parent_block); 660 661 if (xfs_btree_keycmp_ne(cur, &block_keys, parent_keys)) 662 xchk_btree_set_corrupt(bs->sc, cur, 1); 663 664 if (!(cur->bc_ops->geom_flags & XFS_BTGEO_OVERLAPPING)) 665 return; 666 667 /* Get high keys */ 668 high_bk = xfs_btree_high_key_from_key(cur, &block_keys); 669 high_pk = xfs_btree_high_key_addr(cur, cur->bc_levels[level + 1].ptr, 670 parent_block); 671 672 if (xfs_btree_keycmp_ne(cur, high_bk, high_pk)) 673 xchk_btree_set_corrupt(bs->sc, cur, 1); 674 } 675 676 /* 677 * Visit all nodes and leaves of a btree. Check that all pointers and 678 * records are in order, that the keys reflect the records, and use a callback 679 * so that the caller can verify individual records. 680 */ 681 int 682 xchk_btree( 683 struct xfs_scrub *sc, 684 struct xfs_btree_cur *cur, 685 xchk_btree_rec_fn scrub_fn, 686 const struct xfs_owner_info *oinfo, 687 void *private) 688 { 689 union xfs_btree_ptr ptr; 690 struct xchk_btree *bs; 691 union xfs_btree_ptr *pp; 692 union xfs_btree_rec *recp; 693 struct xfs_btree_block *block; 694 struct xfs_buf *bp; 695 struct check_owner *co; 696 struct check_owner *n; 697 size_t cur_sz; 698 int level; 699 int error = 0; 700 701 /* 702 * Allocate the btree scrub context from the heap, because this 703 * structure can get rather large. Don't let a caller feed us a 704 * totally absurd size. 705 */ 706 cur_sz = xchk_btree_sizeof(cur->bc_nlevels); 707 if (cur_sz > PAGE_SIZE) { 708 xchk_btree_set_corrupt(sc, cur, 0); 709 return 0; 710 } 711 bs = kzalloc(cur_sz, XCHK_GFP_FLAGS); 712 if (!bs) 713 return -ENOMEM; 714 bs->cur = cur; 715 bs->scrub_rec = scrub_fn; 716 bs->oinfo = oinfo; 717 bs->private = private; 718 bs->sc = sc; 719 720 /* Initialize scrub state */ 721 INIT_LIST_HEAD(&bs->to_check); 722 723 /* 724 * Load the root of the btree. The helper function absorbs 725 * error codes for us. 726 */ 727 level = cur->bc_nlevels - 1; 728 xfs_btree_init_ptr_from_cur(cur, &ptr); 729 if (!xchk_btree_ptr_ok(bs, cur->bc_nlevels, &ptr)) 730 goto out; 731 error = xchk_btree_get_block(bs, level, &ptr, &block, &bp); 732 if (error || !block) 733 goto out; 734 735 cur->bc_levels[level].ptr = 1; 736 737 while (level < cur->bc_nlevels) { 738 block = xfs_btree_get_block(cur, level, &bp); 739 740 if (level == 0) { 741 /* End of leaf, pop back towards the root. */ 742 if (cur->bc_levels[level].ptr > 743 be16_to_cpu(block->bb_numrecs)) { 744 xchk_btree_block_keys(bs, level, block); 745 if (level < cur->bc_nlevels - 1) 746 cur->bc_levels[level + 1].ptr++; 747 level++; 748 continue; 749 } 750 751 /* Records in order for scrub? */ 752 xchk_btree_rec(bs); 753 754 /* Call out to the record checker. */ 755 recp = xfs_btree_rec_addr(cur, cur->bc_levels[0].ptr, 756 block); 757 error = bs->scrub_rec(bs, recp); 758 if (error) 759 break; 760 if (xchk_should_terminate(sc, &error) || 761 (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)) 762 break; 763 764 cur->bc_levels[level].ptr++; 765 continue; 766 } 767 768 /* End of node, pop back towards the root. */ 769 if (cur->bc_levels[level].ptr > 770 be16_to_cpu(block->bb_numrecs)) { 771 xchk_btree_block_keys(bs, level, block); 772 if (level < cur->bc_nlevels - 1) 773 cur->bc_levels[level + 1].ptr++; 774 level++; 775 continue; 776 } 777 778 /* Keys in order for scrub? */ 779 xchk_btree_key(bs, level); 780 781 /* Drill another level deeper. */ 782 pp = xfs_btree_ptr_addr(cur, cur->bc_levels[level].ptr, block); 783 if (!xchk_btree_ptr_ok(bs, level, pp)) { 784 cur->bc_levels[level].ptr++; 785 continue; 786 } 787 level--; 788 error = xchk_btree_get_block(bs, level, pp, &block, &bp); 789 if (error || !block) 790 goto out; 791 792 cur->bc_levels[level].ptr = 1; 793 } 794 795 out: 796 /* Process deferred owner checks on btree blocks. */ 797 list_for_each_entry_safe(co, n, &bs->to_check, list) { 798 if (!error && bs->cur) 799 error = xchk_btree_check_block_owner(bs, co->level, 800 co->daddr); 801 list_del(&co->list); 802 kfree(co); 803 } 804 kfree(bs); 805 806 return error; 807 } 808