1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2017-2023 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_ag.h" 13 #include "xfs_btree.h" 14 #include "xfs_rmap.h" 15 #include "xfs_refcount.h" 16 #include "scrub/scrub.h" 17 #include "scrub/common.h" 18 #include "scrub/btree.h" 19 #include "scrub/trace.h" 20 21 /* 22 * Set us up to scrub reference count btrees. 23 */ 24 int 25 xchk_setup_ag_refcountbt( 26 struct xfs_scrub *sc) 27 { 28 if (xchk_need_intent_drain(sc)) 29 xchk_fsgates_enable(sc, XCHK_FSGATES_DRAIN); 30 return xchk_setup_ag_btree(sc, false); 31 } 32 33 /* Reference count btree scrubber. */ 34 35 /* 36 * Confirming Reference Counts via Reverse Mappings 37 * 38 * We want to count the reverse mappings overlapping a refcount record 39 * (bno, len, refcount), allowing for the possibility that some of the 40 * overlap may come from smaller adjoining reverse mappings, while some 41 * comes from single extents which overlap the range entirely. The 42 * outer loop is as follows: 43 * 44 * 1. For all reverse mappings overlapping the refcount extent, 45 * a. If a given rmap completely overlaps, mark it as seen. 46 * b. Otherwise, record the fragment (in agbno order) for later 47 * processing. 48 * 49 * Once we've seen all the rmaps, we know that for all blocks in the 50 * refcount record we want to find $refcount owners and we've already 51 * visited $seen extents that overlap all the blocks. Therefore, we 52 * need to find ($refcount - $seen) owners for every block in the 53 * extent; call that quantity $target_nr. Proceed as follows: 54 * 55 * 2. Pull the first $target_nr fragments from the list; all of them 56 * should start at or before the start of the extent. 57 * Call this subset of fragments the working set. 58 * 3. Until there are no more unprocessed fragments, 59 * a. Find the shortest fragments in the set and remove them. 60 * b. Note the block number of the end of these fragments. 61 * c. Pull the same number of fragments from the list. All of these 62 * fragments should start at the block number recorded in the 63 * previous step. 64 * d. Put those fragments in the set. 65 * 4. Check that there are $target_nr fragments remaining in the list, 66 * and that they all end at or beyond the end of the refcount extent. 67 * 68 * If the refcount is correct, all the check conditions in the algorithm 69 * should always hold true. If not, the refcount is incorrect. 70 */ 71 struct xchk_refcnt_frag { 72 struct list_head list; 73 struct xfs_rmap_irec rm; 74 }; 75 76 struct xchk_refcnt_check { 77 struct xfs_scrub *sc; 78 struct list_head fragments; 79 80 /* refcount extent we're examining */ 81 xfs_agblock_t bno; 82 xfs_extlen_t len; 83 xfs_nlink_t refcount; 84 85 /* number of owners seen */ 86 xfs_nlink_t seen; 87 }; 88 89 /* 90 * Decide if the given rmap is large enough that we can redeem it 91 * towards refcount verification now, or if it's a fragment, in 92 * which case we'll hang onto it in the hopes that we'll later 93 * discover that we've collected exactly the correct number of 94 * fragments as the refcountbt says we should have. 95 */ 96 STATIC int 97 xchk_refcountbt_rmap_check( 98 struct xfs_btree_cur *cur, 99 const struct xfs_rmap_irec *rec, 100 void *priv) 101 { 102 struct xchk_refcnt_check *refchk = priv; 103 struct xchk_refcnt_frag *frag; 104 xfs_agblock_t rm_last; 105 xfs_agblock_t rc_last; 106 int error = 0; 107 108 if (xchk_should_terminate(refchk->sc, &error)) 109 return error; 110 111 rm_last = rec->rm_startblock + rec->rm_blockcount - 1; 112 rc_last = refchk->bno + refchk->len - 1; 113 114 /* Confirm that a single-owner refc extent is a CoW stage. */ 115 if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) { 116 xchk_btree_xref_set_corrupt(refchk->sc, cur, 0); 117 return 0; 118 } 119 120 if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) { 121 /* 122 * The rmap overlaps the refcount record, so we can confirm 123 * one refcount owner seen. 124 */ 125 refchk->seen++; 126 } else { 127 /* 128 * This rmap covers only part of the refcount record, so 129 * save the fragment for later processing. If the rmapbt 130 * is healthy each rmap_irec we see will be in agbno order 131 * so we don't need insertion sort here. 132 */ 133 frag = kmalloc(sizeof(struct xchk_refcnt_frag), 134 XCHK_GFP_FLAGS); 135 if (!frag) 136 return -ENOMEM; 137 memcpy(&frag->rm, rec, sizeof(frag->rm)); 138 list_add_tail(&frag->list, &refchk->fragments); 139 } 140 141 return 0; 142 } 143 144 /* 145 * Given a bunch of rmap fragments, iterate through them, keeping 146 * a running tally of the refcount. If this ever deviates from 147 * what we expect (which is the refcountbt's refcount minus the 148 * number of extents that totally covered the refcountbt extent), 149 * we have a refcountbt error. 150 */ 151 STATIC void 152 xchk_refcountbt_process_rmap_fragments( 153 struct xchk_refcnt_check *refchk) 154 { 155 struct list_head worklist; 156 struct xchk_refcnt_frag *frag; 157 struct xchk_refcnt_frag *n; 158 xfs_agblock_t bno; 159 xfs_agblock_t rbno; 160 xfs_agblock_t next_rbno; 161 xfs_nlink_t nr; 162 xfs_nlink_t target_nr; 163 164 target_nr = refchk->refcount - refchk->seen; 165 if (target_nr == 0) 166 return; 167 168 /* 169 * There are (refchk->rc.rc_refcount - refchk->nr refcount) 170 * references we haven't found yet. Pull that many off the 171 * fragment list and figure out where the smallest rmap ends 172 * (and therefore the next rmap should start). All the rmaps 173 * we pull off should start at or before the beginning of the 174 * refcount record's range. 175 */ 176 INIT_LIST_HEAD(&worklist); 177 rbno = NULLAGBLOCK; 178 179 /* Make sure the fragments actually /are/ in agbno order. */ 180 bno = 0; 181 list_for_each_entry(frag, &refchk->fragments, list) { 182 if (frag->rm.rm_startblock < bno) 183 goto done; 184 bno = frag->rm.rm_startblock; 185 } 186 187 /* 188 * Find all the rmaps that start at or before the refc extent, 189 * and put them on the worklist. 190 */ 191 nr = 0; 192 list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 193 if (frag->rm.rm_startblock > refchk->bno || nr > target_nr) 194 break; 195 bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 196 if (bno < rbno) 197 rbno = bno; 198 list_move_tail(&frag->list, &worklist); 199 nr++; 200 } 201 202 /* 203 * We should have found exactly $target_nr rmap fragments starting 204 * at or before the refcount extent. 205 */ 206 if (nr != target_nr) 207 goto done; 208 209 while (!list_empty(&refchk->fragments)) { 210 /* Discard any fragments ending at rbno from the worklist. */ 211 nr = 0; 212 next_rbno = NULLAGBLOCK; 213 list_for_each_entry_safe(frag, n, &worklist, list) { 214 bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 215 if (bno != rbno) { 216 if (bno < next_rbno) 217 next_rbno = bno; 218 continue; 219 } 220 list_del(&frag->list); 221 kfree(frag); 222 nr++; 223 } 224 225 /* Try to add nr rmaps starting at rbno to the worklist. */ 226 list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 227 bno = frag->rm.rm_startblock + frag->rm.rm_blockcount; 228 if (frag->rm.rm_startblock != rbno) 229 goto done; 230 list_move_tail(&frag->list, &worklist); 231 if (next_rbno > bno) 232 next_rbno = bno; 233 nr--; 234 if (nr == 0) 235 break; 236 } 237 238 /* 239 * If we get here and nr > 0, this means that we added fewer 240 * items to the worklist than we discarded because the fragment 241 * list ran out of items. Therefore, we cannot maintain the 242 * required refcount. Something is wrong, so we're done. 243 */ 244 if (nr) 245 goto done; 246 247 rbno = next_rbno; 248 } 249 250 /* 251 * Make sure the last extent we processed ends at or beyond 252 * the end of the refcount extent. 253 */ 254 if (rbno < refchk->bno + refchk->len) 255 goto done; 256 257 /* Actually record us having seen the remaining refcount. */ 258 refchk->seen = refchk->refcount; 259 done: 260 /* Delete fragments and work list. */ 261 list_for_each_entry_safe(frag, n, &worklist, list) { 262 list_del(&frag->list); 263 kfree(frag); 264 } 265 list_for_each_entry_safe(frag, n, &refchk->fragments, list) { 266 list_del(&frag->list); 267 kfree(frag); 268 } 269 } 270 271 /* Use the rmap entries covering this extent to verify the refcount. */ 272 STATIC void 273 xchk_refcountbt_xref_rmap( 274 struct xfs_scrub *sc, 275 const struct xfs_refcount_irec *irec) 276 { 277 struct xchk_refcnt_check refchk = { 278 .sc = sc, 279 .bno = irec->rc_startblock, 280 .len = irec->rc_blockcount, 281 .refcount = irec->rc_refcount, 282 .seen = 0, 283 }; 284 struct xfs_rmap_irec low; 285 struct xfs_rmap_irec high; 286 struct xchk_refcnt_frag *frag; 287 struct xchk_refcnt_frag *n; 288 int error; 289 290 if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) 291 return; 292 293 /* Cross-reference with the rmapbt to confirm the refcount. */ 294 memset(&low, 0, sizeof(low)); 295 low.rm_startblock = irec->rc_startblock; 296 memset(&high, 0xFF, sizeof(high)); 297 high.rm_startblock = irec->rc_startblock + irec->rc_blockcount - 1; 298 299 INIT_LIST_HEAD(&refchk.fragments); 300 error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, 301 &xchk_refcountbt_rmap_check, &refchk); 302 if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 303 goto out_free; 304 305 xchk_refcountbt_process_rmap_fragments(&refchk); 306 if (irec->rc_refcount != refchk.seen) { 307 trace_xchk_refcount_incorrect(sc->sa.pag, irec, refchk.seen); 308 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 309 } 310 311 out_free: 312 list_for_each_entry_safe(frag, n, &refchk.fragments, list) { 313 list_del(&frag->list); 314 kfree(frag); 315 } 316 } 317 318 /* Cross-reference with the other btrees. */ 319 STATIC void 320 xchk_refcountbt_xref( 321 struct xfs_scrub *sc, 322 const struct xfs_refcount_irec *irec) 323 { 324 if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 325 return; 326 327 xchk_xref_is_used_space(sc, irec->rc_startblock, irec->rc_blockcount); 328 xchk_xref_is_not_inode_chunk(sc, irec->rc_startblock, 329 irec->rc_blockcount); 330 xchk_refcountbt_xref_rmap(sc, irec); 331 } 332 333 struct xchk_refcbt_records { 334 /* Previous refcount record. */ 335 struct xfs_refcount_irec prev_rec; 336 337 /* The next AG block where we aren't expecting shared extents. */ 338 xfs_agblock_t next_unshared_agbno; 339 340 /* Number of CoW blocks we expect. */ 341 xfs_agblock_t cow_blocks; 342 343 /* Was the last record a shared or CoW staging extent? */ 344 enum xfs_refc_domain prev_domain; 345 }; 346 347 STATIC int 348 xchk_refcountbt_rmap_check_gap( 349 struct xfs_btree_cur *cur, 350 const struct xfs_rmap_irec *rec, 351 void *priv) 352 { 353 xfs_agblock_t *next_bno = priv; 354 355 if (*next_bno != NULLAGBLOCK && rec->rm_startblock < *next_bno) 356 return -ECANCELED; 357 358 *next_bno = rec->rm_startblock + rec->rm_blockcount; 359 return 0; 360 } 361 362 /* 363 * Make sure that a gap in the reference count records does not correspond to 364 * overlapping records (i.e. shared extents) in the reverse mappings. 365 */ 366 static inline void 367 xchk_refcountbt_xref_gaps( 368 struct xfs_scrub *sc, 369 struct xchk_refcbt_records *rrc, 370 xfs_agblock_t bno) 371 { 372 struct xfs_rmap_irec low; 373 struct xfs_rmap_irec high; 374 xfs_agblock_t next_bno = NULLAGBLOCK; 375 int error; 376 377 if (bno <= rrc->next_unshared_agbno || !sc->sa.rmap_cur || 378 xchk_skip_xref(sc->sm)) 379 return; 380 381 memset(&low, 0, sizeof(low)); 382 low.rm_startblock = rrc->next_unshared_agbno; 383 memset(&high, 0xFF, sizeof(high)); 384 high.rm_startblock = bno - 1; 385 386 error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high, 387 xchk_refcountbt_rmap_check_gap, &next_bno); 388 if (error == -ECANCELED) 389 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 390 else 391 xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur); 392 } 393 394 static inline bool 395 xchk_refcount_mergeable( 396 struct xchk_refcbt_records *rrc, 397 const struct xfs_refcount_irec *r2) 398 { 399 const struct xfs_refcount_irec *r1 = &rrc->prev_rec; 400 401 /* Ignore if prev_rec is not yet initialized. */ 402 if (r1->rc_blockcount > 0) 403 return false; 404 405 if (r1->rc_domain != r2->rc_domain) 406 return false; 407 if (r1->rc_startblock + r1->rc_blockcount != r2->rc_startblock) 408 return false; 409 if (r1->rc_refcount != r2->rc_refcount) 410 return false; 411 if ((unsigned long long)r1->rc_blockcount + r2->rc_blockcount > 412 MAXREFCEXTLEN) 413 return false; 414 415 return true; 416 } 417 418 /* Flag failures for records that could be merged. */ 419 STATIC void 420 xchk_refcountbt_check_mergeable( 421 struct xchk_btree *bs, 422 struct xchk_refcbt_records *rrc, 423 const struct xfs_refcount_irec *irec) 424 { 425 if (bs->sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT) 426 return; 427 428 if (xchk_refcount_mergeable(rrc, irec)) 429 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 430 431 memcpy(&rrc->prev_rec, irec, sizeof(struct xfs_refcount_irec)); 432 } 433 434 /* Scrub a refcountbt record. */ 435 STATIC int 436 xchk_refcountbt_rec( 437 struct xchk_btree *bs, 438 const union xfs_btree_rec *rec) 439 { 440 struct xfs_refcount_irec irec; 441 struct xchk_refcbt_records *rrc = bs->private; 442 443 xfs_refcount_btrec_to_irec(rec, &irec); 444 if (xfs_refcount_check_irec(bs->cur->bc_ag.pag, &irec) != NULL) { 445 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 446 return 0; 447 } 448 449 if (irec.rc_domain == XFS_REFC_DOMAIN_COW) 450 rrc->cow_blocks += irec.rc_blockcount; 451 452 /* Shared records always come before CoW records. */ 453 if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED && 454 rrc->prev_domain == XFS_REFC_DOMAIN_COW) 455 xchk_btree_set_corrupt(bs->sc, bs->cur, 0); 456 rrc->prev_domain = irec.rc_domain; 457 458 xchk_refcountbt_check_mergeable(bs, rrc, &irec); 459 xchk_refcountbt_xref(bs->sc, &irec); 460 461 /* 462 * If this is a record for a shared extent, check that all blocks 463 * between the previous record and this one have at most one reverse 464 * mapping. 465 */ 466 if (irec.rc_domain == XFS_REFC_DOMAIN_SHARED) { 467 xchk_refcountbt_xref_gaps(bs->sc, rrc, irec.rc_startblock); 468 rrc->next_unshared_agbno = irec.rc_startblock + 469 irec.rc_blockcount; 470 } 471 472 return 0; 473 } 474 475 /* Make sure we have as many refc blocks as the rmap says. */ 476 STATIC void 477 xchk_refcount_xref_rmap( 478 struct xfs_scrub *sc, 479 xfs_filblks_t cow_blocks) 480 { 481 xfs_extlen_t refcbt_blocks = 0; 482 xfs_filblks_t blocks; 483 int error; 484 485 if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm)) 486 return; 487 488 /* Check that we saw as many refcbt blocks as the rmap knows about. */ 489 error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks); 490 if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error)) 491 return; 492 error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, 493 &XFS_RMAP_OINFO_REFC, &blocks); 494 if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 495 return; 496 if (blocks != refcbt_blocks) 497 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 498 499 /* Check that we saw as many cow blocks as the rmap knows about. */ 500 error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur, 501 &XFS_RMAP_OINFO_COW, &blocks); 502 if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur)) 503 return; 504 if (blocks != cow_blocks) 505 xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0); 506 } 507 508 /* Scrub the refcount btree for some AG. */ 509 int 510 xchk_refcountbt( 511 struct xfs_scrub *sc) 512 { 513 struct xchk_refcbt_records rrc = { 514 .cow_blocks = 0, 515 .next_unshared_agbno = 0, 516 .prev_domain = XFS_REFC_DOMAIN_SHARED, 517 }; 518 int error; 519 520 error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec, 521 &XFS_RMAP_OINFO_REFC, &rrc); 522 if (error) 523 return error; 524 525 /* 526 * Check that all blocks between the last refcount > 1 record and the 527 * end of the AG have at most one reverse mapping. 528 */ 529 xchk_refcountbt_xref_gaps(sc, &rrc, sc->mp->m_sb.sb_agblocks); 530 531 xchk_refcount_xref_rmap(sc, rrc.cow_blocks); 532 533 return 0; 534 } 535 536 /* xref check that a cow staging extent is marked in the refcountbt. */ 537 void 538 xchk_xref_is_cow_staging( 539 struct xfs_scrub *sc, 540 xfs_agblock_t agbno, 541 xfs_extlen_t len) 542 { 543 struct xfs_refcount_irec rc; 544 int has_refcount; 545 int error; 546 547 if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) 548 return; 549 550 /* Find the CoW staging extent. */ 551 error = xfs_refcount_lookup_le(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW, 552 agbno, &has_refcount); 553 if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) 554 return; 555 if (!has_refcount) { 556 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 557 return; 558 } 559 560 error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount); 561 if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) 562 return; 563 if (!has_refcount) { 564 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 565 return; 566 } 567 568 /* CoW lookup returned a shared extent record? */ 569 if (rc.rc_domain != XFS_REFC_DOMAIN_COW) 570 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 571 572 /* Must be at least as long as what was passed in */ 573 if (rc.rc_blockcount < len) 574 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 575 } 576 577 /* 578 * xref check that the extent is not shared. Only file data blocks 579 * can have multiple owners. 580 */ 581 void 582 xchk_xref_is_not_shared( 583 struct xfs_scrub *sc, 584 xfs_agblock_t agbno, 585 xfs_extlen_t len) 586 { 587 enum xbtree_recpacking outcome; 588 int error; 589 590 if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) 591 return; 592 593 error = xfs_refcount_has_records(sc->sa.refc_cur, 594 XFS_REFC_DOMAIN_SHARED, agbno, len, &outcome); 595 if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) 596 return; 597 if (outcome != XBTREE_RECPACKING_EMPTY) 598 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 599 } 600 601 /* xref check that the extent is not being used for CoW staging. */ 602 void 603 xchk_xref_is_not_cow_staging( 604 struct xfs_scrub *sc, 605 xfs_agblock_t agbno, 606 xfs_extlen_t len) 607 { 608 enum xbtree_recpacking outcome; 609 int error; 610 611 if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm)) 612 return; 613 614 error = xfs_refcount_has_records(sc->sa.refc_cur, XFS_REFC_DOMAIN_COW, 615 agbno, len, &outcome); 616 if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur)) 617 return; 618 if (outcome != XBTREE_RECPACKING_EMPTY) 619 xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0); 620 } 621