// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2017 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <darrick.wong@oracle.com>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_bit.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_sb.h"
#include "xfs_alloc.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/btree.h"
#include "scrub/trace.h"

/*
 * Set us up to scrub reference count btrees.
 */
int
xchk_setup_ag_refcountbt(
        struct xfs_scrub        *sc,
        struct xfs_inode        *ip)
{
        return xchk_setup_ag_btree(sc, ip, false);
}

/* Reference count btree scrubber. */

/*
 * Confirming Reference Counts via Reverse Mappings
 *
 * We want to count the reverse mappings overlapping a refcount record
 * (bno, len, refcount), allowing for the possibility that some of the
 * overlap may come from smaller adjoining reverse mappings, while some
 * comes from single extents which overlap the range entirely.  The
 * outer loop is as follows:
 *
 * 1. For all reverse mappings overlapping the refcount extent,
 *    a. If a given rmap completely overlaps, mark it as seen.
 *    b. Otherwise, record the fragment (in agbno order) for later
 *       processing.
 *
 * Once we've seen all the rmaps, we know that for all blocks in the
 * refcount record we want to find $refcount owners and we've already
 * visited $seen extents that overlap all the blocks.  Therefore, we
 * need to find ($refcount - $seen) owners for every block in the
 * extent; call that quantity $target_nr.  Proceed as follows:
 *
 * 2. Pull the first $target_nr fragments from the list; all of them
 *    should start at or before the start of the extent.
 *    Call this subset of fragments the working set.
 * 3. Until there are no more unprocessed fragments,
 *    a. Find the shortest fragments in the set and remove them.
 *    b. Note the block number of the end of these fragments.
 *    c. Pull the same number of fragments from the list.  All of these
 *       fragments should start at the block number recorded in the
 *       previous step.
 *    d. Put those fragments in the set.
 * 4. Check that there are $target_nr fragments remaining in the list,
 *    and that they all end at or beyond the end of the refcount extent.
 *
 * If the refcount is correct, all the check conditions in the algorithm
 * should always hold true.  If not, the refcount is incorrect.
 */
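/*
 * As an illustration of the algorithm above (the numbers are made up):
 * suppose a refcount record covers (bno 100, len 10, refcount 3) and
 * the overlapping rmaps are [95, 20], [100, 10], [100, 4], and
 * [104, 6].  The first two rmaps each cover the whole record, so $seen
 * is 2 and $target_nr is 1.  Step 2 pulls [100, 4] into the working
 * set; step 3 retires it at agbno 104 and replaces it with [104, 6],
 * whose end (agbno 110) is at or beyond the end of the refcount
 * extent, so the refcount of 3 is confirmed.
 */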
struct xchk_refcnt_frag {
        struct list_head        list;
        struct xfs_rmap_irec    rm;
};

struct xchk_refcnt_check {
        struct xfs_scrub        *sc;
        struct list_head        fragments;

        /* refcount extent we're examining */
        xfs_agblock_t           bno;
        xfs_extlen_t            len;
        xfs_nlink_t             refcount;

        /* number of owners seen */
        xfs_nlink_t             seen;
};

/*
 * Decide if the given rmap is large enough that we can redeem it
 * towards refcount verification now, or if it's a fragment, in
 * which case we'll hang onto it in the hopes that we'll later
 * discover that we've collected exactly the correct number of
 * fragments as the refcountbt says we should have.
 */
STATIC int
xchk_refcountbt_rmap_check(
        struct xfs_btree_cur            *cur,
        struct xfs_rmap_irec            *rec,
        void                            *priv)
{
        struct xchk_refcnt_check        *refchk = priv;
        struct xchk_refcnt_frag         *frag;
        xfs_agblock_t                   rm_last;
        xfs_agblock_t                   rc_last;
        int                             error = 0;

        if (xchk_should_terminate(refchk->sc, &error))
                return error;

        rm_last = rec->rm_startblock + rec->rm_blockcount - 1;
        rc_last = refchk->bno + refchk->len - 1;

        /* Confirm that a single-owner refc extent is a CoW stage. */
        if (refchk->refcount == 1 && rec->rm_owner != XFS_RMAP_OWN_COW) {
                xchk_btree_xref_set_corrupt(refchk->sc, cur, 0);
                return 0;
        }

        if (rec->rm_startblock <= refchk->bno && rm_last >= rc_last) {
                /*
                 * The rmap overlaps the refcount record, so we can confirm
                 * one refcount owner seen.
                 */
                refchk->seen++;
        } else {
                /*
                 * This rmap covers only part of the refcount record, so
                 * save the fragment for later processing.  If the rmapbt
                 * is healthy each rmap_irec we see will be in agbno order
                 * so we don't need insertion sort here.
                 */
                frag = kmem_alloc(sizeof(struct xchk_refcnt_frag),
                                KM_MAYFAIL);
                if (!frag)
                        return -ENOMEM;
                memcpy(&frag->rm, rec, sizeof(frag->rm));
                list_add_tail(&frag->list, &refchk->fragments);
        }

        return 0;
}

/*
 * Given a bunch of rmap fragments, iterate through them, keeping
 * a running tally of the refcount.  If this ever deviates from
 * what we expect (which is the refcountbt's refcount minus the
 * number of extents that totally covered the refcountbt extent),
 * we have a refcountbt error.
 */
STATIC void
xchk_refcountbt_process_rmap_fragments(
        struct xchk_refcnt_check        *refchk)
{
        struct list_head                worklist;
        struct xchk_refcnt_frag         *frag;
        struct xchk_refcnt_frag         *n;
        xfs_agblock_t                   bno;
        xfs_agblock_t                   rbno;
        xfs_agblock_t                   next_rbno;
        xfs_nlink_t                     nr;
        xfs_nlink_t                     target_nr;

        target_nr = refchk->refcount - refchk->seen;
        if (target_nr == 0)
                return;

        /*
         * There are (refchk->refcount - refchk->seen) references we
         * haven't found yet.  Pull that many off the fragment list and
         * figure out where the smallest rmap ends (and therefore the
         * next rmap should start).  All the rmaps we pull off should
         * start at or before the beginning of the refcount record's
         * range.
         */
        INIT_LIST_HEAD(&worklist);
        rbno = NULLAGBLOCK;
        nr = 1;

        /* Make sure the fragments actually /are/ in agbno order. */
        bno = 0;
        list_for_each_entry(frag, &refchk->fragments, list) {
                if (frag->rm.rm_startblock < bno)
                        goto done;
                bno = frag->rm.rm_startblock;
        }

        /*
         * Find all the rmaps that start at or before the refc extent,
         * and put them on the worklist.
         */
        list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
                if (frag->rm.rm_startblock > refchk->bno)
                        goto done;
                bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
                if (bno < rbno)
                        rbno = bno;
                list_move_tail(&frag->list, &worklist);
                if (nr == target_nr)
                        break;
                nr++;
        }

        /*
         * We should have found exactly $target_nr rmap fragments starting
         * at or before the refcount extent.
         */
        if (nr != target_nr)
                goto done;

        while (!list_empty(&refchk->fragments)) {
                /* Discard any fragments ending at rbno from the worklist. */
                nr = 0;
                next_rbno = NULLAGBLOCK;
                list_for_each_entry_safe(frag, n, &worklist, list) {
                        bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
                        if (bno != rbno) {
                                if (bno < next_rbno)
                                        next_rbno = bno;
                                continue;
                        }
                        list_del(&frag->list);
                        kmem_free(frag);
                        nr++;
                }

                /* Try to add nr rmaps starting at rbno to the worklist. */
                list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
                        bno = frag->rm.rm_startblock + frag->rm.rm_blockcount;
                        if (frag->rm.rm_startblock != rbno)
                                goto done;
                        list_move_tail(&frag->list, &worklist);
                        if (next_rbno > bno)
                                next_rbno = bno;
                        nr--;
                        if (nr == 0)
                                break;
                }

                /*
                 * If we get here and nr > 0, this means that we added fewer
                 * items to the worklist than we discarded because the fragment
                 * list ran out of items.  Therefore, we cannot maintain the
                 * required refcount.  Something is wrong, so we're done.
                 */
                if (nr)
                        goto done;

                rbno = next_rbno;
        }

        /*
         * Make sure the last extent we processed ends at or beyond
         * the end of the refcount extent.
         */
        if (rbno < refchk->bno + refchk->len)
                goto done;

        /* Actually record us having seen the remaining refcount. */
        refchk->seen = refchk->refcount;
done:
        /* Delete fragments and work list. */
        list_for_each_entry_safe(frag, n, &worklist, list) {
                list_del(&frag->list);
                kmem_free(frag);
        }
        list_for_each_entry_safe(frag, n, &refchk->fragments, list) {
                list_del(&frag->list);
                kmem_free(frag);
        }
}

/* Use the rmap entries covering this extent to verify the refcount. */
STATIC void
xchk_refcountbt_xref_rmap(
        struct xfs_scrub                *sc,
        xfs_agblock_t                   bno,
        xfs_extlen_t                    len,
        xfs_nlink_t                     refcount)
{
        struct xchk_refcnt_check        refchk = {
                .sc = sc,
                .bno = bno,
                .len = len,
                .refcount = refcount,
                .seen = 0,
        };
        struct xfs_rmap_irec            low;
        struct xfs_rmap_irec            high;
        struct xchk_refcnt_frag         *frag;
        struct xchk_refcnt_frag         *n;
        int                             error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        /* Cross-reference with the rmapbt to confirm the refcount. */
        memset(&low, 0, sizeof(low));
        low.rm_startblock = bno;
        memset(&high, 0xFF, sizeof(high));
        high.rm_startblock = bno + len - 1;

        INIT_LIST_HEAD(&refchk.fragments);
        error = xfs_rmap_query_range(sc->sa.rmap_cur, &low, &high,
                        &xchk_refcountbt_rmap_check, &refchk);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                goto out_free;

        xchk_refcountbt_process_rmap_fragments(&refchk);
        if (refcount != refchk.seen)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

out_free:
        list_for_each_entry_safe(frag, n, &refchk.fragments, list) {
                list_del(&frag->list);
                kmem_free(frag);
        }
}

/* Cross-reference with the other btrees. */
STATIC void
xchk_refcountbt_xref(
        struct xfs_scrub        *sc,
        xfs_agblock_t           agbno,
        xfs_extlen_t            len,
        xfs_nlink_t             refcount)
{
        if (sc->sm->sm_flags & XFS_SCRUB_OFLAG_CORRUPT)
                return;

        xchk_xref_is_used_space(sc, agbno, len);
        xchk_xref_is_not_inode_chunk(sc, agbno, len);
        xchk_refcountbt_xref_rmap(sc, agbno, len, refcount);
}
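/*
 * Note: a CoW staging extent is stored in the refcount btree with the
 * XFS_REFC_COW_START bit set in its start block and a refcount of 1;
 * the record scrubber below masks that bit off before checking that
 * the extent fits inside the AG.
 */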
/* Scrub a refcountbt record. */
STATIC int
xchk_refcountbt_rec(
        struct xchk_btree       *bs,
        union xfs_btree_rec     *rec)
{
        struct xfs_mount        *mp = bs->cur->bc_mp;
        xfs_agblock_t           *cow_blocks = bs->private;
        xfs_agnumber_t          agno = bs->cur->bc_private.a.agno;
        xfs_agblock_t           bno;
        xfs_extlen_t            len;
        xfs_nlink_t             refcount;
        bool                    has_cowflag;
        int                     error = 0;

        bno = be32_to_cpu(rec->refc.rc_startblock);
        len = be32_to_cpu(rec->refc.rc_blockcount);
        refcount = be32_to_cpu(rec->refc.rc_refcount);

        /* Only CoW records can have refcount == 1. */
        has_cowflag = (bno & XFS_REFC_COW_START);
        if ((refcount == 1 && !has_cowflag) || (refcount != 1 && has_cowflag))
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);
        if (has_cowflag)
                (*cow_blocks) += len;

        /* Check the extent. */
        bno &= ~XFS_REFC_COW_START;
        if (bno + len <= bno ||
            !xfs_verify_agbno(mp, agno, bno) ||
            !xfs_verify_agbno(mp, agno, bno + len - 1))
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

        if (refcount == 0)
                xchk_btree_set_corrupt(bs->sc, bs->cur, 0);

        xchk_refcountbt_xref(bs->sc, bno, len, refcount);

        return error;
}

/* Make sure we have as many refc blocks as the rmap says. */
STATIC void
xchk_refcount_xref_rmap(
        struct xfs_scrub        *sc,
        xfs_filblks_t           cow_blocks)
{
        xfs_extlen_t            refcbt_blocks = 0;
        xfs_filblks_t           blocks;
        int                     error;

        if (!sc->sa.rmap_cur || xchk_skip_xref(sc->sm))
                return;

        /* Check that we saw as many refcbt blocks as the rmap knows about. */
        error = xfs_btree_count_blocks(sc->sa.refc_cur, &refcbt_blocks);
        if (!xchk_btree_process_error(sc, sc->sa.refc_cur, 0, &error))
                return;
        error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
                        &XFS_RMAP_OINFO_REFC, &blocks);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
        if (blocks != refcbt_blocks)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);

        /* Check that we saw as many cow blocks as the rmap knows about. */
        error = xchk_count_rmap_ownedby_ag(sc, sc->sa.rmap_cur,
                        &XFS_RMAP_OINFO_COW, &blocks);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.rmap_cur))
                return;
        if (blocks != cow_blocks)
                xchk_btree_xref_set_corrupt(sc, sc->sa.rmap_cur, 0);
}

/* Scrub the refcount btree for some AG. */
int
xchk_refcountbt(
        struct xfs_scrub        *sc)
{
        xfs_agblock_t           cow_blocks = 0;
        int                     error;

        error = xchk_btree(sc, sc->sa.refc_cur, xchk_refcountbt_rec,
                        &XFS_RMAP_OINFO_REFC, &cow_blocks);
        if (error)
                return error;

        xchk_refcount_xref_rmap(sc, cow_blocks);

        return 0;
}

/* xref check that a cow staging extent is marked in the refcountbt. */
void
xchk_xref_is_cow_staging(
        struct xfs_scrub                *sc,
        xfs_agblock_t                   agbno,
        xfs_extlen_t                    len)
{
        struct xfs_refcount_irec        rc;
        bool                            has_cowflag;
        int                             has_refcount;
        int                             error;

        if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
                return;

        /* Find the CoW staging extent. */
        error = xfs_refcount_lookup_le(sc->sa.refc_cur,
                        agbno + XFS_REFC_COW_START, &has_refcount);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (!has_refcount) {
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
                return;
        }

        error = xfs_refcount_get_rec(sc->sa.refc_cur, &rc, &has_refcount);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (!has_refcount) {
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
                return;
        }

        /* CoW flag must be set, refcount must be 1. */
        has_cowflag = (rc.rc_startblock & XFS_REFC_COW_START);
        if (!has_cowflag || rc.rc_refcount != 1)
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);

        /* Must be at least as long as what was passed in. */
        if (rc.rc_blockcount < len)
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}

/*
 * xref check that the extent is not shared.  Only file data blocks
 * can have multiple owners.
 */
void
xchk_xref_is_not_shared(
        struct xfs_scrub        *sc,
        xfs_agblock_t           agbno,
        xfs_extlen_t            len)
{
        bool                    shared;
        int                     error;

        if (!sc->sa.refc_cur || xchk_skip_xref(sc->sm))
                return;

        error = xfs_refcount_has_record(sc->sa.refc_cur, agbno, len, &shared);
        if (!xchk_should_check_xref(sc, &error, &sc->sa.refc_cur))
                return;
        if (shared)
                xchk_btree_xref_set_corrupt(sc, sc->sa.refc_cur, 0);
}