// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2022-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_inode_fork.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_rmap.h"
#include "xfs_refcount.h"
#include "xfs_quota.h"
#include "xfs_ialloc.h"
#include "xfs_ag.h"
#include "xfs_error.h"
#include "xfs_errortag.h"
#include "xfs_icache.h"
#include "xfs_refcount_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
#include "scrub/xfs_scrub.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/bitmap.h"
#include "scrub/off_bitmap.h"
#include "scrub/fsb_bitmap.h"
#include "scrub/rtb_bitmap.h"
#include "scrub/reap.h"

/*
 * CoW Fork Mapping Repair
 * =======================
 *
 * Although CoW staging extents are owned by incore CoW inode forks, on disk
 * they are owned by the refcount btree.  The ondisk metadata does not record
 * any ownership information, which limits what we can do to repair the
 * mappings in the CoW fork.  At most, we can replace ifork mappings that lack
 * an entry in the refcount btree or are described by a reverse mapping record
 * whose owner is not OWN_COW.
 *
 * Replacing extents is also tricky -- we can't touch written CoW fork extents
 * since they are undergoing writeback, and delalloc extents do not require
 * repair since they only exist incore.
Hence the most we can do is find the 57 * bad parts of unwritten mappings, allocate a replacement set of blocks, and 58 * replace the incore mapping. We use the regular reaping process to unmap 59 * or free the discarded blocks, as appropriate. 60 */ 61 struct xrep_cow { 62 struct xfs_scrub *sc; 63 64 /* Bitmap of file offset ranges that need replacing. */ 65 struct xoff_bitmap bad_fileoffs; 66 67 /* Bitmap of fsblocks that were removed from the CoW fork. */ 68 union { 69 struct xfsb_bitmap old_cowfork_fsblocks; 70 struct xrtb_bitmap old_cowfork_rtblocks; 71 }; 72 73 /* CoW fork mappings used to scan for bad CoW staging extents. */ 74 struct xfs_bmbt_irec irec; 75 76 /* refcount btree block number of irec.br_startblock */ 77 unsigned int irec_startbno; 78 79 /* refcount btree block number of the next refcount record we expect */ 80 unsigned int next_bno; 81 }; 82 83 /* CoW staging extent. */ 84 struct xrep_cow_extent { 85 xfs_fsblock_t fsbno; 86 xfs_extlen_t len; 87 }; 88 89 /* 90 * Mark the part of the file range that corresponds to the given physical 91 * space. Caller must ensure that the physical range is within xc->irec. 92 */ 93 STATIC int 94 xrep_cow_mark_file_range( 95 struct xrep_cow *xc, 96 xfs_fsblock_t startblock, 97 xfs_filblks_t blockcount) 98 { 99 xfs_fileoff_t startoff; 100 101 startoff = xc->irec.br_startoff + 102 (startblock - xc->irec.br_startblock); 103 104 trace_xrep_cow_mark_file_range(xc->sc->ip, startblock, startoff, 105 blockcount); 106 107 return xoff_bitmap_set(&xc->bad_fileoffs, startoff, blockcount); 108 } 109 110 /* 111 * Trim @src to fit within the CoW fork mapping being examined, and put the 112 * result in @dst. 
113 */ 114 static inline void 115 xrep_cow_trim_refcount( 116 struct xrep_cow *xc, 117 struct xfs_refcount_irec *dst, 118 const struct xfs_refcount_irec *src) 119 { 120 unsigned int adj; 121 122 memcpy(dst, src, sizeof(*dst)); 123 124 if (dst->rc_startblock < xc->irec_startbno) { 125 adj = xc->irec_startbno - dst->rc_startblock; 126 dst->rc_blockcount -= adj; 127 dst->rc_startblock += adj; 128 } 129 130 if (dst->rc_startblock + dst->rc_blockcount > 131 xc->irec_startbno + xc->irec.br_blockcount) { 132 adj = (dst->rc_startblock + dst->rc_blockcount) - 133 (xc->irec_startbno + xc->irec.br_blockcount); 134 dst->rc_blockcount -= adj; 135 } 136 } 137 138 /* Mark any shared CoW staging extents. */ 139 STATIC int 140 xrep_cow_mark_shared_staging( 141 struct xfs_btree_cur *cur, 142 const struct xfs_refcount_irec *rec, 143 void *priv) 144 { 145 struct xrep_cow *xc = priv; 146 struct xfs_refcount_irec rrec; 147 148 if (!xfs_refcount_check_domain(rec) || 149 rec->rc_domain != XFS_REFC_DOMAIN_SHARED) 150 return -EFSCORRUPTED; 151 152 xrep_cow_trim_refcount(xc, &rrec, rec); 153 154 return xrep_cow_mark_file_range(xc, 155 xfs_gbno_to_fsb(cur->bc_group, rrec.rc_startblock), 156 rrec.rc_blockcount); 157 } 158 159 /* 160 * Mark any portion of the CoW fork file offset range where there is not a CoW 161 * staging extent record in the refcountbt, and keep a record of where we did 162 * find correct refcountbt records. Staging records are always cleaned out at 163 * mount time, so any two inodes trying to map the same staging area would have 164 * already taken the fs down due to refcount btree verifier errors. Hence this 165 * inode should be the sole creator of the staging extent records ondisk. 
166 */ 167 STATIC int 168 xrep_cow_mark_missing_staging( 169 struct xfs_btree_cur *cur, 170 const struct xfs_refcount_irec *rec, 171 void *priv) 172 { 173 struct xrep_cow *xc = priv; 174 struct xfs_refcount_irec rrec; 175 int error; 176 177 if (!xfs_refcount_check_domain(rec) || 178 rec->rc_domain != XFS_REFC_DOMAIN_COW) 179 return -EFSCORRUPTED; 180 181 xrep_cow_trim_refcount(xc, &rrec, rec); 182 183 if (xc->next_bno >= rrec.rc_startblock) 184 goto next; 185 186 error = xrep_cow_mark_file_range(xc, 187 xfs_gbno_to_fsb(cur->bc_group, xc->next_bno), 188 rrec.rc_startblock - xc->next_bno); 189 if (error) 190 return error; 191 192 next: 193 xc->next_bno = rrec.rc_startblock + rrec.rc_blockcount; 194 return 0; 195 } 196 197 /* 198 * Mark any area that does not correspond to a CoW staging rmap. These are 199 * cross-linked areas that must be avoided. 200 */ 201 STATIC int 202 xrep_cow_mark_missing_staging_rmap( 203 struct xfs_btree_cur *cur, 204 const struct xfs_rmap_irec *rec, 205 void *priv) 206 { 207 struct xrep_cow *xc = priv; 208 xfs_agblock_t rec_bno; 209 xfs_extlen_t rec_len; 210 unsigned int adj; 211 212 if (rec->rm_owner == XFS_RMAP_OWN_COW) 213 return 0; 214 215 rec_bno = rec->rm_startblock; 216 rec_len = rec->rm_blockcount; 217 if (rec_bno < xc->irec_startbno) { 218 adj = xc->irec_startbno - rec_bno; 219 rec_len -= adj; 220 rec_bno += adj; 221 } 222 223 if (rec_bno + rec_len > xc->irec_startbno + xc->irec.br_blockcount) { 224 adj = (rec_bno + rec_len) - 225 (xc->irec_startbno + xc->irec.br_blockcount); 226 rec_len -= adj; 227 } 228 229 return xrep_cow_mark_file_range(xc, 230 xfs_gbno_to_fsb(cur->bc_group, rec_bno), rec_len); 231 } 232 233 /* 234 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging 235 * extent and mark the corresponding part of the file range in the bitmap. 
236 */ 237 STATIC int 238 xrep_cow_find_bad( 239 struct xrep_cow *xc) 240 { 241 struct xfs_refcount_irec rc_low = { 0 }; 242 struct xfs_refcount_irec rc_high = { 0 }; 243 struct xfs_rmap_irec rm_low = { 0 }; 244 struct xfs_rmap_irec rm_high = { 0 }; 245 struct xfs_perag *pag; 246 struct xfs_scrub *sc = xc->sc; 247 xfs_agnumber_t agno; 248 int error; 249 250 agno = XFS_FSB_TO_AGNO(sc->mp, xc->irec.br_startblock); 251 xc->irec_startbno = XFS_FSB_TO_AGBNO(sc->mp, xc->irec.br_startblock); 252 253 pag = xfs_perag_get(sc->mp, agno); 254 if (!pag) 255 return -EFSCORRUPTED; 256 257 error = xrep_ag_init(sc, pag, &sc->sa); 258 if (error) 259 goto out_pag; 260 261 /* Mark any CoW fork extents that are shared. */ 262 rc_low.rc_startblock = xc->irec_startbno; 263 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; 264 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED; 265 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high, 266 xrep_cow_mark_shared_staging, xc); 267 if (error) 268 goto out_sa; 269 270 /* Make sure there are CoW staging extents for the whole mapping. */ 271 rc_low.rc_startblock = xc->irec_startbno; 272 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; 273 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW; 274 xc->next_bno = xc->irec_startbno; 275 error = xfs_refcount_query_range(sc->sa.refc_cur, &rc_low, &rc_high, 276 xrep_cow_mark_missing_staging, xc); 277 if (error) 278 goto out_sa; 279 280 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) { 281 error = xrep_cow_mark_file_range(xc, 282 xfs_agbno_to_fsb(pag, xc->next_bno), 283 xc->irec_startbno + xc->irec.br_blockcount - 284 xc->next_bno); 285 if (error) 286 goto out_sa; 287 } 288 289 /* Mark any area has an rmap that isn't a COW staging extent. 
*/ 290 rm_low.rm_startblock = xc->irec_startbno; 291 memset(&rm_high, 0xFF, sizeof(rm_high)); 292 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; 293 error = xfs_rmap_query_range(sc->sa.rmap_cur, &rm_low, &rm_high, 294 xrep_cow_mark_missing_staging_rmap, xc); 295 if (error) 296 goto out_sa; 297 298 /* 299 * If userspace is forcing us to rebuild the CoW fork or someone turned 300 * on the debugging knob, replace everything in the CoW fork. 301 */ 302 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || 303 XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { 304 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, 305 xc->irec.br_blockcount); 306 if (error) 307 return error; 308 } 309 310 out_sa: 311 xchk_ag_free(sc, &sc->sa); 312 out_pag: 313 xfs_perag_put(pag); 314 return 0; 315 } 316 317 /* 318 * Find any part of the CoW fork mapping that isn't a single-owner CoW staging 319 * extent and mark the corresponding part of the file range in the bitmap. 320 */ 321 STATIC int 322 xrep_cow_find_bad_rt( 323 struct xrep_cow *xc) 324 { 325 struct xfs_refcount_irec rc_low = { 0 }; 326 struct xfs_refcount_irec rc_high = { 0 }; 327 struct xfs_rmap_irec rm_low = { 0 }; 328 struct xfs_rmap_irec rm_high = { 0 }; 329 struct xfs_scrub *sc = xc->sc; 330 struct xfs_rtgroup *rtg; 331 int error = 0; 332 333 xc->irec_startbno = xfs_rtb_to_rgbno(sc->mp, xc->irec.br_startblock); 334 335 rtg = xfs_rtgroup_get(sc->mp, 336 xfs_rtb_to_rgno(sc->mp, xc->irec.br_startblock)); 337 if (!rtg) 338 return -EFSCORRUPTED; 339 340 error = xrep_rtgroup_init(sc, rtg, &sc->sr, 341 XFS_RTGLOCK_RMAP | XFS_RTGLOCK_REFCOUNT); 342 if (error) 343 goto out_rtg; 344 345 /* Mark any CoW fork extents that are shared. 
*/ 346 rc_low.rc_startblock = xc->irec_startbno; 347 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; 348 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_SHARED; 349 error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high, 350 xrep_cow_mark_shared_staging, xc); 351 if (error) 352 goto out_sr; 353 354 /* Make sure there are CoW staging extents for the whole mapping. */ 355 rc_low.rc_startblock = xc->irec_startbno; 356 rc_high.rc_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; 357 rc_low.rc_domain = rc_high.rc_domain = XFS_REFC_DOMAIN_COW; 358 xc->next_bno = xc->irec_startbno; 359 error = xfs_refcount_query_range(sc->sr.refc_cur, &rc_low, &rc_high, 360 xrep_cow_mark_missing_staging, xc); 361 if (error) 362 goto out_sr; 363 364 if (xc->next_bno < xc->irec_startbno + xc->irec.br_blockcount) { 365 error = xrep_cow_mark_file_range(xc, 366 xfs_rgbno_to_rtb(rtg, xc->next_bno), 367 xc->irec_startbno + xc->irec.br_blockcount - 368 xc->next_bno); 369 if (error) 370 goto out_sr; 371 } 372 373 /* Mark any area has an rmap that isn't a COW staging extent. */ 374 rm_low.rm_startblock = xc->irec_startbno; 375 memset(&rm_high, 0xFF, sizeof(rm_high)); 376 rm_high.rm_startblock = xc->irec_startbno + xc->irec.br_blockcount - 1; 377 error = xfs_rmap_query_range(sc->sr.rmap_cur, &rm_low, &rm_high, 378 xrep_cow_mark_missing_staging_rmap, xc); 379 if (error) 380 goto out_sr; 381 382 /* 383 * If userspace is forcing us to rebuild the CoW fork or someone 384 * turned on the debugging knob, replace everything in the 385 * CoW fork and then scan for staging extents in the refcountbt. 
386 */ 387 if ((sc->sm->sm_flags & XFS_SCRUB_IFLAG_FORCE_REBUILD) || 388 XFS_TEST_ERROR(false, sc->mp, XFS_ERRTAG_FORCE_SCRUB_REPAIR)) { 389 error = xrep_cow_mark_file_range(xc, xc->irec.br_startblock, 390 xc->irec.br_blockcount); 391 if (error) 392 goto out_rtg; 393 } 394 395 out_sr: 396 xchk_rtgroup_btcur_free(&sc->sr); 397 xchk_rtgroup_free(sc, &sc->sr); 398 out_rtg: 399 xfs_rtgroup_put(rtg); 400 return error; 401 } 402 403 /* 404 * Allocate a replacement CoW staging extent of up to the given number of 405 * blocks, and fill out the mapping. 406 */ 407 STATIC int 408 xrep_cow_alloc( 409 struct xfs_scrub *sc, 410 xfs_extlen_t maxlen, 411 struct xrep_cow_extent *repl) 412 { 413 struct xfs_alloc_arg args = { 414 .tp = sc->tp, 415 .mp = sc->mp, 416 .oinfo = XFS_RMAP_OINFO_SKIP_UPDATE, 417 .minlen = 1, 418 .maxlen = maxlen, 419 .prod = 1, 420 .resv = XFS_AG_RESV_NONE, 421 .datatype = XFS_ALLOC_USERDATA, 422 }; 423 int error; 424 425 error = xfs_trans_reserve_more(sc->tp, maxlen, 0); 426 if (error) 427 return error; 428 429 error = xfs_alloc_vextent_start_ag(&args, 430 XFS_INO_TO_FSB(sc->mp, sc->ip->i_ino)); 431 if (error) 432 return error; 433 if (args.fsbno == NULLFSBLOCK) 434 return -ENOSPC; 435 436 xfs_refcount_alloc_cow_extent(sc->tp, false, args.fsbno, args.len); 437 438 repl->fsbno = args.fsbno; 439 repl->len = args.len; 440 return 0; 441 } 442 443 /* 444 * Allocate a replacement rt CoW staging extent of up to the given number of 445 * blocks, and fill out the mapping. 
446 */ 447 STATIC int 448 xrep_cow_alloc_rt( 449 struct xfs_scrub *sc, 450 xfs_extlen_t maxlen, 451 struct xrep_cow_extent *repl) 452 { 453 xfs_rtxlen_t maxrtx = xfs_rtb_to_rtx(sc->mp, maxlen); 454 int error; 455 456 error = xfs_trans_reserve_more(sc->tp, 0, maxrtx); 457 if (error) 458 return error; 459 460 error = xfs_rtallocate_rtgs(sc->tp, NULLRTBLOCK, 1, maxrtx, 1, false, 461 false, &repl->fsbno, &repl->len); 462 if (error) 463 return error; 464 465 xfs_refcount_alloc_cow_extent(sc->tp, true, repl->fsbno, repl->len); 466 return 0; 467 } 468 469 /* 470 * Look up the current CoW fork mapping so that we only allocate enough to 471 * replace a single mapping. If we don't find a mapping that covers the start 472 * of the file range, or we find a delalloc or written extent, something is 473 * seriously wrong, since we didn't drop the ILOCK. 474 */ 475 static inline int 476 xrep_cow_find_mapping( 477 struct xrep_cow *xc, 478 struct xfs_iext_cursor *icur, 479 xfs_fileoff_t startoff, 480 struct xfs_bmbt_irec *got) 481 { 482 struct xfs_inode *ip = xc->sc->ip; 483 struct xfs_ifork *ifp = xfs_ifork_ptr(ip, XFS_COW_FORK); 484 485 if (!xfs_iext_lookup_extent(ip, ifp, startoff, icur, got)) 486 goto bad; 487 488 if (got->br_startoff > startoff) 489 goto bad; 490 491 if (got->br_blockcount == 0) 492 goto bad; 493 494 if (isnullstartblock(got->br_startblock)) 495 goto bad; 496 497 if (xfs_bmap_is_written_extent(got)) 498 goto bad; 499 500 return 0; 501 bad: 502 ASSERT(0); 503 return -EFSCORRUPTED; 504 } 505 506 #define REPLACE_LEFT_SIDE (1U << 0) 507 #define REPLACE_RIGHT_SIDE (1U << 1) 508 509 /* 510 * Given a CoW fork mapping @got and a replacement mapping @repl, remap the 511 * beginning of @got with the space described by @rep. 
512 */ 513 static inline void 514 xrep_cow_replace_mapping( 515 struct xfs_inode *ip, 516 struct xfs_iext_cursor *icur, 517 const struct xfs_bmbt_irec *got, 518 const struct xrep_cow_extent *repl) 519 { 520 struct xfs_bmbt_irec new = *got; /* struct copy */ 521 522 ASSERT(repl->len > 0); 523 ASSERT(!isnullstartblock(got->br_startblock)); 524 525 trace_xrep_cow_replace_mapping(ip, got, repl->fsbno, repl->len); 526 527 if (got->br_blockcount == repl->len) { 528 /* 529 * The new extent is a complete replacement for the existing 530 * extent. Update the COW fork record. 531 */ 532 new.br_startblock = repl->fsbno; 533 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new); 534 return; 535 } 536 537 /* 538 * The new extent can replace the beginning of the COW fork record. 539 * Move the left side of @got upwards, then insert the new record. 540 */ 541 new.br_startoff += repl->len; 542 new.br_startblock += repl->len; 543 new.br_blockcount -= repl->len; 544 xfs_iext_update_extent(ip, BMAP_COWFORK, icur, &new); 545 546 new.br_startoff = got->br_startoff; 547 new.br_startblock = repl->fsbno; 548 new.br_blockcount = repl->len; 549 xfs_iext_insert(ip, icur, &new, BMAP_COWFORK); 550 } 551 552 /* 553 * Replace the unwritten CoW staging extent backing the given file range with a 554 * new space extent that isn't as problematic. 555 */ 556 STATIC int 557 xrep_cow_replace_range( 558 struct xrep_cow *xc, 559 xfs_fileoff_t startoff, 560 xfs_extlen_t *blockcount) 561 { 562 struct xfs_iext_cursor icur; 563 struct xrep_cow_extent repl; 564 struct xfs_bmbt_irec got; 565 struct xfs_scrub *sc = xc->sc; 566 xfs_fileoff_t nextoff; 567 xfs_extlen_t alloc_len; 568 int error; 569 570 /* 571 * Put the existing CoW fork mapping in @got. If @got ends before 572 * @rep, truncate @rep so we only replace one extent mapping at a time. 
573 */ 574 error = xrep_cow_find_mapping(xc, &icur, startoff, &got); 575 if (error) 576 return error; 577 nextoff = min(startoff + *blockcount, 578 got.br_startoff + got.br_blockcount); 579 580 /* 581 * Allocate a replacement extent. If we don't fill all the blocks, 582 * shorten the quantity that will be deleted in this step. 583 */ 584 alloc_len = min_t(xfs_fileoff_t, XFS_MAX_BMBT_EXTLEN, 585 nextoff - startoff); 586 if (XFS_IS_REALTIME_INODE(sc->ip)) 587 error = xrep_cow_alloc_rt(sc, alloc_len, &repl); 588 else 589 error = xrep_cow_alloc(sc, alloc_len, &repl); 590 if (error) 591 return error; 592 593 /* 594 * Replace the old mapping with the new one, and commit the metadata 595 * changes made so far. 596 */ 597 xrep_cow_replace_mapping(sc->ip, &icur, &got, &repl); 598 599 xfs_inode_set_cowblocks_tag(sc->ip); 600 error = xfs_defer_finish(&sc->tp); 601 if (error) 602 return error; 603 604 /* Note the old CoW staging extents; we'll reap them all later. */ 605 if (XFS_IS_REALTIME_INODE(sc->ip)) 606 error = xrtb_bitmap_set(&xc->old_cowfork_rtblocks, 607 got.br_startblock, repl.len); 608 else 609 error = xfsb_bitmap_set(&xc->old_cowfork_fsblocks, 610 got.br_startblock, repl.len); 611 if (error) 612 return error; 613 614 *blockcount = repl.len; 615 return 0; 616 } 617 618 /* 619 * Replace a bad part of an unwritten CoW staging extent with a fresh delalloc 620 * reservation. 621 */ 622 STATIC int 623 xrep_cow_replace( 624 uint64_t startoff, 625 uint64_t blockcount, 626 void *priv) 627 { 628 struct xrep_cow *xc = priv; 629 int error = 0; 630 631 while (blockcount > 0) { 632 xfs_extlen_t len = min_t(xfs_filblks_t, blockcount, 633 XFS_MAX_BMBT_EXTLEN); 634 635 error = xrep_cow_replace_range(xc, startoff, &len); 636 if (error) 637 break; 638 639 blockcount -= len; 640 startoff += len; 641 } 642 643 return error; 644 } 645 646 /* 647 * Repair an inode's CoW fork. The CoW fork is an in-core structure, so 648 * there's no btree to rebuid. 
Instead, we replace any mappings that are 649 * cross-linked or lack ondisk CoW fork records in the refcount btree. 650 */ 651 int 652 xrep_bmap_cow( 653 struct xfs_scrub *sc) 654 { 655 struct xrep_cow *xc; 656 struct xfs_iext_cursor icur; 657 struct xfs_ifork *ifp = xfs_ifork_ptr(sc->ip, XFS_COW_FORK); 658 int error; 659 660 if (!xfs_has_rmapbt(sc->mp) || !xfs_has_reflink(sc->mp)) 661 return -EOPNOTSUPP; 662 663 if (!ifp) 664 return 0; 665 666 /* 667 * Realtime files with large extent sizes are not supported because 668 * we could encounter an CoW mapping that has been partially written 669 * out *and* requires replacement, and there's no solution to that. 670 */ 671 if (xfs_inode_has_bigrtalloc(sc->ip)) 672 return -EOPNOTSUPP; 673 674 /* Metadata inodes aren't supposed to have data on the rt volume. */ 675 if (xfs_is_metadir_inode(sc->ip) && XFS_IS_REALTIME_INODE(sc->ip)) 676 return -EOPNOTSUPP; 677 678 /* 679 * If we're somehow not in extents format, then reinitialize it to 680 * an empty extent mapping fork and exit. 681 */ 682 if (ifp->if_format != XFS_DINODE_FMT_EXTENTS) { 683 ifp->if_format = XFS_DINODE_FMT_EXTENTS; 684 ifp->if_nextents = 0; 685 return 0; 686 } 687 688 xc = kzalloc(sizeof(struct xrep_cow), XCHK_GFP_FLAGS); 689 if (!xc) 690 return -ENOMEM; 691 692 xfs_trans_ijoin(sc->tp, sc->ip, 0); 693 694 xc->sc = sc; 695 xoff_bitmap_init(&xc->bad_fileoffs); 696 if (XFS_IS_REALTIME_INODE(sc->ip)) 697 xrtb_bitmap_init(&xc->old_cowfork_rtblocks); 698 else 699 xfsb_bitmap_init(&xc->old_cowfork_fsblocks); 700 701 for_each_xfs_iext(ifp, &icur, &xc->irec) { 702 if (xchk_should_terminate(sc, &error)) 703 goto out_bitmap; 704 705 /* 706 * delalloc reservations only exist incore, so there is no 707 * ondisk metadata that we can examine. Hence we leave them 708 * alone. 709 */ 710 if (isnullstartblock(xc->irec.br_startblock)) 711 continue; 712 713 /* 714 * COW fork extents are only in the written state if writeback 715 * is actively writing to disk. 
We cannot restart the write 716 * at a different disk address since we've already issued the 717 * IO, so we leave these alone and hope for the best. 718 */ 719 if (xfs_bmap_is_written_extent(&xc->irec)) 720 continue; 721 722 if (XFS_IS_REALTIME_INODE(sc->ip)) 723 error = xrep_cow_find_bad_rt(xc); 724 else 725 error = xrep_cow_find_bad(xc); 726 if (error) 727 goto out_bitmap; 728 } 729 730 /* Replace any bad unwritten mappings with fresh reservations. */ 731 error = xoff_bitmap_walk(&xc->bad_fileoffs, xrep_cow_replace, xc); 732 if (error) 733 goto out_bitmap; 734 735 /* 736 * Reap as many of the old CoW blocks as we can. They are owned ondisk 737 * by the refcount btree, not the inode, so it is correct to treat them 738 * like inode metadata. 739 */ 740 if (XFS_IS_REALTIME_INODE(sc->ip)) 741 error = xrep_reap_rtblocks(sc, &xc->old_cowfork_rtblocks, 742 &XFS_RMAP_OINFO_COW); 743 else 744 error = xrep_reap_fsblocks(sc, &xc->old_cowfork_fsblocks, 745 &XFS_RMAP_OINFO_COW); 746 if (error) 747 goto out_bitmap; 748 749 out_bitmap: 750 if (XFS_IS_REALTIME_INODE(sc->ip)) 751 xrtb_bitmap_destroy(&xc->old_cowfork_rtblocks); 752 else 753 xfsb_bitmap_destroy(&xc->old_cowfork_fsblocks); 754 xoff_bitmap_destroy(&xc->bad_fileoffs); 755 kfree(xc); 756 return error; 757 } 758