// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020-2023 Oracle. All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs_platform.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
#include "xfs_extent_busy.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
#include "scrub/rtbitmap.h"

/* rt bitmap content repairs */

/*
 * Set up to repair the realtime bitmap for this group.
 *
 * Asks xrep_tempfile_create() for an S_IFREG temporary file, creates an xfile
 * of sb_rbmblocks * sb_blocksize bytes in sc->xfile to hold the reconstructed
 * bitmap, and adds the block reservation estimate to rtb->resblks.
 *
 * Returns 0 on success, -EOPNOTSUPP if the reservation estimate exceeds
 * UINT_MAX, or the error returned by xrep_tempfile_create() or
 * xfile_create().
 */
int
xrep_setup_rtbitmap(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	unsigned long long	blocks = mp->m_sb.sb_rbmblocks;
	int			error;

	error = xrep_tempfile_create(sc, S_IFREG);
	if (error)
		return error;

	/* Create an xfile to hold our reconstructed bitmap. */
	error = xfile_create("realtime bitmap file",
			blocks * mp->m_sb.sb_blocksize, &sc->xfile);
	if (error)
		return error;

	/*
	 * Reserve enough blocks to write out a completely new bitmap file,
	 * plus twice as many blocks as we would need if we can only allocate
	 * one block per data fork mapping.  This should cover the
	 * preallocation of the temporary file and exchanging the extent
	 * mappings.
	 *
	 * We cannot use xfs_exchmaps_estimate because we have not yet
	 * constructed the replacement bitmap and therefore do not know how
	 * many extents it will use.  By the time we do, we will have a dirty
	 * transaction (which we cannot drop because we cannot drop the
	 * rtbitmap ILOCK) and cannot ask for more reservation.
	 */
	blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;

	rtb->resblks += blocks;
	return 0;
}

/*
 * Convert an rt extent number into the offset (in words) of the bitmap word
 * that the callers below update for that extent.  @mp is not used.
 */
static inline xrep_wordoff_t
rtx_to_wordoff(
	struct xfs_mount	*mp,
	xfs_rtxnum_t		rtx)
{
	return rtx >> XFS_NBWORDLOG;
}

/*
 * Convert a length in rt extents into a count of bitmap words.  The shift
 * discards any remainder, so partial words are not counted.
 * NOTE(review): no caller is visible in this file -- confirm it is still
 * needed.
 */
static inline xrep_wordcnt_t
rtxlen_to_wordcnt(
	xfs_rtxlen_t	rtxlen)
{
	return rtxlen >> XFS_NBWORDLOG;
}

/* Helper functions to record rtwords in an xfile. */

/*
 * Load the rtword at word offset @wordoff from the xfile into *@word.
 *
 * The word is read as a raw union xfs_rtword_raw at byte offset
 * (wordoff << XFS_WORDLOG) and its big-endian .rtg member is converted to
 * CPU byte order.  Only the rtgroups format is handled, which is asserted.
 * Returns 0 or the error from xfile_load(); *@word is untouched on error.
 */
static inline int
xfbmp_load(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		*word)
{
	union xfs_rtword_raw	urk;
	int			error;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	error = xfile_load(rtb->sc->xfile, &urk,
			sizeof(union xfs_rtword_raw),
			wordoff << XFS_WORDLOG);
	if (error)
		return error;

	*word = be32_to_cpu(urk.rtg);
	return 0;
}

/*
 * Store @word at word offset @wordoff in the xfile.
 *
 * The CPU-order value is converted to big-endian via the .rtg member of
 * union xfs_rtword_raw before being written at byte offset
 * (wordoff << XFS_WORDLOG).  Only the rtgroups format is handled, which is
 * asserted.  Returns the result of xfile_store().
 */
static inline int
xfbmp_store(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const xfs_rtword_t	word)
{
	union xfs_rtword_raw	urk;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	urk.rtg = cpu_to_be32(word);
	return xfile_store(rtb->sc->xfile, &urk,
			sizeof(union xfs_rtword_raw),
			wordoff << XFS_WORDLOG);
}

/*
 * Copy @nr_words raw words from @word into the xfile, starting at word
 * offset @wordoff.  No byte swapping is done here; the caller supplies the
 * words in the form they should be stored.
 */
static inline int
xfbmp_copyin(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
}

/*
 * Copy @nr_words raw words out of the xfile, starting at word offset
 * @wordoff, into the buffer at @word.  No byte swapping is done here.
 */
static inline int
xfbmp_copyout(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
}

/*
 * Perform a logical OR operation on an rtword in the incore bitmap.
 *
 * This is a read-modify-write of one word in the xfile: load the word at
 * @wordoff, emit a tracepoint with the mask and the old value, then store
 * (word | mask).  Returns 0 or the error from the load or the store.
 */
static int
xrep_rtbitmap_or(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		mask)
{
	xfs_rtword_t		word;
	int			error;

	error = xfbmp_load(rtb, wordoff, &word);
	if (error)
		return error;

	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);

	return xfbmp_store(rtb, wordoff, word | mask);
}

/*
 * Mark as free every rt extent between the next rt block we expected to see
 * in the rtrmap records and the given rt block.
 *
 * The range covered is [rtb->next_rgbno, @rgbno).  Both callers in this file
 * only call this when rtb->next_rgbno < @rgbno.  The bits are set in three
 * steps: a partial word at the start of the range, a partial word at the end
 * of the range, and then whole words in between, which are copied in bulk
 * from rtb->words (filled with 0xFF by xrep_rtbitmap_find_freespace()).
 *
 * Returns 0 on success; -EFSCORRUPTED if the range fails verification, is not
 * aligned to rt extent boundaries, or overlaps shared or CoW staging refcount
 * records; or an error from the refcount query or the xfile helpers.
 */
STATIC int
xrep_rtbitmap_mark_free(
	struct xchk_rtbitmap	*rtb,
	xfs_rgblock_t		rgbno)
{
	struct xfs_mount	*mp = rtb->sc->mp;
	struct xchk_rt		*sr = &rtb->sc->sr;
	struct xfs_rtgroup	*rtg = sr->rtg;
	xfs_rtxnum_t		startrtx;
	xfs_rtxnum_t		nextrtx;
	xrep_wordoff_t		wordoff, nextwordoff;
	unsigned int		bit;
	unsigned int		bufwsize;
	xfs_extlen_t		mod;
	xfs_rtword_t		mask;
	enum xbtree_recpacking	outcome;
	int			error;

	/* The gap itself must be a valid block range for this rtgroup. */
	if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
		return -EFSCORRUPTED;

	/*
	 * Convert rt blocks to rt extents.  The block range we find must be
	 * aligned to an rtextent boundary on both ends.
	 */
	startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
	mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
	if (mod)
		return -EFSCORRUPTED;

	/*
	 * nextrtx is one past the rt extent holding the last block of the
	 * range; that last block must be the final block of its rt extent.
	 */
	nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
	mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
	if (mod != mp->m_sb.sb_rextsize - 1)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sr->refc_cur) {
		error = xfs_refcount_has_records(sr->refc_cur,
				XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
				rgbno - rtb->next_rgbno, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sr->refc_cur,
				XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
				rgbno - rtb->next_rgbno, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);

	/* Set bits as needed to round startrtx up to the nearest word. */
	bit = startrtx & XREP_RTBMP_WORDMASK;
	if (bit) {
		xfs_rtblock_t	len = nextrtx - startrtx;
		unsigned int	lastbit;

		/* Set bits [bit, lastbit) of the word holding startrtx. */
		lastbit = min(bit + len, XFS_NBWORD);
		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;

		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
				mask);
		/* Done if the whole range fit inside this one word. */
		if (error || lastbit - bit == len)
			return error;
		startrtx += XFS_NBWORD - bit;
	}

	/* Set bits as needed to round nextrtx down to the nearest word. */
	bit = nextrtx & XREP_RTBMP_WORDMASK;
	if (bit) {
		/* Set the low @bit bits of the word holding nextrtx. */
		mask = ((xfs_rtword_t)1 << bit) - 1;

		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
				mask);
		/* Done if no whole words lie between the two ends. */
		if (error || startrtx + bit == nextrtx)
			return error;
		nextrtx -= bit;
	}

	trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);

	/* Set all the words in between, up to a whole fs block at once. */
	wordoff = rtx_to_wordoff(mp, startrtx);
	nextwordoff = rtx_to_wordoff(mp, nextrtx);
	bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;

	while (wordoff < nextwordoff) {
		xrep_wordoff_t	rem;
		xrep_wordcnt_t	wordcnt;

		wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
				bufwsize);

		/*
		 * Try to keep us aligned to the rtwords buffer to reduce the
		 * number of xfile writes.  The mask below relies on bufwsize
		 * being a power of two.
		 */
		rem = wordoff & (bufwsize - 1);
		if (rem)
			wordcnt = min_t(xrep_wordcnt_t, wordcnt,
					bufwsize - rem);

		/* Always copy from the start of the buffer of ones. */
		error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
		if (error)
			return error;

		wordoff += wordcnt;
	}

	return 0;
}

/*
 * Set free space in the rtbitmap based on rtrmapbt records.
 *
 * This is the per-record callback that xrep_rtbitmap_find_freespace() hands
 * to xfs_rmap_query_all(); @priv is the struct xchk_rtbitmap and @cur is not
 * used.  If the record starts after rtb->next_rgbno, the gap before it is
 * marked free.  rtb->next_rgbno is then advanced to the end of the record,
 * but never moved backwards.
 */
STATIC int
xrep_rtbitmap_walk_rtrmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_rtbitmap		*rtb = priv;
	int				error = 0;

	if (xchk_should_terminate(rtb->sc, &error))
		return error;

	if (rtb->next_rgbno < rec->rm_startblock) {
		error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
		if (error)
			return error;
	}

	rtb->next_rgbno = max(rtb->next_rgbno,
			      rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/*
 * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
 * in the realtime bitmap that we're computing.
 *
 * The btree cursors set up by xrep_rtgroup_btcur_init() are released via
 * xchk_rtgroup_btcur_free() on every return path.  Returns 0 or the first
 * error from the rmap query or from marking the trailing gap.
 */
STATIC int
xrep_rtbitmap_find_freespace(
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_scrub	*sc = rtb->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	uint64_t		blockcount;
	int			error;

	/* Prepare a buffer of ones so that we can accelerate bulk setting. */
	memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);

	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
			rtb);
	if (error)
		goto out;

	/*
	 * Mark as free every possible rt extent from the last one we saw to
	 * the end of the rt group.
	 */
	blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
	if (rtb->next_rgbno < blockcount) {
		error = xrep_rtbitmap_mark_free(rtb, blockcount);
		if (error)
			goto out;
	}

out:
	xchk_rtgroup_btcur_free(&sc->sr);
	return error;
}

/*
 * Fill one rtbitmap block buffer from the xfile.
 *
 * xrep_rtbitmap() passes this function to xrep_tempfile_copyin(), with the
 * struct xchk_rtbitmap as @data.  Each call copies mp->m_blockwsize words
 * from the xfile, starting at rtb->prep_wordoff, into @bp, sets up the block
 * header (rtgroups filesystems only) and the buffer ops, and then advances
 * rtb->prep_wordoff by mp->m_blockwsize.
 * NOTE(review): the initial value of rtb->prep_wordoff is not set anywhere in
 * this file -- confirm where it is initialized.
 *
 * Returns 0 or the error from xfbmp_copyout(), in which case the buffer ops,
 * rtb->prep_wordoff and the buffer log type are left unchanged.
 */
static int
xrep_rtbitmap_prep_buf(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp,
	void			*data)
{
	struct xchk_rtbitmap	*rtb = data;
	struct xfs_mount	*mp = sc->mp;
	union xfs_rtword_raw	*ondisk;
	int			error;

	/*
	 * Point args at @bp just long enough to locate word 0 of the block,
	 * then detach the buffer from args again.
	 */
	rtb->args.mp = sc->mp;
	rtb->args.tp = sc->tp;
	rtb->args.rbmbp = bp;
	ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
	rtb->args.rbmbp = NULL;

	error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
			mp->m_blockwsize);
	if (error)
		return error;

	if (xfs_has_rtgroups(sc->mp)) {
		struct xfs_rtbuf_blkinfo	*hdr = bp->b_addr;

		/* The owner recorded is sc->ip, not the temporary file. */
		hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
		hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
		hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr->rt_lsn = 0;
		uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
		bp->b_ops = &xfs_rtbitmap_buf_ops;
	} else {
		bp->b_ops = &xfs_rtbuf_ops;
	}

	rtb->prep_wordoff += mp->m_blockwsize;
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
	return 0;
}

/*
 * Make sure that the given range of the data fork of the realtime file is
 * mapped to written blocks.  The caller must ensure that the inode is joined
 * to the transaction.
 *
 * Walks file offsets [0, @len) of sc->ip one mapping at a time.  Written
 * extents and holes are skipped, unwritten extents are converted to zeroed
 * written extents, and anything else is treated as corruption.  Returns 0,
 * -EFSCORRUPTED, or an error from the bmap or defer-finish calls.
 */
STATIC int
xrep_rtbitmap_data_mappings(
	struct xfs_scrub	*sc,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		off = 0;
	int			error;

	ASSERT(sc->ip != NULL);

	while (off < len) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/*
		 * Written extents are ok.  Holes are not filled because we
		 * do not know the freespace information.
		 */
		if (xfs_bmap_is_written_extent(&map) ||
		    map.br_startblock == HOLESTARTBLOCK) {
			off = map.br_startoff + map.br_blockcount;
			continue;
		}

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/* Make sure we're really converting an unwritten extent. */
		if (map.br_state != XFS_EXT_UNWRITTEN) {
			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
			return -EFSCORRUPTED;
		}

		/* Make sure this block has a real zeroed extent mapped. */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
				map.br_blockcount,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				0, &map, &nmaps);
		if (error)
			return error;

		/* Commit new extent and all deferred work. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;

		/* Resume after whatever xfs_bmapi_write() reported in @map. */
		off = map.br_startoff + map.br_blockcount;
	}

	return 0;
}

/*
 * Fix broken rt volume geometry.
 *
 * For each of sb_rextents, sb_rbmblocks and sb_rextslog that differs from the
 * value held in @rtb, the difference is queued as a superblock delta with
 * xfs_trans_mod_sb().  The size of sc->ip is rounded up to a multiple of the
 * block size and raised to at least rtb->rbmblocks blocks; the inode core is
 * logged unconditionally.  The transaction is rolled before returning, and
 * the result of xrep_roll_trans() is the return value.
 */
STATIC int
xrep_rtbitmap_geometry(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	/* Same transaction as sc->tp until the roll at the end. */
	struct xfs_trans	*tp = sc->tp;

	/* Superblock fields */
	if (mp->m_sb.sb_rextents != rtb->rextents)
		xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
				rtb->rextents - mp->m_sb.sb_rextents);

	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
				rtb->rbmblocks - mp->m_sb.sb_rbmblocks);

	if (mp->m_sb.sb_rextslog != rtb->rextslog)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
				rtb->rextslog - mp->m_sb.sb_rextslog);

	/* Fix broken isize */
	sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
					 mp->m_sb.sb_blocksize);

	if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
		sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);

	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	return xrep_roll_trans(sc);
}

/*
 * Repair the realtime bitmap file metadata.
 *
 * Order of operations, each of which returns immediately on error:
 *  1. Top up the transaction's block reservation if needed.
 *  2. Repair the inode forks, convert unwritten data fork extents, and fix
 *     the geometry fields.
 *  3. Flush the group's busy extent list if it is not empty.
 *  4. Rebuild the bitmap contents in the xfile from the rtrmapbt.
 *  5. Lock and preallocate the temporary file, copy the xfile contents into
 *     it, and set its size.
 *  6. Exchange the data fork contents of the temporary file and sc->ip, then
 *     reap the blocks left in the temporary file's data fork.
 *
 * Returns -EOPNOTSUPP if the filesystem lacks the rtrmapbt or exchange-range
 * feature or if the bmbt size estimate exceeds UINT_MAX, and 0 without
 * changing anything if rtb->rbmblocks is larger than U32_MAX.
 */
int
xrep_rtbitmap(
	struct xfs_scrub	*sc)
{
	struct xchk_rtbitmap	*rtb = sc->buf;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_group	*xg = rtg_group(sc->sr.rtg);
	unsigned long long	blocks = 0;
	unsigned int		busy_gen;
	int			error;

	/* We require the realtime rmapbt to rebuild anything. */
	if (!xfs_has_rtrmapbt(sc->mp))
		return -EOPNOTSUPP;
	/* We require atomic file exchange range to rebuild anything. */
	if (!xfs_has_exchange_range(sc->mp))
		return -EOPNOTSUPP;

	/* Impossibly large rtbitmap means we can't touch the filesystem. */
	if (rtb->rbmblocks > U32_MAX)
		return 0;

	/*
	 * If the size of the rt bitmap file is larger than what we reserved,
	 * figure out if we need to adjust the block reservation in the
	 * transaction.
	 */
	blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;
	if (blocks > rtb->resblks) {
		/* Ask for @blocks more, on top of what is already held. */
		error = xfs_trans_reserve_more(sc->tp, blocks, 0);
		if (error)
			return error;

		rtb->resblks += blocks;
	}

	/* Fix inode core and forks. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Ensure no unwritten extents. */
	error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
	if (error)
		return error;

	/*
	 * Fix inconsistent bitmap geometry.  This function returns with a
	 * clean scrub transaction.
	 */
	error = xrep_rtbitmap_geometry(sc, rtb);
	if (error)
		return error;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.
	 */
	if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
		error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
		if (error)
			return error;
	}

	/*
	 * Generate the new rtbitmap data.  We don't need the rtbmp information
	 * once this call is finished.
	 */
	error = xrep_rtbitmap_find_freespace(rtb);
	if (error)
		return error;

	/*
	 * Try to take ILOCK_EXCL of the temporary file.  We had better be the
	 * only ones holding onto this inode, but we can't block while holding
	 * the rtbitmap file's ILOCK_EXCL.
	 */
	while (!xrep_tempfile_ilock_nowait(sc)) {
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}

	/*
	 * Make sure we have space allocated for the part of the bitmap
	 * file that corresponds to this group.  We already joined sc->ip.
	 */
	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
	error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/* Copy the bitmap file that we generated. */
	error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
			xrep_rtbitmap_prep_buf, rtb);
	if (error)
		return error;
	/* Note that the size comes from the superblock, not from rtb. */
	error = xrep_tempfile_set_isize(sc,
			XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
	if (error)
		return error;

	/*
	 * Now exchange the data fork contents.  We're done with the temporary
	 * buffer, so we can reuse it for the tempfile exchmaps information.
	 */
	error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
			rtb->rbmblocks, &rtb->tempexch);
	if (error)
		return error;

	error = xrep_tempexch_contents(sc, &rtb->tempexch);
	if (error)
		return error;

	/* Free the old rtbitmap blocks if they're not in use. */
	return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}