1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_quota.h" 17 #include "xfs_bmap.h" 18 #include "xfs_bmap_btree.h" 19 #include "xfs_trans_space.h" 20 #include "xfs_dir2.h" 21 #include "xfs_exchrange.h" 22 #include "xfs_exchmaps.h" 23 #include "xfs_defer.h" 24 #include "xfs_symlink_remote.h" 25 #include "xfs_metafile.h" 26 #include "scrub/scrub.h" 27 #include "scrub/common.h" 28 #include "scrub/repair.h" 29 #include "scrub/trace.h" 30 #include "scrub/tempfile.h" 31 #include "scrub/tempexch.h" 32 #include "scrub/xfile.h" 33 34 /* 35 * Create a temporary file for reconstructing metadata, with the intention of 36 * atomically exchanging the temporary file's contents with the file that's 37 * being repaired. 38 */ 39 int 40 xrep_tempfile_create( 41 struct xfs_scrub *sc, 42 uint16_t mode) 43 { 44 struct xfs_icreate_args args = { 45 .pip = sc->mp->m_rootip, 46 .mode = mode, 47 .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE, 48 }; 49 struct xfs_mount *mp = sc->mp; 50 struct xfs_trans *tp = NULL; 51 struct xfs_dquot *udqp; 52 struct xfs_dquot *gdqp; 53 struct xfs_dquot *pdqp; 54 struct xfs_trans_res *tres; 55 struct xfs_inode *dp = mp->m_rootip; 56 xfs_ino_t ino; 57 unsigned int resblks; 58 bool is_dir = S_ISDIR(mode); 59 int error; 60 61 if (xfs_is_shutdown(mp)) 62 return -EIO; 63 if (xfs_is_readonly(mp)) 64 return -EROFS; 65 66 ASSERT(sc->tp == NULL); 67 ASSERT(sc->tempip == NULL); 68 69 /* 70 * Make sure that we have allocated dquot(s) on disk. The temporary 71 * inode should be completely root owned so that we don't fail due to 72 * quota limits. 73 */ 74 error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); 75 if (error) 76 return error; 77 78 if (is_dir) { 79 resblks = xfs_mkdir_space_res(mp, 0); 80 tres = &M_RES(mp)->tr_mkdir; 81 } else { 82 resblks = XFS_IALLOC_SPACE_RES(mp); 83 tres = &M_RES(mp)->tr_create_tmpfile; 84 } 85 86 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, 87 &tp); 88 if (error) 89 goto out_release_dquots; 90 91 /* Allocate inode, set up directory. */ 92 error = xfs_dialloc(&tp, &args, &ino); 93 if (error) 94 goto out_trans_cancel; 95 error = xfs_icreate(tp, ino, &args, &sc->tempip); 96 if (error) 97 goto out_trans_cancel; 98 99 /* We don't touch file data, so drop the realtime flags. */ 100 sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 101 xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE); 102 103 /* 104 * Mark our temporary file as private so that LSMs and the ACL code 105 * don't try to add their own metadata or reason about these files. 106 * The file should never be exposed to userspace. 107 */ 108 VFS_I(sc->tempip)->i_flags |= S_PRIVATE; 109 VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR; 110 111 if (is_dir) { 112 error = xfs_dir_init(tp, sc->tempip, dp); 113 if (error) 114 goto out_trans_cancel; 115 } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) { 116 /* 117 * Initialize the temporary symlink with a meaningless target 118 * that won't trip the verifiers. Repair must rewrite the 119 * target with meaningful content before swapping with the file 120 * being repaired. A single-byte target will not write a 121 * remote target block, so the owner is irrelevant. 122 */ 123 error = xfs_symlink_write_target(tp, sc->tempip, 124 sc->tempip->i_ino, ".", 1, 0, 0); 125 if (error) 126 goto out_trans_cancel; 127 } 128 129 /* 130 * Attach the dquot(s) to the inodes and modify them incore. 131 * These ids of the inode couldn't have changed since the new 132 * inode has been locked ever since it was created. 133 */ 134 xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp); 135 136 /* 137 * Put our temp file on the unlinked list so it's purged automatically. 138 * All file-based metadata being reconstructed using this file must be 139 * atomically exchanged with the original file because the contents 140 * here will be purged when the inode is dropped or log recovery cleans 141 * out the unlinked list. 142 */ 143 error = xfs_iunlink(tp, sc->tempip); 144 if (error) 145 goto out_trans_cancel; 146 147 error = xfs_trans_commit(tp); 148 if (error) 149 goto out_release_inode; 150 151 trace_xrep_tempfile_create(sc); 152 153 xfs_qm_dqrele(udqp); 154 xfs_qm_dqrele(gdqp); 155 xfs_qm_dqrele(pdqp); 156 157 /* Finish setting up the incore / vfs context. */ 158 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 159 xfs_setup_iops(sc->tempip); 160 xfs_finish_inode_setup(sc->tempip); 161 162 sc->temp_ilock_flags = 0; 163 return error; 164 165 out_trans_cancel: 166 xfs_trans_cancel(tp); 167 out_release_inode: 168 /* 169 * Wait until after the current transaction is aborted to finish the 170 * setup of the inode and release the inode. This prevents recursive 171 * transactions and deadlocks from xfs_inactive. 172 */ 173 if (sc->tempip) { 174 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 175 xfs_finish_inode_setup(sc->tempip); 176 xchk_irele(sc, sc->tempip); 177 } 178 out_release_dquots: 179 xfs_qm_dqrele(udqp); 180 xfs_qm_dqrele(gdqp); 181 xfs_qm_dqrele(pdqp); 182 183 return error; 184 } 185 186 /* 187 * Temporary files have to be created before we even know which inode we're 188 * going to scrub, so we assume that they will be part of the regular directory 189 * tree. If it turns out that we're actually scrubbing a file from the 190 * metadata directory tree, we have to subtract the temp file from the root 191 * dquots and detach the dquots. 192 */ 193 int 194 xrep_tempfile_adjust_directory_tree( 195 struct xfs_scrub *sc) 196 { 197 int error; 198 199 if (!sc->tempip) 200 return 0; 201 202 ASSERT(sc->tp == NULL); 203 ASSERT(!xfs_is_metadir_inode(sc->tempip)); 204 205 if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) 206 return 0; 207 208 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); 209 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 210 211 error = xchk_trans_alloc(sc, 0); 212 if (error) 213 goto out_iolock; 214 215 xrep_tempfile_ilock(sc); 216 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 217 218 /* Metadir files are not accounted in quota, so drop icount */ 219 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L); 220 xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN); 221 222 error = xrep_trans_commit(sc); 223 if (error) 224 goto out_ilock; 225 226 xfs_qm_dqdetach(sc->tempip); 227 out_ilock: 228 xrep_tempfile_iunlock(sc); 229 out_iolock: 230 xrep_tempfile_iounlock(sc); 231 return error; 232 } 233 234 /* 235 * Remove this temporary file from the metadata directory tree so that it can 236 * be inactivated the normal way. 237 */ 238 STATIC int 239 xrep_tempfile_remove_metadir( 240 struct xfs_scrub *sc) 241 { 242 int error; 243 244 if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip)) 245 return 0; 246 247 ASSERT(sc->tp == NULL); 248 249 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); 250 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 251 252 error = xchk_trans_alloc(sc, 0); 253 if (error) 254 goto out_iolock; 255 256 xrep_tempfile_ilock(sc); 257 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 258 259 xfs_metafile_clear_iflag(sc->tp, sc->tempip); 260 261 /* Non-metadir files are accounted in quota, so bump bcount/icount */ 262 error = xfs_qm_dqattach_locked(sc->tempip, false); 263 if (error) 264 goto out_cancel; 265 266 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L); 267 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT, 268 sc->tempip->i_nblocks); 269 error = xrep_trans_commit(sc); 270 goto out_ilock; 271 272 out_cancel: 273 xchk_trans_cancel(sc); 274 out_ilock: 275 xrep_tempfile_iunlock(sc); 276 out_iolock: 277 xrep_tempfile_iounlock(sc); 278 return error; 279 } 280 281 /* Take IOLOCK_EXCL on the temporary file, maybe. */ 282 bool 283 xrep_tempfile_iolock_nowait( 284 struct xfs_scrub *sc) 285 { 286 if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) { 287 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 288 return true; 289 } 290 291 return false; 292 } 293 294 /* 295 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK. 296 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock 297 * to avoid deadlocks and lockdep complaints. 298 */ 299 int 300 xrep_tempfile_iolock_polled( 301 struct xfs_scrub *sc) 302 { 303 int error = 0; 304 305 while (!xrep_tempfile_iolock_nowait(sc)) { 306 if (xchk_should_terminate(sc, &error)) 307 return error; 308 delay(1); 309 } 310 311 return 0; 312 } 313 314 /* Release IOLOCK_EXCL on the temporary file. */ 315 void 316 xrep_tempfile_iounlock( 317 struct xfs_scrub *sc) 318 { 319 xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL); 320 sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL; 321 } 322 323 /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */ 324 void 325 xrep_tempfile_ilock( 326 struct xfs_scrub *sc) 327 { 328 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 329 xfs_ilock(sc->tempip, XFS_ILOCK_EXCL); 330 } 331 332 /* Try to grab ILOCK_EXCL on the temporary file. */ 333 bool 334 xrep_tempfile_ilock_nowait( 335 struct xfs_scrub *sc) 336 { 337 if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) { 338 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 339 return true; 340 } 341 342 return false; 343 } 344 345 /* Unlock ILOCK_EXCL on the temporary file after an update. */ 346 void 347 xrep_tempfile_iunlock( 348 struct xfs_scrub *sc) 349 { 350 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 351 sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL; 352 } 353 354 /* 355 * Begin the process of making changes to both the file being scrubbed and 356 * the temporary file by taking ILOCK_EXCL on both. 357 */ 358 void 359 xrep_tempfile_ilock_both( 360 struct xfs_scrub *sc) 361 { 362 xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL); 363 sc->ilock_flags |= XFS_ILOCK_EXCL; 364 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 365 } 366 367 /* Unlock ILOCK_EXCL on both files. */ 368 void 369 xrep_tempfile_iunlock_both( 370 struct xfs_scrub *sc) 371 { 372 xrep_tempfile_iunlock(sc); 373 xchk_iunlock(sc, XFS_ILOCK_EXCL); 374 } 375 376 /* Release the temporary file. */ 377 void 378 xrep_tempfile_rele( 379 struct xfs_scrub *sc) 380 { 381 if (!sc->tempip) 382 return; 383 384 if (sc->temp_ilock_flags) { 385 xfs_iunlock(sc->tempip, sc->temp_ilock_flags); 386 sc->temp_ilock_flags = 0; 387 } 388 389 xrep_tempfile_remove_metadir(sc); 390 xchk_irele(sc, sc->tempip); 391 sc->tempip = NULL; 392 } 393 394 /* 395 * Make sure that the given range of the data fork of the temporary file is 396 * mapped to written blocks. The caller must ensure that both inodes are 397 * joined to the transaction. 398 */ 399 int 400 xrep_tempfile_prealloc( 401 struct xfs_scrub *sc, 402 xfs_fileoff_t off, 403 xfs_filblks_t len) 404 { 405 struct xfs_bmbt_irec map; 406 xfs_fileoff_t end = off + len; 407 int error; 408 409 ASSERT(sc->tempip != NULL); 410 ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip)); 411 412 for (; off < end; off = map.br_startoff + map.br_blockcount) { 413 int nmaps = 1; 414 415 /* 416 * If we have a real extent mapping this block then we're 417 * in ok shape. 418 */ 419 error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps, 420 XFS_DATA_FORK); 421 if (error) 422 return error; 423 if (nmaps == 0) { 424 ASSERT(nmaps != 0); 425 return -EFSCORRUPTED; 426 } 427 428 if (xfs_bmap_is_written_extent(&map)) 429 continue; 430 431 /* 432 * If we find a delalloc reservation then something is very 433 * very wrong. Bail out. 434 */ 435 if (map.br_startblock == DELAYSTARTBLOCK) 436 return -EFSCORRUPTED; 437 438 /* 439 * Make sure this block has a real zeroed extent allocated to 440 * it. 441 */ 442 nmaps = 1; 443 error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off, 444 XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map, 445 &nmaps); 446 if (error) 447 return error; 448 if (nmaps != 1) 449 return -EFSCORRUPTED; 450 451 trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map); 452 453 /* Commit new extent and all deferred work. */ 454 error = xfs_defer_finish(&sc->tp); 455 if (error) 456 return error; 457 } 458 459 return 0; 460 } 461 462 /* 463 * Write data to each block of a file. The given range of the tempfile's data 464 * fork must already be populated with written extents. 465 */ 466 int 467 xrep_tempfile_copyin( 468 struct xfs_scrub *sc, 469 xfs_fileoff_t off, 470 xfs_filblks_t len, 471 xrep_tempfile_copyin_fn prep_fn, 472 void *data) 473 { 474 LIST_HEAD(buffers_list); 475 struct xfs_mount *mp = sc->mp; 476 struct xfs_buf *bp; 477 xfs_fileoff_t flush_mask; 478 xfs_fileoff_t end = off + len; 479 loff_t pos = XFS_FSB_TO_B(mp, off); 480 int error = 0; 481 482 ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode)); 483 484 /* Flush buffers to disk every 512K */ 485 flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1; 486 487 for (; off < end; off++, pos += mp->m_sb.sb_blocksize) { 488 struct xfs_bmbt_irec map; 489 int nmaps = 1; 490 491 /* Read block mapping for this file block. */ 492 error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0); 493 if (error) 494 goto out_err; 495 if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) { 496 error = -EFSCORRUPTED; 497 goto out_err; 498 } 499 500 /* Get the metadata buffer for this offset in the file. */ 501 error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, 502 XFS_FSB_TO_DADDR(mp, map.br_startblock), 503 mp->m_bsize, 0, &bp); 504 if (error) 505 goto out_err; 506 507 trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map); 508 509 /* Read in a block's worth of data from the xfile. */ 510 error = prep_fn(sc, bp, data); 511 if (error) { 512 xfs_trans_brelse(sc->tp, bp); 513 goto out_err; 514 } 515 516 /* Queue buffer, and flush if we have too much dirty data. */ 517 xfs_buf_delwri_queue_here(bp, &buffers_list); 518 xfs_trans_brelse(sc->tp, bp); 519 520 if (!(off & flush_mask)) { 521 error = xfs_buf_delwri_submit(&buffers_list); 522 if (error) 523 goto out_err; 524 } 525 } 526 527 /* 528 * Write the new blocks to disk. If the ordered list isn't empty after 529 * that, then something went wrong and we have to fail. This should 530 * never happen, but we'll check anyway. 531 */ 532 error = xfs_buf_delwri_submit(&buffers_list); 533 if (error) 534 goto out_err; 535 536 if (!list_empty(&buffers_list)) { 537 ASSERT(list_empty(&buffers_list)); 538 error = -EIO; 539 goto out_err; 540 } 541 542 return 0; 543 544 out_err: 545 xfs_buf_delwri_cancel(&buffers_list); 546 return error; 547 } 548 549 /* 550 * Set the temporary file's size. Caller must join the tempfile to the scrub 551 * transaction and is responsible for adjusting block mappings as needed. 552 */ 553 int 554 xrep_tempfile_set_isize( 555 struct xfs_scrub *sc, 556 unsigned long long isize) 557 { 558 if (sc->tempip->i_disk_size == isize) 559 return 0; 560 561 sc->tempip->i_disk_size = isize; 562 i_size_write(VFS_I(sc->tempip), isize); 563 return xrep_tempfile_roll_trans(sc); 564 } 565 566 /* 567 * Roll a repair transaction involving the temporary file. Caller must join 568 * both the temporary file and the file being scrubbed to the transaction. 569 * This function return with both inodes joined to a new scrub transaction, 570 * or the usual negative errno. 571 */ 572 int 573 xrep_tempfile_roll_trans( 574 struct xfs_scrub *sc) 575 { 576 int error; 577 578 xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE); 579 error = xrep_roll_trans(sc); 580 if (error) 581 return error; 582 583 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 584 return 0; 585 } 586 587 /* 588 * Fill out the mapping exchange request in preparation for atomically 589 * committing the contents of a metadata file that we've rebuilt in the temp 590 * file. 591 */ 592 STATIC int 593 xrep_tempexch_prep_request( 594 struct xfs_scrub *sc, 595 int whichfork, 596 struct xrep_tempexch *tx) 597 { 598 struct xfs_exchmaps_req *req = &tx->req; 599 600 memset(tx, 0, sizeof(struct xrep_tempexch)); 601 602 /* COW forks don't exist on disk. */ 603 if (whichfork == XFS_COW_FORK) { 604 ASSERT(0); 605 return -EINVAL; 606 } 607 608 /* Both files should have the relevant forks. */ 609 if (!xfs_ifork_ptr(sc->ip, whichfork) || 610 !xfs_ifork_ptr(sc->tempip, whichfork)) { 611 ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL); 612 ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL); 613 return -EINVAL; 614 } 615 616 /* Exchange all mappings in both forks. */ 617 req->ip1 = sc->tempip; 618 req->ip2 = sc->ip; 619 req->startoff1 = 0; 620 req->startoff2 = 0; 621 switch (whichfork) { 622 case XFS_ATTR_FORK: 623 req->flags |= XFS_EXCHMAPS_ATTR_FORK; 624 break; 625 case XFS_DATA_FORK: 626 /* Always exchange sizes when exchanging data fork mappings. */ 627 req->flags |= XFS_EXCHMAPS_SET_SIZES; 628 break; 629 } 630 req->blockcount = XFS_MAX_FILEOFF; 631 632 return 0; 633 } 634 635 /* 636 * Fill out the mapping exchange resource estimation structures in preparation 637 * for exchanging the contents of a metadata file that we've rebuilt in the 638 * temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files. 639 */ 640 STATIC int 641 xrep_tempexch_estimate( 642 struct xfs_scrub *sc, 643 struct xrep_tempexch *tx) 644 { 645 struct xfs_exchmaps_req *req = &tx->req; 646 struct xfs_ifork *ifp; 647 struct xfs_ifork *tifp; 648 int whichfork = xfs_exchmaps_reqfork(req); 649 int state = 0; 650 651 /* 652 * The exchmaps code only knows how to exchange file fork space 653 * mappings. Any fork data in local format must be promoted to a 654 * single block before the exchange can take place. 655 */ 656 ifp = xfs_ifork_ptr(sc->ip, whichfork); 657 if (ifp->if_format == XFS_DINODE_FMT_LOCAL) 658 state |= 1; 659 660 tifp = xfs_ifork_ptr(sc->tempip, whichfork); 661 if (tifp->if_format == XFS_DINODE_FMT_LOCAL) 662 state |= 2; 663 664 switch (state) { 665 case 0: 666 /* Both files have mapped extents; use the regular estimate. */ 667 return xfs_exchrange_estimate(req); 668 case 1: 669 /* 670 * The file being repaired is in local format, but the temp 671 * file has mapped extents. To perform the exchange, the file 672 * being repaired must have its shorform data converted to an 673 * ondisk block so that the forks will be in extents format. 674 * We need one resblk for the conversion; the number of 675 * exchanges is (worst case) the temporary file's extent count 676 * plus the block we converted. 677 */ 678 req->ip1_bcount = sc->tempip->i_nblocks; 679 req->ip2_bcount = 1; 680 req->nr_exchanges = 1 + tifp->if_nextents; 681 req->resblks = 1; 682 break; 683 case 2: 684 /* 685 * The temporary file is in local format, but the file being 686 * repaired has mapped extents. To perform the exchange, the 687 * temp file must have its shortform data converted to an 688 * ondisk block, and the fork changed to extents format. We 689 * need one resblk for the conversion; the number of exchanges 690 * is (worst case) the extent count of the file being repaired 691 * plus the block we converted. 692 */ 693 req->ip1_bcount = 1; 694 req->ip2_bcount = sc->ip->i_nblocks; 695 req->nr_exchanges = 1 + ifp->if_nextents; 696 req->resblks = 1; 697 break; 698 case 3: 699 /* 700 * Both forks are in local format. To perform the exchange, 701 * both files must have their shortform data converted to 702 * fsblocks, and both forks must be converted to extents 703 * format. We need two resblks for the two conversions, and 704 * the number of exchanges is 1 since there's only one block at 705 * fileoff 0. Presumably, the caller could not exchange the 706 * two inode fork areas directly. 707 */ 708 req->ip1_bcount = 1; 709 req->ip2_bcount = 1; 710 req->nr_exchanges = 1; 711 req->resblks = 2; 712 break; 713 } 714 715 return xfs_exchmaps_estimate_overhead(req); 716 } 717 718 /* 719 * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip 720 * this if quota enforcement is disabled or if both inodes' dquots are the 721 * same. The qretry structure must be initialized to zeroes before the first 722 * call to this function. 723 */ 724 STATIC int 725 xrep_tempexch_reserve_quota( 726 struct xfs_scrub *sc, 727 const struct xrep_tempexch *tx) 728 { 729 struct xfs_trans *tp = sc->tp; 730 const struct xfs_exchmaps_req *req = &tx->req; 731 int64_t ddelta, rdelta; 732 int error; 733 734 /* 735 * Don't bother with a quota reservation if we're not enforcing them 736 * or the two inodes have the same dquots. 737 */ 738 if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || 739 (req->ip1->i_udquot == req->ip2->i_udquot && 740 req->ip1->i_gdquot == req->ip2->i_gdquot && 741 req->ip1->i_pdquot == req->ip2->i_pdquot)) 742 return 0; 743 744 /* 745 * Quota reservation for each file comes from two sources. First, we 746 * need to account for any net gain in mapped blocks during the 747 * exchange. Second, we need reservation for the gross gain in mapped 748 * blocks so that we don't trip over any quota block reservation 749 * assertions. We must reserve the gross gain because the quota code 750 * subtracts from bcount the number of blocks that we unmap; it does 751 * not add that quantity back to the quota block reservation. 752 */ 753 ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount); 754 rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount); 755 error = xfs_trans_reserve_quota_nblks(tp, req->ip1, 756 ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount, 757 true); 758 if (error) 759 return error; 760 761 ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount); 762 rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount); 763 return xfs_trans_reserve_quota_nblks(tp, req->ip2, 764 ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount, 765 true); 766 } 767 768 /* 769 * Prepare an existing transaction for an atomic file contents exchange. 770 * 771 * This function fills out the mapping exchange request and resource estimation 772 * structures in preparation for exchanging the contents of a metadata file 773 * that has been rebuilt in the temp file. Next, it reserves space and quota 774 * for the transaction. 775 * 776 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary 777 * file. The caller must join both inodes to the transaction with no unlock 778 * flags, and is responsible for dropping both ILOCKs when appropriate. Only 779 * use this when those ILOCKs cannot be dropped. 780 */ 781 int 782 xrep_tempexch_trans_reserve( 783 struct xfs_scrub *sc, 784 int whichfork, 785 struct xrep_tempexch *tx) 786 { 787 int error; 788 789 ASSERT(sc->tp != NULL); 790 xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL); 791 xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL); 792 793 error = xrep_tempexch_prep_request(sc, whichfork, tx); 794 if (error) 795 return error; 796 797 error = xfs_exchmaps_estimate(&tx->req); 798 if (error) 799 return error; 800 801 error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0); 802 if (error) 803 return error; 804 805 return xrep_tempexch_reserve_quota(sc, tx); 806 } 807 808 /* 809 * Create a new transaction for a file contents exchange. 810 * 811 * This function fills out the mapping excahange request and resource 812 * estimation structures in preparation for exchanging the contents of a 813 * metadata file that has been rebuilt in the temp file. Next, it reserves 814 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and 815 * reserves quota for the transaction. 816 * 817 * The caller is responsible for dropping both ILOCKs when appropriate. 818 */ 819 int 820 xrep_tempexch_trans_alloc( 821 struct xfs_scrub *sc, 822 int whichfork, 823 struct xrep_tempexch *tx) 824 { 825 unsigned int flags = 0; 826 int error; 827 828 ASSERT(sc->tp == NULL); 829 ASSERT(xfs_has_exchange_range(sc->mp)); 830 831 error = xrep_tempexch_prep_request(sc, whichfork, tx); 832 if (error) 833 return error; 834 835 error = xrep_tempexch_estimate(sc, tx); 836 if (error) 837 return error; 838 839 if (xfs_has_lazysbcount(sc->mp)) 840 flags |= XFS_TRANS_RES_FDBLKS; 841 842 error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, 843 tx->req.resblks, 0, flags, &sc->tp); 844 if (error) 845 return error; 846 847 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 848 sc->ilock_flags |= XFS_ILOCK_EXCL; 849 xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip); 850 851 return xrep_tempexch_reserve_quota(sc, tx); 852 } 853 854 /* 855 * Exchange file mappings (and hence file contents) between the file being 856 * repaired and the temporary file. Returns with both inodes locked and joined 857 * to a clean scrub transaction. 858 */ 859 int 860 xrep_tempexch_contents( 861 struct xfs_scrub *sc, 862 struct xrep_tempexch *tx) 863 { 864 int error; 865 866 ASSERT(xfs_has_exchange_range(sc->mp)); 867 868 xfs_exchange_mappings(sc->tp, &tx->req); 869 error = xfs_defer_finish(&sc->tp); 870 if (error) 871 return error; 872 873 /* 874 * If we exchanged the ondisk sizes of two metadata files, we must 875 * exchanged the incore sizes as well. 876 */ 877 if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) { 878 loff_t temp; 879 880 temp = i_size_read(VFS_I(sc->ip)); 881 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); 882 i_size_write(VFS_I(sc->tempip), temp); 883 } 884 885 return 0; 886 } 887 888 /* 889 * Write local format data from one of the temporary file's forks into the same 890 * fork of file being repaired, and exchange the file sizes, if appropriate. 891 * Caller must ensure that the file being repaired has enough fork space to 892 * hold all the bytes. 893 */ 894 void 895 xrep_tempfile_copyout_local( 896 struct xfs_scrub *sc, 897 int whichfork) 898 { 899 struct xfs_ifork *temp_ifp; 900 struct xfs_ifork *ifp; 901 unsigned int ilog_flags = XFS_ILOG_CORE; 902 903 temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork); 904 ifp = xfs_ifork_ptr(sc->ip, whichfork); 905 906 ASSERT(temp_ifp != NULL); 907 ASSERT(ifp != NULL); 908 ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL); 909 ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); 910 911 switch (whichfork) { 912 case XFS_DATA_FORK: 913 ASSERT(sc->tempip->i_disk_size <= 914 xfs_inode_data_fork_size(sc->ip)); 915 break; 916 case XFS_ATTR_FORK: 917 ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff); 918 break; 919 default: 920 ASSERT(0); 921 return; 922 } 923 924 /* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */ 925 xfs_idestroy_fork(ifp); 926 xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data, 927 temp_ifp->if_bytes); 928 929 if (whichfork == XFS_DATA_FORK) { 930 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); 931 sc->ip->i_disk_size = sc->tempip->i_disk_size; 932 } 933 934 ilog_flags |= xfs_ilog_fdata(whichfork); 935 xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags); 936 } 937 938 /* Decide if a given XFS inode is a temporary file for a repair. */ 939 bool 940 xrep_is_tempfile( 941 const struct xfs_inode *ip) 942 { 943 const struct inode *inode = &ip->i_vnode; 944 struct xfs_mount *mp = ip->i_mount; 945 946 /* 947 * Files in the metadata directory tree also have S_PRIVATE set and 948 * IOP_XATTR unset, so we must distinguish them separately. 949 */ 950 if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA)) 951 return false; 952 953 if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR)) 954 return true; 955 956 return false; 957 } 958