1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_quota.h" 17 #include "xfs_bmap.h" 18 #include "xfs_bmap_btree.h" 19 #include "xfs_trans_space.h" 20 #include "xfs_dir2.h" 21 #include "xfs_exchrange.h" 22 #include "xfs_exchmaps.h" 23 #include "xfs_defer.h" 24 #include "xfs_symlink_remote.h" 25 #include "xfs_metafile.h" 26 #include "scrub/scrub.h" 27 #include "scrub/common.h" 28 #include "scrub/repair.h" 29 #include "scrub/trace.h" 30 #include "scrub/tempfile.h" 31 #include "scrub/tempexch.h" 32 #include "scrub/xfile.h" 33 34 /* 35 * Create a temporary file for reconstructing metadata, with the intention of 36 * atomically exchanging the temporary file's contents with the file that's 37 * being repaired. 38 */ 39 int 40 xrep_tempfile_create( 41 struct xfs_scrub *sc, 42 uint16_t mode) 43 { 44 struct xfs_icreate_args args = { 45 .pip = sc->mp->m_rootip, 46 .mode = mode, 47 .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE, 48 }; 49 struct xfs_mount *mp = sc->mp; 50 struct xfs_trans *tp = NULL; 51 struct xfs_dquot *udqp; 52 struct xfs_dquot *gdqp; 53 struct xfs_dquot *pdqp; 54 struct xfs_trans_res *tres; 55 struct xfs_inode *dp = mp->m_rootip; 56 xfs_ino_t ino; 57 unsigned int resblks; 58 bool is_dir = S_ISDIR(mode); 59 int error; 60 61 if (xfs_is_shutdown(mp)) 62 return -EIO; 63 if (xfs_is_readonly(mp)) 64 return -EROFS; 65 66 ASSERT(sc->tp == NULL); 67 ASSERT(sc->tempip == NULL); 68 69 /* 70 * Make sure that we have allocated dquot(s) on disk. The temporary 71 * inode should be completely root owned so that we don't fail due to 72 * quota limits. 
73 */ 74 error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); 75 if (error) 76 return error; 77 78 if (is_dir) { 79 resblks = xfs_mkdir_space_res(mp, 0); 80 tres = &M_RES(mp)->tr_mkdir; 81 } else { 82 resblks = XFS_IALLOC_SPACE_RES(mp); 83 tres = &M_RES(mp)->tr_create_tmpfile; 84 } 85 86 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, 87 &tp); 88 if (error) 89 goto out_release_dquots; 90 91 /* Allocate inode, set up directory. */ 92 error = xfs_dialloc(&tp, &args, &ino); 93 if (error) 94 goto out_trans_cancel; 95 error = xfs_icreate(tp, ino, &args, &sc->tempip); 96 if (error) 97 goto out_trans_cancel; 98 99 /* We don't touch file data, so drop the realtime flags. */ 100 sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 101 xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE); 102 103 /* 104 * Mark our temporary file as private so that LSMs and the ACL code 105 * don't try to add their own metadata or reason about these files. 106 * The file should never be exposed to userspace. 107 */ 108 VFS_I(sc->tempip)->i_flags |= S_PRIVATE; 109 VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR; 110 111 if (is_dir) { 112 error = xfs_dir_init(tp, sc->tempip, dp); 113 if (error) 114 goto out_trans_cancel; 115 } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) { 116 /* 117 * Initialize the temporary symlink with a meaningless target 118 * that won't trip the verifiers. Repair must rewrite the 119 * target with meaningful content before swapping with the file 120 * being repaired. A single-byte target will not write a 121 * remote target block, so the owner is irrelevant. 122 */ 123 error = xfs_symlink_write_target(tp, sc->tempip, 124 sc->tempip->i_ino, ".", 1, 0, 0); 125 if (error) 126 goto out_trans_cancel; 127 } 128 129 /* 130 * Attach the dquot(s) to the inodes and modify them incore. 131 * These ids of the inode couldn't have changed since the new 132 * inode has been locked ever since it was created. 
133 */ 134 xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp); 135 136 /* 137 * Put our temp file on the unlinked list so it's purged automatically. 138 * All file-based metadata being reconstructed using this file must be 139 * atomically exchanged with the original file because the contents 140 * here will be purged when the inode is dropped or log recovery cleans 141 * out the unlinked list. 142 */ 143 error = xfs_iunlink(tp, sc->tempip); 144 if (error) 145 goto out_trans_cancel; 146 147 error = xfs_trans_commit(tp); 148 if (error) 149 goto out_release_inode; 150 151 trace_xrep_tempfile_create(sc); 152 153 xfs_qm_dqrele(udqp); 154 xfs_qm_dqrele(gdqp); 155 xfs_qm_dqrele(pdqp); 156 157 /* Finish setting up the incore / vfs context. */ 158 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 159 xfs_setup_iops(sc->tempip); 160 xfs_finish_inode_setup(sc->tempip); 161 162 sc->temp_ilock_flags = 0; 163 return error; 164 165 out_trans_cancel: 166 xfs_trans_cancel(tp); 167 out_release_inode: 168 /* 169 * Wait until after the current transaction is aborted to finish the 170 * setup of the inode and release the inode. This prevents recursive 171 * transactions and deadlocks from xfs_inactive. 172 */ 173 if (sc->tempip) { 174 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 175 xfs_finish_inode_setup(sc->tempip); 176 xchk_irele(sc, sc->tempip); 177 } 178 out_release_dquots: 179 xfs_qm_dqrele(udqp); 180 xfs_qm_dqrele(gdqp); 181 xfs_qm_dqrele(pdqp); 182 183 return error; 184 } 185 186 /* 187 * Move sc->tempip from the regular directory tree to the metadata directory 188 * tree if sc->ip is part of the metadata directory tree and tempip has an 189 * eligible file mode. 190 * 191 * Temporary files have to be created before we even know which inode we're 192 * going to scrub, so we assume that they will be part of the regular directory 193 * tree. 
If it turns out that we're actually scrubbing a file from the 194 * metadata directory tree, we have to subtract the temp file from the root 195 * dquots and detach the dquots prior to setting the METADATA iflag. However, 196 * the scrub setup functions grab sc->ip and create sc->tempip before we 197 * actually get around to checking if the file mode is the right type for the 198 * scrubber. 199 */ 200 int 201 xrep_tempfile_adjust_directory_tree( 202 struct xfs_scrub *sc) 203 { 204 int error; 205 206 if (!sc->tempip) 207 return 0; 208 209 ASSERT(sc->tp == NULL); 210 ASSERT(!xfs_is_metadir_inode(sc->tempip)); 211 212 if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) 213 return 0; 214 if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) && 215 !S_ISREG(VFS_I(sc->tempip)->i_mode)) 216 return 0; 217 218 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); 219 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 220 221 error = xchk_trans_alloc(sc, 0); 222 if (error) 223 goto out_iolock; 224 225 xrep_tempfile_ilock(sc); 226 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 227 228 /* Metadir files are not accounted in quota, so drop icount */ 229 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L); 230 xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN); 231 232 error = xrep_trans_commit(sc); 233 if (error) 234 goto out_ilock; 235 236 xfs_iflags_set(sc->tempip, XFS_IRECOVERY); 237 xfs_qm_dqdetach(sc->tempip); 238 out_ilock: 239 xrep_tempfile_iunlock(sc); 240 out_iolock: 241 xrep_tempfile_iounlock(sc); 242 return error; 243 } 244 245 /* 246 * Remove this temporary file from the metadata directory tree so that it can 247 * be inactivated the normal way. 
248 */ 249 STATIC int 250 xrep_tempfile_remove_metadir( 251 struct xfs_scrub *sc) 252 { 253 int error; 254 255 if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip)) 256 return 0; 257 258 ASSERT(sc->tp == NULL); 259 260 xfs_iflags_clear(sc->tempip, XFS_IRECOVERY); 261 262 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); 263 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 264 265 error = xchk_trans_alloc(sc, 0); 266 if (error) 267 goto out_iolock; 268 269 xrep_tempfile_ilock(sc); 270 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 271 272 xfs_metafile_clear_iflag(sc->tp, sc->tempip); 273 274 /* Non-metadir files are accounted in quota, so bump bcount/icount */ 275 error = xfs_qm_dqattach_locked(sc->tempip, false); 276 if (error) 277 goto out_cancel; 278 279 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L); 280 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT, 281 sc->tempip->i_nblocks); 282 error = xrep_trans_commit(sc); 283 goto out_ilock; 284 285 out_cancel: 286 xchk_trans_cancel(sc); 287 out_ilock: 288 xrep_tempfile_iunlock(sc); 289 out_iolock: 290 xrep_tempfile_iounlock(sc); 291 return error; 292 } 293 294 /* Take IOLOCK_EXCL on the temporary file, maybe. */ 295 bool 296 xrep_tempfile_iolock_nowait( 297 struct xfs_scrub *sc) 298 { 299 if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) { 300 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 301 return true; 302 } 303 304 return false; 305 } 306 307 /* 308 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK. 309 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock 310 * to avoid deadlocks and lockdep complaints. 311 */ 312 int 313 xrep_tempfile_iolock_polled( 314 struct xfs_scrub *sc) 315 { 316 int error = 0; 317 318 while (!xrep_tempfile_iolock_nowait(sc)) { 319 if (xchk_should_terminate(sc, &error)) 320 return error; 321 delay(1); 322 } 323 324 return 0; 325 } 326 327 /* Release IOLOCK_EXCL on the temporary file. 
*/ 328 void 329 xrep_tempfile_iounlock( 330 struct xfs_scrub *sc) 331 { 332 xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL); 333 sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL; 334 } 335 336 /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */ 337 void 338 xrep_tempfile_ilock( 339 struct xfs_scrub *sc) 340 { 341 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 342 xfs_ilock(sc->tempip, XFS_ILOCK_EXCL); 343 } 344 345 /* Try to grab ILOCK_EXCL on the temporary file. */ 346 bool 347 xrep_tempfile_ilock_nowait( 348 struct xfs_scrub *sc) 349 { 350 if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) { 351 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 352 return true; 353 } 354 355 return false; 356 } 357 358 /* Unlock ILOCK_EXCL on the temporary file after an update. */ 359 void 360 xrep_tempfile_iunlock( 361 struct xfs_scrub *sc) 362 { 363 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 364 sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL; 365 } 366 367 /* 368 * Begin the process of making changes to both the file being scrubbed and 369 * the temporary file by taking ILOCK_EXCL on both. 370 */ 371 void 372 xrep_tempfile_ilock_both( 373 struct xfs_scrub *sc) 374 { 375 xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL); 376 sc->ilock_flags |= XFS_ILOCK_EXCL; 377 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 378 } 379 380 /* Unlock ILOCK_EXCL on both files. */ 381 void 382 xrep_tempfile_iunlock_both( 383 struct xfs_scrub *sc) 384 { 385 xrep_tempfile_iunlock(sc); 386 xchk_iunlock(sc, XFS_ILOCK_EXCL); 387 } 388 389 /* Release the temporary file. 
*/ 390 void 391 xrep_tempfile_rele( 392 struct xfs_scrub *sc) 393 { 394 if (!sc->tempip) 395 return; 396 397 if (sc->temp_ilock_flags) { 398 xfs_iunlock(sc->tempip, sc->temp_ilock_flags); 399 sc->temp_ilock_flags = 0; 400 } 401 402 xrep_tempfile_remove_metadir(sc); 403 xchk_irele(sc, sc->tempip); 404 sc->tempip = NULL; 405 } 406 407 /* 408 * Make sure that the given range of the data fork of the temporary file is 409 * mapped to written blocks. The caller must ensure that both inodes are 410 * joined to the transaction. 411 */ 412 int 413 xrep_tempfile_prealloc( 414 struct xfs_scrub *sc, 415 xfs_fileoff_t off, 416 xfs_filblks_t len) 417 { 418 struct xfs_bmbt_irec map; 419 xfs_fileoff_t end = off + len; 420 int error; 421 422 ASSERT(sc->tempip != NULL); 423 ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip)); 424 425 for (; off < end; off = map.br_startoff + map.br_blockcount) { 426 int nmaps = 1; 427 428 /* 429 * If we have a real extent mapping this block then we're 430 * in ok shape. 431 */ 432 error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps, 433 XFS_DATA_FORK); 434 if (error) 435 return error; 436 if (nmaps == 0) { 437 ASSERT(nmaps != 0); 438 return -EFSCORRUPTED; 439 } 440 441 if (xfs_bmap_is_written_extent(&map)) 442 continue; 443 444 /* 445 * If we find a delalloc reservation then something is very 446 * very wrong. Bail out. 447 */ 448 if (map.br_startblock == DELAYSTARTBLOCK) 449 return -EFSCORRUPTED; 450 451 /* 452 * Make sure this block has a real zeroed extent allocated to 453 * it. 454 */ 455 nmaps = 1; 456 error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off, 457 XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map, 458 &nmaps); 459 if (error) 460 return error; 461 if (nmaps != 1) 462 return -EFSCORRUPTED; 463 464 trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map); 465 466 /* Commit new extent and all deferred work. 
*/ 467 error = xfs_defer_finish(&sc->tp); 468 if (error) 469 return error; 470 } 471 472 return 0; 473 } 474 475 /* 476 * Write data to each block of a file. The given range of the tempfile's data 477 * fork must already be populated with written extents. 478 */ 479 int 480 xrep_tempfile_copyin( 481 struct xfs_scrub *sc, 482 xfs_fileoff_t off, 483 xfs_filblks_t len, 484 xrep_tempfile_copyin_fn prep_fn, 485 void *data) 486 { 487 LIST_HEAD(buffers_list); 488 struct xfs_mount *mp = sc->mp; 489 struct xfs_buf *bp; 490 xfs_fileoff_t flush_mask; 491 xfs_fileoff_t end = off + len; 492 loff_t pos = XFS_FSB_TO_B(mp, off); 493 int error = 0; 494 495 ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode)); 496 497 /* Flush buffers to disk every 512K */ 498 flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1; 499 500 for (; off < end; off++, pos += mp->m_sb.sb_blocksize) { 501 struct xfs_bmbt_irec map; 502 int nmaps = 1; 503 504 /* Read block mapping for this file block. */ 505 error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0); 506 if (error) 507 goto out_err; 508 if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) { 509 error = -EFSCORRUPTED; 510 goto out_err; 511 } 512 513 /* Get the metadata buffer for this offset in the file. */ 514 error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, 515 XFS_FSB_TO_DADDR(mp, map.br_startblock), 516 mp->m_bsize, 0, &bp); 517 if (error) 518 goto out_err; 519 520 trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map); 521 522 /* Read in a block's worth of data from the xfile. */ 523 error = prep_fn(sc, bp, data); 524 if (error) { 525 xfs_trans_brelse(sc->tp, bp); 526 goto out_err; 527 } 528 529 /* Queue buffer, and flush if we have too much dirty data. */ 530 xfs_buf_delwri_queue_here(bp, &buffers_list); 531 xfs_trans_brelse(sc->tp, bp); 532 533 if (!(off & flush_mask)) { 534 error = xfs_buf_delwri_submit(&buffers_list); 535 if (error) 536 goto out_err; 537 } 538 } 539 540 /* 541 * Write the new blocks to disk. 
If the ordered list isn't empty after 542 * that, then something went wrong and we have to fail. This should 543 * never happen, but we'll check anyway. 544 */ 545 error = xfs_buf_delwri_submit(&buffers_list); 546 if (error) 547 goto out_err; 548 549 if (!list_empty(&buffers_list)) { 550 ASSERT(list_empty(&buffers_list)); 551 error = -EIO; 552 goto out_err; 553 } 554 555 return 0; 556 557 out_err: 558 xfs_buf_delwri_cancel(&buffers_list); 559 return error; 560 } 561 562 /* 563 * Set the temporary file's size. Caller must join the tempfile to the scrub 564 * transaction and is responsible for adjusting block mappings as needed. 565 */ 566 int 567 xrep_tempfile_set_isize( 568 struct xfs_scrub *sc, 569 unsigned long long isize) 570 { 571 if (sc->tempip->i_disk_size == isize) 572 return 0; 573 574 sc->tempip->i_disk_size = isize; 575 i_size_write(VFS_I(sc->tempip), isize); 576 return xrep_tempfile_roll_trans(sc); 577 } 578 579 /* 580 * Roll a repair transaction involving the temporary file. Caller must join 581 * both the temporary file and the file being scrubbed to the transaction. 582 * This function return with both inodes joined to a new scrub transaction, 583 * or the usual negative errno. 584 */ 585 int 586 xrep_tempfile_roll_trans( 587 struct xfs_scrub *sc) 588 { 589 int error; 590 591 xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE); 592 error = xrep_roll_trans(sc); 593 if (error) 594 return error; 595 596 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 597 return 0; 598 } 599 600 /* 601 * Fill out the mapping exchange request in preparation for atomically 602 * committing the contents of a metadata file that we've rebuilt in the temp 603 * file. 604 */ 605 STATIC int 606 xrep_tempexch_prep_request( 607 struct xfs_scrub *sc, 608 int whichfork, 609 xfs_fileoff_t off, 610 xfs_filblks_t len, 611 struct xrep_tempexch *tx) 612 { 613 struct xfs_exchmaps_req *req = &tx->req; 614 615 memset(tx, 0, sizeof(struct xrep_tempexch)); 616 617 /* COW forks don't exist on disk. 
*/ 618 if (whichfork == XFS_COW_FORK) { 619 ASSERT(0); 620 return -EINVAL; 621 } 622 623 /* Both files should have the relevant forks. */ 624 if (!xfs_ifork_ptr(sc->ip, whichfork) || 625 !xfs_ifork_ptr(sc->tempip, whichfork)) { 626 ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL); 627 ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL); 628 return -EINVAL; 629 } 630 631 /* Exchange all mappings in both forks. */ 632 req->ip1 = sc->tempip; 633 req->ip2 = sc->ip; 634 req->startoff1 = off; 635 req->startoff2 = off; 636 switch (whichfork) { 637 case XFS_ATTR_FORK: 638 req->flags |= XFS_EXCHMAPS_ATTR_FORK; 639 break; 640 case XFS_DATA_FORK: 641 /* Exchange sizes when exchanging all data fork mappings. */ 642 if (off == 0 && len == XFS_MAX_FILEOFF) 643 req->flags |= XFS_EXCHMAPS_SET_SIZES; 644 break; 645 } 646 req->blockcount = len; 647 648 return 0; 649 } 650 651 /* 652 * Fill out the mapping exchange resource estimation structures in preparation 653 * for exchanging the contents of a metadata file that we've rebuilt in the 654 * temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files. 655 */ 656 STATIC int 657 xrep_tempexch_estimate( 658 struct xfs_scrub *sc, 659 struct xrep_tempexch *tx) 660 { 661 struct xfs_exchmaps_req *req = &tx->req; 662 struct xfs_ifork *ifp; 663 struct xfs_ifork *tifp; 664 int whichfork = xfs_exchmaps_reqfork(req); 665 int state = 0; 666 667 /* 668 * The exchmaps code only knows how to exchange file fork space 669 * mappings. Any fork data in local format must be promoted to a 670 * single block before the exchange can take place. 671 */ 672 ifp = xfs_ifork_ptr(sc->ip, whichfork); 673 if (ifp->if_format == XFS_DINODE_FMT_LOCAL) 674 state |= 1; 675 676 tifp = xfs_ifork_ptr(sc->tempip, whichfork); 677 if (tifp->if_format == XFS_DINODE_FMT_LOCAL) 678 state |= 2; 679 680 switch (state) { 681 case 0: 682 /* Both files have mapped extents; use the regular estimate. 
*/ 683 return xfs_exchrange_estimate(req); 684 case 1: 685 /* 686 * The file being repaired is in local format, but the temp 687 * file has mapped extents. To perform the exchange, the file 688 * being repaired must have its shorform data converted to an 689 * ondisk block so that the forks will be in extents format. 690 * We need one resblk for the conversion; the number of 691 * exchanges is (worst case) the temporary file's extent count 692 * plus the block we converted. 693 */ 694 req->ip1_bcount = sc->tempip->i_nblocks; 695 req->ip2_bcount = 1; 696 req->nr_exchanges = 1 + tifp->if_nextents; 697 req->resblks = 1; 698 break; 699 case 2: 700 /* 701 * The temporary file is in local format, but the file being 702 * repaired has mapped extents. To perform the exchange, the 703 * temp file must have its shortform data converted to an 704 * ondisk block, and the fork changed to extents format. We 705 * need one resblk for the conversion; the number of exchanges 706 * is (worst case) the extent count of the file being repaired 707 * plus the block we converted. 708 */ 709 req->ip1_bcount = 1; 710 req->ip2_bcount = sc->ip->i_nblocks; 711 req->nr_exchanges = 1 + ifp->if_nextents; 712 req->resblks = 1; 713 break; 714 case 3: 715 /* 716 * Both forks are in local format. To perform the exchange, 717 * both files must have their shortform data converted to 718 * fsblocks, and both forks must be converted to extents 719 * format. We need two resblks for the two conversions, and 720 * the number of exchanges is 1 since there's only one block at 721 * fileoff 0. Presumably, the caller could not exchange the 722 * two inode fork areas directly. 723 */ 724 req->ip1_bcount = 1; 725 req->ip2_bcount = 1; 726 req->nr_exchanges = 1; 727 req->resblks = 2; 728 break; 729 } 730 731 return xfs_exchmaps_estimate_overhead(req); 732 } 733 734 /* 735 * Obtain a quota reservation to make sure we don't hit EDQUOT. 
We can skip 736 * this if quota enforcement is disabled or if both inodes' dquots are the 737 * same. The qretry structure must be initialized to zeroes before the first 738 * call to this function. 739 */ 740 STATIC int 741 xrep_tempexch_reserve_quota( 742 struct xfs_scrub *sc, 743 const struct xrep_tempexch *tx) 744 { 745 struct xfs_trans *tp = sc->tp; 746 const struct xfs_exchmaps_req *req = &tx->req; 747 int64_t ddelta, rdelta; 748 int error; 749 750 /* 751 * Don't bother with a quota reservation if we're not enforcing them 752 * or the two inodes have the same dquots. 753 */ 754 if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || 755 xfs_is_metadir_inode(req->ip1) || 756 (req->ip1->i_udquot == req->ip2->i_udquot && 757 req->ip1->i_gdquot == req->ip2->i_gdquot && 758 req->ip1->i_pdquot == req->ip2->i_pdquot)) 759 return 0; 760 761 /* 762 * Quota reservation for each file comes from two sources. First, we 763 * need to account for any net gain in mapped blocks during the 764 * exchange. Second, we need reservation for the gross gain in mapped 765 * blocks so that we don't trip over any quota block reservation 766 * assertions. We must reserve the gross gain because the quota code 767 * subtracts from bcount the number of blocks that we unmap; it does 768 * not add that quantity back to the quota block reservation. 
769 */ 770 ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount); 771 rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount); 772 error = xfs_trans_reserve_quota_nblks(tp, req->ip1, 773 ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount, 774 true); 775 if (error) 776 return error; 777 778 ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount); 779 rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount); 780 return xfs_trans_reserve_quota_nblks(tp, req->ip2, 781 ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount, 782 true); 783 } 784 785 /* 786 * Prepare an existing transaction for an atomic file contents exchange. 787 * 788 * This function fills out the mapping exchange request and resource estimation 789 * structures in preparation for exchanging the contents of a metadata file 790 * that has been rebuilt in the temp file. Next, it reserves space and quota 791 * for the transaction. 792 * 793 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary 794 * file. The caller must join both inodes to the transaction with no unlock 795 * flags, and is responsible for dropping both ILOCKs when appropriate. Only 796 * use this when those ILOCKs cannot be dropped. 797 */ 798 int 799 xrep_tempexch_trans_reserve( 800 struct xfs_scrub *sc, 801 int whichfork, 802 xfs_fileoff_t off, 803 xfs_filblks_t len, 804 struct xrep_tempexch *tx) 805 { 806 int error; 807 808 ASSERT(sc->tp != NULL); 809 xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL); 810 xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL); 811 812 error = xrep_tempexch_prep_request(sc, whichfork, off, len, tx); 813 if (error) 814 return error; 815 816 error = xfs_exchmaps_estimate(&tx->req); 817 if (error) 818 return error; 819 820 error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0); 821 if (error) 822 return error; 823 824 return xrep_tempexch_reserve_quota(sc, tx); 825 } 826 827 /* 828 * Create a new transaction for a file contents exchange. 
829 * 830 * This function fills out the mapping excahange request and resource 831 * estimation structures in preparation for exchanging the contents of a 832 * metadata file that has been rebuilt in the temp file. Next, it reserves 833 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and 834 * reserves quota for the transaction. 835 * 836 * The caller is responsible for dropping both ILOCKs when appropriate. 837 */ 838 int 839 xrep_tempexch_trans_alloc( 840 struct xfs_scrub *sc, 841 int whichfork, 842 struct xrep_tempexch *tx) 843 { 844 unsigned int flags = 0; 845 int error; 846 847 ASSERT(sc->tp == NULL); 848 ASSERT(xfs_has_exchange_range(sc->mp)); 849 850 error = xrep_tempexch_prep_request(sc, whichfork, 0, XFS_MAX_FILEOFF, 851 tx); 852 if (error) 853 return error; 854 855 error = xrep_tempexch_estimate(sc, tx); 856 if (error) 857 return error; 858 859 if (xfs_has_lazysbcount(sc->mp)) 860 flags |= XFS_TRANS_RES_FDBLKS; 861 862 error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, 863 tx->req.resblks, 0, flags, &sc->tp); 864 if (error) 865 return error; 866 867 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 868 sc->ilock_flags |= XFS_ILOCK_EXCL; 869 xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip); 870 871 return xrep_tempexch_reserve_quota(sc, tx); 872 } 873 874 /* 875 * Exchange file mappings (and hence file contents) between the file being 876 * repaired and the temporary file. Returns with both inodes locked and joined 877 * to a clean scrub transaction. 878 */ 879 int 880 xrep_tempexch_contents( 881 struct xfs_scrub *sc, 882 struct xrep_tempexch *tx) 883 { 884 int error; 885 886 ASSERT(xfs_has_exchange_range(sc->mp)); 887 888 xfs_exchange_mappings(sc->tp, &tx->req); 889 error = xfs_defer_finish(&sc->tp); 890 if (error) 891 return error; 892 893 /* 894 * If we exchanged the ondisk sizes of two metadata files, we must 895 * exchanged the incore sizes as well. 
896 */ 897 if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) { 898 loff_t temp; 899 900 temp = i_size_read(VFS_I(sc->ip)); 901 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); 902 i_size_write(VFS_I(sc->tempip), temp); 903 } 904 905 return 0; 906 } 907 908 /* 909 * Write local format data from one of the temporary file's forks into the same 910 * fork of file being repaired, and exchange the file sizes, if appropriate. 911 * Caller must ensure that the file being repaired has enough fork space to 912 * hold all the bytes. 913 */ 914 void 915 xrep_tempfile_copyout_local( 916 struct xfs_scrub *sc, 917 int whichfork) 918 { 919 struct xfs_ifork *temp_ifp; 920 struct xfs_ifork *ifp; 921 unsigned int ilog_flags = XFS_ILOG_CORE; 922 923 temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork); 924 ifp = xfs_ifork_ptr(sc->ip, whichfork); 925 926 ASSERT(temp_ifp != NULL); 927 ASSERT(ifp != NULL); 928 ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL); 929 ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); 930 931 switch (whichfork) { 932 case XFS_DATA_FORK: 933 ASSERT(sc->tempip->i_disk_size <= 934 xfs_inode_data_fork_size(sc->ip)); 935 break; 936 case XFS_ATTR_FORK: 937 ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff); 938 break; 939 default: 940 ASSERT(0); 941 return; 942 } 943 944 /* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */ 945 xfs_idestroy_fork(ifp); 946 xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data, 947 temp_ifp->if_bytes); 948 949 if (whichfork == XFS_DATA_FORK) { 950 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); 951 sc->ip->i_disk_size = sc->tempip->i_disk_size; 952 } 953 954 ilog_flags |= xfs_ilog_fdata(whichfork); 955 xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags); 956 } 957 958 /* Decide if a given XFS inode is a temporary file for a repair. 
*/
bool
xrep_is_tempfile(
	const struct xfs_inode	*ip)
{
	const struct inode	*inode = &ip->i_vnode;
	struct xfs_mount	*mp = ip->i_mount;

	/*
	 * Files in the metadata directory tree also have S_PRIVATE set and
	 * IOP_XATTR unset, so we must distinguish them separately.  We (ab)use
	 * the IRECOVERY flag to mark temporary metadir inodes knowing that the
	 * end of log recovery clears IRECOVERY, so the only ones that can
	 * exist during online repair are the ones we create.
	 */
	if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
		return __xfs_iflags_test(ip, XFS_IRECOVERY);

	/* Otherwise a repair tempfile is a private inode without xattr ops. */
	return IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR);
}