1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (c) 2021-2024 Oracle. All Rights Reserved. 4 * Author: Darrick J. Wong <djwong@kernel.org> 5 */ 6 #include "xfs.h" 7 #include "xfs_fs.h" 8 #include "xfs_shared.h" 9 #include "xfs_format.h" 10 #include "xfs_trans_resv.h" 11 #include "xfs_mount.h" 12 #include "xfs_log_format.h" 13 #include "xfs_trans.h" 14 #include "xfs_inode.h" 15 #include "xfs_ialloc.h" 16 #include "xfs_quota.h" 17 #include "xfs_bmap.h" 18 #include "xfs_bmap_btree.h" 19 #include "xfs_trans_space.h" 20 #include "xfs_dir2.h" 21 #include "xfs_exchrange.h" 22 #include "xfs_exchmaps.h" 23 #include "xfs_defer.h" 24 #include "xfs_symlink_remote.h" 25 #include "xfs_metafile.h" 26 #include "scrub/scrub.h" 27 #include "scrub/common.h" 28 #include "scrub/repair.h" 29 #include "scrub/trace.h" 30 #include "scrub/tempfile.h" 31 #include "scrub/tempexch.h" 32 #include "scrub/xfile.h" 33 34 /* 35 * Create a temporary file for reconstructing metadata, with the intention of 36 * atomically exchanging the temporary file's contents with the file that's 37 * being repaired. 38 */ 39 int 40 xrep_tempfile_create( 41 struct xfs_scrub *sc, 42 uint16_t mode) 43 { 44 struct xfs_icreate_args args = { 45 .pip = sc->mp->m_rootip, 46 .mode = mode, 47 .flags = XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE, 48 }; 49 struct xfs_mount *mp = sc->mp; 50 struct xfs_trans *tp = NULL; 51 struct xfs_dquot *udqp; 52 struct xfs_dquot *gdqp; 53 struct xfs_dquot *pdqp; 54 struct xfs_trans_res *tres; 55 struct xfs_inode *dp = mp->m_rootip; 56 xfs_ino_t ino; 57 unsigned int resblks; 58 bool is_dir = S_ISDIR(mode); 59 int error; 60 61 if (xfs_is_shutdown(mp)) 62 return -EIO; 63 if (xfs_is_readonly(mp)) 64 return -EROFS; 65 66 ASSERT(sc->tp == NULL); 67 ASSERT(sc->tempip == NULL); 68 69 /* 70 * Make sure that we have allocated dquot(s) on disk. The temporary 71 * inode should be completely root owned so that we don't fail due to 72 * quota limits. 73 */ 74 error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp); 75 if (error) 76 return error; 77 78 if (is_dir) { 79 resblks = xfs_mkdir_space_res(mp, 0); 80 tres = &M_RES(mp)->tr_mkdir; 81 } else { 82 resblks = XFS_IALLOC_SPACE_RES(mp); 83 tres = &M_RES(mp)->tr_create_tmpfile; 84 } 85 86 error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks, 87 &tp); 88 if (error) 89 goto out_release_dquots; 90 91 /* Allocate inode, set up directory. */ 92 error = xfs_dialloc(&tp, &args, &ino); 93 if (error) 94 goto out_trans_cancel; 95 error = xfs_icreate(tp, ino, &args, &sc->tempip); 96 if (error) 97 goto out_trans_cancel; 98 99 /* We don't touch file data, so drop the realtime flags. */ 100 sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); 101 xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE); 102 103 /* 104 * Mark our temporary file as private so that LSMs and the ACL code 105 * don't try to add their own metadata or reason about these files. 106 * The file should never be exposed to userspace. 107 */ 108 VFS_I(sc->tempip)->i_flags |= S_PRIVATE; 109 VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR; 110 111 if (is_dir) { 112 error = xfs_dir_init(tp, sc->tempip, dp); 113 if (error) 114 goto out_trans_cancel; 115 } else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) { 116 /* 117 * Initialize the temporary symlink with a meaningless target 118 * that won't trip the verifiers. Repair must rewrite the 119 * target with meaningful content before swapping with the file 120 * being repaired. A single-byte target will not write a 121 * remote target block, so the owner is irrelevant. 122 */ 123 error = xfs_symlink_write_target(tp, sc->tempip, 124 sc->tempip->i_ino, ".", 1, 0, 0); 125 if (error) 126 goto out_trans_cancel; 127 } 128 129 /* 130 * Attach the dquot(s) to the inodes and modify them incore. 131 * These ids of the inode couldn't have changed since the new 132 * inode has been locked ever since it was created. 133 */ 134 xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp); 135 136 /* 137 * Put our temp file on the unlinked list so it's purged automatically. 138 * All file-based metadata being reconstructed using this file must be 139 * atomically exchanged with the original file because the contents 140 * here will be purged when the inode is dropped or log recovery cleans 141 * out the unlinked list. 142 */ 143 error = xfs_iunlink(tp, sc->tempip); 144 if (error) 145 goto out_trans_cancel; 146 147 error = xfs_trans_commit(tp); 148 if (error) 149 goto out_release_inode; 150 151 trace_xrep_tempfile_create(sc); 152 153 xfs_qm_dqrele(udqp); 154 xfs_qm_dqrele(gdqp); 155 xfs_qm_dqrele(pdqp); 156 157 /* Finish setting up the incore / vfs context. */ 158 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 159 xfs_setup_iops(sc->tempip); 160 xfs_finish_inode_setup(sc->tempip); 161 162 sc->temp_ilock_flags = 0; 163 return error; 164 165 out_trans_cancel: 166 xfs_trans_cancel(tp); 167 out_release_inode: 168 /* 169 * Wait until after the current transaction is aborted to finish the 170 * setup of the inode and release the inode. This prevents recursive 171 * transactions and deadlocks from xfs_inactive. 172 */ 173 if (sc->tempip) { 174 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 175 xfs_finish_inode_setup(sc->tempip); 176 xchk_irele(sc, sc->tempip); 177 } 178 out_release_dquots: 179 xfs_qm_dqrele(udqp); 180 xfs_qm_dqrele(gdqp); 181 xfs_qm_dqrele(pdqp); 182 183 return error; 184 } 185 186 /* 187 * Move sc->tempip from the regular directory tree to the metadata directory 188 * tree if sc->ip is part of the metadata directory tree and tempip has an 189 * eligible file mode. 190 * 191 * Temporary files have to be created before we even know which inode we're 192 * going to scrub, so we assume that they will be part of the regular directory 193 * tree. If it turns out that we're actually scrubbing a file from the 194 * metadata directory tree, we have to subtract the temp file from the root 195 * dquots and detach the dquots prior to setting the METADATA iflag. However, 196 * the scrub setup functions grab sc->ip and create sc->tempip before we 197 * actually get around to checking if the file mode is the right type for the 198 * scrubber. 199 */ 200 int 201 xrep_tempfile_adjust_directory_tree( 202 struct xfs_scrub *sc) 203 { 204 int error; 205 206 if (!sc->tempip) 207 return 0; 208 209 ASSERT(sc->tp == NULL); 210 ASSERT(!xfs_is_metadir_inode(sc->tempip)); 211 212 if (!sc->ip || !xfs_is_metadir_inode(sc->ip)) 213 return 0; 214 if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) && 215 !S_ISREG(VFS_I(sc->tempip)->i_mode)) 216 return 0; 217 218 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); 219 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 220 221 error = xchk_trans_alloc(sc, 0); 222 if (error) 223 goto out_iolock; 224 225 xrep_tempfile_ilock(sc); 226 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 227 228 /* Metadir files are not accounted in quota, so drop icount */ 229 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L); 230 xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN); 231 232 error = xrep_trans_commit(sc); 233 if (error) 234 goto out_ilock; 235 236 xfs_iflags_set(sc->tempip, XFS_IRECOVERY); 237 xfs_qm_dqdetach(sc->tempip); 238 out_ilock: 239 xrep_tempfile_iunlock(sc); 240 out_iolock: 241 xrep_tempfile_iounlock(sc); 242 return error; 243 } 244 245 /* 246 * Remove this temporary file from the metadata directory tree so that it can 247 * be inactivated the normal way. 248 */ 249 STATIC int 250 xrep_tempfile_remove_metadir( 251 struct xfs_scrub *sc) 252 { 253 int error; 254 255 if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip)) 256 return 0; 257 258 ASSERT(sc->tp == NULL); 259 260 xfs_iflags_clear(sc->tempip, XFS_IRECOVERY); 261 262 xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL); 263 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 264 265 error = xchk_trans_alloc(sc, 0); 266 if (error) 267 goto out_iolock; 268 269 xrep_tempfile_ilock(sc); 270 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 271 272 xfs_metafile_clear_iflag(sc->tp, sc->tempip); 273 274 /* Non-metadir files are accounted in quota, so bump bcount/icount */ 275 error = xfs_qm_dqattach_locked(sc->tempip, false); 276 if (error) 277 goto out_cancel; 278 279 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L); 280 xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT, 281 sc->tempip->i_nblocks); 282 error = xrep_trans_commit(sc); 283 goto out_ilock; 284 285 out_cancel: 286 xchk_trans_cancel(sc); 287 out_ilock: 288 xrep_tempfile_iunlock(sc); 289 out_iolock: 290 xrep_tempfile_iounlock(sc); 291 return error; 292 } 293 294 /* Take IOLOCK_EXCL on the temporary file, maybe. */ 295 bool 296 xrep_tempfile_iolock_nowait( 297 struct xfs_scrub *sc) 298 { 299 if (xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL)) { 300 sc->temp_ilock_flags |= XFS_IOLOCK_EXCL; 301 return true; 302 } 303 304 return false; 305 } 306 307 /* 308 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK. 309 * In theory nobody else should hold the tempfile's IOLOCK, but we use trylock 310 * to avoid deadlocks and lockdep complaints. 311 */ 312 int 313 xrep_tempfile_iolock_polled( 314 struct xfs_scrub *sc) 315 { 316 int error = 0; 317 318 while (!xrep_tempfile_iolock_nowait(sc)) { 319 if (xchk_should_terminate(sc, &error)) 320 return error; 321 delay(1); 322 } 323 324 return 0; 325 } 326 327 /* Release IOLOCK_EXCL on the temporary file. */ 328 void 329 xrep_tempfile_iounlock( 330 struct xfs_scrub *sc) 331 { 332 xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL); 333 sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL; 334 } 335 336 /* Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL. */ 337 void 338 xrep_tempfile_ilock( 339 struct xfs_scrub *sc) 340 { 341 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 342 xfs_ilock(sc->tempip, XFS_ILOCK_EXCL); 343 } 344 345 /* Try to grab ILOCK_EXCL on the temporary file. */ 346 bool 347 xrep_tempfile_ilock_nowait( 348 struct xfs_scrub *sc) 349 { 350 if (xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL)) { 351 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 352 return true; 353 } 354 355 return false; 356 } 357 358 /* Unlock ILOCK_EXCL on the temporary file after an update. */ 359 void 360 xrep_tempfile_iunlock( 361 struct xfs_scrub *sc) 362 { 363 xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL); 364 sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL; 365 } 366 367 /* 368 * Begin the process of making changes to both the file being scrubbed and 369 * the temporary file by taking ILOCK_EXCL on both. 370 */ 371 void 372 xrep_tempfile_ilock_both( 373 struct xfs_scrub *sc) 374 { 375 xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL); 376 sc->ilock_flags |= XFS_ILOCK_EXCL; 377 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 378 } 379 380 /* Unlock ILOCK_EXCL on both files. */ 381 void 382 xrep_tempfile_iunlock_both( 383 struct xfs_scrub *sc) 384 { 385 xrep_tempfile_iunlock(sc); 386 xchk_iunlock(sc, XFS_ILOCK_EXCL); 387 } 388 389 /* Release the temporary file. */ 390 void 391 xrep_tempfile_rele( 392 struct xfs_scrub *sc) 393 { 394 if (!sc->tempip) 395 return; 396 397 if (sc->temp_ilock_flags) { 398 xfs_iunlock(sc->tempip, sc->temp_ilock_flags); 399 sc->temp_ilock_flags = 0; 400 } 401 402 xrep_tempfile_remove_metadir(sc); 403 xchk_irele(sc, sc->tempip); 404 sc->tempip = NULL; 405 } 406 407 /* 408 * Make sure that the given range of the data fork of the temporary file is 409 * mapped to written blocks. The caller must ensure that both inodes are 410 * joined to the transaction. 411 */ 412 int 413 xrep_tempfile_prealloc( 414 struct xfs_scrub *sc, 415 xfs_fileoff_t off, 416 xfs_filblks_t len) 417 { 418 struct xfs_bmbt_irec map; 419 xfs_fileoff_t end = off + len; 420 int error; 421 422 ASSERT(sc->tempip != NULL); 423 ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip)); 424 425 for (; off < end; off = map.br_startoff + map.br_blockcount) { 426 int nmaps = 1; 427 428 /* 429 * If we have a real extent mapping this block then we're 430 * in ok shape. 431 */ 432 error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps, 433 XFS_DATA_FORK); 434 if (error) 435 return error; 436 if (nmaps == 0) { 437 ASSERT(nmaps != 0); 438 return -EFSCORRUPTED; 439 } 440 441 if (xfs_bmap_is_written_extent(&map)) 442 continue; 443 444 /* 445 * If we find a delalloc reservation then something is very 446 * very wrong. Bail out. 447 */ 448 if (map.br_startblock == DELAYSTARTBLOCK) 449 return -EFSCORRUPTED; 450 451 /* 452 * Make sure this block has a real zeroed extent allocated to 453 * it. 454 */ 455 nmaps = 1; 456 error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off, 457 XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map, 458 &nmaps); 459 if (error) 460 return error; 461 if (nmaps != 1) 462 return -EFSCORRUPTED; 463 464 trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map); 465 466 /* Commit new extent and all deferred work. */ 467 error = xfs_defer_finish(&sc->tp); 468 if (error) 469 return error; 470 } 471 472 return 0; 473 } 474 475 /* 476 * Write data to each block of a file. The given range of the tempfile's data 477 * fork must already be populated with written extents. 478 */ 479 int 480 xrep_tempfile_copyin( 481 struct xfs_scrub *sc, 482 xfs_fileoff_t off, 483 xfs_filblks_t len, 484 xrep_tempfile_copyin_fn prep_fn, 485 void *data) 486 { 487 LIST_HEAD(buffers_list); 488 struct xfs_mount *mp = sc->mp; 489 struct xfs_buf *bp; 490 xfs_fileoff_t flush_mask; 491 xfs_fileoff_t end = off + len; 492 loff_t pos = XFS_FSB_TO_B(mp, off); 493 int error = 0; 494 495 ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode)); 496 497 /* Flush buffers to disk every 512K */ 498 flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1; 499 500 for (; off < end; off++, pos += mp->m_sb.sb_blocksize) { 501 struct xfs_bmbt_irec map; 502 int nmaps = 1; 503 504 /* Read block mapping for this file block. */ 505 error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0); 506 if (error) 507 goto out_err; 508 if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) { 509 error = -EFSCORRUPTED; 510 goto out_err; 511 } 512 513 /* Get the metadata buffer for this offset in the file. */ 514 error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp, 515 XFS_FSB_TO_DADDR(mp, map.br_startblock), 516 mp->m_bsize, 0, &bp); 517 if (error) 518 goto out_err; 519 520 trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map); 521 522 /* Read in a block's worth of data from the xfile. */ 523 error = prep_fn(sc, bp, data); 524 if (error) { 525 xfs_trans_brelse(sc->tp, bp); 526 goto out_err; 527 } 528 529 /* Queue buffer, and flush if we have too much dirty data. */ 530 xfs_buf_delwri_queue_here(bp, &buffers_list); 531 xfs_trans_brelse(sc->tp, bp); 532 533 if (!(off & flush_mask)) { 534 error = xfs_buf_delwri_submit(&buffers_list); 535 if (error) 536 goto out_err; 537 } 538 } 539 540 /* 541 * Write the new blocks to disk. If the ordered list isn't empty after 542 * that, then something went wrong and we have to fail. This should 543 * never happen, but we'll check anyway. 544 */ 545 error = xfs_buf_delwri_submit(&buffers_list); 546 if (error) 547 goto out_err; 548 549 if (!list_empty(&buffers_list)) { 550 ASSERT(list_empty(&buffers_list)); 551 error = -EIO; 552 goto out_err; 553 } 554 555 return 0; 556 557 out_err: 558 xfs_buf_delwri_cancel(&buffers_list); 559 return error; 560 } 561 562 /* 563 * Set the temporary file's size. Caller must join the tempfile to the scrub 564 * transaction and is responsible for adjusting block mappings as needed. 565 */ 566 int 567 xrep_tempfile_set_isize( 568 struct xfs_scrub *sc, 569 unsigned long long isize) 570 { 571 if (sc->tempip->i_disk_size == isize) 572 return 0; 573 574 sc->tempip->i_disk_size = isize; 575 i_size_write(VFS_I(sc->tempip), isize); 576 return xrep_tempfile_roll_trans(sc); 577 } 578 579 /* 580 * Roll a repair transaction involving the temporary file. Caller must join 581 * both the temporary file and the file being scrubbed to the transaction. 582 * This function return with both inodes joined to a new scrub transaction, 583 * or the usual negative errno. 584 */ 585 int 586 xrep_tempfile_roll_trans( 587 struct xfs_scrub *sc) 588 { 589 int error; 590 591 xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE); 592 error = xrep_roll_trans(sc); 593 if (error) 594 return error; 595 596 xfs_trans_ijoin(sc->tp, sc->tempip, 0); 597 return 0; 598 } 599 600 /* 601 * Fill out the mapping exchange request in preparation for atomically 602 * committing the contents of a metadata file that we've rebuilt in the temp 603 * file. 604 */ 605 STATIC int 606 xrep_tempexch_prep_request( 607 struct xfs_scrub *sc, 608 int whichfork, 609 struct xrep_tempexch *tx) 610 { 611 struct xfs_exchmaps_req *req = &tx->req; 612 613 memset(tx, 0, sizeof(struct xrep_tempexch)); 614 615 /* COW forks don't exist on disk. */ 616 if (whichfork == XFS_COW_FORK) { 617 ASSERT(0); 618 return -EINVAL; 619 } 620 621 /* Both files should have the relevant forks. */ 622 if (!xfs_ifork_ptr(sc->ip, whichfork) || 623 !xfs_ifork_ptr(sc->tempip, whichfork)) { 624 ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL); 625 ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL); 626 return -EINVAL; 627 } 628 629 /* Exchange all mappings in both forks. */ 630 req->ip1 = sc->tempip; 631 req->ip2 = sc->ip; 632 req->startoff1 = 0; 633 req->startoff2 = 0; 634 switch (whichfork) { 635 case XFS_ATTR_FORK: 636 req->flags |= XFS_EXCHMAPS_ATTR_FORK; 637 break; 638 case XFS_DATA_FORK: 639 /* Always exchange sizes when exchanging data fork mappings. */ 640 req->flags |= XFS_EXCHMAPS_SET_SIZES; 641 break; 642 } 643 req->blockcount = XFS_MAX_FILEOFF; 644 645 return 0; 646 } 647 648 /* 649 * Fill out the mapping exchange resource estimation structures in preparation 650 * for exchanging the contents of a metadata file that we've rebuilt in the 651 * temp file. Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files. 652 */ 653 STATIC int 654 xrep_tempexch_estimate( 655 struct xfs_scrub *sc, 656 struct xrep_tempexch *tx) 657 { 658 struct xfs_exchmaps_req *req = &tx->req; 659 struct xfs_ifork *ifp; 660 struct xfs_ifork *tifp; 661 int whichfork = xfs_exchmaps_reqfork(req); 662 int state = 0; 663 664 /* 665 * The exchmaps code only knows how to exchange file fork space 666 * mappings. Any fork data in local format must be promoted to a 667 * single block before the exchange can take place. 668 */ 669 ifp = xfs_ifork_ptr(sc->ip, whichfork); 670 if (ifp->if_format == XFS_DINODE_FMT_LOCAL) 671 state |= 1; 672 673 tifp = xfs_ifork_ptr(sc->tempip, whichfork); 674 if (tifp->if_format == XFS_DINODE_FMT_LOCAL) 675 state |= 2; 676 677 switch (state) { 678 case 0: 679 /* Both files have mapped extents; use the regular estimate. */ 680 return xfs_exchrange_estimate(req); 681 case 1: 682 /* 683 * The file being repaired is in local format, but the temp 684 * file has mapped extents. To perform the exchange, the file 685 * being repaired must have its shorform data converted to an 686 * ondisk block so that the forks will be in extents format. 687 * We need one resblk for the conversion; the number of 688 * exchanges is (worst case) the temporary file's extent count 689 * plus the block we converted. 690 */ 691 req->ip1_bcount = sc->tempip->i_nblocks; 692 req->ip2_bcount = 1; 693 req->nr_exchanges = 1 + tifp->if_nextents; 694 req->resblks = 1; 695 break; 696 case 2: 697 /* 698 * The temporary file is in local format, but the file being 699 * repaired has mapped extents. To perform the exchange, the 700 * temp file must have its shortform data converted to an 701 * ondisk block, and the fork changed to extents format. We 702 * need one resblk for the conversion; the number of exchanges 703 * is (worst case) the extent count of the file being repaired 704 * plus the block we converted. 705 */ 706 req->ip1_bcount = 1; 707 req->ip2_bcount = sc->ip->i_nblocks; 708 req->nr_exchanges = 1 + ifp->if_nextents; 709 req->resblks = 1; 710 break; 711 case 3: 712 /* 713 * Both forks are in local format. To perform the exchange, 714 * both files must have their shortform data converted to 715 * fsblocks, and both forks must be converted to extents 716 * format. We need two resblks for the two conversions, and 717 * the number of exchanges is 1 since there's only one block at 718 * fileoff 0. Presumably, the caller could not exchange the 719 * two inode fork areas directly. 720 */ 721 req->ip1_bcount = 1; 722 req->ip2_bcount = 1; 723 req->nr_exchanges = 1; 724 req->resblks = 2; 725 break; 726 } 727 728 return xfs_exchmaps_estimate_overhead(req); 729 } 730 731 /* 732 * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip 733 * this if quota enforcement is disabled or if both inodes' dquots are the 734 * same. The qretry structure must be initialized to zeroes before the first 735 * call to this function. 736 */ 737 STATIC int 738 xrep_tempexch_reserve_quota( 739 struct xfs_scrub *sc, 740 const struct xrep_tempexch *tx) 741 { 742 struct xfs_trans *tp = sc->tp; 743 const struct xfs_exchmaps_req *req = &tx->req; 744 int64_t ddelta, rdelta; 745 int error; 746 747 /* 748 * Don't bother with a quota reservation if we're not enforcing them 749 * or the two inodes have the same dquots. 750 */ 751 if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || 752 (req->ip1->i_udquot == req->ip2->i_udquot && 753 req->ip1->i_gdquot == req->ip2->i_gdquot && 754 req->ip1->i_pdquot == req->ip2->i_pdquot)) 755 return 0; 756 757 /* 758 * Quota reservation for each file comes from two sources. First, we 759 * need to account for any net gain in mapped blocks during the 760 * exchange. Second, we need reservation for the gross gain in mapped 761 * blocks so that we don't trip over any quota block reservation 762 * assertions. We must reserve the gross gain because the quota code 763 * subtracts from bcount the number of blocks that we unmap; it does 764 * not add that quantity back to the quota block reservation. 765 */ 766 ddelta = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount); 767 rdelta = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount); 768 error = xfs_trans_reserve_quota_nblks(tp, req->ip1, 769 ddelta + req->ip1_bcount, rdelta + req->ip1_rtbcount, 770 true); 771 if (error) 772 return error; 773 774 ddelta = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount); 775 rdelta = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount); 776 return xfs_trans_reserve_quota_nblks(tp, req->ip2, 777 ddelta + req->ip2_bcount, rdelta + req->ip2_rtbcount, 778 true); 779 } 780 781 /* 782 * Prepare an existing transaction for an atomic file contents exchange. 783 * 784 * This function fills out the mapping exchange request and resource estimation 785 * structures in preparation for exchanging the contents of a metadata file 786 * that has been rebuilt in the temp file. Next, it reserves space and quota 787 * for the transaction. 788 * 789 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary 790 * file. The caller must join both inodes to the transaction with no unlock 791 * flags, and is responsible for dropping both ILOCKs when appropriate. Only 792 * use this when those ILOCKs cannot be dropped. 793 */ 794 int 795 xrep_tempexch_trans_reserve( 796 struct xfs_scrub *sc, 797 int whichfork, 798 struct xrep_tempexch *tx) 799 { 800 int error; 801 802 ASSERT(sc->tp != NULL); 803 xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL); 804 xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL); 805 806 error = xrep_tempexch_prep_request(sc, whichfork, tx); 807 if (error) 808 return error; 809 810 error = xfs_exchmaps_estimate(&tx->req); 811 if (error) 812 return error; 813 814 error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0); 815 if (error) 816 return error; 817 818 return xrep_tempexch_reserve_quota(sc, tx); 819 } 820 821 /* 822 * Create a new transaction for a file contents exchange. 823 * 824 * This function fills out the mapping excahange request and resource 825 * estimation structures in preparation for exchanging the contents of a 826 * metadata file that has been rebuilt in the temp file. Next, it reserves 827 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and 828 * reserves quota for the transaction. 829 * 830 * The caller is responsible for dropping both ILOCKs when appropriate. 831 */ 832 int 833 xrep_tempexch_trans_alloc( 834 struct xfs_scrub *sc, 835 int whichfork, 836 struct xrep_tempexch *tx) 837 { 838 unsigned int flags = 0; 839 int error; 840 841 ASSERT(sc->tp == NULL); 842 ASSERT(xfs_has_exchange_range(sc->mp)); 843 844 error = xrep_tempexch_prep_request(sc, whichfork, tx); 845 if (error) 846 return error; 847 848 error = xrep_tempexch_estimate(sc, tx); 849 if (error) 850 return error; 851 852 if (xfs_has_lazysbcount(sc->mp)) 853 flags |= XFS_TRANS_RES_FDBLKS; 854 855 error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate, 856 tx->req.resblks, 0, flags, &sc->tp); 857 if (error) 858 return error; 859 860 sc->temp_ilock_flags |= XFS_ILOCK_EXCL; 861 sc->ilock_flags |= XFS_ILOCK_EXCL; 862 xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip); 863 864 return xrep_tempexch_reserve_quota(sc, tx); 865 } 866 867 /* 868 * Exchange file mappings (and hence file contents) between the file being 869 * repaired and the temporary file. Returns with both inodes locked and joined 870 * to a clean scrub transaction. 871 */ 872 int 873 xrep_tempexch_contents( 874 struct xfs_scrub *sc, 875 struct xrep_tempexch *tx) 876 { 877 int error; 878 879 ASSERT(xfs_has_exchange_range(sc->mp)); 880 881 xfs_exchange_mappings(sc->tp, &tx->req); 882 error = xfs_defer_finish(&sc->tp); 883 if (error) 884 return error; 885 886 /* 887 * If we exchanged the ondisk sizes of two metadata files, we must 888 * exchanged the incore sizes as well. 889 */ 890 if (tx->req.flags & XFS_EXCHMAPS_SET_SIZES) { 891 loff_t temp; 892 893 temp = i_size_read(VFS_I(sc->ip)); 894 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); 895 i_size_write(VFS_I(sc->tempip), temp); 896 } 897 898 return 0; 899 } 900 901 /* 902 * Write local format data from one of the temporary file's forks into the same 903 * fork of file being repaired, and exchange the file sizes, if appropriate. 904 * Caller must ensure that the file being repaired has enough fork space to 905 * hold all the bytes. 906 */ 907 void 908 xrep_tempfile_copyout_local( 909 struct xfs_scrub *sc, 910 int whichfork) 911 { 912 struct xfs_ifork *temp_ifp; 913 struct xfs_ifork *ifp; 914 unsigned int ilog_flags = XFS_ILOG_CORE; 915 916 temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork); 917 ifp = xfs_ifork_ptr(sc->ip, whichfork); 918 919 ASSERT(temp_ifp != NULL); 920 ASSERT(ifp != NULL); 921 ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL); 922 ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL); 923 924 switch (whichfork) { 925 case XFS_DATA_FORK: 926 ASSERT(sc->tempip->i_disk_size <= 927 xfs_inode_data_fork_size(sc->ip)); 928 break; 929 case XFS_ATTR_FORK: 930 ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff); 931 break; 932 default: 933 ASSERT(0); 934 return; 935 } 936 937 /* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */ 938 xfs_idestroy_fork(ifp); 939 xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data, 940 temp_ifp->if_bytes); 941 942 if (whichfork == XFS_DATA_FORK) { 943 i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip))); 944 sc->ip->i_disk_size = sc->tempip->i_disk_size; 945 } 946 947 ilog_flags |= xfs_ilog_fdata(whichfork); 948 xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags); 949 } 950 951 /* Decide if a given XFS inode is a temporary file for a repair. */ 952 bool 953 xrep_is_tempfile( 954 const struct xfs_inode *ip) 955 { 956 const struct inode *inode = &ip->i_vnode; 957 struct xfs_mount *mp = ip->i_mount; 958 959 /* 960 * Files in the metadata directory tree also have S_PRIVATE set and 961 * IOP_XATTR unset, so we must distinguish them separately. We (ab)use 962 * the IRECOVERY flag to mark temporary metadir inodes knowing that the 963 * end of log recovery clears IRECOVERY, so the only ones that can 964 * exist during online repair are the ones we create. 965 */ 966 if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA)) 967 return __xfs_iflags_test(ip, XFS_IRECOVERY); 968 969 if (IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR)) 970 return true; 971 972 return false; 973 } 974