xfs/scrub/tempfile.c

// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_exchrange.h"
#include "xfs_exchmaps.h"
#include "xfs_defer.h"
#include "xfs_symlink_remote.h"
#include "xfs_metafile.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/xfile.h"

/*
 * Create a temporary file for reconstructing metadata, with the intention of
 * atomically exchanging the temporary file's contents with the file that's
 * being repaired.
 *
 * @mode supplies the file type of the new inode.  Directories get a mkdir
 * sized reservation and are initialized with the root directory as their
 * parent; symlinks are given a one-byte placeholder target.
 *
 * Returns 0 with the new inode stored in sc->tempip, ILOCK dropped and
 * sc->temp_ilock_flags cleared.  Returns -EIO if the filesystem is shut down,
 * -EROFS if it is read-only, or the error from whichever step failed.  If the
 * inode had already been instantiated when a later step failed, it is
 * released and sc->tempip is reset to NULL before returning.
 */
int
xrep_tempfile_create(
	struct xfs_scrub	*sc,
	uint16_t		mode)
{
	struct xfs_icreate_args	args = {
		.pip		= sc->mp->m_rootip,
		.mode		= mode,
		.flags		= XFS_ICREATE_TMPFILE | XFS_ICREATE_UNLINKABLE,
	};
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = NULL;
	struct xfs_dquot	*udqp;
	struct xfs_dquot	*gdqp;
	struct xfs_dquot	*pdqp;
	struct xfs_trans_res	*tres;
	struct xfs_inode	*dp = mp->m_rootip;
	xfs_ino_t		ino;
	unsigned int		resblks;
	bool			is_dir = S_ISDIR(mode);
	int			error;

	if (xfs_is_shutdown(mp))
		return -EIO;
	if (xfs_is_readonly(mp))
		return -EROFS;

	ASSERT(sc->tp == NULL);
	ASSERT(sc->tempip == NULL);

	/*
	 * Make sure that we have allocated dquot(s) on disk.  The temporary
	 * inode should be completely root owned so that we don't fail due to
	 * quota limits.
	 */
	error = xfs_icreate_dqalloc(&args, &udqp, &gdqp, &pdqp);
	if (error)
		return error;

	/* Directories need the mkdir reservation; everything else is a tmpfile. */
	if (is_dir) {
		resblks = xfs_mkdir_space_res(mp, 0);
		tres = &M_RES(mp)->tr_mkdir;
	} else {
		resblks = XFS_IALLOC_SPACE_RES(mp);
		tres = &M_RES(mp)->tr_create_tmpfile;
	}

	error = xfs_trans_alloc_icreate(mp, tres, udqp, gdqp, pdqp, resblks,
			&tp);
	if (error)
		goto out_release_dquots;

	/* Allocate inode, set up directory. */
	error = xfs_dialloc(&tp, &args, &ino);
	if (error)
		goto out_trans_cancel;
	error = xfs_icreate(tp, ino, &args, &sc->tempip);
	if (error)
		goto out_trans_cancel;

	/* We don't touch file data, so drop the realtime flags. */
	sc->tempip->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT);
	xfs_trans_log_inode(tp, sc->tempip, XFS_ILOG_CORE);

	/*
	 * Mark our temporary file as private so that LSMs and the ACL code
	 * don't try to add their own metadata or reason about these files.
	 * The file should never be exposed to userspace.
	 */
	VFS_I(sc->tempip)->i_flags |= S_PRIVATE;
	VFS_I(sc->tempip)->i_opflags &= ~IOP_XATTR;

	if (is_dir) {
		error = xfs_dir_init(tp, sc->tempip, dp);
		if (error)
			goto out_trans_cancel;
	} else if (S_ISLNK(VFS_I(sc->tempip)->i_mode)) {
		/*
		 * Initialize the temporary symlink with a meaningless target
		 * that won't trip the verifiers.  Repair must rewrite the
		 * target with meaningful content before swapping with the file
		 * being repaired.  A single-byte target will not write a
		 * remote target block, so the owner is irrelevant.
		 */
		error = xfs_symlink_write_target(tp, sc->tempip,
				sc->tempip->i_ino, ".", 1, 0, 0);
		if (error)
			goto out_trans_cancel;
	}

	/*
	 * Attach the dquot(s) to the inodes and modify them incore.
	 * These ids of the inode couldn't have changed since the new
	 * inode has been locked ever since it was created.
	 */
	xfs_qm_vop_create_dqattach(tp, sc->tempip, udqp, gdqp, pdqp);

	/*
	 * Put our temp file on the unlinked list so it's purged automatically.
	 * All file-based metadata being reconstructed using this file must be
	 * atomically exchanged with the original file because the contents
	 * here will be purged when the inode is dropped or log recovery cleans
	 * out the unlinked list.
	 */
	error = xfs_iunlink(tp, sc->tempip);
	if (error)
		goto out_trans_cancel;

	/* The transaction is gone after this, so a failure skips the cancel. */
	error = xfs_trans_commit(tp);
	if (error)
		goto out_release_inode;

	trace_xrep_tempfile_create(sc);

	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	/* Finish setting up the incore / vfs context. */
	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
	xfs_setup_iops(sc->tempip);
	xfs_finish_inode_setup(sc->tempip);

	sc->temp_ilock_flags = 0;
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
out_release_inode:
	/*
	 * Wait until after the current transaction is aborted to finish the
	 * setup of the inode and release the inode.  This prevents recursive
	 * transactions and deadlocks from xfs_inactive.
	 */
	if (sc->tempip) {
		xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
		xfs_finish_inode_setup(sc->tempip);
		xchk_irele(sc, sc->tempip);

		/*
		 * Our reference is gone, so forget the pointer.  Otherwise
		 * xrep_tempfile_rele would see a non-NULL sc->tempip, touch
		 * the released inode, and drop a reference we no longer own.
		 */
		sc->tempip = NULL;
	}
out_release_dquots:
	xfs_qm_dqrele(udqp);
	xfs_qm_dqrele(gdqp);
	xfs_qm_dqrele(pdqp);

	return error;
}

/*
 * Move sc->tempip from the regular directory tree to the metadata directory
 * tree if sc->ip is part of the metadata directory tree and tempip has an
 * eligible file mode.
 *
 * Temporary files have to be created before we even know which inode we're
 * going to scrub, so we assume that they will be part of the regular directory
 * tree.  If it turns out that we're actually scrubbing a file from the
 * metadata directory tree, we have to subtract the temp file from the root
 * dquots and detach the dquots prior to setting the METADATA iflag.  However,
 * the scrub setup functions grab sc->ip and create sc->tempip before we
 * actually get around to checking if the file mode is the right type for the
 * scrubber.
 *
 * Returns 0 if no adjustment was needed or the adjustment was committed;
 * otherwise returns the error from allocating or committing the transaction.
 * The tempfile's IOLOCK and ILOCK are taken here and dropped again before
 * returning.
 */
int
xrep_tempfile_adjust_directory_tree(
	struct xfs_scrub	*sc)
{
	int			error;

	/* No temporary file means there is nothing to adjust. */
	if (!sc->tempip)
		return 0;

	ASSERT(sc->tp == NULL);
	ASSERT(!xfs_is_metadir_inode(sc->tempip));

	/* Only act when the file being scrubbed is a metadir inode. */
	if (!sc->ip || !xfs_is_metadir_inode(sc->ip))
		return 0;
	/* Only directory and regular-file tempfiles are eligible. */
	if (!S_ISDIR(VFS_I(sc->tempip)->i_mode) &&
	    !S_ISREG(VFS_I(sc->tempip)->i_mode))
		return 0;

	/* Take the IOLOCK first and record it so the unlock helper can undo it. */
	xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
	sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;

	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out_iolock;

	/* Joined with no unlock flags, so we drop the ILOCK ourselves below. */
	xrep_tempfile_ilock(sc);
	xfs_trans_ijoin(sc->tp, sc->tempip, 0);

	/* Metadir files are not accounted in quota, so drop icount */
	xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, -1L);
	xfs_metafile_set_iflag(sc->tp, sc->tempip, XFS_METAFILE_UNKNOWN);

	error = xrep_trans_commit(sc);
	if (error)
		goto out_ilock;

	/*
	 * Only after a successful commit: tag the inode with IRECOVERY, which
	 * xrep_is_tempfile uses to recognize metadir tempfiles, and drop the
	 * attached dquots.
	 */
	xfs_iflags_set(sc->tempip, XFS_IRECOVERY);
	xfs_qm_dqdetach(sc->tempip);
out_ilock:
	xrep_tempfile_iunlock(sc);
out_iolock:
	xrep_tempfile_iounlock(sc);
	return error;
}

/*
 * Remove this temporary file from the metadata directory tree so that it can
 * be inactivated the normal way.
 *
 * This is a no-op (returning 0) when there is no tempfile or the tempfile is
 * not a metadir inode.  Otherwise it clears the IRECOVERY marker and the
 * metadata iflag, reattaches dquots, and adds the inode and its blocks back
 * to the quota counts in a single transaction.  Returns 0 on success or the
 * error from transaction allocation, dquot attachment, or commit.  The
 * tempfile's IOLOCK and ILOCK are taken here and dropped before returning.
 */
STATIC int
xrep_tempfile_remove_metadir(
	struct xfs_scrub	*sc)
{
	int			error;

	if (!sc->tempip || !xfs_is_metadir_inode(sc->tempip))
		return 0;

	ASSERT(sc->tp == NULL);

	/* Drop the incore marker that identified this as a metadir tempfile. */
	xfs_iflags_clear(sc->tempip, XFS_IRECOVERY);

	xfs_ilock(sc->tempip, XFS_IOLOCK_EXCL);
	sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;

	error = xchk_trans_alloc(sc, 0);
	if (error)
		goto out_iolock;

	xrep_tempfile_ilock(sc);
	xfs_trans_ijoin(sc->tp, sc->tempip, 0);

	/*
	 * NOTE(review): the metadata flag is cleared before dquots are
	 * attached; presumably dqattach ignores metadir inodes, so the order
	 * matters -- confirm before rearranging.
	 */
	xfs_metafile_clear_iflag(sc->tp, sc->tempip);

	/* Non-metadir files are accounted in quota, so bump bcount/icount */
	error = xfs_qm_dqattach_locked(sc->tempip, false);
	if (error)
		goto out_cancel;

	xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_ICOUNT, 1L);
	xfs_trans_mod_dquot_byino(sc->tp, sc->tempip, XFS_TRANS_DQ_BCOUNT,
			sc->tempip->i_nblocks);
	error = xrep_trans_commit(sc);
	/* Committed or not, the cancel step below must be skipped. */
	goto out_ilock;

out_cancel:
	xchk_trans_cancel(sc);
out_ilock:
	xrep_tempfile_iunlock(sc);
out_iolock:
	xrep_tempfile_iounlock(sc);
	return error;
}

/*
 * Try to take IOLOCK_EXCL on the temporary file without waiting.  Returns
 * true and records the lock in sc->temp_ilock_flags if we got it; returns
 * false and leaves the flags alone otherwise.
 */
bool
xrep_tempfile_iolock_nowait(
	struct xfs_scrub	*sc)
{
	if (!xfs_ilock_nowait(sc->tempip, XFS_IOLOCK_EXCL))
		return false;

	sc->temp_ilock_flags |= XFS_IOLOCK_EXCL;
	return true;
}

/*
 * Take the temporary file's IOLOCK while holding a different inode's IOLOCK.
 * Nobody else is expected to hold the tempfile's IOLOCK, but we poll with a
 * trylock instead of blocking so that we avoid deadlocks and lockdep
 * complaints.
 *
 * Returns 0 once the lock is held, or the error set by
 * xchk_should_terminate if the scrub is told to stop while we wait.
 */
int
xrep_tempfile_iolock_polled(
	struct xfs_scrub	*sc)
{
	int			error = 0;

	for (;;) {
		if (xrep_tempfile_iolock_nowait(sc))
			return 0;

		/* Didn't get it; bail out if asked to, else nap and retry. */
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}
}

/*
 * Release IOLOCK_EXCL on the temporary file and clear the matching bit in
 * sc->temp_ilock_flags.  The caller must actually hold the lock; nothing here
 * checks the recorded flags first.
 */
void
xrep_tempfile_iounlock(
	struct xfs_scrub	*sc)
{
	xfs_iunlock(sc->tempip, XFS_IOLOCK_EXCL);
	sc->temp_ilock_flags &= ~XFS_IOLOCK_EXCL;
}

/*
 * Prepare the temporary file for metadata updates by grabbing ILOCK_EXCL.
 * The lock is recorded in sc->temp_ilock_flags before the blocking
 * xfs_ilock call is made.
 */
void
xrep_tempfile_ilock(
	struct xfs_scrub	*sc)
{
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	xfs_ilock(sc->tempip, XFS_ILOCK_EXCL);
}

/*
 * Try to grab ILOCK_EXCL on the temporary file without waiting.  Returns true
 * and records the lock in sc->temp_ilock_flags on success; returns false with
 * the flags untouched if the lock could not be taken.
 */
bool
xrep_tempfile_ilock_nowait(
	struct xfs_scrub	*sc)
{
	if (!xfs_ilock_nowait(sc->tempip, XFS_ILOCK_EXCL))
		return false;

	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	return true;
}

/*
 * Unlock ILOCK_EXCL on the temporary file after an update and clear the
 * matching bit in sc->temp_ilock_flags.  The caller must hold the lock.
 */
void
xrep_tempfile_iunlock(
	struct xfs_scrub	*sc)
{
	xfs_iunlock(sc->tempip, XFS_ILOCK_EXCL);
	sc->temp_ilock_flags &= ~XFS_ILOCK_EXCL;
}

/*
 * Begin the process of making changes to both the file being scrubbed and
 * the temporary file by taking ILOCK_EXCL on both.  Both sc->ilock_flags and
 * sc->temp_ilock_flags are updated to record the locks.
 */
void
xrep_tempfile_ilock_both(
	struct xfs_scrub	*sc)
{
	/* NOTE(review): presumably the helper picks a safe lock order -- confirm. */
	xfs_lock_two_inodes(sc->ip, XFS_ILOCK_EXCL, sc->tempip, XFS_ILOCK_EXCL);
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
}

/*
 * Unlock ILOCK_EXCL on both files: the temporary file first, then the file
 * being scrubbed via xchk_iunlock.
 */
void
xrep_tempfile_iunlock_both(
	struct xfs_scrub	*sc)
{
	xrep_tempfile_iunlock(sc);
	xchk_iunlock(sc, XFS_ILOCK_EXCL);
}

/*
 * Release the temporary file.  Does nothing if sc->tempip is NULL.  Otherwise
 * drops any tempfile locks still recorded in sc->temp_ilock_flags, takes the
 * file back out of the metadata directory tree if needed, releases the inode
 * reference, and resets sc->tempip to NULL.
 */
void
xrep_tempfile_rele(
	struct xfs_scrub	*sc)
{
	if (!sc->tempip)
		return;

	/* Unlock whatever the lock helpers recorded as still held. */
	if (sc->temp_ilock_flags) {
		xfs_iunlock(sc->tempip, sc->temp_ilock_flags);
		sc->temp_ilock_flags = 0;
	}

	/* The return value is ignored; the inode is released regardless. */
	xrep_tempfile_remove_metadir(sc);
	xchk_irele(sc, sc->tempip);
	sc->tempip = NULL;
}

/*
 * Make sure that the given range of the data fork of the temporary file is
 * mapped to written blocks.  The caller must ensure that both inodes are
 * joined to the transaction.
 *
 * The range is @len blocks starting at file block @off.  Returns 0 when the
 * whole range is backed by written extents, -EFSCORRUPTED if a mapping lookup
 * returns nothing, a delalloc reservation is found, or the allocation does
 * not produce exactly one mapping, or the error from the mapping, allocation,
 * or deferred-work calls.
 */
int
xrep_tempfile_prealloc(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		end = off + len;
	int			error;

	ASSERT(sc->tempip != NULL);
	ASSERT(!XFS_NOT_DQATTACHED(sc->mp, sc->tempip));

	/*
	 * Each pass handles one mapping; the loop then resumes at the first
	 * block past whatever @map describes at the end of the pass.
	 */
	for (; off < end; off = map.br_startoff + map.br_blockcount) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->tempip, off, end - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/* Already written; skip past this mapping. */
		if (xfs_bmap_is_written_extent(&map))
			continue;

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/*
		 * Make sure this block has a real zeroed extent allocated to
		 * it.
		 */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->tempip, off, end - off,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO, 0, &map,
				&nmaps);
		if (error)
			return error;
		if (nmaps != 1)
			return -EFSCORRUPTED;

		trace_xrep_tempfile_prealloc(sc, XFS_DATA_FORK, &map);

		/* Commit new extent and all deferred work. */
		error = xfs_defer_finish(&sc->tp);
		if (error)
			return error;
	}

	return 0;
}

/*
 * Write data to each block of a file.  The given range of the tempfile's data
 * fork must already be populated with written extents.
 *
 * For every file block in the @len blocks starting at @off, look up the
 * block's mapping, get the disk buffer behind it, and call
 * @prep_fn(sc, bp, @data) to fill the buffer.  Filled buffers are queued on a
 * local delwri list that is submitted periodically and once more at the end.
 *
 * Returns 0 on success.  Returns -EFSCORRUPTED if a block is not backed by a
 * written extent, -EIO if buffers are still queued after the final
 * submission, or the error from the mapping lookup, buffer get, @prep_fn, or
 * delwri submission.  On any error the buffers still on the list are
 * cancelled instead of written.
 */
int
xrep_tempfile_copyin(
	struct xfs_scrub	*sc,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	xrep_tempfile_copyin_fn	prep_fn,
	void			*data)
{
	LIST_HEAD(buffers_list);
	struct xfs_mount	*mp = sc->mp;
	struct xfs_buf		*bp;
	xfs_fileoff_t		flush_mask;
	xfs_fileoff_t		end = off + len;
	int			error = 0;

	ASSERT(S_ISREG(VFS_I(sc->tempip)->i_mode));

	/* Flush buffers to disk every 512K */
	flush_mask = XFS_B_TO_FSBT(mp, (1U << 19)) - 1;

	for (; off < end; off++) {
		struct xfs_bmbt_irec	map;
		int			nmaps = 1;

		/* Read block mapping for this file block. */
		error = xfs_bmapi_read(sc->tempip, off, 1, &map, &nmaps, 0);
		if (error)
			goto out_err;
		if (nmaps == 0 || !xfs_bmap_is_written_extent(&map)) {
			error = -EFSCORRUPTED;
			goto out_err;
		}

		/* Get the metadata buffer for this offset in the file. */
		error = xfs_trans_get_buf(sc->tp, mp->m_ddev_targp,
				XFS_FSB_TO_DADDR(mp, map.br_startblock),
				mp->m_bsize, 0, &bp);
		if (error)
			goto out_err;

		trace_xrep_tempfile_copyin(sc, XFS_DATA_FORK, &map);

		/* Let the caller's callback fill in this block's contents. */
		error = prep_fn(sc, bp, data);
		if (error) {
			xfs_trans_brelse(sc->tp, bp);
			goto out_err;
		}

		/* Queue buffer, and flush if we have too much dirty data. */
		xfs_buf_delwri_queue_here(bp, &buffers_list);
		xfs_trans_brelse(sc->tp, bp);

		if (!(off & flush_mask)) {
			error = xfs_buf_delwri_submit(&buffers_list);
			if (error)
				goto out_err;
		}
	}

	/*
	 * Write the new blocks to disk.  If the ordered list isn't empty after
	 * that, then something went wrong and we have to fail.  This should
	 * never happen, but we'll check anyway.
	 */
	error = xfs_buf_delwri_submit(&buffers_list);
	if (error)
		goto out_err;

	if (!list_empty(&buffers_list)) {
		ASSERT(list_empty(&buffers_list));
		error = -EIO;
		goto out_err;
	}

	return 0;

out_err:
	xfs_buf_delwri_cancel(&buffers_list);
	return error;
}

/*
 * Set the temporary file's size.  Caller must join the tempfile to the scrub
 * transaction and is responsible for adjusting block mappings as needed.
 */
int
xrep_tempfile_set_isize(
	struct xfs_scrub	*sc,
	unsigned long long	isize)
{
	if (sc->tempip->i_disk_size == isize)
		return 0;

	sc->tempip->i_disk_size = isize;
	i_size_write(VFS_I(sc->tempip), isize);
	return xrep_tempfile_roll_trans(sc);
}

/*
 * Roll a repair transaction involving the temporary file.  The caller must
 * have joined both the temporary file and the file being scrubbed to the
 * transaction beforehand.
 *
 * The tempfile's inode core is logged, the scrub transaction is rolled, and
 * the tempfile is joined to the new transaction.  Returns 0 on success, or
 * the error from xrep_roll_trans, in which case the tempfile is not rejoined.
 */
int
xrep_tempfile_roll_trans(
	struct xfs_scrub	*sc)
{
	int			error;

	xfs_trans_log_inode(sc->tp, sc->tempip, XFS_ILOG_CORE);
	error = xrep_roll_trans(sc);
	if (!error)
		xfs_trans_ijoin(sc->tp, sc->tempip, 0);

	return error;
}

/*
 * Fill out the mapping exchange request in preparation for atomically
 * committing the contents of a metadata file that we've rebuilt in the temp
 * file.
 *
 * @tx is zeroed first.  The request covers @len blocks starting at file
 * offset @off in the @whichfork fork of both files, with the tempfile as the
 * first inode and the file being repaired as the second.  Returns 0 on
 * success, or -EINVAL if @whichfork is the COW fork or either file lacks the
 * requested fork.
 */
STATIC int
xrep_tempexch_prep_request(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	struct xrep_tempexch	*tx)
{
	struct xfs_exchmaps_req	*req = &tx->req;

	memset(tx, 0, sizeof(*tx));

	/* COW forks don't exist on disk. */
	if (whichfork == XFS_COW_FORK) {
		ASSERT(0);
		return -EINVAL;
	}

	/* Refuse to go on unless both files have the fork in question. */
	if (!(xfs_ifork_ptr(sc->ip, whichfork) &&
	      xfs_ifork_ptr(sc->tempip, whichfork))) {
		ASSERT(xfs_ifork_ptr(sc->ip, whichfork) != NULL);
		ASSERT(xfs_ifork_ptr(sc->tempip, whichfork) != NULL);
		return -EINVAL;
	}

	/* Same range in both files; tempfile first, repair target second. */
	req->ip1 = sc->tempip;
	req->ip2 = sc->ip;
	req->startoff1 = off;
	req->startoff2 = off;
	req->blockcount = len;

	/*
	 * Attr fork exchanges need the fork flag.  Data fork exchanges that
	 * cover the entire file also exchange the file sizes.
	 */
	if (whichfork == XFS_ATTR_FORK)
		req->flags |= XFS_EXCHMAPS_ATTR_FORK;
	else if (whichfork == XFS_DATA_FORK && off == 0 &&
		 len == XFS_MAX_FILEOFF)
		req->flags |= XFS_EXCHMAPS_SET_SIZES;

	return 0;
}

/*
 * Fill out the mapping exchange resource estimation structures in preparation
 * for exchanging the contents of a metadata file that we've rebuilt in the
 * temp file.  Caller must hold IOLOCK_EXCL but not ILOCK_EXCL on both files.
 *
 * The exchmaps code only knows how to exchange file fork space mappings, so
 * any fork that is in local format must be promoted to a single block before
 * the exchange can take place.  The estimates below account for that.
 */
STATIC int
xrep_tempexch_estimate(
	struct xfs_scrub	*sc,
	struct xrep_tempexch	*tx)
{
	struct xfs_exchmaps_req	*req = &tx->req;
	struct xfs_ifork	*ifp;
	struct xfs_ifork	*tifp;
	int			whichfork = xfs_exchmaps_reqfork(req);
	bool			ip_local;
	bool			temp_local;

	ifp = xfs_ifork_ptr(sc->ip, whichfork);
	ip_local = (ifp->if_format == XFS_DINODE_FMT_LOCAL);

	tifp = xfs_ifork_ptr(sc->tempip, whichfork);
	temp_local = (tifp->if_format == XFS_DINODE_FMT_LOCAL);

	/* Neither fork is local, so the regular estimate applies. */
	if (!ip_local && !temp_local)
		return xfs_exchrange_estimate(req);

	if (ip_local && temp_local) {
		/*
		 * Both forks are local.  Each file's shortform data must be
		 * converted to an fsblock and each fork to extents format:
		 * two resblks for the two conversions, and a single exchange
		 * because there's only the one block at fileoff 0.
		 * Presumably, the caller could not exchange the two inode
		 * fork areas directly.
		 */
		req->ip1_bcount = 1;
		req->ip2_bcount = 1;
		req->nr_exchanges = 1;
		req->resblks = 2;
	} else if (ip_local) {
		/*
		 * Only the file being repaired is local.  Its shortform data
		 * must be converted to an ondisk block so that both forks are
		 * in extents format.  That takes one resblk; the worst case
		 * number of exchanges is the tempfile's extent count plus the
		 * converted block.
		 */
		req->ip1_bcount = sc->tempip->i_nblocks;
		req->ip2_bcount = 1;
		req->nr_exchanges = 1 + tifp->if_nextents;
		req->resblks = 1;
	} else {
		/*
		 * Only the temporary file is local.  Its shortform data must
		 * be converted to an ondisk block and its fork changed to
		 * extents format.  That takes one resblk; the worst case
		 * number of exchanges is the extent count of the file being
		 * repaired plus the converted block.
		 */
		req->ip1_bcount = 1;
		req->ip2_bcount = sc->ip->i_nblocks;
		req->nr_exchanges = 1 + ifp->if_nextents;
		req->resblks = 1;
	}

	return xfs_exchmaps_estimate_overhead(req);
}

/*
 * Obtain a quota reservation to make sure we don't hit EDQUOT during the
 * exchange.  No reservation is made if quotas are off, if both sides of the
 * request are the same inode, if the first inode is a metadir inode, or if
 * both inodes share the same user, group, and project dquots.
 *
 * Returns 0 if nothing needed reserving or both reservations succeeded, or
 * the error from the first reservation that failed.
 */
STATIC int
xrep_tempexch_reserve_quota(
	struct xfs_scrub		*sc,
	const struct xrep_tempexch	*tx)
{
	const struct xfs_exchmaps_req	*req = &tx->req;
	struct xfs_trans		*tp = sc->tp;
	int64_t				dgain, rgain;
	int				error;

	/* Nothing to reserve when quotas are off. */
	if (!XFS_IS_QUOTA_ON(tp->t_mountp))
		return 0;

	/* Same inode on both sides, or the first one is a metadir inode. */
	if (req->ip1 == req->ip2 || xfs_is_metadir_inode(req->ip1))
		return 0;

	/* Both inodes charge the very same dquots. */
	if (req->ip1->i_udquot == req->ip2->i_udquot &&
	    req->ip1->i_gdquot == req->ip2->i_gdquot &&
	    req->ip1->i_pdquot == req->ip2->i_pdquot)
		return 0;

	/*
	 * Each file's reservation has two parts: the net gain in mapped
	 * blocks from the exchange, plus the gross gain in mapped blocks so
	 * that we don't trip any quota block reservation assertions.  The
	 * gross gain is needed because the quota code subtracts unmapped
	 * blocks from bcount without crediting them back to the quota block
	 * reservation.
	 */
	dgain = max_t(int64_t, 0, req->ip2_bcount - req->ip1_bcount);
	rgain = max_t(int64_t, 0, req->ip2_rtbcount - req->ip1_rtbcount);
	error = xfs_trans_reserve_quota_nblks(tp, req->ip1,
			dgain + req->ip1_bcount, rgain + req->ip1_rtbcount,
			true);
	if (error)
		return error;

	/* Now the same calculation from the second inode's point of view. */
	dgain = max_t(int64_t, 0, req->ip1_bcount - req->ip2_bcount);
	rgain = max_t(int64_t, 0, req->ip1_rtbcount - req->ip2_rtbcount);
	return xfs_trans_reserve_quota_nblks(tp, req->ip2,
			dgain + req->ip2_bcount, rgain + req->ip2_rtbcount,
			true);
}

/*
 * Prepare an existing transaction for an atomic file contents exchange.
 *
 * This fills out the mapping exchange request for @len blocks at offset @off
 * of the @whichfork fork, estimates the resources needed to exchange a
 * metadata file with the contents rebuilt in the temp file, and then adds
 * the block and quota reservations to the scrub transaction.
 *
 * The caller must hold ILOCK_EXCL of the scrub target file and the temporary
 * file.  The caller must join both inodes to the transaction with no unlock
 * flags, and is responsible for dropping both ILOCKs when appropriate.  Only
 * use this when those ILOCKs cannot be dropped.
 *
 * Returns 0 or the error from the first step that failed; later steps are
 * not attempted after a failure.
 */
int
xrep_tempexch_trans_reserve(
	struct xfs_scrub	*sc,
	int			whichfork,
	xfs_fileoff_t		off,
	xfs_filblks_t		len,
	struct xrep_tempexch	*tx)
{
	int			error;

	ASSERT(sc->tp != NULL);
	xfs_assert_ilocked(sc->ip, XFS_ILOCK_EXCL);
	xfs_assert_ilocked(sc->tempip, XFS_ILOCK_EXCL);

	/* Each stage runs only if everything before it succeeded. */
	error = xrep_tempexch_prep_request(sc, whichfork, off, len, tx);
	if (!error)
		error = xfs_exchmaps_estimate(&tx->req);
	if (!error)
		error = xfs_trans_reserve_more(sc->tp, tx->req.resblks, 0);
	if (!error)
		error = xrep_tempexch_reserve_quota(sc, tx);

	return error;
}

/*
 * Create a new transaction for a file contents exchange.
 *
 * This function fills out the mapping exchange request and resource
 * estimation structures in preparation for exchanging the contents of a
 * metadata file that has been rebuilt in the temp file.  Next, it reserves
 * space, takes ILOCK_EXCL of both inodes, joins them to the transaction and
 * reserves quota for the transaction.
 *
 * The request always covers the whole @whichfork fork (offset 0, length
 * XFS_MAX_FILEOFF).  Returns 0 on success or the error from the step that
 * failed.  If only the final quota reservation fails, the transaction has
 * already been allocated and both ILOCKs are already held and recorded.
 *
 * The caller is responsible for dropping both ILOCKs when appropriate.
 */
int
xrep_tempexch_trans_alloc(
	struct xfs_scrub	*sc,
	int			whichfork,
	struct xrep_tempexch	*tx)
{
	unsigned int		flags = 0;
	int			error;

	ASSERT(sc->tp == NULL);
	ASSERT(xfs_has_exchange_range(sc->mp));

	error = xrep_tempexch_prep_request(sc, whichfork, 0, XFS_MAX_FILEOFF,
			tx);
	if (error)
		return error;

	error = xrep_tempexch_estimate(sc, tx);
	if (error)
		return error;

	/* Request XFS_TRANS_RES_FDBLKS only on lazysbcount filesystems. */
	if (xfs_has_lazysbcount(sc->mp))
		flags |= XFS_TRANS_RES_FDBLKS;

	error = xfs_trans_alloc(sc->mp, &M_RES(sc->mp)->tr_itruncate,
			tx->req.resblks, 0, flags, &sc->tp);
	if (error)
		return error;

	/* Record both ILOCKs in the scrub context, then take them. */
	sc->temp_ilock_flags |= XFS_ILOCK_EXCL;
	sc->ilock_flags |= XFS_ILOCK_EXCL;
	xfs_exchrange_ilock(sc->tp, sc->ip, sc->tempip);

	return xrep_tempexch_reserve_quota(sc, tx);
}

/*
 * Exchange file mappings (and hence file contents) between the file being
 * repaired and the temporary file.  Returns with both inodes locked and joined
 * to a clean scrub transaction.
 *
 * Returns 0 on success or the error from finishing the deferred exchange
 * work, in which case the incore sizes are left alone.
 */
int
xrep_tempexch_contents(
	struct xfs_scrub	*sc,
	struct xrep_tempexch	*tx)
{
	struct inode		*inode;
	struct inode		*temp_inode;
	loff_t			isize;
	int			error;

	ASSERT(xfs_has_exchange_range(sc->mp));

	xfs_exchange_mappings(sc->tp, &tx->req);
	error = xfs_defer_finish(&sc->tp);
	if (error)
		return error;

	/* The sizes only move when the request asked for that. */
	if (!(tx->req.flags & XFS_EXCHMAPS_SET_SIZES))
		return 0;

	/*
	 * The ondisk sizes of the two metadata files were exchanged, so the
	 * incore sizes must be exchanged as well.
	 */
	inode = VFS_I(sc->ip);
	temp_inode = VFS_I(sc->tempip);

	isize = i_size_read(inode);
	i_size_write(inode, i_size_read(temp_inode));
	i_size_write(temp_inode, isize);
	return 0;
}

/*
 * Write local format data from one of the temporary file's forks into the same
 * fork of file being repaired, and exchange the file sizes, if appropriate.
 * Caller must ensure that the file being repaired has enough fork space to
 * hold all the bytes.
 *
 * @whichfork must be XFS_DATA_FORK or XFS_ATTR_FORK; any other value trips an
 * assertion and returns without changing anything.  Both forks are expected
 * to be in local format.  For the data fork, the repaired file's incore and
 * ondisk sizes are set to the tempfile's sizes; the tempfile's own sizes are
 * not modified here.  The repaired file's inode core and fork data are logged
 * to the scrub transaction.
 */
void
xrep_tempfile_copyout_local(
	struct xfs_scrub	*sc,
	int			whichfork)
{
	struct xfs_ifork	*temp_ifp;
	struct xfs_ifork	*ifp;
	unsigned int		ilog_flags = XFS_ILOG_CORE;

	temp_ifp = xfs_ifork_ptr(sc->tempip, whichfork);
	ifp = xfs_ifork_ptr(sc->ip, whichfork);

	ASSERT(temp_ifp != NULL);
	ASSERT(ifp != NULL);
	ASSERT(temp_ifp->if_format == XFS_DINODE_FMT_LOCAL);
	ASSERT(ifp->if_format == XFS_DINODE_FMT_LOCAL);

	/* Debug-only checks that the destination fork area is big enough. */
	switch (whichfork) {
	case XFS_DATA_FORK:
		ASSERT(sc->tempip->i_disk_size <=
					xfs_inode_data_fork_size(sc->ip));
		break;
	case XFS_ATTR_FORK:
		ASSERT(sc->tempip->i_forkoff >= sc->ip->i_forkoff);
		break;
	default:
		ASSERT(0);
		return;
	}

	/* Recreate @sc->ip's incore fork (ifp) with data from temp_ifp. */
	xfs_idestroy_fork(ifp);
	xfs_init_local_fork(sc->ip, whichfork, temp_ifp->if_data,
			temp_ifp->if_bytes);

	/* Data fork contents determine the file size, so copy that too. */
	if (whichfork == XFS_DATA_FORK) {
		i_size_write(VFS_I(sc->ip), i_size_read(VFS_I(sc->tempip)));
		sc->ip->i_disk_size = sc->tempip->i_disk_size;
	}

	/* Log the inode core plus the local data of the fork we rewrote. */
	ilog_flags |= xfs_ilog_fdata(whichfork);
	xfs_trans_log_inode(sc->tp, sc->ip, ilog_flags);
}

/*
 * Decide if a given XFS inode is a temporary file for a repair.
 *
 * Repair tempfiles are created with S_PRIVATE set and IOP_XATTR cleared, but
 * files in the metadata directory tree look the same, so those have to be
 * told apart some other way.  We (ab)use the IRECOVERY flag to mark temporary
 * metadir inodes, knowing that the end of log recovery clears IRECOVERY and
 * hence the only ones that can exist during online repair are the ones we
 * create.
 */
bool
xrep_is_tempfile(
	const struct xfs_inode	*ip)
{
	const struct inode	*inode = &ip->i_vnode;
	struct xfs_mount	*mp = ip->i_mount;

	/* Metadir inodes: only the IRECOVERY marker counts. */
	if (xfs_has_metadir(mp) && (ip->i_diflags2 & XFS_DIFLAG2_METADATA))
		return __xfs_iflags_test(ip, XFS_IRECOVERY);

	/* Everything else: private and with xattr operations disabled. */
	return IS_PRIVATE(inode) && !(inode->i_opflags & IOP_XATTR);
}