// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (C) 2020-2023 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong <djwong@kernel.org>
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_btree.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_rtalloc.h"
#include "xfs_inode.h"
#include "xfs_bit.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_rmap.h"
#include "xfs_rtrmap_btree.h"
#include "xfs_exchmaps.h"
#include "xfs_rtbitmap.h"
#include "xfs_rtgroup.h"
#include "xfs_extent_busy.h"
#include "xfs_refcount.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/trace.h"
#include "scrub/repair.h"
#include "scrub/xfile.h"
#include "scrub/tempfile.h"
#include "scrub/tempexch.h"
#include "scrub/reap.h"
#include "scrub/rtbitmap.h"

/* rt bitmap content repairs */

/* Set up to repair the realtime bitmap for this group. */
int
xrep_setup_rtbitmap(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	char			*descr;
	unsigned long long	blocks = mp->m_sb.sb_rbmblocks;
	int			error;

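	/* Create the tempfile that will hold the new bitmap contents. */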
	error = xrep_tempfile_create(sc, S_IFREG);
	if (error)
		return error;

	/* Create an xfile to hold our reconstructed bitmap. */
	descr = xchk_xfile_rtgroup_descr(sc, "bitmap file");
	error = xfile_create(descr, blocks * mp->m_sb.sb_blocksize, &sc->xfile);
	kfree(descr);
	if (error)
		return error;

	/*
	 * Reserve enough blocks to write out a completely new bitmap file,
	 * plus twice as many blocks as we would need if we can only allocate
	 * one block per data fork mapping.  This should cover the
	 * preallocation of the temporary file and exchanging the extent
	 * mappings.
	 *
	 * We cannot use xfs_exchmaps_estimate because we have not yet
	 * constructed the replacement bitmap and therefore do not know how
	 * many extents it will use.  By the time we do, we will have a dirty
	 * transaction (which we cannot drop because we cannot drop the
	 * rtbitmap ILOCK) and cannot ask for more reservation.
	 */
	blocks += xfs_bmbt_calc_size(mp, blocks) * 2;
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;

	rtb->resblks += blocks;
	return 0;
}

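/* Convert an rt extent number into a word offset within the rt bitmap. */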
static inline xrep_wordoff_t
rtx_to_wordoff(
	struct xfs_mount	*mp,
	xfs_rtxnum_t		rtx)
{
	return rtx >> XFS_NBWORDLOG;
}

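/* Convert a length in rt extents into a count of whole bitmap words. */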
static inline xrep_wordcnt_t
rtxlen_to_wordcnt(
	xfs_rtxlen_t	rtxlen)
{
	return rtxlen >> XFS_NBWORDLOG;
}

/* Helper functions to record rtwords in an xfile. */

static inline int
xfbmp_load(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		*word)
{
	union xfs_rtword_raw	urk;
	int			error;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	error = xfile_load(rtb->sc->xfile, &urk,
			sizeof(union xfs_rtword_raw),
			wordoff << XFS_WORDLOG);
	if (error)
		return error;

	*word = be32_to_cpu(urk.rtg);
	return 0;
}

static inline int
xfbmp_store(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const xfs_rtword_t	word)
{
	union xfs_rtword_raw	urk;

	ASSERT(xfs_has_rtgroups(rtb->sc->mp));

	urk.rtg = cpu_to_be32(word);
	return xfile_store(rtb->sc->xfile, &urk,
			sizeof(union xfs_rtword_raw),
			wordoff << XFS_WORDLOG);
}

static inline int
xfbmp_copyin(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	const union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	return xfile_store(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
}

static inline int
xfbmp_copyout(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	union xfs_rtword_raw	*word,
	xrep_wordcnt_t		nr_words)
{
	return xfile_load(rtb->sc->xfile, word, nr_words << XFS_WORDLOG,
			wordoff << XFS_WORDLOG);
}

/* Perform a logical OR operation on an rtword in the incore bitmap. */
static int
xrep_rtbitmap_or(
	struct xchk_rtbitmap	*rtb,
	xrep_wordoff_t		wordoff,
	xfs_rtword_t		mask)
{
	xfs_rtword_t		word;
	int			error;

	error = xfbmp_load(rtb, wordoff, &word);
	if (error)
		return error;

	trace_xrep_rtbitmap_or(rtb->sc->mp, wordoff, mask, word);

	return xfbmp_store(rtb, wordoff, word | mask);
}

/*
 * Mark as free every rt extent between the next rt block we expected to see
 * in the rtrmap records and the given rt block.
 */
STATIC int
xrep_rtbitmap_mark_free(
	struct xchk_rtbitmap	*rtb,
	xfs_rgblock_t		rgbno)
{
	struct xfs_mount	*mp = rtb->sc->mp;
	struct xchk_rt		*sr = &rtb->sc->sr;
	struct xfs_rtgroup	*rtg = sr->rtg;
	xfs_rtxnum_t		startrtx;
	xfs_rtxnum_t		nextrtx;
	xrep_wordoff_t		wordoff, nextwordoff;
	unsigned int		bit;
	unsigned int		bufwsize;
	xfs_extlen_t		mod;
	xfs_rtword_t		mask;
	enum xbtree_recpacking	outcome;
	int			error;

	if (!xfs_verify_rgbext(rtg, rtb->next_rgbno, rgbno - rtb->next_rgbno))
		return -EFSCORRUPTED;

	/*
	 * Convert rt blocks to rt extents.  The block range we find must be
	 * aligned to an rtextent boundary on both ends.
	 */
	startrtx = xfs_rgbno_to_rtx(mp, rtb->next_rgbno);
	mod = xfs_rgbno_to_rtxoff(mp, rtb->next_rgbno);
	if (mod)
		return -EFSCORRUPTED;

	nextrtx = xfs_rgbno_to_rtx(mp, rgbno - 1) + 1;
	mod = xfs_rgbno_to_rtxoff(mp, rgbno - 1);
	if (mod != mp->m_sb.sb_rextsize - 1)
		return -EFSCORRUPTED;

	/* Must not be shared or CoW staging. */
	if (sr->refc_cur) {
		error = xfs_refcount_has_records(sr->refc_cur,
				XFS_REFC_DOMAIN_SHARED, rtb->next_rgbno,
				rgbno - rtb->next_rgbno, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;

		error = xfs_refcount_has_records(sr->refc_cur,
				XFS_REFC_DOMAIN_COW, rtb->next_rgbno,
				rgbno - rtb->next_rgbno, &outcome);
		if (error)
			return error;
		if (outcome != XBTREE_RECPACKING_EMPTY)
			return -EFSCORRUPTED;
	}

	trace_xrep_rtbitmap_record_free(mp, startrtx, nextrtx - 1);

	/* Set bits as needed to round startrtx up to the nearest word. */
	bit = startrtx & XREP_RTBMP_WORDMASK;
	if (bit) {
		xfs_rtblock_t	len = nextrtx - startrtx;
		unsigned int	lastbit;

		lastbit = min(bit + len, XFS_NBWORD);
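		/*
		 * Build a mask of bits @bit through @lastbit - 1 of this
		 * word, e.g. bit = 2 and lastbit = 5 produces
		 * ((1 << 3) - 1) << 2, which sets bits 2-4.
		 */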
		mask = (((xfs_rtword_t)1 << (lastbit - bit)) - 1) << bit;

		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, startrtx),
				mask);
		if (error || lastbit - bit == len)
			return error;
		startrtx += XFS_NBWORD - bit;
	}

	/* Set bits as needed to round nextrtx down to the nearest word. */
	bit = nextrtx & XREP_RTBMP_WORDMASK;
	if (bit) {
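		/*
		 * Build a mask of the low @bit bits of the word containing
		 * nextrtx, e.g. bit = 3 produces (1 << 3) - 1, which sets
		 * bits 0-2.
		 */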
		mask = ((xfs_rtword_t)1 << bit) - 1;

		error = xrep_rtbitmap_or(rtb, rtx_to_wordoff(mp, nextrtx),
				mask);
		if (error || startrtx + bit == nextrtx)
			return error;
		nextrtx -= bit;
	}

	trace_xrep_rtbitmap_record_free_bulk(mp, startrtx, nextrtx - 1);

	/* Set all the words in between, up to a whole fs block at once. */
	wordoff = rtx_to_wordoff(mp, startrtx);
	nextwordoff = rtx_to_wordoff(mp, nextrtx);
	bufwsize = mp->m_sb.sb_blocksize >> XFS_WORDLOG;

	while (wordoff < nextwordoff) {
		xrep_wordoff_t	rem;
		xrep_wordcnt_t	wordcnt;

		wordcnt = min_t(xrep_wordcnt_t, nextwordoff - wordoff,
				bufwsize);

		/*
		 * Try to keep us aligned to the rtwords buffer to reduce the
		 * number of xfile writes.
		 */
		rem = wordoff & (bufwsize - 1);
		if (rem)
			wordcnt = min_t(xrep_wordcnt_t, wordcnt,
					bufwsize - rem);

		error = xfbmp_copyin(rtb, wordoff, rtb->words, wordcnt);
		if (error)
			return error;

		wordoff += wordcnt;
	}

	return 0;
}

/* Set free space in the rtbitmap based on rtrmapbt records. */
STATIC int
xrep_rtbitmap_walk_rtrmap(
	struct xfs_btree_cur		*cur,
	const struct xfs_rmap_irec	*rec,
	void				*priv)
{
	struct xchk_rtbitmap		*rtb = priv;
	int				error = 0;

	if (xchk_should_terminate(rtb->sc, &error))
		return error;

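	/*
	 * The rt blocks between the last mapping we saw and this record are
	 * not owned by anything, so mark them free.
	 */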
	if (rtb->next_rgbno < rec->rm_startblock) {
		error = xrep_rtbitmap_mark_free(rtb, rec->rm_startblock);
		if (error)
			return error;
	}

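	/* Remember the end of the furthest-reaching mapping seen so far. */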
	rtb->next_rgbno = max(rtb->next_rgbno,
			      rec->rm_startblock + rec->rm_blockcount);
	return 0;
}

/*
 * Walk the rtrmapbt to find all the gaps between records, and mark the gaps
 * in the realtime bitmap that we're computing.
 */
STATIC int
xrep_rtbitmap_find_freespace(
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_scrub	*sc = rtb->sc;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_rtgroup	*rtg = sc->sr.rtg;
	uint64_t		blockcount;
	int			error;

	/* Prepare a buffer of ones so that we can accelerate bulk setting. */
	memset(rtb->words, 0xFF, mp->m_sb.sb_blocksize);

	xrep_rtgroup_btcur_init(sc, &sc->sr);
	error = xfs_rmap_query_all(sc->sr.rmap_cur, xrep_rtbitmap_walk_rtrmap,
			rtb);
	if (error)
		goto out;

	/*
	 * Mark as free every possible rt extent from the last one we saw to
	 * the end of the rt group.
	 */
	blockcount = rtg->rtg_extents * mp->m_sb.sb_rextsize;
	if (rtb->next_rgbno < blockcount) {
		error = xrep_rtbitmap_mark_free(rtb, blockcount);
		if (error)
			goto out;
	}

out:
	xchk_rtgroup_btcur_free(&sc->sr);
	return error;
}

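/* Format one fs block of the new bitmap from the in-memory bitmap data. */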
static int
xrep_rtbitmap_prep_buf(
	struct xfs_scrub	*sc,
	struct xfs_buf		*bp,
	void			*data)
{
	struct xchk_rtbitmap	*rtb = data;
	struct xfs_mount	*mp = sc->mp;
	union xfs_rtword_raw	*ondisk;
	int			error;

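	/*
	 * Point the rtbitmap args at this buffer just long enough to find
	 * the start of the on-disk word array, then detach the buffer.
	 */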
	rtb->args.mp = sc->mp;
	rtb->args.tp = sc->tp;
	rtb->args.rbmbp = bp;
	ondisk = xfs_rbmblock_wordptr(&rtb->args, 0);
	rtb->args.rbmbp = NULL;

	error = xfbmp_copyout(rtb, rtb->prep_wordoff, ondisk,
			mp->m_blockwsize);
	if (error)
		return error;

	if (xfs_has_rtgroups(sc->mp)) {
		struct xfs_rtbuf_blkinfo	*hdr = bp->b_addr;

		hdr->rt_magic = cpu_to_be32(XFS_RTBITMAP_MAGIC);
		hdr->rt_owner = cpu_to_be64(sc->ip->i_ino);
		hdr->rt_blkno = cpu_to_be64(xfs_buf_daddr(bp));
		hdr->rt_lsn = 0;
		uuid_copy(&hdr->rt_uuid, &sc->mp->m_sb.sb_meta_uuid);
		bp->b_ops = &xfs_rtbitmap_buf_ops;
	} else {
		bp->b_ops = &xfs_rtbuf_ops;
	}

	rtb->prep_wordoff += mp->m_blockwsize;
	xfs_trans_buf_set_type(sc->tp, bp, XFS_BLFT_RTBITMAP_BUF);
	return 0;
}

/*
 * Make sure that the given range of the data fork of the realtime file
 * contains no unwritten or delalloc extents.  Holes are left alone because
 * we do not yet know the free space information.  The caller must ensure
 * that the inode is joined to the transaction.
 */
STATIC int
xrep_rtbitmap_data_mappings(
	struct xfs_scrub	*sc,
	xfs_filblks_t		len)
{
	struct xfs_bmbt_irec	map;
	xfs_fileoff_t		off = 0;
	int			error;

	ASSERT(sc->ip != NULL);

	while (off < len) {
		int		nmaps = 1;

		/*
		 * If we have a real extent mapping this block then we're
		 * in ok shape.
		 */
		error = xfs_bmapi_read(sc->ip, off, len - off, &map, &nmaps,
				XFS_DATA_FORK);
		if (error)
			return error;
		if (nmaps == 0) {
			ASSERT(nmaps != 0);
			return -EFSCORRUPTED;
		}

		/*
		 * Written extents are ok.  Holes are not filled because we
		 * do not know the freespace information.
		 */
		if (xfs_bmap_is_written_extent(&map) ||
		    map.br_startblock == HOLESTARTBLOCK) {
			off = map.br_startoff + map.br_blockcount;
			continue;
		}

		/*
		 * If we find a delalloc reservation then something is very
		 * very wrong.  Bail out.
		 */
		if (map.br_startblock == DELAYSTARTBLOCK)
			return -EFSCORRUPTED;

		/* Make sure we're really converting an unwritten extent. */
		if (map.br_state != XFS_EXT_UNWRITTEN) {
			ASSERT(map.br_state == XFS_EXT_UNWRITTEN);
			return -EFSCORRUPTED;
		}

		/* Make sure this block has a real zeroed extent mapped. */
		nmaps = 1;
		error = xfs_bmapi_write(sc->tp, sc->ip, map.br_startoff,
				map.br_blockcount,
				XFS_BMAPI_CONVERT | XFS_BMAPI_ZERO,
				0, &map, &nmaps);
		if (error)
			return error;

		/* Commit new extent and all deferred work. */
		error = xrep_defer_finish(sc);
		if (error)
			return error;

		off = map.br_startoff + map.br_blockcount;
	}

	return 0;
}

/* Fix broken rt volume geometry. */
STATIC int
xrep_rtbitmap_geometry(
	struct xfs_scrub	*sc,
	struct xchk_rtbitmap	*rtb)
{
	struct xfs_mount	*mp = sc->mp;
	struct xfs_trans	*tp = sc->tp;

	/* Superblock fields */
	if (mp->m_sb.sb_rextents != rtb->rextents)
		xfs_trans_mod_sb(sc->tp, XFS_TRANS_SB_REXTENTS,
				rtb->rextents - mp->m_sb.sb_rextents);

	if (mp->m_sb.sb_rbmblocks != rtb->rbmblocks)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_RBMBLOCKS,
				rtb->rbmblocks - mp->m_sb.sb_rbmblocks);

	if (mp->m_sb.sb_rextslog != rtb->rextslog)
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_REXTSLOG,
				rtb->rextslog - mp->m_sb.sb_rextslog);

	/* Fix broken isize */
	sc->ip->i_disk_size = roundup_64(sc->ip->i_disk_size,
					 mp->m_sb.sb_blocksize);

	if (sc->ip->i_disk_size < XFS_FSB_TO_B(mp, rtb->rbmblocks))
		sc->ip->i_disk_size = XFS_FSB_TO_B(mp, rtb->rbmblocks);

	xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE);
	return xrep_roll_trans(sc);
}

/* Repair the realtime bitmap file metadata. */
int
xrep_rtbitmap(
	struct xfs_scrub	*sc)
{
	struct xchk_rtbitmap	*rtb = sc->buf;
	struct xfs_mount	*mp = sc->mp;
	struct xfs_group	*xg = rtg_group(sc->sr.rtg);
	unsigned long long	blocks = 0;
	unsigned int		busy_gen;
	int			error;

	/* We require the realtime rmapbt to rebuild anything. */
	if (!xfs_has_rtrmapbt(sc->mp))
		return -EOPNOTSUPP;
	/* We require atomic file exchange range to rebuild anything. */
	if (!xfs_has_exchange_range(sc->mp))
		return -EOPNOTSUPP;

	/* Impossibly large rtbitmap means we can't touch the filesystem. */
	if (rtb->rbmblocks > U32_MAX)
		return 0;

	/*
	 * If the size of the rt bitmap file is larger than what we reserved,
	 * figure out if we need to adjust the block reservation in the
	 * transaction.
	 */
	blocks = xfs_bmbt_calc_size(mp, rtb->rbmblocks);
	if (blocks > UINT_MAX)
		return -EOPNOTSUPP;
	if (blocks > rtb->resblks) {
		error = xfs_trans_reserve_more(sc->tp, blocks, 0);
		if (error)
			return error;

		rtb->resblks += blocks;
	}

	/* Fix inode core and forks. */
	error = xrep_metadata_inode_forks(sc);
	if (error)
		return error;

	xfs_trans_ijoin(sc->tp, sc->ip, 0);

	/* Ensure no unwritten extents. */
	error = xrep_rtbitmap_data_mappings(sc, rtb->rbmblocks);
	if (error)
		return error;

	/*
	 * Fix inconsistent bitmap geometry.  This function returns with a
	 * clean scrub transaction.
	 */
	error = xrep_rtbitmap_geometry(sc, rtb);
	if (error)
		return error;

	/*
	 * Make sure the busy extent list is clear because we can't put extents
	 * on there twice.
	 */
	if (!xfs_extent_busy_list_empty(xg, &busy_gen)) {
		error = xfs_extent_busy_flush(sc->tp, xg, busy_gen, 0);
		if (error)
			return error;
	}

	/*
	 * Generate the new rtbitmap data.  We don't need the rtbmp information
	 * once this call is finished.
	 */
	error = xrep_rtbitmap_find_freespace(rtb);
	if (error)
		return error;

	/*
	 * Try to take ILOCK_EXCL of the temporary file.  We had better be the
	 * only ones holding onto this inode, but we can't block while holding
	 * the rtbitmap file's ILOCK_EXCL.
	 */
	while (!xrep_tempfile_ilock_nowait(sc)) {
		if (xchk_should_terminate(sc, &error))
			return error;
		delay(1);
	}

	/*
	 * Make sure we have space allocated for the part of the bitmap
	 * file that corresponds to this group.  We already joined sc->ip.
	 */
	xfs_trans_ijoin(sc->tp, sc->tempip, 0);
	error = xrep_tempfile_prealloc(sc, 0, rtb->rbmblocks);
	if (error)
		return error;

	/* Last chance to abort before we start committing fixes. */
	if (xchk_should_terminate(sc, &error))
		return error;

	/* Copy the bitmap file that we generated. */
	error = xrep_tempfile_copyin(sc, 0, rtb->rbmblocks,
			xrep_rtbitmap_prep_buf, rtb);
	if (error)
		return error;
	error = xrep_tempfile_set_isize(sc,
			XFS_FSB_TO_B(sc->mp, sc->mp->m_sb.sb_rbmblocks));
	if (error)
		return error;

	/*
	 * Now exchange the data fork contents.  We're done with the temporary
	 * buffer, so we can reuse it for the tempfile exchmaps information.
	 */
	error = xrep_tempexch_trans_reserve(sc, XFS_DATA_FORK, 0,
			rtb->rbmblocks, &rtb->tempexch);
	if (error)
		return error;

	error = xrep_tempexch_contents(sc, &rtb->tempexch);
	if (error)
		return error;

	/* Free the old rtbitmap blocks if they're not in use. */
	return xrep_reap_ifork(sc, sc->tempip, XFS_DATA_FORK);
}