xref: /linux/fs/xfs/xfs_bmap_util.c (revision 292378edcb408c652e841fdc867fc14f8b4995fa)
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_da_format.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_extfree_item.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_log.h"
#include "xfs_rmap_btree.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because the
 * bmap code does.
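 *
 * (A realtime file block is already linear on the realtime device, so it
 * only needs the units conversion to 512-byte basic blocks; a data device
 * block number encodes the AG and the offset within the AG, which
 * XFS_FSB_TO_DADDR decodes.)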
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	return (XFS_IS_REALTIME_INODE(ip) ?
		 (xfs_daddr_t)XFS_FSB_TO_BB((ip)->i_mount, (fsb)) :
		 XFS_FSB_TO_DADDR((ip)->i_mount, (fsb)));
}

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
int
xfs_zero_extent(
	struct xfs_inode *ip,
	xfs_fsblock_t	start_fsb,
	xfs_off_t	count_fsb)
{
	struct xfs_mount *mp = ip->i_mount;
	xfs_daddr_t	sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t	block = XFS_BB_TO_FSBT(mp, sector);

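	/*
	 * blkdev_issue_zeroout() takes 512-byte sector units.  The shifts
	 * below convert filesystem blocks to sectors: with 4096-byte blocks,
	 * for example, s_blocksize_bits is 12, so each count is shifted
	 * left by 12 - 9 = 3, i.e. multiplied by 8.
	 */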
	return blkdev_issue_zeroout(xfs_find_bdev_for_inode(VFS_I(ip)),
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_NOFS, true);
}

int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	xfs_alloctype_t	atype = 0;	/* type for allocation routines */
	int		error;		/* error return value */
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_extlen_t	prod = 0;	/* product factor for allocators */
	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
	xfs_extlen_t	align;		/* minimum allocation alignment */
	xfs_rtblock_t	rtb;

	mp = ap->ip->i_mount;
	align = xfs_get_extsz_hint(ap->ip);
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	if (do_mod(ap->offset, align) || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to MAXEXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * MAXEXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
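
	/*
	 * Worked example of the clamp above (illustrative values): with
	 * sb_rextsize = 16 and MAXEXTLEN = (1 << 21) - 1 = 2097151 blocks,
	 * ralen is cut back to 2097151 / 16 = 131071 rtextents, i.e.
	 * 2097136 blocks, keeping ralen * sb_rextsize within the on-disk
	 * extent length limit.
	 */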

	/*
	 * Lock out modifications to both the RT bitmap and summary inodes
	 */
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
	xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
	xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		xfs_rtblock_t uninitialized_var(rtx); /* realtime extent no */

		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = rtx * mp->m_sb.sb_rextsize;
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	atype = ap->blkno == 0 ?  XFS_ALLOCTYPE_ANY_AG : XFS_ALLOCTYPE_NEAR_BNO;
	do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtb = ap->blkno;
	ap->length = ralen;
	if ((error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
				&ralen, atype, ap->wasdel, prod, &rtb)))
		return error;
	if (rtb == NULLFSBLOCK && prod > 1 &&
	    (error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1,
					   ap->length, &ralen, atype,
					   ap->wasdel, 1, &rtb)))
		return error;
	ap->blkno = rtb;
	if (ap->blkno != NULLFSBLOCK) {
		ap->blkno *= mp->m_sb.sb_rextsize;
		ralen *= mp->m_sb.sb_rextsize;
		ap->length = ralen;
		ap->ip->i_d.di_nblocks += ralen;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ralen;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);

		/* Zero the extent if we were asked to do so */
		if (ap->datatype & XFS_ALLOC_USERDATA_ZERO) {
			error = xfs_zero_extent(ap->ip, ap->blkno, ap->length);
			if (error)
				return error;
		}
	} else {
		ap->length = 0;
	}
	return 0;
}

/*
 * Check if the endoff is outside the last extent. If so the caller will grow
 * the allocation to a stripe unit boundary.  All offsets are considered outside
 * the end of file for an empty fork, so 1 is returned in *eof in that case.
 */
int
xfs_bmap_eof(
	struct xfs_inode	*ip,
	xfs_fileoff_t		endoff,
	int			whichfork,
	int			*eof)
{
	struct xfs_bmbt_irec	rec;
	int			error;

	error = xfs_bmap_last_extent(NULL, ip, whichfork, &rec, eof);
	if (error || *eof)
		return error;

	*eof = endoff >= rec.br_startoff + rec.br_blockcount;
	return 0;
}

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.
 */
STATIC void
xfs_bmap_count_leaves(
	xfs_ifork_t		*ifp,
	xfs_extnum_t		idx,
	int			numrecs,
	int			*count)
{
	int		b;

	for (b = 0; b < numrecs; b++) {
		xfs_bmbt_rec_host_t *frp = xfs_iext_get_ext(ifp, idx + b);
		*count += xfs_bmbt_get_blockcount(frp);
	}
}

/*
 * Count leaf blocks given a range of extent records originally
 * in btree format.
 */
STATIC void
xfs_bmap_disk_count_leaves(
	struct xfs_mount	*mp,
	struct xfs_btree_block	*block,
	int			numrecs,
	int			*count)
{
	int		b;
	xfs_bmbt_rec_t	*frp;

	for (b = 1; b <= numrecs; b++) {
		frp = XFS_BMBT_REC_ADDR(mp, block, b);
		*count += xfs_bmbt_disk_get_blockcount(frp);
	}
}

/*
 * Recursively walks each level of a btree
 * to count total fsblocks in use.
 */
STATIC int                                     /* error */
xfs_bmap_count_tree(
	xfs_mount_t     *mp,            /* file system mount point */
	xfs_trans_t     *tp,            /* transaction pointer */
	xfs_ifork_t	*ifp,		/* inode fork pointer */
	xfs_fsblock_t   blockno,	/* file system block number */
	int             levelin,	/* level in btree */
	int		*count)		/* Count of blocks */
{
	int			error;
	xfs_buf_t		*bp, *nbp;
	int			level = levelin;
	__be64			*pp;
	xfs_fsblock_t           bno = blockno;
	xfs_fsblock_t		nextbno;
	struct xfs_btree_block	*block, *nextblock;
	int			numrecs;

	error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp, XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
	if (error)
		return error;
	*count += 1;
	block = XFS_BUF_TO_BLOCK(bp);

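	/*
	 * An interior level is counted by walking its right-sibling chain,
	 * after which we descend one level through the leftmost pointer;
	 * the leaf level is walked leaf by leaf, adding in the blocks
	 * mapped by each leaf's records.
	 */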
	if (--level) {
		/* Not at node above leaves, count this level of nodes */
		nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
		while (nextbno != NULLFSBLOCK) {
			error = xfs_btree_read_bufl(mp, tp, nextbno, 0, &nbp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			nextblock = XFS_BUF_TO_BLOCK(nbp);
			nextbno = be64_to_cpu(nextblock->bb_u.l.bb_rightsib);
			xfs_trans_brelse(tp, nbp);
		}

		/* Dive to the next level */
		pp = XFS_BMBT_PTR_ADDR(mp, block, 1, mp->m_bmap_dmxr[1]);
		bno = be64_to_cpu(*pp);
		if (unlikely((error =
		     xfs_bmap_count_tree(mp, tp, ifp, bno, level, count)) < 0)) {
			xfs_trans_brelse(tp, bp);
			XFS_ERROR_REPORT("xfs_bmap_count_tree(1)",
					 XFS_ERRLEVEL_LOW, mp);
			return -EFSCORRUPTED;
		}
		xfs_trans_brelse(tp, bp);
	} else {
		/* count all level 1 nodes and their leaves */
		for (;;) {
			nextbno = be64_to_cpu(block->bb_u.l.bb_rightsib);
			numrecs = be16_to_cpu(block->bb_numrecs);
			xfs_bmap_disk_count_leaves(mp, block, numrecs, count);
			xfs_trans_brelse(tp, bp);
			if (nextbno == NULLFSBLOCK)
				break;
			bno = nextbno;
			error = xfs_btree_read_bufl(mp, tp, bno, 0, &bp,
						XFS_BMAP_BTREE_REF,
						&xfs_bmbt_buf_ops);
			if (error)
				return error;
			*count += 1;
			block = XFS_BUF_TO_BLOCK(bp);
		}
	}
	return 0;
}

/*
 * Count fsblocks of the given fork.
 */
static int					/* error */
xfs_bmap_count_blocks(
	xfs_trans_t		*tp,		/* transaction pointer */
	xfs_inode_t		*ip,		/* incore inode */
	int			whichfork,	/* data or attr fork */
	int			*count)		/* out: count of blocks */
{
	struct xfs_btree_block	*block;	/* current btree block */
	xfs_fsblock_t		bno;	/* block # of "block" */
	xfs_ifork_t		*ifp;	/* fork structure */
	int			level;	/* btree level, for checking */
	xfs_mount_t		*mp;	/* file system mount structure */
	__be64			*pp;	/* pointer to block address */

	bno = NULLFSBLOCK;
	mp = ip->i_mount;
	ifp = XFS_IFORK_PTR(ip, whichfork);
	if (XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS) {
		xfs_bmap_count_leaves(ifp, 0,
			ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t),
			count);
		return 0;
	}

	/*
	 * Root level must use BMAP_BROOT_PTR_ADDR macro to get ptr out.
	 */
	block = ifp->if_broot;
	level = be16_to_cpu(block->bb_level);
	ASSERT(level > 0);
	pp = XFS_BMAP_BROOT_PTR_ADDR(mp, block, 1, ifp->if_broot_bytes);
	bno = be64_to_cpu(*pp);
	ASSERT(bno != NULLFSBLOCK);
	ASSERT(XFS_FSB_TO_AGNO(mp, bno) < mp->m_sb.sb_agcount);
	ASSERT(XFS_FSB_TO_AGBNO(mp, bno) < mp->m_sb.sb_agblocks);

	if (unlikely(xfs_bmap_count_tree(mp, tp, ifp, bno, level, count) < 0)) {
		XFS_ERROR_REPORT("xfs_bmap_count_blocks(2)", XFS_ERRLEVEL_LOW,
				 mp);
		return -EFSCORRUPTED;
	}

	return 0;
}

/*
 * returns 1 for success, 0 if we failed to map the extent.
 */
STATIC int
xfs_getbmapx_fix_eof_hole(
	xfs_inode_t		*ip,		/* xfs incore inode pointer */
	struct getbmapx		*out,		/* output structure */
	int			prealloced,	/* this is a file with
						 * preallocated data space */
	__int64_t		end,		/* last block requested */
	xfs_fsblock_t		startblock)
{
	__int64_t		fixlen;
	xfs_mount_t		*mp;		/* file system mount point */
	xfs_ifork_t		*ifp;		/* inode fork pointer */
	xfs_extnum_t		lastx;		/* last extent pointer */
	xfs_fileoff_t		fileblock;

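	/*
	 * getbmapx reporting convention: bmv_block is -1 for a hole, -2 for
	 * a delalloc extent, and otherwise the disk address in 512-byte
	 * basic blocks.
	 */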
	if (startblock == HOLESTARTBLOCK) {
		mp = ip->i_mount;
		out->bmv_block = -1;
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		fixlen -= out->bmv_offset;
		if (prealloced && out->bmv_offset + out->bmv_length == end) {
			/* Came to hole at EOF. Trim it. */
			if (fixlen <= 0)
				return 0;
			out->bmv_length = fixlen;
		}
	} else {
		if (startblock == DELAYSTARTBLOCK)
			out->bmv_block = -2;
		else
			out->bmv_block = xfs_fsb_to_db(ip, startblock);
		fileblock = XFS_BB_TO_FSB(ip->i_mount, out->bmv_offset);
		ifp = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
		if (xfs_iext_bno_to_ext(ifp, fileblock, &lastx) &&
		   (lastx == (ifp->if_bytes / (uint)sizeof(xfs_bmbt_rec_t))-1))
			out->bmv_oflags |= BMV_OF_LAST;
	}

	return 1;
}

/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 */
int						/* error code */
xfs_getbmap(
	xfs_inode_t		*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	xfs_bmap_format_t	formatter,	/* format to user */
	void			*arg)		/* formatter arg */
{
	__int64_t		bmvend;		/* last block requested */
	int			error = 0;	/* return value */
	__int64_t		fixlen;		/* length for -1 case */
	int			i;		/* extent number */
	int			lock;		/* lock state */
	xfs_bmbt_irec_t		*map;		/* buffer for user's data */
	xfs_mount_t		*mp;		/* file system mount point */
	int			nex;		/* # of user extents can do */
	int			nexleft;	/* # of user extents left */
	int			subnex;		/* # of bmapi's can do */
	int			nmap;		/* number of map entries */
	struct getbmapx		*out;		/* output structure */
	int			whichfork;	/* data or attr fork */
	int			prealloced;	/* this is a file with
						 * preallocated data space */
	int			iflags;		/* interface flags */
	int			bmapi_flags;	/* flags for xfs_bmapi */
	int			cur_ext = 0;

	mp = ip->i_mount;
	iflags = bmv->bmv_iflags;
	whichfork = iflags & BMV_IF_ATTRFORK ? XFS_ATTR_FORK : XFS_DATA_FORK;

	if (whichfork == XFS_ATTR_FORK) {
		if (XFS_IFORK_Q(ip)) {
			if (ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_BTREE &&
			    ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)
				return -EINVAL;
		} else if (unlikely(
			   ip->i_d.di_aformat != 0 &&
			   ip->i_d.di_aformat != XFS_DINODE_FMT_EXTENTS)) {
			XFS_ERROR_REPORT("xfs_getbmap", XFS_ERRLEVEL_LOW,
					 ip->i_mount);
			return -EFSCORRUPTED;
		}

		prealloced = 0;
		fixlen = 1LL << 32;
	} else {
		if (ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS &&
		    ip->i_d.di_format != XFS_DINODE_FMT_BTREE &&
		    ip->i_d.di_format != XFS_DINODE_FMT_LOCAL)
			return -EINVAL;

		if (xfs_get_extsz_hint(ip) ||
		    ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC|XFS_DIFLAG_APPEND)) {
			prealloced = 1;
			fixlen = mp->m_super->s_maxbytes;
		} else {
			prealloced = 0;
			fixlen = XFS_ISIZE(ip);
		}
	}

	if (bmv->bmv_length == -1) {
		fixlen = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, fixlen));
		bmv->bmv_length =
			max_t(__int64_t, fixlen - bmv->bmv_offset, 0);
	} else if (bmv->bmv_length == 0) {
		bmv->bmv_entries = 0;
		return 0;
	} else if (bmv->bmv_length < 0) {
		return -EINVAL;
	}

	nex = bmv->bmv_count - 1;
	if (nex <= 0)
		return -EINVAL;
	bmvend = bmv->bmv_offset + bmv->bmv_length;

	if (bmv->bmv_count > ULONG_MAX / sizeof(struct getbmapx))
		return -ENOMEM;
	out = kmem_zalloc_large(bmv->bmv_count * sizeof(struct getbmapx), 0);
	if (!out)
		return -ENOMEM;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	if (whichfork == XFS_DATA_FORK) {
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		lock = xfs_ilock_data_map_shared(ip);
	} else {
		lock = xfs_ilock_attr_map_shared(ip);
	}

	/*
	 * Don't let nex be bigger than the number of extents
	 * we can have assuming alternating holes and real extents.
	 */
	if (nex > XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1)
		nex = XFS_IFORK_NEXTENTS(ip, whichfork) * 2 + 1;

	bmapi_flags = xfs_bmapi_aflag(whichfork);
	if (!(iflags & BMV_IF_PREALLOC))
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	/*
	 * Allocate enough space to handle "subnex" maps at a time.
	 */
	error = -ENOMEM;
	subnex = 16;
	map = kmem_alloc(subnex * sizeof(*map), KM_MAYFAIL | KM_NOFS);
	if (!map)
		goto out_unlock_ilock;

	bmv->bmv_entries = 0;

	if (XFS_IFORK_NEXTENTS(ip, whichfork) == 0 &&
	    (whichfork == XFS_ATTR_FORK || !(iflags & BMV_IF_DELALLOC))) {
		error = 0;
		goto out_free_map;
	}

	nexleft = nex;

	do {
		nmap = (nexleft > subnex) ? subnex : nexleft;
		error = xfs_bmapi_read(ip, XFS_BB_TO_FSBT(mp, bmv->bmv_offset),
				       XFS_BB_TO_FSB(mp, bmv->bmv_length),
				       map, &nmap, bmapi_flags);
		if (error)
			goto out_free_map;
		ASSERT(nmap <= subnex);

		for (i = 0; i < nmap && nexleft && bmv->bmv_length; i++) {
			out[cur_ext].bmv_oflags = 0;
			if (map[i].br_state == XFS_EXT_UNWRITTEN)
				out[cur_ext].bmv_oflags |= BMV_OF_PREALLOC;
			else if (map[i].br_startblock == DELAYSTARTBLOCK)
				out[cur_ext].bmv_oflags |= BMV_OF_DELALLOC;
			out[cur_ext].bmv_offset =
				XFS_FSB_TO_BB(mp, map[i].br_startoff);
			out[cur_ext].bmv_length =
				XFS_FSB_TO_BB(mp, map[i].br_blockcount);
			out[cur_ext].bmv_unused1 = 0;
			out[cur_ext].bmv_unused2 = 0;

			/*
			 * delayed allocation extents that start beyond EOF can
			 * occur due to speculative EOF allocation when the
			 * delalloc extent is larger than the largest freespace
			 * extent at conversion time. These extents cannot be
			 * converted by data writeback, so can exist here even
			 * if we are not supposed to be finding delalloc
			 * extents.
			 */
			if (map[i].br_startblock == DELAYSTARTBLOCK &&
			    map[i].br_startoff <= XFS_B_TO_FSB(mp, XFS_ISIZE(ip)))
				ASSERT((iflags & BMV_IF_DELALLOC) != 0);

			if (map[i].br_startblock == HOLESTARTBLOCK &&
			    whichfork == XFS_ATTR_FORK) {
				/* came to the end of attribute fork */
				out[cur_ext].bmv_oflags |= BMV_OF_LAST;
				goto out_free_map;
			}

			if (!xfs_getbmapx_fix_eof_hole(ip, &out[cur_ext],
					prealloced, bmvend,
					map[i].br_startblock))
				goto out_free_map;

			bmv->bmv_offset =
				out[cur_ext].bmv_offset +
				out[cur_ext].bmv_length;
			bmv->bmv_length =
				max_t(__int64_t, 0, bmvend - bmv->bmv_offset);

			/*
			 * In case we don't want to return the hole,
			 * don't increase cur_ext so that we can reuse
			 * it in the next loop.
			 */
			if ((iflags & BMV_IF_NO_HOLES) &&
			    map[i].br_startblock == HOLESTARTBLOCK) {
				memset(&out[cur_ext], 0, sizeof(out[cur_ext]));
				continue;
			}

			nexleft--;
			bmv->bmv_entries++;
			cur_ext++;
		}
	} while (nmap && nexleft && bmv->bmv_length);

 out_free_map:
	kmem_free(map);
 out_unlock_ilock:
	xfs_iunlock(ip, lock);
 out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

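	/*
	 * Format the results only after both locks have been dropped: the
	 * formatter typically copies entries out to a user buffer, which
	 * can fault, so it must not run under the inode locks.
	 */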
	for (i = 0; i < cur_ext; i++) {
		int full = 0;	/* user array is full */

		/* format results & advance arg */
		error = formatter(&arg, &out[i], &full);
		if (error || full)
			break;
	}

	kmem_free(out);
	return error;
}

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  Walks a block at a time so will be slow, but is only executed in
 * rare error cases so the overhead is not critical.  This will always punch out
 * both the start and end blocks, even if the ranges only partially overlap
 * them, so it is up to the caller to ensure that partial blocks are not
 * passed in.
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		length)
{
	xfs_fileoff_t		remaining = length;
	int			error = 0;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	do {
		int		done;
		xfs_bmbt_irec_t	imap;
		int		nimaps = 1;
		xfs_fsblock_t	firstblock;
		struct xfs_defer_ops dfops;

		/*
		 * Map the range first and check that it is a delalloc extent
		 * before trying to unmap the range. Otherwise we will be
		 * trying to remove a real extent (which requires a
		 * transaction) or a hole, which is probably a bad idea...
		 */
		error = xfs_bmapi_read(ip, start_fsb, 1, &imap, &nimaps,
				       XFS_BMAPI_ENTIRE);

		if (error) {
			/* something screwed, just bail */
			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				xfs_alert(ip->i_mount,
			"Failed delalloc mapping lookup ino %lld fsb %lld.",
						ip->i_ino, start_fsb);
			}
			break;
		}
		if (!nimaps) {
			/* nothing there */
			goto next_block;
		}
		if (imap.br_startblock != DELAYSTARTBLOCK) {
			/* been converted, ignore */
			goto next_block;
		}
		WARN_ON(imap.br_blockcount == 0);

		/*
		 * Note: while we initialise the firstblock/dfops pair, they
		 * should never be used because blocks should never be
		 * allocated or freed for a delalloc extent and hence we
		 * don't need to cancel or finish them after the
		 * xfs_bunmapi() call.
		 */
		xfs_defer_init(&dfops, &firstblock);
		error = xfs_bunmapi(NULL, ip, start_fsb, 1, 0, 1, &firstblock,
					&dfops, &done);
		if (error)
			break;

		ASSERT(!xfs_defer_has_unfinished_work(&dfops));
next_block:
		start_fsb++;
		remaining--;
	} while (remaining > 0);

	return error;
}

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks. The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and no delalloc blocks will
	 * not have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
		return false;

	/*
	 * Do not free real preallocated or append-only files unless the file
	 * has delalloc blocks and we are forced to remove them.
	 */
	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (!force || ip->i_delayed_blks == 0)
			return false;

	return true;
}

/*
 * This is called by xfs_inactive to free any blocks beyond eof
 * when the link count isn't zero and by xfs_dm_punch_hole() when
 * punching a hole to EOF.
 */
int
xfs_free_eofblocks(
	xfs_mount_t	*mp,
	xfs_inode_t	*ip,
	bool		need_iolock)
{
	xfs_trans_t	*tp;
	int		error;
	xfs_fileoff_t	end_fsb;
	xfs_fileoff_t	last_fsb;
	xfs_filblks_t	map_len;
	int		nimaps;
	xfs_bmbt_irec_t	imap;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip, 0);
		if (error)
			return error;

		/*
		 * There are blocks after the end of file.
		 * Free them up now by truncating the file to
		 * its current size.
		 */
		if (need_iolock) {
			if (!xfs_ilock_nowait(ip, XFS_IOLOCK_EXCL))
				return -EAGAIN;
		}

		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
				&tp);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			if (need_iolock)
				xfs_iunlock(ip, XFS_IOLOCK_EXCL);
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (i.e. the NULL files bug).
		 */
		error = xfs_itruncate_extents(&tp, ip, XFS_DATA_FORK,
					      XFS_ISIZE(ip));
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp);
		} else {
			error = xfs_trans_commit(tp);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (need_iolock)
			xfs_iunlock(ip, XFS_IOLOCK_EXCL);
	}
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fsblock_t		firstfsb;
	int			nimaps;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	struct xfs_defer_ops	dfops;
	uint			qblocks, resblks, resrtextents;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip, 0);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
	allocatesize_fsb = XFS_B_TO_FSB(mp, count);

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			if ((temp = do_mod(startoffset_fsb, extsz)))
				e += temp;
			if ((temp = do_mod(e, extsz)))
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}
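
		/*
		 * Illustrative numbers for the rounding above: with
		 * extsz = 8, startoffset_fsb = 13 and allocatesize_fsb = 10,
		 * s rounds down to 8 and e becomes 13 + 10 = 23, extended by
		 * the 5 blocks of start rounding to 28 and then rounded up
		 * to 32, so the reservation covers the whole extsize-aligned
		 * range [8, 32).
		 */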

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
				resrtextents, 0, &tp);

		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		xfs_trans_ijoin(tp, ip, 0);

		xfs_defer_init(&dfops, &firstfsb);
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
					allocatesize_fsb, alloc_type, &firstfsb,
					resblks, imapp, &nimaps, &dfops);
		if (error)
			goto error0;

		/*
		 * Complete the transaction
		 */
		error = xfs_defer_finish(&tp, &dfops, NULL);
		if (error)
			goto error0;

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			break;

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = -ENOSPC;
			break;
		}

		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
	xfs_defer_cancel(&dfops);
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	struct xfs_defer_ops	dfops;
	xfs_fsblock_t		firstfsb;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	if (error) {
		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

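	/*
	 * The nexts argument of 2 passed to xfs_bunmapi() below bounds how
	 * many extents one transaction unmaps, keeping the work within the
	 * fixed XFS_DIOSTRAT_SPACE_RES reservation made above; *done tells
	 * the caller whether another pass over the remaining range is
	 * needed.
	 */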
	xfs_defer_init(&dfops, &firstfsb);
	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, &firstfsb,
			&dfops, done);
	if (error)
		goto out_bmap_cancel;

	error = xfs_defer_finish(&tp, &dfops, ip);
	if (error)
		goto out_bmap_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_bmap_cancel:
	xfs_defer_cancel(&dfops);
out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

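/*
 * Round the start offset of an unmap request up, and the end offset down,
 * to the realtime extent size where the underlying mapping is allocated,
 * so that we never punch out a partial realtime extent.
 */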
1071bdb0d04fSChristoph Hellwig static int
1072bdb0d04fSChristoph Hellwig xfs_adjust_extent_unmap_boundaries(
1073bdb0d04fSChristoph Hellwig 	struct xfs_inode	*ip,
1074bdb0d04fSChristoph Hellwig 	xfs_fileoff_t		*startoffset_fsb,
1075bdb0d04fSChristoph Hellwig 	xfs_fileoff_t		*endoffset_fsb)
1076bdb0d04fSChristoph Hellwig {
1077bdb0d04fSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1078bdb0d04fSChristoph Hellwig 	struct xfs_bmbt_irec	imap;
1079bdb0d04fSChristoph Hellwig 	int			nimap, error;
1080c24b5dfaSDave Chinner 	xfs_extlen_t		mod = 0;
1081c24b5dfaSDave Chinner 
1082c24b5dfaSDave Chinner 	nimap = 1;
1083bdb0d04fSChristoph Hellwig 	error = xfs_bmapi_read(ip, *startoffset_fsb, 1, &imap, &nimap, 0);
1084c24b5dfaSDave Chinner 	if (error)
1085bdb0d04fSChristoph Hellwig 		return error;
1086c24b5dfaSDave Chinner 
1087c24b5dfaSDave Chinner 	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1088c24b5dfaSDave Chinner 		xfs_daddr_t	block;
1089c24b5dfaSDave Chinner 
1090c24b5dfaSDave Chinner 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1091c24b5dfaSDave Chinner 		block = imap.br_startblock;
1092c24b5dfaSDave Chinner 		mod = do_div(block, mp->m_sb.sb_rextsize);
1093c24b5dfaSDave Chinner 		if (mod)
1094bdb0d04fSChristoph Hellwig 			*startoffset_fsb += mp->m_sb.sb_rextsize - mod;
1095c24b5dfaSDave Chinner 	}
1096c24b5dfaSDave Chinner 
1097c24b5dfaSDave Chinner 	nimap = 1;
1098bdb0d04fSChristoph Hellwig 	error = xfs_bmapi_read(ip, *endoffset_fsb - 1, 1, &imap, &nimap, 0);
1099c24b5dfaSDave Chinner 	if (error)
1100c24b5dfaSDave Chinner 		return error;
1101c24b5dfaSDave Chinner 
1102c24b5dfaSDave Chinner 	if (nimap && imap.br_startblock != HOLESTARTBLOCK) {
1103c24b5dfaSDave Chinner 		ASSERT(imap.br_startblock != DELAYSTARTBLOCK);
1104c24b5dfaSDave Chinner 		mod++;
1105bdb0d04fSChristoph Hellwig 		if (mod && mod != mp->m_sb.sb_rextsize)
1106bdb0d04fSChristoph Hellwig 			*endoffset_fsb -= mod;
1107c24b5dfaSDave Chinner 	}
1108c24b5dfaSDave Chinner 
1109bdb0d04fSChristoph Hellwig 	return 0;
1110c24b5dfaSDave Chinner }
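
/*
 * Worked example of the realtime rounding above (illustrative numbers
 * only): with sb_rextsize = 4 blocks and a first mapped startblock of
 * 10, do_div() leaves mod = 10 % 4 = 2, so the start offset is pushed
 * forward by 4 - 2 = 2 blocks to the next realtime extent boundary.
 * The end offset is trimmed back in the same spirit, so that
 * xfs_bunmapi() is only ever asked to unmap whole realtime extents.
 */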
1111bdb0d04fSChristoph Hellwig 
1112bdb0d04fSChristoph Hellwig static int
1113bdb0d04fSChristoph Hellwig xfs_flush_unmap_range(
1114bdb0d04fSChristoph Hellwig 	struct xfs_inode	*ip,
1115bdb0d04fSChristoph Hellwig 	xfs_off_t		offset,
1116bdb0d04fSChristoph Hellwig 	xfs_off_t		len)
1117bdb0d04fSChristoph Hellwig {
1118bdb0d04fSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1119bdb0d04fSChristoph Hellwig 	struct inode		*inode = VFS_I(ip);
1120bdb0d04fSChristoph Hellwig 	xfs_off_t		rounding, start, end;
1121bdb0d04fSChristoph Hellwig 	int			error;
1122bdb0d04fSChristoph Hellwig 
1123bdb0d04fSChristoph Hellwig 	/* wait for the completion of any pending DIOs */
1124bdb0d04fSChristoph Hellwig 	inode_dio_wait(inode);
1125bdb0d04fSChristoph Hellwig 
1126bdb0d04fSChristoph Hellwig 	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
1127bdb0d04fSChristoph Hellwig 	start = round_down(offset, rounding);
1128bdb0d04fSChristoph Hellwig 	end = round_up(offset + len, rounding) - 1;
1129bdb0d04fSChristoph Hellwig 
1130bdb0d04fSChristoph Hellwig 	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
1131c24b5dfaSDave Chinner 	if (error)
1132c24b5dfaSDave Chinner 		return error;
1133bdb0d04fSChristoph Hellwig 	truncate_pagecache_range(inode, start, end);
1134bdb0d04fSChristoph Hellwig 	return 0;
1135c24b5dfaSDave Chinner }
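
/*
 * Worked example of the rounding above (illustrative numbers): with
 * 4096-byte blocks and a 4096-byte PAGE_SIZE, rounding = 4096, so for
 * offset = 5000 and len = 3000:
 *
 *	start = round_down(5000, 4096)   = 4096
 *	end   = round_up(8000, 4096) - 1 = 8191
 *
 * i.e. every page straddling the range is written back and dropped
 * from the page cache before the extents underneath go away.
 */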
1136c24b5dfaSDave Chinner 
1137c24b5dfaSDave Chinner int
1138c24b5dfaSDave Chinner xfs_free_file_space(
1139c24b5dfaSDave Chinner 	struct xfs_inode	*ip,
1140c24b5dfaSDave Chinner 	xfs_off_t		offset,
1141c24b5dfaSDave Chinner 	xfs_off_t		len)
1142c24b5dfaSDave Chinner {
1143bdb0d04fSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1144c24b5dfaSDave Chinner 	xfs_fileoff_t		startoffset_fsb;
1145bdb0d04fSChristoph Hellwig 	xfs_fileoff_t		endoffset_fsb;
11463c2bdc91SChristoph Hellwig 	int			done = 0, error;
1147c24b5dfaSDave Chinner 
1148c24b5dfaSDave Chinner 	trace_xfs_free_file_space(ip);
1149c24b5dfaSDave Chinner 
1150c24b5dfaSDave Chinner 	error = xfs_qm_dqattach(ip, 0);
1151c24b5dfaSDave Chinner 	if (error)
1152c24b5dfaSDave Chinner 		return error;
1153c24b5dfaSDave Chinner 
1154c24b5dfaSDave Chinner 	if (len <= 0)	/* if nothing is being freed */
1155bdb0d04fSChristoph Hellwig 		return 0;
1156bdb0d04fSChristoph Hellwig 
1157bdb0d04fSChristoph Hellwig 	error = xfs_flush_unmap_range(ip, offset, len);
1158bdb0d04fSChristoph Hellwig 	if (error)
1159c24b5dfaSDave Chinner 		return error;
1160bdb0d04fSChristoph Hellwig 
1161c24b5dfaSDave Chinner 	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
1162c24b5dfaSDave Chinner 	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);
1163c24b5dfaSDave Chinner 
1164bdb0d04fSChristoph Hellwig 	/*
1165bdb0d04fSChristoph Hellwig 	 * Need to zero the stuff we're not freeing, on disk.  If it's a RT file
1166bdb0d04fSChristoph Hellwig 	 * and we can't use unwritten extents then we actually need to zero
1167bdb0d04fSChristoph Hellwig 	 * the whole extent, otherwise we just need to take care of the block
1168bdb0d04fSChristoph Hellwig 	 * boundaries, and xfs_bunmapi will handle the rest.
1169bdb0d04fSChristoph Hellwig 	 */
1170bdb0d04fSChristoph Hellwig 	if (XFS_IS_REALTIME_INODE(ip) &&
1171bdb0d04fSChristoph Hellwig 	    !xfs_sb_version_hasextflgbit(&mp->m_sb)) {
1172bdb0d04fSChristoph Hellwig 		error = xfs_adjust_extent_unmap_boundaries(ip, &startoffset_fsb,
1173bdb0d04fSChristoph Hellwig 				&endoffset_fsb);
1174c24b5dfaSDave Chinner 		if (error)
1175c24b5dfaSDave Chinner 			return error;
1176bdb0d04fSChristoph Hellwig 	}
1177c24b5dfaSDave Chinner 
11783c2bdc91SChristoph Hellwig 	if (endoffset_fsb > startoffset_fsb) {
11793c2bdc91SChristoph Hellwig 		while (!done) {
1180bdb0d04fSChristoph Hellwig 			error = xfs_unmap_extent(ip, startoffset_fsb,
1181bdb0d04fSChristoph Hellwig 					endoffset_fsb - startoffset_fsb, &done);
11823c2bdc91SChristoph Hellwig 			if (error)
11833c2bdc91SChristoph Hellwig 				return error;
11843c2bdc91SChristoph Hellwig 		}
1185c24b5dfaSDave Chinner 	}
1186c24b5dfaSDave Chinner 
11873c2bdc91SChristoph Hellwig 	/*
11883c2bdc91SChristoph Hellwig 	 * Now that we've unmapped all full blocks we'll have to zero out any
11893c2bdc91SChristoph Hellwig 	 * partial block at the beginning and/or end.  xfs_zero_range is
11903c2bdc91SChristoph Hellwig 	 * smart enough to skip any holes, including those we just created.
11913c2bdc91SChristoph Hellwig 	 */
11923c2bdc91SChristoph Hellwig 	return xfs_zero_range(ip, offset, len, NULL);
1193c24b5dfaSDave Chinner }
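
/*
 * Worked example of the conversions above (illustrative numbers): with
 * 4096-byte blocks, offset = 5000 and len = 10000 give
 *
 *	startoffset_fsb = XFS_B_TO_FSB(mp, 5000)   = 2	(rounds up)
 *	endoffset_fsb   = XFS_B_TO_FSBT(mp, 15000) = 3	(rounds down)
 *
 * so only block 2 (bytes 8192-12287) is unmapped; the partial ranges
 * 5000-8191 and 12288-14999 are left to the final xfs_zero_range()
 * call.
 */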
1194c24b5dfaSDave Chinner 
11955d11fb4bSBrian Foster /*
11965d11fb4bSBrian Foster  * Preallocate and zero a range of a file. This mechanism has the allocation
11975d11fb4bSBrian Foster  * semantics of fallocate and in addition converts data in the range to zeroes.
11985d11fb4bSBrian Foster  */
1199865e9446SChristoph Hellwig int
1200c24b5dfaSDave Chinner xfs_zero_file_space(
1201c24b5dfaSDave Chinner 	struct xfs_inode	*ip,
1202c24b5dfaSDave Chinner 	xfs_off_t		offset,
12035f8aca8bSChristoph Hellwig 	xfs_off_t		len)
1204c24b5dfaSDave Chinner {
1205c24b5dfaSDave Chinner 	struct xfs_mount	*mp = ip->i_mount;
12065d11fb4bSBrian Foster 	uint			blksize;
1207c24b5dfaSDave Chinner 	int			error;
1208c24b5dfaSDave Chinner 
1209897b73b6SDave Chinner 	trace_xfs_zero_file_space(ip);
1210897b73b6SDave Chinner 
12115d11fb4bSBrian Foster 	blksize = 1 << mp->m_sb.sb_blocklog;
1212c24b5dfaSDave Chinner 
1213c24b5dfaSDave Chinner 	/*
12145d11fb4bSBrian Foster 	 * Punch a hole and prealloc the range. We use hole punch rather than
12155d11fb4bSBrian Foster 	 * unwritten extent conversion for two reasons:
12165d11fb4bSBrian Foster 	 *
12175d11fb4bSBrian Foster 	 * 1.) Hole punch handles partial block zeroing for us.
12185d11fb4bSBrian Foster 	 *
12195d11fb4bSBrian Foster 	 * 2.) If prealloc returns ENOSPC, the file range is still zero-valued
12205d11fb4bSBrian Foster 	 * by virtue of the hole punch.
1221c24b5dfaSDave Chinner 	 */
12225d11fb4bSBrian Foster 	error = xfs_free_file_space(ip, offset, len);
1223c24b5dfaSDave Chinner 	if (error)
12245f8aca8bSChristoph Hellwig 		goto out;
1225c24b5dfaSDave Chinner 
12265d11fb4bSBrian Foster 	error = xfs_alloc_file_space(ip, round_down(offset, blksize),
12275d11fb4bSBrian Foster 				     round_up(offset + len, blksize) -
12285d11fb4bSBrian Foster 				     round_down(offset, blksize),
12295d11fb4bSBrian Foster 				     XFS_BMAPI_PREALLOC);
12305f8aca8bSChristoph Hellwig out:
1231c24b5dfaSDave Chinner 	return error;
1233c24b5dfaSDave Chinner }
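
/*
 * Worked example of the alignment above (illustrative numbers): with
 * blksize = 4096, offset = 5000 and len = 3000, the preallocation spans
 *
 *	round_down(5000, 4096)        = 4096
 *	round_up(8000, 4096) - 4096   = 4096 bytes
 *
 * i.e. exactly the one block that fully contains the zeroed byte range,
 * so no block inside the caller's range is left sparse.
 */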
1234c24b5dfaSDave Chinner 
1235c24b5dfaSDave Chinner /*
1236a904b1caSNamjae Jeon  * @next_fsb will keep track of the extent currently undergoing shift.
1237a904b1caSNamjae Jeon  * @stop_fsb will keep track of the extent at which we have to stop.
1238a904b1caSNamjae Jeon  * If we are shifting left, we will start with the block at (offset + len)
1239a904b1caSNamjae Jeon  * and shift each extent up to and including the last extent.
1240a904b1caSNamjae Jeon  * If we are shifting right, we will start with the last extent inside the
1241a904b1caSNamjae Jeon  * file and continue until we reach the block corresponding to offset.
1242e1d8fb88SNamjae Jeon  */
124372c1a739Skbuild test robot static int
1244a904b1caSNamjae Jeon xfs_shift_file_space(
1245e1d8fb88SNamjae Jeon 	struct xfs_inode        *ip,
1246e1d8fb88SNamjae Jeon 	xfs_off_t               offset,
1247a904b1caSNamjae Jeon 	xfs_off_t               len,
1248a904b1caSNamjae Jeon 	enum shift_direction	direction)
1249e1d8fb88SNamjae Jeon {
1250e1d8fb88SNamjae Jeon 	int			done = 0;
1251e1d8fb88SNamjae Jeon 	struct xfs_mount	*mp = ip->i_mount;
1252e1d8fb88SNamjae Jeon 	struct xfs_trans	*tp;
1253e1d8fb88SNamjae Jeon 	int			error;
12542c3234d1SDarrick J. Wong 	struct xfs_defer_ops	dfops;
1255e1d8fb88SNamjae Jeon 	xfs_fsblock_t		first_block;
1256a904b1caSNamjae Jeon 	xfs_fileoff_t		stop_fsb;
12572c845f5aSBrian Foster 	xfs_fileoff_t		next_fsb;
1258e1d8fb88SNamjae Jeon 	xfs_fileoff_t		shift_fsb;
1259e1d8fb88SNamjae Jeon 
1260a904b1caSNamjae Jeon 	ASSERT(direction == SHIFT_LEFT || direction == SHIFT_RIGHT);
1261e1d8fb88SNamjae Jeon 
1262a904b1caSNamjae Jeon 	if (direction == SHIFT_LEFT) {
12632c845f5aSBrian Foster 		next_fsb = XFS_B_TO_FSB(mp, offset + len);
1264a904b1caSNamjae Jeon 		stop_fsb = XFS_B_TO_FSB(mp, VFS_I(ip)->i_size);
1265a904b1caSNamjae Jeon 	} else {
1266a904b1caSNamjae Jeon 		/*
1267a904b1caSNamjae Jeon 		 * For a right shift, delegate the initialization of next_fsb
1268a904b1caSNamjae Jeon 		 * to xfs_bmap_shift_extents, which runs with the ilock held.
1269a904b1caSNamjae Jeon 		 */
1270a904b1caSNamjae Jeon 		next_fsb = NULLFSBLOCK;
1271a904b1caSNamjae Jeon 		stop_fsb = XFS_B_TO_FSB(mp, offset);
1272a904b1caSNamjae Jeon 	}
1273e1d8fb88SNamjae Jeon 
1274a904b1caSNamjae Jeon 	shift_fsb = XFS_B_TO_FSB(mp, len);
1275f71721d0SBrian Foster 
1276f71721d0SBrian Foster 	/*
1277f71721d0SBrian Foster 	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
1278f71721d0SBrian Foster 	 * into the accessible region of the file.
1279f71721d0SBrian Foster 	 */
128041b9d726SBrian Foster 	if (xfs_can_free_eofblocks(ip, true)) {
128141b9d726SBrian Foster 		error = xfs_free_eofblocks(mp, ip, false);
128241b9d726SBrian Foster 		if (error)
128341b9d726SBrian Foster 			return error;
128441b9d726SBrian Foster 	}
12851669a8caSDave Chinner 
1286f71721d0SBrian Foster 	/*
1287f71721d0SBrian Foster 	 * Write back and invalidate the page cache for the remainder of the
1288a904b1caSNamjae Jeon 	 * file, as we're about to shift every extent from offset to EOF.
1289f71721d0SBrian Foster 	 */
1290f71721d0SBrian Foster 	error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
1291a904b1caSNamjae Jeon 					     offset, -1);
1292f71721d0SBrian Foster 	if (error)
1293f71721d0SBrian Foster 		return error;
1294f71721d0SBrian Foster 	error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
129509cbfeafSKirill A. Shutemov 					offset >> PAGE_SHIFT, -1);
1296e1d8fb88SNamjae Jeon 	if (error)
1297e1d8fb88SNamjae Jeon 		return error;
1298e1d8fb88SNamjae Jeon 
1299a904b1caSNamjae Jeon 	/*
1300a904b1caSNamjae Jeon 	 * The extent shifting code works at extent granularity, so if
1301a904b1caSNamjae Jeon 	 * stop_fsb is not the starting block of an extent, we need to split
1302a904b1caSNamjae Jeon 	 * the extent at stop_fsb.
1303a904b1caSNamjae Jeon 	 */
1304a904b1caSNamjae Jeon 	if (direction == SHIFT_RIGHT) {
1305a904b1caSNamjae Jeon 		error = xfs_bmap_split_extent(ip, stop_fsb);
1306a904b1caSNamjae Jeon 		if (error)
1307a904b1caSNamjae Jeon 			return error;
1308a904b1caSNamjae Jeon 	}
1309a904b1caSNamjae Jeon 
1310e1d8fb88SNamjae Jeon 	while (!error && !done) {
1311e1d8fb88SNamjae Jeon 		/*
1312e1d8fb88SNamjae Jeon 		 * We need a permanent block reservation for the transaction:
1313e1d8fb88SNamjae Jeon 		 * after shifting an extent into a hole, adjacent extents may
1314e1d8fb88SNamjae Jeon 		 * turn out to be mergeable, and merging them can free a
1315e1d8fb88SNamjae Jeon 		 * block during the record update.
1316e1d8fb88SNamjae Jeon 		 */
1317253f4911SChristoph Hellwig 		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1318253f4911SChristoph Hellwig 				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1319253f4911SChristoph Hellwig 		if (error)
1320e1d8fb88SNamjae Jeon 			break;
1321e1d8fb88SNamjae Jeon 
1322e1d8fb88SNamjae Jeon 		xfs_ilock(ip, XFS_ILOCK_EXCL);
1323e1d8fb88SNamjae Jeon 		error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot,
1324e1d8fb88SNamjae Jeon 				ip->i_gdquot, ip->i_pdquot,
1325e1d8fb88SNamjae Jeon 				XFS_DIOSTRAT_SPACE_RES(mp, 0), 0,
1326e1d8fb88SNamjae Jeon 				XFS_QMOPT_RES_REGBLKS);
1327e1d8fb88SNamjae Jeon 		if (error)
1328d4a97a04SBrian Foster 			goto out_trans_cancel;
1329e1d8fb88SNamjae Jeon 
1330a904b1caSNamjae Jeon 		xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
1331e1d8fb88SNamjae Jeon 
13322c3234d1SDarrick J. Wong 		xfs_defer_init(&dfops, &first_block);
1333e1d8fb88SNamjae Jeon 
1334e1d8fb88SNamjae Jeon 		/*
1335e1d8fb88SNamjae Jeon 		 * We are using the write transaction, which allows a
1336e1d8fb88SNamjae Jeon 		 * maximum of two bmbt updates.
1337e1d8fb88SNamjae Jeon 		 */
1338a904b1caSNamjae Jeon 		error = xfs_bmap_shift_extents(tp, ip, &next_fsb, shift_fsb,
13392c3234d1SDarrick J. Wong 				&done, stop_fsb, &first_block, &dfops,
1340a904b1caSNamjae Jeon 				direction, XFS_BMAP_MAX_SHIFT_EXTENTS);
1341e1d8fb88SNamjae Jeon 		if (error)
1342d4a97a04SBrian Foster 			goto out_bmap_cancel;
1343e1d8fb88SNamjae Jeon 
13442c3234d1SDarrick J. Wong 		error = xfs_defer_finish(&tp, &dfops, NULL);
1345e1d8fb88SNamjae Jeon 		if (error)
1346d4a97a04SBrian Foster 			goto out_bmap_cancel;
1347e1d8fb88SNamjae Jeon 
134870393313SChristoph Hellwig 		error = xfs_trans_commit(tp);
1349e1d8fb88SNamjae Jeon 	}
1350e1d8fb88SNamjae Jeon 
1351e1d8fb88SNamjae Jeon 	return error;
1352e1d8fb88SNamjae Jeon 
1353d4a97a04SBrian Foster out_bmap_cancel:
13542c3234d1SDarrick J. Wong 	xfs_defer_cancel(&dfops);
1355d4a97a04SBrian Foster out_trans_cancel:
13564906e215SChristoph Hellwig 	xfs_trans_cancel(tp);
1357e1d8fb88SNamjae Jeon 	return error;
1358e1d8fb88SNamjae Jeon }
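
/*
 * The loop above deliberately commits one transaction per batch of
 * shifted extents instead of doing the whole shift at once, so a large
 * collapse or insert never pins an unbounded amount of log space.  In
 * outline (illustrative pseudocode only):
 *
 *	while (!done) {
 *		allocate a tr_write transaction;
 *		shift up to XFS_BMAP_MAX_SHIFT_EXTENTS extents,
 *			setting done once the last extent has moved;
 *		finish deferred ops and commit;
 *	}
 */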
1359e1d8fb88SNamjae Jeon 
1360e1d8fb88SNamjae Jeon /*
1361a904b1caSNamjae Jeon  * xfs_collapse_file_space()
1362a904b1caSNamjae Jeon  *	This routine frees disk space and shifts extents for the given file.
1363a904b1caSNamjae Jeon  *	The first thing we do is free the data blocks in the specified range
1364a904b1caSNamjae Jeon  *	by calling xfs_free_file_space(), which also syncs dirty data and
1365a904b1caSNamjae Jeon  *	invalidates the page cache over the region being collapsed. We then
1366a904b1caSNamjae Jeon  *	shift the extent records to the left to cover the hole.
1367a904b1caSNamjae Jeon  * RETURNS:
1368a904b1caSNamjae Jeon  *	0 on success
1369a904b1caSNamjae Jeon  *	errno on error
1370a904b1caSNamjae Jeon  *
1371a904b1caSNamjae Jeon  */
1372a904b1caSNamjae Jeon int
1373a904b1caSNamjae Jeon xfs_collapse_file_space(
1374a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1375a904b1caSNamjae Jeon 	xfs_off_t		offset,
1376a904b1caSNamjae Jeon 	xfs_off_t		len)
1377a904b1caSNamjae Jeon {
1378a904b1caSNamjae Jeon 	int error;
1379a904b1caSNamjae Jeon 
1380a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1381a904b1caSNamjae Jeon 	trace_xfs_collapse_file_space(ip);
1382a904b1caSNamjae Jeon 
1383a904b1caSNamjae Jeon 	error = xfs_free_file_space(ip, offset, len);
1384a904b1caSNamjae Jeon 	if (error)
1385a904b1caSNamjae Jeon 		return error;
1386a904b1caSNamjae Jeon 
1387a904b1caSNamjae Jeon 	return xfs_shift_file_space(ip, offset, len, SHIFT_LEFT);
1388a904b1caSNamjae Jeon }
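
/*
 * Worked example (illustrative block numbers): collapsing 2 blocks at
 * block offset 4 of an 8-block file first punches the hole, then shifts
 * the tail left:
 *
 *	before:	|D0 D1 D2 D3 D4 D5 D6 D7|
 *	punch:	|D0 D1 D2 D3 -- -- D6 D7|
 *	shift:	|D0 D1 D2 D3 D6 D7|
 *
 * The fallocate caller is responsible for trimming i_size afterwards.
 */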
1389a904b1caSNamjae Jeon 
1390a904b1caSNamjae Jeon /*
1391a904b1caSNamjae Jeon  * xfs_insert_file_space()
1392a904b1caSNamjae Jeon  *	This routine creates a hole by shifting extents for the given file.
1393a904b1caSNamjae Jeon  *	The first thing we do is sync dirty data and invalidate the page
1394a904b1caSNamjae Jeon  *	cache over the region on which the insert range is working. We then
1395a904b1caSNamjae Jeon  *	split the extent at the given offset into two by calling
1396a904b1caSNamjae Jeon  *	xfs_bmap_split_extent, and shift all extent records lying between
1397a904b1caSNamjae Jeon  *	[offset, last allocated extent] to the right to make room for the hole.
1398a904b1caSNamjae Jeon  * RETURNS:
1399a904b1caSNamjae Jeon  *	0 on success
1400a904b1caSNamjae Jeon  *	errno on error
1401a904b1caSNamjae Jeon  */
1402a904b1caSNamjae Jeon int
1403a904b1caSNamjae Jeon xfs_insert_file_space(
1404a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1405a904b1caSNamjae Jeon 	loff_t			offset,
1406a904b1caSNamjae Jeon 	loff_t			len)
1407a904b1caSNamjae Jeon {
1408a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
1409a904b1caSNamjae Jeon 	trace_xfs_insert_file_space(ip);
1410a904b1caSNamjae Jeon 
1411a904b1caSNamjae Jeon 	return xfs_shift_file_space(ip, offset, len, SHIFT_RIGHT);
1412a904b1caSNamjae Jeon }
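
/*
 * Worked example (illustrative block numbers): inserting 2 blocks at
 * block offset 4 of a 6-block file splits the extent crossing block 4
 * and shifts everything from there to the right:
 *
 *	before:	|D0 D1 D2 D3 D4 D5|
 *	after:	|D0 D1 D2 D3 -- -- D4 D5|
 *
 * The new range reads back as a hole; the fallocate caller extends
 * i_size before calling in, so the shifted blocks stay inside EOF.
 */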
1413a904b1caSNamjae Jeon 
1414a904b1caSNamjae Jeon /*
1415a133d952SDave Chinner  * We need to check that the format of the data fork in the temporary inode is
1416a133d952SDave Chinner  * valid for the target inode before doing the swap. This is not a problem with
1417a133d952SDave Chinner  * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1418a133d952SDave Chinner  * data fork depending on the space the attribute fork is taking, so we can get
1419a133d952SDave Chinner  * invalid formats on the target inode.
1420a133d952SDave Chinner  *
1421a133d952SDave Chinner  * E.g. target has space for 7 extents in extent format, temp inode only has
1422a133d952SDave Chinner  * space for 6.  If we defragment down to 7 extents, then the tmp format is a
1423a133d952SDave Chinner  * btree, but when swapped it needs to be in extent format. Hence we can't just
1424a133d952SDave Chinner  * blindly swap data forks on attr2 filesystems.
1425a133d952SDave Chinner  *
1426a133d952SDave Chinner  * Note that we check the swap in both directions so that we don't end up with
1427a133d952SDave Chinner  * a corrupt temporary inode, either.
1428a133d952SDave Chinner  *
1429a133d952SDave Chinner  * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1430a133d952SDave Chinner  * inode will prevent this situation from occurring, so all we do here is
1431a133d952SDave Chinner  * reject and log the attempt. Basically we are putting the responsibility on
1432a133d952SDave Chinner  * userspace to get this right.
1433a133d952SDave Chinner  */
1434a133d952SDave Chinner static int
1435a133d952SDave Chinner xfs_swap_extents_check_format(
1436a133d952SDave Chinner 	xfs_inode_t	*ip,	/* target inode */
1437a133d952SDave Chinner 	xfs_inode_t	*tip)	/* tmp inode */
1438a133d952SDave Chinner {
1439a133d952SDave Chinner 
1440a133d952SDave Chinner 	/* Should never get a local format */
1441a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_LOCAL ||
1442a133d952SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_LOCAL)
14432451337dSDave Chinner 		return -EINVAL;
1444a133d952SDave Chinner 
1445a133d952SDave Chinner 	/*
1446a133d952SDave Chinner 	 * If the target inode has fewer extents than the temporary inode,
1447a133d952SDave Chinner 	 * why did userspace call us?
1448a133d952SDave Chinner 	 */
1449a133d952SDave Chinner 	if (ip->i_d.di_nextents < tip->i_d.di_nextents)
14502451337dSDave Chinner 		return -EINVAL;
1451a133d952SDave Chinner 
1452a133d952SDave Chinner 	/*
1453a133d952SDave Chinner 	 * If the target inode is in extent form and the temp inode is in
1454a133d952SDave Chinner 	 * btree form, we will end up with the target inode in the wrong
1455a133d952SDave Chinner 	 * format, as we already know there are fewer extents in the temp inode.
1456a133d952SDave Chinner 	 */
1457a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1458a133d952SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE)
14592451337dSDave Chinner 		return -EINVAL;
1460a133d952SDave Chinner 
1461a133d952SDave Chinner 	/* Check temp in extent form to max in target */
1462a133d952SDave Chinner 	if (tip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1463a133d952SDave Chinner 	    XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) >
1464a133d952SDave Chinner 			XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
14652451337dSDave Chinner 		return -EINVAL;
1466a133d952SDave Chinner 
1467a133d952SDave Chinner 	/* Check target in extent form to max in temp */
1468a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_EXTENTS &&
1469a133d952SDave Chinner 	    XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) >
1470a133d952SDave Chinner 			XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
14712451337dSDave Chinner 		return -EINVAL;
1472a133d952SDave Chinner 
1473a133d952SDave Chinner 	/*
1474a133d952SDave Chinner 	 * If we are in a btree format, check that the temp root block will fit
1475a133d952SDave Chinner 	 * in the target and that it has enough extents to be in btree format
1476a133d952SDave Chinner 	 * in the target.
1477a133d952SDave Chinner 	 *
1478a133d952SDave Chinner 	 * Note that we have to be careful to allow btree->extent conversions
1479a133d952SDave Chinner 	 * (a common defrag case) which will occur when the temp inode is in
1480a133d952SDave Chinner 	 * extent format...
1481a133d952SDave Chinner 	 */
1482a133d952SDave Chinner 	if (tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1483a133d952SDave Chinner 		if (XFS_IFORK_BOFF(ip) &&
1484a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(tip->i_df.if_broot) > XFS_IFORK_BOFF(ip))
14852451337dSDave Chinner 			return -EINVAL;
1486a133d952SDave Chinner 		if (XFS_IFORK_NEXTENTS(tip, XFS_DATA_FORK) <=
1487a133d952SDave Chinner 		    XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
14882451337dSDave Chinner 			return -EINVAL;
1489a133d952SDave Chinner 	}
1490a133d952SDave Chinner 
1491a133d952SDave Chinner 	/* Reciprocal target->temp btree format checks */
1492a133d952SDave Chinner 	if (ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1493a133d952SDave Chinner 		if (XFS_IFORK_BOFF(tip) &&
1494a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
14952451337dSDave Chinner 			return -EINVAL;
1496a133d952SDave Chinner 		if (XFS_IFORK_NEXTENTS(ip, XFS_DATA_FORK) <=
1497a133d952SDave Chinner 		    XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
14982451337dSDave Chinner 			return -EINVAL;
1499a133d952SDave Chinner 	}
1500a133d952SDave Chinner 
1501a133d952SDave Chinner 	return 0;
1502a133d952SDave Chinner }
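
/*
 * A concrete instance of the rejection above, using the numbers from
 * the comment at the top of this function: the target fork can hold 7
 * extents in extent format while the temp fork can hold only 6.  If
 * defragmentation leaves the temp inode with exactly 7 extents, the
 * temp fork is a btree; swapped into the target it would have to be in
 * extent format, so rather than create an invalid inode we return
 * -EINVAL.
 */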
1503a133d952SDave Chinner 
15047abbb8f9SDave Chinner static int
15054ef897a2SDave Chinner xfs_swap_extent_flush(
15064ef897a2SDave Chinner 	struct xfs_inode	*ip)
15074ef897a2SDave Chinner {
15084ef897a2SDave Chinner 	int	error;
15094ef897a2SDave Chinner 
15104ef897a2SDave Chinner 	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
15114ef897a2SDave Chinner 	if (error)
15124ef897a2SDave Chinner 		return error;
15134ef897a2SDave Chinner 	truncate_pagecache_range(VFS_I(ip), 0, -1);
15144ef897a2SDave Chinner 
15154ef897a2SDave Chinner 	/* Verify O_DIRECT for ftmp */
15164ef897a2SDave Chinner 	if (VFS_I(ip)->i_mapping->nrpages)
15174ef897a2SDave Chinner 		return -EINVAL;
15184ef897a2SDave Chinner 	return 0;
15194ef897a2SDave Chinner }
15204ef897a2SDave Chinner 
15214ef897a2SDave Chinner int
1522a133d952SDave Chinner xfs_swap_extents(
1523a133d952SDave Chinner 	xfs_inode_t	*ip,	/* target inode */
1524a133d952SDave Chinner 	xfs_inode_t	*tip,	/* tmp inode */
1525a133d952SDave Chinner 	xfs_swapext_t	*sxp)
1526a133d952SDave Chinner {
1527a133d952SDave Chinner 	xfs_mount_t	*mp = ip->i_mount;
1528a133d952SDave Chinner 	xfs_trans_t	*tp;
1529a133d952SDave Chinner 	xfs_bstat_t	*sbp = &sxp->sx_stat;
1530a133d952SDave Chinner 	xfs_ifork_t	*tempifp, *ifp, *tifp;
1531a133d952SDave Chinner 	int		src_log_flags, target_log_flags;
1532a133d952SDave Chinner 	int		error = 0;
1533a133d952SDave Chinner 	int		aforkblks = 0;
1534a133d952SDave Chinner 	int		taforkblks = 0;
1535a133d952SDave Chinner 	__uint64_t	tmp;
153681217683SDave Chinner 	int		lock_flags;
1537a133d952SDave Chinner 
15382b0eeb5eSDarrick J. Wong 	/* XXX: we can't do this with rmap, will fix later */
15392b0eeb5eSDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
15402b0eeb5eSDarrick J. Wong 		return -EOPNOTSUPP;
15412b0eeb5eSDarrick J. Wong 
1542a133d952SDave Chinner 	tempifp = kmem_alloc(sizeof(xfs_ifork_t), KM_MAYFAIL);
1543a133d952SDave Chinner 	if (!tempifp) {
15442451337dSDave Chinner 		error = -ENOMEM;
1545a133d952SDave Chinner 		goto out;
1546a133d952SDave Chinner 	}
1547a133d952SDave Chinner 
1548a133d952SDave Chinner 	/*
1549723cac48SDave Chinner 	 * Lock the inodes against other IO, page faults and truncate to
1550723cac48SDave Chinner 	 * begin with.  Then we can safely flush the inodes and remove their
1551723cac48SDave Chinner 	 * page cache. Once we have done this we can take the ilocks and
1552723cac48SDave Chinner 	 * do the rest of the checks.
1553a133d952SDave Chinner 	 */
1554723cac48SDave Chinner 	lock_flags = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
1555a133d952SDave Chinner 	xfs_lock_two_inodes(ip, tip, XFS_IOLOCK_EXCL);
1556723cac48SDave Chinner 	xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
1557a133d952SDave Chinner 
1558a133d952SDave Chinner 	/* Verify that both files have the same format */
1559c19b3b05SDave Chinner 	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
15602451337dSDave Chinner 		error = -EINVAL;
1561a133d952SDave Chinner 		goto out_unlock;
1562a133d952SDave Chinner 	}
1563a133d952SDave Chinner 
1564a133d952SDave Chinner 	/* Verify both files are either real-time or non-realtime */
1565a133d952SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
15662451337dSDave Chinner 		error = -EINVAL;
1567a133d952SDave Chinner 		goto out_unlock;
1568a133d952SDave Chinner 	}
1569a133d952SDave Chinner 
15704ef897a2SDave Chinner 	error = xfs_swap_extent_flush(ip);
1571a133d952SDave Chinner 	if (error)
1572a133d952SDave Chinner 		goto out_unlock;
15734ef897a2SDave Chinner 	error = xfs_swap_extent_flush(tip);
15744ef897a2SDave Chinner 	if (error)
15754ef897a2SDave Chinner 		goto out_unlock;
1576a133d952SDave Chinner 
1577253f4911SChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
1578253f4911SChristoph Hellwig 	if (error)
1579a133d952SDave Chinner 		goto out_unlock;
1580723cac48SDave Chinner 
1581723cac48SDave Chinner 	/*
1582723cac48SDave Chinner 	 * Lock and join the inodes to the transaction so that transaction commit
1583723cac48SDave Chinner 	 * or cancel will unlock the inodes from this point onwards.
1584723cac48SDave Chinner 	 */
15854ef897a2SDave Chinner 	xfs_lock_two_inodes(ip, tip, XFS_ILOCK_EXCL);
15864ef897a2SDave Chinner 	lock_flags |= XFS_ILOCK_EXCL;
1587723cac48SDave Chinner 	xfs_trans_ijoin(tp, ip, lock_flags);
1588723cac48SDave Chinner 	xfs_trans_ijoin(tp, tip, lock_flags);
1589723cac48SDave Chinner 
1591a133d952SDave Chinner 	/* Verify all data are being swapped */
1592a133d952SDave Chinner 	if (sxp->sx_offset != 0 ||
1593a133d952SDave Chinner 	    sxp->sx_length != ip->i_d.di_size ||
1594a133d952SDave Chinner 	    sxp->sx_length != tip->i_d.di_size) {
15952451337dSDave Chinner 		error = -EFAULT;
15964ef897a2SDave Chinner 		goto out_trans_cancel;
1597a133d952SDave Chinner 	}
1598a133d952SDave Chinner 
1599a133d952SDave Chinner 	trace_xfs_swap_extent_before(ip, 0);
1600a133d952SDave Chinner 	trace_xfs_swap_extent_before(tip, 1);
1601a133d952SDave Chinner 
1602a133d952SDave Chinner 	/* check inode formats now that data is flushed */
1603a133d952SDave Chinner 	error = xfs_swap_extents_check_format(ip, tip);
1604a133d952SDave Chinner 	if (error) {
1605a133d952SDave Chinner 		xfs_notice(mp,
1606a133d952SDave Chinner 		    "%s: inode 0x%llx format is incompatible for exchanging.",
1607a133d952SDave Chinner 				__func__, ip->i_ino);
16084ef897a2SDave Chinner 		goto out_trans_cancel;
1609a133d952SDave Chinner 	}
1610a133d952SDave Chinner 
1611a133d952SDave Chinner 	/*
1612a133d952SDave Chinner 	 * Compare the current change & modify times with those
1613a133d952SDave Chinner 	 * passed in.  If they differ, we abort this swap.
1614a133d952SDave Chinner 	 * This is the mechanism used to assure the calling
1615a133d952SDave Chinner 	 * process that the file was not changed out from
1616a133d952SDave Chinner 	 * under it.
1617a133d952SDave Chinner 	 */
1618a133d952SDave Chinner 	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
1619a133d952SDave Chinner 	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
1620a133d952SDave Chinner 	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
1621a133d952SDave Chinner 	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
16222451337dSDave Chinner 		error = -EBUSY;
162381217683SDave Chinner 		goto out_trans_cancel;
1624a133d952SDave Chinner 	}
1625a133d952SDave Chinner 	/*
1626a133d952SDave Chinner 	 * Count the number of extended attribute blocks
1627a133d952SDave Chinner 	 */
1628a133d952SDave Chinner 	if (((XFS_IFORK_Q(ip) != 0) && (ip->i_d.di_anextents > 0)) &&
1629a133d952SDave Chinner 	     (ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
1630a133d952SDave Chinner 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &aforkblks);
1631a133d952SDave Chinner 		if (error)
1632a133d952SDave Chinner 			goto out_trans_cancel;
1633a133d952SDave Chinner 	}
1634a133d952SDave Chinner 	if (((XFS_IFORK_Q(tip) != 0) && (tip->i_d.di_anextents > 0)) &&
1635a133d952SDave Chinner 	     (tip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL)) {
1636a133d952SDave Chinner 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK,
1637a133d952SDave Chinner 			&taforkblks);
1638a133d952SDave Chinner 		if (error)
1639a133d952SDave Chinner 			goto out_trans_cancel;
1640a133d952SDave Chinner 	}
1641a133d952SDave Chinner 
164221b5c978SDave Chinner 	/*
164321b5c978SDave Chinner 	 * Before we've swapped the forks, lets set the owners of the forks
164421b5c978SDave Chinner 	 * appropriately. We have to do this as we are demand paging the btree
164521b5c978SDave Chinner 	 * buffers, and so the validation done on read will expect the owner
164621b5c978SDave Chinner 	 * field to be correctly set. Once we change the owners, we can swap the
164721b5c978SDave Chinner 	 * inode forks.
164821b5c978SDave Chinner 	 *
164921b5c978SDave Chinner 	 * Note the trickiness in setting the log flags - we set the owner log
165021b5c978SDave Chinner 	 * flag on the opposite inode (i.e. the inode we are setting the new
165121b5c978SDave Chinner 	 * owner to be) because once we swap the forks and log that, log
165221b5c978SDave Chinner 	 * recovery is going to see the fork as owned by the swapped inode,
165321b5c978SDave Chinner 	 * not the pre-swapped inodes.
165421b5c978SDave Chinner 	 */
165521b5c978SDave Chinner 	src_log_flags = XFS_ILOG_CORE;
165621b5c978SDave Chinner 	target_log_flags = XFS_ILOG_CORE;
165721b5c978SDave Chinner 	if (ip->i_d.di_version == 3 &&
165821b5c978SDave Chinner 	    ip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1659638f4416SDave Chinner 		target_log_flags |= XFS_ILOG_DOWNER;
1660638f4416SDave Chinner 		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK,
1661638f4416SDave Chinner 					      tip->i_ino, NULL);
166221b5c978SDave Chinner 		if (error)
166321b5c978SDave Chinner 			goto out_trans_cancel;
166421b5c978SDave Chinner 	}
166521b5c978SDave Chinner 
166621b5c978SDave Chinner 	if (tip->i_d.di_version == 3 &&
166721b5c978SDave Chinner 	    tip->i_d.di_format == XFS_DINODE_FMT_BTREE) {
1668638f4416SDave Chinner 		src_log_flags |= XFS_ILOG_DOWNER;
1669638f4416SDave Chinner 		error = xfs_bmbt_change_owner(tp, tip, XFS_DATA_FORK,
1670638f4416SDave Chinner 					      ip->i_ino, NULL);
167121b5c978SDave Chinner 		if (error)
167221b5c978SDave Chinner 			goto out_trans_cancel;
167321b5c978SDave Chinner 	}
167421b5c978SDave Chinner 
1675a133d952SDave Chinner 	/*
1676a133d952SDave Chinner 	 * Swap the data forks of the inodes
1677a133d952SDave Chinner 	 */
1678a133d952SDave Chinner 	ifp = &ip->i_df;
1679a133d952SDave Chinner 	tifp = &tip->i_df;
1680a133d952SDave Chinner 	*tempifp = *ifp;	/* struct copy */
1681a133d952SDave Chinner 	*ifp = *tifp;		/* struct copy */
1682a133d952SDave Chinner 	*tifp = *tempifp;	/* struct copy */
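
	/*
	 * The three struct copies above are the classic three-way swap
	 * through a temporary (tmp = a; a = b; b = tmp) applied to the
	 * whole xfs_ifork_t, which is why tempifp was allocated up front.
	 */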
1683a133d952SDave Chinner 
1684a133d952SDave Chinner 	/*
1685a133d952SDave Chinner 	 * Fix the on-disk inode values
1686a133d952SDave Chinner 	 */
1687a133d952SDave Chinner 	tmp = (__uint64_t)ip->i_d.di_nblocks;
1688a133d952SDave Chinner 	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
1689a133d952SDave Chinner 	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
1690a133d952SDave Chinner 
1691a133d952SDave Chinner 	tmp = (__uint64_t) ip->i_d.di_nextents;
1692a133d952SDave Chinner 	ip->i_d.di_nextents = tip->i_d.di_nextents;
1693a133d952SDave Chinner 	tip->i_d.di_nextents = tmp;
1694a133d952SDave Chinner 
1695a133d952SDave Chinner 	tmp = (__uint64_t) ip->i_d.di_format;
1696a133d952SDave Chinner 	ip->i_d.di_format = tip->i_d.di_format;
1697a133d952SDave Chinner 	tip->i_d.di_format = tmp;
1698a133d952SDave Chinner 
1699a133d952SDave Chinner 	/*
1700a133d952SDave Chinner 	 * The extents in the source inode could still contain speculative
1701a133d952SDave Chinner 	 * preallocation beyond EOF (e.g. the file is open but not modified
1702a133d952SDave Chinner 	 * while defrag is in progress). In that case, we need to copy over the
1703a133d952SDave Chinner 	 * number of delalloc blocks the data fork in the source inode is
1704a133d952SDave Chinner 	 * tracking beyond EOF so that when the fork is truncated away when the
1705a133d952SDave Chinner 	 * temporary inode is unlinked we don't underrun the i_delayed_blks
1706a133d952SDave Chinner 	 * counter on that inode.
1707a133d952SDave Chinner 	 */
1708a133d952SDave Chinner 	ASSERT(tip->i_delayed_blks == 0);
1709a133d952SDave Chinner 	tip->i_delayed_blks = ip->i_delayed_blks;
1710a133d952SDave Chinner 	ip->i_delayed_blks = 0;
1711a133d952SDave Chinner 
1712a133d952SDave Chinner 	switch (ip->i_d.di_format) {
1713a133d952SDave Chinner 	case XFS_DINODE_FMT_EXTENTS:
1714a133d952SDave Chinner 		/* If the extents fit in the inode, fix the
1715a133d952SDave Chinner 		 * pointer.  Otherwise it's already NULL or
1716a133d952SDave Chinner 		 * pointing to the extent.
1717a133d952SDave Chinner 		 */
1718a133d952SDave Chinner 		if (ip->i_d.di_nextents <= XFS_INLINE_EXTS) {
1719a133d952SDave Chinner 			ifp->if_u1.if_extents =
1720a133d952SDave Chinner 				ifp->if_u2.if_inline_ext;
1721a133d952SDave Chinner 		}
1722a133d952SDave Chinner 		src_log_flags |= XFS_ILOG_DEXT;
1723a133d952SDave Chinner 		break;
1724a133d952SDave Chinner 	case XFS_DINODE_FMT_BTREE:
172521b5c978SDave Chinner 		ASSERT(ip->i_d.di_version < 3 ||
1726638f4416SDave Chinner 		       (src_log_flags & XFS_ILOG_DOWNER));
1727a133d952SDave Chinner 		src_log_flags |= XFS_ILOG_DBROOT;
1728a133d952SDave Chinner 		break;
1729a133d952SDave Chinner 	}
1730a133d952SDave Chinner 
1731a133d952SDave Chinner 	switch (tip->i_d.di_format) {
1732a133d952SDave Chinner 	case XFS_DINODE_FMT_EXTENTS:
1733a133d952SDave Chinner 		/* If the extents fit in the inode, fix the
1734a133d952SDave Chinner 		 * pointer.  Otherwise it's already NULL or
1735a133d952SDave Chinner 		 * pointing to the extent.
1736a133d952SDave Chinner 		 */
1737a133d952SDave Chinner 		if (tip->i_d.di_nextents <= XFS_INLINE_EXTS) {
1738a133d952SDave Chinner 			tifp->if_u1.if_extents =
1739a133d952SDave Chinner 				tifp->if_u2.if_inline_ext;
1740a133d952SDave Chinner 		}
1741a133d952SDave Chinner 		target_log_flags |= XFS_ILOG_DEXT;
1742a133d952SDave Chinner 		break;
1743a133d952SDave Chinner 	case XFS_DINODE_FMT_BTREE:
1744a133d952SDave Chinner 		target_log_flags |= XFS_ILOG_DBROOT;
174521b5c978SDave Chinner 		ASSERT(tip->i_d.di_version < 3 ||
1746638f4416SDave Chinner 		       (target_log_flags & XFS_ILOG_DOWNER));
1747a133d952SDave Chinner 		break;
1748a133d952SDave Chinner 	}
1749a133d952SDave Chinner 
1750a133d952SDave Chinner 	xfs_trans_log_inode(tp, ip,  src_log_flags);
1751a133d952SDave Chinner 	xfs_trans_log_inode(tp, tip, target_log_flags);
1752a133d952SDave Chinner 
1753a133d952SDave Chinner 	/*
1754a133d952SDave Chinner 	 * If this is a synchronous mount, make sure that the
1755a133d952SDave Chinner 	 * transaction goes to disk before returning to the user.
1756a133d952SDave Chinner 	 */
1757a133d952SDave Chinner 	if (mp->m_flags & XFS_MOUNT_WSYNC)
1758a133d952SDave Chinner 		xfs_trans_set_sync(tp);
1759a133d952SDave Chinner 
176070393313SChristoph Hellwig 	error = xfs_trans_commit(tp);
1761a133d952SDave Chinner 
1762a133d952SDave Chinner 	trace_xfs_swap_extent_after(ip, 0);
1763a133d952SDave Chinner 	trace_xfs_swap_extent_after(tip, 1);
1764a133d952SDave Chinner out:
1765a133d952SDave Chinner 	kmem_free(tempifp);
1766a133d952SDave Chinner 	return error;
1767a133d952SDave Chinner 
1768a133d952SDave Chinner out_unlock:
176981217683SDave Chinner 	xfs_iunlock(ip, lock_flags);
177081217683SDave Chinner 	xfs_iunlock(tip, lock_flags);
1771a133d952SDave Chinner 	goto out;
1772a133d952SDave Chinner 
1773a133d952SDave Chinner out_trans_cancel:
17744906e215SChristoph Hellwig 	xfs_trans_cancel(tp);
1775723cac48SDave Chinner 	goto out;
1776a133d952SDave Chinner }
1777