/* fs/xfs/xfs_bmap_util.c (xref revision 85ef08b5a667615bc7be5058259753dc42a7adcd) */
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because the
 * bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	if (XFS_IS_REALTIME_INODE(ip))
		return XFS_FSB_TO_BB(ip->i_mount, fsb);
	return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}
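
/*
 * Example (illustrative numbers): on a filesystem with 4096-byte blocks, one
 * fsb spans eight 512-byte basic blocks.  A realtime fsb is a linear block
 * number, so XFS_FSB_TO_BB() is a plain shift (fsb 10 -> daddr 80); a data
 * device fsb encodes the AG number and AG block number, so
 * XFS_FSB_TO_DADDR() must decompose it first.
 */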

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t		block = XFS_BB_TO_FSBT(mp, sector);

	return blkdev_issue_zeroout(target->bt_bdev,
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_NOFS, 0);
}
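
/*
 * Illustrative arithmetic: blkdev_issue_zeroout() works in 512-byte sectors,
 * so with 4096-byte blocks (s_blocksize_bits == 12) the shift by (12 - 9)
 * multiplies both the start block and the block count by eight.
 */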

#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)	/* bmap alloc argument struct */
{
	int		error;		/* error return value */
	xfs_mount_t	*mp;		/* mount point structure */
	xfs_extlen_t	prod = 0;	/* product factor for allocators */
	xfs_extlen_t	mod = 0;	/* alignment remainder */
	xfs_extlen_t	ralen = 0;	/* realtime allocation length */
	xfs_extlen_t	align;		/* minimum allocation alignment */
	xfs_rtblock_t	rtb;

	mp = ap->ip->i_mount;
	align = xfs_get_extsz_hint(ap->ip);
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	div_u64_rem(ap->offset, align, &mod);
	if (mod || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to MAXEXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * MAXEXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (ralen * mp->m_sb.sb_rextsize >= MAXEXTLEN)
		ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;

	/*
	 * Lock out modifications to both the RT bitmap and summary inodes
	 */
	xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
	xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
	xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
	xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		xfs_rtblock_t rtx; /* realtime extent no */

		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = rtx * mp->m_sb.sb_rextsize;
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtb = ap->blkno;
	ap->length = ralen;
	error = xfs_rtallocate_extent(ap->tp, ap->blkno, 1, ap->length,
				&ralen, ap->wasdel, prod, &rtb);
	if (error)
		return error;

	ap->blkno = rtb;
	if (ap->blkno != NULLFSBLOCK) {
		ap->blkno *= mp->m_sb.sb_rextsize;
		ralen *= mp->m_sb.sb_rextsize;
		ap->length = ralen;
		ap->ip->i_d.di_nblocks += ralen;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ralen;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
					XFS_TRANS_DQ_RTBCOUNT, (long) ralen);
	} else {
		ap->length = 0;
	}
	return 0;
}
#endif /* CONFIG_XFS_RT */
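
/*
 * Worked example (illustrative numbers): with sb_rextsize == 16 fsbs and an
 * extent size hint of 32 fsbs, prod starts at 2.  A perfectly aligned
 * 128-fsb request yields ralen == 8 rtextents; if the offset or length were
 * misaligned, prod would be knocked back to 1 before calling the realtime
 * allocator.
 */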

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.  Delayed allocation
 * extents are not counted towards the totals.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
	struct xfs_ifork	*ifp,
	xfs_filblks_t		*count)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		numrecs = 0;

	for_each_xfs_iext(ifp, &icur, &got) {
		if (!isnullstartblock(got.br_startblock)) {
			*count += got.br_blockcount;
			numrecs++;
		}
	}

	return numrecs;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.
 */
int
xfs_bmap_count_blocks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
	struct xfs_btree_cur	*cur;
	xfs_extlen_t		btblocks = 0;
	int			error;

	*nextents = 0;
	*count = 0;

	if (!ifp)
		return 0;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_BTREE:
		if (!(ifp->if_flags & XFS_IFEXTENTS)) {
			error = xfs_iread_extents(tp, ip, whichfork);
			if (error)
				return error;
		}

		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_btree_count_blocks(cur, &btblocks);
		xfs_btree_del_cursor(cur, error);
		if (error)
			return error;

		/*
		 * xfs_btree_count_blocks includes the root block contained in
		 * the inode fork in @btblocks, so subtract one because we're
		 * only interested in allocated disk blocks.
		 */
		*count += btblocks - 1;

		/* fall through */
	case XFS_DINODE_FMT_EXTENTS:
		*nextents = xfs_bmap_count_leaves(ifp, count);
		break;
	}

	return 0;
}
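
/*
 * Usage sketch (hedged, illustrative): counting the data fork of an inode
 * whose ILOCK is already held for reading:
 *
 *	xfs_extnum_t	nextents;
 *	xfs_filblks_t	count;
 *
 *	error = xfs_bmap_count_blocks(NULL, ip, XFS_DATA_FORK,
 *			&nextents, &count);
 *
 * For a btree-format fork the case falls through, so @count covers both the
 * bmbt blocks (minus the incore root) and the mapped data blocks.
 */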

static int
xfs_getbmap_report_one(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	struct xfs_bmbt_irec	*got)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;
	bool			shared = false;
	int			error;

	error = xfs_reflink_trim_around_shared(ip, got, &shared);
	if (error)
		return error;

	if (isnullstartblock(got->br_startblock) ||
	    got->br_startblock == DELAYSTARTBLOCK) {
		/*
		 * Delalloc extents that start beyond EOF can occur due to
		 * speculative EOF allocation when the delalloc extent is larger
		 * than the largest freespace extent at conversion time.  These
		 * extents cannot be converted by data writeback, so can exist
		 * here even if we are not supposed to be finding delalloc
		 * extents.
		 */
		if (got->br_startoff < XFS_B_TO_FSB(ip->i_mount, XFS_ISIZE(ip)))
			ASSERT((bmv->bmv_iflags & BMV_IF_DELALLOC) != 0);

		p->bmv_oflags |= BMV_OF_DELALLOC;
		p->bmv_block = -2;
	} else {
		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
	}

	if (got->br_state == XFS_EXT_UNWRITTEN &&
	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
		p->bmv_oflags |= BMV_OF_PREALLOC;

	if (shared)
		p->bmv_oflags |= BMV_OF_SHARED;

	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
	return 0;
}

static void
xfs_getbmap_report_hole(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	xfs_fileoff_t		bno,
	xfs_fileoff_t		end)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;

	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
		return;

	p->bmv_block = -1;
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
}

static inline bool
xfs_getbmap_full(
	struct getbmapx		*bmv)
{
	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

static bool
xfs_getbmap_next_rec(
	struct xfs_bmbt_irec	*rec,
	xfs_fileoff_t		total_end)
{
	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;

	if (end == total_end)
		return false;

	rec->br_startoff += rec->br_blockcount;
	if (!isnullstartblock(rec->br_startblock) &&
	    rec->br_startblock != DELAYSTARTBLOCK)
		rec->br_startblock += rec->br_blockcount;
	rec->br_blockcount = total_end - end;
	return true;
}
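
/*
 * Illustrative walk (assumed numbers): if a single bmbt record covers fsbs
 * 0-99 and only the first 40 are shared, xfs_reflink_trim_around_shared()
 * trims the first report to fsbs 0-39 with BMV_OF_SHARED set, then
 * xfs_getbmap_next_rec() advances the record to fsbs 40-99 so the unshared
 * tail is emitted as a second getbmapx entry.
 */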

/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 */
int						/* error code */
xfs_getbmap(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	struct kgetbmap		*out)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			iflags = bmv->bmv_iflags;
	int			whichfork, lock, error = 0;
	int64_t			bmv_end, max_len;
	xfs_fileoff_t		bno, first_bno;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got, rec;
	xfs_filblks_t		len;
	struct xfs_iext_cursor	icur;

	if (bmv->bmv_iflags & ~BMV_IF_VALID)
		return -EINVAL;
#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	if (bmv->bmv_length < -1)
		return -EINVAL;
	bmv->bmv_entries = 0;
	if (bmv->bmv_length == 0)
		return 0;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;
	ifp = XFS_IFORK_PTR(ip, whichfork);

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_ATTR_FORK:
		if (!XFS_IFORK_Q(ip))
			goto out_unlock_iolock;

		max_len = 1LL << 32;
		lock = xfs_ilock_attr_map_shared(ip);
		break;
	case XFS_COW_FORK:
		/* No CoW fork? Just return */
		if (!ifp)
			goto out_unlock_iolock;

		if (xfs_get_cowextsz_hint(ip))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);
		break;
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_d.di_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		if (xfs_get_extsz_hint(ip) ||
		    (ip->i_d.di_flags &
		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = xfs_ilock_data_map_shared(ip);
		break;
	}

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_LOCAL:
		/* Local format inode forks report no extents. */
		goto out_unlock_ilock;
	default:
		error = -EINVAL;
		goto out_unlock_ilock;
	}

	if (bmv->bmv_length == -1) {
		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
	}

	bmv_end = bmv->bmv_offset + bmv->bmv_length;

	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

	if (!(ifp->if_flags & XFS_IFEXTENTS)) {
		error = xfs_iread_extents(NULL, ip, whichfork);
		if (error)
			goto out_unlock_ilock;
	}

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/*
		 * Report a whole-file hole if the delalloc flag is set to
		 * stay compatible with the old implementation.
		 */
		if (iflags & BMV_IF_DELALLOC)
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		goto out_unlock_ilock;
	}

	while (!xfs_getbmap_full(bmv)) {
		xfs_trim_extent(&got, first_bno, len);

		/*
		 * Report an entry for a hole if this extent doesn't directly
		 * follow the previous one.
		 */
		if (got.br_startoff > bno) {
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					got.br_startoff);
			if (xfs_getbmap_full(bmv))
				break;
		}

		/*
		 * In order to report shared extents accurately, we report each
		 * distinct shared / unshared part of a single bmbt record with
		 * an individual getbmapx record.
		 */
		bno = got.br_startoff + got.br_blockcount;
		rec = got;
		do {
			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
					&rec);
			if (error || xfs_getbmap_full(bmv))
				goto out_unlock_ilock;
		} while (xfs_getbmap_next_rec(&rec, bno));

		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

			out[bmv->bmv_entries - 1].bmv_oflags |= BMV_OF_LAST;

			if (whichfork != XFS_ATTR_FORK && bno < end &&
			    !xfs_getbmap_full(bmv)) {
				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
						bno, end);
			}
			break;
		}

		if (bno >= first_bno + len)
			break;
	}

out_unlock_ilock:
	xfs_iunlock(ip, lock);
out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return error;
}
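
/*
 * Userspace view (hedged sketch): this is the backend for the
 * XFS_IOC_GETBMAPX ioctl.  A caller typically fills in a getbmapx header
 * like so and iterates until an entry carries BMV_OF_LAST:
 *
 *	struct getbmapx	bmx[16] = { 0 };
 *
 *	bmx[0].bmv_offset = 0;		// in 512-byte basic blocks
 *	bmx[0].bmv_length = -1;		// to end of file
 *	bmx[0].bmv_count = 16;		// header slot + 15 result slots
 *	ioctl(fd, XFS_IOC_GETBMAPX, bmx);
 *
 * Note that offsets and lengths here are in basic blocks, not fsbs.
 */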

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  This will always punch out both the start and end blocks, even
 * if the range only partially overlaps them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_fileoff_t		start_fsb,
	xfs_fileoff_t		length)
{
	struct xfs_ifork	*ifp = &ip->i_df;
	xfs_fileoff_t		end_fsb = start_fsb + length;
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;
	int			error = 0;

	ASSERT(ifp->if_flags & XFS_IFEXTENTS);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		goto out_unlock;

	while (got.br_startoff + got.br_blockcount > start_fsb) {
		del = got;
		xfs_trim_extent(&del, start_fsb, length);

		/*
		 * A delete can push the cursor forward. Step back to the
		 * previous extent on non-delalloc or extents outside the
		 * target range.
		 */
		if (!del.br_blockcount ||
		    !isnullstartblock(del.br_startblock)) {
			if (!xfs_iext_prev_extent(ifp, &icur, &got))
				break;
			continue;
		}

		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
						  &got, &del);
		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
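
/*
 * Example behaviour (illustrative): punching fsbs [5, 10) out of a delalloc
 * extent covering [0, 20) trims @del to [5, 10) and leaves [0, 5) and
 * [10, 20) behind.  The walk runs backwards from end_fsb, so a delete cannot
 * invalidate the extents still to be visited.
 */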

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks. The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and no delalloc blocks will
	 * not have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (!(ip->i_df.if_flags & XFS_IFEXTENTS))
		return false;

	/*
	 * Do not free real preallocated or append-only files unless the file
	 * has delalloc blocks and we are forced to remove them.
	 */
	if (ip->i_d.di_flags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (!force || ip->i_delayed_blks == 0)
			return false;

	return true;
}

/*
 * This is called to free any blocks beyond eof. The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.
 */
int
xfs_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	int			error;
	xfs_fileoff_t		end_fsb;
	xfs_fileoff_t		last_fsb;
	xfs_filblks_t		map_len;
	int			nimaps;
	struct xfs_bmbt_irec	imap;
	struct xfs_mount	*mp = ip->i_mount;

	/*
	 * Figure out if there are any blocks beyond the end
	 * of the file.  If not, then there is nothing to do.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return 0;
	map_len = last_fsb - end_fsb;

	nimaps = 1;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, map_len, &imap, &nimaps, 0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If there are blocks after the end of file, truncate the file to its
	 * current size to free them up.
	 */
	if (!error && (nimaps != 0) &&
	    (imap.br_startblock != HOLESTARTBLOCK ||
	     ip->i_delayed_blks)) {
		/*
		 * Attach the dquots to the inode up front.
		 */
		error = xfs_qm_dqattach(ip);
		if (error)
			return error;

		/* wait on dio to ensure i_size has settled */
		inode_dio_wait(VFS_I(ip));

		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0,
				&tp);
		if (error) {
			ASSERT(XFS_FORCED_SHUTDOWN(mp));
			return error;
		}

		xfs_ilock(ip, XFS_ILOCK_EXCL);
		xfs_trans_ijoin(tp, ip, 0);

		/*
		 * Do not update the on-disk file size.  If we update the
		 * on-disk file size and then the system crashes before the
		 * contents of the file are flushed to disk then the files
		 * may be full of holes (ie NULL files bug).
		 */
		error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
					XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
		if (error) {
			/*
			 * If we get an error at this point we simply don't
			 * bother truncating the file.
			 */
			xfs_trans_cancel(tp);
		} else {
			error = xfs_trans_commit(tp);
			if (!error)
				xfs_inode_clear_eofblocks_tag(ip);
		}

		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len,
	int			alloc_type)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			nimaps;
	int			quota_flag;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	uint			qblocks, resblks, resrtextents;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
	endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
	allocatesize_fsb = endoffset_fsb - startoffset_fsb;

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			div_u64_rem(startoffset_fsb, extsz, &temp);
			if (temp)
				e += temp;
			div_u64_rem(e, extsz, &temp);
			if (temp)
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to MAXEXTLEN (21 bits), so use that to enforce the limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s), (MAXEXTLEN * nimaps));
		if (unlikely(rt)) {
			resrtextents = qblocks = resblks;
			resrtextents /= mp->m_sb.sb_rextsize;
			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			quota_flag = XFS_QMOPT_RES_RTBLKS;
		} else {
			resrtextents = 0;
			resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			quota_flag = XFS_QMOPT_RES_REGBLKS;
		}

		/*
		 * Allocate and setup the transaction.
		 */
		error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks,
				resrtextents, 0, &tp);

		/*
		 * Check for running out of space
		 */
		if (error) {
			/*
			 * Free the transaction structure.
			 */
			ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
			break;
		}
		xfs_ilock(ip, XFS_ILOCK_EXCL);
		error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks,
						      0, quota_flag);
		if (error)
			goto error1;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_ADD_NOSPLIT_CNT);
		if (error)
			goto error0;

		xfs_trans_ijoin(tp, ip, 0);

		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
					allocatesize_fsb, alloc_type, 0, imapp,
					&nimaps);
		if (error)
			goto error0;

		/*
		 * Complete the transaction
		 */
		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			break;

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = -ENOSPC;
			break;
		}

		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error0:	/* unlock inode, unreserve quota blocks, cancel trans */
	xfs_trans_unreserve_quota_nblks(tp, ip, (long)qblocks, 0, quota_flag);

error1:	/* Just cancel transaction */
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
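
/*
 * Worked example for the extsz rounding above (illustrative numbers): with
 * an extent size hint of 8 fsbs, startoffset_fsb == 10 and
 * allocatesize_fsb == 20, s rounds down to 8 and e rounds 30 up to 32, so
 * the reservation covers the aligned range [8, 32).
 */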

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, 0, &tp);
	if (error) {
		ASSERT(error == -ENOSPC || XFS_FORCED_SHUTDOWN(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	error = xfs_trans_reserve_quota(tp, mp, ip->i_udquot, ip->i_gdquot,
			ip->i_pdquot, resblks, 0, XFS_QMOPT_RES_REGBLKS);
	if (error)
		goto out_trans_cancel;

	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
			XFS_IEXT_PUNCH_HOLE_CNT);
	if (error)
		goto out_trans_cancel;

	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
	start = round_down(offset, rounding);
	end = round_up(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}
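
/*
 * Rounding example (illustrative): with 4096-byte blocks and 4096-byte
 * pages, rounding == 4096, so offset == 5000 and len == 3000 flush and
 * invalidate the byte range [4096, 8191] -- whole pages covering the
 * requested span.
 */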

int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			done = 0, error;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)	/* if nothing being freed */
		return 0;

	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* We can only free complete realtime extents. */
	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
		startoffset_fsb = roundup_64(startoffset_fsb,
					     mp->m_sb.sb_rextsize);
		endoffset_fsb = rounddown_64(endoffset_fsb,
					     mp->m_sb.sb_rextsize);
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 */
	if (endoffset_fsb > startoffset_fsb) {
		while (!done) {
			error = xfs_unmap_extent(ip, startoffset_fsb,
					endoffset_fsb - startoffset_fsb, &done);
			if (error)
				return error;
		}
	}

	/*
	 * Now that we've unmapped all full blocks we'll have to zero out any
	 * partial block at the beginning and/or end.  iomap_zero_range is smart
	 * enough to skip any holes, including those we just created, but we
	 * must take care not to zero beyond EOF and enlarge i_size.
	 */
	if (offset >= XFS_ISIZE(ip))
		return 0;
	if (offset + len > XFS_ISIZE(ip))
		len = XFS_ISIZE(ip) - offset;
	error = iomap_zero_range(VFS_I(ip), offset, len, NULL,
			&xfs_buffered_write_iomap_ops);
	if (error)
		return error;

	/*
	 * If we zeroed right up to EOF and EOF straddles a page boundary we
	 * must make sure that the post-EOF area is also zeroed because the
	 * page could be mmap'd and iomap_zero_range doesn't do that for us.
	 * Writeback of the eof page will do this, albeit clumsily.
	 */
	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
	}

	return error;
}

static int
xfs_prepare_shift(
	struct xfs_inode	*ip,
	loff_t			offset)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
	 * into the accessible region of the file.
	 */
	if (xfs_can_free_eofblocks(ip, true)) {
		error = xfs_free_eofblocks(ip);
		if (error)
			return error;
	}

	/*
	 * Shift operations must stabilize the start block offset boundary along
	 * with the full range of the operation. If we don't, a COW writeback
	 * completion could race with an insert, front merge with the start
	 * extent (after split) during the shift and corrupt the file. Start
	 * with the block just prior to the start to stabilize the boundary.
	 */
	offset = round_down(offset, 1 << mp->m_sb.sb_blocklog);
	if (offset)
		offset -= (1 << mp->m_sb.sb_blocklog);

	/*
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 */
	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
	if (error)
		return error;

	/*
	 * Clean out anything hanging around in the cow fork now that
	 * we've flushed all the dirty data out to disk to avoid having
	 * CoW extents at the wrong offsets.
	 */
	if (xfs_inode_has_cow_data(ip)) {
		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
				true);
		if (error)
			return error;
	}

	return 0;
}
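
/*
 * Boundary example (illustrative): with 4096-byte blocks, a shift starting
 * at byte offset 8192 rounds down to 8192 and then steps back one block to
 * 4096, so the flush/invalidate also stabilizes the block just before the
 * start of the shift.
 */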
1059e1d8fb88SNamjae Jeon 
1060e1d8fb88SNamjae Jeon /*
1061a904b1caSNamjae Jeon  * xfs_collapse_file_space()
1062a904b1caSNamjae Jeon  *	This routine frees disk space and shift extent for the given file.
1063a904b1caSNamjae Jeon  *	The first thing we do is to free data blocks in the specified range
1064a904b1caSNamjae Jeon  *	by calling xfs_free_file_space(). It would also sync dirty data
1065a904b1caSNamjae Jeon  *	and invalidate page cache over the region on which collapse range
1066a904b1caSNamjae Jeon  *	is working. And Shift extent records to the left to cover a hole.
1067a904b1caSNamjae Jeon  * RETURNS:
1068a904b1caSNamjae Jeon  *	0 on success
1069a904b1caSNamjae Jeon  *	errno on error
1070a904b1caSNamjae Jeon  *
1071a904b1caSNamjae Jeon  */
1072a904b1caSNamjae Jeon int
1073a904b1caSNamjae Jeon xfs_collapse_file_space(
1074a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1075a904b1caSNamjae Jeon 	xfs_off_t		offset,
1076a904b1caSNamjae Jeon 	xfs_off_t		len)
1077a904b1caSNamjae Jeon {
10784ed36c6bSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
10794ed36c6bSChristoph Hellwig 	struct xfs_trans	*tp;
1080a904b1caSNamjae Jeon 	int			error;
10814ed36c6bSChristoph Hellwig 	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
10824ed36c6bSChristoph Hellwig 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
1083ecfea3f0SChristoph Hellwig 	bool			done = false;
1084a904b1caSNamjae Jeon 
1085a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
10869ad1a23aSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
10879ad1a23aSChristoph Hellwig 
1088a904b1caSNamjae Jeon 	trace_xfs_collapse_file_space(ip);
1089a904b1caSNamjae Jeon 
1090a904b1caSNamjae Jeon 	error = xfs_free_file_space(ip, offset, len);
1091a904b1caSNamjae Jeon 	if (error)
1092a904b1caSNamjae Jeon 		return error;
1093a904b1caSNamjae Jeon 
10944ed36c6bSChristoph Hellwig 	error = xfs_prepare_shift(ip, offset);
10954ed36c6bSChristoph Hellwig 	if (error)
10964ed36c6bSChristoph Hellwig 		return error;
10974ed36c6bSChristoph Hellwig 
1098211683b2SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
10994ed36c6bSChristoph Hellwig 	if (error)
1100211683b2SBrian Foster 		return error;
11014ed36c6bSChristoph Hellwig 
11024ed36c6bSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
1103211683b2SBrian Foster 	xfs_trans_ijoin(tp, ip, 0);
11044ed36c6bSChristoph Hellwig 
1105211683b2SBrian Foster 	while (!done) {
1106ecfea3f0SChristoph Hellwig 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
1107333f950cSBrian Foster 				&done);
11084ed36c6bSChristoph Hellwig 		if (error)
1109c8eac49eSBrian Foster 			goto out_trans_cancel;
1110211683b2SBrian Foster 		if (done)
1111211683b2SBrian Foster 			break;
11124ed36c6bSChristoph Hellwig 
1113211683b2SBrian Foster 		/* finish any deferred frees and roll the transaction */
1114211683b2SBrian Foster 		error = xfs_defer_finish(&tp);
1115211683b2SBrian Foster 		if (error)
1116211683b2SBrian Foster 			goto out_trans_cancel;
11174ed36c6bSChristoph Hellwig 	}
11184ed36c6bSChristoph Hellwig 
1119211683b2SBrian Foster 	error = xfs_trans_commit(tp);
1120211683b2SBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
11214ed36c6bSChristoph Hellwig 	return error;
11224ed36c6bSChristoph Hellwig 
11234ed36c6bSChristoph Hellwig out_trans_cancel:
11244ed36c6bSChristoph Hellwig 	xfs_trans_cancel(tp);
1125211683b2SBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
11264ed36c6bSChristoph Hellwig 	return error;
1127a904b1caSNamjae Jeon }
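
/*
 * For reference, this is typically reached from userspace through
 * fallocate(2) with FALLOC_FL_COLLAPSE_RANGE. A minimal, illustrative
 * example (assuming a block-aligned offset and length):
 *
 *	int fd = open("somefile", O_RDWR);
 *
 *	// drop bytes [off, off + len) and shift the tail down
 *	if (fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, off, len) < 0)
 *		perror("fallocate");
 *
 * Both off and len must be multiples of the filesystem block size or the
 * call fails with EINVAL.
 */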
1128a904b1caSNamjae Jeon 
1129a904b1caSNamjae Jeon /*
1130a904b1caSNamjae Jeon  * xfs_insert_file_space()
1131a904b1caSNamjae Jeon  *	This routine creates hole space by shifting extents for the given file.
1132a904b1caSNamjae Jeon  *	The first thing we do is sync dirty data and invalidate the page cache
1133a904b1caSNamjae Jeon  *	over the region on which the insert range is working. We then split an
1134a904b1caSNamjae Jeon  *	extent into two at the given offset by calling xfs_bmap_split_extent,
1135a904b1caSNamjae Jeon  *	and shift all extent records lying between [offset, last allocated
1136a904b1caSNamjae Jeon  *	extent] to the right to make room for the hole.
1137a904b1caSNamjae Jeon  * RETURNS:
1138a904b1caSNamjae Jeon  *	0 on success
1139a904b1caSNamjae Jeon  *	errno on error
1140a904b1caSNamjae Jeon  */
1141a904b1caSNamjae Jeon int
1142a904b1caSNamjae Jeon xfs_insert_file_space(
1143a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1144a904b1caSNamjae Jeon 	loff_t			offset,
1145a904b1caSNamjae Jeon 	loff_t			len)
1146a904b1caSNamjae Jeon {
11474ed36c6bSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
11484ed36c6bSChristoph Hellwig 	struct xfs_trans	*tp;
11494ed36c6bSChristoph Hellwig 	int			error;
11504ed36c6bSChristoph Hellwig 	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
11514ed36c6bSChristoph Hellwig 	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
11524ed36c6bSChristoph Hellwig 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
1153ecfea3f0SChristoph Hellwig 	bool			done = false;
11544ed36c6bSChristoph Hellwig 
1155a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
11569ad1a23aSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
11579ad1a23aSChristoph Hellwig 
1158a904b1caSNamjae Jeon 	trace_xfs_insert_file_space(ip);
1159a904b1caSNamjae Jeon 
1160f62cb48eSDarrick J. Wong 	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
1161f62cb48eSDarrick J. Wong 	if (error)
1162f62cb48eSDarrick J. Wong 		return error;
1163f62cb48eSDarrick J. Wong 
11644ed36c6bSChristoph Hellwig 	error = xfs_prepare_shift(ip, offset);
11654ed36c6bSChristoph Hellwig 	if (error)
11664ed36c6bSChristoph Hellwig 		return error;
11674ed36c6bSChristoph Hellwig 
1168b73df17eSBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1169b73df17eSBrian Foster 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1170b73df17eSBrian Foster 	if (error)
1171b73df17eSBrian Foster 		return error;
1172b73df17eSBrian Foster 
1173b73df17eSBrian Foster 	xfs_ilock(ip, XFS_ILOCK_EXCL);
1174dd87f87dSBrian Foster 	xfs_trans_ijoin(tp, ip, 0);
1175b73df17eSBrian Foster 
1176*85ef08b5SChandan Babu R 	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
1177*85ef08b5SChandan Babu R 			XFS_IEXT_PUNCH_HOLE_CNT);
1178*85ef08b5SChandan Babu R 	if (error)
1179*85ef08b5SChandan Babu R 		goto out_trans_cancel;
1180*85ef08b5SChandan Babu R 
1181dd87f87dSBrian Foster 	/*
1182dd87f87dSBrian Foster 	 * The extent shifting code works at extent granularity. So, if stop_fsb
1183dd87f87dSBrian Foster 	 * is not the starting block of an extent, we need to split the extent
1184dd87f87dSBrian Foster 	 * at stop_fsb.
1185dd87f87dSBrian Foster 	 */
1186b73df17eSBrian Foster 	error = xfs_bmap_split_extent(tp, ip, stop_fsb);
1187b73df17eSBrian Foster 	if (error)
1188b73df17eSBrian Foster 		goto out_trans_cancel;
1189b73df17eSBrian Foster 
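	/*
	 * Finish the deferred work queued by the split first, then shift
	 * extents in chunks, rolling the transaction in between, until
	 * xfs_bmap_insert_extents() signals that it has worked back to
	 * stop_fsb.
	 */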
1190dd87f87dSBrian Foster 	do {
11919c516e0eSBrian Foster 		error = xfs_defer_finish(&tp);
11924ed36c6bSChristoph Hellwig 		if (error)
1193dd87f87dSBrian Foster 			goto out_trans_cancel;
11944ed36c6bSChristoph Hellwig 
1195ecfea3f0SChristoph Hellwig 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
1196333f950cSBrian Foster 				&done, stop_fsb);
11974ed36c6bSChristoph Hellwig 		if (error)
1198c8eac49eSBrian Foster 			goto out_trans_cancel;
1199dd87f87dSBrian Foster 	} while (!done);
12004ed36c6bSChristoph Hellwig 
12014ed36c6bSChristoph Hellwig 	error = xfs_trans_commit(tp);
1202dd87f87dSBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
12034ed36c6bSChristoph Hellwig 	return error;
12044ed36c6bSChristoph Hellwig 
1205c8eac49eSBrian Foster out_trans_cancel:
12064ed36c6bSChristoph Hellwig 	xfs_trans_cancel(tp);
1207dd87f87dSBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
12084ed36c6bSChristoph Hellwig 	return error;
1209a904b1caSNamjae Jeon }
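
/*
 * As with the collapse case above, this is typically driven from userspace
 * by fallocate(2) with FALLOC_FL_INSERT_RANGE. The same block alignment
 * rules apply, and the insertion point must lie within EOF or the call
 * fails with EINVAL.
 */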
1210a904b1caSNamjae Jeon 
1211a904b1caSNamjae Jeon /*
1212a133d952SDave Chinner  * We need to check that the format of the data fork in the temporary inode is
1213a133d952SDave Chinner  * valid for the target inode before doing the swap. This is not a problem with
1214a133d952SDave Chinner  * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1215a133d952SDave Chinner  * data fork depending on the space the attribute fork is taking, so we can get
1216a133d952SDave Chinner  * invalid formats on the target inode.
1217a133d952SDave Chinner  *
1218a133d952SDave Chinner  * E.g. target has space for 7 extents in extent format, temp inode only has
1219a133d952SDave Chinner  * space for 6.  If we defragment down to 7 extents, then the tmp format is a
1220a133d952SDave Chinner  * btree, but when swapped it needs to be in extent format. Hence we can't just
1221a133d952SDave Chinner  * blindly swap data forks on attr2 filesystems.
1222a133d952SDave Chinner  *
1223a133d952SDave Chinner  * Note that we check the swap in both directions so that we don't end up with
1224a133d952SDave Chinner  * a corrupt temporary inode, either.
1225a133d952SDave Chinner  *
1226a133d952SDave Chinner  * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1227a133d952SDave Chinner  * inode will prevent this situation from occurring, so all we do here is
1228a133d952SDave Chinner  * reject and log the attempt. Basically, we are putting the responsibility on
1229a133d952SDave Chinner  * userspace to get this right.
1230a133d952SDave Chinner  */
1231a133d952SDave Chinner static int
1232a133d952SDave Chinner xfs_swap_extents_check_format(
1233e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1234e06259aaSDarrick J. Wong 	struct xfs_inode	*tip)	/* tmp inode */
1235a133d952SDave Chinner {
1236f7e67b20SChristoph Hellwig 	struct xfs_ifork	*ifp = &ip->i_df;
1237f7e67b20SChristoph Hellwig 	struct xfs_ifork	*tifp = &tip->i_df;
1238a133d952SDave Chinner 
1239765d3c39SDarrick J. Wong 	/* User/group/project quota ids must match if quotas are enforced. */
1240765d3c39SDarrick J. Wong 	if (XFS_IS_QUOTA_ON(ip->i_mount) &&
1241765d3c39SDarrick J. Wong 	    (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
1242765d3c39SDarrick J. Wong 	     !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
1243765d3c39SDarrick J. Wong 	     ip->i_d.di_projid != tip->i_d.di_projid))
1244765d3c39SDarrick J. Wong 		return -EINVAL;
1245765d3c39SDarrick J. Wong 
1246a133d952SDave Chinner 	/* Should never get a local format */
1247f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
1248f7e67b20SChristoph Hellwig 	    tifp->if_format == XFS_DINODE_FMT_LOCAL)
12492451337dSDave Chinner 		return -EINVAL;
1250a133d952SDave Chinner 
1251a133d952SDave Chinner 	/*
1252a133d952SDave Chinner 	 * If the target inode has fewer extents than the temporary inode, then
1253a133d952SDave Chinner 	 * why did userspace call us?
1254a133d952SDave Chinner 	 */
1255f7e67b20SChristoph Hellwig 	if (ifp->if_nextents < tifp->if_nextents)
12562451337dSDave Chinner 		return -EINVAL;
1257a133d952SDave Chinner 
1258a133d952SDave Chinner 	/*
12591f08af52SDarrick J. Wong 	 * If we have to use the (expensive) rmap swap method, we can
12601f08af52SDarrick J. Wong 	 * handle any number of extents and any format.
12611f08af52SDarrick J. Wong 	 */
12621f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&ip->i_mount->m_sb))
12631f08af52SDarrick J. Wong 		return 0;
12641f08af52SDarrick J. Wong 
12651f08af52SDarrick J. Wong 	/*
1266a133d952SDave Chinner 	 * If the target inode is in extent form and the temp inode is in btree
1267a133d952SDave Chinner 	 * form then we will end up with the target inode in the wrong format,
1268a133d952SDave Chinner 	 * as we already know there are fewer extents in the temp inode.
1269a133d952SDave Chinner 	 */
1270f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1271f7e67b20SChristoph Hellwig 	    tifp->if_format == XFS_DINODE_FMT_BTREE)
12722451337dSDave Chinner 		return -EINVAL;
1273a133d952SDave Chinner 
1274a133d952SDave Chinner 	/* Check temp in extent form to max in target */
1275f7e67b20SChristoph Hellwig 	if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1276f7e67b20SChristoph Hellwig 	    tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
12772451337dSDave Chinner 		return -EINVAL;
1278a133d952SDave Chinner 
1279a133d952SDave Chinner 	/* Check target in extent form to max in temp */
1280f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1281f7e67b20SChristoph Hellwig 	    ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
12822451337dSDave Chinner 		return -EINVAL;
1283a133d952SDave Chinner 
1284a133d952SDave Chinner 	/*
1285a133d952SDave Chinner 	 * If we are in a btree format, check that the temp root block will fit
1286a133d952SDave Chinner 	 * in the target and that it has enough extents to be in btree format
1287a133d952SDave Chinner 	 * in the target.
1288a133d952SDave Chinner 	 *
1289a133d952SDave Chinner 	 * Note that we have to be careful to allow btree->extent conversions
1290a133d952SDave Chinner 	 * (a common defrag case) which will occur when the temp inode is in
1291a133d952SDave Chinner 	 * extent format...
1292a133d952SDave Chinner 	 */
1293f7e67b20SChristoph Hellwig 	if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
12940cbe48ccSArnd Bergmann 		if (XFS_IFORK_Q(ip) &&
1295f7e67b20SChristoph Hellwig 		    XFS_BMAP_BMDR_SPACE(tifp->if_broot) > XFS_IFORK_BOFF(ip))
12962451337dSDave Chinner 			return -EINVAL;
1297f7e67b20SChristoph Hellwig 		if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
12982451337dSDave Chinner 			return -EINVAL;
1299a133d952SDave Chinner 	}
1300a133d952SDave Chinner 
1301a133d952SDave Chinner 	/* Reciprocal target->temp btree format checks */
1302f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
13030cbe48ccSArnd Bergmann 		if (XFS_IFORK_Q(tip) &&
1304a133d952SDave Chinner 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > XFS_IFORK_BOFF(tip))
13052451337dSDave Chinner 			return -EINVAL;
1306f7e67b20SChristoph Hellwig 		if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
13072451337dSDave Chinner 			return -EINVAL;
1308a133d952SDave Chinner 	}
1309a133d952SDave Chinner 
1310a133d952SDave Chinner 	return 0;
1311a133d952SDave Chinner }
1312a133d952SDave Chinner 
13137abbb8f9SDave Chinner static int
13144ef897a2SDave Chinner xfs_swap_extent_flush(
13154ef897a2SDave Chinner 	struct xfs_inode	*ip)
13164ef897a2SDave Chinner {
13174ef897a2SDave Chinner 	int	error;
13184ef897a2SDave Chinner 
13194ef897a2SDave Chinner 	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
13204ef897a2SDave Chinner 	if (error)
13214ef897a2SDave Chinner 		return error;
13224ef897a2SDave Chinner 	truncate_pagecache_range(VFS_I(ip), 0, -1);
13234ef897a2SDave Chinner 
13244ef897a2SDave Chinner 	/* Verify the invalidation emptied the page cache (ftmp is written via O_DIRECT) */
13254ef897a2SDave Chinner 	if (VFS_I(ip)->i_mapping->nrpages)
13264ef897a2SDave Chinner 		return -EINVAL;
13274ef897a2SDave Chinner 	return 0;
13284ef897a2SDave Chinner }
13294ef897a2SDave Chinner 
13301f08af52SDarrick J. Wong /*
13311f08af52SDarrick J. Wong  * Move extents from one file to another, when rmap is enabled.
13321f08af52SDarrick J. Wong  */
13331f08af52SDarrick J. Wong STATIC int
13341f08af52SDarrick J. Wong xfs_swap_extent_rmap(
13351f08af52SDarrick J. Wong 	struct xfs_trans		**tpp,
13361f08af52SDarrick J. Wong 	struct xfs_inode		*ip,
13371f08af52SDarrick J. Wong 	struct xfs_inode		*tip)
13381f08af52SDarrick J. Wong {
13397a7943c7SBrian Foster 	struct xfs_trans		*tp = *tpp;
13401f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		irec;
13411f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		uirec;
13421f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		tirec;
13431f08af52SDarrick J. Wong 	xfs_fileoff_t			offset_fsb;
13441f08af52SDarrick J. Wong 	xfs_fileoff_t			end_fsb;
13451f08af52SDarrick J. Wong 	xfs_filblks_t			count_fsb;
13461f08af52SDarrick J. Wong 	int				error;
13471f08af52SDarrick J. Wong 	xfs_filblks_t			ilen;
13481f08af52SDarrick J. Wong 	xfs_filblks_t			rlen;
13491f08af52SDarrick J. Wong 	int				nimaps;
1350c8ce540dSDarrick J. Wong 	uint64_t			tip_flags2;
13511f08af52SDarrick J. Wong 
13521f08af52SDarrick J. Wong 	/*
13531f08af52SDarrick J. Wong 	 * If the source file has shared blocks, we must flag the donor
13541f08af52SDarrick J. Wong 	 * file as having shared blocks so that we get the shared-block
13551f08af52SDarrick J. Wong 	 * rmap functions when we go to fix up the rmaps.  The flags
13561f08af52SDarrick J. Wong 	 * will be switched for real later.
13571f08af52SDarrick J. Wong 	 */
13581f08af52SDarrick J. Wong 	tip_flags2 = tip->i_d.di_flags2;
13591f08af52SDarrick J. Wong 	if (ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)
13601f08af52SDarrick J. Wong 		tip->i_d.di_flags2 |= XFS_DIFLAG2_REFLINK;
13611f08af52SDarrick J. Wong 
13621f08af52SDarrick J. Wong 	offset_fsb = 0;
13631f08af52SDarrick J. Wong 	end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
13641f08af52SDarrick J. Wong 	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
13651f08af52SDarrick J. Wong 
13661f08af52SDarrick J. Wong 	while (count_fsb) {
13671f08af52SDarrick J. Wong 		/* Read extent from the donor file */
13681f08af52SDarrick J. Wong 		nimaps = 1;
13691f08af52SDarrick J. Wong 		error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
13701f08af52SDarrick J. Wong 				&nimaps, 0);
13711f08af52SDarrick J. Wong 		if (error)
13721f08af52SDarrick J. Wong 			goto out;
13731f08af52SDarrick J. Wong 		ASSERT(nimaps == 1);
13741f08af52SDarrick J. Wong 		ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);
13751f08af52SDarrick J. Wong 
13761f08af52SDarrick J. Wong 		trace_xfs_swap_extent_rmap_remap(tip, &tirec);
13771f08af52SDarrick J. Wong 		ilen = tirec.br_blockcount;
13781f08af52SDarrick J. Wong 
13791f08af52SDarrick J. Wong 		/* Unmap the old blocks in the source file. */
13801f08af52SDarrick J. Wong 		while (tirec.br_blockcount) {
1381c8eac49eSBrian Foster 			ASSERT(tp->t_firstblock == NULLFSBLOCK);
13821f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
13831f08af52SDarrick J. Wong 
13841f08af52SDarrick J. Wong 			/* Read extent from the source file */
13851f08af52SDarrick J. Wong 			nimaps = 1;
13861f08af52SDarrick J. Wong 			error = xfs_bmapi_read(ip, tirec.br_startoff,
13871f08af52SDarrick J. Wong 					tirec.br_blockcount, &irec,
13881f08af52SDarrick J. Wong 					&nimaps, 0);
13891f08af52SDarrick J. Wong 			if (error)
1390d5a2e289SBrian Foster 				goto out;
13911f08af52SDarrick J. Wong 			ASSERT(nimaps == 1);
13921f08af52SDarrick J. Wong 			ASSERT(tirec.br_startoff == irec.br_startoff);
13931f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
13941f08af52SDarrick J. Wong 
13951f08af52SDarrick J. Wong 			/* Trim the extent. */
13961f08af52SDarrick J. Wong 			uirec = tirec;
13971f08af52SDarrick J. Wong 			uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
13981f08af52SDarrick J. Wong 					tirec.br_blockcount,
13991f08af52SDarrick J. Wong 					irec.br_blockcount);
14001f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
14011f08af52SDarrick J. Wong 
14021f08af52SDarrick J. Wong 			/* Remove the mapping from the donor file. */
14033e08f42aSDarrick J. Wong 			xfs_bmap_unmap_extent(tp, tip, &uirec);
14041f08af52SDarrick J. Wong 
14051f08af52SDarrick J. Wong 			/* Remove the mapping from the source file. */
14063e08f42aSDarrick J. Wong 			xfs_bmap_unmap_extent(tp, ip, &irec);
14071f08af52SDarrick J. Wong 
14081f08af52SDarrick J. Wong 			/* Map the donor file's blocks into the source file. */
14093e08f42aSDarrick J. Wong 			xfs_bmap_map_extent(tp, ip, &uirec);
14101f08af52SDarrick J. Wong 
14111f08af52SDarrick J. Wong 			/* Map the source file's blocks into the donor file. */
14123e08f42aSDarrick J. Wong 			xfs_bmap_map_extent(tp, tip, &irec);
14131f08af52SDarrick J. Wong 
14149e28a242SBrian Foster 			error = xfs_defer_finish(tpp);
14157a7943c7SBrian Foster 			tp = *tpp;
14161f08af52SDarrick J. Wong 			if (error)
14179b1f4e98SBrian Foster 				goto out;
14181f08af52SDarrick J. Wong 
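			/*
			 * Advance past the chunk we just swapped; holes and
			 * delalloc extents have no physical start block to
			 * step forward.
			 */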
14191f08af52SDarrick J. Wong 			tirec.br_startoff += rlen;
14201f08af52SDarrick J. Wong 			if (tirec.br_startblock != HOLESTARTBLOCK &&
14211f08af52SDarrick J. Wong 			    tirec.br_startblock != DELAYSTARTBLOCK)
14221f08af52SDarrick J. Wong 				tirec.br_startblock += rlen;
14231f08af52SDarrick J. Wong 			tirec.br_blockcount -= rlen;
14241f08af52SDarrick J. Wong 		}
14251f08af52SDarrick J. Wong 
14261f08af52SDarrick J. Wong 		/* Roll on... */
14271f08af52SDarrick J. Wong 		count_fsb -= ilen;
14281f08af52SDarrick J. Wong 		offset_fsb += ilen;
14291f08af52SDarrick J. Wong 	}
14301f08af52SDarrick J. Wong 
14311f08af52SDarrick J. Wong 	tip->i_d.di_flags2 = tip_flags2;
14321f08af52SDarrick J. Wong 	return 0;
14331f08af52SDarrick J. Wong 
14341f08af52SDarrick J. Wong out:
14351f08af52SDarrick J. Wong 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
14361f08af52SDarrick J. Wong 	tip->i_d.di_flags2 = tip_flags2;
14371f08af52SDarrick J. Wong 	return error;
14381f08af52SDarrick J. Wong }
14391f08af52SDarrick J. Wong 
144039aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */
144139aff5fdSDarrick J. Wong STATIC int
144239aff5fdSDarrick J. Wong xfs_swap_extent_forks(
144339aff5fdSDarrick J. Wong 	struct xfs_trans	*tp,
144439aff5fdSDarrick J. Wong 	struct xfs_inode	*ip,
144539aff5fdSDarrick J. Wong 	struct xfs_inode	*tip,
144639aff5fdSDarrick J. Wong 	int			*src_log_flags,
144739aff5fdSDarrick J. Wong 	int			*target_log_flags)
144839aff5fdSDarrick J. Wong {
1449e7f5d5caSDarrick J. Wong 	xfs_filblks_t		aforkblks = 0;
1450e7f5d5caSDarrick J. Wong 	xfs_filblks_t		taforkblks = 0;
1451e7f5d5caSDarrick J. Wong 	xfs_extnum_t		junk;
1452c8ce540dSDarrick J. Wong 	uint64_t		tmp;
145339aff5fdSDarrick J. Wong 	int			error;
145439aff5fdSDarrick J. Wong 
145539aff5fdSDarrick J. Wong 	/*
145639aff5fdSDarrick J. Wong 	 * Count the number of extended attribute blocks
145739aff5fdSDarrick J. Wong 	 */
1458daf83964SChristoph Hellwig 	if (XFS_IFORK_Q(ip) && ip->i_afp->if_nextents > 0 &&
1459f7e67b20SChristoph Hellwig 	    ip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
1460e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
146139aff5fdSDarrick J. Wong 				&aforkblks);
146239aff5fdSDarrick J. Wong 		if (error)
146339aff5fdSDarrick J. Wong 			return error;
146439aff5fdSDarrick J. Wong 	}
1465daf83964SChristoph Hellwig 	if (XFS_IFORK_Q(tip) && tip->i_afp->if_nextents > 0 &&
1466f7e67b20SChristoph Hellwig 	    tip->i_afp->if_format != XFS_DINODE_FMT_LOCAL) {
1467e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
146839aff5fdSDarrick J. Wong 				&taforkblks);
146939aff5fdSDarrick J. Wong 		if (error)
147039aff5fdSDarrick J. Wong 			return error;
147139aff5fdSDarrick J. Wong 	}
147239aff5fdSDarrick J. Wong 
147339aff5fdSDarrick J. Wong 	/*
14746fb10d6dSBrian Foster 	 * Btree format (v3) inodes have the inode number stamped in the bmbt
14756fb10d6dSBrian Foster 	 * block headers. We can't start changing the bmbt blocks until the
14766fb10d6dSBrian Foster 	 * inode owner change is logged so recovery does the right thing in the
14776fb10d6dSBrian Foster 	 * event of a crash. Set the owner change log flags now and leave the
14786fb10d6dSBrian Foster 	 * bmbt scan as the last step.
147939aff5fdSDarrick J. Wong 	 */
14806471e9c5SChristoph Hellwig 	if (xfs_sb_version_has_v3inode(&ip->i_mount->m_sb)) {
1481f7e67b20SChristoph Hellwig 		if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
148239aff5fdSDarrick J. Wong 			(*target_log_flags) |= XFS_ILOG_DOWNER;
1483f7e67b20SChristoph Hellwig 		if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
148439aff5fdSDarrick J. Wong 			(*src_log_flags) |= XFS_ILOG_DOWNER;
14856471e9c5SChristoph Hellwig 	}
148639aff5fdSDarrick J. Wong 
148739aff5fdSDarrick J. Wong 	/*
148839aff5fdSDarrick J. Wong 	 * Swap the data forks of the inodes
148939aff5fdSDarrick J. Wong 	 */
1490897992b7SGustavo A. R. Silva 	swap(ip->i_df, tip->i_df);
149139aff5fdSDarrick J. Wong 
149239aff5fdSDarrick J. Wong 	/*
149339aff5fdSDarrick J. Wong 	 * Fix the on-disk inode values
149439aff5fdSDarrick J. Wong 	 */
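	/*
	 * Note that di_nblocks covers all forks, but only the data forks
	 * changed hands, so each inode keeps its own attribute fork blocks:
	 * subtract the other inode's attr block count and add back its own.
	 */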
1495c8ce540dSDarrick J. Wong 	tmp = (uint64_t)ip->i_d.di_nblocks;
149639aff5fdSDarrick J. Wong 	ip->i_d.di_nblocks = tip->i_d.di_nblocks - taforkblks + aforkblks;
149739aff5fdSDarrick J. Wong 	tip->i_d.di_nblocks = tmp + taforkblks - aforkblks;
149839aff5fdSDarrick J. Wong 
149939aff5fdSDarrick J. Wong 	/*
150039aff5fdSDarrick J. Wong 	 * The extents in the source inode could still contain speculative
150139aff5fdSDarrick J. Wong 	 * preallocation beyond EOF (e.g. the file is open but not modified
150239aff5fdSDarrick J. Wong 	 * while defrag is in progress). In that case, we need to copy over the
150339aff5fdSDarrick J. Wong 	 * number of delalloc blocks the data fork in the source inode is
150439aff5fdSDarrick J. Wong 	 * tracking beyond EOF so that when the fork is truncated away when the
150539aff5fdSDarrick J. Wong 	 * temporary inode is unlinked we don't underrun the i_delayed_blks
150639aff5fdSDarrick J. Wong 	 * counter on that inode.
150739aff5fdSDarrick J. Wong 	 */
150839aff5fdSDarrick J. Wong 	ASSERT(tip->i_delayed_blks == 0);
150939aff5fdSDarrick J. Wong 	tip->i_delayed_blks = ip->i_delayed_blks;
151039aff5fdSDarrick J. Wong 	ip->i_delayed_blks = 0;
151139aff5fdSDarrick J. Wong 
1512f7e67b20SChristoph Hellwig 	switch (ip->i_df.if_format) {
151339aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
151439aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DEXT;
151539aff5fdSDarrick J. Wong 		break;
151639aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
15176471e9c5SChristoph Hellwig 		ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
151839aff5fdSDarrick J. Wong 		       (*src_log_flags & XFS_ILOG_DOWNER));
151939aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DBROOT;
152039aff5fdSDarrick J. Wong 		break;
152139aff5fdSDarrick J. Wong 	}
152239aff5fdSDarrick J. Wong 
1523f7e67b20SChristoph Hellwig 	switch (tip->i_df.if_format) {
152439aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
152539aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DEXT;
152639aff5fdSDarrick J. Wong 		break;
152739aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
152839aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DBROOT;
15296471e9c5SChristoph Hellwig 		ASSERT(!xfs_sb_version_has_v3inode(&ip->i_mount->m_sb) ||
153039aff5fdSDarrick J. Wong 		       (*target_log_flags & XFS_ILOG_DOWNER));
153139aff5fdSDarrick J. Wong 		break;
153239aff5fdSDarrick J. Wong 	}
153339aff5fdSDarrick J. Wong 
153439aff5fdSDarrick J. Wong 	return 0;
153539aff5fdSDarrick J. Wong }
153639aff5fdSDarrick J. Wong 
15372dd3d709SBrian Foster /*
15382dd3d709SBrian Foster  * Fix up the owners of the bmbt blocks to refer to the current inode. The
15392dd3d709SBrian Foster  * change owner scan attempts to order all modified buffers in the current
15402dd3d709SBrian Foster  * transaction. In the event of ordered buffer failure, the offending buffer is
15412dd3d709SBrian Foster  * physically logged as a fallback and the scan returns -EAGAIN. We must roll
15422dd3d709SBrian Foster  * the transaction in this case to replenish the fallback log reservation and
15432dd3d709SBrian Foster  * restart the scan. This process repeats until the scan completes.
15442dd3d709SBrian Foster  */
15452dd3d709SBrian Foster static int
15462dd3d709SBrian Foster xfs_swap_change_owner(
15472dd3d709SBrian Foster 	struct xfs_trans	**tpp,
15482dd3d709SBrian Foster 	struct xfs_inode	*ip,
15492dd3d709SBrian Foster 	struct xfs_inode	*tmpip)
15502dd3d709SBrian Foster {
15512dd3d709SBrian Foster 	int			error;
15522dd3d709SBrian Foster 	struct xfs_trans	*tp = *tpp;
15532dd3d709SBrian Foster 
15542dd3d709SBrian Foster 	do {
15552dd3d709SBrian Foster 		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
15562dd3d709SBrian Foster 					      NULL);
15572dd3d709SBrian Foster 		/* success or fatal error */
15582dd3d709SBrian Foster 		if (error != -EAGAIN)
15592dd3d709SBrian Foster 			break;
15602dd3d709SBrian Foster 
15612dd3d709SBrian Foster 		error = xfs_trans_roll(tpp);
15622dd3d709SBrian Foster 		if (error)
15632dd3d709SBrian Foster 			break;
15642dd3d709SBrian Foster 		tp = *tpp;
15652dd3d709SBrian Foster 
15662dd3d709SBrian Foster 		/*
15672dd3d709SBrian Foster 		 * Redirty both inodes so they can relog and keep the log tail
15682dd3d709SBrian Foster 		 * moving forward.
15692dd3d709SBrian Foster 		 */
15702dd3d709SBrian Foster 		xfs_trans_ijoin(tp, ip, 0);
15712dd3d709SBrian Foster 		xfs_trans_ijoin(tp, tmpip, 0);
15722dd3d709SBrian Foster 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
15732dd3d709SBrian Foster 		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
15742dd3d709SBrian Foster 	} while (true);
15752dd3d709SBrian Foster 
15762dd3d709SBrian Foster 	return error;
15772dd3d709SBrian Foster }
15782dd3d709SBrian Foster 
15794ef897a2SDave Chinner int
1580a133d952SDave Chinner xfs_swap_extents(
1581e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1582e06259aaSDarrick J. Wong 	struct xfs_inode	*tip,	/* tmp inode */
1583e06259aaSDarrick J. Wong 	struct xfs_swapext	*sxp)
1584a133d952SDave Chinner {
1585e06259aaSDarrick J. Wong 	struct xfs_mount	*mp = ip->i_mount;
1586e06259aaSDarrick J. Wong 	struct xfs_trans	*tp;
1587e06259aaSDarrick J. Wong 	struct xfs_bstat	*sbp = &sxp->sx_stat;
1588a133d952SDave Chinner 	int			src_log_flags, target_log_flags;
1589a133d952SDave Chinner 	int			error = 0;
159081217683SDave Chinner 	int			lock_flags;
1591c8ce540dSDarrick J. Wong 	uint64_t		f;
15922dd3d709SBrian Foster 	int			resblks = 0;
1593f74681baSBrian Foster 	unsigned int		flags = 0;
1594a133d952SDave Chinner 
1595a133d952SDave Chinner 	/*
1596723cac48SDave Chinner 	 * Lock the inodes against other IO, page faults and truncate to
1597723cac48SDave Chinner 	 * begin with.  Then we can safely ensure the inodes are flushed and
1598723cac48SDave Chinner 	 * have no page cache. Once we have done this we can take the ilocks
1599723cac48SDave Chinner 	 * and do the rest of the checks.
1600a133d952SDave Chinner 	 */
160165523218SChristoph Hellwig 	lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
160265523218SChristoph Hellwig 	lock_flags = XFS_MMAPLOCK_EXCL;
16037c2d238aSDarrick J. Wong 	xfs_lock_two_inodes(ip, XFS_MMAPLOCK_EXCL, tip, XFS_MMAPLOCK_EXCL);
1604a133d952SDave Chinner 
1605a133d952SDave Chinner 	/* Verify that both files have the same format */
1606c19b3b05SDave Chinner 	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
16072451337dSDave Chinner 		error = -EINVAL;
1608a133d952SDave Chinner 		goto out_unlock;
1609a133d952SDave Chinner 	}
1610a133d952SDave Chinner 
1611a133d952SDave Chinner 	/* Verify both files are either real-time or non-realtime */
1612a133d952SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
16132451337dSDave Chinner 		error = -EINVAL;
1614a133d952SDave Chinner 		goto out_unlock;
1615a133d952SDave Chinner 	}
1616a133d952SDave Chinner 
16172713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(ip);
16182713fefaSDarrick J. Wong 	if (error)
16192713fefaSDarrick J. Wong 		goto out_unlock;
16202713fefaSDarrick J. Wong 
16212713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(tip);
16222713fefaSDarrick J. Wong 	if (error)
16232713fefaSDarrick J. Wong 		goto out_unlock;
16242713fefaSDarrick J. Wong 
16254ef897a2SDave Chinner 	error = xfs_swap_extent_flush(ip);
1626a133d952SDave Chinner 	if (error)
1627a133d952SDave Chinner 		goto out_unlock;
16284ef897a2SDave Chinner 	error = xfs_swap_extent_flush(tip);
16294ef897a2SDave Chinner 	if (error)
16304ef897a2SDave Chinner 		goto out_unlock;
1631a133d952SDave Chinner 
163296987eeaSChristoph Hellwig 	if (xfs_inode_has_cow_data(tip)) {
163396987eeaSChristoph Hellwig 		error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
163496987eeaSChristoph Hellwig 		if (error)
16358bc3b5e4SDarrick J. Wong 			goto out_unlock;
163696987eeaSChristoph Hellwig 	}
163796987eeaSChristoph Hellwig 
16381f08af52SDarrick J. Wong 	/*
16391f08af52SDarrick J. Wong 	 * Extent "swapping" with rmap requires a permanent reservation and
16401f08af52SDarrick J. Wong 	 * a block reservation because it's really just a remap operation
16411f08af52SDarrick J. Wong 	 * performed with log redo items!
16421f08af52SDarrick J. Wong 	 */
16431f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&mp->m_sb)) {
1644b3fed434SBrian Foster 		int		w = XFS_DATA_FORK;
1645daf83964SChristoph Hellwig 		uint32_t	ipnext = ip->i_df.if_nextents;
1646daf83964SChristoph Hellwig 		uint32_t	tipnext	= tip->i_df.if_nextents;
1647b3fed434SBrian Foster 
16481f08af52SDarrick J. Wong 		/*
1649b3fed434SBrian Foster 		 * Conceptually this shouldn't affect the shape of either bmbt,
1650b3fed434SBrian Foster 		 * but since we atomically move extents one by one, we reserve
1651b3fed434SBrian Foster 		 * enough space to rebuild both trees.
16521f08af52SDarrick J. Wong 		 */
1653b3fed434SBrian Foster 		resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
1654b3fed434SBrian Foster 		resblks +=  XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
1655b3fed434SBrian Foster 
1656b3fed434SBrian Foster 		/*
1657f74681baSBrian Foster 		 * If either inode straddles a bmapbt block allocation boundary,
1658f74681baSBrian Foster 		 * the rmapbt algorithm triggers repeated allocs and frees as
1659f74681baSBrian Foster 		 * extents are remapped. This can exhaust the block reservation
1660f74681baSBrian Foster 		 * prematurely and cause shutdown. Return freed blocks to the
1661f74681baSBrian Foster 		 * transaction reservation to counter this behavior.
1662b3fed434SBrian Foster 		 */
1663f74681baSBrian Foster 		flags |= XFS_TRANS_RES_FDBLKS;
16642dd3d709SBrian Foster 	}
1665f74681baSBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags,
1666f74681baSBrian Foster 				&tp);
1667253f4911SChristoph Hellwig 	if (error)
1668a133d952SDave Chinner 		goto out_unlock;
1669723cac48SDave Chinner 
1670723cac48SDave Chinner 	/*
1671723cac48SDave Chinner 	 * Lock and join the inodes to the transaction so that transaction commit
1672723cac48SDave Chinner 	 * or cancel will unlock the inodes from this point onwards.
1673723cac48SDave Chinner 	 */
16747c2d238aSDarrick J. Wong 	xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
16754ef897a2SDave Chinner 	lock_flags |= XFS_ILOCK_EXCL;
167639aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, ip, 0);
167739aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, tip, 0);
1678723cac48SDave Chinner 
1680a133d952SDave Chinner 	/* Verify all data are being swapped */
1681a133d952SDave Chinner 	if (sxp->sx_offset != 0 ||
1682a133d952SDave Chinner 	    sxp->sx_length != ip->i_d.di_size ||
1683a133d952SDave Chinner 	    sxp->sx_length != tip->i_d.di_size) {
16842451337dSDave Chinner 		error = -EFAULT;
16854ef897a2SDave Chinner 		goto out_trans_cancel;
1686a133d952SDave Chinner 	}
1687a133d952SDave Chinner 
1688a133d952SDave Chinner 	trace_xfs_swap_extent_before(ip, 0);
1689a133d952SDave Chinner 	trace_xfs_swap_extent_before(tip, 1);
1690a133d952SDave Chinner 
1691a133d952SDave Chinner 	/* check inode formats now that data is flushed */
1692a133d952SDave Chinner 	error = xfs_swap_extents_check_format(ip, tip);
1693a133d952SDave Chinner 	if (error) {
1694a133d952SDave Chinner 		xfs_notice(mp,
1695a133d952SDave Chinner 		    "%s: inode 0x%llx format is incompatible for exchanging.",
1696a133d952SDave Chinner 				__func__, ip->i_ino);
16974ef897a2SDave Chinner 		goto out_trans_cancel;
1698a133d952SDave Chinner 	}
1699a133d952SDave Chinner 
1700a133d952SDave Chinner 	/*
1701a133d952SDave Chinner 	 * Compare the current change & modify times with those
1702a133d952SDave Chinner 	 * passed in.  If they differ, we abort this swap.
1703a133d952SDave Chinner 	 * This is the mechanism used to assure the calling
1704a133d952SDave Chinner 	 * process that the file was not changed out from
1705a133d952SDave Chinner 	 * under it.
1706a133d952SDave Chinner 	 */
1707a133d952SDave Chinner 	if ((sbp->bs_ctime.tv_sec != VFS_I(ip)->i_ctime.tv_sec) ||
1708a133d952SDave Chinner 	    (sbp->bs_ctime.tv_nsec != VFS_I(ip)->i_ctime.tv_nsec) ||
1709a133d952SDave Chinner 	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
1710a133d952SDave Chinner 	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
17112451337dSDave Chinner 		error = -EBUSY;
171281217683SDave Chinner 		goto out_trans_cancel;
1713a133d952SDave Chinner 	}
1714a133d952SDave Chinner 
171521b5c978SDave Chinner 	/*
171621b5c978SDave Chinner 	 * Note the trickiness in setting the log flags - we set the owner log
171721b5c978SDave Chinner 	 * flag on the opposite inode (i.e. the inode we are setting the new
171821b5c978SDave Chinner 	 * owner to be) because once we swap the forks and log that, log
171921b5c978SDave Chinner 	 * recovery is going to see the fork as owned by the swapped inode,
172021b5c978SDave Chinner 	 * not the pre-swapped inodes.
172121b5c978SDave Chinner 	 */
172221b5c978SDave Chinner 	src_log_flags = XFS_ILOG_CORE;
172321b5c978SDave Chinner 	target_log_flags = XFS_ILOG_CORE;
172439aff5fdSDarrick J. Wong 
17251f08af52SDarrick J. Wong 	if (xfs_sb_version_hasrmapbt(&mp->m_sb))
17261f08af52SDarrick J. Wong 		error = xfs_swap_extent_rmap(&tp, ip, tip);
17271f08af52SDarrick J. Wong 	else
172839aff5fdSDarrick J. Wong 		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
172939aff5fdSDarrick J. Wong 				&target_log_flags);
173021b5c978SDave Chinner 	if (error)
173121b5c978SDave Chinner 		goto out_trans_cancel;
1732a133d952SDave Chinner 
1733f0bc4d13SDarrick J. Wong 	/* Do we have to swap reflink flags? */
1734f0bc4d13SDarrick J. Wong 	if ((ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK) ^
1735f0bc4d13SDarrick J. Wong 	    (tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK)) {
1736f0bc4d13SDarrick J. Wong 		f = ip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
1737f0bc4d13SDarrick J. Wong 		ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1738f0bc4d13SDarrick J. Wong 		ip->i_d.di_flags2 |= tip->i_d.di_flags2 & XFS_DIFLAG2_REFLINK;
1739f0bc4d13SDarrick J. Wong 		tip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1740f0bc4d13SDarrick J. Wong 		tip->i_d.di_flags2 |= f & XFS_DIFLAG2_REFLINK;
174152bfcdd7SDarrick J. Wong 	}
174252bfcdd7SDarrick J. Wong 
174352bfcdd7SDarrick J. Wong 	/* Swap the cow forks. */
174452bfcdd7SDarrick J. Wong 	if (xfs_sb_version_hasreflink(&mp->m_sb)) {
1745f7e67b20SChristoph Hellwig 		ASSERT(!ip->i_cowfp ||
1746f7e67b20SChristoph Hellwig 		       ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
1747f7e67b20SChristoph Hellwig 		ASSERT(!tip->i_cowfp ||
1748f7e67b20SChristoph Hellwig 		       tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
174952bfcdd7SDarrick J. Wong 
1750897992b7SGustavo A. R. Silva 		swap(ip->i_cowfp, tip->i_cowfp);
175152bfcdd7SDarrick J. Wong 
17525bcffe30SChristoph Hellwig 		if (ip->i_cowfp && ip->i_cowfp->if_bytes)
175383104d44SDarrick J. Wong 			xfs_inode_set_cowblocks_tag(ip);
175452bfcdd7SDarrick J. Wong 		else
175552bfcdd7SDarrick J. Wong 			xfs_inode_clear_cowblocks_tag(ip);
17565bcffe30SChristoph Hellwig 		if (tip->i_cowfp && tip->i_cowfp->if_bytes)
175783104d44SDarrick J. Wong 			xfs_inode_set_cowblocks_tag(tip);
175852bfcdd7SDarrick J. Wong 		else
175952bfcdd7SDarrick J. Wong 			xfs_inode_clear_cowblocks_tag(tip);
1760f0bc4d13SDarrick J. Wong 	}
1761f0bc4d13SDarrick J. Wong 
1762a133d952SDave Chinner 	xfs_trans_log_inode(tp, ip,  src_log_flags);
1763a133d952SDave Chinner 	xfs_trans_log_inode(tp, tip, target_log_flags);
1764a133d952SDave Chinner 
1765a133d952SDave Chinner 	/*
17666fb10d6dSBrian Foster 	 * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems
17676fb10d6dSBrian Foster 	 * have inode number owner values in the bmbt blocks that still refer to
17686fb10d6dSBrian Foster 	 * the old inode. Scan each bmbt to fix up the owner values with the
17696fb10d6dSBrian Foster 	 * inode number of the current inode.
17706fb10d6dSBrian Foster 	 */
17716fb10d6dSBrian Foster 	if (src_log_flags & XFS_ILOG_DOWNER) {
17722dd3d709SBrian Foster 		error = xfs_swap_change_owner(&tp, ip, tip);
17736fb10d6dSBrian Foster 		if (error)
17746fb10d6dSBrian Foster 			goto out_trans_cancel;
17756fb10d6dSBrian Foster 	}
17766fb10d6dSBrian Foster 	if (target_log_flags & XFS_ILOG_DOWNER) {
17772dd3d709SBrian Foster 		error = xfs_swap_change_owner(&tp, tip, ip);
17786fb10d6dSBrian Foster 		if (error)
17796fb10d6dSBrian Foster 			goto out_trans_cancel;
17806fb10d6dSBrian Foster 	}
17816fb10d6dSBrian Foster 
17826fb10d6dSBrian Foster 	/*
1783a133d952SDave Chinner 	 * If this is a synchronous mount, make sure that the
1784a133d952SDave Chinner 	 * transaction goes to disk before returning to the user.
1785a133d952SDave Chinner 	 */
1786a133d952SDave Chinner 	if (mp->m_flags & XFS_MOUNT_WSYNC)
1787a133d952SDave Chinner 		xfs_trans_set_sync(tp);
1788a133d952SDave Chinner 
178970393313SChristoph Hellwig 	error = xfs_trans_commit(tp);
1790a133d952SDave Chinner 
1791a133d952SDave Chinner 	trace_xfs_swap_extent_after(ip, 0);
1792a133d952SDave Chinner 	trace_xfs_swap_extent_after(tip, 1);
179339aff5fdSDarrick J. Wong 
179465523218SChristoph Hellwig out_unlock:
179539aff5fdSDarrick J. Wong 	xfs_iunlock(ip, lock_flags);
179639aff5fdSDarrick J. Wong 	xfs_iunlock(tip, lock_flags);
179765523218SChristoph Hellwig 	unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1798a133d952SDave Chinner 	return error;
1799a133d952SDave Chinner 
180039aff5fdSDarrick J. Wong out_trans_cancel:
180139aff5fdSDarrick J. Wong 	xfs_trans_cancel(tp);
180265523218SChristoph Hellwig 	goto out_unlock;
1803a133d952SDave Chinner }
1804