xref: /linux/fs/xfs/xfs_bmap_util.c (revision 36ec807b627b4c0a0a382f0ae48eac7187d14b2b)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_rtbitmap.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because the
 * bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	if (XFS_IS_REALTIME_INODE(ip))
		return XFS_FSB_TO_BB(ip->i_mount, fsb);
	return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}
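
/*
 * Illustrative sketch (not compiled): roughly how the two conversions
 * above expand.  The macro bodies are paraphrased from the format
 * headers and may not match them exactly.
 */
#if 0
static xfs_daddr_t
xfs_fsb_to_db_sketch(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip)) {
		/* rt block numbers are already linear on the rt device */
		return fsb << mp->m_blkbb_log;
	}
	/* data device block numbers encode an (agno, agbno) pair */
	return XFS_AGB_TO_DADDR(mp, XFS_FSB_TO_AGNO(mp, fsb),
			XFS_FSB_TO_AGBNO(mp, fsb));
}
#endif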

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t		block = XFS_BB_TO_FSBT(mp, sector);

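	/*
	 * blkdev_issue_zeroout() works in units of 512-byte sectors, so
	 * the shifts below scale filesystem blocks by
	 * (s_blocksize_bits - 9), e.g. eight sectors per 4k block.
	 */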
	return blkdev_issue_zeroout(target->bt_bdev,
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_KERNEL, 0);
}

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.  Delayed allocation
 * extents are not counted towards the totals.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
	struct xfs_ifork	*ifp,
	xfs_filblks_t		*count)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		numrecs = 0;

	for_each_xfs_iext(ifp, &icur, &got) {
		if (!isnullstartblock(got.br_startblock)) {
			*count += got.br_blockcount;
			numrecs++;
		}
	}

	return numrecs;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.
 */
int
xfs_bmap_count_blocks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur;
	xfs_extlen_t		btblocks = 0;
	int			error;

	*nextents = 0;
	*count = 0;

	if (!ifp)
		return 0;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;

		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_btree_count_blocks(cur, &btblocks);
		xfs_btree_del_cursor(cur, error);
		if (error)
			return error;

		/*
		 * xfs_btree_count_blocks includes the root block contained in
		 * the inode fork in @btblocks, so subtract one because we're
		 * only interested in allocated disk blocks.
		 */
		*count += btblocks - 1;

		fallthrough;
	case XFS_DINODE_FMT_EXTENTS:
		*nextents = xfs_bmap_count_leaves(ifp, count);
		break;
	}

	return 0;
}
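
/*
 * Hypothetical caller sketch (not compiled): counting the data fork of
 * an inode; @tp and @ip are assumed from the surrounding context and
 * the local names are illustrative only.
 */
#if 0
{
	xfs_extnum_t	nextents;
	xfs_filblks_t	count;
	int		error;

	error = xfs_bmap_count_blocks(tp, ip, XFS_DATA_FORK, &nextents,
			&count);
	/* on success, count holds allocated fsblocks, nextents the records */
}
#endif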

static int
xfs_getbmap_report_one(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	struct xfs_bmbt_irec	*got)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;
	bool			shared = false;
	int			error;

	error = xfs_reflink_trim_around_shared(ip, got, &shared);
	if (error)
		return error;

	if (isnullstartblock(got->br_startblock) ||
	    got->br_startblock == DELAYSTARTBLOCK) {
		/*
		 * Take the flush completion as being a point-in-time snapshot
		 * where there are no delalloc extents, and if any new ones
		 * have been created racily, just skip them as being 'after'
		 * the flush and so don't get reported.
		 */
		if (!(bmv->bmv_iflags & BMV_IF_DELALLOC))
			return 0;

		p->bmv_oflags |= BMV_OF_DELALLOC;
		p->bmv_block = -2;
	} else {
		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
	}

	if (got->br_state == XFS_EXT_UNWRITTEN &&
	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
		p->bmv_oflags |= BMV_OF_PREALLOC;

	if (shared)
		p->bmv_oflags |= BMV_OF_SHARED;

	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
	return 0;
}

static void
xfs_getbmap_report_hole(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	xfs_fileoff_t		bno,
	xfs_fileoff_t		end)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;

	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
		return;

	p->bmv_block = -1;
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
}

static inline bool
xfs_getbmap_full(
	struct getbmapx		*bmv)
{
	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

static bool
xfs_getbmap_next_rec(
	struct xfs_bmbt_irec	*rec,
	xfs_fileoff_t		total_end)
{
	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;

	if (end == total_end)
		return false;

	rec->br_startoff += rec->br_blockcount;
	if (!isnullstartblock(rec->br_startblock) &&
	    rec->br_startblock != DELAYSTARTBLOCK)
		rec->br_startblock += rec->br_blockcount;
	rec->br_blockcount = total_end - end;
	return true;
}
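
/*
 * Note on the two helpers above: xfs_getbmap_report_one() lets
 * xfs_reflink_trim_around_shared() shorten the record to the next
 * shared/unshared boundary before reporting it, and
 * xfs_getbmap_next_rec() then advances the record past the part just
 * reported.  The do/while loop in xfs_getbmap() below repeats this
 * until the whole bmbt record has been emitted as individual getbmapx
 * entries.
 */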

/*
 * Get the inode's extents as described in bmv, and format them for output in
 * the out array.  Entries are filled until all extents in the range have been
 * mapped, until the passed-in bmv->bmv_count slots have been filled, or until
 * the query range is exhausted.
 */
int						/* error code */
xfs_getbmap(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	struct kgetbmap		*out)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			iflags = bmv->bmv_iflags;
	int			whichfork, lock, error = 0;
	int64_t			bmv_end, max_len;
	xfs_fileoff_t		bno, first_bno;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got, rec;
	xfs_filblks_t		len;
	struct xfs_iext_cursor	icur;

	if (bmv->bmv_iflags & ~BMV_IF_VALID)
		return -EINVAL;
#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	if (bmv->bmv_length < -1)
		return -EINVAL;
	bmv->bmv_entries = 0;
	if (bmv->bmv_length == 0)
		return 0;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_ATTR_FORK:
		lock = xfs_ilock_attr_map_shared(ip);
		if (!xfs_inode_has_attr_fork(ip))
			goto out_unlock_ilock;

		max_len = 1LL << 32;
		break;
	case XFS_COW_FORK:
		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);

		/* No CoW fork? Just return */
		if (!xfs_ifork_ptr(ip, whichfork))
			goto out_unlock_ilock;

		if (xfs_get_cowextsz_hint(ip))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);
		break;
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_disk_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		if (xfs_get_extsz_hint(ip) ||
		    (ip->i_diflags &
		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = xfs_ilock_data_map_shared(ip);
		break;
	}

	ifp = xfs_ifork_ptr(ip, whichfork);

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_LOCAL:
		/* Local format inode forks report no extents. */
		goto out_unlock_ilock;
	default:
		error = -EINVAL;
		goto out_unlock_ilock;
	}

	if (bmv->bmv_length == -1) {
		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
	}

	bmv_end = bmv->bmv_offset + bmv->bmv_length;

	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

	error = xfs_iread_extents(NULL, ip, whichfork);
	if (error)
		goto out_unlock_ilock;

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/*
		 * Report a whole-file hole if the delalloc flag is set to
		 * stay compatible with the old implementation.
		 */
		if (iflags & BMV_IF_DELALLOC)
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		goto out_unlock_ilock;
	}

	while (!xfs_getbmap_full(bmv)) {
		xfs_trim_extent(&got, first_bno, len);

		/*
		 * Report an entry for a hole if this extent doesn't directly
		 * follow the previous one.
		 */
		if (got.br_startoff > bno) {
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					got.br_startoff);
			if (xfs_getbmap_full(bmv))
				break;
		}

		/*
		 * In order to report shared extents accurately, we report each
		 * distinct shared / unshared part of a single bmbt record with
		 * an individual getbmapx record.
		 */
		bno = got.br_startoff + got.br_blockcount;
		rec = got;
		do {
			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
					&rec);
			if (error || xfs_getbmap_full(bmv))
				goto out_unlock_ilock;
		} while (xfs_getbmap_next_rec(&rec, bno));

		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

			if (bmv->bmv_entries > 0)
				out[bmv->bmv_entries - 1].bmv_oflags |=
								BMV_OF_LAST;

			if (whichfork != XFS_ATTR_FORK && bno < end &&
			    !xfs_getbmap_full(bmv)) {
				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
						bno, end);
			}
			break;
		}

		if (bno >= first_bno + len)
			break;
	}

out_unlock_ilock:
	xfs_iunlock(ip, lock);
out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return error;
}
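
/*
 * Hypothetical userspace sketch (not part of this file): how the above
 * is typically driven through the XFS_IOC_GETBMAPX ioctl.  Headers,
 * the open fd, and error handling are elided; see the uapi definitions
 * in xfs_fs.h.
 */
#if 0
	struct getbmapx	map[33];	/* header + up to 32 entries */

	memset(map, 0, sizeof(map));
	map[0].bmv_offset = 0;		/* start of file, in 512b units */
	map[0].bmv_length = -1;		/* map out to end of file */
	map[0].bmv_count = 33;
	if (ioctl(fd, XFS_IOC_GETBMAPX, map) == 0) {
		/* map[1..map[0].bmv_entries] now hold the mappings */
	}
#endif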

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  This will always punch out both the start and end blocks, even
 * if the ranges only partially overlap them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
 */
void
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_off_t		start_byte,
	xfs_off_t		end_byte)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = &ip->i_df;
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;

	ASSERT(!xfs_need_iread_extents(ifp));

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		goto out_unlock;

	while (got.br_startoff + got.br_blockcount > start_fsb) {
		del = got;
		xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);

		/*
		 * A delete can push the cursor forward.  Step back to the
		 * previous extent for non-delalloc extents or for extents
		 * outside the target range.
		 */
		if (!del.br_blockcount ||
		    !isnullstartblock(del.br_startblock)) {
			if (!xfs_iext_prev_extent(ifp, &icur, &got))
				break;
			continue;
		}

		xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur, &got, &del);
		if (!xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
}
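
/*
 * Note: the scan above walks backwards, from the extent at or before
 * end_fsb towards start_fsb.  Deleting a record can move the iext
 * cursor, so the loop re-reads the cursor after each delete and steps
 * back over anything that is not a delalloc extent inside the target
 * range.
 */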

/*
 * Test whether it is appropriate to check an inode for, and free, post-EOF
 * blocks.
 */
bool
xfs_can_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_bmbt_irec	imap;
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		end_fsb;
	xfs_fileoff_t		last_fsb;
	int			nimaps = 1;
	int			error;

	/*
	 * Caller must either hold the exclusive io lock, or be inactivating
	 * the inode, which guarantees there are no other users of the inode.
	 */
	if (!(VFS_I(ip)->i_state & I_FREEING))
		xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL);

	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and no delalloc blocks will
	 * not have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (xfs_need_iread_extents(&ip->i_df))
		return false;

	/*
	 * Only free real extents for inodes with persistent preallocations or
	 * the append-only flag.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (ip->i_delayed_blks == 0)
			return false;

	/*
	 * Do not try to free post-EOF blocks if EOF is beyond the end of the
	 * range supported by the page cache, because the truncation will loop
	 * forever.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	if (xfs_inode_has_bigrtalloc(ip))
		end_fsb = xfs_rtb_roundup_rtx(mp, end_fsb);
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return false;

	/*
	 * Look up the mapping for the first block past EOF.  If we can't find
	 * it, there's nothing to free.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps,
			0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	if (error || nimaps == 0)
		return false;

	/*
	 * If there's a real mapping there or there are delayed allocation
	 * reservations, then we have post-EOF blocks to try to free.
	 */
	return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
}

/*
 * This is called to free any blocks beyond eof. The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.
 */
int
xfs_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/* Attach the dquots to the inode up front. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/* Wait on dio to ensure i_size has settled. */
	inode_dio_wait(VFS_I(ip));

	/*
	 * For preallocated files only free delayed allocations.
	 *
	 * Note that this means we also leave speculative preallocations in
	 * place for preallocated files.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)) {
		if (ip->i_delayed_blks) {
			xfs_bmap_punch_delalloc_range(ip,
				round_up(XFS_ISIZE(ip), mp->m_sb.sb_blocksize),
				LLONG_MAX);
		}
		xfs_inode_clear_eofblocks_tag(ip);
		return 0;
	}

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error) {
		ASSERT(xfs_is_shutdown(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Do not update the on-disk file size.  If we update the on-disk file
	 * size and then the system crashes before the contents of the file are
	 * flushed to disk then the files may be full of holes (i.e. the NULL
	 * files bug).
	 */
	error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
				XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
	if (error)
		goto err_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	xfs_inode_clear_eofblocks_tag(ip);
	goto out_unlock;

err_cancel:
	/*
	 * If we get an error at this point we simply don't
	 * bother truncating the file.
	 */
	xfs_trans_cancel(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
	endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
	allocatesize_fsb = endoffset_fsb - startoffset_fsb;

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;
		unsigned int	dblocks, rblocks, resblks;
		int		nimaps = 1;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			div_u64_rem(startoffset_fsb, extsz, &temp);
			if (temp)
				e += temp;
			div_u64_rem(e, extsz, &temp);
			if (temp)
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}
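
		/*
		 * Worked example (assumed values): with extsz = 16,
		 * startoffset_fsb = 10 and allocatesize_fsb = 20, s is
		 * rounded down to 0; e starts at 30, gains the start
		 * misalignment of 10 to become 40, and is then rounded
		 * up to the next extsz multiple, 48.  The reservation
		 * below is therefore sized for whole extent size hint
		 * chunks.
		 */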

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to XFS_BMBT_MAX_EXTLEN (21 bits), so use that to enforce the
		 * limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s),
				(XFS_MAX_BMBT_EXTLEN * nimaps));
		if (unlikely(rt)) {
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			rblocks = resblks;
		} else {
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			rblocks = 0;
		}

		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
				dblocks, rblocks, false, &tp);
		if (error)
			break;

		error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
				XFS_IEXT_ADD_NOSPLIT_CNT);
		if (error)
			goto error;

		/*
		 * If the allocator cannot find a single free extent large
		 * enough to cover the start block of the requested range,
		 * xfs_bmapi_write will return -ENOSR.
		 *
		 * In that case we simply need to keep looping with the same
		 * startoffset_fsb so that one of the following allocations
		 * will eventually reach the requested range.
		 */
		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
				allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
				&nimaps);
		if (error) {
			if (error != -ENOSR)
				goto error;
			error = 0;
		} else {
			startoffset_fsb += imapp->br_blockcount;
			allocatesize_fsb -= imapp->br_blockcount;
		}

		ip->i_diflags |= XFS_DIFLAG_PREALLOC;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
	}

	return error;

error:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0,
			false, &tp);
	if (error)
		return error;

	error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
			XFS_IEXT_PUNCH_HOLE_CNT);
	if (error)
		goto out_trans_cancel;

	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	/*
	 * Make sure we extend the flush out to extent alignment
	 * boundaries so any extent range overlapping the start/end
	 * of the modification we are about to do is clean and idle.
	 */
	rounding = max_t(xfs_off_t, xfs_inode_alloc_unitsize(ip), PAGE_SIZE);
	start = rounddown_64(offset, rounding);
	end = roundup_64(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}

int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			done = 0, error;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)	/* if nothing being freed */
		return 0;

	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* We can only free complete realtime extents. */
	if (xfs_inode_has_bigrtalloc(ip)) {
		startoffset_fsb = xfs_rtb_roundup_rtx(mp, startoffset_fsb);
		endoffset_fsb = xfs_rtb_rounddown_rtx(mp, endoffset_fsb);
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 */
	if (endoffset_fsb > startoffset_fsb) {
		while (!done) {
			error = xfs_unmap_extent(ip, startoffset_fsb,
					endoffset_fsb - startoffset_fsb, &done);
			if (error)
				return error;
		}
	}

	/*
	 * Now that we've unmapped all full blocks we'll have to zero out any
	 * partial block at the beginning and/or end.  xfs_zero_range is smart
	 * enough to skip any holes, including those we just created, but we
	 * must take care not to zero beyond EOF and enlarge i_size.
	 */
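
	/*
	 * Worked example (assumed 4k blocks): punching bytes [1024, 10240)
	 * rounds startoffset_fsb up to block 1 and endoffset_fsb down to
	 * block 2, so only block 1 is unmapped above; the partial ranges
	 * [1024, 4096) and [8192, 10240) are zeroed here instead.
	 */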
	if (offset >= XFS_ISIZE(ip))
		return 0;
	if (offset + len > XFS_ISIZE(ip))
		len = XFS_ISIZE(ip) - offset;
	error = xfs_zero_range(ip, offset, len, NULL);
	if (error)
		return error;

	/*
	 * If we zeroed right up to EOF and EOF straddles a page boundary we
	 * must make sure that the post-EOF area is also zeroed because the
	 * page could be mmap'd and xfs_zero_range doesn't do that for us.
	 * Writeback of the eof page will do this, albeit clumsily.
	 */
	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
	}

	return error;
}

static int
xfs_prepare_shift(
	struct xfs_inode	*ip,
	loff_t			offset)
{
	unsigned int		rounding;
	int			error;

	/*
	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
	 * into the accessible region of the file.
	 */
	if (xfs_can_free_eofblocks(ip)) {
		error = xfs_free_eofblocks(ip);
		if (error)
			return error;
	}

	/*
	 * Shift operations must stabilize the start block offset boundary along
	 * with the full range of the operation. If we don't, a COW writeback
	 * completion could race with an insert, front merge with the start
	 * extent (after split) during the shift and corrupt the file. Start
	 * with the allocation unit just prior to the start to stabilize the
	 * boundary.
	 */
	rounding = xfs_inode_alloc_unitsize(ip);
	offset = rounddown_64(offset, rounding);
	if (offset)
		offset -= rounding;

	/*
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 */
	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
	if (error)
		return error;

	/*
	 * Clean out anything hanging around in the cow fork now that
	 * we've flushed all the dirty data out to disk to avoid having
	 * CoW extents at the wrong offsets.
	 */
	if (xfs_inode_has_cow_data(ip)) {
		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
				true);
		if (error)
			return error;
	}

	return 0;
}

/*
 * xfs_collapse_file_space()
 *	This routine frees disk space and shifts extents for the given file.
 *	The first thing we do is free the data blocks in the specified range
 *	by calling xfs_free_file_space(), which also syncs dirty data and
 *	invalidates the page cache over the region on which the collapse
 *	range is working.  Then we shift the extent records to the left to
 *	cover the hole.
 * RETURNS:
 *	0 on success
 *	errno on error
 */
int
xfs_collapse_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;
	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
	bool			done = false;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);

	trace_xfs_collapse_file_space(ip);

	error = xfs_free_file_space(ip, offset, len);
	if (error)
		return error;

	error = xfs_prepare_shift(ip, offset);
	if (error)
		return error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	while (!done) {
		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
				&done);
		if (error)
			goto out_trans_cancel;
		if (done)
			break;

		/* finish any deferred frees and roll the transaction */
		error = xfs_defer_finish(&tp);
		if (error)
			goto out_trans_cancel;
	}

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
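
/*
 * Worked example (assumed sizes): collapsing len = 256k at offset = 128k
 * in a 1M file frees bytes [128k, 384k), shifts the remaining [384k, 1M)
 * down to offset 128k, and leaves a 768k file.  This is the semantic
 * exposed to userspace as fallocate(FALLOC_FL_COLLAPSE_RANGE).
 */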

/*
 * xfs_insert_file_space()
 *	This routine creates hole space by shifting extents for the given
 *	file.  The first thing we do is sync dirty data and invalidate the
 *	page cache over the region on which the insert range is working.
 *	Then we split the extent in two at the given offset by calling
 *	xfs_bmap_split_extent, and shift all extent records lying between
 *	[offset, last allocated extent] to the right to make room for the
 *	hole.
 * RETURNS:
 *	0 on success
 *	errno on error
 */
int
xfs_insert_file_space(
	struct xfs_inode	*ip,
	loff_t			offset,
	loff_t			len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;
	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
	bool			done = false;

	xfs_assert_ilocked(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);

	trace_xfs_insert_file_space(ip);

	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
	if (error)
		return error;

	error = xfs_prepare_shift(ip, offset);
	if (error)
		return error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_iext_count_extend(tp, ip, XFS_DATA_FORK,
			XFS_IEXT_PUNCH_HOLE_CNT);
	if (error)
		goto out_trans_cancel;

	/*
	 * The extent shifting code works at extent granularity, so if stop_fsb
	 * is not the starting block of an extent, we need to split the extent
	 * at stop_fsb.
	 */
	error = xfs_bmap_split_extent(tp, ip, stop_fsb);
	if (error)
		goto out_trans_cancel;

	do {
		error = xfs_defer_finish(&tp);
		if (error)
			goto out_trans_cancel;

		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
				&done, stop_fsb);
		if (error)
			goto out_trans_cancel;
	} while (!done);

	error = xfs_trans_commit(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
1102a904b1caSNamjae Jeon 
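/*
 * Illustrative sketch (editor's addition, not part of the original source):
 * a minimal caller of xfs_insert_file_space(), modelled on the fallocate
 * FALLOC_FL_INSERT_RANGE path.  The locking shown here is an assumption for
 * illustration; the real caller lives in xfs_file.c.
 */
#if 0
static int
example_insert_range(
	struct xfs_inode	*ip,
	loff_t			offset,
	loff_t			len)
{
	int			error;

	/* offset and len are assumed block aligned and size checked. */
	xfs_ilock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
	error = xfs_insert_file_space(ip, offset, len);
	xfs_iunlock(ip, XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL);
	return error;
}
#endif
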
1103a904b1caSNamjae Jeon /*
1104a133d952SDave Chinner  * We need to check that the format of the data fork in the temporary inode is
1105a133d952SDave Chinner  * valid for the target inode before doing the swap. This is not a problem with
1106a133d952SDave Chinner  * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1107a133d952SDave Chinner  * data fork depending on the space the attribute fork is taking, so we can get
1108a133d952SDave Chinner  * invalid formats on the target inode.
1109a133d952SDave Chinner  *
1110a133d952SDave Chinner  * E.g. target has space for 7 extents in extent format, temp inode only has
1111a133d952SDave Chinner  * space for 6.  If we defragment down to 7 extents, then the tmp format is a
1112a133d952SDave Chinner  * btree, but when swapped it needs to be in extent format. Hence we can't just
1113a133d952SDave Chinner  * blindly swap data forks on attr2 filesystems.
1114a133d952SDave Chinner  *
1115a133d952SDave Chinner  * Note that we check the swap in both directions so that we don't end up with
1116a133d952SDave Chinner  * a corrupt temporary inode, either.
1117a133d952SDave Chinner  *
1118a133d952SDave Chinner  * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1119a133d952SDave Chinner  * inode will prevent this situation from occurring, so all we do here is
1120a133d952SDave Chinner  * reject and log the attempt. Basically, we are putting the responsibility on
1121a133d952SDave Chinner  * userspace to get this right.
1122a133d952SDave Chinner  */
1123a133d952SDave Chinner static int
1124a133d952SDave Chinner xfs_swap_extents_check_format(
1125e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1126e06259aaSDarrick J. Wong 	struct xfs_inode	*tip)	/* tmp inode */
1127a133d952SDave Chinner {
1128f7e67b20SChristoph Hellwig 	struct xfs_ifork	*ifp = &ip->i_df;
1129f7e67b20SChristoph Hellwig 	struct xfs_ifork	*tifp = &tip->i_df;
1130a133d952SDave Chinner 
1131765d3c39SDarrick J. Wong 	/* User/group/project quota ids must match if quotas are enforced. */
1132765d3c39SDarrick J. Wong 	if (XFS_IS_QUOTA_ON(ip->i_mount) &&
1133765d3c39SDarrick J. Wong 	    (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
1134765d3c39SDarrick J. Wong 	     !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
1135ceaf603cSChristoph Hellwig 	     ip->i_projid != tip->i_projid))
1136765d3c39SDarrick J. Wong 		return -EINVAL;
1137765d3c39SDarrick J. Wong 
1138a133d952SDave Chinner 	/* Should never get a local format */
1139f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
1140f7e67b20SChristoph Hellwig 	    tifp->if_format == XFS_DINODE_FMT_LOCAL)
11412451337dSDave Chinner 		return -EINVAL;
1142a133d952SDave Chinner 
1143a133d952SDave Chinner 	/*
1144a133d952SDave Chinner 	 * If the target inode has fewer extents than the temporary inode,
1145a133d952SDave Chinner 	 * why did userspace call us?
1146a133d952SDave Chinner 	 */
1147f7e67b20SChristoph Hellwig 	if (ifp->if_nextents < tifp->if_nextents)
11482451337dSDave Chinner 		return -EINVAL;
1149a133d952SDave Chinner 
1150a133d952SDave Chinner 	/*
11511f08af52SDarrick J. Wong 	 * If we have to use the (expensive) rmap swap method, we can
11521f08af52SDarrick J. Wong 	 * handle any number of extents and any format.
11531f08af52SDarrick J. Wong 	 */
115438c26bfdSDave Chinner 	if (xfs_has_rmapbt(ip->i_mount))
11551f08af52SDarrick J. Wong 		return 0;
11561f08af52SDarrick J. Wong 
11571f08af52SDarrick J. Wong 	/*
1158a133d952SDave Chinner 	 * If the target inode is in extent format and the temp inode is in
1159a133d952SDave Chinner 	 * btree format, then we will end up with the target inode in the wrong
1160a133d952SDave Chinner 	 * format, as we already know there are fewer extents in the temp inode.
1161a133d952SDave Chinner 	 */
1162f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1163f7e67b20SChristoph Hellwig 	    tifp->if_format == XFS_DINODE_FMT_BTREE)
11642451337dSDave Chinner 		return -EINVAL;
1165a133d952SDave Chinner 
1166a133d952SDave Chinner 	/* Check temp in extent form to max in target */
1167f7e67b20SChristoph Hellwig 	if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1168f7e67b20SChristoph Hellwig 	    tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
11692451337dSDave Chinner 		return -EINVAL;
1170a133d952SDave Chinner 
1171a133d952SDave Chinner 	/* Check target in extent form to max in temp */
1172f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1173f7e67b20SChristoph Hellwig 	    ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
11742451337dSDave Chinner 		return -EINVAL;
1175a133d952SDave Chinner 
1176a133d952SDave Chinner 	/*
1177a133d952SDave Chinner 	 * If we are in a btree format, check that the temp root block will fit
1178a133d952SDave Chinner 	 * in the target and that it has enough extents to be in btree format
1179a133d952SDave Chinner 	 * in the target.
1180a133d952SDave Chinner 	 *
1181a133d952SDave Chinner 	 * Note that we have to be careful to allow btree->extent conversions
1182a133d952SDave Chinner 	 * (a common defrag case) which will occur when the temp inode is in
1183a133d952SDave Chinner 	 * extent format...
1184a133d952SDave Chinner 	 */
1185f7e67b20SChristoph Hellwig 	if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
1186932b42c6SDarrick J. Wong 		if (xfs_inode_has_attr_fork(ip) &&
1187c01147d9SDarrick J. Wong 		    XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip))
11882451337dSDave Chinner 			return -EINVAL;
1189f7e67b20SChristoph Hellwig 		if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
11902451337dSDave Chinner 			return -EINVAL;
1191a133d952SDave Chinner 	}
1192a133d952SDave Chinner 
1193a133d952SDave Chinner 	/* Reciprocal target->temp btree format checks */
1194f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
1195932b42c6SDarrick J. Wong 		if (xfs_inode_has_attr_fork(tip) &&
1196c01147d9SDarrick J. Wong 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
11972451337dSDave Chinner 			return -EINVAL;
1198f7e67b20SChristoph Hellwig 		if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
11992451337dSDave Chinner 			return -EINVAL;
1200a133d952SDave Chinner 	}
1201a133d952SDave Chinner 
1202a133d952SDave Chinner 	return 0;
1203a133d952SDave Chinner }
1204a133d952SDave Chinner 
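/*
 * Worked example for the checks above (assumed geometry, for illustration
 * only): XFS_IFORK_MAXEXT() is the fork's literal area divided by the
 * 16-byte in-core extent record size.  With 512-byte v3 inodes the literal
 * area is 512 - 176 = 336 bytes; a data fork granted 112 of those bytes
 * holds 112 / 16 = 7 extents in extent format, while one granted 96 bytes
 * holds only 6.  Defragmenting the 7-extent file into the 6-extent temp
 * inode forces the temp data fork into btree format, which cannot be
 * swapped back; hence the checks run in both directions.
 */
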
12057abbb8f9SDave Chinner static int
12064ef897a2SDave Chinner xfs_swap_extent_flush(
12074ef897a2SDave Chinner 	struct xfs_inode	*ip)
12084ef897a2SDave Chinner {
12094ef897a2SDave Chinner 	int	error;
12104ef897a2SDave Chinner 
12114ef897a2SDave Chinner 	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
12124ef897a2SDave Chinner 	if (error)
12134ef897a2SDave Chinner 		return error;
12144ef897a2SDave Chinner 	truncate_pagecache_range(VFS_I(ip), 0, -1);
12154ef897a2SDave Chinner 
12164ef897a2SDave Chinner 	/* Verify O_DIRECT for ftmp */
12174ef897a2SDave Chinner 	/* Verify the invalidation emptied the page cache */
12184ef897a2SDave Chinner 		return -EINVAL;
12194ef897a2SDave Chinner 	return 0;
12204ef897a2SDave Chinner }
12214ef897a2SDave Chinner 
12221f08af52SDarrick J. Wong /*
12231f08af52SDarrick J. Wong  * Move extents from one file to another, when rmap is enabled.
12241f08af52SDarrick J. Wong  */
12251f08af52SDarrick J. Wong STATIC int
12261f08af52SDarrick J. Wong xfs_swap_extent_rmap(
12271f08af52SDarrick J. Wong 	struct xfs_trans		**tpp,
12281f08af52SDarrick J. Wong 	struct xfs_inode		*ip,
12291f08af52SDarrick J. Wong 	struct xfs_inode		*tip)
12301f08af52SDarrick J. Wong {
12317a7943c7SBrian Foster 	struct xfs_trans		*tp = *tpp;
12321f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		irec;
12331f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		uirec;
12341f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		tirec;
12351f08af52SDarrick J. Wong 	xfs_fileoff_t			offset_fsb;
12361f08af52SDarrick J. Wong 	xfs_fileoff_t			end_fsb;
12371f08af52SDarrick J. Wong 	xfs_filblks_t			count_fsb;
12381f08af52SDarrick J. Wong 	int				error;
12391f08af52SDarrick J. Wong 	xfs_filblks_t			ilen;
12401f08af52SDarrick J. Wong 	xfs_filblks_t			rlen;
12411f08af52SDarrick J. Wong 	int				nimaps;
1242c8ce540dSDarrick J. Wong 	uint64_t			tip_flags2;
12431f08af52SDarrick J. Wong 
12441f08af52SDarrick J. Wong 	/*
12451f08af52SDarrick J. Wong 	 * If the source file has shared blocks, we must flag the donor
12461f08af52SDarrick J. Wong 	 * file as having shared blocks so that we get the shared-block
12471f08af52SDarrick J. Wong 	 * rmap functions when we go to fix up the rmaps.  The flags
12481f08af52SDarrick J. Wong 	 * will be switched for real later.
12491f08af52SDarrick J. Wong 	 */
12503e09ab8fSChristoph Hellwig 	tip_flags2 = tip->i_diflags2;
12513e09ab8fSChristoph Hellwig 	if (ip->i_diflags2 & XFS_DIFLAG2_REFLINK)
12523e09ab8fSChristoph Hellwig 		tip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
12531f08af52SDarrick J. Wong 
12541f08af52SDarrick J. Wong 	offset_fsb = 0;
12551f08af52SDarrick J. Wong 	end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
12561f08af52SDarrick J. Wong 	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
12571f08af52SDarrick J. Wong 
12581f08af52SDarrick J. Wong 	while (count_fsb) {
12591f08af52SDarrick J. Wong 		/* Read extent from the donor file */
12601f08af52SDarrick J. Wong 		nimaps = 1;
12611f08af52SDarrick J. Wong 		error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
12621f08af52SDarrick J. Wong 				&nimaps, 0);
12631f08af52SDarrick J. Wong 		if (error)
12641f08af52SDarrick J. Wong 			goto out;
12651f08af52SDarrick J. Wong 		ASSERT(nimaps == 1);
12661f08af52SDarrick J. Wong 		ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);
12671f08af52SDarrick J. Wong 
12681f08af52SDarrick J. Wong 		trace_xfs_swap_extent_rmap_remap(tip, &tirec);
12691f08af52SDarrick J. Wong 		ilen = tirec.br_blockcount;
12701f08af52SDarrick J. Wong 
12711f08af52SDarrick J. Wong 		/* Unmap the old blocks in the source file. */
12721f08af52SDarrick J. Wong 		while (tirec.br_blockcount) {
1273692b6cddSDave Chinner 			ASSERT(tp->t_highest_agno == NULLAGNUMBER);
12741f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
12751f08af52SDarrick J. Wong 
12761f08af52SDarrick J. Wong 			/* Read extent from the source file */
12771f08af52SDarrick J. Wong 			nimaps = 1;
12781f08af52SDarrick J. Wong 			error = xfs_bmapi_read(ip, tirec.br_startoff,
12791f08af52SDarrick J. Wong 					tirec.br_blockcount, &irec,
12801f08af52SDarrick J. Wong 					&nimaps, 0);
12811f08af52SDarrick J. Wong 			if (error)
1282d5a2e289SBrian Foster 				goto out;
12831f08af52SDarrick J. Wong 			ASSERT(nimaps == 1);
12841f08af52SDarrick J. Wong 			ASSERT(tirec.br_startoff == irec.br_startoff);
12851f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
12861f08af52SDarrick J. Wong 
12871f08af52SDarrick J. Wong 			/* Trim the extent. */
12881f08af52SDarrick J. Wong 			uirec = tirec;
12891f08af52SDarrick J. Wong 			uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
12901f08af52SDarrick J. Wong 					tirec.br_blockcount,
12911f08af52SDarrick J. Wong 					irec.br_blockcount);
12921f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
12931f08af52SDarrick J. Wong 
1294bcc561f2SChandan Babu R 			if (xfs_bmap_is_real_extent(&uirec)) {
129525576c54SChristoph Hellwig 				error = xfs_iext_count_extend(tp, ip,
1296bcc561f2SChandan Babu R 						XFS_DATA_FORK,
1297bcc561f2SChandan Babu R 						XFS_IEXT_SWAP_RMAP_CNT);
1298bcc561f2SChandan Babu R 				if (error)
1299bcc561f2SChandan Babu R 					goto out;
1300bcc561f2SChandan Babu R 			}
1301bcc561f2SChandan Babu R 
1302bcc561f2SChandan Babu R 			if (xfs_bmap_is_real_extent(&irec)) {
130325576c54SChristoph Hellwig 				error = xfs_iext_count_extend(tp, tip,
1304bcc561f2SChandan Babu R 						XFS_DATA_FORK,
1305bcc561f2SChandan Babu R 						XFS_IEXT_SWAP_RMAP_CNT);
1306bcc561f2SChandan Babu R 				if (error)
1307bcc561f2SChandan Babu R 					goto out;
1308bcc561f2SChandan Babu R 			}
1309bcc561f2SChandan Babu R 
13101f08af52SDarrick J. Wong 			/* Remove the mapping from the donor file. */
131152f80706SDarrick J. Wong 			xfs_bmap_unmap_extent(tp, tip, XFS_DATA_FORK, &uirec);
13121f08af52SDarrick J. Wong 
13131f08af52SDarrick J. Wong 			/* Remove the mapping from the source file. */
131452f80706SDarrick J. Wong 			xfs_bmap_unmap_extent(tp, ip, XFS_DATA_FORK, &irec);
13151f08af52SDarrick J. Wong 
13161f08af52SDarrick J. Wong 			/* Map the donor file's blocks into the source file. */
131752f80706SDarrick J. Wong 			xfs_bmap_map_extent(tp, ip, XFS_DATA_FORK, &uirec);
13181f08af52SDarrick J. Wong 
13191f08af52SDarrick J. Wong 			/* Map the source file's blocks into the donor file. */
132052f80706SDarrick J. Wong 			xfs_bmap_map_extent(tp, tip, XFS_DATA_FORK, &irec);
13211f08af52SDarrick J. Wong 
13229e28a242SBrian Foster 			error = xfs_defer_finish(tpp);
13237a7943c7SBrian Foster 			tp = *tpp;
13241f08af52SDarrick J. Wong 			if (error)
13259b1f4e98SBrian Foster 				goto out;
13261f08af52SDarrick J. Wong 
13271f08af52SDarrick J. Wong 			tirec.br_startoff += rlen;
13281f08af52SDarrick J. Wong 			if (tirec.br_startblock != HOLESTARTBLOCK &&
13291f08af52SDarrick J. Wong 			    tirec.br_startblock != DELAYSTARTBLOCK)
13301f08af52SDarrick J. Wong 				tirec.br_startblock += rlen;
13311f08af52SDarrick J. Wong 			tirec.br_blockcount -= rlen;
13321f08af52SDarrick J. Wong 		}
13331f08af52SDarrick J. Wong 
13341f08af52SDarrick J. Wong 		/* Roll on... */
13351f08af52SDarrick J. Wong 		count_fsb -= ilen;
13361f08af52SDarrick J. Wong 		offset_fsb += ilen;
13371f08af52SDarrick J. Wong 	}
13381f08af52SDarrick J. Wong 
13393e09ab8fSChristoph Hellwig 	tip->i_diflags2 = tip_flags2;
13401f08af52SDarrick J. Wong 	return 0;
13411f08af52SDarrick J. Wong 
13421f08af52SDarrick J. Wong out:
13431f08af52SDarrick J. Wong 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
13443e09ab8fSChristoph Hellwig 	tip->i_diflags2 = tip_flags2;
13451f08af52SDarrick J. Wong 	return error;
13461f08af52SDarrick J. Wong }
13471f08af52SDarrick J. Wong 
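/*
 * Worked example of the trim loop above (illustrative numbers): if the
 * donor mapping tirec covers 100 blocks but the source mapping irec at the
 * same offset covers only 60, uirec is trimmed to rlen = 60 blocks, that
 * much is unmapped from both files and cross-mapped, and tirec is advanced
 * by 60 so the next pass handles the remaining 40 blocks against whatever
 * source mapping starts there.
 */
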
134839aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */
134939aff5fdSDarrick J. Wong STATIC int
135039aff5fdSDarrick J. Wong xfs_swap_extent_forks(
135139aff5fdSDarrick J. Wong 	struct xfs_trans	*tp,
135239aff5fdSDarrick J. Wong 	struct xfs_inode	*ip,
135339aff5fdSDarrick J. Wong 	struct xfs_inode	*tip,
135439aff5fdSDarrick J. Wong 	int			*src_log_flags,
135539aff5fdSDarrick J. Wong 	int			*target_log_flags)
135639aff5fdSDarrick J. Wong {
1357e7f5d5caSDarrick J. Wong 	xfs_filblks_t		aforkblks = 0;
1358e7f5d5caSDarrick J. Wong 	xfs_filblks_t		taforkblks = 0;
1359e7f5d5caSDarrick J. Wong 	xfs_extnum_t		junk;
1360c8ce540dSDarrick J. Wong 	uint64_t		tmp;
136139aff5fdSDarrick J. Wong 	int			error;
136239aff5fdSDarrick J. Wong 
136339aff5fdSDarrick J. Wong 	/*
136439aff5fdSDarrick J. Wong 	 * Count the number of extended attribute blocks
136539aff5fdSDarrick J. Wong 	 */
1366932b42c6SDarrick J. Wong 	if (xfs_inode_has_attr_fork(ip) && ip->i_af.if_nextents > 0 &&
13672ed5b09bSDarrick J. Wong 	    ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
1368e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
136939aff5fdSDarrick J. Wong 				&aforkblks);
137039aff5fdSDarrick J. Wong 		if (error)
137139aff5fdSDarrick J. Wong 			return error;
137239aff5fdSDarrick J. Wong 	}
1373932b42c6SDarrick J. Wong 	if (xfs_inode_has_attr_fork(tip) && tip->i_af.if_nextents > 0 &&
13742ed5b09bSDarrick J. Wong 	    tip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
1375e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
137639aff5fdSDarrick J. Wong 				&taforkblks);
137739aff5fdSDarrick J. Wong 		if (error)
137839aff5fdSDarrick J. Wong 			return error;
137939aff5fdSDarrick J. Wong 	}
138039aff5fdSDarrick J. Wong 
138139aff5fdSDarrick J. Wong 	/*
13826fb10d6dSBrian Foster 	 * Btree format (v3) inodes have the inode number stamped in the bmbt
13836fb10d6dSBrian Foster 	 * block headers. We can't start changing the bmbt blocks until the
13846fb10d6dSBrian Foster 	 * inode owner change is logged so recovery does the right thing in the
13856fb10d6dSBrian Foster 	 * event of a crash. Set the owner change log flags now and leave the
13866fb10d6dSBrian Foster 	 * bmbt scan as the last step.
138739aff5fdSDarrick J. Wong 	 */
138838c26bfdSDave Chinner 	if (xfs_has_v3inodes(ip->i_mount)) {
1389f7e67b20SChristoph Hellwig 		if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
139039aff5fdSDarrick J. Wong 			(*target_log_flags) |= XFS_ILOG_DOWNER;
1391f7e67b20SChristoph Hellwig 		if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
139239aff5fdSDarrick J. Wong 			(*src_log_flags) |= XFS_ILOG_DOWNER;
13936471e9c5SChristoph Hellwig 	}
139439aff5fdSDarrick J. Wong 
139539aff5fdSDarrick J. Wong 	/*
139639aff5fdSDarrick J. Wong 	 * Swap the data forks of the inodes
139739aff5fdSDarrick J. Wong 	 */
1398897992b7SGustavo A. R. Silva 	swap(ip->i_df, tip->i_df);
139939aff5fdSDarrick J. Wong 
140039aff5fdSDarrick J. Wong 	/*
140139aff5fdSDarrick J. Wong 	 * Fix the on-disk inode values
140239aff5fdSDarrick J. Wong 	 */
14036e73a545SChristoph Hellwig 	tmp = (uint64_t)ip->i_nblocks;
14046e73a545SChristoph Hellwig 	ip->i_nblocks = tip->i_nblocks - taforkblks + aforkblks;
14056e73a545SChristoph Hellwig 	tip->i_nblocks = tmp + taforkblks - aforkblks;
140639aff5fdSDarrick J. Wong 
140739aff5fdSDarrick J. Wong 	/*
140839aff5fdSDarrick J. Wong 	 * The extents in the source inode could still contain speculative
140939aff5fdSDarrick J. Wong 	 * preallocation beyond EOF (e.g. the file is open but not modified
141039aff5fdSDarrick J. Wong 	 * while defrag is in progress). In that case, we need to copy over the
141139aff5fdSDarrick J. Wong 	 * number of delalloc blocks the data fork in the source inode is
141239aff5fdSDarrick J. Wong 	 * tracking beyond EOF so that when the fork is truncated away when the
141339aff5fdSDarrick J. Wong 	 * temporary inode is unlinked we don't underrun the i_delayed_blks
141439aff5fdSDarrick J. Wong 	 * counter on that inode.
141539aff5fdSDarrick J. Wong 	 */
141639aff5fdSDarrick J. Wong 	ASSERT(tip->i_delayed_blks == 0);
141739aff5fdSDarrick J. Wong 	tip->i_delayed_blks = ip->i_delayed_blks;
141839aff5fdSDarrick J. Wong 	ip->i_delayed_blks = 0;
141939aff5fdSDarrick J. Wong 
1420f7e67b20SChristoph Hellwig 	switch (ip->i_df.if_format) {
142139aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
142239aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DEXT;
142339aff5fdSDarrick J. Wong 		break;
142439aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
142538c26bfdSDave Chinner 		ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
142639aff5fdSDarrick J. Wong 		       (*src_log_flags & XFS_ILOG_DOWNER));
142739aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DBROOT;
142839aff5fdSDarrick J. Wong 		break;
142939aff5fdSDarrick J. Wong 	}
143039aff5fdSDarrick J. Wong 
1431f7e67b20SChristoph Hellwig 	switch (tip->i_df.if_format) {
143239aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
143339aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DEXT;
143439aff5fdSDarrick J. Wong 		break;
143539aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
143639aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DBROOT;
143738c26bfdSDave Chinner 		ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
143839aff5fdSDarrick J. Wong 		       (*target_log_flags & XFS_ILOG_DOWNER));
143939aff5fdSDarrick J. Wong 		break;
144039aff5fdSDarrick J. Wong 	}
144139aff5fdSDarrick J. Wong 
144239aff5fdSDarrick J. Wong 	return 0;
144339aff5fdSDarrick J. Wong }
144439aff5fdSDarrick J. Wong 
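/*
 * Worked example of the i_nblocks fixup above (illustrative numbers): only
 * the data forks move, so each inode keeps its own attr blocks.  If ip had
 * 100 blocks (10 of them attr) and tip had 50 (5 attr), then after the swap
 * ip ends with 50 - 5 + 10 = 55 blocks and tip with 100 + 5 - 10 = 95,
 * i.e. the other file's data blocks plus its own attr blocks.
 */
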
14452dd3d709SBrian Foster /*
14462dd3d709SBrian Foster  * Fix up the owners of the bmbt blocks to refer to the current inode. The
14472dd3d709SBrian Foster  * change owner scan attempts to order all modified buffers in the current
14482dd3d709SBrian Foster  * transaction. In the event of ordered buffer failure, the offending buffer is
14492dd3d709SBrian Foster  * physically logged as a fallback and the scan returns -EAGAIN. We must roll
14502dd3d709SBrian Foster  * the transaction in this case to replenish the fallback log reservation and
14512dd3d709SBrian Foster  * restart the scan. This process repeats until the scan completes.
14522dd3d709SBrian Foster  */
14532dd3d709SBrian Foster static int
14542dd3d709SBrian Foster xfs_swap_change_owner(
14552dd3d709SBrian Foster 	struct xfs_trans	**tpp,
14562dd3d709SBrian Foster 	struct xfs_inode	*ip,
14572dd3d709SBrian Foster 	struct xfs_inode	*tmpip)
14582dd3d709SBrian Foster {
14592dd3d709SBrian Foster 	int			error;
14602dd3d709SBrian Foster 	struct xfs_trans	*tp = *tpp;
14612dd3d709SBrian Foster 
14622dd3d709SBrian Foster 	do {
14632dd3d709SBrian Foster 		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
14642dd3d709SBrian Foster 					      NULL);
14652dd3d709SBrian Foster 		/* success or fatal error */
14662dd3d709SBrian Foster 		if (error != -EAGAIN)
14672dd3d709SBrian Foster 			break;
14682dd3d709SBrian Foster 
14692dd3d709SBrian Foster 		error = xfs_trans_roll(tpp);
14702dd3d709SBrian Foster 		if (error)
14712dd3d709SBrian Foster 			break;
14722dd3d709SBrian Foster 		tp = *tpp;
14732dd3d709SBrian Foster 
14742dd3d709SBrian Foster 		/*
14752dd3d709SBrian Foster 		 * Redirty both inodes so they can relog and keep the log tail
14762dd3d709SBrian Foster 		 * moving forward.
14772dd3d709SBrian Foster 		 */
14782dd3d709SBrian Foster 		xfs_trans_ijoin(tp, ip, 0);
14792dd3d709SBrian Foster 		xfs_trans_ijoin(tp, tmpip, 0);
14802dd3d709SBrian Foster 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
14812dd3d709SBrian Foster 		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
14822dd3d709SBrian Foster 	} while (true);
14832dd3d709SBrian Foster 
14842dd3d709SBrian Foster 	return error;
14852dd3d709SBrian Foster }
14862dd3d709SBrian Foster 
14874ef897a2SDave Chinner int
1488a133d952SDave Chinner xfs_swap_extents(
1489e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1490e06259aaSDarrick J. Wong 	struct xfs_inode	*tip,	/* tmp inode */
1491e06259aaSDarrick J. Wong 	struct xfs_swapext	*sxp)
1492a133d952SDave Chinner {
1493e06259aaSDarrick J. Wong 	struct xfs_mount	*mp = ip->i_mount;
1494e06259aaSDarrick J. Wong 	struct xfs_trans	*tp;
1495e06259aaSDarrick J. Wong 	struct xfs_bstat	*sbp = &sxp->sx_stat;
1496a133d952SDave Chinner 	int			src_log_flags, target_log_flags;
1497a133d952SDave Chinner 	int			error = 0;
1498c8ce540dSDarrick J. Wong 	uint64_t		f;
14992dd3d709SBrian Foster 	int			resblks = 0;
1500f74681baSBrian Foster 	unsigned int		flags = 0;
150175d1e312SJeff Layton 	struct timespec64	ctime, mtime;
1502a133d952SDave Chinner 
1503a133d952SDave Chinner 	/*
1504723cac48SDave Chinner 	 * Lock the inodes against other IO, page faults and truncate to
1505723cac48SDave Chinner 	 * begin with.  Then we can safely flush the inodes and ensure they
1506723cac48SDave Chinner 	 * have no page cache. Once we have done this we can take the ilocks and
1507723cac48SDave Chinner 	 * do the rest of the checks.
1508a133d952SDave Chinner 	 */
150965523218SChristoph Hellwig 	lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1510d2c292d8SJan Kara 	filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
1511d2c292d8SJan Kara 				    VFS_I(tip)->i_mapping);
1512a133d952SDave Chinner 
1513a133d952SDave Chinner 	/* Verify that both files have the same format */
1514c19b3b05SDave Chinner 	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
15152451337dSDave Chinner 		error = -EINVAL;
1516a133d952SDave Chinner 		goto out_unlock;
1517a133d952SDave Chinner 	}
1518a133d952SDave Chinner 
1519a133d952SDave Chinner 	/* Verify both files are either real-time or non-realtime */
1520a133d952SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
15212451337dSDave Chinner 		error = -EINVAL;
1522a133d952SDave Chinner 		goto out_unlock;
1523a133d952SDave Chinner 	}
1524a133d952SDave Chinner 
15252713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(ip);
15262713fefaSDarrick J. Wong 	if (error)
15272713fefaSDarrick J. Wong 		goto out_unlock;
15282713fefaSDarrick J. Wong 
15292713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(tip);
15302713fefaSDarrick J. Wong 	if (error)
15312713fefaSDarrick J. Wong 		goto out_unlock;
15322713fefaSDarrick J. Wong 
15334ef897a2SDave Chinner 	error = xfs_swap_extent_flush(ip);
1534a133d952SDave Chinner 	if (error)
1535a133d952SDave Chinner 		goto out_unlock;
15364ef897a2SDave Chinner 	error = xfs_swap_extent_flush(tip);
15374ef897a2SDave Chinner 	if (error)
15384ef897a2SDave Chinner 		goto out_unlock;
1539a133d952SDave Chinner 
154096987eeaSChristoph Hellwig 	if (xfs_inode_has_cow_data(tip)) {
154196987eeaSChristoph Hellwig 		error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
154296987eeaSChristoph Hellwig 		if (error)
15438bc3b5e4SDarrick J. Wong 			goto out_unlock;
154496987eeaSChristoph Hellwig 	}
154596987eeaSChristoph Hellwig 
15461f08af52SDarrick J. Wong 	/*
15471f08af52SDarrick J. Wong 	 * Extent "swapping" with rmap requires a permanent reservation and
15481f08af52SDarrick J. Wong 	 * a block reservation because it's really just a remap operation
15491f08af52SDarrick J. Wong 	 * performed with log redo items!
15501f08af52SDarrick J. Wong 	 */
155138c26bfdSDave Chinner 	if (xfs_has_rmapbt(mp)) {
1552b3fed434SBrian Foster 		int		w = XFS_DATA_FORK;
1553daf83964SChristoph Hellwig 		uint32_t	ipnext = ip->i_df.if_nextents;
1554daf83964SChristoph Hellwig 		uint32_t	tipnext	= tip->i_df.if_nextents;
1555b3fed434SBrian Foster 
15561f08af52SDarrick J. Wong 		/*
1557b3fed434SBrian Foster 		 * Conceptually this shouldn't affect the shape of either bmbt,
1558b3fed434SBrian Foster 		 * but since we atomically move extents one by one, we reserve
1559b3fed434SBrian Foster 		 * enough space to rebuild both trees.
15601f08af52SDarrick J. Wong 		 */
1561b3fed434SBrian Foster 		resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
1562b3fed434SBrian Foster 		resblks +=  XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
1563b3fed434SBrian Foster 		resblks += XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
1564b3fed434SBrian Foster 		/*
1565f74681baSBrian Foster 		 * If either inode straddles a bmapbt block allocation boundary,
1566f74681baSBrian Foster 		 * the rmapbt algorithm triggers repeated allocs and frees as
1567f74681baSBrian Foster 		 * extents are remapped. This can exhaust the block reservation
1568f74681baSBrian Foster 		 * prematurely and cause shutdown. Return freed blocks to the
1569f74681baSBrian Foster 		 * transaction reservation to counter this behavior.
1570b3fed434SBrian Foster 		 */
1571f74681baSBrian Foster 		flags |= XFS_TRANS_RES_FDBLKS;
15722dd3d709SBrian Foster 	}
1573f74681baSBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags,
1574f74681baSBrian Foster 				&tp);
1575253f4911SChristoph Hellwig 	if (error)
1576a133d952SDave Chinner 		goto out_unlock;
1577723cac48SDave Chinner 
1578723cac48SDave Chinner 	/*
1579723cac48SDave Chinner 	 * Lock and join the inodes to the transaction so that transaction commit
1580723cac48SDave Chinner 	 * or cancel will unlock the inodes from this point onwards.
1581723cac48SDave Chinner 	 */
15827c2d238aSDarrick J. Wong 	xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
158339aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, ip, 0);
158439aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, tip, 0);
1585723cac48SDave Chinner 
1587a133d952SDave Chinner 	/* Verify all data are being swapped */
1588a133d952SDave Chinner 	if (sxp->sx_offset != 0 ||
158913d2c10bSChristoph Hellwig 	    sxp->sx_length != ip->i_disk_size ||
159013d2c10bSChristoph Hellwig 	    sxp->sx_length != tip->i_disk_size) {
15912451337dSDave Chinner 		error = -EFAULT;
15924ef897a2SDave Chinner 		goto out_trans_cancel;
1593a133d952SDave Chinner 	}
1594a133d952SDave Chinner 
1595a133d952SDave Chinner 	trace_xfs_swap_extent_before(ip, 0);
1596a133d952SDave Chinner 	trace_xfs_swap_extent_before(tip, 1);
1597a133d952SDave Chinner 
1598a133d952SDave Chinner 	/* check inode formats now that data is flushed */
1599a133d952SDave Chinner 	error = xfs_swap_extents_check_format(ip, tip);
1600a133d952SDave Chinner 	if (error) {
1601a133d952SDave Chinner 		xfs_notice(mp,
1602a133d952SDave Chinner 		    "%s: inode 0x%llx format is incompatible for exchanging.",
1603a133d952SDave Chinner 				__func__, ip->i_ino);
16044ef897a2SDave Chinner 		goto out_trans_cancel;
1605a133d952SDave Chinner 	}
1606a133d952SDave Chinner 
1607a133d952SDave Chinner 	/*
1608a133d952SDave Chinner 	 * Compare the current change & modify times with those
1609a133d952SDave Chinner 	 * passed in.  If they differ, we abort this swap.
1610a133d952SDave Chinner 	 * This is the mechanism used to assure the calling
1611a133d952SDave Chinner 	 * process that the file was not changed out from
1612a133d952SDave Chinner 	 * under it.
1613a133d952SDave Chinner 	 */
1614a0a415e3SJeff Layton 	ctime = inode_get_ctime(VFS_I(ip));
161575d1e312SJeff Layton 	mtime = inode_get_mtime(VFS_I(ip));
1616a0a415e3SJeff Layton 	if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) ||
1617a0a415e3SJeff Layton 	    (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) ||
161875d1e312SJeff Layton 	    (sbp->bs_mtime.tv_sec != mtime.tv_sec) ||
161975d1e312SJeff Layton 	    (sbp->bs_mtime.tv_nsec != mtime.tv_nsec)) {
16202451337dSDave Chinner 		error = -EBUSY;
162181217683SDave Chinner 		goto out_trans_cancel;
1622a133d952SDave Chinner 	}
1623a133d952SDave Chinner 
162421b5c978SDave Chinner 	/*
162521b5c978SDave Chinner 	 * Note the trickiness in setting the log flags - we set the owner log
162621b5c978SDave Chinner 	 * flag on the opposite inode (i.e. the inode we are setting the new
162721b5c978SDave Chinner 	 * owner to be) because once we swap the forks and log that, log
162821b5c978SDave Chinner 	 * recovery is going to see the fork as owned by the swapped inode,
162921b5c978SDave Chinner 	 * not the pre-swapped inodes.
163021b5c978SDave Chinner 	 */
163121b5c978SDave Chinner 	src_log_flags = XFS_ILOG_CORE;
163221b5c978SDave Chinner 	target_log_flags = XFS_ILOG_CORE;
163339aff5fdSDarrick J. Wong 
163438c26bfdSDave Chinner 	if (xfs_has_rmapbt(mp))
16351f08af52SDarrick J. Wong 		error = xfs_swap_extent_rmap(&tp, ip, tip);
16361f08af52SDarrick J. Wong 	else
163739aff5fdSDarrick J. Wong 		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
163839aff5fdSDarrick J. Wong 				&target_log_flags);
163921b5c978SDave Chinner 	if (error)
164021b5c978SDave Chinner 		goto out_trans_cancel;
1641a133d952SDave Chinner 
1642f0bc4d13SDarrick J. Wong 	/* Do we have to swap reflink flags? */
16433e09ab8fSChristoph Hellwig 	if ((ip->i_diflags2 & XFS_DIFLAG2_REFLINK) ^
16443e09ab8fSChristoph Hellwig 	    (tip->i_diflags2 & XFS_DIFLAG2_REFLINK)) {
16453e09ab8fSChristoph Hellwig 		f = ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
16463e09ab8fSChristoph Hellwig 		ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
16473e09ab8fSChristoph Hellwig 		ip->i_diflags2 |= tip->i_diflags2 & XFS_DIFLAG2_REFLINK;
16483e09ab8fSChristoph Hellwig 		tip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
16493e09ab8fSChristoph Hellwig 		tip->i_diflags2 |= f & XFS_DIFLAG2_REFLINK;
165052bfcdd7SDarrick J. Wong 	}
165152bfcdd7SDarrick J. Wong 
165252bfcdd7SDarrick J. Wong 	/* Swap the cow forks. */
165338c26bfdSDave Chinner 	if (xfs_has_reflink(mp)) {
1654f7e67b20SChristoph Hellwig 		ASSERT(!ip->i_cowfp ||
1655f7e67b20SChristoph Hellwig 		       ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
1656f7e67b20SChristoph Hellwig 		ASSERT(!tip->i_cowfp ||
1657f7e67b20SChristoph Hellwig 		       tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
165852bfcdd7SDarrick J. Wong 
1659897992b7SGustavo A. R. Silva 		swap(ip->i_cowfp, tip->i_cowfp);
166052bfcdd7SDarrick J. Wong 
16615bcffe30SChristoph Hellwig 		if (ip->i_cowfp && ip->i_cowfp->if_bytes)
166283104d44SDarrick J. Wong 			xfs_inode_set_cowblocks_tag(ip);
166352bfcdd7SDarrick J. Wong 		else
166452bfcdd7SDarrick J. Wong 			xfs_inode_clear_cowblocks_tag(ip);
16655bcffe30SChristoph Hellwig 		if (tip->i_cowfp && tip->i_cowfp->if_bytes)
166683104d44SDarrick J. Wong 			xfs_inode_set_cowblocks_tag(tip);
166752bfcdd7SDarrick J. Wong 		else
166852bfcdd7SDarrick J. Wong 			xfs_inode_clear_cowblocks_tag(tip);
1669f0bc4d13SDarrick J. Wong 	}
1670f0bc4d13SDarrick J. Wong 
1671a133d952SDave Chinner 	xfs_trans_log_inode(tp, ip,  src_log_flags);
1672a133d952SDave Chinner 	xfs_trans_log_inode(tp, tip, target_log_flags);
1673a133d952SDave Chinner 
1674a133d952SDave Chinner 	/*
16756fb10d6dSBrian Foster 	 * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems
16766fb10d6dSBrian Foster 	 * have inode number owner values in the bmbt blocks that still refer to
16776fb10d6dSBrian Foster 	 * the old inode. Scan each bmbt to fix up the owner values with the
16786fb10d6dSBrian Foster 	 * inode number of the current inode.
16796fb10d6dSBrian Foster 	 */
16806fb10d6dSBrian Foster 	if (src_log_flags & XFS_ILOG_DOWNER) {
16812dd3d709SBrian Foster 		error = xfs_swap_change_owner(&tp, ip, tip);
16826fb10d6dSBrian Foster 		if (error)
16836fb10d6dSBrian Foster 			goto out_trans_cancel;
16846fb10d6dSBrian Foster 	}
16856fb10d6dSBrian Foster 	if (target_log_flags & XFS_ILOG_DOWNER) {
16862dd3d709SBrian Foster 		error = xfs_swap_change_owner(&tp, tip, ip);
16876fb10d6dSBrian Foster 		if (error)
16886fb10d6dSBrian Foster 			goto out_trans_cancel;
16896fb10d6dSBrian Foster 	}
16906fb10d6dSBrian Foster 
16916fb10d6dSBrian Foster 	/*
1692a133d952SDave Chinner 	 * If this is a synchronous mount, make sure that the
1693a133d952SDave Chinner 	 * transaction goes to disk before returning to the user.
1694a133d952SDave Chinner 	 */
16950560f31aSDave Chinner 	if (xfs_has_wsync(mp))
1696a133d952SDave Chinner 		xfs_trans_set_sync(tp);
1697a133d952SDave Chinner 
169870393313SChristoph Hellwig 	error = xfs_trans_commit(tp);
1699a133d952SDave Chinner 
1700a133d952SDave Chinner 	trace_xfs_swap_extent_after(ip, 0);
1701a133d952SDave Chinner 	trace_xfs_swap_extent_after(tip, 1);
170239aff5fdSDarrick J. Wong 
1703d2c292d8SJan Kara out_unlock_ilock:
1704d2c292d8SJan Kara 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1705d2c292d8SJan Kara 	xfs_iunlock(tip, XFS_ILOCK_EXCL);
170665523218SChristoph Hellwig out_unlock:
1707d2c292d8SJan Kara 	filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
1708d2c292d8SJan Kara 				      VFS_I(tip)->i_mapping);
170965523218SChristoph Hellwig 	unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1710a133d952SDave Chinner 	return error;
1711a133d952SDave Chinner 
171239aff5fdSDarrick J. Wong out_trans_cancel:
171339aff5fdSDarrick J. Wong 	xfs_trans_cancel(tp);
1714d2c292d8SJan Kara 	goto out_unlock_ilock;
1715a133d952SDave Chinner }
1716
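
/*
 * Userspace-side sketch (editor's addition, modelled on xfs_fsr; the struct
 * layout and ioctl name are assumptions from the public XFS headers): the
 * caller snapshots the target's bulkstat data before building the temporary
 * file so that the ctime/mtime comparison in xfs_swap_extents() can detect
 * concurrent modification (-EBUSY).
 */
#if 0
#include <sys/ioctl.h>
#include <xfs/xfs.h>

static int
example_swapext(
	int			target_fd,
	int			tmp_fd,
	const struct xfs_bstat	*pre_swap_stat,
	long long		length)
{
	struct xfs_swapext	sx = {
		.sx_version	= XFS_SX_VERSION,
		.sx_fdtarget	= target_fd,
		.sx_fdtmp	= tmp_fd,
		.sx_offset	= 0,		/* whole file, as checked above */
		.sx_length	= length,	/* must equal both on-disk sizes */
		.sx_stat	= *pre_swap_stat,
	};

	/* -EBUSY means the target changed since the stat was taken. */
	return ioctl(target_fd, XFS_IOC_SWAPEXT, &sx);
}
#endif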