xref: /linux/fs/xfs/xfs_bmap_util.c (revision fa5a387230861116c2434c20d29fc4b3fd077d24)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2006 Silicon Graphics, Inc.
 * Copyright (c) 2012 Red Hat, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_bit.h"
#include "xfs_mount.h"
#include "xfs_defer.h"
#include "xfs_inode.h"
#include "xfs_btree.h"
#include "xfs_trans.h"
#include "xfs_alloc.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_bmap_btree.h"
#include "xfs_rtalloc.h"
#include "xfs_error.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_trace.h"
#include "xfs_icache.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"
#include "xfs_rtbitmap.h"

/* Kernel only BMAP related definitions and functions */

/*
 * Convert the given file system block to a disk block.  We have to treat it
 * differently based on whether the file is a real time file or not, because the
 * bmap code does.
 */
xfs_daddr_t
xfs_fsb_to_db(struct xfs_inode *ip, xfs_fsblock_t fsb)
{
	if (XFS_IS_REALTIME_INODE(ip))
		return XFS_FSB_TO_BB(ip->i_mount, fsb);
	return XFS_FSB_TO_DADDR(ip->i_mount, fsb);
}

/*
 * Routine to zero an extent on disk allocated to the specific inode.
 *
 * The VFS functions take a linearised filesystem block offset, so we have to
 * convert the sparse xfs fsb to the right format first.
 * VFS types are real funky, too.
 */
int
xfs_zero_extent(
	struct xfs_inode	*ip,
	xfs_fsblock_t		start_fsb,
	xfs_off_t		count_fsb)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);
	xfs_daddr_t		sector = xfs_fsb_to_db(ip, start_fsb);
	sector_t		block = XFS_BB_TO_FSBT(mp, sector);

	return blkdev_issue_zeroout(target->bt_bdev,
		block << (mp->m_super->s_blocksize_bits - 9),
		count_fsb << (mp->m_super->s_blocksize_bits - 9),
		GFP_NOFS, 0);
}
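
/*
 * Illustrative sketch (an editorial addition, not kernel code): both
 * conversions above reduce to shifting between filesystem-block and
 * 512-byte-sector units.  Assuming 4096-byte filesystem blocks
 * (s_blocksize_bits == 12), one fs block spans 2^(12 - 9) == 8 sectors,
 * so fs block 100 starts at sector 800:
 *
 *	unsigned int blocksize_bits = 12;		// assumed 4k blocks
 *	unsigned long long fsb = 100;
 *	unsigned long long sector = fsb << (blocksize_bits - 9);
 *	// sector == 800
 */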

#ifdef CONFIG_XFS_RT
int
xfs_bmap_rtalloc(
	struct xfs_bmalloca	*ap)
{
	struct xfs_mount	*mp = ap->ip->i_mount;
	xfs_fileoff_t		orig_offset = ap->offset;
	xfs_rtxnum_t		rtx;
	xfs_rtxlen_t		prod = 0;  /* product factor for allocators */
	xfs_extlen_t		mod = 0;   /* offset alignment remainder */
	xfs_rtxlen_t		ralen = 0; /* realtime allocation length */
	xfs_extlen_t		align;     /* minimum allocation alignment */
	xfs_extlen_t		orig_length = ap->length;
	xfs_extlen_t		minlen = mp->m_sb.sb_rextsize;
	xfs_rtxlen_t		raminlen;
	bool			rtlocked = false;
	bool			ignore_locality = false;
	int			error;

	align = xfs_get_extsz_hint(ap->ip);
retry:
	prod = align / mp->m_sb.sb_rextsize;
	error = xfs_bmap_extsize_align(mp, &ap->got, &ap->prev,
					align, 1, ap->eof, 0,
					ap->conv, &ap->offset, &ap->length);
	if (error)
		return error;
	ASSERT(ap->length);
	ASSERT(ap->length % mp->m_sb.sb_rextsize == 0);

	/*
	 * If we shifted the file offset downward to satisfy an extent size
	 * hint, increase minlen by that amount so that the allocator won't
	 * give us an allocation that's too short to cover at least one of the
	 * blocks that the caller asked for.
	 */
	if (ap->offset != orig_offset)
		minlen += orig_offset - ap->offset;

	/*
	 * If the offset & length are not perfectly aligned
	 * then kill prod, it will just get us in trouble.
	 */
	div_u64_rem(ap->offset, align, &mod);
	if (mod || ap->length % align)
		prod = 1;
	/*
	 * Set ralen to be the actual requested length in rtextents.
	 */
	ralen = ap->length / mp->m_sb.sb_rextsize;
	/*
	 * If the old value was close enough to XFS_MAX_BMBT_EXTLEN that
	 * we rounded up to it, cut it back so it's valid again.
	 * Note that if it's a really large request (bigger than
	 * XFS_MAX_BMBT_EXTLEN), we don't hear about that number, and can't
	 * adjust the starting point to match it.
	 */
	if (xfs_rtxlen_to_extlen(mp, ralen) >= XFS_MAX_BMBT_EXTLEN)
		ralen = XFS_MAX_BMBT_EXTLEN / mp->m_sb.sb_rextsize;

	/*
	 * Lock out modifications to both the RT bitmap and summary inodes
	 */
	if (!rtlocked) {
		xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL|XFS_ILOCK_RTBITMAP);
		xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
		xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL|XFS_ILOCK_RTSUM);
		xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
		rtlocked = true;
	}

	/*
	 * If it's an allocation to an empty file at offset 0,
	 * pick an extent that will space things out in the rt area.
	 */
	if (ap->eof && ap->offset == 0) {
		error = xfs_rtpick_extent(mp, ap->tp, ralen, &rtx);
		if (error)
			return error;
		ap->blkno = xfs_rtx_to_rtb(mp, rtx);
	} else {
		ap->blkno = 0;
	}

	xfs_bmap_adjacent(ap);

	/*
	 * Realtime allocation, done through xfs_rtallocate_extent.
	 */
	if (ignore_locality)
		ap->blkno = 0;
	else
		do_div(ap->blkno, mp->m_sb.sb_rextsize);
	rtx = ap->blkno;
	ap->length = ralen;
	raminlen = max_t(xfs_extlen_t, 1, minlen / mp->m_sb.sb_rextsize);
	error = xfs_rtallocate_extent(ap->tp, ap->blkno, raminlen, ap->length,
			&ralen, ap->wasdel, prod, &rtx);
	if (error)
		return error;

	if (rtx != NULLRTEXTNO) {
		ap->blkno = xfs_rtx_to_rtb(mp, rtx);
		ap->length = xfs_rtxlen_to_extlen(mp, ralen);
		ap->ip->i_nblocks += ap->length;
		xfs_trans_log_inode(ap->tp, ap->ip, XFS_ILOG_CORE);
		if (ap->wasdel)
			ap->ip->i_delayed_blks -= ap->length;
		/*
		 * Adjust the disk quota also. This was reserved
		 * earlier.
		 */
		xfs_trans_mod_dquot_byino(ap->tp, ap->ip,
			ap->wasdel ? XFS_TRANS_DQ_DELRTBCOUNT :
					XFS_TRANS_DQ_RTBCOUNT, ap->length);
		return 0;
	}

	if (align > mp->m_sb.sb_rextsize) {
		/*
		 * We previously enlarged the request length to try to satisfy
		 * an extent size hint.  The allocator didn't return anything,
		 * so reset the parameters to the original values and try again
		 * without alignment criteria.
		 */
		ap->offset = orig_offset;
		ap->length = orig_length;
		minlen = align = mp->m_sb.sb_rextsize;
		goto retry;
	}

	if (!ignore_locality && ap->blkno != 0) {
		/*
		 * If we can't allocate near a specific rt extent, try again
		 * without locality criteria.
		 */
		ignore_locality = true;
		goto retry;
	}

	ap->blkno = NULLFSBLOCK;
	ap->length = 0;
	return 0;
}
#endif /* CONFIG_XFS_RT */
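
/*
 * Illustrative sketch (an editorial addition, not kernel code): the realtime
 * allocation math above works in units of sb_rextsize filesystem blocks.
 * Assuming sb_rextsize == 4 and an extent size hint of align == 8 blocks,
 * a 10-block request aligns up to 16 blocks, i.e. ralen == 4 rtextents with
 * prod == 2:
 *
 *	unsigned int rextsize = 4, align = 8;		// assumed geometry
 *	unsigned int len = roundup(10, align);		// 16 blocks
 *	unsigned int ralen = len / rextsize;		// 4 rtextents
 *	unsigned int prod = align / rextsize;		// 2
 */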

/*
 * Extent tree block counting routines.
 */

/*
 * Count leaf blocks given a range of extent records.  Delayed allocation
 * extents are not counted towards the totals.
 */
xfs_extnum_t
xfs_bmap_count_leaves(
	struct xfs_ifork	*ifp,
	xfs_filblks_t		*count)
{
	struct xfs_iext_cursor	icur;
	struct xfs_bmbt_irec	got;
	xfs_extnum_t		numrecs = 0;

	for_each_xfs_iext(ifp, &icur, &got) {
		if (!isnullstartblock(got.br_startblock)) {
			*count += got.br_blockcount;
			numrecs++;
		}
	}

	return numrecs;
}

/*
 * Count fsblocks of the given fork.  Delayed allocation extents are
 * not counted towards the totals.
 */
int
xfs_bmap_count_blocks(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	int			whichfork,
	xfs_extnum_t		*nextents,
	xfs_filblks_t		*count)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = xfs_ifork_ptr(ip, whichfork);
	struct xfs_btree_cur	*cur;
	xfs_extlen_t		btblocks = 0;
	int			error;

	*nextents = 0;
	*count = 0;

	if (!ifp)
		return 0;

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_BTREE:
		error = xfs_iread_extents(tp, ip, whichfork);
		if (error)
			return error;

		cur = xfs_bmbt_init_cursor(mp, tp, ip, whichfork);
		error = xfs_btree_count_blocks(cur, &btblocks);
		xfs_btree_del_cursor(cur, error);
		if (error)
			return error;

		/*
		 * xfs_btree_count_blocks includes the root block contained in
		 * the inode fork in @btblocks, so subtract one because we're
		 * only interested in allocated disk blocks.
		 */
		*count += btblocks - 1;

		fallthrough;
	case XFS_DINODE_FMT_EXTENTS:
		*nextents = xfs_bmap_count_leaves(ifp, count);
		break;
	}

	return 0;
}

static int
xfs_getbmap_report_one(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	struct xfs_bmbt_irec	*got)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;
	bool			shared = false;
	int			error;

	error = xfs_reflink_trim_around_shared(ip, got, &shared);
	if (error)
		return error;

	if (isnullstartblock(got->br_startblock) ||
	    got->br_startblock == DELAYSTARTBLOCK) {
		/*
		 * Take the flush completion as being a point-in-time snapshot
		 * where there are no delalloc extents, and if any new ones
		 * have been created racily, just skip them as being 'after'
		 * the flush and so don't get reported.
		 */
		if (!(bmv->bmv_iflags & BMV_IF_DELALLOC))
			return 0;

		p->bmv_oflags |= BMV_OF_DELALLOC;
		p->bmv_block = -2;
	} else {
		p->bmv_block = xfs_fsb_to_db(ip, got->br_startblock);
	}

	if (got->br_state == XFS_EXT_UNWRITTEN &&
	    (bmv->bmv_iflags & BMV_IF_PREALLOC))
		p->bmv_oflags |= BMV_OF_PREALLOC;

	if (shared)
		p->bmv_oflags |= BMV_OF_SHARED;

	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, got->br_startoff);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, got->br_blockcount);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
	return 0;
}

static void
xfs_getbmap_report_hole(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,
	struct kgetbmap		*out,
	int64_t			bmv_end,
	xfs_fileoff_t		bno,
	xfs_fileoff_t		end)
{
	struct kgetbmap		*p = out + bmv->bmv_entries;

	if (bmv->bmv_iflags & BMV_IF_NO_HOLES)
		return;

	p->bmv_block = -1;
	p->bmv_offset = XFS_FSB_TO_BB(ip->i_mount, bno);
	p->bmv_length = XFS_FSB_TO_BB(ip->i_mount, end - bno);

	bmv->bmv_offset = p->bmv_offset + p->bmv_length;
	bmv->bmv_length = max(0LL, bmv_end - bmv->bmv_offset);
	bmv->bmv_entries++;
}

static inline bool
xfs_getbmap_full(
	struct getbmapx		*bmv)
{
	return bmv->bmv_length == 0 || bmv->bmv_entries >= bmv->bmv_count - 1;
}

static bool
xfs_getbmap_next_rec(
	struct xfs_bmbt_irec	*rec,
	xfs_fileoff_t		total_end)
{
	xfs_fileoff_t		end = rec->br_startoff + rec->br_blockcount;

	if (end == total_end)
		return false;

	rec->br_startoff += rec->br_blockcount;
	if (!isnullstartblock(rec->br_startblock) &&
	    rec->br_startblock != DELAYSTARTBLOCK)
		rec->br_startblock += rec->br_blockcount;
	rec->br_blockcount = total_end - end;
	return true;
}

/*
 * Get inode's extents as described in bmv, and format for output.
 * Calls formatter to fill the user's buffer until all extents
 * are mapped, until the passed-in bmv->bmv_count slots have
 * been filled, or until the formatter short-circuits the loop,
 * if it is tracking filled-in extents on its own.
 */
int						/* error code */
xfs_getbmap(
	struct xfs_inode	*ip,
	struct getbmapx		*bmv,		/* user bmap structure */
	struct kgetbmap		*out)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			iflags = bmv->bmv_iflags;
	int			whichfork, lock, error = 0;
	int64_t			bmv_end, max_len;
	xfs_fileoff_t		bno, first_bno;
	struct xfs_ifork	*ifp;
	struct xfs_bmbt_irec	got, rec;
	xfs_filblks_t		len;
	struct xfs_iext_cursor	icur;

	if (bmv->bmv_iflags & ~BMV_IF_VALID)
		return -EINVAL;
#ifndef DEBUG
	/* Only allow CoW fork queries if we're debugging. */
	if (iflags & BMV_IF_COWFORK)
		return -EINVAL;
#endif
	if ((iflags & BMV_IF_ATTRFORK) && (iflags & BMV_IF_COWFORK))
		return -EINVAL;

	if (bmv->bmv_length < -1)
		return -EINVAL;
	bmv->bmv_entries = 0;
	if (bmv->bmv_length == 0)
		return 0;

	if (iflags & BMV_IF_ATTRFORK)
		whichfork = XFS_ATTR_FORK;
	else if (iflags & BMV_IF_COWFORK)
		whichfork = XFS_COW_FORK;
	else
		whichfork = XFS_DATA_FORK;

	xfs_ilock(ip, XFS_IOLOCK_SHARED);
	switch (whichfork) {
	case XFS_ATTR_FORK:
		lock = xfs_ilock_attr_map_shared(ip);
		if (!xfs_inode_has_attr_fork(ip))
			goto out_unlock_ilock;

		max_len = 1LL << 32;
		break;
	case XFS_COW_FORK:
		lock = XFS_ILOCK_SHARED;
		xfs_ilock(ip, lock);

		/* No CoW fork? Just return */
		if (!xfs_ifork_ptr(ip, whichfork))
			goto out_unlock_ilock;

		if (xfs_get_cowextsz_hint(ip))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);
		break;
	case XFS_DATA_FORK:
		if (!(iflags & BMV_IF_DELALLOC) &&
		    (ip->i_delayed_blks || XFS_ISIZE(ip) > ip->i_disk_size)) {
			error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
			if (error)
				goto out_unlock_iolock;

			/*
			 * Even after flushing the inode, there can still be
			 * delalloc blocks on the inode beyond EOF due to
			 * speculative preallocation.  These are not removed
			 * until the release function is called or the inode
			 * is inactivated.  Hence we cannot assert here that
			 * ip->i_delayed_blks == 0.
			 */
		}

		if (xfs_get_extsz_hint(ip) ||
		    (ip->i_diflags &
		     (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND)))
			max_len = mp->m_super->s_maxbytes;
		else
			max_len = XFS_ISIZE(ip);

		lock = xfs_ilock_data_map_shared(ip);
		break;
	}

	ifp = xfs_ifork_ptr(ip, whichfork);

	switch (ifp->if_format) {
	case XFS_DINODE_FMT_EXTENTS:
	case XFS_DINODE_FMT_BTREE:
		break;
	case XFS_DINODE_FMT_LOCAL:
		/* Local format inode forks report no extents. */
		goto out_unlock_ilock;
	default:
		error = -EINVAL;
		goto out_unlock_ilock;
	}

	if (bmv->bmv_length == -1) {
		max_len = XFS_FSB_TO_BB(mp, XFS_B_TO_FSB(mp, max_len));
		bmv->bmv_length = max(0LL, max_len - bmv->bmv_offset);
	}

	bmv_end = bmv->bmv_offset + bmv->bmv_length;

	first_bno = bno = XFS_BB_TO_FSBT(mp, bmv->bmv_offset);
	len = XFS_BB_TO_FSB(mp, bmv->bmv_length);

	error = xfs_iread_extents(NULL, ip, whichfork);
	if (error)
		goto out_unlock_ilock;

	if (!xfs_iext_lookup_extent(ip, ifp, bno, &icur, &got)) {
		/*
		 * Report a whole-file hole if the delalloc flag is set to
		 * stay compatible with the old implementation.
		 */
		if (iflags & BMV_IF_DELALLOC)
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					XFS_B_TO_FSB(mp, XFS_ISIZE(ip)));
		goto out_unlock_ilock;
	}

	while (!xfs_getbmap_full(bmv)) {
		xfs_trim_extent(&got, first_bno, len);

		/*
		 * Report an entry for a hole if this extent doesn't directly
		 * follow the previous one.
		 */
		if (got.br_startoff > bno) {
			xfs_getbmap_report_hole(ip, bmv, out, bmv_end, bno,
					got.br_startoff);
			if (xfs_getbmap_full(bmv))
				break;
		}

		/*
		 * In order to report shared extents accurately, we report each
		 * distinct shared / unshared part of a single bmbt record with
		 * an individual getbmapx record.
		 */
		bno = got.br_startoff + got.br_blockcount;
		rec = got;
		do {
			error = xfs_getbmap_report_one(ip, bmv, out, bmv_end,
					&rec);
			if (error || xfs_getbmap_full(bmv))
				goto out_unlock_ilock;
		} while (xfs_getbmap_next_rec(&rec, bno));

		if (!xfs_iext_next_extent(ifp, &icur, &got)) {
			xfs_fileoff_t	end = XFS_B_TO_FSB(mp, XFS_ISIZE(ip));

			if (bmv->bmv_entries > 0)
				out[bmv->bmv_entries - 1].bmv_oflags |=
								BMV_OF_LAST;

			if (whichfork != XFS_ATTR_FORK && bno < end &&
			    !xfs_getbmap_full(bmv)) {
				xfs_getbmap_report_hole(ip, bmv, out, bmv_end,
						bno, end);
			}
			break;
		}

		if (bno >= first_bno + len)
			break;
	}

out_unlock_ilock:
	xfs_iunlock(ip, lock);
out_unlock_iolock:
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
	return error;
}
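
/*
 * Illustrative userspace sketch (an editorial addition, not kernel code):
 * xfs_getbmap() ultimately services the XFS_IOC_GETBMAPX ioctl.  The first
 * getbmapx struct in the array is the request header (offsets and lengths
 * in 512-byte units); results land in the following slots, with bmv_block
 * of -1 for a hole and -2 for delalloc.  Assuming the xfsprogs headers are
 * installed:
 *
 *	#include <stdio.h>
 *	#include <sys/ioctl.h>
 *	#include <xfs/xfs.h>
 *
 *	int dump_extents(int fd)
 *	{
 *		struct getbmapx bmx[33] = { 0 };	// header + 32 slots
 *		int i;
 *
 *		bmx[0].bmv_length = -1;			// map out to EOF
 *		bmx[0].bmv_count = 33;			// includes the header
 *		if (ioctl(fd, XFS_IOC_GETBMAPX, bmx) < 0)
 *			return -1;
 *		for (i = 1; i <= bmx[0].bmv_entries; i++)
 *			printf("offset %lld len %lld block %lld\n",
 *			       (long long)bmx[i].bmv_offset,
 *			       (long long)bmx[i].bmv_length,
 *			       (long long)bmx[i].bmv_block);
 *		return 0;
 *	}
 */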

/*
 * Dead simple method of punching delayed allocation blocks from a range in
 * the inode.  This will always punch out both the start and end blocks, even
 * if the ranges only partially overlap them, so it is up to the caller to
 * ensure that partial blocks are not passed in.
 */
int
xfs_bmap_punch_delalloc_range(
	struct xfs_inode	*ip,
	xfs_off_t		start_byte,
	xfs_off_t		end_byte)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_ifork	*ifp = &ip->i_df;
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, start_byte);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, end_byte);
	struct xfs_bmbt_irec	got, del;
	struct xfs_iext_cursor	icur;
	int			error = 0;

	ASSERT(!xfs_need_iread_extents(ifp));

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	if (!xfs_iext_lookup_extent_before(ip, ifp, &end_fsb, &icur, &got))
		goto out_unlock;

	while (got.br_startoff + got.br_blockcount > start_fsb) {
		del = got;
		xfs_trim_extent(&del, start_fsb, end_fsb - start_fsb);

		/*
		 * A delete can push the cursor forward. Step back to the
		 * previous extent if this one is not delalloc or lies
		 * outside the target range.
		 */
		if (!del.br_blockcount ||
		    !isnullstartblock(del.br_startblock)) {
			if (!xfs_iext_prev_extent(ifp, &icur, &got))
				break;
			continue;
		}

		error = xfs_bmap_del_extent_delay(ip, XFS_DATA_FORK, &icur,
						  &got, &del);
		if (error || !xfs_iext_get_extent(ifp, &icur, &got))
			break;
	}

out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
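
/*
 * Illustrative sketch (an editorial addition, not kernel code): the
 * byte-to-block conversions above widen the range outward, which is why
 * both partially covered end blocks get punched whole.  Assuming 4096-byte
 * blocks, punching bytes [1000, 5000) covers fs blocks [0, 2):
 *
 *	unsigned int bits = 12;				// assumed 4k blocks
 *	unsigned long long start_fsb = 1000 >> bits;	// rounds down to 0
 *	unsigned long long end_fsb =
 *			(5000 + 4095) >> bits;		// rounds up to 2
 */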

/*
 * Test whether it is appropriate to check an inode for and free post EOF
 * blocks. The 'force' parameter determines whether we should also consider
 * regular files that are marked preallocated or append-only.
 */
bool
xfs_can_free_eofblocks(
	struct xfs_inode	*ip,
	bool			force)
{
	struct xfs_bmbt_irec	imap;
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		end_fsb;
	xfs_fileoff_t		last_fsb;
	int			nimaps = 1;
	int			error;

	/*
	 * The caller must either hold the exclusive io lock or be
	 * inactivating the inode, which guarantees there are no other users
	 * of the inode.
	 */
	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL) ||
	       (VFS_I(ip)->i_state & I_FREEING));

	/* prealloc/delalloc exists only on regular files */
	if (!S_ISREG(VFS_I(ip)->i_mode))
		return false;

	/*
	 * Zero sized files with no cached pages and delalloc blocks will not
	 * have speculative prealloc/delalloc blocks to remove.
	 */
	if (VFS_I(ip)->i_size == 0 &&
	    VFS_I(ip)->i_mapping->nrpages == 0 &&
	    ip->i_delayed_blks == 0)
		return false;

	/* If we haven't read in the extent list, then don't do it now. */
	if (xfs_need_iread_extents(&ip->i_df))
		return false;

	/*
	 * Do not free real preallocated or append-only files unless the file
	 * has delalloc blocks and we are forced to remove them.
	 */
	if (ip->i_diflags & (XFS_DIFLAG_PREALLOC | XFS_DIFLAG_APPEND))
		if (!force || ip->i_delayed_blks == 0)
			return false;

	/*
	 * Do not try to free post-EOF blocks if EOF is beyond the end of the
	 * range supported by the page cache, because the truncation will loop
	 * forever.
	 */
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_ISIZE(ip));
	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1)
		end_fsb = roundup_64(end_fsb, mp->m_sb.sb_rextsize);
	last_fsb = XFS_B_TO_FSB(mp, mp->m_super->s_maxbytes);
	if (last_fsb <= end_fsb)
		return false;

	/*
	 * Look up the mapping for the first block past EOF.  If we can't find
	 * it, there's nothing to free.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	error = xfs_bmapi_read(ip, end_fsb, last_fsb - end_fsb, &imap, &nimaps,
			0);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);
	if (error || nimaps == 0)
		return false;

	/*
	 * If there's a real mapping there or there are delayed allocation
	 * reservations, then we have post-EOF blocks to try to free.
	 */
	return imap.br_startblock != HOLESTARTBLOCK || ip->i_delayed_blks;
}

/*
 * This is called to free any blocks beyond eof. The caller must hold
 * IOLOCK_EXCL unless we are in the inode reclaim path and have the only
 * reference to the inode.
 */
int
xfs_free_eofblocks(
	struct xfs_inode	*ip)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/* Attach the dquots to the inode up front. */
	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	/* Wait on dio to ensure i_size has settled. */
	inode_dio_wait(VFS_I(ip));

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
	if (error) {
		ASSERT(xfs_is_shutdown(mp));
		return error;
	}

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	/*
	 * Do not update the on-disk file size.  If we update the on-disk file
	 * size and then the system crashes before the contents of the file are
	 * flushed to disk then the files may be full of holes (ie NULL files
	 * bug).
	 */
	error = xfs_itruncate_extents_flags(&tp, ip, XFS_DATA_FORK,
				XFS_ISIZE(ip), XFS_BMAPI_NODISCARD);
	if (error)
		goto err_cancel;

	error = xfs_trans_commit(tp);
	if (error)
		goto out_unlock;

	xfs_inode_clear_eofblocks_tag(ip);
	goto out_unlock;

err_cancel:
	/*
	 * If we get an error at this point we simply don't
	 * bother truncating the file.
	 */
	xfs_trans_cancel(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}

int
xfs_alloc_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	xfs_mount_t		*mp = ip->i_mount;
	xfs_off_t		count;
	xfs_filblks_t		allocated_fsb;
	xfs_filblks_t		allocatesize_fsb;
	xfs_extlen_t		extsz, temp;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			nimaps;
	int			rt;
	xfs_trans_t		*tp;
	xfs_bmbt_irec_t		imaps[1], *imapp;
	int			error;

	trace_xfs_alloc_file_space(ip);

	if (xfs_is_shutdown(mp))
		return -EIO;

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)
		return -EINVAL;

	rt = XFS_IS_REALTIME_INODE(ip);
	extsz = xfs_get_extsz_hint(ip);

	count = len;
	imapp = &imaps[0];
	nimaps = 1;
	startoffset_fsb	= XFS_B_TO_FSBT(mp, offset);
	endoffset_fsb = XFS_B_TO_FSB(mp, offset + count);
	allocatesize_fsb = endoffset_fsb - startoffset_fsb;

	/*
	 * Allocate file space until done or until there is an error
	 */
	while (allocatesize_fsb && !error) {
		xfs_fileoff_t	s, e;
		unsigned int	dblocks, rblocks, resblks;

		/*
		 * Determine space reservations for data/realtime.
		 */
		if (unlikely(extsz)) {
			s = startoffset_fsb;
			do_div(s, extsz);
			s *= extsz;
			e = startoffset_fsb + allocatesize_fsb;
			div_u64_rem(startoffset_fsb, extsz, &temp);
			if (temp)
				e += temp;
			div_u64_rem(e, extsz, &temp);
			if (temp)
				e += extsz - temp;
		} else {
			s = 0;
			e = allocatesize_fsb;
		}

		/*
		 * The transaction reservation is limited to a 32-bit block
		 * count, hence we need to limit the number of blocks we are
		 * trying to reserve to avoid an overflow. We can't allocate
		 * more than @nimaps extents, and an extent is limited on disk
		 * to XFS_MAX_BMBT_EXTLEN (21 bits), so use that to enforce the
		 * limit.
		 */
		resblks = min_t(xfs_fileoff_t, (e - s),
				(XFS_MAX_BMBT_EXTLEN * nimaps));
		if (unlikely(rt)) {
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
			rblocks = resblks;
		} else {
			dblocks = XFS_DIOSTRAT_SPACE_RES(mp, resblks);
			rblocks = 0;
		}

		error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write,
				dblocks, rblocks, false, &tp);
		if (error)
			break;

		error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
				XFS_IEXT_ADD_NOSPLIT_CNT);
		if (error == -EFBIG)
			error = xfs_iext_count_upgrade(tp, ip,
					XFS_IEXT_ADD_NOSPLIT_CNT);
		if (error)
			goto error;

		error = xfs_bmapi_write(tp, ip, startoffset_fsb,
				allocatesize_fsb, XFS_BMAPI_PREALLOC, 0, imapp,
				&nimaps);
		if (error)
			goto error;

		ip->i_diflags |= XFS_DIFLAG_PREALLOC;
		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

		error = xfs_trans_commit(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		if (error)
			break;

		allocated_fsb = imapp->br_blockcount;

		if (nimaps == 0) {
			error = -ENOSPC;
			break;
		}

		startoffset_fsb += allocated_fsb;
		allocatesize_fsb -= allocated_fsb;
	}

	return error;

error:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
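
/*
 * Illustrative userspace sketch (an editorial addition, not kernel code):
 * on XFS, preallocation requests such as fallocate() are ultimately backed
 * by xfs_alloc_file_space().  Preallocating 16MiB without changing i_size;
 * the blocks are allocated unwritten, so they read back as zeroes:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	int prealloc_16m(int fd)
 *	{
 *		return fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, 16 << 20);
 *	}
 */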

static int
xfs_unmap_extent(
	struct xfs_inode	*ip,
	xfs_fileoff_t		startoffset_fsb,
	xfs_filblks_t		len_fsb,
	int			*done)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	uint			resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
	int			error;

	error = xfs_trans_alloc_inode(ip, &M_RES(mp)->tr_write, resblks, 0,
			false, &tp);
	if (error)
		return error;

	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
			XFS_IEXT_PUNCH_HOLE_CNT);
	if (error == -EFBIG)
		error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
	if (error)
		goto out_trans_cancel;

	error = xfs_bunmapi(tp, ip, startoffset_fsb, len_fsb, 0, 2, done);
	if (error)
		goto out_trans_cancel;

	error = xfs_trans_commit(tp);
out_unlock:
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;

out_trans_cancel:
	xfs_trans_cancel(tp);
	goto out_unlock;
}

/* Caller must first wait for the completion of any pending DIOs if required. */
int
xfs_flush_unmap_range(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct inode		*inode = VFS_I(ip);
	xfs_off_t		rounding, start, end;
	int			error;

	rounding = max_t(xfs_off_t, mp->m_sb.sb_blocksize, PAGE_SIZE);
	start = round_down(offset, rounding);
	end = round_up(offset + len, rounding) - 1;

	error = filemap_write_and_wait_range(inode->i_mapping, start, end);
	if (error)
		return error;
	truncate_pagecache_range(inode, start, end);
	return 0;
}

int
xfs_free_file_space(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	xfs_off_t		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	xfs_fileoff_t		startoffset_fsb;
	xfs_fileoff_t		endoffset_fsb;
	int			done = 0, error;

	trace_xfs_free_file_space(ip);

	error = xfs_qm_dqattach(ip);
	if (error)
		return error;

	if (len <= 0)	/* if nothing being freed */
		return 0;

	startoffset_fsb = XFS_B_TO_FSB(mp, offset);
	endoffset_fsb = XFS_B_TO_FSBT(mp, offset + len);

	/* We can only free complete realtime extents. */
	if (XFS_IS_REALTIME_INODE(ip) && mp->m_sb.sb_rextsize > 1) {
		startoffset_fsb = roundup_64(startoffset_fsb,
					     mp->m_sb.sb_rextsize);
		endoffset_fsb = rounddown_64(endoffset_fsb,
					     mp->m_sb.sb_rextsize);
	}

	/*
	 * Need to zero the stuff we're not freeing, on disk.
	 */
	if (endoffset_fsb > startoffset_fsb) {
		while (!done) {
			error = xfs_unmap_extent(ip, startoffset_fsb,
					endoffset_fsb - startoffset_fsb, &done);
			if (error)
				return error;
		}
	}

	/*
	 * Now that we've unmapped all of the full blocks we'll have to zero
	 * out any partial block at the beginning and/or end.  xfs_zero_range
	 * is smart enough to skip any holes, including those we just created,
	 * but we must take care not to zero beyond EOF and enlarge i_size.
	 */
	if (offset >= XFS_ISIZE(ip))
		return 0;
	if (offset + len > XFS_ISIZE(ip))
		len = XFS_ISIZE(ip) - offset;
	error = xfs_zero_range(ip, offset, len, NULL);
	if (error)
		return error;

	/*
	 * If we zeroed right up to EOF and EOF straddles a page boundary we
	 * must make sure that the post-EOF area is also zeroed because the
	 * page could be mmap'd and xfs_zero_range doesn't do that for us.
	 * Writeback of the eof page will do this, albeit clumsily.
	 */
	if (offset + len >= XFS_ISIZE(ip) && offset_in_page(offset + len) > 0) {
		error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping,
				round_down(offset + len, PAGE_SIZE), LLONG_MAX);
	}

	return error;
}
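
/*
 * Illustrative userspace sketch (an editorial addition, not kernel code):
 * hole-punch requests reach xfs_free_file_space() via fallocate(), where
 * FALLOC_FL_PUNCH_HOLE must be combined with FALLOC_FL_KEEP_SIZE.  Punching
 * out the first 1MiB of a file:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	int punch_first_meg(int fd)
 *	{
 *		return fallocate(fd,
 *				 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
 *				 0, 1 << 20);
 *	}
 */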

static int
xfs_prepare_shift(
	struct xfs_inode	*ip,
	loff_t			offset)
{
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	/*
	 * Trim eofblocks to avoid shifting uninitialized post-eof preallocation
	 * into the accessible region of the file.
	 */
	if (xfs_can_free_eofblocks(ip, true)) {
		error = xfs_free_eofblocks(ip);
		if (error)
			return error;
	}

	/*
	 * Shift operations must stabilize the start block offset boundary along
	 * with the full range of the operation. If we don't, a COW writeback
	 * completion could race with an insert, front merge with the start
	 * extent (after split) during the shift and corrupt the file. Start
	 * with the block just prior to the start to stabilize the boundary.
	 */
	offset = round_down(offset, mp->m_sb.sb_blocksize);
	if (offset)
		offset -= mp->m_sb.sb_blocksize;

	/*
	 * Writeback and invalidate cache for the remainder of the file as we're
	 * about to shift down every extent from offset to EOF.
	 */
	error = xfs_flush_unmap_range(ip, offset, XFS_ISIZE(ip));
	if (error)
		return error;

	/*
	 * Clean out anything hanging around in the cow fork now that
	 * we've flushed all the dirty data out to disk to avoid having
	 * CoW extents at the wrong offsets.
	 */
	if (xfs_inode_has_cow_data(ip)) {
		error = xfs_reflink_cancel_cow_range(ip, offset, NULLFILEOFF,
				true);
		if (error)
			return error;
	}

	return 0;
}

1089e1d8fb88SNamjae Jeon /*
1090a904b1caSNamjae Jeon  * xfs_collapse_file_space()
1091a904b1caSNamjae Jeon  *	This routine frees disk space and shifts extents for the given file.
1092a904b1caSNamjae Jeon  *	The first thing we do is free the data blocks in the specified range
1093a904b1caSNamjae Jeon  *	by calling xfs_free_file_space(), which also syncs dirty data and
1094a904b1caSNamjae Jeon  *	invalidates the page cache over the region the collapse range is
1095a904b1caSNamjae Jeon  *	working on. Extent records are then shifted left to cover the hole.
1096a904b1caSNamjae Jeon  * RETURNS:
1097a904b1caSNamjae Jeon  *	0 on success
1098a904b1caSNamjae Jeon  *	errno on error
1099a904b1caSNamjae Jeon  *
1100a904b1caSNamjae Jeon  */
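/*
 * For reference: this function backs the FALLOC_FL_COLLAPSE_RANGE
 * fallocate mode. A minimal userspace sketch follows, assuming fd, offset
 * and len are supplied by the caller; both offset and len must be aligned
 * to the filesystem block size or the call fails with -EINVAL:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	// Remove [offset, offset + len) and shift the tail of the file down.
 *	int collapse_range(int fd, off_t offset, off_t len)
 *	{
 *		return fallocate(fd, FALLOC_FL_COLLAPSE_RANGE, offset, len);
 *	}
 */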
1101a904b1caSNamjae Jeon int
1102a904b1caSNamjae Jeon xfs_collapse_file_space(
1103a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1104a904b1caSNamjae Jeon 	xfs_off_t		offset,
1105a904b1caSNamjae Jeon 	xfs_off_t		len)
1106a904b1caSNamjae Jeon {
11074ed36c6bSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
11084ed36c6bSChristoph Hellwig 	struct xfs_trans	*tp;
1109a904b1caSNamjae Jeon 	int			error;
11104ed36c6bSChristoph Hellwig 	xfs_fileoff_t		next_fsb = XFS_B_TO_FSB(mp, offset + len);
11114ed36c6bSChristoph Hellwig 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
1112ecfea3f0SChristoph Hellwig 	bool			done = false;
1113a904b1caSNamjae Jeon 
1114a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
11159ad1a23aSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
11169ad1a23aSChristoph Hellwig 
1117a904b1caSNamjae Jeon 	trace_xfs_collapse_file_space(ip);
1118a904b1caSNamjae Jeon 
1119a904b1caSNamjae Jeon 	error = xfs_free_file_space(ip, offset, len);
1120a904b1caSNamjae Jeon 	if (error)
1121a904b1caSNamjae Jeon 		return error;
1122a904b1caSNamjae Jeon 
11234ed36c6bSChristoph Hellwig 	error = xfs_prepare_shift(ip, offset);
11244ed36c6bSChristoph Hellwig 	if (error)
11254ed36c6bSChristoph Hellwig 		return error;
11264ed36c6bSChristoph Hellwig 
1127211683b2SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
11284ed36c6bSChristoph Hellwig 	if (error)
1129211683b2SBrian Foster 		return error;
11304ed36c6bSChristoph Hellwig 
11314ed36c6bSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
1132211683b2SBrian Foster 	xfs_trans_ijoin(tp, ip, 0);
11334ed36c6bSChristoph Hellwig 
1134211683b2SBrian Foster 	while (!done) {
1135ecfea3f0SChristoph Hellwig 		error = xfs_bmap_collapse_extents(tp, ip, &next_fsb, shift_fsb,
1136333f950cSBrian Foster 				&done);
11374ed36c6bSChristoph Hellwig 		if (error)
1138c8eac49eSBrian Foster 			goto out_trans_cancel;
1139211683b2SBrian Foster 		if (done)
1140211683b2SBrian Foster 			break;
11414ed36c6bSChristoph Hellwig 
1142211683b2SBrian Foster 		/* finish any deferred frees and roll the transaction */
1143211683b2SBrian Foster 		error = xfs_defer_finish(&tp);
1144211683b2SBrian Foster 		if (error)
1145211683b2SBrian Foster 			goto out_trans_cancel;
11464ed36c6bSChristoph Hellwig 	}
11474ed36c6bSChristoph Hellwig 
1148211683b2SBrian Foster 	error = xfs_trans_commit(tp);
1149211683b2SBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
11504ed36c6bSChristoph Hellwig 	return error;
11514ed36c6bSChristoph Hellwig 
11524ed36c6bSChristoph Hellwig out_trans_cancel:
11534ed36c6bSChristoph Hellwig 	xfs_trans_cancel(tp);
1154211683b2SBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
11554ed36c6bSChristoph Hellwig 	return error;
1156a904b1caSNamjae Jeon }
1157a904b1caSNamjae Jeon 
1158a904b1caSNamjae Jeon /*
1159a904b1caSNamjae Jeon  * xfs_insert_file_space()
1160a904b1caSNamjae Jeon  *	This routine creates a hole by shifting extents for the given file.
1161a904b1caSNamjae Jeon  *	The first thing we do is sync dirty data and invalidate the page
1162a904b1caSNamjae Jeon  *	cache over the region the insert range is working on. We then split
1163a904b1caSNamjae Jeon  *	the extent at the given offset into two by calling
1164a904b1caSNamjae Jeon  *	xfs_bmap_split_extent, and shift all extent records lying between
1165a904b1caSNamjae Jeon  *	[offset, last allocated extent] to the right to make room for the hole.
1166a904b1caSNamjae Jeon  * RETURNS:
1167a904b1caSNamjae Jeon  *	0 on success
1168a904b1caSNamjae Jeon  *	errno on error
1169a904b1caSNamjae Jeon  */
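/*
 * For reference: this function backs the FALLOC_FL_INSERT_RANGE fallocate
 * mode. A minimal userspace sketch, assuming fd, offset and len come from
 * the caller; offset and len must be block-size aligned and offset must
 * lie within the current file size:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *
 *	// Open a hole of len bytes at offset, shifting the tail of the
 *	// file up.
 *	int insert_range(int fd, off_t offset, off_t len)
 *	{
 *		return fallocate(fd, FALLOC_FL_INSERT_RANGE, offset, len);
 *	}
 */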
1170a904b1caSNamjae Jeon int
1171a904b1caSNamjae Jeon xfs_insert_file_space(
1172a904b1caSNamjae Jeon 	struct xfs_inode	*ip,
1173a904b1caSNamjae Jeon 	loff_t			offset,
1174a904b1caSNamjae Jeon 	loff_t			len)
1175a904b1caSNamjae Jeon {
11764ed36c6bSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
11774ed36c6bSChristoph Hellwig 	struct xfs_trans	*tp;
11784ed36c6bSChristoph Hellwig 	int			error;
11794ed36c6bSChristoph Hellwig 	xfs_fileoff_t		stop_fsb = XFS_B_TO_FSB(mp, offset);
11804ed36c6bSChristoph Hellwig 	xfs_fileoff_t		next_fsb = NULLFSBLOCK;
11814ed36c6bSChristoph Hellwig 	xfs_fileoff_t		shift_fsb = XFS_B_TO_FSB(mp, len);
1182ecfea3f0SChristoph Hellwig 	bool			done = false;
11834ed36c6bSChristoph Hellwig 
1184a904b1caSNamjae Jeon 	ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
11859ad1a23aSChristoph Hellwig 	ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
11869ad1a23aSChristoph Hellwig 
1187a904b1caSNamjae Jeon 	trace_xfs_insert_file_space(ip);
1188a904b1caSNamjae Jeon 
1189f62cb48eSDarrick J. Wong 	error = xfs_bmap_can_insert_extents(ip, stop_fsb, shift_fsb);
1190f62cb48eSDarrick J. Wong 	if (error)
1191f62cb48eSDarrick J. Wong 		return error;
1192f62cb48eSDarrick J. Wong 
11934ed36c6bSChristoph Hellwig 	error = xfs_prepare_shift(ip, offset);
11944ed36c6bSChristoph Hellwig 	if (error)
11954ed36c6bSChristoph Hellwig 		return error;
11964ed36c6bSChristoph Hellwig 
1197b73df17eSBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write,
1198b73df17eSBrian Foster 			XFS_DIOSTRAT_SPACE_RES(mp, 0), 0, 0, &tp);
1199b73df17eSBrian Foster 	if (error)
1200b73df17eSBrian Foster 		return error;
1201b73df17eSBrian Foster 
1202b73df17eSBrian Foster 	xfs_ilock(ip, XFS_ILOCK_EXCL);
1203dd87f87dSBrian Foster 	xfs_trans_ijoin(tp, ip, 0);
1204b73df17eSBrian Foster 
120585ef08b5SChandan Babu R 	error = xfs_iext_count_may_overflow(ip, XFS_DATA_FORK,
120685ef08b5SChandan Babu R 			XFS_IEXT_PUNCH_HOLE_CNT);
12074f86bb4bSChandan Babu R 	if (error == -EFBIG)
12084f86bb4bSChandan Babu R 		error = xfs_iext_count_upgrade(tp, ip, XFS_IEXT_PUNCH_HOLE_CNT);
120985ef08b5SChandan Babu R 	if (error)
121085ef08b5SChandan Babu R 		goto out_trans_cancel;
121185ef08b5SChandan Babu R 
1212dd87f87dSBrian Foster 	/*
1213dd87f87dSBrian Foster 	 * The extent shifting code works on extent granularity. So, if stop_fsb
1214dd87f87dSBrian Foster 	 * is not the starting block of an extent, we need to split the extent at
1215dd87f87dSBrian Foster 	 * stop_fsb.
1216dd87f87dSBrian Foster 	 */
1217b73df17eSBrian Foster 	error = xfs_bmap_split_extent(tp, ip, stop_fsb);
1218b73df17eSBrian Foster 	if (error)
1219b73df17eSBrian Foster 		goto out_trans_cancel;
1220b73df17eSBrian Foster 
1221dd87f87dSBrian Foster 	do {
12229c516e0eSBrian Foster 		error = xfs_defer_finish(&tp);
12234ed36c6bSChristoph Hellwig 		if (error)
1224dd87f87dSBrian Foster 			goto out_trans_cancel;
12254ed36c6bSChristoph Hellwig 
1226ecfea3f0SChristoph Hellwig 		error = xfs_bmap_insert_extents(tp, ip, &next_fsb, shift_fsb,
1227333f950cSBrian Foster 				&done, stop_fsb);
12284ed36c6bSChristoph Hellwig 		if (error)
1229c8eac49eSBrian Foster 			goto out_trans_cancel;
1230dd87f87dSBrian Foster 	} while (!done);
12314ed36c6bSChristoph Hellwig 
12324ed36c6bSChristoph Hellwig 	error = xfs_trans_commit(tp);
1233dd87f87dSBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
12344ed36c6bSChristoph Hellwig 	return error;
12354ed36c6bSChristoph Hellwig 
1236c8eac49eSBrian Foster out_trans_cancel:
12374ed36c6bSChristoph Hellwig 	xfs_trans_cancel(tp);
1238dd87f87dSBrian Foster 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
12394ed36c6bSChristoph Hellwig 	return error;
1240a904b1caSNamjae Jeon }
1241a904b1caSNamjae Jeon 
1242a904b1caSNamjae Jeon /*
1243a133d952SDave Chinner  * We need to check that the format of the data fork in the temporary inode is
1244a133d952SDave Chinner  * valid for the target inode before doing the swap. This is not a problem with
1245a133d952SDave Chinner  * attr1 because of the fixed fork offset, but attr2 has a dynamically sized
1246a133d952SDave Chinner  * data fork depending on the space the attribute fork is taking so we can get
1247a133d952SDave Chinner  * invalid formats on the target inode.
1248a133d952SDave Chinner  *
1249a133d952SDave Chinner  * E.g. target has space for 7 extents in extent format, temp inode only has
1250a133d952SDave Chinner  * space for 6.  If we defragment down to 7 extents, then the tmp format is a
1251a133d952SDave Chinner  * btree, but when swapped it needs to be in extent format. Hence we can't just
1252a133d952SDave Chinner  * blindly swap data forks on attr2 filesystems.
1253a133d952SDave Chinner  *
1254a133d952SDave Chinner  * Note that we check the swap in both directions so that we don't end up with
1255a133d952SDave Chinner  * a corrupt temporary inode, either.
1256a133d952SDave Chinner  *
1257a133d952SDave Chinner  * Note that fixing the way xfs_fsr sets up the attribute fork in the source
1258a133d952SDave Chinner  * inode will prevent this situation from occurring, so all we do here is
1259a133d952SDave Chinner  * reject and log the attempt. Basically, we are putting the responsibility on
1260a133d952SDave Chinner  * userspace to get this right.
1261a133d952SDave Chinner  */
1262a133d952SDave Chinner static int
1263a133d952SDave Chinner xfs_swap_extents_check_format(
1264e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1265e06259aaSDarrick J. Wong 	struct xfs_inode	*tip)	/* tmp inode */
1266a133d952SDave Chinner {
1267f7e67b20SChristoph Hellwig 	struct xfs_ifork	*ifp = &ip->i_df;
1268f7e67b20SChristoph Hellwig 	struct xfs_ifork	*tifp = &tip->i_df;
1269a133d952SDave Chinner 
1270765d3c39SDarrick J. Wong 	/* User/group/project quota ids must match if quotas are enforced. */
1271765d3c39SDarrick J. Wong 	if (XFS_IS_QUOTA_ON(ip->i_mount) &&
1272765d3c39SDarrick J. Wong 	    (!uid_eq(VFS_I(ip)->i_uid, VFS_I(tip)->i_uid) ||
1273765d3c39SDarrick J. Wong 	     !gid_eq(VFS_I(ip)->i_gid, VFS_I(tip)->i_gid) ||
1274ceaf603cSChristoph Hellwig 	     ip->i_projid != tip->i_projid))
1275765d3c39SDarrick J. Wong 		return -EINVAL;
1276765d3c39SDarrick J. Wong 
1277a133d952SDave Chinner 	/* Should never get a local format */
1278f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_LOCAL ||
1279f7e67b20SChristoph Hellwig 	    tifp->if_format == XFS_DINODE_FMT_LOCAL)
12802451337dSDave Chinner 		return -EINVAL;
1281a133d952SDave Chinner 
1282a133d952SDave Chinner 	/*
1283a133d952SDave Chinner 	 * If the target inode has fewer extents than the temporary inode,
1284a133d952SDave Chinner 	 * why did userspace call us?
1285a133d952SDave Chinner 	 */
1286f7e67b20SChristoph Hellwig 	if (ifp->if_nextents < tifp->if_nextents)
12872451337dSDave Chinner 		return -EINVAL;
1288a133d952SDave Chinner 
1289a133d952SDave Chinner 	/*
12901f08af52SDarrick J. Wong 	 * If we have to use the (expensive) rmap swap method, we can
12911f08af52SDarrick J. Wong 	 * handle any number of extents and any format.
12921f08af52SDarrick J. Wong 	 */
129338c26bfdSDave Chinner 	if (xfs_has_rmapbt(ip->i_mount))
12941f08af52SDarrick J. Wong 		return 0;
12951f08af52SDarrick J. Wong 
12961f08af52SDarrick J. Wong 	/*
1297a133d952SDave Chinner 	 * If the target inode is in extent form and the temp inode is in btree
1298a133d952SDave Chinner 	 * form, then we will end up with the target inode in the wrong format,
1299a133d952SDave Chinner 	 * as we already know there are fewer extents in the temp inode.
1300a133d952SDave Chinner 	 */
1301f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1302f7e67b20SChristoph Hellwig 	    tifp->if_format == XFS_DINODE_FMT_BTREE)
13032451337dSDave Chinner 		return -EINVAL;
1304a133d952SDave Chinner 
1305a133d952SDave Chinner 	/* Check temp in extent form to max in target */
1306f7e67b20SChristoph Hellwig 	if (tifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1307f7e67b20SChristoph Hellwig 	    tifp->if_nextents > XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
13082451337dSDave Chinner 		return -EINVAL;
1309a133d952SDave Chinner 
1310a133d952SDave Chinner 	/* Check target in extent form to max in temp */
1311f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_EXTENTS &&
1312f7e67b20SChristoph Hellwig 	    ifp->if_nextents > XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
13132451337dSDave Chinner 		return -EINVAL;
1314a133d952SDave Chinner 
1315a133d952SDave Chinner 	/*
1316a133d952SDave Chinner 	 * If we are in a btree format, check that the temp root block will fit
1317a133d952SDave Chinner 	 * in the target and that it has enough extents to be in btree format
1318a133d952SDave Chinner 	 * in the target.
1319a133d952SDave Chinner 	 *
1320a133d952SDave Chinner 	 * Note that we have to be careful to allow btree->extent conversions
1321a133d952SDave Chinner 	 * (a common defrag case) which will occur when the temp inode is in
1322a133d952SDave Chinner 	 * extent format...
1323a133d952SDave Chinner 	 */
1324f7e67b20SChristoph Hellwig 	if (tifp->if_format == XFS_DINODE_FMT_BTREE) {
1325932b42c6SDarrick J. Wong 		if (xfs_inode_has_attr_fork(ip) &&
1326c01147d9SDarrick J. Wong 		    XFS_BMAP_BMDR_SPACE(tifp->if_broot) > xfs_inode_fork_boff(ip))
13272451337dSDave Chinner 			return -EINVAL;
1328f7e67b20SChristoph Hellwig 		if (tifp->if_nextents <= XFS_IFORK_MAXEXT(ip, XFS_DATA_FORK))
13292451337dSDave Chinner 			return -EINVAL;
1330a133d952SDave Chinner 	}
1331a133d952SDave Chinner 
1332a133d952SDave Chinner 	/* Reciprocal target->temp btree format checks */
1333f7e67b20SChristoph Hellwig 	if (ifp->if_format == XFS_DINODE_FMT_BTREE) {
1334932b42c6SDarrick J. Wong 		if (xfs_inode_has_attr_fork(tip) &&
1335c01147d9SDarrick J. Wong 		    XFS_BMAP_BMDR_SPACE(ip->i_df.if_broot) > xfs_inode_fork_boff(tip))
13362451337dSDave Chinner 			return -EINVAL;
1337f7e67b20SChristoph Hellwig 		if (ifp->if_nextents <= XFS_IFORK_MAXEXT(tip, XFS_DATA_FORK))
13382451337dSDave Chinner 			return -EINVAL;
1339a133d952SDave Chinner 	}
1340a133d952SDave Chinner 
1341a133d952SDave Chinner 	return 0;
1342a133d952SDave Chinner }
1343a133d952SDave Chinner 
13447abbb8f9SDave Chinner static int
13454ef897a2SDave Chinner xfs_swap_extent_flush(
13464ef897a2SDave Chinner 	struct xfs_inode	*ip)
13474ef897a2SDave Chinner {
13484ef897a2SDave Chinner 	int	error;
13494ef897a2SDave Chinner 
13504ef897a2SDave Chinner 	error = filemap_write_and_wait(VFS_I(ip)->i_mapping);
13514ef897a2SDave Chinner 	if (error)
13524ef897a2SDave Chinner 		return error;
13534ef897a2SDave Chinner 	truncate_pagecache_range(VFS_I(ip), 0, -1);
13544ef897a2SDave Chinner 
13554ef897a2SDave Chinner 	/* Verify O_DIRECT for ftmp: invalidation must have left no cached pages */
13564ef897a2SDave Chinner 	if (VFS_I(ip)->i_mapping->nrpages)
13574ef897a2SDave Chinner 		return -EINVAL;
13584ef897a2SDave Chinner 	return 0;
13594ef897a2SDave Chinner }
13604ef897a2SDave Chinner 
13611f08af52SDarrick J. Wong /*
13621f08af52SDarrick J. Wong  * Move extents from one file to another, when rmap is enabled.
13631f08af52SDarrick J. Wong  */
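/*
 * A descriptive sketch of the loop below: read a mapping from the donor
 * file (tip), read the overlapping mapping from the source file (ip),
 * trim to the shorter of the two, queue deferred items that unmap the
 * range from both files and cross-map the two pieces, then finish the
 * deferred work and roll the transaction before advancing to the next
 * piece.
 */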
13641f08af52SDarrick J. Wong STATIC int
13651f08af52SDarrick J. Wong xfs_swap_extent_rmap(
13661f08af52SDarrick J. Wong 	struct xfs_trans		**tpp,
13671f08af52SDarrick J. Wong 	struct xfs_inode		*ip,
13681f08af52SDarrick J. Wong 	struct xfs_inode		*tip)
13691f08af52SDarrick J. Wong {
13707a7943c7SBrian Foster 	struct xfs_trans		*tp = *tpp;
13711f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		irec;
13721f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		uirec;
13731f08af52SDarrick J. Wong 	struct xfs_bmbt_irec		tirec;
13741f08af52SDarrick J. Wong 	xfs_fileoff_t			offset_fsb;
13751f08af52SDarrick J. Wong 	xfs_fileoff_t			end_fsb;
13761f08af52SDarrick J. Wong 	xfs_filblks_t			count_fsb;
13771f08af52SDarrick J. Wong 	int				error;
13781f08af52SDarrick J. Wong 	xfs_filblks_t			ilen;
13791f08af52SDarrick J. Wong 	xfs_filblks_t			rlen;
13801f08af52SDarrick J. Wong 	int				nimaps;
1381c8ce540dSDarrick J. Wong 	uint64_t			tip_flags2;
13821f08af52SDarrick J. Wong 
13831f08af52SDarrick J. Wong 	/*
13841f08af52SDarrick J. Wong 	 * If the source file has shared blocks, we must flag the donor
13851f08af52SDarrick J. Wong 	 * file as having shared blocks so that we get the shared-block
13861f08af52SDarrick J. Wong 	 * rmap functions when we go to fix up the rmaps.  The flags
13871f08af52SDarrick J. Wong 	 * will be switch for reals later.
13881f08af52SDarrick J. Wong 	 * will be switched for real later.
13893e09ab8fSChristoph Hellwig 	tip_flags2 = tip->i_diflags2;
13903e09ab8fSChristoph Hellwig 	if (ip->i_diflags2 & XFS_DIFLAG2_REFLINK)
13913e09ab8fSChristoph Hellwig 		tip->i_diflags2 |= XFS_DIFLAG2_REFLINK;
13921f08af52SDarrick J. Wong 
13931f08af52SDarrick J. Wong 	offset_fsb = 0;
13941f08af52SDarrick J. Wong 	end_fsb = XFS_B_TO_FSB(ip->i_mount, i_size_read(VFS_I(ip)));
13951f08af52SDarrick J. Wong 	count_fsb = (xfs_filblks_t)(end_fsb - offset_fsb);
13961f08af52SDarrick J. Wong 
13971f08af52SDarrick J. Wong 	while (count_fsb) {
13981f08af52SDarrick J. Wong 		/* Read extent from the donor file */
13991f08af52SDarrick J. Wong 		nimaps = 1;
14001f08af52SDarrick J. Wong 		error = xfs_bmapi_read(tip, offset_fsb, count_fsb, &tirec,
14011f08af52SDarrick J. Wong 				&nimaps, 0);
14021f08af52SDarrick J. Wong 		if (error)
14031f08af52SDarrick J. Wong 			goto out;
14041f08af52SDarrick J. Wong 		ASSERT(nimaps == 1);
14051f08af52SDarrick J. Wong 		ASSERT(tirec.br_startblock != DELAYSTARTBLOCK);
14061f08af52SDarrick J. Wong 
14071f08af52SDarrick J. Wong 		trace_xfs_swap_extent_rmap_remap(tip, &tirec);
14081f08af52SDarrick J. Wong 		ilen = tirec.br_blockcount;
14091f08af52SDarrick J. Wong 
14101f08af52SDarrick J. Wong 		/* Unmap the old blocks in the source file. */
14111f08af52SDarrick J. Wong 		while (tirec.br_blockcount) {
1412692b6cddSDave Chinner 			ASSERT(tp->t_highest_agno == NULLAGNUMBER);
14131f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(tip, &tirec);
14141f08af52SDarrick J. Wong 
14151f08af52SDarrick J. Wong 			/* Read extent from the source file */
14161f08af52SDarrick J. Wong 			nimaps = 1;
14171f08af52SDarrick J. Wong 			error = xfs_bmapi_read(ip, tirec.br_startoff,
14181f08af52SDarrick J. Wong 					tirec.br_blockcount, &irec,
14191f08af52SDarrick J. Wong 					&nimaps, 0);
14201f08af52SDarrick J. Wong 			if (error)
1421d5a2e289SBrian Foster 				goto out;
14221f08af52SDarrick J. Wong 			ASSERT(nimaps == 1);
14231f08af52SDarrick J. Wong 			ASSERT(tirec.br_startoff == irec.br_startoff);
14241f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(ip, &irec);
14251f08af52SDarrick J. Wong 
14261f08af52SDarrick J. Wong 			/* Trim the extent. */
14271f08af52SDarrick J. Wong 			uirec = tirec;
14281f08af52SDarrick J. Wong 			uirec.br_blockcount = rlen = min_t(xfs_filblks_t,
14291f08af52SDarrick J. Wong 					tirec.br_blockcount,
14301f08af52SDarrick J. Wong 					irec.br_blockcount);
14311f08af52SDarrick J. Wong 			trace_xfs_swap_extent_rmap_remap_piece(tip, &uirec);
14321f08af52SDarrick J. Wong 
1433bcc561f2SChandan Babu R 			if (xfs_bmap_is_real_extent(&uirec)) {
1434bcc561f2SChandan Babu R 				error = xfs_iext_count_may_overflow(ip,
1435bcc561f2SChandan Babu R 						XFS_DATA_FORK,
1436bcc561f2SChandan Babu R 						XFS_IEXT_SWAP_RMAP_CNT);
14374f86bb4bSChandan Babu R 				if (error == -EFBIG)
14384f86bb4bSChandan Babu R 					error = xfs_iext_count_upgrade(tp, ip,
14394f86bb4bSChandan Babu R 							XFS_IEXT_SWAP_RMAP_CNT);
1440bcc561f2SChandan Babu R 				if (error)
1441bcc561f2SChandan Babu R 					goto out;
1442bcc561f2SChandan Babu R 			}
1443bcc561f2SChandan Babu R 
1444bcc561f2SChandan Babu R 			if (xfs_bmap_is_real_extent(&irec)) {
1445bcc561f2SChandan Babu R 				error = xfs_iext_count_may_overflow(tip,
1446bcc561f2SChandan Babu R 						XFS_DATA_FORK,
1447bcc561f2SChandan Babu R 						XFS_IEXT_SWAP_RMAP_CNT);
14484f86bb4bSChandan Babu R 				if (error == -EFBIG)
14494f86bb4bSChandan Babu R 					error = xfs_iext_count_upgrade(tp, ip,
14504f86bb4bSChandan Babu R 							XFS_IEXT_SWAP_RMAP_CNT);
1451bcc561f2SChandan Babu R 				if (error)
1452bcc561f2SChandan Babu R 					goto out;
1453bcc561f2SChandan Babu R 			}
1454bcc561f2SChandan Babu R 
14551f08af52SDarrick J. Wong 			/* Remove the mapping from the donor file. */
14563e08f42aSDarrick J. Wong 			xfs_bmap_unmap_extent(tp, tip, &uirec);
14571f08af52SDarrick J. Wong 
14581f08af52SDarrick J. Wong 			/* Remove the mapping from the source file. */
14593e08f42aSDarrick J. Wong 			xfs_bmap_unmap_extent(tp, ip, &irec);
14601f08af52SDarrick J. Wong 
14611f08af52SDarrick J. Wong 			/* Map the donor file's blocks into the source file. */
14623e08f42aSDarrick J. Wong 			xfs_bmap_map_extent(tp, ip, &uirec);
14631f08af52SDarrick J. Wong 
14641f08af52SDarrick J. Wong 			/* Map the source file's blocks into the donor file. */
14653e08f42aSDarrick J. Wong 			xfs_bmap_map_extent(tp, tip, &irec);
14661f08af52SDarrick J. Wong 
14679e28a242SBrian Foster 			error = xfs_defer_finish(tpp);
14687a7943c7SBrian Foster 			tp = *tpp;
14691f08af52SDarrick J. Wong 			if (error)
14709b1f4e98SBrian Foster 				goto out;
14711f08af52SDarrick J. Wong 
14721f08af52SDarrick J. Wong 			tirec.br_startoff += rlen;
14731f08af52SDarrick J. Wong 			if (tirec.br_startblock != HOLESTARTBLOCK &&
14741f08af52SDarrick J. Wong 			    tirec.br_startblock != DELAYSTARTBLOCK)
14751f08af52SDarrick J. Wong 				tirec.br_startblock += rlen;
14761f08af52SDarrick J. Wong 			tirec.br_blockcount -= rlen;
14771f08af52SDarrick J. Wong 		}
14781f08af52SDarrick J. Wong 
14791f08af52SDarrick J. Wong 		/* Roll on... */
14801f08af52SDarrick J. Wong 		count_fsb -= ilen;
14811f08af52SDarrick J. Wong 		offset_fsb += ilen;
14821f08af52SDarrick J. Wong 	}
14831f08af52SDarrick J. Wong 
14843e09ab8fSChristoph Hellwig 	tip->i_diflags2 = tip_flags2;
14851f08af52SDarrick J. Wong 	return 0;
14861f08af52SDarrick J. Wong 
14871f08af52SDarrick J. Wong out:
14881f08af52SDarrick J. Wong 	trace_xfs_swap_extent_rmap_error(ip, error, _RET_IP_);
14893e09ab8fSChristoph Hellwig 	tip->i_diflags2 = tip_flags2;
14901f08af52SDarrick J. Wong 	return error;
14911f08af52SDarrick J. Wong }
14921f08af52SDarrick J. Wong 
149339aff5fdSDarrick J. Wong /* Swap the extents of two files by swapping data forks. */
149439aff5fdSDarrick J. Wong STATIC int
149539aff5fdSDarrick J. Wong xfs_swap_extent_forks(
149639aff5fdSDarrick J. Wong 	struct xfs_trans	*tp,
149739aff5fdSDarrick J. Wong 	struct xfs_inode	*ip,
149839aff5fdSDarrick J. Wong 	struct xfs_inode	*tip,
149939aff5fdSDarrick J. Wong 	int			*src_log_flags,
150039aff5fdSDarrick J. Wong 	int			*target_log_flags)
150139aff5fdSDarrick J. Wong {
1502e7f5d5caSDarrick J. Wong 	xfs_filblks_t		aforkblks = 0;
1503e7f5d5caSDarrick J. Wong 	xfs_filblks_t		taforkblks = 0;
1504e7f5d5caSDarrick J. Wong 	xfs_extnum_t		junk;
1505c8ce540dSDarrick J. Wong 	uint64_t		tmp;
150639aff5fdSDarrick J. Wong 	int			error;
150739aff5fdSDarrick J. Wong 
150839aff5fdSDarrick J. Wong 	/*
150939aff5fdSDarrick J. Wong 	 * Count the number of extended attribute blocks
151039aff5fdSDarrick J. Wong 	 */
1511932b42c6SDarrick J. Wong 	if (xfs_inode_has_attr_fork(ip) && ip->i_af.if_nextents > 0 &&
15122ed5b09bSDarrick J. Wong 	    ip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
1513e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, ip, XFS_ATTR_FORK, &junk,
151439aff5fdSDarrick J. Wong 				&aforkblks);
151539aff5fdSDarrick J. Wong 		if (error)
151639aff5fdSDarrick J. Wong 			return error;
151739aff5fdSDarrick J. Wong 	}
1518932b42c6SDarrick J. Wong 	if (xfs_inode_has_attr_fork(tip) && tip->i_af.if_nextents > 0 &&
15192ed5b09bSDarrick J. Wong 	    tip->i_af.if_format != XFS_DINODE_FMT_LOCAL) {
1520e7f5d5caSDarrick J. Wong 		error = xfs_bmap_count_blocks(tp, tip, XFS_ATTR_FORK, &junk,
152139aff5fdSDarrick J. Wong 				&taforkblks);
152239aff5fdSDarrick J. Wong 		if (error)
152339aff5fdSDarrick J. Wong 			return error;
152439aff5fdSDarrick J. Wong 	}
152539aff5fdSDarrick J. Wong 
152639aff5fdSDarrick J. Wong 	/*
15276fb10d6dSBrian Foster 	 * Btree format (v3) inodes have the inode number stamped in the bmbt
15286fb10d6dSBrian Foster 	 * block headers. We can't start changing the bmbt blocks until the
15296fb10d6dSBrian Foster 	 * inode owner change is logged so recovery does the right thing in the
15306fb10d6dSBrian Foster 	 * event of a crash. Set the owner change log flags now and leave the
15316fb10d6dSBrian Foster 	 * bmbt scan as the last step.
153239aff5fdSDarrick J. Wong 	 */
153338c26bfdSDave Chinner 	if (xfs_has_v3inodes(ip->i_mount)) {
1534f7e67b20SChristoph Hellwig 		if (ip->i_df.if_format == XFS_DINODE_FMT_BTREE)
153539aff5fdSDarrick J. Wong 			(*target_log_flags) |= XFS_ILOG_DOWNER;
1536f7e67b20SChristoph Hellwig 		if (tip->i_df.if_format == XFS_DINODE_FMT_BTREE)
153739aff5fdSDarrick J. Wong 			(*src_log_flags) |= XFS_ILOG_DOWNER;
15386471e9c5SChristoph Hellwig 	}
153939aff5fdSDarrick J. Wong 
154039aff5fdSDarrick J. Wong 	/*
154139aff5fdSDarrick J. Wong 	 * Swap the data forks of the inodes
154239aff5fdSDarrick J. Wong 	 */
1543897992b7SGustavo A. R. Silva 	swap(ip->i_df, tip->i_df);
154439aff5fdSDarrick J. Wong 
154539aff5fdSDarrick J. Wong 	/*
154639aff5fdSDarrick J. Wong 	 * Fix the on-disk inode values
154739aff5fdSDarrick J. Wong 	 */
15486e73a545SChristoph Hellwig 	tmp = (uint64_t)ip->i_nblocks;
15496e73a545SChristoph Hellwig 	ip->i_nblocks = tip->i_nblocks - taforkblks + aforkblks;
15506e73a545SChristoph Hellwig 	tip->i_nblocks = tmp + taforkblks - aforkblks;
155139aff5fdSDarrick J. Wong 
155239aff5fdSDarrick J. Wong 	/*
155339aff5fdSDarrick J. Wong 	 * The extents in the source inode could still contain speculative
155439aff5fdSDarrick J. Wong 	 * preallocation beyond EOF (e.g. the file is open but not modified
155539aff5fdSDarrick J. Wong 	 * while defrag is in progress). In that case, we need to copy over the
155639aff5fdSDarrick J. Wong 	 * number of delalloc blocks the data fork in the source inode is
155739aff5fdSDarrick J. Wong 	 * tracking beyond EOF so that when the fork is truncated away when the
155839aff5fdSDarrick J. Wong 	 * temporary inode is unlinked we don't underrun the i_delayed_blks
155939aff5fdSDarrick J. Wong 	 * counter on that inode.
156039aff5fdSDarrick J. Wong 	 */
156139aff5fdSDarrick J. Wong 	ASSERT(tip->i_delayed_blks == 0);
156239aff5fdSDarrick J. Wong 	tip->i_delayed_blks = ip->i_delayed_blks;
156339aff5fdSDarrick J. Wong 	ip->i_delayed_blks = 0;
156439aff5fdSDarrick J. Wong 
1565f7e67b20SChristoph Hellwig 	switch (ip->i_df.if_format) {
156639aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
156739aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DEXT;
156839aff5fdSDarrick J. Wong 		break;
156939aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
157038c26bfdSDave Chinner 		ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
157139aff5fdSDarrick J. Wong 		       (*src_log_flags & XFS_ILOG_DOWNER));
157239aff5fdSDarrick J. Wong 		(*src_log_flags) |= XFS_ILOG_DBROOT;
157339aff5fdSDarrick J. Wong 		break;
157439aff5fdSDarrick J. Wong 	}
157539aff5fdSDarrick J. Wong 
1576f7e67b20SChristoph Hellwig 	switch (tip->i_df.if_format) {
157739aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_EXTENTS:
157839aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DEXT;
157939aff5fdSDarrick J. Wong 		break;
158039aff5fdSDarrick J. Wong 	case XFS_DINODE_FMT_BTREE:
158139aff5fdSDarrick J. Wong 		(*target_log_flags) |= XFS_ILOG_DBROOT;
158238c26bfdSDave Chinner 		ASSERT(!xfs_has_v3inodes(ip->i_mount) ||
158339aff5fdSDarrick J. Wong 		       (*target_log_flags & XFS_ILOG_DOWNER));
158439aff5fdSDarrick J. Wong 		break;
158539aff5fdSDarrick J. Wong 	}
158639aff5fdSDarrick J. Wong 
158739aff5fdSDarrick J. Wong 	return 0;
158839aff5fdSDarrick J. Wong }
158939aff5fdSDarrick J. Wong 
15902dd3d709SBrian Foster /*
15912dd3d709SBrian Foster  * Fix up the owners of the bmbt blocks to refer to the current inode. The
15922dd3d709SBrian Foster  * change owner scan attempts to order all modified buffers in the current
15932dd3d709SBrian Foster  * transaction. In the event of ordered buffer failure, the offending buffer is
15942dd3d709SBrian Foster  * physically logged as a fallback and the scan returns -EAGAIN. We must roll
15952dd3d709SBrian Foster  * the transaction in this case to replenish the fallback log reservation and
15962dd3d709SBrian Foster  * restart the scan. This process repeats until the scan completes.
15972dd3d709SBrian Foster  */
15982dd3d709SBrian Foster static int
15992dd3d709SBrian Foster xfs_swap_change_owner(
16002dd3d709SBrian Foster 	struct xfs_trans	**tpp,
16012dd3d709SBrian Foster 	struct xfs_inode	*ip,
16022dd3d709SBrian Foster 	struct xfs_inode	*tmpip)
16032dd3d709SBrian Foster {
16042dd3d709SBrian Foster 	int			error;
16052dd3d709SBrian Foster 	struct xfs_trans	*tp = *tpp;
16062dd3d709SBrian Foster 
16072dd3d709SBrian Foster 	do {
16082dd3d709SBrian Foster 		error = xfs_bmbt_change_owner(tp, ip, XFS_DATA_FORK, ip->i_ino,
16092dd3d709SBrian Foster 					      NULL);
16102dd3d709SBrian Foster 		/* success or fatal error */
16112dd3d709SBrian Foster 		if (error != -EAGAIN)
16122dd3d709SBrian Foster 			break;
16132dd3d709SBrian Foster 
16142dd3d709SBrian Foster 		error = xfs_trans_roll(tpp);
16152dd3d709SBrian Foster 		if (error)
16162dd3d709SBrian Foster 			break;
16172dd3d709SBrian Foster 		tp = *tpp;
16182dd3d709SBrian Foster 
16192dd3d709SBrian Foster 		/*
16202dd3d709SBrian Foster 		 * Redirty both inodes so they can relog and keep the log tail
16212dd3d709SBrian Foster 		 * moving forward.
16222dd3d709SBrian Foster 		 */
16232dd3d709SBrian Foster 		xfs_trans_ijoin(tp, ip, 0);
16242dd3d709SBrian Foster 		xfs_trans_ijoin(tp, tmpip, 0);
16252dd3d709SBrian Foster 		xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
16262dd3d709SBrian Foster 		xfs_trans_log_inode(tp, tmpip, XFS_ILOG_CORE);
16272dd3d709SBrian Foster 	} while (true);
16282dd3d709SBrian Foster 
16292dd3d709SBrian Foster 	return error;
16302dd3d709SBrian Foster }
16312dd3d709SBrian Foster 
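/*
 * For context: xfs_swap_extents() is the backend for the XFS_IOC_SWAPEXT
 * ioctl driven by the xfs_fsr defragmenter. A rough userspace sketch,
 * where target_fd, tmp_fd, target_size and bstat are assumed to come from
 * a prior open and bulkstat of the target so the ctime/mtime comparison
 * below can detect concurrent modification:
 *
 *	struct xfs_swapext sx = {
 *		.sx_version  = XFS_SX_VERSION,
 *		.sx_fdtarget = target_fd,
 *		.sx_fdtmp    = tmp_fd,
 *		.sx_offset   = 0,
 *		.sx_length   = target_size,
 *		.sx_stat     = bstat,	// struct xfs_bstat of the target
 *	};
 *	ioctl(target_fd, XFS_IOC_SWAPEXT, &sx);
 */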
16324ef897a2SDave Chinner int
1633a133d952SDave Chinner xfs_swap_extents(
1634e06259aaSDarrick J. Wong 	struct xfs_inode	*ip,	/* target inode */
1635e06259aaSDarrick J. Wong 	struct xfs_inode	*tip,	/* tmp inode */
1636e06259aaSDarrick J. Wong 	struct xfs_swapext	*sxp)
1637a133d952SDave Chinner {
1638e06259aaSDarrick J. Wong 	struct xfs_mount	*mp = ip->i_mount;
1639e06259aaSDarrick J. Wong 	struct xfs_trans	*tp;
1640e06259aaSDarrick J. Wong 	struct xfs_bstat	*sbp = &sxp->sx_stat;
1641a133d952SDave Chinner 	int			src_log_flags, target_log_flags;
1642a133d952SDave Chinner 	int			error = 0;
1643c8ce540dSDarrick J. Wong 	uint64_t		f;
16442dd3d709SBrian Foster 	int			resblks = 0;
1645f74681baSBrian Foster 	unsigned int		flags = 0;
1646a0a415e3SJeff Layton 	struct timespec64	ctime;
1647a133d952SDave Chinner 
1648a133d952SDave Chinner 	/*
1649723cac48SDave Chinner 	 * Lock the inodes against other IO, page faults and truncate to
1650723cac48SDave Chinner 	 * begin with.  Then we can safely ensure the inodes are flushed and
1651723cac48SDave Chinner 	 * have no page cache. Once we have done this we can take the ilocks
1652723cac48SDave Chinner 	 * and do the rest of the checks.
1653a133d952SDave Chinner 	 */
165465523218SChristoph Hellwig 	lock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1655d2c292d8SJan Kara 	filemap_invalidate_lock_two(VFS_I(ip)->i_mapping,
1656d2c292d8SJan Kara 				    VFS_I(tip)->i_mapping);
1657a133d952SDave Chinner 
1658a133d952SDave Chinner 	/* Verify that both files have the same format */
1659c19b3b05SDave Chinner 	if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
16602451337dSDave Chinner 		error = -EINVAL;
1661a133d952SDave Chinner 		goto out_unlock;
1662a133d952SDave Chinner 	}
1663a133d952SDave Chinner 
1664a133d952SDave Chinner 	/* Verify both files are either real-time or non-realtime */
1665a133d952SDave Chinner 	if (XFS_IS_REALTIME_INODE(ip) != XFS_IS_REALTIME_INODE(tip)) {
16662451337dSDave Chinner 		error = -EINVAL;
1667a133d952SDave Chinner 		goto out_unlock;
1668a133d952SDave Chinner 	}
1669a133d952SDave Chinner 
16702713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(ip);
16712713fefaSDarrick J. Wong 	if (error)
16722713fefaSDarrick J. Wong 		goto out_unlock;
16732713fefaSDarrick J. Wong 
16742713fefaSDarrick J. Wong 	error = xfs_qm_dqattach(tip);
16752713fefaSDarrick J. Wong 	if (error)
16762713fefaSDarrick J. Wong 		goto out_unlock;
16772713fefaSDarrick J. Wong 
16784ef897a2SDave Chinner 	error = xfs_swap_extent_flush(ip);
1679a133d952SDave Chinner 	if (error)
1680a133d952SDave Chinner 		goto out_unlock;
16814ef897a2SDave Chinner 	error = xfs_swap_extent_flush(tip);
16824ef897a2SDave Chinner 	if (error)
16834ef897a2SDave Chinner 		goto out_unlock;
1684a133d952SDave Chinner 
168596987eeaSChristoph Hellwig 	if (xfs_inode_has_cow_data(tip)) {
168696987eeaSChristoph Hellwig 		error = xfs_reflink_cancel_cow_range(tip, 0, NULLFILEOFF, true);
168796987eeaSChristoph Hellwig 		if (error)
16888bc3b5e4SDarrick J. Wong 			goto out_unlock;
168996987eeaSChristoph Hellwig 	}
169096987eeaSChristoph Hellwig 
16911f08af52SDarrick J. Wong 	/*
16921f08af52SDarrick J. Wong 	 * Extent "swapping" with rmap requires a permanent reservation and
16931f08af52SDarrick J. Wong 	 * a block reservation because it's really just a remap operation
16941f08af52SDarrick J. Wong 	 * performed with log redo items!
16951f08af52SDarrick J. Wong 	 */
169638c26bfdSDave Chinner 	if (xfs_has_rmapbt(mp)) {
1697b3fed434SBrian Foster 		int		w = XFS_DATA_FORK;
1698daf83964SChristoph Hellwig 		uint32_t	ipnext = ip->i_df.if_nextents;
1699daf83964SChristoph Hellwig 		uint32_t	tipnext	= tip->i_df.if_nextents;
1700b3fed434SBrian Foster 
17011f08af52SDarrick J. Wong 		/*
1702b3fed434SBrian Foster 		 * Conceptually this shouldn't affect the shape of either bmbt,
1703b3fed434SBrian Foster 		 * but since we atomically move extents one by one, we reserve
1704b3fed434SBrian Foster 		 * enough space to rebuild both trees.
17051f08af52SDarrick J. Wong 		 */
1706b3fed434SBrian Foster 		resblks = XFS_SWAP_RMAP_SPACE_RES(mp, ipnext, w);
1707b3fed434SBrian Foster 		resblks +=  XFS_SWAP_RMAP_SPACE_RES(mp, tipnext, w);
1708b3fed434SBrian Foster 
1709b3fed434SBrian Foster 		/*
1710f74681baSBrian Foster 		 * If either inode straddles a bmapbt block allocation boundary,
1711f74681baSBrian Foster 		 * the rmapbt algorithm triggers repeated allocs and frees as
1712f74681baSBrian Foster 		 * extents are remapped. This can exhaust the block reservation
1713f74681baSBrian Foster 		 * prematurely and cause shutdown. Return freed blocks to the
1714f74681baSBrian Foster 		 * transaction reservation to counter this behavior.
1715b3fed434SBrian Foster 		 */
1716f74681baSBrian Foster 		flags |= XFS_TRANS_RES_FDBLKS;
17172dd3d709SBrian Foster 	}
1718f74681baSBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, resblks, 0, flags,
1719f74681baSBrian Foster 				&tp);
1720253f4911SChristoph Hellwig 	if (error)
1721a133d952SDave Chinner 		goto out_unlock;
1722723cac48SDave Chinner 
1723723cac48SDave Chinner 	/*
1724723cac48SDave Chinner 	 * Lock and join the inodes to the transaction so that transaction commit
1725723cac48SDave Chinner 	 * or cancel will unlock the inodes from this point onwards.
1726723cac48SDave Chinner 	 */
17277c2d238aSDarrick J. Wong 	xfs_lock_two_inodes(ip, XFS_ILOCK_EXCL, tip, XFS_ILOCK_EXCL);
172839aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, ip, 0);
172939aff5fdSDarrick J. Wong 	xfs_trans_ijoin(tp, tip, 0);
1730723cac48SDave Chinner 
1732a133d952SDave Chinner 	/* Verify all data are being swapped */
1733a133d952SDave Chinner 	if (sxp->sx_offset != 0 ||
173413d2c10bSChristoph Hellwig 	    sxp->sx_length != ip->i_disk_size ||
173513d2c10bSChristoph Hellwig 	    sxp->sx_length != tip->i_disk_size) {
17362451337dSDave Chinner 		error = -EFAULT;
17374ef897a2SDave Chinner 		goto out_trans_cancel;
1738a133d952SDave Chinner 	}
1739a133d952SDave Chinner 
1740a133d952SDave Chinner 	trace_xfs_swap_extent_before(ip, 0);
1741a133d952SDave Chinner 	trace_xfs_swap_extent_before(tip, 1);
1742a133d952SDave Chinner 
1743a133d952SDave Chinner 	/* check inode formats now that data is flushed */
1744a133d952SDave Chinner 	error = xfs_swap_extents_check_format(ip, tip);
1745a133d952SDave Chinner 	if (error) {
1746a133d952SDave Chinner 		xfs_notice(mp,
1747a133d952SDave Chinner 		    "%s: inode 0x%llx format is incompatible for exchanging.",
1748a133d952SDave Chinner 				__func__, ip->i_ino);
17494ef897a2SDave Chinner 		goto out_trans_cancel;
1750a133d952SDave Chinner 	}
1751a133d952SDave Chinner 
1752a133d952SDave Chinner 	/*
1753a133d952SDave Chinner 	 * Compare the current change & modify times with those
1754a133d952SDave Chinner 	 * passed in.  If they differ, we abort this swap.
1755a133d952SDave Chinner 	 * This is the mechanism used to assure the calling
1756a133d952SDave Chinner 	 * process that the file was not changed out from
1757a133d952SDave Chinner 	 * under it.
1758a133d952SDave Chinner 	 */
1759a0a415e3SJeff Layton 	ctime = inode_get_ctime(VFS_I(ip));
1760a0a415e3SJeff Layton 	if ((sbp->bs_ctime.tv_sec != ctime.tv_sec) ||
1761a0a415e3SJeff Layton 	    (sbp->bs_ctime.tv_nsec != ctime.tv_nsec) ||
1762a133d952SDave Chinner 	    (sbp->bs_mtime.tv_sec != VFS_I(ip)->i_mtime.tv_sec) ||
1763a133d952SDave Chinner 	    (sbp->bs_mtime.tv_nsec != VFS_I(ip)->i_mtime.tv_nsec)) {
17642451337dSDave Chinner 		error = -EBUSY;
176581217683SDave Chinner 		goto out_trans_cancel;
1766a133d952SDave Chinner 	}
1767a133d952SDave Chinner 
176821b5c978SDave Chinner 	/*
176921b5c978SDave Chinner 	 * Note the trickiness in setting the log flags - we set the owner log
177021b5c978SDave Chinner 	 * flag on the opposite inode (i.e. the inode we are setting the new
177121b5c978SDave Chinner 	 * owner to be) because once we swap the forks and log that, log
177221b5c978SDave Chinner 	 * recovery is going to see the fork as owned by the swapped inode,
177321b5c978SDave Chinner 	 * not the pre-swapped inodes.
177421b5c978SDave Chinner 	 */
177521b5c978SDave Chinner 	src_log_flags = XFS_ILOG_CORE;
177621b5c978SDave Chinner 	target_log_flags = XFS_ILOG_CORE;
177739aff5fdSDarrick J. Wong 
177838c26bfdSDave Chinner 	if (xfs_has_rmapbt(mp))
17791f08af52SDarrick J. Wong 		error = xfs_swap_extent_rmap(&tp, ip, tip);
17801f08af52SDarrick J. Wong 	else
178139aff5fdSDarrick J. Wong 		error = xfs_swap_extent_forks(tp, ip, tip, &src_log_flags,
178239aff5fdSDarrick J. Wong 				&target_log_flags);
178321b5c978SDave Chinner 	if (error)
178421b5c978SDave Chinner 		goto out_trans_cancel;
1785a133d952SDave Chinner 
1786f0bc4d13SDarrick J. Wong 	/* Do we have to swap reflink flags? */
17873e09ab8fSChristoph Hellwig 	if ((ip->i_diflags2 & XFS_DIFLAG2_REFLINK) ^
17883e09ab8fSChristoph Hellwig 	    (tip->i_diflags2 & XFS_DIFLAG2_REFLINK)) {
17893e09ab8fSChristoph Hellwig 		f = ip->i_diflags2 & XFS_DIFLAG2_REFLINK;
17903e09ab8fSChristoph Hellwig 		ip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
17913e09ab8fSChristoph Hellwig 		ip->i_diflags2 |= tip->i_diflags2 & XFS_DIFLAG2_REFLINK;
17923e09ab8fSChristoph Hellwig 		tip->i_diflags2 &= ~XFS_DIFLAG2_REFLINK;
17933e09ab8fSChristoph Hellwig 		tip->i_diflags2 |= f & XFS_DIFLAG2_REFLINK;
179452bfcdd7SDarrick J. Wong 	}
179552bfcdd7SDarrick J. Wong 
179652bfcdd7SDarrick J. Wong 	/* Swap the cow forks. */
179738c26bfdSDave Chinner 	if (xfs_has_reflink(mp)) {
1798f7e67b20SChristoph Hellwig 		ASSERT(!ip->i_cowfp ||
1799f7e67b20SChristoph Hellwig 		       ip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
1800f7e67b20SChristoph Hellwig 		ASSERT(!tip->i_cowfp ||
1801f7e67b20SChristoph Hellwig 		       tip->i_cowfp->if_format == XFS_DINODE_FMT_EXTENTS);
180252bfcdd7SDarrick J. Wong 
1803897992b7SGustavo A. R. Silva 		swap(ip->i_cowfp, tip->i_cowfp);
180452bfcdd7SDarrick J. Wong 
18055bcffe30SChristoph Hellwig 		if (ip->i_cowfp && ip->i_cowfp->if_bytes)
180683104d44SDarrick J. Wong 			xfs_inode_set_cowblocks_tag(ip);
180752bfcdd7SDarrick J. Wong 		else
180852bfcdd7SDarrick J. Wong 			xfs_inode_clear_cowblocks_tag(ip);
18095bcffe30SChristoph Hellwig 		if (tip->i_cowfp && tip->i_cowfp->if_bytes)
181083104d44SDarrick J. Wong 			xfs_inode_set_cowblocks_tag(tip);
181152bfcdd7SDarrick J. Wong 		else
181252bfcdd7SDarrick J. Wong 			xfs_inode_clear_cowblocks_tag(tip);
1813f0bc4d13SDarrick J. Wong 	}
1814f0bc4d13SDarrick J. Wong 
1815a133d952SDave Chinner 	xfs_trans_log_inode(tp, ip,  src_log_flags);
1816a133d952SDave Chinner 	xfs_trans_log_inode(tp, tip, target_log_flags);
1817a133d952SDave Chinner 
1818a133d952SDave Chinner 	/*
18196fb10d6dSBrian Foster 	 * The extent forks have been swapped, but crc=1,rmapbt=0 filesystems
18206fb10d6dSBrian Foster 	 * have inode number owner values in the bmbt blocks that still refer to
18216fb10d6dSBrian Foster 	 * the old inode. Scan each bmbt to fix up the owner values with the
18226fb10d6dSBrian Foster 	 * inode number of the current inode.
18236fb10d6dSBrian Foster 	 */
18246fb10d6dSBrian Foster 	if (src_log_flags & XFS_ILOG_DOWNER) {
18252dd3d709SBrian Foster 		error = xfs_swap_change_owner(&tp, ip, tip);
18266fb10d6dSBrian Foster 		if (error)
18276fb10d6dSBrian Foster 			goto out_trans_cancel;
18286fb10d6dSBrian Foster 	}
18296fb10d6dSBrian Foster 	if (target_log_flags & XFS_ILOG_DOWNER) {
18302dd3d709SBrian Foster 		error = xfs_swap_change_owner(&tp, tip, ip);
18316fb10d6dSBrian Foster 		if (error)
18326fb10d6dSBrian Foster 			goto out_trans_cancel;
18336fb10d6dSBrian Foster 	}
18346fb10d6dSBrian Foster 
18356fb10d6dSBrian Foster 	/*
1836a133d952SDave Chinner 	 * If this is a synchronous mount, make sure that the
1837a133d952SDave Chinner 	 * transaction goes to disk before returning to the user.
1838a133d952SDave Chinner 	 */
18390560f31aSDave Chinner 	if (xfs_has_wsync(mp))
1840a133d952SDave Chinner 		xfs_trans_set_sync(tp);
1841a133d952SDave Chinner 
184270393313SChristoph Hellwig 	error = xfs_trans_commit(tp);
1843a133d952SDave Chinner 
1844a133d952SDave Chinner 	trace_xfs_swap_extent_after(ip, 0);
1845a133d952SDave Chinner 	trace_xfs_swap_extent_after(tip, 1);
184639aff5fdSDarrick J. Wong 
1847d2c292d8SJan Kara out_unlock_ilock:
1848d2c292d8SJan Kara 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
1849d2c292d8SJan Kara 	xfs_iunlock(tip, XFS_ILOCK_EXCL);
185065523218SChristoph Hellwig out_unlock:
1851d2c292d8SJan Kara 	filemap_invalidate_unlock_two(VFS_I(ip)->i_mapping,
1852d2c292d8SJan Kara 				      VFS_I(tip)->i_mapping);
185365523218SChristoph Hellwig 	unlock_two_nondirectories(VFS_I(ip), VFS_I(tip));
1854a133d952SDave Chinner 	return error;
1855a133d952SDave Chinner 
185639aff5fdSDarrick J. Wong out_trans_cancel:
185739aff5fdSDarrick J. Wong 	xfs_trans_cancel(tp);
1858d2c292d8SJan Kara 	goto out_unlock_ilock;
1859a133d952SDave Chinner }
1860