xref: /linux/fs/xfs/xfs_aops.c (revision 568570fdf2b941115f0b1cf8d539255a1c707d9e)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
7c59d87c4SChristoph Hellwig #include "xfs.h"
870a9883cSDave Chinner #include "xfs_shared.h"
9239880efSDave Chinner #include "xfs_format.h"
10239880efSDave Chinner #include "xfs_log_format.h"
11239880efSDave Chinner #include "xfs_trans_resv.h"
12c59d87c4SChristoph Hellwig #include "xfs_mount.h"
13c59d87c4SChristoph Hellwig #include "xfs_inode.h"
14239880efSDave Chinner #include "xfs_trans.h"
15c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
16c59d87c4SChristoph Hellwig #include "xfs_trace.h"
17c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
1868988114SDave Chinner #include "xfs_bmap_util.h"
19ef473667SDarrick J. Wong #include "xfs_reflink.h"
20c2beff99SDarrick J. Wong #include "xfs_errortag.h"
21c2beff99SDarrick J. Wong #include "xfs_error.h"
22c59d87c4SChristoph Hellwig 
23fbcc0256SDave Chinner struct xfs_writepage_ctx {
24598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx ctx;
25d9252d52SBrian Foster 	unsigned int		data_seq;
26e666aa37SChristoph Hellwig 	unsigned int		cow_seq;
27fbcc0256SDave Chinner };
28fbcc0256SDave Chinner 
29598ecfbaSChristoph Hellwig static inline struct xfs_writepage_ctx *
XFS_WPC(struct iomap_writepage_ctx * ctx)30598ecfbaSChristoph Hellwig XFS_WPC(struct iomap_writepage_ctx *ctx)
31598ecfbaSChristoph Hellwig {
32598ecfbaSChristoph Hellwig 	return container_of(ctx, struct xfs_writepage_ctx, ctx);
33598ecfbaSChristoph Hellwig }
34598ecfbaSChristoph Hellwig 
35c59d87c4SChristoph Hellwig /*
36fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
37fc0063c4SChristoph Hellwig  */
xfs_ioend_is_append(struct iomap_ioend * ioend)38598ecfbaSChristoph Hellwig static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
39fc0063c4SChristoph Hellwig {
40fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
4113d2c10bSChristoph Hellwig 		XFS_I(ioend->io_inode)->i_disk_size;
42fc0063c4SChristoph Hellwig }
43fc0063c4SChristoph Hellwig 
44fc0063c4SChristoph Hellwig /*
452813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
46c59d87c4SChristoph Hellwig  */
47e7a3d7e7SBrian Foster int
xfs_setfilesize(struct xfs_inode * ip,xfs_off_t offset,size_t size)48e7a3d7e7SBrian Foster xfs_setfilesize(
492ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
502ba66237SChristoph Hellwig 	xfs_off_t		offset,
512ba66237SChristoph Hellwig 	size_t			size)
52c59d87c4SChristoph Hellwig {
53e7a3d7e7SBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
54e7a3d7e7SBrian Foster 	struct xfs_trans	*tp;
55c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
56e7a3d7e7SBrian Foster 	int			error;
57e7a3d7e7SBrian Foster 
58e7a3d7e7SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
59e7a3d7e7SBrian Foster 	if (error)
60e7a3d7e7SBrian Foster 		return error;
61c59d87c4SChristoph Hellwig 
62aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
632ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
64281627dfSChristoph Hellwig 	if (!isize) {
65281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
664906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
67281627dfSChristoph Hellwig 		return 0;
68c59d87c4SChristoph Hellwig 	}
69c59d87c4SChristoph Hellwig 
702ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
71281627dfSChristoph Hellwig 
7213d2c10bSChristoph Hellwig 	ip->i_disk_size = isize;
73281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
74281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
75281627dfSChristoph Hellwig 
7670393313SChristoph Hellwig 	return xfs_trans_commit(tp);
77c59d87c4SChristoph Hellwig }
78c59d87c4SChristoph Hellwig 
79c59d87c4SChristoph Hellwig /*
80c59d87c4SChristoph Hellwig  * IO write completion.
81c59d87c4SChristoph Hellwig  */
82c59d87c4SChristoph Hellwig STATIC void
xfs_end_ioend(struct iomap_ioend * ioend)83cb357bf3SDarrick J. Wong xfs_end_ioend(
84598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend)
85c59d87c4SChristoph Hellwig {
86c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
875ca5916bSBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
88787eb485SChristoph Hellwig 	xfs_off_t		offset = ioend->io_offset;
89787eb485SChristoph Hellwig 	size_t			size = ioend->io_size;
9073d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
914e4cbee9SChristoph Hellwig 	int			error;
92c59d87c4SChristoph Hellwig 
93af055e37SBrian Foster 	/*
9473d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
9573d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
9673d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
9773d30d48SChristoph Hellwig 	 */
9873d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
9973d30d48SChristoph Hellwig 
10073d30d48SChristoph Hellwig 	/*
101f9dd7ba4SBhaskar Chowdhury 	 * Just clean up the in-memory structures if the fs has been shut down.
102af055e37SBrian Foster 	 */
1035ca5916bSBrian Foster 	if (xfs_is_shutdown(mp)) {
1040e51a8e1SChristoph Hellwig 		error = -EIO;
10543caeb18SDarrick J. Wong 		goto done;
10643caeb18SDarrick J. Wong 	}
10743caeb18SDarrick J. Wong 
10843caeb18SDarrick J. Wong 	/*
1095ca5916bSBrian Foster 	 * Clean up all COW blocks and underlying data fork delalloc blocks on
1105ca5916bSBrian Foster 	 * I/O error. The delalloc punch is required because this ioend was
1115ca5916bSBrian Foster 	 * mapped to blocks in the COW fork and the associated pages are no
1125ca5916bSBrian Foster 	 * longer dirty. If we don't remove delalloc blocks here, they become
1135ca5916bSBrian Foster 	 * stale and can corrupt free space accounting on unmount.
114c59d87c4SChristoph Hellwig 	 */
115ae5535efSChristoph Hellwig 	error = blk_status_to_errno(ioend->io_bio.bi_status);
116787eb485SChristoph Hellwig 	if (unlikely(error)) {
1175ca5916bSBrian Foster 		if (ioend->io_flags & IOMAP_F_SHARED) {
118787eb485SChristoph Hellwig 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
119*8fe3b21eSChristoph Hellwig 			xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, offset,
1207348b322SDave Chinner 					offset + size);
1215ca5916bSBrian Foster 		}
1225cb13dcdSZhaohongjiang 		goto done;
123787eb485SChristoph Hellwig 	}
124787eb485SChristoph Hellwig 
125787eb485SChristoph Hellwig 	/*
126787eb485SChristoph Hellwig 	 * Success: commit the COW or unwritten blocks if needed.
127787eb485SChristoph Hellwig 	 */
128760fea8bSChristoph Hellwig 	if (ioend->io_flags & IOMAP_F_SHARED)
129787eb485SChristoph Hellwig 		error = xfs_reflink_end_cow(ip, offset, size);
1304e087a3bSChristoph Hellwig 	else if (ioend->io_type == IOMAP_UNWRITTEN)
131ee70daabSEryu Guan 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
13284803fb7SChristoph Hellwig 
1337cd3099fSBrian Foster 	if (!error && xfs_ioend_is_append(ioend))
1347cd3099fSBrian Foster 		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
13504f658eeSChristoph Hellwig done:
136598ecfbaSChristoph Hellwig 	iomap_finish_ioends(ioend, error);
13773d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
1383994fc48SDarrick J. Wong }
1393994fc48SDarrick J. Wong 
140ebb7fb15SDave Chinner /*
141ebb7fb15SDave Chinner  * Finish all pending IO completions that require transactional modifications.
142ebb7fb15SDave Chinner  *
143ebb7fb15SDave Chinner  * We try to merge physical and logically contiguous ioends before completion to
144ebb7fb15SDave Chinner  * minimise the number of transactions we need to perform during IO completion.
145ebb7fb15SDave Chinner  * Both unwritten extent conversion and COW remapping need to iterate and modify
146ebb7fb15SDave Chinner  * one physical extent at a time, so we gain nothing by merging physically
147ebb7fb15SDave Chinner  * discontiguous extents here.
148ebb7fb15SDave Chinner  *
149ebb7fb15SDave Chinner  * The ioend chain length that we can be processing here is largely unbound in
150ebb7fb15SDave Chinner  * length and we may have to perform significant amounts of work on each ioend
151ebb7fb15SDave Chinner  * to complete it. Hence we have to be careful about holding the CPU for too
152ebb7fb15SDave Chinner  * long in this loop.
153ebb7fb15SDave Chinner  */
154cb357bf3SDarrick J. Wong void
xfs_end_io(struct work_struct * work)155cb357bf3SDarrick J. Wong xfs_end_io(
156cb357bf3SDarrick J. Wong 	struct work_struct	*work)
157cb357bf3SDarrick J. Wong {
158433dad94SChristoph Hellwig 	struct xfs_inode	*ip =
159433dad94SChristoph Hellwig 		container_of(work, struct xfs_inode, i_ioend_work);
160598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend;
161433dad94SChristoph Hellwig 	struct list_head	tmp;
162cb357bf3SDarrick J. Wong 	unsigned long		flags;
163cb357bf3SDarrick J. Wong 
164cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
165433dad94SChristoph Hellwig 	list_replace_init(&ip->i_ioend_list, &tmp);
166cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
167cb357bf3SDarrick J. Wong 
168598ecfbaSChristoph Hellwig 	iomap_sort_ioends(&tmp);
169598ecfbaSChristoph Hellwig 	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
170433dad94SChristoph Hellwig 			io_list))) {
171cb357bf3SDarrick J. Wong 		list_del_init(&ioend->io_list);
1726e552494SBrian Foster 		iomap_ioend_try_merge(ioend, &tmp);
173cb357bf3SDarrick J. Wong 		xfs_end_ioend(ioend);
174ebb7fb15SDave Chinner 		cond_resched();
175cb357bf3SDarrick J. Wong 	}
176cb357bf3SDarrick J. Wong }
177cb357bf3SDarrick J. Wong 
1780e51a8e1SChristoph Hellwig STATIC void
xfs_end_bio(struct bio * bio)1790e51a8e1SChristoph Hellwig xfs_end_bio(
1800e51a8e1SChristoph Hellwig 	struct bio		*bio)
181c59d87c4SChristoph Hellwig {
182ae5535efSChristoph Hellwig 	struct iomap_ioend	*ioend = iomap_ioend_from_bio(bio);
183cb357bf3SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
184cb357bf3SDarrick J. Wong 	unsigned long		flags;
185c59d87c4SChristoph Hellwig 
186cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
187cb357bf3SDarrick J. Wong 	if (list_empty(&ip->i_ioend_list))
188598ecfbaSChristoph Hellwig 		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
189cb357bf3SDarrick J. Wong 					 &ip->i_ioend_work));
190cb357bf3SDarrick J. Wong 	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
191cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
192c59d87c4SChristoph Hellwig }
193c59d87c4SChristoph Hellwig 
194d9252d52SBrian Foster /*
195d9252d52SBrian Foster  * Fast revalidation of the cached writeback mapping. Return true if the current
196d9252d52SBrian Foster  * mapping is valid, false otherwise.
197d9252d52SBrian Foster  */
198d9252d52SBrian Foster static bool
xfs_imap_valid(struct iomap_writepage_ctx * wpc,struct xfs_inode * ip,loff_t offset)199d9252d52SBrian Foster xfs_imap_valid(
200598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx	*wpc,
201d9252d52SBrian Foster 	struct xfs_inode		*ip,
2024e087a3bSChristoph Hellwig 	loff_t				offset)
203d9252d52SBrian Foster {
2044e087a3bSChristoph Hellwig 	if (offset < wpc->iomap.offset ||
2054e087a3bSChristoph Hellwig 	    offset >= wpc->iomap.offset + wpc->iomap.length)
206d9252d52SBrian Foster 		return false;
207d9252d52SBrian Foster 	/*
208d9252d52SBrian Foster 	 * If this is a COW mapping, it is sufficient to check that the mapping
209d9252d52SBrian Foster 	 * covers the offset. Be careful to check this first because the caller
210d9252d52SBrian Foster 	 * can revalidate a COW mapping without updating the data seqno.
211d9252d52SBrian Foster 	 */
212760fea8bSChristoph Hellwig 	if (wpc->iomap.flags & IOMAP_F_SHARED)
213d9252d52SBrian Foster 		return true;
214d9252d52SBrian Foster 
215d9252d52SBrian Foster 	/*
216d9252d52SBrian Foster 	 * This is not a COW mapping. Check the sequence number of the data fork
217d9252d52SBrian Foster 	 * because concurrent changes could have invalidated the extent. Check
218d9252d52SBrian Foster 	 * the COW fork because concurrent changes since the last time we
219d9252d52SBrian Foster 	 * checked (and found nothing at this offset) could have added
220d9252d52SBrian Foster 	 * overlapping blocks.
221d9252d52SBrian Foster 	 */
222c2beff99SDarrick J. Wong 	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
223c2beff99SDarrick J. Wong 		trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
224c2beff99SDarrick J. Wong 				XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
225d9252d52SBrian Foster 		return false;
226c2beff99SDarrick J. Wong 	}
227d9252d52SBrian Foster 	if (xfs_inode_has_cow_data(ip) &&
228c2beff99SDarrick J. Wong 	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
229c2beff99SDarrick J. Wong 		trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
230c2beff99SDarrick J. Wong 				XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
231d9252d52SBrian Foster 		return false;
232c2beff99SDarrick J. Wong 	}
233d9252d52SBrian Foster 	return true;
234d9252d52SBrian Foster }
235d9252d52SBrian Foster 
236598ecfbaSChristoph Hellwig static int
xfs_map_blocks(struct iomap_writepage_ctx * wpc,struct inode * inode,loff_t offset,unsigned int len)237c59d87c4SChristoph Hellwig xfs_map_blocks(
238598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx *wpc,
239c59d87c4SChristoph Hellwig 	struct inode		*inode,
24019871b5cSChristoph Hellwig 	loff_t			offset,
24119871b5cSChristoph Hellwig 	unsigned int		len)
242c59d87c4SChristoph Hellwig {
243c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
244c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
24593407472SFabian Frederick 	ssize_t			count = i_blocksize(inode);
246b4e29032SChristoph Hellwig 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
247b4e29032SChristoph Hellwig 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
248c2f09217SDarrick J. Wong 	xfs_fileoff_t		cow_fsb;
249c2f09217SDarrick J. Wong 	int			whichfork;
2505c665e5bSChristoph Hellwig 	struct xfs_bmbt_irec	imap;
251060d4eaaSChristoph Hellwig 	struct xfs_iext_cursor	icur;
2527588cbeeSChristoph Hellwig 	int			retries = 0;
253c59d87c4SChristoph Hellwig 	int			error = 0;
2542e08371aSZhang Yi 	unsigned int		*seq;
255c59d87c4SChristoph Hellwig 
25675c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
257d9252d52SBrian Foster 		return -EIO;
258d9252d52SBrian Foster 
259c2beff99SDarrick J. Wong 	XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);
260c2beff99SDarrick J. Wong 
261889c65b3SChristoph Hellwig 	/*
262889c65b3SChristoph Hellwig 	 * COW fork blocks can overlap data fork blocks even if the blocks
263889c65b3SChristoph Hellwig 	 * aren't shared.  COW I/O always takes precedent, so we must always
264889c65b3SChristoph Hellwig 	 * check for overlap on reflink inodes unless the mapping is already a
265e666aa37SChristoph Hellwig 	 * COW one, or the COW fork hasn't changed from the last time we looked
266e666aa37SChristoph Hellwig 	 * at it.
267e666aa37SChristoph Hellwig 	 *
268e666aa37SChristoph Hellwig 	 * It's safe to check the COW fork if_seq here without the ILOCK because
269e666aa37SChristoph Hellwig 	 * we've indirectly protected against concurrent updates: writeback has
270e666aa37SChristoph Hellwig 	 * the page locked, which prevents concurrent invalidations by reflink
271e666aa37SChristoph Hellwig 	 * and directio and prevents concurrent buffered writes to the same
272e666aa37SChristoph Hellwig 	 * page.  Changes to if_seq always happen under i_lock, which protects
273e666aa37SChristoph Hellwig 	 * against concurrent updates and provides a memory barrier on the way
274e666aa37SChristoph Hellwig 	 * out that ensures that we always see the current value.
275889c65b3SChristoph Hellwig 	 */
2764e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset))
277889c65b3SChristoph Hellwig 		return 0;
278889c65b3SChristoph Hellwig 
279889c65b3SChristoph Hellwig 	/*
280889c65b3SChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
281889c65b3SChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
282889c65b3SChristoph Hellwig 	 * into real extents.  If we return without a valid map, it means we
283889c65b3SChristoph Hellwig 	 * landed in a hole and we skip the block.
284889c65b3SChristoph Hellwig 	 */
2857588cbeeSChristoph Hellwig retry:
286c2f09217SDarrick J. Wong 	cow_fsb = NULLFILEOFF;
287c2f09217SDarrick J. Wong 	whichfork = XFS_DATA_FORK;
288c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
289b2197a36SChristoph Hellwig 	ASSERT(!xfs_need_iread_extents(&ip->i_df));
290060d4eaaSChristoph Hellwig 
291060d4eaaSChristoph Hellwig 	/*
292060d4eaaSChristoph Hellwig 	 * Check if this is offset is covered by a COW extents, and if yes use
293060d4eaaSChristoph Hellwig 	 * it directly instead of looking up anything in the data fork.
294060d4eaaSChristoph Hellwig 	 */
29551d62690SChristoph Hellwig 	if (xfs_inode_has_cow_data(ip) &&
296e666aa37SChristoph Hellwig 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
297e666aa37SChristoph Hellwig 		cow_fsb = imap.br_startoff;
298e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
299598ecfbaSChristoph Hellwig 		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
3005c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
301be225fecSChristoph Hellwig 
302760fea8bSChristoph Hellwig 		whichfork = XFS_COW_FORK;
3035c665e5bSChristoph Hellwig 		goto allocate_blocks;
3045c665e5bSChristoph Hellwig 	}
3055c665e5bSChristoph Hellwig 
3065c665e5bSChristoph Hellwig 	/*
307d9252d52SBrian Foster 	 * No COW extent overlap. Revalidate now that we may have updated
308d9252d52SBrian Foster 	 * ->cow_seq. If the data mapping is still valid, we're done.
3095c665e5bSChristoph Hellwig 	 */
3104e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset)) {
3115c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
3125c665e5bSChristoph Hellwig 		return 0;
3135c665e5bSChristoph Hellwig 	}
3145c665e5bSChristoph Hellwig 
3155c665e5bSChristoph Hellwig 	/*
3165c665e5bSChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
3175c665e5bSChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
3185c665e5bSChristoph Hellwig 	 * into real extents.
3195c665e5bSChristoph Hellwig 	 */
3203345746eSChristoph Hellwig 	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
3213345746eSChristoph Hellwig 		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
322598ecfbaSChristoph Hellwig 	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
323c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
324c59d87c4SChristoph Hellwig 
32512df89f2SChristoph Hellwig 	/* landed in a hole or beyond EOF? */
3263345746eSChristoph Hellwig 	if (imap.br_startoff > offset_fsb) {
3273345746eSChristoph Hellwig 		imap.br_blockcount = imap.br_startoff - offset_fsb;
3285c665e5bSChristoph Hellwig 		imap.br_startoff = offset_fsb;
3295c665e5bSChristoph Hellwig 		imap.br_startblock = HOLESTARTBLOCK;
330be225fecSChristoph Hellwig 		imap.br_state = XFS_EXT_NORM;
33112df89f2SChristoph Hellwig 	}
33212df89f2SChristoph Hellwig 
333e666aa37SChristoph Hellwig 	/*
33412df89f2SChristoph Hellwig 	 * Truncate to the next COW extent if there is one.  This is the only
33512df89f2SChristoph Hellwig 	 * opportunity to do this because we can skip COW fork lookups for the
33612df89f2SChristoph Hellwig 	 * subsequent blocks in the mapping; however, the requirement to treat
33712df89f2SChristoph Hellwig 	 * the COW range separately remains.
338e666aa37SChristoph Hellwig 	 */
339e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF &&
340e666aa37SChristoph Hellwig 	    cow_fsb < imap.br_startoff + imap.br_blockcount)
341e666aa37SChristoph Hellwig 		imap.br_blockcount = cow_fsb - imap.br_startoff;
342e666aa37SChristoph Hellwig 
343be225fecSChristoph Hellwig 	/* got a delalloc extent? */
34412df89f2SChristoph Hellwig 	if (imap.br_startblock != HOLESTARTBLOCK &&
34512df89f2SChristoph Hellwig 	    isnullstartblock(imap.br_startblock))
3465c665e5bSChristoph Hellwig 		goto allocate_blocks;
347e2f6ad46SDave Chinner 
348304a68b9SDave Chinner 	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
349760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
3505c665e5bSChristoph Hellwig 	return 0;
3515c665e5bSChristoph Hellwig allocate_blocks:
3522e08371aSZhang Yi 	/*
3532e08371aSZhang Yi 	 * Convert a dellalloc extent to a real one. The current page is held
3542e08371aSZhang Yi 	 * locked so nothing could have removed the block backing offset_fsb,
3552e08371aSZhang Yi 	 * although it could have moved from the COW to the data fork by another
3562e08371aSZhang Yi 	 * thread.
3572e08371aSZhang Yi 	 */
3582e08371aSZhang Yi 	if (whichfork == XFS_COW_FORK)
3592e08371aSZhang Yi 		seq = &XFS_WPC(wpc)->cow_seq;
3602e08371aSZhang Yi 	else
3612e08371aSZhang Yi 		seq = &XFS_WPC(wpc)->data_seq;
3622e08371aSZhang Yi 
3632e08371aSZhang Yi 	error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
3642e08371aSZhang Yi 				&wpc->iomap, seq);
3657588cbeeSChristoph Hellwig 	if (error) {
3667588cbeeSChristoph Hellwig 		/*
3677588cbeeSChristoph Hellwig 		 * If we failed to find the extent in the COW fork we might have
3687588cbeeSChristoph Hellwig 		 * raced with a COW to data fork conversion or truncate.
3697588cbeeSChristoph Hellwig 		 * Restart the lookup to catch the extent in the data fork for
3707588cbeeSChristoph Hellwig 		 * the former case, but prevent additional retries to avoid
3717588cbeeSChristoph Hellwig 		 * looping forever for the latter case.
3727588cbeeSChristoph Hellwig 		 */
373760fea8bSChristoph Hellwig 		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
3747588cbeeSChristoph Hellwig 			goto retry;
3757588cbeeSChristoph Hellwig 		ASSERT(error != -EAGAIN);
3765c665e5bSChristoph Hellwig 		return error;
3777588cbeeSChristoph Hellwig 	}
3784ad765edSChristoph Hellwig 
3794ad765edSChristoph Hellwig 	/*
3804ad765edSChristoph Hellwig 	 * Due to merging the return real extent might be larger than the
3814ad765edSChristoph Hellwig 	 * original delalloc one.  Trim the return extent to the next COW
3824ad765edSChristoph Hellwig 	 * boundary again to force a re-lookup.
3834ad765edSChristoph Hellwig 	 */
384760fea8bSChristoph Hellwig 	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
3854e087a3bSChristoph Hellwig 		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
3864ad765edSChristoph Hellwig 
3874e087a3bSChristoph Hellwig 		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
3884e087a3bSChristoph Hellwig 			wpc->iomap.length = cow_offset - wpc->iomap.offset;
3894e087a3bSChristoph Hellwig 	}
3904e087a3bSChristoph Hellwig 
3914e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset <= offset);
3924e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
393760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
394c59d87c4SChristoph Hellwig 	return 0;
395c59d87c4SChristoph Hellwig }
396c59d87c4SChristoph Hellwig 
397598ecfbaSChristoph Hellwig static int
xfs_prepare_ioend(struct iomap_ioend * ioend,int status)398598ecfbaSChristoph Hellwig xfs_prepare_ioend(
399598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend,
400e10de372SDave Chinner 	int			status)
401c59d87c4SChristoph Hellwig {
40273d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
40373d30d48SChristoph Hellwig 
40473d30d48SChristoph Hellwig 	/*
40573d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
40673d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
40773d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
40873d30d48SChristoph Hellwig 	 */
40973d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
41073d30d48SChristoph Hellwig 
4115eda4300SDarrick J. Wong 	/* Convert CoW extents to regular */
412760fea8bSChristoph Hellwig 	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
4135eda4300SDarrick J. Wong 		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4145eda4300SDarrick J. Wong 				ioend->io_offset, ioend->io_size);
4155eda4300SDarrick J. Wong 	}
4165eda4300SDarrick J. Wong 
41773d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
41873d30d48SChristoph Hellwig 
4197adb8f14SBrian Foster 	/* send ioends that might require a transaction to the completion wq */
4207adb8f14SBrian Foster 	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
4217adb8f14SBrian Foster 	    (ioend->io_flags & IOMAP_F_SHARED))
422ae5535efSChristoph Hellwig 		ioend->io_bio.bi_end_io = xfs_end_bio;
423e10de372SDave Chinner 	return status;
4247bf7f352SDave Chinner }
4257bf7f352SDave Chinner 
426c59d87c4SChristoph Hellwig /*
4278ac5b996SDave Chinner  * If the folio has delalloc blocks on it, the caller is asking us to punch them
4288ac5b996SDave Chinner  * out. If we don't, we can leave a stale delalloc mapping covered by a clean
4298ac5b996SDave Chinner  * page that needs to be dirtied again before the delalloc mapping can be
4308ac5b996SDave Chinner  * converted. This stale delalloc mapping can trip up a later direct I/O read
4318ac5b996SDave Chinner  * operation on the same region.
432c59d87c4SChristoph Hellwig  *
4338ac5b996SDave Chinner  * We prevent this by truncating away the delalloc regions on the folio. Because
43482cb1417SChristoph Hellwig  * they are delalloc, we can do this without needing a transaction. Indeed - if
43582cb1417SChristoph Hellwig  * we get ENOSPC errors, we have to be able to do this truncation without a
4368ac5b996SDave Chinner  * transaction as there is no space left for block reservation (typically why
4378ac5b996SDave Chinner  * we see a ENOSPC in writeback).
438c59d87c4SChristoph Hellwig  */
439598ecfbaSChristoph Hellwig static void
xfs_discard_folio(struct folio * folio,loff_t pos)4406e478521SMatthew Wilcox (Oracle) xfs_discard_folio(
4416e478521SMatthew Wilcox (Oracle) 	struct folio		*folio,
4426e478521SMatthew Wilcox (Oracle) 	loff_t			pos)
443c59d87c4SChristoph Hellwig {
4447348b322SDave Chinner 	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
44503625721SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
446c59d87c4SChristoph Hellwig 
44775c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
448e9c3a8e8SDarrick J. Wong 		return;
449c59d87c4SChristoph Hellwig 
4504ab45e25SChristoph Hellwig 	xfs_alert_ratelimited(mp,
4516e478521SMatthew Wilcox (Oracle) 		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
4526e478521SMatthew Wilcox (Oracle) 			folio, ip->i_ino, pos);
453c59d87c4SChristoph Hellwig 
4548ac5b996SDave Chinner 	/*
455c1950a11SZizhen Pang 	 * The end of the punch range is always the offset of the first
4568ac5b996SDave Chinner 	 * byte of the next folio. Hence the end offset is only dependent on the
4578ac5b996SDave Chinner 	 * folio itself and not the start offset that is passed in.
4588ac5b996SDave Chinner 	 */
459*8fe3b21eSChristoph Hellwig 	xfs_bmap_punch_delalloc_range(ip, XFS_DATA_FORK, pos,
4608ac5b996SDave Chinner 				folio_pos(folio) + folio_size(folio));
461c59d87c4SChristoph Hellwig }
462c59d87c4SChristoph Hellwig 
463598ecfbaSChristoph Hellwig static const struct iomap_writeback_ops xfs_writeback_ops = {
464598ecfbaSChristoph Hellwig 	.map_blocks		= xfs_map_blocks,
465598ecfbaSChristoph Hellwig 	.prepare_ioend		= xfs_prepare_ioend,
4666e478521SMatthew Wilcox (Oracle) 	.discard_folio		= xfs_discard_folio,
467598ecfbaSChristoph Hellwig };
468c59d87c4SChristoph Hellwig 
469c59d87c4SChristoph Hellwig STATIC int
xfs_vm_writepages(struct address_space * mapping,struct writeback_control * wbc)470c59d87c4SChristoph Hellwig xfs_vm_writepages(
471c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
472c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
473c59d87c4SChristoph Hellwig {
474be225fecSChristoph Hellwig 	struct xfs_writepage_ctx wpc = { };
475fbcc0256SDave Chinner 
476c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
477598ecfbaSChristoph Hellwig 	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
478c59d87c4SChristoph Hellwig }
479c59d87c4SChristoph Hellwig 
4806e2608dfSDan Williams STATIC int
xfs_dax_writepages(struct address_space * mapping,struct writeback_control * wbc)4816e2608dfSDan Williams xfs_dax_writepages(
4826e2608dfSDan Williams 	struct address_space	*mapping,
4836e2608dfSDan Williams 	struct writeback_control *wbc)
4846e2608dfSDan Williams {
48530fa529eSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
48630fa529eSChristoph Hellwig 
48730fa529eSChristoph Hellwig 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
4886e2608dfSDan Williams 	return dax_writeback_mapping_range(mapping,
4893f666c56SVivek Goyal 			xfs_inode_buftarg(ip)->bt_daxdev, wbc);
4906e2608dfSDan Williams }
4916e2608dfSDan Williams 
492c59d87c4SChristoph Hellwig STATIC sector_t
xfs_vm_bmap(struct address_space * mapping,sector_t block)493c59d87c4SChristoph Hellwig xfs_vm_bmap(
494c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
495c59d87c4SChristoph Hellwig 	sector_t		block)
496c59d87c4SChristoph Hellwig {
497b84e7722SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
498c59d87c4SChristoph Hellwig 
499b84e7722SChristoph Hellwig 	trace_xfs_vm_bmap(ip);
500db1327b1SDarrick J. Wong 
501db1327b1SDarrick J. Wong 	/*
502db1327b1SDarrick J. Wong 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
503793057e1SIngo Molnar 	 * bypasses the file system for actual I/O.  We really can't allow
504db1327b1SDarrick J. Wong 	 * that on reflinks inodes, so we have to skip out here.  And yes,
505eb5e248dSDarrick J. Wong 	 * 0 is the magic code for a bmap error.
506eb5e248dSDarrick J. Wong 	 *
507eb5e248dSDarrick J. Wong 	 * Since we don't pass back blockdev info, we can't return bmap
508eb5e248dSDarrick J. Wong 	 * information for rt files either.
509db1327b1SDarrick J. Wong 	 */
51066ae56a5SChristoph Hellwig 	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
511db1327b1SDarrick J. Wong 		return 0;
512690c2a38SChristoph Hellwig 	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
513c59d87c4SChristoph Hellwig }
514c59d87c4SChristoph Hellwig 
515c59d87c4SChristoph Hellwig STATIC int
xfs_vm_read_folio(struct file * unused,struct folio * folio)5167479c505SMatthew Wilcox (Oracle) xfs_vm_read_folio(
517c59d87c4SChristoph Hellwig 	struct file		*unused,
5187479c505SMatthew Wilcox (Oracle) 	struct folio		*folio)
519c59d87c4SChristoph Hellwig {
5207479c505SMatthew Wilcox (Oracle) 	return iomap_read_folio(folio, &xfs_read_iomap_ops);
521c59d87c4SChristoph Hellwig }
522c59d87c4SChristoph Hellwig 
5239d24a13aSMatthew Wilcox (Oracle) STATIC void
xfs_vm_readahead(struct readahead_control * rac)5249d24a13aSMatthew Wilcox (Oracle) xfs_vm_readahead(
5259d24a13aSMatthew Wilcox (Oracle) 	struct readahead_control	*rac)
526c59d87c4SChristoph Hellwig {
5279d24a13aSMatthew Wilcox (Oracle) 	iomap_readahead(rac, &xfs_read_iomap_ops);
52822e757a4SDave Chinner }
52922e757a4SDave Chinner 
53067482129SDarrick J. Wong static int
xfs_iomap_swapfile_activate(struct swap_info_struct * sis,struct file * swap_file,sector_t * span)53167482129SDarrick J. Wong xfs_iomap_swapfile_activate(
53267482129SDarrick J. Wong 	struct swap_info_struct		*sis,
53367482129SDarrick J. Wong 	struct file			*swap_file,
53467482129SDarrick J. Wong 	sector_t			*span)
53567482129SDarrick J. Wong {
53630fa529eSChristoph Hellwig 	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
537690c2a38SChristoph Hellwig 	return iomap_swapfile_activate(sis, swap_file, span,
538690c2a38SChristoph Hellwig 			&xfs_read_iomap_ops);
53967482129SDarrick J. Wong }
54067482129SDarrick J. Wong 
541c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
5427479c505SMatthew Wilcox (Oracle) 	.read_folio		= xfs_vm_read_folio,
5439d24a13aSMatthew Wilcox (Oracle) 	.readahead		= xfs_vm_readahead,
544c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
5454ce02c67SRitesh Harjani (IBM) 	.dirty_folio		= iomap_dirty_folio,
5468597447dSMatthew Wilcox (Oracle) 	.release_folio		= iomap_release_folio,
547d82354f6SMatthew Wilcox (Oracle) 	.invalidate_folio	= iomap_invalidate_folio,
548c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
5492ec810d5SMatthew Wilcox (Oracle) 	.migrate_folio		= filemap_migrate_folio,
55082cb1417SChristoph Hellwig 	.is_partially_uptodate  = iomap_is_partially_uptodate,
551af7628d6SMatthew Wilcox (Oracle) 	.error_remove_folio	= generic_error_remove_folio,
55267482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
553c59d87c4SChristoph Hellwig };
5546e2608dfSDan Williams 
5556e2608dfSDan Williams const struct address_space_operations xfs_dax_aops = {
5566e2608dfSDan Williams 	.writepages		= xfs_dax_writepages,
55746de8b97SMatthew Wilcox (Oracle) 	.dirty_folio		= noop_dirty_folio,
55867482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
5596e2608dfSDan Williams };
560