xref: /linux/fs/xfs/xfs_aops.c (revision 4ce02c67972211be488408c275c8fbf19faf29b3)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
7c59d87c4SChristoph Hellwig #include "xfs.h"
870a9883cSDave Chinner #include "xfs_shared.h"
9239880efSDave Chinner #include "xfs_format.h"
10239880efSDave Chinner #include "xfs_log_format.h"
11239880efSDave Chinner #include "xfs_trans_resv.h"
12c59d87c4SChristoph Hellwig #include "xfs_mount.h"
13c59d87c4SChristoph Hellwig #include "xfs_inode.h"
14239880efSDave Chinner #include "xfs_trans.h"
15c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
16c59d87c4SChristoph Hellwig #include "xfs_trace.h"
17c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
1868988114SDave Chinner #include "xfs_bmap_util.h"
19ef473667SDarrick J. Wong #include "xfs_reflink.h"
20c2beff99SDarrick J. Wong #include "xfs_errortag.h"
21c2beff99SDarrick J. Wong #include "xfs_error.h"
22c59d87c4SChristoph Hellwig 
23fbcc0256SDave Chinner struct xfs_writepage_ctx {
24598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx ctx;
25d9252d52SBrian Foster 	unsigned int		data_seq;
26e666aa37SChristoph Hellwig 	unsigned int		cow_seq;
27fbcc0256SDave Chinner };
28fbcc0256SDave Chinner 
29598ecfbaSChristoph Hellwig static inline struct xfs_writepage_ctx *
30598ecfbaSChristoph Hellwig XFS_WPC(struct iomap_writepage_ctx *ctx)
31598ecfbaSChristoph Hellwig {
32598ecfbaSChristoph Hellwig 	return container_of(ctx, struct xfs_writepage_ctx, ctx);
33598ecfbaSChristoph Hellwig }
34598ecfbaSChristoph Hellwig 
35c59d87c4SChristoph Hellwig /*
36fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
37fc0063c4SChristoph Hellwig  */
38598ecfbaSChristoph Hellwig static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
39fc0063c4SChristoph Hellwig {
40fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
4113d2c10bSChristoph Hellwig 		XFS_I(ioend->io_inode)->i_disk_size;
42fc0063c4SChristoph Hellwig }
43fc0063c4SChristoph Hellwig 
44fc0063c4SChristoph Hellwig /*
452813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
46c59d87c4SChristoph Hellwig  */
47e7a3d7e7SBrian Foster int
48e7a3d7e7SBrian Foster xfs_setfilesize(
492ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
502ba66237SChristoph Hellwig 	xfs_off_t		offset,
512ba66237SChristoph Hellwig 	size_t			size)
52c59d87c4SChristoph Hellwig {
53e7a3d7e7SBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
54e7a3d7e7SBrian Foster 	struct xfs_trans	*tp;
55c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
56e7a3d7e7SBrian Foster 	int			error;
57e7a3d7e7SBrian Foster 
58e7a3d7e7SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
59e7a3d7e7SBrian Foster 	if (error)
60e7a3d7e7SBrian Foster 		return error;
61c59d87c4SChristoph Hellwig 
62aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
632ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
64281627dfSChristoph Hellwig 	if (!isize) {
65281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
664906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
67281627dfSChristoph Hellwig 		return 0;
68c59d87c4SChristoph Hellwig 	}
69c59d87c4SChristoph Hellwig 
702ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
71281627dfSChristoph Hellwig 
7213d2c10bSChristoph Hellwig 	ip->i_disk_size = isize;
73281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
74281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
75281627dfSChristoph Hellwig 
7670393313SChristoph Hellwig 	return xfs_trans_commit(tp);
77c59d87c4SChristoph Hellwig }
78c59d87c4SChristoph Hellwig 
79c59d87c4SChristoph Hellwig /*
80c59d87c4SChristoph Hellwig  * IO write completion.
81c59d87c4SChristoph Hellwig  */
82c59d87c4SChristoph Hellwig STATIC void
83cb357bf3SDarrick J. Wong xfs_end_ioend(
84598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend)
85c59d87c4SChristoph Hellwig {
86c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
875ca5916bSBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
88787eb485SChristoph Hellwig 	xfs_off_t		offset = ioend->io_offset;
89787eb485SChristoph Hellwig 	size_t			size = ioend->io_size;
9073d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
914e4cbee9SChristoph Hellwig 	int			error;
92c59d87c4SChristoph Hellwig 
93af055e37SBrian Foster 	/*
9473d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
9573d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
9673d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
9773d30d48SChristoph Hellwig 	 */
9873d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
9973d30d48SChristoph Hellwig 
10073d30d48SChristoph Hellwig 	/*
101f9dd7ba4SBhaskar Chowdhury 	 * Just clean up the in-memory structures if the fs has been shut down.
102af055e37SBrian Foster 	 */
1035ca5916bSBrian Foster 	if (xfs_is_shutdown(mp)) {
1040e51a8e1SChristoph Hellwig 		error = -EIO;
10543caeb18SDarrick J. Wong 		goto done;
10643caeb18SDarrick J. Wong 	}
10743caeb18SDarrick J. Wong 
10843caeb18SDarrick J. Wong 	/*
1095ca5916bSBrian Foster 	 * Clean up all COW blocks and underlying data fork delalloc blocks on
1105ca5916bSBrian Foster 	 * I/O error. The delalloc punch is required because this ioend was
1115ca5916bSBrian Foster 	 * mapped to blocks in the COW fork and the associated pages are no
1125ca5916bSBrian Foster 	 * longer dirty. If we don't remove delalloc blocks here, they become
1135ca5916bSBrian Foster 	 * stale and can corrupt free space accounting on unmount.
114c59d87c4SChristoph Hellwig 	 */
1154e4cbee9SChristoph Hellwig 	error = blk_status_to_errno(ioend->io_bio->bi_status);
116787eb485SChristoph Hellwig 	if (unlikely(error)) {
1175ca5916bSBrian Foster 		if (ioend->io_flags & IOMAP_F_SHARED) {
118787eb485SChristoph Hellwig 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
1197348b322SDave Chinner 			xfs_bmap_punch_delalloc_range(ip, offset,
1207348b322SDave Chinner 					offset + size);
1215ca5916bSBrian Foster 		}
1225cb13dcdSZhaohongjiang 		goto done;
123787eb485SChristoph Hellwig 	}
124787eb485SChristoph Hellwig 
125787eb485SChristoph Hellwig 	/*
126787eb485SChristoph Hellwig 	 * Success: commit the COW or unwritten blocks if needed.
127787eb485SChristoph Hellwig 	 */
128760fea8bSChristoph Hellwig 	if (ioend->io_flags & IOMAP_F_SHARED)
129787eb485SChristoph Hellwig 		error = xfs_reflink_end_cow(ip, offset, size);
1304e087a3bSChristoph Hellwig 	else if (ioend->io_type == IOMAP_UNWRITTEN)
131ee70daabSEryu Guan 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
13284803fb7SChristoph Hellwig 
1337cd3099fSBrian Foster 	if (!error && xfs_ioend_is_append(ioend))
1347cd3099fSBrian Foster 		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
13504f658eeSChristoph Hellwig done:
136598ecfbaSChristoph Hellwig 	iomap_finish_ioends(ioend, error);
13773d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
1383994fc48SDarrick J. Wong }
1393994fc48SDarrick J. Wong 
140ebb7fb15SDave Chinner /*
141ebb7fb15SDave Chinner  * Finish all pending IO completions that require transactional modifications.
142ebb7fb15SDave Chinner  *
143ebb7fb15SDave Chinner  * We try to merge physical and logically contiguous ioends before completion to
144ebb7fb15SDave Chinner  * minimise the number of transactions we need to perform during IO completion.
145ebb7fb15SDave Chinner  * Both unwritten extent conversion and COW remapping need to iterate and modify
146ebb7fb15SDave Chinner  * one physical extent at a time, so we gain nothing by merging physically
147ebb7fb15SDave Chinner  * discontiguous extents here.
148ebb7fb15SDave Chinner  *
149ebb7fb15SDave Chinner  * The ioend chain length that we can be processing here is largely unbound in
150ebb7fb15SDave Chinner  * length and we may have to perform significant amounts of work on each ioend
151ebb7fb15SDave Chinner  * to complete it. Hence we have to be careful about holding the CPU for too
152ebb7fb15SDave Chinner  * long in this loop.
153ebb7fb15SDave Chinner  */
154cb357bf3SDarrick J. Wong void
155cb357bf3SDarrick J. Wong xfs_end_io(
156cb357bf3SDarrick J. Wong 	struct work_struct	*work)
157cb357bf3SDarrick J. Wong {
158433dad94SChristoph Hellwig 	struct xfs_inode	*ip =
159433dad94SChristoph Hellwig 		container_of(work, struct xfs_inode, i_ioend_work);
160598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend;
161433dad94SChristoph Hellwig 	struct list_head	tmp;
162cb357bf3SDarrick J. Wong 	unsigned long		flags;
163cb357bf3SDarrick J. Wong 
164cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
165433dad94SChristoph Hellwig 	list_replace_init(&ip->i_ioend_list, &tmp);
166cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
167cb357bf3SDarrick J. Wong 
168598ecfbaSChristoph Hellwig 	iomap_sort_ioends(&tmp);
169598ecfbaSChristoph Hellwig 	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
170433dad94SChristoph Hellwig 			io_list))) {
171cb357bf3SDarrick J. Wong 		list_del_init(&ioend->io_list);
1726e552494SBrian Foster 		iomap_ioend_try_merge(ioend, &tmp);
173cb357bf3SDarrick J. Wong 		xfs_end_ioend(ioend);
174ebb7fb15SDave Chinner 		cond_resched();
175cb357bf3SDarrick J. Wong 	}
176cb357bf3SDarrick J. Wong }
177cb357bf3SDarrick J. Wong 
1780e51a8e1SChristoph Hellwig STATIC void
1790e51a8e1SChristoph Hellwig xfs_end_bio(
1800e51a8e1SChristoph Hellwig 	struct bio		*bio)
181c59d87c4SChristoph Hellwig {
182598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend = bio->bi_private;
183cb357bf3SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
184cb357bf3SDarrick J. Wong 	unsigned long		flags;
185c59d87c4SChristoph Hellwig 
186cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
187cb357bf3SDarrick J. Wong 	if (list_empty(&ip->i_ioend_list))
188598ecfbaSChristoph Hellwig 		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
189cb357bf3SDarrick J. Wong 					 &ip->i_ioend_work));
190cb357bf3SDarrick J. Wong 	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
191cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
192c59d87c4SChristoph Hellwig }
193c59d87c4SChristoph Hellwig 
194d9252d52SBrian Foster /*
195d9252d52SBrian Foster  * Fast revalidation of the cached writeback mapping. Return true if the current
196d9252d52SBrian Foster  * mapping is valid, false otherwise.
197d9252d52SBrian Foster  */
198d9252d52SBrian Foster static bool
199d9252d52SBrian Foster xfs_imap_valid(
200598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx	*wpc,
201d9252d52SBrian Foster 	struct xfs_inode		*ip,
2024e087a3bSChristoph Hellwig 	loff_t				offset)
203d9252d52SBrian Foster {
2044e087a3bSChristoph Hellwig 	if (offset < wpc->iomap.offset ||
2054e087a3bSChristoph Hellwig 	    offset >= wpc->iomap.offset + wpc->iomap.length)
206d9252d52SBrian Foster 		return false;
207d9252d52SBrian Foster 	/*
208d9252d52SBrian Foster 	 * If this is a COW mapping, it is sufficient to check that the mapping
209d9252d52SBrian Foster 	 * covers the offset. Be careful to check this first because the caller
210d9252d52SBrian Foster 	 * can revalidate a COW mapping without updating the data seqno.
211d9252d52SBrian Foster 	 */
212760fea8bSChristoph Hellwig 	if (wpc->iomap.flags & IOMAP_F_SHARED)
213d9252d52SBrian Foster 		return true;
214d9252d52SBrian Foster 
215d9252d52SBrian Foster 	/*
216d9252d52SBrian Foster 	 * This is not a COW mapping. Check the sequence number of the data fork
217d9252d52SBrian Foster 	 * because concurrent changes could have invalidated the extent. Check
218d9252d52SBrian Foster 	 * the COW fork because concurrent changes since the last time we
219d9252d52SBrian Foster 	 * checked (and found nothing at this offset) could have added
220d9252d52SBrian Foster 	 * overlapping blocks.
221d9252d52SBrian Foster 	 */
222c2beff99SDarrick J. Wong 	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
223c2beff99SDarrick J. Wong 		trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
224c2beff99SDarrick J. Wong 				XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
225d9252d52SBrian Foster 		return false;
226c2beff99SDarrick J. Wong 	}
227d9252d52SBrian Foster 	if (xfs_inode_has_cow_data(ip) &&
228c2beff99SDarrick J. Wong 	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
229c2beff99SDarrick J. Wong 		trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
230c2beff99SDarrick J. Wong 				XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
231d9252d52SBrian Foster 		return false;
232c2beff99SDarrick J. Wong 	}
233d9252d52SBrian Foster 	return true;
234d9252d52SBrian Foster }
235d9252d52SBrian Foster 
2364ad765edSChristoph Hellwig /*
2374ad765edSChristoph Hellwig  * Pass in a dellalloc extent and convert it to real extents, return the real
2384e087a3bSChristoph Hellwig  * extent that maps offset_fsb in wpc->iomap.
2394ad765edSChristoph Hellwig  *
2404ad765edSChristoph Hellwig  * The current page is held locked so nothing could have removed the block
2417588cbeeSChristoph Hellwig  * backing offset_fsb, although it could have moved from the COW to the data
2427588cbeeSChristoph Hellwig  * fork by another thread.
2434ad765edSChristoph Hellwig  */
2444ad765edSChristoph Hellwig static int
2454ad765edSChristoph Hellwig xfs_convert_blocks(
246598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx *wpc,
2474ad765edSChristoph Hellwig 	struct xfs_inode	*ip,
248760fea8bSChristoph Hellwig 	int			whichfork,
2494e087a3bSChristoph Hellwig 	loff_t			offset)
2504ad765edSChristoph Hellwig {
2514ad765edSChristoph Hellwig 	int			error;
252598ecfbaSChristoph Hellwig 	unsigned		*seq;
253598ecfbaSChristoph Hellwig 
254598ecfbaSChristoph Hellwig 	if (whichfork == XFS_COW_FORK)
255598ecfbaSChristoph Hellwig 		seq = &XFS_WPC(wpc)->cow_seq;
256598ecfbaSChristoph Hellwig 	else
257598ecfbaSChristoph Hellwig 		seq = &XFS_WPC(wpc)->data_seq;
2584ad765edSChristoph Hellwig 
2594ad765edSChristoph Hellwig 	/*
2604e087a3bSChristoph Hellwig 	 * Attempt to allocate whatever delalloc extent currently backs offset
2614e087a3bSChristoph Hellwig 	 * and put the result into wpc->iomap.  Allocate in a loop because it
2624e087a3bSChristoph Hellwig 	 * may take several attempts to allocate real blocks for a contiguous
2634e087a3bSChristoph Hellwig 	 * delalloc extent if free space is sufficiently fragmented.
2644ad765edSChristoph Hellwig 	 */
2654ad765edSChristoph Hellwig 	do {
266760fea8bSChristoph Hellwig 		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
267598ecfbaSChristoph Hellwig 				&wpc->iomap, seq);
2684ad765edSChristoph Hellwig 		if (error)
2694ad765edSChristoph Hellwig 			return error;
2704e087a3bSChristoph Hellwig 	} while (wpc->iomap.offset + wpc->iomap.length <= offset);
2714ad765edSChristoph Hellwig 
2724ad765edSChristoph Hellwig 	return 0;
2734ad765edSChristoph Hellwig }
2744ad765edSChristoph Hellwig 
275598ecfbaSChristoph Hellwig static int
276c59d87c4SChristoph Hellwig xfs_map_blocks(
277598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx *wpc,
278c59d87c4SChristoph Hellwig 	struct inode		*inode,
2795c665e5bSChristoph Hellwig 	loff_t			offset)
280c59d87c4SChristoph Hellwig {
281c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
282c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
28393407472SFabian Frederick 	ssize_t			count = i_blocksize(inode);
284b4e29032SChristoph Hellwig 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
285b4e29032SChristoph Hellwig 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
286c2f09217SDarrick J. Wong 	xfs_fileoff_t		cow_fsb;
287c2f09217SDarrick J. Wong 	int			whichfork;
2885c665e5bSChristoph Hellwig 	struct xfs_bmbt_irec	imap;
289060d4eaaSChristoph Hellwig 	struct xfs_iext_cursor	icur;
2907588cbeeSChristoph Hellwig 	int			retries = 0;
291c59d87c4SChristoph Hellwig 	int			error = 0;
292c59d87c4SChristoph Hellwig 
29375c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
294d9252d52SBrian Foster 		return -EIO;
295d9252d52SBrian Foster 
296c2beff99SDarrick J. Wong 	XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);
297c2beff99SDarrick J. Wong 
298889c65b3SChristoph Hellwig 	/*
299889c65b3SChristoph Hellwig 	 * COW fork blocks can overlap data fork blocks even if the blocks
300889c65b3SChristoph Hellwig 	 * aren't shared.  COW I/O always takes precedent, so we must always
301889c65b3SChristoph Hellwig 	 * check for overlap on reflink inodes unless the mapping is already a
302e666aa37SChristoph Hellwig 	 * COW one, or the COW fork hasn't changed from the last time we looked
303e666aa37SChristoph Hellwig 	 * at it.
304e666aa37SChristoph Hellwig 	 *
305e666aa37SChristoph Hellwig 	 * It's safe to check the COW fork if_seq here without the ILOCK because
306e666aa37SChristoph Hellwig 	 * we've indirectly protected against concurrent updates: writeback has
307e666aa37SChristoph Hellwig 	 * the page locked, which prevents concurrent invalidations by reflink
308e666aa37SChristoph Hellwig 	 * and directio and prevents concurrent buffered writes to the same
309e666aa37SChristoph Hellwig 	 * page.  Changes to if_seq always happen under i_lock, which protects
310e666aa37SChristoph Hellwig 	 * against concurrent updates and provides a memory barrier on the way
311e666aa37SChristoph Hellwig 	 * out that ensures that we always see the current value.
312889c65b3SChristoph Hellwig 	 */
3134e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset))
314889c65b3SChristoph Hellwig 		return 0;
315889c65b3SChristoph Hellwig 
316889c65b3SChristoph Hellwig 	/*
317889c65b3SChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
318889c65b3SChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
319889c65b3SChristoph Hellwig 	 * into real extents.  If we return without a valid map, it means we
320889c65b3SChristoph Hellwig 	 * landed in a hole and we skip the block.
321889c65b3SChristoph Hellwig 	 */
3227588cbeeSChristoph Hellwig retry:
323c2f09217SDarrick J. Wong 	cow_fsb = NULLFILEOFF;
324c2f09217SDarrick J. Wong 	whichfork = XFS_DATA_FORK;
325c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
326b2197a36SChristoph Hellwig 	ASSERT(!xfs_need_iread_extents(&ip->i_df));
327060d4eaaSChristoph Hellwig 
328060d4eaaSChristoph Hellwig 	/*
329060d4eaaSChristoph Hellwig 	 * Check if this is offset is covered by a COW extents, and if yes use
330060d4eaaSChristoph Hellwig 	 * it directly instead of looking up anything in the data fork.
331060d4eaaSChristoph Hellwig 	 */
33251d62690SChristoph Hellwig 	if (xfs_inode_has_cow_data(ip) &&
333e666aa37SChristoph Hellwig 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
334e666aa37SChristoph Hellwig 		cow_fsb = imap.br_startoff;
335e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
336598ecfbaSChristoph Hellwig 		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
3375c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
338be225fecSChristoph Hellwig 
339760fea8bSChristoph Hellwig 		whichfork = XFS_COW_FORK;
3405c665e5bSChristoph Hellwig 		goto allocate_blocks;
3415c665e5bSChristoph Hellwig 	}
3425c665e5bSChristoph Hellwig 
3435c665e5bSChristoph Hellwig 	/*
344d9252d52SBrian Foster 	 * No COW extent overlap. Revalidate now that we may have updated
345d9252d52SBrian Foster 	 * ->cow_seq. If the data mapping is still valid, we're done.
3465c665e5bSChristoph Hellwig 	 */
3474e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset)) {
3485c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
3495c665e5bSChristoph Hellwig 		return 0;
3505c665e5bSChristoph Hellwig 	}
3515c665e5bSChristoph Hellwig 
3525c665e5bSChristoph Hellwig 	/*
3535c665e5bSChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
3545c665e5bSChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
3555c665e5bSChristoph Hellwig 	 * into real extents.
3565c665e5bSChristoph Hellwig 	 */
3573345746eSChristoph Hellwig 	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
3583345746eSChristoph Hellwig 		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
359598ecfbaSChristoph Hellwig 	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
360c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
361c59d87c4SChristoph Hellwig 
36212df89f2SChristoph Hellwig 	/* landed in a hole or beyond EOF? */
3633345746eSChristoph Hellwig 	if (imap.br_startoff > offset_fsb) {
3643345746eSChristoph Hellwig 		imap.br_blockcount = imap.br_startoff - offset_fsb;
3655c665e5bSChristoph Hellwig 		imap.br_startoff = offset_fsb;
3665c665e5bSChristoph Hellwig 		imap.br_startblock = HOLESTARTBLOCK;
367be225fecSChristoph Hellwig 		imap.br_state = XFS_EXT_NORM;
36812df89f2SChristoph Hellwig 	}
36912df89f2SChristoph Hellwig 
370e666aa37SChristoph Hellwig 	/*
37112df89f2SChristoph Hellwig 	 * Truncate to the next COW extent if there is one.  This is the only
37212df89f2SChristoph Hellwig 	 * opportunity to do this because we can skip COW fork lookups for the
37312df89f2SChristoph Hellwig 	 * subsequent blocks in the mapping; however, the requirement to treat
37412df89f2SChristoph Hellwig 	 * the COW range separately remains.
375e666aa37SChristoph Hellwig 	 */
376e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF &&
377e666aa37SChristoph Hellwig 	    cow_fsb < imap.br_startoff + imap.br_blockcount)
378e666aa37SChristoph Hellwig 		imap.br_blockcount = cow_fsb - imap.br_startoff;
379e666aa37SChristoph Hellwig 
380be225fecSChristoph Hellwig 	/* got a delalloc extent? */
38112df89f2SChristoph Hellwig 	if (imap.br_startblock != HOLESTARTBLOCK &&
38212df89f2SChristoph Hellwig 	    isnullstartblock(imap.br_startblock))
3835c665e5bSChristoph Hellwig 		goto allocate_blocks;
384e2f6ad46SDave Chinner 
385304a68b9SDave Chinner 	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
386760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
3875c665e5bSChristoph Hellwig 	return 0;
3885c665e5bSChristoph Hellwig allocate_blocks:
389760fea8bSChristoph Hellwig 	error = xfs_convert_blocks(wpc, ip, whichfork, offset);
3907588cbeeSChristoph Hellwig 	if (error) {
3917588cbeeSChristoph Hellwig 		/*
3927588cbeeSChristoph Hellwig 		 * If we failed to find the extent in the COW fork we might have
3937588cbeeSChristoph Hellwig 		 * raced with a COW to data fork conversion or truncate.
3947588cbeeSChristoph Hellwig 		 * Restart the lookup to catch the extent in the data fork for
3957588cbeeSChristoph Hellwig 		 * the former case, but prevent additional retries to avoid
3967588cbeeSChristoph Hellwig 		 * looping forever for the latter case.
3977588cbeeSChristoph Hellwig 		 */
398760fea8bSChristoph Hellwig 		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
3997588cbeeSChristoph Hellwig 			goto retry;
4007588cbeeSChristoph Hellwig 		ASSERT(error != -EAGAIN);
4015c665e5bSChristoph Hellwig 		return error;
4027588cbeeSChristoph Hellwig 	}
4034ad765edSChristoph Hellwig 
4044ad765edSChristoph Hellwig 	/*
4054ad765edSChristoph Hellwig 	 * Due to merging the return real extent might be larger than the
4064ad765edSChristoph Hellwig 	 * original delalloc one.  Trim the return extent to the next COW
4074ad765edSChristoph Hellwig 	 * boundary again to force a re-lookup.
4084ad765edSChristoph Hellwig 	 */
409760fea8bSChristoph Hellwig 	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
4104e087a3bSChristoph Hellwig 		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
4114ad765edSChristoph Hellwig 
4124e087a3bSChristoph Hellwig 		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
4134e087a3bSChristoph Hellwig 			wpc->iomap.length = cow_offset - wpc->iomap.offset;
4144e087a3bSChristoph Hellwig 	}
4154e087a3bSChristoph Hellwig 
4164e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset <= offset);
4174e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
418760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
419c59d87c4SChristoph Hellwig 	return 0;
420c59d87c4SChristoph Hellwig }
421c59d87c4SChristoph Hellwig 
422598ecfbaSChristoph Hellwig static int
423598ecfbaSChristoph Hellwig xfs_prepare_ioend(
424598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend,
425e10de372SDave Chinner 	int			status)
426c59d87c4SChristoph Hellwig {
42773d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
42873d30d48SChristoph Hellwig 
42973d30d48SChristoph Hellwig 	/*
43073d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
43173d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
43273d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
43373d30d48SChristoph Hellwig 	 */
43473d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
43573d30d48SChristoph Hellwig 
4365eda4300SDarrick J. Wong 	/* Convert CoW extents to regular */
437760fea8bSChristoph Hellwig 	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
4385eda4300SDarrick J. Wong 		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4395eda4300SDarrick J. Wong 				ioend->io_offset, ioend->io_size);
4405eda4300SDarrick J. Wong 	}
4415eda4300SDarrick J. Wong 
44273d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
44373d30d48SChristoph Hellwig 
4447adb8f14SBrian Foster 	/* send ioends that might require a transaction to the completion wq */
4457adb8f14SBrian Foster 	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
4467adb8f14SBrian Foster 	    (ioend->io_flags & IOMAP_F_SHARED))
4470e51a8e1SChristoph Hellwig 		ioend->io_bio->bi_end_io = xfs_end_bio;
448e10de372SDave Chinner 	return status;
4497bf7f352SDave Chinner }
4507bf7f352SDave Chinner 
451c59d87c4SChristoph Hellwig /*
4528ac5b996SDave Chinner  * If the folio has delalloc blocks on it, the caller is asking us to punch them
4538ac5b996SDave Chinner  * out. If we don't, we can leave a stale delalloc mapping covered by a clean
4548ac5b996SDave Chinner  * page that needs to be dirtied again before the delalloc mapping can be
4558ac5b996SDave Chinner  * converted. This stale delalloc mapping can trip up a later direct I/O read
4568ac5b996SDave Chinner  * operation on the same region.
457c59d87c4SChristoph Hellwig  *
4588ac5b996SDave Chinner  * We prevent this by truncating away the delalloc regions on the folio. Because
45982cb1417SChristoph Hellwig  * they are delalloc, we can do this without needing a transaction. Indeed - if
46082cb1417SChristoph Hellwig  * we get ENOSPC errors, we have to be able to do this truncation without a
4618ac5b996SDave Chinner  * transaction as there is no space left for block reservation (typically why
4628ac5b996SDave Chinner  * we see a ENOSPC in writeback).
463c59d87c4SChristoph Hellwig  */
464598ecfbaSChristoph Hellwig static void
4656e478521SMatthew Wilcox (Oracle) xfs_discard_folio(
4666e478521SMatthew Wilcox (Oracle) 	struct folio		*folio,
4676e478521SMatthew Wilcox (Oracle) 	loff_t			pos)
468c59d87c4SChristoph Hellwig {
4697348b322SDave Chinner 	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
47003625721SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
47103625721SChristoph Hellwig 	int			error;
472c59d87c4SChristoph Hellwig 
47375c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
474e9c3a8e8SDarrick J. Wong 		return;
475c59d87c4SChristoph Hellwig 
4764ab45e25SChristoph Hellwig 	xfs_alert_ratelimited(mp,
4776e478521SMatthew Wilcox (Oracle) 		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
4786e478521SMatthew Wilcox (Oracle) 			folio, ip->i_ino, pos);
479c59d87c4SChristoph Hellwig 
4808ac5b996SDave Chinner 	/*
4818ac5b996SDave Chinner 	 * The end of the punch range is always the offset of the the first
4828ac5b996SDave Chinner 	 * byte of the next folio. Hence the end offset is only dependent on the
4838ac5b996SDave Chinner 	 * folio itself and not the start offset that is passed in.
4848ac5b996SDave Chinner 	 */
4857348b322SDave Chinner 	error = xfs_bmap_punch_delalloc_range(ip, pos,
4868ac5b996SDave Chinner 				folio_pos(folio) + folio_size(folio));
4877348b322SDave Chinner 
48875c8c50fSDave Chinner 	if (error && !xfs_is_shutdown(mp))
48903625721SChristoph Hellwig 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
490c59d87c4SChristoph Hellwig }
491c59d87c4SChristoph Hellwig 
492598ecfbaSChristoph Hellwig static const struct iomap_writeback_ops xfs_writeback_ops = {
493598ecfbaSChristoph Hellwig 	.map_blocks		= xfs_map_blocks,
494598ecfbaSChristoph Hellwig 	.prepare_ioend		= xfs_prepare_ioend,
4956e478521SMatthew Wilcox (Oracle) 	.discard_folio		= xfs_discard_folio,
496598ecfbaSChristoph Hellwig };
497c59d87c4SChristoph Hellwig 
498c59d87c4SChristoph Hellwig STATIC int
499c59d87c4SChristoph Hellwig xfs_vm_writepages(
500c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
501c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
502c59d87c4SChristoph Hellwig {
503be225fecSChristoph Hellwig 	struct xfs_writepage_ctx wpc = { };
504fbcc0256SDave Chinner 
505756b1c34SDave Chinner 	/*
506756b1c34SDave Chinner 	 * Writing back data in a transaction context can result in recursive
507756b1c34SDave Chinner 	 * transactions. This is bad, so issue a warning and get out of here.
508756b1c34SDave Chinner 	 */
509756b1c34SDave Chinner 	if (WARN_ON_ONCE(current->journal_info))
510756b1c34SDave Chinner 		return 0;
511756b1c34SDave Chinner 
512c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
513598ecfbaSChristoph Hellwig 	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
514c59d87c4SChristoph Hellwig }
515c59d87c4SChristoph Hellwig 
5166e2608dfSDan Williams STATIC int
5176e2608dfSDan Williams xfs_dax_writepages(
5186e2608dfSDan Williams 	struct address_space	*mapping,
5196e2608dfSDan Williams 	struct writeback_control *wbc)
5206e2608dfSDan Williams {
52130fa529eSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
52230fa529eSChristoph Hellwig 
52330fa529eSChristoph Hellwig 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
5246e2608dfSDan Williams 	return dax_writeback_mapping_range(mapping,
5253f666c56SVivek Goyal 			xfs_inode_buftarg(ip)->bt_daxdev, wbc);
5266e2608dfSDan Williams }
5276e2608dfSDan Williams 
528c59d87c4SChristoph Hellwig STATIC sector_t
529c59d87c4SChristoph Hellwig xfs_vm_bmap(
530c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
531c59d87c4SChristoph Hellwig 	sector_t		block)
532c59d87c4SChristoph Hellwig {
533b84e7722SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
534c59d87c4SChristoph Hellwig 
535b84e7722SChristoph Hellwig 	trace_xfs_vm_bmap(ip);
536db1327b1SDarrick J. Wong 
537db1327b1SDarrick J. Wong 	/*
538db1327b1SDarrick J. Wong 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
539793057e1SIngo Molnar 	 * bypasses the file system for actual I/O.  We really can't allow
540db1327b1SDarrick J. Wong 	 * that on reflinks inodes, so we have to skip out here.  And yes,
541eb5e248dSDarrick J. Wong 	 * 0 is the magic code for a bmap error.
542eb5e248dSDarrick J. Wong 	 *
543eb5e248dSDarrick J. Wong 	 * Since we don't pass back blockdev info, we can't return bmap
544eb5e248dSDarrick J. Wong 	 * information for rt files either.
545db1327b1SDarrick J. Wong 	 */
54666ae56a5SChristoph Hellwig 	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
547db1327b1SDarrick J. Wong 		return 0;
548690c2a38SChristoph Hellwig 	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
549c59d87c4SChristoph Hellwig }
550c59d87c4SChristoph Hellwig 
551c59d87c4SChristoph Hellwig STATIC int
5527479c505SMatthew Wilcox (Oracle) xfs_vm_read_folio(
553c59d87c4SChristoph Hellwig 	struct file		*unused,
5547479c505SMatthew Wilcox (Oracle) 	struct folio		*folio)
555c59d87c4SChristoph Hellwig {
5567479c505SMatthew Wilcox (Oracle) 	return iomap_read_folio(folio, &xfs_read_iomap_ops);
557c59d87c4SChristoph Hellwig }
558c59d87c4SChristoph Hellwig 
5599d24a13aSMatthew Wilcox (Oracle) STATIC void
5609d24a13aSMatthew Wilcox (Oracle) xfs_vm_readahead(
5619d24a13aSMatthew Wilcox (Oracle) 	struct readahead_control	*rac)
562c59d87c4SChristoph Hellwig {
5639d24a13aSMatthew Wilcox (Oracle) 	iomap_readahead(rac, &xfs_read_iomap_ops);
56422e757a4SDave Chinner }
56522e757a4SDave Chinner 
56667482129SDarrick J. Wong static int
56767482129SDarrick J. Wong xfs_iomap_swapfile_activate(
56867482129SDarrick J. Wong 	struct swap_info_struct		*sis,
56967482129SDarrick J. Wong 	struct file			*swap_file,
57067482129SDarrick J. Wong 	sector_t			*span)
57167482129SDarrick J. Wong {
57230fa529eSChristoph Hellwig 	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
573690c2a38SChristoph Hellwig 	return iomap_swapfile_activate(sis, swap_file, span,
574690c2a38SChristoph Hellwig 			&xfs_read_iomap_ops);
57567482129SDarrick J. Wong }
57667482129SDarrick J. Wong 
577c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
5787479c505SMatthew Wilcox (Oracle) 	.read_folio		= xfs_vm_read_folio,
5799d24a13aSMatthew Wilcox (Oracle) 	.readahead		= xfs_vm_readahead,
580c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
581*4ce02c67SRitesh Harjani (IBM) 	.dirty_folio		= iomap_dirty_folio,
5828597447dSMatthew Wilcox (Oracle) 	.release_folio		= iomap_release_folio,
583d82354f6SMatthew Wilcox (Oracle) 	.invalidate_folio	= iomap_invalidate_folio,
584c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
5852ec810d5SMatthew Wilcox (Oracle) 	.migrate_folio		= filemap_migrate_folio,
58682cb1417SChristoph Hellwig 	.is_partially_uptodate  = iomap_is_partially_uptodate,
587c59d87c4SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
58867482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
589c59d87c4SChristoph Hellwig };
5906e2608dfSDan Williams 
5916e2608dfSDan Williams const struct address_space_operations xfs_dax_aops = {
5926e2608dfSDan Williams 	.writepages		= xfs_dax_writepages,
59346de8b97SMatthew Wilcox (Oracle) 	.dirty_folio		= noop_dirty_folio,
59467482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
5956e2608dfSDan Williams };
596