xref: /linux/fs/xfs/xfs_aops.c (revision d82354f6b05fc3b35029b3f75ddbf41b82af3bc8)
10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0
2c59d87c4SChristoph Hellwig /*
3c59d87c4SChristoph Hellwig  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
498c1a7c0SChristoph Hellwig  * Copyright (c) 2016-2018 Christoph Hellwig.
5c59d87c4SChristoph Hellwig  * All Rights Reserved.
6c59d87c4SChristoph Hellwig  */
7c59d87c4SChristoph Hellwig #include "xfs.h"
870a9883cSDave Chinner #include "xfs_shared.h"
9239880efSDave Chinner #include "xfs_format.h"
10239880efSDave Chinner #include "xfs_log_format.h"
11239880efSDave Chinner #include "xfs_trans_resv.h"
12c59d87c4SChristoph Hellwig #include "xfs_mount.h"
13c59d87c4SChristoph Hellwig #include "xfs_inode.h"
14239880efSDave Chinner #include "xfs_trans.h"
15c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
16c59d87c4SChristoph Hellwig #include "xfs_trace.h"
17c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
1868988114SDave Chinner #include "xfs_bmap_util.h"
19ef473667SDarrick J. Wong #include "xfs_reflink.h"
20c59d87c4SChristoph Hellwig 
21fbcc0256SDave Chinner struct xfs_writepage_ctx {
22598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx ctx;
23d9252d52SBrian Foster 	unsigned int		data_seq;
24e666aa37SChristoph Hellwig 	unsigned int		cow_seq;
25fbcc0256SDave Chinner };
26fbcc0256SDave Chinner 
27598ecfbaSChristoph Hellwig static inline struct xfs_writepage_ctx *
28598ecfbaSChristoph Hellwig XFS_WPC(struct iomap_writepage_ctx *ctx)
29598ecfbaSChristoph Hellwig {
30598ecfbaSChristoph Hellwig 	return container_of(ctx, struct xfs_writepage_ctx, ctx);
31598ecfbaSChristoph Hellwig }
32598ecfbaSChristoph Hellwig 
33c59d87c4SChristoph Hellwig /*
34fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
35fc0063c4SChristoph Hellwig  */
36598ecfbaSChristoph Hellwig static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
37fc0063c4SChristoph Hellwig {
38fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
3913d2c10bSChristoph Hellwig 		XFS_I(ioend->io_inode)->i_disk_size;
40fc0063c4SChristoph Hellwig }
41fc0063c4SChristoph Hellwig 
42fc0063c4SChristoph Hellwig /*
432813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
44c59d87c4SChristoph Hellwig  */
45e7a3d7e7SBrian Foster int
46e7a3d7e7SBrian Foster xfs_setfilesize(
472ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
482ba66237SChristoph Hellwig 	xfs_off_t		offset,
492ba66237SChristoph Hellwig 	size_t			size)
50c59d87c4SChristoph Hellwig {
51e7a3d7e7SBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
52e7a3d7e7SBrian Foster 	struct xfs_trans	*tp;
53c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
54e7a3d7e7SBrian Foster 	int			error;
55e7a3d7e7SBrian Foster 
56e7a3d7e7SBrian Foster 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
57e7a3d7e7SBrian Foster 	if (error)
58e7a3d7e7SBrian Foster 		return error;
59c59d87c4SChristoph Hellwig 
60aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
612ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
62281627dfSChristoph Hellwig 	if (!isize) {
63281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
644906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
65281627dfSChristoph Hellwig 		return 0;
66c59d87c4SChristoph Hellwig 	}
67c59d87c4SChristoph Hellwig 
682ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
69281627dfSChristoph Hellwig 
7013d2c10bSChristoph Hellwig 	ip->i_disk_size = isize;
71281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
72281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
73281627dfSChristoph Hellwig 
7470393313SChristoph Hellwig 	return xfs_trans_commit(tp);
75c59d87c4SChristoph Hellwig }
76c59d87c4SChristoph Hellwig 
77c59d87c4SChristoph Hellwig /*
78c59d87c4SChristoph Hellwig  * IO write completion.
79c59d87c4SChristoph Hellwig  */
80c59d87c4SChristoph Hellwig STATIC void
81cb357bf3SDarrick J. Wong xfs_end_ioend(
82598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend)
83c59d87c4SChristoph Hellwig {
84c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
855ca5916bSBrian Foster 	struct xfs_mount	*mp = ip->i_mount;
86787eb485SChristoph Hellwig 	xfs_off_t		offset = ioend->io_offset;
87787eb485SChristoph Hellwig 	size_t			size = ioend->io_size;
8873d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
894e4cbee9SChristoph Hellwig 	int			error;
90c59d87c4SChristoph Hellwig 
91af055e37SBrian Foster 	/*
9273d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
9373d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
9473d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
9573d30d48SChristoph Hellwig 	 */
9673d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
9773d30d48SChristoph Hellwig 
9873d30d48SChristoph Hellwig 	/*
99f9dd7ba4SBhaskar Chowdhury 	 * Just clean up the in-memory structures if the fs has been shut down.
100af055e37SBrian Foster 	 */
1015ca5916bSBrian Foster 	if (xfs_is_shutdown(mp)) {
1020e51a8e1SChristoph Hellwig 		error = -EIO;
10343caeb18SDarrick J. Wong 		goto done;
10443caeb18SDarrick J. Wong 	}
10543caeb18SDarrick J. Wong 
10643caeb18SDarrick J. Wong 	/*
1075ca5916bSBrian Foster 	 * Clean up all COW blocks and underlying data fork delalloc blocks on
1085ca5916bSBrian Foster 	 * I/O error. The delalloc punch is required because this ioend was
1095ca5916bSBrian Foster 	 * mapped to blocks in the COW fork and the associated pages are no
1105ca5916bSBrian Foster 	 * longer dirty. If we don't remove delalloc blocks here, they become
1115ca5916bSBrian Foster 	 * stale and can corrupt free space accounting on unmount.
112c59d87c4SChristoph Hellwig 	 */
1134e4cbee9SChristoph Hellwig 	error = blk_status_to_errno(ioend->io_bio->bi_status);
114787eb485SChristoph Hellwig 	if (unlikely(error)) {
1155ca5916bSBrian Foster 		if (ioend->io_flags & IOMAP_F_SHARED) {
116787eb485SChristoph Hellwig 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
1175ca5916bSBrian Foster 			xfs_bmap_punch_delalloc_range(ip,
1185ca5916bSBrian Foster 						      XFS_B_TO_FSBT(mp, offset),
1195ca5916bSBrian Foster 						      XFS_B_TO_FSB(mp, size));
1205ca5916bSBrian Foster 		}
1215cb13dcdSZhaohongjiang 		goto done;
122787eb485SChristoph Hellwig 	}
123787eb485SChristoph Hellwig 
124787eb485SChristoph Hellwig 	/*
125787eb485SChristoph Hellwig 	 * Success: commit the COW or unwritten blocks if needed.
126787eb485SChristoph Hellwig 	 */
127760fea8bSChristoph Hellwig 	if (ioend->io_flags & IOMAP_F_SHARED)
128787eb485SChristoph Hellwig 		error = xfs_reflink_end_cow(ip, offset, size);
1294e087a3bSChristoph Hellwig 	else if (ioend->io_type == IOMAP_UNWRITTEN)
130ee70daabSEryu Guan 		error = xfs_iomap_write_unwritten(ip, offset, size, false);
13184803fb7SChristoph Hellwig 
1327cd3099fSBrian Foster 	if (!error && xfs_ioend_is_append(ioend))
1337cd3099fSBrian Foster 		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
13404f658eeSChristoph Hellwig done:
135598ecfbaSChristoph Hellwig 	iomap_finish_ioends(ioend, error);
13673d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
1373994fc48SDarrick J. Wong }
1383994fc48SDarrick J. Wong 
139ebb7fb15SDave Chinner /*
140ebb7fb15SDave Chinner  * Finish all pending IO completions that require transactional modifications.
141ebb7fb15SDave Chinner  *
142ebb7fb15SDave Chinner  * We try to merge physical and logically contiguous ioends before completion to
143ebb7fb15SDave Chinner  * minimise the number of transactions we need to perform during IO completion.
144ebb7fb15SDave Chinner  * Both unwritten extent conversion and COW remapping need to iterate and modify
145ebb7fb15SDave Chinner  * one physical extent at a time, so we gain nothing by merging physically
146ebb7fb15SDave Chinner  * discontiguous extents here.
147ebb7fb15SDave Chinner  *
148ebb7fb15SDave Chinner  * The ioend chain length that we can be processing here is largely unbound in
149ebb7fb15SDave Chinner  * length and we may have to perform significant amounts of work on each ioend
150ebb7fb15SDave Chinner  * to complete it. Hence we have to be careful about holding the CPU for too
151ebb7fb15SDave Chinner  * long in this loop.
152ebb7fb15SDave Chinner  */
153cb357bf3SDarrick J. Wong void
154cb357bf3SDarrick J. Wong xfs_end_io(
155cb357bf3SDarrick J. Wong 	struct work_struct	*work)
156cb357bf3SDarrick J. Wong {
157433dad94SChristoph Hellwig 	struct xfs_inode	*ip =
158433dad94SChristoph Hellwig 		container_of(work, struct xfs_inode, i_ioend_work);
159598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend;
160433dad94SChristoph Hellwig 	struct list_head	tmp;
161cb357bf3SDarrick J. Wong 	unsigned long		flags;
162cb357bf3SDarrick J. Wong 
163cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
164433dad94SChristoph Hellwig 	list_replace_init(&ip->i_ioend_list, &tmp);
165cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
166cb357bf3SDarrick J. Wong 
167598ecfbaSChristoph Hellwig 	iomap_sort_ioends(&tmp);
168598ecfbaSChristoph Hellwig 	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
169433dad94SChristoph Hellwig 			io_list))) {
170cb357bf3SDarrick J. Wong 		list_del_init(&ioend->io_list);
1716e552494SBrian Foster 		iomap_ioend_try_merge(ioend, &tmp);
172cb357bf3SDarrick J. Wong 		xfs_end_ioend(ioend);
173ebb7fb15SDave Chinner 		cond_resched();
174cb357bf3SDarrick J. Wong 	}
175cb357bf3SDarrick J. Wong }
176cb357bf3SDarrick J. Wong 
1770e51a8e1SChristoph Hellwig STATIC void
1780e51a8e1SChristoph Hellwig xfs_end_bio(
1790e51a8e1SChristoph Hellwig 	struct bio		*bio)
180c59d87c4SChristoph Hellwig {
181598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend = bio->bi_private;
182cb357bf3SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
183cb357bf3SDarrick J. Wong 	unsigned long		flags;
184c59d87c4SChristoph Hellwig 
185cb357bf3SDarrick J. Wong 	spin_lock_irqsave(&ip->i_ioend_lock, flags);
186cb357bf3SDarrick J. Wong 	if (list_empty(&ip->i_ioend_list))
187598ecfbaSChristoph Hellwig 		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
188cb357bf3SDarrick J. Wong 					 &ip->i_ioend_work));
189cb357bf3SDarrick J. Wong 	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
190cb357bf3SDarrick J. Wong 	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
191c59d87c4SChristoph Hellwig }
192c59d87c4SChristoph Hellwig 
193d9252d52SBrian Foster /*
194d9252d52SBrian Foster  * Fast revalidation of the cached writeback mapping. Return true if the current
195d9252d52SBrian Foster  * mapping is valid, false otherwise.
196d9252d52SBrian Foster  */
197d9252d52SBrian Foster static bool
198d9252d52SBrian Foster xfs_imap_valid(
199598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx	*wpc,
200d9252d52SBrian Foster 	struct xfs_inode		*ip,
2014e087a3bSChristoph Hellwig 	loff_t				offset)
202d9252d52SBrian Foster {
2034e087a3bSChristoph Hellwig 	if (offset < wpc->iomap.offset ||
2044e087a3bSChristoph Hellwig 	    offset >= wpc->iomap.offset + wpc->iomap.length)
205d9252d52SBrian Foster 		return false;
206d9252d52SBrian Foster 	/*
207d9252d52SBrian Foster 	 * If this is a COW mapping, it is sufficient to check that the mapping
208d9252d52SBrian Foster 	 * covers the offset. Be careful to check this first because the caller
209d9252d52SBrian Foster 	 * can revalidate a COW mapping without updating the data seqno.
210d9252d52SBrian Foster 	 */
211760fea8bSChristoph Hellwig 	if (wpc->iomap.flags & IOMAP_F_SHARED)
212d9252d52SBrian Foster 		return true;
213d9252d52SBrian Foster 
214d9252d52SBrian Foster 	/*
215d9252d52SBrian Foster 	 * This is not a COW mapping. Check the sequence number of the data fork
216d9252d52SBrian Foster 	 * because concurrent changes could have invalidated the extent. Check
217d9252d52SBrian Foster 	 * the COW fork because concurrent changes since the last time we
218d9252d52SBrian Foster 	 * checked (and found nothing at this offset) could have added
219d9252d52SBrian Foster 	 * overlapping blocks.
220d9252d52SBrian Foster 	 */
221598ecfbaSChristoph Hellwig 	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
222d9252d52SBrian Foster 		return false;
223d9252d52SBrian Foster 	if (xfs_inode_has_cow_data(ip) &&
224598ecfbaSChristoph Hellwig 	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
225d9252d52SBrian Foster 		return false;
226d9252d52SBrian Foster 	return true;
227d9252d52SBrian Foster }
228d9252d52SBrian Foster 
2294ad765edSChristoph Hellwig /*
2304ad765edSChristoph Hellwig  * Pass in a dellalloc extent and convert it to real extents, return the real
2314e087a3bSChristoph Hellwig  * extent that maps offset_fsb in wpc->iomap.
2324ad765edSChristoph Hellwig  *
2334ad765edSChristoph Hellwig  * The current page is held locked so nothing could have removed the block
2347588cbeeSChristoph Hellwig  * backing offset_fsb, although it could have moved from the COW to the data
2357588cbeeSChristoph Hellwig  * fork by another thread.
2364ad765edSChristoph Hellwig  */
2374ad765edSChristoph Hellwig static int
2384ad765edSChristoph Hellwig xfs_convert_blocks(
239598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx *wpc,
2404ad765edSChristoph Hellwig 	struct xfs_inode	*ip,
241760fea8bSChristoph Hellwig 	int			whichfork,
2424e087a3bSChristoph Hellwig 	loff_t			offset)
2434ad765edSChristoph Hellwig {
2444ad765edSChristoph Hellwig 	int			error;
245598ecfbaSChristoph Hellwig 	unsigned		*seq;
246598ecfbaSChristoph Hellwig 
247598ecfbaSChristoph Hellwig 	if (whichfork == XFS_COW_FORK)
248598ecfbaSChristoph Hellwig 		seq = &XFS_WPC(wpc)->cow_seq;
249598ecfbaSChristoph Hellwig 	else
250598ecfbaSChristoph Hellwig 		seq = &XFS_WPC(wpc)->data_seq;
2514ad765edSChristoph Hellwig 
2524ad765edSChristoph Hellwig 	/*
2534e087a3bSChristoph Hellwig 	 * Attempt to allocate whatever delalloc extent currently backs offset
2544e087a3bSChristoph Hellwig 	 * and put the result into wpc->iomap.  Allocate in a loop because it
2554e087a3bSChristoph Hellwig 	 * may take several attempts to allocate real blocks for a contiguous
2564e087a3bSChristoph Hellwig 	 * delalloc extent if free space is sufficiently fragmented.
2574ad765edSChristoph Hellwig 	 */
2584ad765edSChristoph Hellwig 	do {
259760fea8bSChristoph Hellwig 		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
260598ecfbaSChristoph Hellwig 				&wpc->iomap, seq);
2614ad765edSChristoph Hellwig 		if (error)
2624ad765edSChristoph Hellwig 			return error;
2634e087a3bSChristoph Hellwig 	} while (wpc->iomap.offset + wpc->iomap.length <= offset);
2644ad765edSChristoph Hellwig 
2654ad765edSChristoph Hellwig 	return 0;
2664ad765edSChristoph Hellwig }
2674ad765edSChristoph Hellwig 
268598ecfbaSChristoph Hellwig static int
269c59d87c4SChristoph Hellwig xfs_map_blocks(
270598ecfbaSChristoph Hellwig 	struct iomap_writepage_ctx *wpc,
271c59d87c4SChristoph Hellwig 	struct inode		*inode,
2725c665e5bSChristoph Hellwig 	loff_t			offset)
273c59d87c4SChristoph Hellwig {
274c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
275c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
27693407472SFabian Frederick 	ssize_t			count = i_blocksize(inode);
277b4e29032SChristoph Hellwig 	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
278b4e29032SChristoph Hellwig 	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
279c2f09217SDarrick J. Wong 	xfs_fileoff_t		cow_fsb;
280c2f09217SDarrick J. Wong 	int			whichfork;
2815c665e5bSChristoph Hellwig 	struct xfs_bmbt_irec	imap;
282060d4eaaSChristoph Hellwig 	struct xfs_iext_cursor	icur;
2837588cbeeSChristoph Hellwig 	int			retries = 0;
284c59d87c4SChristoph Hellwig 	int			error = 0;
285c59d87c4SChristoph Hellwig 
28675c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
287d9252d52SBrian Foster 		return -EIO;
288d9252d52SBrian Foster 
289889c65b3SChristoph Hellwig 	/*
290889c65b3SChristoph Hellwig 	 * COW fork blocks can overlap data fork blocks even if the blocks
291889c65b3SChristoph Hellwig 	 * aren't shared.  COW I/O always takes precedent, so we must always
292889c65b3SChristoph Hellwig 	 * check for overlap on reflink inodes unless the mapping is already a
293e666aa37SChristoph Hellwig 	 * COW one, or the COW fork hasn't changed from the last time we looked
294e666aa37SChristoph Hellwig 	 * at it.
295e666aa37SChristoph Hellwig 	 *
296e666aa37SChristoph Hellwig 	 * It's safe to check the COW fork if_seq here without the ILOCK because
297e666aa37SChristoph Hellwig 	 * we've indirectly protected against concurrent updates: writeback has
298e666aa37SChristoph Hellwig 	 * the page locked, which prevents concurrent invalidations by reflink
299e666aa37SChristoph Hellwig 	 * and directio and prevents concurrent buffered writes to the same
300e666aa37SChristoph Hellwig 	 * page.  Changes to if_seq always happen under i_lock, which protects
301e666aa37SChristoph Hellwig 	 * against concurrent updates and provides a memory barrier on the way
302e666aa37SChristoph Hellwig 	 * out that ensures that we always see the current value.
303889c65b3SChristoph Hellwig 	 */
3044e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset))
305889c65b3SChristoph Hellwig 		return 0;
306889c65b3SChristoph Hellwig 
307889c65b3SChristoph Hellwig 	/*
308889c65b3SChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
309889c65b3SChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
310889c65b3SChristoph Hellwig 	 * into real extents.  If we return without a valid map, it means we
311889c65b3SChristoph Hellwig 	 * landed in a hole and we skip the block.
312889c65b3SChristoph Hellwig 	 */
3137588cbeeSChristoph Hellwig retry:
314c2f09217SDarrick J. Wong 	cow_fsb = NULLFILEOFF;
315c2f09217SDarrick J. Wong 	whichfork = XFS_DATA_FORK;
316c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
317b2197a36SChristoph Hellwig 	ASSERT(!xfs_need_iread_extents(&ip->i_df));
318060d4eaaSChristoph Hellwig 
319060d4eaaSChristoph Hellwig 	/*
320060d4eaaSChristoph Hellwig 	 * Check if this is offset is covered by a COW extents, and if yes use
321060d4eaaSChristoph Hellwig 	 * it directly instead of looking up anything in the data fork.
322060d4eaaSChristoph Hellwig 	 */
32351d62690SChristoph Hellwig 	if (xfs_inode_has_cow_data(ip) &&
324e666aa37SChristoph Hellwig 	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
325e666aa37SChristoph Hellwig 		cow_fsb = imap.br_startoff;
326e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
327598ecfbaSChristoph Hellwig 		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
3285c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
329be225fecSChristoph Hellwig 
330760fea8bSChristoph Hellwig 		whichfork = XFS_COW_FORK;
3315c665e5bSChristoph Hellwig 		goto allocate_blocks;
3325c665e5bSChristoph Hellwig 	}
3335c665e5bSChristoph Hellwig 
3345c665e5bSChristoph Hellwig 	/*
335d9252d52SBrian Foster 	 * No COW extent overlap. Revalidate now that we may have updated
336d9252d52SBrian Foster 	 * ->cow_seq. If the data mapping is still valid, we're done.
3375c665e5bSChristoph Hellwig 	 */
3384e087a3bSChristoph Hellwig 	if (xfs_imap_valid(wpc, ip, offset)) {
3395c665e5bSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_SHARED);
3405c665e5bSChristoph Hellwig 		return 0;
3415c665e5bSChristoph Hellwig 	}
3425c665e5bSChristoph Hellwig 
3435c665e5bSChristoph Hellwig 	/*
3445c665e5bSChristoph Hellwig 	 * If we don't have a valid map, now it's time to get a new one for this
3455c665e5bSChristoph Hellwig 	 * offset.  This will convert delayed allocations (including COW ones)
3465c665e5bSChristoph Hellwig 	 * into real extents.
3475c665e5bSChristoph Hellwig 	 */
3483345746eSChristoph Hellwig 	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
3493345746eSChristoph Hellwig 		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
350598ecfbaSChristoph Hellwig 	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
351c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
352c59d87c4SChristoph Hellwig 
35312df89f2SChristoph Hellwig 	/* landed in a hole or beyond EOF? */
3543345746eSChristoph Hellwig 	if (imap.br_startoff > offset_fsb) {
3553345746eSChristoph Hellwig 		imap.br_blockcount = imap.br_startoff - offset_fsb;
3565c665e5bSChristoph Hellwig 		imap.br_startoff = offset_fsb;
3575c665e5bSChristoph Hellwig 		imap.br_startblock = HOLESTARTBLOCK;
358be225fecSChristoph Hellwig 		imap.br_state = XFS_EXT_NORM;
35912df89f2SChristoph Hellwig 	}
36012df89f2SChristoph Hellwig 
361e666aa37SChristoph Hellwig 	/*
36212df89f2SChristoph Hellwig 	 * Truncate to the next COW extent if there is one.  This is the only
36312df89f2SChristoph Hellwig 	 * opportunity to do this because we can skip COW fork lookups for the
36412df89f2SChristoph Hellwig 	 * subsequent blocks in the mapping; however, the requirement to treat
36512df89f2SChristoph Hellwig 	 * the COW range separately remains.
366e666aa37SChristoph Hellwig 	 */
367e666aa37SChristoph Hellwig 	if (cow_fsb != NULLFILEOFF &&
368e666aa37SChristoph Hellwig 	    cow_fsb < imap.br_startoff + imap.br_blockcount)
369e666aa37SChristoph Hellwig 		imap.br_blockcount = cow_fsb - imap.br_startoff;
370e666aa37SChristoph Hellwig 
371be225fecSChristoph Hellwig 	/* got a delalloc extent? */
37212df89f2SChristoph Hellwig 	if (imap.br_startblock != HOLESTARTBLOCK &&
37312df89f2SChristoph Hellwig 	    isnullstartblock(imap.br_startblock))
3745c665e5bSChristoph Hellwig 		goto allocate_blocks;
375e2f6ad46SDave Chinner 
376740fd671SChristoph Hellwig 	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0);
377760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
3785c665e5bSChristoph Hellwig 	return 0;
3795c665e5bSChristoph Hellwig allocate_blocks:
380760fea8bSChristoph Hellwig 	error = xfs_convert_blocks(wpc, ip, whichfork, offset);
3817588cbeeSChristoph Hellwig 	if (error) {
3827588cbeeSChristoph Hellwig 		/*
3837588cbeeSChristoph Hellwig 		 * If we failed to find the extent in the COW fork we might have
3847588cbeeSChristoph Hellwig 		 * raced with a COW to data fork conversion or truncate.
3857588cbeeSChristoph Hellwig 		 * Restart the lookup to catch the extent in the data fork for
3867588cbeeSChristoph Hellwig 		 * the former case, but prevent additional retries to avoid
3877588cbeeSChristoph Hellwig 		 * looping forever for the latter case.
3887588cbeeSChristoph Hellwig 		 */
389760fea8bSChristoph Hellwig 		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
3907588cbeeSChristoph Hellwig 			goto retry;
3917588cbeeSChristoph Hellwig 		ASSERT(error != -EAGAIN);
3925c665e5bSChristoph Hellwig 		return error;
3937588cbeeSChristoph Hellwig 	}
3944ad765edSChristoph Hellwig 
3954ad765edSChristoph Hellwig 	/*
3964ad765edSChristoph Hellwig 	 * Due to merging the return real extent might be larger than the
3974ad765edSChristoph Hellwig 	 * original delalloc one.  Trim the return extent to the next COW
3984ad765edSChristoph Hellwig 	 * boundary again to force a re-lookup.
3994ad765edSChristoph Hellwig 	 */
400760fea8bSChristoph Hellwig 	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
4014e087a3bSChristoph Hellwig 		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);
4024ad765edSChristoph Hellwig 
4034e087a3bSChristoph Hellwig 		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
4044e087a3bSChristoph Hellwig 			wpc->iomap.length = cow_offset - wpc->iomap.offset;
4054e087a3bSChristoph Hellwig 	}
4064e087a3bSChristoph Hellwig 
4074e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset <= offset);
4084e087a3bSChristoph Hellwig 	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
409760fea8bSChristoph Hellwig 	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
410c59d87c4SChristoph Hellwig 	return 0;
411c59d87c4SChristoph Hellwig }
412c59d87c4SChristoph Hellwig 
413598ecfbaSChristoph Hellwig static int
414598ecfbaSChristoph Hellwig xfs_prepare_ioend(
415598ecfbaSChristoph Hellwig 	struct iomap_ioend	*ioend,
416e10de372SDave Chinner 	int			status)
417c59d87c4SChristoph Hellwig {
41873d30d48SChristoph Hellwig 	unsigned int		nofs_flag;
41973d30d48SChristoph Hellwig 
42073d30d48SChristoph Hellwig 	/*
42173d30d48SChristoph Hellwig 	 * We can allocate memory here while doing writeback on behalf of
42273d30d48SChristoph Hellwig 	 * memory reclaim.  To avoid memory allocation deadlocks set the
42373d30d48SChristoph Hellwig 	 * task-wide nofs context for the following operations.
42473d30d48SChristoph Hellwig 	 */
42573d30d48SChristoph Hellwig 	nofs_flag = memalloc_nofs_save();
42673d30d48SChristoph Hellwig 
4275eda4300SDarrick J. Wong 	/* Convert CoW extents to regular */
428760fea8bSChristoph Hellwig 	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
4295eda4300SDarrick J. Wong 		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4305eda4300SDarrick J. Wong 				ioend->io_offset, ioend->io_size);
4315eda4300SDarrick J. Wong 	}
4325eda4300SDarrick J. Wong 
43373d30d48SChristoph Hellwig 	memalloc_nofs_restore(nofs_flag);
43473d30d48SChristoph Hellwig 
4357adb8f14SBrian Foster 	/* send ioends that might require a transaction to the completion wq */
4367adb8f14SBrian Foster 	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
4377adb8f14SBrian Foster 	    (ioend->io_flags & IOMAP_F_SHARED))
4380e51a8e1SChristoph Hellwig 		ioend->io_bio->bi_end_io = xfs_end_bio;
439e10de372SDave Chinner 	return status;
4407bf7f352SDave Chinner }
4417bf7f352SDave Chinner 
442c59d87c4SChristoph Hellwig /*
44382cb1417SChristoph Hellwig  * If the page has delalloc blocks on it, we need to punch them out before we
444c59d87c4SChristoph Hellwig  * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
44582cb1417SChristoph Hellwig  * inode that can trip up a later direct I/O read operation on the same region.
446c59d87c4SChristoph Hellwig  *
44782cb1417SChristoph Hellwig  * We prevent this by truncating away the delalloc regions on the page.  Because
44882cb1417SChristoph Hellwig  * they are delalloc, we can do this without needing a transaction. Indeed - if
44982cb1417SChristoph Hellwig  * we get ENOSPC errors, we have to be able to do this truncation without a
45082cb1417SChristoph Hellwig  * transaction as there is no space left for block reservation (typically why we
45182cb1417SChristoph Hellwig  * see a ENOSPC in writeback).
452c59d87c4SChristoph Hellwig  */
453598ecfbaSChristoph Hellwig static void
4546e478521SMatthew Wilcox (Oracle) xfs_discard_folio(
4556e478521SMatthew Wilcox (Oracle) 	struct folio		*folio,
4566e478521SMatthew Wilcox (Oracle) 	loff_t			pos)
457c59d87c4SChristoph Hellwig {
4586e478521SMatthew Wilcox (Oracle) 	struct inode		*inode = folio->mapping->host;
459c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
46003625721SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
4616e478521SMatthew Wilcox (Oracle) 	size_t			offset = offset_in_folio(folio, pos);
4626e478521SMatthew Wilcox (Oracle) 	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, pos);
4636e478521SMatthew Wilcox (Oracle) 	xfs_fileoff_t		pageoff_fsb = XFS_B_TO_FSBT(mp, offset);
46403625721SChristoph Hellwig 	int			error;
465c59d87c4SChristoph Hellwig 
46675c8c50fSDave Chinner 	if (xfs_is_shutdown(mp))
467c59d87c4SChristoph Hellwig 		goto out_invalidate;
468c59d87c4SChristoph Hellwig 
4694ab45e25SChristoph Hellwig 	xfs_alert_ratelimited(mp,
4706e478521SMatthew Wilcox (Oracle) 		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
4716e478521SMatthew Wilcox (Oracle) 			folio, ip->i_ino, pos);
472c59d87c4SChristoph Hellwig 
47303625721SChristoph Hellwig 	error = xfs_bmap_punch_delalloc_range(ip, start_fsb,
4746e478521SMatthew Wilcox (Oracle) 			i_blocks_per_folio(inode, folio) - pageoff_fsb);
47575c8c50fSDave Chinner 	if (error && !xfs_is_shutdown(mp))
47603625721SChristoph Hellwig 		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
477c59d87c4SChristoph Hellwig out_invalidate:
4786e478521SMatthew Wilcox (Oracle) 	iomap_invalidate_folio(folio, offset, folio_size(folio) - offset);
479c59d87c4SChristoph Hellwig }
480c59d87c4SChristoph Hellwig 
481598ecfbaSChristoph Hellwig static const struct iomap_writeback_ops xfs_writeback_ops = {
482598ecfbaSChristoph Hellwig 	.map_blocks		= xfs_map_blocks,
483598ecfbaSChristoph Hellwig 	.prepare_ioend		= xfs_prepare_ioend,
4846e478521SMatthew Wilcox (Oracle) 	.discard_folio		= xfs_discard_folio,
485598ecfbaSChristoph Hellwig };
486c59d87c4SChristoph Hellwig 
487c59d87c4SChristoph Hellwig STATIC int
488c59d87c4SChristoph Hellwig xfs_vm_writepages(
489c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
490c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
491c59d87c4SChristoph Hellwig {
492be225fecSChristoph Hellwig 	struct xfs_writepage_ctx wpc = { };
493fbcc0256SDave Chinner 
494756b1c34SDave Chinner 	/*
495756b1c34SDave Chinner 	 * Writing back data in a transaction context can result in recursive
496756b1c34SDave Chinner 	 * transactions. This is bad, so issue a warning and get out of here.
497756b1c34SDave Chinner 	 */
498756b1c34SDave Chinner 	if (WARN_ON_ONCE(current->journal_info))
499756b1c34SDave Chinner 		return 0;
500756b1c34SDave Chinner 
501c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
502598ecfbaSChristoph Hellwig 	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
503c59d87c4SChristoph Hellwig }
504c59d87c4SChristoph Hellwig 
5056e2608dfSDan Williams STATIC int
5066e2608dfSDan Williams xfs_dax_writepages(
5076e2608dfSDan Williams 	struct address_space	*mapping,
5086e2608dfSDan Williams 	struct writeback_control *wbc)
5096e2608dfSDan Williams {
51030fa529eSChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
51130fa529eSChristoph Hellwig 
51230fa529eSChristoph Hellwig 	xfs_iflags_clear(ip, XFS_ITRUNCATED);
5136e2608dfSDan Williams 	return dax_writeback_mapping_range(mapping,
5143f666c56SVivek Goyal 			xfs_inode_buftarg(ip)->bt_daxdev, wbc);
5156e2608dfSDan Williams }
5166e2608dfSDan Williams 
517c59d87c4SChristoph Hellwig STATIC sector_t
518c59d87c4SChristoph Hellwig xfs_vm_bmap(
519c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
520c59d87c4SChristoph Hellwig 	sector_t		block)
521c59d87c4SChristoph Hellwig {
522b84e7722SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(mapping->host);
523c59d87c4SChristoph Hellwig 
524b84e7722SChristoph Hellwig 	trace_xfs_vm_bmap(ip);
525db1327b1SDarrick J. Wong 
526db1327b1SDarrick J. Wong 	/*
527db1327b1SDarrick J. Wong 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
528793057e1SIngo Molnar 	 * bypasses the file system for actual I/O.  We really can't allow
529db1327b1SDarrick J. Wong 	 * that on reflinks inodes, so we have to skip out here.  And yes,
530eb5e248dSDarrick J. Wong 	 * 0 is the magic code for a bmap error.
531eb5e248dSDarrick J. Wong 	 *
532eb5e248dSDarrick J. Wong 	 * Since we don't pass back blockdev info, we can't return bmap
533eb5e248dSDarrick J. Wong 	 * information for rt files either.
534db1327b1SDarrick J. Wong 	 */
53566ae56a5SChristoph Hellwig 	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
536db1327b1SDarrick J. Wong 		return 0;
537690c2a38SChristoph Hellwig 	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
538c59d87c4SChristoph Hellwig }
539c59d87c4SChristoph Hellwig 
540c59d87c4SChristoph Hellwig STATIC int
541c59d87c4SChristoph Hellwig xfs_vm_readpage(
542c59d87c4SChristoph Hellwig 	struct file		*unused,
543c59d87c4SChristoph Hellwig 	struct page		*page)
544c59d87c4SChristoph Hellwig {
545690c2a38SChristoph Hellwig 	return iomap_readpage(page, &xfs_read_iomap_ops);
546c59d87c4SChristoph Hellwig }
547c59d87c4SChristoph Hellwig 
5489d24a13aSMatthew Wilcox (Oracle) STATIC void
5499d24a13aSMatthew Wilcox (Oracle) xfs_vm_readahead(
5509d24a13aSMatthew Wilcox (Oracle) 	struct readahead_control	*rac)
551c59d87c4SChristoph Hellwig {
5529d24a13aSMatthew Wilcox (Oracle) 	iomap_readahead(rac, &xfs_read_iomap_ops);
55322e757a4SDave Chinner }
55422e757a4SDave Chinner 
55567482129SDarrick J. Wong static int
55667482129SDarrick J. Wong xfs_iomap_swapfile_activate(
55767482129SDarrick J. Wong 	struct swap_info_struct		*sis,
55867482129SDarrick J. Wong 	struct file			*swap_file,
55967482129SDarrick J. Wong 	sector_t			*span)
56067482129SDarrick J. Wong {
56130fa529eSChristoph Hellwig 	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
562690c2a38SChristoph Hellwig 	return iomap_swapfile_activate(sis, swap_file, span,
563690c2a38SChristoph Hellwig 			&xfs_read_iomap_ops);
56467482129SDarrick J. Wong }
56567482129SDarrick J. Wong 
566c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
567c59d87c4SChristoph Hellwig 	.readpage		= xfs_vm_readpage,
5689d24a13aSMatthew Wilcox (Oracle) 	.readahead		= xfs_vm_readahead,
569c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
570fd7353f8SMatthew Wilcox (Oracle) 	.set_page_dirty		= __set_page_dirty_nobuffers,
5719e91c572SChristoph Hellwig 	.releasepage		= iomap_releasepage,
572*d82354f6SMatthew Wilcox (Oracle) 	.invalidate_folio	= iomap_invalidate_folio,
573c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
5746e2608dfSDan Williams 	.direct_IO		= noop_direct_IO,
57582cb1417SChristoph Hellwig 	.migratepage		= iomap_migrate_page,
57682cb1417SChristoph Hellwig 	.is_partially_uptodate  = iomap_is_partially_uptodate,
577c59d87c4SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
57867482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
579c59d87c4SChristoph Hellwig };
5806e2608dfSDan Williams 
5816e2608dfSDan Williams const struct address_space_operations xfs_dax_aops = {
5826e2608dfSDan Williams 	.writepages		= xfs_dax_writepages,
5836e2608dfSDan Williams 	.direct_IO		= noop_direct_IO,
584b82a96c9SMatthew Wilcox (Oracle) 	.set_page_dirty		= __set_page_dirty_no_writeback,
5856e2608dfSDan Williams 	.invalidatepage		= noop_invalidatepage,
58667482129SDarrick J. Wong 	.swap_activate		= xfs_iomap_swapfile_activate,
5876e2608dfSDan Williams };
588