xref: /linux/fs/xfs/xfs_aops.c (revision 43caeb187deb92b3cc343fce9c2310512f6ac9cd)
1c59d87c4SChristoph Hellwig /*
2c59d87c4SChristoph Hellwig  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3c59d87c4SChristoph Hellwig  * All Rights Reserved.
4c59d87c4SChristoph Hellwig  *
5c59d87c4SChristoph Hellwig  * This program is free software; you can redistribute it and/or
6c59d87c4SChristoph Hellwig  * modify it under the terms of the GNU General Public License as
7c59d87c4SChristoph Hellwig  * published by the Free Software Foundation.
8c59d87c4SChristoph Hellwig  *
9c59d87c4SChristoph Hellwig  * This program is distributed in the hope that it would be useful,
10c59d87c4SChristoph Hellwig  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11c59d87c4SChristoph Hellwig  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12c59d87c4SChristoph Hellwig  * GNU General Public License for more details.
13c59d87c4SChristoph Hellwig  *
14c59d87c4SChristoph Hellwig  * You should have received a copy of the GNU General Public License
15c59d87c4SChristoph Hellwig  * along with this program; if not, write the Free Software Foundation,
16c59d87c4SChristoph Hellwig  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17c59d87c4SChristoph Hellwig  */
18c59d87c4SChristoph Hellwig #include "xfs.h"
1970a9883cSDave Chinner #include "xfs_shared.h"
20239880efSDave Chinner #include "xfs_format.h"
21239880efSDave Chinner #include "xfs_log_format.h"
22239880efSDave Chinner #include "xfs_trans_resv.h"
23c59d87c4SChristoph Hellwig #include "xfs_mount.h"
24c59d87c4SChristoph Hellwig #include "xfs_inode.h"
25239880efSDave Chinner #include "xfs_trans.h"
26281627dfSChristoph Hellwig #include "xfs_inode_item.h"
27c59d87c4SChristoph Hellwig #include "xfs_alloc.h"
28c59d87c4SChristoph Hellwig #include "xfs_error.h"
29c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
30c59d87c4SChristoph Hellwig #include "xfs_trace.h"
31c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
3268988114SDave Chinner #include "xfs_bmap_util.h"
33a4fbe6abSDave Chinner #include "xfs_bmap_btree.h"
34ef473667SDarrick J. Wong #include "xfs_reflink.h"
35c59d87c4SChristoph Hellwig #include <linux/gfp.h>
36c59d87c4SChristoph Hellwig #include <linux/mpage.h>
37c59d87c4SChristoph Hellwig #include <linux/pagevec.h>
38c59d87c4SChristoph Hellwig #include <linux/writeback.h>
39c59d87c4SChristoph Hellwig 
40273dda76SChristoph Hellwig /* flags for direct write completions */
41273dda76SChristoph Hellwig #define XFS_DIO_FLAG_UNWRITTEN	(1 << 0)
42273dda76SChristoph Hellwig #define XFS_DIO_FLAG_APPEND	(1 << 1)
43273dda76SChristoph Hellwig 
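/*
 * A minimal sketch of how a direct write completion handler might consume
 * these flags; the real handler lives further down this file, outside this
 * hunk, so treat the exact shape as an assumption:
 *
 *	if (flags & XFS_DIO_FLAG_UNWRITTEN)
 *		error = xfs_iomap_write_unwritten(ip, offset, size);
 *	else if (flags & XFS_DIO_FLAG_APPEND)
 *		error = xfs_setfilesize(ip, offset, size);
 */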
44fbcc0256SDave Chinner /*
45fbcc0256SDave Chinner  * structure owned by writepages and passed to the individual writepage calls
46fbcc0256SDave Chinner  */
47fbcc0256SDave Chinner struct xfs_writepage_ctx {
48fbcc0256SDave Chinner 	struct xfs_bmbt_irec    imap;
49fbcc0256SDave Chinner 	bool			imap_valid;
50fbcc0256SDave Chinner 	unsigned int		io_type;
51fbcc0256SDave Chinner 	struct xfs_ioend	*ioend;
52fbcc0256SDave Chinner 	sector_t		last_block;
53fbcc0256SDave Chinner };
54fbcc0256SDave Chinner 
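/*
 * A minimal sketch of how this context is threaded through a writeback pass,
 * assuming the write_cache_pages() based ->writepages path used elsewhere in
 * this file (the call site itself is outside this hunk):
 *
 *	struct xfs_writepage_ctx wpc = { };
 *	int ret;
 *
 *	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
 *	if (wpc.ioend)
 *		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
 *
 * Whatever ioend is still cached on the context at the end of the pass is
 * submitted by the caller, as described above xfs_writepage_map() below.
 */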
55c59d87c4SChristoph Hellwig void
56c59d87c4SChristoph Hellwig xfs_count_page_state(
57c59d87c4SChristoph Hellwig 	struct page		*page,
58c59d87c4SChristoph Hellwig 	int			*delalloc,
59c59d87c4SChristoph Hellwig 	int			*unwritten)
60c59d87c4SChristoph Hellwig {
61c59d87c4SChristoph Hellwig 	struct buffer_head	*bh, *head;
62c59d87c4SChristoph Hellwig 
63c59d87c4SChristoph Hellwig 	*delalloc = *unwritten = 0;
64c59d87c4SChristoph Hellwig 
65c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
66c59d87c4SChristoph Hellwig 	do {
67c59d87c4SChristoph Hellwig 		if (buffer_unwritten(bh))
68c59d87c4SChristoph Hellwig 			(*unwritten) = 1;
69c59d87c4SChristoph Hellwig 		else if (buffer_delay(bh))
70c59d87c4SChristoph Hellwig 			(*delalloc) = 1;
71c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
72c59d87c4SChristoph Hellwig }
73c59d87c4SChristoph Hellwig 
7420a90f58SRoss Zwisler struct block_device *
75c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode(
76c59d87c4SChristoph Hellwig 	struct inode		*inode)
77c59d87c4SChristoph Hellwig {
78c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
79c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
80c59d87c4SChristoph Hellwig 
81c59d87c4SChristoph Hellwig 	if (XFS_IS_REALTIME_INODE(ip))
82c59d87c4SChristoph Hellwig 		return mp->m_rtdev_targp->bt_bdev;
83c59d87c4SChristoph Hellwig 	else
84c59d87c4SChristoph Hellwig 		return mp->m_ddev_targp->bt_bdev;
85c59d87c4SChristoph Hellwig }
86c59d87c4SChristoph Hellwig 
87c59d87c4SChristoph Hellwig /*
8837992c18SDave Chinner  * We're now finished for good with this page.  Update the page state via the
8937992c18SDave Chinner  * associated buffer_heads, paying attention to the start and end offsets that
9037992c18SDave Chinner  * we need to process on the page.
9128b783e4SDave Chinner  *
9228b783e4SDave Chinner  * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
9328b783e4SDave Chinner  * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
9428b783e4SDave Chinner  * the page at all, as we may be racing with memory reclaim, which can free both
9528b783e4SDave Chinner  * the bufferhead chain and the page because it sees the page as clean and
9628b783e4SDave Chinner  * unused.
9737992c18SDave Chinner  */
9837992c18SDave Chinner static void
9937992c18SDave Chinner xfs_finish_page_writeback(
10037992c18SDave Chinner 	struct inode		*inode,
10137992c18SDave Chinner 	struct bio_vec		*bvec,
10237992c18SDave Chinner 	int			error)
10337992c18SDave Chinner {
10437992c18SDave Chinner 	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
10528b783e4SDave Chinner 	struct buffer_head	*head, *bh, *next;
10637992c18SDave Chinner 	unsigned int		off = 0;
10728b783e4SDave Chinner 	unsigned int		bsize;
10837992c18SDave Chinner 
10937992c18SDave Chinner 	ASSERT(bvec->bv_offset < PAGE_SIZE);
110690a7871SChristoph Hellwig 	ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0);
11137992c18SDave Chinner 	ASSERT(end < PAGE_SIZE);
112690a7871SChristoph Hellwig 	ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0);
11337992c18SDave Chinner 
11437992c18SDave Chinner 	bh = head = page_buffers(bvec->bv_page);
11537992c18SDave Chinner 
11628b783e4SDave Chinner 	bsize = bh->b_size;
11737992c18SDave Chinner 	do {
11828b783e4SDave Chinner 		next = bh->b_this_page;
11937992c18SDave Chinner 		if (off < bvec->bv_offset)
12037992c18SDave Chinner 			goto next_bh;
12137992c18SDave Chinner 		if (off > end)
12237992c18SDave Chinner 			break;
12337992c18SDave Chinner 		bh->b_end_io(bh, !error);
12437992c18SDave Chinner next_bh:
12528b783e4SDave Chinner 		off += bsize;
12628b783e4SDave Chinner 	} while ((bh = next) != head);
12737992c18SDave Chinner }
12837992c18SDave Chinner 
12937992c18SDave Chinner /*
13037992c18SDave Chinner  * We're now finished for good with this ioend structure.  Update the page
13137992c18SDave Chinner  * state, release holds on bios, and finally free up memory.  Do not use the
13237992c18SDave Chinner  * ioend after this.
133c59d87c4SChristoph Hellwig  */
134c59d87c4SChristoph Hellwig STATIC void
135c59d87c4SChristoph Hellwig xfs_destroy_ioend(
1360e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
1370e51a8e1SChristoph Hellwig 	int			error)
138c59d87c4SChristoph Hellwig {
13937992c18SDave Chinner 	struct inode		*inode = ioend->io_inode;
1400e51a8e1SChristoph Hellwig 	struct bio		*last = ioend->io_bio;
14137992c18SDave Chinner 	struct bio		*bio, *next;
142c59d87c4SChristoph Hellwig 
1430e51a8e1SChristoph Hellwig 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
14437992c18SDave Chinner 		struct bio_vec	*bvec;
14537992c18SDave Chinner 		int		i;
14637992c18SDave Chinner 
1470e51a8e1SChristoph Hellwig 		/*
1480e51a8e1SChristoph Hellwig 		 * For the last bio, bi_private points to the ioend, so we
1490e51a8e1SChristoph Hellwig 		 * need to explicitly end the iteration here.
1500e51a8e1SChristoph Hellwig 		 */
1510e51a8e1SChristoph Hellwig 		if (bio == last)
1520e51a8e1SChristoph Hellwig 			next = NULL;
1530e51a8e1SChristoph Hellwig 		else
15437992c18SDave Chinner 			next = bio->bi_private;
15537992c18SDave Chinner 
15637992c18SDave Chinner 		/* walk each page on bio, ending page IO on them */
15737992c18SDave Chinner 		bio_for_each_segment_all(bvec, bio, i)
15837992c18SDave Chinner 			xfs_finish_page_writeback(inode, bvec, error);
15937992c18SDave Chinner 
16037992c18SDave Chinner 		bio_put(bio);
161c59d87c4SChristoph Hellwig 	}
162c59d87c4SChristoph Hellwig }
163c59d87c4SChristoph Hellwig 
164c59d87c4SChristoph Hellwig /*
165fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
166fc0063c4SChristoph Hellwig  */
167fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
168fc0063c4SChristoph Hellwig {
169fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
170fc0063c4SChristoph Hellwig 		XFS_I(ioend->io_inode)->i_d.di_size;
171fc0063c4SChristoph Hellwig }
172fc0063c4SChristoph Hellwig 
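/*
 * For example, with an on-disk size (di_size) of 4096 bytes, an ioend with
 * io_offset 4096 and io_size 8192 ends at byte 12288 and so counts as an
 * append, while one covering bytes 0..4095 does not.
 */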
173281627dfSChristoph Hellwig STATIC int
174281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc(
175281627dfSChristoph Hellwig 	struct xfs_ioend	*ioend)
176281627dfSChristoph Hellwig {
177281627dfSChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
178281627dfSChristoph Hellwig 	struct xfs_trans	*tp;
179281627dfSChristoph Hellwig 	int			error;
180281627dfSChristoph Hellwig 
181253f4911SChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
182253f4911SChristoph Hellwig 	if (error)
183281627dfSChristoph Hellwig 		return error;
184281627dfSChristoph Hellwig 
185281627dfSChristoph Hellwig 	ioend->io_append_trans = tp;
186281627dfSChristoph Hellwig 
187281627dfSChristoph Hellwig 	/*
188437a255aSDave Chinner 	 * We may pass freeze protection with a transaction.  So tell lockdep
189d9457dc0SJan Kara 	 * we released it.
190d9457dc0SJan Kara 	 */
191bee9182dSOleg Nesterov 	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
192d9457dc0SJan Kara 	/*
193281627dfSChristoph Hellwig 	 * We hand off the transaction to the completion thread now, so
194281627dfSChristoph Hellwig 	 * clear the flag here.
195281627dfSChristoph Hellwig 	 */
196281627dfSChristoph Hellwig 	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
197281627dfSChristoph Hellwig 	return 0;
198281627dfSChristoph Hellwig }
199281627dfSChristoph Hellwig 
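/*
 * The freeze protection and PF_FSTRANS handoff between the submission thread
 * (above) and the completion thread (xfs_setfilesize_ioend() below) pairs up
 * roughly as follows:
 *
 *	submission thread			completion thread
 *	xfs_trans_alloc()
 *	__sb_writers_release()
 *	current_restore_flags_nested()
 *						current_set_flags_nested()
 *						__sb_writers_acquired()
 *						__xfs_setfilesize()
 */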
200fc0063c4SChristoph Hellwig /*
2012813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
202c59d87c4SChristoph Hellwig  */
203281627dfSChristoph Hellwig STATIC int
204e372843aSChristoph Hellwig __xfs_setfilesize(
2052ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
2062ba66237SChristoph Hellwig 	struct xfs_trans	*tp,
2072ba66237SChristoph Hellwig 	xfs_off_t		offset,
2082ba66237SChristoph Hellwig 	size_t			size)
209c59d87c4SChristoph Hellwig {
210c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
211c59d87c4SChristoph Hellwig 
212aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
2132ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
214281627dfSChristoph Hellwig 	if (!isize) {
215281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
2164906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
217281627dfSChristoph Hellwig 		return 0;
218c59d87c4SChristoph Hellwig 	}
219c59d87c4SChristoph Hellwig 
2202ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
221281627dfSChristoph Hellwig 
222281627dfSChristoph Hellwig 	ip->i_d.di_size = isize;
223281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
224281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
225281627dfSChristoph Hellwig 
22670393313SChristoph Hellwig 	return xfs_trans_commit(tp);
227c59d87c4SChristoph Hellwig }
228c59d87c4SChristoph Hellwig 
229e372843aSChristoph Hellwig int
230e372843aSChristoph Hellwig xfs_setfilesize(
231e372843aSChristoph Hellwig 	struct xfs_inode	*ip,
232e372843aSChristoph Hellwig 	xfs_off_t		offset,
233e372843aSChristoph Hellwig 	size_t			size)
234e372843aSChristoph Hellwig {
235e372843aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
236e372843aSChristoph Hellwig 	struct xfs_trans	*tp;
237e372843aSChristoph Hellwig 	int			error;
238e372843aSChristoph Hellwig 
239e372843aSChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
240e372843aSChristoph Hellwig 	if (error)
241e372843aSChristoph Hellwig 		return error;
242e372843aSChristoph Hellwig 
243e372843aSChristoph Hellwig 	return __xfs_setfilesize(ip, tp, offset, size);
244e372843aSChristoph Hellwig }
245e372843aSChristoph Hellwig 
2462ba66237SChristoph Hellwig STATIC int
2472ba66237SChristoph Hellwig xfs_setfilesize_ioend(
2480e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
2490e51a8e1SChristoph Hellwig 	int			error)
2502ba66237SChristoph Hellwig {
2512ba66237SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
2522ba66237SChristoph Hellwig 	struct xfs_trans	*tp = ioend->io_append_trans;
2532ba66237SChristoph Hellwig 
2542ba66237SChristoph Hellwig 	/*
2552ba66237SChristoph Hellwig 	 * The transaction may have been allocated in the I/O submission thread,
2562ba66237SChristoph Hellwig 	 * thus we need to mark ourselves as being in a transaction manually.
2572ba66237SChristoph Hellwig 	 * Similarly for freeze protection.
2582ba66237SChristoph Hellwig 	 */
2592ba66237SChristoph Hellwig 	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
260bee9182dSOleg Nesterov 	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
2612ba66237SChristoph Hellwig 
2625cb13dcdSZhaohongjiang 	/* we abort the update if there was an IO error */
2630e51a8e1SChristoph Hellwig 	if (error) {
2645cb13dcdSZhaohongjiang 		xfs_trans_cancel(tp);
2650e51a8e1SChristoph Hellwig 		return error;
2665cb13dcdSZhaohongjiang 	}
2675cb13dcdSZhaohongjiang 
268e372843aSChristoph Hellwig 	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
2692ba66237SChristoph Hellwig }
2702ba66237SChristoph Hellwig 
271c59d87c4SChristoph Hellwig /*
272c59d87c4SChristoph Hellwig  * IO write completion.
273c59d87c4SChristoph Hellwig  */
274c59d87c4SChristoph Hellwig STATIC void
275c59d87c4SChristoph Hellwig xfs_end_io(
276c59d87c4SChristoph Hellwig 	struct work_struct *work)
277c59d87c4SChristoph Hellwig {
2780e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend =
2790e51a8e1SChristoph Hellwig 		container_of(work, struct xfs_ioend, io_work);
280c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
2810e51a8e1SChristoph Hellwig 	int			error = ioend->io_bio->bi_error;
282c59d87c4SChristoph Hellwig 
283af055e37SBrian Foster 	/*
284af055e37SBrian Foster 	 * Set an error if the mount has shut down and proceed with end I/O
285af055e37SBrian Foster 	 * processing so it can perform whatever cleanups are necessary.
286af055e37SBrian Foster 	 */
287af055e37SBrian Foster 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
2880e51a8e1SChristoph Hellwig 		error = -EIO;
28904f658eeSChristoph Hellwig 
290c59d87c4SChristoph Hellwig 	/*
291*43caeb18SDarrick J. Wong 	 * For a CoW extent, we need to move the mapping from the CoW fork
292*43caeb18SDarrick J. Wong 	 * to the data fork.  If instead an error happened, just dump the
293*43caeb18SDarrick J. Wong 	 * new blocks.
294*43caeb18SDarrick J. Wong 	 */
295*43caeb18SDarrick J. Wong 	if (ioend->io_type == XFS_IO_COW) {
296*43caeb18SDarrick J. Wong 		if (error)
297*43caeb18SDarrick J. Wong 			goto done;
298*43caeb18SDarrick J. Wong 		if (ioend->io_bio->bi_error) {
299*43caeb18SDarrick J. Wong 			error = xfs_reflink_cancel_cow_range(ip,
300*43caeb18SDarrick J. Wong 					ioend->io_offset, ioend->io_size);
301*43caeb18SDarrick J. Wong 			goto done;
302*43caeb18SDarrick J. Wong 		}
303*43caeb18SDarrick J. Wong 		error = xfs_reflink_end_cow(ip, ioend->io_offset,
304*43caeb18SDarrick J. Wong 				ioend->io_size);
305*43caeb18SDarrick J. Wong 		if (error)
306*43caeb18SDarrick J. Wong 			goto done;
307*43caeb18SDarrick J. Wong 	}
308*43caeb18SDarrick J. Wong 
309*43caeb18SDarrick J. Wong 	/*
310c59d87c4SChristoph Hellwig 	 * For unwritten extents we need to issue transactions to convert a
311c59d87c4SChristoph Hellwig 	 * range to normal written extents after the data I/O has finished.
3125cb13dcdSZhaohongjiang 	 * Detecting and handling completion IO errors is done individually
3135cb13dcdSZhaohongjiang 	 * for each case as different cleanup operations need to be performed
3145cb13dcdSZhaohongjiang 	 * on error.
315c59d87c4SChristoph Hellwig 	 */
3160d882a36SAlain Renaud 	if (ioend->io_type == XFS_IO_UNWRITTEN) {
3170e51a8e1SChristoph Hellwig 		if (error)
3185cb13dcdSZhaohongjiang 			goto done;
319c59d87c4SChristoph Hellwig 		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
320c59d87c4SChristoph Hellwig 						  ioend->io_size);
321281627dfSChristoph Hellwig 	} else if (ioend->io_append_trans) {
3220e51a8e1SChristoph Hellwig 		error = xfs_setfilesize_ioend(ioend, error);
32384803fb7SChristoph Hellwig 	} else {
324*43caeb18SDarrick J. Wong 		ASSERT(!xfs_ioend_is_append(ioend) ||
325*43caeb18SDarrick J. Wong 		       ioend->io_type == XFS_IO_COW);
32684803fb7SChristoph Hellwig 	}
32784803fb7SChristoph Hellwig 
32804f658eeSChristoph Hellwig done:
3290e51a8e1SChristoph Hellwig 	xfs_destroy_ioend(ioend, error);
330c59d87c4SChristoph Hellwig }
331c59d87c4SChristoph Hellwig 
3320e51a8e1SChristoph Hellwig STATIC void
3330e51a8e1SChristoph Hellwig xfs_end_bio(
3340e51a8e1SChristoph Hellwig 	struct bio		*bio)
335c59d87c4SChristoph Hellwig {
3360e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend = bio->bi_private;
3370e51a8e1SChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
338c59d87c4SChristoph Hellwig 
339*43caeb18SDarrick J. Wong 	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
3400e51a8e1SChristoph Hellwig 		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
3410e51a8e1SChristoph Hellwig 	else if (ioend->io_append_trans)
3420e51a8e1SChristoph Hellwig 		queue_work(mp->m_data_workqueue, &ioend->io_work);
3430e51a8e1SChristoph Hellwig 	else
3440e51a8e1SChristoph Hellwig 		xfs_destroy_ioend(ioend, bio->bi_error);
345c59d87c4SChristoph Hellwig }
346c59d87c4SChristoph Hellwig 
347c59d87c4SChristoph Hellwig STATIC int
348c59d87c4SChristoph Hellwig xfs_map_blocks(
349c59d87c4SChristoph Hellwig 	struct inode		*inode,
350c59d87c4SChristoph Hellwig 	loff_t			offset,
351c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
352988ef927SDave Chinner 	int			type)
353c59d87c4SChristoph Hellwig {
354c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
355c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
356c59d87c4SChristoph Hellwig 	ssize_t			count = 1 << inode->i_blkbits;
357c59d87c4SChristoph Hellwig 	xfs_fileoff_t		offset_fsb, end_fsb;
358c59d87c4SChristoph Hellwig 	int			error = 0;
359c59d87c4SChristoph Hellwig 	int			bmapi_flags = XFS_BMAPI_ENTIRE;
360c59d87c4SChristoph Hellwig 	int			nimaps = 1;
361c59d87c4SChristoph Hellwig 
362c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(mp))
363b474c7aeSEric Sandeen 		return -EIO;
364c59d87c4SChristoph Hellwig 
365ef473667SDarrick J. Wong 	ASSERT(type != XFS_IO_COW);
3660d882a36SAlain Renaud 	if (type == XFS_IO_UNWRITTEN)
367c59d87c4SChristoph Hellwig 		bmapi_flags |= XFS_BMAPI_IGSTATE;
368c59d87c4SChristoph Hellwig 
369c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
370c59d87c4SChristoph Hellwig 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
371c59d87c4SChristoph Hellwig 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
372d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
373c59d87c4SChristoph Hellwig 
374d2c28191SDave Chinner 	if (offset + count > mp->m_super->s_maxbytes)
375d2c28191SDave Chinner 		count = mp->m_super->s_maxbytes - offset;
376c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
377c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
3785c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
3795c8ed202SDave Chinner 				imap, &nimaps, bmapi_flags);
380ef473667SDarrick J. Wong 	/*
381ef473667SDarrick J. Wong 	 * Truncate an overwrite extent if there's a pending CoW
382ef473667SDarrick J. Wong 	 * reservation before the end of this extent.  This forces us
383ef473667SDarrick J. Wong 	 * to come back to writepage to take care of the CoW.
384ef473667SDarrick J. Wong 	 */
385ef473667SDarrick J. Wong 	if (nimaps && type == XFS_IO_OVERWRITE)
386ef473667SDarrick J. Wong 		xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
387c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
388c59d87c4SChristoph Hellwig 
389c59d87c4SChristoph Hellwig 	if (error)
3902451337dSDave Chinner 		return error;
391c59d87c4SChristoph Hellwig 
3920d882a36SAlain Renaud 	if (type == XFS_IO_DELALLOC &&
393c59d87c4SChristoph Hellwig 	    (!nimaps || isnullstartblock(imap->br_startblock))) {
39460b4984fSDarrick J. Wong 		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
39560b4984fSDarrick J. Wong 				imap);
396c59d87c4SChristoph Hellwig 		if (!error)
397ef473667SDarrick J. Wong 			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
3982451337dSDave Chinner 		return error;
399c59d87c4SChristoph Hellwig 	}
400c59d87c4SChristoph Hellwig 
401c59d87c4SChristoph Hellwig #ifdef DEBUG
4020d882a36SAlain Renaud 	if (type == XFS_IO_UNWRITTEN) {
403c59d87c4SChristoph Hellwig 		ASSERT(nimaps);
404c59d87c4SChristoph Hellwig 		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
405c59d87c4SChristoph Hellwig 		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
406c59d87c4SChristoph Hellwig 	}
407c59d87c4SChristoph Hellwig #endif
408c59d87c4SChristoph Hellwig 	if (nimaps)
409c59d87c4SChristoph Hellwig 		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
410c59d87c4SChristoph Hellwig 	return 0;
411c59d87c4SChristoph Hellwig }
412c59d87c4SChristoph Hellwig 
413fbcc0256SDave Chinner STATIC bool
414c59d87c4SChristoph Hellwig xfs_imap_valid(
415c59d87c4SChristoph Hellwig 	struct inode		*inode,
416c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
417c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
418c59d87c4SChristoph Hellwig {
419c59d87c4SChristoph Hellwig 	offset >>= inode->i_blkbits;
420c59d87c4SChristoph Hellwig 
421c59d87c4SChristoph Hellwig 	return offset >= imap->br_startoff &&
422c59d87c4SChristoph Hellwig 		offset < imap->br_startoff + imap->br_blockcount;
423c59d87c4SChristoph Hellwig }
424c59d87c4SChristoph Hellwig 
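/*
 * For example, with 4k blocks (i_blkbits == 12) and an imap starting at
 * br_startoff 8 with br_blockcount 4, byte offsets 32768 through 49151 are
 * covered and offset 49152 is not, so writeback would have to go back to
 * xfs_map_blocks() for a new mapping at that point.
 */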
425c59d87c4SChristoph Hellwig STATIC void
426c59d87c4SChristoph Hellwig xfs_start_buffer_writeback(
427c59d87c4SChristoph Hellwig 	struct buffer_head	*bh)
428c59d87c4SChristoph Hellwig {
429c59d87c4SChristoph Hellwig 	ASSERT(buffer_mapped(bh));
430c59d87c4SChristoph Hellwig 	ASSERT(buffer_locked(bh));
431c59d87c4SChristoph Hellwig 	ASSERT(!buffer_delay(bh));
432c59d87c4SChristoph Hellwig 	ASSERT(!buffer_unwritten(bh));
433c59d87c4SChristoph Hellwig 
434c59d87c4SChristoph Hellwig 	mark_buffer_async_write(bh);
435c59d87c4SChristoph Hellwig 	set_buffer_uptodate(bh);
436c59d87c4SChristoph Hellwig 	clear_buffer_dirty(bh);
437c59d87c4SChristoph Hellwig }
438c59d87c4SChristoph Hellwig 
439c59d87c4SChristoph Hellwig STATIC void
440c59d87c4SChristoph Hellwig xfs_start_page_writeback(
441c59d87c4SChristoph Hellwig 	struct page		*page,
442e10de372SDave Chinner 	int			clear_dirty)
443c59d87c4SChristoph Hellwig {
444c59d87c4SChristoph Hellwig 	ASSERT(PageLocked(page));
445c59d87c4SChristoph Hellwig 	ASSERT(!PageWriteback(page));
4460d085a52SDave Chinner 
4470d085a52SDave Chinner 	/*
4480d085a52SDave Chinner 	 * if the page was not fully cleaned, we need to ensure that the higher
4490d085a52SDave Chinner 	 * layers come back to it correctly. That means we need to keep the page
4500d085a52SDave Chinner 	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
4510d085a52SDave Chinner 	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
4520d085a52SDave Chinner 	 * write this page in this writeback sweep will be made.
4530d085a52SDave Chinner 	 */
4540d085a52SDave Chinner 	if (clear_dirty) {
455c59d87c4SChristoph Hellwig 		clear_page_dirty_for_io(page);
456c59d87c4SChristoph Hellwig 		set_page_writeback(page);
4570d085a52SDave Chinner 	} else
4580d085a52SDave Chinner 		set_page_writeback_keepwrite(page);
4590d085a52SDave Chinner 
460c59d87c4SChristoph Hellwig 	unlock_page(page);
461c59d87c4SChristoph Hellwig }
462c59d87c4SChristoph Hellwig 
463c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
464c59d87c4SChristoph Hellwig {
465c59d87c4SChristoph Hellwig 	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
466c59d87c4SChristoph Hellwig }
467c59d87c4SChristoph Hellwig 
468c59d87c4SChristoph Hellwig /*
469bb18782aSDave Chinner  * Submit the bio for an ioend. We are passed an ioend with a bio attached to
470bb18782aSDave Chinner  * it, and we submit that bio. The ioend may be used for multiple bio
471bb18782aSDave Chinner  * submissions, so we only want to allocate an append transaction for the ioend
472bb18782aSDave Chinner  * once. In the case of multiple bio submission, each bio will take an IO
473bb18782aSDave Chinner  * reference to the ioend to ensure that the ioend completion is only done once
474bb18782aSDave Chinner  * all bios have been submitted and the ioend is really done.
4757bf7f352SDave Chinner  *
4767bf7f352SDave Chinner  * If @status is non-zero, it means that we have a situation where some part of
4777bf7f352SDave Chinner  * the submission process has failed after we have marked pages for writeback
478bb18782aSDave Chinner  * and unlocked them. In this situation, we need to fail the bio and ioend
479bb18782aSDave Chinner  * rather than submit them to IO. This typically only happens on a filesystem
480bb18782aSDave Chinner  * shutdown.
481c59d87c4SChristoph Hellwig  */
482e10de372SDave Chinner STATIC int
483c59d87c4SChristoph Hellwig xfs_submit_ioend(
484c59d87c4SChristoph Hellwig 	struct writeback_control *wbc,
4850e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
486e10de372SDave Chinner 	int			status)
487c59d87c4SChristoph Hellwig {
488e10de372SDave Chinner 	/* Reserve log space if we might write beyond the on-disk inode size. */
489e10de372SDave Chinner 	if (!status &&
4900e51a8e1SChristoph Hellwig 	    ioend->io_type != XFS_IO_UNWRITTEN &&
491bb18782aSDave Chinner 	    xfs_ioend_is_append(ioend) &&
492bb18782aSDave Chinner 	    !ioend->io_append_trans)
493e10de372SDave Chinner 		status = xfs_setfilesize_trans_alloc(ioend);
494bb18782aSDave Chinner 
4950e51a8e1SChristoph Hellwig 	ioend->io_bio->bi_private = ioend;
4960e51a8e1SChristoph Hellwig 	ioend->io_bio->bi_end_io = xfs_end_bio;
49750bfcd0cSMike Christie 	bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
49850bfcd0cSMike Christie 			 (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
4997bf7f352SDave Chinner 	/*
5007bf7f352SDave Chinner 	 * If we are failing the IO now, just mark the ioend with an
5017bf7f352SDave Chinner 	 * error and finish it. This will run IO completion immediately
5027bf7f352SDave Chinner 	 * as there is only one reference to the ioend at this point in
5037bf7f352SDave Chinner 	 * time.
5047bf7f352SDave Chinner 	 */
505e10de372SDave Chinner 	if (status) {
5060e51a8e1SChristoph Hellwig 		ioend->io_bio->bi_error = status;
5070e51a8e1SChristoph Hellwig 		bio_endio(ioend->io_bio);
508e10de372SDave Chinner 		return status;
5097bf7f352SDave Chinner 	}
5107bf7f352SDave Chinner 
5114e49ea4aSMike Christie 	submit_bio(ioend->io_bio);
512e10de372SDave Chinner 	return 0;
513c59d87c4SChristoph Hellwig }
514c59d87c4SChristoph Hellwig 
5150e51a8e1SChristoph Hellwig static void
5160e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(
5170e51a8e1SChristoph Hellwig 	struct bio		*bio,
5180e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5190e51a8e1SChristoph Hellwig {
5200e51a8e1SChristoph Hellwig 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
5210e51a8e1SChristoph Hellwig 	bio->bi_bdev = bh->b_bdev;
5220e51a8e1SChristoph Hellwig }
5230e51a8e1SChristoph Hellwig 
5240e51a8e1SChristoph Hellwig static struct xfs_ioend *
5250e51a8e1SChristoph Hellwig xfs_alloc_ioend(
5260e51a8e1SChristoph Hellwig 	struct inode		*inode,
5270e51a8e1SChristoph Hellwig 	unsigned int		type,
5280e51a8e1SChristoph Hellwig 	xfs_off_t		offset,
5290e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5300e51a8e1SChristoph Hellwig {
5310e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend;
5320e51a8e1SChristoph Hellwig 	struct bio		*bio;
5330e51a8e1SChristoph Hellwig 
5340e51a8e1SChristoph Hellwig 	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
5350e51a8e1SChristoph Hellwig 	xfs_init_bio_from_bh(bio, bh);
5360e51a8e1SChristoph Hellwig 
5370e51a8e1SChristoph Hellwig 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
5380e51a8e1SChristoph Hellwig 	INIT_LIST_HEAD(&ioend->io_list);
5390e51a8e1SChristoph Hellwig 	ioend->io_type = type;
5400e51a8e1SChristoph Hellwig 	ioend->io_inode = inode;
5410e51a8e1SChristoph Hellwig 	ioend->io_size = 0;
5420e51a8e1SChristoph Hellwig 	ioend->io_offset = offset;
5430e51a8e1SChristoph Hellwig 	INIT_WORK(&ioend->io_work, xfs_end_io);
5440e51a8e1SChristoph Hellwig 	ioend->io_append_trans = NULL;
5450e51a8e1SChristoph Hellwig 	ioend->io_bio = bio;
5460e51a8e1SChristoph Hellwig 	return ioend;
5470e51a8e1SChristoph Hellwig }
5480e51a8e1SChristoph Hellwig 
5490e51a8e1SChristoph Hellwig /*
5500e51a8e1SChristoph Hellwig  * Allocate a new bio, and chain the old bio to the new one.
5510e51a8e1SChristoph Hellwig  *
5520e51a8e1SChristoph Hellwig  * Note that we have to perform the chaining in this unintuitive order
5530e51a8e1SChristoph Hellwig  * so that the bi_private linkage is set up in the right direction for the
5540e51a8e1SChristoph Hellwig  * traversal in xfs_destroy_ioend().
5550e51a8e1SChristoph Hellwig  */
5560e51a8e1SChristoph Hellwig static void
5570e51a8e1SChristoph Hellwig xfs_chain_bio(
5580e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
5590e51a8e1SChristoph Hellwig 	struct writeback_control *wbc,
5600e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5610e51a8e1SChristoph Hellwig {
5620e51a8e1SChristoph Hellwig 	struct bio *new;
5630e51a8e1SChristoph Hellwig 
5640e51a8e1SChristoph Hellwig 	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
5650e51a8e1SChristoph Hellwig 	xfs_init_bio_from_bh(new, bh);
5660e51a8e1SChristoph Hellwig 
5670e51a8e1SChristoph Hellwig 	bio_chain(ioend->io_bio, new);
5680e51a8e1SChristoph Hellwig 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
56950bfcd0cSMike Christie 	bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE,
57050bfcd0cSMike Christie 			  (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0);
5714e49ea4aSMike Christie 	submit_bio(ioend->io_bio);
5720e51a8e1SChristoph Hellwig 	ioend->io_bio = new;
5730e51a8e1SChristoph Hellwig }
5740e51a8e1SChristoph Hellwig 
575c59d87c4SChristoph Hellwig /*
576c59d87c4SChristoph Hellwig  * Test to see if we've been building up a completion structure for
577c59d87c4SChristoph Hellwig  * earlier buffers -- if so, we try to append to this ioend if we
578c59d87c4SChristoph Hellwig  * can, otherwise we finish off any current ioend and start another.
579e10de372SDave Chinner  * Any ioend we finish off is added to @iolist so that the caller can submit it
580e10de372SDave Chinner  * once it has finished processing the dirty page.
581c59d87c4SChristoph Hellwig  */
582c59d87c4SChristoph Hellwig STATIC void
583c59d87c4SChristoph Hellwig xfs_add_to_ioend(
584c59d87c4SChristoph Hellwig 	struct inode		*inode,
585c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
586c59d87c4SChristoph Hellwig 	xfs_off_t		offset,
587e10de372SDave Chinner 	struct xfs_writepage_ctx *wpc,
588bb18782aSDave Chinner 	struct writeback_control *wbc,
589e10de372SDave Chinner 	struct list_head	*iolist)
590c59d87c4SChristoph Hellwig {
591fbcc0256SDave Chinner 	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
5920df61da8SDarrick J. Wong 	    bh->b_blocknr != wpc->last_block + 1 ||
5930df61da8SDarrick J. Wong 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
594e10de372SDave Chinner 		if (wpc->ioend)
595e10de372SDave Chinner 			list_add(&wpc->ioend->io_list, iolist);
5960e51a8e1SChristoph Hellwig 		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
597c59d87c4SChristoph Hellwig 	}
598c59d87c4SChristoph Hellwig 
5990e51a8e1SChristoph Hellwig 	/*
6000e51a8e1SChristoph Hellwig 	 * If the buffer doesn't fit into the bio we need to allocate a new
6010e51a8e1SChristoph Hellwig 	 * one.  This shouldn't happen more than once for a given buffer.
6020e51a8e1SChristoph Hellwig 	 */
6030e51a8e1SChristoph Hellwig 	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
6040e51a8e1SChristoph Hellwig 		xfs_chain_bio(wpc->ioend, wbc, bh);
605bb18782aSDave Chinner 
606fbcc0256SDave Chinner 	wpc->ioend->io_size += bh->b_size;
607fbcc0256SDave Chinner 	wpc->last_block = bh->b_blocknr;
608e10de372SDave Chinner 	xfs_start_buffer_writeback(bh);
609c59d87c4SChristoph Hellwig }
610c59d87c4SChristoph Hellwig 
611c59d87c4SChristoph Hellwig STATIC void
612c59d87c4SChristoph Hellwig xfs_map_buffer(
613c59d87c4SChristoph Hellwig 	struct inode		*inode,
614c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
615c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
616c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
617c59d87c4SChristoph Hellwig {
618c59d87c4SChristoph Hellwig 	sector_t		bn;
619c59d87c4SChristoph Hellwig 	struct xfs_mount	*m = XFS_I(inode)->i_mount;
620c59d87c4SChristoph Hellwig 	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
621c59d87c4SChristoph Hellwig 	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
622c59d87c4SChristoph Hellwig 
623c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
624c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
625c59d87c4SChristoph Hellwig 
626c59d87c4SChristoph Hellwig 	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
627c59d87c4SChristoph Hellwig 	      ((offset - iomap_offset) >> inode->i_blkbits);
628c59d87c4SChristoph Hellwig 
629c59d87c4SChristoph Hellwig 	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
630c59d87c4SChristoph Hellwig 
631c59d87c4SChristoph Hellwig 	bh->b_blocknr = bn;
632c59d87c4SChristoph Hellwig 	set_buffer_mapped(bh);
633c59d87c4SChristoph Hellwig }
634c59d87c4SChristoph Hellwig 
635c59d87c4SChristoph Hellwig STATIC void
636c59d87c4SChristoph Hellwig xfs_map_at_offset(
637c59d87c4SChristoph Hellwig 	struct inode		*inode,
638c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
639c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
640c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
641c59d87c4SChristoph Hellwig {
642c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
643c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
644c59d87c4SChristoph Hellwig 
645c59d87c4SChristoph Hellwig 	xfs_map_buffer(inode, bh, imap, offset);
646c59d87c4SChristoph Hellwig 	set_buffer_mapped(bh);
647c59d87c4SChristoph Hellwig 	clear_buffer_delay(bh);
648c59d87c4SChristoph Hellwig 	clear_buffer_unwritten(bh);
649c59d87c4SChristoph Hellwig }
650c59d87c4SChristoph Hellwig 
651c59d87c4SChristoph Hellwig /*
652a49935f2SDave Chinner  * Test if a given page contains at least one buffer of a given @type.
653a49935f2SDave Chinner  * If @check_all_buffers is true, then we walk all the buffers in the page to
654a49935f2SDave Chinner  * try to find one of the type passed in. If it is not set, then only the first
655a49935f2SDave Chinner  * buffer on the page is checked for a match.
656c59d87c4SChristoph Hellwig  */
657a49935f2SDave Chinner STATIC bool
6586ffc4db5SDave Chinner xfs_check_page_type(
659c59d87c4SChristoph Hellwig 	struct page		*page,
660a49935f2SDave Chinner 	unsigned int		type,
661a49935f2SDave Chinner 	bool			check_all_buffers)
662c59d87c4SChristoph Hellwig {
663a49935f2SDave Chinner 	struct buffer_head	*bh;
664a49935f2SDave Chinner 	struct buffer_head	*head;
665c59d87c4SChristoph Hellwig 
666a49935f2SDave Chinner 	if (PageWriteback(page))
667a49935f2SDave Chinner 		return false;
668a49935f2SDave Chinner 	if (!page->mapping)
669a49935f2SDave Chinner 		return false;
670a49935f2SDave Chinner 	if (!page_has_buffers(page))
671a49935f2SDave Chinner 		return false;
672c59d87c4SChristoph Hellwig 
673c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
674c59d87c4SChristoph Hellwig 	do {
675a49935f2SDave Chinner 		if (buffer_unwritten(bh)) {
676a49935f2SDave Chinner 			if (type == XFS_IO_UNWRITTEN)
677a49935f2SDave Chinner 				return true;
678a49935f2SDave Chinner 		} else if (buffer_delay(bh)) {
679805eeb8eSDan Carpenter 			if (type == XFS_IO_DELALLOC)
680a49935f2SDave Chinner 				return true;
681a49935f2SDave Chinner 		} else if (buffer_dirty(bh) && buffer_mapped(bh)) {
682805eeb8eSDan Carpenter 			if (type == XFS_IO_OVERWRITE)
683a49935f2SDave Chinner 				return true;
684a49935f2SDave Chinner 		}
685a49935f2SDave Chinner 
686a49935f2SDave Chinner 		/* If we are only checking the first buffer, we are done now. */
687a49935f2SDave Chinner 		if (!check_all_buffers)
688c59d87c4SChristoph Hellwig 			break;
689c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
690c59d87c4SChristoph Hellwig 
691a49935f2SDave Chinner 	return false;
692c59d87c4SChristoph Hellwig }
693c59d87c4SChristoph Hellwig 
694c59d87c4SChristoph Hellwig STATIC void
695c59d87c4SChristoph Hellwig xfs_vm_invalidatepage(
696c59d87c4SChristoph Hellwig 	struct page		*page,
697d47992f8SLukas Czerner 	unsigned int		offset,
698d47992f8SLukas Czerner 	unsigned int		length)
699c59d87c4SChristoph Hellwig {
70034097dfeSLukas Czerner 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
70134097dfeSLukas Czerner 				 length);
70234097dfeSLukas Czerner 	block_invalidatepage(page, offset, length);
703c59d87c4SChristoph Hellwig }
704c59d87c4SChristoph Hellwig 
705c59d87c4SChristoph Hellwig /*
706c59d87c4SChristoph Hellwig  * If the page has delalloc buffers on it, we need to punch them out before we
707c59d87c4SChristoph Hellwig  * invalidate the page. If we don't, we leave a stale delalloc mapping on the
708c59d87c4SChristoph Hellwig  * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
709c59d87c4SChristoph Hellwig  * is done on that same region - the delalloc extent is returned when none is
710c59d87c4SChristoph Hellwig  * supposed to be there.
711c59d87c4SChristoph Hellwig  *
712c59d87c4SChristoph Hellwig  * We prevent this by truncating away the delalloc regions on the page before
713c59d87c4SChristoph Hellwig  * invalidating it. Because they are delalloc, we can do this without needing a
714c59d87c4SChristoph Hellwig  * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
715c59d87c4SChristoph Hellwig  * truncation without a transaction as there is no space left for block
716c59d87c4SChristoph Hellwig  * reservation (typically why we see an ENOSPC in writeback).
717c59d87c4SChristoph Hellwig  *
718c59d87c4SChristoph Hellwig  * This is not a performance critical path, so for now just do the punching a
719c59d87c4SChristoph Hellwig  * buffer head at a time.
720c59d87c4SChristoph Hellwig  */
721c59d87c4SChristoph Hellwig STATIC void
722c59d87c4SChristoph Hellwig xfs_aops_discard_page(
723c59d87c4SChristoph Hellwig 	struct page		*page)
724c59d87c4SChristoph Hellwig {
725c59d87c4SChristoph Hellwig 	struct inode		*inode = page->mapping->host;
726c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
727c59d87c4SChristoph Hellwig 	struct buffer_head	*bh, *head;
728c59d87c4SChristoph Hellwig 	loff_t			offset = page_offset(page);
729c59d87c4SChristoph Hellwig 
730a49935f2SDave Chinner 	if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
731c59d87c4SChristoph Hellwig 		goto out_invalidate;
732c59d87c4SChristoph Hellwig 
733c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
734c59d87c4SChristoph Hellwig 		goto out_invalidate;
735c59d87c4SChristoph Hellwig 
736c59d87c4SChristoph Hellwig 	xfs_alert(ip->i_mount,
737c59d87c4SChristoph Hellwig 		"page discard on page %p, inode 0x%llx, offset %llu.",
738c59d87c4SChristoph Hellwig 			page, ip->i_ino, offset);
739c59d87c4SChristoph Hellwig 
740c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
741c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
742c59d87c4SChristoph Hellwig 	do {
743c59d87c4SChristoph Hellwig 		int		error;
744c59d87c4SChristoph Hellwig 		xfs_fileoff_t	start_fsb;
745c59d87c4SChristoph Hellwig 
746c59d87c4SChristoph Hellwig 		if (!buffer_delay(bh))
747c59d87c4SChristoph Hellwig 			goto next_buffer;
748c59d87c4SChristoph Hellwig 
749c59d87c4SChristoph Hellwig 		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
750c59d87c4SChristoph Hellwig 		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
751c59d87c4SChristoph Hellwig 		if (error) {
752c59d87c4SChristoph Hellwig 			/* something screwed, just bail */
753c59d87c4SChristoph Hellwig 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
754c59d87c4SChristoph Hellwig 				xfs_alert(ip->i_mount,
755c59d87c4SChristoph Hellwig 			"page discard unable to remove delalloc mapping.");
756c59d87c4SChristoph Hellwig 			}
757c59d87c4SChristoph Hellwig 			break;
758c59d87c4SChristoph Hellwig 		}
759c59d87c4SChristoph Hellwig next_buffer:
760c59d87c4SChristoph Hellwig 		offset += 1 << inode->i_blkbits;
761c59d87c4SChristoph Hellwig 
762c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
763c59d87c4SChristoph Hellwig 
764c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
765c59d87c4SChristoph Hellwig out_invalidate:
76609cbfeafSKirill A. Shutemov 	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
767c59d87c4SChristoph Hellwig 	return;
768c59d87c4SChristoph Hellwig }
769c59d87c4SChristoph Hellwig 
770ef473667SDarrick J. Wong static int
771ef473667SDarrick J. Wong xfs_map_cow(
772ef473667SDarrick J. Wong 	struct xfs_writepage_ctx *wpc,
773ef473667SDarrick J. Wong 	struct inode		*inode,
774ef473667SDarrick J. Wong 	loff_t			offset,
775ef473667SDarrick J. Wong 	unsigned int		*new_type)
776ef473667SDarrick J. Wong {
777ef473667SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(inode);
778ef473667SDarrick J. Wong 	struct xfs_bmbt_irec	imap;
779ef473667SDarrick J. Wong 	bool			is_cow = false, need_alloc = false;
780ef473667SDarrick J. Wong 	int			error;
781ef473667SDarrick J. Wong 
782ef473667SDarrick J. Wong 	/*
783ef473667SDarrick J. Wong 	 * If we already have a valid COW mapping keep using it.
784ef473667SDarrick J. Wong 	 */
785ef473667SDarrick J. Wong 	if (wpc->io_type == XFS_IO_COW) {
786ef473667SDarrick J. Wong 		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
787ef473667SDarrick J. Wong 		if (wpc->imap_valid) {
788ef473667SDarrick J. Wong 			*new_type = XFS_IO_COW;
789ef473667SDarrick J. Wong 			return 0;
790ef473667SDarrick J. Wong 		}
791ef473667SDarrick J. Wong 	}
792ef473667SDarrick J. Wong 
793ef473667SDarrick J. Wong 	/*
794ef473667SDarrick J. Wong 	 * Else we need to check if there is a COW mapping at this offset.
795ef473667SDarrick J. Wong 	 */
796ef473667SDarrick J. Wong 	xfs_ilock(ip, XFS_ILOCK_SHARED);
797ef473667SDarrick J. Wong 	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc);
798ef473667SDarrick J. Wong 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
799ef473667SDarrick J. Wong 
800ef473667SDarrick J. Wong 	if (!is_cow)
801ef473667SDarrick J. Wong 		return 0;
802ef473667SDarrick J. Wong 
803ef473667SDarrick J. Wong 	/*
804ef473667SDarrick J. Wong 	 * And if the COW mapping has a delayed extent here we need to
805ef473667SDarrick J. Wong 	 * allocate real space for it now.
806ef473667SDarrick J. Wong 	 */
807ef473667SDarrick J. Wong 	if (need_alloc) {
808ef473667SDarrick J. Wong 		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
809ef473667SDarrick J. Wong 				&imap);
810ef473667SDarrick J. Wong 		if (error)
811ef473667SDarrick J. Wong 			return error;
812ef473667SDarrick J. Wong 	}
813ef473667SDarrick J. Wong 
814ef473667SDarrick J. Wong 	wpc->io_type = *new_type = XFS_IO_COW;
815ef473667SDarrick J. Wong 	wpc->imap_valid = true;
816ef473667SDarrick J. Wong 	wpc->imap = imap;
817ef473667SDarrick J. Wong 	return 0;
818ef473667SDarrick J. Wong }
819ef473667SDarrick J. Wong 
820c59d87c4SChristoph Hellwig /*
821e10de372SDave Chinner  * We implement an immediate ioend submission policy here to avoid needing to
822e10de372SDave Chinner  * chain multiple ioends and hence nest mempool allocations which can violate
823e10de372SDave Chinner  * forward progress guarantees we need to provide. The current ioend we are
824e10de372SDave Chinner  * adding buffers to is cached on the writepage context, and if the new buffer
825e10de372SDave Chinner  * does not append to the cached ioend it will create a new ioend and cache that
826e10de372SDave Chinner  * instead.
827e10de372SDave Chinner  *
828e10de372SDave Chinner  * If a new ioend is created and cached, the old ioend is queued
829e10de372SDave Chinner  * locally for submission once the entire page is processed or an error has been
830e10de372SDave Chinner  * detected.  While ioends are submitted immediately after they are completed,
831e10de372SDave Chinner  * batching optimisations are provided by higher level block plugging.
832e10de372SDave Chinner  *
833e10de372SDave Chinner  * At the end of a writeback pass, there will be a cached ioend remaining on the
834e10de372SDave Chinner  * writepage context that the caller will need to submit.
835e10de372SDave Chinner  */
836bfce7d2eSDave Chinner static int
837bfce7d2eSDave Chinner xfs_writepage_map(
838bfce7d2eSDave Chinner 	struct xfs_writepage_ctx *wpc,
839e10de372SDave Chinner 	struct writeback_control *wbc,
840bfce7d2eSDave Chinner 	struct inode		*inode,
841bfce7d2eSDave Chinner 	struct page		*page,
842bfce7d2eSDave Chinner 	loff_t			offset,
843bfce7d2eSDave Chinner 	__uint64_t              end_offset)
844bfce7d2eSDave Chinner {
845e10de372SDave Chinner 	LIST_HEAD(submit_list);
846e10de372SDave Chinner 	struct xfs_ioend	*ioend, *next;
847bfce7d2eSDave Chinner 	struct buffer_head	*bh, *head;
848bfce7d2eSDave Chinner 	ssize_t			len = 1 << inode->i_blkbits;
849bfce7d2eSDave Chinner 	int			error = 0;
850bfce7d2eSDave Chinner 	int			count = 0;
851e10de372SDave Chinner 	int			uptodate = 1;
852ef473667SDarrick J. Wong 	unsigned int		new_type;
853bfce7d2eSDave Chinner 
854bfce7d2eSDave Chinner 	bh = head = page_buffers(page);
855bfce7d2eSDave Chinner 	offset = page_offset(page);
856bfce7d2eSDave Chinner 	do {
857bfce7d2eSDave Chinner 		if (offset >= end_offset)
858bfce7d2eSDave Chinner 			break;
859bfce7d2eSDave Chinner 		if (!buffer_uptodate(bh))
860bfce7d2eSDave Chinner 			uptodate = 0;
861bfce7d2eSDave Chinner 
862bfce7d2eSDave Chinner 		/*
863bfce7d2eSDave Chinner 		 * set_page_dirty dirties all buffers in a page, independent
864bfce7d2eSDave Chinner 		 * of their state.  The dirty state however is entirely
865bfce7d2eSDave Chinner 		 * meaningless for holes (!mapped && uptodate), so skip
866bfce7d2eSDave Chinner 		 * buffers covering holes here.
867bfce7d2eSDave Chinner 		 */
868bfce7d2eSDave Chinner 		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
869bfce7d2eSDave Chinner 			wpc->imap_valid = false;
870bfce7d2eSDave Chinner 			continue;
871bfce7d2eSDave Chinner 		}
872bfce7d2eSDave Chinner 
873ef473667SDarrick J. Wong 		if (buffer_unwritten(bh))
874ef473667SDarrick J. Wong 			new_type = XFS_IO_UNWRITTEN;
875ef473667SDarrick J. Wong 		else if (buffer_delay(bh))
876ef473667SDarrick J. Wong 			new_type = XFS_IO_DELALLOC;
877ef473667SDarrick J. Wong 		else if (buffer_uptodate(bh))
878ef473667SDarrick J. Wong 			new_type = XFS_IO_OVERWRITE;
879ef473667SDarrick J. Wong 		else {
880bfce7d2eSDave Chinner 			if (PageUptodate(page))
881bfce7d2eSDave Chinner 				ASSERT(buffer_mapped(bh));
882bfce7d2eSDave Chinner 			/*
883bfce7d2eSDave Chinner 			 * This buffer is not uptodate and will not be
884bfce7d2eSDave Chinner 			 * written to disk.  Ensure that we will put any
885bfce7d2eSDave Chinner 			 * subsequent writeable buffers into a new
886bfce7d2eSDave Chinner 			 * ioend.
887bfce7d2eSDave Chinner 			 */
888bfce7d2eSDave Chinner 			wpc->imap_valid = false;
889bfce7d2eSDave Chinner 			continue;
890bfce7d2eSDave Chinner 		}
891bfce7d2eSDave Chinner 
892ef473667SDarrick J. Wong 		if (xfs_is_reflink_inode(XFS_I(inode))) {
893ef473667SDarrick J. Wong 			error = xfs_map_cow(wpc, inode, offset, &new_type);
894ef473667SDarrick J. Wong 			if (error)
895ef473667SDarrick J. Wong 				goto out;
896ef473667SDarrick J. Wong 		}
897ef473667SDarrick J. Wong 
898ef473667SDarrick J. Wong 		if (wpc->io_type != new_type) {
899ef473667SDarrick J. Wong 			wpc->io_type = new_type;
900ef473667SDarrick J. Wong 			wpc->imap_valid = false;
901ef473667SDarrick J. Wong 		}
902ef473667SDarrick J. Wong 
903bfce7d2eSDave Chinner 		if (wpc->imap_valid)
904bfce7d2eSDave Chinner 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
905bfce7d2eSDave Chinner 							 offset);
906bfce7d2eSDave Chinner 		if (!wpc->imap_valid) {
907bfce7d2eSDave Chinner 			error = xfs_map_blocks(inode, offset, &wpc->imap,
908bfce7d2eSDave Chinner 					     wpc->io_type);
909bfce7d2eSDave Chinner 			if (error)
910e10de372SDave Chinner 				goto out;
911bfce7d2eSDave Chinner 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
912bfce7d2eSDave Chinner 							 offset);
913bfce7d2eSDave Chinner 		}
914bfce7d2eSDave Chinner 		if (wpc->imap_valid) {
915bfce7d2eSDave Chinner 			lock_buffer(bh);
916bfce7d2eSDave Chinner 			if (wpc->io_type != XFS_IO_OVERWRITE)
917bfce7d2eSDave Chinner 				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
918bb18782aSDave Chinner 			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
919bfce7d2eSDave Chinner 			count++;
920bfce7d2eSDave Chinner 		}
921bfce7d2eSDave Chinner 
922bfce7d2eSDave Chinner 	} while (offset += len, ((bh = bh->b_this_page) != head));
923bfce7d2eSDave Chinner 
924bfce7d2eSDave Chinner 	if (uptodate && bh == head)
925bfce7d2eSDave Chinner 		SetPageUptodate(page);
926bfce7d2eSDave Chinner 
927e10de372SDave Chinner 	ASSERT(wpc->ioend || list_empty(&submit_list));
928bfce7d2eSDave Chinner 
929e10de372SDave Chinner out:
930bfce7d2eSDave Chinner 	/*
931e10de372SDave Chinner 	 * On error, we have to fail the ioend here because we have locked
932e10de372SDave Chinner 	 * buffers in the ioend. If we don't do this, we'll deadlock
933e10de372SDave Chinner 	 * invalidating the page as that tries to lock the buffers on the page.
934e10de372SDave Chinner 	 * Also, because we may have set pages under writeback, we have to make
935e10de372SDave Chinner 	 * sure we run IO completion to mark the error state of the IO
936e10de372SDave Chinner 	 * appropriately, so we can't cancel the ioend directly here. That means
937e10de372SDave Chinner 	 * we have to mark this page as under writeback if we included any
938e10de372SDave Chinner 	 * buffers from it in the ioend chain so that completion treats it
939e10de372SDave Chinner 	 * correctly.
940bfce7d2eSDave Chinner 	 *
941e10de372SDave Chinner 	 * If we didn't include the page in the ioend, then on error we can
942e10de372SDave Chinner 	 * simply discard and unlock it as there are no other users of the page
943e10de372SDave Chinner 	 * or its buffers right now. The caller will still need to trigger
944e10de372SDave Chinner 	 * submission of outstanding ioends on the writepage context so they are
945e10de372SDave Chinner 	 * treated correctly on error.
946bfce7d2eSDave Chinner 	 */
947e10de372SDave Chinner 	if (count) {
948e10de372SDave Chinner 		xfs_start_page_writeback(page, !error);
949e10de372SDave Chinner 
950e10de372SDave Chinner 		/*
951e10de372SDave Chinner 		 * Preserve the original error if there was one, otherwise catch
952e10de372SDave Chinner 		 * submission errors here and propagate into subsequent ioend
953e10de372SDave Chinner 		 * submissions.
954e10de372SDave Chinner 		 */
955e10de372SDave Chinner 		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
956e10de372SDave Chinner 			int error2;
957e10de372SDave Chinner 
958e10de372SDave Chinner 			list_del_init(&ioend->io_list);
959e10de372SDave Chinner 			error2 = xfs_submit_ioend(wbc, ioend, error);
960e10de372SDave Chinner 			if (error2 && !error)
961e10de372SDave Chinner 				error = error2;
962e10de372SDave Chinner 		}
963e10de372SDave Chinner 	} else if (error) {
964bfce7d2eSDave Chinner 		xfs_aops_discard_page(page);
965bfce7d2eSDave Chinner 		ClearPageUptodate(page);
966bfce7d2eSDave Chinner 		unlock_page(page);
967e10de372SDave Chinner 	} else {
968e10de372SDave Chinner 		/*
969e10de372SDave Chinner 		 * We can end up here with no error and nothing to write if we
970e10de372SDave Chinner 		 * race with a partial page truncate on a sub-page block sized
971e10de372SDave Chinner 		 * filesystem. In that case we need to mark the page clean.
972e10de372SDave Chinner 		 */
973e10de372SDave Chinner 		xfs_start_page_writeback(page, 1);
974e10de372SDave Chinner 		end_page_writeback(page);
975bfce7d2eSDave Chinner 	}
976e10de372SDave Chinner 
977bfce7d2eSDave Chinner 	mapping_set_error(page->mapping, error);
978bfce7d2eSDave Chinner 	return error;
979bfce7d2eSDave Chinner }
980bfce7d2eSDave Chinner 
981c59d87c4SChristoph Hellwig /*
982c59d87c4SChristoph Hellwig  * Write out a dirty page.
983c59d87c4SChristoph Hellwig  *
984c59d87c4SChristoph Hellwig  * For delalloc space on the page we need to allocate space and flush it.
985c59d87c4SChristoph Hellwig  * For unwritten space on the page we need to start the conversion to
986c59d87c4SChristoph Hellwig  * regular allocated space.
987c59d87c4SChristoph Hellwig  * For any other dirty buffer heads on the page we should flush them.
988c59d87c4SChristoph Hellwig  */
989c59d87c4SChristoph Hellwig STATIC int
990fbcc0256SDave Chinner xfs_do_writepage(
991c59d87c4SChristoph Hellwig 	struct page		*page,
992fbcc0256SDave Chinner 	struct writeback_control *wbc,
993fbcc0256SDave Chinner 	void			*data)
994c59d87c4SChristoph Hellwig {
995fbcc0256SDave Chinner 	struct xfs_writepage_ctx *wpc = data;
996c59d87c4SChristoph Hellwig 	struct inode		*inode = page->mapping->host;
997c59d87c4SChristoph Hellwig 	loff_t			offset;
998c59d87c4SChristoph Hellwig 	__uint64_t              end_offset;
999ad68972aSDave Chinner 	pgoff_t                 end_index;
1000c59d87c4SChristoph Hellwig 
100134097dfeSLukas Czerner 	trace_xfs_writepage(inode, page, 0, 0);
1002c59d87c4SChristoph Hellwig 
1003c59d87c4SChristoph Hellwig 	ASSERT(page_has_buffers(page));
1004c59d87c4SChristoph Hellwig 
1005c59d87c4SChristoph Hellwig 	/*
1006c59d87c4SChristoph Hellwig 	 * Refuse to write the page out if we are called from reclaim context.
1007c59d87c4SChristoph Hellwig 	 *
1008c59d87c4SChristoph Hellwig 	 * This avoids stack overflows when called from deeply used stacks in
1009c59d87c4SChristoph Hellwig 	 * random callers for direct reclaim or memcg reclaim.  We explicitly
1010c59d87c4SChristoph Hellwig 	 * allow reclaim from kswapd as the stack usage there is relatively low.
1011c59d87c4SChristoph Hellwig 	 *
101294054fa3SMel Gorman 	 * This should never happen except in the case of a VM regression so
101394054fa3SMel Gorman 	 * warn about it.
1014c59d87c4SChristoph Hellwig 	 */
101594054fa3SMel Gorman 	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
101694054fa3SMel Gorman 			PF_MEMALLOC))
1017c59d87c4SChristoph Hellwig 		goto redirty;
1018c59d87c4SChristoph Hellwig 
1019c59d87c4SChristoph Hellwig 	/*
1020c59d87c4SChristoph Hellwig 	 * Given that we do not allow direct reclaim to call us, we should
1021c59d87c4SChristoph Hellwig 	 * never be called while in a filesystem transaction.
1022c59d87c4SChristoph Hellwig 	 */
1023448011e2SChristoph Hellwig 	if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
1024c59d87c4SChristoph Hellwig 		goto redirty;
1025c59d87c4SChristoph Hellwig 
10268695d27eSJie Liu 	/*
1027ad68972aSDave Chinner 	 * Is this page beyond the end of the file?
1028ad68972aSDave Chinner 	 *
10298695d27eSJie Liu 	 * The page index is less than the end_index, adjust the end_offset
10308695d27eSJie Liu 	 * to the highest offset that this page should represent.
10318695d27eSJie Liu 	 * -----------------------------------------------------
10328695d27eSJie Liu 	 * |			file mapping	       | <EOF> |
10338695d27eSJie Liu 	 * -----------------------------------------------------
10348695d27eSJie Liu 	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
10358695d27eSJie Liu 	 * ^--------------------------------^----------|--------
10368695d27eSJie Liu 	 * |     desired writeback range    |      see else    |
10378695d27eSJie Liu 	 * ---------------------------------^------------------|
10388695d27eSJie Liu 	 */
1039ad68972aSDave Chinner 	offset = i_size_read(inode);
104009cbfeafSKirill A. Shutemov 	end_index = offset >> PAGE_SHIFT;
10418695d27eSJie Liu 	if (page->index < end_index)
104209cbfeafSKirill A. Shutemov 		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
10438695d27eSJie Liu 	else {
10448695d27eSJie Liu 		/*
10458695d27eSJie Liu 		 * Check whether the page to write out is beyond i_size or
10468695d27eSJie Liu 		 * straddles it.
10478695d27eSJie Liu 		 * -------------------------------------------------------
10488695d27eSJie Liu 		 * |		file mapping		        | <EOF>  |
10498695d27eSJie Liu 		 * -------------------------------------------------------
10508695d27eSJie Liu 		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
10518695d27eSJie Liu 		 * ^--------------------------------^-----------|---------
10528695d27eSJie Liu 		 * |				    |      Straddles     |
10538695d27eSJie Liu 		 * ---------------------------------^-----------|--------|
10548695d27eSJie Liu 		 */
105509cbfeafSKirill A. Shutemov 		unsigned offset_into_page = offset & (PAGE_SIZE - 1);
10566b7a03f0SChristoph Hellwig 
10576b7a03f0SChristoph Hellwig 		/*
1058ff9a28f6SJan Kara 		 * Skip the page if it is fully outside i_size, e.g. due to a
1059ff9a28f6SJan Kara 		 * truncate operation that is in progress. We must redirty the
1060ff9a28f6SJan Kara 		 * page so that reclaim stops reclaiming it. Otherwise
1061ff9a28f6SJan Kara 		 * xfs_vm_releasepage() is called on it and gets confused.
10628695d27eSJie Liu 		 *
10638695d27eSJie Liu 		 * Note that end_index is an unsigned long: it would overflow
10648695d27eSJie Liu 		 * if the given offset is greater than 16TB on a 32-bit
10658695d27eSJie Liu 		 * system, and a check of the form
10668695d27eSJie Liu 		 * "if (page->index >= end_index + 1)" would then fail because
10678695d27eSJie Liu 		 * "end_index + 1" evaluates to 0.  The page would be redirtied
10688695d27eSJie Liu 		 * and written out repeatedly, resulting in an infinite loop,
10698695d27eSJie Liu 		 * and the user program performing the operation would hang.
10708695d27eSJie Liu 		 * Instead, we verify this situation by checking whether the
10718695d27eSJie Liu 		 * page to write is totally beyond i_size or whether its
10728695d27eSJie Liu 		 * offset is just equal to the EOF.
10736b7a03f0SChristoph Hellwig 		 */
10748695d27eSJie Liu 		if (page->index > end_index ||
10758695d27eSJie Liu 		    (page->index == end_index && offset_into_page == 0))
1076ff9a28f6SJan Kara 			goto redirty;
10776b7a03f0SChristoph Hellwig 
10786b7a03f0SChristoph Hellwig 		/*
10796b7a03f0SChristoph Hellwig 		 * The page straddles i_size.  It must be zeroed out on each
10806b7a03f0SChristoph Hellwig 		 * and every writepage invocation because it may be mmapped.
10816b7a03f0SChristoph Hellwig 		 * "A file is mapped in multiples of the page size.  For a file
10826b7a03f0SChristoph Hellwig 		 * that is not a multiple of the page size, the remaining
10836b7a03f0SChristoph Hellwig 		 * memory is zeroed when mapped, and writes to that region are
10846b7a03f0SChristoph Hellwig 		 * not written out to the file."
10856b7a03f0SChristoph Hellwig 		 */
108609cbfeafSKirill A. Shutemov 		zero_user_segment(page, offset_into_page, PAGE_SIZE);
10878695d27eSJie Liu 
10888695d27eSJie Liu 		/* Adjust the end_offset to the end of file */
10898695d27eSJie Liu 		end_offset = offset;
1090c59d87c4SChristoph Hellwig 	}
1091c59d87c4SChristoph Hellwig 
1092e10de372SDave Chinner 	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
1093c59d87c4SChristoph Hellwig 
1094c59d87c4SChristoph Hellwig redirty:
1095c59d87c4SChristoph Hellwig 	redirty_page_for_writepage(wbc, page);
1096c59d87c4SChristoph Hellwig 	unlock_page(page);
1097c59d87c4SChristoph Hellwig 	return 0;
1098c59d87c4SChristoph Hellwig }
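
/*
 * Worked example of the EOF handling in xfs_do_writepage() above (an
 * illustrative sketch, assuming PAGE_SIZE == 4096 and i_size == 8292,
 * i.e. two full pages plus 100 bytes):
 *
 *   end_index = 8292 >> PAGE_SHIFT = 2
 *
 *   page->index 0 or 1:  index < end_index, so the whole page is written
 *                        and end_offset = (index + 1) << PAGE_SHIFT.
 *   page->index 2:       offset_into_page = 8292 & 4095 = 100, so the page
 *                        straddles EOF; bytes 100..4095 are zeroed and
 *                        end_offset = 8292.
 *   page->index 3:       index > end_index, so the page is wholly beyond
 *                        i_size and is simply redirtied.
 *
 * (If i_size were exactly 8192, page 2 would also be redirtied, since its
 * offset_into_page would be 0.)
 */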
1099c59d87c4SChristoph Hellwig 
1100c59d87c4SChristoph Hellwig STATIC int
1101fbcc0256SDave Chinner xfs_vm_writepage(
1102fbcc0256SDave Chinner 	struct page		*page,
1103fbcc0256SDave Chinner 	struct writeback_control *wbc)
1104fbcc0256SDave Chinner {
1105fbcc0256SDave Chinner 	struct xfs_writepage_ctx wpc = {
1106fbcc0256SDave Chinner 		.io_type = XFS_IO_INVALID,
1107fbcc0256SDave Chinner 	};
1108fbcc0256SDave Chinner 	int			ret;
1109fbcc0256SDave Chinner 
1110fbcc0256SDave Chinner 	ret = xfs_do_writepage(page, wbc, &wpc);
1111e10de372SDave Chinner 	if (wpc.ioend)
1112e10de372SDave Chinner 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1113e10de372SDave Chinner 	return ret;
1114fbcc0256SDave Chinner }
1115fbcc0256SDave Chinner 
1116fbcc0256SDave Chinner STATIC int
1117c59d87c4SChristoph Hellwig xfs_vm_writepages(
1118c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1119c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
1120c59d87c4SChristoph Hellwig {
1121fbcc0256SDave Chinner 	struct xfs_writepage_ctx wpc = {
1122fbcc0256SDave Chinner 		.io_type = XFS_IO_INVALID,
1123fbcc0256SDave Chinner 	};
1124fbcc0256SDave Chinner 	int			ret;
1125fbcc0256SDave Chinner 
1126c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
11277f6d5b52SRoss Zwisler 	if (dax_mapping(mapping))
11287f6d5b52SRoss Zwisler 		return dax_writeback_mapping_range(mapping,
11297f6d5b52SRoss Zwisler 				xfs_find_bdev_for_inode(mapping->host), wbc);
11307f6d5b52SRoss Zwisler 
1131fbcc0256SDave Chinner 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
1132e10de372SDave Chinner 	if (wpc.ioend)
1133e10de372SDave Chinner 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1134e10de372SDave Chinner 	return ret;
1135c59d87c4SChristoph Hellwig }
1136c59d87c4SChristoph Hellwig 
1137c59d87c4SChristoph Hellwig /*
1138c59d87c4SChristoph Hellwig  * Called to move a page into cleanable state - and from there
1139c59d87c4SChristoph Hellwig  * to be released. The page should already be clean. We always
1140c59d87c4SChristoph Hellwig  * have buffer heads in this call.
1141c59d87c4SChristoph Hellwig  *
1142c59d87c4SChristoph Hellwig  * Returns 1 if the page is ok to release, 0 otherwise.
1143c59d87c4SChristoph Hellwig  */
1144c59d87c4SChristoph Hellwig STATIC int
1145c59d87c4SChristoph Hellwig xfs_vm_releasepage(
1146c59d87c4SChristoph Hellwig 	struct page		*page,
1147c59d87c4SChristoph Hellwig 	gfp_t			gfp_mask)
1148c59d87c4SChristoph Hellwig {
1149c59d87c4SChristoph Hellwig 	int			delalloc, unwritten;
1150c59d87c4SChristoph Hellwig 
115134097dfeSLukas Czerner 	trace_xfs_releasepage(page->mapping->host, page, 0, 0);
1152c59d87c4SChristoph Hellwig 
115399579cceSBrian Foster 	/*
115499579cceSBrian Foster 	 * mm accommodates an old ext3 case where clean pages might not have had
115599579cceSBrian Foster 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
115699579cceSBrian Foster 	 * ->releasepage() via shrink_active_list(). Conversely,
115799579cceSBrian Foster 	 * block_invalidatepage() can send pages that are still marked dirty
115899579cceSBrian Foster 	 * but otherwise have invalidated buffers.
115999579cceSBrian Foster 	 *
116099579cceSBrian Foster 	 * We've historically freed buffers on the latter. Instead, quietly
116199579cceSBrian Foster 	 * filter out all dirty pages to avoid spurious buffer state warnings.
116299579cceSBrian Foster 	 * This can likely be removed once shrink_active_list() is fixed.
116399579cceSBrian Foster 	 */
116499579cceSBrian Foster 	if (PageDirty(page))
116599579cceSBrian Foster 		return 0;
116699579cceSBrian Foster 
1167c59d87c4SChristoph Hellwig 	xfs_count_page_state(page, &delalloc, &unwritten);
1168c59d87c4SChristoph Hellwig 
1169448011e2SChristoph Hellwig 	if (WARN_ON_ONCE(delalloc))
1170c59d87c4SChristoph Hellwig 		return 0;
1171448011e2SChristoph Hellwig 	if (WARN_ON_ONCE(unwritten))
1172c59d87c4SChristoph Hellwig 		return 0;
1173c59d87c4SChristoph Hellwig 
1174c59d87c4SChristoph Hellwig 	return try_to_free_buffers(page);
1175c59d87c4SChristoph Hellwig }
1176c59d87c4SChristoph Hellwig 
1177a719370bSDave Chinner /*
1178273dda76SChristoph Hellwig  * When we map a DIO buffer, we may need to pass flags to
1179273dda76SChristoph Hellwig  * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
11803e12dbbdSDave Chinner  *
11813e12dbbdSDave Chinner  * Note that for DIO, an IO to the highest supported file block offset (i.e.
11823e12dbbdSDave Chinner  * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
11833e12dbbdSDave Chinner  * bit variable. Hence if we see this overflow, we have to assume that the IO is
11843e12dbbdSDave Chinner  * extending the file size. We won't know for sure until IO completion is run
11853e12dbbdSDave Chinner  * and the actual max write offset is communicated to the IO completion
11863e12dbbdSDave Chinner  * routine.
1187a719370bSDave Chinner  */
1188a719370bSDave Chinner static void
1189a719370bSDave Chinner xfs_map_direct(
1190a719370bSDave Chinner 	struct inode		*inode,
1191a719370bSDave Chinner 	struct buffer_head	*bh_result,
1192a719370bSDave Chinner 	struct xfs_bmbt_irec	*imap,
1193273dda76SChristoph Hellwig 	xfs_off_t		offset)
1194a719370bSDave Chinner {
1195273dda76SChristoph Hellwig 	uintptr_t		*flags = (uintptr_t *)&bh_result->b_private;
1196d5cc2e3fSDave Chinner 	xfs_off_t		size = bh_result->b_size;
1197d5cc2e3fSDave Chinner 
1198273dda76SChristoph Hellwig 	trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
1199273dda76SChristoph Hellwig 		ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
1200d5cc2e3fSDave Chinner 
1201273dda76SChristoph Hellwig 	if (ISUNWRITTEN(imap)) {
1202273dda76SChristoph Hellwig 		*flags |= XFS_DIO_FLAG_UNWRITTEN;
1203a06c277aSDave Chinner 		set_buffer_defer_completion(bh_result);
1204273dda76SChristoph Hellwig 	} else if (offset + size > i_size_read(inode) || offset + size < 0) {
1205273dda76SChristoph Hellwig 		*flags |= XFS_DIO_FLAG_APPEND;
1206273dda76SChristoph Hellwig 		set_buffer_defer_completion(bh_result);
1207a719370bSDave Chinner 	}
1208a719370bSDave Chinner }
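
/*
 * A small worked example of the flag selection above (illustrative
 * numbers only): with i_size == 1048576, a mapping of size 65536 at
 * offset 1032192 gives offset + size == 1097728 > i_size, so if the
 * extent is not unwritten the completion is tagged XFS_DIO_FLAG_APPEND
 * and deferred to a workqueue.  If the extent is unwritten,
 * XFS_DIO_FLAG_UNWRITTEN is set instead and completion is likewise
 * deferred.  A plain overwrite wholly inside i_size sets no flags.
 */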
1209a719370bSDave Chinner 
12101fdca9c2SDave Chinner /*
12111fdca9c2SDave Chinner  * If this is O_DIRECT or the mpage code calling us, tell them how large the
12121fdca9c2SDave Chinner  * mapping is so that we can avoid repeated get_blocks calls.
12131fdca9c2SDave Chinner  *
12141fdca9c2SDave Chinner  * If the mapping spans EOF, then we have to break the mapping up as the mapping
12151fdca9c2SDave Chinner  * for blocks beyond EOF must be marked new so that sub-block regions can be
12161fdca9c2SDave Chinner  * correctly zeroed. We can't do this for mappings within EOF unless the mapping
12171fdca9c2SDave Chinner  * was just allocated or is unwritten, otherwise the callers would overwrite
12181fdca9c2SDave Chinner  * existing data with zeros. Hence we have to split the mapping into a range up
12191fdca9c2SDave Chinner  * to and including EOF, and a second mapping for beyond EOF.
12201fdca9c2SDave Chinner  */
12211fdca9c2SDave Chinner static void
12221fdca9c2SDave Chinner xfs_map_trim_size(
12231fdca9c2SDave Chinner 	struct inode		*inode,
12241fdca9c2SDave Chinner 	sector_t		iblock,
12251fdca9c2SDave Chinner 	struct buffer_head	*bh_result,
12261fdca9c2SDave Chinner 	struct xfs_bmbt_irec	*imap,
12271fdca9c2SDave Chinner 	xfs_off_t		offset,
12281fdca9c2SDave Chinner 	ssize_t			size)
12291fdca9c2SDave Chinner {
12301fdca9c2SDave Chinner 	xfs_off_t		mapping_size;
12311fdca9c2SDave Chinner 
12321fdca9c2SDave Chinner 	mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
12331fdca9c2SDave Chinner 	mapping_size <<= inode->i_blkbits;
12341fdca9c2SDave Chinner 
12351fdca9c2SDave Chinner 	ASSERT(mapping_size > 0);
12361fdca9c2SDave Chinner 	if (mapping_size > size)
12371fdca9c2SDave Chinner 		mapping_size = size;
12381fdca9c2SDave Chinner 	if (offset < i_size_read(inode) &&
12391fdca9c2SDave Chinner 	    offset + mapping_size >= i_size_read(inode)) {
12401fdca9c2SDave Chinner 		/* limit mapping to block that spans EOF */
12411fdca9c2SDave Chinner 		mapping_size = roundup_64(i_size_read(inode) - offset,
12421fdca9c2SDave Chinner 					  1 << inode->i_blkbits);
12431fdca9c2SDave Chinner 	}
12441fdca9c2SDave Chinner 	if (mapping_size > LONG_MAX)
12451fdca9c2SDave Chinner 		mapping_size = LONG_MAX;
12461fdca9c2SDave Chinner 
12471fdca9c2SDave Chinner 	bh_result->b_size = mapping_size;
12481fdca9c2SDave Chinner }
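
/*
 * Worked example of the trimming above (an illustrative sketch assuming
 * 4096-byte blocks, i.e. inode->i_blkbits == 12):
 *
 *   imap:    br_startoff = 8, br_blockcount = 16   (blocks 8..23)
 *   request: iblock = 10, size = 1048576, offset = 10 << 12 = 40960
 *   i_size:  65636 (64k plus 100 bytes)
 *
 *   mapping_size = (8 + 16 - 10) << 12 = 57344, already <= size
 *   offset < i_size and offset + mapping_size >= i_size, so the mapping
 *   spans EOF and is trimmed to roundup_64(65636 - 40960, 4096) = 28672,
 *   i.e. it now ends on the block boundary just beyond EOF (byte 69632).
 */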
12491fdca9c2SDave Chinner 
1250c59d87c4SChristoph Hellwig STATIC int
1251c59d87c4SChristoph Hellwig __xfs_get_blocks(
1252c59d87c4SChristoph Hellwig 	struct inode		*inode,
1253c59d87c4SChristoph Hellwig 	sector_t		iblock,
1254c59d87c4SChristoph Hellwig 	struct buffer_head	*bh_result,
1255c59d87c4SChristoph Hellwig 	int			create,
12563e12dbbdSDave Chinner 	bool			direct,
12573e12dbbdSDave Chinner 	bool			dax_fault)
1258c59d87c4SChristoph Hellwig {
1259c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1260c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1261c59d87c4SChristoph Hellwig 	xfs_fileoff_t		offset_fsb, end_fsb;
1262c59d87c4SChristoph Hellwig 	int			error = 0;
1263c59d87c4SChristoph Hellwig 	int			lockmode = 0;
1264c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	imap;
1265c59d87c4SChristoph Hellwig 	int			nimaps = 1;
1266c59d87c4SChristoph Hellwig 	xfs_off_t		offset;
1267c59d87c4SChristoph Hellwig 	ssize_t			size;
1268c59d87c4SChristoph Hellwig 	int			new = 0;
1269c59d87c4SChristoph Hellwig 
12706e8a27a8SChristoph Hellwig 	BUG_ON(create && !direct);
12716e8a27a8SChristoph Hellwig 
1272c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(mp))
1273b474c7aeSEric Sandeen 		return -EIO;
1274c59d87c4SChristoph Hellwig 
1275c59d87c4SChristoph Hellwig 	offset = (xfs_off_t)iblock << inode->i_blkbits;
1276c59d87c4SChristoph Hellwig 	ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
1277c59d87c4SChristoph Hellwig 	size = bh_result->b_size;
1278c59d87c4SChristoph Hellwig 
12796e8a27a8SChristoph Hellwig 	if (!create && offset >= i_size_read(inode))
1280c59d87c4SChristoph Hellwig 		return 0;
1281c59d87c4SChristoph Hellwig 
1282507630b2SDave Chinner 	/*
1283507630b2SDave Chinner 	 * Direct I/O is usually done on preallocated files, so try getting
12846e8a27a8SChristoph Hellwig 	 * a block mapping without an exclusive lock first.
1285507630b2SDave Chinner 	 */
1286309ecac8SChristoph Hellwig 	lockmode = xfs_ilock_data_map_shared(ip);
1287c59d87c4SChristoph Hellwig 
1288d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
1289d2c28191SDave Chinner 	if (offset + size > mp->m_super->s_maxbytes)
1290d2c28191SDave Chinner 		size = mp->m_super->s_maxbytes - offset;
1291c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1292c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
1293c59d87c4SChristoph Hellwig 
12945c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
12955c8ed202SDave Chinner 				&imap, &nimaps, XFS_BMAPI_ENTIRE);
1296c59d87c4SChristoph Hellwig 	if (error)
1297c59d87c4SChristoph Hellwig 		goto out_unlock;
1298c59d87c4SChristoph Hellwig 
12991ca19157SDave Chinner 	/* for DAX, we convert unwritten extents directly */
1300c59d87c4SChristoph Hellwig 	if (create &&
1301c59d87c4SChristoph Hellwig 	    (!nimaps ||
1302c59d87c4SChristoph Hellwig 	     (imap.br_startblock == HOLESTARTBLOCK ||
13031ca19157SDave Chinner 	      imap.br_startblock == DELAYSTARTBLOCK) ||
13041ca19157SDave Chinner 	     (IS_DAX(inode) && ISUNWRITTEN(&imap)))) {
1305507630b2SDave Chinner 		/*
1306009c6e87SBrian Foster 		 * xfs_iomap_write_direct() expects the shared lock. It
1307009c6e87SBrian Foster 		 * is unlocked on return.
1308507630b2SDave Chinner 		 */
1309009c6e87SBrian Foster 		if (lockmode == XFS_ILOCK_EXCL)
1310009c6e87SBrian Foster 			xfs_ilock_demote(ip, lockmode);
1311009c6e87SBrian Foster 
1312c59d87c4SChristoph Hellwig 		error = xfs_iomap_write_direct(ip, offset, size,
1313c59d87c4SChristoph Hellwig 					       &imap, nimaps);
1314507630b2SDave Chinner 		if (error)
13152451337dSDave Chinner 			return error;
1316d3bc815aSDave Chinner 		new = 1;
13176b698edeSDave Chinner 
1318d5cc2e3fSDave Chinner 		trace_xfs_get_blocks_alloc(ip, offset, size,
1319d5cc2e3fSDave Chinner 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1320d5cc2e3fSDave Chinner 						   : XFS_IO_DELALLOC, &imap);
1321c59d87c4SChristoph Hellwig 	} else if (nimaps) {
1322d5cc2e3fSDave Chinner 		trace_xfs_get_blocks_found(ip, offset, size,
1323d5cc2e3fSDave Chinner 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1324d5cc2e3fSDave Chinner 						   : XFS_IO_OVERWRITE, &imap);
1325507630b2SDave Chinner 		xfs_iunlock(ip, lockmode);
1326c59d87c4SChristoph Hellwig 	} else {
1327c59d87c4SChristoph Hellwig 		trace_xfs_get_blocks_notfound(ip, offset, size);
1328c59d87c4SChristoph Hellwig 		goto out_unlock;
1329c59d87c4SChristoph Hellwig 	}
1330c59d87c4SChristoph Hellwig 
13311ca19157SDave Chinner 	if (IS_DAX(inode) && create) {
13321ca19157SDave Chinner 		ASSERT(!ISUNWRITTEN(&imap));
13331ca19157SDave Chinner 		/* zeroing is not needed at a higher layer */
13341ca19157SDave Chinner 		new = 0;
13351ca19157SDave Chinner 	}
13361ca19157SDave Chinner 
13371fdca9c2SDave Chinner 	/* trim mapping down to size requested */
13386e8a27a8SChristoph Hellwig 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
13391fdca9c2SDave Chinner 
1340c59d87c4SChristoph Hellwig 	/*
1341a719370bSDave Chinner 	 * For unwritten extents do not report a disk address in the buffered
1342a719370bSDave Chinner 	 * read case (treat as if we're reading into a hole).
1343c59d87c4SChristoph Hellwig 	 */
1344a719370bSDave Chinner 	if (imap.br_startblock != HOLESTARTBLOCK &&
1345a719370bSDave Chinner 	    imap.br_startblock != DELAYSTARTBLOCK &&
1346a719370bSDave Chinner 	    (create || !ISUNWRITTEN(&imap))) {
1347c59d87c4SChristoph Hellwig 		xfs_map_buffer(inode, bh_result, &imap, offset);
1348a719370bSDave Chinner 		if (ISUNWRITTEN(&imap))
1349c59d87c4SChristoph Hellwig 			set_buffer_unwritten(bh_result);
1350a719370bSDave Chinner 		/* direct IO needs special help */
13516e8a27a8SChristoph Hellwig 		if (create) {
1352273dda76SChristoph Hellwig 			if (dax_fault)
1353273dda76SChristoph Hellwig 				ASSERT(!ISUNWRITTEN(&imap));
1354273dda76SChristoph Hellwig 			else
1355273dda76SChristoph Hellwig 				xfs_map_direct(inode, bh_result, &imap, offset);
1356273dda76SChristoph Hellwig 		}
1357c59d87c4SChristoph Hellwig 	}
1358c59d87c4SChristoph Hellwig 
1359c59d87c4SChristoph Hellwig 	/*
1360c59d87c4SChristoph Hellwig 	 * If this is a realtime file, data may be on a different device
1361c59d87c4SChristoph Hellwig 	 * to the one currently pointed to by the buffer_head's b_bdev.
1362c59d87c4SChristoph Hellwig 	 */
1363c59d87c4SChristoph Hellwig 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1364c59d87c4SChristoph Hellwig 
1365c59d87c4SChristoph Hellwig 	/*
1366c59d87c4SChristoph Hellwig 	 * If we previously allocated a block out beyond EOF and we are now
1367c59d87c4SChristoph Hellwig 	 * coming back to use it, then we will need to flag it as new even if
1368c59d87c4SChristoph Hellwig 	 * it has a disk address.
1369c59d87c4SChristoph Hellwig 	 *
1370c59d87c4SChristoph Hellwig 	 * With sub-block writes into unwritten extents we also need to mark
1371c59d87c4SChristoph Hellwig 	 * the buffer as new so that the unwritten parts of the buffer get
1372c59d87c4SChristoph Hellwig 	 * correctly zeroed.
1373c59d87c4SChristoph Hellwig 	 */
1374c59d87c4SChristoph Hellwig 	if (create &&
1375c59d87c4SChristoph Hellwig 	    ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
1376c59d87c4SChristoph Hellwig 	     (offset >= i_size_read(inode)) ||
1377c59d87c4SChristoph Hellwig 	     (new || ISUNWRITTEN(&imap))))
1378c59d87c4SChristoph Hellwig 		set_buffer_new(bh_result);
1379c59d87c4SChristoph Hellwig 
13806e8a27a8SChristoph Hellwig 	BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK);
1381c59d87c4SChristoph Hellwig 
1382c59d87c4SChristoph Hellwig 	return 0;
1383c59d87c4SChristoph Hellwig 
1384c59d87c4SChristoph Hellwig out_unlock:
1385c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, lockmode);
13862451337dSDave Chinner 	return error;
1387c59d87c4SChristoph Hellwig }
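
/*
 * Illustrative example of the offset arithmetic in __xfs_get_blocks()
 * (a sketch assuming 4096-byte filesystem blocks, i_blkbits == 12): for
 * iblock == 100 and bh_result->b_size == 16384,
 *
 *   offset     = 100 << 12                          = 409600
 *   offset_fsb = XFS_B_TO_FSBT(mp, 409600)          = 100
 *   end_fsb    = XFS_B_TO_FSB(mp, 409600 + 16384)   = 104
 *
 * so xfs_bmapi_read() is asked for the four blocks 100..103.
 */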
1388c59d87c4SChristoph Hellwig 
1389c59d87c4SChristoph Hellwig int
1390c59d87c4SChristoph Hellwig xfs_get_blocks(
1391c59d87c4SChristoph Hellwig 	struct inode		*inode,
1392c59d87c4SChristoph Hellwig 	sector_t		iblock,
1393c59d87c4SChristoph Hellwig 	struct buffer_head	*bh_result,
1394c59d87c4SChristoph Hellwig 	int			create)
1395c59d87c4SChristoph Hellwig {
13963e12dbbdSDave Chinner 	return __xfs_get_blocks(inode, iblock, bh_result, create, false, false);
1397c59d87c4SChristoph Hellwig }
1398c59d87c4SChristoph Hellwig 
13996b698edeSDave Chinner int
1400c59d87c4SChristoph Hellwig xfs_get_blocks_direct(
1401c59d87c4SChristoph Hellwig 	struct inode		*inode,
1402c59d87c4SChristoph Hellwig 	sector_t		iblock,
1403c59d87c4SChristoph Hellwig 	struct buffer_head	*bh_result,
1404c59d87c4SChristoph Hellwig 	int			create)
1405c59d87c4SChristoph Hellwig {
14063e12dbbdSDave Chinner 	return __xfs_get_blocks(inode, iblock, bh_result, create, true, false);
14073e12dbbdSDave Chinner }
14083e12dbbdSDave Chinner 
14093e12dbbdSDave Chinner int
14103e12dbbdSDave Chinner xfs_get_blocks_dax_fault(
14113e12dbbdSDave Chinner 	struct inode		*inode,
14123e12dbbdSDave Chinner 	sector_t		iblock,
14133e12dbbdSDave Chinner 	struct buffer_head	*bh_result,
14143e12dbbdSDave Chinner 	int			create)
14153e12dbbdSDave Chinner {
14163e12dbbdSDave Chinner 	return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
1417c59d87c4SChristoph Hellwig }
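
/*
 * Summary of the wrappers above: xfs_get_blocks() calls __xfs_get_blocks()
 * with (direct = false, dax_fault = false), xfs_get_blocks_direct() with
 * (true, false), and xfs_get_blocks_dax_fault() with (true, true).
 */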
1418c59d87c4SChristoph Hellwig 
1419273dda76SChristoph Hellwig /*
1420273dda76SChristoph Hellwig  * Complete a direct I/O write request.
1421273dda76SChristoph Hellwig  *
1422273dda76SChristoph Hellwig  * xfs_map_direct passes us some flags in the private data to tell us what to
1423273dda76SChristoph Hellwig  * do.  If no flags are set, then the write IO is an overwrite wholly within
1424273dda76SChristoph Hellwig  * the existing allocated file size and so there is nothing for us to do.
1425273dda76SChristoph Hellwig  *
1426273dda76SChristoph Hellwig  * Note that in this case the completion can be called in interrupt context,
1427273dda76SChristoph Hellwig  * whereas if we have flags set we will always be called in task context
1428273dda76SChristoph Hellwig  * (i.e. from a workqueue).
1429273dda76SChristoph Hellwig  */
1430fa8d972dSChristoph Hellwig int
1431273dda76SChristoph Hellwig xfs_end_io_direct_write(
1432273dda76SChristoph Hellwig 	struct kiocb		*iocb,
1433c59d87c4SChristoph Hellwig 	loff_t			offset,
1434273dda76SChristoph Hellwig 	ssize_t			size,
1435273dda76SChristoph Hellwig 	void			*private)
1436c59d87c4SChristoph Hellwig {
1437273dda76SChristoph Hellwig 	struct inode		*inode = file_inode(iocb->ki_filp);
1438273dda76SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1439273dda76SChristoph Hellwig 	uintptr_t		flags = (uintptr_t)private;
1440273dda76SChristoph Hellwig 	int			error = 0;
14412ba66237SChristoph Hellwig 
1442273dda76SChristoph Hellwig 	trace_xfs_end_io_direct_write(ip, offset, size);
1443273dda76SChristoph Hellwig 
1444e372843aSChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
1445273dda76SChristoph Hellwig 		return -EIO;
1446273dda76SChristoph Hellwig 
1447273dda76SChristoph Hellwig 	if (size <= 0)
1448273dda76SChristoph Hellwig 		return size;
1449c59d87c4SChristoph Hellwig 
1450c59d87c4SChristoph Hellwig 	/*
1451273dda76SChristoph Hellwig 	 * The flags tell us whether we are doing unwritten extent conversions
14526dfa1b67SDave Chinner 	 * or an append transaction that updates the on-disk file size. These
14536dfa1b67SDave Chinner 	 * are the only cases where we should *potentially* need to update
1454a06c277aSDave Chinner 	 * the VFS inode size.
1455273dda76SChristoph Hellwig 	 */
1456273dda76SChristoph Hellwig 	if (flags == 0) {
1457273dda76SChristoph Hellwig 		ASSERT(offset + size <= i_size_read(inode));
1458273dda76SChristoph Hellwig 		return 0;
1459273dda76SChristoph Hellwig 	}
1460273dda76SChristoph Hellwig 
1461273dda76SChristoph Hellwig 	/*
14626dfa1b67SDave Chinner 	 * We need to update the in-core inode size here so that we don't end up
1463a06c277aSDave Chinner 	 * with the on-disk inode size being outside the in-core inode size. We
1464a06c277aSDave Chinner 	 * have no other method of updating EOF for AIO, so always do it here
1465a06c277aSDave Chinner 	 * if necessary.
1466b9d59846SDave Chinner 	 *
1467b9d59846SDave Chinner 	 * We need to lock the test/set EOF update as we can be racing with
1468b9d59846SDave Chinner 	 * other IO completions here to update the EOF. Failing to serialise
1469b9d59846SDave Chinner 	 * here can result in EOF moving backwards and Bad Things Happen when
1470b9d59846SDave Chinner 	 * that occurs.
14712813d682SChristoph Hellwig 	 */
1472273dda76SChristoph Hellwig 	spin_lock(&ip->i_flags_lock);
14732ba66237SChristoph Hellwig 	if (offset + size > i_size_read(inode))
14742ba66237SChristoph Hellwig 		i_size_write(inode, offset + size);
1475273dda76SChristoph Hellwig 	spin_unlock(&ip->i_flags_lock);
14762813d682SChristoph Hellwig 
1477273dda76SChristoph Hellwig 	if (flags & XFS_DIO_FLAG_UNWRITTEN) {
1478273dda76SChristoph Hellwig 		trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
1479c59d87c4SChristoph Hellwig 
1480273dda76SChristoph Hellwig 		error = xfs_iomap_write_unwritten(ip, offset, size);
1481273dda76SChristoph Hellwig 	} else if (flags & XFS_DIO_FLAG_APPEND) {
1482273dda76SChristoph Hellwig 		trace_xfs_end_io_direct_write_append(ip, offset, size);
1483273dda76SChristoph Hellwig 
1484e372843aSChristoph Hellwig 		error = xfs_setfilesize(ip, offset, size);
14852ba66237SChristoph Hellwig 	}
1486c59d87c4SChristoph Hellwig 
1487273dda76SChristoph Hellwig 	return error;
14886e1ba0bcSDave Chinner }
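
/*
 * For example (an illustrative reading of the branches above): a direct
 * write into a preallocated unwritten extent wholly inside i_size arrives
 * with only XFS_DIO_FLAG_UNWRITTEN set, so just the extent conversion
 * runs; a write tagged XFS_DIO_FLAG_APPEND updates the in-core size under
 * i_flags_lock and then calls xfs_setfilesize(); with no flags set the
 * completion returns immediately after the size assert.
 */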
14896e1ba0bcSDave Chinner 
1490c59d87c4SChristoph Hellwig STATIC ssize_t
1491c59d87c4SChristoph Hellwig xfs_vm_direct_IO(
1492c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
1493c8b8e32dSChristoph Hellwig 	struct iov_iter		*iter)
1494c59d87c4SChristoph Hellwig {
1495c59d87c4SChristoph Hellwig 	/*
1496fa8d972dSChristoph Hellwig 	 * We just need the method present so that open/fcntl allow direct I/O.
1497c59d87c4SChristoph Hellwig 	 */
1498fa8d972dSChristoph Hellwig 	return -EINVAL;
1499c59d87c4SChristoph Hellwig }
1500c59d87c4SChristoph Hellwig 
1501c59d87c4SChristoph Hellwig STATIC sector_t
1502c59d87c4SChristoph Hellwig xfs_vm_bmap(
1503c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1504c59d87c4SChristoph Hellwig 	sector_t		block)
1505c59d87c4SChristoph Hellwig {
1506c59d87c4SChristoph Hellwig 	struct inode		*inode = (struct inode *)mapping->host;
1507c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1508c59d87c4SChristoph Hellwig 
1509c59d87c4SChristoph Hellwig 	trace_xfs_vm_bmap(XFS_I(inode));
1510c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_IOLOCK_SHARED);
15114bc1ea6bSDave Chinner 	filemap_write_and_wait(mapping);
1512c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_IOLOCK_SHARED);
1513c59d87c4SChristoph Hellwig 	return generic_block_bmap(mapping, block, xfs_get_blocks);
1514c59d87c4SChristoph Hellwig }
1515c59d87c4SChristoph Hellwig 
1516c59d87c4SChristoph Hellwig STATIC int
1517c59d87c4SChristoph Hellwig xfs_vm_readpage(
1518c59d87c4SChristoph Hellwig 	struct file		*unused,
1519c59d87c4SChristoph Hellwig 	struct page		*page)
1520c59d87c4SChristoph Hellwig {
1521121e213eSDave Chinner 	trace_xfs_vm_readpage(page->mapping->host, 1);
1522c59d87c4SChristoph Hellwig 	return mpage_readpage(page, xfs_get_blocks);
1523c59d87c4SChristoph Hellwig }
1524c59d87c4SChristoph Hellwig 
1525c59d87c4SChristoph Hellwig STATIC int
1526c59d87c4SChristoph Hellwig xfs_vm_readpages(
1527c59d87c4SChristoph Hellwig 	struct file		*unused,
1528c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1529c59d87c4SChristoph Hellwig 	struct list_head	*pages,
1530c59d87c4SChristoph Hellwig 	unsigned		nr_pages)
1531c59d87c4SChristoph Hellwig {
1532121e213eSDave Chinner 	trace_xfs_vm_readpages(mapping->host, nr_pages);
1533c59d87c4SChristoph Hellwig 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1534c59d87c4SChristoph Hellwig }
1535c59d87c4SChristoph Hellwig 
153622e757a4SDave Chinner /*
153722e757a4SDave Chinner  * This is basically a copy of __set_page_dirty_buffers() with one
153822e757a4SDave Chinner  * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
153922e757a4SDave Chinner  * dirty, we'll never be able to clean them because we don't write buffers
154022e757a4SDave Chinner  * beyond EOF, and that means we can't invalidate pages that span EOF
154122e757a4SDave Chinner  * that have been marked dirty. Further, the dirty state can leak into
154222e757a4SDave Chinner  * the file interior if the file is extended, resulting in all sorts of
154322e757a4SDave Chinner  * bad things happening as the state does not match the underlying data.
154422e757a4SDave Chinner  *
154522e757a4SDave Chinner  * XXX: this really indicates that bufferheads in XFS need to die. Warts like
154622e757a4SDave Chinner  * this only exist because of bufferheads and how the generic code manages them.
154722e757a4SDave Chinner  */
154822e757a4SDave Chinner STATIC int
154922e757a4SDave Chinner xfs_vm_set_page_dirty(
155022e757a4SDave Chinner 	struct page		*page)
155122e757a4SDave Chinner {
155222e757a4SDave Chinner 	struct address_space	*mapping = page->mapping;
155322e757a4SDave Chinner 	struct inode		*inode = mapping->host;
155422e757a4SDave Chinner 	loff_t			end_offset;
155522e757a4SDave Chinner 	loff_t			offset;
155622e757a4SDave Chinner 	int			newly_dirty;
155722e757a4SDave Chinner 
155822e757a4SDave Chinner 	if (unlikely(!mapping))
155922e757a4SDave Chinner 		return !TestSetPageDirty(page);
156022e757a4SDave Chinner 
156122e757a4SDave Chinner 	end_offset = i_size_read(inode);
156222e757a4SDave Chinner 	offset = page_offset(page);
156322e757a4SDave Chinner 
156422e757a4SDave Chinner 	spin_lock(&mapping->private_lock);
156522e757a4SDave Chinner 	if (page_has_buffers(page)) {
156622e757a4SDave Chinner 		struct buffer_head *head = page_buffers(page);
156722e757a4SDave Chinner 		struct buffer_head *bh = head;
156822e757a4SDave Chinner 
156922e757a4SDave Chinner 		do {
157022e757a4SDave Chinner 			if (offset < end_offset)
157122e757a4SDave Chinner 				set_buffer_dirty(bh);
157222e757a4SDave Chinner 			bh = bh->b_this_page;
157322e757a4SDave Chinner 			offset += 1 << inode->i_blkbits;
157422e757a4SDave Chinner 		} while (bh != head);
157522e757a4SDave Chinner 	}
1576c4843a75SGreg Thelen 	/*
157781f8c3a4SJohannes Weiner 	 * Lock out page->mem_cgroup migration to keep PageDirty
157881f8c3a4SJohannes Weiner 	 * synchronized with per-memcg dirty page counters.
1579c4843a75SGreg Thelen 	 */
158062cccb8cSJohannes Weiner 	lock_page_memcg(page);
158122e757a4SDave Chinner 	newly_dirty = !TestSetPageDirty(page);
158222e757a4SDave Chinner 	spin_unlock(&mapping->private_lock);
158322e757a4SDave Chinner 
158422e757a4SDave Chinner 	if (newly_dirty) {
158522e757a4SDave Chinner 		/* sigh - __set_page_dirty() is static, so copy it here, too */
158622e757a4SDave Chinner 		unsigned long flags;
158722e757a4SDave Chinner 
158822e757a4SDave Chinner 		spin_lock_irqsave(&mapping->tree_lock, flags);
158922e757a4SDave Chinner 		if (page->mapping) {	/* Race with truncate? */
159022e757a4SDave Chinner 			WARN_ON_ONCE(!PageUptodate(page));
159162cccb8cSJohannes Weiner 			account_page_dirtied(page, mapping);
159222e757a4SDave Chinner 			radix_tree_tag_set(&mapping->page_tree,
159322e757a4SDave Chinner 					page_index(page), PAGECACHE_TAG_DIRTY);
159422e757a4SDave Chinner 		}
159522e757a4SDave Chinner 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
159622e757a4SDave Chinner 	}
159762cccb8cSJohannes Weiner 	unlock_page_memcg(page);
1598c4843a75SGreg Thelen 	if (newly_dirty)
1599c4843a75SGreg Thelen 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
160022e757a4SDave Chinner 	return newly_dirty;
160122e757a4SDave Chinner }
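
/*
 * Worked example of the "no dirty buffers beyond EOF" tweak above (an
 * illustrative sketch assuming a 4096-byte page with 512-byte buffers,
 * page_offset(page) == 0 and i_size == 1200): the walk dirties only the
 * buffers at offsets 0, 512 and 1024 (each < 1200); the five buffers at
 * 1536 and beyond are left clean, even though the page itself may still
 * be marked dirty afterwards.
 */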
160222e757a4SDave Chinner 
1603c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
1604c59d87c4SChristoph Hellwig 	.readpage		= xfs_vm_readpage,
1605c59d87c4SChristoph Hellwig 	.readpages		= xfs_vm_readpages,
1606c59d87c4SChristoph Hellwig 	.writepage		= xfs_vm_writepage,
1607c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
160822e757a4SDave Chinner 	.set_page_dirty		= xfs_vm_set_page_dirty,
1609c59d87c4SChristoph Hellwig 	.releasepage		= xfs_vm_releasepage,
1610c59d87c4SChristoph Hellwig 	.invalidatepage		= xfs_vm_invalidatepage,
1611c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
1612c59d87c4SChristoph Hellwig 	.direct_IO		= xfs_vm_direct_IO,
1613c59d87c4SChristoph Hellwig 	.migratepage		= buffer_migrate_page,
1614c59d87c4SChristoph Hellwig 	.is_partially_uptodate  = block_is_partially_uptodate,
1615c59d87c4SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
1616c59d87c4SChristoph Hellwig };
1617