xref: /linux/fs/xfs/xfs_aops.c (revision 787eb485509f9d58962bd8b4dbc6a5ac6e2034fe)
1c59d87c4SChristoph Hellwig /*
2c59d87c4SChristoph Hellwig  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3c59d87c4SChristoph Hellwig  * All Rights Reserved.
4c59d87c4SChristoph Hellwig  *
5c59d87c4SChristoph Hellwig  * This program is free software; you can redistribute it and/or
6c59d87c4SChristoph Hellwig  * modify it under the terms of the GNU General Public License as
7c59d87c4SChristoph Hellwig  * published by the Free Software Foundation.
8c59d87c4SChristoph Hellwig  *
9c59d87c4SChristoph Hellwig  * This program is distributed in the hope that it would be useful,
10c59d87c4SChristoph Hellwig  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11c59d87c4SChristoph Hellwig  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12c59d87c4SChristoph Hellwig  * GNU General Public License for more details.
13c59d87c4SChristoph Hellwig  *
14c59d87c4SChristoph Hellwig  * You should have received a copy of the GNU General Public License
15c59d87c4SChristoph Hellwig  * along with this program; if not, write the Free Software Foundation,
16c59d87c4SChristoph Hellwig  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17c59d87c4SChristoph Hellwig  */
18c59d87c4SChristoph Hellwig #include "xfs.h"
1970a9883cSDave Chinner #include "xfs_shared.h"
20239880efSDave Chinner #include "xfs_format.h"
21239880efSDave Chinner #include "xfs_log_format.h"
22239880efSDave Chinner #include "xfs_trans_resv.h"
23c59d87c4SChristoph Hellwig #include "xfs_mount.h"
24c59d87c4SChristoph Hellwig #include "xfs_inode.h"
25239880efSDave Chinner #include "xfs_trans.h"
26281627dfSChristoph Hellwig #include "xfs_inode_item.h"
27c59d87c4SChristoph Hellwig #include "xfs_alloc.h"
28c59d87c4SChristoph Hellwig #include "xfs_error.h"
29c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
30c59d87c4SChristoph Hellwig #include "xfs_trace.h"
31c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
3268988114SDave Chinner #include "xfs_bmap_util.h"
33a4fbe6abSDave Chinner #include "xfs_bmap_btree.h"
34ef473667SDarrick J. Wong #include "xfs_reflink.h"
35c59d87c4SChristoph Hellwig #include <linux/gfp.h>
36c59d87c4SChristoph Hellwig #include <linux/mpage.h>
37c59d87c4SChristoph Hellwig #include <linux/pagevec.h>
38c59d87c4SChristoph Hellwig #include <linux/writeback.h>
39c59d87c4SChristoph Hellwig 
40fbcc0256SDave Chinner /*
41fbcc0256SDave Chinner  * structure owned by writepages, passed to each individual writepage call
42fbcc0256SDave Chinner  */
43fbcc0256SDave Chinner struct xfs_writepage_ctx {
44fbcc0256SDave Chinner 	struct xfs_bmbt_irec    imap;
45fbcc0256SDave Chinner 	bool			imap_valid;
46fbcc0256SDave Chinner 	unsigned int		io_type;
47fbcc0256SDave Chinner 	struct xfs_ioend	*ioend;
48fbcc0256SDave Chinner 	sector_t		last_block;
49fbcc0256SDave Chinner };
50fbcc0256SDave Chinner 
51c59d87c4SChristoph Hellwig void
52c59d87c4SChristoph Hellwig xfs_count_page_state(
53c59d87c4SChristoph Hellwig 	struct page		*page,
54c59d87c4SChristoph Hellwig 	int			*delalloc,
55c59d87c4SChristoph Hellwig 	int			*unwritten)
56c59d87c4SChristoph Hellwig {
57c59d87c4SChristoph Hellwig 	struct buffer_head	*bh, *head;
58c59d87c4SChristoph Hellwig 
59c59d87c4SChristoph Hellwig 	*delalloc = *unwritten = 0;
60c59d87c4SChristoph Hellwig 
61c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
62c59d87c4SChristoph Hellwig 	do {
63c59d87c4SChristoph Hellwig 		if (buffer_unwritten(bh))
64c59d87c4SChristoph Hellwig 			(*unwritten) = 1;
65c59d87c4SChristoph Hellwig 		else if (buffer_delay(bh))
66c59d87c4SChristoph Hellwig 			(*delalloc) = 1;
67c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
68c59d87c4SChristoph Hellwig }
69c59d87c4SChristoph Hellwig 
7020a90f58SRoss Zwisler struct block_device *
71c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode(
72c59d87c4SChristoph Hellwig 	struct inode		*inode)
73c59d87c4SChristoph Hellwig {
74c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
75c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
76c59d87c4SChristoph Hellwig 
77c59d87c4SChristoph Hellwig 	if (XFS_IS_REALTIME_INODE(ip))
78c59d87c4SChristoph Hellwig 		return mp->m_rtdev_targp->bt_bdev;
79c59d87c4SChristoph Hellwig 	else
80c59d87c4SChristoph Hellwig 		return mp->m_ddev_targp->bt_bdev;
81c59d87c4SChristoph Hellwig }
82c59d87c4SChristoph Hellwig 
83c59d87c4SChristoph Hellwig /*
8437992c18SDave Chinner  * We're now finished for good with this page.  Update the page state via the
8537992c18SDave Chinner  * associated buffer_heads, paying attention to the start and end offsets that
8637992c18SDave Chinner  * we need to process on the page.
8728b783e4SDave Chinner  *
8828b783e4SDave Chinner  * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
8928b783e4SDave Chinner  * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
9028b783e4SDave Chinner  * the page at all, as we may be racing with memory reclaim, which can free both
9128b783e4SDave Chinner  * the bufferhead chain and the page because it sees the page as clean and unused.
9228b783e4SDave Chinner  * unused.
9337992c18SDave Chinner  */
9437992c18SDave Chinner static void
9537992c18SDave Chinner xfs_finish_page_writeback(
9637992c18SDave Chinner 	struct inode		*inode,
9737992c18SDave Chinner 	struct bio_vec		*bvec,
9837992c18SDave Chinner 	int			error)
9937992c18SDave Chinner {
10037992c18SDave Chinner 	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
10128b783e4SDave Chinner 	struct buffer_head	*head, *bh, *next;
10237992c18SDave Chinner 	unsigned int		off = 0;
10328b783e4SDave Chinner 	unsigned int		bsize;
10437992c18SDave Chinner 
10537992c18SDave Chinner 	ASSERT(bvec->bv_offset < PAGE_SIZE);
10693407472SFabian Frederick 	ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
10737992c18SDave Chinner 	ASSERT(end < PAGE_SIZE);
10893407472SFabian Frederick 	ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
10937992c18SDave Chinner 
11037992c18SDave Chinner 	bh = head = page_buffers(bvec->bv_page);
11137992c18SDave Chinner 
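	/*
	 * All buffers on the page share the same size, so cache it up front:
	 * once bh->b_end_io() has run on the last buffer we can no longer
	 * dereference the bufferheads (see the landmine warning above).
	 */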
11228b783e4SDave Chinner 	bsize = bh->b_size;
11337992c18SDave Chinner 	do {
11428b783e4SDave Chinner 		next = bh->b_this_page;
11537992c18SDave Chinner 		if (off < bvec->bv_offset)
11637992c18SDave Chinner 			goto next_bh;
11737992c18SDave Chinner 		if (off > end)
11837992c18SDave Chinner 			break;
11937992c18SDave Chinner 		bh->b_end_io(bh, !error);
12037992c18SDave Chinner next_bh:
12128b783e4SDave Chinner 		off += bsize;
12228b783e4SDave Chinner 	} while ((bh = next) != head);
12337992c18SDave Chinner }
12437992c18SDave Chinner 
12537992c18SDave Chinner /*
12637992c18SDave Chinner  * We're now finished for good with this ioend structure.  Update the page
12737992c18SDave Chinner  * state, release holds on bios, and finally free up memory.  Do not use the
12837992c18SDave Chinner  * ioend after this.
129c59d87c4SChristoph Hellwig  */
130c59d87c4SChristoph Hellwig STATIC void
131c59d87c4SChristoph Hellwig xfs_destroy_ioend(
1320e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
1330e51a8e1SChristoph Hellwig 	int			error)
134c59d87c4SChristoph Hellwig {
13537992c18SDave Chinner 	struct inode		*inode = ioend->io_inode;
1360e51a8e1SChristoph Hellwig 	struct bio		*last = ioend->io_bio;
13737992c18SDave Chinner 	struct bio		*bio, *next;
138c59d87c4SChristoph Hellwig 
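	/*
	 * Walk the chain of bios hanging off this ioend, starting with the
	 * bio embedded in the ioend itself and following the bi_private
	 * links set up when the bios were chained in xfs_chain_bio().
	 */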
1390e51a8e1SChristoph Hellwig 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
14037992c18SDave Chinner 		struct bio_vec	*bvec;
14137992c18SDave Chinner 		int		i;
14237992c18SDave Chinner 
1430e51a8e1SChristoph Hellwig 		/*
1440e51a8e1SChristoph Hellwig 		 * For the last bio, bi_private points to the ioend, so we
1450e51a8e1SChristoph Hellwig 		 * need to explicitly end the iteration here.
1460e51a8e1SChristoph Hellwig 		 */
1470e51a8e1SChristoph Hellwig 		if (bio == last)
1480e51a8e1SChristoph Hellwig 			next = NULL;
1490e51a8e1SChristoph Hellwig 		else
15037992c18SDave Chinner 			next = bio->bi_private;
15137992c18SDave Chinner 
15237992c18SDave Chinner 		/* walk each page on bio, ending page IO on them */
15337992c18SDave Chinner 		bio_for_each_segment_all(bvec, bio, i)
15437992c18SDave Chinner 			xfs_finish_page_writeback(inode, bvec, error);
15537992c18SDave Chinner 
15637992c18SDave Chinner 		bio_put(bio);
157c59d87c4SChristoph Hellwig 	}
158c59d87c4SChristoph Hellwig }
159c59d87c4SChristoph Hellwig 
160c59d87c4SChristoph Hellwig /*
161fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
162fc0063c4SChristoph Hellwig  */
163fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
164fc0063c4SChristoph Hellwig {
165fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
166fc0063c4SChristoph Hellwig 		XFS_I(ioend->io_inode)->i_d.di_size;
167fc0063c4SChristoph Hellwig }
168fc0063c4SChristoph Hellwig 
169281627dfSChristoph Hellwig STATIC int
170281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc(
171281627dfSChristoph Hellwig 	struct xfs_ioend	*ioend)
172281627dfSChristoph Hellwig {
173281627dfSChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
174281627dfSChristoph Hellwig 	struct xfs_trans	*tp;
175281627dfSChristoph Hellwig 	int			error;
176281627dfSChristoph Hellwig 
177253f4911SChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
178253f4911SChristoph Hellwig 	if (error)
179281627dfSChristoph Hellwig 		return error;
180281627dfSChristoph Hellwig 
181281627dfSChristoph Hellwig 	ioend->io_append_trans = tp;
182281627dfSChristoph Hellwig 
183281627dfSChristoph Hellwig 	/*
184437a255aSDave Chinner 	 * We may pass freeze protection with a transaction.  So tell lockdep
185d9457dc0SJan Kara 	 * we released it.
186d9457dc0SJan Kara 	 */
187bee9182dSOleg Nesterov 	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
188d9457dc0SJan Kara 	/*
189281627dfSChristoph Hellwig 	 * We hand off the transaction to the completion thread now, so
190281627dfSChristoph Hellwig 	 * clear the flag here.
191281627dfSChristoph Hellwig 	 */
192281627dfSChristoph Hellwig 	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
193281627dfSChristoph Hellwig 	return 0;
194281627dfSChristoph Hellwig }
195281627dfSChristoph Hellwig 
196fc0063c4SChristoph Hellwig /*
1972813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
198c59d87c4SChristoph Hellwig  */
199281627dfSChristoph Hellwig STATIC int
200e372843aSChristoph Hellwig __xfs_setfilesize(
2012ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
2022ba66237SChristoph Hellwig 	struct xfs_trans	*tp,
2032ba66237SChristoph Hellwig 	xfs_off_t		offset,
2042ba66237SChristoph Hellwig 	size_t			size)
205c59d87c4SChristoph Hellwig {
206c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
207c59d87c4SChristoph Hellwig 
208aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
2092ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
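	/*
	 * If the I/O did not extend the file beyond the current on-disk
	 * size, xfs_new_eof() returns 0 and there is nothing to log, so
	 * just cancel the transaction.
	 */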
210281627dfSChristoph Hellwig 	if (!isize) {
211281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
2124906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
213281627dfSChristoph Hellwig 		return 0;
214c59d87c4SChristoph Hellwig 	}
215c59d87c4SChristoph Hellwig 
2162ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
217281627dfSChristoph Hellwig 
218281627dfSChristoph Hellwig 	ip->i_d.di_size = isize;
219281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
220281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
221281627dfSChristoph Hellwig 
22270393313SChristoph Hellwig 	return xfs_trans_commit(tp);
223c59d87c4SChristoph Hellwig }
224c59d87c4SChristoph Hellwig 
225e372843aSChristoph Hellwig int
226e372843aSChristoph Hellwig xfs_setfilesize(
227e372843aSChristoph Hellwig 	struct xfs_inode	*ip,
228e372843aSChristoph Hellwig 	xfs_off_t		offset,
229e372843aSChristoph Hellwig 	size_t			size)
230e372843aSChristoph Hellwig {
231e372843aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
232e372843aSChristoph Hellwig 	struct xfs_trans	*tp;
233e372843aSChristoph Hellwig 	int			error;
234e372843aSChristoph Hellwig 
235e372843aSChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
236e372843aSChristoph Hellwig 	if (error)
237e372843aSChristoph Hellwig 		return error;
238e372843aSChristoph Hellwig 
239e372843aSChristoph Hellwig 	return __xfs_setfilesize(ip, tp, offset, size);
240e372843aSChristoph Hellwig }
241e372843aSChristoph Hellwig 
2422ba66237SChristoph Hellwig STATIC int
2432ba66237SChristoph Hellwig xfs_setfilesize_ioend(
2440e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
2450e51a8e1SChristoph Hellwig 	int			error)
2462ba66237SChristoph Hellwig {
2472ba66237SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
2482ba66237SChristoph Hellwig 	struct xfs_trans	*tp = ioend->io_append_trans;
2492ba66237SChristoph Hellwig 
2502ba66237SChristoph Hellwig 	/*
2512ba66237SChristoph Hellwig 	 * The transaction may have been allocated in the I/O submission thread,
2522ba66237SChristoph Hellwig 	 * thus we need to mark ourselves as being in a transaction manually.
2532ba66237SChristoph Hellwig 	 * Similarly for freeze protection.
2542ba66237SChristoph Hellwig 	 */
2552ba66237SChristoph Hellwig 	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
256bee9182dSOleg Nesterov 	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
2572ba66237SChristoph Hellwig 
2585cb13dcdSZhaohongjiang 	/* we abort the update if there was an IO error */
2590e51a8e1SChristoph Hellwig 	if (error) {
2605cb13dcdSZhaohongjiang 		xfs_trans_cancel(tp);
2610e51a8e1SChristoph Hellwig 		return error;
2625cb13dcdSZhaohongjiang 	}
2635cb13dcdSZhaohongjiang 
264e372843aSChristoph Hellwig 	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
2652ba66237SChristoph Hellwig }
2662ba66237SChristoph Hellwig 
267c59d87c4SChristoph Hellwig /*
268c59d87c4SChristoph Hellwig  * IO write completion.
269c59d87c4SChristoph Hellwig  */
270c59d87c4SChristoph Hellwig STATIC void
271c59d87c4SChristoph Hellwig xfs_end_io(
272c59d87c4SChristoph Hellwig 	struct work_struct *work)
273c59d87c4SChristoph Hellwig {
2740e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend =
2750e51a8e1SChristoph Hellwig 		container_of(work, struct xfs_ioend, io_work);
276c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
277*787eb485SChristoph Hellwig 	xfs_off_t		offset = ioend->io_offset;
278*787eb485SChristoph Hellwig 	size_t			size = ioend->io_size;
2790e51a8e1SChristoph Hellwig 	int			error = ioend->io_bio->bi_error;
280c59d87c4SChristoph Hellwig 
281af055e37SBrian Foster 	/*
282*787eb485SChristoph Hellwig 	 * Just clean up the in-memory structures if the fs has been shut down.
283af055e37SBrian Foster 	 */
284*787eb485SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
2850e51a8e1SChristoph Hellwig 		error = -EIO;
28643caeb18SDarrick J. Wong 		goto done;
28743caeb18SDarrick J. Wong 	}
28843caeb18SDarrick J. Wong 
28943caeb18SDarrick J. Wong 	/*
290*787eb485SChristoph Hellwig 	 * Clean up any COW blocks on an I/O error.
291c59d87c4SChristoph Hellwig 	 */
292*787eb485SChristoph Hellwig 	if (unlikely(error)) {
293*787eb485SChristoph Hellwig 		switch (ioend->io_type) {
294*787eb485SChristoph Hellwig 		case XFS_IO_COW:
295*787eb485SChristoph Hellwig 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
296*787eb485SChristoph Hellwig 			break;
297*787eb485SChristoph Hellwig 		}
298*787eb485SChristoph Hellwig 
2995cb13dcdSZhaohongjiang 		goto done;
300*787eb485SChristoph Hellwig 	}
301*787eb485SChristoph Hellwig 
302*787eb485SChristoph Hellwig 	/*
303*787eb485SChristoph Hellwig 	 * Success:  commit the COW or unwritten blocks if needed.
304*787eb485SChristoph Hellwig 	 */
305*787eb485SChristoph Hellwig 	switch (ioend->io_type) {
306*787eb485SChristoph Hellwig 	case XFS_IO_COW:
307*787eb485SChristoph Hellwig 		error = xfs_reflink_end_cow(ip, offset, size);
308*787eb485SChristoph Hellwig 		break;
309*787eb485SChristoph Hellwig 	case XFS_IO_UNWRITTEN:
310*787eb485SChristoph Hellwig 		error = xfs_iomap_write_unwritten(ip, offset, size);
311*787eb485SChristoph Hellwig 		break;
312*787eb485SChristoph Hellwig 	default:
313*787eb485SChristoph Hellwig 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
314*787eb485SChristoph Hellwig 		break;
31584803fb7SChristoph Hellwig 	}
31684803fb7SChristoph Hellwig 
31704f658eeSChristoph Hellwig done:
318*787eb485SChristoph Hellwig 	if (ioend->io_append_trans)
319*787eb485SChristoph Hellwig 		error = xfs_setfilesize_ioend(ioend, error);
3200e51a8e1SChristoph Hellwig 	xfs_destroy_ioend(ioend, error);
321c59d87c4SChristoph Hellwig }
322c59d87c4SChristoph Hellwig 
3230e51a8e1SChristoph Hellwig STATIC void
3240e51a8e1SChristoph Hellwig xfs_end_bio(
3250e51a8e1SChristoph Hellwig 	struct bio		*bio)
326c59d87c4SChristoph Hellwig {
3270e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend = bio->bi_private;
3280e51a8e1SChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
329c59d87c4SChristoph Hellwig 
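	/*
	 * Unwritten extent and COW completions need a transaction to convert
	 * the extents, and on-disk size updates already carry one, so punt
	 * those to a workqueue.  Anything else can be torn down right here in
	 * bio completion context.
	 */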
33043caeb18SDarrick J. Wong 	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
3310e51a8e1SChristoph Hellwig 		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
3320e51a8e1SChristoph Hellwig 	else if (ioend->io_append_trans)
3330e51a8e1SChristoph Hellwig 		queue_work(mp->m_data_workqueue, &ioend->io_work);
3340e51a8e1SChristoph Hellwig 	else
3350e51a8e1SChristoph Hellwig 		xfs_destroy_ioend(ioend, bio->bi_error);
336c59d87c4SChristoph Hellwig }
337c59d87c4SChristoph Hellwig 
338c59d87c4SChristoph Hellwig STATIC int
339c59d87c4SChristoph Hellwig xfs_map_blocks(
340c59d87c4SChristoph Hellwig 	struct inode		*inode,
341c59d87c4SChristoph Hellwig 	loff_t			offset,
342c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
343988ef927SDave Chinner 	int			type)
344c59d87c4SChristoph Hellwig {
345c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
346c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
34793407472SFabian Frederick 	ssize_t			count = i_blocksize(inode);
348c59d87c4SChristoph Hellwig 	xfs_fileoff_t		offset_fsb, end_fsb;
349c59d87c4SChristoph Hellwig 	int			error = 0;
350c59d87c4SChristoph Hellwig 	int			bmapi_flags = XFS_BMAPI_ENTIRE;
351c59d87c4SChristoph Hellwig 	int			nimaps = 1;
352c59d87c4SChristoph Hellwig 
353c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(mp))
354b474c7aeSEric Sandeen 		return -EIO;
355c59d87c4SChristoph Hellwig 
356ef473667SDarrick J. Wong 	ASSERT(type != XFS_IO_COW);
3570d882a36SAlain Renaud 	if (type == XFS_IO_UNWRITTEN)
358c59d87c4SChristoph Hellwig 		bmapi_flags |= XFS_BMAPI_IGSTATE;
359c59d87c4SChristoph Hellwig 
360c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
361c59d87c4SChristoph Hellwig 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
362c59d87c4SChristoph Hellwig 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
363d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
364c59d87c4SChristoph Hellwig 
365d2c28191SDave Chinner 	if (offset + count > mp->m_super->s_maxbytes)
366d2c28191SDave Chinner 		count = mp->m_super->s_maxbytes - offset;
367c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
368c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
3695c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
3705c8ed202SDave Chinner 				imap, &nimaps, bmapi_flags);
371ef473667SDarrick J. Wong 	/*
372ef473667SDarrick J. Wong 	 * Truncate an overwrite extent if there's a pending CoW
373ef473667SDarrick J. Wong 	 * reservation before the end of this extent.  This forces us
374ef473667SDarrick J. Wong 	 * to come back to writepage to take care of the CoW.
375ef473667SDarrick J. Wong 	 */
376ef473667SDarrick J. Wong 	if (nimaps && type == XFS_IO_OVERWRITE)
377ef473667SDarrick J. Wong 		xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
378c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
379c59d87c4SChristoph Hellwig 
380c59d87c4SChristoph Hellwig 	if (error)
3812451337dSDave Chinner 		return error;
382c59d87c4SChristoph Hellwig 
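	/*
	 * A delalloc extent has no real blocks allocated yet, so convert the
	 * reservation into a real allocation before it can be written back.
	 */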
3830d882a36SAlain Renaud 	if (type == XFS_IO_DELALLOC &&
384c59d87c4SChristoph Hellwig 	    (!nimaps || isnullstartblock(imap->br_startblock))) {
38560b4984fSDarrick J. Wong 		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
38660b4984fSDarrick J. Wong 				imap);
387c59d87c4SChristoph Hellwig 		if (!error)
388ef473667SDarrick J. Wong 			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
3892451337dSDave Chinner 		return error;
390c59d87c4SChristoph Hellwig 	}
391c59d87c4SChristoph Hellwig 
392c59d87c4SChristoph Hellwig #ifdef DEBUG
3930d882a36SAlain Renaud 	if (type == XFS_IO_UNWRITTEN) {
394c59d87c4SChristoph Hellwig 		ASSERT(nimaps);
395c59d87c4SChristoph Hellwig 		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
396c59d87c4SChristoph Hellwig 		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
397c59d87c4SChristoph Hellwig 	}
398c59d87c4SChristoph Hellwig #endif
399c59d87c4SChristoph Hellwig 	if (nimaps)
400c59d87c4SChristoph Hellwig 		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
401c59d87c4SChristoph Hellwig 	return 0;
402c59d87c4SChristoph Hellwig }
403c59d87c4SChristoph Hellwig 
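/*
 * Return true if the byte offset lies within the block mapping cached in
 * @imap.
 */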
404fbcc0256SDave Chinner STATIC bool
405c59d87c4SChristoph Hellwig xfs_imap_valid(
406c59d87c4SChristoph Hellwig 	struct inode		*inode,
407c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
408c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
409c59d87c4SChristoph Hellwig {
410c59d87c4SChristoph Hellwig 	offset >>= inode->i_blkbits;
411c59d87c4SChristoph Hellwig 
412c59d87c4SChristoph Hellwig 	return offset >= imap->br_startoff &&
413c59d87c4SChristoph Hellwig 		offset < imap->br_startoff + imap->br_blockcount;
414c59d87c4SChristoph Hellwig }
415c59d87c4SChristoph Hellwig 
416c59d87c4SChristoph Hellwig STATIC void
417c59d87c4SChristoph Hellwig xfs_start_buffer_writeback(
418c59d87c4SChristoph Hellwig 	struct buffer_head	*bh)
419c59d87c4SChristoph Hellwig {
420c59d87c4SChristoph Hellwig 	ASSERT(buffer_mapped(bh));
421c59d87c4SChristoph Hellwig 	ASSERT(buffer_locked(bh));
422c59d87c4SChristoph Hellwig 	ASSERT(!buffer_delay(bh));
423c59d87c4SChristoph Hellwig 	ASSERT(!buffer_unwritten(bh));
424c59d87c4SChristoph Hellwig 
425c59d87c4SChristoph Hellwig 	mark_buffer_async_write(bh);
426c59d87c4SChristoph Hellwig 	set_buffer_uptodate(bh);
427c59d87c4SChristoph Hellwig 	clear_buffer_dirty(bh);
428c59d87c4SChristoph Hellwig }
429c59d87c4SChristoph Hellwig 
430c59d87c4SChristoph Hellwig STATIC void
431c59d87c4SChristoph Hellwig xfs_start_page_writeback(
432c59d87c4SChristoph Hellwig 	struct page		*page,
433e10de372SDave Chinner 	int			clear_dirty)
434c59d87c4SChristoph Hellwig {
435c59d87c4SChristoph Hellwig 	ASSERT(PageLocked(page));
436c59d87c4SChristoph Hellwig 	ASSERT(!PageWriteback(page));
4370d085a52SDave Chinner 
4380d085a52SDave Chinner 	/*
4390d085a52SDave Chinner 	 * if the page was not fully cleaned, we need to ensure that the higher
4400d085a52SDave Chinner 	 * layers come back to it correctly. That means we need to keep the page
4410d085a52SDave Chinner 	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
4420d085a52SDave Chinner 	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
4430d085a52SDave Chinner 	 * write this page in this writeback sweep will be made.
4440d085a52SDave Chinner 	 */
4450d085a52SDave Chinner 	if (clear_dirty) {
446c59d87c4SChristoph Hellwig 		clear_page_dirty_for_io(page);
447c59d87c4SChristoph Hellwig 		set_page_writeback(page);
4480d085a52SDave Chinner 	} else
4490d085a52SDave Chinner 		set_page_writeback_keepwrite(page);
4500d085a52SDave Chinner 
451c59d87c4SChristoph Hellwig 	unlock_page(page);
452c59d87c4SChristoph Hellwig }
453c59d87c4SChristoph Hellwig 
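/*
 * Add the page fragment backing @bh to @bio.  A return value shorter than
 * bh->b_size means the bio is full and the caller must chain a new one.
 */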
454c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
455c59d87c4SChristoph Hellwig {
456c59d87c4SChristoph Hellwig 	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
457c59d87c4SChristoph Hellwig }
458c59d87c4SChristoph Hellwig 
459c59d87c4SChristoph Hellwig /*
460bb18782aSDave Chinner  * Submit the bio for an ioend. We are passed an ioend with a bio attached to
461bb18782aSDave Chinner  * it, and we submit that bio. The ioend may be used for multiple bio
462bb18782aSDave Chinner  * submissions, so we only want to allocate an append transaction for the ioend
463bb18782aSDave Chinner  * once. In the case of multiple bio submission, each bio will take an IO
464bb18782aSDave Chinner  * reference to the ioend to ensure that the ioend completion is only done once
465bb18782aSDave Chinner  * all bios have been submitted and the ioend is really done.
4667bf7f352SDave Chinner  *
4677bf7f352SDave Chinner  * If @status is non-zero, it means that we have a situation where some part of
4687bf7f352SDave Chinner  * the submission process has failed after we have marked pages for writeback
469bb18782aSDave Chinner  * and unlocked them. In this situation, we need to fail the bio and ioend
470bb18782aSDave Chinner  * rather than submit it to IO. This typically only happens on a filesystem
471bb18782aSDave Chinner  * shutdown.
472c59d87c4SChristoph Hellwig  */
473e10de372SDave Chinner STATIC int
474c59d87c4SChristoph Hellwig xfs_submit_ioend(
475c59d87c4SChristoph Hellwig 	struct writeback_control *wbc,
4760e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
477e10de372SDave Chinner 	int			status)
478c59d87c4SChristoph Hellwig {
4795eda4300SDarrick J. Wong 	/* Convert CoW extents to regular */
4805eda4300SDarrick J. Wong 	if (!status && ioend->io_type == XFS_IO_COW) {
4815eda4300SDarrick J. Wong 		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4825eda4300SDarrick J. Wong 				ioend->io_offset, ioend->io_size);
4835eda4300SDarrick J. Wong 	}
4845eda4300SDarrick J. Wong 
485e10de372SDave Chinner 	/* Reserve log space if we might write beyond the on-disk inode size. */
486e10de372SDave Chinner 	if (!status &&
4870e51a8e1SChristoph Hellwig 	    ioend->io_type != XFS_IO_UNWRITTEN &&
488bb18782aSDave Chinner 	    xfs_ioend_is_append(ioend) &&
489bb18782aSDave Chinner 	    !ioend->io_append_trans)
490e10de372SDave Chinner 		status = xfs_setfilesize_trans_alloc(ioend);
491bb18782aSDave Chinner 
4920e51a8e1SChristoph Hellwig 	ioend->io_bio->bi_private = ioend;
4930e51a8e1SChristoph Hellwig 	ioend->io_bio->bi_end_io = xfs_end_bio;
4947637241eSJens Axboe 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
49570fd7614SChristoph Hellwig 
4967bf7f352SDave Chinner 	/*
4977bf7f352SDave Chinner 	 * If we are failing the IO now, just mark the ioend with an
4987bf7f352SDave Chinner 	 * error and finish it. This will run IO completion immediately
4997bf7f352SDave Chinner 	 * as there is only one reference to the ioend at this point in
5007bf7f352SDave Chinner 	 * time.
5017bf7f352SDave Chinner 	 */
502e10de372SDave Chinner 	if (status) {
5030e51a8e1SChristoph Hellwig 		ioend->io_bio->bi_error = status;
5040e51a8e1SChristoph Hellwig 		bio_endio(ioend->io_bio);
505e10de372SDave Chinner 		return status;
5067bf7f352SDave Chinner 	}
5077bf7f352SDave Chinner 
5084e49ea4aSMike Christie 	submit_bio(ioend->io_bio);
509e10de372SDave Chinner 	return 0;
510c59d87c4SChristoph Hellwig }
511c59d87c4SChristoph Hellwig 
5120e51a8e1SChristoph Hellwig static void
5130e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(
5140e51a8e1SChristoph Hellwig 	struct bio		*bio,
5150e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5160e51a8e1SChristoph Hellwig {
5170e51a8e1SChristoph Hellwig 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
5180e51a8e1SChristoph Hellwig 	bio->bi_bdev = bh->b_bdev;
5190e51a8e1SChristoph Hellwig }
5200e51a8e1SChristoph Hellwig 
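/*
 * Allocate an ioend with its bio embedded at ioend->io_inline_bio from
 * xfs_ioend_bioset, so a single mempool allocation backs both and the ioend
 * goes away when the last reference to that bio is dropped.
 */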
5210e51a8e1SChristoph Hellwig static struct xfs_ioend *
5220e51a8e1SChristoph Hellwig xfs_alloc_ioend(
5230e51a8e1SChristoph Hellwig 	struct inode		*inode,
5240e51a8e1SChristoph Hellwig 	unsigned int		type,
5250e51a8e1SChristoph Hellwig 	xfs_off_t		offset,
5260e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5270e51a8e1SChristoph Hellwig {
5280e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend;
5290e51a8e1SChristoph Hellwig 	struct bio		*bio;
5300e51a8e1SChristoph Hellwig 
5310e51a8e1SChristoph Hellwig 	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
5320e51a8e1SChristoph Hellwig 	xfs_init_bio_from_bh(bio, bh);
5330e51a8e1SChristoph Hellwig 
5340e51a8e1SChristoph Hellwig 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
5350e51a8e1SChristoph Hellwig 	INIT_LIST_HEAD(&ioend->io_list);
5360e51a8e1SChristoph Hellwig 	ioend->io_type = type;
5370e51a8e1SChristoph Hellwig 	ioend->io_inode = inode;
5380e51a8e1SChristoph Hellwig 	ioend->io_size = 0;
5390e51a8e1SChristoph Hellwig 	ioend->io_offset = offset;
5400e51a8e1SChristoph Hellwig 	INIT_WORK(&ioend->io_work, xfs_end_io);
5410e51a8e1SChristoph Hellwig 	ioend->io_append_trans = NULL;
5420e51a8e1SChristoph Hellwig 	ioend->io_bio = bio;
5430e51a8e1SChristoph Hellwig 	return ioend;
5440e51a8e1SChristoph Hellwig }
5450e51a8e1SChristoph Hellwig 
5460e51a8e1SChristoph Hellwig /*
5470e51a8e1SChristoph Hellwig  * Allocate a new bio, and chain the old bio to the new one.
5480e51a8e1SChristoph Hellwig  *
5490e51a8e1SChristoph Hellwig  * Note that we have to perform the chaining in this unintuitive order
5500e51a8e1SChristoph Hellwig  * so that the bi_private linkage is set up in the right direction for the
5510e51a8e1SChristoph Hellwig  * traversal in xfs_destroy_ioend().
5520e51a8e1SChristoph Hellwig  */
5530e51a8e1SChristoph Hellwig static void
5540e51a8e1SChristoph Hellwig xfs_chain_bio(
5550e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
5560e51a8e1SChristoph Hellwig 	struct writeback_control *wbc,
5570e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5580e51a8e1SChristoph Hellwig {
5590e51a8e1SChristoph Hellwig 	struct bio *new;
5600e51a8e1SChristoph Hellwig 
5610e51a8e1SChristoph Hellwig 	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
5620e51a8e1SChristoph Hellwig 	xfs_init_bio_from_bh(new, bh);
5630e51a8e1SChristoph Hellwig 
5640e51a8e1SChristoph Hellwig 	bio_chain(ioend->io_bio, new);
5650e51a8e1SChristoph Hellwig 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
5667637241eSJens Axboe 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
5674e49ea4aSMike Christie 	submit_bio(ioend->io_bio);
5680e51a8e1SChristoph Hellwig 	ioend->io_bio = new;
5690e51a8e1SChristoph Hellwig }
5700e51a8e1SChristoph Hellwig 
571c59d87c4SChristoph Hellwig /*
572c59d87c4SChristoph Hellwig  * Test to see if we've been building up a completion structure for
573c59d87c4SChristoph Hellwig  * earlier buffers -- if so, we try to append to this ioend if we
574c59d87c4SChristoph Hellwig  * can, otherwise we finish off any current ioend and start another.
575e10de372SDave Chinner  * Any ioend we finish off is added to @iolist so that the caller can submit it
576e10de372SDave Chinner  * once it has finished processing the dirty page.
577c59d87c4SChristoph Hellwig  */
578c59d87c4SChristoph Hellwig STATIC void
579c59d87c4SChristoph Hellwig xfs_add_to_ioend(
580c59d87c4SChristoph Hellwig 	struct inode		*inode,
581c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
582c59d87c4SChristoph Hellwig 	xfs_off_t		offset,
583e10de372SDave Chinner 	struct xfs_writepage_ctx *wpc,
584bb18782aSDave Chinner 	struct writeback_control *wbc,
585e10de372SDave Chinner 	struct list_head	*iolist)
586c59d87c4SChristoph Hellwig {
587fbcc0256SDave Chinner 	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
5880df61da8SDarrick J. Wong 	    bh->b_blocknr != wpc->last_block + 1 ||
5890df61da8SDarrick J. Wong 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
590e10de372SDave Chinner 		if (wpc->ioend)
591e10de372SDave Chinner 			list_add(&wpc->ioend->io_list, iolist);
5920e51a8e1SChristoph Hellwig 		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
593c59d87c4SChristoph Hellwig 	}
594c59d87c4SChristoph Hellwig 
5950e51a8e1SChristoph Hellwig 	/*
5960e51a8e1SChristoph Hellwig 	 * If the buffer doesn't fit into the bio we need to allocate a new
5970e51a8e1SChristoph Hellwig 	 * one.  This shouldn't happen more than once for a given buffer.
5980e51a8e1SChristoph Hellwig 	 */
5990e51a8e1SChristoph Hellwig 	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
6000e51a8e1SChristoph Hellwig 		xfs_chain_bio(wpc->ioend, wbc, bh);
601bb18782aSDave Chinner 
602fbcc0256SDave Chinner 	wpc->ioend->io_size += bh->b_size;
603fbcc0256SDave Chinner 	wpc->last_block = bh->b_blocknr;
604e10de372SDave Chinner 	xfs_start_buffer_writeback(bh);
605c59d87c4SChristoph Hellwig }
606c59d87c4SChristoph Hellwig 
607c59d87c4SChristoph Hellwig STATIC void
608c59d87c4SChristoph Hellwig xfs_map_buffer(
609c59d87c4SChristoph Hellwig 	struct inode		*inode,
610c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
611c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
612c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
613c59d87c4SChristoph Hellwig {
614c59d87c4SChristoph Hellwig 	sector_t		bn;
615c59d87c4SChristoph Hellwig 	struct xfs_mount	*m = XFS_I(inode)->i_mount;
616c59d87c4SChristoph Hellwig 	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
617c59d87c4SChristoph Hellwig 	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
618c59d87c4SChristoph Hellwig 
619c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
620c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
621c59d87c4SChristoph Hellwig 
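	/*
	 * Convert the extent's start address from 512-byte basic blocks to
	 * filesystem blocks and add the block offset of @offset within the
	 * extent to get the buffer's block number.
	 */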
622c59d87c4SChristoph Hellwig 	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
623c59d87c4SChristoph Hellwig 	      ((offset - iomap_offset) >> inode->i_blkbits);
624c59d87c4SChristoph Hellwig 
625c59d87c4SChristoph Hellwig 	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
626c59d87c4SChristoph Hellwig 
627c59d87c4SChristoph Hellwig 	bh->b_blocknr = bn;
628c59d87c4SChristoph Hellwig 	set_buffer_mapped(bh);
629c59d87c4SChristoph Hellwig }
630c59d87c4SChristoph Hellwig 
631c59d87c4SChristoph Hellwig STATIC void
632c59d87c4SChristoph Hellwig xfs_map_at_offset(
633c59d87c4SChristoph Hellwig 	struct inode		*inode,
634c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
635c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
636c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
637c59d87c4SChristoph Hellwig {
638c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
639c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
640c59d87c4SChristoph Hellwig 
641c59d87c4SChristoph Hellwig 	xfs_map_buffer(inode, bh, imap, offset);
642c59d87c4SChristoph Hellwig 	set_buffer_mapped(bh);
643c59d87c4SChristoph Hellwig 	clear_buffer_delay(bh);
644c59d87c4SChristoph Hellwig 	clear_buffer_unwritten(bh);
645c59d87c4SChristoph Hellwig }
646c59d87c4SChristoph Hellwig 
647c59d87c4SChristoph Hellwig /*
648a49935f2SDave Chinner  * Test if a given page contains at least one buffer of a given @type.
649a49935f2SDave Chinner  * If @check_all_buffers is true, then we walk all the buffers in the page to
650a49935f2SDave Chinner  * try to find one of the type passed in. If it is not set, then we only
651a49935f2SDave Chinner  * check the first buffer on the page for a match.
652c59d87c4SChristoph Hellwig  */
653a49935f2SDave Chinner STATIC bool
6546ffc4db5SDave Chinner xfs_check_page_type(
655c59d87c4SChristoph Hellwig 	struct page		*page,
656a49935f2SDave Chinner 	unsigned int		type,
657a49935f2SDave Chinner 	bool			check_all_buffers)
658c59d87c4SChristoph Hellwig {
659a49935f2SDave Chinner 	struct buffer_head	*bh;
660a49935f2SDave Chinner 	struct buffer_head	*head;
661c59d87c4SChristoph Hellwig 
662a49935f2SDave Chinner 	if (PageWriteback(page))
663a49935f2SDave Chinner 		return false;
664a49935f2SDave Chinner 	if (!page->mapping)
665a49935f2SDave Chinner 		return false;
666a49935f2SDave Chinner 	if (!page_has_buffers(page))
667a49935f2SDave Chinner 		return false;
668c59d87c4SChristoph Hellwig 
669c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
670c59d87c4SChristoph Hellwig 	do {
671a49935f2SDave Chinner 		if (buffer_unwritten(bh)) {
672a49935f2SDave Chinner 			if (type == XFS_IO_UNWRITTEN)
673a49935f2SDave Chinner 				return true;
674a49935f2SDave Chinner 		} else if (buffer_delay(bh)) {
675805eeb8eSDan Carpenter 			if (type == XFS_IO_DELALLOC)
676a49935f2SDave Chinner 				return true;
677a49935f2SDave Chinner 		} else if (buffer_dirty(bh) && buffer_mapped(bh)) {
678805eeb8eSDan Carpenter 			if (type == XFS_IO_OVERWRITE)
679a49935f2SDave Chinner 				return true;
680a49935f2SDave Chinner 		}
681a49935f2SDave Chinner 
682a49935f2SDave Chinner 		/* If we are only checking the first buffer, we are done now. */
683a49935f2SDave Chinner 		if (!check_all_buffers)
684c59d87c4SChristoph Hellwig 			break;
685c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
686c59d87c4SChristoph Hellwig 
687a49935f2SDave Chinner 	return false;
688c59d87c4SChristoph Hellwig }
689c59d87c4SChristoph Hellwig 
690c59d87c4SChristoph Hellwig STATIC void
691c59d87c4SChristoph Hellwig xfs_vm_invalidatepage(
692c59d87c4SChristoph Hellwig 	struct page		*page,
693d47992f8SLukas Czerner 	unsigned int		offset,
694d47992f8SLukas Czerner 	unsigned int		length)
695c59d87c4SChristoph Hellwig {
69634097dfeSLukas Czerner 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
69734097dfeSLukas Czerner 				 length);
69834097dfeSLukas Czerner 	block_invalidatepage(page, offset, length);
699c59d87c4SChristoph Hellwig }
700c59d87c4SChristoph Hellwig 
701c59d87c4SChristoph Hellwig /*
702c59d87c4SChristoph Hellwig  * If the page has delalloc buffers on it, we need to punch them out before we
703c59d87c4SChristoph Hellwig  * invalidate the page. If we don't, we leave a stale delalloc mapping on the
704c59d87c4SChristoph Hellwig  * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
705c59d87c4SChristoph Hellwig  * is done on that same region - the delalloc extent is returned when none is
706c59d87c4SChristoph Hellwig  * supposed to be there.
707c59d87c4SChristoph Hellwig  *
708c59d87c4SChristoph Hellwig  * We prevent this by truncating away the delalloc regions on the page before
709c59d87c4SChristoph Hellwig  * invalidating it. Because they are delalloc, we can do this without needing a
710c59d87c4SChristoph Hellwig  * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
711c59d87c4SChristoph Hellwig  * truncation without a transaction as there is no space left for block
712c59d87c4SChristoph Hellwig  * reservation (typically why we see a ENOSPC in writeback).
713c59d87c4SChristoph Hellwig  *
714c59d87c4SChristoph Hellwig  * This is not a performance critical path, so for now just do the punching a
715c59d87c4SChristoph Hellwig  * buffer head at a time.
716c59d87c4SChristoph Hellwig  */
717c59d87c4SChristoph Hellwig STATIC void
718c59d87c4SChristoph Hellwig xfs_aops_discard_page(
719c59d87c4SChristoph Hellwig 	struct page		*page)
720c59d87c4SChristoph Hellwig {
721c59d87c4SChristoph Hellwig 	struct inode		*inode = page->mapping->host;
722c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
723c59d87c4SChristoph Hellwig 	struct buffer_head	*bh, *head;
724c59d87c4SChristoph Hellwig 	loff_t			offset = page_offset(page);
725c59d87c4SChristoph Hellwig 
726a49935f2SDave Chinner 	if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
727c59d87c4SChristoph Hellwig 		goto out_invalidate;
728c59d87c4SChristoph Hellwig 
729c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
730c59d87c4SChristoph Hellwig 		goto out_invalidate;
731c59d87c4SChristoph Hellwig 
732c59d87c4SChristoph Hellwig 	xfs_alert(ip->i_mount,
733c59d87c4SChristoph Hellwig 		"page discard on page %p, inode 0x%llx, offset %llu.",
734c59d87c4SChristoph Hellwig 			page, ip->i_ino, offset);
735c59d87c4SChristoph Hellwig 
736c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
737c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
738c59d87c4SChristoph Hellwig 	do {
739c59d87c4SChristoph Hellwig 		int		error;
740c59d87c4SChristoph Hellwig 		xfs_fileoff_t	start_fsb;
741c59d87c4SChristoph Hellwig 
742c59d87c4SChristoph Hellwig 		if (!buffer_delay(bh))
743c59d87c4SChristoph Hellwig 			goto next_buffer;
744c59d87c4SChristoph Hellwig 
745c59d87c4SChristoph Hellwig 		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
746c59d87c4SChristoph Hellwig 		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
747c59d87c4SChristoph Hellwig 		if (error) {
748c59d87c4SChristoph Hellwig 			/* something screwed, just bail */
749c59d87c4SChristoph Hellwig 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
750c59d87c4SChristoph Hellwig 				xfs_alert(ip->i_mount,
751c59d87c4SChristoph Hellwig 			"page discard unable to remove delalloc mapping.");
752c59d87c4SChristoph Hellwig 			}
753c59d87c4SChristoph Hellwig 			break;
754c59d87c4SChristoph Hellwig 		}
755c59d87c4SChristoph Hellwig next_buffer:
75693407472SFabian Frederick 		offset += i_blocksize(inode);
757c59d87c4SChristoph Hellwig 
758c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
759c59d87c4SChristoph Hellwig 
760c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
761c59d87c4SChristoph Hellwig out_invalidate:
76209cbfeafSKirill A. Shutemov 	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
763c59d87c4SChristoph Hellwig 	return;
764c59d87c4SChristoph Hellwig }
765c59d87c4SChristoph Hellwig 
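/*
 * Check whether there is a copy-on-write mapping covering @offset.  If so,
 * switch the writepage context to COW I/O, allocating real blocks for a
 * delayed COW extent first if necessary.
 */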
766ef473667SDarrick J. Wong static int
767ef473667SDarrick J. Wong xfs_map_cow(
768ef473667SDarrick J. Wong 	struct xfs_writepage_ctx *wpc,
769ef473667SDarrick J. Wong 	struct inode		*inode,
770ef473667SDarrick J. Wong 	loff_t			offset,
771ef473667SDarrick J. Wong 	unsigned int		*new_type)
772ef473667SDarrick J. Wong {
773ef473667SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(inode);
774ef473667SDarrick J. Wong 	struct xfs_bmbt_irec	imap;
775092d5d9dSChristoph Hellwig 	bool			is_cow = false;
776ef473667SDarrick J. Wong 	int			error;
777ef473667SDarrick J. Wong 
778ef473667SDarrick J. Wong 	/*
779ef473667SDarrick J. Wong 	 * If we already have a valid COW mapping keep using it.
780ef473667SDarrick J. Wong 	 */
781ef473667SDarrick J. Wong 	if (wpc->io_type == XFS_IO_COW) {
782ef473667SDarrick J. Wong 		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
783ef473667SDarrick J. Wong 		if (wpc->imap_valid) {
784ef473667SDarrick J. Wong 			*new_type = XFS_IO_COW;
785ef473667SDarrick J. Wong 			return 0;
786ef473667SDarrick J. Wong 		}
787ef473667SDarrick J. Wong 	}
788ef473667SDarrick J. Wong 
789ef473667SDarrick J. Wong 	/*
790ef473667SDarrick J. Wong 	 * Else we need to check if there is a COW mapping at this offset.
791ef473667SDarrick J. Wong 	 */
792ef473667SDarrick J. Wong 	xfs_ilock(ip, XFS_ILOCK_SHARED);
793092d5d9dSChristoph Hellwig 	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
794ef473667SDarrick J. Wong 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
795ef473667SDarrick J. Wong 
796ef473667SDarrick J. Wong 	if (!is_cow)
797ef473667SDarrick J. Wong 		return 0;
798ef473667SDarrick J. Wong 
799ef473667SDarrick J. Wong 	/*
800ef473667SDarrick J. Wong 	 * And if the COW mapping has a delayed extent here we need to
801ef473667SDarrick J. Wong 	 * allocate real space for it now.
802ef473667SDarrick J. Wong 	 */
803092d5d9dSChristoph Hellwig 	if (isnullstartblock(imap.br_startblock)) {
804ef473667SDarrick J. Wong 		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
805ef473667SDarrick J. Wong 				&imap);
806ef473667SDarrick J. Wong 		if (error)
807ef473667SDarrick J. Wong 			return error;
808ef473667SDarrick J. Wong 	}
809ef473667SDarrick J. Wong 
810ef473667SDarrick J. Wong 	wpc->io_type = *new_type = XFS_IO_COW;
811ef473667SDarrick J. Wong 	wpc->imap_valid = true;
812ef473667SDarrick J. Wong 	wpc->imap = imap;
813ef473667SDarrick J. Wong 	return 0;
814ef473667SDarrick J. Wong }
815ef473667SDarrick J. Wong 
816c59d87c4SChristoph Hellwig /*
817e10de372SDave Chinner  * We implement an immediate ioend submission policy here to avoid needing to
818e10de372SDave Chinner  * chain multiple ioends and hence nest mempool allocations which can violate
819e10de372SDave Chinner  * forward progress guarantees we need to provide. The current ioend we are
820e10de372SDave Chinner  * adding buffers to is cached on the writepage context, and if the new buffer
821e10de372SDave Chinner  * does not append to the cached ioend it will create a new ioend and cache that
822e10de372SDave Chinner  * instead.
823e10de372SDave Chinner  *
824e10de372SDave Chinner  * If a new ioend is created and cached, the old ioend is returned and queued
825e10de372SDave Chinner  * locally for submission once the entire page is processed or an error has been
826e10de372SDave Chinner  * detected.  While ioends are submitted immediately after they are completed,
827e10de372SDave Chinner  * batching optimisations are provided by higher level block plugging.
828e10de372SDave Chinner  *
829e10de372SDave Chinner  * At the end of a writeback pass, there will be a cached ioend remaining on the
830e10de372SDave Chinner  * writepage context that the caller will need to submit.
831e10de372SDave Chinner  */
832bfce7d2eSDave Chinner static int
833bfce7d2eSDave Chinner xfs_writepage_map(
834bfce7d2eSDave Chinner 	struct xfs_writepage_ctx *wpc,
835e10de372SDave Chinner 	struct writeback_control *wbc,
836bfce7d2eSDave Chinner 	struct inode		*inode,
837bfce7d2eSDave Chinner 	struct page		*page,
838bfce7d2eSDave Chinner 	loff_t			offset,
839bfce7d2eSDave Chinner 	__uint64_t              end_offset)
840bfce7d2eSDave Chinner {
841e10de372SDave Chinner 	LIST_HEAD(submit_list);
842e10de372SDave Chinner 	struct xfs_ioend	*ioend, *next;
843bfce7d2eSDave Chinner 	struct buffer_head	*bh, *head;
84493407472SFabian Frederick 	ssize_t			len = i_blocksize(inode);
845bfce7d2eSDave Chinner 	int			error = 0;
846bfce7d2eSDave Chinner 	int			count = 0;
847e10de372SDave Chinner 	int			uptodate = 1;
848ef473667SDarrick J. Wong 	unsigned int		new_type;
849bfce7d2eSDave Chinner 
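	/*
	 * Walk each buffer on the page, mapping the dirty ones and adding
	 * them to ioends; finished ioends are queued on submit_list and
	 * submitted below once the whole page has been processed.
	 */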
850bfce7d2eSDave Chinner 	bh = head = page_buffers(page);
851bfce7d2eSDave Chinner 	offset = page_offset(page);
852bfce7d2eSDave Chinner 	do {
853bfce7d2eSDave Chinner 		if (offset >= end_offset)
854bfce7d2eSDave Chinner 			break;
855bfce7d2eSDave Chinner 		if (!buffer_uptodate(bh))
856bfce7d2eSDave Chinner 			uptodate = 0;
857bfce7d2eSDave Chinner 
858bfce7d2eSDave Chinner 		/*
859bfce7d2eSDave Chinner 		 * set_page_dirty dirties all buffers in a page, independent
860bfce7d2eSDave Chinner 		 * of their state.  The dirty state however is entirely
861bfce7d2eSDave Chinner 		 * meaningless for holes (!mapped && uptodate), so skip
862bfce7d2eSDave Chinner 		 * buffers covering holes here.
863bfce7d2eSDave Chinner 		 */
864bfce7d2eSDave Chinner 		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
865bfce7d2eSDave Chinner 			wpc->imap_valid = false;
866bfce7d2eSDave Chinner 			continue;
867bfce7d2eSDave Chinner 		}
868bfce7d2eSDave Chinner 
869ef473667SDarrick J. Wong 		if (buffer_unwritten(bh))
870ef473667SDarrick J. Wong 			new_type = XFS_IO_UNWRITTEN;
871ef473667SDarrick J. Wong 		else if (buffer_delay(bh))
872ef473667SDarrick J. Wong 			new_type = XFS_IO_DELALLOC;
873ef473667SDarrick J. Wong 		else if (buffer_uptodate(bh))
874ef473667SDarrick J. Wong 			new_type = XFS_IO_OVERWRITE;
875ef473667SDarrick J. Wong 		else {
876bfce7d2eSDave Chinner 			if (PageUptodate(page))
877bfce7d2eSDave Chinner 				ASSERT(buffer_mapped(bh));
878bfce7d2eSDave Chinner 			/*
879bfce7d2eSDave Chinner 			 * This buffer is not uptodate and will not be
880bfce7d2eSDave Chinner 			 * written to disk.  Ensure that we will put any
881bfce7d2eSDave Chinner 			 * subsequent writeable buffers into a new
882bfce7d2eSDave Chinner 			 * ioend.
883bfce7d2eSDave Chinner 			 */
884bfce7d2eSDave Chinner 			wpc->imap_valid = false;
885bfce7d2eSDave Chinner 			continue;
886bfce7d2eSDave Chinner 		}
887bfce7d2eSDave Chinner 
888ef473667SDarrick J. Wong 		if (xfs_is_reflink_inode(XFS_I(inode))) {
889ef473667SDarrick J. Wong 			error = xfs_map_cow(wpc, inode, offset, &new_type);
890ef473667SDarrick J. Wong 			if (error)
891ef473667SDarrick J. Wong 				goto out;
892ef473667SDarrick J. Wong 		}
893ef473667SDarrick J. Wong 
894ef473667SDarrick J. Wong 		if (wpc->io_type != new_type) {
895ef473667SDarrick J. Wong 			wpc->io_type = new_type;
896ef473667SDarrick J. Wong 			wpc->imap_valid = false;
897ef473667SDarrick J. Wong 		}
898ef473667SDarrick J. Wong 
899bfce7d2eSDave Chinner 		if (wpc->imap_valid)
900bfce7d2eSDave Chinner 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
901bfce7d2eSDave Chinner 							 offset);
902bfce7d2eSDave Chinner 		if (!wpc->imap_valid) {
903bfce7d2eSDave Chinner 			error = xfs_map_blocks(inode, offset, &wpc->imap,
904bfce7d2eSDave Chinner 					     wpc->io_type);
905bfce7d2eSDave Chinner 			if (error)
906e10de372SDave Chinner 				goto out;
907bfce7d2eSDave Chinner 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
908bfce7d2eSDave Chinner 							 offset);
909bfce7d2eSDave Chinner 		}
910bfce7d2eSDave Chinner 		if (wpc->imap_valid) {
911bfce7d2eSDave Chinner 			lock_buffer(bh);
912bfce7d2eSDave Chinner 			if (wpc->io_type != XFS_IO_OVERWRITE)
913bfce7d2eSDave Chinner 				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
914bb18782aSDave Chinner 			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
915bfce7d2eSDave Chinner 			count++;
916bfce7d2eSDave Chinner 		}
917bfce7d2eSDave Chinner 
918bfce7d2eSDave Chinner 	} while (offset += len, ((bh = bh->b_this_page) != head));
919bfce7d2eSDave Chinner 
920bfce7d2eSDave Chinner 	if (uptodate && bh == head)
921bfce7d2eSDave Chinner 		SetPageUptodate(page);
922bfce7d2eSDave Chinner 
923e10de372SDave Chinner 	ASSERT(wpc->ioend || list_empty(&submit_list));
924bfce7d2eSDave Chinner 
925e10de372SDave Chinner out:
926bfce7d2eSDave Chinner 	/*
927e10de372SDave Chinner 	 * On error, we have to fail the ioend here because we have locked
928e10de372SDave Chinner 	 * buffers in the ioend. If we don't do this, we'll deadlock
929e10de372SDave Chinner 	 * invalidating the page as that tries to lock the buffers on the page.
930e10de372SDave Chinner 	 * Also, because we may have set pages under writeback, we have to make
931e10de372SDave Chinner 	 * sure we run IO completion to mark the error state of the IO
932e10de372SDave Chinner 	 * appropriately, so we can't cancel the ioend directly here. That means
933e10de372SDave Chinner 	 * we have to mark this page as under writeback if we included any
934e10de372SDave Chinner 	 * buffers from it in the ioend chain so that completion treats it
935e10de372SDave Chinner 	 * correctly.
936bfce7d2eSDave Chinner 	 *
937e10de372SDave Chinner 	 * If we didn't include the page in the ioend, then on error we can
938e10de372SDave Chinner 	 * simply discard and unlock it as there are no other users of the page
939e10de372SDave Chinner 	 * or its buffers right now. The caller will still need to trigger
940e10de372SDave Chinner 	 * submission of outstanding ioends on the writepage context so they are
941e10de372SDave Chinner 	 * treated correctly on error.
942bfce7d2eSDave Chinner 	 */
943e10de372SDave Chinner 	if (count) {
944e10de372SDave Chinner 		xfs_start_page_writeback(page, !error);
945e10de372SDave Chinner 
946e10de372SDave Chinner 		/*
947e10de372SDave Chinner 		 * Preserve the original error if there was one, otherwise catch
948e10de372SDave Chinner 		 * submission errors here and propagate into subsequent ioend
949e10de372SDave Chinner 		 * submissions.
950e10de372SDave Chinner 		 */
951e10de372SDave Chinner 		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
952e10de372SDave Chinner 			int error2;
953e10de372SDave Chinner 
954e10de372SDave Chinner 			list_del_init(&ioend->io_list);
955e10de372SDave Chinner 			error2 = xfs_submit_ioend(wbc, ioend, error);
956e10de372SDave Chinner 			if (error2 && !error)
957e10de372SDave Chinner 				error = error2;
958e10de372SDave Chinner 		}
959e10de372SDave Chinner 	} else if (error) {
960bfce7d2eSDave Chinner 		xfs_aops_discard_page(page);
961bfce7d2eSDave Chinner 		ClearPageUptodate(page);
962bfce7d2eSDave Chinner 		unlock_page(page);
963e10de372SDave Chinner 	} else {
964e10de372SDave Chinner 		/*
965e10de372SDave Chinner 		 * We can end up here with no error and nothing to write if we
966e10de372SDave Chinner 		 * race with a partial page truncate on a sub-page block sized
967e10de372SDave Chinner 		 * filesystem. In that case we need to mark the page clean.
968e10de372SDave Chinner 		 */
969e10de372SDave Chinner 		xfs_start_page_writeback(page, 1);
970e10de372SDave Chinner 		end_page_writeback(page);
971bfce7d2eSDave Chinner 	}
972e10de372SDave Chinner 
973bfce7d2eSDave Chinner 	mapping_set_error(page->mapping, error);
974bfce7d2eSDave Chinner 	return error;
975bfce7d2eSDave Chinner }
976bfce7d2eSDave Chinner 
977c59d87c4SChristoph Hellwig /*
978c59d87c4SChristoph Hellwig  * Write out a dirty page.
979c59d87c4SChristoph Hellwig  *
980c59d87c4SChristoph Hellwig  * For delalloc space on the page we need to allocate space and flush it.
981c59d87c4SChristoph Hellwig  * For unwritten space on the page we need to start the conversion to
982c59d87c4SChristoph Hellwig  * regular allocated space.
983c59d87c4SChristoph Hellwig  * For any other dirty buffer heads on the page we should flush them.
984c59d87c4SChristoph Hellwig  */
985c59d87c4SChristoph Hellwig STATIC int
986fbcc0256SDave Chinner xfs_do_writepage(
987c59d87c4SChristoph Hellwig 	struct page		*page,
988fbcc0256SDave Chinner 	struct writeback_control *wbc,
989fbcc0256SDave Chinner 	void			*data)
990c59d87c4SChristoph Hellwig {
991fbcc0256SDave Chinner 	struct xfs_writepage_ctx *wpc = data;
992c59d87c4SChristoph Hellwig 	struct inode		*inode = page->mapping->host;
993c59d87c4SChristoph Hellwig 	loff_t			offset;
994c59d87c4SChristoph Hellwig 	__uint64_t              end_offset;
995ad68972aSDave Chinner 	pgoff_t                 end_index;
996c59d87c4SChristoph Hellwig 
99734097dfeSLukas Czerner 	trace_xfs_writepage(inode, page, 0, 0);
998c59d87c4SChristoph Hellwig 
999c59d87c4SChristoph Hellwig 	ASSERT(page_has_buffers(page));
1000c59d87c4SChristoph Hellwig 
1001c59d87c4SChristoph Hellwig 	/*
1002c59d87c4SChristoph Hellwig 	 * Refuse to write the page out if we are called from reclaim context.
1003c59d87c4SChristoph Hellwig 	 *
1004c59d87c4SChristoph Hellwig 	 * This avoids stack overflows when called from deeply used stacks in
1005c59d87c4SChristoph Hellwig 	 * random callers for direct reclaim or memcg reclaim.  We explicitly
1006c59d87c4SChristoph Hellwig 	 * allow reclaim from kswapd as the stack usage there is relatively low.
1007c59d87c4SChristoph Hellwig 	 *
100894054fa3SMel Gorman 	 * This should never happen except in the case of a VM regression so
100994054fa3SMel Gorman 	 * warn about it.
1010c59d87c4SChristoph Hellwig 	 */
101194054fa3SMel Gorman 	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
101294054fa3SMel Gorman 			PF_MEMALLOC))
1013c59d87c4SChristoph Hellwig 		goto redirty;
1014c59d87c4SChristoph Hellwig 
1015c59d87c4SChristoph Hellwig 	/*
1016c59d87c4SChristoph Hellwig 	 * Given that we do not allow direct reclaim to call us, we should
1017c59d87c4SChristoph Hellwig 	 * never be called while in a filesystem transaction.
1018c59d87c4SChristoph Hellwig 	 */
1019448011e2SChristoph Hellwig 	if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
1020c59d87c4SChristoph Hellwig 		goto redirty;
1021c59d87c4SChristoph Hellwig 
10228695d27eSJie Liu 	/*
1023ad68972aSDave Chinner 	 * Is this page beyond the end of the file?
1024ad68972aSDave Chinner 	 *
10258695d27eSJie Liu 	 * If the page index is less than the end_index, adjust the end_offset
10268695d27eSJie Liu 	 * to the highest offset that this page should represent.
10278695d27eSJie Liu 	 * -----------------------------------------------------
10288695d27eSJie Liu 	 * |			file mapping	       | <EOF> |
10298695d27eSJie Liu 	 * -----------------------------------------------------
10308695d27eSJie Liu 	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
10318695d27eSJie Liu 	 * ^--------------------------------^----------|--------
10328695d27eSJie Liu 	 * |     desired writeback range    |      see else    |
10338695d27eSJie Liu 	 * ---------------------------------^------------------|
10348695d27eSJie Liu 	 */
1035ad68972aSDave Chinner 	offset = i_size_read(inode);
103609cbfeafSKirill A. Shutemov 	end_index = offset >> PAGE_SHIFT;
10378695d27eSJie Liu 	if (page->index < end_index)
103809cbfeafSKirill A. Shutemov 		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
10398695d27eSJie Liu 	else {
10408695d27eSJie Liu 		/*
10418695d27eSJie Liu 		 * Check whether the page to write out is beyond or straddles
10428695d27eSJie Liu 		 * i_size.
10438695d27eSJie Liu 		 * -------------------------------------------------------
10448695d27eSJie Liu 		 * |		file mapping		        | <EOF>  |
10458695d27eSJie Liu 		 * -------------------------------------------------------
10468695d27eSJie Liu 		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
10478695d27eSJie Liu 		 * ^--------------------------------^-----------|---------
10488695d27eSJie Liu 		 * |				    |      Straddles     |
10498695d27eSJie Liu 		 * ---------------------------------^-----------|--------|
10508695d27eSJie Liu 		 */
105109cbfeafSKirill A. Shutemov 		unsigned offset_into_page = offset & (PAGE_SIZE - 1);
10526b7a03f0SChristoph Hellwig 
10536b7a03f0SChristoph Hellwig 		/*
1054ff9a28f6SJan Kara 		 * Skip the page if it is fully outside i_size, e.g. due to a
1055ff9a28f6SJan Kara 		 * truncate operation that is in progress. We must redirty the
1056ff9a28f6SJan Kara 		 * page so that reclaim stops reclaiming it. Otherwise
1057ff9a28f6SJan Kara 		 * xfs_vm_releasepage() is called on it and gets confused.
10588695d27eSJie Liu 		 *
10598695d27eSJie Liu 		 * Note that end_index is an unsigned long.  If the given offset
10608695d27eSJie Liu 		 * is greater than 16TB on a 32-bit system and we checked whether
10618695d27eSJie Liu 		 * the page is fully outside i_size via
10628695d27eSJie Liu 		 * "if (page->index >= end_index + 1)", then "end_index + 1"
10638695d27eSJie Liu 		 * would overflow and evaluate to 0.  The page would then be
10648695d27eSJie Liu 		 * redirtied and written out repeatedly, resulting in an
10658695d27eSJie Liu 		 * infinite loop and hanging the user program performing the
10668695d27eSJie Liu 		 * operation.  Instead, we can detect this situation by checking
10678695d27eSJie Liu 		 * whether the page to write is totally beyond i_size or whether
10688695d27eSJie Liu 		 * its offset is just equal to the EOF.
10696b7a03f0SChristoph Hellwig 		 */
10708695d27eSJie Liu 		if (page->index > end_index ||
10718695d27eSJie Liu 		    (page->index == end_index && offset_into_page == 0))
1072ff9a28f6SJan Kara 			goto redirty;
10736b7a03f0SChristoph Hellwig 
10746b7a03f0SChristoph Hellwig 		/*
10756b7a03f0SChristoph Hellwig 		 * The page straddles i_size.  It must be zeroed out on each
10766b7a03f0SChristoph Hellwig 		 * and every writepage invocation because it may be mmapped.
10776b7a03f0SChristoph Hellwig 		 * "A file is mapped in multiples of the page size.  For a file
10786b7a03f0SChristoph Hellwig 		 * that is not a multiple of the page size, the remaining
10796b7a03f0SChristoph Hellwig 		 * memory is zeroed when mapped, and writes to that region are
10806b7a03f0SChristoph Hellwig 		 * not written out to the file."
10816b7a03f0SChristoph Hellwig 		 */
108209cbfeafSKirill A. Shutemov 		zero_user_segment(page, offset_into_page, PAGE_SIZE);
10838695d27eSJie Liu 
10848695d27eSJie Liu 		/* Adjust the end_offset to the end of file */
10858695d27eSJie Liu 		end_offset = offset;
1086c59d87c4SChristoph Hellwig 	}
1087c59d87c4SChristoph Hellwig 
1088e10de372SDave Chinner 	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
1089c59d87c4SChristoph Hellwig 
1090c59d87c4SChristoph Hellwig redirty:
1091c59d87c4SChristoph Hellwig 	redirty_page_for_writepage(wbc, page);
1092c59d87c4SChristoph Hellwig 	unlock_page(page);
1093c59d87c4SChristoph Hellwig 	return 0;
1094c59d87c4SChristoph Hellwig }
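
/*
 * A minimal, hypothetical userspace sketch of the EOF classification used
 * above, assuming a 4096-byte page; the helper name and constants are
 * invented for illustration and are not part of the kernel code.
 */
#include <stdint.h>

#define EX_PAGE_SHIFT	12
#define EX_PAGE_SIZE	(1ULL << EX_PAGE_SHIFT)

/* 0: page fully inside i_size, 1: page straddles i_size, 2: page beyond it */
static int ex_classify_page(uint64_t i_size, uint64_t index)
{
	uint64_t end_index = i_size >> EX_PAGE_SHIFT;
	uint64_t offset_into_page = i_size & (EX_PAGE_SIZE - 1);

	if (index < end_index)
		return 0;	/* end_offset = (index + 1) << EX_PAGE_SHIFT */
	if (index > end_index || offset_into_page == 0)
		return 2;	/* fully beyond i_size: redirty and skip */
	return 1;		/* zero offset_into_page..EX_PAGE_SIZE, end_offset = i_size */
}

/*
 * With i_size = 10000: page 1 is fully inside (end_offset 8192), page 2
 * straddles EOF and is zeroed from byte 1808 onwards, page 3 is beyond EOF.
 */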
1095c59d87c4SChristoph Hellwig 
1096c59d87c4SChristoph Hellwig STATIC int
1097fbcc0256SDave Chinner xfs_vm_writepage(
1098fbcc0256SDave Chinner 	struct page		*page,
1099fbcc0256SDave Chinner 	struct writeback_control *wbc)
1100fbcc0256SDave Chinner {
1101fbcc0256SDave Chinner 	struct xfs_writepage_ctx wpc = {
1102fbcc0256SDave Chinner 		.io_type = XFS_IO_INVALID,
1103fbcc0256SDave Chinner 	};
1104fbcc0256SDave Chinner 	int			ret;
1105fbcc0256SDave Chinner 
1106fbcc0256SDave Chinner 	ret = xfs_do_writepage(page, wbc, &wpc);
1107e10de372SDave Chinner 	if (wpc.ioend)
1108e10de372SDave Chinner 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1109e10de372SDave Chinner 	return ret;
1110fbcc0256SDave Chinner }
1111fbcc0256SDave Chinner 
1112fbcc0256SDave Chinner STATIC int
1113c59d87c4SChristoph Hellwig xfs_vm_writepages(
1114c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1115c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
1116c59d87c4SChristoph Hellwig {
1117fbcc0256SDave Chinner 	struct xfs_writepage_ctx wpc = {
1118fbcc0256SDave Chinner 		.io_type = XFS_IO_INVALID,
1119fbcc0256SDave Chinner 	};
1120fbcc0256SDave Chinner 	int			ret;
1121fbcc0256SDave Chinner 
1122c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
11237f6d5b52SRoss Zwisler 	if (dax_mapping(mapping))
11247f6d5b52SRoss Zwisler 		return dax_writeback_mapping_range(mapping,
11257f6d5b52SRoss Zwisler 				xfs_find_bdev_for_inode(mapping->host), wbc);
11267f6d5b52SRoss Zwisler 
1127fbcc0256SDave Chinner 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
1128e10de372SDave Chinner 	if (wpc.ioend)
1129e10de372SDave Chinner 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1130e10de372SDave Chinner 	return ret;
1131c59d87c4SChristoph Hellwig }
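
/*
 * A hedged sketch of the pattern shared by the two entry points above: one
 * writeback context is threaded through every per-page call, and whatever
 * ioend it accumulated is submitted exactly once at the end, even when the
 * page walk itself returned an error.  All names below are invented.
 */
struct ex_wb_ctx {
	void	*pending;	/* stands in for wpc.ioend */
};

static int ex_submit(void *pending, int walk_ret)
{
	/* stand-in for xfs_submit_ioend(): issue the I/O, pass on the error */
	return walk_ret;
}

static int ex_write_pages(struct ex_wb_ctx *ctx,
			  int (*write_one)(struct ex_wb_ctx *, int),
			  int npages)
{
	int ret = 0, i;

	for (i = 0; i < npages && !ret; i++)
		ret = write_one(ctx, i);	/* may grow ctx->pending */
	if (ctx->pending)
		ret = ex_submit(ctx->pending, ret);
	return ret;
}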
1132c59d87c4SChristoph Hellwig 
1133c59d87c4SChristoph Hellwig /*
1134c59d87c4SChristoph Hellwig  * Called to move a page into cleanable state - and from there
1135c59d87c4SChristoph Hellwig  * to be released. The page should already be clean. We always
1136c59d87c4SChristoph Hellwig  * have buffer heads in this call.
1137c59d87c4SChristoph Hellwig  *
1138c59d87c4SChristoph Hellwig  * Returns 1 if the page is ok to release, 0 otherwise.
1139c59d87c4SChristoph Hellwig  */
1140c59d87c4SChristoph Hellwig STATIC int
1141c59d87c4SChristoph Hellwig xfs_vm_releasepage(
1142c59d87c4SChristoph Hellwig 	struct page		*page,
1143c59d87c4SChristoph Hellwig 	gfp_t			gfp_mask)
1144c59d87c4SChristoph Hellwig {
1145c59d87c4SChristoph Hellwig 	int			delalloc, unwritten;
1146c59d87c4SChristoph Hellwig 
114734097dfeSLukas Czerner 	trace_xfs_releasepage(page->mapping->host, page, 0, 0);
1148c59d87c4SChristoph Hellwig 
114999579cceSBrian Foster 	/*
115099579cceSBrian Foster 	 * mm accommodates an old ext3 case where clean pages might not have had
115199579cceSBrian Foster 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
115299579cceSBrian Foster 	 * ->releasepage() via shrink_active_list(). Conversely,
115399579cceSBrian Foster 	 * block_invalidatepage() can send pages that are still marked dirty
115499579cceSBrian Foster 	 * but otherwise have invalidated buffers.
115599579cceSBrian Foster 	 *
11560a417b8dSJan Kara 	 * We want to release the latter to avoid unnecessary buildup of the
11570a417b8dSJan Kara 	 * LRU, skip the former, and warn if we've left any lingering
11580a417b8dSJan Kara 	 * delalloc/unwritten buffers on clean pages.  Hence skip pages with
11590a417b8dSJan Kara 	 * delalloc or unwritten buffers and warn if the page is not dirty;
11600a417b8dSJan Kara 	 * otherwise try to release the buffers.
116199579cceSBrian Foster 	 */
1162c59d87c4SChristoph Hellwig 	xfs_count_page_state(page, &delalloc, &unwritten);
1163c59d87c4SChristoph Hellwig 
11640a417b8dSJan Kara 	if (delalloc) {
11650a417b8dSJan Kara 		WARN_ON_ONCE(!PageDirty(page));
1166c59d87c4SChristoph Hellwig 		return 0;
11670a417b8dSJan Kara 	}
11680a417b8dSJan Kara 	if (unwritten) {
11690a417b8dSJan Kara 		WARN_ON_ONCE(!PageDirty(page));
1170c59d87c4SChristoph Hellwig 		return 0;
11710a417b8dSJan Kara 	}
1172c59d87c4SChristoph Hellwig 
1173c59d87c4SChristoph Hellwig 	return try_to_free_buffers(page);
1174c59d87c4SChristoph Hellwig }
1175c59d87c4SChristoph Hellwig 
1176a719370bSDave Chinner /*
11771fdca9c2SDave Chinner  * If this is O_DIRECT or the mpage code calling, tell them how large the mapping
11781fdca9c2SDave Chinner  * is, so that we can avoid repeated get_blocks calls.
11791fdca9c2SDave Chinner  *
11801fdca9c2SDave Chinner  * If the mapping spans EOF, then we have to break the mapping up as the mapping
11811fdca9c2SDave Chinner  * for blocks beyond EOF must be marked new so that sub block regions can be
11821fdca9c2SDave Chinner  * correctly zeroed. We can't do this for mappings within EOF unless the mapping
11831fdca9c2SDave Chinner  * was just allocated or is unwritten, otherwise the callers would overwrite
11841fdca9c2SDave Chinner  * existing data with zeros. Hence we have to split the mapping into a range up
11851fdca9c2SDave Chinner  * to and including EOF, and a second mapping for beyond EOF.
11861fdca9c2SDave Chinner  */
11871fdca9c2SDave Chinner static void
11881fdca9c2SDave Chinner xfs_map_trim_size(
11891fdca9c2SDave Chinner 	struct inode		*inode,
11901fdca9c2SDave Chinner 	sector_t		iblock,
11911fdca9c2SDave Chinner 	struct buffer_head	*bh_result,
11921fdca9c2SDave Chinner 	struct xfs_bmbt_irec	*imap,
11931fdca9c2SDave Chinner 	xfs_off_t		offset,
11941fdca9c2SDave Chinner 	ssize_t			size)
11951fdca9c2SDave Chinner {
11961fdca9c2SDave Chinner 	xfs_off_t		mapping_size;
11971fdca9c2SDave Chinner 
11981fdca9c2SDave Chinner 	mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
11991fdca9c2SDave Chinner 	mapping_size <<= inode->i_blkbits;
12001fdca9c2SDave Chinner 
12011fdca9c2SDave Chinner 	ASSERT(mapping_size > 0);
12021fdca9c2SDave Chinner 	if (mapping_size > size)
12031fdca9c2SDave Chinner 		mapping_size = size;
12041fdca9c2SDave Chinner 	if (offset < i_size_read(inode) &&
12051fdca9c2SDave Chinner 	    offset + mapping_size >= i_size_read(inode)) {
12061fdca9c2SDave Chinner 		/* limit mapping to block that spans EOF */
12071fdca9c2SDave Chinner 		mapping_size = roundup_64(i_size_read(inode) - offset,
120893407472SFabian Frederick 					  i_blocksize(inode));
12091fdca9c2SDave Chinner 	}
12101fdca9c2SDave Chinner 	if (mapping_size > LONG_MAX)
12111fdca9c2SDave Chinner 		mapping_size = LONG_MAX;
12121fdca9c2SDave Chinner 
12131fdca9c2SDave Chinner 	bh_result->b_size = mapping_size;
12141fdca9c2SDave Chinner }
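
/*
 * A small standalone sketch of the trimming arithmetic above, assuming
 * 4096-byte filesystem blocks; the function and values are hypothetical and
 * the LONG_MAX clamp is omitted for brevity.
 */
#include <stdint.h>

static uint64_t ex_trim_mapping(uint64_t map_start_blk, uint64_t map_blocks,
				uint64_t iblock, uint64_t req_size,
				uint64_t offset, uint64_t i_size)
{
	uint64_t mapping_size = (map_start_blk + map_blocks - iblock) << 12;

	if (mapping_size > req_size)
		mapping_size = req_size;
	if (offset < i_size && offset + mapping_size >= i_size)
		/* limit the mapping to the block that spans EOF */
		mapping_size = ((i_size - offset) + 4095) & ~4095ULL;
	return mapping_size;
}

/*
 * Example: a mapping of 8 blocks starting at block 100, queried at iblock
 * 102 for 16384 bytes, has 24576 bytes left and is clamped to 16384; if EOF
 * fell 5000 bytes past 'offset', it would instead be clamped to 8192.
 */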
12151fdca9c2SDave Chinner 
12160613f16cSDarrick J. Wong static int
1217acdda3aaSChristoph Hellwig xfs_get_blocks(
1218c59d87c4SChristoph Hellwig 	struct inode		*inode,
1219c59d87c4SChristoph Hellwig 	sector_t		iblock,
1220c59d87c4SChristoph Hellwig 	struct buffer_head	*bh_result,
1221acdda3aaSChristoph Hellwig 	int			create)
1222c59d87c4SChristoph Hellwig {
1223c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1224c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1225c59d87c4SChristoph Hellwig 	xfs_fileoff_t		offset_fsb, end_fsb;
1226c59d87c4SChristoph Hellwig 	int			error = 0;
1227c59d87c4SChristoph Hellwig 	int			lockmode = 0;
1228c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	imap;
1229c59d87c4SChristoph Hellwig 	int			nimaps = 1;
1230c59d87c4SChristoph Hellwig 	xfs_off_t		offset;
1231c59d87c4SChristoph Hellwig 	ssize_t			size;
1232c59d87c4SChristoph Hellwig 
1233acdda3aaSChristoph Hellwig 	BUG_ON(create);
12346e8a27a8SChristoph Hellwig 
1235c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(mp))
1236b474c7aeSEric Sandeen 		return -EIO;
1237c59d87c4SChristoph Hellwig 
1238c59d87c4SChristoph Hellwig 	offset = (xfs_off_t)iblock << inode->i_blkbits;
123993407472SFabian Frederick 	ASSERT(bh_result->b_size >= i_blocksize(inode));
1240c59d87c4SChristoph Hellwig 	size = bh_result->b_size;
1241c59d87c4SChristoph Hellwig 
1242acdda3aaSChristoph Hellwig 	if (offset >= i_size_read(inode))
1243c59d87c4SChristoph Hellwig 		return 0;
1244c59d87c4SChristoph Hellwig 
1245507630b2SDave Chinner 	/*
1246507630b2SDave Chinner 	 * Direct I/O is usually done on preallocated files, so try getting
12476e8a27a8SChristoph Hellwig 	 * a block mapping without an exclusive lock first.
1248507630b2SDave Chinner 	 */
1249309ecac8SChristoph Hellwig 	lockmode = xfs_ilock_data_map_shared(ip);
1250c59d87c4SChristoph Hellwig 
1251d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
1252d2c28191SDave Chinner 	if (offset + size > mp->m_super->s_maxbytes)
1253d2c28191SDave Chinner 		size = mp->m_super->s_maxbytes - offset;
1254c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1255c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
1256c59d87c4SChristoph Hellwig 
12575c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
12585c8ed202SDave Chinner 				&imap, &nimaps, XFS_BMAPI_ENTIRE);
1259c59d87c4SChristoph Hellwig 	if (error)
1260c59d87c4SChristoph Hellwig 		goto out_unlock;
1261c59d87c4SChristoph Hellwig 
1262acdda3aaSChristoph Hellwig 	if (nimaps) {
1263d5cc2e3fSDave Chinner 		trace_xfs_get_blocks_found(ip, offset, size,
1264d5cc2e3fSDave Chinner 				ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN
1265d5cc2e3fSDave Chinner 						   : XFS_IO_OVERWRITE, &imap);
1266507630b2SDave Chinner 		xfs_iunlock(ip, lockmode);
1267c59d87c4SChristoph Hellwig 	} else {
1268c59d87c4SChristoph Hellwig 		trace_xfs_get_blocks_notfound(ip, offset, size);
1269c59d87c4SChristoph Hellwig 		goto out_unlock;
1270c59d87c4SChristoph Hellwig 	}
1271c59d87c4SChristoph Hellwig 
12721fdca9c2SDave Chinner 	/* trim mapping down to size requested */
12736e8a27a8SChristoph Hellwig 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
12741fdca9c2SDave Chinner 
1275c59d87c4SChristoph Hellwig 	/*
1276a719370bSDave Chinner 	 * For unwritten extents do not report a disk address in the buffered
1277a719370bSDave Chinner 	 * read case (treat as if we're reading into a hole).
1278c59d87c4SChristoph Hellwig 	 */
1279a719370bSDave Chinner 	if (imap.br_startblock != HOLESTARTBLOCK &&
1280a719370bSDave Chinner 	    imap.br_startblock != DELAYSTARTBLOCK &&
1281acdda3aaSChristoph Hellwig 	    !ISUNWRITTEN(&imap))
1282c59d87c4SChristoph Hellwig 		xfs_map_buffer(inode, bh_result, &imap, offset);
1283c59d87c4SChristoph Hellwig 
1284c59d87c4SChristoph Hellwig 	/*
1285c59d87c4SChristoph Hellwig 	 * If this is a realtime file, data may be on a different device
1286c59d87c4SChristoph Hellwig 	 * to that currently pointed to by the buffer_head b_bdev.
1287c59d87c4SChristoph Hellwig 	 */
1288c59d87c4SChristoph Hellwig 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1289c59d87c4SChristoph Hellwig 	return 0;
1290c59d87c4SChristoph Hellwig 
1291c59d87c4SChristoph Hellwig out_unlock:
1292c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, lockmode);
12932451337dSDave Chinner 	return error;
1294c59d87c4SChristoph Hellwig }
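
/*
 * The offset_fsb/end_fsb pair above describes a half-open filesystem block
 * range covering the byte range [offset, offset + size).  A hypothetical
 * sketch of that conversion, hard-coding 4096-byte blocks where the real
 * XFS_B_TO_FSB/XFS_B_TO_FSBT macros use the mount's block size:
 */
#include <stdint.h>

static void ex_byte_range_to_fsb(uint64_t offset, uint64_t size,
				 uint64_t *offset_fsb, uint64_t *end_fsb)
{
	*offset_fsb = offset >> 12;			/* truncate, like XFS_B_TO_FSBT */
	*end_fsb = (offset + size + 4095) >> 12;	/* round up, like XFS_B_TO_FSB */
}

/*
 * Example: offset = 6000, size = 3000 covers bytes 6000..8999, which maps
 * to the block range [1, 3), i.e. blocks 1 and 2.
 */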
1295c59d87c4SChristoph Hellwig 
1296c59d87c4SChristoph Hellwig STATIC ssize_t
1297c59d87c4SChristoph Hellwig xfs_vm_direct_IO(
1298c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
1299c8b8e32dSChristoph Hellwig 	struct iov_iter		*iter)
1300c59d87c4SChristoph Hellwig {
1301c59d87c4SChristoph Hellwig 	/*
1302fa8d972dSChristoph Hellwig 	 * We just need the method present so that open/fcntl allow direct I/O.
1303c59d87c4SChristoph Hellwig 	 */
1304fa8d972dSChristoph Hellwig 	return -EINVAL;
1305c59d87c4SChristoph Hellwig }
1306c59d87c4SChristoph Hellwig 
1307c59d87c4SChristoph Hellwig STATIC sector_t
1308c59d87c4SChristoph Hellwig xfs_vm_bmap(
1309c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1310c59d87c4SChristoph Hellwig 	sector_t		block)
1311c59d87c4SChristoph Hellwig {
1312c59d87c4SChristoph Hellwig 	struct inode		*inode = (struct inode *)mapping->host;
1313c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1314c59d87c4SChristoph Hellwig 
1315c59d87c4SChristoph Hellwig 	trace_xfs_vm_bmap(XFS_I(inode));
1316db1327b1SDarrick J. Wong 
1317db1327b1SDarrick J. Wong 	/*
1318db1327b1SDarrick J. Wong 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
1319db1327b1SDarrick J. Wong 	 * bypasses the file system for actual I/O.  We really can't allow
1320db1327b1SDarrick J. Wong 	 * that on reflink inodes, so we have to skip out here.  And yes,
1321db1327b1SDarrick J. Wong 	 * 0 is the magic code for a bmap error.
1322db1327b1SDarrick J. Wong 	 */
132365523218SChristoph Hellwig 	if (xfs_is_reflink_inode(ip))
1324db1327b1SDarrick J. Wong 		return 0;
132565523218SChristoph Hellwig 
13264bc1ea6bSDave Chinner 	filemap_write_and_wait(mapping);
1327c59d87c4SChristoph Hellwig 	return generic_block_bmap(mapping, block, xfs_get_blocks);
1328c59d87c4SChristoph Hellwig }
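
/*
 * ->bmap is normally reached from userspace through the FIBMAP ioctl (and
 * from the kernel by swapon).  A hedged userspace sketch of that path; the
 * helper and file name are hypothetical, error handling is minimal, and
 * FIBMAP usually requires CAP_SYS_RAWIO.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>

static void ex_print_first_block(const char *path)
{
	int blk = 0;	/* logical block in, physical block out; 0 means hole/error */
	int fd = open(path, O_RDONLY);

	if (fd < 0)
		return;
	if (ioctl(fd, FIBMAP, &blk) == 0)
		printf("block 0 of %s maps to fs block %d\n", path, blk);
	close(fd);
}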
1329c59d87c4SChristoph Hellwig 
1330c59d87c4SChristoph Hellwig STATIC int
1331c59d87c4SChristoph Hellwig xfs_vm_readpage(
1332c59d87c4SChristoph Hellwig 	struct file		*unused,
1333c59d87c4SChristoph Hellwig 	struct page		*page)
1334c59d87c4SChristoph Hellwig {
1335121e213eSDave Chinner 	trace_xfs_vm_readpage(page->mapping->host, 1);
1336c59d87c4SChristoph Hellwig 	return mpage_readpage(page, xfs_get_blocks);
1337c59d87c4SChristoph Hellwig }
1338c59d87c4SChristoph Hellwig 
1339c59d87c4SChristoph Hellwig STATIC int
1340c59d87c4SChristoph Hellwig xfs_vm_readpages(
1341c59d87c4SChristoph Hellwig 	struct file		*unused,
1342c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1343c59d87c4SChristoph Hellwig 	struct list_head	*pages,
1344c59d87c4SChristoph Hellwig 	unsigned		nr_pages)
1345c59d87c4SChristoph Hellwig {
1346121e213eSDave Chinner 	trace_xfs_vm_readpages(mapping->host, nr_pages);
1347c59d87c4SChristoph Hellwig 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1348c59d87c4SChristoph Hellwig }
1349c59d87c4SChristoph Hellwig 
135022e757a4SDave Chinner /*
135122e757a4SDave Chinner  * This is basically a copy of __set_page_dirty_buffers() with one
135222e757a4SDave Chinner  * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
135322e757a4SDave Chinner  * dirty, we'll never be able to clean them because we don't write buffers
135422e757a4SDave Chinner  * beyond EOF, and that means we can't invalidate pages that span EOF
135522e757a4SDave Chinner  * that have been marked dirty. Further, the dirty state can leak into
135622e757a4SDave Chinner  * the file interior if the file is extended, resulting in all sorts of
135722e757a4SDave Chinner  * bad things happening as the state does not match the underlying data.
135822e757a4SDave Chinner  *
135922e757a4SDave Chinner  * XXX: this really indicates that bufferheads in XFS need to die. Warts like
136022e757a4SDave Chinner  * this only exist because of bufferheads and how the generic code manages them.
136122e757a4SDave Chinner  */
136222e757a4SDave Chinner STATIC int
136322e757a4SDave Chinner xfs_vm_set_page_dirty(
136422e757a4SDave Chinner 	struct page		*page)
136522e757a4SDave Chinner {
136622e757a4SDave Chinner 	struct address_space	*mapping = page->mapping;
136722e757a4SDave Chinner 	struct inode		*inode = mapping->host;
136822e757a4SDave Chinner 	loff_t			end_offset;
136922e757a4SDave Chinner 	loff_t			offset;
137022e757a4SDave Chinner 	int			newly_dirty;
137122e757a4SDave Chinner 
137222e757a4SDave Chinner 	if (unlikely(!mapping))
137322e757a4SDave Chinner 		return !TestSetPageDirty(page);
137422e757a4SDave Chinner 
137522e757a4SDave Chinner 	end_offset = i_size_read(inode);
137622e757a4SDave Chinner 	offset = page_offset(page);
137722e757a4SDave Chinner 
137822e757a4SDave Chinner 	spin_lock(&mapping->private_lock);
137922e757a4SDave Chinner 	if (page_has_buffers(page)) {
138022e757a4SDave Chinner 		struct buffer_head *head = page_buffers(page);
138122e757a4SDave Chinner 		struct buffer_head *bh = head;
138222e757a4SDave Chinner 
138322e757a4SDave Chinner 		do {
138422e757a4SDave Chinner 			if (offset < end_offset)
138522e757a4SDave Chinner 				set_buffer_dirty(bh);
138622e757a4SDave Chinner 			bh = bh->b_this_page;
138793407472SFabian Frederick 			offset += i_blocksize(inode);
138822e757a4SDave Chinner 		} while (bh != head);
138922e757a4SDave Chinner 	}
1390c4843a75SGreg Thelen 	/*
139181f8c3a4SJohannes Weiner 	 * Lock out page->mem_cgroup migration to keep PageDirty
139281f8c3a4SJohannes Weiner 	 * synchronized with per-memcg dirty page counters.
1393c4843a75SGreg Thelen 	 */
139462cccb8cSJohannes Weiner 	lock_page_memcg(page);
139522e757a4SDave Chinner 	newly_dirty = !TestSetPageDirty(page);
139622e757a4SDave Chinner 	spin_unlock(&mapping->private_lock);
139722e757a4SDave Chinner 
139822e757a4SDave Chinner 	if (newly_dirty) {
139922e757a4SDave Chinner 		/* sigh - __set_page_dirty() is static, so copy it here, too */
140022e757a4SDave Chinner 		unsigned long flags;
140122e757a4SDave Chinner 
140222e757a4SDave Chinner 		spin_lock_irqsave(&mapping->tree_lock, flags);
140322e757a4SDave Chinner 		if (page->mapping) {	/* Race with truncate? */
140422e757a4SDave Chinner 			WARN_ON_ONCE(!PageUptodate(page));
140562cccb8cSJohannes Weiner 			account_page_dirtied(page, mapping);
140622e757a4SDave Chinner 			radix_tree_tag_set(&mapping->page_tree,
140722e757a4SDave Chinner 					page_index(page), PAGECACHE_TAG_DIRTY);
140822e757a4SDave Chinner 		}
140922e757a4SDave Chinner 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
141022e757a4SDave Chinner 	}
141162cccb8cSJohannes Weiner 	unlock_page_memcg(page);
1412c4843a75SGreg Thelen 	if (newly_dirty)
1413c4843a75SGreg Thelen 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
141422e757a4SDave Chinner 	return newly_dirty;
141522e757a4SDave Chinner }
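
/*
 * A tiny standalone sketch of the tweak described above: only buffers that
 * start before EOF are dirtied.  It assumes a 4096-byte page split into
 * 1024-byte buffers; the helper is invented for illustration.
 */
#include <stdint.h>

/* Returns a bitmask of the four buffers in the page that would be dirtied. */
static unsigned int ex_dirty_buffers(uint64_t page_offset, uint64_t i_size)
{
	unsigned int mask = 0, i;

	for (i = 0; i < 4; i++)
		if (page_offset + i * 1024 < i_size)
			mask |= 1U << i;
	return mask;
}

/*
 * Example: for the page at offset 8192 with i_size = 10000, the buffers at
 * 8192 and 9216 start below EOF (mask 0x3); the two beyond EOF stay clean,
 * so writeback never has to clean blocks past EOF.
 */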
141622e757a4SDave Chinner 
1417c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
1418c59d87c4SChristoph Hellwig 	.readpage		= xfs_vm_readpage,
1419c59d87c4SChristoph Hellwig 	.readpages		= xfs_vm_readpages,
1420c59d87c4SChristoph Hellwig 	.writepage		= xfs_vm_writepage,
1421c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
142222e757a4SDave Chinner 	.set_page_dirty		= xfs_vm_set_page_dirty,
1423c59d87c4SChristoph Hellwig 	.releasepage		= xfs_vm_releasepage,
1424c59d87c4SChristoph Hellwig 	.invalidatepage		= xfs_vm_invalidatepage,
1425c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
1426c59d87c4SChristoph Hellwig 	.direct_IO		= xfs_vm_direct_IO,
1427c59d87c4SChristoph Hellwig 	.migratepage		= buffer_migrate_page,
1428c59d87c4SChristoph Hellwig 	.is_partially_uptodate  = block_is_partially_uptodate,
1429c59d87c4SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
1430c59d87c4SChristoph Hellwig };
1431