xref: /linux/fs/xfs/xfs_aops.c (revision 4e4cbee93d56137ebff722be022cae5f70ef84fb)
1c59d87c4SChristoph Hellwig /*
2c59d87c4SChristoph Hellwig  * Copyright (c) 2000-2005 Silicon Graphics, Inc.
3c59d87c4SChristoph Hellwig  * All Rights Reserved.
4c59d87c4SChristoph Hellwig  *
5c59d87c4SChristoph Hellwig  * This program is free software; you can redistribute it and/or
6c59d87c4SChristoph Hellwig  * modify it under the terms of the GNU General Public License as
7c59d87c4SChristoph Hellwig  * published by the Free Software Foundation.
8c59d87c4SChristoph Hellwig  *
9c59d87c4SChristoph Hellwig  * This program is distributed in the hope that it would be useful,
10c59d87c4SChristoph Hellwig  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11c59d87c4SChristoph Hellwig  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12c59d87c4SChristoph Hellwig  * GNU General Public License for more details.
13c59d87c4SChristoph Hellwig  *
14c59d87c4SChristoph Hellwig  * You should have received a copy of the GNU General Public License
15c59d87c4SChristoph Hellwig  * along with this program; if not, write the Free Software Foundation,
16c59d87c4SChristoph Hellwig  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17c59d87c4SChristoph Hellwig  */
18c59d87c4SChristoph Hellwig #include "xfs.h"
1970a9883cSDave Chinner #include "xfs_shared.h"
20239880efSDave Chinner #include "xfs_format.h"
21239880efSDave Chinner #include "xfs_log_format.h"
22239880efSDave Chinner #include "xfs_trans_resv.h"
23c59d87c4SChristoph Hellwig #include "xfs_mount.h"
24c59d87c4SChristoph Hellwig #include "xfs_inode.h"
25239880efSDave Chinner #include "xfs_trans.h"
26281627dfSChristoph Hellwig #include "xfs_inode_item.h"
27c59d87c4SChristoph Hellwig #include "xfs_alloc.h"
28c59d87c4SChristoph Hellwig #include "xfs_error.h"
29c59d87c4SChristoph Hellwig #include "xfs_iomap.h"
30c59d87c4SChristoph Hellwig #include "xfs_trace.h"
31c59d87c4SChristoph Hellwig #include "xfs_bmap.h"
3268988114SDave Chinner #include "xfs_bmap_util.h"
33a4fbe6abSDave Chinner #include "xfs_bmap_btree.h"
34ef473667SDarrick J. Wong #include "xfs_reflink.h"
35c59d87c4SChristoph Hellwig #include <linux/gfp.h>
36c59d87c4SChristoph Hellwig #include <linux/mpage.h>
37c59d87c4SChristoph Hellwig #include <linux/pagevec.h>
38c59d87c4SChristoph Hellwig #include <linux/writeback.h>
39c59d87c4SChristoph Hellwig 
40fbcc0256SDave Chinner /*
41fbcc0256SDave Chinner  * structure owned by writepages, passed to individual writepage calls
42fbcc0256SDave Chinner  */
43fbcc0256SDave Chinner struct xfs_writepage_ctx {
44fbcc0256SDave Chinner 	struct xfs_bmbt_irec    imap;
45fbcc0256SDave Chinner 	bool			imap_valid;
46fbcc0256SDave Chinner 	unsigned int		io_type;
47fbcc0256SDave Chinner 	struct xfs_ioend	*ioend;
48fbcc0256SDave Chinner 	sector_t		last_block;
49fbcc0256SDave Chinner };
50fbcc0256SDave Chinner 
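/*
 * Rough usage sketch (names as used elsewhere in this file; treat the
 * details as illustrative rather than definitive): the context lives on the
 * ->writepages caller's stack, is threaded through write_cache_pages() via
 * xfs_do_writepage(), and any ioend still cached when the walk finishes is
 * submitted by the caller:
 *
 *	struct xfs_writepage_ctx wpc = { .io_type = XFS_IO_INVALID };
 *	int ret;
 *
 *	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
 *	if (wpc.ioend)
 *		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
 */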
51c59d87c4SChristoph Hellwig void
52c59d87c4SChristoph Hellwig xfs_count_page_state(
53c59d87c4SChristoph Hellwig 	struct page		*page,
54c59d87c4SChristoph Hellwig 	int			*delalloc,
55c59d87c4SChristoph Hellwig 	int			*unwritten)
56c59d87c4SChristoph Hellwig {
57c59d87c4SChristoph Hellwig 	struct buffer_head	*bh, *head;
58c59d87c4SChristoph Hellwig 
59c59d87c4SChristoph Hellwig 	*delalloc = *unwritten = 0;
60c59d87c4SChristoph Hellwig 
61c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
62c59d87c4SChristoph Hellwig 	do {
63c59d87c4SChristoph Hellwig 		if (buffer_unwritten(bh))
64c59d87c4SChristoph Hellwig 			(*unwritten) = 1;
65c59d87c4SChristoph Hellwig 		else if (buffer_delay(bh))
66c59d87c4SChristoph Hellwig 			(*delalloc) = 1;
67c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
68c59d87c4SChristoph Hellwig }
69c59d87c4SChristoph Hellwig 
7020a90f58SRoss Zwisler struct block_device *
71c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode(
72c59d87c4SChristoph Hellwig 	struct inode		*inode)
73c59d87c4SChristoph Hellwig {
74c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
75c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
76c59d87c4SChristoph Hellwig 
77c59d87c4SChristoph Hellwig 	if (XFS_IS_REALTIME_INODE(ip))
78c59d87c4SChristoph Hellwig 		return mp->m_rtdev_targp->bt_bdev;
79c59d87c4SChristoph Hellwig 	else
80c59d87c4SChristoph Hellwig 		return mp->m_ddev_targp->bt_bdev;
81c59d87c4SChristoph Hellwig }
82c59d87c4SChristoph Hellwig 
83c59d87c4SChristoph Hellwig /*
8437992c18SDave Chinner  * We're now finished for good with this page.  Update the page state via the
8537992c18SDave Chinner  * associated buffer_heads, paying attention to the start and end offsets that
8637992c18SDave Chinner  * we need to process on the page.
8728b783e4SDave Chinner  *
8828b783e4SDave Chinner  * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last
8928b783e4SDave Chinner  * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or
9028b783e4SDave Chinner  * the page at all, as we may be racing with memory reclaim and it can free both
9128b783e4SDave Chinner  * the bufferhead chain and the page as it will see the page as clean and
9228b783e4SDave Chinner  * unused.
9337992c18SDave Chinner  */
9437992c18SDave Chinner static void
9537992c18SDave Chinner xfs_finish_page_writeback(
9637992c18SDave Chinner 	struct inode		*inode,
9737992c18SDave Chinner 	struct bio_vec		*bvec,
9837992c18SDave Chinner 	int			error)
9937992c18SDave Chinner {
10037992c18SDave Chinner 	unsigned int		end = bvec->bv_offset + bvec->bv_len - 1;
10128b783e4SDave Chinner 	struct buffer_head	*head, *bh, *next;
10237992c18SDave Chinner 	unsigned int		off = 0;
10328b783e4SDave Chinner 	unsigned int		bsize;
10437992c18SDave Chinner 
10537992c18SDave Chinner 	ASSERT(bvec->bv_offset < PAGE_SIZE);
10693407472SFabian Frederick 	ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0);
10737992c18SDave Chinner 	ASSERT(end < PAGE_SIZE);
10893407472SFabian Frederick 	ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0);
10937992c18SDave Chinner 
11037992c18SDave Chinner 	bh = head = page_buffers(bvec->bv_page);
11137992c18SDave Chinner 
11228b783e4SDave Chinner 	bsize = bh->b_size;
11337992c18SDave Chinner 	do {
114161f55efSEryu Guan 		if (off > end)
115161f55efSEryu Guan 			break;
11628b783e4SDave Chinner 		next = bh->b_this_page;
11737992c18SDave Chinner 		if (off < bvec->bv_offset)
11837992c18SDave Chinner 			goto next_bh;
11937992c18SDave Chinner 		bh->b_end_io(bh, !error);
12037992c18SDave Chinner next_bh:
12128b783e4SDave Chinner 		off += bsize;
12228b783e4SDave Chinner 	} while ((bh = next) != head);
12337992c18SDave Chinner }
12437992c18SDave Chinner 
12537992c18SDave Chinner /*
12637992c18SDave Chinner  * We're now finished for good with this ioend structure.  Update the page
12737992c18SDave Chinner  * state, release holds on bios, and finally free up memory.  Do not use the
12837992c18SDave Chinner  * ioend after this.
129c59d87c4SChristoph Hellwig  */
130c59d87c4SChristoph Hellwig STATIC void
131c59d87c4SChristoph Hellwig xfs_destroy_ioend(
1320e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
1330e51a8e1SChristoph Hellwig 	int			error)
134c59d87c4SChristoph Hellwig {
13537992c18SDave Chinner 	struct inode		*inode = ioend->io_inode;
1360e51a8e1SChristoph Hellwig 	struct bio		*last = ioend->io_bio;
13737992c18SDave Chinner 	struct bio		*bio, *next;
138c59d87c4SChristoph Hellwig 
1390e51a8e1SChristoph Hellwig 	for (bio = &ioend->io_inline_bio; bio; bio = next) {
14037992c18SDave Chinner 		struct bio_vec	*bvec;
14137992c18SDave Chinner 		int		i;
14237992c18SDave Chinner 
1430e51a8e1SChristoph Hellwig 		/*
1440e51a8e1SChristoph Hellwig 		 * For the last bio, bi_private points to the ioend, so we
1450e51a8e1SChristoph Hellwig 		 * need to explicitly end the iteration here.
1460e51a8e1SChristoph Hellwig 		 */
1470e51a8e1SChristoph Hellwig 		if (bio == last)
1480e51a8e1SChristoph Hellwig 			next = NULL;
1490e51a8e1SChristoph Hellwig 		else
15037992c18SDave Chinner 			next = bio->bi_private;
15137992c18SDave Chinner 
15237992c18SDave Chinner 		/* walk each page on bio, ending page IO on them */
15337992c18SDave Chinner 		bio_for_each_segment_all(bvec, bio, i)
15437992c18SDave Chinner 			xfs_finish_page_writeback(inode, bvec, error);
15537992c18SDave Chinner 
15637992c18SDave Chinner 		bio_put(bio);
157c59d87c4SChristoph Hellwig 	}
158c59d87c4SChristoph Hellwig }
159c59d87c4SChristoph Hellwig 
160c59d87c4SChristoph Hellwig /*
161fc0063c4SChristoph Hellwig  * Fast and loose check if this write could update the on-disk inode size.
162fc0063c4SChristoph Hellwig  */
163fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
164fc0063c4SChristoph Hellwig {
165fc0063c4SChristoph Hellwig 	return ioend->io_offset + ioend->io_size >
166fc0063c4SChristoph Hellwig 		XFS_I(ioend->io_inode)->i_d.di_size;
167fc0063c4SChristoph Hellwig }
168fc0063c4SChristoph Hellwig 
169281627dfSChristoph Hellwig STATIC int
170281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc(
171281627dfSChristoph Hellwig 	struct xfs_ioend	*ioend)
172281627dfSChristoph Hellwig {
173281627dfSChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
174281627dfSChristoph Hellwig 	struct xfs_trans	*tp;
175281627dfSChristoph Hellwig 	int			error;
176281627dfSChristoph Hellwig 
177253f4911SChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
178253f4911SChristoph Hellwig 	if (error)
179281627dfSChristoph Hellwig 		return error;
180281627dfSChristoph Hellwig 
181281627dfSChristoph Hellwig 	ioend->io_append_trans = tp;
182281627dfSChristoph Hellwig 
183281627dfSChristoph Hellwig 	/*
184437a255aSDave Chinner 	 * We may pass freeze protection with a transaction.  So tell lockdep
185d9457dc0SJan Kara 	 * we released it.
186d9457dc0SJan Kara 	 */
187bee9182dSOleg Nesterov 	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
188d9457dc0SJan Kara 	/*
189281627dfSChristoph Hellwig 	 * We hand off the transaction to the completion thread now, so
190281627dfSChristoph Hellwig 	 * clear the flag here.
191281627dfSChristoph Hellwig 	 */
1929070733bSMichal Hocko 	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
193281627dfSChristoph Hellwig 	return 0;
194281627dfSChristoph Hellwig }
195281627dfSChristoph Hellwig 
196fc0063c4SChristoph Hellwig /*
1972813d682SChristoph Hellwig  * Update on-disk file size now that data has been written to disk.
198c59d87c4SChristoph Hellwig  */
199281627dfSChristoph Hellwig STATIC int
200e372843aSChristoph Hellwig __xfs_setfilesize(
2012ba66237SChristoph Hellwig 	struct xfs_inode	*ip,
2022ba66237SChristoph Hellwig 	struct xfs_trans	*tp,
2032ba66237SChristoph Hellwig 	xfs_off_t		offset,
2042ba66237SChristoph Hellwig 	size_t			size)
205c59d87c4SChristoph Hellwig {
206c59d87c4SChristoph Hellwig 	xfs_fsize_t		isize;
207c59d87c4SChristoph Hellwig 
208aa6bf01dSChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
2092ba66237SChristoph Hellwig 	isize = xfs_new_eof(ip, offset + size);
210281627dfSChristoph Hellwig 	if (!isize) {
211281627dfSChristoph Hellwig 		xfs_iunlock(ip, XFS_ILOCK_EXCL);
2124906e215SChristoph Hellwig 		xfs_trans_cancel(tp);
213281627dfSChristoph Hellwig 		return 0;
214c59d87c4SChristoph Hellwig 	}
215c59d87c4SChristoph Hellwig 
2162ba66237SChristoph Hellwig 	trace_xfs_setfilesize(ip, offset, size);
217281627dfSChristoph Hellwig 
218281627dfSChristoph Hellwig 	ip->i_d.di_size = isize;
219281627dfSChristoph Hellwig 	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
220281627dfSChristoph Hellwig 	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
221281627dfSChristoph Hellwig 
22270393313SChristoph Hellwig 	return xfs_trans_commit(tp);
223c59d87c4SChristoph Hellwig }
224c59d87c4SChristoph Hellwig 
225e372843aSChristoph Hellwig int
226e372843aSChristoph Hellwig xfs_setfilesize(
227e372843aSChristoph Hellwig 	struct xfs_inode	*ip,
228e372843aSChristoph Hellwig 	xfs_off_t		offset,
229e372843aSChristoph Hellwig 	size_t			size)
230e372843aSChristoph Hellwig {
231e372843aSChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
232e372843aSChristoph Hellwig 	struct xfs_trans	*tp;
233e372843aSChristoph Hellwig 	int			error;
234e372843aSChristoph Hellwig 
235e372843aSChristoph Hellwig 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
236e372843aSChristoph Hellwig 	if (error)
237e372843aSChristoph Hellwig 		return error;
238e372843aSChristoph Hellwig 
239e372843aSChristoph Hellwig 	return __xfs_setfilesize(ip, tp, offset, size);
240e372843aSChristoph Hellwig }
241e372843aSChristoph Hellwig 
2422ba66237SChristoph Hellwig STATIC int
2432ba66237SChristoph Hellwig xfs_setfilesize_ioend(
2440e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
2450e51a8e1SChristoph Hellwig 	int			error)
2462ba66237SChristoph Hellwig {
2472ba66237SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
2482ba66237SChristoph Hellwig 	struct xfs_trans	*tp = ioend->io_append_trans;
2492ba66237SChristoph Hellwig 
2502ba66237SChristoph Hellwig 	/*
2512ba66237SChristoph Hellwig 	 * The transaction may have been allocated in the I/O submission thread,
2522ba66237SChristoph Hellwig 	 * thus we need to mark ourselves as being in a transaction manually.
2532ba66237SChristoph Hellwig 	 * Similarly for freeze protection.
2542ba66237SChristoph Hellwig 	 */
2559070733bSMichal Hocko 	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
256bee9182dSOleg Nesterov 	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
2572ba66237SChristoph Hellwig 
2585cb13dcdSZhaohongjiang 	/* we abort the update if there was an IO error */
2590e51a8e1SChristoph Hellwig 	if (error) {
2605cb13dcdSZhaohongjiang 		xfs_trans_cancel(tp);
2610e51a8e1SChristoph Hellwig 		return error;
2625cb13dcdSZhaohongjiang 	}
2635cb13dcdSZhaohongjiang 
264e372843aSChristoph Hellwig 	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
2652ba66237SChristoph Hellwig }
2662ba66237SChristoph Hellwig 
267c59d87c4SChristoph Hellwig /*
268c59d87c4SChristoph Hellwig  * IO write completion.
269c59d87c4SChristoph Hellwig  */
270c59d87c4SChristoph Hellwig STATIC void
271c59d87c4SChristoph Hellwig xfs_end_io(
272c59d87c4SChristoph Hellwig 	struct work_struct *work)
273c59d87c4SChristoph Hellwig {
2740e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend =
2750e51a8e1SChristoph Hellwig 		container_of(work, struct xfs_ioend, io_work);
276c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
277787eb485SChristoph Hellwig 	xfs_off_t		offset = ioend->io_offset;
278787eb485SChristoph Hellwig 	size_t			size = ioend->io_size;
279*4e4cbee9SChristoph Hellwig 	int			error;
280c59d87c4SChristoph Hellwig 
281af055e37SBrian Foster 	/*
282787eb485SChristoph Hellwig 	 * Just clean up the in-memory structures if the fs has been shut down.
283af055e37SBrian Foster 	 */
284787eb485SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
2850e51a8e1SChristoph Hellwig 		error = -EIO;
28643caeb18SDarrick J. Wong 		goto done;
28743caeb18SDarrick J. Wong 	}
28843caeb18SDarrick J. Wong 
28943caeb18SDarrick J. Wong 	/*
290787eb485SChristoph Hellwig 	 * Clean up any COW blocks on an I/O error.
291c59d87c4SChristoph Hellwig 	 */
292*4e4cbee9SChristoph Hellwig 	error = blk_status_to_errno(ioend->io_bio->bi_status);
293787eb485SChristoph Hellwig 	if (unlikely(error)) {
294787eb485SChristoph Hellwig 		switch (ioend->io_type) {
295787eb485SChristoph Hellwig 		case XFS_IO_COW:
296787eb485SChristoph Hellwig 			xfs_reflink_cancel_cow_range(ip, offset, size, true);
297787eb485SChristoph Hellwig 			break;
298787eb485SChristoph Hellwig 		}
299787eb485SChristoph Hellwig 
3005cb13dcdSZhaohongjiang 		goto done;
301787eb485SChristoph Hellwig 	}
302787eb485SChristoph Hellwig 
303787eb485SChristoph Hellwig 	/*
304787eb485SChristoph Hellwig 	 * Success:  commit the COW or unwritten blocks if needed.
305787eb485SChristoph Hellwig 	 */
306787eb485SChristoph Hellwig 	switch (ioend->io_type) {
307787eb485SChristoph Hellwig 	case XFS_IO_COW:
308787eb485SChristoph Hellwig 		error = xfs_reflink_end_cow(ip, offset, size);
309787eb485SChristoph Hellwig 		break;
310787eb485SChristoph Hellwig 	case XFS_IO_UNWRITTEN:
311787eb485SChristoph Hellwig 		error = xfs_iomap_write_unwritten(ip, offset, size);
312787eb485SChristoph Hellwig 		break;
313787eb485SChristoph Hellwig 	default:
314787eb485SChristoph Hellwig 		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
315787eb485SChristoph Hellwig 		break;
31684803fb7SChristoph Hellwig 	}
31784803fb7SChristoph Hellwig 
31804f658eeSChristoph Hellwig done:
319787eb485SChristoph Hellwig 	if (ioend->io_append_trans)
320787eb485SChristoph Hellwig 		error = xfs_setfilesize_ioend(ioend, error);
3210e51a8e1SChristoph Hellwig 	xfs_destroy_ioend(ioend, error);
322c59d87c4SChristoph Hellwig }
323c59d87c4SChristoph Hellwig 
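/*
 * Bio completion handler.  Unwritten extent conversion, COW remapping and
 * on-disk size updates all require transactions, which we cannot run from
 * bio completion context, so those cases are punted to a workqueue;
 * anything else is torn down directly here.
 */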
3240e51a8e1SChristoph Hellwig STATIC void
3250e51a8e1SChristoph Hellwig xfs_end_bio(
3260e51a8e1SChristoph Hellwig 	struct bio		*bio)
327c59d87c4SChristoph Hellwig {
3280e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend = bio->bi_private;
3290e51a8e1SChristoph Hellwig 	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
330c59d87c4SChristoph Hellwig 
33143caeb18SDarrick J. Wong 	if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW)
3320e51a8e1SChristoph Hellwig 		queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
3330e51a8e1SChristoph Hellwig 	else if (ioend->io_append_trans)
3340e51a8e1SChristoph Hellwig 		queue_work(mp->m_data_workqueue, &ioend->io_work);
3350e51a8e1SChristoph Hellwig 	else
336*4e4cbee9SChristoph Hellwig 		xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status));
337c59d87c4SChristoph Hellwig }
338c59d87c4SChristoph Hellwig 
339c59d87c4SChristoph Hellwig STATIC int
340c59d87c4SChristoph Hellwig xfs_map_blocks(
341c59d87c4SChristoph Hellwig 	struct inode		*inode,
342c59d87c4SChristoph Hellwig 	loff_t			offset,
343c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
344988ef927SDave Chinner 	int			type)
345c59d87c4SChristoph Hellwig {
346c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
347c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
34893407472SFabian Frederick 	ssize_t			count = i_blocksize(inode);
349c59d87c4SChristoph Hellwig 	xfs_fileoff_t		offset_fsb, end_fsb;
350c59d87c4SChristoph Hellwig 	int			error = 0;
351c59d87c4SChristoph Hellwig 	int			bmapi_flags = XFS_BMAPI_ENTIRE;
352c59d87c4SChristoph Hellwig 	int			nimaps = 1;
353c59d87c4SChristoph Hellwig 
354c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(mp))
355b474c7aeSEric Sandeen 		return -EIO;
356c59d87c4SChristoph Hellwig 
357ef473667SDarrick J. Wong 	ASSERT(type != XFS_IO_COW);
3580d882a36SAlain Renaud 	if (type == XFS_IO_UNWRITTEN)
359c59d87c4SChristoph Hellwig 		bmapi_flags |= XFS_BMAPI_IGSTATE;
360c59d87c4SChristoph Hellwig 
361c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_SHARED);
362c59d87c4SChristoph Hellwig 	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
363c59d87c4SChristoph Hellwig 	       (ip->i_df.if_flags & XFS_IFEXTENTS));
364d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
365c59d87c4SChristoph Hellwig 
366d2c28191SDave Chinner 	if (offset + count > mp->m_super->s_maxbytes)
367d2c28191SDave Chinner 		count = mp->m_super->s_maxbytes - offset;
368c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
369c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
3705c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
3715c8ed202SDave Chinner 				imap, &nimaps, bmapi_flags);
372ef473667SDarrick J. Wong 	/*
373ef473667SDarrick J. Wong 	 * Truncate an overwrite extent if there's a pending CoW
374ef473667SDarrick J. Wong 	 * reservation before the end of this extent.  This forces us
375ef473667SDarrick J. Wong 	 * to come back to writepage to take care of the CoW.
376ef473667SDarrick J. Wong 	 */
377ef473667SDarrick J. Wong 	if (nimaps && type == XFS_IO_OVERWRITE)
378ef473667SDarrick J. Wong 		xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap);
379c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
380c59d87c4SChristoph Hellwig 
381c59d87c4SChristoph Hellwig 	if (error)
3822451337dSDave Chinner 		return error;
383c59d87c4SChristoph Hellwig 
3840d882a36SAlain Renaud 	if (type == XFS_IO_DELALLOC &&
385c59d87c4SChristoph Hellwig 	    (!nimaps || isnullstartblock(imap->br_startblock))) {
38660b4984fSDarrick J. Wong 		error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset,
38760b4984fSDarrick J. Wong 				imap);
388c59d87c4SChristoph Hellwig 		if (!error)
389ef473667SDarrick J. Wong 			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
3902451337dSDave Chinner 		return error;
391c59d87c4SChristoph Hellwig 	}
392c59d87c4SChristoph Hellwig 
393c59d87c4SChristoph Hellwig #ifdef DEBUG
3940d882a36SAlain Renaud 	if (type == XFS_IO_UNWRITTEN) {
395c59d87c4SChristoph Hellwig 		ASSERT(nimaps);
396c59d87c4SChristoph Hellwig 		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
397c59d87c4SChristoph Hellwig 		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
398c59d87c4SChristoph Hellwig 	}
399c59d87c4SChristoph Hellwig #endif
400c59d87c4SChristoph Hellwig 	if (nimaps)
401c59d87c4SChristoph Hellwig 		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
402c59d87c4SChristoph Hellwig 	return 0;
403c59d87c4SChristoph Hellwig }
404c59d87c4SChristoph Hellwig 
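/*
 * Check whether a byte offset falls inside the cached mapping.  The offset
 * is converted to filesystem blocks (>> i_blkbits) and tested against
 * [br_startoff, br_startoff + br_blockcount).  For example, with 4k blocks
 * (i_blkbits == 12), a mapping with br_startoff == 2 and br_blockcount == 4
 * covers byte offsets 8192..24575: offset 16384 maps to block 4 and is
 * valid, offset 24576 maps to block 6 and is not.
 */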
405fbcc0256SDave Chinner STATIC bool
406c59d87c4SChristoph Hellwig xfs_imap_valid(
407c59d87c4SChristoph Hellwig 	struct inode		*inode,
408c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
409c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
410c59d87c4SChristoph Hellwig {
411c59d87c4SChristoph Hellwig 	offset >>= inode->i_blkbits;
412c59d87c4SChristoph Hellwig 
413c59d87c4SChristoph Hellwig 	return offset >= imap->br_startoff &&
414c59d87c4SChristoph Hellwig 		offset < imap->br_startoff + imap->br_blockcount;
415c59d87c4SChristoph Hellwig }
416c59d87c4SChristoph Hellwig 
417c59d87c4SChristoph Hellwig STATIC void
418c59d87c4SChristoph Hellwig xfs_start_buffer_writeback(
419c59d87c4SChristoph Hellwig 	struct buffer_head	*bh)
420c59d87c4SChristoph Hellwig {
421c59d87c4SChristoph Hellwig 	ASSERT(buffer_mapped(bh));
422c59d87c4SChristoph Hellwig 	ASSERT(buffer_locked(bh));
423c59d87c4SChristoph Hellwig 	ASSERT(!buffer_delay(bh));
424c59d87c4SChristoph Hellwig 	ASSERT(!buffer_unwritten(bh));
425c59d87c4SChristoph Hellwig 
426c59d87c4SChristoph Hellwig 	mark_buffer_async_write(bh);
427c59d87c4SChristoph Hellwig 	set_buffer_uptodate(bh);
428c59d87c4SChristoph Hellwig 	clear_buffer_dirty(bh);
429c59d87c4SChristoph Hellwig }
430c59d87c4SChristoph Hellwig 
431c59d87c4SChristoph Hellwig STATIC void
432c59d87c4SChristoph Hellwig xfs_start_page_writeback(
433c59d87c4SChristoph Hellwig 	struct page		*page,
434e10de372SDave Chinner 	int			clear_dirty)
435c59d87c4SChristoph Hellwig {
436c59d87c4SChristoph Hellwig 	ASSERT(PageLocked(page));
437c59d87c4SChristoph Hellwig 	ASSERT(!PageWriteback(page));
4380d085a52SDave Chinner 
4390d085a52SDave Chinner 	/*
4400d085a52SDave Chinner 	 * if the page was not fully cleaned, we need to ensure that the higher
4410d085a52SDave Chinner 	 * layers come back to it correctly. That means we need to keep the page
4420d085a52SDave Chinner 	 * dirty, and for WB_SYNC_ALL writeback we need to ensure the
4430d085a52SDave Chinner 	 * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to
4440d085a52SDave Chinner 	 * write this page in this writeback sweep will be made.
4450d085a52SDave Chinner 	 */
4460d085a52SDave Chinner 	if (clear_dirty) {
447c59d87c4SChristoph Hellwig 		clear_page_dirty_for_io(page);
448c59d87c4SChristoph Hellwig 		set_page_writeback(page);
4490d085a52SDave Chinner 	} else
4500d085a52SDave Chinner 		set_page_writeback_keepwrite(page);
4510d085a52SDave Chinner 
452c59d87c4SChristoph Hellwig 	unlock_page(page);
453c59d87c4SChristoph Hellwig }
454c59d87c4SChristoph Hellwig 
455c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
456c59d87c4SChristoph Hellwig {
457c59d87c4SChristoph Hellwig 	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
458c59d87c4SChristoph Hellwig }
459c59d87c4SChristoph Hellwig 
460c59d87c4SChristoph Hellwig /*
461bb18782aSDave Chinner  * Submit the bio for an ioend. We are passed an ioend with a bio attached to
462bb18782aSDave Chinner  * it, and we submit that bio. The ioend may be used for multiple bio
463bb18782aSDave Chinner  * submissions, so we only want to allocate an append transaction for the ioend
464bb18782aSDave Chinner  * once. In the case of multiple bio submission, each bio will take an IO
465bb18782aSDave Chinner  * reference to the ioend to ensure that the ioend completion is only done once
466bb18782aSDave Chinner  * all bios have been submitted and the ioend is really done.
4677bf7f352SDave Chinner  *
4687bf7f352SDave Chinner  * If @status is non-zero, it means that we have a situation where some part of
4697bf7f352SDave Chinner  * the submission process has failed after we have marked pages for writeback
470bb18782aSDave Chinner  * and unlocked them. In this situation, we need to fail the bio and ioend
471bb18782aSDave Chinner  * rather than submit it to IO. This typically only happens on a filesystem
472bb18782aSDave Chinner  * shutdown.
473c59d87c4SChristoph Hellwig  */
474e10de372SDave Chinner STATIC int
475c59d87c4SChristoph Hellwig xfs_submit_ioend(
476c59d87c4SChristoph Hellwig 	struct writeback_control *wbc,
4770e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
478e10de372SDave Chinner 	int			status)
479c59d87c4SChristoph Hellwig {
4805eda4300SDarrick J. Wong 	/* Convert CoW extents to regular */
4815eda4300SDarrick J. Wong 	if (!status && ioend->io_type == XFS_IO_COW) {
4825eda4300SDarrick J. Wong 		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4835eda4300SDarrick J. Wong 				ioend->io_offset, ioend->io_size);
4845eda4300SDarrick J. Wong 	}
4855eda4300SDarrick J. Wong 
486e10de372SDave Chinner 	/* Reserve log space if we might write beyond the on-disk inode size. */
487e10de372SDave Chinner 	if (!status &&
4880e51a8e1SChristoph Hellwig 	    ioend->io_type != XFS_IO_UNWRITTEN &&
489bb18782aSDave Chinner 	    xfs_ioend_is_append(ioend) &&
490bb18782aSDave Chinner 	    !ioend->io_append_trans)
491e10de372SDave Chinner 		status = xfs_setfilesize_trans_alloc(ioend);
492bb18782aSDave Chinner 
4930e51a8e1SChristoph Hellwig 	ioend->io_bio->bi_private = ioend;
4940e51a8e1SChristoph Hellwig 	ioend->io_bio->bi_end_io = xfs_end_bio;
4957637241eSJens Axboe 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
49670fd7614SChristoph Hellwig 
4977bf7f352SDave Chinner 	/*
4987bf7f352SDave Chinner 	 * If we are failing the IO now, just mark the ioend with an
4997bf7f352SDave Chinner 	 * error and finish it. This will run IO completion immediately
5007bf7f352SDave Chinner 	 * as there is only one reference to the ioend at this point in
5017bf7f352SDave Chinner 	 * time.
5027bf7f352SDave Chinner 	 */
503e10de372SDave Chinner 	if (status) {
504*4e4cbee9SChristoph Hellwig 		ioend->io_bio->bi_status = errno_to_blk_status(status);
5050e51a8e1SChristoph Hellwig 		bio_endio(ioend->io_bio);
506e10de372SDave Chinner 		return status;
5077bf7f352SDave Chinner 	}
5087bf7f352SDave Chinner 
5094e49ea4aSMike Christie 	submit_bio(ioend->io_bio);
510e10de372SDave Chinner 	return 0;
511c59d87c4SChristoph Hellwig }
512c59d87c4SChristoph Hellwig 
5130e51a8e1SChristoph Hellwig static void
5140e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(
5150e51a8e1SChristoph Hellwig 	struct bio		*bio,
5160e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5170e51a8e1SChristoph Hellwig {
5180e51a8e1SChristoph Hellwig 	bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
5190e51a8e1SChristoph Hellwig 	bio->bi_bdev = bh->b_bdev;
5200e51a8e1SChristoph Hellwig }
5210e51a8e1SChristoph Hellwig 
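/*
 * Allocate an ioend.  A single allocation from xfs_ioend_bioset provides
 * both the initial bio and the ioend that wraps it: the bio is embedded as
 * the ioend's io_inline_bio member, which is what the container_of() below
 * relies on.
 */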
5220e51a8e1SChristoph Hellwig static struct xfs_ioend *
5230e51a8e1SChristoph Hellwig xfs_alloc_ioend(
5240e51a8e1SChristoph Hellwig 	struct inode		*inode,
5250e51a8e1SChristoph Hellwig 	unsigned int		type,
5260e51a8e1SChristoph Hellwig 	xfs_off_t		offset,
5270e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5280e51a8e1SChristoph Hellwig {
5290e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend;
5300e51a8e1SChristoph Hellwig 	struct bio		*bio;
5310e51a8e1SChristoph Hellwig 
5320e51a8e1SChristoph Hellwig 	bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
5330e51a8e1SChristoph Hellwig 	xfs_init_bio_from_bh(bio, bh);
5340e51a8e1SChristoph Hellwig 
5350e51a8e1SChristoph Hellwig 	ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
5360e51a8e1SChristoph Hellwig 	INIT_LIST_HEAD(&ioend->io_list);
5370e51a8e1SChristoph Hellwig 	ioend->io_type = type;
5380e51a8e1SChristoph Hellwig 	ioend->io_inode = inode;
5390e51a8e1SChristoph Hellwig 	ioend->io_size = 0;
5400e51a8e1SChristoph Hellwig 	ioend->io_offset = offset;
5410e51a8e1SChristoph Hellwig 	INIT_WORK(&ioend->io_work, xfs_end_io);
5420e51a8e1SChristoph Hellwig 	ioend->io_append_trans = NULL;
5430e51a8e1SChristoph Hellwig 	ioend->io_bio = bio;
5440e51a8e1SChristoph Hellwig 	return ioend;
5450e51a8e1SChristoph Hellwig }
5460e51a8e1SChristoph Hellwig 
5470e51a8e1SChristoph Hellwig /*
5480e51a8e1SChristoph Hellwig  * Allocate a new bio, and chain the old bio to the new one.
5490e51a8e1SChristoph Hellwig  *
5500e51a8e1SChristoph Hellwig  * Note that we have to perform the chaining in this unintuitive order
5510e51a8e1SChristoph Hellwig  * so that the bi_private linkage is set up in the right direction for the
5520e51a8e1SChristoph Hellwig  * traversal in xfs_destroy_ioend().
5530e51a8e1SChristoph Hellwig  */
5540e51a8e1SChristoph Hellwig static void
5550e51a8e1SChristoph Hellwig xfs_chain_bio(
5560e51a8e1SChristoph Hellwig 	struct xfs_ioend	*ioend,
5570e51a8e1SChristoph Hellwig 	struct writeback_control *wbc,
5580e51a8e1SChristoph Hellwig 	struct buffer_head	*bh)
5590e51a8e1SChristoph Hellwig {
5600e51a8e1SChristoph Hellwig 	struct bio *new;
5610e51a8e1SChristoph Hellwig 
5620e51a8e1SChristoph Hellwig 	new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
5630e51a8e1SChristoph Hellwig 	xfs_init_bio_from_bh(new, bh);
5640e51a8e1SChristoph Hellwig 
5650e51a8e1SChristoph Hellwig 	bio_chain(ioend->io_bio, new);
5660e51a8e1SChristoph Hellwig 	bio_get(ioend->io_bio);		/* for xfs_destroy_ioend */
5677637241eSJens Axboe 	ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
5684e49ea4aSMike Christie 	submit_bio(ioend->io_bio);
5690e51a8e1SChristoph Hellwig 	ioend->io_bio = new;
5700e51a8e1SChristoph Hellwig }
5710e51a8e1SChristoph Hellwig 
572c59d87c4SChristoph Hellwig /*
573c59d87c4SChristoph Hellwig  * Test to see if we've been building up a completion structure for
574c59d87c4SChristoph Hellwig  * earlier buffers -- if so, we try to append to this ioend if we
575c59d87c4SChristoph Hellwig  * can, otherwise we finish off any current ioend and start another.
576e10de372SDave Chinner  * Any ioend we finish off is added to @iolist so that the caller can submit
577e10de372SDave Chinner  * it once it has finished processing the dirty page.
578c59d87c4SChristoph Hellwig  */
579c59d87c4SChristoph Hellwig STATIC void
580c59d87c4SChristoph Hellwig xfs_add_to_ioend(
581c59d87c4SChristoph Hellwig 	struct inode		*inode,
582c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
583c59d87c4SChristoph Hellwig 	xfs_off_t		offset,
584e10de372SDave Chinner 	struct xfs_writepage_ctx *wpc,
585bb18782aSDave Chinner 	struct writeback_control *wbc,
586e10de372SDave Chinner 	struct list_head	*iolist)
587c59d87c4SChristoph Hellwig {
588fbcc0256SDave Chinner 	if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
5890df61da8SDarrick J. Wong 	    bh->b_blocknr != wpc->last_block + 1 ||
5900df61da8SDarrick J. Wong 	    offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
591e10de372SDave Chinner 		if (wpc->ioend)
592e10de372SDave Chinner 			list_add(&wpc->ioend->io_list, iolist);
5930e51a8e1SChristoph Hellwig 		wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh);
594c59d87c4SChristoph Hellwig 	}
595c59d87c4SChristoph Hellwig 
5960e51a8e1SChristoph Hellwig 	/*
5970e51a8e1SChristoph Hellwig 	 * If the buffer doesn't fit into the bio we need to allocate a new
5980e51a8e1SChristoph Hellwig 	 * one.  This shouldn't happen more than once for a given buffer.
5990e51a8e1SChristoph Hellwig 	 */
6000e51a8e1SChristoph Hellwig 	while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size)
6010e51a8e1SChristoph Hellwig 		xfs_chain_bio(wpc->ioend, wbc, bh);
602bb18782aSDave Chinner 
603fbcc0256SDave Chinner 	wpc->ioend->io_size += bh->b_size;
604fbcc0256SDave Chinner 	wpc->last_block = bh->b_blocknr;
605e10de372SDave Chinner 	xfs_start_buffer_writeback(bh);
606c59d87c4SChristoph Hellwig }
607c59d87c4SChristoph Hellwig 
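/*
 * Fill in the buffer_head's on-disk block number from the extent mapping:
 * convert the extent's start daddr from 512-byte basic blocks into
 * filesystem-block-sized units, then add the distance (in fs blocks) of
 * @offset from the start of the extent.
 */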
608c59d87c4SChristoph Hellwig STATIC void
609c59d87c4SChristoph Hellwig xfs_map_buffer(
610c59d87c4SChristoph Hellwig 	struct inode		*inode,
611c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
612c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
613c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
614c59d87c4SChristoph Hellwig {
615c59d87c4SChristoph Hellwig 	sector_t		bn;
616c59d87c4SChristoph Hellwig 	struct xfs_mount	*m = XFS_I(inode)->i_mount;
617c59d87c4SChristoph Hellwig 	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
618c59d87c4SChristoph Hellwig 	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);
619c59d87c4SChristoph Hellwig 
620c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
621c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
622c59d87c4SChristoph Hellwig 
623c59d87c4SChristoph Hellwig 	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
624c59d87c4SChristoph Hellwig 	      ((offset - iomap_offset) >> inode->i_blkbits);
625c59d87c4SChristoph Hellwig 
626c59d87c4SChristoph Hellwig 	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));
627c59d87c4SChristoph Hellwig 
628c59d87c4SChristoph Hellwig 	bh->b_blocknr = bn;
629c59d87c4SChristoph Hellwig 	set_buffer_mapped(bh);
630c59d87c4SChristoph Hellwig }
631c59d87c4SChristoph Hellwig 
632c59d87c4SChristoph Hellwig STATIC void
633c59d87c4SChristoph Hellwig xfs_map_at_offset(
634c59d87c4SChristoph Hellwig 	struct inode		*inode,
635c59d87c4SChristoph Hellwig 	struct buffer_head	*bh,
636c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	*imap,
637c59d87c4SChristoph Hellwig 	xfs_off_t		offset)
638c59d87c4SChristoph Hellwig {
639c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
640c59d87c4SChristoph Hellwig 	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
641c59d87c4SChristoph Hellwig 
642c59d87c4SChristoph Hellwig 	xfs_map_buffer(inode, bh, imap, offset);
643c59d87c4SChristoph Hellwig 	set_buffer_mapped(bh);
644c59d87c4SChristoph Hellwig 	clear_buffer_delay(bh);
645c59d87c4SChristoph Hellwig 	clear_buffer_unwritten(bh);
646c59d87c4SChristoph Hellwig }
647c59d87c4SChristoph Hellwig 
648c59d87c4SChristoph Hellwig /*
649a49935f2SDave Chinner  * Test if a given page contains at least one buffer of a given @type.
650a49935f2SDave Chinner  * If @check_all_buffers is true, then we walk all the buffers in the page to
651a49935f2SDave Chinner  * try to find one of the type passed in. If it is not set, then we only
652a49935f2SDave Chinner  * check the first buffer on the page for a match.
653c59d87c4SChristoph Hellwig  */
654a49935f2SDave Chinner STATIC bool
6556ffc4db5SDave Chinner xfs_check_page_type(
656c59d87c4SChristoph Hellwig 	struct page		*page,
657a49935f2SDave Chinner 	unsigned int		type,
658a49935f2SDave Chinner 	bool			check_all_buffers)
659c59d87c4SChristoph Hellwig {
660a49935f2SDave Chinner 	struct buffer_head	*bh;
661a49935f2SDave Chinner 	struct buffer_head	*head;
662c59d87c4SChristoph Hellwig 
663a49935f2SDave Chinner 	if (PageWriteback(page))
664a49935f2SDave Chinner 		return false;
665a49935f2SDave Chinner 	if (!page->mapping)
666a49935f2SDave Chinner 		return false;
667a49935f2SDave Chinner 	if (!page_has_buffers(page))
668a49935f2SDave Chinner 		return false;
669c59d87c4SChristoph Hellwig 
670c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
671c59d87c4SChristoph Hellwig 	do {
672a49935f2SDave Chinner 		if (buffer_unwritten(bh)) {
673a49935f2SDave Chinner 			if (type == XFS_IO_UNWRITTEN)
674a49935f2SDave Chinner 				return true;
675a49935f2SDave Chinner 		} else if (buffer_delay(bh)) {
676805eeb8eSDan Carpenter 			if (type == XFS_IO_DELALLOC)
677a49935f2SDave Chinner 				return true;
678a49935f2SDave Chinner 		} else if (buffer_dirty(bh) && buffer_mapped(bh)) {
679805eeb8eSDan Carpenter 			if (type == XFS_IO_OVERWRITE)
680a49935f2SDave Chinner 				return true;
681a49935f2SDave Chinner 		}
682a49935f2SDave Chinner 
683a49935f2SDave Chinner 		/* If we are only checking the first buffer, we are done now. */
684a49935f2SDave Chinner 		if (!check_all_buffers)
685c59d87c4SChristoph Hellwig 			break;
686c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
687c59d87c4SChristoph Hellwig 
688a49935f2SDave Chinner 	return false;
689c59d87c4SChristoph Hellwig }
690c59d87c4SChristoph Hellwig 
691c59d87c4SChristoph Hellwig STATIC void
692c59d87c4SChristoph Hellwig xfs_vm_invalidatepage(
693c59d87c4SChristoph Hellwig 	struct page		*page,
694d47992f8SLukas Czerner 	unsigned int		offset,
695d47992f8SLukas Czerner 	unsigned int		length)
696c59d87c4SChristoph Hellwig {
69734097dfeSLukas Czerner 	trace_xfs_invalidatepage(page->mapping->host, page, offset,
69834097dfeSLukas Czerner 				 length);
69934097dfeSLukas Czerner 	block_invalidatepage(page, offset, length);
700c59d87c4SChristoph Hellwig }
701c59d87c4SChristoph Hellwig 
702c59d87c4SChristoph Hellwig /*
703c59d87c4SChristoph Hellwig  * If the page has delalloc buffers on it, we need to punch them out before we
704c59d87c4SChristoph Hellwig  * invalidate the page. If we don't, we leave a stale delalloc mapping on the
705c59d87c4SChristoph Hellwig  * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
706c59d87c4SChristoph Hellwig  * is done on that same region - the delalloc extent is returned when none is
707c59d87c4SChristoph Hellwig  * supposed to be there.
708c59d87c4SChristoph Hellwig  *
709c59d87c4SChristoph Hellwig  * We prevent this by truncating away the delalloc regions on the page before
710c59d87c4SChristoph Hellwig  * invalidating it. Because they are delalloc, we can do this without needing a
711c59d87c4SChristoph Hellwig  * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
712c59d87c4SChristoph Hellwig  * truncation without a transaction as there is no space left for block
713c59d87c4SChristoph Hellwig  * reservation (typically why we see a ENOSPC in writeback).
714c59d87c4SChristoph Hellwig  *
715c59d87c4SChristoph Hellwig  * This is not a performance critical path, so for now just do the punching a
716c59d87c4SChristoph Hellwig  * buffer head at a time.
717c59d87c4SChristoph Hellwig  */
718c59d87c4SChristoph Hellwig STATIC void
719c59d87c4SChristoph Hellwig xfs_aops_discard_page(
720c59d87c4SChristoph Hellwig 	struct page		*page)
721c59d87c4SChristoph Hellwig {
722c59d87c4SChristoph Hellwig 	struct inode		*inode = page->mapping->host;
723c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
724c59d87c4SChristoph Hellwig 	struct buffer_head	*bh, *head;
725c59d87c4SChristoph Hellwig 	loff_t			offset = page_offset(page);
726c59d87c4SChristoph Hellwig 
727a49935f2SDave Chinner 	if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true))
728c59d87c4SChristoph Hellwig 		goto out_invalidate;
729c59d87c4SChristoph Hellwig 
730c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
731c59d87c4SChristoph Hellwig 		goto out_invalidate;
732c59d87c4SChristoph Hellwig 
733c59d87c4SChristoph Hellwig 	xfs_alert(ip->i_mount,
734c59d87c4SChristoph Hellwig 		"page discard on page %p, inode 0x%llx, offset %llu.",
735c59d87c4SChristoph Hellwig 			page, ip->i_ino, offset);
736c59d87c4SChristoph Hellwig 
737c59d87c4SChristoph Hellwig 	xfs_ilock(ip, XFS_ILOCK_EXCL);
738c59d87c4SChristoph Hellwig 	bh = head = page_buffers(page);
739c59d87c4SChristoph Hellwig 	do {
740c59d87c4SChristoph Hellwig 		int		error;
741c59d87c4SChristoph Hellwig 		xfs_fileoff_t	start_fsb;
742c59d87c4SChristoph Hellwig 
743c59d87c4SChristoph Hellwig 		if (!buffer_delay(bh))
744c59d87c4SChristoph Hellwig 			goto next_buffer;
745c59d87c4SChristoph Hellwig 
746c59d87c4SChristoph Hellwig 		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
747c59d87c4SChristoph Hellwig 		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
748c59d87c4SChristoph Hellwig 		if (error) {
749c59d87c4SChristoph Hellwig 			/* something screwed, just bail */
750c59d87c4SChristoph Hellwig 			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
751c59d87c4SChristoph Hellwig 				xfs_alert(ip->i_mount,
752c59d87c4SChristoph Hellwig 			"page discard unable to remove delalloc mapping.");
753c59d87c4SChristoph Hellwig 			}
754c59d87c4SChristoph Hellwig 			break;
755c59d87c4SChristoph Hellwig 		}
756c59d87c4SChristoph Hellwig next_buffer:
75793407472SFabian Frederick 		offset += i_blocksize(inode);
758c59d87c4SChristoph Hellwig 
759c59d87c4SChristoph Hellwig 	} while ((bh = bh->b_this_page) != head);
760c59d87c4SChristoph Hellwig 
761c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, XFS_ILOCK_EXCL);
762c59d87c4SChristoph Hellwig out_invalidate:
76309cbfeafSKirill A. Shutemov 	xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
764c59d87c4SChristoph Hellwig 	return;
765c59d87c4SChristoph Hellwig }
766c59d87c4SChristoph Hellwig 
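/*
 * Decide whether writeback at @offset needs to go through the COW fork.  If
 * a COW mapping covers the offset (allocating real blocks first if it is
 * still a delayed reservation), switch the writepage context and *new_type
 * over to XFS_IO_COW; otherwise leave them untouched so the caller keeps
 * writing back through the data fork.
 */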
767ef473667SDarrick J. Wong static int
768ef473667SDarrick J. Wong xfs_map_cow(
769ef473667SDarrick J. Wong 	struct xfs_writepage_ctx *wpc,
770ef473667SDarrick J. Wong 	struct inode		*inode,
771ef473667SDarrick J. Wong 	loff_t			offset,
772ef473667SDarrick J. Wong 	unsigned int		*new_type)
773ef473667SDarrick J. Wong {
774ef473667SDarrick J. Wong 	struct xfs_inode	*ip = XFS_I(inode);
775ef473667SDarrick J. Wong 	struct xfs_bmbt_irec	imap;
776092d5d9dSChristoph Hellwig 	bool			is_cow = false;
777ef473667SDarrick J. Wong 	int			error;
778ef473667SDarrick J. Wong 
779ef473667SDarrick J. Wong 	/*
780ef473667SDarrick J. Wong 	 * If we already have a valid COW mapping keep using it.
781ef473667SDarrick J. Wong 	 */
782ef473667SDarrick J. Wong 	if (wpc->io_type == XFS_IO_COW) {
783ef473667SDarrick J. Wong 		wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset);
784ef473667SDarrick J. Wong 		if (wpc->imap_valid) {
785ef473667SDarrick J. Wong 			*new_type = XFS_IO_COW;
786ef473667SDarrick J. Wong 			return 0;
787ef473667SDarrick J. Wong 		}
788ef473667SDarrick J. Wong 	}
789ef473667SDarrick J. Wong 
790ef473667SDarrick J. Wong 	/*
791ef473667SDarrick J. Wong 	 * Else we need to check if there is a COW mapping at this offset.
792ef473667SDarrick J. Wong 	 */
793ef473667SDarrick J. Wong 	xfs_ilock(ip, XFS_ILOCK_SHARED);
794092d5d9dSChristoph Hellwig 	is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap);
795ef473667SDarrick J. Wong 	xfs_iunlock(ip, XFS_ILOCK_SHARED);
796ef473667SDarrick J. Wong 
797ef473667SDarrick J. Wong 	if (!is_cow)
798ef473667SDarrick J. Wong 		return 0;
799ef473667SDarrick J. Wong 
800ef473667SDarrick J. Wong 	/*
801ef473667SDarrick J. Wong 	 * And if the COW mapping has a delayed extent here we need to
802ef473667SDarrick J. Wong 	 * allocate real space for it now.
803ef473667SDarrick J. Wong 	 */
804092d5d9dSChristoph Hellwig 	if (isnullstartblock(imap.br_startblock)) {
805ef473667SDarrick J. Wong 		error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset,
806ef473667SDarrick J. Wong 				&imap);
807ef473667SDarrick J. Wong 		if (error)
808ef473667SDarrick J. Wong 			return error;
809ef473667SDarrick J. Wong 	}
810ef473667SDarrick J. Wong 
811ef473667SDarrick J. Wong 	wpc->io_type = *new_type = XFS_IO_COW;
812ef473667SDarrick J. Wong 	wpc->imap_valid = true;
813ef473667SDarrick J. Wong 	wpc->imap = imap;
814ef473667SDarrick J. Wong 	return 0;
815ef473667SDarrick J. Wong }
816ef473667SDarrick J. Wong 
817c59d87c4SChristoph Hellwig /*
818e10de372SDave Chinner  * We implement an immediate ioend submission policy here to avoid needing to
819e10de372SDave Chinner  * chain multiple ioends and hence nest mempool allocations which can violate
820e10de372SDave Chinner  * forward progress guarantees we need to provide. The current ioend we are
821e10de372SDave Chinner  * adding buffers to is cached on the writepage context, and if the new buffer
822e10de372SDave Chinner  * does not append to the cached ioend it will create a new ioend and cache that
823e10de372SDave Chinner  * instead.
824e10de372SDave Chinner  *
825e10de372SDave Chinner  * If a new ioend is created and cached, the old ioend is returned and queued
826e10de372SDave Chinner  * locally for submission once the entire page is processed or an error has been
827e10de372SDave Chinner  * detected.  While ioends are submitted immediately after they are completed,
828e10de372SDave Chinner  * batching optimisations are provided by higher level block plugging.
829e10de372SDave Chinner  *
830e10de372SDave Chinner  * At the end of a writeback pass, there will be a cached ioend remaining on the
831e10de372SDave Chinner  * writepage context that the caller will need to submit.
832e10de372SDave Chinner  * writepage context that the caller will need to submit.
 */
833bfce7d2eSDave Chinner static int
834bfce7d2eSDave Chinner xfs_writepage_map(
835bfce7d2eSDave Chinner 	struct xfs_writepage_ctx *wpc,
836e10de372SDave Chinner 	struct writeback_control *wbc,
837bfce7d2eSDave Chinner 	struct inode		*inode,
838bfce7d2eSDave Chinner 	struct page		*page,
839bfce7d2eSDave Chinner 	loff_t			offset,
840bfce7d2eSDave Chinner 	__uint64_t              end_offset)
841bfce7d2eSDave Chinner {
842e10de372SDave Chinner 	LIST_HEAD(submit_list);
843e10de372SDave Chinner 	struct xfs_ioend	*ioend, *next;
844bfce7d2eSDave Chinner 	struct buffer_head	*bh, *head;
84593407472SFabian Frederick 	ssize_t			len = i_blocksize(inode);
846bfce7d2eSDave Chinner 	int			error = 0;
847bfce7d2eSDave Chinner 	int			count = 0;
848e10de372SDave Chinner 	int			uptodate = 1;
849ef473667SDarrick J. Wong 	unsigned int		new_type;
850bfce7d2eSDave Chinner 
851bfce7d2eSDave Chinner 	bh = head = page_buffers(page);
852bfce7d2eSDave Chinner 	offset = page_offset(page);
853bfce7d2eSDave Chinner 	do {
854bfce7d2eSDave Chinner 		if (offset >= end_offset)
855bfce7d2eSDave Chinner 			break;
856bfce7d2eSDave Chinner 		if (!buffer_uptodate(bh))
857bfce7d2eSDave Chinner 			uptodate = 0;
858bfce7d2eSDave Chinner 
859bfce7d2eSDave Chinner 		/*
860bfce7d2eSDave Chinner 		 * set_page_dirty dirties all buffers in a page, independent
861bfce7d2eSDave Chinner 		 * of their state.  The dirty state however is entirely
862bfce7d2eSDave Chinner 		 * meaningless for holes (!mapped && uptodate), so skip
863bfce7d2eSDave Chinner 		 * buffers covering holes here.
864bfce7d2eSDave Chinner 		 */
865bfce7d2eSDave Chinner 		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
866bfce7d2eSDave Chinner 			wpc->imap_valid = false;
867bfce7d2eSDave Chinner 			continue;
868bfce7d2eSDave Chinner 		}
869bfce7d2eSDave Chinner 
870ef473667SDarrick J. Wong 		if (buffer_unwritten(bh))
871ef473667SDarrick J. Wong 			new_type = XFS_IO_UNWRITTEN;
872ef473667SDarrick J. Wong 		else if (buffer_delay(bh))
873ef473667SDarrick J. Wong 			new_type = XFS_IO_DELALLOC;
874ef473667SDarrick J. Wong 		else if (buffer_uptodate(bh))
875ef473667SDarrick J. Wong 			new_type = XFS_IO_OVERWRITE;
876ef473667SDarrick J. Wong 		else {
877bfce7d2eSDave Chinner 			if (PageUptodate(page))
878bfce7d2eSDave Chinner 				ASSERT(buffer_mapped(bh));
879bfce7d2eSDave Chinner 			/*
880bfce7d2eSDave Chinner 			 * This buffer is not uptodate and will not be
881bfce7d2eSDave Chinner 			 * written to disk.  Ensure that we will put any
882bfce7d2eSDave Chinner 			 * subsequent writeable buffers into a new
883bfce7d2eSDave Chinner 			 * ioend.
884bfce7d2eSDave Chinner 			 */
885bfce7d2eSDave Chinner 			wpc->imap_valid = false;
886bfce7d2eSDave Chinner 			continue;
887bfce7d2eSDave Chinner 		}
888bfce7d2eSDave Chinner 
889ef473667SDarrick J. Wong 		if (xfs_is_reflink_inode(XFS_I(inode))) {
890ef473667SDarrick J. Wong 			error = xfs_map_cow(wpc, inode, offset, &new_type);
891ef473667SDarrick J. Wong 			if (error)
892ef473667SDarrick J. Wong 				goto out;
893ef473667SDarrick J. Wong 		}
894ef473667SDarrick J. Wong 
895ef473667SDarrick J. Wong 		if (wpc->io_type != new_type) {
896ef473667SDarrick J. Wong 			wpc->io_type = new_type;
897ef473667SDarrick J. Wong 			wpc->imap_valid = false;
898ef473667SDarrick J. Wong 		}
899ef473667SDarrick J. Wong 
900bfce7d2eSDave Chinner 		if (wpc->imap_valid)
901bfce7d2eSDave Chinner 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
902bfce7d2eSDave Chinner 							 offset);
903bfce7d2eSDave Chinner 		if (!wpc->imap_valid) {
904bfce7d2eSDave Chinner 			error = xfs_map_blocks(inode, offset, &wpc->imap,
905bfce7d2eSDave Chinner 					     wpc->io_type);
906bfce7d2eSDave Chinner 			if (error)
907e10de372SDave Chinner 				goto out;
908bfce7d2eSDave Chinner 			wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
909bfce7d2eSDave Chinner 							 offset);
910bfce7d2eSDave Chinner 		}
911bfce7d2eSDave Chinner 		if (wpc->imap_valid) {
912bfce7d2eSDave Chinner 			lock_buffer(bh);
913bfce7d2eSDave Chinner 			if (wpc->io_type != XFS_IO_OVERWRITE)
914bfce7d2eSDave Chinner 				xfs_map_at_offset(inode, bh, &wpc->imap, offset);
915bb18782aSDave Chinner 			xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
916bfce7d2eSDave Chinner 			count++;
917bfce7d2eSDave Chinner 		}
918bfce7d2eSDave Chinner 
919bfce7d2eSDave Chinner 	} while (offset += len, ((bh = bh->b_this_page) != head));
920bfce7d2eSDave Chinner 
921bfce7d2eSDave Chinner 	if (uptodate && bh == head)
922bfce7d2eSDave Chinner 		SetPageUptodate(page);
923bfce7d2eSDave Chinner 
924e10de372SDave Chinner 	ASSERT(wpc->ioend || list_empty(&submit_list));
925bfce7d2eSDave Chinner 
926e10de372SDave Chinner out:
927bfce7d2eSDave Chinner 	/*
928e10de372SDave Chinner 	 * On error, we have to fail the ioend here because we have locked
929e10de372SDave Chinner 	 * buffers in the ioend. If we don't do this, we'll deadlock
930e10de372SDave Chinner 	 * invalidating the page as that tries to lock the buffers on the page.
931e10de372SDave Chinner 	 * Also, because we may have set pages under writeback, we have to make
932e10de372SDave Chinner 	 * sure we run IO completion to mark the error state of the IO
933e10de372SDave Chinner 	 * appropriately, so we can't cancel the ioend directly here. That means
934e10de372SDave Chinner 	 * we have to mark this page as under writeback if we included any
935e10de372SDave Chinner 	 * buffers from it in the ioend chain so that completion treats it
936e10de372SDave Chinner 	 * correctly.
937bfce7d2eSDave Chinner 	 *
938e10de372SDave Chinner 	 * If we didn't include the page in the ioend, then on error we can
939e10de372SDave Chinner 	 * simply discard and unlock it as there are no other users of the page
940e10de372SDave Chinner 	 * or its buffers right now. The caller will still need to trigger
941e10de372SDave Chinner 	 * submission of outstanding ioends on the writepage context so they are
942e10de372SDave Chinner 	 * treated correctly on error.
943bfce7d2eSDave Chinner 	 */
944e10de372SDave Chinner 	if (count) {
945e10de372SDave Chinner 		xfs_start_page_writeback(page, !error);
946e10de372SDave Chinner 
947e10de372SDave Chinner 		/*
948e10de372SDave Chinner 		 * Preserve the original error if there was one, otherwise catch
949e10de372SDave Chinner 		 * submission errors here and propagate into subsequent ioend
950e10de372SDave Chinner 		 * submissions.
951e10de372SDave Chinner 		 */
952e10de372SDave Chinner 		list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
953e10de372SDave Chinner 			int error2;
954e10de372SDave Chinner 
955e10de372SDave Chinner 			list_del_init(&ioend->io_list);
956e10de372SDave Chinner 			error2 = xfs_submit_ioend(wbc, ioend, error);
957e10de372SDave Chinner 			if (error2 && !error)
958e10de372SDave Chinner 				error = error2;
959e10de372SDave Chinner 		}
960e10de372SDave Chinner 	} else if (error) {
961bfce7d2eSDave Chinner 		xfs_aops_discard_page(page);
962bfce7d2eSDave Chinner 		ClearPageUptodate(page);
963bfce7d2eSDave Chinner 		unlock_page(page);
964e10de372SDave Chinner 	} else {
965e10de372SDave Chinner 		/*
966e10de372SDave Chinner 		 * We can end up here with no error and nothing to write if we
967e10de372SDave Chinner 		 * race with a partial page truncate on a sub-page block sized
968e10de372SDave Chinner 		 * filesystem. In that case we need to mark the page clean.
969e10de372SDave Chinner 		 */
970e10de372SDave Chinner 		xfs_start_page_writeback(page, 1);
971e10de372SDave Chinner 		end_page_writeback(page);
972bfce7d2eSDave Chinner 	}
973e10de372SDave Chinner 
974bfce7d2eSDave Chinner 	mapping_set_error(page->mapping, error);
975bfce7d2eSDave Chinner 	return error;
976bfce7d2eSDave Chinner }
977bfce7d2eSDave Chinner 
978c59d87c4SChristoph Hellwig /*
979c59d87c4SChristoph Hellwig  * Write out a dirty page.
980c59d87c4SChristoph Hellwig  *
981c59d87c4SChristoph Hellwig  * For delalloc space on the page we need to allocate space and flush it.
982c59d87c4SChristoph Hellwig  * For unwritten space on the page we need to start the conversion to
983c59d87c4SChristoph Hellwig  * regular allocated space.
984c59d87c4SChristoph Hellwig  * For any other dirty buffer heads on the page we should flush them.
985c59d87c4SChristoph Hellwig  */
986c59d87c4SChristoph Hellwig STATIC int
987fbcc0256SDave Chinner xfs_do_writepage(
988c59d87c4SChristoph Hellwig 	struct page		*page,
989fbcc0256SDave Chinner 	struct writeback_control *wbc,
990fbcc0256SDave Chinner 	void			*data)
991c59d87c4SChristoph Hellwig {
992fbcc0256SDave Chinner 	struct xfs_writepage_ctx *wpc = data;
993c59d87c4SChristoph Hellwig 	struct inode		*inode = page->mapping->host;
994c59d87c4SChristoph Hellwig 	loff_t			offset;
995c59d87c4SChristoph Hellwig 	__uint64_t              end_offset;
996ad68972aSDave Chinner 	pgoff_t                 end_index;
997c59d87c4SChristoph Hellwig 
99834097dfeSLukas Czerner 	trace_xfs_writepage(inode, page, 0, 0);
999c59d87c4SChristoph Hellwig 
1000c59d87c4SChristoph Hellwig 	ASSERT(page_has_buffers(page));
1001c59d87c4SChristoph Hellwig 
1002c59d87c4SChristoph Hellwig 	/*
1003c59d87c4SChristoph Hellwig 	 * Refuse to write the page out if we are called from reclaim context.
1004c59d87c4SChristoph Hellwig 	 *
1005c59d87c4SChristoph Hellwig 	 * This avoids stack overflows when called from deeply used stacks in
1006c59d87c4SChristoph Hellwig 	 * random callers for direct reclaim or memcg reclaim.  We explicitly
1007c59d87c4SChristoph Hellwig 	 * allow reclaim from kswapd as the stack usage there is relatively low.
1008c59d87c4SChristoph Hellwig 	 *
100994054fa3SMel Gorman 	 * This should never happen except in the case of a VM regression so
101094054fa3SMel Gorman 	 * warn about it.
1011c59d87c4SChristoph Hellwig 	 */
101294054fa3SMel Gorman 	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
101394054fa3SMel Gorman 			PF_MEMALLOC))
1014c59d87c4SChristoph Hellwig 		goto redirty;
1015c59d87c4SChristoph Hellwig 
1016c59d87c4SChristoph Hellwig 	/*
1017c59d87c4SChristoph Hellwig 	 * Given that we do not allow direct reclaim to call us, we should
1018c59d87c4SChristoph Hellwig 	 * never be called while in a filesystem transaction.
1019c59d87c4SChristoph Hellwig 	 */
10209070733bSMichal Hocko 	if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
1021c59d87c4SChristoph Hellwig 		goto redirty;
1022c59d87c4SChristoph Hellwig 
10238695d27eSJie Liu 	/*
1024ad68972aSDave Chinner 	 * Is this page beyond the end of the file?
1025ad68972aSDave Chinner 	 *
10268695d27eSJie Liu 	 * The page index is less than the end_index, adjust the end_offset
10278695d27eSJie Liu 	 * to the highest offset that this page should represent.
10288695d27eSJie Liu 	 * -----------------------------------------------------
10298695d27eSJie Liu 	 * |			file mapping	       | <EOF> |
10308695d27eSJie Liu 	 * -----------------------------------------------------
10318695d27eSJie Liu 	 * | Page ... | Page N-2 | Page N-1 |  Page N  |       |
10328695d27eSJie Liu 	 * ^--------------------------------^----------|--------
10338695d27eSJie Liu 	 * |     desired writeback range    |      see else    |
10348695d27eSJie Liu 	 * ---------------------------------^------------------|
10358695d27eSJie Liu 	 */
1036ad68972aSDave Chinner 	offset = i_size_read(inode);
103709cbfeafSKirill A. Shutemov 	end_index = offset >> PAGE_SHIFT;
10388695d27eSJie Liu 	if (page->index < end_index)
103909cbfeafSKirill A. Shutemov 		end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
10408695d27eSJie Liu 	else {
10418695d27eSJie Liu 		/*
10428695d27eSJie Liu 		 * Check whether the page to write out is beyond i_size or
10438695d27eSJie Liu 		 * straddles it.
10448695d27eSJie Liu 		 * -------------------------------------------------------
10458695d27eSJie Liu 		 * |		file mapping		        | <EOF>  |
10468695d27eSJie Liu 		 * -------------------------------------------------------
10478695d27eSJie Liu 		 * | Page ... | Page N-2 | Page N-1 |  Page N   | Beyond |
10488695d27eSJie Liu 		 * ^--------------------------------^-----------|---------
10498695d27eSJie Liu 		 * |				    |      Straddles     |
10508695d27eSJie Liu 		 * ---------------------------------^-----------|--------|
10518695d27eSJie Liu 		 */
105209cbfeafSKirill A. Shutemov 		unsigned offset_into_page = offset & (PAGE_SIZE - 1);
10536b7a03f0SChristoph Hellwig 
10546b7a03f0SChristoph Hellwig 		/*
1055ff9a28f6SJan Kara 		 * Skip the page if it is fully outside i_size, e.g. due to a
1056ff9a28f6SJan Kara 		 * truncate operation that is in progress. We must redirty the
1057ff9a28f6SJan Kara 		 * page so that reclaim stops reclaiming it. Otherwise
1058ff9a28f6SJan Kara 		 * xfs_vm_releasepage() is called on it and gets confused.
10598695d27eSJie Liu 		 *
10608695d27eSJie Liu 		 * Note that end_index is an unsigned long.  If the given
10618695d27eSJie Liu 		 * offset is greater than 16TB on a 32-bit system, checking
10628695d27eSJie Liu 		 * whether the page is fully outside i_size via
10638695d27eSJie Liu 		 * "if (page->index >= end_index + 1)" would overflow, as
10648695d27eSJie Liu 		 * "end_index + 1" evaluates to 0.  The page would then be
10658695d27eSJie Liu 		 * redirtied and written out repeatedly, resulting in an
10668695d27eSJie Liu 		 * infinite loop, and the user program performing the
10678695d27eSJie Liu 		 * operation would hang.  Instead, we verify this situation
10688695d27eSJie Liu 		 * by checking whether the page to write is totally beyond
10698695d27eSJie Liu 		 * i_size or whether its offset is just equal to the EOF.
10706b7a03f0SChristoph Hellwig 		 */
10718695d27eSJie Liu 		if (page->index > end_index ||
10728695d27eSJie Liu 		    (page->index == end_index && offset_into_page == 0))
1073ff9a28f6SJan Kara 			goto redirty;
10746b7a03f0SChristoph Hellwig 
10756b7a03f0SChristoph Hellwig 		/*
10766b7a03f0SChristoph Hellwig 		 * The page straddles i_size.  It must be zeroed out on each
10776b7a03f0SChristoph Hellwig 		 * and every writepage invocation because it may be mmapped.
10786b7a03f0SChristoph Hellwig 		 * "A file is mapped in multiples of the page size.  For a file
10796b7a03f0SChristoph Hellwig 		 * that is not a multiple of the page size, the remaining
10806b7a03f0SChristoph Hellwig 		 * memory is zeroed when mapped, and writes to that region are
10816b7a03f0SChristoph Hellwig 		 * not written out to the file."
10826b7a03f0SChristoph Hellwig 		 */
108309cbfeafSKirill A. Shutemov 		zero_user_segment(page, offset_into_page, PAGE_SIZE);
10848695d27eSJie Liu 
10858695d27eSJie Liu 		/* Adjust the end_offset to the end of file */
10868695d27eSJie Liu 		end_offset = offset;
1087c59d87c4SChristoph Hellwig 	}
1088c59d87c4SChristoph Hellwig 
1089e10de372SDave Chinner 	return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
1090c59d87c4SChristoph Hellwig 
1091c59d87c4SChristoph Hellwig redirty:
1092c59d87c4SChristoph Hellwig 	redirty_page_for_writepage(wbc, page);
1093c59d87c4SChristoph Hellwig 	unlock_page(page);
1094c59d87c4SChristoph Hellwig 	return 0;
1095c59d87c4SChristoph Hellwig }
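/*
 * Illustrative sketch (added for exposition, not from the kernel sources):
 * a tiny user-space model of the EOF classification done above, assuming
 * 4096-byte pages.  All names below are hypothetical.  With i_size = 10000,
 * end_index = 2; pages 0 and 1 are written in full, page 2 straddles EOF
 * and has bytes 1808..4095 zeroed, and page 3 onwards is redirtied.
 */
#include <stdio.h>

#define SKETCH_PAGE_SHIFT	12
#define SKETCH_PAGE_SIZE	(1UL << SKETCH_PAGE_SHIFT)

int main(void)
{
	unsigned long long isize = 10000;	/* stand-in for i_size_read() */
	unsigned long end_index = isize >> SKETCH_PAGE_SHIFT;
	unsigned long offset_into_page = isize & (SKETCH_PAGE_SIZE - 1);
	unsigned long index;

	for (index = 0; index <= end_index + 1; index++) {
		if (index < end_index)
			printf("page %lu: inside i_size, write it whole\n",
			       index);
		else if (index > end_index ||
			 (index == end_index && offset_into_page == 0))
			printf("page %lu: beyond i_size, redirty and skip\n",
			       index);
		else
			printf("page %lu: straddles i_size, zero %lu..%lu\n",
			       index, offset_into_page, SKETCH_PAGE_SIZE - 1);
	}
	return 0;
}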
1096c59d87c4SChristoph Hellwig 
1097c59d87c4SChristoph Hellwig STATIC int
1098fbcc0256SDave Chinner xfs_vm_writepage(
1099fbcc0256SDave Chinner 	struct page		*page,
1100fbcc0256SDave Chinner 	struct writeback_control *wbc)
1101fbcc0256SDave Chinner {
1102fbcc0256SDave Chinner 	struct xfs_writepage_ctx wpc = {
1103fbcc0256SDave Chinner 		.io_type = XFS_IO_INVALID,
1104fbcc0256SDave Chinner 	};
1105fbcc0256SDave Chinner 	int			ret;
1106fbcc0256SDave Chinner 
1107fbcc0256SDave Chinner 	ret = xfs_do_writepage(page, wbc, &wpc);
1108e10de372SDave Chinner 	if (wpc.ioend)
1109e10de372SDave Chinner 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1110e10de372SDave Chinner 	return ret;
1111fbcc0256SDave Chinner }
1112fbcc0256SDave Chinner 
1113fbcc0256SDave Chinner STATIC int
1114c59d87c4SChristoph Hellwig xfs_vm_writepages(
1115c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1116c59d87c4SChristoph Hellwig 	struct writeback_control *wbc)
1117c59d87c4SChristoph Hellwig {
1118fbcc0256SDave Chinner 	struct xfs_writepage_ctx wpc = {
1119fbcc0256SDave Chinner 		.io_type = XFS_IO_INVALID,
1120fbcc0256SDave Chinner 	};
1121fbcc0256SDave Chinner 	int			ret;
1122fbcc0256SDave Chinner 
1123c59d87c4SChristoph Hellwig 	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
11247f6d5b52SRoss Zwisler 	if (dax_mapping(mapping))
11257f6d5b52SRoss Zwisler 		return dax_writeback_mapping_range(mapping,
11267f6d5b52SRoss Zwisler 				xfs_find_bdev_for_inode(mapping->host), wbc);
11277f6d5b52SRoss Zwisler 
1128fbcc0256SDave Chinner 	ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
1129e10de372SDave Chinner 	if (wpc.ioend)
1130e10de372SDave Chinner 		ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
1131e10de372SDave Chinner 	return ret;
1132c59d87c4SChristoph Hellwig }
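/*
 * Note added for exposition (not part of the original source): both entry
 * points above follow the same pattern - initialise a struct
 * xfs_writepage_ctx with io_type XFS_IO_INVALID, let xfs_do_writepage()
 * accumulate contiguous work into wpc.ioend across one or more pages, and
 * then hand any outstanding ioend to xfs_submit_ioend() exactly once,
 * passing the writeback return value along so the submission path can
 * observe it.
 */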
1133c59d87c4SChristoph Hellwig 
1134c59d87c4SChristoph Hellwig /*
1135c59d87c4SChristoph Hellwig  * Called to move a page into cleanable state - and from there
1136c59d87c4SChristoph Hellwig  * to be released. The page should already be clean. We always
1137c59d87c4SChristoph Hellwig  * have buffer heads in this call.
1138c59d87c4SChristoph Hellwig  *
1139c59d87c4SChristoph Hellwig  * Returns 1 if the page is ok to release, 0 otherwise.
1140c59d87c4SChristoph Hellwig  */
1141c59d87c4SChristoph Hellwig STATIC int
1142c59d87c4SChristoph Hellwig xfs_vm_releasepage(
1143c59d87c4SChristoph Hellwig 	struct page		*page,
1144c59d87c4SChristoph Hellwig 	gfp_t			gfp_mask)
1145c59d87c4SChristoph Hellwig {
1146c59d87c4SChristoph Hellwig 	int			delalloc, unwritten;
1147c59d87c4SChristoph Hellwig 
114834097dfeSLukas Czerner 	trace_xfs_releasepage(page->mapping->host, page, 0, 0);
1149c59d87c4SChristoph Hellwig 
115099579cceSBrian Foster 	/*
115199579cceSBrian Foster 	 * mm accommodates an old ext3 case where clean pages might not have had
115299579cceSBrian Foster 	 * the dirty bit cleared. Thus, it can send actual dirty pages to
115399579cceSBrian Foster 	 * ->releasepage() via shrink_active_list(). Conversely,
115499579cceSBrian Foster 	 * block_invalidatepage() can send pages that are still marked dirty
115599579cceSBrian Foster 	 * but otherwise have invalidated buffers.
115699579cceSBrian Foster 	 *
11570a417b8dSJan Kara 	 * We want to release the latter to avoid unnecessary buildup of the
11580a417b8dSJan Kara 	 * LRU, but skip the former, and warn if we've left any lingering
11590a417b8dSJan Kara 	 * delalloc/unwritten buffers on clean pages.  That is: skip pages
11600a417b8dSJan Kara 	 * with delalloc or unwritten buffers, warning if such a page is not
11610a417b8dSJan Kara 	 * dirty, and otherwise try to release the buffers.
116299579cceSBrian Foster 	 */
1163c59d87c4SChristoph Hellwig 	xfs_count_page_state(page, &delalloc, &unwritten);
1164c59d87c4SChristoph Hellwig 
11650a417b8dSJan Kara 	if (delalloc) {
11660a417b8dSJan Kara 		WARN_ON_ONCE(!PageDirty(page));
1167c59d87c4SChristoph Hellwig 		return 0;
11680a417b8dSJan Kara 	}
11690a417b8dSJan Kara 	if (unwritten) {
11700a417b8dSJan Kara 		WARN_ON_ONCE(!PageDirty(page));
1171c59d87c4SChristoph Hellwig 		return 0;
11720a417b8dSJan Kara 	}
1173c59d87c4SChristoph Hellwig 
1174c59d87c4SChristoph Hellwig 	return try_to_free_buffers(page);
1175c59d87c4SChristoph Hellwig }
1176c59d87c4SChristoph Hellwig 
1177a719370bSDave Chinner /*
11781fdca9c2SDave Chinner  * If this is O_DIRECT or the mpage code calling, tell them how large the
11791fdca9c2SDave Chinner  * mapping is so that we can avoid repeated get_blocks calls.
11801fdca9c2SDave Chinner  *
11811fdca9c2SDave Chinner  * If the mapping spans EOF, then we have to break the mapping up as the mapping
11821fdca9c2SDave Chinner  * for blocks beyond EOF must be marked new so that sub-block regions can be
11831fdca9c2SDave Chinner  * correctly zeroed. We can't do this for mappings within EOF unless the mapping
11841fdca9c2SDave Chinner  * was just allocated or is unwritten; otherwise the callers would overwrite
11851fdca9c2SDave Chinner  * existing data with zeros. Hence we have to split the mapping into a range up
11861fdca9c2SDave Chinner  * to and including EOF, and a second mapping for beyond EOF.
11871fdca9c2SDave Chinner  */
11881fdca9c2SDave Chinner static void
11891fdca9c2SDave Chinner xfs_map_trim_size(
11901fdca9c2SDave Chinner 	struct inode		*inode,
11911fdca9c2SDave Chinner 	sector_t		iblock,
11921fdca9c2SDave Chinner 	struct buffer_head	*bh_result,
11931fdca9c2SDave Chinner 	struct xfs_bmbt_irec	*imap,
11941fdca9c2SDave Chinner 	xfs_off_t		offset,
11951fdca9c2SDave Chinner 	ssize_t			size)
11961fdca9c2SDave Chinner {
11971fdca9c2SDave Chinner 	xfs_off_t		mapping_size;
11981fdca9c2SDave Chinner 
11991fdca9c2SDave Chinner 	mapping_size = imap->br_startoff + imap->br_blockcount - iblock;
12001fdca9c2SDave Chinner 	mapping_size <<= inode->i_blkbits;
12011fdca9c2SDave Chinner 
12021fdca9c2SDave Chinner 	ASSERT(mapping_size > 0);
12031fdca9c2SDave Chinner 	if (mapping_size > size)
12041fdca9c2SDave Chinner 		mapping_size = size;
12051fdca9c2SDave Chinner 	if (offset < i_size_read(inode) &&
12061fdca9c2SDave Chinner 	    offset + mapping_size >= i_size_read(inode)) {
12071fdca9c2SDave Chinner 		/* limit mapping to block that spans EOF */
12081fdca9c2SDave Chinner 		mapping_size = roundup_64(i_size_read(inode) - offset,
120993407472SFabian Frederick 					  i_blocksize(inode));
12101fdca9c2SDave Chinner 	}
12111fdca9c2SDave Chinner 	if (mapping_size > LONG_MAX)
12121fdca9c2SDave Chinner 		mapping_size = LONG_MAX;
12131fdca9c2SDave Chinner 
12141fdca9c2SDave Chinner 	bh_result->b_size = mapping_size;
12151fdca9c2SDave Chinner }
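/*
 * Worked example (added for exposition, not part of the original source),
 * assuming 4096-byte blocks: with i_size = 20000, a request for 16384 bytes
 * at offset 16384, and an imap covering the whole range, the mapping
 * straddles EOF, so mapping_size is trimmed to
 * roundup_64(20000 - 16384, 4096) = 4096, i.e. just the block that spans
 * EOF, and bh_result->b_size is set accordingly.
 */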
12161fdca9c2SDave Chinner 
12170613f16cSDarrick J. Wong static int
1218acdda3aaSChristoph Hellwig xfs_get_blocks(
1219c59d87c4SChristoph Hellwig 	struct inode		*inode,
1220c59d87c4SChristoph Hellwig 	sector_t		iblock,
1221c59d87c4SChristoph Hellwig 	struct buffer_head	*bh_result,
1222acdda3aaSChristoph Hellwig 	int			create)
1223c59d87c4SChristoph Hellwig {
1224c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1225c59d87c4SChristoph Hellwig 	struct xfs_mount	*mp = ip->i_mount;
1226c59d87c4SChristoph Hellwig 	xfs_fileoff_t		offset_fsb, end_fsb;
1227c59d87c4SChristoph Hellwig 	int			error = 0;
1228c59d87c4SChristoph Hellwig 	int			lockmode = 0;
1229c59d87c4SChristoph Hellwig 	struct xfs_bmbt_irec	imap;
1230c59d87c4SChristoph Hellwig 	int			nimaps = 1;
1231c59d87c4SChristoph Hellwig 	xfs_off_t		offset;
1232c59d87c4SChristoph Hellwig 	ssize_t			size;
1233c59d87c4SChristoph Hellwig 
1234acdda3aaSChristoph Hellwig 	BUG_ON(create);
12356e8a27a8SChristoph Hellwig 
1236c59d87c4SChristoph Hellwig 	if (XFS_FORCED_SHUTDOWN(mp))
1237b474c7aeSEric Sandeen 		return -EIO;
1238c59d87c4SChristoph Hellwig 
1239c59d87c4SChristoph Hellwig 	offset = (xfs_off_t)iblock << inode->i_blkbits;
124093407472SFabian Frederick 	ASSERT(bh_result->b_size >= i_blocksize(inode));
1241c59d87c4SChristoph Hellwig 	size = bh_result->b_size;
1242c59d87c4SChristoph Hellwig 
1243acdda3aaSChristoph Hellwig 	if (offset >= i_size_read(inode))
1244c59d87c4SChristoph Hellwig 		return 0;
1245c59d87c4SChristoph Hellwig 
1246507630b2SDave Chinner 	/*
1247507630b2SDave Chinner 	 * Direct I/O is usually done on preallocated files, so try getting
12486e8a27a8SChristoph Hellwig 	 * a block mapping without an exclusive lock first.
1249507630b2SDave Chinner 	 */
1250309ecac8SChristoph Hellwig 	lockmode = xfs_ilock_data_map_shared(ip);
1251c59d87c4SChristoph Hellwig 
1252d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
1253d2c28191SDave Chinner 	if (offset + size > mp->m_super->s_maxbytes)
1254d2c28191SDave Chinner 		size = mp->m_super->s_maxbytes - offset;
1255c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1256c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
1257c59d87c4SChristoph Hellwig 
12585c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
12595c8ed202SDave Chinner 				&imap, &nimaps, XFS_BMAPI_ENTIRE);
1260c59d87c4SChristoph Hellwig 	if (error)
1261c59d87c4SChristoph Hellwig 		goto out_unlock;
1262c59d87c4SChristoph Hellwig 
1263acdda3aaSChristoph Hellwig 	if (nimaps) {
1264d5cc2e3fSDave Chinner 		trace_xfs_get_blocks_found(ip, offset, size,
126563fbb4c1SChristoph Hellwig 			imap.br_state == XFS_EXT_UNWRITTEN ?
126663fbb4c1SChristoph Hellwig 				XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap);
1267507630b2SDave Chinner 		xfs_iunlock(ip, lockmode);
1268c59d87c4SChristoph Hellwig 	} else {
1269c59d87c4SChristoph Hellwig 		trace_xfs_get_blocks_notfound(ip, offset, size);
1270c59d87c4SChristoph Hellwig 		goto out_unlock;
1271c59d87c4SChristoph Hellwig 	}
1272c59d87c4SChristoph Hellwig 
12731fdca9c2SDave Chinner 	/* trim mapping down to size requested */
12746e8a27a8SChristoph Hellwig 	xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size);
12751fdca9c2SDave Chinner 
1276c59d87c4SChristoph Hellwig 	/*
1277a719370bSDave Chinner 	 * For unwritten extents do not report a disk address in the buffered
1278a719370bSDave Chinner 	 * read case (treat as if we're reading into a hole).
1279c59d87c4SChristoph Hellwig 	 */
12809c4f29d3SChristoph Hellwig 	if (xfs_bmap_is_real_extent(&imap))
1281c59d87c4SChristoph Hellwig 		xfs_map_buffer(inode, bh_result, &imap, offset);
1282c59d87c4SChristoph Hellwig 
1283c59d87c4SChristoph Hellwig 	/*
1284c59d87c4SChristoph Hellwig 	 * If this is a realtime file, data may be on a different device
1285c59d87c4SChristoph Hellwig 	 * to that pointed to from the buffer_head b_bdev currently.
1286c59d87c4SChristoph Hellwig 	 */
1287c59d87c4SChristoph Hellwig 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1288c59d87c4SChristoph Hellwig 	return 0;
1289c59d87c4SChristoph Hellwig 
1290c59d87c4SChristoph Hellwig out_unlock:
1291c59d87c4SChristoph Hellwig 	xfs_iunlock(ip, lockmode);
12922451337dSDave Chinner 	return error;
1293c59d87c4SChristoph Hellwig }
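/*
 * Note added for exposition (not part of the original source): the only
 * callers of xfs_get_blocks() in this file are the read-side paths below -
 * generic_block_bmap() in xfs_vm_bmap() and mpage_readpage()/
 * mpage_readpages() - all of which pass create == 0, which is why the
 * function starts with BUG_ON(create) and only ever maps existing extents
 * via xfs_bmapi_read().
 */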
1294c59d87c4SChristoph Hellwig 
1295c59d87c4SChristoph Hellwig STATIC ssize_t
1296c59d87c4SChristoph Hellwig xfs_vm_direct_IO(
1297c59d87c4SChristoph Hellwig 	struct kiocb		*iocb,
1298c8b8e32dSChristoph Hellwig 	struct iov_iter		*iter)
1299c59d87c4SChristoph Hellwig {
1300c59d87c4SChristoph Hellwig 	/*
1301fa8d972dSChristoph Hellwig 	 * We just need the method present so that open/fcntl allow direct I/O.
1302c59d87c4SChristoph Hellwig 	 */
1303fa8d972dSChristoph Hellwig 	return -EINVAL;
1304c59d87c4SChristoph Hellwig }
1305c59d87c4SChristoph Hellwig 
1306c59d87c4SChristoph Hellwig STATIC sector_t
1307c59d87c4SChristoph Hellwig xfs_vm_bmap(
1308c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1309c59d87c4SChristoph Hellwig 	sector_t		block)
1310c59d87c4SChristoph Hellwig {
1311c59d87c4SChristoph Hellwig 	struct inode		*inode = (struct inode *)mapping->host;
1312c59d87c4SChristoph Hellwig 	struct xfs_inode	*ip = XFS_I(inode);
1313c59d87c4SChristoph Hellwig 
1314c59d87c4SChristoph Hellwig 	trace_xfs_vm_bmap(XFS_I(inode));
1315db1327b1SDarrick J. Wong 
1316db1327b1SDarrick J. Wong 	/*
1317db1327b1SDarrick J. Wong 	 * The swap code (ab-)uses ->bmap to get a block mapping and then
1318db1327b1SDarrick J. Wong 	 * bypasses the file system for actual I/O.  We really can't allow
1319db1327b1SDarrick J. Wong 	 * that on reflink inodes, so we have to skip out here.  And yes,
1320db1327b1SDarrick J. Wong 	 * 0 is the magic code for a bmap error.
1321db1327b1SDarrick J. Wong 	 */
132265523218SChristoph Hellwig 	if (xfs_is_reflink_inode(ip))
1323db1327b1SDarrick J. Wong 		return 0;
132465523218SChristoph Hellwig 
13254bc1ea6bSDave Chinner 	filemap_write_and_wait(mapping);
1326c59d87c4SChristoph Hellwig 	return generic_block_bmap(mapping, block, xfs_get_blocks);
1327c59d87c4SChristoph Hellwig }
1328c59d87c4SChristoph Hellwig 
1329c59d87c4SChristoph Hellwig STATIC int
1330c59d87c4SChristoph Hellwig xfs_vm_readpage(
1331c59d87c4SChristoph Hellwig 	struct file		*unused,
1332c59d87c4SChristoph Hellwig 	struct page		*page)
1333c59d87c4SChristoph Hellwig {
1334121e213eSDave Chinner 	trace_xfs_vm_readpage(page->mapping->host, 1);
1335c59d87c4SChristoph Hellwig 	return mpage_readpage(page, xfs_get_blocks);
1336c59d87c4SChristoph Hellwig }
1337c59d87c4SChristoph Hellwig 
1338c59d87c4SChristoph Hellwig STATIC int
1339c59d87c4SChristoph Hellwig xfs_vm_readpages(
1340c59d87c4SChristoph Hellwig 	struct file		*unused,
1341c59d87c4SChristoph Hellwig 	struct address_space	*mapping,
1342c59d87c4SChristoph Hellwig 	struct list_head	*pages,
1343c59d87c4SChristoph Hellwig 	unsigned		nr_pages)
1344c59d87c4SChristoph Hellwig {
1345121e213eSDave Chinner 	trace_xfs_vm_readpages(mapping->host, nr_pages);
1346c59d87c4SChristoph Hellwig 	return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
1347c59d87c4SChristoph Hellwig }
1348c59d87c4SChristoph Hellwig 
134922e757a4SDave Chinner /*
135022e757a4SDave Chinner  * This is basically a copy of __set_page_dirty_buffers() with one
135122e757a4SDave Chinner  * small tweak: buffers beyond EOF do not get marked dirty. If we mark them
135222e757a4SDave Chinner  * dirty, we'll never be able to clean them because we don't write buffers
135322e757a4SDave Chinner  * beyond EOF, and that means we can't invalidate pages that span EOF
135422e757a4SDave Chinner  * that have been marked dirty. Further, the dirty state can leak into
135522e757a4SDave Chinner  * the file interior if the file is extended, resulting in all sorts of
135622e757a4SDave Chinner  * bad things happening as the state does not match the underlying data.
135722e757a4SDave Chinner  *
135822e757a4SDave Chinner  * XXX: this really indicates that bufferheads in XFS need to die. Warts like
135922e757a4SDave Chinner  * this only exist because of bufferheads and how the generic code manages them.
136022e757a4SDave Chinner  */
136122e757a4SDave Chinner STATIC int
136222e757a4SDave Chinner xfs_vm_set_page_dirty(
136322e757a4SDave Chinner 	struct page		*page)
136422e757a4SDave Chinner {
136522e757a4SDave Chinner 	struct address_space	*mapping = page->mapping;
136622e757a4SDave Chinner 	struct inode		*inode = mapping->host;
136722e757a4SDave Chinner 	loff_t			end_offset;
136822e757a4SDave Chinner 	loff_t			offset;
136922e757a4SDave Chinner 	int			newly_dirty;
137022e757a4SDave Chinner 
137122e757a4SDave Chinner 	if (unlikely(!mapping))
137222e757a4SDave Chinner 		return !TestSetPageDirty(page);
137322e757a4SDave Chinner 
137422e757a4SDave Chinner 	end_offset = i_size_read(inode);
137522e757a4SDave Chinner 	offset = page_offset(page);
137622e757a4SDave Chinner 
137722e757a4SDave Chinner 	spin_lock(&mapping->private_lock);
137822e757a4SDave Chinner 	if (page_has_buffers(page)) {
137922e757a4SDave Chinner 		struct buffer_head *head = page_buffers(page);
138022e757a4SDave Chinner 		struct buffer_head *bh = head;
138122e757a4SDave Chinner 
138222e757a4SDave Chinner 		do {
138322e757a4SDave Chinner 			if (offset < end_offset)
138422e757a4SDave Chinner 				set_buffer_dirty(bh);
138522e757a4SDave Chinner 			bh = bh->b_this_page;
138693407472SFabian Frederick 			offset += i_blocksize(inode);
138722e757a4SDave Chinner 		} while (bh != head);
138822e757a4SDave Chinner 	}
1389c4843a75SGreg Thelen 	/*
139081f8c3a4SJohannes Weiner 	 * Lock out page->mem_cgroup migration to keep PageDirty
139181f8c3a4SJohannes Weiner 	 * synchronized with per-memcg dirty page counters.
1392c4843a75SGreg Thelen 	 */
139362cccb8cSJohannes Weiner 	lock_page_memcg(page);
139422e757a4SDave Chinner 	newly_dirty = !TestSetPageDirty(page);
139522e757a4SDave Chinner 	spin_unlock(&mapping->private_lock);
139622e757a4SDave Chinner 
139722e757a4SDave Chinner 	if (newly_dirty) {
139822e757a4SDave Chinner 		/* sigh - __set_page_dirty() is static, so copy it here, too */
139922e757a4SDave Chinner 		unsigned long flags;
140022e757a4SDave Chinner 
140122e757a4SDave Chinner 		spin_lock_irqsave(&mapping->tree_lock, flags);
140222e757a4SDave Chinner 		if (page->mapping) {	/* Race with truncate? */
140322e757a4SDave Chinner 			WARN_ON_ONCE(!PageUptodate(page));
140462cccb8cSJohannes Weiner 			account_page_dirtied(page, mapping);
140522e757a4SDave Chinner 			radix_tree_tag_set(&mapping->page_tree,
140622e757a4SDave Chinner 					page_index(page), PAGECACHE_TAG_DIRTY);
140722e757a4SDave Chinner 		}
140822e757a4SDave Chinner 		spin_unlock_irqrestore(&mapping->tree_lock, flags);
140922e757a4SDave Chinner 	}
141062cccb8cSJohannes Weiner 	unlock_page_memcg(page);
1411c4843a75SGreg Thelen 	if (newly_dirty)
1412c4843a75SGreg Thelen 		__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
141322e757a4SDave Chinner 	return newly_dirty;
141422e757a4SDave Chinner }
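/*
 * Worked example (added for exposition, not part of the original source),
 * assuming 512-byte buffers in a 4096-byte page: with i_size = 6000, the
 * page at index 1 covers file offsets 4096..8191, and the loop above
 * dirties only the buffers starting at 4096, 4608, 5120 and 5632 (all less
 * than 6000).  The four buffers beyond EOF stay clean, so stale dirty
 * state cannot leak into the file interior if the file is later extended.
 */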
141522e757a4SDave Chinner 
1416c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = {
1417c59d87c4SChristoph Hellwig 	.readpage		= xfs_vm_readpage,
1418c59d87c4SChristoph Hellwig 	.readpages		= xfs_vm_readpages,
1419c59d87c4SChristoph Hellwig 	.writepage		= xfs_vm_writepage,
1420c59d87c4SChristoph Hellwig 	.writepages		= xfs_vm_writepages,
142122e757a4SDave Chinner 	.set_page_dirty		= xfs_vm_set_page_dirty,
1422c59d87c4SChristoph Hellwig 	.releasepage		= xfs_vm_releasepage,
1423c59d87c4SChristoph Hellwig 	.invalidatepage		= xfs_vm_invalidatepage,
1424c59d87c4SChristoph Hellwig 	.bmap			= xfs_vm_bmap,
1425c59d87c4SChristoph Hellwig 	.direct_IO		= xfs_vm_direct_IO,
1426c59d87c4SChristoph Hellwig 	.migratepage		= buffer_migrate_page,
1427c59d87c4SChristoph Hellwig 	.is_partially_uptodate  = block_is_partially_uptodate,
1428c59d87c4SChristoph Hellwig 	.error_remove_page	= generic_error_remove_page,
1429c59d87c4SChristoph Hellwig };
1430