1c59d87c4SChristoph Hellwig /* 2c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3c59d87c4SChristoph Hellwig * All Rights Reserved. 4c59d87c4SChristoph Hellwig * 5c59d87c4SChristoph Hellwig * This program is free software; you can redistribute it and/or 6c59d87c4SChristoph Hellwig * modify it under the terms of the GNU General Public License as 7c59d87c4SChristoph Hellwig * published by the Free Software Foundation. 8c59d87c4SChristoph Hellwig * 9c59d87c4SChristoph Hellwig * This program is distributed in the hope that it would be useful, 10c59d87c4SChristoph Hellwig * but WITHOUT ANY WARRANTY; without even the implied warranty of 11c59d87c4SChristoph Hellwig * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12c59d87c4SChristoph Hellwig * GNU General Public License for more details. 13c59d87c4SChristoph Hellwig * 14c59d87c4SChristoph Hellwig * You should have received a copy of the GNU General Public License 15c59d87c4SChristoph Hellwig * along with this program; if not, write the Free Software Foundation, 16c59d87c4SChristoph Hellwig * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17c59d87c4SChristoph Hellwig */ 18c59d87c4SChristoph Hellwig #include "xfs.h" 1970a9883cSDave Chinner #include "xfs_shared.h" 20239880efSDave Chinner #include "xfs_format.h" 21239880efSDave Chinner #include "xfs_log_format.h" 22239880efSDave Chinner #include "xfs_trans_resv.h" 23c59d87c4SChristoph Hellwig #include "xfs_mount.h" 24c59d87c4SChristoph Hellwig #include "xfs_inode.h" 25239880efSDave Chinner #include "xfs_trans.h" 26281627dfSChristoph Hellwig #include "xfs_inode_item.h" 27c59d87c4SChristoph Hellwig #include "xfs_alloc.h" 28c59d87c4SChristoph Hellwig #include "xfs_error.h" 29c59d87c4SChristoph Hellwig #include "xfs_iomap.h" 30c59d87c4SChristoph Hellwig #include "xfs_trace.h" 31c59d87c4SChristoph Hellwig #include "xfs_bmap.h" 3268988114SDave Chinner #include "xfs_bmap_util.h" 33a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 34ef473667SDarrick J. 
Wong #include "xfs_reflink.h" 35c59d87c4SChristoph Hellwig #include <linux/gfp.h> 36c59d87c4SChristoph Hellwig #include <linux/mpage.h> 37c59d87c4SChristoph Hellwig #include <linux/pagevec.h> 38c59d87c4SChristoph Hellwig #include <linux/writeback.h> 39c59d87c4SChristoph Hellwig 40fbcc0256SDave Chinner /* 41fbcc0256SDave Chinner * structure owned by writepages passed to individual writepage calls 42fbcc0256SDave Chinner */ 43fbcc0256SDave Chinner struct xfs_writepage_ctx { 44fbcc0256SDave Chinner struct xfs_bmbt_irec imap; 45fbcc0256SDave Chinner bool imap_valid; 46fbcc0256SDave Chinner unsigned int io_type; 47fbcc0256SDave Chinner struct xfs_ioend *ioend; 48fbcc0256SDave Chinner sector_t last_block; 49fbcc0256SDave Chinner }; 50fbcc0256SDave Chinner 51c59d87c4SChristoph Hellwig void 52c59d87c4SChristoph Hellwig xfs_count_page_state( 53c59d87c4SChristoph Hellwig struct page *page, 54c59d87c4SChristoph Hellwig int *delalloc, 55c59d87c4SChristoph Hellwig int *unwritten) 56c59d87c4SChristoph Hellwig { 57c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 58c59d87c4SChristoph Hellwig 59c59d87c4SChristoph Hellwig *delalloc = *unwritten = 0; 60c59d87c4SChristoph Hellwig 61c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 62c59d87c4SChristoph Hellwig do { 63c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) 64c59d87c4SChristoph Hellwig (*unwritten) = 1; 65c59d87c4SChristoph Hellwig else if (buffer_delay(bh)) 66c59d87c4SChristoph Hellwig (*delalloc) = 1; 67c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 68c59d87c4SChristoph Hellwig } 69c59d87c4SChristoph Hellwig 7020a90f58SRoss Zwisler struct block_device * 71c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode( 72c59d87c4SChristoph Hellwig struct inode *inode) 73c59d87c4SChristoph Hellwig { 74c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 75c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 76c59d87c4SChristoph Hellwig 77c59d87c4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip)) 78c59d87c4SChristoph Hellwig return mp->m_rtdev_targp->bt_bdev; 79c59d87c4SChristoph Hellwig else 80c59d87c4SChristoph Hellwig return mp->m_ddev_targp->bt_bdev; 81c59d87c4SChristoph Hellwig } 82c59d87c4SChristoph Hellwig 83c59d87c4SChristoph Hellwig /* 8437992c18SDave Chinner * We're now finished for good with this page. Update the page state via the 8537992c18SDave Chinner * associated buffer_heads, paying attention to the start and end offsets that 8637992c18SDave Chinner * we need to process on the page. 8728b783e4SDave Chinner * 8828b783e4SDave Chinner * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last 8928b783e4SDave Chinner * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or 9028b783e4SDave Chinner * the page at all, as we may be racing with memory reclaim and it can free both 9128b783e4SDave Chinner * the bufferhead chain and the page as it will see the page as clean and 9228b783e4SDave Chinner * unused. 
9337992c18SDave Chinner */ 9437992c18SDave Chinner static void 9537992c18SDave Chinner xfs_finish_page_writeback( 9637992c18SDave Chinner struct inode *inode, 9737992c18SDave Chinner struct bio_vec *bvec, 9837992c18SDave Chinner int error) 9937992c18SDave Chinner { 10037992c18SDave Chinner unsigned int end = bvec->bv_offset + bvec->bv_len - 1; 10128b783e4SDave Chinner struct buffer_head *head, *bh, *next; 10237992c18SDave Chinner unsigned int off = 0; 10328b783e4SDave Chinner unsigned int bsize; 10437992c18SDave Chinner 10537992c18SDave Chinner ASSERT(bvec->bv_offset < PAGE_SIZE); 10693407472SFabian Frederick ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); 10737992c18SDave Chinner ASSERT(end < PAGE_SIZE); 10893407472SFabian Frederick ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); 10937992c18SDave Chinner 11037992c18SDave Chinner bh = head = page_buffers(bvec->bv_page); 11137992c18SDave Chinner 11228b783e4SDave Chinner bsize = bh->b_size; 11337992c18SDave Chinner do { 114161f55efSEryu Guan if (off > end) 115161f55efSEryu Guan break; 11628b783e4SDave Chinner next = bh->b_this_page; 11737992c18SDave Chinner if (off < bvec->bv_offset) 11837992c18SDave Chinner goto next_bh; 11937992c18SDave Chinner bh->b_end_io(bh, !error); 12037992c18SDave Chinner next_bh: 12128b783e4SDave Chinner off += bsize; 12228b783e4SDave Chinner } while ((bh = next) != head); 12337992c18SDave Chinner } 12437992c18SDave Chinner 12537992c18SDave Chinner /* 12637992c18SDave Chinner * We're now finished for good with this ioend structure. Update the page 12737992c18SDave Chinner * state, release holds on bios, and finally free up memory. Do not use the 12837992c18SDave Chinner * ioend after this. 129c59d87c4SChristoph Hellwig */ 130c59d87c4SChristoph Hellwig STATIC void 131c59d87c4SChristoph Hellwig xfs_destroy_ioend( 1320e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 1330e51a8e1SChristoph Hellwig int error) 134c59d87c4SChristoph Hellwig { 13537992c18SDave Chinner struct inode *inode = ioend->io_inode; 1360e51a8e1SChristoph Hellwig struct bio *last = ioend->io_bio; 13737992c18SDave Chinner struct bio *bio, *next; 138c59d87c4SChristoph Hellwig 1390e51a8e1SChristoph Hellwig for (bio = &ioend->io_inline_bio; bio; bio = next) { 14037992c18SDave Chinner struct bio_vec *bvec; 14137992c18SDave Chinner int i; 14237992c18SDave Chinner 1430e51a8e1SChristoph Hellwig /* 1440e51a8e1SChristoph Hellwig * For the last bio, bi_private points to the ioend, so we 1450e51a8e1SChristoph Hellwig * need to explicitly end the iteration here. 1460e51a8e1SChristoph Hellwig */ 1470e51a8e1SChristoph Hellwig if (bio == last) 1480e51a8e1SChristoph Hellwig next = NULL; 1490e51a8e1SChristoph Hellwig else 15037992c18SDave Chinner next = bio->bi_private; 15137992c18SDave Chinner 15237992c18SDave Chinner /* walk each page on bio, ending page IO on them */ 15337992c18SDave Chinner bio_for_each_segment_all(bvec, bio, i) 15437992c18SDave Chinner xfs_finish_page_writeback(inode, bvec, error); 15537992c18SDave Chinner 15637992c18SDave Chinner bio_put(bio); 157c59d87c4SChristoph Hellwig } 158c59d87c4SChristoph Hellwig } 159c59d87c4SChristoph Hellwig 160c59d87c4SChristoph Hellwig /* 161fc0063c4SChristoph Hellwig * Fast and loose check if this write could update the on-disk inode size. 
162fc0063c4SChristoph Hellwig */ 163fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 164fc0063c4SChristoph Hellwig { 165fc0063c4SChristoph Hellwig return ioend->io_offset + ioend->io_size > 166fc0063c4SChristoph Hellwig XFS_I(ioend->io_inode)->i_d.di_size; 167fc0063c4SChristoph Hellwig } 168fc0063c4SChristoph Hellwig 169281627dfSChristoph Hellwig STATIC int 170281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc( 171281627dfSChristoph Hellwig struct xfs_ioend *ioend) 172281627dfSChristoph Hellwig { 173281627dfSChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 174281627dfSChristoph Hellwig struct xfs_trans *tp; 175281627dfSChristoph Hellwig int error; 176281627dfSChristoph Hellwig 177253f4911SChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 178253f4911SChristoph Hellwig if (error) 179281627dfSChristoph Hellwig return error; 180281627dfSChristoph Hellwig 181281627dfSChristoph Hellwig ioend->io_append_trans = tp; 182281627dfSChristoph Hellwig 183281627dfSChristoph Hellwig /* 184437a255aSDave Chinner * We may pass freeze protection with a transaction. So tell lockdep 185d9457dc0SJan Kara * we released it. 186d9457dc0SJan Kara */ 187bee9182dSOleg Nesterov __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); 188d9457dc0SJan Kara /* 189281627dfSChristoph Hellwig * We hand off the transaction to the completion thread now, so 190281627dfSChristoph Hellwig * clear the flag here. 191281627dfSChristoph Hellwig */ 1929070733bSMichal Hocko current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 193281627dfSChristoph Hellwig return 0; 194281627dfSChristoph Hellwig } 195281627dfSChristoph Hellwig 196fc0063c4SChristoph Hellwig /* 1972813d682SChristoph Hellwig * Update on-disk file size now that data has been written to disk. 
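 * The size is only ever moved forward here: if the completed range does not
 * extend past the current on-disk size, xfs_new_eof() returns 0 and the
 * transaction is cancelled without logging the inode.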
198c59d87c4SChristoph Hellwig */ 199281627dfSChristoph Hellwig STATIC int 200e372843aSChristoph Hellwig __xfs_setfilesize( 2012ba66237SChristoph Hellwig struct xfs_inode *ip, 2022ba66237SChristoph Hellwig struct xfs_trans *tp, 2032ba66237SChristoph Hellwig xfs_off_t offset, 2042ba66237SChristoph Hellwig size_t size) 205c59d87c4SChristoph Hellwig { 206c59d87c4SChristoph Hellwig xfs_fsize_t isize; 207c59d87c4SChristoph Hellwig 208aa6bf01dSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 2092ba66237SChristoph Hellwig isize = xfs_new_eof(ip, offset + size); 210281627dfSChristoph Hellwig if (!isize) { 211281627dfSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 2124906e215SChristoph Hellwig xfs_trans_cancel(tp); 213281627dfSChristoph Hellwig return 0; 214c59d87c4SChristoph Hellwig } 215c59d87c4SChristoph Hellwig 2162ba66237SChristoph Hellwig trace_xfs_setfilesize(ip, offset, size); 217281627dfSChristoph Hellwig 218281627dfSChristoph Hellwig ip->i_d.di_size = isize; 219281627dfSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 220281627dfSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 221281627dfSChristoph Hellwig 22270393313SChristoph Hellwig return xfs_trans_commit(tp); 223c59d87c4SChristoph Hellwig } 224c59d87c4SChristoph Hellwig 225e372843aSChristoph Hellwig int 226e372843aSChristoph Hellwig xfs_setfilesize( 227e372843aSChristoph Hellwig struct xfs_inode *ip, 228e372843aSChristoph Hellwig xfs_off_t offset, 229e372843aSChristoph Hellwig size_t size) 230e372843aSChristoph Hellwig { 231e372843aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 232e372843aSChristoph Hellwig struct xfs_trans *tp; 233e372843aSChristoph Hellwig int error; 234e372843aSChristoph Hellwig 235e372843aSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 236e372843aSChristoph Hellwig if (error) 237e372843aSChristoph Hellwig return error; 238e372843aSChristoph Hellwig 239e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, offset, size); 240e372843aSChristoph Hellwig } 241e372843aSChristoph Hellwig 2422ba66237SChristoph Hellwig STATIC int 2432ba66237SChristoph Hellwig xfs_setfilesize_ioend( 2440e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 2450e51a8e1SChristoph Hellwig int error) 2462ba66237SChristoph Hellwig { 2472ba66237SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 2482ba66237SChristoph Hellwig struct xfs_trans *tp = ioend->io_append_trans; 2492ba66237SChristoph Hellwig 2502ba66237SChristoph Hellwig /* 2512ba66237SChristoph Hellwig * The transaction may have been allocated in the I/O submission thread, 2522ba66237SChristoph Hellwig * thus we need to mark ourselves as being in a transaction manually. 2532ba66237SChristoph Hellwig * Similarly for freeze protection. 2542ba66237SChristoph Hellwig */ 2559070733bSMichal Hocko current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 256bee9182dSOleg Nesterov __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 2572ba66237SChristoph Hellwig 2585cb13dcdSZhaohongjiang /* we abort the update if there was an IO error */ 2590e51a8e1SChristoph Hellwig if (error) { 2605cb13dcdSZhaohongjiang xfs_trans_cancel(tp); 2610e51a8e1SChristoph Hellwig return error; 2625cb13dcdSZhaohongjiang } 2635cb13dcdSZhaohongjiang 264e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 2652ba66237SChristoph Hellwig } 2662ba66237SChristoph Hellwig 267c59d87c4SChristoph Hellwig /* 268c59d87c4SChristoph Hellwig * IO write completion. 
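 * This runs from a workqueue. Depending on the ioend type it remaps COW
 * blocks (XFS_IO_COW), converts unwritten extents (XFS_IO_UNWRITTEN) or
 * simply falls through for overwrites; on an I/O error any COW reservation
 * is cancelled instead. Finally, xfs_setfilesize_ioend() commits (or, on
 * error, cancels) any attached append transaction and the ioend is destroyed.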
269c59d87c4SChristoph Hellwig */
270c59d87c4SChristoph Hellwig STATIC void
271c59d87c4SChristoph Hellwig xfs_end_io(
272c59d87c4SChristoph Hellwig struct work_struct *work)
273c59d87c4SChristoph Hellwig {
2740e51a8e1SChristoph Hellwig struct xfs_ioend *ioend =
2750e51a8e1SChristoph Hellwig container_of(work, struct xfs_ioend, io_work);
276c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode);
277787eb485SChristoph Hellwig xfs_off_t offset = ioend->io_offset;
278787eb485SChristoph Hellwig size_t size = ioend->io_size;
279*4e4cbee9SChristoph Hellwig int error;
280c59d87c4SChristoph Hellwig
281af055e37SBrian Foster /*
282787eb485SChristoph Hellwig * Just clean up the in-memory structures if the fs has been shut down.
283af055e37SBrian Foster */
284787eb485SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
2850e51a8e1SChristoph Hellwig error = -EIO;
28643caeb18SDarrick J. Wong goto done;
28743caeb18SDarrick J. Wong }
28843caeb18SDarrick J. Wong
28943caeb18SDarrick J. Wong /*
290787eb485SChristoph Hellwig * Clean up any COW blocks on an I/O error.
291c59d87c4SChristoph Hellwig */
292*4e4cbee9SChristoph Hellwig error = blk_status_to_errno(ioend->io_bio->bi_status);
293787eb485SChristoph Hellwig if (unlikely(error)) {
294787eb485SChristoph Hellwig switch (ioend->io_type) {
295787eb485SChristoph Hellwig case XFS_IO_COW:
296787eb485SChristoph Hellwig xfs_reflink_cancel_cow_range(ip, offset, size, true);
297787eb485SChristoph Hellwig break;
298787eb485SChristoph Hellwig }
299787eb485SChristoph Hellwig
3005cb13dcdSZhaohongjiang goto done;
301787eb485SChristoph Hellwig }
302787eb485SChristoph Hellwig
303787eb485SChristoph Hellwig /*
304787eb485SChristoph Hellwig * Success: commit the COW or unwritten blocks if needed.
305787eb485SChristoph Hellwig */
306787eb485SChristoph Hellwig switch (ioend->io_type) {
307787eb485SChristoph Hellwig case XFS_IO_COW:
308787eb485SChristoph Hellwig error = xfs_reflink_end_cow(ip, offset, size);
309787eb485SChristoph Hellwig break;
310787eb485SChristoph Hellwig case XFS_IO_UNWRITTEN:
311787eb485SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, offset, size);
312787eb485SChristoph Hellwig break;
313787eb485SChristoph Hellwig default:
314787eb485SChristoph Hellwig ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
315787eb485SChristoph Hellwig break;
31684803fb7SChristoph Hellwig }
31784803fb7SChristoph Hellwig
31804f658eeSChristoph Hellwig done:
319787eb485SChristoph Hellwig if (ioend->io_append_trans)
320787eb485SChristoph Hellwig error = xfs_setfilesize_ioend(ioend, error);
3210e51a8e1SChristoph Hellwig xfs_destroy_ioend(ioend, error);
322c59d87c4SChristoph Hellwig }
323c59d87c4SChristoph Hellwig
3240e51a8e1SChristoph Hellwig STATIC void
3250e51a8e1SChristoph Hellwig xfs_end_bio(
3260e51a8e1SChristoph Hellwig struct bio *bio)
327c59d87c4SChristoph Hellwig {
3280e51a8e1SChristoph Hellwig struct xfs_ioend *ioend = bio->bi_private;
3290e51a8e1SChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
330c59d87c4SChristoph Hellwig
33143caeb18SDarrick J.
Wong if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW) 3320e51a8e1SChristoph Hellwig queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 3330e51a8e1SChristoph Hellwig else if (ioend->io_append_trans) 3340e51a8e1SChristoph Hellwig queue_work(mp->m_data_workqueue, &ioend->io_work); 3350e51a8e1SChristoph Hellwig else 336*4e4cbee9SChristoph Hellwig xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status)); 337c59d87c4SChristoph Hellwig } 338c59d87c4SChristoph Hellwig 339c59d87c4SChristoph Hellwig STATIC int 340c59d87c4SChristoph Hellwig xfs_map_blocks( 341c59d87c4SChristoph Hellwig struct inode *inode, 342c59d87c4SChristoph Hellwig loff_t offset, 343c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 344988ef927SDave Chinner int type) 345c59d87c4SChristoph Hellwig { 346c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 347c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 34893407472SFabian Frederick ssize_t count = i_blocksize(inode); 349c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 350c59d87c4SChristoph Hellwig int error = 0; 351c59d87c4SChristoph Hellwig int bmapi_flags = XFS_BMAPI_ENTIRE; 352c59d87c4SChristoph Hellwig int nimaps = 1; 353c59d87c4SChristoph Hellwig 354c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 355b474c7aeSEric Sandeen return -EIO; 356c59d87c4SChristoph Hellwig 357ef473667SDarrick J. Wong ASSERT(type != XFS_IO_COW); 3580d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) 359c59d87c4SChristoph Hellwig bmapi_flags |= XFS_BMAPI_IGSTATE; 360c59d87c4SChristoph Hellwig 361c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED); 362c59d87c4SChristoph Hellwig ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 363c59d87c4SChristoph Hellwig (ip->i_df.if_flags & XFS_IFEXTENTS)); 364d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 365c59d87c4SChristoph Hellwig 366d2c28191SDave Chinner if (offset + count > mp->m_super->s_maxbytes) 367d2c28191SDave Chinner count = mp->m_super->s_maxbytes - offset; 368c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 369c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 3705c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 3715c8ed202SDave Chinner imap, &nimaps, bmapi_flags); 372ef473667SDarrick J. Wong /* 373ef473667SDarrick J. Wong * Truncate an overwrite extent if there's a pending CoW 374ef473667SDarrick J. Wong * reservation before the end of this extent. This forces us 375ef473667SDarrick J. Wong * to come back to writepage to take care of the CoW. 376ef473667SDarrick J. Wong */ 377ef473667SDarrick J. Wong if (nimaps && type == XFS_IO_OVERWRITE) 378ef473667SDarrick J. Wong xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap); 379c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 380c59d87c4SChristoph Hellwig 381c59d87c4SChristoph Hellwig if (error) 3822451337dSDave Chinner return error; 383c59d87c4SChristoph Hellwig 3840d882a36SAlain Renaud if (type == XFS_IO_DELALLOC && 385c59d87c4SChristoph Hellwig (!nimaps || isnullstartblock(imap->br_startblock))) { 38660b4984fSDarrick J. Wong error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset, 38760b4984fSDarrick J. Wong imap); 388c59d87c4SChristoph Hellwig if (!error) 389ef473667SDarrick J. 
Wong trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); 3902451337dSDave Chinner return error; 391c59d87c4SChristoph Hellwig } 392c59d87c4SChristoph Hellwig 393c59d87c4SChristoph Hellwig #ifdef DEBUG 3940d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) { 395c59d87c4SChristoph Hellwig ASSERT(nimaps); 396c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 397c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 398c59d87c4SChristoph Hellwig } 399c59d87c4SChristoph Hellwig #endif 400c59d87c4SChristoph Hellwig if (nimaps) 401c59d87c4SChristoph Hellwig trace_xfs_map_blocks_found(ip, offset, count, type, imap); 402c59d87c4SChristoph Hellwig return 0; 403c59d87c4SChristoph Hellwig } 404c59d87c4SChristoph Hellwig 405fbcc0256SDave Chinner STATIC bool 406c59d87c4SChristoph Hellwig xfs_imap_valid( 407c59d87c4SChristoph Hellwig struct inode *inode, 408c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 409c59d87c4SChristoph Hellwig xfs_off_t offset) 410c59d87c4SChristoph Hellwig { 411c59d87c4SChristoph Hellwig offset >>= inode->i_blkbits; 412c59d87c4SChristoph Hellwig 413c59d87c4SChristoph Hellwig return offset >= imap->br_startoff && 414c59d87c4SChristoph Hellwig offset < imap->br_startoff + imap->br_blockcount; 415c59d87c4SChristoph Hellwig } 416c59d87c4SChristoph Hellwig 417c59d87c4SChristoph Hellwig STATIC void 418c59d87c4SChristoph Hellwig xfs_start_buffer_writeback( 419c59d87c4SChristoph Hellwig struct buffer_head *bh) 420c59d87c4SChristoph Hellwig { 421c59d87c4SChristoph Hellwig ASSERT(buffer_mapped(bh)); 422c59d87c4SChristoph Hellwig ASSERT(buffer_locked(bh)); 423c59d87c4SChristoph Hellwig ASSERT(!buffer_delay(bh)); 424c59d87c4SChristoph Hellwig ASSERT(!buffer_unwritten(bh)); 425c59d87c4SChristoph Hellwig 426c59d87c4SChristoph Hellwig mark_buffer_async_write(bh); 427c59d87c4SChristoph Hellwig set_buffer_uptodate(bh); 428c59d87c4SChristoph Hellwig clear_buffer_dirty(bh); 429c59d87c4SChristoph Hellwig } 430c59d87c4SChristoph Hellwig 431c59d87c4SChristoph Hellwig STATIC void 432c59d87c4SChristoph Hellwig xfs_start_page_writeback( 433c59d87c4SChristoph Hellwig struct page *page, 434e10de372SDave Chinner int clear_dirty) 435c59d87c4SChristoph Hellwig { 436c59d87c4SChristoph Hellwig ASSERT(PageLocked(page)); 437c59d87c4SChristoph Hellwig ASSERT(!PageWriteback(page)); 4380d085a52SDave Chinner 4390d085a52SDave Chinner /* 4400d085a52SDave Chinner * if the page was not fully cleaned, we need to ensure that the higher 4410d085a52SDave Chinner * layers come back to it correctly. That means we need to keep the page 4420d085a52SDave Chinner * dirty, and for WB_SYNC_ALL writeback we need to ensure the 4430d085a52SDave Chinner * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to 4440d085a52SDave Chinner * write this page in this writeback sweep will be made. 
4450d085a52SDave Chinner */
4460d085a52SDave Chinner if (clear_dirty) {
447c59d87c4SChristoph Hellwig clear_page_dirty_for_io(page);
448c59d87c4SChristoph Hellwig set_page_writeback(page);
4490d085a52SDave Chinner } else
4500d085a52SDave Chinner set_page_writeback_keepwrite(page);
4510d085a52SDave Chinner
452c59d87c4SChristoph Hellwig unlock_page(page);
453c59d87c4SChristoph Hellwig }
454c59d87c4SChristoph Hellwig
455c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
456c59d87c4SChristoph Hellwig {
457c59d87c4SChristoph Hellwig return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
458c59d87c4SChristoph Hellwig }
459c59d87c4SChristoph Hellwig
460c59d87c4SChristoph Hellwig /*
461bb18782aSDave Chinner * Submit the bio for an ioend. We are passed an ioend with a bio attached to
462bb18782aSDave Chinner * it, and we submit that bio. The ioend may be used for multiple bio
463bb18782aSDave Chinner * submissions, so we only want to allocate an append transaction for the ioend
464bb18782aSDave Chinner * once. In the case of multiple bio submission, each bio will take an IO
465bb18782aSDave Chinner * reference to the ioend to ensure that the ioend completion is only done once
466bb18782aSDave Chinner * all bios have been submitted and the ioend is really done.
4677bf7f352SDave Chinner *
4687bf7f352SDave Chinner * If @status is non-zero, it means that we have a situation where some part of
4697bf7f352SDave Chinner * the submission process has failed after we have marked pages for writeback
470bb18782aSDave Chinner * and unlocked them. In this situation, we need to fail the bio and ioend
471bb18782aSDave Chinner * rather than submit it to IO. This typically only happens on a filesystem
472bb18782aSDave Chinner * shutdown.
473c59d87c4SChristoph Hellwig */
474e10de372SDave Chinner STATIC int
475c59d87c4SChristoph Hellwig xfs_submit_ioend(
476c59d87c4SChristoph Hellwig struct writeback_control *wbc,
4770e51a8e1SChristoph Hellwig struct xfs_ioend *ioend,
478e10de372SDave Chinner int status)
479c59d87c4SChristoph Hellwig {
4805eda4300SDarrick J. Wong /* Convert CoW extents to regular */
4815eda4300SDarrick J. Wong if (!status && ioend->io_type == XFS_IO_COW) {
4825eda4300SDarrick J. Wong status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4835eda4300SDarrick J. Wong ioend->io_offset, ioend->io_size);
4845eda4300SDarrick J. Wong }
4855eda4300SDarrick J. Wong
486e10de372SDave Chinner /* Reserve log space if we might write beyond the on-disk inode size. */
487e10de372SDave Chinner if (!status &&
4880e51a8e1SChristoph Hellwig ioend->io_type != XFS_IO_UNWRITTEN &&
489bb18782aSDave Chinner xfs_ioend_is_append(ioend) &&
490bb18782aSDave Chinner !ioend->io_append_trans)
491e10de372SDave Chinner status = xfs_setfilesize_trans_alloc(ioend);
492bb18782aSDave Chinner
4930e51a8e1SChristoph Hellwig ioend->io_bio->bi_private = ioend;
4940e51a8e1SChristoph Hellwig ioend->io_bio->bi_end_io = xfs_end_bio;
4957637241eSJens Axboe ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
49670fd7614SChristoph Hellwig
4977bf7f352SDave Chinner /*
4987bf7f352SDave Chinner * If we are failing the IO now, just mark the ioend with an
4997bf7f352SDave Chinner * error and finish it. This will run IO completion immediately
5007bf7f352SDave Chinner * as there is only one reference to the ioend at this point in
5017bf7f352SDave Chinner * time.
5027bf7f352SDave Chinner */
503e10de372SDave Chinner if (status) {
504*4e4cbee9SChristoph Hellwig ioend->io_bio->bi_status = errno_to_blk_status(status);
5050e51a8e1SChristoph Hellwig bio_endio(ioend->io_bio);
506e10de372SDave Chinner return status;
5077bf7f352SDave Chinner }
5087bf7f352SDave Chinner
5094e49ea4aSMike Christie submit_bio(ioend->io_bio);
510e10de372SDave Chinner return 0;
511c59d87c4SChristoph Hellwig }
512c59d87c4SChristoph Hellwig
5130e51a8e1SChristoph Hellwig static void
5140e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(
5150e51a8e1SChristoph Hellwig struct bio *bio,
5160e51a8e1SChristoph Hellwig struct buffer_head *bh)
5170e51a8e1SChristoph Hellwig {
5180e51a8e1SChristoph Hellwig bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
5190e51a8e1SChristoph Hellwig bio->bi_bdev = bh->b_bdev;
5200e51a8e1SChristoph Hellwig }
5210e51a8e1SChristoph Hellwig
5220e51a8e1SChristoph Hellwig static struct xfs_ioend *
5230e51a8e1SChristoph Hellwig xfs_alloc_ioend(
5240e51a8e1SChristoph Hellwig struct inode *inode,
5250e51a8e1SChristoph Hellwig unsigned int type,
5260e51a8e1SChristoph Hellwig xfs_off_t offset,
5270e51a8e1SChristoph Hellwig struct buffer_head *bh)
5280e51a8e1SChristoph Hellwig {
5290e51a8e1SChristoph Hellwig struct xfs_ioend *ioend;
5300e51a8e1SChristoph Hellwig struct bio *bio;
5310e51a8e1SChristoph Hellwig
5320e51a8e1SChristoph Hellwig bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
5330e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(bio, bh);
5340e51a8e1SChristoph Hellwig
5350e51a8e1SChristoph Hellwig ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
5360e51a8e1SChristoph Hellwig INIT_LIST_HEAD(&ioend->io_list);
5370e51a8e1SChristoph Hellwig ioend->io_type = type;
5380e51a8e1SChristoph Hellwig ioend->io_inode = inode;
5390e51a8e1SChristoph Hellwig ioend->io_size = 0;
5400e51a8e1SChristoph Hellwig ioend->io_offset = offset;
5410e51a8e1SChristoph Hellwig INIT_WORK(&ioend->io_work, xfs_end_io);
5420e51a8e1SChristoph Hellwig ioend->io_append_trans = NULL;
5430e51a8e1SChristoph Hellwig ioend->io_bio = bio;
5440e51a8e1SChristoph Hellwig return ioend;
5450e51a8e1SChristoph Hellwig }
5460e51a8e1SChristoph Hellwig
5470e51a8e1SChristoph Hellwig /*
5480e51a8e1SChristoph Hellwig * Allocate a new bio, and chain the old bio to the new one.
5490e51a8e1SChristoph Hellwig *
5500e51a8e1SChristoph Hellwig * Note that we have to perform the chaining in this unintuitive order
5510e51a8e1SChristoph Hellwig * so that the bi_private linkage is set up in the right direction for the
5520e51a8e1SChristoph Hellwig * traversal in xfs_destroy_ioend().
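 * (Each chained bio has its bi_private pointing at the bio allocated after
 * it; xfs_submit_ioend() later points the final bio's bi_private back at the
 * ioend, which is what terminates the walk in xfs_destroy_ioend().)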
5530e51a8e1SChristoph Hellwig */ 5540e51a8e1SChristoph Hellwig static void 5550e51a8e1SChristoph Hellwig xfs_chain_bio( 5560e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 5570e51a8e1SChristoph Hellwig struct writeback_control *wbc, 5580e51a8e1SChristoph Hellwig struct buffer_head *bh) 5590e51a8e1SChristoph Hellwig { 5600e51a8e1SChristoph Hellwig struct bio *new; 5610e51a8e1SChristoph Hellwig 5620e51a8e1SChristoph Hellwig new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 5630e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(new, bh); 5640e51a8e1SChristoph Hellwig 5650e51a8e1SChristoph Hellwig bio_chain(ioend->io_bio, new); 5660e51a8e1SChristoph Hellwig bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 5677637241eSJens Axboe ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 5684e49ea4aSMike Christie submit_bio(ioend->io_bio); 5690e51a8e1SChristoph Hellwig ioend->io_bio = new; 5700e51a8e1SChristoph Hellwig } 5710e51a8e1SChristoph Hellwig 572c59d87c4SChristoph Hellwig /* 573c59d87c4SChristoph Hellwig * Test to see if we've been building up a completion structure for 574c59d87c4SChristoph Hellwig * earlier buffers -- if so, we try to append to this ioend if we 575c59d87c4SChristoph Hellwig * can, otherwise we finish off any current ioend and start another. 576e10de372SDave Chinner * Return the ioend we finished off so that the caller can submit it 577e10de372SDave Chinner * once it has finished processing the dirty page. 578c59d87c4SChristoph Hellwig */ 579c59d87c4SChristoph Hellwig STATIC void 580c59d87c4SChristoph Hellwig xfs_add_to_ioend( 581c59d87c4SChristoph Hellwig struct inode *inode, 582c59d87c4SChristoph Hellwig struct buffer_head *bh, 583c59d87c4SChristoph Hellwig xfs_off_t offset, 584e10de372SDave Chinner struct xfs_writepage_ctx *wpc, 585bb18782aSDave Chinner struct writeback_control *wbc, 586e10de372SDave Chinner struct list_head *iolist) 587c59d87c4SChristoph Hellwig { 588fbcc0256SDave Chinner if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || 5890df61da8SDarrick J. Wong bh->b_blocknr != wpc->last_block + 1 || 5900df61da8SDarrick J. Wong offset != wpc->ioend->io_offset + wpc->ioend->io_size) { 591e10de372SDave Chinner if (wpc->ioend) 592e10de372SDave Chinner list_add(&wpc->ioend->io_list, iolist); 5930e51a8e1SChristoph Hellwig wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh); 594c59d87c4SChristoph Hellwig } 595c59d87c4SChristoph Hellwig 5960e51a8e1SChristoph Hellwig /* 5970e51a8e1SChristoph Hellwig * If the buffer doesn't fit into the bio we need to allocate a new 5980e51a8e1SChristoph Hellwig * one. This shouldn't happen more than once for a given buffer. 
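 * (bio_add_page() typically only rejects the buffer once the bio is full,
 * and the replacement bio allocated by xfs_chain_bio() starts out empty, so
 * a single retry is expected to succeed.)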
5990e51a8e1SChristoph Hellwig */ 6000e51a8e1SChristoph Hellwig while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) 6010e51a8e1SChristoph Hellwig xfs_chain_bio(wpc->ioend, wbc, bh); 602bb18782aSDave Chinner 603fbcc0256SDave Chinner wpc->ioend->io_size += bh->b_size; 604fbcc0256SDave Chinner wpc->last_block = bh->b_blocknr; 605e10de372SDave Chinner xfs_start_buffer_writeback(bh); 606c59d87c4SChristoph Hellwig } 607c59d87c4SChristoph Hellwig 608c59d87c4SChristoph Hellwig STATIC void 609c59d87c4SChristoph Hellwig xfs_map_buffer( 610c59d87c4SChristoph Hellwig struct inode *inode, 611c59d87c4SChristoph Hellwig struct buffer_head *bh, 612c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 613c59d87c4SChristoph Hellwig xfs_off_t offset) 614c59d87c4SChristoph Hellwig { 615c59d87c4SChristoph Hellwig sector_t bn; 616c59d87c4SChristoph Hellwig struct xfs_mount *m = XFS_I(inode)->i_mount; 617c59d87c4SChristoph Hellwig xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); 618c59d87c4SChristoph Hellwig xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); 619c59d87c4SChristoph Hellwig 620c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 621c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 622c59d87c4SChristoph Hellwig 623c59d87c4SChristoph Hellwig bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + 624c59d87c4SChristoph Hellwig ((offset - iomap_offset) >> inode->i_blkbits); 625c59d87c4SChristoph Hellwig 626c59d87c4SChristoph Hellwig ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); 627c59d87c4SChristoph Hellwig 628c59d87c4SChristoph Hellwig bh->b_blocknr = bn; 629c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 630c59d87c4SChristoph Hellwig } 631c59d87c4SChristoph Hellwig 632c59d87c4SChristoph Hellwig STATIC void 633c59d87c4SChristoph Hellwig xfs_map_at_offset( 634c59d87c4SChristoph Hellwig struct inode *inode, 635c59d87c4SChristoph Hellwig struct buffer_head *bh, 636c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 637c59d87c4SChristoph Hellwig xfs_off_t offset) 638c59d87c4SChristoph Hellwig { 639c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 640c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 641c59d87c4SChristoph Hellwig 642c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh, imap, offset); 643c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 644c59d87c4SChristoph Hellwig clear_buffer_delay(bh); 645c59d87c4SChristoph Hellwig clear_buffer_unwritten(bh); 646c59d87c4SChristoph Hellwig } 647c59d87c4SChristoph Hellwig 648c59d87c4SChristoph Hellwig /* 649a49935f2SDave Chinner * Test if a given page contains at least one buffer of a given @type. 650a49935f2SDave Chinner * If @check_all_buffers is true, then we walk all the buffers in the page to 651a49935f2SDave Chinner * try to find one of the type passed in. If it is not set, then the caller only 652a49935f2SDave Chinner * needs to check the first buffer on the page for a match. 
653c59d87c4SChristoph Hellwig */ 654a49935f2SDave Chinner STATIC bool 6556ffc4db5SDave Chinner xfs_check_page_type( 656c59d87c4SChristoph Hellwig struct page *page, 657a49935f2SDave Chinner unsigned int type, 658a49935f2SDave Chinner bool check_all_buffers) 659c59d87c4SChristoph Hellwig { 660a49935f2SDave Chinner struct buffer_head *bh; 661a49935f2SDave Chinner struct buffer_head *head; 662c59d87c4SChristoph Hellwig 663a49935f2SDave Chinner if (PageWriteback(page)) 664a49935f2SDave Chinner return false; 665a49935f2SDave Chinner if (!page->mapping) 666a49935f2SDave Chinner return false; 667a49935f2SDave Chinner if (!page_has_buffers(page)) 668a49935f2SDave Chinner return false; 669c59d87c4SChristoph Hellwig 670c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 671c59d87c4SChristoph Hellwig do { 672a49935f2SDave Chinner if (buffer_unwritten(bh)) { 673a49935f2SDave Chinner if (type == XFS_IO_UNWRITTEN) 674a49935f2SDave Chinner return true; 675a49935f2SDave Chinner } else if (buffer_delay(bh)) { 676805eeb8eSDan Carpenter if (type == XFS_IO_DELALLOC) 677a49935f2SDave Chinner return true; 678a49935f2SDave Chinner } else if (buffer_dirty(bh) && buffer_mapped(bh)) { 679805eeb8eSDan Carpenter if (type == XFS_IO_OVERWRITE) 680a49935f2SDave Chinner return true; 681a49935f2SDave Chinner } 682a49935f2SDave Chinner 683a49935f2SDave Chinner /* If we are only checking the first buffer, we are done now. */ 684a49935f2SDave Chinner if (!check_all_buffers) 685c59d87c4SChristoph Hellwig break; 686c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 687c59d87c4SChristoph Hellwig 688a49935f2SDave Chinner return false; 689c59d87c4SChristoph Hellwig } 690c59d87c4SChristoph Hellwig 691c59d87c4SChristoph Hellwig STATIC void 692c59d87c4SChristoph Hellwig xfs_vm_invalidatepage( 693c59d87c4SChristoph Hellwig struct page *page, 694d47992f8SLukas Czerner unsigned int offset, 695d47992f8SLukas Czerner unsigned int length) 696c59d87c4SChristoph Hellwig { 69734097dfeSLukas Czerner trace_xfs_invalidatepage(page->mapping->host, page, offset, 69834097dfeSLukas Czerner length); 69934097dfeSLukas Czerner block_invalidatepage(page, offset, length); 700c59d87c4SChristoph Hellwig } 701c59d87c4SChristoph Hellwig 702c59d87c4SChristoph Hellwig /* 703c59d87c4SChristoph Hellwig * If the page has delalloc buffers on it, we need to punch them out before we 704c59d87c4SChristoph Hellwig * invalidate the page. If we don't, we leave a stale delalloc mapping on the 705c59d87c4SChristoph Hellwig * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read 706c59d87c4SChristoph Hellwig * is done on that same region - the delalloc extent is returned when none is 707c59d87c4SChristoph Hellwig * supposed to be there. 708c59d87c4SChristoph Hellwig * 709c59d87c4SChristoph Hellwig * We prevent this by truncating away the delalloc regions on the page before 710c59d87c4SChristoph Hellwig * invalidating it. Because they are delalloc, we can do this without needing a 711c59d87c4SChristoph Hellwig * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this 712c59d87c4SChristoph Hellwig * truncation without a transaction as there is no space left for block 713c59d87c4SChristoph Hellwig * reservation (typically why we see a ENOSPC in writeback). 714c59d87c4SChristoph Hellwig * 715c59d87c4SChristoph Hellwig * This is not a performance critical path, so for now just do the punching a 716c59d87c4SChristoph Hellwig * buffer head at a time. 
717c59d87c4SChristoph Hellwig */ 718c59d87c4SChristoph Hellwig STATIC void 719c59d87c4SChristoph Hellwig xfs_aops_discard_page( 720c59d87c4SChristoph Hellwig struct page *page) 721c59d87c4SChristoph Hellwig { 722c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 723c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 724c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 725c59d87c4SChristoph Hellwig loff_t offset = page_offset(page); 726c59d87c4SChristoph Hellwig 727a49935f2SDave Chinner if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) 728c59d87c4SChristoph Hellwig goto out_invalidate; 729c59d87c4SChristoph Hellwig 730c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 731c59d87c4SChristoph Hellwig goto out_invalidate; 732c59d87c4SChristoph Hellwig 733c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 734c59d87c4SChristoph Hellwig "page discard on page %p, inode 0x%llx, offset %llu.", 735c59d87c4SChristoph Hellwig page, ip->i_ino, offset); 736c59d87c4SChristoph Hellwig 737c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 738c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 739c59d87c4SChristoph Hellwig do { 740c59d87c4SChristoph Hellwig int error; 741c59d87c4SChristoph Hellwig xfs_fileoff_t start_fsb; 742c59d87c4SChristoph Hellwig 743c59d87c4SChristoph Hellwig if (!buffer_delay(bh)) 744c59d87c4SChristoph Hellwig goto next_buffer; 745c59d87c4SChristoph Hellwig 746c59d87c4SChristoph Hellwig start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 747c59d87c4SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); 748c59d87c4SChristoph Hellwig if (error) { 749c59d87c4SChristoph Hellwig /* something screwed, just bail */ 750c59d87c4SChristoph Hellwig if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 751c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 752c59d87c4SChristoph Hellwig "page discard unable to remove delalloc mapping."); 753c59d87c4SChristoph Hellwig } 754c59d87c4SChristoph Hellwig break; 755c59d87c4SChristoph Hellwig } 756c59d87c4SChristoph Hellwig next_buffer: 75793407472SFabian Frederick offset += i_blocksize(inode); 758c59d87c4SChristoph Hellwig 759c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 760c59d87c4SChristoph Hellwig 761c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 762c59d87c4SChristoph Hellwig out_invalidate: 76309cbfeafSKirill A. Shutemov xfs_vm_invalidatepage(page, 0, PAGE_SIZE); 764c59d87c4SChristoph Hellwig return; 765c59d87c4SChristoph Hellwig } 766c59d87c4SChristoph Hellwig 767ef473667SDarrick J. Wong static int 768ef473667SDarrick J. Wong xfs_map_cow( 769ef473667SDarrick J. Wong struct xfs_writepage_ctx *wpc, 770ef473667SDarrick J. Wong struct inode *inode, 771ef473667SDarrick J. Wong loff_t offset, 772ef473667SDarrick J. Wong unsigned int *new_type) 773ef473667SDarrick J. Wong { 774ef473667SDarrick J. Wong struct xfs_inode *ip = XFS_I(inode); 775ef473667SDarrick J. Wong struct xfs_bmbt_irec imap; 776092d5d9dSChristoph Hellwig bool is_cow = false; 777ef473667SDarrick J. Wong int error; 778ef473667SDarrick J. Wong 779ef473667SDarrick J. Wong /* 780ef473667SDarrick J. Wong * If we already have a valid COW mapping keep using it. 781ef473667SDarrick J. Wong */ 782ef473667SDarrick J. Wong if (wpc->io_type == XFS_IO_COW) { 783ef473667SDarrick J. Wong wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); 784ef473667SDarrick J. Wong if (wpc->imap_valid) { 785ef473667SDarrick J. Wong *new_type = XFS_IO_COW; 786ef473667SDarrick J. 
Wong return 0; 787ef473667SDarrick J. Wong } 788ef473667SDarrick J. Wong } 789ef473667SDarrick J. Wong 790ef473667SDarrick J. Wong /* 791ef473667SDarrick J. Wong * Else we need to check if there is a COW mapping at this offset. 792ef473667SDarrick J. Wong */ 793ef473667SDarrick J. Wong xfs_ilock(ip, XFS_ILOCK_SHARED); 794092d5d9dSChristoph Hellwig is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap); 795ef473667SDarrick J. Wong xfs_iunlock(ip, XFS_ILOCK_SHARED); 796ef473667SDarrick J. Wong 797ef473667SDarrick J. Wong if (!is_cow) 798ef473667SDarrick J. Wong return 0; 799ef473667SDarrick J. Wong 800ef473667SDarrick J. Wong /* 801ef473667SDarrick J. Wong * And if the COW mapping has a delayed extent here we need to 802ef473667SDarrick J. Wong * allocate real space for it now. 803ef473667SDarrick J. Wong */ 804092d5d9dSChristoph Hellwig if (isnullstartblock(imap.br_startblock)) { 805ef473667SDarrick J. Wong error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, 806ef473667SDarrick J. Wong &imap); 807ef473667SDarrick J. Wong if (error) 808ef473667SDarrick J. Wong return error; 809ef473667SDarrick J. Wong } 810ef473667SDarrick J. Wong 811ef473667SDarrick J. Wong wpc->io_type = *new_type = XFS_IO_COW; 812ef473667SDarrick J. Wong wpc->imap_valid = true; 813ef473667SDarrick J. Wong wpc->imap = imap; 814ef473667SDarrick J. Wong return 0; 815ef473667SDarrick J. Wong } 816ef473667SDarrick J. Wong 817c59d87c4SChristoph Hellwig /* 818e10de372SDave Chinner * We implement an immediate ioend submission policy here to avoid needing to 819e10de372SDave Chinner * chain multiple ioends and hence nest mempool allocations which can violate 820e10de372SDave Chinner * forward progress guarantees we need to provide. The current ioend we are 821e10de372SDave Chinner * adding buffers to is cached on the writepage context, and if the new buffer 822e10de372SDave Chinner * does not append to the cached ioend it will create a new ioend and cache that 823e10de372SDave Chinner * instead. 824e10de372SDave Chinner * 825e10de372SDave Chinner * If a new ioend is created and cached, the old ioend is returned and queued 826e10de372SDave Chinner * locally for submission once the entire page is processed or an error has been 827e10de372SDave Chinner * detected. While ioends are submitted immediately after they are completed, 828e10de372SDave Chinner * batching optimisations are provided by higher level block plugging. 829e10de372SDave Chinner * 830e10de372SDave Chinner * At the end of a writeback pass, there will be a cached ioend remaining on the 831e10de372SDave Chinner * writepage context that the caller will need to submit. 832e10de372SDave Chinner */ 833bfce7d2eSDave Chinner static int 834bfce7d2eSDave Chinner xfs_writepage_map( 835bfce7d2eSDave Chinner struct xfs_writepage_ctx *wpc, 836e10de372SDave Chinner struct writeback_control *wbc, 837bfce7d2eSDave Chinner struct inode *inode, 838bfce7d2eSDave Chinner struct page *page, 839bfce7d2eSDave Chinner loff_t offset, 840bfce7d2eSDave Chinner __uint64_t end_offset) 841bfce7d2eSDave Chinner { 842e10de372SDave Chinner LIST_HEAD(submit_list); 843e10de372SDave Chinner struct xfs_ioend *ioend, *next; 844bfce7d2eSDave Chinner struct buffer_head *bh, *head; 84593407472SFabian Frederick ssize_t len = i_blocksize(inode); 846bfce7d2eSDave Chinner int error = 0; 847bfce7d2eSDave Chinner int count = 0; 848e10de372SDave Chinner int uptodate = 1; 849ef473667SDarrick J. 
Wong unsigned int new_type; 850bfce7d2eSDave Chinner 851bfce7d2eSDave Chinner bh = head = page_buffers(page); 852bfce7d2eSDave Chinner offset = page_offset(page); 853bfce7d2eSDave Chinner do { 854bfce7d2eSDave Chinner if (offset >= end_offset) 855bfce7d2eSDave Chinner break; 856bfce7d2eSDave Chinner if (!buffer_uptodate(bh)) 857bfce7d2eSDave Chinner uptodate = 0; 858bfce7d2eSDave Chinner 859bfce7d2eSDave Chinner /* 860bfce7d2eSDave Chinner * set_page_dirty dirties all buffers in a page, independent 861bfce7d2eSDave Chinner * of their state. The dirty state however is entirely 862bfce7d2eSDave Chinner * meaningless for holes (!mapped && uptodate), so skip 863bfce7d2eSDave Chinner * buffers covering holes here. 864bfce7d2eSDave Chinner */ 865bfce7d2eSDave Chinner if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 866bfce7d2eSDave Chinner wpc->imap_valid = false; 867bfce7d2eSDave Chinner continue; 868bfce7d2eSDave Chinner } 869bfce7d2eSDave Chinner 870ef473667SDarrick J. Wong if (buffer_unwritten(bh)) 871ef473667SDarrick J. Wong new_type = XFS_IO_UNWRITTEN; 872ef473667SDarrick J. Wong else if (buffer_delay(bh)) 873ef473667SDarrick J. Wong new_type = XFS_IO_DELALLOC; 874ef473667SDarrick J. Wong else if (buffer_uptodate(bh)) 875ef473667SDarrick J. Wong new_type = XFS_IO_OVERWRITE; 876ef473667SDarrick J. Wong else { 877bfce7d2eSDave Chinner if (PageUptodate(page)) 878bfce7d2eSDave Chinner ASSERT(buffer_mapped(bh)); 879bfce7d2eSDave Chinner /* 880bfce7d2eSDave Chinner * This buffer is not uptodate and will not be 881bfce7d2eSDave Chinner * written to disk. Ensure that we will put any 882bfce7d2eSDave Chinner * subsequent writeable buffers into a new 883bfce7d2eSDave Chinner * ioend. 884bfce7d2eSDave Chinner */ 885bfce7d2eSDave Chinner wpc->imap_valid = false; 886bfce7d2eSDave Chinner continue; 887bfce7d2eSDave Chinner } 888bfce7d2eSDave Chinner 889ef473667SDarrick J. Wong if (xfs_is_reflink_inode(XFS_I(inode))) { 890ef473667SDarrick J. Wong error = xfs_map_cow(wpc, inode, offset, &new_type); 891ef473667SDarrick J. Wong if (error) 892ef473667SDarrick J. Wong goto out; 893ef473667SDarrick J. Wong } 894ef473667SDarrick J. Wong 895ef473667SDarrick J. Wong if (wpc->io_type != new_type) { 896ef473667SDarrick J. Wong wpc->io_type = new_type; 897ef473667SDarrick J. Wong wpc->imap_valid = false; 898ef473667SDarrick J. Wong } 899ef473667SDarrick J. 
Wong
900bfce7d2eSDave Chinner if (wpc->imap_valid)
901bfce7d2eSDave Chinner wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
902bfce7d2eSDave Chinner offset);
903bfce7d2eSDave Chinner if (!wpc->imap_valid) {
904bfce7d2eSDave Chinner error = xfs_map_blocks(inode, offset, &wpc->imap,
905bfce7d2eSDave Chinner wpc->io_type);
906bfce7d2eSDave Chinner if (error)
907e10de372SDave Chinner goto out;
908bfce7d2eSDave Chinner wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
909bfce7d2eSDave Chinner offset);
910bfce7d2eSDave Chinner }
911bfce7d2eSDave Chinner if (wpc->imap_valid) {
912bfce7d2eSDave Chinner lock_buffer(bh);
913bfce7d2eSDave Chinner if (wpc->io_type != XFS_IO_OVERWRITE)
914bfce7d2eSDave Chinner xfs_map_at_offset(inode, bh, &wpc->imap, offset);
915bb18782aSDave Chinner xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
916bfce7d2eSDave Chinner count++;
917bfce7d2eSDave Chinner }
918bfce7d2eSDave Chinner
919bfce7d2eSDave Chinner } while (offset += len, ((bh = bh->b_this_page) != head));
920bfce7d2eSDave Chinner
921bfce7d2eSDave Chinner if (uptodate && bh == head)
922bfce7d2eSDave Chinner SetPageUptodate(page);
923bfce7d2eSDave Chinner
924e10de372SDave Chinner ASSERT(wpc->ioend || list_empty(&submit_list));
925bfce7d2eSDave Chinner
926e10de372SDave Chinner out:
927bfce7d2eSDave Chinner /*
928e10de372SDave Chinner * On error, we have to fail the ioend here because we have locked
929e10de372SDave Chinner * buffers in the ioend. If we don't do this, we'll deadlock
930e10de372SDave Chinner * invalidating the page as that tries to lock the buffers on the page.
931e10de372SDave Chinner * Also, because we may have set pages under writeback, we have to make
932e10de372SDave Chinner * sure we run IO completion to mark the error state of the IO
933e10de372SDave Chinner * appropriately, so we can't cancel the ioend directly here. That means
934e10de372SDave Chinner * we have to mark this page as under writeback if we included any
935e10de372SDave Chinner * buffers from it in the ioend chain so that completion treats it
936e10de372SDave Chinner * correctly.
937bfce7d2eSDave Chinner *
938e10de372SDave Chinner * If we didn't include the page in the ioend, then on error we can
939e10de372SDave Chinner * simply discard and unlock it as there are no other users of the page
940e10de372SDave Chinner * or its buffers right now. The caller will still need to trigger
941e10de372SDave Chinner * submission of outstanding ioends on the writepage context so they are
942e10de372SDave Chinner * treated correctly on error.
943bfce7d2eSDave Chinner */
944e10de372SDave Chinner if (count) {
945e10de372SDave Chinner xfs_start_page_writeback(page, !error);
946e10de372SDave Chinner
947e10de372SDave Chinner /*
948e10de372SDave Chinner * Preserve the original error if there was one, otherwise catch
949e10de372SDave Chinner * submission errors here and propagate into subsequent ioend
950e10de372SDave Chinner * submissions.
951e10de372SDave Chinner */ 952e10de372SDave Chinner list_for_each_entry_safe(ioend, next, &submit_list, io_list) { 953e10de372SDave Chinner int error2; 954e10de372SDave Chinner 955e10de372SDave Chinner list_del_init(&ioend->io_list); 956e10de372SDave Chinner error2 = xfs_submit_ioend(wbc, ioend, error); 957e10de372SDave Chinner if (error2 && !error) 958e10de372SDave Chinner error = error2; 959e10de372SDave Chinner } 960e10de372SDave Chinner } else if (error) { 961bfce7d2eSDave Chinner xfs_aops_discard_page(page); 962bfce7d2eSDave Chinner ClearPageUptodate(page); 963bfce7d2eSDave Chinner unlock_page(page); 964e10de372SDave Chinner } else { 965e10de372SDave Chinner /* 966e10de372SDave Chinner * We can end up here with no error and nothing to write if we 967e10de372SDave Chinner * race with a partial page truncate on a sub-page block sized 968e10de372SDave Chinner * filesystem. In that case we need to mark the page clean. 969e10de372SDave Chinner */ 970e10de372SDave Chinner xfs_start_page_writeback(page, 1); 971e10de372SDave Chinner end_page_writeback(page); 972bfce7d2eSDave Chinner } 973e10de372SDave Chinner 974bfce7d2eSDave Chinner mapping_set_error(page->mapping, error); 975bfce7d2eSDave Chinner return error; 976bfce7d2eSDave Chinner } 977bfce7d2eSDave Chinner 978c59d87c4SChristoph Hellwig /* 979c59d87c4SChristoph Hellwig * Write out a dirty page. 980c59d87c4SChristoph Hellwig * 981c59d87c4SChristoph Hellwig * For delalloc space on the page we need to allocate space and flush it. 982c59d87c4SChristoph Hellwig * For unwritten space on the page we need to start the conversion to 983c59d87c4SChristoph Hellwig * regular allocated space. 984c59d87c4SChristoph Hellwig * For any other dirty buffer heads on the page we should flush them. 985c59d87c4SChristoph Hellwig */ 986c59d87c4SChristoph Hellwig STATIC int 987fbcc0256SDave Chinner xfs_do_writepage( 988c59d87c4SChristoph Hellwig struct page *page, 989fbcc0256SDave Chinner struct writeback_control *wbc, 990fbcc0256SDave Chinner void *data) 991c59d87c4SChristoph Hellwig { 992fbcc0256SDave Chinner struct xfs_writepage_ctx *wpc = data; 993c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 994c59d87c4SChristoph Hellwig loff_t offset; 995c59d87c4SChristoph Hellwig __uint64_t end_offset; 996ad68972aSDave Chinner pgoff_t end_index; 997c59d87c4SChristoph Hellwig 99834097dfeSLukas Czerner trace_xfs_writepage(inode, page, 0, 0); 999c59d87c4SChristoph Hellwig 1000c59d87c4SChristoph Hellwig ASSERT(page_has_buffers(page)); 1001c59d87c4SChristoph Hellwig 1002c59d87c4SChristoph Hellwig /* 1003c59d87c4SChristoph Hellwig * Refuse to write the page out if we are called from reclaim context. 1004c59d87c4SChristoph Hellwig * 1005c59d87c4SChristoph Hellwig * This avoids stack overflows when called from deeply used stacks in 1006c59d87c4SChristoph Hellwig * random callers for direct reclaim or memcg reclaim. We explicitly 1007c59d87c4SChristoph Hellwig * allow reclaim from kswapd as the stack usage there is relatively low. 1008c59d87c4SChristoph Hellwig * 100994054fa3SMel Gorman * This should never happen except in the case of a VM regression so 101094054fa3SMel Gorman * warn about it. 
1011c59d87c4SChristoph Hellwig */ 101294054fa3SMel Gorman if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 101394054fa3SMel Gorman PF_MEMALLOC)) 1014c59d87c4SChristoph Hellwig goto redirty; 1015c59d87c4SChristoph Hellwig 1016c59d87c4SChristoph Hellwig /* 1017c59d87c4SChristoph Hellwig * Given that we do not allow direct reclaim to call us, we should 1018c59d87c4SChristoph Hellwig * never be called while in a filesystem transaction. 1019c59d87c4SChristoph Hellwig */ 10209070733bSMichal Hocko if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) 1021c59d87c4SChristoph Hellwig goto redirty; 1022c59d87c4SChristoph Hellwig 10238695d27eSJie Liu /* 1024ad68972aSDave Chinner * Is this page beyond the end of the file? 1025ad68972aSDave Chinner * 10268695d27eSJie Liu * The page index is less than the end_index, adjust the end_offset 10278695d27eSJie Liu * to the highest offset that this page should represent. 10288695d27eSJie Liu * ----------------------------------------------------- 10298695d27eSJie Liu * | file mapping | <EOF> | 10308695d27eSJie Liu * ----------------------------------------------------- 10318695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | | 10328695d27eSJie Liu * ^--------------------------------^----------|-------- 10338695d27eSJie Liu * | desired writeback range | see else | 10348695d27eSJie Liu * ---------------------------------^------------------| 10358695d27eSJie Liu */ 1036ad68972aSDave Chinner offset = i_size_read(inode); 103709cbfeafSKirill A. Shutemov end_index = offset >> PAGE_SHIFT; 10388695d27eSJie Liu if (page->index < end_index) 103909cbfeafSKirill A. Shutemov end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT; 10408695d27eSJie Liu else { 10418695d27eSJie Liu /* 10428695d27eSJie Liu * Check whether the page to write out is beyond or straddles 10438695d27eSJie Liu * i_size or not. 10448695d27eSJie Liu * ------------------------------------------------------- 10458695d27eSJie Liu * | file mapping | <EOF> | 10468695d27eSJie Liu * ------------------------------------------------------- 10478695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | Beyond | 10488695d27eSJie Liu * ^--------------------------------^-----------|--------- 10498695d27eSJie Liu * | | Straddles | 10508695d27eSJie Liu * ---------------------------------^-----------|--------| 10518695d27eSJie Liu */ 105209cbfeafSKirill A. Shutemov unsigned offset_into_page = offset & (PAGE_SIZE - 1); 10536b7a03f0SChristoph Hellwig 10546b7a03f0SChristoph Hellwig /* 1055ff9a28f6SJan Kara * Skip the page if it is fully outside i_size, e.g. due to a 1056ff9a28f6SJan Kara * truncate operation that is in progress. We must redirty the 1057ff9a28f6SJan Kara * page so that reclaim stops reclaiming it. Otherwise 1058ff9a28f6SJan Kara * xfs_vm_releasepage() is called on it and gets confused. 10598695d27eSJie Liu * 10608695d27eSJie Liu * Note that the end_index is unsigned long, it would overflow 10618695d27eSJie Liu * if the given offset is greater than 16TB on 32-bit system 10628695d27eSJie Liu * and if we do check the page is fully outside i_size or not 10638695d27eSJie Liu * via "if (page->index >= end_index + 1)" as "end_index + 1" 10648695d27eSJie Liu * will be evaluated to 0. Hence this page will be redirtied 10658695d27eSJie Liu * and be written out repeatedly which would result in an 10668695d27eSJie Liu * infinite loop, the user program that perform this operation 10678695d27eSJie Liu * will hang. 
Instead, we can verify this situation by checking 10688695d27eSJie Liu * whether the page to write is totally beyond i_size or whether its 10698695d27eSJie Liu * offset is just equal to the EOF. 10706b7a03f0SChristoph Hellwig */ 10718695d27eSJie Liu if (page->index > end_index || 10728695d27eSJie Liu (page->index == end_index && offset_into_page == 0)) 1073ff9a28f6SJan Kara goto redirty; 10746b7a03f0SChristoph Hellwig 10756b7a03f0SChristoph Hellwig /* 10766b7a03f0SChristoph Hellwig * The page straddles i_size. It must be zeroed out on each 10776b7a03f0SChristoph Hellwig * and every writepage invocation because it may be mmapped. 10786b7a03f0SChristoph Hellwig * "A file is mapped in multiples of the page size. For a file 10796b7a03f0SChristoph Hellwig * that is not a multiple of the page size, the remaining 10806b7a03f0SChristoph Hellwig * memory is zeroed when mapped, and writes to that region are 10816b7a03f0SChristoph Hellwig * not written out to the file." 10826b7a03f0SChristoph Hellwig */ 108309cbfeafSKirill A. Shutemov zero_user_segment(page, offset_into_page, PAGE_SIZE); 10848695d27eSJie Liu 10858695d27eSJie Liu /* Adjust the end_offset to the end of the file */ 10868695d27eSJie Liu end_offset = offset; 1087c59d87c4SChristoph Hellwig } 1088c59d87c4SChristoph Hellwig 1089e10de372SDave Chinner return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); 1090c59d87c4SChristoph Hellwig 1091c59d87c4SChristoph Hellwig redirty: 1092c59d87c4SChristoph Hellwig redirty_page_for_writepage(wbc, page); 1093c59d87c4SChristoph Hellwig unlock_page(page); 1094c59d87c4SChristoph Hellwig return 0; 1095c59d87c4SChristoph Hellwig } 1096c59d87c4SChristoph Hellwig 1097c59d87c4SChristoph Hellwig STATIC int 1098fbcc0256SDave Chinner xfs_vm_writepage( 1099fbcc0256SDave Chinner struct page *page, 1100fbcc0256SDave Chinner struct writeback_control *wbc) 1101fbcc0256SDave Chinner { 1102fbcc0256SDave Chinner struct xfs_writepage_ctx wpc = { 1103fbcc0256SDave Chinner .io_type = XFS_IO_INVALID, 1104fbcc0256SDave Chinner }; 1105fbcc0256SDave Chinner int ret; 1106fbcc0256SDave Chinner 1107fbcc0256SDave Chinner ret = xfs_do_writepage(page, wbc, &wpc); 1108e10de372SDave Chinner if (wpc.ioend) 1109e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1110e10de372SDave Chinner return ret; 1111fbcc0256SDave Chinner } 1112fbcc0256SDave Chinner 1113fbcc0256SDave Chinner STATIC int 1114c59d87c4SChristoph Hellwig xfs_vm_writepages( 1115c59d87c4SChristoph Hellwig struct address_space *mapping, 1116c59d87c4SChristoph Hellwig struct writeback_control *wbc) 1117c59d87c4SChristoph Hellwig { 1118fbcc0256SDave Chinner struct xfs_writepage_ctx wpc = { 1119fbcc0256SDave Chinner .io_type = XFS_IO_INVALID, 1120fbcc0256SDave Chinner }; 1121fbcc0256SDave Chinner int ret; 1122fbcc0256SDave Chinner 1123c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 11247f6d5b52SRoss Zwisler if (dax_mapping(mapping)) 11257f6d5b52SRoss Zwisler return dax_writeback_mapping_range(mapping, 11267f6d5b52SRoss Zwisler xfs_find_bdev_for_inode(mapping->host), wbc); 11277f6d5b52SRoss Zwisler 1128fbcc0256SDave Chinner ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); 1129e10de372SDave Chinner if (wpc.ioend) 1130e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1131e10de372SDave Chinner return ret; 1132c59d87c4SChristoph Hellwig } 1133c59d87c4SChristoph Hellwig 1134c59d87c4SChristoph Hellwig /* 1135c59d87c4SChristoph Hellwig * Called to move a page into cleanable state - and from there
1136c59d87c4SChristoph Hellwig * to be released. The page should already be clean. We always 1137c59d87c4SChristoph Hellwig * have buffer heads in this call. 1138c59d87c4SChristoph Hellwig * 1139c59d87c4SChristoph Hellwig * Returns 1 if the page is ok to release, 0 otherwise. 1140c59d87c4SChristoph Hellwig */ 1141c59d87c4SChristoph Hellwig STATIC int 1142c59d87c4SChristoph Hellwig xfs_vm_releasepage( 1143c59d87c4SChristoph Hellwig struct page *page, 1144c59d87c4SChristoph Hellwig gfp_t gfp_mask) 1145c59d87c4SChristoph Hellwig { 1146c59d87c4SChristoph Hellwig int delalloc, unwritten; 1147c59d87c4SChristoph Hellwig 114834097dfeSLukas Czerner trace_xfs_releasepage(page->mapping->host, page, 0, 0); 1149c59d87c4SChristoph Hellwig 115099579cceSBrian Foster /* 115199579cceSBrian Foster * mm accommodates an old ext3 case where clean pages might not have had 115299579cceSBrian Foster * the dirty bit cleared. Thus, it can send actual dirty pages to 115399579cceSBrian Foster * ->releasepage() via shrink_active_list(). Conversely, 115499579cceSBrian Foster * block_invalidatepage() can send pages that are still marked dirty 115599579cceSBrian Foster * but otherwise have invalidated buffers. 115699579cceSBrian Foster * 11570a417b8dSJan Kara * We want to release the latter to avoid unnecessary buildup of the 11580a417b8dSJan Kara * LRU and skip the former, warning if we've left any lingering 11590a417b8dSJan Kara * delalloc/unwritten buffers on clean pages. That is, skip pages with 11600a417b8dSJan Kara * delalloc or unwritten buffers (and warn if such a page is not dirty); 11610a417b8dSJan Kara * otherwise try to release the buffers. 116299579cceSBrian Foster */ 1163c59d87c4SChristoph Hellwig xfs_count_page_state(page, &delalloc, &unwritten); 1164c59d87c4SChristoph Hellwig 11650a417b8dSJan Kara if (delalloc) { 11660a417b8dSJan Kara WARN_ON_ONCE(!PageDirty(page)); 1167c59d87c4SChristoph Hellwig return 0; 11680a417b8dSJan Kara } 11690a417b8dSJan Kara if (unwritten) { 11700a417b8dSJan Kara WARN_ON_ONCE(!PageDirty(page)); 1171c59d87c4SChristoph Hellwig return 0; 11720a417b8dSJan Kara } 1173c59d87c4SChristoph Hellwig 1174c59d87c4SChristoph Hellwig return try_to_free_buffers(page); 1175c59d87c4SChristoph Hellwig } 1176c59d87c4SChristoph Hellwig 1177a719370bSDave Chinner /* 11781fdca9c2SDave Chinner * If this is O_DIRECT or the mpage code calling, tell them how large the 11791fdca9c2SDave Chinner * mapping is so that we can avoid repeated get_blocks calls. 11801fdca9c2SDave Chinner * 11811fdca9c2SDave Chinner * If the mapping spans EOF, then we have to break the mapping up as the mapping 11821fdca9c2SDave Chinner * for blocks beyond EOF must be marked new so that sub block regions can be 11831fdca9c2SDave Chinner * correctly zeroed. We can't do this for mappings within EOF unless the mapping 11841fdca9c2SDave Chinner * was just allocated or is unwritten; otherwise the callers would overwrite 11851fdca9c2SDave Chinner * existing data with zeros. Hence we have to split the mapping into a range up 11861fdca9c2SDave Chinner * to and including EOF, and a second mapping for beyond EOF.
11871fdca9c2SDave Chinner */ 11881fdca9c2SDave Chinner static void 11891fdca9c2SDave Chinner xfs_map_trim_size( 11901fdca9c2SDave Chinner struct inode *inode, 11911fdca9c2SDave Chinner sector_t iblock, 11921fdca9c2SDave Chinner struct buffer_head *bh_result, 11931fdca9c2SDave Chinner struct xfs_bmbt_irec *imap, 11941fdca9c2SDave Chinner xfs_off_t offset, 11951fdca9c2SDave Chinner ssize_t size) 11961fdca9c2SDave Chinner { 11971fdca9c2SDave Chinner xfs_off_t mapping_size; 11981fdca9c2SDave Chinner 11991fdca9c2SDave Chinner mapping_size = imap->br_startoff + imap->br_blockcount - iblock; 12001fdca9c2SDave Chinner mapping_size <<= inode->i_blkbits; 12011fdca9c2SDave Chinner 12021fdca9c2SDave Chinner ASSERT(mapping_size > 0); 12031fdca9c2SDave Chinner if (mapping_size > size) 12041fdca9c2SDave Chinner mapping_size = size; 12051fdca9c2SDave Chinner if (offset < i_size_read(inode) && 12061fdca9c2SDave Chinner offset + mapping_size >= i_size_read(inode)) { 12071fdca9c2SDave Chinner /* limit mapping to block that spans EOF */ 12081fdca9c2SDave Chinner mapping_size = roundup_64(i_size_read(inode) - offset, 120993407472SFabian Frederick i_blocksize(inode)); 12101fdca9c2SDave Chinner } 12111fdca9c2SDave Chinner if (mapping_size > LONG_MAX) 12121fdca9c2SDave Chinner mapping_size = LONG_MAX; 12131fdca9c2SDave Chinner 12141fdca9c2SDave Chinner bh_result->b_size = mapping_size; 12151fdca9c2SDave Chinner } 12161fdca9c2SDave Chinner 12170613f16cSDarrick J. Wong static int 1218acdda3aaSChristoph Hellwig xfs_get_blocks( 1219c59d87c4SChristoph Hellwig struct inode *inode, 1220c59d87c4SChristoph Hellwig sector_t iblock, 1221c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1222acdda3aaSChristoph Hellwig int create) 1223c59d87c4SChristoph Hellwig { 1224c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1225c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1226c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 1227c59d87c4SChristoph Hellwig int error = 0; 1228c59d87c4SChristoph Hellwig int lockmode = 0; 1229c59d87c4SChristoph Hellwig struct xfs_bmbt_irec imap; 1230c59d87c4SChristoph Hellwig int nimaps = 1; 1231c59d87c4SChristoph Hellwig xfs_off_t offset; 1232c59d87c4SChristoph Hellwig ssize_t size; 1233c59d87c4SChristoph Hellwig 1234acdda3aaSChristoph Hellwig BUG_ON(create); 12356e8a27a8SChristoph Hellwig 1236c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 1237b474c7aeSEric Sandeen return -EIO; 1238c59d87c4SChristoph Hellwig 1239c59d87c4SChristoph Hellwig offset = (xfs_off_t)iblock << inode->i_blkbits; 124093407472SFabian Frederick ASSERT(bh_result->b_size >= i_blocksize(inode)); 1241c59d87c4SChristoph Hellwig size = bh_result->b_size; 1242c59d87c4SChristoph Hellwig 1243acdda3aaSChristoph Hellwig if (offset >= i_size_read(inode)) 1244c59d87c4SChristoph Hellwig return 0; 1245c59d87c4SChristoph Hellwig 1246507630b2SDave Chinner /* 1247507630b2SDave Chinner * Direct I/O is usually done on preallocated files, so try getting 12486e8a27a8SChristoph Hellwig * a block mapping without an exclusive lock first. 
1249507630b2SDave Chinner */ 1250309ecac8SChristoph Hellwig lockmode = xfs_ilock_data_map_shared(ip); 1251c59d87c4SChristoph Hellwig 1252d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 1253d2c28191SDave Chinner if (offset + size > mp->m_super->s_maxbytes) 1254d2c28191SDave Chinner size = mp->m_super->s_maxbytes - offset; 1255c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1256c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 1257c59d87c4SChristoph Hellwig 12585c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 12595c8ed202SDave Chinner &imap, &nimaps, XFS_BMAPI_ENTIRE); 1260c59d87c4SChristoph Hellwig if (error) 1261c59d87c4SChristoph Hellwig goto out_unlock; 1262c59d87c4SChristoph Hellwig 1263acdda3aaSChristoph Hellwig if (nimaps) { 1264d5cc2e3fSDave Chinner trace_xfs_get_blocks_found(ip, offset, size, 126563fbb4c1SChristoph Hellwig imap.br_state == XFS_EXT_UNWRITTEN ? 126663fbb4c1SChristoph Hellwig XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, &imap); 1267507630b2SDave Chinner xfs_iunlock(ip, lockmode); 1268c59d87c4SChristoph Hellwig } else { 1269c59d87c4SChristoph Hellwig trace_xfs_get_blocks_notfound(ip, offset, size); 1270c59d87c4SChristoph Hellwig goto out_unlock; 1271c59d87c4SChristoph Hellwig } 1272c59d87c4SChristoph Hellwig 12731fdca9c2SDave Chinner /* trim mapping down to size requested */ 12746e8a27a8SChristoph Hellwig xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); 12751fdca9c2SDave Chinner 1276c59d87c4SChristoph Hellwig /* 1277a719370bSDave Chinner * For unwritten extents do not report a disk address in the buffered 1278a719370bSDave Chinner * read case (treat as if we're reading into a hole). 1279c59d87c4SChristoph Hellwig */ 12809c4f29d3SChristoph Hellwig if (xfs_bmap_is_real_extent(&imap)) 1281c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh_result, &imap, offset); 1282c59d87c4SChristoph Hellwig 1283c59d87c4SChristoph Hellwig /* 1284c59d87c4SChristoph Hellwig * If this is a realtime file, data may be on a different device 1285c59d87c4SChristoph Hellwig * to that pointed to from the buffer_head b_bdev currently. 1286c59d87c4SChristoph Hellwig */ 1287c59d87c4SChristoph Hellwig bh_result->b_bdev = xfs_find_bdev_for_inode(inode); 1288c59d87c4SChristoph Hellwig return 0; 1289c59d87c4SChristoph Hellwig 1290c59d87c4SChristoph Hellwig out_unlock: 1291c59d87c4SChristoph Hellwig xfs_iunlock(ip, lockmode); 12922451337dSDave Chinner return error; 1293c59d87c4SChristoph Hellwig } 1294c59d87c4SChristoph Hellwig 1295c59d87c4SChristoph Hellwig STATIC ssize_t 1296c59d87c4SChristoph Hellwig xfs_vm_direct_IO( 1297c59d87c4SChristoph Hellwig struct kiocb *iocb, 1298c8b8e32dSChristoph Hellwig struct iov_iter *iter) 1299c59d87c4SChristoph Hellwig { 1300c59d87c4SChristoph Hellwig /* 1301fa8d972dSChristoph Hellwig * We just need the method present so that open/fcntl allow direct I/O.
1302c59d87c4SChristoph Hellwig */ 1303fa8d972dSChristoph Hellwig return -EINVAL; 1304c59d87c4SChristoph Hellwig } 1305c59d87c4SChristoph Hellwig 1306c59d87c4SChristoph Hellwig STATIC sector_t 1307c59d87c4SChristoph Hellwig xfs_vm_bmap( 1308c59d87c4SChristoph Hellwig struct address_space *mapping, 1309c59d87c4SChristoph Hellwig sector_t block) 1310c59d87c4SChristoph Hellwig { 1311c59d87c4SChristoph Hellwig struct inode *inode = (struct inode *)mapping->host; 1312c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1313c59d87c4SChristoph Hellwig 1314c59d87c4SChristoph Hellwig trace_xfs_vm_bmap(XFS_I(inode)); 1315db1327b1SDarrick J. Wong 1316db1327b1SDarrick J. Wong /* 1317db1327b1SDarrick J. Wong * The swap code (ab-)uses ->bmap to get a block mapping and then 1318db1327b1SDarrick J. Wong * bypasses the file system for actual I/O. We really can't allow 1319db1327b1SDarrick J. Wong * that on reflink inodes, so we have to skip out here. And yes, 1320db1327b1SDarrick J. Wong * 0 is the magic code for a bmap error. 1321db1327b1SDarrick J. Wong */ 132265523218SChristoph Hellwig if (xfs_is_reflink_inode(ip)) 1323db1327b1SDarrick J. Wong return 0; 132465523218SChristoph Hellwig 13254bc1ea6bSDave Chinner filemap_write_and_wait(mapping); 1326c59d87c4SChristoph Hellwig return generic_block_bmap(mapping, block, xfs_get_blocks); 1327c59d87c4SChristoph Hellwig } 1328c59d87c4SChristoph Hellwig 1329c59d87c4SChristoph Hellwig STATIC int 1330c59d87c4SChristoph Hellwig xfs_vm_readpage( 1331c59d87c4SChristoph Hellwig struct file *unused, 1332c59d87c4SChristoph Hellwig struct page *page) 1333c59d87c4SChristoph Hellwig { 1334121e213eSDave Chinner trace_xfs_vm_readpage(page->mapping->host, 1); 1335c59d87c4SChristoph Hellwig return mpage_readpage(page, xfs_get_blocks); 1336c59d87c4SChristoph Hellwig } 1337c59d87c4SChristoph Hellwig 1338c59d87c4SChristoph Hellwig STATIC int 1339c59d87c4SChristoph Hellwig xfs_vm_readpages( 1340c59d87c4SChristoph Hellwig struct file *unused, 1341c59d87c4SChristoph Hellwig struct address_space *mapping, 1342c59d87c4SChristoph Hellwig struct list_head *pages, 1343c59d87c4SChristoph Hellwig unsigned nr_pages) 1344c59d87c4SChristoph Hellwig { 1345121e213eSDave Chinner trace_xfs_vm_readpages(mapping->host, nr_pages); 1346c59d87c4SChristoph Hellwig return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1347c59d87c4SChristoph Hellwig } 1348c59d87c4SChristoph Hellwig 134922e757a4SDave Chinner /* 135022e757a4SDave Chinner * This is basically a copy of __set_page_dirty_buffers() with one 135122e757a4SDave Chinner * small tweak: buffers beyond EOF do not get marked dirty. If we mark them 135222e757a4SDave Chinner * dirty, we'll never be able to clean them because we don't write buffers 135322e757a4SDave Chinner * beyond EOF, and that means we can't invalidate pages that span EOF 135422e757a4SDave Chinner * that have been marked dirty. Further, the dirty state can leak into 135522e757a4SDave Chinner * the file interior if the file is extended, resulting in all sorts of 135622e757a4SDave Chinner * bad things happening as the state does not match the underlying data. 135722e757a4SDave Chinner * 135822e757a4SDave Chinner * XXX: this really indicates that bufferheads in XFS need to die. Warts like 135922e757a4SDave Chinner * this only exist because of bufferheads and how the generic code manages them.
136022e757a4SDave Chinner */ 136122e757a4SDave Chinner STATIC int 136222e757a4SDave Chinner xfs_vm_set_page_dirty( 136322e757a4SDave Chinner struct page *page) 136422e757a4SDave Chinner { 136522e757a4SDave Chinner struct address_space *mapping = page->mapping; 136622e757a4SDave Chinner struct inode *inode = mapping->host; 136722e757a4SDave Chinner loff_t end_offset; 136822e757a4SDave Chinner loff_t offset; 136922e757a4SDave Chinner int newly_dirty; 137022e757a4SDave Chinner 137122e757a4SDave Chinner if (unlikely(!mapping)) 137222e757a4SDave Chinner return !TestSetPageDirty(page); 137322e757a4SDave Chinner 137422e757a4SDave Chinner end_offset = i_size_read(inode); 137522e757a4SDave Chinner offset = page_offset(page); 137622e757a4SDave Chinner 137722e757a4SDave Chinner spin_lock(&mapping->private_lock); 137822e757a4SDave Chinner if (page_has_buffers(page)) { 137922e757a4SDave Chinner struct buffer_head *head = page_buffers(page); 138022e757a4SDave Chinner struct buffer_head *bh = head; 138122e757a4SDave Chinner 138222e757a4SDave Chinner do { 138322e757a4SDave Chinner if (offset < end_offset) 138422e757a4SDave Chinner set_buffer_dirty(bh); 138522e757a4SDave Chinner bh = bh->b_this_page; 138693407472SFabian Frederick offset += i_blocksize(inode); 138722e757a4SDave Chinner } while (bh != head); 138822e757a4SDave Chinner } 1389c4843a75SGreg Thelen /* 139081f8c3a4SJohannes Weiner * Lock out page->mem_cgroup migration to keep PageDirty 139181f8c3a4SJohannes Weiner * synchronized with per-memcg dirty page counters. 1392c4843a75SGreg Thelen */ 139362cccb8cSJohannes Weiner lock_page_memcg(page); 139422e757a4SDave Chinner newly_dirty = !TestSetPageDirty(page); 139522e757a4SDave Chinner spin_unlock(&mapping->private_lock); 139622e757a4SDave Chinner 139722e757a4SDave Chinner if (newly_dirty) { 139822e757a4SDave Chinner /* sigh - __set_page_dirty() is static, so copy it here, too */ 139922e757a4SDave Chinner unsigned long flags; 140022e757a4SDave Chinner 140122e757a4SDave Chinner spin_lock_irqsave(&mapping->tree_lock, flags); 140222e757a4SDave Chinner if (page->mapping) { /* Race with truncate? 
*/ 140322e757a4SDave Chinner WARN_ON_ONCE(!PageUptodate(page)); 140462cccb8cSJohannes Weiner account_page_dirtied(page, mapping); 140522e757a4SDave Chinner radix_tree_tag_set(&mapping->page_tree, 140622e757a4SDave Chinner page_index(page), PAGECACHE_TAG_DIRTY); 140722e757a4SDave Chinner } 140822e757a4SDave Chinner spin_unlock_irqrestore(&mapping->tree_lock, flags); 140922e757a4SDave Chinner } 141062cccb8cSJohannes Weiner unlock_page_memcg(page); 1411c4843a75SGreg Thelen if (newly_dirty) 1412c4843a75SGreg Thelen __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 141322e757a4SDave Chinner return newly_dirty; 141422e757a4SDave Chinner } 141522e757a4SDave Chinner 1416c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = { 1417c59d87c4SChristoph Hellwig .readpage = xfs_vm_readpage, 1418c59d87c4SChristoph Hellwig .readpages = xfs_vm_readpages, 1419c59d87c4SChristoph Hellwig .writepage = xfs_vm_writepage, 1420c59d87c4SChristoph Hellwig .writepages = xfs_vm_writepages, 142122e757a4SDave Chinner .set_page_dirty = xfs_vm_set_page_dirty, 1422c59d87c4SChristoph Hellwig .releasepage = xfs_vm_releasepage, 1423c59d87c4SChristoph Hellwig .invalidatepage = xfs_vm_invalidatepage, 1424c59d87c4SChristoph Hellwig .bmap = xfs_vm_bmap, 1425c59d87c4SChristoph Hellwig .direct_IO = xfs_vm_direct_IO, 1426c59d87c4SChristoph Hellwig .migratepage = buffer_migrate_page, 1427c59d87c4SChristoph Hellwig .is_partially_uptodate = block_is_partially_uptodate, 1428c59d87c4SChristoph Hellwig .error_remove_page = generic_error_remove_page, 1429c59d87c4SChristoph Hellwig }; 1430
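/*
 * Editor's illustrative sketch (not part of this file's build): a standalone
 * userspace program that mirrors how xfs_do_writepage() above classifies a
 * page against i_size, i.e. whether the page lies fully inside EOF, straddles
 * it (and therefore needs its tail zeroed before writeback), or sits fully
 * beyond it (and is simply redirtied and skipped). The names classify_page,
 * EX_PAGE_SHIFT and EX_PAGE_SIZE are inventions for the example only, and a
 * 4096-byte page size is assumed; the kernel code uses the architecture's
 * PAGE_SHIFT/PAGE_SIZE.
 */
#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SHIFT	12
#define EX_PAGE_SIZE	(1ULL << EX_PAGE_SHIFT)

/*
 * Classify one page index against the file size the same way
 * xfs_do_writepage() does before handing the page to xfs_writepage_map().
 */
static void classify_page(uint64_t page_index, uint64_t isize)
{
	uint64_t end_index = isize >> EX_PAGE_SHIFT;
	uint64_t offset_into_page = isize & (EX_PAGE_SIZE - 1);

	if (page_index < end_index) {
		/* Page is fully inside i_size: write the whole page. */
		uint64_t end_offset = (page_index + 1) << EX_PAGE_SHIFT;

		printf("page %llu: inside EOF, end_offset=%llu\n",
		       (unsigned long long)page_index,
		       (unsigned long long)end_offset);
	} else if (page_index > end_index ||
		   (page_index == end_index && offset_into_page == 0)) {
		/* Page is fully beyond i_size: redirty and skip it. */
		printf("page %llu: beyond EOF, redirty\n",
		       (unsigned long long)page_index);
	} else {
		/*
		 * Page straddles i_size: zero from offset_into_page to the
		 * end of the page, then write back only up to i_size.
		 */
		printf("page %llu: straddles EOF, zero tail from %llu, end_offset=%llu\n",
		       (unsigned long long)page_index,
		       (unsigned long long)offset_into_page,
		       (unsigned long long)isize);
	}
}

int main(void)
{
	uint64_t isize = 10000;		/* example file size in bytes */

	classify_page(0, isize);	/* fully inside EOF */
	classify_page(2, isize);	/* straddles EOF at byte 10000 */
	classify_page(3, isize);	/* fully beyond EOF */
	return 0;
}

/*
 * For isize == 10000 the straddling page is index 2 (file bytes 8192..12287):
 * offset_into_page is 1808, so bytes 1808..4095 of that page are zeroed,
 * which corresponds to the zero_user_segment() call in xfs_do_writepage().
 */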