1c59d87c4SChristoph Hellwig /* 2c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3c59d87c4SChristoph Hellwig * All Rights Reserved. 4c59d87c4SChristoph Hellwig * 5c59d87c4SChristoph Hellwig * This program is free software; you can redistribute it and/or 6c59d87c4SChristoph Hellwig * modify it under the terms of the GNU General Public License as 7c59d87c4SChristoph Hellwig * published by the Free Software Foundation. 8c59d87c4SChristoph Hellwig * 9c59d87c4SChristoph Hellwig * This program is distributed in the hope that it would be useful, 10c59d87c4SChristoph Hellwig * but WITHOUT ANY WARRANTY; without even the implied warranty of 11c59d87c4SChristoph Hellwig * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12c59d87c4SChristoph Hellwig * GNU General Public License for more details. 13c59d87c4SChristoph Hellwig * 14c59d87c4SChristoph Hellwig * You should have received a copy of the GNU General Public License 15c59d87c4SChristoph Hellwig * along with this program; if not, write the Free Software Foundation, 16c59d87c4SChristoph Hellwig * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17c59d87c4SChristoph Hellwig */ 18c59d87c4SChristoph Hellwig #include "xfs.h" 1970a9883cSDave Chinner #include "xfs_shared.h" 20239880efSDave Chinner #include "xfs_format.h" 21239880efSDave Chinner #include "xfs_log_format.h" 22239880efSDave Chinner #include "xfs_trans_resv.h" 23c59d87c4SChristoph Hellwig #include "xfs_mount.h" 24c59d87c4SChristoph Hellwig #include "xfs_inode.h" 25239880efSDave Chinner #include "xfs_trans.h" 26281627dfSChristoph Hellwig #include "xfs_inode_item.h" 27c59d87c4SChristoph Hellwig #include "xfs_alloc.h" 28c59d87c4SChristoph Hellwig #include "xfs_error.h" 29c59d87c4SChristoph Hellwig #include "xfs_iomap.h" 30c59d87c4SChristoph Hellwig #include "xfs_trace.h" 31c59d87c4SChristoph Hellwig #include "xfs_bmap.h" 3268988114SDave Chinner #include "xfs_bmap_util.h" 33a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 34ef473667SDarrick J. 
Wong #include "xfs_reflink.h" 35c59d87c4SChristoph Hellwig #include <linux/gfp.h> 36c59d87c4SChristoph Hellwig #include <linux/mpage.h> 37c59d87c4SChristoph Hellwig #include <linux/pagevec.h> 38c59d87c4SChristoph Hellwig #include <linux/writeback.h> 39c59d87c4SChristoph Hellwig 40273dda76SChristoph Hellwig /* flags for direct write completions */ 41273dda76SChristoph Hellwig #define XFS_DIO_FLAG_UNWRITTEN (1 << 0) 42273dda76SChristoph Hellwig #define XFS_DIO_FLAG_APPEND (1 << 1) 43273dda76SChristoph Hellwig 44fbcc0256SDave Chinner /* 45fbcc0256SDave Chinner * structure owned by writepages passed to individual writepage calls 46fbcc0256SDave Chinner */ 47fbcc0256SDave Chinner struct xfs_writepage_ctx { 48fbcc0256SDave Chinner struct xfs_bmbt_irec imap; 49fbcc0256SDave Chinner bool imap_valid; 50fbcc0256SDave Chinner unsigned int io_type; 51fbcc0256SDave Chinner struct xfs_ioend *ioend; 52fbcc0256SDave Chinner sector_t last_block; 53fbcc0256SDave Chinner }; 54fbcc0256SDave Chinner 55c59d87c4SChristoph Hellwig void 56c59d87c4SChristoph Hellwig xfs_count_page_state( 57c59d87c4SChristoph Hellwig struct page *page, 58c59d87c4SChristoph Hellwig int *delalloc, 59c59d87c4SChristoph Hellwig int *unwritten) 60c59d87c4SChristoph Hellwig { 61c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 62c59d87c4SChristoph Hellwig 63c59d87c4SChristoph Hellwig *delalloc = *unwritten = 0; 64c59d87c4SChristoph Hellwig 65c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 66c59d87c4SChristoph Hellwig do { 67c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) 68c59d87c4SChristoph Hellwig (*unwritten) = 1; 69c59d87c4SChristoph Hellwig else if (buffer_delay(bh)) 70c59d87c4SChristoph Hellwig (*delalloc) = 1; 71c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 72c59d87c4SChristoph Hellwig } 73c59d87c4SChristoph Hellwig 7420a90f58SRoss Zwisler struct block_device * 75c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode( 76c59d87c4SChristoph Hellwig struct inode *inode) 77c59d87c4SChristoph Hellwig { 78c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 79c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 80c59d87c4SChristoph Hellwig 81c59d87c4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip)) 82c59d87c4SChristoph Hellwig return mp->m_rtdev_targp->bt_bdev; 83c59d87c4SChristoph Hellwig else 84c59d87c4SChristoph Hellwig return mp->m_ddev_targp->bt_bdev; 85c59d87c4SChristoph Hellwig } 86c59d87c4SChristoph Hellwig 87c59d87c4SChristoph Hellwig /* 8837992c18SDave Chinner * We're now finished for good with this page. Update the page state via the 8937992c18SDave Chinner * associated buffer_heads, paying attention to the start and end offsets that 9037992c18SDave Chinner * we need to process on the page. 9128b783e4SDave Chinner * 9228b783e4SDave Chinner * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last 9328b783e4SDave Chinner * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or 9428b783e4SDave Chinner * the page at all, as we may be racing with memory reclaim and it can free both 9528b783e4SDave Chinner * the bufferhead chain and the page as it will see the page as clean and 9628b783e4SDave Chinner * unused. 
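 *
 * Hence the walk below caches the buffer size and each buffer's b_this_page
 * pointer before calling bh->b_end_io(), and never touches the bufferhead or
 * the page again once end_io has been called on it.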
9737992c18SDave Chinner */ 9837992c18SDave Chinner static void 9937992c18SDave Chinner xfs_finish_page_writeback( 10037992c18SDave Chinner struct inode *inode, 10137992c18SDave Chinner struct bio_vec *bvec, 10237992c18SDave Chinner int error) 10337992c18SDave Chinner { 10437992c18SDave Chinner unsigned int end = bvec->bv_offset + bvec->bv_len - 1; 10528b783e4SDave Chinner struct buffer_head *head, *bh, *next; 10637992c18SDave Chinner unsigned int off = 0; 10728b783e4SDave Chinner unsigned int bsize; 10837992c18SDave Chinner 10937992c18SDave Chinner ASSERT(bvec->bv_offset < PAGE_SIZE); 110690a7871SChristoph Hellwig ASSERT((bvec->bv_offset & ((1 << inode->i_blkbits) - 1)) == 0); 11137992c18SDave Chinner ASSERT(end < PAGE_SIZE); 112690a7871SChristoph Hellwig ASSERT((bvec->bv_len & ((1 << inode->i_blkbits) - 1)) == 0); 11337992c18SDave Chinner 11437992c18SDave Chinner bh = head = page_buffers(bvec->bv_page); 11537992c18SDave Chinner 11628b783e4SDave Chinner bsize = bh->b_size; 11737992c18SDave Chinner do { 11828b783e4SDave Chinner next = bh->b_this_page; 11937992c18SDave Chinner if (off < bvec->bv_offset) 12037992c18SDave Chinner goto next_bh; 12137992c18SDave Chinner if (off > end) 12237992c18SDave Chinner break; 12337992c18SDave Chinner bh->b_end_io(bh, !error); 12437992c18SDave Chinner next_bh: 12528b783e4SDave Chinner off += bsize; 12628b783e4SDave Chinner } while ((bh = next) != head); 12737992c18SDave Chinner } 12837992c18SDave Chinner 12937992c18SDave Chinner /* 13037992c18SDave Chinner * We're now finished for good with this ioend structure. Update the page 13137992c18SDave Chinner * state, release holds on bios, and finally free up memory. Do not use the 13237992c18SDave Chinner * ioend after this. 133c59d87c4SChristoph Hellwig */ 134c59d87c4SChristoph Hellwig STATIC void 135c59d87c4SChristoph Hellwig xfs_destroy_ioend( 1360e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 1370e51a8e1SChristoph Hellwig int error) 138c59d87c4SChristoph Hellwig { 13937992c18SDave Chinner struct inode *inode = ioend->io_inode; 1400e51a8e1SChristoph Hellwig struct bio *last = ioend->io_bio; 14137992c18SDave Chinner struct bio *bio, *next; 142c59d87c4SChristoph Hellwig 1430e51a8e1SChristoph Hellwig for (bio = &ioend->io_inline_bio; bio; bio = next) { 14437992c18SDave Chinner struct bio_vec *bvec; 14537992c18SDave Chinner int i; 14637992c18SDave Chinner 1470e51a8e1SChristoph Hellwig /* 1480e51a8e1SChristoph Hellwig * For the last bio, bi_private points to the ioend, so we 1490e51a8e1SChristoph Hellwig * need to explicitly end the iteration here. 1500e51a8e1SChristoph Hellwig */ 1510e51a8e1SChristoph Hellwig if (bio == last) 1520e51a8e1SChristoph Hellwig next = NULL; 1530e51a8e1SChristoph Hellwig else 15437992c18SDave Chinner next = bio->bi_private; 15537992c18SDave Chinner 15637992c18SDave Chinner /* walk each page on bio, ending page IO on them */ 15737992c18SDave Chinner bio_for_each_segment_all(bvec, bio, i) 15837992c18SDave Chinner xfs_finish_page_writeback(inode, bvec, error); 15937992c18SDave Chinner 16037992c18SDave Chinner bio_put(bio); 161c59d87c4SChristoph Hellwig } 162c59d87c4SChristoph Hellwig } 163c59d87c4SChristoph Hellwig 164c59d87c4SChristoph Hellwig /* 165fc0063c4SChristoph Hellwig * Fast and loose check if this write could update the on-disk inode size. 
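 * For example, an ioend covering [io_offset, io_offset + io_size) that ends
 * at or below the current on-disk di_size cannot move the on-disk size, so
 * no setfilesize transaction needs to be reserved for it.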
166fc0063c4SChristoph Hellwig */ 167fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 168fc0063c4SChristoph Hellwig { 169fc0063c4SChristoph Hellwig return ioend->io_offset + ioend->io_size > 170fc0063c4SChristoph Hellwig XFS_I(ioend->io_inode)->i_d.di_size; 171fc0063c4SChristoph Hellwig } 172fc0063c4SChristoph Hellwig 173281627dfSChristoph Hellwig STATIC int 174281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc( 175281627dfSChristoph Hellwig struct xfs_ioend *ioend) 176281627dfSChristoph Hellwig { 177281627dfSChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 178281627dfSChristoph Hellwig struct xfs_trans *tp; 179281627dfSChristoph Hellwig int error; 180281627dfSChristoph Hellwig 181253f4911SChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 182253f4911SChristoph Hellwig if (error) 183281627dfSChristoph Hellwig return error; 184281627dfSChristoph Hellwig 185281627dfSChristoph Hellwig ioend->io_append_trans = tp; 186281627dfSChristoph Hellwig 187281627dfSChristoph Hellwig /* 188437a255aSDave Chinner * We may pass freeze protection with a transaction. So tell lockdep 189d9457dc0SJan Kara * we released it. 190d9457dc0SJan Kara */ 191bee9182dSOleg Nesterov __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); 192d9457dc0SJan Kara /* 193281627dfSChristoph Hellwig * We hand off the transaction to the completion thread now, so 194281627dfSChristoph Hellwig * clear the flag here. 195281627dfSChristoph Hellwig */ 196281627dfSChristoph Hellwig current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 197281627dfSChristoph Hellwig return 0; 198281627dfSChristoph Hellwig } 199281627dfSChristoph Hellwig 200fc0063c4SChristoph Hellwig /* 2012813d682SChristoph Hellwig * Update on-disk file size now that data has been written to disk. 
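 *
 * If xfs_new_eof() reports that the completed range does not extend the
 * on-disk size (it returns zero), the transaction is simply cancelled and
 * the inode is left untouched.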
202c59d87c4SChristoph Hellwig */ 203281627dfSChristoph Hellwig STATIC int 204e372843aSChristoph Hellwig __xfs_setfilesize( 2052ba66237SChristoph Hellwig struct xfs_inode *ip, 2062ba66237SChristoph Hellwig struct xfs_trans *tp, 2072ba66237SChristoph Hellwig xfs_off_t offset, 2082ba66237SChristoph Hellwig size_t size) 209c59d87c4SChristoph Hellwig { 210c59d87c4SChristoph Hellwig xfs_fsize_t isize; 211c59d87c4SChristoph Hellwig 212aa6bf01dSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 2132ba66237SChristoph Hellwig isize = xfs_new_eof(ip, offset + size); 214281627dfSChristoph Hellwig if (!isize) { 215281627dfSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 2164906e215SChristoph Hellwig xfs_trans_cancel(tp); 217281627dfSChristoph Hellwig return 0; 218c59d87c4SChristoph Hellwig } 219c59d87c4SChristoph Hellwig 2202ba66237SChristoph Hellwig trace_xfs_setfilesize(ip, offset, size); 221281627dfSChristoph Hellwig 222281627dfSChristoph Hellwig ip->i_d.di_size = isize; 223281627dfSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 224281627dfSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 225281627dfSChristoph Hellwig 22670393313SChristoph Hellwig return xfs_trans_commit(tp); 227c59d87c4SChristoph Hellwig } 228c59d87c4SChristoph Hellwig 229e372843aSChristoph Hellwig int 230e372843aSChristoph Hellwig xfs_setfilesize( 231e372843aSChristoph Hellwig struct xfs_inode *ip, 232e372843aSChristoph Hellwig xfs_off_t offset, 233e372843aSChristoph Hellwig size_t size) 234e372843aSChristoph Hellwig { 235e372843aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 236e372843aSChristoph Hellwig struct xfs_trans *tp; 237e372843aSChristoph Hellwig int error; 238e372843aSChristoph Hellwig 239e372843aSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 240e372843aSChristoph Hellwig if (error) 241e372843aSChristoph Hellwig return error; 242e372843aSChristoph Hellwig 243e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, offset, size); 244e372843aSChristoph Hellwig } 245e372843aSChristoph Hellwig 2462ba66237SChristoph Hellwig STATIC int 2472ba66237SChristoph Hellwig xfs_setfilesize_ioend( 2480e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 2490e51a8e1SChristoph Hellwig int error) 2502ba66237SChristoph Hellwig { 2512ba66237SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 2522ba66237SChristoph Hellwig struct xfs_trans *tp = ioend->io_append_trans; 2532ba66237SChristoph Hellwig 2542ba66237SChristoph Hellwig /* 2552ba66237SChristoph Hellwig * The transaction may have been allocated in the I/O submission thread, 2562ba66237SChristoph Hellwig * thus we need to mark ourselves as being in a transaction manually. 2572ba66237SChristoph Hellwig * Similarly for freeze protection. 2582ba66237SChristoph Hellwig */ 2592ba66237SChristoph Hellwig current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 260bee9182dSOleg Nesterov __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 2612ba66237SChristoph Hellwig 2625cb13dcdSZhaohongjiang /* we abort the update if there was an IO error */ 2630e51a8e1SChristoph Hellwig if (error) { 2645cb13dcdSZhaohongjiang xfs_trans_cancel(tp); 2650e51a8e1SChristoph Hellwig return error; 2665cb13dcdSZhaohongjiang } 2675cb13dcdSZhaohongjiang 268e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 2692ba66237SChristoph Hellwig } 2702ba66237SChristoph Hellwig 271c59d87c4SChristoph Hellwig /* 272c59d87c4SChristoph Hellwig * IO write completion. 
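 *
 * Depending on the ioend type this remaps COW staging blocks into the data
 * fork, converts unwritten extents to written ones, or updates the on-disk
 * file size for appending writes; non-appending overwrites fall through with
 * nothing to do.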
273c59d87c4SChristoph Hellwig */ 274c59d87c4SChristoph Hellwig STATIC void 275c59d87c4SChristoph Hellwig xfs_end_io( 276c59d87c4SChristoph Hellwig struct work_struct *work) 277c59d87c4SChristoph Hellwig { 2780e51a8e1SChristoph Hellwig struct xfs_ioend *ioend = 2790e51a8e1SChristoph Hellwig container_of(work, struct xfs_ioend, io_work); 280c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 2810e51a8e1SChristoph Hellwig int error = ioend->io_bio->bi_error; 282c59d87c4SChristoph Hellwig 283af055e37SBrian Foster /* 284af055e37SBrian Foster * Set an error if the mount has shut down and proceed with end I/O 285af055e37SBrian Foster * processing so it can perform whatever cleanups are necessary. 286af055e37SBrian Foster */ 287af055e37SBrian Foster if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 2880e51a8e1SChristoph Hellwig error = -EIO; 28904f658eeSChristoph Hellwig 290c59d87c4SChristoph Hellwig /* 291*43caeb18SDarrick J. Wong * For a CoW extent, we need to move the mapping from the CoW fork 292*43caeb18SDarrick J. Wong * to the data fork. If instead an error happened, just dump the 293*43caeb18SDarrick J. Wong * new blocks. 294*43caeb18SDarrick J. Wong */ 295*43caeb18SDarrick J. Wong if (ioend->io_type == XFS_IO_COW) { 296*43caeb18SDarrick J. Wong if (error) 297*43caeb18SDarrick J. Wong goto done; 298*43caeb18SDarrick J. Wong if (ioend->io_bio->bi_error) { 299*43caeb18SDarrick J. Wong error = xfs_reflink_cancel_cow_range(ip, 300*43caeb18SDarrick J. Wong ioend->io_offset, ioend->io_size); 301*43caeb18SDarrick J. Wong goto done; 302*43caeb18SDarrick J. Wong } 303*43caeb18SDarrick J. Wong error = xfs_reflink_end_cow(ip, ioend->io_offset, 304*43caeb18SDarrick J. Wong ioend->io_size); 305*43caeb18SDarrick J. Wong if (error) 306*43caeb18SDarrick J. Wong goto done; 307*43caeb18SDarrick J. Wong } 308*43caeb18SDarrick J. Wong 309*43caeb18SDarrick J. Wong /* 310c59d87c4SChristoph Hellwig * For unwritten extents we need to issue transactions to convert a 311c59d87c4SChristoph Hellwig * range to normal written extents after the data I/O has finished. 3125cb13dcdSZhaohongjiang * Detecting and handling completion IO errors is done individually 3135cb13dcdSZhaohongjiang * for each case as different cleanup operations need to be performed 3145cb13dcdSZhaohongjiang * on error. 315c59d87c4SChristoph Hellwig */ 3160d882a36SAlain Renaud if (ioend->io_type == XFS_IO_UNWRITTEN) { 3170e51a8e1SChristoph Hellwig if (error) 3185cb13dcdSZhaohongjiang goto done; 319c59d87c4SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 320c59d87c4SChristoph Hellwig ioend->io_size); 321281627dfSChristoph Hellwig } else if (ioend->io_append_trans) { 3220e51a8e1SChristoph Hellwig error = xfs_setfilesize_ioend(ioend, error); 32384803fb7SChristoph Hellwig } else { 324*43caeb18SDarrick J. Wong ASSERT(!xfs_ioend_is_append(ioend) || 325*43caeb18SDarrick J. Wong ioend->io_type == XFS_IO_COW); 32684803fb7SChristoph Hellwig } 32784803fb7SChristoph Hellwig 32804f658eeSChristoph Hellwig done: 3290e51a8e1SChristoph Hellwig xfs_destroy_ioend(ioend, error); 330c59d87c4SChristoph Hellwig } 331c59d87c4SChristoph Hellwig 3320e51a8e1SChristoph Hellwig STATIC void 3330e51a8e1SChristoph Hellwig xfs_end_bio( 3340e51a8e1SChristoph Hellwig struct bio *bio) 335c59d87c4SChristoph Hellwig { 3360e51a8e1SChristoph Hellwig struct xfs_ioend *ioend = bio->bi_private; 3370e51a8e1SChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 338c59d87c4SChristoph Hellwig 339*43caeb18SDarrick J.
Wong if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW) 3400e51a8e1SChristoph Hellwig queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 3410e51a8e1SChristoph Hellwig else if (ioend->io_append_trans) 3420e51a8e1SChristoph Hellwig queue_work(mp->m_data_workqueue, &ioend->io_work); 3430e51a8e1SChristoph Hellwig else 3440e51a8e1SChristoph Hellwig xfs_destroy_ioend(ioend, bio->bi_error); 345c59d87c4SChristoph Hellwig } 346c59d87c4SChristoph Hellwig 347c59d87c4SChristoph Hellwig STATIC int 348c59d87c4SChristoph Hellwig xfs_map_blocks( 349c59d87c4SChristoph Hellwig struct inode *inode, 350c59d87c4SChristoph Hellwig loff_t offset, 351c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 352988ef927SDave Chinner int type) 353c59d87c4SChristoph Hellwig { 354c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 355c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 356c59d87c4SChristoph Hellwig ssize_t count = 1 << inode->i_blkbits; 357c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 358c59d87c4SChristoph Hellwig int error = 0; 359c59d87c4SChristoph Hellwig int bmapi_flags = XFS_BMAPI_ENTIRE; 360c59d87c4SChristoph Hellwig int nimaps = 1; 361c59d87c4SChristoph Hellwig 362c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 363b474c7aeSEric Sandeen return -EIO; 364c59d87c4SChristoph Hellwig 365ef473667SDarrick J. Wong ASSERT(type != XFS_IO_COW); 3660d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) 367c59d87c4SChristoph Hellwig bmapi_flags |= XFS_BMAPI_IGSTATE; 368c59d87c4SChristoph Hellwig 369c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED); 370c59d87c4SChristoph Hellwig ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 371c59d87c4SChristoph Hellwig (ip->i_df.if_flags & XFS_IFEXTENTS)); 372d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 373c59d87c4SChristoph Hellwig 374d2c28191SDave Chinner if (offset + count > mp->m_super->s_maxbytes) 375d2c28191SDave Chinner count = mp->m_super->s_maxbytes - offset; 376c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 377c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 3785c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 3795c8ed202SDave Chinner imap, &nimaps, bmapi_flags); 380ef473667SDarrick J. Wong /* 381ef473667SDarrick J. Wong * Truncate an overwrite extent if there's a pending CoW 382ef473667SDarrick J. Wong * reservation before the end of this extent. This forces us 383ef473667SDarrick J. Wong * to come back to writepage to take care of the CoW. 384ef473667SDarrick J. Wong */ 385ef473667SDarrick J. Wong if (nimaps && type == XFS_IO_OVERWRITE) 386ef473667SDarrick J. Wong xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap); 387c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 388c59d87c4SChristoph Hellwig 389c59d87c4SChristoph Hellwig if (error) 3902451337dSDave Chinner return error; 391c59d87c4SChristoph Hellwig 3920d882a36SAlain Renaud if (type == XFS_IO_DELALLOC && 393c59d87c4SChristoph Hellwig (!nimaps || isnullstartblock(imap->br_startblock))) { 39460b4984fSDarrick J. Wong error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset, 39560b4984fSDarrick J. Wong imap); 396c59d87c4SChristoph Hellwig if (!error) 397ef473667SDarrick J. 
Wong trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); 3982451337dSDave Chinner return error; 399c59d87c4SChristoph Hellwig } 400c59d87c4SChristoph Hellwig 401c59d87c4SChristoph Hellwig #ifdef DEBUG 4020d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) { 403c59d87c4SChristoph Hellwig ASSERT(nimaps); 404c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 405c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 406c59d87c4SChristoph Hellwig } 407c59d87c4SChristoph Hellwig #endif 408c59d87c4SChristoph Hellwig if (nimaps) 409c59d87c4SChristoph Hellwig trace_xfs_map_blocks_found(ip, offset, count, type, imap); 410c59d87c4SChristoph Hellwig return 0; 411c59d87c4SChristoph Hellwig } 412c59d87c4SChristoph Hellwig 413fbcc0256SDave Chinner STATIC bool 414c59d87c4SChristoph Hellwig xfs_imap_valid( 415c59d87c4SChristoph Hellwig struct inode *inode, 416c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 417c59d87c4SChristoph Hellwig xfs_off_t offset) 418c59d87c4SChristoph Hellwig { 419c59d87c4SChristoph Hellwig offset >>= inode->i_blkbits; 420c59d87c4SChristoph Hellwig 421c59d87c4SChristoph Hellwig return offset >= imap->br_startoff && 422c59d87c4SChristoph Hellwig offset < imap->br_startoff + imap->br_blockcount; 423c59d87c4SChristoph Hellwig } 424c59d87c4SChristoph Hellwig 425c59d87c4SChristoph Hellwig STATIC void 426c59d87c4SChristoph Hellwig xfs_start_buffer_writeback( 427c59d87c4SChristoph Hellwig struct buffer_head *bh) 428c59d87c4SChristoph Hellwig { 429c59d87c4SChristoph Hellwig ASSERT(buffer_mapped(bh)); 430c59d87c4SChristoph Hellwig ASSERT(buffer_locked(bh)); 431c59d87c4SChristoph Hellwig ASSERT(!buffer_delay(bh)); 432c59d87c4SChristoph Hellwig ASSERT(!buffer_unwritten(bh)); 433c59d87c4SChristoph Hellwig 434c59d87c4SChristoph Hellwig mark_buffer_async_write(bh); 435c59d87c4SChristoph Hellwig set_buffer_uptodate(bh); 436c59d87c4SChristoph Hellwig clear_buffer_dirty(bh); 437c59d87c4SChristoph Hellwig } 438c59d87c4SChristoph Hellwig 439c59d87c4SChristoph Hellwig STATIC void 440c59d87c4SChristoph Hellwig xfs_start_page_writeback( 441c59d87c4SChristoph Hellwig struct page *page, 442e10de372SDave Chinner int clear_dirty) 443c59d87c4SChristoph Hellwig { 444c59d87c4SChristoph Hellwig ASSERT(PageLocked(page)); 445c59d87c4SChristoph Hellwig ASSERT(!PageWriteback(page)); 4460d085a52SDave Chinner 4470d085a52SDave Chinner /* 4480d085a52SDave Chinner * if the page was not fully cleaned, we need to ensure that the higher 4490d085a52SDave Chinner * layers come back to it correctly. That means we need to keep the page 4500d085a52SDave Chinner * dirty, and for WB_SYNC_ALL writeback we need to ensure the 4510d085a52SDave Chinner * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to 4520d085a52SDave Chinner * write this page in this writeback sweep will be made. 
4530d085a52SDave Chinner */ 4540d085a52SDave Chinner if (clear_dirty) { 455c59d87c4SChristoph Hellwig clear_page_dirty_for_io(page); 456c59d87c4SChristoph Hellwig set_page_writeback(page); 4570d085a52SDave Chinner } else 4580d085a52SDave Chinner set_page_writeback_keepwrite(page); 4590d085a52SDave Chinner 460c59d87c4SChristoph Hellwig unlock_page(page); 461c59d87c4SChristoph Hellwig } 462c59d87c4SChristoph Hellwig 463c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh) 464c59d87c4SChristoph Hellwig { 465c59d87c4SChristoph Hellwig return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 466c59d87c4SChristoph Hellwig } 467c59d87c4SChristoph Hellwig 468c59d87c4SChristoph Hellwig /* 469bb18782aSDave Chinner * Submit the bio for an ioend. We are passed an ioend with a bio attached to 470bb18782aSDave Chinner * it, and we submit that bio. The ioend may be used for multiple bio 471bb18782aSDave Chinner * submissions, so we only want to allocate an append transaction for the ioend 472bb18782aSDave Chinner * once. In the case of multiple bio submission, each bio will take an IO 473bb18782aSDave Chinner * reference to the ioend to ensure that the ioend completion is only done once 474bb18782aSDave Chinner * all bios have been submitted and the ioend is really done. 4757bf7f352SDave Chinner * 4767bf7f352SDave Chinner * If @status is non-zero, it means that we have a situation where some part of 4777bf7f352SDave Chinner * the submission process has failed after we have marked pages for writeback 478bb18782aSDave Chinner * and unlocked them. In this situation, we need to fail the bio and ioend 479bb18782aSDave Chinner * rather than submit it to IO. This typically only happens on a filesystem 480bb18782aSDave Chinner * shutdown. 481c59d87c4SChristoph Hellwig */ 482e10de372SDave Chinner STATIC int 483c59d87c4SChristoph Hellwig xfs_submit_ioend( 484c59d87c4SChristoph Hellwig struct writeback_control *wbc, 4850e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 486e10de372SDave Chinner int status) 487c59d87c4SChristoph Hellwig { 488e10de372SDave Chinner /* Reserve log space if we might write beyond the on-disk inode size. */ 489e10de372SDave Chinner if (!status && 4900e51a8e1SChristoph Hellwig ioend->io_type != XFS_IO_UNWRITTEN && 491bb18782aSDave Chinner xfs_ioend_is_append(ioend) && 492bb18782aSDave Chinner !ioend->io_append_trans) 493e10de372SDave Chinner status = xfs_setfilesize_trans_alloc(ioend); 494bb18782aSDave Chinner 4950e51a8e1SChristoph Hellwig ioend->io_bio->bi_private = ioend; 4960e51a8e1SChristoph Hellwig ioend->io_bio->bi_end_io = xfs_end_bio; 49750bfcd0cSMike Christie bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, 49850bfcd0cSMike Christie (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : 0); 4997bf7f352SDave Chinner /* 5007bf7f352SDave Chinner * If we are failing the IO now, just mark the ioend with an 5017bf7f352SDave Chinner * error and finish it. This will run IO completion immediately 5027bf7f352SDave Chinner * as there is only one reference to the ioend at this point in 5037bf7f352SDave Chinner * time.
5047bf7f352SDave Chinner */ 505e10de372SDave Chinner if (status) { 5060e51a8e1SChristoph Hellwig ioend->io_bio->bi_error = status; 5070e51a8e1SChristoph Hellwig bio_endio(ioend->io_bio); 508e10de372SDave Chinner return status; 5097bf7f352SDave Chinner } 5107bf7f352SDave Chinner 5114e49ea4aSMike Christie submit_bio(ioend->io_bio); 512e10de372SDave Chinner return 0; 513c59d87c4SChristoph Hellwig } 514c59d87c4SChristoph Hellwig 5150e51a8e1SChristoph Hellwig static void 5160e51a8e1SChristoph Hellwig xfs_init_bio_from_bh( 5170e51a8e1SChristoph Hellwig struct bio *bio, 5180e51a8e1SChristoph Hellwig struct buffer_head *bh) 5190e51a8e1SChristoph Hellwig { 5200e51a8e1SChristoph Hellwig bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9); 5210e51a8e1SChristoph Hellwig bio->bi_bdev = bh->b_bdev; 5220e51a8e1SChristoph Hellwig } 5230e51a8e1SChristoph Hellwig 5240e51a8e1SChristoph Hellwig static struct xfs_ioend * 5250e51a8e1SChristoph Hellwig xfs_alloc_ioend( 5260e51a8e1SChristoph Hellwig struct inode *inode, 5270e51a8e1SChristoph Hellwig unsigned int type, 5280e51a8e1SChristoph Hellwig xfs_off_t offset, 5290e51a8e1SChristoph Hellwig struct buffer_head *bh) 5300e51a8e1SChristoph Hellwig { 5310e51a8e1SChristoph Hellwig struct xfs_ioend *ioend; 5320e51a8e1SChristoph Hellwig struct bio *bio; 5330e51a8e1SChristoph Hellwig 5340e51a8e1SChristoph Hellwig bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset); 5350e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(bio, bh); 5360e51a8e1SChristoph Hellwig 5370e51a8e1SChristoph Hellwig ioend = container_of(bio, struct xfs_ioend, io_inline_bio); 5380e51a8e1SChristoph Hellwig INIT_LIST_HEAD(&ioend->io_list); 5390e51a8e1SChristoph Hellwig ioend->io_type = type; 5400e51a8e1SChristoph Hellwig ioend->io_inode = inode; 5410e51a8e1SChristoph Hellwig ioend->io_size = 0; 5420e51a8e1SChristoph Hellwig ioend->io_offset = offset; 5430e51a8e1SChristoph Hellwig INIT_WORK(&ioend->io_work, xfs_end_io); 5440e51a8e1SChristoph Hellwig ioend->io_append_trans = NULL; 5450e51a8e1SChristoph Hellwig ioend->io_bio = bio; 5460e51a8e1SChristoph Hellwig return ioend; 5470e51a8e1SChristoph Hellwig } 5480e51a8e1SChristoph Hellwig 5490e51a8e1SChristoph Hellwig /* 5500e51a8e1SChristoph Hellwig * Allocate a new bio, and chain the old bio to the new one. 5510e51a8e1SChristoph Hellwig * 5520e51a8e1SChristoph Hellwig * Note that we have to perform the chaining in this unintuitive order 5530e51a8e1SChristoph Hellwig * so that the bi_private linkage is set up in the right direction for the 5540e51a8e1SChristoph Hellwig * traversal in xfs_destroy_ioend(). 5550e51a8e1SChristoph Hellwig */ 5560e51a8e1SChristoph Hellwig static void 5570e51a8e1SChristoph Hellwig xfs_chain_bio( 5580e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 5590e51a8e1SChristoph Hellwig struct writeback_control *wbc, 5600e51a8e1SChristoph Hellwig struct buffer_head *bh) 5610e51a8e1SChristoph Hellwig { 5620e51a8e1SChristoph Hellwig struct bio *new; 5630e51a8e1SChristoph Hellwig 5640e51a8e1SChristoph Hellwig new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 5650e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(new, bh); 5660e51a8e1SChristoph Hellwig 5670e51a8e1SChristoph Hellwig bio_chain(ioend->io_bio, new); 5680e51a8e1SChristoph Hellwig bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 56950bfcd0cSMike Christie bio_set_op_attrs(ioend->io_bio, REQ_OP_WRITE, 57050bfcd0cSMike Christie (wbc->sync_mode == WB_SYNC_ALL) ?
WRITE_SYNC : 0); 5714e49ea4aSMike Christie submit_bio(ioend->io_bio); 5720e51a8e1SChristoph Hellwig ioend->io_bio = new; 5730e51a8e1SChristoph Hellwig } 5740e51a8e1SChristoph Hellwig 575c59d87c4SChristoph Hellwig /* 576c59d87c4SChristoph Hellwig * Test to see if we've been building up a completion structure for 577c59d87c4SChristoph Hellwig * earlier buffers -- if so, we try to append to this ioend if we 578c59d87c4SChristoph Hellwig * can, otherwise we finish off any current ioend and start another. 579e10de372SDave Chinner * Return the ioend we finished off so that the caller can submit it 580e10de372SDave Chinner * once it has finished processing the dirty page. 581c59d87c4SChristoph Hellwig */ 582c59d87c4SChristoph Hellwig STATIC void 583c59d87c4SChristoph Hellwig xfs_add_to_ioend( 584c59d87c4SChristoph Hellwig struct inode *inode, 585c59d87c4SChristoph Hellwig struct buffer_head *bh, 586c59d87c4SChristoph Hellwig xfs_off_t offset, 587e10de372SDave Chinner struct xfs_writepage_ctx *wpc, 588bb18782aSDave Chinner struct writeback_control *wbc, 589e10de372SDave Chinner struct list_head *iolist) 590c59d87c4SChristoph Hellwig { 591fbcc0256SDave Chinner if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || 5920df61da8SDarrick J. Wong bh->b_blocknr != wpc->last_block + 1 || 5930df61da8SDarrick J. Wong offset != wpc->ioend->io_offset + wpc->ioend->io_size) { 594e10de372SDave Chinner if (wpc->ioend) 595e10de372SDave Chinner list_add(&wpc->ioend->io_list, iolist); 5960e51a8e1SChristoph Hellwig wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh); 597c59d87c4SChristoph Hellwig } 598c59d87c4SChristoph Hellwig 5990e51a8e1SChristoph Hellwig /* 6000e51a8e1SChristoph Hellwig * If the buffer doesn't fit into the bio we need to allocate a new 6010e51a8e1SChristoph Hellwig * one. This shouldn't happen more than once for a given buffer. 
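 * bio_add_page() returns the number of bytes it actually added, so a short
 * return from xfs_bio_add_buffer() below means the current bio is full and
 * we chain a fresh one via xfs_chain_bio().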
6020e51a8e1SChristoph Hellwig */ 6030e51a8e1SChristoph Hellwig while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) 6040e51a8e1SChristoph Hellwig xfs_chain_bio(wpc->ioend, wbc, bh); 605bb18782aSDave Chinner 606fbcc0256SDave Chinner wpc->ioend->io_size += bh->b_size; 607fbcc0256SDave Chinner wpc->last_block = bh->b_blocknr; 608e10de372SDave Chinner xfs_start_buffer_writeback(bh); 609c59d87c4SChristoph Hellwig } 610c59d87c4SChristoph Hellwig 611c59d87c4SChristoph Hellwig STATIC void 612c59d87c4SChristoph Hellwig xfs_map_buffer( 613c59d87c4SChristoph Hellwig struct inode *inode, 614c59d87c4SChristoph Hellwig struct buffer_head *bh, 615c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 616c59d87c4SChristoph Hellwig xfs_off_t offset) 617c59d87c4SChristoph Hellwig { 618c59d87c4SChristoph Hellwig sector_t bn; 619c59d87c4SChristoph Hellwig struct xfs_mount *m = XFS_I(inode)->i_mount; 620c59d87c4SChristoph Hellwig xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); 621c59d87c4SChristoph Hellwig xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); 622c59d87c4SChristoph Hellwig 623c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 624c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 625c59d87c4SChristoph Hellwig 626c59d87c4SChristoph Hellwig bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + 627c59d87c4SChristoph Hellwig ((offset - iomap_offset) >> inode->i_blkbits); 628c59d87c4SChristoph Hellwig 629c59d87c4SChristoph Hellwig ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); 630c59d87c4SChristoph Hellwig 631c59d87c4SChristoph Hellwig bh->b_blocknr = bn; 632c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 633c59d87c4SChristoph Hellwig } 634c59d87c4SChristoph Hellwig 635c59d87c4SChristoph Hellwig STATIC void 636c59d87c4SChristoph Hellwig xfs_map_at_offset( 637c59d87c4SChristoph Hellwig struct inode *inode, 638c59d87c4SChristoph Hellwig struct buffer_head *bh, 639c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 640c59d87c4SChristoph Hellwig xfs_off_t offset) 641c59d87c4SChristoph Hellwig { 642c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 643c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 644c59d87c4SChristoph Hellwig 645c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh, imap, offset); 646c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 647c59d87c4SChristoph Hellwig clear_buffer_delay(bh); 648c59d87c4SChristoph Hellwig clear_buffer_unwritten(bh); 649c59d87c4SChristoph Hellwig } 650c59d87c4SChristoph Hellwig 651c59d87c4SChristoph Hellwig /* 652a49935f2SDave Chinner * Test if a given page contains at least one buffer of a given @type. 653a49935f2SDave Chinner * If @check_all_buffers is true, then we walk all the buffers in the page to 654a49935f2SDave Chinner * try to find one of the type passed in. If it is not set, then the caller only 655a49935f2SDave Chinner * needs to check the first buffer on the page for a match. 
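 * For example, xfs_aops_discard_page() below passes check_all_buffers = true
 * so that any delalloc buffer on the page is found, not just one at the
 * start of the page.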
656c59d87c4SChristoph Hellwig */ 657a49935f2SDave Chinner STATIC bool 6586ffc4db5SDave Chinner xfs_check_page_type( 659c59d87c4SChristoph Hellwig struct page *page, 660a49935f2SDave Chinner unsigned int type, 661a49935f2SDave Chinner bool check_all_buffers) 662c59d87c4SChristoph Hellwig { 663a49935f2SDave Chinner struct buffer_head *bh; 664a49935f2SDave Chinner struct buffer_head *head; 665c59d87c4SChristoph Hellwig 666a49935f2SDave Chinner if (PageWriteback(page)) 667a49935f2SDave Chinner return false; 668a49935f2SDave Chinner if (!page->mapping) 669a49935f2SDave Chinner return false; 670a49935f2SDave Chinner if (!page_has_buffers(page)) 671a49935f2SDave Chinner return false; 672c59d87c4SChristoph Hellwig 673c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 674c59d87c4SChristoph Hellwig do { 675a49935f2SDave Chinner if (buffer_unwritten(bh)) { 676a49935f2SDave Chinner if (type == XFS_IO_UNWRITTEN) 677a49935f2SDave Chinner return true; 678a49935f2SDave Chinner } else if (buffer_delay(bh)) { 679805eeb8eSDan Carpenter if (type == XFS_IO_DELALLOC) 680a49935f2SDave Chinner return true; 681a49935f2SDave Chinner } else if (buffer_dirty(bh) && buffer_mapped(bh)) { 682805eeb8eSDan Carpenter if (type == XFS_IO_OVERWRITE) 683a49935f2SDave Chinner return true; 684a49935f2SDave Chinner } 685a49935f2SDave Chinner 686a49935f2SDave Chinner /* If we are only checking the first buffer, we are done now. */ 687a49935f2SDave Chinner if (!check_all_buffers) 688c59d87c4SChristoph Hellwig break; 689c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 690c59d87c4SChristoph Hellwig 691a49935f2SDave Chinner return false; 692c59d87c4SChristoph Hellwig } 693c59d87c4SChristoph Hellwig 694c59d87c4SChristoph Hellwig STATIC void 695c59d87c4SChristoph Hellwig xfs_vm_invalidatepage( 696c59d87c4SChristoph Hellwig struct page *page, 697d47992f8SLukas Czerner unsigned int offset, 698d47992f8SLukas Czerner unsigned int length) 699c59d87c4SChristoph Hellwig { 70034097dfeSLukas Czerner trace_xfs_invalidatepage(page->mapping->host, page, offset, 70134097dfeSLukas Czerner length); 70234097dfeSLukas Czerner block_invalidatepage(page, offset, length); 703c59d87c4SChristoph Hellwig } 704c59d87c4SChristoph Hellwig 705c59d87c4SChristoph Hellwig /* 706c59d87c4SChristoph Hellwig * If the page has delalloc buffers on it, we need to punch them out before we 707c59d87c4SChristoph Hellwig * invalidate the page. If we don't, we leave a stale delalloc mapping on the 708c59d87c4SChristoph Hellwig * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read 709c59d87c4SChristoph Hellwig * is done on that same region - the delalloc extent is returned when none is 710c59d87c4SChristoph Hellwig * supposed to be there. 711c59d87c4SChristoph Hellwig * 712c59d87c4SChristoph Hellwig * We prevent this by truncating away the delalloc regions on the page before 713c59d87c4SChristoph Hellwig * invalidating it. Because they are delalloc, we can do this without needing a 714c59d87c4SChristoph Hellwig * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this 715c59d87c4SChristoph Hellwig * truncation without a transaction as there is no space left for block 716c59d87c4SChristoph Hellwig * reservation (typically why we see a ENOSPC in writeback). 717c59d87c4SChristoph Hellwig * 718c59d87c4SChristoph Hellwig * This is not a performance critical path, so for now just do the punching a 719c59d87c4SChristoph Hellwig * buffer head at a time. 
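 * Each loop iteration below therefore punches out a single filesystem block
 * via xfs_bmap_punch_delalloc_range(ip, start_fsb, 1).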
720c59d87c4SChristoph Hellwig */ 721c59d87c4SChristoph Hellwig STATIC void 722c59d87c4SChristoph Hellwig xfs_aops_discard_page( 723c59d87c4SChristoph Hellwig struct page *page) 724c59d87c4SChristoph Hellwig { 725c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 726c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 727c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 728c59d87c4SChristoph Hellwig loff_t offset = page_offset(page); 729c59d87c4SChristoph Hellwig 730a49935f2SDave Chinner if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) 731c59d87c4SChristoph Hellwig goto out_invalidate; 732c59d87c4SChristoph Hellwig 733c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 734c59d87c4SChristoph Hellwig goto out_invalidate; 735c59d87c4SChristoph Hellwig 736c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 737c59d87c4SChristoph Hellwig "page discard on page %p, inode 0x%llx, offset %llu.", 738c59d87c4SChristoph Hellwig page, ip->i_ino, offset); 739c59d87c4SChristoph Hellwig 740c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 741c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 742c59d87c4SChristoph Hellwig do { 743c59d87c4SChristoph Hellwig int error; 744c59d87c4SChristoph Hellwig xfs_fileoff_t start_fsb; 745c59d87c4SChristoph Hellwig 746c59d87c4SChristoph Hellwig if (!buffer_delay(bh)) 747c59d87c4SChristoph Hellwig goto next_buffer; 748c59d87c4SChristoph Hellwig 749c59d87c4SChristoph Hellwig start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 750c59d87c4SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); 751c59d87c4SChristoph Hellwig if (error) { 752c59d87c4SChristoph Hellwig /* something screwed, just bail */ 753c59d87c4SChristoph Hellwig if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 754c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 755c59d87c4SChristoph Hellwig "page discard unable to remove delalloc mapping."); 756c59d87c4SChristoph Hellwig } 757c59d87c4SChristoph Hellwig break; 758c59d87c4SChristoph Hellwig } 759c59d87c4SChristoph Hellwig next_buffer: 760c59d87c4SChristoph Hellwig offset += 1 << inode->i_blkbits; 761c59d87c4SChristoph Hellwig 762c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 763c59d87c4SChristoph Hellwig 764c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 765c59d87c4SChristoph Hellwig out_invalidate: 76609cbfeafSKirill A. Shutemov xfs_vm_invalidatepage(page, 0, PAGE_SIZE); 767c59d87c4SChristoph Hellwig return; 768c59d87c4SChristoph Hellwig } 769c59d87c4SChristoph Hellwig 770ef473667SDarrick J. Wong static int 771ef473667SDarrick J. Wong xfs_map_cow( 772ef473667SDarrick J. Wong struct xfs_writepage_ctx *wpc, 773ef473667SDarrick J. Wong struct inode *inode, 774ef473667SDarrick J. Wong loff_t offset, 775ef473667SDarrick J. Wong unsigned int *new_type) 776ef473667SDarrick J. Wong { 777ef473667SDarrick J. Wong struct xfs_inode *ip = XFS_I(inode); 778ef473667SDarrick J. Wong struct xfs_bmbt_irec imap; 779ef473667SDarrick J. Wong bool is_cow = false, need_alloc = false; 780ef473667SDarrick J. Wong int error; 781ef473667SDarrick J. Wong 782ef473667SDarrick J. Wong /* 783ef473667SDarrick J. Wong * If we already have a valid COW mapping keep using it. 784ef473667SDarrick J. Wong */ 785ef473667SDarrick J. Wong if (wpc->io_type == XFS_IO_COW) { 786ef473667SDarrick J. Wong wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); 787ef473667SDarrick J. Wong if (wpc->imap_valid) { 788ef473667SDarrick J. Wong *new_type = XFS_IO_COW; 789ef473667SDarrick J. 
Wong return 0; 790ef473667SDarrick J. Wong } 791ef473667SDarrick J. Wong } 792ef473667SDarrick J. Wong 793ef473667SDarrick J. Wong /* 794ef473667SDarrick J. Wong * Else we need to check if there is a COW mapping at this offset. 795ef473667SDarrick J. Wong */ 796ef473667SDarrick J. Wong xfs_ilock(ip, XFS_ILOCK_SHARED); 797ef473667SDarrick J. Wong is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap, &need_alloc); 798ef473667SDarrick J. Wong xfs_iunlock(ip, XFS_ILOCK_SHARED); 799ef473667SDarrick J. Wong 800ef473667SDarrick J. Wong if (!is_cow) 801ef473667SDarrick J. Wong return 0; 802ef473667SDarrick J. Wong 803ef473667SDarrick J. Wong /* 804ef473667SDarrick J. Wong * And if the COW mapping has a delayed extent here we need to 805ef473667SDarrick J. Wong * allocate real space for it now. 806ef473667SDarrick J. Wong */ 807ef473667SDarrick J. Wong if (need_alloc) { 808ef473667SDarrick J. Wong error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, 809ef473667SDarrick J. Wong &imap); 810ef473667SDarrick J. Wong if (error) 811ef473667SDarrick J. Wong return error; 812ef473667SDarrick J. Wong } 813ef473667SDarrick J. Wong 814ef473667SDarrick J. Wong wpc->io_type = *new_type = XFS_IO_COW; 815ef473667SDarrick J. Wong wpc->imap_valid = true; 816ef473667SDarrick J. Wong wpc->imap = imap; 817ef473667SDarrick J. Wong return 0; 818ef473667SDarrick J. Wong } 819ef473667SDarrick J. Wong 820c59d87c4SChristoph Hellwig /* 821e10de372SDave Chinner * We implement an immediate ioend submission policy here to avoid needing to 822e10de372SDave Chinner * chain multiple ioends and hence nest mempool allocations which can violate 823e10de372SDave Chinner * forward progress guarantees we need to provide. The current ioend we are 824e10de372SDave Chinner * adding buffers to is cached on the writepage context, and if the new buffer 825e10de372SDave Chinner * does not append to the cached ioend it will create a new ioend and cache that 826e10de372SDave Chinner * instead. 827e10de372SDave Chinner * 828e10de372SDave Chinner * If a new ioend is created and cached, the old ioend is returned and queued 829e10de372SDave Chinner * locally for submission once the entire page is processed or an error has been 830e10de372SDave Chinner * detected. While ioends are submitted immediately after they are completed, 831e10de372SDave Chinner * batching optimisations are provided by higher level block plugging. 832e10de372SDave Chinner * 833e10de372SDave Chinner * At the end of a writeback pass, there will be a cached ioend remaining on the 834e10de372SDave Chinner * writepage context that the caller will need to submit. 835e10de372SDave Chinner */ 836bfce7d2eSDave Chinner static int 837bfce7d2eSDave Chinner xfs_writepage_map( 838bfce7d2eSDave Chinner struct xfs_writepage_ctx *wpc, 839e10de372SDave Chinner struct writeback_control *wbc, 840bfce7d2eSDave Chinner struct inode *inode, 841bfce7d2eSDave Chinner struct page *page, 842bfce7d2eSDave Chinner loff_t offset, 843bfce7d2eSDave Chinner __uint64_t end_offset) 844bfce7d2eSDave Chinner { 845e10de372SDave Chinner LIST_HEAD(submit_list); 846e10de372SDave Chinner struct xfs_ioend *ioend, *next; 847bfce7d2eSDave Chinner struct buffer_head *bh, *head; 848bfce7d2eSDave Chinner ssize_t len = 1 << inode->i_blkbits; 849bfce7d2eSDave Chinner int error = 0; 850bfce7d2eSDave Chinner int count = 0; 851e10de372SDave Chinner int uptodate = 1; 852ef473667SDarrick J. 
Wong unsigned int new_type; 853bfce7d2eSDave Chinner 854bfce7d2eSDave Chinner bh = head = page_buffers(page); 855bfce7d2eSDave Chinner offset = page_offset(page); 856bfce7d2eSDave Chinner do { 857bfce7d2eSDave Chinner if (offset >= end_offset) 858bfce7d2eSDave Chinner break; 859bfce7d2eSDave Chinner if (!buffer_uptodate(bh)) 860bfce7d2eSDave Chinner uptodate = 0; 861bfce7d2eSDave Chinner 862bfce7d2eSDave Chinner /* 863bfce7d2eSDave Chinner * set_page_dirty dirties all buffers in a page, independent 864bfce7d2eSDave Chinner * of their state. The dirty state however is entirely 865bfce7d2eSDave Chinner * meaningless for holes (!mapped && uptodate), so skip 866bfce7d2eSDave Chinner * buffers covering holes here. 867bfce7d2eSDave Chinner */ 868bfce7d2eSDave Chinner if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 869bfce7d2eSDave Chinner wpc->imap_valid = false; 870bfce7d2eSDave Chinner continue; 871bfce7d2eSDave Chinner } 872bfce7d2eSDave Chinner 873ef473667SDarrick J. Wong if (buffer_unwritten(bh)) 874ef473667SDarrick J. Wong new_type = XFS_IO_UNWRITTEN; 875ef473667SDarrick J. Wong else if (buffer_delay(bh)) 876ef473667SDarrick J. Wong new_type = XFS_IO_DELALLOC; 877ef473667SDarrick J. Wong else if (buffer_uptodate(bh)) 878ef473667SDarrick J. Wong new_type = XFS_IO_OVERWRITE; 879ef473667SDarrick J. Wong else { 880bfce7d2eSDave Chinner if (PageUptodate(page)) 881bfce7d2eSDave Chinner ASSERT(buffer_mapped(bh)); 882bfce7d2eSDave Chinner /* 883bfce7d2eSDave Chinner * This buffer is not uptodate and will not be 884bfce7d2eSDave Chinner * written to disk. Ensure that we will put any 885bfce7d2eSDave Chinner * subsequent writeable buffers into a new 886bfce7d2eSDave Chinner * ioend. 887bfce7d2eSDave Chinner */ 888bfce7d2eSDave Chinner wpc->imap_valid = false; 889bfce7d2eSDave Chinner continue; 890bfce7d2eSDave Chinner } 891bfce7d2eSDave Chinner 892ef473667SDarrick J. Wong if (xfs_is_reflink_inode(XFS_I(inode))) { 893ef473667SDarrick J. Wong error = xfs_map_cow(wpc, inode, offset, &new_type); 894ef473667SDarrick J. Wong if (error) 895ef473667SDarrick J. Wong goto out; 896ef473667SDarrick J. Wong } 897ef473667SDarrick J. Wong 898ef473667SDarrick J. Wong if (wpc->io_type != new_type) { 899ef473667SDarrick J. Wong wpc->io_type = new_type; 900ef473667SDarrick J. Wong wpc->imap_valid = false; 901ef473667SDarrick J. Wong } 902ef473667SDarrick J. 
Wong 903bfce7d2eSDave Chinner if (wpc->imap_valid) 904bfce7d2eSDave Chinner wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, 905bfce7d2eSDave Chinner offset); 906bfce7d2eSDave Chinner if (!wpc->imap_valid) { 907bfce7d2eSDave Chinner error = xfs_map_blocks(inode, offset, &wpc->imap, 908bfce7d2eSDave Chinner wpc->io_type); 909bfce7d2eSDave Chinner if (error) 910e10de372SDave Chinner goto out; 911bfce7d2eSDave Chinner wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, 912bfce7d2eSDave Chinner offset); 913bfce7d2eSDave Chinner } 914bfce7d2eSDave Chinner if (wpc->imap_valid) { 915bfce7d2eSDave Chinner lock_buffer(bh); 916bfce7d2eSDave Chinner if (wpc->io_type != XFS_IO_OVERWRITE) 917bfce7d2eSDave Chinner xfs_map_at_offset(inode, bh, &wpc->imap, offset); 918bb18782aSDave Chinner xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list); 919bfce7d2eSDave Chinner count++; 920bfce7d2eSDave Chinner } 921bfce7d2eSDave Chinner 922bfce7d2eSDave Chinner } while (offset += len, ((bh = bh->b_this_page) != head)); 923bfce7d2eSDave Chinner 924bfce7d2eSDave Chinner if (uptodate && bh == head) 925bfce7d2eSDave Chinner SetPageUptodate(page); 926bfce7d2eSDave Chinner 927e10de372SDave Chinner ASSERT(wpc->ioend || list_empty(&submit_list)); 928bfce7d2eSDave Chinner 929e10de372SDave Chinner out: 930bfce7d2eSDave Chinner /* 931e10de372SDave Chinner * On error, we have to fail the ioend here because we have locked 932e10de372SDave Chinner * buffers in the ioend. If we don't do this, we'll deadlock 933e10de372SDave Chinner * invalidating the page as that tries to lock the buffers on the page. 934e10de372SDave Chinner * Also, because we may have set pages under writeback, we have to make 935e10de372SDave Chinner * sure we run IO completion to mark the error state of the IO 936e10de372SDave Chinner * appropriately, so we can't cancel the ioend directly here. That means 937e10de372SDave Chinner * we have to mark this page as under writeback if we included any 938e10de372SDave Chinner * buffers from it in the ioend chain so that completion treats it 939e10de372SDave Chinner * correctly. 940bfce7d2eSDave Chinner * 941e10de372SDave Chinner * If we didn't include the page in the ioend, then on error we can 942e10de372SDave Chinner * simply discard and unlock it as there are no other users of the page 943e10de372SDave Chinner * or its buffers right now. The caller will still need to trigger 944e10de372SDave Chinner * submission of outstanding ioends on the writepage context so they are 945e10de372SDave Chinner * treated correctly on error. 946bfce7d2eSDave Chinner */ 947e10de372SDave Chinner if (count) { 948e10de372SDave Chinner xfs_start_page_writeback(page, !error); 949e10de372SDave Chinner 950e10de372SDave Chinner /* 951e10de372SDave Chinner * Preserve the original error if there was one, otherwise catch 952e10de372SDave Chinner * submission errors here and propagate into subsequent ioend 953e10de372SDave Chinner * submissions.
954e10de372SDave Chinner */ 955e10de372SDave Chinner list_for_each_entry_safe(ioend, next, &submit_list, io_list) { 956e10de372SDave Chinner int error2; 957e10de372SDave Chinner 958e10de372SDave Chinner list_del_init(&ioend->io_list); 959e10de372SDave Chinner error2 = xfs_submit_ioend(wbc, ioend, error); 960e10de372SDave Chinner if (error2 && !error) 961e10de372SDave Chinner error = error2; 962e10de372SDave Chinner } 963e10de372SDave Chinner } else if (error) { 964bfce7d2eSDave Chinner xfs_aops_discard_page(page); 965bfce7d2eSDave Chinner ClearPageUptodate(page); 966bfce7d2eSDave Chinner unlock_page(page); 967e10de372SDave Chinner } else { 968e10de372SDave Chinner /* 969e10de372SDave Chinner * We can end up here with no error and nothing to write if we 970e10de372SDave Chinner * race with a partial page truncate on a sub-page block sized 971e10de372SDave Chinner * filesystem. In that case we need to mark the page clean. 972e10de372SDave Chinner */ 973e10de372SDave Chinner xfs_start_page_writeback(page, 1); 974e10de372SDave Chinner end_page_writeback(page); 975bfce7d2eSDave Chinner } 976e10de372SDave Chinner 977bfce7d2eSDave Chinner mapping_set_error(page->mapping, error); 978bfce7d2eSDave Chinner return error; 979bfce7d2eSDave Chinner } 980bfce7d2eSDave Chinner 981c59d87c4SChristoph Hellwig /* 982c59d87c4SChristoph Hellwig * Write out a dirty page. 983c59d87c4SChristoph Hellwig * 984c59d87c4SChristoph Hellwig * For delalloc space on the page we need to allocate space and flush it. 985c59d87c4SChristoph Hellwig * For unwritten space on the page we need to start the conversion to 986c59d87c4SChristoph Hellwig * regular allocated space. 987c59d87c4SChristoph Hellwig * For any other dirty buffer heads on the page we should flush them. 988c59d87c4SChristoph Hellwig */ 989c59d87c4SChristoph Hellwig STATIC int 990fbcc0256SDave Chinner xfs_do_writepage( 991c59d87c4SChristoph Hellwig struct page *page, 992fbcc0256SDave Chinner struct writeback_control *wbc, 993fbcc0256SDave Chinner void *data) 994c59d87c4SChristoph Hellwig { 995fbcc0256SDave Chinner struct xfs_writepage_ctx *wpc = data; 996c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 997c59d87c4SChristoph Hellwig loff_t offset; 998c59d87c4SChristoph Hellwig __uint64_t end_offset; 999ad68972aSDave Chinner pgoff_t end_index; 1000c59d87c4SChristoph Hellwig 100134097dfeSLukas Czerner trace_xfs_writepage(inode, page, 0, 0); 1002c59d87c4SChristoph Hellwig 1003c59d87c4SChristoph Hellwig ASSERT(page_has_buffers(page)); 1004c59d87c4SChristoph Hellwig 1005c59d87c4SChristoph Hellwig /* 1006c59d87c4SChristoph Hellwig * Refuse to write the page out if we are called from reclaim context. 1007c59d87c4SChristoph Hellwig * 1008c59d87c4SChristoph Hellwig * This avoids stack overflows when called from deeply used stacks in 1009c59d87c4SChristoph Hellwig * random callers for direct reclaim or memcg reclaim. We explicitly 1010c59d87c4SChristoph Hellwig * allow reclaim from kswapd as the stack usage there is relatively low. 1011c59d87c4SChristoph Hellwig * 101294054fa3SMel Gorman * This should never happen except in the case of a VM regression so 101394054fa3SMel Gorman * warn about it. 
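 *
 * Concretely, the check below fires when PF_MEMALLOC is set but PF_KSWAPD is
 * not, i.e. we were entered from direct reclaim rather than from kswapd.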
1014c59d87c4SChristoph Hellwig */ 101594054fa3SMel Gorman if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 101694054fa3SMel Gorman PF_MEMALLOC)) 1017c59d87c4SChristoph Hellwig goto redirty; 1018c59d87c4SChristoph Hellwig 1019c59d87c4SChristoph Hellwig /* 1020c59d87c4SChristoph Hellwig * Given that we do not allow direct reclaim to call us, we should 1021c59d87c4SChristoph Hellwig * never be called while in a filesystem transaction. 1022c59d87c4SChristoph Hellwig */ 1023448011e2SChristoph Hellwig if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) 1024c59d87c4SChristoph Hellwig goto redirty; 1025c59d87c4SChristoph Hellwig 10268695d27eSJie Liu /* 1027ad68972aSDave Chinner * Is this page beyond the end of the file? 1028ad68972aSDave Chinner * 10298695d27eSJie Liu * The page index is less than the end_index, adjust the end_offset 10308695d27eSJie Liu * to the highest offset that this page should represent. 10318695d27eSJie Liu * ----------------------------------------------------- 10328695d27eSJie Liu * | file mapping | <EOF> | 10338695d27eSJie Liu * ----------------------------------------------------- 10348695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | | 10358695d27eSJie Liu * ^--------------------------------^----------|-------- 10368695d27eSJie Liu * | desired writeback range | see else | 10378695d27eSJie Liu * ---------------------------------^------------------| 10388695d27eSJie Liu */ 1039ad68972aSDave Chinner offset = i_size_read(inode); 104009cbfeafSKirill A. Shutemov end_index = offset >> PAGE_SHIFT; 10418695d27eSJie Liu if (page->index < end_index) 104209cbfeafSKirill A. Shutemov end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT; 10438695d27eSJie Liu else { 10448695d27eSJie Liu /* 10458695d27eSJie Liu * Check whether the page to write out is beyond or straddles 10468695d27eSJie Liu * i_size or not. 10478695d27eSJie Liu * ------------------------------------------------------- 10488695d27eSJie Liu * | file mapping | <EOF> | 10498695d27eSJie Liu * ------------------------------------------------------- 10508695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | Beyond | 10518695d27eSJie Liu * ^--------------------------------^-----------|--------- 10528695d27eSJie Liu * | | Straddles | 10538695d27eSJie Liu * ---------------------------------^-----------|--------| 10548695d27eSJie Liu */ 105509cbfeafSKirill A. Shutemov unsigned offset_into_page = offset & (PAGE_SIZE - 1); 10566b7a03f0SChristoph Hellwig 10576b7a03f0SChristoph Hellwig /* 1058ff9a28f6SJan Kara * Skip the page if it is fully outside i_size, e.g. due to a 1059ff9a28f6SJan Kara * truncate operation that is in progress. We must redirty the 1060ff9a28f6SJan Kara * page so that reclaim stops reclaiming it. Otherwise 1061ff9a28f6SJan Kara * xfs_vm_releasepage() is called on it and gets confused. 10628695d27eSJie Liu * 10638695d27eSJie Liu * Note that the end_index is unsigned long, it would overflow 10648695d27eSJie Liu * if the given offset is greater than 16TB on 32-bit system 10658695d27eSJie Liu * and if we do check the page is fully outside i_size or not 10668695d27eSJie Liu * via "if (page->index >= end_index + 1)" as "end_index + 1" 10678695d27eSJie Liu * will be evaluated to 0. Hence this page will be redirtied 10688695d27eSJie Liu * and be written out repeatedly which would result in an 10698695d27eSJie Liu * infinite loop, the user program that performs this operation 10708695d27eSJie Liu * will hang.
Instead, we can verify this situation by checking 10718695d27eSJie Liu * if the page to write is totally beyond the i_size or if it's 10728695d27eSJie Liu * offset is just equal to the EOF. 10736b7a03f0SChristoph Hellwig */ 10748695d27eSJie Liu if (page->index > end_index || 10758695d27eSJie Liu (page->index == end_index && offset_into_page == 0)) 1076ff9a28f6SJan Kara goto redirty; 10776b7a03f0SChristoph Hellwig 10786b7a03f0SChristoph Hellwig /* 10796b7a03f0SChristoph Hellwig * The page straddles i_size. It must be zeroed out on each 10806b7a03f0SChristoph Hellwig * and every writepage invocation because it may be mmapped. 10816b7a03f0SChristoph Hellwig * "A file is mapped in multiples of the page size. For a file 10826b7a03f0SChristoph Hellwig * that is not a multiple of the page size, the remaining 10836b7a03f0SChristoph Hellwig * memory is zeroed when mapped, and writes to that region are 10846b7a03f0SChristoph Hellwig * not written out to the file." 10856b7a03f0SChristoph Hellwig */ 108609cbfeafSKirill A. Shutemov zero_user_segment(page, offset_into_page, PAGE_SIZE); 10878695d27eSJie Liu 10888695d27eSJie Liu /* Adjust the end_offset to the end of file */ 10898695d27eSJie Liu end_offset = offset; 1090c59d87c4SChristoph Hellwig } 1091c59d87c4SChristoph Hellwig 1092e10de372SDave Chinner return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); 1093c59d87c4SChristoph Hellwig 1094c59d87c4SChristoph Hellwig redirty: 1095c59d87c4SChristoph Hellwig redirty_page_for_writepage(wbc, page); 1096c59d87c4SChristoph Hellwig unlock_page(page); 1097c59d87c4SChristoph Hellwig return 0; 1098c59d87c4SChristoph Hellwig } 1099c59d87c4SChristoph Hellwig 1100c59d87c4SChristoph Hellwig STATIC int 1101fbcc0256SDave Chinner xfs_vm_writepage( 1102fbcc0256SDave Chinner struct page *page, 1103fbcc0256SDave Chinner struct writeback_control *wbc) 1104fbcc0256SDave Chinner { 1105fbcc0256SDave Chinner struct xfs_writepage_ctx wpc = { 1106fbcc0256SDave Chinner .io_type = XFS_IO_INVALID, 1107fbcc0256SDave Chinner }; 1108fbcc0256SDave Chinner int ret; 1109fbcc0256SDave Chinner 1110fbcc0256SDave Chinner ret = xfs_do_writepage(page, wbc, &wpc); 1111e10de372SDave Chinner if (wpc.ioend) 1112e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1113e10de372SDave Chinner return ret; 1114fbcc0256SDave Chinner } 1115fbcc0256SDave Chinner 1116fbcc0256SDave Chinner STATIC int 1117c59d87c4SChristoph Hellwig xfs_vm_writepages( 1118c59d87c4SChristoph Hellwig struct address_space *mapping, 1119c59d87c4SChristoph Hellwig struct writeback_control *wbc) 1120c59d87c4SChristoph Hellwig { 1121fbcc0256SDave Chinner struct xfs_writepage_ctx wpc = { 1122fbcc0256SDave Chinner .io_type = XFS_IO_INVALID, 1123fbcc0256SDave Chinner }; 1124fbcc0256SDave Chinner int ret; 1125fbcc0256SDave Chinner 1126c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 11277f6d5b52SRoss Zwisler if (dax_mapping(mapping)) 11287f6d5b52SRoss Zwisler return dax_writeback_mapping_range(mapping, 11297f6d5b52SRoss Zwisler xfs_find_bdev_for_inode(mapping->host), wbc); 11307f6d5b52SRoss Zwisler 1131fbcc0256SDave Chinner ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); 1132e10de372SDave Chinner if (wpc.ioend) 1133e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1134e10de372SDave Chinner return ret; 1135c59d87c4SChristoph Hellwig } 1136c59d87c4SChristoph Hellwig 1137c59d87c4SChristoph Hellwig /* 1138c59d87c4SChristoph Hellwig * Called to move a page into cleanable state - and from there 
1139c59d87c4SChristoph Hellwig * to be released. The page should already be clean. We always 1140c59d87c4SChristoph Hellwig * have buffer heads in this call. 1141c59d87c4SChristoph Hellwig * 1142c59d87c4SChristoph Hellwig * Returns 1 if the page is ok to release, 0 otherwise. 1143c59d87c4SChristoph Hellwig */ 1144c59d87c4SChristoph Hellwig STATIC int 1145c59d87c4SChristoph Hellwig xfs_vm_releasepage( 1146c59d87c4SChristoph Hellwig struct page *page, 1147c59d87c4SChristoph Hellwig gfp_t gfp_mask) 1148c59d87c4SChristoph Hellwig { 1149c59d87c4SChristoph Hellwig int delalloc, unwritten; 1150c59d87c4SChristoph Hellwig 115134097dfeSLukas Czerner trace_xfs_releasepage(page->mapping->host, page, 0, 0); 1152c59d87c4SChristoph Hellwig 115399579cceSBrian Foster /* 115499579cceSBrian Foster * mm accommodates an old ext3 case where clean pages might not have had 115599579cceSBrian Foster * the dirty bit cleared. Thus, it can send actual dirty pages to 115699579cceSBrian Foster * ->releasepage() via shrink_active_list(). Conversely, 115799579cceSBrian Foster * block_invalidatepage() can send pages that are still marked dirty 115899579cceSBrian Foster * but otherwise have invalidated buffers. 115999579cceSBrian Foster * 116099579cceSBrian Foster * We've historically freed buffers on the latter. Instead, quietly 116199579cceSBrian Foster * filter out all dirty pages to avoid spurious buffer state warnings. 116299579cceSBrian Foster * This can likely be removed once shrink_active_list() is fixed. 116399579cceSBrian Foster */ 116499579cceSBrian Foster if (PageDirty(page)) 116599579cceSBrian Foster return 0; 116699579cceSBrian Foster 1167c59d87c4SChristoph Hellwig xfs_count_page_state(page, &delalloc, &unwritten); 1168c59d87c4SChristoph Hellwig 1169448011e2SChristoph Hellwig if (WARN_ON_ONCE(delalloc)) 1170c59d87c4SChristoph Hellwig return 0; 1171448011e2SChristoph Hellwig if (WARN_ON_ONCE(unwritten)) 1172c59d87c4SChristoph Hellwig return 0; 1173c59d87c4SChristoph Hellwig 1174c59d87c4SChristoph Hellwig return try_to_free_buffers(page); 1175c59d87c4SChristoph Hellwig } 1176c59d87c4SChristoph Hellwig 1177a719370bSDave Chinner /* 1178273dda76SChristoph Hellwig * When we map a DIO buffer, we may need to pass flags to 1179273dda76SChristoph Hellwig * xfs_end_io_direct_write to tell it what kind of write IO we are doing. 11803e12dbbdSDave Chinner * 11813e12dbbdSDave Chinner * Note that for DIO, an IO to the highest supported file block offset (i.e. 11823e12dbbdSDave Chinner * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 11833e12dbbdSDave Chinner * bit variable. Hence if we see this overflow, we have to assume that the IO is 11843e12dbbdSDave Chinner * extending the file size. We won't know for sure until IO completion is run 11853e12dbbdSDave Chinner * and the actual max write offset is communicated to the IO completion 11863e12dbbdSDave Chinner * routine. 
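 * (That wrap-around is why xfs_map_direct() below also treats a negative
 * offset + size as an append: the signed sum can only go negative when the
 * write runs up against the top of the supported file offset range.)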
1187a719370bSDave Chinner */ 1188a719370bSDave Chinner static void 1189a719370bSDave Chinner xfs_map_direct( 1190a719370bSDave Chinner struct inode *inode, 1191a719370bSDave Chinner struct buffer_head *bh_result, 1192a719370bSDave Chinner struct xfs_bmbt_irec *imap, 1193273dda76SChristoph Hellwig xfs_off_t offset) 1194a719370bSDave Chinner { 1195273dda76SChristoph Hellwig uintptr_t *flags = (uintptr_t *)&bh_result->b_private; 1196d5cc2e3fSDave Chinner xfs_off_t size = bh_result->b_size; 1197d5cc2e3fSDave Chinner 1198273dda76SChristoph Hellwig trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size, 1199273dda76SChristoph Hellwig ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap); 1200d5cc2e3fSDave Chinner 1201273dda76SChristoph Hellwig if (ISUNWRITTEN(imap)) { 1202273dda76SChristoph Hellwig *flags |= XFS_DIO_FLAG_UNWRITTEN; 1203a06c277aSDave Chinner set_buffer_defer_completion(bh_result); 1204273dda76SChristoph Hellwig } else if (offset + size > i_size_read(inode) || offset + size < 0) { 1205273dda76SChristoph Hellwig *flags |= XFS_DIO_FLAG_APPEND; 1206273dda76SChristoph Hellwig set_buffer_defer_completion(bh_result); 1207a719370bSDave Chinner } 1208a719370bSDave Chinner } 1209a719370bSDave Chinner 12101fdca9c2SDave Chinner /* 12111fdca9c2SDave Chinner * If this is O_DIRECT or the mpage code calling, tell them how large the 12121fdca9c2SDave Chinner * mapping is so that we can avoid repeated get_blocks calls. 12131fdca9c2SDave Chinner * 12141fdca9c2SDave Chinner * If the mapping spans EOF, then we have to break the mapping up as the mapping 12151fdca9c2SDave Chinner * for blocks beyond EOF must be marked new so that sub-block regions can be 12161fdca9c2SDave Chinner * correctly zeroed. We can't do this for mappings within EOF unless the mapping 12171fdca9c2SDave Chinner * was just allocated or is unwritten, otherwise the callers would overwrite 12181fdca9c2SDave Chinner * existing data with zeros. Hence we have to split the mapping into a range up 12191fdca9c2SDave Chinner * to and including EOF, and a second mapping for beyond EOF.
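 * For example (illustrative numbers): with 4k blocks, i_size = 10240 and a
 * mapping starting at offset 8192 that could cover 16384 bytes, the code
 * below trims b_size to 4096, i.e. just the block that spans EOF; blocks
 * beyond EOF are left for a second mapping so they can be reported as new.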
12201fdca9c2SDave Chinner */ 12211fdca9c2SDave Chinner static void 12221fdca9c2SDave Chinner xfs_map_trim_size( 12231fdca9c2SDave Chinner struct inode *inode, 12241fdca9c2SDave Chinner sector_t iblock, 12251fdca9c2SDave Chinner struct buffer_head *bh_result, 12261fdca9c2SDave Chinner struct xfs_bmbt_irec *imap, 12271fdca9c2SDave Chinner xfs_off_t offset, 12281fdca9c2SDave Chinner ssize_t size) 12291fdca9c2SDave Chinner { 12301fdca9c2SDave Chinner xfs_off_t mapping_size; 12311fdca9c2SDave Chinner 12321fdca9c2SDave Chinner mapping_size = imap->br_startoff + imap->br_blockcount - iblock; 12331fdca9c2SDave Chinner mapping_size <<= inode->i_blkbits; 12341fdca9c2SDave Chinner 12351fdca9c2SDave Chinner ASSERT(mapping_size > 0); 12361fdca9c2SDave Chinner if (mapping_size > size) 12371fdca9c2SDave Chinner mapping_size = size; 12381fdca9c2SDave Chinner if (offset < i_size_read(inode) && 12391fdca9c2SDave Chinner offset + mapping_size >= i_size_read(inode)) { 12401fdca9c2SDave Chinner /* limit mapping to block that spans EOF */ 12411fdca9c2SDave Chinner mapping_size = roundup_64(i_size_read(inode) - offset, 12421fdca9c2SDave Chinner 1 << inode->i_blkbits); 12431fdca9c2SDave Chinner } 12441fdca9c2SDave Chinner if (mapping_size > LONG_MAX) 12451fdca9c2SDave Chinner mapping_size = LONG_MAX; 12461fdca9c2SDave Chinner 12471fdca9c2SDave Chinner bh_result->b_size = mapping_size; 12481fdca9c2SDave Chinner } 12491fdca9c2SDave Chinner 1250c59d87c4SChristoph Hellwig STATIC int 1251c59d87c4SChristoph Hellwig __xfs_get_blocks( 1252c59d87c4SChristoph Hellwig struct inode *inode, 1253c59d87c4SChristoph Hellwig sector_t iblock, 1254c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1255c59d87c4SChristoph Hellwig int create, 12563e12dbbdSDave Chinner bool direct, 12573e12dbbdSDave Chinner bool dax_fault) 1258c59d87c4SChristoph Hellwig { 1259c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1260c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1261c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 1262c59d87c4SChristoph Hellwig int error = 0; 1263c59d87c4SChristoph Hellwig int lockmode = 0; 1264c59d87c4SChristoph Hellwig struct xfs_bmbt_irec imap; 1265c59d87c4SChristoph Hellwig int nimaps = 1; 1266c59d87c4SChristoph Hellwig xfs_off_t offset; 1267c59d87c4SChristoph Hellwig ssize_t size; 1268c59d87c4SChristoph Hellwig int new = 0; 1269c59d87c4SChristoph Hellwig 12706e8a27a8SChristoph Hellwig BUG_ON(create && !direct); 12716e8a27a8SChristoph Hellwig 1272c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 1273b474c7aeSEric Sandeen return -EIO; 1274c59d87c4SChristoph Hellwig 1275c59d87c4SChristoph Hellwig offset = (xfs_off_t)iblock << inode->i_blkbits; 1276c59d87c4SChristoph Hellwig ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1277c59d87c4SChristoph Hellwig size = bh_result->b_size; 1278c59d87c4SChristoph Hellwig 12796e8a27a8SChristoph Hellwig if (!create && offset >= i_size_read(inode)) 1280c59d87c4SChristoph Hellwig return 0; 1281c59d87c4SChristoph Hellwig 1282507630b2SDave Chinner /* 1283507630b2SDave Chinner * Direct I/O is usually done on preallocated files, so try getting 12846e8a27a8SChristoph Hellwig * a block mapping without an exclusive lock first. 
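 * (Note that xfs_ilock_data_map_shared() may still return XFS_ILOCK_EXCL,
 * e.g. if the extent list first has to be read in; the allocation path
 * below demotes the lock back to shared before xfs_iomap_write_direct().)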
1285507630b2SDave Chinner */ 1286309ecac8SChristoph Hellwig lockmode = xfs_ilock_data_map_shared(ip); 1287c59d87c4SChristoph Hellwig 1288d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 1289d2c28191SDave Chinner if (offset + size > mp->m_super->s_maxbytes) 1290d2c28191SDave Chinner size = mp->m_super->s_maxbytes - offset; 1291c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1292c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 1293c59d87c4SChristoph Hellwig 12945c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 12955c8ed202SDave Chinner &imap, &nimaps, XFS_BMAPI_ENTIRE); 1296c59d87c4SChristoph Hellwig if (error) 1297c59d87c4SChristoph Hellwig goto out_unlock; 1298c59d87c4SChristoph Hellwig 12991ca19157SDave Chinner /* for DAX, we convert unwritten extents directly */ 1300c59d87c4SChristoph Hellwig if (create && 1301c59d87c4SChristoph Hellwig (!nimaps || 1302c59d87c4SChristoph Hellwig (imap.br_startblock == HOLESTARTBLOCK || 13031ca19157SDave Chinner imap.br_startblock == DELAYSTARTBLOCK) || 13041ca19157SDave Chinner (IS_DAX(inode) && ISUNWRITTEN(&imap)))) { 1305507630b2SDave Chinner /* 1306009c6e87SBrian Foster * xfs_iomap_write_direct() expects the shared lock. It 1307009c6e87SBrian Foster * is unlocked on return. 1308507630b2SDave Chinner */ 1309009c6e87SBrian Foster if (lockmode == XFS_ILOCK_EXCL) 1310009c6e87SBrian Foster xfs_ilock_demote(ip, lockmode); 1311009c6e87SBrian Foster 1312c59d87c4SChristoph Hellwig error = xfs_iomap_write_direct(ip, offset, size, 1313c59d87c4SChristoph Hellwig &imap, nimaps); 1314507630b2SDave Chinner if (error) 13152451337dSDave Chinner return error; 1316d3bc815aSDave Chinner new = 1; 13176b698edeSDave Chinner 1318d5cc2e3fSDave Chinner trace_xfs_get_blocks_alloc(ip, offset, size, 1319d5cc2e3fSDave Chinner ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN 1320d5cc2e3fSDave Chinner : XFS_IO_DELALLOC, &imap); 1321c59d87c4SChristoph Hellwig } else if (nimaps) { 1322d5cc2e3fSDave Chinner trace_xfs_get_blocks_found(ip, offset, size, 1323d5cc2e3fSDave Chinner ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN 1324d5cc2e3fSDave Chinner : XFS_IO_OVERWRITE, &imap); 1325507630b2SDave Chinner xfs_iunlock(ip, lockmode); 1326c59d87c4SChristoph Hellwig } else { 1327c59d87c4SChristoph Hellwig trace_xfs_get_blocks_notfound(ip, offset, size); 1328c59d87c4SChristoph Hellwig goto out_unlock; 1329c59d87c4SChristoph Hellwig } 1330c59d87c4SChristoph Hellwig 13311ca19157SDave Chinner if (IS_DAX(inode) && create) { 13321ca19157SDave Chinner ASSERT(!ISUNWRITTEN(&imap)); 13331ca19157SDave Chinner /* zeroing is not needed at a higher layer */ 13341ca19157SDave Chinner new = 0; 13351ca19157SDave Chinner } 13361ca19157SDave Chinner 13371fdca9c2SDave Chinner /* trim mapping down to size requested */ 13386e8a27a8SChristoph Hellwig xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); 13391fdca9c2SDave Chinner 1340c59d87c4SChristoph Hellwig /* 1341a719370bSDave Chinner * For unwritten extents do not report a disk address in the buffered 1342a719370bSDave Chinner * read case (treat as if we're reading into a hole). 
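 * (In other words, a buffered read over an unwritten extent must see zeros,
 * so the buffer is left unmapped as if it were a hole; in the create case
 * the extent is mapped and flagged via set_buffer_unwritten() instead.)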
1343c59d87c4SChristoph Hellwig */ 1344a719370bSDave Chinner if (imap.br_startblock != HOLESTARTBLOCK && 1345a719370bSDave Chinner imap.br_startblock != DELAYSTARTBLOCK && 1346a719370bSDave Chinner (create || !ISUNWRITTEN(&imap))) { 1347c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh_result, &imap, offset); 1348a719370bSDave Chinner if (ISUNWRITTEN(&imap)) 1349c59d87c4SChristoph Hellwig set_buffer_unwritten(bh_result); 1350a719370bSDave Chinner /* direct IO needs special help */ 13516e8a27a8SChristoph Hellwig if (create) { 1352273dda76SChristoph Hellwig if (dax_fault) 1353273dda76SChristoph Hellwig ASSERT(!ISUNWRITTEN(&imap)); 1354273dda76SChristoph Hellwig else 1355273dda76SChristoph Hellwig xfs_map_direct(inode, bh_result, &imap, offset); 1356273dda76SChristoph Hellwig } 1357c59d87c4SChristoph Hellwig } 1358c59d87c4SChristoph Hellwig 1359c59d87c4SChristoph Hellwig /* 1360c59d87c4SChristoph Hellwig * If this is a realtime file, data may be on a different device 1361c59d87c4SChristoph Hellwig * to that pointed to from the buffer_head b_bdev currently. 1362c59d87c4SChristoph Hellwig */ 1363c59d87c4SChristoph Hellwig bh_result->b_bdev = xfs_find_bdev_for_inode(inode); 1364c59d87c4SChristoph Hellwig 1365c59d87c4SChristoph Hellwig /* 1366c59d87c4SChristoph Hellwig * If we previously allocated a block out beyond EOF and we are now 1367c59d87c4SChristoph Hellwig * coming back to use it, then we will need to flag it as new even if it 1368c59d87c4SChristoph Hellwig * has a disk address. 1369c59d87c4SChristoph Hellwig * 1370c59d87c4SChristoph Hellwig * With sub-block writes into unwritten extents we also need to mark 1371c59d87c4SChristoph Hellwig * the buffer as new so that the unwritten parts of the buffer get 1372c59d87c4SChristoph Hellwig * correctly zeroed.
1373c59d87c4SChristoph Hellwig */ 1374c59d87c4SChristoph Hellwig if (create && 1375c59d87c4SChristoph Hellwig ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1376c59d87c4SChristoph Hellwig (offset >= i_size_read(inode)) || 1377c59d87c4SChristoph Hellwig (new || ISUNWRITTEN(&imap)))) 1378c59d87c4SChristoph Hellwig set_buffer_new(bh_result); 1379c59d87c4SChristoph Hellwig 13806e8a27a8SChristoph Hellwig BUG_ON(direct && imap.br_startblock == DELAYSTARTBLOCK); 1381c59d87c4SChristoph Hellwig 1382c59d87c4SChristoph Hellwig return 0; 1383c59d87c4SChristoph Hellwig 1384c59d87c4SChristoph Hellwig out_unlock: 1385c59d87c4SChristoph Hellwig xfs_iunlock(ip, lockmode); 13862451337dSDave Chinner return error; 1387c59d87c4SChristoph Hellwig } 1388c59d87c4SChristoph Hellwig 1389c59d87c4SChristoph Hellwig int 1390c59d87c4SChristoph Hellwig xfs_get_blocks( 1391c59d87c4SChristoph Hellwig struct inode *inode, 1392c59d87c4SChristoph Hellwig sector_t iblock, 1393c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1394c59d87c4SChristoph Hellwig int create) 1395c59d87c4SChristoph Hellwig { 13963e12dbbdSDave Chinner return __xfs_get_blocks(inode, iblock, bh_result, create, false, false); 1397c59d87c4SChristoph Hellwig } 1398c59d87c4SChristoph Hellwig 13996b698edeSDave Chinner int 1400c59d87c4SChristoph Hellwig xfs_get_blocks_direct( 1401c59d87c4SChristoph Hellwig struct inode *inode, 1402c59d87c4SChristoph Hellwig sector_t iblock, 1403c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1404c59d87c4SChristoph Hellwig int create) 1405c59d87c4SChristoph Hellwig { 14063e12dbbdSDave Chinner return __xfs_get_blocks(inode, iblock, bh_result, create, true, false); 14073e12dbbdSDave Chinner } 14083e12dbbdSDave Chinner 14093e12dbbdSDave Chinner int 14103e12dbbdSDave Chinner xfs_get_blocks_dax_fault( 14113e12dbbdSDave Chinner struct inode *inode, 14123e12dbbdSDave Chinner sector_t iblock, 14133e12dbbdSDave Chinner struct buffer_head *bh_result, 14143e12dbbdSDave Chinner int create) 14153e12dbbdSDave Chinner { 14163e12dbbdSDave Chinner return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); 1417c59d87c4SChristoph Hellwig } 1418c59d87c4SChristoph Hellwig 1419273dda76SChristoph Hellwig /* 1420273dda76SChristoph Hellwig * Complete a direct I/O write request. 1421273dda76SChristoph Hellwig * 1422273dda76SChristoph Hellwig * xfs_map_direct passes us some flags in the private data to tell us what to 1423273dda76SChristoph Hellwig * do. If no flags are set, then the write IO is an overwrite wholly within 1424273dda76SChristoph Hellwig * the existing allocated file size and so there is nothing for us to do. 1425273dda76SChristoph Hellwig * 1426273dda76SChristoph Hellwig * Note that in this case the completion can be called in interrupt context, 1427273dda76SChristoph Hellwig * whereas if we have flags set we will always be called in task context 1428273dda76SChristoph Hellwig * (i.e. from a workqueue). 
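 * (For example, a write into preallocated (unwritten) space sets
 * XFS_DIO_FLAG_UNWRITTEN and is deferred to a workqueue for extent
 * conversion, while a write that extends i_size sets XFS_DIO_FLAG_APPEND
 * and is deferred so the on-disk size can be updated in task context.)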
1429273dda76SChristoph Hellwig */ 1430fa8d972dSChristoph Hellwig int 1431273dda76SChristoph Hellwig xfs_end_io_direct_write( 1432273dda76SChristoph Hellwig struct kiocb *iocb, 1433c59d87c4SChristoph Hellwig loff_t offset, 1434273dda76SChristoph Hellwig ssize_t size, 1435273dda76SChristoph Hellwig void *private) 1436c59d87c4SChristoph Hellwig { 1437273dda76SChristoph Hellwig struct inode *inode = file_inode(iocb->ki_filp); 1438273dda76SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1439273dda76SChristoph Hellwig uintptr_t flags = (uintptr_t)private; 1440273dda76SChristoph Hellwig int error = 0; 14412ba66237SChristoph Hellwig 1442273dda76SChristoph Hellwig trace_xfs_end_io_direct_write(ip, offset, size); 1443273dda76SChristoph Hellwig 1444e372843aSChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 1445273dda76SChristoph Hellwig return -EIO; 1446273dda76SChristoph Hellwig 1447273dda76SChristoph Hellwig if (size <= 0) 1448273dda76SChristoph Hellwig return size; 1449c59d87c4SChristoph Hellwig 1450c59d87c4SChristoph Hellwig /* 1451273dda76SChristoph Hellwig * The flags tell us whether we are doing unwritten extent conversions 14526dfa1b67SDave Chinner * or an append transaction that updates the on-disk file size. These 14536dfa1b67SDave Chinner * cases are the only cases where we should *potentially* be needing 1454a06c277aSDave Chinner * to update the VFS inode size. 1455273dda76SChristoph Hellwig */ 1456273dda76SChristoph Hellwig if (flags == 0) { 1457273dda76SChristoph Hellwig ASSERT(offset + size <= i_size_read(inode)); 1458273dda76SChristoph Hellwig return 0; 1459273dda76SChristoph Hellwig } 1460273dda76SChristoph Hellwig 1461273dda76SChristoph Hellwig /* 14626dfa1b67SDave Chinner * We need to update the in-core inode size here so that we don't end up 1463a06c277aSDave Chinner * with the on-disk inode size being outside the in-core inode size. We 1464a06c277aSDave Chinner * have no other method of updating EOF for AIO, so always do it here 1465a06c277aSDave Chinner * if necessary. 1466b9d59846SDave Chinner * 1467b9d59846SDave Chinner * We need to lock the test/set EOF update as we can be racing with 1468b9d59846SDave Chinner * other IO completions here to update the EOF. Failing to serialise 1469b9d59846SDave Chinner * here can result in EOF moving backwards and Bad Things Happen when 1470b9d59846SDave Chinner * that occurs. 
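 * (The i_flags_lock taken below provides that serialisation: the
 * i_size_read()/i_size_write() update is done under the spinlock so a
 * racing completion cannot move the in-core EOF backwards.)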
14712813d682SChristoph Hellwig */ 1472273dda76SChristoph Hellwig spin_lock(&ip->i_flags_lock); 14732ba66237SChristoph Hellwig if (offset + size > i_size_read(inode)) 14742ba66237SChristoph Hellwig i_size_write(inode, offset + size); 1475273dda76SChristoph Hellwig spin_unlock(&ip->i_flags_lock); 14762813d682SChristoph Hellwig 1477273dda76SChristoph Hellwig if (flags & XFS_DIO_FLAG_UNWRITTEN) { 1478273dda76SChristoph Hellwig trace_xfs_end_io_direct_write_unwritten(ip, offset, size); 1479c59d87c4SChristoph Hellwig 1480273dda76SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, offset, size); 1481273dda76SChristoph Hellwig } else if (flags & XFS_DIO_FLAG_APPEND) { 1482273dda76SChristoph Hellwig trace_xfs_end_io_direct_write_append(ip, offset, size); 1483273dda76SChristoph Hellwig 1484e372843aSChristoph Hellwig error = xfs_setfilesize(ip, offset, size); 14852ba66237SChristoph Hellwig } 1486c59d87c4SChristoph Hellwig 1487273dda76SChristoph Hellwig return error; 14886e1ba0bcSDave Chinner } 14896e1ba0bcSDave Chinner 1490c59d87c4SChristoph Hellwig STATIC ssize_t 1491c59d87c4SChristoph Hellwig xfs_vm_direct_IO( 1492c59d87c4SChristoph Hellwig struct kiocb *iocb, 1493c8b8e32dSChristoph Hellwig struct iov_iter *iter) 1494c59d87c4SChristoph Hellwig { 1495c59d87c4SChristoph Hellwig /* 1496fa8d972dSChristoph Hellwig * We just need the method present so that open/fcntl allow direct I/O. 1497c59d87c4SChristoph Hellwig */ 1498fa8d972dSChristoph Hellwig return -EINVAL; 1499c59d87c4SChristoph Hellwig } 1500c59d87c4SChristoph Hellwig 1501c59d87c4SChristoph Hellwig STATIC sector_t 1502c59d87c4SChristoph Hellwig xfs_vm_bmap( 1503c59d87c4SChristoph Hellwig struct address_space *mapping, 1504c59d87c4SChristoph Hellwig sector_t block) 1505c59d87c4SChristoph Hellwig { 1506c59d87c4SChristoph Hellwig struct inode *inode = (struct inode *)mapping->host; 1507c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1508c59d87c4SChristoph Hellwig 1509c59d87c4SChristoph Hellwig trace_xfs_vm_bmap(XFS_I(inode)); 1510c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_IOLOCK_SHARED); 15114bc1ea6bSDave Chinner filemap_write_and_wait(mapping); 1512c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1513c59d87c4SChristoph Hellwig return generic_block_bmap(mapping, block, xfs_get_blocks); 1514c59d87c4SChristoph Hellwig } 1515c59d87c4SChristoph Hellwig 1516c59d87c4SChristoph Hellwig STATIC int 1517c59d87c4SChristoph Hellwig xfs_vm_readpage( 1518c59d87c4SChristoph Hellwig struct file *unused, 1519c59d87c4SChristoph Hellwig struct page *page) 1520c59d87c4SChristoph Hellwig { 1521121e213eSDave Chinner trace_xfs_vm_readpage(page->mapping->host, 1); 1522c59d87c4SChristoph Hellwig return mpage_readpage(page, xfs_get_blocks); 1523c59d87c4SChristoph Hellwig } 1524c59d87c4SChristoph Hellwig 1525c59d87c4SChristoph Hellwig STATIC int 1526c59d87c4SChristoph Hellwig xfs_vm_readpages( 1527c59d87c4SChristoph Hellwig struct file *unused, 1528c59d87c4SChristoph Hellwig struct address_space *mapping, 1529c59d87c4SChristoph Hellwig struct list_head *pages, 1530c59d87c4SChristoph Hellwig unsigned nr_pages) 1531c59d87c4SChristoph Hellwig { 1532121e213eSDave Chinner trace_xfs_vm_readpages(mapping->host, nr_pages); 1533c59d87c4SChristoph Hellwig return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1534c59d87c4SChristoph Hellwig } 1535c59d87c4SChristoph Hellwig 153622e757a4SDave Chinner /* 153722e757a4SDave Chinner * This is basically a copy of __set_page_dirty_buffers() with one 153822e757a4SDave Chinner * small tweak: 
buffers beyond EOF do not get marked dirty. If we mark them 153922e757a4SDave Chinner * dirty, we'll never be able to clean them because we don't write buffers 154022e757a4SDave Chinner * beyond EOF, and that means we can't invalidate pages that span EOF 154122e757a4SDave Chinner * that have been marked dirty. Further, the dirty state can leak into 154222e757a4SDave Chinner * the file interior if the file is extended, resulting in all sorts of 154322e757a4SDave Chinner * bad things happening as the state does not match the underlying data. 154422e757a4SDave Chinner * 154522e757a4SDave Chinner * XXX: this really indicates that bufferheads in XFS need to die. Warts like 154622e757a4SDave Chinner * this only exist because of bufferheads and how the generic code manages them. 154722e757a4SDave Chinner */ 154822e757a4SDave Chinner STATIC int 154922e757a4SDave Chinner xfs_vm_set_page_dirty( 155022e757a4SDave Chinner struct page *page) 155122e757a4SDave Chinner { 155222e757a4SDave Chinner struct address_space *mapping = page->mapping; 155322e757a4SDave Chinner struct inode *inode = mapping->host; 155422e757a4SDave Chinner loff_t end_offset; 155522e757a4SDave Chinner loff_t offset; 155622e757a4SDave Chinner int newly_dirty; 155722e757a4SDave Chinner 155822e757a4SDave Chinner if (unlikely(!mapping)) 155922e757a4SDave Chinner return !TestSetPageDirty(page); 156022e757a4SDave Chinner 156122e757a4SDave Chinner end_offset = i_size_read(inode); 156222e757a4SDave Chinner offset = page_offset(page); 156322e757a4SDave Chinner 156422e757a4SDave Chinner spin_lock(&mapping->private_lock); 156522e757a4SDave Chinner if (page_has_buffers(page)) { 156622e757a4SDave Chinner struct buffer_head *head = page_buffers(page); 156722e757a4SDave Chinner struct buffer_head *bh = head; 156822e757a4SDave Chinner 156922e757a4SDave Chinner do { 157022e757a4SDave Chinner if (offset < end_offset) 157122e757a4SDave Chinner set_buffer_dirty(bh); 157222e757a4SDave Chinner bh = bh->b_this_page; 157322e757a4SDave Chinner offset += 1 << inode->i_blkbits; 157422e757a4SDave Chinner } while (bh != head); 157522e757a4SDave Chinner } 1576c4843a75SGreg Thelen /* 157781f8c3a4SJohannes Weiner * Lock out page->mem_cgroup migration to keep PageDirty 157881f8c3a4SJohannes Weiner * synchronized with per-memcg dirty page counters. 1579c4843a75SGreg Thelen */ 158062cccb8cSJohannes Weiner lock_page_memcg(page); 158122e757a4SDave Chinner newly_dirty = !TestSetPageDirty(page); 158222e757a4SDave Chinner spin_unlock(&mapping->private_lock); 158322e757a4SDave Chinner 158422e757a4SDave Chinner if (newly_dirty) { 158522e757a4SDave Chinner /* sigh - __set_page_dirty() is static, so copy it here, too */ 158622e757a4SDave Chinner unsigned long flags; 158722e757a4SDave Chinner 158822e757a4SDave Chinner spin_lock_irqsave(&mapping->tree_lock, flags); 158922e757a4SDave Chinner if (page->mapping) { /* Race with truncate? 
*/ 159022e757a4SDave Chinner WARN_ON_ONCE(!PageUptodate(page)); 159162cccb8cSJohannes Weiner account_page_dirtied(page, mapping); 159222e757a4SDave Chinner radix_tree_tag_set(&mapping->page_tree, 159322e757a4SDave Chinner page_index(page), PAGECACHE_TAG_DIRTY); 159422e757a4SDave Chinner } 159522e757a4SDave Chinner spin_unlock_irqrestore(&mapping->tree_lock, flags); 159622e757a4SDave Chinner } 159762cccb8cSJohannes Weiner unlock_page_memcg(page); 1598c4843a75SGreg Thelen if (newly_dirty) 1599c4843a75SGreg Thelen __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 160022e757a4SDave Chinner return newly_dirty; 160122e757a4SDave Chinner } 160222e757a4SDave Chinner 1603c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = { 1604c59d87c4SChristoph Hellwig .readpage = xfs_vm_readpage, 1605c59d87c4SChristoph Hellwig .readpages = xfs_vm_readpages, 1606c59d87c4SChristoph Hellwig .writepage = xfs_vm_writepage, 1607c59d87c4SChristoph Hellwig .writepages = xfs_vm_writepages, 160822e757a4SDave Chinner .set_page_dirty = xfs_vm_set_page_dirty, 1609c59d87c4SChristoph Hellwig .releasepage = xfs_vm_releasepage, 1610c59d87c4SChristoph Hellwig .invalidatepage = xfs_vm_invalidatepage, 1611c59d87c4SChristoph Hellwig .bmap = xfs_vm_bmap, 1612c59d87c4SChristoph Hellwig .direct_IO = xfs_vm_direct_IO, 1613c59d87c4SChristoph Hellwig .migratepage = buffer_migrate_page, 1614c59d87c4SChristoph Hellwig .is_partially_uptodate = block_is_partially_uptodate, 1615c59d87c4SChristoph Hellwig .error_remove_page = generic_error_remove_page, 1616c59d87c4SChristoph Hellwig }; 1617
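/*
 * Usage sketch (assumed wiring, not shown in this file): the table above only
 * takes effect once it is attached to an inode's page cache mapping during
 * inode setup, e.g.:
 *
 *	inode->i_mapping->a_ops = &xfs_address_space_operations;
 *
 * In XFS this hookup is expected to live in the inode initialisation path
 * (e.g. xfs_setup_inode()).
 */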