1c59d87c4SChristoph Hellwig /* 2c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3c59d87c4SChristoph Hellwig * All Rights Reserved. 4c59d87c4SChristoph Hellwig * 5c59d87c4SChristoph Hellwig * This program is free software; you can redistribute it and/or 6c59d87c4SChristoph Hellwig * modify it under the terms of the GNU General Public License as 7c59d87c4SChristoph Hellwig * published by the Free Software Foundation. 8c59d87c4SChristoph Hellwig * 9c59d87c4SChristoph Hellwig * This program is distributed in the hope that it would be useful, 10c59d87c4SChristoph Hellwig * but WITHOUT ANY WARRANTY; without even the implied warranty of 11c59d87c4SChristoph Hellwig * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12c59d87c4SChristoph Hellwig * GNU General Public License for more details. 13c59d87c4SChristoph Hellwig * 14c59d87c4SChristoph Hellwig * You should have received a copy of the GNU General Public License 15c59d87c4SChristoph Hellwig * along with this program; if not, write the Free Software Foundation, 16c59d87c4SChristoph Hellwig * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17c59d87c4SChristoph Hellwig */ 18c59d87c4SChristoph Hellwig #include "xfs.h" 1970a9883cSDave Chinner #include "xfs_shared.h" 20239880efSDave Chinner #include "xfs_format.h" 21239880efSDave Chinner #include "xfs_log_format.h" 22239880efSDave Chinner #include "xfs_trans_resv.h" 23c59d87c4SChristoph Hellwig #include "xfs_mount.h" 24c59d87c4SChristoph Hellwig #include "xfs_inode.h" 25239880efSDave Chinner #include "xfs_trans.h" 26281627dfSChristoph Hellwig #include "xfs_inode_item.h" 27c59d87c4SChristoph Hellwig #include "xfs_alloc.h" 28c59d87c4SChristoph Hellwig #include "xfs_error.h" 29c59d87c4SChristoph Hellwig #include "xfs_iomap.h" 30c59d87c4SChristoph Hellwig #include "xfs_trace.h" 31c59d87c4SChristoph Hellwig #include "xfs_bmap.h" 3268988114SDave Chinner #include "xfs_bmap_util.h" 33a4fbe6abSDave Chinner #include "xfs_bmap_btree.h" 34ef473667SDarrick J. 
Wong #include "xfs_reflink.h" 35c59d87c4SChristoph Hellwig #include <linux/gfp.h> 36c59d87c4SChristoph Hellwig #include <linux/mpage.h> 37c59d87c4SChristoph Hellwig #include <linux/pagevec.h> 38c59d87c4SChristoph Hellwig #include <linux/writeback.h> 39c59d87c4SChristoph Hellwig 40fbcc0256SDave Chinner /* 41fbcc0256SDave Chinner * structure owned by writepages passed to individual writepage calls 42fbcc0256SDave Chinner */ 43fbcc0256SDave Chinner struct xfs_writepage_ctx { 44fbcc0256SDave Chinner struct xfs_bmbt_irec imap; 45fbcc0256SDave Chinner bool imap_valid; 46fbcc0256SDave Chinner unsigned int io_type; 47fbcc0256SDave Chinner struct xfs_ioend *ioend; 48fbcc0256SDave Chinner sector_t last_block; 49fbcc0256SDave Chinner }; 50fbcc0256SDave Chinner 51c59d87c4SChristoph Hellwig void 52c59d87c4SChristoph Hellwig xfs_count_page_state( 53c59d87c4SChristoph Hellwig struct page *page, 54c59d87c4SChristoph Hellwig int *delalloc, 55c59d87c4SChristoph Hellwig int *unwritten) 56c59d87c4SChristoph Hellwig { 57c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 58c59d87c4SChristoph Hellwig 59c59d87c4SChristoph Hellwig *delalloc = *unwritten = 0; 60c59d87c4SChristoph Hellwig 61c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 62c59d87c4SChristoph Hellwig do { 63c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) 64c59d87c4SChristoph Hellwig (*unwritten) = 1; 65c59d87c4SChristoph Hellwig else if (buffer_delay(bh)) 66c59d87c4SChristoph Hellwig (*delalloc) = 1; 67c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 68c59d87c4SChristoph Hellwig } 69c59d87c4SChristoph Hellwig 7020a90f58SRoss Zwisler struct block_device * 71c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode( 72c59d87c4SChristoph Hellwig struct inode *inode) 73c59d87c4SChristoph Hellwig { 74c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 75c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 76c59d87c4SChristoph Hellwig 77c59d87c4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip)) 78c59d87c4SChristoph Hellwig return mp->m_rtdev_targp->bt_bdev; 79c59d87c4SChristoph Hellwig else 80c59d87c4SChristoph Hellwig return mp->m_ddev_targp->bt_bdev; 81c59d87c4SChristoph Hellwig } 82c59d87c4SChristoph Hellwig 83c59d87c4SChristoph Hellwig /* 8437992c18SDave Chinner * We're now finished for good with this page. Update the page state via the 8537992c18SDave Chinner * associated buffer_heads, paying attention to the start and end offsets that 8637992c18SDave Chinner * we need to process on the page. 8728b783e4SDave Chinner * 8828b783e4SDave Chinner * Landmine Warning: bh->b_end_io() will call end_page_writeback() on the last 8928b783e4SDave Chinner * buffer in the IO. Once it does this, it is unsafe to access the bufferhead or 9028b783e4SDave Chinner * the page at all, as we may be racing with memory reclaim and it can free both 9128b783e4SDave Chinner * the bufferhead chain and the page as it will see the page as clean and 9228b783e4SDave Chinner * unused. 
9337992c18SDave Chinner */ 9437992c18SDave Chinner static void 9537992c18SDave Chinner xfs_finish_page_writeback( 9637992c18SDave Chinner struct inode *inode, 9737992c18SDave Chinner struct bio_vec *bvec, 9837992c18SDave Chinner int error) 9937992c18SDave Chinner { 10037992c18SDave Chinner unsigned int end = bvec->bv_offset + bvec->bv_len - 1; 10128b783e4SDave Chinner struct buffer_head *head, *bh, *next; 10237992c18SDave Chinner unsigned int off = 0; 10328b783e4SDave Chinner unsigned int bsize; 10437992c18SDave Chinner 10537992c18SDave Chinner ASSERT(bvec->bv_offset < PAGE_SIZE); 10693407472SFabian Frederick ASSERT((bvec->bv_offset & (i_blocksize(inode) - 1)) == 0); 10737992c18SDave Chinner ASSERT(end < PAGE_SIZE); 10893407472SFabian Frederick ASSERT((bvec->bv_len & (i_blocksize(inode) - 1)) == 0); 10937992c18SDave Chinner 11037992c18SDave Chinner bh = head = page_buffers(bvec->bv_page); 11137992c18SDave Chinner 11228b783e4SDave Chinner bsize = bh->b_size; 11337992c18SDave Chinner do { 11428b783e4SDave Chinner next = bh->b_this_page; 11537992c18SDave Chinner if (off < bvec->bv_offset) 11637992c18SDave Chinner goto next_bh; 11737992c18SDave Chinner if (off > end) 11837992c18SDave Chinner break; 11937992c18SDave Chinner bh->b_end_io(bh, !error); 12037992c18SDave Chinner next_bh: 12128b783e4SDave Chinner off += bsize; 12228b783e4SDave Chinner } while ((bh = next) != head); 12337992c18SDave Chinner } 12437992c18SDave Chinner 12537992c18SDave Chinner /* 12637992c18SDave Chinner * We're now finished for good with this ioend structure. Update the page 12737992c18SDave Chinner * state, release holds on bios, and finally free up memory. Do not use the 12837992c18SDave Chinner * ioend after this. 129c59d87c4SChristoph Hellwig */ 130c59d87c4SChristoph Hellwig STATIC void 131c59d87c4SChristoph Hellwig xfs_destroy_ioend( 1320e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 1330e51a8e1SChristoph Hellwig int error) 134c59d87c4SChristoph Hellwig { 13537992c18SDave Chinner struct inode *inode = ioend->io_inode; 1360e51a8e1SChristoph Hellwig struct bio *last = ioend->io_bio; 13737992c18SDave Chinner struct bio *bio, *next; 138c59d87c4SChristoph Hellwig 1390e51a8e1SChristoph Hellwig for (bio = &ioend->io_inline_bio; bio; bio = next) { 14037992c18SDave Chinner struct bio_vec *bvec; 14137992c18SDave Chinner int i; 14237992c18SDave Chinner 1430e51a8e1SChristoph Hellwig /* 1440e51a8e1SChristoph Hellwig * For the last bio, bi_private points to the ioend, so we 1450e51a8e1SChristoph Hellwig * need to explicitly end the iteration here. 1460e51a8e1SChristoph Hellwig */ 1470e51a8e1SChristoph Hellwig if (bio == last) 1480e51a8e1SChristoph Hellwig next = NULL; 1490e51a8e1SChristoph Hellwig else 15037992c18SDave Chinner next = bio->bi_private; 15137992c18SDave Chinner 15237992c18SDave Chinner /* walk each page on bio, ending page IO on them */ 15337992c18SDave Chinner bio_for_each_segment_all(bvec, bio, i) 15437992c18SDave Chinner xfs_finish_page_writeback(inode, bvec, error); 15537992c18SDave Chinner 15637992c18SDave Chinner bio_put(bio); 157c59d87c4SChristoph Hellwig } 158c59d87c4SChristoph Hellwig } 159c59d87c4SChristoph Hellwig 160c59d87c4SChristoph Hellwig /* 161fc0063c4SChristoph Hellwig * Fast and loose check if this write could update the on-disk inode size. 
162fc0063c4SChristoph Hellwig */ 163fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 164fc0063c4SChristoph Hellwig { 165fc0063c4SChristoph Hellwig return ioend->io_offset + ioend->io_size > 166fc0063c4SChristoph Hellwig XFS_I(ioend->io_inode)->i_d.di_size; 167fc0063c4SChristoph Hellwig } 168fc0063c4SChristoph Hellwig 169281627dfSChristoph Hellwig STATIC int 170281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc( 171281627dfSChristoph Hellwig struct xfs_ioend *ioend) 172281627dfSChristoph Hellwig { 173281627dfSChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 174281627dfSChristoph Hellwig struct xfs_trans *tp; 175281627dfSChristoph Hellwig int error; 176281627dfSChristoph Hellwig 177253f4911SChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 178253f4911SChristoph Hellwig if (error) 179281627dfSChristoph Hellwig return error; 180281627dfSChristoph Hellwig 181281627dfSChristoph Hellwig ioend->io_append_trans = tp; 182281627dfSChristoph Hellwig 183281627dfSChristoph Hellwig /* 184437a255aSDave Chinner * We may pass freeze protection with a transaction. So tell lockdep 185d9457dc0SJan Kara * we released it. 186d9457dc0SJan Kara */ 187bee9182dSOleg Nesterov __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); 188d9457dc0SJan Kara /* 189281627dfSChristoph Hellwig * We hand off the transaction to the completion thread now, so 190281627dfSChristoph Hellwig * clear the flag here. 191281627dfSChristoph Hellwig */ 192281627dfSChristoph Hellwig current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 193281627dfSChristoph Hellwig return 0; 194281627dfSChristoph Hellwig } 195281627dfSChristoph Hellwig 196fc0063c4SChristoph Hellwig /* 1972813d682SChristoph Hellwig * Update on-disk file size now that data has been written to disk. 
198c59d87c4SChristoph Hellwig */ 199281627dfSChristoph Hellwig STATIC int 200e372843aSChristoph Hellwig __xfs_setfilesize( 2012ba66237SChristoph Hellwig struct xfs_inode *ip, 2022ba66237SChristoph Hellwig struct xfs_trans *tp, 2032ba66237SChristoph Hellwig xfs_off_t offset, 2042ba66237SChristoph Hellwig size_t size) 205c59d87c4SChristoph Hellwig { 206c59d87c4SChristoph Hellwig xfs_fsize_t isize; 207c59d87c4SChristoph Hellwig 208aa6bf01dSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 2092ba66237SChristoph Hellwig isize = xfs_new_eof(ip, offset + size); 210281627dfSChristoph Hellwig if (!isize) { 211281627dfSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 2124906e215SChristoph Hellwig xfs_trans_cancel(tp); 213281627dfSChristoph Hellwig return 0; 214c59d87c4SChristoph Hellwig } 215c59d87c4SChristoph Hellwig 2162ba66237SChristoph Hellwig trace_xfs_setfilesize(ip, offset, size); 217281627dfSChristoph Hellwig 218281627dfSChristoph Hellwig ip->i_d.di_size = isize; 219281627dfSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 220281627dfSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 221281627dfSChristoph Hellwig 22270393313SChristoph Hellwig return xfs_trans_commit(tp); 223c59d87c4SChristoph Hellwig } 224c59d87c4SChristoph Hellwig 225e372843aSChristoph Hellwig int 226e372843aSChristoph Hellwig xfs_setfilesize( 227e372843aSChristoph Hellwig struct xfs_inode *ip, 228e372843aSChristoph Hellwig xfs_off_t offset, 229e372843aSChristoph Hellwig size_t size) 230e372843aSChristoph Hellwig { 231e372843aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 232e372843aSChristoph Hellwig struct xfs_trans *tp; 233e372843aSChristoph Hellwig int error; 234e372843aSChristoph Hellwig 235e372843aSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 236e372843aSChristoph Hellwig if (error) 237e372843aSChristoph Hellwig return error; 238e372843aSChristoph Hellwig 239e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, offset, size); 240e372843aSChristoph Hellwig } 241e372843aSChristoph Hellwig 2422ba66237SChristoph Hellwig STATIC int 2432ba66237SChristoph Hellwig xfs_setfilesize_ioend( 2440e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 2450e51a8e1SChristoph Hellwig int error) 2462ba66237SChristoph Hellwig { 2472ba66237SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 2482ba66237SChristoph Hellwig struct xfs_trans *tp = ioend->io_append_trans; 2492ba66237SChristoph Hellwig 2502ba66237SChristoph Hellwig /* 2512ba66237SChristoph Hellwig * The transaction may have been allocated in the I/O submission thread, 2522ba66237SChristoph Hellwig * thus we need to mark ourselves as being in a transaction manually. 2532ba66237SChristoph Hellwig * Similarly for freeze protection. 2542ba66237SChristoph Hellwig */ 2552ba66237SChristoph Hellwig current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 256bee9182dSOleg Nesterov __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 2572ba66237SChristoph Hellwig 2585cb13dcdSZhaohongjiang /* we abort the update if there was an IO error */ 2590e51a8e1SChristoph Hellwig if (error) { 2605cb13dcdSZhaohongjiang xfs_trans_cancel(tp); 2610e51a8e1SChristoph Hellwig return error; 2625cb13dcdSZhaohongjiang } 2635cb13dcdSZhaohongjiang 264e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 2652ba66237SChristoph Hellwig } 2662ba66237SChristoph Hellwig 267c59d87c4SChristoph Hellwig /* 268c59d87c4SChristoph Hellwig * IO write completion. 
269c59d87c4SChristoph Hellwig */
270c59d87c4SChristoph Hellwig STATIC void
271c59d87c4SChristoph Hellwig xfs_end_io(
272c59d87c4SChristoph Hellwig struct work_struct *work)
273c59d87c4SChristoph Hellwig {
2740e51a8e1SChristoph Hellwig struct xfs_ioend *ioend =
2750e51a8e1SChristoph Hellwig container_of(work, struct xfs_ioend, io_work);
276c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode);
277*787eb485SChristoph Hellwig xfs_off_t offset = ioend->io_offset;
278*787eb485SChristoph Hellwig size_t size = ioend->io_size;
2790e51a8e1SChristoph Hellwig int error = ioend->io_bio->bi_error;
280c59d87c4SChristoph Hellwig 
281af055e37SBrian Foster /*
282*787eb485SChristoph Hellwig * Just clean up the in-memory structures if the fs has been shut down.
283af055e37SBrian Foster */
284*787eb485SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
2850e51a8e1SChristoph Hellwig error = -EIO;
28643caeb18SDarrick J. Wong goto done;
28743caeb18SDarrick J. Wong }
28843caeb18SDarrick J. Wong 
28943caeb18SDarrick J. Wong /*
290*787eb485SChristoph Hellwig * Clean up any COW blocks on an I/O error.
291c59d87c4SChristoph Hellwig */
292*787eb485SChristoph Hellwig if (unlikely(error)) {
293*787eb485SChristoph Hellwig switch (ioend->io_type) {
294*787eb485SChristoph Hellwig case XFS_IO_COW:
295*787eb485SChristoph Hellwig xfs_reflink_cancel_cow_range(ip, offset, size, true);
296*787eb485SChristoph Hellwig break;
297*787eb485SChristoph Hellwig }
298*787eb485SChristoph Hellwig 
2995cb13dcdSZhaohongjiang goto done;
300*787eb485SChristoph Hellwig }
301*787eb485SChristoph Hellwig 
302*787eb485SChristoph Hellwig /*
303*787eb485SChristoph Hellwig * Success: commit the COW or unwritten blocks if needed.
304*787eb485SChristoph Hellwig */
305*787eb485SChristoph Hellwig switch (ioend->io_type) {
306*787eb485SChristoph Hellwig case XFS_IO_COW:
307*787eb485SChristoph Hellwig error = xfs_reflink_end_cow(ip, offset, size);
308*787eb485SChristoph Hellwig break;
309*787eb485SChristoph Hellwig case XFS_IO_UNWRITTEN:
310*787eb485SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, offset, size);
311*787eb485SChristoph Hellwig break;
312*787eb485SChristoph Hellwig default:
313*787eb485SChristoph Hellwig ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans);
314*787eb485SChristoph Hellwig break;
31584803fb7SChristoph Hellwig }
31684803fb7SChristoph Hellwig 
31704f658eeSChristoph Hellwig done:
318*787eb485SChristoph Hellwig if (ioend->io_append_trans)
319*787eb485SChristoph Hellwig error = xfs_setfilesize_ioend(ioend, error);
3200e51a8e1SChristoph Hellwig xfs_destroy_ioend(ioend, error);
321c59d87c4SChristoph Hellwig }
322c59d87c4SChristoph Hellwig 
3230e51a8e1SChristoph Hellwig STATIC void
3240e51a8e1SChristoph Hellwig xfs_end_bio(
3250e51a8e1SChristoph Hellwig struct bio *bio)
326c59d87c4SChristoph Hellwig {
3270e51a8e1SChristoph Hellwig struct xfs_ioend *ioend = bio->bi_private;
3280e51a8e1SChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount;
329c59d87c4SChristoph Hellwig 
33043caeb18SDarrick J.
Wong if (ioend->io_type == XFS_IO_UNWRITTEN || ioend->io_type == XFS_IO_COW) 3310e51a8e1SChristoph Hellwig queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 3320e51a8e1SChristoph Hellwig else if (ioend->io_append_trans) 3330e51a8e1SChristoph Hellwig queue_work(mp->m_data_workqueue, &ioend->io_work); 3340e51a8e1SChristoph Hellwig else 3350e51a8e1SChristoph Hellwig xfs_destroy_ioend(ioend, bio->bi_error); 336c59d87c4SChristoph Hellwig } 337c59d87c4SChristoph Hellwig 338c59d87c4SChristoph Hellwig STATIC int 339c59d87c4SChristoph Hellwig xfs_map_blocks( 340c59d87c4SChristoph Hellwig struct inode *inode, 341c59d87c4SChristoph Hellwig loff_t offset, 342c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 343988ef927SDave Chinner int type) 344c59d87c4SChristoph Hellwig { 345c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 346c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 34793407472SFabian Frederick ssize_t count = i_blocksize(inode); 348c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 349c59d87c4SChristoph Hellwig int error = 0; 350c59d87c4SChristoph Hellwig int bmapi_flags = XFS_BMAPI_ENTIRE; 351c59d87c4SChristoph Hellwig int nimaps = 1; 352c59d87c4SChristoph Hellwig 353c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 354b474c7aeSEric Sandeen return -EIO; 355c59d87c4SChristoph Hellwig 356ef473667SDarrick J. Wong ASSERT(type != XFS_IO_COW); 3570d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) 358c59d87c4SChristoph Hellwig bmapi_flags |= XFS_BMAPI_IGSTATE; 359c59d87c4SChristoph Hellwig 360c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED); 361c59d87c4SChristoph Hellwig ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 362c59d87c4SChristoph Hellwig (ip->i_df.if_flags & XFS_IFEXTENTS)); 363d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 364c59d87c4SChristoph Hellwig 365d2c28191SDave Chinner if (offset + count > mp->m_super->s_maxbytes) 366d2c28191SDave Chinner count = mp->m_super->s_maxbytes - offset; 367c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 368c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 3695c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 3705c8ed202SDave Chinner imap, &nimaps, bmapi_flags); 371ef473667SDarrick J. Wong /* 372ef473667SDarrick J. Wong * Truncate an overwrite extent if there's a pending CoW 373ef473667SDarrick J. Wong * reservation before the end of this extent. This forces us 374ef473667SDarrick J. Wong * to come back to writepage to take care of the CoW. 375ef473667SDarrick J. Wong */ 376ef473667SDarrick J. Wong if (nimaps && type == XFS_IO_OVERWRITE) 377ef473667SDarrick J. Wong xfs_reflink_trim_irec_to_next_cow(ip, offset_fsb, imap); 378c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 379c59d87c4SChristoph Hellwig 380c59d87c4SChristoph Hellwig if (error) 3812451337dSDave Chinner return error; 382c59d87c4SChristoph Hellwig 3830d882a36SAlain Renaud if (type == XFS_IO_DELALLOC && 384c59d87c4SChristoph Hellwig (!nimaps || isnullstartblock(imap->br_startblock))) { 38560b4984fSDarrick J. Wong error = xfs_iomap_write_allocate(ip, XFS_DATA_FORK, offset, 38660b4984fSDarrick J. Wong imap); 387c59d87c4SChristoph Hellwig if (!error) 388ef473667SDarrick J. 
Wong trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); 3892451337dSDave Chinner return error; 390c59d87c4SChristoph Hellwig } 391c59d87c4SChristoph Hellwig 392c59d87c4SChristoph Hellwig #ifdef DEBUG 3930d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) { 394c59d87c4SChristoph Hellwig ASSERT(nimaps); 395c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 396c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 397c59d87c4SChristoph Hellwig } 398c59d87c4SChristoph Hellwig #endif 399c59d87c4SChristoph Hellwig if (nimaps) 400c59d87c4SChristoph Hellwig trace_xfs_map_blocks_found(ip, offset, count, type, imap); 401c59d87c4SChristoph Hellwig return 0; 402c59d87c4SChristoph Hellwig } 403c59d87c4SChristoph Hellwig 404fbcc0256SDave Chinner STATIC bool 405c59d87c4SChristoph Hellwig xfs_imap_valid( 406c59d87c4SChristoph Hellwig struct inode *inode, 407c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 408c59d87c4SChristoph Hellwig xfs_off_t offset) 409c59d87c4SChristoph Hellwig { 410c59d87c4SChristoph Hellwig offset >>= inode->i_blkbits; 411c59d87c4SChristoph Hellwig 412c59d87c4SChristoph Hellwig return offset >= imap->br_startoff && 413c59d87c4SChristoph Hellwig offset < imap->br_startoff + imap->br_blockcount; 414c59d87c4SChristoph Hellwig } 415c59d87c4SChristoph Hellwig 416c59d87c4SChristoph Hellwig STATIC void 417c59d87c4SChristoph Hellwig xfs_start_buffer_writeback( 418c59d87c4SChristoph Hellwig struct buffer_head *bh) 419c59d87c4SChristoph Hellwig { 420c59d87c4SChristoph Hellwig ASSERT(buffer_mapped(bh)); 421c59d87c4SChristoph Hellwig ASSERT(buffer_locked(bh)); 422c59d87c4SChristoph Hellwig ASSERT(!buffer_delay(bh)); 423c59d87c4SChristoph Hellwig ASSERT(!buffer_unwritten(bh)); 424c59d87c4SChristoph Hellwig 425c59d87c4SChristoph Hellwig mark_buffer_async_write(bh); 426c59d87c4SChristoph Hellwig set_buffer_uptodate(bh); 427c59d87c4SChristoph Hellwig clear_buffer_dirty(bh); 428c59d87c4SChristoph Hellwig } 429c59d87c4SChristoph Hellwig 430c59d87c4SChristoph Hellwig STATIC void 431c59d87c4SChristoph Hellwig xfs_start_page_writeback( 432c59d87c4SChristoph Hellwig struct page *page, 433e10de372SDave Chinner int clear_dirty) 434c59d87c4SChristoph Hellwig { 435c59d87c4SChristoph Hellwig ASSERT(PageLocked(page)); 436c59d87c4SChristoph Hellwig ASSERT(!PageWriteback(page)); 4370d085a52SDave Chinner 4380d085a52SDave Chinner /* 4390d085a52SDave Chinner * if the page was not fully cleaned, we need to ensure that the higher 4400d085a52SDave Chinner * layers come back to it correctly. That means we need to keep the page 4410d085a52SDave Chinner * dirty, and for WB_SYNC_ALL writeback we need to ensure the 4420d085a52SDave Chinner * PAGECACHE_TAG_TOWRITE index mark is not removed so another attempt to 4430d085a52SDave Chinner * write this page in this writeback sweep will be made. 
4440d085a52SDave Chinner */
4450d085a52SDave Chinner if (clear_dirty) {
446c59d87c4SChristoph Hellwig clear_page_dirty_for_io(page);
447c59d87c4SChristoph Hellwig set_page_writeback(page);
4480d085a52SDave Chinner } else
4490d085a52SDave Chinner set_page_writeback_keepwrite(page);
4500d085a52SDave Chinner 
451c59d87c4SChristoph Hellwig unlock_page(page);
452c59d87c4SChristoph Hellwig }
453c59d87c4SChristoph Hellwig 
454c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
455c59d87c4SChristoph Hellwig {
456c59d87c4SChristoph Hellwig return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
457c59d87c4SChristoph Hellwig }
458c59d87c4SChristoph Hellwig 
459c59d87c4SChristoph Hellwig /*
460bb18782aSDave Chinner * Submit the bio for an ioend. We are passed an ioend with a bio attached to
461bb18782aSDave Chinner * it, and we submit that bio. The ioend may be used for multiple bio
462bb18782aSDave Chinner * submissions, so we only want to allocate an append transaction for the ioend
463bb18782aSDave Chinner * once. In the case of multiple bio submission, each bio will take an IO
464bb18782aSDave Chinner * reference to the ioend to ensure that the ioend completion is only done once
465bb18782aSDave Chinner * all bios have been submitted and the ioend is really done.
4667bf7f352SDave Chinner *
4677bf7f352SDave Chinner * If @status is non-zero, it means that we have a situation where some part of
4687bf7f352SDave Chinner * the submission process has failed after we have marked pages for writeback
469bb18782aSDave Chinner * and unlocked them. In this situation, we need to fail the bio and ioend
470bb18782aSDave Chinner * rather than submit it to IO. This typically only happens on a filesystem
471bb18782aSDave Chinner * shutdown.
472c59d87c4SChristoph Hellwig */
473e10de372SDave Chinner STATIC int
474c59d87c4SChristoph Hellwig xfs_submit_ioend(
475c59d87c4SChristoph Hellwig struct writeback_control *wbc,
4760e51a8e1SChristoph Hellwig struct xfs_ioend *ioend,
477e10de372SDave Chinner int status)
478c59d87c4SChristoph Hellwig {
4795eda4300SDarrick J. Wong /* Convert CoW extents to regular */
4805eda4300SDarrick J. Wong if (!status && ioend->io_type == XFS_IO_COW) {
4815eda4300SDarrick J. Wong status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
4825eda4300SDarrick J. Wong ioend->io_offset, ioend->io_size);
4835eda4300SDarrick J. Wong }
4845eda4300SDarrick J. Wong 
485e10de372SDave Chinner /* Reserve log space if we might write beyond the on-disk inode size. */
486e10de372SDave Chinner if (!status &&
4870e51a8e1SChristoph Hellwig ioend->io_type != XFS_IO_UNWRITTEN &&
488bb18782aSDave Chinner xfs_ioend_is_append(ioend) &&
489bb18782aSDave Chinner !ioend->io_append_trans)
490e10de372SDave Chinner status = xfs_setfilesize_trans_alloc(ioend);
491bb18782aSDave Chinner 
4920e51a8e1SChristoph Hellwig ioend->io_bio->bi_private = ioend;
4930e51a8e1SChristoph Hellwig ioend->io_bio->bi_end_io = xfs_end_bio;
4947637241eSJens Axboe ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
49570fd7614SChristoph Hellwig 
4967bf7f352SDave Chinner /*
4977bf7f352SDave Chinner * If we are failing the IO now, just mark the ioend with an
4987bf7f352SDave Chinner * error and finish it. This will run IO completion immediately
4997bf7f352SDave Chinner * as there is only one reference to the ioend at this point in
5007bf7f352SDave Chinner * time.
5017bf7f352SDave Chinner */
502e10de372SDave Chinner if (status) {
5030e51a8e1SChristoph Hellwig ioend->io_bio->bi_error = status;
5040e51a8e1SChristoph Hellwig bio_endio(ioend->io_bio);
505e10de372SDave Chinner return status;
5067bf7f352SDave Chinner }
5077bf7f352SDave Chinner 
5084e49ea4aSMike Christie submit_bio(ioend->io_bio);
509e10de372SDave Chinner return 0;
510c59d87c4SChristoph Hellwig }
511c59d87c4SChristoph Hellwig 
5120e51a8e1SChristoph Hellwig static void
5130e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(
5140e51a8e1SChristoph Hellwig struct bio *bio,
5150e51a8e1SChristoph Hellwig struct buffer_head *bh)
5160e51a8e1SChristoph Hellwig {
5170e51a8e1SChristoph Hellwig bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
5180e51a8e1SChristoph Hellwig bio->bi_bdev = bh->b_bdev;
5190e51a8e1SChristoph Hellwig }
5200e51a8e1SChristoph Hellwig 
5210e51a8e1SChristoph Hellwig static struct xfs_ioend *
5220e51a8e1SChristoph Hellwig xfs_alloc_ioend(
5230e51a8e1SChristoph Hellwig struct inode *inode,
5240e51a8e1SChristoph Hellwig unsigned int type,
5250e51a8e1SChristoph Hellwig xfs_off_t offset,
5260e51a8e1SChristoph Hellwig struct buffer_head *bh)
5270e51a8e1SChristoph Hellwig {
5280e51a8e1SChristoph Hellwig struct xfs_ioend *ioend;
5290e51a8e1SChristoph Hellwig struct bio *bio;
5300e51a8e1SChristoph Hellwig 
5310e51a8e1SChristoph Hellwig bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, xfs_ioend_bioset);
5320e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(bio, bh);
5330e51a8e1SChristoph Hellwig 
5340e51a8e1SChristoph Hellwig ioend = container_of(bio, struct xfs_ioend, io_inline_bio);
5350e51a8e1SChristoph Hellwig INIT_LIST_HEAD(&ioend->io_list);
5360e51a8e1SChristoph Hellwig ioend->io_type = type;
5370e51a8e1SChristoph Hellwig ioend->io_inode = inode;
5380e51a8e1SChristoph Hellwig ioend->io_size = 0;
5390e51a8e1SChristoph Hellwig ioend->io_offset = offset;
5400e51a8e1SChristoph Hellwig INIT_WORK(&ioend->io_work, xfs_end_io);
5410e51a8e1SChristoph Hellwig ioend->io_append_trans = NULL;
5420e51a8e1SChristoph Hellwig ioend->io_bio = bio;
5430e51a8e1SChristoph Hellwig return ioend;
5440e51a8e1SChristoph Hellwig }
5450e51a8e1SChristoph Hellwig 
5460e51a8e1SChristoph Hellwig /*
5470e51a8e1SChristoph Hellwig * Allocate a new bio, and chain the old bio to the new one.
5480e51a8e1SChristoph Hellwig *
5490e51a8e1SChristoph Hellwig * Note that we have to perform the chaining in this unintuitive order
5500e51a8e1SChristoph Hellwig * so that the bi_private linkage is set up in the right direction for the
5510e51a8e1SChristoph Hellwig * traversal in xfs_destroy_ioend().
5520e51a8e1SChristoph Hellwig */ 5530e51a8e1SChristoph Hellwig static void 5540e51a8e1SChristoph Hellwig xfs_chain_bio( 5550e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 5560e51a8e1SChristoph Hellwig struct writeback_control *wbc, 5570e51a8e1SChristoph Hellwig struct buffer_head *bh) 5580e51a8e1SChristoph Hellwig { 5590e51a8e1SChristoph Hellwig struct bio *new; 5600e51a8e1SChristoph Hellwig 5610e51a8e1SChristoph Hellwig new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 5620e51a8e1SChristoph Hellwig xfs_init_bio_from_bh(new, bh); 5630e51a8e1SChristoph Hellwig 5640e51a8e1SChristoph Hellwig bio_chain(ioend->io_bio, new); 5650e51a8e1SChristoph Hellwig bio_get(ioend->io_bio); /* for xfs_destroy_ioend */ 5667637241eSJens Axboe ioend->io_bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 5674e49ea4aSMike Christie submit_bio(ioend->io_bio); 5680e51a8e1SChristoph Hellwig ioend->io_bio = new; 5690e51a8e1SChristoph Hellwig } 5700e51a8e1SChristoph Hellwig 571c59d87c4SChristoph Hellwig /* 572c59d87c4SChristoph Hellwig * Test to see if we've been building up a completion structure for 573c59d87c4SChristoph Hellwig * earlier buffers -- if so, we try to append to this ioend if we 574c59d87c4SChristoph Hellwig * can, otherwise we finish off any current ioend and start another. 575e10de372SDave Chinner * Return the ioend we finished off so that the caller can submit it 576e10de372SDave Chinner * once it has finished processing the dirty page. 577c59d87c4SChristoph Hellwig */ 578c59d87c4SChristoph Hellwig STATIC void 579c59d87c4SChristoph Hellwig xfs_add_to_ioend( 580c59d87c4SChristoph Hellwig struct inode *inode, 581c59d87c4SChristoph Hellwig struct buffer_head *bh, 582c59d87c4SChristoph Hellwig xfs_off_t offset, 583e10de372SDave Chinner struct xfs_writepage_ctx *wpc, 584bb18782aSDave Chinner struct writeback_control *wbc, 585e10de372SDave Chinner struct list_head *iolist) 586c59d87c4SChristoph Hellwig { 587fbcc0256SDave Chinner if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type || 5880df61da8SDarrick J. Wong bh->b_blocknr != wpc->last_block + 1 || 5890df61da8SDarrick J. Wong offset != wpc->ioend->io_offset + wpc->ioend->io_size) { 590e10de372SDave Chinner if (wpc->ioend) 591e10de372SDave Chinner list_add(&wpc->ioend->io_list, iolist); 5920e51a8e1SChristoph Hellwig wpc->ioend = xfs_alloc_ioend(inode, wpc->io_type, offset, bh); 593c59d87c4SChristoph Hellwig } 594c59d87c4SChristoph Hellwig 5950e51a8e1SChristoph Hellwig /* 5960e51a8e1SChristoph Hellwig * If the buffer doesn't fit into the bio we need to allocate a new 5970e51a8e1SChristoph Hellwig * one. This shouldn't happen more than once for a given buffer. 
5980e51a8e1SChristoph Hellwig */ 5990e51a8e1SChristoph Hellwig while (xfs_bio_add_buffer(wpc->ioend->io_bio, bh) != bh->b_size) 6000e51a8e1SChristoph Hellwig xfs_chain_bio(wpc->ioend, wbc, bh); 601bb18782aSDave Chinner 602fbcc0256SDave Chinner wpc->ioend->io_size += bh->b_size; 603fbcc0256SDave Chinner wpc->last_block = bh->b_blocknr; 604e10de372SDave Chinner xfs_start_buffer_writeback(bh); 605c59d87c4SChristoph Hellwig } 606c59d87c4SChristoph Hellwig 607c59d87c4SChristoph Hellwig STATIC void 608c59d87c4SChristoph Hellwig xfs_map_buffer( 609c59d87c4SChristoph Hellwig struct inode *inode, 610c59d87c4SChristoph Hellwig struct buffer_head *bh, 611c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 612c59d87c4SChristoph Hellwig xfs_off_t offset) 613c59d87c4SChristoph Hellwig { 614c59d87c4SChristoph Hellwig sector_t bn; 615c59d87c4SChristoph Hellwig struct xfs_mount *m = XFS_I(inode)->i_mount; 616c59d87c4SChristoph Hellwig xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); 617c59d87c4SChristoph Hellwig xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); 618c59d87c4SChristoph Hellwig 619c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 620c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 621c59d87c4SChristoph Hellwig 622c59d87c4SChristoph Hellwig bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + 623c59d87c4SChristoph Hellwig ((offset - iomap_offset) >> inode->i_blkbits); 624c59d87c4SChristoph Hellwig 625c59d87c4SChristoph Hellwig ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); 626c59d87c4SChristoph Hellwig 627c59d87c4SChristoph Hellwig bh->b_blocknr = bn; 628c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 629c59d87c4SChristoph Hellwig } 630c59d87c4SChristoph Hellwig 631c59d87c4SChristoph Hellwig STATIC void 632c59d87c4SChristoph Hellwig xfs_map_at_offset( 633c59d87c4SChristoph Hellwig struct inode *inode, 634c59d87c4SChristoph Hellwig struct buffer_head *bh, 635c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 636c59d87c4SChristoph Hellwig xfs_off_t offset) 637c59d87c4SChristoph Hellwig { 638c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 639c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 640c59d87c4SChristoph Hellwig 641c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh, imap, offset); 642c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 643c59d87c4SChristoph Hellwig clear_buffer_delay(bh); 644c59d87c4SChristoph Hellwig clear_buffer_unwritten(bh); 645c59d87c4SChristoph Hellwig } 646c59d87c4SChristoph Hellwig 647c59d87c4SChristoph Hellwig /* 648a49935f2SDave Chinner * Test if a given page contains at least one buffer of a given @type. 649a49935f2SDave Chinner * If @check_all_buffers is true, then we walk all the buffers in the page to 650a49935f2SDave Chinner * try to find one of the type passed in. If it is not set, then the caller only 651a49935f2SDave Chinner * needs to check the first buffer on the page for a match. 
652c59d87c4SChristoph Hellwig */ 653a49935f2SDave Chinner STATIC bool 6546ffc4db5SDave Chinner xfs_check_page_type( 655c59d87c4SChristoph Hellwig struct page *page, 656a49935f2SDave Chinner unsigned int type, 657a49935f2SDave Chinner bool check_all_buffers) 658c59d87c4SChristoph Hellwig { 659a49935f2SDave Chinner struct buffer_head *bh; 660a49935f2SDave Chinner struct buffer_head *head; 661c59d87c4SChristoph Hellwig 662a49935f2SDave Chinner if (PageWriteback(page)) 663a49935f2SDave Chinner return false; 664a49935f2SDave Chinner if (!page->mapping) 665a49935f2SDave Chinner return false; 666a49935f2SDave Chinner if (!page_has_buffers(page)) 667a49935f2SDave Chinner return false; 668c59d87c4SChristoph Hellwig 669c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 670c59d87c4SChristoph Hellwig do { 671a49935f2SDave Chinner if (buffer_unwritten(bh)) { 672a49935f2SDave Chinner if (type == XFS_IO_UNWRITTEN) 673a49935f2SDave Chinner return true; 674a49935f2SDave Chinner } else if (buffer_delay(bh)) { 675805eeb8eSDan Carpenter if (type == XFS_IO_DELALLOC) 676a49935f2SDave Chinner return true; 677a49935f2SDave Chinner } else if (buffer_dirty(bh) && buffer_mapped(bh)) { 678805eeb8eSDan Carpenter if (type == XFS_IO_OVERWRITE) 679a49935f2SDave Chinner return true; 680a49935f2SDave Chinner } 681a49935f2SDave Chinner 682a49935f2SDave Chinner /* If we are only checking the first buffer, we are done now. */ 683a49935f2SDave Chinner if (!check_all_buffers) 684c59d87c4SChristoph Hellwig break; 685c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 686c59d87c4SChristoph Hellwig 687a49935f2SDave Chinner return false; 688c59d87c4SChristoph Hellwig } 689c59d87c4SChristoph Hellwig 690c59d87c4SChristoph Hellwig STATIC void 691c59d87c4SChristoph Hellwig xfs_vm_invalidatepage( 692c59d87c4SChristoph Hellwig struct page *page, 693d47992f8SLukas Czerner unsigned int offset, 694d47992f8SLukas Czerner unsigned int length) 695c59d87c4SChristoph Hellwig { 69634097dfeSLukas Czerner trace_xfs_invalidatepage(page->mapping->host, page, offset, 69734097dfeSLukas Czerner length); 69834097dfeSLukas Czerner block_invalidatepage(page, offset, length); 699c59d87c4SChristoph Hellwig } 700c59d87c4SChristoph Hellwig 701c59d87c4SChristoph Hellwig /* 702c59d87c4SChristoph Hellwig * If the page has delalloc buffers on it, we need to punch them out before we 703c59d87c4SChristoph Hellwig * invalidate the page. If we don't, we leave a stale delalloc mapping on the 704c59d87c4SChristoph Hellwig * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read 705c59d87c4SChristoph Hellwig * is done on that same region - the delalloc extent is returned when none is 706c59d87c4SChristoph Hellwig * supposed to be there. 707c59d87c4SChristoph Hellwig * 708c59d87c4SChristoph Hellwig * We prevent this by truncating away the delalloc regions on the page before 709c59d87c4SChristoph Hellwig * invalidating it. Because they are delalloc, we can do this without needing a 710c59d87c4SChristoph Hellwig * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this 711c59d87c4SChristoph Hellwig * truncation without a transaction as there is no space left for block 712c59d87c4SChristoph Hellwig * reservation (typically why we see a ENOSPC in writeback). 713c59d87c4SChristoph Hellwig * 714c59d87c4SChristoph Hellwig * This is not a performance critical path, so for now just do the punching a 715c59d87c4SChristoph Hellwig * buffer head at a time. 
716c59d87c4SChristoph Hellwig */ 717c59d87c4SChristoph Hellwig STATIC void 718c59d87c4SChristoph Hellwig xfs_aops_discard_page( 719c59d87c4SChristoph Hellwig struct page *page) 720c59d87c4SChristoph Hellwig { 721c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 722c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 723c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 724c59d87c4SChristoph Hellwig loff_t offset = page_offset(page); 725c59d87c4SChristoph Hellwig 726a49935f2SDave Chinner if (!xfs_check_page_type(page, XFS_IO_DELALLOC, true)) 727c59d87c4SChristoph Hellwig goto out_invalidate; 728c59d87c4SChristoph Hellwig 729c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 730c59d87c4SChristoph Hellwig goto out_invalidate; 731c59d87c4SChristoph Hellwig 732c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 733c59d87c4SChristoph Hellwig "page discard on page %p, inode 0x%llx, offset %llu.", 734c59d87c4SChristoph Hellwig page, ip->i_ino, offset); 735c59d87c4SChristoph Hellwig 736c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 737c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 738c59d87c4SChristoph Hellwig do { 739c59d87c4SChristoph Hellwig int error; 740c59d87c4SChristoph Hellwig xfs_fileoff_t start_fsb; 741c59d87c4SChristoph Hellwig 742c59d87c4SChristoph Hellwig if (!buffer_delay(bh)) 743c59d87c4SChristoph Hellwig goto next_buffer; 744c59d87c4SChristoph Hellwig 745c59d87c4SChristoph Hellwig start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 746c59d87c4SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); 747c59d87c4SChristoph Hellwig if (error) { 748c59d87c4SChristoph Hellwig /* something screwed, just bail */ 749c59d87c4SChristoph Hellwig if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 750c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 751c59d87c4SChristoph Hellwig "page discard unable to remove delalloc mapping."); 752c59d87c4SChristoph Hellwig } 753c59d87c4SChristoph Hellwig break; 754c59d87c4SChristoph Hellwig } 755c59d87c4SChristoph Hellwig next_buffer: 75693407472SFabian Frederick offset += i_blocksize(inode); 757c59d87c4SChristoph Hellwig 758c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 759c59d87c4SChristoph Hellwig 760c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 761c59d87c4SChristoph Hellwig out_invalidate: 76209cbfeafSKirill A. Shutemov xfs_vm_invalidatepage(page, 0, PAGE_SIZE); 763c59d87c4SChristoph Hellwig return; 764c59d87c4SChristoph Hellwig } 765c59d87c4SChristoph Hellwig 766ef473667SDarrick J. Wong static int 767ef473667SDarrick J. Wong xfs_map_cow( 768ef473667SDarrick J. Wong struct xfs_writepage_ctx *wpc, 769ef473667SDarrick J. Wong struct inode *inode, 770ef473667SDarrick J. Wong loff_t offset, 771ef473667SDarrick J. Wong unsigned int *new_type) 772ef473667SDarrick J. Wong { 773ef473667SDarrick J. Wong struct xfs_inode *ip = XFS_I(inode); 774ef473667SDarrick J. Wong struct xfs_bmbt_irec imap; 775092d5d9dSChristoph Hellwig bool is_cow = false; 776ef473667SDarrick J. Wong int error; 777ef473667SDarrick J. Wong 778ef473667SDarrick J. Wong /* 779ef473667SDarrick J. Wong * If we already have a valid COW mapping keep using it. 780ef473667SDarrick J. Wong */ 781ef473667SDarrick J. Wong if (wpc->io_type == XFS_IO_COW) { 782ef473667SDarrick J. Wong wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap, offset); 783ef473667SDarrick J. Wong if (wpc->imap_valid) { 784ef473667SDarrick J. Wong *new_type = XFS_IO_COW; 785ef473667SDarrick J. 
Wong return 0; 786ef473667SDarrick J. Wong } 787ef473667SDarrick J. Wong } 788ef473667SDarrick J. Wong 789ef473667SDarrick J. Wong /* 790ef473667SDarrick J. Wong * Else we need to check if there is a COW mapping at this offset. 791ef473667SDarrick J. Wong */ 792ef473667SDarrick J. Wong xfs_ilock(ip, XFS_ILOCK_SHARED); 793092d5d9dSChristoph Hellwig is_cow = xfs_reflink_find_cow_mapping(ip, offset, &imap); 794ef473667SDarrick J. Wong xfs_iunlock(ip, XFS_ILOCK_SHARED); 795ef473667SDarrick J. Wong 796ef473667SDarrick J. Wong if (!is_cow) 797ef473667SDarrick J. Wong return 0; 798ef473667SDarrick J. Wong 799ef473667SDarrick J. Wong /* 800ef473667SDarrick J. Wong * And if the COW mapping has a delayed extent here we need to 801ef473667SDarrick J. Wong * allocate real space for it now. 802ef473667SDarrick J. Wong */ 803092d5d9dSChristoph Hellwig if (isnullstartblock(imap.br_startblock)) { 804ef473667SDarrick J. Wong error = xfs_iomap_write_allocate(ip, XFS_COW_FORK, offset, 805ef473667SDarrick J. Wong &imap); 806ef473667SDarrick J. Wong if (error) 807ef473667SDarrick J. Wong return error; 808ef473667SDarrick J. Wong } 809ef473667SDarrick J. Wong 810ef473667SDarrick J. Wong wpc->io_type = *new_type = XFS_IO_COW; 811ef473667SDarrick J. Wong wpc->imap_valid = true; 812ef473667SDarrick J. Wong wpc->imap = imap; 813ef473667SDarrick J. Wong return 0; 814ef473667SDarrick J. Wong } 815ef473667SDarrick J. Wong 816c59d87c4SChristoph Hellwig /* 817e10de372SDave Chinner * We implement an immediate ioend submission policy here to avoid needing to 818e10de372SDave Chinner * chain multiple ioends and hence nest mempool allocations which can violate 819e10de372SDave Chinner * forward progress guarantees we need to provide. The current ioend we are 820e10de372SDave Chinner * adding buffers to is cached on the writepage context, and if the new buffer 821e10de372SDave Chinner * does not append to the cached ioend it will create a new ioend and cache that 822e10de372SDave Chinner * instead. 823e10de372SDave Chinner * 824e10de372SDave Chinner * If a new ioend is created and cached, the old ioend is returned and queued 825e10de372SDave Chinner * locally for submission once the entire page is processed or an error has been 826e10de372SDave Chinner * detected. While ioends are submitted immediately after they are completed, 827e10de372SDave Chinner * batching optimisations are provided by higher level block plugging. 828e10de372SDave Chinner * 829e10de372SDave Chinner * At the end of a writeback pass, there will be a cached ioend remaining on the 830e10de372SDave Chinner * writepage context that the caller will need to submit. 831e10de372SDave Chinner */ 832bfce7d2eSDave Chinner static int 833bfce7d2eSDave Chinner xfs_writepage_map( 834bfce7d2eSDave Chinner struct xfs_writepage_ctx *wpc, 835e10de372SDave Chinner struct writeback_control *wbc, 836bfce7d2eSDave Chinner struct inode *inode, 837bfce7d2eSDave Chinner struct page *page, 838bfce7d2eSDave Chinner loff_t offset, 839bfce7d2eSDave Chinner __uint64_t end_offset) 840bfce7d2eSDave Chinner { 841e10de372SDave Chinner LIST_HEAD(submit_list); 842e10de372SDave Chinner struct xfs_ioend *ioend, *next; 843bfce7d2eSDave Chinner struct buffer_head *bh, *head; 84493407472SFabian Frederick ssize_t len = i_blocksize(inode); 845bfce7d2eSDave Chinner int error = 0; 846bfce7d2eSDave Chinner int count = 0; 847e10de372SDave Chinner int uptodate = 1; 848ef473667SDarrick J. 
Wong unsigned int new_type; 849bfce7d2eSDave Chinner 850bfce7d2eSDave Chinner bh = head = page_buffers(page); 851bfce7d2eSDave Chinner offset = page_offset(page); 852bfce7d2eSDave Chinner do { 853bfce7d2eSDave Chinner if (offset >= end_offset) 854bfce7d2eSDave Chinner break; 855bfce7d2eSDave Chinner if (!buffer_uptodate(bh)) 856bfce7d2eSDave Chinner uptodate = 0; 857bfce7d2eSDave Chinner 858bfce7d2eSDave Chinner /* 859bfce7d2eSDave Chinner * set_page_dirty dirties all buffers in a page, independent 860bfce7d2eSDave Chinner * of their state. The dirty state however is entirely 861bfce7d2eSDave Chinner * meaningless for holes (!mapped && uptodate), so skip 862bfce7d2eSDave Chinner * buffers covering holes here. 863bfce7d2eSDave Chinner */ 864bfce7d2eSDave Chinner if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 865bfce7d2eSDave Chinner wpc->imap_valid = false; 866bfce7d2eSDave Chinner continue; 867bfce7d2eSDave Chinner } 868bfce7d2eSDave Chinner 869ef473667SDarrick J. Wong if (buffer_unwritten(bh)) 870ef473667SDarrick J. Wong new_type = XFS_IO_UNWRITTEN; 871ef473667SDarrick J. Wong else if (buffer_delay(bh)) 872ef473667SDarrick J. Wong new_type = XFS_IO_DELALLOC; 873ef473667SDarrick J. Wong else if (buffer_uptodate(bh)) 874ef473667SDarrick J. Wong new_type = XFS_IO_OVERWRITE; 875ef473667SDarrick J. Wong else { 876bfce7d2eSDave Chinner if (PageUptodate(page)) 877bfce7d2eSDave Chinner ASSERT(buffer_mapped(bh)); 878bfce7d2eSDave Chinner /* 879bfce7d2eSDave Chinner * This buffer is not uptodate and will not be 880bfce7d2eSDave Chinner * written to disk. Ensure that we will put any 881bfce7d2eSDave Chinner * subsequent writeable buffers into a new 882bfce7d2eSDave Chinner * ioend. 883bfce7d2eSDave Chinner */ 884bfce7d2eSDave Chinner wpc->imap_valid = false; 885bfce7d2eSDave Chinner continue; 886bfce7d2eSDave Chinner } 887bfce7d2eSDave Chinner 888ef473667SDarrick J. Wong if (xfs_is_reflink_inode(XFS_I(inode))) { 889ef473667SDarrick J. Wong error = xfs_map_cow(wpc, inode, offset, &new_type); 890ef473667SDarrick J. Wong if (error) 891ef473667SDarrick J. Wong goto out; 892ef473667SDarrick J. Wong } 893ef473667SDarrick J. Wong 894ef473667SDarrick J. Wong if (wpc->io_type != new_type) { 895ef473667SDarrick J. Wong wpc->io_type = new_type; 896ef473667SDarrick J. Wong wpc->imap_valid = false; 897ef473667SDarrick J. Wong } 898ef473667SDarrick J. 
Wong
899bfce7d2eSDave Chinner if (wpc->imap_valid)
900bfce7d2eSDave Chinner wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
901bfce7d2eSDave Chinner offset);
902bfce7d2eSDave Chinner if (!wpc->imap_valid) {
903bfce7d2eSDave Chinner error = xfs_map_blocks(inode, offset, &wpc->imap,
904bfce7d2eSDave Chinner wpc->io_type);
905bfce7d2eSDave Chinner if (error)
906e10de372SDave Chinner goto out;
907bfce7d2eSDave Chinner wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
908bfce7d2eSDave Chinner offset);
909bfce7d2eSDave Chinner }
910bfce7d2eSDave Chinner if (wpc->imap_valid) {
911bfce7d2eSDave Chinner lock_buffer(bh);
912bfce7d2eSDave Chinner if (wpc->io_type != XFS_IO_OVERWRITE)
913bfce7d2eSDave Chinner xfs_map_at_offset(inode, bh, &wpc->imap, offset);
914bb18782aSDave Chinner xfs_add_to_ioend(inode, bh, offset, wpc, wbc, &submit_list);
915bfce7d2eSDave Chinner count++;
916bfce7d2eSDave Chinner }
917bfce7d2eSDave Chinner 
918bfce7d2eSDave Chinner } while (offset += len, ((bh = bh->b_this_page) != head));
919bfce7d2eSDave Chinner 
920bfce7d2eSDave Chinner if (uptodate && bh == head)
921bfce7d2eSDave Chinner SetPageUptodate(page);
922bfce7d2eSDave Chinner 
923e10de372SDave Chinner ASSERT(wpc->ioend || list_empty(&submit_list));
924bfce7d2eSDave Chinner 
925e10de372SDave Chinner out:
926bfce7d2eSDave Chinner /*
927e10de372SDave Chinner * On error, we have to fail the ioend here because we have locked
928e10de372SDave Chinner * buffers in the ioend. If we don't do this, we'll deadlock
929e10de372SDave Chinner * invalidating the page as that tries to lock the buffers on the page.
930e10de372SDave Chinner * Also, because we may have set pages under writeback, we have to make
931e10de372SDave Chinner * sure we run IO completion to mark the error state of the IO
932e10de372SDave Chinner * appropriately, so we can't cancel the ioend directly here. That means
933e10de372SDave Chinner * we have to mark this page as under writeback if we included any
934e10de372SDave Chinner * buffers from it in the ioend chain so that completion treats it
935e10de372SDave Chinner * correctly.
936bfce7d2eSDave Chinner *
937e10de372SDave Chinner * If we didn't include the page in the ioend, then on error we can
938e10de372SDave Chinner * simply discard and unlock it as there are no other users of the page
939e10de372SDave Chinner * or its buffers right now. The caller will still need to trigger
940e10de372SDave Chinner * submission of outstanding ioends on the writepage context so they are
941e10de372SDave Chinner * treated correctly on error.
942bfce7d2eSDave Chinner */
943e10de372SDave Chinner if (count) {
944e10de372SDave Chinner xfs_start_page_writeback(page, !error);
945e10de372SDave Chinner 
946e10de372SDave Chinner /*
947e10de372SDave Chinner * Preserve the original error if there was one, otherwise catch
948e10de372SDave Chinner * submission errors here and propagate into subsequent ioend
949e10de372SDave Chinner * submissions.
950e10de372SDave Chinner */ 951e10de372SDave Chinner list_for_each_entry_safe(ioend, next, &submit_list, io_list) { 952e10de372SDave Chinner int error2; 953e10de372SDave Chinner 954e10de372SDave Chinner list_del_init(&ioend->io_list); 955e10de372SDave Chinner error2 = xfs_submit_ioend(wbc, ioend, error); 956e10de372SDave Chinner if (error2 && !error) 957e10de372SDave Chinner error = error2; 958e10de372SDave Chinner } 959e10de372SDave Chinner } else if (error) { 960bfce7d2eSDave Chinner xfs_aops_discard_page(page); 961bfce7d2eSDave Chinner ClearPageUptodate(page); 962bfce7d2eSDave Chinner unlock_page(page); 963e10de372SDave Chinner } else { 964e10de372SDave Chinner /* 965e10de372SDave Chinner * We can end up here with no error and nothing to write if we 966e10de372SDave Chinner * race with a partial page truncate on a sub-page block sized 967e10de372SDave Chinner * filesystem. In that case we need to mark the page clean. 968e10de372SDave Chinner */ 969e10de372SDave Chinner xfs_start_page_writeback(page, 1); 970e10de372SDave Chinner end_page_writeback(page); 971bfce7d2eSDave Chinner } 972e10de372SDave Chinner 973bfce7d2eSDave Chinner mapping_set_error(page->mapping, error); 974bfce7d2eSDave Chinner return error; 975bfce7d2eSDave Chinner } 976bfce7d2eSDave Chinner 977c59d87c4SChristoph Hellwig /* 978c59d87c4SChristoph Hellwig * Write out a dirty page. 979c59d87c4SChristoph Hellwig * 980c59d87c4SChristoph Hellwig * For delalloc space on the page we need to allocate space and flush it. 981c59d87c4SChristoph Hellwig * For unwritten space on the page we need to start the conversion to 982c59d87c4SChristoph Hellwig * regular allocated space. 983c59d87c4SChristoph Hellwig * For any other dirty buffer heads on the page we should flush them. 984c59d87c4SChristoph Hellwig */ 985c59d87c4SChristoph Hellwig STATIC int 986fbcc0256SDave Chinner xfs_do_writepage( 987c59d87c4SChristoph Hellwig struct page *page, 988fbcc0256SDave Chinner struct writeback_control *wbc, 989fbcc0256SDave Chinner void *data) 990c59d87c4SChristoph Hellwig { 991fbcc0256SDave Chinner struct xfs_writepage_ctx *wpc = data; 992c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 993c59d87c4SChristoph Hellwig loff_t offset; 994c59d87c4SChristoph Hellwig __uint64_t end_offset; 995ad68972aSDave Chinner pgoff_t end_index; 996c59d87c4SChristoph Hellwig 99734097dfeSLukas Czerner trace_xfs_writepage(inode, page, 0, 0); 998c59d87c4SChristoph Hellwig 999c59d87c4SChristoph Hellwig ASSERT(page_has_buffers(page)); 1000c59d87c4SChristoph Hellwig 1001c59d87c4SChristoph Hellwig /* 1002c59d87c4SChristoph Hellwig * Refuse to write the page out if we are called from reclaim context. 1003c59d87c4SChristoph Hellwig * 1004c59d87c4SChristoph Hellwig * This avoids stack overflows when called from deeply used stacks in 1005c59d87c4SChristoph Hellwig * random callers for direct reclaim or memcg reclaim. We explicitly 1006c59d87c4SChristoph Hellwig * allow reclaim from kswapd as the stack usage there is relatively low. 1007c59d87c4SChristoph Hellwig * 100894054fa3SMel Gorman * This should never happen except in the case of a VM regression so 100994054fa3SMel Gorman * warn about it. 
1010c59d87c4SChristoph Hellwig */ 101194054fa3SMel Gorman if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 101294054fa3SMel Gorman PF_MEMALLOC)) 1013c59d87c4SChristoph Hellwig goto redirty; 1014c59d87c4SChristoph Hellwig 1015c59d87c4SChristoph Hellwig /* 1016c59d87c4SChristoph Hellwig * Given that we do not allow direct reclaim to call us, we should 1017c59d87c4SChristoph Hellwig * never be called while in a filesystem transaction. 1018c59d87c4SChristoph Hellwig */ 1019448011e2SChristoph Hellwig if (WARN_ON_ONCE(current->flags & PF_FSTRANS)) 1020c59d87c4SChristoph Hellwig goto redirty; 1021c59d87c4SChristoph Hellwig 10228695d27eSJie Liu /* 1023ad68972aSDave Chinner * Is this page beyond the end of the file? 1024ad68972aSDave Chinner * 10258695d27eSJie Liu * The page index is less than the end_index, adjust the end_offset 10268695d27eSJie Liu * to the highest offset that this page should represent. 10278695d27eSJie Liu * ----------------------------------------------------- 10288695d27eSJie Liu * | file mapping | <EOF> | 10298695d27eSJie Liu * ----------------------------------------------------- 10308695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | | 10318695d27eSJie Liu * ^--------------------------------^----------|-------- 10328695d27eSJie Liu * | desired writeback range | see else | 10338695d27eSJie Liu * ---------------------------------^------------------| 10348695d27eSJie Liu */ 1035ad68972aSDave Chinner offset = i_size_read(inode); 103609cbfeafSKirill A. Shutemov end_index = offset >> PAGE_SHIFT; 10378695d27eSJie Liu if (page->index < end_index) 103809cbfeafSKirill A. Shutemov end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT; 10398695d27eSJie Liu else { 10408695d27eSJie Liu /* 10418695d27eSJie Liu * Check whether the page to write out is beyond or straddles 10428695d27eSJie Liu * i_size or not. 10438695d27eSJie Liu * ------------------------------------------------------- 10448695d27eSJie Liu * | file mapping | <EOF> | 10458695d27eSJie Liu * ------------------------------------------------------- 10468695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | Beyond | 10478695d27eSJie Liu * ^--------------------------------^-----------|--------- 10488695d27eSJie Liu * | | Straddles | 10498695d27eSJie Liu * ---------------------------------^-----------|--------| 10508695d27eSJie Liu */ 105109cbfeafSKirill A. Shutemov unsigned offset_into_page = offset & (PAGE_SIZE - 1); 10526b7a03f0SChristoph Hellwig 10536b7a03f0SChristoph Hellwig /* 1054ff9a28f6SJan Kara * Skip the page if it is fully outside i_size, e.g. due to a 1055ff9a28f6SJan Kara * truncate operation that is in progress. We must redirty the 1056ff9a28f6SJan Kara * page so that reclaim stops reclaiming it. Otherwise 1057ff9a28f6SJan Kara * xfs_vm_releasepage() is called on it and gets confused. 10588695d27eSJie Liu * 10598695d27eSJie Liu * Note that the end_index is unsigned long, it would overflow 10608695d27eSJie Liu * if the given offset is greater than 16TB on 32-bit system 10618695d27eSJie Liu * and if we do check the page is fully outside i_size or not 10628695d27eSJie Liu * via "if (page->index >= end_index + 1)" as "end_index + 1" 10638695d27eSJie Liu * will be evaluated to 0. Hence this page will be redirtied 10648695d27eSJie Liu * and be written out repeatedly which would result in an 10658695d27eSJie Liu * infinite loop, the user program that perform this operation 10668695d27eSJie Liu * will hang. 
Instead, we can verify this situation by checking 10678695d27eSJie Liu * if the page to write is totally beyond the i_size or if its 10688695d27eSJie Liu * offset is just equal to the EOF. 10696b7a03f0SChristoph Hellwig */ 10708695d27eSJie Liu if (page->index > end_index || 10718695d27eSJie Liu (page->index == end_index && offset_into_page == 0)) 1072ff9a28f6SJan Kara goto redirty; 10736b7a03f0SChristoph Hellwig 10746b7a03f0SChristoph Hellwig /* 10756b7a03f0SChristoph Hellwig * The page straddles i_size. It must be zeroed out on each 10766b7a03f0SChristoph Hellwig * and every writepage invocation because it may be mmapped. 10776b7a03f0SChristoph Hellwig * "A file is mapped in multiples of the page size. For a file 10786b7a03f0SChristoph Hellwig * that is not a multiple of the page size, the remaining 10796b7a03f0SChristoph Hellwig * memory is zeroed when mapped, and writes to that region are 10806b7a03f0SChristoph Hellwig * not written out to the file." 10816b7a03f0SChristoph Hellwig */ 108209cbfeafSKirill A. Shutemov zero_user_segment(page, offset_into_page, PAGE_SIZE); 10838695d27eSJie Liu 10848695d27eSJie Liu /* Adjust the end_offset to the end of file */ 10858695d27eSJie Liu end_offset = offset; 1086c59d87c4SChristoph Hellwig } 1087c59d87c4SChristoph Hellwig 1088e10de372SDave Chinner return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset); 1089c59d87c4SChristoph Hellwig 1090c59d87c4SChristoph Hellwig redirty: 1091c59d87c4SChristoph Hellwig redirty_page_for_writepage(wbc, page); 1092c59d87c4SChristoph Hellwig unlock_page(page); 1093c59d87c4SChristoph Hellwig return 0; 1094c59d87c4SChristoph Hellwig } 1095c59d87c4SChristoph Hellwig 1096c59d87c4SChristoph Hellwig STATIC int 1097fbcc0256SDave Chinner xfs_vm_writepage( 1098fbcc0256SDave Chinner struct page *page, 1099fbcc0256SDave Chinner struct writeback_control *wbc) 1100fbcc0256SDave Chinner { 1101fbcc0256SDave Chinner struct xfs_writepage_ctx wpc = { 1102fbcc0256SDave Chinner .io_type = XFS_IO_INVALID, 1103fbcc0256SDave Chinner }; 1104fbcc0256SDave Chinner int ret; 1105fbcc0256SDave Chinner 1106fbcc0256SDave Chinner ret = xfs_do_writepage(page, wbc, &wpc); 1107e10de372SDave Chinner if (wpc.ioend) 1108e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1109e10de372SDave Chinner return ret; 1110fbcc0256SDave Chinner } 1111fbcc0256SDave Chinner 1112fbcc0256SDave Chinner STATIC int 1113c59d87c4SChristoph Hellwig xfs_vm_writepages( 1114c59d87c4SChristoph Hellwig struct address_space *mapping, 1115c59d87c4SChristoph Hellwig struct writeback_control *wbc) 1116c59d87c4SChristoph Hellwig { 1117fbcc0256SDave Chinner struct xfs_writepage_ctx wpc = { 1118fbcc0256SDave Chinner .io_type = XFS_IO_INVALID, 1119fbcc0256SDave Chinner }; 1120fbcc0256SDave Chinner int ret; 1121fbcc0256SDave Chinner 1122c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 11237f6d5b52SRoss Zwisler if (dax_mapping(mapping)) 11247f6d5b52SRoss Zwisler return dax_writeback_mapping_range(mapping, 11257f6d5b52SRoss Zwisler xfs_find_bdev_for_inode(mapping->host), wbc); 11267f6d5b52SRoss Zwisler 1127fbcc0256SDave Chinner ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); 1128e10de372SDave Chinner if (wpc.ioend) 1129e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1130e10de372SDave Chinner return ret; 1131c59d87c4SChristoph Hellwig } 1132c59d87c4SChristoph Hellwig 1133c59d87c4SChristoph Hellwig /* 1134c59d87c4SChristoph Hellwig * Called to move a page into cleanable state - and from there
1135c59d87c4SChristoph Hellwig * to be released. The page should already be clean. We always 1136c59d87c4SChristoph Hellwig * have buffer heads in this call. 1137c59d87c4SChristoph Hellwig * 1138c59d87c4SChristoph Hellwig * Returns 1 if the page is ok to release, 0 otherwise. 1139c59d87c4SChristoph Hellwig */ 1140c59d87c4SChristoph Hellwig STATIC int 1141c59d87c4SChristoph Hellwig xfs_vm_releasepage( 1142c59d87c4SChristoph Hellwig struct page *page, 1143c59d87c4SChristoph Hellwig gfp_t gfp_mask) 1144c59d87c4SChristoph Hellwig { 1145c59d87c4SChristoph Hellwig int delalloc, unwritten; 1146c59d87c4SChristoph Hellwig 114734097dfeSLukas Czerner trace_xfs_releasepage(page->mapping->host, page, 0, 0); 1148c59d87c4SChristoph Hellwig 114999579cceSBrian Foster /* 115099579cceSBrian Foster * mm accommodates an old ext3 case where clean pages might not have had 115199579cceSBrian Foster * the dirty bit cleared. Thus, it can send actual dirty pages to 115299579cceSBrian Foster * ->releasepage() via shrink_active_list(). Conversely, 115399579cceSBrian Foster * block_invalidatepage() can send pages that are still marked dirty 115499579cceSBrian Foster * but otherwise have invalidated buffers. 115599579cceSBrian Foster * 11560a417b8dSJan Kara * We want to release the latter to avoid unnecessary buildup of the 11570a417b8dSJan Kara * LRU, skip the former and warn if we've left any lingering 11580a417b8dSJan Kara * delalloc/unwritten buffers on clean pages. Skip pages with delalloc 11590a417b8dSJan Kara * or unwritten buffers and warn if the page is not dirty. Otherwise 11600a417b8dSJan Kara * try to release the buffers. 116199579cceSBrian Foster */ 1162c59d87c4SChristoph Hellwig xfs_count_page_state(page, &delalloc, &unwritten); 1163c59d87c4SChristoph Hellwig 11640a417b8dSJan Kara if (delalloc) { 11650a417b8dSJan Kara WARN_ON_ONCE(!PageDirty(page)); 1166c59d87c4SChristoph Hellwig return 0; 11670a417b8dSJan Kara } 11680a417b8dSJan Kara if (unwritten) { 11690a417b8dSJan Kara WARN_ON_ONCE(!PageDirty(page)); 1170c59d87c4SChristoph Hellwig return 0; 11710a417b8dSJan Kara } 1172c59d87c4SChristoph Hellwig 1173c59d87c4SChristoph Hellwig return try_to_free_buffers(page); 1174c59d87c4SChristoph Hellwig } 1175c59d87c4SChristoph Hellwig 1176a719370bSDave Chinner /* 11771fdca9c2SDave Chinner * If this is O_DIRECT or the mpage code calling, tell them how large the mapping 11781fdca9c2SDave Chinner * is, so that we can avoid repeated get_blocks calls. 11791fdca9c2SDave Chinner * 11801fdca9c2SDave Chinner * If the mapping spans EOF, then we have to break the mapping up as the mapping 11811fdca9c2SDave Chinner * for blocks beyond EOF must be marked new so that sub block regions can be 11821fdca9c2SDave Chinner * correctly zeroed. We can't do this for mappings within EOF unless the mapping 11831fdca9c2SDave Chinner * was just allocated or is unwritten; otherwise the callers would overwrite 11841fdca9c2SDave Chinner * existing data with zeros. Hence we have to split the mapping into a range up 11851fdca9c2SDave Chinner * to and including EOF, and a second mapping for beyond EOF.
11861fdca9c2SDave Chinner */ 11871fdca9c2SDave Chinner static void 11881fdca9c2SDave Chinner xfs_map_trim_size( 11891fdca9c2SDave Chinner struct inode *inode, 11901fdca9c2SDave Chinner sector_t iblock, 11911fdca9c2SDave Chinner struct buffer_head *bh_result, 11921fdca9c2SDave Chinner struct xfs_bmbt_irec *imap, 11931fdca9c2SDave Chinner xfs_off_t offset, 11941fdca9c2SDave Chinner ssize_t size) 11951fdca9c2SDave Chinner { 11961fdca9c2SDave Chinner xfs_off_t mapping_size; 11971fdca9c2SDave Chinner 11981fdca9c2SDave Chinner mapping_size = imap->br_startoff + imap->br_blockcount - iblock; 11991fdca9c2SDave Chinner mapping_size <<= inode->i_blkbits; 12001fdca9c2SDave Chinner 12011fdca9c2SDave Chinner ASSERT(mapping_size > 0); 12021fdca9c2SDave Chinner if (mapping_size > size) 12031fdca9c2SDave Chinner mapping_size = size; 12041fdca9c2SDave Chinner if (offset < i_size_read(inode) && 12051fdca9c2SDave Chinner offset + mapping_size >= i_size_read(inode)) { 12061fdca9c2SDave Chinner /* limit mapping to block that spans EOF */ 12071fdca9c2SDave Chinner mapping_size = roundup_64(i_size_read(inode) - offset, 120893407472SFabian Frederick i_blocksize(inode)); 12091fdca9c2SDave Chinner } 12101fdca9c2SDave Chinner if (mapping_size > LONG_MAX) 12111fdca9c2SDave Chinner mapping_size = LONG_MAX; 12121fdca9c2SDave Chinner 12131fdca9c2SDave Chinner bh_result->b_size = mapping_size; 12141fdca9c2SDave Chinner } 12151fdca9c2SDave Chinner 12160613f16cSDarrick J. Wong static int 1217acdda3aaSChristoph Hellwig xfs_get_blocks( 1218c59d87c4SChristoph Hellwig struct inode *inode, 1219c59d87c4SChristoph Hellwig sector_t iblock, 1220c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1221acdda3aaSChristoph Hellwig int create) 1222c59d87c4SChristoph Hellwig { 1223c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1224c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1225c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 1226c59d87c4SChristoph Hellwig int error = 0; 1227c59d87c4SChristoph Hellwig int lockmode = 0; 1228c59d87c4SChristoph Hellwig struct xfs_bmbt_irec imap; 1229c59d87c4SChristoph Hellwig int nimaps = 1; 1230c59d87c4SChristoph Hellwig xfs_off_t offset; 1231c59d87c4SChristoph Hellwig ssize_t size; 1232c59d87c4SChristoph Hellwig 1233acdda3aaSChristoph Hellwig BUG_ON(create); 12346e8a27a8SChristoph Hellwig 1235c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 1236b474c7aeSEric Sandeen return -EIO; 1237c59d87c4SChristoph Hellwig 1238c59d87c4SChristoph Hellwig offset = (xfs_off_t)iblock << inode->i_blkbits; 123993407472SFabian Frederick ASSERT(bh_result->b_size >= i_blocksize(inode)); 1240c59d87c4SChristoph Hellwig size = bh_result->b_size; 1241c59d87c4SChristoph Hellwig 1242acdda3aaSChristoph Hellwig if (offset >= i_size_read(inode)) 1243c59d87c4SChristoph Hellwig return 0; 1244c59d87c4SChristoph Hellwig 1245507630b2SDave Chinner /* 1246507630b2SDave Chinner * Direct I/O is usually done on preallocated files, so try getting 12476e8a27a8SChristoph Hellwig * a block mapping without an exclusive lock first. 
1248507630b2SDave Chinner */ 1249309ecac8SChristoph Hellwig lockmode = xfs_ilock_data_map_shared(ip); 1250c59d87c4SChristoph Hellwig 1251d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 1252d2c28191SDave Chinner if (offset + size > mp->m_super->s_maxbytes) 1253d2c28191SDave Chinner size = mp->m_super->s_maxbytes - offset; 1254c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1255c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 1256c59d87c4SChristoph Hellwig 12575c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 12585c8ed202SDave Chinner &imap, &nimaps, XFS_BMAPI_ENTIRE); 1259c59d87c4SChristoph Hellwig if (error) 1260c59d87c4SChristoph Hellwig goto out_unlock; 1261c59d87c4SChristoph Hellwig 1262acdda3aaSChristoph Hellwig if (nimaps) { 1263d5cc2e3fSDave Chinner trace_xfs_get_blocks_found(ip, offset, size, 1264d5cc2e3fSDave Chinner ISUNWRITTEN(&imap) ? XFS_IO_UNWRITTEN 1265d5cc2e3fSDave Chinner : XFS_IO_OVERWRITE, &imap); 1266507630b2SDave Chinner xfs_iunlock(ip, lockmode); 1267c59d87c4SChristoph Hellwig } else { 1268c59d87c4SChristoph Hellwig trace_xfs_get_blocks_notfound(ip, offset, size); 1269c59d87c4SChristoph Hellwig goto out_unlock; 1270c59d87c4SChristoph Hellwig } 1271c59d87c4SChristoph Hellwig 12721fdca9c2SDave Chinner /* trim mapping down to size requested */ 12736e8a27a8SChristoph Hellwig xfs_map_trim_size(inode, iblock, bh_result, &imap, offset, size); 12741fdca9c2SDave Chinner 1275c59d87c4SChristoph Hellwig /* 1276a719370bSDave Chinner * For unwritten extents do not report a disk address in the buffered 1277a719370bSDave Chinner * read case (treat as if we're reading into a hole). 1278c59d87c4SChristoph Hellwig */ 1279a719370bSDave Chinner if (imap.br_startblock != HOLESTARTBLOCK && 1280a719370bSDave Chinner imap.br_startblock != DELAYSTARTBLOCK && 1281acdda3aaSChristoph Hellwig !ISUNWRITTEN(&imap)) 1282c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh_result, &imap, offset); 1283c59d87c4SChristoph Hellwig 1284c59d87c4SChristoph Hellwig /* 1285c59d87c4SChristoph Hellwig * If this is a realtime file, data may be on a different device 1286c59d87c4SChristoph Hellwig * to that pointed to from the buffer_head b_bdev currently. 1287c59d87c4SChristoph Hellwig */ 1288c59d87c4SChristoph Hellwig bh_result->b_bdev = xfs_find_bdev_for_inode(inode); 1289c59d87c4SChristoph Hellwig return 0; 1290c59d87c4SChristoph Hellwig 1291c59d87c4SChristoph Hellwig out_unlock: 1292c59d87c4SChristoph Hellwig xfs_iunlock(ip, lockmode); 12932451337dSDave Chinner return error; 1294c59d87c4SChristoph Hellwig } 1295c59d87c4SChristoph Hellwig 1296c59d87c4SChristoph Hellwig STATIC ssize_t 1297c59d87c4SChristoph Hellwig xfs_vm_direct_IO( 1298c59d87c4SChristoph Hellwig struct kiocb *iocb, 1299c8b8e32dSChristoph Hellwig struct iov_iter *iter) 1300c59d87c4SChristoph Hellwig { 1301c59d87c4SChristoph Hellwig /* 1302fa8d972dSChristoph Hellwig * We just need the method present so that open/fcntl allow direct I/O.
1303c59d87c4SChristoph Hellwig */ 1304fa8d972dSChristoph Hellwig return -EINVAL; 1305c59d87c4SChristoph Hellwig } 1306c59d87c4SChristoph Hellwig 1307c59d87c4SChristoph Hellwig STATIC sector_t 1308c59d87c4SChristoph Hellwig xfs_vm_bmap( 1309c59d87c4SChristoph Hellwig struct address_space *mapping, 1310c59d87c4SChristoph Hellwig sector_t block) 1311c59d87c4SChristoph Hellwig { 1312c59d87c4SChristoph Hellwig struct inode *inode = (struct inode *)mapping->host; 1313c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1314c59d87c4SChristoph Hellwig 1315c59d87c4SChristoph Hellwig trace_xfs_vm_bmap(XFS_I(inode)); 1316db1327b1SDarrick J. Wong 1317db1327b1SDarrick J. Wong /* 1318db1327b1SDarrick J. Wong * The swap code (ab-)uses ->bmap to get a block mapping and then 1319db1327b1SDarrick J. Wong * bypasses the file system for actual I/O. We really can't allow 1320db1327b1SDarrick J. Wong * that on reflink inodes, so we have to skip out here. And yes, 1321db1327b1SDarrick J. Wong * 0 is the magic code for a bmap error. 1322db1327b1SDarrick J. Wong */ 132365523218SChristoph Hellwig if (xfs_is_reflink_inode(ip)) 1324db1327b1SDarrick J. Wong return 0; 132565523218SChristoph Hellwig 13264bc1ea6bSDave Chinner filemap_write_and_wait(mapping); 1327c59d87c4SChristoph Hellwig return generic_block_bmap(mapping, block, xfs_get_blocks); 1328c59d87c4SChristoph Hellwig } 1329c59d87c4SChristoph Hellwig 1330c59d87c4SChristoph Hellwig STATIC int 1331c59d87c4SChristoph Hellwig xfs_vm_readpage( 1332c59d87c4SChristoph Hellwig struct file *unused, 1333c59d87c4SChristoph Hellwig struct page *page) 1334c59d87c4SChristoph Hellwig { 1335121e213eSDave Chinner trace_xfs_vm_readpage(page->mapping->host, 1); 1336c59d87c4SChristoph Hellwig return mpage_readpage(page, xfs_get_blocks); 1337c59d87c4SChristoph Hellwig } 1338c59d87c4SChristoph Hellwig 1339c59d87c4SChristoph Hellwig STATIC int 1340c59d87c4SChristoph Hellwig xfs_vm_readpages( 1341c59d87c4SChristoph Hellwig struct file *unused, 1342c59d87c4SChristoph Hellwig struct address_space *mapping, 1343c59d87c4SChristoph Hellwig struct list_head *pages, 1344c59d87c4SChristoph Hellwig unsigned nr_pages) 1345c59d87c4SChristoph Hellwig { 1346121e213eSDave Chinner trace_xfs_vm_readpages(mapping->host, nr_pages); 1347c59d87c4SChristoph Hellwig return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1348c59d87c4SChristoph Hellwig } 1349c59d87c4SChristoph Hellwig 135022e757a4SDave Chinner /* 135122e757a4SDave Chinner * This is basically a copy of __set_page_dirty_buffers() with one 135222e757a4SDave Chinner * small tweak: buffers beyond EOF do not get marked dirty. If we mark them 135322e757a4SDave Chinner * dirty, we'll never be able to clean them because we don't write buffers 135422e757a4SDave Chinner * beyond EOF, and that means we can't invalidate pages that span EOF 135522e757a4SDave Chinner * that have been marked dirty. Further, the dirty state can leak into 135622e757a4SDave Chinner * the file interior if the file is extended, resulting in all sorts of 135722e757a4SDave Chinner * bad things happening as the state does not match the underlying data. 135822e757a4SDave Chinner * 135922e757a4SDave Chinner * XXX: this really indicates that bufferheads in XFS need to die. Warts like 136022e757a4SDave Chinner * this only exist because of bufferheads and how the generic code manages them.
136122e757a4SDave Chinner */ 136222e757a4SDave Chinner STATIC int 136322e757a4SDave Chinner xfs_vm_set_page_dirty( 136422e757a4SDave Chinner struct page *page) 136522e757a4SDave Chinner { 136622e757a4SDave Chinner struct address_space *mapping = page->mapping; 136722e757a4SDave Chinner struct inode *inode = mapping->host; 136822e757a4SDave Chinner loff_t end_offset; 136922e757a4SDave Chinner loff_t offset; 137022e757a4SDave Chinner int newly_dirty; 137122e757a4SDave Chinner 137222e757a4SDave Chinner if (unlikely(!mapping)) 137322e757a4SDave Chinner return !TestSetPageDirty(page); 137422e757a4SDave Chinner 137522e757a4SDave Chinner end_offset = i_size_read(inode); 137622e757a4SDave Chinner offset = page_offset(page); 137722e757a4SDave Chinner 137822e757a4SDave Chinner spin_lock(&mapping->private_lock); 137922e757a4SDave Chinner if (page_has_buffers(page)) { 138022e757a4SDave Chinner struct buffer_head *head = page_buffers(page); 138122e757a4SDave Chinner struct buffer_head *bh = head; 138222e757a4SDave Chinner 138322e757a4SDave Chinner do { 138422e757a4SDave Chinner if (offset < end_offset) 138522e757a4SDave Chinner set_buffer_dirty(bh); 138622e757a4SDave Chinner bh = bh->b_this_page; 138793407472SFabian Frederick offset += i_blocksize(inode); 138822e757a4SDave Chinner } while (bh != head); 138922e757a4SDave Chinner } 1390c4843a75SGreg Thelen /* 139181f8c3a4SJohannes Weiner * Lock out page->mem_cgroup migration to keep PageDirty 139281f8c3a4SJohannes Weiner * synchronized with per-memcg dirty page counters. 1393c4843a75SGreg Thelen */ 139462cccb8cSJohannes Weiner lock_page_memcg(page); 139522e757a4SDave Chinner newly_dirty = !TestSetPageDirty(page); 139622e757a4SDave Chinner spin_unlock(&mapping->private_lock); 139722e757a4SDave Chinner 139822e757a4SDave Chinner if (newly_dirty) { 139922e757a4SDave Chinner /* sigh - __set_page_dirty() is static, so copy it here, too */ 140022e757a4SDave Chinner unsigned long flags; 140122e757a4SDave Chinner 140222e757a4SDave Chinner spin_lock_irqsave(&mapping->tree_lock, flags); 140322e757a4SDave Chinner if (page->mapping) { /* Race with truncate? 
*/ 140422e757a4SDave Chinner WARN_ON_ONCE(!PageUptodate(page)); 140562cccb8cSJohannes Weiner account_page_dirtied(page, mapping); 140622e757a4SDave Chinner radix_tree_tag_set(&mapping->page_tree, 140722e757a4SDave Chinner page_index(page), PAGECACHE_TAG_DIRTY); 140822e757a4SDave Chinner } 140922e757a4SDave Chinner spin_unlock_irqrestore(&mapping->tree_lock, flags); 141022e757a4SDave Chinner } 141162cccb8cSJohannes Weiner unlock_page_memcg(page); 1412c4843a75SGreg Thelen if (newly_dirty) 1413c4843a75SGreg Thelen __mark_inode_dirty(mapping->host, I_DIRTY_PAGES); 141422e757a4SDave Chinner return newly_dirty; 141522e757a4SDave Chinner } 141622e757a4SDave Chinner 1417c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = { 1418c59d87c4SChristoph Hellwig .readpage = xfs_vm_readpage, 1419c59d87c4SChristoph Hellwig .readpages = xfs_vm_readpages, 1420c59d87c4SChristoph Hellwig .writepage = xfs_vm_writepage, 1421c59d87c4SChristoph Hellwig .writepages = xfs_vm_writepages, 142222e757a4SDave Chinner .set_page_dirty = xfs_vm_set_page_dirty, 1423c59d87c4SChristoph Hellwig .releasepage = xfs_vm_releasepage, 1424c59d87c4SChristoph Hellwig .invalidatepage = xfs_vm_invalidatepage, 1425c59d87c4SChristoph Hellwig .bmap = xfs_vm_bmap, 1426c59d87c4SChristoph Hellwig .direct_IO = xfs_vm_direct_IO, 1427c59d87c4SChristoph Hellwig .migratepage = buffer_migrate_page, 1428c59d87c4SChristoph Hellwig .is_partially_uptodate = block_is_partially_uptodate, 1429c59d87c4SChristoph Hellwig .error_remove_page = generic_error_remove_page, 1430c59d87c4SChristoph Hellwig }; 1431
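Editor's note: the EOF handling in xfs_do_writepage() above is the subtlest logic in this section. The following standalone sketch is an editor's illustration only, not part of xfs_aops.c; it assumes a 4 KiB page size and uses invented ex_* names. It mirrors the end_index/offset_into_page arithmetic to show why the beyond-EOF test is written as "index > end_index || (index == end_index && offset_into_page == 0)" rather than "index >= end_index + 1", which the in-tree comment notes could overflow on a 32-bit system.

/*
 * Editor's illustration only -- not kernel code.  Reproduces the
 * page-vs-EOF classification from xfs_do_writepage() for a 4 KiB page
 * size so the three cases (inside EOF, straddling EOF, beyond EOF)
 * can be printed for a sample i_size.
 */
#include <stdio.h>
#include <stdint.h>

#define EX_PAGE_SHIFT	12
#define EX_PAGE_SIZE	(1UL << EX_PAGE_SHIFT)

enum ex_page_class { EX_INSIDE_EOF, EX_STRADDLES_EOF, EX_BEYOND_EOF };

static enum ex_page_class
ex_classify_page(uint64_t index, uint64_t isize)
{
	uint64_t end_index = isize >> EX_PAGE_SHIFT;
	unsigned offset_into_page = isize & (EX_PAGE_SIZE - 1);

	if (index < end_index)
		return EX_INSIDE_EOF;	/* whole page is below EOF */
	/*
	 * Same shape as the test in xfs_do_writepage(): written so that
	 * "end_index + 1" is never computed and hence cannot wrap.
	 */
	if (index > end_index || (index == end_index && offset_into_page == 0))
		return EX_BEYOND_EOF;	/* redirty and skip */
	return EX_STRADDLES_EOF;	/* zero offset_into_page..PAGE_SIZE */
}

int main(void)
{
	uint64_t isize = 10240;		/* two full pages plus 2 KiB */

	for (uint64_t index = 0; index < 4; index++)
		printf("page %llu: class %d\n",
		       (unsigned long long)index,
		       ex_classify_page(index, isize));
	return 0;
}

With i_size = 10240, pages 0 and 1 are fully inside EOF, page 2 straddles EOF (the tail from byte 2048 onwards gets zeroed before writeback), and page 3 is beyond EOF and is simply redirtied.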
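Editor's note: likewise, the EOF clamp in xfs_map_trim_size() is easier to see with concrete numbers. This is a hedged, standalone sketch, not kernel code; the ex_roundup() helper and the sample sizes are invented for illustration. It shows how a mapping that extends past i_size is cut back to the filesystem block containing EOF, matching the roundup_64() call above, so that blocks beyond EOF can be handled separately and marked new.

/*
 * Editor's illustration only -- not kernel code.  Demonstrates the
 * effect of the EOF clamp in xfs_map_trim_size() for one sample
 * mapping that starts below i_size and extends past it.
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t ex_roundup(uint64_t x, uint64_t y)
{
	return ((x + y - 1) / y) * y;
}

int main(void)
{
	uint64_t blocksize = 4096;	/* assumed filesystem block size */
	uint64_t isize = 1048676;	/* EOF 100 bytes into a block */
	uint64_t offset = 1044480;	/* mapping starts below EOF */
	uint64_t mapping_size = 65536;	/* extent extends past EOF */

	/* same condition as the i_size_read() checks in the kernel code */
	if (offset < isize && offset + mapping_size >= isize)
		mapping_size = ex_roundup(isize - offset, blocksize);

	printf("trimmed mapping: %llu bytes\n",
	       (unsigned long long)mapping_size);	/* prints 8192 */
	return 0;
}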