10b61f8a4SDave Chinner // SPDX-License-Identifier: GPL-2.0 2c59d87c4SChristoph Hellwig /* 3c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc. 498c1a7c0SChristoph Hellwig * Copyright (c) 2016-2018 Christoph Hellwig. 5c59d87c4SChristoph Hellwig * All Rights Reserved. 6c59d87c4SChristoph Hellwig */ 7c59d87c4SChristoph Hellwig #include "xfs.h" 870a9883cSDave Chinner #include "xfs_shared.h" 9239880efSDave Chinner #include "xfs_format.h" 10239880efSDave Chinner #include "xfs_log_format.h" 11239880efSDave Chinner #include "xfs_trans_resv.h" 12c59d87c4SChristoph Hellwig #include "xfs_mount.h" 13c59d87c4SChristoph Hellwig #include "xfs_inode.h" 14239880efSDave Chinner #include "xfs_trans.h" 15c59d87c4SChristoph Hellwig #include "xfs_iomap.h" 16c59d87c4SChristoph Hellwig #include "xfs_trace.h" 17c59d87c4SChristoph Hellwig #include "xfs_bmap.h" 1868988114SDave Chinner #include "xfs_bmap_util.h" 19ef473667SDarrick J. Wong #include "xfs_reflink.h" 20c59d87c4SChristoph Hellwig 21fbcc0256SDave Chinner /* 22fbcc0256SDave Chinner * structure owned by writepages passed to individual writepage calls 23fbcc0256SDave Chinner */ 24fbcc0256SDave Chinner struct xfs_writepage_ctx { 254e087a3bSChristoph Hellwig struct iomap iomap; 26be225fecSChristoph Hellwig int fork; 27d9252d52SBrian Foster unsigned int data_seq; 28e666aa37SChristoph Hellwig unsigned int cow_seq; 29fbcc0256SDave Chinner struct xfs_ioend *ioend; 30fbcc0256SDave Chinner }; 31fbcc0256SDave Chinner 3220a90f58SRoss Zwisler struct block_device * 33c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode( 34c59d87c4SChristoph Hellwig struct inode *inode) 35c59d87c4SChristoph Hellwig { 36c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 37c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 38c59d87c4SChristoph Hellwig 39c59d87c4SChristoph Hellwig if (XFS_IS_REALTIME_INODE(ip)) 40c59d87c4SChristoph Hellwig return mp->m_rtdev_targp->bt_bdev; 41c59d87c4SChristoph Hellwig else 42c59d87c4SChristoph Hellwig return mp->m_ddev_targp->bt_bdev; 43c59d87c4SChristoph Hellwig } 44c59d87c4SChristoph Hellwig 45486aff5eSDan Williams struct dax_device * 46486aff5eSDan Williams xfs_find_daxdev_for_inode( 47486aff5eSDan Williams struct inode *inode) 48486aff5eSDan Williams { 49486aff5eSDan Williams struct xfs_inode *ip = XFS_I(inode); 50486aff5eSDan Williams struct xfs_mount *mp = ip->i_mount; 51486aff5eSDan Williams 52486aff5eSDan Williams if (XFS_IS_REALTIME_INODE(ip)) 53486aff5eSDan Williams return mp->m_rtdev_targp->bt_daxdev; 54486aff5eSDan Williams else 55486aff5eSDan Williams return mp->m_ddev_targp->bt_daxdev; 56486aff5eSDan Williams } 57486aff5eSDan Williams 58ac8ee546SChristoph Hellwig static void 59ac8ee546SChristoph Hellwig xfs_finish_page_writeback( 60ac8ee546SChristoph Hellwig struct inode *inode, 61ac8ee546SChristoph Hellwig struct bio_vec *bvec, 62ac8ee546SChristoph Hellwig int error) 63ac8ee546SChristoph Hellwig { 6482cb1417SChristoph Hellwig struct iomap_page *iop = to_iomap_page(bvec->bv_page); 6582cb1417SChristoph Hellwig 66ac8ee546SChristoph Hellwig if (error) { 67ac8ee546SChristoph Hellwig SetPageError(bvec->bv_page); 68ac8ee546SChristoph Hellwig mapping_set_error(inode->i_mapping, -EIO); 69ac8ee546SChristoph Hellwig } 70ac8ee546SChristoph Hellwig 7182cb1417SChristoph Hellwig ASSERT(iop || i_blocksize(inode) == PAGE_SIZE); 7282cb1417SChristoph Hellwig ASSERT(!iop || atomic_read(&iop->write_count) > 0); 7337992c18SDave Chinner 7482cb1417SChristoph Hellwig if (!iop || atomic_dec_and_test(&iop->write_count)) 758353a814SChristoph Hellwig end_page_writeback(bvec->bv_page); 7637992c18SDave Chinner } 7737992c18SDave Chinner 7837992c18SDave Chinner /* 7937992c18SDave Chinner * We're now finished for good with this ioend structure. Update the page 8037992c18SDave Chinner * state, release holds on bios, and finally free up memory. Do not use the 8137992c18SDave Chinner * ioend after this. 82c59d87c4SChristoph Hellwig */ 83c59d87c4SChristoph Hellwig STATIC void 84c59d87c4SChristoph Hellwig xfs_destroy_ioend( 850e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 860e51a8e1SChristoph Hellwig int error) 87c59d87c4SChristoph Hellwig { 8837992c18SDave Chinner struct inode *inode = ioend->io_inode; 898353a814SChristoph Hellwig struct bio *bio = &ioend->io_inline_bio; 908353a814SChristoph Hellwig struct bio *last = ioend->io_bio, *next; 918353a814SChristoph Hellwig u64 start = bio->bi_iter.bi_sector; 928353a814SChristoph Hellwig bool quiet = bio_flagged(bio, BIO_QUIET); 93c59d87c4SChristoph Hellwig 940e51a8e1SChristoph Hellwig for (bio = &ioend->io_inline_bio; bio; bio = next) { 9537992c18SDave Chinner struct bio_vec *bvec; 966dc4f100SMing Lei struct bvec_iter_all iter_all; 9737992c18SDave Chinner 980e51a8e1SChristoph Hellwig /* 990e51a8e1SChristoph Hellwig * For the last bio, bi_private points to the ioend, so we 1000e51a8e1SChristoph Hellwig * need to explicitly end the iteration here. 1010e51a8e1SChristoph Hellwig */ 1020e51a8e1SChristoph Hellwig if (bio == last) 1030e51a8e1SChristoph Hellwig next = NULL; 1040e51a8e1SChristoph Hellwig else 10537992c18SDave Chinner next = bio->bi_private; 10637992c18SDave Chinner 10737992c18SDave Chinner /* walk each page on bio, ending page IO on them */ 1082b070cfeSChristoph Hellwig bio_for_each_segment_all(bvec, bio, iter_all) 10937992c18SDave Chinner xfs_finish_page_writeback(inode, bvec, error); 11037992c18SDave Chinner bio_put(bio); 111c59d87c4SChristoph Hellwig } 1128353a814SChristoph Hellwig 1138353a814SChristoph Hellwig if (unlikely(error && !quiet)) { 1148353a814SChristoph Hellwig xfs_err_ratelimited(XFS_I(inode)->i_mount, 1158353a814SChristoph Hellwig "writeback error on sector %llu", start); 1168353a814SChristoph Hellwig } 117c59d87c4SChristoph Hellwig } 118c59d87c4SChristoph Hellwig 119*433dad94SChristoph Hellwig static void 120*433dad94SChristoph Hellwig xfs_destroy_ioends( 121*433dad94SChristoph Hellwig struct xfs_ioend *ioend, 122*433dad94SChristoph Hellwig int error) 123*433dad94SChristoph Hellwig { 124*433dad94SChristoph Hellwig struct list_head tmp; 125*433dad94SChristoph Hellwig 126*433dad94SChristoph Hellwig list_replace_init(&ioend->io_list, &tmp); 127*433dad94SChristoph Hellwig xfs_destroy_ioend(ioend, error); 128*433dad94SChristoph Hellwig while ((ioend = list_first_entry_or_null(&tmp, struct xfs_ioend, 129*433dad94SChristoph Hellwig io_list))) { 130*433dad94SChristoph Hellwig list_del_init(&ioend->io_list); 131*433dad94SChristoph Hellwig xfs_destroy_ioend(ioend, error); 132*433dad94SChristoph Hellwig } 133*433dad94SChristoph Hellwig } 134*433dad94SChristoph Hellwig 135c59d87c4SChristoph Hellwig /* 136fc0063c4SChristoph Hellwig * Fast and loose check if this write could update the on-disk inode size. 137fc0063c4SChristoph Hellwig */ 138fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 139fc0063c4SChristoph Hellwig { 140fc0063c4SChristoph Hellwig return ioend->io_offset + ioend->io_size > 141fc0063c4SChristoph Hellwig XFS_I(ioend->io_inode)->i_d.di_size; 142fc0063c4SChristoph Hellwig } 143fc0063c4SChristoph Hellwig 144281627dfSChristoph Hellwig STATIC int 145281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc( 146281627dfSChristoph Hellwig struct xfs_ioend *ioend) 147281627dfSChristoph Hellwig { 148281627dfSChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 149281627dfSChristoph Hellwig struct xfs_trans *tp; 150281627dfSChristoph Hellwig int error; 151281627dfSChristoph Hellwig 15273d30d48SChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 153253f4911SChristoph Hellwig if (error) 154281627dfSChristoph Hellwig return error; 155281627dfSChristoph Hellwig 156281627dfSChristoph Hellwig ioend->io_append_trans = tp; 157281627dfSChristoph Hellwig 158281627dfSChristoph Hellwig /* 159437a255aSDave Chinner * We may pass freeze protection with a transaction. So tell lockdep 160d9457dc0SJan Kara * we released it. 161d9457dc0SJan Kara */ 162bee9182dSOleg Nesterov __sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS); 163d9457dc0SJan Kara /* 164281627dfSChristoph Hellwig * We hand off the transaction to the completion thread now, so 165281627dfSChristoph Hellwig * clear the flag here. 166281627dfSChristoph Hellwig */ 1679070733bSMichal Hocko current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 168281627dfSChristoph Hellwig return 0; 169281627dfSChristoph Hellwig } 170281627dfSChristoph Hellwig 171fc0063c4SChristoph Hellwig /* 1722813d682SChristoph Hellwig * Update on-disk file size now that data has been written to disk. 173c59d87c4SChristoph Hellwig */ 174281627dfSChristoph Hellwig STATIC int 175e372843aSChristoph Hellwig __xfs_setfilesize( 1762ba66237SChristoph Hellwig struct xfs_inode *ip, 1772ba66237SChristoph Hellwig struct xfs_trans *tp, 1782ba66237SChristoph Hellwig xfs_off_t offset, 1792ba66237SChristoph Hellwig size_t size) 180c59d87c4SChristoph Hellwig { 181c59d87c4SChristoph Hellwig xfs_fsize_t isize; 182c59d87c4SChristoph Hellwig 183aa6bf01dSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 1842ba66237SChristoph Hellwig isize = xfs_new_eof(ip, offset + size); 185281627dfSChristoph Hellwig if (!isize) { 186281627dfSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 1874906e215SChristoph Hellwig xfs_trans_cancel(tp); 188281627dfSChristoph Hellwig return 0; 189c59d87c4SChristoph Hellwig } 190c59d87c4SChristoph Hellwig 1912ba66237SChristoph Hellwig trace_xfs_setfilesize(ip, offset, size); 192281627dfSChristoph Hellwig 193281627dfSChristoph Hellwig ip->i_d.di_size = isize; 194281627dfSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 195281627dfSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 196281627dfSChristoph Hellwig 19770393313SChristoph Hellwig return xfs_trans_commit(tp); 198c59d87c4SChristoph Hellwig } 199c59d87c4SChristoph Hellwig 200e372843aSChristoph Hellwig int 201e372843aSChristoph Hellwig xfs_setfilesize( 202e372843aSChristoph Hellwig struct xfs_inode *ip, 203e372843aSChristoph Hellwig xfs_off_t offset, 204e372843aSChristoph Hellwig size_t size) 205e372843aSChristoph Hellwig { 206e372843aSChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 207e372843aSChristoph Hellwig struct xfs_trans *tp; 208e372843aSChristoph Hellwig int error; 209e372843aSChristoph Hellwig 210e372843aSChristoph Hellwig error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp); 211e372843aSChristoph Hellwig if (error) 212e372843aSChristoph Hellwig return error; 213e372843aSChristoph Hellwig 214e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, offset, size); 215e372843aSChristoph Hellwig } 216e372843aSChristoph Hellwig 2172ba66237SChristoph Hellwig STATIC int 2182ba66237SChristoph Hellwig xfs_setfilesize_ioend( 2190e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 2200e51a8e1SChristoph Hellwig int error) 2212ba66237SChristoph Hellwig { 2222ba66237SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 2232ba66237SChristoph Hellwig struct xfs_trans *tp = ioend->io_append_trans; 2242ba66237SChristoph Hellwig 2252ba66237SChristoph Hellwig /* 2262ba66237SChristoph Hellwig * The transaction may have been allocated in the I/O submission thread, 2272ba66237SChristoph Hellwig * thus we need to mark ourselves as being in a transaction manually. 2282ba66237SChristoph Hellwig * Similarly for freeze protection. 2292ba66237SChristoph Hellwig */ 2309070733bSMichal Hocko current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS); 231bee9182dSOleg Nesterov __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS); 2322ba66237SChristoph Hellwig 2335cb13dcdSZhaohongjiang /* we abort the update if there was an IO error */ 2340e51a8e1SChristoph Hellwig if (error) { 2355cb13dcdSZhaohongjiang xfs_trans_cancel(tp); 2360e51a8e1SChristoph Hellwig return error; 2375cb13dcdSZhaohongjiang } 2385cb13dcdSZhaohongjiang 239e372843aSChristoph Hellwig return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size); 2402ba66237SChristoph Hellwig } 2412ba66237SChristoph Hellwig 242c59d87c4SChristoph Hellwig /* 243c59d87c4SChristoph Hellwig * IO write completion. 244c59d87c4SChristoph Hellwig */ 245c59d87c4SChristoph Hellwig STATIC void 246cb357bf3SDarrick J. Wong xfs_end_ioend( 247cb357bf3SDarrick J. Wong struct xfs_ioend *ioend) 248c59d87c4SChristoph Hellwig { 249c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 250787eb485SChristoph Hellwig xfs_off_t offset = ioend->io_offset; 251787eb485SChristoph Hellwig size_t size = ioend->io_size; 25273d30d48SChristoph Hellwig unsigned int nofs_flag; 2534e4cbee9SChristoph Hellwig int error; 254c59d87c4SChristoph Hellwig 255af055e37SBrian Foster /* 25673d30d48SChristoph Hellwig * We can allocate memory here while doing writeback on behalf of 25773d30d48SChristoph Hellwig * memory reclaim. To avoid memory allocation deadlocks set the 25873d30d48SChristoph Hellwig * task-wide nofs context for the following operations. 25973d30d48SChristoph Hellwig */ 26073d30d48SChristoph Hellwig nofs_flag = memalloc_nofs_save(); 26173d30d48SChristoph Hellwig 26273d30d48SChristoph Hellwig /* 263787eb485SChristoph Hellwig * Just clean up the in-memory strutures if the fs has been shut down. 264af055e37SBrian Foster */ 265787eb485SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 2660e51a8e1SChristoph Hellwig error = -EIO; 26743caeb18SDarrick J. Wong goto done; 26843caeb18SDarrick J. Wong } 26943caeb18SDarrick J. Wong 27043caeb18SDarrick J. Wong /* 271787eb485SChristoph Hellwig * Clean up any COW blocks on an I/O error. 272c59d87c4SChristoph Hellwig */ 2734e4cbee9SChristoph Hellwig error = blk_status_to_errno(ioend->io_bio->bi_status); 274787eb485SChristoph Hellwig if (unlikely(error)) { 275be225fecSChristoph Hellwig if (ioend->io_fork == XFS_COW_FORK) 276787eb485SChristoph Hellwig xfs_reflink_cancel_cow_range(ip, offset, size, true); 2775cb13dcdSZhaohongjiang goto done; 278787eb485SChristoph Hellwig } 279787eb485SChristoph Hellwig 280787eb485SChristoph Hellwig /* 281787eb485SChristoph Hellwig * Success: commit the COW or unwritten blocks if needed. 282787eb485SChristoph Hellwig */ 283be225fecSChristoph Hellwig if (ioend->io_fork == XFS_COW_FORK) 284787eb485SChristoph Hellwig error = xfs_reflink_end_cow(ip, offset, size); 2854e087a3bSChristoph Hellwig else if (ioend->io_type == IOMAP_UNWRITTEN) 286ee70daabSEryu Guan error = xfs_iomap_write_unwritten(ip, offset, size, false); 287be225fecSChristoph Hellwig else 288787eb485SChristoph Hellwig ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); 28984803fb7SChristoph Hellwig 29004f658eeSChristoph Hellwig done: 291787eb485SChristoph Hellwig if (ioend->io_append_trans) 292787eb485SChristoph Hellwig error = xfs_setfilesize_ioend(ioend, error); 293*433dad94SChristoph Hellwig xfs_destroy_ioends(ioend, error); 29473d30d48SChristoph Hellwig memalloc_nofs_restore(nofs_flag); 2953994fc48SDarrick J. Wong } 2963994fc48SDarrick J. Wong 2973994fc48SDarrick J. Wong /* 2983994fc48SDarrick J. Wong * We can merge two adjacent ioends if they have the same set of work to do. 2993994fc48SDarrick J. Wong */ 3003994fc48SDarrick J. Wong static bool 3013994fc48SDarrick J. Wong xfs_ioend_can_merge( 3023994fc48SDarrick J. Wong struct xfs_ioend *ioend, 3033994fc48SDarrick J. Wong struct xfs_ioend *next) 3043994fc48SDarrick J. Wong { 305fe64e0d2SChristoph Hellwig if (ioend->io_bio->bi_status != next->io_bio->bi_status) 3063994fc48SDarrick J. Wong return false; 3073994fc48SDarrick J. Wong if ((ioend->io_fork == XFS_COW_FORK) ^ (next->io_fork == XFS_COW_FORK)) 3083994fc48SDarrick J. Wong return false; 3094e087a3bSChristoph Hellwig if ((ioend->io_type == IOMAP_UNWRITTEN) ^ 3104e087a3bSChristoph Hellwig (next->io_type == IOMAP_UNWRITTEN)) 3113994fc48SDarrick J. Wong return false; 3123994fc48SDarrick J. Wong if (ioend->io_offset + ioend->io_size != next->io_offset) 3133994fc48SDarrick J. Wong return false; 3143994fc48SDarrick J. Wong return true; 3153994fc48SDarrick J. Wong } 3163994fc48SDarrick J. Wong 3177dbae9fbSChristoph Hellwig /* 3187dbae9fbSChristoph Hellwig * If the to be merged ioend has a preallocated transaction for file 3197dbae9fbSChristoph Hellwig * size updates we need to ensure the ioend it is merged into also 3207dbae9fbSChristoph Hellwig * has one. If it already has one we can simply cancel the transaction 3217dbae9fbSChristoph Hellwig * as it is guaranteed to be clean. 3227dbae9fbSChristoph Hellwig */ 3237dbae9fbSChristoph Hellwig static void 3247dbae9fbSChristoph Hellwig xfs_ioend_merge_append_transactions( 3257dbae9fbSChristoph Hellwig struct xfs_ioend *ioend, 3267dbae9fbSChristoph Hellwig struct xfs_ioend *next) 3277dbae9fbSChristoph Hellwig { 3287dbae9fbSChristoph Hellwig if (!ioend->io_append_trans) { 3297dbae9fbSChristoph Hellwig ioend->io_append_trans = next->io_append_trans; 3307dbae9fbSChristoph Hellwig next->io_append_trans = NULL; 3317dbae9fbSChristoph Hellwig } else { 3327dbae9fbSChristoph Hellwig xfs_setfilesize_ioend(next, -ECANCELED); 3337dbae9fbSChristoph Hellwig } 3347dbae9fbSChristoph Hellwig } 3357dbae9fbSChristoph Hellwig 3363994fc48SDarrick J. Wong /* Try to merge adjacent completions. */ 3373994fc48SDarrick J. Wong STATIC void 3383994fc48SDarrick J. Wong xfs_ioend_try_merge( 3393994fc48SDarrick J. Wong struct xfs_ioend *ioend, 3403994fc48SDarrick J. Wong struct list_head *more_ioends) 3413994fc48SDarrick J. Wong { 342*433dad94SChristoph Hellwig struct xfs_ioend *next; 3433994fc48SDarrick J. Wong 344*433dad94SChristoph Hellwig INIT_LIST_HEAD(&ioend->io_list); 345*433dad94SChristoph Hellwig 346*433dad94SChristoph Hellwig while ((next = list_first_entry_or_null(more_ioends, struct xfs_ioend, 347*433dad94SChristoph Hellwig io_list))) { 348*433dad94SChristoph Hellwig if (!xfs_ioend_can_merge(ioend, next)) 3493994fc48SDarrick J. Wong break; 350*433dad94SChristoph Hellwig list_move_tail(&next->io_list, &ioend->io_list); 351*433dad94SChristoph Hellwig ioend->io_size += next->io_size; 352*433dad94SChristoph Hellwig if (next->io_append_trans) 353*433dad94SChristoph Hellwig xfs_ioend_merge_append_transactions(ioend, next); 3543994fc48SDarrick J. Wong } 3553994fc48SDarrick J. Wong } 3563994fc48SDarrick J. Wong 3573994fc48SDarrick J. Wong /* list_sort compare function for ioends */ 3583994fc48SDarrick J. Wong static int 3593994fc48SDarrick J. Wong xfs_ioend_compare( 3603994fc48SDarrick J. Wong void *priv, 3613994fc48SDarrick J. Wong struct list_head *a, 3623994fc48SDarrick J. Wong struct list_head *b) 3633994fc48SDarrick J. Wong { 3643994fc48SDarrick J. Wong struct xfs_ioend *ia; 3653994fc48SDarrick J. Wong struct xfs_ioend *ib; 3663994fc48SDarrick J. Wong 3673994fc48SDarrick J. Wong ia = container_of(a, struct xfs_ioend, io_list); 3683994fc48SDarrick J. Wong ib = container_of(b, struct xfs_ioend, io_list); 3693994fc48SDarrick J. Wong if (ia->io_offset < ib->io_offset) 3703994fc48SDarrick J. Wong return -1; 3713994fc48SDarrick J. Wong else if (ia->io_offset > ib->io_offset) 3723994fc48SDarrick J. Wong return 1; 3733994fc48SDarrick J. Wong return 0; 374c59d87c4SChristoph Hellwig } 375c59d87c4SChristoph Hellwig 376*433dad94SChristoph Hellwig static void 377*433dad94SChristoph Hellwig xfs_sort_ioends( 378*433dad94SChristoph Hellwig struct list_head *ioend_list) 379*433dad94SChristoph Hellwig { 380*433dad94SChristoph Hellwig list_sort(NULL, ioend_list, xfs_ioend_compare); 381*433dad94SChristoph Hellwig } 382*433dad94SChristoph Hellwig 383cb357bf3SDarrick J. Wong /* Finish all pending io completions. */ 384cb357bf3SDarrick J. Wong void 385cb357bf3SDarrick J. Wong xfs_end_io( 386cb357bf3SDarrick J. Wong struct work_struct *work) 387cb357bf3SDarrick J. Wong { 388*433dad94SChristoph Hellwig struct xfs_inode *ip = 389*433dad94SChristoph Hellwig container_of(work, struct xfs_inode, i_ioend_work); 390cb357bf3SDarrick J. Wong struct xfs_ioend *ioend; 391*433dad94SChristoph Hellwig struct list_head tmp; 392cb357bf3SDarrick J. Wong unsigned long flags; 393cb357bf3SDarrick J. Wong 394cb357bf3SDarrick J. Wong spin_lock_irqsave(&ip->i_ioend_lock, flags); 395*433dad94SChristoph Hellwig list_replace_init(&ip->i_ioend_list, &tmp); 396cb357bf3SDarrick J. Wong spin_unlock_irqrestore(&ip->i_ioend_lock, flags); 397cb357bf3SDarrick J. Wong 398*433dad94SChristoph Hellwig xfs_sort_ioends(&tmp); 399*433dad94SChristoph Hellwig while ((ioend = list_first_entry_or_null(&tmp, struct xfs_ioend, 400*433dad94SChristoph Hellwig io_list))) { 401cb357bf3SDarrick J. Wong list_del_init(&ioend->io_list); 402*433dad94SChristoph Hellwig xfs_ioend_try_merge(ioend, &tmp); 403cb357bf3SDarrick J. Wong xfs_end_ioend(ioend); 404cb357bf3SDarrick J. Wong } 405cb357bf3SDarrick J. Wong } 406cb357bf3SDarrick J. Wong 4070e51a8e1SChristoph Hellwig STATIC void 4080e51a8e1SChristoph Hellwig xfs_end_bio( 4090e51a8e1SChristoph Hellwig struct bio *bio) 410c59d87c4SChristoph Hellwig { 4110e51a8e1SChristoph Hellwig struct xfs_ioend *ioend = bio->bi_private; 412cb357bf3SDarrick J. Wong struct xfs_inode *ip = XFS_I(ioend->io_inode); 413cb357bf3SDarrick J. Wong struct xfs_mount *mp = ip->i_mount; 414cb357bf3SDarrick J. Wong unsigned long flags; 415c59d87c4SChristoph Hellwig 416be225fecSChristoph Hellwig if (ioend->io_fork == XFS_COW_FORK || 4174e087a3bSChristoph Hellwig ioend->io_type == IOMAP_UNWRITTEN || 418cb357bf3SDarrick J. Wong ioend->io_append_trans != NULL) { 419cb357bf3SDarrick J. Wong spin_lock_irqsave(&ip->i_ioend_lock, flags); 420cb357bf3SDarrick J. Wong if (list_empty(&ip->i_ioend_list)) 421cb357bf3SDarrick J. Wong WARN_ON_ONCE(!queue_work(mp->m_unwritten_workqueue, 422cb357bf3SDarrick J. Wong &ip->i_ioend_work)); 423cb357bf3SDarrick J. Wong list_add_tail(&ioend->io_list, &ip->i_ioend_list); 424cb357bf3SDarrick J. Wong spin_unlock_irqrestore(&ip->i_ioend_lock, flags); 425cb357bf3SDarrick J. Wong } else 4264e4cbee9SChristoph Hellwig xfs_destroy_ioend(ioend, blk_status_to_errno(bio->bi_status)); 427c59d87c4SChristoph Hellwig } 428c59d87c4SChristoph Hellwig 429d9252d52SBrian Foster /* 430d9252d52SBrian Foster * Fast revalidation of the cached writeback mapping. Return true if the current 431d9252d52SBrian Foster * mapping is valid, false otherwise. 432d9252d52SBrian Foster */ 433d9252d52SBrian Foster static bool 434d9252d52SBrian Foster xfs_imap_valid( 435d9252d52SBrian Foster struct xfs_writepage_ctx *wpc, 436d9252d52SBrian Foster struct xfs_inode *ip, 4374e087a3bSChristoph Hellwig loff_t offset) 438d9252d52SBrian Foster { 4394e087a3bSChristoph Hellwig if (offset < wpc->iomap.offset || 4404e087a3bSChristoph Hellwig offset >= wpc->iomap.offset + wpc->iomap.length) 441d9252d52SBrian Foster return false; 442d9252d52SBrian Foster /* 443d9252d52SBrian Foster * If this is a COW mapping, it is sufficient to check that the mapping 444d9252d52SBrian Foster * covers the offset. Be careful to check this first because the caller 445d9252d52SBrian Foster * can revalidate a COW mapping without updating the data seqno. 446d9252d52SBrian Foster */ 447be225fecSChristoph Hellwig if (wpc->fork == XFS_COW_FORK) 448d9252d52SBrian Foster return true; 449d9252d52SBrian Foster 450d9252d52SBrian Foster /* 451d9252d52SBrian Foster * This is not a COW mapping. Check the sequence number of the data fork 452d9252d52SBrian Foster * because concurrent changes could have invalidated the extent. Check 453d9252d52SBrian Foster * the COW fork because concurrent changes since the last time we 454d9252d52SBrian Foster * checked (and found nothing at this offset) could have added 455d9252d52SBrian Foster * overlapping blocks. 456d9252d52SBrian Foster */ 457d9252d52SBrian Foster if (wpc->data_seq != READ_ONCE(ip->i_df.if_seq)) 458d9252d52SBrian Foster return false; 459d9252d52SBrian Foster if (xfs_inode_has_cow_data(ip) && 460d9252d52SBrian Foster wpc->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) 461d9252d52SBrian Foster return false; 462d9252d52SBrian Foster return true; 463d9252d52SBrian Foster } 464d9252d52SBrian Foster 4654ad765edSChristoph Hellwig /* 4664ad765edSChristoph Hellwig * Pass in a dellalloc extent and convert it to real extents, return the real 4674e087a3bSChristoph Hellwig * extent that maps offset_fsb in wpc->iomap. 4684ad765edSChristoph Hellwig * 4694ad765edSChristoph Hellwig * The current page is held locked so nothing could have removed the block 4707588cbeeSChristoph Hellwig * backing offset_fsb, although it could have moved from the COW to the data 4717588cbeeSChristoph Hellwig * fork by another thread. 4724ad765edSChristoph Hellwig */ 4734ad765edSChristoph Hellwig static int 4744ad765edSChristoph Hellwig xfs_convert_blocks( 4754ad765edSChristoph Hellwig struct xfs_writepage_ctx *wpc, 4764ad765edSChristoph Hellwig struct xfs_inode *ip, 4774e087a3bSChristoph Hellwig loff_t offset) 4784ad765edSChristoph Hellwig { 4794ad765edSChristoph Hellwig int error; 4804ad765edSChristoph Hellwig 4814ad765edSChristoph Hellwig /* 4824e087a3bSChristoph Hellwig * Attempt to allocate whatever delalloc extent currently backs offset 4834e087a3bSChristoph Hellwig * and put the result into wpc->iomap. Allocate in a loop because it 4844e087a3bSChristoph Hellwig * may take several attempts to allocate real blocks for a contiguous 4854e087a3bSChristoph Hellwig * delalloc extent if free space is sufficiently fragmented. 4864ad765edSChristoph Hellwig */ 4874ad765edSChristoph Hellwig do { 4884e087a3bSChristoph Hellwig error = xfs_bmapi_convert_delalloc(ip, wpc->fork, offset, 4894e087a3bSChristoph Hellwig &wpc->iomap, wpc->fork == XFS_COW_FORK ? 4904ad765edSChristoph Hellwig &wpc->cow_seq : &wpc->data_seq); 4914ad765edSChristoph Hellwig if (error) 4924ad765edSChristoph Hellwig return error; 4934e087a3bSChristoph Hellwig } while (wpc->iomap.offset + wpc->iomap.length <= offset); 4944ad765edSChristoph Hellwig 4954ad765edSChristoph Hellwig return 0; 4964ad765edSChristoph Hellwig } 4974ad765edSChristoph Hellwig 498c59d87c4SChristoph Hellwig STATIC int 499c59d87c4SChristoph Hellwig xfs_map_blocks( 5005c665e5bSChristoph Hellwig struct xfs_writepage_ctx *wpc, 501c59d87c4SChristoph Hellwig struct inode *inode, 5025c665e5bSChristoph Hellwig loff_t offset) 503c59d87c4SChristoph Hellwig { 504c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 505c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 50693407472SFabian Frederick ssize_t count = i_blocksize(inode); 507b4e29032SChristoph Hellwig xfs_fileoff_t offset_fsb = XFS_B_TO_FSBT(mp, offset); 508b4e29032SChristoph Hellwig xfs_fileoff_t end_fsb = XFS_B_TO_FSB(mp, offset + count); 509e666aa37SChristoph Hellwig xfs_fileoff_t cow_fsb = NULLFILEOFF; 5105c665e5bSChristoph Hellwig struct xfs_bmbt_irec imap; 511060d4eaaSChristoph Hellwig struct xfs_iext_cursor icur; 5127588cbeeSChristoph Hellwig int retries = 0; 513c59d87c4SChristoph Hellwig int error = 0; 514c59d87c4SChristoph Hellwig 515d9252d52SBrian Foster if (XFS_FORCED_SHUTDOWN(mp)) 516d9252d52SBrian Foster return -EIO; 517d9252d52SBrian Foster 518889c65b3SChristoph Hellwig /* 519889c65b3SChristoph Hellwig * COW fork blocks can overlap data fork blocks even if the blocks 520889c65b3SChristoph Hellwig * aren't shared. COW I/O always takes precedent, so we must always 521889c65b3SChristoph Hellwig * check for overlap on reflink inodes unless the mapping is already a 522e666aa37SChristoph Hellwig * COW one, or the COW fork hasn't changed from the last time we looked 523e666aa37SChristoph Hellwig * at it. 524e666aa37SChristoph Hellwig * 525e666aa37SChristoph Hellwig * It's safe to check the COW fork if_seq here without the ILOCK because 526e666aa37SChristoph Hellwig * we've indirectly protected against concurrent updates: writeback has 527e666aa37SChristoph Hellwig * the page locked, which prevents concurrent invalidations by reflink 528e666aa37SChristoph Hellwig * and directio and prevents concurrent buffered writes to the same 529e666aa37SChristoph Hellwig * page. Changes to if_seq always happen under i_lock, which protects 530e666aa37SChristoph Hellwig * against concurrent updates and provides a memory barrier on the way 531e666aa37SChristoph Hellwig * out that ensures that we always see the current value. 532889c65b3SChristoph Hellwig */ 5334e087a3bSChristoph Hellwig if (xfs_imap_valid(wpc, ip, offset)) 534889c65b3SChristoph Hellwig return 0; 535889c65b3SChristoph Hellwig 536889c65b3SChristoph Hellwig /* 537889c65b3SChristoph Hellwig * If we don't have a valid map, now it's time to get a new one for this 538889c65b3SChristoph Hellwig * offset. This will convert delayed allocations (including COW ones) 539889c65b3SChristoph Hellwig * into real extents. If we return without a valid map, it means we 540889c65b3SChristoph Hellwig * landed in a hole and we skip the block. 541889c65b3SChristoph Hellwig */ 5427588cbeeSChristoph Hellwig retry: 543c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED); 544c59d87c4SChristoph Hellwig ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 545c59d87c4SChristoph Hellwig (ip->i_df.if_flags & XFS_IFEXTENTS)); 546060d4eaaSChristoph Hellwig 547060d4eaaSChristoph Hellwig /* 548060d4eaaSChristoph Hellwig * Check if this is offset is covered by a COW extents, and if yes use 549060d4eaaSChristoph Hellwig * it directly instead of looking up anything in the data fork. 550060d4eaaSChristoph Hellwig */ 55151d62690SChristoph Hellwig if (xfs_inode_has_cow_data(ip) && 552e666aa37SChristoph Hellwig xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap)) 553e666aa37SChristoph Hellwig cow_fsb = imap.br_startoff; 554e666aa37SChristoph Hellwig if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) { 5552ba090d5SChristoph Hellwig wpc->cow_seq = READ_ONCE(ip->i_cowfp->if_seq); 5565c665e5bSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 557be225fecSChristoph Hellwig 558be225fecSChristoph Hellwig wpc->fork = XFS_COW_FORK; 5595c665e5bSChristoph Hellwig goto allocate_blocks; 5605c665e5bSChristoph Hellwig } 5615c665e5bSChristoph Hellwig 5625c665e5bSChristoph Hellwig /* 563d9252d52SBrian Foster * No COW extent overlap. Revalidate now that we may have updated 564d9252d52SBrian Foster * ->cow_seq. If the data mapping is still valid, we're done. 5655c665e5bSChristoph Hellwig */ 5664e087a3bSChristoph Hellwig if (xfs_imap_valid(wpc, ip, offset)) { 5675c665e5bSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 5685c665e5bSChristoph Hellwig return 0; 5695c665e5bSChristoph Hellwig } 5705c665e5bSChristoph Hellwig 5715c665e5bSChristoph Hellwig /* 5725c665e5bSChristoph Hellwig * If we don't have a valid map, now it's time to get a new one for this 5735c665e5bSChristoph Hellwig * offset. This will convert delayed allocations (including COW ones) 5745c665e5bSChristoph Hellwig * into real extents. 5755c665e5bSChristoph Hellwig */ 5763345746eSChristoph Hellwig if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap)) 5773345746eSChristoph Hellwig imap.br_startoff = end_fsb; /* fake a hole past EOF */ 578d9252d52SBrian Foster wpc->data_seq = READ_ONCE(ip->i_df.if_seq); 579c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 580c59d87c4SChristoph Hellwig 581be225fecSChristoph Hellwig wpc->fork = XFS_DATA_FORK; 582be225fecSChristoph Hellwig 58312df89f2SChristoph Hellwig /* landed in a hole or beyond EOF? */ 5843345746eSChristoph Hellwig if (imap.br_startoff > offset_fsb) { 5853345746eSChristoph Hellwig imap.br_blockcount = imap.br_startoff - offset_fsb; 5865c665e5bSChristoph Hellwig imap.br_startoff = offset_fsb; 5875c665e5bSChristoph Hellwig imap.br_startblock = HOLESTARTBLOCK; 588be225fecSChristoph Hellwig imap.br_state = XFS_EXT_NORM; 58912df89f2SChristoph Hellwig } 59012df89f2SChristoph Hellwig 591e666aa37SChristoph Hellwig /* 59212df89f2SChristoph Hellwig * Truncate to the next COW extent if there is one. This is the only 59312df89f2SChristoph Hellwig * opportunity to do this because we can skip COW fork lookups for the 59412df89f2SChristoph Hellwig * subsequent blocks in the mapping; however, the requirement to treat 59512df89f2SChristoph Hellwig * the COW range separately remains. 596e666aa37SChristoph Hellwig */ 597e666aa37SChristoph Hellwig if (cow_fsb != NULLFILEOFF && 598e666aa37SChristoph Hellwig cow_fsb < imap.br_startoff + imap.br_blockcount) 599e666aa37SChristoph Hellwig imap.br_blockcount = cow_fsb - imap.br_startoff; 600e666aa37SChristoph Hellwig 601be225fecSChristoph Hellwig /* got a delalloc extent? */ 60212df89f2SChristoph Hellwig if (imap.br_startblock != HOLESTARTBLOCK && 60312df89f2SChristoph Hellwig isnullstartblock(imap.br_startblock)) 6045c665e5bSChristoph Hellwig goto allocate_blocks; 605e2f6ad46SDave Chinner 6064e087a3bSChristoph Hellwig xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0); 607be225fecSChristoph Hellwig trace_xfs_map_blocks_found(ip, offset, count, wpc->fork, &imap); 6085c665e5bSChristoph Hellwig return 0; 6095c665e5bSChristoph Hellwig allocate_blocks: 6104e087a3bSChristoph Hellwig error = xfs_convert_blocks(wpc, ip, offset); 6117588cbeeSChristoph Hellwig if (error) { 6127588cbeeSChristoph Hellwig /* 6137588cbeeSChristoph Hellwig * If we failed to find the extent in the COW fork we might have 6147588cbeeSChristoph Hellwig * raced with a COW to data fork conversion or truncate. 6157588cbeeSChristoph Hellwig * Restart the lookup to catch the extent in the data fork for 6167588cbeeSChristoph Hellwig * the former case, but prevent additional retries to avoid 6177588cbeeSChristoph Hellwig * looping forever for the latter case. 6187588cbeeSChristoph Hellwig */ 6197588cbeeSChristoph Hellwig if (error == -EAGAIN && wpc->fork == XFS_COW_FORK && !retries++) 6207588cbeeSChristoph Hellwig goto retry; 6217588cbeeSChristoph Hellwig ASSERT(error != -EAGAIN); 6225c665e5bSChristoph Hellwig return error; 6237588cbeeSChristoph Hellwig } 6244ad765edSChristoph Hellwig 6254ad765edSChristoph Hellwig /* 6264ad765edSChristoph Hellwig * Due to merging the return real extent might be larger than the 6274ad765edSChristoph Hellwig * original delalloc one. Trim the return extent to the next COW 6284ad765edSChristoph Hellwig * boundary again to force a re-lookup. 6294ad765edSChristoph Hellwig */ 6304e087a3bSChristoph Hellwig if (wpc->fork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) { 6314e087a3bSChristoph Hellwig loff_t cow_offset = XFS_FSB_TO_B(mp, cow_fsb); 6324ad765edSChristoph Hellwig 6334e087a3bSChristoph Hellwig if (cow_offset < wpc->iomap.offset + wpc->iomap.length) 6344e087a3bSChristoph Hellwig wpc->iomap.length = cow_offset - wpc->iomap.offset; 6354e087a3bSChristoph Hellwig } 6364e087a3bSChristoph Hellwig 6374e087a3bSChristoph Hellwig ASSERT(wpc->iomap.offset <= offset); 6384e087a3bSChristoph Hellwig ASSERT(wpc->iomap.offset + wpc->iomap.length > offset); 639be225fecSChristoph Hellwig trace_xfs_map_blocks_alloc(ip, offset, count, wpc->fork, &imap); 640c59d87c4SChristoph Hellwig return 0; 641c59d87c4SChristoph Hellwig } 642c59d87c4SChristoph Hellwig 643c59d87c4SChristoph Hellwig /* 644bb18782aSDave Chinner * Submit the bio for an ioend. We are passed an ioend with a bio attached to 645bb18782aSDave Chinner * it, and we submit that bio. The ioend may be used for multiple bio 646bb18782aSDave Chinner * submissions, so we only want to allocate an append transaction for the ioend 647bb18782aSDave Chinner * once. In the case of multiple bio submission, each bio will take an IO 648bb18782aSDave Chinner * reference to the ioend to ensure that the ioend completion is only done once 649bb18782aSDave Chinner * all bios have been submitted and the ioend is really done. 6507bf7f352SDave Chinner * 6510290d9c1SChristoph Hellwig * If @status is non-zero, it means that we have a situation where some part of 6527bf7f352SDave Chinner * the submission process has failed after we have marked paged for writeback 653bb18782aSDave Chinner * and unlocked them. In this situation, we need to fail the bio and ioend 654bb18782aSDave Chinner * rather than submit it to IO. This typically only happens on a filesystem 655bb18782aSDave Chinner * shutdown. 656c59d87c4SChristoph Hellwig */ 657e10de372SDave Chinner STATIC int 658c59d87c4SChristoph Hellwig xfs_submit_ioend( 659c59d87c4SChristoph Hellwig struct writeback_control *wbc, 6600e51a8e1SChristoph Hellwig struct xfs_ioend *ioend, 661e10de372SDave Chinner int status) 662c59d87c4SChristoph Hellwig { 66373d30d48SChristoph Hellwig unsigned int nofs_flag; 66473d30d48SChristoph Hellwig 66573d30d48SChristoph Hellwig /* 66673d30d48SChristoph Hellwig * We can allocate memory here while doing writeback on behalf of 66773d30d48SChristoph Hellwig * memory reclaim. To avoid memory allocation deadlocks set the 66873d30d48SChristoph Hellwig * task-wide nofs context for the following operations. 66973d30d48SChristoph Hellwig */ 67073d30d48SChristoph Hellwig nofs_flag = memalloc_nofs_save(); 67173d30d48SChristoph Hellwig 6725eda4300SDarrick J. Wong /* Convert CoW extents to regular */ 673be225fecSChristoph Hellwig if (!status && ioend->io_fork == XFS_COW_FORK) { 6745eda4300SDarrick J. Wong status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode), 6755eda4300SDarrick J. Wong ioend->io_offset, ioend->io_size); 6765eda4300SDarrick J. Wong } 6775eda4300SDarrick J. Wong 678e10de372SDave Chinner /* Reserve log space if we might write beyond the on-disk inode size. */ 679e10de372SDave Chinner if (!status && 680be225fecSChristoph Hellwig (ioend->io_fork == XFS_COW_FORK || 6814e087a3bSChristoph Hellwig ioend->io_type != IOMAP_UNWRITTEN) && 682bb18782aSDave Chinner xfs_ioend_is_append(ioend) && 683bb18782aSDave Chinner !ioend->io_append_trans) 684e10de372SDave Chinner status = xfs_setfilesize_trans_alloc(ioend); 685bb18782aSDave Chinner 68673d30d48SChristoph Hellwig memalloc_nofs_restore(nofs_flag); 68773d30d48SChristoph Hellwig 6880e51a8e1SChristoph Hellwig ioend->io_bio->bi_private = ioend; 6890e51a8e1SChristoph Hellwig ioend->io_bio->bi_end_io = xfs_end_bio; 69070fd7614SChristoph Hellwig 6917bf7f352SDave Chinner /* 6927bf7f352SDave Chinner * If we are failing the IO now, just mark the ioend with an 6937bf7f352SDave Chinner * error and finish it. This will run IO completion immediately 6947bf7f352SDave Chinner * as there is only one reference to the ioend at this point in 6957bf7f352SDave Chinner * time. 6967bf7f352SDave Chinner */ 697e10de372SDave Chinner if (status) { 6984e4cbee9SChristoph Hellwig ioend->io_bio->bi_status = errno_to_blk_status(status); 6990e51a8e1SChristoph Hellwig bio_endio(ioend->io_bio); 700e10de372SDave Chinner return status; 7017bf7f352SDave Chinner } 7027bf7f352SDave Chinner 7034e49ea4aSMike Christie submit_bio(ioend->io_bio); 704e10de372SDave Chinner return 0; 705c59d87c4SChristoph Hellwig } 706c59d87c4SChristoph Hellwig 7070e51a8e1SChristoph Hellwig static struct xfs_ioend * 7080e51a8e1SChristoph Hellwig xfs_alloc_ioend( 7090e51a8e1SChristoph Hellwig struct inode *inode, 7104e087a3bSChristoph Hellwig struct xfs_writepage_ctx *wpc, 7110e51a8e1SChristoph Hellwig xfs_off_t offset, 712a2473735SChristoph Hellwig sector_t sector, 713a2473735SChristoph Hellwig struct writeback_control *wbc) 7140e51a8e1SChristoph Hellwig { 7150e51a8e1SChristoph Hellwig struct xfs_ioend *ioend; 7160e51a8e1SChristoph Hellwig struct bio *bio; 7170e51a8e1SChristoph Hellwig 718e292d7bcSKent Overstreet bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &xfs_ioend_bioset); 7194e087a3bSChristoph Hellwig bio_set_dev(bio, wpc->iomap.bdev); 7203faed667SChristoph Hellwig bio->bi_iter.bi_sector = sector; 721a2473735SChristoph Hellwig bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc); 722a2473735SChristoph Hellwig bio->bi_write_hint = inode->i_write_hint; 723adfb5fb4SChristoph Hellwig wbc_init_bio(wbc, bio); 7240e51a8e1SChristoph Hellwig 7250e51a8e1SChristoph Hellwig ioend = container_of(bio, struct xfs_ioend, io_inline_bio); 7260e51a8e1SChristoph Hellwig INIT_LIST_HEAD(&ioend->io_list); 7274e087a3bSChristoph Hellwig ioend->io_fork = wpc->fork; 7284e087a3bSChristoph Hellwig ioend->io_type = wpc->iomap.type; 7290e51a8e1SChristoph Hellwig ioend->io_inode = inode; 7300e51a8e1SChristoph Hellwig ioend->io_size = 0; 7310e51a8e1SChristoph Hellwig ioend->io_offset = offset; 7320e51a8e1SChristoph Hellwig ioend->io_append_trans = NULL; 7330e51a8e1SChristoph Hellwig ioend->io_bio = bio; 7340e51a8e1SChristoph Hellwig return ioend; 7350e51a8e1SChristoph Hellwig } 7360e51a8e1SChristoph Hellwig 7370e51a8e1SChristoph Hellwig /* 7380e51a8e1SChristoph Hellwig * Allocate a new bio, and chain the old bio to the new one. 7390e51a8e1SChristoph Hellwig * 7400e51a8e1SChristoph Hellwig * Note that we have to do perform the chaining in this unintuitive order 7410e51a8e1SChristoph Hellwig * so that the bi_private linkage is set up in the right direction for the 7420e51a8e1SChristoph Hellwig * traversal in xfs_destroy_ioend(). 7430e51a8e1SChristoph Hellwig */ 744a2473735SChristoph Hellwig static struct bio * 7450e51a8e1SChristoph Hellwig xfs_chain_bio( 746a2473735SChristoph Hellwig struct bio *prev) 7470e51a8e1SChristoph Hellwig { 7480e51a8e1SChristoph Hellwig struct bio *new; 7490e51a8e1SChristoph Hellwig 7500e51a8e1SChristoph Hellwig new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES); 751adfb5fb4SChristoph Hellwig bio_copy_dev(new, prev);/* also copies over blkcg information */ 752a2473735SChristoph Hellwig new->bi_iter.bi_sector = bio_end_sector(prev); 753a2473735SChristoph Hellwig new->bi_opf = prev->bi_opf; 754a2473735SChristoph Hellwig new->bi_write_hint = prev->bi_write_hint; 755a2473735SChristoph Hellwig 756a2473735SChristoph Hellwig bio_chain(prev, new); 757a2473735SChristoph Hellwig bio_get(prev); /* for xfs_destroy_ioend */ 758a2473735SChristoph Hellwig submit_bio(prev); 759a2473735SChristoph Hellwig return new; 7600e51a8e1SChristoph Hellwig } 7610e51a8e1SChristoph Hellwig 762c59d87c4SChristoph Hellwig /* 7633faed667SChristoph Hellwig * Test to see if we have an existing ioend structure that we could append to 7643faed667SChristoph Hellwig * first, otherwise finish off the current ioend and start another. 765c59d87c4SChristoph Hellwig */ 766c59d87c4SChristoph Hellwig STATIC void 767c59d87c4SChristoph Hellwig xfs_add_to_ioend( 768c59d87c4SChristoph Hellwig struct inode *inode, 769c59d87c4SChristoph Hellwig xfs_off_t offset, 7703faed667SChristoph Hellwig struct page *page, 77182cb1417SChristoph Hellwig struct iomap_page *iop, 772e10de372SDave Chinner struct xfs_writepage_ctx *wpc, 773bb18782aSDave Chinner struct writeback_control *wbc, 774e10de372SDave Chinner struct list_head *iolist) 775c59d87c4SChristoph Hellwig { 7764e087a3bSChristoph Hellwig sector_t sector = iomap_sector(&wpc->iomap, offset); 7773faed667SChristoph Hellwig unsigned len = i_blocksize(inode); 7783faed667SChristoph Hellwig unsigned poff = offset & (PAGE_SIZE - 1); 779ff896738SChristoph Hellwig bool merged, same_page = false; 7803faed667SChristoph Hellwig 781be225fecSChristoph Hellwig if (!wpc->ioend || 782be225fecSChristoph Hellwig wpc->fork != wpc->ioend->io_fork || 7834e087a3bSChristoph Hellwig wpc->iomap.type != wpc->ioend->io_type || 7843faed667SChristoph Hellwig sector != bio_end_sector(wpc->ioend->io_bio) || 7850df61da8SDarrick J. Wong offset != wpc->ioend->io_offset + wpc->ioend->io_size) { 786e10de372SDave Chinner if (wpc->ioend) 787e10de372SDave Chinner list_add(&wpc->ioend->io_list, iolist); 7884e087a3bSChristoph Hellwig wpc->ioend = xfs_alloc_ioend(inode, wpc, offset, sector, wbc); 789c59d87c4SChristoph Hellwig } 790c59d87c4SChristoph Hellwig 791ff896738SChristoph Hellwig merged = __bio_try_merge_page(wpc->ioend->io_bio, page, len, poff, 792ff896738SChristoph Hellwig &same_page); 793ff896738SChristoph Hellwig 794ff896738SChristoph Hellwig if (iop && !same_page) 79582cb1417SChristoph Hellwig atomic_inc(&iop->write_count); 796ff896738SChristoph Hellwig 797ff896738SChristoph Hellwig if (!merged) { 79879d08f89SMing Lei if (bio_full(wpc->ioend->io_bio, len)) 799a2473735SChristoph Hellwig wpc->ioend->io_bio = xfs_chain_bio(wpc->ioend->io_bio); 80007173c3eSMing Lei bio_add_page(wpc->ioend->io_bio, page, len, poff); 80182cb1417SChristoph Hellwig } 802bb18782aSDave Chinner 8033faed667SChristoph Hellwig wpc->ioend->io_size += len; 8049637d517SLinus Torvalds wbc_account_cgroup_owner(wbc, page, len); 805c59d87c4SChristoph Hellwig } 806c59d87c4SChristoph Hellwig 807c59d87c4SChristoph Hellwig STATIC void 808c59d87c4SChristoph Hellwig xfs_vm_invalidatepage( 809c59d87c4SChristoph Hellwig struct page *page, 810d47992f8SLukas Czerner unsigned int offset, 811d47992f8SLukas Czerner unsigned int length) 812c59d87c4SChristoph Hellwig { 81382cb1417SChristoph Hellwig trace_xfs_invalidatepage(page->mapping->host, page, offset, length); 81482cb1417SChristoph Hellwig iomap_invalidatepage(page, offset, length); 815c59d87c4SChristoph Hellwig } 816c59d87c4SChristoph Hellwig 817c59d87c4SChristoph Hellwig /* 81882cb1417SChristoph Hellwig * If the page has delalloc blocks on it, we need to punch them out before we 819c59d87c4SChristoph Hellwig * invalidate the page. If we don't, we leave a stale delalloc mapping on the 82082cb1417SChristoph Hellwig * inode that can trip up a later direct I/O read operation on the same region. 821c59d87c4SChristoph Hellwig * 82282cb1417SChristoph Hellwig * We prevent this by truncating away the delalloc regions on the page. Because 82382cb1417SChristoph Hellwig * they are delalloc, we can do this without needing a transaction. Indeed - if 82482cb1417SChristoph Hellwig * we get ENOSPC errors, we have to be able to do this truncation without a 82582cb1417SChristoph Hellwig * transaction as there is no space left for block reservation (typically why we 82682cb1417SChristoph Hellwig * see a ENOSPC in writeback). 827c59d87c4SChristoph Hellwig */ 828c59d87c4SChristoph Hellwig STATIC void 829c59d87c4SChristoph Hellwig xfs_aops_discard_page( 830c59d87c4SChristoph Hellwig struct page *page) 831c59d87c4SChristoph Hellwig { 832c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 833c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 83403625721SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 835c59d87c4SChristoph Hellwig loff_t offset = page_offset(page); 83603625721SChristoph Hellwig xfs_fileoff_t start_fsb = XFS_B_TO_FSBT(mp, offset); 83703625721SChristoph Hellwig int error; 838c59d87c4SChristoph Hellwig 83903625721SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 840c59d87c4SChristoph Hellwig goto out_invalidate; 841c59d87c4SChristoph Hellwig 84203625721SChristoph Hellwig xfs_alert(mp, 843c9690043SDarrick J. Wong "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", 844c59d87c4SChristoph Hellwig page, ip->i_ino, offset); 845c59d87c4SChristoph Hellwig 84603625721SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 84703625721SChristoph Hellwig PAGE_SIZE / i_blocksize(inode)); 84803625721SChristoph Hellwig if (error && !XFS_FORCED_SHUTDOWN(mp)) 84903625721SChristoph Hellwig xfs_alert(mp, "page discard unable to remove delalloc mapping."); 850c59d87c4SChristoph Hellwig out_invalidate: 85109cbfeafSKirill A. Shutemov xfs_vm_invalidatepage(page, 0, PAGE_SIZE); 852c59d87c4SChristoph Hellwig } 853c59d87c4SChristoph Hellwig 854c59d87c4SChristoph Hellwig /* 855e10de372SDave Chinner * We implement an immediate ioend submission policy here to avoid needing to 856e10de372SDave Chinner * chain multiple ioends and hence nest mempool allocations which can violate 857e10de372SDave Chinner * forward progress guarantees we need to provide. The current ioend we are 85882cb1417SChristoph Hellwig * adding blocks to is cached on the writepage context, and if the new block 859e10de372SDave Chinner * does not append to the cached ioend it will create a new ioend and cache that 860e10de372SDave Chinner * instead. 861e10de372SDave Chinner * 862e10de372SDave Chinner * If a new ioend is created and cached, the old ioend is returned and queued 863e10de372SDave Chinner * locally for submission once the entire page is processed or an error has been 864e10de372SDave Chinner * detected. While ioends are submitted immediately after they are completed, 865e10de372SDave Chinner * batching optimisations are provided by higher level block plugging. 866e10de372SDave Chinner * 867e10de372SDave Chinner * At the end of a writeback pass, there will be a cached ioend remaining on the 868e10de372SDave Chinner * writepage context that the caller will need to submit. 869e10de372SDave Chinner */ 870bfce7d2eSDave Chinner static int 871bfce7d2eSDave Chinner xfs_writepage_map( 872bfce7d2eSDave Chinner struct xfs_writepage_ctx *wpc, 873e10de372SDave Chinner struct writeback_control *wbc, 874bfce7d2eSDave Chinner struct inode *inode, 875bfce7d2eSDave Chinner struct page *page, 876c8ce540dSDarrick J. Wong uint64_t end_offset) 877bfce7d2eSDave Chinner { 878e10de372SDave Chinner LIST_HEAD(submit_list); 87982cb1417SChristoph Hellwig struct iomap_page *iop = to_iomap_page(page); 88082cb1417SChristoph Hellwig unsigned len = i_blocksize(inode); 881e10de372SDave Chinner struct xfs_ioend *ioend, *next; 8826a4c9501SChristoph Hellwig uint64_t file_offset; /* file offset of page */ 88382cb1417SChristoph Hellwig int error = 0, count = 0, i; 884bfce7d2eSDave Chinner 88582cb1417SChristoph Hellwig ASSERT(iop || i_blocksize(inode) == PAGE_SIZE); 88682cb1417SChristoph Hellwig ASSERT(!iop || atomic_read(&iop->write_count) == 0); 887ac8ee546SChristoph Hellwig 888e2f6ad46SDave Chinner /* 88982cb1417SChristoph Hellwig * Walk through the page to find areas to write back. If we run off the 89082cb1417SChristoph Hellwig * end of the current map or find the current map invalid, grab a new 89182cb1417SChristoph Hellwig * one. 892e2f6ad46SDave Chinner */ 89382cb1417SChristoph Hellwig for (i = 0, file_offset = page_offset(page); 89482cb1417SChristoph Hellwig i < (PAGE_SIZE >> inode->i_blkbits) && file_offset < end_offset; 89582cb1417SChristoph Hellwig i++, file_offset += len) { 89682cb1417SChristoph Hellwig if (iop && !test_bit(i, iop->uptodate)) 897bfce7d2eSDave Chinner continue; 898bfce7d2eSDave Chinner 8996a4c9501SChristoph Hellwig error = xfs_map_blocks(wpc, inode, file_offset); 900bfce7d2eSDave Chinner if (error) 901889c65b3SChristoph Hellwig break; 9024e087a3bSChristoph Hellwig if (wpc->iomap.type == IOMAP_HOLE) 903ac8ee546SChristoph Hellwig continue; 90482cb1417SChristoph Hellwig xfs_add_to_ioend(inode, file_offset, page, iop, wpc, wbc, 9053faed667SChristoph Hellwig &submit_list); 906bfce7d2eSDave Chinner count++; 907e2f6ad46SDave Chinner } 908bfce7d2eSDave Chinner 909e10de372SDave Chinner ASSERT(wpc->ioend || list_empty(&submit_list)); 9101b65d3ddSChristoph Hellwig ASSERT(PageLocked(page)); 9111b65d3ddSChristoph Hellwig ASSERT(!PageWriteback(page)); 912bfce7d2eSDave Chinner 913bfce7d2eSDave Chinner /* 91482cb1417SChristoph Hellwig * On error, we have to fail the ioend here because we may have set 91582cb1417SChristoph Hellwig * pages under writeback, we have to make sure we run IO completion to 91682cb1417SChristoph Hellwig * mark the error state of the IO appropriately, so we can't cancel the 91782cb1417SChristoph Hellwig * ioend directly here. That means we have to mark this page as under 91882cb1417SChristoph Hellwig * writeback if we included any blocks from it in the ioend chain so 91982cb1417SChristoph Hellwig * that completion treats it correctly. 920bfce7d2eSDave Chinner * 921e10de372SDave Chinner * If we didn't include the page in the ioend, the on error we can 922e10de372SDave Chinner * simply discard and unlock it as there are no other users of the page 92382cb1417SChristoph Hellwig * now. The caller will still need to trigger submission of outstanding 92482cb1417SChristoph Hellwig * ioends on the writepage context so they are treated correctly on 92582cb1417SChristoph Hellwig * error. 926bfce7d2eSDave Chinner */ 9278e1f065bSChristoph Hellwig if (unlikely(error)) { 9288e1f065bSChristoph Hellwig if (!count) { 9298e1f065bSChristoph Hellwig xfs_aops_discard_page(page); 9308e1f065bSChristoph Hellwig ClearPageUptodate(page); 9318e1f065bSChristoph Hellwig unlock_page(page); 9328e1f065bSChristoph Hellwig goto done; 9338e1f065bSChristoph Hellwig } 9348e1f065bSChristoph Hellwig 9351b65d3ddSChristoph Hellwig /* 9361b65d3ddSChristoph Hellwig * If the page was not fully cleaned, we need to ensure that the 9371b65d3ddSChristoph Hellwig * higher layers come back to it correctly. That means we need 9381b65d3ddSChristoph Hellwig * to keep the page dirty, and for WB_SYNC_ALL writeback we need 9391b65d3ddSChristoph Hellwig * to ensure the PAGECACHE_TAG_TOWRITE index mark is not removed 9401b65d3ddSChristoph Hellwig * so another attempt to write this page in this writeback sweep 9411b65d3ddSChristoph Hellwig * will be made. 9421b65d3ddSChristoph Hellwig */ 9431b65d3ddSChristoph Hellwig set_page_writeback_keepwrite(page); 9441b65d3ddSChristoph Hellwig } else { 9451b65d3ddSChristoph Hellwig clear_page_dirty_for_io(page); 9461b65d3ddSChristoph Hellwig set_page_writeback(page); 9471b65d3ddSChristoph Hellwig } 9488e1f065bSChristoph Hellwig 9491b65d3ddSChristoph Hellwig unlock_page(page); 950e10de372SDave Chinner 951e10de372SDave Chinner /* 952e10de372SDave Chinner * Preserve the original error if there was one, otherwise catch 953e10de372SDave Chinner * submission errors here and propagate into subsequent ioend 954e10de372SDave Chinner * submissions. 955e10de372SDave Chinner */ 956e10de372SDave Chinner list_for_each_entry_safe(ioend, next, &submit_list, io_list) { 957e10de372SDave Chinner int error2; 958e10de372SDave Chinner 959e10de372SDave Chinner list_del_init(&ioend->io_list); 960e10de372SDave Chinner error2 = xfs_submit_ioend(wbc, ioend, error); 961e10de372SDave Chinner if (error2 && !error) 962e10de372SDave Chinner error = error2; 963e10de372SDave Chinner } 964e10de372SDave Chinner 9658e1f065bSChristoph Hellwig /* 96682cb1417SChristoph Hellwig * We can end up here with no error and nothing to write only if we race 96782cb1417SChristoph Hellwig * with a partial page truncate on a sub-page block sized filesystem. 9688e1f065bSChristoph Hellwig */ 9698e1f065bSChristoph Hellwig if (!count) 9708e1f065bSChristoph Hellwig end_page_writeback(page); 9718e1f065bSChristoph Hellwig done: 972bfce7d2eSDave Chinner mapping_set_error(page->mapping, error); 973bfce7d2eSDave Chinner return error; 974bfce7d2eSDave Chinner } 975bfce7d2eSDave Chinner 976c59d87c4SChristoph Hellwig /* 977c59d87c4SChristoph Hellwig * Write out a dirty page. 978c59d87c4SChristoph Hellwig * 979c59d87c4SChristoph Hellwig * For delalloc space on the page we need to allocate space and flush it. 980c59d87c4SChristoph Hellwig * For unwritten space on the page we need to start the conversion to 981c59d87c4SChristoph Hellwig * regular allocated space. 982c59d87c4SChristoph Hellwig */ 983c59d87c4SChristoph Hellwig STATIC int 984fbcc0256SDave Chinner xfs_do_writepage( 985c59d87c4SChristoph Hellwig struct page *page, 986fbcc0256SDave Chinner struct writeback_control *wbc, 987fbcc0256SDave Chinner void *data) 988c59d87c4SChristoph Hellwig { 989fbcc0256SDave Chinner struct xfs_writepage_ctx *wpc = data; 990c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 991c59d87c4SChristoph Hellwig loff_t offset; 992c8ce540dSDarrick J. Wong uint64_t end_offset; 993ad68972aSDave Chinner pgoff_t end_index; 994c59d87c4SChristoph Hellwig 99534097dfeSLukas Czerner trace_xfs_writepage(inode, page, 0, 0); 996c59d87c4SChristoph Hellwig 997c59d87c4SChristoph Hellwig /* 998c59d87c4SChristoph Hellwig * Refuse to write the page out if we are called from reclaim context. 999c59d87c4SChristoph Hellwig * 1000c59d87c4SChristoph Hellwig * This avoids stack overflows when called from deeply used stacks in 1001c59d87c4SChristoph Hellwig * random callers for direct reclaim or memcg reclaim. We explicitly 1002c59d87c4SChristoph Hellwig * allow reclaim from kswapd as the stack usage there is relatively low. 1003c59d87c4SChristoph Hellwig * 100494054fa3SMel Gorman * This should never happen except in the case of a VM regression so 100594054fa3SMel Gorman * warn about it. 1006c59d87c4SChristoph Hellwig */ 100794054fa3SMel Gorman if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 100894054fa3SMel Gorman PF_MEMALLOC)) 1009c59d87c4SChristoph Hellwig goto redirty; 1010c59d87c4SChristoph Hellwig 1011c59d87c4SChristoph Hellwig /* 1012c59d87c4SChristoph Hellwig * Given that we do not allow direct reclaim to call us, we should 1013c59d87c4SChristoph Hellwig * never be called while in a filesystem transaction. 1014c59d87c4SChristoph Hellwig */ 10159070733bSMichal Hocko if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS)) 1016c59d87c4SChristoph Hellwig goto redirty; 1017c59d87c4SChristoph Hellwig 10188695d27eSJie Liu /* 1019ad68972aSDave Chinner * Is this page beyond the end of the file? 1020ad68972aSDave Chinner * 10218695d27eSJie Liu * The page index is less than the end_index, adjust the end_offset 10228695d27eSJie Liu * to the highest offset that this page should represent. 10238695d27eSJie Liu * ----------------------------------------------------- 10248695d27eSJie Liu * | file mapping | <EOF> | 10258695d27eSJie Liu * ----------------------------------------------------- 10268695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | | 10278695d27eSJie Liu * ^--------------------------------^----------|-------- 10288695d27eSJie Liu * | desired writeback range | see else | 10298695d27eSJie Liu * ---------------------------------^------------------| 10308695d27eSJie Liu */ 1031ad68972aSDave Chinner offset = i_size_read(inode); 103209cbfeafSKirill A. Shutemov end_index = offset >> PAGE_SHIFT; 10338695d27eSJie Liu if (page->index < end_index) 103409cbfeafSKirill A. Shutemov end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT; 10358695d27eSJie Liu else { 10368695d27eSJie Liu /* 10378695d27eSJie Liu * Check whether the page to write out is beyond or straddles 10388695d27eSJie Liu * i_size or not. 10398695d27eSJie Liu * ------------------------------------------------------- 10408695d27eSJie Liu * | file mapping | <EOF> | 10418695d27eSJie Liu * ------------------------------------------------------- 10428695d27eSJie Liu * | Page ... | Page N-2 | Page N-1 | Page N | Beyond | 10438695d27eSJie Liu * ^--------------------------------^-----------|--------- 10448695d27eSJie Liu * | | Straddles | 10458695d27eSJie Liu * ---------------------------------^-----------|--------| 10468695d27eSJie Liu */ 104709cbfeafSKirill A. Shutemov unsigned offset_into_page = offset & (PAGE_SIZE - 1); 10486b7a03f0SChristoph Hellwig 10496b7a03f0SChristoph Hellwig /* 1050ff9a28f6SJan Kara * Skip the page if it is fully outside i_size, e.g. due to a 1051ff9a28f6SJan Kara * truncate operation that is in progress. We must redirty the 1052ff9a28f6SJan Kara * page so that reclaim stops reclaiming it. Otherwise 1053ff9a28f6SJan Kara * xfs_vm_releasepage() is called on it and gets confused. 10548695d27eSJie Liu * 10558695d27eSJie Liu * Note that the end_index is unsigned long, it would overflow 10568695d27eSJie Liu * if the given offset is greater than 16TB on 32-bit system 10578695d27eSJie Liu * and if we do check the page is fully outside i_size or not 10588695d27eSJie Liu * via "if (page->index >= end_index + 1)" as "end_index + 1" 10598695d27eSJie Liu * will be evaluated to 0. Hence this page will be redirtied 10608695d27eSJie Liu * and be written out repeatedly which would result in an 10618695d27eSJie Liu * infinite loop, the user program that perform this operation 10628695d27eSJie Liu * will hang. Instead, we can verify this situation by checking 10638695d27eSJie Liu * if the page to write is totally beyond the i_size or if it's 10648695d27eSJie Liu * offset is just equal to the EOF. 10656b7a03f0SChristoph Hellwig */ 10668695d27eSJie Liu if (page->index > end_index || 10678695d27eSJie Liu (page->index == end_index && offset_into_page == 0)) 1068ff9a28f6SJan Kara goto redirty; 10696b7a03f0SChristoph Hellwig 10706b7a03f0SChristoph Hellwig /* 10716b7a03f0SChristoph Hellwig * The page straddles i_size. It must be zeroed out on each 10726b7a03f0SChristoph Hellwig * and every writepage invocation because it may be mmapped. 10736b7a03f0SChristoph Hellwig * "A file is mapped in multiples of the page size. For a file 10746b7a03f0SChristoph Hellwig * that is not a multiple of the page size, the remaining 10756b7a03f0SChristoph Hellwig * memory is zeroed when mapped, and writes to that region are 10766b7a03f0SChristoph Hellwig * not written out to the file." 10776b7a03f0SChristoph Hellwig */ 107809cbfeafSKirill A. Shutemov zero_user_segment(page, offset_into_page, PAGE_SIZE); 10798695d27eSJie Liu 10808695d27eSJie Liu /* Adjust the end_offset to the end of file */ 10818695d27eSJie Liu end_offset = offset; 1082c59d87c4SChristoph Hellwig } 1083c59d87c4SChristoph Hellwig 10842d5f4b5bSDarrick J. Wong return xfs_writepage_map(wpc, wbc, inode, page, end_offset); 1085c59d87c4SChristoph Hellwig 1086c59d87c4SChristoph Hellwig redirty: 1087c59d87c4SChristoph Hellwig redirty_page_for_writepage(wbc, page); 1088c59d87c4SChristoph Hellwig unlock_page(page); 1089c59d87c4SChristoph Hellwig return 0; 1090c59d87c4SChristoph Hellwig } 1091c59d87c4SChristoph Hellwig 1092c59d87c4SChristoph Hellwig STATIC int 1093fbcc0256SDave Chinner xfs_vm_writepage( 1094fbcc0256SDave Chinner struct page *page, 1095fbcc0256SDave Chinner struct writeback_control *wbc) 1096fbcc0256SDave Chinner { 1097be225fecSChristoph Hellwig struct xfs_writepage_ctx wpc = { }; 1098fbcc0256SDave Chinner int ret; 1099fbcc0256SDave Chinner 1100fbcc0256SDave Chinner ret = xfs_do_writepage(page, wbc, &wpc); 1101e10de372SDave Chinner if (wpc.ioend) 1102e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1103e10de372SDave Chinner return ret; 1104fbcc0256SDave Chinner } 1105fbcc0256SDave Chinner 1106fbcc0256SDave Chinner STATIC int 1107c59d87c4SChristoph Hellwig xfs_vm_writepages( 1108c59d87c4SChristoph Hellwig struct address_space *mapping, 1109c59d87c4SChristoph Hellwig struct writeback_control *wbc) 1110c59d87c4SChristoph Hellwig { 1111be225fecSChristoph Hellwig struct xfs_writepage_ctx wpc = { }; 1112fbcc0256SDave Chinner int ret; 1113fbcc0256SDave Chinner 1114c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1115fbcc0256SDave Chinner ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc); 1116e10de372SDave Chinner if (wpc.ioend) 1117e10de372SDave Chinner ret = xfs_submit_ioend(wbc, wpc.ioend, ret); 1118e10de372SDave Chinner return ret; 1119c59d87c4SChristoph Hellwig } 1120c59d87c4SChristoph Hellwig 11216e2608dfSDan Williams STATIC int 11226e2608dfSDan Williams xfs_dax_writepages( 11236e2608dfSDan Williams struct address_space *mapping, 11246e2608dfSDan Williams struct writeback_control *wbc) 11256e2608dfSDan Williams { 11266e2608dfSDan Williams xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 11276e2608dfSDan Williams return dax_writeback_mapping_range(mapping, 11286e2608dfSDan Williams xfs_find_bdev_for_inode(mapping->host), wbc); 11296e2608dfSDan Williams } 11306e2608dfSDan Williams 1131c59d87c4SChristoph Hellwig STATIC int 1132c59d87c4SChristoph Hellwig xfs_vm_releasepage( 1133c59d87c4SChristoph Hellwig struct page *page, 1134c59d87c4SChristoph Hellwig gfp_t gfp_mask) 1135c59d87c4SChristoph Hellwig { 113634097dfeSLukas Czerner trace_xfs_releasepage(page->mapping->host, page, 0, 0); 113782cb1417SChristoph Hellwig return iomap_releasepage(page, gfp_mask); 1138c59d87c4SChristoph Hellwig } 1139c59d87c4SChristoph Hellwig 1140c59d87c4SChristoph Hellwig STATIC sector_t 1141c59d87c4SChristoph Hellwig xfs_vm_bmap( 1142c59d87c4SChristoph Hellwig struct address_space *mapping, 1143c59d87c4SChristoph Hellwig sector_t block) 1144c59d87c4SChristoph Hellwig { 1145b84e7722SChristoph Hellwig struct xfs_inode *ip = XFS_I(mapping->host); 1146c59d87c4SChristoph Hellwig 1147b84e7722SChristoph Hellwig trace_xfs_vm_bmap(ip); 1148db1327b1SDarrick J. Wong 1149db1327b1SDarrick J. Wong /* 1150db1327b1SDarrick J. Wong * The swap code (ab-)uses ->bmap to get a block mapping and then 1151793057e1SIngo Molnar * bypasses the file system for actual I/O. We really can't allow 1152db1327b1SDarrick J. Wong * that on reflinks inodes, so we have to skip out here. And yes, 1153eb5e248dSDarrick J. Wong * 0 is the magic code for a bmap error. 1154eb5e248dSDarrick J. Wong * 1155eb5e248dSDarrick J. Wong * Since we don't pass back blockdev info, we can't return bmap 1156eb5e248dSDarrick J. Wong * information for rt files either. 1157db1327b1SDarrick J. Wong */ 115866ae56a5SChristoph Hellwig if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip)) 1159db1327b1SDarrick J. Wong return 0; 1160b84e7722SChristoph Hellwig return iomap_bmap(mapping, block, &xfs_iomap_ops); 1161c59d87c4SChristoph Hellwig } 1162c59d87c4SChristoph Hellwig 1163c59d87c4SChristoph Hellwig STATIC int 1164c59d87c4SChristoph Hellwig xfs_vm_readpage( 1165c59d87c4SChristoph Hellwig struct file *unused, 1166c59d87c4SChristoph Hellwig struct page *page) 1167c59d87c4SChristoph Hellwig { 1168121e213eSDave Chinner trace_xfs_vm_readpage(page->mapping->host, 1); 11698b2e77c1SChristoph Hellwig return iomap_readpage(page, &xfs_iomap_ops); 1170c59d87c4SChristoph Hellwig } 1171c59d87c4SChristoph Hellwig 1172c59d87c4SChristoph Hellwig STATIC int 1173c59d87c4SChristoph Hellwig xfs_vm_readpages( 1174c59d87c4SChristoph Hellwig struct file *unused, 1175c59d87c4SChristoph Hellwig struct address_space *mapping, 1176c59d87c4SChristoph Hellwig struct list_head *pages, 1177c59d87c4SChristoph Hellwig unsigned nr_pages) 1178c59d87c4SChristoph Hellwig { 1179121e213eSDave Chinner trace_xfs_vm_readpages(mapping->host, nr_pages); 11808b2e77c1SChristoph Hellwig return iomap_readpages(mapping, pages, nr_pages, &xfs_iomap_ops); 118122e757a4SDave Chinner } 118222e757a4SDave Chinner 118367482129SDarrick J. Wong static int 118467482129SDarrick J. Wong xfs_iomap_swapfile_activate( 118567482129SDarrick J. Wong struct swap_info_struct *sis, 118667482129SDarrick J. Wong struct file *swap_file, 118767482129SDarrick J. Wong sector_t *span) 118867482129SDarrick J. Wong { 118967482129SDarrick J. Wong sis->bdev = xfs_find_bdev_for_inode(file_inode(swap_file)); 119067482129SDarrick J. Wong return iomap_swapfile_activate(sis, swap_file, span, &xfs_iomap_ops); 119167482129SDarrick J. Wong } 119267482129SDarrick J. Wong 1193c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = { 1194c59d87c4SChristoph Hellwig .readpage = xfs_vm_readpage, 1195c59d87c4SChristoph Hellwig .readpages = xfs_vm_readpages, 1196c59d87c4SChristoph Hellwig .writepage = xfs_vm_writepage, 1197c59d87c4SChristoph Hellwig .writepages = xfs_vm_writepages, 119882cb1417SChristoph Hellwig .set_page_dirty = iomap_set_page_dirty, 1199c59d87c4SChristoph Hellwig .releasepage = xfs_vm_releasepage, 1200c59d87c4SChristoph Hellwig .invalidatepage = xfs_vm_invalidatepage, 1201c59d87c4SChristoph Hellwig .bmap = xfs_vm_bmap, 12026e2608dfSDan Williams .direct_IO = noop_direct_IO, 120382cb1417SChristoph Hellwig .migratepage = iomap_migrate_page, 120482cb1417SChristoph Hellwig .is_partially_uptodate = iomap_is_partially_uptodate, 1205c59d87c4SChristoph Hellwig .error_remove_page = generic_error_remove_page, 120667482129SDarrick J. Wong .swap_activate = xfs_iomap_swapfile_activate, 1207c59d87c4SChristoph Hellwig }; 12086e2608dfSDan Williams 12096e2608dfSDan Williams const struct address_space_operations xfs_dax_aops = { 12106e2608dfSDan Williams .writepages = xfs_dax_writepages, 12116e2608dfSDan Williams .direct_IO = noop_direct_IO, 12126e2608dfSDan Williams .set_page_dirty = noop_set_page_dirty, 12136e2608dfSDan Williams .invalidatepage = noop_invalidatepage, 121467482129SDarrick J. Wong .swap_activate = xfs_iomap_swapfile_activate, 12156e2608dfSDan Williams }; 1216