// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"

struct xfs_writepage_ctx {
	struct iomap_writepage_ctx ctx;
	unsigned int		data_seq;
	unsigned int		cow_seq;
};

static inline struct xfs_writepage_ctx *
XFS_WPC(struct iomap_writepage_ctx *ctx)
{
	return container_of(ctx, struct xfs_writepage_ctx, ctx);
}

/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}

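/*
 * Allocate a transaction up front for an ioend that may extend the on-disk
 * file size and stash it in ioend->io_private, so that the I/O completion
 * path can commit the size update without allocating in that context.
 */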
STATIC int
xfs_setfilesize_trans_alloc(
	struct iomap_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	ioend->io_private = tp;

	/*
	 * We may pass freeze protection with a transaction.  So tell lockdep
	 * we released it.
	 */
	__sb_writers_release(ioend->io_inode->i_sb, SB_FREEZE_FS);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
	 */
	current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	return 0;
}

/*
 * Update on-disk file size now that data has been written to disk.
 */
STATIC int
__xfs_setfilesize(
	struct xfs_inode	*ip,
	struct xfs_trans	*tp,
	xfs_off_t		offset,
	size_t			size)
{
	xfs_fsize_t		isize;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, offset + size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp);
		return 0;
	}

	trace_xfs_setfilesize(ip, offset, size);

	ip->i_d.di_size = isize;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp);
}

int
xfs_setfilesize(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	return __xfs_setfilesize(ip, tp, offset, size);
}

STATIC int
xfs_setfilesize_ioend(
	struct iomap_ioend	*ioend,
	int			error)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_trans	*tp = ioend->io_private;

	/*
	 * The transaction may have been allocated in the I/O submission thread,
	 * thus we need to mark ourselves as being in a transaction manually.
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
	__sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);

	/* we abort the update if there was an IO error */
	if (error) {
		xfs_trans_cancel(tp);
		return error;
	}

	return __xfs_setfilesize(ip, tp, ioend->io_offset, ioend->io_size);
}

/*
 * IO write completion.
 */
STATIC void
xfs_end_ioend(
	struct iomap_ioend	*ioend)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	unsigned int		nofs_flag;
	int			error;

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim.  To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	/*
	 * Just clean up the in-memory structures if the fs has been shut down.
	 */
	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		error = -EIO;
		goto done;
	}

	/*
	 * Clean up any COW blocks on an I/O error.
	 */
	error = blk_status_to_errno(ioend->io_bio->bi_status);
	if (unlikely(error)) {
		if (ioend->io_flags & IOMAP_F_SHARED)
			xfs_reflink_cancel_cow_range(ip, offset, size, true);
		goto done;
	}

	/*
	 * Success: commit the COW or unwritten blocks if needed.
	 */
	if (ioend->io_flags & IOMAP_F_SHARED)
		error = xfs_reflink_end_cow(ip, offset, size);
	else if (ioend->io_type == IOMAP_UNWRITTEN)
		error = xfs_iomap_write_unwritten(ip, offset, size, false);
	else
		ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_private);

done:
	if (ioend->io_private)
		error = xfs_setfilesize_ioend(ioend, error);
	iomap_finish_ioends(ioend, error);
	memalloc_nofs_restore(nofs_flag);
}

/*
 * If the ioend being merged has a preallocated transaction for file
 * size updates, we need to ensure the ioend it is merged into also
 * has one.  If it already has one, we can simply cancel the transaction
 * as it is guaranteed to be clean.
 */
static void
xfs_ioend_merge_private(
	struct iomap_ioend	*ioend,
	struct iomap_ioend	*next)
{
	if (!ioend->io_private) {
		ioend->io_private = next->io_private;
		next->io_private = NULL;
	} else {
		xfs_setfilesize_ioend(next, -ECANCELED);
	}
}

/* Finish all pending io completions. */
void
xfs_end_io(
	struct work_struct *work)
{
	struct xfs_inode	*ip =
		container_of(work, struct xfs_inode, i_ioend_work);
	struct iomap_ioend	*ioend;
	struct list_head	tmp;
	unsigned long		flags;

	spin_lock_irqsave(&ip->i_ioend_lock, flags);
	list_replace_init(&ip->i_ioend_list, &tmp);
	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);

	iomap_sort_ioends(&tmp);
	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
			io_list))) {
		list_del_init(&ioend->io_list);
		iomap_ioend_try_merge(ioend, &tmp, xfs_ioend_merge_private);
		xfs_end_ioend(ioend);
	}
}

static inline bool xfs_ioend_needs_workqueue(struct iomap_ioend *ioend)
{
	return ioend->io_private ||
		ioend->io_type == IOMAP_UNWRITTEN ||
		(ioend->io_flags & IOMAP_F_SHARED);
}

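/*
 * Bio completion handler for ioends that need transactional completion work:
 * queue the ioend on the inode's completion list and schedule the per-inode
 * completion work item if it is not already pending.
 */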
STATIC void
xfs_end_bio(
	struct bio		*bio)
{
	struct iomap_ioend	*ioend = bio->bi_private;
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	unsigned long		flags;

	ASSERT(xfs_ioend_needs_workqueue(ioend));

	spin_lock_irqsave(&ip->i_ioend_lock, flags);
	if (list_empty(&ip->i_ioend_list))
		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
				&ip->i_ioend_work));
	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
}

/*
 * Fast revalidation of the cached writeback mapping. Return true if the
 * current mapping is valid, false otherwise.
 */
static bool
xfs_imap_valid(
	struct iomap_writepage_ctx	*wpc,
	struct xfs_inode		*ip,
	loff_t				offset)
{
	if (offset < wpc->iomap.offset ||
	    offset >= wpc->iomap.offset + wpc->iomap.length)
		return false;
	/*
	 * If this is a COW mapping, it is sufficient to check that the mapping
	 * covers the offset. Be careful to check this first because the caller
	 * can revalidate a COW mapping without updating the data seqno.
	 */
	if (wpc->iomap.flags & IOMAP_F_SHARED)
		return true;

	/*
	 * This is not a COW mapping. Check the sequence number of the data fork
	 * because concurrent changes could have invalidated the extent. Check
	 * the COW fork because concurrent changes since the last time we
	 * checked (and found nothing at this offset) could have added
	 * overlapping blocks.
	 */
	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq))
		return false;
	if (xfs_inode_has_cow_data(ip) &&
	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq))
		return false;
	return true;
}

/*
 * Pass in a delalloc extent and convert it to real extents, return the real
 * extent that maps offset_fsb in wpc->iomap.
 *
 * The current page is held locked so nothing could have removed the block
 * backing offset_fsb, although it could have moved from the COW to the data
 * fork by another thread.
 */
static int
xfs_convert_blocks(
	struct iomap_writepage_ctx *wpc,
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset)
{
	int			error;
	unsigned		*seq;

	if (whichfork == XFS_COW_FORK)
		seq = &XFS_WPC(wpc)->cow_seq;
	else
		seq = &XFS_WPC(wpc)->data_seq;

	/*
	 * Attempt to allocate whatever delalloc extent currently backs offset
	 * and put the result into wpc->iomap.  Allocate in a loop because it
	 * may take several attempts to allocate real blocks for a contiguous
	 * delalloc extent if free space is sufficiently fragmented.
	 */
	do {
		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
				&wpc->iomap, seq);
		if (error)
			return error;
	} while (wpc->iomap.offset + wpc->iomap.length <= offset);

	return 0;
}

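/*
 * ->map_blocks method for the iomap writeback code: look up, and if necessary
 * allocate, the extent backing the block at offset, giving the COW fork
 * precedence over the data fork, and return the result in wpc->iomap.
 */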
static int
xfs_map_blocks(
	struct iomap_writepage_ctx *wpc,
	struct inode		*inode,
	loff_t			offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = i_blocksize(inode);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
	xfs_fileoff_t		cow_fsb;
	int			whichfork;
	struct xfs_bmbt_irec	imap;
	struct xfs_iext_cursor	icur;
	int			retries = 0;
	int			error = 0;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/*
	 * COW fork blocks can overlap data fork blocks even if the blocks
	 * aren't shared.  COW I/O always takes precedence, so we must always
	 * check for overlap on reflink inodes unless the mapping is already a
	 * COW one, or the COW fork hasn't changed from the last time we looked
	 * at it.
	 *
	 * It's safe to check the COW fork if_seq here without the ILOCK because
	 * we've indirectly protected against concurrent updates: writeback has
	 * the page locked, which prevents concurrent invalidations by reflink
	 * and directio and prevents concurrent buffered writes to the same
	 * page.  Changes to if_seq always happen under i_lock, which protects
	 * against concurrent updates and provides a memory barrier on the way
	 * out that ensures that we always see the current value.
	 */
	if (xfs_imap_valid(wpc, ip, offset))
		return 0;

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset.  This will convert delayed allocations (including COW ones)
	 * into real extents.  If we return without a valid map, it means we
	 * landed in a hole and we skip the block.
	 */
retry:
	cow_fsb = NULLFILEOFF;
	whichfork = XFS_DATA_FORK;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	ASSERT(ip->i_df.if_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));

	/*
	 * Check if this offset is covered by a COW extent, and if yes use
	 * it directly instead of looking up anything in the data fork.
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
		cow_fsb = imap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
		xfs_iunlock(ip, XFS_ILOCK_SHARED);

		whichfork = XFS_COW_FORK;
		goto allocate_blocks;
	}

	/*
	 * No COW extent overlap. Revalidate now that we may have updated
	 * ->cow_seq. If the data mapping is still valid, we're done.
	 */
	if (xfs_imap_valid(wpc, ip, offset)) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return 0;
	}

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset.  This will convert delayed allocations (including COW ones)
	 * into real extents.
	 */
	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/* landed in a hole or beyond EOF? */
	if (imap.br_startoff > offset_fsb) {
		imap.br_blockcount = imap.br_startoff - offset_fsb;
		imap.br_startoff = offset_fsb;
		imap.br_startblock = HOLESTARTBLOCK;
		imap.br_state = XFS_EXT_NORM;
	}

	/*
	 * Truncate to the next COW extent if there is one.  This is the only
	 * opportunity to do this because we can skip COW fork lookups for the
	 * subsequent blocks in the mapping; however, the requirement to treat
	 * the COW range separately remains.
	 */
	if (cow_fsb != NULLFILEOFF &&
	    cow_fsb < imap.br_startoff + imap.br_blockcount)
		imap.br_blockcount = cow_fsb - imap.br_startoff;

	/* got a delalloc extent? */
	if (imap.br_startblock != HOLESTARTBLOCK &&
	    isnullstartblock(imap.br_startblock))
		goto allocate_blocks;

	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0);
	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
	return 0;
allocate_blocks:
	error = xfs_convert_blocks(wpc, ip, whichfork, offset);
	if (error) {
		/*
		 * If we failed to find the extent in the COW fork we might have
		 * raced with a COW to data fork conversion or truncate.
		 * Restart the lookup to catch the extent in the data fork for
		 * the former case, but prevent additional retries to avoid
		 * looping forever for the latter case.
		 */
		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
			goto retry;
		ASSERT(error != -EAGAIN);
		return error;
	}

	/*
	 * Due to merging the returned real extent might be larger than the
	 * original delalloc one.  Trim the returned extent to the next COW
	 * boundary again to force a re-lookup.
	 */
	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);

		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
			wpc->iomap.length = cow_offset - wpc->iomap.offset;
	}

	ASSERT(wpc->iomap.offset <= offset);
	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
	return 0;
}

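/*
 * Called before the ioend is submitted: convert any CoW extents to regular
 * ones, reserve log space for a possible on-disk file size update, and hook
 * up the bio completion handler for ioends that need completion-time work.
 */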
static int
xfs_prepare_ioend(
	struct iomap_ioend	*ioend,
	int			status)
{
	unsigned int		nofs_flag;

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim.  To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	/* Convert CoW extents to regular */
	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
				ioend->io_offset, ioend->io_size);
	}

	/* Reserve log space if we might write beyond the on-disk inode size. */
	if (!status &&
	    ((ioend->io_flags & IOMAP_F_SHARED) ||
	     ioend->io_type != IOMAP_UNWRITTEN) &&
	    xfs_ioend_is_append(ioend) &&
	    !ioend->io_private)
		status = xfs_setfilesize_trans_alloc(ioend);

	memalloc_nofs_restore(nofs_flag);

	if (xfs_ioend_needs_workqueue(ioend))
		ioend->io_bio->bi_end_io = xfs_end_bio;
	return status;
}

/*
 * If the page has delalloc blocks on it, we need to punch them out before we
 * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
 * inode that can trip up a later direct I/O read operation on the same region.
 *
 * We prevent this by truncating away the delalloc regions on the page.  Because
 * they are delalloc, we can do this without needing a transaction.  Indeed - if
 * we get ENOSPC errors, we have to be able to do this truncation without a
 * transaction as there is no space left for block reservation (typically why
 * we see an ENOSPC in writeback).
 */
static void
xfs_discard_page(
	struct page		*page,
	loff_t			fileoff)
{
	struct inode		*inode = page->mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	unsigned int		pageoff = offset_in_page(fileoff);
	xfs_fileoff_t		start_fsb = XFS_B_TO_FSBT(mp, fileoff);
	xfs_fileoff_t		pageoff_fsb = XFS_B_TO_FSBT(mp, pageoff);
	int			error;

	if (XFS_FORCED_SHUTDOWN(mp))
		goto out_invalidate;

	xfs_alert_ratelimited(mp,
Wong "page discard on page "PTR_FMT", inode 0x%llx, offset %llu.", 548763e4cdcSBrian Foster page, ip->i_ino, fileoff); 549c59d87c4SChristoph Hellwig 55003625721SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 551763e4cdcSBrian Foster i_blocks_per_page(inode, page) - pageoff_fsb); 55203625721SChristoph Hellwig if (error && !XFS_FORCED_SHUTDOWN(mp)) 55303625721SChristoph Hellwig xfs_alert(mp, "page discard unable to remove delalloc mapping."); 554c59d87c4SChristoph Hellwig out_invalidate: 555763e4cdcSBrian Foster iomap_invalidatepage(page, pageoff, PAGE_SIZE - pageoff); 556c59d87c4SChristoph Hellwig } 557c59d87c4SChristoph Hellwig 558598ecfbaSChristoph Hellwig static const struct iomap_writeback_ops xfs_writeback_ops = { 559598ecfbaSChristoph Hellwig .map_blocks = xfs_map_blocks, 560598ecfbaSChristoph Hellwig .prepare_ioend = xfs_prepare_ioend, 561598ecfbaSChristoph Hellwig .discard_page = xfs_discard_page, 562598ecfbaSChristoph Hellwig }; 563c59d87c4SChristoph Hellwig 564c59d87c4SChristoph Hellwig STATIC int 565fbcc0256SDave Chinner xfs_vm_writepage( 566fbcc0256SDave Chinner struct page *page, 567fbcc0256SDave Chinner struct writeback_control *wbc) 568fbcc0256SDave Chinner { 569be225fecSChristoph Hellwig struct xfs_writepage_ctx wpc = { }; 570fbcc0256SDave Chinner 571598ecfbaSChristoph Hellwig return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops); 572fbcc0256SDave Chinner } 573fbcc0256SDave Chinner 574fbcc0256SDave Chinner STATIC int 575c59d87c4SChristoph Hellwig xfs_vm_writepages( 576c59d87c4SChristoph Hellwig struct address_space *mapping, 577c59d87c4SChristoph Hellwig struct writeback_control *wbc) 578c59d87c4SChristoph Hellwig { 579be225fecSChristoph Hellwig struct xfs_writepage_ctx wpc = { }; 580fbcc0256SDave Chinner 581c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 582598ecfbaSChristoph Hellwig return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops); 583c59d87c4SChristoph Hellwig } 584c59d87c4SChristoph Hellwig 5856e2608dfSDan Williams STATIC int 5866e2608dfSDan Williams xfs_dax_writepages( 5876e2608dfSDan Williams struct address_space *mapping, 5886e2608dfSDan Williams struct writeback_control *wbc) 5896e2608dfSDan Williams { 59030fa529eSChristoph Hellwig struct xfs_inode *ip = XFS_I(mapping->host); 59130fa529eSChristoph Hellwig 59230fa529eSChristoph Hellwig xfs_iflags_clear(ip, XFS_ITRUNCATED); 5936e2608dfSDan Williams return dax_writeback_mapping_range(mapping, 5943f666c56SVivek Goyal xfs_inode_buftarg(ip)->bt_daxdev, wbc); 5956e2608dfSDan Williams } 5966e2608dfSDan Williams 597c59d87c4SChristoph Hellwig STATIC sector_t 598c59d87c4SChristoph Hellwig xfs_vm_bmap( 599c59d87c4SChristoph Hellwig struct address_space *mapping, 600c59d87c4SChristoph Hellwig sector_t block) 601c59d87c4SChristoph Hellwig { 602b84e7722SChristoph Hellwig struct xfs_inode *ip = XFS_I(mapping->host); 603c59d87c4SChristoph Hellwig 604b84e7722SChristoph Hellwig trace_xfs_vm_bmap(ip); 605db1327b1SDarrick J. Wong 606db1327b1SDarrick J. Wong /* 607db1327b1SDarrick J. Wong * The swap code (ab-)uses ->bmap to get a block mapping and then 608793057e1SIngo Molnar * bypasses the file system for actual I/O. We really can't allow 609db1327b1SDarrick J. Wong * that on reflinks inodes, so we have to skip out here. And yes, 610eb5e248dSDarrick J. Wong * 0 is the magic code for a bmap error. 611eb5e248dSDarrick J. Wong * 612eb5e248dSDarrick J. Wong * Since we don't pass back blockdev info, we can't return bmap 613eb5e248dSDarrick J. 
	 * information for rt files either.
	 */
	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
		return 0;
	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
}

STATIC int
xfs_vm_readpage(
	struct file		*unused,
	struct page		*page)
{
	return iomap_readpage(page, &xfs_read_iomap_ops);
}

STATIC void
xfs_vm_readahead(
	struct readahead_control	*rac)
{
	iomap_readahead(rac, &xfs_read_iomap_ops);
}

static int
xfs_iomap_swapfile_activate(
	struct swap_info_struct		*sis,
	struct file			*swap_file,
	sector_t			*span)
{
	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
	return iomap_swapfile_activate(sis, swap_file, span,
			&xfs_read_iomap_ops);
}

const struct address_space_operations xfs_address_space_operations = {
	.readpage		= xfs_vm_readpage,
	.readahead		= xfs_vm_readahead,
	.writepage		= xfs_vm_writepage,
	.writepages		= xfs_vm_writepages,
	.set_page_dirty		= iomap_set_page_dirty,
	.releasepage		= iomap_releasepage,
	.invalidatepage		= iomap_invalidatepage,
	.bmap			= xfs_vm_bmap,
	.direct_IO		= noop_direct_IO,
	.migratepage		= iomap_migrate_page,
	.is_partially_uptodate	= iomap_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= xfs_iomap_swapfile_activate,
};

const struct address_space_operations xfs_dax_aops = {
	.writepages		= xfs_dax_writepages,
	.direct_IO		= noop_direct_IO,
	.set_page_dirty		= noop_set_page_dirty,
	.invalidatepage		= noop_invalidatepage,
	.swap_activate		= xfs_iomap_swapfile_activate,
};