// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * Copyright (c) 2016-2018 Christoph Hellwig.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_reflink.h"
#include "xfs_errortag.h"
#include "xfs_error.h"

struct xfs_writepage_ctx {
	struct iomap_writepage_ctx ctx;
	unsigned int		data_seq;
	unsigned int		cow_seq;
};

static inline struct xfs_writepage_ctx *
XFS_WPC(struct iomap_writepage_ctx *ctx)
{
	return container_of(ctx, struct xfs_writepage_ctx, ctx);
}

/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct iomap_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_disk_size;
}

/*
 * Update on-disk file size now that data has been written to disk.
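 *
 * This runs from I/O completion context, so the size update is done under
 * XFS_ILOCK_EXCL in its own transaction (tr_fsyncts reservation) to keep the
 * on-disk size change journalled.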
 */
int
xfs_setfilesize(
	struct xfs_inode	*ip,
	xfs_off_t		offset,
	size_t			size)
{
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_trans	*tp;
	xfs_fsize_t		isize;
	int			error;

	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_fsyncts, 0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, offset + size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp);
		return 0;
	}

	trace_xfs_setfilesize(ip, offset, size);

	ip->i_disk_size = isize;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp);
}

/*
 * IO write completion.
 */
STATIC void
xfs_end_ioend(
	struct iomap_ioend	*ioend)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_mount	*mp = ip->i_mount;
	xfs_off_t		offset = ioend->io_offset;
	size_t			size = ioend->io_size;
	unsigned int		nofs_flag;
	int			error;

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim. To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	/*
	 * Just clean up the in-memory structures if the fs has been shut down.
	 */
	if (xfs_is_shutdown(mp)) {
		error = -EIO;
		goto done;
	}

	/*
	 * Clean up all COW blocks and underlying data fork delalloc blocks on
	 * I/O error. The delalloc punch is required because this ioend was
	 * mapped to blocks in the COW fork and the associated pages are no
	 * longer dirty. If we don't remove delalloc blocks here, they become
	 * stale and can corrupt free space accounting on unmount.
	 */
	error = blk_status_to_errno(ioend->io_bio->bi_status);
	if (unlikely(error)) {
		if (ioend->io_flags & IOMAP_F_SHARED) {
			xfs_reflink_cancel_cow_range(ip, offset, size, true);
			xfs_bmap_punch_delalloc_range(ip, offset,
					offset + size);
		}
		goto done;
	}

	/*
	 * Success: commit the COW or unwritten blocks if needed.
	 */
	if (ioend->io_flags & IOMAP_F_SHARED)
		error = xfs_reflink_end_cow(ip, offset, size);
	else if (ioend->io_type == IOMAP_UNWRITTEN)
		error = xfs_iomap_write_unwritten(ip, offset, size, false);

	if (!error && xfs_ioend_is_append(ioend))
		error = xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);
done:
	iomap_finish_ioends(ioend, error);
	memalloc_nofs_restore(nofs_flag);
}

/*
 * Finish all pending IO completions that require transactional modifications.
 *
 * We try to merge physically and logically contiguous ioends before completion
 * to minimise the number of transactions we need to perform during IO
 * completion. Both unwritten extent conversion and COW remapping need to
 * iterate and modify one physical extent at a time, so we gain nothing by
 * merging physically discontiguous extents here.
 *
 * The ioend chain length that we can be processing here is largely unbound in
 * length and we may have to perform significant amounts of work on each ioend
 * to complete it. Hence we have to be careful about holding the CPU for too
 * long in this loop.
 */
void
xfs_end_io(
	struct work_struct	*work)
{
	struct xfs_inode	*ip =
		container_of(work, struct xfs_inode, i_ioend_work);
	struct iomap_ioend	*ioend;
	struct list_head	tmp;
	unsigned long		flags;

	spin_lock_irqsave(&ip->i_ioend_lock, flags);
	list_replace_init(&ip->i_ioend_list, &tmp);
	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);

	iomap_sort_ioends(&tmp);
	while ((ioend = list_first_entry_or_null(&tmp, struct iomap_ioend,
			io_list))) {
		list_del_init(&ioend->io_list);
		iomap_ioend_try_merge(ioend, &tmp);
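		/*
		 * Complete the (possibly merged) ioend; this may block on
		 * transaction allocation and commit.
		 */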
		xfs_end_ioend(ioend);
		cond_resched();
	}
}

STATIC void
xfs_end_bio(
	struct bio		*bio)
{
	struct iomap_ioend	*ioend = bio->bi_private;
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	unsigned long		flags;

	spin_lock_irqsave(&ip->i_ioend_lock, flags);
	if (list_empty(&ip->i_ioend_list))
		WARN_ON_ONCE(!queue_work(ip->i_mount->m_unwritten_workqueue,
					 &ip->i_ioend_work));
	list_add_tail(&ioend->io_list, &ip->i_ioend_list);
	spin_unlock_irqrestore(&ip->i_ioend_lock, flags);
}

/*
 * Fast revalidation of the cached writeback mapping. Return true if the
 * current mapping is valid, false otherwise.
 */
static bool
xfs_imap_valid(
	struct iomap_writepage_ctx *wpc,
	struct xfs_inode	*ip,
	loff_t			offset)
{
	if (offset < wpc->iomap.offset ||
	    offset >= wpc->iomap.offset + wpc->iomap.length)
		return false;
	/*
	 * If this is a COW mapping, it is sufficient to check that the mapping
	 * covers the offset. Be careful to check this first because the caller
	 * can revalidate a COW mapping without updating the data seqno.
	 */
	if (wpc->iomap.flags & IOMAP_F_SHARED)
		return true;

	/*
	 * This is not a COW mapping. Check the sequence number of the data
	 * fork because concurrent changes could have invalidated the extent.
	 * Check the COW fork because concurrent changes since the last time
	 * we checked (and found nothing at this offset) could have added
	 * overlapping blocks.
	 */
	if (XFS_WPC(wpc)->data_seq != READ_ONCE(ip->i_df.if_seq)) {
		trace_xfs_wb_data_iomap_invalid(ip, &wpc->iomap,
				XFS_WPC(wpc)->data_seq, XFS_DATA_FORK);
		return false;
	}
	if (xfs_inode_has_cow_data(ip) &&
	    XFS_WPC(wpc)->cow_seq != READ_ONCE(ip->i_cowfp->if_seq)) {
		trace_xfs_wb_cow_iomap_invalid(ip, &wpc->iomap,
				XFS_WPC(wpc)->cow_seq, XFS_COW_FORK);
		return false;
	}
	return true;
}

/*
 * Pass in a delalloc extent and convert it to real extents, return the real
 * extent that maps offset_fsb in wpc->iomap.
 *
 * The current page is held locked so nothing could have removed the block
 * backing offset_fsb, although it could have moved from the COW to the data
 * fork by another thread.
 */
static int
xfs_convert_blocks(
	struct iomap_writepage_ctx *wpc,
	struct xfs_inode	*ip,
	int			whichfork,
	loff_t			offset)
{
	int			error;
	unsigned		*seq;

	if (whichfork == XFS_COW_FORK)
		seq = &XFS_WPC(wpc)->cow_seq;
	else
		seq = &XFS_WPC(wpc)->data_seq;

	/*
	 * Attempt to allocate whatever delalloc extent currently backs offset
	 * and put the result into wpc->iomap. Allocate in a loop because it
	 * may take several attempts to allocate real blocks for a contiguous
	 * delalloc extent if free space is sufficiently fragmented.
	 */
	do {
		error = xfs_bmapi_convert_delalloc(ip, whichfork, offset,
				&wpc->iomap, seq);
		if (error)
			return error;
	} while (wpc->iomap.offset + wpc->iomap.length <= offset);

	return 0;
}

static int
xfs_map_blocks(
	struct iomap_writepage_ctx *wpc,
	struct inode		*inode,
	loff_t			offset)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = i_blocksize(inode);
	xfs_fileoff_t		offset_fsb = XFS_B_TO_FSBT(mp, offset);
	xfs_fileoff_t		end_fsb = XFS_B_TO_FSB(mp, offset + count);
	xfs_fileoff_t		cow_fsb;
	int			whichfork;
	struct xfs_bmbt_irec	imap;
	struct xfs_iext_cursor	icur;
	int			retries = 0;
	int			error = 0;

	if (xfs_is_shutdown(mp))
		return -EIO;

	XFS_ERRORTAG_DELAY(mp, XFS_ERRTAG_WB_DELAY_MS);

	/*
	 * COW fork blocks can overlap data fork blocks even if the blocks
	 * aren't shared. COW I/O always takes precedence, so we must always
	 * check for overlap on reflink inodes unless the mapping is already a
	 * COW one, or the COW fork hasn't changed from the last time we looked
	 * at it.
	 *
	 * It's safe to check the COW fork if_seq here without the ILOCK because
	 * we've indirectly protected against concurrent updates: writeback has
	 * the page locked, which prevents concurrent invalidations by reflink
	 * and directio and prevents concurrent buffered writes to the same
	 * page. Changes to if_seq always happen under i_lock, which protects
	 * against concurrent updates and provides a memory barrier on the way
	 * out that ensures that we always see the current value.
	 */
	if (xfs_imap_valid(wpc, ip, offset))
		return 0;

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset. This will convert delayed allocations (including COW ones)
	 * into real extents. If we return without a valid map, it means we
	 * landed in a hole and we skip the block.
	 */
retry:
	cow_fsb = NULLFILEOFF;
	whichfork = XFS_DATA_FORK;
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	ASSERT(!xfs_need_iread_extents(&ip->i_df));

	/*
	 * Check if this offset is covered by a COW extent, and if so use it
	 * directly instead of looking up anything in the data fork.
	 */
	if (xfs_inode_has_cow_data(ip) &&
	    xfs_iext_lookup_extent(ip, ip->i_cowfp, offset_fsb, &icur, &imap))
		cow_fsb = imap.br_startoff;
	if (cow_fsb != NULLFILEOFF && cow_fsb <= offset_fsb) {
		XFS_WPC(wpc)->cow_seq = READ_ONCE(ip->i_cowfp->if_seq);
		xfs_iunlock(ip, XFS_ILOCK_SHARED);

		whichfork = XFS_COW_FORK;
		goto allocate_blocks;
	}

	/*
	 * No COW extent overlap. Revalidate now that we may have updated
	 * ->cow_seq. If the data mapping is still valid, we're done.
	 */
	if (xfs_imap_valid(wpc, ip, offset)) {
		xfs_iunlock(ip, XFS_ILOCK_SHARED);
		return 0;
	}

	/*
	 * If we don't have a valid map, now it's time to get a new one for this
	 * offset. This will convert delayed allocations (including COW ones)
	 * into real extents.
	 */
	if (!xfs_iext_lookup_extent(ip, &ip->i_df, offset_fsb, &icur, &imap))
		imap.br_startoff = end_fsb;	/* fake a hole past EOF */
	XFS_WPC(wpc)->data_seq = READ_ONCE(ip->i_df.if_seq);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/* landed in a hole or beyond EOF? */
	if (imap.br_startoff > offset_fsb) {
		imap.br_blockcount = imap.br_startoff - offset_fsb;
		imap.br_startoff = offset_fsb;
		imap.br_startblock = HOLESTARTBLOCK;
		imap.br_state = XFS_EXT_NORM;
	}

	/*
	 * Truncate to the next COW extent if there is one. This is the only
	 * opportunity to do this because we can skip COW fork lookups for the
	 * subsequent blocks in the mapping; however, the requirement to treat
	 * the COW range separately remains.
	 */
	if (cow_fsb != NULLFILEOFF &&
	    cow_fsb < imap.br_startoff + imap.br_blockcount)
		imap.br_blockcount = cow_fsb - imap.br_startoff;

	/* got a delalloc extent? */
	if (imap.br_startblock != HOLESTARTBLOCK &&
	    isnullstartblock(imap.br_startblock))
		goto allocate_blocks;

	xfs_bmbt_to_iomap(ip, &wpc->iomap, &imap, 0, 0, XFS_WPC(wpc)->data_seq);
	trace_xfs_map_blocks_found(ip, offset, count, whichfork, &imap);
	return 0;
allocate_blocks:
	error = xfs_convert_blocks(wpc, ip, whichfork, offset);
	if (error) {
		/*
		 * If we failed to find the extent in the COW fork we might have
		 * raced with a COW to data fork conversion or truncate.
		 * Restart the lookup to catch the extent in the data fork for
		 * the former case, but prevent additional retries to avoid
		 * looping forever for the latter case.
		 */
		if (error == -EAGAIN && whichfork == XFS_COW_FORK && !retries++)
			goto retry;
		ASSERT(error != -EAGAIN);
		return error;
	}

	/*
	 * Due to merging the returned real extent might be larger than the
	 * original delalloc one. Trim the returned extent to the next COW
	 * boundary again to force a re-lookup.
	 */
	if (whichfork != XFS_COW_FORK && cow_fsb != NULLFILEOFF) {
		loff_t		cow_offset = XFS_FSB_TO_B(mp, cow_fsb);

		if (cow_offset < wpc->iomap.offset + wpc->iomap.length)
			wpc->iomap.length = cow_offset - wpc->iomap.offset;
	}

	ASSERT(wpc->iomap.offset <= offset);
	ASSERT(wpc->iomap.offset + wpc->iomap.length > offset);
	trace_xfs_map_blocks_alloc(ip, offset, count, whichfork, &imap);
	return 0;
}

static int
xfs_prepare_ioend(
	struct iomap_ioend	*ioend,
	int			status)
{
	unsigned int		nofs_flag;

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim. To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	/* Convert CoW extents to regular */
	if (!status && (ioend->io_flags & IOMAP_F_SHARED)) {
		status = xfs_reflink_convert_cow(XFS_I(ioend->io_inode),
				ioend->io_offset, ioend->io_size);
	}

	memalloc_nofs_restore(nofs_flag);

	/* send ioends that might require a transaction to the completion wq */
	if (xfs_ioend_is_append(ioend) || ioend->io_type == IOMAP_UNWRITTEN ||
	    (ioend->io_flags & IOMAP_F_SHARED))
		ioend->io_bio->bi_end_io = xfs_end_bio;
	return status;
}

/*
 * If the folio has delalloc blocks on it, the caller is asking us to punch them
 * out. If we don't, we can leave a stale delalloc mapping covered by a clean
 * page that needs to be dirtied again before the delalloc mapping can be
 * converted. This stale delalloc mapping can trip up a later direct I/O read
 * operation on the same region.
 *
 * We prevent this by truncating away the delalloc regions on the folio.
 * Because they are delalloc, we can do this without needing a transaction.
 * Indeed - if we get ENOSPC errors, we have to be able to do this truncation
 * without a transaction as there is no space left for block reservation
 * (typically why we see an ENOSPC in writeback).
 */
static void
xfs_discard_folio(
	struct folio		*folio,
	loff_t			pos)
{
	struct xfs_inode	*ip = XFS_I(folio->mapping->host);
	struct xfs_mount	*mp = ip->i_mount;
	int			error;

	if (xfs_is_shutdown(mp))
		return;

	xfs_alert_ratelimited(mp,
		"page discard on page "PTR_FMT", inode 0x%llx, pos %llu.",
			folio, ip->i_ino, pos);

	/*
	 * The end of the punch range is always the offset of the first
	 * byte of the next folio. Hence the end offset is only dependent on
	 * the folio itself and not the start offset that is passed in.
	 */
	error = xfs_bmap_punch_delalloc_range(ip, pos,
			folio_pos(folio) + folio_size(folio));

	if (error && !xfs_is_shutdown(mp))
		xfs_alert(mp, "page discard unable to remove delalloc mapping.");
}

static const struct iomap_writeback_ops xfs_writeback_ops = {
	.map_blocks		= xfs_map_blocks,
	.prepare_ioend		= xfs_prepare_ioend,
	.discard_folio		= xfs_discard_folio,
};

STATIC int
xfs_vm_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct xfs_writepage_ctx wpc = { };

	/*
	 * Writing back data in a transaction context can result in recursive
	 * transactions. This is bad, so issue a warning and get out of here.
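	 * (XFS tracks the transaction a task is currently running in
	 * current->journal_info, so a non-NULL value here means this task is
	 * already inside a transaction context.)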
	 */
	if (WARN_ON_ONCE(current->journal_info))
		return 0;

	xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
	return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
}

STATIC int
xfs_dax_writepages(
	struct address_space	*mapping,
	struct writeback_control *wbc)
{
	struct xfs_inode	*ip = XFS_I(mapping->host);

	xfs_iflags_clear(ip, XFS_ITRUNCATED);
	return dax_writeback_mapping_range(mapping,
			xfs_inode_buftarg(ip)->bt_daxdev, wbc);
}

STATIC sector_t
xfs_vm_bmap(
	struct address_space	*mapping,
	sector_t		block)
{
	struct xfs_inode	*ip = XFS_I(mapping->host);

	trace_xfs_vm_bmap(ip);

	/*
	 * The swap code (ab-)uses ->bmap to get a block mapping and then
	 * bypasses the file system for actual I/O. We really can't allow
	 * that on reflink inodes, so we have to skip out here. And yes,
	 * 0 is the magic code for a bmap error.
	 *
	 * Since we don't pass back blockdev info, we can't return bmap
	 * information for rt files either.
	 */
	if (xfs_is_cow_inode(ip) || XFS_IS_REALTIME_INODE(ip))
		return 0;
	return iomap_bmap(mapping, block, &xfs_read_iomap_ops);
}

STATIC int
xfs_vm_read_folio(
	struct file		*unused,
	struct folio		*folio)
{
	return iomap_read_folio(folio, &xfs_read_iomap_ops);
}

STATIC void
xfs_vm_readahead(
	struct readahead_control	*rac)
{
	iomap_readahead(rac, &xfs_read_iomap_ops);
}

static int
xfs_iomap_swapfile_activate(
	struct swap_info_struct		*sis,
	struct file			*swap_file,
	sector_t			*span)
{
	sis->bdev = xfs_inode_buftarg(XFS_I(file_inode(swap_file)))->bt_bdev;
	return iomap_swapfile_activate(sis, swap_file, span,
			&xfs_read_iomap_ops);
}
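
/*
 * The methods below are mostly thin wrappers around the generic iomap
 * helpers. Buffered writes do not go through ->write_begin/->write_end at
 * all; they are handled by the iomap code called directly from the XFS file
 * operations.
 */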
const struct address_space_operations xfs_address_space_operations = {
	.read_folio		= xfs_vm_read_folio,
	.readahead		= xfs_vm_readahead,
	.writepages		= xfs_vm_writepages,
	.dirty_folio		= iomap_dirty_folio,
	.release_folio		= iomap_release_folio,
	.invalidate_folio	= iomap_invalidate_folio,
	.bmap			= xfs_vm_bmap,
	.migrate_folio		= filemap_migrate_folio,
	.is_partially_uptodate	= iomap_is_partially_uptodate,
	.error_remove_page	= generic_error_remove_page,
	.swap_activate		= xfs_iomap_swapfile_activate,
};

const struct address_space_operations xfs_dax_aops = {
	.writepages		= xfs_dax_writepages,
	.dirty_folio		= noop_dirty_folio,
	.swap_activate		= xfs_iomap_swapfile_activate,
};