/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it would be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write the Free Software Foundation,
 * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
#include "xfs.h"
#include "xfs_format.h"
#include "xfs_shared.h"
#include "xfs_sb.h"
#include "xfs_ag.h"
#include "xfs_log.h"
#include "xfs_trans.h"
#include "xfs_mount.h"
#include "xfs_bmap_btree.h"
#include "xfs_dinode.h"
#include "xfs_inode.h"
#include "xfs_inode_item.h"
#include "xfs_alloc.h"
#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_trace.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include <linux/aio.h>
#include <linux/gfp.h>
#include <linux/mpage.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>

void
xfs_count_page_state(
	struct page		*page,
	int			*delalloc,
	int			*unwritten)
{
	struct buffer_head	*bh, *head;

	*delalloc = *unwritten = 0;

	bh = head = page_buffers(page);
	do {
		if (buffer_unwritten(bh))
			(*unwritten) = 1;
		else if (buffer_delay(bh))
			(*delalloc) = 1;
	} while ((bh = bh->b_this_page) != head);
}

STATIC struct block_device *
xfs_find_bdev_for_inode(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;

	if (XFS_IS_REALTIME_INODE(ip))
		return mp->m_rtdev_targp->bt_bdev;
	else
		return mp->m_ddev_targp->bt_bdev;
}

/*
 * We're now finished for good with this ioend structure.
 * Update the page state via the associated buffer_heads,
 * release holds on the inode and bio, and finally free
 * up memory.  Do not use the ioend after this.
 */
STATIC void
xfs_destroy_ioend(
	xfs_ioend_t		*ioend)
{
	struct buffer_head	*bh, *next;

	for (bh = ioend->io_buffer_head; bh; bh = next) {
		next = bh->b_private;
		bh->b_end_io(bh, !ioend->io_error);
	}

	mempool_free(ioend, xfs_ioend_pool);
}

/*
 * Fast and loose check if this write could update the on-disk inode size.
 */
static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend)
{
	return ioend->io_offset + ioend->io_size >
		XFS_I(ioend->io_inode)->i_d.di_size;
}

STATIC int
xfs_setfilesize_trans_alloc(
	struct xfs_ioend	*ioend)
{
	struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;
	struct xfs_trans	*tp;
	int			error;

	tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);

	error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
	if (error) {
		xfs_trans_cancel(tp, 0);
		return error;
	}

	ioend->io_append_trans = tp;

	/*
	 * We may pass freeze protection with a transaction.  So tell lockdep
	 * we released it.
	 */
	rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
		      1, _THIS_IP_);
	/*
	 * We hand off the transaction to the completion thread now, so
	 * clear the flag here.
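	 * The completion side (xfs_setfilesize) marks itself as being in a
	 * transaction again and re-takes the freeze protection annotation
	 * before committing.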
	 */
	current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
	return 0;
}

/*
 * Update on-disk file size now that data has been written to disk.
 */
STATIC int
xfs_setfilesize(
	struct xfs_ioend	*ioend)
{
	struct xfs_inode	*ip = XFS_I(ioend->io_inode);
	struct xfs_trans	*tp = ioend->io_append_trans;
	xfs_fsize_t		isize;

	/*
	 * The transaction may have been allocated in the I/O submission thread,
	 * thus we need to mark ourselves as being in a transaction manually.
	 * Similarly for freeze protection.
	 */
	current_set_flags_nested(&tp->t_pflags, PF_FSTRANS);
	rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1],
			   0, 1, _THIS_IP_);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size);
	if (!isize) {
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_trans_cancel(tp, 0);
		return 0;
	}

	trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size);

	ip->i_d.di_size = isize;
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

	return xfs_trans_commit(tp, 0);
}

/*
 * Schedule IO completion handling on the final put of an ioend.
 *
 * If there is no work to do we might as well call it a day and free the
 * ioend right now.
 */
STATIC void
xfs_finish_ioend(
	struct xfs_ioend	*ioend)
{
	if (atomic_dec_and_test(&ioend->io_remaining)) {
		struct xfs_mount	*mp = XFS_I(ioend->io_inode)->i_mount;

		if (ioend->io_type == XFS_IO_UNWRITTEN)
			queue_work(mp->m_unwritten_workqueue, &ioend->io_work);
		else if (ioend->io_append_trans ||
			 (ioend->io_isdirect && xfs_ioend_is_append(ioend)))
			queue_work(mp->m_data_workqueue, &ioend->io_work);
		else
			xfs_destroy_ioend(ioend);
	}
}

/*
 * IO write completion.
 */
STATIC void
xfs_end_io(
	struct work_struct *work)
{
	xfs_ioend_t	*ioend = container_of(work, xfs_ioend_t, io_work);
	struct xfs_inode *ip = XFS_I(ioend->io_inode);
	int		error = 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
		ioend->io_error = -EIO;
		goto done;
	}
	if (ioend->io_error)
		goto done;

	/*
	 * For unwritten extents we need to issue transactions to convert a
	 * range to normal written extents after the data I/O has finished.
	 */
	if (ioend->io_type == XFS_IO_UNWRITTEN) {
		error = xfs_iomap_write_unwritten(ip, ioend->io_offset,
						  ioend->io_size);
	} else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) {
		/*
		 * For direct I/O we do not know if we need to allocate blocks
		 * or not so we can't preallocate an append transaction as that
		 * results in nested reservations and log space deadlocks. Hence
		 * allocate the transaction here. While this is sub-optimal and
		 * can block IO completion for some time, we're stuck with doing
		 * it this way until we can pass the ioend to the direct IO
		 * allocation callbacks and avoid nesting that way.
		 */
		error = xfs_setfilesize_trans_alloc(ioend);
		if (error)
			goto done;
		error = xfs_setfilesize(ioend);
	} else if (ioend->io_append_trans) {
		error = xfs_setfilesize(ioend);
	} else {
		ASSERT(!xfs_ioend_is_append(ioend));
	}

done:
	if (error)
		ioend->io_error = -error;
	xfs_destroy_ioend(ioend);
}

/*
 * Call IO completion handling in caller context on the final put of an ioend.
 */
STATIC void
xfs_finish_ioend_sync(
	struct xfs_ioend	*ioend)
{
	if (atomic_dec_and_test(&ioend->io_remaining))
		xfs_end_io(&ioend->io_work);
}

/*
 * Allocate and initialise an IO completion structure.
 * We need to track unwritten extent write completion here initially.
 * We'll need to extend this for updating the ondisk inode size later
 * (vs. incore size).
 */
STATIC xfs_ioend_t *
xfs_alloc_ioend(
	struct inode		*inode,
	unsigned int		type)
{
	xfs_ioend_t		*ioend;

	ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS);

	/*
	 * Set the count to 1 initially, which will prevent an I/O
	 * completion callback from happening before we have started
	 * all the I/O from calling the completion routine too early.
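	 * xfs_submit_ioend_bio() takes an additional reference for each bio,
	 * and this initial reference is dropped via xfs_finish_ioend() once
	 * submission of the whole ioend is complete.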
	 */
	atomic_set(&ioend->io_remaining, 1);
	ioend->io_isdirect = 0;
	ioend->io_error = 0;
	ioend->io_list = NULL;
	ioend->io_type = type;
	ioend->io_inode = inode;
	ioend->io_buffer_head = NULL;
	ioend->io_buffer_tail = NULL;
	ioend->io_offset = 0;
	ioend->io_size = 0;
	ioend->io_append_trans = NULL;

	INIT_WORK(&ioend->io_work, xfs_end_io);
	return ioend;
}

STATIC int
xfs_map_blocks(
	struct inode		*inode,
	loff_t			offset,
	struct xfs_bmbt_irec	*imap,
	int			type,
	int			nonblocking)
{
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			count = 1 << inode->i_blkbits;
	xfs_fileoff_t		offset_fsb, end_fsb;
	int			error = 0;
	int			bmapi_flags = XFS_BMAPI_ENTIRE;
	int			nimaps = 1;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -XFS_ERROR(EIO);

	if (type == XFS_IO_UNWRITTEN)
		bmapi_flags |= XFS_BMAPI_IGSTATE;

	if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
		if (nonblocking)
			return -XFS_ERROR(EAGAIN);
		xfs_ilock(ip, XFS_ILOCK_SHARED);
	}

	ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
	       (ip->i_df.if_flags & XFS_IFEXTENTS));
	ASSERT(offset <= mp->m_super->s_maxbytes);

	if (offset + count > mp->m_super->s_maxbytes)
		count = mp->m_super->s_maxbytes - offset;
	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
	offset_fsb = XFS_B_TO_FSBT(mp, offset);
	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
				imap, &nimaps, bmapi_flags);
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	if (error)
		return -XFS_ERROR(error);

	if (type == XFS_IO_DELALLOC &&
	    (!nimaps || isnullstartblock(imap->br_startblock))) {
		error = xfs_iomap_write_allocate(ip, offset, imap);
		if (!error)
			trace_xfs_map_blocks_alloc(ip, offset, count, type, imap);
		return -XFS_ERROR(error);
	}

#ifdef DEBUG
	if (type == XFS_IO_UNWRITTEN) {
		ASSERT(nimaps);
		ASSERT(imap->br_startblock != HOLESTARTBLOCK);
		ASSERT(imap->br_startblock != DELAYSTARTBLOCK);
	}
#endif
	if (nimaps)
		trace_xfs_map_blocks_found(ip, offset, count, type, imap);
	return 0;
}

STATIC int
xfs_imap_valid(
	struct inode		*inode,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset)
{
	offset >>= inode->i_blkbits;

	return offset >= imap->br_startoff &&
		offset < imap->br_startoff + imap->br_blockcount;
}

/*
 * BIO completion handler for buffered IO.
 */
STATIC void
xfs_end_bio(
	struct bio		*bio,
	int			error)
{
	xfs_ioend_t		*ioend = bio->bi_private;

	ASSERT(atomic_read(&bio->bi_cnt) >= 1);
	ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 0 : error;

	/* Toss bio and pass work off to an xfsdatad thread */
	bio->bi_private = NULL;
	bio->bi_end_io = NULL;
	bio_put(bio);

	xfs_finish_ioend(ioend);
}

STATIC void
xfs_submit_ioend_bio(
	struct writeback_control *wbc,
	xfs_ioend_t		*ioend,
	struct bio		*bio)
{
	atomic_inc(&ioend->io_remaining);
	bio->bi_private = ioend;
	bio->bi_end_io = xfs_end_bio;
	submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio);
}

STATIC struct bio *
xfs_alloc_ioend_bio(
	struct buffer_head	*bh)
{
	int			nvecs = bio_get_nr_vecs(bh->b_bdev);
	struct bio		*bio = bio_alloc(GFP_NOIO, nvecs);

	ASSERT(bio->bi_private == NULL);
	bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9);
	bio->bi_bdev = bh->b_bdev;
	return bio;
}

STATIC void
xfs_start_buffer_writeback(
	struct buffer_head	*bh)
{
	ASSERT(buffer_mapped(bh));
	ASSERT(buffer_locked(bh));
	ASSERT(!buffer_delay(bh));
	ASSERT(!buffer_unwritten(bh));

	mark_buffer_async_write(bh);
	set_buffer_uptodate(bh);
	clear_buffer_dirty(bh);
}

STATIC void
xfs_start_page_writeback(
	struct page		*page,
	int			clear_dirty,
	int			buffers)
{
	ASSERT(PageLocked(page));
	ASSERT(!PageWriteback(page));
	if (clear_dirty)
		clear_page_dirty_for_io(page);
	set_page_writeback(page);
	unlock_page(page);
	/* If no buffers on the page are to be written, finish it here */
	if (!buffers)
		end_page_writeback(page);
}

static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
{
	return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh));
}

/*
 * Submit all of the bios for all of the ioends we have saved up, covering the
 * initial writepage page and also any probed pages.
 *
 * Because we may have multiple ioends spanning a page, we need to start
 * writeback on all the buffers before we submit them for I/O. If we mark the
 * buffers as we go, then we can end up with a page that only has some buffers
 * marked async write, and I/O completion can occur before we mark the other
 * buffers async write.
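 * For example, if buffers 0 and 1 of a page sit in one ioend and buffers 2
 * and 3 in another, submitting the first ioend before the second has been
 * marked would let its completion find only half of the page's buffers under
 * async write and end writeback on the page too early.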
 *
 * The end result of this is that we trip a bug in end_page_writeback() because
 * we call it twice for the one page as the code in end_buffer_async_write()
 * assumes that all buffers on the page are started at the same time.
 *
 * The fix is two passes across the ioend list - one to start writeback on the
 * buffer_heads, and then submit them for I/O on the second pass.
 *
 * If @fail is non-zero, it means that we have a situation where some part of
 * the submission process has failed after we have marked pages for writeback
 * and unlocked them. In this situation, we need to fail the ioend chain rather
 * than submit it to IO. This typically only happens on a filesystem shutdown.
 */
STATIC void
xfs_submit_ioend(
	struct writeback_control *wbc,
	xfs_ioend_t		*ioend,
	int			fail)
{
	xfs_ioend_t		*head = ioend;
	xfs_ioend_t		*next;
	struct buffer_head	*bh;
	struct bio		*bio;
	sector_t		lastblock = 0;

	/* Pass 1 - start writeback */
	do {
		next = ioend->io_list;
		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
			xfs_start_buffer_writeback(bh);
	} while ((ioend = next) != NULL);

	/* Pass 2 - submit I/O */
	ioend = head;
	do {
		next = ioend->io_list;
		bio = NULL;

		/*
		 * If we are failing the IO now, just mark the ioend with an
		 * error and finish it. This will run IO completion immediately
		 * as there is only one reference to the ioend at this point in
		 * time.
		 */
		if (fail) {
			ioend->io_error = -fail;
			xfs_finish_ioend(ioend);
			continue;
		}

		for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {

			if (!bio) {
 retry:
				bio = xfs_alloc_ioend_bio(bh);
			} else if (bh->b_blocknr != lastblock + 1) {
				xfs_submit_ioend_bio(wbc, ioend, bio);
				goto retry;
			}

			if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
				xfs_submit_ioend_bio(wbc, ioend, bio);
				goto retry;
			}

			lastblock = bh->b_blocknr;
		}
		if (bio)
			xfs_submit_ioend_bio(wbc, ioend, bio);
		xfs_finish_ioend(ioend);
	} while ((ioend = next) != NULL);
}

/*
 * Cancel submission of all buffer_heads so far in this ioend.
 * Toss the ioend too.  Only ever called for the initial page
 * in a writepage request, so only ever one page.
 */
STATIC void
xfs_cancel_ioend(
	xfs_ioend_t		*ioend)
{
	xfs_ioend_t		*next;
	struct buffer_head	*bh, *next_bh;

	do {
		next = ioend->io_list;
		bh = ioend->io_buffer_head;
		do {
			next_bh = bh->b_private;
			clear_buffer_async_write(bh);
			unlock_buffer(bh);
		} while ((bh = next_bh) != NULL);

		mempool_free(ioend, xfs_ioend_pool);
	} while ((ioend = next) != NULL);
}

/*
 * Test to see if we've been building up a completion structure for
 * earlier buffers -- if so, we try to append to this ioend if we
 * can, otherwise we finish off any current ioend and start another.
 * The new or current ioend is handed back to the caller through *result.
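 * ioends are chained to each other through io_list; the buffers within an
 * ioend are chained through b_private, with io_buffer_head and
 * io_buffer_tail tracking the ends of that list.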
 */
STATIC void
xfs_add_to_ioend(
	struct inode		*inode,
	struct buffer_head	*bh,
	xfs_off_t		offset,
	unsigned int		type,
	xfs_ioend_t		**result,
	int			need_ioend)
{
	xfs_ioend_t		*ioend = *result;

	if (!ioend || need_ioend || type != ioend->io_type) {
		xfs_ioend_t	*previous = *result;

		ioend = xfs_alloc_ioend(inode, type);
		ioend->io_offset = offset;
		ioend->io_buffer_head = bh;
		ioend->io_buffer_tail = bh;
		if (previous)
			previous->io_list = ioend;
		*result = ioend;
	} else {
		ioend->io_buffer_tail->b_private = bh;
		ioend->io_buffer_tail = bh;
	}

	bh->b_private = NULL;
	ioend->io_size += bh->b_size;
}

STATIC void
xfs_map_buffer(
	struct inode		*inode,
	struct buffer_head	*bh,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset)
{
	sector_t		bn;
	struct xfs_mount	*m = XFS_I(inode)->i_mount;
	xfs_off_t		iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff);
	xfs_daddr_t		iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock);

	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);

	bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) +
	      ((offset - iomap_offset) >> inode->i_blkbits);

	ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode)));

	bh->b_blocknr = bn;
	set_buffer_mapped(bh);
}

STATIC void
xfs_map_at_offset(
	struct inode		*inode,
	struct buffer_head	*bh,
	struct xfs_bmbt_irec	*imap,
	xfs_off_t		offset)
{
	ASSERT(imap->br_startblock != HOLESTARTBLOCK);
	ASSERT(imap->br_startblock != DELAYSTARTBLOCK);

	xfs_map_buffer(inode, bh, imap, offset);
	set_buffer_mapped(bh);
	clear_buffer_delay(bh);
	clear_buffer_unwritten(bh);
}

/*
 * Test if a given page is suitable for writing as part of an unwritten
 * or delayed allocate extent.
 */
STATIC int
xfs_check_page_type(
	struct page		*page,
	unsigned int		type)
{
	if (PageWriteback(page))
		return 0;

	if (page->mapping && page_has_buffers(page)) {
		struct buffer_head	*bh, *head;
		int			acceptable = 0;

		bh = head = page_buffers(page);
		do {
			if (buffer_unwritten(bh))
				acceptable += (type == XFS_IO_UNWRITTEN);
			else if (buffer_delay(bh))
				acceptable += (type == XFS_IO_DELALLOC);
			else if (buffer_dirty(bh) && buffer_mapped(bh))
				acceptable += (type == XFS_IO_OVERWRITE);
			else
				break;
		} while ((bh = bh->b_this_page) != head);

		if (acceptable)
			return 1;
	}

	return 0;
}

/*
 * Allocate & map buffers for page given the extent map. Write it out.
 * Except for the original page of a writepage, this is called on
 * delalloc/unwritten pages only; for the original page it is possible
 * that the page has no mapping at all.
 */
STATIC int
xfs_convert_page(
	struct inode		*inode,
	struct page		*page,
	loff_t			tindex,
	struct xfs_bmbt_irec	*imap,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc)
{
	struct buffer_head	*bh, *head;
	xfs_off_t		end_offset;
	unsigned long		p_offset;
	unsigned int		type;
	int			len, page_dirty;
	int			count = 0, done = 0, uptodate = 1;
	xfs_off_t		offset = page_offset(page);

	if (page->index != tindex)
		goto fail;
	if (!trylock_page(page))
		goto fail;
	if (PageWriteback(page))
		goto fail_unlock_page;
	if (page->mapping != inode->i_mapping)
		goto fail_unlock_page;
	if (!xfs_check_page_type(page, (*ioendp)->io_type))
		goto fail_unlock_page;

	/*
	 * page_dirty is initially a count of buffers on the page before
	 * EOF and is decremented as we move each into a cleanable state.
	 *
	 * Derivation:
	 *
	 * End offset is the highest offset that this page should represent.
	 * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
	 * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
	 * hence give us the correct page_dirty count. On any other page,
	 * it will be zero and in that case we need page_dirty to be the
	 * count of buffers on the page.
	 */
	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			i_size_read(inode));

	/*
	 * If the current map does not span the entire page we are about to try
	 * to write, then give up. The only way we can write a page that spans
	 * multiple mappings in a single writeback iteration is via the
	 * xfs_vm_writepage() function. Data integrity writeback requires the
	 * entire page to be written in a single attempt, otherwise the part of
	 * the page we don't write here doesn't get written as part of the data
	 * integrity sync.
	 *
	 * For normal writeback, we also don't attempt to write partial pages
	 * here as it simply means that write_cache_pages() will see it under
	 * writeback and ignore the page until some point in the future, at
	 * which time this will be the only page in the file that needs
	 * writeback.  Hence for more optimal IO patterns, we should always
	 * avoid partial page writeback due to multiple mappings on a page here.
	 */
	if (!xfs_imap_valid(inode, imap, end_offset))
		goto fail_unlock_page;

	len = 1 << inode->i_blkbits;
	p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
					PAGE_CACHE_SIZE);
	p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
	page_dirty = p_offset / len;

	bh = head = page_buffers(page);
	do {
		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;
		if (!(PageUptodate(page) || buffer_uptodate(bh))) {
			done = 1;
			continue;
		}

		if (buffer_unwritten(bh) || buffer_delay(bh) ||
		    buffer_mapped(bh)) {
			if (buffer_unwritten(bh))
				type = XFS_IO_UNWRITTEN;
			else if (buffer_delay(bh))
				type = XFS_IO_DELALLOC;
			else
				type = XFS_IO_OVERWRITE;

			if (!xfs_imap_valid(inode, imap, offset)) {
				done = 1;
				continue;
			}

			lock_buffer(bh);
			if (type != XFS_IO_OVERWRITE)
				xfs_map_at_offset(inode, bh, imap, offset);
			xfs_add_to_ioend(inode, bh, offset, type,
					 ioendp, done);

			page_dirty--;
			count++;
		} else {
			done = 1;
		}
	} while (offset += len, (bh = bh->b_this_page) != head);

	if (uptodate && bh == head)
		SetPageUptodate(page);

	if (count) {
		if (--wbc->nr_to_write <= 0 &&
		    wbc->sync_mode == WB_SYNC_NONE)
			done = 1;
	}
	xfs_start_page_writeback(page, !page_dirty, count);

	return done;
 fail_unlock_page:
	unlock_page(page);
 fail:
	return 1;
}

/*
 * Convert & write out a cluster of pages in the same extent as defined
 * by imap and following the start page.
 */
STATIC void
xfs_cluster_write(
	struct inode		*inode,
	pgoff_t			tindex,
	struct xfs_bmbt_irec	*imap,
	xfs_ioend_t		**ioendp,
	struct writeback_control *wbc,
	pgoff_t			tlast)
{
	struct pagevec		pvec;
	int			done = 0, i;

	pagevec_init(&pvec, 0);
	while (!done && tindex <= tlast) {
		unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);

		if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
			break;

		for (i = 0; i < pagevec_count(&pvec); i++) {
			done = xfs_convert_page(inode, pvec.pages[i], tindex++,
						imap, ioendp, wbc);
			if (done)
				break;
		}

		pagevec_release(&pvec);
		cond_resched();
	}
}

STATIC void
xfs_vm_invalidatepage(
	struct page		*page,
	unsigned int		offset,
	unsigned int		length)
{
	trace_xfs_invalidatepage(page->mapping->host, page, offset,
				 length);
	block_invalidatepage(page, offset, length);
}

/*
 * If the page has delalloc buffers on it, we need to punch them out before we
 * invalidate the page.  If we don't, we leave a stale delalloc mapping on the
 * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read
 * is done on that same region - the delalloc extent is returned when none is
 * supposed to be there.
 *
 * We prevent this by truncating away the delalloc regions on the page before
 * invalidating it. Because they are delalloc, we can do this without needing a
 * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this
 * truncation without a transaction as there is no space left for block
 * reservation (typically why we see an ENOSPC in writeback).
 *
 * This is not a performance critical path, so for now just do the punching a
 * buffer head at a time.
 */
STATIC void
xfs_aops_discard_page(
	struct page		*page)
{
	struct inode		*inode = page->mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct buffer_head	*bh, *head;
	loff_t			offset = page_offset(page);

	if (!xfs_check_page_type(page, XFS_IO_DELALLOC))
		goto out_invalidate;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		goto out_invalidate;

	xfs_alert(ip->i_mount,
		"page discard on page %p, inode 0x%llx, offset %llu.",
			page, ip->i_ino, offset);

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	bh = head = page_buffers(page);
	do {
		int		error;
		xfs_fileoff_t	start_fsb;

		if (!buffer_delay(bh))
			goto next_buffer;

		start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
		error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1);
		if (error) {
			/* something screwed, just bail */
			if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) {
				xfs_alert(ip->i_mount,
			"page discard unable to remove delalloc mapping.");
			}
			break;
		}
next_buffer:
		offset += 1 << inode->i_blkbits;

	} while ((bh = bh->b_this_page) != head);

	xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_invalidate:
	xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE);
	return;
}

/*
 * Write out a dirty page.
 *
 * For delalloc space on the page we need to allocate space and flush it.
 * For unwritten space on the page we need to start the conversion to
 * regular allocated space.
 * For any other dirty buffer heads on the page we should flush them.
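 *
 * The buffer walk below builds up one or more ioends via xfs_add_to_ioend(),
 * optionally pulls in further pages of the same extent with
 * xfs_cluster_write(), and finally hands the whole chain to
 * xfs_submit_ioend().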
 */
STATIC int
xfs_vm_writepage(
	struct page		*page,
	struct writeback_control *wbc)
{
	struct inode		*inode = page->mapping->host;
	struct buffer_head	*bh, *head;
	struct xfs_bmbt_irec	imap;
	xfs_ioend_t		*ioend = NULL, *iohead = NULL;
	loff_t			offset;
	unsigned int		type;
	__uint64_t		end_offset;
	pgoff_t			end_index, last_index;
	ssize_t			len;
	int			err, imap_valid = 0, uptodate = 1;
	int			count = 0;
	int			nonblocking = 0;

	trace_xfs_writepage(inode, page, 0, 0);

	ASSERT(page_has_buffers(page));

	/*
	 * Refuse to write the page out if we are called from reclaim context.
	 *
	 * This avoids stack overflows when called from deeply used stacks in
	 * random callers for direct reclaim or memcg reclaim.  We explicitly
	 * allow reclaim from kswapd as the stack usage there is relatively low.
	 *
	 * This should never happen except in the case of a VM regression so
	 * warn about it.
	 */
	if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) ==
			PF_MEMALLOC))
		goto redirty;

	/*
	 * Given that we do not allow direct reclaim to call us, we should
	 * never be called while in a filesystem transaction.
	 */
	if (WARN_ON(current->flags & PF_FSTRANS))
		goto redirty;

	/* Is this page beyond the end of the file? */
	offset = i_size_read(inode);
	end_index = offset >> PAGE_CACHE_SHIFT;
	last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
	if (page->index >= end_index) {
		unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);

		/*
		 * Skip the page if it is fully outside i_size, e.g. due to a
		 * truncate operation that is in progress. We must redirty the
		 * page so that reclaim stops reclaiming it. Otherwise
		 * xfs_vm_releasepage() is called on it and gets confused.
		 */
		if (page->index >= end_index + 1 || offset_into_page == 0)
			goto redirty;

		/*
		 * The page straddles i_size.
		 * It must be zeroed out on each
		 * and every writepage invocation because it may be mmapped.
		 * "A file is mapped in multiples of the page size.  For a file
		 * that is not a multiple of the page size, the remaining
		 * memory is zeroed when mapped, and writes to that region are
		 * not written out to the file."
		 */
		zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
	}

	end_offset = min_t(unsigned long long,
			(xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
			offset);
	len = 1 << inode->i_blkbits;

	bh = head = page_buffers(page);
	offset = page_offset(page);
	type = XFS_IO_OVERWRITE;

	if (wbc->sync_mode == WB_SYNC_NONE)
		nonblocking = 1;

	do {
		int new_ioend = 0;

		if (offset >= end_offset)
			break;
		if (!buffer_uptodate(bh))
			uptodate = 0;

		/*
		 * set_page_dirty dirties all buffers in a page, independent
		 * of their state.  The dirty state however is entirely
		 * meaningless for holes (!mapped && uptodate), so skip
		 * buffers covering holes here.
		 */
		if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
			imap_valid = 0;
			continue;
		}

		if (buffer_unwritten(bh)) {
			if (type != XFS_IO_UNWRITTEN) {
				type = XFS_IO_UNWRITTEN;
				imap_valid = 0;
			}
		} else if (buffer_delay(bh)) {
			if (type != XFS_IO_DELALLOC) {
				type = XFS_IO_DELALLOC;
				imap_valid = 0;
			}
		} else if (buffer_uptodate(bh)) {
			if (type != XFS_IO_OVERWRITE) {
				type = XFS_IO_OVERWRITE;
				imap_valid = 0;
			}
		} else {
			if (PageUptodate(page))
				ASSERT(buffer_mapped(bh));
			/*
			 * This buffer is not uptodate and will not be
			 * written to disk.  Ensure that we will put any
			 * subsequent writeable buffers into a new
			 * ioend.
			 */
			imap_valid = 0;
			continue;
		}

		if (imap_valid)
			imap_valid = xfs_imap_valid(inode, &imap, offset);
		if (!imap_valid) {
			/*
			 * If we didn't have a valid mapping then we need to
			 * put the new mapping into a separate ioend structure.
			 * This ensures non-contiguous extents always have
			 * separate ioends, which is particularly important
			 * for unwritten extent conversion at I/O completion
			 * time.
			 */
			new_ioend = 1;
			err = xfs_map_blocks(inode, offset, &imap, type,
					     nonblocking);
			if (err)
				goto error;
			imap_valid = xfs_imap_valid(inode, &imap, offset);
		}
		if (imap_valid) {
			lock_buffer(bh);
			if (type != XFS_IO_OVERWRITE)
				xfs_map_at_offset(inode, bh, &imap, offset);
			xfs_add_to_ioend(inode, bh, offset, type, &ioend,
					 new_ioend);
			count++;
		}

		if (!iohead)
			iohead = ioend;

	} while (offset += len, ((bh = bh->b_this_page) != head));

	if (uptodate && bh == head)
		SetPageUptodate(page);

	xfs_start_page_writeback(page, 1, count);

	/* if there is no IO to be submitted for this page, we are done */
	if (!ioend)
		return 0;

	ASSERT(iohead);

	/*
	 * Any errors from this point onwards need to be reported through the
	 * IO completion path as we have marked the initial page as under
	 * writeback and unlocked it.
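	 * That is why a failure to allocate the setfilesize transaction below
	 * is passed to xfs_submit_ioend() as its fail argument rather than
	 * being returned from this function.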
10947bf7f352SDave Chinner */ 10957bf7f352SDave Chinner if (imap_valid) { 1096c59d87c4SChristoph Hellwig xfs_off_t end_index; 1097c59d87c4SChristoph Hellwig 1098c59d87c4SChristoph Hellwig end_index = imap.br_startoff + imap.br_blockcount; 1099c59d87c4SChristoph Hellwig 1100c59d87c4SChristoph Hellwig /* to bytes */ 1101c59d87c4SChristoph Hellwig end_index <<= inode->i_blkbits; 1102c59d87c4SChristoph Hellwig 1103c59d87c4SChristoph Hellwig /* to pages */ 1104c59d87c4SChristoph Hellwig end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; 1105c59d87c4SChristoph Hellwig 1106c59d87c4SChristoph Hellwig /* check against file size */ 1107c59d87c4SChristoph Hellwig if (end_index > last_index) 1108c59d87c4SChristoph Hellwig end_index = last_index; 1109c59d87c4SChristoph Hellwig 1110c59d87c4SChristoph Hellwig xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1111c59d87c4SChristoph Hellwig wbc, end_index); 1112c59d87c4SChristoph Hellwig } 1113c59d87c4SChristoph Hellwig 1114281627dfSChristoph Hellwig 11157bf7f352SDave Chinner /* 11167bf7f352SDave Chinner * Reserve log space if we might write beyond the on-disk inode size. 11177bf7f352SDave Chinner */ 11187bf7f352SDave Chinner err = 0; 11197bf7f352SDave Chinner if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) 11207bf7f352SDave Chinner err = xfs_setfilesize_trans_alloc(ioend); 11217bf7f352SDave Chinner 11227bf7f352SDave Chinner xfs_submit_ioend(wbc, iohead, err); 1123c59d87c4SChristoph Hellwig 1124c59d87c4SChristoph Hellwig return 0; 1125c59d87c4SChristoph Hellwig 1126c59d87c4SChristoph Hellwig error: 1127c59d87c4SChristoph Hellwig if (iohead) 1128c59d87c4SChristoph Hellwig xfs_cancel_ioend(iohead); 1129c59d87c4SChristoph Hellwig 1130c59d87c4SChristoph Hellwig if (err == -EAGAIN) 1131c59d87c4SChristoph Hellwig goto redirty; 1132c59d87c4SChristoph Hellwig 1133c59d87c4SChristoph Hellwig xfs_aops_discard_page(page); 1134c59d87c4SChristoph Hellwig ClearPageUptodate(page); 1135c59d87c4SChristoph Hellwig unlock_page(page); 1136c59d87c4SChristoph Hellwig return err; 1137c59d87c4SChristoph Hellwig 1138c59d87c4SChristoph Hellwig redirty: 1139c59d87c4SChristoph Hellwig redirty_page_for_writepage(wbc, page); 1140c59d87c4SChristoph Hellwig unlock_page(page); 1141c59d87c4SChristoph Hellwig return 0; 1142c59d87c4SChristoph Hellwig } 1143c59d87c4SChristoph Hellwig 1144c59d87c4SChristoph Hellwig STATIC int 1145c59d87c4SChristoph Hellwig xfs_vm_writepages( 1146c59d87c4SChristoph Hellwig struct address_space *mapping, 1147c59d87c4SChristoph Hellwig struct writeback_control *wbc) 1148c59d87c4SChristoph Hellwig { 1149c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1150c59d87c4SChristoph Hellwig return generic_writepages(mapping, wbc); 1151c59d87c4SChristoph Hellwig } 1152c59d87c4SChristoph Hellwig 1153c59d87c4SChristoph Hellwig /* 1154c59d87c4SChristoph Hellwig * Called to move a page into cleanable state - and from there 1155c59d87c4SChristoph Hellwig * to be released. The page should already be clean. We always 1156c59d87c4SChristoph Hellwig * have buffer heads in this call. 1157c59d87c4SChristoph Hellwig * 1158c59d87c4SChristoph Hellwig * Returns 1 if the page is ok to release, 0 otherwise. 
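 *
 * [Editor's note, illustrative only and not part of the original source:
 *  the return value follows the generic ->releasepage() contract, i.e.
 *  callers such as try_to_release_page() treat 0 as "the buffer_heads
 *  could not be freed", so a page that still carries delalloc or
 *  unwritten buffers is simply left in the page cache untouched, e.g.:
 *
 *	if (!try_to_release_page(page, gfp_mask))
 *		goto keep_page;	// sketch of a reclaim-style caller
 *
 *  The two lines above sketch a caller, they are not code from this tree.]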
1159c59d87c4SChristoph Hellwig */ 1160c59d87c4SChristoph Hellwig STATIC int 1161c59d87c4SChristoph Hellwig xfs_vm_releasepage( 1162c59d87c4SChristoph Hellwig struct page *page, 1163c59d87c4SChristoph Hellwig gfp_t gfp_mask) 1164c59d87c4SChristoph Hellwig { 1165c59d87c4SChristoph Hellwig int delalloc, unwritten; 1166c59d87c4SChristoph Hellwig 116734097dfeSLukas Czerner trace_xfs_releasepage(page->mapping->host, page, 0, 0); 1168c59d87c4SChristoph Hellwig 1169c59d87c4SChristoph Hellwig xfs_count_page_state(page, &delalloc, &unwritten); 1170c59d87c4SChristoph Hellwig 1171c59d87c4SChristoph Hellwig if (WARN_ON(delalloc)) 1172c59d87c4SChristoph Hellwig return 0; 1173c59d87c4SChristoph Hellwig if (WARN_ON(unwritten)) 1174c59d87c4SChristoph Hellwig return 0; 1175c59d87c4SChristoph Hellwig 1176c59d87c4SChristoph Hellwig return try_to_free_buffers(page); 1177c59d87c4SChristoph Hellwig } 1178c59d87c4SChristoph Hellwig 1179c59d87c4SChristoph Hellwig STATIC int 1180c59d87c4SChristoph Hellwig __xfs_get_blocks( 1181c59d87c4SChristoph Hellwig struct inode *inode, 1182c59d87c4SChristoph Hellwig sector_t iblock, 1183c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1184c59d87c4SChristoph Hellwig int create, 1185c59d87c4SChristoph Hellwig int direct) 1186c59d87c4SChristoph Hellwig { 1187c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1188c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1189c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 1190c59d87c4SChristoph Hellwig int error = 0; 1191c59d87c4SChristoph Hellwig int lockmode = 0; 1192c59d87c4SChristoph Hellwig struct xfs_bmbt_irec imap; 1193c59d87c4SChristoph Hellwig int nimaps = 1; 1194c59d87c4SChristoph Hellwig xfs_off_t offset; 1195c59d87c4SChristoph Hellwig ssize_t size; 1196c59d87c4SChristoph Hellwig int new = 0; 1197c59d87c4SChristoph Hellwig 1198c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 1199c59d87c4SChristoph Hellwig return -XFS_ERROR(EIO); 1200c59d87c4SChristoph Hellwig 1201c59d87c4SChristoph Hellwig offset = (xfs_off_t)iblock << inode->i_blkbits; 1202c59d87c4SChristoph Hellwig ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1203c59d87c4SChristoph Hellwig size = bh_result->b_size; 1204c59d87c4SChristoph Hellwig 1205c59d87c4SChristoph Hellwig if (!create && direct && offset >= i_size_read(inode)) 1206c59d87c4SChristoph Hellwig return 0; 1207c59d87c4SChristoph Hellwig 1208507630b2SDave Chinner /* 1209507630b2SDave Chinner * Direct I/O is usually done on preallocated files, so try getting 1210507630b2SDave Chinner * a block mapping without an exclusive lock first. For buffered 1211507630b2SDave Chinner * writes we already have the exclusive iolock anyway, so avoiding 1212507630b2SDave Chinner * a lock roundtrip here by taking the ilock exclusive from the 1213507630b2SDave Chinner * beginning is a useful micro optimization. 
1214507630b2SDave Chinner */ 1215507630b2SDave Chinner if (create && !direct) { 1216c59d87c4SChristoph Hellwig lockmode = XFS_ILOCK_EXCL; 1217c59d87c4SChristoph Hellwig xfs_ilock(ip, lockmode); 1218c59d87c4SChristoph Hellwig } else { 1219c59d87c4SChristoph Hellwig lockmode = xfs_ilock_map_shared(ip); 1220c59d87c4SChristoph Hellwig } 1221c59d87c4SChristoph Hellwig 1222d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 1223d2c28191SDave Chinner if (offset + size > mp->m_super->s_maxbytes) 1224d2c28191SDave Chinner size = mp->m_super->s_maxbytes - offset; 1225c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1226c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 1227c59d87c4SChristoph Hellwig 12285c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 12295c8ed202SDave Chinner &imap, &nimaps, XFS_BMAPI_ENTIRE); 1230c59d87c4SChristoph Hellwig if (error) 1231c59d87c4SChristoph Hellwig goto out_unlock; 1232c59d87c4SChristoph Hellwig 1233c59d87c4SChristoph Hellwig if (create && 1234c59d87c4SChristoph Hellwig (!nimaps || 1235c59d87c4SChristoph Hellwig (imap.br_startblock == HOLESTARTBLOCK || 1236c59d87c4SChristoph Hellwig imap.br_startblock == DELAYSTARTBLOCK))) { 1237aff3a9edSDave Chinner if (direct || xfs_get_extsz_hint(ip)) { 1238507630b2SDave Chinner /* 1239507630b2SDave Chinner * Drop the ilock in preparation for starting the block 1240507630b2SDave Chinner * allocation transaction. It will be retaken 1241507630b2SDave Chinner * exclusively inside xfs_iomap_write_direct for the 1242507630b2SDave Chinner * actual allocation. 1243507630b2SDave Chinner */ 1244507630b2SDave Chinner xfs_iunlock(ip, lockmode); 1245c59d87c4SChristoph Hellwig error = xfs_iomap_write_direct(ip, offset, size, 1246c59d87c4SChristoph Hellwig &imap, nimaps); 1247507630b2SDave Chinner if (error) 1248507630b2SDave Chinner return -error; 1249d3bc815aSDave Chinner new = 1; 1250c59d87c4SChristoph Hellwig } else { 1251507630b2SDave Chinner /* 1252507630b2SDave Chinner * Delalloc reservations do not require a transaction, 1253d3bc815aSDave Chinner * we can go on without dropping the lock here. If we 1254d3bc815aSDave Chinner * are allocating a new delalloc block, make sure that 1255d3bc815aSDave Chinner * we set the new flag so that we mark the buffer new so 1256d3bc815aSDave Chinner * that we know that it is newly allocated if the write 1257d3bc815aSDave Chinner * fails. 
1258507630b2SDave Chinner */
1259d3bc815aSDave Chinner if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
1260d3bc815aSDave Chinner new = 1;
1261c59d87c4SChristoph Hellwig error = xfs_iomap_write_delay(ip, offset, size, &imap);
1262c59d87c4SChristoph Hellwig if (error)
1263c59d87c4SChristoph Hellwig goto out_unlock;
1264c59d87c4SChristoph Hellwig
1265507630b2SDave Chinner xfs_iunlock(ip, lockmode);
1266507630b2SDave Chinner }
1267507630b2SDave Chinner
1268c59d87c4SChristoph Hellwig trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1269c59d87c4SChristoph Hellwig } else if (nimaps) {
1270c59d87c4SChristoph Hellwig trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1271507630b2SDave Chinner xfs_iunlock(ip, lockmode);
1272c59d87c4SChristoph Hellwig } else {
1273c59d87c4SChristoph Hellwig trace_xfs_get_blocks_notfound(ip, offset, size);
1274c59d87c4SChristoph Hellwig goto out_unlock;
1275c59d87c4SChristoph Hellwig }
1276c59d87c4SChristoph Hellwig
1277c59d87c4SChristoph Hellwig if (imap.br_startblock != HOLESTARTBLOCK &&
1278c59d87c4SChristoph Hellwig imap.br_startblock != DELAYSTARTBLOCK) {
1279c59d87c4SChristoph Hellwig /*
1280c59d87c4SChristoph Hellwig * For unwritten extents do not report a disk address on
1281c59d87c4SChristoph Hellwig * the read case (treat as if we're reading into a hole).
1282c59d87c4SChristoph Hellwig */
1283c59d87c4SChristoph Hellwig if (create || !ISUNWRITTEN(&imap))
1284c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh_result, &imap, offset);
1285c59d87c4SChristoph Hellwig if (create && ISUNWRITTEN(&imap)) {
12867b7a8665SChristoph Hellwig if (direct) {
1287c59d87c4SChristoph Hellwig bh_result->b_private = inode;
12887b7a8665SChristoph Hellwig set_buffer_defer_completion(bh_result);
12897b7a8665SChristoph Hellwig }
1290c59d87c4SChristoph Hellwig set_buffer_unwritten(bh_result);
1291c59d87c4SChristoph Hellwig }
1292c59d87c4SChristoph Hellwig }
1293c59d87c4SChristoph Hellwig
1294c59d87c4SChristoph Hellwig /*
1295c59d87c4SChristoph Hellwig * If this is a realtime file, data may be on a different device
1296c59d87c4SChristoph Hellwig * to that pointed to from the buffer_head b_bdev currently.
1297c59d87c4SChristoph Hellwig */
1298c59d87c4SChristoph Hellwig bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1299c59d87c4SChristoph Hellwig
1300c59d87c4SChristoph Hellwig /*
1301c59d87c4SChristoph Hellwig * If we previously allocated a block out beyond eof and we are now
1302c59d87c4SChristoph Hellwig * coming back to use it then we will need to flag it as new even if it
1303c59d87c4SChristoph Hellwig * has a disk address.
1304c59d87c4SChristoph Hellwig *
1305c59d87c4SChristoph Hellwig * With sub-block writes into unwritten extents we also need to mark
1306c59d87c4SChristoph Hellwig * the buffer as new so that the unwritten parts of the buffer get
1307c59d87c4SChristoph Hellwig * correctly zeroed.
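 *
 * [Editor's note, illustrative only and not part of the original source:
 *  assume 4096-byte blocks and a 512-byte write at offset 1024 into an
 *  unwritten extent. Only bytes 1024-1535 of the block are copied in by
 *  the write; because the buffer is marked new, the generic
 *  __block_write_begin() path (used by xfs_vm_write_begin() below) zeroes
 *  the rest of that block in the page instead of leaving stale data
 *  there, so the block is fully initialised by the time it is written
 *  out and the extent is converted.]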
1308c59d87c4SChristoph Hellwig */ 1309c59d87c4SChristoph Hellwig if (create && 1310c59d87c4SChristoph Hellwig ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1311c59d87c4SChristoph Hellwig (offset >= i_size_read(inode)) || 1312c59d87c4SChristoph Hellwig (new || ISUNWRITTEN(&imap)))) 1313c59d87c4SChristoph Hellwig set_buffer_new(bh_result); 1314c59d87c4SChristoph Hellwig 1315c59d87c4SChristoph Hellwig if (imap.br_startblock == DELAYSTARTBLOCK) { 1316c59d87c4SChristoph Hellwig BUG_ON(direct); 1317c59d87c4SChristoph Hellwig if (create) { 1318c59d87c4SChristoph Hellwig set_buffer_uptodate(bh_result); 1319c59d87c4SChristoph Hellwig set_buffer_mapped(bh_result); 1320c59d87c4SChristoph Hellwig set_buffer_delay(bh_result); 1321c59d87c4SChristoph Hellwig } 1322c59d87c4SChristoph Hellwig } 1323c59d87c4SChristoph Hellwig 1324c59d87c4SChristoph Hellwig /* 1325c59d87c4SChristoph Hellwig * If this is O_DIRECT or the mpage code calling tell them how large 1326c59d87c4SChristoph Hellwig * the mapping is, so that we can avoid repeated get_blocks calls. 1327c59d87c4SChristoph Hellwig */ 1328c59d87c4SChristoph Hellwig if (direct || size > (1 << inode->i_blkbits)) { 1329c59d87c4SChristoph Hellwig xfs_off_t mapping_size; 1330c59d87c4SChristoph Hellwig 1331c59d87c4SChristoph Hellwig mapping_size = imap.br_startoff + imap.br_blockcount - iblock; 1332c59d87c4SChristoph Hellwig mapping_size <<= inode->i_blkbits; 1333c59d87c4SChristoph Hellwig 1334c59d87c4SChristoph Hellwig ASSERT(mapping_size > 0); 1335c59d87c4SChristoph Hellwig if (mapping_size > size) 1336c59d87c4SChristoph Hellwig mapping_size = size; 1337c59d87c4SChristoph Hellwig if (mapping_size > LONG_MAX) 1338c59d87c4SChristoph Hellwig mapping_size = LONG_MAX; 1339c59d87c4SChristoph Hellwig 1340c59d87c4SChristoph Hellwig bh_result->b_size = mapping_size; 1341c59d87c4SChristoph Hellwig } 1342c59d87c4SChristoph Hellwig 1343c59d87c4SChristoph Hellwig return 0; 1344c59d87c4SChristoph Hellwig 1345c59d87c4SChristoph Hellwig out_unlock: 1346c59d87c4SChristoph Hellwig xfs_iunlock(ip, lockmode); 1347c59d87c4SChristoph Hellwig return -error; 1348c59d87c4SChristoph Hellwig } 1349c59d87c4SChristoph Hellwig 1350c59d87c4SChristoph Hellwig int 1351c59d87c4SChristoph Hellwig xfs_get_blocks( 1352c59d87c4SChristoph Hellwig struct inode *inode, 1353c59d87c4SChristoph Hellwig sector_t iblock, 1354c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1355c59d87c4SChristoph Hellwig int create) 1356c59d87c4SChristoph Hellwig { 1357c59d87c4SChristoph Hellwig return __xfs_get_blocks(inode, iblock, bh_result, create, 0); 1358c59d87c4SChristoph Hellwig } 1359c59d87c4SChristoph Hellwig 1360c59d87c4SChristoph Hellwig STATIC int 1361c59d87c4SChristoph Hellwig xfs_get_blocks_direct( 1362c59d87c4SChristoph Hellwig struct inode *inode, 1363c59d87c4SChristoph Hellwig sector_t iblock, 1364c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1365c59d87c4SChristoph Hellwig int create) 1366c59d87c4SChristoph Hellwig { 1367c59d87c4SChristoph Hellwig return __xfs_get_blocks(inode, iblock, bh_result, create, 1); 1368c59d87c4SChristoph Hellwig } 1369c59d87c4SChristoph Hellwig 1370c59d87c4SChristoph Hellwig /* 1371c59d87c4SChristoph Hellwig * Complete a direct I/O write request. 1372c59d87c4SChristoph Hellwig * 1373c59d87c4SChristoph Hellwig * If the private argument is non-NULL __xfs_get_blocks signals us that we 1374c59d87c4SChristoph Hellwig * need to issue a transaction to convert the range from unwritten to written 1375c59d87c4SChristoph Hellwig * extents. 
In case this is regular synchronous I/O we just call xfs_end_io 1376c59d87c4SChristoph Hellwig * to do this and we are done. But in case this was a successful AIO 1377c59d87c4SChristoph Hellwig * request this handler is called from interrupt context, from which we 1378c59d87c4SChristoph Hellwig * can't start transactions. In that case offload the I/O completion to 1379c59d87c4SChristoph Hellwig * the workqueues we also use for buffered I/O completion. 1380c59d87c4SChristoph Hellwig */ 1381c59d87c4SChristoph Hellwig STATIC void 1382c59d87c4SChristoph Hellwig xfs_end_io_direct_write( 1383c59d87c4SChristoph Hellwig struct kiocb *iocb, 1384c59d87c4SChristoph Hellwig loff_t offset, 1385c59d87c4SChristoph Hellwig ssize_t size, 13867b7a8665SChristoph Hellwig void *private) 1387c59d87c4SChristoph Hellwig { 1388c59d87c4SChristoph Hellwig struct xfs_ioend *ioend = iocb->private; 1389c59d87c4SChristoph Hellwig 1390c59d87c4SChristoph Hellwig /* 13912813d682SChristoph Hellwig * While the generic direct I/O code updates the inode size, it does 13922813d682SChristoph Hellwig * so only after the end_io handler is called, which means our 13932813d682SChristoph Hellwig * end_io handler thinks the on-disk size is outside the in-core 13942813d682SChristoph Hellwig * size. To prevent this just update it a little bit earlier here. 13952813d682SChristoph Hellwig */ 13962813d682SChristoph Hellwig if (offset + size > i_size_read(ioend->io_inode)) 13972813d682SChristoph Hellwig i_size_write(ioend->io_inode, offset + size); 13982813d682SChristoph Hellwig 13992813d682SChristoph Hellwig /* 1400c59d87c4SChristoph Hellwig * blockdev_direct_IO can return an error even after the I/O 1401c59d87c4SChristoph Hellwig * completion handler was called. Thus we need to protect 1402c59d87c4SChristoph Hellwig * against double-freeing. 1403c59d87c4SChristoph Hellwig */ 1404c59d87c4SChristoph Hellwig iocb->private = NULL; 1405c59d87c4SChristoph Hellwig 1406c59d87c4SChristoph Hellwig ioend->io_offset = offset; 1407c59d87c4SChristoph Hellwig ioend->io_size = size; 1408c59d87c4SChristoph Hellwig if (private && size > 0) 14090d882a36SAlain Renaud ioend->io_type = XFS_IO_UNWRITTEN; 1410c59d87c4SChristoph Hellwig 1411c59d87c4SChristoph Hellwig xfs_finish_ioend_sync(ioend); 1412c59d87c4SChristoph Hellwig } 1413c59d87c4SChristoph Hellwig 1414c59d87c4SChristoph Hellwig STATIC ssize_t 1415c59d87c4SChristoph Hellwig xfs_vm_direct_IO( 1416c59d87c4SChristoph Hellwig int rw, 1417c59d87c4SChristoph Hellwig struct kiocb *iocb, 1418c59d87c4SChristoph Hellwig const struct iovec *iov, 1419c59d87c4SChristoph Hellwig loff_t offset, 1420c59d87c4SChristoph Hellwig unsigned long nr_segs) 1421c59d87c4SChristoph Hellwig { 1422c59d87c4SChristoph Hellwig struct inode *inode = iocb->ki_filp->f_mapping->host; 1423c59d87c4SChristoph Hellwig struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1424281627dfSChristoph Hellwig struct xfs_ioend *ioend = NULL; 1425c59d87c4SChristoph Hellwig ssize_t ret; 1426c59d87c4SChristoph Hellwig 1427c59d87c4SChristoph Hellwig if (rw & WRITE) { 1428281627dfSChristoph Hellwig size_t size = iov_length(iov, nr_segs); 1429281627dfSChristoph Hellwig 1430281627dfSChristoph Hellwig /* 1431437a255aSDave Chinner * We cannot preallocate a size update transaction here as we 1432437a255aSDave Chinner * don't know whether allocation is necessary or not. Hence we 1433437a255aSDave Chinner * can only tell IO completion that one is necessary if we are 1434437a255aSDave Chinner * not doing unwritten extent conversion. 
1435281627dfSChristoph Hellwig */ 14360d882a36SAlain Renaud iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); 1437437a255aSDave Chinner if (offset + size > XFS_I(inode)->i_d.di_size) 1438281627dfSChristoph Hellwig ioend->io_isdirect = 1; 1439c59d87c4SChristoph Hellwig 1440c59d87c4SChristoph Hellwig ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1441c59d87c4SChristoph Hellwig offset, nr_segs, 1442c59d87c4SChristoph Hellwig xfs_get_blocks_direct, 1443c59d87c4SChristoph Hellwig xfs_end_io_direct_write, NULL, 0); 1444c59d87c4SChristoph Hellwig if (ret != -EIOCBQUEUED && iocb->private) 1445437a255aSDave Chinner goto out_destroy_ioend; 1446c59d87c4SChristoph Hellwig } else { 1447c59d87c4SChristoph Hellwig ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1448c59d87c4SChristoph Hellwig offset, nr_segs, 1449c59d87c4SChristoph Hellwig xfs_get_blocks_direct, 1450c59d87c4SChristoph Hellwig NULL, NULL, 0); 1451c59d87c4SChristoph Hellwig } 1452c59d87c4SChristoph Hellwig 1453c59d87c4SChristoph Hellwig return ret; 1454281627dfSChristoph Hellwig 1455281627dfSChristoph Hellwig out_destroy_ioend: 1456281627dfSChristoph Hellwig xfs_destroy_ioend(ioend); 1457281627dfSChristoph Hellwig return ret; 1458c59d87c4SChristoph Hellwig } 1459c59d87c4SChristoph Hellwig 1460c59d87c4SChristoph Hellwig /* 14612813d682SChristoph Hellwig * Punch out the delalloc blocks we have already allocated. 14622813d682SChristoph Hellwig * 1463d3bc815aSDave Chinner * Don't bother with xfs_setattr given that nothing can have made it to disk yet 1464d3bc815aSDave Chinner * as the page is still locked at this point. 1465c59d87c4SChristoph Hellwig */ 1466d3bc815aSDave Chinner STATIC void 1467d3bc815aSDave Chinner xfs_vm_kill_delalloc_range( 1468d3bc815aSDave Chinner struct inode *inode, 1469d3bc815aSDave Chinner loff_t start, 1470d3bc815aSDave Chinner loff_t end) 1471d3bc815aSDave Chinner { 1472c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1473c59d87c4SChristoph Hellwig xfs_fileoff_t start_fsb; 1474c59d87c4SChristoph Hellwig xfs_fileoff_t end_fsb; 1475c59d87c4SChristoph Hellwig int error; 1476c59d87c4SChristoph Hellwig 1477d3bc815aSDave Chinner start_fsb = XFS_B_TO_FSB(ip->i_mount, start); 1478d3bc815aSDave Chinner end_fsb = XFS_B_TO_FSB(ip->i_mount, end); 1479c59d87c4SChristoph Hellwig if (end_fsb <= start_fsb) 1480c59d87c4SChristoph Hellwig return; 1481c59d87c4SChristoph Hellwig 1482c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 1483c59d87c4SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1484c59d87c4SChristoph Hellwig end_fsb - start_fsb); 1485c59d87c4SChristoph Hellwig if (error) { 1486c59d87c4SChristoph Hellwig /* something screwed, just bail */ 1487c59d87c4SChristoph Hellwig if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1488c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 1489c59d87c4SChristoph Hellwig "xfs_vm_write_failed: unable to clean up ino %lld", 1490c59d87c4SChristoph Hellwig ip->i_ino); 1491c59d87c4SChristoph Hellwig } 1492c59d87c4SChristoph Hellwig } 1493c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 1494c59d87c4SChristoph Hellwig } 1495d3bc815aSDave Chinner 1496d3bc815aSDave Chinner STATIC void 1497d3bc815aSDave Chinner xfs_vm_write_failed( 1498d3bc815aSDave Chinner struct inode *inode, 1499d3bc815aSDave Chinner struct page *page, 1500d3bc815aSDave Chinner loff_t pos, 1501d3bc815aSDave Chinner unsigned len) 1502d3bc815aSDave Chinner { 150358e59854SJie Liu loff_t block_offset; 1504d3bc815aSDave Chinner loff_t block_start; 
1505d3bc815aSDave Chinner loff_t block_end;
1506d3bc815aSDave Chinner loff_t from = pos & (PAGE_CACHE_SIZE - 1);
1507d3bc815aSDave Chinner loff_t to = from + len;
1508d3bc815aSDave Chinner struct buffer_head *bh, *head;
1509d3bc815aSDave Chinner
151058e59854SJie Liu /*
151158e59854SJie Liu * The request pos offset might be 32 or 64 bit; this is all fine
151258e59854SJie Liu * on a 64-bit platform. However, for a 64-bit pos on a 32-bit
151358e59854SJie Liu * platform, the high 32 bits would be masked off if we evaluated
151458e59854SJie Liu * block_offset via (pos & PAGE_MASK), because PAGE_MASK is only
151558e59854SJie Liu * 0xfffff000 as an unsigned long. The result would be incorrect
151658e59854SJie Liu * and the ASSERT below would fail in most cases. Hence we compute
151758e59854SJie Liu * the block_offset of the start of the page with shifts rather
151858e59854SJie Liu * than masks, which sidesteps the mismatch (a worked example
151958e59854SJie Liu * follows at the end of the next comment block below).
152058e59854SJie Liu */
152158e59854SJie Liu block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
152258e59854SJie Liu
1523d3bc815aSDave Chinner ASSERT(block_offset + from == pos);
1524d3bc815aSDave Chinner
1525d3bc815aSDave Chinner head = page_buffers(page);
1526d3bc815aSDave Chinner block_start = 0;
1527d3bc815aSDave Chinner for (bh = head; bh != head || !block_start;
1528d3bc815aSDave Chinner bh = bh->b_this_page, block_start = block_end,
1529d3bc815aSDave Chinner block_offset += bh->b_size) {
1530d3bc815aSDave Chinner block_end = block_start + bh->b_size;
1531d3bc815aSDave Chinner
1532d3bc815aSDave Chinner /* skip buffers before the write */
1533d3bc815aSDave Chinner if (block_end <= from)
1534d3bc815aSDave Chinner continue;
1535d3bc815aSDave Chinner
1536d3bc815aSDave Chinner /* if the buffer is after the write, we're done */
1537d3bc815aSDave Chinner if (block_start >= to)
1538d3bc815aSDave Chinner break;
1539d3bc815aSDave Chinner
1540d3bc815aSDave Chinner if (!buffer_delay(bh))
1541d3bc815aSDave Chinner continue;
1542d3bc815aSDave Chinner
1543d3bc815aSDave Chinner if (!buffer_new(bh) && block_offset < i_size_read(inode))
1544d3bc815aSDave Chinner continue;
1545d3bc815aSDave Chinner
1546d3bc815aSDave Chinner xfs_vm_kill_delalloc_range(inode, block_offset,
1547d3bc815aSDave Chinner block_offset + bh->b_size);
1548c59d87c4SChristoph Hellwig }
1549c59d87c4SChristoph Hellwig
1550d3bc815aSDave Chinner }
1551d3bc815aSDave Chinner
1552d3bc815aSDave Chinner /*
1553d3bc815aSDave Chinner * This used to call block_write_begin(), but it unlocks and releases the page
1554d3bc815aSDave Chinner * on error, and we need that page to be able to punch stale delalloc blocks out
1555d3bc815aSDave Chinner * on failure. Hence we copy-n-waste it here and call xfs_vm_write_failed() at
1556d3bc815aSDave Chinner * the appropriate point.
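 *
 * [Editor's worked example for the block_offset computation in
 *  xfs_vm_write_failed() above; illustrative only, not part of the
 *  original source. With 4k pages on a 32-bit kernel, PAGE_MASK is the
 *  unsigned long 0xfffff000, which is zero-extended when combined with a
 *  64-bit loff_t. For pos = 0x100001234 (a write just past 4GiB),
 *  pos & PAGE_MASK yields 0x1000 and silently drops the high 32 bits,
 *  while (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT yields the correct
 *  0x100001000, so ASSERT(block_offset + from == pos) holds.]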
1557d3bc815aSDave Chinner */ 1558c59d87c4SChristoph Hellwig STATIC int 1559c59d87c4SChristoph Hellwig xfs_vm_write_begin( 1560c59d87c4SChristoph Hellwig struct file *file, 1561c59d87c4SChristoph Hellwig struct address_space *mapping, 1562c59d87c4SChristoph Hellwig loff_t pos, 1563c59d87c4SChristoph Hellwig unsigned len, 1564c59d87c4SChristoph Hellwig unsigned flags, 1565c59d87c4SChristoph Hellwig struct page **pagep, 1566c59d87c4SChristoph Hellwig void **fsdata) 1567c59d87c4SChristoph Hellwig { 1568d3bc815aSDave Chinner pgoff_t index = pos >> PAGE_CACHE_SHIFT; 1569d3bc815aSDave Chinner struct page *page; 1570d3bc815aSDave Chinner int status; 1571c59d87c4SChristoph Hellwig 1572d3bc815aSDave Chinner ASSERT(len <= PAGE_CACHE_SIZE); 1573d3bc815aSDave Chinner 1574d3bc815aSDave Chinner page = grab_cache_page_write_begin(mapping, index, 1575d3bc815aSDave Chinner flags | AOP_FLAG_NOFS); 1576d3bc815aSDave Chinner if (!page) 1577d3bc815aSDave Chinner return -ENOMEM; 1578d3bc815aSDave Chinner 1579d3bc815aSDave Chinner status = __block_write_begin(page, pos, len, xfs_get_blocks); 1580d3bc815aSDave Chinner if (unlikely(status)) { 1581d3bc815aSDave Chinner struct inode *inode = mapping->host; 1582d3bc815aSDave Chinner 1583d3bc815aSDave Chinner xfs_vm_write_failed(inode, page, pos, len); 1584d3bc815aSDave Chinner unlock_page(page); 1585d3bc815aSDave Chinner 1586d3bc815aSDave Chinner if (pos + len > i_size_read(inode)) 15877caef267SKirill A. Shutemov truncate_pagecache(inode, i_size_read(inode)); 1588d3bc815aSDave Chinner 1589d3bc815aSDave Chinner page_cache_release(page); 1590d3bc815aSDave Chinner page = NULL; 1591c59d87c4SChristoph Hellwig } 1592c59d87c4SChristoph Hellwig 1593d3bc815aSDave Chinner *pagep = page; 1594d3bc815aSDave Chinner return status; 1595d3bc815aSDave Chinner } 1596d3bc815aSDave Chinner 1597d3bc815aSDave Chinner /* 1598d3bc815aSDave Chinner * On failure, we only need to kill delalloc blocks beyond EOF because they 1599d3bc815aSDave Chinner * will never be written. For blocks within EOF, generic_write_end() zeros them 1600d3bc815aSDave Chinner * so they are safe to leave alone and be written with all the other valid data. 1601d3bc815aSDave Chinner */ 1602c59d87c4SChristoph Hellwig STATIC int 1603c59d87c4SChristoph Hellwig xfs_vm_write_end( 1604c59d87c4SChristoph Hellwig struct file *file, 1605c59d87c4SChristoph Hellwig struct address_space *mapping, 1606c59d87c4SChristoph Hellwig loff_t pos, 1607c59d87c4SChristoph Hellwig unsigned len, 1608c59d87c4SChristoph Hellwig unsigned copied, 1609c59d87c4SChristoph Hellwig struct page *page, 1610c59d87c4SChristoph Hellwig void *fsdata) 1611c59d87c4SChristoph Hellwig { 1612c59d87c4SChristoph Hellwig int ret; 1613c59d87c4SChristoph Hellwig 1614d3bc815aSDave Chinner ASSERT(len <= PAGE_CACHE_SIZE); 1615d3bc815aSDave Chinner 1616c59d87c4SChristoph Hellwig ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 1617d3bc815aSDave Chinner if (unlikely(ret < len)) { 1618d3bc815aSDave Chinner struct inode *inode = mapping->host; 1619d3bc815aSDave Chinner size_t isize = i_size_read(inode); 1620d3bc815aSDave Chinner loff_t to = pos + len; 1621d3bc815aSDave Chinner 1622d3bc815aSDave Chinner if (to > isize) { 16237caef267SKirill A. 
Shutemov truncate_pagecache(inode, isize); 1624d3bc815aSDave Chinner xfs_vm_kill_delalloc_range(inode, isize, to); 1625d3bc815aSDave Chinner } 1626d3bc815aSDave Chinner } 1627c59d87c4SChristoph Hellwig return ret; 1628c59d87c4SChristoph Hellwig } 1629c59d87c4SChristoph Hellwig 1630c59d87c4SChristoph Hellwig STATIC sector_t 1631c59d87c4SChristoph Hellwig xfs_vm_bmap( 1632c59d87c4SChristoph Hellwig struct address_space *mapping, 1633c59d87c4SChristoph Hellwig sector_t block) 1634c59d87c4SChristoph Hellwig { 1635c59d87c4SChristoph Hellwig struct inode *inode = (struct inode *)mapping->host; 1636c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1637c59d87c4SChristoph Hellwig 1638c59d87c4SChristoph Hellwig trace_xfs_vm_bmap(XFS_I(inode)); 1639c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_IOLOCK_SHARED); 16404bc1ea6bSDave Chinner filemap_write_and_wait(mapping); 1641c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1642c59d87c4SChristoph Hellwig return generic_block_bmap(mapping, block, xfs_get_blocks); 1643c59d87c4SChristoph Hellwig } 1644c59d87c4SChristoph Hellwig 1645c59d87c4SChristoph Hellwig STATIC int 1646c59d87c4SChristoph Hellwig xfs_vm_readpage( 1647c59d87c4SChristoph Hellwig struct file *unused, 1648c59d87c4SChristoph Hellwig struct page *page) 1649c59d87c4SChristoph Hellwig { 1650c59d87c4SChristoph Hellwig return mpage_readpage(page, xfs_get_blocks); 1651c59d87c4SChristoph Hellwig } 1652c59d87c4SChristoph Hellwig 1653c59d87c4SChristoph Hellwig STATIC int 1654c59d87c4SChristoph Hellwig xfs_vm_readpages( 1655c59d87c4SChristoph Hellwig struct file *unused, 1656c59d87c4SChristoph Hellwig struct address_space *mapping, 1657c59d87c4SChristoph Hellwig struct list_head *pages, 1658c59d87c4SChristoph Hellwig unsigned nr_pages) 1659c59d87c4SChristoph Hellwig { 1660c59d87c4SChristoph Hellwig return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1661c59d87c4SChristoph Hellwig } 1662c59d87c4SChristoph Hellwig 1663c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = { 1664c59d87c4SChristoph Hellwig .readpage = xfs_vm_readpage, 1665c59d87c4SChristoph Hellwig .readpages = xfs_vm_readpages, 1666c59d87c4SChristoph Hellwig .writepage = xfs_vm_writepage, 1667c59d87c4SChristoph Hellwig .writepages = xfs_vm_writepages, 1668c59d87c4SChristoph Hellwig .releasepage = xfs_vm_releasepage, 1669c59d87c4SChristoph Hellwig .invalidatepage = xfs_vm_invalidatepage, 1670c59d87c4SChristoph Hellwig .write_begin = xfs_vm_write_begin, 1671c59d87c4SChristoph Hellwig .write_end = xfs_vm_write_end, 1672c59d87c4SChristoph Hellwig .bmap = xfs_vm_bmap, 1673c59d87c4SChristoph Hellwig .direct_IO = xfs_vm_direct_IO, 1674c59d87c4SChristoph Hellwig .migratepage = buffer_migrate_page, 1675c59d87c4SChristoph Hellwig .is_partially_uptodate = block_is_partially_uptodate, 1676c59d87c4SChristoph Hellwig .error_remove_page = generic_error_remove_page, 1677c59d87c4SChristoph Hellwig }; 1678
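/*
 * [Editor's note: illustrative sketch, not part of the original source.
 * It shows how an address_space_operations table like the one above is
 * typically attached to an inode so that page cache operations on regular
 * file data reach XFS through these handlers. The helper name below is
 * made up for the illustration; the real hookup lives in the XFS inode
 * setup code:
 *
 *	static void
 *	example_setup_aops(
 *		struct inode		*inode)
 *	{
 *		if (S_ISREG(inode->i_mode))
 *			inode->i_mapping->a_ops =
 *					&xfs_address_space_operations;
 *	}
 *
 * Once set, generic paths (readahead, buffered read/write, writeback and
 * direct I/O) call back into xfs_vm_readpage(s), xfs_vm_write_begin/end,
 * xfs_vm_writepage(s) and xfs_vm_direct_IO defined in this file.]
 */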