1c59d87c4SChristoph Hellwig /* 2c59d87c4SChristoph Hellwig * Copyright (c) 2000-2005 Silicon Graphics, Inc. 3c59d87c4SChristoph Hellwig * All Rights Reserved. 4c59d87c4SChristoph Hellwig * 5c59d87c4SChristoph Hellwig * This program is free software; you can redistribute it and/or 6c59d87c4SChristoph Hellwig * modify it under the terms of the GNU General Public License as 7c59d87c4SChristoph Hellwig * published by the Free Software Foundation. 8c59d87c4SChristoph Hellwig * 9c59d87c4SChristoph Hellwig * This program is distributed in the hope that it would be useful, 10c59d87c4SChristoph Hellwig * but WITHOUT ANY WARRANTY; without even the implied warranty of 11c59d87c4SChristoph Hellwig * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12c59d87c4SChristoph Hellwig * GNU General Public License for more details. 13c59d87c4SChristoph Hellwig * 14c59d87c4SChristoph Hellwig * You should have received a copy of the GNU General Public License 15c59d87c4SChristoph Hellwig * along with this program; if not, write the Free Software Foundation, 16c59d87c4SChristoph Hellwig * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 17c59d87c4SChristoph Hellwig */ 18c59d87c4SChristoph Hellwig #include "xfs.h" 19c59d87c4SChristoph Hellwig #include "xfs_log.h" 20c59d87c4SChristoph Hellwig #include "xfs_sb.h" 21c59d87c4SChristoph Hellwig #include "xfs_ag.h" 22c59d87c4SChristoph Hellwig #include "xfs_trans.h" 23c59d87c4SChristoph Hellwig #include "xfs_mount.h" 24c59d87c4SChristoph Hellwig #include "xfs_bmap_btree.h" 25c59d87c4SChristoph Hellwig #include "xfs_dinode.h" 26c59d87c4SChristoph Hellwig #include "xfs_inode.h" 27281627dfSChristoph Hellwig #include "xfs_inode_item.h" 28c59d87c4SChristoph Hellwig #include "xfs_alloc.h" 29c59d87c4SChristoph Hellwig #include "xfs_error.h" 30c59d87c4SChristoph Hellwig #include "xfs_iomap.h" 31c59d87c4SChristoph Hellwig #include "xfs_trace.h" 32c59d87c4SChristoph Hellwig #include "xfs_bmap.h" 3368988114SDave Chinner #include "xfs_bmap_util.h" 34a27bb332SKent Overstreet #include <linux/aio.h> 35c59d87c4SChristoph Hellwig #include <linux/gfp.h> 36c59d87c4SChristoph Hellwig #include <linux/mpage.h> 37c59d87c4SChristoph Hellwig #include <linux/pagevec.h> 38c59d87c4SChristoph Hellwig #include <linux/writeback.h> 39c59d87c4SChristoph Hellwig 40c59d87c4SChristoph Hellwig void 41c59d87c4SChristoph Hellwig xfs_count_page_state( 42c59d87c4SChristoph Hellwig struct page *page, 43c59d87c4SChristoph Hellwig int *delalloc, 44c59d87c4SChristoph Hellwig int *unwritten) 45c59d87c4SChristoph Hellwig { 46c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 47c59d87c4SChristoph Hellwig 48c59d87c4SChristoph Hellwig *delalloc = *unwritten = 0; 49c59d87c4SChristoph Hellwig 50c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 51c59d87c4SChristoph Hellwig do { 52c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) 53c59d87c4SChristoph Hellwig (*unwritten) = 1; 54c59d87c4SChristoph Hellwig else if (buffer_delay(bh)) 55c59d87c4SChristoph Hellwig (*delalloc) = 1; 56c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 57c59d87c4SChristoph Hellwig } 58c59d87c4SChristoph Hellwig 59c59d87c4SChristoph Hellwig STATIC struct block_device * 60c59d87c4SChristoph Hellwig xfs_find_bdev_for_inode( 61c59d87c4SChristoph Hellwig struct inode *inode) 62c59d87c4SChristoph Hellwig { 63c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 64c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 65c59d87c4SChristoph Hellwig 66c59d87c4SChristoph Hellwig if 
(XFS_IS_REALTIME_INODE(ip)) 67c59d87c4SChristoph Hellwig return mp->m_rtdev_targp->bt_bdev; 68c59d87c4SChristoph Hellwig else 69c59d87c4SChristoph Hellwig return mp->m_ddev_targp->bt_bdev; 70c59d87c4SChristoph Hellwig } 71c59d87c4SChristoph Hellwig 72c59d87c4SChristoph Hellwig /* 73c59d87c4SChristoph Hellwig * We're now finished for good with this ioend structure. 74c59d87c4SChristoph Hellwig * Update the page state via the associated buffer_heads, 75c59d87c4SChristoph Hellwig * release holds on the inode and bio, and finally free 76c59d87c4SChristoph Hellwig * up memory. Do not use the ioend after this. 77c59d87c4SChristoph Hellwig */ 78c59d87c4SChristoph Hellwig STATIC void 79c59d87c4SChristoph Hellwig xfs_destroy_ioend( 80c59d87c4SChristoph Hellwig xfs_ioend_t *ioend) 81c59d87c4SChristoph Hellwig { 82c59d87c4SChristoph Hellwig struct buffer_head *bh, *next; 83c59d87c4SChristoph Hellwig 84c59d87c4SChristoph Hellwig for (bh = ioend->io_buffer_head; bh; bh = next) { 85c59d87c4SChristoph Hellwig next = bh->b_private; 86c59d87c4SChristoph Hellwig bh->b_end_io(bh, !ioend->io_error); 87c59d87c4SChristoph Hellwig } 88c59d87c4SChristoph Hellwig 89c59d87c4SChristoph Hellwig mempool_free(ioend, xfs_ioend_pool); 90c59d87c4SChristoph Hellwig } 91c59d87c4SChristoph Hellwig 92c59d87c4SChristoph Hellwig /* 93fc0063c4SChristoph Hellwig * Fast and loose check if this write could update the on-disk inode size. 94fc0063c4SChristoph Hellwig */ 95fc0063c4SChristoph Hellwig static inline bool xfs_ioend_is_append(struct xfs_ioend *ioend) 96fc0063c4SChristoph Hellwig { 97fc0063c4SChristoph Hellwig return ioend->io_offset + ioend->io_size > 98fc0063c4SChristoph Hellwig XFS_I(ioend->io_inode)->i_d.di_size; 99fc0063c4SChristoph Hellwig } 100fc0063c4SChristoph Hellwig 101281627dfSChristoph Hellwig STATIC int 102281627dfSChristoph Hellwig xfs_setfilesize_trans_alloc( 103281627dfSChristoph Hellwig struct xfs_ioend *ioend) 104281627dfSChristoph Hellwig { 105281627dfSChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 106281627dfSChristoph Hellwig struct xfs_trans *tp; 107281627dfSChristoph Hellwig int error; 108281627dfSChristoph Hellwig 109281627dfSChristoph Hellwig tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); 110281627dfSChristoph Hellwig 1113d3c8b52SJie Liu error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); 112281627dfSChristoph Hellwig if (error) { 113281627dfSChristoph Hellwig xfs_trans_cancel(tp, 0); 114281627dfSChristoph Hellwig return error; 115281627dfSChristoph Hellwig } 116281627dfSChristoph Hellwig 117281627dfSChristoph Hellwig ioend->io_append_trans = tp; 118281627dfSChristoph Hellwig 119281627dfSChristoph Hellwig /* 120437a255aSDave Chinner * We may pass freeze protection with a transaction. So tell lockdep 121d9457dc0SJan Kara * we released it. 122d9457dc0SJan Kara */ 123d9457dc0SJan Kara rwsem_release(&ioend->io_inode->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], 124d9457dc0SJan Kara 1, _THIS_IP_); 125d9457dc0SJan Kara /* 126281627dfSChristoph Hellwig * We hand off the transaction to the completion thread now, so 127281627dfSChristoph Hellwig * clear the flag here. 128281627dfSChristoph Hellwig */ 129281627dfSChristoph Hellwig current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); 130281627dfSChristoph Hellwig return 0; 131281627dfSChristoph Hellwig } 132281627dfSChristoph Hellwig 133fc0063c4SChristoph Hellwig /* 1342813d682SChristoph Hellwig * Update on-disk file size now that data has been written to disk. 
135c59d87c4SChristoph Hellwig */ 136281627dfSChristoph Hellwig STATIC int 137c59d87c4SChristoph Hellwig xfs_setfilesize( 138aa6bf01dSChristoph Hellwig struct xfs_ioend *ioend) 139c59d87c4SChristoph Hellwig { 140aa6bf01dSChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 141281627dfSChristoph Hellwig struct xfs_trans *tp = ioend->io_append_trans; 142c59d87c4SChristoph Hellwig xfs_fsize_t isize; 143c59d87c4SChristoph Hellwig 144281627dfSChristoph Hellwig /* 145437a255aSDave Chinner * The transaction may have been allocated in the I/O submission thread, 146437a255aSDave Chinner * thus we need to mark ourselves as being in a transaction manually. 147437a255aSDave Chinner * Similarly for freeze protection. 148281627dfSChristoph Hellwig */ 149281627dfSChristoph Hellwig current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); 150437a255aSDave Chinner rwsem_acquire_read(&VFS_I(ip)->i_sb->s_writers.lock_map[SB_FREEZE_FS-1], 151437a255aSDave Chinner 0, 1, _THIS_IP_); 152281627dfSChristoph Hellwig 153aa6bf01dSChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 1546923e686SChristoph Hellwig isize = xfs_new_eof(ip, ioend->io_offset + ioend->io_size); 155281627dfSChristoph Hellwig if (!isize) { 156281627dfSChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 157281627dfSChristoph Hellwig xfs_trans_cancel(tp, 0); 158281627dfSChristoph Hellwig return 0; 159c59d87c4SChristoph Hellwig } 160c59d87c4SChristoph Hellwig 161281627dfSChristoph Hellwig trace_xfs_setfilesize(ip, ioend->io_offset, ioend->io_size); 162281627dfSChristoph Hellwig 163281627dfSChristoph Hellwig ip->i_d.di_size = isize; 164281627dfSChristoph Hellwig xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); 165281627dfSChristoph Hellwig xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); 166281627dfSChristoph Hellwig 167281627dfSChristoph Hellwig return xfs_trans_commit(tp, 0); 168c59d87c4SChristoph Hellwig } 169c59d87c4SChristoph Hellwig 170c59d87c4SChristoph Hellwig /* 171c59d87c4SChristoph Hellwig * Schedule IO completion handling on the final put of an ioend. 172fc0063c4SChristoph Hellwig * 173fc0063c4SChristoph Hellwig * If there is no work to do we might as well call it a day and free the 174fc0063c4SChristoph Hellwig * ioend right now. 175c59d87c4SChristoph Hellwig */ 176c59d87c4SChristoph Hellwig STATIC void 177c59d87c4SChristoph Hellwig xfs_finish_ioend( 178c59d87c4SChristoph Hellwig struct xfs_ioend *ioend) 179c59d87c4SChristoph Hellwig { 180c59d87c4SChristoph Hellwig if (atomic_dec_and_test(&ioend->io_remaining)) { 181aa6bf01dSChristoph Hellwig struct xfs_mount *mp = XFS_I(ioend->io_inode)->i_mount; 182aa6bf01dSChristoph Hellwig 1830d882a36SAlain Renaud if (ioend->io_type == XFS_IO_UNWRITTEN) 184aa6bf01dSChristoph Hellwig queue_work(mp->m_unwritten_workqueue, &ioend->io_work); 185437a255aSDave Chinner else if (ioend->io_append_trans || 186437a255aSDave Chinner (ioend->io_isdirect && xfs_ioend_is_append(ioend))) 187aa6bf01dSChristoph Hellwig queue_work(mp->m_data_workqueue, &ioend->io_work); 188fc0063c4SChristoph Hellwig else 189fc0063c4SChristoph Hellwig xfs_destroy_ioend(ioend); 190c59d87c4SChristoph Hellwig } 191c59d87c4SChristoph Hellwig } 192c59d87c4SChristoph Hellwig 193c59d87c4SChristoph Hellwig /* 194c59d87c4SChristoph Hellwig * IO write completion.
195c59d87c4SChristoph Hellwig */ 196c59d87c4SChristoph Hellwig STATIC void 197c59d87c4SChristoph Hellwig xfs_end_io( 198c59d87c4SChristoph Hellwig struct work_struct *work) 199c59d87c4SChristoph Hellwig { 200c59d87c4SChristoph Hellwig xfs_ioend_t *ioend = container_of(work, xfs_ioend_t, io_work); 201c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(ioend->io_inode); 202c59d87c4SChristoph Hellwig int error = 0; 203c59d87c4SChristoph Hellwig 20404f658eeSChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { 205810627d9SChristoph Hellwig ioend->io_error = -EIO; 20604f658eeSChristoph Hellwig goto done; 20704f658eeSChristoph Hellwig } 20804f658eeSChristoph Hellwig if (ioend->io_error) 20904f658eeSChristoph Hellwig goto done; 21004f658eeSChristoph Hellwig 211c59d87c4SChristoph Hellwig /* 212c59d87c4SChristoph Hellwig * For unwritten extents we need to issue transactions to convert a 213c59d87c4SChristoph Hellwig * range to normal written extents after the data I/O has finished. 214c59d87c4SChristoph Hellwig */ 2150d882a36SAlain Renaud if (ioend->io_type == XFS_IO_UNWRITTEN) { 216c59d87c4SChristoph Hellwig error = xfs_iomap_write_unwritten(ip, ioend->io_offset, 217c59d87c4SChristoph Hellwig ioend->io_size); 218437a255aSDave Chinner } else if (ioend->io_isdirect && xfs_ioend_is_append(ioend)) { 219437a255aSDave Chinner /* 220437a255aSDave Chinner * For direct I/O we do not know if we need to allocate blocks 221437a255aSDave Chinner * or not so we can't preallocate an append transaction as that 222437a255aSDave Chinner * results in nested reservations and log space deadlocks. Hence 223437a255aSDave Chinner * allocate the transaction here. While this is sub-optimal and 224437a255aSDave Chinner * can block IO completion for some time, we're stuck with doing 225437a255aSDave Chinner * it this way until we can pass the ioend to the direct IO 226437a255aSDave Chinner * allocation callbacks and avoid nesting that way. 227437a255aSDave Chinner */ 228437a255aSDave Chinner error = xfs_setfilesize_trans_alloc(ioend); 229437a255aSDave Chinner if (error) 23004f658eeSChristoph Hellwig goto done; 231437a255aSDave Chinner error = xfs_setfilesize(ioend); 232281627dfSChristoph Hellwig } else if (ioend->io_append_trans) { 233281627dfSChristoph Hellwig error = xfs_setfilesize(ioend); 23484803fb7SChristoph Hellwig } else { 235281627dfSChristoph Hellwig ASSERT(!xfs_ioend_is_append(ioend)); 23684803fb7SChristoph Hellwig } 23784803fb7SChristoph Hellwig 23804f658eeSChristoph Hellwig done: 239437a255aSDave Chinner if (error) 240437a255aSDave Chinner ioend->io_error = -error; 241c59d87c4SChristoph Hellwig xfs_destroy_ioend(ioend); 242c59d87c4SChristoph Hellwig } 243c59d87c4SChristoph Hellwig 244c59d87c4SChristoph Hellwig /* 245c59d87c4SChristoph Hellwig * Call IO completion handling in caller context on the final put of an ioend. 246c59d87c4SChristoph Hellwig */ 247c59d87c4SChristoph Hellwig STATIC void 248c59d87c4SChristoph Hellwig xfs_finish_ioend_sync( 249c59d87c4SChristoph Hellwig struct xfs_ioend *ioend) 250c59d87c4SChristoph Hellwig { 251c59d87c4SChristoph Hellwig if (atomic_dec_and_test(&ioend->io_remaining)) 252c59d87c4SChristoph Hellwig xfs_end_io(&ioend->io_work); 253c59d87c4SChristoph Hellwig } 254c59d87c4SChristoph Hellwig 255c59d87c4SChristoph Hellwig /* 256c59d87c4SChristoph Hellwig * Allocate and initialise an IO completion structure. 257c59d87c4SChristoph Hellwig * We need to track unwritten extent write completion here initially.
258c59d87c4SChristoph Hellwig * We'll need to extend this for updating the ondisk inode size later 259c59d87c4SChristoph Hellwig * (vs. incore size). 260c59d87c4SChristoph Hellwig */ 261c59d87c4SChristoph Hellwig STATIC xfs_ioend_t * 262c59d87c4SChristoph Hellwig xfs_alloc_ioend( 263c59d87c4SChristoph Hellwig struct inode *inode, 264c59d87c4SChristoph Hellwig unsigned int type) 265c59d87c4SChristoph Hellwig { 266c59d87c4SChristoph Hellwig xfs_ioend_t *ioend; 267c59d87c4SChristoph Hellwig 268c59d87c4SChristoph Hellwig ioend = mempool_alloc(xfs_ioend_pool, GFP_NOFS); 269c59d87c4SChristoph Hellwig 270c59d87c4SChristoph Hellwig /* 271c59d87c4SChristoph Hellwig * Set the count to 1 initially, which will prevent an I/O 272c59d87c4SChristoph Hellwig * completion callback from happening before we have started 273c59d87c4SChristoph Hellwig * all the I/O from calling the completion routine too early. 274c59d87c4SChristoph Hellwig */ 275c59d87c4SChristoph Hellwig atomic_set(&ioend->io_remaining, 1); 276281627dfSChristoph Hellwig ioend->io_isdirect = 0; 277c59d87c4SChristoph Hellwig ioend->io_error = 0; 278c59d87c4SChristoph Hellwig ioend->io_list = NULL; 279c59d87c4SChristoph Hellwig ioend->io_type = type; 280c59d87c4SChristoph Hellwig ioend->io_inode = inode; 281c59d87c4SChristoph Hellwig ioend->io_buffer_head = NULL; 282c59d87c4SChristoph Hellwig ioend->io_buffer_tail = NULL; 283c59d87c4SChristoph Hellwig ioend->io_offset = 0; 284c59d87c4SChristoph Hellwig ioend->io_size = 0; 285281627dfSChristoph Hellwig ioend->io_append_trans = NULL; 286c59d87c4SChristoph Hellwig 287c59d87c4SChristoph Hellwig INIT_WORK(&ioend->io_work, xfs_end_io); 288c59d87c4SChristoph Hellwig return ioend; 289c59d87c4SChristoph Hellwig } 290c59d87c4SChristoph Hellwig 291c59d87c4SChristoph Hellwig STATIC int 292c59d87c4SChristoph Hellwig xfs_map_blocks( 293c59d87c4SChristoph Hellwig struct inode *inode, 294c59d87c4SChristoph Hellwig loff_t offset, 295c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 296c59d87c4SChristoph Hellwig int type, 297c59d87c4SChristoph Hellwig int nonblocking) 298c59d87c4SChristoph Hellwig { 299c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 300c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 301c59d87c4SChristoph Hellwig ssize_t count = 1 << inode->i_blkbits; 302c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 303c59d87c4SChristoph Hellwig int error = 0; 304c59d87c4SChristoph Hellwig int bmapi_flags = XFS_BMAPI_ENTIRE; 305c59d87c4SChristoph Hellwig int nimaps = 1; 306c59d87c4SChristoph Hellwig 307c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 308c59d87c4SChristoph Hellwig return -XFS_ERROR(EIO); 309c59d87c4SChristoph Hellwig 3100d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) 311c59d87c4SChristoph Hellwig bmapi_flags |= XFS_BMAPI_IGSTATE; 312c59d87c4SChristoph Hellwig 313c59d87c4SChristoph Hellwig if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) { 314c59d87c4SChristoph Hellwig if (nonblocking) 315c59d87c4SChristoph Hellwig return -XFS_ERROR(EAGAIN); 316c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_SHARED); 317c59d87c4SChristoph Hellwig } 318c59d87c4SChristoph Hellwig 319c59d87c4SChristoph Hellwig ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE || 320c59d87c4SChristoph Hellwig (ip->i_df.if_flags & XFS_IFEXTENTS)); 321d2c28191SDave Chinner ASSERT(offset <= mp->m_super->s_maxbytes); 322c59d87c4SChristoph Hellwig 323d2c28191SDave Chinner if (offset + count > mp->m_super->s_maxbytes) 324d2c28191SDave Chinner count = mp->m_super->s_maxbytes - 
offset; 325c59d87c4SChristoph Hellwig end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 326c59d87c4SChristoph Hellwig offset_fsb = XFS_B_TO_FSBT(mp, offset); 3275c8ed202SDave Chinner error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, 3285c8ed202SDave Chinner imap, &nimaps, bmapi_flags); 329c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_SHARED); 330c59d87c4SChristoph Hellwig 331c59d87c4SChristoph Hellwig if (error) 332c59d87c4SChristoph Hellwig return -XFS_ERROR(error); 333c59d87c4SChristoph Hellwig 3340d882a36SAlain Renaud if (type == XFS_IO_DELALLOC && 335c59d87c4SChristoph Hellwig (!nimaps || isnullstartblock(imap->br_startblock))) { 336*0799a3e8SJie Liu error = xfs_iomap_write_allocate(ip, offset, imap); 337c59d87c4SChristoph Hellwig if (!error) 338c59d87c4SChristoph Hellwig trace_xfs_map_blocks_alloc(ip, offset, count, type, imap); 339c59d87c4SChristoph Hellwig return -XFS_ERROR(error); 340c59d87c4SChristoph Hellwig } 341c59d87c4SChristoph Hellwig 342c59d87c4SChristoph Hellwig #ifdef DEBUG 3430d882a36SAlain Renaud if (type == XFS_IO_UNWRITTEN) { 344c59d87c4SChristoph Hellwig ASSERT(nimaps); 345c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 346c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 347c59d87c4SChristoph Hellwig } 348c59d87c4SChristoph Hellwig #endif 349c59d87c4SChristoph Hellwig if (nimaps) 350c59d87c4SChristoph Hellwig trace_xfs_map_blocks_found(ip, offset, count, type, imap); 351c59d87c4SChristoph Hellwig return 0; 352c59d87c4SChristoph Hellwig } 353c59d87c4SChristoph Hellwig 354c59d87c4SChristoph Hellwig STATIC int 355c59d87c4SChristoph Hellwig xfs_imap_valid( 356c59d87c4SChristoph Hellwig struct inode *inode, 357c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 358c59d87c4SChristoph Hellwig xfs_off_t offset) 359c59d87c4SChristoph Hellwig { 360c59d87c4SChristoph Hellwig offset >>= inode->i_blkbits; 361c59d87c4SChristoph Hellwig 362c59d87c4SChristoph Hellwig return offset >= imap->br_startoff && 363c59d87c4SChristoph Hellwig offset < imap->br_startoff + imap->br_blockcount; 364c59d87c4SChristoph Hellwig } 365c59d87c4SChristoph Hellwig 366c59d87c4SChristoph Hellwig /* 367c59d87c4SChristoph Hellwig * BIO completion handler for buffered IO. 368c59d87c4SChristoph Hellwig */ 369c59d87c4SChristoph Hellwig STATIC void 370c59d87c4SChristoph Hellwig xfs_end_bio( 371c59d87c4SChristoph Hellwig struct bio *bio, 372c59d87c4SChristoph Hellwig int error) 373c59d87c4SChristoph Hellwig { 374c59d87c4SChristoph Hellwig xfs_ioend_t *ioend = bio->bi_private; 375c59d87c4SChristoph Hellwig 376c59d87c4SChristoph Hellwig ASSERT(atomic_read(&bio->bi_cnt) >= 1); 377c59d87c4SChristoph Hellwig ioend->io_error = test_bit(BIO_UPTODATE, &bio->bi_flags) ? 
0 : error; 378c59d87c4SChristoph Hellwig 379c59d87c4SChristoph Hellwig /* Toss bio and pass work off to an xfsdatad thread */ 380c59d87c4SChristoph Hellwig bio->bi_private = NULL; 381c59d87c4SChristoph Hellwig bio->bi_end_io = NULL; 382c59d87c4SChristoph Hellwig bio_put(bio); 383c59d87c4SChristoph Hellwig 384c59d87c4SChristoph Hellwig xfs_finish_ioend(ioend); 385c59d87c4SChristoph Hellwig } 386c59d87c4SChristoph Hellwig 387c59d87c4SChristoph Hellwig STATIC void 388c59d87c4SChristoph Hellwig xfs_submit_ioend_bio( 389c59d87c4SChristoph Hellwig struct writeback_control *wbc, 390c59d87c4SChristoph Hellwig xfs_ioend_t *ioend, 391c59d87c4SChristoph Hellwig struct bio *bio) 392c59d87c4SChristoph Hellwig { 393c59d87c4SChristoph Hellwig atomic_inc(&ioend->io_remaining); 394c59d87c4SChristoph Hellwig bio->bi_private = ioend; 395c59d87c4SChristoph Hellwig bio->bi_end_io = xfs_end_bio; 396c59d87c4SChristoph Hellwig submit_bio(wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE, bio); 397c59d87c4SChristoph Hellwig } 398c59d87c4SChristoph Hellwig 399c59d87c4SChristoph Hellwig STATIC struct bio * 400c59d87c4SChristoph Hellwig xfs_alloc_ioend_bio( 401c59d87c4SChristoph Hellwig struct buffer_head *bh) 402c59d87c4SChristoph Hellwig { 403c59d87c4SChristoph Hellwig int nvecs = bio_get_nr_vecs(bh->b_bdev); 404c59d87c4SChristoph Hellwig struct bio *bio = bio_alloc(GFP_NOIO, nvecs); 405c59d87c4SChristoph Hellwig 406c59d87c4SChristoph Hellwig ASSERT(bio->bi_private == NULL); 407c59d87c4SChristoph Hellwig bio->bi_sector = bh->b_blocknr * (bh->b_size >> 9); 408c59d87c4SChristoph Hellwig bio->bi_bdev = bh->b_bdev; 409c59d87c4SChristoph Hellwig return bio; 410c59d87c4SChristoph Hellwig } 411c59d87c4SChristoph Hellwig 412c59d87c4SChristoph Hellwig STATIC void 413c59d87c4SChristoph Hellwig xfs_start_buffer_writeback( 414c59d87c4SChristoph Hellwig struct buffer_head *bh) 415c59d87c4SChristoph Hellwig { 416c59d87c4SChristoph Hellwig ASSERT(buffer_mapped(bh)); 417c59d87c4SChristoph Hellwig ASSERT(buffer_locked(bh)); 418c59d87c4SChristoph Hellwig ASSERT(!buffer_delay(bh)); 419c59d87c4SChristoph Hellwig ASSERT(!buffer_unwritten(bh)); 420c59d87c4SChristoph Hellwig 421c59d87c4SChristoph Hellwig mark_buffer_async_write(bh); 422c59d87c4SChristoph Hellwig set_buffer_uptodate(bh); 423c59d87c4SChristoph Hellwig clear_buffer_dirty(bh); 424c59d87c4SChristoph Hellwig } 425c59d87c4SChristoph Hellwig 426c59d87c4SChristoph Hellwig STATIC void 427c59d87c4SChristoph Hellwig xfs_start_page_writeback( 428c59d87c4SChristoph Hellwig struct page *page, 429c59d87c4SChristoph Hellwig int clear_dirty, 430c59d87c4SChristoph Hellwig int buffers) 431c59d87c4SChristoph Hellwig { 432c59d87c4SChristoph Hellwig ASSERT(PageLocked(page)); 433c59d87c4SChristoph Hellwig ASSERT(!PageWriteback(page)); 434c59d87c4SChristoph Hellwig if (clear_dirty) 435c59d87c4SChristoph Hellwig clear_page_dirty_for_io(page); 436c59d87c4SChristoph Hellwig set_page_writeback(page); 437c59d87c4SChristoph Hellwig unlock_page(page); 438c59d87c4SChristoph Hellwig /* If no buffers on the page are to be written, finish it here */ 439c59d87c4SChristoph Hellwig if (!buffers) 440c59d87c4SChristoph Hellwig end_page_writeback(page); 441c59d87c4SChristoph Hellwig } 442c59d87c4SChristoph Hellwig 443c7c1a7d8SZhi Yong Wu static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh) 444c59d87c4SChristoph Hellwig { 445c59d87c4SChristoph Hellwig return bio_add_page(bio, bh->b_page, bh->b_size, bh_offset(bh)); 446c59d87c4SChristoph Hellwig } 447c59d87c4SChristoph Hellwig 
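/*
 * [Editorial illustration -- not part of the original file.] The bio helpers
 * above and xfs_submit_ioend() below all rely on one reference-counting
 * pattern: io_remaining starts at 1 when the ioend is allocated, each bio
 * submitted against the ioend takes another reference, and the completion
 * work only runs when the final reference is dropped. The sketch below
 * (hypothetical "demo_*" names, no XFS internals, kept inside #if 0 so it is
 * never compiled) is a minimal, hedged illustration of that pattern using
 * standard <linux/atomic.h> primitives.
 */
#if 0	/* illustrative sketch only */
struct demo_ioend {
	atomic_t	remaining;	/* 1 submission ref + 1 per in-flight bio */
	int		error;
};

static void demo_ioend_init(struct demo_ioend *io)
{
	/* Mirrors xfs_alloc_ioend(): hold one reference for the submitter. */
	atomic_set(&io->remaining, 1);
	io->error = 0;
}

static void demo_ioend_put(struct demo_ioend *io)
{
	/* Mirrors xfs_finish_ioend(): the last put runs completion handling. */
	if (atomic_dec_and_test(&io->remaining))
		pr_debug("demo ioend complete, error=%d\n", io->error);
}

static void demo_submit_bios(struct demo_ioend *io, int nr_bios)
{
	int i;

	for (i = 0; i < nr_bios; i++) {
		/* Mirrors xfs_submit_ioend_bio(): one reference per bio. */
		atomic_inc(&io->remaining);
		/* ...build and submit_bio(); bi_end_io calls demo_ioend_put()... */
	}

	/* Drop the initial submission reference taken in demo_ioend_init(). */
	demo_ioend_put(io);
}
#endif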
448c59d87c4SChristoph Hellwig /* 449c59d87c4SChristoph Hellwig * Submit all of the bios for all of the ioends we have saved up, covering the 450c59d87c4SChristoph Hellwig * initial writepage page and also any probed pages. 451c59d87c4SChristoph Hellwig * 452c59d87c4SChristoph Hellwig * Because we may have multiple ioends spanning a page, we need to start 453c59d87c4SChristoph Hellwig * writeback on all the buffers before we submit them for I/O. If we mark the 454c59d87c4SChristoph Hellwig * buffers as we go, then we can end up with a page that only has some buffers 455c59d87c4SChristoph Hellwig * marked async write, and I/O completion can occur before we mark the other 456c59d87c4SChristoph Hellwig * buffers async write. 457c59d87c4SChristoph Hellwig * 458c59d87c4SChristoph Hellwig * The end result of this is that we trip a bug in end_page_writeback() because 459c59d87c4SChristoph Hellwig * we call it twice for the one page as the code in end_buffer_async_write() 460c59d87c4SChristoph Hellwig * assumes that all buffers on the page are started at the same time. 461c59d87c4SChristoph Hellwig * 462c59d87c4SChristoph Hellwig * The fix is two passes across the ioend list - one to start writeback on the 463c59d87c4SChristoph Hellwig * buffer_heads, and then submit them for I/O on the second pass. 4647bf7f352SDave Chinner * 4657bf7f352SDave Chinner * If @fail is non-zero, it means that we have a situation where some part of 4667bf7f352SDave Chinner * the submission process has failed after we have marked pages for writeback 4677bf7f352SDave Chinner * and unlocked them. In this situation, we need to fail the ioend chain rather 4687bf7f352SDave Chinner * than submit it to IO. This typically only happens on a filesystem shutdown. 469c59d87c4SChristoph Hellwig */ 470c59d87c4SChristoph Hellwig STATIC void 471c59d87c4SChristoph Hellwig xfs_submit_ioend( 472c59d87c4SChristoph Hellwig struct writeback_control *wbc, 4737bf7f352SDave Chinner xfs_ioend_t *ioend, 4747bf7f352SDave Chinner int fail) 475c59d87c4SChristoph Hellwig { 476c59d87c4SChristoph Hellwig xfs_ioend_t *head = ioend; 477c59d87c4SChristoph Hellwig xfs_ioend_t *next; 478c59d87c4SChristoph Hellwig struct buffer_head *bh; 479c59d87c4SChristoph Hellwig struct bio *bio; 480c59d87c4SChristoph Hellwig sector_t lastblock = 0; 481c59d87c4SChristoph Hellwig 482c59d87c4SChristoph Hellwig /* Pass 1 - start writeback */ 483c59d87c4SChristoph Hellwig do { 484c59d87c4SChristoph Hellwig next = ioend->io_list; 485c59d87c4SChristoph Hellwig for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) 486c59d87c4SChristoph Hellwig xfs_start_buffer_writeback(bh); 487c59d87c4SChristoph Hellwig } while ((ioend = next) != NULL); 488c59d87c4SChristoph Hellwig 489c59d87c4SChristoph Hellwig /* Pass 2 - submit I/O */ 490c59d87c4SChristoph Hellwig ioend = head; 491c59d87c4SChristoph Hellwig do { 492c59d87c4SChristoph Hellwig next = ioend->io_list; 493c59d87c4SChristoph Hellwig bio = NULL; 494c59d87c4SChristoph Hellwig 4957bf7f352SDave Chinner /* 4967bf7f352SDave Chinner * If we are failing the IO now, just mark the ioend with an 4977bf7f352SDave Chinner * error and finish it. This will run IO completion immediately 4987bf7f352SDave Chinner * as there is only one reference to the ioend at this point in 4997bf7f352SDave Chinner * time.
5007bf7f352SDave Chinner */ 5017bf7f352SDave Chinner if (fail) { 5027bf7f352SDave Chinner ioend->io_error = -fail; 5037bf7f352SDave Chinner xfs_finish_ioend(ioend); 5047bf7f352SDave Chinner continue; 5057bf7f352SDave Chinner } 5067bf7f352SDave Chinner 507c59d87c4SChristoph Hellwig for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) { 508c59d87c4SChristoph Hellwig 509c59d87c4SChristoph Hellwig if (!bio) { 510c59d87c4SChristoph Hellwig retry: 511c59d87c4SChristoph Hellwig bio = xfs_alloc_ioend_bio(bh); 512c59d87c4SChristoph Hellwig } else if (bh->b_blocknr != lastblock + 1) { 513c59d87c4SChristoph Hellwig xfs_submit_ioend_bio(wbc, ioend, bio); 514c59d87c4SChristoph Hellwig goto retry; 515c59d87c4SChristoph Hellwig } 516c59d87c4SChristoph Hellwig 517c7c1a7d8SZhi Yong Wu if (xfs_bio_add_buffer(bio, bh) != bh->b_size) { 518c59d87c4SChristoph Hellwig xfs_submit_ioend_bio(wbc, ioend, bio); 519c59d87c4SChristoph Hellwig goto retry; 520c59d87c4SChristoph Hellwig } 521c59d87c4SChristoph Hellwig 522c59d87c4SChristoph Hellwig lastblock = bh->b_blocknr; 523c59d87c4SChristoph Hellwig } 524c59d87c4SChristoph Hellwig if (bio) 525c59d87c4SChristoph Hellwig xfs_submit_ioend_bio(wbc, ioend, bio); 526c59d87c4SChristoph Hellwig xfs_finish_ioend(ioend); 527c59d87c4SChristoph Hellwig } while ((ioend = next) != NULL); 528c59d87c4SChristoph Hellwig } 529c59d87c4SChristoph Hellwig 530c59d87c4SChristoph Hellwig /* 531c59d87c4SChristoph Hellwig * Cancel submission of all buffer_heads so far in this endio. 532c59d87c4SChristoph Hellwig * Toss the endio too. Only ever called for the initial page 533c59d87c4SChristoph Hellwig * in a writepage request, so only ever one page. 534c59d87c4SChristoph Hellwig */ 535c59d87c4SChristoph Hellwig STATIC void 536c59d87c4SChristoph Hellwig xfs_cancel_ioend( 537c59d87c4SChristoph Hellwig xfs_ioend_t *ioend) 538c59d87c4SChristoph Hellwig { 539c59d87c4SChristoph Hellwig xfs_ioend_t *next; 540c59d87c4SChristoph Hellwig struct buffer_head *bh, *next_bh; 541c59d87c4SChristoph Hellwig 542c59d87c4SChristoph Hellwig do { 543c59d87c4SChristoph Hellwig next = ioend->io_list; 544c59d87c4SChristoph Hellwig bh = ioend->io_buffer_head; 545c59d87c4SChristoph Hellwig do { 546c59d87c4SChristoph Hellwig next_bh = bh->b_private; 547c59d87c4SChristoph Hellwig clear_buffer_async_write(bh); 548c59d87c4SChristoph Hellwig unlock_buffer(bh); 549c59d87c4SChristoph Hellwig } while ((bh = next_bh) != NULL); 550c59d87c4SChristoph Hellwig 551c59d87c4SChristoph Hellwig mempool_free(ioend, xfs_ioend_pool); 552c59d87c4SChristoph Hellwig } while ((ioend = next) != NULL); 553c59d87c4SChristoph Hellwig } 554c59d87c4SChristoph Hellwig 555c59d87c4SChristoph Hellwig /* 556c59d87c4SChristoph Hellwig * Test to see if we've been building up a completion structure for 557c59d87c4SChristoph Hellwig * earlier buffers -- if so, we try to append to this ioend if we 558c59d87c4SChristoph Hellwig * can, otherwise we finish off any current ioend and start another. 559c59d87c4SChristoph Hellwig * Return true if we've finished the given ioend. 
560c59d87c4SChristoph Hellwig */ 561c59d87c4SChristoph Hellwig STATIC void 562c59d87c4SChristoph Hellwig xfs_add_to_ioend( 563c59d87c4SChristoph Hellwig struct inode *inode, 564c59d87c4SChristoph Hellwig struct buffer_head *bh, 565c59d87c4SChristoph Hellwig xfs_off_t offset, 566c59d87c4SChristoph Hellwig unsigned int type, 567c59d87c4SChristoph Hellwig xfs_ioend_t **result, 568c59d87c4SChristoph Hellwig int need_ioend) 569c59d87c4SChristoph Hellwig { 570c59d87c4SChristoph Hellwig xfs_ioend_t *ioend = *result; 571c59d87c4SChristoph Hellwig 572c59d87c4SChristoph Hellwig if (!ioend || need_ioend || type != ioend->io_type) { 573c59d87c4SChristoph Hellwig xfs_ioend_t *previous = *result; 574c59d87c4SChristoph Hellwig 575c59d87c4SChristoph Hellwig ioend = xfs_alloc_ioend(inode, type); 576c59d87c4SChristoph Hellwig ioend->io_offset = offset; 577c59d87c4SChristoph Hellwig ioend->io_buffer_head = bh; 578c59d87c4SChristoph Hellwig ioend->io_buffer_tail = bh; 579c59d87c4SChristoph Hellwig if (previous) 580c59d87c4SChristoph Hellwig previous->io_list = ioend; 581c59d87c4SChristoph Hellwig *result = ioend; 582c59d87c4SChristoph Hellwig } else { 583c59d87c4SChristoph Hellwig ioend->io_buffer_tail->b_private = bh; 584c59d87c4SChristoph Hellwig ioend->io_buffer_tail = bh; 585c59d87c4SChristoph Hellwig } 586c59d87c4SChristoph Hellwig 587c59d87c4SChristoph Hellwig bh->b_private = NULL; 588c59d87c4SChristoph Hellwig ioend->io_size += bh->b_size; 589c59d87c4SChristoph Hellwig } 590c59d87c4SChristoph Hellwig 591c59d87c4SChristoph Hellwig STATIC void 592c59d87c4SChristoph Hellwig xfs_map_buffer( 593c59d87c4SChristoph Hellwig struct inode *inode, 594c59d87c4SChristoph Hellwig struct buffer_head *bh, 595c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 596c59d87c4SChristoph Hellwig xfs_off_t offset) 597c59d87c4SChristoph Hellwig { 598c59d87c4SChristoph Hellwig sector_t bn; 599c59d87c4SChristoph Hellwig struct xfs_mount *m = XFS_I(inode)->i_mount; 600c59d87c4SChristoph Hellwig xfs_off_t iomap_offset = XFS_FSB_TO_B(m, imap->br_startoff); 601c59d87c4SChristoph Hellwig xfs_daddr_t iomap_bn = xfs_fsb_to_db(XFS_I(inode), imap->br_startblock); 602c59d87c4SChristoph Hellwig 603c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 604c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 605c59d87c4SChristoph Hellwig 606c59d87c4SChristoph Hellwig bn = (iomap_bn >> (inode->i_blkbits - BBSHIFT)) + 607c59d87c4SChristoph Hellwig ((offset - iomap_offset) >> inode->i_blkbits); 608c59d87c4SChristoph Hellwig 609c59d87c4SChristoph Hellwig ASSERT(bn || XFS_IS_REALTIME_INODE(XFS_I(inode))); 610c59d87c4SChristoph Hellwig 611c59d87c4SChristoph Hellwig bh->b_blocknr = bn; 612c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 613c59d87c4SChristoph Hellwig } 614c59d87c4SChristoph Hellwig 615c59d87c4SChristoph Hellwig STATIC void 616c59d87c4SChristoph Hellwig xfs_map_at_offset( 617c59d87c4SChristoph Hellwig struct inode *inode, 618c59d87c4SChristoph Hellwig struct buffer_head *bh, 619c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 620c59d87c4SChristoph Hellwig xfs_off_t offset) 621c59d87c4SChristoph Hellwig { 622c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != HOLESTARTBLOCK); 623c59d87c4SChristoph Hellwig ASSERT(imap->br_startblock != DELAYSTARTBLOCK); 624c59d87c4SChristoph Hellwig 625c59d87c4SChristoph Hellwig xfs_map_buffer(inode, bh, imap, offset); 626c59d87c4SChristoph Hellwig set_buffer_mapped(bh); 627c59d87c4SChristoph Hellwig clear_buffer_delay(bh); 628c59d87c4SChristoph 
Hellwig clear_buffer_unwritten(bh); 629c59d87c4SChristoph Hellwig } 630c59d87c4SChristoph Hellwig 631c59d87c4SChristoph Hellwig /* 632c59d87c4SChristoph Hellwig * Test if a given page is suitable for writing as part of an unwritten 633c59d87c4SChristoph Hellwig * or delayed allocate extent. 634c59d87c4SChristoph Hellwig */ 635c59d87c4SChristoph Hellwig STATIC int 6366ffc4db5SDave Chinner xfs_check_page_type( 637c59d87c4SChristoph Hellwig struct page *page, 638c59d87c4SChristoph Hellwig unsigned int type) 639c59d87c4SChristoph Hellwig { 640c59d87c4SChristoph Hellwig if (PageWriteback(page)) 641c59d87c4SChristoph Hellwig return 0; 642c59d87c4SChristoph Hellwig 643c59d87c4SChristoph Hellwig if (page->mapping && page_has_buffers(page)) { 644c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 645c59d87c4SChristoph Hellwig int acceptable = 0; 646c59d87c4SChristoph Hellwig 647c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 648c59d87c4SChristoph Hellwig do { 649c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) 6500d882a36SAlain Renaud acceptable += (type == XFS_IO_UNWRITTEN); 651c59d87c4SChristoph Hellwig else if (buffer_delay(bh)) 6520d882a36SAlain Renaud acceptable += (type == XFS_IO_DELALLOC); 653c59d87c4SChristoph Hellwig else if (buffer_dirty(bh) && buffer_mapped(bh)) 6540d882a36SAlain Renaud acceptable += (type == XFS_IO_OVERWRITE); 655c59d87c4SChristoph Hellwig else 656c59d87c4SChristoph Hellwig break; 657c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 658c59d87c4SChristoph Hellwig 659c59d87c4SChristoph Hellwig if (acceptable) 660c59d87c4SChristoph Hellwig return 1; 661c59d87c4SChristoph Hellwig } 662c59d87c4SChristoph Hellwig 663c59d87c4SChristoph Hellwig return 0; 664c59d87c4SChristoph Hellwig } 665c59d87c4SChristoph Hellwig 666c59d87c4SChristoph Hellwig /* 667c59d87c4SChristoph Hellwig * Allocate & map buffers for page given the extent map. Write it out. 668c59d87c4SChristoph Hellwig * Except for the original page of a writepage, this is called on 669c59d87c4SChristoph Hellwig * delalloc/unwritten pages only; for the original page it is possible 670c59d87c4SChristoph Hellwig * that the page has no mapping at all.
671c59d87c4SChristoph Hellwig */ 672c59d87c4SChristoph Hellwig STATIC int 673c59d87c4SChristoph Hellwig xfs_convert_page( 674c59d87c4SChristoph Hellwig struct inode *inode, 675c59d87c4SChristoph Hellwig struct page *page, 676c59d87c4SChristoph Hellwig loff_t tindex, 677c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 678c59d87c4SChristoph Hellwig xfs_ioend_t **ioendp, 679c59d87c4SChristoph Hellwig struct writeback_control *wbc) 680c59d87c4SChristoph Hellwig { 681c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 682c59d87c4SChristoph Hellwig xfs_off_t end_offset; 683c59d87c4SChristoph Hellwig unsigned long p_offset; 684c59d87c4SChristoph Hellwig unsigned int type; 685c59d87c4SChristoph Hellwig int len, page_dirty; 686c59d87c4SChristoph Hellwig int count = 0, done = 0, uptodate = 1; 687c59d87c4SChristoph Hellwig xfs_off_t offset = page_offset(page); 688c59d87c4SChristoph Hellwig 689c59d87c4SChristoph Hellwig if (page->index != tindex) 690c59d87c4SChristoph Hellwig goto fail; 691c59d87c4SChristoph Hellwig if (!trylock_page(page)) 692c59d87c4SChristoph Hellwig goto fail; 693c59d87c4SChristoph Hellwig if (PageWriteback(page)) 694c59d87c4SChristoph Hellwig goto fail_unlock_page; 695c59d87c4SChristoph Hellwig if (page->mapping != inode->i_mapping) 696c59d87c4SChristoph Hellwig goto fail_unlock_page; 6976ffc4db5SDave Chinner if (!xfs_check_page_type(page, (*ioendp)->io_type)) 698c59d87c4SChristoph Hellwig goto fail_unlock_page; 699c59d87c4SChristoph Hellwig 700c59d87c4SChristoph Hellwig /* 701c59d87c4SChristoph Hellwig * page_dirty is initially a count of buffers on the page before 702c59d87c4SChristoph Hellwig * EOF and is decremented as we move each into a cleanable state. 703c59d87c4SChristoph Hellwig * 704c59d87c4SChristoph Hellwig * Derivation: 705c59d87c4SChristoph Hellwig * 706c59d87c4SChristoph Hellwig * End offset is the highest offset that this page should represent. 707c59d87c4SChristoph Hellwig * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1)) 708c59d87c4SChristoph Hellwig * will evaluate non-zero and be less than PAGE_CACHE_SIZE and 709c59d87c4SChristoph Hellwig * hence give us the correct page_dirty count. On any other page, 710c59d87c4SChristoph Hellwig * it will be zero and in that case we need page_dirty to be the 711c59d87c4SChristoph Hellwig * count of buffers on the page. 712c59d87c4SChristoph Hellwig */ 713c59d87c4SChristoph Hellwig end_offset = min_t(unsigned long long, 714c59d87c4SChristoph Hellwig (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, 715c59d87c4SChristoph Hellwig i_size_read(inode)); 716c59d87c4SChristoph Hellwig 717480d7467SDave Chinner /* 718480d7467SDave Chinner * If the current map does not span the entire page we are about to try 719480d7467SDave Chinner * to write, then give up. The only way we can write a page that spans 720480d7467SDave Chinner * multiple mappings in a single writeback iteration is via the 721480d7467SDave Chinner * xfs_vm_writepage() function. Data integrity writeback requires the 722480d7467SDave Chinner * entire page to be written in a single attempt, otherwise the part of 723480d7467SDave Chinner * the page we don't write here doesn't get written as part of the data 724480d7467SDave Chinner * integrity sync. 
725480d7467SDave Chinner * 726480d7467SDave Chinner * For normal writeback, we also don't attempt to write partial pages 727480d7467SDave Chinner * here as it simply means that write_cache_pages() will see it under 728480d7467SDave Chinner * writeback and ignore the page until some point in the future, at 729480d7467SDave Chinner * which time this will be the only page in the file that needs 730480d7467SDave Chinner * writeback. Hence for more optimal IO patterns, we should always 731480d7467SDave Chinner * avoid partial page writeback due to multiple mappings on a page here. 732480d7467SDave Chinner */ 733480d7467SDave Chinner if (!xfs_imap_valid(inode, imap, end_offset)) 734480d7467SDave Chinner goto fail_unlock_page; 735480d7467SDave Chinner 736c59d87c4SChristoph Hellwig len = 1 << inode->i_blkbits; 737c59d87c4SChristoph Hellwig p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1), 738c59d87c4SChristoph Hellwig PAGE_CACHE_SIZE); 739c59d87c4SChristoph Hellwig p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE; 740c59d87c4SChristoph Hellwig page_dirty = p_offset / len; 741c59d87c4SChristoph Hellwig 742c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 743c59d87c4SChristoph Hellwig do { 744c59d87c4SChristoph Hellwig if (offset >= end_offset) 745c59d87c4SChristoph Hellwig break; 746c59d87c4SChristoph Hellwig if (!buffer_uptodate(bh)) 747c59d87c4SChristoph Hellwig uptodate = 0; 748c59d87c4SChristoph Hellwig if (!(PageUptodate(page) || buffer_uptodate(bh))) { 749c59d87c4SChristoph Hellwig done = 1; 750c59d87c4SChristoph Hellwig continue; 751c59d87c4SChristoph Hellwig } 752c59d87c4SChristoph Hellwig 753c59d87c4SChristoph Hellwig if (buffer_unwritten(bh) || buffer_delay(bh) || 754c59d87c4SChristoph Hellwig buffer_mapped(bh)) { 755c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) 7560d882a36SAlain Renaud type = XFS_IO_UNWRITTEN; 757c59d87c4SChristoph Hellwig else if (buffer_delay(bh)) 7580d882a36SAlain Renaud type = XFS_IO_DELALLOC; 759c59d87c4SChristoph Hellwig else 7600d882a36SAlain Renaud type = XFS_IO_OVERWRITE; 761c59d87c4SChristoph Hellwig 762c59d87c4SChristoph Hellwig if (!xfs_imap_valid(inode, imap, offset)) { 763c59d87c4SChristoph Hellwig done = 1; 764c59d87c4SChristoph Hellwig continue; 765c59d87c4SChristoph Hellwig } 766c59d87c4SChristoph Hellwig 767c59d87c4SChristoph Hellwig lock_buffer(bh); 7680d882a36SAlain Renaud if (type != XFS_IO_OVERWRITE) 769c59d87c4SChristoph Hellwig xfs_map_at_offset(inode, bh, imap, offset); 770c59d87c4SChristoph Hellwig xfs_add_to_ioend(inode, bh, offset, type, 771c59d87c4SChristoph Hellwig ioendp, done); 772c59d87c4SChristoph Hellwig 773c59d87c4SChristoph Hellwig page_dirty--; 774c59d87c4SChristoph Hellwig count++; 775c59d87c4SChristoph Hellwig } else { 776c59d87c4SChristoph Hellwig done = 1; 777c59d87c4SChristoph Hellwig } 778c59d87c4SChristoph Hellwig } while (offset += len, (bh = bh->b_this_page) != head); 779c59d87c4SChristoph Hellwig 780c59d87c4SChristoph Hellwig if (uptodate && bh == head) 781c59d87c4SChristoph Hellwig SetPageUptodate(page); 782c59d87c4SChristoph Hellwig 783c59d87c4SChristoph Hellwig if (count) { 784c59d87c4SChristoph Hellwig if (--wbc->nr_to_write <= 0 && 785c59d87c4SChristoph Hellwig wbc->sync_mode == WB_SYNC_NONE) 786c59d87c4SChristoph Hellwig done = 1; 787c59d87c4SChristoph Hellwig } 788c59d87c4SChristoph Hellwig xfs_start_page_writeback(page, !page_dirty, count); 789c59d87c4SChristoph Hellwig 790c59d87c4SChristoph Hellwig return done; 791c59d87c4SChristoph Hellwig fail_unlock_page: 
792c59d87c4SChristoph Hellwig unlock_page(page); 793c59d87c4SChristoph Hellwig fail: 794c59d87c4SChristoph Hellwig return 1; 795c59d87c4SChristoph Hellwig } 796c59d87c4SChristoph Hellwig 797c59d87c4SChristoph Hellwig /* 798c59d87c4SChristoph Hellwig * Convert & write out a cluster of pages in the same extent as defined 799c59d87c4SChristoph Hellwig * by mp and following the start page. 800c59d87c4SChristoph Hellwig */ 801c59d87c4SChristoph Hellwig STATIC void 802c59d87c4SChristoph Hellwig xfs_cluster_write( 803c59d87c4SChristoph Hellwig struct inode *inode, 804c59d87c4SChristoph Hellwig pgoff_t tindex, 805c59d87c4SChristoph Hellwig struct xfs_bmbt_irec *imap, 806c59d87c4SChristoph Hellwig xfs_ioend_t **ioendp, 807c59d87c4SChristoph Hellwig struct writeback_control *wbc, 808c59d87c4SChristoph Hellwig pgoff_t tlast) 809c59d87c4SChristoph Hellwig { 810c59d87c4SChristoph Hellwig struct pagevec pvec; 811c59d87c4SChristoph Hellwig int done = 0, i; 812c59d87c4SChristoph Hellwig 813c59d87c4SChristoph Hellwig pagevec_init(&pvec, 0); 814c59d87c4SChristoph Hellwig while (!done && tindex <= tlast) { 815c59d87c4SChristoph Hellwig unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1); 816c59d87c4SChristoph Hellwig 817c59d87c4SChristoph Hellwig if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len)) 818c59d87c4SChristoph Hellwig break; 819c59d87c4SChristoph Hellwig 820c59d87c4SChristoph Hellwig for (i = 0; i < pagevec_count(&pvec); i++) { 821c59d87c4SChristoph Hellwig done = xfs_convert_page(inode, pvec.pages[i], tindex++, 822c59d87c4SChristoph Hellwig imap, ioendp, wbc); 823c59d87c4SChristoph Hellwig if (done) 824c59d87c4SChristoph Hellwig break; 825c59d87c4SChristoph Hellwig } 826c59d87c4SChristoph Hellwig 827c59d87c4SChristoph Hellwig pagevec_release(&pvec); 828c59d87c4SChristoph Hellwig cond_resched(); 829c59d87c4SChristoph Hellwig } 830c59d87c4SChristoph Hellwig } 831c59d87c4SChristoph Hellwig 832c59d87c4SChristoph Hellwig STATIC void 833c59d87c4SChristoph Hellwig xfs_vm_invalidatepage( 834c59d87c4SChristoph Hellwig struct page *page, 835d47992f8SLukas Czerner unsigned int offset, 836d47992f8SLukas Czerner unsigned int length) 837c59d87c4SChristoph Hellwig { 83834097dfeSLukas Czerner trace_xfs_invalidatepage(page->mapping->host, page, offset, 83934097dfeSLukas Czerner length); 84034097dfeSLukas Czerner block_invalidatepage(page, offset, length); 841c59d87c4SChristoph Hellwig } 842c59d87c4SChristoph Hellwig 843c59d87c4SChristoph Hellwig /* 844c59d87c4SChristoph Hellwig * If the page has delalloc buffers on it, we need to punch them out before we 845c59d87c4SChristoph Hellwig * invalidate the page. If we don't, we leave a stale delalloc mapping on the 846c59d87c4SChristoph Hellwig * inode that can trip a BUG() in xfs_get_blocks() later on if a direct IO read 847c59d87c4SChristoph Hellwig * is done on that same region - the delalloc extent is returned when none is 848c59d87c4SChristoph Hellwig * supposed to be there. 849c59d87c4SChristoph Hellwig * 850c59d87c4SChristoph Hellwig * We prevent this by truncating away the delalloc regions on the page before 851c59d87c4SChristoph Hellwig * invalidating it. Because they are delalloc, we can do this without needing a 852c59d87c4SChristoph Hellwig * transaction. Indeed - if we get ENOSPC errors, we have to be able to do this 853c59d87c4SChristoph Hellwig * truncation without a transaction as there is no space left for block 854c59d87c4SChristoph Hellwig * reservation (typically why we see a ENOSPC in writeback). 
855c59d87c4SChristoph Hellwig * 856c59d87c4SChristoph Hellwig * This is not a performance critical path, so for now just do the punching a 857c59d87c4SChristoph Hellwig * buffer head at a time. 858c59d87c4SChristoph Hellwig */ 859c59d87c4SChristoph Hellwig STATIC void 860c59d87c4SChristoph Hellwig xfs_aops_discard_page( 861c59d87c4SChristoph Hellwig struct page *page) 862c59d87c4SChristoph Hellwig { 863c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 864c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 865c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 866c59d87c4SChristoph Hellwig loff_t offset = page_offset(page); 867c59d87c4SChristoph Hellwig 8680d882a36SAlain Renaud if (!xfs_check_page_type(page, XFS_IO_DELALLOC)) 869c59d87c4SChristoph Hellwig goto out_invalidate; 870c59d87c4SChristoph Hellwig 871c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(ip->i_mount)) 872c59d87c4SChristoph Hellwig goto out_invalidate; 873c59d87c4SChristoph Hellwig 874c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 875c59d87c4SChristoph Hellwig "page discard on page %p, inode 0x%llx, offset %llu.", 876c59d87c4SChristoph Hellwig page, ip->i_ino, offset); 877c59d87c4SChristoph Hellwig 878c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 879c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 880c59d87c4SChristoph Hellwig do { 881c59d87c4SChristoph Hellwig int error; 882c59d87c4SChristoph Hellwig xfs_fileoff_t start_fsb; 883c59d87c4SChristoph Hellwig 884c59d87c4SChristoph Hellwig if (!buffer_delay(bh)) 885c59d87c4SChristoph Hellwig goto next_buffer; 886c59d87c4SChristoph Hellwig 887c59d87c4SChristoph Hellwig start_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 888c59d87c4SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1); 889c59d87c4SChristoph Hellwig if (error) { 890c59d87c4SChristoph Hellwig /* something screwed, just bail */ 891c59d87c4SChristoph Hellwig if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 892c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 893c59d87c4SChristoph Hellwig "page discard unable to remove delalloc mapping."); 894c59d87c4SChristoph Hellwig } 895c59d87c4SChristoph Hellwig break; 896c59d87c4SChristoph Hellwig } 897c59d87c4SChristoph Hellwig next_buffer: 898c59d87c4SChristoph Hellwig offset += 1 << inode->i_blkbits; 899c59d87c4SChristoph Hellwig 900c59d87c4SChristoph Hellwig } while ((bh = bh->b_this_page) != head); 901c59d87c4SChristoph Hellwig 902c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 903c59d87c4SChristoph Hellwig out_invalidate: 904d47992f8SLukas Czerner xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE); 905c59d87c4SChristoph Hellwig return; 906c59d87c4SChristoph Hellwig } 907c59d87c4SChristoph Hellwig 908c59d87c4SChristoph Hellwig /* 909c59d87c4SChristoph Hellwig * Write out a dirty page. 910c59d87c4SChristoph Hellwig * 911c59d87c4SChristoph Hellwig * For delalloc space on the page we need to allocate space and flush it. 912c59d87c4SChristoph Hellwig * For unwritten space on the page we need to start the conversion to 913c59d87c4SChristoph Hellwig * regular allocated space. 914c59d87c4SChristoph Hellwig * For any other dirty buffer heads on the page we should flush them. 
915c59d87c4SChristoph Hellwig */ 916c59d87c4SChristoph Hellwig STATIC int 917c59d87c4SChristoph Hellwig xfs_vm_writepage( 918c59d87c4SChristoph Hellwig struct page *page, 919c59d87c4SChristoph Hellwig struct writeback_control *wbc) 920c59d87c4SChristoph Hellwig { 921c59d87c4SChristoph Hellwig struct inode *inode = page->mapping->host; 922c59d87c4SChristoph Hellwig struct buffer_head *bh, *head; 923c59d87c4SChristoph Hellwig struct xfs_bmbt_irec imap; 924c59d87c4SChristoph Hellwig xfs_ioend_t *ioend = NULL, *iohead = NULL; 925c59d87c4SChristoph Hellwig loff_t offset; 926c59d87c4SChristoph Hellwig unsigned int type; 927c59d87c4SChristoph Hellwig __uint64_t end_offset; 928c59d87c4SChristoph Hellwig pgoff_t end_index, last_index; 929c59d87c4SChristoph Hellwig ssize_t len; 930c59d87c4SChristoph Hellwig int err, imap_valid = 0, uptodate = 1; 931c59d87c4SChristoph Hellwig int count = 0; 932c59d87c4SChristoph Hellwig int nonblocking = 0; 933c59d87c4SChristoph Hellwig 93434097dfeSLukas Czerner trace_xfs_writepage(inode, page, 0, 0); 935c59d87c4SChristoph Hellwig 936c59d87c4SChristoph Hellwig ASSERT(page_has_buffers(page)); 937c59d87c4SChristoph Hellwig 938c59d87c4SChristoph Hellwig /* 939c59d87c4SChristoph Hellwig * Refuse to write the page out if we are called from reclaim context. 940c59d87c4SChristoph Hellwig * 941c59d87c4SChristoph Hellwig * This avoids stack overflows when called from deeply used stacks in 942c59d87c4SChristoph Hellwig * random callers for direct reclaim or memcg reclaim. We explicitly 943c59d87c4SChristoph Hellwig * allow reclaim from kswapd as the stack usage there is relatively low. 944c59d87c4SChristoph Hellwig * 94594054fa3SMel Gorman * This should never happen except in the case of a VM regression so 94694054fa3SMel Gorman * warn about it. 947c59d87c4SChristoph Hellwig */ 94894054fa3SMel Gorman if (WARN_ON_ONCE((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == 94994054fa3SMel Gorman PF_MEMALLOC)) 950c59d87c4SChristoph Hellwig goto redirty; 951c59d87c4SChristoph Hellwig 952c59d87c4SChristoph Hellwig /* 953c59d87c4SChristoph Hellwig * Given that we do not allow direct reclaim to call us, we should 954c59d87c4SChristoph Hellwig * never be called while in a filesystem transaction. 955c59d87c4SChristoph Hellwig */ 956c59d87c4SChristoph Hellwig if (WARN_ON(current->flags & PF_FSTRANS)) 957c59d87c4SChristoph Hellwig goto redirty; 958c59d87c4SChristoph Hellwig 959c59d87c4SChristoph Hellwig /* Is this page beyond the end of the file? */ 960c59d87c4SChristoph Hellwig offset = i_size_read(inode); 961c59d87c4SChristoph Hellwig end_index = offset >> PAGE_CACHE_SHIFT; 962c59d87c4SChristoph Hellwig last_index = (offset - 1) >> PAGE_CACHE_SHIFT; 963c59d87c4SChristoph Hellwig if (page->index >= end_index) { 9646b7a03f0SChristoph Hellwig unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1); 9656b7a03f0SChristoph Hellwig 9666b7a03f0SChristoph Hellwig /* 967ff9a28f6SJan Kara * Skip the page if it is fully outside i_size, e.g. due to a 968ff9a28f6SJan Kara * truncate operation that is in progress. We must redirty the 969ff9a28f6SJan Kara * page so that reclaim stops reclaiming it. Otherwise 970ff9a28f6SJan Kara * xfs_vm_releasepage() is called on it and gets confused. 9716b7a03f0SChristoph Hellwig */ 972ff9a28f6SJan Kara if (page->index >= end_index + 1 || offset_into_page == 0) 973ff9a28f6SJan Kara goto redirty; 9746b7a03f0SChristoph Hellwig 9756b7a03f0SChristoph Hellwig /* 9766b7a03f0SChristoph Hellwig * The page straddles i_size. 
It must be zeroed out on each 9776b7a03f0SChristoph Hellwig * and every writepage invocation because it may be mmapped. 9786b7a03f0SChristoph Hellwig * "A file is mapped in multiples of the page size. For a file 9796b7a03f0SChristoph Hellwig * that is not a multiple of the page size, the remaining 9806b7a03f0SChristoph Hellwig * memory is zeroed when mapped, and writes to that region are 9816b7a03f0SChristoph Hellwig * not written out to the file." 9826b7a03f0SChristoph Hellwig */ 9836b7a03f0SChristoph Hellwig zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE); 984c59d87c4SChristoph Hellwig } 985c59d87c4SChristoph Hellwig 986c59d87c4SChristoph Hellwig end_offset = min_t(unsigned long long, 987c59d87c4SChristoph Hellwig (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT, 988c59d87c4SChristoph Hellwig offset); 989c59d87c4SChristoph Hellwig len = 1 << inode->i_blkbits; 990c59d87c4SChristoph Hellwig 991c59d87c4SChristoph Hellwig bh = head = page_buffers(page); 992c59d87c4SChristoph Hellwig offset = page_offset(page); 9930d882a36SAlain Renaud type = XFS_IO_OVERWRITE; 994c59d87c4SChristoph Hellwig 995c59d87c4SChristoph Hellwig if (wbc->sync_mode == WB_SYNC_NONE) 996c59d87c4SChristoph Hellwig nonblocking = 1; 997c59d87c4SChristoph Hellwig 998c59d87c4SChristoph Hellwig do { 999c59d87c4SChristoph Hellwig int new_ioend = 0; 1000c59d87c4SChristoph Hellwig 1001c59d87c4SChristoph Hellwig if (offset >= end_offset) 1002c59d87c4SChristoph Hellwig break; 1003c59d87c4SChristoph Hellwig if (!buffer_uptodate(bh)) 1004c59d87c4SChristoph Hellwig uptodate = 0; 1005c59d87c4SChristoph Hellwig 1006c59d87c4SChristoph Hellwig /* 1007c59d87c4SChristoph Hellwig * set_page_dirty dirties all buffers in a page, independent 1008c59d87c4SChristoph Hellwig * of their state. The dirty state however is entirely 1009c59d87c4SChristoph Hellwig * meaningless for holes (!mapped && uptodate), so skip 1010c59d87c4SChristoph Hellwig * buffers covering holes here. 1011c59d87c4SChristoph Hellwig */ 1012c59d87c4SChristoph Hellwig if (!buffer_mapped(bh) && buffer_uptodate(bh)) { 1013c59d87c4SChristoph Hellwig imap_valid = 0; 1014c59d87c4SChristoph Hellwig continue; 1015c59d87c4SChristoph Hellwig } 1016c59d87c4SChristoph Hellwig 1017c59d87c4SChristoph Hellwig if (buffer_unwritten(bh)) { 10180d882a36SAlain Renaud if (type != XFS_IO_UNWRITTEN) { 10190d882a36SAlain Renaud type = XFS_IO_UNWRITTEN; 1020c59d87c4SChristoph Hellwig imap_valid = 0; 1021c59d87c4SChristoph Hellwig } 1022c59d87c4SChristoph Hellwig } else if (buffer_delay(bh)) { 10230d882a36SAlain Renaud if (type != XFS_IO_DELALLOC) { 10240d882a36SAlain Renaud type = XFS_IO_DELALLOC; 1025c59d87c4SChristoph Hellwig imap_valid = 0; 1026c59d87c4SChristoph Hellwig } 1027c59d87c4SChristoph Hellwig } else if (buffer_uptodate(bh)) { 10280d882a36SAlain Renaud if (type != XFS_IO_OVERWRITE) { 10290d882a36SAlain Renaud type = XFS_IO_OVERWRITE; 1030c59d87c4SChristoph Hellwig imap_valid = 0; 1031c59d87c4SChristoph Hellwig } 1032c59d87c4SChristoph Hellwig } else { 10337d0fa3ecSAlain Renaud if (PageUptodate(page)) 1034c59d87c4SChristoph Hellwig ASSERT(buffer_mapped(bh)); 10357d0fa3ecSAlain Renaud /* 10367d0fa3ecSAlain Renaud * This buffer is not uptodate and will not be 10377d0fa3ecSAlain Renaud * written to disk. Ensure that we will put any 10387d0fa3ecSAlain Renaud * subsequent writeable buffers into a new 10397d0fa3ecSAlain Renaud * ioend. 
10407d0fa3ecSAlain Renaud */ 1041c59d87c4SChristoph Hellwig imap_valid = 0; 1042c59d87c4SChristoph Hellwig continue; 1043c59d87c4SChristoph Hellwig } 1044c59d87c4SChristoph Hellwig 1045c59d87c4SChristoph Hellwig if (imap_valid) 1046c59d87c4SChristoph Hellwig imap_valid = xfs_imap_valid(inode, &imap, offset); 1047c59d87c4SChristoph Hellwig if (!imap_valid) { 1048c59d87c4SChristoph Hellwig /* 1049c59d87c4SChristoph Hellwig * If we didn't have a valid mapping then we need to 1050c59d87c4SChristoph Hellwig * put the new mapping into a separate ioend structure. 1051c59d87c4SChristoph Hellwig * This ensures non-contiguous extents always have 1052c59d87c4SChristoph Hellwig * separate ioends, which is particularly important 1053c59d87c4SChristoph Hellwig * for unwritten extent conversion at I/O completion 1054c59d87c4SChristoph Hellwig * time. 1055c59d87c4SChristoph Hellwig */ 1056c59d87c4SChristoph Hellwig new_ioend = 1; 1057c59d87c4SChristoph Hellwig err = xfs_map_blocks(inode, offset, &imap, type, 1058c59d87c4SChristoph Hellwig nonblocking); 1059c59d87c4SChristoph Hellwig if (err) 1060c59d87c4SChristoph Hellwig goto error; 1061c59d87c4SChristoph Hellwig imap_valid = xfs_imap_valid(inode, &imap, offset); 1062c59d87c4SChristoph Hellwig } 1063c59d87c4SChristoph Hellwig if (imap_valid) { 1064c59d87c4SChristoph Hellwig lock_buffer(bh); 10650d882a36SAlain Renaud if (type != XFS_IO_OVERWRITE) 1066c59d87c4SChristoph Hellwig xfs_map_at_offset(inode, bh, &imap, offset); 1067c59d87c4SChristoph Hellwig xfs_add_to_ioend(inode, bh, offset, type, &ioend, 1068c59d87c4SChristoph Hellwig new_ioend); 1069c59d87c4SChristoph Hellwig count++; 1070c59d87c4SChristoph Hellwig } 1071c59d87c4SChristoph Hellwig 1072c59d87c4SChristoph Hellwig if (!iohead) 1073c59d87c4SChristoph Hellwig iohead = ioend; 1074c59d87c4SChristoph Hellwig 1075c59d87c4SChristoph Hellwig } while (offset += len, ((bh = bh->b_this_page) != head)); 1076c59d87c4SChristoph Hellwig 1077c59d87c4SChristoph Hellwig if (uptodate && bh == head) 1078c59d87c4SChristoph Hellwig SetPageUptodate(page); 1079c59d87c4SChristoph Hellwig 1080c59d87c4SChristoph Hellwig xfs_start_page_writeback(page, 1, count); 1081c59d87c4SChristoph Hellwig 10827bf7f352SDave Chinner /* if there is no IO to be submitted for this page, we are done */ 10837bf7f352SDave Chinner if (!ioend) 10847bf7f352SDave Chinner return 0; 10857bf7f352SDave Chinner 10867bf7f352SDave Chinner ASSERT(iohead); 10877bf7f352SDave Chinner 10887bf7f352SDave Chinner /* 10897bf7f352SDave Chinner * Any errors from this point onwards need to be reported through the IO 10907bf7f352SDave Chinner * completion path as we have marked the initial page as under writeback 10917bf7f352SDave Chinner * and unlocked it.
10927bf7f352SDave Chinner */ 10937bf7f352SDave Chinner if (imap_valid) { 1094c59d87c4SChristoph Hellwig xfs_off_t end_index; 1095c59d87c4SChristoph Hellwig 1096c59d87c4SChristoph Hellwig end_index = imap.br_startoff + imap.br_blockcount; 1097c59d87c4SChristoph Hellwig 1098c59d87c4SChristoph Hellwig /* to bytes */ 1099c59d87c4SChristoph Hellwig end_index <<= inode->i_blkbits; 1100c59d87c4SChristoph Hellwig 1101c59d87c4SChristoph Hellwig /* to pages */ 1102c59d87c4SChristoph Hellwig end_index = (end_index - 1) >> PAGE_CACHE_SHIFT; 1103c59d87c4SChristoph Hellwig 1104c59d87c4SChristoph Hellwig /* check against file size */ 1105c59d87c4SChristoph Hellwig if (end_index > last_index) 1106c59d87c4SChristoph Hellwig end_index = last_index; 1107c59d87c4SChristoph Hellwig 1108c59d87c4SChristoph Hellwig xfs_cluster_write(inode, page->index + 1, &imap, &ioend, 1109c59d87c4SChristoph Hellwig wbc, end_index); 1110c59d87c4SChristoph Hellwig } 1111c59d87c4SChristoph Hellwig 1112281627dfSChristoph Hellwig 11137bf7f352SDave Chinner /* 11147bf7f352SDave Chinner * Reserve log space if we might write beyond the on-disk inode size. 11157bf7f352SDave Chinner */ 11167bf7f352SDave Chinner err = 0; 11177bf7f352SDave Chinner if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend)) 11187bf7f352SDave Chinner err = xfs_setfilesize_trans_alloc(ioend); 11197bf7f352SDave Chinner 11207bf7f352SDave Chinner xfs_submit_ioend(wbc, iohead, err); 1121c59d87c4SChristoph Hellwig 1122c59d87c4SChristoph Hellwig return 0; 1123c59d87c4SChristoph Hellwig 1124c59d87c4SChristoph Hellwig error: 1125c59d87c4SChristoph Hellwig if (iohead) 1126c59d87c4SChristoph Hellwig xfs_cancel_ioend(iohead); 1127c59d87c4SChristoph Hellwig 1128c59d87c4SChristoph Hellwig if (err == -EAGAIN) 1129c59d87c4SChristoph Hellwig goto redirty; 1130c59d87c4SChristoph Hellwig 1131c59d87c4SChristoph Hellwig xfs_aops_discard_page(page); 1132c59d87c4SChristoph Hellwig ClearPageUptodate(page); 1133c59d87c4SChristoph Hellwig unlock_page(page); 1134c59d87c4SChristoph Hellwig return err; 1135c59d87c4SChristoph Hellwig 1136c59d87c4SChristoph Hellwig redirty: 1137c59d87c4SChristoph Hellwig redirty_page_for_writepage(wbc, page); 1138c59d87c4SChristoph Hellwig unlock_page(page); 1139c59d87c4SChristoph Hellwig return 0; 1140c59d87c4SChristoph Hellwig } 1141c59d87c4SChristoph Hellwig 1142c59d87c4SChristoph Hellwig STATIC int 1143c59d87c4SChristoph Hellwig xfs_vm_writepages( 1144c59d87c4SChristoph Hellwig struct address_space *mapping, 1145c59d87c4SChristoph Hellwig struct writeback_control *wbc) 1146c59d87c4SChristoph Hellwig { 1147c59d87c4SChristoph Hellwig xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED); 1148c59d87c4SChristoph Hellwig return generic_writepages(mapping, wbc); 1149c59d87c4SChristoph Hellwig } 1150c59d87c4SChristoph Hellwig 1151c59d87c4SChristoph Hellwig /* 1152c59d87c4SChristoph Hellwig * Called to move a page into cleanable state - and from there 1153c59d87c4SChristoph Hellwig * to be released. The page should already be clean. We always 1154c59d87c4SChristoph Hellwig * have buffer heads in this call. 1155c59d87c4SChristoph Hellwig * 1156c59d87c4SChristoph Hellwig * Returns 1 if the page is ok to release, 0 otherwise. 
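 *
 * In sketch form, the policy implemented below is:
 *
 *	delalloc buffers present	-> WARN and refuse (return 0)
 *	unwritten buffers present	-> WARN and refuse (return 0)
 *	otherwise			-> try_to_free_buffers()
 *
 * since delalloc reservations and unwritten extents still carry state
 * that has to reach disk and must not be thrown away here.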
1157c59d87c4SChristoph Hellwig */ 1158c59d87c4SChristoph Hellwig STATIC int 1159c59d87c4SChristoph Hellwig xfs_vm_releasepage( 1160c59d87c4SChristoph Hellwig struct page *page, 1161c59d87c4SChristoph Hellwig gfp_t gfp_mask) 1162c59d87c4SChristoph Hellwig { 1163c59d87c4SChristoph Hellwig int delalloc, unwritten; 1164c59d87c4SChristoph Hellwig 116534097dfeSLukas Czerner trace_xfs_releasepage(page->mapping->host, page, 0, 0); 1166c59d87c4SChristoph Hellwig 1167c59d87c4SChristoph Hellwig xfs_count_page_state(page, &delalloc, &unwritten); 1168c59d87c4SChristoph Hellwig 1169c59d87c4SChristoph Hellwig if (WARN_ON(delalloc)) 1170c59d87c4SChristoph Hellwig return 0; 1171c59d87c4SChristoph Hellwig if (WARN_ON(unwritten)) 1172c59d87c4SChristoph Hellwig return 0; 1173c59d87c4SChristoph Hellwig 1174c59d87c4SChristoph Hellwig return try_to_free_buffers(page); 1175c59d87c4SChristoph Hellwig } 1176c59d87c4SChristoph Hellwig 1177c59d87c4SChristoph Hellwig STATIC int 1178c59d87c4SChristoph Hellwig __xfs_get_blocks( 1179c59d87c4SChristoph Hellwig struct inode *inode, 1180c59d87c4SChristoph Hellwig sector_t iblock, 1181c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1182c59d87c4SChristoph Hellwig int create, 1183c59d87c4SChristoph Hellwig int direct) 1184c59d87c4SChristoph Hellwig { 1185c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1186c59d87c4SChristoph Hellwig struct xfs_mount *mp = ip->i_mount; 1187c59d87c4SChristoph Hellwig xfs_fileoff_t offset_fsb, end_fsb; 1188c59d87c4SChristoph Hellwig int error = 0; 1189c59d87c4SChristoph Hellwig int lockmode = 0; 1190c59d87c4SChristoph Hellwig struct xfs_bmbt_irec imap; 1191c59d87c4SChristoph Hellwig int nimaps = 1; 1192c59d87c4SChristoph Hellwig xfs_off_t offset; 1193c59d87c4SChristoph Hellwig ssize_t size; 1194c59d87c4SChristoph Hellwig int new = 0; 1195c59d87c4SChristoph Hellwig 1196c59d87c4SChristoph Hellwig if (XFS_FORCED_SHUTDOWN(mp)) 1197c59d87c4SChristoph Hellwig return -XFS_ERROR(EIO); 1198c59d87c4SChristoph Hellwig 1199c59d87c4SChristoph Hellwig offset = (xfs_off_t)iblock << inode->i_blkbits; 1200c59d87c4SChristoph Hellwig ASSERT(bh_result->b_size >= (1 << inode->i_blkbits)); 1201c59d87c4SChristoph Hellwig size = bh_result->b_size; 1202c59d87c4SChristoph Hellwig 1203c59d87c4SChristoph Hellwig if (!create && direct && offset >= i_size_read(inode)) 1204c59d87c4SChristoph Hellwig return 0; 1205c59d87c4SChristoph Hellwig 1206507630b2SDave Chinner /* 1207507630b2SDave Chinner * Direct I/O is usually done on preallocated files, so try getting 1208507630b2SDave Chinner * a block mapping without an exclusive lock first. For buffered 1209507630b2SDave Chinner * writes we already have the exclusive iolock anyway, so avoiding 1210507630b2SDave Chinner * a lock roundtrip here by taking the ilock exclusive from the 1211507630b2SDave Chinner * beginning is a useful micro optimization. 
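 *
 * In sketch form, the choice made below is:
 *
 *	buffered write (create && !direct)	-> xfs_ilock(ip, XFS_ILOCK_EXCL)
 *	everything else				-> xfs_ilock_map_shared(ip)
 *
 * with lockmode recording which flavour was taken so that the later
 * unlock paths drop the correct lock.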
1212507630b2SDave Chinner 	 */
1213507630b2SDave Chinner 	if (create && !direct) {
1214c59d87c4SChristoph Hellwig 		lockmode = XFS_ILOCK_EXCL;
1215c59d87c4SChristoph Hellwig 		xfs_ilock(ip, lockmode);
1216c59d87c4SChristoph Hellwig 	} else {
1217c59d87c4SChristoph Hellwig 		lockmode = xfs_ilock_map_shared(ip);
1218c59d87c4SChristoph Hellwig 	}
1219c59d87c4SChristoph Hellwig 
1220d2c28191SDave Chinner 	ASSERT(offset <= mp->m_super->s_maxbytes);
1221d2c28191SDave Chinner 	if (offset + size > mp->m_super->s_maxbytes)
1222d2c28191SDave Chinner 		size = mp->m_super->s_maxbytes - offset;
1223c59d87c4SChristoph Hellwig 	end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1224c59d87c4SChristoph Hellwig 	offset_fsb = XFS_B_TO_FSBT(mp, offset);
1225c59d87c4SChristoph Hellwig 
12265c8ed202SDave Chinner 	error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
12275c8ed202SDave Chinner 				&imap, &nimaps, XFS_BMAPI_ENTIRE);
1228c59d87c4SChristoph Hellwig 	if (error)
1229c59d87c4SChristoph Hellwig 		goto out_unlock;
1230c59d87c4SChristoph Hellwig 
1231c59d87c4SChristoph Hellwig 	if (create &&
1232c59d87c4SChristoph Hellwig 	    (!nimaps ||
1233c59d87c4SChristoph Hellwig 	     (imap.br_startblock == HOLESTARTBLOCK ||
1234c59d87c4SChristoph Hellwig 	      imap.br_startblock == DELAYSTARTBLOCK))) {
1235aff3a9edSDave Chinner 		if (direct || xfs_get_extsz_hint(ip)) {
1236507630b2SDave Chinner 			/*
1237507630b2SDave Chinner 			 * Drop the ilock in preparation for starting the block
1238507630b2SDave Chinner 			 * allocation transaction. It will be retaken
1239507630b2SDave Chinner 			 * exclusively inside xfs_iomap_write_direct for the
1240507630b2SDave Chinner 			 * actual allocation.
1241507630b2SDave Chinner 			 */
1242507630b2SDave Chinner 			xfs_iunlock(ip, lockmode);
1243c59d87c4SChristoph Hellwig 			error = xfs_iomap_write_direct(ip, offset, size,
1244c59d87c4SChristoph Hellwig 						       &imap, nimaps);
1245507630b2SDave Chinner 			if (error)
1246507630b2SDave Chinner 				return -error;
1247d3bc815aSDave Chinner 			new = 1;
1248c59d87c4SChristoph Hellwig 		} else {
1249507630b2SDave Chinner 			/*
1250507630b2SDave Chinner 			 * Delalloc reservations do not require a transaction,
1251d3bc815aSDave Chinner 			 * so we can go on without dropping the lock here. If
1252d3bc815aSDave Chinner 			 * we are allocating a new delalloc block, set the new
1253d3bc815aSDave Chinner 			 * flag so the buffer is marked new and we can tell
1254d3bc815aSDave Chinner 			 * that it was newly allocated if the write later
1255d3bc815aSDave Chinner 			 * fails.
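			 *
			 * The new flag matters on the failure side:
			 * xfs_vm_write_failed() further down in this file only
			 * punches out delalloc blocks that are marked new or
			 * lie beyond the current file size, so a fresh
			 * reservation that is not flagged here could be left
			 * stale if the write fails.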
1256507630b2SDave Chinner 			 */
1257d3bc815aSDave Chinner 			if (nimaps && imap.br_startblock == HOLESTARTBLOCK)
1258d3bc815aSDave Chinner 				new = 1;
1259c59d87c4SChristoph Hellwig 			error = xfs_iomap_write_delay(ip, offset, size, &imap);
1260c59d87c4SChristoph Hellwig 			if (error)
1261c59d87c4SChristoph Hellwig 				goto out_unlock;
1262c59d87c4SChristoph Hellwig 
1263507630b2SDave Chinner 			xfs_iunlock(ip, lockmode);
1264507630b2SDave Chinner 		}
1265507630b2SDave Chinner 
1266c59d87c4SChristoph Hellwig 		trace_xfs_get_blocks_alloc(ip, offset, size, 0, &imap);
1267c59d87c4SChristoph Hellwig 	} else if (nimaps) {
1268c59d87c4SChristoph Hellwig 		trace_xfs_get_blocks_found(ip, offset, size, 0, &imap);
1269507630b2SDave Chinner 		xfs_iunlock(ip, lockmode);
1270c59d87c4SChristoph Hellwig 	} else {
1271c59d87c4SChristoph Hellwig 		trace_xfs_get_blocks_notfound(ip, offset, size);
1272c59d87c4SChristoph Hellwig 		goto out_unlock;
1273c59d87c4SChristoph Hellwig 	}
1274c59d87c4SChristoph Hellwig 
1275c59d87c4SChristoph Hellwig 	if (imap.br_startblock != HOLESTARTBLOCK &&
1276c59d87c4SChristoph Hellwig 	    imap.br_startblock != DELAYSTARTBLOCK) {
1277c59d87c4SChristoph Hellwig 		/*
1278c59d87c4SChristoph Hellwig 		 * For unwritten extents do not report a disk address on
1279c59d87c4SChristoph Hellwig 		 * the read case (treat as if we're reading into a hole).
1280c59d87c4SChristoph Hellwig 		 */
1281c59d87c4SChristoph Hellwig 		if (create || !ISUNWRITTEN(&imap))
1282c59d87c4SChristoph Hellwig 			xfs_map_buffer(inode, bh_result, &imap, offset);
1283c59d87c4SChristoph Hellwig 		if (create && ISUNWRITTEN(&imap)) {
12847b7a8665SChristoph Hellwig 			if (direct) {
1285c59d87c4SChristoph Hellwig 				bh_result->b_private = inode;
12867b7a8665SChristoph Hellwig 				set_buffer_defer_completion(bh_result);
12877b7a8665SChristoph Hellwig 			}
1288c59d87c4SChristoph Hellwig 			set_buffer_unwritten(bh_result);
1289c59d87c4SChristoph Hellwig 		}
1290c59d87c4SChristoph Hellwig 	}
1291c59d87c4SChristoph Hellwig 
1292c59d87c4SChristoph Hellwig 	/*
1293c59d87c4SChristoph Hellwig 	 * If this is a realtime file, data may be on a different device
1294c59d87c4SChristoph Hellwig 	 * to that pointed to by the buffer_head b_bdev currently.
1295c59d87c4SChristoph Hellwig 	 */
1296c59d87c4SChristoph Hellwig 	bh_result->b_bdev = xfs_find_bdev_for_inode(inode);
1297c59d87c4SChristoph Hellwig 
1298c59d87c4SChristoph Hellwig 	/*
1299c59d87c4SChristoph Hellwig 	 * If we previously allocated a block out beyond EOF and we are now
1300c59d87c4SChristoph Hellwig 	 * coming back to use it then we will need to flag it as new even if it
1301c59d87c4SChristoph Hellwig 	 * has a disk address.
1302c59d87c4SChristoph Hellwig 	 *
1303c59d87c4SChristoph Hellwig 	 * With sub-block writes into unwritten extents we also need to mark
1304c59d87c4SChristoph Hellwig 	 * the buffer as new so that the unwritten parts of the buffer get
1305c59d87c4SChristoph Hellwig 	 * correctly zeroed.
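	 *
	 * For a create mapping the check below therefore reduces to:
	 *
	 *	!mapped && !uptodate		-> mark new
	 *	offset at or beyond i_size	-> mark new
	 *	freshly allocated above		-> mark new
	 *	unwritten extent		-> mark new
	 *
	 * where marking the buffer new tells the generic buffered write code
	 * to zero the parts of the block this write does not cover instead
	 * of reading them from disk.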
1306c59d87c4SChristoph Hellwig */ 1307c59d87c4SChristoph Hellwig if (create && 1308c59d87c4SChristoph Hellwig ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) || 1309c59d87c4SChristoph Hellwig (offset >= i_size_read(inode)) || 1310c59d87c4SChristoph Hellwig (new || ISUNWRITTEN(&imap)))) 1311c59d87c4SChristoph Hellwig set_buffer_new(bh_result); 1312c59d87c4SChristoph Hellwig 1313c59d87c4SChristoph Hellwig if (imap.br_startblock == DELAYSTARTBLOCK) { 1314c59d87c4SChristoph Hellwig BUG_ON(direct); 1315c59d87c4SChristoph Hellwig if (create) { 1316c59d87c4SChristoph Hellwig set_buffer_uptodate(bh_result); 1317c59d87c4SChristoph Hellwig set_buffer_mapped(bh_result); 1318c59d87c4SChristoph Hellwig set_buffer_delay(bh_result); 1319c59d87c4SChristoph Hellwig } 1320c59d87c4SChristoph Hellwig } 1321c59d87c4SChristoph Hellwig 1322c59d87c4SChristoph Hellwig /* 1323c59d87c4SChristoph Hellwig * If this is O_DIRECT or the mpage code calling tell them how large 1324c59d87c4SChristoph Hellwig * the mapping is, so that we can avoid repeated get_blocks calls. 1325c59d87c4SChristoph Hellwig */ 1326c59d87c4SChristoph Hellwig if (direct || size > (1 << inode->i_blkbits)) { 1327c59d87c4SChristoph Hellwig xfs_off_t mapping_size; 1328c59d87c4SChristoph Hellwig 1329c59d87c4SChristoph Hellwig mapping_size = imap.br_startoff + imap.br_blockcount - iblock; 1330c59d87c4SChristoph Hellwig mapping_size <<= inode->i_blkbits; 1331c59d87c4SChristoph Hellwig 1332c59d87c4SChristoph Hellwig ASSERT(mapping_size > 0); 1333c59d87c4SChristoph Hellwig if (mapping_size > size) 1334c59d87c4SChristoph Hellwig mapping_size = size; 1335c59d87c4SChristoph Hellwig if (mapping_size > LONG_MAX) 1336c59d87c4SChristoph Hellwig mapping_size = LONG_MAX; 1337c59d87c4SChristoph Hellwig 1338c59d87c4SChristoph Hellwig bh_result->b_size = mapping_size; 1339c59d87c4SChristoph Hellwig } 1340c59d87c4SChristoph Hellwig 1341c59d87c4SChristoph Hellwig return 0; 1342c59d87c4SChristoph Hellwig 1343c59d87c4SChristoph Hellwig out_unlock: 1344c59d87c4SChristoph Hellwig xfs_iunlock(ip, lockmode); 1345c59d87c4SChristoph Hellwig return -error; 1346c59d87c4SChristoph Hellwig } 1347c59d87c4SChristoph Hellwig 1348c59d87c4SChristoph Hellwig int 1349c59d87c4SChristoph Hellwig xfs_get_blocks( 1350c59d87c4SChristoph Hellwig struct inode *inode, 1351c59d87c4SChristoph Hellwig sector_t iblock, 1352c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1353c59d87c4SChristoph Hellwig int create) 1354c59d87c4SChristoph Hellwig { 1355c59d87c4SChristoph Hellwig return __xfs_get_blocks(inode, iblock, bh_result, create, 0); 1356c59d87c4SChristoph Hellwig } 1357c59d87c4SChristoph Hellwig 1358c59d87c4SChristoph Hellwig STATIC int 1359c59d87c4SChristoph Hellwig xfs_get_blocks_direct( 1360c59d87c4SChristoph Hellwig struct inode *inode, 1361c59d87c4SChristoph Hellwig sector_t iblock, 1362c59d87c4SChristoph Hellwig struct buffer_head *bh_result, 1363c59d87c4SChristoph Hellwig int create) 1364c59d87c4SChristoph Hellwig { 1365c59d87c4SChristoph Hellwig return __xfs_get_blocks(inode, iblock, bh_result, create, 1); 1366c59d87c4SChristoph Hellwig } 1367c59d87c4SChristoph Hellwig 1368c59d87c4SChristoph Hellwig /* 1369c59d87c4SChristoph Hellwig * Complete a direct I/O write request. 1370c59d87c4SChristoph Hellwig * 1371c59d87c4SChristoph Hellwig * If the private argument is non-NULL __xfs_get_blocks signals us that we 1372c59d87c4SChristoph Hellwig * need to issue a transaction to convert the range from unwritten to written 1373c59d87c4SChristoph Hellwig * extents. 
In case this is regular synchronous I/O we just call xfs_end_io 1374c59d87c4SChristoph Hellwig * to do this and we are done. But in case this was a successful AIO 1375c59d87c4SChristoph Hellwig * request this handler is called from interrupt context, from which we 1376c59d87c4SChristoph Hellwig * can't start transactions. In that case offload the I/O completion to 1377c59d87c4SChristoph Hellwig * the workqueues we also use for buffered I/O completion. 1378c59d87c4SChristoph Hellwig */ 1379c59d87c4SChristoph Hellwig STATIC void 1380c59d87c4SChristoph Hellwig xfs_end_io_direct_write( 1381c59d87c4SChristoph Hellwig struct kiocb *iocb, 1382c59d87c4SChristoph Hellwig loff_t offset, 1383c59d87c4SChristoph Hellwig ssize_t size, 13847b7a8665SChristoph Hellwig void *private) 1385c59d87c4SChristoph Hellwig { 1386c59d87c4SChristoph Hellwig struct xfs_ioend *ioend = iocb->private; 1387c59d87c4SChristoph Hellwig 1388c59d87c4SChristoph Hellwig /* 13892813d682SChristoph Hellwig * While the generic direct I/O code updates the inode size, it does 13902813d682SChristoph Hellwig * so only after the end_io handler is called, which means our 13912813d682SChristoph Hellwig * end_io handler thinks the on-disk size is outside the in-core 13922813d682SChristoph Hellwig * size. To prevent this just update it a little bit earlier here. 13932813d682SChristoph Hellwig */ 13942813d682SChristoph Hellwig if (offset + size > i_size_read(ioend->io_inode)) 13952813d682SChristoph Hellwig i_size_write(ioend->io_inode, offset + size); 13962813d682SChristoph Hellwig 13972813d682SChristoph Hellwig /* 1398c59d87c4SChristoph Hellwig * blockdev_direct_IO can return an error even after the I/O 1399c59d87c4SChristoph Hellwig * completion handler was called. Thus we need to protect 1400c59d87c4SChristoph Hellwig * against double-freeing. 1401c59d87c4SChristoph Hellwig */ 1402c59d87c4SChristoph Hellwig iocb->private = NULL; 1403c59d87c4SChristoph Hellwig 1404c59d87c4SChristoph Hellwig ioend->io_offset = offset; 1405c59d87c4SChristoph Hellwig ioend->io_size = size; 1406c59d87c4SChristoph Hellwig if (private && size > 0) 14070d882a36SAlain Renaud ioend->io_type = XFS_IO_UNWRITTEN; 1408c59d87c4SChristoph Hellwig 1409c59d87c4SChristoph Hellwig xfs_finish_ioend_sync(ioend); 1410c59d87c4SChristoph Hellwig } 1411c59d87c4SChristoph Hellwig 1412c59d87c4SChristoph Hellwig STATIC ssize_t 1413c59d87c4SChristoph Hellwig xfs_vm_direct_IO( 1414c59d87c4SChristoph Hellwig int rw, 1415c59d87c4SChristoph Hellwig struct kiocb *iocb, 1416c59d87c4SChristoph Hellwig const struct iovec *iov, 1417c59d87c4SChristoph Hellwig loff_t offset, 1418c59d87c4SChristoph Hellwig unsigned long nr_segs) 1419c59d87c4SChristoph Hellwig { 1420c59d87c4SChristoph Hellwig struct inode *inode = iocb->ki_filp->f_mapping->host; 1421c59d87c4SChristoph Hellwig struct block_device *bdev = xfs_find_bdev_for_inode(inode); 1422281627dfSChristoph Hellwig struct xfs_ioend *ioend = NULL; 1423c59d87c4SChristoph Hellwig ssize_t ret; 1424c59d87c4SChristoph Hellwig 1425c59d87c4SChristoph Hellwig if (rw & WRITE) { 1426281627dfSChristoph Hellwig size_t size = iov_length(iov, nr_segs); 1427281627dfSChristoph Hellwig 1428281627dfSChristoph Hellwig /* 1429437a255aSDave Chinner * We cannot preallocate a size update transaction here as we 1430437a255aSDave Chinner * don't know whether allocation is necessary or not. Hence we 1431437a255aSDave Chinner * can only tell IO completion that one is necessary if we are 1432437a255aSDave Chinner * not doing unwritten extent conversion. 
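 *
 * In sketch form: for the write case an XFS_IO_DIRECT ioend is allocated
 * up front, and if the request may extend the on-disk size (offset + size
 * beyond i_d.di_size) io_isdirect is set so that I/O completion knows it
 * may have to update the on-disk inode size itself.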
1433281627dfSChristoph Hellwig */ 14340d882a36SAlain Renaud iocb->private = ioend = xfs_alloc_ioend(inode, XFS_IO_DIRECT); 1435437a255aSDave Chinner if (offset + size > XFS_I(inode)->i_d.di_size) 1436281627dfSChristoph Hellwig ioend->io_isdirect = 1; 1437c59d87c4SChristoph Hellwig 1438c59d87c4SChristoph Hellwig ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1439c59d87c4SChristoph Hellwig offset, nr_segs, 1440c59d87c4SChristoph Hellwig xfs_get_blocks_direct, 1441c59d87c4SChristoph Hellwig xfs_end_io_direct_write, NULL, 0); 1442c59d87c4SChristoph Hellwig if (ret != -EIOCBQUEUED && iocb->private) 1443437a255aSDave Chinner goto out_destroy_ioend; 1444c59d87c4SChristoph Hellwig } else { 1445c59d87c4SChristoph Hellwig ret = __blockdev_direct_IO(rw, iocb, inode, bdev, iov, 1446c59d87c4SChristoph Hellwig offset, nr_segs, 1447c59d87c4SChristoph Hellwig xfs_get_blocks_direct, 1448c59d87c4SChristoph Hellwig NULL, NULL, 0); 1449c59d87c4SChristoph Hellwig } 1450c59d87c4SChristoph Hellwig 1451c59d87c4SChristoph Hellwig return ret; 1452281627dfSChristoph Hellwig 1453281627dfSChristoph Hellwig out_destroy_ioend: 1454281627dfSChristoph Hellwig xfs_destroy_ioend(ioend); 1455281627dfSChristoph Hellwig return ret; 1456c59d87c4SChristoph Hellwig } 1457c59d87c4SChristoph Hellwig 1458c59d87c4SChristoph Hellwig /* 14592813d682SChristoph Hellwig * Punch out the delalloc blocks we have already allocated. 14602813d682SChristoph Hellwig * 1461d3bc815aSDave Chinner * Don't bother with xfs_setattr given that nothing can have made it to disk yet 1462d3bc815aSDave Chinner * as the page is still locked at this point. 1463c59d87c4SChristoph Hellwig */ 1464d3bc815aSDave Chinner STATIC void 1465d3bc815aSDave Chinner xfs_vm_kill_delalloc_range( 1466d3bc815aSDave Chinner struct inode *inode, 1467d3bc815aSDave Chinner loff_t start, 1468d3bc815aSDave Chinner loff_t end) 1469d3bc815aSDave Chinner { 1470c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1471c59d87c4SChristoph Hellwig xfs_fileoff_t start_fsb; 1472c59d87c4SChristoph Hellwig xfs_fileoff_t end_fsb; 1473c59d87c4SChristoph Hellwig int error; 1474c59d87c4SChristoph Hellwig 1475d3bc815aSDave Chinner start_fsb = XFS_B_TO_FSB(ip->i_mount, start); 1476d3bc815aSDave Chinner end_fsb = XFS_B_TO_FSB(ip->i_mount, end); 1477c59d87c4SChristoph Hellwig if (end_fsb <= start_fsb) 1478c59d87c4SChristoph Hellwig return; 1479c59d87c4SChristoph Hellwig 1480c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_ILOCK_EXCL); 1481c59d87c4SChristoph Hellwig error = xfs_bmap_punch_delalloc_range(ip, start_fsb, 1482c59d87c4SChristoph Hellwig end_fsb - start_fsb); 1483c59d87c4SChristoph Hellwig if (error) { 1484c59d87c4SChristoph Hellwig /* something screwed, just bail */ 1485c59d87c4SChristoph Hellwig if (!XFS_FORCED_SHUTDOWN(ip->i_mount)) { 1486c59d87c4SChristoph Hellwig xfs_alert(ip->i_mount, 1487c59d87c4SChristoph Hellwig "xfs_vm_write_failed: unable to clean up ino %lld", 1488c59d87c4SChristoph Hellwig ip->i_ino); 1489c59d87c4SChristoph Hellwig } 1490c59d87c4SChristoph Hellwig } 1491c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_ILOCK_EXCL); 1492c59d87c4SChristoph Hellwig } 1493d3bc815aSDave Chinner 1494d3bc815aSDave Chinner STATIC void 1495d3bc815aSDave Chinner xfs_vm_write_failed( 1496d3bc815aSDave Chinner struct inode *inode, 1497d3bc815aSDave Chinner struct page *page, 1498d3bc815aSDave Chinner loff_t pos, 1499d3bc815aSDave Chinner unsigned len) 1500d3bc815aSDave Chinner { 150158e59854SJie Liu loff_t block_offset; 1502d3bc815aSDave Chinner loff_t block_start; 
1503d3bc815aSDave Chinner 	loff_t			block_end;
1504d3bc815aSDave Chinner 	loff_t			from = pos & (PAGE_CACHE_SIZE - 1);
1505d3bc815aSDave Chinner 	loff_t			to = from + len;
1506d3bc815aSDave Chinner 	struct buffer_head	*bh, *head;
1507d3bc815aSDave Chinner 
150858e59854SJie Liu 	/*
150958e59854SJie Liu 	 * The request pos offset might be 32 or 64 bit, which is fine on a
151058e59854SJie Liu 	 * 64-bit platform. However, for a 64-bit pos on a 32-bit platform
151158e59854SJie Liu 	 * the high 32 bits are masked off if we evaluate block_offset via
151258e59854SJie Liu 	 * (pos & PAGE_MASK), because PAGE_MASK is 0xfffff000 as an unsigned
151358e59854SJie Liu 	 * long; the incorrect result would then trip the ASSERT below in
151458e59854SJie Liu 	 * most cases. For example, for pos = 0x100001000 the mask yields
151558e59854SJie Liu 	 * 0x1000 while the shifts preserve 0x100001000. To avoid this,
151658e59854SJie Liu 	 * evaluate the block_offset of the start of the page using shifts
151758e59854SJie Liu 	 * rather than masks.
151858e59854SJie Liu 	 */
151958e59854SJie Liu 	block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
152058e59854SJie Liu 
1521d3bc815aSDave Chinner 	ASSERT(block_offset + from == pos);
1522d3bc815aSDave Chinner 
1523d3bc815aSDave Chinner 	head = page_buffers(page);
1524d3bc815aSDave Chinner 	block_start = 0;
1525d3bc815aSDave Chinner 	for (bh = head; bh != head || !block_start;
1526d3bc815aSDave Chinner 	     bh = bh->b_this_page, block_start = block_end,
1527d3bc815aSDave Chinner 	     block_offset += bh->b_size) {
1528d3bc815aSDave Chinner 		block_end = block_start + bh->b_size;
1529d3bc815aSDave Chinner 
1530d3bc815aSDave Chinner 		/* skip buffers before the write */
1531d3bc815aSDave Chinner 		if (block_end <= from)
1532d3bc815aSDave Chinner 			continue;
1533d3bc815aSDave Chinner 
1534d3bc815aSDave Chinner 		/* if the buffer is after the write, we're done */
1535d3bc815aSDave Chinner 		if (block_start >= to)
1536d3bc815aSDave Chinner 			break;
1537d3bc815aSDave Chinner 
1538d3bc815aSDave Chinner 		if (!buffer_delay(bh))
1539d3bc815aSDave Chinner 			continue;
1540d3bc815aSDave Chinner 
1541d3bc815aSDave Chinner 		if (!buffer_new(bh) && block_offset < i_size_read(inode))
1542d3bc815aSDave Chinner 			continue;
1543d3bc815aSDave Chinner 
1544d3bc815aSDave Chinner 		xfs_vm_kill_delalloc_range(inode, block_offset,
1545d3bc815aSDave Chinner 					   block_offset + bh->b_size);
1546c59d87c4SChristoph Hellwig 	}
1547c59d87c4SChristoph Hellwig 
1548d3bc815aSDave Chinner }
1549d3bc815aSDave Chinner 
1550d3bc815aSDave Chinner /*
1551d3bc815aSDave Chinner  * This used to call block_write_begin(), but it unlocks and releases the page
1552d3bc815aSDave Chinner  * on error, and we need that page to be able to punch stale delalloc blocks out
1553d3bc815aSDave Chinner  * on failure. Hence we copy-n-waste it here and call xfs_vm_write_failed() at
1554d3bc815aSDave Chinner  * the appropriate point.
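 *
 * The failure handling below therefore runs, in order: punch out any
 * delalloc blocks set up for the failed write via xfs_vm_write_failed(),
 * unlock the page, and if the write would have extended the file,
 * truncate the page cache back to the old size before dropping the page
 * reference.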
1555d3bc815aSDave Chinner */ 1556c59d87c4SChristoph Hellwig STATIC int 1557c59d87c4SChristoph Hellwig xfs_vm_write_begin( 1558c59d87c4SChristoph Hellwig struct file *file, 1559c59d87c4SChristoph Hellwig struct address_space *mapping, 1560c59d87c4SChristoph Hellwig loff_t pos, 1561c59d87c4SChristoph Hellwig unsigned len, 1562c59d87c4SChristoph Hellwig unsigned flags, 1563c59d87c4SChristoph Hellwig struct page **pagep, 1564c59d87c4SChristoph Hellwig void **fsdata) 1565c59d87c4SChristoph Hellwig { 1566d3bc815aSDave Chinner pgoff_t index = pos >> PAGE_CACHE_SHIFT; 1567d3bc815aSDave Chinner struct page *page; 1568d3bc815aSDave Chinner int status; 1569c59d87c4SChristoph Hellwig 1570d3bc815aSDave Chinner ASSERT(len <= PAGE_CACHE_SIZE); 1571d3bc815aSDave Chinner 1572d3bc815aSDave Chinner page = grab_cache_page_write_begin(mapping, index, 1573d3bc815aSDave Chinner flags | AOP_FLAG_NOFS); 1574d3bc815aSDave Chinner if (!page) 1575d3bc815aSDave Chinner return -ENOMEM; 1576d3bc815aSDave Chinner 1577d3bc815aSDave Chinner status = __block_write_begin(page, pos, len, xfs_get_blocks); 1578d3bc815aSDave Chinner if (unlikely(status)) { 1579d3bc815aSDave Chinner struct inode *inode = mapping->host; 1580d3bc815aSDave Chinner 1581d3bc815aSDave Chinner xfs_vm_write_failed(inode, page, pos, len); 1582d3bc815aSDave Chinner unlock_page(page); 1583d3bc815aSDave Chinner 1584d3bc815aSDave Chinner if (pos + len > i_size_read(inode)) 15857caef267SKirill A. Shutemov truncate_pagecache(inode, i_size_read(inode)); 1586d3bc815aSDave Chinner 1587d3bc815aSDave Chinner page_cache_release(page); 1588d3bc815aSDave Chinner page = NULL; 1589c59d87c4SChristoph Hellwig } 1590c59d87c4SChristoph Hellwig 1591d3bc815aSDave Chinner *pagep = page; 1592d3bc815aSDave Chinner return status; 1593d3bc815aSDave Chinner } 1594d3bc815aSDave Chinner 1595d3bc815aSDave Chinner /* 1596d3bc815aSDave Chinner * On failure, we only need to kill delalloc blocks beyond EOF because they 1597d3bc815aSDave Chinner * will never be written. For blocks within EOF, generic_write_end() zeros them 1598d3bc815aSDave Chinner * so they are safe to leave alone and be written with all the other valid data. 1599d3bc815aSDave Chinner */ 1600c59d87c4SChristoph Hellwig STATIC int 1601c59d87c4SChristoph Hellwig xfs_vm_write_end( 1602c59d87c4SChristoph Hellwig struct file *file, 1603c59d87c4SChristoph Hellwig struct address_space *mapping, 1604c59d87c4SChristoph Hellwig loff_t pos, 1605c59d87c4SChristoph Hellwig unsigned len, 1606c59d87c4SChristoph Hellwig unsigned copied, 1607c59d87c4SChristoph Hellwig struct page *page, 1608c59d87c4SChristoph Hellwig void *fsdata) 1609c59d87c4SChristoph Hellwig { 1610c59d87c4SChristoph Hellwig int ret; 1611c59d87c4SChristoph Hellwig 1612d3bc815aSDave Chinner ASSERT(len <= PAGE_CACHE_SIZE); 1613d3bc815aSDave Chinner 1614c59d87c4SChristoph Hellwig ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata); 1615d3bc815aSDave Chinner if (unlikely(ret < len)) { 1616d3bc815aSDave Chinner struct inode *inode = mapping->host; 1617d3bc815aSDave Chinner size_t isize = i_size_read(inode); 1618d3bc815aSDave Chinner loff_t to = pos + len; 1619d3bc815aSDave Chinner 1620d3bc815aSDave Chinner if (to > isize) { 16217caef267SKirill A. 
Shutemov truncate_pagecache(inode, isize); 1622d3bc815aSDave Chinner xfs_vm_kill_delalloc_range(inode, isize, to); 1623d3bc815aSDave Chinner } 1624d3bc815aSDave Chinner } 1625c59d87c4SChristoph Hellwig return ret; 1626c59d87c4SChristoph Hellwig } 1627c59d87c4SChristoph Hellwig 1628c59d87c4SChristoph Hellwig STATIC sector_t 1629c59d87c4SChristoph Hellwig xfs_vm_bmap( 1630c59d87c4SChristoph Hellwig struct address_space *mapping, 1631c59d87c4SChristoph Hellwig sector_t block) 1632c59d87c4SChristoph Hellwig { 1633c59d87c4SChristoph Hellwig struct inode *inode = (struct inode *)mapping->host; 1634c59d87c4SChristoph Hellwig struct xfs_inode *ip = XFS_I(inode); 1635c59d87c4SChristoph Hellwig 1636c59d87c4SChristoph Hellwig trace_xfs_vm_bmap(XFS_I(inode)); 1637c59d87c4SChristoph Hellwig xfs_ilock(ip, XFS_IOLOCK_SHARED); 16384bc1ea6bSDave Chinner filemap_write_and_wait(mapping); 1639c59d87c4SChristoph Hellwig xfs_iunlock(ip, XFS_IOLOCK_SHARED); 1640c59d87c4SChristoph Hellwig return generic_block_bmap(mapping, block, xfs_get_blocks); 1641c59d87c4SChristoph Hellwig } 1642c59d87c4SChristoph Hellwig 1643c59d87c4SChristoph Hellwig STATIC int 1644c59d87c4SChristoph Hellwig xfs_vm_readpage( 1645c59d87c4SChristoph Hellwig struct file *unused, 1646c59d87c4SChristoph Hellwig struct page *page) 1647c59d87c4SChristoph Hellwig { 1648c59d87c4SChristoph Hellwig return mpage_readpage(page, xfs_get_blocks); 1649c59d87c4SChristoph Hellwig } 1650c59d87c4SChristoph Hellwig 1651c59d87c4SChristoph Hellwig STATIC int 1652c59d87c4SChristoph Hellwig xfs_vm_readpages( 1653c59d87c4SChristoph Hellwig struct file *unused, 1654c59d87c4SChristoph Hellwig struct address_space *mapping, 1655c59d87c4SChristoph Hellwig struct list_head *pages, 1656c59d87c4SChristoph Hellwig unsigned nr_pages) 1657c59d87c4SChristoph Hellwig { 1658c59d87c4SChristoph Hellwig return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks); 1659c59d87c4SChristoph Hellwig } 1660c59d87c4SChristoph Hellwig 1661c59d87c4SChristoph Hellwig const struct address_space_operations xfs_address_space_operations = { 1662c59d87c4SChristoph Hellwig .readpage = xfs_vm_readpage, 1663c59d87c4SChristoph Hellwig .readpages = xfs_vm_readpages, 1664c59d87c4SChristoph Hellwig .writepage = xfs_vm_writepage, 1665c59d87c4SChristoph Hellwig .writepages = xfs_vm_writepages, 1666c59d87c4SChristoph Hellwig .releasepage = xfs_vm_releasepage, 1667c59d87c4SChristoph Hellwig .invalidatepage = xfs_vm_invalidatepage, 1668c59d87c4SChristoph Hellwig .write_begin = xfs_vm_write_begin, 1669c59d87c4SChristoph Hellwig .write_end = xfs_vm_write_end, 1670c59d87c4SChristoph Hellwig .bmap = xfs_vm_bmap, 1671c59d87c4SChristoph Hellwig .direct_IO = xfs_vm_direct_IO, 1672c59d87c4SChristoph Hellwig .migratepage = buffer_migrate_page, 1673c59d87c4SChristoph Hellwig .is_partially_uptodate = block_is_partially_uptodate, 1674c59d87c4SChristoph Hellwig .error_remove_page = generic_error_remove_page, 1675c59d87c4SChristoph Hellwig }; 1676
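/*
 * For reference: the table above is how the generic VFS and VM code reaches
 * the routines in this file. The XFS inode setup code (elsewhere in XFS)
 * points inode->i_mapping->a_ops at xfs_address_space_operations, after
 * which, roughly speaking:
 *
 *	buffered writes		-> ->write_begin / ->write_end
 *	writeback/flusher	-> ->writepage / ->writepages
 *	O_DIRECT		-> ->direct_IO
 *	reclaim/truncate	-> ->releasepage / ->invalidatepage
 *	reads and readahead	-> ->readpage / ->readpages
 *
 * so the entry points above are normally invoked through this operations
 * table rather than called directly.
 */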