/*
 * Copyright (c) 2014 Christoph Hellwig.
 */
#include "xfs.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_sb.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_log.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_error.h"
#include "xfs_iomap.h"
#include "xfs_shared.h"
#include "xfs_bit.h"
#include "xfs_pnfs.h"

/*
 * Ensure that we do not have any outstanding pNFS layouts that can be used by
 * clients to directly read from or write to this inode. This must be called
 * before every operation that can remove blocks from the extent map.
 * Additionally we call it during the write operation, where we aren't
 * concerned about exposing unallocated blocks but just want to provide basic
 * synchronization between a local writer and pNFS clients. mmap writes would
 * also benefit from this sort of synchronization, but due to the tricky
 * locking rules in the page fault path we don't bother.
 */
int
xfs_break_layouts(
        struct inode            *inode,
        uint                    *iolock,
        bool                    with_imutex)
{
        struct xfs_inode        *ip = XFS_I(inode);
        int                     error;

        ASSERT(xfs_isilocked(ip, XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));

        /*
         * Retry for as long as break_layout() returns -EWOULDBLOCK; any
         * other return value (including success) ends the loop and is
         * passed back to the caller.
         */
        while ((error = break_layout(inode, false)) == -EWOULDBLOCK) {
                xfs_iunlock(ip, *iolock);
                if (with_imutex && (*iolock & XFS_IOLOCK_EXCL))
                        mutex_unlock(&inode->i_mutex);
                error = break_layout(inode, true);
                *iolock = XFS_IOLOCK_EXCL;
                if (with_imutex)
                        mutex_lock(&inode->i_mutex);
                xfs_ilock(ip, *iolock);
        }

        return error;
}

/*
 * Get a unique ID including its location so that the client can identify
 * the exported device.
 */
int
xfs_fs_get_uuid(
        struct super_block      *sb,
        u8                      *buf,
        u32                     *len,
        u64                     *offset)
{
        struct xfs_mount        *mp = XFS_M(sb);

        printk_once(KERN_NOTICE
"XFS (%s): using experimental pNFS feature, use at your own risk!\n",
                mp->m_fsname);

        if (*len < sizeof(uuid_t))
                return -EINVAL;

        memcpy(buf, &mp->m_sb.sb_uuid, sizeof(uuid_t));
        *len = sizeof(uuid_t);
        *offset = offsetof(struct xfs_dsb, sb_uuid);
        return 0;
}

static void
xfs_bmbt_to_iomap(
        struct xfs_inode        *ip,
        struct iomap            *iomap,
        struct xfs_bmbt_irec    *imap)
{
        struct xfs_mount        *mp = ip->i_mount;

        if (imap->br_startblock == HOLESTARTBLOCK) {
                iomap->blkno = IOMAP_NULL_BLOCK;
                iomap->type = IOMAP_HOLE;
        } else if (imap->br_startblock == DELAYSTARTBLOCK) {
                iomap->blkno = IOMAP_NULL_BLOCK;
                iomap->type = IOMAP_DELALLOC;
        } else {
                iomap->blkno =
                        XFS_FSB_TO_DADDR(ip->i_mount, imap->br_startblock);
                if (imap->br_state == XFS_EXT_UNWRITTEN)
                        iomap->type = IOMAP_UNWRITTEN;
                else
                        iomap->type = IOMAP_MAPPED;
        }
        iomap->offset = XFS_FSB_TO_B(mp, imap->br_startoff);
        iomap->length = XFS_FSB_TO_B(mp, imap->br_blockcount);
}
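/*
 * Worked example for the translation above (the numbers are made up for
 * illustration): on a filesystem with 4k blocks, a written extent with
 * br_startoff == 8, br_startblock == 100 and br_blockcount == 4 becomes
 *
 *      iomap->blkno  = XFS_FSB_TO_DADDR(mp, 100)   (in 512-byte sectors)
 *      iomap->type   = IOMAP_MAPPED
 *      iomap->offset = 8 * 4096 = 32768 bytes
 *      iomap->length = 4 * 4096 = 16384 bytes
 *
 * Holes and delayed allocations have no physical address, so they are
 * reported as IOMAP_NULL_BLOCK with type IOMAP_HOLE or IOMAP_DELALLOC.
 */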
/*
 * Get a layout for the pNFS client.
 */
int
xfs_fs_map_blocks(
        struct inode            *inode,
        loff_t                  offset,
        u64                     length,
        struct iomap            *iomap,
        bool                    write,
        u32                     *device_generation)
{
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_bmbt_irec    imap;
        xfs_fileoff_t           offset_fsb, end_fsb;
        loff_t                  limit;
        int                     bmapi_flags = XFS_BMAPI_ENTIRE;
        int                     nimaps = 1;
        uint                    lock_flags;
        int                     error = 0;

        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;

        /*
         * We can't export inodes residing on the realtime device. The
         * realtime device doesn't have a UUID to identify it, so the client
         * has no way to find it.
         */
        if (XFS_IS_REALTIME_INODE(ip))
                return -ENXIO;

        /*
         * Lock out any other I/O before we flush and invalidate the
         * pagecache, and then hand out a layout to the remote system.
         * This is very similar to direct I/O, except that the
         * synchronization is much more complicated. See the comment near
         * xfs_break_layouts for a detailed explanation.
         */
        xfs_ilock(ip, XFS_IOLOCK_EXCL);

        error = -EINVAL;
        limit = mp->m_super->s_maxbytes;
        if (!write)
                limit = max(limit, round_up(i_size_read(inode),
                                inode->i_sb->s_blocksize));
        if (offset > limit)
                goto out_unlock;
        if (offset > limit - length)
                length = limit - offset;

        error = filemap_write_and_wait(inode->i_mapping);
        if (error)
                goto out_unlock;
        error = invalidate_inode_pages2(inode->i_mapping);
        if (WARN_ON_ONCE(error))
                goto out_unlock;        /* don't leak the iolock on error */

        end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + length);
        offset_fsb = XFS_B_TO_FSBT(mp, offset);

        lock_flags = xfs_ilock_data_map_shared(ip);
        error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb,
                               &imap, &nimaps, bmapi_flags);
        xfs_iunlock(ip, lock_flags);

        if (error)
                goto out_unlock;

        if (write) {
                enum xfs_prealloc_flags flags = 0;

                ASSERT(imap.br_startblock != DELAYSTARTBLOCK);

                if (!nimaps || imap.br_startblock == HOLESTARTBLOCK) {
                        /*
                         * xfs_iomap_write_direct() expects to take ownership
                         * of the shared ilock.
                         */
                        xfs_ilock(ip, XFS_ILOCK_SHARED);
                        error = xfs_iomap_write_direct(ip, offset, length,
                                                       &imap, nimaps);
                        if (error)
                                goto out_unlock;

                        /*
                         * Ensure the next transaction is committed
                         * synchronously so that the blocks allocated and
                         * handed out to the client are guaranteed to be
                         * present even after a server crash.
                         */
                        flags |= XFS_PREALLOC_SET | XFS_PREALLOC_SYNC;
                }

                error = xfs_update_prealloc_flags(ip, flags);
                if (error)
                        goto out_unlock;
        }
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);

        xfs_bmbt_to_iomap(ip, iomap, &imap);
        *device_generation = mp->m_generation;
        return error;
out_unlock:
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        return error;
}
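/*
 * Sketch of how the handlers in this file fit together, assuming the nfsd
 * pNFS block layout driver is the caller (the exact call chain lives in
 * fs/nfsd and is not spelled out here):
 *
 *      GETDEVICEINFO -> ->get_uuid():      the client matches the exported
 *                                          device by superblock UUID
 *      LAYOUTGET     -> ->map_blocks():    the client gets a (blkno, offset,
 *                                          length, type) extent and does I/O
 *                                          directly against the block device
 *      LAYOUTCOMMIT  -> ->commit_blocks(): the server converts unwritten
 *                                          extents and updates timestamps
 *                                          and size (see below)
 */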
/*
 * Ensure the size update falls into a valid allocated block.
 */
static int
xfs_pnfs_validate_isize(
        struct xfs_inode        *ip,
        xfs_off_t               isize)
{
        struct xfs_bmbt_irec    imap;
        int                     nimaps = 1;
        int                     error = 0;

        xfs_ilock(ip, XFS_ILOCK_SHARED);
        error = xfs_bmapi_read(ip, XFS_B_TO_FSBT(ip->i_mount, isize - 1), 1,
                               &imap, &nimaps, 0);
        xfs_iunlock(ip, XFS_ILOCK_SHARED);
        if (error)
                return error;

        if (imap.br_startblock == HOLESTARTBLOCK ||
            imap.br_startblock == DELAYSTARTBLOCK ||
            imap.br_state == XFS_EXT_UNWRITTEN)
                return -EIO;
        return 0;
}

/*
 * Make sure the blocks described by maps are stable on disk. This includes
 * converting any unwritten extents, flushing the disk cache and updating the
 * time stamps.
 *
 * Note that we rely on the caller to always send us a timestamp update so
 * that we always commit a transaction here. If that stops being true we will
 * have to manually flush the cache here similar to what the fsync code path
 * does for datasyncs on files that have no dirty metadata.
 */
int
xfs_fs_commit_blocks(
        struct inode            *inode,
        struct iomap            *maps,
        int                     nr_maps,
        struct iattr            *iattr)
{
        struct xfs_inode        *ip = XFS_I(inode);
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        bool                    update_isize = false;
        int                     error, i;
        loff_t                  size;

        ASSERT(iattr->ia_valid & (ATTR_ATIME|ATTR_CTIME|ATTR_MTIME));

        xfs_ilock(ip, XFS_IOLOCK_EXCL);

        size = i_size_read(inode);
        if ((iattr->ia_valid & ATTR_SIZE) && iattr->ia_size > size) {
                update_isize = true;
                size = iattr->ia_size;
        }

        for (i = 0; i < nr_maps; i++) {
                u64 start, length, end;

                start = maps[i].offset;
                if (start > size)
                        continue;

                end = start + maps[i].length;
                if (end > size)
                        end = size;

                length = end - start;
                if (!length)
                        continue;

                /*
                 * Make sure reads through the pagecache see the new data.
                 */
                error = invalidate_inode_pages2_range(inode->i_mapping,
                                        start >> PAGE_CACHE_SHIFT,
                                        (end - 1) >> PAGE_CACHE_SHIFT);
                WARN_ON_ONCE(error);

                error = xfs_iomap_write_unwritten(ip, start, length);
                if (error)
                        goto out_drop_iolock;
        }

        if (update_isize) {
                error = xfs_pnfs_validate_isize(ip, size);
                if (error)
                        goto out_drop_iolock;
        }

        tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
        error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
        if (error) {
                xfs_trans_cancel(tp);
                goto out_drop_iolock;
        }

        xfs_ilock(ip, XFS_ILOCK_EXCL);
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);

        xfs_setattr_time(ip, iattr);
        if (update_isize) {
                i_size_write(inode, iattr->ia_size);
                ip->i_d.di_size = iattr->ia_size;
        }

        xfs_trans_set_sync(tp);
        error = xfs_trans_commit(tp);

out_drop_iolock:
        xfs_iunlock(ip, XFS_IOLOCK_EXCL);
        return error;
}
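/*
 * None of the functions above are called directly by XFS itself; they are
 * exposed to nfsd through the filesystem's export_operations. A minimal
 * sketch of that wiring, assuming it lives in fs/xfs/xfs_export.c as in
 * mainline (only the pNFS-related fields are shown):
 *
 *      const struct export_operations xfs_export_operations = {
 *              ...
 *              .get_uuid       = xfs_fs_get_uuid,
 *              .map_blocks     = xfs_fs_map_blocks,
 *              .commit_blocks  = xfs_fs_commit_blocks,
 *      };
 */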