// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (c) 2000-2005 Silicon Graphics, Inc.
 * All Rights Reserved.
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_log_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_inode.h"
#include "xfs_trans.h"
#include "xfs_inode_item.h"
#include "xfs_bmap.h"
#include "xfs_bmap_util.h"
#include "xfs_dir2.h"
#include "xfs_dir2_priv.h"
#include "xfs_ioctl.h"
#include "xfs_trace.h"
#include "xfs_log.h"
#include "xfs_icache.h"
#include "xfs_pnfs.h"
#include "xfs_iomap.h"
#include "xfs_reflink.h"

#include <linux/falloc.h>
#include <linux/backing-dev.h>
#include <linux/mman.h>
#include <linux/fadvise.h>

static const struct vm_operations_struct xfs_file_vm_ops;

/*
 * Decide if the given file range is aligned to the size of the fundamental
 * allocation unit for the file.
 */
static bool
xfs_is_falloc_aligned(
	struct xfs_inode	*ip,
	loff_t			pos,
	long long int		len)
{
	struct xfs_mount	*mp = ip->i_mount;
	uint64_t		mask;

	if (XFS_IS_REALTIME_INODE(ip)) {
		if (!is_power_of_2(mp->m_sb.sb_rextsize)) {
			u64	rextbytes;
			u32	mod;

			rextbytes = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize);
			div_u64_rem(pos, rextbytes, &mod);
			if (mod)
				return false;
			div_u64_rem(len, rextbytes, &mod);
			return mod == 0;
		}
		mask = XFS_FSB_TO_B(mp, mp->m_sb.sb_rextsize) - 1;
	} else {
		mask = mp->m_sb.sb_blocksize - 1;
	}

	return !((pos | len) & mask);
}

int
xfs_update_prealloc_flags(
	struct xfs_inode	*ip,
	enum xfs_prealloc_flags	flags)
{
	struct xfs_trans	*tp;
	int			error;

	error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_writeid,
			0, 0, 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	if (!(flags & XFS_PREALLOC_INVISIBLE)) {
		VFS_I(ip)->i_mode &= ~S_ISUID;
		if (VFS_I(ip)->i_mode & S_IXGRP)
			VFS_I(ip)->i_mode &= ~S_ISGID;
		xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
	}

	if (flags & XFS_PREALLOC_SET)
		ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;
	if (flags & XFS_PREALLOC_CLEAR)
		ip->i_d.di_flags &= ~XFS_DIFLAG_PREALLOC;

	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
	if (flags & XFS_PREALLOC_SYNC)
		xfs_trans_set_sync(tp);
	return xfs_trans_commit(tp);
}

/*
 * Fsync operations on directories are much simpler than on regular files,
 * as there is no file data to flush, and thus also no need for explicit
 * cache flush operations, and there are no non-transaction metadata updates
 * on directories either.
 */
STATIC int
xfs_dir_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct xfs_inode	*ip = XFS_I(file->f_mapping->host);

	trace_xfs_dir_fsync(ip);
	return xfs_log_force_inode(ip);
}

STATIC int
xfs_file_fsync(
	struct file		*file,
	loff_t			start,
	loff_t			end,
	int			datasync)
{
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_inode_log_item *iip = ip->i_itemp;
	struct xfs_mount	*mp = ip->i_mount;
	int			error = 0;
	int			log_flushed = 0;
	xfs_lsn_t		lsn = 0;

	trace_xfs_file_fsync(ip);

	error = file_write_and_wait_range(file, start, end);
	if (error)
		return error;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	xfs_iflags_clear(ip, XFS_ITRUNCATED);

	/*
	 * If we have an RT and/or log subvolume we need to make sure to flush
	 * the write cache of the device used for file data first. This is to
	 * ensure newly written file data makes it to disk before logging the
	 * new inode size in case of an extending write.
	 */
	if (XFS_IS_REALTIME_INODE(ip))
		xfs_blkdev_issue_flush(mp->m_rtdev_targp);
	else if (mp->m_logdev_targp != mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	/*
	 * All metadata updates are logged, which means that we just have to
	 * flush the log up to the latest LSN that touched the inode. If we
	 * have concurrent fsync/fdatasync() calls, we need them to all block
	 * on the log force before we clear the ili_fsync_fields field. This
	 * ensures that we don't get a racing sync operation that does not wait
	 * for the metadata to hit the journal before returning. If we race
	 * with clearing the ili_fsync_fields, then all that will happen is the
	 * log force will do nothing as the lsn will already be on disk. We
	 * can't race with setting ili_fsync_fields because that is done under
	 * XFS_ILOCK_EXCL, and that can't happen because we hold the lock
	 * shared until after the ili_fsync_fields is cleared.
	 */
	xfs_ilock(ip, XFS_ILOCK_SHARED);
	if (xfs_ipincount(ip)) {
		if (!datasync ||
		    (iip->ili_fsync_fields & ~XFS_ILOG_TIMESTAMP))
			lsn = iip->ili_last_lsn;
	}

	if (lsn) {
		error = xfs_log_force_lsn(mp, lsn, XFS_LOG_SYNC, &log_flushed);
		spin_lock(&iip->ili_lock);
		iip->ili_fsync_fields = 0;
		spin_unlock(&iip->ili_lock);
	}
	xfs_iunlock(ip, XFS_ILOCK_SHARED);

	/*
	 * If we only have a single device, and the log force above was
	 * a no-op, we might have to flush the data device cache here.
	 * This can only happen for fdatasync/O_DSYNC if we were overwriting
	 * an already allocated file and thus do not have any metadata to
	 * commit.
	 */
	if (!log_flushed && !XFS_IS_REALTIME_INODE(ip) &&
	    mp->m_logdev_targp == mp->m_ddev_targp)
		xfs_blkdev_issue_flush(mp->m_ddev_targp);

	return error;
}

STATIC ssize_t
xfs_file_dio_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	size_t			count = iov_iter_count(to);
	ssize_t			ret;

	trace_xfs_file_direct_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	file_accessed(iocb->ki_filp);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = iomap_dio_rw(iocb, to, &xfs_read_iomap_ops, NULL,
			is_sync_kiocb(iocb));
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

static noinline ssize_t
xfs_file_dax_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(iocb->ki_filp->f_mapping->host);
	size_t			count = iov_iter_count(to);
	ssize_t			ret = 0;

	trace_xfs_file_dax_read(ip, count, iocb->ki_pos);

	if (!count)
		return 0; /* skip atime */

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}

	ret = dax_iomap_rw(iocb, to, &xfs_read_iomap_ops);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	file_accessed(iocb->ki_filp);
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_read(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct xfs_inode	*ip = XFS_I(file_inode(iocb->ki_filp));
	ssize_t			ret;

	trace_xfs_file_buffered_read(ip, iov_iter_count(to), iocb->ki_pos);

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, XFS_IOLOCK_SHARED))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, XFS_IOLOCK_SHARED);
	}
	ret = generic_file_read_iter(iocb, to);
	xfs_iunlock(ip, XFS_IOLOCK_SHARED);

	return ret;
}

STATIC ssize_t
xfs_file_read_iter(
	struct kiocb		*iocb,
	struct iov_iter		*to)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_mount	*mp = XFS_I(inode)->i_mount;
	ssize_t			ret = 0;

	XFS_STATS_INC(mp, xs_read_calls);

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	if (IS_DAX(inode))
		ret = xfs_file_dax_read(iocb, to);
	else if (iocb->ki_flags & IOCB_DIRECT)
		ret = xfs_file_dio_aio_read(iocb, to);
	else
		ret = xfs_file_buffered_aio_read(iocb, to);

	if (ret > 0)
		XFS_STATS_ADD(mp, xs_read_bytes, ret);
	return ret;
}

/*
 * Common pre-write limit and setup checks.
 *
 * Called with the iolock held either shared or exclusive according to
 * @iolock, and returns with it held. Might upgrade the iolock to exclusive
 * if called for a direct write beyond i_size.
 */
STATIC ssize_t
xfs_file_aio_write_checks(
	struct kiocb		*iocb,
	struct iov_iter		*from,
	int			*iolock)
{
	struct file		*file = iocb->ki_filp;
	struct inode		*inode = file->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			error = 0;
	size_t			count = iov_iter_count(from);
	bool			drained_dio = false;
	loff_t			isize;

restart:
	error = generic_write_checks(iocb, from);
	if (error <= 0)
		return error;

	error = xfs_break_layouts(inode, iolock, BREAK_WRITE);
	if (error)
		return error;

	/*
	 * For changing security info in file_remove_privs() we need i_rwsem
	 * exclusively.
	 */
	if (*iolock == XFS_IOLOCK_SHARED && !IS_NOSEC(inode)) {
		xfs_iunlock(ip, *iolock);
		*iolock = XFS_IOLOCK_EXCL;
		xfs_ilock(ip, *iolock);
		goto restart;
	}
	/*
	 * If the offset is beyond the size of the file, we need to zero any
	 * blocks that fall between the existing EOF and the start of this
	 * write. If zeroing is needed and we are currently holding the
	 * iolock shared, we need to update it to exclusive which implies
	 * having to redo all checks before.
	 *
	 * We need to serialise against EOF updates that occur in IO
	 * completions here. We want to make sure that nobody is changing the
	 * size while we do this check until we have placed an IO barrier (i.e.
	 * hold the XFS_IOLOCK_EXCL) that prevents new IO from being dispatched.
	 * The spinlock effectively forms a memory barrier once we have the
	 * XFS_IOLOCK_EXCL so we are guaranteed to see the latest EOF value
	 * and hence be able to correctly determine if we need to run zeroing.
	 */
	spin_lock(&ip->i_flags_lock);
	isize = i_size_read(inode);
	if (iocb->ki_pos > isize) {
		spin_unlock(&ip->i_flags_lock);
		if (!drained_dio) {
			if (*iolock == XFS_IOLOCK_SHARED) {
				xfs_iunlock(ip, *iolock);
				*iolock = XFS_IOLOCK_EXCL;
				xfs_ilock(ip, *iolock);
				iov_iter_reexpand(from, count);
			}
			/*
			 * We now have an IO submission barrier in place, but
			 * AIO can do EOF updates during IO completion and hence
			 * we now need to wait for all of them to drain. Non-AIO
			 * DIO will have drained before we are given the
			 * XFS_IOLOCK_EXCL, and so for most cases this wait is a
			 * no-op.
			 */
			inode_dio_wait(inode);
			drained_dio = true;
			goto restart;
		}

		trace_xfs_zero_eof(ip, isize, iocb->ki_pos - isize);
		error = iomap_zero_range(inode, isize, iocb->ki_pos - isize,
				NULL, &xfs_buffered_write_iomap_ops);
		if (error)
			return error;
	} else
		spin_unlock(&ip->i_flags_lock);

	/*
	 * Updating the timestamps will grab the ilock again from
	 * xfs_fs_dirty_inode, so we have to call it after dropping the
	 * lock above. Eventually we should look into a way to avoid
	 * the pointless lock roundtrip.
	 */
	return file_modified(file);
}

static int
xfs_dio_write_end_io(
	struct kiocb		*iocb,
	ssize_t			size,
	int			error,
	unsigned		flags)
{
	struct inode		*inode = file_inode(iocb->ki_filp);
	struct xfs_inode	*ip = XFS_I(inode);
	loff_t			offset = iocb->ki_pos;
	unsigned int		nofs_flag;

	trace_xfs_end_io_direct_write(ip, offset, size);

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (error)
		return error;
	if (!size)
		return 0;

	/*
	 * Capture amount written on completion as we can't reliably account
	 * for it on submission.
	 */
	XFS_STATS_ADD(ip->i_mount, xs_write_bytes, size);

	/*
	 * We can allocate memory here while doing writeback on behalf of
	 * memory reclaim. To avoid memory allocation deadlocks set the
	 * task-wide nofs context for the following operations.
	 */
	nofs_flag = memalloc_nofs_save();

	if (flags & IOMAP_DIO_COW) {
		error = xfs_reflink_end_cow(ip, offset, size);
		if (error)
			goto out;
	}

	/*
	 * Unwritten conversion updates the in-core isize after extent
	 * conversion but before updating the on-disk size. Updating isize any
	 * earlier allows a racing dio read to find unwritten extents before
	 * they are converted.
	 */
	if (flags & IOMAP_DIO_UNWRITTEN) {
		error = xfs_iomap_write_unwritten(ip, offset, size, true);
		goto out;
	}

	/*
	 * We need to update the in-core inode size here so that we don't end
	 * up with the on-disk inode size being outside the in-core inode size.
	 * We have no other method of updating EOF for AIO, so always do it
	 * here if necessary.
	 *
	 * We need to lock the test/set EOF update as we can be racing with
	 * other IO completions here to update the EOF. Failing to serialise
	 * here can result in EOF moving backwards and Bad Things Happen when
	 * that occurs.
	 */
	spin_lock(&ip->i_flags_lock);
	if (offset + size > i_size_read(inode)) {
		i_size_write(inode, offset + size);
		spin_unlock(&ip->i_flags_lock);
		error = xfs_setfilesize(ip, offset, size);
	} else {
		spin_unlock(&ip->i_flags_lock);
	}

out:
	memalloc_nofs_restore(nofs_flag);
	return error;
}

static const struct iomap_dio_ops xfs_dio_write_ops = {
	.end_io		= xfs_dio_write_end_io,
};

/*
 * xfs_file_dio_aio_write - handle direct IO writes
 *
 * Lock the inode appropriately to prepare for and issue a direct IO write.
 * By separating it from the buffered write path we remove all the tricky to
 * follow locking changes and looping.
 *
 * If there are cached pages or we're extending the file, we need IOLOCK_EXCL
 * until we're sure the bytes at the new EOF have been zeroed and/or the cached
 * pages are flushed out.
 *
 * In most cases the direct IO writes will be done holding IOLOCK_SHARED
 * allowing them to be done in parallel with reads and other direct IO writes.
 * However, if the IO is not aligned to filesystem blocks, the direct IO layer
 * needs to do sub-block zeroing and that requires serialisation against other
 * direct IOs to the same block. In this case we need to serialise the
 * submission of the unaligned IOs so that we don't get racing block zeroing in
 * the dio layer. To avoid the problem with aio, we also need to wait for
 * outstanding IOs to complete so that unwritten extent conversion is completed
 * before we try to map the overlapping block. This is currently implemented by
 * hitting it with a big hammer (i.e. inode_dio_wait()).
 *
 * Returns with locks held indicated by @iolock and errors indicated by
 * negative return values.
 */
STATIC ssize_t
xfs_file_dio_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	struct xfs_mount	*mp = ip->i_mount;
	ssize_t			ret = 0;
	int			unaligned_io = 0;
	int			iolock;
	size_t			count = iov_iter_count(from);
	struct xfs_buftarg	*target = xfs_inode_buftarg(ip);

	/* DIO must be aligned to device logical sector size */
	if ((iocb->ki_pos | count) & target->bt_logical_sectormask)
		return -EINVAL;

	/*
	 * Don't take the exclusive iolock here unless the I/O is unaligned to
	 * the file system block size. We don't need to consider the EOF
	 * extension case here because xfs_file_aio_write_checks() will relock
	 * the inode as necessary for EOF zeroing cases and fill out the new
	 * inode size as appropriate.
	 */
	if ((iocb->ki_pos & mp->m_blockmask) ||
	    ((iocb->ki_pos + count) & mp->m_blockmask)) {
		unaligned_io = 1;

		/*
		 * We can't properly handle unaligned direct I/O to reflink
		 * files yet, as we can't unshare a partial block.
		 */
		if (xfs_is_cow_inode(ip)) {
			trace_xfs_reflink_bounce_dio_write(ip, iocb->ki_pos, count);
			return -ENOTBLK;
		}
		iolock = XFS_IOLOCK_EXCL;
	} else {
		iolock = XFS_IOLOCK_SHARED;
	}

	if (iocb->ki_flags & IOCB_NOWAIT) {
		/* unaligned dio always waits, bail */
		if (unaligned_io)
			return -EAGAIN;
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;
	count = iov_iter_count(from);

	/*
	 * If we are doing unaligned IO, we can't allow any other overlapping IO
	 * in-flight at the same time or we risk data corruption. Wait for all
	 * other IO to drain before we submit. If the IO is aligned, demote the
	 * iolock if we had to take the exclusive lock in
	 * xfs_file_aio_write_checks() for other reasons.
	 */
	if (unaligned_io) {
		inode_dio_wait(inode);
	} else if (iolock == XFS_IOLOCK_EXCL) {
		xfs_ilock_demote(ip, XFS_IOLOCK_EXCL);
		iolock = XFS_IOLOCK_SHARED;
	}

	trace_xfs_file_direct_write(ip, count, iocb->ki_pos);
	/*
	 * If unaligned, this is the only IO in-flight. Wait on it before we
	 * release the iolock to prevent subsequent overlapping IO.
	 */
	ret = iomap_dio_rw(iocb, from, &xfs_direct_write_iomap_ops,
			   &xfs_dio_write_ops,
			   is_sync_kiocb(iocb) || unaligned_io);
out:
	xfs_iunlock(ip, iolock);

	/*
	 * No fallback to buffered IO after short writes for XFS, direct I/O
	 * will either complete fully or return an error.
	 */
	ASSERT(ret < 0 || ret == count);
	return ret;
}

static noinline ssize_t
xfs_file_dax_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct inode		*inode = iocb->ki_filp->f_mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	int			iolock = XFS_IOLOCK_EXCL;
	ssize_t			ret, error = 0;
	size_t			count;
	loff_t			pos;

	if (iocb->ki_flags & IOCB_NOWAIT) {
		if (!xfs_ilock_nowait(ip, iolock))
			return -EAGAIN;
	} else {
		xfs_ilock(ip, iolock);
	}

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	pos = iocb->ki_pos;
	count = iov_iter_count(from);

	trace_xfs_file_dax_write(ip, count, pos);
	ret = dax_iomap_rw(iocb, from, &xfs_direct_write_iomap_ops);
	if (ret > 0 && iocb->ki_pos > i_size_read(inode)) {
		i_size_write(inode, iocb->ki_pos);
		error = xfs_setfilesize(ip, pos, ret);
	}
out:
	xfs_iunlock(ip, iolock);
	if (error)
		return error;

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);

		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

STATIC ssize_t
xfs_file_buffered_aio_write(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	int			enospc = 0;
	int			iolock;

	if (iocb->ki_flags & IOCB_NOWAIT)
		return -EOPNOTSUPP;

write_retry:
	iolock = XFS_IOLOCK_EXCL;
	xfs_ilock(ip, iolock);

	ret = xfs_file_aio_write_checks(iocb, from, &iolock);
	if (ret)
		goto out;

	/* We can write back this queue in page reclaim */
	current->backing_dev_info = inode_to_bdi(inode);

	trace_xfs_file_buffered_write(ip, iov_iter_count(from), iocb->ki_pos);
	ret = iomap_file_buffered_write(iocb, from,
			&xfs_buffered_write_iomap_ops);
	if (likely(ret >= 0))
		iocb->ki_pos += ret;

	/*
	 * If we hit a space limit, try to free up some lingering preallocated
	 * space before returning an error. In the case of ENOSPC, first try to
	 * write back all dirty inodes to free up some of the excess reserved
	 * metadata space. This reduces the chances that the eofblocks scan
	 * waits on dirty mappings. Since xfs_flush_inodes() is serialized, this
	 * also behaves as a filter to prevent too many eofblocks scans from
	 * running at the same time.
	 */
	if (ret == -EDQUOT && !enospc) {
		xfs_iunlock(ip, iolock);
		enospc = xfs_inode_free_quota_eofblocks(ip);
		if (enospc)
			goto write_retry;
		enospc = xfs_inode_free_quota_cowblocks(ip);
		if (enospc)
			goto write_retry;
		iolock = 0;
	} else if (ret == -ENOSPC && !enospc) {
		struct xfs_eofblocks eofb = {0};

		enospc = 1;
		xfs_flush_inodes(ip->i_mount);

		xfs_iunlock(ip, iolock);
		eofb.eof_flags = XFS_EOF_FLAGS_SYNC;
		xfs_icache_free_eofblocks(ip->i_mount, &eofb);
		xfs_icache_free_cowblocks(ip->i_mount, &eofb);
		goto write_retry;
	}

	current->backing_dev_info = NULL;
out:
	if (iolock)
		xfs_iunlock(ip, iolock);

	if (ret > 0) {
		XFS_STATS_ADD(ip->i_mount, xs_write_bytes, ret);
		/* Handle various SYNC-type writes */
		ret = generic_write_sync(iocb, ret);
	}
	return ret;
}

STATIC ssize_t
xfs_file_write_iter(
	struct kiocb		*iocb,
	struct iov_iter		*from)
{
	struct file		*file = iocb->ki_filp;
	struct address_space	*mapping = file->f_mapping;
	struct inode		*inode = mapping->host;
	struct xfs_inode	*ip = XFS_I(inode);
	ssize_t			ret;
	size_t			ocount = iov_iter_count(from);

	XFS_STATS_INC(ip->i_mount, xs_write_calls);

	if (ocount == 0)
		return 0;

	if (XFS_FORCED_SHUTDOWN(ip->i_mount))
		return -EIO;

	if (IS_DAX(inode))
		return xfs_file_dax_write(iocb, from);

	if (iocb->ki_flags & IOCB_DIRECT) {
		/*
		 * Allow a directio write to fall back to a buffered
		 * write *only* in the case that we're doing a reflink
		 * CoW. In all other directio scenarios we do not
		 * allow an operation to fall back to buffered mode.
		 */
		ret = xfs_file_dio_aio_write(iocb, from);
		if (ret != -ENOTBLK)
			return ret;
	}

	return xfs_file_buffered_aio_write(iocb, from);
}

static void
xfs_wait_dax_page(
	struct inode		*inode)
{
	struct xfs_inode	*ip = XFS_I(inode);

	xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
	schedule();
	xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
}

static int
xfs_break_dax_layouts(
	struct inode		*inode,
	bool			*retry)
{
	struct page		*page;

	ASSERT(xfs_isilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL));

	page = dax_layout_busy_page(inode->i_mapping);
	if (!page)
		return 0;

	*retry = true;
	return ___wait_var_event(&page->_refcount,
			atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
			0, 0, xfs_wait_dax_page(inode));
}

int
xfs_break_layouts(
	struct inode		*inode,
	uint			*iolock,
	enum layout_break_reason reason)
{
	bool			retry;
	int			error;

	ASSERT(xfs_isilocked(XFS_I(inode), XFS_IOLOCK_SHARED|XFS_IOLOCK_EXCL));

	do {
		retry = false;
		switch (reason) {
		case BREAK_UNMAP:
			error = xfs_break_dax_layouts(inode, &retry);
			if (error || retry)
				break;
			/* fall through */
		case BREAK_WRITE:
			error = xfs_break_leased_layouts(inode, iolock, &retry);
			break;
		default:
			WARN_ON_ONCE(1);
			error = -EINVAL;
		}
	} while (error == 0 && retry);

	return error;
}

#define XFS_FALLOC_FL_SUPPORTED						\
		(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |		\
		 FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |	\
		 FALLOC_FL_INSERT_RANGE | FALLOC_FL_UNSHARE_RANGE)

STATIC long
xfs_file_fallocate(
	struct file		*file,
	int			mode,
	loff_t			offset,
	loff_t			len)
{
	struct inode		*inode = file_inode(file);
	struct xfs_inode	*ip = XFS_I(inode);
	long			error;
	enum xfs_prealloc_flags	flags = 0;
	uint			iolock = XFS_IOLOCK_EXCL | XFS_MMAPLOCK_EXCL;
	loff_t			new_size = 0;
	bool			do_file_insert = false;

	if (!S_ISREG(inode->i_mode))
		return -EINVAL;
	if (mode & ~XFS_FALLOC_FL_SUPPORTED)
		return -EOPNOTSUPP;

	xfs_ilock(ip, iolock);
	error = xfs_break_layouts(inode, &iolock, BREAK_UNMAP);
	if (error)
		goto out_unlock;

	/*
	 * Must wait for all AIO to complete before we continue as AIO can
	 * change the file size on completion without holding any locks we
	 * currently hold. We must do this first because AIO can update both
	 * the on disk and in memory inode sizes, and the operations that
	 * follow require the in-memory size to be fully up-to-date.
	 */
	inode_dio_wait(inode);

	/*
	 * Now that AIO and DIO have drained we flush and (if necessary)
	 * invalidate the cached range over the first operation we are about
	 * to run.
	 *
	 * We care about zero and collapse here because they both run a hole
	 * punch over the range first. Because that can zero data, and the
	 * range of invalidation for the shift operations is much larger, we
	 * still do the required flush for collapse in xfs_prepare_shift().
	 *
	 * Insert has the same range requirements as collapse, and we extend
	 * the file first which can zero data. Hence insert has the same
	 * flush/invalidate requirements as collapse and so they are both
	 * handled at the right time by xfs_prepare_shift().
	 */
	if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE |
		    FALLOC_FL_COLLAPSE_RANGE)) {
		error = xfs_flush_unmap_range(ip, offset, len);
		if (error)
			goto out_unlock;
	}

	if (mode & FALLOC_FL_PUNCH_HOLE) {
		error = xfs_free_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * The collapse range must not reach or overlap EOF, as that
		 * would effectively be a truncate operation.
		 */
		if (offset + len >= i_size_read(inode)) {
			error = -EINVAL;
			goto out_unlock;
		}

		new_size = i_size_read(inode) - len;

		error = xfs_collapse_file_space(ip, offset, len);
		if (error)
			goto out_unlock;
	} else if (mode & FALLOC_FL_INSERT_RANGE) {
		loff_t		isize = i_size_read(inode);

		if (!xfs_is_falloc_aligned(ip, offset, len)) {
			error = -EINVAL;
			goto out_unlock;
		}

		/*
		 * New inode size must not exceed ->s_maxbytes, accounting for
		 * possible signed overflow.
		 */
		if (inode->i_sb->s_maxbytes - isize < len) {
			error = -EFBIG;
			goto out_unlock;
		}
		new_size = isize + len;

		/* Offset should be less than i_size */
		if (offset >= isize) {
			error = -EINVAL;
			goto out_unlock;
		}
		do_file_insert = true;
	} else {
		flags |= XFS_PREALLOC_SET;

		if (!(mode & FALLOC_FL_KEEP_SIZE) &&
		    offset + len > i_size_read(inode)) {
			new_size = offset + len;
			error = inode_newsize_ok(inode, new_size);
			if (error)
				goto out_unlock;
		}

		if (mode & FALLOC_FL_ZERO_RANGE) {
			/*
			 * Punch a hole and prealloc the range. We use a hole
			 * punch rather than unwritten extent conversion for two
			 * reasons:
			 *
			 * 1.) Hole punch handles partial block zeroing for us.
			 * 2.) If prealloc returns ENOSPC, the file range is
			 * still zero-valued by virtue of the hole punch.
			 */
			unsigned int blksize = i_blocksize(inode);

			trace_xfs_zero_file_space(ip);

			error = xfs_free_file_space(ip, offset, len);
			if (error)
				goto out_unlock;

			len = round_up(offset + len, blksize) -
			      round_down(offset, blksize);
			offset = round_down(offset, blksize);
		} else if (mode & FALLOC_FL_UNSHARE_RANGE) {
			error = xfs_reflink_unshare(ip, offset, len);
			if (error)
				goto out_unlock;
		} else {
			/*
			 * In always_cow mode we can't use preallocations and
			 * thus should not create them.
			 */
			if (xfs_is_always_cow_inode(ip)) {
				error = -EOPNOTSUPP;
				goto out_unlock;
			}
		}

		if (!xfs_is_always_cow_inode(ip)) {
			error = xfs_alloc_file_space(ip, offset, len,
						     XFS_BMAPI_PREALLOC);
			if (error)
				goto out_unlock;
		}
	}

	if (file->f_flags & O_DSYNC)
		flags |= XFS_PREALLOC_SYNC;

	error = xfs_update_prealloc_flags(ip, flags);
	if (error)
		goto out_unlock;

	/* Change file size if needed */
	if (new_size) {
		struct iattr iattr;

		iattr.ia_valid = ATTR_SIZE;
		iattr.ia_size = new_size;
		error = xfs_vn_setattr_size(file_dentry(file), &iattr);
		if (error)
			goto out_unlock;
	}

	/*
	 * Perform hole insertion now that the file size has been updated so
	 * that if we crash during the operation we don't leave shifted
	 * extents past EOF and hence lose access to the data that is
	 * contained within them.
	 */
	if (do_file_insert)
		error = xfs_insert_file_space(ip, offset, len);

out_unlock:
	xfs_iunlock(ip, iolock);
	return error;
}

STATIC int
xfs_file_fadvise(
	struct file	*file,
	loff_t		start,
	loff_t		end,
	int		advice)
{
	struct xfs_inode *ip = XFS_I(file_inode(file));
	int		ret;
	int		lockflags = 0;

	/*
	 * Operations creating pages in page cache need protection from hole
	 * punching and similar ops
	 */
	if (advice == POSIX_FADV_WILLNEED) {
		lockflags = XFS_IOLOCK_SHARED;
		xfs_ilock(ip, lockflags);
	}
	ret = generic_fadvise(file, start, end, advice);
	if (lockflags)
		xfs_iunlock(ip, lockflags);
	return ret;
}

/* Does this file, inode, or mount want synchronous writes? */
static inline bool xfs_file_sync_writes(struct file *filp)
{
	struct xfs_inode	*ip = XFS_I(file_inode(filp));

	if (ip->i_mount->m_flags & XFS_MOUNT_WSYNC)
		return true;
	if (filp->f_flags & (__O_SYNC | O_DSYNC))
		return true;
	if (IS_SYNC(file_inode(filp)))
		return true;

	return false;
}

STATIC loff_t
xfs_file_remap_range(
	struct file		*file_in,
	loff_t			pos_in,
	struct file		*file_out,
	loff_t			pos_out,
	loff_t			len,
	unsigned int		remap_flags)
{
	struct inode		*inode_in = file_inode(file_in);
	struct xfs_inode	*src = XFS_I(inode_in);
	struct inode		*inode_out = file_inode(file_out);
	struct xfs_inode	*dest = XFS_I(inode_out);
	struct xfs_mount	*mp = src->i_mount;
	loff_t			remapped = 0;
	xfs_extlen_t		cowextsize;
	int			ret;

	if (remap_flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_ADVISORY))
		return -EINVAL;

	if (!xfs_sb_version_hasreflink(&mp->m_sb))
		return -EOPNOTSUPP;

	if (XFS_FORCED_SHUTDOWN(mp))
		return -EIO;

	/* Prepare and then clone file data. */
	ret = xfs_reflink_remap_prep(file_in, pos_in, file_out, pos_out,
			&len, remap_flags);
	if (ret || len == 0)
		return ret;

	trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);

	ret = xfs_reflink_remap_blocks(src, pos_in, dest, pos_out, len,
			&remapped);
	if (ret)
		goto out_unlock;

	/*
	 * Carry the cowextsize hint from src to dest if we're sharing the
	 * entire source file to the entire destination file, the source file
	 * has a cowextsize hint, and the destination file does not.
	 */
	cowextsize = 0;
	if (pos_in == 0 && len == i_size_read(inode_in) &&
	    (src->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE) &&
	    pos_out == 0 && len >= i_size_read(inode_out) &&
	    !(dest->i_d.di_flags2 & XFS_DIFLAG2_COWEXTSIZE))
		cowextsize = src->i_d.di_cowextsize;

	ret = xfs_reflink_update_dest(dest, pos_out + len, cowextsize,
			remap_flags);
	if (ret)
		goto out_unlock;

	if (xfs_file_sync_writes(file_in) || xfs_file_sync_writes(file_out))
		xfs_log_force_inode(dest);
out_unlock:
	xfs_iunlock2_io_mmap(src, dest);
	if (ret)
		trace_xfs_reflink_remap_range_error(dest, ret, _RET_IP_);
	return remapped > 0 ? remapped : ret;
}

STATIC int
xfs_file_open(
	struct inode	*inode,
	struct file	*file)
{
	if (!(file->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
		return -EFBIG;
	if (XFS_FORCED_SHUTDOWN(XFS_M(inode->i_sb)))
		return -EIO;
	file->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC;
	return 0;
}

STATIC int
xfs_dir_open(
	struct inode	*inode,
	struct file	*file)
{
	struct xfs_inode *ip = XFS_I(inode);
	int		mode;
	int		error;

	error = xfs_file_open(inode, file);
	if (error)
		return error;

	/*
	 * If there are any blocks, read-ahead block 0 as we're almost
	 * certain to have the next operation be a read there.
	 */
	mode = xfs_ilock_data_map_shared(ip);
	if (ip->i_df.if_nextents > 0)
		error = xfs_dir3_data_readahead(ip, 0, 0);
	xfs_iunlock(ip, mode);
	return error;
}

STATIC int
xfs_file_release(
	struct inode	*inode,
	struct file	*filp)
{
	return xfs_release(XFS_I(inode));
}

STATIC int
xfs_file_readdir(
	struct file	*file,
	struct dir_context *ctx)
{
	struct inode	*inode = file_inode(file);
	xfs_inode_t	*ip = XFS_I(inode);
	size_t		bufsize;

	/*
	 * The Linux API doesn't pass the total size of the buffer we read
	 * into down to the filesystem. With the filldir concept it's not
	 * needed for correct information, but the XFS dir2 leaf code wants
	 * an estimate of the buffer size to calculate its readahead window
	 * and size the buffers used for mapping to physical blocks.
	 *
	 * Try to give it an estimate that's good enough, maybe at some point
	 * we can change the ->readdir prototype to include the buffer size.
	 * For now we use the current glibc buffer size.
	 */
	bufsize = (size_t)min_t(loff_t, XFS_READDIR_BUFSIZE, ip->i_d.di_size);

	return xfs_readdir(NULL, ip, ctx, bufsize);
}

STATIC loff_t
xfs_file_llseek(
	struct file	*file,
	loff_t		offset,
	int		whence)
{
	struct inode		*inode = file->f_mapping->host;

	if (XFS_FORCED_SHUTDOWN(XFS_I(inode)->i_mount))
		return -EIO;

	switch (whence) {
	default:
		return generic_file_llseek(file, offset, whence);
	case SEEK_HOLE:
		offset = iomap_seek_hole(inode, offset, &xfs_seek_iomap_ops);
		break;
	case SEEK_DATA:
		offset = iomap_seek_data(inode, offset, &xfs_seek_iomap_ops);
		break;
	}

	if (offset < 0)
		return offset;
	return vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
}

/*
 * Locking for serialisation of IO during page faults. This results in a lock
 * ordering of:
 *
 * mmap_lock (MM)
 *   sb_start_pagefault(vfs, freeze)
 *     i_mmaplock (XFS - truncate serialisation)
 *       page_lock (MM)
 *         i_lock (XFS - extent map serialisation)
 */
static vm_fault_t
__xfs_filemap_fault(
	struct vm_fault		*vmf,
	enum page_entry_size	pe_size,
	bool			write_fault)
{
	struct inode		*inode = file_inode(vmf->vma->vm_file);
	struct xfs_inode	*ip = XFS_I(inode);
	vm_fault_t		ret;

	trace_xfs_filemap_fault(ip, pe_size, write_fault);

	if (write_fault) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vmf->vma->vm_file);
	}

	xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
	if (IS_DAX(inode)) {
		pfn_t pfn;

		ret = dax_iomap_fault(vmf, pe_size, &pfn, NULL,
				(write_fault && !vmf->cow_page) ?
					&xfs_direct_write_iomap_ops :
					&xfs_read_iomap_ops);
		if (ret & VM_FAULT_NEEDDSYNC)
			ret = dax_finish_sync_fault(vmf, pe_size, pfn);
	} else {
		if (write_fault)
			ret = iomap_page_mkwrite(vmf,
					&xfs_buffered_write_iomap_ops);
		else
			ret = filemap_fault(vmf);
	}
	xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);

	if (write_fault)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

static inline bool
xfs_is_write_fault(
	struct vm_fault		*vmf)
{
	return (vmf->flags & FAULT_FLAG_WRITE) &&
	       (vmf->vma->vm_flags & VM_SHARED);
}

static vm_fault_t
xfs_filemap_fault(
	struct vm_fault		*vmf)
{
	/* DAX can shortcut the normal fault path on write faults! */
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE,
			IS_DAX(file_inode(vmf->vma->vm_file)) &&
			xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_huge_fault(
	struct vm_fault		*vmf,
	enum page_entry_size	pe_size)
{
	if (!IS_DAX(file_inode(vmf->vma->vm_file)))
		return VM_FAULT_FALLBACK;

	/* DAX can shortcut the normal fault path on write faults! */
	return __xfs_filemap_fault(vmf, pe_size,
			xfs_is_write_fault(vmf));
}

static vm_fault_t
xfs_filemap_page_mkwrite(
	struct vm_fault		*vmf)
{
	return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true);
}

/*
 * pfn_mkwrite was originally intended to ensure we capture time stamp updates
 * on write faults. In reality, it needs to serialise against truncate and
 * prepare memory for writing, so handle it as a standard write fault.
1313 */ 1314 static vm_fault_t 1315 xfs_filemap_pfn_mkwrite( 1316 struct vm_fault *vmf) 1317 { 1318 1319 return __xfs_filemap_fault(vmf, PE_SIZE_PTE, true); 1320 } 1321 1322 static void 1323 xfs_filemap_map_pages( 1324 struct vm_fault *vmf, 1325 pgoff_t start_pgoff, 1326 pgoff_t end_pgoff) 1327 { 1328 struct inode *inode = file_inode(vmf->vma->vm_file); 1329 1330 xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1331 filemap_map_pages(vmf, start_pgoff, end_pgoff); 1332 xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED); 1333 } 1334 1335 static const struct vm_operations_struct xfs_file_vm_ops = { 1336 .fault = xfs_filemap_fault, 1337 .huge_fault = xfs_filemap_huge_fault, 1338 .map_pages = xfs_filemap_map_pages, 1339 .page_mkwrite = xfs_filemap_page_mkwrite, 1340 .pfn_mkwrite = xfs_filemap_pfn_mkwrite, 1341 }; 1342 1343 STATIC int 1344 xfs_file_mmap( 1345 struct file *file, 1346 struct vm_area_struct *vma) 1347 { 1348 struct inode *inode = file_inode(file); 1349 struct xfs_buftarg *target = xfs_inode_buftarg(XFS_I(inode)); 1350 1351 /* 1352 * We don't support synchronous mappings for non-DAX files and 1353 * for DAX files if underneath dax_device is not synchronous. 1354 */ 1355 if (!daxdev_mapping_supported(vma, target->bt_daxdev)) 1356 return -EOPNOTSUPP; 1357 1358 file_accessed(file); 1359 vma->vm_ops = &xfs_file_vm_ops; 1360 if (IS_DAX(inode)) 1361 vma->vm_flags |= VM_HUGEPAGE; 1362 return 0; 1363 } 1364 1365 const struct file_operations xfs_file_operations = { 1366 .llseek = xfs_file_llseek, 1367 .read_iter = xfs_file_read_iter, 1368 .write_iter = xfs_file_write_iter, 1369 .splice_read = generic_file_splice_read, 1370 .splice_write = iter_file_splice_write, 1371 .iopoll = iomap_dio_iopoll, 1372 .unlocked_ioctl = xfs_file_ioctl, 1373 #ifdef CONFIG_COMPAT 1374 .compat_ioctl = xfs_file_compat_ioctl, 1375 #endif 1376 .mmap = xfs_file_mmap, 1377 .mmap_supported_flags = MAP_SYNC, 1378 .open = xfs_file_open, 1379 .release = xfs_file_release, 1380 .fsync = xfs_file_fsync, 1381 .get_unmapped_area = thp_get_unmapped_area, 1382 .fallocate = xfs_file_fallocate, 1383 .fadvise = xfs_file_fadvise, 1384 .remap_file_range = xfs_file_remap_range, 1385 }; 1386 1387 const struct file_operations xfs_dir_file_operations = { 1388 .open = xfs_dir_open, 1389 .read = generic_read_dir, 1390 .iterate_shared = xfs_file_readdir, 1391 .llseek = generic_file_llseek, 1392 .unlocked_ioctl = xfs_file_ioctl, 1393 #ifdef CONFIG_COMPAT 1394 .compat_ioctl = xfs_file_compat_ioctl, 1395 #endif 1396 .fsync = xfs_dir_fsync, 1397 }; 1398