// SPDX-License-Identifier: GPL-2.0-only
/*
 * linux/fs/nfs/file.c
 *
 * Copyright (C) 1992 Rick Sladkey
 *
 * Changes Copyright (C) 1994 by Florian La Roche
 *   - Do not copy data too often around in the kernel.
 *   - In nfs_file_read the return value of kmalloc wasn't checked.
 *   - Put in a better version of read look-ahead buffering. Original idea
 *     and implementation by Wai S Kok elekokws@ee.nus.sg.
 *
 * Expire cache on write to a file by Wai S Kok (Oct 1994).
 *
 * Total rewrite of read side for new NFS buffer cache.. Linus.
 *
 * nfs regular file handling functions
 */

#include <linux/module.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_mount.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/gfp.h>
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/compaction.h>

#include <linux/uaccess.h>
#include <linux/filelock.h>

#include "delegation.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"
#include "pnfs.h"

#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_FILE

static const struct vm_operations_struct nfs_file_vm_ops;

int nfs_check_flags(int flags)
{
	if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT))
		return -EINVAL;

	return 0;
}
EXPORT_SYMBOL_GPL(nfs_check_flags);
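/*
 * Userspace-visible effect of nfs_check_flags(), sketched for
 * illustration: O_APPEND and O_DIRECT are mutually exclusive on NFS,
 * presumably because an atomic append needs a server-authoritative
 * file size that uncached direct I/O cannot guarantee.
 *
 *	fd = open("/mnt/nfs/file", O_WRONLY | O_APPEND | O_DIRECT);
 *		=> fails with errno == EINVAL (from nfs_check_flags())
 */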
/*
 * Open file
 */
static int
nfs_file_open(struct inode *inode, struct file *filp)
{
	int res;

	dprintk("NFS: open file(%pD2)\n", filp);

	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
	res = nfs_check_flags(filp->f_flags);
	if (res)
		return res;

	res = nfs_open(inode, filp);
	if (res == 0)
		filp->f_mode |= FMODE_CAN_ODIRECT;
	return res;
}

int
nfs_file_release(struct inode *inode, struct file *filp)
{
	dprintk("NFS: release(%pD2)\n", filp);

	nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
	nfs_file_clear_open_context(filp);
	nfs_fscache_release_file(inode, filp);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_file_release);

/**
 * nfs_revalidate_file_size - Revalidate the file size
 * @inode: pointer to inode struct
 * @filp: pointer to struct file
 *
 * Revalidates the file length. This is basically a wrapper around
 * nfs_revalidate_inode() that takes into account the fact that we may
 * have cached writes (in which case we don't care about the server's
 * idea of what the file length is), or O_DIRECT (in which case we
 * shouldn't trust the cache).
 */
static int nfs_revalidate_file_size(struct inode *inode, struct file *filp)
{
	struct nfs_server *server = NFS_SERVER(inode);

	if (filp->f_flags & O_DIRECT)
		goto force_reval;
	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_SIZE))
		goto force_reval;
	return 0;
force_reval:
	return __nfs_revalidate_inode(server, inode);
}

loff_t nfs_file_llseek(struct file *filp, loff_t offset, int whence)
{
	dprintk("NFS: llseek file(%pD2, %lld, %d)\n",
			filp, offset, whence);

	/*
	 * whence == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate
	 * the cached file length
	 */
	if (whence != SEEK_SET && whence != SEEK_CUR) {
		struct inode *inode = filp->f_mapping->host;

		int retval = nfs_revalidate_file_size(inode, filp);
		if (retval < 0)
			return (loff_t)retval;
	}

	return generic_file_llseek(filp, offset, whence);
}
EXPORT_SYMBOL_GPL(nfs_file_llseek);
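/*
 * A note on the errseq_t pattern used by nfs_file_flush() and
 * nfs_file_write() below (a sketch of the generic mechanism, not
 * anything NFS-specific): the writeback error cursor is sampled
 * *before* the flush is started and checked *after* it completes, so
 * only errors raised during this flush window are reported:
 *
 *	since = filemap_sample_wb_err(mapping);
 *	nfs_wb_all(inode);			// flush dirty pages
 *	return filemap_check_wb_err(mapping, since);
 */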
/*
 * Flush all dirty pages, and check for write errors.
 */
static int
nfs_file_flush(struct file *file, fl_owner_t id)
{
	struct inode *inode = file_inode(file);
	errseq_t since;

	dprintk("NFS: flush(%pD2)\n", file);

	nfs_inc_stats(inode, NFSIOS_VFSFLUSH);
	if ((file->f_mode & FMODE_WRITE) == 0)
		return 0;

	/* Flush writes to the server and return any errors */
	since = filemap_sample_wb_err(file->f_mapping);
	nfs_wb_all(inode);
	return filemap_check_wb_err(file->f_mapping, since);
}

ssize_t
nfs_file_read(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t result;

	trace_nfs_file_read(iocb, to);

	if (iocb->ki_flags & IOCB_DIRECT)
		return nfs_file_direct_read(iocb, to, false);

	dprintk("NFS: read(%pD2, %zu@%lu)\n",
		iocb->ki_filp,
		iov_iter_count(to), (unsigned long) iocb->ki_pos);

	result = nfs_start_io_read(inode);
	if (result)
		return result;

	result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
	if (!result) {
		result = generic_file_read_iter(iocb, to);
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	nfs_end_io_read(inode);
	return result;
}
EXPORT_SYMBOL_GPL(nfs_file_read);

ssize_t
nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe,
		     size_t len, unsigned int flags)
{
	struct inode *inode = file_inode(in);
	ssize_t result;

	dprintk("NFS: splice_read(%pD2, %zu@%llu)\n", in, len, *ppos);

	result = nfs_start_io_read(inode);
	if (result)
		return result;

	result = nfs_revalidate_mapping(inode, in->f_mapping);
	if (!result) {
		result = filemap_splice_read(in, ppos, pipe, len, flags);
		if (result > 0)
			nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, result);
	}
	nfs_end_io_read(inode);
	return result;
}
EXPORT_SYMBOL_GPL(nfs_file_splice_read);

int
nfs_file_mmap_prepare(struct vm_area_desc *desc)
{
	struct file *file = desc->file;
	struct inode *inode = file_inode(file);
	int status;

	dprintk("NFS: mmap(%pD2)\n", file);

	/* Note: generic_file_mmap_prepare() returns ENOSYS on nommu systems
	 *       so we call that before revalidating the mapping
	 */
	status = generic_file_mmap_prepare(desc);
	if (!status) {
		desc->vm_ops = &nfs_file_vm_ops;
		status = nfs_revalidate_mapping(inode, file->f_mapping);
	}
	return status;
}
EXPORT_SYMBOL_GPL(nfs_file_mmap_prepare);

/*
 * Flush any dirty pages for this process, and check for write errors.
 * The return status from this call provides a reliable indication of
 * whether any write errors occurred for this process.
 */
static int
nfs_file_fsync_commit(struct file *file, int datasync)
{
	struct inode *inode = file_inode(file);
	int ret, ret2;

	dprintk("NFS: fsync file(%pD2) datasync %d\n", file, datasync);

	nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
	ret = nfs_commit_inode(inode, FLUSH_SYNC);
	ret2 = file_check_and_advance_wb_err(file);
	if (ret2 < 0)
		return ret2;
	return ret;
}

int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
	struct inode *inode = file_inode(file);
	struct nfs_inode *nfsi = NFS_I(inode);
	long save_nredirtied = atomic_long_read(&nfsi->redirtied_pages);
	long nredirtied;
	int ret;

	trace_nfs_fsync_enter(inode);

	for (;;) {
		ret = file_write_and_wait_range(file, start, end);
		if (ret != 0)
			break;
		ret = nfs_file_fsync_commit(file, datasync);
		if (ret != 0)
			break;
		ret = pnfs_sync_inode(inode, !!datasync);
		if (ret != 0)
			break;
		nredirtied = atomic_long_read(&nfsi->redirtied_pages);
		if (nredirtied == save_nredirtied)
			break;
		save_nredirtied = nredirtied;
	}

	trace_nfs_fsync_exit(inode, ret);
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_file_fsync);

void nfs_truncate_last_folio(struct address_space *mapping, loff_t from,
			     loff_t to)
{
	struct folio *folio;

	if (from >= to)
		return;

	folio = filemap_lock_folio(mapping, from >> PAGE_SHIFT);
	if (IS_ERR(folio))
		return;

	if (folio_mkclean(folio))
		folio_mark_dirty(folio);

	if (folio_test_uptodate(folio)) {
		loff_t fpos = folio_pos(folio);
		size_t offset = from - fpos;
		size_t end = folio_size(folio);

		if (to - fpos < end)
			end = to - fpos;
		folio_zero_segment(folio, offset, end);
		trace_nfs_size_truncate_folio(mapping->host, to);
	}

	folio_unlock(folio);
	folio_put(folio);
}
EXPORT_SYMBOL_GPL(nfs_truncate_last_folio);

/*
 * Decide whether a read/modify/write cycle may be more efficient
 * than a modify/write/read cycle when writing to a page in the
 * page cache.
 *
 * Some pNFS layout drivers can only read/write at a certain block
 * granularity like all block devices and therefore we must perform
 * read/modify/write whenever a page hasn't been read yet and the data
 * to be written there is not aligned to a block boundary and/or
 * smaller than the block size.
 *
 * The modify/write/read cycle may occur if a page is read before
 * being completely filled by the writer. In this situation, the
 * page must be completely written to stable storage on the server
 * before it can be refilled by reading in the page from the server.
 * This can lead to expensive, small, FILE_SYNC mode writes being
 * done.
 *
 * It may be more efficient to read the page first if the file is
 * open for reading in addition to writing, the page is not marked
 * as Uptodate, it is not dirty or waiting to be committed
 * (indicating that it was previously allocated and then modified),
 * there were valid bytes of data in that range of the file,
 * and the new data won't completely replace the old data in
 * that range of the file.
 */
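/*
 * A worked example of the trade-off above (illustrative numbers, not
 * from the original code): with a 4096-byte page that is not yet
 * Uptodate and a file opened O_RDWR, a write of 100 bytes at offset
 * 200 within the page reads the page in first, so the partial write
 * can sit in the page cache and be flushed lazily.  A write covering
 * the whole page (offset 0, length 4096, or offset 0 extending past
 * the valid length of the page) skips the read, since no existing
 * bytes would survive.
 */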
static bool nfs_folio_is_full_write(struct folio *folio, loff_t pos,
				    unsigned int len)
{
	unsigned int pglen = nfs_folio_length(folio);
	unsigned int offset = offset_in_folio(folio, pos);
	unsigned int end = offset + len;

	return !pglen || (end >= pglen && !offset);
}

static bool nfs_want_read_modify_write(struct file *file, struct folio *folio,
				       loff_t pos, unsigned int len)
{
	/*
	 * Up-to-date pages, those with ongoing or full-page write
	 * don't need read/modify/write
	 */
	if (folio_test_uptodate(folio) || folio_test_private(folio) ||
	    nfs_folio_is_full_write(folio, pos, len))
		return false;

	if (pnfs_ld_read_whole_page(file_inode(file)))
		return true;
	if (folio_test_dropbehind(folio))
		return false;
	/* Open for reading too? */
	if (file->f_mode & FMODE_READ)
		return true;
	return false;
}

/*
 * This does the "real" work of the write. We must allocate and lock the
 * page to be sent back to the generic routine, which then copies the
 * data from user space.
 *
 * If the writer ends up delaying the write, the writer needs to
 * increment the page use counts until it is done with the page.
 */
static int nfs_write_begin(const struct kiocb *iocb,
			   struct address_space *mapping,
			   loff_t pos, unsigned len, struct folio **foliop,
			   void **fsdata)
{
	struct folio *folio;
	struct file *file = iocb->ki_filp;
	int once_thru = 0;
	int ret;

	trace_nfs_write_begin(file_inode(file), pos, len);

	dfprintk(PAGECACHE, "NFS: write_begin(%pD2(%lu), %u@%lld)\n",
		 file, mapping->host->i_ino, len, (long long) pos);
	nfs_truncate_last_folio(mapping, i_size_read(mapping->host), pos);

start:
	folio = write_begin_get_folio(iocb, mapping, pos >> PAGE_SHIFT, len);
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		goto out;
	}
	*foliop = folio;

	ret = nfs_flush_incompatible(file, folio);
	if (ret) {
		folio_unlock(folio);
		folio_put(folio);
	} else if (!once_thru &&
		   nfs_want_read_modify_write(file, folio, pos, len)) {
		once_thru = 1;
		folio_clear_dropbehind(folio);
		ret = nfs_read_folio(file, folio);
		folio_put(folio);
		if (!ret)
			goto start;
	}
out:
	trace_nfs_write_begin_done(file_inode(file), pos, len, ret);
	return ret;
}
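/*
 * For context, a sketch of the generic ->write_begin/->write_end
 * contract (not NFS-specific): generic_perform_write() calls
 * ->write_begin to obtain a locked folio, copies the user data into
 * it, then calls ->write_end with the number of bytes actually
 * copied, which may be less than the length requested:
 *
 *	status = ops->write_begin(iocb, mapping, pos, len, &folio, &fsdata);
 *	copied = copy_folio_from_iter_atomic(folio, offset, len, i);
 *	status = ops->write_end(iocb, mapping, pos, len, copied, folio, fsdata);
 *
 * nfs_write_end() below must therefore cope with a short copy into a
 * folio that is not yet uptodate.
 */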
static int nfs_write_end(const struct kiocb *iocb,
			 struct address_space *mapping,
			 loff_t pos, unsigned len, unsigned copied,
			 struct folio *folio, void *fsdata)
{
	struct file *file = iocb->ki_filp;
	struct nfs_open_context *ctx = nfs_file_open_context(file);
	unsigned offset = offset_in_folio(folio, pos);
	int status;

	trace_nfs_write_end(file_inode(file), pos, len);
	dfprintk(PAGECACHE, "NFS: write_end(%pD2(%lu), %u@%lld)\n",
		 file, mapping->host->i_ino, len, (long long) pos);

	/*
	 * Zero any uninitialised parts of the page, and then mark the page
	 * as up to date if it turns out that we're extending the file.
	 */
	if (!folio_test_uptodate(folio)) {
		size_t fsize = folio_size(folio);
		unsigned pglen = nfs_folio_length(folio);
		unsigned end = offset + copied;

		if (pglen == 0) {
			folio_zero_segments(folio, 0, offset, end, fsize);
			folio_mark_uptodate(folio);
		} else if (end >= pglen) {
			folio_zero_segment(folio, end, fsize);
			if (offset == 0)
				folio_mark_uptodate(folio);
		} else
			folio_zero_segment(folio, pglen, fsize);
	}

	status = nfs_update_folio(file, folio, offset, copied);

	folio_unlock(folio);
	folio_put(folio);

	if (status < 0) {
		trace_nfs_write_end_done(file_inode(file), pos, len, status);
		return status;
	}
	NFS_I(mapping->host)->write_io += copied;

	if (nfs_ctx_key_to_expire(ctx, mapping->host))
		nfs_wb_all(mapping->host);

	trace_nfs_write_end_done(file_inode(file), pos, len, copied);
	return copied;
}

/*
 * Partially or wholly invalidate a page
 * - Release the private state associated with a page if undergoing complete
 *   page invalidation
 * - Called if either PG_private or PG_fscache is set on the page
 * - Caller holds page lock
 */
static void nfs_invalidate_folio(struct folio *folio, size_t offset,
				 size_t length)
{
	struct inode *inode = folio->mapping->host;

	dfprintk(PAGECACHE, "NFS: invalidate_folio(%lu, %zu, %zu)\n",
		 folio->index, offset, length);

	/* Cancel any unstarted writes on this page */
	if (offset != 0 || length < folio_size(folio))
		nfs_wb_folio(inode, folio);
	else
		nfs_wb_folio_cancel(inode, folio);
	folio_wait_private_2(folio); /* [DEPRECATED] */
	trace_nfs_invalidate_folio(inode, folio_pos(folio) + offset, length);
}

/*
 * Attempt to release the private state associated with a folio
 * - Called if either private or fscache flags are set on the folio
 * - Caller holds folio lock
 * - Return true (may release folio) or false (may not)
 */
static bool nfs_release_folio(struct folio *folio, gfp_t gfp)
{
	dfprintk(PAGECACHE, "NFS: release_folio(%p)\n", folio);

	/* If the private flag is set, then the folio is not freeable */
	if (folio_test_private(folio)) {
		if ((current_gfp_context(gfp) & GFP_KERNEL) != GFP_KERNEL ||
		    current_is_kswapd() || current_is_kcompactd())
			return false;
		if (nfs_wb_folio(folio->mapping->host, folio) < 0)
			return false;
	}
	return nfs_fscache_release_folio(folio, gfp);
}
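/*
 * A note on the reclaim-context test above (an inference from the
 * checks, not documented elsewhere in this file): releasing a folio
 * that still carries NFS private state means flushing it to the
 * server, which can block and recurse into memory allocation.  That
 * is unsafe from kswapd, kcompactd, or any context whose effective
 * GFP mask is narrower than GFP_KERNEL (e.g. under
 * memalloc_nofs_save()), so nfs_release_folio() refuses to release
 * in those cases and lets reclaim pick another folio instead.
 */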
static void nfs_check_dirty_writeback(struct folio *folio,
				      bool *dirty, bool *writeback)
{
	struct nfs_inode *nfsi;
	struct address_space *mapping = folio->mapping;

	/*
	 * Check if an unstable folio is currently being committed and
	 * if so, have the VM treat it as if the folio is under writeback
	 * so it will not block due to folios that will shortly be freeable.
	 */
	nfsi = NFS_I(mapping->host);
	if (atomic_read(&nfsi->commit_info.rpcs_out)) {
		*writeback = true;
		return;
	}

	/*
	 * If the private flag is set, then the folio is not freeable
	 * and as the inode is not being committed, it's not going to
	 * be cleaned in the near future so treat it as dirty
	 */
	if (folio_test_private(folio))
		*dirty = true;
}

/*
 * Attempt to clear the private state associated with a page when an error
 * occurs that requires the cached contents of an inode to be written back or
 * destroyed
 * - Called if either PG_private or fscache is set on the page
 * - Caller holds page lock
 * - Return 0 if successful, -error otherwise
 */
static int nfs_launder_folio(struct folio *folio)
{
	struct inode *inode = folio->mapping->host;
	int ret;

	dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
		 inode->i_ino, folio_pos(folio));

	folio_wait_private_2(folio); /* [DEPRECATED] */
	ret = nfs_wb_folio(inode, folio);
	trace_nfs_launder_folio_done(inode, folio_pos(folio),
				     folio_size(folio), ret);
	return ret;
}

static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
			     sector_t *span)
{
	unsigned long blocks;
	long long isize;
	int ret;
	struct inode *inode = file_inode(file);
	struct rpc_clnt *clnt = NFS_CLIENT(inode);
	struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;

	spin_lock(&inode->i_lock);
	blocks = inode->i_blocks;
	isize = inode->i_size;
	spin_unlock(&inode->i_lock);
	if (blocks*512 < isize) {
		pr_warn("swap activate: swapfile has holes\n");
		return -EINVAL;
	}

	ret = rpc_clnt_swap_activate(clnt);
	if (ret)
		return ret;
	ret = add_swap_extent(sis, 0, sis->max, 0);
	if (ret < 0) {
		rpc_clnt_swap_deactivate(clnt);
		return ret;
	}

	*span = sis->pages;

	if (cl->rpc_ops->enable_swap)
		cl->rpc_ops->enable_swap(inode);

	sis->flags |= SWP_FS_OPS;
	return ret;
}

static void nfs_swap_deactivate(struct file *file)
{
	struct inode *inode = file_inode(file);
	struct rpc_clnt *clnt = NFS_CLIENT(inode);
	struct nfs_client *cl = NFS_SERVER(inode)->nfs_client;

	rpc_clnt_swap_deactivate(clnt);
	if (cl->rpc_ops->disable_swap)
		cl->rpc_ops->disable_swap(file_inode(file));
}

const struct address_space_operations nfs_file_aops = {
	.read_folio = nfs_read_folio,
	.readahead = nfs_readahead,
	.dirty_folio = filemap_dirty_folio,
	.writepages = nfs_writepages,
	.write_begin = nfs_write_begin,
	.write_end = nfs_write_end,
	.invalidate_folio = nfs_invalidate_folio,
	.release_folio = nfs_release_folio,
	.migrate_folio = nfs_migrate_folio,
	.launder_folio = nfs_launder_folio,
	.is_dirty_writeback = nfs_check_dirty_writeback,
	.error_remove_folio = generic_error_remove_folio,
	.swap_activate = nfs_swap_activate,
	.swap_deactivate = nfs_swap_deactivate,
	.swap_rw = nfs_swap_rw,
};
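/*
 * How these address_space operations fit into the buffered write path
 * (a simplified sketch; the generic code does more than shown):
 *
 *	nfs_file_write()
 *	  -> generic_perform_write()
 *	       -> nfs_write_begin()	// lock folio, maybe read it first
 *	       -> [copy user data]
 *	       -> nfs_write_end()	// zero tails, queue NFS write
 *	  -> generic_write_sync()	// flush + commit for O_SYNC
 */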
/*
 * Notification that a PTE pointing to an NFS page is about to be made
 * writable, implying that someone is about to modify the page through a
 * shared-writable mapping
 */
static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
{
	struct file *filp = vmf->vma->vm_file;
	struct inode *inode = file_inode(filp);
	unsigned pagelen;
	vm_fault_t ret = VM_FAULT_NOPAGE;
	struct address_space *mapping;
	struct folio *folio = page_folio(vmf->page);

	dfprintk(PAGECACHE, "NFS: vm_page_mkwrite(%pD2(%lu), offset %lld)\n",
		 filp, filp->f_mapping->host->i_ino,
		 (long long)folio_pos(folio));

	sb_start_pagefault(inode->i_sb);

	/* make sure the cache has finished storing the page */
	if (folio_test_private_2(folio) && /* [DEPRECATED] */
	    folio_wait_private_2_killable(folio) < 0) {
		ret = VM_FAULT_RETRY;
		goto out;
	}

	wait_on_bit_action(&NFS_I(inode)->flags, NFS_INO_INVALIDATING,
			   nfs_wait_bit_killable,
			   TASK_KILLABLE|TASK_FREEZABLE_UNSAFE);

	folio_lock(folio);
	mapping = folio->mapping;
	if (mapping != inode->i_mapping)
		goto out_unlock;

	folio_wait_writeback(folio);

	pagelen = nfs_folio_length(folio);
	if (pagelen == 0)
		goto out_unlock;

	ret = VM_FAULT_LOCKED;
	if (nfs_flush_incompatible(filp, folio) == 0 &&
	    nfs_update_folio(filp, folio, 0, pagelen) == 0)
		goto out;

	ret = VM_FAULT_SIGBUS;
out_unlock:
	folio_unlock(folio);
out:
	sb_end_pagefault(inode->i_sb);
	return ret;
}

static const struct vm_operations_struct nfs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = nfs_vm_page_mkwrite,
};
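/*
 * Buffered write entry point.  The ordering below matters (a reading
 * of the code, offered as orientation): the writeback error cursor is
 * sampled before any new pages are dirtied, the O_APPEND/size
 * revalidation runs before generic_write_checks() positions the
 * write, and the WRITE_EAGER/WRITE_WAIT mount options turn the
 * normally lazy writeback into an immediate flush (and optionally a
 * wait) once the copy has succeeded.
 */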
ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(file);
	unsigned int mntflags = NFS_SERVER(inode)->flags;
	ssize_t result, written;
	errseq_t since;
	int error;

	trace_nfs_file_write(iocb, from);

	result = nfs_key_timeout_notify(file, inode);
	if (result)
		return result;

	if (iocb->ki_flags & IOCB_DIRECT)
		return nfs_file_direct_write(iocb, from, false);

	dprintk("NFS: write(%pD2, %zu@%Ld)\n",
		file, iov_iter_count(from), (long long) iocb->ki_pos);

	if (IS_SWAPFILE(inode))
		goto out_swapfile;
	/*
	 * O_APPEND implies that we must revalidate the file length.
	 */
	if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
		result = nfs_revalidate_file_size(inode, file);
		if (result)
			return result;
	}

	nfs_clear_invalid_mapping(file->f_mapping);

	since = filemap_sample_wb_err(file->f_mapping);
	error = nfs_start_io_write(inode);
	if (error)
		return error;
	result = generic_write_checks(iocb, from);
	if (result > 0)
		result = generic_perform_write(iocb, from);
	nfs_end_io_write(inode);
	if (result <= 0)
		goto out;

	written = result;
	nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, written);

	if (mntflags & NFS_MOUNT_WRITE_EAGER) {
		result = filemap_fdatawrite_range(file->f_mapping,
						  iocb->ki_pos - written,
						  iocb->ki_pos - 1);
		if (result < 0)
			goto out;
	}
	if (mntflags & NFS_MOUNT_WRITE_WAIT) {
		filemap_fdatawait_range(file->f_mapping,
					iocb->ki_pos - written,
					iocb->ki_pos - 1);
	}
	result = generic_write_sync(iocb, written);
	if (result < 0)
		return result;

out:
	/* Return error values */
	error = filemap_check_wb_err(file->f_mapping, since);
	switch (error) {
	default:
		break;
	case -EDQUOT:
	case -EFBIG:
	case -ENOSPC:
		nfs_wb_all(inode);
		error = file_check_and_advance_wb_err(file);
		if (error < 0)
			result = error;
	}
	return result;

out_swapfile:
	printk(KERN_INFO "NFS: attempt to write to active swap file!\n");
	return -ETXTBSY;
}
EXPORT_SYMBOL_GPL(nfs_file_write);

static int
do_getlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status = 0;
	unsigned int saved_type = fl->c.flc_type;

	/* Try local locking first */
	posix_test_lock(filp, fl);
	if (fl->c.flc_type != F_UNLCK) {
		/* found a conflict */
		goto out;
	}
	fl->c.flc_type = saved_type;

	if (nfs_have_read_or_write_delegation(inode))
		goto out_noconflict;

	if (is_local)
		goto out_noconflict;

	status = NFS_PROTO(inode)->lock(filp, cmd, fl);
out:
	return status;
out_noconflict:
	fl->c.flc_type = F_UNLCK;
	goto out;
}

static int
do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	struct nfs_lock_context *l_ctx;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks.
	 */
	nfs_wb_all(inode);

	l_ctx = nfs_get_lock_context(nfs_file_open_context(filp));
	if (!IS_ERR(l_ctx)) {
		status = nfs_iocounter_wait(l_ctx);
		nfs_put_lock_context(l_ctx);
		/* NOTE: special case
		 *	If we're signalled while cleaning up locks on process
		 *	exit, we still need to complete the unlock.
		 */
		if (status < 0 && !(fl->c.flc_flags & FL_CLOSE))
			return status;
	}

	/*
	 * Use local locking if mounted with "-onolock" or with appropriate
	 * "-olocal_lock="
	 */
	if (!is_local)
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = locks_lock_file_wait(filp, fl);
	return status;
}
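/*
 * Note for orientation (summarising the comments in do_setlk() and
 * the flush in do_unlk() above): taking a lock doubles as a
 * cache-coherency point, so cross-client sharing follows the classic
 * pattern
 *
 *	fcntl(fd, F_SETLKW, &fl);	// flushes dirty pages first
 *	read(fd, buf, len);		// sees other clients' writes
 *	write(fd, buf, len);
 *	fl.l_type = F_UNLCK;
 *	fcntl(fd, F_SETLK, &fl);	// unlock flushes again
 *
 * unless the "nolock"/"local_lock=" mount options select purely local
 * locking.
 */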
static int
do_setlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
{
	struct inode *inode = filp->f_mapping->host;
	int status;

	/*
	 * Flush all pending writes before doing anything
	 * with locks.
	 */
	status = nfs_sync_mapping(filp->f_mapping);
	if (status != 0)
		goto out;

	/*
	 * Use local locking if mounted with "-onolock" or with appropriate
	 * "-olocal_lock="
	 */
	if (!is_local)
		status = NFS_PROTO(inode)->lock(filp, cmd, fl);
	else
		status = locks_lock_file_wait(filp, fl);
	if (status < 0)
		goto out;

	/*
	 * Invalidate cache to prevent missing any changes.  If
	 * the file is mapped, clear the page cache as well so
	 * those mappings will be reloaded.
	 *
	 * This makes locking act as a cache coherency point.
	 */
	nfs_sync_mapping(filp->f_mapping);
	if (!nfs_have_read_or_write_delegation(inode)) {
		nfs_zap_caches(inode);
		if (mapping_mapped(filp->f_mapping))
			nfs_revalidate_mapping(inode, filp->f_mapping);
	}
out:
	return status;
}

/*
 * Lock a (portion of) a file
 */
int nfs_lock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int ret = -ENOLCK;
	int is_local = 0;

	dprintk("NFS: lock(%pD2, t=%x, fl=%x, r=%lld:%lld)\n",
		filp, fl->c.flc_type, fl->c.flc_flags,
		(long long)fl->fl_start, (long long)fl->fl_end);

	nfs_inc_stats(inode, NFSIOS_VFSLOCK);

	if (fl->c.flc_flags & FL_RECLAIM)
		return -ENOGRACE;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FCNTL)
		is_local = 1;

	if (NFS_PROTO(inode)->lock_check_bounds != NULL) {
		ret = NFS_PROTO(inode)->lock_check_bounds(fl);
		if (ret < 0)
			goto out_err;
	}

	if (IS_GETLK(cmd))
		ret = do_getlk(filp, cmd, fl, is_local);
	else if (lock_is_unlock(fl))
		ret = do_unlk(filp, cmd, fl, is_local);
	else
		ret = do_setlk(filp, cmd, fl, is_local);
out_err:
	return ret;
}
EXPORT_SYMBOL_GPL(nfs_lock);

/*
 * Lock a (portion of) a file
 */
int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
{
	struct inode *inode = filp->f_mapping->host;
	int is_local = 0;

	dprintk("NFS: flock(%pD2, t=%x, fl=%x)\n",
		filp, fl->c.flc_type, fl->c.flc_flags);

	if (!(fl->c.flc_flags & FL_FLOCK))
		return -ENOLCK;

	if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
		is_local = 1;

	/* We're simulating flock() locks using posix locks on the server */
	if (lock_is_unlock(fl))
		return do_unlk(filp, cmd, fl, is_local);
	return do_setlk(filp, cmd, fl, is_local);
}
EXPORT_SYMBOL_GPL(nfs_flock);

const struct file_operations nfs_file_operations = {
	.llseek		= nfs_file_llseek,
	.read_iter	= nfs_file_read,
	.write_iter	= nfs_file_write,
	.mmap_prepare	= nfs_file_mmap_prepare,
	.open		= nfs_file_open,
	.flush		= nfs_file_flush,
	.release	= nfs_file_release,
	.fsync		= nfs_file_fsync,
	.lock		= nfs_lock,
	.flock		= nfs_flock,
	.splice_read	= nfs_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.check_flags	= nfs_check_flags,
	.setlease	= simple_nosetlease,
	.fop_flags	= FOP_DONTCACHE,
};
EXPORT_SYMBOL_GPL(nfs_file_operations);