1 /* 2 * linux/fs/nfs/file.c 3 * 4 * Copyright (C) 1992 Rick Sladkey 5 * 6 * Changes Copyright (C) 1994 by Florian La Roche 7 * - Do not copy data too often around in the kernel. 8 * - In nfs_file_read the return value of kmalloc wasn't checked. 9 * - Put in a better version of read look-ahead buffering. Original idea 10 * and implementation by Wai S Kok elekokws@ee.nus.sg. 11 * 12 * Expire cache on write to a file by Wai S Kok (Oct 1994). 13 * 14 * Total rewrite of read side for new NFS buffer cache.. Linus. 15 * 16 * nfs regular file handling functions 17 */ 18 19 #include <linux/time.h> 20 #include <linux/kernel.h> 21 #include <linux/errno.h> 22 #include <linux/fcntl.h> 23 #include <linux/stat.h> 24 #include <linux/nfs_fs.h> 25 #include <linux/nfs_mount.h> 26 #include <linux/mm.h> 27 #include <linux/slab.h> 28 #include <linux/pagemap.h> 29 #include <linux/smp_lock.h> 30 #include <linux/aio.h> 31 32 #include <asm/uaccess.h> 33 #include <asm/system.h> 34 35 #include "delegation.h" 36 #include "internal.h" 37 #include "iostat.h" 38 39 #define NFSDBG_FACILITY NFSDBG_FILE 40 41 static int nfs_file_open(struct inode *, struct file *); 42 static int nfs_file_release(struct inode *, struct file *); 43 static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); 44 static int nfs_file_mmap(struct file *, struct vm_area_struct *); 45 static ssize_t nfs_file_splice_read(struct file *filp, loff_t *ppos, 46 struct pipe_inode_info *pipe, 47 size_t count, unsigned int flags); 48 static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, 49 unsigned long nr_segs, loff_t pos); 50 static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, 51 unsigned long nr_segs, loff_t pos); 52 static int nfs_file_flush(struct file *, fl_owner_t id); 53 static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); 54 static int nfs_check_flags(int flags); 55 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl); 56 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl); 57 static int nfs_setlease(struct file *file, long arg, struct file_lock **fl); 58 59 static struct vm_operations_struct nfs_file_vm_ops; 60 61 const struct file_operations nfs_file_operations = { 62 .llseek = nfs_file_llseek, 63 .read = do_sync_read, 64 .write = do_sync_write, 65 .aio_read = nfs_file_read, 66 .aio_write = nfs_file_write, 67 .mmap = nfs_file_mmap, 68 .open = nfs_file_open, 69 .flush = nfs_file_flush, 70 .release = nfs_file_release, 71 .fsync = nfs_fsync, 72 .lock = nfs_lock, 73 .flock = nfs_flock, 74 .splice_read = nfs_file_splice_read, 75 .check_flags = nfs_check_flags, 76 .setlease = nfs_setlease, 77 }; 78 79 const struct inode_operations nfs_file_inode_operations = { 80 .permission = nfs_permission, 81 .getattr = nfs_getattr, 82 .setattr = nfs_setattr, 83 }; 84 85 #ifdef CONFIG_NFS_V3 86 const struct inode_operations nfs3_file_inode_operations = { 87 .permission = nfs_permission, 88 .getattr = nfs_getattr, 89 .setattr = nfs_setattr, 90 .listxattr = nfs3_listxattr, 91 .getxattr = nfs3_getxattr, 92 .setxattr = nfs3_setxattr, 93 .removexattr = nfs3_removexattr, 94 }; 95 #endif /* CONFIG_NFS_v3 */ 96 97 /* Hack for future NFS swap support */ 98 #ifndef IS_SWAPFILE 99 # define IS_SWAPFILE(inode) (0) 100 #endif 101 102 static int nfs_check_flags(int flags) 103 { 104 if ((flags & (O_APPEND | O_DIRECT)) == (O_APPEND | O_DIRECT)) 105 return -EINVAL; 106 107 return 0; 108 } 109 110 /* 111 * Open file 112 */ 113 static int 114 nfs_file_open(struct inode *inode, struct file *filp) 115 { 116 int res; 117 118 res = nfs_check_flags(filp->f_flags); 119 if (res) 120 return res; 121 122 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 123 lock_kernel(); 124 res = NFS_PROTO(inode)->file_open(inode, filp); 125 unlock_kernel(); 126 return res; 127 } 128 129 static int 130 nfs_file_release(struct inode *inode, struct file *filp) 131 { 132 /* Ensure that dirty pages are flushed out with the right creds */ 133 if (filp->f_mode & FMODE_WRITE) 134 nfs_wb_all(filp->f_path.dentry->d_inode); 135 nfs_inc_stats(inode, NFSIOS_VFSRELEASE); 136 return NFS_PROTO(inode)->file_release(inode, filp); 137 } 138 139 /** 140 * nfs_revalidate_size - Revalidate the file size 141 * @inode - pointer to inode struct 142 * @file - pointer to struct file 143 * 144 * Revalidates the file length. This is basically a wrapper around 145 * nfs_revalidate_inode() that takes into account the fact that we may 146 * have cached writes (in which case we don't care about the server's 147 * idea of what the file length is), or O_DIRECT (in which case we 148 * shouldn't trust the cache). 149 */ 150 static int nfs_revalidate_file_size(struct inode *inode, struct file *filp) 151 { 152 struct nfs_server *server = NFS_SERVER(inode); 153 struct nfs_inode *nfsi = NFS_I(inode); 154 155 if (server->flags & NFS_MOUNT_NOAC) 156 goto force_reval; 157 if (filp->f_flags & O_DIRECT) 158 goto force_reval; 159 if (nfsi->npages != 0) 160 return 0; 161 if (!(nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) && !nfs_attribute_timeout(inode)) 162 return 0; 163 force_reval: 164 return __nfs_revalidate_inode(server, inode); 165 } 166 167 static loff_t nfs_file_llseek(struct file *filp, loff_t offset, int origin) 168 { 169 /* origin == SEEK_END => we must revalidate the cached file length */ 170 if (origin == SEEK_END) { 171 struct inode *inode = filp->f_mapping->host; 172 int retval = nfs_revalidate_file_size(inode, filp); 173 if (retval < 0) 174 return (loff_t)retval; 175 } 176 return remote_llseek(filp, offset, origin); 177 } 178 179 /* 180 * Helper for nfs_file_flush() and nfs_fsync() 181 * 182 * Notice that it clears the NFS_CONTEXT_ERROR_WRITE before synching to 183 * disk, but it retrieves and clears ctx->error after synching, despite 184 * the two being set at the same time in nfs_context_set_write_error(). 185 * This is because the former is used to notify the _next_ call to 186 * nfs_file_write() that a write error occured, and hence cause it to 187 * fall back to doing a synchronous write. 188 */ 189 static int nfs_do_fsync(struct nfs_open_context *ctx, struct inode *inode) 190 { 191 int have_error, status; 192 int ret = 0; 193 194 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 195 status = nfs_wb_all(inode); 196 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 197 if (have_error) 198 ret = xchg(&ctx->error, 0); 199 if (!ret) 200 ret = status; 201 return ret; 202 } 203 204 /* 205 * Flush all dirty pages, and check for write errors. 206 * 207 */ 208 static int 209 nfs_file_flush(struct file *file, fl_owner_t id) 210 { 211 struct nfs_open_context *ctx = nfs_file_open_context(file); 212 struct inode *inode = file->f_path.dentry->d_inode; 213 int status; 214 215 dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 216 217 if ((file->f_mode & FMODE_WRITE) == 0) 218 return 0; 219 nfs_inc_stats(inode, NFSIOS_VFSFLUSH); 220 221 /* Ensure that data+attribute caches are up to date after close() */ 222 status = nfs_do_fsync(ctx, inode); 223 if (!status) 224 nfs_revalidate_inode(NFS_SERVER(inode), inode); 225 return status; 226 } 227 228 static ssize_t 229 nfs_file_read(struct kiocb *iocb, const struct iovec *iov, 230 unsigned long nr_segs, loff_t pos) 231 { 232 struct dentry * dentry = iocb->ki_filp->f_path.dentry; 233 struct inode * inode = dentry->d_inode; 234 ssize_t result; 235 size_t count = iov_length(iov, nr_segs); 236 237 #ifdef CONFIG_NFS_DIRECTIO 238 if (iocb->ki_filp->f_flags & O_DIRECT) 239 return nfs_file_direct_read(iocb, iov, nr_segs, pos); 240 #endif 241 242 dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", 243 dentry->d_parent->d_name.name, dentry->d_name.name, 244 (unsigned long) count, (unsigned long) pos); 245 246 result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); 247 nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); 248 if (!result) 249 result = generic_file_aio_read(iocb, iov, nr_segs, pos); 250 return result; 251 } 252 253 static ssize_t 254 nfs_file_splice_read(struct file *filp, loff_t *ppos, 255 struct pipe_inode_info *pipe, size_t count, 256 unsigned int flags) 257 { 258 struct dentry *dentry = filp->f_path.dentry; 259 struct inode *inode = dentry->d_inode; 260 ssize_t res; 261 262 dfprintk(VFS, "nfs: splice_read(%s/%s, %lu@%Lu)\n", 263 dentry->d_parent->d_name.name, dentry->d_name.name, 264 (unsigned long) count, (unsigned long long) *ppos); 265 266 res = nfs_revalidate_mapping(inode, filp->f_mapping); 267 if (!res) 268 res = generic_file_splice_read(filp, ppos, pipe, count, flags); 269 return res; 270 } 271 272 static int 273 nfs_file_mmap(struct file * file, struct vm_area_struct * vma) 274 { 275 struct dentry *dentry = file->f_path.dentry; 276 struct inode *inode = dentry->d_inode; 277 int status; 278 279 dfprintk(VFS, "nfs: mmap(%s/%s)\n", 280 dentry->d_parent->d_name.name, dentry->d_name.name); 281 282 status = nfs_revalidate_mapping(inode, file->f_mapping); 283 if (!status) { 284 vma->vm_ops = &nfs_file_vm_ops; 285 vma->vm_flags |= VM_CAN_NONLINEAR; 286 file_accessed(file); 287 } 288 return status; 289 } 290 291 /* 292 * Flush any dirty pages for this process, and check for write errors. 293 * The return status from this call provides a reliable indication of 294 * whether any write errors occurred for this process. 295 */ 296 static int 297 nfs_fsync(struct file *file, struct dentry *dentry, int datasync) 298 { 299 struct nfs_open_context *ctx = nfs_file_open_context(file); 300 struct inode *inode = dentry->d_inode; 301 302 dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); 303 304 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 305 return nfs_do_fsync(ctx, inode); 306 } 307 308 /* 309 * This does the "real" work of the write. We must allocate and lock the 310 * page to be sent back to the generic routine, which then copies the 311 * data from user space. 312 * 313 * If the writer ends up delaying the write, the writer needs to 314 * increment the page use counts until he is done with the page. 315 */ 316 static int nfs_write_begin(struct file *file, struct address_space *mapping, 317 loff_t pos, unsigned len, unsigned flags, 318 struct page **pagep, void **fsdata) 319 { 320 int ret; 321 pgoff_t index; 322 struct page *page; 323 index = pos >> PAGE_CACHE_SHIFT; 324 325 page = __grab_cache_page(mapping, index); 326 if (!page) 327 return -ENOMEM; 328 *pagep = page; 329 330 ret = nfs_flush_incompatible(file, page); 331 if (ret) { 332 unlock_page(page); 333 page_cache_release(page); 334 } 335 return ret; 336 } 337 338 static int nfs_write_end(struct file *file, struct address_space *mapping, 339 loff_t pos, unsigned len, unsigned copied, 340 struct page *page, void *fsdata) 341 { 342 unsigned offset = pos & (PAGE_CACHE_SIZE - 1); 343 int status; 344 345 lock_kernel(); 346 status = nfs_updatepage(file, page, offset, copied); 347 unlock_kernel(); 348 349 unlock_page(page); 350 page_cache_release(page); 351 352 if (status < 0) 353 return status; 354 return copied; 355 } 356 357 static void nfs_invalidate_page(struct page *page, unsigned long offset) 358 { 359 if (offset != 0) 360 return; 361 /* Cancel any unstarted writes on this page */ 362 nfs_wb_page_cancel(page->mapping->host, page); 363 } 364 365 static int nfs_release_page(struct page *page, gfp_t gfp) 366 { 367 /* If PagePrivate() is set, then the page is not freeable */ 368 return 0; 369 } 370 371 static int nfs_launder_page(struct page *page) 372 { 373 return nfs_wb_page(page->mapping->host, page); 374 } 375 376 const struct address_space_operations nfs_file_aops = { 377 .readpage = nfs_readpage, 378 .readpages = nfs_readpages, 379 .set_page_dirty = __set_page_dirty_nobuffers, 380 .writepage = nfs_writepage, 381 .writepages = nfs_writepages, 382 .write_begin = nfs_write_begin, 383 .write_end = nfs_write_end, 384 .invalidatepage = nfs_invalidate_page, 385 .releasepage = nfs_release_page, 386 #ifdef CONFIG_NFS_DIRECTIO 387 .direct_IO = nfs_direct_IO, 388 #endif 389 .launder_page = nfs_launder_page, 390 }; 391 392 static int nfs_vm_page_mkwrite(struct vm_area_struct *vma, struct page *page) 393 { 394 struct file *filp = vma->vm_file; 395 unsigned pagelen; 396 int ret = -EINVAL; 397 struct address_space *mapping; 398 399 lock_page(page); 400 mapping = page->mapping; 401 if (mapping != vma->vm_file->f_path.dentry->d_inode->i_mapping) 402 goto out_unlock; 403 404 ret = 0; 405 pagelen = nfs_page_length(page); 406 if (pagelen == 0) 407 goto out_unlock; 408 409 ret = nfs_flush_incompatible(filp, page); 410 if (ret != 0) 411 goto out_unlock; 412 413 ret = nfs_updatepage(filp, page, 0, pagelen); 414 if (ret == 0) 415 ret = pagelen; 416 out_unlock: 417 unlock_page(page); 418 return ret; 419 } 420 421 static struct vm_operations_struct nfs_file_vm_ops = { 422 .fault = filemap_fault, 423 .page_mkwrite = nfs_vm_page_mkwrite, 424 }; 425 426 static int nfs_need_sync_write(struct file *filp, struct inode *inode) 427 { 428 struct nfs_open_context *ctx; 429 430 if (IS_SYNC(inode) || (filp->f_flags & O_SYNC)) 431 return 1; 432 ctx = nfs_file_open_context(filp); 433 if (test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags)) 434 return 1; 435 return 0; 436 } 437 438 static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, 439 unsigned long nr_segs, loff_t pos) 440 { 441 struct dentry * dentry = iocb->ki_filp->f_path.dentry; 442 struct inode * inode = dentry->d_inode; 443 ssize_t result; 444 size_t count = iov_length(iov, nr_segs); 445 446 #ifdef CONFIG_NFS_DIRECTIO 447 if (iocb->ki_filp->f_flags & O_DIRECT) 448 return nfs_file_direct_write(iocb, iov, nr_segs, pos); 449 #endif 450 451 dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", 452 dentry->d_parent->d_name.name, dentry->d_name.name, 453 inode->i_ino, (unsigned long) count, (long long) pos); 454 455 result = -EBUSY; 456 if (IS_SWAPFILE(inode)) 457 goto out_swapfile; 458 /* 459 * O_APPEND implies that we must revalidate the file length. 460 */ 461 if (iocb->ki_filp->f_flags & O_APPEND) { 462 result = nfs_revalidate_file_size(inode, iocb->ki_filp); 463 if (result) 464 goto out; 465 } 466 467 result = count; 468 if (!count) 469 goto out; 470 471 nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); 472 result = generic_file_aio_write(iocb, iov, nr_segs, pos); 473 /* Return error values for O_SYNC and IS_SYNC() */ 474 if (result >= 0 && nfs_need_sync_write(iocb->ki_filp, inode)) { 475 int err = nfs_do_fsync(nfs_file_open_context(iocb->ki_filp), inode); 476 if (err < 0) 477 result = err; 478 } 479 out: 480 return result; 481 482 out_swapfile: 483 printk(KERN_INFO "NFS: attempt to write to active swap file!\n"); 484 goto out; 485 } 486 487 static int do_getlk(struct file *filp, int cmd, struct file_lock *fl) 488 { 489 struct inode *inode = filp->f_mapping->host; 490 int status = 0; 491 492 lock_kernel(); 493 /* Try local locking first */ 494 posix_test_lock(filp, fl); 495 if (fl->fl_type != F_UNLCK) { 496 /* found a conflict */ 497 goto out; 498 } 499 500 if (nfs_have_delegation(inode, FMODE_READ)) 501 goto out_noconflict; 502 503 if (NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM) 504 goto out_noconflict; 505 506 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 507 out: 508 unlock_kernel(); 509 return status; 510 out_noconflict: 511 fl->fl_type = F_UNLCK; 512 goto out; 513 } 514 515 static int do_vfs_lock(struct file *file, struct file_lock *fl) 516 { 517 int res = 0; 518 switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) { 519 case FL_POSIX: 520 res = posix_lock_file_wait(file, fl); 521 break; 522 case FL_FLOCK: 523 res = flock_lock_file_wait(file, fl); 524 break; 525 default: 526 BUG(); 527 } 528 if (res < 0) 529 dprintk(KERN_WARNING "%s: VFS is out of sync with lock manager" 530 " - error %d!\n", 531 __FUNCTION__, res); 532 return res; 533 } 534 535 static int do_unlk(struct file *filp, int cmd, struct file_lock *fl) 536 { 537 struct inode *inode = filp->f_mapping->host; 538 int status; 539 540 /* 541 * Flush all pending writes before doing anything 542 * with locks.. 543 */ 544 nfs_sync_mapping(filp->f_mapping); 545 546 /* NOTE: special case 547 * If we're signalled while cleaning up locks on process exit, we 548 * still need to complete the unlock. 549 */ 550 lock_kernel(); 551 /* Use local locking if mounted with "-onolock" */ 552 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) 553 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 554 else 555 status = do_vfs_lock(filp, fl); 556 unlock_kernel(); 557 return status; 558 } 559 560 static int do_setlk(struct file *filp, int cmd, struct file_lock *fl) 561 { 562 struct inode *inode = filp->f_mapping->host; 563 int status; 564 565 /* 566 * Flush all pending writes before doing anything 567 * with locks.. 568 */ 569 status = nfs_sync_mapping(filp->f_mapping); 570 if (status != 0) 571 goto out; 572 573 lock_kernel(); 574 /* Use local locking if mounted with "-onolock" */ 575 if (!(NFS_SERVER(inode)->flags & NFS_MOUNT_NONLM)) { 576 status = NFS_PROTO(inode)->lock(filp, cmd, fl); 577 /* If we were signalled we still need to ensure that 578 * we clean up any state on the server. We therefore 579 * record the lock call as having succeeded in order to 580 * ensure that locks_remove_posix() cleans it out when 581 * the process exits. 582 */ 583 if (status == -EINTR || status == -ERESTARTSYS) 584 do_vfs_lock(filp, fl); 585 } else 586 status = do_vfs_lock(filp, fl); 587 unlock_kernel(); 588 if (status < 0) 589 goto out; 590 /* 591 * Make sure we clear the cache whenever we try to get the lock. 592 * This makes locking act as a cache coherency point. 593 */ 594 nfs_sync_mapping(filp->f_mapping); 595 nfs_zap_caches(inode); 596 out: 597 return status; 598 } 599 600 /* 601 * Lock a (portion of) a file 602 */ 603 static int nfs_lock(struct file *filp, int cmd, struct file_lock *fl) 604 { 605 struct inode * inode = filp->f_mapping->host; 606 607 dprintk("NFS: nfs_lock(f=%s/%ld, t=%x, fl=%x, r=%Ld:%Ld)\n", 608 inode->i_sb->s_id, inode->i_ino, 609 fl->fl_type, fl->fl_flags, 610 (long long)fl->fl_start, (long long)fl->fl_end); 611 nfs_inc_stats(inode, NFSIOS_VFSLOCK); 612 613 /* No mandatory locks over NFS */ 614 if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK) 615 return -ENOLCK; 616 617 if (IS_GETLK(cmd)) 618 return do_getlk(filp, cmd, fl); 619 if (fl->fl_type == F_UNLCK) 620 return do_unlk(filp, cmd, fl); 621 return do_setlk(filp, cmd, fl); 622 } 623 624 /* 625 * Lock a (portion of) a file 626 */ 627 static int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) 628 { 629 dprintk("NFS: nfs_flock(f=%s/%ld, t=%x, fl=%x)\n", 630 filp->f_path.dentry->d_inode->i_sb->s_id, 631 filp->f_path.dentry->d_inode->i_ino, 632 fl->fl_type, fl->fl_flags); 633 634 /* 635 * No BSD flocks over NFS allowed. 636 * Note: we could try to fake a POSIX lock request here by 637 * using ((u32) filp | 0x80000000) or some such as the pid. 638 * Not sure whether that would be unique, though, or whether 639 * that would break in other places. 640 */ 641 if (!(fl->fl_flags & FL_FLOCK)) 642 return -ENOLCK; 643 644 /* We're simulating flock() locks using posix locks on the server */ 645 fl->fl_owner = (fl_owner_t)filp; 646 fl->fl_start = 0; 647 fl->fl_end = OFFSET_MAX; 648 649 if (fl->fl_type == F_UNLCK) 650 return do_unlk(filp, cmd, fl); 651 return do_setlk(filp, cmd, fl); 652 } 653 654 static int nfs_setlease(struct file *file, long arg, struct file_lock **fl) 655 { 656 /* 657 * There is no protocol support for leases, so we have no way 658 * to implement them correctly in the face of opens by other 659 * clients. 660 */ 661 return -EINVAL; 662 } 663