1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 #include <linux/splice.h> 19 #include "read_write.h" 20 21 #include <asm/uaccess.h> 22 #include <asm/unistd.h> 23 24 const struct file_operations generic_ro_fops = { 25 .llseek = generic_file_llseek, 26 .read = do_sync_read, 27 .aio_read = generic_file_aio_read, 28 .mmap = generic_file_readonly_mmap, 29 .splice_read = generic_file_splice_read, 30 }; 31 32 EXPORT_SYMBOL(generic_ro_fops); 33 34 /** 35 * generic_file_llseek_unlocked - lockless generic llseek implementation 36 * @file: file structure to seek on 37 * @offset: file offset to seek to 38 * @origin: type of seek 39 * 40 * Updates the file offset to the value specified by @offset and @origin. 41 * Locking must be provided by the caller. 42 */ 43 loff_t 44 generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 45 { 46 struct inode *inode = file->f_mapping->host; 47 48 switch (origin) { 49 case SEEK_END: 50 offset += inode->i_size; 51 break; 52 case SEEK_CUR: 53 /* 54 * Here we special-case the lseek(fd, 0, SEEK_CUR) 55 * position-querying operation. Avoid rewriting the "same" 56 * f_pos value back to the file because a concurrent read(), 57 * write() or lseek() might have altered it 58 */ 59 if (offset == 0) 60 return file->f_pos; 61 offset += file->f_pos; 62 break; 63 } 64 65 if (offset < 0 || offset > inode->i_sb->s_maxbytes) 66 return -EINVAL; 67 68 /* Special lock needed here? */ 69 if (offset != file->f_pos) { 70 file->f_pos = offset; 71 file->f_version = 0; 72 } 73 74 return offset; 75 } 76 EXPORT_SYMBOL(generic_file_llseek_unlocked); 77 78 /** 79 * generic_file_llseek - generic llseek implementation for regular files 80 * @file: file structure to seek on 81 * @offset: file offset to seek to 82 * @origin: type of seek 83 * 84 * This is a generic implemenation of ->llseek useable for all normal local 85 * filesystems. It just updates the file offset to the value specified by 86 * @offset and @origin under i_mutex. 87 */ 88 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 89 { 90 loff_t rval; 91 92 mutex_lock(&file->f_dentry->d_inode->i_mutex); 93 rval = generic_file_llseek_unlocked(file, offset, origin); 94 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 95 96 return rval; 97 } 98 EXPORT_SYMBOL(generic_file_llseek); 99 100 /** 101 * noop_llseek - No Operation Performed llseek implementation 102 * @file: file structure to seek on 103 * @offset: file offset to seek to 104 * @origin: type of seek 105 * 106 * This is an implementation of ->llseek useable for the rare special case when 107 * userspace expects the seek to succeed but the (device) file is actually not 108 * able to perform the seek. In this case you use noop_llseek() instead of 109 * falling back to the default implementation of ->llseek. 110 */ 111 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 112 { 113 return file->f_pos; 114 } 115 EXPORT_SYMBOL(noop_llseek); 116 117 loff_t no_llseek(struct file *file, loff_t offset, int origin) 118 { 119 return -ESPIPE; 120 } 121 EXPORT_SYMBOL(no_llseek); 122 123 loff_t default_llseek(struct file *file, loff_t offset, int origin) 124 { 125 loff_t retval; 126 127 lock_kernel(); 128 switch (origin) { 129 case SEEK_END: 130 offset += i_size_read(file->f_path.dentry->d_inode); 131 break; 132 case SEEK_CUR: 133 if (offset == 0) { 134 retval = file->f_pos; 135 goto out; 136 } 137 offset += file->f_pos; 138 } 139 retval = -EINVAL; 140 if (offset >= 0) { 141 if (offset != file->f_pos) { 142 file->f_pos = offset; 143 file->f_version = 0; 144 } 145 retval = offset; 146 } 147 out: 148 unlock_kernel(); 149 return retval; 150 } 151 EXPORT_SYMBOL(default_llseek); 152 153 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 154 { 155 loff_t (*fn)(struct file *, loff_t, int); 156 157 fn = no_llseek; 158 if (file->f_mode & FMODE_LSEEK) { 159 fn = default_llseek; 160 if (file->f_op && file->f_op->llseek) 161 fn = file->f_op->llseek; 162 } 163 return fn(file, offset, origin); 164 } 165 EXPORT_SYMBOL(vfs_llseek); 166 167 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 168 { 169 off_t retval; 170 struct file * file; 171 int fput_needed; 172 173 retval = -EBADF; 174 file = fget_light(fd, &fput_needed); 175 if (!file) 176 goto bad; 177 178 retval = -EINVAL; 179 if (origin <= SEEK_MAX) { 180 loff_t res = vfs_llseek(file, offset, origin); 181 retval = res; 182 if (res != (loff_t)retval) 183 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 184 } 185 fput_light(file, fput_needed); 186 bad: 187 return retval; 188 } 189 190 #ifdef __ARCH_WANT_SYS_LLSEEK 191 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 192 unsigned long, offset_low, loff_t __user *, result, 193 unsigned int, origin) 194 { 195 int retval; 196 struct file * file; 197 loff_t offset; 198 int fput_needed; 199 200 retval = -EBADF; 201 file = fget_light(fd, &fput_needed); 202 if (!file) 203 goto bad; 204 205 retval = -EINVAL; 206 if (origin > SEEK_MAX) 207 goto out_putf; 208 209 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 210 origin); 211 212 retval = (int)offset; 213 if (offset >= 0) { 214 retval = -EFAULT; 215 if (!copy_to_user(result, &offset, sizeof(offset))) 216 retval = 0; 217 } 218 out_putf: 219 fput_light(file, fput_needed); 220 bad: 221 return retval; 222 } 223 #endif 224 225 /* 226 * rw_verify_area doesn't like huge counts. We limit 227 * them to something that fits in "int" so that others 228 * won't have to do range checks all the time. 229 */ 230 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 231 232 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 233 { 234 struct inode *inode; 235 loff_t pos; 236 int retval = -EINVAL; 237 238 inode = file->f_path.dentry->d_inode; 239 if (unlikely((ssize_t) count < 0)) 240 return retval; 241 pos = *ppos; 242 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 243 return retval; 244 245 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 246 retval = locks_mandatory_area( 247 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 248 inode, file, pos, count); 249 if (retval < 0) 250 return retval; 251 } 252 retval = security_file_permission(file, 253 read_write == READ ? MAY_READ : MAY_WRITE); 254 if (retval) 255 return retval; 256 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 257 } 258 259 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 260 { 261 set_current_state(TASK_UNINTERRUPTIBLE); 262 if (!kiocbIsKicked(iocb)) 263 schedule(); 264 else 265 kiocbClearKicked(iocb); 266 __set_current_state(TASK_RUNNING); 267 } 268 269 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 270 { 271 struct iovec iov = { .iov_base = buf, .iov_len = len }; 272 struct kiocb kiocb; 273 ssize_t ret; 274 275 init_sync_kiocb(&kiocb, filp); 276 kiocb.ki_pos = *ppos; 277 kiocb.ki_left = len; 278 kiocb.ki_nbytes = len; 279 280 for (;;) { 281 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 282 if (ret != -EIOCBRETRY) 283 break; 284 wait_on_retry_sync_kiocb(&kiocb); 285 } 286 287 if (-EIOCBQUEUED == ret) 288 ret = wait_on_sync_kiocb(&kiocb); 289 *ppos = kiocb.ki_pos; 290 return ret; 291 } 292 293 EXPORT_SYMBOL(do_sync_read); 294 295 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 296 { 297 ssize_t ret; 298 299 if (!(file->f_mode & FMODE_READ)) 300 return -EBADF; 301 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 302 return -EINVAL; 303 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 304 return -EFAULT; 305 306 ret = rw_verify_area(READ, file, pos, count); 307 if (ret >= 0) { 308 count = ret; 309 if (file->f_op->read) 310 ret = file->f_op->read(file, buf, count, pos); 311 else 312 ret = do_sync_read(file, buf, count, pos); 313 if (ret > 0) { 314 fsnotify_access(file); 315 add_rchar(current, ret); 316 } 317 inc_syscr(current); 318 } 319 320 return ret; 321 } 322 323 EXPORT_SYMBOL(vfs_read); 324 325 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 326 { 327 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 328 struct kiocb kiocb; 329 ssize_t ret; 330 331 init_sync_kiocb(&kiocb, filp); 332 kiocb.ki_pos = *ppos; 333 kiocb.ki_left = len; 334 kiocb.ki_nbytes = len; 335 336 for (;;) { 337 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 338 if (ret != -EIOCBRETRY) 339 break; 340 wait_on_retry_sync_kiocb(&kiocb); 341 } 342 343 if (-EIOCBQUEUED == ret) 344 ret = wait_on_sync_kiocb(&kiocb); 345 *ppos = kiocb.ki_pos; 346 return ret; 347 } 348 349 EXPORT_SYMBOL(do_sync_write); 350 351 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 352 { 353 ssize_t ret; 354 355 if (!(file->f_mode & FMODE_WRITE)) 356 return -EBADF; 357 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 358 return -EINVAL; 359 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 360 return -EFAULT; 361 362 ret = rw_verify_area(WRITE, file, pos, count); 363 if (ret >= 0) { 364 count = ret; 365 if (file->f_op->write) 366 ret = file->f_op->write(file, buf, count, pos); 367 else 368 ret = do_sync_write(file, buf, count, pos); 369 if (ret > 0) { 370 fsnotify_modify(file); 371 add_wchar(current, ret); 372 } 373 inc_syscw(current); 374 } 375 376 return ret; 377 } 378 379 EXPORT_SYMBOL(vfs_write); 380 381 static inline loff_t file_pos_read(struct file *file) 382 { 383 return file->f_pos; 384 } 385 386 static inline void file_pos_write(struct file *file, loff_t pos) 387 { 388 file->f_pos = pos; 389 } 390 391 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 392 { 393 struct file *file; 394 ssize_t ret = -EBADF; 395 int fput_needed; 396 397 file = fget_light(fd, &fput_needed); 398 if (file) { 399 loff_t pos = file_pos_read(file); 400 ret = vfs_read(file, buf, count, &pos); 401 file_pos_write(file, pos); 402 fput_light(file, fput_needed); 403 } 404 405 return ret; 406 } 407 408 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 409 size_t, count) 410 { 411 struct file *file; 412 ssize_t ret = -EBADF; 413 int fput_needed; 414 415 file = fget_light(fd, &fput_needed); 416 if (file) { 417 loff_t pos = file_pos_read(file); 418 ret = vfs_write(file, buf, count, &pos); 419 file_pos_write(file, pos); 420 fput_light(file, fput_needed); 421 } 422 423 return ret; 424 } 425 426 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 427 size_t count, loff_t pos) 428 { 429 struct file *file; 430 ssize_t ret = -EBADF; 431 int fput_needed; 432 433 if (pos < 0) 434 return -EINVAL; 435 436 file = fget_light(fd, &fput_needed); 437 if (file) { 438 ret = -ESPIPE; 439 if (file->f_mode & FMODE_PREAD) 440 ret = vfs_read(file, buf, count, &pos); 441 fput_light(file, fput_needed); 442 } 443 444 return ret; 445 } 446 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 447 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 448 { 449 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 450 (size_t) count, pos); 451 } 452 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 453 #endif 454 455 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 456 size_t count, loff_t pos) 457 { 458 struct file *file; 459 ssize_t ret = -EBADF; 460 int fput_needed; 461 462 if (pos < 0) 463 return -EINVAL; 464 465 file = fget_light(fd, &fput_needed); 466 if (file) { 467 ret = -ESPIPE; 468 if (file->f_mode & FMODE_PWRITE) 469 ret = vfs_write(file, buf, count, &pos); 470 fput_light(file, fput_needed); 471 } 472 473 return ret; 474 } 475 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 476 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 477 { 478 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 479 (size_t) count, pos); 480 } 481 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 482 #endif 483 484 /* 485 * Reduce an iovec's length in-place. Return the resulting number of segments 486 */ 487 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 488 { 489 unsigned long seg = 0; 490 size_t len = 0; 491 492 while (seg < nr_segs) { 493 seg++; 494 if (len + iov->iov_len >= to) { 495 iov->iov_len = to - len; 496 break; 497 } 498 len += iov->iov_len; 499 iov++; 500 } 501 return seg; 502 } 503 EXPORT_SYMBOL(iov_shorten); 504 505 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 506 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 507 { 508 struct kiocb kiocb; 509 ssize_t ret; 510 511 init_sync_kiocb(&kiocb, filp); 512 kiocb.ki_pos = *ppos; 513 kiocb.ki_left = len; 514 kiocb.ki_nbytes = len; 515 516 for (;;) { 517 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 518 if (ret != -EIOCBRETRY) 519 break; 520 wait_on_retry_sync_kiocb(&kiocb); 521 } 522 523 if (ret == -EIOCBQUEUED) 524 ret = wait_on_sync_kiocb(&kiocb); 525 *ppos = kiocb.ki_pos; 526 return ret; 527 } 528 529 /* Do it by hand, with file-ops */ 530 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 531 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 532 { 533 struct iovec *vector = iov; 534 ssize_t ret = 0; 535 536 while (nr_segs > 0) { 537 void __user *base; 538 size_t len; 539 ssize_t nr; 540 541 base = vector->iov_base; 542 len = vector->iov_len; 543 vector++; 544 nr_segs--; 545 546 nr = fn(filp, base, len, ppos); 547 548 if (nr < 0) { 549 if (!ret) 550 ret = nr; 551 break; 552 } 553 ret += nr; 554 if (nr != len) 555 break; 556 } 557 558 return ret; 559 } 560 561 /* A write operation does a read from user space and vice versa */ 562 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 563 564 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 565 unsigned long nr_segs, unsigned long fast_segs, 566 struct iovec *fast_pointer, 567 struct iovec **ret_pointer) 568 { 569 unsigned long seg; 570 ssize_t ret; 571 struct iovec *iov = fast_pointer; 572 573 /* 574 * SuS says "The readv() function *may* fail if the iovcnt argument 575 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 576 * traditionally returned zero for zero segments, so... 577 */ 578 if (nr_segs == 0) { 579 ret = 0; 580 goto out; 581 } 582 583 /* 584 * First get the "struct iovec" from user memory and 585 * verify all the pointers 586 */ 587 if (nr_segs > UIO_MAXIOV) { 588 ret = -EINVAL; 589 goto out; 590 } 591 if (nr_segs > fast_segs) { 592 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 593 if (iov == NULL) { 594 ret = -ENOMEM; 595 goto out; 596 } 597 } 598 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 599 ret = -EFAULT; 600 goto out; 601 } 602 603 /* 604 * According to the Single Unix Specification we should return EINVAL 605 * if an element length is < 0 when cast to ssize_t or if the 606 * total length would overflow the ssize_t return value of the 607 * system call. 608 */ 609 ret = 0; 610 for (seg = 0; seg < nr_segs; seg++) { 611 void __user *buf = iov[seg].iov_base; 612 ssize_t len = (ssize_t)iov[seg].iov_len; 613 614 /* see if we we're about to use an invalid len or if 615 * it's about to overflow ssize_t */ 616 if (len < 0 || (ret + len < ret)) { 617 ret = -EINVAL; 618 goto out; 619 } 620 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 621 ret = -EFAULT; 622 goto out; 623 } 624 625 ret += len; 626 } 627 out: 628 *ret_pointer = iov; 629 return ret; 630 } 631 632 static ssize_t do_readv_writev(int type, struct file *file, 633 const struct iovec __user * uvector, 634 unsigned long nr_segs, loff_t *pos) 635 { 636 size_t tot_len; 637 struct iovec iovstack[UIO_FASTIOV]; 638 struct iovec *iov = iovstack; 639 ssize_t ret; 640 io_fn_t fn; 641 iov_fn_t fnv; 642 643 if (!file->f_op) { 644 ret = -EINVAL; 645 goto out; 646 } 647 648 ret = rw_copy_check_uvector(type, uvector, nr_segs, 649 ARRAY_SIZE(iovstack), iovstack, &iov); 650 if (ret <= 0) 651 goto out; 652 653 tot_len = ret; 654 ret = rw_verify_area(type, file, pos, tot_len); 655 if (ret < 0) 656 goto out; 657 658 fnv = NULL; 659 if (type == READ) { 660 fn = file->f_op->read; 661 fnv = file->f_op->aio_read; 662 } else { 663 fn = (io_fn_t)file->f_op->write; 664 fnv = file->f_op->aio_write; 665 } 666 667 if (fnv) 668 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 669 pos, fnv); 670 else 671 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 672 673 out: 674 if (iov != iovstack) 675 kfree(iov); 676 if ((ret + (type == READ)) > 0) { 677 if (type == READ) 678 fsnotify_access(file); 679 else 680 fsnotify_modify(file); 681 } 682 return ret; 683 } 684 685 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 686 unsigned long vlen, loff_t *pos) 687 { 688 if (!(file->f_mode & FMODE_READ)) 689 return -EBADF; 690 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 691 return -EINVAL; 692 693 return do_readv_writev(READ, file, vec, vlen, pos); 694 } 695 696 EXPORT_SYMBOL(vfs_readv); 697 698 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 699 unsigned long vlen, loff_t *pos) 700 { 701 if (!(file->f_mode & FMODE_WRITE)) 702 return -EBADF; 703 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 704 return -EINVAL; 705 706 return do_readv_writev(WRITE, file, vec, vlen, pos); 707 } 708 709 EXPORT_SYMBOL(vfs_writev); 710 711 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 712 unsigned long, vlen) 713 { 714 struct file *file; 715 ssize_t ret = -EBADF; 716 int fput_needed; 717 718 file = fget_light(fd, &fput_needed); 719 if (file) { 720 loff_t pos = file_pos_read(file); 721 ret = vfs_readv(file, vec, vlen, &pos); 722 file_pos_write(file, pos); 723 fput_light(file, fput_needed); 724 } 725 726 if (ret > 0) 727 add_rchar(current, ret); 728 inc_syscr(current); 729 return ret; 730 } 731 732 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 733 unsigned long, vlen) 734 { 735 struct file *file; 736 ssize_t ret = -EBADF; 737 int fput_needed; 738 739 file = fget_light(fd, &fput_needed); 740 if (file) { 741 loff_t pos = file_pos_read(file); 742 ret = vfs_writev(file, vec, vlen, &pos); 743 file_pos_write(file, pos); 744 fput_light(file, fput_needed); 745 } 746 747 if (ret > 0) 748 add_wchar(current, ret); 749 inc_syscw(current); 750 return ret; 751 } 752 753 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 754 { 755 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 756 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 757 } 758 759 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 760 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 761 { 762 loff_t pos = pos_from_hilo(pos_h, pos_l); 763 struct file *file; 764 ssize_t ret = -EBADF; 765 int fput_needed; 766 767 if (pos < 0) 768 return -EINVAL; 769 770 file = fget_light(fd, &fput_needed); 771 if (file) { 772 ret = -ESPIPE; 773 if (file->f_mode & FMODE_PREAD) 774 ret = vfs_readv(file, vec, vlen, &pos); 775 fput_light(file, fput_needed); 776 } 777 778 if (ret > 0) 779 add_rchar(current, ret); 780 inc_syscr(current); 781 return ret; 782 } 783 784 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 785 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 786 { 787 loff_t pos = pos_from_hilo(pos_h, pos_l); 788 struct file *file; 789 ssize_t ret = -EBADF; 790 int fput_needed; 791 792 if (pos < 0) 793 return -EINVAL; 794 795 file = fget_light(fd, &fput_needed); 796 if (file) { 797 ret = -ESPIPE; 798 if (file->f_mode & FMODE_PWRITE) 799 ret = vfs_writev(file, vec, vlen, &pos); 800 fput_light(file, fput_needed); 801 } 802 803 if (ret > 0) 804 add_wchar(current, ret); 805 inc_syscw(current); 806 return ret; 807 } 808 809 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 810 size_t count, loff_t max) 811 { 812 struct file * in_file, * out_file; 813 struct inode * in_inode, * out_inode; 814 loff_t pos; 815 ssize_t retval; 816 int fput_needed_in, fput_needed_out, fl; 817 818 /* 819 * Get input file, and verify that it is ok.. 820 */ 821 retval = -EBADF; 822 in_file = fget_light(in_fd, &fput_needed_in); 823 if (!in_file) 824 goto out; 825 if (!(in_file->f_mode & FMODE_READ)) 826 goto fput_in; 827 retval = -ESPIPE; 828 if (!ppos) 829 ppos = &in_file->f_pos; 830 else 831 if (!(in_file->f_mode & FMODE_PREAD)) 832 goto fput_in; 833 retval = rw_verify_area(READ, in_file, ppos, count); 834 if (retval < 0) 835 goto fput_in; 836 count = retval; 837 838 /* 839 * Get output file, and verify that it is ok.. 840 */ 841 retval = -EBADF; 842 out_file = fget_light(out_fd, &fput_needed_out); 843 if (!out_file) 844 goto fput_in; 845 if (!(out_file->f_mode & FMODE_WRITE)) 846 goto fput_out; 847 retval = -EINVAL; 848 in_inode = in_file->f_path.dentry->d_inode; 849 out_inode = out_file->f_path.dentry->d_inode; 850 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 851 if (retval < 0) 852 goto fput_out; 853 count = retval; 854 855 if (!max) 856 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 857 858 pos = *ppos; 859 if (unlikely(pos + count > max)) { 860 retval = -EOVERFLOW; 861 if (pos >= max) 862 goto fput_out; 863 count = max - pos; 864 } 865 866 fl = 0; 867 #if 0 868 /* 869 * We need to debate whether we can enable this or not. The 870 * man page documents EAGAIN return for the output at least, 871 * and the application is arguably buggy if it doesn't expect 872 * EAGAIN on a non-blocking file descriptor. 873 */ 874 if (in_file->f_flags & O_NONBLOCK) 875 fl = SPLICE_F_NONBLOCK; 876 #endif 877 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 878 879 if (retval > 0) { 880 add_rchar(current, retval); 881 add_wchar(current, retval); 882 } 883 884 inc_syscr(current); 885 inc_syscw(current); 886 if (*ppos > max) 887 retval = -EOVERFLOW; 888 889 fput_out: 890 fput_light(out_file, fput_needed_out); 891 fput_in: 892 fput_light(in_file, fput_needed_in); 893 out: 894 return retval; 895 } 896 897 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 898 { 899 loff_t pos; 900 off_t off; 901 ssize_t ret; 902 903 if (offset) { 904 if (unlikely(get_user(off, offset))) 905 return -EFAULT; 906 pos = off; 907 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 908 if (unlikely(put_user(pos, offset))) 909 return -EFAULT; 910 return ret; 911 } 912 913 return do_sendfile(out_fd, in_fd, NULL, count, 0); 914 } 915 916 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 917 { 918 loff_t pos; 919 ssize_t ret; 920 921 if (offset) { 922 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 923 return -EFAULT; 924 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 925 if (unlikely(put_user(pos, offset))) 926 return -EFAULT; 927 return ret; 928 } 929 930 return do_sendfile(out_fd, in_fd, NULL, count, 0); 931 } 932