1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 #include <linux/splice.h> 19 #include "read_write.h" 20 21 #include <asm/uaccess.h> 22 #include <asm/unistd.h> 23 24 const struct file_operations generic_ro_fops = { 25 .llseek = generic_file_llseek, 26 .read = do_sync_read, 27 .aio_read = generic_file_aio_read, 28 .mmap = generic_file_readonly_mmap, 29 .splice_read = generic_file_splice_read, 30 }; 31 32 EXPORT_SYMBOL(generic_ro_fops); 33 34 /** 35 * generic_file_llseek_unlocked - lockless generic llseek implementation 36 * @file: file structure to seek on 37 * @offset: file offset to seek to 38 * @origin: type of seek 39 * 40 * Updates the file offset to the value specified by @offset and @origin. 41 * Locking must be provided by the caller. 42 */ 43 loff_t 44 generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 45 { 46 struct inode *inode = file->f_mapping->host; 47 48 switch (origin) { 49 case SEEK_END: 50 offset += inode->i_size; 51 break; 52 case SEEK_CUR: 53 /* 54 * Here we special-case the lseek(fd, 0, SEEK_CUR) 55 * position-querying operation. Avoid rewriting the "same" 56 * f_pos value back to the file because a concurrent read(), 57 * write() or lseek() might have altered it 58 */ 59 if (offset == 0) 60 return file->f_pos; 61 offset += file->f_pos; 62 break; 63 } 64 65 if (offset < 0 || offset > inode->i_sb->s_maxbytes) 66 return -EINVAL; 67 68 /* Special lock needed here? */ 69 if (offset != file->f_pos) { 70 file->f_pos = offset; 71 file->f_version = 0; 72 } 73 74 return offset; 75 } 76 EXPORT_SYMBOL(generic_file_llseek_unlocked); 77 78 /** 79 * generic_file_llseek - generic llseek implementation for regular files 80 * @file: file structure to seek on 81 * @offset: file offset to seek to 82 * @origin: type of seek 83 * 84 * This is a generic implemenation of ->llseek useable for all normal local 85 * filesystems. It just updates the file offset to the value specified by 86 * @offset and @origin under i_mutex. 87 */ 88 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 89 { 90 loff_t rval; 91 92 mutex_lock(&file->f_dentry->d_inode->i_mutex); 93 rval = generic_file_llseek_unlocked(file, offset, origin); 94 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 95 96 return rval; 97 } 98 EXPORT_SYMBOL(generic_file_llseek); 99 100 /** 101 * noop_llseek - No Operation Performed llseek implementation 102 * @file: file structure to seek on 103 * @offset: file offset to seek to 104 * @origin: type of seek 105 * 106 * This is an implementation of ->llseek useable for the rare special case when 107 * userspace expects the seek to succeed but the (device) file is actually not 108 * able to perform the seek. In this case you use noop_llseek() instead of 109 * falling back to the default implementation of ->llseek. 110 */ 111 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 112 { 113 return file->f_pos; 114 } 115 EXPORT_SYMBOL(noop_llseek); 116 117 loff_t no_llseek(struct file *file, loff_t offset, int origin) 118 { 119 return -ESPIPE; 120 } 121 EXPORT_SYMBOL(no_llseek); 122 123 loff_t default_llseek(struct file *file, loff_t offset, int origin) 124 { 125 loff_t retval; 126 127 mutex_lock(&file->f_dentry->d_inode->i_mutex); 128 switch (origin) { 129 case SEEK_END: 130 offset += i_size_read(file->f_path.dentry->d_inode); 131 break; 132 case SEEK_CUR: 133 if (offset == 0) { 134 retval = file->f_pos; 135 goto out; 136 } 137 offset += file->f_pos; 138 } 139 retval = -EINVAL; 140 if (offset >= 0) { 141 if (offset != file->f_pos) { 142 file->f_pos = offset; 143 file->f_version = 0; 144 } 145 retval = offset; 146 } 147 out: 148 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 149 return retval; 150 } 151 EXPORT_SYMBOL(default_llseek); 152 153 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 154 { 155 loff_t (*fn)(struct file *, loff_t, int); 156 157 fn = no_llseek; 158 if (file->f_mode & FMODE_LSEEK) { 159 if (file->f_op && file->f_op->llseek) 160 fn = file->f_op->llseek; 161 } 162 return fn(file, offset, origin); 163 } 164 EXPORT_SYMBOL(vfs_llseek); 165 166 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 167 { 168 off_t retval; 169 struct file * file; 170 int fput_needed; 171 172 retval = -EBADF; 173 file = fget_light(fd, &fput_needed); 174 if (!file) 175 goto bad; 176 177 retval = -EINVAL; 178 if (origin <= SEEK_MAX) { 179 loff_t res = vfs_llseek(file, offset, origin); 180 retval = res; 181 if (res != (loff_t)retval) 182 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 183 } 184 fput_light(file, fput_needed); 185 bad: 186 return retval; 187 } 188 189 #ifdef __ARCH_WANT_SYS_LLSEEK 190 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 191 unsigned long, offset_low, loff_t __user *, result, 192 unsigned int, origin) 193 { 194 int retval; 195 struct file * file; 196 loff_t offset; 197 int fput_needed; 198 199 retval = -EBADF; 200 file = fget_light(fd, &fput_needed); 201 if (!file) 202 goto bad; 203 204 retval = -EINVAL; 205 if (origin > SEEK_MAX) 206 goto out_putf; 207 208 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 209 origin); 210 211 retval = (int)offset; 212 if (offset >= 0) { 213 retval = -EFAULT; 214 if (!copy_to_user(result, &offset, sizeof(offset))) 215 retval = 0; 216 } 217 out_putf: 218 fput_light(file, fput_needed); 219 bad: 220 return retval; 221 } 222 #endif 223 224 /* 225 * rw_verify_area doesn't like huge counts. We limit 226 * them to something that fits in "int" so that others 227 * won't have to do range checks all the time. 228 */ 229 #define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) 230 231 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 232 { 233 struct inode *inode; 234 loff_t pos; 235 int retval = -EINVAL; 236 237 inode = file->f_path.dentry->d_inode; 238 if (unlikely((ssize_t) count < 0)) 239 return retval; 240 pos = *ppos; 241 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 242 return retval; 243 244 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 245 retval = locks_mandatory_area( 246 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 247 inode, file, pos, count); 248 if (retval < 0) 249 return retval; 250 } 251 retval = security_file_permission(file, 252 read_write == READ ? MAY_READ : MAY_WRITE); 253 if (retval) 254 return retval; 255 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 256 } 257 258 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 259 { 260 set_current_state(TASK_UNINTERRUPTIBLE); 261 if (!kiocbIsKicked(iocb)) 262 schedule(); 263 else 264 kiocbClearKicked(iocb); 265 __set_current_state(TASK_RUNNING); 266 } 267 268 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 269 { 270 struct iovec iov = { .iov_base = buf, .iov_len = len }; 271 struct kiocb kiocb; 272 ssize_t ret; 273 274 init_sync_kiocb(&kiocb, filp); 275 kiocb.ki_pos = *ppos; 276 kiocb.ki_left = len; 277 kiocb.ki_nbytes = len; 278 279 for (;;) { 280 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 281 if (ret != -EIOCBRETRY) 282 break; 283 wait_on_retry_sync_kiocb(&kiocb); 284 } 285 286 if (-EIOCBQUEUED == ret) 287 ret = wait_on_sync_kiocb(&kiocb); 288 *ppos = kiocb.ki_pos; 289 return ret; 290 } 291 292 EXPORT_SYMBOL(do_sync_read); 293 294 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 295 { 296 ssize_t ret; 297 298 if (!(file->f_mode & FMODE_READ)) 299 return -EBADF; 300 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 301 return -EINVAL; 302 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 303 return -EFAULT; 304 305 ret = rw_verify_area(READ, file, pos, count); 306 if (ret >= 0) { 307 count = ret; 308 if (file->f_op->read) 309 ret = file->f_op->read(file, buf, count, pos); 310 else 311 ret = do_sync_read(file, buf, count, pos); 312 if (ret > 0) { 313 fsnotify_access(file); 314 add_rchar(current, ret); 315 } 316 inc_syscr(current); 317 } 318 319 return ret; 320 } 321 322 EXPORT_SYMBOL(vfs_read); 323 324 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 325 { 326 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 327 struct kiocb kiocb; 328 ssize_t ret; 329 330 init_sync_kiocb(&kiocb, filp); 331 kiocb.ki_pos = *ppos; 332 kiocb.ki_left = len; 333 kiocb.ki_nbytes = len; 334 335 for (;;) { 336 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 337 if (ret != -EIOCBRETRY) 338 break; 339 wait_on_retry_sync_kiocb(&kiocb); 340 } 341 342 if (-EIOCBQUEUED == ret) 343 ret = wait_on_sync_kiocb(&kiocb); 344 *ppos = kiocb.ki_pos; 345 return ret; 346 } 347 348 EXPORT_SYMBOL(do_sync_write); 349 350 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 351 { 352 ssize_t ret; 353 354 if (!(file->f_mode & FMODE_WRITE)) 355 return -EBADF; 356 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 357 return -EINVAL; 358 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 359 return -EFAULT; 360 361 ret = rw_verify_area(WRITE, file, pos, count); 362 if (ret >= 0) { 363 count = ret; 364 if (file->f_op->write) 365 ret = file->f_op->write(file, buf, count, pos); 366 else 367 ret = do_sync_write(file, buf, count, pos); 368 if (ret > 0) { 369 fsnotify_modify(file); 370 add_wchar(current, ret); 371 } 372 inc_syscw(current); 373 } 374 375 return ret; 376 } 377 378 EXPORT_SYMBOL(vfs_write); 379 380 static inline loff_t file_pos_read(struct file *file) 381 { 382 return file->f_pos; 383 } 384 385 static inline void file_pos_write(struct file *file, loff_t pos) 386 { 387 file->f_pos = pos; 388 } 389 390 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 391 { 392 struct file *file; 393 ssize_t ret = -EBADF; 394 int fput_needed; 395 396 file = fget_light(fd, &fput_needed); 397 if (file) { 398 loff_t pos = file_pos_read(file); 399 ret = vfs_read(file, buf, count, &pos); 400 file_pos_write(file, pos); 401 fput_light(file, fput_needed); 402 } 403 404 return ret; 405 } 406 407 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 408 size_t, count) 409 { 410 struct file *file; 411 ssize_t ret = -EBADF; 412 int fput_needed; 413 414 file = fget_light(fd, &fput_needed); 415 if (file) { 416 loff_t pos = file_pos_read(file); 417 ret = vfs_write(file, buf, count, &pos); 418 file_pos_write(file, pos); 419 fput_light(file, fput_needed); 420 } 421 422 return ret; 423 } 424 425 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 426 size_t count, loff_t pos) 427 { 428 struct file *file; 429 ssize_t ret = -EBADF; 430 int fput_needed; 431 432 if (pos < 0) 433 return -EINVAL; 434 435 file = fget_light(fd, &fput_needed); 436 if (file) { 437 ret = -ESPIPE; 438 if (file->f_mode & FMODE_PREAD) 439 ret = vfs_read(file, buf, count, &pos); 440 fput_light(file, fput_needed); 441 } 442 443 return ret; 444 } 445 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 446 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 447 { 448 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 449 (size_t) count, pos); 450 } 451 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 452 #endif 453 454 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 455 size_t count, loff_t pos) 456 { 457 struct file *file; 458 ssize_t ret = -EBADF; 459 int fput_needed; 460 461 if (pos < 0) 462 return -EINVAL; 463 464 file = fget_light(fd, &fput_needed); 465 if (file) { 466 ret = -ESPIPE; 467 if (file->f_mode & FMODE_PWRITE) 468 ret = vfs_write(file, buf, count, &pos); 469 fput_light(file, fput_needed); 470 } 471 472 return ret; 473 } 474 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 475 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 476 { 477 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 478 (size_t) count, pos); 479 } 480 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 481 #endif 482 483 /* 484 * Reduce an iovec's length in-place. Return the resulting number of segments 485 */ 486 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 487 { 488 unsigned long seg = 0; 489 size_t len = 0; 490 491 while (seg < nr_segs) { 492 seg++; 493 if (len + iov->iov_len >= to) { 494 iov->iov_len = to - len; 495 break; 496 } 497 len += iov->iov_len; 498 iov++; 499 } 500 return seg; 501 } 502 EXPORT_SYMBOL(iov_shorten); 503 504 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 505 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 506 { 507 struct kiocb kiocb; 508 ssize_t ret; 509 510 init_sync_kiocb(&kiocb, filp); 511 kiocb.ki_pos = *ppos; 512 kiocb.ki_left = len; 513 kiocb.ki_nbytes = len; 514 515 for (;;) { 516 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 517 if (ret != -EIOCBRETRY) 518 break; 519 wait_on_retry_sync_kiocb(&kiocb); 520 } 521 522 if (ret == -EIOCBQUEUED) 523 ret = wait_on_sync_kiocb(&kiocb); 524 *ppos = kiocb.ki_pos; 525 return ret; 526 } 527 528 /* Do it by hand, with file-ops */ 529 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 530 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 531 { 532 struct iovec *vector = iov; 533 ssize_t ret = 0; 534 535 while (nr_segs > 0) { 536 void __user *base; 537 size_t len; 538 ssize_t nr; 539 540 base = vector->iov_base; 541 len = vector->iov_len; 542 vector++; 543 nr_segs--; 544 545 nr = fn(filp, base, len, ppos); 546 547 if (nr < 0) { 548 if (!ret) 549 ret = nr; 550 break; 551 } 552 ret += nr; 553 if (nr != len) 554 break; 555 } 556 557 return ret; 558 } 559 560 /* A write operation does a read from user space and vice versa */ 561 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 562 563 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 564 unsigned long nr_segs, unsigned long fast_segs, 565 struct iovec *fast_pointer, 566 struct iovec **ret_pointer) 567 { 568 unsigned long seg; 569 ssize_t ret; 570 struct iovec *iov = fast_pointer; 571 572 /* 573 * SuS says "The readv() function *may* fail if the iovcnt argument 574 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 575 * traditionally returned zero for zero segments, so... 576 */ 577 if (nr_segs == 0) { 578 ret = 0; 579 goto out; 580 } 581 582 /* 583 * First get the "struct iovec" from user memory and 584 * verify all the pointers 585 */ 586 if (nr_segs > UIO_MAXIOV) { 587 ret = -EINVAL; 588 goto out; 589 } 590 if (nr_segs > fast_segs) { 591 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 592 if (iov == NULL) { 593 ret = -ENOMEM; 594 goto out; 595 } 596 } 597 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 598 ret = -EFAULT; 599 goto out; 600 } 601 602 /* 603 * According to the Single Unix Specification we should return EINVAL 604 * if an element length is < 0 when cast to ssize_t or if the 605 * total length would overflow the ssize_t return value of the 606 * system call. 607 */ 608 ret = 0; 609 for (seg = 0; seg < nr_segs; seg++) { 610 void __user *buf = iov[seg].iov_base; 611 ssize_t len = (ssize_t)iov[seg].iov_len; 612 613 /* see if we we're about to use an invalid len or if 614 * it's about to overflow ssize_t */ 615 if (len < 0 || (ret + len < ret)) { 616 ret = -EINVAL; 617 goto out; 618 } 619 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 620 ret = -EFAULT; 621 goto out; 622 } 623 624 ret += len; 625 } 626 out: 627 *ret_pointer = iov; 628 return ret; 629 } 630 631 static ssize_t do_readv_writev(int type, struct file *file, 632 const struct iovec __user * uvector, 633 unsigned long nr_segs, loff_t *pos) 634 { 635 size_t tot_len; 636 struct iovec iovstack[UIO_FASTIOV]; 637 struct iovec *iov = iovstack; 638 ssize_t ret; 639 io_fn_t fn; 640 iov_fn_t fnv; 641 642 if (!file->f_op) { 643 ret = -EINVAL; 644 goto out; 645 } 646 647 ret = rw_copy_check_uvector(type, uvector, nr_segs, 648 ARRAY_SIZE(iovstack), iovstack, &iov); 649 if (ret <= 0) 650 goto out; 651 652 tot_len = ret; 653 ret = rw_verify_area(type, file, pos, tot_len); 654 if (ret < 0) 655 goto out; 656 657 fnv = NULL; 658 if (type == READ) { 659 fn = file->f_op->read; 660 fnv = file->f_op->aio_read; 661 } else { 662 fn = (io_fn_t)file->f_op->write; 663 fnv = file->f_op->aio_write; 664 } 665 666 if (fnv) 667 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 668 pos, fnv); 669 else 670 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 671 672 out: 673 if (iov != iovstack) 674 kfree(iov); 675 if ((ret + (type == READ)) > 0) { 676 if (type == READ) 677 fsnotify_access(file); 678 else 679 fsnotify_modify(file); 680 } 681 return ret; 682 } 683 684 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 685 unsigned long vlen, loff_t *pos) 686 { 687 if (!(file->f_mode & FMODE_READ)) 688 return -EBADF; 689 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 690 return -EINVAL; 691 692 return do_readv_writev(READ, file, vec, vlen, pos); 693 } 694 695 EXPORT_SYMBOL(vfs_readv); 696 697 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 698 unsigned long vlen, loff_t *pos) 699 { 700 if (!(file->f_mode & FMODE_WRITE)) 701 return -EBADF; 702 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 703 return -EINVAL; 704 705 return do_readv_writev(WRITE, file, vec, vlen, pos); 706 } 707 708 EXPORT_SYMBOL(vfs_writev); 709 710 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 711 unsigned long, vlen) 712 { 713 struct file *file; 714 ssize_t ret = -EBADF; 715 int fput_needed; 716 717 file = fget_light(fd, &fput_needed); 718 if (file) { 719 loff_t pos = file_pos_read(file); 720 ret = vfs_readv(file, vec, vlen, &pos); 721 file_pos_write(file, pos); 722 fput_light(file, fput_needed); 723 } 724 725 if (ret > 0) 726 add_rchar(current, ret); 727 inc_syscr(current); 728 return ret; 729 } 730 731 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 732 unsigned long, vlen) 733 { 734 struct file *file; 735 ssize_t ret = -EBADF; 736 int fput_needed; 737 738 file = fget_light(fd, &fput_needed); 739 if (file) { 740 loff_t pos = file_pos_read(file); 741 ret = vfs_writev(file, vec, vlen, &pos); 742 file_pos_write(file, pos); 743 fput_light(file, fput_needed); 744 } 745 746 if (ret > 0) 747 add_wchar(current, ret); 748 inc_syscw(current); 749 return ret; 750 } 751 752 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 753 { 754 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 755 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 756 } 757 758 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 759 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 760 { 761 loff_t pos = pos_from_hilo(pos_h, pos_l); 762 struct file *file; 763 ssize_t ret = -EBADF; 764 int fput_needed; 765 766 if (pos < 0) 767 return -EINVAL; 768 769 file = fget_light(fd, &fput_needed); 770 if (file) { 771 ret = -ESPIPE; 772 if (file->f_mode & FMODE_PREAD) 773 ret = vfs_readv(file, vec, vlen, &pos); 774 fput_light(file, fput_needed); 775 } 776 777 if (ret > 0) 778 add_rchar(current, ret); 779 inc_syscr(current); 780 return ret; 781 } 782 783 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 784 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 785 { 786 loff_t pos = pos_from_hilo(pos_h, pos_l); 787 struct file *file; 788 ssize_t ret = -EBADF; 789 int fput_needed; 790 791 if (pos < 0) 792 return -EINVAL; 793 794 file = fget_light(fd, &fput_needed); 795 if (file) { 796 ret = -ESPIPE; 797 if (file->f_mode & FMODE_PWRITE) 798 ret = vfs_writev(file, vec, vlen, &pos); 799 fput_light(file, fput_needed); 800 } 801 802 if (ret > 0) 803 add_wchar(current, ret); 804 inc_syscw(current); 805 return ret; 806 } 807 808 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 809 size_t count, loff_t max) 810 { 811 struct file * in_file, * out_file; 812 struct inode * in_inode, * out_inode; 813 loff_t pos; 814 ssize_t retval; 815 int fput_needed_in, fput_needed_out, fl; 816 817 /* 818 * Get input file, and verify that it is ok.. 819 */ 820 retval = -EBADF; 821 in_file = fget_light(in_fd, &fput_needed_in); 822 if (!in_file) 823 goto out; 824 if (!(in_file->f_mode & FMODE_READ)) 825 goto fput_in; 826 retval = -ESPIPE; 827 if (!ppos) 828 ppos = &in_file->f_pos; 829 else 830 if (!(in_file->f_mode & FMODE_PREAD)) 831 goto fput_in; 832 retval = rw_verify_area(READ, in_file, ppos, count); 833 if (retval < 0) 834 goto fput_in; 835 count = retval; 836 837 /* 838 * Get output file, and verify that it is ok.. 839 */ 840 retval = -EBADF; 841 out_file = fget_light(out_fd, &fput_needed_out); 842 if (!out_file) 843 goto fput_in; 844 if (!(out_file->f_mode & FMODE_WRITE)) 845 goto fput_out; 846 retval = -EINVAL; 847 in_inode = in_file->f_path.dentry->d_inode; 848 out_inode = out_file->f_path.dentry->d_inode; 849 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 850 if (retval < 0) 851 goto fput_out; 852 count = retval; 853 854 if (!max) 855 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 856 857 pos = *ppos; 858 if (unlikely(pos + count > max)) { 859 retval = -EOVERFLOW; 860 if (pos >= max) 861 goto fput_out; 862 count = max - pos; 863 } 864 865 fl = 0; 866 #if 0 867 /* 868 * We need to debate whether we can enable this or not. The 869 * man page documents EAGAIN return for the output at least, 870 * and the application is arguably buggy if it doesn't expect 871 * EAGAIN on a non-blocking file descriptor. 872 */ 873 if (in_file->f_flags & O_NONBLOCK) 874 fl = SPLICE_F_NONBLOCK; 875 #endif 876 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 877 878 if (retval > 0) { 879 add_rchar(current, retval); 880 add_wchar(current, retval); 881 } 882 883 inc_syscr(current); 884 inc_syscw(current); 885 if (*ppos > max) 886 retval = -EOVERFLOW; 887 888 fput_out: 889 fput_light(out_file, fput_needed_out); 890 fput_in: 891 fput_light(in_file, fput_needed_in); 892 out: 893 return retval; 894 } 895 896 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 897 { 898 loff_t pos; 899 off_t off; 900 ssize_t ret; 901 902 if (offset) { 903 if (unlikely(get_user(off, offset))) 904 return -EFAULT; 905 pos = off; 906 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 907 if (unlikely(put_user(pos, offset))) 908 return -EFAULT; 909 return ret; 910 } 911 912 return do_sendfile(out_fd, in_fd, NULL, count, 0); 913 } 914 915 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 916 { 917 loff_t pos; 918 ssize_t ret; 919 920 if (offset) { 921 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 922 return -EFAULT; 923 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 924 if (unlikely(put_user(pos, offset))) 925 return -EFAULT; 926 return ret; 927 } 928 929 return do_sendfile(out_fd, in_fd, NULL, count, 0); 930 } 931