/*
 * linux/fs/read_write.c
 *
 * Copyright (C) 1991, 1992 Linus Torvalds
 */

#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include "read_write.h"

#include <asm/uaccess.h>
#include <asm/unistd.h>

const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read		= do_sync_read,
	.aio_read	= generic_file_aio_read,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};

EXPORT_SYMBOL(generic_ro_fops);

static int
__negative_fpos_check(struct file *file, loff_t pos, size_t count)
{
	/*
	 * pos or pos+count is negative here, check overflow.
	 * too big "count" will be caught in rw_verify_area().
	 */
	if ((pos < 0) && (pos + count < pos))
		return -EOVERFLOW;
	if (file->f_mode & FMODE_UNSIGNED_OFFSET)
		return 0;
	return -EINVAL;
}

/**
 * generic_file_llseek_unlocked - lockless generic llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @origin:	type of seek
 *
 * Updates the file offset to the value specified by @offset and @origin.
 * Locking must be provided by the caller.
 */
loff_t
generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
{
	struct inode *inode = file->f_mapping->host;

	switch (origin) {
	case SEEK_END:
		offset += inode->i_size;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it.
		 */
		if (offset == 0)
			return file->f_pos;
		offset += file->f_pos;
		break;
	}

	if (offset < 0 && __negative_fpos_check(file, offset, 0))
		return -EINVAL;
	if (offset > inode->i_sb->s_maxbytes)
		return -EINVAL;

	/* Special lock needed here? */
	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}

	return offset;
}
EXPORT_SYMBOL(generic_file_llseek_unlocked);

/**
 * generic_file_llseek - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @origin:	type of seek
 *
 * This is a generic implementation of ->llseek usable for all normal local
 * filesystems.  It just updates the file offset to the value specified by
 * @offset and @origin under i_mutex.
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t rval;

	mutex_lock(&file->f_dentry->d_inode->i_mutex);
	rval = generic_file_llseek_unlocked(file, offset, origin);
	mutex_unlock(&file->f_dentry->d_inode->i_mutex);

	return rval;
}
EXPORT_SYMBOL(generic_file_llseek);
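
/*
 * Example (illustrative only, not used by this file): a local filesystem
 * that keeps regular-file data in the page cache would typically wire the
 * generic helpers into its file_operations much like generic_ro_fops
 * above, e.g.:
 *
 *	const struct file_operations examplefs_file_operations = {
 *		.llseek		= generic_file_llseek,
 *		.read		= do_sync_read,
 *		.write		= do_sync_write,
 *		.aio_read	= generic_file_aio_read,
 *		.aio_write	= generic_file_aio_write,
 *		.mmap		= generic_file_mmap,
 *	};
 *
 * "examplefs" is a made-up name used only for illustration.
 */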

/**
 * noop_llseek - No Operation Performed llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @origin:	type of seek
 *
 * This is an implementation of ->llseek usable for the rare special case when
 * userspace expects the seek to succeed but the (device) file is actually not
 * able to perform the seek.  In this case you use noop_llseek() instead of
 * falling back to the default implementation of ->llseek.
 */
loff_t noop_llseek(struct file *file, loff_t offset, int origin)
{
	return file->f_pos;
}
EXPORT_SYMBOL(noop_llseek);

loff_t no_llseek(struct file *file, loff_t offset, int origin)
{
	return -ESPIPE;
}
EXPORT_SYMBOL(no_llseek);
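
/*
 * Example (illustrative only): a character device that cannot seek picks
 * between the two helpers above depending on what userspace expects:
 *
 *	static const struct file_operations example_chardev_fops = {
 *		.owner	= THIS_MODULE,
 *		.llseek	= noop_llseek,
 *		.read	= example_chardev_read,
 *		.write	= example_chardev_write,
 *	};
 *
 * With noop_llseek, lseek() appears to succeed but f_pos never moves;
 * using no_llseek instead would make lseek() fail with -ESPIPE.  The
 * example_chardev_* names are hypothetical driver callbacks.
 */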

loff_t default_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t retval;

	mutex_lock(&file->f_dentry->d_inode->i_mutex);
	switch (origin) {
	case SEEK_END:
		offset += i_size_read(file->f_path.dentry->d_inode);
		break;
	case SEEK_CUR:
		if (offset == 0) {
			retval = file->f_pos;
			goto out;
		}
		offset += file->f_pos;
	}
	retval = -EINVAL;
	if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	mutex_unlock(&file->f_dentry->d_inode->i_mutex);
	return retval;
}
EXPORT_SYMBOL(default_llseek);

loff_t vfs_llseek(struct file *file, loff_t offset, int origin)
{
	loff_t (*fn)(struct file *, loff_t, int);

	fn = no_llseek;
	if (file->f_mode & FMODE_LSEEK) {
		if (file->f_op && file->f_op->llseek)
			fn = file->f_op->llseek;
	}
	return fn(file, offset, origin);
}
EXPORT_SYMBOL(vfs_llseek);

SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin)
{
	off_t retval;
	struct file *file;
	int fput_needed;

	retval = -EBADF;
	file = fget_light(fd, &fput_needed);
	if (!file)
		goto bad;

	retval = -EINVAL;
	if (origin <= SEEK_MAX) {
		loff_t res = vfs_llseek(file, offset, origin);
		retval = res;
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fput_light(file, fput_needed);
bad:
	return retval;
}

#ifdef __ARCH_WANT_SYS_LLSEEK
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, origin)
{
	int retval;
	struct file *file;
	loff_t offset;
	int fput_needed;

	retval = -EBADF;
	file = fget_light(fd, &fput_needed);
	if (!file)
		goto bad;

	retval = -EINVAL;
	if (origin > SEEK_MAX)
		goto out_putf;

	offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low,
			origin);

	retval = (int)offset;
	if (offset >= 0) {
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fput_light(file, fput_needed);
bad:
	return retval;
}
#endif

/*
 * rw_verify_area doesn't like huge counts. We limit
 * them to something that fits in "int" so that others
 * won't have to do range checks all the time.
 */
int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count)
{
	struct inode *inode;
	loff_t pos;
	int retval = -EINVAL;

	inode = file->f_path.dentry->d_inode;
	if (unlikely((ssize_t) count < 0))
		return retval;
	pos = *ppos;
	if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
		retval = __negative_fpos_check(file, pos, count);
		if (retval)
			return retval;
	}

	if (unlikely(inode->i_flock && mandatory_lock(inode))) {
		retval = locks_mandatory_area(
			read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE,
			inode, file, pos, count);
		if (retval < 0)
			return retval;
	}
	retval = security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
	if (retval)
		return retval;
	return count > MAX_RW_COUNT ? MAX_RW_COUNT : count;
}

static void wait_on_retry_sync_kiocb(struct kiocb *iocb)
{
	set_current_state(TASK_UNINTERRUPTIBLE);
	if (!kiocbIsKicked(iocb))
		schedule();
	else
		kiocbClearKicked(iocb);
	__set_current_state(TASK_RUNNING);
}

ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}

EXPORT_SYMBOL(do_sync_read);

ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_WRITE, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret >= 0) {
		count = ret;
		if (file->f_op->read)
			ret = file->f_op->read(file, buf, count, pos);
		else
			ret = do_sync_read(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_access(file);
			add_rchar(current, ret);
		}
		inc_syscr(current);
	}

	return ret;
}

EXPORT_SYMBOL(vfs_read);

ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len };
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (-EIOCBQUEUED == ret)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}

EXPORT_SYMBOL(do_sync_write);

ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write))
		return -EINVAL;
	if (unlikely(!access_ok(VERIFY_READ, buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret >= 0) {
		count = ret;
		if (file->f_op->write)
			ret = file->f_op->write(file, buf, count, pos);
		else
			ret = do_sync_write(file, buf, count, pos);
		if (ret > 0) {
			fsnotify_modify(file);
			add_wchar(current, ret);
		}
		inc_syscw(current);
	}

	return ret;
}

EXPORT_SYMBOL(vfs_write);

static inline loff_t file_pos_read(struct file *file)
{
	return file->f_pos;
}

static inline void file_pos_write(struct file *file, loff_t pos)
{
	file->f_pos = pos;
}
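
/*
 * Example (illustrative sketch, not code used by this file): kernel code
 * that wants to read from a struct file with vfs_read() has to pass a
 * buffer that survives the user-access checks above, so the usual pattern
 * in this kernel era temporarily lifts the address-space limit:
 *
 *	mm_segment_t old_fs = get_fs();
 *	loff_t pos = 0;
 *	ssize_t n;
 *
 *	set_fs(KERNEL_DS);
 *	n = vfs_read(filp, (char __user *)buf, len, &pos);
 *	set_fs(old_fs);
 *
 * filp, buf and len are assumed to be supplied by the caller.
 */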

SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	file = fget_light(fd, &fput_needed);
	if (file) {
		loff_t pos = file_pos_read(file);
		ret = vfs_read(file, buf, count, &pos);
		file_pos_write(file, pos);
		fput_light(file, fput_needed);
	}

	return ret;
}

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	file = fget_light(fd, &fput_needed);
	if (file) {
		loff_t pos = file_pos_read(file);
		ret = vfs_write(file, buf, count, &pos);
		file_pos_write(file, pos);
		fput_light(file, fput_needed);
	}

	return ret;
}

SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf,
			size_t count, loff_t pos)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	if (pos < 0)
		return -EINVAL;

	file = fget_light(fd, &fput_needed);
	if (file) {
		ret = -ESPIPE;
		if (file->f_mode & FMODE_PREAD)
			ret = vfs_read(file, buf, count, &pos);
		fput_light(file, fput_needed);
	}

	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos)
{
	return SYSC_pread64((unsigned int) fd, (char __user *) buf,
			    (size_t) count, pos);
}
SYSCALL_ALIAS(sys_pread64, SyS_pread64);
#endif

SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf,
			 size_t count, loff_t pos)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	if (pos < 0)
		return -EINVAL;

	file = fget_light(fd, &fput_needed);
	if (file) {
		ret = -ESPIPE;
		if (file->f_mode & FMODE_PWRITE)
			ret = vfs_write(file, buf, count, &pos);
		fput_light(file, fput_needed);
	}

	return ret;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos)
{
	return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf,
			     (size_t) count, pos);
}
SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64);
#endif
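
/*
 * Note (illustrative): unlike sys_read()/sys_write() above, the
 * p{read,write}64 paths hand vfs_read()/vfs_write() a private copy of the
 * offset and never call file_pos_write(), which is what gives pread(2)
 * and pwrite(2) their "file offset is not changed" semantics, e.g. from
 * userspace:
 *
 *	char b[4];
 *	pread(fd, b, sizeof(b), 100);	reads at offset 100
 *	read(fd, b, sizeof(b));		still reads at the old file offset
 */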

/*
 * Reduce an iovec's length in-place.  Return the resulting number of segments
 */
unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
{
	unsigned long seg = 0;
	size_t len = 0;

	while (seg < nr_segs) {
		seg++;
		if (len + iov->iov_len >= to) {
			iov->iov_len = to - len;
			break;
		}
		len += iov->iov_len;
		iov++;
	}
	return seg;
}
EXPORT_SYMBOL(iov_shorten);
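
/*
 * Example (illustrative only): shortening a three-segment iovec of
 * 100 + 200 + 300 bytes to a total of 250 bytes:
 *
 *	struct iovec v[3] = {
 *		{ .iov_base = a, .iov_len = 100 },
 *		{ .iov_base = b, .iov_len = 200 },
 *		{ .iov_base = c, .iov_len = 300 },
 *	};
 *	unsigned long segs = iov_shorten(v, 3, 250);
 *
 * leaves v[1].iov_len == 150, never touches v[2], and returns segs == 2;
 * a, b and c are assumed to be buffers supplied by the caller.
 */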

ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov,
		unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = *ppos;
	kiocb.ki_left = len;
	kiocb.ki_nbytes = len;

	for (;;) {
		ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos);
		if (ret != -EIOCBRETRY)
			break;
		wait_on_retry_sync_kiocb(&kiocb);
	}

	if (ret == -EIOCBQUEUED)
		ret = wait_on_sync_kiocb(&kiocb);
	*ppos = kiocb.ki_pos;
	return ret;
}

/* Do it by hand, with file-ops */
ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov,
		unsigned long nr_segs, loff_t *ppos, io_fn_t fn)
{
	struct iovec *vector = iov;
	ssize_t ret = 0;

	while (nr_segs > 0) {
		void __user *base;
		size_t len;
		ssize_t nr;

		base = vector->iov_base;
		len = vector->iov_len;
		vector++;
		nr_segs--;

		nr = fn(filp, base, len, ppos);

		if (nr < 0) {
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (nr != len)
			break;
	}

	return ret;
}

/* A write operation does a read from user space and vice versa */
#define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ)

ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
			      unsigned long nr_segs, unsigned long fast_segs,
			      struct iovec *fast_pointer,
			      struct iovec **ret_pointer)
{
	unsigned long seg;
	ssize_t ret;
	struct iovec *iov = fast_pointer;

	/*
	 * SuS says "The readv() function *may* fail if the iovcnt argument
	 * was less than or equal to 0, or greater than {IOV_MAX}".  Linux has
	 * traditionally returned zero for zero segments, so...
	 */
	if (nr_segs == 0) {
		ret = 0;
		goto out;
	}

	/*
	 * First get the "struct iovec" from user memory and
	 * verify all the pointers
	 */
	if (nr_segs > UIO_MAXIOV) {
		ret = -EINVAL;
		goto out;
	}
	if (nr_segs > fast_segs) {
		iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL);
		if (iov == NULL) {
			ret = -ENOMEM;
			goto out;
		}
	}
	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
		ret = -EFAULT;
		goto out;
	}

	/*
	 * According to the Single Unix Specification we should return EINVAL
	 * if an element length is < 0 when cast to ssize_t or if the
	 * total length would overflow the ssize_t return value of the
	 * system call.
	 *
	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
	 * overflow case.
	 */
	ret = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		void __user *buf = iov[seg].iov_base;
		ssize_t len = (ssize_t)iov[seg].iov_len;

		/* see if we're about to use an invalid len or if
		 * it's about to overflow ssize_t */
		if (len < 0) {
			ret = -EINVAL;
			goto out;
		}
		if (unlikely(!access_ok(vrfy_dir(type), buf, len))) {
			ret = -EFAULT;
			goto out;
		}
		if (len > MAX_RW_COUNT - ret) {
			len = MAX_RW_COUNT - ret;
			iov[seg].iov_len = len;
		}
		ret += len;
	}
out:
	*ret_pointer = iov;
	return ret;
}

static ssize_t do_readv_writev(int type, struct file *file,
			       const struct iovec __user * uvector,
			       unsigned long nr_segs, loff_t *pos)
{
	size_t tot_len;
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	ssize_t ret;
	io_fn_t fn;
	iov_fn_t fnv;

	if (!file->f_op) {
		ret = -EINVAL;
		goto out;
	}

	ret = rw_copy_check_uvector(type, uvector, nr_segs,
				    ARRAY_SIZE(iovstack), iovstack, &iov);
	if (ret <= 0)
		goto out;

	tot_len = ret;
	ret = rw_verify_area(type, file, pos, tot_len);
	if (ret < 0)
		goto out;

	fnv = NULL;
	if (type == READ) {
		fn = file->f_op->read;
		fnv = file->f_op->aio_read;
	} else {
		fn = (io_fn_t)file->f_op->write;
		fnv = file->f_op->aio_write;
	}

	if (fnv)
		ret = do_sync_readv_writev(file, iov, nr_segs, tot_len,
					   pos, fnv);
	else
		ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn);

out:
	if (iov != iovstack)
		kfree(iov);
	if ((ret + (type == READ)) > 0) {
		if (type == READ)
			fsnotify_access(file);
		else
			fsnotify_modify(file);
	}
	return ret;
}

ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read))
		return -EINVAL;

	return do_readv_writev(READ, file, vec, vlen, pos);
}

EXPORT_SYMBOL(vfs_readv);

ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
		   unsigned long vlen, loff_t *pos)
{
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write))
		return -EINVAL;

	return do_readv_writev(WRITE, file, vec, vlen, pos);
}

EXPORT_SYMBOL(vfs_writev);

SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	file = fget_light(fd, &fput_needed);
	if (file) {
		loff_t pos = file_pos_read(file);
		ret = vfs_readv(file, vec, vlen, &pos);
		file_pos_write(file, pos);
		fput_light(file, fput_needed);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	file = fget_light(fd, &fput_needed);
	if (file) {
		loff_t pos = file_pos_read(file);
		ret = vfs_writev(file, vec, vlen, &pos);
		file_pos_write(file, pos);
		fput_light(file, fput_needed);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}
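
/*
 * Example (illustrative only): the readv()/writev() paths above perform a
 * single I/O over several user buffers, e.g. from userspace:
 *
 *	char hdr[16], body[4096];
 *	struct iovec v[2] = {
 *		{ .iov_base = hdr,  .iov_len = sizeof(hdr)  },
 *		{ .iov_base = body, .iov_len = sizeof(body) },
 *	};
 *	ssize_t n = readv(fd, v, 2);
 *
 * rw_copy_check_uvector() copies and validates v before any data moves,
 * and caps the total length at MAX_RW_COUNT.
 */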

static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}

SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	if (pos < 0)
		return -EINVAL;

	file = fget_light(fd, &fput_needed);
	if (file) {
		ret = -ESPIPE;
		if (file->f_mode & FMODE_PREAD)
			ret = vfs_readv(file, vec, vlen, &pos);
		fput_light(file, fput_needed);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);
	struct file *file;
	ssize_t ret = -EBADF;
	int fput_needed;

	if (pos < 0)
		return -EINVAL;

	file = fget_light(fd, &fput_needed);
	if (file) {
		ret = -ESPIPE;
		if (file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(file, vec, vlen, &pos);
		fput_light(file, fput_needed);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}
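
/*
 * Example (illustrative only): on a 32-bit kernel BITS_PER_LONG is 32 and
 * HALF_LONG_BITS is 16, so preadv() called with pos_l = 0x80000000 and
 * pos_h = 0x1 yields
 *
 *	pos = (((loff_t)0x1 << 16) << 16) | 0x80000000 = 0x180000000
 *
 * i.e. an offset of 6 GiB.  On a 64-bit kernel HALF_LONG_BITS is 32, so
 * the two shifts discard the high word entirely while avoiding a single
 * shift by the full 64-bit width, which C leaves undefined.
 */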

static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct file * in_file, * out_file;
	struct inode * in_inode, * out_inode;
	loff_t pos;
	ssize_t retval;
	int fput_needed_in, fput_needed_out, fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in_file = fget_light(in_fd, &fput_needed_in);
	if (!in_file)
		goto out;
	if (!(in_file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos)
		ppos = &in_file->f_pos;
	else
		if (!(in_file->f_mode & FMODE_PREAD))
			goto fput_in;
	retval = rw_verify_area(READ, in_file, ppos, count);
	if (retval < 0)
		goto fput_in;
	count = retval;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out_file = fget_light(out_fd, &fput_needed_out);
	if (!out_file)
		goto fput_in;
	if (!(out_file->f_mode & FMODE_WRITE))
		goto fput_out;
	retval = -EINVAL;
	in_inode = in_file->f_path.dentry->d_inode;
	out_inode = out_file->f_path.dentry->d_inode;
	retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count);
	if (retval < 0)
		goto fput_out;
	count = retval;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	pos = *ppos;
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in_file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	retval = do_splice_direct(in_file, ppos, out_file, count, fl);

	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
	}

	inc_syscr(current);
	inc_syscw(current);
	if (*ppos > max)
		retval = -EOVERFLOW;

fput_out:
	fput_light(out_file, fput_needed_out);
fput_in:
	fput_light(in_file, fput_needed_in);
out:
	return retval;
}

SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
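
/*
 * Example (illustrative only): the offset handling in sys_sendfile()
 * above, seen from userspace:
 *
 *	off_t off = 0;
 *	ssize_t n = sendfile(sock_fd, file_fd, &off, 65536);
 *
 * copies up to 64 KiB starting at file offset 0, advances "off" past the
 * bytes that were sent (the put_user(pos, offset) above), and leaves
 * file_fd's own f_pos untouched; passing a NULL offset makes the copy
 * start at, and update, f_pos instead.  sock_fd and file_fd are assumed
 * to be descriptors opened by the caller.
 */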