1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 #include <linux/splice.h> 19 #include "read_write.h" 20 21 #include <asm/uaccess.h> 22 #include <asm/unistd.h> 23 24 const struct file_operations generic_ro_fops = { 25 .llseek = generic_file_llseek, 26 .read = do_sync_read, 27 .aio_read = generic_file_aio_read, 28 .mmap = generic_file_readonly_mmap, 29 .splice_read = generic_file_splice_read, 30 }; 31 32 EXPORT_SYMBOL(generic_ro_fops); 33 34 static int 35 __negative_fpos_check(struct file *file, loff_t pos, size_t count) 36 { 37 /* 38 * pos or pos+count is negative here, check overflow. 39 * too big "count" will be caught in rw_verify_area(). 40 */ 41 if ((pos < 0) && (pos + count < pos)) 42 return -EOVERFLOW; 43 if (file->f_mode & FMODE_UNSIGNED_OFFSET) 44 return 0; 45 return -EINVAL; 46 } 47 48 /** 49 * generic_file_llseek_unlocked - lockless generic llseek implementation 50 * @file: file structure to seek on 51 * @offset: file offset to seek to 52 * @origin: type of seek 53 * 54 * Updates the file offset to the value specified by @offset and @origin. 55 * Locking must be provided by the caller. 56 */ 57 loff_t 58 generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin) 59 { 60 struct inode *inode = file->f_mapping->host; 61 62 switch (origin) { 63 case SEEK_END: 64 offset += inode->i_size; 65 break; 66 case SEEK_CUR: 67 /* 68 * Here we special-case the lseek(fd, 0, SEEK_CUR) 69 * position-querying operation. Avoid rewriting the "same" 70 * f_pos value back to the file because a concurrent read(), 71 * write() or lseek() might have altered it 72 */ 73 if (offset == 0) 74 return file->f_pos; 75 offset += file->f_pos; 76 break; 77 } 78 79 if (offset < 0 && __negative_fpos_check(file, offset, 0)) 80 return -EINVAL; 81 if (offset > inode->i_sb->s_maxbytes) 82 return -EINVAL; 83 84 /* Special lock needed here? */ 85 if (offset != file->f_pos) { 86 file->f_pos = offset; 87 file->f_version = 0; 88 } 89 90 return offset; 91 } 92 EXPORT_SYMBOL(generic_file_llseek_unlocked); 93 94 /** 95 * generic_file_llseek - generic llseek implementation for regular files 96 * @file: file structure to seek on 97 * @offset: file offset to seek to 98 * @origin: type of seek 99 * 100 * This is a generic implemenation of ->llseek useable for all normal local 101 * filesystems. It just updates the file offset to the value specified by 102 * @offset and @origin under i_mutex. 103 */ 104 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 105 { 106 loff_t rval; 107 108 mutex_lock(&file->f_dentry->d_inode->i_mutex); 109 rval = generic_file_llseek_unlocked(file, offset, origin); 110 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 111 112 return rval; 113 } 114 EXPORT_SYMBOL(generic_file_llseek); 115 116 /** 117 * noop_llseek - No Operation Performed llseek implementation 118 * @file: file structure to seek on 119 * @offset: file offset to seek to 120 * @origin: type of seek 121 * 122 * This is an implementation of ->llseek useable for the rare special case when 123 * userspace expects the seek to succeed but the (device) file is actually not 124 * able to perform the seek. In this case you use noop_llseek() instead of 125 * falling back to the default implementation of ->llseek. 126 */ 127 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 128 { 129 return file->f_pos; 130 } 131 EXPORT_SYMBOL(noop_llseek); 132 133 loff_t no_llseek(struct file *file, loff_t offset, int origin) 134 { 135 return -ESPIPE; 136 } 137 EXPORT_SYMBOL(no_llseek); 138 139 loff_t default_llseek(struct file *file, loff_t offset, int origin) 140 { 141 loff_t retval; 142 143 mutex_lock(&file->f_dentry->d_inode->i_mutex); 144 switch (origin) { 145 case SEEK_END: 146 offset += i_size_read(file->f_path.dentry->d_inode); 147 break; 148 case SEEK_CUR: 149 if (offset == 0) { 150 retval = file->f_pos; 151 goto out; 152 } 153 offset += file->f_pos; 154 } 155 retval = -EINVAL; 156 if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) { 157 if (offset != file->f_pos) { 158 file->f_pos = offset; 159 file->f_version = 0; 160 } 161 retval = offset; 162 } 163 out: 164 mutex_unlock(&file->f_dentry->d_inode->i_mutex); 165 return retval; 166 } 167 EXPORT_SYMBOL(default_llseek); 168 169 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 170 { 171 loff_t (*fn)(struct file *, loff_t, int); 172 173 fn = no_llseek; 174 if (file->f_mode & FMODE_LSEEK) { 175 if (file->f_op && file->f_op->llseek) 176 fn = file->f_op->llseek; 177 } 178 return fn(file, offset, origin); 179 } 180 EXPORT_SYMBOL(vfs_llseek); 181 182 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 183 { 184 off_t retval; 185 struct file * file; 186 int fput_needed; 187 188 retval = -EBADF; 189 file = fget_light(fd, &fput_needed); 190 if (!file) 191 goto bad; 192 193 retval = -EINVAL; 194 if (origin <= SEEK_MAX) { 195 loff_t res = vfs_llseek(file, offset, origin); 196 retval = res; 197 if (res != (loff_t)retval) 198 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 199 } 200 fput_light(file, fput_needed); 201 bad: 202 return retval; 203 } 204 205 #ifdef __ARCH_WANT_SYS_LLSEEK 206 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 207 unsigned long, offset_low, loff_t __user *, result, 208 unsigned int, origin) 209 { 210 int retval; 211 struct file * file; 212 loff_t offset; 213 int fput_needed; 214 215 retval = -EBADF; 216 file = fget_light(fd, &fput_needed); 217 if (!file) 218 goto bad; 219 220 retval = -EINVAL; 221 if (origin > SEEK_MAX) 222 goto out_putf; 223 224 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 225 origin); 226 227 retval = (int)offset; 228 if (offset >= 0) { 229 retval = -EFAULT; 230 if (!copy_to_user(result, &offset, sizeof(offset))) 231 retval = 0; 232 } 233 out_putf: 234 fput_light(file, fput_needed); 235 bad: 236 return retval; 237 } 238 #endif 239 240 241 /* 242 * rw_verify_area doesn't like huge counts. We limit 243 * them to something that fits in "int" so that others 244 * won't have to do range checks all the time. 245 */ 246 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 247 { 248 struct inode *inode; 249 loff_t pos; 250 int retval = -EINVAL; 251 252 inode = file->f_path.dentry->d_inode; 253 if (unlikely((ssize_t) count < 0)) 254 return retval; 255 pos = *ppos; 256 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) { 257 retval = __negative_fpos_check(file, pos, count); 258 if (retval) 259 return retval; 260 } 261 262 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 263 retval = locks_mandatory_area( 264 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 265 inode, file, pos, count); 266 if (retval < 0) 267 return retval; 268 } 269 retval = security_file_permission(file, 270 read_write == READ ? MAY_READ : MAY_WRITE); 271 if (retval) 272 return retval; 273 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 274 } 275 276 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 277 { 278 set_current_state(TASK_UNINTERRUPTIBLE); 279 if (!kiocbIsKicked(iocb)) 280 schedule(); 281 else 282 kiocbClearKicked(iocb); 283 __set_current_state(TASK_RUNNING); 284 } 285 286 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 287 { 288 struct iovec iov = { .iov_base = buf, .iov_len = len }; 289 struct kiocb kiocb; 290 ssize_t ret; 291 292 init_sync_kiocb(&kiocb, filp); 293 kiocb.ki_pos = *ppos; 294 kiocb.ki_left = len; 295 kiocb.ki_nbytes = len; 296 297 for (;;) { 298 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 299 if (ret != -EIOCBRETRY) 300 break; 301 wait_on_retry_sync_kiocb(&kiocb); 302 } 303 304 if (-EIOCBQUEUED == ret) 305 ret = wait_on_sync_kiocb(&kiocb); 306 *ppos = kiocb.ki_pos; 307 return ret; 308 } 309 310 EXPORT_SYMBOL(do_sync_read); 311 312 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 313 { 314 ssize_t ret; 315 316 if (!(file->f_mode & FMODE_READ)) 317 return -EBADF; 318 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 319 return -EINVAL; 320 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 321 return -EFAULT; 322 323 ret = rw_verify_area(READ, file, pos, count); 324 if (ret >= 0) { 325 count = ret; 326 if (file->f_op->read) 327 ret = file->f_op->read(file, buf, count, pos); 328 else 329 ret = do_sync_read(file, buf, count, pos); 330 if (ret > 0) { 331 fsnotify_access(file); 332 add_rchar(current, ret); 333 } 334 inc_syscr(current); 335 } 336 337 return ret; 338 } 339 340 EXPORT_SYMBOL(vfs_read); 341 342 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 343 { 344 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 345 struct kiocb kiocb; 346 ssize_t ret; 347 348 init_sync_kiocb(&kiocb, filp); 349 kiocb.ki_pos = *ppos; 350 kiocb.ki_left = len; 351 kiocb.ki_nbytes = len; 352 353 for (;;) { 354 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 355 if (ret != -EIOCBRETRY) 356 break; 357 wait_on_retry_sync_kiocb(&kiocb); 358 } 359 360 if (-EIOCBQUEUED == ret) 361 ret = wait_on_sync_kiocb(&kiocb); 362 *ppos = kiocb.ki_pos; 363 return ret; 364 } 365 366 EXPORT_SYMBOL(do_sync_write); 367 368 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 369 { 370 ssize_t ret; 371 372 if (!(file->f_mode & FMODE_WRITE)) 373 return -EBADF; 374 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 375 return -EINVAL; 376 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 377 return -EFAULT; 378 379 ret = rw_verify_area(WRITE, file, pos, count); 380 if (ret >= 0) { 381 count = ret; 382 if (file->f_op->write) 383 ret = file->f_op->write(file, buf, count, pos); 384 else 385 ret = do_sync_write(file, buf, count, pos); 386 if (ret > 0) { 387 fsnotify_modify(file); 388 add_wchar(current, ret); 389 } 390 inc_syscw(current); 391 } 392 393 return ret; 394 } 395 396 EXPORT_SYMBOL(vfs_write); 397 398 static inline loff_t file_pos_read(struct file *file) 399 { 400 return file->f_pos; 401 } 402 403 static inline void file_pos_write(struct file *file, loff_t pos) 404 { 405 file->f_pos = pos; 406 } 407 408 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 409 { 410 struct file *file; 411 ssize_t ret = -EBADF; 412 int fput_needed; 413 414 file = fget_light(fd, &fput_needed); 415 if (file) { 416 loff_t pos = file_pos_read(file); 417 ret = vfs_read(file, buf, count, &pos); 418 file_pos_write(file, pos); 419 fput_light(file, fput_needed); 420 } 421 422 return ret; 423 } 424 425 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 426 size_t, count) 427 { 428 struct file *file; 429 ssize_t ret = -EBADF; 430 int fput_needed; 431 432 file = fget_light(fd, &fput_needed); 433 if (file) { 434 loff_t pos = file_pos_read(file); 435 ret = vfs_write(file, buf, count, &pos); 436 file_pos_write(file, pos); 437 fput_light(file, fput_needed); 438 } 439 440 return ret; 441 } 442 443 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 444 size_t count, loff_t pos) 445 { 446 struct file *file; 447 ssize_t ret = -EBADF; 448 int fput_needed; 449 450 if (pos < 0) 451 return -EINVAL; 452 453 file = fget_light(fd, &fput_needed); 454 if (file) { 455 ret = -ESPIPE; 456 if (file->f_mode & FMODE_PREAD) 457 ret = vfs_read(file, buf, count, &pos); 458 fput_light(file, fput_needed); 459 } 460 461 return ret; 462 } 463 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 464 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 465 { 466 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 467 (size_t) count, pos); 468 } 469 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 470 #endif 471 472 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 473 size_t count, loff_t pos) 474 { 475 struct file *file; 476 ssize_t ret = -EBADF; 477 int fput_needed; 478 479 if (pos < 0) 480 return -EINVAL; 481 482 file = fget_light(fd, &fput_needed); 483 if (file) { 484 ret = -ESPIPE; 485 if (file->f_mode & FMODE_PWRITE) 486 ret = vfs_write(file, buf, count, &pos); 487 fput_light(file, fput_needed); 488 } 489 490 return ret; 491 } 492 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 493 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 494 { 495 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 496 (size_t) count, pos); 497 } 498 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 499 #endif 500 501 /* 502 * Reduce an iovec's length in-place. Return the resulting number of segments 503 */ 504 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 505 { 506 unsigned long seg = 0; 507 size_t len = 0; 508 509 while (seg < nr_segs) { 510 seg++; 511 if (len + iov->iov_len >= to) { 512 iov->iov_len = to - len; 513 break; 514 } 515 len += iov->iov_len; 516 iov++; 517 } 518 return seg; 519 } 520 EXPORT_SYMBOL(iov_shorten); 521 522 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 523 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 524 { 525 struct kiocb kiocb; 526 ssize_t ret; 527 528 init_sync_kiocb(&kiocb, filp); 529 kiocb.ki_pos = *ppos; 530 kiocb.ki_left = len; 531 kiocb.ki_nbytes = len; 532 533 for (;;) { 534 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 535 if (ret != -EIOCBRETRY) 536 break; 537 wait_on_retry_sync_kiocb(&kiocb); 538 } 539 540 if (ret == -EIOCBQUEUED) 541 ret = wait_on_sync_kiocb(&kiocb); 542 *ppos = kiocb.ki_pos; 543 return ret; 544 } 545 546 /* Do it by hand, with file-ops */ 547 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 548 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 549 { 550 struct iovec *vector = iov; 551 ssize_t ret = 0; 552 553 while (nr_segs > 0) { 554 void __user *base; 555 size_t len; 556 ssize_t nr; 557 558 base = vector->iov_base; 559 len = vector->iov_len; 560 vector++; 561 nr_segs--; 562 563 nr = fn(filp, base, len, ppos); 564 565 if (nr < 0) { 566 if (!ret) 567 ret = nr; 568 break; 569 } 570 ret += nr; 571 if (nr != len) 572 break; 573 } 574 575 return ret; 576 } 577 578 /* A write operation does a read from user space and vice versa */ 579 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 580 581 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 582 unsigned long nr_segs, unsigned long fast_segs, 583 struct iovec *fast_pointer, 584 struct iovec **ret_pointer) 585 { 586 unsigned long seg; 587 ssize_t ret; 588 struct iovec *iov = fast_pointer; 589 590 /* 591 * SuS says "The readv() function *may* fail if the iovcnt argument 592 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 593 * traditionally returned zero for zero segments, so... 594 */ 595 if (nr_segs == 0) { 596 ret = 0; 597 goto out; 598 } 599 600 /* 601 * First get the "struct iovec" from user memory and 602 * verify all the pointers 603 */ 604 if (nr_segs > UIO_MAXIOV) { 605 ret = -EINVAL; 606 goto out; 607 } 608 if (nr_segs > fast_segs) { 609 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 610 if (iov == NULL) { 611 ret = -ENOMEM; 612 goto out; 613 } 614 } 615 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 616 ret = -EFAULT; 617 goto out; 618 } 619 620 /* 621 * According to the Single Unix Specification we should return EINVAL 622 * if an element length is < 0 when cast to ssize_t or if the 623 * total length would overflow the ssize_t return value of the 624 * system call. 625 * 626 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 627 * overflow case. 628 */ 629 ret = 0; 630 for (seg = 0; seg < nr_segs; seg++) { 631 void __user *buf = iov[seg].iov_base; 632 ssize_t len = (ssize_t)iov[seg].iov_len; 633 634 /* see if we we're about to use an invalid len or if 635 * it's about to overflow ssize_t */ 636 if (len < 0) { 637 ret = -EINVAL; 638 goto out; 639 } 640 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) { 641 ret = -EFAULT; 642 goto out; 643 } 644 if (len > MAX_RW_COUNT - ret) { 645 len = MAX_RW_COUNT - ret; 646 iov[seg].iov_len = len; 647 } 648 ret += len; 649 } 650 out: 651 *ret_pointer = iov; 652 return ret; 653 } 654 655 static ssize_t do_readv_writev(int type, struct file *file, 656 const struct iovec __user * uvector, 657 unsigned long nr_segs, loff_t *pos) 658 { 659 size_t tot_len; 660 struct iovec iovstack[UIO_FASTIOV]; 661 struct iovec *iov = iovstack; 662 ssize_t ret; 663 io_fn_t fn; 664 iov_fn_t fnv; 665 666 if (!file->f_op) { 667 ret = -EINVAL; 668 goto out; 669 } 670 671 ret = rw_copy_check_uvector(type, uvector, nr_segs, 672 ARRAY_SIZE(iovstack), iovstack, &iov); 673 if (ret <= 0) 674 goto out; 675 676 tot_len = ret; 677 ret = rw_verify_area(type, file, pos, tot_len); 678 if (ret < 0) 679 goto out; 680 681 fnv = NULL; 682 if (type == READ) { 683 fn = file->f_op->read; 684 fnv = file->f_op->aio_read; 685 } else { 686 fn = (io_fn_t)file->f_op->write; 687 fnv = file->f_op->aio_write; 688 } 689 690 if (fnv) 691 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 692 pos, fnv); 693 else 694 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 695 696 out: 697 if (iov != iovstack) 698 kfree(iov); 699 if ((ret + (type == READ)) > 0) { 700 if (type == READ) 701 fsnotify_access(file); 702 else 703 fsnotify_modify(file); 704 } 705 return ret; 706 } 707 708 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 709 unsigned long vlen, loff_t *pos) 710 { 711 if (!(file->f_mode & FMODE_READ)) 712 return -EBADF; 713 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 714 return -EINVAL; 715 716 return do_readv_writev(READ, file, vec, vlen, pos); 717 } 718 719 EXPORT_SYMBOL(vfs_readv); 720 721 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 722 unsigned long vlen, loff_t *pos) 723 { 724 if (!(file->f_mode & FMODE_WRITE)) 725 return -EBADF; 726 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 727 return -EINVAL; 728 729 return do_readv_writev(WRITE, file, vec, vlen, pos); 730 } 731 732 EXPORT_SYMBOL(vfs_writev); 733 734 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 735 unsigned long, vlen) 736 { 737 struct file *file; 738 ssize_t ret = -EBADF; 739 int fput_needed; 740 741 file = fget_light(fd, &fput_needed); 742 if (file) { 743 loff_t pos = file_pos_read(file); 744 ret = vfs_readv(file, vec, vlen, &pos); 745 file_pos_write(file, pos); 746 fput_light(file, fput_needed); 747 } 748 749 if (ret > 0) 750 add_rchar(current, ret); 751 inc_syscr(current); 752 return ret; 753 } 754 755 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 756 unsigned long, vlen) 757 { 758 struct file *file; 759 ssize_t ret = -EBADF; 760 int fput_needed; 761 762 file = fget_light(fd, &fput_needed); 763 if (file) { 764 loff_t pos = file_pos_read(file); 765 ret = vfs_writev(file, vec, vlen, &pos); 766 file_pos_write(file, pos); 767 fput_light(file, fput_needed); 768 } 769 770 if (ret > 0) 771 add_wchar(current, ret); 772 inc_syscw(current); 773 return ret; 774 } 775 776 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 777 { 778 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 779 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 780 } 781 782 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 783 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 784 { 785 loff_t pos = pos_from_hilo(pos_h, pos_l); 786 struct file *file; 787 ssize_t ret = -EBADF; 788 int fput_needed; 789 790 if (pos < 0) 791 return -EINVAL; 792 793 file = fget_light(fd, &fput_needed); 794 if (file) { 795 ret = -ESPIPE; 796 if (file->f_mode & FMODE_PREAD) 797 ret = vfs_readv(file, vec, vlen, &pos); 798 fput_light(file, fput_needed); 799 } 800 801 if (ret > 0) 802 add_rchar(current, ret); 803 inc_syscr(current); 804 return ret; 805 } 806 807 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 808 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 809 { 810 loff_t pos = pos_from_hilo(pos_h, pos_l); 811 struct file *file; 812 ssize_t ret = -EBADF; 813 int fput_needed; 814 815 if (pos < 0) 816 return -EINVAL; 817 818 file = fget_light(fd, &fput_needed); 819 if (file) { 820 ret = -ESPIPE; 821 if (file->f_mode & FMODE_PWRITE) 822 ret = vfs_writev(file, vec, vlen, &pos); 823 fput_light(file, fput_needed); 824 } 825 826 if (ret > 0) 827 add_wchar(current, ret); 828 inc_syscw(current); 829 return ret; 830 } 831 832 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 833 size_t count, loff_t max) 834 { 835 struct file * in_file, * out_file; 836 struct inode * in_inode, * out_inode; 837 loff_t pos; 838 ssize_t retval; 839 int fput_needed_in, fput_needed_out, fl; 840 841 /* 842 * Get input file, and verify that it is ok.. 843 */ 844 retval = -EBADF; 845 in_file = fget_light(in_fd, &fput_needed_in); 846 if (!in_file) 847 goto out; 848 if (!(in_file->f_mode & FMODE_READ)) 849 goto fput_in; 850 retval = -ESPIPE; 851 if (!ppos) 852 ppos = &in_file->f_pos; 853 else 854 if (!(in_file->f_mode & FMODE_PREAD)) 855 goto fput_in; 856 retval = rw_verify_area(READ, in_file, ppos, count); 857 if (retval < 0) 858 goto fput_in; 859 count = retval; 860 861 /* 862 * Get output file, and verify that it is ok.. 863 */ 864 retval = -EBADF; 865 out_file = fget_light(out_fd, &fput_needed_out); 866 if (!out_file) 867 goto fput_in; 868 if (!(out_file->f_mode & FMODE_WRITE)) 869 goto fput_out; 870 retval = -EINVAL; 871 in_inode = in_file->f_path.dentry->d_inode; 872 out_inode = out_file->f_path.dentry->d_inode; 873 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 874 if (retval < 0) 875 goto fput_out; 876 count = retval; 877 878 if (!max) 879 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 880 881 pos = *ppos; 882 if (unlikely(pos + count > max)) { 883 retval = -EOVERFLOW; 884 if (pos >= max) 885 goto fput_out; 886 count = max - pos; 887 } 888 889 fl = 0; 890 #if 0 891 /* 892 * We need to debate whether we can enable this or not. The 893 * man page documents EAGAIN return for the output at least, 894 * and the application is arguably buggy if it doesn't expect 895 * EAGAIN on a non-blocking file descriptor. 896 */ 897 if (in_file->f_flags & O_NONBLOCK) 898 fl = SPLICE_F_NONBLOCK; 899 #endif 900 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 901 902 if (retval > 0) { 903 add_rchar(current, retval); 904 add_wchar(current, retval); 905 } 906 907 inc_syscr(current); 908 inc_syscw(current); 909 if (*ppos > max) 910 retval = -EOVERFLOW; 911 912 fput_out: 913 fput_light(out_file, fput_needed_out); 914 fput_in: 915 fput_light(in_file, fput_needed_in); 916 out: 917 return retval; 918 } 919 920 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 921 { 922 loff_t pos; 923 off_t off; 924 ssize_t ret; 925 926 if (offset) { 927 if (unlikely(get_user(off, offset))) 928 return -EFAULT; 929 pos = off; 930 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 931 if (unlikely(put_user(pos, offset))) 932 return -EFAULT; 933 return ret; 934 } 935 936 return do_sendfile(out_fd, in_fd, NULL, count, 0); 937 } 938 939 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 940 { 941 loff_t pos; 942 ssize_t ret; 943 944 if (offset) { 945 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 946 return -EFAULT; 947 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 948 if (unlikely(put_user(pos, offset))) 949 return -EFAULT; 950 return ret; 951 } 952 953 return do_sendfile(out_fd, in_fd, NULL, count, 0); 954 } 955