1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/fsnotify.h> 13 #include <linux/security.h> 14 #include <linux/export.h> 15 #include <linux/syscalls.h> 16 #include <linux/pagemap.h> 17 #include <linux/splice.h> 18 #include "read_write.h" 19 20 #include <asm/uaccess.h> 21 #include <asm/unistd.h> 22 23 const struct file_operations generic_ro_fops = { 24 .llseek = generic_file_llseek, 25 .read = do_sync_read, 26 .aio_read = generic_file_aio_read, 27 .mmap = generic_file_readonly_mmap, 28 .splice_read = generic_file_splice_read, 29 }; 30 31 EXPORT_SYMBOL(generic_ro_fops); 32 33 static inline int unsigned_offsets(struct file *file) 34 { 35 return file->f_mode & FMODE_UNSIGNED_OFFSET; 36 } 37 38 static loff_t lseek_execute(struct file *file, struct inode *inode, 39 loff_t offset, loff_t maxsize) 40 { 41 if (offset < 0 && !unsigned_offsets(file)) 42 return -EINVAL; 43 if (offset > maxsize) 44 return -EINVAL; 45 46 if (offset != file->f_pos) { 47 file->f_pos = offset; 48 file->f_version = 0; 49 } 50 return offset; 51 } 52 53 /** 54 * generic_file_llseek_size - generic llseek implementation for regular files 55 * @file: file structure to seek on 56 * @offset: file offset to seek to 57 * @origin: type of seek 58 * @size: max size of file system 59 * 60 * This is a variant of generic_file_llseek that allows passing in a custom 61 * file size. 62 * 63 * Synchronization: 64 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 65 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 66 * read/writes behave like SEEK_SET against seeks. 67 */ 68 loff_t 69 generic_file_llseek_size(struct file *file, loff_t offset, int origin, 70 loff_t maxsize) 71 { 72 struct inode *inode = file->f_mapping->host; 73 74 switch (origin) { 75 case SEEK_END: 76 offset += i_size_read(inode); 77 break; 78 case SEEK_CUR: 79 /* 80 * Here we special-case the lseek(fd, 0, SEEK_CUR) 81 * position-querying operation. Avoid rewriting the "same" 82 * f_pos value back to the file because a concurrent read(), 83 * write() or lseek() might have altered it 84 */ 85 if (offset == 0) 86 return file->f_pos; 87 /* 88 * f_lock protects against read/modify/write race with other 89 * SEEK_CURs. Note that parallel writes and reads behave 90 * like SEEK_SET. 91 */ 92 spin_lock(&file->f_lock); 93 offset = lseek_execute(file, inode, file->f_pos + offset, 94 maxsize); 95 spin_unlock(&file->f_lock); 96 return offset; 97 case SEEK_DATA: 98 /* 99 * In the generic case the entire file is data, so as long as 100 * offset isn't at the end of the file then the offset is data. 101 */ 102 if (offset >= i_size_read(inode)) 103 return -ENXIO; 104 break; 105 case SEEK_HOLE: 106 /* 107 * There is a virtual hole at the end of the file, so as long as 108 * offset isn't i_size or larger, return i_size. 109 */ 110 if (offset >= i_size_read(inode)) 111 return -ENXIO; 112 offset = i_size_read(inode); 113 break; 114 } 115 116 return lseek_execute(file, inode, offset, maxsize); 117 } 118 EXPORT_SYMBOL(generic_file_llseek_size); 119 120 /** 121 * generic_file_llseek - generic llseek implementation for regular files 122 * @file: file structure to seek on 123 * @offset: file offset to seek to 124 * @origin: type of seek 125 * 126 * This is a generic implemenation of ->llseek useable for all normal local 127 * filesystems. It just updates the file offset to the value specified by 128 * @offset and @origin under i_mutex. 129 */ 130 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 131 { 132 struct inode *inode = file->f_mapping->host; 133 134 return generic_file_llseek_size(file, offset, origin, 135 inode->i_sb->s_maxbytes); 136 } 137 EXPORT_SYMBOL(generic_file_llseek); 138 139 /** 140 * noop_llseek - No Operation Performed llseek implementation 141 * @file: file structure to seek on 142 * @offset: file offset to seek to 143 * @origin: type of seek 144 * 145 * This is an implementation of ->llseek useable for the rare special case when 146 * userspace expects the seek to succeed but the (device) file is actually not 147 * able to perform the seek. In this case you use noop_llseek() instead of 148 * falling back to the default implementation of ->llseek. 149 */ 150 loff_t noop_llseek(struct file *file, loff_t offset, int origin) 151 { 152 return file->f_pos; 153 } 154 EXPORT_SYMBOL(noop_llseek); 155 156 loff_t no_llseek(struct file *file, loff_t offset, int origin) 157 { 158 return -ESPIPE; 159 } 160 EXPORT_SYMBOL(no_llseek); 161 162 loff_t default_llseek(struct file *file, loff_t offset, int origin) 163 { 164 struct inode *inode = file->f_path.dentry->d_inode; 165 loff_t retval; 166 167 mutex_lock(&inode->i_mutex); 168 switch (origin) { 169 case SEEK_END: 170 offset += i_size_read(inode); 171 break; 172 case SEEK_CUR: 173 if (offset == 0) { 174 retval = file->f_pos; 175 goto out; 176 } 177 offset += file->f_pos; 178 break; 179 case SEEK_DATA: 180 /* 181 * In the generic case the entire file is data, so as 182 * long as offset isn't at the end of the file then the 183 * offset is data. 184 */ 185 if (offset >= inode->i_size) { 186 retval = -ENXIO; 187 goto out; 188 } 189 break; 190 case SEEK_HOLE: 191 /* 192 * There is a virtual hole at the end of the file, so 193 * as long as offset isn't i_size or larger, return 194 * i_size. 195 */ 196 if (offset >= inode->i_size) { 197 retval = -ENXIO; 198 goto out; 199 } 200 offset = inode->i_size; 201 break; 202 } 203 retval = -EINVAL; 204 if (offset >= 0 || unsigned_offsets(file)) { 205 if (offset != file->f_pos) { 206 file->f_pos = offset; 207 file->f_version = 0; 208 } 209 retval = offset; 210 } 211 out: 212 mutex_unlock(&inode->i_mutex); 213 return retval; 214 } 215 EXPORT_SYMBOL(default_llseek); 216 217 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 218 { 219 loff_t (*fn)(struct file *, loff_t, int); 220 221 fn = no_llseek; 222 if (file->f_mode & FMODE_LSEEK) { 223 if (file->f_op && file->f_op->llseek) 224 fn = file->f_op->llseek; 225 } 226 return fn(file, offset, origin); 227 } 228 EXPORT_SYMBOL(vfs_llseek); 229 230 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, origin) 231 { 232 off_t retval; 233 struct file * file; 234 int fput_needed; 235 236 retval = -EBADF; 237 file = fget_light(fd, &fput_needed); 238 if (!file) 239 goto bad; 240 241 retval = -EINVAL; 242 if (origin <= SEEK_MAX) { 243 loff_t res = vfs_llseek(file, offset, origin); 244 retval = res; 245 if (res != (loff_t)retval) 246 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 247 } 248 fput_light(file, fput_needed); 249 bad: 250 return retval; 251 } 252 253 #ifdef __ARCH_WANT_SYS_LLSEEK 254 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 255 unsigned long, offset_low, loff_t __user *, result, 256 unsigned int, origin) 257 { 258 int retval; 259 struct file * file; 260 loff_t offset; 261 int fput_needed; 262 263 retval = -EBADF; 264 file = fget_light(fd, &fput_needed); 265 if (!file) 266 goto bad; 267 268 retval = -EINVAL; 269 if (origin > SEEK_MAX) 270 goto out_putf; 271 272 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 273 origin); 274 275 retval = (int)offset; 276 if (offset >= 0) { 277 retval = -EFAULT; 278 if (!copy_to_user(result, &offset, sizeof(offset))) 279 retval = 0; 280 } 281 out_putf: 282 fput_light(file, fput_needed); 283 bad: 284 return retval; 285 } 286 #endif 287 288 289 /* 290 * rw_verify_area doesn't like huge counts. We limit 291 * them to something that fits in "int" so that others 292 * won't have to do range checks all the time. 293 */ 294 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 295 { 296 struct inode *inode; 297 loff_t pos; 298 int retval = -EINVAL; 299 300 inode = file->f_path.dentry->d_inode; 301 if (unlikely((ssize_t) count < 0)) 302 return retval; 303 pos = *ppos; 304 if (unlikely(pos < 0)) { 305 if (!unsigned_offsets(file)) 306 return retval; 307 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 308 return -EOVERFLOW; 309 } else if (unlikely((loff_t) (pos + count) < 0)) { 310 if (!unsigned_offsets(file)) 311 return retval; 312 } 313 314 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 315 retval = locks_mandatory_area( 316 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 317 inode, file, pos, count); 318 if (retval < 0) 319 return retval; 320 } 321 retval = security_file_permission(file, 322 read_write == READ ? MAY_READ : MAY_WRITE); 323 if (retval) 324 return retval; 325 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 326 } 327 328 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 329 { 330 set_current_state(TASK_UNINTERRUPTIBLE); 331 if (!kiocbIsKicked(iocb)) 332 schedule(); 333 else 334 kiocbClearKicked(iocb); 335 __set_current_state(TASK_RUNNING); 336 } 337 338 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 339 { 340 struct iovec iov = { .iov_base = buf, .iov_len = len }; 341 struct kiocb kiocb; 342 ssize_t ret; 343 344 init_sync_kiocb(&kiocb, filp); 345 kiocb.ki_pos = *ppos; 346 kiocb.ki_left = len; 347 kiocb.ki_nbytes = len; 348 349 for (;;) { 350 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 351 if (ret != -EIOCBRETRY) 352 break; 353 wait_on_retry_sync_kiocb(&kiocb); 354 } 355 356 if (-EIOCBQUEUED == ret) 357 ret = wait_on_sync_kiocb(&kiocb); 358 *ppos = kiocb.ki_pos; 359 return ret; 360 } 361 362 EXPORT_SYMBOL(do_sync_read); 363 364 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 365 { 366 ssize_t ret; 367 368 if (!(file->f_mode & FMODE_READ)) 369 return -EBADF; 370 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 371 return -EINVAL; 372 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 373 return -EFAULT; 374 375 ret = rw_verify_area(READ, file, pos, count); 376 if (ret >= 0) { 377 count = ret; 378 if (file->f_op->read) 379 ret = file->f_op->read(file, buf, count, pos); 380 else 381 ret = do_sync_read(file, buf, count, pos); 382 if (ret > 0) { 383 fsnotify_access(file); 384 add_rchar(current, ret); 385 } 386 inc_syscr(current); 387 } 388 389 return ret; 390 } 391 392 EXPORT_SYMBOL(vfs_read); 393 394 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 395 { 396 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 397 struct kiocb kiocb; 398 ssize_t ret; 399 400 init_sync_kiocb(&kiocb, filp); 401 kiocb.ki_pos = *ppos; 402 kiocb.ki_left = len; 403 kiocb.ki_nbytes = len; 404 405 for (;;) { 406 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 407 if (ret != -EIOCBRETRY) 408 break; 409 wait_on_retry_sync_kiocb(&kiocb); 410 } 411 412 if (-EIOCBQUEUED == ret) 413 ret = wait_on_sync_kiocb(&kiocb); 414 *ppos = kiocb.ki_pos; 415 return ret; 416 } 417 418 EXPORT_SYMBOL(do_sync_write); 419 420 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 421 { 422 ssize_t ret; 423 424 if (!(file->f_mode & FMODE_WRITE)) 425 return -EBADF; 426 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 427 return -EINVAL; 428 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 429 return -EFAULT; 430 431 ret = rw_verify_area(WRITE, file, pos, count); 432 if (ret >= 0) { 433 count = ret; 434 if (file->f_op->write) 435 ret = file->f_op->write(file, buf, count, pos); 436 else 437 ret = do_sync_write(file, buf, count, pos); 438 if (ret > 0) { 439 fsnotify_modify(file); 440 add_wchar(current, ret); 441 } 442 inc_syscw(current); 443 } 444 445 return ret; 446 } 447 448 EXPORT_SYMBOL(vfs_write); 449 450 static inline loff_t file_pos_read(struct file *file) 451 { 452 return file->f_pos; 453 } 454 455 static inline void file_pos_write(struct file *file, loff_t pos) 456 { 457 file->f_pos = pos; 458 } 459 460 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 461 { 462 struct file *file; 463 ssize_t ret = -EBADF; 464 int fput_needed; 465 466 file = fget_light(fd, &fput_needed); 467 if (file) { 468 loff_t pos = file_pos_read(file); 469 ret = vfs_read(file, buf, count, &pos); 470 file_pos_write(file, pos); 471 fput_light(file, fput_needed); 472 } 473 474 return ret; 475 } 476 477 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 478 size_t, count) 479 { 480 struct file *file; 481 ssize_t ret = -EBADF; 482 int fput_needed; 483 484 file = fget_light(fd, &fput_needed); 485 if (file) { 486 loff_t pos = file_pos_read(file); 487 ret = vfs_write(file, buf, count, &pos); 488 file_pos_write(file, pos); 489 fput_light(file, fput_needed); 490 } 491 492 return ret; 493 } 494 495 SYSCALL_DEFINE(pread64)(unsigned int fd, char __user *buf, 496 size_t count, loff_t pos) 497 { 498 struct file *file; 499 ssize_t ret = -EBADF; 500 int fput_needed; 501 502 if (pos < 0) 503 return -EINVAL; 504 505 file = fget_light(fd, &fput_needed); 506 if (file) { 507 ret = -ESPIPE; 508 if (file->f_mode & FMODE_PREAD) 509 ret = vfs_read(file, buf, count, &pos); 510 fput_light(file, fput_needed); 511 } 512 513 return ret; 514 } 515 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 516 asmlinkage long SyS_pread64(long fd, long buf, long count, loff_t pos) 517 { 518 return SYSC_pread64((unsigned int) fd, (char __user *) buf, 519 (size_t) count, pos); 520 } 521 SYSCALL_ALIAS(sys_pread64, SyS_pread64); 522 #endif 523 524 SYSCALL_DEFINE(pwrite64)(unsigned int fd, const char __user *buf, 525 size_t count, loff_t pos) 526 { 527 struct file *file; 528 ssize_t ret = -EBADF; 529 int fput_needed; 530 531 if (pos < 0) 532 return -EINVAL; 533 534 file = fget_light(fd, &fput_needed); 535 if (file) { 536 ret = -ESPIPE; 537 if (file->f_mode & FMODE_PWRITE) 538 ret = vfs_write(file, buf, count, &pos); 539 fput_light(file, fput_needed); 540 } 541 542 return ret; 543 } 544 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS 545 asmlinkage long SyS_pwrite64(long fd, long buf, long count, loff_t pos) 546 { 547 return SYSC_pwrite64((unsigned int) fd, (const char __user *) buf, 548 (size_t) count, pos); 549 } 550 SYSCALL_ALIAS(sys_pwrite64, SyS_pwrite64); 551 #endif 552 553 /* 554 * Reduce an iovec's length in-place. Return the resulting number of segments 555 */ 556 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 557 { 558 unsigned long seg = 0; 559 size_t len = 0; 560 561 while (seg < nr_segs) { 562 seg++; 563 if (len + iov->iov_len >= to) { 564 iov->iov_len = to - len; 565 break; 566 } 567 len += iov->iov_len; 568 iov++; 569 } 570 return seg; 571 } 572 EXPORT_SYMBOL(iov_shorten); 573 574 ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 575 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 576 { 577 struct kiocb kiocb; 578 ssize_t ret; 579 580 init_sync_kiocb(&kiocb, filp); 581 kiocb.ki_pos = *ppos; 582 kiocb.ki_left = len; 583 kiocb.ki_nbytes = len; 584 585 for (;;) { 586 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 587 if (ret != -EIOCBRETRY) 588 break; 589 wait_on_retry_sync_kiocb(&kiocb); 590 } 591 592 if (ret == -EIOCBQUEUED) 593 ret = wait_on_sync_kiocb(&kiocb); 594 *ppos = kiocb.ki_pos; 595 return ret; 596 } 597 598 /* Do it by hand, with file-ops */ 599 ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 600 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 601 { 602 struct iovec *vector = iov; 603 ssize_t ret = 0; 604 605 while (nr_segs > 0) { 606 void __user *base; 607 size_t len; 608 ssize_t nr; 609 610 base = vector->iov_base; 611 len = vector->iov_len; 612 vector++; 613 nr_segs--; 614 615 nr = fn(filp, base, len, ppos); 616 617 if (nr < 0) { 618 if (!ret) 619 ret = nr; 620 break; 621 } 622 ret += nr; 623 if (nr != len) 624 break; 625 } 626 627 return ret; 628 } 629 630 /* A write operation does a read from user space and vice versa */ 631 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 632 633 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 634 unsigned long nr_segs, unsigned long fast_segs, 635 struct iovec *fast_pointer, 636 struct iovec **ret_pointer) 637 { 638 unsigned long seg; 639 ssize_t ret; 640 struct iovec *iov = fast_pointer; 641 642 /* 643 * SuS says "The readv() function *may* fail if the iovcnt argument 644 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 645 * traditionally returned zero for zero segments, so... 646 */ 647 if (nr_segs == 0) { 648 ret = 0; 649 goto out; 650 } 651 652 /* 653 * First get the "struct iovec" from user memory and 654 * verify all the pointers 655 */ 656 if (nr_segs > UIO_MAXIOV) { 657 ret = -EINVAL; 658 goto out; 659 } 660 if (nr_segs > fast_segs) { 661 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 662 if (iov == NULL) { 663 ret = -ENOMEM; 664 goto out; 665 } 666 } 667 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 668 ret = -EFAULT; 669 goto out; 670 } 671 672 /* 673 * According to the Single Unix Specification we should return EINVAL 674 * if an element length is < 0 when cast to ssize_t or if the 675 * total length would overflow the ssize_t return value of the 676 * system call. 677 * 678 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 679 * overflow case. 680 */ 681 ret = 0; 682 for (seg = 0; seg < nr_segs; seg++) { 683 void __user *buf = iov[seg].iov_base; 684 ssize_t len = (ssize_t)iov[seg].iov_len; 685 686 /* see if we we're about to use an invalid len or if 687 * it's about to overflow ssize_t */ 688 if (len < 0) { 689 ret = -EINVAL; 690 goto out; 691 } 692 if (type >= 0 693 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 694 ret = -EFAULT; 695 goto out; 696 } 697 if (len > MAX_RW_COUNT - ret) { 698 len = MAX_RW_COUNT - ret; 699 iov[seg].iov_len = len; 700 } 701 ret += len; 702 } 703 out: 704 *ret_pointer = iov; 705 return ret; 706 } 707 708 static ssize_t do_readv_writev(int type, struct file *file, 709 const struct iovec __user * uvector, 710 unsigned long nr_segs, loff_t *pos) 711 { 712 size_t tot_len; 713 struct iovec iovstack[UIO_FASTIOV]; 714 struct iovec *iov = iovstack; 715 ssize_t ret; 716 io_fn_t fn; 717 iov_fn_t fnv; 718 719 if (!file->f_op) { 720 ret = -EINVAL; 721 goto out; 722 } 723 724 ret = rw_copy_check_uvector(type, uvector, nr_segs, 725 ARRAY_SIZE(iovstack), iovstack, &iov); 726 if (ret <= 0) 727 goto out; 728 729 tot_len = ret; 730 ret = rw_verify_area(type, file, pos, tot_len); 731 if (ret < 0) 732 goto out; 733 734 fnv = NULL; 735 if (type == READ) { 736 fn = file->f_op->read; 737 fnv = file->f_op->aio_read; 738 } else { 739 fn = (io_fn_t)file->f_op->write; 740 fnv = file->f_op->aio_write; 741 } 742 743 if (fnv) 744 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 745 pos, fnv); 746 else 747 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 748 749 out: 750 if (iov != iovstack) 751 kfree(iov); 752 if ((ret + (type == READ)) > 0) { 753 if (type == READ) 754 fsnotify_access(file); 755 else 756 fsnotify_modify(file); 757 } 758 return ret; 759 } 760 761 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 762 unsigned long vlen, loff_t *pos) 763 { 764 if (!(file->f_mode & FMODE_READ)) 765 return -EBADF; 766 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 767 return -EINVAL; 768 769 return do_readv_writev(READ, file, vec, vlen, pos); 770 } 771 772 EXPORT_SYMBOL(vfs_readv); 773 774 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 775 unsigned long vlen, loff_t *pos) 776 { 777 if (!(file->f_mode & FMODE_WRITE)) 778 return -EBADF; 779 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 780 return -EINVAL; 781 782 return do_readv_writev(WRITE, file, vec, vlen, pos); 783 } 784 785 EXPORT_SYMBOL(vfs_writev); 786 787 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 788 unsigned long, vlen) 789 { 790 struct file *file; 791 ssize_t ret = -EBADF; 792 int fput_needed; 793 794 file = fget_light(fd, &fput_needed); 795 if (file) { 796 loff_t pos = file_pos_read(file); 797 ret = vfs_readv(file, vec, vlen, &pos); 798 file_pos_write(file, pos); 799 fput_light(file, fput_needed); 800 } 801 802 if (ret > 0) 803 add_rchar(current, ret); 804 inc_syscr(current); 805 return ret; 806 } 807 808 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 809 unsigned long, vlen) 810 { 811 struct file *file; 812 ssize_t ret = -EBADF; 813 int fput_needed; 814 815 file = fget_light(fd, &fput_needed); 816 if (file) { 817 loff_t pos = file_pos_read(file); 818 ret = vfs_writev(file, vec, vlen, &pos); 819 file_pos_write(file, pos); 820 fput_light(file, fput_needed); 821 } 822 823 if (ret > 0) 824 add_wchar(current, ret); 825 inc_syscw(current); 826 return ret; 827 } 828 829 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 830 { 831 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 832 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 833 } 834 835 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 836 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 837 { 838 loff_t pos = pos_from_hilo(pos_h, pos_l); 839 struct file *file; 840 ssize_t ret = -EBADF; 841 int fput_needed; 842 843 if (pos < 0) 844 return -EINVAL; 845 846 file = fget_light(fd, &fput_needed); 847 if (file) { 848 ret = -ESPIPE; 849 if (file->f_mode & FMODE_PREAD) 850 ret = vfs_readv(file, vec, vlen, &pos); 851 fput_light(file, fput_needed); 852 } 853 854 if (ret > 0) 855 add_rchar(current, ret); 856 inc_syscr(current); 857 return ret; 858 } 859 860 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 861 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 862 { 863 loff_t pos = pos_from_hilo(pos_h, pos_l); 864 struct file *file; 865 ssize_t ret = -EBADF; 866 int fput_needed; 867 868 if (pos < 0) 869 return -EINVAL; 870 871 file = fget_light(fd, &fput_needed); 872 if (file) { 873 ret = -ESPIPE; 874 if (file->f_mode & FMODE_PWRITE) 875 ret = vfs_writev(file, vec, vlen, &pos); 876 fput_light(file, fput_needed); 877 } 878 879 if (ret > 0) 880 add_wchar(current, ret); 881 inc_syscw(current); 882 return ret; 883 } 884 885 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 886 size_t count, loff_t max) 887 { 888 struct file * in_file, * out_file; 889 struct inode * in_inode, * out_inode; 890 loff_t pos; 891 ssize_t retval; 892 int fput_needed_in, fput_needed_out, fl; 893 894 /* 895 * Get input file, and verify that it is ok.. 896 */ 897 retval = -EBADF; 898 in_file = fget_light(in_fd, &fput_needed_in); 899 if (!in_file) 900 goto out; 901 if (!(in_file->f_mode & FMODE_READ)) 902 goto fput_in; 903 retval = -ESPIPE; 904 if (!ppos) 905 ppos = &in_file->f_pos; 906 else 907 if (!(in_file->f_mode & FMODE_PREAD)) 908 goto fput_in; 909 retval = rw_verify_area(READ, in_file, ppos, count); 910 if (retval < 0) 911 goto fput_in; 912 count = retval; 913 914 /* 915 * Get output file, and verify that it is ok.. 916 */ 917 retval = -EBADF; 918 out_file = fget_light(out_fd, &fput_needed_out); 919 if (!out_file) 920 goto fput_in; 921 if (!(out_file->f_mode & FMODE_WRITE)) 922 goto fput_out; 923 retval = -EINVAL; 924 in_inode = in_file->f_path.dentry->d_inode; 925 out_inode = out_file->f_path.dentry->d_inode; 926 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 927 if (retval < 0) 928 goto fput_out; 929 count = retval; 930 931 if (!max) 932 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 933 934 pos = *ppos; 935 if (unlikely(pos + count > max)) { 936 retval = -EOVERFLOW; 937 if (pos >= max) 938 goto fput_out; 939 count = max - pos; 940 } 941 942 fl = 0; 943 #if 0 944 /* 945 * We need to debate whether we can enable this or not. The 946 * man page documents EAGAIN return for the output at least, 947 * and the application is arguably buggy if it doesn't expect 948 * EAGAIN on a non-blocking file descriptor. 949 */ 950 if (in_file->f_flags & O_NONBLOCK) 951 fl = SPLICE_F_NONBLOCK; 952 #endif 953 retval = do_splice_direct(in_file, ppos, out_file, count, fl); 954 955 if (retval > 0) { 956 add_rchar(current, retval); 957 add_wchar(current, retval); 958 } 959 960 inc_syscr(current); 961 inc_syscw(current); 962 if (*ppos > max) 963 retval = -EOVERFLOW; 964 965 fput_out: 966 fput_light(out_file, fput_needed_out); 967 fput_in: 968 fput_light(in_file, fput_needed_in); 969 out: 970 return retval; 971 } 972 973 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 974 { 975 loff_t pos; 976 off_t off; 977 ssize_t ret; 978 979 if (offset) { 980 if (unlikely(get_user(off, offset))) 981 return -EFAULT; 982 pos = off; 983 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 984 if (unlikely(put_user(pos, offset))) 985 return -EFAULT; 986 return ret; 987 } 988 989 return do_sendfile(out_fd, in_fd, NULL, count, 0); 990 } 991 992 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 993 { 994 loff_t pos; 995 ssize_t ret; 996 997 if (offset) { 998 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 999 return -EFAULT; 1000 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1001 if (unlikely(put_user(pos, offset))) 1002 return -EFAULT; 1003 return ret; 1004 } 1005 1006 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1007 } 1008