1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/aio.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/export.h> 16 #include <linux/syscalls.h> 17 #include <linux/pagemap.h> 18 #include <linux/splice.h> 19 #include <linux/compat.h> 20 #include "internal.h" 21 22 #include <asm/uaccess.h> 23 #include <asm/unistd.h> 24 25 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 26 typedef ssize_t (*iov_fn_t)(struct kiocb *, const struct iovec *, 27 unsigned long, loff_t); 28 29 const struct file_operations generic_ro_fops = { 30 .llseek = generic_file_llseek, 31 .read = do_sync_read, 32 .aio_read = generic_file_aio_read, 33 .mmap = generic_file_readonly_mmap, 34 .splice_read = generic_file_splice_read, 35 }; 36 37 EXPORT_SYMBOL(generic_ro_fops); 38 39 static inline int unsigned_offsets(struct file *file) 40 { 41 return file->f_mode & FMODE_UNSIGNED_OFFSET; 42 } 43 44 /** 45 * vfs_setpos - update the file offset for lseek 46 * @file: file structure in question 47 * @offset: file offset to seek to 48 * @maxsize: maximum file size 49 * 50 * This is a low-level filesystem helper for updating the file offset to 51 * the value specified by @offset if the given offset is valid and it is 52 * not equal to the current file offset. 53 * 54 * Return the specified offset on success and -EINVAL on invalid offset. 55 */ 56 loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize) 57 { 58 if (offset < 0 && !unsigned_offsets(file)) 59 return -EINVAL; 60 if (offset > maxsize) 61 return -EINVAL; 62 63 if (offset != file->f_pos) { 64 file->f_pos = offset; 65 file->f_version = 0; 66 } 67 return offset; 68 } 69 EXPORT_SYMBOL(vfs_setpos); 70 71 /** 72 * generic_file_llseek_size - generic llseek implementation for regular files 73 * @file: file structure to seek on 74 * @offset: file offset to seek to 75 * @whence: type of seek 76 * @size: max size of this file in file system 77 * @eof: offset used for SEEK_END position 78 * 79 * This is a variant of generic_file_llseek that allows passing in a custom 80 * maximum file size and a custom EOF position, for e.g. hashed directories 81 * 82 * Synchronization: 83 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms) 84 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes. 85 * read/writes behave like SEEK_SET against seeks. 86 */ 87 loff_t 88 generic_file_llseek_size(struct file *file, loff_t offset, int whence, 89 loff_t maxsize, loff_t eof) 90 { 91 switch (whence) { 92 case SEEK_END: 93 offset += eof; 94 break; 95 case SEEK_CUR: 96 /* 97 * Here we special-case the lseek(fd, 0, SEEK_CUR) 98 * position-querying operation. Avoid rewriting the "same" 99 * f_pos value back to the file because a concurrent read(), 100 * write() or lseek() might have altered it 101 */ 102 if (offset == 0) 103 return file->f_pos; 104 /* 105 * f_lock protects against read/modify/write race with other 106 * SEEK_CURs. Note that parallel writes and reads behave 107 * like SEEK_SET. 108 */ 109 spin_lock(&file->f_lock); 110 offset = vfs_setpos(file, file->f_pos + offset, maxsize); 111 spin_unlock(&file->f_lock); 112 return offset; 113 case SEEK_DATA: 114 /* 115 * In the generic case the entire file is data, so as long as 116 * offset isn't at the end of the file then the offset is data. 117 */ 118 if (offset >= eof) 119 return -ENXIO; 120 break; 121 case SEEK_HOLE: 122 /* 123 * There is a virtual hole at the end of the file, so as long as 124 * offset isn't i_size or larger, return i_size. 125 */ 126 if (offset >= eof) 127 return -ENXIO; 128 offset = eof; 129 break; 130 } 131 132 return vfs_setpos(file, offset, maxsize); 133 } 134 EXPORT_SYMBOL(generic_file_llseek_size); 135 136 /** 137 * generic_file_llseek - generic llseek implementation for regular files 138 * @file: file structure to seek on 139 * @offset: file offset to seek to 140 * @whence: type of seek 141 * 142 * This is a generic implemenation of ->llseek useable for all normal local 143 * filesystems. It just updates the file offset to the value specified by 144 * @offset and @whence. 145 */ 146 loff_t generic_file_llseek(struct file *file, loff_t offset, int whence) 147 { 148 struct inode *inode = file->f_mapping->host; 149 150 return generic_file_llseek_size(file, offset, whence, 151 inode->i_sb->s_maxbytes, 152 i_size_read(inode)); 153 } 154 EXPORT_SYMBOL(generic_file_llseek); 155 156 /** 157 * fixed_size_llseek - llseek implementation for fixed-sized devices 158 * @file: file structure to seek on 159 * @offset: file offset to seek to 160 * @whence: type of seek 161 * @size: size of the file 162 * 163 */ 164 loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size) 165 { 166 switch (whence) { 167 case SEEK_SET: case SEEK_CUR: case SEEK_END: 168 return generic_file_llseek_size(file, offset, whence, 169 size, size); 170 default: 171 return -EINVAL; 172 } 173 } 174 EXPORT_SYMBOL(fixed_size_llseek); 175 176 /** 177 * noop_llseek - No Operation Performed llseek implementation 178 * @file: file structure to seek on 179 * @offset: file offset to seek to 180 * @whence: type of seek 181 * 182 * This is an implementation of ->llseek useable for the rare special case when 183 * userspace expects the seek to succeed but the (device) file is actually not 184 * able to perform the seek. In this case you use noop_llseek() instead of 185 * falling back to the default implementation of ->llseek. 186 */ 187 loff_t noop_llseek(struct file *file, loff_t offset, int whence) 188 { 189 return file->f_pos; 190 } 191 EXPORT_SYMBOL(noop_llseek); 192 193 loff_t no_llseek(struct file *file, loff_t offset, int whence) 194 { 195 return -ESPIPE; 196 } 197 EXPORT_SYMBOL(no_llseek); 198 199 loff_t default_llseek(struct file *file, loff_t offset, int whence) 200 { 201 struct inode *inode = file_inode(file); 202 loff_t retval; 203 204 mutex_lock(&inode->i_mutex); 205 switch (whence) { 206 case SEEK_END: 207 offset += i_size_read(inode); 208 break; 209 case SEEK_CUR: 210 if (offset == 0) { 211 retval = file->f_pos; 212 goto out; 213 } 214 offset += file->f_pos; 215 break; 216 case SEEK_DATA: 217 /* 218 * In the generic case the entire file is data, so as 219 * long as offset isn't at the end of the file then the 220 * offset is data. 221 */ 222 if (offset >= inode->i_size) { 223 retval = -ENXIO; 224 goto out; 225 } 226 break; 227 case SEEK_HOLE: 228 /* 229 * There is a virtual hole at the end of the file, so 230 * as long as offset isn't i_size or larger, return 231 * i_size. 232 */ 233 if (offset >= inode->i_size) { 234 retval = -ENXIO; 235 goto out; 236 } 237 offset = inode->i_size; 238 break; 239 } 240 retval = -EINVAL; 241 if (offset >= 0 || unsigned_offsets(file)) { 242 if (offset != file->f_pos) { 243 file->f_pos = offset; 244 file->f_version = 0; 245 } 246 retval = offset; 247 } 248 out: 249 mutex_unlock(&inode->i_mutex); 250 return retval; 251 } 252 EXPORT_SYMBOL(default_llseek); 253 254 loff_t vfs_llseek(struct file *file, loff_t offset, int whence) 255 { 256 loff_t (*fn)(struct file *, loff_t, int); 257 258 fn = no_llseek; 259 if (file->f_mode & FMODE_LSEEK) { 260 if (file->f_op && file->f_op->llseek) 261 fn = file->f_op->llseek; 262 } 263 return fn(file, offset, whence); 264 } 265 EXPORT_SYMBOL(vfs_llseek); 266 267 SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence) 268 { 269 off_t retval; 270 struct fd f = fdget(fd); 271 if (!f.file) 272 return -EBADF; 273 274 retval = -EINVAL; 275 if (whence <= SEEK_MAX) { 276 loff_t res = vfs_llseek(f.file, offset, whence); 277 retval = res; 278 if (res != (loff_t)retval) 279 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 280 } 281 fdput(f); 282 return retval; 283 } 284 285 #ifdef CONFIG_COMPAT 286 COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence) 287 { 288 return sys_lseek(fd, offset, whence); 289 } 290 #endif 291 292 #ifdef __ARCH_WANT_SYS_LLSEEK 293 SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high, 294 unsigned long, offset_low, loff_t __user *, result, 295 unsigned int, whence) 296 { 297 int retval; 298 struct fd f = fdget(fd); 299 loff_t offset; 300 301 if (!f.file) 302 return -EBADF; 303 304 retval = -EINVAL; 305 if (whence > SEEK_MAX) 306 goto out_putf; 307 308 offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low, 309 whence); 310 311 retval = (int)offset; 312 if (offset >= 0) { 313 retval = -EFAULT; 314 if (!copy_to_user(result, &offset, sizeof(offset))) 315 retval = 0; 316 } 317 out_putf: 318 fdput(f); 319 return retval; 320 } 321 #endif 322 323 /* 324 * rw_verify_area doesn't like huge counts. We limit 325 * them to something that fits in "int" so that others 326 * won't have to do range checks all the time. 327 */ 328 int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count) 329 { 330 struct inode *inode; 331 loff_t pos; 332 int retval = -EINVAL; 333 334 inode = file_inode(file); 335 if (unlikely((ssize_t) count < 0)) 336 return retval; 337 pos = *ppos; 338 if (unlikely(pos < 0)) { 339 if (!unsigned_offsets(file)) 340 return retval; 341 if (count >= -pos) /* both values are in 0..LLONG_MAX */ 342 return -EOVERFLOW; 343 } else if (unlikely((loff_t) (pos + count) < 0)) { 344 if (!unsigned_offsets(file)) 345 return retval; 346 } 347 348 if (unlikely(inode->i_flock && mandatory_lock(inode))) { 349 retval = locks_mandatory_area( 350 read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, 351 inode, file, pos, count); 352 if (retval < 0) 353 return retval; 354 } 355 retval = security_file_permission(file, 356 read_write == READ ? MAY_READ : MAY_WRITE); 357 if (retval) 358 return retval; 359 return count > MAX_RW_COUNT ? MAX_RW_COUNT : count; 360 } 361 362 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 363 { 364 struct iovec iov = { .iov_base = buf, .iov_len = len }; 365 struct kiocb kiocb; 366 ssize_t ret; 367 368 init_sync_kiocb(&kiocb, filp); 369 kiocb.ki_pos = *ppos; 370 kiocb.ki_left = len; 371 kiocb.ki_nbytes = len; 372 373 ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); 374 if (-EIOCBQUEUED == ret) 375 ret = wait_on_sync_kiocb(&kiocb); 376 *ppos = kiocb.ki_pos; 377 return ret; 378 } 379 380 EXPORT_SYMBOL(do_sync_read); 381 382 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 383 { 384 ssize_t ret; 385 386 if (!(file->f_mode & FMODE_READ)) 387 return -EBADF; 388 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 389 return -EINVAL; 390 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 391 return -EFAULT; 392 393 ret = rw_verify_area(READ, file, pos, count); 394 if (ret >= 0) { 395 count = ret; 396 if (file->f_op->read) 397 ret = file->f_op->read(file, buf, count, pos); 398 else 399 ret = do_sync_read(file, buf, count, pos); 400 if (ret > 0) { 401 fsnotify_access(file); 402 add_rchar(current, ret); 403 } 404 inc_syscr(current); 405 } 406 407 return ret; 408 } 409 410 EXPORT_SYMBOL(vfs_read); 411 412 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 413 { 414 struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; 415 struct kiocb kiocb; 416 ssize_t ret; 417 418 init_sync_kiocb(&kiocb, filp); 419 kiocb.ki_pos = *ppos; 420 kiocb.ki_left = len; 421 kiocb.ki_nbytes = len; 422 423 ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); 424 if (-EIOCBQUEUED == ret) 425 ret = wait_on_sync_kiocb(&kiocb); 426 *ppos = kiocb.ki_pos; 427 return ret; 428 } 429 430 EXPORT_SYMBOL(do_sync_write); 431 432 ssize_t __kernel_write(struct file *file, const char *buf, size_t count, loff_t *pos) 433 { 434 mm_segment_t old_fs; 435 const char __user *p; 436 ssize_t ret; 437 438 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 439 return -EINVAL; 440 441 old_fs = get_fs(); 442 set_fs(get_ds()); 443 p = (__force const char __user *)buf; 444 if (count > MAX_RW_COUNT) 445 count = MAX_RW_COUNT; 446 if (file->f_op->write) 447 ret = file->f_op->write(file, p, count, pos); 448 else 449 ret = do_sync_write(file, p, count, pos); 450 set_fs(old_fs); 451 if (ret > 0) { 452 fsnotify_modify(file); 453 add_wchar(current, ret); 454 } 455 inc_syscw(current); 456 return ret; 457 } 458 459 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 460 { 461 ssize_t ret; 462 463 if (!(file->f_mode & FMODE_WRITE)) 464 return -EBADF; 465 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 466 return -EINVAL; 467 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 468 return -EFAULT; 469 470 ret = rw_verify_area(WRITE, file, pos, count); 471 if (ret >= 0) { 472 count = ret; 473 file_start_write(file); 474 if (file->f_op->write) 475 ret = file->f_op->write(file, buf, count, pos); 476 else 477 ret = do_sync_write(file, buf, count, pos); 478 if (ret > 0) { 479 fsnotify_modify(file); 480 add_wchar(current, ret); 481 } 482 inc_syscw(current); 483 file_end_write(file); 484 } 485 486 return ret; 487 } 488 489 EXPORT_SYMBOL(vfs_write); 490 491 static inline loff_t file_pos_read(struct file *file) 492 { 493 return file->f_pos; 494 } 495 496 static inline void file_pos_write(struct file *file, loff_t pos) 497 { 498 file->f_pos = pos; 499 } 500 501 SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count) 502 { 503 struct fd f = fdget(fd); 504 ssize_t ret = -EBADF; 505 506 if (f.file) { 507 loff_t pos = file_pos_read(f.file); 508 ret = vfs_read(f.file, buf, count, &pos); 509 if (ret >= 0) 510 file_pos_write(f.file, pos); 511 fdput(f); 512 } 513 return ret; 514 } 515 516 SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf, 517 size_t, count) 518 { 519 struct fd f = fdget(fd); 520 ssize_t ret = -EBADF; 521 522 if (f.file) { 523 loff_t pos = file_pos_read(f.file); 524 ret = vfs_write(f.file, buf, count, &pos); 525 if (ret >= 0) 526 file_pos_write(f.file, pos); 527 fdput(f); 528 } 529 530 return ret; 531 } 532 533 SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf, 534 size_t, count, loff_t, pos) 535 { 536 struct fd f; 537 ssize_t ret = -EBADF; 538 539 if (pos < 0) 540 return -EINVAL; 541 542 f = fdget(fd); 543 if (f.file) { 544 ret = -ESPIPE; 545 if (f.file->f_mode & FMODE_PREAD) 546 ret = vfs_read(f.file, buf, count, &pos); 547 fdput(f); 548 } 549 550 return ret; 551 } 552 553 SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf, 554 size_t, count, loff_t, pos) 555 { 556 struct fd f; 557 ssize_t ret = -EBADF; 558 559 if (pos < 0) 560 return -EINVAL; 561 562 f = fdget(fd); 563 if (f.file) { 564 ret = -ESPIPE; 565 if (f.file->f_mode & FMODE_PWRITE) 566 ret = vfs_write(f.file, buf, count, &pos); 567 fdput(f); 568 } 569 570 return ret; 571 } 572 573 /* 574 * Reduce an iovec's length in-place. Return the resulting number of segments 575 */ 576 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 577 { 578 unsigned long seg = 0; 579 size_t len = 0; 580 581 while (seg < nr_segs) { 582 seg++; 583 if (len + iov->iov_len >= to) { 584 iov->iov_len = to - len; 585 break; 586 } 587 len += iov->iov_len; 588 iov++; 589 } 590 return seg; 591 } 592 EXPORT_SYMBOL(iov_shorten); 593 594 static ssize_t do_sync_readv_writev(struct file *filp, const struct iovec *iov, 595 unsigned long nr_segs, size_t len, loff_t *ppos, iov_fn_t fn) 596 { 597 struct kiocb kiocb; 598 ssize_t ret; 599 600 init_sync_kiocb(&kiocb, filp); 601 kiocb.ki_pos = *ppos; 602 kiocb.ki_left = len; 603 kiocb.ki_nbytes = len; 604 605 ret = fn(&kiocb, iov, nr_segs, kiocb.ki_pos); 606 if (ret == -EIOCBQUEUED) 607 ret = wait_on_sync_kiocb(&kiocb); 608 *ppos = kiocb.ki_pos; 609 return ret; 610 } 611 612 /* Do it by hand, with file-ops */ 613 static ssize_t do_loop_readv_writev(struct file *filp, struct iovec *iov, 614 unsigned long nr_segs, loff_t *ppos, io_fn_t fn) 615 { 616 struct iovec *vector = iov; 617 ssize_t ret = 0; 618 619 while (nr_segs > 0) { 620 void __user *base; 621 size_t len; 622 ssize_t nr; 623 624 base = vector->iov_base; 625 len = vector->iov_len; 626 vector++; 627 nr_segs--; 628 629 nr = fn(filp, base, len, ppos); 630 631 if (nr < 0) { 632 if (!ret) 633 ret = nr; 634 break; 635 } 636 ret += nr; 637 if (nr != len) 638 break; 639 } 640 641 return ret; 642 } 643 644 /* A write operation does a read from user space and vice versa */ 645 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 646 647 ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, 648 unsigned long nr_segs, unsigned long fast_segs, 649 struct iovec *fast_pointer, 650 struct iovec **ret_pointer) 651 { 652 unsigned long seg; 653 ssize_t ret; 654 struct iovec *iov = fast_pointer; 655 656 /* 657 * SuS says "The readv() function *may* fail if the iovcnt argument 658 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 659 * traditionally returned zero for zero segments, so... 660 */ 661 if (nr_segs == 0) { 662 ret = 0; 663 goto out; 664 } 665 666 /* 667 * First get the "struct iovec" from user memory and 668 * verify all the pointers 669 */ 670 if (nr_segs > UIO_MAXIOV) { 671 ret = -EINVAL; 672 goto out; 673 } 674 if (nr_segs > fast_segs) { 675 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 676 if (iov == NULL) { 677 ret = -ENOMEM; 678 goto out; 679 } 680 } 681 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) { 682 ret = -EFAULT; 683 goto out; 684 } 685 686 /* 687 * According to the Single Unix Specification we should return EINVAL 688 * if an element length is < 0 when cast to ssize_t or if the 689 * total length would overflow the ssize_t return value of the 690 * system call. 691 * 692 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the 693 * overflow case. 694 */ 695 ret = 0; 696 for (seg = 0; seg < nr_segs; seg++) { 697 void __user *buf = iov[seg].iov_base; 698 ssize_t len = (ssize_t)iov[seg].iov_len; 699 700 /* see if we we're about to use an invalid len or if 701 * it's about to overflow ssize_t */ 702 if (len < 0) { 703 ret = -EINVAL; 704 goto out; 705 } 706 if (type >= 0 707 && unlikely(!access_ok(vrfy_dir(type), buf, len))) { 708 ret = -EFAULT; 709 goto out; 710 } 711 if (len > MAX_RW_COUNT - ret) { 712 len = MAX_RW_COUNT - ret; 713 iov[seg].iov_len = len; 714 } 715 ret += len; 716 } 717 out: 718 *ret_pointer = iov; 719 return ret; 720 } 721 722 static ssize_t do_readv_writev(int type, struct file *file, 723 const struct iovec __user * uvector, 724 unsigned long nr_segs, loff_t *pos) 725 { 726 size_t tot_len; 727 struct iovec iovstack[UIO_FASTIOV]; 728 struct iovec *iov = iovstack; 729 ssize_t ret; 730 io_fn_t fn; 731 iov_fn_t fnv; 732 733 if (!file->f_op) { 734 ret = -EINVAL; 735 goto out; 736 } 737 738 ret = rw_copy_check_uvector(type, uvector, nr_segs, 739 ARRAY_SIZE(iovstack), iovstack, &iov); 740 if (ret <= 0) 741 goto out; 742 743 tot_len = ret; 744 ret = rw_verify_area(type, file, pos, tot_len); 745 if (ret < 0) 746 goto out; 747 748 fnv = NULL; 749 if (type == READ) { 750 fn = file->f_op->read; 751 fnv = file->f_op->aio_read; 752 } else { 753 fn = (io_fn_t)file->f_op->write; 754 fnv = file->f_op->aio_write; 755 file_start_write(file); 756 } 757 758 if (fnv) 759 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 760 pos, fnv); 761 else 762 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 763 764 if (type != READ) 765 file_end_write(file); 766 767 out: 768 if (iov != iovstack) 769 kfree(iov); 770 if ((ret + (type == READ)) > 0) { 771 if (type == READ) 772 fsnotify_access(file); 773 else 774 fsnotify_modify(file); 775 } 776 return ret; 777 } 778 779 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 780 unsigned long vlen, loff_t *pos) 781 { 782 if (!(file->f_mode & FMODE_READ)) 783 return -EBADF; 784 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 785 return -EINVAL; 786 787 return do_readv_writev(READ, file, vec, vlen, pos); 788 } 789 790 EXPORT_SYMBOL(vfs_readv); 791 792 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 793 unsigned long vlen, loff_t *pos) 794 { 795 if (!(file->f_mode & FMODE_WRITE)) 796 return -EBADF; 797 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 798 return -EINVAL; 799 800 return do_readv_writev(WRITE, file, vec, vlen, pos); 801 } 802 803 EXPORT_SYMBOL(vfs_writev); 804 805 SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec, 806 unsigned long, vlen) 807 { 808 struct fd f = fdget(fd); 809 ssize_t ret = -EBADF; 810 811 if (f.file) { 812 loff_t pos = file_pos_read(f.file); 813 ret = vfs_readv(f.file, vec, vlen, &pos); 814 if (ret >= 0) 815 file_pos_write(f.file, pos); 816 fdput(f); 817 } 818 819 if (ret > 0) 820 add_rchar(current, ret); 821 inc_syscr(current); 822 return ret; 823 } 824 825 SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec, 826 unsigned long, vlen) 827 { 828 struct fd f = fdget(fd); 829 ssize_t ret = -EBADF; 830 831 if (f.file) { 832 loff_t pos = file_pos_read(f.file); 833 ret = vfs_writev(f.file, vec, vlen, &pos); 834 if (ret >= 0) 835 file_pos_write(f.file, pos); 836 fdput(f); 837 } 838 839 if (ret > 0) 840 add_wchar(current, ret); 841 inc_syscw(current); 842 return ret; 843 } 844 845 static inline loff_t pos_from_hilo(unsigned long high, unsigned long low) 846 { 847 #define HALF_LONG_BITS (BITS_PER_LONG / 2) 848 return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low; 849 } 850 851 SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec, 852 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 853 { 854 loff_t pos = pos_from_hilo(pos_h, pos_l); 855 struct fd f; 856 ssize_t ret = -EBADF; 857 858 if (pos < 0) 859 return -EINVAL; 860 861 f = fdget(fd); 862 if (f.file) { 863 ret = -ESPIPE; 864 if (f.file->f_mode & FMODE_PREAD) 865 ret = vfs_readv(f.file, vec, vlen, &pos); 866 fdput(f); 867 } 868 869 if (ret > 0) 870 add_rchar(current, ret); 871 inc_syscr(current); 872 return ret; 873 } 874 875 SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec, 876 unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h) 877 { 878 loff_t pos = pos_from_hilo(pos_h, pos_l); 879 struct fd f; 880 ssize_t ret = -EBADF; 881 882 if (pos < 0) 883 return -EINVAL; 884 885 f = fdget(fd); 886 if (f.file) { 887 ret = -ESPIPE; 888 if (f.file->f_mode & FMODE_PWRITE) 889 ret = vfs_writev(f.file, vec, vlen, &pos); 890 fdput(f); 891 } 892 893 if (ret > 0) 894 add_wchar(current, ret); 895 inc_syscw(current); 896 return ret; 897 } 898 899 #ifdef CONFIG_COMPAT 900 901 static ssize_t compat_do_readv_writev(int type, struct file *file, 902 const struct compat_iovec __user *uvector, 903 unsigned long nr_segs, loff_t *pos) 904 { 905 compat_ssize_t tot_len; 906 struct iovec iovstack[UIO_FASTIOV]; 907 struct iovec *iov = iovstack; 908 ssize_t ret; 909 io_fn_t fn; 910 iov_fn_t fnv; 911 912 ret = -EINVAL; 913 if (!file->f_op) 914 goto out; 915 916 ret = -EFAULT; 917 if (!access_ok(VERIFY_READ, uvector, nr_segs*sizeof(*uvector))) 918 goto out; 919 920 ret = compat_rw_copy_check_uvector(type, uvector, nr_segs, 921 UIO_FASTIOV, iovstack, &iov); 922 if (ret <= 0) 923 goto out; 924 925 tot_len = ret; 926 ret = rw_verify_area(type, file, pos, tot_len); 927 if (ret < 0) 928 goto out; 929 930 fnv = NULL; 931 if (type == READ) { 932 fn = file->f_op->read; 933 fnv = file->f_op->aio_read; 934 } else { 935 fn = (io_fn_t)file->f_op->write; 936 fnv = file->f_op->aio_write; 937 file_start_write(file); 938 } 939 940 if (fnv) 941 ret = do_sync_readv_writev(file, iov, nr_segs, tot_len, 942 pos, fnv); 943 else 944 ret = do_loop_readv_writev(file, iov, nr_segs, pos, fn); 945 946 if (type != READ) 947 file_end_write(file); 948 949 out: 950 if (iov != iovstack) 951 kfree(iov); 952 if ((ret + (type == READ)) > 0) { 953 if (type == READ) 954 fsnotify_access(file); 955 else 956 fsnotify_modify(file); 957 } 958 return ret; 959 } 960 961 static size_t compat_readv(struct file *file, 962 const struct compat_iovec __user *vec, 963 unsigned long vlen, loff_t *pos) 964 { 965 ssize_t ret = -EBADF; 966 967 if (!(file->f_mode & FMODE_READ)) 968 goto out; 969 970 ret = -EINVAL; 971 if (!file->f_op || (!file->f_op->aio_read && !file->f_op->read)) 972 goto out; 973 974 ret = compat_do_readv_writev(READ, file, vec, vlen, pos); 975 976 out: 977 if (ret > 0) 978 add_rchar(current, ret); 979 inc_syscr(current); 980 return ret; 981 } 982 983 COMPAT_SYSCALL_DEFINE3(readv, unsigned long, fd, 984 const struct compat_iovec __user *,vec, 985 unsigned long, vlen) 986 { 987 struct fd f = fdget(fd); 988 ssize_t ret; 989 loff_t pos; 990 991 if (!f.file) 992 return -EBADF; 993 pos = f.file->f_pos; 994 ret = compat_readv(f.file, vec, vlen, &pos); 995 if (ret >= 0) 996 f.file->f_pos = pos; 997 fdput(f); 998 return ret; 999 } 1000 1001 COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd, 1002 const struct compat_iovec __user *,vec, 1003 unsigned long, vlen, loff_t, pos) 1004 { 1005 struct fd f; 1006 ssize_t ret; 1007 1008 if (pos < 0) 1009 return -EINVAL; 1010 f = fdget(fd); 1011 if (!f.file) 1012 return -EBADF; 1013 ret = -ESPIPE; 1014 if (f.file->f_mode & FMODE_PREAD) 1015 ret = compat_readv(f.file, vec, vlen, &pos); 1016 fdput(f); 1017 return ret; 1018 } 1019 1020 COMPAT_SYSCALL_DEFINE5(preadv, unsigned long, fd, 1021 const struct compat_iovec __user *,vec, 1022 unsigned long, vlen, u32, pos_low, u32, pos_high) 1023 { 1024 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1025 return compat_sys_preadv64(fd, vec, vlen, pos); 1026 } 1027 1028 static size_t compat_writev(struct file *file, 1029 const struct compat_iovec __user *vec, 1030 unsigned long vlen, loff_t *pos) 1031 { 1032 ssize_t ret = -EBADF; 1033 1034 if (!(file->f_mode & FMODE_WRITE)) 1035 goto out; 1036 1037 ret = -EINVAL; 1038 if (!file->f_op || (!file->f_op->aio_write && !file->f_op->write)) 1039 goto out; 1040 1041 ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos); 1042 1043 out: 1044 if (ret > 0) 1045 add_wchar(current, ret); 1046 inc_syscw(current); 1047 return ret; 1048 } 1049 1050 COMPAT_SYSCALL_DEFINE3(writev, unsigned long, fd, 1051 const struct compat_iovec __user *, vec, 1052 unsigned long, vlen) 1053 { 1054 struct fd f = fdget(fd); 1055 ssize_t ret; 1056 loff_t pos; 1057 1058 if (!f.file) 1059 return -EBADF; 1060 pos = f.file->f_pos; 1061 ret = compat_writev(f.file, vec, vlen, &pos); 1062 if (ret >= 0) 1063 f.file->f_pos = pos; 1064 fdput(f); 1065 return ret; 1066 } 1067 1068 COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd, 1069 const struct compat_iovec __user *,vec, 1070 unsigned long, vlen, loff_t, pos) 1071 { 1072 struct fd f; 1073 ssize_t ret; 1074 1075 if (pos < 0) 1076 return -EINVAL; 1077 f = fdget(fd); 1078 if (!f.file) 1079 return -EBADF; 1080 ret = -ESPIPE; 1081 if (f.file->f_mode & FMODE_PWRITE) 1082 ret = compat_writev(f.file, vec, vlen, &pos); 1083 fdput(f); 1084 return ret; 1085 } 1086 1087 COMPAT_SYSCALL_DEFINE5(pwritev, unsigned long, fd, 1088 const struct compat_iovec __user *,vec, 1089 unsigned long, vlen, u32, pos_low, u32, pos_high) 1090 { 1091 loff_t pos = ((loff_t)pos_high << 32) | pos_low; 1092 return compat_sys_pwritev64(fd, vec, vlen, pos); 1093 } 1094 #endif 1095 1096 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 1097 size_t count, loff_t max) 1098 { 1099 struct fd in, out; 1100 struct inode *in_inode, *out_inode; 1101 loff_t pos; 1102 loff_t out_pos; 1103 ssize_t retval; 1104 int fl; 1105 1106 /* 1107 * Get input file, and verify that it is ok.. 1108 */ 1109 retval = -EBADF; 1110 in = fdget(in_fd); 1111 if (!in.file) 1112 goto out; 1113 if (!(in.file->f_mode & FMODE_READ)) 1114 goto fput_in; 1115 retval = -ESPIPE; 1116 if (!ppos) { 1117 pos = in.file->f_pos; 1118 } else { 1119 pos = *ppos; 1120 if (!(in.file->f_mode & FMODE_PREAD)) 1121 goto fput_in; 1122 } 1123 retval = rw_verify_area(READ, in.file, &pos, count); 1124 if (retval < 0) 1125 goto fput_in; 1126 count = retval; 1127 1128 /* 1129 * Get output file, and verify that it is ok.. 1130 */ 1131 retval = -EBADF; 1132 out = fdget(out_fd); 1133 if (!out.file) 1134 goto fput_in; 1135 if (!(out.file->f_mode & FMODE_WRITE)) 1136 goto fput_out; 1137 retval = -EINVAL; 1138 in_inode = file_inode(in.file); 1139 out_inode = file_inode(out.file); 1140 out_pos = out.file->f_pos; 1141 retval = rw_verify_area(WRITE, out.file, &out_pos, count); 1142 if (retval < 0) 1143 goto fput_out; 1144 count = retval; 1145 1146 if (!max) 1147 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 1148 1149 if (unlikely(pos + count > max)) { 1150 retval = -EOVERFLOW; 1151 if (pos >= max) 1152 goto fput_out; 1153 count = max - pos; 1154 } 1155 1156 fl = 0; 1157 #if 0 1158 /* 1159 * We need to debate whether we can enable this or not. The 1160 * man page documents EAGAIN return for the output at least, 1161 * and the application is arguably buggy if it doesn't expect 1162 * EAGAIN on a non-blocking file descriptor. 1163 */ 1164 if (in.file->f_flags & O_NONBLOCK) 1165 fl = SPLICE_F_NONBLOCK; 1166 #endif 1167 file_start_write(out.file); 1168 retval = do_splice_direct(in.file, &pos, out.file, &out_pos, count, fl); 1169 file_end_write(out.file); 1170 1171 if (retval > 0) { 1172 add_rchar(current, retval); 1173 add_wchar(current, retval); 1174 fsnotify_access(in.file); 1175 fsnotify_modify(out.file); 1176 out.file->f_pos = out_pos; 1177 if (ppos) 1178 *ppos = pos; 1179 else 1180 in.file->f_pos = pos; 1181 } 1182 1183 inc_syscr(current); 1184 inc_syscw(current); 1185 if (pos > max) 1186 retval = -EOVERFLOW; 1187 1188 fput_out: 1189 fdput(out); 1190 fput_in: 1191 fdput(in); 1192 out: 1193 return retval; 1194 } 1195 1196 SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count) 1197 { 1198 loff_t pos; 1199 off_t off; 1200 ssize_t ret; 1201 1202 if (offset) { 1203 if (unlikely(get_user(off, offset))) 1204 return -EFAULT; 1205 pos = off; 1206 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1207 if (unlikely(put_user(pos, offset))) 1208 return -EFAULT; 1209 return ret; 1210 } 1211 1212 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1213 } 1214 1215 SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count) 1216 { 1217 loff_t pos; 1218 ssize_t ret; 1219 1220 if (offset) { 1221 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1222 return -EFAULT; 1223 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1224 if (unlikely(put_user(pos, offset))) 1225 return -EFAULT; 1226 return ret; 1227 } 1228 1229 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1230 } 1231 1232 #ifdef CONFIG_COMPAT 1233 COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, 1234 compat_off_t __user *, offset, compat_size_t, count) 1235 { 1236 loff_t pos; 1237 off_t off; 1238 ssize_t ret; 1239 1240 if (offset) { 1241 if (unlikely(get_user(off, offset))) 1242 return -EFAULT; 1243 pos = off; 1244 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 1245 if (unlikely(put_user(pos, offset))) 1246 return -EFAULT; 1247 return ret; 1248 } 1249 1250 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1251 } 1252 1253 COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, 1254 compat_loff_t __user *, offset, compat_size_t, count) 1255 { 1256 loff_t pos; 1257 ssize_t ret; 1258 1259 if (offset) { 1260 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 1261 return -EFAULT; 1262 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 1263 if (unlikely(put_user(pos, offset))) 1264 return -EFAULT; 1265 return ret; 1266 } 1267 1268 return do_sendfile(out_fd, in_fd, NULL, count, 0); 1269 } 1270 #endif 1271