1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/fsnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 18 #include <asm/uaccess.h> 19 #include <asm/unistd.h> 20 21 struct file_operations generic_ro_fops = { 22 .llseek = generic_file_llseek, 23 .read = generic_file_read, 24 .mmap = generic_file_readonly_mmap, 25 .sendfile = generic_file_sendfile, 26 }; 27 28 EXPORT_SYMBOL(generic_ro_fops); 29 30 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 31 { 32 long long retval; 33 struct inode *inode = file->f_mapping->host; 34 35 down(&inode->i_sem); 36 switch (origin) { 37 case 2: 38 offset += inode->i_size; 39 break; 40 case 1: 41 offset += file->f_pos; 42 } 43 retval = -EINVAL; 44 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 45 if (offset != file->f_pos) { 46 file->f_pos = offset; 47 file->f_version = 0; 48 } 49 retval = offset; 50 } 51 up(&inode->i_sem); 52 return retval; 53 } 54 55 EXPORT_SYMBOL(generic_file_llseek); 56 57 loff_t remote_llseek(struct file *file, loff_t offset, int origin) 58 { 59 long long retval; 60 61 lock_kernel(); 62 switch (origin) { 63 case 2: 64 offset += i_size_read(file->f_dentry->d_inode); 65 break; 66 case 1: 67 offset += file->f_pos; 68 } 69 retval = -EINVAL; 70 if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { 71 if (offset != file->f_pos) { 72 file->f_pos = offset; 73 file->f_version = 0; 74 } 75 retval = offset; 76 } 77 unlock_kernel(); 78 return retval; 79 } 80 EXPORT_SYMBOL(remote_llseek); 81 82 loff_t no_llseek(struct file *file, loff_t offset, int origin) 83 { 84 return -ESPIPE; 85 } 86 EXPORT_SYMBOL(no_llseek); 87 88 loff_t default_llseek(struct file *file, loff_t offset, int origin) 89 { 90 long long retval; 91 92 lock_kernel(); 93 switch (origin) { 94 case 2: 95 offset += i_size_read(file->f_dentry->d_inode); 96 break; 97 case 1: 98 offset += file->f_pos; 99 } 100 retval = -EINVAL; 101 if (offset >= 0) { 102 if (offset != file->f_pos) { 103 file->f_pos = offset; 104 file->f_version = 0; 105 } 106 retval = offset; 107 } 108 unlock_kernel(); 109 return retval; 110 } 111 EXPORT_SYMBOL(default_llseek); 112 113 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 114 { 115 loff_t (*fn)(struct file *, loff_t, int); 116 117 fn = no_llseek; 118 if (file->f_mode & FMODE_LSEEK) { 119 fn = default_llseek; 120 if (file->f_op && file->f_op->llseek) 121 fn = file->f_op->llseek; 122 } 123 return fn(file, offset, origin); 124 } 125 EXPORT_SYMBOL(vfs_llseek); 126 127 asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) 128 { 129 off_t retval; 130 struct file * file; 131 int fput_needed; 132 133 retval = -EBADF; 134 file = fget_light(fd, &fput_needed); 135 if (!file) 136 goto bad; 137 138 retval = -EINVAL; 139 if (origin <= 2) { 140 loff_t res = vfs_llseek(file, offset, origin); 141 retval = res; 142 if (res != (loff_t)retval) 143 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 144 } 145 fput_light(file, fput_needed); 146 bad: 147 return retval; 148 } 149 150 #ifdef __ARCH_WANT_SYS_LLSEEK 151 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, 152 unsigned long offset_low, loff_t __user * result, 153 unsigned int origin) 154 { 155 int retval; 156 struct file * file; 157 loff_t offset; 158 int fput_needed; 159 160 retval = -EBADF; 161 file = fget_light(fd, &fput_needed); 162 if (!file) 163 goto bad; 164 165 retval = -EINVAL; 166 if (origin > 2) 167 goto out_putf; 168 169 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 170 origin); 171 172 retval = (int)offset; 173 if (offset >= 0) { 174 retval = -EFAULT; 175 if (!copy_to_user(result, &offset, sizeof(offset))) 176 retval = 0; 177 } 178 out_putf: 179 fput_light(file, fput_needed); 180 bad: 181 return retval; 182 } 183 #endif 184 185 186 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 187 { 188 struct inode *inode; 189 loff_t pos; 190 191 if (unlikely(count > file->f_maxcount)) 192 goto Einval; 193 pos = *ppos; 194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 195 goto Einval; 196 197 inode = file->f_dentry->d_inode; 198 if (inode->i_flock && MANDATORY_LOCK(inode)) 199 return locks_mandatory_area(read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); 200 return 0; 201 202 Einval: 203 return -EINVAL; 204 } 205 206 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 207 { 208 set_current_state(TASK_UNINTERRUPTIBLE); 209 if (!kiocbIsKicked(iocb)) 210 schedule(); 211 else 212 kiocbClearKicked(iocb); 213 __set_current_state(TASK_RUNNING); 214 } 215 216 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 217 { 218 struct kiocb kiocb; 219 ssize_t ret; 220 221 init_sync_kiocb(&kiocb, filp); 222 kiocb.ki_pos = *ppos; 223 while (-EIOCBRETRY == 224 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) 225 wait_on_retry_sync_kiocb(&kiocb); 226 227 if (-EIOCBQUEUED == ret) 228 ret = wait_on_sync_kiocb(&kiocb); 229 *ppos = kiocb.ki_pos; 230 return ret; 231 } 232 233 EXPORT_SYMBOL(do_sync_read); 234 235 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 236 { 237 ssize_t ret; 238 239 if (!(file->f_mode & FMODE_READ)) 240 return -EBADF; 241 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 242 return -EINVAL; 243 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 244 return -EFAULT; 245 246 ret = rw_verify_area(READ, file, pos, count); 247 if (!ret) { 248 ret = security_file_permission (file, MAY_READ); 249 if (!ret) { 250 if (file->f_op->read) 251 ret = file->f_op->read(file, buf, count, pos); 252 else 253 ret = do_sync_read(file, buf, count, pos); 254 if (ret > 0) { 255 fsnotify_access(file->f_dentry); 256 current->rchar += ret; 257 } 258 current->syscr++; 259 } 260 } 261 262 return ret; 263 } 264 265 EXPORT_SYMBOL(vfs_read); 266 267 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 268 { 269 struct kiocb kiocb; 270 ssize_t ret; 271 272 init_sync_kiocb(&kiocb, filp); 273 kiocb.ki_pos = *ppos; 274 while (-EIOCBRETRY == 275 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) 276 wait_on_retry_sync_kiocb(&kiocb); 277 278 if (-EIOCBQUEUED == ret) 279 ret = wait_on_sync_kiocb(&kiocb); 280 *ppos = kiocb.ki_pos; 281 return ret; 282 } 283 284 EXPORT_SYMBOL(do_sync_write); 285 286 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 287 { 288 ssize_t ret; 289 290 if (!(file->f_mode & FMODE_WRITE)) 291 return -EBADF; 292 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 293 return -EINVAL; 294 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 295 return -EFAULT; 296 297 ret = rw_verify_area(WRITE, file, pos, count); 298 if (!ret) { 299 ret = security_file_permission (file, MAY_WRITE); 300 if (!ret) { 301 if (file->f_op->write) 302 ret = file->f_op->write(file, buf, count, pos); 303 else 304 ret = do_sync_write(file, buf, count, pos); 305 if (ret > 0) { 306 fsnotify_modify(file->f_dentry); 307 current->wchar += ret; 308 } 309 current->syscw++; 310 } 311 } 312 313 return ret; 314 } 315 316 EXPORT_SYMBOL(vfs_write); 317 318 static inline loff_t file_pos_read(struct file *file) 319 { 320 return file->f_pos; 321 } 322 323 static inline void file_pos_write(struct file *file, loff_t pos) 324 { 325 file->f_pos = pos; 326 } 327 328 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) 329 { 330 struct file *file; 331 ssize_t ret = -EBADF; 332 int fput_needed; 333 334 file = fget_light(fd, &fput_needed); 335 if (file) { 336 loff_t pos = file_pos_read(file); 337 ret = vfs_read(file, buf, count, &pos); 338 file_pos_write(file, pos); 339 fput_light(file, fput_needed); 340 } 341 342 return ret; 343 } 344 EXPORT_SYMBOL_GPL(sys_read); 345 346 asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) 347 { 348 struct file *file; 349 ssize_t ret = -EBADF; 350 int fput_needed; 351 352 file = fget_light(fd, &fput_needed); 353 if (file) { 354 loff_t pos = file_pos_read(file); 355 ret = vfs_write(file, buf, count, &pos); 356 file_pos_write(file, pos); 357 fput_light(file, fput_needed); 358 } 359 360 return ret; 361 } 362 363 asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, 364 size_t count, loff_t pos) 365 { 366 struct file *file; 367 ssize_t ret = -EBADF; 368 int fput_needed; 369 370 if (pos < 0) 371 return -EINVAL; 372 373 file = fget_light(fd, &fput_needed); 374 if (file) { 375 ret = -ESPIPE; 376 if (file->f_mode & FMODE_PREAD) 377 ret = vfs_read(file, buf, count, &pos); 378 fput_light(file, fput_needed); 379 } 380 381 return ret; 382 } 383 384 asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, 385 size_t count, loff_t pos) 386 { 387 struct file *file; 388 ssize_t ret = -EBADF; 389 int fput_needed; 390 391 if (pos < 0) 392 return -EINVAL; 393 394 file = fget_light(fd, &fput_needed); 395 if (file) { 396 ret = -ESPIPE; 397 if (file->f_mode & FMODE_PWRITE) 398 ret = vfs_write(file, buf, count, &pos); 399 fput_light(file, fput_needed); 400 } 401 402 return ret; 403 } 404 405 /* 406 * Reduce an iovec's length in-place. Return the resulting number of segments 407 */ 408 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 409 { 410 unsigned long seg = 0; 411 size_t len = 0; 412 413 while (seg < nr_segs) { 414 seg++; 415 if (len + iov->iov_len >= to) { 416 iov->iov_len = to - len; 417 break; 418 } 419 len += iov->iov_len; 420 iov++; 421 } 422 return seg; 423 } 424 425 EXPORT_SYMBOL(iov_shorten); 426 427 /* A write operation does a read from user space and vice versa */ 428 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 429 430 static ssize_t do_readv_writev(int type, struct file *file, 431 const struct iovec __user * uvector, 432 unsigned long nr_segs, loff_t *pos) 433 { 434 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 435 typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); 436 437 size_t tot_len; 438 struct iovec iovstack[UIO_FASTIOV]; 439 struct iovec *iov=iovstack, *vector; 440 ssize_t ret; 441 int seg; 442 io_fn_t fn; 443 iov_fn_t fnv; 444 445 /* 446 * SuS says "The readv() function *may* fail if the iovcnt argument 447 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 448 * traditionally returned zero for zero segments, so... 449 */ 450 ret = 0; 451 if (nr_segs == 0) 452 goto out; 453 454 /* 455 * First get the "struct iovec" from user memory and 456 * verify all the pointers 457 */ 458 ret = -EINVAL; 459 if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) 460 goto out; 461 if (!file->f_op) 462 goto out; 463 if (nr_segs > UIO_FASTIOV) { 464 ret = -ENOMEM; 465 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 466 if (!iov) 467 goto out; 468 } 469 ret = -EFAULT; 470 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) 471 goto out; 472 473 /* 474 * Single unix specification: 475 * We should -EINVAL if an element length is not >= 0 and fitting an 476 * ssize_t. The total length is fitting an ssize_t 477 * 478 * Be careful here because iov_len is a size_t not an ssize_t 479 */ 480 tot_len = 0; 481 ret = -EINVAL; 482 for (seg = 0; seg < nr_segs; seg++) { 483 void __user *buf = iov[seg].iov_base; 484 ssize_t len = (ssize_t)iov[seg].iov_len; 485 486 if (len < 0) /* size_t not fitting an ssize_t .. */ 487 goto out; 488 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) 489 goto Efault; 490 tot_len += len; 491 if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ 492 goto out; 493 } 494 if (tot_len == 0) { 495 ret = 0; 496 goto out; 497 } 498 499 ret = rw_verify_area(type, file, pos, tot_len); 500 if (ret) 501 goto out; 502 503 fnv = NULL; 504 if (type == READ) { 505 fn = file->f_op->read; 506 fnv = file->f_op->readv; 507 } else { 508 fn = (io_fn_t)file->f_op->write; 509 fnv = file->f_op->writev; 510 } 511 if (fnv) { 512 ret = fnv(file, iov, nr_segs, pos); 513 goto out; 514 } 515 516 /* Do it by hand, with file-ops */ 517 ret = 0; 518 vector = iov; 519 while (nr_segs > 0) { 520 void __user * base; 521 size_t len; 522 ssize_t nr; 523 524 base = vector->iov_base; 525 len = vector->iov_len; 526 vector++; 527 nr_segs--; 528 529 nr = fn(file, base, len, pos); 530 531 if (nr < 0) { 532 if (!ret) ret = nr; 533 break; 534 } 535 ret += nr; 536 if (nr != len) 537 break; 538 } 539 out: 540 if (iov != iovstack) 541 kfree(iov); 542 if ((ret + (type == READ)) > 0) { 543 if (type == READ) 544 fsnotify_access(file->f_dentry); 545 else 546 fsnotify_modify(file->f_dentry); 547 } 548 return ret; 549 Efault: 550 ret = -EFAULT; 551 goto out; 552 } 553 554 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 555 unsigned long vlen, loff_t *pos) 556 { 557 if (!(file->f_mode & FMODE_READ)) 558 return -EBADF; 559 if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) 560 return -EINVAL; 561 562 return do_readv_writev(READ, file, vec, vlen, pos); 563 } 564 565 EXPORT_SYMBOL(vfs_readv); 566 567 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 568 unsigned long vlen, loff_t *pos) 569 { 570 if (!(file->f_mode & FMODE_WRITE)) 571 return -EBADF; 572 if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) 573 return -EINVAL; 574 575 return do_readv_writev(WRITE, file, vec, vlen, pos); 576 } 577 578 EXPORT_SYMBOL(vfs_writev); 579 580 asmlinkage ssize_t 581 sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 582 { 583 struct file *file; 584 ssize_t ret = -EBADF; 585 int fput_needed; 586 587 file = fget_light(fd, &fput_needed); 588 if (file) { 589 loff_t pos = file_pos_read(file); 590 ret = vfs_readv(file, vec, vlen, &pos); 591 file_pos_write(file, pos); 592 fput_light(file, fput_needed); 593 } 594 595 if (ret > 0) 596 current->rchar += ret; 597 current->syscr++; 598 return ret; 599 } 600 601 asmlinkage ssize_t 602 sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 603 { 604 struct file *file; 605 ssize_t ret = -EBADF; 606 int fput_needed; 607 608 file = fget_light(fd, &fput_needed); 609 if (file) { 610 loff_t pos = file_pos_read(file); 611 ret = vfs_writev(file, vec, vlen, &pos); 612 file_pos_write(file, pos); 613 fput_light(file, fput_needed); 614 } 615 616 if (ret > 0) 617 current->wchar += ret; 618 current->syscw++; 619 return ret; 620 } 621 622 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 623 size_t count, loff_t max) 624 { 625 struct file * in_file, * out_file; 626 struct inode * in_inode, * out_inode; 627 loff_t pos; 628 ssize_t retval; 629 int fput_needed_in, fput_needed_out; 630 631 /* 632 * Get input file, and verify that it is ok.. 633 */ 634 retval = -EBADF; 635 in_file = fget_light(in_fd, &fput_needed_in); 636 if (!in_file) 637 goto out; 638 if (!(in_file->f_mode & FMODE_READ)) 639 goto fput_in; 640 retval = -EINVAL; 641 in_inode = in_file->f_dentry->d_inode; 642 if (!in_inode) 643 goto fput_in; 644 if (!in_file->f_op || !in_file->f_op->sendfile) 645 goto fput_in; 646 retval = -ESPIPE; 647 if (!ppos) 648 ppos = &in_file->f_pos; 649 else 650 if (!(in_file->f_mode & FMODE_PREAD)) 651 goto fput_in; 652 retval = rw_verify_area(READ, in_file, ppos, count); 653 if (retval) 654 goto fput_in; 655 656 retval = security_file_permission (in_file, MAY_READ); 657 if (retval) 658 goto fput_in; 659 660 /* 661 * Get output file, and verify that it is ok.. 662 */ 663 retval = -EBADF; 664 out_file = fget_light(out_fd, &fput_needed_out); 665 if (!out_file) 666 goto fput_in; 667 if (!(out_file->f_mode & FMODE_WRITE)) 668 goto fput_out; 669 retval = -EINVAL; 670 if (!out_file->f_op || !out_file->f_op->sendpage) 671 goto fput_out; 672 out_inode = out_file->f_dentry->d_inode; 673 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 674 if (retval) 675 goto fput_out; 676 677 retval = security_file_permission (out_file, MAY_WRITE); 678 if (retval) 679 goto fput_out; 680 681 if (!max) 682 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 683 684 pos = *ppos; 685 retval = -EINVAL; 686 if (unlikely(pos < 0)) 687 goto fput_out; 688 if (unlikely(pos + count > max)) { 689 retval = -EOVERFLOW; 690 if (pos >= max) 691 goto fput_out; 692 count = max - pos; 693 } 694 695 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 696 697 if (retval > 0) { 698 current->rchar += retval; 699 current->wchar += retval; 700 } 701 current->syscr++; 702 current->syscw++; 703 704 if (*ppos > max) 705 retval = -EOVERFLOW; 706 707 fput_out: 708 fput_light(out_file, fput_needed_out); 709 fput_in: 710 fput_light(in_file, fput_needed_in); 711 out: 712 return retval; 713 } 714 715 asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) 716 { 717 loff_t pos; 718 off_t off; 719 ssize_t ret; 720 721 if (offset) { 722 if (unlikely(get_user(off, offset))) 723 return -EFAULT; 724 pos = off; 725 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 726 if (unlikely(put_user(pos, offset))) 727 return -EFAULT; 728 return ret; 729 } 730 731 return do_sendfile(out_fd, in_fd, NULL, count, 0); 732 } 733 734 asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) 735 { 736 loff_t pos; 737 ssize_t ret; 738 739 if (offset) { 740 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 741 return -EFAULT; 742 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 743 if (unlikely(put_user(pos, offset))) 744 return -EFAULT; 745 return ret; 746 } 747 748 return do_sendfile(out_fd, in_fd, NULL, count, 0); 749 } 750