1 /* 2 * linux/fs/read_write.c 3 * 4 * Copyright (C) 1991, 1992 Linus Torvalds 5 */ 6 7 #include <linux/slab.h> 8 #include <linux/stat.h> 9 #include <linux/fcntl.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/smp_lock.h> 13 #include <linux/dnotify.h> 14 #include <linux/security.h> 15 #include <linux/module.h> 16 #include <linux/syscalls.h> 17 18 #include <asm/uaccess.h> 19 #include <asm/unistd.h> 20 21 struct file_operations generic_ro_fops = { 22 .llseek = generic_file_llseek, 23 .read = generic_file_read, 24 .mmap = generic_file_readonly_mmap, 25 .sendfile = generic_file_sendfile, 26 }; 27 28 EXPORT_SYMBOL(generic_ro_fops); 29 30 loff_t generic_file_llseek(struct file *file, loff_t offset, int origin) 31 { 32 long long retval; 33 struct inode *inode = file->f_mapping->host; 34 35 down(&inode->i_sem); 36 switch (origin) { 37 case 2: 38 offset += inode->i_size; 39 break; 40 case 1: 41 offset += file->f_pos; 42 } 43 retval = -EINVAL; 44 if (offset>=0 && offset<=inode->i_sb->s_maxbytes) { 45 if (offset != file->f_pos) { 46 file->f_pos = offset; 47 file->f_version = 0; 48 } 49 retval = offset; 50 } 51 up(&inode->i_sem); 52 return retval; 53 } 54 55 EXPORT_SYMBOL(generic_file_llseek); 56 57 loff_t remote_llseek(struct file *file, loff_t offset, int origin) 58 { 59 long long retval; 60 61 lock_kernel(); 62 switch (origin) { 63 case 2: 64 offset += i_size_read(file->f_dentry->d_inode); 65 break; 66 case 1: 67 offset += file->f_pos; 68 } 69 retval = -EINVAL; 70 if (offset>=0 && offset<=file->f_dentry->d_inode->i_sb->s_maxbytes) { 71 if (offset != file->f_pos) { 72 file->f_pos = offset; 73 file->f_version = 0; 74 } 75 retval = offset; 76 } 77 unlock_kernel(); 78 return retval; 79 } 80 EXPORT_SYMBOL(remote_llseek); 81 82 loff_t no_llseek(struct file *file, loff_t offset, int origin) 83 { 84 return -ESPIPE; 85 } 86 EXPORT_SYMBOL(no_llseek); 87 88 loff_t default_llseek(struct file *file, loff_t offset, int origin) 89 { 90 long long retval; 91 92 lock_kernel(); 93 switch (origin) { 94 case 2: 95 offset += i_size_read(file->f_dentry->d_inode); 96 break; 97 case 1: 98 offset += file->f_pos; 99 } 100 retval = -EINVAL; 101 if (offset >= 0) { 102 if (offset != file->f_pos) { 103 file->f_pos = offset; 104 file->f_version = 0; 105 } 106 retval = offset; 107 } 108 unlock_kernel(); 109 return retval; 110 } 111 EXPORT_SYMBOL(default_llseek); 112 113 loff_t vfs_llseek(struct file *file, loff_t offset, int origin) 114 { 115 loff_t (*fn)(struct file *, loff_t, int); 116 117 fn = no_llseek; 118 if (file->f_mode & FMODE_LSEEK) { 119 fn = default_llseek; 120 if (file->f_op && file->f_op->llseek) 121 fn = file->f_op->llseek; 122 } 123 return fn(file, offset, origin); 124 } 125 EXPORT_SYMBOL(vfs_llseek); 126 127 asmlinkage off_t sys_lseek(unsigned int fd, off_t offset, unsigned int origin) 128 { 129 off_t retval; 130 struct file * file; 131 int fput_needed; 132 133 retval = -EBADF; 134 file = fget_light(fd, &fput_needed); 135 if (!file) 136 goto bad; 137 138 retval = -EINVAL; 139 if (origin <= 2) { 140 loff_t res = vfs_llseek(file, offset, origin); 141 retval = res; 142 if (res != (loff_t)retval) 143 retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */ 144 } 145 fput_light(file, fput_needed); 146 bad: 147 return retval; 148 } 149 150 #ifdef __ARCH_WANT_SYS_LLSEEK 151 asmlinkage long sys_llseek(unsigned int fd, unsigned long offset_high, 152 unsigned long offset_low, loff_t __user * result, 153 unsigned int origin) 154 { 155 int retval; 156 struct file * file; 157 loff_t offset; 158 int fput_needed; 159 160 retval = -EBADF; 161 file = fget_light(fd, &fput_needed); 162 if (!file) 163 goto bad; 164 165 retval = -EINVAL; 166 if (origin > 2) 167 goto out_putf; 168 169 offset = vfs_llseek(file, ((loff_t) offset_high << 32) | offset_low, 170 origin); 171 172 retval = (int)offset; 173 if (offset >= 0) { 174 retval = -EFAULT; 175 if (!copy_to_user(result, &offset, sizeof(offset))) 176 retval = 0; 177 } 178 out_putf: 179 fput_light(file, fput_needed); 180 bad: 181 return retval; 182 } 183 #endif 184 185 186 int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count) 187 { 188 struct inode *inode; 189 loff_t pos; 190 191 if (unlikely(count > file->f_maxcount)) 192 goto Einval; 193 pos = *ppos; 194 if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) 195 goto Einval; 196 197 inode = file->f_dentry->d_inode; 198 if (inode->i_flock && MANDATORY_LOCK(inode)) 199 return locks_mandatory_area(read_write == READ ? FLOCK_VERIFY_READ : FLOCK_VERIFY_WRITE, inode, file, pos, count); 200 return 0; 201 202 Einval: 203 return -EINVAL; 204 } 205 206 static void wait_on_retry_sync_kiocb(struct kiocb *iocb) 207 { 208 set_current_state(TASK_UNINTERRUPTIBLE); 209 if (!kiocbIsKicked(iocb)) 210 schedule(); 211 else 212 kiocbClearKicked(iocb); 213 __set_current_state(TASK_RUNNING); 214 } 215 216 ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) 217 { 218 struct kiocb kiocb; 219 ssize_t ret; 220 221 init_sync_kiocb(&kiocb, filp); 222 kiocb.ki_pos = *ppos; 223 while (-EIOCBRETRY == 224 (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) 225 wait_on_retry_sync_kiocb(&kiocb); 226 227 if (-EIOCBQUEUED == ret) 228 ret = wait_on_sync_kiocb(&kiocb); 229 *ppos = kiocb.ki_pos; 230 return ret; 231 } 232 233 EXPORT_SYMBOL(do_sync_read); 234 235 ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos) 236 { 237 ssize_t ret; 238 239 if (!(file->f_mode & FMODE_READ)) 240 return -EBADF; 241 if (!file->f_op || (!file->f_op->read && !file->f_op->aio_read)) 242 return -EINVAL; 243 if (unlikely(!access_ok(VERIFY_WRITE, buf, count))) 244 return -EFAULT; 245 246 ret = rw_verify_area(READ, file, pos, count); 247 if (!ret) { 248 ret = security_file_permission (file, MAY_READ); 249 if (!ret) { 250 if (file->f_op->read) 251 ret = file->f_op->read(file, buf, count, pos); 252 else 253 ret = do_sync_read(file, buf, count, pos); 254 if (ret > 0) { 255 dnotify_parent(file->f_dentry, DN_ACCESS); 256 current->rchar += ret; 257 } 258 current->syscr++; 259 } 260 } 261 262 return ret; 263 } 264 265 EXPORT_SYMBOL(vfs_read); 266 267 ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) 268 { 269 struct kiocb kiocb; 270 ssize_t ret; 271 272 init_sync_kiocb(&kiocb, filp); 273 kiocb.ki_pos = *ppos; 274 while (-EIOCBRETRY == 275 (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) 276 wait_on_retry_sync_kiocb(&kiocb); 277 278 if (-EIOCBQUEUED == ret) 279 ret = wait_on_sync_kiocb(&kiocb); 280 *ppos = kiocb.ki_pos; 281 return ret; 282 } 283 284 EXPORT_SYMBOL(do_sync_write); 285 286 ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos) 287 { 288 ssize_t ret; 289 290 if (!(file->f_mode & FMODE_WRITE)) 291 return -EBADF; 292 if (!file->f_op || (!file->f_op->write && !file->f_op->aio_write)) 293 return -EINVAL; 294 if (unlikely(!access_ok(VERIFY_READ, buf, count))) 295 return -EFAULT; 296 297 ret = rw_verify_area(WRITE, file, pos, count); 298 if (!ret) { 299 ret = security_file_permission (file, MAY_WRITE); 300 if (!ret) { 301 if (file->f_op->write) 302 ret = file->f_op->write(file, buf, count, pos); 303 else 304 ret = do_sync_write(file, buf, count, pos); 305 if (ret > 0) { 306 dnotify_parent(file->f_dentry, DN_MODIFY); 307 current->wchar += ret; 308 } 309 current->syscw++; 310 } 311 } 312 313 return ret; 314 } 315 316 EXPORT_SYMBOL(vfs_write); 317 318 static inline loff_t file_pos_read(struct file *file) 319 { 320 return file->f_pos; 321 } 322 323 static inline void file_pos_write(struct file *file, loff_t pos) 324 { 325 file->f_pos = pos; 326 } 327 328 asmlinkage ssize_t sys_read(unsigned int fd, char __user * buf, size_t count) 329 { 330 struct file *file; 331 ssize_t ret = -EBADF; 332 int fput_needed; 333 334 file = fget_light(fd, &fput_needed); 335 if (file) { 336 loff_t pos = file_pos_read(file); 337 ret = vfs_read(file, buf, count, &pos); 338 file_pos_write(file, pos); 339 fput_light(file, fput_needed); 340 } 341 342 return ret; 343 } 344 EXPORT_SYMBOL_GPL(sys_read); 345 346 asmlinkage ssize_t sys_write(unsigned int fd, const char __user * buf, size_t count) 347 { 348 struct file *file; 349 ssize_t ret = -EBADF; 350 int fput_needed; 351 352 file = fget_light(fd, &fput_needed); 353 if (file) { 354 loff_t pos = file_pos_read(file); 355 ret = vfs_write(file, buf, count, &pos); 356 file_pos_write(file, pos); 357 fput_light(file, fput_needed); 358 } 359 360 return ret; 361 } 362 363 asmlinkage ssize_t sys_pread64(unsigned int fd, char __user *buf, 364 size_t count, loff_t pos) 365 { 366 struct file *file; 367 ssize_t ret = -EBADF; 368 int fput_needed; 369 370 if (pos < 0) 371 return -EINVAL; 372 373 file = fget_light(fd, &fput_needed); 374 if (file) { 375 ret = -ESPIPE; 376 if (file->f_mode & FMODE_PREAD) 377 ret = vfs_read(file, buf, count, &pos); 378 fput_light(file, fput_needed); 379 } 380 381 return ret; 382 } 383 384 asmlinkage ssize_t sys_pwrite64(unsigned int fd, const char __user *buf, 385 size_t count, loff_t pos) 386 { 387 struct file *file; 388 ssize_t ret = -EBADF; 389 int fput_needed; 390 391 if (pos < 0) 392 return -EINVAL; 393 394 file = fget_light(fd, &fput_needed); 395 if (file) { 396 ret = -ESPIPE; 397 if (file->f_mode & FMODE_PWRITE) 398 ret = vfs_write(file, buf, count, &pos); 399 fput_light(file, fput_needed); 400 } 401 402 return ret; 403 } 404 405 /* 406 * Reduce an iovec's length in-place. Return the resulting number of segments 407 */ 408 unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to) 409 { 410 unsigned long seg = 0; 411 size_t len = 0; 412 413 while (seg < nr_segs) { 414 seg++; 415 if (len + iov->iov_len >= to) { 416 iov->iov_len = to - len; 417 break; 418 } 419 len += iov->iov_len; 420 iov++; 421 } 422 return seg; 423 } 424 425 EXPORT_SYMBOL(iov_shorten); 426 427 /* A write operation does a read from user space and vice versa */ 428 #define vrfy_dir(type) ((type) == READ ? VERIFY_WRITE : VERIFY_READ) 429 430 static ssize_t do_readv_writev(int type, struct file *file, 431 const struct iovec __user * uvector, 432 unsigned long nr_segs, loff_t *pos) 433 { 434 typedef ssize_t (*io_fn_t)(struct file *, char __user *, size_t, loff_t *); 435 typedef ssize_t (*iov_fn_t)(struct file *, const struct iovec *, unsigned long, loff_t *); 436 437 size_t tot_len; 438 struct iovec iovstack[UIO_FASTIOV]; 439 struct iovec *iov=iovstack, *vector; 440 ssize_t ret; 441 int seg; 442 io_fn_t fn; 443 iov_fn_t fnv; 444 445 /* 446 * SuS says "The readv() function *may* fail if the iovcnt argument 447 * was less than or equal to 0, or greater than {IOV_MAX}. Linux has 448 * traditionally returned zero for zero segments, so... 449 */ 450 ret = 0; 451 if (nr_segs == 0) 452 goto out; 453 454 /* 455 * First get the "struct iovec" from user memory and 456 * verify all the pointers 457 */ 458 ret = -EINVAL; 459 if ((nr_segs > UIO_MAXIOV) || (nr_segs <= 0)) 460 goto out; 461 if (!file->f_op) 462 goto out; 463 if (nr_segs > UIO_FASTIOV) { 464 ret = -ENOMEM; 465 iov = kmalloc(nr_segs*sizeof(struct iovec), GFP_KERNEL); 466 if (!iov) 467 goto out; 468 } 469 ret = -EFAULT; 470 if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) 471 goto out; 472 473 /* 474 * Single unix specification: 475 * We should -EINVAL if an element length is not >= 0 and fitting an 476 * ssize_t. The total length is fitting an ssize_t 477 * 478 * Be careful here because iov_len is a size_t not an ssize_t 479 */ 480 tot_len = 0; 481 ret = -EINVAL; 482 for (seg = 0; seg < nr_segs; seg++) { 483 void __user *buf = iov[seg].iov_base; 484 ssize_t len = (ssize_t)iov[seg].iov_len; 485 486 if (len < 0) /* size_t not fitting an ssize_t .. */ 487 goto out; 488 if (unlikely(!access_ok(vrfy_dir(type), buf, len))) 489 goto Efault; 490 tot_len += len; 491 if ((ssize_t)tot_len < 0) /* maths overflow on the ssize_t */ 492 goto out; 493 } 494 if (tot_len == 0) { 495 ret = 0; 496 goto out; 497 } 498 499 ret = rw_verify_area(type, file, pos, tot_len); 500 if (ret) 501 goto out; 502 503 fnv = NULL; 504 if (type == READ) { 505 fn = file->f_op->read; 506 fnv = file->f_op->readv; 507 } else { 508 fn = (io_fn_t)file->f_op->write; 509 fnv = file->f_op->writev; 510 } 511 if (fnv) { 512 ret = fnv(file, iov, nr_segs, pos); 513 goto out; 514 } 515 516 /* Do it by hand, with file-ops */ 517 ret = 0; 518 vector = iov; 519 while (nr_segs > 0) { 520 void __user * base; 521 size_t len; 522 ssize_t nr; 523 524 base = vector->iov_base; 525 len = vector->iov_len; 526 vector++; 527 nr_segs--; 528 529 nr = fn(file, base, len, pos); 530 531 if (nr < 0) { 532 if (!ret) ret = nr; 533 break; 534 } 535 ret += nr; 536 if (nr != len) 537 break; 538 } 539 out: 540 if (iov != iovstack) 541 kfree(iov); 542 if ((ret + (type == READ)) > 0) 543 dnotify_parent(file->f_dentry, 544 (type == READ) ? DN_ACCESS : DN_MODIFY); 545 return ret; 546 Efault: 547 ret = -EFAULT; 548 goto out; 549 } 550 551 ssize_t vfs_readv(struct file *file, const struct iovec __user *vec, 552 unsigned long vlen, loff_t *pos) 553 { 554 if (!(file->f_mode & FMODE_READ)) 555 return -EBADF; 556 if (!file->f_op || (!file->f_op->readv && !file->f_op->read)) 557 return -EINVAL; 558 559 return do_readv_writev(READ, file, vec, vlen, pos); 560 } 561 562 EXPORT_SYMBOL(vfs_readv); 563 564 ssize_t vfs_writev(struct file *file, const struct iovec __user *vec, 565 unsigned long vlen, loff_t *pos) 566 { 567 if (!(file->f_mode & FMODE_WRITE)) 568 return -EBADF; 569 if (!file->f_op || (!file->f_op->writev && !file->f_op->write)) 570 return -EINVAL; 571 572 return do_readv_writev(WRITE, file, vec, vlen, pos); 573 } 574 575 EXPORT_SYMBOL(vfs_writev); 576 577 asmlinkage ssize_t 578 sys_readv(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 579 { 580 struct file *file; 581 ssize_t ret = -EBADF; 582 int fput_needed; 583 584 file = fget_light(fd, &fput_needed); 585 if (file) { 586 loff_t pos = file_pos_read(file); 587 ret = vfs_readv(file, vec, vlen, &pos); 588 file_pos_write(file, pos); 589 fput_light(file, fput_needed); 590 } 591 592 if (ret > 0) 593 current->rchar += ret; 594 current->syscr++; 595 return ret; 596 } 597 598 asmlinkage ssize_t 599 sys_writev(unsigned long fd, const struct iovec __user *vec, unsigned long vlen) 600 { 601 struct file *file; 602 ssize_t ret = -EBADF; 603 int fput_needed; 604 605 file = fget_light(fd, &fput_needed); 606 if (file) { 607 loff_t pos = file_pos_read(file); 608 ret = vfs_writev(file, vec, vlen, &pos); 609 file_pos_write(file, pos); 610 fput_light(file, fput_needed); 611 } 612 613 if (ret > 0) 614 current->wchar += ret; 615 current->syscw++; 616 return ret; 617 } 618 619 static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos, 620 size_t count, loff_t max) 621 { 622 struct file * in_file, * out_file; 623 struct inode * in_inode, * out_inode; 624 loff_t pos; 625 ssize_t retval; 626 int fput_needed_in, fput_needed_out; 627 628 /* 629 * Get input file, and verify that it is ok.. 630 */ 631 retval = -EBADF; 632 in_file = fget_light(in_fd, &fput_needed_in); 633 if (!in_file) 634 goto out; 635 if (!(in_file->f_mode & FMODE_READ)) 636 goto fput_in; 637 retval = -EINVAL; 638 in_inode = in_file->f_dentry->d_inode; 639 if (!in_inode) 640 goto fput_in; 641 if (!in_file->f_op || !in_file->f_op->sendfile) 642 goto fput_in; 643 retval = -ESPIPE; 644 if (!ppos) 645 ppos = &in_file->f_pos; 646 else 647 if (!(in_file->f_mode & FMODE_PREAD)) 648 goto fput_in; 649 retval = rw_verify_area(READ, in_file, ppos, count); 650 if (retval) 651 goto fput_in; 652 653 retval = security_file_permission (in_file, MAY_READ); 654 if (retval) 655 goto fput_in; 656 657 /* 658 * Get output file, and verify that it is ok.. 659 */ 660 retval = -EBADF; 661 out_file = fget_light(out_fd, &fput_needed_out); 662 if (!out_file) 663 goto fput_in; 664 if (!(out_file->f_mode & FMODE_WRITE)) 665 goto fput_out; 666 retval = -EINVAL; 667 if (!out_file->f_op || !out_file->f_op->sendpage) 668 goto fput_out; 669 out_inode = out_file->f_dentry->d_inode; 670 retval = rw_verify_area(WRITE, out_file, &out_file->f_pos, count); 671 if (retval) 672 goto fput_out; 673 674 retval = security_file_permission (out_file, MAY_WRITE); 675 if (retval) 676 goto fput_out; 677 678 if (!max) 679 max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes); 680 681 pos = *ppos; 682 retval = -EINVAL; 683 if (unlikely(pos < 0)) 684 goto fput_out; 685 if (unlikely(pos + count > max)) { 686 retval = -EOVERFLOW; 687 if (pos >= max) 688 goto fput_out; 689 count = max - pos; 690 } 691 692 retval = in_file->f_op->sendfile(in_file, ppos, count, file_send_actor, out_file); 693 694 if (retval > 0) { 695 current->rchar += retval; 696 current->wchar += retval; 697 } 698 current->syscr++; 699 current->syscw++; 700 701 if (*ppos > max) 702 retval = -EOVERFLOW; 703 704 fput_out: 705 fput_light(out_file, fput_needed_out); 706 fput_in: 707 fput_light(in_file, fput_needed_in); 708 out: 709 return retval; 710 } 711 712 asmlinkage ssize_t sys_sendfile(int out_fd, int in_fd, off_t __user *offset, size_t count) 713 { 714 loff_t pos; 715 off_t off; 716 ssize_t ret; 717 718 if (offset) { 719 if (unlikely(get_user(off, offset))) 720 return -EFAULT; 721 pos = off; 722 ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS); 723 if (unlikely(put_user(pos, offset))) 724 return -EFAULT; 725 return ret; 726 } 727 728 return do_sendfile(out_fd, in_fd, NULL, count, 0); 729 } 730 731 asmlinkage ssize_t sys_sendfile64(int out_fd, int in_fd, loff_t __user *offset, size_t count) 732 { 733 loff_t pos; 734 ssize_t ret; 735 736 if (offset) { 737 if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t)))) 738 return -EFAULT; 739 ret = do_sendfile(out_fd, in_fd, &pos, count, 0); 740 if (unlikely(put_user(pos, offset))) 741 return -EFAULT; 742 return ret; 743 } 744 745 return do_sendfile(out_fd, in_fd, NULL, count, 0); 746 } 747