// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/read_write.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/slab.h>
#include <linux/stat.h>
#include <linux/sched/xacct.h>
#include <linux/fcntl.h>
#include <linux/file.h>
#include <linux/uio.h>
#include <linux/fsnotify.h>
#include <linux/security.h>
#include <linux/export.h>
#include <linux/syscalls.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/compat.h>
#include <linux/mount.h>
#include <linux/fs.h>
#include "internal.h"

#include <linux/uaccess.h>
#include <asm/unistd.h>

/*
 * File operations table that wires up only the generic llseek, read_iter,
 * read-only mmap and splice_read helpers; no write method is set.
 */
const struct file_operations generic_ro_fops = {
	.llseek		= generic_file_llseek,
	.read_iter	= generic_file_read_iter,
	.mmap		= generic_file_readonly_mmap,
	.splice_read	= generic_file_splice_read,
};

EXPORT_SYMBOL(generic_ro_fops);

/* True when FMODE_UNSIGNED_OFFSET is set in the file's f_mode. */
static inline bool unsigned_offsets(struct file *file)
{
	return file->f_mode & FMODE_UNSIGNED_OFFSET;
}

/**
 * vfs_setpos - update the file offset for lseek
 * @file:	file structure in question
 * @offset:	file offset to seek to
 * @maxsize:	maximum file size
 *
 * This is a low-level filesystem helper for updating the file offset to
 * the value specified by @offset if the given offset is valid and it is
 * not equal to the current file offset.
 *
 * Return the specified offset on success and -EINVAL on invalid offset.
 */
loff_t vfs_setpos(struct file *file, loff_t offset, loff_t maxsize)
{
	/* A negative offset is only accepted for unsigned-offset files. */
	if (offset < 0 && !unsigned_offsets(file))
		return -EINVAL;
	if (offset > maxsize)
		return -EINVAL;

	/* Leave f_pos and f_version alone when the position is unchanged. */
	if (offset != file->f_pos) {
		file->f_pos = offset;
		file->f_version = 0;
	}
	return offset;
}
EXPORT_SYMBOL(vfs_setpos);

/**
 * generic_file_llseek_size - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @maxsize:	max size of this file in file system
 * @eof:	offset used for SEEK_END position
 *
 * This is a variant of generic_file_llseek that allows passing in a custom
 * maximum file size and a custom EOF position, e.g. for hashed directories.
 *
 * Synchronization:
 * SEEK_SET and SEEK_END are unsynchronized (but atomic on 64bit platforms)
 * SEEK_CUR is synchronized against other SEEK_CURs, but not read/writes.
 * read/writes behave like SEEK_SET against seeks.
 *
 * Return: the new offset on success, -ENXIO for SEEK_DATA/SEEK_HOLE at or
 * beyond @eof, or whatever vfs_setpos() returns for the computed offset.
 */
loff_t
generic_file_llseek_size(struct file *file, loff_t offset, int whence,
		loff_t maxsize, loff_t eof)
{
	/* Any whence without a case below uses @offset as an absolute position. */
	switch (whence) {
	case SEEK_END:
		offset += eof;
		break;
	case SEEK_CUR:
		/*
		 * Here we special-case the lseek(fd, 0, SEEK_CUR)
		 * position-querying operation.  Avoid rewriting the "same"
		 * f_pos value back to the file because a concurrent read(),
		 * write() or lseek() might have altered it
		 */
		if (offset == 0)
			return file->f_pos;
		/*
		 * f_lock protects against read/modify/write race with other
		 * SEEK_CURs. Note that parallel writes and reads behave
		 * like SEEK_SET.
		 */
		spin_lock(&file->f_lock);
		offset = vfs_setpos(file, file->f_pos + offset, maxsize);
		spin_unlock(&file->f_lock);
		return offset;
	case SEEK_DATA:
		/*
		 * In the generic case the entire file is data, so as long as
		 * offset isn't at the end of the file then the offset is data.
		 * The unsigned cast makes a negative offset compare as a very
		 * large value, so it is rejected here as well.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		break;
	case SEEK_HOLE:
		/*
		 * There is a virtual hole at the end of the file, so as long as
		 * offset isn't i_size or larger, return i_size.
		 */
		if ((unsigned long long)offset >= eof)
			return -ENXIO;
		offset = eof;
		break;
	}

	return vfs_setpos(file, offset, maxsize);
}
EXPORT_SYMBOL(generic_file_llseek_size);

/**
 * generic_file_llseek - generic llseek implementation for regular files
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is a generic implementation of ->llseek usable for all normal local
 * filesystems.  It just updates the file offset to the value specified by
 * @offset and @whence, using the superblock's s_maxbytes as the maximum
 * size and the inode size (i_size_read()) as the EOF position.
 */
loff_t generic_file_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;

	return generic_file_llseek_size(file, offset, whence,
					inode->i_sb->s_maxbytes,
					i_size_read(inode));
}
EXPORT_SYMBOL(generic_file_llseek);

/**
 * fixed_size_llseek - llseek implementation for fixed-sized devices
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @size:	size of the file
 *
 * Only SEEK_SET, SEEK_CUR and SEEK_END are accepted; @size serves as both
 * the maximum offset and the EOF position.  Any other @whence yields
 * -EINVAL.
 */
loff_t fixed_size_llseek(struct file *file, loff_t offset, int whence, loff_t size)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR: case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						size, size);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(fixed_size_llseek);

/**
 * no_seek_end_llseek - llseek implementation without SEEK_END support
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * Only SEEK_SET and SEEK_CUR are accepted, with OFFSET_MAX as the maximum
 * offset.  Any other @whence yields -EINVAL.
 */
loff_t no_seek_end_llseek(struct file *file, loff_t offset, int whence)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR:
		return generic_file_llseek_size(file, offset, whence,
						OFFSET_MAX, 0);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(no_seek_end_llseek);

/**
 * no_seek_end_llseek_size - size-limited llseek without SEEK_END support
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 * @size:	maximal offset allowed
 *
 * Only SEEK_SET and SEEK_CUR are accepted, with @size as the maximum
 * offset.  Any other @whence yields -EINVAL.
 */
loff_t no_seek_end_llseek_size(struct file *file, loff_t offset, int whence, loff_t size)
{
	switch (whence) {
	case SEEK_SET: case SEEK_CUR:
		return generic_file_llseek_size(file, offset, whence,
						size, 0);
	default:
		return -EINVAL;
	}
}
EXPORT_SYMBOL(no_seek_end_llseek_size);

/**
 * noop_llseek - No Operation Performed llseek implementation
 * @file:	file structure to seek on
 * @offset:	file offset to seek to
 * @whence:	type of seek
 *
 * This is an implementation of ->llseek usable for the rare special case when
 * userspace expects the seek to succeed but the (device) file is actually not
 * able to perform the seek. In this case you use noop_llseek() instead of
 * falling back to the default implementation of ->llseek.
 *
 * @offset and @whence are ignored; the current f_pos is returned unchanged.
 */
loff_t noop_llseek(struct file *file, loff_t offset, int whence)
{
	return file->f_pos;
}
EXPORT_SYMBOL(noop_llseek);

/*
 * default_llseek - llseek implementation that runs under inode_lock()
 *
 * SEEK_END is relative to i_size_read(); SEEK_CUR with a zero offset only
 * reports f_pos; SEEK_DATA and SEEK_HOLE return -ENXIO when @offset is at
 * or beyond inode->i_size.  Any other @whence leaves @offset as an absolute
 * position.  The resulting offset is stored in f_pos (resetting f_version
 * when it changes) unless it is negative on a file without unsigned
 * offsets, in which case -EINVAL is returned.
 */
loff_t default_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file_inode(file);
	loff_t retval;

	inode_lock(inode);
	switch (whence) {
		case SEEK_END:
			offset += i_size_read(inode);
			break;
		case SEEK_CUR:
			/* Pure position query: report f_pos without rewriting it. */
			if (offset == 0) {
				retval = file->f_pos;
				goto out;
			}
			offset += file->f_pos;
			break;
		case SEEK_DATA:
			/*
			 * In the generic case the entire file is data, so as
			 * long as offset isn't at the end of the file then the
			 * offset is data.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			break;
		case SEEK_HOLE:
			/*
			 * There is a virtual hole at the end of the file, so
			 * as long as offset isn't i_size or larger, return
			 * i_size.
			 */
			if (offset >= inode->i_size) {
				retval = -ENXIO;
				goto out;
			}
			offset = inode->i_size;
			break;
	}
	/* Assume failure; overwritten below once the offset is accepted. */
	retval = -EINVAL;
	if (offset >= 0 || unsigned_offsets(file)) {
		if (offset != file->f_pos) {
			file->f_pos = offset;
			file->f_version = 0;
		}
		retval = offset;
	}
out:
	inode_unlock(inode);
	return retval;
}
EXPORT_SYMBOL(default_llseek);

/*
 * vfs_llseek - dispatch a seek to the file's ->llseek method
 *
 * Returns -ESPIPE when FMODE_LSEEK is not set in f_mode; otherwise returns
 * whatever file->f_op->llseek() returns.
 */
loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
{
	if (!(file->f_mode & FMODE_LSEEK))
		return -ESPIPE;
	return file->f_op->llseek(file, offset, whence);
}
EXPORT_SYMBOL(vfs_llseek);

/*
 * Common body of the native and compat lseek syscalls.
 *
 * Returns -EBADF when @fd has no file, -EINVAL when @whence exceeds
 * SEEK_MAX, and -EOVERFLOW when the loff_t result of vfs_llseek() does not
 * survive the round trip through off_t.
 */
static off_t ksys_lseek(unsigned int fd, off_t offset, unsigned int whence)
{
	off_t retval;
	struct fd f = fdget_pos(fd);
	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence <= SEEK_MAX) {
		loff_t res = vfs_llseek(f.file, offset, whence);
		retval = res;
		/* Detect truncation caused by narrowing loff_t to off_t. */
		if (res != (loff_t)retval)
			retval = -EOVERFLOW;	/* LFS: should only happen on 32 bit platforms */
	}
	fdput_pos(f);
	return retval;
}

SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}

#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE3(lseek, unsigned int, fd, compat_off_t, offset, unsigned int, whence)
{
	return ksys_lseek(fd, offset, whence);
}
#endif

#if !defined(CONFIG_64BIT) || defined(CONFIG_COMPAT) || \
	defined(__ARCH_WANT_SYS_LLSEEK)
/*
 * llseek: the 64-bit offset is passed as two halves and the resulting
 * position is written to the user pointer @result.  Returns 0 on success,
 * -EFAULT if the result cannot be copied out, -EINVAL for @whence above
 * SEEK_MAX, -EBADF for a bad @fd, or the (negative) value returned by
 * vfs_llseek().
 */
SYSCALL_DEFINE5(llseek, unsigned int, fd, unsigned long, offset_high,
		unsigned long, offset_low, loff_t __user *, result,
		unsigned int, whence)
{
	int retval;
	struct fd f = fdget_pos(fd);
	loff_t offset;

	if (!f.file)
		return -EBADF;

	retval = -EINVAL;
	if (whence > SEEK_MAX)
		goto out_putf;

	offset = vfs_llseek(f.file, ((loff_t) offset_high << 32) | offset_low,
			whence);

	/* On failure offset holds a negative errno; pass it through as int. */
	retval = (int)offset;
	if (offset >= 0) {
		retval = -EFAULT;
		if (!copy_to_user(result, &offset, sizeof(offset)))
			retval = 0;
	}
out_putf:
	fdput_pos(f);
	return retval;
}
#endif

/*
 * rw_verify_area - sanity-check an I/O request before it is issued
 *
 * Rejects a @count that is negative when viewed as ssize_t.  When @ppos is
 * non-NULL, a negative position, or a position plus @count that turns
 * negative, is rejected with -EINVAL unless the file uses unsigned offsets;
 * for such files a negative position whose range would reach zero or
 * beyond yields -EOVERFLOW.  If those checks pass, the result of
 * security_file_permission() for MAY_READ or MAY_WRITE is returned.
 */
int rw_verify_area(int read_write, struct file *file, const loff_t *ppos, size_t count)
{
	if (unlikely((ssize_t) count < 0))
		return -EINVAL;

	if (ppos) {
		loff_t pos = *ppos;

		if (unlikely(pos < 0)) {
			if (!unsigned_offsets(file))
				return -EINVAL;
			if (count >= -pos) /* both values are in 0..LLONG_MAX */
				return -EOVERFLOW;
		} else if (unlikely((loff_t) (pos + count) < 0)) {
			if (!unsigned_offsets(file))
				return -EINVAL;
		}
	}

	return security_file_permission(file,
				read_write == READ ? MAY_READ : MAY_WRITE);
}
EXPORT_SYMBOL(rw_verify_area);

/*
 * Read into a __user buffer through call_read_iter() using an on-stack
 * kiocb and iov_iter.  The starting position is *ppos (0 when @ppos is
 * NULL); the kiocb position is copied back to *ppos whenever @ppos is
 * non-NULL, regardless of the return value.
 */
static ssize_t new_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
{
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = (ppos ? *ppos : 0);
	iov_iter_ubuf(&iter, READ, buf, len);

	ret = call_read_iter(filp, &kiocb, &iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ppos)
		*ppos = kiocb.ki_pos;
	return ret;
}

/*
 * Emit a rate-limited warning naming the unsupported operation @op, the
 * file, and the current task's pid and comm, then return -EINVAL.
 */
static int warn_unsupported(struct file *file, const char *op)
{
	pr_warn_ratelimited(
		"kernel %s not supported for file %pD4 (pid: %d comm: %.20s)\n",
		op, file, current->pid, current->comm);
	return -EINVAL;
}

/*
 * __kernel_read - read into a kernel buffer via ->read_iter
 *
 * The request is capped at MAX_RW_COUNT bytes.  Unlike kernel_read(), no
 * rw_verify_area() check is made here.  Returns -EINVAL when the file lacks
 * FMODE_READ (with a one-time warning) or FMODE_CAN_READ, or when the file
 * is unsupported (see below).  On a positive return, *pos is updated (if
 * @pos is non-NULL) and fsnotify/accounting hooks are called.
 */
ssize_t __kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
	struct kvec iov = {
		.iov_base	= buf,
		.iov_len	= min_t(size_t, count, MAX_RW_COUNT),
	};
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_READ)))
		return -EINVAL;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	/*
	 * Also fail if ->read_iter and ->read are both wired up as that
	 * implies very convoluted semantics.
	 */
	if (unlikely(!file->f_op->read_iter || file->f_op->read))
		return warn_unsupported(file, "read");

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = pos ? *pos : 0;
	iov_iter_kvec(&iter, READ, &iov, 1, iov.iov_len);
	ret = file->f_op->read_iter(&kiocb, &iter);
	if (ret > 0) {
		if (pos)
			*pos = kiocb.ki_pos;
		fsnotify_access(file);
		add_rchar(current, ret);
	}
	/* The read syscall counter is bumped even when nothing was read. */
	inc_syscr(current);
	return ret;
}

/*
 * kernel_read - rw_verify_area() followed by __kernel_read()
 *
 * Returns the non-zero result of rw_verify_area() if the check fails,
 * otherwise the result of __kernel_read().
 */
ssize_t kernel_read(struct file *file, void *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret)
		return ret;
	return __kernel_read(file, buf, count, pos);
}
EXPORT_SYMBOL(kernel_read);

/*
 * vfs_read - read from a file into a __user buffer
 *
 * Returns -EBADF without FMODE_READ, -EINVAL without FMODE_CAN_READ or when
 * the file has neither ->read nor ->read_iter, -EFAULT when access_ok()
 * rejects the buffer, or the error from rw_verify_area().  @count is capped
 * at MAX_RW_COUNT.  ->read is preferred over ->read_iter when both exist.
 */
ssize_t vfs_read(struct file *file, char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;
	if (unlikely(!access_ok(buf, count)))
		return -EFAULT;

	ret = rw_verify_area(READ, file, pos, count);
	if (ret)
		return ret;
	if (count >  MAX_RW_COUNT)
		count =  MAX_RW_COUNT;

	if (file->f_op->read)
		ret = file->f_op->read(file, buf, count, pos);
	else if (file->f_op->read_iter)
		ret = new_sync_read(file, buf, count, pos);
	else
		ret = -EINVAL;
	if (ret > 0) {
		fsnotify_access(file);
		add_rchar(current, ret);
	}
	inc_syscr(current);
	return ret;
}

/*
 * Write from a __user buffer through call_write_iter() using an on-stack
 * kiocb and iov_iter.  Unlike new_sync_read(), *ppos is only updated when
 * the write returned a positive byte count.
 */
static ssize_t new_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos)
{
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	kiocb.ki_pos = (ppos ? *ppos : 0);
	iov_iter_ubuf(&iter, WRITE, (void __user *)buf, len);

	ret = call_write_iter(filp, &kiocb, &iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ret > 0 && ppos)
		*ppos = kiocb.ki_pos;
	return ret;
}

/*
 * __kernel_write - write from a kernel buffer via ->write_iter
 *
 * The request is capped at MAX_RW_COUNT bytes.  Returns -EBADF when the
 * file lacks FMODE_WRITE (with a one-time warning), -EINVAL when it lacks
 * FMODE_CAN_WRITE or is unsupported (see below).  On a positive return,
 * *pos is updated (if @pos is non-NULL) and fsnotify/accounting hooks are
 * called.
 */
/* caller is responsible for file_start_write/file_end_write */
ssize_t __kernel_write(struct file *file, const void *buf, size_t count, loff_t *pos)
{
	struct kvec iov = {
		.iov_base	= (void *)buf,
		.iov_len	= min_t(size_t, count, MAX_RW_COUNT),
	};
	struct kiocb kiocb;
	struct iov_iter iter;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_WRITE)))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	/*
	 * Also fail if ->write_iter and ->write are both wired up as that
	 * implies very convoluted semantics.
	 */
	if (unlikely(!file->f_op->write_iter || file->f_op->write))
		return warn_unsupported(file, "write");

	init_sync_kiocb(&kiocb, file);
	kiocb.ki_pos = pos ? *pos : 0;
	iov_iter_kvec(&iter, WRITE, &iov, 1, iov.iov_len);
	ret = file->f_op->write_iter(&kiocb, &iter);
	if (ret > 0) {
		if (pos)
			*pos = kiocb.ki_pos;
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	/* The write syscall counter is bumped even when nothing was written. */
	inc_syscw(current);
	return ret;
}
/*
 * This "EXPORT_SYMBOL_GPL()" is more of a "EXPORT_SYMBOL_DONTUSE()",
 * but autofs is one of the few internal kernel users that actually
 * wants this _and_ can be built as a module. So we need to export
 * this symbol for autofs, even though it really isn't appropriate
 * for any other kernel modules.
 */
EXPORT_SYMBOL_GPL(__kernel_write);

/*
 * kernel_write - rw_verify_area() followed by __kernel_write()
 *
 * The actual write is bracketed by file_start_write()/file_end_write().
 * Returns the non-zero result of rw_verify_area() if the check fails,
 * otherwise the result of __kernel_write().
 */
ssize_t kernel_write(struct file *file, const void *buf, size_t count,
			    loff_t *pos)
{
	ssize_t ret;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret)
		return ret;

	file_start_write(file);
	ret =  __kernel_write(file, buf, count, pos);
	file_end_write(file);
	return ret;
}
EXPORT_SYMBOL(kernel_write);

/*
 * vfs_write - write to a file from a __user buffer
 *
 * Returns -EBADF without FMODE_WRITE, -EINVAL without FMODE_CAN_WRITE or
 * when the file has neither ->write nor ->write_iter, -EFAULT when
 * access_ok() rejects the buffer, or the error from rw_verify_area().
 * @count is capped at MAX_RW_COUNT.  ->write is preferred over ->write_iter
 * when both exist.  The write itself runs between file_start_write() and
 * file_end_write().
 */
ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
{
	ssize_t ret;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;
	if (unlikely(!access_ok(buf, count)))
		return -EFAULT;

	ret = rw_verify_area(WRITE, file, pos, count);
	if (ret)
		return ret;
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;
	file_start_write(file);
	if (file->f_op->write)
		ret = file->f_op->write(file, buf, count, pos);
	else if (file->f_op->write_iter)
		ret = new_sync_write(file, buf, count, pos);
	else
		ret = -EINVAL;
	if (ret > 0) {
		fsnotify_modify(file);
		add_wchar(current, ret);
	}
	inc_syscw(current);
	file_end_write(file);
	return ret;
}

/* file_ppos returns &file->f_pos or NULL if file is stream */
static inline loff_t *file_ppos(struct file *file)
{
	return file->f_mode & FMODE_STREAM ? NULL : &file->f_pos;
}

/*
 * ksys_read - body of the read syscall
 *
 * For non-stream files the read operates on a local copy of f_pos, which
 * is stored back only when vfs_read() did not return an error.  Returns
 * -EBADF when @fd has no file.
 */
ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			/* Redirect ppos to the on-stack copy. */
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_read(f.file, buf, count, ppos);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}
	return ret;
}

SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
	return ksys_read(fd, buf, count);
}

/*
 * ksys_write - body of the write syscall
 *
 * Mirrors ksys_read(): for non-stream files the write operates on a local
 * copy of f_pos, which is stored back only when vfs_write() did not return
 * an error.  Returns -EBADF when @fd has no file.
 */
ssize_t ksys_write(unsigned int fd, const char __user *buf, size_t count)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			/* Redirect ppos to the on-stack copy. */
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_write(f.file, buf, count, ppos);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}

	return ret;
}

SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
		size_t, count)
{
	return ksys_write(fd, buf, count);
}

/*
 * ksys_pread64 - body of the pread64 syscall
 *
 * Reads at the caller-supplied @pos; the file's own f_pos is not touched
 * here.  Returns -EINVAL for a negative @pos, -EBADF when @fd has no file
 * and -ESPIPE when the file lacks FMODE_PREAD.
 */
ssize_t ksys_pread64(unsigned int fd, char __user *buf, size_t count,
		     loff_t pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_read(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE4(pread64, unsigned int, fd, char __user *, buf,
			size_t, count, loff_t, pos)
{
	return ksys_pread64(fd, buf, count, pos);
}

#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PREAD64)
COMPAT_SYSCALL_DEFINE5(pread64, unsigned int, fd, char __user *, buf,
		       size_t, count, compat_arg_u64_dual(pos))
{
	return ksys_pread64(fd, buf, count, compat_arg_u64_glue(pos));
}
#endif

/*
 * ksys_pwrite64 - body of the pwrite64 syscall
 *
 * Writes at the caller-supplied @pos; the file's own f_pos is not touched
 * here.  Returns -EINVAL for a negative @pos, -EBADF when @fd has no file
 * and -ESPIPE when the file lacks FMODE_PWRITE.
 */
ssize_t ksys_pwrite64(unsigned int fd, const char __user *buf,
		      size_t count, loff_t pos)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_write(f.file, buf, count, &pos);
		fdput(f);
	}

	return ret;
}

SYSCALL_DEFINE4(pwrite64, unsigned int, fd, const char __user *, buf,
			 size_t, count, loff_t, pos)
{
	return ksys_pwrite64(fd, buf, count, pos);
}

#if defined(CONFIG_COMPAT) && defined(__ARCH_WANT_COMPAT_PWRITE64)
COMPAT_SYSCALL_DEFINE5(pwrite64, unsigned int, fd, const char __user *, buf,
		       size_t, count, compat_arg_u64_dual(pos))
{
	return ksys_pwrite64(fd, buf, count, compat_arg_u64_glue(pos));
}
#endif

/*
 * Issue a vectored read or write through ->read_iter/->write_iter with an
 * on-stack kiocb.  @flags is applied with kiocb_set_rw_flags(), whose
 * non-zero result is returned as-is.  The kiocb position is copied back to
 * *ppos whenever @ppos is non-NULL.
 */
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	struct kiocb kiocb;
	ssize_t ret;

	init_sync_kiocb(&kiocb, filp);
	ret = kiocb_set_rw_flags(&kiocb, flags);
	if (ret)
		return ret;
	kiocb.ki_pos = (ppos ? *ppos : 0);

	if (type == READ)
		ret = call_read_iter(filp, &kiocb, iter);
	else
		ret = call_write_iter(filp, &kiocb, iter);
	BUG_ON(ret == -EIOCBQUEUED);
	if (ppos)
		*ppos = kiocb.ki_pos;
	return ret;
}

/*
 * Fallback for files without ->read_iter/->write_iter: feed the iterator
 * to ->read or ->write one segment at a time.  Any flag other than
 * RWF_HIPRI yields -EOPNOTSUPP.  The loop stops at the first error or short
 * transfer; an error is only reported when nothing was transferred before
 * it.
 */
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
		loff_t *ppos, int type, rwf_t flags)
{
	ssize_t ret = 0;

	if (flags & ~RWF_HIPRI)
		return -EOPNOTSUPP;

	while (iov_iter_count(iter)) {
		struct iovec iovec = iov_iter_iovec(iter);
		ssize_t nr;

		if (type == READ) {
			nr = filp->f_op->read(filp, iovec.iov_base,
					      iovec.iov_len, ppos);
		} else {
			nr = filp->f_op->write(filp, iovec.iov_base,
					       iovec.iov_len, ppos);
		}

		if (nr < 0) {
			/* Keep the byte count if earlier segments succeeded. */
			if (!ret)
				ret = nr;
			break;
		}
		ret += nr;
		if (nr != iovec.iov_len)
			break;
		iov_iter_advance(iter, nr);
	}

	return ret;
}

/*
 * Common vectored-read path.  Returns -EBADF without FMODE_READ and -EINVAL
 * without FMODE_CAN_READ.  A zero-length request skips rw_verify_area() and
 * the I/O but still reaches fsnotify_access(), since ret stays 0.
 */
static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
		loff_t *pos, rwf_t flags)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(READ, file, pos, tot_len);
	if (ret < 0)
		return ret;

	if (file->f_op->read_iter)
		ret = do_iter_readv_writev(file, iter, pos, READ, flags);
	else
		ret = do_loop_readv_writev(file, iter, pos, READ, flags);
out:
	if (ret >= 0)
		fsnotify_access(file);
	return ret;
}

/*
 * vfs_iocb_iter_read - read through ->read_iter using a caller-owned kiocb
 *
 * The position checked by rw_verify_area() is iocb->ki_pos.  Returns
 * -EINVAL when the file has no ->read_iter or lacks FMODE_CAN_READ, and
 * -EBADF without FMODE_READ.
 */
ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb,
			   struct iov_iter *iter)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!file->f_op->read_iter)
		return -EINVAL;
	if (!(file->f_mode & FMODE_READ))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_READ))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		goto out;
	ret = rw_verify_area(READ, file, &iocb->ki_pos, tot_len);
	if (ret < 0)
		return ret;

	ret = call_read_iter(file, iocb, iter);
out:
	if (ret >= 0)
		fsnotify_access(file);
	return ret;
}
EXPORT_SYMBOL(vfs_iocb_iter_read);

/*
 * vfs_iter_read - do_iter_read() restricted to files with ->read_iter
 *
 * Returns -EINVAL when the file has no ->read_iter.
 */
ssize_t vfs_iter_read(struct file *file, struct iov_iter *iter, loff_t *ppos,
		rwf_t flags)
{
	if (!file->f_op->read_iter)
		return -EINVAL;
	return do_iter_read(file, iter, ppos, flags);
}
EXPORT_SYMBOL(vfs_iter_read);

/*
 * Common vectored-write path.  Returns -EBADF without FMODE_WRITE and
 * -EINVAL without FMODE_CAN_WRITE.  A zero-length request returns 0
 * immediately; fsnotify_modify() is only called for a positive result.
 */
static ssize_t do_iter_write(struct file *file, struct iov_iter *iter,
		loff_t *pos, rwf_t flags)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		return 0;
	ret = rw_verify_area(WRITE, file, pos, tot_len);
	if (ret < 0)
		return ret;

	if (file->f_op->write_iter)
		ret = do_iter_readv_writev(file, iter, pos, WRITE, flags);
	else
		ret = do_loop_readv_writev(file, iter, pos, WRITE, flags);
	if (ret > 0)
		fsnotify_modify(file);
	return ret;
}

/*
 * vfs_iocb_iter_write - write through ->write_iter using a caller-owned kiocb
 *
 * The position checked by rw_verify_area() is iocb->ki_pos.  Returns
 * -EINVAL when the file has no ->write_iter or lacks FMODE_CAN_WRITE, and
 * -EBADF without FMODE_WRITE.
 */
ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb,
			    struct iov_iter *iter)
{
	size_t tot_len;
	ssize_t ret = 0;

	if (!file->f_op->write_iter)
		return -EINVAL;
	if (!(file->f_mode & FMODE_WRITE))
		return -EBADF;
	if (!(file->f_mode & FMODE_CAN_WRITE))
		return -EINVAL;

	tot_len = iov_iter_count(iter);
	if (!tot_len)
		return 0;
	ret = rw_verify_area(WRITE, file, &iocb->ki_pos, tot_len);
	if (ret < 0)
		return ret;

	ret = call_write_iter(file, iocb, iter);
	if (ret > 0)
		fsnotify_modify(file);

	return ret;
}
EXPORT_SYMBOL(vfs_iocb_iter_write);

/*
 * vfs_iter_write - do_iter_write() restricted to files with ->write_iter
 *
 * Returns -EINVAL when the file has no ->write_iter.
 */
ssize_t vfs_iter_write(struct file *file, struct iov_iter *iter, loff_t *ppos,
		rwf_t flags)
{
	if (!file->f_op->write_iter)
		return -EINVAL;
	return do_iter_write(file, iter, ppos, flags);
}
EXPORT_SYMBOL(vfs_iter_write);

/*
 * Import a user iovec array with import_iovec() and pass the resulting
 * iterator to do_iter_read().  After a successful import, iov is handed to
 * kfree(); an import error is returned unchanged.
 */
static ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
		  unsigned long vlen, loff_t *pos, rwf_t flags)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(READ, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret >= 0) {
		ret = do_iter_read(file, &iter, pos, flags);
		kfree(iov);
	}

	return ret;
}

/*
 * Write counterpart of vfs_readv(): do_iter_write() runs between
 * file_start_write() and file_end_write().
 */
static ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
		   unsigned long vlen, loff_t *pos, rwf_t flags)
{
	struct iovec iovstack[UIO_FASTIOV];
	struct iovec *iov = iovstack;
	struct iov_iter iter;
	ssize_t ret;

	ret = import_iovec(WRITE, vec, vlen, ARRAY_SIZE(iovstack), &iov, &iter);
	if (ret >= 0) {
		file_start_write(file);
		ret = do_iter_write(file, &iter, pos, flags);
		file_end_write(file);
		kfree(iov);
	}
	return ret;
}

/*
 * Vectored read at the file's own position.  As in ksys_read(), a local
 * copy of f_pos is used and stored back only when vfs_readv() did not fail.
 * Read accounting (add_rchar/inc_syscr) is done here, after the file
 * reference has been dropped.
 */
static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
			unsigned long vlen, rwf_t flags)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_readv(f.file, vec, vlen, ppos, flags);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

/*
 * Vectored write at the file's own position; the write counterpart of
 * do_readv(), with write accounting (add_wchar/inc_syscw).
 */
static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, rwf_t flags)
{
	struct fd f = fdget_pos(fd);
	ssize_t ret = -EBADF;

	if (f.file) {
		loff_t pos, *ppos = file_ppos(f.file);
		if (ppos) {
			pos = *ppos;
			ppos = &pos;
		}
		ret = vfs_writev(f.file, vec, vlen, ppos, flags);
		if (ret >= 0 && ppos)
			f.file->f_pos = pos;
		fdput_pos(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

/*
 * Combine the high and low syscall arguments into one loff_t.  @high is
 * shifted left by BITS_PER_LONG in total, done as two half-width shifts so
 * that no single shift count reaches BITS_PER_LONG.
 */
static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
{
#define HALF_LONG_BITS (BITS_PER_LONG / 2)
	return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}

/*
 * Vectored read at an explicit position.  Returns -EINVAL for a negative
 * @pos, -EBADF when @fd has no file and -ESPIPE when the file lacks
 * FMODE_PREAD.  The file's f_pos is not touched here.
 */
static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
			 unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PREAD)
			ret = vfs_readv(f.file, vec, vlen, &pos, flags);
		fdput(f);
	}

	if (ret > 0)
		add_rchar(current, ret);
	inc_syscr(current);
	return ret;
}

/*
 * Vectored write at an explicit position.  Returns -EINVAL for a negative
 * @pos, -EBADF when @fd has no file and -ESPIPE when the file lacks
 * FMODE_PWRITE.  The file's f_pos is not touched here.
 */
static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
			  unsigned long vlen, loff_t pos, rwf_t flags)
{
	struct fd f;
	ssize_t ret = -EBADF;

	if (pos < 0)
		return -EINVAL;

	f = fdget(fd);
	if (f.file) {
		ret = -ESPIPE;
		if (f.file->f_mode & FMODE_PWRITE)
			ret = vfs_writev(f.file, vec, vlen, &pos, flags);
		fdput(f);
	}

	if (ret > 0)
		add_wchar(current, ret);
	inc_syscw(current);
	return ret;
}

SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_readv(fd, vec, vlen, 0);
}

SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen)
{
	return do_writev(fd, vec, vlen, 0);
}

SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_preadv(fd, vec, vlen, pos, 0);
}

/* A position of -1 selects do_readv(), which uses the file's own position. */
SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
		rwf_t, flags)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);

	return do_preadv(fd, vec, vlen, pos, flags);
}

SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	return do_pwritev(fd, vec, vlen, pos, 0);
}

/* A position of -1 selects do_writev(), which uses the file's own position. */
SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
		unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
		rwf_t, flags)
{
	loff_t pos = pos_from_hilo(pos_h, pos_l);

	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);

	return do_pwritev(fd, vec, vlen, pos, flags);
}

/*
 * Various compat syscalls.  Note that they all pretend to take a native
 * iovec - import_iovec will properly treat those as compat_iovecs based on
 * in_compat_syscall().
 */
#ifdef CONFIG_COMPAT
#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64
COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_preadv(fd, vec, vlen, pos, 0);
}
#endif

/* The 64-bit position arrives as two 32-bit halves. */
COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
		const struct iovec __user *, vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_preadv(fd, vec, vlen, pos, 0);
}

#ifdef __ARCH_WANT_COMPAT_SYS_PREADV64V2
/* A position of -1 selects do_readv(), which uses the file's own position. */
COMPAT_SYSCALL_DEFINE5(preadv64v2, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);
	return do_preadv(fd, vec, vlen, pos, flags);
}
#endif

/* As compat preadv, plus @flags; a position of -1 selects do_readv(). */
COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
		const struct iovec __user *, vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
		rwf_t, flags)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	if (pos == -1)
		return do_readv(fd, vec, vlen, flags);
	return do_preadv(fd, vec, vlen, pos, flags);
}

#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64
COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos)
{
	return do_pwritev(fd, vec, vlen, pos, 0);
}
#endif

/* The 64-bit position arrives as two 32-bit halves. */
COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
		const struct iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	return do_pwritev(fd, vec, vlen, pos, 0);
}

#ifdef __ARCH_WANT_COMPAT_SYS_PWRITEV64V2
/* A position of -1 selects do_writev(), which uses the file's own position. */
COMPAT_SYSCALL_DEFINE5(pwritev64v2, unsigned long, fd,
		const struct iovec __user *, vec,
		unsigned long, vlen, loff_t, pos, rwf_t, flags)
{
	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);
	return do_pwritev(fd, vec, vlen, pos, flags);
}
#endif

/* As compat pwritev, plus @flags; a position of -1 selects do_writev(). */
COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
		const struct iovec __user *,vec,
		compat_ulong_t, vlen, u32, pos_low, u32, pos_high, rwf_t, flags)
{
	loff_t pos = ((loff_t)pos_high << 32) | pos_low;

	if (pos == -1)
		return do_writev(fd, vec, vlen, flags);
	return do_pwritev(fd, vec, vlen, pos, flags);
}
#endif /* CONFIG_COMPAT */

/*
 * do_sendfile - common body of the sendfile syscalls
 * @out_fd:	descriptor to write to
 * @in_fd:	descriptor to read from
 * @ppos:	optional read position; NULL means use and update in_fd's f_pos
 * @count:	number of bytes requested (capped at MAX_RW_COUNT)
 * @max:	highest position allowed; 0 means the smaller s_maxbytes of the
 *		two superblocks
 *
 * Returns -EBADF when either descriptor has no file or lacks the needed
 * FMODE_READ/FMODE_WRITE bit, -ESPIPE when @ppos is given but the input
 * lacks FMODE_PREAD, and -EOVERFLOW when the start position is already at
 * or beyond @max or the final position exceeds it.  If the output is a
 * pipe (get_pipe_info() returns non-NULL) the data goes through
 * splice_file_to_pipe(); otherwise through do_splice_direct() between
 * file_start_write() and file_end_write().
 */
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
			   size_t count, loff_t max)
{
	struct fd in, out;
	struct inode *in_inode, *out_inode;
	struct pipe_inode_info *opipe;
	loff_t pos;
	loff_t out_pos;
	ssize_t retval;
	int fl;

	/*
	 * Get input file, and verify that it is ok..
	 */
	retval = -EBADF;
	in = fdget(in_fd);
	if (!in.file)
		goto out;
	if (!(in.file->f_mode & FMODE_READ))
		goto fput_in;
	retval = -ESPIPE;
	if (!ppos) {
		pos = in.file->f_pos;
	} else {
		pos = *ppos;
		if (!(in.file->f_mode & FMODE_PREAD))
			goto fput_in;
	}
	retval = rw_verify_area(READ, in.file, &pos, count);
	if (retval < 0)
		goto fput_in;
	if (count > MAX_RW_COUNT)
		count =  MAX_RW_COUNT;

	/*
	 * Get output file, and verify that it is ok..
	 */
	retval = -EBADF;
	out = fdget(out_fd);
	if (!out.file)
		goto fput_in;
	if (!(out.file->f_mode & FMODE_WRITE))
		goto fput_out;
	in_inode = file_inode(in.file);
	out_inode = file_inode(out.file);
	out_pos = out.file->f_pos;

	if (!max)
		max = min(in_inode->i_sb->s_maxbytes, out_inode->i_sb->s_maxbytes);

	/* Trim the request so the read position cannot pass max. */
	if (unlikely(pos + count > max)) {
		retval = -EOVERFLOW;
		if (pos >= max)
			goto fput_out;
		count = max - pos;
	}

	fl = 0;
#if 0
	/*
	 * We need to debate whether we can enable this or not. The
	 * man page documents EAGAIN return for the output at least,
	 * and the application is arguably buggy if it doesn't expect
	 * EAGAIN on a non-blocking file descriptor.
	 */
	if (in.file->f_flags & O_NONBLOCK)
		fl = SPLICE_F_NONBLOCK;
#endif
	opipe = get_pipe_info(out.file, true);
	if (!opipe) {
		retval = rw_verify_area(WRITE, out.file, &out_pos, count);
		if (retval < 0)
			goto fput_out;
		file_start_write(out.file);
		retval = do_splice_direct(in.file, &pos, out.file, &out_pos,
					  count, fl);
		file_end_write(out.file);
	} else {
		if (out.file->f_flags & O_NONBLOCK)
			fl |= SPLICE_F_NONBLOCK;

		retval = splice_file_to_pipe(in.file, opipe, &pos, count, fl);
	}

	/* Positions and accounting are only committed when data moved. */
	if (retval > 0) {
		add_rchar(current, retval);
		add_wchar(current, retval);
		fsnotify_access(in.file);
		fsnotify_modify(out.file);
		out.file->f_pos = out_pos;
		if (ppos)
			*ppos = pos;
		else
			in.file->f_pos = pos;
	}

	inc_syscr(current);
	inc_syscw(current);
	if (pos > max)
		retval = -EOVERFLOW;

fput_out:
	fdput(out);
fput_in:
	fdput(in);
out:
	return retval;
}

/*
 * sendfile with an off_t offset.  When @offset is non-NULL the position is
 * read from user space, the transfer is limited to MAX_NON_LFS, and the
 * resulting position is written back with put_user() whatever
 * do_sendfile() returned; a failing put_user() turns the result into
 * -EFAULT.
 */
SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd, off_t __user *, offset, size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

/*
 * sendfile64: as sendfile, but with a loff_t offset and no MAX_NON_LFS
 * limit (max is passed as 0).
 */
SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd, loff_t __user *, offset, size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

#ifdef CONFIG_COMPAT
/* Compat sendfile: same logic as the native one, with a compat_off_t offset. */
COMPAT_SYSCALL_DEFINE4(sendfile, int, out_fd, int, in_fd,
		compat_off_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	off_t off;
	ssize_t ret;

	if (offset) {
		if (unlikely(get_user(off, offset)))
			return -EFAULT;
		pos = off;
		ret = do_sendfile(out_fd, in_fd, &pos, count, MAX_NON_LFS);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}

/* Compat sendfile64: same logic as the native one, with a compat_loff_t offset. */
COMPAT_SYSCALL_DEFINE4(sendfile64, int, out_fd, int, in_fd,
		compat_loff_t __user *, offset, compat_size_t, count)
{
	loff_t pos;
	ssize_t ret;

	if (offset) {
		if (unlikely(copy_from_user(&pos, offset, sizeof(loff_t))))
			return -EFAULT;
		ret = do_sendfile(out_fd, in_fd, &pos, count, 0);
		if (unlikely(put_user(pos, offset)))
			return -EFAULT;
		return ret;
	}

	return do_sendfile(out_fd, in_fd, NULL, count, 0);
}
#endif

/**
 * generic_copy_file_range - copy data between two files
 * @file_in:	file structure to read from
 * @pos_in:	file offset to read from
 * @file_out:	file
structure to write data to 1365 * @pos_out: file offset to write data to 1366 * @len: amount of data to copy 1367 * @flags: copy flags 1368 * 1369 * This is a generic filesystem helper to copy data from one file to another. 1370 * It has no constraints on the source or destination file owners - the files 1371 * can belong to different superblocks and different filesystem types. Short 1372 * copies are allowed. 1373 * 1374 * This should be called from the @file_out filesystem, as per the 1375 * ->copy_file_range() method. 1376 * 1377 * Returns the number of bytes copied or a negative error indicating the 1378 * failure. 1379 */ 1380 1381 ssize_t generic_copy_file_range(struct file *file_in, loff_t pos_in, 1382 struct file *file_out, loff_t pos_out, 1383 size_t len, unsigned int flags) 1384 { 1385 return do_splice_direct(file_in, &pos_in, file_out, &pos_out, 1386 len > MAX_RW_COUNT ? MAX_RW_COUNT : len, 0); 1387 } 1388 EXPORT_SYMBOL(generic_copy_file_range); 1389 1390 /* 1391 * Performs necessary checks before doing a file copy 1392 * 1393 * Can adjust amount of bytes to copy via @req_count argument. 1394 * Returns appropriate error code that caller should return or 1395 * zero in case the copy should be allowed. 1396 */ 1397 static int generic_copy_file_checks(struct file *file_in, loff_t pos_in, 1398 struct file *file_out, loff_t pos_out, 1399 size_t *req_count, unsigned int flags) 1400 { 1401 struct inode *inode_in = file_inode(file_in); 1402 struct inode *inode_out = file_inode(file_out); 1403 uint64_t count = *req_count; 1404 loff_t size_in; 1405 int ret; 1406 1407 ret = generic_file_rw_checks(file_in, file_out); 1408 if (ret) 1409 return ret; 1410 1411 /* 1412 * We allow some filesystems to handle cross sb copy, but passing 1413 * a file of the wrong filesystem type to filesystem driver can result 1414 * in an attempt to dereference the wrong type of ->private_data, so 1415 * avoid doing that until we really have a good reason. 
1416 * 1417 * nfs and cifs define several different file_system_type structures 1418 * and several different sets of file_operations, but they all end up 1419 * using the same ->copy_file_range() function pointer. 1420 */ 1421 if (file_out->f_op->copy_file_range) { 1422 if (file_in->f_op->copy_file_range != 1423 file_out->f_op->copy_file_range) 1424 return -EXDEV; 1425 } else if (file_inode(file_in)->i_sb != file_inode(file_out)->i_sb) { 1426 return -EXDEV; 1427 } 1428 1429 /* Don't touch certain kinds of inodes */ 1430 if (IS_IMMUTABLE(inode_out)) 1431 return -EPERM; 1432 1433 if (IS_SWAPFILE(inode_in) || IS_SWAPFILE(inode_out)) 1434 return -ETXTBSY; 1435 1436 /* Ensure offsets don't wrap. */ 1437 if (pos_in + count < pos_in || pos_out + count < pos_out) 1438 return -EOVERFLOW; 1439 1440 /* Shorten the copy to EOF */ 1441 size_in = i_size_read(inode_in); 1442 if (pos_in >= size_in) 1443 count = 0; 1444 else 1445 count = min(count, size_in - (uint64_t)pos_in); 1446 1447 ret = generic_write_check_limits(file_out, pos_out, &count); 1448 if (ret) 1449 return ret; 1450 1451 /* Don't allow overlapped copying within the same file. */ 1452 if (inode_in == inode_out && 1453 pos_out + count > pos_in && 1454 pos_out < pos_in + count) 1455 return -EINVAL; 1456 1457 *req_count = count; 1458 return 0; 1459 } 1460 1461 /* 1462 * copy_file_range() differs from regular file read and write in that it 1463 * specifically allows return partial success. When it does so is up to 1464 * the copy_file_range method. 
1465 */ 1466 ssize_t vfs_copy_file_range(struct file *file_in, loff_t pos_in, 1467 struct file *file_out, loff_t pos_out, 1468 size_t len, unsigned int flags) 1469 { 1470 ssize_t ret; 1471 1472 if (flags != 0) 1473 return -EINVAL; 1474 1475 ret = generic_copy_file_checks(file_in, pos_in, file_out, pos_out, &len, 1476 flags); 1477 if (unlikely(ret)) 1478 return ret; 1479 1480 ret = rw_verify_area(READ, file_in, &pos_in, len); 1481 if (unlikely(ret)) 1482 return ret; 1483 1484 ret = rw_verify_area(WRITE, file_out, &pos_out, len); 1485 if (unlikely(ret)) 1486 return ret; 1487 1488 if (len == 0) 1489 return 0; 1490 1491 file_start_write(file_out); 1492 1493 /* 1494 * Cloning is supported by more file systems, so we implement copy on 1495 * same sb using clone, but for filesystems where both clone and copy 1496 * are supported (e.g. nfs,cifs), we only call the copy method. 1497 */ 1498 if (file_out->f_op->copy_file_range) { 1499 ret = file_out->f_op->copy_file_range(file_in, pos_in, 1500 file_out, pos_out, 1501 len, flags); 1502 goto done; 1503 } 1504 1505 if (file_in->f_op->remap_file_range && 1506 file_inode(file_in)->i_sb == file_inode(file_out)->i_sb) { 1507 ret = file_in->f_op->remap_file_range(file_in, pos_in, 1508 file_out, pos_out, 1509 min_t(loff_t, MAX_RW_COUNT, len), 1510 REMAP_FILE_CAN_SHORTEN); 1511 if (ret > 0) 1512 goto done; 1513 } 1514 1515 /* 1516 * We can get here for same sb copy of filesystems that do not implement 1517 * ->copy_file_range() in case filesystem does not support clone or in 1518 * case filesystem supports clone but rejected the clone request (e.g. 1519 * because it was not block aligned). 1520 * 1521 * In both cases, fall back to kernel copy so we are able to maintain a 1522 * consistent story about which filesystems support copy_file_range() 1523 * and which filesystems do not, that will allow userspace tools to 1524 * make consistent desicions w.r.t using copy_file_range(). 
1525 */ 1526 ret = generic_copy_file_range(file_in, pos_in, file_out, pos_out, len, 1527 flags); 1528 1529 done: 1530 if (ret > 0) { 1531 fsnotify_access(file_in); 1532 add_rchar(current, ret); 1533 fsnotify_modify(file_out); 1534 add_wchar(current, ret); 1535 } 1536 1537 inc_syscr(current); 1538 inc_syscw(current); 1539 1540 file_end_write(file_out); 1541 1542 return ret; 1543 } 1544 EXPORT_SYMBOL(vfs_copy_file_range); 1545 1546 SYSCALL_DEFINE6(copy_file_range, int, fd_in, loff_t __user *, off_in, 1547 int, fd_out, loff_t __user *, off_out, 1548 size_t, len, unsigned int, flags) 1549 { 1550 loff_t pos_in; 1551 loff_t pos_out; 1552 struct fd f_in; 1553 struct fd f_out; 1554 ssize_t ret = -EBADF; 1555 1556 f_in = fdget(fd_in); 1557 if (!f_in.file) 1558 goto out2; 1559 1560 f_out = fdget(fd_out); 1561 if (!f_out.file) 1562 goto out1; 1563 1564 ret = -EFAULT; 1565 if (off_in) { 1566 if (copy_from_user(&pos_in, off_in, sizeof(loff_t))) 1567 goto out; 1568 } else { 1569 pos_in = f_in.file->f_pos; 1570 } 1571 1572 if (off_out) { 1573 if (copy_from_user(&pos_out, off_out, sizeof(loff_t))) 1574 goto out; 1575 } else { 1576 pos_out = f_out.file->f_pos; 1577 } 1578 1579 ret = vfs_copy_file_range(f_in.file, pos_in, f_out.file, pos_out, len, 1580 flags); 1581 if (ret > 0) { 1582 pos_in += ret; 1583 pos_out += ret; 1584 1585 if (off_in) { 1586 if (copy_to_user(off_in, &pos_in, sizeof(loff_t))) 1587 ret = -EFAULT; 1588 } else { 1589 f_in.file->f_pos = pos_in; 1590 } 1591 1592 if (off_out) { 1593 if (copy_to_user(off_out, &pos_out, sizeof(loff_t))) 1594 ret = -EFAULT; 1595 } else { 1596 f_out.file->f_pos = pos_out; 1597 } 1598 } 1599 1600 out: 1601 fdput(f_out); 1602 out1: 1603 fdput(f_in); 1604 out2: 1605 return ret; 1606 } 1607 1608 /* 1609 * Don't operate on ranges the page cache doesn't support, and don't exceed the 1610 * LFS limits. If pos is under the limit it becomes a short access. If it 1611 * exceeds the limit we return -EFBIG. 
1612 */ 1613 int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count) 1614 { 1615 struct inode *inode = file->f_mapping->host; 1616 loff_t max_size = inode->i_sb->s_maxbytes; 1617 loff_t limit = rlimit(RLIMIT_FSIZE); 1618 1619 if (limit != RLIM_INFINITY) { 1620 if (pos >= limit) { 1621 send_sig(SIGXFSZ, current, 0); 1622 return -EFBIG; 1623 } 1624 *count = min(*count, limit - pos); 1625 } 1626 1627 if (!(file->f_flags & O_LARGEFILE)) 1628 max_size = MAX_NON_LFS; 1629 1630 if (unlikely(pos >= max_size)) 1631 return -EFBIG; 1632 1633 *count = min(*count, max_size - pos); 1634 1635 return 0; 1636 } 1637 1638 /* Like generic_write_checks(), but takes size of write instead of iter. */ 1639 int generic_write_checks_count(struct kiocb *iocb, loff_t *count) 1640 { 1641 struct file *file = iocb->ki_filp; 1642 struct inode *inode = file->f_mapping->host; 1643 1644 if (IS_SWAPFILE(inode)) 1645 return -ETXTBSY; 1646 1647 if (!*count) 1648 return 0; 1649 1650 if (iocb->ki_flags & IOCB_APPEND) 1651 iocb->ki_pos = i_size_read(inode); 1652 1653 if ((iocb->ki_flags & IOCB_NOWAIT) && 1654 !((iocb->ki_flags & IOCB_DIRECT) || 1655 (file->f_mode & FMODE_BUF_WASYNC))) 1656 return -EINVAL; 1657 1658 return generic_write_check_limits(iocb->ki_filp, iocb->ki_pos, count); 1659 } 1660 EXPORT_SYMBOL(generic_write_checks_count); 1661 1662 /* 1663 * Performs necessary checks before doing a write 1664 * 1665 * Can adjust writing position or amount of bytes to write. 1666 * Returns appropriate error code that caller should return or 1667 * zero in case that write should be allowed. 
1668 */ 1669 ssize_t generic_write_checks(struct kiocb *iocb, struct iov_iter *from) 1670 { 1671 loff_t count = iov_iter_count(from); 1672 int ret; 1673 1674 ret = generic_write_checks_count(iocb, &count); 1675 if (ret) 1676 return ret; 1677 1678 iov_iter_truncate(from, count); 1679 return iov_iter_count(from); 1680 } 1681 EXPORT_SYMBOL(generic_write_checks); 1682 1683 /* 1684 * Performs common checks before doing a file copy/clone 1685 * from @file_in to @file_out. 1686 */ 1687 int generic_file_rw_checks(struct file *file_in, struct file *file_out) 1688 { 1689 struct inode *inode_in = file_inode(file_in); 1690 struct inode *inode_out = file_inode(file_out); 1691 1692 /* Don't copy dirs, pipes, sockets... */ 1693 if (S_ISDIR(inode_in->i_mode) || S_ISDIR(inode_out->i_mode)) 1694 return -EISDIR; 1695 if (!S_ISREG(inode_in->i_mode) || !S_ISREG(inode_out->i_mode)) 1696 return -EINVAL; 1697 1698 if (!(file_in->f_mode & FMODE_READ) || 1699 !(file_out->f_mode & FMODE_WRITE) || 1700 (file_out->f_flags & O_APPEND)) 1701 return -EBADF; 1702 1703 return 0; 1704 } 1705