1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * "splice": joining two ropes together by interweaving their strands. 4 * 5 * This is the "extended pipe" functionality, where a pipe is used as 6 * an arbitrary in-memory buffer. Think of a pipe as a small kernel 7 * buffer that you can use to transfer data from one end to the other. 8 * 9 * The traditional unix read/write is extended with a "splice()" operation 10 * that transfers data buffers to or from a pipe buffer. 11 * 12 * Named by Larry McVoy, original implementation from Linus, extended by 13 * Jens to support splicing to files, network, direct splicing, etc and 14 * fixing lots of bugs. 15 * 16 * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> 17 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> 18 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> 19 * 20 */ 21 #include <linux/bvec.h> 22 #include <linux/fs.h> 23 #include <linux/file.h> 24 #include <linux/pagemap.h> 25 #include <linux/splice.h> 26 #include <linux/memcontrol.h> 27 #include <linux/mm_inline.h> 28 #include <linux/swap.h> 29 #include <linux/writeback.h> 30 #include <linux/export.h> 31 #include <linux/syscalls.h> 32 #include <linux/uio.h> 33 #include <linux/security.h> 34 #include <linux/gfp.h> 35 #include <linux/socket.h> 36 #include <linux/sched/signal.h> 37 38 #include "internal.h" 39 40 /* 41 * Attempt to steal a page from a pipe buffer. This should perhaps go into 42 * a vm helper function, it's already simplified quite a bit by the 43 * addition of remove_mapping(). If success is returned, the caller may 44 * attempt to reuse this page for another destination. 45 */ 46 static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, 47 struct pipe_buffer *buf) 48 { 49 struct page *page = buf->page; 50 struct address_space *mapping; 51 52 lock_page(page); 53 54 mapping = page_mapping(page); 55 if (mapping) { 56 WARN_ON(!PageUptodate(page)); 57 58 /* 59 * At least for ext2 with nobh option, we need to wait on 60 * writeback completing on this page, since we'll remove it 61 * from the pagecache. Otherwise truncate wont wait on the 62 * page, allowing the disk blocks to be reused by someone else 63 * before we actually wrote our data to them. fs corruption 64 * ensues. 65 */ 66 wait_on_page_writeback(page); 67 68 if (page_has_private(page) && 69 !try_to_release_page(page, GFP_KERNEL)) 70 goto out_unlock; 71 72 /* 73 * If we succeeded in removing the mapping, set LRU flag 74 * and return good. 75 */ 76 if (remove_mapping(mapping, page)) { 77 buf->flags |= PIPE_BUF_FLAG_LRU; 78 return true; 79 } 80 } 81 82 /* 83 * Raced with truncate or failed to remove page from current 84 * address space, unlock and return failure. 85 */ 86 out_unlock: 87 unlock_page(page); 88 return false; 89 } 90 91 static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, 92 struct pipe_buffer *buf) 93 { 94 put_page(buf->page); 95 buf->flags &= ~PIPE_BUF_FLAG_LRU; 96 } 97 98 /* 99 * Check whether the contents of buf is OK to access. Since the content 100 * is a page cache page, IO may be in flight. 101 */ 102 static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, 103 struct pipe_buffer *buf) 104 { 105 struct page *page = buf->page; 106 int err; 107 108 if (!PageUptodate(page)) { 109 lock_page(page); 110 111 /* 112 * Page got truncated/unhashed. This will cause a 0-byte 113 * splice, if this is the first page. 114 */ 115 if (!page->mapping) { 116 err = -ENODATA; 117 goto error; 118 } 119 120 /* 121 * Uh oh, read-error from disk. 122 */ 123 if (!PageUptodate(page)) { 124 err = -EIO; 125 goto error; 126 } 127 128 /* 129 * Page is ok afterall, we are done. 130 */ 131 unlock_page(page); 132 } 133 134 return 0; 135 error: 136 unlock_page(page); 137 return err; 138 } 139 140 const struct pipe_buf_operations page_cache_pipe_buf_ops = { 141 .confirm = page_cache_pipe_buf_confirm, 142 .release = page_cache_pipe_buf_release, 143 .try_steal = page_cache_pipe_buf_try_steal, 144 .get = generic_pipe_buf_get, 145 }; 146 147 static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe, 148 struct pipe_buffer *buf) 149 { 150 if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) 151 return false; 152 153 buf->flags |= PIPE_BUF_FLAG_LRU; 154 return generic_pipe_buf_try_steal(pipe, buf); 155 } 156 157 static const struct pipe_buf_operations user_page_pipe_buf_ops = { 158 .release = page_cache_pipe_buf_release, 159 .try_steal = user_page_pipe_buf_try_steal, 160 .get = generic_pipe_buf_get, 161 }; 162 163 static void wakeup_pipe_readers(struct pipe_inode_info *pipe) 164 { 165 smp_mb(); 166 if (waitqueue_active(&pipe->rd_wait)) 167 wake_up_interruptible(&pipe->rd_wait); 168 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 169 } 170 171 /** 172 * splice_to_pipe - fill passed data into a pipe 173 * @pipe: pipe to fill 174 * @spd: data to fill 175 * 176 * Description: 177 * @spd contains a map of pages and len/offset tuples, along with 178 * the struct pipe_buf_operations associated with these pages. This 179 * function will link that data to the pipe. 180 * 181 */ 182 ssize_t splice_to_pipe(struct pipe_inode_info *pipe, 183 struct splice_pipe_desc *spd) 184 { 185 unsigned int spd_pages = spd->nr_pages; 186 unsigned int tail = pipe->tail; 187 unsigned int head = pipe->head; 188 unsigned int mask = pipe->ring_size - 1; 189 int ret = 0, page_nr = 0; 190 191 if (!spd_pages) 192 return 0; 193 194 if (unlikely(!pipe->readers)) { 195 send_sig(SIGPIPE, current, 0); 196 ret = -EPIPE; 197 goto out; 198 } 199 200 while (!pipe_full(head, tail, pipe->max_usage)) { 201 struct pipe_buffer *buf = &pipe->bufs[head & mask]; 202 203 buf->page = spd->pages[page_nr]; 204 buf->offset = spd->partial[page_nr].offset; 205 buf->len = spd->partial[page_nr].len; 206 buf->private = spd->partial[page_nr].private; 207 buf->ops = spd->ops; 208 buf->flags = 0; 209 210 head++; 211 pipe->head = head; 212 page_nr++; 213 ret += buf->len; 214 215 if (!--spd->nr_pages) 216 break; 217 } 218 219 if (!ret) 220 ret = -EAGAIN; 221 222 out: 223 while (page_nr < spd_pages) 224 spd->spd_release(spd, page_nr++); 225 226 return ret; 227 } 228 EXPORT_SYMBOL_GPL(splice_to_pipe); 229 230 ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) 231 { 232 unsigned int head = pipe->head; 233 unsigned int tail = pipe->tail; 234 unsigned int mask = pipe->ring_size - 1; 235 int ret; 236 237 if (unlikely(!pipe->readers)) { 238 send_sig(SIGPIPE, current, 0); 239 ret = -EPIPE; 240 } else if (pipe_full(head, tail, pipe->max_usage)) { 241 ret = -EAGAIN; 242 } else { 243 pipe->bufs[head & mask] = *buf; 244 pipe->head = head + 1; 245 return buf->len; 246 } 247 pipe_buf_release(pipe, buf); 248 return ret; 249 } 250 EXPORT_SYMBOL(add_to_pipe); 251 252 /* 253 * Check if we need to grow the arrays holding pages and partial page 254 * descriptions. 255 */ 256 int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) 257 { 258 unsigned int max_usage = READ_ONCE(pipe->max_usage); 259 260 spd->nr_pages_max = max_usage; 261 if (max_usage <= PIPE_DEF_BUFFERS) 262 return 0; 263 264 spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL); 265 spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page), 266 GFP_KERNEL); 267 268 if (spd->pages && spd->partial) 269 return 0; 270 271 kfree(spd->pages); 272 kfree(spd->partial); 273 return -ENOMEM; 274 } 275 276 void splice_shrink_spd(struct splice_pipe_desc *spd) 277 { 278 if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) 279 return; 280 281 kfree(spd->pages); 282 kfree(spd->partial); 283 } 284 285 /** 286 * generic_file_splice_read - splice data from file to a pipe 287 * @in: file to splice from 288 * @ppos: position in @in 289 * @pipe: pipe to splice to 290 * @len: number of bytes to splice 291 * @flags: splice modifier flags 292 * 293 * Description: 294 * Will read pages from given file and fill them into a pipe. Can be 295 * used as long as it has more or less sane ->read_iter(). 296 * 297 */ 298 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 299 struct pipe_inode_info *pipe, size_t len, 300 unsigned int flags) 301 { 302 struct iov_iter to; 303 struct kiocb kiocb; 304 unsigned int i_head; 305 int ret; 306 307 iov_iter_pipe(&to, READ, pipe, len); 308 i_head = to.head; 309 init_sync_kiocb(&kiocb, in); 310 kiocb.ki_pos = *ppos; 311 ret = call_read_iter(in, &kiocb, &to); 312 if (ret > 0) { 313 *ppos = kiocb.ki_pos; 314 file_accessed(in); 315 } else if (ret < 0) { 316 to.head = i_head; 317 to.iov_offset = 0; 318 iov_iter_advance(&to, 0); /* to free what was emitted */ 319 /* 320 * callers of ->splice_read() expect -EAGAIN on 321 * "can't put anything in there", rather than -EFAULT. 322 */ 323 if (ret == -EFAULT) 324 ret = -EAGAIN; 325 } 326 327 return ret; 328 } 329 EXPORT_SYMBOL(generic_file_splice_read); 330 331 const struct pipe_buf_operations default_pipe_buf_ops = { 332 .release = generic_pipe_buf_release, 333 .try_steal = generic_pipe_buf_try_steal, 334 .get = generic_pipe_buf_get, 335 }; 336 337 /* Pipe buffer operations for a socket and similar. */ 338 const struct pipe_buf_operations nosteal_pipe_buf_ops = { 339 .release = generic_pipe_buf_release, 340 .get = generic_pipe_buf_get, 341 }; 342 EXPORT_SYMBOL(nosteal_pipe_buf_ops); 343 344 /* 345 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 346 * using sendpage(). Return the number of bytes sent. 347 */ 348 static int pipe_to_sendpage(struct pipe_inode_info *pipe, 349 struct pipe_buffer *buf, struct splice_desc *sd) 350 { 351 struct file *file = sd->u.file; 352 loff_t pos = sd->pos; 353 int more; 354 355 if (!likely(file->f_op->sendpage)) 356 return -EINVAL; 357 358 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; 359 360 if (sd->len < sd->total_len && 361 pipe_occupancy(pipe->head, pipe->tail) > 1) 362 more |= MSG_SENDPAGE_NOTLAST; 363 364 return file->f_op->sendpage(file, buf->page, buf->offset, 365 sd->len, &pos, more); 366 } 367 368 static void wakeup_pipe_writers(struct pipe_inode_info *pipe) 369 { 370 smp_mb(); 371 if (waitqueue_active(&pipe->wr_wait)) 372 wake_up_interruptible(&pipe->wr_wait); 373 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 374 } 375 376 /** 377 * splice_from_pipe_feed - feed available data from a pipe to a file 378 * @pipe: pipe to splice from 379 * @sd: information to @actor 380 * @actor: handler that splices the data 381 * 382 * Description: 383 * This function loops over the pipe and calls @actor to do the 384 * actual moving of a single struct pipe_buffer to the desired 385 * destination. It returns when there's no more buffers left in 386 * the pipe or if the requested number of bytes (@sd->total_len) 387 * have been copied. It returns a positive number (one) if the 388 * pipe needs to be filled with more data, zero if the required 389 * number of bytes have been copied and -errno on error. 390 * 391 * This, together with splice_from_pipe_{begin,end,next}, may be 392 * used to implement the functionality of __splice_from_pipe() when 393 * locking is required around copying the pipe buffers to the 394 * destination. 395 */ 396 static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, 397 splice_actor *actor) 398 { 399 unsigned int head = pipe->head; 400 unsigned int tail = pipe->tail; 401 unsigned int mask = pipe->ring_size - 1; 402 int ret; 403 404 while (!pipe_empty(head, tail)) { 405 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 406 407 sd->len = buf->len; 408 if (sd->len > sd->total_len) 409 sd->len = sd->total_len; 410 411 ret = pipe_buf_confirm(pipe, buf); 412 if (unlikely(ret)) { 413 if (ret == -ENODATA) 414 ret = 0; 415 return ret; 416 } 417 418 ret = actor(pipe, buf, sd); 419 if (ret <= 0) 420 return ret; 421 422 buf->offset += ret; 423 buf->len -= ret; 424 425 sd->num_spliced += ret; 426 sd->len -= ret; 427 sd->pos += ret; 428 sd->total_len -= ret; 429 430 if (!buf->len) { 431 pipe_buf_release(pipe, buf); 432 tail++; 433 pipe->tail = tail; 434 if (pipe->files) 435 sd->need_wakeup = true; 436 } 437 438 if (!sd->total_len) 439 return 0; 440 } 441 442 return 1; 443 } 444 445 /* We know we have a pipe buffer, but maybe it's empty? */ 446 static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) 447 { 448 unsigned int tail = pipe->tail; 449 unsigned int mask = pipe->ring_size - 1; 450 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 451 452 if (unlikely(!buf->len)) { 453 pipe_buf_release(pipe, buf); 454 pipe->tail = tail+1; 455 return true; 456 } 457 458 return false; 459 } 460 461 /** 462 * splice_from_pipe_next - wait for some data to splice from 463 * @pipe: pipe to splice from 464 * @sd: information about the splice operation 465 * 466 * Description: 467 * This function will wait for some data and return a positive 468 * value (one) if pipe buffers are available. It will return zero 469 * or -errno if no more data needs to be spliced. 470 */ 471 static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) 472 { 473 /* 474 * Check for signal early to make process killable when there are 475 * always buffers available 476 */ 477 if (signal_pending(current)) 478 return -ERESTARTSYS; 479 480 repeat: 481 while (pipe_empty(pipe->head, pipe->tail)) { 482 if (!pipe->writers) 483 return 0; 484 485 if (sd->num_spliced) 486 return 0; 487 488 if (sd->flags & SPLICE_F_NONBLOCK) 489 return -EAGAIN; 490 491 if (signal_pending(current)) 492 return -ERESTARTSYS; 493 494 if (sd->need_wakeup) { 495 wakeup_pipe_writers(pipe); 496 sd->need_wakeup = false; 497 } 498 499 pipe_wait_readable(pipe); 500 } 501 502 if (eat_empty_buffer(pipe)) 503 goto repeat; 504 505 return 1; 506 } 507 508 /** 509 * splice_from_pipe_begin - start splicing from pipe 510 * @sd: information about the splice operation 511 * 512 * Description: 513 * This function should be called before a loop containing 514 * splice_from_pipe_next() and splice_from_pipe_feed() to 515 * initialize the necessary fields of @sd. 516 */ 517 static void splice_from_pipe_begin(struct splice_desc *sd) 518 { 519 sd->num_spliced = 0; 520 sd->need_wakeup = false; 521 } 522 523 /** 524 * splice_from_pipe_end - finish splicing from pipe 525 * @pipe: pipe to splice from 526 * @sd: information about the splice operation 527 * 528 * Description: 529 * This function will wake up pipe writers if necessary. It should 530 * be called after a loop containing splice_from_pipe_next() and 531 * splice_from_pipe_feed(). 532 */ 533 static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) 534 { 535 if (sd->need_wakeup) 536 wakeup_pipe_writers(pipe); 537 } 538 539 /** 540 * __splice_from_pipe - splice data from a pipe to given actor 541 * @pipe: pipe to splice from 542 * @sd: information to @actor 543 * @actor: handler that splices the data 544 * 545 * Description: 546 * This function does little more than loop over the pipe and call 547 * @actor to do the actual moving of a single struct pipe_buffer to 548 * the desired destination. See pipe_to_file, pipe_to_sendpage, or 549 * pipe_to_user. 550 * 551 */ 552 ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, 553 splice_actor *actor) 554 { 555 int ret; 556 557 splice_from_pipe_begin(sd); 558 do { 559 cond_resched(); 560 ret = splice_from_pipe_next(pipe, sd); 561 if (ret > 0) 562 ret = splice_from_pipe_feed(pipe, sd, actor); 563 } while (ret > 0); 564 splice_from_pipe_end(pipe, sd); 565 566 return sd->num_spliced ? sd->num_spliced : ret; 567 } 568 EXPORT_SYMBOL(__splice_from_pipe); 569 570 /** 571 * splice_from_pipe - splice data from a pipe to a file 572 * @pipe: pipe to splice from 573 * @out: file to splice to 574 * @ppos: position in @out 575 * @len: how many bytes to splice 576 * @flags: splice modifier flags 577 * @actor: handler that splices the data 578 * 579 * Description: 580 * See __splice_from_pipe. This function locks the pipe inode, 581 * otherwise it's identical to __splice_from_pipe(). 582 * 583 */ 584 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 585 loff_t *ppos, size_t len, unsigned int flags, 586 splice_actor *actor) 587 { 588 ssize_t ret; 589 struct splice_desc sd = { 590 .total_len = len, 591 .flags = flags, 592 .pos = *ppos, 593 .u.file = out, 594 }; 595 596 pipe_lock(pipe); 597 ret = __splice_from_pipe(pipe, &sd, actor); 598 pipe_unlock(pipe); 599 600 return ret; 601 } 602 603 /** 604 * iter_file_splice_write - splice data from a pipe to a file 605 * @pipe: pipe info 606 * @out: file to write to 607 * @ppos: position in @out 608 * @len: number of bytes to splice 609 * @flags: splice modifier flags 610 * 611 * Description: 612 * Will either move or copy pages (determined by @flags options) from 613 * the given pipe inode to the given file. 614 * This one is ->write_iter-based. 615 * 616 */ 617 ssize_t 618 iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 619 loff_t *ppos, size_t len, unsigned int flags) 620 { 621 struct splice_desc sd = { 622 .total_len = len, 623 .flags = flags, 624 .pos = *ppos, 625 .u.file = out, 626 }; 627 int nbufs = pipe->max_usage; 628 struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), 629 GFP_KERNEL); 630 ssize_t ret; 631 632 if (unlikely(!array)) 633 return -ENOMEM; 634 635 pipe_lock(pipe); 636 637 splice_from_pipe_begin(&sd); 638 while (sd.total_len) { 639 struct iov_iter from; 640 unsigned int head, tail, mask; 641 size_t left; 642 int n; 643 644 ret = splice_from_pipe_next(pipe, &sd); 645 if (ret <= 0) 646 break; 647 648 if (unlikely(nbufs < pipe->max_usage)) { 649 kfree(array); 650 nbufs = pipe->max_usage; 651 array = kcalloc(nbufs, sizeof(struct bio_vec), 652 GFP_KERNEL); 653 if (!array) { 654 ret = -ENOMEM; 655 break; 656 } 657 } 658 659 head = pipe->head; 660 tail = pipe->tail; 661 mask = pipe->ring_size - 1; 662 663 /* build the vector */ 664 left = sd.total_len; 665 for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++, n++) { 666 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 667 size_t this_len = buf->len; 668 669 if (this_len > left) 670 this_len = left; 671 672 ret = pipe_buf_confirm(pipe, buf); 673 if (unlikely(ret)) { 674 if (ret == -ENODATA) 675 ret = 0; 676 goto done; 677 } 678 679 array[n].bv_page = buf->page; 680 array[n].bv_len = this_len; 681 array[n].bv_offset = buf->offset; 682 left -= this_len; 683 } 684 685 iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); 686 ret = vfs_iter_write(out, &from, &sd.pos, 0); 687 if (ret <= 0) 688 break; 689 690 sd.num_spliced += ret; 691 sd.total_len -= ret; 692 *ppos = sd.pos; 693 694 /* dismiss the fully eaten buffers, adjust the partial one */ 695 tail = pipe->tail; 696 while (ret) { 697 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 698 if (ret >= buf->len) { 699 ret -= buf->len; 700 buf->len = 0; 701 pipe_buf_release(pipe, buf); 702 tail++; 703 pipe->tail = tail; 704 if (pipe->files) 705 sd.need_wakeup = true; 706 } else { 707 buf->offset += ret; 708 buf->len -= ret; 709 ret = 0; 710 } 711 } 712 } 713 done: 714 kfree(array); 715 splice_from_pipe_end(pipe, &sd); 716 717 pipe_unlock(pipe); 718 719 if (sd.num_spliced) 720 ret = sd.num_spliced; 721 722 return ret; 723 } 724 725 EXPORT_SYMBOL(iter_file_splice_write); 726 727 /** 728 * generic_splice_sendpage - splice data from a pipe to a socket 729 * @pipe: pipe to splice from 730 * @out: socket to write to 731 * @ppos: position in @out 732 * @len: number of bytes to splice 733 * @flags: splice modifier flags 734 * 735 * Description: 736 * Will send @len bytes from the pipe to a network socket. No data copying 737 * is involved. 738 * 739 */ 740 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 741 loff_t *ppos, size_t len, unsigned int flags) 742 { 743 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 744 } 745 746 EXPORT_SYMBOL(generic_splice_sendpage); 747 748 static int warn_unsupported(struct file *file, const char *op) 749 { 750 pr_debug_ratelimited( 751 "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", 752 op, file, current->pid, current->comm); 753 return -EINVAL; 754 } 755 756 /* 757 * Attempt to initiate a splice from pipe to file. 758 */ 759 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 760 loff_t *ppos, size_t len, unsigned int flags) 761 { 762 if (unlikely(!out->f_op->splice_write)) 763 return warn_unsupported(out, "write"); 764 return out->f_op->splice_write(pipe, out, ppos, len, flags); 765 } 766 767 /* 768 * Attempt to initiate a splice from a file to a pipe. 769 */ 770 static long do_splice_to(struct file *in, loff_t *ppos, 771 struct pipe_inode_info *pipe, size_t len, 772 unsigned int flags) 773 { 774 int ret; 775 776 if (unlikely(!(in->f_mode & FMODE_READ))) 777 return -EBADF; 778 779 ret = rw_verify_area(READ, in, ppos, len); 780 if (unlikely(ret < 0)) 781 return ret; 782 783 if (unlikely(len > MAX_RW_COUNT)) 784 len = MAX_RW_COUNT; 785 786 if (unlikely(!in->f_op->splice_read)) 787 return warn_unsupported(in, "read"); 788 return in->f_op->splice_read(in, ppos, pipe, len, flags); 789 } 790 791 /** 792 * splice_direct_to_actor - splices data directly between two non-pipes 793 * @in: file to splice from 794 * @sd: actor information on where to splice to 795 * @actor: handles the data splicing 796 * 797 * Description: 798 * This is a special case helper to splice directly between two 799 * points, without requiring an explicit pipe. Internally an allocated 800 * pipe is cached in the process, and reused during the lifetime of 801 * that process. 802 * 803 */ 804 ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, 805 splice_direct_actor *actor) 806 { 807 struct pipe_inode_info *pipe; 808 long ret, bytes; 809 umode_t i_mode; 810 size_t len; 811 int i, flags, more; 812 813 /* 814 * We require the input being a regular file, as we don't want to 815 * randomly drop data for eg socket -> socket splicing. Use the 816 * piped splicing for that! 817 */ 818 i_mode = file_inode(in)->i_mode; 819 if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode))) 820 return -EINVAL; 821 822 /* 823 * neither in nor out is a pipe, setup an internal pipe attached to 824 * 'out' and transfer the wanted data from 'in' to 'out' through that 825 */ 826 pipe = current->splice_pipe; 827 if (unlikely(!pipe)) { 828 pipe = alloc_pipe_info(); 829 if (!pipe) 830 return -ENOMEM; 831 832 /* 833 * We don't have an immediate reader, but we'll read the stuff 834 * out of the pipe right after the splice_to_pipe(). So set 835 * PIPE_READERS appropriately. 836 */ 837 pipe->readers = 1; 838 839 current->splice_pipe = pipe; 840 } 841 842 /* 843 * Do the splice. 844 */ 845 ret = 0; 846 bytes = 0; 847 len = sd->total_len; 848 flags = sd->flags; 849 850 /* 851 * Don't block on output, we have to drain the direct pipe. 852 */ 853 sd->flags &= ~SPLICE_F_NONBLOCK; 854 more = sd->flags & SPLICE_F_MORE; 855 856 WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); 857 858 while (len) { 859 unsigned int p_space; 860 size_t read_len; 861 loff_t pos = sd->pos, prev_pos = pos; 862 863 /* Don't try to read more the pipe has space for. */ 864 p_space = pipe->max_usage - 865 pipe_occupancy(pipe->head, pipe->tail); 866 read_len = min_t(size_t, len, p_space << PAGE_SHIFT); 867 ret = do_splice_to(in, &pos, pipe, read_len, flags); 868 if (unlikely(ret <= 0)) 869 goto out_release; 870 871 read_len = ret; 872 sd->total_len = read_len; 873 874 /* 875 * If more data is pending, set SPLICE_F_MORE 876 * If this is the last data and SPLICE_F_MORE was not set 877 * initially, clears it. 878 */ 879 if (read_len < len) 880 sd->flags |= SPLICE_F_MORE; 881 else if (!more) 882 sd->flags &= ~SPLICE_F_MORE; 883 /* 884 * NOTE: nonblocking mode only applies to the input. We 885 * must not do the output in nonblocking mode as then we 886 * could get stuck data in the internal pipe: 887 */ 888 ret = actor(pipe, sd); 889 if (unlikely(ret <= 0)) { 890 sd->pos = prev_pos; 891 goto out_release; 892 } 893 894 bytes += ret; 895 len -= ret; 896 sd->pos = pos; 897 898 if (ret < read_len) { 899 sd->pos = prev_pos + ret; 900 goto out_release; 901 } 902 } 903 904 done: 905 pipe->tail = pipe->head = 0; 906 file_accessed(in); 907 return bytes; 908 909 out_release: 910 /* 911 * If we did an incomplete transfer we must release 912 * the pipe buffers in question: 913 */ 914 for (i = 0; i < pipe->ring_size; i++) { 915 struct pipe_buffer *buf = &pipe->bufs[i]; 916 917 if (buf->ops) 918 pipe_buf_release(pipe, buf); 919 } 920 921 if (!bytes) 922 bytes = ret; 923 924 goto done; 925 } 926 EXPORT_SYMBOL(splice_direct_to_actor); 927 928 static int direct_splice_actor(struct pipe_inode_info *pipe, 929 struct splice_desc *sd) 930 { 931 struct file *file = sd->u.file; 932 933 return do_splice_from(pipe, file, sd->opos, sd->total_len, 934 sd->flags); 935 } 936 937 /** 938 * do_splice_direct - splices data directly between two files 939 * @in: file to splice from 940 * @ppos: input file offset 941 * @out: file to splice to 942 * @opos: output file offset 943 * @len: number of bytes to splice 944 * @flags: splice modifier flags 945 * 946 * Description: 947 * For use by do_sendfile(). splice can easily emulate sendfile, but 948 * doing it in the application would incur an extra system call 949 * (splice in + splice out, as compared to just sendfile()). So this helper 950 * can splice directly through a process-private pipe. 951 * 952 */ 953 long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 954 loff_t *opos, size_t len, unsigned int flags) 955 { 956 struct splice_desc sd = { 957 .len = len, 958 .total_len = len, 959 .flags = flags, 960 .pos = *ppos, 961 .u.file = out, 962 .opos = opos, 963 }; 964 long ret; 965 966 if (unlikely(!(out->f_mode & FMODE_WRITE))) 967 return -EBADF; 968 969 if (unlikely(out->f_flags & O_APPEND)) 970 return -EINVAL; 971 972 ret = rw_verify_area(WRITE, out, opos, len); 973 if (unlikely(ret < 0)) 974 return ret; 975 976 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 977 if (ret > 0) 978 *ppos = sd.pos; 979 980 return ret; 981 } 982 EXPORT_SYMBOL(do_splice_direct); 983 984 static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) 985 { 986 for (;;) { 987 if (unlikely(!pipe->readers)) { 988 send_sig(SIGPIPE, current, 0); 989 return -EPIPE; 990 } 991 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 992 return 0; 993 if (flags & SPLICE_F_NONBLOCK) 994 return -EAGAIN; 995 if (signal_pending(current)) 996 return -ERESTARTSYS; 997 pipe_wait_writable(pipe); 998 } 999 } 1000 1001 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1002 struct pipe_inode_info *opipe, 1003 size_t len, unsigned int flags); 1004 1005 /* 1006 * Determine where to splice to/from. 1007 */ 1008 long do_splice(struct file *in, loff_t *off_in, struct file *out, 1009 loff_t *off_out, size_t len, unsigned int flags) 1010 { 1011 struct pipe_inode_info *ipipe; 1012 struct pipe_inode_info *opipe; 1013 loff_t offset; 1014 long ret; 1015 1016 if (unlikely(!(in->f_mode & FMODE_READ) || 1017 !(out->f_mode & FMODE_WRITE))) 1018 return -EBADF; 1019 1020 ipipe = get_pipe_info(in, true); 1021 opipe = get_pipe_info(out, true); 1022 1023 if (ipipe && opipe) { 1024 if (off_in || off_out) 1025 return -ESPIPE; 1026 1027 /* Splicing to self would be fun, but... */ 1028 if (ipipe == opipe) 1029 return -EINVAL; 1030 1031 if ((in->f_flags | out->f_flags) & O_NONBLOCK) 1032 flags |= SPLICE_F_NONBLOCK; 1033 1034 return splice_pipe_to_pipe(ipipe, opipe, len, flags); 1035 } 1036 1037 if (ipipe) { 1038 if (off_in) 1039 return -ESPIPE; 1040 if (off_out) { 1041 if (!(out->f_mode & FMODE_PWRITE)) 1042 return -EINVAL; 1043 offset = *off_out; 1044 } else { 1045 offset = out->f_pos; 1046 } 1047 1048 if (unlikely(out->f_flags & O_APPEND)) 1049 return -EINVAL; 1050 1051 ret = rw_verify_area(WRITE, out, &offset, len); 1052 if (unlikely(ret < 0)) 1053 return ret; 1054 1055 if (in->f_flags & O_NONBLOCK) 1056 flags |= SPLICE_F_NONBLOCK; 1057 1058 file_start_write(out); 1059 ret = do_splice_from(ipipe, out, &offset, len, flags); 1060 file_end_write(out); 1061 1062 if (!off_out) 1063 out->f_pos = offset; 1064 else 1065 *off_out = offset; 1066 1067 return ret; 1068 } 1069 1070 if (opipe) { 1071 if (off_out) 1072 return -ESPIPE; 1073 if (off_in) { 1074 if (!(in->f_mode & FMODE_PREAD)) 1075 return -EINVAL; 1076 offset = *off_in; 1077 } else { 1078 offset = in->f_pos; 1079 } 1080 1081 if (out->f_flags & O_NONBLOCK) 1082 flags |= SPLICE_F_NONBLOCK; 1083 1084 pipe_lock(opipe); 1085 ret = wait_for_space(opipe, flags); 1086 if (!ret) { 1087 unsigned int p_space; 1088 1089 /* Don't try to read more the pipe has space for. */ 1090 p_space = opipe->max_usage - pipe_occupancy(opipe->head, opipe->tail); 1091 len = min_t(size_t, len, p_space << PAGE_SHIFT); 1092 1093 ret = do_splice_to(in, &offset, opipe, len, flags); 1094 } 1095 pipe_unlock(opipe); 1096 if (ret > 0) 1097 wakeup_pipe_readers(opipe); 1098 if (!off_in) 1099 in->f_pos = offset; 1100 else 1101 *off_in = offset; 1102 1103 return ret; 1104 } 1105 1106 return -EINVAL; 1107 } 1108 1109 static long __do_splice(struct file *in, loff_t __user *off_in, 1110 struct file *out, loff_t __user *off_out, 1111 size_t len, unsigned int flags) 1112 { 1113 struct pipe_inode_info *ipipe; 1114 struct pipe_inode_info *opipe; 1115 loff_t offset, *__off_in = NULL, *__off_out = NULL; 1116 long ret; 1117 1118 ipipe = get_pipe_info(in, true); 1119 opipe = get_pipe_info(out, true); 1120 1121 if (ipipe && off_in) 1122 return -ESPIPE; 1123 if (opipe && off_out) 1124 return -ESPIPE; 1125 1126 if (off_out) { 1127 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1128 return -EFAULT; 1129 __off_out = &offset; 1130 } 1131 if (off_in) { 1132 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1133 return -EFAULT; 1134 __off_in = &offset; 1135 } 1136 1137 ret = do_splice(in, __off_in, out, __off_out, len, flags); 1138 if (ret < 0) 1139 return ret; 1140 1141 if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) 1142 return -EFAULT; 1143 if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) 1144 return -EFAULT; 1145 1146 return ret; 1147 } 1148 1149 static int iter_to_pipe(struct iov_iter *from, 1150 struct pipe_inode_info *pipe, 1151 unsigned flags) 1152 { 1153 struct pipe_buffer buf = { 1154 .ops = &user_page_pipe_buf_ops, 1155 .flags = flags 1156 }; 1157 size_t total = 0; 1158 int ret = 0; 1159 bool failed = false; 1160 1161 while (iov_iter_count(from) && !failed) { 1162 struct page *pages[16]; 1163 ssize_t copied; 1164 size_t start; 1165 int n; 1166 1167 copied = iov_iter_get_pages(from, pages, ~0UL, 16, &start); 1168 if (copied <= 0) { 1169 ret = copied; 1170 break; 1171 } 1172 1173 for (n = 0; copied; n++, start = 0) { 1174 int size = min_t(int, copied, PAGE_SIZE - start); 1175 if (!failed) { 1176 buf.page = pages[n]; 1177 buf.offset = start; 1178 buf.len = size; 1179 ret = add_to_pipe(pipe, &buf); 1180 if (unlikely(ret < 0)) { 1181 failed = true; 1182 } else { 1183 iov_iter_advance(from, ret); 1184 total += ret; 1185 } 1186 } else { 1187 put_page(pages[n]); 1188 } 1189 copied -= size; 1190 } 1191 } 1192 return total ? total : ret; 1193 } 1194 1195 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1196 struct splice_desc *sd) 1197 { 1198 int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data); 1199 return n == sd->len ? n : -EFAULT; 1200 } 1201 1202 /* 1203 * For lack of a better implementation, implement vmsplice() to userspace 1204 * as a simple copy of the pipes pages to the user iov. 1205 */ 1206 static long vmsplice_to_user(struct file *file, struct iov_iter *iter, 1207 unsigned int flags) 1208 { 1209 struct pipe_inode_info *pipe = get_pipe_info(file, true); 1210 struct splice_desc sd = { 1211 .total_len = iov_iter_count(iter), 1212 .flags = flags, 1213 .u.data = iter 1214 }; 1215 long ret = 0; 1216 1217 if (!pipe) 1218 return -EBADF; 1219 1220 if (sd.total_len) { 1221 pipe_lock(pipe); 1222 ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1223 pipe_unlock(pipe); 1224 } 1225 1226 return ret; 1227 } 1228 1229 /* 1230 * vmsplice splices a user address range into a pipe. It can be thought of 1231 * as splice-from-memory, where the regular splice is splice-from-file (or 1232 * to file). In both cases the output is a pipe, naturally. 1233 */ 1234 static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, 1235 unsigned int flags) 1236 { 1237 struct pipe_inode_info *pipe; 1238 long ret = 0; 1239 unsigned buf_flag = 0; 1240 1241 if (flags & SPLICE_F_GIFT) 1242 buf_flag = PIPE_BUF_FLAG_GIFT; 1243 1244 pipe = get_pipe_info(file, true); 1245 if (!pipe) 1246 return -EBADF; 1247 1248 pipe_lock(pipe); 1249 ret = wait_for_space(pipe, flags); 1250 if (!ret) 1251 ret = iter_to_pipe(iter, pipe, buf_flag); 1252 pipe_unlock(pipe); 1253 if (ret > 0) 1254 wakeup_pipe_readers(pipe); 1255 return ret; 1256 } 1257 1258 static int vmsplice_type(struct fd f, int *type) 1259 { 1260 if (!f.file) 1261 return -EBADF; 1262 if (f.file->f_mode & FMODE_WRITE) { 1263 *type = WRITE; 1264 } else if (f.file->f_mode & FMODE_READ) { 1265 *type = READ; 1266 } else { 1267 fdput(f); 1268 return -EBADF; 1269 } 1270 return 0; 1271 } 1272 1273 /* 1274 * Note that vmsplice only really supports true splicing _from_ user memory 1275 * to a pipe, not the other way around. Splicing from user memory is a simple 1276 * operation that can be supported without any funky alignment restrictions 1277 * or nasty vm tricks. We simply map in the user memory and fill them into 1278 * a pipe. The reverse isn't quite as easy, though. There are two possible 1279 * solutions for that: 1280 * 1281 * - memcpy() the data internally, at which point we might as well just 1282 * do a regular read() on the buffer anyway. 1283 * - Lots of nasty vm tricks, that are neither fast nor flexible (it 1284 * has restriction limitations on both ends of the pipe). 1285 * 1286 * Currently we punt and implement it as a normal copy, see pipe_to_user(). 1287 * 1288 */ 1289 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, 1290 unsigned long, nr_segs, unsigned int, flags) 1291 { 1292 struct iovec iovstack[UIO_FASTIOV]; 1293 struct iovec *iov = iovstack; 1294 struct iov_iter iter; 1295 ssize_t error; 1296 struct fd f; 1297 int type; 1298 1299 if (unlikely(flags & ~SPLICE_F_ALL)) 1300 return -EINVAL; 1301 1302 f = fdget(fd); 1303 error = vmsplice_type(f, &type); 1304 if (error) 1305 return error; 1306 1307 error = import_iovec(type, uiov, nr_segs, 1308 ARRAY_SIZE(iovstack), &iov, &iter); 1309 if (error < 0) 1310 goto out_fdput; 1311 1312 if (!iov_iter_count(&iter)) 1313 error = 0; 1314 else if (iov_iter_rw(&iter) == WRITE) 1315 error = vmsplice_to_pipe(f.file, &iter, flags); 1316 else 1317 error = vmsplice_to_user(f.file, &iter, flags); 1318 1319 kfree(iov); 1320 out_fdput: 1321 fdput(f); 1322 return error; 1323 } 1324 1325 SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, 1326 int, fd_out, loff_t __user *, off_out, 1327 size_t, len, unsigned int, flags) 1328 { 1329 struct fd in, out; 1330 long error; 1331 1332 if (unlikely(!len)) 1333 return 0; 1334 1335 if (unlikely(flags & ~SPLICE_F_ALL)) 1336 return -EINVAL; 1337 1338 error = -EBADF; 1339 in = fdget(fd_in); 1340 if (in.file) { 1341 out = fdget(fd_out); 1342 if (out.file) { 1343 error = __do_splice(in.file, off_in, out.file, off_out, 1344 len, flags); 1345 fdput(out); 1346 } 1347 fdput(in); 1348 } 1349 return error; 1350 } 1351 1352 /* 1353 * Make sure there's data to read. Wait for input if we can, otherwise 1354 * return an appropriate error. 1355 */ 1356 static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) 1357 { 1358 int ret; 1359 1360 /* 1361 * Check the pipe occupancy without the inode lock first. This function 1362 * is speculative anyways, so missing one is ok. 1363 */ 1364 if (!pipe_empty(pipe->head, pipe->tail)) 1365 return 0; 1366 1367 ret = 0; 1368 pipe_lock(pipe); 1369 1370 while (pipe_empty(pipe->head, pipe->tail)) { 1371 if (signal_pending(current)) { 1372 ret = -ERESTARTSYS; 1373 break; 1374 } 1375 if (!pipe->writers) 1376 break; 1377 if (flags & SPLICE_F_NONBLOCK) { 1378 ret = -EAGAIN; 1379 break; 1380 } 1381 pipe_wait_readable(pipe); 1382 } 1383 1384 pipe_unlock(pipe); 1385 return ret; 1386 } 1387 1388 /* 1389 * Make sure there's writeable room. Wait for room if we can, otherwise 1390 * return an appropriate error. 1391 */ 1392 static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) 1393 { 1394 int ret; 1395 1396 /* 1397 * Check pipe occupancy without the inode lock first. This function 1398 * is speculative anyways, so missing one is ok. 1399 */ 1400 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 1401 return 0; 1402 1403 ret = 0; 1404 pipe_lock(pipe); 1405 1406 while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { 1407 if (!pipe->readers) { 1408 send_sig(SIGPIPE, current, 0); 1409 ret = -EPIPE; 1410 break; 1411 } 1412 if (flags & SPLICE_F_NONBLOCK) { 1413 ret = -EAGAIN; 1414 break; 1415 } 1416 if (signal_pending(current)) { 1417 ret = -ERESTARTSYS; 1418 break; 1419 } 1420 pipe_wait_writable(pipe); 1421 } 1422 1423 pipe_unlock(pipe); 1424 return ret; 1425 } 1426 1427 /* 1428 * Splice contents of ipipe to opipe. 1429 */ 1430 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1431 struct pipe_inode_info *opipe, 1432 size_t len, unsigned int flags) 1433 { 1434 struct pipe_buffer *ibuf, *obuf; 1435 unsigned int i_head, o_head; 1436 unsigned int i_tail, o_tail; 1437 unsigned int i_mask, o_mask; 1438 int ret = 0; 1439 bool input_wakeup = false; 1440 1441 1442 retry: 1443 ret = ipipe_prep(ipipe, flags); 1444 if (ret) 1445 return ret; 1446 1447 ret = opipe_prep(opipe, flags); 1448 if (ret) 1449 return ret; 1450 1451 /* 1452 * Potential ABBA deadlock, work around it by ordering lock 1453 * grabbing by pipe info address. Otherwise two different processes 1454 * could deadlock (one doing tee from A -> B, the other from B -> A). 1455 */ 1456 pipe_double_lock(ipipe, opipe); 1457 1458 i_tail = ipipe->tail; 1459 i_mask = ipipe->ring_size - 1; 1460 o_head = opipe->head; 1461 o_mask = opipe->ring_size - 1; 1462 1463 do { 1464 size_t o_len; 1465 1466 if (!opipe->readers) { 1467 send_sig(SIGPIPE, current, 0); 1468 if (!ret) 1469 ret = -EPIPE; 1470 break; 1471 } 1472 1473 i_head = ipipe->head; 1474 o_tail = opipe->tail; 1475 1476 if (pipe_empty(i_head, i_tail) && !ipipe->writers) 1477 break; 1478 1479 /* 1480 * Cannot make any progress, because either the input 1481 * pipe is empty or the output pipe is full. 1482 */ 1483 if (pipe_empty(i_head, i_tail) || 1484 pipe_full(o_head, o_tail, opipe->max_usage)) { 1485 /* Already processed some buffers, break */ 1486 if (ret) 1487 break; 1488 1489 if (flags & SPLICE_F_NONBLOCK) { 1490 ret = -EAGAIN; 1491 break; 1492 } 1493 1494 /* 1495 * We raced with another reader/writer and haven't 1496 * managed to process any buffers. A zero return 1497 * value means EOF, so retry instead. 1498 */ 1499 pipe_unlock(ipipe); 1500 pipe_unlock(opipe); 1501 goto retry; 1502 } 1503 1504 ibuf = &ipipe->bufs[i_tail & i_mask]; 1505 obuf = &opipe->bufs[o_head & o_mask]; 1506 1507 if (len >= ibuf->len) { 1508 /* 1509 * Simply move the whole buffer from ipipe to opipe 1510 */ 1511 *obuf = *ibuf; 1512 ibuf->ops = NULL; 1513 i_tail++; 1514 ipipe->tail = i_tail; 1515 input_wakeup = true; 1516 o_len = obuf->len; 1517 o_head++; 1518 opipe->head = o_head; 1519 } else { 1520 /* 1521 * Get a reference to this pipe buffer, 1522 * so we can copy the contents over. 1523 */ 1524 if (!pipe_buf_get(ipipe, ibuf)) { 1525 if (ret == 0) 1526 ret = -EFAULT; 1527 break; 1528 } 1529 *obuf = *ibuf; 1530 1531 /* 1532 * Don't inherit the gift and merge flags, we need to 1533 * prevent multiple steals of this page. 1534 */ 1535 obuf->flags &= ~PIPE_BUF_FLAG_GIFT; 1536 obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; 1537 1538 obuf->len = len; 1539 ibuf->offset += len; 1540 ibuf->len -= len; 1541 o_len = len; 1542 o_head++; 1543 opipe->head = o_head; 1544 } 1545 ret += o_len; 1546 len -= o_len; 1547 } while (len); 1548 1549 pipe_unlock(ipipe); 1550 pipe_unlock(opipe); 1551 1552 /* 1553 * If we put data in the output pipe, wakeup any potential readers. 1554 */ 1555 if (ret > 0) 1556 wakeup_pipe_readers(opipe); 1557 1558 if (input_wakeup) 1559 wakeup_pipe_writers(ipipe); 1560 1561 return ret; 1562 } 1563 1564 /* 1565 * Link contents of ipipe to opipe. 1566 */ 1567 static int link_pipe(struct pipe_inode_info *ipipe, 1568 struct pipe_inode_info *opipe, 1569 size_t len, unsigned int flags) 1570 { 1571 struct pipe_buffer *ibuf, *obuf; 1572 unsigned int i_head, o_head; 1573 unsigned int i_tail, o_tail; 1574 unsigned int i_mask, o_mask; 1575 int ret = 0; 1576 1577 /* 1578 * Potential ABBA deadlock, work around it by ordering lock 1579 * grabbing by pipe info address. Otherwise two different processes 1580 * could deadlock (one doing tee from A -> B, the other from B -> A). 1581 */ 1582 pipe_double_lock(ipipe, opipe); 1583 1584 i_tail = ipipe->tail; 1585 i_mask = ipipe->ring_size - 1; 1586 o_head = opipe->head; 1587 o_mask = opipe->ring_size - 1; 1588 1589 do { 1590 if (!opipe->readers) { 1591 send_sig(SIGPIPE, current, 0); 1592 if (!ret) 1593 ret = -EPIPE; 1594 break; 1595 } 1596 1597 i_head = ipipe->head; 1598 o_tail = opipe->tail; 1599 1600 /* 1601 * If we have iterated all input buffers or run out of 1602 * output room, break. 1603 */ 1604 if (pipe_empty(i_head, i_tail) || 1605 pipe_full(o_head, o_tail, opipe->max_usage)) 1606 break; 1607 1608 ibuf = &ipipe->bufs[i_tail & i_mask]; 1609 obuf = &opipe->bufs[o_head & o_mask]; 1610 1611 /* 1612 * Get a reference to this pipe buffer, 1613 * so we can copy the contents over. 1614 */ 1615 if (!pipe_buf_get(ipipe, ibuf)) { 1616 if (ret == 0) 1617 ret = -EFAULT; 1618 break; 1619 } 1620 1621 *obuf = *ibuf; 1622 1623 /* 1624 * Don't inherit the gift and merge flag, we need to prevent 1625 * multiple steals of this page. 1626 */ 1627 obuf->flags &= ~PIPE_BUF_FLAG_GIFT; 1628 obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE; 1629 1630 if (obuf->len > len) 1631 obuf->len = len; 1632 ret += obuf->len; 1633 len -= obuf->len; 1634 1635 o_head++; 1636 opipe->head = o_head; 1637 i_tail++; 1638 } while (len); 1639 1640 pipe_unlock(ipipe); 1641 pipe_unlock(opipe); 1642 1643 /* 1644 * If we put data in the output pipe, wakeup any potential readers. 1645 */ 1646 if (ret > 0) 1647 wakeup_pipe_readers(opipe); 1648 1649 return ret; 1650 } 1651 1652 /* 1653 * This is a tee(1) implementation that works on pipes. It doesn't copy 1654 * any data, it simply references the 'in' pages on the 'out' pipe. 1655 * The 'flags' used are the SPLICE_F_* variants, currently the only 1656 * applicable one is SPLICE_F_NONBLOCK. 1657 */ 1658 long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) 1659 { 1660 struct pipe_inode_info *ipipe = get_pipe_info(in, true); 1661 struct pipe_inode_info *opipe = get_pipe_info(out, true); 1662 int ret = -EINVAL; 1663 1664 if (unlikely(!(in->f_mode & FMODE_READ) || 1665 !(out->f_mode & FMODE_WRITE))) 1666 return -EBADF; 1667 1668 /* 1669 * Duplicate the contents of ipipe to opipe without actually 1670 * copying the data. 1671 */ 1672 if (ipipe && opipe && ipipe != opipe) { 1673 if ((in->f_flags | out->f_flags) & O_NONBLOCK) 1674 flags |= SPLICE_F_NONBLOCK; 1675 1676 /* 1677 * Keep going, unless we encounter an error. The ipipe/opipe 1678 * ordering doesn't really matter. 1679 */ 1680 ret = ipipe_prep(ipipe, flags); 1681 if (!ret) { 1682 ret = opipe_prep(opipe, flags); 1683 if (!ret) 1684 ret = link_pipe(ipipe, opipe, len, flags); 1685 } 1686 } 1687 1688 return ret; 1689 } 1690 1691 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 1692 { 1693 struct fd in, out; 1694 int error; 1695 1696 if (unlikely(flags & ~SPLICE_F_ALL)) 1697 return -EINVAL; 1698 1699 if (unlikely(!len)) 1700 return 0; 1701 1702 error = -EBADF; 1703 in = fdget(fdin); 1704 if (in.file) { 1705 out = fdget(fdout); 1706 if (out.file) { 1707 error = do_tee(in.file, out.file, len, flags); 1708 fdput(out); 1709 } 1710 fdput(in); 1711 } 1712 1713 return error; 1714 } 1715