1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * "splice": joining two ropes together by interweaving their strands. 4 * 5 * This is the "extended pipe" functionality, where a pipe is used as 6 * an arbitrary in-memory buffer. Think of a pipe as a small kernel 7 * buffer that you can use to transfer data from one end to the other. 8 * 9 * The traditional unix read/write is extended with a "splice()" operation 10 * that transfers data buffers to or from a pipe buffer. 11 * 12 * Named by Larry McVoy, original implementation from Linus, extended by 13 * Jens to support splicing to files, network, direct splicing, etc and 14 * fixing lots of bugs. 15 * 16 * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk> 17 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org> 18 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu> 19 * 20 */ 21 #include <linux/bvec.h> 22 #include <linux/fs.h> 23 #include <linux/file.h> 24 #include <linux/pagemap.h> 25 #include <linux/splice.h> 26 #include <linux/memcontrol.h> 27 #include <linux/mm_inline.h> 28 #include <linux/swap.h> 29 #include <linux/writeback.h> 30 #include <linux/export.h> 31 #include <linux/syscalls.h> 32 #include <linux/uio.h> 33 #include <linux/security.h> 34 #include <linux/gfp.h> 35 #include <linux/socket.h> 36 #include <linux/sched/signal.h> 37 38 #include "internal.h" 39 40 /* 41 * Attempt to steal a page from a pipe buffer. This should perhaps go into 42 * a vm helper function, it's already simplified quite a bit by the 43 * addition of remove_mapping(). If success is returned, the caller may 44 * attempt to reuse this page for another destination. 
45 */ 46 static bool page_cache_pipe_buf_try_steal(struct pipe_inode_info *pipe, 47 struct pipe_buffer *buf) 48 { 49 struct folio *folio = page_folio(buf->page); 50 struct address_space *mapping; 51 52 folio_lock(folio); 53 54 mapping = folio_mapping(folio); 55 if (mapping) { 56 WARN_ON(!folio_test_uptodate(folio)); 57 58 /* 59 * At least for ext2 with nobh option, we need to wait on 60 * writeback completing on this folio, since we'll remove it 61 * from the pagecache. Otherwise truncate wont wait on the 62 * folio, allowing the disk blocks to be reused by someone else 63 * before we actually wrote our data to them. fs corruption 64 * ensues. 65 */ 66 folio_wait_writeback(folio); 67 68 if (folio_has_private(folio) && 69 !filemap_release_folio(folio, GFP_KERNEL)) 70 goto out_unlock; 71 72 /* 73 * If we succeeded in removing the mapping, set LRU flag 74 * and return good. 75 */ 76 if (remove_mapping(mapping, folio)) { 77 buf->flags |= PIPE_BUF_FLAG_LRU; 78 return true; 79 } 80 } 81 82 /* 83 * Raced with truncate or failed to remove folio from current 84 * address space, unlock and return failure. 85 */ 86 out_unlock: 87 folio_unlock(folio); 88 return false; 89 } 90 91 static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe, 92 struct pipe_buffer *buf) 93 { 94 put_page(buf->page); 95 buf->flags &= ~PIPE_BUF_FLAG_LRU; 96 } 97 98 /* 99 * Check whether the contents of buf is OK to access. Since the content 100 * is a page cache page, IO may be in flight. 101 */ 102 static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe, 103 struct pipe_buffer *buf) 104 { 105 struct page *page = buf->page; 106 int err; 107 108 if (!PageUptodate(page)) { 109 lock_page(page); 110 111 /* 112 * Page got truncated/unhashed. This will cause a 0-byte 113 * splice, if this is the first page. 114 */ 115 if (!page->mapping) { 116 err = -ENODATA; 117 goto error; 118 } 119 120 /* 121 * Uh oh, read-error from disk. 
122 */ 123 if (!PageUptodate(page)) { 124 err = -EIO; 125 goto error; 126 } 127 128 /* 129 * Page is ok afterall, we are done. 130 */ 131 unlock_page(page); 132 } 133 134 return 0; 135 error: 136 unlock_page(page); 137 return err; 138 } 139 140 const struct pipe_buf_operations page_cache_pipe_buf_ops = { 141 .confirm = page_cache_pipe_buf_confirm, 142 .release = page_cache_pipe_buf_release, 143 .try_steal = page_cache_pipe_buf_try_steal, 144 .get = generic_pipe_buf_get, 145 }; 146 147 static bool user_page_pipe_buf_try_steal(struct pipe_inode_info *pipe, 148 struct pipe_buffer *buf) 149 { 150 if (!(buf->flags & PIPE_BUF_FLAG_GIFT)) 151 return false; 152 153 buf->flags |= PIPE_BUF_FLAG_LRU; 154 return generic_pipe_buf_try_steal(pipe, buf); 155 } 156 157 static const struct pipe_buf_operations user_page_pipe_buf_ops = { 158 .release = page_cache_pipe_buf_release, 159 .try_steal = user_page_pipe_buf_try_steal, 160 .get = generic_pipe_buf_get, 161 }; 162 163 static void wakeup_pipe_readers(struct pipe_inode_info *pipe) 164 { 165 smp_mb(); 166 if (waitqueue_active(&pipe->rd_wait)) 167 wake_up_interruptible(&pipe->rd_wait); 168 kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN); 169 } 170 171 /** 172 * splice_to_pipe - fill passed data into a pipe 173 * @pipe: pipe to fill 174 * @spd: data to fill 175 * 176 * Description: 177 * @spd contains a map of pages and len/offset tuples, along with 178 * the struct pipe_buf_operations associated with these pages. This 179 * function will link that data to the pipe. 
180 * 181 */ 182 ssize_t splice_to_pipe(struct pipe_inode_info *pipe, 183 struct splice_pipe_desc *spd) 184 { 185 unsigned int spd_pages = spd->nr_pages; 186 unsigned int tail = pipe->tail; 187 unsigned int head = pipe->head; 188 unsigned int mask = pipe->ring_size - 1; 189 int ret = 0, page_nr = 0; 190 191 if (!spd_pages) 192 return 0; 193 194 if (unlikely(!pipe->readers)) { 195 send_sig(SIGPIPE, current, 0); 196 ret = -EPIPE; 197 goto out; 198 } 199 200 while (!pipe_full(head, tail, pipe->max_usage)) { 201 struct pipe_buffer *buf = &pipe->bufs[head & mask]; 202 203 buf->page = spd->pages[page_nr]; 204 buf->offset = spd->partial[page_nr].offset; 205 buf->len = spd->partial[page_nr].len; 206 buf->private = spd->partial[page_nr].private; 207 buf->ops = spd->ops; 208 buf->flags = 0; 209 210 head++; 211 pipe->head = head; 212 page_nr++; 213 ret += buf->len; 214 215 if (!--spd->nr_pages) 216 break; 217 } 218 219 if (!ret) 220 ret = -EAGAIN; 221 222 out: 223 while (page_nr < spd_pages) 224 spd->spd_release(spd, page_nr++); 225 226 return ret; 227 } 228 EXPORT_SYMBOL_GPL(splice_to_pipe); 229 230 ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) 231 { 232 unsigned int head = pipe->head; 233 unsigned int tail = pipe->tail; 234 unsigned int mask = pipe->ring_size - 1; 235 int ret; 236 237 if (unlikely(!pipe->readers)) { 238 send_sig(SIGPIPE, current, 0); 239 ret = -EPIPE; 240 } else if (pipe_full(head, tail, pipe->max_usage)) { 241 ret = -EAGAIN; 242 } else { 243 pipe->bufs[head & mask] = *buf; 244 pipe->head = head + 1; 245 return buf->len; 246 } 247 pipe_buf_release(pipe, buf); 248 return ret; 249 } 250 EXPORT_SYMBOL(add_to_pipe); 251 252 /* 253 * Check if we need to grow the arrays holding pages and partial page 254 * descriptions. 
255 */ 256 int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) 257 { 258 unsigned int max_usage = READ_ONCE(pipe->max_usage); 259 260 spd->nr_pages_max = max_usage; 261 if (max_usage <= PIPE_DEF_BUFFERS) 262 return 0; 263 264 spd->pages = kmalloc_array(max_usage, sizeof(struct page *), GFP_KERNEL); 265 spd->partial = kmalloc_array(max_usage, sizeof(struct partial_page), 266 GFP_KERNEL); 267 268 if (spd->pages && spd->partial) 269 return 0; 270 271 kfree(spd->pages); 272 kfree(spd->partial); 273 return -ENOMEM; 274 } 275 276 void splice_shrink_spd(struct splice_pipe_desc *spd) 277 { 278 if (spd->nr_pages_max <= PIPE_DEF_BUFFERS) 279 return; 280 281 kfree(spd->pages); 282 kfree(spd->partial); 283 } 284 285 /** 286 * generic_file_splice_read - splice data from file to a pipe 287 * @in: file to splice from 288 * @ppos: position in @in 289 * @pipe: pipe to splice to 290 * @len: number of bytes to splice 291 * @flags: splice modifier flags 292 * 293 * Description: 294 * Will read pages from given file and fill them into a pipe. Can be 295 * used as long as it has more or less sane ->read_iter(). 296 * 297 */ 298 ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, 299 struct pipe_inode_info *pipe, size_t len, 300 unsigned int flags) 301 { 302 struct iov_iter to; 303 struct kiocb kiocb; 304 int ret; 305 306 iov_iter_pipe(&to, READ, pipe, len); 307 init_sync_kiocb(&kiocb, in); 308 kiocb.ki_pos = *ppos; 309 ret = call_read_iter(in, &kiocb, &to); 310 if (ret > 0) { 311 *ppos = kiocb.ki_pos; 312 file_accessed(in); 313 } else if (ret < 0) { 314 /* free what was emitted */ 315 pipe_discard_from(pipe, to.start_head); 316 /* 317 * callers of ->splice_read() expect -EAGAIN on 318 * "can't put anything in there", rather than -EFAULT. 
319 */ 320 if (ret == -EFAULT) 321 ret = -EAGAIN; 322 } 323 324 return ret; 325 } 326 EXPORT_SYMBOL(generic_file_splice_read); 327 328 const struct pipe_buf_operations default_pipe_buf_ops = { 329 .release = generic_pipe_buf_release, 330 .try_steal = generic_pipe_buf_try_steal, 331 .get = generic_pipe_buf_get, 332 }; 333 334 /* Pipe buffer operations for a socket and similar. */ 335 const struct pipe_buf_operations nosteal_pipe_buf_ops = { 336 .release = generic_pipe_buf_release, 337 .get = generic_pipe_buf_get, 338 }; 339 EXPORT_SYMBOL(nosteal_pipe_buf_ops); 340 341 /* 342 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos' 343 * using sendpage(). Return the number of bytes sent. 344 */ 345 static int pipe_to_sendpage(struct pipe_inode_info *pipe, 346 struct pipe_buffer *buf, struct splice_desc *sd) 347 { 348 struct file *file = sd->u.file; 349 loff_t pos = sd->pos; 350 int more; 351 352 if (!likely(file->f_op->sendpage)) 353 return -EINVAL; 354 355 more = (sd->flags & SPLICE_F_MORE) ? MSG_MORE : 0; 356 357 if (sd->len < sd->total_len && 358 pipe_occupancy(pipe->head, pipe->tail) > 1) 359 more |= MSG_SENDPAGE_NOTLAST; 360 361 return file->f_op->sendpage(file, buf->page, buf->offset, 362 sd->len, &pos, more); 363 } 364 365 static void wakeup_pipe_writers(struct pipe_inode_info *pipe) 366 { 367 smp_mb(); 368 if (waitqueue_active(&pipe->wr_wait)) 369 wake_up_interruptible(&pipe->wr_wait); 370 kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT); 371 } 372 373 /** 374 * splice_from_pipe_feed - feed available data from a pipe to a file 375 * @pipe: pipe to splice from 376 * @sd: information to @actor 377 * @actor: handler that splices the data 378 * 379 * Description: 380 * This function loops over the pipe and calls @actor to do the 381 * actual moving of a single struct pipe_buffer to the desired 382 * destination. 
It returns when there's no more buffers left in 383 * the pipe or if the requested number of bytes (@sd->total_len) 384 * have been copied. It returns a positive number (one) if the 385 * pipe needs to be filled with more data, zero if the required 386 * number of bytes have been copied and -errno on error. 387 * 388 * This, together with splice_from_pipe_{begin,end,next}, may be 389 * used to implement the functionality of __splice_from_pipe() when 390 * locking is required around copying the pipe buffers to the 391 * destination. 392 */ 393 static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_desc *sd, 394 splice_actor *actor) 395 { 396 unsigned int head = pipe->head; 397 unsigned int tail = pipe->tail; 398 unsigned int mask = pipe->ring_size - 1; 399 int ret; 400 401 while (!pipe_empty(head, tail)) { 402 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 403 404 sd->len = buf->len; 405 if (sd->len > sd->total_len) 406 sd->len = sd->total_len; 407 408 ret = pipe_buf_confirm(pipe, buf); 409 if (unlikely(ret)) { 410 if (ret == -ENODATA) 411 ret = 0; 412 return ret; 413 } 414 415 ret = actor(pipe, buf, sd); 416 if (ret <= 0) 417 return ret; 418 419 buf->offset += ret; 420 buf->len -= ret; 421 422 sd->num_spliced += ret; 423 sd->len -= ret; 424 sd->pos += ret; 425 sd->total_len -= ret; 426 427 if (!buf->len) { 428 pipe_buf_release(pipe, buf); 429 tail++; 430 pipe->tail = tail; 431 if (pipe->files) 432 sd->need_wakeup = true; 433 } 434 435 if (!sd->total_len) 436 return 0; 437 } 438 439 return 1; 440 } 441 442 /* We know we have a pipe buffer, but maybe it's empty? 
*/ 443 static inline bool eat_empty_buffer(struct pipe_inode_info *pipe) 444 { 445 unsigned int tail = pipe->tail; 446 unsigned int mask = pipe->ring_size - 1; 447 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 448 449 if (unlikely(!buf->len)) { 450 pipe_buf_release(pipe, buf); 451 pipe->tail = tail+1; 452 return true; 453 } 454 455 return false; 456 } 457 458 /** 459 * splice_from_pipe_next - wait for some data to splice from 460 * @pipe: pipe to splice from 461 * @sd: information about the splice operation 462 * 463 * Description: 464 * This function will wait for some data and return a positive 465 * value (one) if pipe buffers are available. It will return zero 466 * or -errno if no more data needs to be spliced. 467 */ 468 static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) 469 { 470 /* 471 * Check for signal early to make process killable when there are 472 * always buffers available 473 */ 474 if (signal_pending(current)) 475 return -ERESTARTSYS; 476 477 repeat: 478 while (pipe_empty(pipe->head, pipe->tail)) { 479 if (!pipe->writers) 480 return 0; 481 482 if (sd->num_spliced) 483 return 0; 484 485 if (sd->flags & SPLICE_F_NONBLOCK) 486 return -EAGAIN; 487 488 if (signal_pending(current)) 489 return -ERESTARTSYS; 490 491 if (sd->need_wakeup) { 492 wakeup_pipe_writers(pipe); 493 sd->need_wakeup = false; 494 } 495 496 pipe_wait_readable(pipe); 497 } 498 499 if (eat_empty_buffer(pipe)) 500 goto repeat; 501 502 return 1; 503 } 504 505 /** 506 * splice_from_pipe_begin - start splicing from pipe 507 * @sd: information about the splice operation 508 * 509 * Description: 510 * This function should be called before a loop containing 511 * splice_from_pipe_next() and splice_from_pipe_feed() to 512 * initialize the necessary fields of @sd. 
513 */ 514 static void splice_from_pipe_begin(struct splice_desc *sd) 515 { 516 sd->num_spliced = 0; 517 sd->need_wakeup = false; 518 } 519 520 /** 521 * splice_from_pipe_end - finish splicing from pipe 522 * @pipe: pipe to splice from 523 * @sd: information about the splice operation 524 * 525 * Description: 526 * This function will wake up pipe writers if necessary. It should 527 * be called after a loop containing splice_from_pipe_next() and 528 * splice_from_pipe_feed(). 529 */ 530 static void splice_from_pipe_end(struct pipe_inode_info *pipe, struct splice_desc *sd) 531 { 532 if (sd->need_wakeup) 533 wakeup_pipe_writers(pipe); 534 } 535 536 /** 537 * __splice_from_pipe - splice data from a pipe to given actor 538 * @pipe: pipe to splice from 539 * @sd: information to @actor 540 * @actor: handler that splices the data 541 * 542 * Description: 543 * This function does little more than loop over the pipe and call 544 * @actor to do the actual moving of a single struct pipe_buffer to 545 * the desired destination. See pipe_to_file, pipe_to_sendpage, or 546 * pipe_to_user. 547 * 548 */ 549 ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, 550 splice_actor *actor) 551 { 552 int ret; 553 554 splice_from_pipe_begin(sd); 555 do { 556 cond_resched(); 557 ret = splice_from_pipe_next(pipe, sd); 558 if (ret > 0) 559 ret = splice_from_pipe_feed(pipe, sd, actor); 560 } while (ret > 0); 561 splice_from_pipe_end(pipe, sd); 562 563 return sd->num_spliced ? sd->num_spliced : ret; 564 } 565 EXPORT_SYMBOL(__splice_from_pipe); 566 567 /** 568 * splice_from_pipe - splice data from a pipe to a file 569 * @pipe: pipe to splice from 570 * @out: file to splice to 571 * @ppos: position in @out 572 * @len: how many bytes to splice 573 * @flags: splice modifier flags 574 * @actor: handler that splices the data 575 * 576 * Description: 577 * See __splice_from_pipe. This function locks the pipe inode, 578 * otherwise it's identical to __splice_from_pipe(). 
579 * 580 */ 581 ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, 582 loff_t *ppos, size_t len, unsigned int flags, 583 splice_actor *actor) 584 { 585 ssize_t ret; 586 struct splice_desc sd = { 587 .total_len = len, 588 .flags = flags, 589 .pos = *ppos, 590 .u.file = out, 591 }; 592 593 pipe_lock(pipe); 594 ret = __splice_from_pipe(pipe, &sd, actor); 595 pipe_unlock(pipe); 596 597 return ret; 598 } 599 600 /** 601 * iter_file_splice_write - splice data from a pipe to a file 602 * @pipe: pipe info 603 * @out: file to write to 604 * @ppos: position in @out 605 * @len: number of bytes to splice 606 * @flags: splice modifier flags 607 * 608 * Description: 609 * Will either move or copy pages (determined by @flags options) from 610 * the given pipe inode to the given file. 611 * This one is ->write_iter-based. 612 * 613 */ 614 ssize_t 615 iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, 616 loff_t *ppos, size_t len, unsigned int flags) 617 { 618 struct splice_desc sd = { 619 .total_len = len, 620 .flags = flags, 621 .pos = *ppos, 622 .u.file = out, 623 }; 624 int nbufs = pipe->max_usage; 625 struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), 626 GFP_KERNEL); 627 ssize_t ret; 628 629 if (unlikely(!array)) 630 return -ENOMEM; 631 632 pipe_lock(pipe); 633 634 splice_from_pipe_begin(&sd); 635 while (sd.total_len) { 636 struct iov_iter from; 637 unsigned int head, tail, mask; 638 size_t left; 639 int n; 640 641 ret = splice_from_pipe_next(pipe, &sd); 642 if (ret <= 0) 643 break; 644 645 if (unlikely(nbufs < pipe->max_usage)) { 646 kfree(array); 647 nbufs = pipe->max_usage; 648 array = kcalloc(nbufs, sizeof(struct bio_vec), 649 GFP_KERNEL); 650 if (!array) { 651 ret = -ENOMEM; 652 break; 653 } 654 } 655 656 head = pipe->head; 657 tail = pipe->tail; 658 mask = pipe->ring_size - 1; 659 660 /* build the vector */ 661 left = sd.total_len; 662 for (n = 0; !pipe_empty(head, tail) && left && n < nbufs; tail++) { 663 struct 
pipe_buffer *buf = &pipe->bufs[tail & mask]; 664 size_t this_len = buf->len; 665 666 /* zero-length bvecs are not supported, skip them */ 667 if (!this_len) 668 continue; 669 this_len = min(this_len, left); 670 671 ret = pipe_buf_confirm(pipe, buf); 672 if (unlikely(ret)) { 673 if (ret == -ENODATA) 674 ret = 0; 675 goto done; 676 } 677 678 array[n].bv_page = buf->page; 679 array[n].bv_len = this_len; 680 array[n].bv_offset = buf->offset; 681 left -= this_len; 682 n++; 683 } 684 685 iov_iter_bvec(&from, WRITE, array, n, sd.total_len - left); 686 ret = vfs_iter_write(out, &from, &sd.pos, 0); 687 if (ret <= 0) 688 break; 689 690 sd.num_spliced += ret; 691 sd.total_len -= ret; 692 *ppos = sd.pos; 693 694 /* dismiss the fully eaten buffers, adjust the partial one */ 695 tail = pipe->tail; 696 while (ret) { 697 struct pipe_buffer *buf = &pipe->bufs[tail & mask]; 698 if (ret >= buf->len) { 699 ret -= buf->len; 700 buf->len = 0; 701 pipe_buf_release(pipe, buf); 702 tail++; 703 pipe->tail = tail; 704 if (pipe->files) 705 sd.need_wakeup = true; 706 } else { 707 buf->offset += ret; 708 buf->len -= ret; 709 ret = 0; 710 } 711 } 712 } 713 done: 714 kfree(array); 715 splice_from_pipe_end(pipe, &sd); 716 717 pipe_unlock(pipe); 718 719 if (sd.num_spliced) 720 ret = sd.num_spliced; 721 722 return ret; 723 } 724 725 EXPORT_SYMBOL(iter_file_splice_write); 726 727 /** 728 * generic_splice_sendpage - splice data from a pipe to a socket 729 * @pipe: pipe to splice from 730 * @out: socket to write to 731 * @ppos: position in @out 732 * @len: number of bytes to splice 733 * @flags: splice modifier flags 734 * 735 * Description: 736 * Will send @len bytes from the pipe to a network socket. No data copying 737 * is involved. 
738 * 739 */ 740 ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, 741 loff_t *ppos, size_t len, unsigned int flags) 742 { 743 return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage); 744 } 745 746 EXPORT_SYMBOL(generic_splice_sendpage); 747 748 static int warn_unsupported(struct file *file, const char *op) 749 { 750 pr_debug_ratelimited( 751 "splice %s not supported for file %pD4 (pid: %d comm: %.20s)\n", 752 op, file, current->pid, current->comm); 753 return -EINVAL; 754 } 755 756 /* 757 * Attempt to initiate a splice from pipe to file. 758 */ 759 static long do_splice_from(struct pipe_inode_info *pipe, struct file *out, 760 loff_t *ppos, size_t len, unsigned int flags) 761 { 762 if (unlikely(!out->f_op->splice_write)) 763 return warn_unsupported(out, "write"); 764 return out->f_op->splice_write(pipe, out, ppos, len, flags); 765 } 766 767 /* 768 * Attempt to initiate a splice from a file to a pipe. 769 */ 770 static long do_splice_to(struct file *in, loff_t *ppos, 771 struct pipe_inode_info *pipe, size_t len, 772 unsigned int flags) 773 { 774 unsigned int p_space; 775 int ret; 776 777 if (unlikely(!(in->f_mode & FMODE_READ))) 778 return -EBADF; 779 780 /* Don't try to read more the pipe has space for. 
*/ 781 p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); 782 len = min_t(size_t, len, p_space << PAGE_SHIFT); 783 784 ret = rw_verify_area(READ, in, ppos, len); 785 if (unlikely(ret < 0)) 786 return ret; 787 788 if (unlikely(len > MAX_RW_COUNT)) 789 len = MAX_RW_COUNT; 790 791 if (unlikely(!in->f_op->splice_read)) 792 return warn_unsupported(in, "read"); 793 return in->f_op->splice_read(in, ppos, pipe, len, flags); 794 } 795 796 /** 797 * splice_direct_to_actor - splices data directly between two non-pipes 798 * @in: file to splice from 799 * @sd: actor information on where to splice to 800 * @actor: handles the data splicing 801 * 802 * Description: 803 * This is a special case helper to splice directly between two 804 * points, without requiring an explicit pipe. Internally an allocated 805 * pipe is cached in the process, and reused during the lifetime of 806 * that process. 807 * 808 */ 809 ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, 810 splice_direct_actor *actor) 811 { 812 struct pipe_inode_info *pipe; 813 long ret, bytes; 814 size_t len; 815 int i, flags, more; 816 817 /* 818 * We require the input to be seekable, as we don't want to randomly 819 * drop data for eg socket -> socket splicing. Use the piped splicing 820 * for that! 821 */ 822 if (unlikely(!(in->f_mode & FMODE_LSEEK))) 823 return -EINVAL; 824 825 /* 826 * neither in nor out is a pipe, setup an internal pipe attached to 827 * 'out' and transfer the wanted data from 'in' to 'out' through that 828 */ 829 pipe = current->splice_pipe; 830 if (unlikely(!pipe)) { 831 pipe = alloc_pipe_info(); 832 if (!pipe) 833 return -ENOMEM; 834 835 /* 836 * We don't have an immediate reader, but we'll read the stuff 837 * out of the pipe right after the splice_to_pipe(). So set 838 * PIPE_READERS appropriately. 839 */ 840 pipe->readers = 1; 841 842 current->splice_pipe = pipe; 843 } 844 845 /* 846 * Do the splice. 
847 */ 848 ret = 0; 849 bytes = 0; 850 len = sd->total_len; 851 flags = sd->flags; 852 853 /* 854 * Don't block on output, we have to drain the direct pipe. 855 */ 856 sd->flags &= ~SPLICE_F_NONBLOCK; 857 more = sd->flags & SPLICE_F_MORE; 858 859 WARN_ON_ONCE(!pipe_empty(pipe->head, pipe->tail)); 860 861 while (len) { 862 size_t read_len; 863 loff_t pos = sd->pos, prev_pos = pos; 864 865 ret = do_splice_to(in, &pos, pipe, len, flags); 866 if (unlikely(ret <= 0)) 867 goto out_release; 868 869 read_len = ret; 870 sd->total_len = read_len; 871 872 /* 873 * If more data is pending, set SPLICE_F_MORE 874 * If this is the last data and SPLICE_F_MORE was not set 875 * initially, clears it. 876 */ 877 if (read_len < len) 878 sd->flags |= SPLICE_F_MORE; 879 else if (!more) 880 sd->flags &= ~SPLICE_F_MORE; 881 /* 882 * NOTE: nonblocking mode only applies to the input. We 883 * must not do the output in nonblocking mode as then we 884 * could get stuck data in the internal pipe: 885 */ 886 ret = actor(pipe, sd); 887 if (unlikely(ret <= 0)) { 888 sd->pos = prev_pos; 889 goto out_release; 890 } 891 892 bytes += ret; 893 len -= ret; 894 sd->pos = pos; 895 896 if (ret < read_len) { 897 sd->pos = prev_pos + ret; 898 goto out_release; 899 } 900 } 901 902 done: 903 pipe->tail = pipe->head = 0; 904 file_accessed(in); 905 return bytes; 906 907 out_release: 908 /* 909 * If we did an incomplete transfer we must release 910 * the pipe buffers in question: 911 */ 912 for (i = 0; i < pipe->ring_size; i++) { 913 struct pipe_buffer *buf = &pipe->bufs[i]; 914 915 if (buf->ops) 916 pipe_buf_release(pipe, buf); 917 } 918 919 if (!bytes) 920 bytes = ret; 921 922 goto done; 923 } 924 EXPORT_SYMBOL(splice_direct_to_actor); 925 926 static int direct_splice_actor(struct pipe_inode_info *pipe, 927 struct splice_desc *sd) 928 { 929 struct file *file = sd->u.file; 930 931 return do_splice_from(pipe, file, sd->opos, sd->total_len, 932 sd->flags); 933 } 934 935 /** 936 * do_splice_direct - splices data 
directly between two files 937 * @in: file to splice from 938 * @ppos: input file offset 939 * @out: file to splice to 940 * @opos: output file offset 941 * @len: number of bytes to splice 942 * @flags: splice modifier flags 943 * 944 * Description: 945 * For use by do_sendfile(). splice can easily emulate sendfile, but 946 * doing it in the application would incur an extra system call 947 * (splice in + splice out, as compared to just sendfile()). So this helper 948 * can splice directly through a process-private pipe. 949 * 950 */ 951 long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, 952 loff_t *opos, size_t len, unsigned int flags) 953 { 954 struct splice_desc sd = { 955 .len = len, 956 .total_len = len, 957 .flags = flags, 958 .pos = *ppos, 959 .u.file = out, 960 .opos = opos, 961 }; 962 long ret; 963 964 if (unlikely(!(out->f_mode & FMODE_WRITE))) 965 return -EBADF; 966 967 if (unlikely(out->f_flags & O_APPEND)) 968 return -EINVAL; 969 970 ret = rw_verify_area(WRITE, out, opos, len); 971 if (unlikely(ret < 0)) 972 return ret; 973 974 ret = splice_direct_to_actor(in, &sd, direct_splice_actor); 975 if (ret > 0) 976 *ppos = sd.pos; 977 978 return ret; 979 } 980 EXPORT_SYMBOL(do_splice_direct); 981 982 static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) 983 { 984 for (;;) { 985 if (unlikely(!pipe->readers)) { 986 send_sig(SIGPIPE, current, 0); 987 return -EPIPE; 988 } 989 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 990 return 0; 991 if (flags & SPLICE_F_NONBLOCK) 992 return -EAGAIN; 993 if (signal_pending(current)) 994 return -ERESTARTSYS; 995 pipe_wait_writable(pipe); 996 } 997 } 998 999 static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe, 1000 struct pipe_inode_info *opipe, 1001 size_t len, unsigned int flags); 1002 1003 long splice_file_to_pipe(struct file *in, 1004 struct pipe_inode_info *opipe, 1005 loff_t *offset, 1006 size_t len, unsigned int flags) 1007 { 1008 long ret; 1009 1010 
pipe_lock(opipe); 1011 ret = wait_for_space(opipe, flags); 1012 if (!ret) 1013 ret = do_splice_to(in, offset, opipe, len, flags); 1014 pipe_unlock(opipe); 1015 if (ret > 0) 1016 wakeup_pipe_readers(opipe); 1017 return ret; 1018 } 1019 1020 /* 1021 * Determine where to splice to/from. 1022 */ 1023 long do_splice(struct file *in, loff_t *off_in, struct file *out, 1024 loff_t *off_out, size_t len, unsigned int flags) 1025 { 1026 struct pipe_inode_info *ipipe; 1027 struct pipe_inode_info *opipe; 1028 loff_t offset; 1029 long ret; 1030 1031 if (unlikely(!(in->f_mode & FMODE_READ) || 1032 !(out->f_mode & FMODE_WRITE))) 1033 return -EBADF; 1034 1035 ipipe = get_pipe_info(in, true); 1036 opipe = get_pipe_info(out, true); 1037 1038 if (ipipe && opipe) { 1039 if (off_in || off_out) 1040 return -ESPIPE; 1041 1042 /* Splicing to self would be fun, but... */ 1043 if (ipipe == opipe) 1044 return -EINVAL; 1045 1046 if ((in->f_flags | out->f_flags) & O_NONBLOCK) 1047 flags |= SPLICE_F_NONBLOCK; 1048 1049 return splice_pipe_to_pipe(ipipe, opipe, len, flags); 1050 } 1051 1052 if (ipipe) { 1053 if (off_in) 1054 return -ESPIPE; 1055 if (off_out) { 1056 if (!(out->f_mode & FMODE_PWRITE)) 1057 return -EINVAL; 1058 offset = *off_out; 1059 } else { 1060 offset = out->f_pos; 1061 } 1062 1063 if (unlikely(out->f_flags & O_APPEND)) 1064 return -EINVAL; 1065 1066 ret = rw_verify_area(WRITE, out, &offset, len); 1067 if (unlikely(ret < 0)) 1068 return ret; 1069 1070 if (in->f_flags & O_NONBLOCK) 1071 flags |= SPLICE_F_NONBLOCK; 1072 1073 file_start_write(out); 1074 ret = do_splice_from(ipipe, out, &offset, len, flags); 1075 file_end_write(out); 1076 1077 if (!off_out) 1078 out->f_pos = offset; 1079 else 1080 *off_out = offset; 1081 1082 return ret; 1083 } 1084 1085 if (opipe) { 1086 if (off_out) 1087 return -ESPIPE; 1088 if (off_in) { 1089 if (!(in->f_mode & FMODE_PREAD)) 1090 return -EINVAL; 1091 offset = *off_in; 1092 } else { 1093 offset = in->f_pos; 1094 } 1095 1096 if (out->f_flags & 
O_NONBLOCK) 1097 flags |= SPLICE_F_NONBLOCK; 1098 1099 ret = splice_file_to_pipe(in, opipe, &offset, len, flags); 1100 if (!off_in) 1101 in->f_pos = offset; 1102 else 1103 *off_in = offset; 1104 1105 return ret; 1106 } 1107 1108 return -EINVAL; 1109 } 1110 1111 static long __do_splice(struct file *in, loff_t __user *off_in, 1112 struct file *out, loff_t __user *off_out, 1113 size_t len, unsigned int flags) 1114 { 1115 struct pipe_inode_info *ipipe; 1116 struct pipe_inode_info *opipe; 1117 loff_t offset, *__off_in = NULL, *__off_out = NULL; 1118 long ret; 1119 1120 ipipe = get_pipe_info(in, true); 1121 opipe = get_pipe_info(out, true); 1122 1123 if (ipipe && off_in) 1124 return -ESPIPE; 1125 if (opipe && off_out) 1126 return -ESPIPE; 1127 1128 if (off_out) { 1129 if (copy_from_user(&offset, off_out, sizeof(loff_t))) 1130 return -EFAULT; 1131 __off_out = &offset; 1132 } 1133 if (off_in) { 1134 if (copy_from_user(&offset, off_in, sizeof(loff_t))) 1135 return -EFAULT; 1136 __off_in = &offset; 1137 } 1138 1139 ret = do_splice(in, __off_in, out, __off_out, len, flags); 1140 if (ret < 0) 1141 return ret; 1142 1143 if (__off_out && copy_to_user(off_out, __off_out, sizeof(loff_t))) 1144 return -EFAULT; 1145 if (__off_in && copy_to_user(off_in, __off_in, sizeof(loff_t))) 1146 return -EFAULT; 1147 1148 return ret; 1149 } 1150 1151 static int iter_to_pipe(struct iov_iter *from, 1152 struct pipe_inode_info *pipe, 1153 unsigned flags) 1154 { 1155 struct pipe_buffer buf = { 1156 .ops = &user_page_pipe_buf_ops, 1157 .flags = flags 1158 }; 1159 size_t total = 0; 1160 int ret = 0; 1161 1162 while (iov_iter_count(from)) { 1163 struct page *pages[16]; 1164 ssize_t left; 1165 size_t start; 1166 int i, n; 1167 1168 left = iov_iter_get_pages2(from, pages, ~0UL, 16, &start); 1169 if (left <= 0) { 1170 ret = left; 1171 break; 1172 } 1173 1174 n = DIV_ROUND_UP(left + start, PAGE_SIZE); 1175 for (i = 0; i < n; i++) { 1176 int size = min_t(int, left, PAGE_SIZE - start); 1177 1178 buf.page = 
pages[i]; 1179 buf.offset = start; 1180 buf.len = size; 1181 ret = add_to_pipe(pipe, &buf); 1182 if (unlikely(ret < 0)) { 1183 iov_iter_revert(from, left); 1184 // this one got dropped by add_to_pipe() 1185 while (++i < n) 1186 put_page(pages[i]); 1187 goto out; 1188 } 1189 total += ret; 1190 left -= size; 1191 start = 0; 1192 } 1193 } 1194 out: 1195 return total ? total : ret; 1196 } 1197 1198 static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf, 1199 struct splice_desc *sd) 1200 { 1201 int n = copy_page_to_iter(buf->page, buf->offset, sd->len, sd->u.data); 1202 return n == sd->len ? n : -EFAULT; 1203 } 1204 1205 /* 1206 * For lack of a better implementation, implement vmsplice() to userspace 1207 * as a simple copy of the pipes pages to the user iov. 1208 */ 1209 static long vmsplice_to_user(struct file *file, struct iov_iter *iter, 1210 unsigned int flags) 1211 { 1212 struct pipe_inode_info *pipe = get_pipe_info(file, true); 1213 struct splice_desc sd = { 1214 .total_len = iov_iter_count(iter), 1215 .flags = flags, 1216 .u.data = iter 1217 }; 1218 long ret = 0; 1219 1220 if (!pipe) 1221 return -EBADF; 1222 1223 if (sd.total_len) { 1224 pipe_lock(pipe); 1225 ret = __splice_from_pipe(pipe, &sd, pipe_to_user); 1226 pipe_unlock(pipe); 1227 } 1228 1229 return ret; 1230 } 1231 1232 /* 1233 * vmsplice splices a user address range into a pipe. It can be thought of 1234 * as splice-from-memory, where the regular splice is splice-from-file (or 1235 * to file). In both cases the output is a pipe, naturally. 
1236 */ 1237 static long vmsplice_to_pipe(struct file *file, struct iov_iter *iter, 1238 unsigned int flags) 1239 { 1240 struct pipe_inode_info *pipe; 1241 long ret = 0; 1242 unsigned buf_flag = 0; 1243 1244 if (flags & SPLICE_F_GIFT) 1245 buf_flag = PIPE_BUF_FLAG_GIFT; 1246 1247 pipe = get_pipe_info(file, true); 1248 if (!pipe) 1249 return -EBADF; 1250 1251 pipe_lock(pipe); 1252 ret = wait_for_space(pipe, flags); 1253 if (!ret) 1254 ret = iter_to_pipe(iter, pipe, buf_flag); 1255 pipe_unlock(pipe); 1256 if (ret > 0) 1257 wakeup_pipe_readers(pipe); 1258 return ret; 1259 } 1260 1261 static int vmsplice_type(struct fd f, int *type) 1262 { 1263 if (!f.file) 1264 return -EBADF; 1265 if (f.file->f_mode & FMODE_WRITE) { 1266 *type = WRITE; 1267 } else if (f.file->f_mode & FMODE_READ) { 1268 *type = READ; 1269 } else { 1270 fdput(f); 1271 return -EBADF; 1272 } 1273 return 0; 1274 } 1275 1276 /* 1277 * Note that vmsplice only really supports true splicing _from_ user memory 1278 * to a pipe, not the other way around. Splicing from user memory is a simple 1279 * operation that can be supported without any funky alignment restrictions 1280 * or nasty vm tricks. We simply map in the user memory and fill them into 1281 * a pipe. The reverse isn't quite as easy, though. There are two possible 1282 * solutions for that: 1283 * 1284 * - memcpy() the data internally, at which point we might as well just 1285 * do a regular read() on the buffer anyway. 1286 * - Lots of nasty vm tricks, that are neither fast nor flexible (it 1287 * has restriction limitations on both ends of the pipe). 1288 * 1289 * Currently we punt and implement it as a normal copy, see pipe_to_user(). 
1290 * 1291 */ 1292 SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, uiov, 1293 unsigned long, nr_segs, unsigned int, flags) 1294 { 1295 struct iovec iovstack[UIO_FASTIOV]; 1296 struct iovec *iov = iovstack; 1297 struct iov_iter iter; 1298 ssize_t error; 1299 struct fd f; 1300 int type; 1301 1302 if (unlikely(flags & ~SPLICE_F_ALL)) 1303 return -EINVAL; 1304 1305 f = fdget(fd); 1306 error = vmsplice_type(f, &type); 1307 if (error) 1308 return error; 1309 1310 error = import_iovec(type, uiov, nr_segs, 1311 ARRAY_SIZE(iovstack), &iov, &iter); 1312 if (error < 0) 1313 goto out_fdput; 1314 1315 if (!iov_iter_count(&iter)) 1316 error = 0; 1317 else if (iov_iter_rw(&iter) == WRITE) 1318 error = vmsplice_to_pipe(f.file, &iter, flags); 1319 else 1320 error = vmsplice_to_user(f.file, &iter, flags); 1321 1322 kfree(iov); 1323 out_fdput: 1324 fdput(f); 1325 return error; 1326 } 1327 1328 SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, 1329 int, fd_out, loff_t __user *, off_out, 1330 size_t, len, unsigned int, flags) 1331 { 1332 struct fd in, out; 1333 long error; 1334 1335 if (unlikely(!len)) 1336 return 0; 1337 1338 if (unlikely(flags & ~SPLICE_F_ALL)) 1339 return -EINVAL; 1340 1341 error = -EBADF; 1342 in = fdget(fd_in); 1343 if (in.file) { 1344 out = fdget(fd_out); 1345 if (out.file) { 1346 error = __do_splice(in.file, off_in, out.file, off_out, 1347 len, flags); 1348 fdput(out); 1349 } 1350 fdput(in); 1351 } 1352 return error; 1353 } 1354 1355 /* 1356 * Make sure there's data to read. Wait for input if we can, otherwise 1357 * return an appropriate error. 1358 */ 1359 static int ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags) 1360 { 1361 int ret; 1362 1363 /* 1364 * Check the pipe occupancy without the inode lock first. This function 1365 * is speculative anyways, so missing one is ok. 
1366 */ 1367 if (!pipe_empty(pipe->head, pipe->tail)) 1368 return 0; 1369 1370 ret = 0; 1371 pipe_lock(pipe); 1372 1373 while (pipe_empty(pipe->head, pipe->tail)) { 1374 if (signal_pending(current)) { 1375 ret = -ERESTARTSYS; 1376 break; 1377 } 1378 if (!pipe->writers) 1379 break; 1380 if (flags & SPLICE_F_NONBLOCK) { 1381 ret = -EAGAIN; 1382 break; 1383 } 1384 pipe_wait_readable(pipe); 1385 } 1386 1387 pipe_unlock(pipe); 1388 return ret; 1389 } 1390 1391 /* 1392 * Make sure there's writeable room. Wait for room if we can, otherwise 1393 * return an appropriate error. 1394 */ 1395 static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) 1396 { 1397 int ret; 1398 1399 /* 1400 * Check pipe occupancy without the inode lock first. This function 1401 * is speculative anyways, so missing one is ok. 1402 */ 1403 if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) 1404 return 0; 1405 1406 ret = 0; 1407 pipe_lock(pipe); 1408 1409 while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { 1410 if (!pipe->readers) { 1411 send_sig(SIGPIPE, current, 0); 1412 ret = -EPIPE; 1413 break; 1414 } 1415 if (flags & SPLICE_F_NONBLOCK) { 1416 ret = -EAGAIN; 1417 break; 1418 } 1419 if (signal_pending(current)) { 1420 ret = -ERESTARTSYS; 1421 break; 1422 } 1423 pipe_wait_writable(pipe); 1424 } 1425 1426 pipe_unlock(pipe); 1427 return ret; 1428 } 1429 1430 /* 1431 * Splice contents of ipipe to opipe. 
*/
/*
 * Moves up to 'len' bytes of buffers from ipipe to opipe. A buffer that
 * fits entirely within the remaining 'len' is transferred as a whole and
 * its input slot is released; a larger buffer is split, with the output
 * pipe taking an extra reference via pipe_buf_get().
 *
 * Returns the number of bytes moved when that is non-zero. Otherwise
 * returns the error from ipipe_prep()/opipe_prep(), -EPIPE when opipe has
 * no readers, -EAGAIN when no progress is possible and SPLICE_F_NONBLOCK
 * is set, -EFAULT when pipe_buf_get() fails, or 0 when ipipe is empty and
 * has no writers.
 */
static int splice_pipe_to_pipe(struct pipe_inode_info *ipipe,
			       struct pipe_inode_info *opipe,
			       size_t len, unsigned int flags)
{
	struct pipe_buffer *ibuf, *obuf;
	unsigned int i_head, o_head;
	unsigned int i_tail, o_tail;
	unsigned int i_mask, o_mask;
	int ret = 0;
	bool input_wakeup = false;


retry:
	ret = ipipe_prep(ipipe, flags);
	if (ret)
		return ret;

	ret = opipe_prep(opipe, flags);
	if (ret)
		return ret;

	/*
	 * Potential ABBA deadlock, work around it by ordering lock
	 * grabbing by pipe info address. Otherwise two different processes
	 * could deadlock (one doing tee from A -> B, the other from B -> A).
	 */
	pipe_double_lock(ipipe, opipe);

	/* Local cursors; the masks turn a ring index into a bufs[] slot. */
	i_tail = ipipe->tail;
	i_mask = ipipe->ring_size - 1;
	o_head = opipe->head;
	o_mask = opipe->ring_size - 1;

	do {
		size_t o_len;

		if (!opipe->readers) {
			send_sig(SIGPIPE, current, 0);
			/* Keep a partial byte count over the error. */
			if (!ret)
				ret = -EPIPE;
			break;
		}

		/* Re-read the far ends of both rings on every pass. */
		i_head = ipipe->head;
		o_tail = opipe->tail;

		/* Nothing left to read and nobody left to write more. */
		if (pipe_empty(i_head, i_tail) && !ipipe->writers)
			break;

		/*
		 * Cannot make any progress, because either the input
		 * pipe is empty or the output pipe is full.
		 */
		if (pipe_empty(i_head, i_tail) ||
		    pipe_full(o_head, o_tail, opipe->max_usage)) {
			/* Already processed some buffers, break */
			if (ret)
				break;

			if (flags & SPLICE_F_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}

			/*
			 * We raced with another reader/writer and haven't
			 * managed to process any buffers.  A zero return
			 * value means EOF, so retry instead.
			 */
			pipe_unlock(ipipe);
			pipe_unlock(opipe);
			goto retry;
		}

		ibuf = &ipipe->bufs[i_tail & i_mask];
		obuf = &opipe->bufs[o_head & o_mask];

		if (len >= ibuf->len) {
			/*
			 * Simply move the whole buffer from ipipe to opipe
			 */
			*obuf = *ibuf;
			/* The input slot no longer holds a buffer. */
			ibuf->ops = NULL;
			i_tail++;
			ipipe->tail = i_tail;
			/* A slot was freed, so wake ipipe writers on exit. */
			input_wakeup = true;
			o_len = obuf->len;
			o_head++;
			opipe->head = o_head;
		} else {
			/*
			 * Get a reference to this pipe buffer,
			 * so we can copy the contents over.
			 */
			if (!pipe_buf_get(ipipe, ibuf)) {
				if (ret == 0)
					ret = -EFAULT;
				break;
			}
			*obuf = *ibuf;

			/*
			 * Don't inherit the gift and merge flags, we need to
			 * prevent multiple steals of this page.
			 */
			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
			obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;

			/*
			 * The output buffer takes the first 'len' bytes; the
			 * input buffer keeps the remainder.
			 */
			obuf->len = len;
			ibuf->offset += len;
			ibuf->len -= len;
			o_len = len;
			o_head++;
			opipe->head = o_head;
		}
		ret += o_len;
		len -= o_len;
	} while (len);

	pipe_unlock(ipipe);
	pipe_unlock(opipe);

	/*
	 * If we put data in the output pipe, wakeup any potential readers.
	 */
	if (ret > 0)
		wakeup_pipe_readers(opipe);

	if (input_wakeup)
		wakeup_pipe_writers(ipipe);

	return ret;
}

/*
 * Link contents of ipipe to opipe.
 *
 * Copies up to 'len' bytes worth of buffer descriptors from ipipe into
 * opipe, taking a reference on each buffer with pipe_buf_get(). The input
 * pipe is not consumed: i_tail is only a local cursor and ipipe->tail is
 * never written. 'flags' is not used in this function.
 *
 * Returns the number of bytes linked when that is non-zero. Otherwise
 * returns -EPIPE when opipe has no readers, -EFAULT when pipe_buf_get()
 * fails, or 0 when ipipe is empty or opipe is full.
 */
static int link_pipe(struct pipe_inode_info *ipipe,
		     struct pipe_inode_info *opipe,
		     size_t len, unsigned int flags)
{
	struct pipe_buffer *ibuf, *obuf;
	unsigned int i_head, o_head;
	unsigned int i_tail, o_tail;
	unsigned int i_mask, o_mask;
	int ret = 0;

	/*
	 * Potential ABBA deadlock, work around it by ordering lock
	 * grabbing by pipe info address. Otherwise two different processes
	 * could deadlock (one doing tee from A -> B, the other from B -> A).
	 */
	pipe_double_lock(ipipe, opipe);

	/* Local cursors; the masks turn a ring index into a bufs[] slot. */
	i_tail = ipipe->tail;
	i_mask = ipipe->ring_size - 1;
	o_head = opipe->head;
	o_mask = opipe->ring_size - 1;

	do {
		if (!opipe->readers) {
			send_sig(SIGPIPE, current, 0);
			/* Keep a partial byte count over the error. */
			if (!ret)
				ret = -EPIPE;
			break;
		}

		i_head = ipipe->head;
		o_tail = opipe->tail;

		/*
		 * If we have iterated all input buffers or run out of
		 * output room, break.
		 */
		if (pipe_empty(i_head, i_tail) ||
		    pipe_full(o_head, o_tail, opipe->max_usage))
			break;

		ibuf = &ipipe->bufs[i_tail & i_mask];
		obuf = &opipe->bufs[o_head & o_mask];

		/*
		 * Get a reference to this pipe buffer,
		 * so we can copy the contents over.
		 */
		if (!pipe_buf_get(ipipe, ibuf)) {
			if (ret == 0)
				ret = -EFAULT;
			break;
		}

		*obuf = *ibuf;

		/*
		 * Don't inherit the gift and merge flag, we need to prevent
		 * multiple steals of this page.
		 */
		obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
		obuf->flags &= ~PIPE_BUF_FLAG_CAN_MERGE;

		/* Trim the last linked buffer to the remaining length. */
		if (obuf->len > len)
			obuf->len = len;
		ret += obuf->len;
		len -= obuf->len;

		o_head++;
		opipe->head = o_head;
		/* Advance the local cursor only; the input stays queued. */
		i_tail++;
	} while (len);

	pipe_unlock(ipipe);
	pipe_unlock(opipe);

	/*
	 * If we put data in the output pipe, wakeup any potential readers.
	 */
	if (ret > 0)
		wakeup_pipe_readers(opipe);

	return ret;
}

/*
 * This is a tee(1) implementation that works on pipes. It doesn't copy
 * any data, it simply references the 'in' pages on the 'out' pipe.
 * The 'flags' used are the SPLICE_F_* variants, currently the only
 * applicable one is SPLICE_F_NONBLOCK.
1660 */ 1661 long do_tee(struct file *in, struct file *out, size_t len, unsigned int flags) 1662 { 1663 struct pipe_inode_info *ipipe = get_pipe_info(in, true); 1664 struct pipe_inode_info *opipe = get_pipe_info(out, true); 1665 int ret = -EINVAL; 1666 1667 if (unlikely(!(in->f_mode & FMODE_READ) || 1668 !(out->f_mode & FMODE_WRITE))) 1669 return -EBADF; 1670 1671 /* 1672 * Duplicate the contents of ipipe to opipe without actually 1673 * copying the data. 1674 */ 1675 if (ipipe && opipe && ipipe != opipe) { 1676 if ((in->f_flags | out->f_flags) & O_NONBLOCK) 1677 flags |= SPLICE_F_NONBLOCK; 1678 1679 /* 1680 * Keep going, unless we encounter an error. The ipipe/opipe 1681 * ordering doesn't really matter. 1682 */ 1683 ret = ipipe_prep(ipipe, flags); 1684 if (!ret) { 1685 ret = opipe_prep(opipe, flags); 1686 if (!ret) 1687 ret = link_pipe(ipipe, opipe, len, flags); 1688 } 1689 } 1690 1691 return ret; 1692 } 1693 1694 SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) 1695 { 1696 struct fd in, out; 1697 int error; 1698 1699 if (unlikely(flags & ~SPLICE_F_ALL)) 1700 return -EINVAL; 1701 1702 if (unlikely(!len)) 1703 return 0; 1704 1705 error = -EBADF; 1706 in = fdget(fdin); 1707 if (in.file) { 1708 out = fdget(fdout); 1709 if (out.file) { 1710 error = do_tee(in.file, out.file, len, flags); 1711 fdput(out); 1712 } 1713 fdput(in); 1714 } 1715 1716 return error; 1717 } 1718