/*
 * "splice": joining two ropes together by interweaving their strands.
 *
 * This is the "extended pipe" functionality, where a pipe is used as
 * an arbitrary in-memory buffer. Think of a pipe as a small kernel
 * buffer that you can use to transfer data from one end to the other.
 *
 * The traditional unix read/write is extended with a "splice()" operation
 * that transfers data buffers to or from a pipe buffer.
 *
 * Named by Larry McVoy, original implementation from Linus, extended by
 * Jens to support splicing to files, network, direct splicing, etc and
 * fixing lots of bugs.
 *
 * Copyright (C) 2005-2006 Jens Axboe <axboe@kernel.dk>
 * Copyright (C) 2005-2006 Linus Torvalds <torvalds@osdl.org>
 * Copyright (C) 2006 Ingo Molnar <mingo@elte.hu>
 *
 */
#include <linux/fs.h>
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/splice.h>
#include <linux/memcontrol.h>
#include <linux/mm_inline.h>
#include <linux/swap.h>
#include <linux/writeback.h>
#include <linux/buffer_head.h>
#include <linux/module.h>
#include <linux/syscalls.h>
#include <linux/uio.h>
#include <linux/security.h>

/*
 * Attempt to steal a page from a pipe buffer. This should perhaps go into
 * a vm helper function, it's already simplified quite a bit by the
 * addition of remove_mapping(). If success is returned, the caller may
 * attempt to reuse this page for another destination.
 */
static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe,
                                     struct pipe_buffer *buf)
{
        struct page *page = buf->page;
        struct address_space *mapping;

        lock_page(page);

        mapping = page_mapping(page);
        if (mapping) {
                WARN_ON(!PageUptodate(page));

                /*
                 * At least for ext2 with nobh option, we need to wait on
                 * writeback completing on this page, since we'll remove it
                 * from the pagecache. Otherwise truncate won't wait on the
                 * page, allowing the disk blocks to be reused by someone else
                 * before we actually wrote our data to them. fs corruption
                 * ensues.
                 */
                wait_on_page_writeback(page);

                if (PagePrivate(page) && !try_to_release_page(page, GFP_KERNEL))
                        goto out_unlock;

                /*
                 * If we succeeded in removing the mapping, set LRU flag
                 * and return success.
                 */
                if (remove_mapping(mapping, page)) {
                        buf->flags |= PIPE_BUF_FLAG_LRU;
                        return 0;
                }
        }

        /*
         * Raced with truncate or failed to remove page from current
         * address space, unlock and return failure.
         */
out_unlock:
        unlock_page(page);
        return 1;
}

static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
                                        struct pipe_buffer *buf)
{
        page_cache_release(buf->page);
        buf->flags &= ~PIPE_BUF_FLAG_LRU;
}

/*
 * Check whether the contents of buf are OK to access. Since the content
 * is a page cache page, IO may be in flight.
 */
static int page_cache_pipe_buf_confirm(struct pipe_inode_info *pipe,
                                       struct pipe_buffer *buf)
{
        struct page *page = buf->page;
        int err;

        if (!PageUptodate(page)) {
                lock_page(page);

                /*
                 * Page got truncated/unhashed. This will cause a 0-byte
                 * splice, if this is the first page.
                 */
                if (!page->mapping) {
                        err = -ENODATA;
                        goto error;
                }

                /*
                 * Uh oh, read-error from disk.
                 */
                if (!PageUptodate(page)) {
                        err = -EIO;
                        goto error;
                }

                /*
                 * Page is ok after all, we are done.
                 */
                unlock_page(page);
        }

        return 0;
error:
        unlock_page(page);
        return err;
}

static const struct pipe_buf_operations page_cache_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
        .confirm = page_cache_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = page_cache_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};
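/*
 * Aside: a splice source with no page-cache ties can lean almost entirely
 * on the generic helpers when building its pipe_buf_operations. The sketch
 * below (compiled out; "mydrv" is a made-up name) shows the minimal set of
 * hooks a buffer provider supplies. Only the release step is custom here,
 * dropping the page reference the provider took when filling the pipe.
 */
#if 0
static void mydrv_pipe_buf_release(struct pipe_inode_info *pipe,
                                   struct pipe_buffer *buf)
{
        page_cache_release(buf->page);
}

static const struct pipe_buf_operations mydrv_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,    /* data is always valid */
        .release = mydrv_pipe_buf_release,
        .steal = generic_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};
#endif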
static int user_page_pipe_buf_steal(struct pipe_inode_info *pipe,
                                    struct pipe_buffer *buf)
{
        if (!(buf->flags & PIPE_BUF_FLAG_GIFT))
                return 1;

        buf->flags |= PIPE_BUF_FLAG_LRU;
        return generic_pipe_buf_steal(pipe, buf);
}

static const struct pipe_buf_operations user_page_pipe_buf_ops = {
        .can_merge = 0,
        .map = generic_pipe_buf_map,
        .unmap = generic_pipe_buf_unmap,
        .confirm = generic_pipe_buf_confirm,
        .release = page_cache_pipe_buf_release,
        .steal = user_page_pipe_buf_steal,
        .get = generic_pipe_buf_get,
};

/**
 * splice_to_pipe - fill passed data into a pipe
 * @pipe: pipe to fill
 * @spd: data to fill
 *
 * Description:
 *    @spd contains a map of pages and len/offset tuples, along with
 *    the struct pipe_buf_operations associated with these pages. This
 *    function will link that data to the pipe.
 *
 */
ssize_t splice_to_pipe(struct pipe_inode_info *pipe,
                       struct splice_pipe_desc *spd)
{
        unsigned int spd_pages = spd->nr_pages;
        int ret, do_wakeup, page_nr;

        ret = 0;
        do_wakeup = 0;
        page_nr = 0;

        if (pipe->inode)
                mutex_lock(&pipe->inode->i_mutex);

        for (;;) {
                if (!pipe->readers) {
                        send_sig(SIGPIPE, current, 0);
                        if (!ret)
                                ret = -EPIPE;
                        break;
                }

                if (pipe->nrbufs < PIPE_BUFFERS) {
                        int newbuf = (pipe->curbuf + pipe->nrbufs) & (PIPE_BUFFERS - 1);
                        struct pipe_buffer *buf = pipe->bufs + newbuf;

                        buf->page = spd->pages[page_nr];
                        buf->offset = spd->partial[page_nr].offset;
                        buf->len = spd->partial[page_nr].len;
                        buf->private = spd->partial[page_nr].private;
                        buf->ops = spd->ops;
                        if (spd->flags & SPLICE_F_GIFT)
                                buf->flags |= PIPE_BUF_FLAG_GIFT;

                        pipe->nrbufs++;
                        page_nr++;
                        ret += buf->len;

                        if (pipe->inode)
                                do_wakeup = 1;

                        if (!--spd->nr_pages)
                                break;
                        if (pipe->nrbufs < PIPE_BUFFERS)
                                continue;

                        break;
                }

                if (spd->flags & SPLICE_F_NONBLOCK) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        if (!ret)
                                ret = -ERESTARTSYS;
                        break;
                }

                if (do_wakeup) {
                        smp_mb();
                        if (waitqueue_active(&pipe->wait))
                                wake_up_interruptible_sync(&pipe->wait);
                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                        do_wakeup = 0;
                }

                pipe->waiting_writers++;
                pipe_wait(pipe);
                pipe->waiting_writers--;
        }

        if (pipe->inode) {
                mutex_unlock(&pipe->inode->i_mutex);

                if (do_wakeup) {
                        smp_mb();
                        if (waitqueue_active(&pipe->wait))
                                wake_up_interruptible(&pipe->wait);
                        kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
                }
        }

        while (page_nr < spd_pages)
                spd->spd_release(spd, page_nr++);

        return ret;
}

static void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
        page_cache_release(spd->pages[i]);
}
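/*
 * Aside: a minimal sketch (compiled out) of how a splice source feeds
 * splice_to_pipe(). The provider fills a splice_pipe_desc with referenced
 * pages plus offset/len tuples and lets splice_to_pipe() link them in;
 * pages that don't fit are handed back through ->spd_release().
 * mydrv_get_page() is hypothetical and stands in for however the driver
 * produces a page with a reference held.
 */
#if 0
static ssize_t mydrv_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags)
{
        struct page *pages[1];
        struct partial_page partial[1];
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
                .nr_pages = 1,
                .flags = flags,
                .ops = &page_cache_pipe_buf_ops,
                .spd_release = spd_release_page,
        };

        pages[0] = mydrv_get_page(in);          /* hypothetical helper */
        partial[0].offset = 0;
        partial[0].len = min_t(size_t, len, PAGE_CACHE_SIZE);
        partial[0].private = 0;

        return splice_to_pipe(pipe, &spd);
}
#endif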
static int
__generic_file_splice_read(struct file *in, loff_t *ppos,
                           struct pipe_inode_info *pipe, size_t len,
                           unsigned int flags)
{
        struct address_space *mapping = in->f_mapping;
        unsigned int loff, nr_pages, req_pages;
        struct page *pages[PIPE_BUFFERS];
        struct partial_page partial[PIPE_BUFFERS];
        struct page *page;
        pgoff_t index, end_index;
        loff_t isize;
        int error, page_nr;
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
                .flags = flags,
                .ops = &page_cache_pipe_buf_ops,
                .spd_release = spd_release_page,
        };

        index = *ppos >> PAGE_CACHE_SHIFT;
        loff = *ppos & ~PAGE_CACHE_MASK;
        req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
        nr_pages = min(req_pages, (unsigned)PIPE_BUFFERS);

        /*
         * Lookup the (hopefully) full range of pages we need.
         */
        spd.nr_pages = find_get_pages_contig(mapping, index, nr_pages, pages);
        index += spd.nr_pages;

        /*
         * If find_get_pages_contig() returned fewer pages than we needed,
         * readahead/allocate the rest and fill in the holes.
         */
        if (spd.nr_pages < nr_pages)
                page_cache_sync_readahead(mapping, &in->f_ra, in,
                                index, req_pages - spd.nr_pages);

        error = 0;
        while (spd.nr_pages < nr_pages) {
                /*
                 * Page could be there, find_get_pages_contig() breaks on
                 * the first hole.
                 */
                page = find_get_page(mapping, index);
                if (!page) {
                        /*
                         * page didn't exist, allocate one.
                         */
                        page = page_cache_alloc_cold(mapping);
                        if (!page)
                                break;

                        error = add_to_page_cache_lru(page, mapping, index,
                                                mapping_gfp_mask(mapping));
                        if (unlikely(error)) {
                                page_cache_release(page);
                                if (error == -EEXIST)
                                        continue;
                                break;
                        }
                        /*
                         * add_to_page_cache() locks the page, unlock it
                         * to avoid convoluting the logic below even more.
                         */
                        unlock_page(page);
                }

                pages[spd.nr_pages++] = page;
                index++;
        }

        /*
         * Now loop over the map and see if we need to start IO on any
         * pages, fill in the partial map, etc.
         */
        index = *ppos >> PAGE_CACHE_SHIFT;
        nr_pages = spd.nr_pages;
        spd.nr_pages = 0;
        for (page_nr = 0; page_nr < nr_pages; page_nr++) {
                unsigned int this_len;

                if (!len)
                        break;

                /*
                 * this_len is the max we'll use from this page
                 */
                this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
                page = pages[page_nr];

                if (PageReadahead(page))
                        page_cache_async_readahead(mapping, &in->f_ra, in,
                                        page, index, req_pages - page_nr);

                /*
                 * If the page isn't uptodate, we may need to start io on it
                 */
                if (!PageUptodate(page)) {
                        /*
                         * If in nonblock mode then don't block on waiting
                         * for an in-flight io page
                         */
                        if (flags & SPLICE_F_NONBLOCK) {
                                if (!trylock_page(page)) {
                                        error = -EAGAIN;
                                        break;
                                }
                        } else
                                lock_page(page);

                        /*
                         * Page was truncated, or invalidated by the
                         * filesystem. Redo the find/create, but this time the
                         * page is kept locked, so there's no chance of another
                         * race with truncate/invalidate.
                         */
                        if (!page->mapping) {
                                unlock_page(page);
                                page = find_or_create_page(mapping, index,
                                                mapping_gfp_mask(mapping));

                                if (!page) {
                                        error = -ENOMEM;
                                        break;
                                }
                                page_cache_release(pages[page_nr]);
                                pages[page_nr] = page;
                        }
                        /*
                         * page was already under io and is now done, great
                         */
                        if (PageUptodate(page)) {
                                unlock_page(page);
                                goto fill_it;
                        }

                        /*
                         * need to read in the page
                         */
                        error = mapping->a_ops->readpage(in, page);
                        if (unlikely(error)) {
                                /*
                                 * We really should re-lookup the page here,
                                 * but it complicates things a lot. Instead
                                 * let's just do what we already stored, and
                                 * we'll get it the next time we are called.
                                 */
                                if (error == AOP_TRUNCATED_PAGE)
                                        error = 0;

                                break;
                        }
                }
fill_it:
                /*
                 * i_size must be checked after PageUptodate.
                 */
                isize = i_size_read(mapping->host);
                end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
                if (unlikely(!isize || index > end_index))
                        break;

                /*
                 * if this is the last page, see if we need to shrink
                 * the length and stop
                 */
                if (end_index == index) {
                        unsigned int plen;

                        /*
                         * max good bytes in this page
                         */
                        plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
                        if (plen <= loff)
                                break;

                        /*
                         * force quit after adding this page
                         */
                        this_len = min(this_len, plen - loff);
                        len = this_len;
                }

                partial[page_nr].offset = loff;
                partial[page_nr].len = this_len;
                len -= this_len;
                loff = 0;
                spd.nr_pages++;
                index++;
        }

        /*
         * Release any pages at the end, if we quit early. 'page_nr' is how far
         * we got, 'nr_pages' is how many pages are in the map.
         */
        while (page_nr < nr_pages)
                page_cache_release(pages[page_nr++]);
        in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;

        if (spd.nr_pages)
                return splice_to_pipe(pipe, &spd);

        return error;
}

/**
 * generic_file_splice_read - splice data from file to a pipe
 * @in: file to splice from
 * @ppos: position in @in
 * @pipe: pipe to splice to
 * @len: number of bytes to splice
 * @flags: splice modifier flags
 *
 * Description:
 *    Will read pages from given file and fill them into a pipe. Can be
 *    used as long as the address_space operations for the source implement
 *    a readpage() hook.
 *
 */
ssize_t generic_file_splice_read(struct file *in, loff_t *ppos,
                                 struct pipe_inode_info *pipe, size_t len,
                                 unsigned int flags)
{
        loff_t isize, left;
        int ret;

        isize = i_size_read(in->f_mapping->host);
        if (unlikely(*ppos >= isize))
                return 0;

        left = isize - *ppos;
        if (unlikely(left < len))
                len = left;

        ret = __generic_file_splice_read(in, ppos, pipe, len, flags);
        if (ret > 0)
                *ppos += ret;

        return ret;
}

EXPORT_SYMBOL(generic_file_splice_read);
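/*
 * Aside: filesystems opt into splice by pointing their file_operations at
 * the generic helpers, much like ext2 and friends do. A trimmed sketch
 * (compiled out; "myfs" is a made-up name):
 */
#if 0
static const struct file_operations myfs_file_operations = {
        .read           = do_sync_read,
        .write          = do_sync_write,
        .aio_read       = generic_file_aio_read,
        .aio_write      = generic_file_aio_write,
        .mmap           = generic_file_mmap,
        .splice_read    = generic_file_splice_read,
        .splice_write   = generic_file_splice_write,
};
#endif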
/*
 * Send 'sd->len' bytes to socket from 'sd->file' at position 'sd->pos'
 * using sendpage(). Return the number of bytes sent.
 */
static int pipe_to_sendpage(struct pipe_inode_info *pipe,
                            struct pipe_buffer *buf, struct splice_desc *sd)
{
        struct file *file = sd->u.file;
        loff_t pos = sd->pos;
        int ret, more;

        ret = buf->ops->confirm(pipe, buf);
        if (!ret) {
                more = (sd->flags & SPLICE_F_MORE) || sd->len < sd->total_len;

                ret = file->f_op->sendpage(file, buf->page, buf->offset,
                                           sd->len, &pos, more);
        }

        return ret;
}

/*
 * This is a little more tricky than the file -> pipe splicing. There are
 * basically three cases:
 *
 *      - Destination page already exists in the address space and there
 *        are users of it. For that case we have no option other than
 *        copying the data. Tough luck.
 *      - Destination page already exists in the address space, but there
 *        are no users of it. Make sure it's uptodate, then drop it. Fall
 *        through to last case.
 *      - Destination page does not exist, we can add the pipe page to
 *        the page cache and avoid the copy.
 *
 * If asked to move pages to the output file (SPLICE_F_MOVE is set in
 * sd->flags), we attempt to migrate pages from the pipe to the output
 * file address space page cache. This is possible if no one else has
 * the pipe page referenced outside of the pipe and page cache. If
 * SPLICE_F_MOVE isn't set, or we cannot move the page, we simply create
 * a new page in the output file page cache and fill/dirty that.
 */
static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
{
        struct file *file = sd->u.file;
        struct address_space *mapping = file->f_mapping;
        unsigned int offset, this_len;
        struct page *page;
        void *fsdata;
        int ret;

        /*
         * make sure the data in this buffer is uptodate
         */
        ret = buf->ops->confirm(pipe, buf);
        if (unlikely(ret))
                return ret;

        offset = sd->pos & ~PAGE_CACHE_MASK;

        this_len = sd->len;
        if (this_len + offset > PAGE_CACHE_SIZE)
                this_len = PAGE_CACHE_SIZE - offset;

        ret = pagecache_write_begin(file, mapping, sd->pos, this_len,
                                AOP_FLAG_UNINTERRUPTIBLE, &page, &fsdata);
        if (unlikely(ret))
                goto out;

        if (buf->page != page) {
                /*
                 * Careful, ->map() uses KM_USER0!
                 */
                char *src = buf->ops->map(pipe, buf, 1);
                char *dst = kmap_atomic(page, KM_USER1);

                memcpy(dst + offset, src + buf->offset, this_len);
                flush_dcache_page(page);
                kunmap_atomic(dst, KM_USER1);
                buf->ops->unmap(pipe, buf, src);
        }
        ret = pagecache_write_end(file, mapping, sd->pos, this_len, this_len,
                                page, fsdata);
out:
        return ret;
}
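/*
 * Aside: from userspace, a zero-copy file -> socket transfer ends up in
 * the actors above: the file side fills the pipe via ->splice_read(), then
 * the pipe -> socket leg lands in pipe_to_sendpage(). A compiled-out
 * userspace sketch (needs _GNU_SOURCE, <fcntl.h> and <unistd.h>; sock_fd
 * is assumed to be a connected stream socket):
 */
#if 0
static ssize_t stream_file_to_socket(int file_fd, int sock_fd)
{
        loff_t off = 0;
        ssize_t n, total = 0;
        int pfd[2];

        if (pipe(pfd) < 0)
                return -1;

        while ((n = splice(file_fd, &off, pfd[1], NULL, 65536,
                           SPLICE_F_MOVE | SPLICE_F_MORE)) > 0) {
                ssize_t left = n;

                while (left > 0) {
                        ssize_t m = splice(pfd[0], NULL, sock_fd, NULL,
                                           left, SPLICE_F_MORE);
                        if (m <= 0)
                                goto out;
                        left -= m;
                }
                total += n;
        }
out:
        close(pfd[0]);
        close(pfd[1]);
        return total;
}
#endif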
/**
 * __splice_from_pipe - splice data from a pipe to given actor
 * @pipe: pipe to splice from
 * @sd: information to @actor
 * @actor: handler that splices the data
 *
 * Description:
 *    This function does little more than loop over the pipe and call
 *    @actor to do the actual moving of a single struct pipe_buffer to
 *    the desired destination. See pipe_to_file, pipe_to_sendpage, or
 *    pipe_to_user.
 *
 */
ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd,
                           splice_actor *actor)
{
        int ret, do_wakeup, err;

        ret = 0;
        do_wakeup = 0;

        for (;;) {
                if (pipe->nrbufs) {
                        struct pipe_buffer *buf = pipe->bufs + pipe->curbuf;
                        const struct pipe_buf_operations *ops = buf->ops;

                        sd->len = buf->len;
                        if (sd->len > sd->total_len)
                                sd->len = sd->total_len;

                        err = actor(pipe, buf, sd);
                        if (err <= 0) {
                                if (!ret && err != -ENODATA)
                                        ret = err;

                                break;
                        }

                        ret += err;
                        buf->offset += err;
                        buf->len -= err;

                        sd->len -= err;
                        sd->pos += err;
                        sd->total_len -= err;
                        if (sd->len)
                                continue;

                        if (!buf->len) {
                                buf->ops = NULL;
                                ops->release(pipe, buf);
                                pipe->curbuf = (pipe->curbuf + 1) & (PIPE_BUFFERS - 1);
                                pipe->nrbufs--;
                                if (pipe->inode)
                                        do_wakeup = 1;
                        }

                        if (!sd->total_len)
                                break;
                }

                if (pipe->nrbufs)
                        continue;
                if (!pipe->writers)
                        break;
                if (!pipe->waiting_writers) {
                        if (ret)
                                break;
                }

                if (sd->flags & SPLICE_F_NONBLOCK) {
                        if (!ret)
                                ret = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        if (!ret)
                                ret = -ERESTARTSYS;
                        break;
                }

                if (do_wakeup) {
                        smp_mb();
                        if (waitqueue_active(&pipe->wait))
                                wake_up_interruptible_sync(&pipe->wait);
                        kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
                        do_wakeup = 0;
                }

                pipe_wait(pipe);
        }

        if (do_wakeup) {
                smp_mb();
                if (waitqueue_active(&pipe->wait))
                        wake_up_interruptible(&pipe->wait);
                kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
        }

        return ret;
}
EXPORT_SYMBOL(__splice_from_pipe);

/**
 * splice_from_pipe - splice data from a pipe to a file
 * @pipe: pipe to splice from
 * @out: file to splice to
 * @ppos: position in @out
 * @len: how many bytes to splice
 * @flags: splice modifier flags
 * @actor: handler that splices the data
 *
 * Description:
 *    See __splice_from_pipe. This function locks the input and output inodes,
 *    otherwise it's identical to __splice_from_pipe().
 *
 */
ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out,
                         loff_t *ppos, size_t len, unsigned int flags,
                         splice_actor *actor)
{
        ssize_t ret;
        struct inode *inode = out->f_mapping->host;
        struct splice_desc sd = {
                .total_len = len,
                .flags = flags,
                .pos = *ppos,
                .u.file = out,
        };

        /*
         * The actor worker might be calling ->write_begin and
         * ->write_end. Most of the time, these expect i_mutex to
         * be held. Since this may result in an ABBA deadlock with
         * pipe->inode, we have to order lock acquisition here.
         */
        inode_double_lock(inode, pipe->inode);
        ret = __splice_from_pipe(pipe, &sd, actor);
        inode_double_unlock(inode, pipe->inode);

        return ret;
}
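/*
 * Aside: adding a new splice destination mostly means writing an actor
 * and handing it to splice_from_pipe(). The compiled-out sketch below is
 * a hypothetical "discard" destination: it confirms the buffer and then
 * claims sd->len bytes without touching the page, which is the minimum
 * contract __splice_from_pipe() expects (a positive return means that
 * many bytes were consumed).
 */
#if 0
static int pipe_to_null(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
{
        int ret;

        ret = buf->ops->confirm(pipe, buf);
        if (unlikely(ret))
                return ret;

        return sd->len;
}

static ssize_t null_splice_write(struct pipe_inode_info *pipe,
                                 struct file *out, loff_t *ppos,
                                 size_t len, unsigned int flags)
{
        return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_null);
}
#endif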
/**
 * generic_file_splice_write_nolock - generic_file_splice_write without mutexes
 * @pipe: pipe info
 * @out: file to write to
 * @ppos: position in @out
 * @len: number of bytes to splice
 * @flags: splice modifier flags
 *
 * Description:
 *    Will either move or copy pages (determined by @flags options) from
 *    the given pipe inode to the given file. The caller is responsible
 *    for acquiring i_mutex on both inodes.
 *
 */
ssize_t
generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out,
                                 loff_t *ppos, size_t len, unsigned int flags)
{
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
        struct splice_desc sd = {
                .total_len = len,
                .flags = flags,
                .pos = *ppos,
                .u.file = out,
        };
        ssize_t ret;
        int err;

        err = file_remove_suid(out);
        if (unlikely(err))
                return err;

        ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
        if (ret > 0) {
                unsigned long nr_pages;

                *ppos += ret;
                nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

                /*
                 * If file or inode is SYNC and we actually wrote some data,
                 * sync it.
                 */
                if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
                        err = generic_osync_inode(inode, mapping,
                                                  OSYNC_METADATA|OSYNC_DATA);

                        if (err)
                                ret = err;
                }
                balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
        }

        return ret;
}

EXPORT_SYMBOL(generic_file_splice_write_nolock);

/**
 * generic_file_splice_write - splice data from a pipe to a file
 * @pipe: pipe info
 * @out: file to write to
 * @ppos: position in @out
 * @len: number of bytes to splice
 * @flags: splice modifier flags
 *
 * Description:
 *    Will either move or copy pages (determined by @flags options) from
 *    the given pipe inode to the given file.
 *
 */
ssize_t
generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
                          loff_t *ppos, size_t len, unsigned int flags)
{
        struct address_space *mapping = out->f_mapping;
        struct inode *inode = mapping->host;
        struct splice_desc sd = {
                .total_len = len,
                .flags = flags,
                .pos = *ppos,
                .u.file = out,
        };
        ssize_t ret;

        inode_double_lock(inode, pipe->inode);
        ret = file_remove_suid(out);
        if (likely(!ret))
                ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
        inode_double_unlock(inode, pipe->inode);
        if (ret > 0) {
                unsigned long nr_pages;

                *ppos += ret;
                nr_pages = (ret + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;

                /*
                 * If file or inode is SYNC and we actually wrote some data,
                 * sync it.
                 */
                if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
                        int err;

                        mutex_lock(&inode->i_mutex);
                        err = generic_osync_inode(inode, mapping,
                                                  OSYNC_METADATA|OSYNC_DATA);
                        mutex_unlock(&inode->i_mutex);

                        if (err)
                                ret = err;
                }
                balance_dirty_pages_ratelimited_nr(mapping, nr_pages);
        }

        return ret;
}

EXPORT_SYMBOL(generic_file_splice_write);

/**
 * generic_splice_sendpage - splice data from a pipe to a socket
 * @pipe: pipe to splice from
 * @out: socket to write to
 * @ppos: position in @out
 * @len: number of bytes to splice
 * @flags: splice modifier flags
 *
 * Description:
 *    Will send @len bytes from the pipe to a network socket. No data copying
 *    is involved.
 *
 */
ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out,
                                loff_t *ppos, size_t len, unsigned int flags)
{
        return splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_sendpage);
}

EXPORT_SYMBOL(generic_splice_sendpage);
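/*
 * Aside: sockets are wired up the same way files are; at the time of
 * writing, net/socket.c points the socket file_operations at
 * generic_splice_sendpage for the write side. Heavily trimmed sketch
 * (compiled out; the real socket_file_ops has many more entries):
 */
#if 0
static const struct file_operations socket_file_ops = {
        .sendpage       = sock_sendpage,
        .splice_write   = generic_splice_sendpage,
};
#endif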
/*
 * Attempt to initiate a splice from pipe to file.
 */
static long do_splice_from(struct pipe_inode_info *pipe, struct file *out,
                           loff_t *ppos, size_t len, unsigned int flags)
{
        int ret;

        if (unlikely(!out->f_op || !out->f_op->splice_write))
                return -EINVAL;

        if (unlikely(!(out->f_mode & FMODE_WRITE)))
                return -EBADF;

        if (unlikely(out->f_flags & O_APPEND))
                return -EINVAL;

        ret = rw_verify_area(WRITE, out, ppos, len);
        if (unlikely(ret < 0))
                return ret;

        return out->f_op->splice_write(pipe, out, ppos, len, flags);
}

/*
 * Attempt to initiate a splice from a file to a pipe.
 */
static long do_splice_to(struct file *in, loff_t *ppos,
                         struct pipe_inode_info *pipe, size_t len,
                         unsigned int flags)
{
        int ret;

        if (unlikely(!in->f_op || !in->f_op->splice_read))
                return -EINVAL;

        if (unlikely(!(in->f_mode & FMODE_READ)))
                return -EBADF;

        ret = rw_verify_area(READ, in, ppos, len);
        if (unlikely(ret < 0))
                return ret;

        return in->f_op->splice_read(in, ppos, pipe, len, flags);
}
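/*
 * Aside: the O_APPEND rejection in do_splice_from() is visible from
 * userspace; splicing into a file opened for append fails outright.
 * Compiled-out sketch, error handling trimmed:
 */
#if 0
        int fd = open("log.txt", O_WRONLY | O_APPEND);
        int pfd[2];

        pipe(pfd);
        write(pfd[1], "hi", 2);
        if (splice(pfd[0], NULL, fd, NULL, 2, 0) < 0)
                perror("splice");       /* splice: Invalid argument */
#endif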
/**
 * splice_direct_to_actor - splices data directly between two non-pipes
 * @in: file to splice from
 * @sd: actor information on where to splice to
 * @actor: handles the data splicing
 *
 * Description:
 *    This is a special case helper to splice directly between two
 *    points, without requiring an explicit pipe. Internally an allocated
 *    pipe is cached in the process, and reused during the lifetime of
 *    that process.
 *
 */
ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd,
                               splice_direct_actor *actor)
{
        struct pipe_inode_info *pipe;
        long ret, bytes;
        umode_t i_mode;
        size_t len;
        int i, flags;

        /*
         * We require the input to be a regular file, as we don't want to
         * randomly drop data for e.g. socket -> socket splicing. Use the
         * piped splicing for that!
         */
        i_mode = in->f_path.dentry->d_inode->i_mode;
        if (unlikely(!S_ISREG(i_mode) && !S_ISBLK(i_mode)))
                return -EINVAL;

        /*
         * neither in nor out is a pipe, set up an internal pipe attached to
         * 'out' and transfer the wanted data from 'in' to 'out' through that
         */
        pipe = current->splice_pipe;
        if (unlikely(!pipe)) {
                pipe = alloc_pipe_info(NULL);
                if (!pipe)
                        return -ENOMEM;

                /*
                 * We don't have an immediate reader, but we'll read the stuff
                 * out of the pipe right after the splice_to_pipe(). So set
                 * PIPE_READERS appropriately.
                 */
                pipe->readers = 1;

                current->splice_pipe = pipe;
        }

        /*
         * Do the splice.
         */
        ret = 0;
        bytes = 0;
        len = sd->total_len;
        flags = sd->flags;

        /*
         * Don't block on output, we have to drain the direct pipe.
         */
        sd->flags &= ~SPLICE_F_NONBLOCK;

        while (len) {
                size_t read_len;
                loff_t pos = sd->pos, prev_pos = pos;

                ret = do_splice_to(in, &pos, pipe, len, flags);
                if (unlikely(ret <= 0))
                        goto out_release;

                read_len = ret;
                sd->total_len = read_len;

                /*
                 * NOTE: nonblocking mode only applies to the input. We
                 * must not do the output in nonblocking mode as then we
                 * could get stuck data in the internal pipe:
                 */
                ret = actor(pipe, sd);
                if (unlikely(ret <= 0)) {
                        sd->pos = prev_pos;
                        goto out_release;
                }

                bytes += ret;
                len -= ret;
                sd->pos = pos;

                if (ret < read_len) {
                        sd->pos = prev_pos + ret;
                        goto out_release;
                }
        }

done:
        pipe->nrbufs = pipe->curbuf = 0;
        file_accessed(in);
        return bytes;

out_release:
        /*
         * If we did an incomplete transfer we must release
         * the pipe buffers in question:
         */
        for (i = 0; i < PIPE_BUFFERS; i++) {
                struct pipe_buffer *buf = pipe->bufs + i;

                if (buf->ops) {
                        buf->ops->release(pipe, buf);
                        buf->ops = NULL;
                }
        }

        if (!bytes)
                bytes = ret;

        goto done;
}
EXPORT_SYMBOL(splice_direct_to_actor);

static int direct_splice_actor(struct pipe_inode_info *pipe,
                               struct splice_desc *sd)
{
        struct file *file = sd->u.file;

        return do_splice_from(pipe, file, &sd->pos, sd->total_len, sd->flags);
}

/**
 * do_splice_direct - splices data directly between two files
 * @in: file to splice from
 * @ppos: input file offset
 * @out: file to splice to
 * @len: number of bytes to splice
 * @flags: splice modifier flags
 *
 * Description:
 *    For use by do_sendfile(). splice can easily emulate sendfile, but
 *    doing it in the application would incur an extra system call
 *    (splice in + splice out, as compared to just sendfile()). So this helper
 *    can splice directly through a process-private pipe.
 *
 */
long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
                      size_t len, unsigned int flags)
{
        struct splice_desc sd = {
                .len = len,
                .total_len = len,
                .flags = flags,
                .pos = *ppos,
                .u.file = out,
        };
        long ret;

        ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
        if (ret > 0)
                *ppos = sd.pos;

        return ret;
}
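/*
 * Aside: this is the helper behind sendfile(2). do_sendfile() in
 * fs/read_write.c validates the descriptors and counts and then, roughly,
 * makes the call below (sketch, names simplified):
 */
#if 0
        ret = do_splice_direct(in_file, &pos, out_file, count, flags);
#endif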
/*
 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 * location, so checking ->i_pipe is not enough to verify that this is a
 * pipe.
 */
static inline struct pipe_inode_info *pipe_info(struct inode *inode)
{
        if (S_ISFIFO(inode->i_mode))
                return inode->i_pipe;

        return NULL;
}

/*
 * Determine where to splice to/from.
 */
static long do_splice(struct file *in, loff_t __user *off_in,
                      struct file *out, loff_t __user *off_out,
                      size_t len, unsigned int flags)
{
        struct pipe_inode_info *pipe;
        loff_t offset, *off;
        long ret;

        pipe = pipe_info(in->f_path.dentry->d_inode);
        if (pipe) {
                if (off_in)
                        return -ESPIPE;
                if (off_out) {
                        if (out->f_op->llseek == no_llseek)
                                return -EINVAL;
                        if (copy_from_user(&offset, off_out, sizeof(loff_t)))
                                return -EFAULT;
                        off = &offset;
                } else
                        off = &out->f_pos;

                ret = do_splice_from(pipe, out, off, len, flags);

                if (off_out && copy_to_user(off_out, off, sizeof(loff_t)))
                        ret = -EFAULT;

                return ret;
        }

        pipe = pipe_info(out->f_path.dentry->d_inode);
        if (pipe) {
                if (off_out)
                        return -ESPIPE;
                if (off_in) {
                        if (in->f_op->llseek == no_llseek)
                                return -EINVAL;
                        if (copy_from_user(&offset, off_in, sizeof(loff_t)))
                                return -EFAULT;
                        off = &offset;
                } else
                        off = &in->f_pos;

                ret = do_splice_to(in, off, pipe, len, flags);

                if (off_in && copy_to_user(off_in, off, sizeof(loff_t)))
                        ret = -EFAULT;

                return ret;
        }

        return -EINVAL;
}
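/*
 * Aside: the dispatch above gives userspace the canonical copy idiom,
 * splice from the source into a pipe and from the pipe into the
 * destination. Compiled-out userspace sketch (needs _GNU_SOURCE,
 * <fcntl.h> and <unistd.h>; assumes the source holds at least len bytes
 * and that short writes are errors). Passing &in_off/&out_off exercises
 * the copy_from_user()/copy_to_user() offset handling in do_splice():
 */
#if 0
static int splice_copy(int in_fd, int out_fd, size_t len)
{
        loff_t in_off = 0, out_off = 0;
        ssize_t n;
        int pfd[2];

        if (pipe(pfd) < 0)
                return -1;

        while (len) {
                n = splice(in_fd, &in_off, pfd[1], NULL, len, 0);
                if (n <= 0)
                        break;
                if (splice(pfd[0], NULL, out_fd, &out_off, n, 0) != n)
                        break;
                len -= n;
        }
        close(pfd[0]);
        close(pfd[1]);
        return len ? -1 : 0;
}
#endif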
/*
 * Map an iov into an array of pages and offset/length tuples. With the
 * partial_page structure, we can map several non-contiguous ranges into
 * our one pages[] map instead of splitting that operation into pieces.
 * Could easily be exported as a generic helper for other users, in which
 * case one would probably want to add a 'max_nr_pages' parameter as well.
 */
static int get_iovec_page_array(const struct iovec __user *iov,
                                unsigned int nr_vecs, struct page **pages,
                                struct partial_page *partial, int aligned)
{
        int buffers = 0, error = 0;

        while (nr_vecs) {
                unsigned long off, npages;
                struct iovec entry;
                void __user *base;
                size_t len;
                int i;

                error = -EFAULT;
                if (copy_from_user(&entry, iov, sizeof(entry)))
                        break;

                base = entry.iov_base;
                len = entry.iov_len;

                /*
                 * Sanity check this iovec. 0 read succeeds.
                 */
                error = 0;
                if (unlikely(!len))
                        break;
                error = -EFAULT;
                if (!access_ok(VERIFY_READ, base, len))
                        break;

                /*
                 * Get this base offset and number of pages, then map
                 * in the user pages.
                 */
                off = (unsigned long) base & ~PAGE_MASK;

                /*
                 * If asked for alignment, the offset must be zero and the
                 * length a multiple of the PAGE_SIZE.
                 */
                error = -EINVAL;
                if (aligned && (off || len & ~PAGE_MASK))
                        break;

                npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT;
                if (npages > PIPE_BUFFERS - buffers)
                        npages = PIPE_BUFFERS - buffers;

                error = get_user_pages_fast((unsigned long)base, npages,
                                        0, &pages[buffers]);

                if (unlikely(error <= 0))
                        break;

                /*
                 * Fill this contiguous range into the partial page map.
                 */
                for (i = 0; i < error; i++) {
                        const int plen = min_t(size_t, len, PAGE_SIZE - off);

                        partial[buffers].offset = off;
                        partial[buffers].len = plen;

                        off = 0;
                        len -= plen;
                        buffers++;
                }

                /*
                 * We didn't complete this iov, stop here since it probably
                 * means we have to move some of this into a pipe to
                 * be able to continue.
                 */
                if (len)
                        break;

                /*
                 * Don't continue if we mapped fewer pages than we asked for,
                 * or if we mapped the max number of pages that we have
                 * room for.
                 */
                if (error < npages || buffers == PIPE_BUFFERS)
                        break;

                nr_vecs--;
                iov++;
        }

        if (buffers)
                return buffers;

        return error;
}

static int pipe_to_user(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
                        struct splice_desc *sd)
{
        char *src;
        int ret;

        ret = buf->ops->confirm(pipe, buf);
        if (unlikely(ret))
                return ret;

        /*
         * See if we can use the atomic maps, by prefaulting in the
         * pages and doing an atomic copy
         */
        if (!fault_in_pages_writeable(sd->u.userptr, sd->len)) {
                src = buf->ops->map(pipe, buf, 1);
                ret = __copy_to_user_inatomic(sd->u.userptr, src + buf->offset,
                                              sd->len);
                buf->ops->unmap(pipe, buf, src);
                if (!ret) {
                        ret = sd->len;
                        goto out;
                }
        }

        /*
         * No dice, use slow non-atomic map and copy
         */
        src = buf->ops->map(pipe, buf, 0);

        ret = sd->len;
        if (copy_to_user(sd->u.userptr, src + buf->offset, sd->len))
                ret = -EFAULT;

        buf->ops->unmap(pipe, buf, src);
out:
        if (ret > 0)
                sd->u.userptr += ret;
        return ret;
}

/*
 * For lack of a better implementation, implement vmsplice() to userspace
 * as a simple copy of the pipe's pages to the user iov.
 */
static long vmsplice_to_user(struct file *file, const struct iovec __user *iov,
                             unsigned long nr_segs, unsigned int flags)
{
        struct pipe_inode_info *pipe;
        struct splice_desc sd;
        ssize_t size;
        int error;
        long ret;

        pipe = pipe_info(file->f_path.dentry->d_inode);
        if (!pipe)
                return -EBADF;

        if (pipe->inode)
                mutex_lock(&pipe->inode->i_mutex);

        error = ret = 0;
        while (nr_segs) {
                void __user *base;
                size_t len;

                /*
                 * Get user address base and length for this iovec.
                 */
                error = get_user(base, &iov->iov_base);
                if (unlikely(error))
                        break;
                error = get_user(len, &iov->iov_len);
                if (unlikely(error))
                        break;

                /*
                 * Sanity check this iovec. 0 read succeeds.
                 */
                if (unlikely(!len))
                        break;
                if (unlikely(!base)) {
                        error = -EFAULT;
                        break;
                }

                if (unlikely(!access_ok(VERIFY_WRITE, base, len))) {
                        error = -EFAULT;
                        break;
                }

                sd.len = 0;
                sd.total_len = len;
                sd.flags = flags;
                sd.u.userptr = base;
                sd.pos = 0;

                size = __splice_from_pipe(pipe, &sd, pipe_to_user);
                if (size < 0) {
                        if (!ret)
                                ret = size;

                        break;
                }

                ret += size;

                if (size < len)
                        break;

                nr_segs--;
                iov++;
        }

        if (pipe->inode)
                mutex_unlock(&pipe->inode->i_mutex);

        if (!ret)
                ret = error;

        return ret;
}
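/*
 * Aside: from userspace this copy path is reached by calling vmsplice(2)
 * on the read end of a pipe. Compiled-out sketch, draining whatever the
 * pipe currently holds into a local buffer (pipe_rd_fd is assumed to be
 * the pipe's read end):
 */
#if 0
        char buf[4096];
        struct iovec iov = {
                .iov_base = buf,
                .iov_len = sizeof(buf),
        };
        ssize_t n = vmsplice(pipe_rd_fd, &iov, 1, 0);
#endif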
/*
 * vmsplice splices a user address range into a pipe. It can be thought of
 * as splice-from-memory, where the regular splice is splice-from-file (or
 * to file). In both cases the output is a pipe, naturally.
 */
static long vmsplice_to_pipe(struct file *file, const struct iovec __user *iov,
                             unsigned long nr_segs, unsigned int flags)
{
        struct pipe_inode_info *pipe;
        struct page *pages[PIPE_BUFFERS];
        struct partial_page partial[PIPE_BUFFERS];
        struct splice_pipe_desc spd = {
                .pages = pages,
                .partial = partial,
                .flags = flags,
                .ops = &user_page_pipe_buf_ops,
                .spd_release = spd_release_page,
        };

        pipe = pipe_info(file->f_path.dentry->d_inode);
        if (!pipe)
                return -EBADF;

        spd.nr_pages = get_iovec_page_array(iov, nr_segs, pages, partial,
                                            flags & SPLICE_F_GIFT);
        if (spd.nr_pages <= 0)
                return spd.nr_pages;

        return splice_to_pipe(pipe, &spd);
}

/*
 * Note that vmsplice only really supports true splicing _from_ user memory
 * to a pipe, not the other way around. Splicing from user memory is a simple
 * operation that can be supported without any funky alignment restrictions
 * or nasty vm tricks. We simply map in the user pages and fill them into
 * a pipe. The reverse isn't quite as easy, though. There are two possible
 * solutions for that:
 *
 *      - memcpy() the data internally, at which point we might as well just
 *        do a regular read() on the buffer anyway.
 *      - Lots of nasty vm tricks, that are neither fast nor flexible (it
 *        imposes restrictions on both ends of the pipe).
 *
 * Currently we punt and implement it as a normal copy, see pipe_to_user().
 *
 */
SYSCALL_DEFINE4(vmsplice, int, fd, const struct iovec __user *, iov,
                unsigned long, nr_segs, unsigned int, flags)
{
        struct file *file;
        long error;
        int fput;

        if (unlikely(nr_segs > UIO_MAXIOV))
                return -EINVAL;
        else if (unlikely(!nr_segs))
                return 0;

        error = -EBADF;
        file = fget_light(fd, &fput);
        if (file) {
                if (file->f_mode & FMODE_WRITE)
                        error = vmsplice_to_pipe(file, iov, nr_segs, flags);
                else if (file->f_mode & FMODE_READ)
                        error = vmsplice_to_user(file, iov, nr_segs, flags);

                fput_light(file, fput);
        }

        return error;
}

SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in,
                int, fd_out, loff_t __user *, off_out,
                size_t, len, unsigned int, flags)
{
        long error;
        struct file *in, *out;
        int fput_in, fput_out;

        if (unlikely(!len))
                return 0;

        error = -EBADF;
        in = fget_light(fd_in, &fput_in);
        if (in) {
                if (in->f_mode & FMODE_READ) {
                        out = fget_light(fd_out, &fput_out);
                        if (out) {
                                if (out->f_mode & FMODE_WRITE)
                                        error = do_splice(in, off_in,
                                                          out, off_out,
                                                          len, flags);
                                fput_light(out, fput_out);
                        }
                }

                fput_light(in, fput_in);
        }

        return error;
}
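/*
 * Aside: going the other way, vmsplice(2) on the write end maps the user
 * pages straight into the pipe, and with SPLICE_F_GIFT the pages may later
 * be stolen by a SPLICE_F_MOVE splice instead of copied (see
 * user_page_pipe_buf_steal() above). Compiled-out userspace sketch; note
 * that the gift path requires page-aligned base and length, per
 * get_iovec_page_array(), and a gifted buffer must not be reused.
 * fill_buffer() and pipe_wr_fd are hypothetical.
 */
#if 0
        struct iovec iov;
        char *buf;

        posix_memalign((void **)&buf, 4096, 4096);
        fill_buffer(buf, 4096);
        iov.iov_base = buf;
        iov.iov_len = 4096;
        vmsplice(pipe_wr_fd, &iov, 1, SPLICE_F_GIFT);
        /* buf now effectively belongs to the kernel, don't touch it again */
#endif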
/*
 * Make sure there's data to read. Wait for input if we can, otherwise
 * return an appropriate error.
 */
static int link_ipipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
        int ret;

        /*
         * Check ->nrbufs without the inode lock first. This function
         * is speculative anyway, so missing one is ok.
         */
        if (pipe->nrbufs)
                return 0;

        ret = 0;
        mutex_lock(&pipe->inode->i_mutex);

        while (!pipe->nrbufs) {
                if (signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
                if (!pipe->writers)
                        break;
                if (!pipe->waiting_writers) {
                        if (flags & SPLICE_F_NONBLOCK) {
                                ret = -EAGAIN;
                                break;
                        }
                }
                pipe_wait(pipe);
        }

        mutex_unlock(&pipe->inode->i_mutex);
        return ret;
}

/*
 * Make sure there's writable room. Wait for room if we can, otherwise
 * return an appropriate error.
 */
static int link_opipe_prep(struct pipe_inode_info *pipe, unsigned int flags)
{
        int ret;

        /*
         * Check ->nrbufs without the inode lock first. This function
         * is speculative anyway, so missing one is ok.
         */
        if (pipe->nrbufs < PIPE_BUFFERS)
                return 0;

        ret = 0;
        mutex_lock(&pipe->inode->i_mutex);

        while (pipe->nrbufs >= PIPE_BUFFERS) {
                if (!pipe->readers) {
                        send_sig(SIGPIPE, current, 0);
                        ret = -EPIPE;
                        break;
                }
                if (flags & SPLICE_F_NONBLOCK) {
                        ret = -EAGAIN;
                        break;
                }
                if (signal_pending(current)) {
                        ret = -ERESTARTSYS;
                        break;
                }
                pipe->waiting_writers++;
                pipe_wait(pipe);
                pipe->waiting_writers--;
        }

        mutex_unlock(&pipe->inode->i_mutex);
        return ret;
}
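/*
 * Note that both prep helpers only decide whether to block or error out;
 * they drop the pipe lock before link_pipe() runs, so their answer can go
 * stale. That is harmless: link_pipe() re-checks readers and buffer counts
 * under both pipe locks and simply transfers less (or nothing) if the
 * situation changed in between.
 */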
/*
 * Link contents of ipipe to opipe.
 */
static int link_pipe(struct pipe_inode_info *ipipe,
                     struct pipe_inode_info *opipe,
                     size_t len, unsigned int flags)
{
        struct pipe_buffer *ibuf, *obuf;
        int ret = 0, i = 0, nbuf;

        /*
         * Potential ABBA deadlock, work around it by ordering lock
         * grabbing by inode address. Otherwise two different processes
         * could deadlock (one doing tee from A -> B, the other from B -> A).
         */
        inode_double_lock(ipipe->inode, opipe->inode);

        do {
                if (!opipe->readers) {
                        send_sig(SIGPIPE, current, 0);
                        if (!ret)
                                ret = -EPIPE;
                        break;
                }

                /*
                 * If we have iterated all input buffers or ran out of
                 * output room, break.
                 */
                if (i >= ipipe->nrbufs || opipe->nrbufs >= PIPE_BUFFERS)
                        break;

                ibuf = ipipe->bufs + ((ipipe->curbuf + i) & (PIPE_BUFFERS - 1));
                nbuf = (opipe->curbuf + opipe->nrbufs) & (PIPE_BUFFERS - 1);

                /*
                 * Get a reference to this pipe buffer,
                 * so we can copy the contents over.
                 */
                ibuf->ops->get(ipipe, ibuf);

                obuf = opipe->bufs + nbuf;
                *obuf = *ibuf;

                /*
                 * Don't inherit the gift flag, we need to
                 * prevent multiple steals of this page.
                 */
                obuf->flags &= ~PIPE_BUF_FLAG_GIFT;

                if (obuf->len > len)
                        obuf->len = len;

                opipe->nrbufs++;
                ret += obuf->len;
                len -= obuf->len;
                i++;
        } while (len);

        /*
         * return EAGAIN if we have the potential of some data in the
         * future, otherwise just return 0
         */
        if (!ret && ipipe->waiting_writers && (flags & SPLICE_F_NONBLOCK))
                ret = -EAGAIN;

        inode_double_unlock(ipipe->inode, opipe->inode);

        /*
         * If we put data in the output pipe, wake up any potential readers.
         */
        if (ret > 0) {
                smp_mb();
                if (waitqueue_active(&opipe->wait))
                        wake_up_interruptible(&opipe->wait);
                kill_fasync(&opipe->fasync_readers, SIGIO, POLL_IN);
        }

        return ret;
}

/*
 * This is a tee(1) implementation that works on pipes. It doesn't copy
 * any data, it simply references the 'in' pages on the 'out' pipe.
 * The 'flags' used are the SPLICE_F_* variants, currently the only
 * applicable one is SPLICE_F_NONBLOCK.
 */
static long do_tee(struct file *in, struct file *out, size_t len,
                   unsigned int flags)
{
        struct pipe_inode_info *ipipe = pipe_info(in->f_path.dentry->d_inode);
        struct pipe_inode_info *opipe = pipe_info(out->f_path.dentry->d_inode);
        int ret = -EINVAL;

        /*
         * Duplicate the contents of ipipe to opipe without actually
         * copying the data.
         */
        if (ipipe && opipe && ipipe != opipe) {
                /*
                 * Keep going, unless we encounter an error. The ipipe/opipe
                 * ordering doesn't really matter.
                 */
                ret = link_ipipe_prep(ipipe, flags);
                if (!ret) {
                        ret = link_opipe_prep(opipe, flags);
                        if (!ret)
                                ret = link_pipe(ipipe, opipe, len, flags);
                }
        }

        return ret;
}

SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags)
{
        struct file *in;
        int error, fput_in;

        if (unlikely(!len))
                return 0;

        error = -EBADF;
        in = fget_light(fdin, &fput_in);
        if (in) {
                if (in->f_mode & FMODE_READ) {
                        int fput_out;
                        struct file *out = fget_light(fdout, &fput_out);

                        if (out) {
                                if (out->f_mode & FMODE_WRITE)
                                        error = do_tee(in, out, len, flags);
                                fput_light(out, fput_out);
                        }
                }
                fput_light(in, fput_in);
        }

        return error;
}
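/*
 * Aside: the classic demonstration of tee(2), adapted from its manual
 * page: duplicate stdin onto stdout while also splicing it to a file,
 * without copying any data in the process. Both stdin and stdout must be
 * pipes. Compiled-out userspace sketch (needs _GNU_SOURCE, <fcntl.h>,
 * <unistd.h>, <limits.h> and <errno.h>):
 */
#if 0
static int tee_stdin_to(int file_fd)
{
        ssize_t n;

        for (;;) {
                /* duplicate whatever sits in stdin's pipe onto stdout */
                n = tee(STDIN_FILENO, STDOUT_FILENO, INT_MAX, 0);
                if (n < 0)
                        return errno == EAGAIN ? 0 : -1;
                if (n == 0)
                        break;

                /* consume the input by splicing it to the file */
                while (n > 0) {
                        ssize_t m = splice(STDIN_FILENO, NULL, file_fd,
                                           NULL, n, SPLICE_F_MOVE);
                        if (m <= 0)
                                return -1;
                        n -= m;
                }
        }
        return 0;
}
#endif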