/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>

#include <asm/uaccess.h>
#include <asm/ioctls.h>

/*
 * We use a start+len construction, which provides full use of the
 * allocated memory.
 * -- Florian Coosmann (FGC)
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(wait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->wait, &wait,
			TASK_INTERRUPTIBLE | TASK_NONINTERACTIVE);
	if (pipe->inode)
		mutex_unlock(&pipe->inode->i_mutex);
	schedule();
	finish_wait(&pipe->wait, &wait);
	if (pipe->inode)
		mutex_lock(&pipe->inode->i_mutex);
}

static int
pipe_iov_copy_from_user(void *to, struct iovec *iov, unsigned long len,
			int atomic)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (atomic) {
			if (__copy_from_user_inatomic(to, iov->iov_base, copy))
				return -EFAULT;
		} else {
			if (copy_from_user(to, iov->iov_base, copy))
				return -EFAULT;
		}
		to += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

static int
pipe_iov_copy_to_user(struct iovec *iov, const void *from, unsigned long len,
		      int atomic)
{
	unsigned long copy;

	while (len > 0) {
		while (!iov->iov_len)
			iov++;
		copy = min_t(unsigned long, len, iov->iov_len);

		if (atomic) {
			if (__copy_to_user_inatomic(iov->iov_base, from, copy))
				return -EFAULT;
		} else {
			if (copy_to_user(iov->iov_base, from, copy))
				return -EFAULT;
		}
		from += copy;
		len -= copy;
		iov->iov_base += copy;
		iov->iov_len -= copy;
	}
	return 0;
}

/*
 * Attempt to pre-fault in the user memory, so we can use atomic copies.
 * Returns the number of bytes not faulted in.
 */
static int iov_fault_in_pages_write(struct iovec *iov, unsigned long len)
{
	while (!iov->iov_len)
		iov++;

	while (len > 0) {
		unsigned long this_len;

		this_len = min_t(unsigned long, len, iov->iov_len);
		if (fault_in_pages_writeable(iov->iov_base, this_len))
			break;

		len -= this_len;
		iov++;
	}

	return len;
}
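/*
 * Editorial note on the pre-fault + atomic-copy pattern used by the
 * helpers above and below (a summary, not from the original source):
 * a copy done under kmap_atomic() must not sleep, so it cannot service
 * a page fault.  The read/write paths therefore pre-fault the user
 * pages, attempt the atomic copy first, and on -EFAULT retry once via
 * the sleeping kmap() path (see the "redo" labels in pipe_readv() and
 * pipe_writev() further down).
 */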
/*
 * Pre-fault in the user memory, so we can use atomic copies.
 */
static void iov_fault_in_pages_read(struct iovec *iov, unsigned long len)
{
	while (!iov->iov_len)
		iov++;

	while (len > 0) {
		unsigned long this_len;

		this_len = min_t(unsigned long, len, iov->iov_len);
		fault_in_pages_readable(iov->iov_base, this_len);
		len -= this_len;
		iov++;
	}
}

static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		page_cache_release(page);
}

void *generic_pipe_buf_map(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf, int atomic)
{
	if (atomic) {
		buf->flags |= PIPE_BUF_FLAG_ATOMIC;
		return kmap_atomic(buf->page, KM_USER0);
	}

	return kmap(buf->page);
}

void generic_pipe_buf_unmap(struct pipe_inode_info *pipe,
			    struct pipe_buffer *buf, void *map_data)
{
	if (buf->flags & PIPE_BUF_FLAG_ATOMIC) {
		buf->flags &= ~PIPE_BUF_FLAG_ATOMIC;
		kunmap_atomic(map_data, KM_USER0);
	} else
		kunmap(buf->page);
}

int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1) {
		lock_page(page);
		return 0;
	}

	return 1;
}

void generic_pipe_buf_get(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	page_cache_get(buf->page);
}

int generic_pipe_buf_pin(struct pipe_inode_info *info, struct pipe_buffer *buf)
{
	return 0;
}

static struct pipe_buf_operations anon_pipe_buf_ops = {
	.can_merge = 1,
	.map = generic_pipe_buf_map,
	.unmap = generic_pipe_buf_unmap,
	.pin = generic_pipe_buf_pin,
	.release = anon_pipe_buf_release,
	.steal = generic_pipe_buf_steal,
	.get = generic_pipe_buf_get,
};
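/*
 * Rough summary of the pipe_buf_operations contract as the read/write
 * paths below use it (an editorial gloss, not from the original
 * source): ->pin() makes sure the page contents are valid before they
 * are touched (a no-op for anonymous pipe buffers); ->map()/->unmap()
 * bracket kernel access to the page; ->steal() tries to take exclusive
 * ownership of the page, returning 0 with the page locked on success;
 * ->get() takes an extra reference; ->release() drops the buffer's
 * reference once it has been fully consumed.  ->can_merge lets
 * pipe_writev() append to a partially filled buffer instead of
 * starting a new one.
 */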
static ssize_t
pipe_readv(struct file *filp, const struct iovec *_iov,
	   unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	int do_wakeup;
	ssize_t ret;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;

	total_len = iov_length(iov, nr_segs);
	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	for (;;) {
		int bufs = pipe->nrbufs;
		if (bufs) {
			int curbuf = pipe->curbuf;
			struct pipe_buffer *buf = pipe->bufs + curbuf;
			struct pipe_buf_operations *ops = buf->ops;
			void *addr;
			size_t chars = buf->len;
			int error, atomic;

			if (chars > total_len)
				chars = total_len;

			error = ops->pin(pipe, buf);
			if (error) {
				/* Propagate the pin error, unless we
				 * already transferred some data. */
				if (!ret)
					ret = error;
				break;
			}

			atomic = !iov_fault_in_pages_write(iov, chars);
redo:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_to_user(iov, addr + buf->offset, chars, atomic);
			ops->unmap(pipe, buf, addr);
			if (unlikely(error)) {
				/*
				 * Just retry with the slow path if we failed.
				 */
				if (atomic) {
					atomic = 0;
					goto redo;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;
			if (!buf->len) {
				buf->ops = NULL;
				ops->release(pipe, buf);
				curbuf = (curbuf + 1) & (PIPE_BUFFERS-1);
				pipe->curbuf = curbuf;
				pipe->nrbufs = --bufs;
				do_wakeup = 1;
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
		}
		if (bufs)	/* More to do? */
			continue;
		if (!pipe->writers)
			break;
		if (!pipe->waiting_writers) {
			/* syscall merging: Usually we must not sleep
			 * if O_NONBLOCK is set, or if we got some data.
			 * But if a writer sleeps in kernel space, then
			 * we can wait for that data without violating POSIX.
			 */
			if (ret)
				break;
			if (filp->f_flags & O_NONBLOCK) {
				ret = -EAGAIN;
				break;
			}
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}
		pipe_wait(pipe);
	}
	mutex_unlock(&inode->i_mutex);

	/* Signal writers asynchronously that there is more room. */
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static ssize_t
pipe_read(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	struct iovec iov = { .iov_base = buf, .iov_len = count };

	return pipe_readv(filp, &iov, 1, ppos);
}
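/*
 * Layout note (an informal sketch, not from the original source): the
 * pipe keeps up to PIPE_BUFFERS (16 in this kernel) page-backed
 * buffers in a circular array.  pipe->curbuf indexes the first
 * non-empty slot and pipe->nrbufs counts the occupied slots, so the
 * next free slot is (curbuf + nrbufs) & (PIPE_BUFFERS-1); the mask
 * works because PIPE_BUFFERS is a power of two.
 */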
static ssize_t
pipe_writev(struct file *filp, const struct iovec *_iov,
	    unsigned long nr_segs, loff_t *ppos)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	ssize_t ret;
	int do_wakeup;
	struct iovec *iov = (struct iovec *)_iov;
	size_t total_len;
	ssize_t chars;

	total_len = iov_length(iov, nr_segs);
	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	do_wakeup = 0;
	ret = 0;
	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

	/* We try to merge small writes */
	chars = total_len & (PAGE_SIZE-1); /* size of the last buffer */
	if (pipe->nrbufs && chars != 0) {
		int lastbuf = (pipe->curbuf + pipe->nrbufs - 1) &
							(PIPE_BUFFERS-1);
		struct pipe_buffer *buf = pipe->bufs + lastbuf;
		struct pipe_buf_operations *ops = buf->ops;
		int offset = buf->offset + buf->len;

		if (ops->can_merge && offset + chars <= PAGE_SIZE) {
			int error, atomic = 1;
			void *addr;

			error = ops->pin(pipe, buf);
			if (error)
				goto out;

			iov_fault_in_pages_read(iov, chars);
redo1:
			addr = ops->map(pipe, buf, atomic);
			error = pipe_iov_copy_from_user(offset + addr, iov,
							chars, atomic);
			ops->unmap(pipe, buf, addr);
			ret = error;
			do_wakeup = 1;
			if (error) {
				if (atomic) {
					atomic = 0;
					goto redo1;
				}
				goto out;
			}
			buf->len += chars;
			total_len -= chars;
			ret = chars;
			if (!total_len)
				goto out;
		}
	}
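	/*
	 * Illustration (editorial, not part of the original source):
	 * thanks to the merge above, two consecutive 100-byte writes
	 * land in one page of a single buffer rather than occupying a
	 * page each; only data that does not fit in the tail of the
	 * last buffer falls through to the slot-allocating loop below.
	 */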
	for (;;) {
		int bufs;

		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}
		bufs = pipe->nrbufs;
		if (bufs < PIPE_BUFFERS) {
			int newbuf = (pipe->curbuf + bufs) & (PIPE_BUFFERS-1);
			struct pipe_buffer *buf = pipe->bufs + newbuf;
			struct page *page = pipe->tmp_page;
			char *src;
			int error, atomic = 1;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}
			/* Always wake up, even if the copy fails. Otherwise
			 * we lock up (O_NONBLOCK-)readers that sleep due to
			 * syscall merging.
			 * FIXME! Is this really true?
			 */
			do_wakeup = 1;
			chars = PAGE_SIZE;
			if (chars > total_len)
				chars = total_len;

			iov_fault_in_pages_read(iov, chars);
redo2:
			if (atomic)
				src = kmap_atomic(page, KM_USER0);
			else
				src = kmap(page);

			error = pipe_iov_copy_from_user(src, iov, chars,
							atomic);
			if (atomic)
				kunmap_atomic(src, KM_USER0);
			else
				kunmap(page);

			if (unlikely(error)) {
				if (atomic) {
					atomic = 0;
					goto redo2;
				}
				if (!ret)
					ret = error;
				break;
			}
			ret += chars;

			/* Insert it into the buffer array */
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = chars;
			pipe->nrbufs = ++bufs;
			pipe->tmp_page = NULL;

			total_len -= chars;
			if (!total_len)
				break;
		}
		if (bufs < PIPE_BUFFERS)
			continue;
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}
		if (do_wakeup) {
			wake_up_interruptible_sync(&pipe->wait);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
			do_wakeup = 0;
		}
		pipe->waiting_writers++;
		pipe_wait(pipe);
		pipe->waiting_writers--;
	}
out:
	mutex_unlock(&inode->i_mutex);
	if (do_wakeup) {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (ret > 0)
		file_update_time(filp);
	return ret;
}

static ssize_t
pipe_write(struct file *filp, const char __user *buf,
	   size_t count, loff_t *ppos)
{
	struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count };

	return pipe_writev(filp, &iov, 1, ppos);
}

static ssize_t
bad_pipe_r(struct file *filp, char __user *buf, size_t count, loff_t *ppos)
{
	return -EBADF;
}

static ssize_t
bad_pipe_w(struct file *filp, const char __user *buf, size_t count,
	   loff_t *ppos)
{
	return -EBADF;
}

static int
pipe_ioctl(struct inode *pino, struct file *filp,
	   unsigned int cmd, unsigned long arg)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe;
	int count, buf, nrbufs;

	switch (cmd) {
		case FIONREAD:
			mutex_lock(&inode->i_mutex);
			pipe = inode->i_pipe;
			count = 0;
			buf = pipe->curbuf;
			nrbufs = pipe->nrbufs;
			while (--nrbufs >= 0) {
				count += pipe->bufs[buf].len;
				buf = (buf+1) & (PIPE_BUFFERS-1);
			}
			mutex_unlock(&inode->i_mutex);

			return put_user(count, (int __user *)arg);
		default:
			return -EINVAL;
	}
}
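/*
 * Userspace view (an illustrative note, not from the original file):
 * FIONREAD reports how many bytes are currently buffered, e.g.
 *
 *	int n;
 *	ioctl(pipefd[0], FIONREAD, &n);
 *
 * which is exactly the sum of buf->len over the occupied slots that
 * pipe_ioctl() computes above.
 */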
/* No kernel lock held - fine */
static unsigned int
pipe_poll(struct file *filp, poll_table *wait)
{
	unsigned int mask;
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int nrbufs;

	poll_wait(filp, &pipe->wait, wait);

	/* Reading only -- no need for acquiring the semaphore.  */
	nrbufs = pipe->nrbufs;
	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		mask = (nrbufs > 0) ? POLLIN | POLLRDNORM : 0;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= POLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		mask |= (nrbufs < PIPE_BUFFERS) ? POLLOUT | POLLWRNORM : 0;
		/*
		 * Most Unices do not set POLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= POLLERR;
	}

	return mask;
}

static int
pipe_release(struct inode *inode, int decr, int decw)
{
	struct pipe_inode_info *pipe;

	mutex_lock(&inode->i_mutex);
	pipe = inode->i_pipe;
	pipe->readers -= decr;
	pipe->writers -= decw;

	if (!pipe->readers && !pipe->writers) {
		free_pipe_info(inode);
	} else {
		wake_up_interruptible(&pipe->wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_read_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_readers);
	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}

static int
pipe_write_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	int retval;

	mutex_lock(&inode->i_mutex);
	retval = fasync_helper(fd, filp, on, &inode->i_pipe->fasync_writers);
	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}

static int
pipe_rdwr_fasync(int fd, struct file *filp, int on)
{
	struct inode *inode = filp->f_dentry->d_inode;
	struct pipe_inode_info *pipe = inode->i_pipe;
	int retval;

	mutex_lock(&inode->i_mutex);

	retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);

	if (retval >= 0)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);

	mutex_unlock(&inode->i_mutex);

	if (retval < 0)
		return retval;

	return 0;
}

static int
pipe_read_release(struct inode *inode, struct file *filp)
{
	pipe_read_fasync(-1, filp, 0);
	return pipe_release(inode, 1, 0);
}

static int
pipe_write_release(struct inode *inode, struct file *filp)
{
	pipe_write_fasync(-1, filp, 0);
	return pipe_release(inode, 0, 1);
}

static int
pipe_rdwr_release(struct inode *inode, struct file *filp)
{
	int decr, decw;

	pipe_rdwr_fasync(-1, filp, 0);
	decr = (filp->f_mode & FMODE_READ) != 0;
	decw = (filp->f_mode & FMODE_WRITE) != 0;
	return pipe_release(inode, decr, decw);
}

static int
pipe_read_open(struct inode *inode, struct file *filp)
{
	/* We could have perhaps used atomic_t, but this and friends
	   below are the only places.  So it doesn't seem worthwhile.  */
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->readers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_write_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}

static int
pipe_rdwr_open(struct inode *inode, struct file *filp)
{
	mutex_lock(&inode->i_mutex);
	if (filp->f_mode & FMODE_READ)
		inode->i_pipe->readers++;
	if (filp->f_mode & FMODE_WRITE)
		inode->i_pipe->writers++;
	mutex_unlock(&inode->i_mutex);

	return 0;
}
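/*
 * Lifetime note (an informal summary, not from the original source):
 * the readers/writers counters adjusted by the open/release hooks
 * above drive the visible pipe semantics -- a write with no readers
 * raises SIGPIPE and fails with -EPIPE, a read on an empty pipe with
 * no writers returns 0 (EOF), and poll() reports POLLHUP or POLLERR
 * accordingly.  Once both counters drop to zero, pipe_release() frees
 * the whole pipe via free_pipe_info().
 */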
/*
 * The file_operations structs are not static because they
 * are also used in linux/fs/fifo.c to do operations on FIFOs.
 */
const struct file_operations read_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

const struct file_operations write_fifo_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

const struct file_operations rdwr_fifo_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

static struct file_operations read_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= bad_pipe_w,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_read_open,
	.release	= pipe_read_release,
	.fasync		= pipe_read_fasync,
};

static struct file_operations write_pipe_fops = {
	.llseek		= no_llseek,
	.read		= bad_pipe_r,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_write_open,
	.release	= pipe_write_release,
	.fasync		= pipe_write_fasync,
};

static struct file_operations rdwr_pipe_fops = {
	.llseek		= no_llseek,
	.read		= pipe_read,
	.readv		= pipe_readv,
	.write		= pipe_write,
	.writev		= pipe_writev,
	.poll		= pipe_poll,
	.ioctl		= pipe_ioctl,
	.open		= pipe_rdwr_open,
	.release	= pipe_rdwr_release,
	.fasync		= pipe_rdwr_fasync,
};

struct pipe_inode_info * alloc_pipe_info(struct inode *inode)
{
	struct pipe_inode_info *pipe;

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL);
	if (pipe) {
		init_waitqueue_head(&pipe->wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->inode = inode;
	}

	return pipe;
}

void __free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

	for (i = 0; i < PIPE_BUFFERS; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			buf->ops->release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe);
}

void free_pipe_info(struct inode *inode)
{
	__free_pipe_info(inode->i_pipe);
	inode->i_pipe = NULL;
}

static struct vfsmount *pipe_mnt __read_mostly;

static int pipefs_delete_dentry(struct dentry *dentry)
{
	return 1;
}

static struct dentry_operations pipefs_dentry_operations = {
	.d_delete	= pipefs_delete_dentry,
};
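/*
 * Note (an informal aside, not from the original source): returning 1
 * from d_delete tells the dcache to drop a pipefs dentry as soon as
 * its last reference goes away, so pipe dentries never linger in the
 * cache -- they exist only to give each pipe an identity (the
 * "[inode#]" name built in do_pipe() below) on the internal pipefs
 * mount.
 */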
static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	pipe = alloc_pipe_info(inode);
	if (!pipe)
		goto fail_iput;
	inode->i_pipe = pipe;

	pipe->readers = pipe->writers = 1;
	inode->i_fop = &rdwr_pipe_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current->fsuid;
	inode->i_gid = current->fsgid;
	inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
	inode->i_blksize = PAGE_SIZE;

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

int do_pipe(int *fd)
{
	struct qstr this;
	char name[32];
	struct dentry *dentry;
	struct inode * inode;
	struct file *f1, *f2;
	int error;
	int i, j;

	error = -ENFILE;
	f1 = get_empty_filp();
	if (!f1)
		goto no_files;

	f2 = get_empty_filp();
	if (!f2)
		goto close_f1;

	inode = get_pipe_inode();
	if (!inode)
		goto close_f12;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode;
	i = error;

	error = get_unused_fd();
	if (error < 0)
		goto close_f12_inode_i;
	j = error;

	error = -ENOMEM;
	sprintf(name, "[%lu]", inode->i_ino);
	this.name = name;
	this.len = strlen(name);
	this.hash = inode->i_ino; /* will go */
	dentry = d_alloc(pipe_mnt->mnt_sb->s_root, &this);
	if (!dentry)
		goto close_f12_inode_i_j;

	dentry->d_op = &pipefs_dentry_operations;
	d_add(dentry, inode);
	f1->f_vfsmnt = f2->f_vfsmnt = mntget(mntget(pipe_mnt));
	f1->f_dentry = f2->f_dentry = dget(dentry);
	f1->f_mapping = f2->f_mapping = inode->i_mapping;

	/* read file */
	f1->f_pos = f2->f_pos = 0;
	f1->f_flags = O_RDONLY;
	f1->f_op = &read_pipe_fops;
	f1->f_mode = FMODE_READ;
	f1->f_version = 0;

	/* write file */
	f2->f_flags = O_WRONLY;
	f2->f_op = &write_pipe_fops;
	f2->f_mode = FMODE_WRITE;
	f2->f_version = 0;

	fd_install(i, f1);
	fd_install(j, f2);
	fd[0] = i;
	fd[1] = j;

	return 0;

close_f12_inode_i_j:
	put_unused_fd(j);
close_f12_inode_i:
	put_unused_fd(i);
close_f12_inode:
	free_pipe_info(inode);
	iput(inode);
close_f12:
	put_filp(f2);
close_f1:
	put_filp(f1);
no_files:
	return error;
}

/*
 * pipefs should _never_ be mounted by userland - too much of security hassle,
 * no real gain from having the whole whorehouse mounted. So we don't need
 * any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */
static int pipefs_get_sb(struct file_system_type *fs_type,
			 int flags, const char *dev_name, void *data,
			 struct vfsmount *mnt)
{
	return get_sb_pseudo(fs_type, "pipe:", NULL, PIPEFS_MAGIC, mnt);
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.get_sb		= pipefs_get_sb,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

static void __exit exit_pipe_fs(void)
{
	unregister_filesystem(&pipe_fs_type);
	mntput(pipe_mnt);
}

fs_initcall(init_pipe_fs);
module_exit(exit_pipe_fs);
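/*
 * Userspace sketch (illustrative only, not part of the original file):
 * the pattern served by do_pipe() above is
 *
 *	int fd[2];
 *	char buf[2];
 *	pipe(fd);			fd[0]: read end, fd[1]: write end
 *	write(fd[1], "hi", 2);
 *	read(fd[0], buf, 2);
 *
 * POSIX guarantees that writes of at most PIPE_BUF bytes are atomic;
 * here every pipe_writev() call fills buffers under i_mutex, so such
 * writes are never interleaved with other writers.
 */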