// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/pipe.c
 *
 *  Copyright (C) 1991, 1992, 1999  Linus Torvalds
 */

#include <linux/mm.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/log2.h>
#include <linux/mount.h>
#include <linux/pseudo_fs.h>
#include <linux/magic.h>
#include <linux/pipe_fs_i.h>
#include <linux/uio.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/audit.h>
#include <linux/syscalls.h>
#include <linux/fcntl.h>
#include <linux/memcontrol.h>
#include <linux/watch_queue.h>

#include <linux/uaccess.h>
#include <asm/ioctls.h>

#include "internal.h"

/*
 * The max size that a non-root user is allowed to grow the pipe. Can
 * be set by root in /proc/sys/fs/pipe-max-size
 */
unsigned int pipe_max_size = 1048576;

/* Maximum allocatable pages per user. Hard limit is unset by default, soft
 * matches default values.
 */
unsigned long pipe_user_pages_hard;
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;

/*
 * We use head and tail indices that aren't masked off, except at the point of
 * dereference, but rather they're allowed to wrap naturally. This means there
 * isn't a dead spot in the buffer, but the ring has to be a power of two and
 * <= 2^31.
 * -- David Howells 2019-09-23.
 *
 * Reads with count = 0 should always return 0.
 * -- Julian Bradfield 1999-06-07.
 *
 * FIFOs and Pipes now generate SIGIO for both readers and writers.
 * -- Jeremy Elson <jelson@circlemud.org> 2001-08-16
 *
 * pipe_read & write cleanup
 * -- Manfred Spraul <manfred@colorfullife.com> 2002-05-09
 */

static void pipe_lock_nested(struct pipe_inode_info *pipe, int subclass)
{
	if (pipe->files)
		mutex_lock_nested(&pipe->mutex, subclass);
}

void pipe_lock(struct pipe_inode_info *pipe)
{
	/*
	 * pipe_lock() nests non-pipe inode locks (for writing to a file)
	 */
	pipe_lock_nested(pipe, I_MUTEX_PARENT);
}
EXPORT_SYMBOL(pipe_lock);

void pipe_unlock(struct pipe_inode_info *pipe)
{
	if (pipe->files)
		mutex_unlock(&pipe->mutex);
}
EXPORT_SYMBOL(pipe_unlock);

static inline void __pipe_lock(struct pipe_inode_info *pipe)
{
	mutex_lock_nested(&pipe->mutex, I_MUTEX_PARENT);
}

static inline void __pipe_unlock(struct pipe_inode_info *pipe)
{
	mutex_unlock(&pipe->mutex);
}

void pipe_double_lock(struct pipe_inode_info *pipe1,
		      struct pipe_inode_info *pipe2)
{
	BUG_ON(pipe1 == pipe2);

	if (pipe1 < pipe2) {
		pipe_lock_nested(pipe1, I_MUTEX_PARENT);
		pipe_lock_nested(pipe2, I_MUTEX_CHILD);
	} else {
		pipe_lock_nested(pipe2, I_MUTEX_PARENT);
		pipe_lock_nested(pipe1, I_MUTEX_CHILD);
	}
}

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe)
{
	DEFINE_WAIT(rdwait);
	DEFINE_WAIT(wrwait);

	/*
	 * Pipes are system-local resources, so sleeping on them
	 * is considered a noninteractive wait:
	 */
	prepare_to_wait(&pipe->rd_wait, &rdwait, TASK_INTERRUPTIBLE);
	prepare_to_wait(&pipe->wr_wait, &wrwait, TASK_INTERRUPTIBLE);
	pipe_unlock(pipe);
	schedule();
	finish_wait(&pipe->rd_wait, &rdwait);
	finish_wait(&pipe->wr_wait, &wrwait);
	pipe_lock(pipe);
}

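/*
 * Editorial sketch (not compiled): the unmasked head/tail scheme described
 * in the header comment above, using hypothetical values. The helpers
 * pipe_empty(), pipe_full() and pipe_occupancy() used throughout this file
 * are defined in include/linux/pipe_fs_i.h and reduce to this arithmetic:
 *
 *	unsigned int ring_size = 16;		// power of two, <= 2^31
 *	unsigned int mask = ring_size - 1;
 *	unsigned int tail = 0xfffffffeU;	// about to wrap around
 *	unsigned int head = 0x00000002U;	// already wrapped around
 *
 *	unsigned int occupancy = head - tail;	// == 4 slots in use despite the wrap
 *	bool empty = (head == tail);		// false
 *	bool full = (occupancy >= limit);	// the limit is pipe->max_usage in this file
 *	struct pipe_buffer *buf = &pipe->bufs[tail & mask];	// masked only at dereference
 */
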
static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
				  struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * If nobody else uses this page, and we don't already have a
	 * temporary page, let's keep track of it as a one-deep
	 * allocation cache. (Otherwise just release our reference to it)
	 */
	if (page_count(page) == 1 && !pipe->tmp_page)
		pipe->tmp_page = page;
	else
		put_page(page);
}

static int anon_pipe_buf_steal(struct pipe_inode_info *pipe,
			       struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	if (page_count(page) == 1) {
		memcg_kmem_uncharge_page(page, 0);
		__SetPageLocked(page);
		return 0;
	}
	return 1;
}

/**
 * generic_pipe_buf_steal - attempt to take ownership of a &pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to attempt to steal
 *
 * Description:
 *	This function attempts to steal the &struct page attached to
 *	@buf. If successful, this function returns 0 and returns with
 *	the page locked. The caller may then reuse the page for whatever
 *	he wishes; the typical use is insertion into a different file
 *	page cache.
 */
int generic_pipe_buf_steal(struct pipe_inode_info *pipe,
			   struct pipe_buffer *buf)
{
	struct page *page = buf->page;

	/*
	 * A reference of one is golden, that means that the owner of this
	 * page is the only one holding a reference to it. lock the page
	 * and return OK.
	 */
	if (page_count(page) == 1) {
		lock_page(page);
		return 0;
	}

	return 1;
}
EXPORT_SYMBOL(generic_pipe_buf_steal);

/**
 * generic_pipe_buf_get - get a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to get a reference to
 *
 * Description:
 *	This function grabs an extra reference to @buf. It's used in
 *	the tee() system call, when we duplicate the buffers in one
 *	pipe into another.
 */
bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
	return try_get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);

/**
 * generic_pipe_buf_confirm - verify contents of the pipe buffer
 * @info:	the pipe that the buffer belongs to
 * @buf:	the buffer to confirm
 *
 * Description:
 *	This function does nothing, because the generic pipe code uses
 *	pages that are always good when inserted into the pipe.
 */
int generic_pipe_buf_confirm(struct pipe_inode_info *info,
			     struct pipe_buffer *buf)
{
	return 0;
}
EXPORT_SYMBOL(generic_pipe_buf_confirm);

/**
 * generic_pipe_buf_release - put a reference to a &struct pipe_buffer
 * @pipe:	the pipe that the buffer belongs to
 * @buf:	the buffer to put a reference to
 *
 * Description:
 *	This function releases a reference to @buf.
 */
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
			      struct pipe_buffer *buf)
{
	put_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_release);

/* New data written to a pipe may be appended to a buffer with this type. */
static const struct pipe_buf_operations anon_pipe_buf_ops = {
	.confirm	= generic_pipe_buf_confirm,
	.release	= anon_pipe_buf_release,
	.steal		= anon_pipe_buf_steal,
	.get		= generic_pipe_buf_get,
};

static const struct pipe_buf_operations anon_pipe_buf_nomerge_ops = {
	.confirm	= generic_pipe_buf_confirm,
	.release	= anon_pipe_buf_release,
	.steal		= anon_pipe_buf_steal,
	.get		= generic_pipe_buf_get,
};

static const struct pipe_buf_operations packet_pipe_buf_ops = {
	.confirm	= generic_pipe_buf_confirm,
	.release	= anon_pipe_buf_release,
	.steal		= anon_pipe_buf_steal,
	.get		= generic_pipe_buf_get,
};

/**
 * pipe_buf_mark_unmergeable - mark a &struct pipe_buffer as unmergeable
 * @buf:	the buffer to mark
 *
 * Description:
 *	This function ensures that no future writes will be merged into the
 *	given &struct pipe_buffer. This is necessary when multiple pipe buffers
 *	share the same backing page.
 */
void pipe_buf_mark_unmergeable(struct pipe_buffer *buf)
{
	if (buf->ops == &anon_pipe_buf_ops)
		buf->ops = &anon_pipe_buf_nomerge_ops;
}

static bool pipe_buf_can_merge(struct pipe_buffer *buf)
{
	return buf->ops == &anon_pipe_buf_ops;
}

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_readable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int writers = READ_ONCE(pipe->writers);

	return !pipe_empty(head, tail) || !writers;
}

static ssize_t
pipe_read(struct kiocb *iocb, struct iov_iter *to)
{
	size_t total_len = iov_iter_count(to);
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	bool was_full, wake_next_reader = false;
	ssize_t ret;

	/* Null read succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	ret = 0;
	__pipe_lock(pipe);

	/*
	 * We only wake up writers if the pipe was full when we started
	 * reading in order to avoid unnecessary wakeups.
	 *
	 * But when we do wake up writers, we do so using a sync wakeup
	 * (WF_SYNC), because we want them to get going and generate more
	 * data for us.
	 */
	was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
	for (;;) {
		unsigned int head = pipe->head;
		unsigned int tail = pipe->tail;
		unsigned int mask = pipe->ring_size - 1;

#ifdef CONFIG_WATCH_QUEUE
		if (pipe->note_loss) {
			struct watch_notification n;

			if (total_len < 8) {
				if (ret == 0)
					ret = -ENOBUFS;
				break;
			}

			n.type = WATCH_TYPE_META;
			n.subtype = WATCH_META_LOSS_NOTIFICATION;
			n.info = watch_sizeof(n);
			if (copy_to_iter(&n, sizeof(n), to) != sizeof(n)) {
				if (ret == 0)
					ret = -EFAULT;
				break;
			}
			ret += sizeof(n);
			total_len -= sizeof(n);
			pipe->note_loss = false;
		}
#endif

		if (!pipe_empty(head, tail)) {
			struct pipe_buffer *buf = &pipe->bufs[tail & mask];
			size_t chars = buf->len;
			size_t written;
			int error;

			if (chars > total_len) {
				if (buf->flags & PIPE_BUF_FLAG_WHOLE) {
					if (ret == 0)
						ret = -ENOBUFS;
					break;
				}
				chars = total_len;
			}

			error = pipe_buf_confirm(pipe, buf);
			if (error) {
				if (!ret)
					ret = error;
				break;
			}

			written = copy_page_to_iter(buf->page, buf->offset, chars, to);
			if (unlikely(written < chars)) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += chars;
			buf->offset += chars;
			buf->len -= chars;

			/* Was it a packet buffer? Clean up and exit */
			if (buf->flags & PIPE_BUF_FLAG_PACKET) {
				total_len = chars;
				buf->len = 0;
			}

			if (!buf->len) {
				pipe_buf_release(pipe, buf);
				spin_lock_irq(&pipe->rd_wait.lock);
#ifdef CONFIG_WATCH_QUEUE
				if (buf->flags & PIPE_BUF_FLAG_LOSS)
					pipe->note_loss = true;
#endif
				tail++;
				pipe->tail = tail;
				spin_unlock_irq(&pipe->rd_wait.lock);
			}
			total_len -= chars;
			if (!total_len)
				break;	/* common path: read succeeded */
			if (!pipe_empty(head, tail))	/* More to do? */
				continue;
		}

		if (!pipe->writers)
			break;
		if (ret)
			break;
		if (filp->f_flags & O_NONBLOCK) {
			ret = -EAGAIN;
			break;
		}
		__pipe_unlock(pipe);

		/*
		 * We only get here if we didn't actually read anything.
		 *
		 * However, we could have seen (and removed) a zero-sized
		 * pipe buffer, and might have made space in the buffers
		 * that way.
		 *
		 * You can't make zero-sized pipe buffers by doing an empty
		 * write (not even in packet mode), but they can happen if
		 * the writer gets an EFAULT when trying to fill a buffer
		 * that already got allocated and inserted in the buffer
		 * array.
		 *
		 * So we still need to wake up any pending writers in the
		 * _very_ unlikely case that the pipe was full, but we got
		 * no data.
		 */
		if (unlikely(was_full)) {
			wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
			kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
		}

		/*
		 * But because we didn't read anything, at this point we can
		 * just return directly with -ERESTARTSYS if we're interrupted,
		 * since we've done any required wakeups and there's no need
		 * to mark anything accessed. And we've dropped the lock.
		 */
		if (wait_event_interruptible_exclusive(pipe->rd_wait, pipe_readable(pipe)) < 0)
			return -ERESTARTSYS;

		__pipe_lock(pipe);
		was_full = pipe_full(pipe->head, pipe->tail, pipe->max_usage);
		wake_next_reader = true;
	}
	if (pipe_empty(pipe->head, pipe->tail))
		wake_next_reader = false;
	__pipe_unlock(pipe);

	if (was_full) {
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	if (wake_next_reader)
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
	if (ret > 0)
		file_accessed(filp);
	return ret;
}

static inline int is_packetized(struct file *file)
{
	return (file->f_flags & O_DIRECT) != 0;
}

/* Done while waiting without holding the pipe lock - thus the READ_ONCE() */
static inline bool pipe_writable(const struct pipe_inode_info *pipe)
{
	unsigned int head = READ_ONCE(pipe->head);
	unsigned int tail = READ_ONCE(pipe->tail);
	unsigned int max_usage = READ_ONCE(pipe->max_usage);

	return !pipe_full(head, tail, max_usage) ||
		!READ_ONCE(pipe->readers);
}

static ssize_t
pipe_write(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *filp = iocb->ki_filp;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head;
	ssize_t ret = 0;
	size_t total_len = iov_iter_count(from);
	ssize_t chars;
	bool was_empty = false;
	bool wake_next_writer = false;

	/* Null write succeeds. */
	if (unlikely(total_len == 0))
		return 0;

	__pipe_lock(pipe);

	if (!pipe->readers) {
		send_sig(SIGPIPE, current, 0);
		ret = -EPIPE;
		goto out;
	}

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue) {
		ret = -EXDEV;
		goto out;
	}
#endif

	/*
	 * Only wake up if the pipe started out empty, since
	 * otherwise there should be no readers waiting.
	 *
	 * If it wasn't empty we try to merge new data into
	 * the last buffer.
	 *
	 * That naturally merges small writes, but it also
	 * page-aligns the rest of the writes for large writes
	 * spanning multiple pages.
	 */
	head = pipe->head;
	was_empty = pipe_empty(head, pipe->tail);
	chars = total_len & (PAGE_SIZE-1);
	if (chars && !was_empty) {
		unsigned int mask = pipe->ring_size - 1;
		struct pipe_buffer *buf = &pipe->bufs[(head - 1) & mask];
		int offset = buf->offset + buf->len;

		if (pipe_buf_can_merge(buf) && offset + chars <= PAGE_SIZE) {
			ret = pipe_buf_confirm(pipe, buf);
			if (ret)
				goto out;

			ret = copy_page_from_iter(buf->page, offset, chars, from);
			if (unlikely(ret < chars)) {
				ret = -EFAULT;
				goto out;
			}

			buf->len += ret;
			if (!iov_iter_count(from))
				goto out;
		}
	}

	for (;;) {
		if (!pipe->readers) {
			send_sig(SIGPIPE, current, 0);
			if (!ret)
				ret = -EPIPE;
			break;
		}

		head = pipe->head;
		if (!pipe_full(head, pipe->tail, pipe->max_usage)) {
			unsigned int mask = pipe->ring_size - 1;
			struct pipe_buffer *buf = &pipe->bufs[head & mask];
			struct page *page = pipe->tmp_page;
			int copied;

			if (!page) {
				page = alloc_page(GFP_HIGHUSER | __GFP_ACCOUNT);
				if (unlikely(!page)) {
					ret = ret ? : -ENOMEM;
					break;
				}
				pipe->tmp_page = page;
			}

			/* Allocate a slot in the ring in advance and attach an
			 * empty buffer.  If we fault or otherwise fail to use
			 * it, either the reader will consume it or it'll still
			 * be there for the next write.
			 */
			spin_lock_irq(&pipe->rd_wait.lock);

			head = pipe->head;
			if (pipe_full(head, pipe->tail, pipe->max_usage)) {
				spin_unlock_irq(&pipe->rd_wait.lock);
				continue;
			}

			pipe->head = head + 1;
			spin_unlock_irq(&pipe->rd_wait.lock);

			/* Insert it into the buffer array */
			buf = &pipe->bufs[head & mask];
			buf->page = page;
			buf->ops = &anon_pipe_buf_ops;
			buf->offset = 0;
			buf->len = 0;
			buf->flags = 0;
			if (is_packetized(filp)) {
				buf->ops = &packet_pipe_buf_ops;
				buf->flags = PIPE_BUF_FLAG_PACKET;
			}
			pipe->tmp_page = NULL;

			copied = copy_page_from_iter(page, 0, PAGE_SIZE, from);
			if (unlikely(copied < PAGE_SIZE && iov_iter_count(from))) {
				if (!ret)
					ret = -EFAULT;
				break;
			}
			ret += copied;
			buf->offset = 0;
			buf->len = copied;

			if (!iov_iter_count(from))
				break;
		}

		if (!pipe_full(head, pipe->tail, pipe->max_usage))
			continue;

		/* Wait for buffer space to become available. */
		if (filp->f_flags & O_NONBLOCK) {
			if (!ret)
				ret = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			if (!ret)
				ret = -ERESTARTSYS;
			break;
		}

		/*
		 * We're going to release the pipe lock and wait for more
		 * space. We wake up any readers if necessary, and then
		 * after waiting we need to re-check whether the pipe
		 * became empty while we dropped the lock.
		 */
		__pipe_unlock(pipe);
		if (was_empty) {
			wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
			kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		}
		wait_event_interruptible_exclusive(pipe->wr_wait, pipe_writable(pipe));
		__pipe_lock(pipe);
		was_empty = pipe_empty(pipe->head, pipe->tail);
		wake_next_writer = true;
	}
out:
	if (pipe_full(pipe->head, pipe->tail, pipe->max_usage))
		wake_next_writer = false;
	__pipe_unlock(pipe);

	/*
	 * If we do a wakeup event, we do a 'sync' wakeup, because we
	 * want the reader to start processing things asap, rather than
	 * leave the data pending.
	 *
	 * This is particularly important for small writes, because of
	 * how (for example) the GNU make jobserver uses small writes to
	 * wake up pending jobs.
	 */
	if (was_empty) {
		wake_up_interruptible_sync_poll(&pipe->rd_wait, EPOLLIN | EPOLLRDNORM);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
	}
	if (wake_next_writer)
		wake_up_interruptible_sync_poll(&pipe->wr_wait, EPOLLOUT | EPOLLWRNORM);
	if (ret > 0 && sb_start_write_trylock(file_inode(filp)->i_sb)) {
		int err = file_update_time(filp);
		if (err)
			ret = err;
		sb_end_write(file_inode(filp)->i_sb);
	}
	return ret;
}

static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int count, head, tail, mask;

	switch (cmd) {
	case FIONREAD:
		__pipe_lock(pipe);
		count = 0;
		head = pipe->head;
		tail = pipe->tail;
		mask = pipe->ring_size - 1;

		while (tail != head) {
			count += pipe->bufs[tail & mask].len;
			tail++;
		}
		__pipe_unlock(pipe);

		return put_user(count, (int __user *)arg);

#ifdef CONFIG_WATCH_QUEUE
	case IOC_WATCH_QUEUE_SET_SIZE: {
		int ret;
		__pipe_lock(pipe);
		ret = watch_queue_set_size(pipe, arg);
		__pipe_unlock(pipe);
		return ret;
	}

	case IOC_WATCH_QUEUE_SET_FILTER:
		return watch_queue_set_filter(
			pipe, (struct watch_notification_filter __user *)arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
}

/* No kernel lock held - fine */
static __poll_t
pipe_poll(struct file *filp, poll_table *wait)
{
	__poll_t mask;
	struct pipe_inode_info *pipe = filp->private_data;
	unsigned int head, tail;

	/*
	 * Reading pipe state only -- no need for acquiring the semaphore.
	 *
	 * But because this is racy, the code has to add the
	 * entry to the poll table _first_ ..
	 */
	if (filp->f_mode & FMODE_READ)
		poll_wait(filp, &pipe->rd_wait, wait);
	if (filp->f_mode & FMODE_WRITE)
		poll_wait(filp, &pipe->wr_wait, wait);

	/*
	 * .. and only then can you do the racy tests. That way,
	 * if something changes and you got it wrong, the poll
	 * table entry will wake you up and fix it.
	 */
	head = READ_ONCE(pipe->head);
	tail = READ_ONCE(pipe->tail);

	mask = 0;
	if (filp->f_mode & FMODE_READ) {
		if (!pipe_empty(head, tail))
			mask |= EPOLLIN | EPOLLRDNORM;
		if (!pipe->writers && filp->f_version != pipe->w_counter)
			mask |= EPOLLHUP;
	}

	if (filp->f_mode & FMODE_WRITE) {
		if (!pipe_full(head, tail, pipe->max_usage))
			mask |= EPOLLOUT | EPOLLWRNORM;
		/*
		 * Most Unices do not set EPOLLERR for FIFOs but on Linux they
		 * behave exactly like pipes for poll().
		 */
		if (!pipe->readers)
			mask |= EPOLLERR;
	}

	return mask;
}

static void put_pipe_info(struct inode *inode, struct pipe_inode_info *pipe)
{
	int kill = 0;

	spin_lock(&inode->i_lock);
	if (!--pipe->files) {
		inode->i_pipe = NULL;
		kill = 1;
	}
	spin_unlock(&inode->i_lock);

	if (kill)
		free_pipe_info(pipe);
}

static int
pipe_release(struct inode *inode, struct file *file)
{
	struct pipe_inode_info *pipe = file->private_data;

	__pipe_lock(pipe);
	if (file->f_mode & FMODE_READ)
		pipe->readers--;
	if (file->f_mode & FMODE_WRITE)
		pipe->writers--;

	/* Was that the last reader or writer, but not the other side? */
	if (!pipe->readers != !pipe->writers) {
		wake_up_interruptible_all(&pipe->rd_wait);
		wake_up_interruptible_all(&pipe->wr_wait);
		kill_fasync(&pipe->fasync_readers, SIGIO, POLL_IN);
		kill_fasync(&pipe->fasync_writers, SIGIO, POLL_OUT);
	}
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return 0;
}

static int
pipe_fasync(int fd, struct file *filp, int on)
{
	struct pipe_inode_info *pipe = filp->private_data;
	int retval = 0;

	__pipe_lock(pipe);
	if (filp->f_mode & FMODE_READ)
		retval = fasync_helper(fd, filp, on, &pipe->fasync_readers);
	if ((filp->f_mode & FMODE_WRITE) && retval >= 0) {
		retval = fasync_helper(fd, filp, on, &pipe->fasync_writers);
		if (retval < 0 && (filp->f_mode & FMODE_READ))
			/* this can happen only if on == T */
			fasync_helper(-1, filp, 0, &pipe->fasync_readers);
	}
	__pipe_unlock(pipe);
	return retval;
}

unsigned long account_pipe_buffers(struct user_struct *user,
				   unsigned long old, unsigned long new)
{
	return atomic_long_add_return(new - old, &user->pipe_bufs);
}

bool too_many_pipe_buffers_soft(unsigned long user_bufs)
{
	unsigned long soft_limit = READ_ONCE(pipe_user_pages_soft);

	return soft_limit && user_bufs > soft_limit;
}

bool too_many_pipe_buffers_hard(unsigned long user_bufs)
{
	unsigned long hard_limit = READ_ONCE(pipe_user_pages_hard);

	return hard_limit && user_bufs > hard_limit;
}

bool pipe_is_unprivileged_user(void)
{
	return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
}

struct pipe_inode_info *alloc_pipe_info(void)
{
	struct pipe_inode_info *pipe;
	unsigned long pipe_bufs = PIPE_DEF_BUFFERS;
	struct user_struct *user = get_current_user();
	unsigned long user_bufs;
	unsigned int max_size = READ_ONCE(pipe_max_size);

	pipe = kzalloc(sizeof(struct pipe_inode_info), GFP_KERNEL_ACCOUNT);
	if (pipe == NULL)
		goto out_free_uid;

	if (pipe_bufs * PAGE_SIZE > max_size && !capable(CAP_SYS_RESOURCE))
		pipe_bufs = max_size >> PAGE_SHIFT;

	user_bufs = account_pipe_buffers(user, 0, pipe_bufs);

	if (too_many_pipe_buffers_soft(user_bufs) && pipe_is_unprivileged_user()) {
		user_bufs = account_pipe_buffers(user, pipe_bufs, 1);
		pipe_bufs = 1;
	}

	if (too_many_pipe_buffers_hard(user_bufs) && pipe_is_unprivileged_user())
		goto out_revert_acct;

	pipe->bufs = kcalloc(pipe_bufs, sizeof(struct pipe_buffer),
			     GFP_KERNEL_ACCOUNT);

	if (pipe->bufs) {
		init_waitqueue_head(&pipe->rd_wait);
		init_waitqueue_head(&pipe->wr_wait);
		pipe->r_counter = pipe->w_counter = 1;
		pipe->max_usage = pipe_bufs;
		pipe->ring_size = pipe_bufs;
		pipe->nr_accounted = pipe_bufs;
		pipe->user = user;
		mutex_init(&pipe->mutex);
		return pipe;
	}

out_revert_acct:
	(void) account_pipe_buffers(user, pipe_bufs, 0);
	kfree(pipe);
out_free_uid:
	free_uid(user);
	return NULL;
}

void free_pipe_info(struct pipe_inode_info *pipe)
{
	int i;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue) {
		watch_queue_clear(pipe->watch_queue);
		put_watch_queue(pipe->watch_queue);
	}
#endif

	(void) account_pipe_buffers(pipe->user, pipe->nr_accounted, 0);
	free_uid(pipe->user);
	for (i = 0; i < pipe->ring_size; i++) {
		struct pipe_buffer *buf = pipe->bufs + i;
		if (buf->ops)
			pipe_buf_release(pipe, buf);
	}
	if (pipe->tmp_page)
		__free_page(pipe->tmp_page);
	kfree(pipe->bufs);
	kfree(pipe);
}

static struct vfsmount *pipe_mnt __read_mostly;

/*
 * pipefs_dname() is called from d_path().
 */
static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
{
	return dynamic_dname(dentry, buffer, buflen, "pipe:[%lu]",
				d_inode(dentry)->i_ino);
}

static const struct dentry_operations pipefs_dentry_operations = {
	.d_dname	= pipefs_dname,
};

static struct inode * get_pipe_inode(void)
{
	struct inode *inode = new_inode_pseudo(pipe_mnt->mnt_sb);
	struct pipe_inode_info *pipe;

	if (!inode)
		goto fail_inode;

	inode->i_ino = get_next_ino();

	pipe = alloc_pipe_info();
	if (!pipe)
		goto fail_iput;

	inode->i_pipe = pipe;
	pipe->files = 2;
	pipe->readers = pipe->writers = 1;
	inode->i_fop = &pipefifo_fops;

	/*
	 * Mark the inode dirty from the very beginning,
	 * that way it will never be moved to the dirty
	 * list because "mark_inode_dirty()" will think
	 * that it already _is_ on the dirty list.
	 */
	inode->i_state = I_DIRTY;
	inode->i_mode = S_IFIFO | S_IRUSR | S_IWUSR;
	inode->i_uid = current_fsuid();
	inode->i_gid = current_fsgid();
	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);

	return inode;

fail_iput:
	iput(inode);

fail_inode:
	return NULL;
}

int create_pipe_files(struct file **res, int flags)
{
	struct inode *inode = get_pipe_inode();
	struct file *f;

	if (!inode)
		return -ENFILE;

	if (flags & O_NOTIFICATION_PIPE) {
#ifdef CONFIG_WATCH_QUEUE
		if (watch_queue_init(inode->i_pipe) < 0) {
			iput(inode);
			return -ENOMEM;
		}
#else
		return -ENOPKG;
#endif
	}

	f = alloc_file_pseudo(inode, pipe_mnt, "",
				O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)),
				&pipefifo_fops);
	if (IS_ERR(f)) {
		free_pipe_info(inode->i_pipe);
		iput(inode);
		return PTR_ERR(f);
	}

	f->private_data = inode->i_pipe;

	res[0] = alloc_file_clone(f, O_RDONLY | (flags & O_NONBLOCK),
				  &pipefifo_fops);
	if (IS_ERR(res[0])) {
		put_pipe_info(inode, inode->i_pipe);
		fput(f);
		return PTR_ERR(res[0]);
	}
	res[0]->private_data = inode->i_pipe;
	res[1] = f;
	stream_open(inode, res[0]);
	stream_open(inode, res[1]);
	return 0;
}

static int __do_pipe_flags(int *fd, struct file **files, int flags)
{
	int error;
	int fdw, fdr;

	if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT | O_NOTIFICATION_PIPE))
		return -EINVAL;

	error = create_pipe_files(files, flags);
	if (error)
		return error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_read_pipe;
	fdr = error;

	error = get_unused_fd_flags(flags);
	if (error < 0)
		goto err_fdr;
	fdw = error;

	audit_fd_pair(fdr, fdw);
	fd[0] = fdr;
	fd[1] = fdw;
	return 0;

err_fdr:
	put_unused_fd(fdr);
err_read_pipe:
	fput(files[0]);
	fput(files[1]);
	return error;
}

int do_pipe_flags(int *fd, int flags)
{
	struct file *files[2];
	int error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		fd_install(fd[0], files[0]);
		fd_install(fd[1], files[1]);
	}
	return error;
}

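/*
 * Editorial illustration (userspace, not compiled here): how the pipe2()
 * and pipe() system calls defined below are typically consumed. Only the
 * documented uapi flags are used; error handling is abbreviated.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	char c;
 *
 *	if (pipe2(fds, O_CLOEXEC | O_NONBLOCK) < 0)
 *		return -1;
 *	// fds[0] is the read end and fds[1] the write end, matching the
 *	// fd[0]/fd[1] ordering set up by __do_pipe_flags() above.
 *	write(fds[1], "x", 1);
 *	read(fds[0], &c, 1);
 */
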
/*
 * sys_pipe() is the normal C calling standard for creating
 * a pipe. It's not the way Unix traditionally does this, though.
 */
static int do_pipe2(int __user *fildes, int flags)
{
	struct file *files[2];
	int fd[2];
	int error;

	error = __do_pipe_flags(fd, files, flags);
	if (!error) {
		if (unlikely(copy_to_user(fildes, fd, sizeof(fd)))) {
			fput(files[0]);
			fput(files[1]);
			put_unused_fd(fd[0]);
			put_unused_fd(fd[1]);
			error = -EFAULT;
		} else {
			fd_install(fd[0], files[0]);
			fd_install(fd[1], files[1]);
		}
	}
	return error;
}

SYSCALL_DEFINE2(pipe2, int __user *, fildes, int, flags)
{
	return do_pipe2(fildes, flags);
}

SYSCALL_DEFINE1(pipe, int __user *, fildes)
{
	return do_pipe2(fildes, 0);
}

static int wait_for_partner(struct pipe_inode_info *pipe, unsigned int *cnt)
{
	int cur = *cnt;

	while (cur == *cnt) {
		pipe_wait(pipe);
		if (signal_pending(current))
			break;
	}
	return cur == *cnt ? -ERESTARTSYS : 0;
}

static void wake_up_partner(struct pipe_inode_info *pipe)
{
	wake_up_interruptible_all(&pipe->rd_wait);
	wake_up_interruptible_all(&pipe->wr_wait);
}

static int fifo_open(struct inode *inode, struct file *filp)
{
	struct pipe_inode_info *pipe;
	bool is_pipe = inode->i_sb->s_magic == PIPEFS_MAGIC;
	int ret;

	filp->f_version = 0;

	spin_lock(&inode->i_lock);
	if (inode->i_pipe) {
		pipe = inode->i_pipe;
		pipe->files++;
		spin_unlock(&inode->i_lock);
	} else {
		spin_unlock(&inode->i_lock);
		pipe = alloc_pipe_info();
		if (!pipe)
			return -ENOMEM;
		pipe->files = 1;
		spin_lock(&inode->i_lock);
		if (unlikely(inode->i_pipe)) {
			inode->i_pipe->files++;
			spin_unlock(&inode->i_lock);
			free_pipe_info(pipe);
			pipe = inode->i_pipe;
		} else {
			inode->i_pipe = pipe;
			spin_unlock(&inode->i_lock);
		}
	}
	filp->private_data = pipe;
	/* OK, we have a pipe and it's pinned down */

	__pipe_lock(pipe);

	/* We can only do regular read/write on fifos */
	stream_open(inode, filp);

	switch (filp->f_mode & (FMODE_READ | FMODE_WRITE)) {
	case FMODE_READ:
	/*
	 *  O_RDONLY
	 *  POSIX.1 says that O_NONBLOCK means return with the FIFO
	 *  opened, even when there is no process writing the FIFO.
	 */
		pipe->r_counter++;
		if (pipe->readers++ == 0)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->writers) {
			if ((filp->f_flags & O_NONBLOCK)) {
				/* suppress EPOLLHUP until we have
				 * seen a writer */
				filp->f_version = pipe->w_counter;
			} else {
				if (wait_for_partner(pipe, &pipe->w_counter))
					goto err_rd;
			}
		}
		break;

	case FMODE_WRITE:
	/*
	 *  O_WRONLY
	 *  POSIX.1 says that O_NONBLOCK means return -1 with
	 *  errno=ENXIO when there is no process reading the FIFO.
	 */
		ret = -ENXIO;
		if (!is_pipe && (filp->f_flags & O_NONBLOCK) && !pipe->readers)
			goto err;

		pipe->w_counter++;
		if (!pipe->writers++)
			wake_up_partner(pipe);

		if (!is_pipe && !pipe->readers) {
			if (wait_for_partner(pipe, &pipe->r_counter))
				goto err_wr;
		}
		break;

	case FMODE_READ | FMODE_WRITE:
	/*
	 *  O_RDWR
	 *  POSIX.1 leaves this case "undefined" when O_NONBLOCK is set.
	 *  This implementation will NEVER block on an O_RDWR open, since
	 *  the process can at least talk to itself.
	 */

		pipe->readers++;
		pipe->writers++;
		pipe->r_counter++;
		pipe->w_counter++;
		if (pipe->readers == 1 || pipe->writers == 1)
			wake_up_partner(pipe);
		break;

	default:
		ret = -EINVAL;
		goto err;
	}

	/* Ok! */
	__pipe_unlock(pipe);
	return 0;

err_rd:
	if (!--pipe->readers)
		wake_up_interruptible(&pipe->wr_wait);
	ret = -ERESTARTSYS;
	goto err;

err_wr:
	if (!--pipe->writers)
		wake_up_interruptible_all(&pipe->rd_wait);
	ret = -ERESTARTSYS;
	goto err;

err:
	__pipe_unlock(pipe);

	put_pipe_info(inode, pipe);
	return ret;
}

const struct file_operations pipefifo_fops = {
	.open		= fifo_open,
	.llseek		= no_llseek,
	.read_iter	= pipe_read,
	.write_iter	= pipe_write,
	.poll		= pipe_poll,
	.unlocked_ioctl	= pipe_ioctl,
	.release	= pipe_release,
	.fasync		= pipe_fasync,
};

/*
 * Currently we rely on the pipe array holding a power-of-2 number
 * of pages. Returns 0 on error.
 */
unsigned int round_pipe_size(unsigned long size)
{
	if (size > (1U << 31))
		return 0;

	/* Minimum pipe size, as required by POSIX */
	if (size < PAGE_SIZE)
		return PAGE_SIZE;

	return roundup_pow_of_two(size);
}

/*
 * Resize the pipe ring to a number of slots.
 */
int pipe_resize_ring(struct pipe_inode_info *pipe, unsigned int nr_slots)
{
	struct pipe_buffer *bufs;
	unsigned int head, tail, mask, n;

	/*
	 * We can shrink the pipe, if arg is greater than the ring occupancy.
	 * Since we don't expect a lot of shrink+grow operations, just free and
	 * allocate again like we would do for growing. If the pipe currently
	 * contains more buffers than arg, then return busy.
	 */
	mask = pipe->ring_size - 1;
	head = pipe->head;
	tail = pipe->tail;
	n = pipe_occupancy(pipe->head, pipe->tail);
	if (nr_slots < n)
		return -EBUSY;

	bufs = kcalloc(nr_slots, sizeof(*bufs),
		       GFP_KERNEL_ACCOUNT | __GFP_NOWARN);
	if (unlikely(!bufs))
		return -ENOMEM;

	/*
	 * The pipe array wraps around, so just start the new one at zero
	 * and adjust the indices.
	 */
	if (n > 0) {
		unsigned int h = head & mask;
		unsigned int t = tail & mask;
		if (h > t) {
			memcpy(bufs, pipe->bufs + t,
			       n * sizeof(struct pipe_buffer));
		} else {
			unsigned int tsize = pipe->ring_size - t;
			if (h > 0)
				memcpy(bufs + tsize, pipe->bufs,
				       h * sizeof(struct pipe_buffer));
			memcpy(bufs, pipe->bufs + t,
			       tsize * sizeof(struct pipe_buffer));
		}
	}

	head = n;
	tail = 0;

	kfree(pipe->bufs);
	pipe->bufs = bufs;
	pipe->ring_size = nr_slots;
	if (pipe->max_usage > nr_slots)
		pipe->max_usage = nr_slots;
	pipe->tail = tail;
	pipe->head = head;

	/* This might have made more room for writers */
	wake_up_interruptible(&pipe->wr_wait);
	return 0;
}

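/*
 * Editorial illustration (userspace, not compiled here): resizing a pipe
 * through the fcntl() interface served by pipe_fcntl()/pipe_set_size()
 * below. round_pipe_size() rounds the request up to a power-of-two number
 * of pages, so F_GETPIPE_SZ may report more than was asked for:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *
 *	pipe(fds);
 *	// Grow to 1 MiB; an unprivileged caller can fail with EPERM
 *	// against the limits enforced by pipe_set_size() below.
 *	if (fcntl(fds[1], F_SETPIPE_SZ, 1048576) < 0)
 *		perror("F_SETPIPE_SZ");
 *	long size = fcntl(fds[1], F_GETPIPE_SZ, 0);	// capacity in bytes
 */
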
/*
 * Allocate a new array of pipe buffers and copy the info over. Returns the
 * pipe size if successful, or -ERROR on error.
 */
static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg)
{
	unsigned long user_bufs;
	unsigned int nr_slots, size;
	long ret = 0;

#ifdef CONFIG_WATCH_QUEUE
	if (pipe->watch_queue)
		return -EBUSY;
#endif

	size = round_pipe_size(arg);
	nr_slots = size >> PAGE_SHIFT;

	if (!nr_slots)
		return -EINVAL;

	/*
	 * If trying to increase the pipe capacity, check that an
	 * unprivileged user is not trying to exceed various limits
	 * (soft limit check here, hard limit check just below).
	 * Decreasing the pipe capacity is always permitted, even
	 * if the user is currently over a limit.
	 */
	if (nr_slots > pipe->max_usage &&
			size > pipe_max_size && !capable(CAP_SYS_RESOURCE))
		return -EPERM;

	user_bufs = account_pipe_buffers(pipe->user, pipe->nr_accounted, nr_slots);

	if (nr_slots > pipe->max_usage &&
			(too_many_pipe_buffers_hard(user_bufs) ||
			 too_many_pipe_buffers_soft(user_bufs)) &&
			pipe_is_unprivileged_user()) {
		ret = -EPERM;
		goto out_revert_acct;
	}

	ret = pipe_resize_ring(pipe, nr_slots);
	if (ret < 0)
		goto out_revert_acct;

	pipe->max_usage = nr_slots;
	pipe->nr_accounted = nr_slots;
	return pipe->max_usage * PAGE_SIZE;

out_revert_acct:
	(void) account_pipe_buffers(pipe->user, nr_slots, pipe->nr_accounted);
	return ret;
}

/*
 * After the inode slimming patch, i_pipe/i_bdev/i_cdev share the same
 * location, so checking ->i_pipe is not enough to verify that this is a
 * pipe.
 */
struct pipe_inode_info *get_pipe_info(struct file *file, bool for_splice)
{
	struct pipe_inode_info *pipe = file->private_data;

	if (file->f_op != &pipefifo_fops || !pipe)
		return NULL;
#ifdef CONFIG_WATCH_QUEUE
	if (for_splice && pipe->watch_queue)
		return NULL;
#endif
	return pipe;
}

long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct pipe_inode_info *pipe;
	long ret;

	pipe = get_pipe_info(file, false);
	if (!pipe)
		return -EBADF;

	__pipe_lock(pipe);

	switch (cmd) {
	case F_SETPIPE_SZ:
		ret = pipe_set_size(pipe, arg);
		break;
	case F_GETPIPE_SZ:
		ret = pipe->max_usage * PAGE_SIZE;
		break;
	default:
		ret = -EINVAL;
		break;
	}

	__pipe_unlock(pipe);
	return ret;
}

static const struct super_operations pipefs_ops = {
	.destroy_inode = free_inode_nonrcu,
	.statfs = simple_statfs,
};

/*
 * pipefs should _never_ be mounted by userland - too much of a security
 * hassle, no real gain from having the whole whorehouse mounted. So we don't
 * need any operations on the root directory. However, we need a non-trivial
 * d_name - pipe: will go nicely and kill the special-casing in procfs.
 */

static int pipefs_init_fs_context(struct fs_context *fc)
{
	struct pseudo_fs_context *ctx = init_pseudo(fc, PIPEFS_MAGIC);
	if (!ctx)
		return -ENOMEM;
	ctx->ops = &pipefs_ops;
	ctx->dops = &pipefs_dentry_operations;
	return 0;
}

static struct file_system_type pipe_fs_type = {
	.name		= "pipefs",
	.init_fs_context = pipefs_init_fs_context,
	.kill_sb	= kill_anon_super,
};

static int __init init_pipe_fs(void)
{
	int err = register_filesystem(&pipe_fs_type);

	if (!err) {
		pipe_mnt = kern_mount(&pipe_fs_type);
		if (IS_ERR(pipe_mnt)) {
			err = PTR_ERR(pipe_mnt);
			unregister_filesystem(&pipe_fs_type);
		}
	}
	return err;
}

fs_initcall(init_pipe_fs);
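
/*
 * Editorial illustration (userspace, not compiled here): the O_DIRECT
 * "packet mode" handled above by is_packetized() and PIPE_BUF_FLAG_PACKET.
 * Each write() becomes its own packet; a read() returns data from at most
 * one packet and discards whatever part of that packet it could not hold:
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int fds[2];
 *	char buf[4];
 *
 *	pipe2(fds, O_DIRECT);
 *	write(fds[1], "hello", 5);	// one 5-byte packet
 *	write(fds[1], "world", 5);	// a second packet
 *	read(fds[0], buf, sizeof(buf));	// returns 4 ("hell"), "o" is discarded
 *	read(fds[0], buf, sizeof(buf));	// returns 4 ("worl") from the next packet
 */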