/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  Whenever the amount in use
 * exceeds half of this value, all new pipes will be created with size
 * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
 * as well.  This value is loader tunable only.
 *
 * These values are autotuned in subr_param.c.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
 *
 * Locking rules:  There are two locks present here:  A mutex, used via
 * PIPE_LOCK, and a flag, used via pipelock().  All locking is done via
 * the flag, as mutexes can not persist over uiomove.  The mutex
 * exists only to guard access to the flag, and is not in itself a
 * locking mechanism.
 *
 * As pipelock() may have to sleep before it can acquire the flag, it
 * is important to reread all data after a call to pipelock(); everything
 * in the structure may have changed.
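 *
 * A rough sketch of the pattern these rules imply (this simply mirrors
 * what pipe_read() below does; the "..." elide the buffer bookkeeping):
 *
 *	PIPE_LOCK(rpipe);
 *	error = pipelock(rpipe, 1);	-- may sleep; recheck all state after
 *	...
 *	PIPE_UNLOCK(rpipe);		-- mutex dropped across uiomove()
 *	error = uiomove(..., uio);
 *	PIPE_LOCK(rpipe);
 *	...
 *	pipeunlock(rpipe);
 *	PIPE_UNLOCK(rpipe);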
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
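 *
 * For a concrete feel for the numbers (assuming the stock PIPE_SIZE of
 * 16384 bytes from sys/pipe.h; other configurations scale accordingly):
 *
 *	MINPIPESIZE = PIPE_SIZE/3   = 5461 bytes
 *	MAXPIPESIZE = 2*PIPE_SIZE/3 = 10922 bytes
 *
 * so, for instance, the write-blocking hysteresis in pipe_read() below
 * only wakes a waiting writer once fewer than ~5.3K bytes remain buffered.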
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

static int amountpipes;
static int amountpipekva;

SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
    &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
    &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
    &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
    &amountpipekva, 0, "Pipe KVA usage");

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe *pipe);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);
static int pipespace_new(struct pipe *cpipe, int size);

static int	pipe_zone_ctor(void *mem, int size, void *arg, int flags);
static void	pipe_zone_dtor(void *mem, int size, void *arg);
static int	pipe_zone_init(void *mem, int size, int flags);
static void	pipe_zone_fini(void *mem, int size);

static uma_zone_t pipe_zone;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

static void
pipeinit(void *dummy __unused)
{

	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipepair),
	    pipe_zone_ctor, pipe_zone_dtor, pipe_zone_init, pipe_zone_fini,
	    UMA_ALIGN_PTR, 0);
	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
}

static int
pipe_zone_ctor(void *mem, int size, void *arg, int flags)
{
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;

	KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size"));

	pp = (struct pipepair *)mem;

	/*
	 * We zero both pipe endpoints to make sure all the kmem pointers
	 * are NULL, flag fields are zero'd, etc.  We timestamp both
	 * endpoints with the same time.
	 */
	rpipe = &pp->pp_rpipe;
	bzero(rpipe, sizeof(*rpipe));
	vfs_timestamp(&rpipe->pipe_ctime);
	rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime;

	wpipe = &pp->pp_wpipe;
	bzero(wpipe, sizeof(*wpipe));
	wpipe->pipe_ctime = rpipe->pipe_ctime;
	wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime;

	rpipe->pipe_peer = wpipe;
	rpipe->pipe_pair = pp;
	wpipe->pipe_peer = rpipe;
	wpipe->pipe_pair = pp;

	/*
	 * Mark both endpoints as present; they will later get free'd
	 * one at a time.  When both are free'd, then the whole pair
	 * is released.
	 */
	rpipe->pipe_present = 1;
	wpipe->pipe_present = 1;

	/*
	 * Eventually, the MAC Framework may initialize the label
	 * in ctor or init, but for now we do it elsewhere to avoid
	 * blocking in ctor or init.
	 */
	pp->pp_label = NULL;

	atomic_add_int(&amountpipes, 2);
	return (0);
}

static void
pipe_zone_dtor(void *mem, int size, void *arg)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_dtor: wrong size"));

	pp = (struct pipepair *)mem;

	atomic_subtract_int(&amountpipes, 2);
}

static int
pipe_zone_init(void *mem, int size, int flags)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
	return (0);
}

static void
pipe_zone_fini(void *mem, int size)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_destroy(&pp->pp_mtx);
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes.  If we fail,
 * let the zone pick up the pieces via pipeclose().
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;
	int fd, error;

	pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
	/*
	 * The MAC label is shared between the connected endpoints.  As a
	 * result mac_init_pipe() and mac_create_pipe() are called once
	 * for the pair, and not on the endpoints.
	 */
	mac_init_pipe(pp);
	mac_create_pipe(td->td_ucred, pp);
#endif
	rpipe = &pp->pp_rpipe;
	wpipe = &pp->pp_wpipe;

	if (pipe_create(rpipe) || pipe_create(wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	/* An extra reference on `rf' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	FILE_LOCK(rf);
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;
	FILE_UNLOCK(rf);
	error = falloc(td, &wf, &fd);
	if (error) {
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
			fdp->fd_ofiles[td->td_retval[0]] = NULL;
			fdunused(fdp, td->td_retval[0]);
			FILEDESC_UNLOCK(fdp);
			fdrop(rf, td);
		} else {
			FILEDESC_UNLOCK(fdp);
		}
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	/* An extra reference on `wf' has been held for us by falloc(). */
	FILE_LOCK(wf);
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;
	FILE_UNLOCK(wf);
	fdrop(wf, td);
	td->td_retval[1] = fd;
	fdrop(rf, td);

	return (0);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it will retain the old buffer and return ENOMEM.
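 * (For example, pipe_write() below uses this path to grow an already
 * open pipe to BIG_PIPE_SIZE when a large write would benefit; if the
 * allocation fails, the pipe simply keeps its old buffer.)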
 */
static int
pipespace_new(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	caddr_t buffer;
	int error;
	static int curfail = 0;
	static struct timeval lastfail;

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));

	size = round_page(size);
	/*
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	buffer = (caddr_t) vm_map_min(pipe_map);

	/*
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(pipe_map, NULL, 0,
	    (vm_offset_t *) &buffer, size, 1,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error != KERN_SUCCESS) {
		if (ppsratecheck(&lastfail, &curfail, 1))
			printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n");
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}

/*
 * Wrapper for pipespace_new() that performs locking assertions.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{

	KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
	    ("Unlocked pipe passed to pipespace"));
	return (pipespace_new(cpipe, size));
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe),
		    catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
	    ("Unlocked pipe passed to pipeunlock"));
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeuppri(&cpipe->pipe_sel, PSOCK);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/*
 * Initialize and allocate VM and memory for pipe.  The structure
 * will start out zero'd from the ctor, so we just manage the kmem.
 */
static int
pipe_create(pipe)
	struct pipe *pipe;
{
	int error;

	/*
	 * Reduce to 1/4th pipe size if we're over our global max.
	 */
	if (amountpipekva > maxpipekva / 2)
		error = pipespace_new(pipe, SMALL_PIPE_SIZE);
	else
		error = pipespace_new(pipe, PIPE_SIZE);
	return (error);
}

/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

#ifdef MAC
	error = mac_check_pipe_read(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
		    (rpipe->pipe_state & PIPE_DIRECTW)) {
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove_fromphys(rpipe->pipe_map.ms,
			    rpipe->pipe_map.pos, size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
				    PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#ifdef MAC
locked_error:
#endif
	pipeunlock(rpipe);

	/* XXX: should probably do this before getting any locks. */
	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	pmap_t pmap;
	u_int size;
	int i, j;
	vm_offset_t addr, endaddr;

	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	pmap = vmspace_pmap(curproc->p_vmspace);
	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queue() and vm_page_unlock_queue()
		 * should not be performed outside of this loop.
		 */
	race:
		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
			vm_page_lock_queues();
			for (j = 0; j < i; j++)
				vm_page_unhold(wpipe->pipe_map.ms[j]);
			vm_page_unlock_queues();
			return (EFAULT);
		}
		wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
		    VM_PROT_READ);
		if (wpipe->pipe_map.ms[i] == NULL)
			goto race;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and update the uio data
	 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	vm_page_lock_queues();
	for (i = 0; i < wpipe->pipe_map.npages; i++) {
		vm_page_unhold(wpipe->pipe_map.ms[i]);
	}
	vm_page_unlock_queues();
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	struct uio uio;
	struct iovec iov;
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	PIPE_UNLOCK(wpipe);
	iov.iov_base = wpipe->pipe_buffer.buffer;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;
	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
	PIPE_LOCK(wpipe);
	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	error = pipelock(wpipe, 1);
	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (error) {
		pipeunlock(wpipe);
		goto error1;
	}
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		else
			goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		pipeunlock(wpipe);
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipe_destroy_write_buffer(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		pipeunlock(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
		pipelock(wpipe, 0);
	}

	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	error = pipelock(wpipe, 1);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_check_pipe_write(active_cred, wpipe->pipe_pair);
	if (error) {
		pipeunlock(wpipe);
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (amountpipekva < maxpipekva / 2) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		PIPE_UNLOCK(wpipe);
		if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
			atomic_add_int(&nbigpipe, 1);
		PIPE_LOCK(wpipe);
	}

	pipeunlock(wpipe);

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

		pipelock(wpipe, 0);
		if (wpipe->pipe_state & PIPE_EOF) {
			pipeunlock(wpipe);
			error = EPIPE;
			break;
		}
#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0) {
			pipeunlock(wpipe);
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
		if (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (error)
				break;
			else
				continue;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			int size;	/* Transfer size */
			int segsize;	/* first segment to transfer */

			/*
			 * Transfer size is minimum of uio transfer
			 * and free space in pipe buffer.
			 */
			if (space > uio->uio_resid)
				size = uio->uio_resid;
			else
				size = space;
			/*
			 * First segment to transfer is minimum of
			 * transfer size and contiguous space in
			 * pipe buffer.
			 * If first segment to transfer
			 * is less than the transfer size, we've got
			 * a wraparound in the buffer.
			 */
			segsize = wpipe->pipe_buffer.size -
			    wpipe->pipe_buffer.in;
			if (segsize > size)
				segsize = size;

			/* Transfer first segment */

			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
			    segsize, uio);
			PIPE_LOCK(rpipe);

			if (error == 0 && segsize < size) {
				KASSERT(wpipe->pipe_buffer.in + segsize ==
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer wraparound disappeared"));
				/*
				 * Transfer remaining part now, to
				 * support atomic writes.  Wraparound
				 * happened.
				 */

				PIPE_UNLOCK(rpipe);
				error = uiomove(
				    &wpipe->pipe_buffer.buffer[0],
				    size - segsize, uio);
				PIPE_LOCK(rpipe);
			}
			if (error == 0) {
				wpipe->pipe_buffer.in += size;
				if (wpipe->pipe_buffer.in >=
				    wpipe->pipe_buffer.size) {
					KASSERT(wpipe->pipe_buffer.in ==
					    size - segsize +
					    wpipe->pipe_buffer.size,
					    ("Expected wraparound bad"));
					wpipe->pipe_buffer.in = size - segsize;
				}

				wpipe->pipe_buffer.cnt += size;
				KASSERT(wpipe->pipe_buffer.cnt <=
				    wpipe->pipe_buffer.size,
				    ("Pipe buffer overflow"));
			}
			pipeunlock(wpipe);
		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				pipeunlock(wpipe);
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			pipeunlock(wpipe);
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
		}
	}

	pipelock(wpipe, 0);
	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	pipeunlock(wpipe);
	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
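 *
 * For illustration, the sort of userland use this keeps working (a
 * hypothetical snippet, not part of this file; pfd[] as returned by
 * pipe(2)):
 *
 *	int avail, on = 1;
 *	if (ioctl(pfd[0], FIONREAD, &avail) == 0)
 *		printf("%d bytes ready to read\n", avail);
 *	(void)ioctl(pfd[0], FIOASYNC, &on);	-- SIGIO on new data,
 *						   see pipeselwakeup()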
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
#ifdef MAC
	int error;
#endif

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_check_pipe_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	switch (cmd) {

	case FIONBIO:
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOSETOWN:
		PIPE_UNLOCK(mpipe);
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		PIPE_UNLOCK(mpipe);
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		PIPE_UNLOCK(mpipe);
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		return (0);

	}
	PIPE_UNLOCK(mpipe);
	return (ENOTTY);
}

static int
pipe_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *rpipe = fp->f_data;
	struct pipe *wpipe;
	int revents = 0;
#ifdef MAC
	int error;
#endif

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_check_pipe_poll(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (!wpipe->pipe_present || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (!wpipe->pipe_present) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
 */
static int
pipe_stat(fp, ub, active_cred, td)
	struct file *fp;
	struct stat *ub;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *pipe = fp->f_data;
#ifdef MAC
	int error;

	PIPE_LOCK(pipe);
	error = mac_check_pipe_stat(active_cred, pipe->pipe_pair);
	PIPE_UNLOCK(pipe);
	if (error)
		return (error);
#endif
	bzero(ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	if (pipe->pipe_state & PIPE_DIRECTW)
		ub->st_size = pipe->pipe_map.cnt;
	else
		ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(&cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
	    ("pipe_free_kmem: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			atomic_subtract_int(&nbigpipe, 1);
		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
		vm_map_remove(pipe_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    (vm_offset_t)cpipe->pipe_buffer.buffer +
		    cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	{
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipepair *pp;
	struct pipe *ppipe;

	KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));

	PIPE_LOCK(cpipe);
	pipelock(cpipe, 0);
	pp = cpipe->pipe_pair;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT;
		pipeunlock(cpipe);
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
		pipelock(cpipe, 0);
	}

	/*
	 * Disconnect from peer, if any.
	 */
	ppipe = cpipe->pipe_peer;
	if (ppipe->pipe_present != 0) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
	}

	/*
	 * Mark this endpoint as free.  Release kmem resources.  We
	 * don't mark this endpoint as unused until we've finished
	 * doing that, or the pipe might disappear out from under
	 * us.
	 */
	PIPE_UNLOCK(cpipe);
	pipe_free_kmem(cpipe);
	PIPE_LOCK(cpipe);
	cpipe->pipe_present = 0;
	pipeunlock(cpipe);

	/*
	 * If both endpoints are now closed, release the memory for the
	 * pipe pair.  If not, unlock.
	 */
	if (ppipe->pipe_present == 0) {
		PIPE_UNLOCK(cpipe);
#ifdef MAC
		mac_destroy_pipe(pp);
#endif
		uma_zfree(pipe_zone, cpipe->pipe_pair);
	} else
		PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	cpipe = kn->kn_fp->f_data;
	PIPE_LOCK(cpipe);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		if (!cpipe->pipe_peer->pipe_present) {
			/* other end of pipe has been closed */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		cpipe = cpipe->pipe_peer;
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (1);
	}

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	PIPE_UNLOCK(cpipe);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);
	if (kn->kn_filter == EVFILT_WRITE) {
		if (!cpipe->pipe_peer->pipe_present) {
			PIPE_UNLOCK(cpipe);
			return;
		}
		cpipe = cpipe->pipe_peer;
	}
	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
	PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	PIPE_UNLOCK(rpipe);
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}
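
/*
 * Illustrative userland view of the knote filters above (a sketch only,
 * not an interface defined by this file; pfd[] as returned by pipe(2)):
 * for a pipe registered with EVFILT_READ, kevent(2) reports the number
 * of buffered bytes in data and sets EV_EOF once the write side is gone.
 *
 *	struct kevent kev, ev;
 *	int kq = kqueue();
 *	EV_SET(&kev, pfd[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	kevent(kq, &kev, 1, NULL, 0, NULL);
 *	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
 *		printf("%jd bytes readable%s\n", (intmax_t)ev.data,
 *		    (ev.flags & EV_EOF) ? " (writer closed)" : "");
 */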