/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does do everything that pipes normally
 * do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If the buffer is less than PIPE_MINDIRECT, then the
 * "normal" pipe buffering is done.  If the buffer is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and
 * the receiving process can copy it directly from the pages in the sending
 * process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned back to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process, to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.
 *
 * In order to limit the resource use of pipes, two sysctls exist:
 *
 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable
 * address space available to us in pipe_map.  Whenever the amount in use
 * exceeds half of this value, all new pipes will be created with size
 * SMALL_PIPE_SIZE, rather than PIPE_SIZE.  Big pipe creation will be limited
 * as well.  This value is loader tunable only.
 *
 * These values are autotuned in subr_param.c.
 *
 * Memory usage may be monitored through the sysctls
 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired.
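 *
 * For example, current usage can be compared against the limit from
 * userland with sysctl(8) (illustrative invocation, not part of this file):
 *
 *	sysctl kern.ipc.maxpipekva kern.ipc.pipekva kern.ipc.pipes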
 *
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approx 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s); this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

static int amountpipes;
static int amountpipekva;

SYSCTL_DECL(_kern_ipc);

SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
	   &maxpipekva, 0, "Pipe KVA limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
	   &amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
	   &nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
	   &amountpipekva, 0, "Pipe KVA usage");

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe *pipe);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);

static void pipe_zone_ctor(void *mem, int size, void *arg);
static void pipe_zone_dtor(void *mem, int size, void *arg);
static void pipe_zone_init(void *mem, int size);
static void pipe_zone_fini(void *mem, int size);

static uma_zone_t pipe_zone;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

static void
pipeinit(void *dummy __unused)
{

	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipepair),
	    pipe_zone_ctor, pipe_zone_dtor, pipe_zone_init, pipe_zone_fini,
	    UMA_ALIGN_PTR, 0);
	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
}

static void
pipe_zone_ctor(void *mem, int size, void *arg)
{
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;

	KASSERT(size == sizeof(*pp), ("pipe_zone_ctor: wrong size"));

	pp = (struct pipepair *)mem;

	/*
	 * We zero both pipe endpoints to make sure all the kmem pointers
	 * are NULL, flag fields are zero'd, etc.  We timestamp both
	 * endpoints with the same time.
	 */
	rpipe = &pp->pp_rpipe;
	bzero(rpipe, sizeof(*rpipe));
	vfs_timestamp(&rpipe->pipe_ctime);
	rpipe->pipe_atime = rpipe->pipe_mtime = rpipe->pipe_ctime;

	wpipe = &pp->pp_wpipe;
	bzero(wpipe, sizeof(*wpipe));
	wpipe->pipe_ctime = rpipe->pipe_ctime;
	wpipe->pipe_atime = wpipe->pipe_mtime = rpipe->pipe_ctime;

	rpipe->pipe_peer = wpipe;
	rpipe->pipe_pair = pp;
	wpipe->pipe_peer = rpipe;
	wpipe->pipe_pair = pp;

	/*
	 * Mark both endpoints as present; they will later get free'd
	 * one at a time.  When both are free'd, then the whole pair
	 * is released.
	 */
	rpipe->pipe_present = 1;
	wpipe->pipe_present = 1;

	/*
	 * Eventually, the MAC Framework may initialize the label
	 * in ctor or init, but for now we do it elsewhere to avoid
	 * blocking in ctor or init.
	 */
	pp->pp_label = NULL;

	atomic_add_int(&amountpipes, 2);
}

static void
pipe_zone_dtor(void *mem, int size, void *arg)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_dtor: wrong size"));

	pp = (struct pipepair *)mem;

	atomic_subtract_int(&amountpipes, 2);
}

static void
pipe_zone_init(void *mem, int size)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_init: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_init(&pp->pp_mtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
}

static void
pipe_zone_fini(void *mem, int size)
{
	struct pipepair *pp;

	KASSERT(size == sizeof(*pp), ("pipe_zone_fini: wrong size"));

	pp = (struct pipepair *)mem;

	mtx_destroy(&pp->pp_mtx);
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes.  If we fail,
 * let the zone pick up the pieces via pipeclose().
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipepair *pp;
	struct pipe *rpipe, *wpipe;
	int fd, error;

	pp = uma_zalloc(pipe_zone, M_WAITOK);
#ifdef MAC
	/*
	 * The MAC label is shared between the connected endpoints.  As a
	 * result mac_init_pipe() and mac_create_pipe() are called once
	 * for the pair, and not on the endpoints.
	 */
	mac_init_pipe(pp);
	mac_create_pipe(td->td_ucred, pp);
#endif
	rpipe = &pp->pp_rpipe;
	wpipe = &pp->pp_wpipe;

	if (pipe_create(rpipe) || pipe_create(wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		return (error);
	}
	/* An extra reference on `rf' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	FILE_LOCK(rf);
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;
	FILE_UNLOCK(rf);
	error = falloc(td, &wf, &fd);
	if (error) {
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
			fdp->fd_ofiles[td->td_retval[0]] = NULL;
			fdunused(fdp, td->td_retval[0]);
			FILEDESC_UNLOCK(fdp);
			fdrop(rf, td);
		} else {
			FILEDESC_UNLOCK(fdp);
		}
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		return (error);
	}
	/* An extra reference on `wf' has been held for us by falloc(). */
	FILE_LOCK(wf);
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;
	FILE_UNLOCK(wf);
	fdrop(wf, td);
	td->td_retval[1] = fd;
	fdrop(rf, td);

	return (0);
}

/*
 * Allocate kva for pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely; if it fails
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	caddr_t buffer;
	int error;
	static int curfail = 0;
	static struct timeval lastfail;

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)), ("pipespace: pipe mutex locked"));

	size = round_page(size);
	/*
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	buffer = (caddr_t) vm_map_min(pipe_map);

	/*
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(pipe_map, NULL, 0,
	    (vm_offset_t *) &buffer, size, 1,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (error != KERN_SUCCESS) {
		if (ppsratecheck(&lastfail, &curfail, 1))
			printf("kern.ipc.maxpipekva exceeded; see tuning(7)\n");
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}

/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe),
		    catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeuppri(&cpipe->pipe_sel, PSOCK);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/*
 * Initialize and allocate VM and memory for pipe.  The structure
 * will start out zero'd from the ctor, so we just manage the kmem.
 */
static int
pipe_create(pipe)
	struct pipe *pipe;
{
	int error;

	PIPE_LOCK(pipe);
	pipelock(pipe, 0);
	PIPE_UNLOCK(pipe);
	/*
	 * Reduce to 1/4th pipe size if we're over our global max.
	 */
	if (amountpipekva > maxpipekva / 2)
		error = pipespace(pipe, SMALL_PIPE_SIZE);
	else
		error = pipespace(pipe, PIPE_SIZE);
	PIPE_LOCK(pipe);
	pipeunlock(pipe);
	PIPE_UNLOCK(pipe);
	if (error)
		return (error);

	return (0);
}

/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

#ifdef MAC
	error = mac_check_pipe_read(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove_fromphys(rpipe->pipe_map.ms,
			    rpipe->pipe_map.pos, size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
				    PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#ifdef MAC
locked_error:
#endif
	pipeunlock(rpipe);

	/* XXX: should probably do this before getting any locks. */
	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	pmap_t pmap;
	u_int size;
	int i, j;
	vm_offset_t addr, endaddr;

	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	pmap = vmspace_pmap(curproc->p_vmspace);
	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queue() and vm_page_unlock_queue()
		 * should not be performed outside of this loop.
		 */
	race:
		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) {
			vm_page_lock_queues();
			for (j = 0; j < i; j++)
				vm_page_unhold(wpipe->pipe_map.ms[j]);
			vm_page_unlock_queues();
			return (EFAULT);
		}
		wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr,
		    VM_PROT_READ);
		if (wpipe->pipe_map.ms[i] == NULL)
			goto race;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and update the uio data
	 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	vm_page_lock_queues();
	for (i = 0; i < wpipe->pipe_map.npages; i++) {
		vm_page_unhold(wpipe->pipe_map.ms[i]);
	}
	vm_page_unlock_queues();
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
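 * (In other words, an interrupted direct write degrades to an ordinary
 * buffered write: pipe_direct_write() below falls back to this routine
 * when the transfer is cut short and PIPE_DIRECTW is still set.)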
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	struct uio uio;
	struct iovec iov;
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	PIPE_UNLOCK(wpipe);
	iov.iov_base = wpipe->pipe_buffer.buffer;
	iov.iov_len = size;
	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_offset = 0;
	uio.uio_resid = size;
	uio.uio_segflg = UIO_SYSSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = curthread;
	uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
	PIPE_LOCK(wpipe);
	pipe_destroy_write_buffer(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set-up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_EOF) {
		error = EPIPE;
		goto error2;
	}
	PIPE_UNLOCK(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_LOCK(wpipe);
	pipeunlock(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			pipe_destroy_write_buffer(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
	}

	pipelock(wpipe,0);
	if (wpipe->pipe_state & PIPE_EOF)
		error = EPIPE;
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		pipe_destroy_write_buffer(wpipe);
	}
error2:
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_check_pipe_write(active_cred, wpipe->pipe_pair);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (amountpipekva < maxpipekva / 2) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			if (wpipe->pipe_state & PIPE_EOF)
				error = EPIPE;
			else {
				PIPE_UNLOCK(wpipe);
				if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
					atomic_add_int(&nbigpipe, 1);
				PIPE_LOCK(wpipe);
			}
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred, unbusy and return, waking up any
	 * pending readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		PIPE_UNLOCK(rpipe);
		return(error);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincidental with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
			if (error)
				break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/*
		 * Writes of size <= PIPE_BUF must be atomic.
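		 * (POSIX requires that writes of PIPE_BUF bytes or less not
		 * be interleaved with writes by other writers; zeroing
		 * 'space' below makes such a write wait for enough room
		 * rather than be split.)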
		 */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0) {
			if ((error = pipelock(wpipe,1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write/EOF to
				 * slip in on us... handle them here...
				 */
				if (wpipe->pipe_state & PIPE_EOF)
					goto lost_wpipe;
				if (wpipe->pipe_state & PIPE_DIRECTW) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
				    segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
					    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
lost_wpipe:
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a signal
			 * to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * we implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
#ifdef MAC
	int error;
#endif

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_check_pipe_ioctl(active_cred, mpipe->pipe_pair, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	switch (cmd) {

	case FIONBIO:
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOSETOWN:
		PIPE_UNLOCK(mpipe);
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		PIPE_UNLOCK(mpipe);
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		PIPE_UNLOCK(mpipe);
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		return (0);

	}
	PIPE_UNLOCK(mpipe);
	return (ENOTTY);
}

static int
pipe_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *rpipe = fp->f_data;
	struct pipe *wpipe;
	int revents = 0;
#ifdef MAC
	int error;
#endif

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_check_pipe_poll(active_cred, rpipe->pipe_pair);
	if (error)
		goto locked_error;
#endif
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (!wpipe->pipe_present || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		    (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (!wpipe->pipe_present) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
 */
static int
pipe_stat(fp, ub, active_cred, td)
	struct file *fp;
	struct stat *ub;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *pipe = fp->f_data;
#ifdef MAC
	int error;

	PIPE_LOCK(pipe);
	error = mac_check_pipe_stat(active_cred, pipe->pipe_pair);
	PIPE_UNLOCK(pipe);
	if (error)
		return (error);
#endif
	bzero(ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	if (pipe->pipe_state & PIPE_DIRECTW)
		ub->st_size = pipe->pipe_map.cnt;
	else
		ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}

/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(&cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

	KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
	    ("pipe_free_kmem: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			atomic_subtract_int(&nbigpipe, 1);
		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
		vm_map_remove(pipe_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	{
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shutdown the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipepair *pp;
	struct pipe *ppipe;

	KASSERT(cpipe != NULL, ("pipeclose: cpipe == NULL"));

	PIPE_LOCK(cpipe);
	pp = cpipe->pipe_pair;

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	cpipe->pipe_state |= PIPE_EOF;
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT;
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
	}


	/*
	 * Disconnect from peer, if any.
	 */
	ppipe = cpipe->pipe_peer;
	if (ppipe->pipe_present != 0) {
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
	}

	/*
	 * Mark this endpoint as free.  Release kmem resources.  We
	 * don't mark this endpoint as unused until we've finished
	 * doing that, or the pipe might disappear out from under
	 * us.
	 */
	pipelock(cpipe, 0);
	PIPE_UNLOCK(cpipe);
	pipe_free_kmem(cpipe);
	PIPE_LOCK(cpipe);
	cpipe->pipe_present = 0;
	pipeunlock(cpipe);

	/*
	 * If both endpoints are now closed, release the memory for the
	 * pipe pair.  If not, unlock.
	 */
	if (ppipe->pipe_present == 0) {
		PIPE_UNLOCK(cpipe);
#ifdef MAC
		mac_destroy_pipe(pp);
#endif
		uma_zfree(pipe_zone, cpipe->pipe_pair);
	} else
		PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	cpipe = kn->kn_fp->f_data;
	PIPE_LOCK(cpipe);
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		if (!cpipe->pipe_peer->pipe_present) {
			/* other end of pipe has been closed */
			PIPE_UNLOCK(cpipe);
			return (EPIPE);
		}
		cpipe = cpipe->pipe_peer;
		break;
	default:
		PIPE_UNLOCK(cpipe);
		return (1);
	}

	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	PIPE_UNLOCK(cpipe);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data;

	PIPE_LOCK(cpipe);
	if (kn->kn_filter == EVFILT_WRITE) {
		if (!cpipe->pipe_peer->pipe_present) {
			PIPE_UNLOCK(cpipe);
			return;
		}
		cpipe = cpipe->pipe_peer;
	}
	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
	PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	PIPE_UNLOCK(rpipe);
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	if ((!wpipe->pipe_present) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}