1 /* 2 * Copyright (c) 1996 John S. Dyson 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice immediately at the beginning of the file, without modification, 10 * this list of conditions, and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. Absolutely no warranty of function or purpose is made by the author 15 * John S. Dyson. 16 * 4. Modifications may be freely made to this file if the above conditions 17 * are met. 18 */ 19 20 /* 21 * This file contains a high-performance replacement for the socket-based 22 * pipes scheme originally used in FreeBSD/4.4Lite. It does not support 23 * all features of sockets, but does do everything that pipes normally 24 * do. 25 */ 26 27 /* 28 * This code has two modes of operation, a small write mode and a large 29 * write mode. The small write mode acts like conventional pipes with 30 * a kernel buffer. If the buffer is less than PIPE_MINDIRECT, then the 31 * "normal" pipe buffering is done. If the buffer is between PIPE_MINDIRECT 32 * and PIPE_SIZE in size, it is fully mapped and wired into the kernel, and 33 * the receiving process can copy it directly from the pages in the sending 34 * process. 35 * 36 * If the sending process receives a signal, it is possible that it will 37 * go away, and certainly its address space can change, because control 38 * is returned back to the user-mode side. In that case, the pipe code 39 * arranges to copy the buffer supplied by the user process, to a pageable 40 * kernel buffer, and the receiving process will grab the data from the 41 * pageable kernel buffer. 
Since signals don't happen all that often, 42 * the copy operation is normally eliminated. 43 * 44 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will 45 * happen for small transfers so that the system will not spend all of 46 * its time context switching. 47 * 48 * In order to limit the resource use of pipes, two sysctls exist: 49 * 50 * kern.ipc.maxpipekva - This is a hard limit on the amount of pageable 51 * address space available to us in pipe_map. Whenever the amount in use 52 * exceeds half of this value, all new pipes will be created with size 53 * SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited 54 * as well. This value is loader tunable only. 55 * 56 * kern.ipc.maxpipekvawired - This value limits the amount of memory that may 57 * be wired in order to facilitate direct copies using page flipping. 58 * Whenever this value is exceeded, pipes will fall back to using regular 59 * copies. This value is sysctl controllable at all times. 60 * 61 * These values are autotuned in subr_param.c. 62 * 63 * Memory usage may be monitored through the sysctls 64 * kern.ipc.pipes, kern.ipc.pipekva and kern.ipc.pipekvawired. 
65 * 66 */ 67 68 #include <sys/cdefs.h> 69 __FBSDID("$FreeBSD$"); 70 71 #include "opt_mac.h" 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/fcntl.h> 76 #include <sys/file.h> 77 #include <sys/filedesc.h> 78 #include <sys/filio.h> 79 #include <sys/kernel.h> 80 #include <sys/lock.h> 81 #include <sys/mac.h> 82 #include <sys/mutex.h> 83 #include <sys/ttycom.h> 84 #include <sys/stat.h> 85 #include <sys/malloc.h> 86 #include <sys/poll.h> 87 #include <sys/selinfo.h> 88 #include <sys/signalvar.h> 89 #include <sys/sysctl.h> 90 #include <sys/sysproto.h> 91 #include <sys/pipe.h> 92 #include <sys/proc.h> 93 #include <sys/vnode.h> 94 #include <sys/uio.h> 95 #include <sys/event.h> 96 97 #include <vm/vm.h> 98 #include <vm/vm_param.h> 99 #include <vm/vm_object.h> 100 #include <vm/vm_kern.h> 101 #include <vm/vm_extern.h> 102 #include <vm/pmap.h> 103 #include <vm/vm_map.h> 104 #include <vm/vm_page.h> 105 #include <vm/uma.h> 106 107 /* 108 * Use this define if you want to disable *fancy* VM things. Expect an 109 * approx 30% decrease in transfer rate. This could be useful for 110 * NetBSD or OpenBSD. 
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 *
 * These are the file-operation entry points installed in pipeops below;
 * every pipe file descriptor created by pipe() gets f_ops = &pipeops.
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

/*
 * File operations vector shared by both ends of every pipe.
 */
static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

/*
 * kqueue filter callbacks; the read and write filters share one detach
 * routine and differ only in their event routine.
 */
static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

/*
 * NOTE(review): these are positional initializers; the meaning of the
 * leading 1 and the NULL slot depends on the layout of struct filterops
 * in <sys/event.h> -- confirm before reordering or converting to
 * designated initializers.
 */
static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

/*
 * Default pipe buffer size(s), this can be kind-of large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
150 */ 151 #define MINPIPESIZE (PIPE_SIZE/3) 152 #define MAXPIPESIZE (2*PIPE_SIZE/3) 153 154 /* 155 * Limit the number of "big" pipes 156 */ 157 #define LIMITBIGPIPES 32 158 static int nbigpipe; 159 160 static int amountpipes; 161 static int amountpipekva; 162 static int amountpipekvawired; 163 164 SYSCTL_DECL(_kern_ipc); 165 166 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RD, 167 &maxpipekva, 0, "Pipe KVA limit"); 168 SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW, 169 &maxpipekvawired, 0, "Pipe KVA wired limit"); 170 SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD, 171 &amountpipes, 0, "Current # of pipes"); 172 SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD, 173 &nbigpipe, 0, "Current # of big pipes"); 174 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD, 175 &amountpipekva, 0, "Pipe KVA usage"); 176 SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD, 177 &amountpipekvawired, 0, "Pipe wired KVA usage"); 178 179 static void pipeinit(void *dummy __unused); 180 static void pipeclose(struct pipe *cpipe); 181 static void pipe_free_kmem(struct pipe *cpipe); 182 static int pipe_create(struct pipe **cpipep); 183 static __inline int pipelock(struct pipe *cpipe, int catch); 184 static __inline void pipeunlock(struct pipe *cpipe); 185 static __inline void pipeselwakeup(struct pipe *cpipe); 186 #ifndef PIPE_NODIRECT 187 static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio); 188 static void pipe_destroy_write_buffer(struct pipe *wpipe); 189 static int pipe_direct_write(struct pipe *wpipe, struct uio *uio); 190 static void pipe_clone_write_buffer(struct pipe *wpipe); 191 #endif 192 static int pipespace(struct pipe *cpipe, int size); 193 194 static uma_zone_t pipe_zone; 195 196 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL); 197 198 static void 199 pipeinit(void *dummy __unused) 200 { 201 202 pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL, 203 NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 204 KASSERT(pipe_zone 
!= NULL, ("pipe_zone not initialized")); 205 } 206 207 /* 208 * The pipe system call for the DTYPE_PIPE type of pipes 209 */ 210 211 /* ARGSUSED */ 212 int 213 pipe(td, uap) 214 struct thread *td; 215 struct pipe_args /* { 216 int dummy; 217 } */ *uap; 218 { 219 struct filedesc *fdp = td->td_proc->p_fd; 220 struct file *rf, *wf; 221 struct pipe *rpipe, *wpipe; 222 struct mtx *pmtx; 223 int fd, error; 224 225 pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO); 226 227 rpipe = wpipe = NULL; 228 if (pipe_create(&rpipe) || pipe_create(&wpipe)) { 229 pipeclose(rpipe); 230 pipeclose(wpipe); 231 free(pmtx, M_TEMP); 232 return (ENFILE); 233 } 234 235 rpipe->pipe_state |= PIPE_DIRECTOK; 236 wpipe->pipe_state |= PIPE_DIRECTOK; 237 238 error = falloc(td, &rf, &fd); 239 if (error) { 240 pipeclose(rpipe); 241 pipeclose(wpipe); 242 free(pmtx, M_TEMP); 243 return (error); 244 } 245 fhold(rf); 246 td->td_retval[0] = fd; 247 248 /* 249 * Warning: once we've gotten past allocation of the fd for the 250 * read-side, we can only drop the read side via fdrop() in order 251 * to avoid races against processes which manage to dup() the read 252 * side while we are blocked trying to allocate the write side. 253 */ 254 FILE_LOCK(rf); 255 rf->f_flag = FREAD | FWRITE; 256 rf->f_type = DTYPE_PIPE; 257 rf->f_data = rpipe; 258 rf->f_ops = &pipeops; 259 FILE_UNLOCK(rf); 260 error = falloc(td, &wf, &fd); 261 if (error) { 262 FILEDESC_LOCK(fdp); 263 if (fdp->fd_ofiles[td->td_retval[0]] == rf) { 264 fdp->fd_ofiles[td->td_retval[0]] = NULL; 265 FILEDESC_UNLOCK(fdp); 266 fdrop(rf, td); 267 } else 268 FILEDESC_UNLOCK(fdp); 269 fdrop(rf, td); 270 /* rpipe has been closed by fdrop(). 
*/ 271 pipeclose(wpipe); 272 free(pmtx, M_TEMP); 273 return (error); 274 } 275 FILE_LOCK(wf); 276 wf->f_flag = FREAD | FWRITE; 277 wf->f_type = DTYPE_PIPE; 278 wf->f_data = wpipe; 279 wf->f_ops = &pipeops; 280 FILE_UNLOCK(wf); 281 td->td_retval[1] = fd; 282 rpipe->pipe_peer = wpipe; 283 wpipe->pipe_peer = rpipe; 284 #ifdef MAC 285 /* 286 * struct pipe represents a pipe endpoint. The MAC label is shared 287 * between the connected endpoints. As a result mac_init_pipe() and 288 * mac_create_pipe() should only be called on one of the endpoints 289 * after they have been connected. 290 */ 291 mac_init_pipe(rpipe); 292 mac_create_pipe(td->td_ucred, rpipe); 293 #endif 294 mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE); 295 rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx; 296 fdrop(rf, td); 297 298 return (0); 299 } 300 301 /* 302 * Allocate kva for pipe circular buffer, the space is pageable 303 * This routine will 'realloc' the size of a pipe safely, if it fails 304 * it will retain the old buffer. 305 * If it fails it will return ENOMEM. 306 */ 307 static int 308 pipespace(cpipe, size) 309 struct pipe *cpipe; 310 int size; 311 { 312 struct vm_object *object; 313 caddr_t buffer; 314 int npages, error; 315 static int curfail = 0; 316 static struct timeval lastfail; 317 318 KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 319 ("pipespace: pipe mutex locked")); 320 321 size = round_page(size); 322 npages = size / PAGE_SIZE; 323 /* 324 * Create an object, I don't like the idea of paging to/from 325 * kernel_object. 326 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 327 */ 328 object = vm_object_allocate(OBJT_DEFAULT, npages); 329 buffer = (caddr_t) vm_map_min(pipe_map); 330 331 /* 332 * Insert the object into the kernel map, and allocate kva for it. 333 * The map entry is, by default, pageable. 334 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems. 
335 */ 336 error = vm_map_find(pipe_map, object, 0, 337 (vm_offset_t *) &buffer, size, 1, 338 VM_PROT_ALL, VM_PROT_ALL, 0); 339 340 if (error != KERN_SUCCESS) { 341 vm_object_deallocate(object); 342 if (ppsratecheck(&lastfail, &curfail, 1)) 343 printf("kern.maxpipekva exceeded, please see tuning(7).\n"); 344 return (ENOMEM); 345 } 346 347 /* free old resources if we're resizing */ 348 pipe_free_kmem(cpipe); 349 cpipe->pipe_buffer.buffer = buffer; 350 cpipe->pipe_buffer.size = size; 351 cpipe->pipe_buffer.in = 0; 352 cpipe->pipe_buffer.out = 0; 353 cpipe->pipe_buffer.cnt = 0; 354 atomic_add_int(&amountpipes, 1); 355 atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size); 356 return (0); 357 } 358 359 /* 360 * initialize and allocate VM and memory for pipe 361 */ 362 static int 363 pipe_create(cpipep) 364 struct pipe **cpipep; 365 { 366 struct pipe *cpipe; 367 int error; 368 369 *cpipep = uma_zalloc(pipe_zone, M_WAITOK); 370 if (*cpipep == NULL) 371 return (ENOMEM); 372 373 cpipe = *cpipep; 374 375 /* 376 * protect so pipeclose() doesn't follow a junk pointer 377 * if pipespace() fails. 378 */ 379 bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel)); 380 cpipe->pipe_state = 0; 381 cpipe->pipe_peer = NULL; 382 cpipe->pipe_busy = 0; 383 384 #ifndef PIPE_NODIRECT 385 /* 386 * pipe data structure initializations to support direct pipe I/O 387 */ 388 cpipe->pipe_map.cnt = 0; 389 cpipe->pipe_map.kva = 0; 390 cpipe->pipe_map.pos = 0; 391 cpipe->pipe_map.npages = 0; 392 /* cpipe->pipe_map.ms[] = invalid */ 393 #endif 394 395 cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */ 396 /* 397 * Reduce to 1/4th pipe size if we're over our global max. 
398 */ 399 if (amountpipekva > maxpipekva / 2) 400 error = pipespace(cpipe, SMALL_PIPE_SIZE); 401 else 402 error = pipespace(cpipe, PIPE_SIZE); 403 if (error) 404 return (error); 405 406 vfs_timestamp(&cpipe->pipe_ctime); 407 cpipe->pipe_atime = cpipe->pipe_ctime; 408 cpipe->pipe_mtime = cpipe->pipe_ctime; 409 410 return (0); 411 } 412 413 414 /* 415 * lock a pipe for I/O, blocking other access 416 */ 417 static __inline int 418 pipelock(cpipe, catch) 419 struct pipe *cpipe; 420 int catch; 421 { 422 int error; 423 424 PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 425 while (cpipe->pipe_state & PIPE_LOCKFL) { 426 cpipe->pipe_state |= PIPE_LWANT; 427 error = msleep(cpipe, PIPE_MTX(cpipe), 428 catch ? (PRIBIO | PCATCH) : PRIBIO, 429 "pipelk", 0); 430 if (error != 0) 431 return (error); 432 } 433 cpipe->pipe_state |= PIPE_LOCKFL; 434 return (0); 435 } 436 437 /* 438 * unlock a pipe I/O lock 439 */ 440 static __inline void 441 pipeunlock(cpipe) 442 struct pipe *cpipe; 443 { 444 445 PIPE_LOCK_ASSERT(cpipe, MA_OWNED); 446 cpipe->pipe_state &= ~PIPE_LOCKFL; 447 if (cpipe->pipe_state & PIPE_LWANT) { 448 cpipe->pipe_state &= ~PIPE_LWANT; 449 wakeup(cpipe); 450 } 451 } 452 453 static __inline void 454 pipeselwakeup(cpipe) 455 struct pipe *cpipe; 456 { 457 458 if (cpipe->pipe_state & PIPE_SEL) { 459 cpipe->pipe_state &= ~PIPE_SEL; 460 selwakeup(&cpipe->pipe_sel); 461 } 462 if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio) 463 pgsigio(&cpipe->pipe_sigio, SIGIO, 0); 464 KNOTE(&cpipe->pipe_sel.si_note, 0); 465 } 466 467 /* ARGSUSED */ 468 static int 469 pipe_read(fp, uio, active_cred, flags, td) 470 struct file *fp; 471 struct uio *uio; 472 struct ucred *active_cred; 473 struct thread *td; 474 int flags; 475 { 476 struct pipe *rpipe = fp->f_data; 477 int error; 478 int nread = 0; 479 u_int size; 480 481 PIPE_LOCK(rpipe); 482 ++rpipe->pipe_busy; 483 error = pipelock(rpipe, 1); 484 if (error) 485 goto unlocked_error; 486 487 #ifdef MAC 488 error = mac_check_pipe_read(active_cred, 
rpipe); 489 if (error) 490 goto locked_error; 491 #endif 492 493 while (uio->uio_resid) { 494 /* 495 * normal pipe buffer receive 496 */ 497 if (rpipe->pipe_buffer.cnt > 0) { 498 size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out; 499 if (size > rpipe->pipe_buffer.cnt) 500 size = rpipe->pipe_buffer.cnt; 501 if (size > (u_int) uio->uio_resid) 502 size = (u_int) uio->uio_resid; 503 504 PIPE_UNLOCK(rpipe); 505 error = uiomove( 506 &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out], 507 size, uio); 508 PIPE_LOCK(rpipe); 509 if (error) 510 break; 511 512 rpipe->pipe_buffer.out += size; 513 if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size) 514 rpipe->pipe_buffer.out = 0; 515 516 rpipe->pipe_buffer.cnt -= size; 517 518 /* 519 * If there is no more to read in the pipe, reset 520 * its pointers to the beginning. This improves 521 * cache hit stats. 522 */ 523 if (rpipe->pipe_buffer.cnt == 0) { 524 rpipe->pipe_buffer.in = 0; 525 rpipe->pipe_buffer.out = 0; 526 } 527 nread += size; 528 #ifndef PIPE_NODIRECT 529 /* 530 * Direct copy, bypassing a kernel buffer. 531 */ 532 } else if ((size = rpipe->pipe_map.cnt) && 533 (rpipe->pipe_state & PIPE_DIRECTW)) { 534 caddr_t va; 535 if (size > (u_int) uio->uio_resid) 536 size = (u_int) uio->uio_resid; 537 538 va = (caddr_t) rpipe->pipe_map.kva + 539 rpipe->pipe_map.pos; 540 PIPE_UNLOCK(rpipe); 541 error = uiomove(va, size, uio); 542 PIPE_LOCK(rpipe); 543 if (error) 544 break; 545 nread += size; 546 rpipe->pipe_map.pos += size; 547 rpipe->pipe_map.cnt -= size; 548 if (rpipe->pipe_map.cnt == 0) { 549 rpipe->pipe_state &= ~PIPE_DIRECTW; 550 wakeup(rpipe); 551 } 552 #endif 553 } else { 554 /* 555 * detect EOF condition 556 * read returns 0 on EOF, no need to set error 557 */ 558 if (rpipe->pipe_state & PIPE_EOF) 559 break; 560 561 /* 562 * If the "write-side" has been blocked, wake it up now. 
563 */ 564 if (rpipe->pipe_state & PIPE_WANTW) { 565 rpipe->pipe_state &= ~PIPE_WANTW; 566 wakeup(rpipe); 567 } 568 569 /* 570 * Break if some data was read. 571 */ 572 if (nread > 0) 573 break; 574 575 /* 576 * Unlock the pipe buffer for our remaining processing. 577 * We will either break out with an error or we will 578 * sleep and relock to loop. 579 */ 580 pipeunlock(rpipe); 581 582 /* 583 * Handle non-blocking mode operation or 584 * wait for more data. 585 */ 586 if (fp->f_flag & FNONBLOCK) { 587 error = EAGAIN; 588 } else { 589 rpipe->pipe_state |= PIPE_WANTR; 590 if ((error = msleep(rpipe, PIPE_MTX(rpipe), 591 PRIBIO | PCATCH, 592 "piperd", 0)) == 0) 593 error = pipelock(rpipe, 1); 594 } 595 if (error) 596 goto unlocked_error; 597 } 598 } 599 #ifdef MAC 600 locked_error: 601 #endif 602 pipeunlock(rpipe); 603 604 /* XXX: should probably do this before getting any locks. */ 605 if (error == 0) 606 vfs_timestamp(&rpipe->pipe_atime); 607 unlocked_error: 608 --rpipe->pipe_busy; 609 610 /* 611 * PIPE_WANT processing only makes sense if pipe_busy is 0. 612 */ 613 if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) { 614 rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW); 615 wakeup(rpipe); 616 } else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) { 617 /* 618 * Handle write blocking hysteresis. 619 */ 620 if (rpipe->pipe_state & PIPE_WANTW) { 621 rpipe->pipe_state &= ~PIPE_WANTW; 622 wakeup(rpipe); 623 } 624 } 625 626 if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF) 627 pipeselwakeup(rpipe); 628 629 PIPE_UNLOCK(rpipe); 630 return (error); 631 } 632 633 #ifndef PIPE_NODIRECT 634 /* 635 * Map the sending processes' buffer into kernel space and wire it. 636 * This is similar to a physical write operation. 
637 */ 638 static int 639 pipe_build_write_buffer(wpipe, uio) 640 struct pipe *wpipe; 641 struct uio *uio; 642 { 643 pmap_t pmap; 644 u_int size; 645 int i, j; 646 vm_offset_t addr, endaddr; 647 648 PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 649 650 size = (u_int) uio->uio_iov->iov_len; 651 if (size > wpipe->pipe_buffer.size) 652 size = wpipe->pipe_buffer.size; 653 654 pmap = vmspace_pmap(curproc->p_vmspace); 655 endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size); 656 addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base); 657 for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) { 658 /* 659 * vm_fault_quick() can sleep. Consequently, 660 * vm_page_lock_queue() and vm_page_unlock_queue() 661 * should not be performed outside of this loop. 662 */ 663 race: 664 if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0) { 665 vm_page_lock_queues(); 666 for (j = 0; j < i; j++) 667 vm_page_unhold(wpipe->pipe_map.ms[j]); 668 vm_page_unlock_queues(); 669 return (EFAULT); 670 } 671 wpipe->pipe_map.ms[i] = pmap_extract_and_hold(pmap, addr, 672 VM_PROT_READ); 673 if (wpipe->pipe_map.ms[i] == NULL) 674 goto race; 675 } 676 677 /* 678 * set up the control block 679 */ 680 wpipe->pipe_map.npages = i; 681 wpipe->pipe_map.pos = 682 ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK; 683 wpipe->pipe_map.cnt = size; 684 685 /* 686 * and map the buffer 687 */ 688 if (wpipe->pipe_map.kva == 0) { 689 /* 690 * We need to allocate space for an extra page because the 691 * address range might (will) span pages at times. 
692 */ 693 wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map, 694 wpipe->pipe_buffer.size + PAGE_SIZE); 695 atomic_add_int(&amountpipekvawired, 696 wpipe->pipe_buffer.size + PAGE_SIZE); 697 } 698 pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms, 699 wpipe->pipe_map.npages); 700 701 /* 702 * and update the uio data 703 */ 704 705 uio->uio_iov->iov_len -= size; 706 uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size; 707 if (uio->uio_iov->iov_len == 0) 708 uio->uio_iov++; 709 uio->uio_resid -= size; 710 uio->uio_offset += size; 711 return (0); 712 } 713 714 /* 715 * unmap and unwire the process buffer 716 */ 717 static void 718 pipe_destroy_write_buffer(wpipe) 719 struct pipe *wpipe; 720 { 721 int i; 722 723 PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED); 724 if (wpipe->pipe_map.kva) { 725 pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages); 726 727 if (amountpipekvawired > maxpipekvawired / 2) { 728 /* Conserve address space */ 729 vm_offset_t kva = wpipe->pipe_map.kva; 730 wpipe->pipe_map.kva = 0; 731 kmem_free(kernel_map, kva, 732 wpipe->pipe_buffer.size + PAGE_SIZE); 733 atomic_subtract_int(&amountpipekvawired, 734 wpipe->pipe_buffer.size + PAGE_SIZE); 735 } 736 } 737 vm_page_lock_queues(); 738 for (i = 0; i < wpipe->pipe_map.npages; i++) { 739 vm_page_unhold(wpipe->pipe_map.ms[i]); 740 } 741 vm_page_unlock_queues(); 742 wpipe->pipe_map.npages = 0; 743 } 744 745 /* 746 * In the case of a signal, the writing process might go away. This 747 * code copies the data into the circular buffer so that the source 748 * pages can be freed without loss of data. 
749 */ 750 static void 751 pipe_clone_write_buffer(wpipe) 752 struct pipe *wpipe; 753 { 754 int size; 755 int pos; 756 757 PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 758 size = wpipe->pipe_map.cnt; 759 pos = wpipe->pipe_map.pos; 760 761 wpipe->pipe_buffer.in = size; 762 wpipe->pipe_buffer.out = 0; 763 wpipe->pipe_buffer.cnt = size; 764 wpipe->pipe_state &= ~PIPE_DIRECTW; 765 766 PIPE_UNLOCK(wpipe); 767 bcopy((caddr_t) wpipe->pipe_map.kva + pos, 768 wpipe->pipe_buffer.buffer, size); 769 pipe_destroy_write_buffer(wpipe); 770 PIPE_LOCK(wpipe); 771 } 772 773 /* 774 * This implements the pipe buffer write mechanism. Note that only 775 * a direct write OR a normal pipe write can be pending at any given time. 776 * If there are any characters in the pipe buffer, the direct write will 777 * be deferred until the receiving process grabs all of the bytes from 778 * the pipe buffer. Then the direct mapping write is set-up. 779 */ 780 static int 781 pipe_direct_write(wpipe, uio) 782 struct pipe *wpipe; 783 struct uio *uio; 784 { 785 int error; 786 787 retry: 788 PIPE_LOCK_ASSERT(wpipe, MA_OWNED); 789 while (wpipe->pipe_state & PIPE_DIRECTW) { 790 if (wpipe->pipe_state & PIPE_WANTR) { 791 wpipe->pipe_state &= ~PIPE_WANTR; 792 wakeup(wpipe); 793 } 794 wpipe->pipe_state |= PIPE_WANTW; 795 error = msleep(wpipe, PIPE_MTX(wpipe), 796 PRIBIO | PCATCH, "pipdww", 0); 797 if (error) 798 goto error1; 799 if (wpipe->pipe_state & PIPE_EOF) { 800 error = EPIPE; 801 goto error1; 802 } 803 } 804 wpipe->pipe_map.cnt = 0; /* transfer not ready yet */ 805 if (wpipe->pipe_buffer.cnt > 0) { 806 if (wpipe->pipe_state & PIPE_WANTR) { 807 wpipe->pipe_state &= ~PIPE_WANTR; 808 wakeup(wpipe); 809 } 810 811 wpipe->pipe_state |= PIPE_WANTW; 812 error = msleep(wpipe, PIPE_MTX(wpipe), 813 PRIBIO | PCATCH, "pipdwc", 0); 814 if (error) 815 goto error1; 816 if (wpipe->pipe_state & PIPE_EOF) { 817 error = EPIPE; 818 goto error1; 819 } 820 goto retry; 821 } 822 823 wpipe->pipe_state |= PIPE_DIRECTW; 824 825 
pipelock(wpipe, 0); 826 PIPE_UNLOCK(wpipe); 827 error = pipe_build_write_buffer(wpipe, uio); 828 PIPE_LOCK(wpipe); 829 pipeunlock(wpipe); 830 if (error) { 831 wpipe->pipe_state &= ~PIPE_DIRECTW; 832 goto error1; 833 } 834 835 error = 0; 836 while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) { 837 if (wpipe->pipe_state & PIPE_EOF) { 838 pipelock(wpipe, 0); 839 PIPE_UNLOCK(wpipe); 840 pipe_destroy_write_buffer(wpipe); 841 PIPE_LOCK(wpipe); 842 pipeselwakeup(wpipe); 843 pipeunlock(wpipe); 844 error = EPIPE; 845 goto error1; 846 } 847 if (wpipe->pipe_state & PIPE_WANTR) { 848 wpipe->pipe_state &= ~PIPE_WANTR; 849 wakeup(wpipe); 850 } 851 pipeselwakeup(wpipe); 852 error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH, 853 "pipdwt", 0); 854 } 855 856 pipelock(wpipe,0); 857 if (wpipe->pipe_state & PIPE_DIRECTW) { 858 /* 859 * this bit of trickery substitutes a kernel buffer for 860 * the process that might be going away. 861 */ 862 pipe_clone_write_buffer(wpipe); 863 } else { 864 PIPE_UNLOCK(wpipe); 865 pipe_destroy_write_buffer(wpipe); 866 PIPE_LOCK(wpipe); 867 } 868 pipeunlock(wpipe); 869 return (error); 870 871 error1: 872 wakeup(wpipe); 873 return (error); 874 } 875 #endif 876 877 static int 878 pipe_write(fp, uio, active_cred, flags, td) 879 struct file *fp; 880 struct uio *uio; 881 struct ucred *active_cred; 882 struct thread *td; 883 int flags; 884 { 885 int error = 0; 886 int orig_resid; 887 struct pipe *wpipe, *rpipe; 888 889 rpipe = fp->f_data; 890 wpipe = rpipe->pipe_peer; 891 892 PIPE_LOCK(rpipe); 893 /* 894 * detect loss of pipe read side, issue SIGPIPE if lost. 895 */ 896 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 897 PIPE_UNLOCK(rpipe); 898 return (EPIPE); 899 } 900 #ifdef MAC 901 error = mac_check_pipe_write(active_cred, wpipe); 902 if (error) { 903 PIPE_UNLOCK(rpipe); 904 return (error); 905 } 906 #endif 907 ++wpipe->pipe_busy; 908 909 /* 910 * If it is advantageous to resize the pipe buffer, do 911 * so. 
912 */ 913 if ((uio->uio_resid > PIPE_SIZE) && 914 (amountpipekva < maxpipekva / 2) && 915 (nbigpipe < LIMITBIGPIPES) && 916 (wpipe->pipe_state & PIPE_DIRECTW) == 0 && 917 (wpipe->pipe_buffer.size <= PIPE_SIZE) && 918 (wpipe->pipe_buffer.cnt == 0)) { 919 920 if ((error = pipelock(wpipe, 1)) == 0) { 921 PIPE_UNLOCK(wpipe); 922 if (pipespace(wpipe, BIG_PIPE_SIZE) == 0) 923 atomic_add_int(&nbigpipe, 1); 924 PIPE_LOCK(wpipe); 925 pipeunlock(wpipe); 926 } 927 } 928 929 /* 930 * If an early error occured unbusy and return, waking up any pending 931 * readers. 932 */ 933 if (error) { 934 --wpipe->pipe_busy; 935 if ((wpipe->pipe_busy == 0) && 936 (wpipe->pipe_state & PIPE_WANT)) { 937 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 938 wakeup(wpipe); 939 } 940 PIPE_UNLOCK(rpipe); 941 return(error); 942 } 943 944 orig_resid = uio->uio_resid; 945 946 while (uio->uio_resid) { 947 int space; 948 949 #ifndef PIPE_NODIRECT 950 /* 951 * If the transfer is large, we can gain performance if 952 * we do process-to-process copies directly. 953 * If the write is non-blocking, we don't use the 954 * direct write mechanism. 955 * 956 * The direct write mechanism will detect the reader going 957 * away on us. 958 */ 959 if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && 960 (fp->f_flag & FNONBLOCK) == 0 && 961 amountpipekvawired + uio->uio_resid < maxpipekvawired) { 962 error = pipe_direct_write(wpipe, uio); 963 if (error) 964 break; 965 continue; 966 } 967 #endif 968 969 /* 970 * Pipe buffered writes cannot be coincidental with 971 * direct writes. We wait until the currently executing 972 * direct write is completed before we start filling the 973 * pipe buffer. We break out if a signal occurs or the 974 * reader goes away. 
975 */ 976 retrywrite: 977 while (wpipe->pipe_state & PIPE_DIRECTW) { 978 if (wpipe->pipe_state & PIPE_WANTR) { 979 wpipe->pipe_state &= ~PIPE_WANTR; 980 wakeup(wpipe); 981 } 982 error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH, 983 "pipbww", 0); 984 if (wpipe->pipe_state & PIPE_EOF) 985 break; 986 if (error) 987 break; 988 } 989 if (wpipe->pipe_state & PIPE_EOF) { 990 error = EPIPE; 991 break; 992 } 993 994 space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 995 996 /* Writes of size <= PIPE_BUF must be atomic. */ 997 if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF)) 998 space = 0; 999 1000 if (space > 0) { 1001 if ((error = pipelock(wpipe,1)) == 0) { 1002 int size; /* Transfer size */ 1003 int segsize; /* first segment to transfer */ 1004 1005 /* 1006 * It is possible for a direct write to 1007 * slip in on us... handle it here... 1008 */ 1009 if (wpipe->pipe_state & PIPE_DIRECTW) { 1010 pipeunlock(wpipe); 1011 goto retrywrite; 1012 } 1013 /* 1014 * If a process blocked in uiomove, our 1015 * value for space might be bad. 1016 * 1017 * XXX will we be ok if the reader has gone 1018 * away here? 1019 */ 1020 if (space > wpipe->pipe_buffer.size - 1021 wpipe->pipe_buffer.cnt) { 1022 pipeunlock(wpipe); 1023 goto retrywrite; 1024 } 1025 1026 /* 1027 * Transfer size is minimum of uio transfer 1028 * and free space in pipe buffer. 1029 */ 1030 if (space > uio->uio_resid) 1031 size = uio->uio_resid; 1032 else 1033 size = space; 1034 /* 1035 * First segment to transfer is minimum of 1036 * transfer size and contiguous space in 1037 * pipe buffer. If first segment to transfer 1038 * is less than the transfer size, we've got 1039 * a wraparound in the buffer. 
1040 */ 1041 segsize = wpipe->pipe_buffer.size - 1042 wpipe->pipe_buffer.in; 1043 if (segsize > size) 1044 segsize = size; 1045 1046 /* Transfer first segment */ 1047 1048 PIPE_UNLOCK(rpipe); 1049 error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in], 1050 segsize, uio); 1051 PIPE_LOCK(rpipe); 1052 1053 if (error == 0 && segsize < size) { 1054 /* 1055 * Transfer remaining part now, to 1056 * support atomic writes. Wraparound 1057 * happened. 1058 */ 1059 if (wpipe->pipe_buffer.in + segsize != 1060 wpipe->pipe_buffer.size) 1061 panic("Expected pipe buffer " 1062 "wraparound disappeared"); 1063 1064 PIPE_UNLOCK(rpipe); 1065 error = uiomove( 1066 &wpipe->pipe_buffer.buffer[0], 1067 size - segsize, uio); 1068 PIPE_LOCK(rpipe); 1069 } 1070 if (error == 0) { 1071 wpipe->pipe_buffer.in += size; 1072 if (wpipe->pipe_buffer.in >= 1073 wpipe->pipe_buffer.size) { 1074 if (wpipe->pipe_buffer.in != 1075 size - segsize + 1076 wpipe->pipe_buffer.size) 1077 panic("Expected " 1078 "wraparound bad"); 1079 wpipe->pipe_buffer.in = size - 1080 segsize; 1081 } 1082 1083 wpipe->pipe_buffer.cnt += size; 1084 if (wpipe->pipe_buffer.cnt > 1085 wpipe->pipe_buffer.size) 1086 panic("Pipe buffer overflow"); 1087 1088 } 1089 pipeunlock(wpipe); 1090 } 1091 if (error) 1092 break; 1093 1094 } else { 1095 /* 1096 * If the "read-side" has been blocked, wake it up now. 1097 */ 1098 if (wpipe->pipe_state & PIPE_WANTR) { 1099 wpipe->pipe_state &= ~PIPE_WANTR; 1100 wakeup(wpipe); 1101 } 1102 1103 /* 1104 * don't block on non-blocking I/O 1105 */ 1106 if (fp->f_flag & FNONBLOCK) { 1107 error = EAGAIN; 1108 break; 1109 } 1110 1111 /* 1112 * We have no more space and have something to offer, 1113 * wake up select/poll. 1114 */ 1115 pipeselwakeup(wpipe); 1116 1117 wpipe->pipe_state |= PIPE_WANTW; 1118 error = msleep(wpipe, PIPE_MTX(rpipe), 1119 PRIBIO | PCATCH, "pipewr", 0); 1120 if (error != 0) 1121 break; 1122 /* 1123 * If read side wants to go away, we just issue a signal 1124 * to ourselves. 
1125 */ 1126 if (wpipe->pipe_state & PIPE_EOF) { 1127 error = EPIPE; 1128 break; 1129 } 1130 } 1131 } 1132 1133 --wpipe->pipe_busy; 1134 1135 if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) { 1136 wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR); 1137 wakeup(wpipe); 1138 } else if (wpipe->pipe_buffer.cnt > 0) { 1139 /* 1140 * If we have put any characters in the buffer, we wake up 1141 * the reader. 1142 */ 1143 if (wpipe->pipe_state & PIPE_WANTR) { 1144 wpipe->pipe_state &= ~PIPE_WANTR; 1145 wakeup(wpipe); 1146 } 1147 } 1148 1149 /* 1150 * Don't return EPIPE if I/O was successful 1151 */ 1152 if ((wpipe->pipe_buffer.cnt == 0) && 1153 (uio->uio_resid == 0) && 1154 (error == EPIPE)) { 1155 error = 0; 1156 } 1157 1158 if (error == 0) 1159 vfs_timestamp(&wpipe->pipe_mtime); 1160 1161 /* 1162 * We have something to offer, 1163 * wake up select/poll. 1164 */ 1165 if (wpipe->pipe_buffer.cnt) 1166 pipeselwakeup(wpipe); 1167 1168 PIPE_UNLOCK(rpipe); 1169 return (error); 1170 } 1171 1172 /* 1173 * we implement a very minimal set of ioctls for compatibility with sockets. 
1174 */ 1175 static int 1176 pipe_ioctl(fp, cmd, data, active_cred, td) 1177 struct file *fp; 1178 u_long cmd; 1179 void *data; 1180 struct ucred *active_cred; 1181 struct thread *td; 1182 { 1183 struct pipe *mpipe = fp->f_data; 1184 #ifdef MAC 1185 int error; 1186 #endif 1187 1188 PIPE_LOCK(mpipe); 1189 1190 #ifdef MAC 1191 error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data); 1192 if (error) 1193 return (error); 1194 #endif 1195 1196 switch (cmd) { 1197 1198 case FIONBIO: 1199 PIPE_UNLOCK(mpipe); 1200 return (0); 1201 1202 case FIOASYNC: 1203 if (*(int *)data) { 1204 mpipe->pipe_state |= PIPE_ASYNC; 1205 } else { 1206 mpipe->pipe_state &= ~PIPE_ASYNC; 1207 } 1208 PIPE_UNLOCK(mpipe); 1209 return (0); 1210 1211 case FIONREAD: 1212 if (mpipe->pipe_state & PIPE_DIRECTW) 1213 *(int *)data = mpipe->pipe_map.cnt; 1214 else 1215 *(int *)data = mpipe->pipe_buffer.cnt; 1216 PIPE_UNLOCK(mpipe); 1217 return (0); 1218 1219 case FIOSETOWN: 1220 PIPE_UNLOCK(mpipe); 1221 return (fsetown(*(int *)data, &mpipe->pipe_sigio)); 1222 1223 case FIOGETOWN: 1224 PIPE_UNLOCK(mpipe); 1225 *(int *)data = fgetown(&mpipe->pipe_sigio); 1226 return (0); 1227 1228 /* This is deprecated, FIOSETOWN should be used instead. */ 1229 case TIOCSPGRP: 1230 PIPE_UNLOCK(mpipe); 1231 return (fsetown(-(*(int *)data), &mpipe->pipe_sigio)); 1232 1233 /* This is deprecated, FIOGETOWN should be used instead. 
*/ 1234 case TIOCGPGRP: 1235 PIPE_UNLOCK(mpipe); 1236 *(int *)data = -fgetown(&mpipe->pipe_sigio); 1237 return (0); 1238 1239 } 1240 PIPE_UNLOCK(mpipe); 1241 return (ENOTTY); 1242 } 1243 1244 static int 1245 pipe_poll(fp, events, active_cred, td) 1246 struct file *fp; 1247 int events; 1248 struct ucred *active_cred; 1249 struct thread *td; 1250 { 1251 struct pipe *rpipe = fp->f_data; 1252 struct pipe *wpipe; 1253 int revents = 0; 1254 #ifdef MAC 1255 int error; 1256 #endif 1257 1258 wpipe = rpipe->pipe_peer; 1259 PIPE_LOCK(rpipe); 1260 #ifdef MAC 1261 error = mac_check_pipe_poll(active_cred, rpipe); 1262 if (error) 1263 goto locked_error; 1264 #endif 1265 if (events & (POLLIN | POLLRDNORM)) 1266 if ((rpipe->pipe_state & PIPE_DIRECTW) || 1267 (rpipe->pipe_buffer.cnt > 0) || 1268 (rpipe->pipe_state & PIPE_EOF)) 1269 revents |= events & (POLLIN | POLLRDNORM); 1270 1271 if (events & (POLLOUT | POLLWRNORM)) 1272 if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) || 1273 (((wpipe->pipe_state & PIPE_DIRECTW) == 0) && 1274 (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF)) 1275 revents |= events & (POLLOUT | POLLWRNORM); 1276 1277 if ((rpipe->pipe_state & PIPE_EOF) || 1278 (wpipe == NULL) || 1279 (wpipe->pipe_state & PIPE_EOF)) 1280 revents |= POLLHUP; 1281 1282 if (revents == 0) { 1283 if (events & (POLLIN | POLLRDNORM)) { 1284 selrecord(td, &rpipe->pipe_sel); 1285 rpipe->pipe_state |= PIPE_SEL; 1286 } 1287 1288 if (events & (POLLOUT | POLLWRNORM)) { 1289 selrecord(td, &wpipe->pipe_sel); 1290 wpipe->pipe_state |= PIPE_SEL; 1291 } 1292 } 1293 #ifdef MAC 1294 locked_error: 1295 #endif 1296 PIPE_UNLOCK(rpipe); 1297 1298 return (revents); 1299 } 1300 1301 /* 1302 * We shouldn't need locks here as we're doing a read and this should 1303 * be a natural race. 
1304 */ 1305 static int 1306 pipe_stat(fp, ub, active_cred, td) 1307 struct file *fp; 1308 struct stat *ub; 1309 struct ucred *active_cred; 1310 struct thread *td; 1311 { 1312 struct pipe *pipe = fp->f_data; 1313 #ifdef MAC 1314 int error; 1315 1316 PIPE_LOCK(pipe); 1317 error = mac_check_pipe_stat(active_cred, pipe); 1318 PIPE_UNLOCK(pipe); 1319 if (error) 1320 return (error); 1321 #endif 1322 bzero(ub, sizeof(*ub)); 1323 ub->st_mode = S_IFIFO; 1324 ub->st_blksize = pipe->pipe_buffer.size; 1325 ub->st_size = pipe->pipe_buffer.cnt; 1326 ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize; 1327 ub->st_atimespec = pipe->pipe_atime; 1328 ub->st_mtimespec = pipe->pipe_mtime; 1329 ub->st_ctimespec = pipe->pipe_ctime; 1330 ub->st_uid = fp->f_cred->cr_uid; 1331 ub->st_gid = fp->f_cred->cr_gid; 1332 /* 1333 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen. 1334 * XXX (st_dev, st_ino) should be unique. 1335 */ 1336 return (0); 1337 } 1338 1339 /* ARGSUSED */ 1340 static int 1341 pipe_close(fp, td) 1342 struct file *fp; 1343 struct thread *td; 1344 { 1345 struct pipe *cpipe = fp->f_data; 1346 1347 fp->f_ops = &badfileops; 1348 fp->f_data = NULL; 1349 funsetown(&cpipe->pipe_sigio); 1350 pipeclose(cpipe); 1351 return (0); 1352 } 1353 1354 static void 1355 pipe_free_kmem(cpipe) 1356 struct pipe *cpipe; 1357 { 1358 1359 KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)), 1360 ("pipespace: pipe mutex locked")); 1361 1362 if (cpipe->pipe_buffer.buffer != NULL) { 1363 if (cpipe->pipe_buffer.size > PIPE_SIZE) 1364 atomic_subtract_int(&nbigpipe, 1); 1365 atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size); 1366 atomic_subtract_int(&amountpipes, 1); 1367 vm_map_remove(pipe_map, 1368 (vm_offset_t)cpipe->pipe_buffer.buffer, 1369 (vm_offset_t)cpipe->pipe_buffer.buffer + cpipe->pipe_buffer.size); 1370 cpipe->pipe_buffer.buffer = NULL; 1371 } 1372 #ifndef PIPE_NODIRECT 1373 if (cpipe->pipe_map.kva != 0) { 1374 
atomic_subtract_int(&amountpipekvawired, 1375 cpipe->pipe_buffer.size + PAGE_SIZE); 1376 kmem_free(kernel_map, 1377 cpipe->pipe_map.kva, 1378 cpipe->pipe_buffer.size + PAGE_SIZE); 1379 cpipe->pipe_map.cnt = 0; 1380 cpipe->pipe_map.kva = 0; 1381 cpipe->pipe_map.pos = 0; 1382 cpipe->pipe_map.npages = 0; 1383 } 1384 #endif 1385 } 1386 1387 /* 1388 * shutdown the pipe 1389 */ 1390 static void 1391 pipeclose(cpipe) 1392 struct pipe *cpipe; 1393 { 1394 struct pipe *ppipe; 1395 int hadpeer; 1396 1397 if (cpipe == NULL) 1398 return; 1399 1400 hadpeer = 0; 1401 1402 /* partially created pipes won't have a valid mutex. */ 1403 if (PIPE_MTX(cpipe) != NULL) 1404 PIPE_LOCK(cpipe); 1405 1406 pipeselwakeup(cpipe); 1407 1408 /* 1409 * If the other side is blocked, wake it up saying that 1410 * we want to close it down. 1411 */ 1412 while (cpipe->pipe_busy) { 1413 wakeup(cpipe); 1414 cpipe->pipe_state |= PIPE_WANT | PIPE_EOF; 1415 msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0); 1416 } 1417 1418 #ifdef MAC 1419 if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL) 1420 mac_destroy_pipe(cpipe); 1421 #endif 1422 1423 /* 1424 * Disconnect from peer 1425 */ 1426 if ((ppipe = cpipe->pipe_peer) != NULL) { 1427 hadpeer++; 1428 pipeselwakeup(ppipe); 1429 1430 ppipe->pipe_state |= PIPE_EOF; 1431 wakeup(ppipe); 1432 KNOTE(&ppipe->pipe_sel.si_note, 0); 1433 ppipe->pipe_peer = NULL; 1434 } 1435 /* 1436 * free resources 1437 */ 1438 if (PIPE_MTX(cpipe) != NULL) { 1439 PIPE_UNLOCK(cpipe); 1440 if (!hadpeer) { 1441 mtx_destroy(PIPE_MTX(cpipe)); 1442 free(PIPE_MTX(cpipe), M_TEMP); 1443 } 1444 } 1445 pipe_free_kmem(cpipe); 1446 uma_zfree(pipe_zone, cpipe); 1447 } 1448 1449 /*ARGSUSED*/ 1450 static int 1451 pipe_kqfilter(struct file *fp, struct knote *kn) 1452 { 1453 struct pipe *cpipe; 1454 1455 cpipe = kn->kn_fp->f_data; 1456 switch (kn->kn_filter) { 1457 case EVFILT_READ: 1458 kn->kn_fop = &pipe_rfiltops; 1459 break; 1460 case EVFILT_WRITE: 1461 kn->kn_fop = &pipe_wfiltops; 1462 cpipe = 
cpipe->pipe_peer; 1463 if (cpipe == NULL) 1464 /* other end of pipe has been closed */ 1465 return (EPIPE); 1466 break; 1467 default: 1468 return (1); 1469 } 1470 kn->kn_hook = cpipe; 1471 1472 PIPE_LOCK(cpipe); 1473 SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext); 1474 PIPE_UNLOCK(cpipe); 1475 return (0); 1476 } 1477 1478 static void 1479 filt_pipedetach(struct knote *kn) 1480 { 1481 struct pipe *cpipe = (struct pipe *)kn->kn_hook; 1482 1483 PIPE_LOCK(cpipe); 1484 SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext); 1485 PIPE_UNLOCK(cpipe); 1486 } 1487 1488 /*ARGSUSED*/ 1489 static int 1490 filt_piperead(struct knote *kn, long hint) 1491 { 1492 struct pipe *rpipe = kn->kn_fp->f_data; 1493 struct pipe *wpipe = rpipe->pipe_peer; 1494 1495 PIPE_LOCK(rpipe); 1496 kn->kn_data = rpipe->pipe_buffer.cnt; 1497 if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW)) 1498 kn->kn_data = rpipe->pipe_map.cnt; 1499 1500 if ((rpipe->pipe_state & PIPE_EOF) || 1501 (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1502 kn->kn_flags |= EV_EOF; 1503 PIPE_UNLOCK(rpipe); 1504 return (1); 1505 } 1506 PIPE_UNLOCK(rpipe); 1507 return (kn->kn_data > 0); 1508 } 1509 1510 /*ARGSUSED*/ 1511 static int 1512 filt_pipewrite(struct knote *kn, long hint) 1513 { 1514 struct pipe *rpipe = kn->kn_fp->f_data; 1515 struct pipe *wpipe = rpipe->pipe_peer; 1516 1517 PIPE_LOCK(rpipe); 1518 if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) { 1519 kn->kn_data = 0; 1520 kn->kn_flags |= EV_EOF; 1521 PIPE_UNLOCK(rpipe); 1522 return (1); 1523 } 1524 kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt; 1525 if (wpipe->pipe_state & PIPE_DIRECTW) 1526 kn->kn_data = 0; 1527 1528 PIPE_UNLOCK(rpipe); 1529 return (kn->kn_data >= PIPE_BUF); 1530 } 1531