/*
 * Copyright (c) 1996 John S. Dyson
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice immediately at the beginning of the file, without modification,
 *    this list of conditions, and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Absolutely no warranty of function or purpose is made by the author
 *    John S. Dyson.
 * 4. Modifications may be freely made to this file if the above conditions
 *    are met.
 */

/*
 * This file contains a high-performance replacement for the socket-based
 * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
 * all features of sockets, but does everything that pipes normally do.
 */

/*
 * This code has two modes of operation, a small write mode and a large
 * write mode.  The small write mode acts like conventional pipes with
 * a kernel buffer.  If a write is smaller than PIPE_MINDIRECT, the
 * "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
 * and PIPE_SIZE in size, the writer's buffer is fully mapped and wired into
 * the kernel, and the receiving process can copy the data directly from the
 * pages in the sending process.
 *
 * If the sending process receives a signal, it is possible that it will
 * go away, and certainly its address space can change, because control
 * is returned to the user-mode side.  In that case, the pipe code
 * arranges to copy the buffer supplied by the user process to a pageable
 * kernel buffer, and the receiving process will grab the data from the
 * pageable kernel buffer.  Since signals don't happen all that often,
 * the copy operation is normally eliminated.
 *
 * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
 * happen for small transfers so that the system will not spend all of
 * its time context switching.  PIPE_SIZE is constrained by the
 * amount of kernel virtual memory.
 */
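
/*
 * Purely illustrative userland sketch (not part of the kernel build): both
 * modes described above are reached through the ordinary pipe(2), write(2)
 * and read(2) interface; the kernel picks the buffered or the direct path
 * per write based on the write's size.  The 64 KB figure below is an
 * arbitrary example of a "large" write, not a constant from this file.
 */
#if 0
#include <sys/types.h>
#include <sys/wait.h>

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

int
main(void)
{
	static char big[64 * 1024];	/* large write: candidate for the direct path */
	char small[256];		/* small write: copied through the kernel buffer */
	char sink[4096];
	int fds[2];
	ssize_t n;
	pid_t pid;

	if (pipe(fds) == -1)
		exit(1);
	if ((pid = fork()) == -1)
		exit(1);
	if (pid == 0) {
		/* Child: drain the pipe until the writer closes it. */
		close(fds[1]);
		while ((n = read(fds[0], sink, sizeof(sink))) > 0)
			;
		_exit(0);
	}
	/* Parent: one small and one large write down the same pipe. */
	close(fds[0]);
	memset(small, 'a', sizeof(small));
	memset(big, 'b', sizeof(big));
	(void)write(fds[1], small, sizeof(small));
	(void)write(fds[1], big, sizeof(big));
	close(fds[1]);
	waitpid(pid, NULL, 0);
	return (0);
}
#endif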

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mac.h>
#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>
#include <sys/event.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/uma.h>

/*
 * Use this define if you want to disable *fancy* VM things.  Expect an
 * approximately 30% decrease in transfer rate.  This could be useful for
 * NetBSD or OpenBSD.
 */
/* #define PIPE_NODIRECT */

/*
 * interfaces to the outside world
 */
static fo_rdwr_t	pipe_read;
static fo_rdwr_t	pipe_write;
static fo_ioctl_t	pipe_ioctl;
static fo_poll_t	pipe_poll;
static fo_kqfilter_t	pipe_kqfilter;
static fo_stat_t	pipe_stat;
static fo_close_t	pipe_close;

static struct fileops pipeops = {
	.fo_read = pipe_read,
	.fo_write = pipe_write,
	.fo_ioctl = pipe_ioctl,
	.fo_poll = pipe_poll,
	.fo_kqfilter = pipe_kqfilter,
	.fo_stat = pipe_stat,
	.fo_close = pipe_close,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_pipedetach(struct knote *kn);
static int	filt_piperead(struct knote *kn, long hint);
static int	filt_pipewrite(struct knote *kn, long hint);

static struct filterops pipe_rfiltops =
	{ 1, NULL, filt_pipedetach, filt_piperead };
static struct filterops pipe_wfiltops =
	{ 1, NULL, filt_pipedetach, filt_pipewrite };

#define PIPE_GET_GIANT(pipe)						\
	do {								\
		KASSERT(((pipe)->pipe_state & PIPE_LOCKFL) != 0,	\
		    ("%s:%d PIPE_GET_GIANT: pipe not locked",		\
		     __FILE__, __LINE__));				\
		PIPE_UNLOCK(pipe);					\
		mtx_lock(&Giant);					\
	} while (0)

#define PIPE_DROP_GIANT(pipe)						\
	do {								\
		mtx_unlock(&Giant);					\
		PIPE_LOCK(pipe);					\
	} while (0)

/*
 * Default pipe buffer size(s); this can be fairly large now because pipe
 * space is pageable.  The pipe code will try to maintain locality of
 * reference for performance reasons, so small amounts of outstanding I/O
 * will not wipe the cache.
 */
#define MINPIPESIZE (PIPE_SIZE/3)
#define MAXPIPESIZE (2*PIPE_SIZE/3)

/*
 * Maximum amount of kva for pipes -- this is kind of a soft limit, but
 * is there so that on large systems, we don't exhaust it.
 */
#define MAXPIPEKVA (8*1024*1024)

/*
 * Limit for direct transfers; we cannot, of course, limit
 * the amount of kva for pipes in general, though.
 */
#define LIMITPIPEKVA (16*1024*1024)

/*
 * Limit the number of "big" pipes
 */
#define LIMITBIGPIPES	32
static int nbigpipe;

static int amountpipekva;

static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
static void pipe_free_kmem(struct pipe *cpipe);
static int pipe_create(struct pipe **cpipep);
static __inline int pipelock(struct pipe *cpipe, int catch);
static __inline void pipeunlock(struct pipe *cpipe);
static __inline void pipeselwakeup(struct pipe *cpipe);
#ifndef PIPE_NODIRECT
static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
static void pipe_destroy_write_buffer(struct pipe *wpipe);
static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
static void pipe_clone_write_buffer(struct pipe *wpipe);
#endif
static int pipespace(struct pipe *cpipe, int size);

static uma_zone_t pipe_zone;

SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);

static void
pipeinit(void *dummy __unused)
{
	pipe_zone = uma_zcreate("PIPE", sizeof(struct pipe), NULL,
	    NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
}

/*
 * The pipe system call for the DTYPE_PIPE type of pipes
 */

/* ARGSUSED */
int
pipe(td, uap)
	struct thread *td;
	struct pipe_args /* {
		int	dummy;
	} */ *uap;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	struct file *rf, *wf;
	struct pipe *rpipe, *wpipe;
	struct mtx *pmtx;
	int fd, error;

	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));

	pmtx = malloc(sizeof(*pmtx), M_TEMP, M_WAITOK | M_ZERO);

	rpipe = wpipe = NULL;
	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (ENFILE);
	}

	rpipe->pipe_state |= PIPE_DIRECTOK;
	wpipe->pipe_state |= PIPE_DIRECTOK;

	error = falloc(td, &rf, &fd);
	if (error) {
		pipeclose(rpipe);
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (error);
	}
	fhold(rf);
	td->td_retval[0] = fd;

	/*
	 * Warning: once we've gotten past allocation of the fd for the
	 * read-side, we can only drop the read side via fdrop() in order
	 * to avoid races against processes which manage to dup() the read
	 * side while we are blocked trying to allocate the write side.
	 */
	FILE_LOCK(rf);
	rf->f_flag = FREAD | FWRITE;
	rf->f_type = DTYPE_PIPE;
	rf->f_data = rpipe;
	rf->f_ops = &pipeops;
	FILE_UNLOCK(rf);
	error = falloc(td, &wf, &fd);
	if (error) {
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
			fdp->fd_ofiles[td->td_retval[0]] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(rf, td);
		} else
			FILEDESC_UNLOCK(fdp);
		fdrop(rf, td);
		/* rpipe has been closed by fdrop(). */
		pipeclose(wpipe);
		free(pmtx, M_TEMP);
		return (error);
	}
	FILE_LOCK(wf);
	wf->f_flag = FREAD | FWRITE;
	wf->f_type = DTYPE_PIPE;
	wf->f_data = wpipe;
	wf->f_ops = &pipeops;
	FILE_UNLOCK(wf);
	td->td_retval[1] = fd;
	rpipe->pipe_peer = wpipe;
	wpipe->pipe_peer = rpipe;
#ifdef MAC
	/*
	 * struct pipe represents a pipe endpoint.  The MAC label is shared
	 * between the connected endpoints.  As a result mac_init_pipe() and
	 * mac_create_pipe() should only be called on one of the endpoints
	 * after they have been connected.
	 */
	mac_init_pipe(rpipe);
	mac_create_pipe(td->td_ucred, rpipe);
#endif
	mtx_init(pmtx, "pipe mutex", NULL, MTX_DEF | MTX_RECURSE);
	rpipe->pipe_mtxp = wpipe->pipe_mtxp = pmtx;
	fdrop(rf, td);

	return (0);
}

/*
 * Allocate kva for the pipe circular buffer; the space is pageable.
 * This routine will 'realloc' the size of a pipe safely: if it fails,
 * it will retain the old buffer and return ENOMEM.
 */
static int
pipespace(cpipe, size)
	struct pipe *cpipe;
	int size;
{
	struct vm_object *object;
	caddr_t buffer;
	int npages, error;

	GIANT_REQUIRED;
	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	    ("pipespace: pipe mutex locked"));

	npages = round_page(size)/PAGE_SIZE;
	/*
	 * Create an object, I don't like the idea of paging to/from
	 * kernel_object.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	object = vm_object_allocate(OBJT_DEFAULT, npages);
	buffer = (caddr_t) vm_map_min(kernel_map);

	/*
	 * Insert the object into the kernel map, and allocate kva for it.
	 * The map entry is, by default, pageable.
	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
	 */
	error = vm_map_find(kernel_map, object, 0,
	    (vm_offset_t *) &buffer, size, 1,
	    VM_PROT_ALL, VM_PROT_ALL, 0);

	if (error != KERN_SUCCESS) {
		vm_object_deallocate(object);
		return (ENOMEM);
	}

	/* free old resources if we're resizing */
	pipe_free_kmem(cpipe);
	cpipe->pipe_buffer.object = object;
	cpipe->pipe_buffer.buffer = buffer;
	cpipe->pipe_buffer.size = size;
	cpipe->pipe_buffer.in = 0;
	cpipe->pipe_buffer.out = 0;
	cpipe->pipe_buffer.cnt = 0;
	atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
	return (0);
}

/*
 * initialize and allocate VM and memory for pipe
 */
static int
pipe_create(cpipep)
	struct pipe **cpipep;
{
	struct pipe *cpipe;
	int error;

	*cpipep = uma_zalloc(pipe_zone, M_WAITOK);
	if (*cpipep == NULL)
		return (ENOMEM);

	cpipe = *cpipep;

	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
	cpipe->pipe_buffer.object = NULL;
#ifndef PIPE_NODIRECT
	cpipe->pipe_map.kva = 0;
#endif
	/*
	 * protect so pipeclose() doesn't follow a junk pointer
	 * if pipespace() fails.
	 */
	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
	cpipe->pipe_state = 0;
	cpipe->pipe_peer = NULL;
	cpipe->pipe_busy = 0;

#ifndef PIPE_NODIRECT
	/*
	 * pipe data structure initializations to support direct pipe I/O
	 */
	cpipe->pipe_map.cnt = 0;
	cpipe->pipe_map.kva = 0;
	cpipe->pipe_map.pos = 0;
	cpipe->pipe_map.npages = 0;
	/* cpipe->pipe_map.ms[] = invalid */
#endif

	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
	error = pipespace(cpipe, PIPE_SIZE);
	if (error)
		return (error);

	vfs_timestamp(&cpipe->pipe_ctime);
	cpipe->pipe_atime = cpipe->pipe_ctime;
	cpipe->pipe_mtime = cpipe->pipe_ctime;

	return (0);
}


/*
 * lock a pipe for I/O, blocking other access
 */
static __inline int
pipelock(cpipe, catch)
	struct pipe *cpipe;
	int catch;
{
	int error;

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	while (cpipe->pipe_state & PIPE_LOCKFL) {
		cpipe->pipe_state |= PIPE_LWANT;
		error = msleep(cpipe, PIPE_MTX(cpipe),
		    catch ? (PRIBIO | PCATCH) : PRIBIO,
		    "pipelk", 0);
		if (error != 0)
			return (error);
	}
	cpipe->pipe_state |= PIPE_LOCKFL;
	return (0);
}

/*
 * unlock a pipe I/O lock
 */
static __inline void
pipeunlock(cpipe)
	struct pipe *cpipe;
{

	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
	cpipe->pipe_state &= ~PIPE_LOCKFL;
	if (cpipe->pipe_state & PIPE_LWANT) {
		cpipe->pipe_state &= ~PIPE_LWANT;
		wakeup(cpipe);
	}
}

static __inline void
pipeselwakeup(cpipe)
	struct pipe *cpipe;
{

	if (cpipe->pipe_state & PIPE_SEL) {
		cpipe->pipe_state &= ~PIPE_SEL;
		selwakeup(&cpipe->pipe_sel);
	}
	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
		pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
	KNOTE(&cpipe->pipe_sel.si_note, 0);
}

/* ARGSUSED */
static int
pipe_read(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	struct pipe *rpipe = fp->f_data;
	int error;
	int nread = 0;
	u_int size;

	PIPE_LOCK(rpipe);
	++rpipe->pipe_busy;
	error = pipelock(rpipe, 1);
	if (error)
		goto unlocked_error;

#ifdef MAC
	error = mac_check_pipe_read(active_cred, rpipe);
	if (error)
		goto locked_error;
#endif

	while (uio->uio_resid) {
		/*
		 * normal pipe buffer receive
		 */
		if (rpipe->pipe_buffer.cnt > 0) {
			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
			if (size > rpipe->pipe_buffer.cnt)
				size = rpipe->pipe_buffer.cnt;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			PIPE_UNLOCK(rpipe);
			error = uiomove(
			    &rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
			    size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;

			rpipe->pipe_buffer.out += size;
			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
				rpipe->pipe_buffer.out = 0;

			rpipe->pipe_buffer.cnt -= size;

			/*
			 * If there is no more to read in the pipe, reset
			 * its pointers to the beginning.  This improves
			 * cache hit stats.
			 */
			if (rpipe->pipe_buffer.cnt == 0) {
				rpipe->pipe_buffer.in = 0;
				rpipe->pipe_buffer.out = 0;
			}
			nread += size;
#ifndef PIPE_NODIRECT
		/*
		 * Direct copy, bypassing a kernel buffer.
		 */
		} else if ((size = rpipe->pipe_map.cnt) &&
			   (rpipe->pipe_state & PIPE_DIRECTW)) {
			caddr_t	va;
			if (size > (u_int) uio->uio_resid)
				size = (u_int) uio->uio_resid;

			va = (caddr_t) rpipe->pipe_map.kva +
			    rpipe->pipe_map.pos;
			PIPE_UNLOCK(rpipe);
			error = uiomove(va, size, uio);
			PIPE_LOCK(rpipe);
			if (error)
				break;
			nread += size;
			rpipe->pipe_map.pos += size;
			rpipe->pipe_map.cnt -= size;
			if (rpipe->pipe_map.cnt == 0) {
				rpipe->pipe_state &= ~PIPE_DIRECTW;
				wakeup(rpipe);
			}
#endif
		} else {
			/*
			 * detect EOF condition
			 * read returns 0 on EOF, no need to set error
			 */
			if (rpipe->pipe_state & PIPE_EOF)
				break;

			/*
			 * If the "write-side" has been blocked, wake it up now.
			 */
			if (rpipe->pipe_state & PIPE_WANTW) {
				rpipe->pipe_state &= ~PIPE_WANTW;
				wakeup(rpipe);
			}

			/*
			 * Break if some data was read.
			 */
			if (nread > 0)
				break;

			/*
			 * Unlock the pipe buffer for our remaining processing.
			 * We will either break out with an error or we will
			 * sleep and relock to loop.
			 */
			pipeunlock(rpipe);

			/*
			 * Handle non-blocking mode operation or
			 * wait for more data.
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
			} else {
				rpipe->pipe_state |= PIPE_WANTR;
				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
				    PRIBIO | PCATCH,
				    "piperd", 0)) == 0)
					error = pipelock(rpipe, 1);
			}
			if (error)
				goto unlocked_error;
		}
	}
#ifdef MAC
locked_error:
#endif
	pipeunlock(rpipe);

	/* XXX: should probably do this before getting any locks. */
	if (error == 0)
		vfs_timestamp(&rpipe->pipe_atime);
unlocked_error:
	--rpipe->pipe_busy;

	/*
	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
	 */
	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
		wakeup(rpipe);
	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
		/*
		 * Handle write blocking hysteresis.
		 */
		if (rpipe->pipe_state & PIPE_WANTW) {
			rpipe->pipe_state &= ~PIPE_WANTW;
			wakeup(rpipe);
		}
	}

	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
		pipeselwakeup(rpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

#ifndef PIPE_NODIRECT
/*
 * Map the sending process's buffer into kernel space and wire it.
 * This is similar to a physical write operation.
 */
static int
pipe_build_write_buffer(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	u_int size;
	int i;
	vm_offset_t addr, endaddr;
	vm_paddr_t paddr;

	GIANT_REQUIRED;
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	size = (u_int) uio->uio_iov->iov_len;
	if (size > wpipe->pipe_buffer.size)
		size = wpipe->pipe_buffer.size;

	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		vm_page_t m;

		/*
		 * vm_fault_quick() can sleep.  Consequently,
		 * vm_page_lock_queues() and vm_page_unlock_queues()
		 * should not be performed outside of this loop.
		 */
		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
		    (paddr = pmap_extract(vmspace_pmap(curproc->p_vmspace),
		     addr)) == 0) {
			int j;

			vm_page_lock_queues();
			for (j = 0; j < i; j++)
				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
			vm_page_unlock_queues();
			return (EFAULT);
		}

		m = PHYS_TO_VM_PAGE(paddr);
		vm_page_lock_queues();
		vm_page_wire(m);
		vm_page_unlock_queues();
		wpipe->pipe_map.ms[i] = m;
	}

	/*
	 * set up the control block
	 */
	wpipe->pipe_map.npages = i;
	wpipe->pipe_map.pos =
	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
	wpipe->pipe_map.cnt = size;

	/*
	 * and map the buffer
	 */
	if (wpipe->pipe_map.kva == 0) {
		/*
		 * We need to allocate space for an extra page because the
		 * address range might (will) span pages at times.
		 */
		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
		    wpipe->pipe_buffer.size + PAGE_SIZE);
		atomic_add_int(&amountpipekva,
		    wpipe->pipe_buffer.size + PAGE_SIZE);
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
	    wpipe->pipe_map.npages);

	/*
	 * and update the uio data
	 */

	uio->uio_iov->iov_len -= size;
	uio->uio_iov->iov_base = (char *)uio->uio_iov->iov_base + size;
	if (uio->uio_iov->iov_len == 0)
		uio->uio_iov++;
	uio->uio_resid -= size;
	uio->uio_offset += size;
	return (0);
}

/*
 * unmap and unwire the process buffer
 */
static void
pipe_destroy_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int i;

	GIANT_REQUIRED;
	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);

	if (wpipe->pipe_map.kva) {
		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);

		if (amountpipekva > MAXPIPEKVA) {
			vm_offset_t kva = wpipe->pipe_map.kva;
			wpipe->pipe_map.kva = 0;
			kmem_free(kernel_map, kva,
			    wpipe->pipe_buffer.size + PAGE_SIZE);
			atomic_subtract_int(&amountpipekva,
			    wpipe->pipe_buffer.size + PAGE_SIZE);
		}
	}
	vm_page_lock_queues();
	for (i = 0; i < wpipe->pipe_map.npages; i++)
		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
	vm_page_unlock_queues();
	wpipe->pipe_map.npages = 0;
}

/*
 * In the case of a signal, the writing process might go away.  This
 * code copies the data into the circular buffer so that the source
 * pages can be freed without loss of data.
 */
static void
pipe_clone_write_buffer(wpipe)
	struct pipe *wpipe;
{
	int size;
	int pos;

	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	size = wpipe->pipe_map.cnt;
	pos = wpipe->pipe_map.pos;

	wpipe->pipe_buffer.in = size;
	wpipe->pipe_buffer.out = 0;
	wpipe->pipe_buffer.cnt = size;
	wpipe->pipe_state &= ~PIPE_DIRECTW;

	PIPE_GET_GIANT(wpipe);
	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
	    wpipe->pipe_buffer.buffer, size);
	pipe_destroy_write_buffer(wpipe);
	PIPE_DROP_GIANT(wpipe);
}

/*
 * This implements the pipe buffer write mechanism.  Note that only
 * a direct write OR a normal pipe write can be pending at any given time.
 * If there are any characters in the pipe buffer, the direct write will
 * be deferred until the receiving process grabs all of the bytes from
 * the pipe buffer.  Then the direct mapping write is set up.
 */
static int
pipe_direct_write(wpipe, uio)
	struct pipe *wpipe;
	struct uio *uio;
{
	int error;

retry:
	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
	while (wpipe->pipe_state & PIPE_DIRECTW) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdww", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
	}
	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
	if (wpipe->pipe_buffer.cnt > 0) {
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}

		wpipe->pipe_state |= PIPE_WANTW;
		error = msleep(wpipe, PIPE_MTX(wpipe),
		    PRIBIO | PCATCH, "pipdwc", 0);
		if (error)
			goto error1;
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			goto error1;
		}
		goto retry;
	}

	wpipe->pipe_state |= PIPE_DIRECTW;

	pipelock(wpipe, 0);
	PIPE_GET_GIANT(wpipe);
	error = pipe_build_write_buffer(wpipe, uio);
	PIPE_DROP_GIANT(wpipe);
	pipeunlock(wpipe);
	if (error) {
		wpipe->pipe_state &= ~PIPE_DIRECTW;
		goto error1;
	}

	error = 0;
	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
		if (wpipe->pipe_state & PIPE_EOF) {
			pipelock(wpipe, 0);
			PIPE_GET_GIANT(wpipe);
			pipe_destroy_write_buffer(wpipe);
			PIPE_DROP_GIANT(wpipe);
			pipeselwakeup(wpipe);
			pipeunlock(wpipe);
			error = EPIPE;
			goto error1;
		}
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
		pipeselwakeup(wpipe);
		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
		    "pipdwt", 0);
	}

	pipelock(wpipe, 0);
	if (wpipe->pipe_state & PIPE_DIRECTW) {
		/*
		 * this bit of trickery substitutes a kernel buffer for
		 * the process that might be going away.
		 */
		pipe_clone_write_buffer(wpipe);
	} else {
		PIPE_GET_GIANT(wpipe);
		pipe_destroy_write_buffer(wpipe);
		PIPE_DROP_GIANT(wpipe);
	}
	pipeunlock(wpipe);
	return (error);

error1:
	wakeup(wpipe);
	return (error);
}
#endif

static int
pipe_write(fp, uio, active_cred, flags, td)
	struct file *fp;
	struct uio *uio;
	struct ucred *active_cred;
	struct thread *td;
	int flags;
{
	int error = 0;
	int orig_resid;
	struct pipe *wpipe, *rpipe;

	rpipe = fp->f_data;
	wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	/*
	 * detect loss of pipe read side, issue SIGPIPE if lost.
	 */
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		PIPE_UNLOCK(rpipe);
		return (EPIPE);
	}
#ifdef MAC
	error = mac_check_pipe_write(active_cred, wpipe);
	if (error) {
		PIPE_UNLOCK(rpipe);
		return (error);
	}
#endif
	++wpipe->pipe_busy;

	/*
	 * If it is advantageous to resize the pipe buffer, do
	 * so.
	 */
	if ((uio->uio_resid > PIPE_SIZE) &&
	    (nbigpipe < LIMITBIGPIPES) &&
	    (wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
	    (wpipe->pipe_buffer.size <= PIPE_SIZE) &&
	    (wpipe->pipe_buffer.cnt == 0)) {

		if ((error = pipelock(wpipe, 1)) == 0) {
			PIPE_GET_GIANT(wpipe);
			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
				nbigpipe++;
			PIPE_DROP_GIANT(wpipe);
			pipeunlock(wpipe);
		}
	}

	/*
	 * If an early error occurred, unbusy and return, waking up any
	 * pending readers.
	 */
	if (error) {
		--wpipe->pipe_busy;
		if ((wpipe->pipe_busy == 0) &&
		    (wpipe->pipe_state & PIPE_WANT)) {
			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
			wakeup(wpipe);
		}
		PIPE_UNLOCK(rpipe);
		return (error);
	}

	orig_resid = uio->uio_resid;

	while (uio->uio_resid) {
		int space;

#ifndef PIPE_NODIRECT
		/*
		 * If the transfer is large, we can gain performance if
		 * we do process-to-process copies directly.
		 * If the write is non-blocking, we don't use the
		 * direct write mechanism.
		 *
		 * The direct write mechanism will detect the reader going
		 * away on us.
		 */
		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
		    (fp->f_flag & FNONBLOCK) == 0 &&
		    (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA))) {
			error = pipe_direct_write(wpipe, uio);
			if (error)
				break;
			continue;
		}
#endif

		/*
		 * Pipe buffered writes cannot be coincident with
		 * direct writes.  We wait until the currently executing
		 * direct write is completed before we start filling the
		 * pipe buffer.  We break out if a signal occurs or the
		 * reader goes away.
		 */
	retrywrite:
		while (wpipe->pipe_state & PIPE_DIRECTW) {
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}
			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
			    "pipbww", 0);
			if (wpipe->pipe_state & PIPE_EOF)
				break;
			if (error)
				break;
		}
		if (wpipe->pipe_state & PIPE_EOF) {
			error = EPIPE;
			break;
		}

		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;

		/* Writes of size <= PIPE_BUF must be atomic. */
		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
			space = 0;

		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
			if ((error = pipelock(wpipe, 1)) == 0) {
				int size;	/* Transfer size */
				int segsize;	/* first segment to transfer */

				/*
				 * It is possible for a direct write to
				 * slip in on us... handle it here...
				 */
				if (wpipe->pipe_state & PIPE_DIRECTW) {
					pipeunlock(wpipe);
					goto retrywrite;
				}
				/*
				 * If a process blocked in uiomove, our
				 * value for space might be bad.
				 *
				 * XXX will we be ok if the reader has gone
				 * away here?
				 */
				if (space > wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.cnt) {
					pipeunlock(wpipe);
					goto retrywrite;
				}

				/*
				 * Transfer size is minimum of uio transfer
				 * and free space in pipe buffer.
				 */
				if (space > uio->uio_resid)
					size = uio->uio_resid;
				else
					size = space;
				/*
				 * First segment to transfer is minimum of
				 * transfer size and contiguous space in
				 * pipe buffer.  If first segment to transfer
				 * is less than the transfer size, we've got
				 * a wraparound in the buffer.
				 */
				segsize = wpipe->pipe_buffer.size -
				    wpipe->pipe_buffer.in;
				if (segsize > size)
					segsize = size;

				/* Transfer first segment */

				PIPE_UNLOCK(rpipe);
				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
				    segsize, uio);
				PIPE_LOCK(rpipe);

				if (error == 0 && segsize < size) {
					/*
					 * Transfer remaining part now, to
					 * support atomic writes.  Wraparound
					 * happened.
					 */
					if (wpipe->pipe_buffer.in + segsize !=
					    wpipe->pipe_buffer.size)
						panic("Expected pipe buffer "
						    "wraparound disappeared");

					PIPE_UNLOCK(rpipe);
					error = uiomove(
					    &wpipe->pipe_buffer.buffer[0],
					    size - segsize, uio);
					PIPE_LOCK(rpipe);
				}
				if (error == 0) {
					wpipe->pipe_buffer.in += size;
					if (wpipe->pipe_buffer.in >=
					    wpipe->pipe_buffer.size) {
						if (wpipe->pipe_buffer.in !=
						    size - segsize +
						    wpipe->pipe_buffer.size)
							panic("Expected "
							    "wraparound bad");
						wpipe->pipe_buffer.in = size -
						    segsize;
					}

					wpipe->pipe_buffer.cnt += size;
					if (wpipe->pipe_buffer.cnt >
					    wpipe->pipe_buffer.size)
						panic("Pipe buffer overflow");

				}
				pipeunlock(wpipe);
			}
			if (error)
				break;

		} else {
			/*
			 * If the "read-side" has been blocked, wake it up now.
			 */
			if (wpipe->pipe_state & PIPE_WANTR) {
				wpipe->pipe_state &= ~PIPE_WANTR;
				wakeup(wpipe);
			}

			/*
			 * don't block on non-blocking I/O
			 */
			if (fp->f_flag & FNONBLOCK) {
				error = EAGAIN;
				break;
			}

			/*
			 * We have no more space and have something to offer,
			 * wake up select/poll.
			 */
			pipeselwakeup(wpipe);

			wpipe->pipe_state |= PIPE_WANTW;
			error = msleep(wpipe, PIPE_MTX(rpipe),
			    PRIBIO | PCATCH, "pipewr", 0);
			if (error != 0)
				break;
			/*
			 * If read side wants to go away, we just issue a
			 * signal to ourselves.
			 */
			if (wpipe->pipe_state & PIPE_EOF) {
				error = EPIPE;
				break;
			}
		}
	}

	--wpipe->pipe_busy;

	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
		wakeup(wpipe);
	} else if (wpipe->pipe_buffer.cnt > 0) {
		/*
		 * If we have put any characters in the buffer, we wake up
		 * the reader.
		 */
		if (wpipe->pipe_state & PIPE_WANTR) {
			wpipe->pipe_state &= ~PIPE_WANTR;
			wakeup(wpipe);
		}
	}

	/*
	 * Don't return EPIPE if I/O was successful
	 */
	if ((wpipe->pipe_buffer.cnt == 0) &&
	    (uio->uio_resid == 0) &&
	    (error == EPIPE)) {
		error = 0;
	}

	if (error == 0)
		vfs_timestamp(&wpipe->pipe_mtime);

	/*
	 * We have something to offer,
	 * wake up select/poll.
	 */
	if (wpipe->pipe_buffer.cnt)
		pipeselwakeup(wpipe);

	PIPE_UNLOCK(rpipe);
	return (error);
}

/*
 * We implement a very minimal set of ioctls for compatibility with sockets.
 */
static int
pipe_ioctl(fp, cmd, data, active_cred, td)
	struct file *fp;
	u_long cmd;
	void *data;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *mpipe = fp->f_data;
#ifdef MAC
	int error;
#endif

	PIPE_LOCK(mpipe);

#ifdef MAC
	error = mac_check_pipe_ioctl(active_cred, mpipe, cmd, data);
	if (error) {
		PIPE_UNLOCK(mpipe);
		return (error);
	}
#endif

	switch (cmd) {

	case FIONBIO:
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOASYNC:
		if (*(int *)data) {
			mpipe->pipe_state |= PIPE_ASYNC;
		} else {
			mpipe->pipe_state &= ~PIPE_ASYNC;
		}
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIONREAD:
		if (mpipe->pipe_state & PIPE_DIRECTW)
			*(int *)data = mpipe->pipe_map.cnt;
		else
			*(int *)data = mpipe->pipe_buffer.cnt;
		PIPE_UNLOCK(mpipe);
		return (0);

	case FIOSETOWN:
		PIPE_UNLOCK(mpipe);
		return (fsetown(*(int *)data, &mpipe->pipe_sigio));

	case FIOGETOWN:
		PIPE_UNLOCK(mpipe);
		*(int *)data = fgetown(&mpipe->pipe_sigio);
		return (0);

	/* This is deprecated, FIOSETOWN should be used instead. */
	case TIOCSPGRP:
		PIPE_UNLOCK(mpipe);
		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));

	/* This is deprecated, FIOGETOWN should be used instead. */
	case TIOCGPGRP:
		PIPE_UNLOCK(mpipe);
		*(int *)data = -fgetown(&mpipe->pipe_sigio);
		return (0);

	}
	PIPE_UNLOCK(mpipe);
	return (ENOTTY);
}
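
/*
 * Purely illustrative userland sketch (not part of the kernel build): using
 * FIONREAD, one of the ioctls handled above, to ask how many bytes are
 * ready to be read from a pipe descriptor.  The helper name is made up for
 * the example.
 */
#if 0
#include <sys/ioctl.h>
#include <sys/filio.h>

static int
pipe_bytes_pending(int fd)
{
	int n;

	if (ioctl(fd, FIONREAD, &n) == -1)
		return (-1);
	return (n);
}
#endif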

static int
pipe_poll(fp, events, active_cred, td)
	struct file *fp;
	int events;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *rpipe = fp->f_data;
	struct pipe *wpipe;
	int revents = 0;
#ifdef MAC
	int error;
#endif

	wpipe = rpipe->pipe_peer;
	PIPE_LOCK(rpipe);
#ifdef MAC
	error = mac_check_pipe_poll(active_cred, rpipe);
	if (error)
		goto locked_error;
#endif
	if (events & (POLLIN | POLLRDNORM))
		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
		    (rpipe->pipe_buffer.cnt > 0) ||
		    (rpipe->pipe_state & PIPE_EOF))
			revents |= events & (POLLIN | POLLRDNORM);

	if (events & (POLLOUT | POLLWRNORM))
		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
			revents |= events & (POLLOUT | POLLWRNORM);

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) ||
	    (wpipe->pipe_state & PIPE_EOF))
		revents |= POLLHUP;

	if (revents == 0) {
		if (events & (POLLIN | POLLRDNORM)) {
			selrecord(td, &rpipe->pipe_sel);
			rpipe->pipe_state |= PIPE_SEL;
		}

		if (events & (POLLOUT | POLLWRNORM)) {
			selrecord(td, &wpipe->pipe_sel);
			wpipe->pipe_state |= PIPE_SEL;
		}
	}
#ifdef MAC
locked_error:
#endif
	PIPE_UNLOCK(rpipe);

	return (revents);
}

/*
 * We shouldn't need locks here as we're doing a read and this should
 * be a natural race.
 */
static int
pipe_stat(fp, ub, active_cred, td)
	struct file *fp;
	struct stat *ub;
	struct ucred *active_cred;
	struct thread *td;
{
	struct pipe *pipe = fp->f_data;
#ifdef MAC
	int error;

	PIPE_LOCK(pipe);
	error = mac_check_pipe_stat(active_cred, pipe);
	PIPE_UNLOCK(pipe);
	if (error)
		return (error);
#endif
	bzero(ub, sizeof(*ub));
	ub->st_mode = S_IFIFO;
	ub->st_blksize = pipe->pipe_buffer.size;
	ub->st_size = pipe->pipe_buffer.cnt;
	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
	ub->st_atimespec = pipe->pipe_atime;
	ub->st_mtimespec = pipe->pipe_mtime;
	ub->st_ctimespec = pipe->pipe_ctime;
	ub->st_uid = fp->f_cred->cr_uid;
	ub->st_gid = fp->f_cred->cr_gid;
	/*
	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
	 * XXX (st_dev, st_ino) should be unique.
	 */
	return (0);
}
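
/*
 * Purely illustrative userland sketch (not part of the kernel build): a
 * plain fstat(2) on a pipe descriptor reports the bytes currently queued in
 * the pipe as st_size, as filled in by pipe_stat() above.  The helper name
 * is made up for the example.
 */
#if 0
#include <sys/stat.h>

static off_t
pipe_queued_bytes(int fd)
{
	struct stat sb;

	if (fstat(fd, &sb) == -1)
		return (-1);
	return (sb.st_size);
}
#endif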

/* ARGSUSED */
static int
pipe_close(fp, td)
	struct file *fp;
	struct thread *td;
{
	struct pipe *cpipe = fp->f_data;

	fp->f_ops = &badfileops;
	fp->f_data = NULL;
	funsetown(&cpipe->pipe_sigio);
	pipeclose(cpipe);
	return (0);
}

static void
pipe_free_kmem(cpipe)
	struct pipe *cpipe;
{

	GIANT_REQUIRED;
	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
	    ("pipe_free_kmem: pipe mutex locked"));

	if (cpipe->pipe_buffer.buffer != NULL) {
		if (cpipe->pipe_buffer.size > PIPE_SIZE)
			--nbigpipe;
		atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
		kmem_free(kernel_map,
		    (vm_offset_t)cpipe->pipe_buffer.buffer,
		    cpipe->pipe_buffer.size);
		cpipe->pipe_buffer.buffer = NULL;
	}
#ifndef PIPE_NODIRECT
	if (cpipe->pipe_map.kva != 0) {
		atomic_subtract_int(&amountpipekva,
		    cpipe->pipe_buffer.size + PAGE_SIZE);
		kmem_free(kernel_map,
		    cpipe->pipe_map.kva,
		    cpipe->pipe_buffer.size + PAGE_SIZE);
		cpipe->pipe_map.cnt = 0;
		cpipe->pipe_map.kva = 0;
		cpipe->pipe_map.pos = 0;
		cpipe->pipe_map.npages = 0;
	}
#endif
}

/*
 * shut down the pipe
 */
static void
pipeclose(cpipe)
	struct pipe *cpipe;
{
	struct pipe *ppipe;
	int hadpeer;

	if (cpipe == NULL)
		return;

	hadpeer = 0;

	/* partially created pipes won't have a valid mutex. */
	if (PIPE_MTX(cpipe) != NULL)
		PIPE_LOCK(cpipe);

	pipeselwakeup(cpipe);

	/*
	 * If the other side is blocked, wake it up saying that
	 * we want to close it down.
	 */
	while (cpipe->pipe_busy) {
		wakeup(cpipe);
		cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
		msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
	}

#ifdef MAC
	if (cpipe->pipe_label != NULL && cpipe->pipe_peer == NULL)
		mac_destroy_pipe(cpipe);
#endif

	/*
	 * Disconnect from peer
	 */
	if ((ppipe = cpipe->pipe_peer) != NULL) {
		hadpeer++;
		pipeselwakeup(ppipe);

		ppipe->pipe_state |= PIPE_EOF;
		wakeup(ppipe);
		KNOTE(&ppipe->pipe_sel.si_note, 0);
		ppipe->pipe_peer = NULL;
	}
	/*
	 * free resources
	 */
	if (PIPE_MTX(cpipe) != NULL) {
		PIPE_UNLOCK(cpipe);
		if (!hadpeer) {
			mtx_destroy(PIPE_MTX(cpipe));
			free(PIPE_MTX(cpipe), M_TEMP);
		}
	}
	mtx_lock(&Giant);
	pipe_free_kmem(cpipe);
	uma_zfree(pipe_zone, cpipe);
	mtx_unlock(&Giant);
}

/*ARGSUSED*/
static int
pipe_kqfilter(struct file *fp, struct knote *kn)
{
	struct pipe *cpipe;

	cpipe = kn->kn_fp->f_data;
	switch (kn->kn_filter) {
	case EVFILT_READ:
		kn->kn_fop = &pipe_rfiltops;
		break;
	case EVFILT_WRITE:
		kn->kn_fop = &pipe_wfiltops;
		cpipe = cpipe->pipe_peer;
		if (cpipe == NULL)
			/* other end of pipe has been closed */
			return (EBADF);
		break;
	default:
		return (1);
	}
	kn->kn_hook = cpipe;

	PIPE_LOCK(cpipe);
	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
	PIPE_UNLOCK(cpipe);
	return (0);
}

static void
filt_pipedetach(struct knote *kn)
{
	struct pipe *cpipe = (struct pipe *)kn->kn_hook;

	PIPE_LOCK(cpipe);
	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
	PIPE_UNLOCK(cpipe);
}

/*ARGSUSED*/
static int
filt_piperead(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	kn->kn_data = rpipe->pipe_buffer.cnt;
	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
		kn->kn_data = rpipe->pipe_map.cnt;

	if ((rpipe->pipe_state & PIPE_EOF) ||
	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	PIPE_UNLOCK(rpipe);
	return (kn->kn_data > 0);
}

/*ARGSUSED*/
static int
filt_pipewrite(struct knote *kn, long hint)
{
	struct pipe *rpipe = kn->kn_fp->f_data;
	struct pipe *wpipe = rpipe->pipe_peer;

	PIPE_LOCK(rpipe);
	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
		kn->kn_data = 0;
		kn->kn_flags |= EV_EOF;
		PIPE_UNLOCK(rpipe);
		return (1);
	}
	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
	if (wpipe->pipe_state & PIPE_DIRECTW)
		kn->kn_data = 0;

	PIPE_UNLOCK(rpipe);
	return (kn->kn_data >= PIPE_BUF);
}
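
/*
 * Purely illustrative userland sketch (not part of the kernel build):
 * waiting for data on the read end of a pipe with kqueue, which ends up in
 * pipe_kqfilter() and filt_piperead() above.  The helper name is made up
 * for the example.
 */
#if 0
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>

#include <unistd.h>

static int
pipe_wait_readable(int fd)
{
	struct kevent ev;
	int kq, n;

	if ((kq = kqueue()) == -1)
		return (-1);
	EV_SET(&ev, fd, EVFILT_READ, EV_ADD | EV_ONESHOT, 0, 0, NULL);
	n = kevent(kq, &ev, 1, &ev, 1, NULL);
	close(kq);
	if (n <= 0)
		return (-1);
	return ((int)ev.data);	/* bytes available, per filt_piperead() */
}
#endif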