xref: /freebsd/sys/kern/sys_pipe.c (revision b5a8f767a62e0253ce02878cd6d69ea7f9574d1a)
1 /*
2  * Copyright (c) 1996 John S. Dyson
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice immediately at the beginning of the file, without modification,
10  *    this list of conditions, and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. Absolutely no warranty of function or purpose is made by the author
15  *    John S. Dyson.
16  * 4. Modifications may be freely made to this file if the above conditions
17  *    are met.
18  *
19  * $FreeBSD$
20  */
21 
22 /*
23  * This file contains a high-performance replacement for the socket-based
24  * pipes scheme originally used in FreeBSD/4.4Lite.  It does not support
25  * all features of sockets, but does do everything that pipes normally
26  * do.
27  */
28 
29 /*
30  * This code has two modes of operation, a small write mode and a large
31  * write mode.  The small write mode acts like conventional pipes with
32  * a kernel buffer.  If the write is smaller than PIPE_MINDIRECT, the
33  * "normal" pipe buffering is done.  If the write is between PIPE_MINDIRECT
34  * and PIPE_SIZE in size, the sender's buffer is fully mapped and wired into
35  * the kernel, and the receiving process can copy the data directly from the
36  * pages of the sending process.
37  *
38  * If the sending process receives a signal, it may go away, and its
39  * address space can certainly change, because control is returned to the
40  * user-mode side.  In that case, the pipe code arranges to copy the buffer
41  * supplied by the user process to a pageable kernel buffer, and the
42  * receiving process will grab the data from that pageable kernel buffer.
43  * Since signals don't happen all that often, the copy operation is
44  * normally eliminated.
45  *
46  * The constant PIPE_MINDIRECT is chosen to make sure that buffering will
47  * happen for small transfers so that the system will not spend all of
48  * its time context switching.  PIPE_SIZE is constrained by the
49  * amount of kernel virtual memory.
50  */
51 
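/*
 * Illustrative userland sketch (not part of this file's compiled code):
 * which mode is used depends mainly on the size of the individual
 * write(2).  Assuming the constants from <sys/pipe.h>, a small write goes
 * through the kernel buffer, while a sufficiently large blocking write
 * becomes eligible for the direct, page-wired path:
 *
 *	char small[128], big[64 * 1024];
 *	int fds[2];
 *
 *	pipe(fds);
 *	(void)write(fds[1], small, sizeof(small));	(buffered copy)
 *	(void)write(fds[1], big, sizeof(big));		(may use direct mode)
 */
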
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/fcntl.h>
55 #include <sys/file.h>
56 #include <sys/filedesc.h>
57 #include <sys/filio.h>
58 #include <sys/kernel.h>
59 #include <sys/lock.h>
60 #include <sys/mutex.h>
61 #include <sys/ttycom.h>
62 #include <sys/stat.h>
63 #include <sys/poll.h>
64 #include <sys/selinfo.h>
65 #include <sys/signalvar.h>
66 #include <sys/sysproto.h>
67 #include <sys/pipe.h>
68 #include <sys/proc.h>
69 #include <sys/vnode.h>
70 #include <sys/uio.h>
71 #include <sys/event.h>
72 
73 #include <vm/vm.h>
74 #include <vm/vm_param.h>
75 #include <vm/vm_object.h>
76 #include <vm/vm_kern.h>
77 #include <vm/vm_extern.h>
78 #include <vm/pmap.h>
79 #include <vm/vm_map.h>
80 #include <vm/vm_page.h>
81 #include <vm/vm_zone.h>
82 
83 /*
84  * Use this define if you want to disable *fancy* VM things.  Expect an
85  * approx 30% decrease in transfer rate.  This could be useful for
86  * NetBSD or OpenBSD.
87  */
88 /* #define PIPE_NODIRECT */
89 
90 /*
91  * interfaces to the outside world
92  */
93 static int pipe_read(struct file *fp, struct uio *uio,
94 		struct ucred *cred, int flags, struct thread *td);
95 static int pipe_write(struct file *fp, struct uio *uio,
96 		struct ucred *cred, int flags, struct thread *td);
97 static int pipe_close(struct file *fp, struct thread *td);
98 static int pipe_poll(struct file *fp, int events, struct ucred *cred,
99 		struct thread *td);
100 static int pipe_kqfilter(struct file *fp, struct knote *kn);
101 static int pipe_stat(struct file *fp, struct stat *sb, struct thread *td);
102 static int pipe_ioctl(struct file *fp, u_long cmd, caddr_t data, struct thread *td);
103 
104 static struct fileops pipeops = {
105 	pipe_read, pipe_write, pipe_ioctl, pipe_poll, pipe_kqfilter,
106 	pipe_stat, pipe_close
107 };
108 
109 static void	filt_pipedetach(struct knote *kn);
110 static int	filt_piperead(struct knote *kn, long hint);
111 static int	filt_pipewrite(struct knote *kn, long hint);
112 
113 static struct filterops pipe_rfiltops =
114 	{ 1, NULL, filt_pipedetach, filt_piperead };
115 static struct filterops pipe_wfiltops =
116 	{ 1, NULL, filt_pipedetach, filt_pipewrite };
117 
118 #define PIPE_GET_GIANT(pipe)						\
119 	do {								\
120 		PIPE_UNLOCK(pipe);					\
121 		mtx_lock(&Giant);					\
122 	} while (0)
123 
124 #define PIPE_DROP_GIANT(pipe)						\
125 	do {								\
126 		mtx_unlock(&Giant);					\
127 		PIPE_LOCK(pipe);					\
128 	} while (0)
129 
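/*
 * These wrappers bracket calls into the VM system (pipespace(),
 * pipe_build_write_buffer(), pipe_destroy_write_buffer()), which still
 * run under Giant and assert that the pipe mutex is not held; the pipe
 * mutex is therefore dropped before Giant is taken and reacquired once
 * Giant has been released.
 */
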
130 /*
131  * Default pipe buffer size(s); this can be kind-of large now because pipe
132  * space is pageable.  The pipe code will try to maintain locality of
133  * reference for performance reasons, so small amounts of outstanding I/O
134  * will not wipe the cache.
135  */
136 #define MINPIPESIZE (PIPE_SIZE/3)
137 #define MAXPIPESIZE (2*PIPE_SIZE/3)
138 
139 /*
140  * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
141  * is there so that on large systems, we don't exhaust it.
142  */
143 #define MAXPIPEKVA (8*1024*1024)
144 
145 /*
146  * Limit for direct transfers; we cannot, of course, limit
147  * the amount of kva for pipes in general though.
148  */
149 #define LIMITPIPEKVA (16*1024*1024)
150 
151 /*
152  * Limit the number of "big" pipes
153  */
154 #define LIMITBIGPIPES	32
155 static int nbigpipe;
156 
157 static int amountpipekva;
158 
159 static void pipeinit(void *dummy __unused);
160 static void pipeclose(struct pipe *cpipe);
161 static void pipe_free_kmem(struct pipe *cpipe);
162 static int pipe_create(struct pipe **cpipep);
163 static __inline int pipelock(struct pipe *cpipe, int catch);
164 static __inline void pipeunlock(struct pipe *cpipe);
165 static __inline void pipeselwakeup(struct pipe *cpipe);
166 #ifndef PIPE_NODIRECT
167 static int pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio);
168 static void pipe_destroy_write_buffer(struct pipe *wpipe);
169 static int pipe_direct_write(struct pipe *wpipe, struct uio *uio);
170 static void pipe_clone_write_buffer(struct pipe *wpipe);
171 #endif
172 static int pipespace(struct pipe *cpipe, int size);
173 
174 static vm_zone_t pipe_zone;
175 
176 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_ANY, pipeinit, NULL);
177 
178 static void
179 pipeinit(void *dummy __unused)
180 {
181 
182 	pipe_zone = zinit("PIPE", sizeof(struct pipe), 0, 0, 4);
183 }
184 
185 /*
186  * The pipe system call for the DTYPE_PIPE type of pipes
187  */
188 
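/*
 * Illustrative userland usage (not part of the kernel build): fds[0] is
 * the read end and fds[1] the write end, corresponding to td_retval[0]
 * and td_retval[1] below.
 *
 *	int fds[2];
 *	char buf[6];
 *
 *	if (pipe(fds) == -1)
 *		err(1, "pipe");
 *	(void)write(fds[1], "hello", 6);
 *	(void)read(fds[0], buf, sizeof(buf));
 */
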
189 /* ARGSUSED */
190 int
191 pipe(td, uap)
192 	struct thread *td;
193 	struct pipe_args /* {
194 		int	dummy;
195 	} */ *uap;
196 {
197 	struct filedesc *fdp = td->td_proc->p_fd;
198 	struct file *rf, *wf;
199 	struct pipe *rpipe, *wpipe;
200 	int fd, error;
201 
202 	KASSERT(pipe_zone != NULL, ("pipe_zone not initialized"));
203 
204 	rpipe = wpipe = NULL;
205 	if (pipe_create(&rpipe) || pipe_create(&wpipe)) {
206 		pipeclose(rpipe);
207 		pipeclose(wpipe);
208 		return (ENFILE);
209 	}
210 
211 	rpipe->pipe_state |= PIPE_DIRECTOK;
212 	wpipe->pipe_state |= PIPE_DIRECTOK;
213 
214 	error = falloc(td, &rf, &fd);
215 	if (error) {
216 		pipeclose(rpipe);
217 		pipeclose(wpipe);
218 		return (error);
219 	}
220 	fhold(rf);
221 	td->td_retval[0] = fd;
222 
223 	/*
224 	 * Warning: once we've gotten past allocation of the fd for the
225 	 * read-side, we can only drop the read side via fdrop() in order
226 	 * to avoid races against processes which manage to dup() the read
227 	 * side while we are blocked trying to allocate the write side.
228 	 */
229 	FILE_LOCK(rf);
230 	rf->f_flag = FREAD | FWRITE;
231 	rf->f_type = DTYPE_PIPE;
232 	rf->f_data = (caddr_t)rpipe;
233 	rf->f_ops = &pipeops;
234 	FILE_UNLOCK(rf);
235 	error = falloc(td, &wf, &fd);
236 	if (error) {
237 		FILEDESC_LOCK(fdp);
238 		if (fdp->fd_ofiles[td->td_retval[0]] == rf) {
239 			fdp->fd_ofiles[td->td_retval[0]] = NULL;
240 			FILEDESC_UNLOCK(fdp);
241 			fdrop(rf, td);
242 		} else
243 			FILEDESC_UNLOCK(fdp);
244 		fdrop(rf, td);
245 		/* rpipe has been closed by fdrop(). */
246 		pipeclose(wpipe);
247 		return (error);
248 	}
249 	FILE_LOCK(wf);
250 	wf->f_flag = FREAD | FWRITE;
251 	wf->f_type = DTYPE_PIPE;
252 	wf->f_data = (caddr_t)wpipe;
253 	wf->f_ops = &pipeops;
254 	FILE_UNLOCK(wf);
255 	td->td_retval[1] = fd;
256 	rpipe->pipe_peer = wpipe;
257 	wpipe->pipe_peer = rpipe;
258 	rpipe->pipe_mtxp = wpipe->pipe_mtxp = mtx_pool_alloc();
259 	fdrop(rf, td);
260 
261 	return (0);
262 }
263 
264 /*
265  * Allocate kva for the pipe circular buffer; the space is pageable.
266  * This routine will 'realloc' the size of a pipe safely: if the
267  * allocation fails, it will retain the old buffer and
268  * return ENOMEM.
269  */
270 static int
271 pipespace(cpipe, size)
272 	struct pipe *cpipe;
273 	int size;
274 {
275 	struct vm_object *object;
276 	caddr_t buffer;
277 	int npages, error;
278 
279 	GIANT_REQUIRED;
280 	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
281 	       ("pipespace: pipe mutex locked"));
282 
283 	npages = round_page(size)/PAGE_SIZE;
284 	/*
285 	 * Create an object; I don't like the idea of paging to/from
286 	 * kernel_object.
287 	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
288 	 */
289 	object = vm_object_allocate(OBJT_DEFAULT, npages);
290 	buffer = (caddr_t) vm_map_min(kernel_map);
291 
292 	/*
293 	 * Insert the object into the kernel map, and allocate kva for it.
294 	 * The map entry is, by default, pageable.
295 	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
296 	 */
297 	error = vm_map_find(kernel_map, object, 0,
298 		(vm_offset_t *) &buffer, size, 1,
299 		VM_PROT_ALL, VM_PROT_ALL, 0);
300 
301 	if (error != KERN_SUCCESS) {
302 		vm_object_deallocate(object);
303 		return (ENOMEM);
304 	}
305 
306 	/* free old resources if we're resizing */
307 	pipe_free_kmem(cpipe);
308 	cpipe->pipe_buffer.object = object;
309 	cpipe->pipe_buffer.buffer = buffer;
310 	cpipe->pipe_buffer.size = size;
311 	cpipe->pipe_buffer.in = 0;
312 	cpipe->pipe_buffer.out = 0;
313 	cpipe->pipe_buffer.cnt = 0;
314 	amountpipekva += cpipe->pipe_buffer.size;
315 	return (0);
316 }
317 
318 /*
319  * initialize and allocate VM and memory for pipe
320  */
321 static int
322 pipe_create(cpipep)
323 	struct pipe **cpipep;
324 {
325 	struct pipe *cpipe;
326 	int error;
327 
328 	*cpipep = zalloc(pipe_zone);
329 	if (*cpipep == NULL)
330 		return (ENOMEM);
331 
332 	cpipe = *cpipep;
333 
334 	/* so pipespace()->pipe_free_kmem() doesn't follow junk pointer */
335 	cpipe->pipe_buffer.object = NULL;
336 #ifndef PIPE_NODIRECT
337 	cpipe->pipe_map.kva = NULL;
338 #endif
339 	/*
340 	 * protect so pipeclose() doesn't follow a junk pointer
341 	 * if pipespace() fails.
342 	 */
343 	bzero(&cpipe->pipe_sel, sizeof(cpipe->pipe_sel));
344 	cpipe->pipe_state = 0;
345 	cpipe->pipe_peer = NULL;
346 	cpipe->pipe_busy = 0;
347 
348 #ifndef PIPE_NODIRECT
349 	/*
350 	 * pipe data structure initializations to support direct pipe I/O
351 	 */
352 	cpipe->pipe_map.cnt = 0;
353 	cpipe->pipe_map.kva = 0;
354 	cpipe->pipe_map.pos = 0;
355 	cpipe->pipe_map.npages = 0;
356 	/* cpipe->pipe_map.ms[] = invalid */
357 #endif
358 
359 	cpipe->pipe_mtxp = NULL;	/* avoid pipespace assertion */
360 	error = pipespace(cpipe, PIPE_SIZE);
361 	if (error)
362 		return (error);
363 
364 	vfs_timestamp(&cpipe->pipe_ctime);
365 	cpipe->pipe_atime = cpipe->pipe_ctime;
366 	cpipe->pipe_mtime = cpipe->pipe_ctime;
367 
368 	return (0);
369 }
370 
371 
372 /*
373  * lock a pipe for I/O, blocking other access
374  */
375 static __inline int
376 pipelock(cpipe, catch)
377 	struct pipe *cpipe;
378 	int catch;
379 {
380 	int error;
381 
382 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
383 	while (cpipe->pipe_state & PIPE_LOCKFL) {
384 		cpipe->pipe_state |= PIPE_LWANT;
385 		error = msleep(cpipe, PIPE_MTX(cpipe),
386 		    catch ? (PRIBIO | PCATCH) : PRIBIO,
387 		    "pipelk", 0);
388 		if (error != 0)
389 			return (error);
390 	}
391 	cpipe->pipe_state |= PIPE_LOCKFL;
392 	return (0);
393 }
394 
395 /*
396  * unlock a pipe I/O lock
397  */
398 static __inline void
399 pipeunlock(cpipe)
400 	struct pipe *cpipe;
401 {
402 
403 	PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
404 	cpipe->pipe_state &= ~PIPE_LOCKFL;
405 	if (cpipe->pipe_state & PIPE_LWANT) {
406 		cpipe->pipe_state &= ~PIPE_LWANT;
407 		wakeup(cpipe);
408 	}
409 }
410 
411 static __inline void
412 pipeselwakeup(cpipe)
413 	struct pipe *cpipe;
414 {
415 
416 	if (cpipe->pipe_state & PIPE_SEL) {
417 		cpipe->pipe_state &= ~PIPE_SEL;
418 		selwakeup(&cpipe->pipe_sel);
419 	}
420 	if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
421 		pgsigio(cpipe->pipe_sigio, SIGIO, 0);
422 	KNOTE(&cpipe->pipe_sel.si_note, 0);
423 }
424 
425 /* ARGSUSED */
426 static int
427 pipe_read(fp, uio, cred, flags, td)
428 	struct file *fp;
429 	struct uio *uio;
430 	struct ucred *cred;
431 	struct thread *td;
432 	int flags;
433 {
434 	struct pipe *rpipe = (struct pipe *) fp->f_data;
435 	int error;
436 	int nread = 0;
437 	u_int size;
438 
439 	PIPE_LOCK(rpipe);
440 	++rpipe->pipe_busy;
441 	error = pipelock(rpipe, 1);
442 	if (error)
443 		goto unlocked_error;
444 
445 	while (uio->uio_resid) {
446 		/*
447 		 * normal pipe buffer receive
448 		 */
449 		if (rpipe->pipe_buffer.cnt > 0) {
450 			size = rpipe->pipe_buffer.size - rpipe->pipe_buffer.out;
451 			if (size > rpipe->pipe_buffer.cnt)
452 				size = rpipe->pipe_buffer.cnt;
453 			if (size > (u_int) uio->uio_resid)
454 				size = (u_int) uio->uio_resid;
455 
456 			PIPE_UNLOCK(rpipe);
457 			error = uiomove(&rpipe->pipe_buffer.buffer[rpipe->pipe_buffer.out],
458 					size, uio);
459 			PIPE_LOCK(rpipe);
460 			if (error)
461 				break;
462 
463 			rpipe->pipe_buffer.out += size;
464 			if (rpipe->pipe_buffer.out >= rpipe->pipe_buffer.size)
465 				rpipe->pipe_buffer.out = 0;
466 
467 			rpipe->pipe_buffer.cnt -= size;
468 
469 			/*
470 			 * If there is no more to read in the pipe, reset
471 			 * its pointers to the beginning.  This improves
472 			 * cache hit stats.
473 			 */
474 			if (rpipe->pipe_buffer.cnt == 0) {
475 				rpipe->pipe_buffer.in = 0;
476 				rpipe->pipe_buffer.out = 0;
477 			}
478 			nread += size;
479 #ifndef PIPE_NODIRECT
480 		/*
481 		 * Direct copy, bypassing a kernel buffer.
482 		 */
483 		} else if ((size = rpipe->pipe_map.cnt) &&
484 			   (rpipe->pipe_state & PIPE_DIRECTW)) {
485 			caddr_t	va;
486 			if (size > (u_int) uio->uio_resid)
487 				size = (u_int) uio->uio_resid;
488 
489 			va = (caddr_t) rpipe->pipe_map.kva +
490 			    rpipe->pipe_map.pos;
491 			PIPE_UNLOCK(rpipe);
492 			error = uiomove(va, size, uio);
493 			PIPE_LOCK(rpipe);
494 			if (error)
495 				break;
496 			nread += size;
497 			rpipe->pipe_map.pos += size;
498 			rpipe->pipe_map.cnt -= size;
499 			if (rpipe->pipe_map.cnt == 0) {
500 				rpipe->pipe_state &= ~PIPE_DIRECTW;
501 				wakeup(rpipe);
502 			}
503 #endif
504 		} else {
505 			/*
506 			 * detect EOF condition
507 			 * read returns 0 on EOF, no need to set error
508 			 */
509 			if (rpipe->pipe_state & PIPE_EOF)
510 				break;
511 
512 			/*
513 			 * If the "write-side" has been blocked, wake it up now.
514 			 */
515 			if (rpipe->pipe_state & PIPE_WANTW) {
516 				rpipe->pipe_state &= ~PIPE_WANTW;
517 				wakeup(rpipe);
518 			}
519 
520 			/*
521 			 * Break if some data was read.
522 			 */
523 			if (nread > 0)
524 				break;
525 
526 			/*
527 			 * Unlock the pipe buffer for our remaining processing.  We
528 			 * will either break out with an error or we will sleep and
529 			 * relock to loop.
530 			 */
531 			pipeunlock(rpipe);
532 
533 			/*
534 			 * Handle non-blocking mode operation or
535 			 * wait for more data.
536 			 */
537 			if (fp->f_flag & FNONBLOCK) {
538 				error = EAGAIN;
539 			} else {
540 				rpipe->pipe_state |= PIPE_WANTR;
541 				if ((error = msleep(rpipe, PIPE_MTX(rpipe),
542 				    PRIBIO | PCATCH,
543 				    "piperd", 0)) == 0)
544 					error = pipelock(rpipe, 1);
545 			}
546 			if (error)
547 				goto unlocked_error;
548 		}
549 	}
550 	pipeunlock(rpipe);
551 
552 	/* XXX: should probably do this before getting any locks. */
553 	if (error == 0)
554 		vfs_timestamp(&rpipe->pipe_atime);
555 unlocked_error:
556 	--rpipe->pipe_busy;
557 
558 	/*
559 	 * PIPE_WANT processing only makes sense if pipe_busy is 0.
560 	 */
561 	if ((rpipe->pipe_busy == 0) && (rpipe->pipe_state & PIPE_WANT)) {
562 		rpipe->pipe_state &= ~(PIPE_WANT|PIPE_WANTW);
563 		wakeup(rpipe);
564 	} else if (rpipe->pipe_buffer.cnt < MINPIPESIZE) {
565 		/*
566 		 * Handle write blocking hysteresis.
567 		 */
568 		if (rpipe->pipe_state & PIPE_WANTW) {
569 			rpipe->pipe_state &= ~PIPE_WANTW;
570 			wakeup(rpipe);
571 		}
572 	}
573 
574 	if ((rpipe->pipe_buffer.size - rpipe->pipe_buffer.cnt) >= PIPE_BUF)
575 		pipeselwakeup(rpipe);
576 
577 	PIPE_UNLOCK(rpipe);
578 	return (error);
579 }
580 
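/*
 * Illustrative userland behaviour of the read side (not compiled here):
 * once the write end has been closed and the buffer drained, read(2)
 * returns 0 (EOF); with O_NONBLOCK set and no data pending it fails with
 * EAGAIN instead, matching the FNONBLOCK handling above.
 *
 *	close(fds[1]);
 *	n = read(fds[0], buf, sizeof(buf));	(n == 0 once drained)
 */
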
581 #ifndef PIPE_NODIRECT
582 /*
583  * Map the sending process's buffer into kernel space and wire it.
584  * This is similar to a physical write operation.
585  */
586 static int
587 pipe_build_write_buffer(wpipe, uio)
588 	struct pipe *wpipe;
589 	struct uio *uio;
590 {
591 	u_int size;
592 	int i;
593 	vm_offset_t addr, endaddr, paddr;
594 
595 	GIANT_REQUIRED;
596 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
597 
598 	size = (u_int) uio->uio_iov->iov_len;
599 	if (size > wpipe->pipe_buffer.size)
600 		size = wpipe->pipe_buffer.size;
601 
602 	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
603 	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
604 	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
605 		vm_page_t m;
606 
607 		if (vm_fault_quick((caddr_t)addr, VM_PROT_READ) < 0 ||
608 		    (paddr = pmap_kextract(addr)) == 0) {
609 			int j;
610 
611 			for (j = 0; j < i; j++)
612 				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
613 			return (EFAULT);
614 		}
615 
616 		m = PHYS_TO_VM_PAGE(paddr);
617 		vm_page_wire(m);
618 		wpipe->pipe_map.ms[i] = m;
619 	}
620 
621 /*
622  * set up the control block
623  */
624 	wpipe->pipe_map.npages = i;
625 	wpipe->pipe_map.pos =
626 	    ((vm_offset_t) uio->uio_iov->iov_base) & PAGE_MASK;
627 	wpipe->pipe_map.cnt = size;
628 
629 /*
630  * and map the buffer
631  */
632 	if (wpipe->pipe_map.kva == 0) {
633 		/*
634 		 * We need to allocate space for an extra page because the
635 		 * address range might (will) span pages at times.
636 		 */
637 		wpipe->pipe_map.kva = kmem_alloc_pageable(kernel_map,
638 			wpipe->pipe_buffer.size + PAGE_SIZE);
639 		amountpipekva += wpipe->pipe_buffer.size + PAGE_SIZE;
640 	}
641 	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
642 		wpipe->pipe_map.npages);
643 
644 /*
645  * and update the uio data
646  */
647 
648 	uio->uio_iov->iov_len -= size;
649 	uio->uio_iov->iov_base += size;
650 	if (uio->uio_iov->iov_len == 0)
651 		uio->uio_iov++;
652 	uio->uio_resid -= size;
653 	uio->uio_offset += size;
654 	return (0);
655 }
656 
657 /*
658  * unmap and unwire the process buffer
659  */
660 static void
661 pipe_destroy_write_buffer(wpipe)
662 	struct pipe *wpipe;
663 {
664 	int i;
665 
666 	GIANT_REQUIRED;
667 	PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
668 
669 	if (wpipe->pipe_map.kva) {
670 		pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
671 
672 		if (amountpipekva > MAXPIPEKVA) {
673 			vm_offset_t kva = wpipe->pipe_map.kva;
674 			wpipe->pipe_map.kva = 0;
675 			kmem_free(kernel_map, kva,
676 				wpipe->pipe_buffer.size + PAGE_SIZE);
677 			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
678 		}
679 	}
680 	for (i = 0; i < wpipe->pipe_map.npages; i++)
681 		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
682 	wpipe->pipe_map.npages = 0;
683 }
684 
685 /*
686  * In the case of a signal, the writing process might go away.  This
687  * code copies the data into the circular buffer so that the source
688  * pages can be freed without loss of data.
689  */
690 static void
691 pipe_clone_write_buffer(wpipe)
692 	struct pipe *wpipe;
693 {
694 	int size;
695 	int pos;
696 
697 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
698 	size = wpipe->pipe_map.cnt;
699 	pos = wpipe->pipe_map.pos;
700 	bcopy((caddr_t) wpipe->pipe_map.kva + pos,
701 	    (caddr_t) wpipe->pipe_buffer.buffer, size);
702 
703 	wpipe->pipe_buffer.in = size;
704 	wpipe->pipe_buffer.out = 0;
705 	wpipe->pipe_buffer.cnt = size;
706 	wpipe->pipe_state &= ~PIPE_DIRECTW;
707 
708 	PIPE_GET_GIANT(wpipe);
709 	pipe_destroy_write_buffer(wpipe);
710 	PIPE_DROP_GIANT(wpipe);
711 }
712 
713 /*
714  * This implements the pipe buffer write mechanism.  Note that only
715  * a direct write OR a normal pipe write can be pending at any given time.
716  * If there are any characters in the pipe buffer, the direct write will
717  * be deferred until the receiving process grabs all of the bytes from
718  * the pipe buffer.  Then the direct mapping write is set up.
719  */
720 static int
721 pipe_direct_write(wpipe, uio)
722 	struct pipe *wpipe;
723 	struct uio *uio;
724 {
725 	int error;
726 
727 retry:
728 	PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
729 	while (wpipe->pipe_state & PIPE_DIRECTW) {
730 		if (wpipe->pipe_state & PIPE_WANTR) {
731 			wpipe->pipe_state &= ~PIPE_WANTR;
732 			wakeup(wpipe);
733 		}
734 		wpipe->pipe_state |= PIPE_WANTW;
735 		error = msleep(wpipe, PIPE_MTX(wpipe),
736 		    PRIBIO | PCATCH, "pipdww", 0);
737 		if (error)
738 			goto error1;
739 		if (wpipe->pipe_state & PIPE_EOF) {
740 			error = EPIPE;
741 			goto error1;
742 		}
743 	}
744 	wpipe->pipe_map.cnt = 0;	/* transfer not ready yet */
745 	if (wpipe->pipe_buffer.cnt > 0) {
746 		if (wpipe->pipe_state & PIPE_WANTR) {
747 			wpipe->pipe_state &= ~PIPE_WANTR;
748 			wakeup(wpipe);
749 		}
750 
751 		wpipe->pipe_state |= PIPE_WANTW;
752 		error = msleep(wpipe, PIPE_MTX(wpipe),
753 		    PRIBIO | PCATCH, "pipdwc", 0);
754 		if (error)
755 			goto error1;
756 		if (wpipe->pipe_state & PIPE_EOF) {
757 			error = EPIPE;
758 			goto error1;
759 		}
760 		goto retry;
761 	}
762 
763 	wpipe->pipe_state |= PIPE_DIRECTW;
764 
765 	PIPE_GET_GIANT(wpipe);
766 	error = pipe_build_write_buffer(wpipe, uio);
767 	PIPE_DROP_GIANT(wpipe);
768 	if (error) {
769 		wpipe->pipe_state &= ~PIPE_DIRECTW;
770 		goto error1;
771 	}
772 
773 	error = 0;
774 	while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
775 		if (wpipe->pipe_state & PIPE_EOF) {
776 			pipelock(wpipe, 0);
777 			PIPE_GET_GIANT(wpipe);
778 			pipe_destroy_write_buffer(wpipe);
779 			PIPE_DROP_GIANT(wpipe);
780 			pipeunlock(wpipe);
781 			pipeselwakeup(wpipe);
782 			error = EPIPE;
783 			goto error1;
784 		}
785 		if (wpipe->pipe_state & PIPE_WANTR) {
786 			wpipe->pipe_state &= ~PIPE_WANTR;
787 			wakeup(wpipe);
788 		}
789 		pipeselwakeup(wpipe);
790 		error = msleep(wpipe, PIPE_MTX(wpipe), PRIBIO | PCATCH,
791 		    "pipdwt", 0);
792 	}
793 
794 	pipelock(wpipe,0);
795 	if (wpipe->pipe_state & PIPE_DIRECTW) {
796 		/*
797 		 * this bit of trickery substitutes a kernel buffer for the
798 		 * user buffer of a process that might be going away.
799 		 */
800 		pipe_clone_write_buffer(wpipe);
801 	} else {
802 		PIPE_GET_GIANT(wpipe);
803 		pipe_destroy_write_buffer(wpipe);
804 		PIPE_DROP_GIANT(wpipe);
805 	}
806 	pipeunlock(wpipe);
807 	return (error);
808 
809 error1:
810 	wakeup(wpipe);
811 	return (error);
812 }
813 #endif
814 
815 static int
816 pipe_write(fp, uio, cred, flags, td)
817 	struct file *fp;
818 	struct uio *uio;
819 	struct ucred *cred;
820 	struct thread *td;
821 	int flags;
822 {
823 	int error = 0;
824 	int orig_resid;
825 	struct pipe *wpipe, *rpipe;
826 
827 	rpipe = (struct pipe *) fp->f_data;
828 	wpipe = rpipe->pipe_peer;
829 
830 	PIPE_LOCK(rpipe);
831 	/*
832 	 * detect loss of pipe read side, issue SIGPIPE if lost.
833 	 */
834 	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
835 		PIPE_UNLOCK(rpipe);
836 		return (EPIPE);
837 	}
838 	++wpipe->pipe_busy;
839 
840 	/*
841 	 * If it is advantageous to resize the pipe buffer, do
842 	 * so.
843 	 */
844 	if ((uio->uio_resid > PIPE_SIZE) &&
845 		(nbigpipe < LIMITBIGPIPES) &&
846 		(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
847 		(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
848 		(wpipe->pipe_buffer.cnt == 0)) {
849 
850 		if ((error = pipelock(wpipe,1)) == 0) {
851 			PIPE_GET_GIANT(rpipe);
852 			if (pipespace(wpipe, BIG_PIPE_SIZE) == 0)
853 				nbigpipe++;
854 			PIPE_DROP_GIANT(rpipe);
855 			pipeunlock(wpipe);
856 		}
857 	}
858 
859 	/*
860 	 * If an early error occurred, unbusy and return, waking up any pending
861 	 * readers.
862 	 */
863 	if (error) {
864 		--wpipe->pipe_busy;
865 		if ((wpipe->pipe_busy == 0) &&
866 		    (wpipe->pipe_state & PIPE_WANT)) {
867 			wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
868 			wakeup(wpipe);
869 		}
870 		PIPE_UNLOCK(rpipe);
871 		return(error);
872 	}
873 
874 	KASSERT(wpipe->pipe_buffer.buffer != NULL, ("pipe buffer gone"));
875 
876 	orig_resid = uio->uio_resid;
877 
878 	while (uio->uio_resid) {
879 		int space;
880 
881 #ifndef PIPE_NODIRECT
882 		/*
883 		 * If the transfer is large, we can gain performance if
884 		 * we do process-to-process copies directly.
885 		 * If the write is non-blocking, we don't use the
886 		 * direct write mechanism.
887 		 *
888 		 * The direct write mechanism will detect the reader going
889 		 * away on us.
890 		 */
891 		if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
892 		    (fp->f_flag & FNONBLOCK) == 0 &&
893 		    (wpipe->pipe_map.kva ||
894 		     (amountpipekva < LIMITPIPEKVA))) {
895 			error = pipe_direct_write(wpipe, uio);
896 			if (error)
897 				break;
898 			continue;
899 		}
900 #endif
901 
902 		/*
903 		 * Pipe buffered writes cannot be coincident with
904 		 * direct writes.  We wait until the currently executing
905 		 * direct write is completed before we start filling the
906 		 * pipe buffer.  We break out if a signal occurs or the
907 		 * reader goes away.
908 		 */
909 	retrywrite:
910 		while (wpipe->pipe_state & PIPE_DIRECTW) {
911 			if (wpipe->pipe_state & PIPE_WANTR) {
912 				wpipe->pipe_state &= ~PIPE_WANTR;
913 				wakeup(wpipe);
914 			}
915 			error = msleep(wpipe, PIPE_MTX(rpipe), PRIBIO | PCATCH,
916 			    "pipbww", 0);
917 			if (wpipe->pipe_state & PIPE_EOF)
918 				break;
919 			if (error)
920 				break;
921 		}
922 		if (wpipe->pipe_state & PIPE_EOF) {
923 			error = EPIPE;
924 			break;
925 		}
926 
927 		space = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
928 
929 		/* Writes of size <= PIPE_BUF must be atomic. */
930 		if ((space < uio->uio_resid) && (orig_resid <= PIPE_BUF))
931 			space = 0;
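		/*
		 * Forcing space to 0 here makes the writer block below (or
		 * fail with EAGAIN for non-blocking writes) rather than do a
		 * partial transfer, so that a write of at most PIPE_BUF bytes
		 * is copied into the buffer in one piece.
		 */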
932 
933 		if (space > 0 && (wpipe->pipe_buffer.cnt < PIPE_SIZE)) {
934 			if ((error = pipelock(wpipe,1)) == 0) {
935 				int size;	/* Transfer size */
936 				int segsize;	/* first segment to transfer */
937 
938 				/*
939 				 * It is possible for a direct write to
940 				 * slip in on us... handle it here...
941 				 */
942 				if (wpipe->pipe_state & PIPE_DIRECTW) {
943 					pipeunlock(wpipe);
944 					goto retrywrite;
945 				}
946 				/*
947 				 * If a process blocked in uiomove, our
948 				 * value for space might be bad.
949 				 *
950 				 * XXX will we be ok if the reader has gone
951 				 * away here?
952 				 */
953 				if (space > wpipe->pipe_buffer.size -
954 				    wpipe->pipe_buffer.cnt) {
955 					pipeunlock(wpipe);
956 					goto retrywrite;
957 				}
958 
959 				/*
960 				 * Transfer size is minimum of uio transfer
961 				 * and free space in pipe buffer.
962 				 */
963 				if (space > uio->uio_resid)
964 					size = uio->uio_resid;
965 				else
966 					size = space;
967 				/*
968 				 * First segment to transfer is minimum of
969 				 * transfer size and contiguous space in
970 				 * pipe buffer.  If first segment to transfer
971 				 * is less than the transfer size, we've got
972 				 * a wraparound in the buffer.
973 				 */
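				/*
				 * Worked example (illustrative numbers): with
				 * a 16384-byte buffer, in == 15000 and
				 * size == 3000, segsize comes out as 1384; the
				 * first uiomove() below fills the tail of the
				 * buffer and the second copies the remaining
				 * 1616 bytes to offset 0, leaving
				 * pipe_buffer.in at 1616.
				 */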
974 				segsize = wpipe->pipe_buffer.size -
975 					wpipe->pipe_buffer.in;
976 				if (segsize > size)
977 					segsize = size;
978 
979 				/* Transfer first segment */
980 
981 				PIPE_UNLOCK(rpipe);
982 				error = uiomove(&wpipe->pipe_buffer.buffer[wpipe->pipe_buffer.in],
983 						segsize, uio);
984 				PIPE_LOCK(rpipe);
985 
986 				if (error == 0 && segsize < size) {
987 					/*
988 					 * Transfer remaining part now, to
989 					 * support atomic writes.  Wraparound
990 					 * happened.
991 					 */
992 					if (wpipe->pipe_buffer.in + segsize !=
993 					    wpipe->pipe_buffer.size)
994 						panic("Expected pipe buffer wraparound disappeared");
995 
996 					PIPE_UNLOCK(rpipe);
997 					error = uiomove(&wpipe->pipe_buffer.buffer[0],
998 							size - segsize, uio);
999 					PIPE_LOCK(rpipe);
1000 				}
1001 				if (error == 0) {
1002 					wpipe->pipe_buffer.in += size;
1003 					if (wpipe->pipe_buffer.in >=
1004 					    wpipe->pipe_buffer.size) {
1005 						if (wpipe->pipe_buffer.in != size - segsize + wpipe->pipe_buffer.size)
1006 							panic("Expected wraparound bad");
1007 						wpipe->pipe_buffer.in = size - segsize;
1008 					}
1009 
1010 					wpipe->pipe_buffer.cnt += size;
1011 					if (wpipe->pipe_buffer.cnt > wpipe->pipe_buffer.size)
1012 						panic("Pipe buffer overflow");
1013 
1014 				}
1015 				pipeunlock(wpipe);
1016 			}
1017 			if (error)
1018 				break;
1019 
1020 		} else {
1021 			/*
1022 			 * If the "read-side" has been blocked, wake it up now.
1023 			 */
1024 			if (wpipe->pipe_state & PIPE_WANTR) {
1025 				wpipe->pipe_state &= ~PIPE_WANTR;
1026 				wakeup(wpipe);
1027 			}
1028 
1029 			/*
1030 			 * don't block on non-blocking I/O
1031 			 */
1032 			if (fp->f_flag & FNONBLOCK) {
1033 				error = EAGAIN;
1034 				break;
1035 			}
1036 
1037 			/*
1038 			 * We have no more space and have something to offer,
1039 			 * wake up select/poll.
1040 			 */
1041 			pipeselwakeup(wpipe);
1042 
1043 			wpipe->pipe_state |= PIPE_WANTW;
1044 			error = msleep(wpipe, PIPE_MTX(rpipe),
1045 			    PRIBIO | PCATCH, "pipewr", 0);
1046 			if (error != 0)
1047 				break;
1048 			/*
1049 			 * If read side wants to go away, we just issue a signal
1050 			 * to ourselves.
1051 			 */
1052 			if (wpipe->pipe_state & PIPE_EOF) {
1053 				error = EPIPE;
1054 				break;
1055 			}
1056 		}
1057 	}
1058 
1059 	--wpipe->pipe_busy;
1060 
1061 	if ((wpipe->pipe_busy == 0) && (wpipe->pipe_state & PIPE_WANT)) {
1062 		wpipe->pipe_state &= ~(PIPE_WANT | PIPE_WANTR);
1063 		wakeup(wpipe);
1064 	} else if (wpipe->pipe_buffer.cnt > 0) {
1065 		/*
1066 		 * If we have put any characters in the buffer, we wake up
1067 		 * the reader.
1068 		 */
1069 		if (wpipe->pipe_state & PIPE_WANTR) {
1070 			wpipe->pipe_state &= ~PIPE_WANTR;
1071 			wakeup(wpipe);
1072 		}
1073 	}
1074 
1075 	/*
1076 	 * Don't return EPIPE if I/O was successful
1077 	 */
1078 	if ((wpipe->pipe_buffer.cnt == 0) &&
1079 	    (uio->uio_resid == 0) &&
1080 	    (error == EPIPE)) {
1081 		error = 0;
1082 	}
1083 
1084 	if (error == 0)
1085 		vfs_timestamp(&wpipe->pipe_mtime);
1086 
1087 	/*
1088 	 * We have something to offer,
1089 	 * wake up select/poll.
1090 	 */
1091 	if (wpipe->pipe_buffer.cnt)
1092 		pipeselwakeup(wpipe);
1093 
1094 	PIPE_UNLOCK(rpipe);
1095 	return (error);
1096 }
1097 
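/*
 * Illustrative userland behaviour of the write side (not compiled here):
 * writing after the read end has gone away fails with EPIPE, and the
 * generic write path that calls pipe_write() normally delivers SIGPIPE
 * as well:
 *
 *	signal(SIGPIPE, SIG_IGN);
 *	close(fds[0]);
 *	if (write(fds[1], "x", 1) == -1 && errno == EPIPE)
 *		warn("write on widowed pipe");
 */
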
1098 /*
1099  * we implement a very minimal set of ioctls for compatibility with sockets.
1100  */
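/*
 * Illustrative userland usage of FIONREAD (not compiled here), assuming
 * <sys/filio.h> is included:
 *
 *	int nbytes;
 *
 *	if (ioctl(fds[0], FIONREAD, &nbytes) == 0)
 *		printf("%d bytes ready in the pipe\n", nbytes);
 */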
1101 int
1102 pipe_ioctl(fp, cmd, data, td)
1103 	struct file *fp;
1104 	u_long cmd;
1105 	caddr_t data;
1106 	struct thread *td;
1107 {
1108 	struct pipe *mpipe = (struct pipe *)fp->f_data;
1109 
1110 	switch (cmd) {
1111 
1112 	case FIONBIO:
1113 		return (0);
1114 
1115 	case FIOASYNC:
1116 		PIPE_LOCK(mpipe);
1117 		if (*(int *)data) {
1118 			mpipe->pipe_state |= PIPE_ASYNC;
1119 		} else {
1120 			mpipe->pipe_state &= ~PIPE_ASYNC;
1121 		}
1122 		PIPE_UNLOCK(mpipe);
1123 		return (0);
1124 
1125 	case FIONREAD:
1126 		PIPE_LOCK(mpipe);
1127 		if (mpipe->pipe_state & PIPE_DIRECTW)
1128 			*(int *)data = mpipe->pipe_map.cnt;
1129 		else
1130 			*(int *)data = mpipe->pipe_buffer.cnt;
1131 		PIPE_UNLOCK(mpipe);
1132 		return (0);
1133 
1134 	case FIOSETOWN:
1135 		return (fsetown(*(int *)data, &mpipe->pipe_sigio));
1136 
1137 	case FIOGETOWN:
1138 		*(int *)data = fgetown(mpipe->pipe_sigio);
1139 		return (0);
1140 
1141 	/* This is deprecated, FIOSETOWN should be used instead. */
1142 	case TIOCSPGRP:
1143 		return (fsetown(-(*(int *)data), &mpipe->pipe_sigio));
1144 
1145 	/* This is deprecated, FIOGETOWN should be used instead. */
1146 	case TIOCGPGRP:
1147 		*(int *)data = -fgetown(mpipe->pipe_sigio);
1148 		return (0);
1149 
1150 	}
1151 	return (ENOTTY);
1152 }
1153 
1154 int
1155 pipe_poll(fp, events, cred, td)
1156 	struct file *fp;
1157 	int events;
1158 	struct ucred *cred;
1159 	struct thread *td;
1160 {
1161 	struct pipe *rpipe = (struct pipe *)fp->f_data;
1162 	struct pipe *wpipe;
1163 	int revents = 0;
1164 
1165 	wpipe = rpipe->pipe_peer;
1166 	PIPE_LOCK(rpipe);
1167 	if (events & (POLLIN | POLLRDNORM))
1168 		if ((rpipe->pipe_state & PIPE_DIRECTW) ||
1169 		    (rpipe->pipe_buffer.cnt > 0) ||
1170 		    (rpipe->pipe_state & PIPE_EOF))
1171 			revents |= events & (POLLIN | POLLRDNORM);
1172 
1173 	if (events & (POLLOUT | POLLWRNORM))
1174 		if (wpipe == NULL || (wpipe->pipe_state & PIPE_EOF) ||
1175 		    (((wpipe->pipe_state & PIPE_DIRECTW) == 0) &&
1176 		     (wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt) >= PIPE_BUF))
1177 			revents |= events & (POLLOUT | POLLWRNORM);
1178 
1179 	if ((rpipe->pipe_state & PIPE_EOF) ||
1180 	    (wpipe == NULL) ||
1181 	    (wpipe->pipe_state & PIPE_EOF))
1182 		revents |= POLLHUP;
1183 
1184 	if (revents == 0) {
1185 		if (events & (POLLIN | POLLRDNORM)) {
1186 			selrecord(td, &rpipe->pipe_sel);
1187 			rpipe->pipe_state |= PIPE_SEL;
1188 		}
1189 
1190 		if (events & (POLLOUT | POLLWRNORM)) {
1191 			selrecord(td, &wpipe->pipe_sel);
1192 			wpipe->pipe_state |= PIPE_SEL;
1193 		}
1194 	}
1195 	PIPE_UNLOCK(rpipe);
1196 
1197 	return (revents);
1198 }
1199 
1200 static int
1201 pipe_stat(fp, ub, td)
1202 	struct file *fp;
1203 	struct stat *ub;
1204 	struct thread *td;
1205 {
1206 	struct pipe *pipe = (struct pipe *)fp->f_data;
1207 
1208 	bzero((caddr_t)ub, sizeof(*ub));
1209 	ub->st_mode = S_IFIFO;
1210 	ub->st_blksize = pipe->pipe_buffer.size;
1211 	ub->st_size = pipe->pipe_buffer.cnt;
1212 	ub->st_blocks = (ub->st_size + ub->st_blksize - 1) / ub->st_blksize;
1213 	ub->st_atimespec = pipe->pipe_atime;
1214 	ub->st_mtimespec = pipe->pipe_mtime;
1215 	ub->st_ctimespec = pipe->pipe_ctime;
1216 	ub->st_uid = fp->f_cred->cr_uid;
1217 	ub->st_gid = fp->f_cred->cr_gid;
1218 	/*
1219 	 * Left as 0: st_dev, st_ino, st_nlink, st_rdev, st_flags, st_gen.
1220 	 * XXX (st_dev, st_ino) should be unique.
1221 	 */
1222 	return (0);
1223 }
1224 
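/*
 * Illustrative userland view of the above (not compiled here): fstat(2)
 * on either end reports S_IFIFO and the number of unread bytes in
 * st_size.
 *
 *	struct stat sb;
 *
 *	if (fstat(fds[0], &sb) == 0 && S_ISFIFO(sb.st_mode))
 *		printf("%jd bytes pending\n", (intmax_t)sb.st_size);
 */
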
1225 /* ARGSUSED */
1226 static int
1227 pipe_close(fp, td)
1228 	struct file *fp;
1229 	struct thread *td;
1230 {
1231 	struct pipe *cpipe = (struct pipe *)fp->f_data;
1232 
1233 	fp->f_ops = &badfileops;
1234 	fp->f_data = NULL;
1235 	funsetown(cpipe->pipe_sigio);
1236 	pipeclose(cpipe);
1237 	return (0);
1238 }
1239 
1240 static void
1241 pipe_free_kmem(cpipe)
1242 	struct pipe *cpipe;
1243 {
1244 
1245 	GIANT_REQUIRED;
1246 	KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
1247 	       ("pipe_free_kmem: pipe mutex locked"));
1248 
1249 	if (cpipe->pipe_buffer.buffer != NULL) {
1250 		if (cpipe->pipe_buffer.size > PIPE_SIZE)
1251 			--nbigpipe;
1252 		amountpipekva -= cpipe->pipe_buffer.size;
1253 		kmem_free(kernel_map,
1254 			(vm_offset_t)cpipe->pipe_buffer.buffer,
1255 			cpipe->pipe_buffer.size);
1256 		cpipe->pipe_buffer.buffer = NULL;
1257 	}
1258 #ifndef PIPE_NODIRECT
1259 	if (cpipe->pipe_map.kva != NULL) {
1260 		amountpipekva -= cpipe->pipe_buffer.size + PAGE_SIZE;
1261 		kmem_free(kernel_map,
1262 			cpipe->pipe_map.kva,
1263 			cpipe->pipe_buffer.size + PAGE_SIZE);
1264 		cpipe->pipe_map.cnt = 0;
1265 		cpipe->pipe_map.kva = 0;
1266 		cpipe->pipe_map.pos = 0;
1267 		cpipe->pipe_map.npages = 0;
1268 	}
1269 #endif
1270 }
1271 
1272 /*
1273  * shutdown the pipe
1274  */
1275 static void
1276 pipeclose(cpipe)
1277 	struct pipe *cpipe;
1278 {
1279 	struct pipe *ppipe;
1280 
1281 	if (cpipe) {
1282 		PIPE_LOCK(cpipe);
1283 
1284 		pipeselwakeup(cpipe);
1285 
1286 		/*
1287 		 * If the other side is blocked, wake it up saying that
1288 		 * we want to close it down.
1289 		 */
1290 		while (cpipe->pipe_busy) {
1291 			wakeup(cpipe);
1292 			cpipe->pipe_state |= PIPE_WANT | PIPE_EOF;
1293 			msleep(cpipe, PIPE_MTX(cpipe), PRIBIO, "pipecl", 0);
1294 		}
1295 
1296 		/*
1297 		 * Disconnect from peer
1298 		 */
1299 		if ((ppipe = cpipe->pipe_peer) != NULL) {
1300 			pipeselwakeup(ppipe);
1301 
1302 			ppipe->pipe_state |= PIPE_EOF;
1303 			wakeup(ppipe);
1304 			KNOTE(&ppipe->pipe_sel.si_note, 0);
1305 			ppipe->pipe_peer = NULL;
1306 		}
1307 		/*
1308 		 * free resources
1309 		 */
1310 		PIPE_UNLOCK(cpipe);
1311 		mtx_lock(&Giant);
1312 		pipe_free_kmem(cpipe);
1313 		zfree(pipe_zone, cpipe);
1314 		mtx_unlock(&Giant);
1315 	}
1316 }
1317 
1318 /*ARGSUSED*/
1319 static int
1320 pipe_kqfilter(struct file *fp, struct knote *kn)
1321 {
1322 	struct pipe *cpipe;
1323 
1324 	cpipe = (struct pipe *)kn->kn_fp->f_data;
1325 	switch (kn->kn_filter) {
1326 	case EVFILT_READ:
1327 		kn->kn_fop = &pipe_rfiltops;
1328 		break;
1329 	case EVFILT_WRITE:
1330 		kn->kn_fop = &pipe_wfiltops;
1331 		cpipe = cpipe->pipe_peer;
1332 		break;
1333 	default:
1334 		return (1);
1335 	}
1336 	kn->kn_hook = (caddr_t)cpipe;
1337 
1338 	PIPE_LOCK(cpipe);
1339 	SLIST_INSERT_HEAD(&cpipe->pipe_sel.si_note, kn, kn_selnext);
1340 	PIPE_UNLOCK(cpipe);
1341 	return (0);
1342 }
1343 
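/*
 * Illustrative userland registration against these filters (not compiled
 * here): an EVFILT_READ knote on the read end reports the number of
 * readable bytes in kn_data, as computed in filt_piperead() below.
 *
 *	struct kevent kev;
 *	int kq = kqueue();
 *
 *	EV_SET(&kev, fds[0], EVFILT_READ, EV_ADD, 0, 0, NULL);
 *	(void)kevent(kq, &kev, 1, NULL, 0, NULL);
 */
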
1344 static void
1345 filt_pipedetach(struct knote *kn)
1346 {
1347 	struct pipe *cpipe = (struct pipe *)kn->kn_hook;
1348 
1349 	PIPE_LOCK(cpipe);
1350 	SLIST_REMOVE(&cpipe->pipe_sel.si_note, kn, knote, kn_selnext);
1351 	PIPE_UNLOCK(cpipe);
1352 }
1353 
1354 /*ARGSUSED*/
1355 static int
1356 filt_piperead(struct knote *kn, long hint)
1357 {
1358 	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1359 	struct pipe *wpipe = rpipe->pipe_peer;
1360 
1361 	PIPE_LOCK(rpipe);
1362 	kn->kn_data = rpipe->pipe_buffer.cnt;
1363 	if ((kn->kn_data == 0) && (rpipe->pipe_state & PIPE_DIRECTW))
1364 		kn->kn_data = rpipe->pipe_map.cnt;
1365 
1366 	if ((rpipe->pipe_state & PIPE_EOF) ||
1367 	    (wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1368 		kn->kn_flags |= EV_EOF;
1369 		PIPE_UNLOCK(rpipe);
1370 		return (1);
1371 	}
1372 	PIPE_UNLOCK(rpipe);
1373 	return (kn->kn_data > 0);
1374 }
1375 
1376 /*ARGSUSED*/
1377 static int
1378 filt_pipewrite(struct knote *kn, long hint)
1379 {
1380 	struct pipe *rpipe = (struct pipe *)kn->kn_fp->f_data;
1381 	struct pipe *wpipe = rpipe->pipe_peer;
1382 
1383 	PIPE_LOCK(rpipe);
1384 	if ((wpipe == NULL) || (wpipe->pipe_state & PIPE_EOF)) {
1385 		kn->kn_data = 0;
1386 		kn->kn_flags |= EV_EOF;
1387 		PIPE_UNLOCK(rpipe);
1388 		return (1);
1389 	}
1390 	kn->kn_data = wpipe->pipe_buffer.size - wpipe->pipe_buffer.cnt;
1391 	if (wpipe->pipe_state & PIPE_DIRECTW)
1392 		kn->kn_data = 0;
1393 
1394 	PIPE_UNLOCK(rpipe);
1395 	return (kn->kn_data >= PIPE_BUF);
1396 }
1397