xref: /freebsd/sys/kern/sys_generic.c (revision 265fc98f36e8e63420c73d824bcdc52d8c3f805b)
1df8bae1dSRodney W. Grimes /*
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1989, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
5df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
6df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
7df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 3. All advertising materials mentioning features or use of this software
19df8bae1dSRodney W. Grimes  *    must display the following acknowledgement:
20df8bae1dSRodney W. Grimes  *	This product includes software developed by the University of
21df8bae1dSRodney W. Grimes  *	California, Berkeley and its contributors.
22df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
23df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
24df8bae1dSRodney W. Grimes  *    without specific prior written permission.
25df8bae1dSRodney W. Grimes  *
26df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
37df8bae1dSRodney W. Grimes  *
38df8bae1dSRodney W. Grimes  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
39c3aac50fSPeter Wemm  * $FreeBSD$
40df8bae1dSRodney W. Grimes  */
41df8bae1dSRodney W. Grimes 
42db6a20e2SGarrett Wollman #include "opt_ktrace.h"
43db6a20e2SGarrett Wollman 
44df8bae1dSRodney W. Grimes #include <sys/param.h>
45df8bae1dSRodney W. Grimes #include <sys/systm.h>
46d2d3e875SBruce Evans #include <sys/sysproto.h>
47df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
4820982410SBruce Evans #include <sys/filio.h>
493ac4d1efSBruce Evans #include <sys/fcntl.h>
50df8bae1dSRodney W. Grimes #include <sys/file.h>
51df8bae1dSRodney W. Grimes #include <sys/proc.h>
52797f2d22SPoul-Henning Kamp #include <sys/signalvar.h>
53df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
54df8bae1dSRodney W. Grimes #include <sys/uio.h>
55df8bae1dSRodney W. Grimes #include <sys/kernel.h>
56df8bae1dSRodney W. Grimes #include <sys/malloc.h>
5742d11757SPeter Wemm #include <sys/poll.h>
5889b71647SPeter Wemm #include <sys/resourcevar.h>
590a2c3d48SGarrett Wollman #include <sys/selinfo.h>
608cb96f20SPeter Wemm #include <sys/sysctl.h>
6142d11757SPeter Wemm #include <sys/sysent.h>
62279d7226SMatthew Dillon #include <sys/bio.h>
63279d7226SMatthew Dillon #include <sys/buf.h>
64265fc98fSSeigo Tanimura #include <sys/condvar.h>
65df8bae1dSRodney W. Grimes #ifdef KTRACE
66df8bae1dSRodney W. Grimes #include <sys/ktrace.h>
67df8bae1dSRodney W. Grimes #endif
68279d7226SMatthew Dillon #include <vm/vm.h>
69279d7226SMatthew Dillon #include <vm/vm_page.h>
70df8bae1dSRodney W. Grimes 
71069e9bc1SDoug Rabson #include <machine/limits.h>
72069e9bc1SDoug Rabson 
73a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
74a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
75a1c995b6SPoul-Henning Kamp MALLOC_DEFINE(M_IOV, "iov", "large iov's");
7655166637SPoul-Henning Kamp 
77ea0237edSJonathan Lemon static int	pollscan __P((struct proc *, struct pollfd *, u_int));
78265fc98fSSeigo Tanimura static int	pollholddrop __P((struct proc *, struct pollfd *, u_int, int));
792087c896SBruce Evans static int	selscan __P((struct proc *, fd_mask **, fd_mask **, int));
80265fc98fSSeigo Tanimura static int	selholddrop __P((struct proc *, fd_mask *, fd_mask *, int, int));
818fe387abSDmitrij Tejblum static int	dofileread __P((struct proc *, struct file *, int, void *,
828fe387abSDmitrij Tejblum 		    size_t, off_t, int));
838fe387abSDmitrij Tejblum static int	dofilewrite __P((struct proc *, struct file *, int,
848fe387abSDmitrij Tejblum 		    const void *, size_t, off_t, int));
858fe387abSDmitrij Tejblum 
868757e5bbSAlfred Perlstein struct file*
87279d7226SMatthew Dillon holdfp(fdp, fd, flag)
888fe387abSDmitrij Tejblum 	struct filedesc* fdp;
898fe387abSDmitrij Tejblum 	int fd, flag;
908fe387abSDmitrij Tejblum {
918fe387abSDmitrij Tejblum 	struct file* fp;
928fe387abSDmitrij Tejblum 
938fe387abSDmitrij Tejblum 	if (((u_int)fd) >= fdp->fd_nfiles ||
948fe387abSDmitrij Tejblum 	    (fp = fdp->fd_ofiles[fd]) == NULL ||
95279d7226SMatthew Dillon 	    (fp->f_flag & flag) == 0) {
968fe387abSDmitrij Tejblum 		return (NULL);
97279d7226SMatthew Dillon 	}
98279d7226SMatthew Dillon 	fhold(fp);
998fe387abSDmitrij Tejblum 	return (fp);
1008fe387abSDmitrij Tejblum }
101d93f860cSPoul-Henning Kamp 
102df8bae1dSRodney W. Grimes /*
103df8bae1dSRodney W. Grimes  * Read system call.
104df8bae1dSRodney W. Grimes  */
105d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
106df8bae1dSRodney W. Grimes struct read_args {
107df8bae1dSRodney W. Grimes 	int	fd;
108134e06feSBruce Evans 	void	*buf;
109134e06feSBruce Evans 	size_t	nbyte;
110df8bae1dSRodney W. Grimes };
111d2d3e875SBruce Evans #endif
11226f9a767SRodney W. Grimes int
113cb226aaaSPoul-Henning Kamp read(p, uap)
114df8bae1dSRodney W. Grimes 	struct proc *p;
115df8bae1dSRodney W. Grimes 	register struct read_args *uap;
116df8bae1dSRodney W. Grimes {
117df8bae1dSRodney W. Grimes 	register struct file *fp;
118279d7226SMatthew Dillon 	int error;
119df8bae1dSRodney W. Grimes 
120279d7226SMatthew Dillon 	if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
121df8bae1dSRodney W. Grimes 		return (EBADF);
122279d7226SMatthew Dillon 	error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0);
123279d7226SMatthew Dillon 	fdrop(fp, p);
124279d7226SMatthew Dillon 	return(error);
125df8bae1dSRodney W. Grimes }
126df8bae1dSRodney W. Grimes 
127df8bae1dSRodney W. Grimes /*
1288fe387abSDmitrij Tejblum  * Pread system call
1294160ccd9SAlan Cox  */
1304160ccd9SAlan Cox #ifndef _SYS_SYSPROTO_H_
1314160ccd9SAlan Cox struct pread_args {
1324160ccd9SAlan Cox 	int	fd;
1334160ccd9SAlan Cox 	void	*buf;
1344160ccd9SAlan Cox 	size_t	nbyte;
1358fe387abSDmitrij Tejblum 	int	pad;
1364160ccd9SAlan Cox 	off_t	offset;
1374160ccd9SAlan Cox };
1384160ccd9SAlan Cox #endif
1394160ccd9SAlan Cox int
1404160ccd9SAlan Cox pread(p, uap)
1414160ccd9SAlan Cox 	struct proc *p;
1424160ccd9SAlan Cox 	register struct pread_args *uap;
1434160ccd9SAlan Cox {
1444160ccd9SAlan Cox 	register struct file *fp;
145279d7226SMatthew Dillon 	int error;
1468fe387abSDmitrij Tejblum 
147279d7226SMatthew Dillon 	if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL)
1488fe387abSDmitrij Tejblum 		return (EBADF);
149279d7226SMatthew Dillon 	if (fp->f_type != DTYPE_VNODE) {
150279d7226SMatthew Dillon 		error = ESPIPE;
151279d7226SMatthew Dillon 	} else {
152279d7226SMatthew Dillon 	    error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte,
153279d7226SMatthew Dillon 		uap->offset, FOF_OFFSET);
154279d7226SMatthew Dillon 	}
155279d7226SMatthew Dillon 	fdrop(fp, p);
156279d7226SMatthew Dillon 	return(error);
1578fe387abSDmitrij Tejblum }
1588fe387abSDmitrij Tejblum 
1598fe387abSDmitrij Tejblum /*
1608fe387abSDmitrij Tejblum  * Code common for read and pread
1618fe387abSDmitrij Tejblum  */
1628fe387abSDmitrij Tejblum int
1638fe387abSDmitrij Tejblum dofileread(p, fp, fd, buf, nbyte, offset, flags)
1648fe387abSDmitrij Tejblum 	struct proc *p;
1658fe387abSDmitrij Tejblum 	struct file *fp;
1668fe387abSDmitrij Tejblum 	int fd, flags;
1678fe387abSDmitrij Tejblum 	void *buf;
1688fe387abSDmitrij Tejblum 	size_t nbyte;
1698fe387abSDmitrij Tejblum 	off_t offset;
1708fe387abSDmitrij Tejblum {
1714160ccd9SAlan Cox 	struct uio auio;
1724160ccd9SAlan Cox 	struct iovec aiov;
1734160ccd9SAlan Cox 	long cnt, error = 0;
1744160ccd9SAlan Cox #ifdef KTRACE
1754160ccd9SAlan Cox 	struct iovec ktriov;
17642ebfbf2SBrian Feldman 	struct uio ktruio;
1773c89e357SBrian Feldman 	int didktr = 0;
1784160ccd9SAlan Cox #endif
1794160ccd9SAlan Cox 
1808fe387abSDmitrij Tejblum 	aiov.iov_base = (caddr_t)buf;
1818fe387abSDmitrij Tejblum 	aiov.iov_len = nbyte;
1824160ccd9SAlan Cox 	auio.uio_iov = &aiov;
1834160ccd9SAlan Cox 	auio.uio_iovcnt = 1;
1848fe387abSDmitrij Tejblum 	auio.uio_offset = offset;
1858fe387abSDmitrij Tejblum 	if (nbyte > INT_MAX)
1864160ccd9SAlan Cox 		return (EINVAL);
1878fe387abSDmitrij Tejblum 	auio.uio_resid = nbyte;
1884160ccd9SAlan Cox 	auio.uio_rw = UIO_READ;
1894160ccd9SAlan Cox 	auio.uio_segflg = UIO_USERSPACE;
1904160ccd9SAlan Cox 	auio.uio_procp = p;
1914160ccd9SAlan Cox #ifdef KTRACE
1924160ccd9SAlan Cox 	/*
1934160ccd9SAlan Cox 	 * if tracing, save a copy of iovec
1944160ccd9SAlan Cox 	 */
19542ebfbf2SBrian Feldman 	if (KTRPOINT(p, KTR_GENIO)) {
1964160ccd9SAlan Cox 		ktriov = aiov;
19742ebfbf2SBrian Feldman 		ktruio = auio;
1983c89e357SBrian Feldman 		didktr = 1;
19942ebfbf2SBrian Feldman 	}
2004160ccd9SAlan Cox #endif
2018fe387abSDmitrij Tejblum 	cnt = nbyte;
202279d7226SMatthew Dillon 
203279d7226SMatthew Dillon 	if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) {
2044160ccd9SAlan Cox 		if (auio.uio_resid != cnt && (error == ERESTART ||
2054160ccd9SAlan Cox 		    error == EINTR || error == EWOULDBLOCK))
2064160ccd9SAlan Cox 			error = 0;
207279d7226SMatthew Dillon 	}
2084160ccd9SAlan Cox 	cnt -= auio.uio_resid;
2094160ccd9SAlan Cox #ifdef KTRACE
2103c89e357SBrian Feldman 	if (didktr && error == 0) {
21142ebfbf2SBrian Feldman 		ktruio.uio_iov = &ktriov;
21242ebfbf2SBrian Feldman 		ktruio.uio_resid = cnt;
21342ebfbf2SBrian Feldman 		ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error);
21442ebfbf2SBrian Feldman 	}
2154160ccd9SAlan Cox #endif
2164160ccd9SAlan Cox 	p->p_retval[0] = cnt;
2174160ccd9SAlan Cox 	return (error);
2184160ccd9SAlan Cox }
2194160ccd9SAlan Cox 
2204160ccd9SAlan Cox /*
221df8bae1dSRodney W. Grimes  * Scatter read system call.
222df8bae1dSRodney W. Grimes  */
223d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
224df8bae1dSRodney W. Grimes struct readv_args {
2257147b19dSBruce Evans 	int	fd;
226df8bae1dSRodney W. Grimes 	struct	iovec *iovp;
227df8bae1dSRodney W. Grimes 	u_int	iovcnt;
228df8bae1dSRodney W. Grimes };
229d2d3e875SBruce Evans #endif
23026f9a767SRodney W. Grimes int
231cb226aaaSPoul-Henning Kamp readv(p, uap)
232df8bae1dSRodney W. Grimes 	struct proc *p;
233df8bae1dSRodney W. Grimes 	register struct readv_args *uap;
234df8bae1dSRodney W. Grimes {
235df8bae1dSRodney W. Grimes 	register struct file *fp;
236df8bae1dSRodney W. Grimes 	register struct filedesc *fdp = p->p_fd;
237df8bae1dSRodney W. Grimes 	struct uio auio;
238df8bae1dSRodney W. Grimes 	register struct iovec *iov;
239df8bae1dSRodney W. Grimes 	struct iovec *needfree;
240df8bae1dSRodney W. Grimes 	struct iovec aiov[UIO_SMALLIOV];
241df8bae1dSRodney W. Grimes 	long i, cnt, error = 0;
242df8bae1dSRodney W. Grimes 	u_int iovlen;
243df8bae1dSRodney W. Grimes #ifdef KTRACE
244df8bae1dSRodney W. Grimes 	struct iovec *ktriov = NULL;
24542ebfbf2SBrian Feldman 	struct uio ktruio;
246df8bae1dSRodney W. Grimes #endif
247df8bae1dSRodney W. Grimes 
248279d7226SMatthew Dillon 	if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL)
249df8bae1dSRodney W. Grimes 		return (EBADF);
250df8bae1dSRodney W. Grimes 	/* note: can't use iovlen until iovcnt is validated */
251df8bae1dSRodney W. Grimes 	iovlen = uap->iovcnt * sizeof (struct iovec);
252df8bae1dSRodney W. Grimes 	if (uap->iovcnt > UIO_SMALLIOV) {
253df8bae1dSRodney W. Grimes 		if (uap->iovcnt > UIO_MAXIOV)
254df8bae1dSRodney W. Grimes 			return (EINVAL);
255df8bae1dSRodney W. Grimes 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
256df8bae1dSRodney W. Grimes 		needfree = iov;
257df8bae1dSRodney W. Grimes 	} else {
258df8bae1dSRodney W. Grimes 		iov = aiov;
259df8bae1dSRodney W. Grimes 		needfree = NULL;
260df8bae1dSRodney W. Grimes 	}
261df8bae1dSRodney W. Grimes 	auio.uio_iov = iov;
262df8bae1dSRodney W. Grimes 	auio.uio_iovcnt = uap->iovcnt;
263df8bae1dSRodney W. Grimes 	auio.uio_rw = UIO_READ;
264df8bae1dSRodney W. Grimes 	auio.uio_segflg = UIO_USERSPACE;
265df8bae1dSRodney W. Grimes 	auio.uio_procp = p;
2662c1011f7SJohn Dyson 	auio.uio_offset = -1;
267bb56ec4aSPoul-Henning Kamp 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
268df8bae1dSRodney W. Grimes 		goto done;
269df8bae1dSRodney W. Grimes 	auio.uio_resid = 0;
270df8bae1dSRodney W. Grimes 	for (i = 0; i < uap->iovcnt; i++) {
271069e9bc1SDoug Rabson 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
272df8bae1dSRodney W. Grimes 			error = EINVAL;
273df8bae1dSRodney W. Grimes 			goto done;
274df8bae1dSRodney W. Grimes 		}
275069e9bc1SDoug Rabson 		auio.uio_resid += iov->iov_len;
276df8bae1dSRodney W. Grimes 		iov++;
277df8bae1dSRodney W. Grimes 	}
278df8bae1dSRodney W. Grimes #ifdef KTRACE
279df8bae1dSRodney W. Grimes 	/*
280df8bae1dSRodney W. Grimes 	 * if tracing, save a copy of iovec
281df8bae1dSRodney W. Grimes 	 */
282df8bae1dSRodney W. Grimes 	if (KTRPOINT(p, KTR_GENIO))  {
283df8bae1dSRodney W. Grimes 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
284df8bae1dSRodney W. Grimes 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
28542ebfbf2SBrian Feldman 		ktruio = auio;
286df8bae1dSRodney W. Grimes 	}
287df8bae1dSRodney W. Grimes #endif
288df8bae1dSRodney W. Grimes 	cnt = auio.uio_resid;
289279d7226SMatthew Dillon 	if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) {
290df8bae1dSRodney W. Grimes 		if (auio.uio_resid != cnt && (error == ERESTART ||
291df8bae1dSRodney W. Grimes 		    error == EINTR || error == EWOULDBLOCK))
292df8bae1dSRodney W. Grimes 			error = 0;
293279d7226SMatthew Dillon 	}
294df8bae1dSRodney W. Grimes 	cnt -= auio.uio_resid;
295df8bae1dSRodney W. Grimes #ifdef KTRACE
296df8bae1dSRodney W. Grimes 	if (ktriov != NULL) {
29742ebfbf2SBrian Feldman 		if (error == 0) {
29842ebfbf2SBrian Feldman 			ktruio.uio_iov = ktriov;
29942ebfbf2SBrian Feldman 			ktruio.uio_resid = cnt;
30042ebfbf2SBrian Feldman 			ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio,
30142ebfbf2SBrian Feldman 			    error);
30242ebfbf2SBrian Feldman 		}
303df8bae1dSRodney W. Grimes 		FREE(ktriov, M_TEMP);
304df8bae1dSRodney W. Grimes 	}
305df8bae1dSRodney W. Grimes #endif
306cb226aaaSPoul-Henning Kamp 	p->p_retval[0] = cnt;
307df8bae1dSRodney W. Grimes done:
308279d7226SMatthew Dillon 	fdrop(fp, p);
309df8bae1dSRodney W. Grimes 	if (needfree)
310df8bae1dSRodney W. Grimes 		FREE(needfree, M_IOV);
311df8bae1dSRodney W. Grimes 	return (error);
312df8bae1dSRodney W. Grimes }
313df8bae1dSRodney W. Grimes 
314df8bae1dSRodney W. Grimes /*
315df8bae1dSRodney W. Grimes  * Write system call
316df8bae1dSRodney W. Grimes  */
317d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
318df8bae1dSRodney W. Grimes struct write_args {
319df8bae1dSRodney W. Grimes 	int	fd;
320134e06feSBruce Evans 	const void *buf;
321134e06feSBruce Evans 	size_t	nbyte;
322df8bae1dSRodney W. Grimes };
323d2d3e875SBruce Evans #endif
32426f9a767SRodney W. Grimes int
325cb226aaaSPoul-Henning Kamp write(p, uap)
326df8bae1dSRodney W. Grimes 	struct proc *p;
327df8bae1dSRodney W. Grimes 	register struct write_args *uap;
328df8bae1dSRodney W. Grimes {
329df8bae1dSRodney W. Grimes 	register struct file *fp;
330279d7226SMatthew Dillon 	int error;
331df8bae1dSRodney W. Grimes 
332279d7226SMatthew Dillon 	if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
333df8bae1dSRodney W. Grimes 		return (EBADF);
334279d7226SMatthew Dillon 	error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0);
335279d7226SMatthew Dillon 	fdrop(fp, p);
336279d7226SMatthew Dillon 	return(error);
337df8bae1dSRodney W. Grimes }
338df8bae1dSRodney W. Grimes 
339df8bae1dSRodney W. Grimes /*
3408fe387abSDmitrij Tejblum  * Pwrite system call
3414160ccd9SAlan Cox  */
3424160ccd9SAlan Cox #ifndef _SYS_SYSPROTO_H_
3434160ccd9SAlan Cox struct pwrite_args {
3444160ccd9SAlan Cox 	int	fd;
3454160ccd9SAlan Cox 	const void *buf;
3464160ccd9SAlan Cox 	size_t	nbyte;
3478fe387abSDmitrij Tejblum 	int	pad;
3484160ccd9SAlan Cox 	off_t	offset;
3494160ccd9SAlan Cox };
3504160ccd9SAlan Cox #endif
3514160ccd9SAlan Cox int
3524160ccd9SAlan Cox pwrite(p, uap)
3534160ccd9SAlan Cox 	struct proc *p;
3544160ccd9SAlan Cox 	register struct pwrite_args *uap;
3554160ccd9SAlan Cox {
3564160ccd9SAlan Cox 	register struct file *fp;
357279d7226SMatthew Dillon 	int error;
3588fe387abSDmitrij Tejblum 
359279d7226SMatthew Dillon 	if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL)
3608fe387abSDmitrij Tejblum 		return (EBADF);
361279d7226SMatthew Dillon 	if (fp->f_type != DTYPE_VNODE) {
362279d7226SMatthew Dillon 		error = ESPIPE;
363279d7226SMatthew Dillon 	} else {
364279d7226SMatthew Dillon 	    error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte,
365279d7226SMatthew Dillon 		uap->offset, FOF_OFFSET);
366279d7226SMatthew Dillon 	}
367279d7226SMatthew Dillon 	fdrop(fp, p);
368279d7226SMatthew Dillon 	return(error);
3698fe387abSDmitrij Tejblum }
3708fe387abSDmitrij Tejblum 
3718fe387abSDmitrij Tejblum static int
3728fe387abSDmitrij Tejblum dofilewrite(p, fp, fd, buf, nbyte, offset, flags)
3738fe387abSDmitrij Tejblum 	struct proc *p;
3748fe387abSDmitrij Tejblum 	struct file *fp;
3758fe387abSDmitrij Tejblum 	int fd, flags;
3768fe387abSDmitrij Tejblum 	const void *buf;
3778fe387abSDmitrij Tejblum 	size_t nbyte;
3788fe387abSDmitrij Tejblum 	off_t offset;
3798fe387abSDmitrij Tejblum {
3804160ccd9SAlan Cox 	struct uio auio;
3814160ccd9SAlan Cox 	struct iovec aiov;
3824160ccd9SAlan Cox 	long cnt, error = 0;
3834160ccd9SAlan Cox #ifdef KTRACE
3844160ccd9SAlan Cox 	struct iovec ktriov;
38542ebfbf2SBrian Feldman 	struct uio ktruio;
3863c89e357SBrian Feldman 	int didktr = 0;
3874160ccd9SAlan Cox #endif
3884160ccd9SAlan Cox 
389b31ae1adSPeter Wemm 	aiov.iov_base = (void *)(uintptr_t)buf;
3908fe387abSDmitrij Tejblum 	aiov.iov_len = nbyte;
3914160ccd9SAlan Cox 	auio.uio_iov = &aiov;
3924160ccd9SAlan Cox 	auio.uio_iovcnt = 1;
3938fe387abSDmitrij Tejblum 	auio.uio_offset = offset;
3948fe387abSDmitrij Tejblum 	if (nbyte > INT_MAX)
3954160ccd9SAlan Cox 		return (EINVAL);
3968fe387abSDmitrij Tejblum 	auio.uio_resid = nbyte;
3974160ccd9SAlan Cox 	auio.uio_rw = UIO_WRITE;
3984160ccd9SAlan Cox 	auio.uio_segflg = UIO_USERSPACE;
3994160ccd9SAlan Cox 	auio.uio_procp = p;
4004160ccd9SAlan Cox #ifdef KTRACE
4014160ccd9SAlan Cox 	/*
40242ebfbf2SBrian Feldman 	 * if tracing, save a copy of iovec and uio
4034160ccd9SAlan Cox 	 */
40442ebfbf2SBrian Feldman 	if (KTRPOINT(p, KTR_GENIO)) {
4054160ccd9SAlan Cox 		ktriov = aiov;
40642ebfbf2SBrian Feldman 		ktruio = auio;
4073c89e357SBrian Feldman 		didktr = 1;
40842ebfbf2SBrian Feldman 	}
4094160ccd9SAlan Cox #endif
4108fe387abSDmitrij Tejblum 	cnt = nbyte;
411c6ab5768SAlfred Perlstein 	if (fp->f_type == DTYPE_VNODE)
412279d7226SMatthew Dillon 		bwillwrite();
41313ccadd4SBrian Feldman 	if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) {
4144160ccd9SAlan Cox 		if (auio.uio_resid != cnt && (error == ERESTART ||
4154160ccd9SAlan Cox 		    error == EINTR || error == EWOULDBLOCK))
4164160ccd9SAlan Cox 			error = 0;
41719eb87d2SJohn Baldwin 		if (error == EPIPE) {
41819eb87d2SJohn Baldwin 			PROC_LOCK(p);
4194160ccd9SAlan Cox 			psignal(p, SIGPIPE);
42019eb87d2SJohn Baldwin 			PROC_UNLOCK(p);
42119eb87d2SJohn Baldwin 		}
4224160ccd9SAlan Cox 	}
4234160ccd9SAlan Cox 	cnt -= auio.uio_resid;
4244160ccd9SAlan Cox #ifdef KTRACE
4253c89e357SBrian Feldman 	if (didktr && error == 0) {
42642ebfbf2SBrian Feldman 		ktruio.uio_iov = &ktriov;
42742ebfbf2SBrian Feldman 		ktruio.uio_resid = cnt;
42842ebfbf2SBrian Feldman 		ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error);
42942ebfbf2SBrian Feldman 	}
4304160ccd9SAlan Cox #endif
4314160ccd9SAlan Cox 	p->p_retval[0] = cnt;
4324160ccd9SAlan Cox 	return (error);
4334160ccd9SAlan Cox }
4344160ccd9SAlan Cox 
4354160ccd9SAlan Cox /*
436df8bae1dSRodney W. Grimes  * Gather write system call
437df8bae1dSRodney W. Grimes  */
438d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
439df8bae1dSRodney W. Grimes struct writev_args {
440df8bae1dSRodney W. Grimes 	int	fd;
441df8bae1dSRodney W. Grimes 	struct	iovec *iovp;
442df8bae1dSRodney W. Grimes 	u_int	iovcnt;
443df8bae1dSRodney W. Grimes };
444d2d3e875SBruce Evans #endif
44526f9a767SRodney W. Grimes int
446cb226aaaSPoul-Henning Kamp writev(p, uap)
447df8bae1dSRodney W. Grimes 	struct proc *p;
448df8bae1dSRodney W. Grimes 	register struct writev_args *uap;
449df8bae1dSRodney W. Grimes {
450df8bae1dSRodney W. Grimes 	register struct file *fp;
451df8bae1dSRodney W. Grimes 	register struct filedesc *fdp = p->p_fd;
452df8bae1dSRodney W. Grimes 	struct uio auio;
453df8bae1dSRodney W. Grimes 	register struct iovec *iov;
454df8bae1dSRodney W. Grimes 	struct iovec *needfree;
455df8bae1dSRodney W. Grimes 	struct iovec aiov[UIO_SMALLIOV];
456df8bae1dSRodney W. Grimes 	long i, cnt, error = 0;
457df8bae1dSRodney W. Grimes 	u_int iovlen;
458df8bae1dSRodney W. Grimes #ifdef KTRACE
459df8bae1dSRodney W. Grimes 	struct iovec *ktriov = NULL;
46042ebfbf2SBrian Feldman 	struct uio ktruio;
461df8bae1dSRodney W. Grimes #endif
462df8bae1dSRodney W. Grimes 
463279d7226SMatthew Dillon 	if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL)
464df8bae1dSRodney W. Grimes 		return (EBADF);
465df8bae1dSRodney W. Grimes 	/* note: can't use iovlen until iovcnt is validated */
466df8bae1dSRodney W. Grimes 	iovlen = uap->iovcnt * sizeof (struct iovec);
467df8bae1dSRodney W. Grimes 	if (uap->iovcnt > UIO_SMALLIOV) {
4681aa3e7ddSBrian Feldman 		if (uap->iovcnt > UIO_MAXIOV) {
4691aa3e7ddSBrian Feldman 			needfree = NULL;
4701aa3e7ddSBrian Feldman 			error = EINVAL;
4711aa3e7ddSBrian Feldman 			goto done;
4721aa3e7ddSBrian Feldman 		}
473df8bae1dSRodney W. Grimes 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
474df8bae1dSRodney W. Grimes 		needfree = iov;
475df8bae1dSRodney W. Grimes 	} else {
476df8bae1dSRodney W. Grimes 		iov = aiov;
477df8bae1dSRodney W. Grimes 		needfree = NULL;
478df8bae1dSRodney W. Grimes 	}
479df8bae1dSRodney W. Grimes 	auio.uio_iov = iov;
480df8bae1dSRodney W. Grimes 	auio.uio_iovcnt = uap->iovcnt;
481df8bae1dSRodney W. Grimes 	auio.uio_rw = UIO_WRITE;
482df8bae1dSRodney W. Grimes 	auio.uio_segflg = UIO_USERSPACE;
483df8bae1dSRodney W. Grimes 	auio.uio_procp = p;
4842c1011f7SJohn Dyson 	auio.uio_offset = -1;
485bb56ec4aSPoul-Henning Kamp 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
486df8bae1dSRodney W. Grimes 		goto done;
487df8bae1dSRodney W. Grimes 	auio.uio_resid = 0;
488df8bae1dSRodney W. Grimes 	for (i = 0; i < uap->iovcnt; i++) {
489069e9bc1SDoug Rabson 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
490df8bae1dSRodney W. Grimes 			error = EINVAL;
491df8bae1dSRodney W. Grimes 			goto done;
492df8bae1dSRodney W. Grimes 		}
493069e9bc1SDoug Rabson 		auio.uio_resid += iov->iov_len;
494df8bae1dSRodney W. Grimes 		iov++;
495df8bae1dSRodney W. Grimes 	}
496df8bae1dSRodney W. Grimes #ifdef KTRACE
497df8bae1dSRodney W. Grimes 	/*
49842ebfbf2SBrian Feldman 	 * if tracing, save a copy of iovec and uio
499df8bae1dSRodney W. Grimes 	 */
500df8bae1dSRodney W. Grimes 	if (KTRPOINT(p, KTR_GENIO))  {
501df8bae1dSRodney W. Grimes 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
502df8bae1dSRodney W. Grimes 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
50342ebfbf2SBrian Feldman 		ktruio = auio;
504df8bae1dSRodney W. Grimes 	}
505df8bae1dSRodney W. Grimes #endif
506df8bae1dSRodney W. Grimes 	cnt = auio.uio_resid;
507a41ce5d3SMatthew Dillon 	if (fp->f_type == DTYPE_VNODE)
5089440653dSMatthew Dillon 		bwillwrite();
50913ccadd4SBrian Feldman 	if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) {
510df8bae1dSRodney W. Grimes 		if (auio.uio_resid != cnt && (error == ERESTART ||
511df8bae1dSRodney W. Grimes 		    error == EINTR || error == EWOULDBLOCK))
512df8bae1dSRodney W. Grimes 			error = 0;
51319eb87d2SJohn Baldwin 		if (error == EPIPE) {
51419eb87d2SJohn Baldwin 			PROC_LOCK(p);
515df8bae1dSRodney W. Grimes 			psignal(p, SIGPIPE);
51619eb87d2SJohn Baldwin 			PROC_UNLOCK(p);
51719eb87d2SJohn Baldwin 		}
518df8bae1dSRodney W. Grimes 	}
519df8bae1dSRodney W. Grimes 	cnt -= auio.uio_resid;
520df8bae1dSRodney W. Grimes #ifdef KTRACE
521df8bae1dSRodney W. Grimes 	if (ktriov != NULL) {
52242ebfbf2SBrian Feldman 		if (error == 0) {
52342ebfbf2SBrian Feldman 			ktruio.uio_iov = ktriov;
52442ebfbf2SBrian Feldman 			ktruio.uio_resid = cnt;
52542ebfbf2SBrian Feldman 			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio,
52642ebfbf2SBrian Feldman 			    error);
52742ebfbf2SBrian Feldman 		}
528df8bae1dSRodney W. Grimes 		FREE(ktriov, M_TEMP);
529df8bae1dSRodney W. Grimes 	}
530df8bae1dSRodney W. Grimes #endif
531cb226aaaSPoul-Henning Kamp 	p->p_retval[0] = cnt;
532df8bae1dSRodney W. Grimes done:
533d8177437SBrian Feldman 	fdrop(fp, p);
534df8bae1dSRodney W. Grimes 	if (needfree)
535df8bae1dSRodney W. Grimes 		FREE(needfree, M_IOV);
536df8bae1dSRodney W. Grimes 	return (error);
537df8bae1dSRodney W. Grimes }
538df8bae1dSRodney W. Grimes 
539df8bae1dSRodney W. Grimes /*
540df8bae1dSRodney W. Grimes  * Ioctl system call
541df8bae1dSRodney W. Grimes  */
542d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
543df8bae1dSRodney W. Grimes struct ioctl_args {
544df8bae1dSRodney W. Grimes 	int	fd;
545069e9bc1SDoug Rabson 	u_long	com;
546df8bae1dSRodney W. Grimes 	caddr_t	data;
547df8bae1dSRodney W. Grimes };
548d2d3e875SBruce Evans #endif
549df8bae1dSRodney W. Grimes /* ARGSUSED */
55026f9a767SRodney W. Grimes int
551cb226aaaSPoul-Henning Kamp ioctl(p, uap)
552df8bae1dSRodney W. Grimes 	struct proc *p;
553df8bae1dSRodney W. Grimes 	register struct ioctl_args *uap;
554df8bae1dSRodney W. Grimes {
555df8bae1dSRodney W. Grimes 	register struct file *fp;
556df8bae1dSRodney W. Grimes 	register struct filedesc *fdp;
557831b9ef2SDoug Rabson 	register u_long com;
558831b9ef2SDoug Rabson 	int error;
559df8bae1dSRodney W. Grimes 	register u_int size;
560df8bae1dSRodney W. Grimes 	caddr_t data, memp;
561df8bae1dSRodney W. Grimes 	int tmp;
562df8bae1dSRodney W. Grimes #define STK_PARAMS	128
563d2ba455cSMatthew Dillon 	union {
564df8bae1dSRodney W. Grimes 	    char stkbuf[STK_PARAMS];
565d2ba455cSMatthew Dillon 	    long align;
566d2ba455cSMatthew Dillon 	} ubuf;
567df8bae1dSRodney W. Grimes 
568df8bae1dSRodney W. Grimes 	fdp = p->p_fd;
569df8bae1dSRodney W. Grimes 	if ((u_int)uap->fd >= fdp->fd_nfiles ||
570df8bae1dSRodney W. Grimes 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
571df8bae1dSRodney W. Grimes 		return (EBADF);
572df8bae1dSRodney W. Grimes 
573df8bae1dSRodney W. Grimes 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
574df8bae1dSRodney W. Grimes 		return (EBADF);
575df8bae1dSRodney W. Grimes 
576df8bae1dSRodney W. Grimes 	switch (com = uap->com) {
577df8bae1dSRodney W. Grimes 	case FIONCLEX:
578df8bae1dSRodney W. Grimes 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
579df8bae1dSRodney W. Grimes 		return (0);
580df8bae1dSRodney W. Grimes 	case FIOCLEX:
581df8bae1dSRodney W. Grimes 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
582df8bae1dSRodney W. Grimes 		return (0);
583df8bae1dSRodney W. Grimes 	}
584df8bae1dSRodney W. Grimes 
585df8bae1dSRodney W. Grimes 	/*
586df8bae1dSRodney W. Grimes 	 * Interpret high order word to find amount of data to be
587df8bae1dSRodney W. Grimes 	 * copied to/from the user's address space.
588df8bae1dSRodney W. Grimes 	 */
589df8bae1dSRodney W. Grimes 	size = IOCPARM_LEN(com);
590df8bae1dSRodney W. Grimes 	if (size > IOCPARM_MAX)
591df8bae1dSRodney W. Grimes 		return (ENOTTY);
592279d7226SMatthew Dillon 
593279d7226SMatthew Dillon 	fhold(fp);
594279d7226SMatthew Dillon 
595df8bae1dSRodney W. Grimes 	memp = NULL;
596d2ba455cSMatthew Dillon 	if (size > sizeof (ubuf.stkbuf)) {
597df8bae1dSRodney W. Grimes 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
598df8bae1dSRodney W. Grimes 		data = memp;
599279d7226SMatthew Dillon 	} else {
600d2ba455cSMatthew Dillon 		data = ubuf.stkbuf;
601279d7226SMatthew Dillon 	}
602df8bae1dSRodney W. Grimes 	if (com&IOC_IN) {
603df8bae1dSRodney W. Grimes 		if (size) {
604df8bae1dSRodney W. Grimes 			error = copyin(uap->data, data, (u_int)size);
605df8bae1dSRodney W. Grimes 			if (error) {
606df8bae1dSRodney W. Grimes 				if (memp)
607df8bae1dSRodney W. Grimes 					free(memp, M_IOCTLOPS);
608279d7226SMatthew Dillon 				fdrop(fp, p);
609df8bae1dSRodney W. Grimes 				return (error);
610df8bae1dSRodney W. Grimes 			}
611279d7226SMatthew Dillon 		} else {
612df8bae1dSRodney W. Grimes 			*(caddr_t *)data = uap->data;
613279d7226SMatthew Dillon 		}
614279d7226SMatthew Dillon 	} else if ((com&IOC_OUT) && size) {
615df8bae1dSRodney W. Grimes 		/*
616df8bae1dSRodney W. Grimes 		 * Zero the buffer so the user always
617df8bae1dSRodney W. Grimes 		 * gets back something deterministic.
618df8bae1dSRodney W. Grimes 		 */
619df8bae1dSRodney W. Grimes 		bzero(data, size);
620279d7226SMatthew Dillon 	} else if (com&IOC_VOID) {
621df8bae1dSRodney W. Grimes 		*(caddr_t *)data = uap->data;
622279d7226SMatthew Dillon 	}
623df8bae1dSRodney W. Grimes 
624df8bae1dSRodney W. Grimes 	switch (com) {
625df8bae1dSRodney W. Grimes 
626df8bae1dSRodney W. Grimes 	case FIONBIO:
627bb56ec4aSPoul-Henning Kamp 		if ((tmp = *(int *)data))
628df8bae1dSRodney W. Grimes 			fp->f_flag |= FNONBLOCK;
629df8bae1dSRodney W. Grimes 		else
630df8bae1dSRodney W. Grimes 			fp->f_flag &= ~FNONBLOCK;
63113ccadd4SBrian Feldman 		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
632df8bae1dSRodney W. Grimes 		break;
633df8bae1dSRodney W. Grimes 
634df8bae1dSRodney W. Grimes 	case FIOASYNC:
635bb56ec4aSPoul-Henning Kamp 		if ((tmp = *(int *)data))
636df8bae1dSRodney W. Grimes 			fp->f_flag |= FASYNC;
637df8bae1dSRodney W. Grimes 		else
638df8bae1dSRodney W. Grimes 			fp->f_flag &= ~FASYNC;
63913ccadd4SBrian Feldman 		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
640df8bae1dSRodney W. Grimes 		break;
641df8bae1dSRodney W. Grimes 
642df8bae1dSRodney W. Grimes 	default:
64313ccadd4SBrian Feldman 		error = fo_ioctl(fp, com, data, p);
644df8bae1dSRodney W. Grimes 		/*
645df8bae1dSRodney W. Grimes 		 * Copy any data to user, size was
646df8bae1dSRodney W. Grimes 		 * already set and checked above.
647df8bae1dSRodney W. Grimes 		 */
648df8bae1dSRodney W. Grimes 		if (error == 0 && (com&IOC_OUT) && size)
649df8bae1dSRodney W. Grimes 			error = copyout(data, uap->data, (u_int)size);
650df8bae1dSRodney W. Grimes 		break;
651df8bae1dSRodney W. Grimes 	}
652df8bae1dSRodney W. Grimes 	if (memp)
653df8bae1dSRodney W. Grimes 		free(memp, M_IOCTLOPS);
654279d7226SMatthew Dillon 	fdrop(fp, p);
655df8bae1dSRodney W. Grimes 	return (error);
656df8bae1dSRodney W. Grimes }
657df8bae1dSRodney W. Grimes 
6588cb96f20SPeter Wemm static int	nselcoll;	/* Select collisions since boot */
659265fc98fSSeigo Tanimura struct cv	selwait;
6608cb96f20SPeter Wemm SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
661df8bae1dSRodney W. Grimes 
/*
 * Select system call: argument layout used when <sys/sysproto.h> has
 * not already supplied one.
 */
#ifndef _SYS_SYSPROTO_H_
struct select_args {
	int	nd;		/* number of descriptors to examine */
	fd_set	*in;		/* set polled with POLLRDNORM, or NULL */
	fd_set	*ou;		/* set polled with POLLWRNORM, or NULL */
	fd_set	*ex;		/* set polled with POLLRDBAND, or NULL */
	struct	timeval *tv;	/* timeout; NULL waits without a timeout */
};
#endif
67226f9a767SRodney W. Grimes int
673cb226aaaSPoul-Henning Kamp select(p, uap)
674df8bae1dSRodney W. Grimes 	register struct proc *p;
675df8bae1dSRodney W. Grimes 	register struct select_args *uap;
676df8bae1dSRodney W. Grimes {
677d5e4d7e1SBruce Evans 	/*
678d5e4d7e1SBruce Evans 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
679d5e4d7e1SBruce Evans 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
680d5e4d7e1SBruce Evans 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
681d5e4d7e1SBruce Evans 	 * of 256.
682d5e4d7e1SBruce Evans 	 */
683d5e4d7e1SBruce Evans 	fd_mask s_selbits[howmany(2048, NFDBITS)];
684265fc98fSSeigo Tanimura 	fd_mask s_heldbits[howmany(2048, NFDBITS)];
685265fc98fSSeigo Tanimura 	fd_mask *ibits[3], *obits[3], *selbits, *sbp, *heldbits, *hibits, *hobits;
68600af9731SPoul-Henning Kamp 	struct timeval atv, rtv, ttv;
687265fc98fSSeigo Tanimura 	int ncoll, error, timo, i;
688d5e4d7e1SBruce Evans 	u_int nbufbytes, ncpbytes, nfdbits;
689df8bae1dSRodney W. Grimes 
690b08f7993SSujal Patel 	if (uap->nd < 0)
691acbfbfeaSSujal Patel 		return (EINVAL);
692df8bae1dSRodney W. Grimes 	if (uap->nd > p->p_fd->fd_nfiles)
693df8bae1dSRodney W. Grimes 		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
694b08f7993SSujal Patel 
695d5e4d7e1SBruce Evans 	/*
696d5e4d7e1SBruce Evans 	 * Allocate just enough bits for the non-null fd_sets.  Use the
697d5e4d7e1SBruce Evans 	 * preallocated auto buffer if possible.
698d5e4d7e1SBruce Evans 	 */
699d5e4d7e1SBruce Evans 	nfdbits = roundup(uap->nd, NFDBITS);
700d5e4d7e1SBruce Evans 	ncpbytes = nfdbits / NBBY;
701d5e4d7e1SBruce Evans 	nbufbytes = 0;
702d5e4d7e1SBruce Evans 	if (uap->in != NULL)
703d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
704d5e4d7e1SBruce Evans 	if (uap->ou != NULL)
705d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
706d5e4d7e1SBruce Evans 	if (uap->ex != NULL)
707d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
708d5e4d7e1SBruce Evans 	if (nbufbytes <= sizeof s_selbits)
709d5e4d7e1SBruce Evans 		selbits = &s_selbits[0];
710d5e4d7e1SBruce Evans 	else
711d5e4d7e1SBruce Evans 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
712265fc98fSSeigo Tanimura 	if (2 * ncpbytes <= sizeof s_heldbits) {
713265fc98fSSeigo Tanimura 		bzero(s_heldbits, sizeof(s_heldbits));
714265fc98fSSeigo Tanimura 		heldbits = &s_heldbits[0];
715265fc98fSSeigo Tanimura 	} else
716265fc98fSSeigo Tanimura 		heldbits = malloc(2 * ncpbytes, M_SELECT, M_WAITOK | M_ZERO);
717b08f7993SSujal Patel 
718b08f7993SSujal Patel 	/*
719d5e4d7e1SBruce Evans 	 * Assign pointers into the bit buffers and fetch the input bits.
720d5e4d7e1SBruce Evans 	 * Put the output buffers together so that they can be bzeroed
721d5e4d7e1SBruce Evans 	 * together.
722b08f7993SSujal Patel 	 */
723d5e4d7e1SBruce Evans 	sbp = selbits;
724265fc98fSSeigo Tanimura 	hibits = heldbits + ncpbytes / sizeof *heldbits;
725265fc98fSSeigo Tanimura 	hobits = heldbits;
726df8bae1dSRodney W. Grimes #define	getbits(name, x) \
727d5e4d7e1SBruce Evans 	do {								\
728d5e4d7e1SBruce Evans 		if (uap->name == NULL)					\
729d5e4d7e1SBruce Evans 			ibits[x] = NULL;				\
730d5e4d7e1SBruce Evans 		else {							\
731d5e4d7e1SBruce Evans 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
732d5e4d7e1SBruce Evans 			obits[x] = sbp;					\
733d5e4d7e1SBruce Evans 			sbp += ncpbytes / sizeof *sbp;			\
734d5e4d7e1SBruce Evans 			error = copyin(uap->name, ibits[x], ncpbytes);	\
735265fc98fSSeigo Tanimura 			if (error != 0)					\
736265fc98fSSeigo Tanimura 				goto done_noproclock;			\
737265fc98fSSeigo Tanimura 			for (i = 0;					\
738265fc98fSSeigo Tanimura 			     i < ncpbytes / sizeof ibits[i][0];		\
739265fc98fSSeigo Tanimura 			     i++)					\
740265fc98fSSeigo Tanimura 				hibits[i] |= ibits[x][i];		\
741e04ac2feSJohn Baldwin 		}							\
742d5e4d7e1SBruce Evans 	} while (0)
743df8bae1dSRodney W. Grimes 	getbits(in, 0);
744df8bae1dSRodney W. Grimes 	getbits(ou, 1);
745df8bae1dSRodney W. Grimes 	getbits(ex, 2);
746df8bae1dSRodney W. Grimes #undef	getbits
747d5e4d7e1SBruce Evans 	if (nbufbytes != 0)
748d5e4d7e1SBruce Evans 		bzero(selbits, nbufbytes / 2);
749df8bae1dSRodney W. Grimes 
750df8bae1dSRodney W. Grimes 	if (uap->tv) {
751df8bae1dSRodney W. Grimes 		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
752df8bae1dSRodney W. Grimes 			sizeof (atv));
753265fc98fSSeigo Tanimura 		if (error)
754265fc98fSSeigo Tanimura 			goto done_noproclock;
755df8bae1dSRodney W. Grimes 		if (itimerfix(&atv)) {
756df8bae1dSRodney W. Grimes 			error = EINVAL;
757265fc98fSSeigo Tanimura 			goto done_noproclock;
758df8bae1dSRodney W. Grimes 		}
759c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
76000af9731SPoul-Henning Kamp 		timevaladd(&atv, &rtv);
7619c386f6bSJohn Baldwin 	} else {
76200af9731SPoul-Henning Kamp 		atv.tv_sec = 0;
7639c386f6bSJohn Baldwin 		atv.tv_usec = 0;
7649c386f6bSJohn Baldwin 	}
765265fc98fSSeigo Tanimura 	selholddrop(p, hibits, hobits, uap->nd, 1);
76600af9731SPoul-Henning Kamp 	timo = 0;
767e04ac2feSJohn Baldwin 	PROC_LOCK(p);
768df8bae1dSRodney W. Grimes retry:
769df8bae1dSRodney W. Grimes 	ncoll = nselcoll;
770df8bae1dSRodney W. Grimes 	p->p_flag |= P_SELECT;
771cb226aaaSPoul-Henning Kamp 	error = selscan(p, ibits, obits, uap->nd);
772cb226aaaSPoul-Henning Kamp 	if (error || p->p_retval[0])
773df8bae1dSRodney W. Grimes 		goto done;
7744da144c0SJohn Baldwin 	if (atv.tv_sec || atv.tv_usec) {
775c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
77600af9731SPoul-Henning Kamp 		if (timevalcmp(&rtv, &atv, >=))
777df8bae1dSRodney W. Grimes 			goto done;
77800af9731SPoul-Henning Kamp 		ttv = atv;
77900af9731SPoul-Henning Kamp 		timevalsub(&ttv, &rtv);
78000af9731SPoul-Henning Kamp 		timo = ttv.tv_sec > 24 * 60 * 60 ?
78100af9731SPoul-Henning Kamp 		    24 * 60 * 60 * hz : tvtohz(&ttv);
782df8bae1dSRodney W. Grimes 	}
783df8bae1dSRodney W. Grimes 	p->p_flag &= ~P_SELECT;
784bfbbc4aaSJason Evans 
785265fc98fSSeigo Tanimura 	if (timo > 0)
786265fc98fSSeigo Tanimura 		error = cv_timedwait_sig(&selwait, &p->p_mtx, timo);
787265fc98fSSeigo Tanimura 	else
788265fc98fSSeigo Tanimura 		error = cv_wait_sig(&selwait, &p->p_mtx);
789bfbbc4aaSJason Evans 
790df8bae1dSRodney W. Grimes 	if (error == 0)
791df8bae1dSRodney W. Grimes 		goto retry;
792265fc98fSSeigo Tanimura 
793df8bae1dSRodney W. Grimes done:
794df8bae1dSRodney W. Grimes 	p->p_flag &= ~P_SELECT;
795e04ac2feSJohn Baldwin 	PROC_UNLOCK(p);
796265fc98fSSeigo Tanimura 	selholddrop(p, hibits, hobits, uap->nd, 0);
797265fc98fSSeigo Tanimura done_noproclock:
798df8bae1dSRodney W. Grimes 	/* select is not restarted after signals... */
799df8bae1dSRodney W. Grimes 	if (error == ERESTART)
800df8bae1dSRodney W. Grimes 		error = EINTR;
801df8bae1dSRodney W. Grimes 	if (error == EWOULDBLOCK)
802df8bae1dSRodney W. Grimes 		error = 0;
803df8bae1dSRodney W. Grimes #define	putbits(name, x) \
804d5e4d7e1SBruce Evans 	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
805df8bae1dSRodney W. Grimes 		error = error2;
806df8bae1dSRodney W. Grimes 	if (error == 0) {
807df8bae1dSRodney W. Grimes 		int error2;
808df8bae1dSRodney W. Grimes 
809df8bae1dSRodney W. Grimes 		putbits(in, 0);
810df8bae1dSRodney W. Grimes 		putbits(ou, 1);
811df8bae1dSRodney W. Grimes 		putbits(ex, 2);
812df8bae1dSRodney W. Grimes #undef putbits
813df8bae1dSRodney W. Grimes 	}
814d5e4d7e1SBruce Evans 	if (selbits != &s_selbits[0])
815d5e4d7e1SBruce Evans 		free(selbits, M_SELECT);
816265fc98fSSeigo Tanimura 	if (heldbits != &s_heldbits[0])
817265fc98fSSeigo Tanimura 		free(heldbits, M_SELECT);
818df8bae1dSRodney W. Grimes 	return (error);
819df8bae1dSRodney W. Grimes }
820df8bae1dSRodney W. Grimes 
82187b6de2bSPoul-Henning Kamp static int
822265fc98fSSeigo Tanimura selholddrop(p, ibits, obits, nfd, hold)
823265fc98fSSeigo Tanimura 	struct proc *p;
824265fc98fSSeigo Tanimura 	fd_mask *ibits, *obits;
825265fc98fSSeigo Tanimura 	int nfd, hold;
826265fc98fSSeigo Tanimura {
827265fc98fSSeigo Tanimura 	struct filedesc *fdp = p->p_fd;
828265fc98fSSeigo Tanimura 	int i, fd;
829265fc98fSSeigo Tanimura 	fd_mask bits;
830265fc98fSSeigo Tanimura 	struct file *fp;
831265fc98fSSeigo Tanimura 
832265fc98fSSeigo Tanimura 	for (i = 0; i < nfd; i += NFDBITS) {
833265fc98fSSeigo Tanimura 		if (hold)
834265fc98fSSeigo Tanimura 			bits = ibits[i/NFDBITS];
835265fc98fSSeigo Tanimura 		else
836265fc98fSSeigo Tanimura 			bits = obits[i/NFDBITS];
837265fc98fSSeigo Tanimura 		/* ffs(int mask) not portable, fd_mask is long */
838265fc98fSSeigo Tanimura 		for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
839265fc98fSSeigo Tanimura 			if (!(bits & 1))
840265fc98fSSeigo Tanimura 				continue;
841265fc98fSSeigo Tanimura 			fp = fdp->fd_ofiles[fd];
842265fc98fSSeigo Tanimura 			if (fp == NULL)
843265fc98fSSeigo Tanimura 				return (EBADF);
844265fc98fSSeigo Tanimura 			if (hold) {
845265fc98fSSeigo Tanimura 				fhold(fp);
846265fc98fSSeigo Tanimura 				obits[(fd)/NFDBITS] |=
847265fc98fSSeigo Tanimura 				    ((fd_mask)1 << ((fd) % NFDBITS));
848265fc98fSSeigo Tanimura 			} else
849265fc98fSSeigo Tanimura 				fdrop(fp, p);
850265fc98fSSeigo Tanimura 		}
851265fc98fSSeigo Tanimura 	}
852265fc98fSSeigo Tanimura 	return (0);
853265fc98fSSeigo Tanimura }
854265fc98fSSeigo Tanimura 
855265fc98fSSeigo Tanimura static int
856cb226aaaSPoul-Henning Kamp selscan(p, ibits, obits, nfd)
857df8bae1dSRodney W. Grimes 	struct proc *p;
858b08f7993SSujal Patel 	fd_mask **ibits, **obits;
859cb226aaaSPoul-Henning Kamp 	int nfd;
860df8bae1dSRodney W. Grimes {
861f082218cSPeter Wemm 	struct filedesc *fdp = p->p_fd;
862f082218cSPeter Wemm 	int msk, i, fd;
863f082218cSPeter Wemm 	fd_mask bits;
864df8bae1dSRodney W. Grimes 	struct file *fp;
865df8bae1dSRodney W. Grimes 	int n = 0;
8662087c896SBruce Evans 	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
86742d11757SPeter Wemm 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
868df8bae1dSRodney W. Grimes 
869df8bae1dSRodney W. Grimes 	for (msk = 0; msk < 3; msk++) {
870d5e4d7e1SBruce Evans 		if (ibits[msk] == NULL)
871d5e4d7e1SBruce Evans 			continue;
872df8bae1dSRodney W. Grimes 		for (i = 0; i < nfd; i += NFDBITS) {
873b08f7993SSujal Patel 			bits = ibits[msk][i/NFDBITS];
874f082218cSPeter Wemm 			/* ffs(int mask) not portable, fd_mask is long */
875f082218cSPeter Wemm 			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
876f082218cSPeter Wemm 				if (!(bits & 1))
877f082218cSPeter Wemm 					continue;
878df8bae1dSRodney W. Grimes 				fp = fdp->fd_ofiles[fd];
879df8bae1dSRodney W. Grimes 				if (fp == NULL)
880df8bae1dSRodney W. Grimes 					return (EBADF);
88113ccadd4SBrian Feldman 				if (fo_poll(fp, flag[msk], fp->f_cred, p)) {
882b08f7993SSujal Patel 					obits[msk][(fd)/NFDBITS] |=
883f082218cSPeter Wemm 					    ((fd_mask)1 << ((fd) % NFDBITS));
884df8bae1dSRodney W. Grimes 					n++;
885df8bae1dSRodney W. Grimes 				}
886df8bae1dSRodney W. Grimes 			}
887df8bae1dSRodney W. Grimes 		}
888df8bae1dSRodney W. Grimes 	}
889cb226aaaSPoul-Henning Kamp 	p->p_retval[0] = n;
890df8bae1dSRodney W. Grimes 	return (0);
891df8bae1dSRodney W. Grimes }
892df8bae1dSRodney W. Grimes 
/*
 * Poll system call: argument layout used when <sys/sysproto.h> has
 * not already supplied one.
 */
#ifndef _SYS_SYSPROTO_H_
struct poll_args {
	struct pollfd *fds;	/* user array of descriptors to poll */
	u_int	nfds;		/* number of entries in fds */
	int	timeout;	/* milliseconds, or INFTIM for no timeout */
};
#endif
90342d11757SPeter Wemm int
904cb226aaaSPoul-Henning Kamp poll(p, uap)
905ea0237edSJonathan Lemon 	struct proc *p;
906ea0237edSJonathan Lemon 	struct poll_args *uap;
90742d11757SPeter Wemm {
90842d11757SPeter Wemm 	caddr_t bits;
90942d11757SPeter Wemm 	char smallbits[32 * sizeof(struct pollfd)];
91000af9731SPoul-Henning Kamp 	struct timeval atv, rtv, ttv;
911265fc98fSSeigo Tanimura 	int ncoll, error = 0, timo;
912ea0237edSJonathan Lemon 	u_int nfds;
91342d11757SPeter Wemm 	size_t ni;
914265fc98fSSeigo Tanimura 	struct pollfd p_heldbits[32];
915265fc98fSSeigo Tanimura 	struct pollfd *heldbits;
91642d11757SPeter Wemm 
91789b71647SPeter Wemm 	nfds = SCARG(uap, nfds);
91889b71647SPeter Wemm 	/*
9192bd5ac33SPeter Wemm 	 * This is kinda bogus.  We have fd limits, but that is not
9202bd5ac33SPeter Wemm 	 * really related to the size of the pollfd array.  Make sure
9212bd5ac33SPeter Wemm 	 * we let the process use at least FD_SETSIZE entries and at
9222bd5ac33SPeter Wemm 	 * least enough for the current limits.  We want to be reasonably
9232bd5ac33SPeter Wemm 	 * safe, but not overly restrictive.
92489b71647SPeter Wemm 	 */
925ea0237edSJonathan Lemon 	if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
92689b71647SPeter Wemm 		return (EINVAL);
92789b71647SPeter Wemm 	ni = nfds * sizeof(struct pollfd);
92842d11757SPeter Wemm 	if (ni > sizeof(smallbits))
92942d11757SPeter Wemm 		bits = malloc(ni, M_TEMP, M_WAITOK);
93042d11757SPeter Wemm 	else
93142d11757SPeter Wemm 		bits = smallbits;
932265fc98fSSeigo Tanimura 	if (ni > sizeof(p_heldbits))
933265fc98fSSeigo Tanimura 		heldbits = malloc(ni, M_TEMP, M_WAITOK);
934265fc98fSSeigo Tanimura 	else {
935265fc98fSSeigo Tanimura 		bzero(p_heldbits, sizeof(p_heldbits));
936265fc98fSSeigo Tanimura 		heldbits = p_heldbits;
937265fc98fSSeigo Tanimura 	}
93842d11757SPeter Wemm 	error = copyin(SCARG(uap, fds), bits, ni);
93942d11757SPeter Wemm 	if (error)
940265fc98fSSeigo Tanimura 		goto done_noproclock;
941265fc98fSSeigo Tanimura 	bcopy(bits, heldbits, ni);
94242d11757SPeter Wemm 	if (SCARG(uap, timeout) != INFTIM) {
94342d11757SPeter Wemm 		atv.tv_sec = SCARG(uap, timeout) / 1000;
94442d11757SPeter Wemm 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
94542d11757SPeter Wemm 		if (itimerfix(&atv)) {
94642d11757SPeter Wemm 			error = EINVAL;
947265fc98fSSeigo Tanimura 			goto done_noproclock;
94842d11757SPeter Wemm 		}
949c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
95000af9731SPoul-Henning Kamp 		timevaladd(&atv, &rtv);
9519c386f6bSJohn Baldwin 	} else {
95200af9731SPoul-Henning Kamp 		atv.tv_sec = 0;
9539c386f6bSJohn Baldwin 		atv.tv_usec = 0;
9549c386f6bSJohn Baldwin 	}
955265fc98fSSeigo Tanimura 	pollholddrop(p, heldbits, nfds, 1);
95600af9731SPoul-Henning Kamp 	timo = 0;
957265fc98fSSeigo Tanimura 	PROC_LOCK(p);
95842d11757SPeter Wemm retry:
95942d11757SPeter Wemm 	ncoll = nselcoll;
96042d11757SPeter Wemm 	p->p_flag |= P_SELECT;
96189b71647SPeter Wemm 	error = pollscan(p, (struct pollfd *)bits, nfds);
962cb226aaaSPoul-Henning Kamp 	if (error || p->p_retval[0])
96342d11757SPeter Wemm 		goto done;
9644da144c0SJohn Baldwin 	if (atv.tv_sec || atv.tv_usec) {
965c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
96600af9731SPoul-Henning Kamp 		if (timevalcmp(&rtv, &atv, >=))
96742d11757SPeter Wemm 			goto done;
96800af9731SPoul-Henning Kamp 		ttv = atv;
96900af9731SPoul-Henning Kamp 		timevalsub(&ttv, &rtv);
97000af9731SPoul-Henning Kamp 		timo = ttv.tv_sec > 24 * 60 * 60 ?
97100af9731SPoul-Henning Kamp 		    24 * 60 * 60 * hz : tvtohz(&ttv);
97242d11757SPeter Wemm 	}
97342d11757SPeter Wemm 	p->p_flag &= ~P_SELECT;
974265fc98fSSeigo Tanimura 	if (timo > 0)
975265fc98fSSeigo Tanimura 		error = cv_timedwait_sig(&selwait, &p->p_mtx, timo);
976265fc98fSSeigo Tanimura 	else
977265fc98fSSeigo Tanimura 		error = cv_wait_sig(&selwait, &p->p_mtx);
97842d11757SPeter Wemm 	if (error == 0)
97942d11757SPeter Wemm 		goto retry;
980265fc98fSSeigo Tanimura 
98142d11757SPeter Wemm done:
98242d11757SPeter Wemm 	p->p_flag &= ~P_SELECT;
983e04ac2feSJohn Baldwin 	PROC_UNLOCK(p);
984265fc98fSSeigo Tanimura 	pollholddrop(p, heldbits, nfds, 0);
985265fc98fSSeigo Tanimura done_noproclock:
98642d11757SPeter Wemm 	/* poll is not restarted after signals... */
98742d11757SPeter Wemm 	if (error == ERESTART)
98842d11757SPeter Wemm 		error = EINTR;
98942d11757SPeter Wemm 	if (error == EWOULDBLOCK)
99042d11757SPeter Wemm 		error = 0;
99142d11757SPeter Wemm 	if (error == 0) {
99242d11757SPeter Wemm 		error = copyout(bits, SCARG(uap, fds), ni);
99342d11757SPeter Wemm 		if (error)
99442d11757SPeter Wemm 			goto out;
99542d11757SPeter Wemm 	}
99642d11757SPeter Wemm out:
99742d11757SPeter Wemm 	if (ni > sizeof(smallbits))
99842d11757SPeter Wemm 		free(bits, M_TEMP);
999265fc98fSSeigo Tanimura 	if (ni > sizeof(p_heldbits))
1000265fc98fSSeigo Tanimura 		free(heldbits, M_TEMP);
100142d11757SPeter Wemm 	return (error);
100242d11757SPeter Wemm }
100342d11757SPeter Wemm 
100442d11757SPeter Wemm static int
1005265fc98fSSeigo Tanimura pollholddrop(p, fds, nfd, hold)
1006265fc98fSSeigo Tanimura 	struct proc *p;
1007265fc98fSSeigo Tanimura 	struct pollfd *fds;
1008265fc98fSSeigo Tanimura 	u_int nfd;
1009265fc98fSSeigo Tanimura 	int hold;
1010265fc98fSSeigo Tanimura {
1011265fc98fSSeigo Tanimura 	register struct filedesc *fdp = p->p_fd;
1012265fc98fSSeigo Tanimura 	int i;
1013265fc98fSSeigo Tanimura 	struct file *fp;
1014265fc98fSSeigo Tanimura 
1015265fc98fSSeigo Tanimura 	for (i = 0; i < nfd; i++, fds++) {
1016265fc98fSSeigo Tanimura 		if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) {
1017265fc98fSSeigo Tanimura 			fp = fdp->fd_ofiles[fds->fd];
1018265fc98fSSeigo Tanimura 			if (hold) {
1019265fc98fSSeigo Tanimura 				if (fp != NULL) {
1020265fc98fSSeigo Tanimura 					fhold(fp);
1021265fc98fSSeigo Tanimura 					fds->revents = 1;
1022265fc98fSSeigo Tanimura 				} else
1023265fc98fSSeigo Tanimura 					fds->revents = 0;
1024265fc98fSSeigo Tanimura 			} else if(fp != NULL && fds->revents)
1025265fc98fSSeigo Tanimura 				fdrop(fp, p);
1026265fc98fSSeigo Tanimura 		}
1027265fc98fSSeigo Tanimura 	}
1028265fc98fSSeigo Tanimura 	return (0);
1029265fc98fSSeigo Tanimura }
1030265fc98fSSeigo Tanimura 
1031265fc98fSSeigo Tanimura static int
1032cb226aaaSPoul-Henning Kamp pollscan(p, fds, nfd)
103342d11757SPeter Wemm 	struct proc *p;
103442d11757SPeter Wemm 	struct pollfd *fds;
1035ea0237edSJonathan Lemon 	u_int nfd;
103642d11757SPeter Wemm {
103742d11757SPeter Wemm 	register struct filedesc *fdp = p->p_fd;
103842d11757SPeter Wemm 	int i;
103942d11757SPeter Wemm 	struct file *fp;
104042d11757SPeter Wemm 	int n = 0;
104142d11757SPeter Wemm 
104242d11757SPeter Wemm 	for (i = 0; i < nfd; i++, fds++) {
1043337c9691SJordan K. Hubbard 		if (fds->fd >= fdp->fd_nfiles) {
104442d11757SPeter Wemm 			fds->revents = POLLNVAL;
104542d11757SPeter Wemm 			n++;
1046337c9691SJordan K. Hubbard 		} else if (fds->fd < 0) {
1047337c9691SJordan K. Hubbard 			fds->revents = 0;
104842d11757SPeter Wemm 		} else {
104942d11757SPeter Wemm 			fp = fdp->fd_ofiles[fds->fd];
1050279d7226SMatthew Dillon 			if (fp == NULL) {
105142d11757SPeter Wemm 				fds->revents = POLLNVAL;
105242d11757SPeter Wemm 				n++;
105342d11757SPeter Wemm 			} else {
10542087c896SBruce Evans 				/*
10552087c896SBruce Evans 				 * Note: backend also returns POLLHUP and
10562087c896SBruce Evans 				 * POLLERR if appropriate.
10572087c896SBruce Evans 				 */
105813ccadd4SBrian Feldman 				fds->revents = fo_poll(fp, fds->events,
105913ccadd4SBrian Feldman 				    fp->f_cred, p);
106042d11757SPeter Wemm 				if (fds->revents != 0)
106142d11757SPeter Wemm 					n++;
106242d11757SPeter Wemm 			}
106342d11757SPeter Wemm 		}
106442d11757SPeter Wemm 	}
1065cb226aaaSPoul-Henning Kamp 	p->p_retval[0] = n;
106642d11757SPeter Wemm 	return (0);
106742d11757SPeter Wemm }
106842d11757SPeter Wemm 
/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation..  OpenBSD uses select ops.
 *
 * The argument layout has the same members, in the same order, as
 * struct poll_args.
 */
#ifndef _SYS_SYSPROTO_H_
struct openbsd_poll_args {
	struct pollfd *fds;	/* user array of descriptors to poll */
	u_int	nfds;		/* number of entries in fds */
	int	timeout;	/* passed through to poll() unchanged */
};
#endif
/*
 * OpenBSD-compatible poll entry point: reinterpret the arguments as
 * struct poll_args and hand them to poll(), returning its result.
 */
int
openbsd_poll(p, uap)
	register struct proc *p;
	register struct openbsd_poll_args *uap;
{
	struct poll_args *args;

	args = (struct poll_args *)uap;
	return (poll(p, args));
}
108742d11757SPeter Wemm 
/*
 * Poll routine that reports every requested normal read/write event as
 * ready: returns the subset of events made up of POLLIN, POLLOUT,
 * POLLRDNORM and POLLWRNORM.  dev and p are not examined.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	int ready;

	ready = POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM;
	ready &= events;
	return (ready);
}
1098df8bae1dSRodney W. Grimes 
1099df8bae1dSRodney W. Grimes /*
1100df8bae1dSRodney W. Grimes  * Record a select request.
1101df8bae1dSRodney W. Grimes  */
1102df8bae1dSRodney W. Grimes void
1103df8bae1dSRodney W. Grimes selrecord(selector, sip)
1104df8bae1dSRodney W. Grimes 	struct proc *selector;
1105df8bae1dSRodney W. Grimes 	struct selinfo *sip;
1106df8bae1dSRodney W. Grimes {
1107df8bae1dSRodney W. Grimes 	struct proc *p;
1108df8bae1dSRodney W. Grimes 	pid_t mypid;
1109df8bae1dSRodney W. Grimes 
1110df8bae1dSRodney W. Grimes 	mypid = selector->p_pid;
1111df8bae1dSRodney W. Grimes 	if (sip->si_pid == mypid)
1112df8bae1dSRodney W. Grimes 		return;
1113e04ac2feSJohn Baldwin 	if (sip->si_pid && (p = pfind(sip->si_pid))) {
11149ed346baSBosko Milekic 		mtx_lock_spin(&sched_lock);
1115e04ac2feSJohn Baldwin 	    	if (p->p_wchan == (caddr_t)&selwait) {
11169ed346baSBosko Milekic 			mtx_unlock_spin(&sched_lock);
111733a9ed9dSJohn Baldwin 			PROC_UNLOCK(p);
1118df8bae1dSRodney W. Grimes 			sip->si_flags |= SI_COLL;
1119e04ac2feSJohn Baldwin 			return;
1120e04ac2feSJohn Baldwin 		}
11219ed346baSBosko Milekic 		mtx_unlock_spin(&sched_lock);
112233a9ed9dSJohn Baldwin 		PROC_UNLOCK(p);
1123e04ac2feSJohn Baldwin 	}
1124df8bae1dSRodney W. Grimes 	sip->si_pid = mypid;
1125df8bae1dSRodney W. Grimes }
1126df8bae1dSRodney W. Grimes 
1127df8bae1dSRodney W. Grimes /*
1128df8bae1dSRodney W. Grimes  * Do a wakeup when a selectable event occurs.
1129df8bae1dSRodney W. Grimes  */
1130df8bae1dSRodney W. Grimes void
1131df8bae1dSRodney W. Grimes selwakeup(sip)
1132df8bae1dSRodney W. Grimes 	register struct selinfo *sip;
1133df8bae1dSRodney W. Grimes {
1134df8bae1dSRodney W. Grimes 	register struct proc *p;
1135df8bae1dSRodney W. Grimes 
1136df8bae1dSRodney W. Grimes 	if (sip->si_pid == 0)
1137df8bae1dSRodney W. Grimes 		return;
1138df8bae1dSRodney W. Grimes 	if (sip->si_flags & SI_COLL) {
1139df8bae1dSRodney W. Grimes 		nselcoll++;
1140df8bae1dSRodney W. Grimes 		sip->si_flags &= ~SI_COLL;
1141265fc98fSSeigo Tanimura 		cv_broadcast(&selwait);
1142df8bae1dSRodney W. Grimes 	}
1143df8bae1dSRodney W. Grimes 	p = pfind(sip->si_pid);
1144df8bae1dSRodney W. Grimes 	sip->si_pid = 0;
1145df8bae1dSRodney W. Grimes 	if (p != NULL) {
11469ed346baSBosko Milekic 		mtx_lock_spin(&sched_lock);
1147df8bae1dSRodney W. Grimes 		if (p->p_wchan == (caddr_t)&selwait) {
1148df8bae1dSRodney W. Grimes 			if (p->p_stat == SSLEEP)
1149df8bae1dSRodney W. Grimes 				setrunnable(p);
1150df8bae1dSRodney W. Grimes 			else
1151265fc98fSSeigo Tanimura 				cv_waitq_remove(p);
115233a9ed9dSJohn Baldwin 		} else
1153e04ac2feSJohn Baldwin 			p->p_flag &= ~P_SELECT;
115433a9ed9dSJohn Baldwin 		mtx_unlock_spin(&sched_lock);
1155e04ac2feSJohn Baldwin 		PROC_UNLOCK(p);
1156e04ac2feSJohn Baldwin 	}
1157df8bae1dSRodney W. Grimes }
1158265fc98fSSeigo Tanimura 
1159265fc98fSSeigo Tanimura static void selectinit __P((void *));
1160265fc98fSSeigo Tanimura SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
1161265fc98fSSeigo Tanimura 
1162265fc98fSSeigo Tanimura /* ARGSUSED*/
1163265fc98fSSeigo Tanimura static void
1164265fc98fSSeigo Tanimura selectinit(dummy)
1165265fc98fSSeigo Tanimura 	void *dummy;
1166265fc98fSSeigo Tanimura {
1167265fc98fSSeigo Tanimura 	cv_init(&selwait, "select");
1168265fc98fSSeigo Tanimura }
1169