/* xref: /freebsd/sys/kern/sys_generic.c (revision 104a9b7e3edbd88cdda0698c5c77a2ad6dafcc16) */
/*
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 * $FreeBSD$
 */
41df8bae1dSRodney W. Grimes 
42db6a20e2SGarrett Wollman #include "opt_ktrace.h"
43db6a20e2SGarrett Wollman 
44df8bae1dSRodney W. Grimes #include <sys/param.h>
45df8bae1dSRodney W. Grimes #include <sys/systm.h>
46d2d3e875SBruce Evans #include <sys/sysproto.h>
47df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
4820982410SBruce Evans #include <sys/filio.h>
493ac4d1efSBruce Evans #include <sys/fcntl.h>
50df8bae1dSRodney W. Grimes #include <sys/file.h>
51df8bae1dSRodney W. Grimes #include <sys/proc.h>
52797f2d22SPoul-Henning Kamp #include <sys/signalvar.h>
53df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
54df8bae1dSRodney W. Grimes #include <sys/uio.h>
55df8bae1dSRodney W. Grimes #include <sys/kernel.h>
56104a9b7eSAlexander Kabaev #include <sys/limits.h>
57df8bae1dSRodney W. Grimes #include <sys/malloc.h>
5842d11757SPeter Wemm #include <sys/poll.h>
5989b71647SPeter Wemm #include <sys/resourcevar.h>
600a2c3d48SGarrett Wollman #include <sys/selinfo.h>
618f19eb88SIan Dowse #include <sys/syscallsubr.h>
628cb96f20SPeter Wemm #include <sys/sysctl.h>
6342d11757SPeter Wemm #include <sys/sysent.h>
64279d7226SMatthew Dillon #include <sys/bio.h>
65279d7226SMatthew Dillon #include <sys/buf.h>
66265fc98fSSeigo Tanimura #include <sys/condvar.h>
67df8bae1dSRodney W. Grimes #ifdef KTRACE
68df8bae1dSRodney W. Grimes #include <sys/ktrace.h>
69df8bae1dSRodney W. Grimes #endif
70279d7226SMatthew Dillon #include <vm/vm.h>
71279d7226SMatthew Dillon #include <vm/vm_page.h>
72df8bae1dSRodney W. Grimes 
73a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
74a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
75a1c995b6SPoul-Henning Kamp MALLOC_DEFINE(M_IOV, "iov", "large iov's");
7655166637SPoul-Henning Kamp 
77bbbb04ceSAlfred Perlstein static int	pollscan(struct thread *, struct pollfd *, u_int);
78bbbb04ceSAlfred Perlstein static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
79bbbb04ceSAlfred Perlstein static int	dofileread(struct thread *, struct file *, int, void *,
80bbbb04ceSAlfred Perlstein 		    size_t, off_t, int);
81bbbb04ceSAlfred Perlstein static int	dofilewrite(struct thread *, struct file *, int,
82bbbb04ceSAlfred Perlstein 		    const void *, size_t, off_t, int);
838fe387abSDmitrij Tejblum 
/*
 * Read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct read_args {
	int	fd;
	void	*buf;
	size_t	nbyte;
};
#endif
/*
 * MPSAFE
 *
 * Obtain a read reference on descriptor uap->fd, pass the request to
 * dofileread() with offset (off_t)-1 and no flags, then drop the
 * reference.  Returns the fget_read() error if the lookup fails,
 * otherwise whatever dofileread() returns.
 */
int
read(struct thread *td, struct read_args *uap)
{
	struct file *fp;
	int error;

	error = fget_read(td, uap->fd, &fp);
	if (error != 0)
		return (error);

	error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte,
	    (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}
112df8bae1dSRodney W. Grimes 
113df8bae1dSRodney W. Grimes /*
1148fe387abSDmitrij Tejblum  * Pread system call
1154160ccd9SAlan Cox  */
1164160ccd9SAlan Cox #ifndef _SYS_SYSPROTO_H_
1174160ccd9SAlan Cox struct pread_args {
1184160ccd9SAlan Cox 	int	fd;
1194160ccd9SAlan Cox 	void	*buf;
1204160ccd9SAlan Cox 	size_t	nbyte;
1218fe387abSDmitrij Tejblum 	int	pad;
1224160ccd9SAlan Cox 	off_t	offset;
1234160ccd9SAlan Cox };
1244160ccd9SAlan Cox #endif
125ad2edad9SMatthew Dillon /*
126ad2edad9SMatthew Dillon  * MPSAFE
127ad2edad9SMatthew Dillon  */
1284160ccd9SAlan Cox int
129b40ce416SJulian Elischer pread(td, uap)
130b40ce416SJulian Elischer 	struct thread *td;
131b064d43dSMatthew Dillon 	struct pread_args *uap;
1324160ccd9SAlan Cox {
133b064d43dSMatthew Dillon 	struct file *fp;
134279d7226SMatthew Dillon 	int error;
1358fe387abSDmitrij Tejblum 
13697fa4397SAlfred Perlstein 	if ((error = fget_read(td, uap->fd, &fp)) != 0)
13797fa4397SAlfred Perlstein 		return (error);
138426da3bcSAlfred Perlstein 	if (fp->f_type != DTYPE_VNODE) {
139b064d43dSMatthew Dillon 		error = ESPIPE;
140426da3bcSAlfred Perlstein 	} else {
141426da3bcSAlfred Perlstein 		error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte,
142426da3bcSAlfred Perlstein 			    uap->offset, FOF_OFFSET);
143b064d43dSMatthew Dillon 	}
144b40ce416SJulian Elischer 	fdrop(fp, td);
145279d7226SMatthew Dillon 	return(error);
1468fe387abSDmitrij Tejblum }
1478fe387abSDmitrij Tejblum 
1488fe387abSDmitrij Tejblum /*
1498fe387abSDmitrij Tejblum  * Code common for read and pread
1508fe387abSDmitrij Tejblum  */
15137c84183SPoul-Henning Kamp static int
152b40ce416SJulian Elischer dofileread(td, fp, fd, buf, nbyte, offset, flags)
153b40ce416SJulian Elischer 	struct thread *td;
1548fe387abSDmitrij Tejblum 	struct file *fp;
1558fe387abSDmitrij Tejblum 	int fd, flags;
1568fe387abSDmitrij Tejblum 	void *buf;
1578fe387abSDmitrij Tejblum 	size_t nbyte;
1588fe387abSDmitrij Tejblum 	off_t offset;
1598fe387abSDmitrij Tejblum {
1604160ccd9SAlan Cox 	struct uio auio;
1614160ccd9SAlan Cox 	struct iovec aiov;
1624160ccd9SAlan Cox 	long cnt, error = 0;
1634160ccd9SAlan Cox #ifdef KTRACE
1644160ccd9SAlan Cox 	struct iovec ktriov;
16542ebfbf2SBrian Feldman 	struct uio ktruio;
1663c89e357SBrian Feldman 	int didktr = 0;
1674160ccd9SAlan Cox #endif
1684160ccd9SAlan Cox 
1690a3e28cfSAlfred Perlstein 	aiov.iov_base = buf;
1708fe387abSDmitrij Tejblum 	aiov.iov_len = nbyte;
1714160ccd9SAlan Cox 	auio.uio_iov = &aiov;
1724160ccd9SAlan Cox 	auio.uio_iovcnt = 1;
1738fe387abSDmitrij Tejblum 	auio.uio_offset = offset;
1748fe387abSDmitrij Tejblum 	if (nbyte > INT_MAX)
1754160ccd9SAlan Cox 		return (EINVAL);
1768fe387abSDmitrij Tejblum 	auio.uio_resid = nbyte;
1774160ccd9SAlan Cox 	auio.uio_rw = UIO_READ;
1784160ccd9SAlan Cox 	auio.uio_segflg = UIO_USERSPACE;
179b40ce416SJulian Elischer 	auio.uio_td = td;
1804160ccd9SAlan Cox #ifdef KTRACE
1814160ccd9SAlan Cox 	/*
1824160ccd9SAlan Cox 	 * if tracing, save a copy of iovec
1834160ccd9SAlan Cox 	 */
18460a9bb19SJohn Baldwin 	if (KTRPOINT(td, KTR_GENIO)) {
1854160ccd9SAlan Cox 		ktriov = aiov;
18642ebfbf2SBrian Feldman 		ktruio = auio;
1873c89e357SBrian Feldman 		didktr = 1;
18842ebfbf2SBrian Feldman 	}
1894160ccd9SAlan Cox #endif
1908fe387abSDmitrij Tejblum 	cnt = nbyte;
191279d7226SMatthew Dillon 
1929ca43589SRobert Watson 	if ((error = fo_read(fp, &auio, td->td_ucred, flags, td))) {
1934160ccd9SAlan Cox 		if (auio.uio_resid != cnt && (error == ERESTART ||
1944160ccd9SAlan Cox 		    error == EINTR || error == EWOULDBLOCK))
1954160ccd9SAlan Cox 			error = 0;
196279d7226SMatthew Dillon 	}
1974160ccd9SAlan Cox 	cnt -= auio.uio_resid;
1984160ccd9SAlan Cox #ifdef KTRACE
1993c89e357SBrian Feldman 	if (didktr && error == 0) {
20042ebfbf2SBrian Feldman 		ktruio.uio_iov = &ktriov;
20142ebfbf2SBrian Feldman 		ktruio.uio_resid = cnt;
20260a9bb19SJohn Baldwin 		ktrgenio(fd, UIO_READ, &ktruio, error);
20342ebfbf2SBrian Feldman 	}
2044160ccd9SAlan Cox #endif
205b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
2064160ccd9SAlan Cox 	return (error);
2074160ccd9SAlan Cox }
2084160ccd9SAlan Cox 
2094160ccd9SAlan Cox /*
210df8bae1dSRodney W. Grimes  * Scatter read system call.
211df8bae1dSRodney W. Grimes  */
212d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
213df8bae1dSRodney W. Grimes struct readv_args {
2147147b19dSBruce Evans 	int	fd;
215df8bae1dSRodney W. Grimes 	struct	iovec *iovp;
216df8bae1dSRodney W. Grimes 	u_int	iovcnt;
217df8bae1dSRodney W. Grimes };
218d2d3e875SBruce Evans #endif
219ad2edad9SMatthew Dillon /*
220ad2edad9SMatthew Dillon  * MPSAFE
221ad2edad9SMatthew Dillon  */
22226f9a767SRodney W. Grimes int
223b40ce416SJulian Elischer readv(td, uap)
224b40ce416SJulian Elischer 	struct thread *td;
225b064d43dSMatthew Dillon 	struct readv_args *uap;
226df8bae1dSRodney W. Grimes {
227b064d43dSMatthew Dillon 	struct file *fp;
228df8bae1dSRodney W. Grimes 	struct uio auio;
229b064d43dSMatthew Dillon 	struct iovec *iov;
230df8bae1dSRodney W. Grimes 	struct iovec *needfree;
231df8bae1dSRodney W. Grimes 	struct iovec aiov[UIO_SMALLIOV];
23282641acdSAlan Cox 	long i, cnt;
23382641acdSAlan Cox 	int error;
234df8bae1dSRodney W. Grimes 	u_int iovlen;
235df8bae1dSRodney W. Grimes #ifdef KTRACE
236df8bae1dSRodney W. Grimes 	struct iovec *ktriov = NULL;
23742ebfbf2SBrian Feldman 	struct uio ktruio;
238df8bae1dSRodney W. Grimes #endif
239df8bae1dSRodney W. Grimes 
240b064d43dSMatthew Dillon 	if ((error = fget_read(td, uap->fd, &fp)) != 0)
24182641acdSAlan Cox 		return (error);
24282641acdSAlan Cox 	needfree = NULL;
243df8bae1dSRodney W. Grimes 	/* note: can't use iovlen until iovcnt is validated */
244df8bae1dSRodney W. Grimes 	iovlen = uap->iovcnt * sizeof (struct iovec);
245df8bae1dSRodney W. Grimes 	if (uap->iovcnt > UIO_SMALLIOV) {
246ad2edad9SMatthew Dillon 		if (uap->iovcnt > UIO_MAXIOV) {
247ad2edad9SMatthew Dillon 			error = EINVAL;
24882641acdSAlan Cox 			goto done;
249ad2edad9SMatthew Dillon 		}
250a163d034SWarner Losh 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
251df8bae1dSRodney W. Grimes 		needfree = iov;
25282641acdSAlan Cox 	} else
253df8bae1dSRodney W. Grimes 		iov = aiov;
254df8bae1dSRodney W. Grimes 	auio.uio_iov = iov;
255df8bae1dSRodney W. Grimes 	auio.uio_iovcnt = uap->iovcnt;
256df8bae1dSRodney W. Grimes 	auio.uio_rw = UIO_READ;
257df8bae1dSRodney W. Grimes 	auio.uio_segflg = UIO_USERSPACE;
258b40ce416SJulian Elischer 	auio.uio_td = td;
2592c1011f7SJohn Dyson 	auio.uio_offset = -1;
2600a3e28cfSAlfred Perlstein 	if ((error = copyin(uap->iovp, iov, iovlen)))
261df8bae1dSRodney W. Grimes 		goto done;
262df8bae1dSRodney W. Grimes 	auio.uio_resid = 0;
263df8bae1dSRodney W. Grimes 	for (i = 0; i < uap->iovcnt; i++) {
264069e9bc1SDoug Rabson 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
265df8bae1dSRodney W. Grimes 			error = EINVAL;
266df8bae1dSRodney W. Grimes 			goto done;
267df8bae1dSRodney W. Grimes 		}
268069e9bc1SDoug Rabson 		auio.uio_resid += iov->iov_len;
269df8bae1dSRodney W. Grimes 		iov++;
270df8bae1dSRodney W. Grimes 	}
271df8bae1dSRodney W. Grimes #ifdef KTRACE
272df8bae1dSRodney W. Grimes 	/*
273df8bae1dSRodney W. Grimes 	 * if tracing, save a copy of iovec
274df8bae1dSRodney W. Grimes 	 */
27560a9bb19SJohn Baldwin 	if (KTRPOINT(td, KTR_GENIO))  {
276a163d034SWarner Losh 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
2770a3e28cfSAlfred Perlstein 		bcopy(auio.uio_iov, ktriov, iovlen);
27842ebfbf2SBrian Feldman 		ktruio = auio;
279df8bae1dSRodney W. Grimes 	}
280df8bae1dSRodney W. Grimes #endif
281df8bae1dSRodney W. Grimes 	cnt = auio.uio_resid;
2829ca43589SRobert Watson 	if ((error = fo_read(fp, &auio, td->td_ucred, 0, td))) {
283df8bae1dSRodney W. Grimes 		if (auio.uio_resid != cnt && (error == ERESTART ||
284df8bae1dSRodney W. Grimes 		    error == EINTR || error == EWOULDBLOCK))
285df8bae1dSRodney W. Grimes 			error = 0;
286279d7226SMatthew Dillon 	}
287df8bae1dSRodney W. Grimes 	cnt -= auio.uio_resid;
288df8bae1dSRodney W. Grimes #ifdef KTRACE
289df8bae1dSRodney W. Grimes 	if (ktriov != NULL) {
29042ebfbf2SBrian Feldman 		if (error == 0) {
29142ebfbf2SBrian Feldman 			ktruio.uio_iov = ktriov;
29242ebfbf2SBrian Feldman 			ktruio.uio_resid = cnt;
29360a9bb19SJohn Baldwin 			ktrgenio(uap->fd, UIO_READ, &ktruio, error);
29442ebfbf2SBrian Feldman 		}
295df8bae1dSRodney W. Grimes 		FREE(ktriov, M_TEMP);
296df8bae1dSRodney W. Grimes 	}
297df8bae1dSRodney W. Grimes #endif
298b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
299df8bae1dSRodney W. Grimes done:
300b40ce416SJulian Elischer 	fdrop(fp, td);
301df8bae1dSRodney W. Grimes 	if (needfree)
302df8bae1dSRodney W. Grimes 		FREE(needfree, M_IOV);
303df8bae1dSRodney W. Grimes 	return (error);
304df8bae1dSRodney W. Grimes }
305df8bae1dSRodney W. Grimes 
/*
 * Write system call
 */
#ifndef _SYS_SYSPROTO_H_
struct write_args {
	int	fd;
	const void *buf;
	size_t	nbyte;
};
#endif
/*
 * MPSAFE
 *
 * Obtain a write reference on descriptor uap->fd, pass the request to
 * dofilewrite() with offset (off_t)-1 and no flags, then drop the
 * reference.  Any fget_write() failure is reported as EBADF.
 */
int
write(struct thread *td, struct write_args *uap)
{
	struct file *fp;
	int error;

	if (fget_write(td, uap->fd, &fp) != 0)
		return (EBADF);	/* XXX this can't be right */

	error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte,
	    (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}
336df8bae1dSRodney W. Grimes 
337df8bae1dSRodney W. Grimes /*
3388fe387abSDmitrij Tejblum  * Pwrite system call
3394160ccd9SAlan Cox  */
3404160ccd9SAlan Cox #ifndef _SYS_SYSPROTO_H_
3414160ccd9SAlan Cox struct pwrite_args {
3424160ccd9SAlan Cox 	int	fd;
3434160ccd9SAlan Cox 	const void *buf;
3444160ccd9SAlan Cox 	size_t	nbyte;
3458fe387abSDmitrij Tejblum 	int	pad;
3464160ccd9SAlan Cox 	off_t	offset;
3474160ccd9SAlan Cox };
3484160ccd9SAlan Cox #endif
349ad2edad9SMatthew Dillon /*
350ad2edad9SMatthew Dillon  * MPSAFE
351ad2edad9SMatthew Dillon  */
3524160ccd9SAlan Cox int
353b40ce416SJulian Elischer pwrite(td, uap)
354b40ce416SJulian Elischer 	struct thread *td;
355b064d43dSMatthew Dillon 	struct pwrite_args *uap;
3564160ccd9SAlan Cox {
357b064d43dSMatthew Dillon 	struct file *fp;
358279d7226SMatthew Dillon 	int error;
3598fe387abSDmitrij Tejblum 
360b064d43dSMatthew Dillon 	if ((error = fget_write(td, uap->fd, &fp)) == 0) {
361aa11a498SAlfred Perlstein 		if (fp->f_type == DTYPE_VNODE) {
362b064d43dSMatthew Dillon 			error = dofilewrite(td, fp, uap->fd, uap->buf,
363b064d43dSMatthew Dillon 				    uap->nbyte, uap->offset, FOF_OFFSET);
364b064d43dSMatthew Dillon 		} else {
365279d7226SMatthew Dillon 			error = ESPIPE;
366b064d43dSMatthew Dillon 		}
367b40ce416SJulian Elischer 		fdrop(fp, td);
368279d7226SMatthew Dillon 	} else {
369b064d43dSMatthew Dillon 		error = EBADF;	/* this can't be right */
370ad2edad9SMatthew Dillon 	}
371279d7226SMatthew Dillon 	return(error);
3728fe387abSDmitrij Tejblum }
3738fe387abSDmitrij Tejblum 
3748fe387abSDmitrij Tejblum static int
375b40ce416SJulian Elischer dofilewrite(td, fp, fd, buf, nbyte, offset, flags)
376b40ce416SJulian Elischer 	struct thread *td;
3778fe387abSDmitrij Tejblum 	struct file *fp;
3788fe387abSDmitrij Tejblum 	int fd, flags;
3798fe387abSDmitrij Tejblum 	const void *buf;
3808fe387abSDmitrij Tejblum 	size_t nbyte;
3818fe387abSDmitrij Tejblum 	off_t offset;
3828fe387abSDmitrij Tejblum {
3834160ccd9SAlan Cox 	struct uio auio;
3844160ccd9SAlan Cox 	struct iovec aiov;
3854160ccd9SAlan Cox 	long cnt, error = 0;
3864160ccd9SAlan Cox #ifdef KTRACE
3874160ccd9SAlan Cox 	struct iovec ktriov;
38842ebfbf2SBrian Feldman 	struct uio ktruio;
3893c89e357SBrian Feldman 	int didktr = 0;
3904160ccd9SAlan Cox #endif
3914160ccd9SAlan Cox 
392b31ae1adSPeter Wemm 	aiov.iov_base = (void *)(uintptr_t)buf;
3938fe387abSDmitrij Tejblum 	aiov.iov_len = nbyte;
3944160ccd9SAlan Cox 	auio.uio_iov = &aiov;
3954160ccd9SAlan Cox 	auio.uio_iovcnt = 1;
3968fe387abSDmitrij Tejblum 	auio.uio_offset = offset;
3978fe387abSDmitrij Tejblum 	if (nbyte > INT_MAX)
3984160ccd9SAlan Cox 		return (EINVAL);
3998fe387abSDmitrij Tejblum 	auio.uio_resid = nbyte;
4004160ccd9SAlan Cox 	auio.uio_rw = UIO_WRITE;
4014160ccd9SAlan Cox 	auio.uio_segflg = UIO_USERSPACE;
402b40ce416SJulian Elischer 	auio.uio_td = td;
4034160ccd9SAlan Cox #ifdef KTRACE
4044160ccd9SAlan Cox 	/*
40542ebfbf2SBrian Feldman 	 * if tracing, save a copy of iovec and uio
4064160ccd9SAlan Cox 	 */
40760a9bb19SJohn Baldwin 	if (KTRPOINT(td, KTR_GENIO)) {
4084160ccd9SAlan Cox 		ktriov = aiov;
40942ebfbf2SBrian Feldman 		ktruio = auio;
4103c89e357SBrian Feldman 		didktr = 1;
41142ebfbf2SBrian Feldman 	}
4124160ccd9SAlan Cox #endif
4138fe387abSDmitrij Tejblum 	cnt = nbyte;
414c6ab5768SAlfred Perlstein 	if (fp->f_type == DTYPE_VNODE)
415279d7226SMatthew Dillon 		bwillwrite();
4169ca43589SRobert Watson 	if ((error = fo_write(fp, &auio, td->td_ucred, flags, td))) {
4174160ccd9SAlan Cox 		if (auio.uio_resid != cnt && (error == ERESTART ||
4184160ccd9SAlan Cox 		    error == EINTR || error == EWOULDBLOCK))
4194160ccd9SAlan Cox 			error = 0;
420c33c8251SAlfred Perlstein 		/* Socket layer is responsible for issuing SIGPIPE. */
421c33c8251SAlfred Perlstein 		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
422b40ce416SJulian Elischer 			PROC_LOCK(td->td_proc);
423b40ce416SJulian Elischer 			psignal(td->td_proc, SIGPIPE);
424b40ce416SJulian Elischer 			PROC_UNLOCK(td->td_proc);
42519eb87d2SJohn Baldwin 		}
4264160ccd9SAlan Cox 	}
4274160ccd9SAlan Cox 	cnt -= auio.uio_resid;
4284160ccd9SAlan Cox #ifdef KTRACE
4293c89e357SBrian Feldman 	if (didktr && error == 0) {
43042ebfbf2SBrian Feldman 		ktruio.uio_iov = &ktriov;
43142ebfbf2SBrian Feldman 		ktruio.uio_resid = cnt;
43260a9bb19SJohn Baldwin 		ktrgenio(fd, UIO_WRITE, &ktruio, error);
43342ebfbf2SBrian Feldman 	}
4344160ccd9SAlan Cox #endif
435b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
4364160ccd9SAlan Cox 	return (error);
4374160ccd9SAlan Cox }
4384160ccd9SAlan Cox 
4394160ccd9SAlan Cox /*
440df8bae1dSRodney W. Grimes  * Gather write system call
441df8bae1dSRodney W. Grimes  */
442d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
443df8bae1dSRodney W. Grimes struct writev_args {
444df8bae1dSRodney W. Grimes 	int	fd;
445df8bae1dSRodney W. Grimes 	struct	iovec *iovp;
446df8bae1dSRodney W. Grimes 	u_int	iovcnt;
447df8bae1dSRodney W. Grimes };
448d2d3e875SBruce Evans #endif
449ad2edad9SMatthew Dillon /*
450ad2edad9SMatthew Dillon  * MPSAFE
451ad2edad9SMatthew Dillon  */
45226f9a767SRodney W. Grimes int
453b40ce416SJulian Elischer writev(td, uap)
454b40ce416SJulian Elischer 	struct thread *td;
455df8bae1dSRodney W. Grimes 	register struct writev_args *uap;
456df8bae1dSRodney W. Grimes {
457b064d43dSMatthew Dillon 	struct file *fp;
458df8bae1dSRodney W. Grimes 	struct uio auio;
459df8bae1dSRodney W. Grimes 	register struct iovec *iov;
460df8bae1dSRodney W. Grimes 	struct iovec *needfree;
461df8bae1dSRodney W. Grimes 	struct iovec aiov[UIO_SMALLIOV];
462df8bae1dSRodney W. Grimes 	long i, cnt, error = 0;
463df8bae1dSRodney W. Grimes 	u_int iovlen;
464df8bae1dSRodney W. Grimes #ifdef KTRACE
465df8bae1dSRodney W. Grimes 	struct iovec *ktriov = NULL;
46642ebfbf2SBrian Feldman 	struct uio ktruio;
467df8bae1dSRodney W. Grimes #endif
468df8bae1dSRodney W. Grimes 
469ad2edad9SMatthew Dillon 	mtx_lock(&Giant);
470b064d43dSMatthew Dillon 	if ((error = fget_write(td, uap->fd, &fp)) != 0) {
471ad2edad9SMatthew Dillon 		error = EBADF;
472ad2edad9SMatthew Dillon 		goto done2;
473ad2edad9SMatthew Dillon 	}
474df8bae1dSRodney W. Grimes 	/* note: can't use iovlen until iovcnt is validated */
475df8bae1dSRodney W. Grimes 	iovlen = uap->iovcnt * sizeof (struct iovec);
476df8bae1dSRodney W. Grimes 	if (uap->iovcnt > UIO_SMALLIOV) {
4771aa3e7ddSBrian Feldman 		if (uap->iovcnt > UIO_MAXIOV) {
4781aa3e7ddSBrian Feldman 			needfree = NULL;
4791aa3e7ddSBrian Feldman 			error = EINVAL;
4801aa3e7ddSBrian Feldman 			goto done;
4811aa3e7ddSBrian Feldman 		}
482a163d034SWarner Losh 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
483df8bae1dSRodney W. Grimes 		needfree = iov;
484df8bae1dSRodney W. Grimes 	} else {
485df8bae1dSRodney W. Grimes 		iov = aiov;
486df8bae1dSRodney W. Grimes 		needfree = NULL;
487df8bae1dSRodney W. Grimes 	}
488df8bae1dSRodney W. Grimes 	auio.uio_iov = iov;
489df8bae1dSRodney W. Grimes 	auio.uio_iovcnt = uap->iovcnt;
490df8bae1dSRodney W. Grimes 	auio.uio_rw = UIO_WRITE;
491df8bae1dSRodney W. Grimes 	auio.uio_segflg = UIO_USERSPACE;
492b40ce416SJulian Elischer 	auio.uio_td = td;
4932c1011f7SJohn Dyson 	auio.uio_offset = -1;
4940a3e28cfSAlfred Perlstein 	if ((error = copyin(uap->iovp, iov, iovlen)))
495df8bae1dSRodney W. Grimes 		goto done;
496df8bae1dSRodney W. Grimes 	auio.uio_resid = 0;
497df8bae1dSRodney W. Grimes 	for (i = 0; i < uap->iovcnt; i++) {
498069e9bc1SDoug Rabson 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
499df8bae1dSRodney W. Grimes 			error = EINVAL;
500df8bae1dSRodney W. Grimes 			goto done;
501df8bae1dSRodney W. Grimes 		}
502069e9bc1SDoug Rabson 		auio.uio_resid += iov->iov_len;
503df8bae1dSRodney W. Grimes 		iov++;
504df8bae1dSRodney W. Grimes 	}
505df8bae1dSRodney W. Grimes #ifdef KTRACE
506df8bae1dSRodney W. Grimes 	/*
50742ebfbf2SBrian Feldman 	 * if tracing, save a copy of iovec and uio
508df8bae1dSRodney W. Grimes 	 */
50960a9bb19SJohn Baldwin 	if (KTRPOINT(td, KTR_GENIO))  {
510a163d034SWarner Losh 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
5110a3e28cfSAlfred Perlstein 		bcopy(auio.uio_iov, ktriov, iovlen);
51242ebfbf2SBrian Feldman 		ktruio = auio;
513df8bae1dSRodney W. Grimes 	}
514df8bae1dSRodney W. Grimes #endif
515df8bae1dSRodney W. Grimes 	cnt = auio.uio_resid;
516a41ce5d3SMatthew Dillon 	if (fp->f_type == DTYPE_VNODE)
5179440653dSMatthew Dillon 		bwillwrite();
5189ca43589SRobert Watson 	if ((error = fo_write(fp, &auio, td->td_ucred, 0, td))) {
519df8bae1dSRodney W. Grimes 		if (auio.uio_resid != cnt && (error == ERESTART ||
520df8bae1dSRodney W. Grimes 		    error == EINTR || error == EWOULDBLOCK))
521df8bae1dSRodney W. Grimes 			error = 0;
52219eb87d2SJohn Baldwin 		if (error == EPIPE) {
523b40ce416SJulian Elischer 			PROC_LOCK(td->td_proc);
524b40ce416SJulian Elischer 			psignal(td->td_proc, SIGPIPE);
525b40ce416SJulian Elischer 			PROC_UNLOCK(td->td_proc);
52619eb87d2SJohn Baldwin 		}
527df8bae1dSRodney W. Grimes 	}
528df8bae1dSRodney W. Grimes 	cnt -= auio.uio_resid;
529df8bae1dSRodney W. Grimes #ifdef KTRACE
530df8bae1dSRodney W. Grimes 	if (ktriov != NULL) {
53142ebfbf2SBrian Feldman 		if (error == 0) {
53242ebfbf2SBrian Feldman 			ktruio.uio_iov = ktriov;
53342ebfbf2SBrian Feldman 			ktruio.uio_resid = cnt;
53460a9bb19SJohn Baldwin 			ktrgenio(uap->fd, UIO_WRITE, &ktruio, error);
53542ebfbf2SBrian Feldman 		}
536df8bae1dSRodney W. Grimes 		FREE(ktriov, M_TEMP);
537df8bae1dSRodney W. Grimes 	}
538df8bae1dSRodney W. Grimes #endif
539b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
540df8bae1dSRodney W. Grimes done:
541b40ce416SJulian Elischer 	fdrop(fp, td);
542df8bae1dSRodney W. Grimes 	if (needfree)
543df8bae1dSRodney W. Grimes 		FREE(needfree, M_IOV);
544ad2edad9SMatthew Dillon done2:
545ad2edad9SMatthew Dillon 	mtx_unlock(&Giant);
546df8bae1dSRodney W. Grimes 	return (error);
547df8bae1dSRodney W. Grimes }
548df8bae1dSRodney W. Grimes 
549df8bae1dSRodney W. Grimes /*
550df8bae1dSRodney W. Grimes  * Ioctl system call
551df8bae1dSRodney W. Grimes  */
552d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
553df8bae1dSRodney W. Grimes struct ioctl_args {
554df8bae1dSRodney W. Grimes 	int	fd;
555069e9bc1SDoug Rabson 	u_long	com;
556df8bae1dSRodney W. Grimes 	caddr_t	data;
557df8bae1dSRodney W. Grimes };
558d2d3e875SBruce Evans #endif
559ad2edad9SMatthew Dillon /*
560ad2edad9SMatthew Dillon  * MPSAFE
561ad2edad9SMatthew Dillon  */
562df8bae1dSRodney W. Grimes /* ARGSUSED */
56326f9a767SRodney W. Grimes int
564b40ce416SJulian Elischer ioctl(td, uap)
565b40ce416SJulian Elischer 	struct thread *td;
566df8bae1dSRodney W. Grimes 	register struct ioctl_args *uap;
567df8bae1dSRodney W. Grimes {
568a4db4953SAlfred Perlstein 	struct file *fp;
569df8bae1dSRodney W. Grimes 	register struct filedesc *fdp;
570831b9ef2SDoug Rabson 	register u_long com;
571ad2edad9SMatthew Dillon 	int error = 0;
572df8bae1dSRodney W. Grimes 	register u_int size;
573df8bae1dSRodney W. Grimes 	caddr_t data, memp;
574df8bae1dSRodney W. Grimes 	int tmp;
575df8bae1dSRodney W. Grimes #define STK_PARAMS	128
576d2ba455cSMatthew Dillon 	union {
577df8bae1dSRodney W. Grimes 	    char stkbuf[STK_PARAMS];
578d2ba455cSMatthew Dillon 	    long align;
579d2ba455cSMatthew Dillon 	} ubuf;
580df8bae1dSRodney W. Grimes 
581a4db4953SAlfred Perlstein 	if ((error = fget(td, uap->fd, &fp)) != 0)
582a4db4953SAlfred Perlstein 		return (error);
583aa11a498SAlfred Perlstein 	mtx_lock(&Giant);
584ad2edad9SMatthew Dillon 	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
585426da3bcSAlfred Perlstein 		fdrop(fp, td);
586aa11a498SAlfred Perlstein 		mtx_unlock(&Giant);
587426da3bcSAlfred Perlstein 		return (EBADF);
588ad2edad9SMatthew Dillon 	}
589426da3bcSAlfred Perlstein 	fdp = td->td_proc->p_fd;
590df8bae1dSRodney W. Grimes 	switch (com = uap->com) {
591df8bae1dSRodney W. Grimes 	case FIONCLEX:
592426da3bcSAlfred Perlstein 		FILEDESC_LOCK(fdp);
593df8bae1dSRodney W. Grimes 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
594426da3bcSAlfred Perlstein 		FILEDESC_UNLOCK(fdp);
595426da3bcSAlfred Perlstein 		fdrop(fp, td);
596aa11a498SAlfred Perlstein 		mtx_unlock(&Giant);
597426da3bcSAlfred Perlstein 		return (0);
598df8bae1dSRodney W. Grimes 	case FIOCLEX:
599426da3bcSAlfred Perlstein 		FILEDESC_LOCK(fdp);
600df8bae1dSRodney W. Grimes 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
601426da3bcSAlfred Perlstein 		FILEDESC_UNLOCK(fdp);
602426da3bcSAlfred Perlstein 		fdrop(fp, td);
603aa11a498SAlfred Perlstein 		mtx_unlock(&Giant);
604426da3bcSAlfred Perlstein 		return (0);
605df8bae1dSRodney W. Grimes 	}
606df8bae1dSRodney W. Grimes 
607df8bae1dSRodney W. Grimes 	/*
608df8bae1dSRodney W. Grimes 	 * Interpret high order word to find amount of data to be
609df8bae1dSRodney W. Grimes 	 * copied to/from the user's address space.
610df8bae1dSRodney W. Grimes 	 */
611df8bae1dSRodney W. Grimes 	size = IOCPARM_LEN(com);
612ad2edad9SMatthew Dillon 	if (size > IOCPARM_MAX) {
613426da3bcSAlfred Perlstein 		fdrop(fp, td);
614aa11a498SAlfred Perlstein 		mtx_unlock(&Giant);
615426da3bcSAlfred Perlstein 		return (ENOTTY);
616ad2edad9SMatthew Dillon 	}
617279d7226SMatthew Dillon 
618df8bae1dSRodney W. Grimes 	memp = NULL;
619d2ba455cSMatthew Dillon 	if (size > sizeof (ubuf.stkbuf)) {
620a163d034SWarner Losh 		memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
621df8bae1dSRodney W. Grimes 		data = memp;
622279d7226SMatthew Dillon 	} else {
623d2ba455cSMatthew Dillon 		data = ubuf.stkbuf;
624279d7226SMatthew Dillon 	}
625df8bae1dSRodney W. Grimes 	if (com&IOC_IN) {
626df8bae1dSRodney W. Grimes 		if (size) {
627df8bae1dSRodney W. Grimes 			error = copyin(uap->data, data, (u_int)size);
628df8bae1dSRodney W. Grimes 			if (error) {
629df8bae1dSRodney W. Grimes 				if (memp)
630df8bae1dSRodney W. Grimes 					free(memp, M_IOCTLOPS);
631b40ce416SJulian Elischer 				fdrop(fp, td);
632426da3bcSAlfred Perlstein 				goto done;
633df8bae1dSRodney W. Grimes 			}
634279d7226SMatthew Dillon 		} else {
635df8bae1dSRodney W. Grimes 			*(caddr_t *)data = uap->data;
636279d7226SMatthew Dillon 		}
637279d7226SMatthew Dillon 	} else if ((com&IOC_OUT) && size) {
638df8bae1dSRodney W. Grimes 		/*
639df8bae1dSRodney W. Grimes 		 * Zero the buffer so the user always
640df8bae1dSRodney W. Grimes 		 * gets back something deterministic.
641df8bae1dSRodney W. Grimes 		 */
642df8bae1dSRodney W. Grimes 		bzero(data, size);
643279d7226SMatthew Dillon 	} else if (com&IOC_VOID) {
644df8bae1dSRodney W. Grimes 		*(caddr_t *)data = uap->data;
645279d7226SMatthew Dillon 	}
646df8bae1dSRodney W. Grimes 
647df8bae1dSRodney W. Grimes 	switch (com) {
648df8bae1dSRodney W. Grimes 
649df8bae1dSRodney W. Grimes 	case FIONBIO:
650426da3bcSAlfred Perlstein 		FILE_LOCK(fp);
651bb56ec4aSPoul-Henning Kamp 		if ((tmp = *(int *)data))
652df8bae1dSRodney W. Grimes 			fp->f_flag |= FNONBLOCK;
653df8bae1dSRodney W. Grimes 		else
654df8bae1dSRodney W. Grimes 			fp->f_flag &= ~FNONBLOCK;
655426da3bcSAlfred Perlstein 		FILE_UNLOCK(fp);
656d49fa1caSRobert Watson 		error = fo_ioctl(fp, FIONBIO, &tmp, td->td_ucred, td);
657df8bae1dSRodney W. Grimes 		break;
658df8bae1dSRodney W. Grimes 
659df8bae1dSRodney W. Grimes 	case FIOASYNC:
660426da3bcSAlfred Perlstein 		FILE_LOCK(fp);
661bb56ec4aSPoul-Henning Kamp 		if ((tmp = *(int *)data))
662df8bae1dSRodney W. Grimes 			fp->f_flag |= FASYNC;
663df8bae1dSRodney W. Grimes 		else
664df8bae1dSRodney W. Grimes 			fp->f_flag &= ~FASYNC;
665426da3bcSAlfred Perlstein 		FILE_UNLOCK(fp);
666d49fa1caSRobert Watson 		error = fo_ioctl(fp, FIOASYNC, &tmp, td->td_ucred, td);
667df8bae1dSRodney W. Grimes 		break;
668df8bae1dSRodney W. Grimes 
669df8bae1dSRodney W. Grimes 	default:
670d49fa1caSRobert Watson 		error = fo_ioctl(fp, com, data, td->td_ucred, td);
671df8bae1dSRodney W. Grimes 		/*
672df8bae1dSRodney W. Grimes 		 * Copy any data to user, size was
673df8bae1dSRodney W. Grimes 		 * already set and checked above.
674df8bae1dSRodney W. Grimes 		 */
675df8bae1dSRodney W. Grimes 		if (error == 0 && (com&IOC_OUT) && size)
676df8bae1dSRodney W. Grimes 			error = copyout(data, uap->data, (u_int)size);
677df8bae1dSRodney W. Grimes 		break;
678df8bae1dSRodney W. Grimes 	}
679df8bae1dSRodney W. Grimes 	if (memp)
680df8bae1dSRodney W. Grimes 		free(memp, M_IOCTLOPS);
681b40ce416SJulian Elischer 	fdrop(fp, td);
682426da3bcSAlfred Perlstein done:
683ad2edad9SMatthew Dillon 	mtx_unlock(&Giant);
684df8bae1dSRodney W. Grimes 	return (error);
685df8bae1dSRodney W. Grimes }
686df8bae1dSRodney W. Grimes 
68785f190e4SAlfred Perlstein /*
68885f190e4SAlfred Perlstein  * sellock and selwait are initialized in selectinit() via SYSINIT.
68985f190e4SAlfred Perlstein  */
69085f190e4SAlfred Perlstein struct mtx	sellock;
691265fc98fSSeigo Tanimura struct cv	selwait;
6929ae6d334SKelly Yancey u_int		nselcoll;	/* Select collisions since boot */
6939ae6d334SKelly Yancey SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
694df8bae1dSRodney W. Grimes 
695df8bae1dSRodney W. Grimes /*
696df8bae1dSRodney W. Grimes  * Select system call.
697df8bae1dSRodney W. Grimes  */
698d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
699df8bae1dSRodney W. Grimes struct select_args {
700b08f7993SSujal Patel 	int	nd;
701df8bae1dSRodney W. Grimes 	fd_set	*in, *ou, *ex;
702df8bae1dSRodney W. Grimes 	struct	timeval *tv;
703df8bae1dSRodney W. Grimes };
704d2d3e875SBruce Evans #endif
705ad2edad9SMatthew Dillon /*
706ad2edad9SMatthew Dillon  * MPSAFE
707ad2edad9SMatthew Dillon  */
70826f9a767SRodney W. Grimes int
709b40ce416SJulian Elischer select(td, uap)
710b40ce416SJulian Elischer 	register struct thread *td;
711df8bae1dSRodney W. Grimes 	register struct select_args *uap;
712df8bae1dSRodney W. Grimes {
7138f19eb88SIan Dowse 	struct timeval tv, *tvp;
7148f19eb88SIan Dowse 	int error;
7158f19eb88SIan Dowse 
7168f19eb88SIan Dowse 	if (uap->tv != NULL) {
7178f19eb88SIan Dowse 		error = copyin(uap->tv, &tv, sizeof(tv));
7188f19eb88SIan Dowse 		if (error)
7198f19eb88SIan Dowse 			return (error);
7208f19eb88SIan Dowse 		tvp = &tv;
7218f19eb88SIan Dowse 	} else
7228f19eb88SIan Dowse 		tvp = NULL;
7238f19eb88SIan Dowse 
7248f19eb88SIan Dowse 	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp));
7258f19eb88SIan Dowse }
7268f19eb88SIan Dowse 
7278f19eb88SIan Dowse int
7288f19eb88SIan Dowse kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
7298f19eb88SIan Dowse     fd_set *fd_ex, struct timeval *tvp)
7308f19eb88SIan Dowse {
731426da3bcSAlfred Perlstein 	struct filedesc *fdp;
732d5e4d7e1SBruce Evans 	/*
733d5e4d7e1SBruce Evans 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
734d5e4d7e1SBruce Evans 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
735d5e4d7e1SBruce Evans 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
736d5e4d7e1SBruce Evans 	 * of 256.
737d5e4d7e1SBruce Evans 	 */
738d5e4d7e1SBruce Evans 	fd_mask s_selbits[howmany(2048, NFDBITS)];
739eb209311SAlfred Perlstein 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
74000af9731SPoul-Henning Kamp 	struct timeval atv, rtv, ttv;
7419ae6d334SKelly Yancey 	int error, timo;
7429ae6d334SKelly Yancey 	u_int ncoll, nbufbytes, ncpbytes, nfdbits;
743df8bae1dSRodney W. Grimes 
7448f19eb88SIan Dowse 	if (nd < 0)
745acbfbfeaSSujal Patel 		return (EINVAL);
746426da3bcSAlfred Perlstein 	fdp = td->td_proc->p_fd;
747ad2edad9SMatthew Dillon 	mtx_lock(&Giant);
748426da3bcSAlfred Perlstein 	FILEDESC_LOCK(fdp);
749ad2edad9SMatthew Dillon 
7508f19eb88SIan Dowse 	if (nd > td->td_proc->p_fd->fd_nfiles)
7518f19eb88SIan Dowse 		nd = td->td_proc->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
752426da3bcSAlfred Perlstein 	FILEDESC_UNLOCK(fdp);
753b08f7993SSujal Patel 
754d5e4d7e1SBruce Evans 	/*
755d5e4d7e1SBruce Evans 	 * Allocate just enough bits for the non-null fd_sets.  Use the
756d5e4d7e1SBruce Evans 	 * preallocated auto buffer if possible.
757d5e4d7e1SBruce Evans 	 */
7588f19eb88SIan Dowse 	nfdbits = roundup(nd, NFDBITS);
759d5e4d7e1SBruce Evans 	ncpbytes = nfdbits / NBBY;
760d5e4d7e1SBruce Evans 	nbufbytes = 0;
7618f19eb88SIan Dowse 	if (fd_in != NULL)
762d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
7638f19eb88SIan Dowse 	if (fd_ou != NULL)
764d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
7658f19eb88SIan Dowse 	if (fd_ex != NULL)
766d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
767d5e4d7e1SBruce Evans 	if (nbufbytes <= sizeof s_selbits)
768d5e4d7e1SBruce Evans 		selbits = &s_selbits[0];
769d5e4d7e1SBruce Evans 	else
770a163d034SWarner Losh 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
771b08f7993SSujal Patel 
772b08f7993SSujal Patel 	/*
773d5e4d7e1SBruce Evans 	 * Assign pointers into the bit buffers and fetch the input bits.
774d5e4d7e1SBruce Evans 	 * Put the output buffers together so that they can be bzeroed
775d5e4d7e1SBruce Evans 	 * together.
776b08f7993SSujal Patel 	 */
777d5e4d7e1SBruce Evans 	sbp = selbits;
778df8bae1dSRodney W. Grimes #define	getbits(name, x) \
779d5e4d7e1SBruce Evans 	do {								\
7808f19eb88SIan Dowse 		if (name == NULL)					\
781d5e4d7e1SBruce Evans 			ibits[x] = NULL;				\
782d5e4d7e1SBruce Evans 		else {							\
783d5e4d7e1SBruce Evans 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
784d5e4d7e1SBruce Evans 			obits[x] = sbp;					\
785d5e4d7e1SBruce Evans 			sbp += ncpbytes / sizeof *sbp;			\
7868f19eb88SIan Dowse 			error = copyin(name, ibits[x], ncpbytes);	\
787265fc98fSSeigo Tanimura 			if (error != 0)					\
78885f190e4SAlfred Perlstein 				goto done_nosellock;			\
789e04ac2feSJohn Baldwin 		}							\
790d5e4d7e1SBruce Evans 	} while (0)
7918f19eb88SIan Dowse 	getbits(fd_in, 0);
7928f19eb88SIan Dowse 	getbits(fd_ou, 1);
7938f19eb88SIan Dowse 	getbits(fd_ex, 2);
794df8bae1dSRodney W. Grimes #undef	getbits
795d5e4d7e1SBruce Evans 	if (nbufbytes != 0)
796d5e4d7e1SBruce Evans 		bzero(selbits, nbufbytes / 2);
797df8bae1dSRodney W. Grimes 
7988f19eb88SIan Dowse 	if (tvp != NULL) {
7998f19eb88SIan Dowse 		atv = *tvp;
800df8bae1dSRodney W. Grimes 		if (itimerfix(&atv)) {
801df8bae1dSRodney W. Grimes 			error = EINVAL;
80285f190e4SAlfred Perlstein 			goto done_nosellock;
803df8bae1dSRodney W. Grimes 		}
804c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
80500af9731SPoul-Henning Kamp 		timevaladd(&atv, &rtv);
8069c386f6bSJohn Baldwin 	} else {
80700af9731SPoul-Henning Kamp 		atv.tv_sec = 0;
8089c386f6bSJohn Baldwin 		atv.tv_usec = 0;
8099c386f6bSJohn Baldwin 	}
81000af9731SPoul-Henning Kamp 	timo = 0;
8112149c527SPeter Wemm 	TAILQ_INIT(&td->td_selq);
81285f190e4SAlfred Perlstein 	mtx_lock(&sellock);
813df8bae1dSRodney W. Grimes retry:
814df8bae1dSRodney W. Grimes 	ncoll = nselcoll;
815fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
816b40ce416SJulian Elischer 	td->td_flags |= TDF_SELECT;
817fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
81885f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
81985f190e4SAlfred Perlstein 
8208f19eb88SIan Dowse 	error = selscan(td, ibits, obits, nd);
82185f190e4SAlfred Perlstein 	mtx_lock(&sellock);
822b40ce416SJulian Elischer 	if (error || td->td_retval[0])
823df8bae1dSRodney W. Grimes 		goto done;
8244da144c0SJohn Baldwin 	if (atv.tv_sec || atv.tv_usec) {
825c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
82685f190e4SAlfred Perlstein 		if (timevalcmp(&rtv, &atv, >=))
827df8bae1dSRodney W. Grimes 			goto done;
82800af9731SPoul-Henning Kamp 		ttv = atv;
82900af9731SPoul-Henning Kamp 		timevalsub(&ttv, &rtv);
83000af9731SPoul-Henning Kamp 		timo = ttv.tv_sec > 24 * 60 * 60 ?
83100af9731SPoul-Henning Kamp 		    24 * 60 * 60 * hz : tvtohz(&ttv);
832df8bae1dSRodney W. Grimes 	}
83385f190e4SAlfred Perlstein 
83485f190e4SAlfred Perlstein 	/*
83585f190e4SAlfred Perlstein 	 * An event of interest may occur while we do not hold
83685f190e4SAlfred Perlstein 	 * sellock, so check TDF_SELECT and the number of
83785f190e4SAlfred Perlstein 	 * collisions and rescan the file descriptors if
83885f190e4SAlfred Perlstein 	 * necessary.
83985f190e4SAlfred Perlstein 	 */
840fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
84185f190e4SAlfred Perlstein 	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
84285f190e4SAlfred Perlstein 		mtx_unlock_spin(&sched_lock);
84385f190e4SAlfred Perlstein 		goto retry;
84485f190e4SAlfred Perlstein 	}
845fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
846bfbbc4aaSJason Evans 
847265fc98fSSeigo Tanimura 	if (timo > 0)
84885f190e4SAlfred Perlstein 		error = cv_timedwait_sig(&selwait, &sellock, timo);
849265fc98fSSeigo Tanimura 	else
85085f190e4SAlfred Perlstein 		error = cv_wait_sig(&selwait, &sellock);
851bfbbc4aaSJason Evans 
852df8bae1dSRodney W. Grimes 	if (error == 0)
853df8bae1dSRodney W. Grimes 		goto retry;
854265fc98fSSeigo Tanimura 
855df8bae1dSRodney W. Grimes done:
85685f190e4SAlfred Perlstein 	clear_selinfo_list(td);
857fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
858b40ce416SJulian Elischer 	td->td_flags &= ~TDF_SELECT;
859fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
86085f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
86185f190e4SAlfred Perlstein 
86285f190e4SAlfred Perlstein done_nosellock:
863df8bae1dSRodney W. Grimes 	/* select is not restarted after signals... */
864df8bae1dSRodney W. Grimes 	if (error == ERESTART)
865df8bae1dSRodney W. Grimes 		error = EINTR;
866df8bae1dSRodney W. Grimes 	if (error == EWOULDBLOCK)
867df8bae1dSRodney W. Grimes 		error = 0;
868df8bae1dSRodney W. Grimes #define	putbits(name, x) \
8698f19eb88SIan Dowse 	if (name && (error2 = copyout(obits[x], name, ncpbytes))) \
870df8bae1dSRodney W. Grimes 		error = error2;
871df8bae1dSRodney W. Grimes 	if (error == 0) {
872df8bae1dSRodney W. Grimes 		int error2;
873df8bae1dSRodney W. Grimes 
8748f19eb88SIan Dowse 		putbits(fd_in, 0);
8758f19eb88SIan Dowse 		putbits(fd_ou, 1);
8768f19eb88SIan Dowse 		putbits(fd_ex, 2);
877df8bae1dSRodney W. Grimes #undef putbits
878df8bae1dSRodney W. Grimes 	}
879d5e4d7e1SBruce Evans 	if (selbits != &s_selbits[0])
880d5e4d7e1SBruce Evans 		free(selbits, M_SELECT);
881ad2edad9SMatthew Dillon 
882ad2edad9SMatthew Dillon 	mtx_unlock(&Giant);
883df8bae1dSRodney W. Grimes 	return (error);
884df8bae1dSRodney W. Grimes }
885df8bae1dSRodney W. Grimes 
886265fc98fSSeigo Tanimura static int
887b40ce416SJulian Elischer selscan(td, ibits, obits, nfd)
888b40ce416SJulian Elischer 	struct thread *td;
889b08f7993SSujal Patel 	fd_mask **ibits, **obits;
890cb226aaaSPoul-Henning Kamp 	int nfd;
891df8bae1dSRodney W. Grimes {
892f082218cSPeter Wemm 	int msk, i, fd;
893f082218cSPeter Wemm 	fd_mask bits;
894df8bae1dSRodney W. Grimes 	struct file *fp;
895df8bae1dSRodney W. Grimes 	int n = 0;
8962087c896SBruce Evans 	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
89742d11757SPeter Wemm 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
898eb209311SAlfred Perlstein 	struct filedesc *fdp = td->td_proc->p_fd;
899df8bae1dSRodney W. Grimes 
900eb209311SAlfred Perlstein 	FILEDESC_LOCK(fdp);
901df8bae1dSRodney W. Grimes 	for (msk = 0; msk < 3; msk++) {
902d5e4d7e1SBruce Evans 		if (ibits[msk] == NULL)
903d5e4d7e1SBruce Evans 			continue;
904df8bae1dSRodney W. Grimes 		for (i = 0; i < nfd; i += NFDBITS) {
905b08f7993SSujal Patel 			bits = ibits[msk][i/NFDBITS];
906f082218cSPeter Wemm 			/* ffs(int mask) not portable, fd_mask is long */
907f082218cSPeter Wemm 			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
908f082218cSPeter Wemm 				if (!(bits & 1))
909f082218cSPeter Wemm 					continue;
910eb209311SAlfred Perlstein 				if ((fp = fget_locked(fdp, fd)) == NULL) {
911eb209311SAlfred Perlstein 					FILEDESC_UNLOCK(fdp);
912df8bae1dSRodney W. Grimes 					return (EBADF);
913eb209311SAlfred Perlstein 				}
914ea6027a8SRobert Watson 				if (fo_poll(fp, flag[msk], td->td_ucred,
915ea6027a8SRobert Watson 				    td)) {
916b08f7993SSujal Patel 					obits[msk][(fd)/NFDBITS] |=
917f082218cSPeter Wemm 					    ((fd_mask)1 << ((fd) % NFDBITS));
918df8bae1dSRodney W. Grimes 					n++;
919df8bae1dSRodney W. Grimes 				}
920df8bae1dSRodney W. Grimes 			}
921df8bae1dSRodney W. Grimes 		}
922df8bae1dSRodney W. Grimes 	}
923eb209311SAlfred Perlstein 	FILEDESC_UNLOCK(fdp);
924b40ce416SJulian Elischer 	td->td_retval[0] = n;
925df8bae1dSRodney W. Grimes 	return (0);
926df8bae1dSRodney W. Grimes }
927df8bae1dSRodney W. Grimes 
92842d11757SPeter Wemm /*
92942d11757SPeter Wemm  * Poll system call.
93042d11757SPeter Wemm  */
93142d11757SPeter Wemm #ifndef _SYS_SYSPROTO_H_
93242d11757SPeter Wemm struct poll_args {
93342d11757SPeter Wemm 	struct pollfd *fds;
93442d11757SPeter Wemm 	u_int	nfds;
93542d11757SPeter Wemm 	int	timeout;
93642d11757SPeter Wemm };
93742d11757SPeter Wemm #endif
938ad2edad9SMatthew Dillon /*
939ad2edad9SMatthew Dillon  * MPSAFE
940ad2edad9SMatthew Dillon  */
94142d11757SPeter Wemm int
942b40ce416SJulian Elischer poll(td, uap)
943b40ce416SJulian Elischer 	struct thread *td;
944ea0237edSJonathan Lemon 	struct poll_args *uap;
94542d11757SPeter Wemm {
94642d11757SPeter Wemm 	caddr_t bits;
94742d11757SPeter Wemm 	char smallbits[32 * sizeof(struct pollfd)];
94800af9731SPoul-Henning Kamp 	struct timeval atv, rtv, ttv;
9499ae6d334SKelly Yancey 	int error = 0, timo;
9509ae6d334SKelly Yancey 	u_int ncoll, nfds;
95142d11757SPeter Wemm 	size_t ni;
95242d11757SPeter Wemm 
953d1e405c5SAlfred Perlstein 	nfds = uap->nfds;
954ad2edad9SMatthew Dillon 
955ad2edad9SMatthew Dillon 	mtx_lock(&Giant);
95689b71647SPeter Wemm 	/*
9572bd5ac33SPeter Wemm 	 * This is kinda bogus.  We have fd limits, but that is not
9582bd5ac33SPeter Wemm 	 * really related to the size of the pollfd array.  Make sure
9592bd5ac33SPeter Wemm 	 * we let the process use at least FD_SETSIZE entries and at
9602bd5ac33SPeter Wemm 	 * least enough for the current limits.  We want to be reasonably
9612bd5ac33SPeter Wemm 	 * safe, but not overly restrictive.
96289b71647SPeter Wemm 	 */
963b40ce416SJulian Elischer 	if ((nfds > td->td_proc->p_rlimit[RLIMIT_NOFILE].rlim_cur) &&
964b40ce416SJulian Elischer 	    (nfds > FD_SETSIZE)) {
965ad2edad9SMatthew Dillon 		error = EINVAL;
966ad2edad9SMatthew Dillon 		goto done2;
967ad2edad9SMatthew Dillon 	}
96889b71647SPeter Wemm 	ni = nfds * sizeof(struct pollfd);
96942d11757SPeter Wemm 	if (ni > sizeof(smallbits))
970a163d034SWarner Losh 		bits = malloc(ni, M_TEMP, M_WAITOK);
97142d11757SPeter Wemm 	else
97242d11757SPeter Wemm 		bits = smallbits;
973d1e405c5SAlfred Perlstein 	error = copyin(uap->fds, bits, ni);
97442d11757SPeter Wemm 	if (error)
97585f190e4SAlfred Perlstein 		goto done_nosellock;
976d1e405c5SAlfred Perlstein 	if (uap->timeout != INFTIM) {
977d1e405c5SAlfred Perlstein 		atv.tv_sec = uap->timeout / 1000;
978d1e405c5SAlfred Perlstein 		atv.tv_usec = (uap->timeout % 1000) * 1000;
97942d11757SPeter Wemm 		if (itimerfix(&atv)) {
98042d11757SPeter Wemm 			error = EINVAL;
98185f190e4SAlfred Perlstein 			goto done_nosellock;
98242d11757SPeter Wemm 		}
983c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
98400af9731SPoul-Henning Kamp 		timevaladd(&atv, &rtv);
9859c386f6bSJohn Baldwin 	} else {
98600af9731SPoul-Henning Kamp 		atv.tv_sec = 0;
9879c386f6bSJohn Baldwin 		atv.tv_usec = 0;
9889c386f6bSJohn Baldwin 	}
98900af9731SPoul-Henning Kamp 	timo = 0;
9902149c527SPeter Wemm 	TAILQ_INIT(&td->td_selq);
99185f190e4SAlfred Perlstein 	mtx_lock(&sellock);
99242d11757SPeter Wemm retry:
99342d11757SPeter Wemm 	ncoll = nselcoll;
994fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
995b40ce416SJulian Elischer 	td->td_flags |= TDF_SELECT;
996fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
99785f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
99885f190e4SAlfred Perlstein 
999b40ce416SJulian Elischer 	error = pollscan(td, (struct pollfd *)bits, nfds);
100085f190e4SAlfred Perlstein 	mtx_lock(&sellock);
1001b40ce416SJulian Elischer 	if (error || td->td_retval[0])
100242d11757SPeter Wemm 		goto done;
10034da144c0SJohn Baldwin 	if (atv.tv_sec || atv.tv_usec) {
1004c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
100585f190e4SAlfred Perlstein 		if (timevalcmp(&rtv, &atv, >=))
100642d11757SPeter Wemm 			goto done;
100700af9731SPoul-Henning Kamp 		ttv = atv;
100800af9731SPoul-Henning Kamp 		timevalsub(&ttv, &rtv);
100900af9731SPoul-Henning Kamp 		timo = ttv.tv_sec > 24 * 60 * 60 ?
101000af9731SPoul-Henning Kamp 		    24 * 60 * 60 * hz : tvtohz(&ttv);
101142d11757SPeter Wemm 	}
101285f190e4SAlfred Perlstein 	/*
101385f190e4SAlfred Perlstein 	 * An event of interest may occur while we do not hold
101485f190e4SAlfred Perlstein 	 * sellock, so check TDF_SELECT and the number of collisions
101585f190e4SAlfred Perlstein 	 * and rescan the file descriptors if necessary.
101685f190e4SAlfred Perlstein 	 */
1017fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
101885f190e4SAlfred Perlstein 	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
1019fea2ab83SJohn Baldwin 		mtx_unlock_spin(&sched_lock);
102085f190e4SAlfred Perlstein 		goto retry;
102185f190e4SAlfred Perlstein 	}
102285f190e4SAlfred Perlstein 	mtx_unlock_spin(&sched_lock);
102385f190e4SAlfred Perlstein 
1024265fc98fSSeigo Tanimura 	if (timo > 0)
102585f190e4SAlfred Perlstein 		error = cv_timedwait_sig(&selwait, &sellock, timo);
1026265fc98fSSeigo Tanimura 	else
102785f190e4SAlfred Perlstein 		error = cv_wait_sig(&selwait, &sellock);
102885f190e4SAlfred Perlstein 
102942d11757SPeter Wemm 	if (error == 0)
103042d11757SPeter Wemm 		goto retry;
1031265fc98fSSeigo Tanimura 
103242d11757SPeter Wemm done:
103385f190e4SAlfred Perlstein 	clear_selinfo_list(td);
1034fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
1035b40ce416SJulian Elischer 	td->td_flags &= ~TDF_SELECT;
1036fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
103785f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
103885f190e4SAlfred Perlstein 
103985f190e4SAlfred Perlstein done_nosellock:
104042d11757SPeter Wemm 	/* poll is not restarted after signals... */
104142d11757SPeter Wemm 	if (error == ERESTART)
104242d11757SPeter Wemm 		error = EINTR;
104342d11757SPeter Wemm 	if (error == EWOULDBLOCK)
104442d11757SPeter Wemm 		error = 0;
104542d11757SPeter Wemm 	if (error == 0) {
1046d1e405c5SAlfred Perlstein 		error = copyout(bits, uap->fds, ni);
104742d11757SPeter Wemm 		if (error)
104842d11757SPeter Wemm 			goto out;
104942d11757SPeter Wemm 	}
105042d11757SPeter Wemm out:
105142d11757SPeter Wemm 	if (ni > sizeof(smallbits))
105242d11757SPeter Wemm 		free(bits, M_TEMP);
1053ad2edad9SMatthew Dillon done2:
1054ad2edad9SMatthew Dillon 	mtx_unlock(&Giant);
105542d11757SPeter Wemm 	return (error);
105642d11757SPeter Wemm }
105742d11757SPeter Wemm 
105842d11757SPeter Wemm static int
1059b40ce416SJulian Elischer pollscan(td, fds, nfd)
1060b40ce416SJulian Elischer 	struct thread *td;
106142d11757SPeter Wemm 	struct pollfd *fds;
1062ea0237edSJonathan Lemon 	u_int nfd;
106342d11757SPeter Wemm {
1064b40ce416SJulian Elischer 	register struct filedesc *fdp = td->td_proc->p_fd;
106542d11757SPeter Wemm 	int i;
106642d11757SPeter Wemm 	struct file *fp;
106742d11757SPeter Wemm 	int n = 0;
106842d11757SPeter Wemm 
1069426da3bcSAlfred Perlstein 	FILEDESC_LOCK(fdp);
1070eb209311SAlfred Perlstein 	for (i = 0; i < nfd; i++, fds++) {
1071337c9691SJordan K. Hubbard 		if (fds->fd >= fdp->fd_nfiles) {
107242d11757SPeter Wemm 			fds->revents = POLLNVAL;
107342d11757SPeter Wemm 			n++;
1074337c9691SJordan K. Hubbard 		} else if (fds->fd < 0) {
1075337c9691SJordan K. Hubbard 			fds->revents = 0;
107642d11757SPeter Wemm 		} else {
107742d11757SPeter Wemm 			fp = fdp->fd_ofiles[fds->fd];
1078279d7226SMatthew Dillon 			if (fp == NULL) {
107942d11757SPeter Wemm 				fds->revents = POLLNVAL;
108042d11757SPeter Wemm 				n++;
108142d11757SPeter Wemm 			} else {
10822087c896SBruce Evans 				/*
10832087c896SBruce Evans 				 * Note: backend also returns POLLHUP and
10842087c896SBruce Evans 				 * POLLERR if appropriate.
10852087c896SBruce Evans 				 */
108613ccadd4SBrian Feldman 				fds->revents = fo_poll(fp, fds->events,
1087ea6027a8SRobert Watson 				    td->td_ucred, td);
108842d11757SPeter Wemm 				if (fds->revents != 0)
108942d11757SPeter Wemm 					n++;
109042d11757SPeter Wemm 			}
109142d11757SPeter Wemm 		}
109242d11757SPeter Wemm 	}
1093eb209311SAlfred Perlstein 	FILEDESC_UNLOCK(fdp);
1094b40ce416SJulian Elischer 	td->td_retval[0] = n;
109542d11757SPeter Wemm 	return (0);
109642d11757SPeter Wemm }
109742d11757SPeter Wemm 
109842d11757SPeter Wemm /*
109942d11757SPeter Wemm  * OpenBSD poll system call.
110042d11757SPeter Wemm  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
110142d11757SPeter Wemm  */
110242d11757SPeter Wemm #ifndef _SYS_SYSPROTO_H_
110342d11757SPeter Wemm struct openbsd_poll_args {
110442d11757SPeter Wemm 	struct pollfd *fds;
110542d11757SPeter Wemm 	u_int	nfds;
110642d11757SPeter Wemm 	int	timeout;
110742d11757SPeter Wemm };
110842d11757SPeter Wemm #endif
1109ad2edad9SMatthew Dillon /*
1110ad2edad9SMatthew Dillon  * MPSAFE
1111ad2edad9SMatthew Dillon  */
111242d11757SPeter Wemm int
1113b40ce416SJulian Elischer openbsd_poll(td, uap)
1114b40ce416SJulian Elischer 	register struct thread *td;
111542d11757SPeter Wemm 	register struct openbsd_poll_args *uap;
111642d11757SPeter Wemm {
1117b40ce416SJulian Elischer 	return (poll(td, (struct poll_args *)uap));
111842d11757SPeter Wemm }
111942d11757SPeter Wemm 
112085f190e4SAlfred Perlstein /*
112185f190e4SAlfred Perlstein  * Remove the references to the thread from all of the objects
112285f190e4SAlfred Perlstein  * we were polling.
112385f190e4SAlfred Perlstein  *
112485f190e4SAlfred Perlstein  * This code assumes that the underlying owner of the selinfo
112585f190e4SAlfred Perlstein  * structure will hold sellock before it changes it, and that
112685f190e4SAlfred Perlstein  * it will unlink itself from our list if it goes away.
112785f190e4SAlfred Perlstein  */
112885f190e4SAlfred Perlstein void
112985f190e4SAlfred Perlstein clear_selinfo_list(td)
113085f190e4SAlfred Perlstein 	struct thread *td;
113185f190e4SAlfred Perlstein {
113285f190e4SAlfred Perlstein 	struct selinfo *si;
113385f190e4SAlfred Perlstein 
113485f190e4SAlfred Perlstein 	mtx_assert(&sellock, MA_OWNED);
113585f190e4SAlfred Perlstein 	TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
113685f190e4SAlfred Perlstein 		si->si_thread = NULL;
113785f190e4SAlfred Perlstein 	TAILQ_INIT(&td->td_selq);
113885f190e4SAlfred Perlstein }
113985f190e4SAlfred Perlstein 
1140df8bae1dSRodney W. Grimes /*ARGSUSED*/
114126f9a767SRodney W. Grimes int
1142b40ce416SJulian Elischer seltrue(dev, events, td)
1143df8bae1dSRodney W. Grimes 	dev_t dev;
114442d11757SPeter Wemm 	int events;
1145b40ce416SJulian Elischer 	struct thread *td;
1146df8bae1dSRodney W. Grimes {
1147df8bae1dSRodney W. Grimes 
114842d11757SPeter Wemm 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
1149df8bae1dSRodney W. Grimes }
1150df8bae1dSRodney W. Grimes 
1151df8bae1dSRodney W. Grimes /*
1152df8bae1dSRodney W. Grimes  * Record a select request.
1153df8bae1dSRodney W. Grimes  */
1154df8bae1dSRodney W. Grimes void
1155df8bae1dSRodney W. Grimes selrecord(selector, sip)
1156b40ce416SJulian Elischer 	struct thread *selector;
1157df8bae1dSRodney W. Grimes 	struct selinfo *sip;
1158df8bae1dSRodney W. Grimes {
1159df8bae1dSRodney W. Grimes 
116085f190e4SAlfred Perlstein 	mtx_lock(&sellock);
116185f190e4SAlfred Perlstein 	/*
1162b605b54cSAlfred Perlstein 	 * If the selinfo's thread pointer is NULL then take ownership of it.
1163b605b54cSAlfred Perlstein 	 *
1164b605b54cSAlfred Perlstein 	 * If the thread pointer is not NULL and it points to another
1165b605b54cSAlfred Perlstein 	 * thread, then we have a collision.
1166b605b54cSAlfred Perlstein 	 *
1167b605b54cSAlfred Perlstein 	 * If the thread pointer is not NULL and points back to us then leave
1168b605b54cSAlfred Perlstein 	 * it alone as we've already added pointed it at us and added it to
1169b605b54cSAlfred Perlstein 	 * our list.
117085f190e4SAlfred Perlstein 	 */
117185f190e4SAlfred Perlstein 	if (sip->si_thread == NULL) {
1172b40ce416SJulian Elischer 		sip->si_thread = selector;
117385f190e4SAlfred Perlstein 		TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
117485f190e4SAlfred Perlstein 	} else if (sip->si_thread != selector) {
117585f190e4SAlfred Perlstein 		sip->si_flags |= SI_COLL;
117685f190e4SAlfred Perlstein 	}
117785f190e4SAlfred Perlstein 
117885f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
1179df8bae1dSRodney W. Grimes }
1180df8bae1dSRodney W. Grimes 
1181df8bae1dSRodney W. Grimes /*
1182df8bae1dSRodney W. Grimes  * Do a wakeup when a selectable event occurs.
1183df8bae1dSRodney W. Grimes  */
1184df8bae1dSRodney W. Grimes void
1185df8bae1dSRodney W. Grimes selwakeup(sip)
118685f190e4SAlfred Perlstein 	struct selinfo *sip;
1187df8bae1dSRodney W. Grimes {
1188b40ce416SJulian Elischer 	struct thread *td;
1189df8bae1dSRodney W. Grimes 
119085f190e4SAlfred Perlstein 	mtx_lock(&sellock);
119185f190e4SAlfred Perlstein 	td = sip->si_thread;
119285f190e4SAlfred Perlstein 	if ((sip->si_flags & SI_COLL) != 0) {
1193df8bae1dSRodney W. Grimes 		nselcoll++;
1194df8bae1dSRodney W. Grimes 		sip->si_flags &= ~SI_COLL;
1195265fc98fSSeigo Tanimura 		cv_broadcast(&selwait);
1196df8bae1dSRodney W. Grimes 	}
119785f190e4SAlfred Perlstein 	if (td == NULL) {
119885f190e4SAlfred Perlstein 		mtx_unlock(&sellock);
1199b40ce416SJulian Elischer 		return;
1200b40ce416SJulian Elischer 	}
120185f190e4SAlfred Perlstein 	TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
120285f190e4SAlfred Perlstein 	sip->si_thread = NULL;
12039ed346baSBosko Milekic 	mtx_lock_spin(&sched_lock);
12040a3e28cfSAlfred Perlstein 	if (td->td_wchan == &selwait) {
1205b40ce416SJulian Elischer 		cv_waitq_remove(td);
120671fad9fdSJulian Elischer 		TD_CLR_SLEEPING(td);
120771fad9fdSJulian Elischer 		setrunnable(td);
120833a9ed9dSJohn Baldwin 	} else
1209b40ce416SJulian Elischer 		td->td_flags &= ~TDF_SELECT;
121033a9ed9dSJohn Baldwin 	mtx_unlock_spin(&sched_lock);
121185f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
1212df8bae1dSRodney W. Grimes }
1213265fc98fSSeigo Tanimura 
12144d77a549SAlfred Perlstein static void selectinit(void *);
1215265fc98fSSeigo Tanimura SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
1216265fc98fSSeigo Tanimura 
1217265fc98fSSeigo Tanimura /* ARGSUSED*/
1218265fc98fSSeigo Tanimura static void
1219265fc98fSSeigo Tanimura selectinit(dummy)
1220265fc98fSSeigo Tanimura 	void *dummy;
1221265fc98fSSeigo Tanimura {
1222265fc98fSSeigo Tanimura 	cv_init(&selwait, "select");
12236008862bSJohn Baldwin 	mtx_init(&sellock, "sellck", NULL, MTX_DEF);
1224265fc98fSSeigo Tanimura }
1225