xref: /freebsd/sys/kern/sys_generic.c (revision 2de92a386e6c6607bc212cdd576e03675d79000d)
19454b2d8SWarner Losh /*-
2df8bae1dSRodney W. Grimes  * Copyright (c) 1982, 1986, 1989, 1993
3df8bae1dSRodney W. Grimes  *	The Regents of the University of California.  All rights reserved.
4df8bae1dSRodney W. Grimes  * (c) UNIX System Laboratories, Inc.
5df8bae1dSRodney W. Grimes  * All or some portions of this file are derived from material licensed
6df8bae1dSRodney W. Grimes  * to the University of California by American Telephone and Telegraph
7df8bae1dSRodney W. Grimes  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8df8bae1dSRodney W. Grimes  * the permission of UNIX System Laboratories, Inc.
9df8bae1dSRodney W. Grimes  *
10df8bae1dSRodney W. Grimes  * Redistribution and use in source and binary forms, with or without
11df8bae1dSRodney W. Grimes  * modification, are permitted provided that the following conditions
12df8bae1dSRodney W. Grimes  * are met:
13df8bae1dSRodney W. Grimes  * 1. Redistributions of source code must retain the above copyright
14df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer.
15df8bae1dSRodney W. Grimes  * 2. Redistributions in binary form must reproduce the above copyright
16df8bae1dSRodney W. Grimes  *    notice, this list of conditions and the following disclaimer in the
17df8bae1dSRodney W. Grimes  *    documentation and/or other materials provided with the distribution.
18df8bae1dSRodney W. Grimes  * 4. Neither the name of the University nor the names of its contributors
19df8bae1dSRodney W. Grimes  *    may be used to endorse or promote products derived from this software
20df8bae1dSRodney W. Grimes  *    without specific prior written permission.
21df8bae1dSRodney W. Grimes  *
22df8bae1dSRodney W. Grimes  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23df8bae1dSRodney W. Grimes  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24df8bae1dSRodney W. Grimes  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25df8bae1dSRodney W. Grimes  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26df8bae1dSRodney W. Grimes  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27df8bae1dSRodney W. Grimes  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28df8bae1dSRodney W. Grimes  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29df8bae1dSRodney W. Grimes  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30df8bae1dSRodney W. Grimes  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31df8bae1dSRodney W. Grimes  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32df8bae1dSRodney W. Grimes  * SUCH DAMAGE.
33df8bae1dSRodney W. Grimes  *
34df8bae1dSRodney W. Grimes  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
35df8bae1dSRodney W. Grimes  */
36df8bae1dSRodney W. Grimes 
37677b542eSDavid E. O'Brien #include <sys/cdefs.h>
38677b542eSDavid E. O'Brien __FBSDID("$FreeBSD$");
39677b542eSDavid E. O'Brien 
402de92a38SPeter Wemm #include "opt_compat.h"
41db6a20e2SGarrett Wollman #include "opt_ktrace.h"
42db6a20e2SGarrett Wollman 
43df8bae1dSRodney W. Grimes #include <sys/param.h>
44df8bae1dSRodney W. Grimes #include <sys/systm.h>
45d2d3e875SBruce Evans #include <sys/sysproto.h>
46df8bae1dSRodney W. Grimes #include <sys/filedesc.h>
4720982410SBruce Evans #include <sys/filio.h>
483ac4d1efSBruce Evans #include <sys/fcntl.h>
49df8bae1dSRodney W. Grimes #include <sys/file.h>
50df8bae1dSRodney W. Grimes #include <sys/proc.h>
51797f2d22SPoul-Henning Kamp #include <sys/signalvar.h>
52df8bae1dSRodney W. Grimes #include <sys/socketvar.h>
53df8bae1dSRodney W. Grimes #include <sys/uio.h>
54df8bae1dSRodney W. Grimes #include <sys/kernel.h>
55104a9b7eSAlexander Kabaev #include <sys/limits.h>
56df8bae1dSRodney W. Grimes #include <sys/malloc.h>
5742d11757SPeter Wemm #include <sys/poll.h>
5889b71647SPeter Wemm #include <sys/resourcevar.h>
590a2c3d48SGarrett Wollman #include <sys/selinfo.h>
6044f3b092SJohn Baldwin #include <sys/sleepqueue.h>
618f19eb88SIan Dowse #include <sys/syscallsubr.h>
628cb96f20SPeter Wemm #include <sys/sysctl.h>
6342d11757SPeter Wemm #include <sys/sysent.h>
649bbee259SAndrey A. Chernov #include <sys/vnode.h>
65279d7226SMatthew Dillon #include <sys/bio.h>
66279d7226SMatthew Dillon #include <sys/buf.h>
67265fc98fSSeigo Tanimura #include <sys/condvar.h>
68df8bae1dSRodney W. Grimes #ifdef KTRACE
69df8bae1dSRodney W. Grimes #include <sys/ktrace.h>
70df8bae1dSRodney W. Grimes #endif
71279d7226SMatthew Dillon #include <vm/vm.h>
72279d7226SMatthew Dillon #include <vm/vm_page.h>
73df8bae1dSRodney W. Grimes 
74a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
75a1c995b6SPoul-Henning Kamp static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
76a1c995b6SPoul-Henning Kamp MALLOC_DEFINE(M_IOV, "iov", "large iov's");
7755166637SPoul-Henning Kamp 
78bbbb04ceSAlfred Perlstein static int	pollscan(struct thread *, struct pollfd *, u_int);
79bbbb04ceSAlfred Perlstein static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
80bbbb04ceSAlfred Perlstein static int	dofileread(struct thread *, struct file *, int, void *,
81bbbb04ceSAlfred Perlstein 		    size_t, off_t, int);
82bbbb04ceSAlfred Perlstein static int	dofilewrite(struct thread *, struct file *, int,
83bbbb04ceSAlfred Perlstein 		    const void *, size_t, off_t, int);
84512824f8SSeigo Tanimura static void	doselwakeup(struct selinfo *, int);
858fe387abSDmitrij Tejblum 
/*
 * Read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct read_args {
	int	fd;
	void	*buf;
	size_t	nbyte;
};
#endif
/*
 * MPSAFE
 *
 * Resolve uap->fd to a file opened for reading and pass the request to
 * dofileread().  Returns 0 or an errno value; the byte count is stored
 * in td->td_retval[0] by dofileread().
 */
int
read(struct thread *td, struct read_args *uap)
{
	struct file *fp;
	int error;

	error = fget_read(td, uap->fd, &fp);
	if (error != 0)
		return (error);

	/* No FOF_OFFSET flag is passed here, unlike pread(). */
	error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte,
	    (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}
114df8bae1dSRodney W. Grimes 
115df8bae1dSRodney W. Grimes /*
1168fe387abSDmitrij Tejblum  * Pread system call
1174160ccd9SAlan Cox  */
1184160ccd9SAlan Cox #ifndef _SYS_SYSPROTO_H_
1194160ccd9SAlan Cox struct pread_args {
1204160ccd9SAlan Cox 	int	fd;
1214160ccd9SAlan Cox 	void	*buf;
1224160ccd9SAlan Cox 	size_t	nbyte;
1238fe387abSDmitrij Tejblum 	int	pad;
1244160ccd9SAlan Cox 	off_t	offset;
1254160ccd9SAlan Cox };
1264160ccd9SAlan Cox #endif
127ad2edad9SMatthew Dillon /*
128ad2edad9SMatthew Dillon  * MPSAFE
129ad2edad9SMatthew Dillon  */
1304160ccd9SAlan Cox int
131b40ce416SJulian Elischer pread(td, uap)
132b40ce416SJulian Elischer 	struct thread *td;
133b064d43dSMatthew Dillon 	struct pread_args *uap;
1344160ccd9SAlan Cox {
135b064d43dSMatthew Dillon 	struct file *fp;
136279d7226SMatthew Dillon 	int error;
1378fe387abSDmitrij Tejblum 
13897fa4397SAlfred Perlstein 	if ((error = fget_read(td, uap->fd, &fp)) != 0)
13997fa4397SAlfred Perlstein 		return (error);
1409bbee259SAndrey A. Chernov 	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
141b064d43dSMatthew Dillon 		error = ESPIPE;
1429bbee259SAndrey A. Chernov 	else if (uap->offset < 0 && fp->f_vnode->v_type != VCHR)
1439bbee259SAndrey A. Chernov 		error = EINVAL;
1449bbee259SAndrey A. Chernov 	else {
145426da3bcSAlfred Perlstein 		error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte,
146426da3bcSAlfred Perlstein 			    uap->offset, FOF_OFFSET);
147b064d43dSMatthew Dillon 	}
148b40ce416SJulian Elischer 	fdrop(fp, td);
149279d7226SMatthew Dillon 	return(error);
1508fe387abSDmitrij Tejblum }
1518fe387abSDmitrij Tejblum 
1528fe387abSDmitrij Tejblum /*
1538fe387abSDmitrij Tejblum  * Code common for read and pread
1548fe387abSDmitrij Tejblum  */
15537c84183SPoul-Henning Kamp static int
156b40ce416SJulian Elischer dofileread(td, fp, fd, buf, nbyte, offset, flags)
157b40ce416SJulian Elischer 	struct thread *td;
1588fe387abSDmitrij Tejblum 	struct file *fp;
1598fe387abSDmitrij Tejblum 	int fd, flags;
1608fe387abSDmitrij Tejblum 	void *buf;
1618fe387abSDmitrij Tejblum 	size_t nbyte;
1628fe387abSDmitrij Tejblum 	off_t offset;
1638fe387abSDmitrij Tejblum {
1644160ccd9SAlan Cox 	struct uio auio;
1654160ccd9SAlan Cox 	struct iovec aiov;
166e5e6a464SColin Percival 	ssize_t cnt;
167e5e6a464SColin Percival 	long error = 0;
1684160ccd9SAlan Cox #ifdef KTRACE
169552afd9cSPoul-Henning Kamp 	struct uio *ktruio = NULL;
1704160ccd9SAlan Cox #endif
1714160ccd9SAlan Cox 
1724f8d23d6SPoul-Henning Kamp 	/* Finish zero length reads right here */
1734f8d23d6SPoul-Henning Kamp 	if (nbyte == 0) {
1744f8d23d6SPoul-Henning Kamp 		td->td_retval[0] = 0;
1754f8d23d6SPoul-Henning Kamp 		return(0);
1764f8d23d6SPoul-Henning Kamp 	}
1770a3e28cfSAlfred Perlstein 	aiov.iov_base = buf;
1788fe387abSDmitrij Tejblum 	aiov.iov_len = nbyte;
1794160ccd9SAlan Cox 	auio.uio_iov = &aiov;
1804160ccd9SAlan Cox 	auio.uio_iovcnt = 1;
1818fe387abSDmitrij Tejblum 	auio.uio_offset = offset;
1828fe387abSDmitrij Tejblum 	if (nbyte > INT_MAX)
1834160ccd9SAlan Cox 		return (EINVAL);
1848fe387abSDmitrij Tejblum 	auio.uio_resid = nbyte;
1854160ccd9SAlan Cox 	auio.uio_rw = UIO_READ;
1864160ccd9SAlan Cox 	auio.uio_segflg = UIO_USERSPACE;
187b40ce416SJulian Elischer 	auio.uio_td = td;
1884160ccd9SAlan Cox #ifdef KTRACE
189552afd9cSPoul-Henning Kamp 	if (KTRPOINT(td, KTR_GENIO))
190552afd9cSPoul-Henning Kamp 		ktruio = cloneuio(&auio);
1914160ccd9SAlan Cox #endif
1928fe387abSDmitrij Tejblum 	cnt = nbyte;
193279d7226SMatthew Dillon 
1949ca43589SRobert Watson 	if ((error = fo_read(fp, &auio, td->td_ucred, flags, td))) {
1954160ccd9SAlan Cox 		if (auio.uio_resid != cnt && (error == ERESTART ||
1964160ccd9SAlan Cox 		    error == EINTR || error == EWOULDBLOCK))
1974160ccd9SAlan Cox 			error = 0;
198279d7226SMatthew Dillon 	}
1994160ccd9SAlan Cox 	cnt -= auio.uio_resid;
2004160ccd9SAlan Cox #ifdef KTRACE
201552afd9cSPoul-Henning Kamp 	if (ktruio != NULL) {
202552afd9cSPoul-Henning Kamp 		ktruio->uio_resid = cnt;
203552afd9cSPoul-Henning Kamp 		ktrgenio(fd, UIO_READ, ktruio, error);
20442ebfbf2SBrian Feldman 	}
2054160ccd9SAlan Cox #endif
206b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
2074160ccd9SAlan Cox 	return (error);
2084160ccd9SAlan Cox }
2094160ccd9SAlan Cox 
2104160ccd9SAlan Cox /*
211df8bae1dSRodney W. Grimes  * Scatter read system call.
212df8bae1dSRodney W. Grimes  */
213d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
214df8bae1dSRodney W. Grimes struct readv_args {
2157147b19dSBruce Evans 	int	fd;
216df8bae1dSRodney W. Grimes 	struct	iovec *iovp;
217df8bae1dSRodney W. Grimes 	u_int	iovcnt;
218df8bae1dSRodney W. Grimes };
219d2d3e875SBruce Evans #endif
220ad2edad9SMatthew Dillon /*
221ad2edad9SMatthew Dillon  * MPSAFE
222ad2edad9SMatthew Dillon  */
22326f9a767SRodney W. Grimes int
224552afd9cSPoul-Henning Kamp readv(struct thread *td, struct readv_args *uap)
225df8bae1dSRodney W. Grimes {
226b88ec951SJohn Baldwin 	struct uio *auio;
227b88ec951SJohn Baldwin 	int error;
228b88ec951SJohn Baldwin 
229b88ec951SJohn Baldwin 	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
230b88ec951SJohn Baldwin 	if (error)
231b88ec951SJohn Baldwin 		return (error);
232b88ec951SJohn Baldwin 	error = kern_readv(td, uap->fd, auio);
233b88ec951SJohn Baldwin 	free(auio, M_IOV);
234b88ec951SJohn Baldwin 	return (error);
235b88ec951SJohn Baldwin }
236b88ec951SJohn Baldwin 
237b88ec951SJohn Baldwin int
238b88ec951SJohn Baldwin kern_readv(struct thread *td, int fd, struct uio *auio)
239b88ec951SJohn Baldwin {
240b064d43dSMatthew Dillon 	struct file *fp;
241552afd9cSPoul-Henning Kamp 	long cnt;
24282641acdSAlan Cox 	int error;
243df8bae1dSRodney W. Grimes #ifdef KTRACE
244552afd9cSPoul-Henning Kamp 	struct uio *ktruio = NULL;
245df8bae1dSRodney W. Grimes #endif
246df8bae1dSRodney W. Grimes 
247b88ec951SJohn Baldwin 	error = fget_read(td, fd, &fp);
248552afd9cSPoul-Henning Kamp 	if (error)
24982641acdSAlan Cox 		return (error);
2504f8d23d6SPoul-Henning Kamp 	/* Finish zero length reads right here */
2514f8d23d6SPoul-Henning Kamp 	if (auio->uio_resid == 0) {
2524f8d23d6SPoul-Henning Kamp 		td->td_retval[0] = 0;
2534f8d23d6SPoul-Henning Kamp 		fdrop(fp, td);
2544f8d23d6SPoul-Henning Kamp 		return(0);
2554f8d23d6SPoul-Henning Kamp 	}
256552afd9cSPoul-Henning Kamp 	auio->uio_rw = UIO_READ;
257552afd9cSPoul-Henning Kamp 	auio->uio_td = td;
258df8bae1dSRodney W. Grimes #ifdef KTRACE
259552afd9cSPoul-Henning Kamp 	if (KTRPOINT(td, KTR_GENIO))
260552afd9cSPoul-Henning Kamp 		ktruio = cloneuio(auio);
261df8bae1dSRodney W. Grimes #endif
262552afd9cSPoul-Henning Kamp 	cnt = auio->uio_resid;
263552afd9cSPoul-Henning Kamp 	if ((error = fo_read(fp, auio, td->td_ucred, 0, td))) {
264552afd9cSPoul-Henning Kamp 		if (auio->uio_resid != cnt && (error == ERESTART ||
265df8bae1dSRodney W. Grimes 		    error == EINTR || error == EWOULDBLOCK))
266df8bae1dSRodney W. Grimes 			error = 0;
267279d7226SMatthew Dillon 	}
268552afd9cSPoul-Henning Kamp 	cnt -= auio->uio_resid;
269df8bae1dSRodney W. Grimes #ifdef KTRACE
270552afd9cSPoul-Henning Kamp 	if (ktruio != NULL) {
271552afd9cSPoul-Henning Kamp 		ktruio->uio_resid = cnt;
272b88ec951SJohn Baldwin 		ktrgenio(fd, UIO_READ, ktruio, error);
273df8bae1dSRodney W. Grimes 	}
274df8bae1dSRodney W. Grimes #endif
275b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
276b40ce416SJulian Elischer 	fdrop(fp, td);
277df8bae1dSRodney W. Grimes 	return (error);
278df8bae1dSRodney W. Grimes }
279df8bae1dSRodney W. Grimes 
/*
 * Write system call
 */
#ifndef _SYS_SYSPROTO_H_
struct write_args {
	int	fd;
	const void *buf;
	size_t	nbyte;
};
#endif
/*
 * MPSAFE
 *
 * Resolve uap->fd to a file opened for writing and pass the request to
 * dofilewrite().  Returns 0 or an errno value; the byte count is stored
 * in td->td_retval[0] by dofilewrite().
 */
int
write(struct thread *td, struct write_args *uap)
{
	struct file *fp;
	int error;

	/*
	 * Any fget_write() failure is reported as EBADF, whatever code
	 * fget_write() itself returned.
	 */
	if (fget_write(td, uap->fd, &fp) != 0)
		return (EBADF);	/* XXX this can't be right */

	/* No FOF_OFFSET flag is passed here, unlike pwrite(). */
	error = dofilewrite(td, fp, uap->fd, uap->buf, uap->nbyte,
	    (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}
310df8bae1dSRodney W. Grimes 
311df8bae1dSRodney W. Grimes /*
3128fe387abSDmitrij Tejblum  * Pwrite system call
3134160ccd9SAlan Cox  */
3144160ccd9SAlan Cox #ifndef _SYS_SYSPROTO_H_
3154160ccd9SAlan Cox struct pwrite_args {
3164160ccd9SAlan Cox 	int	fd;
3174160ccd9SAlan Cox 	const void *buf;
3184160ccd9SAlan Cox 	size_t	nbyte;
3198fe387abSDmitrij Tejblum 	int	pad;
3204160ccd9SAlan Cox 	off_t	offset;
3214160ccd9SAlan Cox };
3224160ccd9SAlan Cox #endif
323ad2edad9SMatthew Dillon /*
324ad2edad9SMatthew Dillon  * MPSAFE
325ad2edad9SMatthew Dillon  */
3264160ccd9SAlan Cox int
327b40ce416SJulian Elischer pwrite(td, uap)
328b40ce416SJulian Elischer 	struct thread *td;
329b064d43dSMatthew Dillon 	struct pwrite_args *uap;
3304160ccd9SAlan Cox {
331b064d43dSMatthew Dillon 	struct file *fp;
332279d7226SMatthew Dillon 	int error;
3338fe387abSDmitrij Tejblum 
334b064d43dSMatthew Dillon 	if ((error = fget_write(td, uap->fd, &fp)) == 0) {
3359bbee259SAndrey A. Chernov 		if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
3362db4b023SPoul-Henning Kamp 			error = ESPIPE;
3379bbee259SAndrey A. Chernov 		else if (uap->offset < 0 && fp->f_vnode->v_type != VCHR)
3389bbee259SAndrey A. Chernov 			error = EINVAL;
3399bbee259SAndrey A. Chernov 		else {
340b064d43dSMatthew Dillon 			error = dofilewrite(td, fp, uap->fd, uap->buf,
341b064d43dSMatthew Dillon 				    uap->nbyte, uap->offset, FOF_OFFSET);
342b064d43dSMatthew Dillon 		}
343b40ce416SJulian Elischer 		fdrop(fp, td);
344279d7226SMatthew Dillon 	} else {
345b064d43dSMatthew Dillon 		error = EBADF;	/* this can't be right */
346ad2edad9SMatthew Dillon 	}
347279d7226SMatthew Dillon 	return(error);
3488fe387abSDmitrij Tejblum }
3498fe387abSDmitrij Tejblum 
3508fe387abSDmitrij Tejblum static int
351b40ce416SJulian Elischer dofilewrite(td, fp, fd, buf, nbyte, offset, flags)
352b40ce416SJulian Elischer 	struct thread *td;
3538fe387abSDmitrij Tejblum 	struct file *fp;
3548fe387abSDmitrij Tejblum 	int fd, flags;
3558fe387abSDmitrij Tejblum 	const void *buf;
3568fe387abSDmitrij Tejblum 	size_t nbyte;
3578fe387abSDmitrij Tejblum 	off_t offset;
3588fe387abSDmitrij Tejblum {
3594160ccd9SAlan Cox 	struct uio auio;
3604160ccd9SAlan Cox 	struct iovec aiov;
361e5e6a464SColin Percival 	ssize_t cnt;
362e5e6a464SColin Percival 	long error = 0;
3634160ccd9SAlan Cox #ifdef KTRACE
364552afd9cSPoul-Henning Kamp 	struct uio *ktruio = NULL;
3654160ccd9SAlan Cox #endif
3664160ccd9SAlan Cox 
367b31ae1adSPeter Wemm 	aiov.iov_base = (void *)(uintptr_t)buf;
3688fe387abSDmitrij Tejblum 	aiov.iov_len = nbyte;
3694160ccd9SAlan Cox 	auio.uio_iov = &aiov;
3704160ccd9SAlan Cox 	auio.uio_iovcnt = 1;
3718fe387abSDmitrij Tejblum 	auio.uio_offset = offset;
3728fe387abSDmitrij Tejblum 	if (nbyte > INT_MAX)
3734160ccd9SAlan Cox 		return (EINVAL);
3748fe387abSDmitrij Tejblum 	auio.uio_resid = nbyte;
3754160ccd9SAlan Cox 	auio.uio_rw = UIO_WRITE;
3764160ccd9SAlan Cox 	auio.uio_segflg = UIO_USERSPACE;
377b40ce416SJulian Elischer 	auio.uio_td = td;
3784160ccd9SAlan Cox #ifdef KTRACE
379552afd9cSPoul-Henning Kamp 	if (KTRPOINT(td, KTR_GENIO))
380552afd9cSPoul-Henning Kamp 		ktruio = cloneuio(&auio);
3814160ccd9SAlan Cox #endif
3828fe387abSDmitrij Tejblum 	cnt = nbyte;
383c6ab5768SAlfred Perlstein 	if (fp->f_type == DTYPE_VNODE)
384279d7226SMatthew Dillon 		bwillwrite();
3859ca43589SRobert Watson 	if ((error = fo_write(fp, &auio, td->td_ucred, flags, td))) {
3864160ccd9SAlan Cox 		if (auio.uio_resid != cnt && (error == ERESTART ||
3874160ccd9SAlan Cox 		    error == EINTR || error == EWOULDBLOCK))
3884160ccd9SAlan Cox 			error = 0;
389c33c8251SAlfred Perlstein 		/* Socket layer is responsible for issuing SIGPIPE. */
390c33c8251SAlfred Perlstein 		if (error == EPIPE && fp->f_type != DTYPE_SOCKET) {
391b40ce416SJulian Elischer 			PROC_LOCK(td->td_proc);
392b40ce416SJulian Elischer 			psignal(td->td_proc, SIGPIPE);
393b40ce416SJulian Elischer 			PROC_UNLOCK(td->td_proc);
39419eb87d2SJohn Baldwin 		}
3954160ccd9SAlan Cox 	}
3964160ccd9SAlan Cox 	cnt -= auio.uio_resid;
3974160ccd9SAlan Cox #ifdef KTRACE
398552afd9cSPoul-Henning Kamp 	if (ktruio != NULL) {
399552afd9cSPoul-Henning Kamp 		ktruio->uio_resid = cnt;
400552afd9cSPoul-Henning Kamp 		ktrgenio(fd, UIO_WRITE, ktruio, error);
40142ebfbf2SBrian Feldman 	}
4024160ccd9SAlan Cox #endif
403b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
4044160ccd9SAlan Cox 	return (error);
4054160ccd9SAlan Cox }
4064160ccd9SAlan Cox 
4074160ccd9SAlan Cox /*
408df8bae1dSRodney W. Grimes  * Gather write system call
409df8bae1dSRodney W. Grimes  */
410d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
411df8bae1dSRodney W. Grimes struct writev_args {
412df8bae1dSRodney W. Grimes 	int	fd;
413df8bae1dSRodney W. Grimes 	struct	iovec *iovp;
414df8bae1dSRodney W. Grimes 	u_int	iovcnt;
415df8bae1dSRodney W. Grimes };
416d2d3e875SBruce Evans #endif
417ad2edad9SMatthew Dillon /*
418ad2edad9SMatthew Dillon  * MPSAFE
419ad2edad9SMatthew Dillon  */
42026f9a767SRodney W. Grimes int
421552afd9cSPoul-Henning Kamp writev(struct thread *td, struct writev_args *uap)
422df8bae1dSRodney W. Grimes {
423b88ec951SJohn Baldwin 	struct uio *auio;
424b88ec951SJohn Baldwin 	int error;
425b88ec951SJohn Baldwin 
426b88ec951SJohn Baldwin 	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
427b88ec951SJohn Baldwin 	if (error)
428b88ec951SJohn Baldwin 		return (error);
429b88ec951SJohn Baldwin 	error = kern_writev(td, uap->fd, auio);
430b88ec951SJohn Baldwin 	free(auio, M_IOV);
431b88ec951SJohn Baldwin 	return (error);
432b88ec951SJohn Baldwin }
433b88ec951SJohn Baldwin 
434b88ec951SJohn Baldwin int
435b88ec951SJohn Baldwin kern_writev(struct thread *td, int fd, struct uio *auio)
436b88ec951SJohn Baldwin {
437b064d43dSMatthew Dillon 	struct file *fp;
438552afd9cSPoul-Henning Kamp 	long cnt;
439552afd9cSPoul-Henning Kamp 	int error;
440df8bae1dSRodney W. Grimes #ifdef KTRACE
441552afd9cSPoul-Henning Kamp 	struct uio *ktruio = NULL;
442df8bae1dSRodney W. Grimes #endif
443df8bae1dSRodney W. Grimes 
444b88ec951SJohn Baldwin 	error = fget_write(td, fd, &fp);
445552afd9cSPoul-Henning Kamp 	if (error)
446882d8469SAlan Cox 		return (EBADF);
447552afd9cSPoul-Henning Kamp 	auio->uio_rw = UIO_WRITE;
448552afd9cSPoul-Henning Kamp 	auio->uio_td = td;
449df8bae1dSRodney W. Grimes #ifdef KTRACE
450552afd9cSPoul-Henning Kamp 	if (KTRPOINT(td, KTR_GENIO))
451552afd9cSPoul-Henning Kamp 		ktruio = cloneuio(auio);
452df8bae1dSRodney W. Grimes #endif
453552afd9cSPoul-Henning Kamp 	cnt = auio->uio_resid;
454a41ce5d3SMatthew Dillon 	if (fp->f_type == DTYPE_VNODE)
4559440653dSMatthew Dillon 		bwillwrite();
456552afd9cSPoul-Henning Kamp 	if ((error = fo_write(fp, auio, td->td_ucred, 0, td))) {
457552afd9cSPoul-Henning Kamp 		if (auio->uio_resid != cnt && (error == ERESTART ||
458df8bae1dSRodney W. Grimes 		    error == EINTR || error == EWOULDBLOCK))
459df8bae1dSRodney W. Grimes 			error = 0;
46019eb87d2SJohn Baldwin 		if (error == EPIPE) {
461b40ce416SJulian Elischer 			PROC_LOCK(td->td_proc);
462b40ce416SJulian Elischer 			psignal(td->td_proc, SIGPIPE);
463b40ce416SJulian Elischer 			PROC_UNLOCK(td->td_proc);
46419eb87d2SJohn Baldwin 		}
465df8bae1dSRodney W. Grimes 	}
466552afd9cSPoul-Henning Kamp 	cnt -= auio->uio_resid;
467df8bae1dSRodney W. Grimes #ifdef KTRACE
468552afd9cSPoul-Henning Kamp 	if (ktruio != NULL) {
469552afd9cSPoul-Henning Kamp 		ktruio->uio_resid = cnt;
470b88ec951SJohn Baldwin 		ktrgenio(fd, UIO_WRITE, ktruio, error);
471df8bae1dSRodney W. Grimes 	}
472df8bae1dSRodney W. Grimes #endif
473b40ce416SJulian Elischer 	td->td_retval[0] = cnt;
474b40ce416SJulian Elischer 	fdrop(fp, td);
475df8bae1dSRodney W. Grimes 	return (error);
476df8bae1dSRodney W. Grimes }
477df8bae1dSRodney W. Grimes 
478df8bae1dSRodney W. Grimes /*
479df8bae1dSRodney W. Grimes  * Ioctl system call
480df8bae1dSRodney W. Grimes  */
481d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
482df8bae1dSRodney W. Grimes struct ioctl_args {
483df8bae1dSRodney W. Grimes 	int	fd;
484069e9bc1SDoug Rabson 	u_long	com;
485df8bae1dSRodney W. Grimes 	caddr_t	data;
486df8bae1dSRodney W. Grimes };
487d2d3e875SBruce Evans #endif
488ad2edad9SMatthew Dillon /*
489ad2edad9SMatthew Dillon  * MPSAFE
490ad2edad9SMatthew Dillon  */
491df8bae1dSRodney W. Grimes /* ARGSUSED */
49226f9a767SRodney W. Grimes int
4933e15c66fSPoul-Henning Kamp ioctl(struct thread *td, struct ioctl_args *uap)
494df8bae1dSRodney W. Grimes {
495a4db4953SAlfred Perlstein 	struct file *fp;
4963e15c66fSPoul-Henning Kamp 	struct filedesc *fdp;
4973e15c66fSPoul-Henning Kamp 	u_long com;
498ad2edad9SMatthew Dillon 	int error = 0;
4993e15c66fSPoul-Henning Kamp 	u_int size;
500df8bae1dSRodney W. Grimes 	caddr_t data, memp;
501df8bae1dSRodney W. Grimes 	int tmp;
502df8bae1dSRodney W. Grimes 
5039fc6aa06SPoul-Henning Kamp 	if (uap->com > 0xffffffff) {
5049fc6aa06SPoul-Henning Kamp 		printf(
5059fc6aa06SPoul-Henning Kamp 		    "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
5069fc6aa06SPoul-Henning Kamp 		    td->td_proc->p_pid, td->td_proc->p_comm, uap->com);
5079fc6aa06SPoul-Henning Kamp 		uap->com &= 0xffffffff;
5089fc6aa06SPoul-Henning Kamp 	}
509a4db4953SAlfred Perlstein 	if ((error = fget(td, uap->fd, &fp)) != 0)
510a4db4953SAlfred Perlstein 		return (error);
511ad2edad9SMatthew Dillon 	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
512426da3bcSAlfred Perlstein 		fdrop(fp, td);
513426da3bcSAlfred Perlstein 		return (EBADF);
514ad2edad9SMatthew Dillon 	}
515426da3bcSAlfred Perlstein 	fdp = td->td_proc->p_fd;
516df8bae1dSRodney W. Grimes 	switch (com = uap->com) {
517df8bae1dSRodney W. Grimes 	case FIONCLEX:
518124e4c3bSPoul-Henning Kamp 		FILEDESC_LOCK_FAST(fdp);
519df8bae1dSRodney W. Grimes 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
520124e4c3bSPoul-Henning Kamp 		FILEDESC_UNLOCK_FAST(fdp);
521426da3bcSAlfred Perlstein 		fdrop(fp, td);
522426da3bcSAlfred Perlstein 		return (0);
523df8bae1dSRodney W. Grimes 	case FIOCLEX:
524124e4c3bSPoul-Henning Kamp 		FILEDESC_LOCK_FAST(fdp);
525df8bae1dSRodney W. Grimes 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
526124e4c3bSPoul-Henning Kamp 		FILEDESC_UNLOCK_FAST(fdp);
527426da3bcSAlfred Perlstein 		fdrop(fp, td);
528426da3bcSAlfred Perlstein 		return (0);
529df8bae1dSRodney W. Grimes 	}
530df8bae1dSRodney W. Grimes 
531df8bae1dSRodney W. Grimes 	/*
532df8bae1dSRodney W. Grimes 	 * Interpret high order word to find amount of data to be
533df8bae1dSRodney W. Grimes 	 * copied to/from the user's address space.
534df8bae1dSRodney W. Grimes 	 */
535df8bae1dSRodney W. Grimes 	size = IOCPARM_LEN(com);
536ca51b19bSPoul-Henning Kamp 	if ((size > IOCPARM_MAX) ||
537ca51b19bSPoul-Henning Kamp 	    ((com & (IOC_VOID  | IOC_IN | IOC_OUT)) == 0) ||
5382de92a38SPeter Wemm #if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
5392de92a38SPeter Wemm 	    ((com & IOC_OUT) && size == 0) ||
5402de92a38SPeter Wemm #else
5412de92a38SPeter Wemm 	    ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
5422de92a38SPeter Wemm #endif
5432de92a38SPeter Wemm 	    ((com & IOC_VOID) && size > 0)) {
544426da3bcSAlfred Perlstein 		fdrop(fp, td);
545426da3bcSAlfred Perlstein 		return (ENOTTY);
546ad2edad9SMatthew Dillon 	}
547279d7226SMatthew Dillon 
548ca51b19bSPoul-Henning Kamp 	if (size > 0) {
549a163d034SWarner Losh 		memp = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
550df8bae1dSRodney W. Grimes 		data = memp;
551279d7226SMatthew Dillon 	} else {
552ca51b19bSPoul-Henning Kamp 		memp = NULL;
553ca51b19bSPoul-Henning Kamp 		data = (void *)&uap->data;
554279d7226SMatthew Dillon 	}
555df8bae1dSRodney W. Grimes 	if (com & IOC_IN) {
556df8bae1dSRodney W. Grimes 		error = copyin(uap->data, data, (u_int)size);
557df8bae1dSRodney W. Grimes 		if (error) {
558df8bae1dSRodney W. Grimes 			free(memp, M_IOCTLOPS);
559b40ce416SJulian Elischer 			fdrop(fp, td);
5603e15c66fSPoul-Henning Kamp 			return (error);
561df8bae1dSRodney W. Grimes 		}
562ca51b19bSPoul-Henning Kamp 	} else if (com & IOC_OUT) {
563df8bae1dSRodney W. Grimes 		/*
564df8bae1dSRodney W. Grimes 		 * Zero the buffer so the user always
565df8bae1dSRodney W. Grimes 		 * gets back something deterministic.
566df8bae1dSRodney W. Grimes 		 */
567df8bae1dSRodney W. Grimes 		bzero(data, size);
568279d7226SMatthew Dillon 	}
569df8bae1dSRodney W. Grimes 
5708ccf264fSPoul-Henning Kamp 	if (com == FIONBIO) {
571426da3bcSAlfred Perlstein 		FILE_LOCK(fp);
572bb56ec4aSPoul-Henning Kamp 		if ((tmp = *(int *)data))
573df8bae1dSRodney W. Grimes 			fp->f_flag |= FNONBLOCK;
574df8bae1dSRodney W. Grimes 		else
575df8bae1dSRodney W. Grimes 			fp->f_flag &= ~FNONBLOCK;
576426da3bcSAlfred Perlstein 		FILE_UNLOCK(fp);
5778ccf264fSPoul-Henning Kamp 		data = (void *)&tmp;
5788ccf264fSPoul-Henning Kamp 	} else if (com == FIOASYNC) {
579426da3bcSAlfred Perlstein 		FILE_LOCK(fp);
580bb56ec4aSPoul-Henning Kamp 		if ((tmp = *(int *)data))
581df8bae1dSRodney W. Grimes 			fp->f_flag |= FASYNC;
582df8bae1dSRodney W. Grimes 		else
583df8bae1dSRodney W. Grimes 			fp->f_flag &= ~FASYNC;
584426da3bcSAlfred Perlstein 		FILE_UNLOCK(fp);
5858ccf264fSPoul-Henning Kamp 		data = (void *)&tmp;
586df8bae1dSRodney W. Grimes 	}
5878ccf264fSPoul-Henning Kamp 
5888ccf264fSPoul-Henning Kamp 	error = fo_ioctl(fp, com, data, td->td_ucred, td);
5898ccf264fSPoul-Henning Kamp 
5908ccf264fSPoul-Henning Kamp 	if (error == 0 && (com & IOC_OUT))
5918ccf264fSPoul-Henning Kamp 		error = copyout(data, uap->data, (u_int)size);
5928ccf264fSPoul-Henning Kamp 
593ca51b19bSPoul-Henning Kamp 	if (memp != NULL)
594df8bae1dSRodney W. Grimes 		free(memp, M_IOCTLOPS);
595b40ce416SJulian Elischer 	fdrop(fp, td);
596df8bae1dSRodney W. Grimes 	return (error);
597df8bae1dSRodney W. Grimes }
598df8bae1dSRodney W. Grimes 
59985f190e4SAlfred Perlstein /*
60085f190e4SAlfred Perlstein  * sellock and selwait are initialized in selectinit() via SYSINIT.
60185f190e4SAlfred Perlstein  */
60285f190e4SAlfred Perlstein struct mtx	sellock;
603265fc98fSSeigo Tanimura struct cv	selwait;
6049ae6d334SKelly Yancey u_int		nselcoll;	/* Select collisions since boot */
6059ae6d334SKelly Yancey SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
606df8bae1dSRodney W. Grimes 
607df8bae1dSRodney W. Grimes /*
608df8bae1dSRodney W. Grimes  * Select system call.
609df8bae1dSRodney W. Grimes  */
610d2d3e875SBruce Evans #ifndef _SYS_SYSPROTO_H_
611df8bae1dSRodney W. Grimes struct select_args {
612b08f7993SSujal Patel 	int	nd;
613df8bae1dSRodney W. Grimes 	fd_set	*in, *ou, *ex;
614df8bae1dSRodney W. Grimes 	struct	timeval *tv;
615df8bae1dSRodney W. Grimes };
616d2d3e875SBruce Evans #endif
617ad2edad9SMatthew Dillon /*
618ad2edad9SMatthew Dillon  * MPSAFE
619ad2edad9SMatthew Dillon  */
62026f9a767SRodney W. Grimes int
621b40ce416SJulian Elischer select(td, uap)
622b40ce416SJulian Elischer 	register struct thread *td;
623df8bae1dSRodney W. Grimes 	register struct select_args *uap;
624df8bae1dSRodney W. Grimes {
6258f19eb88SIan Dowse 	struct timeval tv, *tvp;
6268f19eb88SIan Dowse 	int error;
6278f19eb88SIan Dowse 
6288f19eb88SIan Dowse 	if (uap->tv != NULL) {
6298f19eb88SIan Dowse 		error = copyin(uap->tv, &tv, sizeof(tv));
6308f19eb88SIan Dowse 		if (error)
6318f19eb88SIan Dowse 			return (error);
6328f19eb88SIan Dowse 		tvp = &tv;
6338f19eb88SIan Dowse 	} else
6348f19eb88SIan Dowse 		tvp = NULL;
6358f19eb88SIan Dowse 
6368f19eb88SIan Dowse 	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp));
6378f19eb88SIan Dowse }
6388f19eb88SIan Dowse 
6398f19eb88SIan Dowse int
6408f19eb88SIan Dowse kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
6418f19eb88SIan Dowse     fd_set *fd_ex, struct timeval *tvp)
6428f19eb88SIan Dowse {
643426da3bcSAlfred Perlstein 	struct filedesc *fdp;
644d5e4d7e1SBruce Evans 	/*
645d5e4d7e1SBruce Evans 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
646d5e4d7e1SBruce Evans 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
647d5e4d7e1SBruce Evans 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
648d5e4d7e1SBruce Evans 	 * of 256.
649d5e4d7e1SBruce Evans 	 */
650d5e4d7e1SBruce Evans 	fd_mask s_selbits[howmany(2048, NFDBITS)];
651eb209311SAlfred Perlstein 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
65200af9731SPoul-Henning Kamp 	struct timeval atv, rtv, ttv;
6539ae6d334SKelly Yancey 	int error, timo;
6549ae6d334SKelly Yancey 	u_int ncoll, nbufbytes, ncpbytes, nfdbits;
655df8bae1dSRodney W. Grimes 
6568f19eb88SIan Dowse 	if (nd < 0)
657acbfbfeaSSujal Patel 		return (EINVAL);
658426da3bcSAlfred Perlstein 	fdp = td->td_proc->p_fd;
659db446e30SPoul-Henning Kamp 
660124e4c3bSPoul-Henning Kamp 	FILEDESC_LOCK_FAST(fdp);
661ad2edad9SMatthew Dillon 
6628f19eb88SIan Dowse 	if (nd > td->td_proc->p_fd->fd_nfiles)
6638f19eb88SIan Dowse 		nd = td->td_proc->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
664124e4c3bSPoul-Henning Kamp 	FILEDESC_UNLOCK_FAST(fdp);
665b08f7993SSujal Patel 
666d5e4d7e1SBruce Evans 	/*
667d5e4d7e1SBruce Evans 	 * Allocate just enough bits for the non-null fd_sets.  Use the
668d5e4d7e1SBruce Evans 	 * preallocated auto buffer if possible.
669d5e4d7e1SBruce Evans 	 */
6708f19eb88SIan Dowse 	nfdbits = roundup(nd, NFDBITS);
671d5e4d7e1SBruce Evans 	ncpbytes = nfdbits / NBBY;
672d5e4d7e1SBruce Evans 	nbufbytes = 0;
6738f19eb88SIan Dowse 	if (fd_in != NULL)
674d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
6758f19eb88SIan Dowse 	if (fd_ou != NULL)
676d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
6778f19eb88SIan Dowse 	if (fd_ex != NULL)
678d5e4d7e1SBruce Evans 		nbufbytes += 2 * ncpbytes;
679d5e4d7e1SBruce Evans 	if (nbufbytes <= sizeof s_selbits)
680d5e4d7e1SBruce Evans 		selbits = &s_selbits[0];
681d5e4d7e1SBruce Evans 	else
682a163d034SWarner Losh 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
683b08f7993SSujal Patel 
684b08f7993SSujal Patel 	/*
685d5e4d7e1SBruce Evans 	 * Assign pointers into the bit buffers and fetch the input bits.
686d5e4d7e1SBruce Evans 	 * Put the output buffers together so that they can be bzeroed
687d5e4d7e1SBruce Evans 	 * together.
688b08f7993SSujal Patel 	 */
689d5e4d7e1SBruce Evans 	sbp = selbits;
690df8bae1dSRodney W. Grimes #define	getbits(name, x) \
691d5e4d7e1SBruce Evans 	do {								\
6928f19eb88SIan Dowse 		if (name == NULL)					\
693d5e4d7e1SBruce Evans 			ibits[x] = NULL;				\
694d5e4d7e1SBruce Evans 		else {							\
695d5e4d7e1SBruce Evans 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
696d5e4d7e1SBruce Evans 			obits[x] = sbp;					\
697d5e4d7e1SBruce Evans 			sbp += ncpbytes / sizeof *sbp;			\
6988f19eb88SIan Dowse 			error = copyin(name, ibits[x], ncpbytes);	\
699265fc98fSSeigo Tanimura 			if (error != 0)					\
70085f190e4SAlfred Perlstein 				goto done_nosellock;			\
701e04ac2feSJohn Baldwin 		}							\
702d5e4d7e1SBruce Evans 	} while (0)
7038f19eb88SIan Dowse 	getbits(fd_in, 0);
7048f19eb88SIan Dowse 	getbits(fd_ou, 1);
7058f19eb88SIan Dowse 	getbits(fd_ex, 2);
706df8bae1dSRodney W. Grimes #undef	getbits
707d5e4d7e1SBruce Evans 	if (nbufbytes != 0)
708d5e4d7e1SBruce Evans 		bzero(selbits, nbufbytes / 2);
709df8bae1dSRodney W. Grimes 
7108f19eb88SIan Dowse 	if (tvp != NULL) {
7118f19eb88SIan Dowse 		atv = *tvp;
712df8bae1dSRodney W. Grimes 		if (itimerfix(&atv)) {
713df8bae1dSRodney W. Grimes 			error = EINVAL;
71485f190e4SAlfred Perlstein 			goto done_nosellock;
715df8bae1dSRodney W. Grimes 		}
716c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
71700af9731SPoul-Henning Kamp 		timevaladd(&atv, &rtv);
7189c386f6bSJohn Baldwin 	} else {
71900af9731SPoul-Henning Kamp 		atv.tv_sec = 0;
7209c386f6bSJohn Baldwin 		atv.tv_usec = 0;
7219c386f6bSJohn Baldwin 	}
72200af9731SPoul-Henning Kamp 	timo = 0;
7232149c527SPeter Wemm 	TAILQ_INIT(&td->td_selq);
72485f190e4SAlfred Perlstein 	mtx_lock(&sellock);
725df8bae1dSRodney W. Grimes retry:
726df8bae1dSRodney W. Grimes 	ncoll = nselcoll;
727fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
728b40ce416SJulian Elischer 	td->td_flags |= TDF_SELECT;
729fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
73085f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
73185f190e4SAlfred Perlstein 
7328f19eb88SIan Dowse 	error = selscan(td, ibits, obits, nd);
73385f190e4SAlfred Perlstein 	mtx_lock(&sellock);
734b40ce416SJulian Elischer 	if (error || td->td_retval[0])
735df8bae1dSRodney W. Grimes 		goto done;
7364da144c0SJohn Baldwin 	if (atv.tv_sec || atv.tv_usec) {
737c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
73885f190e4SAlfred Perlstein 		if (timevalcmp(&rtv, &atv, >=))
739df8bae1dSRodney W. Grimes 			goto done;
74000af9731SPoul-Henning Kamp 		ttv = atv;
74100af9731SPoul-Henning Kamp 		timevalsub(&ttv, &rtv);
74200af9731SPoul-Henning Kamp 		timo = ttv.tv_sec > 24 * 60 * 60 ?
74300af9731SPoul-Henning Kamp 		    24 * 60 * 60 * hz : tvtohz(&ttv);
744df8bae1dSRodney W. Grimes 	}
74585f190e4SAlfred Perlstein 
74685f190e4SAlfred Perlstein 	/*
74785f190e4SAlfred Perlstein 	 * An event of interest may occur while we do not hold
74885f190e4SAlfred Perlstein 	 * sellock, so check TDF_SELECT and the number of
74985f190e4SAlfred Perlstein 	 * collisions and rescan the file descriptors if
75085f190e4SAlfred Perlstein 	 * necessary.
75185f190e4SAlfred Perlstein 	 */
752fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
75385f190e4SAlfred Perlstein 	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
75485f190e4SAlfred Perlstein 		mtx_unlock_spin(&sched_lock);
75585f190e4SAlfred Perlstein 		goto retry;
75685f190e4SAlfred Perlstein 	}
757fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
758bfbbc4aaSJason Evans 
759265fc98fSSeigo Tanimura 	if (timo > 0)
76085f190e4SAlfred Perlstein 		error = cv_timedwait_sig(&selwait, &sellock, timo);
761265fc98fSSeigo Tanimura 	else
76285f190e4SAlfred Perlstein 		error = cv_wait_sig(&selwait, &sellock);
763bfbbc4aaSJason Evans 
764df8bae1dSRodney W. Grimes 	if (error == 0)
765df8bae1dSRodney W. Grimes 		goto retry;
766265fc98fSSeigo Tanimura 
767df8bae1dSRodney W. Grimes done:
76885f190e4SAlfred Perlstein 	clear_selinfo_list(td);
769fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
770b40ce416SJulian Elischer 	td->td_flags &= ~TDF_SELECT;
771fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
77285f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
77385f190e4SAlfred Perlstein 
77485f190e4SAlfred Perlstein done_nosellock:
775df8bae1dSRodney W. Grimes 	/* select is not restarted after signals... */
776df8bae1dSRodney W. Grimes 	if (error == ERESTART)
777df8bae1dSRodney W. Grimes 		error = EINTR;
778df8bae1dSRodney W. Grimes 	if (error == EWOULDBLOCK)
779df8bae1dSRodney W. Grimes 		error = 0;
780df8bae1dSRodney W. Grimes #define	putbits(name, x) \
7818f19eb88SIan Dowse 	if (name && (error2 = copyout(obits[x], name, ncpbytes))) \
782df8bae1dSRodney W. Grimes 		error = error2;
783df8bae1dSRodney W. Grimes 	if (error == 0) {
784df8bae1dSRodney W. Grimes 		int error2;
785df8bae1dSRodney W. Grimes 
7868f19eb88SIan Dowse 		putbits(fd_in, 0);
7878f19eb88SIan Dowse 		putbits(fd_ou, 1);
7888f19eb88SIan Dowse 		putbits(fd_ex, 2);
789df8bae1dSRodney W. Grimes #undef putbits
790df8bae1dSRodney W. Grimes 	}
791d5e4d7e1SBruce Evans 	if (selbits != &s_selbits[0])
792d5e4d7e1SBruce Evans 		free(selbits, M_SELECT);
793ad2edad9SMatthew Dillon 
794df8bae1dSRodney W. Grimes 	return (error);
795df8bae1dSRodney W. Grimes }
796df8bae1dSRodney W. Grimes 
797265fc98fSSeigo Tanimura static int
798b40ce416SJulian Elischer selscan(td, ibits, obits, nfd)
799b40ce416SJulian Elischer 	struct thread *td;
800b08f7993SSujal Patel 	fd_mask **ibits, **obits;
801cb226aaaSPoul-Henning Kamp 	int nfd;
802df8bae1dSRodney W. Grimes {
803f082218cSPeter Wemm 	int msk, i, fd;
804f082218cSPeter Wemm 	fd_mask bits;
805df8bae1dSRodney W. Grimes 	struct file *fp;
806df8bae1dSRodney W. Grimes 	int n = 0;
8072087c896SBruce Evans 	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
80842d11757SPeter Wemm 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
809eb209311SAlfred Perlstein 	struct filedesc *fdp = td->td_proc->p_fd;
810df8bae1dSRodney W. Grimes 
811eb209311SAlfred Perlstein 	FILEDESC_LOCK(fdp);
812df8bae1dSRodney W. Grimes 	for (msk = 0; msk < 3; msk++) {
813d5e4d7e1SBruce Evans 		if (ibits[msk] == NULL)
814d5e4d7e1SBruce Evans 			continue;
815df8bae1dSRodney W. Grimes 		for (i = 0; i < nfd; i += NFDBITS) {
816b08f7993SSujal Patel 			bits = ibits[msk][i/NFDBITS];
817f082218cSPeter Wemm 			/* ffs(int mask) not portable, fd_mask is long */
818f082218cSPeter Wemm 			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
819f082218cSPeter Wemm 				if (!(bits & 1))
820f082218cSPeter Wemm 					continue;
821eb209311SAlfred Perlstein 				if ((fp = fget_locked(fdp, fd)) == NULL) {
822eb209311SAlfred Perlstein 					FILEDESC_UNLOCK(fdp);
823df8bae1dSRodney W. Grimes 					return (EBADF);
824eb209311SAlfred Perlstein 				}
825ea6027a8SRobert Watson 				if (fo_poll(fp, flag[msk], td->td_ucred,
826ea6027a8SRobert Watson 				    td)) {
827b08f7993SSujal Patel 					obits[msk][(fd)/NFDBITS] |=
828f082218cSPeter Wemm 					    ((fd_mask)1 << ((fd) % NFDBITS));
829df8bae1dSRodney W. Grimes 					n++;
830df8bae1dSRodney W. Grimes 				}
831df8bae1dSRodney W. Grimes 			}
832df8bae1dSRodney W. Grimes 		}
833df8bae1dSRodney W. Grimes 	}
834eb209311SAlfred Perlstein 	FILEDESC_UNLOCK(fdp);
835b40ce416SJulian Elischer 	td->td_retval[0] = n;
836df8bae1dSRodney W. Grimes 	return (0);
837df8bae1dSRodney W. Grimes }
838df8bae1dSRodney W. Grimes 
83942d11757SPeter Wemm /*
84042d11757SPeter Wemm  * Poll system call.
84142d11757SPeter Wemm  */
84242d11757SPeter Wemm #ifndef _SYS_SYSPROTO_H_
84342d11757SPeter Wemm struct poll_args {
84442d11757SPeter Wemm 	struct pollfd *fds;
84542d11757SPeter Wemm 	u_int	nfds;
84642d11757SPeter Wemm 	int	timeout;
84742d11757SPeter Wemm };
84842d11757SPeter Wemm #endif
849ad2edad9SMatthew Dillon /*
850ad2edad9SMatthew Dillon  * MPSAFE
851ad2edad9SMatthew Dillon  */
85242d11757SPeter Wemm int
853b40ce416SJulian Elischer poll(td, uap)
854b40ce416SJulian Elischer 	struct thread *td;
855ea0237edSJonathan Lemon 	struct poll_args *uap;
85642d11757SPeter Wemm {
8572580f4e5SAndre Oppermann 	struct pollfd *bits;
8582580f4e5SAndre Oppermann 	struct pollfd smallbits[32];
85900af9731SPoul-Henning Kamp 	struct timeval atv, rtv, ttv;
8609ae6d334SKelly Yancey 	int error = 0, timo;
8619ae6d334SKelly Yancey 	u_int ncoll, nfds;
86242d11757SPeter Wemm 	size_t ni;
86342d11757SPeter Wemm 
864d1e405c5SAlfred Perlstein 	nfds = uap->nfds;
865ad2edad9SMatthew Dillon 
8665d8dd01dSRobert Watson 	/*
8672bd5ac33SPeter Wemm 	 * This is kinda bogus.  We have fd limits, but that is not
8682bd5ac33SPeter Wemm 	 * really related to the size of the pollfd array.  Make sure
8692bd5ac33SPeter Wemm 	 * we let the process use at least FD_SETSIZE entries and at
8702bd5ac33SPeter Wemm 	 * least enough for the current limits.  We want to be reasonably
8712bd5ac33SPeter Wemm 	 * safe, but not overly restrictive.
87289b71647SPeter Wemm 	 */
87391d5354aSJohn Baldwin 	PROC_LOCK(td->td_proc);
87491d5354aSJohn Baldwin 	if ((nfds > lim_cur(td->td_proc, RLIMIT_NOFILE)) &&
875b40ce416SJulian Elischer 	    (nfds > FD_SETSIZE)) {
87691d5354aSJohn Baldwin 		PROC_UNLOCK(td->td_proc);
877ad2edad9SMatthew Dillon 		error = EINVAL;
878ad2edad9SMatthew Dillon 		goto done2;
879ad2edad9SMatthew Dillon 	}
88091d5354aSJohn Baldwin 	PROC_UNLOCK(td->td_proc);
88189b71647SPeter Wemm 	ni = nfds * sizeof(struct pollfd);
88242d11757SPeter Wemm 	if (ni > sizeof(smallbits))
883a163d034SWarner Losh 		bits = malloc(ni, M_TEMP, M_WAITOK);
88442d11757SPeter Wemm 	else
88542d11757SPeter Wemm 		bits = smallbits;
886d1e405c5SAlfred Perlstein 	error = copyin(uap->fds, bits, ni);
88742d11757SPeter Wemm 	if (error)
88885f190e4SAlfred Perlstein 		goto done_nosellock;
889d1e405c5SAlfred Perlstein 	if (uap->timeout != INFTIM) {
890d1e405c5SAlfred Perlstein 		atv.tv_sec = uap->timeout / 1000;
891d1e405c5SAlfred Perlstein 		atv.tv_usec = (uap->timeout % 1000) * 1000;
89242d11757SPeter Wemm 		if (itimerfix(&atv)) {
89342d11757SPeter Wemm 			error = EINVAL;
89485f190e4SAlfred Perlstein 			goto done_nosellock;
89542d11757SPeter Wemm 		}
896c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
89700af9731SPoul-Henning Kamp 		timevaladd(&atv, &rtv);
8989c386f6bSJohn Baldwin 	} else {
89900af9731SPoul-Henning Kamp 		atv.tv_sec = 0;
9009c386f6bSJohn Baldwin 		atv.tv_usec = 0;
9019c386f6bSJohn Baldwin 	}
90200af9731SPoul-Henning Kamp 	timo = 0;
9032149c527SPeter Wemm 	TAILQ_INIT(&td->td_selq);
90485f190e4SAlfred Perlstein 	mtx_lock(&sellock);
90542d11757SPeter Wemm retry:
90642d11757SPeter Wemm 	ncoll = nselcoll;
907fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
908b40ce416SJulian Elischer 	td->td_flags |= TDF_SELECT;
909fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
91085f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
91185f190e4SAlfred Perlstein 
9122580f4e5SAndre Oppermann 	error = pollscan(td, bits, nfds);
91385f190e4SAlfred Perlstein 	mtx_lock(&sellock);
914b40ce416SJulian Elischer 	if (error || td->td_retval[0])
91542d11757SPeter Wemm 		goto done;
9164da144c0SJohn Baldwin 	if (atv.tv_sec || atv.tv_usec) {
917c21410e1SPoul-Henning Kamp 		getmicrouptime(&rtv);
91885f190e4SAlfred Perlstein 		if (timevalcmp(&rtv, &atv, >=))
91942d11757SPeter Wemm 			goto done;
92000af9731SPoul-Henning Kamp 		ttv = atv;
92100af9731SPoul-Henning Kamp 		timevalsub(&ttv, &rtv);
92200af9731SPoul-Henning Kamp 		timo = ttv.tv_sec > 24 * 60 * 60 ?
92300af9731SPoul-Henning Kamp 		    24 * 60 * 60 * hz : tvtohz(&ttv);
92442d11757SPeter Wemm 	}
92585f190e4SAlfred Perlstein 	/*
92685f190e4SAlfred Perlstein 	 * An event of interest may occur while we do not hold
92785f190e4SAlfred Perlstein 	 * sellock, so check TDF_SELECT and the number of collisions
92885f190e4SAlfred Perlstein 	 * and rescan the file descriptors if necessary.
92985f190e4SAlfred Perlstein 	 */
930fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
93185f190e4SAlfred Perlstein 	if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
932fea2ab83SJohn Baldwin 		mtx_unlock_spin(&sched_lock);
93385f190e4SAlfred Perlstein 		goto retry;
93485f190e4SAlfred Perlstein 	}
93585f190e4SAlfred Perlstein 	mtx_unlock_spin(&sched_lock);
93685f190e4SAlfred Perlstein 
937265fc98fSSeigo Tanimura 	if (timo > 0)
93885f190e4SAlfred Perlstein 		error = cv_timedwait_sig(&selwait, &sellock, timo);
939265fc98fSSeigo Tanimura 	else
94085f190e4SAlfred Perlstein 		error = cv_wait_sig(&selwait, &sellock);
94185f190e4SAlfred Perlstein 
94242d11757SPeter Wemm 	if (error == 0)
94342d11757SPeter Wemm 		goto retry;
944265fc98fSSeigo Tanimura 
94542d11757SPeter Wemm done:
94685f190e4SAlfred Perlstein 	clear_selinfo_list(td);
947fea2ab83SJohn Baldwin 	mtx_lock_spin(&sched_lock);
948b40ce416SJulian Elischer 	td->td_flags &= ~TDF_SELECT;
949fea2ab83SJohn Baldwin 	mtx_unlock_spin(&sched_lock);
95085f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
95185f190e4SAlfred Perlstein 
95285f190e4SAlfred Perlstein done_nosellock:
95342d11757SPeter Wemm 	/* poll is not restarted after signals... */
95442d11757SPeter Wemm 	if (error == ERESTART)
95542d11757SPeter Wemm 		error = EINTR;
95642d11757SPeter Wemm 	if (error == EWOULDBLOCK)
95742d11757SPeter Wemm 		error = 0;
95842d11757SPeter Wemm 	if (error == 0) {
959d1e405c5SAlfred Perlstein 		error = copyout(bits, uap->fds, ni);
96042d11757SPeter Wemm 		if (error)
96142d11757SPeter Wemm 			goto out;
96242d11757SPeter Wemm 	}
96342d11757SPeter Wemm out:
96442d11757SPeter Wemm 	if (ni > sizeof(smallbits))
96542d11757SPeter Wemm 		free(bits, M_TEMP);
966ad2edad9SMatthew Dillon done2:
96742d11757SPeter Wemm 	return (error);
96842d11757SPeter Wemm }
96942d11757SPeter Wemm 
97042d11757SPeter Wemm static int
971b40ce416SJulian Elischer pollscan(td, fds, nfd)
972b40ce416SJulian Elischer 	struct thread *td;
97342d11757SPeter Wemm 	struct pollfd *fds;
974ea0237edSJonathan Lemon 	u_int nfd;
97542d11757SPeter Wemm {
976b40ce416SJulian Elischer 	register struct filedesc *fdp = td->td_proc->p_fd;
97742d11757SPeter Wemm 	int i;
97842d11757SPeter Wemm 	struct file *fp;
97942d11757SPeter Wemm 	int n = 0;
98042d11757SPeter Wemm 
981426da3bcSAlfred Perlstein 	FILEDESC_LOCK(fdp);
982eb209311SAlfred Perlstein 	for (i = 0; i < nfd; i++, fds++) {
983337c9691SJordan K. Hubbard 		if (fds->fd >= fdp->fd_nfiles) {
98442d11757SPeter Wemm 			fds->revents = POLLNVAL;
98542d11757SPeter Wemm 			n++;
986337c9691SJordan K. Hubbard 		} else if (fds->fd < 0) {
987337c9691SJordan K. Hubbard 			fds->revents = 0;
98842d11757SPeter Wemm 		} else {
98942d11757SPeter Wemm 			fp = fdp->fd_ofiles[fds->fd];
990279d7226SMatthew Dillon 			if (fp == NULL) {
99142d11757SPeter Wemm 				fds->revents = POLLNVAL;
99242d11757SPeter Wemm 				n++;
99342d11757SPeter Wemm 			} else {
9942087c896SBruce Evans 				/*
9952087c896SBruce Evans 				 * Note: backend also returns POLLHUP and
9962087c896SBruce Evans 				 * POLLERR if appropriate.
9972087c896SBruce Evans 				 */
99813ccadd4SBrian Feldman 				fds->revents = fo_poll(fp, fds->events,
999ea6027a8SRobert Watson 				    td->td_ucred, td);
100042d11757SPeter Wemm 				if (fds->revents != 0)
100142d11757SPeter Wemm 					n++;
100242d11757SPeter Wemm 			}
100342d11757SPeter Wemm 		}
100442d11757SPeter Wemm 	}
1005eb209311SAlfred Perlstein 	FILEDESC_UNLOCK(fdp);
1006b40ce416SJulian Elischer 	td->td_retval[0] = n;
100742d11757SPeter Wemm 	return (0);
100842d11757SPeter Wemm }
100942d11757SPeter Wemm 
101042d11757SPeter Wemm /*
101142d11757SPeter Wemm  * OpenBSD poll system call.
101242d11757SPeter Wemm  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
101342d11757SPeter Wemm  */
101442d11757SPeter Wemm #ifndef _SYS_SYSPROTO_H_
101542d11757SPeter Wemm struct openbsd_poll_args {
101642d11757SPeter Wemm 	struct pollfd *fds;
101742d11757SPeter Wemm 	u_int	nfds;
101842d11757SPeter Wemm 	int	timeout;
101942d11757SPeter Wemm };
102042d11757SPeter Wemm #endif
/*
 * OpenBSD-compatible poll entry point: reinterprets the argument block
 * as a struct poll_args and defers entirely to poll().
 *
 * MPSAFE
 */
int
openbsd_poll(struct thread *td, struct openbsd_poll_args *uap)
{
	struct poll_args *pargs;

	pargs = (struct poll_args *)uap;
	return (poll(td, pargs));
}
103142d11757SPeter Wemm 
103285f190e4SAlfred Perlstein /*
103385f190e4SAlfred Perlstein  * Remove the references to the thread from all of the objects
103485f190e4SAlfred Perlstein  * we were polling.
103585f190e4SAlfred Perlstein  *
103685f190e4SAlfred Perlstein  * This code assumes that the underlying owner of the selinfo
103785f190e4SAlfred Perlstein  * structure will hold sellock before it changes it, and that
103885f190e4SAlfred Perlstein  * it will unlink itself from our list if it goes away.
103985f190e4SAlfred Perlstein  */
104085f190e4SAlfred Perlstein void
104185f190e4SAlfred Perlstein clear_selinfo_list(td)
104285f190e4SAlfred Perlstein 	struct thread *td;
104385f190e4SAlfred Perlstein {
104485f190e4SAlfred Perlstein 	struct selinfo *si;
104585f190e4SAlfred Perlstein 
104685f190e4SAlfred Perlstein 	mtx_assert(&sellock, MA_OWNED);
104785f190e4SAlfred Perlstein 	TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
104885f190e4SAlfred Perlstein 		si->si_thread = NULL;
104985f190e4SAlfred Perlstein 	TAILQ_INIT(&td->td_selq);
105085f190e4SAlfred Perlstein }
105185f190e4SAlfred Perlstein 
1052df8bae1dSRodney W. Grimes /*
1053df8bae1dSRodney W. Grimes  * Record a select request.
1054df8bae1dSRodney W. Grimes  */
1055df8bae1dSRodney W. Grimes void
1056df8bae1dSRodney W. Grimes selrecord(selector, sip)
1057b40ce416SJulian Elischer 	struct thread *selector;
1058df8bae1dSRodney W. Grimes 	struct selinfo *sip;
1059df8bae1dSRodney W. Grimes {
1060df8bae1dSRodney W. Grimes 
106185f190e4SAlfred Perlstein 	mtx_lock(&sellock);
106285f190e4SAlfred Perlstein 	/*
1063b605b54cSAlfred Perlstein 	 * If the selinfo's thread pointer is NULL then take ownership of it.
1064b605b54cSAlfred Perlstein 	 *
1065b605b54cSAlfred Perlstein 	 * If the thread pointer is not NULL and it points to another
1066b605b54cSAlfred Perlstein 	 * thread, then we have a collision.
1067b605b54cSAlfred Perlstein 	 *
1068b605b54cSAlfred Perlstein 	 * If the thread pointer is not NULL and points back to us then leave
1069b605b54cSAlfred Perlstein 	 * it alone as we've already added pointed it at us and added it to
1070b605b54cSAlfred Perlstein 	 * our list.
107185f190e4SAlfred Perlstein 	 */
107285f190e4SAlfred Perlstein 	if (sip->si_thread == NULL) {
1073b40ce416SJulian Elischer 		sip->si_thread = selector;
107485f190e4SAlfred Perlstein 		TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
107585f190e4SAlfred Perlstein 	} else if (sip->si_thread != selector) {
107685f190e4SAlfred Perlstein 		sip->si_flags |= SI_COLL;
107785f190e4SAlfred Perlstein 	}
107885f190e4SAlfred Perlstein 
107985f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
1080df8bae1dSRodney W. Grimes }
1081df8bae1dSRodney W. Grimes 
/*
 * Wake up a selecting thread.  Passes -1 as the priority argument of
 * doselwakeup().
 */
void
selwakeup(struct selinfo *sip)
{

	doselwakeup(sip, -1);
}
1089512824f8SSeigo Tanimura 
/*
 * Wake up a selecting thread, and set its priority: `pri' is handed
 * unchanged to doselwakeup().
 */
void
selwakeuppri(struct selinfo *sip, int pri)
{

	doselwakeup(sip, pri);
}
1098512824f8SSeigo Tanimura 
1099512824f8SSeigo Tanimura /*
1100512824f8SSeigo Tanimura  * Do a wakeup when a selectable event occurs.
1101512824f8SSeigo Tanimura  */
1102512824f8SSeigo Tanimura static void
1103512824f8SSeigo Tanimura doselwakeup(sip, pri)
1104512824f8SSeigo Tanimura 	struct selinfo *sip;
1105512824f8SSeigo Tanimura 	int pri;
1106512824f8SSeigo Tanimura {
1107b40ce416SJulian Elischer 	struct thread *td;
1108df8bae1dSRodney W. Grimes 
110985f190e4SAlfred Perlstein 	mtx_lock(&sellock);
111085f190e4SAlfred Perlstein 	td = sip->si_thread;
111185f190e4SAlfred Perlstein 	if ((sip->si_flags & SI_COLL) != 0) {
1112df8bae1dSRodney W. Grimes 		nselcoll++;
1113df8bae1dSRodney W. Grimes 		sip->si_flags &= ~SI_COLL;
1114512824f8SSeigo Tanimura 		cv_broadcastpri(&selwait, pri);
1115df8bae1dSRodney W. Grimes 	}
111685f190e4SAlfred Perlstein 	if (td == NULL) {
111785f190e4SAlfred Perlstein 		mtx_unlock(&sellock);
1118b40ce416SJulian Elischer 		return;
1119b40ce416SJulian Elischer 	}
112085f190e4SAlfred Perlstein 	TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
112185f190e4SAlfred Perlstein 	sip->si_thread = NULL;
11229ed346baSBosko Milekic 	mtx_lock_spin(&sched_lock);
1123b40ce416SJulian Elischer 	td->td_flags &= ~TDF_SELECT;
112433a9ed9dSJohn Baldwin 	mtx_unlock_spin(&sched_lock);
112544f3b092SJohn Baldwin 	sleepq_remove(td, &selwait);
112685f190e4SAlfred Perlstein 	mtx_unlock(&sellock);
1127df8bae1dSRodney W. Grimes }
1128265fc98fSSeigo Tanimura 
11294d77a549SAlfred Perlstein static void selectinit(void *);
1130265fc98fSSeigo Tanimura SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
1131265fc98fSSeigo Tanimura 
1132265fc98fSSeigo Tanimura /* ARGSUSED*/
1133265fc98fSSeigo Tanimura static void
1134265fc98fSSeigo Tanimura selectinit(dummy)
1135265fc98fSSeigo Tanimura 	void *dummy;
1136265fc98fSSeigo Tanimura {
1137265fc98fSSeigo Tanimura 	cv_init(&selwait, "select");
11386008862bSJohn Baldwin 	mtx_init(&sellock, "sellck", NULL, MTX_DEF);
1139265fc98fSSeigo Tanimura }
1140