xref: /freebsd/sys/kern/sys_generic.c (revision a8445737e740901f5f2c8d24c12ef7fc8b00134e)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
39  * $Id: sys_generic.c,v 1.40 1998/08/24 08:39:38 dfr Exp $
40  */
41 
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/sysproto.h>
47 #include <sys/filedesc.h>
48 #include <sys/filio.h>
49 #include <sys/ttycom.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/proc.h>
53 #include <sys/signalvar.h>
54 #include <sys/socketvar.h>
55 #include <sys/uio.h>
56 #include <sys/kernel.h>
57 #include <sys/malloc.h>
58 #include <sys/poll.h>
59 #include <sys/sysent.h>
60 #ifdef KTRACE
61 #include <sys/ktrace.h>
62 #endif
63 
64 #include <machine/limits.h>
65 
/*
 * Malloc types used by this file: M_IOCTLOPS for ioctl() argument
 * buffers, M_SELECT for select() bit buffers, and M_IOV for iovec
 * arrays that do not fit on the stack in readv()/writev().  M_IOV is
 * the only one that is not static.
 */
static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");

/* Forward declarations of the descriptor scanners used by poll() and select(). */
static int	pollscan __P((struct proc *, struct pollfd *, int));
static int	selscan __P((struct proc *, fd_mask **, fd_mask **, int));
72 
73 /*
74  * Read system call.
75  */
76 #ifndef _SYS_SYSPROTO_H_
77 struct read_args {
78 	int	fd;
79 	void	*buf;
80 	size_t	nbyte;
81 };
82 #endif
83 /* ARGSUSED */
84 int
85 read(p, uap)
86 	struct proc *p;
87 	register struct read_args *uap;
88 {
89 	register struct file *fp;
90 	register struct filedesc *fdp = p->p_fd;
91 	struct uio auio;
92 	struct iovec aiov;
93 	long cnt, error = 0;
94 #ifdef KTRACE
95 	struct iovec ktriov;
96 #endif
97 
98 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
99 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
100 	    (fp->f_flag & FREAD) == 0)
101 		return (EBADF);
102 	aiov.iov_base = (caddr_t)uap->buf;
103 	aiov.iov_len = uap->nbyte;
104 	auio.uio_iov = &aiov;
105 	auio.uio_iovcnt = 1;
106 	auio.uio_offset = -1;
107 	if (uap->nbyte > INT_MAX)
108 		return (EINVAL);
109 	auio.uio_resid = uap->nbyte;
110 	auio.uio_rw = UIO_READ;
111 	auio.uio_segflg = UIO_USERSPACE;
112 	auio.uio_procp = p;
113 #ifdef KTRACE
114 	/*
115 	 * if tracing, save a copy of iovec
116 	 */
117 	if (KTRPOINT(p, KTR_GENIO))
118 		ktriov = aiov;
119 #endif
120 	cnt = uap->nbyte;
121 	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
122 		if (auio.uio_resid != cnt && (error == ERESTART ||
123 		    error == EINTR || error == EWOULDBLOCK))
124 			error = 0;
125 	cnt -= auio.uio_resid;
126 #ifdef KTRACE
127 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
128 		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
129 #endif
130 	p->p_retval[0] = cnt;
131 	return (error);
132 }
133 
134 /*
135  * Scatter read system call.
136  */
137 #ifndef _SYS_SYSPROTO_H_
138 struct readv_args {
139 	int	fd;
140 	struct	iovec *iovp;
141 	u_int	iovcnt;
142 };
143 #endif
144 int
145 readv(p, uap)
146 	struct proc *p;
147 	register struct readv_args *uap;
148 {
149 	register struct file *fp;
150 	register struct filedesc *fdp = p->p_fd;
151 	struct uio auio;
152 	register struct iovec *iov;
153 	struct iovec *needfree;
154 	struct iovec aiov[UIO_SMALLIOV];
155 	long i, cnt, error = 0;
156 	u_int iovlen;
157 #ifdef KTRACE
158 	struct iovec *ktriov = NULL;
159 #endif
160 
161 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
162 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
163 	    (fp->f_flag & FREAD) == 0)
164 		return (EBADF);
165 	/* note: can't use iovlen until iovcnt is validated */
166 	iovlen = uap->iovcnt * sizeof (struct iovec);
167 	if (uap->iovcnt > UIO_SMALLIOV) {
168 		if (uap->iovcnt > UIO_MAXIOV)
169 			return (EINVAL);
170 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
171 		needfree = iov;
172 	} else {
173 		iov = aiov;
174 		needfree = NULL;
175 	}
176 	auio.uio_iov = iov;
177 	auio.uio_iovcnt = uap->iovcnt;
178 	auio.uio_rw = UIO_READ;
179 	auio.uio_segflg = UIO_USERSPACE;
180 	auio.uio_procp = p;
181 	auio.uio_offset = -1;
182 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
183 		goto done;
184 	auio.uio_resid = 0;
185 	for (i = 0; i < uap->iovcnt; i++) {
186 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
187 			error = EINVAL;
188 			goto done;
189 		}
190 		auio.uio_resid += iov->iov_len;
191 		iov++;
192 	}
193 #ifdef KTRACE
194 	/*
195 	 * if tracing, save a copy of iovec
196 	 */
197 	if (KTRPOINT(p, KTR_GENIO))  {
198 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
199 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
200 	}
201 #endif
202 	cnt = auio.uio_resid;
203 	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
204 		if (auio.uio_resid != cnt && (error == ERESTART ||
205 		    error == EINTR || error == EWOULDBLOCK))
206 			error = 0;
207 	cnt -= auio.uio_resid;
208 #ifdef KTRACE
209 	if (ktriov != NULL) {
210 		if (error == 0)
211 			ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov,
212 			    cnt, error);
213 		FREE(ktriov, M_TEMP);
214 	}
215 #endif
216 	p->p_retval[0] = cnt;
217 done:
218 	if (needfree)
219 		FREE(needfree, M_IOV);
220 	return (error);
221 }
222 
223 /*
224  * Write system call
225  */
226 #ifndef _SYS_SYSPROTO_H_
227 struct write_args {
228 	int	fd;
229 	const void *buf;
230 	size_t	nbyte;
231 };
232 #endif
233 int
234 write(p, uap)
235 	struct proc *p;
236 	register struct write_args *uap;
237 {
238 	register struct file *fp;
239 	register struct filedesc *fdp = p->p_fd;
240 	struct uio auio;
241 	struct iovec aiov;
242 	long cnt, error = 0;
243 #ifdef KTRACE
244 	struct iovec ktriov;
245 #endif
246 
247 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
248 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
249 	    (fp->f_flag & FWRITE) == 0)
250 		return (EBADF);
251 	aiov.iov_base = (caddr_t)uap->buf;
252 	aiov.iov_len = uap->nbyte;
253 	auio.uio_iov = &aiov;
254 	auio.uio_iovcnt = 1;
255 	auio.uio_offset = -1;
256 	if (uap->nbyte > INT_MAX)
257 		return (EINVAL);
258 	auio.uio_resid = uap->nbyte;
259 	auio.uio_rw = UIO_WRITE;
260 	auio.uio_segflg = UIO_USERSPACE;
261 	auio.uio_procp = p;
262 #ifdef KTRACE
263 	/*
264 	 * if tracing, save a copy of iovec
265 	 */
266 	if (KTRPOINT(p, KTR_GENIO))
267 		ktriov = aiov;
268 #endif
269 	cnt = uap->nbyte;
270 	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
271 		if (auio.uio_resid != cnt && (error == ERESTART ||
272 		    error == EINTR || error == EWOULDBLOCK))
273 			error = 0;
274 		if (error == EPIPE)
275 			psignal(p, SIGPIPE);
276 	}
277 	cnt -= auio.uio_resid;
278 #ifdef KTRACE
279 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
280 		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
281 		    &ktriov, cnt, error);
282 #endif
283 	p->p_retval[0] = cnt;
284 	return (error);
285 }
286 
287 /*
288  * Gather write system call
289  */
290 #ifndef _SYS_SYSPROTO_H_
291 struct writev_args {
292 	int	fd;
293 	struct	iovec *iovp;
294 	u_int	iovcnt;
295 };
296 #endif
297 int
298 writev(p, uap)
299 	struct proc *p;
300 	register struct writev_args *uap;
301 {
302 	register struct file *fp;
303 	register struct filedesc *fdp = p->p_fd;
304 	struct uio auio;
305 	register struct iovec *iov;
306 	struct iovec *needfree;
307 	struct iovec aiov[UIO_SMALLIOV];
308 	long i, cnt, error = 0;
309 	u_int iovlen;
310 #ifdef KTRACE
311 	struct iovec *ktriov = NULL;
312 #endif
313 
314 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
315 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
316 	    (fp->f_flag & FWRITE) == 0)
317 		return (EBADF);
318 	/* note: can't use iovlen until iovcnt is validated */
319 	iovlen = uap->iovcnt * sizeof (struct iovec);
320 	if (uap->iovcnt > UIO_SMALLIOV) {
321 		if (uap->iovcnt > UIO_MAXIOV)
322 			return (EINVAL);
323 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
324 		needfree = iov;
325 	} else {
326 		iov = aiov;
327 		needfree = NULL;
328 	}
329 	auio.uio_iov = iov;
330 	auio.uio_iovcnt = uap->iovcnt;
331 	auio.uio_rw = UIO_WRITE;
332 	auio.uio_segflg = UIO_USERSPACE;
333 	auio.uio_procp = p;
334 	auio.uio_offset = -1;
335 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
336 		goto done;
337 	auio.uio_resid = 0;
338 	for (i = 0; i < uap->iovcnt; i++) {
339 		if (iov->iov_len > INT_MAX - auio.uio_resid) {
340 			error = EINVAL;
341 			goto done;
342 		}
343 		auio.uio_resid += iov->iov_len;
344 		iov++;
345 	}
346 #ifdef KTRACE
347 	/*
348 	 * if tracing, save a copy of iovec
349 	 */
350 	if (KTRPOINT(p, KTR_GENIO))  {
351 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
352 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
353 	}
354 #endif
355 	cnt = auio.uio_resid;
356 	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
357 		if (auio.uio_resid != cnt && (error == ERESTART ||
358 		    error == EINTR || error == EWOULDBLOCK))
359 			error = 0;
360 		if (error == EPIPE)
361 			psignal(p, SIGPIPE);
362 	}
363 	cnt -= auio.uio_resid;
364 #ifdef KTRACE
365 	if (ktriov != NULL) {
366 		if (error == 0)
367 			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
368 				ktriov, cnt, error);
369 		FREE(ktriov, M_TEMP);
370 	}
371 #endif
372 	p->p_retval[0] = cnt;
373 done:
374 	if (needfree)
375 		FREE(needfree, M_IOV);
376 	return (error);
377 }
378 
379 /*
380  * Ioctl system call
381  */
382 #ifndef _SYS_SYSPROTO_H_
383 struct ioctl_args {
384 	int	fd;
385 	u_long	com;
386 	caddr_t	data;
387 };
388 #endif
389 /* ARGSUSED */
390 int
391 ioctl(p, uap)
392 	struct proc *p;
393 	register struct ioctl_args *uap;
394 {
395 	register struct file *fp;
396 	register struct filedesc *fdp;
397 	register u_long com;
398 	int error;
399 	register u_int size;
400 	caddr_t data, memp;
401 	int tmp;
402 #define STK_PARAMS	128
403 	char stkbuf[STK_PARAMS];
404 
405 	fdp = p->p_fd;
406 	if ((u_int)uap->fd >= fdp->fd_nfiles ||
407 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
408 		return (EBADF);
409 
410 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
411 		return (EBADF);
412 
413 	switch (com = uap->com) {
414 	case FIONCLEX:
415 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
416 		return (0);
417 	case FIOCLEX:
418 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
419 		return (0);
420 	}
421 
422 	/*
423 	 * Interpret high order word to find amount of data to be
424 	 * copied to/from the user's address space.
425 	 */
426 	size = IOCPARM_LEN(com);
427 	if (size > IOCPARM_MAX)
428 		return (ENOTTY);
429 	memp = NULL;
430 	if (size > sizeof (stkbuf)) {
431 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
432 		data = memp;
433 	} else
434 		data = stkbuf;
435 	if (com&IOC_IN) {
436 		if (size) {
437 			error = copyin(uap->data, data, (u_int)size);
438 			if (error) {
439 				if (memp)
440 					free(memp, M_IOCTLOPS);
441 				return (error);
442 			}
443 		} else
444 			*(caddr_t *)data = uap->data;
445 	} else if ((com&IOC_OUT) && size)
446 		/*
447 		 * Zero the buffer so the user always
448 		 * gets back something deterministic.
449 		 */
450 		bzero(data, size);
451 	else if (com&IOC_VOID)
452 		*(caddr_t *)data = uap->data;
453 
454 	switch (com) {
455 
456 	case FIONBIO:
457 		if ((tmp = *(int *)data))
458 			fp->f_flag |= FNONBLOCK;
459 		else
460 			fp->f_flag &= ~FNONBLOCK;
461 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
462 		break;
463 
464 	case FIOASYNC:
465 		if ((tmp = *(int *)data))
466 			fp->f_flag |= FASYNC;
467 		else
468 			fp->f_flag &= ~FASYNC;
469 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
470 		break;
471 
472 	case FIOSETOWN:
473 		tmp = *(int *)data;
474 		if (fp->f_type == DTYPE_SOCKET) {
475 			((struct socket *)fp->f_data)->so_pgid = tmp;
476 			error = 0;
477 			break;
478 		}
479 		if (tmp <= 0) {
480 			tmp = -tmp;
481 		} else {
482 			struct proc *p1 = pfind(tmp);
483 			if (p1 == 0) {
484 				error = ESRCH;
485 				break;
486 			}
487 			tmp = p1->p_pgrp->pg_id;
488 		}
489 		error = (*fp->f_ops->fo_ioctl)
490 			(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
491 		break;
492 
493 	case FIOGETOWN:
494 		if (fp->f_type == DTYPE_SOCKET) {
495 			error = 0;
496 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
497 			break;
498 		}
499 		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
500 		*(int *)data = -*(int *)data;
501 		break;
502 
503 	default:
504 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
505 		/*
506 		 * Copy any data to user, size was
507 		 * already set and checked above.
508 		 */
509 		if (error == 0 && (com&IOC_OUT) && size)
510 			error = copyout(data, uap->data, (u_int)size);
511 		break;
512 	}
513 	if (memp)
514 		free(memp, M_IOCTLOPS);
515 	return (error);
516 }
517 
518 static int	nselcoll;
519 int	selwait;
520 
521 /*
522  * Select system call.
523  */
524 #ifndef _SYS_SYSPROTO_H_
525 struct select_args {
526 	int	nd;
527 	fd_set	*in, *ou, *ex;
528 	struct	timeval *tv;
529 };
530 #endif
531 int
532 select(p, uap)
533 	register struct proc *p;
534 	register struct select_args *uap;
535 {
536 	/*
537 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
538 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
539 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
540 	 * of 256.
541 	 */
542 	fd_mask s_selbits[howmany(2048, NFDBITS)];
543 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
544 	struct timeval atv, rtv, ttv;
545 	int s, ncoll, error, timo;
546 	u_int nbufbytes, ncpbytes, nfdbits;
547 
548 	if (uap->nd < 0)
549 		return (EINVAL);
550 	if (uap->nd > p->p_fd->fd_nfiles)
551 		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
552 
553 	/*
554 	 * Allocate just enough bits for the non-null fd_sets.  Use the
555 	 * preallocated auto buffer if possible.
556 	 */
557 	nfdbits = roundup(uap->nd, NFDBITS);
558 	ncpbytes = nfdbits / NBBY;
559 	nbufbytes = 0;
560 	if (uap->in != NULL)
561 		nbufbytes += 2 * ncpbytes;
562 	if (uap->ou != NULL)
563 		nbufbytes += 2 * ncpbytes;
564 	if (uap->ex != NULL)
565 		nbufbytes += 2 * ncpbytes;
566 	if (nbufbytes <= sizeof s_selbits)
567 		selbits = &s_selbits[0];
568 	else
569 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
570 
571 	/*
572 	 * Assign pointers into the bit buffers and fetch the input bits.
573 	 * Put the output buffers together so that they can be bzeroed
574 	 * together.
575 	 */
576 	sbp = selbits;
577 #define	getbits(name, x) \
578 	do {								\
579 		if (uap->name == NULL)					\
580 			ibits[x] = NULL;				\
581 		else {							\
582 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
583 			obits[x] = sbp;					\
584 			sbp += ncpbytes / sizeof *sbp;			\
585 			error = copyin(uap->name, ibits[x], ncpbytes);	\
586 			if (error != 0)					\
587 				goto done;				\
588 		}							\
589 	} while (0)
590 	getbits(in, 0);
591 	getbits(ou, 1);
592 	getbits(ex, 2);
593 #undef	getbits
594 	if (nbufbytes != 0)
595 		bzero(selbits, nbufbytes / 2);
596 
597 	if (uap->tv) {
598 		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
599 			sizeof (atv));
600 		if (error)
601 			goto done;
602 		if (itimerfix(&atv)) {
603 			error = EINVAL;
604 			goto done;
605 		}
606 		getmicrouptime(&rtv);
607 		timevaladd(&atv, &rtv);
608 	} else
609 		atv.tv_sec = 0;
610 	timo = 0;
611 retry:
612 	ncoll = nselcoll;
613 	p->p_flag |= P_SELECT;
614 	error = selscan(p, ibits, obits, uap->nd);
615 	if (error || p->p_retval[0])
616 		goto done;
617 	if (atv.tv_sec) {
618 		getmicrouptime(&rtv);
619 		if (timevalcmp(&rtv, &atv, >=))
620 			goto done;
621 		ttv = atv;
622 		timevalsub(&ttv, &rtv);
623 		timo = ttv.tv_sec > 24 * 60 * 60 ?
624 		    24 * 60 * 60 * hz : tvtohz(&ttv);
625 	}
626 	s = splhigh();
627 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
628 		splx(s);
629 		goto retry;
630 	}
631 	p->p_flag &= ~P_SELECT;
632 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
633 	splx(s);
634 	if (error == 0)
635 		goto retry;
636 done:
637 	p->p_flag &= ~P_SELECT;
638 	/* select is not restarted after signals... */
639 	if (error == ERESTART)
640 		error = EINTR;
641 	if (error == EWOULDBLOCK)
642 		error = 0;
643 #define	putbits(name, x) \
644 	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
645 		error = error2;
646 	if (error == 0) {
647 		int error2;
648 
649 		putbits(in, 0);
650 		putbits(ou, 1);
651 		putbits(ex, 2);
652 #undef putbits
653 	}
654 	if (selbits != &s_selbits[0])
655 		free(selbits, M_SELECT);
656 	return (error);
657 }
658 
659 static int
660 selscan(p, ibits, obits, nfd)
661 	struct proc *p;
662 	fd_mask **ibits, **obits;
663 	int nfd;
664 {
665 	register struct filedesc *fdp = p->p_fd;
666 	register int msk, i, j, fd;
667 	register fd_mask bits;
668 	struct file *fp;
669 	int n = 0;
670 	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
671 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
672 
673 	for (msk = 0; msk < 3; msk++) {
674 		if (ibits[msk] == NULL)
675 			continue;
676 		for (i = 0; i < nfd; i += NFDBITS) {
677 			bits = ibits[msk][i/NFDBITS];
678 			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
679 				bits &= ~(1 << j);
680 				fp = fdp->fd_ofiles[fd];
681 				if (fp == NULL)
682 					return (EBADF);
683 				if ((*fp->f_ops->fo_poll)(fp, flag[msk],
684 				    fp->f_cred, p)) {
685 					obits[msk][(fd)/NFDBITS] |=
686 						(1 << ((fd) % NFDBITS));
687 					n++;
688 				}
689 			}
690 		}
691 	}
692 	p->p_retval[0] = n;
693 	return (0);
694 }
695 
696 /*
697  * Poll system call.
698  */
699 #ifndef _SYS_SYSPROTO_H_
700 struct poll_args {
701 	struct pollfd *fds;
702 	u_int	nfds;
703 	int	timeout;
704 };
705 #endif
706 int
707 poll(p, uap)
708 	register struct proc *p;
709 	register struct poll_args *uap;
710 {
711 	caddr_t bits;
712 	char smallbits[32 * sizeof(struct pollfd)];
713 	struct timeval atv, rtv, ttv;
714 	int s, ncoll, error = 0, timo;
715 	size_t ni;
716 
717 	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
718 		/* forgiving; slightly wrong */
719 		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
720 	}
721 	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
722 	if (ni > sizeof(smallbits))
723 		bits = malloc(ni, M_TEMP, M_WAITOK);
724 	else
725 		bits = smallbits;
726 	error = copyin(SCARG(uap, fds), bits, ni);
727 	if (error)
728 		goto done;
729 	if (SCARG(uap, timeout) != INFTIM) {
730 		atv.tv_sec = SCARG(uap, timeout) / 1000;
731 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
732 		if (itimerfix(&atv)) {
733 			error = EINVAL;
734 			goto done;
735 		}
736 		getmicrouptime(&rtv);
737 		timevaladd(&atv, &rtv);
738 	} else
739 		atv.tv_sec = 0;
740 	timo = 0;
741 retry:
742 	ncoll = nselcoll;
743 	p->p_flag |= P_SELECT;
744 	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds));
745 	if (error || p->p_retval[0])
746 		goto done;
747 	if (atv.tv_sec) {
748 		getmicrouptime(&rtv);
749 		if (timevalcmp(&rtv, &atv, >=))
750 			goto done;
751 		ttv = atv;
752 		timevalsub(&ttv, &rtv);
753 		timo = ttv.tv_sec > 24 * 60 * 60 ?
754 		    24 * 60 * 60 * hz : tvtohz(&ttv);
755 	}
756 	s = splhigh();
757 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
758 		splx(s);
759 		goto retry;
760 	}
761 	p->p_flag &= ~P_SELECT;
762 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
763 	splx(s);
764 	if (error == 0)
765 		goto retry;
766 done:
767 	p->p_flag &= ~P_SELECT;
768 	/* poll is not restarted after signals... */
769 	if (error == ERESTART)
770 		error = EINTR;
771 	if (error == EWOULDBLOCK)
772 		error = 0;
773 	if (error == 0) {
774 		error = copyout(bits, SCARG(uap, fds), ni);
775 		if (error)
776 			goto out;
777 	}
778 out:
779 	if (ni > sizeof(smallbits))
780 		free(bits, M_TEMP);
781 	return (error);
782 }
783 
784 static int
785 pollscan(p, fds, nfd)
786 	struct proc *p;
787 	struct pollfd *fds;
788 	int nfd;
789 {
790 	register struct filedesc *fdp = p->p_fd;
791 	int i;
792 	struct file *fp;
793 	int n = 0;
794 
795 	for (i = 0; i < nfd; i++, fds++) {
796 		if ((u_int)fds->fd >= fdp->fd_nfiles) {
797 			fds->revents = POLLNVAL;
798 			n++;
799 		} else {
800 			fp = fdp->fd_ofiles[fds->fd];
801 			if (fp == 0) {
802 				fds->revents = POLLNVAL;
803 				n++;
804 			} else {
805 				/*
806 				 * Note: backend also returns POLLHUP and
807 				 * POLLERR if appropriate.
808 				 */
809 				fds->revents = (*fp->f_ops->fo_poll)(fp,
810 				    fds->events, fp->f_cred, p);
811 				if (fds->revents != 0)
812 					n++;
813 			}
814 		}
815 	}
816 	p->p_retval[0] = n;
817 	return (0);
818 }
819 
820 /*
821  * OpenBSD poll system call.
822  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
823  */
824 #ifndef _SYS_SYSPROTO_H_
825 struct openbsd_poll_args {
826 	struct pollfd *fds;
827 	u_int	nfds;
828 	int	timeout;
829 };
830 #endif
831 int
832 openbsd_poll(p, uap)
833 	register struct proc *p;
834 	register struct openbsd_poll_args *uap;
835 {
836 	return (poll(p, (struct poll_args *)uap));
837 }
838 
839 /*ARGSUSED*/
840 int
841 seltrue(dev, events, p)
842 	dev_t dev;
843 	int events;
844 	struct proc *p;
845 {
846 
847 	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
848 }
849 
850 /*
851  * Record a select request.
852  */
853 void
854 selrecord(selector, sip)
855 	struct proc *selector;
856 	struct selinfo *sip;
857 {
858 	struct proc *p;
859 	pid_t mypid;
860 
861 	mypid = selector->p_pid;
862 	if (sip->si_pid == mypid)
863 		return;
864 	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
865 	    p->p_wchan == (caddr_t)&selwait)
866 		sip->si_flags |= SI_COLL;
867 	else
868 		sip->si_pid = mypid;
869 }
870 
871 /*
872  * Do a wakeup when a selectable event occurs.
873  */
874 void
875 selwakeup(sip)
876 	register struct selinfo *sip;
877 {
878 	register struct proc *p;
879 	int s;
880 
881 	if (sip->si_pid == 0)
882 		return;
883 	if (sip->si_flags & SI_COLL) {
884 		nselcoll++;
885 		sip->si_flags &= ~SI_COLL;
886 		wakeup((caddr_t)&selwait);
887 	}
888 	p = pfind(sip->si_pid);
889 	sip->si_pid = 0;
890 	if (p != NULL) {
891 		s = splhigh();
892 		if (p->p_wchan == (caddr_t)&selwait) {
893 			if (p->p_stat == SSLEEP)
894 				setrunnable(p);
895 			else
896 				unsleep(p);
897 		} else if (p->p_flag & P_SELECT)
898 			p->p_flag &= ~P_SELECT;
899 		splx(s);
900 	}
901 }
902