xref: /freebsd/sys/kern/sys_generic.c (revision 11afcc8f9f96d657b8e6f7547c02c1957331fc96)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
39  * $Id: sys_generic.c,v 1.38 1998/05/17 11:52:51 phk Exp $
40  */
41 
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/ttycom.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/sysent.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <machine/limits.h>
63 
64 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
65 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
66 MALLOC_DEFINE(M_IOV, "iov", "large iov's");
67 
68 static int	pollscan __P((struct proc *, struct pollfd *, int));
69 static int	selscan __P((struct proc *, fd_mask **, fd_mask **, int));
70 
71 /*
72  * Read system call.
73  */
74 #ifndef _SYS_SYSPROTO_H_
75 struct read_args {
76 	int	fd;
77 	char	*buf;
78 	u_int	nbyte;
79 };
80 #endif
81 /* ARGSUSED */
82 int
83 read(p, uap)
84 	struct proc *p;
85 	register struct read_args *uap;
86 {
87 	register struct file *fp;
88 	register struct filedesc *fdp = p->p_fd;
89 	struct uio auio;
90 	struct iovec aiov;
91 	long cnt, error = 0;
92 #ifdef KTRACE
93 	struct iovec ktriov;
94 #endif
95 
96 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
97 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
98 	    (fp->f_flag & FREAD) == 0)
99 		return (EBADF);
100 	aiov.iov_base = (caddr_t)uap->buf;
101 	aiov.iov_len = uap->nbyte;
102 	auio.uio_iov = &aiov;
103 	auio.uio_iovcnt = 1;
104 	auio.uio_offset = -1;
105 
106 	auio.uio_resid = uap->nbyte;
107 	if (auio.uio_resid < 0)
108 		return (EINVAL);
109 
110 	auio.uio_rw = UIO_READ;
111 	auio.uio_segflg = UIO_USERSPACE;
112 	auio.uio_procp = p;
113 #ifdef KTRACE
114 	/*
115 	 * if tracing, save a copy of iovec
116 	 */
117 	if (KTRPOINT(p, KTR_GENIO))
118 		ktriov = aiov;
119 #endif
120 	cnt = uap->nbyte;
121 	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
122 		if (auio.uio_resid != cnt && (error == ERESTART ||
123 		    error == EINTR || error == EWOULDBLOCK))
124 			error = 0;
125 	cnt -= auio.uio_resid;
126 #ifdef KTRACE
127 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
128 		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
129 #endif
130 	p->p_retval[0] = cnt;
131 	return (error);
132 }
133 
134 /*
135  * Scatter read system call.
136  */
137 #ifndef _SYS_SYSPROTO_H_
138 struct readv_args {
139 	int	fd;
140 	struct	iovec *iovp;
141 	u_int	iovcnt;
142 };
143 #endif
144 int
145 readv(p, uap)
146 	struct proc *p;
147 	register struct readv_args *uap;
148 {
149 	register struct file *fp;
150 	register struct filedesc *fdp = p->p_fd;
151 	struct uio auio;
152 	register struct iovec *iov;
153 	struct iovec *needfree;
154 	struct iovec aiov[UIO_SMALLIOV];
155 	long i, cnt, error = 0;
156 	u_int iovlen;
157 #ifdef KTRACE
158 	struct iovec *ktriov = NULL;
159 #endif
160 
161 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
162 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
163 	    (fp->f_flag & FREAD) == 0)
164 		return (EBADF);
165 	/* note: can't use iovlen until iovcnt is validated */
166 	iovlen = uap->iovcnt * sizeof (struct iovec);
167 	if (uap->iovcnt > UIO_SMALLIOV) {
168 		if (uap->iovcnt > UIO_MAXIOV)
169 			return (EINVAL);
170 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
171 		needfree = iov;
172 	} else {
173 		iov = aiov;
174 		needfree = NULL;
175 	}
176 	auio.uio_iov = iov;
177 	auio.uio_iovcnt = uap->iovcnt;
178 	auio.uio_rw = UIO_READ;
179 	auio.uio_segflg = UIO_USERSPACE;
180 	auio.uio_procp = p;
181 	auio.uio_offset = -1;
182 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
183 		goto done;
184 	auio.uio_resid = 0;
185 	for (i = 0; i < uap->iovcnt; i++) {
186 		auio.uio_resid += iov->iov_len;
187 		if (auio.uio_resid < 0) {
188 			error = EINVAL;
189 			goto done;
190 		}
191 		iov++;
192 	}
193 #ifdef KTRACE
194 	/*
195 	 * if tracing, save a copy of iovec
196 	 */
197 	if (KTRPOINT(p, KTR_GENIO))  {
198 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
199 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
200 	}
201 #endif
202 	cnt = auio.uio_resid;
203 	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
204 		if (auio.uio_resid != cnt && (error == ERESTART ||
205 		    error == EINTR || error == EWOULDBLOCK))
206 			error = 0;
207 	cnt -= auio.uio_resid;
208 #ifdef KTRACE
209 	if (ktriov != NULL) {
210 		if (error == 0)
211 			ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov,
212 			    cnt, error);
213 		FREE(ktriov, M_TEMP);
214 	}
215 #endif
216 	p->p_retval[0] = cnt;
217 done:
218 	if (needfree)
219 		FREE(needfree, M_IOV);
220 	return (error);
221 }
222 
223 /*
224  * Write system call
225  */
226 #ifndef _SYS_SYSPROTO_H_
227 struct write_args {
228 	int	fd;
229 	char	*buf;
230 	u_int	nbyte;
231 };
232 #endif
233 int
234 write(p, uap)
235 	struct proc *p;
236 	register struct write_args *uap;
237 {
238 	register struct file *fp;
239 	register struct filedesc *fdp = p->p_fd;
240 	struct uio auio;
241 	struct iovec aiov;
242 	long cnt, error = 0;
243 #ifdef KTRACE
244 	struct iovec ktriov;
245 #endif
246 
247 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
248 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
249 	    (fp->f_flag & FWRITE) == 0)
250 		return (EBADF);
251 	aiov.iov_base = (caddr_t)uap->buf;
252 	aiov.iov_len = uap->nbyte;
253 	auio.uio_iov = &aiov;
254 	auio.uio_iovcnt = 1;
255 	auio.uio_offset = -1;
256 	auio.uio_resid = uap->nbyte;
257 	auio.uio_rw = UIO_WRITE;
258 	auio.uio_segflg = UIO_USERSPACE;
259 	auio.uio_procp = p;
260 #ifdef KTRACE
261 	/*
262 	 * if tracing, save a copy of iovec
263 	 */
264 	if (KTRPOINT(p, KTR_GENIO))
265 		ktriov = aiov;
266 #endif
267 	cnt = uap->nbyte;
268 	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
269 		if (auio.uio_resid != cnt && (error == ERESTART ||
270 		    error == EINTR || error == EWOULDBLOCK))
271 			error = 0;
272 		if (error == EPIPE)
273 			psignal(p, SIGPIPE);
274 	}
275 	cnt -= auio.uio_resid;
276 #ifdef KTRACE
277 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
278 		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
279 		    &ktriov, cnt, error);
280 #endif
281 	p->p_retval[0] = cnt;
282 	return (error);
283 }
284 
285 /*
286  * Gather write system call
287  */
288 #ifndef _SYS_SYSPROTO_H_
289 struct writev_args {
290 	int	fd;
291 	struct	iovec *iovp;
292 	u_int	iovcnt;
293 };
294 #endif
295 int
296 writev(p, uap)
297 	struct proc *p;
298 	register struct writev_args *uap;
299 {
300 	register struct file *fp;
301 	register struct filedesc *fdp = p->p_fd;
302 	struct uio auio;
303 	register struct iovec *iov;
304 	struct iovec *needfree;
305 	struct iovec aiov[UIO_SMALLIOV];
306 	long i, cnt, error = 0;
307 	u_int iovlen;
308 #ifdef KTRACE
309 	struct iovec *ktriov = NULL;
310 #endif
311 
312 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
313 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
314 	    (fp->f_flag & FWRITE) == 0)
315 		return (EBADF);
316 	/* note: can't use iovlen until iovcnt is validated */
317 	iovlen = uap->iovcnt * sizeof (struct iovec);
318 	if (uap->iovcnt > UIO_SMALLIOV) {
319 		if (uap->iovcnt > UIO_MAXIOV)
320 			return (EINVAL);
321 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
322 		needfree = iov;
323 	} else {
324 		iov = aiov;
325 		needfree = NULL;
326 	}
327 	auio.uio_iov = iov;
328 	auio.uio_iovcnt = uap->iovcnt;
329 	auio.uio_rw = UIO_WRITE;
330 	auio.uio_segflg = UIO_USERSPACE;
331 	auio.uio_procp = p;
332 	auio.uio_offset = -1;
333 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
334 		goto done;
335 	auio.uio_resid = 0;
336 	for (i = 0; i < uap->iovcnt; i++) {
337 		auio.uio_resid += iov->iov_len;
338 		if (auio.uio_resid < 0) {
339 			error = EINVAL;
340 			goto done;
341 		}
342 		iov++;
343 	}
344 #ifdef KTRACE
345 	/*
346 	 * if tracing, save a copy of iovec
347 	 */
348 	if (KTRPOINT(p, KTR_GENIO))  {
349 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
350 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
351 	}
352 #endif
353 	cnt = auio.uio_resid;
354 	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
355 		if (auio.uio_resid != cnt && (error == ERESTART ||
356 		    error == EINTR || error == EWOULDBLOCK))
357 			error = 0;
358 		if (error == EPIPE)
359 			psignal(p, SIGPIPE);
360 	}
361 	cnt -= auio.uio_resid;
362 #ifdef KTRACE
363 	if (ktriov != NULL) {
364 		if (error == 0)
365 			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
366 				ktriov, cnt, error);
367 		FREE(ktriov, M_TEMP);
368 	}
369 #endif
370 	p->p_retval[0] = cnt;
371 done:
372 	if (needfree)
373 		FREE(needfree, M_IOV);
374 	return (error);
375 }
376 
377 /*
378  * Ioctl system call
379  */
380 #ifndef _SYS_SYSPROTO_H_
381 struct ioctl_args {
382 	int	fd;
383 	int	com;
384 	caddr_t	data;
385 };
386 #endif
387 /* ARGSUSED */
388 int
389 ioctl(p, uap)
390 	struct proc *p;
391 	register struct ioctl_args *uap;
392 {
393 	register struct file *fp;
394 	register struct filedesc *fdp;
395 	register u_long com;
396 	int error;
397 	register u_int size;
398 	caddr_t data, memp;
399 	int tmp;
400 #define STK_PARAMS	128
401 	char stkbuf[STK_PARAMS];
402 
403 	fdp = p->p_fd;
404 	if ((u_int)uap->fd >= fdp->fd_nfiles ||
405 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
406 		return (EBADF);
407 
408 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
409 		return (EBADF);
410 
411 	switch (com = uap->com) {
412 	case FIONCLEX:
413 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
414 		return (0);
415 	case FIOCLEX:
416 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
417 		return (0);
418 	}
419 
420 	/*
421 	 * Interpret high order word to find amount of data to be
422 	 * copied to/from the user's address space.
423 	 */
424 	size = IOCPARM_LEN(com);
425 	if (size > IOCPARM_MAX)
426 		return (ENOTTY);
427 	memp = NULL;
428 	if (size > sizeof (stkbuf)) {
429 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
430 		data = memp;
431 	} else
432 		data = stkbuf;
433 	if (com&IOC_IN) {
434 		if (size) {
435 			error = copyin(uap->data, data, (u_int)size);
436 			if (error) {
437 				if (memp)
438 					free(memp, M_IOCTLOPS);
439 				return (error);
440 			}
441 		} else
442 			*(caddr_t *)data = uap->data;
443 	} else if ((com&IOC_OUT) && size)
444 		/*
445 		 * Zero the buffer so the user always
446 		 * gets back something deterministic.
447 		 */
448 		bzero(data, size);
449 	else if (com&IOC_VOID)
450 		*(caddr_t *)data = uap->data;
451 
452 	switch (com) {
453 
454 	case FIONBIO:
455 		if ((tmp = *(int *)data))
456 			fp->f_flag |= FNONBLOCK;
457 		else
458 			fp->f_flag &= ~FNONBLOCK;
459 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
460 		break;
461 
462 	case FIOASYNC:
463 		if ((tmp = *(int *)data))
464 			fp->f_flag |= FASYNC;
465 		else
466 			fp->f_flag &= ~FASYNC;
467 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
468 		break;
469 
470 	case FIOSETOWN:
471 		tmp = *(int *)data;
472 		if (fp->f_type == DTYPE_SOCKET) {
473 			((struct socket *)fp->f_data)->so_pgid = tmp;
474 			error = 0;
475 			break;
476 		}
477 		if (tmp <= 0) {
478 			tmp = -tmp;
479 		} else {
480 			struct proc *p1 = pfind(tmp);
481 			if (p1 == 0) {
482 				error = ESRCH;
483 				break;
484 			}
485 			tmp = p1->p_pgrp->pg_id;
486 		}
487 		error = (*fp->f_ops->fo_ioctl)
488 			(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
489 		break;
490 
491 	case FIOGETOWN:
492 		if (fp->f_type == DTYPE_SOCKET) {
493 			error = 0;
494 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
495 			break;
496 		}
497 		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
498 		*(int *)data = -*(int *)data;
499 		break;
500 
501 	default:
502 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
503 		/*
504 		 * Copy any data to user, size was
505 		 * already set and checked above.
506 		 */
507 		if (error == 0 && (com&IOC_OUT) && size)
508 			error = copyout(data, uap->data, (u_int)size);
509 		break;
510 	}
511 	if (memp)
512 		free(memp, M_IOCTLOPS);
513 	return (error);
514 }
515 
/* Wait channel: select() and poll() sleep on the address of selwait. */
int	selwait;
/*
 * Collision counter: selwakeup() increments it when it wakes a selinfo
 * marked SI_COLL; select() and poll() rescan if it changed while they
 * were scanning.
 */
static int	nselcoll;
518 
519 /*
520  * Select system call.
521  */
522 #ifndef _SYS_SYSPROTO_H_
523 struct select_args {
524 	int	nd;
525 	fd_set	*in, *ou, *ex;
526 	struct	timeval *tv;
527 };
528 #endif
529 int
530 select(p, uap)
531 	register struct proc *p;
532 	register struct select_args *uap;
533 {
534 	/*
535 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
536 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
537 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
538 	 * of 256.
539 	 */
540 	fd_mask s_selbits[howmany(2048, NFDBITS)];
541 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
542 	struct timeval atv, rtv, ttv;
543 	int s, ncoll, error, timo;
544 	u_int nbufbytes, ncpbytes, nfdbits;
545 
546 	if (uap->nd < 0)
547 		return (EINVAL);
548 	if (uap->nd > p->p_fd->fd_nfiles)
549 		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
550 
551 	/*
552 	 * Allocate just enough bits for the non-null fd_sets.  Use the
553 	 * preallocated auto buffer if possible.
554 	 */
555 	nfdbits = roundup(uap->nd, NFDBITS);
556 	ncpbytes = nfdbits / NBBY;
557 	nbufbytes = 0;
558 	if (uap->in != NULL)
559 		nbufbytes += 2 * ncpbytes;
560 	if (uap->ou != NULL)
561 		nbufbytes += 2 * ncpbytes;
562 	if (uap->ex != NULL)
563 		nbufbytes += 2 * ncpbytes;
564 	if (nbufbytes <= sizeof s_selbits)
565 		selbits = &s_selbits[0];
566 	else
567 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
568 
569 	/*
570 	 * Assign pointers into the bit buffers and fetch the input bits.
571 	 * Put the output buffers together so that they can be bzeroed
572 	 * together.
573 	 */
574 	sbp = selbits;
575 #define	getbits(name, x) \
576 	do {								\
577 		if (uap->name == NULL)					\
578 			ibits[x] = NULL;				\
579 		else {							\
580 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
581 			obits[x] = sbp;					\
582 			sbp += ncpbytes / sizeof *sbp;			\
583 			error = copyin(uap->name, ibits[x], ncpbytes);	\
584 			if (error != 0)					\
585 				goto done;				\
586 		}							\
587 	} while (0)
588 	getbits(in, 0);
589 	getbits(ou, 1);
590 	getbits(ex, 2);
591 #undef	getbits
592 	if (nbufbytes != 0)
593 		bzero(selbits, nbufbytes / 2);
594 
595 	if (uap->tv) {
596 		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
597 			sizeof (atv));
598 		if (error)
599 			goto done;
600 		if (itimerfix(&atv)) {
601 			error = EINVAL;
602 			goto done;
603 		}
604 		getmicrouptime(&rtv);
605 		timevaladd(&atv, &rtv);
606 	} else
607 		atv.tv_sec = 0;
608 	timo = 0;
609 retry:
610 	ncoll = nselcoll;
611 	p->p_flag |= P_SELECT;
612 	error = selscan(p, ibits, obits, uap->nd);
613 	if (error || p->p_retval[0])
614 		goto done;
615 	if (atv.tv_sec) {
616 		getmicrouptime(&rtv);
617 		if (timevalcmp(&rtv, &atv, >=))
618 			goto done;
619 		ttv = atv;
620 		timevalsub(&ttv, &rtv);
621 		timo = ttv.tv_sec > 24 * 60 * 60 ?
622 		    24 * 60 * 60 * hz : tvtohz(&ttv);
623 	}
624 	s = splhigh();
625 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
626 		splx(s);
627 		goto retry;
628 	}
629 	p->p_flag &= ~P_SELECT;
630 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
631 	splx(s);
632 	if (error == 0)
633 		goto retry;
634 done:
635 	p->p_flag &= ~P_SELECT;
636 	/* select is not restarted after signals... */
637 	if (error == ERESTART)
638 		error = EINTR;
639 	if (error == EWOULDBLOCK)
640 		error = 0;
641 #define	putbits(name, x) \
642 	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
643 		error = error2;
644 	if (error == 0) {
645 		int error2;
646 
647 		putbits(in, 0);
648 		putbits(ou, 1);
649 		putbits(ex, 2);
650 #undef putbits
651 	}
652 	if (selbits != &s_selbits[0])
653 		free(selbits, M_SELECT);
654 	return (error);
655 }
656 
657 static int
658 selscan(p, ibits, obits, nfd)
659 	struct proc *p;
660 	fd_mask **ibits, **obits;
661 	int nfd;
662 {
663 	register struct filedesc *fdp = p->p_fd;
664 	register int msk, i, j, fd;
665 	register fd_mask bits;
666 	struct file *fp;
667 	int n = 0;
668 	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
669 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
670 
671 	for (msk = 0; msk < 3; msk++) {
672 		if (ibits[msk] == NULL)
673 			continue;
674 		for (i = 0; i < nfd; i += NFDBITS) {
675 			bits = ibits[msk][i/NFDBITS];
676 			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
677 				bits &= ~(1 << j);
678 				fp = fdp->fd_ofiles[fd];
679 				if (fp == NULL)
680 					return (EBADF);
681 				if ((*fp->f_ops->fo_poll)(fp, flag[msk],
682 				    fp->f_cred, p)) {
683 					obits[msk][(fd)/NFDBITS] |=
684 						(1 << ((fd) % NFDBITS));
685 					n++;
686 				}
687 			}
688 		}
689 	}
690 	p->p_retval[0] = n;
691 	return (0);
692 }
693 
694 /*
695  * Poll system call.
696  */
697 #ifndef _SYS_SYSPROTO_H_
698 struct poll_args {
699 	struct pollfd *fds;
700 	u_int	nfds;
701 	int	timeout;
702 };
703 #endif
704 int
705 poll(p, uap)
706 	register struct proc *p;
707 	register struct poll_args *uap;
708 {
709 	caddr_t bits;
710 	char smallbits[32 * sizeof(struct pollfd)];
711 	struct timeval atv, rtv, ttv;
712 	int s, ncoll, error = 0, timo;
713 	size_t ni;
714 
715 	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
716 		/* forgiving; slightly wrong */
717 		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
718 	}
719 	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
720 	if (ni > sizeof(smallbits))
721 		bits = malloc(ni, M_TEMP, M_WAITOK);
722 	else
723 		bits = smallbits;
724 	error = copyin(SCARG(uap, fds), bits, ni);
725 	if (error)
726 		goto done;
727 	if (SCARG(uap, timeout) != INFTIM) {
728 		atv.tv_sec = SCARG(uap, timeout) / 1000;
729 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
730 		if (itimerfix(&atv)) {
731 			error = EINVAL;
732 			goto done;
733 		}
734 		getmicrouptime(&rtv);
735 		timevaladd(&atv, &rtv);
736 	} else
737 		atv.tv_sec = 0;
738 	timo = 0;
739 retry:
740 	ncoll = nselcoll;
741 	p->p_flag |= P_SELECT;
742 	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds));
743 	if (error || p->p_retval[0])
744 		goto done;
745 	if (atv.tv_sec) {
746 		getmicrouptime(&rtv);
747 		if (timevalcmp(&rtv, &atv, >=))
748 			goto done;
749 		ttv = atv;
750 		timevalsub(&ttv, &rtv);
751 		timo = ttv.tv_sec > 24 * 60 * 60 ?
752 		    24 * 60 * 60 * hz : tvtohz(&ttv);
753 	}
754 	s = splhigh();
755 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
756 		splx(s);
757 		goto retry;
758 	}
759 	p->p_flag &= ~P_SELECT;
760 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
761 	splx(s);
762 	if (error == 0)
763 		goto retry;
764 done:
765 	p->p_flag &= ~P_SELECT;
766 	/* poll is not restarted after signals... */
767 	if (error == ERESTART)
768 		error = EINTR;
769 	if (error == EWOULDBLOCK)
770 		error = 0;
771 	if (error == 0) {
772 		error = copyout(bits, SCARG(uap, fds), ni);
773 		if (error)
774 			goto out;
775 	}
776 out:
777 	if (ni > sizeof(smallbits))
778 		free(bits, M_TEMP);
779 	return (error);
780 }
781 
782 static int
783 pollscan(p, fds, nfd)
784 	struct proc *p;
785 	struct pollfd *fds;
786 	int nfd;
787 {
788 	register struct filedesc *fdp = p->p_fd;
789 	int i;
790 	struct file *fp;
791 	int n = 0;
792 
793 	for (i = 0; i < nfd; i++, fds++) {
794 		if ((u_int)fds->fd >= fdp->fd_nfiles) {
795 			fds->revents = POLLNVAL;
796 			n++;
797 		} else {
798 			fp = fdp->fd_ofiles[fds->fd];
799 			if (fp == 0) {
800 				fds->revents = POLLNVAL;
801 				n++;
802 			} else {
803 				/*
804 				 * Note: backend also returns POLLHUP and
805 				 * POLLERR if appropriate.
806 				 */
807 				fds->revents = (*fp->f_ops->fo_poll)(fp,
808 				    fds->events, fp->f_cred, p);
809 				if (fds->revents != 0)
810 					n++;
811 			}
812 		}
813 	}
814 	p->p_retval[0] = n;
815 	return (0);
816 }
817 
818 /*
819  * OpenBSD poll system call.
820  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
821  */
822 #ifndef _SYS_SYSPROTO_H_
823 struct openbsd_poll_args {
824 	struct pollfd *fds;
825 	u_int	nfds;
826 	int	timeout;
827 };
828 #endif
829 int
830 openbsd_poll(p, uap)
831 	register struct proc *p;
832 	register struct openbsd_poll_args *uap;
833 {
834 	return (poll(p, (struct poll_args *)uap));
835 }
836 
/*
 * Poll routine that reports every requested normal read/write event as
 * ready: returns the subset of "events" made up of POLLIN, POLLOUT,
 * POLLRDNORM and POLLWRNORM.  "dev" and "p" are not used.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	int ready;

	ready = events & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM);
	return (ready);
}
847 
848 /*
849  * Record a select request.
850  */
851 void
852 selrecord(selector, sip)
853 	struct proc *selector;
854 	struct selinfo *sip;
855 {
856 	struct proc *p;
857 	pid_t mypid;
858 
859 	mypid = selector->p_pid;
860 	if (sip->si_pid == mypid)
861 		return;
862 	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
863 	    p->p_wchan == (caddr_t)&selwait)
864 		sip->si_flags |= SI_COLL;
865 	else
866 		sip->si_pid = mypid;
867 }
868 
869 /*
870  * Do a wakeup when a selectable event occurs.
871  */
872 void
873 selwakeup(sip)
874 	register struct selinfo *sip;
875 {
876 	register struct proc *p;
877 	int s;
878 
879 	if (sip->si_pid == 0)
880 		return;
881 	if (sip->si_flags & SI_COLL) {
882 		nselcoll++;
883 		sip->si_flags &= ~SI_COLL;
884 		wakeup((caddr_t)&selwait);
885 	}
886 	p = pfind(sip->si_pid);
887 	sip->si_pid = 0;
888 	if (p != NULL) {
889 		s = splhigh();
890 		if (p->p_wchan == (caddr_t)&selwait) {
891 			if (p->p_stat == SSLEEP)
892 				setrunnable(p);
893 			else
894 				unsleep(p);
895 		} else if (p->p_flag & P_SELECT)
896 			p->p_flag &= ~P_SELECT;
897 		splx(s);
898 	}
899 }
900