xref: /freebsd/sys/kern/sys_generic.c (revision 0de89efe5c443f213c7ea28773ef2dc6cf3af2ed)
1 /*
2  * Copyright (c) 1982, 1986, 1989, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  * (c) UNIX System Laboratories, Inc.
5  * All or some portions of this file are derived from material licensed
6  * to the University of California by American Telephone and Telegraph
7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8  * the permission of UNIX System Laboratories, Inc.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  * 3. All advertising materials mentioning features or use of this software
19  *    must display the following acknowledgement:
20  *	This product includes software developed by the University of
21  *	California, Berkeley and its contributors.
22  * 4. Neither the name of the University nor the names of its contributors
23  *    may be used to endorse or promote products derived from this software
24  *    without specific prior written permission.
25  *
26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36  * SUCH DAMAGE.
37  *
38  *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
39  * $Id: sys_generic.c,v 1.28 1997/09/02 20:05:52 bde Exp $
40  */
41 
42 #include "opt_ktrace.h"
43 
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/sysproto.h>
47 #include <sys/filedesc.h>
48 #include <sys/filio.h>
49 #include <sys/ttycom.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/proc.h>
53 #include <sys/signalvar.h>
54 #include <sys/socketvar.h>
55 #include <sys/uio.h>
56 #include <sys/kernel.h>
57 #include <sys/malloc.h>
58 #include <sys/poll.h>
59 #include <sys/sysent.h>
60 #ifdef KTRACE
61 #include <sys/ktrace.h>
62 #endif
63 
64 static int	selscan __P((struct proc *, fd_mask **, fd_mask **, int, int *));
65 static int	pollscan __P((struct proc *, struct pollfd *, int, int *));
66 
67 /*
68  * Read system call.
69  */
70 #ifndef _SYS_SYSPROTO_H_
71 struct read_args {
72 	int	fd;
73 	char	*buf;
74 	u_int	nbyte;
75 };
76 #endif
77 /* ARGSUSED */
78 int
79 read(p, uap, retval)
80 	struct proc *p;
81 	register struct read_args *uap;
82 	int *retval;
83 {
84 	register struct file *fp;
85 	register struct filedesc *fdp = p->p_fd;
86 	struct uio auio;
87 	struct iovec aiov;
88 	long cnt, error = 0;
89 #ifdef KTRACE
90 	struct iovec ktriov;
91 #endif
92 
93 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
94 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
95 	    (fp->f_flag & FREAD) == 0)
96 		return (EBADF);
97 	aiov.iov_base = (caddr_t)uap->buf;
98 	aiov.iov_len = uap->nbyte;
99 	auio.uio_iov = &aiov;
100 	auio.uio_iovcnt = 1;
101 	auio.uio_offset = -1;
102 
103 	auio.uio_resid = uap->nbyte;
104 	if (auio.uio_resid < 0)
105 		return (EINVAL);
106 
107 	auio.uio_rw = UIO_READ;
108 	auio.uio_segflg = UIO_USERSPACE;
109 	auio.uio_procp = p;
110 #ifdef KTRACE
111 	/*
112 	 * if tracing, save a copy of iovec
113 	 */
114 	if (KTRPOINT(p, KTR_GENIO))
115 		ktriov = aiov;
116 #endif
117 	cnt = uap->nbyte;
118 	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
119 		if (auio.uio_resid != cnt && (error == ERESTART ||
120 		    error == EINTR || error == EWOULDBLOCK))
121 			error = 0;
122 	cnt -= auio.uio_resid;
123 #ifdef KTRACE
124 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
125 		ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktriov, cnt, error);
126 #endif
127 	*retval = cnt;
128 	return (error);
129 }
130 
131 /*
132  * Scatter read system call.
133  */
134 #ifndef _SYS_SYSPROTO_H_
135 struct readv_args {
136 	int	fd;
137 	struct	iovec *iovp;
138 	u_int	iovcnt;
139 };
140 #endif
141 int
142 readv(p, uap, retval)
143 	struct proc *p;
144 	register struct readv_args *uap;
145 	int *retval;
146 {
147 	register struct file *fp;
148 	register struct filedesc *fdp = p->p_fd;
149 	struct uio auio;
150 	register struct iovec *iov;
151 	struct iovec *needfree;
152 	struct iovec aiov[UIO_SMALLIOV];
153 	long i, cnt, error = 0;
154 	u_int iovlen;
155 #ifdef KTRACE
156 	struct iovec *ktriov = NULL;
157 #endif
158 
159 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
160 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
161 	    (fp->f_flag & FREAD) == 0)
162 		return (EBADF);
163 	/* note: can't use iovlen until iovcnt is validated */
164 	iovlen = uap->iovcnt * sizeof (struct iovec);
165 	if (uap->iovcnt > UIO_SMALLIOV) {
166 		if (uap->iovcnt > UIO_MAXIOV)
167 			return (EINVAL);
168 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
169 		needfree = iov;
170 	} else {
171 		iov = aiov;
172 		needfree = NULL;
173 	}
174 	auio.uio_iov = iov;
175 	auio.uio_iovcnt = uap->iovcnt;
176 	auio.uio_rw = UIO_READ;
177 	auio.uio_segflg = UIO_USERSPACE;
178 	auio.uio_procp = p;
179 	auio.uio_offset = -1;
180 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
181 		goto done;
182 	auio.uio_resid = 0;
183 	for (i = 0; i < uap->iovcnt; i++) {
184 		auio.uio_resid += iov->iov_len;
185 		if (auio.uio_resid < 0) {
186 			error = EINVAL;
187 			goto done;
188 		}
189 		iov++;
190 	}
191 #ifdef KTRACE
192 	/*
193 	 * if tracing, save a copy of iovec
194 	 */
195 	if (KTRPOINT(p, KTR_GENIO))  {
196 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
197 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
198 	}
199 #endif
200 	cnt = auio.uio_resid;
201 	if ((error = (*fp->f_ops->fo_read)(fp, &auio, fp->f_cred)))
202 		if (auio.uio_resid != cnt && (error == ERESTART ||
203 		    error == EINTR || error == EWOULDBLOCK))
204 			error = 0;
205 	cnt -= auio.uio_resid;
206 #ifdef KTRACE
207 	if (ktriov != NULL) {
208 		if (error == 0)
209 			ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov,
210 			    cnt, error);
211 		FREE(ktriov, M_TEMP);
212 	}
213 #endif
214 	*retval = cnt;
215 done:
216 	if (needfree)
217 		FREE(needfree, M_IOV);
218 	return (error);
219 }
220 
221 /*
222  * Write system call
223  */
224 #ifndef _SYS_SYSPROTO_H_
225 struct write_args {
226 	int	fd;
227 	char	*buf;
228 	u_int	nbyte;
229 };
230 #endif
231 int
232 write(p, uap, retval)
233 	struct proc *p;
234 	register struct write_args *uap;
235 	int *retval;
236 {
237 	register struct file *fp;
238 	register struct filedesc *fdp = p->p_fd;
239 	struct uio auio;
240 	struct iovec aiov;
241 	long cnt, error = 0;
242 #ifdef KTRACE
243 	struct iovec ktriov;
244 #endif
245 
246 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
247 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
248 	    (fp->f_flag & FWRITE) == 0)
249 		return (EBADF);
250 	aiov.iov_base = (caddr_t)uap->buf;
251 	aiov.iov_len = uap->nbyte;
252 	auio.uio_iov = &aiov;
253 	auio.uio_iovcnt = 1;
254 	auio.uio_offset = -1;
255 	auio.uio_resid = uap->nbyte;
256 	auio.uio_rw = UIO_WRITE;
257 	auio.uio_segflg = UIO_USERSPACE;
258 	auio.uio_procp = p;
259 #ifdef KTRACE
260 	/*
261 	 * if tracing, save a copy of iovec
262 	 */
263 	if (KTRPOINT(p, KTR_GENIO))
264 		ktriov = aiov;
265 #endif
266 	cnt = uap->nbyte;
267 	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
268 		if (auio.uio_resid != cnt && (error == ERESTART ||
269 		    error == EINTR || error == EWOULDBLOCK))
270 			error = 0;
271 		if (error == EPIPE)
272 			psignal(p, SIGPIPE);
273 	}
274 	cnt -= auio.uio_resid;
275 #ifdef KTRACE
276 	if (KTRPOINT(p, KTR_GENIO) && error == 0)
277 		ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
278 		    &ktriov, cnt, error);
279 #endif
280 	*retval = cnt;
281 	return (error);
282 }
283 
284 /*
285  * Gather write system call
286  */
287 #ifndef _SYS_SYSPROTO_H_
288 struct writev_args {
289 	int	fd;
290 	struct	iovec *iovp;
291 	u_int	iovcnt;
292 };
293 #endif
294 int
295 writev(p, uap, retval)
296 	struct proc *p;
297 	register struct writev_args *uap;
298 	int *retval;
299 {
300 	register struct file *fp;
301 	register struct filedesc *fdp = p->p_fd;
302 	struct uio auio;
303 	register struct iovec *iov;
304 	struct iovec *needfree;
305 	struct iovec aiov[UIO_SMALLIOV];
306 	long i, cnt, error = 0;
307 	u_int iovlen;
308 #ifdef KTRACE
309 	struct iovec *ktriov = NULL;
310 #endif
311 
312 	if (((u_int)uap->fd) >= fdp->fd_nfiles ||
313 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL ||
314 	    (fp->f_flag & FWRITE) == 0)
315 		return (EBADF);
316 	/* note: can't use iovlen until iovcnt is validated */
317 	iovlen = uap->iovcnt * sizeof (struct iovec);
318 	if (uap->iovcnt > UIO_SMALLIOV) {
319 		if (uap->iovcnt > UIO_MAXIOV)
320 			return (EINVAL);
321 		MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK);
322 		needfree = iov;
323 	} else {
324 		iov = aiov;
325 		needfree = NULL;
326 	}
327 	auio.uio_iov = iov;
328 	auio.uio_iovcnt = uap->iovcnt;
329 	auio.uio_rw = UIO_WRITE;
330 	auio.uio_segflg = UIO_USERSPACE;
331 	auio.uio_procp = p;
332 	auio.uio_offset = -1;
333 	if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen)))
334 		goto done;
335 	auio.uio_resid = 0;
336 	for (i = 0; i < uap->iovcnt; i++) {
337 		auio.uio_resid += iov->iov_len;
338 		if (auio.uio_resid < 0) {
339 			error = EINVAL;
340 			goto done;
341 		}
342 		iov++;
343 	}
344 #ifdef KTRACE
345 	/*
346 	 * if tracing, save a copy of iovec
347 	 */
348 	if (KTRPOINT(p, KTR_GENIO))  {
349 		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
350 		bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
351 	}
352 #endif
353 	cnt = auio.uio_resid;
354 	if ((error = (*fp->f_ops->fo_write)(fp, &auio, fp->f_cred))) {
355 		if (auio.uio_resid != cnt && (error == ERESTART ||
356 		    error == EINTR || error == EWOULDBLOCK))
357 			error = 0;
358 		if (error == EPIPE)
359 			psignal(p, SIGPIPE);
360 	}
361 	cnt -= auio.uio_resid;
362 #ifdef KTRACE
363 	if (ktriov != NULL) {
364 		if (error == 0)
365 			ktrgenio(p->p_tracep, uap->fd, UIO_WRITE,
366 				ktriov, cnt, error);
367 		FREE(ktriov, M_TEMP);
368 	}
369 #endif
370 	*retval = cnt;
371 done:
372 	if (needfree)
373 		FREE(needfree, M_IOV);
374 	return (error);
375 }
376 
377 /*
378  * Ioctl system call
379  */
380 #ifndef _SYS_SYSPROTO_H_
381 struct ioctl_args {
382 	int	fd;
383 	int	com;
384 	caddr_t	data;
385 };
386 #endif
387 /* ARGSUSED */
388 int
389 ioctl(p, uap, retval)
390 	struct proc *p;
391 	register struct ioctl_args *uap;
392 	int *retval;
393 {
394 	register struct file *fp;
395 	register struct filedesc *fdp;
396 	register int com, error;
397 	register u_int size;
398 	caddr_t data, memp;
399 	int tmp;
400 #define STK_PARAMS	128
401 	char stkbuf[STK_PARAMS];
402 
403 	fdp = p->p_fd;
404 	if ((u_int)uap->fd >= fdp->fd_nfiles ||
405 	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
406 		return (EBADF);
407 
408 	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
409 		return (EBADF);
410 
411 	switch (com = uap->com) {
412 	case FIONCLEX:
413 		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
414 		return (0);
415 	case FIOCLEX:
416 		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
417 		return (0);
418 	}
419 
420 	/*
421 	 * Interpret high order word to find amount of data to be
422 	 * copied to/from the user's address space.
423 	 */
424 	size = IOCPARM_LEN(com);
425 	if (size > IOCPARM_MAX)
426 		return (ENOTTY);
427 	memp = NULL;
428 	if (size > sizeof (stkbuf)) {
429 		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
430 		data = memp;
431 	} else
432 		data = stkbuf;
433 	if (com&IOC_IN) {
434 		if (size) {
435 			error = copyin(uap->data, data, (u_int)size);
436 			if (error) {
437 				if (memp)
438 					free(memp, M_IOCTLOPS);
439 				return (error);
440 			}
441 		} else
442 			*(caddr_t *)data = uap->data;
443 	} else if ((com&IOC_OUT) && size)
444 		/*
445 		 * Zero the buffer so the user always
446 		 * gets back something deterministic.
447 		 */
448 		bzero(data, size);
449 	else if (com&IOC_VOID)
450 		*(caddr_t *)data = uap->data;
451 
452 	switch (com) {
453 
454 	case FIONBIO:
455 		if ((tmp = *(int *)data))
456 			fp->f_flag |= FNONBLOCK;
457 		else
458 			fp->f_flag &= ~FNONBLOCK;
459 		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, (caddr_t)&tmp, p);
460 		break;
461 
462 	case FIOASYNC:
463 		if ((tmp = *(int *)data))
464 			fp->f_flag |= FASYNC;
465 		else
466 			fp->f_flag &= ~FASYNC;
467 		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, (caddr_t)&tmp, p);
468 		break;
469 
470 	case FIOSETOWN:
471 		tmp = *(int *)data;
472 		if (fp->f_type == DTYPE_SOCKET) {
473 			((struct socket *)fp->f_data)->so_pgid = tmp;
474 			error = 0;
475 			break;
476 		}
477 		if (tmp <= 0) {
478 			tmp = -tmp;
479 		} else {
480 			struct proc *p1 = pfind(tmp);
481 			if (p1 == 0) {
482 				error = ESRCH;
483 				break;
484 			}
485 			tmp = p1->p_pgrp->pg_id;
486 		}
487 		error = (*fp->f_ops->fo_ioctl)
488 			(fp, (int)TIOCSPGRP, (caddr_t)&tmp, p);
489 		break;
490 
491 	case FIOGETOWN:
492 		if (fp->f_type == DTYPE_SOCKET) {
493 			error = 0;
494 			*(int *)data = ((struct socket *)fp->f_data)->so_pgid;
495 			break;
496 		}
497 		error = (*fp->f_ops->fo_ioctl)(fp, (int)TIOCGPGRP, data, p);
498 		*(int *)data = -*(int *)data;
499 		break;
500 
501 	default:
502 		error = (*fp->f_ops->fo_ioctl)(fp, com, data, p);
503 		/*
504 		 * Copy any data to user, size was
505 		 * already set and checked above.
506 		 */
507 		if (error == 0 && (com&IOC_OUT) && size)
508 			error = copyout(data, uap->data, (u_int)size);
509 		break;
510 	}
511 	if (memp)
512 		free(memp, M_IOCTLOPS);
513 	return (error);
514 }
515 
516 static int	nselcoll;
517 int	selwait;
518 
519 /*
520  * Select system call.
521  */
522 #ifndef _SYS_SYSPROTO_H_
523 struct select_args {
524 	int	nd;
525 	fd_set	*in, *ou, *ex;
526 	struct	timeval *tv;
527 };
528 #endif
529 int
530 select(p, uap, retval)
531 	register struct proc *p;
532 	register struct select_args *uap;
533 	int *retval;
534 {
535 	/*
536 	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
537 	 * infds with the new FD_SETSIZE of 1024, and more than enough for
538 	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
539 	 * of 256.
540 	 */
541 	fd_mask s_selbits[howmany(2048, NFDBITS)];
542 	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
543 	struct timeval atv;
544 	int s, ncoll, error, timo;
545 	u_int nbufbytes, ncpbytes, nfdbits;
546 
547 	if (uap->nd < 0)
548 		return (EINVAL);
549 	if (uap->nd > p->p_fd->fd_nfiles)
550 		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
551 
552 	/*
553 	 * Allocate just enough bits for the non-null fd_sets.  Use the
554 	 * preallocated auto buffer if possible.
555 	 */
556 	nfdbits = roundup(uap->nd, NFDBITS);
557 	ncpbytes = nfdbits / NBBY;
558 	nbufbytes = 0;
559 	if (uap->in != NULL)
560 		nbufbytes += 2 * ncpbytes;
561 	if (uap->ou != NULL)
562 		nbufbytes += 2 * ncpbytes;
563 	if (uap->ex != NULL)
564 		nbufbytes += 2 * ncpbytes;
565 	if (nbufbytes <= sizeof s_selbits)
566 		selbits = &s_selbits[0];
567 	else
568 		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);
569 
570 	/*
571 	 * Assign pointers into the bit buffers and fetch the input bits.
572 	 * Put the output buffers together so that they can be bzeroed
573 	 * together.
574 	 */
575 	sbp = selbits;
576 #define	getbits(name, x) \
577 	do {								\
578 		if (uap->name == NULL)					\
579 			ibits[x] = NULL;				\
580 		else {							\
581 			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
582 			obits[x] = sbp;					\
583 			sbp += ncpbytes / sizeof *sbp;			\
584 			error = copyin(uap->name, ibits[x], ncpbytes);	\
585 			if (error != 0)					\
586 				goto done;				\
587 		}							\
588 	} while (0)
589 	getbits(in, 0);
590 	getbits(ou, 1);
591 	getbits(ex, 2);
592 #undef	getbits
593 	if (nbufbytes != 0)
594 		bzero(selbits, nbufbytes / 2);
595 
596 	if (uap->tv) {
597 		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
598 			sizeof (atv));
599 		if (error)
600 			goto done;
601 		if (itimerfix(&atv)) {
602 			error = EINVAL;
603 			goto done;
604 		}
605 		s = splclock();
606 		timevaladd(&atv, &time);
607 		timo = hzto(&atv);
608 		/*
609 		 * Avoid inadvertently sleeping forever.
610 		 */
611 		if (timo == 0)
612 			timo = 1;
613 		splx(s);
614 	} else
615 		timo = 0;
616 retry:
617 	ncoll = nselcoll;
618 	p->p_flag |= P_SELECT;
619 	error = selscan(p, ibits, obits, uap->nd, retval);
620 	if (error || *retval)
621 		goto done;
622 	s = splhigh();
623 	/* this should be timercmp(&time, &atv, >=) */
624 	if (uap->tv && (time.tv_sec > atv.tv_sec ||
625 	    (time.tv_sec == atv.tv_sec && time.tv_usec >= atv.tv_usec))) {
626 		splx(s);
627 		goto done;
628 	}
629 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
630 		splx(s);
631 		goto retry;
632 	}
633 	p->p_flag &= ~P_SELECT;
634 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
635 	splx(s);
636 	if (error == 0)
637 		goto retry;
638 done:
639 	p->p_flag &= ~P_SELECT;
640 	/* select is not restarted after signals... */
641 	if (error == ERESTART)
642 		error = EINTR;
643 	if (error == EWOULDBLOCK)
644 		error = 0;
645 #define	putbits(name, x) \
646 	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
647 		error = error2;
648 	if (error == 0) {
649 		int error2;
650 
651 		putbits(in, 0);
652 		putbits(ou, 1);
653 		putbits(ex, 2);
654 #undef putbits
655 	}
656 	if (selbits != &s_selbits[0])
657 		free(selbits, M_SELECT);
658 	return (error);
659 }
660 
661 static int
662 selscan(p, ibits, obits, nfd, retval)
663 	struct proc *p;
664 	fd_mask **ibits, **obits;
665 	int nfd, *retval;
666 {
667 	register struct filedesc *fdp = p->p_fd;
668 	register int msk, i, j, fd;
669 	register fd_mask bits;
670 	struct file *fp;
671 	int n = 0;
672 	/* Note; backend also returns POLLHUP/POLLERR if appropriate */
673 	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND};
674 
675 	for (msk = 0; msk < 3; msk++) {
676 		if (ibits[msk] == NULL)
677 			continue;
678 		for (i = 0; i < nfd; i += NFDBITS) {
679 			bits = ibits[msk][i/NFDBITS];
680 			while ((j = ffs(bits)) && (fd = i + --j) < nfd) {
681 				bits &= ~(1 << j);
682 				fp = fdp->fd_ofiles[fd];
683 				if (fp == NULL)
684 					return (EBADF);
685 				if ((*fp->f_ops->fo_poll)(fp, flag[msk],
686 				    fp->f_cred, p)) {
687 					obits[msk][(fd)/NFDBITS] |=
688 						(1 << ((fd) % NFDBITS));
689 					n++;
690 				}
691 			}
692 		}
693 	}
694 	*retval = n;
695 	return (0);
696 }
697 
698 /*
699  * Poll system call.
700  */
701 #ifndef _SYS_SYSPROTO_H_
702 struct poll_args {
703 	struct pollfd *fds;
704 	u_int	nfds;
705 	int	timeout;
706 };
707 #endif
708 int
709 poll(p, uap, retval)
710 	register struct proc *p;
711 	register struct poll_args *uap;
712 	register_t *retval;
713 {
714 	caddr_t bits;
715 	char smallbits[32 * sizeof(struct pollfd)];
716 	struct timeval atv;
717 	int s, ncoll, error = 0, timo;
718 	size_t ni;
719 
720 	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
721 		/* forgiving; slightly wrong */
722 		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
723 	}
724 	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
725 	if (ni > sizeof(smallbits))
726 		bits = malloc(ni, M_TEMP, M_WAITOK);
727 	else
728 		bits = smallbits;
729 
730 	error = copyin(SCARG(uap, fds), bits, ni);
731 	if (error)
732 		goto done;
733 
734 	if (SCARG(uap, timeout) != INFTIM) {
735 		atv.tv_sec = SCARG(uap, timeout) / 1000;
736 		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
737 		if (itimerfix(&atv)) {
738 			error = EINVAL;
739 			goto done;
740 		}
741 		s = splclock();
742 		timevaladd(&atv, &time);
743 		timo = hzto(&atv);
744 		/*
745 		 * Avoid inadvertently sleeping forever.
746 		 */
747 		if (timo == 0)
748 			timo = 1;
749 		splx(s);
750 	} else
751 		timo = 0;
752 retry:
753 	ncoll = nselcoll;
754 	p->p_flag |= P_SELECT;
755 	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds), retval);
756 	if (error || *retval)
757 		goto done;
758 	s = splhigh();
759 	if (timo && timercmp(&time, &atv, >=)) {
760 		splx(s);
761 		goto done;
762 	}
763 	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
764 		splx(s);
765 		goto retry;
766 	}
767 	p->p_flag &= ~P_SELECT;
768 	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
769 	splx(s);
770 	if (error == 0)
771 		goto retry;
772 done:
773 	p->p_flag &= ~P_SELECT;
774 	/* poll is not restarted after signals... */
775 	if (error == ERESTART)
776 		error = EINTR;
777 	if (error == EWOULDBLOCK)
778 		error = 0;
779 	if (error == 0) {
780 		error = copyout(bits, SCARG(uap, fds), ni);
781 		if (error)
782 			goto out;
783 	}
784 out:
785 	if (ni > sizeof(smallbits))
786 		free(bits, M_TEMP);
787 	return (error);
788 }
789 
790 static int
791 pollscan(p, fds, nfd, retval)
792 	struct proc *p;
793 	struct pollfd *fds;
794 	int nfd;
795 	register_t *retval;
796 {
797 	register struct filedesc *fdp = p->p_fd;
798 	int i;
799 	struct file *fp;
800 	int n = 0;
801 
802 	for (i = 0; i < nfd; i++, fds++) {
803 		if ((u_int)fds->fd >= fdp->fd_nfiles) {
804 			fds->revents = POLLNVAL;
805 			n++;
806 		} else {
807 			fp = fdp->fd_ofiles[fds->fd];
808 			if (fp == 0) {
809 				fds->revents = POLLNVAL;
810 				n++;
811 			} else {
812 				/* Note: backend also returns POLLHUP and
813 				 * POLLERR if appropriate */
814 				fds->revents = (*fp->f_ops->fo_poll)(fp,
815 				    fds->events, fp->f_cred, p);
816 				if (fds->revents != 0)
817 					n++;
818 			}
819 		}
820 	}
821 	*retval = n;
822 	return (0);
823 }
824 
825 /*
826  * OpenBSD poll system call.
827  * XXX this isn't quite a true representation..  OpenBSD uses select ops.
828  */
829 #ifndef _SYS_SYSPROTO_H_
830 struct openbsd_poll_args {
831 	struct pollfd *fds;
832 	u_int	nfds;
833 	int	timeout;
834 };
835 #endif
836 int
837 openbsd_poll(p, uap, retval)
838 	register struct proc *p;
839 	register struct openbsd_poll_args *uap;
840 	register_t *retval;
841 {
842 	return (poll(p, (struct poll_args *)uap, retval));
843 }
844 
/*
 * Poll routine that reports ordinary reading and writing as always
 * possible: returns the subset of the requested events that lies within
 * POLLIN, POLLRDNORM, POLLOUT and POLLWRNORM.  dev and p are not used.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{
	int ready;

	/* the only conditions ever reported */
	ready = POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM;
	return (events & ready);
}
855 
856 /*
857  * Record a select request.
858  */
859 void
860 selrecord(selector, sip)
861 	struct proc *selector;
862 	struct selinfo *sip;
863 {
864 	struct proc *p;
865 	pid_t mypid;
866 
867 	mypid = selector->p_pid;
868 	if (sip->si_pid == mypid)
869 		return;
870 	if (sip->si_pid && (p = pfind(sip->si_pid)) &&
871 	    p->p_wchan == (caddr_t)&selwait)
872 		sip->si_flags |= SI_COLL;
873 	else
874 		sip->si_pid = mypid;
875 }
876 
877 /*
878  * Do a wakeup when a selectable event occurs.
879  */
880 void
881 selwakeup(sip)
882 	register struct selinfo *sip;
883 {
884 	register struct proc *p;
885 	int s;
886 
887 	if (sip->si_pid == 0)
888 		return;
889 	if (sip->si_flags & SI_COLL) {
890 		nselcoll++;
891 		sip->si_flags &= ~SI_COLL;
892 		wakeup((caddr_t)&selwait);
893 	}
894 	p = pfind(sip->si_pid);
895 	sip->si_pid = 0;
896 	if (p != NULL) {
897 		s = splhigh();
898 		if (p->p_wchan == (caddr_t)&selwait) {
899 			if (p->p_stat == SSLEEP)
900 				setrunnable(p);
901 			else
902 				unsleep(p);
903 		} else if (p->p_flag & P_SELECT)
904 			p->p_flag &= ~P_SELECT;
905 		splx(s);
906 	}
907 }
908