1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysent.h> 60 #ifdef KTRACE 61 #include <sys/ktrace.h> 62 #endif 63 64 #include <machine/limits.h> 65 66 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 67 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 68 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 69 70 static int pollscan __P((struct proc *, struct pollfd *, int)); 71 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 72 static struct file* getfp __P((struct filedesc *, int, int)); 73 static int dofileread __P((struct proc *, struct file *, int, void *, 74 size_t, off_t, int)); 75 static int dofilewrite __P((struct proc *, struct file *, int, 76 const void *, size_t, off_t, int)); 77 78 static struct file* 79 getfp(fdp, fd, flag) 80 struct filedesc* fdp; 81 int fd, flag; 82 { 83 struct file* fp; 84 85 if (((u_int)fd) >= fdp->fd_nfiles || 86 (fp = fdp->fd_ofiles[fd]) == NULL || 87 (fp->f_flag & flag) == 0) 88 return (NULL); 89 return (fp); 90 } 91 92 /* 93 * Read system call. 94 */ 95 #ifndef _SYS_SYSPROTO_H_ 96 struct read_args { 97 int fd; 98 void *buf; 99 size_t nbyte; 100 }; 101 #endif 102 int 103 read(p, uap) 104 struct proc *p; 105 register struct read_args *uap; 106 { 107 register struct file *fp; 108 109 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 110 return (EBADF); 111 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 112 } 113 114 /* 115 * Pread system call 116 */ 117 #ifndef _SYS_SYSPROTO_H_ 118 struct pread_args { 119 int fd; 120 void *buf; 121 size_t nbyte; 122 int pad; 123 off_t offset; 124 }; 125 #endif 126 int 127 pread(p, uap) 128 struct proc *p; 129 register struct pread_args *uap; 130 { 131 register struct file *fp; 132 133 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 134 return (EBADF); 135 if (fp->f_type != DTYPE_VNODE) 136 return (ESPIPE); 137 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 138 FOF_OFFSET)); 139 } 140 141 /* 142 * Code common for read and pread 143 */ 144 int 145 dofileread(p, fp, fd, buf, nbyte, offset, flags) 146 struct proc *p; 147 struct file *fp; 148 int fd, flags; 149 void *buf; 150 size_t nbyte; 151 off_t offset; 152 { 153 struct uio auio; 154 struct iovec aiov; 155 long cnt, error = 0; 156 #ifdef KTRACE 157 struct iovec ktriov; 158 #endif 159 160 aiov.iov_base = (caddr_t)buf; 161 aiov.iov_len = nbyte; 162 auio.uio_iov = &aiov; 163 auio.uio_iovcnt = 1; 164 auio.uio_offset = offset; 165 if (nbyte > INT_MAX) 166 return (EINVAL); 167 auio.uio_resid = nbyte; 168 auio.uio_rw = UIO_READ; 169 auio.uio_segflg = UIO_USERSPACE; 170 auio.uio_procp = p; 171 #ifdef KTRACE 172 /* 173 * if tracing, save a copy of iovec 174 */ 175 if (KTRPOINT(p, KTR_GENIO)) 176 ktriov = aiov; 177 #endif 178 cnt = nbyte; 179 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) 180 if (auio.uio_resid != cnt && (error == ERESTART || 181 error == EINTR || error == EWOULDBLOCK)) 182 error = 0; 183 cnt -= auio.uio_resid; 184 #ifdef KTRACE 185 if (KTRPOINT(p, KTR_GENIO) && error == 0) 186 ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error); 187 #endif 188 p->p_retval[0] = cnt; 189 return (error); 190 } 191 192 /* 193 * Scatter read system call. 194 */ 195 #ifndef _SYS_SYSPROTO_H_ 196 struct readv_args { 197 int fd; 198 struct iovec *iovp; 199 u_int iovcnt; 200 }; 201 #endif 202 int 203 readv(p, uap) 204 struct proc *p; 205 register struct readv_args *uap; 206 { 207 register struct file *fp; 208 register struct filedesc *fdp = p->p_fd; 209 struct uio auio; 210 register struct iovec *iov; 211 struct iovec *needfree; 212 struct iovec aiov[UIO_SMALLIOV]; 213 long i, cnt, error = 0; 214 u_int iovlen; 215 #ifdef KTRACE 216 struct iovec *ktriov = NULL; 217 #endif 218 219 if ((fp = getfp(fdp, uap->fd, FREAD)) == NULL) 220 return (EBADF); 221 /* note: can't use iovlen until iovcnt is validated */ 222 iovlen = uap->iovcnt * sizeof (struct iovec); 223 if (uap->iovcnt > UIO_SMALLIOV) { 224 if (uap->iovcnt > UIO_MAXIOV) 225 return (EINVAL); 226 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 227 needfree = iov; 228 } else { 229 iov = aiov; 230 needfree = NULL; 231 } 232 auio.uio_iov = iov; 233 auio.uio_iovcnt = uap->iovcnt; 234 auio.uio_rw = UIO_READ; 235 auio.uio_segflg = UIO_USERSPACE; 236 auio.uio_procp = p; 237 auio.uio_offset = -1; 238 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 239 goto done; 240 auio.uio_resid = 0; 241 for (i = 0; i < uap->iovcnt; i++) { 242 if (iov->iov_len > INT_MAX - auio.uio_resid) { 243 error = EINVAL; 244 goto done; 245 } 246 auio.uio_resid += iov->iov_len; 247 iov++; 248 } 249 #ifdef KTRACE 250 /* 251 * if tracing, save a copy of iovec 252 */ 253 if (KTRPOINT(p, KTR_GENIO)) { 254 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 255 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 256 } 257 #endif 258 cnt = auio.uio_resid; 259 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) 260 if (auio.uio_resid != cnt && (error == ERESTART || 261 error == EINTR || error == EWOULDBLOCK)) 262 error = 0; 263 cnt -= auio.uio_resid; 264 #ifdef KTRACE 265 if (ktriov != NULL) { 266 if (error == 0) 267 ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov, 268 cnt, error); 269 FREE(ktriov, M_TEMP); 270 } 271 #endif 272 p->p_retval[0] = cnt; 273 done: 274 if (needfree) 275 FREE(needfree, M_IOV); 276 return (error); 277 } 278 279 /* 280 * Write system call 281 */ 282 #ifndef _SYS_SYSPROTO_H_ 283 struct write_args { 284 int fd; 285 const void *buf; 286 size_t nbyte; 287 }; 288 #endif 289 int 290 write(p, uap) 291 struct proc *p; 292 register struct write_args *uap; 293 { 294 register struct file *fp; 295 296 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 297 return (EBADF); 298 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 299 } 300 301 /* 302 * Pwrite system call 303 */ 304 #ifndef _SYS_SYSPROTO_H_ 305 struct pwrite_args { 306 int fd; 307 const void *buf; 308 size_t nbyte; 309 int pad; 310 off_t offset; 311 }; 312 #endif 313 int 314 pwrite(p, uap) 315 struct proc *p; 316 register struct pwrite_args *uap; 317 { 318 register struct file *fp; 319 320 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 321 return (EBADF); 322 if (fp->f_type != DTYPE_VNODE) 323 return (ESPIPE); 324 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 325 FOF_OFFSET)); 326 } 327 328 static int 329 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 330 struct proc *p; 331 struct file *fp; 332 int fd, flags; 333 const void *buf; 334 size_t nbyte; 335 off_t offset; 336 { 337 struct uio auio; 338 struct iovec aiov; 339 long cnt, error = 0; 340 #ifdef KTRACE 341 struct iovec ktriov; 342 #endif 343 344 aiov.iov_base = (void *)buf; 345 aiov.iov_len = nbyte; 346 auio.uio_iov = &aiov; 347 auio.uio_iovcnt = 1; 348 auio.uio_offset = offset; 349 if (nbyte > INT_MAX) 350 return (EINVAL); 351 auio.uio_resid = nbyte; 352 auio.uio_rw = UIO_WRITE; 353 auio.uio_segflg = UIO_USERSPACE; 354 auio.uio_procp = p; 355 #ifdef KTRACE 356 /* 357 * if tracing, save a copy of iovec 358 */ 359 if (KTRPOINT(p, KTR_GENIO)) 360 ktriov = aiov; 361 #endif 362 cnt = nbyte; 363 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 364 if (auio.uio_resid != cnt && (error == ERESTART || 365 error == EINTR || error == EWOULDBLOCK)) 366 error = 0; 367 if (error == EPIPE) 368 psignal(p, SIGPIPE); 369 } 370 cnt -= auio.uio_resid; 371 #ifdef KTRACE 372 if (KTRPOINT(p, KTR_GENIO) && error == 0) 373 ktrgenio(p->p_tracep, fd, UIO_WRITE, 374 &ktriov, cnt, error); 375 #endif 376 p->p_retval[0] = cnt; 377 return (error); 378 } 379 380 /* 381 * Gather write system call 382 */ 383 #ifndef _SYS_SYSPROTO_H_ 384 struct writev_args { 385 int fd; 386 struct iovec *iovp; 387 u_int iovcnt; 388 }; 389 #endif 390 int 391 writev(p, uap) 392 struct proc *p; 393 register struct writev_args *uap; 394 { 395 register struct file *fp; 396 register struct filedesc *fdp = p->p_fd; 397 struct uio auio; 398 register struct iovec *iov; 399 struct iovec *needfree; 400 struct iovec aiov[UIO_SMALLIOV]; 401 long i, cnt, error = 0; 402 u_int iovlen; 403 #ifdef KTRACE 404 struct iovec *ktriov = NULL; 405 #endif 406 407 if ((fp = getfp(fdp, uap->fd, FWRITE)) == NULL) 408 return (EBADF); 409 fhold(fp); 410 /* note: can't use iovlen until iovcnt is validated */ 411 iovlen = uap->iovcnt * sizeof (struct iovec); 412 if (uap->iovcnt > UIO_SMALLIOV) { 413 if (uap->iovcnt > UIO_MAXIOV) { 414 needfree = NULL; 415 error = EINVAL; 416 goto done; 417 } 418 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 419 needfree = iov; 420 } else { 421 iov = aiov; 422 needfree = NULL; 423 } 424 auio.uio_iov = iov; 425 auio.uio_iovcnt = uap->iovcnt; 426 auio.uio_rw = UIO_WRITE; 427 auio.uio_segflg = UIO_USERSPACE; 428 auio.uio_procp = p; 429 auio.uio_offset = -1; 430 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 431 goto done; 432 auio.uio_resid = 0; 433 for (i = 0; i < uap->iovcnt; i++) { 434 if (iov->iov_len > INT_MAX - auio.uio_resid) { 435 error = EINVAL; 436 goto done; 437 } 438 auio.uio_resid += iov->iov_len; 439 iov++; 440 } 441 #ifdef KTRACE 442 /* 443 * if tracing, save a copy of iovec 444 */ 445 if (KTRPOINT(p, KTR_GENIO)) { 446 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 447 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 448 } 449 #endif 450 cnt = auio.uio_resid; 451 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 452 if (auio.uio_resid != cnt && (error == ERESTART || 453 error == EINTR || error == EWOULDBLOCK)) 454 error = 0; 455 if (error == EPIPE) 456 psignal(p, SIGPIPE); 457 } 458 cnt -= auio.uio_resid; 459 #ifdef KTRACE 460 if (ktriov != NULL) { 461 if (error == 0) 462 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, 463 ktriov, cnt, error); 464 FREE(ktriov, M_TEMP); 465 } 466 #endif 467 p->p_retval[0] = cnt; 468 done: 469 fdrop(fp, p); 470 if (needfree) 471 FREE(needfree, M_IOV); 472 return (error); 473 } 474 475 /* 476 * Ioctl system call 477 */ 478 #ifndef _SYS_SYSPROTO_H_ 479 struct ioctl_args { 480 int fd; 481 u_long com; 482 caddr_t data; 483 }; 484 #endif 485 /* ARGSUSED */ 486 int 487 ioctl(p, uap) 488 struct proc *p; 489 register struct ioctl_args *uap; 490 { 491 register struct file *fp; 492 register struct filedesc *fdp; 493 register u_long com; 494 int error; 495 register u_int size; 496 caddr_t data, memp; 497 int tmp; 498 #define STK_PARAMS 128 499 union { 500 char stkbuf[STK_PARAMS]; 501 long align; 502 } ubuf; 503 504 fdp = p->p_fd; 505 if ((u_int)uap->fd >= fdp->fd_nfiles || 506 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 507 return (EBADF); 508 509 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 510 return (EBADF); 511 512 switch (com = uap->com) { 513 case FIONCLEX: 514 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 515 return (0); 516 case FIOCLEX: 517 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 518 return (0); 519 } 520 521 /* 522 * Interpret high order word to find amount of data to be 523 * copied to/from the user's address space. 524 */ 525 size = IOCPARM_LEN(com); 526 if (size > IOCPARM_MAX) 527 return (ENOTTY); 528 memp = NULL; 529 if (size > sizeof (ubuf.stkbuf)) { 530 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 531 data = memp; 532 } else 533 data = ubuf.stkbuf; 534 if (com&IOC_IN) { 535 if (size) { 536 error = copyin(uap->data, data, (u_int)size); 537 if (error) { 538 if (memp) 539 free(memp, M_IOCTLOPS); 540 return (error); 541 } 542 } else 543 *(caddr_t *)data = uap->data; 544 } else if ((com&IOC_OUT) && size) 545 /* 546 * Zero the buffer so the user always 547 * gets back something deterministic. 548 */ 549 bzero(data, size); 550 else if (com&IOC_VOID) 551 *(caddr_t *)data = uap->data; 552 553 switch (com) { 554 555 case FIONBIO: 556 if ((tmp = *(int *)data)) 557 fp->f_flag |= FNONBLOCK; 558 else 559 fp->f_flag &= ~FNONBLOCK; 560 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 561 break; 562 563 case FIOASYNC: 564 if ((tmp = *(int *)data)) 565 fp->f_flag |= FASYNC; 566 else 567 fp->f_flag &= ~FASYNC; 568 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 569 break; 570 571 default: 572 error = fo_ioctl(fp, com, data, p); 573 /* 574 * Copy any data to user, size was 575 * already set and checked above. 576 */ 577 if (error == 0 && (com&IOC_OUT) && size) 578 error = copyout(data, uap->data, (u_int)size); 579 break; 580 } 581 if (memp) 582 free(memp, M_IOCTLOPS); 583 return (error); 584 } 585 586 static int nselcoll; /* Select collisions since boot */ 587 int selwait; 588 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 589 590 /* 591 * Select system call. 592 */ 593 #ifndef _SYS_SYSPROTO_H_ 594 struct select_args { 595 int nd; 596 fd_set *in, *ou, *ex; 597 struct timeval *tv; 598 }; 599 #endif 600 int 601 select(p, uap) 602 register struct proc *p; 603 register struct select_args *uap; 604 { 605 /* 606 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 607 * infds with the new FD_SETSIZE of 1024, and more than enough for 608 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 609 * of 256. 610 */ 611 fd_mask s_selbits[howmany(2048, NFDBITS)]; 612 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 613 struct timeval atv, rtv, ttv; 614 int s, ncoll, error, timo; 615 u_int nbufbytes, ncpbytes, nfdbits; 616 617 if (uap->nd < 0) 618 return (EINVAL); 619 if (uap->nd > p->p_fd->fd_nfiles) 620 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 621 622 /* 623 * Allocate just enough bits for the non-null fd_sets. Use the 624 * preallocated auto buffer if possible. 625 */ 626 nfdbits = roundup(uap->nd, NFDBITS); 627 ncpbytes = nfdbits / NBBY; 628 nbufbytes = 0; 629 if (uap->in != NULL) 630 nbufbytes += 2 * ncpbytes; 631 if (uap->ou != NULL) 632 nbufbytes += 2 * ncpbytes; 633 if (uap->ex != NULL) 634 nbufbytes += 2 * ncpbytes; 635 if (nbufbytes <= sizeof s_selbits) 636 selbits = &s_selbits[0]; 637 else 638 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 639 640 /* 641 * Assign pointers into the bit buffers and fetch the input bits. 642 * Put the output buffers together so that they can be bzeroed 643 * together. 644 */ 645 sbp = selbits; 646 #define getbits(name, x) \ 647 do { \ 648 if (uap->name == NULL) \ 649 ibits[x] = NULL; \ 650 else { \ 651 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 652 obits[x] = sbp; \ 653 sbp += ncpbytes / sizeof *sbp; \ 654 error = copyin(uap->name, ibits[x], ncpbytes); \ 655 if (error != 0) \ 656 goto done; \ 657 } \ 658 } while (0) 659 getbits(in, 0); 660 getbits(ou, 1); 661 getbits(ex, 2); 662 #undef getbits 663 if (nbufbytes != 0) 664 bzero(selbits, nbufbytes / 2); 665 666 if (uap->tv) { 667 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 668 sizeof (atv)); 669 if (error) 670 goto done; 671 if (itimerfix(&atv)) { 672 error = EINVAL; 673 goto done; 674 } 675 getmicrouptime(&rtv); 676 timevaladd(&atv, &rtv); 677 } else 678 atv.tv_sec = 0; 679 timo = 0; 680 retry: 681 ncoll = nselcoll; 682 p->p_flag |= P_SELECT; 683 error = selscan(p, ibits, obits, uap->nd); 684 if (error || p->p_retval[0]) 685 goto done; 686 if (atv.tv_sec) { 687 getmicrouptime(&rtv); 688 if (timevalcmp(&rtv, &atv, >=)) 689 goto done; 690 ttv = atv; 691 timevalsub(&ttv, &rtv); 692 timo = ttv.tv_sec > 24 * 60 * 60 ? 693 24 * 60 * 60 * hz : tvtohz(&ttv); 694 } 695 s = splhigh(); 696 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 697 splx(s); 698 goto retry; 699 } 700 p->p_flag &= ~P_SELECT; 701 702 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 703 704 splx(s); 705 if (error == 0) 706 goto retry; 707 done: 708 p->p_flag &= ~P_SELECT; 709 /* select is not restarted after signals... */ 710 if (error == ERESTART) 711 error = EINTR; 712 if (error == EWOULDBLOCK) 713 error = 0; 714 #define putbits(name, x) \ 715 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 716 error = error2; 717 if (error == 0) { 718 int error2; 719 720 putbits(in, 0); 721 putbits(ou, 1); 722 putbits(ex, 2); 723 #undef putbits 724 } 725 if (selbits != &s_selbits[0]) 726 free(selbits, M_SELECT); 727 return (error); 728 } 729 730 static int 731 selscan(p, ibits, obits, nfd) 732 struct proc *p; 733 fd_mask **ibits, **obits; 734 int nfd; 735 { 736 struct filedesc *fdp = p->p_fd; 737 int msk, i, fd; 738 fd_mask bits; 739 struct file *fp; 740 int n = 0; 741 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 742 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 743 744 for (msk = 0; msk < 3; msk++) { 745 if (ibits[msk] == NULL) 746 continue; 747 for (i = 0; i < nfd; i += NFDBITS) { 748 bits = ibits[msk][i/NFDBITS]; 749 /* ffs(int mask) not portable, fd_mask is long */ 750 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 751 if (!(bits & 1)) 752 continue; 753 fp = fdp->fd_ofiles[fd]; 754 if (fp == NULL) 755 return (EBADF); 756 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 757 obits[msk][(fd)/NFDBITS] |= 758 ((fd_mask)1 << ((fd) % NFDBITS)); 759 n++; 760 } 761 } 762 } 763 } 764 p->p_retval[0] = n; 765 return (0); 766 } 767 768 /* 769 * Poll system call. 770 */ 771 #ifndef _SYS_SYSPROTO_H_ 772 struct poll_args { 773 struct pollfd *fds; 774 u_int nfds; 775 int timeout; 776 }; 777 #endif 778 int 779 poll(p, uap) 780 register struct proc *p; 781 register struct poll_args *uap; 782 { 783 caddr_t bits; 784 char smallbits[32 * sizeof(struct pollfd)]; 785 struct timeval atv, rtv, ttv; 786 int s, ncoll, error = 0, timo; 787 size_t ni; 788 789 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 790 /* forgiving; slightly wrong */ 791 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 792 } 793 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 794 if (ni > sizeof(smallbits)) 795 bits = malloc(ni, M_TEMP, M_WAITOK); 796 else 797 bits = smallbits; 798 error = copyin(SCARG(uap, fds), bits, ni); 799 if (error) 800 goto done; 801 if (SCARG(uap, timeout) != INFTIM) { 802 atv.tv_sec = SCARG(uap, timeout) / 1000; 803 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 804 if (itimerfix(&atv)) { 805 error = EINVAL; 806 goto done; 807 } 808 getmicrouptime(&rtv); 809 timevaladd(&atv, &rtv); 810 } else 811 atv.tv_sec = 0; 812 timo = 0; 813 retry: 814 ncoll = nselcoll; 815 p->p_flag |= P_SELECT; 816 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 817 if (error || p->p_retval[0]) 818 goto done; 819 if (atv.tv_sec) { 820 getmicrouptime(&rtv); 821 if (timevalcmp(&rtv, &atv, >=)) 822 goto done; 823 ttv = atv; 824 timevalsub(&ttv, &rtv); 825 timo = ttv.tv_sec > 24 * 60 * 60 ? 826 24 * 60 * 60 * hz : tvtohz(&ttv); 827 } 828 s = splhigh(); 829 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 830 splx(s); 831 goto retry; 832 } 833 p->p_flag &= ~P_SELECT; 834 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 835 splx(s); 836 if (error == 0) 837 goto retry; 838 done: 839 p->p_flag &= ~P_SELECT; 840 /* poll is not restarted after signals... */ 841 if (error == ERESTART) 842 error = EINTR; 843 if (error == EWOULDBLOCK) 844 error = 0; 845 if (error == 0) { 846 error = copyout(bits, SCARG(uap, fds), ni); 847 if (error) 848 goto out; 849 } 850 out: 851 if (ni > sizeof(smallbits)) 852 free(bits, M_TEMP); 853 return (error); 854 } 855 856 static int 857 pollscan(p, fds, nfd) 858 struct proc *p; 859 struct pollfd *fds; 860 int nfd; 861 { 862 register struct filedesc *fdp = p->p_fd; 863 int i; 864 struct file *fp; 865 int n = 0; 866 867 for (i = 0; i < nfd; i++, fds++) { 868 if (fds->fd >= fdp->fd_nfiles) { 869 fds->revents = POLLNVAL; 870 n++; 871 } else if (fds->fd < 0) { 872 fds->revents = 0; 873 } else { 874 fp = fdp->fd_ofiles[fds->fd]; 875 if (fp == 0) { 876 fds->revents = POLLNVAL; 877 n++; 878 } else { 879 /* 880 * Note: backend also returns POLLHUP and 881 * POLLERR if appropriate. 882 */ 883 fds->revents = fo_poll(fp, fds->events, 884 fp->f_cred, p); 885 if (fds->revents != 0) 886 n++; 887 } 888 } 889 } 890 p->p_retval[0] = n; 891 return (0); 892 } 893 894 /* 895 * OpenBSD poll system call. 896 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 897 */ 898 #ifndef _SYS_SYSPROTO_H_ 899 struct openbsd_poll_args { 900 struct pollfd *fds; 901 u_int nfds; 902 int timeout; 903 }; 904 #endif 905 int 906 openbsd_poll(p, uap) 907 register struct proc *p; 908 register struct openbsd_poll_args *uap; 909 { 910 return (poll(p, (struct poll_args *)uap)); 911 } 912 913 /*ARGSUSED*/ 914 int 915 seltrue(dev, events, p) 916 dev_t dev; 917 int events; 918 struct proc *p; 919 { 920 921 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 922 } 923 924 /* 925 * Record a select request. 926 */ 927 void 928 selrecord(selector, sip) 929 struct proc *selector; 930 struct selinfo *sip; 931 { 932 struct proc *p; 933 pid_t mypid; 934 935 mypid = selector->p_pid; 936 if (sip->si_pid == mypid) 937 return; 938 if (sip->si_pid && (p = pfind(sip->si_pid)) && 939 p->p_wchan == (caddr_t)&selwait) 940 sip->si_flags |= SI_COLL; 941 else 942 sip->si_pid = mypid; 943 } 944 945 /* 946 * Do a wakeup when a selectable event occurs. 947 */ 948 void 949 selwakeup(sip) 950 register struct selinfo *sip; 951 { 952 register struct proc *p; 953 int s; 954 955 if (sip->si_pid == 0) 956 return; 957 if (sip->si_flags & SI_COLL) { 958 nselcoll++; 959 sip->si_flags &= ~SI_COLL; 960 wakeup((caddr_t)&selwait); 961 } 962 p = pfind(sip->si_pid); 963 sip->si_pid = 0; 964 if (p != NULL) { 965 s = splhigh(); 966 if (p->p_wchan == (caddr_t)&selwait) { 967 if (p->p_stat == SSLEEP) 968 setrunnable(p); 969 else 970 unsleep(p); 971 } else if (p->p_flag & P_SELECT) 972 p->p_flag &= ~P_SELECT; 973 splx(s); 974 } 975 } 976