1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/selinfo.h> 59 #include <sys/sysctl.h> 60 #include <sys/sysent.h> 61 #include <sys/bio.h> 62 #include <sys/buf.h> 63 #ifdef KTRACE 64 #include <sys/ktrace.h> 65 #endif 66 #include <vm/vm.h> 67 #include <vm/vm_page.h> 68 69 #include <machine/limits.h> 70 71 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 72 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 73 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 74 75 static int pollscan __P((struct proc *, struct pollfd *, int)); 76 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 77 static int dofileread __P((struct proc *, struct file *, int, void *, 78 size_t, off_t, int)); 79 static int dofilewrite __P((struct proc *, struct file *, int, 80 const void *, size_t, off_t, int)); 81 82 struct file* 83 holdfp(fdp, fd, flag) 84 struct filedesc* fdp; 85 int fd, flag; 86 { 87 struct file* fp; 88 89 if (((u_int)fd) >= fdp->fd_nfiles || 90 (fp = fdp->fd_ofiles[fd]) == NULL || 91 (fp->f_flag & flag) == 0) { 92 return (NULL); 93 } 94 fhold(fp); 95 return (fp); 96 } 97 98 /* 99 * Read system call. 100 */ 101 #ifndef _SYS_SYSPROTO_H_ 102 struct read_args { 103 int fd; 104 void *buf; 105 size_t nbyte; 106 }; 107 #endif 108 int 109 read(p, uap) 110 struct proc *p; 111 register struct read_args *uap; 112 { 113 register struct file *fp; 114 int error; 115 116 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 117 return (EBADF); 118 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 119 fdrop(fp, p); 120 return(error); 121 } 122 123 /* 124 * Pread system call 125 */ 126 #ifndef _SYS_SYSPROTO_H_ 127 struct pread_args { 128 int fd; 129 void *buf; 130 size_t nbyte; 131 int pad; 132 off_t offset; 133 }; 134 #endif 135 int 136 pread(p, uap) 137 struct proc *p; 138 register struct pread_args *uap; 139 { 140 register struct file *fp; 141 int error; 142 143 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 144 return (EBADF); 145 if (fp->f_type != DTYPE_VNODE) { 146 error = ESPIPE; 147 } else { 148 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 149 uap->offset, FOF_OFFSET); 150 } 151 fdrop(fp, p); 152 return(error); 153 } 154 155 /* 156 * Code common for read and pread 157 */ 158 int 159 dofileread(p, fp, fd, buf, nbyte, offset, flags) 160 struct proc *p; 161 struct file *fp; 162 int fd, flags; 163 void *buf; 164 size_t nbyte; 165 off_t offset; 166 { 167 struct uio auio; 168 struct iovec aiov; 169 long cnt, error = 0; 170 #ifdef KTRACE 171 struct iovec ktriov; 172 struct uio ktruio; 173 int didktr = 0; 174 #endif 175 176 aiov.iov_base = (caddr_t)buf; 177 aiov.iov_len = nbyte; 178 auio.uio_iov = &aiov; 179 auio.uio_iovcnt = 1; 180 auio.uio_offset = offset; 181 if (nbyte > INT_MAX) 182 return (EINVAL); 183 auio.uio_resid = nbyte; 184 auio.uio_rw = UIO_READ; 185 auio.uio_segflg = UIO_USERSPACE; 186 auio.uio_procp = p; 187 #ifdef KTRACE 188 /* 189 * if tracing, save a copy of iovec 190 */ 191 if (KTRPOINT(p, KTR_GENIO)) { 192 ktriov = aiov; 193 ktruio = auio; 194 didktr = 1; 195 } 196 #endif 197 cnt = nbyte; 198 199 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 200 if (auio.uio_resid != cnt && (error == ERESTART || 201 error == EINTR || error == EWOULDBLOCK)) 202 error = 0; 203 } 204 cnt -= auio.uio_resid; 205 #ifdef KTRACE 206 if (didktr && error == 0) { 207 ktruio.uio_iov = &ktriov; 208 ktruio.uio_resid = cnt; 209 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 210 } 211 #endif 212 p->p_retval[0] = cnt; 213 return (error); 214 } 215 216 /* 217 * Scatter read system call. 218 */ 219 #ifndef _SYS_SYSPROTO_H_ 220 struct readv_args { 221 int fd; 222 struct iovec *iovp; 223 u_int iovcnt; 224 }; 225 #endif 226 int 227 readv(p, uap) 228 struct proc *p; 229 register struct readv_args *uap; 230 { 231 register struct file *fp; 232 register struct filedesc *fdp = p->p_fd; 233 struct uio auio; 234 register struct iovec *iov; 235 struct iovec *needfree; 236 struct iovec aiov[UIO_SMALLIOV]; 237 long i, cnt, error = 0; 238 u_int iovlen; 239 #ifdef KTRACE 240 struct iovec *ktriov = NULL; 241 struct uio ktruio; 242 #endif 243 244 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 245 return (EBADF); 246 /* note: can't use iovlen until iovcnt is validated */ 247 iovlen = uap->iovcnt * sizeof (struct iovec); 248 if (uap->iovcnt > UIO_SMALLIOV) { 249 if (uap->iovcnt > UIO_MAXIOV) 250 return (EINVAL); 251 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 252 needfree = iov; 253 } else { 254 iov = aiov; 255 needfree = NULL; 256 } 257 auio.uio_iov = iov; 258 auio.uio_iovcnt = uap->iovcnt; 259 auio.uio_rw = UIO_READ; 260 auio.uio_segflg = UIO_USERSPACE; 261 auio.uio_procp = p; 262 auio.uio_offset = -1; 263 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 264 goto done; 265 auio.uio_resid = 0; 266 for (i = 0; i < uap->iovcnt; i++) { 267 if (iov->iov_len > INT_MAX - auio.uio_resid) { 268 error = EINVAL; 269 goto done; 270 } 271 auio.uio_resid += iov->iov_len; 272 iov++; 273 } 274 #ifdef KTRACE 275 /* 276 * if tracing, save a copy of iovec 277 */ 278 if (KTRPOINT(p, KTR_GENIO)) { 279 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 280 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 281 ktruio = auio; 282 } 283 #endif 284 cnt = auio.uio_resid; 285 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 286 if (auio.uio_resid != cnt && (error == ERESTART || 287 error == EINTR || error == EWOULDBLOCK)) 288 error = 0; 289 } 290 cnt -= auio.uio_resid; 291 #ifdef KTRACE 292 if (ktriov != NULL) { 293 if (error == 0) { 294 ktruio.uio_iov = ktriov; 295 ktruio.uio_resid = cnt; 296 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 297 error); 298 } 299 FREE(ktriov, M_TEMP); 300 } 301 #endif 302 p->p_retval[0] = cnt; 303 done: 304 fdrop(fp, p); 305 if (needfree) 306 FREE(needfree, M_IOV); 307 return (error); 308 } 309 310 /* 311 * Write system call 312 */ 313 #ifndef _SYS_SYSPROTO_H_ 314 struct write_args { 315 int fd; 316 const void *buf; 317 size_t nbyte; 318 }; 319 #endif 320 int 321 write(p, uap) 322 struct proc *p; 323 register struct write_args *uap; 324 { 325 register struct file *fp; 326 int error; 327 328 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 329 return (EBADF); 330 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 331 fdrop(fp, p); 332 return(error); 333 } 334 335 /* 336 * Pwrite system call 337 */ 338 #ifndef _SYS_SYSPROTO_H_ 339 struct pwrite_args { 340 int fd; 341 const void *buf; 342 size_t nbyte; 343 int pad; 344 off_t offset; 345 }; 346 #endif 347 int 348 pwrite(p, uap) 349 struct proc *p; 350 register struct pwrite_args *uap; 351 { 352 register struct file *fp; 353 int error; 354 355 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 356 return (EBADF); 357 if (fp->f_type != DTYPE_VNODE) { 358 error = ESPIPE; 359 } else { 360 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 361 uap->offset, FOF_OFFSET); 362 } 363 fdrop(fp, p); 364 return(error); 365 } 366 367 static int 368 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 369 struct proc *p; 370 struct file *fp; 371 int fd, flags; 372 const void *buf; 373 size_t nbyte; 374 off_t offset; 375 { 376 struct uio auio; 377 struct iovec aiov; 378 long cnt, error = 0; 379 #ifdef KTRACE 380 struct iovec ktriov; 381 struct uio ktruio; 382 int didktr = 0; 383 #endif 384 385 aiov.iov_base = (void *)(uintptr_t)buf; 386 aiov.iov_len = nbyte; 387 auio.uio_iov = &aiov; 388 auio.uio_iovcnt = 1; 389 auio.uio_offset = offset; 390 if (nbyte > INT_MAX) 391 return (EINVAL); 392 auio.uio_resid = nbyte; 393 auio.uio_rw = UIO_WRITE; 394 auio.uio_segflg = UIO_USERSPACE; 395 auio.uio_procp = p; 396 #ifdef KTRACE 397 /* 398 * if tracing, save a copy of iovec and uio 399 */ 400 if (KTRPOINT(p, KTR_GENIO)) { 401 ktriov = aiov; 402 ktruio = auio; 403 didktr = 1; 404 } 405 #endif 406 cnt = nbyte; 407 if (fp->f_type == DTYPE_VNODE) 408 bwillwrite(); 409 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 410 if (auio.uio_resid != cnt && (error == ERESTART || 411 error == EINTR || error == EWOULDBLOCK)) 412 error = 0; 413 if (error == EPIPE) 414 psignal(p, SIGPIPE); 415 } 416 cnt -= auio.uio_resid; 417 #ifdef KTRACE 418 if (didktr && error == 0) { 419 ktruio.uio_iov = &ktriov; 420 ktruio.uio_resid = cnt; 421 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 422 } 423 #endif 424 p->p_retval[0] = cnt; 425 return (error); 426 } 427 428 /* 429 * Gather write system call 430 */ 431 #ifndef _SYS_SYSPROTO_H_ 432 struct writev_args { 433 int fd; 434 struct iovec *iovp; 435 u_int iovcnt; 436 }; 437 #endif 438 int 439 writev(p, uap) 440 struct proc *p; 441 register struct writev_args *uap; 442 { 443 register struct file *fp; 444 register struct filedesc *fdp = p->p_fd; 445 struct uio auio; 446 register struct iovec *iov; 447 struct iovec *needfree; 448 struct iovec aiov[UIO_SMALLIOV]; 449 long i, cnt, error = 0; 450 u_int iovlen; 451 #ifdef KTRACE 452 struct iovec *ktriov = NULL; 453 struct uio ktruio; 454 #endif 455 456 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 457 return (EBADF); 458 /* note: can't use iovlen until iovcnt is validated */ 459 iovlen = uap->iovcnt * sizeof (struct iovec); 460 if (uap->iovcnt > UIO_SMALLIOV) { 461 if (uap->iovcnt > UIO_MAXIOV) { 462 needfree = NULL; 463 error = EINVAL; 464 goto done; 465 } 466 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 467 needfree = iov; 468 } else { 469 iov = aiov; 470 needfree = NULL; 471 } 472 auio.uio_iov = iov; 473 auio.uio_iovcnt = uap->iovcnt; 474 auio.uio_rw = UIO_WRITE; 475 auio.uio_segflg = UIO_USERSPACE; 476 auio.uio_procp = p; 477 auio.uio_offset = -1; 478 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 479 goto done; 480 auio.uio_resid = 0; 481 for (i = 0; i < uap->iovcnt; i++) { 482 if (iov->iov_len > INT_MAX - auio.uio_resid) { 483 error = EINVAL; 484 goto done; 485 } 486 auio.uio_resid += iov->iov_len; 487 iov++; 488 } 489 #ifdef KTRACE 490 /* 491 * if tracing, save a copy of iovec and uio 492 */ 493 if (KTRPOINT(p, KTR_GENIO)) { 494 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 495 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 496 ktruio = auio; 497 } 498 #endif 499 cnt = auio.uio_resid; 500 if (fp->f_type == DTYPE_VNODE) 501 bwillwrite(); 502 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 503 if (auio.uio_resid != cnt && (error == ERESTART || 504 error == EINTR || error == EWOULDBLOCK)) 505 error = 0; 506 if (error == EPIPE) 507 psignal(p, SIGPIPE); 508 } 509 cnt -= auio.uio_resid; 510 #ifdef KTRACE 511 if (ktriov != NULL) { 512 if (error == 0) { 513 ktruio.uio_iov = ktriov; 514 ktruio.uio_resid = cnt; 515 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 516 error); 517 } 518 FREE(ktriov, M_TEMP); 519 } 520 #endif 521 p->p_retval[0] = cnt; 522 done: 523 fdrop(fp, p); 524 if (needfree) 525 FREE(needfree, M_IOV); 526 return (error); 527 } 528 529 /* 530 * Ioctl system call 531 */ 532 #ifndef _SYS_SYSPROTO_H_ 533 struct ioctl_args { 534 int fd; 535 u_long com; 536 caddr_t data; 537 }; 538 #endif 539 /* ARGSUSED */ 540 int 541 ioctl(p, uap) 542 struct proc *p; 543 register struct ioctl_args *uap; 544 { 545 register struct file *fp; 546 register struct filedesc *fdp; 547 register u_long com; 548 int error; 549 register u_int size; 550 caddr_t data, memp; 551 int tmp; 552 #define STK_PARAMS 128 553 union { 554 char stkbuf[STK_PARAMS]; 555 long align; 556 } ubuf; 557 558 fdp = p->p_fd; 559 if ((u_int)uap->fd >= fdp->fd_nfiles || 560 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 561 return (EBADF); 562 563 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 564 return (EBADF); 565 566 switch (com = uap->com) { 567 case FIONCLEX: 568 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 569 return (0); 570 case FIOCLEX: 571 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 572 return (0); 573 } 574 575 /* 576 * Interpret high order word to find amount of data to be 577 * copied to/from the user's address space. 578 */ 579 size = IOCPARM_LEN(com); 580 if (size > IOCPARM_MAX) 581 return (ENOTTY); 582 583 fhold(fp); 584 585 memp = NULL; 586 if (size > sizeof (ubuf.stkbuf)) { 587 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 588 data = memp; 589 } else { 590 data = ubuf.stkbuf; 591 } 592 if (com&IOC_IN) { 593 if (size) { 594 error = copyin(uap->data, data, (u_int)size); 595 if (error) { 596 if (memp) 597 free(memp, M_IOCTLOPS); 598 fdrop(fp, p); 599 return (error); 600 } 601 } else { 602 *(caddr_t *)data = uap->data; 603 } 604 } else if ((com&IOC_OUT) && size) { 605 /* 606 * Zero the buffer so the user always 607 * gets back something deterministic. 608 */ 609 bzero(data, size); 610 } else if (com&IOC_VOID) { 611 *(caddr_t *)data = uap->data; 612 } 613 614 switch (com) { 615 616 case FIONBIO: 617 if ((tmp = *(int *)data)) 618 fp->f_flag |= FNONBLOCK; 619 else 620 fp->f_flag &= ~FNONBLOCK; 621 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 622 break; 623 624 case FIOASYNC: 625 if ((tmp = *(int *)data)) 626 fp->f_flag |= FASYNC; 627 else 628 fp->f_flag &= ~FASYNC; 629 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 630 break; 631 632 default: 633 error = fo_ioctl(fp, com, data, p); 634 /* 635 * Copy any data to user, size was 636 * already set and checked above. 637 */ 638 if (error == 0 && (com&IOC_OUT) && size) 639 error = copyout(data, uap->data, (u_int)size); 640 break; 641 } 642 if (memp) 643 free(memp, M_IOCTLOPS); 644 fdrop(fp, p); 645 return (error); 646 } 647 648 static int nselcoll; /* Select collisions since boot */ 649 int selwait; 650 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 651 652 /* 653 * Select system call. 654 */ 655 #ifndef _SYS_SYSPROTO_H_ 656 struct select_args { 657 int nd; 658 fd_set *in, *ou, *ex; 659 struct timeval *tv; 660 }; 661 #endif 662 int 663 select(p, uap) 664 register struct proc *p; 665 register struct select_args *uap; 666 { 667 /* 668 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 669 * infds with the new FD_SETSIZE of 1024, and more than enough for 670 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 671 * of 256. 672 */ 673 fd_mask s_selbits[howmany(2048, NFDBITS)]; 674 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 675 struct timeval atv, rtv, ttv; 676 int s, ncoll, error, timo; 677 u_int nbufbytes, ncpbytes, nfdbits; 678 679 if (uap->nd < 0) 680 return (EINVAL); 681 if (uap->nd > p->p_fd->fd_nfiles) 682 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 683 684 /* 685 * Allocate just enough bits for the non-null fd_sets. Use the 686 * preallocated auto buffer if possible. 687 */ 688 nfdbits = roundup(uap->nd, NFDBITS); 689 ncpbytes = nfdbits / NBBY; 690 nbufbytes = 0; 691 if (uap->in != NULL) 692 nbufbytes += 2 * ncpbytes; 693 if (uap->ou != NULL) 694 nbufbytes += 2 * ncpbytes; 695 if (uap->ex != NULL) 696 nbufbytes += 2 * ncpbytes; 697 if (nbufbytes <= sizeof s_selbits) 698 selbits = &s_selbits[0]; 699 else 700 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 701 702 /* 703 * Assign pointers into the bit buffers and fetch the input bits. 704 * Put the output buffers together so that they can be bzeroed 705 * together. 706 */ 707 sbp = selbits; 708 #define getbits(name, x) \ 709 do { \ 710 if (uap->name == NULL) \ 711 ibits[x] = NULL; \ 712 else { \ 713 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 714 obits[x] = sbp; \ 715 sbp += ncpbytes / sizeof *sbp; \ 716 error = copyin(uap->name, ibits[x], ncpbytes); \ 717 if (error != 0) \ 718 goto done; \ 719 } \ 720 } while (0) 721 getbits(in, 0); 722 getbits(ou, 1); 723 getbits(ex, 2); 724 #undef getbits 725 if (nbufbytes != 0) 726 bzero(selbits, nbufbytes / 2); 727 728 if (uap->tv) { 729 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 730 sizeof (atv)); 731 if (error) 732 goto done; 733 if (itimerfix(&atv)) { 734 error = EINVAL; 735 goto done; 736 } 737 getmicrouptime(&rtv); 738 timevaladd(&atv, &rtv); 739 } else { 740 atv.tv_sec = 0; 741 atv.tv_usec = 0; 742 } 743 timo = 0; 744 retry: 745 ncoll = nselcoll; 746 p->p_flag |= P_SELECT; 747 error = selscan(p, ibits, obits, uap->nd); 748 if (error || p->p_retval[0]) 749 goto done; 750 if (atv.tv_sec || atv.tv_usec) { 751 getmicrouptime(&rtv); 752 if (timevalcmp(&rtv, &atv, >=)) 753 goto done; 754 ttv = atv; 755 timevalsub(&ttv, &rtv); 756 timo = ttv.tv_sec > 24 * 60 * 60 ? 757 24 * 60 * 60 * hz : tvtohz(&ttv); 758 } 759 s = splhigh(); 760 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 761 splx(s); 762 goto retry; 763 } 764 p->p_flag &= ~P_SELECT; 765 766 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 767 768 splx(s); 769 if (error == 0) 770 goto retry; 771 done: 772 p->p_flag &= ~P_SELECT; 773 /* select is not restarted after signals... */ 774 if (error == ERESTART) 775 error = EINTR; 776 if (error == EWOULDBLOCK) 777 error = 0; 778 #define putbits(name, x) \ 779 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 780 error = error2; 781 if (error == 0) { 782 int error2; 783 784 putbits(in, 0); 785 putbits(ou, 1); 786 putbits(ex, 2); 787 #undef putbits 788 } 789 if (selbits != &s_selbits[0]) 790 free(selbits, M_SELECT); 791 return (error); 792 } 793 794 static int 795 selscan(p, ibits, obits, nfd) 796 struct proc *p; 797 fd_mask **ibits, **obits; 798 int nfd; 799 { 800 struct filedesc *fdp = p->p_fd; 801 int msk, i, fd; 802 fd_mask bits; 803 struct file *fp; 804 int n = 0; 805 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 806 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 807 808 for (msk = 0; msk < 3; msk++) { 809 if (ibits[msk] == NULL) 810 continue; 811 for (i = 0; i < nfd; i += NFDBITS) { 812 bits = ibits[msk][i/NFDBITS]; 813 /* ffs(int mask) not portable, fd_mask is long */ 814 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 815 if (!(bits & 1)) 816 continue; 817 fp = fdp->fd_ofiles[fd]; 818 if (fp == NULL) 819 return (EBADF); 820 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 821 obits[msk][(fd)/NFDBITS] |= 822 ((fd_mask)1 << ((fd) % NFDBITS)); 823 n++; 824 } 825 } 826 } 827 } 828 p->p_retval[0] = n; 829 return (0); 830 } 831 832 /* 833 * Poll system call. 834 */ 835 #ifndef _SYS_SYSPROTO_H_ 836 struct poll_args { 837 struct pollfd *fds; 838 u_int nfds; 839 int timeout; 840 }; 841 #endif 842 int 843 poll(p, uap) 844 register struct proc *p; 845 register struct poll_args *uap; 846 { 847 caddr_t bits; 848 char smallbits[32 * sizeof(struct pollfd)]; 849 struct timeval atv, rtv, ttv; 850 int s, ncoll, error = 0, timo; 851 size_t ni; 852 853 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 854 /* forgiving; slightly wrong */ 855 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 856 } 857 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 858 if (ni > sizeof(smallbits)) 859 bits = malloc(ni, M_TEMP, M_WAITOK); 860 else 861 bits = smallbits; 862 error = copyin(SCARG(uap, fds), bits, ni); 863 if (error) 864 goto done; 865 if (SCARG(uap, timeout) != INFTIM) { 866 atv.tv_sec = SCARG(uap, timeout) / 1000; 867 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 868 if (itimerfix(&atv)) { 869 error = EINVAL; 870 goto done; 871 } 872 getmicrouptime(&rtv); 873 timevaladd(&atv, &rtv); 874 } else { 875 atv.tv_sec = 0; 876 atv.tv_usec = 0; 877 } 878 timo = 0; 879 retry: 880 ncoll = nselcoll; 881 p->p_flag |= P_SELECT; 882 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 883 if (error || p->p_retval[0]) 884 goto done; 885 if (atv.tv_sec || atv.tv_usec) { 886 getmicrouptime(&rtv); 887 if (timevalcmp(&rtv, &atv, >=)) 888 goto done; 889 ttv = atv; 890 timevalsub(&ttv, &rtv); 891 timo = ttv.tv_sec > 24 * 60 * 60 ? 892 24 * 60 * 60 * hz : tvtohz(&ttv); 893 } 894 s = splhigh(); 895 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 896 splx(s); 897 goto retry; 898 } 899 p->p_flag &= ~P_SELECT; 900 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 901 splx(s); 902 if (error == 0) 903 goto retry; 904 done: 905 p->p_flag &= ~P_SELECT; 906 /* poll is not restarted after signals... */ 907 if (error == ERESTART) 908 error = EINTR; 909 if (error == EWOULDBLOCK) 910 error = 0; 911 if (error == 0) { 912 error = copyout(bits, SCARG(uap, fds), ni); 913 if (error) 914 goto out; 915 } 916 out: 917 if (ni > sizeof(smallbits)) 918 free(bits, M_TEMP); 919 return (error); 920 } 921 922 static int 923 pollscan(p, fds, nfd) 924 struct proc *p; 925 struct pollfd *fds; 926 int nfd; 927 { 928 register struct filedesc *fdp = p->p_fd; 929 int i; 930 struct file *fp; 931 int n = 0; 932 933 for (i = 0; i < nfd; i++, fds++) { 934 if (fds->fd >= fdp->fd_nfiles) { 935 fds->revents = POLLNVAL; 936 n++; 937 } else if (fds->fd < 0) { 938 fds->revents = 0; 939 } else { 940 fp = fdp->fd_ofiles[fds->fd]; 941 if (fp == NULL) { 942 fds->revents = POLLNVAL; 943 n++; 944 } else { 945 /* 946 * Note: backend also returns POLLHUP and 947 * POLLERR if appropriate. 948 */ 949 fds->revents = fo_poll(fp, fds->events, 950 fp->f_cred, p); 951 if (fds->revents != 0) 952 n++; 953 } 954 } 955 } 956 p->p_retval[0] = n; 957 return (0); 958 } 959 960 /* 961 * OpenBSD poll system call. 962 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 963 */ 964 #ifndef _SYS_SYSPROTO_H_ 965 struct openbsd_poll_args { 966 struct pollfd *fds; 967 u_int nfds; 968 int timeout; 969 }; 970 #endif 971 int 972 openbsd_poll(p, uap) 973 register struct proc *p; 974 register struct openbsd_poll_args *uap; 975 { 976 return (poll(p, (struct poll_args *)uap)); 977 } 978 979 /*ARGSUSED*/ 980 int 981 seltrue(dev, events, p) 982 dev_t dev; 983 int events; 984 struct proc *p; 985 { 986 987 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 988 } 989 990 /* 991 * Record a select request. 992 */ 993 void 994 selrecord(selector, sip) 995 struct proc *selector; 996 struct selinfo *sip; 997 { 998 struct proc *p; 999 pid_t mypid; 1000 1001 mypid = selector->p_pid; 1002 if (sip->si_pid == mypid) 1003 return; 1004 if (sip->si_pid && (p = pfind(sip->si_pid)) && 1005 p->p_wchan == (caddr_t)&selwait) 1006 sip->si_flags |= SI_COLL; 1007 else 1008 sip->si_pid = mypid; 1009 } 1010 1011 /* 1012 * Do a wakeup when a selectable event occurs. 1013 */ 1014 void 1015 selwakeup(sip) 1016 register struct selinfo *sip; 1017 { 1018 register struct proc *p; 1019 int s; 1020 1021 if (sip->si_pid == 0) 1022 return; 1023 if (sip->si_flags & SI_COLL) { 1024 nselcoll++; 1025 sip->si_flags &= ~SI_COLL; 1026 wakeup((caddr_t)&selwait); 1027 } 1028 p = pfind(sip->si_pid); 1029 sip->si_pid = 0; 1030 if (p != NULL) { 1031 s = splhigh(); 1032 mtx_enter(&sched_lock, MTX_SPIN); 1033 if (p->p_wchan == (caddr_t)&selwait) { 1034 if (p->p_stat == SSLEEP) 1035 setrunnable(p); 1036 else 1037 unsleep(p); 1038 } else if (p->p_flag & P_SELECT) 1039 p->p_flag &= ~P_SELECT; 1040 mtx_exit(&sched_lock, MTX_SPIN); 1041 splx(s); 1042 } 1043 } 1044