/*-
 * Copyright (c) 1982, 1986, 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)sys_generic.c	8.5 (Berkeley) 1/21/94
 */
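/*
 * Generic file-descriptor I/O system calls: read/write and their
 * scatter-gather (readv/writev) and positioned (pread/pwrite) variants,
 * ftruncate, ioctl, and the select/poll machinery shared by both.
 */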
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"
#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/socketvar.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/limits.h>
#include <sys/malloc.h>
#include <sys/poll.h>
#include <sys/resourcevar.h>
#include <sys/selinfo.h>
#include <sys/sleepqueue.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/vnode.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/condvar.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif

#include <security/audit/audit.h>

static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");

static int	pollout(struct pollfd *, struct pollfd *, u_int);
static int	pollscan(struct thread *, struct pollfd *, u_int);
static int	pollrescan(struct thread *);
static int	selscan(struct thread *, fd_mask **, fd_mask **, int);
static int	selrescan(struct thread *, fd_mask **, fd_mask **);
static void	selfdalloc(struct thread *, void *);
static void	selfdfree(struct seltd *, struct selfd *);
static int	dofileread(struct thread *, int, struct file *, struct uio *,
		    off_t, int);
static int	dofilewrite(struct thread *, int, struct file *, struct uio *,
		    off_t, int);
static void	doselwakeup(struct selinfo *, int);
static void	seltdinit(struct thread *);
static int	seltdwait(struct thread *, int);
static void	seltdclear(struct thread *);

/*
 * One seltd per thread, allocated on demand.
 *
 *	t - protected by st_mtx
 *	k - Only accessed by curthread or read-only
 */
struct seltd {
	STAILQ_HEAD(, selfd)	st_selq;	/* (k) List of selfds. */
	struct selfd		*st_free1;	/* (k) free fd for read set. */
	struct selfd		*st_free2;	/* (k) free fd for write set. */
	struct mtx		st_mtx;		/* Protects struct seltd */
	struct cv		st_wait;	/* (t) Wait channel. */
	int			st_flags;	/* (t) SELTD_ flags. */
};

#define	SELTD_PENDING	0x0001		/* We have pending events. */
#define	SELTD_RESCAN	0x0002		/* Doing a rescan. */

/*
 * One selfd allocated per-thread per-file-descriptor.
 *	f - protected by sf_mtx
 */
struct selfd {
	STAILQ_ENTRY(selfd)	sf_link;	/* (k) fds owned by this td. */
	TAILQ_ENTRY(selfd)	sf_threads;	/* (f) fds on this selinfo. */
	struct selinfo		*sf_si;		/* (f) selinfo when linked. */
	struct mtx		*sf_mtx;	/* Pointer to selinfo mtx. */
	struct seltd		*sf_td;		/* (k) owning seltd. */
	void			*sf_cookie;	/* (k) fd or pollfd. */
};
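/*
 * Lifecycle of the structures above: selfdalloc() preallocates two selfds
 * before each fo_poll() call; selrecord() links one onto both the thread's
 * st_selq and the selinfo's si_tdlist; doselwakeup() unlinks it from the
 * selinfo, clears sf_si and sets SELTD_PENDING; selrescan(), pollrescan()
 * or seltdclear() finally releases it via selfdfree().
 */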
static uma_zone_t selfd_zone;

#ifndef _SYS_SYSPROTO_H_
struct read_args {
	int	fd;
	void	*buf;
	size_t	nbyte;
};
#endif
int
read(td, uap)
	struct thread *td;
	struct read_args *uap;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (uap->nbyte > INT_MAX)
		return (EINVAL);
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_readv(td, uap->fd, &auio);
	return (error);
}

/*
 * Positioned read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct pread_args {
	int	fd;
	void	*buf;
	size_t	nbyte;
	int	pad;
	off_t	offset;
};
#endif
int
pread(td, uap)
	struct thread *td;
	struct pread_args *uap;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (uap->nbyte > INT_MAX)
		return (EINVAL);
	aiov.iov_base = uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_preadv(td, uap->fd, &auio, uap->offset);
	return (error);
}

int
freebsd6_pread(td, uap)
	struct thread *td;
	struct freebsd6_pread_args *uap;
{
	struct pread_args oargs;

	oargs.fd = uap->fd;
	oargs.buf = uap->buf;
	oargs.nbyte = uap->nbyte;
	oargs.offset = uap->offset;
	return (pread(td, &oargs));
}

/*
 * Scatter read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct readv_args {
	int	fd;
	struct	iovec *iovp;
	u_int	iovcnt;
};
#endif
int
readv(struct thread *td, struct readv_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_readv(td, uap->fd, auio);
	free(auio, M_IOV);
	return (error);
}

int
kern_readv(struct thread *td, int fd, struct uio *auio)
{
	struct file *fp;
	int error;

	error = fget_read(td, fd, &fp);
	if (error)
		return (error);
	error = dofileread(td, fd, fp, auio, (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}

/*
 * Scatter positioned read system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct preadv_args {
	int	fd;
	struct	iovec *iovp;
	u_int	iovcnt;
	off_t	offset;
};
#endif
int
preadv(struct thread *td, struct preadv_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_preadv(td, uap->fd, auio, uap->offset);
	free(auio, M_IOV);
	return (error);
}

int
kern_preadv(td, fd, auio, offset)
	struct thread *td;
	int fd;
	struct uio *auio;
	off_t offset;
{
	struct file *fp;
	int error;

	error = fget_read(td, fd, &fp);
	if (error)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
		error = ESPIPE;
	else if (offset < 0 && fp->f_vnode->v_type != VCHR)
		error = EINVAL;
	else
		error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET);
	fdrop(fp, td);
	return (error);
}
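/*
 * The checks in kern_preadv() above (and kern_pwritev() below) give
 * pread()-style semantics: positioned I/O on a non-seekable descriptor,
 * such as a pipe or socket, fails with ESPIPE, and a negative offset fails
 * with EINVAL except on character devices, which may assign their own
 * meaning to offsets.
 */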
/*
 * Common code for readv and preadv that reads data in
 * from a file using the passed in uio, offset, and flags.
 */
static int
dofileread(td, fd, fp, auio, offset, flags)
	struct thread *td;
	int fd;
	struct file *fp;
	struct uio *auio;
	off_t offset;
	int flags;
{
	ssize_t cnt;
	int error;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	/* Finish zero length reads right here */
	if (auio->uio_resid == 0) {
		td->td_retval[0] = 0;
		return (0);
	}
	auio->uio_rw = UIO_READ;
	auio->uio_offset = offset;
	auio->uio_td = td;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(auio);
#endif
	cnt = auio->uio_resid;
	if ((error = fo_read(fp, auio, td->td_ucred, flags, td))) {
		if (auio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
	cnt -= auio->uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = cnt;
		ktrgenio(fd, UIO_READ, ktruio, error);
	}
#endif
	td->td_retval[0] = cnt;
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct write_args {
	int	fd;
	const void *buf;
	size_t	nbyte;
};
#endif
int
write(td, uap)
	struct thread *td;
	struct write_args *uap;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (uap->nbyte > INT_MAX)
		return (EINVAL);
	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_writev(td, uap->fd, &auio);
	return (error);
}

/*
 * Positioned write system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct pwrite_args {
	int	fd;
	const void *buf;
	size_t	nbyte;
	int	pad;
	off_t	offset;
};
#endif
int
pwrite(td, uap)
	struct thread *td;
	struct pwrite_args *uap;
{
	struct uio auio;
	struct iovec aiov;
	int error;

	if (uap->nbyte > INT_MAX)
		return (EINVAL);
	aiov.iov_base = (void *)(uintptr_t)uap->buf;
	aiov.iov_len = uap->nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = uap->nbyte;
	auio.uio_segflg = UIO_USERSPACE;
	error = kern_pwritev(td, uap->fd, &auio, uap->offset);
	return (error);
}

int
freebsd6_pwrite(td, uap)
	struct thread *td;
	struct freebsd6_pwrite_args *uap;
{
	struct pwrite_args oargs;

	oargs.fd = uap->fd;
	oargs.buf = uap->buf;
	oargs.nbyte = uap->nbyte;
	oargs.offset = uap->offset;
	return (pwrite(td, &oargs));
}

/*
 * Gather write system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct writev_args {
	int	fd;
	struct	iovec *iovp;
	u_int	iovcnt;
};
#endif
int
writev(struct thread *td, struct writev_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_writev(td, uap->fd, auio);
	free(auio, M_IOV);
	return (error);
}

int
kern_writev(struct thread *td, int fd, struct uio *auio)
{
	struct file *fp;
	int error;

	error = fget_write(td, fd, &fp);
	if (error)
		return (error);
	error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0);
	fdrop(fp, td);
	return (error);
}
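/*
 * read(), readv(), write() and writev() above all funnel through
 * kern_readv()/kern_writev(): the caller builds the uio (on the stack, or
 * via copyinuio() for the vector variants) while the fd is resolved with
 * fget_read()/fget_write() and released with fdrop() here.
 */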
/*
 * Gather positioned write system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct pwritev_args {
	int	fd;
	struct	iovec *iovp;
	u_int	iovcnt;
	off_t	offset;
};
#endif
int
pwritev(struct thread *td, struct pwritev_args *uap)
{
	struct uio *auio;
	int error;

	error = copyinuio(uap->iovp, uap->iovcnt, &auio);
	if (error)
		return (error);
	error = kern_pwritev(td, uap->fd, auio, uap->offset);
	free(auio, M_IOV);
	return (error);
}

int
kern_pwritev(td, fd, auio, offset)
	struct thread *td;
	int fd;
	struct uio *auio;
	off_t offset;
{
	struct file *fp;
	int error;

	error = fget_write(td, fd, &fp);
	if (error)
		return (error);
	if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE))
		error = ESPIPE;
	else if (offset < 0 && fp->f_vnode->v_type != VCHR)
		error = EINVAL;
	else
		error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET);
	fdrop(fp, td);
	return (error);
}

/*
 * Common code for writev and pwritev that writes data to
 * a file using the passed in uio, offset, and flags.
 */
static int
dofilewrite(td, fd, fp, auio, offset, flags)
	struct thread *td;
	int fd;
	struct file *fp;
	struct uio *auio;
	off_t offset;
	int flags;
{
	ssize_t cnt;
	int error;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	auio->uio_rw = UIO_WRITE;
	auio->uio_td = td;
	auio->uio_offset = offset;
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(auio);
#endif
	cnt = auio->uio_resid;
	if (fp->f_type == DTYPE_VNODE)
		bwillwrite();
	if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) {
		if (auio->uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Socket layer is responsible for issuing SIGPIPE. */
		if (fp->f_type != DTYPE_SOCKET && error == EPIPE) {
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGPIPE);
			PROC_UNLOCK(td->td_proc);
		}
	}
	cnt -= auio->uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = cnt;
		ktrgenio(fd, UIO_WRITE, ktruio, error);
	}
#endif
	td->td_retval[0] = cnt;
	return (error);
}
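/*
 * dofileread() and dofilewrite() share the partial-transfer convention
 * visible above: if some bytes moved before ERESTART, EINTR or EWOULDBLOCK
 * was returned, the error is suppressed and the short count is reported
 * instead, so callers never lose completed I/O.
 */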
/*
 * Truncate a file given a file descriptor.
 *
 * Can't use fget_write() here, since we must return EINVAL and not EBADF
 * if the descriptor isn't writable.
 */
int
kern_ftruncate(td, fd, length)
	struct thread *td;
	int fd;
	off_t length;
{
	struct file *fp;
	int error;

	AUDIT_ARG(fd, fd);
	if (length < 0)
		return (EINVAL);
	error = fget(td, fd, &fp);
	if (error)
		return (error);
	AUDIT_ARG(file, td->td_proc, fp);
	if (!(fp->f_flag & FWRITE)) {
		fdrop(fp, td);
		return (EINVAL);
	}
	error = fo_truncate(fp, length, td->td_ucred, td);
	fdrop(fp, td);
	return (error);
}

#ifndef _SYS_SYSPROTO_H_
struct ftruncate_args {
	int	fd;
	int	pad;
	off_t	length;
};
#endif
int
ftruncate(td, uap)
	struct thread *td;
	struct ftruncate_args *uap;
{

	return (kern_ftruncate(td, uap->fd, uap->length));
}

#if defined(COMPAT_43)
#ifndef _SYS_SYSPROTO_H_
struct oftruncate_args {
	int	fd;
	long	length;
};
#endif
int
oftruncate(td, uap)
	struct thread *td;
	struct oftruncate_args *uap;
{

	return (kern_ftruncate(td, uap->fd, uap->length));
}
#endif /* COMPAT_43 */
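/*
 * ioctl command words encode their direction and argument size in the
 * high-order bits: IOC_VOID/IOC_IN/IOC_OUT flag how data flows, and
 * IOCPARM_LEN() extracts the byte count that is validated below.  For
 * example, FIONBIO is defined as _IOW('f', 126, int), i.e. IOC_IN with a
 * sizeof(int) argument copied in from user space.
 */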
#ifndef _SYS_SYSPROTO_H_
struct ioctl_args {
	int	fd;
	u_long	com;
	caddr_t	data;
};
#endif
/* ARGSUSED */
int
ioctl(struct thread *td, struct ioctl_args *uap)
{
	u_long com;
	int arg, error;
	u_int size;
	caddr_t data;

	if (uap->com > 0xffffffff) {
		printf(
		    "WARNING pid %d (%s): ioctl sign-extension ioctl %lx\n",
		    td->td_proc->p_pid, td->td_name, uap->com);
		uap->com &= 0xffffffff;
	}
	com = uap->com;

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if ((size > IOCPARM_MAX) ||
	    ((com & (IOC_VOID | IOC_IN | IOC_OUT)) == 0) ||
#if defined(COMPAT_FREEBSD5) || defined(COMPAT_FREEBSD4) || defined(COMPAT_43)
	    ((com & IOC_OUT) && size == 0) ||
#else
	    ((com & (IOC_IN | IOC_OUT)) && size == 0) ||
#endif
	    ((com & IOC_VOID) && size > 0 && size != sizeof(int)))
		return (ENOTTY);

	if (size > 0) {
		if (com & IOC_VOID) {
			/* Integer argument. */
			arg = (intptr_t)uap->data;
			data = (void *)&arg;
			size = 0;
		} else
			data = malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
	} else
		data = (void *)&uap->data;
	if (com & IOC_IN) {
		error = copyin(uap->data, data, (u_int)size);
		if (error) {
			if (size > 0)
				free(data, M_IOCTLOPS);
			return (error);
		}
	} else if (com & IOC_OUT) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	}

	error = kern_ioctl(td, uap->fd, com, data);

	if (error == 0 && (com & IOC_OUT))
		error = copyout(data, uap->data, (u_int)size);

	if (size > 0)
		free(data, M_IOCTLOPS);
	return (error);
}

int
kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data)
{
	struct file *fp;
	struct filedesc *fdp;
	int error;
	int tmp;

	if ((error = fget(td, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		fdrop(fp, td);
		return (EBADF);
	}
	fdp = td->td_proc->p_fd;
	switch (com) {
	case FIONCLEX:
		FILEDESC_XLOCK(fdp);
		fdp->fd_ofileflags[fd] &= ~UF_EXCLOSE;
		FILEDESC_XUNLOCK(fdp);
		goto out;
	case FIOCLEX:
		FILEDESC_XLOCK(fdp);
		fdp->fd_ofileflags[fd] |= UF_EXCLOSE;
		FILEDESC_XUNLOCK(fdp);
		goto out;
	case FIONBIO:
		if ((tmp = *(int *)data))
			atomic_set_int(&fp->f_flag, FNONBLOCK);
		else
			atomic_clear_int(&fp->f_flag, FNONBLOCK);
		data = (void *)&tmp;
		break;
	case FIOASYNC:
		if ((tmp = *(int *)data))
			atomic_set_int(&fp->f_flag, FASYNC);
		else
			atomic_clear_int(&fp->f_flag, FASYNC);
		data = (void *)&tmp;
		break;
	}

	error = fo_ioctl(fp, com, data, td->td_ucred, td);
out:
	fdrop(fp, td);
	return (error);
}

int
poll_no_poll(int events)
{
	/*
	 * Return true for read/write.  If the user asked for something
	 * special, return POLLNVAL, so that clients have a way of
	 * determining reliably whether or not the extended
	 * functionality is present without hard-coding knowledge
	 * of specific filesystem implementations.
	 */
	if (events & ~POLLSTANDARD)
		return (POLLNVAL);

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
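/*
 * A sketch of how a backend without real polling support might use the
 * helper above from its fo_poll method (the function name is illustrative):
 *
 *	static int
 *	xxx_poll(struct file *fp, int events, struct ucred *cred,
 *	    struct thread *td)
 *	{
 *		return (poll_no_poll(events));
 *	}
 */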
#ifndef _SYS_SYSPROTO_H_
struct select_args {
	int	nd;
	fd_set	*in, *ou, *ex;
	struct	timeval *tv;
};
#endif
int
select(td, uap)
	register struct thread *td;
	register struct select_args *uap;
{
	struct timeval tv, *tvp;
	int error;

	if (uap->tv != NULL) {
		error = copyin(uap->tv, &tv, sizeof(tv));
		if (error)
			return (error);
		tvp = &tv;
	} else
		tvp = NULL;

	return (kern_select(td, uap->nd, uap->in, uap->ou, uap->ex, tvp));
}

int
kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
    fd_set *fd_ex, struct timeval *tvp)
{
	struct filedesc *fdp;
	/*
	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
	 * infds with the new FD_SETSIZE of 1024, and more than enough for
	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
	 * of 256.
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
	struct timeval atv, rtv, ttv;
	int error, timo;
	u_int nbufbytes, ncpbytes, nfdbits;

	if (nd < 0)
		return (EINVAL);
	fdp = td->td_proc->p_fd;

	FILEDESC_SLOCK(fdp);
	if (nd > td->td_proc->p_fd->fd_nfiles)
		nd = td->td_proc->p_fd->fd_nfiles;   /* forgiving; slightly wrong */
	FILEDESC_SUNLOCK(fdp);

	/*
	 * Allocate just enough bits for the non-null fd_sets.  Use the
	 * preallocated auto buffer if possible.
	 */
	nfdbits = roundup(nd, NFDBITS);
	ncpbytes = nfdbits / NBBY;
	nbufbytes = 0;
	if (fd_in != NULL)
		nbufbytes += 2 * ncpbytes;
	if (fd_ou != NULL)
		nbufbytes += 2 * ncpbytes;
	if (fd_ex != NULL)
		nbufbytes += 2 * ncpbytes;
	if (nbufbytes <= sizeof s_selbits)
		selbits = &s_selbits[0];
	else
		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);

	/*
	 * Assign pointers into the bit buffers and fetch the input bits.
	 * Put the output buffers together so that they can be bzeroed
	 * together.
	 */
	sbp = selbits;
#define	getbits(name, x) \
	do { \
		if (name == NULL) \
			ibits[x] = NULL; \
		else { \
			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \
			obits[x] = sbp; \
			sbp += ncpbytes / sizeof *sbp; \
			error = copyin(name, ibits[x], ncpbytes); \
			if (error != 0) \
				goto done; \
		} \
	} while (0)
	getbits(fd_in, 0);
	getbits(fd_ou, 1);
	getbits(fd_ex, 2);
#undef	getbits
	if (nbufbytes != 0)
		bzero(selbits, nbufbytes / 2);

	if (tvp != NULL) {
		atv = *tvp;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = selscan(td, ibits, obits, nd);
		if (error || td->td_retval[0] != 0)
			break;
		if (atv.tv_sec || atv.tv_usec) {
			getmicrouptime(&rtv);
			if (timevalcmp(&rtv, &atv, >=))
				break;
			ttv = atv;
			timevalsub(&ttv, &rtv);
			timo = ttv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&ttv);
		}
		error = seltdwait(td, timo);
		if (error)
			break;
		error = selrescan(td, ibits, obits);
		if (error || td->td_retval[0] != 0)
			break;
	}
	seltdclear(td);

done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
#define	putbits(name, x) \
	if (name && (error2 = copyout(obits[x], name, ncpbytes))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(fd_in, 0);
		putbits(fd_ou, 1);
		putbits(fd_ex, 2);
#undef putbits
	}
	if (selbits != &s_selbits[0])
		free(selbits, M_SELECT);

	return (error);
}
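/*
 * Buffer layout produced by getbits() in kern_select() above, assuming all
 * three sets are non-NULL: the output halves occupy the first nbufbytes/2
 * bytes so they can be cleared with a single bzero(), and the copied-in
 * input halves follow:
 *
 *	selbits: [obits0][obits1][obits2][ibits0][ibits1][ibits2]
 */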
/*
 * Convert a select bit set to poll flags.
 *
 * The backend always returns POLLHUP/POLLERR if appropriate and we
 * return this as a set bit in any set.
 */
static int select_flags[3] = {
    POLLRDNORM | POLLHUP | POLLERR,
    POLLWRNORM | POLLHUP | POLLERR,
    POLLRDBAND | POLLHUP | POLLERR
};

/*
 * Compute the fo_poll flags required for a fd given by the index and
 * bit position in the fd_mask array.
 */
static __inline int
selflags(fd_mask **ibits, int idx, fd_mask bit)
{
	int flags;
	int msk;

	flags = 0;
	for (msk = 0; msk < 3; msk++) {
		if (ibits[msk] == NULL)
			continue;
		if ((ibits[msk][idx] & bit) == 0)
			continue;
		flags |= select_flags[msk];
	}
	return (flags);
}

/*
 * Set the appropriate output bits given a mask of fired events and the
 * input bits originally requested.
 */
static __inline int
selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events)
{
	int msk;
	int n;

	n = 0;
	for (msk = 0; msk < 3; msk++) {
		if ((events & select_flags[msk]) == 0)
			continue;
		if (ibits[msk] == NULL)
			continue;
		if ((ibits[msk][idx] & bit) == 0)
			continue;
		/*
		 * XXX Check for a duplicate set.  This can occur because a
		 * socket calls selrecord() twice for each poll() call
		 * resulting in two selfds per real fd.  selrescan() will
		 * call selsetbits twice as a result.
		 */
		if ((obits[msk][idx] & bit) != 0)
			continue;
		obits[msk][idx] |= bit;
		n++;
	}

	return (n);
}

/*
 * Traverse the list of fds attached to this thread's seltd and check for
 * completion.
 */
static int
selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
{
	struct filedesc *fdp;
	struct selinfo *si;
	struct seltd *stp;
	struct selfd *sfp;
	struct selfd *sfn;
	struct file *fp;
	fd_mask bit;
	int fd, ev, n, idx;

	fdp = td->td_proc->p_fd;
	stp = td->td_sel;
	n = 0;
	FILEDESC_SLOCK(fdp);
	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
		fd = (int)(uintptr_t)sfp->sf_cookie;
		si = sfp->sf_si;
		selfdfree(stp, sfp);
		/* If the selinfo wasn't cleared the event didn't fire. */
		if (si != NULL)
			continue;
		if ((fp = fget_locked(fdp, fd)) == NULL) {
			FILEDESC_SUNLOCK(fdp);
			return (EBADF);
		}
		idx = fd / NFDBITS;
		bit = (fd_mask)1 << (fd % NFDBITS);
		ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td);
		if (ev != 0)
			n += selsetbits(ibits, obits, idx, bit, ev);
	}
	FILEDESC_SUNLOCK(fdp);
	stp->st_flags = 0;
	td->td_retval[0] = n;
	return (0);
}
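/*
 * Note that selrescan() above only re-polls descriptors whose sf_si was
 * cleared by doselwakeup(); entries still linked to their selinfo did not
 * fire and are simply freed.  This makes each pass of the wait loop in
 * kern_select() cheaper than repeating the full bitwise selscan() below.
 */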
/*
 * Perform the initial filedescriptor scan and register ourselves with
 * each selinfo.
 */
static int
selscan(td, ibits, obits, nfd)
	struct thread *td;
	fd_mask **ibits, **obits;
	int nfd;
{
	struct filedesc *fdp;
	struct file *fp;
	fd_mask bit;
	int ev, flags, end, fd;
	int n, idx;

	fdp = td->td_proc->p_fd;
	n = 0;
	FILEDESC_SLOCK(fdp);
	for (idx = 0, fd = 0; fd < nfd; idx++) {
		end = imin(fd + NFDBITS, nfd);
		for (bit = 1; fd < end; bit <<= 1, fd++) {
			/* Compute the list of events we're interested in. */
			flags = selflags(ibits, idx, bit);
			if (flags == 0)
				continue;
			if ((fp = fget_locked(fdp, fd)) == NULL) {
				FILEDESC_SUNLOCK(fdp);
				return (EBADF);
			}
			selfdalloc(td, (void *)(uintptr_t)fd);
			ev = fo_poll(fp, flags, td->td_ucred, td);
			if (ev != 0)
				n += selsetbits(ibits, obits, idx, bit, ev);
		}
	}

	FILEDESC_SUNLOCK(fdp);
	td->td_retval[0] = n;
	return (0);
}

#ifndef _SYS_SYSPROTO_H_
struct poll_args {
	struct pollfd *fds;
	u_int	nfds;
	int	timeout;
};
#endif
int
poll(td, uap)
	struct thread *td;
	struct poll_args *uap;
{
	struct pollfd *bits;
	struct pollfd smallbits[32];
	struct timeval atv, rtv, ttv;
	int error = 0, timo;
	u_int nfds;
	size_t ni;

	nfds = uap->nfds;
	if (nfds > maxfilesperproc && nfds > FD_SETSIZE)
		return (EINVAL);
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
	error = copyin(uap->fds, bits, ni);
	if (error)
		goto done;
	if (uap->timeout != INFTIM) {
		atv.tv_sec = uap->timeout / 1000;
		atv.tv_usec = (uap->timeout % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
	seltdinit(td);
	/* Iterate until the timeout expires or descriptors become ready. */
	for (;;) {
		error = pollscan(td, bits, nfds);
		if (error || td->td_retval[0] != 0)
			break;
		if (atv.tv_sec || atv.tv_usec) {
			getmicrouptime(&rtv);
			if (timevalcmp(&rtv, &atv, >=))
				break;
			ttv = atv;
			timevalsub(&ttv, &rtv);
			timo = ttv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&ttv);
		}
		error = seltdwait(td, timo);
		if (error)
			break;
		error = pollrescan(td);
		if (error || td->td_retval[0] != 0)
			break;
	}
	seltdclear(td);

done:
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = pollout(bits, uap->fds, nfds);
		if (error)
			goto out;
	}
out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
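/*
 * Both poll() above and kern_select() convert their timeout to an absolute
 * uptime deadline (atv) and re-derive the remaining ticks on every loop
 * iteration, capping the cv timeout at 24 hours' worth of ticks, presumably
 * to keep the tvtohz() conversion in range for very large timeouts.
 */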
static int
pollrescan(struct thread *td)
{
	struct seltd *stp;
	struct selfd *sfp;
	struct selfd *sfn;
	struct selinfo *si;
	struct filedesc *fdp;
	struct file *fp;
	struct pollfd *fd;
	int n;

	n = 0;
	fdp = td->td_proc->p_fd;
	stp = td->td_sel;
	FILEDESC_SLOCK(fdp);
	STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
		fd = (struct pollfd *)sfp->sf_cookie;
		si = sfp->sf_si;
		selfdfree(stp, sfp);
		/* If the selinfo wasn't cleared the event didn't fire. */
		if (si != NULL)
			continue;
		fp = fdp->fd_ofiles[fd->fd];
		if (fp == NULL) {
			fd->revents = POLLNVAL;
			n++;
			continue;
		}
		/*
		 * Note: backend also returns POLLHUP and
		 * POLLERR if appropriate.
		 */
		fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
		if (fd->revents != 0)
			n++;
	}
	FILEDESC_SUNLOCK(fdp);
	stp->st_flags = 0;
	td->td_retval[0] = n;
	return (0);
}

static int
pollout(fds, ufds, nfd)
	struct pollfd *fds;
	struct pollfd *ufds;
	u_int nfd;
{
	int error = 0;
	u_int i = 0;

	for (i = 0; i < nfd; i++) {
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	return (0);
}

static int
pollscan(td, fds, nfd)
	struct thread *td;
	struct pollfd *fds;
	u_int nfd;
{
	struct filedesc *fdp = td->td_proc->p_fd;
	int i;
	struct file *fp;
	int n = 0;

	FILEDESC_SLOCK(fdp);
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			fds->revents = 0;
		} else {
			fp = fdp->fd_ofiles[fds->fd];
			if (fp == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				/*
				 * Note: backend also returns POLLHUP and
				 * POLLERR if appropriate.
				 */
				selfdalloc(td, fds);
				fds->revents = fo_poll(fp, fds->events,
				    td->td_ucred, td);
				if (fds->revents != 0)
					n++;
			}
		}
	}
	FILEDESC_SUNLOCK(fdp);
	td->td_retval[0] = n;
	return (0);
}

/*
 * OpenBSD poll system call.
 *
 * XXX this isn't quite a true representation...  OpenBSD uses select ops.
 */
#ifndef _SYS_SYSPROTO_H_
struct openbsd_poll_args {
	struct pollfd *fds;
	u_int	nfds;
	int	timeout;
};
#endif
int
openbsd_poll(td, uap)
	register struct thread *td;
	register struct openbsd_poll_args *uap;
{
	return (poll(td, (struct poll_args *)uap));
}

/*
 * XXX This was created specifically to support netncp and netsmb.  This
 * allows the caller to specify a socket to wait for events on.  It returns
 * 0 if any events matched and an error otherwise.  There is no way to
 * determine which events fired.
 */
int
selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
{
	struct timeval atv, rtv, ttv;
	int error, timo;

	if (tvp != NULL) {
		atv = *tvp;
		if (itimerfix(&atv))
			return (EINVAL);
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}

	timo = 0;
	seltdinit(td);
	/*
	 * Iterate until the timeout expires or the socket becomes ready.
	 */
	for (;;) {
		selfdalloc(td, NULL);
		error = sopoll(so, events, NULL, td);
		/* error here is actually the ready events. */
		if (error)
			return (0);
		if (atv.tv_sec || atv.tv_usec) {
			getmicrouptime(&rtv);
			if (timevalcmp(&rtv, &atv, >=)) {
				seltdclear(td);
				return (EWOULDBLOCK);
			}
			ttv = atv;
			timevalsub(&ttv, &rtv);
			timo = ttv.tv_sec > 24 * 60 * 60 ?
			    24 * 60 * 60 * hz : tvtohz(&ttv);
		}
		error = seltdwait(td, timo);
		seltdclear(td);
		if (error)
			break;
	}
	/* XXX Duplicates ncp/smb behavior. */
	if (error == ERESTART)
		error = 0;
	return (error);
}
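/*
 * A sketch of waiting for a socket to become readable with the helper
 * above, in the style of its netncp/netsmb callers (so and tv are assumed
 * to be set up by the caller):
 *
 *	error = selsocket(so, POLLIN | POLLRDNORM, &tv, td);
 *	if (error == 0)
 *		// at least one requested event is ready
 *	else if (error == EWOULDBLOCK)
 *		// the timeout expired first
 */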
/*
 * Preallocate two selfds associated with 'cookie'.  Some fo_poll routines
 * have two select sets, one for read and another for write.
 */
static void
selfdalloc(struct thread *td, void *cookie)
{
	struct seltd *stp;

	stp = td->td_sel;
	if (stp->st_free1 == NULL)
		stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
	stp->st_free1->sf_td = stp;
	stp->st_free1->sf_cookie = cookie;
	if (stp->st_free2 == NULL)
		stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
	stp->st_free2->sf_td = stp;
	stp->st_free2->sf_cookie = cookie;
}

static void
selfdfree(struct seltd *stp, struct selfd *sfp)
{
	STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
	mtx_lock(sfp->sf_mtx);
	if (sfp->sf_si)
		TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
	mtx_unlock(sfp->sf_mtx);
	uma_zfree(selfd_zone, sfp);
}

/*
 * Record a select request.
 */
void
selrecord(selector, sip)
	struct thread *selector;
	struct selinfo *sip;
{
	struct selfd *sfp;
	struct seltd *stp;
	struct mtx *mtxp;

	stp = selector->td_sel;
	/*
	 * Don't record when doing a rescan.
	 */
	if (stp->st_flags & SELTD_RESCAN)
		return;
	/*
	 * Grab one of the preallocated descriptors.
	 */
	sfp = NULL;
	if ((sfp = stp->st_free1) != NULL)
		stp->st_free1 = NULL;
	else if ((sfp = stp->st_free2) != NULL)
		stp->st_free2 = NULL;
	else
		panic("selrecord: No free selfd on selq");
	mtxp = mtx_pool_find(mtxpool_sleep, sip);
	/*
	 * Initialize the sfp and queue it in the thread.
	 */
	sfp->sf_si = sip;
	sfp->sf_mtx = mtxp;
	STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
	/*
	 * Now that we've locked the sip, check for initialization.
	 */
	mtx_lock(mtxp);
	if (sip->si_mtx == NULL) {
		sip->si_mtx = mtxp;
		TAILQ_INIT(&sip->si_tdlist);
	}
	/*
	 * Add this thread to the list of selfds listening on this selinfo.
	 */
	TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
	mtx_unlock(sip->si_mtx);
}
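/*
 * selinfo locks are drawn lazily from the sleep mutex pool on the first
 * selrecord() against a given selinfo; holding si_mtx serializes
 * selrecord() against doselwakeup(), which walks si_tdlist under the
 * same lock.
 */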
1444 */ 1445 TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads); 1446 sfp->sf_si = NULL; 1447 stp = sfp->sf_td; 1448 mtx_lock(&stp->st_mtx); 1449 stp->st_flags |= SELTD_PENDING; 1450 cv_broadcastpri(&stp->st_wait, pri); 1451 mtx_unlock(&stp->st_mtx); 1452 } 1453 mtx_unlock(sip->si_mtx); 1454 } 1455 1456 static void 1457 seltdinit(struct thread *td) 1458 { 1459 struct seltd *stp; 1460 1461 if ((stp = td->td_sel) != NULL) 1462 goto out; 1463 td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO); 1464 mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF); 1465 cv_init(&stp->st_wait, "select"); 1466 out: 1467 stp->st_flags = 0; 1468 STAILQ_INIT(&stp->st_selq); 1469 } 1470 1471 static int 1472 seltdwait(struct thread *td, int timo) 1473 { 1474 struct seltd *stp; 1475 int error; 1476 1477 stp = td->td_sel; 1478 /* 1479 * An event of interest may occur while we do not hold the seltd 1480 * locked so check the pending flag before we sleep. 1481 */ 1482 mtx_lock(&stp->st_mtx); 1483 /* 1484 * Any further calls to selrecord will be a rescan. 1485 */ 1486 stp->st_flags |= SELTD_RESCAN; 1487 if (stp->st_flags & SELTD_PENDING) { 1488 mtx_unlock(&stp->st_mtx); 1489 return (0); 1490 } 1491 if (timo > 0) 1492 error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo); 1493 else 1494 error = cv_wait_sig(&stp->st_wait, &stp->st_mtx); 1495 mtx_unlock(&stp->st_mtx); 1496 1497 return (error); 1498 } 1499 1500 void 1501 seltdfini(struct thread *td) 1502 { 1503 struct seltd *stp; 1504 1505 stp = td->td_sel; 1506 if (stp == NULL) 1507 return; 1508 if (stp->st_free1) 1509 uma_zfree(selfd_zone, stp->st_free1); 1510 if (stp->st_free2) 1511 uma_zfree(selfd_zone, stp->st_free2); 1512 td->td_sel = NULL; 1513 free(stp, M_SELECT); 1514 } 1515 1516 /* 1517 * Remove the references to the thread from all of the objects we were 1518 * polling. 1519 */ 1520 static void 1521 seltdclear(struct thread *td) 1522 { 1523 struct seltd *stp; 1524 struct selfd *sfp; 1525 struct selfd *sfn; 1526 1527 stp = td->td_sel; 1528 STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) 1529 selfdfree(stp, sfp); 1530 stp->st_flags = 0; 1531 } 1532 1533 static void selectinit(void *); 1534 SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL); 1535 static void 1536 selectinit(void *dummy __unused) 1537 { 1538 selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL, 1539 NULL, NULL, UMA_ALIGN_PTR, 0); 1540 } 1541