1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/resourcevar.h> 59 #include <sys/selinfo.h> 60 #include <sys/sysctl.h> 61 #include <sys/sysent.h> 62 #include <sys/bio.h> 63 #include <sys/buf.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 #include <vm/vm.h> 68 #include <vm/vm_page.h> 69 70 #include <machine/limits.h> 71 72 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 73 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 74 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 75 76 static int pollscan __P((struct proc *, struct pollfd *, u_int)); 77 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 78 static int dofileread __P((struct proc *, struct file *, int, void *, 79 size_t, off_t, int)); 80 static int dofilewrite __P((struct proc *, struct file *, int, 81 const void *, size_t, off_t, int)); 82 83 struct file* 84 holdfp(fdp, fd, flag) 85 struct filedesc* fdp; 86 int fd, flag; 87 { 88 struct file* fp; 89 90 if (((u_int)fd) >= 
fdp->fd_nfiles || 91 (fp = fdp->fd_ofiles[fd]) == NULL || 92 (fp->f_flag & flag) == 0) { 93 return (NULL); 94 } 95 fhold(fp); 96 return (fp); 97 } 98 99 /* 100 * Read system call. 101 */ 102 #ifndef _SYS_SYSPROTO_H_ 103 struct read_args { 104 int fd; 105 void *buf; 106 size_t nbyte; 107 }; 108 #endif 109 int 110 read(p, uap) 111 struct proc *p; 112 register struct read_args *uap; 113 { 114 register struct file *fp; 115 int error; 116 117 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 118 return (EBADF); 119 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 120 fdrop(fp, p); 121 return(error); 122 } 123 124 /* 125 * Pread system call 126 */ 127 #ifndef _SYS_SYSPROTO_H_ 128 struct pread_args { 129 int fd; 130 void *buf; 131 size_t nbyte; 132 int pad; 133 off_t offset; 134 }; 135 #endif 136 int 137 pread(p, uap) 138 struct proc *p; 139 register struct pread_args *uap; 140 { 141 register struct file *fp; 142 int error; 143 144 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 145 return (EBADF); 146 if (fp->f_type != DTYPE_VNODE) { 147 error = ESPIPE; 148 } else { 149 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 150 uap->offset, FOF_OFFSET); 151 } 152 fdrop(fp, p); 153 return(error); 154 } 155 156 /* 157 * Code common for read and pread 158 */ 159 int 160 dofileread(p, fp, fd, buf, nbyte, offset, flags) 161 struct proc *p; 162 struct file *fp; 163 int fd, flags; 164 void *buf; 165 size_t nbyte; 166 off_t offset; 167 { 168 struct uio auio; 169 struct iovec aiov; 170 long cnt, error = 0; 171 #ifdef KTRACE 172 struct iovec ktriov; 173 struct uio ktruio; 174 int didktr = 0; 175 #endif 176 177 aiov.iov_base = (caddr_t)buf; 178 aiov.iov_len = nbyte; 179 auio.uio_iov = &aiov; 180 auio.uio_iovcnt = 1; 181 auio.uio_offset = offset; 182 if (nbyte > INT_MAX) 183 return (EINVAL); 184 auio.uio_resid = nbyte; 185 auio.uio_rw = UIO_READ; 186 auio.uio_segflg = UIO_USERSPACE; 187 auio.uio_procp = p; 188 #ifdef KTRACE 189 /* 190 * if tracing, 
save a copy of iovec 191 */ 192 if (KTRPOINT(p, KTR_GENIO)) { 193 ktriov = aiov; 194 ktruio = auio; 195 didktr = 1; 196 } 197 #endif 198 cnt = nbyte; 199 200 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 201 if (auio.uio_resid != cnt && (error == ERESTART || 202 error == EINTR || error == EWOULDBLOCK)) 203 error = 0; 204 } 205 cnt -= auio.uio_resid; 206 #ifdef KTRACE 207 if (didktr && error == 0) { 208 ktruio.uio_iov = &ktriov; 209 ktruio.uio_resid = cnt; 210 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 211 } 212 #endif 213 p->p_retval[0] = cnt; 214 return (error); 215 } 216 217 /* 218 * Scatter read system call. 219 */ 220 #ifndef _SYS_SYSPROTO_H_ 221 struct readv_args { 222 int fd; 223 struct iovec *iovp; 224 u_int iovcnt; 225 }; 226 #endif 227 int 228 readv(p, uap) 229 struct proc *p; 230 register struct readv_args *uap; 231 { 232 register struct file *fp; 233 register struct filedesc *fdp = p->p_fd; 234 struct uio auio; 235 register struct iovec *iov; 236 struct iovec *needfree; 237 struct iovec aiov[UIO_SMALLIOV]; 238 long i, cnt, error = 0; 239 u_int iovlen; 240 #ifdef KTRACE 241 struct iovec *ktriov = NULL; 242 struct uio ktruio; 243 #endif 244 245 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 246 return (EBADF); 247 /* note: can't use iovlen until iovcnt is validated */ 248 iovlen = uap->iovcnt * sizeof (struct iovec); 249 if (uap->iovcnt > UIO_SMALLIOV) { 250 if (uap->iovcnt > UIO_MAXIOV) 251 return (EINVAL); 252 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 253 needfree = iov; 254 } else { 255 iov = aiov; 256 needfree = NULL; 257 } 258 auio.uio_iov = iov; 259 auio.uio_iovcnt = uap->iovcnt; 260 auio.uio_rw = UIO_READ; 261 auio.uio_segflg = UIO_USERSPACE; 262 auio.uio_procp = p; 263 auio.uio_offset = -1; 264 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 265 goto done; 266 auio.uio_resid = 0; 267 for (i = 0; i < uap->iovcnt; i++) { 268 if (iov->iov_len > INT_MAX - auio.uio_resid) { 269 error = EINVAL; 270 goto 
done; 271 } 272 auio.uio_resid += iov->iov_len; 273 iov++; 274 } 275 #ifdef KTRACE 276 /* 277 * if tracing, save a copy of iovec 278 */ 279 if (KTRPOINT(p, KTR_GENIO)) { 280 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 281 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 282 ktruio = auio; 283 } 284 #endif 285 cnt = auio.uio_resid; 286 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 287 if (auio.uio_resid != cnt && (error == ERESTART || 288 error == EINTR || error == EWOULDBLOCK)) 289 error = 0; 290 } 291 cnt -= auio.uio_resid; 292 #ifdef KTRACE 293 if (ktriov != NULL) { 294 if (error == 0) { 295 ktruio.uio_iov = ktriov; 296 ktruio.uio_resid = cnt; 297 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 298 error); 299 } 300 FREE(ktriov, M_TEMP); 301 } 302 #endif 303 p->p_retval[0] = cnt; 304 done: 305 fdrop(fp, p); 306 if (needfree) 307 FREE(needfree, M_IOV); 308 return (error); 309 } 310 311 /* 312 * Write system call 313 */ 314 #ifndef _SYS_SYSPROTO_H_ 315 struct write_args { 316 int fd; 317 const void *buf; 318 size_t nbyte; 319 }; 320 #endif 321 int 322 write(p, uap) 323 struct proc *p; 324 register struct write_args *uap; 325 { 326 register struct file *fp; 327 int error; 328 329 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 330 return (EBADF); 331 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 332 fdrop(fp, p); 333 return(error); 334 } 335 336 /* 337 * Pwrite system call 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct pwrite_args { 341 int fd; 342 const void *buf; 343 size_t nbyte; 344 int pad; 345 off_t offset; 346 }; 347 #endif 348 int 349 pwrite(p, uap) 350 struct proc *p; 351 register struct pwrite_args *uap; 352 { 353 register struct file *fp; 354 int error; 355 356 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 357 return (EBADF); 358 if (fp->f_type != DTYPE_VNODE) { 359 error = ESPIPE; 360 } else { 361 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 362 uap->offset, FOF_OFFSET); 
363 } 364 fdrop(fp, p); 365 return(error); 366 } 367 368 static int 369 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 370 struct proc *p; 371 struct file *fp; 372 int fd, flags; 373 const void *buf; 374 size_t nbyte; 375 off_t offset; 376 { 377 struct uio auio; 378 struct iovec aiov; 379 long cnt, error = 0; 380 #ifdef KTRACE 381 struct iovec ktriov; 382 struct uio ktruio; 383 int didktr = 0; 384 #endif 385 386 aiov.iov_base = (void *)(uintptr_t)buf; 387 aiov.iov_len = nbyte; 388 auio.uio_iov = &aiov; 389 auio.uio_iovcnt = 1; 390 auio.uio_offset = offset; 391 if (nbyte > INT_MAX) 392 return (EINVAL); 393 auio.uio_resid = nbyte; 394 auio.uio_rw = UIO_WRITE; 395 auio.uio_segflg = UIO_USERSPACE; 396 auio.uio_procp = p; 397 #ifdef KTRACE 398 /* 399 * if tracing, save a copy of iovec and uio 400 */ 401 if (KTRPOINT(p, KTR_GENIO)) { 402 ktriov = aiov; 403 ktruio = auio; 404 didktr = 1; 405 } 406 #endif 407 cnt = nbyte; 408 if (fp->f_type == DTYPE_VNODE) 409 bwillwrite(); 410 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 411 if (auio.uio_resid != cnt && (error == ERESTART || 412 error == EINTR || error == EWOULDBLOCK)) 413 error = 0; 414 if (error == EPIPE) { 415 PROC_LOCK(p); 416 psignal(p, SIGPIPE); 417 PROC_UNLOCK(p); 418 } 419 } 420 cnt -= auio.uio_resid; 421 #ifdef KTRACE 422 if (didktr && error == 0) { 423 ktruio.uio_iov = &ktriov; 424 ktruio.uio_resid = cnt; 425 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 426 } 427 #endif 428 p->p_retval[0] = cnt; 429 return (error); 430 } 431 432 /* 433 * Gather write system call 434 */ 435 #ifndef _SYS_SYSPROTO_H_ 436 struct writev_args { 437 int fd; 438 struct iovec *iovp; 439 u_int iovcnt; 440 }; 441 #endif 442 int 443 writev(p, uap) 444 struct proc *p; 445 register struct writev_args *uap; 446 { 447 register struct file *fp; 448 register struct filedesc *fdp = p->p_fd; 449 struct uio auio; 450 register struct iovec *iov; 451 struct iovec *needfree; 452 struct iovec aiov[UIO_SMALLIOV]; 453 long i, 
cnt, error = 0; 454 u_int iovlen; 455 #ifdef KTRACE 456 struct iovec *ktriov = NULL; 457 struct uio ktruio; 458 #endif 459 460 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 461 return (EBADF); 462 /* note: can't use iovlen until iovcnt is validated */ 463 iovlen = uap->iovcnt * sizeof (struct iovec); 464 if (uap->iovcnt > UIO_SMALLIOV) { 465 if (uap->iovcnt > UIO_MAXIOV) { 466 needfree = NULL; 467 error = EINVAL; 468 goto done; 469 } 470 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 471 needfree = iov; 472 } else { 473 iov = aiov; 474 needfree = NULL; 475 } 476 auio.uio_iov = iov; 477 auio.uio_iovcnt = uap->iovcnt; 478 auio.uio_rw = UIO_WRITE; 479 auio.uio_segflg = UIO_USERSPACE; 480 auio.uio_procp = p; 481 auio.uio_offset = -1; 482 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 483 goto done; 484 auio.uio_resid = 0; 485 for (i = 0; i < uap->iovcnt; i++) { 486 if (iov->iov_len > INT_MAX - auio.uio_resid) { 487 error = EINVAL; 488 goto done; 489 } 490 auio.uio_resid += iov->iov_len; 491 iov++; 492 } 493 #ifdef KTRACE 494 /* 495 * if tracing, save a copy of iovec and uio 496 */ 497 if (KTRPOINT(p, KTR_GENIO)) { 498 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 499 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 500 ktruio = auio; 501 } 502 #endif 503 cnt = auio.uio_resid; 504 if (fp->f_type == DTYPE_VNODE) 505 bwillwrite(); 506 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 507 if (auio.uio_resid != cnt && (error == ERESTART || 508 error == EINTR || error == EWOULDBLOCK)) 509 error = 0; 510 if (error == EPIPE) { 511 PROC_LOCK(p); 512 psignal(p, SIGPIPE); 513 PROC_UNLOCK(p); 514 } 515 } 516 cnt -= auio.uio_resid; 517 #ifdef KTRACE 518 if (ktriov != NULL) { 519 if (error == 0) { 520 ktruio.uio_iov = ktriov; 521 ktruio.uio_resid = cnt; 522 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 523 error); 524 } 525 FREE(ktriov, M_TEMP); 526 } 527 #endif 528 p->p_retval[0] = cnt; 529 done: 530 fdrop(fp, p); 531 if 
(needfree) 532 FREE(needfree, M_IOV); 533 return (error); 534 } 535 536 /* 537 * Ioctl system call 538 */ 539 #ifndef _SYS_SYSPROTO_H_ 540 struct ioctl_args { 541 int fd; 542 u_long com; 543 caddr_t data; 544 }; 545 #endif 546 /* ARGSUSED */ 547 int 548 ioctl(p, uap) 549 struct proc *p; 550 register struct ioctl_args *uap; 551 { 552 register struct file *fp; 553 register struct filedesc *fdp; 554 register u_long com; 555 int error; 556 register u_int size; 557 caddr_t data, memp; 558 int tmp; 559 #define STK_PARAMS 128 560 union { 561 char stkbuf[STK_PARAMS]; 562 long align; 563 } ubuf; 564 565 fdp = p->p_fd; 566 if ((u_int)uap->fd >= fdp->fd_nfiles || 567 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 568 return (EBADF); 569 570 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 571 return (EBADF); 572 573 switch (com = uap->com) { 574 case FIONCLEX: 575 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 576 return (0); 577 case FIOCLEX: 578 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 579 return (0); 580 } 581 582 /* 583 * Interpret high order word to find amount of data to be 584 * copied to/from the user's address space. 585 */ 586 size = IOCPARM_LEN(com); 587 if (size > IOCPARM_MAX) 588 return (ENOTTY); 589 590 fhold(fp); 591 592 memp = NULL; 593 if (size > sizeof (ubuf.stkbuf)) { 594 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 595 data = memp; 596 } else { 597 data = ubuf.stkbuf; 598 } 599 if (com&IOC_IN) { 600 if (size) { 601 error = copyin(uap->data, data, (u_int)size); 602 if (error) { 603 if (memp) 604 free(memp, M_IOCTLOPS); 605 fdrop(fp, p); 606 return (error); 607 } 608 } else { 609 *(caddr_t *)data = uap->data; 610 } 611 } else if ((com&IOC_OUT) && size) { 612 /* 613 * Zero the buffer so the user always 614 * gets back something deterministic. 
615 */ 616 bzero(data, size); 617 } else if (com&IOC_VOID) { 618 *(caddr_t *)data = uap->data; 619 } 620 621 switch (com) { 622 623 case FIONBIO: 624 if ((tmp = *(int *)data)) 625 fp->f_flag |= FNONBLOCK; 626 else 627 fp->f_flag &= ~FNONBLOCK; 628 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 629 break; 630 631 case FIOASYNC: 632 if ((tmp = *(int *)data)) 633 fp->f_flag |= FASYNC; 634 else 635 fp->f_flag &= ~FASYNC; 636 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 637 break; 638 639 default: 640 error = fo_ioctl(fp, com, data, p); 641 /* 642 * Copy any data to user, size was 643 * already set and checked above. 644 */ 645 if (error == 0 && (com&IOC_OUT) && size) 646 error = copyout(data, uap->data, (u_int)size); 647 break; 648 } 649 if (memp) 650 free(memp, M_IOCTLOPS); 651 fdrop(fp, p); 652 return (error); 653 } 654 655 static int nselcoll; /* Select collisions since boot */ 656 int selwait; 657 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 658 659 /* 660 * Select system call. 661 */ 662 #ifndef _SYS_SYSPROTO_H_ 663 struct select_args { 664 int nd; 665 fd_set *in, *ou, *ex; 666 struct timeval *tv; 667 }; 668 #endif 669 int 670 select(p, uap) 671 register struct proc *p; 672 register struct select_args *uap; 673 { 674 /* 675 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 676 * infds with the new FD_SETSIZE of 1024, and more than enough for 677 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 678 * of 256. 679 */ 680 fd_mask s_selbits[howmany(2048, NFDBITS)]; 681 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 682 struct timeval atv, rtv, ttv; 683 int s, ncoll, error, timo; 684 u_int nbufbytes, ncpbytes, nfdbits; 685 686 if (uap->nd < 0) 687 return (EINVAL); 688 if (uap->nd > p->p_fd->fd_nfiles) 689 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 690 691 /* 692 * Allocate just enough bits for the non-null fd_sets. Use the 693 * preallocated auto buffer if possible. 
694 */ 695 nfdbits = roundup(uap->nd, NFDBITS); 696 ncpbytes = nfdbits / NBBY; 697 nbufbytes = 0; 698 if (uap->in != NULL) 699 nbufbytes += 2 * ncpbytes; 700 if (uap->ou != NULL) 701 nbufbytes += 2 * ncpbytes; 702 if (uap->ex != NULL) 703 nbufbytes += 2 * ncpbytes; 704 if (nbufbytes <= sizeof s_selbits) 705 selbits = &s_selbits[0]; 706 else 707 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 708 709 /* 710 * Assign pointers into the bit buffers and fetch the input bits. 711 * Put the output buffers together so that they can be bzeroed 712 * together. 713 */ 714 sbp = selbits; 715 #define getbits(name, x) \ 716 do { \ 717 if (uap->name == NULL) \ 718 ibits[x] = NULL; \ 719 else { \ 720 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 721 obits[x] = sbp; \ 722 sbp += ncpbytes / sizeof *sbp; \ 723 error = copyin(uap->name, ibits[x], ncpbytes); \ 724 if (error != 0) { \ 725 PROC_LOCK(p); \ 726 goto done; \ 727 } \ 728 } \ 729 } while (0) 730 getbits(in, 0); 731 getbits(ou, 1); 732 getbits(ex, 2); 733 #undef getbits 734 if (nbufbytes != 0) 735 bzero(selbits, nbufbytes / 2); 736 737 if (uap->tv) { 738 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 739 sizeof (atv)); 740 if (error) { 741 PROC_LOCK(p); 742 goto done; 743 } 744 if (itimerfix(&atv)) { 745 error = EINVAL; 746 PROC_LOCK(p); 747 goto done; 748 } 749 getmicrouptime(&rtv); 750 timevaladd(&atv, &rtv); 751 } else { 752 atv.tv_sec = 0; 753 atv.tv_usec = 0; 754 } 755 timo = 0; 756 PROC_LOCK(p); 757 retry: 758 ncoll = nselcoll; 759 p->p_flag |= P_SELECT; 760 PROC_UNLOCK(p); 761 error = selscan(p, ibits, obits, uap->nd); 762 PROC_LOCK(p); 763 if (error || p->p_retval[0]) 764 goto done; 765 if (atv.tv_sec || atv.tv_usec) { 766 getmicrouptime(&rtv); 767 if (timevalcmp(&rtv, &atv, >=)) 768 goto done; 769 ttv = atv; 770 timevalsub(&ttv, &rtv); 771 timo = ttv.tv_sec > 24 * 60 * 60 ? 
772 24 * 60 * 60 * hz : tvtohz(&ttv); 773 } 774 s = splhigh(); 775 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 776 splx(s); 777 goto retry; 778 } 779 p->p_flag &= ~P_SELECT; 780 781 error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "select", 782 timo); 783 784 splx(s); 785 if (error == 0) 786 goto retry; 787 done: 788 p->p_flag &= ~P_SELECT; 789 PROC_UNLOCK(p); 790 /* select is not restarted after signals... */ 791 if (error == ERESTART) 792 error = EINTR; 793 if (error == EWOULDBLOCK) 794 error = 0; 795 #define putbits(name, x) \ 796 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 797 error = error2; 798 if (error == 0) { 799 int error2; 800 801 putbits(in, 0); 802 putbits(ou, 1); 803 putbits(ex, 2); 804 #undef putbits 805 } 806 if (selbits != &s_selbits[0]) 807 free(selbits, M_SELECT); 808 return (error); 809 } 810 811 static int 812 selscan(p, ibits, obits, nfd) 813 struct proc *p; 814 fd_mask **ibits, **obits; 815 int nfd; 816 { 817 struct filedesc *fdp = p->p_fd; 818 int msk, i, fd; 819 fd_mask bits; 820 struct file *fp; 821 int n = 0; 822 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 823 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 824 825 for (msk = 0; msk < 3; msk++) { 826 if (ibits[msk] == NULL) 827 continue; 828 for (i = 0; i < nfd; i += NFDBITS) { 829 bits = ibits[msk][i/NFDBITS]; 830 /* ffs(int mask) not portable, fd_mask is long */ 831 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 832 if (!(bits & 1)) 833 continue; 834 fp = fdp->fd_ofiles[fd]; 835 if (fp == NULL) 836 return (EBADF); 837 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 838 obits[msk][(fd)/NFDBITS] |= 839 ((fd_mask)1 << ((fd) % NFDBITS)); 840 n++; 841 } 842 } 843 } 844 } 845 p->p_retval[0] = n; 846 return (0); 847 } 848 849 /* 850 * Poll system call. 
 */
#ifndef _SYS_SYSPROTO_H_
struct poll_args {
	struct pollfd *fds;
	u_int	nfds;
	int	timeout;
};
#endif
int
poll(p, uap)
	struct proc *p;
	struct poll_args *uap;
{
	caddr_t bits;
	/* Stack buffer avoids malloc for up to 32 pollfd entries. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv, rtv, ttv;
	int s, ncoll, error = 0, timo;
	u_int nfds;
	size_t ni;

	nfds = SCARG(uap, nfds);
	/*
	 * This is kinda bogus.  We have fd limits, but that is not
	 * really related to the size of the pollfd array.  Make sure
	 * we let the process use at least FD_SETSIZE entries and at
	 * least enough for the current limits.  We want to be reasonably
	 * safe, but not overly restrictive.
	 */
	if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE)
		return (EINVAL);
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
	error = copyin(SCARG(uap, fds), bits, ni);
	PROC_LOCK(p);
	if (error)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/* timeout is in milliseconds; split into sec/usec. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Convert the relative timeout to an absolute deadline. */
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		/* INFTIM: zero deadline means sleep without timeout below. */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	timo = 0;
retry:
	/*
	 * Snapshot the collision count and set P_SELECT before scanning;
	 * selwakeup() uses these to detect a wakeup racing with the scan.
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	PROC_UNLOCK(p);
	error = pollscan(p, (struct pollfd *)bits, nfds);
	PROC_LOCK(p);
	if (error || p->p_retval[0])
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		/* Deadline already passed: return with whatever we have. */
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		/* Clamp the sleep to one day to avoid tvtohz() overflow. */
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	/* A wakeup slipped in since the scan; rescan instead of sleeping. */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "poll",
	    timo);
	splx(s);
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	PROC_UNLOCK(p);
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Copy the revents fields back to the user's array. */
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Poll each entry of the fds array, storing the result in revents.
 * Sets p->p_retval[0] to the number of entries with nonzero revents.
 * Always returns 0; bad descriptors are reported per-entry as POLLNVAL
 * rather than failing the whole call.
 */
static int
pollscan(p, fds, nfd)
	struct proc *p;
	struct pollfd *fds;
	u_int nfd;
{
	register struct filedesc *fdp = p->p_fd;
	int i;
	struct file *fp;
	int n = 0;

	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			/* Negative fd: ignore the entry per poll semantics. */
			fds->revents = 0;
		} else {
			fp = fdp->fd_ofiles[fds->fd];
			if (fp == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				/*
				 * Note: backend also returns POLLHUP and
				 * POLLERR if appropriate.
				 */
				fds->revents = fo_poll(fp, fds->events,
				    fp->f_cred, p);
				if (fds->revents != 0)
					n++;
			}
		}
	}
	p->p_retval[0] = n;
	return (0);
}

/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation.. OpenBSD uses select ops.
 */
#ifndef _SYS_SYSPROTO_H_
struct openbsd_poll_args {
	struct pollfd *fds;
	u_int	nfds;
	int	timeout;
};
#endif
int
openbsd_poll(p, uap)
	register struct proc *p;
	register struct openbsd_poll_args *uap;
{
	/* Argument layouts match, so just delegate to native poll(). */
	return (poll(p, (struct poll_args *)uap));
}

/*
 * Generic poll backend for devices that are always ready for
 * non-blocking read and write.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
 *
 * A selinfo can remember only one selecting process (si_pid); if a
 * second process selects on the same object, SI_COLL is set so that
 * selwakeup() broadcasts to all selectors instead.
 */
void
selrecord(selector, sip)
	struct proc *selector;
	struct selinfo *sip;
{
	struct proc *p;
	pid_t mypid;

	mypid = selector->p_pid;
	/* Already recorded for us: nothing to do. */
	if (sip->si_pid == mypid)
		return;
	/*
	 * NOTE(review): the PROC_UNLOCK(p) calls below imply pfind()
	 * returns the process locked -- confirm against pfind()'s
	 * contract in this tree.
	 */
	if (sip->si_pid && (p = pfind(sip->si_pid))) {
		mtx_lock_spin(&sched_lock);
		if (p->p_wchan == (caddr_t)&selwait) {
			/* Recorded process is asleep in select: collision. */
			mtx_unlock_spin(&sched_lock);
			PROC_UNLOCK(p);
			sip->si_flags |= SI_COLL;
			return;
		}
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
	/* Slot is free (or stale): claim it for this process. */
	sip->si_pid = mypid;
}

/*
 * Do a wakeup when a selectable event occurs.
 *
 * On a recorded collision every selector is woken via the shared
 * selwait channel; otherwise only the single recorded process is
 * made runnable (or has its P_SELECT cleared to force a rescan).
 */
void
selwakeup(sip)
	register struct selinfo *sip;
{
	register struct proc *p;

	if (sip->si_pid == 0)
		return;
	if (sip->si_flags & SI_COLL) {
		/* Multiple selectors: broadcast and count the collision. */
		nselcoll++;
		sip->si_flags &= ~SI_COLL;
		wakeup((caddr_t)&selwait);
	}
	p = pfind(sip->si_pid);
	sip->si_pid = 0;
	if (p != NULL) {
		mtx_lock_spin(&sched_lock);
		if (p->p_wchan == (caddr_t)&selwait) {
			if (p->p_stat == SSLEEP)
				setrunnable(p);
			else
				unsleep(p);
		} else
			/* Not asleep yet: clearing P_SELECT forces a rescan. */
			p->p_flag &= ~P_SELECT;
		mtx_unlock_spin(&sched_lock);
		PROC_UNLOCK(p);
	}
}