1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysent.h> 60 #include <sys/bio.h> 61 #include <sys/buf.h> 62 #ifdef KTRACE 63 #include <sys/ktrace.h> 64 #endif 65 #include <vm/vm.h> 66 #include <vm/vm_page.h> 67 68 #include <machine/limits.h> 69 70 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 71 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 72 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 73 74 static int pollscan __P((struct proc *, struct pollfd *, int)); 75 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 76 static int dofileread __P((struct proc *, struct file *, int, void *, 77 size_t, off_t, int)); 78 static int dofilewrite __P((struct proc *, struct file *, int, 79 const void *, size_t, off_t, int)); 80 81 struct file* 82 holdfp(fdp, fd, flag) 83 struct filedesc* fdp; 84 int fd, flag; 85 { 86 struct file* fp; 87 88 if (((u_int)fd) >= fdp->fd_nfiles || 89 (fp = fdp->fd_ofiles[fd]) == NULL || 90 (fp->f_flag 
& flag) == 0) { 91 return (NULL); 92 } 93 fhold(fp); 94 return (fp); 95 } 96 97 /* 98 * Read system call. 99 */ 100 #ifndef _SYS_SYSPROTO_H_ 101 struct read_args { 102 int fd; 103 void *buf; 104 size_t nbyte; 105 }; 106 #endif 107 int 108 read(p, uap) 109 struct proc *p; 110 register struct read_args *uap; 111 { 112 register struct file *fp; 113 int error; 114 115 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 116 return (EBADF); 117 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 118 fdrop(fp, p); 119 return(error); 120 } 121 122 /* 123 * Pread system call 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct pread_args { 127 int fd; 128 void *buf; 129 size_t nbyte; 130 int pad; 131 off_t offset; 132 }; 133 #endif 134 int 135 pread(p, uap) 136 struct proc *p; 137 register struct pread_args *uap; 138 { 139 register struct file *fp; 140 int error; 141 142 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 143 return (EBADF); 144 if (fp->f_type != DTYPE_VNODE) { 145 error = ESPIPE; 146 } else { 147 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 148 uap->offset, FOF_OFFSET); 149 } 150 fdrop(fp, p); 151 return(error); 152 } 153 154 /* 155 * Code common for read and pread 156 */ 157 int 158 dofileread(p, fp, fd, buf, nbyte, offset, flags) 159 struct proc *p; 160 struct file *fp; 161 int fd, flags; 162 void *buf; 163 size_t nbyte; 164 off_t offset; 165 { 166 struct uio auio; 167 struct iovec aiov; 168 long cnt, error = 0; 169 #ifdef KTRACE 170 struct iovec ktriov; 171 struct uio ktruio; 172 int didktr = 0; 173 #endif 174 175 aiov.iov_base = (caddr_t)buf; 176 aiov.iov_len = nbyte; 177 auio.uio_iov = &aiov; 178 auio.uio_iovcnt = 1; 179 auio.uio_offset = offset; 180 if (nbyte > INT_MAX) 181 return (EINVAL); 182 auio.uio_resid = nbyte; 183 auio.uio_rw = UIO_READ; 184 auio.uio_segflg = UIO_USERSPACE; 185 auio.uio_procp = p; 186 #ifdef KTRACE 187 /* 188 * if tracing, save a copy of iovec 189 */ 190 if (KTRPOINT(p, KTR_GENIO)) { 191 ktriov = 
aiov; 192 ktruio = auio; 193 didktr = 1; 194 } 195 #endif 196 cnt = nbyte; 197 198 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 199 if (auio.uio_resid != cnt && (error == ERESTART || 200 error == EINTR || error == EWOULDBLOCK)) 201 error = 0; 202 } 203 cnt -= auio.uio_resid; 204 #ifdef KTRACE 205 if (didktr && error == 0) { 206 ktruio.uio_iov = &ktriov; 207 ktruio.uio_resid = cnt; 208 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 209 } 210 #endif 211 p->p_retval[0] = cnt; 212 return (error); 213 } 214 215 /* 216 * Scatter read system call. 217 */ 218 #ifndef _SYS_SYSPROTO_H_ 219 struct readv_args { 220 int fd; 221 struct iovec *iovp; 222 u_int iovcnt; 223 }; 224 #endif 225 int 226 readv(p, uap) 227 struct proc *p; 228 register struct readv_args *uap; 229 { 230 register struct file *fp; 231 register struct filedesc *fdp = p->p_fd; 232 struct uio auio; 233 register struct iovec *iov; 234 struct iovec *needfree; 235 struct iovec aiov[UIO_SMALLIOV]; 236 long i, cnt, error = 0; 237 u_int iovlen; 238 #ifdef KTRACE 239 struct iovec *ktriov = NULL; 240 struct uio ktruio; 241 #endif 242 243 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 244 return (EBADF); 245 /* note: can't use iovlen until iovcnt is validated */ 246 iovlen = uap->iovcnt * sizeof (struct iovec); 247 if (uap->iovcnt > UIO_SMALLIOV) { 248 if (uap->iovcnt > UIO_MAXIOV) 249 return (EINVAL); 250 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 251 needfree = iov; 252 } else { 253 iov = aiov; 254 needfree = NULL; 255 } 256 auio.uio_iov = iov; 257 auio.uio_iovcnt = uap->iovcnt; 258 auio.uio_rw = UIO_READ; 259 auio.uio_segflg = UIO_USERSPACE; 260 auio.uio_procp = p; 261 auio.uio_offset = -1; 262 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 263 goto done; 264 auio.uio_resid = 0; 265 for (i = 0; i < uap->iovcnt; i++) { 266 if (iov->iov_len > INT_MAX - auio.uio_resid) { 267 error = EINVAL; 268 goto done; 269 } 270 auio.uio_resid += iov->iov_len; 271 iov++; 272 } 273 #ifdef 
KTRACE 274 /* 275 * if tracing, save a copy of iovec 276 */ 277 if (KTRPOINT(p, KTR_GENIO)) { 278 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 279 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 280 ktruio = auio; 281 } 282 #endif 283 cnt = auio.uio_resid; 284 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 285 if (auio.uio_resid != cnt && (error == ERESTART || 286 error == EINTR || error == EWOULDBLOCK)) 287 error = 0; 288 } 289 cnt -= auio.uio_resid; 290 #ifdef KTRACE 291 if (ktriov != NULL) { 292 if (error == 0) { 293 ktruio.uio_iov = ktriov; 294 ktruio.uio_resid = cnt; 295 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 296 error); 297 } 298 FREE(ktriov, M_TEMP); 299 } 300 #endif 301 p->p_retval[0] = cnt; 302 done: 303 fdrop(fp, p); 304 if (needfree) 305 FREE(needfree, M_IOV); 306 return (error); 307 } 308 309 /* 310 * Write system call 311 */ 312 #ifndef _SYS_SYSPROTO_H_ 313 struct write_args { 314 int fd; 315 const void *buf; 316 size_t nbyte; 317 }; 318 #endif 319 int 320 write(p, uap) 321 struct proc *p; 322 register struct write_args *uap; 323 { 324 register struct file *fp; 325 int error; 326 327 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 328 return (EBADF); 329 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 330 fdrop(fp, p); 331 return(error); 332 } 333 334 /* 335 * Pwrite system call 336 */ 337 #ifndef _SYS_SYSPROTO_H_ 338 struct pwrite_args { 339 int fd; 340 const void *buf; 341 size_t nbyte; 342 int pad; 343 off_t offset; 344 }; 345 #endif 346 int 347 pwrite(p, uap) 348 struct proc *p; 349 register struct pwrite_args *uap; 350 { 351 register struct file *fp; 352 int error; 353 354 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 355 return (EBADF); 356 if (fp->f_type != DTYPE_VNODE) { 357 error = ESPIPE; 358 } else { 359 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 360 uap->offset, FOF_OFFSET); 361 } 362 fdrop(fp, p); 363 return(error); 364 } 365 366 static int 367 
dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 368 struct proc *p; 369 struct file *fp; 370 int fd, flags; 371 const void *buf; 372 size_t nbyte; 373 off_t offset; 374 { 375 struct uio auio; 376 struct iovec aiov; 377 long cnt, error = 0; 378 #ifdef KTRACE 379 struct iovec ktriov; 380 struct uio ktruio; 381 int didktr = 0; 382 #endif 383 384 aiov.iov_base = (void *)(uintptr_t)buf; 385 aiov.iov_len = nbyte; 386 auio.uio_iov = &aiov; 387 auio.uio_iovcnt = 1; 388 auio.uio_offset = offset; 389 if (nbyte > INT_MAX) 390 return (EINVAL); 391 auio.uio_resid = nbyte; 392 auio.uio_rw = UIO_WRITE; 393 auio.uio_segflg = UIO_USERSPACE; 394 auio.uio_procp = p; 395 #ifdef KTRACE 396 /* 397 * if tracing, save a copy of iovec and uio 398 */ 399 if (KTRPOINT(p, KTR_GENIO)) { 400 ktriov = aiov; 401 ktruio = auio; 402 didktr = 1; 403 } 404 #endif 405 cnt = nbyte; 406 if (fp->f_type == DTYPE_VNODE) 407 bwillwrite(); 408 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 409 if (auio.uio_resid != cnt && (error == ERESTART || 410 error == EINTR || error == EWOULDBLOCK)) 411 error = 0; 412 if (error == EPIPE) 413 psignal(p, SIGPIPE); 414 } 415 cnt -= auio.uio_resid; 416 #ifdef KTRACE 417 if (didktr && error == 0) { 418 ktruio.uio_iov = &ktriov; 419 ktruio.uio_resid = cnt; 420 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 421 } 422 #endif 423 p->p_retval[0] = cnt; 424 return (error); 425 } 426 427 /* 428 * Gather write system call 429 */ 430 #ifndef _SYS_SYSPROTO_H_ 431 struct writev_args { 432 int fd; 433 struct iovec *iovp; 434 u_int iovcnt; 435 }; 436 #endif 437 int 438 writev(p, uap) 439 struct proc *p; 440 register struct writev_args *uap; 441 { 442 register struct file *fp; 443 register struct filedesc *fdp = p->p_fd; 444 struct uio auio; 445 register struct iovec *iov; 446 struct iovec *needfree; 447 struct iovec aiov[UIO_SMALLIOV]; 448 long i, cnt, error = 0; 449 u_int iovlen; 450 #ifdef KTRACE 451 struct iovec *ktriov = NULL; 452 struct uio ktruio; 453 #endif 
454 455 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 456 return (EBADF); 457 /* note: can't use iovlen until iovcnt is validated */ 458 iovlen = uap->iovcnt * sizeof (struct iovec); 459 if (uap->iovcnt > UIO_SMALLIOV) { 460 if (uap->iovcnt > UIO_MAXIOV) { 461 needfree = NULL; 462 error = EINVAL; 463 goto done; 464 } 465 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 466 needfree = iov; 467 } else { 468 iov = aiov; 469 needfree = NULL; 470 } 471 auio.uio_iov = iov; 472 auio.uio_iovcnt = uap->iovcnt; 473 auio.uio_rw = UIO_WRITE; 474 auio.uio_segflg = UIO_USERSPACE; 475 auio.uio_procp = p; 476 auio.uio_offset = -1; 477 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 478 goto done; 479 auio.uio_resid = 0; 480 for (i = 0; i < uap->iovcnt; i++) { 481 if (iov->iov_len > INT_MAX - auio.uio_resid) { 482 error = EINVAL; 483 goto done; 484 } 485 auio.uio_resid += iov->iov_len; 486 iov++; 487 } 488 #ifdef KTRACE 489 /* 490 * if tracing, save a copy of iovec and uio 491 */ 492 if (KTRPOINT(p, KTR_GENIO)) { 493 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 494 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 495 ktruio = auio; 496 } 497 #endif 498 cnt = auio.uio_resid; 499 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 500 if (auio.uio_resid != cnt && (error == ERESTART || 501 error == EINTR || error == EWOULDBLOCK)) 502 error = 0; 503 if (error == EPIPE) 504 psignal(p, SIGPIPE); 505 } 506 cnt -= auio.uio_resid; 507 #ifdef KTRACE 508 if (ktriov != NULL) { 509 if (error == 0) { 510 ktruio.uio_iov = ktriov; 511 ktruio.uio_resid = cnt; 512 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 513 error); 514 } 515 FREE(ktriov, M_TEMP); 516 } 517 #endif 518 p->p_retval[0] = cnt; 519 done: 520 fdrop(fp, p); 521 if (needfree) 522 FREE(needfree, M_IOV); 523 return (error); 524 } 525 526 /* 527 * Ioctl system call 528 */ 529 #ifndef _SYS_SYSPROTO_H_ 530 struct ioctl_args { 531 int fd; 532 u_long com; 533 caddr_t data; 534 }; 535 #endif 
/*
 * Dispatch an ioctl request on descriptor uap->fd.
 *
 * Returns EBADF if the descriptor is out of range, unused, or open for
 * neither reading nor writing, and ENOTTY if the parameter length
 * encoded in the command exceeds IOCPARM_MAX.  FIOCLEX and FIONCLEX
 * only update the descriptor's UF_EXCLOSE flag and return immediately.
 * For every other command the argument is staged in a kernel buffer
 * (on the stack when it fits in STK_PARAMS bytes, otherwise allocated
 * from M_IOCTLOPS), passed to fo_ioctl(), and copied back to the user
 * for IOC_OUT commands that succeed.
 */
/* ARGSUSED */
int
ioctl(p, uap)
	struct proc *p;
	register struct ioctl_args *uap;
{
	register struct file *fp;
	register struct filedesc *fdp;
	register u_long com;
	int error;
	register u_int size;
	caddr_t data, memp;
	int tmp;
#define STK_PARAMS	128
	/* The `align' member is never accessed; presumably it only forces alignment. */
	union {
	    char stkbuf[STK_PARAMS];
	    long align;
	} ubuf;

	fdp = p->p_fd;
	if ((u_int)uap->fd >= fdp->fd_nfiles ||
	    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
		return (EBADF);

	if ((fp->f_flag & (FREAD | FWRITE)) == 0)
		return (EBADF);

	/* Close-on-exec is per-descriptor state; handled without the file. */
	switch (com = uap->com) {
	case FIONCLEX:
		fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE;
		return (0);
	case FIOCLEX:
		fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE;
		return (0);
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX)
		return (ENOTTY);

	/* From here on every exit path must fdrop() the file. */
	fhold(fp);

	memp = NULL;
	if (size > sizeof (ubuf.stkbuf)) {
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else {
		data = ubuf.stkbuf;
	}
	if (com&IOC_IN) {
		if (size) {
			error = copyin(uap->data, data, (u_int)size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				fdrop(fp, p);
				return (error);
			}
		} else {
			/* No payload: hand the raw user argument through. */
			*(caddr_t *)data = uap->data;
		}
	} else if ((com&IOC_OUT) && size) {
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		bzero(data, size);
	} else if (com&IOC_VOID) {
		*(caddr_t *)data = uap->data;
	}

	switch (com) {

	case FIONBIO:
		/* Mirror the setting in f_flag, then tell the backend. */
		if ((tmp = *(int *)data))
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p);
		break;

	case FIOASYNC:
		/* Mirror the setting in f_flag, then tell the backend. */
		if ((tmp = *(int *)data))
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p);
		break;

	default:
		error = fo_ioctl(fp, com, data, p);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size)
			error = copyout(data, uap->data, (u_int)size);
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
	fdrop(fp, p);
	return (error);
}

/*
 * nselcoll is incremented by selwakeup() when it finds a collision
 * recorded on a selinfo; select() and poll() compare it before sleeping
 * to detect a wakeup they may have missed.  &selwait is the wait
 * channel that select() and poll() sleep on.
 */
static int	nselcoll;	/* Select collisions since boot */
int	selwait;
SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");

/*
 * Select system call.
 *
 * Returns EINVAL if nd is negative or the timeout fails itimerfix().
 * nd is silently clamped to the size of the descriptor table.  The
 * descriptor sets are scanned with selscan(); if nothing is ready the
 * process sleeps on &selwait until woken, interrupted, or the timeout
 * expires, and then rescans.  On success the result sets are copied
 * back over the user's sets and the ready count is left in
 * p->p_retval[0] by selscan().
 */
#ifndef _SYS_SYSPROTO_H_
struct select_args {
	int	nd;
	fd_set	*in, *ou, *ex;
	struct	timeval *tv;
};
#endif
int
select(p, uap)
	register struct proc *p;
	register struct select_args *uap;
{
	/*
	 * The magic 2048 here is chosen to be just enough for FD_SETSIZE
	 * infds with the new FD_SETSIZE of 1024, and more than enough for
	 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE
	 * of 256.
	 */
	fd_mask s_selbits[howmany(2048, NFDBITS)];
	fd_mask *ibits[3], *obits[3], *selbits, *sbp;
	struct timeval atv, rtv, ttv;
	int s, ncoll, error, timo;
	u_int nbufbytes, ncpbytes, nfdbits;

	if (uap->nd < 0)
		return (EINVAL);
	if (uap->nd > p->p_fd->fd_nfiles)
		uap->nd = p->p_fd->fd_nfiles;   /* forgiving; slightly wrong */

	/*
	 * Allocate just enough bits for the non-null fd_sets.  Use the
	 * preallocated auto buffer if possible.
	 */
	nfdbits = roundup(uap->nd, NFDBITS);
	ncpbytes = nfdbits / NBBY;
	nbufbytes = 0;
	/* Each non-null set needs an input copy and an output copy. */
	if (uap->in != NULL)
		nbufbytes += 2 * ncpbytes;
	if (uap->ou != NULL)
		nbufbytes += 2 * ncpbytes;
	if (uap->ex != NULL)
		nbufbytes += 2 * ncpbytes;
	if (nbufbytes <= sizeof s_selbits)
		selbits = &s_selbits[0];
	else
		selbits = malloc(nbufbytes, M_SELECT, M_WAITOK);

	/*
	 * Assign pointers into the bit buffers and fetch the input bits.
	 * Put the output buffers together so that they can be bzeroed
	 * together.
	 */
	sbp = selbits;
	/*
	 * getbits(name, x): for a non-null user set, place obits[x] in the
	 * first half of the buffer and ibits[x] at the same position in the
	 * second half, then copy the user's set into ibits[x].  A copyin
	 * failure jumps to `done'.
	 */
#define	getbits(name, x) \
	do {								\
		if (uap->name == NULL)					\
			ibits[x] = NULL;				\
		else {							\
			ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp;	\
			obits[x] = sbp;					\
			sbp += ncpbytes / sizeof *sbp;			\
			error = copyin(uap->name, ibits[x], ncpbytes);	\
			if (error != 0)					\
				goto done;				\
		}							\
	} while (0)
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits
	/* The first half of the buffer holds all of the output sets. */
	if (nbufbytes != 0)
		bzero(selbits, nbufbytes / 2);

	if (uap->tv) {
		error = copyin((caddr_t)uap->tv, (caddr_t)&atv,
			sizeof (atv));
		if (error)
			goto done;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Turn the relative timeout into an absolute uptime deadline. */
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		/* A zero atv marks "no deadline" for the checks below. */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	/* NOTE(review): timo == 0 presumably means "no timeout" to tsleep -- confirm. */
	timo = 0;
retry:
	/*
	 * Snapshot the collision counter and mark ourselves as selecting
	 * before scanning, so that a selwakeup() during the scan can be
	 * detected below.
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = selscan(p, ibits, obits, uap->nd);
	if (error || p->p_retval[0])
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		/* Sleep for at most 24 hours at a time. */
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	/*
	 * If P_SELECT was cleared or a collision was counted since the
	 * scan started, an event may have been missed: rescan instead of
	 * sleeping.
	 */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;

	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);

	splx(s);
	/* A return of 0 from tsleep leads to a rescan. */
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	/* EWOULDBLOCK is reported to the caller as success. */
	if (error == EWOULDBLOCK)
		error = 0;
	/*
	 * putbits(name, x): copy result set x back to the user's set if one
	 * was supplied; a copyout failure becomes the return value.
	 */
#define	putbits(name, x) \
	if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \
		error = error2;
	if (error == 0) {
		int error2;

		putbits(in, 0);
		putbits(ou, 1);
		putbits(ex, 2);
#undef putbits
	}
	if (selbits != &s_selbits[0])
		free(selbits, M_SELECT);
	return (error);
}

/*
 * Scan the three input sets (read, write, except) for descriptors
 * below `nfd' and poll each one that is set.  For every descriptor
 * whose fo_poll() returns non-zero, the matching bit is set in the
 * corresponding output set.  The number of bits set is stored in
 * p->p_retval[0].  Returns EBADF as soon as a selected descriptor has
 * no open file, 0 otherwise.  Input sets that are NULL are skipped.
 */
static int
selscan(p, ibits, obits, nfd)
	struct proc *p;
	fd_mask **ibits, **obits;
	int nfd;
{
	struct filedesc *fdp = p->p_fd;
	int msk, i, fd;
	fd_mask bits;
	struct file *fp;
	int n = 0;
	/* Note: backend also returns POLLHUP/POLLERR if appropriate. */
	static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };

	for (msk = 0; msk < 3; msk++) {
		if (ibits[msk] == NULL)
			continue;
		/* Walk the set one fd_mask word at a time. */
		for (i = 0; i < nfd; i += NFDBITS) {
			bits = ibits[msk][i/NFDBITS];
			/* ffs(int mask) not portable, fd_mask is long */
			for (fd = i; bits && fd < nfd; fd++, bits >>= 1) {
				if (!(bits & 1))
					continue;
				fp = fdp->fd_ofiles[fd];
				if (fp == NULL)
					return (EBADF);
				if (fo_poll(fp, flag[msk], fp->f_cred, p)) {
					obits[msk][(fd)/NFDBITS] |=
					    ((fd_mask)1 << ((fd) % NFDBITS));
					n++;
				}
			}
		}
	}
	p->p_retval[0] = n;
	return (0);
}

/*
 * Poll system call.
 */
#ifndef _SYS_SYSPROTO_H_
struct poll_args {
	struct pollfd *fds;
	u_int	nfds;
	int	timeout;
};
#endif
/*
 * Copies in the caller's pollfd array (nfds is silently clamped to the
 * size of the descriptor table), scans it with pollscan(), and if
 * nothing is ready sleeps on &selwait until woken, interrupted, or the
 * timeout expires, then rescans.  A timeout of INFTIM means no
 * deadline; any other value is taken as milliseconds and must pass
 * itimerfix() or EINVAL is returned.  On success the array, with
 * revents filled in, is copied back to the user and the ready count is
 * left in p->p_retval[0] by pollscan().
 */
int
poll(p, uap)
	register struct proc *p;
	register struct poll_args *uap;
{
	caddr_t bits;
	char smallbits[32 * sizeof(struct pollfd)];
	struct timeval atv, rtv, ttv;
	int s, ncoll, error = 0, timo;
	size_t ni;

	if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		SCARG(uap, nfds) = p->p_fd->fd_nfiles;
	}
	ni = SCARG(uap, nfds) * sizeof(struct pollfd);
	/* Use the on-stack buffer when the array fits (up to 32 entries). */
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;
	error = copyin(SCARG(uap, fds), bits, ni);
	if (error)
		goto done;
	if (SCARG(uap, timeout) != INFTIM) {
		/* Milliseconds to timeval. */
		atv.tv_sec = SCARG(uap, timeout) / 1000;
		atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
		if (itimerfix(&atv)) {
			error = EINVAL;
			goto done;
		}
		/* Turn the relative timeout into an absolute uptime deadline. */
		getmicrouptime(&rtv);
		timevaladd(&atv, &rtv);
	} else {
		/* A zero atv marks "no deadline" for the checks below. */
		atv.tv_sec = 0;
		atv.tv_usec = 0;
	}
	/* NOTE(review): timo == 0 presumably means "no timeout" to tsleep -- confirm. */
	timo = 0;
retry:
	/*
	 * Snapshot the collision counter and mark ourselves as selecting
	 * before scanning, so that a selwakeup() during the scan can be
	 * detected below.
	 */
	ncoll = nselcoll;
	p->p_flag |= P_SELECT;
	error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds));
	if (error || p->p_retval[0])
		goto done;
	if (atv.tv_sec || atv.tv_usec) {
		getmicrouptime(&rtv);
		if (timevalcmp(&rtv, &atv, >=))
			goto done;
		ttv = atv;
		timevalsub(&ttv, &rtv);
		/* Sleep for at most 24 hours at a time. */
		timo = ttv.tv_sec > 24 * 60 * 60 ?
		    24 * 60 * 60 * hz : tvtohz(&ttv);
	}
	s = splhigh();
	/*
	 * If P_SELECT was cleared or a collision was counted since the
	 * scan started, an event may have been missed: rescan instead of
	 * sleeping.
	 */
	if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	p->p_flag &= ~P_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	/* A return of 0 from tsleep leads to a rescan. */
	if (error == 0)
		goto retry;
done:
	p->p_flag &= ~P_SELECT;
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	/* EWOULDBLOCK is reported to the caller as success. */
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		error = copyout(bits, SCARG(uap, fds), ni);
		if (error)
			goto out;
	}
out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}

/*
 * Fill in revents for each of the `nfd' entries in `fds'.
 *
 * A descriptor at or beyond the end of the descriptor table, or one
 * with no open file, gets POLLNVAL; a negative descriptor gets 0;
 * otherwise revents is whatever fo_poll() returns for the requested
 * events.  The number of entries with non-zero revents is stored in
 * p->p_retval[0].  Always returns 0.
 */
static int
pollscan(p, fds, nfd)
	struct proc *p;
	struct pollfd *fds;
	int nfd;
{
	register struct filedesc *fdp = p->p_fd;
	int i;
	struct file *fp;
	int n = 0;

	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			/* Negative descriptors are ignored and not counted. */
			fds->revents = 0;
		} else {
			fp = fdp->fd_ofiles[fds->fd];
			if (fp == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				/*
				 * Note: backend also returns POLLHUP and
				 * POLLERR if appropriate.
				 */
				fds->revents = fo_poll(fp, fds->events,
				    fp->f_cred, p);
				if (fds->revents != 0)
					n++;
			}
		}
	}
	p->p_retval[0] = n;
	return (0);
}

/*
 * OpenBSD poll system call.
 * XXX this isn't quite a true representation..  OpenBSD uses select ops.
 *
 * Forwards directly to poll(), reinterpreting the argument structure.
 */
#ifndef _SYS_SYSPROTO_H_
struct openbsd_poll_args {
	struct pollfd *fds;
	u_int	nfds;
	int	timeout;
};
#endif
int
openbsd_poll(p, uap)
	register struct proc *p;
	register struct openbsd_poll_args *uap;
{
	return (poll(p, (struct poll_args *)uap));
}

/*
 * Poll routine that reports the device as always ready: returns the
 * subset of `events' made up of POLLIN, POLLOUT, POLLRDNORM and
 * POLLWRNORM.  `dev' and `p' are unused.
 */
/*ARGSUSED*/
int
seltrue(dev, events, p)
	dev_t dev;
	int events;
	struct proc *p;
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}

/*
 * Record a select request.
989 */ 990 void 991 selrecord(selector, sip) 992 struct proc *selector; 993 struct selinfo *sip; 994 { 995 struct proc *p; 996 pid_t mypid; 997 998 mypid = selector->p_pid; 999 if (sip->si_pid == mypid) 1000 return; 1001 if (sip->si_pid && (p = pfind(sip->si_pid)) && 1002 p->p_wchan == (caddr_t)&selwait) 1003 sip->si_flags |= SI_COLL; 1004 else 1005 sip->si_pid = mypid; 1006 } 1007 1008 /* 1009 * Do a wakeup when a selectable event occurs. 1010 */ 1011 void 1012 selwakeup(sip) 1013 register struct selinfo *sip; 1014 { 1015 register struct proc *p; 1016 int s; 1017 1018 if (sip->si_pid == 0) 1019 return; 1020 if (sip->si_flags & SI_COLL) { 1021 nselcoll++; 1022 sip->si_flags &= ~SI_COLL; 1023 wakeup((caddr_t)&selwait); 1024 } 1025 p = pfind(sip->si_pid); 1026 sip->si_pid = 0; 1027 if (p != NULL) { 1028 s = splhigh(); 1029 mtx_enter(&sched_lock, MTX_SPIN); 1030 if (p->p_wchan == (caddr_t)&selwait) { 1031 if (p->p_stat == SSLEEP) 1032 setrunnable(p); 1033 else 1034 unsleep(p); 1035 } else if (p->p_flag & P_SELECT) 1036 p->p_flag &= ~P_SELECT; 1037 mtx_exit(&sched_lock, MTX_SPIN); 1038 splx(s); 1039 } 1040 } 1041