1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysent.h> 60 #include <sys/bio.h> 61 #include <sys/buf.h> 62 #ifdef KTRACE 63 #include <sys/ktrace.h> 64 #endif 65 #include <vm/vm.h> 66 #include <vm/vm_page.h> 67 68 #include <machine/limits.h> 69 70 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 71 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 72 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 73 74 static int pollscan __P((struct proc *, struct pollfd *, int)); 75 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 76 static int dofileread __P((struct proc *, struct file *, int, void *, 77 size_t, off_t, int)); 78 static int dofilewrite __P((struct proc *, struct file *, int, 79 const void *, size_t, off_t, int)); 80 81 struct file* 82 holdfp(fdp, fd, flag) 83 struct filedesc* fdp; 84 int fd, flag; 85 { 86 struct file* fp; 87 88 if (((u_int)fd) >= fdp->fd_nfiles || 89 (fp = fdp->fd_ofiles[fd]) == NULL || 90 (fp->f_flag & flag) == 0) { 91 return (NULL); 92 } 93 fhold(fp); 94 return (fp); 95 } 96 97 /* 98 * Read system call. 99 */ 100 #ifndef _SYS_SYSPROTO_H_ 101 struct read_args { 102 int fd; 103 void *buf; 104 size_t nbyte; 105 }; 106 #endif 107 int 108 read(p, uap) 109 struct proc *p; 110 register struct read_args *uap; 111 { 112 register struct file *fp; 113 int error; 114 115 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 116 return (EBADF); 117 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 118 fdrop(fp, p); 119 return(error); 120 } 121 122 /* 123 * Pread system call 124 */ 125 #ifndef _SYS_SYSPROTO_H_ 126 struct pread_args { 127 int fd; 128 void *buf; 129 size_t nbyte; 130 int pad; 131 off_t offset; 132 }; 133 #endif 134 int 135 pread(p, uap) 136 struct proc *p; 137 register struct pread_args *uap; 138 { 139 register struct file *fp; 140 int error; 141 142 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 143 return (EBADF); 144 if (fp->f_type != DTYPE_VNODE) { 145 error = ESPIPE; 146 } else { 147 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 148 uap->offset, FOF_OFFSET); 149 } 150 fdrop(fp, p); 151 return(error); 152 } 153 154 /* 155 * Code common for read and pread 156 */ 157 int 158 dofileread(p, fp, fd, buf, nbyte, offset, flags) 159 struct proc *p; 160 struct file *fp; 161 int fd, flags; 162 void *buf; 163 size_t nbyte; 164 off_t offset; 165 { 166 struct uio auio; 167 struct iovec aiov; 168 long cnt, error = 0; 169 #ifdef KTRACE 170 struct iovec ktriov; 171 struct uio ktruio; 172 int didktr = 0; 173 #endif 174 175 aiov.iov_base = (caddr_t)buf; 176 aiov.iov_len = nbyte; 177 auio.uio_iov = &aiov; 178 auio.uio_iovcnt = 1; 179 auio.uio_offset = offset; 180 if (nbyte > INT_MAX) 181 return (EINVAL); 182 auio.uio_resid = nbyte; 183 auio.uio_rw = UIO_READ; 184 auio.uio_segflg = UIO_USERSPACE; 185 auio.uio_procp = p; 186 #ifdef KTRACE 187 /* 188 * if tracing, save a copy of iovec 189 */ 190 if (KTRPOINT(p, KTR_GENIO)) { 191 ktriov = aiov; 192 ktruio = auio; 193 didktr = 1; 194 } 195 #endif 196 cnt = nbyte; 197 198 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 199 if (auio.uio_resid != cnt && (error == ERESTART || 200 error == EINTR || error == EWOULDBLOCK)) 201 error = 0; 202 } 203 cnt -= auio.uio_resid; 204 #ifdef KTRACE 205 if (didktr && error == 0) { 206 ktruio.uio_iov = &ktriov; 207 ktruio.uio_resid = cnt; 208 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 209 } 210 #endif 211 p->p_retval[0] = cnt; 212 return (error); 213 } 214 215 /* 216 * Scatter read system call. 217 */ 218 #ifndef _SYS_SYSPROTO_H_ 219 struct readv_args { 220 int fd; 221 struct iovec *iovp; 222 u_int iovcnt; 223 }; 224 #endif 225 int 226 readv(p, uap) 227 struct proc *p; 228 register struct readv_args *uap; 229 { 230 register struct file *fp; 231 register struct filedesc *fdp = p->p_fd; 232 struct uio auio; 233 register struct iovec *iov; 234 struct iovec *needfree; 235 struct iovec aiov[UIO_SMALLIOV]; 236 long i, cnt, error = 0; 237 u_int iovlen; 238 #ifdef KTRACE 239 struct iovec *ktriov = NULL; 240 struct uio ktruio; 241 #endif 242 243 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 244 return (EBADF); 245 /* note: can't use iovlen until iovcnt is validated */ 246 iovlen = uap->iovcnt * sizeof (struct iovec); 247 if (uap->iovcnt > UIO_SMALLIOV) { 248 if (uap->iovcnt > UIO_MAXIOV) 249 return (EINVAL); 250 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 251 needfree = iov; 252 } else { 253 iov = aiov; 254 needfree = NULL; 255 } 256 auio.uio_iov = iov; 257 auio.uio_iovcnt = uap->iovcnt; 258 auio.uio_rw = UIO_READ; 259 auio.uio_segflg = UIO_USERSPACE; 260 auio.uio_procp = p; 261 auio.uio_offset = -1; 262 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 263 goto done; 264 auio.uio_resid = 0; 265 for (i = 0; i < uap->iovcnt; i++) { 266 if (iov->iov_len > INT_MAX - auio.uio_resid) { 267 error = EINVAL; 268 goto done; 269 } 270 auio.uio_resid += iov->iov_len; 271 iov++; 272 } 273 #ifdef KTRACE 274 /* 275 * if tracing, save a copy of iovec 276 */ 277 if (KTRPOINT(p, KTR_GENIO)) { 278 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 279 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 280 ktruio = auio; 281 } 282 #endif 283 cnt = auio.uio_resid; 284 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 285 if (auio.uio_resid != cnt && (error == ERESTART || 286 error == EINTR || error == EWOULDBLOCK)) 287 error = 0; 288 } 289 cnt -= auio.uio_resid; 290 #ifdef KTRACE 291 if (ktriov != NULL) { 292 if (error == 0) { 293 ktruio.uio_iov = ktriov; 294 ktruio.uio_resid = cnt; 295 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 296 error); 297 } 298 FREE(ktriov, M_TEMP); 299 } 300 #endif 301 p->p_retval[0] = cnt; 302 done: 303 fdrop(fp, p); 304 if (needfree) 305 FREE(needfree, M_IOV); 306 return (error); 307 } 308 309 /* 310 * Write system call 311 */ 312 #ifndef _SYS_SYSPROTO_H_ 313 struct write_args { 314 int fd; 315 const void *buf; 316 size_t nbyte; 317 }; 318 #endif 319 int 320 write(p, uap) 321 struct proc *p; 322 register struct write_args *uap; 323 { 324 register struct file *fp; 325 int error; 326 327 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 328 return (EBADF); 329 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 330 fdrop(fp, p); 331 return(error); 332 } 333 334 /* 335 * Pwrite system call 336 */ 337 #ifndef _SYS_SYSPROTO_H_ 338 struct pwrite_args { 339 int fd; 340 const void *buf; 341 size_t nbyte; 342 int pad; 343 off_t offset; 344 }; 345 #endif 346 int 347 pwrite(p, uap) 348 struct proc *p; 349 register struct pwrite_args *uap; 350 { 351 register struct file *fp; 352 int error; 353 354 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 355 return (EBADF); 356 if (fp->f_type != DTYPE_VNODE) { 357 error = ESPIPE; 358 } else { 359 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 360 uap->offset, FOF_OFFSET); 361 } 362 fdrop(fp, p); 363 return(error); 364 } 365 366 static int 367 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 368 struct proc *p; 369 struct file *fp; 370 int fd, flags; 371 const void *buf; 372 size_t nbyte; 373 off_t offset; 374 { 375 struct uio auio; 376 struct iovec aiov; 377 long cnt, error = 0; 378 #ifdef KTRACE 379 struct iovec ktriov; 380 struct uio ktruio; 381 int didktr = 0; 382 #endif 383 384 aiov.iov_base = (void *)(uintptr_t)buf; 385 aiov.iov_len = nbyte; 386 auio.uio_iov = &aiov; 387 auio.uio_iovcnt = 1; 388 auio.uio_offset = offset; 389 if (nbyte > INT_MAX) 390 return (EINVAL); 391 auio.uio_resid = nbyte; 392 auio.uio_rw = UIO_WRITE; 393 auio.uio_segflg = UIO_USERSPACE; 394 auio.uio_procp = p; 395 #ifdef KTRACE 396 /* 397 * if tracing, save a copy of iovec and uio 398 */ 399 if (KTRPOINT(p, KTR_GENIO)) { 400 ktriov = aiov; 401 ktruio = auio; 402 didktr = 1; 403 } 404 #endif 405 cnt = nbyte; 406 bwillwrite(); 407 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 408 if (auio.uio_resid != cnt && (error == ERESTART || 409 error == EINTR || error == EWOULDBLOCK)) 410 error = 0; 411 if (error == EPIPE) 412 psignal(p, SIGPIPE); 413 } 414 cnt -= auio.uio_resid; 415 #ifdef KTRACE 416 if (didktr && error == 0) { 417 ktruio.uio_iov = &ktriov; 418 ktruio.uio_resid = cnt; 419 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 420 } 421 #endif 422 p->p_retval[0] = cnt; 423 return (error); 424 } 425 426 /* 427 * Gather write system call 428 */ 429 #ifndef _SYS_SYSPROTO_H_ 430 struct writev_args { 431 int fd; 432 struct iovec *iovp; 433 u_int iovcnt; 434 }; 435 #endif 436 int 437 writev(p, uap) 438 struct proc *p; 439 register struct writev_args *uap; 440 { 441 register struct file *fp; 442 register struct filedesc *fdp = p->p_fd; 443 struct uio auio; 444 register struct iovec *iov; 445 struct iovec *needfree; 446 struct iovec aiov[UIO_SMALLIOV]; 447 long i, cnt, error = 0; 448 u_int iovlen; 449 #ifdef KTRACE 450 struct iovec *ktriov = NULL; 451 struct uio ktruio; 452 #endif 453 454 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 455 return (EBADF); 456 /* note: can't use iovlen until iovcnt is validated */ 457 iovlen = uap->iovcnt * sizeof (struct iovec); 458 if (uap->iovcnt > UIO_SMALLIOV) { 459 if (uap->iovcnt > UIO_MAXIOV) { 460 needfree = NULL; 461 error = EINVAL; 462 goto done; 463 } 464 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 465 needfree = iov; 466 } else { 467 iov = aiov; 468 needfree = NULL; 469 } 470 auio.uio_iov = iov; 471 auio.uio_iovcnt = uap->iovcnt; 472 auio.uio_rw = UIO_WRITE; 473 auio.uio_segflg = UIO_USERSPACE; 474 auio.uio_procp = p; 475 auio.uio_offset = -1; 476 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 477 goto done; 478 auio.uio_resid = 0; 479 for (i = 0; i < uap->iovcnt; i++) { 480 if (iov->iov_len > INT_MAX - auio.uio_resid) { 481 error = EINVAL; 482 goto done; 483 } 484 auio.uio_resid += iov->iov_len; 485 iov++; 486 } 487 #ifdef KTRACE 488 /* 489 * if tracing, save a copy of iovec and uio 490 */ 491 if (KTRPOINT(p, KTR_GENIO)) { 492 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 493 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 494 ktruio = auio; 495 } 496 #endif 497 cnt = auio.uio_resid; 498 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 499 if (auio.uio_resid != cnt && (error == ERESTART || 500 error == EINTR || error == EWOULDBLOCK)) 501 error = 0; 502 if (error == EPIPE) 503 psignal(p, SIGPIPE); 504 } 505 cnt -= auio.uio_resid; 506 #ifdef KTRACE 507 if (ktriov != NULL) { 508 if (error == 0) { 509 ktruio.uio_iov = ktriov; 510 ktruio.uio_resid = cnt; 511 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 512 error); 513 } 514 FREE(ktriov, M_TEMP); 515 } 516 #endif 517 p->p_retval[0] = cnt; 518 done: 519 fdrop(fp, p); 520 if (needfree) 521 FREE(needfree, M_IOV); 522 return (error); 523 } 524 525 /* 526 * Ioctl system call 527 */ 528 #ifndef _SYS_SYSPROTO_H_ 529 struct ioctl_args { 530 int fd; 531 u_long com; 532 caddr_t data; 533 }; 534 #endif 535 /* ARGSUSED */ 536 int 537 ioctl(p, uap) 538 struct proc *p; 539 register struct ioctl_args *uap; 540 { 541 register struct file *fp; 542 register struct filedesc *fdp; 543 register u_long com; 544 int error; 545 register u_int size; 546 caddr_t data, memp; 547 int tmp; 548 #define STK_PARAMS 128 549 union { 550 char stkbuf[STK_PARAMS]; 551 long align; 552 } ubuf; 553 554 fdp = p->p_fd; 555 if ((u_int)uap->fd >= fdp->fd_nfiles || 556 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 557 return (EBADF); 558 559 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 560 return (EBADF); 561 562 switch (com = uap->com) { 563 case FIONCLEX: 564 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 565 return (0); 566 case FIOCLEX: 567 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 568 return (0); 569 } 570 571 /* 572 * Interpret high order word to find amount of data to be 573 * copied to/from the user's address space. 574 */ 575 size = IOCPARM_LEN(com); 576 if (size > IOCPARM_MAX) 577 return (ENOTTY); 578 579 fhold(fp); 580 581 memp = NULL; 582 if (size > sizeof (ubuf.stkbuf)) { 583 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 584 data = memp; 585 } else { 586 data = ubuf.stkbuf; 587 } 588 if (com&IOC_IN) { 589 if (size) { 590 error = copyin(uap->data, data, (u_int)size); 591 if (error) { 592 if (memp) 593 free(memp, M_IOCTLOPS); 594 fdrop(fp, p); 595 return (error); 596 } 597 } else { 598 *(caddr_t *)data = uap->data; 599 } 600 } else if ((com&IOC_OUT) && size) { 601 /* 602 * Zero the buffer so the user always 603 * gets back something deterministic. 604 */ 605 bzero(data, size); 606 } else if (com&IOC_VOID) { 607 *(caddr_t *)data = uap->data; 608 } 609 610 switch (com) { 611 612 case FIONBIO: 613 if ((tmp = *(int *)data)) 614 fp->f_flag |= FNONBLOCK; 615 else 616 fp->f_flag &= ~FNONBLOCK; 617 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 618 break; 619 620 case FIOASYNC: 621 if ((tmp = *(int *)data)) 622 fp->f_flag |= FASYNC; 623 else 624 fp->f_flag &= ~FASYNC; 625 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 626 break; 627 628 default: 629 error = fo_ioctl(fp, com, data, p); 630 /* 631 * Copy any data to user, size was 632 * already set and checked above. 633 */ 634 if (error == 0 && (com&IOC_OUT) && size) 635 error = copyout(data, uap->data, (u_int)size); 636 break; 637 } 638 if (memp) 639 free(memp, M_IOCTLOPS); 640 fdrop(fp, p); 641 return (error); 642 } 643 644 static int nselcoll; /* Select collisions since boot */ 645 int selwait; 646 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 647 648 /* 649 * Select system call. 650 */ 651 #ifndef _SYS_SYSPROTO_H_ 652 struct select_args { 653 int nd; 654 fd_set *in, *ou, *ex; 655 struct timeval *tv; 656 }; 657 #endif 658 int 659 select(p, uap) 660 register struct proc *p; 661 register struct select_args *uap; 662 { 663 /* 664 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 665 * infds with the new FD_SETSIZE of 1024, and more than enough for 666 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 667 * of 256. 668 */ 669 fd_mask s_selbits[howmany(2048, NFDBITS)]; 670 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 671 struct timeval atv, rtv, ttv; 672 int s, ncoll, error, timo; 673 u_int nbufbytes, ncpbytes, nfdbits; 674 675 if (uap->nd < 0) 676 return (EINVAL); 677 if (uap->nd > p->p_fd->fd_nfiles) 678 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 679 680 /* 681 * Allocate just enough bits for the non-null fd_sets. Use the 682 * preallocated auto buffer if possible. 683 */ 684 nfdbits = roundup(uap->nd, NFDBITS); 685 ncpbytes = nfdbits / NBBY; 686 nbufbytes = 0; 687 if (uap->in != NULL) 688 nbufbytes += 2 * ncpbytes; 689 if (uap->ou != NULL) 690 nbufbytes += 2 * ncpbytes; 691 if (uap->ex != NULL) 692 nbufbytes += 2 * ncpbytes; 693 if (nbufbytes <= sizeof s_selbits) 694 selbits = &s_selbits[0]; 695 else 696 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 697 698 /* 699 * Assign pointers into the bit buffers and fetch the input bits. 700 * Put the output buffers together so that they can be bzeroed 701 * together. 702 */ 703 sbp = selbits; 704 #define getbits(name, x) \ 705 do { \ 706 if (uap->name == NULL) \ 707 ibits[x] = NULL; \ 708 else { \ 709 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 710 obits[x] = sbp; \ 711 sbp += ncpbytes / sizeof *sbp; \ 712 error = copyin(uap->name, ibits[x], ncpbytes); \ 713 if (error != 0) \ 714 goto done; \ 715 } \ 716 } while (0) 717 getbits(in, 0); 718 getbits(ou, 1); 719 getbits(ex, 2); 720 #undef getbits 721 if (nbufbytes != 0) 722 bzero(selbits, nbufbytes / 2); 723 724 if (uap->tv) { 725 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 726 sizeof (atv)); 727 if (error) 728 goto done; 729 if (itimerfix(&atv)) { 730 error = EINVAL; 731 goto done; 732 } 733 getmicrouptime(&rtv); 734 timevaladd(&atv, &rtv); 735 } else { 736 atv.tv_sec = 0; 737 atv.tv_usec = 0; 738 } 739 timo = 0; 740 retry: 741 ncoll = nselcoll; 742 p->p_flag |= P_SELECT; 743 error = selscan(p, ibits, obits, uap->nd); 744 if (error || p->p_retval[0]) 745 goto done; 746 if (atv.tv_sec || atv.tv_usec) { 747 getmicrouptime(&rtv); 748 if (timevalcmp(&rtv, &atv, >=)) 749 goto done; 750 ttv = atv; 751 timevalsub(&ttv, &rtv); 752 timo = ttv.tv_sec > 24 * 60 * 60 ? 753 24 * 60 * 60 * hz : tvtohz(&ttv); 754 } 755 s = splhigh(); 756 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 757 splx(s); 758 goto retry; 759 } 760 p->p_flag &= ~P_SELECT; 761 762 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 763 764 splx(s); 765 if (error == 0) 766 goto retry; 767 done: 768 p->p_flag &= ~P_SELECT; 769 /* select is not restarted after signals... */ 770 if (error == ERESTART) 771 error = EINTR; 772 if (error == EWOULDBLOCK) 773 error = 0; 774 #define putbits(name, x) \ 775 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 776 error = error2; 777 if (error == 0) { 778 int error2; 779 780 putbits(in, 0); 781 putbits(ou, 1); 782 putbits(ex, 2); 783 #undef putbits 784 } 785 if (selbits != &s_selbits[0]) 786 free(selbits, M_SELECT); 787 return (error); 788 } 789 790 static int 791 selscan(p, ibits, obits, nfd) 792 struct proc *p; 793 fd_mask **ibits, **obits; 794 int nfd; 795 { 796 struct filedesc *fdp = p->p_fd; 797 int msk, i, fd; 798 fd_mask bits; 799 struct file *fp; 800 int n = 0; 801 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 802 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 803 804 for (msk = 0; msk < 3; msk++) { 805 if (ibits[msk] == NULL) 806 continue; 807 for (i = 0; i < nfd; i += NFDBITS) { 808 bits = ibits[msk][i/NFDBITS]; 809 /* ffs(int mask) not portable, fd_mask is long */ 810 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 811 if (!(bits & 1)) 812 continue; 813 fp = fdp->fd_ofiles[fd]; 814 if (fp == NULL) 815 return (EBADF); 816 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 817 obits[msk][(fd)/NFDBITS] |= 818 ((fd_mask)1 << ((fd) % NFDBITS)); 819 n++; 820 } 821 } 822 } 823 } 824 p->p_retval[0] = n; 825 return (0); 826 } 827 828 /* 829 * Poll system call. 830 */ 831 #ifndef _SYS_SYSPROTO_H_ 832 struct poll_args { 833 struct pollfd *fds; 834 u_int nfds; 835 int timeout; 836 }; 837 #endif 838 int 839 poll(p, uap) 840 register struct proc *p; 841 register struct poll_args *uap; 842 { 843 caddr_t bits; 844 char smallbits[32 * sizeof(struct pollfd)]; 845 struct timeval atv, rtv, ttv; 846 int s, ncoll, error = 0, timo; 847 size_t ni; 848 849 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 850 /* forgiving; slightly wrong */ 851 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 852 } 853 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 854 if (ni > sizeof(smallbits)) 855 bits = malloc(ni, M_TEMP, M_WAITOK); 856 else 857 bits = smallbits; 858 error = copyin(SCARG(uap, fds), bits, ni); 859 if (error) 860 goto done; 861 if (SCARG(uap, timeout) != INFTIM) { 862 atv.tv_sec = SCARG(uap, timeout) / 1000; 863 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 864 if (itimerfix(&atv)) { 865 error = EINVAL; 866 goto done; 867 } 868 getmicrouptime(&rtv); 869 timevaladd(&atv, &rtv); 870 } else { 871 atv.tv_sec = 0; 872 atv.tv_usec = 0; 873 } 874 timo = 0; 875 retry: 876 ncoll = nselcoll; 877 p->p_flag |= P_SELECT; 878 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 879 if (error || p->p_retval[0]) 880 goto done; 881 if (atv.tv_sec || atv.tv_usec) { 882 getmicrouptime(&rtv); 883 if (timevalcmp(&rtv, &atv, >=)) 884 goto done; 885 ttv = atv; 886 timevalsub(&ttv, &rtv); 887 timo = ttv.tv_sec > 24 * 60 * 60 ? 888 24 * 60 * 60 * hz : tvtohz(&ttv); 889 } 890 s = splhigh(); 891 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 892 splx(s); 893 goto retry; 894 } 895 p->p_flag &= ~P_SELECT; 896 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 897 splx(s); 898 if (error == 0) 899 goto retry; 900 done: 901 p->p_flag &= ~P_SELECT; 902 /* poll is not restarted after signals... */ 903 if (error == ERESTART) 904 error = EINTR; 905 if (error == EWOULDBLOCK) 906 error = 0; 907 if (error == 0) { 908 error = copyout(bits, SCARG(uap, fds), ni); 909 if (error) 910 goto out; 911 } 912 out: 913 if (ni > sizeof(smallbits)) 914 free(bits, M_TEMP); 915 return (error); 916 } 917 918 static int 919 pollscan(p, fds, nfd) 920 struct proc *p; 921 struct pollfd *fds; 922 int nfd; 923 { 924 register struct filedesc *fdp = p->p_fd; 925 int i; 926 struct file *fp; 927 int n = 0; 928 929 for (i = 0; i < nfd; i++, fds++) { 930 if (fds->fd >= fdp->fd_nfiles) { 931 fds->revents = POLLNVAL; 932 n++; 933 } else if (fds->fd < 0) { 934 fds->revents = 0; 935 } else { 936 fp = fdp->fd_ofiles[fds->fd]; 937 if (fp == NULL) { 938 fds->revents = POLLNVAL; 939 n++; 940 } else { 941 /* 942 * Note: backend also returns POLLHUP and 943 * POLLERR if appropriate. 944 */ 945 fds->revents = fo_poll(fp, fds->events, 946 fp->f_cred, p); 947 if (fds->revents != 0) 948 n++; 949 } 950 } 951 } 952 p->p_retval[0] = n; 953 return (0); 954 } 955 956 /* 957 * OpenBSD poll system call. 958 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 959 */ 960 #ifndef _SYS_SYSPROTO_H_ 961 struct openbsd_poll_args { 962 struct pollfd *fds; 963 u_int nfds; 964 int timeout; 965 }; 966 #endif 967 int 968 openbsd_poll(p, uap) 969 register struct proc *p; 970 register struct openbsd_poll_args *uap; 971 { 972 return (poll(p, (struct poll_args *)uap)); 973 } 974 975 /*ARGSUSED*/ 976 int 977 seltrue(dev, events, p) 978 dev_t dev; 979 int events; 980 struct proc *p; 981 { 982 983 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 984 } 985 986 /* 987 * Record a select request. 988 */ 989 void 990 selrecord(selector, sip) 991 struct proc *selector; 992 struct selinfo *sip; 993 { 994 struct proc *p; 995 pid_t mypid; 996 997 mypid = selector->p_pid; 998 if (sip->si_pid == mypid) 999 return; 1000 if (sip->si_pid && (p = pfind(sip->si_pid)) && 1001 p->p_wchan == (caddr_t)&selwait) 1002 sip->si_flags |= SI_COLL; 1003 else 1004 sip->si_pid = mypid; 1005 } 1006 1007 /* 1008 * Do a wakeup when a selectable event occurs. 1009 */ 1010 void 1011 selwakeup(sip) 1012 register struct selinfo *sip; 1013 { 1014 register struct proc *p; 1015 int s; 1016 1017 if (sip->si_pid == 0) 1018 return; 1019 if (sip->si_flags & SI_COLL) { 1020 nselcoll++; 1021 sip->si_flags &= ~SI_COLL; 1022 wakeup((caddr_t)&selwait); 1023 } 1024 p = pfind(sip->si_pid); 1025 sip->si_pid = 0; 1026 if (p != NULL) { 1027 s = splhigh(); 1028 mtx_enter(&sched_lock, MTX_SPIN); 1029 if (p->p_wchan == (caddr_t)&selwait) { 1030 if (p->p_stat == SSLEEP) 1031 setrunnable(p); 1032 else 1033 unsleep(p); 1034 } else if (p->p_flag & P_SELECT) 1035 p->p_flag &= ~P_SELECT; 1036 mtx_exit(&sched_lock, MTX_SPIN); 1037 splx(s); 1038 } 1039 } 1040