1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/resourcevar.h> 59 #include <sys/selinfo.h> 60 #include <sys/sysctl.h> 61 #include <sys/sysent.h> 62 #include <sys/bio.h> 63 #include <sys/buf.h> 64 #ifdef KTRACE 65 #include <sys/ktrace.h> 66 #endif 67 #include <vm/vm.h> 68 #include <vm/vm_page.h> 69 70 #include <machine/limits.h> 71 72 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 73 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 74 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 75 76 static int pollscan __P((struct proc *, struct pollfd *, int)); 77 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 78 static int dofileread __P((struct proc *, struct file *, int, void *, 79 size_t, off_t, int)); 80 static int dofilewrite __P((struct proc *, struct file *, int, 81 const void *, size_t, off_t, int)); 82 83 struct file* 84 holdfp(fdp, fd, flag) 85 struct filedesc* fdp; 86 int fd, flag; 87 { 88 struct file* fp; 89 90 if (((u_int)fd) >= fdp->fd_nfiles || 91 (fp = fdp->fd_ofiles[fd]) == NULL || 92 (fp->f_flag & flag) == 0) { 93 return (NULL); 94 } 95 fhold(fp); 96 return (fp); 97 } 98 99 /* 100 * Read system call. 101 */ 102 #ifndef _SYS_SYSPROTO_H_ 103 struct read_args { 104 int fd; 105 void *buf; 106 size_t nbyte; 107 }; 108 #endif 109 int 110 read(p, uap) 111 struct proc *p; 112 register struct read_args *uap; 113 { 114 register struct file *fp; 115 int error; 116 117 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 118 return (EBADF); 119 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 120 fdrop(fp, p); 121 return(error); 122 } 123 124 /* 125 * Pread system call 126 */ 127 #ifndef _SYS_SYSPROTO_H_ 128 struct pread_args { 129 int fd; 130 void *buf; 131 size_t nbyte; 132 int pad; 133 off_t offset; 134 }; 135 #endif 136 int 137 pread(p, uap) 138 struct proc *p; 139 register struct pread_args *uap; 140 { 141 register struct file *fp; 142 int error; 143 144 if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) 145 return (EBADF); 146 if (fp->f_type != DTYPE_VNODE) { 147 error = ESPIPE; 148 } else { 149 error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, 150 uap->offset, FOF_OFFSET); 151 } 152 fdrop(fp, p); 153 return(error); 154 } 155 156 /* 157 * Code common for read and pread 158 */ 159 int 160 dofileread(p, fp, fd, buf, nbyte, offset, flags) 161 struct proc *p; 162 struct file *fp; 163 int fd, flags; 164 void *buf; 165 size_t nbyte; 166 off_t offset; 167 { 168 struct uio auio; 169 struct iovec aiov; 170 long cnt, error = 0; 171 #ifdef KTRACE 172 struct iovec ktriov; 173 struct uio ktruio; 174 int didktr = 0; 175 #endif 176 177 aiov.iov_base = (caddr_t)buf; 178 aiov.iov_len = nbyte; 179 auio.uio_iov = &aiov; 180 auio.uio_iovcnt = 1; 181 auio.uio_offset = offset; 182 if (nbyte > INT_MAX) 183 return (EINVAL); 184 auio.uio_resid = nbyte; 185 auio.uio_rw = UIO_READ; 186 auio.uio_segflg = UIO_USERSPACE; 187 auio.uio_procp = p; 188 #ifdef KTRACE 189 /* 190 * if tracing, save a copy of iovec 191 */ 192 if (KTRPOINT(p, KTR_GENIO)) { 193 ktriov = aiov; 194 ktruio = auio; 195 didktr = 1; 196 } 197 #endif 198 cnt = nbyte; 199 200 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { 201 if (auio.uio_resid != cnt && (error == ERESTART || 202 error == EINTR || error == EWOULDBLOCK)) 203 error = 0; 204 } 205 cnt -= auio.uio_resid; 206 #ifdef KTRACE 207 if (didktr && error == 0) { 208 ktruio.uio_iov = &ktriov; 209 ktruio.uio_resid = cnt; 210 ktrgenio(p->p_tracep, fd, UIO_READ, &ktruio, error); 211 } 212 #endif 213 p->p_retval[0] = cnt; 214 return (error); 215 } 216 217 /* 218 * Scatter read system call. 219 */ 220 #ifndef _SYS_SYSPROTO_H_ 221 struct readv_args { 222 int fd; 223 struct iovec *iovp; 224 u_int iovcnt; 225 }; 226 #endif 227 int 228 readv(p, uap) 229 struct proc *p; 230 register struct readv_args *uap; 231 { 232 register struct file *fp; 233 register struct filedesc *fdp = p->p_fd; 234 struct uio auio; 235 register struct iovec *iov; 236 struct iovec *needfree; 237 struct iovec aiov[UIO_SMALLIOV]; 238 long i, cnt, error = 0; 239 u_int iovlen; 240 #ifdef KTRACE 241 struct iovec *ktriov = NULL; 242 struct uio ktruio; 243 #endif 244 245 if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) 246 return (EBADF); 247 /* note: can't use iovlen until iovcnt is validated */ 248 iovlen = uap->iovcnt * sizeof (struct iovec); 249 if (uap->iovcnt > UIO_SMALLIOV) { 250 if (uap->iovcnt > UIO_MAXIOV) 251 return (EINVAL); 252 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 253 needfree = iov; 254 } else { 255 iov = aiov; 256 needfree = NULL; 257 } 258 auio.uio_iov = iov; 259 auio.uio_iovcnt = uap->iovcnt; 260 auio.uio_rw = UIO_READ; 261 auio.uio_segflg = UIO_USERSPACE; 262 auio.uio_procp = p; 263 auio.uio_offset = -1; 264 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 265 goto done; 266 auio.uio_resid = 0; 267 for (i = 0; i < uap->iovcnt; i++) { 268 if (iov->iov_len > INT_MAX - auio.uio_resid) { 269 error = EINVAL; 270 goto done; 271 } 272 auio.uio_resid += iov->iov_len; 273 iov++; 274 } 275 #ifdef KTRACE 276 /* 277 * if tracing, save a copy of iovec 278 */ 279 if (KTRPOINT(p, KTR_GENIO)) { 280 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 281 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 282 ktruio = auio; 283 } 284 #endif 285 cnt = auio.uio_resid; 286 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { 287 if (auio.uio_resid != cnt && (error == ERESTART || 288 error == EINTR || error == EWOULDBLOCK)) 289 error = 0; 290 } 291 cnt -= auio.uio_resid; 292 #ifdef KTRACE 293 if (ktriov != NULL) { 294 if (error == 0) { 295 ktruio.uio_iov = ktriov; 296 ktruio.uio_resid = cnt; 297 ktrgenio(p->p_tracep, uap->fd, UIO_READ, &ktruio, 298 error); 299 } 300 FREE(ktriov, M_TEMP); 301 } 302 #endif 303 p->p_retval[0] = cnt; 304 done: 305 fdrop(fp, p); 306 if (needfree) 307 FREE(needfree, M_IOV); 308 return (error); 309 } 310 311 /* 312 * Write system call 313 */ 314 #ifndef _SYS_SYSPROTO_H_ 315 struct write_args { 316 int fd; 317 const void *buf; 318 size_t nbyte; 319 }; 320 #endif 321 int 322 write(p, uap) 323 struct proc *p; 324 register struct write_args *uap; 325 { 326 register struct file *fp; 327 int error; 328 329 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 330 return (EBADF); 331 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); 332 fdrop(fp, p); 333 return(error); 334 } 335 336 /* 337 * Pwrite system call 338 */ 339 #ifndef _SYS_SYSPROTO_H_ 340 struct pwrite_args { 341 int fd; 342 const void *buf; 343 size_t nbyte; 344 int pad; 345 off_t offset; 346 }; 347 #endif 348 int 349 pwrite(p, uap) 350 struct proc *p; 351 register struct pwrite_args *uap; 352 { 353 register struct file *fp; 354 int error; 355 356 if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) 357 return (EBADF); 358 if (fp->f_type != DTYPE_VNODE) { 359 error = ESPIPE; 360 } else { 361 error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, 362 uap->offset, FOF_OFFSET); 363 } 364 fdrop(fp, p); 365 return(error); 366 } 367 368 static int 369 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 370 struct proc *p; 371 struct file *fp; 372 int fd, flags; 373 const void *buf; 374 size_t nbyte; 375 off_t offset; 376 { 377 struct uio auio; 378 struct iovec aiov; 379 long cnt, error = 0; 380 #ifdef KTRACE 381 struct iovec ktriov; 382 struct uio ktruio; 383 int didktr = 0; 384 #endif 385 386 aiov.iov_base = (void *)(uintptr_t)buf; 387 aiov.iov_len = nbyte; 388 auio.uio_iov = &aiov; 389 auio.uio_iovcnt = 1; 390 auio.uio_offset = offset; 391 if (nbyte > INT_MAX) 392 return (EINVAL); 393 auio.uio_resid = nbyte; 394 auio.uio_rw = UIO_WRITE; 395 auio.uio_segflg = UIO_USERSPACE; 396 auio.uio_procp = p; 397 #ifdef KTRACE 398 /* 399 * if tracing, save a copy of iovec and uio 400 */ 401 if (KTRPOINT(p, KTR_GENIO)) { 402 ktriov = aiov; 403 ktruio = auio; 404 didktr = 1; 405 } 406 #endif 407 cnt = nbyte; 408 if (fp->f_type == DTYPE_VNODE) 409 bwillwrite(); 410 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 411 if (auio.uio_resid != cnt && (error == ERESTART || 412 error == EINTR || error == EWOULDBLOCK)) 413 error = 0; 414 if (error == EPIPE) 415 psignal(p, SIGPIPE); 416 } 417 cnt -= auio.uio_resid; 418 #ifdef KTRACE 419 if (didktr && error == 0) { 420 ktruio.uio_iov = &ktriov; 421 ktruio.uio_resid = cnt; 422 ktrgenio(p->p_tracep, fd, UIO_WRITE, &ktruio, error); 423 } 424 #endif 425 p->p_retval[0] = cnt; 426 return (error); 427 } 428 429 /* 430 * Gather write system call 431 */ 432 #ifndef _SYS_SYSPROTO_H_ 433 struct writev_args { 434 int fd; 435 struct iovec *iovp; 436 u_int iovcnt; 437 }; 438 #endif 439 int 440 writev(p, uap) 441 struct proc *p; 442 register struct writev_args *uap; 443 { 444 register struct file *fp; 445 register struct filedesc *fdp = p->p_fd; 446 struct uio auio; 447 register struct iovec *iov; 448 struct iovec *needfree; 449 struct iovec aiov[UIO_SMALLIOV]; 450 long i, cnt, error = 0; 451 u_int iovlen; 452 #ifdef KTRACE 453 struct iovec *ktriov = NULL; 454 struct uio ktruio; 455 #endif 456 457 if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) 458 return (EBADF); 459 /* note: can't use iovlen until iovcnt is validated */ 460 iovlen = uap->iovcnt * sizeof (struct iovec); 461 if (uap->iovcnt > UIO_SMALLIOV) { 462 if (uap->iovcnt > UIO_MAXIOV) { 463 needfree = NULL; 464 error = EINVAL; 465 goto done; 466 } 467 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 468 needfree = iov; 469 } else { 470 iov = aiov; 471 needfree = NULL; 472 } 473 auio.uio_iov = iov; 474 auio.uio_iovcnt = uap->iovcnt; 475 auio.uio_rw = UIO_WRITE; 476 auio.uio_segflg = UIO_USERSPACE; 477 auio.uio_procp = p; 478 auio.uio_offset = -1; 479 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 480 goto done; 481 auio.uio_resid = 0; 482 for (i = 0; i < uap->iovcnt; i++) { 483 if (iov->iov_len > INT_MAX - auio.uio_resid) { 484 error = EINVAL; 485 goto done; 486 } 487 auio.uio_resid += iov->iov_len; 488 iov++; 489 } 490 #ifdef KTRACE 491 /* 492 * if tracing, save a copy of iovec and uio 493 */ 494 if (KTRPOINT(p, KTR_GENIO)) { 495 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 496 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 497 ktruio = auio; 498 } 499 #endif 500 cnt = auio.uio_resid; 501 if (fp->f_type == DTYPE_VNODE) 502 bwillwrite(); 503 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 504 if (auio.uio_resid != cnt && (error == ERESTART || 505 error == EINTR || error == EWOULDBLOCK)) 506 error = 0; 507 if (error == EPIPE) 508 psignal(p, SIGPIPE); 509 } 510 cnt -= auio.uio_resid; 511 #ifdef KTRACE 512 if (ktriov != NULL) { 513 if (error == 0) { 514 ktruio.uio_iov = ktriov; 515 ktruio.uio_resid = cnt; 516 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, &ktruio, 517 error); 518 } 519 FREE(ktriov, M_TEMP); 520 } 521 #endif 522 p->p_retval[0] = cnt; 523 done: 524 fdrop(fp, p); 525 if (needfree) 526 FREE(needfree, M_IOV); 527 return (error); 528 } 529 530 /* 531 * Ioctl system call 532 */ 533 #ifndef _SYS_SYSPROTO_H_ 534 struct ioctl_args { 535 int fd; 536 u_long com; 537 caddr_t data; 538 }; 539 #endif 540 /* ARGSUSED */ 541 int 542 ioctl(p, uap) 543 struct proc *p; 544 register struct ioctl_args *uap; 545 { 546 register struct file *fp; 547 register struct filedesc *fdp; 548 register u_long com; 549 int error; 550 register u_int size; 551 caddr_t data, memp; 552 int tmp; 553 #define STK_PARAMS 128 554 union { 555 char stkbuf[STK_PARAMS]; 556 long align; 557 } ubuf; 558 559 fdp = p->p_fd; 560 if ((u_int)uap->fd >= fdp->fd_nfiles || 561 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 562 return (EBADF); 563 564 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 565 return (EBADF); 566 567 switch (com = uap->com) { 568 case FIONCLEX: 569 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 570 return (0); 571 case FIOCLEX: 572 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 573 return (0); 574 } 575 576 /* 577 * Interpret high order word to find amount of data to be 578 * copied to/from the user's address space. 579 */ 580 size = IOCPARM_LEN(com); 581 if (size > IOCPARM_MAX) 582 return (ENOTTY); 583 584 fhold(fp); 585 586 memp = NULL; 587 if (size > sizeof (ubuf.stkbuf)) { 588 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 589 data = memp; 590 } else { 591 data = ubuf.stkbuf; 592 } 593 if (com&IOC_IN) { 594 if (size) { 595 error = copyin(uap->data, data, (u_int)size); 596 if (error) { 597 if (memp) 598 free(memp, M_IOCTLOPS); 599 fdrop(fp, p); 600 return (error); 601 } 602 } else { 603 *(caddr_t *)data = uap->data; 604 } 605 } else if ((com&IOC_OUT) && size) { 606 /* 607 * Zero the buffer so the user always 608 * gets back something deterministic. 609 */ 610 bzero(data, size); 611 } else if (com&IOC_VOID) { 612 *(caddr_t *)data = uap->data; 613 } 614 615 switch (com) { 616 617 case FIONBIO: 618 if ((tmp = *(int *)data)) 619 fp->f_flag |= FNONBLOCK; 620 else 621 fp->f_flag &= ~FNONBLOCK; 622 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 623 break; 624 625 case FIOASYNC: 626 if ((tmp = *(int *)data)) 627 fp->f_flag |= FASYNC; 628 else 629 fp->f_flag &= ~FASYNC; 630 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 631 break; 632 633 default: 634 error = fo_ioctl(fp, com, data, p); 635 /* 636 * Copy any data to user, size was 637 * already set and checked above. 638 */ 639 if (error == 0 && (com&IOC_OUT) && size) 640 error = copyout(data, uap->data, (u_int)size); 641 break; 642 } 643 if (memp) 644 free(memp, M_IOCTLOPS); 645 fdrop(fp, p); 646 return (error); 647 } 648 649 static int nselcoll; /* Select collisions since boot */ 650 int selwait; 651 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 652 653 /* 654 * Select system call. 655 */ 656 #ifndef _SYS_SYSPROTO_H_ 657 struct select_args { 658 int nd; 659 fd_set *in, *ou, *ex; 660 struct timeval *tv; 661 }; 662 #endif 663 int 664 select(p, uap) 665 register struct proc *p; 666 register struct select_args *uap; 667 { 668 /* 669 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 670 * infds with the new FD_SETSIZE of 1024, and more than enough for 671 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 672 * of 256. 673 */ 674 fd_mask s_selbits[howmany(2048, NFDBITS)]; 675 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 676 struct timeval atv, rtv, ttv; 677 int s, ncoll, error, timo; 678 u_int nbufbytes, ncpbytes, nfdbits; 679 680 if (uap->nd < 0) 681 return (EINVAL); 682 if (uap->nd > p->p_fd->fd_nfiles) 683 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 684 685 /* 686 * Allocate just enough bits for the non-null fd_sets. Use the 687 * preallocated auto buffer if possible. 688 */ 689 nfdbits = roundup(uap->nd, NFDBITS); 690 ncpbytes = nfdbits / NBBY; 691 nbufbytes = 0; 692 if (uap->in != NULL) 693 nbufbytes += 2 * ncpbytes; 694 if (uap->ou != NULL) 695 nbufbytes += 2 * ncpbytes; 696 if (uap->ex != NULL) 697 nbufbytes += 2 * ncpbytes; 698 if (nbufbytes <= sizeof s_selbits) 699 selbits = &s_selbits[0]; 700 else 701 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 702 703 /* 704 * Assign pointers into the bit buffers and fetch the input bits. 705 * Put the output buffers together so that they can be bzeroed 706 * together. 707 */ 708 sbp = selbits; 709 #define getbits(name, x) \ 710 do { \ 711 if (uap->name == NULL) \ 712 ibits[x] = NULL; \ 713 else { \ 714 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 715 obits[x] = sbp; \ 716 sbp += ncpbytes / sizeof *sbp; \ 717 error = copyin(uap->name, ibits[x], ncpbytes); \ 718 if (error != 0) { \ 719 PROC_LOCK(p); \ 720 goto done; \ 721 } \ 722 } \ 723 } while (0) 724 getbits(in, 0); 725 getbits(ou, 1); 726 getbits(ex, 2); 727 #undef getbits 728 if (nbufbytes != 0) 729 bzero(selbits, nbufbytes / 2); 730 731 if (uap->tv) { 732 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 733 sizeof (atv)); 734 if (error) { 735 PROC_LOCK(p); 736 goto done; 737 } 738 if (itimerfix(&atv)) { 739 error = EINVAL; 740 PROC_LOCK(p); 741 goto done; 742 } 743 getmicrouptime(&rtv); 744 timevaladd(&atv, &rtv); 745 } else { 746 atv.tv_sec = 0; 747 atv.tv_usec = 0; 748 } 749 timo = 0; 750 PROC_LOCK(p); 751 retry: 752 ncoll = nselcoll; 753 p->p_flag |= P_SELECT; 754 PROC_UNLOCK(p); 755 error = selscan(p, ibits, obits, uap->nd); 756 PROC_LOCK(p); 757 if (error || p->p_retval[0]) 758 goto done; 759 if (atv.tv_sec || atv.tv_usec) { 760 getmicrouptime(&rtv); 761 if (timevalcmp(&rtv, &atv, >=)) 762 goto done; 763 ttv = atv; 764 timevalsub(&ttv, &rtv); 765 timo = ttv.tv_sec > 24 * 60 * 60 ? 766 24 * 60 * 60 * hz : tvtohz(&ttv); 767 } 768 s = splhigh(); 769 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 770 splx(s); 771 goto retry; 772 } 773 p->p_flag &= ~P_SELECT; 774 775 error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "select", 776 timo); 777 778 splx(s); 779 if (error == 0) 780 goto retry; 781 done: 782 p->p_flag &= ~P_SELECT; 783 PROC_UNLOCK(p); 784 /* select is not restarted after signals... */ 785 if (error == ERESTART) 786 error = EINTR; 787 if (error == EWOULDBLOCK) 788 error = 0; 789 #define putbits(name, x) \ 790 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 791 error = error2; 792 if (error == 0) { 793 int error2; 794 795 putbits(in, 0); 796 putbits(ou, 1); 797 putbits(ex, 2); 798 #undef putbits 799 } 800 if (selbits != &s_selbits[0]) 801 free(selbits, M_SELECT); 802 return (error); 803 } 804 805 static int 806 selscan(p, ibits, obits, nfd) 807 struct proc *p; 808 fd_mask **ibits, **obits; 809 int nfd; 810 { 811 struct filedesc *fdp = p->p_fd; 812 int msk, i, fd; 813 fd_mask bits; 814 struct file *fp; 815 int n = 0; 816 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 817 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 818 819 for (msk = 0; msk < 3; msk++) { 820 if (ibits[msk] == NULL) 821 continue; 822 for (i = 0; i < nfd; i += NFDBITS) { 823 bits = ibits[msk][i/NFDBITS]; 824 /* ffs(int mask) not portable, fd_mask is long */ 825 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 826 if (!(bits & 1)) 827 continue; 828 fp = fdp->fd_ofiles[fd]; 829 if (fp == NULL) 830 return (EBADF); 831 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 832 obits[msk][(fd)/NFDBITS] |= 833 ((fd_mask)1 << ((fd) % NFDBITS)); 834 n++; 835 } 836 } 837 } 838 } 839 p->p_retval[0] = n; 840 return (0); 841 } 842 843 /* 844 * Poll system call. 845 */ 846 #ifndef _SYS_SYSPROTO_H_ 847 struct poll_args { 848 struct pollfd *fds; 849 u_int nfds; 850 int timeout; 851 }; 852 #endif 853 int 854 poll(p, uap) 855 register struct proc *p; 856 register struct poll_args *uap; 857 { 858 caddr_t bits; 859 char smallbits[32 * sizeof(struct pollfd)]; 860 struct timeval atv, rtv, ttv; 861 int s, ncoll, error = 0, timo, nfds; 862 size_t ni; 863 864 nfds = SCARG(uap, nfds); 865 /* 866 * This is kinda bogus. We have fd limits, but that is not 867 * really related to the size of the pollfd array. Make sure 868 * we let the process use at least FD_SETSIZE entries and at 869 * least enough for the current limits. We want to be reasonably 870 * safe, but not overly restrictive. 871 */ 872 if (nfds > p->p_rlimit[RLIMIT_NOFILE].rlim_cur && nfds > FD_SETSIZE) 873 return (EINVAL); 874 ni = nfds * sizeof(struct pollfd); 875 if (ni > sizeof(smallbits)) 876 bits = malloc(ni, M_TEMP, M_WAITOK); 877 else 878 bits = smallbits; 879 error = copyin(SCARG(uap, fds), bits, ni); 880 PROC_LOCK(p); 881 if (error) 882 goto done; 883 if (SCARG(uap, timeout) != INFTIM) { 884 atv.tv_sec = SCARG(uap, timeout) / 1000; 885 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 886 if (itimerfix(&atv)) { 887 error = EINVAL; 888 goto done; 889 } 890 getmicrouptime(&rtv); 891 timevaladd(&atv, &rtv); 892 } else { 893 atv.tv_sec = 0; 894 atv.tv_usec = 0; 895 } 896 timo = 0; 897 retry: 898 ncoll = nselcoll; 899 p->p_flag |= P_SELECT; 900 PROC_UNLOCK(p); 901 error = pollscan(p, (struct pollfd *)bits, nfds); 902 PROC_LOCK(p); 903 if (error || p->p_retval[0]) 904 goto done; 905 if (atv.tv_sec || atv.tv_usec) { 906 getmicrouptime(&rtv); 907 if (timevalcmp(&rtv, &atv, >=)) 908 goto done; 909 ttv = atv; 910 timevalsub(&ttv, &rtv); 911 timo = ttv.tv_sec > 24 * 60 * 60 ? 912 24 * 60 * 60 * hz : tvtohz(&ttv); 913 } 914 s = splhigh(); 915 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 916 splx(s); 917 goto retry; 918 } 919 p->p_flag &= ~P_SELECT; 920 error = msleep((caddr_t)&selwait, &p->p_mtx, PSOCK | PCATCH, "poll", 921 timo); 922 splx(s); 923 if (error == 0) 924 goto retry; 925 done: 926 p->p_flag &= ~P_SELECT; 927 PROC_UNLOCK(p); 928 /* poll is not restarted after signals... */ 929 if (error == ERESTART) 930 error = EINTR; 931 if (error == EWOULDBLOCK) 932 error = 0; 933 if (error == 0) { 934 error = copyout(bits, SCARG(uap, fds), ni); 935 if (error) 936 goto out; 937 } 938 out: 939 if (ni > sizeof(smallbits)) 940 free(bits, M_TEMP); 941 return (error); 942 } 943 944 static int 945 pollscan(p, fds, nfd) 946 struct proc *p; 947 struct pollfd *fds; 948 int nfd; 949 { 950 register struct filedesc *fdp = p->p_fd; 951 int i; 952 struct file *fp; 953 int n = 0; 954 955 for (i = 0; i < nfd; i++, fds++) { 956 if (fds->fd >= fdp->fd_nfiles) { 957 fds->revents = POLLNVAL; 958 n++; 959 } else if (fds->fd < 0) { 960 fds->revents = 0; 961 } else { 962 fp = fdp->fd_ofiles[fds->fd]; 963 if (fp == NULL) { 964 fds->revents = POLLNVAL; 965 n++; 966 } else { 967 /* 968 * Note: backend also returns POLLHUP and 969 * POLLERR if appropriate. 970 */ 971 fds->revents = fo_poll(fp, fds->events, 972 fp->f_cred, p); 973 if (fds->revents != 0) 974 n++; 975 } 976 } 977 } 978 p->p_retval[0] = n; 979 return (0); 980 } 981 982 /* 983 * OpenBSD poll system call. 984 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 985 */ 986 #ifndef _SYS_SYSPROTO_H_ 987 struct openbsd_poll_args { 988 struct pollfd *fds; 989 u_int nfds; 990 int timeout; 991 }; 992 #endif 993 int 994 openbsd_poll(p, uap) 995 register struct proc *p; 996 register struct openbsd_poll_args *uap; 997 { 998 return (poll(p, (struct poll_args *)uap)); 999 } 1000 1001 /*ARGSUSED*/ 1002 int 1003 seltrue(dev, events, p) 1004 dev_t dev; 1005 int events; 1006 struct proc *p; 1007 { 1008 1009 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 1010 } 1011 1012 /* 1013 * Record a select request. 1014 */ 1015 void 1016 selrecord(selector, sip) 1017 struct proc *selector; 1018 struct selinfo *sip; 1019 { 1020 struct proc *p; 1021 pid_t mypid; 1022 1023 mypid = selector->p_pid; 1024 if (sip->si_pid == mypid) 1025 return; 1026 if (sip->si_pid && (p = pfind(sip->si_pid))) { 1027 mtx_lock_spin(&sched_lock); 1028 if (p->p_wchan == (caddr_t)&selwait) { 1029 mtx_unlock_spin(&sched_lock); 1030 sip->si_flags |= SI_COLL; 1031 return; 1032 } 1033 mtx_unlock_spin(&sched_lock); 1034 } 1035 sip->si_pid = mypid; 1036 } 1037 1038 /* 1039 * Do a wakeup when a selectable event occurs. 1040 */ 1041 void 1042 selwakeup(sip) 1043 register struct selinfo *sip; 1044 { 1045 register struct proc *p; 1046 1047 if (sip->si_pid == 0) 1048 return; 1049 if (sip->si_flags & SI_COLL) { 1050 nselcoll++; 1051 sip->si_flags &= ~SI_COLL; 1052 wakeup((caddr_t)&selwait); 1053 } 1054 p = pfind(sip->si_pid); 1055 sip->si_pid = 0; 1056 if (p != NULL) { 1057 mtx_lock_spin(&sched_lock); 1058 if (p->p_wchan == (caddr_t)&selwait) { 1059 if (p->p_stat == SSLEEP) 1060 setrunnable(p); 1061 else 1062 unsleep(p); 1063 mtx_unlock_spin(&sched_lock); 1064 } else { 1065 mtx_unlock_spin(&sched_lock); 1066 PROC_LOCK(p); 1067 p->p_flag &= ~P_SELECT; 1068 PROC_UNLOCK(p); 1069 } 1070 } 1071 } 1072