1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)sys_generic.c 8.5 (Berkeley) 1/21/94 39 * $FreeBSD$ 40 */ 41 42 #include "opt_ktrace.h" 43 44 #include <sys/param.h> 45 #include <sys/systm.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/filio.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/proc.h> 52 #include <sys/signalvar.h> 53 #include <sys/socketvar.h> 54 #include <sys/uio.h> 55 #include <sys/kernel.h> 56 #include <sys/malloc.h> 57 #include <sys/poll.h> 58 #include <sys/sysctl.h> 59 #include <sys/sysent.h> 60 #ifdef KTRACE 61 #include <sys/ktrace.h> 62 #endif 63 64 #include <machine/limits.h> 65 66 static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); 67 static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); 68 MALLOC_DEFINE(M_IOV, "iov", "large iov's"); 69 70 static int pollscan __P((struct proc *, struct pollfd *, int)); 71 static int selscan __P((struct proc *, fd_mask **, fd_mask **, int)); 72 static int dofileread __P((struct proc *, struct file *, int, void *, 73 size_t, off_t, int)); 74 static int dofilewrite __P((struct proc *, struct file *, int, 75 const void *, size_t, off_t, int)); 76 77 struct file* 78 getfp(fdp, fd, flag) 79 struct filedesc* fdp; 80 int fd, flag; 81 { 82 struct file* fp; 83 84 if (((u_int)fd) >= fdp->fd_nfiles || 85 (fp = fdp->fd_ofiles[fd]) == NULL || 86 (fp->f_flag & flag) == 0) 87 return (NULL); 88 return (fp); 89 } 90 91 /* 92 * Read system call. 93 */ 94 #ifndef _SYS_SYSPROTO_H_ 95 struct read_args { 96 int fd; 97 void *buf; 98 size_t nbyte; 99 }; 100 #endif 101 int 102 read(p, uap) 103 struct proc *p; 104 register struct read_args *uap; 105 { 106 register struct file *fp; 107 108 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 109 return (EBADF); 110 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 111 } 112 113 /* 114 * Pread system call 115 */ 116 #ifndef _SYS_SYSPROTO_H_ 117 struct pread_args { 118 int fd; 119 void *buf; 120 size_t nbyte; 121 int pad; 122 off_t offset; 123 }; 124 #endif 125 int 126 pread(p, uap) 127 struct proc *p; 128 register struct pread_args *uap; 129 { 130 register struct file *fp; 131 132 if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) 133 return (EBADF); 134 if (fp->f_type != DTYPE_VNODE) 135 return (ESPIPE); 136 return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 137 FOF_OFFSET)); 138 } 139 140 /* 141 * Code common for read and pread 142 */ 143 int 144 dofileread(p, fp, fd, buf, nbyte, offset, flags) 145 struct proc *p; 146 struct file *fp; 147 int fd, flags; 148 void *buf; 149 size_t nbyte; 150 off_t offset; 151 { 152 struct uio auio; 153 struct iovec aiov; 154 long cnt, error = 0; 155 #ifdef KTRACE 156 struct iovec ktriov; 157 #endif 158 159 aiov.iov_base = (caddr_t)buf; 160 aiov.iov_len = nbyte; 161 auio.uio_iov = &aiov; 162 auio.uio_iovcnt = 1; 163 auio.uio_offset = offset; 164 if (nbyte > INT_MAX) 165 return (EINVAL); 166 auio.uio_resid = nbyte; 167 auio.uio_rw = UIO_READ; 168 auio.uio_segflg = UIO_USERSPACE; 169 auio.uio_procp = p; 170 #ifdef KTRACE 171 /* 172 * if tracing, save a copy of iovec 173 */ 174 if (KTRPOINT(p, KTR_GENIO)) 175 ktriov = aiov; 176 #endif 177 cnt = nbyte; 178 if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) 179 if (auio.uio_resid != cnt && (error == ERESTART || 180 error == EINTR || error == EWOULDBLOCK)) 181 error = 0; 182 cnt -= auio.uio_resid; 183 #ifdef KTRACE 184 if (KTRPOINT(p, KTR_GENIO) && error == 0) 185 ktrgenio(p->p_tracep, fd, UIO_READ, &ktriov, cnt, error); 186 #endif 187 p->p_retval[0] = cnt; 188 return (error); 189 } 190 191 /* 192 * Scatter read system call. 193 */ 194 #ifndef _SYS_SYSPROTO_H_ 195 struct readv_args { 196 int fd; 197 struct iovec *iovp; 198 u_int iovcnt; 199 }; 200 #endif 201 int 202 readv(p, uap) 203 struct proc *p; 204 register struct readv_args *uap; 205 { 206 register struct file *fp; 207 register struct filedesc *fdp = p->p_fd; 208 struct uio auio; 209 register struct iovec *iov; 210 struct iovec *needfree; 211 struct iovec aiov[UIO_SMALLIOV]; 212 long i, cnt, error = 0; 213 u_int iovlen; 214 #ifdef KTRACE 215 struct iovec *ktriov = NULL; 216 #endif 217 218 if ((fp = getfp(fdp, uap->fd, FREAD)) == NULL) 219 return (EBADF); 220 /* note: can't use iovlen until iovcnt is validated */ 221 iovlen = uap->iovcnt * sizeof (struct iovec); 222 if (uap->iovcnt > UIO_SMALLIOV) { 223 if (uap->iovcnt > UIO_MAXIOV) 224 return (EINVAL); 225 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 226 needfree = iov; 227 } else { 228 iov = aiov; 229 needfree = NULL; 230 } 231 auio.uio_iov = iov; 232 auio.uio_iovcnt = uap->iovcnt; 233 auio.uio_rw = UIO_READ; 234 auio.uio_segflg = UIO_USERSPACE; 235 auio.uio_procp = p; 236 auio.uio_offset = -1; 237 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 238 goto done; 239 auio.uio_resid = 0; 240 for (i = 0; i < uap->iovcnt; i++) { 241 if (iov->iov_len > INT_MAX - auio.uio_resid) { 242 error = EINVAL; 243 goto done; 244 } 245 auio.uio_resid += iov->iov_len; 246 iov++; 247 } 248 #ifdef KTRACE 249 /* 250 * if tracing, save a copy of iovec 251 */ 252 if (KTRPOINT(p, KTR_GENIO)) { 253 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 254 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 255 } 256 #endif 257 cnt = auio.uio_resid; 258 if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) 259 if (auio.uio_resid != cnt && (error == ERESTART || 260 error == EINTR || error == EWOULDBLOCK)) 261 error = 0; 262 cnt -= auio.uio_resid; 263 #ifdef KTRACE 264 if (ktriov != NULL) { 265 if (error == 0) 266 ktrgenio(p->p_tracep, uap->fd, UIO_READ, ktriov, 267 cnt, error); 268 FREE(ktriov, M_TEMP); 269 } 270 #endif 271 p->p_retval[0] = cnt; 272 done: 273 if (needfree) 274 FREE(needfree, M_IOV); 275 return (error); 276 } 277 278 /* 279 * Write system call 280 */ 281 #ifndef _SYS_SYSPROTO_H_ 282 struct write_args { 283 int fd; 284 const void *buf; 285 size_t nbyte; 286 }; 287 #endif 288 int 289 write(p, uap) 290 struct proc *p; 291 register struct write_args *uap; 292 { 293 register struct file *fp; 294 295 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 296 return (EBADF); 297 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); 298 } 299 300 /* 301 * Pwrite system call 302 */ 303 #ifndef _SYS_SYSPROTO_H_ 304 struct pwrite_args { 305 int fd; 306 const void *buf; 307 size_t nbyte; 308 int pad; 309 off_t offset; 310 }; 311 #endif 312 int 313 pwrite(p, uap) 314 struct proc *p; 315 register struct pwrite_args *uap; 316 { 317 register struct file *fp; 318 319 if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) 320 return (EBADF); 321 if (fp->f_type != DTYPE_VNODE) 322 return (ESPIPE); 323 return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, 324 FOF_OFFSET)); 325 } 326 327 static int 328 dofilewrite(p, fp, fd, buf, nbyte, offset, flags) 329 struct proc *p; 330 struct file *fp; 331 int fd, flags; 332 const void *buf; 333 size_t nbyte; 334 off_t offset; 335 { 336 struct uio auio; 337 struct iovec aiov; 338 long cnt, error = 0; 339 #ifdef KTRACE 340 struct iovec ktriov; 341 #endif 342 343 aiov.iov_base = (void *)buf; 344 aiov.iov_len = nbyte; 345 auio.uio_iov = &aiov; 346 auio.uio_iovcnt = 1; 347 auio.uio_offset = offset; 348 if (nbyte > INT_MAX) 349 return (EINVAL); 350 auio.uio_resid = nbyte; 351 auio.uio_rw = UIO_WRITE; 352 auio.uio_segflg = UIO_USERSPACE; 353 auio.uio_procp = p; 354 #ifdef KTRACE 355 /* 356 * if tracing, save a copy of iovec 357 */ 358 if (KTRPOINT(p, KTR_GENIO)) 359 ktriov = aiov; 360 #endif 361 cnt = nbyte; 362 if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { 363 if (auio.uio_resid != cnt && (error == ERESTART || 364 error == EINTR || error == EWOULDBLOCK)) 365 error = 0; 366 if (error == EPIPE) 367 psignal(p, SIGPIPE); 368 } 369 cnt -= auio.uio_resid; 370 #ifdef KTRACE 371 if (KTRPOINT(p, KTR_GENIO) && error == 0) 372 ktrgenio(p->p_tracep, fd, UIO_WRITE, 373 &ktriov, cnt, error); 374 #endif 375 p->p_retval[0] = cnt; 376 return (error); 377 } 378 379 /* 380 * Gather write system call 381 */ 382 #ifndef _SYS_SYSPROTO_H_ 383 struct writev_args { 384 int fd; 385 struct iovec *iovp; 386 u_int iovcnt; 387 }; 388 #endif 389 int 390 writev(p, uap) 391 struct proc *p; 392 register struct writev_args *uap; 393 { 394 register struct file *fp; 395 register struct filedesc *fdp = p->p_fd; 396 struct uio auio; 397 register struct iovec *iov; 398 struct iovec *needfree; 399 struct iovec aiov[UIO_SMALLIOV]; 400 long i, cnt, error = 0; 401 u_int iovlen; 402 #ifdef KTRACE 403 struct iovec *ktriov = NULL; 404 #endif 405 406 if ((fp = getfp(fdp, uap->fd, FWRITE)) == NULL) 407 return (EBADF); 408 fhold(fp); 409 /* note: can't use iovlen until iovcnt is validated */ 410 iovlen = uap->iovcnt * sizeof (struct iovec); 411 if (uap->iovcnt > UIO_SMALLIOV) { 412 if (uap->iovcnt > UIO_MAXIOV) { 413 needfree = NULL; 414 error = EINVAL; 415 goto done; 416 } 417 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 418 needfree = iov; 419 } else { 420 iov = aiov; 421 needfree = NULL; 422 } 423 auio.uio_iov = iov; 424 auio.uio_iovcnt = uap->iovcnt; 425 auio.uio_rw = UIO_WRITE; 426 auio.uio_segflg = UIO_USERSPACE; 427 auio.uio_procp = p; 428 auio.uio_offset = -1; 429 if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) 430 goto done; 431 auio.uio_resid = 0; 432 for (i = 0; i < uap->iovcnt; i++) { 433 if (iov->iov_len > INT_MAX - auio.uio_resid) { 434 error = EINVAL; 435 goto done; 436 } 437 auio.uio_resid += iov->iov_len; 438 iov++; 439 } 440 #ifdef KTRACE 441 /* 442 * if tracing, save a copy of iovec 443 */ 444 if (KTRPOINT(p, KTR_GENIO)) { 445 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK); 446 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen); 447 } 448 #endif 449 cnt = auio.uio_resid; 450 if ((error = fo_write(fp, &auio, fp->f_cred, 0, p))) { 451 if (auio.uio_resid != cnt && (error == ERESTART || 452 error == EINTR || error == EWOULDBLOCK)) 453 error = 0; 454 if (error == EPIPE) 455 psignal(p, SIGPIPE); 456 } 457 cnt -= auio.uio_resid; 458 #ifdef KTRACE 459 if (ktriov != NULL) { 460 if (error == 0) 461 ktrgenio(p->p_tracep, uap->fd, UIO_WRITE, 462 ktriov, cnt, error); 463 FREE(ktriov, M_TEMP); 464 } 465 #endif 466 p->p_retval[0] = cnt; 467 done: 468 fdrop(fp, p); 469 if (needfree) 470 FREE(needfree, M_IOV); 471 return (error); 472 } 473 474 /* 475 * Ioctl system call 476 */ 477 #ifndef _SYS_SYSPROTO_H_ 478 struct ioctl_args { 479 int fd; 480 u_long com; 481 caddr_t data; 482 }; 483 #endif 484 /* ARGSUSED */ 485 int 486 ioctl(p, uap) 487 struct proc *p; 488 register struct ioctl_args *uap; 489 { 490 register struct file *fp; 491 register struct filedesc *fdp; 492 register u_long com; 493 int error; 494 register u_int size; 495 caddr_t data, memp; 496 int tmp; 497 #define STK_PARAMS 128 498 union { 499 char stkbuf[STK_PARAMS]; 500 long align; 501 } ubuf; 502 503 fdp = p->p_fd; 504 if ((u_int)uap->fd >= fdp->fd_nfiles || 505 (fp = fdp->fd_ofiles[uap->fd]) == NULL) 506 return (EBADF); 507 508 if ((fp->f_flag & (FREAD | FWRITE)) == 0) 509 return (EBADF); 510 511 switch (com = uap->com) { 512 case FIONCLEX: 513 fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; 514 return (0); 515 case FIOCLEX: 516 fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; 517 return (0); 518 } 519 520 /* 521 * Interpret high order word to find amount of data to be 522 * copied to/from the user's address space. 523 */ 524 size = IOCPARM_LEN(com); 525 if (size > IOCPARM_MAX) 526 return (ENOTTY); 527 memp = NULL; 528 if (size > sizeof (ubuf.stkbuf)) { 529 memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); 530 data = memp; 531 } else 532 data = ubuf.stkbuf; 533 if (com&IOC_IN) { 534 if (size) { 535 error = copyin(uap->data, data, (u_int)size); 536 if (error) { 537 if (memp) 538 free(memp, M_IOCTLOPS); 539 return (error); 540 } 541 } else 542 *(caddr_t *)data = uap->data; 543 } else if ((com&IOC_OUT) && size) 544 /* 545 * Zero the buffer so the user always 546 * gets back something deterministic. 547 */ 548 bzero(data, size); 549 else if (com&IOC_VOID) 550 *(caddr_t *)data = uap->data; 551 552 switch (com) { 553 554 case FIONBIO: 555 if ((tmp = *(int *)data)) 556 fp->f_flag |= FNONBLOCK; 557 else 558 fp->f_flag &= ~FNONBLOCK; 559 error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); 560 break; 561 562 case FIOASYNC: 563 if ((tmp = *(int *)data)) 564 fp->f_flag |= FASYNC; 565 else 566 fp->f_flag &= ~FASYNC; 567 error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); 568 break; 569 570 default: 571 error = fo_ioctl(fp, com, data, p); 572 /* 573 * Copy any data to user, size was 574 * already set and checked above. 575 */ 576 if (error == 0 && (com&IOC_OUT) && size) 577 error = copyout(data, uap->data, (u_int)size); 578 break; 579 } 580 if (memp) 581 free(memp, M_IOCTLOPS); 582 return (error); 583 } 584 585 static int nselcoll; /* Select collisions since boot */ 586 int selwait; 587 SYSCTL_INT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, ""); 588 589 /* 590 * Select system call. 591 */ 592 #ifndef _SYS_SYSPROTO_H_ 593 struct select_args { 594 int nd; 595 fd_set *in, *ou, *ex; 596 struct timeval *tv; 597 }; 598 #endif 599 int 600 select(p, uap) 601 register struct proc *p; 602 register struct select_args *uap; 603 { 604 /* 605 * The magic 2048 here is chosen to be just enough for FD_SETSIZE 606 * infds with the new FD_SETSIZE of 1024, and more than enough for 607 * FD_SETSIZE infds, outfds and exceptfds with the old FD_SETSIZE 608 * of 256. 609 */ 610 fd_mask s_selbits[howmany(2048, NFDBITS)]; 611 fd_mask *ibits[3], *obits[3], *selbits, *sbp; 612 struct timeval atv, rtv, ttv; 613 int s, ncoll, error, timo; 614 u_int nbufbytes, ncpbytes, nfdbits; 615 616 if (uap->nd < 0) 617 return (EINVAL); 618 if (uap->nd > p->p_fd->fd_nfiles) 619 uap->nd = p->p_fd->fd_nfiles; /* forgiving; slightly wrong */ 620 621 /* 622 * Allocate just enough bits for the non-null fd_sets. Use the 623 * preallocated auto buffer if possible. 624 */ 625 nfdbits = roundup(uap->nd, NFDBITS); 626 ncpbytes = nfdbits / NBBY; 627 nbufbytes = 0; 628 if (uap->in != NULL) 629 nbufbytes += 2 * ncpbytes; 630 if (uap->ou != NULL) 631 nbufbytes += 2 * ncpbytes; 632 if (uap->ex != NULL) 633 nbufbytes += 2 * ncpbytes; 634 if (nbufbytes <= sizeof s_selbits) 635 selbits = &s_selbits[0]; 636 else 637 selbits = malloc(nbufbytes, M_SELECT, M_WAITOK); 638 639 /* 640 * Assign pointers into the bit buffers and fetch the input bits. 641 * Put the output buffers together so that they can be bzeroed 642 * together. 643 */ 644 sbp = selbits; 645 #define getbits(name, x) \ 646 do { \ 647 if (uap->name == NULL) \ 648 ibits[x] = NULL; \ 649 else { \ 650 ibits[x] = sbp + nbufbytes / 2 / sizeof *sbp; \ 651 obits[x] = sbp; \ 652 sbp += ncpbytes / sizeof *sbp; \ 653 error = copyin(uap->name, ibits[x], ncpbytes); \ 654 if (error != 0) \ 655 goto done; \ 656 } \ 657 } while (0) 658 getbits(in, 0); 659 getbits(ou, 1); 660 getbits(ex, 2); 661 #undef getbits 662 if (nbufbytes != 0) 663 bzero(selbits, nbufbytes / 2); 664 665 if (uap->tv) { 666 error = copyin((caddr_t)uap->tv, (caddr_t)&atv, 667 sizeof (atv)); 668 if (error) 669 goto done; 670 if (itimerfix(&atv)) { 671 error = EINVAL; 672 goto done; 673 } 674 getmicrouptime(&rtv); 675 timevaladd(&atv, &rtv); 676 } else 677 atv.tv_sec = 0; 678 timo = 0; 679 retry: 680 ncoll = nselcoll; 681 p->p_flag |= P_SELECT; 682 error = selscan(p, ibits, obits, uap->nd); 683 if (error || p->p_retval[0]) 684 goto done; 685 if (atv.tv_sec) { 686 getmicrouptime(&rtv); 687 if (timevalcmp(&rtv, &atv, >=)) 688 goto done; 689 ttv = atv; 690 timevalsub(&ttv, &rtv); 691 timo = ttv.tv_sec > 24 * 60 * 60 ? 692 24 * 60 * 60 * hz : tvtohz(&ttv); 693 } 694 s = splhigh(); 695 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 696 splx(s); 697 goto retry; 698 } 699 p->p_flag &= ~P_SELECT; 700 701 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo); 702 703 splx(s); 704 if (error == 0) 705 goto retry; 706 done: 707 p->p_flag &= ~P_SELECT; 708 /* select is not restarted after signals... */ 709 if (error == ERESTART) 710 error = EINTR; 711 if (error == EWOULDBLOCK) 712 error = 0; 713 #define putbits(name, x) \ 714 if (uap->name && (error2 = copyout(obits[x], uap->name, ncpbytes))) \ 715 error = error2; 716 if (error == 0) { 717 int error2; 718 719 putbits(in, 0); 720 putbits(ou, 1); 721 putbits(ex, 2); 722 #undef putbits 723 } 724 if (selbits != &s_selbits[0]) 725 free(selbits, M_SELECT); 726 return (error); 727 } 728 729 static int 730 selscan(p, ibits, obits, nfd) 731 struct proc *p; 732 fd_mask **ibits, **obits; 733 int nfd; 734 { 735 struct filedesc *fdp = p->p_fd; 736 int msk, i, fd; 737 fd_mask bits; 738 struct file *fp; 739 int n = 0; 740 /* Note: backend also returns POLLHUP/POLLERR if appropriate. */ 741 static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND }; 742 743 for (msk = 0; msk < 3; msk++) { 744 if (ibits[msk] == NULL) 745 continue; 746 for (i = 0; i < nfd; i += NFDBITS) { 747 bits = ibits[msk][i/NFDBITS]; 748 /* ffs(int mask) not portable, fd_mask is long */ 749 for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { 750 if (!(bits & 1)) 751 continue; 752 fp = fdp->fd_ofiles[fd]; 753 if (fp == NULL) 754 return (EBADF); 755 if (fo_poll(fp, flag[msk], fp->f_cred, p)) { 756 obits[msk][(fd)/NFDBITS] |= 757 ((fd_mask)1 << ((fd) % NFDBITS)); 758 n++; 759 } 760 } 761 } 762 } 763 p->p_retval[0] = n; 764 return (0); 765 } 766 767 /* 768 * Poll system call. 769 */ 770 #ifndef _SYS_SYSPROTO_H_ 771 struct poll_args { 772 struct pollfd *fds; 773 u_int nfds; 774 int timeout; 775 }; 776 #endif 777 int 778 poll(p, uap) 779 register struct proc *p; 780 register struct poll_args *uap; 781 { 782 caddr_t bits; 783 char smallbits[32 * sizeof(struct pollfd)]; 784 struct timeval atv, rtv, ttv; 785 int s, ncoll, error = 0, timo; 786 size_t ni; 787 788 if (SCARG(uap, nfds) > p->p_fd->fd_nfiles) { 789 /* forgiving; slightly wrong */ 790 SCARG(uap, nfds) = p->p_fd->fd_nfiles; 791 } 792 ni = SCARG(uap, nfds) * sizeof(struct pollfd); 793 if (ni > sizeof(smallbits)) 794 bits = malloc(ni, M_TEMP, M_WAITOK); 795 else 796 bits = smallbits; 797 error = copyin(SCARG(uap, fds), bits, ni); 798 if (error) 799 goto done; 800 if (SCARG(uap, timeout) != INFTIM) { 801 atv.tv_sec = SCARG(uap, timeout) / 1000; 802 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000; 803 if (itimerfix(&atv)) { 804 error = EINVAL; 805 goto done; 806 } 807 getmicrouptime(&rtv); 808 timevaladd(&atv, &rtv); 809 } else 810 atv.tv_sec = 0; 811 timo = 0; 812 retry: 813 ncoll = nselcoll; 814 p->p_flag |= P_SELECT; 815 error = pollscan(p, (struct pollfd *)bits, SCARG(uap, nfds)); 816 if (error || p->p_retval[0]) 817 goto done; 818 if (atv.tv_sec) { 819 getmicrouptime(&rtv); 820 if (timevalcmp(&rtv, &atv, >=)) 821 goto done; 822 ttv = atv; 823 timevalsub(&ttv, &rtv); 824 timo = ttv.tv_sec > 24 * 60 * 60 ? 825 24 * 60 * 60 * hz : tvtohz(&ttv); 826 } 827 s = splhigh(); 828 if ((p->p_flag & P_SELECT) == 0 || nselcoll != ncoll) { 829 splx(s); 830 goto retry; 831 } 832 p->p_flag &= ~P_SELECT; 833 error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo); 834 splx(s); 835 if (error == 0) 836 goto retry; 837 done: 838 p->p_flag &= ~P_SELECT; 839 /* poll is not restarted after signals... */ 840 if (error == ERESTART) 841 error = EINTR; 842 if (error == EWOULDBLOCK) 843 error = 0; 844 if (error == 0) { 845 error = copyout(bits, SCARG(uap, fds), ni); 846 if (error) 847 goto out; 848 } 849 out: 850 if (ni > sizeof(smallbits)) 851 free(bits, M_TEMP); 852 return (error); 853 } 854 855 static int 856 pollscan(p, fds, nfd) 857 struct proc *p; 858 struct pollfd *fds; 859 int nfd; 860 { 861 register struct filedesc *fdp = p->p_fd; 862 int i; 863 struct file *fp; 864 int n = 0; 865 866 for (i = 0; i < nfd; i++, fds++) { 867 if (fds->fd >= fdp->fd_nfiles) { 868 fds->revents = POLLNVAL; 869 n++; 870 } else if (fds->fd < 0) { 871 fds->revents = 0; 872 } else { 873 fp = fdp->fd_ofiles[fds->fd]; 874 if (fp == 0) { 875 fds->revents = POLLNVAL; 876 n++; 877 } else { 878 /* 879 * Note: backend also returns POLLHUP and 880 * POLLERR if appropriate. 881 */ 882 fds->revents = fo_poll(fp, fds->events, 883 fp->f_cred, p); 884 if (fds->revents != 0) 885 n++; 886 } 887 } 888 } 889 p->p_retval[0] = n; 890 return (0); 891 } 892 893 /* 894 * OpenBSD poll system call. 895 * XXX this isn't quite a true representation.. OpenBSD uses select ops. 896 */ 897 #ifndef _SYS_SYSPROTO_H_ 898 struct openbsd_poll_args { 899 struct pollfd *fds; 900 u_int nfds; 901 int timeout; 902 }; 903 #endif 904 int 905 openbsd_poll(p, uap) 906 register struct proc *p; 907 register struct openbsd_poll_args *uap; 908 { 909 return (poll(p, (struct poll_args *)uap)); 910 } 911 912 /*ARGSUSED*/ 913 int 914 seltrue(dev, events, p) 915 dev_t dev; 916 int events; 917 struct proc *p; 918 { 919 920 return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM)); 921 } 922 923 /* 924 * Record a select request. 925 */ 926 void 927 selrecord(selector, sip) 928 struct proc *selector; 929 struct selinfo *sip; 930 { 931 struct proc *p; 932 pid_t mypid; 933 934 mypid = selector->p_pid; 935 if (sip->si_pid == mypid) 936 return; 937 if (sip->si_pid && (p = pfind(sip->si_pid)) && 938 p->p_wchan == (caddr_t)&selwait) 939 sip->si_flags |= SI_COLL; 940 else 941 sip->si_pid = mypid; 942 } 943 944 /* 945 * Do a wakeup when a selectable event occurs. 946 */ 947 void 948 selwakeup(sip) 949 register struct selinfo *sip; 950 { 951 register struct proc *p; 952 int s; 953 954 if (sip->si_pid == 0) 955 return; 956 if (sip->si_flags & SI_COLL) { 957 nselcoll++; 958 sip->si_flags &= ~SI_COLL; 959 wakeup((caddr_t)&selwait); 960 } 961 p = pfind(sip->si_pid); 962 sip->si_pid = 0; 963 if (p != NULL) { 964 s = splhigh(); 965 if (p->p_wchan == (caddr_t)&selwait) { 966 if (p->p_stat == SSLEEP) 967 setrunnable(p); 968 else 969 unsleep(p); 970 } else if (p->p_flag & P_SELECT) 971 p->p_flag &= ~P_SELECT; 972 splx(s); 973 } 974 } 975