1 /*- 2 * Copyright (c) 2007 Roman Divacky 3 * Copyright (c) 2014 Dmitry Chagin 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/imgact.h> 36 #include <sys/kernel.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/capsicum.h> 41 #include <sys/types.h> 42 #include <sys/user.h> 43 #include <sys/file.h> 44 #include <sys/filedesc.h> 45 #include <sys/filio.h> 46 #include <sys/errno.h> 47 #include <sys/event.h> 48 #include <sys/poll.h> 49 #include <sys/proc.h> 50 #include <sys/selinfo.h> 51 #include <sys/sx.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/timespec.h> 54 55 #ifdef COMPAT_LINUX32 56 #include <machine/../linux32/linux.h> 57 #include <machine/../linux32/linux32_proto.h> 58 #else 59 #include <machine/../linux/linux.h> 60 #include <machine/../linux/linux_proto.h> 61 #endif 62 63 #include <compat/linux/linux_emul.h> 64 #include <compat/linux/linux_event.h> 65 #include <compat/linux/linux_file.h> 66 #include <compat/linux/linux_util.h> 67 68 /* 69 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 70 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 71 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 72 * data verbatuim. Therefore we allocate 64-bit memory block to pass 73 * user supplied data for every file descriptor. 74 */ 75 76 typedef uint64_t epoll_udata_t; 77 78 struct epoll_emuldata { 79 uint32_t fdc; /* epoll udata max index */ 80 epoll_udata_t udata[1]; /* epoll user data vector */ 81 }; 82 83 #define EPOLL_DEF_SZ 16 84 #define EPOLL_SIZE(fdn) \ 85 (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 86 87 struct epoll_event { 88 uint32_t events; 89 epoll_udata_t data; 90 } 91 #if defined(__amd64__) 92 __attribute__((packed)) 93 #endif 94 ; 95 96 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 97 98 static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 99 static int epoll_to_kevent(struct thread *td, struct file *epfp, 100 int fd, struct epoll_event *l_event, int *kev_flags, 101 struct kevent *kevent, int *nkevents); 102 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 103 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 104 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 105 static int epoll_delete_event(struct thread *td, struct file *epfp, 106 int fd, int filter); 107 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 108 int fd); 109 110 struct epoll_copyin_args { 111 struct kevent *changelist; 112 }; 113 114 struct epoll_copyout_args { 115 struct epoll_event *leventlist; 116 struct proc *p; 117 uint32_t count; 118 int error; 119 }; 120 121 /* eventfd */ 122 typedef uint64_t eventfd_t; 123 124 static fo_rdwr_t eventfd_read; 125 static fo_rdwr_t eventfd_write; 126 static fo_truncate_t eventfd_truncate; 127 static fo_ioctl_t eventfd_ioctl; 128 static fo_poll_t eventfd_poll; 129 static fo_kqfilter_t eventfd_kqfilter; 130 static fo_stat_t eventfd_stat; 131 static fo_close_t eventfd_close; 132 static fo_fill_kinfo_t eventfd_fill_kinfo; 133 134 static struct fileops eventfdops = { 135 .fo_read = eventfd_read, 136 .fo_write = eventfd_write, 137 .fo_truncate = eventfd_truncate, 138 .fo_ioctl = eventfd_ioctl, 139 .fo_poll = eventfd_poll, 140 .fo_kqfilter = eventfd_kqfilter, 141 .fo_stat = eventfd_stat, 142 .fo_close = eventfd_close, 143 .fo_chmod = invfo_chmod, 144 .fo_chown = invfo_chown, 145 .fo_sendfile = invfo_sendfile, 146 .fo_fill_kinfo = eventfd_fill_kinfo, 147 .fo_flags = DFLAG_PASSABLE 148 }; 149 150 static void filt_eventfddetach(struct knote *kn); 151 static int filt_eventfdread(struct knote *kn, long hint); 152 static int filt_eventfdwrite(struct knote *kn, long hint); 153 154 static struct filterops eventfd_rfiltops = { 155 .f_isfd = 1, 156 .f_detach = filt_eventfddetach, 157 .f_event = filt_eventfdread 158 }; 159 static struct filterops eventfd_wfiltops = { 160 .f_isfd = 1, 161 .f_detach = filt_eventfddetach, 162 .f_event = filt_eventfdwrite 163 }; 164 165 struct eventfd { 166 eventfd_t efd_count; 167 uint32_t efd_flags; 168 struct selinfo efd_sel; 169 struct mtx efd_lock; 170 }; 171 172 static int eventfd_create(struct thread *td, uint32_t initval, int flags); 173 174 175 static void 176 epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 177 { 178 struct linux_pemuldata *pem; 179 struct epoll_emuldata *emd; 180 struct proc *p; 181 182 p = td->td_proc; 183 184 pem = pem_find(p); 185 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 186 187 LINUX_PEM_XLOCK(pem); 188 if (pem->epoll == NULL) { 189 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 190 emd->fdc = fd; 191 pem->epoll = emd; 192 } else { 193 emd = pem->epoll; 194 if (fd > emd->fdc) { 195 emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 196 emd->fdc = fd; 197 pem->epoll = emd; 198 } 199 } 200 emd->udata[fd] = udata; 201 LINUX_PEM_XUNLOCK(pem); 202 } 203 204 static int 205 epoll_create_common(struct thread *td, int flags) 206 { 207 int error; 208 209 error = kern_kqueue(td, flags, NULL); 210 if (error) 211 return (error); 212 213 epoll_fd_install(td, EPOLL_DEF_SZ, 0); 214 215 return (0); 216 } 217 218 int 219 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 220 { 221 222 /* 223 * args->size is unused. Linux just tests it 224 * and then forgets it as well. 225 */ 226 if (args->size <= 0) 227 return (EINVAL); 228 229 return (epoll_create_common(td, 0)); 230 } 231 232 int 233 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 234 { 235 int flags; 236 237 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 238 return (EINVAL); 239 240 flags = 0; 241 if ((args->flags & LINUX_O_CLOEXEC) != 0) 242 flags |= O_CLOEXEC; 243 244 return (epoll_create_common(td, flags)); 245 } 246 247 /* Structure converting function from epoll to kevent. */ 248 static int 249 epoll_to_kevent(struct thread *td, struct file *epfp, 250 int fd, struct epoll_event *l_event, int *kev_flags, 251 struct kevent *kevent, int *nkevents) 252 { 253 uint32_t levents = l_event->events; 254 struct linux_pemuldata *pem; 255 struct proc *p; 256 257 /* flags related to how event is registered */ 258 if ((levents & LINUX_EPOLLONESHOT) != 0) 259 *kev_flags |= EV_ONESHOT; 260 if ((levents & LINUX_EPOLLET) != 0) 261 *kev_flags |= EV_CLEAR; 262 if ((levents & LINUX_EPOLLERR) != 0) 263 *kev_flags |= EV_ERROR; 264 if ((levents & LINUX_EPOLLRDHUP) != 0) 265 *kev_flags |= EV_EOF; 266 267 /* flags related to what event is registered */ 268 if ((levents & LINUX_EPOLL_EVRD) != 0) { 269 EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 270 ++(*nkevents); 271 } 272 if ((levents & LINUX_EPOLL_EVWR) != 0) { 273 EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 274 ++(*nkevents); 275 } 276 277 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 278 p = td->td_proc; 279 280 pem = pem_find(p); 281 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 282 KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 283 284 LINUX_PEM_XLOCK(pem); 285 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 286 pem->flags |= LINUX_XUNSUP_EPOLL; 287 LINUX_PEM_XUNLOCK(pem); 288 linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 289 levents); 290 } else 291 LINUX_PEM_XUNLOCK(pem); 292 return (EINVAL); 293 } 294 295 return (0); 296 } 297 298 /* 299 * Structure converting function from kevent to epoll. In a case 300 * this is called on error in registration we store the error in 301 * event->data and pick it up later in linux_epoll_ctl(). 302 */ 303 static void 304 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 305 { 306 307 if ((kevent->flags & EV_ERROR) != 0) { 308 l_event->events = LINUX_EPOLLERR; 309 return; 310 } 311 312 switch (kevent->filter) { 313 case EVFILT_READ: 314 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 315 if ((kevent->flags & EV_EOF) != 0) 316 l_event->events |= LINUX_EPOLLRDHUP; 317 break; 318 case EVFILT_WRITE: 319 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 320 break; 321 } 322 } 323 324 /* 325 * Copyout callback used by kevent. This converts kevent 326 * events to epoll events and copies them back to the 327 * userspace. This is also called on error on registering 328 * of the filter. 329 */ 330 static int 331 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 332 { 333 struct epoll_copyout_args *args; 334 struct linux_pemuldata *pem; 335 struct epoll_emuldata *emd; 336 struct epoll_event *eep; 337 int error, fd, i; 338 339 args = (struct epoll_copyout_args*) arg; 340 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 341 342 pem = pem_find(args->p); 343 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 344 LINUX_PEM_SLOCK(pem); 345 emd = pem->epoll; 346 KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 347 348 for (i = 0; i < count; i++) { 349 kevent_to_epoll(&kevp[i], &eep[i]); 350 351 fd = kevp[i].ident; 352 KASSERT(fd <= emd->fdc, ("epoll user data vector" 353 " is too small.\n")); 354 eep[i].data = emd->udata[fd]; 355 } 356 LINUX_PEM_SUNLOCK(pem); 357 358 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 359 if (error == 0) { 360 args->leventlist += count; 361 args->count += count; 362 } else if (args->error == 0) 363 args->error = error; 364 365 free(eep, M_EPOLL); 366 return (error); 367 } 368 369 /* 370 * Copyin callback used by kevent. This copies already 371 * converted filters from kernel memory to the kevent 372 * internal kernel memory. Hence the memcpy instead of 373 * copyin. 374 */ 375 static int 376 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 377 { 378 struct epoll_copyin_args *args; 379 380 args = (struct epoll_copyin_args*) arg; 381 382 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 383 args->changelist += count; 384 385 return (0); 386 } 387 388 /* 389 * Load epoll filter, convert it to kevent filter 390 * and load it into kevent subsystem. 391 */ 392 int 393 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 394 { 395 struct file *epfp, *fp; 396 struct epoll_copyin_args ciargs; 397 struct kevent kev[2]; 398 struct kevent_copyops k_ops = { &ciargs, 399 NULL, 400 epoll_kev_copyin}; 401 struct epoll_event le; 402 cap_rights_t rights; 403 int kev_flags; 404 int nchanges = 0; 405 int error; 406 407 if (args->op != LINUX_EPOLL_CTL_DEL) { 408 error = copyin(args->event, &le, sizeof(le)); 409 if (error != 0) 410 return (error); 411 } 412 413 error = fget(td, args->epfd, 414 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 415 if (error != 0) 416 return (error); 417 if (epfp->f_type != DTYPE_KQUEUE) 418 goto leave1; 419 420 /* Protect user data vector from incorrectly supplied fd. */ 421 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 422 if (error != 0) 423 goto leave1; 424 425 /* Linux disallows spying on himself */ 426 if (epfp == fp) { 427 error = EINVAL; 428 goto leave0; 429 } 430 431 ciargs.changelist = kev; 432 433 switch (args->op) { 434 case LINUX_EPOLL_CTL_MOD: 435 /* 436 * We don't memorize which events were set for this FD 437 * on this level, so just delete all we could have set: 438 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 439 */ 440 error = epoll_delete_all_events(td, epfp, args->fd); 441 if (error) 442 goto leave0; 443 /* FALLTHROUGH */ 444 445 case LINUX_EPOLL_CTL_ADD: 446 kev_flags = EV_ADD | EV_ENABLE; 447 break; 448 449 case LINUX_EPOLL_CTL_DEL: 450 /* CTL_DEL means unregister this fd with this epoll */ 451 error = epoll_delete_all_events(td, epfp, args->fd); 452 goto leave0; 453 454 default: 455 error = EINVAL; 456 goto leave0; 457 } 458 459 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 460 kev, &nchanges); 461 if (error) 462 goto leave0; 463 464 epoll_fd_install(td, args->fd, le.data); 465 466 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 467 468 leave0: 469 fdrop(fp, td); 470 471 leave1: 472 fdrop(epfp, td); 473 return (error); 474 } 475 476 /* 477 * Wait for a filter to be triggered on the epoll file descriptor. 478 */ 479 static int 480 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 481 int maxevents, int timeout, sigset_t *uset) 482 { 483 struct file *epfp; 484 struct timespec ts, *tsp; 485 cap_rights_t rights; 486 struct epoll_copyout_args coargs; 487 struct kevent_copyops k_ops = { &coargs, 488 epoll_kev_copyout, 489 NULL}; 490 int error; 491 492 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 493 return (EINVAL); 494 495 if (uset != NULL) { 496 error = kern_sigprocmask(td, SIG_SETMASK, uset, 497 &td->td_oldsigmask, 0); 498 if (error != 0) 499 return (error); 500 td->td_pflags |= TDP_OLDMASK; 501 /* 502 * Make sure that ast() is called on return to 503 * usermode and TDP_OLDMASK is cleared, restoring old 504 * sigmask. 505 */ 506 thread_lock(td); 507 td->td_flags |= TDF_ASTPENDING; 508 thread_unlock(td); 509 } 510 511 error = fget(td, epfd, 512 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 513 if (error != 0) 514 return (error); 515 516 coargs.leventlist = events; 517 coargs.p = td->td_proc; 518 coargs.count = 0; 519 coargs.error = 0; 520 521 if (timeout != -1) { 522 if (timeout < 0) { 523 error = EINVAL; 524 goto leave; 525 } 526 /* Convert from milliseconds to timespec. */ 527 ts.tv_sec = timeout / 1000; 528 ts.tv_nsec = (timeout % 1000) * 1000000; 529 tsp = &ts; 530 } else { 531 tsp = NULL; 532 } 533 534 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 535 if (error == 0 && coargs.error != 0) 536 error = coargs.error; 537 538 /* 539 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 540 * Maybe we should translate that but I don't think it matters at all. 541 */ 542 if (error == 0) 543 td->td_retval[0] = coargs.count; 544 leave: 545 fdrop(epfp, td); 546 return (error); 547 } 548 549 int 550 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 551 { 552 553 return (linux_epoll_wait_common(td, args->epfd, args->events, 554 args->maxevents, args->timeout, NULL)); 555 } 556 557 int 558 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 559 { 560 sigset_t mask, *pmask; 561 l_sigset_t lmask; 562 int error; 563 564 if (args->mask != NULL) { 565 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 566 if (error != 0) 567 return (error); 568 linux_to_bsd_sigset(&lmask, &mask); 569 pmask = &mask; 570 } else 571 pmask = NULL; 572 return (linux_epoll_wait_common(td, args->epfd, args->events, 573 args->maxevents, args->timeout, pmask)); 574 } 575 576 static int 577 epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 578 { 579 struct epoll_copyin_args ciargs; 580 struct kevent kev; 581 struct kevent_copyops k_ops = { &ciargs, 582 NULL, 583 epoll_kev_copyin}; 584 int error; 585 586 ciargs.changelist = &kev; 587 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 588 589 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 590 591 /* 592 * here we ignore ENONT, because we don't keep track of events here 593 */ 594 if (error == ENOENT) 595 error = 0; 596 return (error); 597 } 598 599 static int 600 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 601 { 602 int error1, error2; 603 604 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 605 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 606 607 /* report any errors we got */ 608 return (error1 == 0 ? error2 : error1); 609 } 610 611 static int 612 eventfd_create(struct thread *td, uint32_t initval, int flags) 613 { 614 struct filedesc *fdp; 615 struct eventfd *efd; 616 struct file *fp; 617 int fflags, fd, error; 618 619 fflags = 0; 620 if ((flags & LINUX_O_CLOEXEC) != 0) 621 fflags |= O_CLOEXEC; 622 623 fdp = td->td_proc->p_fd; 624 error = falloc(td, &fp, &fd, fflags); 625 if (error) 626 return (error); 627 628 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 629 efd->efd_flags = flags; 630 efd->efd_count = initval; 631 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 632 633 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 634 635 fflags = FREAD | FWRITE; 636 if ((flags & LINUX_O_NONBLOCK) != 0) 637 fflags |= FNONBLOCK; 638 639 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 640 fdrop(fp, td); 641 642 td->td_retval[0] = fd; 643 return (error); 644 } 645 646 int 647 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 648 { 649 650 return (eventfd_create(td, args->initval, 0)); 651 } 652 653 int 654 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 655 { 656 657 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 658 return (EINVAL); 659 660 return (eventfd_create(td, args->initval, args->flags)); 661 } 662 663 static int 664 eventfd_close(struct file *fp, struct thread *td) 665 { 666 struct eventfd *efd; 667 668 efd = fp->f_data; 669 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 670 return (EBADF); 671 672 seldrain(&efd->efd_sel); 673 knlist_destroy(&efd->efd_sel.si_note); 674 675 fp->f_ops = &badfileops; 676 mtx_destroy(&efd->efd_lock); 677 free(efd, M_EPOLL); 678 679 return (0); 680 } 681 682 static int 683 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 684 int flags, struct thread *td) 685 { 686 struct eventfd *efd; 687 eventfd_t count; 688 int error; 689 690 efd = fp->f_data; 691 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 692 return (EBADF); 693 694 if (uio->uio_resid < sizeof(eventfd_t)) 695 return (EINVAL); 696 697 error = 0; 698 mtx_lock(&efd->efd_lock); 699 retry: 700 if (efd->efd_count == 0) { 701 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 702 mtx_unlock(&efd->efd_lock); 703 return (EAGAIN); 704 } 705 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 706 if (error == 0) 707 goto retry; 708 } 709 if (error == 0) { 710 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 711 count = 1; 712 --efd->efd_count; 713 } else { 714 count = efd->efd_count; 715 efd->efd_count = 0; 716 } 717 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 718 selwakeup(&efd->efd_sel); 719 wakeup(&efd->efd_count); 720 mtx_unlock(&efd->efd_lock); 721 error = uiomove(&count, sizeof(eventfd_t), uio); 722 } else 723 mtx_unlock(&efd->efd_lock); 724 725 return (error); 726 } 727 728 static int 729 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 730 int flags, struct thread *td) 731 { 732 struct eventfd *efd; 733 eventfd_t count; 734 int error; 735 736 efd = fp->f_data; 737 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 738 return (EBADF); 739 740 if (uio->uio_resid < sizeof(eventfd_t)) 741 return (EINVAL); 742 743 error = uiomove(&count, sizeof(eventfd_t), uio); 744 if (error) 745 return (error); 746 if (count == UINT64_MAX) 747 return (EINVAL); 748 749 mtx_lock(&efd->efd_lock); 750 retry: 751 if (UINT64_MAX - efd->efd_count <= count) { 752 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 753 mtx_unlock(&efd->efd_lock); 754 /* Do not not return the number of bytes written */ 755 uio->uio_resid += sizeof(eventfd_t); 756 return (EAGAIN); 757 } 758 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 759 PCATCH, "lefdwr", 0); 760 if (error == 0) 761 goto retry; 762 } 763 if (error == 0) { 764 efd->efd_count += count; 765 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 766 selwakeup(&efd->efd_sel); 767 wakeup(&efd->efd_count); 768 } 769 mtx_unlock(&efd->efd_lock); 770 771 return (error); 772 } 773 774 static int 775 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 776 struct thread *td) 777 { 778 struct eventfd *efd; 779 int revents = 0; 780 781 efd = fp->f_data; 782 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 783 return (POLLERR); 784 785 mtx_lock(&efd->efd_lock); 786 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 787 revents |= events & (POLLIN|POLLRDNORM); 788 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 789 revents |= events & (POLLOUT|POLLWRNORM); 790 if (revents == 0) 791 selrecord(td, &efd->efd_sel); 792 mtx_unlock(&efd->efd_lock); 793 794 return (revents); 795 } 796 797 /*ARGSUSED*/ 798 static int 799 eventfd_kqfilter(struct file *fp, struct knote *kn) 800 { 801 struct eventfd *efd; 802 803 efd = fp->f_data; 804 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 805 return (EINVAL); 806 807 mtx_lock(&efd->efd_lock); 808 switch (kn->kn_filter) { 809 case EVFILT_READ: 810 kn->kn_fop = &eventfd_rfiltops; 811 break; 812 case EVFILT_WRITE: 813 kn->kn_fop = &eventfd_wfiltops; 814 break; 815 default: 816 mtx_unlock(&efd->efd_lock); 817 return (EINVAL); 818 } 819 820 kn->kn_hook = efd; 821 knlist_add(&efd->efd_sel.si_note, kn, 1); 822 mtx_unlock(&efd->efd_lock); 823 824 return (0); 825 } 826 827 static void 828 filt_eventfddetach(struct knote *kn) 829 { 830 struct eventfd *efd = kn->kn_hook; 831 832 mtx_lock(&efd->efd_lock); 833 knlist_remove(&efd->efd_sel.si_note, kn, 1); 834 mtx_unlock(&efd->efd_lock); 835 } 836 837 /*ARGSUSED*/ 838 static int 839 filt_eventfdread(struct knote *kn, long hint) 840 { 841 struct eventfd *efd = kn->kn_hook; 842 int ret; 843 844 mtx_assert(&efd->efd_lock, MA_OWNED); 845 ret = (efd->efd_count > 0); 846 847 return (ret); 848 } 849 850 /*ARGSUSED*/ 851 static int 852 filt_eventfdwrite(struct knote *kn, long hint) 853 { 854 struct eventfd *efd = kn->kn_hook; 855 int ret; 856 857 mtx_assert(&efd->efd_lock, MA_OWNED); 858 ret = (UINT64_MAX - 1 > efd->efd_count); 859 860 return (ret); 861 } 862 863 /*ARGSUSED*/ 864 static int 865 eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 866 struct thread *td) 867 { 868 869 return (ENXIO); 870 } 871 872 /*ARGSUSED*/ 873 static int 874 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 875 struct ucred *active_cred, struct thread *td) 876 { 877 struct eventfd *efd; 878 879 efd = fp->f_data; 880 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 881 return (EINVAL); 882 883 switch (cmd) 884 { 885 case FIONBIO: 886 if (*(int *)data) 887 efd->efd_flags |= LINUX_O_NONBLOCK; 888 else 889 efd->efd_flags &= ~LINUX_O_NONBLOCK; 890 case FIOASYNC: 891 return (0); 892 default: 893 return (ENXIO); 894 } 895 } 896 897 /*ARGSUSED*/ 898 static int 899 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 900 struct thread *td) 901 { 902 903 return (ENXIO); 904 } 905 906 /*ARGSUSED*/ 907 static int 908 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 909 { 910 911 kif->kf_type = KF_TYPE_UNKNOWN; 912 return (0); 913 } 914