1 /*- 2 * Copyright (c) 2007 Roman Divacky 3 * Copyright (c) 2014 Dmitry Chagin 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/imgact.h> 36 #include <sys/kernel.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/capability.h> 41 #include <sys/types.h> 42 #include <sys/user.h> 43 #include <sys/file.h> 44 #include <sys/filedesc.h> 45 #include <sys/errno.h> 46 #include <sys/event.h> 47 #include <sys/poll.h> 48 #include <sys/proc.h> 49 #include <sys/selinfo.h> 50 #include <sys/sx.h> 51 #include <sys/syscallsubr.h> 52 #include <sys/timespec.h> 53 54 #ifdef COMPAT_LINUX32 55 #include <machine/../linux32/linux.h> 56 #include <machine/../linux32/linux32_proto.h> 57 #else 58 #include <machine/../linux/linux.h> 59 #include <machine/../linux/linux_proto.h> 60 #endif 61 62 #include <compat/linux/linux_emul.h> 63 #include <compat/linux/linux_event.h> 64 #include <compat/linux/linux_file.h> 65 #include <compat/linux/linux_util.h> 66 67 /* 68 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 69 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 70 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 71 * data verbatuim. Therefore we allocate 64-bit memory block to pass 72 * user supplied data for every file descriptor. 73 */ 74 75 typedef uint64_t epoll_udata_t; 76 77 struct epoll_emuldata { 78 uint32_t fdc; /* epoll udata max index */ 79 epoll_udata_t udata[1]; /* epoll user data vector */ 80 }; 81 82 #define EPOLL_DEF_SZ 16 83 #define EPOLL_SIZE(fdn) \ 84 (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 85 86 struct epoll_event { 87 uint32_t events; 88 epoll_udata_t data; 89 } 90 #if defined(__amd64__) 91 __attribute__((packed)) 92 #endif 93 ; 94 95 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 96 97 static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 98 static int epoll_to_kevent(struct thread *td, struct file *epfp, 99 int fd, struct epoll_event *l_event, int *kev_flags, 100 struct kevent *kevent, int *nkevents); 101 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 102 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 103 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 104 static int epoll_delete_event(struct thread *td, struct file *epfp, 105 int fd, int filter); 106 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 107 int fd); 108 109 struct epoll_copyin_args { 110 struct kevent *changelist; 111 }; 112 113 struct epoll_copyout_args { 114 struct epoll_event *leventlist; 115 struct proc *p; 116 uint32_t count; 117 int error; 118 }; 119 120 /* eventfd */ 121 typedef uint64_t eventfd_t; 122 123 static fo_rdwr_t eventfd_read; 124 static fo_rdwr_t eventfd_write; 125 static fo_truncate_t eventfd_truncate; 126 static fo_ioctl_t eventfd_ioctl; 127 static fo_poll_t eventfd_poll; 128 static fo_kqfilter_t eventfd_kqfilter; 129 static fo_stat_t eventfd_stat; 130 static fo_close_t eventfd_close; 131 static fo_fill_kinfo_t eventfd_fill_kinfo; 132 133 static struct fileops eventfdops = { 134 .fo_read = eventfd_read, 135 .fo_write = eventfd_write, 136 .fo_truncate = eventfd_truncate, 137 .fo_ioctl = eventfd_ioctl, 138 .fo_poll = eventfd_poll, 139 .fo_kqfilter = eventfd_kqfilter, 140 .fo_stat = eventfd_stat, 141 .fo_close = eventfd_close, 142 .fo_chmod = invfo_chmod, 143 .fo_chown = invfo_chown, 144 .fo_sendfile = invfo_sendfile, 145 .fo_fill_kinfo = eventfd_fill_kinfo, 146 .fo_flags = DFLAG_PASSABLE 147 }; 148 149 static void filt_eventfddetach(struct knote *kn); 150 static int filt_eventfdread(struct knote *kn, long hint); 151 static int filt_eventfdwrite(struct knote *kn, long hint); 152 153 static struct filterops eventfd_rfiltops = { 154 .f_isfd = 1, 155 .f_detach = filt_eventfddetach, 156 .f_event = filt_eventfdread 157 }; 158 static struct filterops eventfd_wfiltops = { 159 .f_isfd = 1, 160 .f_detach = filt_eventfddetach, 161 .f_event = filt_eventfdwrite 162 }; 163 164 struct eventfd { 165 eventfd_t efd_count; 166 uint32_t efd_flags; 167 struct selinfo efd_sel; 168 struct mtx efd_lock; 169 }; 170 171 static int eventfd_create(struct thread *td, uint32_t initval, int flags); 172 173 174 static void 175 epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 176 { 177 struct linux_pemuldata *pem; 178 struct epoll_emuldata *emd; 179 struct proc *p; 180 181 p = td->td_proc; 182 183 pem = pem_find(p); 184 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 185 186 LINUX_PEM_XLOCK(pem); 187 if (pem->epoll == NULL) { 188 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 189 emd->fdc = fd; 190 pem->epoll = emd; 191 } else { 192 emd = pem->epoll; 193 if (fd > emd->fdc) { 194 emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 195 emd->fdc = fd; 196 pem->epoll = emd; 197 } 198 } 199 emd->udata[fd] = udata; 200 LINUX_PEM_XUNLOCK(pem); 201 } 202 203 static int 204 epoll_create_common(struct thread *td, int flags) 205 { 206 int error; 207 208 error = kern_kqueue(td, flags); 209 if (error) 210 return (error); 211 212 epoll_fd_install(td, EPOLL_DEF_SZ, 0); 213 214 return (0); 215 } 216 217 int 218 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 219 { 220 221 /* 222 * args->size is unused. Linux just tests it 223 * and then forgets it as well. 224 */ 225 if (args->size <= 0) 226 return (EINVAL); 227 228 return (epoll_create_common(td, 0)); 229 } 230 231 int 232 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 233 { 234 int flags; 235 236 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 237 return (EINVAL); 238 239 flags = 0; 240 if ((args->flags & LINUX_O_CLOEXEC) != 0) 241 flags |= O_CLOEXEC; 242 243 return (epoll_create_common(td, flags)); 244 } 245 246 /* Structure converting function from epoll to kevent. */ 247 static int 248 epoll_to_kevent(struct thread *td, struct file *epfp, 249 int fd, struct epoll_event *l_event, int *kev_flags, 250 struct kevent *kevent, int *nkevents) 251 { 252 uint32_t levents = l_event->events; 253 struct linux_pemuldata *pem; 254 struct proc *p; 255 256 /* flags related to how event is registered */ 257 if ((levents & LINUX_EPOLLONESHOT) != 0) 258 *kev_flags |= EV_ONESHOT; 259 if ((levents & LINUX_EPOLLET) != 0) 260 *kev_flags |= EV_CLEAR; 261 if ((levents & LINUX_EPOLLERR) != 0) 262 *kev_flags |= EV_ERROR; 263 264 /* flags related to what event is registered */ 265 if ((levents & LINUX_EPOLL_EVRD) != 0) { 266 EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 267 ++(*nkevents); 268 } 269 if ((levents & LINUX_EPOLL_EVWR) != 0) { 270 EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 271 ++(*nkevents); 272 } 273 274 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 275 p = td->td_proc; 276 277 pem = pem_find(p); 278 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 279 KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 280 281 LINUX_PEM_XLOCK(pem); 282 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 283 pem->flags |= LINUX_XUNSUP_EPOLL; 284 LINUX_PEM_XUNLOCK(pem); 285 linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 286 levents); 287 } else 288 LINUX_PEM_XUNLOCK(pem); 289 return (EINVAL); 290 } 291 292 return (0); 293 } 294 295 /* 296 * Structure converting function from kevent to epoll. In a case 297 * this is called on error in registration we store the error in 298 * event->data and pick it up later in linux_epoll_ctl(). 299 */ 300 static void 301 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 302 { 303 304 if ((kevent->flags & EV_ERROR) != 0) { 305 l_event->events = LINUX_EPOLLERR; 306 return; 307 } 308 309 switch (kevent->filter) { 310 case EVFILT_READ: 311 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 312 break; 313 case EVFILT_WRITE: 314 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 315 break; 316 } 317 } 318 319 /* 320 * Copyout callback used by kevent. This converts kevent 321 * events to epoll events and copies them back to the 322 * userspace. This is also called on error on registering 323 * of the filter. 324 */ 325 static int 326 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 327 { 328 struct epoll_copyout_args *args; 329 struct linux_pemuldata *pem; 330 struct epoll_emuldata *emd; 331 struct epoll_event *eep; 332 int error, fd, i; 333 334 args = (struct epoll_copyout_args*) arg; 335 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 336 337 pem = pem_find(args->p); 338 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 339 LINUX_PEM_SLOCK(pem); 340 emd = pem->epoll; 341 KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 342 343 for (i = 0; i < count; i++) { 344 kevent_to_epoll(&kevp[i], &eep[i]); 345 346 fd = kevp[i].ident; 347 KASSERT(fd <= emd->fdc, ("epoll user data vector" 348 " is too small.\n")); 349 eep[i].data = emd->udata[fd]; 350 } 351 LINUX_PEM_SUNLOCK(pem); 352 353 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 354 if (error == 0) { 355 args->leventlist += count; 356 args->count += count; 357 } else if (args->error == 0) 358 args->error = error; 359 360 free(eep, M_EPOLL); 361 return (error); 362 } 363 364 /* 365 * Copyin callback used by kevent. This copies already 366 * converted filters from kernel memory to the kevent 367 * internal kernel memory. Hence the memcpy instead of 368 * copyin. 369 */ 370 static int 371 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 372 { 373 struct epoll_copyin_args *args; 374 375 args = (struct epoll_copyin_args*) arg; 376 377 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 378 args->changelist += count; 379 380 return (0); 381 } 382 383 /* 384 * Load epoll filter, convert it to kevent filter 385 * and load it into kevent subsystem. 386 */ 387 int 388 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 389 { 390 struct file *epfp, *fp; 391 struct epoll_copyin_args ciargs; 392 struct kevent kev[2]; 393 struct kevent_copyops k_ops = { &ciargs, 394 NULL, 395 epoll_kev_copyin}; 396 struct epoll_event le; 397 cap_rights_t rights; 398 int kev_flags; 399 int nchanges = 0; 400 int error; 401 402 if (args->op != LINUX_EPOLL_CTL_DEL) { 403 error = copyin(args->event, &le, sizeof(le)); 404 if (error != 0) 405 return (error); 406 } 407 408 error = fget(td, args->epfd, 409 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 410 if (error != 0) 411 return (error); 412 if (epfp->f_type != DTYPE_KQUEUE) 413 goto leave1; 414 415 /* Protect user data vector from incorrectly supplied fd. */ 416 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 417 if (error != 0) 418 goto leave1; 419 420 /* Linux disallows spying on himself */ 421 if (epfp == fp) { 422 error = EINVAL; 423 goto leave0; 424 } 425 426 ciargs.changelist = kev; 427 428 switch (args->op) { 429 case LINUX_EPOLL_CTL_MOD: 430 /* 431 * We don't memorize which events were set for this FD 432 * on this level, so just delete all we could have set: 433 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 434 */ 435 error = epoll_delete_all_events(td, epfp, args->fd); 436 if (error) 437 goto leave0; 438 /* FALLTHROUGH */ 439 440 case LINUX_EPOLL_CTL_ADD: 441 kev_flags = EV_ADD | EV_ENABLE; 442 break; 443 444 case LINUX_EPOLL_CTL_DEL: 445 /* CTL_DEL means unregister this fd with this epoll */ 446 error = epoll_delete_all_events(td, epfp, args->fd); 447 goto leave0; 448 449 default: 450 error = EINVAL; 451 goto leave0; 452 } 453 454 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 455 kev, &nchanges); 456 if (error) 457 goto leave0; 458 459 epoll_fd_install(td, args->fd, le.data); 460 461 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 462 463 leave0: 464 fdrop(fp, td); 465 466 leave1: 467 fdrop(epfp, td); 468 return (error); 469 } 470 471 /* 472 * Wait for a filter to be triggered on the epoll file descriptor. 473 */ 474 int 475 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 476 { 477 struct file *epfp; 478 struct timespec ts, *tsp; 479 cap_rights_t rights; 480 struct epoll_copyout_args coargs; 481 struct kevent_copyops k_ops = { &coargs, 482 epoll_kev_copyout, 483 NULL}; 484 int error; 485 486 if (args->maxevents <= 0 || args->maxevents > LINUX_MAX_EVENTS) 487 return (EINVAL); 488 489 error = fget(td, args->epfd, 490 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 491 if (error != 0) 492 return (error); 493 494 coargs.leventlist = args->events; 495 coargs.p = td->td_proc; 496 coargs.count = 0; 497 coargs.error = 0; 498 499 if (args->timeout != -1) { 500 if (args->timeout < 0) { 501 error = EINVAL; 502 goto leave; 503 } 504 /* Convert from milliseconds to timespec. */ 505 ts.tv_sec = args->timeout / 1000; 506 ts.tv_nsec = (args->timeout % 1000) * 1000000; 507 tsp = &ts; 508 } else { 509 tsp = NULL; 510 } 511 512 error = kern_kevent_fp(td, epfp, 0, args->maxevents, &k_ops, tsp); 513 if (error == 0 && coargs.error != 0) 514 error = coargs.error; 515 516 /* 517 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 518 * Maybe we should translate that but I don't think it matters at all. 519 */ 520 if (error == 0) 521 td->td_retval[0] = coargs.count; 522 leave: 523 fdrop(epfp, td); 524 return (error); 525 } 526 527 static int 528 epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 529 { 530 struct epoll_copyin_args ciargs; 531 struct kevent kev; 532 struct kevent_copyops k_ops = { &ciargs, 533 NULL, 534 epoll_kev_copyin}; 535 int error; 536 537 ciargs.changelist = &kev; 538 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 539 540 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 541 542 /* 543 * here we ignore ENONT, because we don't keep track of events here 544 */ 545 if (error == ENOENT) 546 error = 0; 547 return (error); 548 } 549 550 static int 551 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 552 { 553 int error1, error2; 554 555 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 556 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 557 558 /* report any errors we got */ 559 return (error1 == 0 ? error2 : error1); 560 } 561 562 static int 563 eventfd_create(struct thread *td, uint32_t initval, int flags) 564 { 565 struct filedesc *fdp; 566 struct eventfd *efd; 567 struct file *fp; 568 int fflags, fd, error; 569 570 fflags = 0; 571 if ((flags & LINUX_O_CLOEXEC) != 0) 572 fflags |= O_CLOEXEC; 573 574 fdp = td->td_proc->p_fd; 575 error = falloc(td, &fp, &fd, fflags); 576 if (error) 577 return (error); 578 579 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 580 efd->efd_flags = flags; 581 efd->efd_count = initval; 582 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 583 584 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 585 586 fflags = FREAD | FWRITE; 587 if ((flags & LINUX_O_NONBLOCK) != 0) 588 fflags |= FNONBLOCK; 589 590 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 591 fdrop(fp, td); 592 593 td->td_retval[0] = fd; 594 return (error); 595 } 596 597 int 598 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 599 { 600 601 return (eventfd_create(td, args->initval, 0)); 602 } 603 604 int 605 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 606 { 607 608 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 609 return (EINVAL); 610 611 return (eventfd_create(td, args->initval, args->flags)); 612 } 613 614 static int 615 eventfd_close(struct file *fp, struct thread *td) 616 { 617 struct eventfd *efd; 618 619 efd = fp->f_data; 620 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 621 return (EBADF); 622 623 seldrain(&efd->efd_sel); 624 knlist_destroy(&efd->efd_sel.si_note); 625 626 fp->f_ops = &badfileops; 627 mtx_destroy(&efd->efd_lock); 628 free(efd, M_EPOLL); 629 630 return (0); 631 } 632 633 static int 634 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 635 int flags, struct thread *td) 636 { 637 struct eventfd *efd; 638 eventfd_t count; 639 int error; 640 641 efd = fp->f_data; 642 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 643 return (EBADF); 644 645 if (uio->uio_resid < sizeof(eventfd_t)) 646 return (EINVAL); 647 648 error = 0; 649 mtx_lock(&efd->efd_lock); 650 retry: 651 if (efd->efd_count == 0) { 652 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 653 mtx_unlock(&efd->efd_lock); 654 return (EAGAIN); 655 } 656 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 657 if (error == 0) 658 goto retry; 659 } 660 if (error == 0) { 661 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 662 count = 1; 663 --efd->efd_count; 664 } else { 665 count = efd->efd_count; 666 efd->efd_count = 0; 667 } 668 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 669 selwakeup(&efd->efd_sel); 670 wakeup(&efd->efd_count); 671 mtx_unlock(&efd->efd_lock); 672 error = uiomove(&count, sizeof(eventfd_t), uio); 673 } else 674 mtx_unlock(&efd->efd_lock); 675 676 return (error); 677 } 678 679 static int 680 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 681 int flags, struct thread *td) 682 { 683 struct eventfd *efd; 684 eventfd_t count; 685 int error; 686 687 efd = fp->f_data; 688 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 689 return (EBADF); 690 691 if (uio->uio_resid < sizeof(eventfd_t)) 692 return (EINVAL); 693 694 error = uiomove(&count, sizeof(eventfd_t), uio); 695 if (error) 696 return (error); 697 if (count == UINT64_MAX) 698 return (EINVAL); 699 700 mtx_lock(&efd->efd_lock); 701 retry: 702 if (UINT64_MAX - efd->efd_count <= count) { 703 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 704 mtx_unlock(&efd->efd_lock); 705 return (EAGAIN); 706 } 707 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 708 PCATCH, "lefdwr", 0); 709 if (error == 0) 710 goto retry; 711 } 712 if (error == 0) { 713 efd->efd_count += count; 714 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 715 selwakeup(&efd->efd_sel); 716 wakeup(&efd->efd_count); 717 } 718 mtx_unlock(&efd->efd_lock); 719 720 return (error); 721 } 722 723 static int 724 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 725 struct thread *td) 726 { 727 struct eventfd *efd; 728 int revents = 0; 729 730 efd = fp->f_data; 731 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 732 return (POLLERR); 733 734 mtx_lock(&efd->efd_lock); 735 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 736 revents |= events & (POLLIN|POLLRDNORM); 737 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 738 revents |= events & (POLLOUT|POLLWRNORM); 739 if (revents == 0) 740 selrecord(td, &efd->efd_sel); 741 mtx_unlock(&efd->efd_lock); 742 743 return (revents); 744 } 745 746 /*ARGSUSED*/ 747 static int 748 eventfd_kqfilter(struct file *fp, struct knote *kn) 749 { 750 struct eventfd *efd; 751 752 efd = fp->f_data; 753 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 754 return (EINVAL); 755 756 mtx_lock(&efd->efd_lock); 757 switch (kn->kn_filter) { 758 case EVFILT_READ: 759 kn->kn_fop = &eventfd_rfiltops; 760 break; 761 case EVFILT_WRITE: 762 kn->kn_fop = &eventfd_wfiltops; 763 break; 764 default: 765 mtx_unlock(&efd->efd_lock); 766 return (EINVAL); 767 } 768 769 kn->kn_hook = efd; 770 knlist_add(&efd->efd_sel.si_note, kn, 1); 771 mtx_unlock(&efd->efd_lock); 772 773 return (0); 774 } 775 776 static void 777 filt_eventfddetach(struct knote *kn) 778 { 779 struct eventfd *efd = kn->kn_hook; 780 781 mtx_lock(&efd->efd_lock); 782 knlist_remove(&efd->efd_sel.si_note, kn, 1); 783 mtx_unlock(&efd->efd_lock); 784 } 785 786 /*ARGSUSED*/ 787 static int 788 filt_eventfdread(struct knote *kn, long hint) 789 { 790 struct eventfd *efd = kn->kn_hook; 791 int ret; 792 793 mtx_assert(&efd->efd_lock, MA_OWNED); 794 ret = (efd->efd_count > 0); 795 796 return (ret); 797 } 798 799 /*ARGSUSED*/ 800 static int 801 filt_eventfdwrite(struct knote *kn, long hint) 802 { 803 struct eventfd *efd = kn->kn_hook; 804 int ret; 805 806 mtx_assert(&efd->efd_lock, MA_OWNED); 807 ret = (UINT64_MAX - 1 > efd->efd_count); 808 809 return (ret); 810 } 811 812 /*ARGSUSED*/ 813 static int 814 eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 815 struct thread *td) 816 { 817 818 return (ENXIO); 819 } 820 821 /*ARGSUSED*/ 822 static int 823 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 824 struct ucred *active_cred, struct thread *td) 825 { 826 827 return (ENXIO); 828 } 829 830 /*ARGSUSED*/ 831 static int 832 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 833 struct thread *td) 834 { 835 836 return (ENXIO); 837 } 838 839 /*ARGSUSED*/ 840 static int 841 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 842 { 843 844 kif->kf_type = KF_TYPE_UNKNOWN; 845 return (0); 846 } 847