/*-
 * Copyright (c) 2007 Roman Divacky
 * Copyright (c) 2014 Dmitry Chagin
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/imgact.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/capability.h>
#include <sys/types.h>
#include <sys/user.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/errno.h>
#include <sys/event.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/timespec.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_event.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_util.h>

/*
 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits
 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only
 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied
 * data verbatim. Therefore we allocate 64-bit memory block to pass
 * user supplied data for every file descriptor.
73 */ 74 75 typedef uint64_t epoll_udata_t; 76 77 struct epoll_emuldata { 78 uint32_t fdc; /* epoll udata max index */ 79 epoll_udata_t udata[1]; /* epoll user data vector */ 80 }; 81 82 #define EPOLL_DEF_SZ 16 83 #define EPOLL_SIZE(fdn) \ 84 (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 85 86 struct epoll_event { 87 uint32_t events; 88 epoll_udata_t data; 89 } 90 #if defined(__amd64__) 91 __attribute__((packed)) 92 #endif 93 ; 94 95 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 96 97 static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 98 static int epoll_to_kevent(struct thread *td, struct file *epfp, 99 int fd, struct epoll_event *l_event, int *kev_flags, 100 struct kevent *kevent, int *nkevents); 101 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 102 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 103 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 104 static int epoll_delete_event(struct thread *td, struct file *epfp, 105 int fd, int filter); 106 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 107 int fd); 108 109 struct epoll_copyin_args { 110 struct kevent *changelist; 111 }; 112 113 struct epoll_copyout_args { 114 struct epoll_event *leventlist; 115 struct proc *p; 116 uint32_t count; 117 int error; 118 }; 119 120 /* eventfd */ 121 typedef uint64_t eventfd_t; 122 123 static fo_rdwr_t eventfd_read; 124 static fo_rdwr_t eventfd_write; 125 static fo_truncate_t eventfd_truncate; 126 static fo_ioctl_t eventfd_ioctl; 127 static fo_poll_t eventfd_poll; 128 static fo_kqfilter_t eventfd_kqfilter; 129 static fo_stat_t eventfd_stat; 130 static fo_close_t eventfd_close; 131 static fo_fill_kinfo_t eventfd_fill_kinfo; 132 133 static struct fileops eventfdops = { 134 .fo_read = eventfd_read, 135 .fo_write = eventfd_write, 136 .fo_truncate = eventfd_truncate, 137 .fo_ioctl = eventfd_ioctl, 138 .fo_poll = 
eventfd_poll, 139 .fo_kqfilter = eventfd_kqfilter, 140 .fo_stat = eventfd_stat, 141 .fo_close = eventfd_close, 142 .fo_chmod = invfo_chmod, 143 .fo_chown = invfo_chown, 144 .fo_sendfile = invfo_sendfile, 145 .fo_fill_kinfo = eventfd_fill_kinfo, 146 .fo_flags = DFLAG_PASSABLE 147 }; 148 149 static void filt_eventfddetach(struct knote *kn); 150 static int filt_eventfdread(struct knote *kn, long hint); 151 static int filt_eventfdwrite(struct knote *kn, long hint); 152 153 static struct filterops eventfd_rfiltops = { 154 .f_isfd = 1, 155 .f_detach = filt_eventfddetach, 156 .f_event = filt_eventfdread 157 }; 158 static struct filterops eventfd_wfiltops = { 159 .f_isfd = 1, 160 .f_detach = filt_eventfddetach, 161 .f_event = filt_eventfdwrite 162 }; 163 164 struct eventfd { 165 eventfd_t efd_count; 166 uint32_t efd_flags; 167 struct selinfo efd_sel; 168 struct mtx efd_lock; 169 }; 170 171 static int eventfd_create(struct thread *td, uint32_t initval, int flags); 172 173 174 static void 175 epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 176 { 177 struct linux_pemuldata *pem; 178 struct epoll_emuldata *emd; 179 struct proc *p; 180 181 p = td->td_proc; 182 183 pem = pem_find(p); 184 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 185 186 LINUX_PEM_XLOCK(pem); 187 if (pem->epoll == NULL) { 188 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 189 emd->fdc = fd; 190 pem->epoll = emd; 191 } else { 192 emd = pem->epoll; 193 if (fd > emd->fdc) { 194 emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 195 emd->fdc = fd; 196 pem->epoll = emd; 197 } 198 } 199 emd->udata[fd] = udata; 200 LINUX_PEM_XUNLOCK(pem); 201 } 202 203 static int 204 epoll_create_common(struct thread *td, int flags) 205 { 206 int error; 207 208 error = kern_kqueue(td, flags); 209 if (error) 210 return (error); 211 212 epoll_fd_install(td, EPOLL_DEF_SZ, 0); 213 214 return (0); 215 } 216 217 int 218 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 219 { 
220 221 /* 222 * args->size is unused. Linux just tests it 223 * and then forgets it as well. 224 */ 225 if (args->size <= 0) 226 return (EINVAL); 227 228 return (epoll_create_common(td, 0)); 229 } 230 231 int 232 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 233 { 234 int flags; 235 236 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 237 return (EINVAL); 238 239 flags = 0; 240 if ((args->flags & LINUX_O_CLOEXEC) != 0) 241 flags |= O_CLOEXEC; 242 243 return (epoll_create_common(td, flags)); 244 } 245 246 /* Structure converting function from epoll to kevent. */ 247 static int 248 epoll_to_kevent(struct thread *td, struct file *epfp, 249 int fd, struct epoll_event *l_event, int *kev_flags, 250 struct kevent *kevent, int *nkevents) 251 { 252 uint32_t levents = l_event->events; 253 struct linux_pemuldata *pem; 254 struct proc *p; 255 256 /* flags related to how event is registered */ 257 if ((levents & LINUX_EPOLLONESHOT) != 0) 258 *kev_flags |= EV_ONESHOT; 259 if ((levents & LINUX_EPOLLET) != 0) 260 *kev_flags |= EV_CLEAR; 261 if ((levents & LINUX_EPOLLERR) != 0) 262 *kev_flags |= EV_ERROR; 263 264 /* flags related to what event is registered */ 265 if ((levents & LINUX_EPOLL_EVRD) != 0) { 266 EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 267 ++(*nkevents); 268 } 269 if ((levents & LINUX_EPOLL_EVWR) != 0) { 270 EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 271 ++(*nkevents); 272 } 273 274 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 275 p = td->td_proc; 276 277 pem = pem_find(p); 278 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 279 KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 280 281 LINUX_PEM_XLOCK(pem); 282 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 283 pem->flags |= LINUX_XUNSUP_EPOLL; 284 LINUX_PEM_XUNLOCK(pem); 285 linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 286 levents); 287 } else 288 LINUX_PEM_XUNLOCK(pem); 289 return (EINVAL); 290 } 291 292 return (0); 293 } 
294 295 /* 296 * Structure converting function from kevent to epoll. In a case 297 * this is called on error in registration we store the error in 298 * event->data and pick it up later in linux_epoll_ctl(). 299 */ 300 static void 301 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 302 { 303 304 if ((kevent->flags & EV_ERROR) != 0) { 305 l_event->events = LINUX_EPOLLERR; 306 return; 307 } 308 309 switch (kevent->filter) { 310 case EVFILT_READ: 311 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 312 break; 313 case EVFILT_WRITE: 314 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 315 break; 316 } 317 } 318 319 /* 320 * Copyout callback used by kevent. This converts kevent 321 * events to epoll events and copies them back to the 322 * userspace. This is also called on error on registering 323 * of the filter. 324 */ 325 static int 326 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 327 { 328 struct epoll_copyout_args *args; 329 struct linux_pemuldata *pem; 330 struct epoll_emuldata *emd; 331 struct epoll_event *eep; 332 int error, fd, i; 333 334 args = (struct epoll_copyout_args*) arg; 335 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 336 337 pem = pem_find(args->p); 338 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 339 LINUX_PEM_SLOCK(pem); 340 emd = pem->epoll; 341 KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 342 343 for (i = 0; i < count; i++) { 344 kevent_to_epoll(&kevp[i], &eep[i]); 345 346 fd = kevp[i].ident; 347 KASSERT(fd <= emd->fdc, ("epoll user data vector" 348 " is too small.\n")); 349 eep[i].data = emd->udata[fd]; 350 } 351 LINUX_PEM_SUNLOCK(pem); 352 353 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 354 if (error == 0) { 355 args->leventlist += count; 356 args->count += count; 357 } else if (args->error == 0) 358 args->error = error; 359 360 free(eep, M_EPOLL); 361 return (error); 362 } 363 364 /* 365 * Copyin callback used by kevent. 
This copies already 366 * converted filters from kernel memory to the kevent 367 * internal kernel memory. Hence the memcpy instead of 368 * copyin. 369 */ 370 static int 371 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 372 { 373 struct epoll_copyin_args *args; 374 375 args = (struct epoll_copyin_args*) arg; 376 377 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 378 args->changelist += count; 379 380 return (0); 381 } 382 383 /* 384 * Load epoll filter, convert it to kevent filter 385 * and load it into kevent subsystem. 386 */ 387 int 388 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 389 { 390 struct file *epfp, *fp; 391 struct epoll_copyin_args ciargs; 392 struct kevent kev[2]; 393 struct kevent_copyops k_ops = { &ciargs, 394 NULL, 395 epoll_kev_copyin}; 396 struct epoll_event le; 397 cap_rights_t rights; 398 int kev_flags; 399 int nchanges = 0; 400 int error; 401 402 if (args->op != LINUX_EPOLL_CTL_DEL) { 403 error = copyin(args->event, &le, sizeof(le)); 404 if (error != 0) 405 return (error); 406 } 407 408 error = fget(td, args->epfd, 409 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 410 if (error != 0) 411 return (error); 412 if (epfp->f_type != DTYPE_KQUEUE) 413 goto leave1; 414 415 /* Protect user data vector from incorrectly supplied fd. 
*/ 416 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 417 if (error != 0) 418 goto leave1; 419 420 /* Linux disallows spying on himself */ 421 if (epfp == fp) { 422 error = EINVAL; 423 goto leave0; 424 } 425 426 ciargs.changelist = kev; 427 428 switch (args->op) { 429 case LINUX_EPOLL_CTL_MOD: 430 /* 431 * We don't memorize which events were set for this FD 432 * on this level, so just delete all we could have set: 433 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 434 */ 435 error = epoll_delete_all_events(td, epfp, args->fd); 436 if (error) 437 goto leave0; 438 /* FALLTHROUGH */ 439 440 case LINUX_EPOLL_CTL_ADD: 441 kev_flags = EV_ADD | EV_ENABLE; 442 break; 443 444 case LINUX_EPOLL_CTL_DEL: 445 /* CTL_DEL means unregister this fd with this epoll */ 446 error = epoll_delete_all_events(td, epfp, args->fd); 447 goto leave0; 448 449 default: 450 error = EINVAL; 451 goto leave0; 452 } 453 454 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 455 kev, &nchanges); 456 if (error) 457 goto leave0; 458 459 epoll_fd_install(td, args->fd, le.data); 460 461 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 462 463 leave0: 464 fdrop(fp, td); 465 466 leave1: 467 fdrop(epfp, td); 468 return (error); 469 } 470 471 /* 472 * Wait for a filter to be triggered on the epoll file descriptor. 
473 */ 474 static int 475 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 476 int maxevents, int timeout, sigset_t *uset) 477 { 478 struct file *epfp; 479 struct timespec ts, *tsp; 480 cap_rights_t rights; 481 struct epoll_copyout_args coargs; 482 struct kevent_copyops k_ops = { &coargs, 483 epoll_kev_copyout, 484 NULL}; 485 int error; 486 487 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 488 return (EINVAL); 489 490 if (uset != NULL) { 491 error = kern_sigprocmask(td, SIG_SETMASK, uset, 492 &td->td_oldsigmask, 0); 493 if (error != 0) 494 return (error); 495 td->td_pflags |= TDP_OLDMASK; 496 /* 497 * Make sure that ast() is called on return to 498 * usermode and TDP_OLDMASK is cleared, restoring old 499 * sigmask. 500 */ 501 thread_lock(td); 502 td->td_flags |= TDF_ASTPENDING; 503 thread_unlock(td); 504 } 505 506 error = fget(td, epfd, 507 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 508 if (error != 0) 509 return (error); 510 511 coargs.leventlist = events; 512 coargs.p = td->td_proc; 513 coargs.count = 0; 514 coargs.error = 0; 515 516 if (timeout != -1) { 517 if (timeout < 0) { 518 error = EINVAL; 519 goto leave; 520 } 521 /* Convert from milliseconds to timespec. */ 522 ts.tv_sec = timeout / 1000; 523 ts.tv_nsec = (timeout % 1000) * 1000000; 524 tsp = &ts; 525 } else { 526 tsp = NULL; 527 } 528 529 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 530 if (error == 0 && coargs.error != 0) 531 error = coargs.error; 532 533 /* 534 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 535 * Maybe we should translate that but I don't think it matters at all. 
536 */ 537 if (error == 0) 538 td->td_retval[0] = coargs.count; 539 leave: 540 fdrop(epfp, td); 541 return (error); 542 } 543 544 int 545 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 546 { 547 548 return (linux_epoll_wait_common(td, args->epfd, args->events, 549 args->maxevents, args->timeout, NULL)); 550 } 551 552 int 553 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 554 { 555 sigset_t mask, *pmask; 556 l_sigset_t lmask; 557 int error; 558 559 if (args->mask != NULL) { 560 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 561 if (error != 0) 562 return (error); 563 linux_to_bsd_sigset(&lmask, &mask); 564 pmask = &mask; 565 } else 566 pmask = NULL; 567 return (linux_epoll_wait_common(td, args->epfd, args->events, 568 args->maxevents, args->timeout, pmask)); 569 } 570 571 static int 572 epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 573 { 574 struct epoll_copyin_args ciargs; 575 struct kevent kev; 576 struct kevent_copyops k_ops = { &ciargs, 577 NULL, 578 epoll_kev_copyin}; 579 int error; 580 581 ciargs.changelist = &kev; 582 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 583 584 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 585 586 /* 587 * here we ignore ENONT, because we don't keep track of events here 588 */ 589 if (error == ENOENT) 590 error = 0; 591 return (error); 592 } 593 594 static int 595 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 596 { 597 int error1, error2; 598 599 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 600 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 601 602 /* report any errors we got */ 603 return (error1 == 0 ? 
error2 : error1); 604 } 605 606 static int 607 eventfd_create(struct thread *td, uint32_t initval, int flags) 608 { 609 struct filedesc *fdp; 610 struct eventfd *efd; 611 struct file *fp; 612 int fflags, fd, error; 613 614 fflags = 0; 615 if ((flags & LINUX_O_CLOEXEC) != 0) 616 fflags |= O_CLOEXEC; 617 618 fdp = td->td_proc->p_fd; 619 error = falloc(td, &fp, &fd, fflags); 620 if (error) 621 return (error); 622 623 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 624 efd->efd_flags = flags; 625 efd->efd_count = initval; 626 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 627 628 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 629 630 fflags = FREAD | FWRITE; 631 if ((flags & LINUX_O_NONBLOCK) != 0) 632 fflags |= FNONBLOCK; 633 634 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 635 fdrop(fp, td); 636 637 td->td_retval[0] = fd; 638 return (error); 639 } 640 641 int 642 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 643 { 644 645 return (eventfd_create(td, args->initval, 0)); 646 } 647 648 int 649 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 650 { 651 652 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 653 return (EINVAL); 654 655 return (eventfd_create(td, args->initval, args->flags)); 656 } 657 658 static int 659 eventfd_close(struct file *fp, struct thread *td) 660 { 661 struct eventfd *efd; 662 663 efd = fp->f_data; 664 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 665 return (EBADF); 666 667 seldrain(&efd->efd_sel); 668 knlist_destroy(&efd->efd_sel.si_note); 669 670 fp->f_ops = &badfileops; 671 mtx_destroy(&efd->efd_lock); 672 free(efd, M_EPOLL); 673 674 return (0); 675 } 676 677 static int 678 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 679 int flags, struct thread *td) 680 { 681 struct eventfd *efd; 682 eventfd_t count; 683 int error; 684 685 efd = fp->f_data; 686 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 687 return 
(EBADF); 688 689 if (uio->uio_resid < sizeof(eventfd_t)) 690 return (EINVAL); 691 692 error = 0; 693 mtx_lock(&efd->efd_lock); 694 retry: 695 if (efd->efd_count == 0) { 696 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 697 mtx_unlock(&efd->efd_lock); 698 return (EAGAIN); 699 } 700 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 701 if (error == 0) 702 goto retry; 703 } 704 if (error == 0) { 705 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 706 count = 1; 707 --efd->efd_count; 708 } else { 709 count = efd->efd_count; 710 efd->efd_count = 0; 711 } 712 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 713 selwakeup(&efd->efd_sel); 714 wakeup(&efd->efd_count); 715 mtx_unlock(&efd->efd_lock); 716 error = uiomove(&count, sizeof(eventfd_t), uio); 717 } else 718 mtx_unlock(&efd->efd_lock); 719 720 return (error); 721 } 722 723 static int 724 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 725 int flags, struct thread *td) 726 { 727 struct eventfd *efd; 728 eventfd_t count; 729 int error; 730 731 efd = fp->f_data; 732 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 733 return (EBADF); 734 735 if (uio->uio_resid < sizeof(eventfd_t)) 736 return (EINVAL); 737 738 error = uiomove(&count, sizeof(eventfd_t), uio); 739 if (error) 740 return (error); 741 if (count == UINT64_MAX) 742 return (EINVAL); 743 744 mtx_lock(&efd->efd_lock); 745 retry: 746 if (UINT64_MAX - efd->efd_count <= count) { 747 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 748 mtx_unlock(&efd->efd_lock); 749 return (EAGAIN); 750 } 751 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 752 PCATCH, "lefdwr", 0); 753 if (error == 0) 754 goto retry; 755 } 756 if (error == 0) { 757 efd->efd_count += count; 758 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 759 selwakeup(&efd->efd_sel); 760 wakeup(&efd->efd_count); 761 } 762 mtx_unlock(&efd->efd_lock); 763 764 return (error); 765 } 766 767 static int 768 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 769 
struct thread *td) 770 { 771 struct eventfd *efd; 772 int revents = 0; 773 774 efd = fp->f_data; 775 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 776 return (POLLERR); 777 778 mtx_lock(&efd->efd_lock); 779 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 780 revents |= events & (POLLIN|POLLRDNORM); 781 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 782 revents |= events & (POLLOUT|POLLWRNORM); 783 if (revents == 0) 784 selrecord(td, &efd->efd_sel); 785 mtx_unlock(&efd->efd_lock); 786 787 return (revents); 788 } 789 790 /*ARGSUSED*/ 791 static int 792 eventfd_kqfilter(struct file *fp, struct knote *kn) 793 { 794 struct eventfd *efd; 795 796 efd = fp->f_data; 797 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 798 return (EINVAL); 799 800 mtx_lock(&efd->efd_lock); 801 switch (kn->kn_filter) { 802 case EVFILT_READ: 803 kn->kn_fop = &eventfd_rfiltops; 804 break; 805 case EVFILT_WRITE: 806 kn->kn_fop = &eventfd_wfiltops; 807 break; 808 default: 809 mtx_unlock(&efd->efd_lock); 810 return (EINVAL); 811 } 812 813 kn->kn_hook = efd; 814 knlist_add(&efd->efd_sel.si_note, kn, 1); 815 mtx_unlock(&efd->efd_lock); 816 817 return (0); 818 } 819 820 static void 821 filt_eventfddetach(struct knote *kn) 822 { 823 struct eventfd *efd = kn->kn_hook; 824 825 mtx_lock(&efd->efd_lock); 826 knlist_remove(&efd->efd_sel.si_note, kn, 1); 827 mtx_unlock(&efd->efd_lock); 828 } 829 830 /*ARGSUSED*/ 831 static int 832 filt_eventfdread(struct knote *kn, long hint) 833 { 834 struct eventfd *efd = kn->kn_hook; 835 int ret; 836 837 mtx_assert(&efd->efd_lock, MA_OWNED); 838 ret = (efd->efd_count > 0); 839 840 return (ret); 841 } 842 843 /*ARGSUSED*/ 844 static int 845 filt_eventfdwrite(struct knote *kn, long hint) 846 { 847 struct eventfd *efd = kn->kn_hook; 848 int ret; 849 850 mtx_assert(&efd->efd_lock, MA_OWNED); 851 ret = (UINT64_MAX - 1 > efd->efd_count); 852 853 return (ret); 854 } 855 856 /*ARGSUSED*/ 857 static int 858 eventfd_truncate(struct file *fp, 
off_t length, struct ucred *active_cred, 859 struct thread *td) 860 { 861 862 return (ENXIO); 863 } 864 865 /*ARGSUSED*/ 866 static int 867 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 868 struct ucred *active_cred, struct thread *td) 869 { 870 871 return (ENXIO); 872 } 873 874 /*ARGSUSED*/ 875 static int 876 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 877 struct thread *td) 878 { 879 880 return (ENXIO); 881 } 882 883 /*ARGSUSED*/ 884 static int 885 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 886 { 887 888 kif->kf_type = KF_TYPE_UNKNOWN; 889 return (0); 890 } 891