1 /*- 2 * Copyright (c) 2007 Roman Divacky 3 * Copyright (c) 2014 Dmitry Chagin 4 * All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 #include <sys/cdefs.h> 29 __FBSDID("$FreeBSD$"); 30 31 #include "opt_compat.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/imgact.h> 36 #include <sys/kernel.h> 37 #include <sys/limits.h> 38 #include <sys/lock.h> 39 #include <sys/mutex.h> 40 #include <sys/capability.h> 41 #include <sys/types.h> 42 #include <sys/user.h> 43 #include <sys/file.h> 44 #include <sys/filedesc.h> 45 #include <sys/errno.h> 46 #include <sys/event.h> 47 #include <sys/poll.h> 48 #include <sys/proc.h> 49 #include <sys/selinfo.h> 50 #include <sys/sx.h> 51 #include <sys/syscallsubr.h> 52 #include <sys/timespec.h> 53 54 #ifdef COMPAT_LINUX32 55 #include <machine/../linux32/linux.h> 56 #include <machine/../linux32/linux32_proto.h> 57 #else 58 #include <machine/../linux/linux.h> 59 #include <machine/../linux/linux_proto.h> 60 #endif 61 62 #include <compat/linux/linux_emul.h> 63 #include <compat/linux/linux_event.h> 64 #include <compat/linux/linux_file.h> 65 #include <compat/linux/linux_util.h> 66 67 /* 68 * epoll defines 'struct epoll_event' with the field 'data' as 64 bits 69 * on all architectures. But on 32 bit architectures BSD 'struct kevent' only 70 * has 32 bit opaque pointer as 'udata' field. So we can't pass epoll supplied 71 * data verbatuim. Therefore we allocate 64-bit memory block to pass 72 * user supplied data for every file descriptor. 73 */ 74 75 typedef uint64_t epoll_udata_t; 76 77 struct epoll_emuldata { 78 uint32_t fdc; /* epoll udata max index */ 79 epoll_udata_t udata[1]; /* epoll user data vector */ 80 }; 81 82 #define EPOLL_DEF_SZ 16 83 #define EPOLL_SIZE(fdn) \ 84 (sizeof(struct epoll_emuldata)+(fdn) * sizeof(epoll_udata_t)) 85 86 struct epoll_event { 87 uint32_t events; 88 epoll_udata_t data; 89 } 90 #if defined(__amd64__) 91 __attribute__((packed)) 92 #endif 93 ; 94 95 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 96 97 static void epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata); 98 static int epoll_to_kevent(struct thread *td, struct file *epfp, 99 int fd, struct epoll_event *l_event, int *kev_flags, 100 struct kevent *kevent, int *nkevents); 101 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 102 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 103 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 104 static int epoll_delete_event(struct thread *td, struct file *epfp, 105 int fd, int filter); 106 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 107 int fd); 108 109 struct epoll_copyin_args { 110 struct kevent *changelist; 111 }; 112 113 struct epoll_copyout_args { 114 struct epoll_event *leventlist; 115 struct proc *p; 116 uint32_t count; 117 int error; 118 }; 119 120 /* eventfd */ 121 typedef uint64_t eventfd_t; 122 123 static fo_rdwr_t eventfd_read; 124 static fo_rdwr_t eventfd_write; 125 static fo_truncate_t eventfd_truncate; 126 static fo_ioctl_t eventfd_ioctl; 127 static fo_poll_t eventfd_poll; 128 static fo_kqfilter_t eventfd_kqfilter; 129 static fo_stat_t eventfd_stat; 130 static fo_close_t eventfd_close; 131 static fo_fill_kinfo_t eventfd_fill_kinfo; 132 133 static struct fileops eventfdops = { 134 .fo_read = eventfd_read, 135 .fo_write = eventfd_write, 136 .fo_truncate = eventfd_truncate, 137 .fo_ioctl = eventfd_ioctl, 138 .fo_poll = eventfd_poll, 139 .fo_kqfilter = eventfd_kqfilter, 140 .fo_stat = eventfd_stat, 141 .fo_close = eventfd_close, 142 .fo_chmod = invfo_chmod, 143 .fo_chown = invfo_chown, 144 .fo_sendfile = invfo_sendfile, 145 .fo_fill_kinfo = eventfd_fill_kinfo, 146 .fo_flags = DFLAG_PASSABLE 147 }; 148 149 static void filt_eventfddetach(struct knote *kn); 150 static int filt_eventfdread(struct knote *kn, long hint); 151 static int filt_eventfdwrite(struct knote *kn, long hint); 152 153 static struct filterops eventfd_rfiltops = { 154 .f_isfd = 1, 155 .f_detach = filt_eventfddetach, 156 .f_event = filt_eventfdread 157 }; 158 static struct filterops eventfd_wfiltops = { 159 .f_isfd = 1, 160 .f_detach = filt_eventfddetach, 161 .f_event = filt_eventfdwrite 162 }; 163 164 struct eventfd { 165 eventfd_t efd_count; 166 uint32_t efd_flags; 167 struct selinfo efd_sel; 168 struct mtx efd_lock; 169 }; 170 171 static int eventfd_create(struct thread *td, uint32_t initval, int flags); 172 173 174 static void 175 epoll_fd_install(struct thread *td, int fd, epoll_udata_t udata) 176 { 177 struct linux_pemuldata *pem; 178 struct epoll_emuldata *emd; 179 struct proc *p; 180 181 p = td->td_proc; 182 183 pem = pem_find(p); 184 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 185 186 LINUX_PEM_XLOCK(pem); 187 if (pem->epoll == NULL) { 188 emd = malloc(EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 189 emd->fdc = fd; 190 pem->epoll = emd; 191 } else { 192 emd = pem->epoll; 193 if (fd > emd->fdc) { 194 emd = realloc(emd, EPOLL_SIZE(fd), M_EPOLL, M_WAITOK); 195 emd->fdc = fd; 196 pem->epoll = emd; 197 } 198 } 199 emd->udata[fd] = udata; 200 LINUX_PEM_XUNLOCK(pem); 201 } 202 203 static int 204 epoll_create_common(struct thread *td, int flags) 205 { 206 int error; 207 208 error = kern_kqueue(td, flags, NULL); 209 if (error) 210 return (error); 211 212 epoll_fd_install(td, EPOLL_DEF_SZ, 0); 213 214 return (0); 215 } 216 217 int 218 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 219 { 220 221 /* 222 * args->size is unused. Linux just tests it 223 * and then forgets it as well. 224 */ 225 if (args->size <= 0) 226 return (EINVAL); 227 228 return (epoll_create_common(td, 0)); 229 } 230 231 int 232 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 233 { 234 int flags; 235 236 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 237 return (EINVAL); 238 239 flags = 0; 240 if ((args->flags & LINUX_O_CLOEXEC) != 0) 241 flags |= O_CLOEXEC; 242 243 return (epoll_create_common(td, flags)); 244 } 245 246 /* Structure converting function from epoll to kevent. */ 247 static int 248 epoll_to_kevent(struct thread *td, struct file *epfp, 249 int fd, struct epoll_event *l_event, int *kev_flags, 250 struct kevent *kevent, int *nkevents) 251 { 252 uint32_t levents = l_event->events; 253 struct linux_pemuldata *pem; 254 struct proc *p; 255 256 /* flags related to how event is registered */ 257 if ((levents & LINUX_EPOLLONESHOT) != 0) 258 *kev_flags |= EV_ONESHOT; 259 if ((levents & LINUX_EPOLLET) != 0) 260 *kev_flags |= EV_CLEAR; 261 if ((levents & LINUX_EPOLLERR) != 0) 262 *kev_flags |= EV_ERROR; 263 if ((levents & LINUX_EPOLLRDHUP) != 0) 264 *kev_flags |= EV_EOF; 265 266 /* flags related to what event is registered */ 267 if ((levents & LINUX_EPOLL_EVRD) != 0) { 268 EV_SET(kevent++, fd, EVFILT_READ, *kev_flags, 0, 0, 0); 269 ++(*nkevents); 270 } 271 if ((levents & LINUX_EPOLL_EVWR) != 0) { 272 EV_SET(kevent++, fd, EVFILT_WRITE, *kev_flags, 0, 0, 0); 273 ++(*nkevents); 274 } 275 276 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 277 p = td->td_proc; 278 279 pem = pem_find(p); 280 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 281 KASSERT(pem->epoll != NULL, ("epoll proc epolldata not found.\n")); 282 283 LINUX_PEM_XLOCK(pem); 284 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 285 pem->flags |= LINUX_XUNSUP_EPOLL; 286 LINUX_PEM_XUNLOCK(pem); 287 linux_msg(td, "epoll_ctl unsupported flags: 0x%x\n", 288 levents); 289 } else 290 LINUX_PEM_XUNLOCK(pem); 291 return (EINVAL); 292 } 293 294 return (0); 295 } 296 297 /* 298 * Structure converting function from kevent to epoll. In a case 299 * this is called on error in registration we store the error in 300 * event->data and pick it up later in linux_epoll_ctl(). 301 */ 302 static void 303 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 304 { 305 306 if ((kevent->flags & EV_ERROR) != 0) { 307 l_event->events = LINUX_EPOLLERR; 308 return; 309 } 310 311 switch (kevent->filter) { 312 case EVFILT_READ: 313 l_event->events = LINUX_EPOLLIN|LINUX_EPOLLRDNORM|LINUX_EPOLLPRI; 314 if ((kevent->flags & EV_EOF) != 0) 315 l_event->events |= LINUX_EPOLLRDHUP; 316 break; 317 case EVFILT_WRITE: 318 l_event->events = LINUX_EPOLLOUT|LINUX_EPOLLWRNORM; 319 break; 320 } 321 } 322 323 /* 324 * Copyout callback used by kevent. This converts kevent 325 * events to epoll events and copies them back to the 326 * userspace. This is also called on error on registering 327 * of the filter. 328 */ 329 static int 330 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 331 { 332 struct epoll_copyout_args *args; 333 struct linux_pemuldata *pem; 334 struct epoll_emuldata *emd; 335 struct epoll_event *eep; 336 int error, fd, i; 337 338 args = (struct epoll_copyout_args*) arg; 339 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 340 341 pem = pem_find(args->p); 342 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 343 LINUX_PEM_SLOCK(pem); 344 emd = pem->epoll; 345 KASSERT(emd != NULL, ("epoll proc epolldata not found.\n")); 346 347 for (i = 0; i < count; i++) { 348 kevent_to_epoll(&kevp[i], &eep[i]); 349 350 fd = kevp[i].ident; 351 KASSERT(fd <= emd->fdc, ("epoll user data vector" 352 " is too small.\n")); 353 eep[i].data = emd->udata[fd]; 354 } 355 LINUX_PEM_SUNLOCK(pem); 356 357 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 358 if (error == 0) { 359 args->leventlist += count; 360 args->count += count; 361 } else if (args->error == 0) 362 args->error = error; 363 364 free(eep, M_EPOLL); 365 return (error); 366 } 367 368 /* 369 * Copyin callback used by kevent. This copies already 370 * converted filters from kernel memory to the kevent 371 * internal kernel memory. Hence the memcpy instead of 372 * copyin. 373 */ 374 static int 375 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 376 { 377 struct epoll_copyin_args *args; 378 379 args = (struct epoll_copyin_args*) arg; 380 381 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 382 args->changelist += count; 383 384 return (0); 385 } 386 387 /* 388 * Load epoll filter, convert it to kevent filter 389 * and load it into kevent subsystem. 390 */ 391 int 392 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 393 { 394 struct file *epfp, *fp; 395 struct epoll_copyin_args ciargs; 396 struct kevent kev[2]; 397 struct kevent_copyops k_ops = { &ciargs, 398 NULL, 399 epoll_kev_copyin}; 400 struct epoll_event le; 401 cap_rights_t rights; 402 int kev_flags; 403 int nchanges = 0; 404 int error; 405 406 if (args->op != LINUX_EPOLL_CTL_DEL) { 407 error = copyin(args->event, &le, sizeof(le)); 408 if (error != 0) 409 return (error); 410 } 411 412 error = fget(td, args->epfd, 413 cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &epfp); 414 if (error != 0) 415 return (error); 416 if (epfp->f_type != DTYPE_KQUEUE) 417 goto leave1; 418 419 /* Protect user data vector from incorrectly supplied fd. */ 420 error = fget(td, args->fd, cap_rights_init(&rights, CAP_POLL_EVENT), &fp); 421 if (error != 0) 422 goto leave1; 423 424 /* Linux disallows spying on himself */ 425 if (epfp == fp) { 426 error = EINVAL; 427 goto leave0; 428 } 429 430 ciargs.changelist = kev; 431 432 switch (args->op) { 433 case LINUX_EPOLL_CTL_MOD: 434 /* 435 * We don't memorize which events were set for this FD 436 * on this level, so just delete all we could have set: 437 * EVFILT_READ and EVFILT_WRITE, ignoring any errors 438 */ 439 error = epoll_delete_all_events(td, epfp, args->fd); 440 if (error) 441 goto leave0; 442 /* FALLTHROUGH */ 443 444 case LINUX_EPOLL_CTL_ADD: 445 kev_flags = EV_ADD | EV_ENABLE; 446 break; 447 448 case LINUX_EPOLL_CTL_DEL: 449 /* CTL_DEL means unregister this fd with this epoll */ 450 error = epoll_delete_all_events(td, epfp, args->fd); 451 goto leave0; 452 453 default: 454 error = EINVAL; 455 goto leave0; 456 } 457 458 error = epoll_to_kevent(td, epfp, args->fd, &le, &kev_flags, 459 kev, &nchanges); 460 if (error) 461 goto leave0; 462 463 epoll_fd_install(td, args->fd, le.data); 464 465 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 466 467 leave0: 468 fdrop(fp, td); 469 470 leave1: 471 fdrop(epfp, td); 472 return (error); 473 } 474 475 /* 476 * Wait for a filter to be triggered on the epoll file descriptor. 477 */ 478 static int 479 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 480 int maxevents, int timeout, sigset_t *uset) 481 { 482 struct file *epfp; 483 struct timespec ts, *tsp; 484 cap_rights_t rights; 485 struct epoll_copyout_args coargs; 486 struct kevent_copyops k_ops = { &coargs, 487 epoll_kev_copyout, 488 NULL}; 489 int error; 490 491 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 492 return (EINVAL); 493 494 if (uset != NULL) { 495 error = kern_sigprocmask(td, SIG_SETMASK, uset, 496 &td->td_oldsigmask, 0); 497 if (error != 0) 498 return (error); 499 td->td_pflags |= TDP_OLDMASK; 500 /* 501 * Make sure that ast() is called on return to 502 * usermode and TDP_OLDMASK is cleared, restoring old 503 * sigmask. 504 */ 505 thread_lock(td); 506 td->td_flags |= TDF_ASTPENDING; 507 thread_unlock(td); 508 } 509 510 error = fget(td, epfd, 511 cap_rights_init(&rights, CAP_KQUEUE_EVENT), &epfp); 512 if (error != 0) 513 return (error); 514 515 coargs.leventlist = events; 516 coargs.p = td->td_proc; 517 coargs.count = 0; 518 coargs.error = 0; 519 520 if (timeout != -1) { 521 if (timeout < 0) { 522 error = EINVAL; 523 goto leave; 524 } 525 /* Convert from milliseconds to timespec. */ 526 ts.tv_sec = timeout / 1000; 527 ts.tv_nsec = (timeout % 1000) * 1000000; 528 tsp = &ts; 529 } else { 530 tsp = NULL; 531 } 532 533 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 534 if (error == 0 && coargs.error != 0) 535 error = coargs.error; 536 537 /* 538 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 539 * Maybe we should translate that but I don't think it matters at all. 540 */ 541 if (error == 0) 542 td->td_retval[0] = coargs.count; 543 leave: 544 fdrop(epfp, td); 545 return (error); 546 } 547 548 int 549 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 550 { 551 552 return (linux_epoll_wait_common(td, args->epfd, args->events, 553 args->maxevents, args->timeout, NULL)); 554 } 555 556 int 557 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 558 { 559 sigset_t mask, *pmask; 560 l_sigset_t lmask; 561 int error; 562 563 if (args->mask != NULL) { 564 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 565 if (error != 0) 566 return (error); 567 linux_to_bsd_sigset(&lmask, &mask); 568 pmask = &mask; 569 } else 570 pmask = NULL; 571 return (linux_epoll_wait_common(td, args->epfd, args->events, 572 args->maxevents, args->timeout, pmask)); 573 } 574 575 static int 576 epoll_delete_event(struct thread *td, struct file *epfp, int fd, int filter) 577 { 578 struct epoll_copyin_args ciargs; 579 struct kevent kev; 580 struct kevent_copyops k_ops = { &ciargs, 581 NULL, 582 epoll_kev_copyin}; 583 int error; 584 585 ciargs.changelist = &kev; 586 EV_SET(&kev, fd, filter, EV_DELETE | EV_DISABLE, 0, 0, 0); 587 588 error = kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL); 589 590 /* 591 * here we ignore ENONT, because we don't keep track of events here 592 */ 593 if (error == ENOENT) 594 error = 0; 595 return (error); 596 } 597 598 static int 599 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 600 { 601 int error1, error2; 602 603 error1 = epoll_delete_event(td, epfp, fd, EVFILT_READ); 604 error2 = epoll_delete_event(td, epfp, fd, EVFILT_WRITE); 605 606 /* report any errors we got */ 607 return (error1 == 0 ? error2 : error1); 608 } 609 610 static int 611 eventfd_create(struct thread *td, uint32_t initval, int flags) 612 { 613 struct filedesc *fdp; 614 struct eventfd *efd; 615 struct file *fp; 616 int fflags, fd, error; 617 618 fflags = 0; 619 if ((flags & LINUX_O_CLOEXEC) != 0) 620 fflags |= O_CLOEXEC; 621 622 fdp = td->td_proc->p_fd; 623 error = falloc(td, &fp, &fd, fflags); 624 if (error) 625 return (error); 626 627 efd = malloc(sizeof(*efd), M_EPOLL, M_WAITOK | M_ZERO); 628 efd->efd_flags = flags; 629 efd->efd_count = initval; 630 mtx_init(&efd->efd_lock, "eventfd", NULL, MTX_DEF); 631 632 knlist_init_mtx(&efd->efd_sel.si_note, &efd->efd_lock); 633 634 fflags = FREAD | FWRITE; 635 if ((flags & LINUX_O_NONBLOCK) != 0) 636 fflags |= FNONBLOCK; 637 638 finit(fp, fflags, DTYPE_LINUXEFD, efd, &eventfdops); 639 fdrop(fp, td); 640 641 td->td_retval[0] = fd; 642 return (error); 643 } 644 645 int 646 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 647 { 648 649 return (eventfd_create(td, args->initval, 0)); 650 } 651 652 int 653 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 654 { 655 656 if ((args->flags & ~(LINUX_O_CLOEXEC|LINUX_O_NONBLOCK|LINUX_EFD_SEMAPHORE)) != 0) 657 return (EINVAL); 658 659 return (eventfd_create(td, args->initval, args->flags)); 660 } 661 662 static int 663 eventfd_close(struct file *fp, struct thread *td) 664 { 665 struct eventfd *efd; 666 667 efd = fp->f_data; 668 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 669 return (EBADF); 670 671 seldrain(&efd->efd_sel); 672 knlist_destroy(&efd->efd_sel.si_note); 673 674 fp->f_ops = &badfileops; 675 mtx_destroy(&efd->efd_lock); 676 free(efd, M_EPOLL); 677 678 return (0); 679 } 680 681 static int 682 eventfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 683 int flags, struct thread *td) 684 { 685 struct eventfd *efd; 686 eventfd_t count; 687 int error; 688 689 efd = fp->f_data; 690 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 691 return (EBADF); 692 693 if (uio->uio_resid < sizeof(eventfd_t)) 694 return (EINVAL); 695 696 error = 0; 697 mtx_lock(&efd->efd_lock); 698 retry: 699 if (efd->efd_count == 0) { 700 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 701 mtx_unlock(&efd->efd_lock); 702 return (EAGAIN); 703 } 704 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, PCATCH, "lefdrd", 0); 705 if (error == 0) 706 goto retry; 707 } 708 if (error == 0) { 709 if ((efd->efd_flags & LINUX_EFD_SEMAPHORE) != 0) { 710 count = 1; 711 --efd->efd_count; 712 } else { 713 count = efd->efd_count; 714 efd->efd_count = 0; 715 } 716 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 717 selwakeup(&efd->efd_sel); 718 wakeup(&efd->efd_count); 719 mtx_unlock(&efd->efd_lock); 720 error = uiomove(&count, sizeof(eventfd_t), uio); 721 } else 722 mtx_unlock(&efd->efd_lock); 723 724 return (error); 725 } 726 727 static int 728 eventfd_write(struct file *fp, struct uio *uio, struct ucred *active_cred, 729 int flags, struct thread *td) 730 { 731 struct eventfd *efd; 732 eventfd_t count; 733 int error; 734 735 efd = fp->f_data; 736 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 737 return (EBADF); 738 739 if (uio->uio_resid < sizeof(eventfd_t)) 740 return (EINVAL); 741 742 error = uiomove(&count, sizeof(eventfd_t), uio); 743 if (error) 744 return (error); 745 if (count == UINT64_MAX) 746 return (EINVAL); 747 748 mtx_lock(&efd->efd_lock); 749 retry: 750 if (UINT64_MAX - efd->efd_count <= count) { 751 if ((efd->efd_flags & LINUX_O_NONBLOCK) != 0) { 752 mtx_unlock(&efd->efd_lock); 753 return (EAGAIN); 754 } 755 error = mtx_sleep(&efd->efd_count, &efd->efd_lock, 756 PCATCH, "lefdwr", 0); 757 if (error == 0) 758 goto retry; 759 } 760 if (error == 0) { 761 efd->efd_count += count; 762 KNOTE_LOCKED(&efd->efd_sel.si_note, 0); 763 selwakeup(&efd->efd_sel); 764 wakeup(&efd->efd_count); 765 } 766 mtx_unlock(&efd->efd_lock); 767 768 return (error); 769 } 770 771 static int 772 eventfd_poll(struct file *fp, int events, struct ucred *active_cred, 773 struct thread *td) 774 { 775 struct eventfd *efd; 776 int revents = 0; 777 778 efd = fp->f_data; 779 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 780 return (POLLERR); 781 782 mtx_lock(&efd->efd_lock); 783 if ((events & (POLLIN|POLLRDNORM)) && efd->efd_count > 0) 784 revents |= events & (POLLIN|POLLRDNORM); 785 if ((events & (POLLOUT|POLLWRNORM)) && UINT64_MAX - 1 > efd->efd_count) 786 revents |= events & (POLLOUT|POLLWRNORM); 787 if (revents == 0) 788 selrecord(td, &efd->efd_sel); 789 mtx_unlock(&efd->efd_lock); 790 791 return (revents); 792 } 793 794 /*ARGSUSED*/ 795 static int 796 eventfd_kqfilter(struct file *fp, struct knote *kn) 797 { 798 struct eventfd *efd; 799 800 efd = fp->f_data; 801 if (fp->f_type != DTYPE_LINUXEFD || efd == NULL) 802 return (EINVAL); 803 804 mtx_lock(&efd->efd_lock); 805 switch (kn->kn_filter) { 806 case EVFILT_READ: 807 kn->kn_fop = &eventfd_rfiltops; 808 break; 809 case EVFILT_WRITE: 810 kn->kn_fop = &eventfd_wfiltops; 811 break; 812 default: 813 mtx_unlock(&efd->efd_lock); 814 return (EINVAL); 815 } 816 817 kn->kn_hook = efd; 818 knlist_add(&efd->efd_sel.si_note, kn, 1); 819 mtx_unlock(&efd->efd_lock); 820 821 return (0); 822 } 823 824 static void 825 filt_eventfddetach(struct knote *kn) 826 { 827 struct eventfd *efd = kn->kn_hook; 828 829 mtx_lock(&efd->efd_lock); 830 knlist_remove(&efd->efd_sel.si_note, kn, 1); 831 mtx_unlock(&efd->efd_lock); 832 } 833 834 /*ARGSUSED*/ 835 static int 836 filt_eventfdread(struct knote *kn, long hint) 837 { 838 struct eventfd *efd = kn->kn_hook; 839 int ret; 840 841 mtx_assert(&efd->efd_lock, MA_OWNED); 842 ret = (efd->efd_count > 0); 843 844 return (ret); 845 } 846 847 /*ARGSUSED*/ 848 static int 849 filt_eventfdwrite(struct knote *kn, long hint) 850 { 851 struct eventfd *efd = kn->kn_hook; 852 int ret; 853 854 mtx_assert(&efd->efd_lock, MA_OWNED); 855 ret = (UINT64_MAX - 1 > efd->efd_count); 856 857 return (ret); 858 } 859 860 /*ARGSUSED*/ 861 static int 862 eventfd_truncate(struct file *fp, off_t length, struct ucred *active_cred, 863 struct thread *td) 864 { 865 866 return (ENXIO); 867 } 868 869 /*ARGSUSED*/ 870 static int 871 eventfd_ioctl(struct file *fp, u_long cmd, void *data, 872 struct ucred *active_cred, struct thread *td) 873 { 874 875 return (ENXIO); 876 } 877 878 /*ARGSUSED*/ 879 static int 880 eventfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred, 881 struct thread *td) 882 { 883 884 return (ENXIO); 885 } 886 887 /*ARGSUSED*/ 888 static int 889 eventfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 890 { 891 892 kif->kf_type = KF_TYPE_UNKNOWN; 893 return (0); 894 } 895