1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_compat.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/imgact.h> 37 #include <sys/kernel.h> 38 #include <sys/limits.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/callout.h> 42 #include <sys/capsicum.h> 43 #include <sys/types.h> 44 #include <sys/user.h> 45 #include <sys/file.h> 46 #include <sys/filedesc.h> 47 #include <sys/filio.h> 48 #include <sys/errno.h> 49 #include <sys/event.h> 50 #include <sys/poll.h> 51 #include <sys/proc.h> 52 #include <sys/selinfo.h> 53 #include <sys/specialfd.h> 54 #include <sys/sx.h> 55 #include <sys/syscallsubr.h> 56 #include <sys/timespec.h> 57 #include <sys/eventfd.h> 58 59 #ifdef COMPAT_LINUX32 60 #include <machine/../linux32/linux.h> 61 #include <machine/../linux32/linux32_proto.h> 62 #else 63 #include <machine/../linux/linux.h> 64 #include <machine/../linux/linux_proto.h> 65 #endif 66 67 #include <compat/linux/linux_emul.h> 68 #include <compat/linux/linux_event.h> 69 #include <compat/linux/linux_file.h> 70 #include <compat/linux/linux_timer.h> 71 #include <compat/linux/linux_util.h> 72 73 typedef uint64_t epoll_udata_t; 74 75 struct epoll_event { 76 uint32_t events; 77 epoll_udata_t data; 78 } 79 #if defined(__amd64__) 80 __attribute__((packed)) 81 #endif 82 ; 83 84 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 85 86 static int epoll_to_kevent(struct thread *td, int fd, 87 struct epoll_event *l_event, struct kevent *kevent, 88 int *nkevents); 89 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 90 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 91 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 92 static int epoll_register_kevent(struct thread *td, struct file *epfp, 93 int fd, int filter, unsigned int flags); 94 static int epoll_fd_registered(struct thread *td, struct file *epfp, 95 int fd); 96 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 97 int fd); 98 99 struct epoll_copyin_args { 100 struct kevent *changelist; 101 }; 102 103 struct epoll_copyout_args { 104 struct epoll_event *leventlist; 105 struct proc *p; 106 uint32_t count; 107 int error; 108 }; 109 110 /* timerfd */ 111 typedef uint64_t timerfd_t; 112 113 static fo_rdwr_t timerfd_read; 114 static fo_ioctl_t timerfd_ioctl; 115 static fo_poll_t timerfd_poll; 116 static fo_kqfilter_t timerfd_kqfilter; 117 static fo_stat_t timerfd_stat; 118 static fo_close_t timerfd_close; 119 static fo_fill_kinfo_t timerfd_fill_kinfo; 120 121 static struct fileops timerfdops = { 122 .fo_read = timerfd_read, 123 .fo_write = invfo_rdwr, 124 .fo_truncate = invfo_truncate, 125 .fo_ioctl = timerfd_ioctl, 126 .fo_poll = timerfd_poll, 127 .fo_kqfilter = timerfd_kqfilter, 128 .fo_stat = timerfd_stat, 129 .fo_close = timerfd_close, 130 .fo_chmod = invfo_chmod, 131 .fo_chown = invfo_chown, 132 .fo_sendfile = invfo_sendfile, 133 .fo_fill_kinfo = timerfd_fill_kinfo, 134 .fo_flags = DFLAG_PASSABLE 135 }; 136 137 static void filt_timerfddetach(struct knote *kn); 138 static int filt_timerfdread(struct knote *kn, long hint); 139 140 static struct filterops timerfd_rfiltops = { 141 .f_isfd = 1, 142 .f_detach = filt_timerfddetach, 143 .f_event = filt_timerfdread 144 }; 145 146 struct timerfd { 147 clockid_t tfd_clockid; 148 struct itimerspec tfd_time; 149 struct callout tfd_callout; 150 timerfd_t tfd_count; 151 bool tfd_canceled; 152 struct selinfo tfd_sel; 153 struct mtx tfd_lock; 154 }; 155 156 static void linux_timerfd_expire(void *); 157 static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); 158 159 static int 160 epoll_create_common(struct thread *td, int flags) 161 { 162 163 return (kern_kqueue(td, flags, NULL)); 164 } 165 166 #ifdef LINUX_LEGACY_SYSCALLS 167 int 168 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 169 { 170 171 /* 172 * args->size is unused. Linux just tests it 173 * and then forgets it as well. 174 */ 175 if (args->size <= 0) 176 return (EINVAL); 177 178 return (epoll_create_common(td, 0)); 179 } 180 #endif 181 182 int 183 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 184 { 185 int flags; 186 187 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 188 return (EINVAL); 189 190 flags = 0; 191 if ((args->flags & LINUX_O_CLOEXEC) != 0) 192 flags |= O_CLOEXEC; 193 194 return (epoll_create_common(td, flags)); 195 } 196 197 /* Structure converting function from epoll to kevent. */ 198 static int 199 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event, 200 struct kevent *kevent, int *nkevents) 201 { 202 uint32_t levents = l_event->events; 203 struct linux_pemuldata *pem; 204 struct proc *p; 205 unsigned short kev_flags = EV_ADD | EV_ENABLE; 206 207 /* flags related to how event is registered */ 208 if ((levents & LINUX_EPOLLONESHOT) != 0) 209 kev_flags |= EV_DISPATCH; 210 if ((levents & LINUX_EPOLLET) != 0) 211 kev_flags |= EV_CLEAR; 212 if ((levents & LINUX_EPOLLERR) != 0) 213 kev_flags |= EV_ERROR; 214 if ((levents & LINUX_EPOLLRDHUP) != 0) 215 kev_flags |= EV_EOF; 216 217 /* flags related to what event is registered */ 218 if ((levents & LINUX_EPOLL_EVRD) != 0) { 219 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); 220 kevent->ext[0] = l_event->data; 221 ++kevent; 222 ++(*nkevents); 223 } 224 if ((levents & LINUX_EPOLL_EVWR) != 0) { 225 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); 226 kevent->ext[0] = l_event->data; 227 ++kevent; 228 ++(*nkevents); 229 } 230 /* zero event mask is legal */ 231 if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) { 232 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); 233 ++(*nkevents); 234 } 235 236 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 237 p = td->td_proc; 238 239 pem = pem_find(p); 240 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 241 242 LINUX_PEM_XLOCK(pem); 243 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 244 pem->flags |= LINUX_XUNSUP_EPOLL; 245 LINUX_PEM_XUNLOCK(pem); 246 linux_msg(td, "epoll_ctl unsupported flags: 0x%x", 247 levents); 248 } else 249 LINUX_PEM_XUNLOCK(pem); 250 return (EINVAL); 251 } 252 253 return (0); 254 } 255 256 /* 257 * Structure converting function from kevent to epoll. In a case 258 * this is called on error in registration we store the error in 259 * event->data and pick it up later in linux_epoll_ctl(). 260 */ 261 static void 262 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 263 { 264 265 l_event->data = kevent->ext[0]; 266 267 if ((kevent->flags & EV_ERROR) != 0) { 268 l_event->events = LINUX_EPOLLERR; 269 return; 270 } 271 272 /* XXX EPOLLPRI, EPOLLHUP */ 273 switch (kevent->filter) { 274 case EVFILT_READ: 275 l_event->events = LINUX_EPOLLIN; 276 if ((kevent->flags & EV_EOF) != 0) 277 l_event->events |= LINUX_EPOLLRDHUP; 278 break; 279 case EVFILT_WRITE: 280 l_event->events = LINUX_EPOLLOUT; 281 break; 282 } 283 } 284 285 /* 286 * Copyout callback used by kevent. This converts kevent 287 * events to epoll events and copies them back to the 288 * userspace. This is also called on error on registering 289 * of the filter. 290 */ 291 static int 292 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 293 { 294 struct epoll_copyout_args *args; 295 struct epoll_event *eep; 296 int error, i; 297 298 args = (struct epoll_copyout_args*) arg; 299 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 300 301 for (i = 0; i < count; i++) 302 kevent_to_epoll(&kevp[i], &eep[i]); 303 304 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 305 if (error == 0) { 306 args->leventlist += count; 307 args->count += count; 308 } else if (args->error == 0) 309 args->error = error; 310 311 free(eep, M_EPOLL); 312 return (error); 313 } 314 315 /* 316 * Copyin callback used by kevent. This copies already 317 * converted filters from kernel memory to the kevent 318 * internal kernel memory. Hence the memcpy instead of 319 * copyin. 320 */ 321 static int 322 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 323 { 324 struct epoll_copyin_args *args; 325 326 args = (struct epoll_copyin_args*) arg; 327 328 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 329 args->changelist += count; 330 331 return (0); 332 } 333 334 /* 335 * Load epoll filter, convert it to kevent filter 336 * and load it into kevent subsystem. 337 */ 338 int 339 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 340 { 341 struct file *epfp, *fp; 342 struct epoll_copyin_args ciargs; 343 struct kevent kev[2]; 344 struct kevent_copyops k_ops = { &ciargs, 345 NULL, 346 epoll_kev_copyin}; 347 struct epoll_event le; 348 cap_rights_t rights; 349 int nchanges = 0; 350 int error; 351 352 if (args->op != LINUX_EPOLL_CTL_DEL) { 353 error = copyin(args->event, &le, sizeof(le)); 354 if (error != 0) 355 return (error); 356 } 357 358 error = fget(td, args->epfd, 359 cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp); 360 if (error != 0) 361 return (error); 362 if (epfp->f_type != DTYPE_KQUEUE) { 363 error = EINVAL; 364 goto leave1; 365 } 366 367 /* Protect user data vector from incorrectly supplied fd. */ 368 error = fget(td, args->fd, 369 cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp); 370 if (error != 0) 371 goto leave1; 372 373 /* Linux disallows spying on himself */ 374 if (epfp == fp) { 375 error = EINVAL; 376 goto leave0; 377 } 378 379 ciargs.changelist = kev; 380 381 if (args->op != LINUX_EPOLL_CTL_DEL) { 382 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges); 383 if (error != 0) 384 goto leave0; 385 } 386 387 switch (args->op) { 388 case LINUX_EPOLL_CTL_MOD: 389 error = epoll_delete_all_events(td, epfp, args->fd); 390 if (error != 0) 391 goto leave0; 392 break; 393 394 case LINUX_EPOLL_CTL_ADD: 395 if (epoll_fd_registered(td, epfp, args->fd)) { 396 error = EEXIST; 397 goto leave0; 398 } 399 break; 400 401 case LINUX_EPOLL_CTL_DEL: 402 /* CTL_DEL means unregister this fd with this epoll */ 403 error = epoll_delete_all_events(td, epfp, args->fd); 404 goto leave0; 405 406 default: 407 error = EINVAL; 408 goto leave0; 409 } 410 411 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 412 413 leave0: 414 fdrop(fp, td); 415 416 leave1: 417 fdrop(epfp, td); 418 return (error); 419 } 420 421 /* 422 * Wait for a filter to be triggered on the epoll file descriptor. 423 */ 424 static int 425 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 426 int maxevents, int timeout, sigset_t *uset) 427 { 428 struct epoll_copyout_args coargs; 429 struct kevent_copyops k_ops = { &coargs, 430 epoll_kev_copyout, 431 NULL}; 432 struct timespec ts, *tsp; 433 cap_rights_t rights; 434 struct file *epfp; 435 sigset_t omask; 436 int error; 437 438 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 439 return (EINVAL); 440 441 error = fget(td, epfd, 442 cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp); 443 if (error != 0) 444 return (error); 445 if (epfp->f_type != DTYPE_KQUEUE) { 446 error = EINVAL; 447 goto leave; 448 } 449 if (uset != NULL) { 450 error = kern_sigprocmask(td, SIG_SETMASK, uset, 451 &omask, 0); 452 if (error != 0) 453 goto leave; 454 td->td_pflags |= TDP_OLDMASK; 455 /* 456 * Make sure that ast() is called on return to 457 * usermode and TDP_OLDMASK is cleared, restoring old 458 * sigmask. 459 */ 460 thread_lock(td); 461 td->td_flags |= TDF_ASTPENDING; 462 thread_unlock(td); 463 } 464 465 coargs.leventlist = events; 466 coargs.p = td->td_proc; 467 coargs.count = 0; 468 coargs.error = 0; 469 470 /* 471 * Linux epoll_wait(2) man page states that timeout of -1 causes caller 472 * to block indefinitely. Real implementation does it if any negative 473 * timeout value is passed. 474 */ 475 if (timeout >= 0) { 476 /* Convert from milliseconds to timespec. */ 477 ts.tv_sec = timeout / 1000; 478 ts.tv_nsec = (timeout % 1000) * 1000000; 479 tsp = &ts; 480 } else { 481 tsp = NULL; 482 } 483 484 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 485 if (error == 0 && coargs.error != 0) 486 error = coargs.error; 487 488 /* 489 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 490 * Maybe we should translate that but I don't think it matters at all. 491 */ 492 if (error == 0) 493 td->td_retval[0] = coargs.count; 494 495 if (uset != NULL) 496 error = kern_sigprocmask(td, SIG_SETMASK, &omask, 497 NULL, 0); 498 leave: 499 fdrop(epfp, td); 500 return (error); 501 } 502 503 #ifdef LINUX_LEGACY_SYSCALLS 504 int 505 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 506 { 507 508 return (linux_epoll_wait_common(td, args->epfd, args->events, 509 args->maxevents, args->timeout, NULL)); 510 } 511 #endif 512 513 int 514 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 515 { 516 sigset_t mask, *pmask; 517 l_sigset_t lmask; 518 int error; 519 520 if (args->mask != NULL) { 521 if (args->sigsetsize != sizeof(l_sigset_t)) 522 return (EINVAL); 523 error = copyin(args->mask, &lmask, sizeof(l_sigset_t)); 524 if (error != 0) 525 return (error); 526 linux_to_bsd_sigset(&lmask, &mask); 527 pmask = &mask; 528 } else 529 pmask = NULL; 530 return (linux_epoll_wait_common(td, args->epfd, args->events, 531 args->maxevents, args->timeout, pmask)); 532 } 533 534 static int 535 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, 536 unsigned int flags) 537 { 538 struct epoll_copyin_args ciargs; 539 struct kevent kev; 540 struct kevent_copyops k_ops = { &ciargs, 541 NULL, 542 epoll_kev_copyin}; 543 544 ciargs.changelist = &kev; 545 EV_SET(&kev, fd, filter, flags, 0, 0, 0); 546 547 return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); 548 } 549 550 static int 551 epoll_fd_registered(struct thread *td, struct file *epfp, int fd) 552 { 553 /* 554 * Set empty filter flags to avoid accidental modification of already 555 * registered events. In the case of event re-registration: 556 * 1. If event does not exists kevent() does nothing and returns ENOENT 557 * 2. If event does exists, it's enabled/disabled state is preserved 558 * but fflags, data and udata fields are overwritten. So we can not 559 * set socket lowats and store user's context pointer in udata. 560 */ 561 if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || 562 epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) 563 return (1); 564 565 return (0); 566 } 567 568 static int 569 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 570 { 571 int error1, error2; 572 573 error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); 574 error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); 575 576 /* return 0 if at least one result positive */ 577 return (error1 == 0 ? 0 : error2); 578 } 579 580 #ifdef LINUX_LEGACY_SYSCALLS 581 int 582 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 583 { 584 struct specialfd_eventfd ae; 585 586 bzero(&ae, sizeof(ae)); 587 ae.initval = args->initval; 588 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 589 } 590 #endif 591 592 int 593 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 594 { 595 struct specialfd_eventfd ae; 596 int flags; 597 598 if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | 599 LINUX_EFD_SEMAPHORE)) != 0) 600 return (EINVAL); 601 flags = 0; 602 if ((args->flags & LINUX_O_CLOEXEC) != 0) 603 flags |= EFD_CLOEXEC; 604 if ((args->flags & LINUX_O_NONBLOCK) != 0) 605 flags |= EFD_NONBLOCK; 606 if ((args->flags & LINUX_EFD_SEMAPHORE) != 0) 607 flags |= EFD_SEMAPHORE; 608 609 bzero(&ae, sizeof(ae)); 610 ae.flags = flags; 611 ae.initval = args->initval; 612 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 613 } 614 615 int 616 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) 617 { 618 struct filedesc *fdp; 619 struct timerfd *tfd; 620 struct file *fp; 621 clockid_t clockid; 622 int fflags, fd, error; 623 624 if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) 625 return (EINVAL); 626 627 error = linux_to_native_clockid(&clockid, args->clockid); 628 if (error != 0) 629 return (error); 630 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 631 return (EINVAL); 632 633 fflags = 0; 634 if ((args->flags & LINUX_TFD_CLOEXEC) != 0) 635 fflags |= O_CLOEXEC; 636 637 fdp = td->td_proc->p_fd; 638 error = falloc(td, &fp, &fd, fflags); 639 if (error != 0) 640 return (error); 641 642 tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); 643 tfd->tfd_clockid = clockid; 644 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 645 646 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 647 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 648 649 fflags = FREAD; 650 if ((args->flags & LINUX_O_NONBLOCK) != 0) 651 fflags |= FNONBLOCK; 652 653 finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); 654 fdrop(fp, td); 655 656 td->td_retval[0] = fd; 657 return (error); 658 } 659 660 static int 661 timerfd_close(struct file *fp, struct thread *td) 662 { 663 struct timerfd *tfd; 664 665 tfd = fp->f_data; 666 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 667 return (EINVAL); 668 669 timespecclear(&tfd->tfd_time.it_value); 670 timespecclear(&tfd->tfd_time.it_interval); 671 672 callout_drain(&tfd->tfd_callout); 673 674 seldrain(&tfd->tfd_sel); 675 knlist_destroy(&tfd->tfd_sel.si_note); 676 677 fp->f_ops = &badfileops; 678 mtx_destroy(&tfd->tfd_lock); 679 free(tfd, M_EPOLL); 680 681 return (0); 682 } 683 684 static int 685 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 686 int flags, struct thread *td) 687 { 688 struct timerfd *tfd; 689 timerfd_t count; 690 int error; 691 692 tfd = fp->f_data; 693 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 694 return (EINVAL); 695 696 if (uio->uio_resid < sizeof(timerfd_t)) 697 return (EINVAL); 698 699 error = 0; 700 mtx_lock(&tfd->tfd_lock); 701 retry: 702 if (tfd->tfd_canceled) { 703 tfd->tfd_count = 0; 704 mtx_unlock(&tfd->tfd_lock); 705 return (ECANCELED); 706 } 707 if (tfd->tfd_count == 0) { 708 if ((fp->f_flag & FNONBLOCK) != 0) { 709 mtx_unlock(&tfd->tfd_lock); 710 return (EAGAIN); 711 } 712 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); 713 if (error == 0) 714 goto retry; 715 } 716 if (error == 0) { 717 count = tfd->tfd_count; 718 tfd->tfd_count = 0; 719 mtx_unlock(&tfd->tfd_lock); 720 error = uiomove(&count, sizeof(timerfd_t), uio); 721 } else 722 mtx_unlock(&tfd->tfd_lock); 723 724 return (error); 725 } 726 727 static int 728 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 729 struct thread *td) 730 { 731 struct timerfd *tfd; 732 int revents = 0; 733 734 tfd = fp->f_data; 735 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 736 return (POLLERR); 737 738 mtx_lock(&tfd->tfd_lock); 739 if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) 740 revents |= events & (POLLIN|POLLRDNORM); 741 if (revents == 0) 742 selrecord(td, &tfd->tfd_sel); 743 mtx_unlock(&tfd->tfd_lock); 744 745 return (revents); 746 } 747 748 static int 749 timerfd_kqfilter(struct file *fp, struct knote *kn) 750 { 751 struct timerfd *tfd; 752 753 tfd = fp->f_data; 754 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 755 return (EINVAL); 756 757 if (kn->kn_filter == EVFILT_READ) 758 kn->kn_fop = &timerfd_rfiltops; 759 else 760 return (EINVAL); 761 762 kn->kn_hook = tfd; 763 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 764 765 return (0); 766 } 767 768 static void 769 filt_timerfddetach(struct knote *kn) 770 { 771 struct timerfd *tfd = kn->kn_hook; 772 773 mtx_lock(&tfd->tfd_lock); 774 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 775 mtx_unlock(&tfd->tfd_lock); 776 } 777 778 static int 779 filt_timerfdread(struct knote *kn, long hint) 780 { 781 struct timerfd *tfd = kn->kn_hook; 782 783 return (tfd->tfd_count > 0); 784 } 785 786 static int 787 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 788 struct ucred *active_cred, struct thread *td) 789 { 790 791 if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) 792 return (EINVAL); 793 794 switch (cmd) { 795 case FIONBIO: 796 case FIOASYNC: 797 return (0); 798 } 799 800 return (ENOTTY); 801 } 802 803 static int 804 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 805 { 806 807 return (ENXIO); 808 } 809 810 static int 811 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 812 { 813 814 kif->kf_type = KF_TYPE_UNKNOWN; 815 return (0); 816 } 817 818 static void 819 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) 820 { 821 822 if (tfd->tfd_clockid == CLOCK_REALTIME) 823 getnanotime(ts); 824 else /* CLOCK_MONOTONIC */ 825 getnanouptime(ts); 826 } 827 828 static void 829 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) 830 { 831 struct timespec cts; 832 833 linux_timerfd_clocktime(tfd, &cts); 834 *ots = tfd->tfd_time; 835 if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { 836 timespecsub(&ots->it_value, &cts, &ots->it_value); 837 if (ots->it_value.tv_sec < 0 || 838 (ots->it_value.tv_sec == 0 && 839 ots->it_value.tv_nsec == 0)) { 840 ots->it_value.tv_sec = 0; 841 ots->it_value.tv_nsec = 1; 842 } 843 } 844 } 845 846 int 847 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args) 848 { 849 struct l_itimerspec lots; 850 struct itimerspec ots; 851 struct timerfd *tfd; 852 struct file *fp; 853 int error; 854 855 error = fget(td, args->fd, &cap_read_rights, &fp); 856 if (error != 0) 857 return (error); 858 tfd = fp->f_data; 859 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 860 error = EINVAL; 861 goto out; 862 } 863 864 mtx_lock(&tfd->tfd_lock); 865 linux_timerfd_curval(tfd, &ots); 866 mtx_unlock(&tfd->tfd_lock); 867 868 error = native_to_linux_itimerspec(&lots, &ots); 869 if (error == 0) 870 error = copyout(&lots, args->old_value, sizeof(lots)); 871 872 out: 873 fdrop(fp, td); 874 return (error); 875 } 876 877 int 878 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) 879 { 880 struct l_itimerspec lots; 881 struct itimerspec nts, ots; 882 struct timespec cts, ts; 883 struct timerfd *tfd; 884 struct timeval tv; 885 struct file *fp; 886 int error; 887 888 if ((args->flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) 889 return (EINVAL); 890 891 error = copyin(args->new_value, &lots, sizeof(lots)); 892 if (error != 0) 893 return (error); 894 error = linux_to_native_itimerspec(&nts, &lots); 895 if (error != 0) 896 return (error); 897 898 error = fget(td, args->fd, &cap_write_rights, &fp); 899 if (error != 0) 900 return (error); 901 tfd = fp->f_data; 902 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 903 error = EINVAL; 904 goto out; 905 } 906 907 mtx_lock(&tfd->tfd_lock); 908 if (!timespecisset(&nts.it_value)) 909 timespecclear(&nts.it_interval); 910 if (args->old_value != NULL) 911 linux_timerfd_curval(tfd, &ots); 912 913 tfd->tfd_time = nts; 914 tfd->tfd_count = 0; 915 if (timespecisset(&nts.it_value)) { 916 linux_timerfd_clocktime(tfd, &cts); 917 ts = nts.it_value; 918 if ((args->flags & LINUX_TFD_TIMER_ABSTIME) == 0) { 919 timespecadd(&tfd->tfd_time.it_value, &cts, 920 &tfd->tfd_time.it_value); 921 } else { 922 timespecsub(&ts, &cts, &ts); 923 } 924 TIMESPEC_TO_TIMEVAL(&tv, &ts); 925 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 926 linux_timerfd_expire, tfd); 927 tfd->tfd_canceled = false; 928 } else { 929 tfd->tfd_canceled = true; 930 callout_stop(&tfd->tfd_callout); 931 } 932 mtx_unlock(&tfd->tfd_lock); 933 934 if (args->old_value != NULL) { 935 error = native_to_linux_itimerspec(&lots, &ots); 936 if (error == 0) 937 error = copyout(&lots, args->old_value, sizeof(lots)); 938 } 939 940 out: 941 fdrop(fp, td); 942 return (error); 943 } 944 945 static void 946 linux_timerfd_expire(void *arg) 947 { 948 struct timespec cts, ts; 949 struct timeval tv; 950 struct timerfd *tfd; 951 952 tfd = (struct timerfd *)arg; 953 954 linux_timerfd_clocktime(tfd, &cts); 955 if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { 956 if (timespecisset(&tfd->tfd_time.it_interval)) 957 timespecadd(&tfd->tfd_time.it_value, 958 &tfd->tfd_time.it_interval, 959 &tfd->tfd_time.it_value); 960 else 961 /* single shot timer */ 962 timespecclear(&tfd->tfd_time.it_value); 963 if (timespecisset(&tfd->tfd_time.it_value)) { 964 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 965 TIMESPEC_TO_TIMEVAL(&tv, &ts); 966 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 967 linux_timerfd_expire, tfd); 968 } 969 tfd->tfd_count++; 970 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 971 selwakeup(&tfd->tfd_sel); 972 wakeup(&tfd->tfd_count); 973 } else if (timespecisset(&tfd->tfd_time.it_value)) { 974 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 975 TIMESPEC_TO_TIMEVAL(&tv, &ts); 976 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 977 linux_timerfd_expire, tfd); 978 } 979 } 980