1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_compat.h" 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/imgact.h> 37 #include <sys/kernel.h> 38 #include <sys/limits.h> 39 #include <sys/lock.h> 40 #include <sys/mutex.h> 41 #include <sys/callout.h> 42 #include <sys/capsicum.h> 43 #include <sys/types.h> 44 #include <sys/user.h> 45 #include <sys/file.h> 46 #include <sys/filedesc.h> 47 #include <sys/filio.h> 48 #include <sys/errno.h> 49 #include <sys/event.h> 50 #include <sys/poll.h> 51 #include <sys/proc.h> 52 #include <sys/selinfo.h> 53 #include <sys/specialfd.h> 54 #include <sys/sx.h> 55 #include <sys/syscallsubr.h> 56 #include <sys/timespec.h> 57 #include <sys/eventfd.h> 58 59 #ifdef COMPAT_LINUX32 60 #include <machine/../linux32/linux.h> 61 #include <machine/../linux32/linux32_proto.h> 62 #else 63 #include <machine/../linux/linux.h> 64 #include <machine/../linux/linux_proto.h> 65 #endif 66 67 #include <compat/linux/linux_emul.h> 68 #include <compat/linux/linux_event.h> 69 #include <compat/linux/linux_file.h> 70 #include <compat/linux/linux_signal.h> 71 #include <compat/linux/linux_timer.h> 72 #include <compat/linux/linux_util.h> 73 74 typedef uint64_t epoll_udata_t; 75 76 struct epoll_event { 77 uint32_t events; 78 epoll_udata_t data; 79 } 80 #if defined(__amd64__) 81 __attribute__((packed)) 82 #endif 83 ; 84 85 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 86 87 static int epoll_to_kevent(struct thread *td, int fd, 88 struct epoll_event *l_event, struct kevent *kevent, 89 int *nkevents); 90 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 91 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 92 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 93 static int epoll_register_kevent(struct thread *td, struct file *epfp, 94 int fd, int filter, unsigned int flags); 95 static int epoll_fd_registered(struct thread *td, struct file *epfp, 96 int fd); 97 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 98 int fd); 99 100 struct epoll_copyin_args { 101 struct kevent *changelist; 102 }; 103 104 struct epoll_copyout_args { 105 struct epoll_event *leventlist; 106 struct proc *p; 107 uint32_t count; 108 int error; 109 }; 110 111 /* timerfd */ 112 typedef uint64_t timerfd_t; 113 114 static fo_rdwr_t timerfd_read; 115 static fo_ioctl_t timerfd_ioctl; 116 static fo_poll_t timerfd_poll; 117 static fo_kqfilter_t timerfd_kqfilter; 118 static fo_stat_t timerfd_stat; 119 static fo_close_t timerfd_close; 120 static fo_fill_kinfo_t timerfd_fill_kinfo; 121 122 static struct fileops timerfdops = { 123 .fo_read = timerfd_read, 124 .fo_write = invfo_rdwr, 125 .fo_truncate = invfo_truncate, 126 .fo_ioctl = timerfd_ioctl, 127 .fo_poll = timerfd_poll, 128 .fo_kqfilter = timerfd_kqfilter, 129 .fo_stat = timerfd_stat, 130 .fo_close = timerfd_close, 131 .fo_chmod = invfo_chmod, 132 .fo_chown = invfo_chown, 133 .fo_sendfile = invfo_sendfile, 134 .fo_fill_kinfo = timerfd_fill_kinfo, 135 .fo_flags = DFLAG_PASSABLE 136 }; 137 138 static void filt_timerfddetach(struct knote *kn); 139 static int filt_timerfdread(struct knote *kn, long hint); 140 141 static struct filterops timerfd_rfiltops = { 142 .f_isfd = 1, 143 .f_detach = filt_timerfddetach, 144 .f_event = filt_timerfdread 145 }; 146 147 struct timerfd { 148 clockid_t tfd_clockid; 149 struct itimerspec tfd_time; 150 struct callout tfd_callout; 151 timerfd_t tfd_count; 152 bool tfd_canceled; 153 struct selinfo tfd_sel; 154 struct mtx tfd_lock; 155 }; 156 157 static void linux_timerfd_expire(void *); 158 static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); 159 160 static int 161 epoll_create_common(struct thread *td, int flags) 162 { 163 164 return (kern_kqueue(td, flags, NULL)); 165 } 166 167 #ifdef LINUX_LEGACY_SYSCALLS 168 int 169 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 170 { 171 172 /* 173 * args->size is unused. Linux just tests it 174 * and then forgets it as well. 175 */ 176 if (args->size <= 0) 177 return (EINVAL); 178 179 return (epoll_create_common(td, 0)); 180 } 181 #endif 182 183 int 184 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 185 { 186 int flags; 187 188 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 189 return (EINVAL); 190 191 flags = 0; 192 if ((args->flags & LINUX_O_CLOEXEC) != 0) 193 flags |= O_CLOEXEC; 194 195 return (epoll_create_common(td, flags)); 196 } 197 198 /* Structure converting function from epoll to kevent. */ 199 static int 200 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event, 201 struct kevent *kevent, int *nkevents) 202 { 203 uint32_t levents = l_event->events; 204 struct linux_pemuldata *pem; 205 struct proc *p; 206 unsigned short kev_flags = EV_ADD | EV_ENABLE; 207 208 /* flags related to how event is registered */ 209 if ((levents & LINUX_EPOLLONESHOT) != 0) 210 kev_flags |= EV_DISPATCH; 211 if ((levents & LINUX_EPOLLET) != 0) 212 kev_flags |= EV_CLEAR; 213 if ((levents & LINUX_EPOLLERR) != 0) 214 kev_flags |= EV_ERROR; 215 if ((levents & LINUX_EPOLLRDHUP) != 0) 216 kev_flags |= EV_EOF; 217 218 /* flags related to what event is registered */ 219 if ((levents & LINUX_EPOLL_EVRD) != 0) { 220 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); 221 kevent->ext[0] = l_event->data; 222 ++kevent; 223 ++(*nkevents); 224 } 225 if ((levents & LINUX_EPOLL_EVWR) != 0) { 226 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); 227 kevent->ext[0] = l_event->data; 228 ++kevent; 229 ++(*nkevents); 230 } 231 /* zero event mask is legal */ 232 if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) { 233 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); 234 ++(*nkevents); 235 } 236 237 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 238 p = td->td_proc; 239 240 pem = pem_find(p); 241 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 242 243 LINUX_PEM_XLOCK(pem); 244 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 245 pem->flags |= LINUX_XUNSUP_EPOLL; 246 LINUX_PEM_XUNLOCK(pem); 247 linux_msg(td, "epoll_ctl unsupported flags: 0x%x", 248 levents); 249 } else 250 LINUX_PEM_XUNLOCK(pem); 251 return (EINVAL); 252 } 253 254 return (0); 255 } 256 257 /* 258 * Structure converting function from kevent to epoll. In a case 259 * this is called on error in registration we store the error in 260 * event->data and pick it up later in linux_epoll_ctl(). 261 */ 262 static void 263 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 264 { 265 266 l_event->data = kevent->ext[0]; 267 268 if ((kevent->flags & EV_ERROR) != 0) { 269 l_event->events = LINUX_EPOLLERR; 270 return; 271 } 272 273 /* XXX EPOLLPRI, EPOLLHUP */ 274 switch (kevent->filter) { 275 case EVFILT_READ: 276 l_event->events = LINUX_EPOLLIN; 277 if ((kevent->flags & EV_EOF) != 0) 278 l_event->events |= LINUX_EPOLLRDHUP; 279 break; 280 case EVFILT_WRITE: 281 l_event->events = LINUX_EPOLLOUT; 282 break; 283 } 284 } 285 286 /* 287 * Copyout callback used by kevent. This converts kevent 288 * events to epoll events and copies them back to the 289 * userspace. This is also called on error on registering 290 * of the filter. 291 */ 292 static int 293 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 294 { 295 struct epoll_copyout_args *args; 296 struct epoll_event *eep; 297 int error, i; 298 299 args = (struct epoll_copyout_args*) arg; 300 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 301 302 for (i = 0; i < count; i++) 303 kevent_to_epoll(&kevp[i], &eep[i]); 304 305 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 306 if (error == 0) { 307 args->leventlist += count; 308 args->count += count; 309 } else if (args->error == 0) 310 args->error = error; 311 312 free(eep, M_EPOLL); 313 return (error); 314 } 315 316 /* 317 * Copyin callback used by kevent. This copies already 318 * converted filters from kernel memory to the kevent 319 * internal kernel memory. Hence the memcpy instead of 320 * copyin. 321 */ 322 static int 323 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 324 { 325 struct epoll_copyin_args *args; 326 327 args = (struct epoll_copyin_args*) arg; 328 329 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 330 args->changelist += count; 331 332 return (0); 333 } 334 335 /* 336 * Load epoll filter, convert it to kevent filter 337 * and load it into kevent subsystem. 338 */ 339 int 340 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 341 { 342 struct file *epfp, *fp; 343 struct epoll_copyin_args ciargs; 344 struct kevent kev[2]; 345 struct kevent_copyops k_ops = { &ciargs, 346 NULL, 347 epoll_kev_copyin}; 348 struct epoll_event le; 349 cap_rights_t rights; 350 int nchanges = 0; 351 int error; 352 353 if (args->op != LINUX_EPOLL_CTL_DEL) { 354 error = copyin(args->event, &le, sizeof(le)); 355 if (error != 0) 356 return (error); 357 } 358 359 error = fget(td, args->epfd, 360 cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp); 361 if (error != 0) 362 return (error); 363 if (epfp->f_type != DTYPE_KQUEUE) { 364 error = EINVAL; 365 goto leave1; 366 } 367 368 /* Protect user data vector from incorrectly supplied fd. */ 369 error = fget(td, args->fd, 370 cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp); 371 if (error != 0) 372 goto leave1; 373 374 /* Linux disallows spying on himself */ 375 if (epfp == fp) { 376 error = EINVAL; 377 goto leave0; 378 } 379 380 ciargs.changelist = kev; 381 382 if (args->op != LINUX_EPOLL_CTL_DEL) { 383 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges); 384 if (error != 0) 385 goto leave0; 386 } 387 388 switch (args->op) { 389 case LINUX_EPOLL_CTL_MOD: 390 error = epoll_delete_all_events(td, epfp, args->fd); 391 if (error != 0) 392 goto leave0; 393 break; 394 395 case LINUX_EPOLL_CTL_ADD: 396 if (epoll_fd_registered(td, epfp, args->fd)) { 397 error = EEXIST; 398 goto leave0; 399 } 400 break; 401 402 case LINUX_EPOLL_CTL_DEL: 403 /* CTL_DEL means unregister this fd with this epoll */ 404 error = epoll_delete_all_events(td, epfp, args->fd); 405 goto leave0; 406 407 default: 408 error = EINVAL; 409 goto leave0; 410 } 411 412 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 413 414 leave0: 415 fdrop(fp, td); 416 417 leave1: 418 fdrop(epfp, td); 419 return (error); 420 } 421 422 /* 423 * Wait for a filter to be triggered on the epoll file descriptor. 424 */ 425 426 static int 427 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events, 428 int maxevents, struct timespec *tsp, sigset_t *uset) 429 { 430 struct epoll_copyout_args coargs; 431 struct kevent_copyops k_ops = { &coargs, 432 epoll_kev_copyout, 433 NULL}; 434 cap_rights_t rights; 435 struct file *epfp; 436 sigset_t omask; 437 int error; 438 439 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 440 return (EINVAL); 441 442 error = fget(td, epfd, 443 cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp); 444 if (error != 0) 445 return (error); 446 if (epfp->f_type != DTYPE_KQUEUE) { 447 error = EINVAL; 448 goto leave; 449 } 450 if (uset != NULL) { 451 error = kern_sigprocmask(td, SIG_SETMASK, uset, 452 &omask, 0); 453 if (error != 0) 454 goto leave; 455 td->td_pflags |= TDP_OLDMASK; 456 /* 457 * Make sure that ast() is called on return to 458 * usermode and TDP_OLDMASK is cleared, restoring old 459 * sigmask. 460 */ 461 thread_lock(td); 462 td->td_flags |= TDF_ASTPENDING; 463 thread_unlock(td); 464 } 465 466 coargs.leventlist = events; 467 coargs.p = td->td_proc; 468 coargs.count = 0; 469 coargs.error = 0; 470 471 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 472 if (error == 0 && coargs.error != 0) 473 error = coargs.error; 474 475 /* 476 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 477 * Maybe we should translate that but I don't think it matters at all. 478 */ 479 if (error == 0) 480 td->td_retval[0] = coargs.count; 481 482 if (uset != NULL) 483 error = kern_sigprocmask(td, SIG_SETMASK, &omask, 484 NULL, 0); 485 leave: 486 fdrop(epfp, td); 487 return (error); 488 } 489 490 static int 491 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 492 int maxevents, int timeout, sigset_t *uset) 493 { 494 struct timespec ts, *tsp; 495 496 /* 497 * Linux epoll_wait(2) man page states that timeout of -1 causes caller 498 * to block indefinitely. Real implementation does it if any negative 499 * timeout value is passed. 500 */ 501 if (timeout >= 0) { 502 /* Convert from milliseconds to timespec. */ 503 ts.tv_sec = timeout / 1000; 504 ts.tv_nsec = (timeout % 1000) * 1000000; 505 tsp = &ts; 506 } else { 507 tsp = NULL; 508 } 509 return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset)); 510 511 } 512 513 #ifdef LINUX_LEGACY_SYSCALLS 514 int 515 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 516 { 517 518 return (linux_epoll_wait_common(td, args->epfd, args->events, 519 args->maxevents, args->timeout, NULL)); 520 } 521 #endif 522 523 int 524 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 525 { 526 sigset_t mask, *pmask; 527 int error; 528 529 error = linux_copyin_sigset(args->mask, sizeof(l_sigset_t), 530 &mask, &pmask); 531 if (error != 0) 532 return (error); 533 534 return (linux_epoll_wait_common(td, args->epfd, args->events, 535 args->maxevents, args->timeout, pmask)); 536 } 537 538 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 539 int 540 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args) 541 { 542 struct timespec ts, *tsa; 543 sigset_t mask, *pmask; 544 int error; 545 546 error = linux_copyin_sigset(args->mask, sizeof(l_sigset_t), 547 &mask, &pmask); 548 if (error != 0) 549 return (error); 550 551 if (args->timeout) { 552 error = linux_get_timespec64(&ts, args->timeout); 553 if (error != 0) 554 return (error); 555 tsa = &ts; 556 } else 557 tsa = NULL; 558 559 return (linux_epoll_wait_ts(td, args->epfd, args->events, 560 args->maxevents, tsa, pmask)); 561 } 562 #else 563 int 564 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args) 565 { 566 struct timespec ts, *tsa; 567 sigset_t mask, *pmask; 568 int error; 569 570 error = linux_copyin_sigset(args->mask, sizeof(l_sigset_t), 571 &mask, &pmask); 572 if (error != 0) 573 return (error); 574 575 if (args->timeout) { 576 error = linux_get_timespec(&ts, args->timeout); 577 if (error != 0) 578 return (error); 579 tsa = &ts; 580 } else 581 tsa = NULL; 582 583 return (linux_epoll_wait_ts(td, args->epfd, args->events, 584 args->maxevents, tsa, pmask)); 585 } 586 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 587 588 static int 589 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, 590 unsigned int flags) 591 { 592 struct epoll_copyin_args ciargs; 593 struct kevent kev; 594 struct kevent_copyops k_ops = { &ciargs, 595 NULL, 596 epoll_kev_copyin}; 597 598 ciargs.changelist = &kev; 599 EV_SET(&kev, fd, filter, flags, 0, 0, 0); 600 601 return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); 602 } 603 604 static int 605 epoll_fd_registered(struct thread *td, struct file *epfp, int fd) 606 { 607 /* 608 * Set empty filter flags to avoid accidental modification of already 609 * registered events. In the case of event re-registration: 610 * 1. If event does not exists kevent() does nothing and returns ENOENT 611 * 2. If event does exists, it's enabled/disabled state is preserved 612 * but fflags, data and udata fields are overwritten. So we can not 613 * set socket lowats and store user's context pointer in udata. 614 */ 615 if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || 616 epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) 617 return (1); 618 619 return (0); 620 } 621 622 static int 623 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 624 { 625 int error1, error2; 626 627 error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); 628 error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); 629 630 /* return 0 if at least one result positive */ 631 return (error1 == 0 ? 0 : error2); 632 } 633 634 #ifdef LINUX_LEGACY_SYSCALLS 635 int 636 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 637 { 638 struct specialfd_eventfd ae; 639 640 bzero(&ae, sizeof(ae)); 641 ae.initval = args->initval; 642 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 643 } 644 #endif 645 646 int 647 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 648 { 649 struct specialfd_eventfd ae; 650 int flags; 651 652 if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | 653 LINUX_EFD_SEMAPHORE)) != 0) 654 return (EINVAL); 655 flags = 0; 656 if ((args->flags & LINUX_O_CLOEXEC) != 0) 657 flags |= EFD_CLOEXEC; 658 if ((args->flags & LINUX_O_NONBLOCK) != 0) 659 flags |= EFD_NONBLOCK; 660 if ((args->flags & LINUX_EFD_SEMAPHORE) != 0) 661 flags |= EFD_SEMAPHORE; 662 663 bzero(&ae, sizeof(ae)); 664 ae.flags = flags; 665 ae.initval = args->initval; 666 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 667 } 668 669 int 670 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) 671 { 672 struct timerfd *tfd; 673 struct file *fp; 674 clockid_t clockid; 675 int fflags, fd, error; 676 677 if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) 678 return (EINVAL); 679 680 error = linux_to_native_clockid(&clockid, args->clockid); 681 if (error != 0) 682 return (error); 683 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 684 return (EINVAL); 685 686 fflags = 0; 687 if ((args->flags & LINUX_TFD_CLOEXEC) != 0) 688 fflags |= O_CLOEXEC; 689 690 error = falloc(td, &fp, &fd, fflags); 691 if (error != 0) 692 return (error); 693 694 tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); 695 tfd->tfd_clockid = clockid; 696 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 697 698 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 699 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 700 701 fflags = FREAD; 702 if ((args->flags & LINUX_O_NONBLOCK) != 0) 703 fflags |= FNONBLOCK; 704 705 finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); 706 fdrop(fp, td); 707 708 td->td_retval[0] = fd; 709 return (error); 710 } 711 712 static int 713 timerfd_close(struct file *fp, struct thread *td) 714 { 715 struct timerfd *tfd; 716 717 tfd = fp->f_data; 718 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 719 return (EINVAL); 720 721 timespecclear(&tfd->tfd_time.it_value); 722 timespecclear(&tfd->tfd_time.it_interval); 723 724 callout_drain(&tfd->tfd_callout); 725 726 seldrain(&tfd->tfd_sel); 727 knlist_destroy(&tfd->tfd_sel.si_note); 728 729 fp->f_ops = &badfileops; 730 mtx_destroy(&tfd->tfd_lock); 731 free(tfd, M_EPOLL); 732 733 return (0); 734 } 735 736 static int 737 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 738 int flags, struct thread *td) 739 { 740 struct timerfd *tfd; 741 timerfd_t count; 742 int error; 743 744 tfd = fp->f_data; 745 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 746 return (EINVAL); 747 748 if (uio->uio_resid < sizeof(timerfd_t)) 749 return (EINVAL); 750 751 error = 0; 752 mtx_lock(&tfd->tfd_lock); 753 retry: 754 if (tfd->tfd_canceled) { 755 tfd->tfd_count = 0; 756 mtx_unlock(&tfd->tfd_lock); 757 return (ECANCELED); 758 } 759 if (tfd->tfd_count == 0) { 760 if ((fp->f_flag & FNONBLOCK) != 0) { 761 mtx_unlock(&tfd->tfd_lock); 762 return (EAGAIN); 763 } 764 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); 765 if (error == 0) 766 goto retry; 767 } 768 if (error == 0) { 769 count = tfd->tfd_count; 770 tfd->tfd_count = 0; 771 mtx_unlock(&tfd->tfd_lock); 772 error = uiomove(&count, sizeof(timerfd_t), uio); 773 } else 774 mtx_unlock(&tfd->tfd_lock); 775 776 return (error); 777 } 778 779 static int 780 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 781 struct thread *td) 782 { 783 struct timerfd *tfd; 784 int revents = 0; 785 786 tfd = fp->f_data; 787 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 788 return (POLLERR); 789 790 mtx_lock(&tfd->tfd_lock); 791 if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) 792 revents |= events & (POLLIN|POLLRDNORM); 793 if (revents == 0) 794 selrecord(td, &tfd->tfd_sel); 795 mtx_unlock(&tfd->tfd_lock); 796 797 return (revents); 798 } 799 800 static int 801 timerfd_kqfilter(struct file *fp, struct knote *kn) 802 { 803 struct timerfd *tfd; 804 805 tfd = fp->f_data; 806 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 807 return (EINVAL); 808 809 if (kn->kn_filter == EVFILT_READ) 810 kn->kn_fop = &timerfd_rfiltops; 811 else 812 return (EINVAL); 813 814 kn->kn_hook = tfd; 815 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 816 817 return (0); 818 } 819 820 static void 821 filt_timerfddetach(struct knote *kn) 822 { 823 struct timerfd *tfd = kn->kn_hook; 824 825 mtx_lock(&tfd->tfd_lock); 826 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 827 mtx_unlock(&tfd->tfd_lock); 828 } 829 830 static int 831 filt_timerfdread(struct knote *kn, long hint) 832 { 833 struct timerfd *tfd = kn->kn_hook; 834 835 return (tfd->tfd_count > 0); 836 } 837 838 static int 839 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 840 struct ucred *active_cred, struct thread *td) 841 { 842 843 if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) 844 return (EINVAL); 845 846 switch (cmd) { 847 case FIONBIO: 848 case FIOASYNC: 849 return (0); 850 } 851 852 return (ENOTTY); 853 } 854 855 static int 856 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 857 { 858 859 return (ENXIO); 860 } 861 862 static int 863 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 864 { 865 866 kif->kf_type = KF_TYPE_UNKNOWN; 867 return (0); 868 } 869 870 static void 871 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) 872 { 873 874 if (tfd->tfd_clockid == CLOCK_REALTIME) 875 getnanotime(ts); 876 else /* CLOCK_MONOTONIC */ 877 getnanouptime(ts); 878 } 879 880 static void 881 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) 882 { 883 struct timespec cts; 884 885 linux_timerfd_clocktime(tfd, &cts); 886 *ots = tfd->tfd_time; 887 if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { 888 timespecsub(&ots->it_value, &cts, &ots->it_value); 889 if (ots->it_value.tv_sec < 0 || 890 (ots->it_value.tv_sec == 0 && 891 ots->it_value.tv_nsec == 0)) { 892 ots->it_value.tv_sec = 0; 893 ots->it_value.tv_nsec = 1; 894 } 895 } 896 } 897 898 static int 899 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots) 900 { 901 struct timerfd *tfd; 902 struct file *fp; 903 int error; 904 905 error = fget(td, fd, &cap_read_rights, &fp); 906 if (error != 0) 907 return (error); 908 tfd = fp->f_data; 909 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 910 error = EINVAL; 911 goto out; 912 } 913 914 mtx_lock(&tfd->tfd_lock); 915 linux_timerfd_curval(tfd, ots); 916 mtx_unlock(&tfd->tfd_lock); 917 918 out: 919 fdrop(fp, td); 920 return (error); 921 } 922 923 int 924 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args) 925 { 926 struct l_itimerspec lots; 927 struct itimerspec ots; 928 int error; 929 930 error = linux_timerfd_gettime_common(td, args->fd, &ots); 931 if (error != 0) 932 return (error); 933 error = native_to_linux_itimerspec(&lots, &ots); 934 if (error == 0) 935 error = copyout(&lots, args->old_value, sizeof(lots)); 936 return (error); 937 } 938 939 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 940 int 941 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args) 942 { 943 struct l_itimerspec64 lots; 944 struct itimerspec ots; 945 int error; 946 947 error = linux_timerfd_gettime_common(td, args->fd, &ots); 948 if (error != 0) 949 return (error); 950 error = native_to_linux_itimerspec64(&lots, &ots); 951 if (error == 0) 952 error = copyout(&lots, args->old_value, sizeof(lots)); 953 return (error); 954 } 955 #endif 956 957 static int 958 linux_timerfd_settime_common(struct thread *td, int fd, int flags, 959 struct itimerspec *nts, struct itimerspec *oval) 960 { 961 struct timespec cts, ts; 962 struct timerfd *tfd; 963 struct timeval tv; 964 struct file *fp; 965 int error; 966 967 if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) 968 return (EINVAL); 969 970 error = fget(td, fd, &cap_write_rights, &fp); 971 if (error != 0) 972 return (error); 973 tfd = fp->f_data; 974 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 975 error = EINVAL; 976 goto out; 977 } 978 979 mtx_lock(&tfd->tfd_lock); 980 if (!timespecisset(&nts->it_value)) 981 timespecclear(&nts->it_interval); 982 if (oval != NULL) 983 linux_timerfd_curval(tfd, oval); 984 985 bcopy(nts, &tfd->tfd_time, sizeof(*nts)); 986 tfd->tfd_count = 0; 987 if (timespecisset(&nts->it_value)) { 988 linux_timerfd_clocktime(tfd, &cts); 989 ts = nts->it_value; 990 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) { 991 timespecadd(&tfd->tfd_time.it_value, &cts, 992 &tfd->tfd_time.it_value); 993 } else { 994 timespecsub(&ts, &cts, &ts); 995 } 996 TIMESPEC_TO_TIMEVAL(&tv, &ts); 997 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 998 linux_timerfd_expire, tfd); 999 tfd->tfd_canceled = false; 1000 } else { 1001 tfd->tfd_canceled = true; 1002 callout_stop(&tfd->tfd_callout); 1003 } 1004 mtx_unlock(&tfd->tfd_lock); 1005 1006 out: 1007 fdrop(fp, td); 1008 return (error); 1009 } 1010 1011 int 1012 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) 1013 { 1014 struct l_itimerspec lots; 1015 struct itimerspec nts, ots, *pots; 1016 int error; 1017 1018 error = copyin(args->new_value, &lots, sizeof(lots)); 1019 if (error != 0) 1020 return (error); 1021 error = linux_to_native_itimerspec(&nts, &lots); 1022 if (error != 0) 1023 return (error); 1024 pots = (args->old_value != NULL ? &ots : NULL); 1025 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1026 &nts, pots); 1027 if (error == 0 && args->old_value != NULL) { 1028 error = native_to_linux_itimerspec(&lots, &ots); 1029 if (error == 0) 1030 error = copyout(&lots, args->old_value, sizeof(lots)); 1031 } 1032 return (error); 1033 } 1034 1035 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1036 int 1037 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args) 1038 { 1039 struct l_itimerspec64 lots; 1040 struct itimerspec nts, ots, *pots; 1041 int error; 1042 1043 error = copyin(args->new_value, &lots, sizeof(lots)); 1044 if (error != 0) 1045 return (error); 1046 error = linux_to_native_itimerspec64(&nts, &lots); 1047 if (error != 0) 1048 return (error); 1049 pots = (args->old_value != NULL ? &ots : NULL); 1050 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1051 &nts, pots); 1052 if (error == 0 && args->old_value != NULL) { 1053 error = native_to_linux_itimerspec64(&lots, &ots); 1054 if (error == 0) 1055 error = copyout(&lots, args->old_value, sizeof(lots)); 1056 } 1057 return (error); 1058 } 1059 #endif 1060 1061 static void 1062 linux_timerfd_expire(void *arg) 1063 { 1064 struct timespec cts, ts; 1065 struct timeval tv; 1066 struct timerfd *tfd; 1067 1068 tfd = (struct timerfd *)arg; 1069 1070 linux_timerfd_clocktime(tfd, &cts); 1071 if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { 1072 if (timespecisset(&tfd->tfd_time.it_interval)) 1073 timespecadd(&tfd->tfd_time.it_value, 1074 &tfd->tfd_time.it_interval, 1075 &tfd->tfd_time.it_value); 1076 else 1077 /* single shot timer */ 1078 timespecclear(&tfd->tfd_time.it_value); 1079 if (timespecisset(&tfd->tfd_time.it_value)) { 1080 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1081 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1082 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1083 linux_timerfd_expire, tfd); 1084 } 1085 tfd->tfd_count++; 1086 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 1087 selwakeup(&tfd->tfd_sel); 1088 wakeup(&tfd->tfd_count); 1089 } else if (timespecisset(&tfd->tfd_time.it_value)) { 1090 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1091 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1092 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1093 linux_timerfd_expire, tfd); 1094 } 1095 } 1096