1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 #include <sys/param.h> 31 #include <sys/callout.h> 32 #include <sys/capsicum.h> 33 #include <sys/errno.h> 34 #include <sys/event.h> 35 #include <sys/eventfd.h> 36 #include <sys/file.h> 37 #include <sys/filedesc.h> 38 #include <sys/filio.h> 39 #include <sys/limits.h> 40 #include <sys/lock.h> 41 #include <sys/mutex.h> 42 #include <sys/poll.h> 43 #include <sys/proc.h> 44 #include <sys/selinfo.h> 45 #include <sys/specialfd.h> 46 #include <sys/sx.h> 47 #include <sys/syscallsubr.h> 48 #include <sys/timespec.h> 49 #include <sys/user.h> 50 51 #ifdef COMPAT_LINUX32 52 #include <machine/../linux32/linux.h> 53 #include <machine/../linux32/linux32_proto.h> 54 #else 55 #include <machine/../linux/linux.h> 56 #include <machine/../linux/linux_proto.h> 57 #endif 58 59 #include <compat/linux/linux_emul.h> 60 #include <compat/linux/linux_event.h> 61 #include <compat/linux/linux_file.h> 62 #include <compat/linux/linux_signal.h> 63 #include <compat/linux/linux_time.h> 64 #include <compat/linux/linux_util.h> 65 66 typedef uint64_t epoll_udata_t; 67 68 struct epoll_event { 69 uint32_t events; 70 epoll_udata_t data; 71 } 72 #if defined(__amd64__) 73 __attribute__((packed)) 74 #endif 75 ; 76 77 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 78 79 static int epoll_to_kevent(struct thread *td, int fd, 80 struct epoll_event *l_event, struct kevent *kevent, 81 int *nkevents); 82 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 83 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 84 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 85 static int epoll_register_kevent(struct thread *td, struct file *epfp, 86 int fd, int filter, unsigned int flags); 87 static int epoll_fd_registered(struct thread *td, struct file *epfp, 88 int fd); 89 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 90 int fd); 91 92 struct epoll_copyin_args { 93 struct kevent *changelist; 94 }; 95 96 struct epoll_copyout_args { 97 struct epoll_event *leventlist; 98 struct proc *p; 99 uint32_t count; 100 int error; 101 }; 102 103 /* timerfd */ 104 typedef uint64_t timerfd_t; 105 106 static fo_rdwr_t timerfd_read; 107 static fo_ioctl_t timerfd_ioctl; 108 static fo_poll_t timerfd_poll; 109 static fo_kqfilter_t timerfd_kqfilter; 110 static fo_stat_t timerfd_stat; 111 static fo_close_t timerfd_close; 112 static fo_fill_kinfo_t timerfd_fill_kinfo; 113 114 static struct fileops timerfdops = { 115 .fo_read = timerfd_read, 116 .fo_write = invfo_rdwr, 117 .fo_truncate = invfo_truncate, 118 .fo_ioctl = timerfd_ioctl, 119 .fo_poll = timerfd_poll, 120 .fo_kqfilter = timerfd_kqfilter, 121 .fo_stat = timerfd_stat, 122 .fo_close = timerfd_close, 123 .fo_chmod = invfo_chmod, 124 .fo_chown = invfo_chown, 125 .fo_sendfile = invfo_sendfile, 126 .fo_fill_kinfo = timerfd_fill_kinfo, 127 .fo_flags = DFLAG_PASSABLE 128 }; 129 130 static void filt_timerfddetach(struct knote *kn); 131 static int filt_timerfdread(struct knote *kn, long hint); 132 133 static struct filterops timerfd_rfiltops = { 134 .f_isfd = 1, 135 .f_detach = filt_timerfddetach, 136 .f_event = filt_timerfdread 137 }; 138 139 struct timerfd { 140 clockid_t tfd_clockid; 141 struct itimerspec tfd_time; 142 struct callout tfd_callout; 143 timerfd_t tfd_count; 144 bool tfd_canceled; 145 struct selinfo tfd_sel; 146 struct mtx tfd_lock; 147 }; 148 149 static void linux_timerfd_expire(void *); 150 static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); 151 152 static int 153 epoll_create_common(struct thread *td, int flags) 154 { 155 156 return (kern_kqueue(td, flags, NULL)); 157 } 158 159 #ifdef LINUX_LEGACY_SYSCALLS 160 int 161 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 162 { 163 164 /* 165 * args->size is unused. Linux just tests it 166 * and then forgets it as well. 167 */ 168 if (args->size <= 0) 169 return (EINVAL); 170 171 return (epoll_create_common(td, 0)); 172 } 173 #endif 174 175 int 176 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 177 { 178 int flags; 179 180 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 181 return (EINVAL); 182 183 flags = 0; 184 if ((args->flags & LINUX_O_CLOEXEC) != 0) 185 flags |= O_CLOEXEC; 186 187 return (epoll_create_common(td, flags)); 188 } 189 190 /* Structure converting function from epoll to kevent. */ 191 static int 192 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event, 193 struct kevent *kevent, int *nkevents) 194 { 195 uint32_t levents = l_event->events; 196 struct linux_pemuldata *pem; 197 struct proc *p; 198 unsigned short kev_flags = EV_ADD | EV_ENABLE; 199 200 /* flags related to how event is registered */ 201 if ((levents & LINUX_EPOLLONESHOT) != 0) 202 kev_flags |= EV_DISPATCH; 203 if ((levents & LINUX_EPOLLET) != 0) 204 kev_flags |= EV_CLEAR; 205 if ((levents & LINUX_EPOLLERR) != 0) 206 kev_flags |= EV_ERROR; 207 if ((levents & LINUX_EPOLLRDHUP) != 0) 208 kev_flags |= EV_EOF; 209 210 /* flags related to what event is registered */ 211 if ((levents & LINUX_EPOLL_EVRD) != 0) { 212 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); 213 kevent->ext[0] = l_event->data; 214 ++kevent; 215 ++(*nkevents); 216 } 217 if ((levents & LINUX_EPOLL_EVWR) != 0) { 218 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); 219 kevent->ext[0] = l_event->data; 220 ++kevent; 221 ++(*nkevents); 222 } 223 /* zero event mask is legal */ 224 if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) { 225 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); 226 ++(*nkevents); 227 } 228 229 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 230 p = td->td_proc; 231 232 pem = pem_find(p); 233 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 234 235 LINUX_PEM_XLOCK(pem); 236 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 237 pem->flags |= LINUX_XUNSUP_EPOLL; 238 LINUX_PEM_XUNLOCK(pem); 239 linux_msg(td, "epoll_ctl unsupported flags: 0x%x", 240 levents); 241 } else 242 LINUX_PEM_XUNLOCK(pem); 243 return (EINVAL); 244 } 245 246 return (0); 247 } 248 249 /* 250 * Structure converting function from kevent to epoll. In a case 251 * this is called on error in registration we store the error in 252 * event->data and pick it up later in linux_epoll_ctl(). 253 */ 254 static void 255 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 256 { 257 258 l_event->data = kevent->ext[0]; 259 260 if ((kevent->flags & EV_ERROR) != 0) { 261 l_event->events = LINUX_EPOLLERR; 262 return; 263 } 264 265 /* XXX EPOLLPRI, EPOLLHUP */ 266 switch (kevent->filter) { 267 case EVFILT_READ: 268 l_event->events = LINUX_EPOLLIN; 269 if ((kevent->flags & EV_EOF) != 0) 270 l_event->events |= LINUX_EPOLLRDHUP; 271 break; 272 case EVFILT_WRITE: 273 l_event->events = LINUX_EPOLLOUT; 274 break; 275 } 276 } 277 278 /* 279 * Copyout callback used by kevent. This converts kevent 280 * events to epoll events and copies them back to the 281 * userspace. This is also called on error on registering 282 * of the filter. 283 */ 284 static int 285 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 286 { 287 struct epoll_copyout_args *args; 288 struct epoll_event *eep; 289 int error, i; 290 291 args = (struct epoll_copyout_args*) arg; 292 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 293 294 for (i = 0; i < count; i++) 295 kevent_to_epoll(&kevp[i], &eep[i]); 296 297 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 298 if (error == 0) { 299 args->leventlist += count; 300 args->count += count; 301 } else if (args->error == 0) 302 args->error = error; 303 304 free(eep, M_EPOLL); 305 return (error); 306 } 307 308 /* 309 * Copyin callback used by kevent. This copies already 310 * converted filters from kernel memory to the kevent 311 * internal kernel memory. Hence the memcpy instead of 312 * copyin. 313 */ 314 static int 315 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 316 { 317 struct epoll_copyin_args *args; 318 319 args = (struct epoll_copyin_args*) arg; 320 321 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 322 args->changelist += count; 323 324 return (0); 325 } 326 327 /* 328 * Load epoll filter, convert it to kevent filter 329 * and load it into kevent subsystem. 330 */ 331 int 332 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 333 { 334 struct file *epfp, *fp; 335 struct epoll_copyin_args ciargs; 336 struct kevent kev[2]; 337 struct kevent_copyops k_ops = { &ciargs, 338 NULL, 339 epoll_kev_copyin}; 340 struct epoll_event le; 341 cap_rights_t rights; 342 int nchanges = 0; 343 int error; 344 345 if (args->op != LINUX_EPOLL_CTL_DEL) { 346 error = copyin(args->event, &le, sizeof(le)); 347 if (error != 0) 348 return (error); 349 } 350 351 error = fget(td, args->epfd, 352 cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp); 353 if (error != 0) 354 return (error); 355 if (epfp->f_type != DTYPE_KQUEUE) { 356 error = EINVAL; 357 goto leave1; 358 } 359 360 /* Protect user data vector from incorrectly supplied fd. */ 361 error = fget(td, args->fd, 362 cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp); 363 if (error != 0) 364 goto leave1; 365 366 /* Linux disallows spying on himself */ 367 if (epfp == fp) { 368 error = EINVAL; 369 goto leave0; 370 } 371 372 ciargs.changelist = kev; 373 374 if (args->op != LINUX_EPOLL_CTL_DEL) { 375 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges); 376 if (error != 0) 377 goto leave0; 378 } 379 380 switch (args->op) { 381 case LINUX_EPOLL_CTL_MOD: 382 error = epoll_delete_all_events(td, epfp, args->fd); 383 if (error != 0) 384 goto leave0; 385 break; 386 387 case LINUX_EPOLL_CTL_ADD: 388 if (epoll_fd_registered(td, epfp, args->fd)) { 389 error = EEXIST; 390 goto leave0; 391 } 392 break; 393 394 case LINUX_EPOLL_CTL_DEL: 395 /* CTL_DEL means unregister this fd with this epoll */ 396 error = epoll_delete_all_events(td, epfp, args->fd); 397 goto leave0; 398 399 default: 400 error = EINVAL; 401 goto leave0; 402 } 403 404 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 405 406 leave0: 407 fdrop(fp, td); 408 409 leave1: 410 fdrop(epfp, td); 411 return (error); 412 } 413 414 /* 415 * Wait for a filter to be triggered on the epoll file descriptor. 416 */ 417 418 static int 419 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events, 420 int maxevents, struct timespec *tsp, sigset_t *uset) 421 { 422 struct epoll_copyout_args coargs; 423 struct kevent_copyops k_ops = { &coargs, 424 epoll_kev_copyout, 425 NULL}; 426 cap_rights_t rights; 427 struct file *epfp; 428 sigset_t omask; 429 int error; 430 431 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 432 return (EINVAL); 433 434 error = fget(td, epfd, 435 cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp); 436 if (error != 0) 437 return (error); 438 if (epfp->f_type != DTYPE_KQUEUE) { 439 error = EINVAL; 440 goto leave; 441 } 442 if (uset != NULL) { 443 error = kern_sigprocmask(td, SIG_SETMASK, uset, 444 &omask, 0); 445 if (error != 0) 446 goto leave; 447 td->td_pflags |= TDP_OLDMASK; 448 /* 449 * Make sure that ast() is called on return to 450 * usermode and TDP_OLDMASK is cleared, restoring old 451 * sigmask. 452 */ 453 ast_sched(td, TDA_SIGSUSPEND); 454 } 455 456 coargs.leventlist = events; 457 coargs.p = td->td_proc; 458 coargs.count = 0; 459 coargs.error = 0; 460 461 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 462 if (error == 0 && coargs.error != 0) 463 error = coargs.error; 464 465 /* 466 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 467 * Maybe we should translate that but I don't think it matters at all. 468 */ 469 if (error == 0) 470 td->td_retval[0] = coargs.count; 471 472 if (uset != NULL) 473 error = kern_sigprocmask(td, SIG_SETMASK, &omask, 474 NULL, 0); 475 leave: 476 fdrop(epfp, td); 477 return (error); 478 } 479 480 static int 481 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 482 int maxevents, int timeout, sigset_t *uset) 483 { 484 struct timespec ts, *tsp; 485 486 /* 487 * Linux epoll_wait(2) man page states that timeout of -1 causes caller 488 * to block indefinitely. Real implementation does it if any negative 489 * timeout value is passed. 490 */ 491 if (timeout >= 0) { 492 /* Convert from milliseconds to timespec. */ 493 ts.tv_sec = timeout / 1000; 494 ts.tv_nsec = (timeout % 1000) * 1000000; 495 tsp = &ts; 496 } else { 497 tsp = NULL; 498 } 499 return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset)); 500 501 } 502 503 #ifdef LINUX_LEGACY_SYSCALLS 504 int 505 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 506 { 507 508 return (linux_epoll_wait_common(td, args->epfd, args->events, 509 args->maxevents, args->timeout, NULL)); 510 } 511 #endif 512 513 int 514 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 515 { 516 sigset_t mask, *pmask; 517 int error; 518 519 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 520 &mask, &pmask); 521 if (error != 0) 522 return (error); 523 524 return (linux_epoll_wait_common(td, args->epfd, args->events, 525 args->maxevents, args->timeout, pmask)); 526 } 527 528 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 529 int 530 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args) 531 { 532 struct timespec ts, *tsa; 533 sigset_t mask, *pmask; 534 int error; 535 536 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 537 &mask, &pmask); 538 if (error != 0) 539 return (error); 540 541 if (args->timeout) { 542 error = linux_get_timespec64(&ts, args->timeout); 543 if (error != 0) 544 return (error); 545 tsa = &ts; 546 } else 547 tsa = NULL; 548 549 return (linux_epoll_wait_ts(td, args->epfd, args->events, 550 args->maxevents, tsa, pmask)); 551 } 552 #else 553 int 554 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args) 555 { 556 struct timespec ts, *tsa; 557 sigset_t mask, *pmask; 558 int error; 559 560 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 561 &mask, &pmask); 562 if (error != 0) 563 return (error); 564 565 if (args->timeout) { 566 error = linux_get_timespec(&ts, args->timeout); 567 if (error != 0) 568 return (error); 569 tsa = &ts; 570 } else 571 tsa = NULL; 572 573 return (linux_epoll_wait_ts(td, args->epfd, args->events, 574 args->maxevents, tsa, pmask)); 575 } 576 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 577 578 static int 579 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, 580 unsigned int flags) 581 { 582 struct epoll_copyin_args ciargs; 583 struct kevent kev; 584 struct kevent_copyops k_ops = { &ciargs, 585 NULL, 586 epoll_kev_copyin}; 587 588 ciargs.changelist = &kev; 589 EV_SET(&kev, fd, filter, flags, 0, 0, 0); 590 591 return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); 592 } 593 594 static int 595 epoll_fd_registered(struct thread *td, struct file *epfp, int fd) 596 { 597 /* 598 * Set empty filter flags to avoid accidental modification of already 599 * registered events. In the case of event re-registration: 600 * 1. If event does not exists kevent() does nothing and returns ENOENT 601 * 2. If event does exists, it's enabled/disabled state is preserved 602 * but fflags, data and udata fields are overwritten. So we can not 603 * set socket lowats and store user's context pointer in udata. 604 */ 605 if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || 606 epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) 607 return (1); 608 609 return (0); 610 } 611 612 static int 613 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 614 { 615 int error1, error2; 616 617 error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); 618 error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); 619 620 /* return 0 if at least one result positive */ 621 return (error1 == 0 ? 0 : error2); 622 } 623 624 #ifdef LINUX_LEGACY_SYSCALLS 625 int 626 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 627 { 628 struct specialfd_eventfd ae; 629 630 bzero(&ae, sizeof(ae)); 631 ae.initval = args->initval; 632 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 633 } 634 #endif 635 636 int 637 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 638 { 639 struct specialfd_eventfd ae; 640 int flags; 641 642 if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | 643 LINUX_EFD_SEMAPHORE)) != 0) 644 return (EINVAL); 645 flags = 0; 646 if ((args->flags & LINUX_O_CLOEXEC) != 0) 647 flags |= EFD_CLOEXEC; 648 if ((args->flags & LINUX_O_NONBLOCK) != 0) 649 flags |= EFD_NONBLOCK; 650 if ((args->flags & LINUX_EFD_SEMAPHORE) != 0) 651 flags |= EFD_SEMAPHORE; 652 653 bzero(&ae, sizeof(ae)); 654 ae.flags = flags; 655 ae.initval = args->initval; 656 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 657 } 658 659 int 660 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) 661 { 662 struct timerfd *tfd; 663 struct file *fp; 664 clockid_t clockid; 665 int fflags, fd, error; 666 667 if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) 668 return (EINVAL); 669 670 error = linux_to_native_clockid(&clockid, args->clockid); 671 if (error != 0) 672 return (error); 673 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 674 return (EINVAL); 675 676 fflags = 0; 677 if ((args->flags & LINUX_TFD_CLOEXEC) != 0) 678 fflags |= O_CLOEXEC; 679 680 error = falloc(td, &fp, &fd, fflags); 681 if (error != 0) 682 return (error); 683 684 tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); 685 tfd->tfd_clockid = clockid; 686 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 687 688 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 689 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 690 691 fflags = FREAD; 692 if ((args->flags & LINUX_O_NONBLOCK) != 0) 693 fflags |= FNONBLOCK; 694 695 finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); 696 fdrop(fp, td); 697 698 td->td_retval[0] = fd; 699 return (error); 700 } 701 702 static int 703 timerfd_close(struct file *fp, struct thread *td) 704 { 705 struct timerfd *tfd; 706 707 tfd = fp->f_data; 708 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 709 return (EINVAL); 710 711 timespecclear(&tfd->tfd_time.it_value); 712 timespecclear(&tfd->tfd_time.it_interval); 713 714 callout_drain(&tfd->tfd_callout); 715 716 seldrain(&tfd->tfd_sel); 717 knlist_destroy(&tfd->tfd_sel.si_note); 718 719 fp->f_ops = &badfileops; 720 mtx_destroy(&tfd->tfd_lock); 721 free(tfd, M_EPOLL); 722 723 return (0); 724 } 725 726 static int 727 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 728 int flags, struct thread *td) 729 { 730 struct timerfd *tfd; 731 timerfd_t count; 732 int error; 733 734 tfd = fp->f_data; 735 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 736 return (EINVAL); 737 738 if (uio->uio_resid < sizeof(timerfd_t)) 739 return (EINVAL); 740 741 error = 0; 742 mtx_lock(&tfd->tfd_lock); 743 retry: 744 if (tfd->tfd_canceled) { 745 tfd->tfd_count = 0; 746 mtx_unlock(&tfd->tfd_lock); 747 return (ECANCELED); 748 } 749 if (tfd->tfd_count == 0) { 750 if ((fp->f_flag & FNONBLOCK) != 0) { 751 mtx_unlock(&tfd->tfd_lock); 752 return (EAGAIN); 753 } 754 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); 755 if (error == 0) 756 goto retry; 757 } 758 if (error == 0) { 759 count = tfd->tfd_count; 760 tfd->tfd_count = 0; 761 mtx_unlock(&tfd->tfd_lock); 762 error = uiomove(&count, sizeof(timerfd_t), uio); 763 } else 764 mtx_unlock(&tfd->tfd_lock); 765 766 return (error); 767 } 768 769 static int 770 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 771 struct thread *td) 772 { 773 struct timerfd *tfd; 774 int revents = 0; 775 776 tfd = fp->f_data; 777 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 778 return (POLLERR); 779 780 mtx_lock(&tfd->tfd_lock); 781 if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) 782 revents |= events & (POLLIN|POLLRDNORM); 783 if (revents == 0) 784 selrecord(td, &tfd->tfd_sel); 785 mtx_unlock(&tfd->tfd_lock); 786 787 return (revents); 788 } 789 790 static int 791 timerfd_kqfilter(struct file *fp, struct knote *kn) 792 { 793 struct timerfd *tfd; 794 795 tfd = fp->f_data; 796 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 797 return (EINVAL); 798 799 if (kn->kn_filter == EVFILT_READ) 800 kn->kn_fop = &timerfd_rfiltops; 801 else 802 return (EINVAL); 803 804 kn->kn_hook = tfd; 805 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 806 807 return (0); 808 } 809 810 static void 811 filt_timerfddetach(struct knote *kn) 812 { 813 struct timerfd *tfd = kn->kn_hook; 814 815 mtx_lock(&tfd->tfd_lock); 816 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 817 mtx_unlock(&tfd->tfd_lock); 818 } 819 820 static int 821 filt_timerfdread(struct knote *kn, long hint) 822 { 823 struct timerfd *tfd = kn->kn_hook; 824 825 return (tfd->tfd_count > 0); 826 } 827 828 static int 829 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 830 struct ucred *active_cred, struct thread *td) 831 { 832 833 if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) 834 return (EINVAL); 835 836 switch (cmd) { 837 case FIONBIO: 838 case FIOASYNC: 839 return (0); 840 } 841 842 return (ENOTTY); 843 } 844 845 static int 846 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 847 { 848 849 return (ENXIO); 850 } 851 852 static int 853 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 854 { 855 856 kif->kf_type = KF_TYPE_UNKNOWN; 857 return (0); 858 } 859 860 static void 861 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) 862 { 863 864 if (tfd->tfd_clockid == CLOCK_REALTIME) 865 getnanotime(ts); 866 else /* CLOCK_MONOTONIC */ 867 getnanouptime(ts); 868 } 869 870 static void 871 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) 872 { 873 struct timespec cts; 874 875 linux_timerfd_clocktime(tfd, &cts); 876 *ots = tfd->tfd_time; 877 if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { 878 timespecsub(&ots->it_value, &cts, &ots->it_value); 879 if (ots->it_value.tv_sec < 0 || 880 (ots->it_value.tv_sec == 0 && 881 ots->it_value.tv_nsec == 0)) { 882 ots->it_value.tv_sec = 0; 883 ots->it_value.tv_nsec = 1; 884 } 885 } 886 } 887 888 static int 889 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots) 890 { 891 struct timerfd *tfd; 892 struct file *fp; 893 int error; 894 895 error = fget(td, fd, &cap_read_rights, &fp); 896 if (error != 0) 897 return (error); 898 tfd = fp->f_data; 899 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 900 error = EINVAL; 901 goto out; 902 } 903 904 mtx_lock(&tfd->tfd_lock); 905 linux_timerfd_curval(tfd, ots); 906 mtx_unlock(&tfd->tfd_lock); 907 908 out: 909 fdrop(fp, td); 910 return (error); 911 } 912 913 int 914 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args) 915 { 916 struct l_itimerspec lots; 917 struct itimerspec ots; 918 int error; 919 920 error = linux_timerfd_gettime_common(td, args->fd, &ots); 921 if (error != 0) 922 return (error); 923 error = native_to_linux_itimerspec(&lots, &ots); 924 if (error == 0) 925 error = copyout(&lots, args->old_value, sizeof(lots)); 926 return (error); 927 } 928 929 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 930 int 931 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args) 932 { 933 struct l_itimerspec64 lots; 934 struct itimerspec ots; 935 int error; 936 937 error = linux_timerfd_gettime_common(td, args->fd, &ots); 938 if (error != 0) 939 return (error); 940 error = native_to_linux_itimerspec64(&lots, &ots); 941 if (error == 0) 942 error = copyout(&lots, args->old_value, sizeof(lots)); 943 return (error); 944 } 945 #endif 946 947 static int 948 linux_timerfd_settime_common(struct thread *td, int fd, int flags, 949 struct itimerspec *nts, struct itimerspec *oval) 950 { 951 struct timespec cts, ts; 952 struct timerfd *tfd; 953 struct timeval tv; 954 struct file *fp; 955 int error; 956 957 if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) 958 return (EINVAL); 959 960 error = fget(td, fd, &cap_write_rights, &fp); 961 if (error != 0) 962 return (error); 963 tfd = fp->f_data; 964 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 965 error = EINVAL; 966 goto out; 967 } 968 969 mtx_lock(&tfd->tfd_lock); 970 if (!timespecisset(&nts->it_value)) 971 timespecclear(&nts->it_interval); 972 if (oval != NULL) 973 linux_timerfd_curval(tfd, oval); 974 975 bcopy(nts, &tfd->tfd_time, sizeof(*nts)); 976 tfd->tfd_count = 0; 977 if (timespecisset(&nts->it_value)) { 978 linux_timerfd_clocktime(tfd, &cts); 979 ts = nts->it_value; 980 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) { 981 timespecadd(&tfd->tfd_time.it_value, &cts, 982 &tfd->tfd_time.it_value); 983 } else { 984 timespecsub(&ts, &cts, &ts); 985 } 986 TIMESPEC_TO_TIMEVAL(&tv, &ts); 987 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 988 linux_timerfd_expire, tfd); 989 tfd->tfd_canceled = false; 990 } else { 991 tfd->tfd_canceled = true; 992 callout_stop(&tfd->tfd_callout); 993 } 994 mtx_unlock(&tfd->tfd_lock); 995 996 out: 997 fdrop(fp, td); 998 return (error); 999 } 1000 1001 int 1002 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) 1003 { 1004 struct l_itimerspec lots; 1005 struct itimerspec nts, ots, *pots; 1006 int error; 1007 1008 error = copyin(args->new_value, &lots, sizeof(lots)); 1009 if (error != 0) 1010 return (error); 1011 error = linux_to_native_itimerspec(&nts, &lots); 1012 if (error != 0) 1013 return (error); 1014 pots = (args->old_value != NULL ? &ots : NULL); 1015 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1016 &nts, pots); 1017 if (error == 0 && args->old_value != NULL) { 1018 error = native_to_linux_itimerspec(&lots, &ots); 1019 if (error == 0) 1020 error = copyout(&lots, args->old_value, sizeof(lots)); 1021 } 1022 return (error); 1023 } 1024 1025 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1026 int 1027 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args) 1028 { 1029 struct l_itimerspec64 lots; 1030 struct itimerspec nts, ots, *pots; 1031 int error; 1032 1033 error = copyin(args->new_value, &lots, sizeof(lots)); 1034 if (error != 0) 1035 return (error); 1036 error = linux_to_native_itimerspec64(&nts, &lots); 1037 if (error != 0) 1038 return (error); 1039 pots = (args->old_value != NULL ? &ots : NULL); 1040 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1041 &nts, pots); 1042 if (error == 0 && args->old_value != NULL) { 1043 error = native_to_linux_itimerspec64(&lots, &ots); 1044 if (error == 0) 1045 error = copyout(&lots, args->old_value, sizeof(lots)); 1046 } 1047 return (error); 1048 } 1049 #endif 1050 1051 static void 1052 linux_timerfd_expire(void *arg) 1053 { 1054 struct timespec cts, ts; 1055 struct timeval tv; 1056 struct timerfd *tfd; 1057 1058 tfd = (struct timerfd *)arg; 1059 1060 linux_timerfd_clocktime(tfd, &cts); 1061 if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { 1062 if (timespecisset(&tfd->tfd_time.it_interval)) 1063 timespecadd(&tfd->tfd_time.it_value, 1064 &tfd->tfd_time.it_interval, 1065 &tfd->tfd_time.it_value); 1066 else 1067 /* single shot timer */ 1068 timespecclear(&tfd->tfd_time.it_value); 1069 if (timespecisset(&tfd->tfd_time.it_value)) { 1070 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1071 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1072 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1073 linux_timerfd_expire, tfd); 1074 } 1075 tfd->tfd_count++; 1076 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 1077 selwakeup(&tfd->tfd_sel); 1078 wakeup(&tfd->tfd_count); 1079 } else if (timespecisset(&tfd->tfd_time.it_value)) { 1080 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1081 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1082 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1083 linux_timerfd_expire, tfd); 1084 } 1085 } 1086