1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/callout.h>
#include <sys/capsicum.h>
#include <sys/errno.h>
#include <sys/event.h>
#include <sys/eventfd.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/selinfo.h>
#include <sys/specialfd.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/timespec.h>
#include <sys/user.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_event.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_time.h>
#include <compat/linux/linux_util.h>

/* Opaque per-event user data, mirroring the Linux epoll_event "data" field. */
typedef uint64_t	epoll_udata_t;

/*
 * Kernel-side copy of the Linux userspace struct epoll_event.  On amd64 it
 * is packed, presumably to match the layout Linux uses on x86-64 — confirm
 * against the Linux uapi header before changing.
 */
struct epoll_event {
	uint32_t	events;
	epoll_udata_t	data;
}
#if defined(__amd64__)
__attribute__((packed))
#endif
;

/* Cap on maxevents so the event-array byte count cannot overflow an int. */
#define	LINUX_MAX_EVENTS	(INT_MAX / sizeof(struct epoll_event))

static int	epoll_to_kevent(struct thread *td, int fd,
		    struct epoll_event *l_event, struct kevent *kevent,
		    int *nkevents);
static void	kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event);
static int	epoll_kev_copyout(void *arg, struct kevent *kevp, int count);
static int	epoll_kev_copyin(void *arg, struct kevent *kevp, int count);
static int	epoll_register_kevent(struct thread *td, struct file *epfp,
		    int fd, int filter, unsigned int flags);
static int	epoll_fd_registered(struct thread *td, struct file *epfp,
		    int fd);
static int	epoll_delete_all_events(struct thread *td, struct file *epfp,
		    int fd);

/* Argument cursor for the kevent(2) changelist copyin callback. */
struct epoll_copyin_args {
	struct kevent	*changelist;
};

/* Argument/result state for the kevent(2) eventlist copyout callback. */
struct epoll_copyout_args {
	struct epoll_event	*leventlist;	/* userspace buffer cursor */
	struct proc		*p;
	uint32_t		count;		/* events copied out so far */
	int			error;		/* first copyout error seen */
};

/* timerfd */

/* Expiration counter; a read(2) on a timerfd returns one of these. */
typedef uint64_t	timerfd_t;

static fo_rdwr_t	timerfd_read;
static fo_ioctl_t	timerfd_ioctl;
static fo_poll_t	timerfd_poll;
static fo_kqfilter_t	timerfd_kqfilter;
static fo_stat_t	timerfd_stat;
static fo_close_t	timerfd_close;
static fo_fill_kinfo_t	timerfd_fill_kinfo;

/* File operations vector for DTYPE_LINUXTFD descriptors (read-only fd). */
static struct fileops timerfdops = {
	.fo_read = timerfd_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = timerfd_ioctl,
	.fo_poll = timerfd_poll,
	.fo_kqfilter = timerfd_kqfilter,
	.fo_stat = timerfd_stat,
	.fo_close = timerfd_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = timerfd_fill_kinfo,
	.fo_flags = DFLAG_PASSABLE
};

static void	filt_timerfddetach(struct knote *kn);
static int	filt_timerfdread(struct knote *kn, long hint);

/* EVFILT_READ filter for kqueue-monitoring a timerfd. */
static struct filterops timerfd_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_timerfddetach,
	.f_event = filt_timerfdread
};

/* Per-descriptor timerfd state; tfd_lock protects the mutable fields. */
struct timerfd {
	clockid_t	tfd_clockid;	/* CLOCK_REALTIME or CLOCK_MONOTONIC */
	struct itimerspec	tfd_time;	/* absolute expiry + interval */
	struct callout	tfd_callout;	/* fires linux_timerfd_expire() */
	timerfd_t	tfd_count;	/* expirations since last read */
	bool		tfd_canceled;	/* timer disarmed; read returns ECANCELED */
	struct selinfo	tfd_sel;	/* poll/kqueue notification state */
	struct mtx	tfd_lock;
};

static void	linux_timerfd_expire(void *);
static void	linux_timerfd_curval(struct timerfd *, struct itimerspec *);

/* An epoll descriptor is emulated by a native kqueue descriptor. */
static int
epoll_create_common(struct thread *td, int flags)
{

	return (kern_kqueue(td, flags, NULL));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args)
{

	/*
	 * args->size is unused. Linux just tests it
	 * and then forgets it as well.
	 */
	if (args->size <= 0)
		return (EINVAL);

	return (epoll_create_common(td, 0));
}
#endif

int
linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args)
{
	int flags;

	/* LINUX_O_CLOEXEC is the only flag accepted here. */
	if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0)
		return (EINVAL);

	flags = 0;
	if ((args->flags & LINUX_O_CLOEXEC) != 0)
		flags |= O_CLOEXEC;

	return (epoll_create_common(td, flags));
}

/*
 * Structure converting function from epoll to kevent.
 * May emit up to two kevents (EVFILT_READ and/or EVFILT_WRITE) for a single
 * epoll event; *nkevents is incremented per kevent produced.  Returns EINVAL
 * for epoll flags outside LINUX_EPOLL_EVSUP, warning once per process.
 */
static int
epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event,
    struct kevent *kevent, int *nkevents)
{
	uint32_t levents = l_event->events;
	struct linux_pemuldata *pem;
	struct proc *p;
	unsigned short kev_flags = EV_ADD | EV_ENABLE;

	/* flags related to how event is registered */
	if ((levents & LINUX_EPOLLONESHOT) != 0)
		kev_flags |= EV_DISPATCH;
	if ((levents & LINUX_EPOLLET) != 0)
		kev_flags |= EV_CLEAR;
	if ((levents & LINUX_EPOLLERR) != 0)
		kev_flags |= EV_ERROR;
	if ((levents & LINUX_EPOLLRDHUP) != 0)
		kev_flags |= EV_EOF;

	/* flags related to what event is registered */
	if ((levents & LINUX_EPOLL_EVRD) != 0) {
		EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0);
		/* Stash the user's 64-bit data in the kevent extension. */
		kevent->ext[0] = l_event->data;
		++kevent;
		++(*nkevents);
	}
	if ((levents & LINUX_EPOLL_EVWR) != 0) {
		EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0);
		kevent->ext[0] = l_event->data;
		++kevent;
		++(*nkevents);
	}
	/* zero event mask is legal */
	if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) {
		EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0);
		++(*nkevents);
	}

	/* Reject unsupported epoll flags, logging only on first occurrence. */
	if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) {
		p = td->td_proc;

		pem = pem_find(p);
		KASSERT(pem != NULL, ("epoll proc emuldata not found.\n"));

		LINUX_PEM_XLOCK(pem);
		if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) {
			pem->flags |= LINUX_XUNSUP_EPOLL;
			LINUX_PEM_XUNLOCK(pem);
			linux_msg(td, "epoll_ctl unsupported flags: 0x%x",
			    levents);
		} else
			LINUX_PEM_XUNLOCK(pem);
		return (EINVAL);
	}

	return (0);
}

/*
 * Structure converting function from kevent to epoll. In a case
 * this is called on error in registration we store the error in
 * event->data and pick it up later in linux_epoll_ctl().
 */
static void
kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event)
{

	l_event->data = kevent->ext[0];

	if ((kevent->flags & EV_ERROR) != 0) {
		l_event->events = LINUX_EPOLLERR;
		return;
	}

	/* XXX EPOLLPRI, EPOLLHUP */
	switch (kevent->filter) {
	case EVFILT_READ:
		l_event->events = LINUX_EPOLLIN;
		if ((kevent->flags & EV_EOF) != 0)
			l_event->events |= LINUX_EPOLLRDHUP;
		break;
	case EVFILT_WRITE:
		l_event->events = LINUX_EPOLLOUT;
		break;
	}
}

/*
 * Copyout callback used by kevent. This converts kevent
 * events to epoll events and copies them back to the
 * userspace. This is also called on error on registering
 * of the filter.
 */
static int
epoll_kev_copyout(void *arg, struct kevent *kevp, int count)
{
	struct epoll_copyout_args *args;
	struct epoll_event *eep;
	int error, i;

	args = (struct epoll_copyout_args*) arg;
	eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO);

	for (i = 0; i < count; i++)
		kevent_to_epoll(&kevp[i], &eep[i]);

	error = copyout(eep, args->leventlist, count * sizeof(*eep));
	if (error == 0) {
		/* Advance the user cursor; record total for td_retval. */
		args->leventlist += count;
		args->count += count;
	} else if (args->error == 0)
		args->error = error;

	free(eep, M_EPOLL);
	return (error);
}

/*
 * Copyin callback used by kevent. This copies already
 * converted filters from kernel memory to the kevent
 * internal kernel memory. Hence the memcpy instead of
 * copyin.
 */
static int
epoll_kev_copyin(void *arg, struct kevent *kevp, int count)
{
	struct epoll_copyin_args *args;

	args = (struct epoll_copyin_args*) arg;

	memcpy(kevp, args->changelist, count * sizeof(*kevp));
	args->changelist += count;

	return (0);
}

/*
 * Load epoll filter, convert it to kevent filter
 * and load it into kevent subsystem.
 *
 * Implements epoll_ctl(2): ADD registers (failing with EEXIST if already
 * registered), MOD re-registers after deleting existing filters, DEL
 * unregisters both read and write filters.
 */
int
linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args)
{
	struct file *epfp, *fp;
	struct epoll_copyin_args ciargs;
	struct kevent kev[2];
	struct kevent_copyops k_ops = { &ciargs,
					NULL,
					epoll_kev_copyin};
	struct epoll_event le;
	cap_rights_t rights;
	int nchanges = 0;
	int error;

	/* DEL ignores the event argument; Linux allows it to be NULL then. */
	if (args->op != LINUX_EPOLL_CTL_DEL) {
		error = copyin(args->event, &le, sizeof(le));
		if (error != 0)
			return (error);
	}

	error = fget(td, args->epfd,
	    cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp);
	if (error != 0)
		return (error);
	if (epfp->f_type != DTYPE_KQUEUE) {
		error = EINVAL;
		goto leave1;
	}

	/* Protect user data vector from incorrectly supplied fd. */
	error = fget(td, args->fd,
	    cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp);
	if (error != 0)
		goto leave1;

	/* Linux disallows spying on himself */
	if (epfp == fp) {
		error = EINVAL;
		goto leave0;
	}

	ciargs.changelist = kev;

	if (args->op != LINUX_EPOLL_CTL_DEL) {
		error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges);
		if (error != 0)
			goto leave0;
	}

	switch (args->op) {
	case LINUX_EPOLL_CTL_MOD:
		/* MOD is emulated as delete-then-re-add. */
		error = epoll_delete_all_events(td, epfp, args->fd);
		if (error != 0)
			goto leave0;
		break;

	case LINUX_EPOLL_CTL_ADD:
		if (epoll_fd_registered(td, epfp, args->fd)) {
			error = EEXIST;
			goto leave0;
		}
		break;

	case LINUX_EPOLL_CTL_DEL:
		/* CTL_DEL means unregister this fd with this epoll */
		error = epoll_delete_all_events(td, epfp, args->fd);
		goto leave0;

	default:
		error = EINVAL;
		goto leave0;
	}

	error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL);

leave0:
	fdrop(fp, td);

leave1:
	fdrop(epfp, td);
	return (error);
}

/*
 * Wait for a filter to be triggered on the epoll file descriptor.
 *
 * Common backend for epoll_wait/epoll_pwait/epoll_pwait2: optionally
 * installs the caller-supplied signal mask for the duration of the wait,
 * collects up to maxevents kevents via epoll_kev_copyout(), and returns
 * the number of delivered events in td_retval[0].
 */
static int
linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events,
    int maxevents, struct timespec *tsp, sigset_t *uset)
{
	struct epoll_copyout_args coargs;
	struct kevent_copyops k_ops = { &coargs,
					epoll_kev_copyout,
					NULL};
	cap_rights_t rights;
	struct file *epfp;
	sigset_t omask;
	int error;

	if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS)
		return (EINVAL);

	error = fget(td, epfd,
	    cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp);
	if (error != 0)
		return (error);
	if (epfp->f_type != DTYPE_KQUEUE) {
		error = EINVAL;
		goto leave;
	}
	if (uset != NULL) {
		error = kern_sigprocmask(td, SIG_SETMASK, uset,
		    &omask, 0);
		if (error != 0)
			goto leave;
		td->td_pflags |= TDP_OLDMASK;
		/*
		 * Make sure that ast() is called on return to
		 * usermode and TDP_OLDMASK is cleared, restoring old
		 * sigmask.
		 */
		ast_sched(td, TDA_SIGSUSPEND);
	}

	coargs.leventlist = events;
	coargs.p = td->td_proc;
	coargs.count = 0;
	coargs.error = 0;

	error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp);
	if (error == 0 && coargs.error != 0)
		error = coargs.error;

	/*
	 * kern_kevent might return ENOMEM which is not expected from epoll_wait.
	 * Maybe we should translate that but I don't think it matters at all.
	 */
	if (error == 0)
		td->td_retval[0] = coargs.count;

	/* Restore the original signal mask even if the wait failed. */
	if (uset != NULL)
		error = kern_sigprocmask(td, SIG_SETMASK, &omask,
		    NULL, 0);
leave:
	fdrop(epfp, td);
	return (error);
}

/* Convert a millisecond timeout to a timespec and wait. */
static int
linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events,
    int maxevents, int timeout, sigset_t *uset)
{
	struct timespec ts, *tsp;

	/*
	 * Linux epoll_wait(2) man page states that timeout of -1 causes caller
	 * to block indefinitely. Real implementation does it if any negative
	 * timeout value is passed.
	 */
	if (timeout >= 0) {
		/* Convert from milliseconds to timespec. */
		ts.tv_sec = timeout / 1000;
		ts.tv_nsec = (timeout % 1000) * 1000000;
		tsp = &ts;
	} else {
		tsp = NULL;
	}
	return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset));

}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args)
{

	return (linux_epoll_wait_common(td, args->epfd, args->events,
	    args->maxevents, args->timeout, NULL));
}
#endif

int
linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args)
{
	sigset_t mask, *pmask;
	int error;

	error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
	    &mask, &pmask);
	if (error != 0)
		return (error);

	return (linux_epoll_wait_common(td, args->epfd, args->events,
	    args->maxevents, args->timeout, pmask));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 32-bit ABI variant: the timeout is a 64-bit Linux timespec. */
int
linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args)
{
	struct timespec ts, *tsa;
	sigset_t mask, *pmask;
	int error;

	error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
	    &mask, &pmask);
	if (error != 0)
		return (error);

	/* NULL timeout means wait forever. */
	if (args->timeout) {
		error = linux_get_timespec64(&ts, args->timeout);
		if (error != 0)
			return (error);
		tsa = &ts;
	} else
		tsa = NULL;

	return (linux_epoll_wait_ts(td, args->epfd, args->events,
	    args->maxevents, tsa, pmask));
}
#else
int
linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args)
{
	struct timespec ts, *tsa;
	sigset_t mask, *pmask;
	int error;

	error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t),
	    &mask, &pmask);
	if (error != 0)
		return (error);

	/* NULL timeout means wait forever. */
	if (args->timeout) {
		error = linux_get_timespec(&ts, args->timeout);
		if (error != 0)
			return (error);
		tsa = &ts;
	} else
		tsa = NULL;

	return (linux_epoll_wait_ts(td, args->epfd, args->events,
	    args->maxevents, tsa, pmask));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/* Submit a single-kevent changelist for fd against the epoll kqueue. */
static int
epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter,
    unsigned int flags)
{
	struct epoll_copyin_args ciargs;
	struct kevent kev;
	struct kevent_copyops k_ops = { &ciargs,
					NULL,
					epoll_kev_copyin};

	ciargs.changelist = &kev;
	EV_SET(&kev, fd, filter, flags, 0, 0, 0);

	return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL));
}

/* Return non-zero if fd already has a read or write filter on this epoll. */
static int
epoll_fd_registered(struct thread *td, struct file *epfp, int fd)
{
	/*
	 * Set empty filter flags to avoid accidental modification of already
	 * registered events. In the case of event re-registration:
	 * 1. If event does not exists kevent() does nothing and returns ENOENT
	 * 2. If event does exists, it's enabled/disabled state is preserved
	 *    but fflags, data and udata fields are overwritten. So we can not
	 *    set socket lowats and store user's context pointer in udata.
	 */
	if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT ||
	    epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT)
		return (1);

	return (0);
}

/* Remove both read and write filters for fd from this epoll. */
static int
epoll_delete_all_events(struct thread *td, struct file *epfp, int fd)
{
	int error1, error2;

	error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE);
	error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE);

	/* return 0 if at least one result positive */
	return (error1 == 0 ? 0 : error2);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_eventfd(struct thread *td, struct linux_eventfd_args *args)
{
	struct specialfd_eventfd ae;

	bzero(&ae, sizeof(ae));
	ae.initval = args->initval;
	return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
}
#endif

int
linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args)
{
	struct specialfd_eventfd ae;
	int flags;

	if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK |
	    LINUX_EFD_SEMAPHORE)) != 0)
		return (EINVAL);
	flags = 0;
	if ((args->flags & LINUX_O_CLOEXEC) != 0)
		flags |= EFD_CLOEXEC;
	if ((args->flags & LINUX_O_NONBLOCK) != 0)
		flags |= EFD_NONBLOCK;
	if ((args->flags & LINUX_EFD_SEMAPHORE) != 0)
		flags |= EFD_SEMAPHORE;

	bzero(&ae, sizeof(ae));
	ae.flags = flags;
	ae.initval = args->initval;
	return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae));
}

/* Create a timerfd descriptor backed by a struct timerfd and timerfdops. */
int
linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args)
{
	struct timerfd *tfd;
	struct file *fp;
	clockid_t clockid;
	int fflags, fd, error;

	if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0)
		return (EINVAL);

	error = linux_to_native_clockid(&clockid, args->clockid);
	if (error != 0)
		return (error);
	/* Only the realtime and monotonic clocks are emulated. */
	if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC)
		return (EINVAL);

	fflags = 0;
	if ((args->flags & LINUX_TFD_CLOEXEC) != 0)
		fflags |= O_CLOEXEC;

	error = falloc(td, &fp, &fd, fflags);
	if (error != 0)
		return (error);

	tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO);
	tfd->tfd_clockid = clockid;
	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);

	/* Callout and knote list share tfd_lock for serialization. */
	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);

	fflags = FREAD;
	if ((args->flags & LINUX_O_NONBLOCK) != 0)
		fflags |= FNONBLOCK;

	finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops);
	/* Drop the falloc() reference; the fd table holds the remaining one. */
	fdrop(fp, td);

	td->td_retval[0] = fd;
	return (error);
}

static int
timerfd_close(struct file *fp, struct thread *td)
{
	struct timerfd *tfd;

	tfd = fp->f_data;
	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
		return (EINVAL);

	timespecclear(&tfd->tfd_time.it_value);
	timespecclear(&tfd->tfd_time.it_interval);

	/* Wait for any in-flight expiry callback before freeing. */
	callout_drain(&tfd->tfd_callout);

	seldrain(&tfd->tfd_sel);
	knlist_destroy(&tfd->tfd_sel.si_note);

	fp->f_ops = &badfileops;
	mtx_destroy(&tfd->tfd_lock);
	free(tfd, M_EPOLL);

	return (0);
}

/*
 * read(2) on a timerfd returns the expiration count and resets it to zero,
 * blocking (interruptibly) while the count is zero unless FNONBLOCK is set.
 */
static int
timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct timerfd *tfd;
	timerfd_t count;
	int error;

	tfd = fp->f_data;
	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
		return (EINVAL);

	if (uio->uio_resid < sizeof(timerfd_t))
		return (EINVAL);

	error = 0;
	mtx_lock(&tfd->tfd_lock);
retry:
	if (tfd->tfd_canceled) {
		tfd->tfd_count = 0;
		mtx_unlock(&tfd->tfd_lock);
		return (ECANCELED);
	}
	if (tfd->tfd_count == 0) {
		if ((fp->f_flag & FNONBLOCK) != 0) {
			mtx_unlock(&tfd->tfd_lock);
			return (EAGAIN);
		}
		/* PCATCH: allow signals to interrupt the sleep. */
		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0);
		if (error == 0)
			goto retry;
	}
	if (error == 0) {
		count = tfd->tfd_count;
		tfd->tfd_count = 0;
		mtx_unlock(&tfd->tfd_lock);
		error = uiomove(&count, sizeof(timerfd_t), uio);
	} else
		mtx_unlock(&tfd->tfd_lock);

	return (error);
}

/* A timerfd is readable whenever the expiration count is non-zero. */
static int
timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct timerfd *tfd;
	int revents = 0;

	tfd = fp->f_data;
	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
		return (POLLERR);

	mtx_lock(&tfd->tfd_lock);
	if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0)
		revents |= events & (POLLIN|POLLRDNORM);
	if (revents == 0)
		selrecord(td, &tfd->tfd_sel);
	mtx_unlock(&tfd->tfd_lock);

	return (revents);
}

static int
timerfd_kqfilter(struct file *fp, struct knote *kn)
{
	struct timerfd *tfd;

	tfd = fp->f_data;
	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL)
		return (EINVAL);

	/* Only EVFILT_READ is supported on a timerfd. */
	if (kn->kn_filter == EVFILT_READ)
		kn->kn_fop = &timerfd_rfiltops;
	else
		return (EINVAL);

	kn->kn_hook = tfd;
	knlist_add(&tfd->tfd_sel.si_note, kn, 0);

	return (0);
}

static void
filt_timerfddetach(struct knote *kn)
{
	struct timerfd *tfd = kn->kn_hook;

	mtx_lock(&tfd->tfd_lock);
	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
	mtx_unlock(&tfd->tfd_lock);
}

/* kqueue event test: fires while unread expirations are pending. */
static int
filt_timerfdread(struct knote *kn, long hint)
{
	struct timerfd *tfd = kn->kn_hook;

	return (tfd->tfd_count > 0);
}

static int
timerfd_ioctl(struct file *fp, u_long cmd, void *data,
    struct ucred *active_cred, struct thread *td)
{

	if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD)
		return (EINVAL);

	/* Accept but ignore the generic non-blocking/async ioctls. */
	switch (cmd) {
	case FIONBIO:
	case FIOASYNC:
		return (0);
	}

	return (ENOTTY);
}

static int
timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred)
{

	/* fstat(2) is not meaningful for a timerfd. */
	return (ENXIO);
}

static int
timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
{

	kif->kf_type = KF_TYPE_UNKNOWN;
	return (0);
}

/* Sample the clock this timerfd was created against. */
static void
linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts)
{

	if (tfd->tfd_clockid == CLOCK_REALTIME)
		getnanotime(ts);
	else /* CLOCK_MONOTONIC */
		getnanouptime(ts);
}

/*
 * Compute the current timer value (time remaining until expiry plus the
 * interval) from the stored absolute expiry time.  Caller holds tfd_lock.
 */
static void
linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots)
{
	struct timespec cts;
	linux_timerfd_clocktime(tfd, &cts);
	*ots = tfd->tfd_time;
	if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) {
		/* Convert the stored absolute expiry to time remaining. */
		timespecsub(&ots->it_value, &cts, &ots->it_value);
		/*
		 * An armed timer that already expired reports 1ns remaining
		 * rather than zero, since zero would mean "disarmed".
		 */
		if (ots->it_value.tv_sec < 0 ||
		    (ots->it_value.tv_sec == 0 &&
		     ots->it_value.tv_nsec == 0)) {
			ots->it_value.tv_sec  = 0;
			ots->it_value.tv_nsec = 1;
		}
	}
}

/* Shared backend for timerfd_gettime(2): fetch the current timer value. */
static int
linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots)
{
	struct timerfd *tfd;
	struct file *fp;
	int error;

	error = fget(td, fd, &cap_read_rights, &fp);
	if (error != 0)
		return (error);
	tfd = fp->f_data;
	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
		error = EINVAL;
		goto out;
	}

	mtx_lock(&tfd->tfd_lock);
	linux_timerfd_curval(tfd, ots);
	mtx_unlock(&tfd->tfd_lock);

out:
	fdrop(fp, td);
	return (error);
}

int
linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args)
{
	struct l_itimerspec lots;
	struct itimerspec ots;
	int error;

	error = linux_timerfd_gettime_common(td, args->fd, &ots);
	if (error != 0)
		return (error);
	error = native_to_linux_itimerspec(&lots, &ots);
	if (error == 0)
		error = copyout(&lots, args->old_value, sizeof(lots));
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 32-bit ABI variant using the 64-bit Linux itimerspec layout. */
int
linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args)
{
	struct l_itimerspec64 lots;
	struct itimerspec ots;
	int error;

	error = linux_timerfd_gettime_common(td, args->fd, &ots);
	if (error != 0)
		return (error);
	error = native_to_linux_itimerspec64(&lots, &ots);
	if (error == 0)
		error = copyout(&lots, args->old_value, sizeof(lots));
	return (error);
}
#endif

/*
 * Shared backend for timerfd_settime(2): arm or disarm the timer.
 * A zero it_value disarms; otherwise the expiry is stored as an absolute
 * time on the timerfd's clock and the callout is (re)scheduled.  Note that
 * *nts is modified in place when it_value is zero (interval cleared).
 */
static int
linux_timerfd_settime_common(struct thread *td, int fd, int flags,
    struct itimerspec *nts, struct itimerspec *oval)
{
	struct timespec cts, ts;
	struct timerfd *tfd;
	struct timeval tv;
	struct file *fp;
	int error;

	if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0)
		return (EINVAL);

	error = fget(td, fd, &cap_write_rights, &fp);
	if (error != 0)
		return (error);
	tfd = fp->f_data;
	if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) {
		error = EINVAL;
		goto out;
	}

	mtx_lock(&tfd->tfd_lock);
	/* A disarmed timer has no meaningful interval. */
	if (!timespecisset(&nts->it_value))
		timespecclear(&nts->it_interval);
	/* Capture the previous value before overwriting it. */
	if (oval != NULL)
		linux_timerfd_curval(tfd, oval);

	bcopy(nts, &tfd->tfd_time, sizeof(*nts));
	tfd->tfd_count = 0;
	if (timespecisset(&nts->it_value)) {
		linux_timerfd_clocktime(tfd, &cts);
		ts = nts->it_value;
		if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) {
			/* Relative: store expiry as now + requested delta. */
			timespecadd(&tfd->tfd_time.it_value, &cts,
			    &tfd->tfd_time.it_value);
		} else {
			/* Absolute: schedule the callout for the remainder. */
			timespecsub(&ts, &cts, &ts);
		}
		TIMESPEC_TO_TIMEVAL(&tv, &ts);
		callout_reset(&tfd->tfd_callout, tvtohz(&tv),
		    linux_timerfd_expire, tfd);
		tfd->tfd_canceled = false;
	} else {
		/* Disarm: pending readers will see ECANCELED. */
		tfd->tfd_canceled = true;
		callout_stop(&tfd->tfd_callout);
	}
	mtx_unlock(&tfd->tfd_lock);

out:
	fdrop(fp, td);
	return (error);
}

int
linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args)
{
	struct l_itimerspec lots;
	struct itimerspec nts, ots, *pots;
	int error;

	error = copyin(args->new_value, &lots, sizeof(lots));
	if (error != 0)
		return (error);
	error = linux_to_native_itimerspec(&nts, &lots);
	if (error != 0)
		return (error);
	/* Only fetch the old value if the caller asked for it. */
	pots = (args->old_value != NULL ? &ots : NULL);
	error = linux_timerfd_settime_common(td, args->fd, args->flags,
	    &nts, pots);
	if (error == 0 && args->old_value != NULL) {
		error = native_to_linux_itimerspec(&lots, &ots);
		if (error == 0)
			error = copyout(&lots, args->old_value, sizeof(lots));
	}
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* 32-bit ABI variant using the 64-bit Linux itimerspec layout. */
int
linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args)
{
	struct l_itimerspec64 lots;
	struct itimerspec nts, ots, *pots;
	int error;

	error = copyin(args->new_value, &lots, sizeof(lots));
	if (error != 0)
		return (error);
	error = linux_to_native_itimerspec64(&nts, &lots);
	if (error != 0)
		return (error);
	pots = (args->old_value != NULL ? &ots : NULL);
	error = linux_timerfd_settime_common(td, args->fd, args->flags,
	    &nts, pots);
	if (error == 0 && args->old_value != NULL) {
		error = native_to_linux_itimerspec64(&lots, &ots);
		if (error == 0)
			error = copyout(&lots, args->old_value, sizeof(lots));
	}
	return (error);
}
#endif

/*
 * Callout handler; runs with tfd_lock held (callout_init_mtx).  On expiry it
 * bumps the expiration count, re-arms for the next interval if periodic, and
 * wakes readers, pollers and kqueue listeners.  If the clock has not yet
 * reached the expiry time (e.g. realtime clock stepped), it re-arms for the
 * remaining time instead.
 */
static void
linux_timerfd_expire(void *arg)
{
	struct timespec cts, ts;
	struct timeval tv;
	struct timerfd *tfd;

	tfd = (struct timerfd *)arg;

	linux_timerfd_clocktime(tfd, &cts);
	if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) {
		if (timespecisset(&tfd->tfd_time.it_interval))
			/* Periodic: advance the absolute expiry by one period. */
			timespecadd(&tfd->tfd_time.it_value,
			    &tfd->tfd_time.it_interval,
			    &tfd->tfd_time.it_value);
		else
			/* single shot timer */
			timespecclear(&tfd->tfd_time.it_value);
		if (timespecisset(&tfd->tfd_time.it_value)) {
			timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
			TIMESPEC_TO_TIMEVAL(&tv, &ts);
			callout_reset(&tfd->tfd_callout, tvtohz(&tv),
			    linux_timerfd_expire, tfd);
		}
		tfd->tfd_count++;
		KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
		selwakeup(&tfd->tfd_sel);
		wakeup(&tfd->tfd_count);
	} else if (timespecisset(&tfd->tfd_time.it_value)) {
		/* Fired early: re-arm for the time still remaining. */
		timespecsub(&tfd->tfd_time.it_value, &cts, &ts);
		TIMESPEC_TO_TIMEVAL(&tv, &ts);
		callout_reset(&tfd->tfd_callout, tvtohz(&tv),
		    linux_timerfd_expire, tfd);
	}
}