1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2007 Roman Divacky 5 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/callout.h> 34 #include <sys/capsicum.h> 35 #include <sys/errno.h> 36 #include <sys/event.h> 37 #include <sys/eventfd.h> 38 #include <sys/file.h> 39 #include <sys/filedesc.h> 40 #include <sys/filio.h> 41 #include <sys/limits.h> 42 #include <sys/lock.h> 43 #include <sys/mutex.h> 44 #include <sys/poll.h> 45 #include <sys/proc.h> 46 #include <sys/selinfo.h> 47 #include <sys/specialfd.h> 48 #include <sys/sx.h> 49 #include <sys/syscallsubr.h> 50 #include <sys/timespec.h> 51 #include <sys/user.h> 52 53 #ifdef COMPAT_LINUX32 54 #include <machine/../linux32/linux.h> 55 #include <machine/../linux32/linux32_proto.h> 56 #else 57 #include <machine/../linux/linux.h> 58 #include <machine/../linux/linux_proto.h> 59 #endif 60 61 #include <compat/linux/linux_emul.h> 62 #include <compat/linux/linux_event.h> 63 #include <compat/linux/linux_file.h> 64 #include <compat/linux/linux_signal.h> 65 #include <compat/linux/linux_time.h> 66 #include <compat/linux/linux_util.h> 67 68 typedef uint64_t epoll_udata_t; 69 70 struct epoll_event { 71 uint32_t events; 72 epoll_udata_t data; 73 } 74 #if defined(__amd64__) 75 __attribute__((packed)) 76 #endif 77 ; 78 79 #define LINUX_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) 80 81 static int epoll_to_kevent(struct thread *td, int fd, 82 struct epoll_event *l_event, struct kevent *kevent, 83 int *nkevents); 84 static void kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event); 85 static int epoll_kev_copyout(void *arg, struct kevent *kevp, int count); 86 static int epoll_kev_copyin(void *arg, struct kevent *kevp, int count); 87 static int epoll_register_kevent(struct thread *td, struct file *epfp, 88 int fd, int filter, unsigned int flags); 89 static int epoll_fd_registered(struct thread *td, struct file *epfp, 90 int fd); 91 static int epoll_delete_all_events(struct thread *td, struct file *epfp, 92 int fd); 93 94 struct epoll_copyin_args { 95 struct kevent *changelist; 96 }; 97 98 struct epoll_copyout_args { 99 struct epoll_event *leventlist; 100 struct proc *p; 101 uint32_t count; 102 int error; 103 }; 104 105 /* timerfd */ 106 typedef uint64_t timerfd_t; 107 108 static fo_rdwr_t timerfd_read; 109 static fo_ioctl_t timerfd_ioctl; 110 static fo_poll_t timerfd_poll; 111 static fo_kqfilter_t timerfd_kqfilter; 112 static fo_stat_t timerfd_stat; 113 static fo_close_t timerfd_close; 114 static fo_fill_kinfo_t timerfd_fill_kinfo; 115 116 static struct fileops timerfdops = { 117 .fo_read = timerfd_read, 118 .fo_write = invfo_rdwr, 119 .fo_truncate = invfo_truncate, 120 .fo_ioctl = timerfd_ioctl, 121 .fo_poll = timerfd_poll, 122 .fo_kqfilter = timerfd_kqfilter, 123 .fo_stat = timerfd_stat, 124 .fo_close = timerfd_close, 125 .fo_chmod = invfo_chmod, 126 .fo_chown = invfo_chown, 127 .fo_sendfile = invfo_sendfile, 128 .fo_fill_kinfo = timerfd_fill_kinfo, 129 .fo_flags = DFLAG_PASSABLE 130 }; 131 132 static void filt_timerfddetach(struct knote *kn); 133 static int filt_timerfdread(struct knote *kn, long hint); 134 135 static struct filterops timerfd_rfiltops = { 136 .f_isfd = 1, 137 .f_detach = filt_timerfddetach, 138 .f_event = filt_timerfdread 139 }; 140 141 struct timerfd { 142 clockid_t tfd_clockid; 143 struct itimerspec tfd_time; 144 struct callout tfd_callout; 145 timerfd_t tfd_count; 146 bool tfd_canceled; 147 struct selinfo tfd_sel; 148 struct mtx tfd_lock; 149 }; 150 151 static void linux_timerfd_expire(void *); 152 static void linux_timerfd_curval(struct timerfd *, struct itimerspec *); 153 154 static int 155 epoll_create_common(struct thread *td, int flags) 156 { 157 158 return (kern_kqueue(td, flags, NULL)); 159 } 160 161 #ifdef LINUX_LEGACY_SYSCALLS 162 int 163 linux_epoll_create(struct thread *td, struct linux_epoll_create_args *args) 164 { 165 166 /* 167 * args->size is unused. Linux just tests it 168 * and then forgets it as well. 169 */ 170 if (args->size <= 0) 171 return (EINVAL); 172 173 return (epoll_create_common(td, 0)); 174 } 175 #endif 176 177 int 178 linux_epoll_create1(struct thread *td, struct linux_epoll_create1_args *args) 179 { 180 int flags; 181 182 if ((args->flags & ~(LINUX_O_CLOEXEC)) != 0) 183 return (EINVAL); 184 185 flags = 0; 186 if ((args->flags & LINUX_O_CLOEXEC) != 0) 187 flags |= O_CLOEXEC; 188 189 return (epoll_create_common(td, flags)); 190 } 191 192 /* Structure converting function from epoll to kevent. */ 193 static int 194 epoll_to_kevent(struct thread *td, int fd, struct epoll_event *l_event, 195 struct kevent *kevent, int *nkevents) 196 { 197 uint32_t levents = l_event->events; 198 struct linux_pemuldata *pem; 199 struct proc *p; 200 unsigned short kev_flags = EV_ADD | EV_ENABLE; 201 202 /* flags related to how event is registered */ 203 if ((levents & LINUX_EPOLLONESHOT) != 0) 204 kev_flags |= EV_DISPATCH; 205 if ((levents & LINUX_EPOLLET) != 0) 206 kev_flags |= EV_CLEAR; 207 if ((levents & LINUX_EPOLLERR) != 0) 208 kev_flags |= EV_ERROR; 209 if ((levents & LINUX_EPOLLRDHUP) != 0) 210 kev_flags |= EV_EOF; 211 212 /* flags related to what event is registered */ 213 if ((levents & LINUX_EPOLL_EVRD) != 0) { 214 EV_SET(kevent, fd, EVFILT_READ, kev_flags, 0, 0, 0); 215 kevent->ext[0] = l_event->data; 216 ++kevent; 217 ++(*nkevents); 218 } 219 if ((levents & LINUX_EPOLL_EVWR) != 0) { 220 EV_SET(kevent, fd, EVFILT_WRITE, kev_flags, 0, 0, 0); 221 kevent->ext[0] = l_event->data; 222 ++kevent; 223 ++(*nkevents); 224 } 225 /* zero event mask is legal */ 226 if ((levents & (LINUX_EPOLL_EVRD | LINUX_EPOLL_EVWR)) == 0) { 227 EV_SET(kevent++, fd, EVFILT_READ, EV_ADD|EV_DISABLE, 0, 0, 0); 228 ++(*nkevents); 229 } 230 231 if ((levents & ~(LINUX_EPOLL_EVSUP)) != 0) { 232 p = td->td_proc; 233 234 pem = pem_find(p); 235 KASSERT(pem != NULL, ("epoll proc emuldata not found.\n")); 236 237 LINUX_PEM_XLOCK(pem); 238 if ((pem->flags & LINUX_XUNSUP_EPOLL) == 0) { 239 pem->flags |= LINUX_XUNSUP_EPOLL; 240 LINUX_PEM_XUNLOCK(pem); 241 linux_msg(td, "epoll_ctl unsupported flags: 0x%x", 242 levents); 243 } else 244 LINUX_PEM_XUNLOCK(pem); 245 return (EINVAL); 246 } 247 248 return (0); 249 } 250 251 /* 252 * Structure converting function from kevent to epoll. In a case 253 * this is called on error in registration we store the error in 254 * event->data and pick it up later in linux_epoll_ctl(). 255 */ 256 static void 257 kevent_to_epoll(struct kevent *kevent, struct epoll_event *l_event) 258 { 259 260 l_event->data = kevent->ext[0]; 261 262 if ((kevent->flags & EV_ERROR) != 0) { 263 l_event->events = LINUX_EPOLLERR; 264 return; 265 } 266 267 /* XXX EPOLLPRI, EPOLLHUP */ 268 switch (kevent->filter) { 269 case EVFILT_READ: 270 l_event->events = LINUX_EPOLLIN; 271 if ((kevent->flags & EV_EOF) != 0) 272 l_event->events |= LINUX_EPOLLRDHUP; 273 break; 274 case EVFILT_WRITE: 275 l_event->events = LINUX_EPOLLOUT; 276 break; 277 } 278 } 279 280 /* 281 * Copyout callback used by kevent. This converts kevent 282 * events to epoll events and copies them back to the 283 * userspace. This is also called on error on registering 284 * of the filter. 285 */ 286 static int 287 epoll_kev_copyout(void *arg, struct kevent *kevp, int count) 288 { 289 struct epoll_copyout_args *args; 290 struct epoll_event *eep; 291 int error, i; 292 293 args = (struct epoll_copyout_args*) arg; 294 eep = malloc(sizeof(*eep) * count, M_EPOLL, M_WAITOK | M_ZERO); 295 296 for (i = 0; i < count; i++) 297 kevent_to_epoll(&kevp[i], &eep[i]); 298 299 error = copyout(eep, args->leventlist, count * sizeof(*eep)); 300 if (error == 0) { 301 args->leventlist += count; 302 args->count += count; 303 } else if (args->error == 0) 304 args->error = error; 305 306 free(eep, M_EPOLL); 307 return (error); 308 } 309 310 /* 311 * Copyin callback used by kevent. This copies already 312 * converted filters from kernel memory to the kevent 313 * internal kernel memory. Hence the memcpy instead of 314 * copyin. 315 */ 316 static int 317 epoll_kev_copyin(void *arg, struct kevent *kevp, int count) 318 { 319 struct epoll_copyin_args *args; 320 321 args = (struct epoll_copyin_args*) arg; 322 323 memcpy(kevp, args->changelist, count * sizeof(*kevp)); 324 args->changelist += count; 325 326 return (0); 327 } 328 329 /* 330 * Load epoll filter, convert it to kevent filter 331 * and load it into kevent subsystem. 332 */ 333 int 334 linux_epoll_ctl(struct thread *td, struct linux_epoll_ctl_args *args) 335 { 336 struct file *epfp, *fp; 337 struct epoll_copyin_args ciargs; 338 struct kevent kev[2]; 339 struct kevent_copyops k_ops = { &ciargs, 340 NULL, 341 epoll_kev_copyin}; 342 struct epoll_event le; 343 cap_rights_t rights; 344 int nchanges = 0; 345 int error; 346 347 if (args->op != LINUX_EPOLL_CTL_DEL) { 348 error = copyin(args->event, &le, sizeof(le)); 349 if (error != 0) 350 return (error); 351 } 352 353 error = fget(td, args->epfd, 354 cap_rights_init_one(&rights, CAP_KQUEUE_CHANGE), &epfp); 355 if (error != 0) 356 return (error); 357 if (epfp->f_type != DTYPE_KQUEUE) { 358 error = EINVAL; 359 goto leave1; 360 } 361 362 /* Protect user data vector from incorrectly supplied fd. */ 363 error = fget(td, args->fd, 364 cap_rights_init_one(&rights, CAP_POLL_EVENT), &fp); 365 if (error != 0) 366 goto leave1; 367 368 /* Linux disallows spying on himself */ 369 if (epfp == fp) { 370 error = EINVAL; 371 goto leave0; 372 } 373 374 ciargs.changelist = kev; 375 376 if (args->op != LINUX_EPOLL_CTL_DEL) { 377 error = epoll_to_kevent(td, args->fd, &le, kev, &nchanges); 378 if (error != 0) 379 goto leave0; 380 } 381 382 switch (args->op) { 383 case LINUX_EPOLL_CTL_MOD: 384 error = epoll_delete_all_events(td, epfp, args->fd); 385 if (error != 0) 386 goto leave0; 387 break; 388 389 case LINUX_EPOLL_CTL_ADD: 390 if (epoll_fd_registered(td, epfp, args->fd)) { 391 error = EEXIST; 392 goto leave0; 393 } 394 break; 395 396 case LINUX_EPOLL_CTL_DEL: 397 /* CTL_DEL means unregister this fd with this epoll */ 398 error = epoll_delete_all_events(td, epfp, args->fd); 399 goto leave0; 400 401 default: 402 error = EINVAL; 403 goto leave0; 404 } 405 406 error = kern_kevent_fp(td, epfp, nchanges, 0, &k_ops, NULL); 407 408 leave0: 409 fdrop(fp, td); 410 411 leave1: 412 fdrop(epfp, td); 413 return (error); 414 } 415 416 /* 417 * Wait for a filter to be triggered on the epoll file descriptor. 418 */ 419 420 static int 421 linux_epoll_wait_ts(struct thread *td, int epfd, struct epoll_event *events, 422 int maxevents, struct timespec *tsp, sigset_t *uset) 423 { 424 struct epoll_copyout_args coargs; 425 struct kevent_copyops k_ops = { &coargs, 426 epoll_kev_copyout, 427 NULL}; 428 cap_rights_t rights; 429 struct file *epfp; 430 sigset_t omask; 431 int error; 432 433 if (maxevents <= 0 || maxevents > LINUX_MAX_EVENTS) 434 return (EINVAL); 435 436 error = fget(td, epfd, 437 cap_rights_init_one(&rights, CAP_KQUEUE_EVENT), &epfp); 438 if (error != 0) 439 return (error); 440 if (epfp->f_type != DTYPE_KQUEUE) { 441 error = EINVAL; 442 goto leave; 443 } 444 if (uset != NULL) { 445 error = kern_sigprocmask(td, SIG_SETMASK, uset, 446 &omask, 0); 447 if (error != 0) 448 goto leave; 449 td->td_pflags |= TDP_OLDMASK; 450 /* 451 * Make sure that ast() is called on return to 452 * usermode and TDP_OLDMASK is cleared, restoring old 453 * sigmask. 454 */ 455 ast_sched(td, TDA_SIGSUSPEND); 456 } 457 458 coargs.leventlist = events; 459 coargs.p = td->td_proc; 460 coargs.count = 0; 461 coargs.error = 0; 462 463 error = kern_kevent_fp(td, epfp, 0, maxevents, &k_ops, tsp); 464 if (error == 0 && coargs.error != 0) 465 error = coargs.error; 466 467 /* 468 * kern_kevent might return ENOMEM which is not expected from epoll_wait. 469 * Maybe we should translate that but I don't think it matters at all. 470 */ 471 if (error == 0) 472 td->td_retval[0] = coargs.count; 473 474 if (uset != NULL) 475 error = kern_sigprocmask(td, SIG_SETMASK, &omask, 476 NULL, 0); 477 leave: 478 fdrop(epfp, td); 479 return (error); 480 } 481 482 static int 483 linux_epoll_wait_common(struct thread *td, int epfd, struct epoll_event *events, 484 int maxevents, int timeout, sigset_t *uset) 485 { 486 struct timespec ts, *tsp; 487 488 /* 489 * Linux epoll_wait(2) man page states that timeout of -1 causes caller 490 * to block indefinitely. Real implementation does it if any negative 491 * timeout value is passed. 492 */ 493 if (timeout >= 0) { 494 /* Convert from milliseconds to timespec. */ 495 ts.tv_sec = timeout / 1000; 496 ts.tv_nsec = (timeout % 1000) * 1000000; 497 tsp = &ts; 498 } else { 499 tsp = NULL; 500 } 501 return (linux_epoll_wait_ts(td, epfd, events, maxevents, tsp, uset)); 502 503 } 504 505 #ifdef LINUX_LEGACY_SYSCALLS 506 int 507 linux_epoll_wait(struct thread *td, struct linux_epoll_wait_args *args) 508 { 509 510 return (linux_epoll_wait_common(td, args->epfd, args->events, 511 args->maxevents, args->timeout, NULL)); 512 } 513 #endif 514 515 int 516 linux_epoll_pwait(struct thread *td, struct linux_epoll_pwait_args *args) 517 { 518 sigset_t mask, *pmask; 519 int error; 520 521 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 522 &mask, &pmask); 523 if (error != 0) 524 return (error); 525 526 return (linux_epoll_wait_common(td, args->epfd, args->events, 527 args->maxevents, args->timeout, pmask)); 528 } 529 530 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 531 int 532 linux_epoll_pwait2_64(struct thread *td, struct linux_epoll_pwait2_64_args *args) 533 { 534 struct timespec ts, *tsa; 535 sigset_t mask, *pmask; 536 int error; 537 538 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 539 &mask, &pmask); 540 if (error != 0) 541 return (error); 542 543 if (args->timeout) { 544 error = linux_get_timespec64(&ts, args->timeout); 545 if (error != 0) 546 return (error); 547 tsa = &ts; 548 } else 549 tsa = NULL; 550 551 return (linux_epoll_wait_ts(td, args->epfd, args->events, 552 args->maxevents, tsa, pmask)); 553 } 554 #else 555 int 556 linux_epoll_pwait2(struct thread *td, struct linux_epoll_pwait2_args *args) 557 { 558 struct timespec ts, *tsa; 559 sigset_t mask, *pmask; 560 int error; 561 562 error = linux_copyin_sigset(td, args->mask, sizeof(l_sigset_t), 563 &mask, &pmask); 564 if (error != 0) 565 return (error); 566 567 if (args->timeout) { 568 error = linux_get_timespec(&ts, args->timeout); 569 if (error != 0) 570 return (error); 571 tsa = &ts; 572 } else 573 tsa = NULL; 574 575 return (linux_epoll_wait_ts(td, args->epfd, args->events, 576 args->maxevents, tsa, pmask)); 577 } 578 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 579 580 static int 581 epoll_register_kevent(struct thread *td, struct file *epfp, int fd, int filter, 582 unsigned int flags) 583 { 584 struct epoll_copyin_args ciargs; 585 struct kevent kev; 586 struct kevent_copyops k_ops = { &ciargs, 587 NULL, 588 epoll_kev_copyin}; 589 590 ciargs.changelist = &kev; 591 EV_SET(&kev, fd, filter, flags, 0, 0, 0); 592 593 return (kern_kevent_fp(td, epfp, 1, 0, &k_ops, NULL)); 594 } 595 596 static int 597 epoll_fd_registered(struct thread *td, struct file *epfp, int fd) 598 { 599 /* 600 * Set empty filter flags to avoid accidental modification of already 601 * registered events. In the case of event re-registration: 602 * 1. If event does not exists kevent() does nothing and returns ENOENT 603 * 2. If event does exists, it's enabled/disabled state is preserved 604 * but fflags, data and udata fields are overwritten. So we can not 605 * set socket lowats and store user's context pointer in udata. 606 */ 607 if (epoll_register_kevent(td, epfp, fd, EVFILT_READ, 0) != ENOENT || 608 epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, 0) != ENOENT) 609 return (1); 610 611 return (0); 612 } 613 614 static int 615 epoll_delete_all_events(struct thread *td, struct file *epfp, int fd) 616 { 617 int error1, error2; 618 619 error1 = epoll_register_kevent(td, epfp, fd, EVFILT_READ, EV_DELETE); 620 error2 = epoll_register_kevent(td, epfp, fd, EVFILT_WRITE, EV_DELETE); 621 622 /* return 0 if at least one result positive */ 623 return (error1 == 0 ? 0 : error2); 624 } 625 626 #ifdef LINUX_LEGACY_SYSCALLS 627 int 628 linux_eventfd(struct thread *td, struct linux_eventfd_args *args) 629 { 630 struct specialfd_eventfd ae; 631 632 bzero(&ae, sizeof(ae)); 633 ae.initval = args->initval; 634 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 635 } 636 #endif 637 638 int 639 linux_eventfd2(struct thread *td, struct linux_eventfd2_args *args) 640 { 641 struct specialfd_eventfd ae; 642 int flags; 643 644 if ((args->flags & ~(LINUX_O_CLOEXEC | LINUX_O_NONBLOCK | 645 LINUX_EFD_SEMAPHORE)) != 0) 646 return (EINVAL); 647 flags = 0; 648 if ((args->flags & LINUX_O_CLOEXEC) != 0) 649 flags |= EFD_CLOEXEC; 650 if ((args->flags & LINUX_O_NONBLOCK) != 0) 651 flags |= EFD_NONBLOCK; 652 if ((args->flags & LINUX_EFD_SEMAPHORE) != 0) 653 flags |= EFD_SEMAPHORE; 654 655 bzero(&ae, sizeof(ae)); 656 ae.flags = flags; 657 ae.initval = args->initval; 658 return (kern_specialfd(td, SPECIALFD_EVENTFD, &ae)); 659 } 660 661 int 662 linux_timerfd_create(struct thread *td, struct linux_timerfd_create_args *args) 663 { 664 struct timerfd *tfd; 665 struct file *fp; 666 clockid_t clockid; 667 int fflags, fd, error; 668 669 if ((args->flags & ~LINUX_TFD_CREATE_FLAGS) != 0) 670 return (EINVAL); 671 672 error = linux_to_native_clockid(&clockid, args->clockid); 673 if (error != 0) 674 return (error); 675 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 676 return (EINVAL); 677 678 fflags = 0; 679 if ((args->flags & LINUX_TFD_CLOEXEC) != 0) 680 fflags |= O_CLOEXEC; 681 682 error = falloc(td, &fp, &fd, fflags); 683 if (error != 0) 684 return (error); 685 686 tfd = malloc(sizeof(*tfd), M_EPOLL, M_WAITOK | M_ZERO); 687 tfd->tfd_clockid = clockid; 688 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 689 690 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 691 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 692 693 fflags = FREAD; 694 if ((args->flags & LINUX_O_NONBLOCK) != 0) 695 fflags |= FNONBLOCK; 696 697 finit(fp, fflags, DTYPE_LINUXTFD, tfd, &timerfdops); 698 fdrop(fp, td); 699 700 td->td_retval[0] = fd; 701 return (error); 702 } 703 704 static int 705 timerfd_close(struct file *fp, struct thread *td) 706 { 707 struct timerfd *tfd; 708 709 tfd = fp->f_data; 710 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 711 return (EINVAL); 712 713 timespecclear(&tfd->tfd_time.it_value); 714 timespecclear(&tfd->tfd_time.it_interval); 715 716 callout_drain(&tfd->tfd_callout); 717 718 seldrain(&tfd->tfd_sel); 719 knlist_destroy(&tfd->tfd_sel.si_note); 720 721 fp->f_ops = &badfileops; 722 mtx_destroy(&tfd->tfd_lock); 723 free(tfd, M_EPOLL); 724 725 return (0); 726 } 727 728 static int 729 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 730 int flags, struct thread *td) 731 { 732 struct timerfd *tfd; 733 timerfd_t count; 734 int error; 735 736 tfd = fp->f_data; 737 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 738 return (EINVAL); 739 740 if (uio->uio_resid < sizeof(timerfd_t)) 741 return (EINVAL); 742 743 error = 0; 744 mtx_lock(&tfd->tfd_lock); 745 retry: 746 if (tfd->tfd_canceled) { 747 tfd->tfd_count = 0; 748 mtx_unlock(&tfd->tfd_lock); 749 return (ECANCELED); 750 } 751 if (tfd->tfd_count == 0) { 752 if ((fp->f_flag & FNONBLOCK) != 0) { 753 mtx_unlock(&tfd->tfd_lock); 754 return (EAGAIN); 755 } 756 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, PCATCH, "ltfdrd", 0); 757 if (error == 0) 758 goto retry; 759 } 760 if (error == 0) { 761 count = tfd->tfd_count; 762 tfd->tfd_count = 0; 763 mtx_unlock(&tfd->tfd_lock); 764 error = uiomove(&count, sizeof(timerfd_t), uio); 765 } else 766 mtx_unlock(&tfd->tfd_lock); 767 768 return (error); 769 } 770 771 static int 772 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 773 struct thread *td) 774 { 775 struct timerfd *tfd; 776 int revents = 0; 777 778 tfd = fp->f_data; 779 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 780 return (POLLERR); 781 782 mtx_lock(&tfd->tfd_lock); 783 if ((events & (POLLIN|POLLRDNORM)) && tfd->tfd_count > 0) 784 revents |= events & (POLLIN|POLLRDNORM); 785 if (revents == 0) 786 selrecord(td, &tfd->tfd_sel); 787 mtx_unlock(&tfd->tfd_lock); 788 789 return (revents); 790 } 791 792 static int 793 timerfd_kqfilter(struct file *fp, struct knote *kn) 794 { 795 struct timerfd *tfd; 796 797 tfd = fp->f_data; 798 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) 799 return (EINVAL); 800 801 if (kn->kn_filter == EVFILT_READ) 802 kn->kn_fop = &timerfd_rfiltops; 803 else 804 return (EINVAL); 805 806 kn->kn_hook = tfd; 807 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 808 809 return (0); 810 } 811 812 static void 813 filt_timerfddetach(struct knote *kn) 814 { 815 struct timerfd *tfd = kn->kn_hook; 816 817 mtx_lock(&tfd->tfd_lock); 818 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 819 mtx_unlock(&tfd->tfd_lock); 820 } 821 822 static int 823 filt_timerfdread(struct knote *kn, long hint) 824 { 825 struct timerfd *tfd = kn->kn_hook; 826 827 return (tfd->tfd_count > 0); 828 } 829 830 static int 831 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 832 struct ucred *active_cred, struct thread *td) 833 { 834 835 if (fp->f_data == NULL || fp->f_type != DTYPE_LINUXTFD) 836 return (EINVAL); 837 838 switch (cmd) { 839 case FIONBIO: 840 case FIOASYNC: 841 return (0); 842 } 843 844 return (ENOTTY); 845 } 846 847 static int 848 timerfd_stat(struct file *fp, struct stat *st, struct ucred *active_cred) 849 { 850 851 return (ENXIO); 852 } 853 854 static int 855 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) 856 { 857 858 kif->kf_type = KF_TYPE_UNKNOWN; 859 return (0); 860 } 861 862 static void 863 linux_timerfd_clocktime(struct timerfd *tfd, struct timespec *ts) 864 { 865 866 if (tfd->tfd_clockid == CLOCK_REALTIME) 867 getnanotime(ts); 868 else /* CLOCK_MONOTONIC */ 869 getnanouptime(ts); 870 } 871 872 static void 873 linux_timerfd_curval(struct timerfd *tfd, struct itimerspec *ots) 874 { 875 struct timespec cts; 876 877 linux_timerfd_clocktime(tfd, &cts); 878 *ots = tfd->tfd_time; 879 if (ots->it_value.tv_sec != 0 || ots->it_value.tv_nsec != 0) { 880 timespecsub(&ots->it_value, &cts, &ots->it_value); 881 if (ots->it_value.tv_sec < 0 || 882 (ots->it_value.tv_sec == 0 && 883 ots->it_value.tv_nsec == 0)) { 884 ots->it_value.tv_sec = 0; 885 ots->it_value.tv_nsec = 1; 886 } 887 } 888 } 889 890 static int 891 linux_timerfd_gettime_common(struct thread *td, int fd, struct itimerspec *ots) 892 { 893 struct timerfd *tfd; 894 struct file *fp; 895 int error; 896 897 error = fget(td, fd, &cap_read_rights, &fp); 898 if (error != 0) 899 return (error); 900 tfd = fp->f_data; 901 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 902 error = EINVAL; 903 goto out; 904 } 905 906 mtx_lock(&tfd->tfd_lock); 907 linux_timerfd_curval(tfd, ots); 908 mtx_unlock(&tfd->tfd_lock); 909 910 out: 911 fdrop(fp, td); 912 return (error); 913 } 914 915 int 916 linux_timerfd_gettime(struct thread *td, struct linux_timerfd_gettime_args *args) 917 { 918 struct l_itimerspec lots; 919 struct itimerspec ots; 920 int error; 921 922 error = linux_timerfd_gettime_common(td, args->fd, &ots); 923 if (error != 0) 924 return (error); 925 error = native_to_linux_itimerspec(&lots, &ots); 926 if (error == 0) 927 error = copyout(&lots, args->old_value, sizeof(lots)); 928 return (error); 929 } 930 931 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 932 int 933 linux_timerfd_gettime64(struct thread *td, struct linux_timerfd_gettime64_args *args) 934 { 935 struct l_itimerspec64 lots; 936 struct itimerspec ots; 937 int error; 938 939 error = linux_timerfd_gettime_common(td, args->fd, &ots); 940 if (error != 0) 941 return (error); 942 error = native_to_linux_itimerspec64(&lots, &ots); 943 if (error == 0) 944 error = copyout(&lots, args->old_value, sizeof(lots)); 945 return (error); 946 } 947 #endif 948 949 static int 950 linux_timerfd_settime_common(struct thread *td, int fd, int flags, 951 struct itimerspec *nts, struct itimerspec *oval) 952 { 953 struct timespec cts, ts; 954 struct timerfd *tfd; 955 struct timeval tv; 956 struct file *fp; 957 int error; 958 959 if ((flags & ~LINUX_TFD_SETTIME_FLAGS) != 0) 960 return (EINVAL); 961 962 error = fget(td, fd, &cap_write_rights, &fp); 963 if (error != 0) 964 return (error); 965 tfd = fp->f_data; 966 if (fp->f_type != DTYPE_LINUXTFD || tfd == NULL) { 967 error = EINVAL; 968 goto out; 969 } 970 971 mtx_lock(&tfd->tfd_lock); 972 if (!timespecisset(&nts->it_value)) 973 timespecclear(&nts->it_interval); 974 if (oval != NULL) 975 linux_timerfd_curval(tfd, oval); 976 977 bcopy(nts, &tfd->tfd_time, sizeof(*nts)); 978 tfd->tfd_count = 0; 979 if (timespecisset(&nts->it_value)) { 980 linux_timerfd_clocktime(tfd, &cts); 981 ts = nts->it_value; 982 if ((flags & LINUX_TFD_TIMER_ABSTIME) == 0) { 983 timespecadd(&tfd->tfd_time.it_value, &cts, 984 &tfd->tfd_time.it_value); 985 } else { 986 timespecsub(&ts, &cts, &ts); 987 } 988 TIMESPEC_TO_TIMEVAL(&tv, &ts); 989 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 990 linux_timerfd_expire, tfd); 991 tfd->tfd_canceled = false; 992 } else { 993 tfd->tfd_canceled = true; 994 callout_stop(&tfd->tfd_callout); 995 } 996 mtx_unlock(&tfd->tfd_lock); 997 998 out: 999 fdrop(fp, td); 1000 return (error); 1001 } 1002 1003 int 1004 linux_timerfd_settime(struct thread *td, struct linux_timerfd_settime_args *args) 1005 { 1006 struct l_itimerspec lots; 1007 struct itimerspec nts, ots, *pots; 1008 int error; 1009 1010 error = copyin(args->new_value, &lots, sizeof(lots)); 1011 if (error != 0) 1012 return (error); 1013 error = linux_to_native_itimerspec(&nts, &lots); 1014 if (error != 0) 1015 return (error); 1016 pots = (args->old_value != NULL ? &ots : NULL); 1017 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1018 &nts, pots); 1019 if (error == 0 && args->old_value != NULL) { 1020 error = native_to_linux_itimerspec(&lots, &ots); 1021 if (error == 0) 1022 error = copyout(&lots, args->old_value, sizeof(lots)); 1023 } 1024 return (error); 1025 } 1026 1027 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1028 int 1029 linux_timerfd_settime64(struct thread *td, struct linux_timerfd_settime64_args *args) 1030 { 1031 struct l_itimerspec64 lots; 1032 struct itimerspec nts, ots, *pots; 1033 int error; 1034 1035 error = copyin(args->new_value, &lots, sizeof(lots)); 1036 if (error != 0) 1037 return (error); 1038 error = linux_to_native_itimerspec64(&nts, &lots); 1039 if (error != 0) 1040 return (error); 1041 pots = (args->old_value != NULL ? &ots : NULL); 1042 error = linux_timerfd_settime_common(td, args->fd, args->flags, 1043 &nts, pots); 1044 if (error == 0 && args->old_value != NULL) { 1045 error = native_to_linux_itimerspec64(&lots, &ots); 1046 if (error == 0) 1047 error = copyout(&lots, args->old_value, sizeof(lots)); 1048 } 1049 return (error); 1050 } 1051 #endif 1052 1053 static void 1054 linux_timerfd_expire(void *arg) 1055 { 1056 struct timespec cts, ts; 1057 struct timeval tv; 1058 struct timerfd *tfd; 1059 1060 tfd = (struct timerfd *)arg; 1061 1062 linux_timerfd_clocktime(tfd, &cts); 1063 if (timespeccmp(&cts, &tfd->tfd_time.it_value, >=)) { 1064 if (timespecisset(&tfd->tfd_time.it_interval)) 1065 timespecadd(&tfd->tfd_time.it_value, 1066 &tfd->tfd_time.it_interval, 1067 &tfd->tfd_time.it_value); 1068 else 1069 /* single shot timer */ 1070 timespecclear(&tfd->tfd_time.it_value); 1071 if (timespecisset(&tfd->tfd_time.it_value)) { 1072 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1073 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1074 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1075 linux_timerfd_expire, tfd); 1076 } 1077 tfd->tfd_count++; 1078 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 1079 selwakeup(&tfd->tfd_sel); 1080 wakeup(&tfd->tfd_count); 1081 } else if (timespecisset(&tfd->tfd_time.it_value)) { 1082 timespecsub(&tfd->tfd_time.it_value, &cts, &ts); 1083 TIMESPEC_TO_TIMEVAL(&tv, &ts); 1084 callout_reset(&tfd->tfd_callout, tvtohz(&tv), 1085 linux_timerfd_expire, tfd); 1086 } 1087 } 1088