1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/callout.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/filio.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/selinfo.h> 45 #include <sys/stat.h> 46 #include <sys/sysctl.h> 47 #include <sys/sysent.h> 48 #include <sys/sysproto.h> 49 #include <sys/timerfd.h> 50 #include <sys/timespec.h> 51 #include <sys/uio.h> 52 #include <sys/user.h> 53 54 #include <security/audit/audit.h> 55 56 #ifdef COMPAT_FREEBSD32 57 #include <compat/freebsd32/freebsd32.h> 58 #include <compat/freebsd32/freebsd32_proto.h> 59 #endif 60 61 static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures"); 62 static LIST_HEAD(, timerfd) timerfd_head; 63 static struct unrhdr64 tfdino_unr; 64 65 #define TFD_NOJUMP 0 /* Realtime clock has not jumped. */ 66 #define TFD_READ 1 /* Jumped, tfd has been read since. */ 67 #define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */ 68 #define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */ 69 #define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED) 70 71 struct timerfd { 72 /* User specified. */ 73 struct itimerspec tfd_time; /* tfd timer */ 74 clockid_t tfd_clockid; /* timing base */ 75 int tfd_flags; /* creation flags */ 76 int tfd_timflags; /* timer flags */ 77 78 /* Used internally. */ 79 timerfd_t tfd_count; /* expiration count since last read */ 80 bool tfd_expired; /* true upon initial expiration */ 81 struct mtx tfd_lock; /* mtx lock */ 82 struct callout tfd_callout; /* expiration notification */ 83 struct selinfo tfd_sel; /* I/O alerts */ 84 struct timespec tfd_boottim; /* cached boottime */ 85 int tfd_jumped; /* timer jump status */ 86 LIST_ENTRY(timerfd) entry; /* entry in list */ 87 88 /* For stat(2). */ 89 ino_t tfd_ino; /* inode number */ 90 struct timespec tfd_atim; /* time of last read */ 91 struct timespec tfd_mtim; /* time of last settime */ 92 struct timespec tfd_birthtim; /* creation time */ 93 }; 94 95 static void 96 timerfd_init(void *data) 97 { 98 new_unrhdr64(&tfdino_unr, 1); 99 } 100 101 SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL); 102 103 static inline void 104 timerfd_getboottime(struct timespec *ts) 105 { 106 struct timeval tv; 107 getboottime(&tv); 108 TIMEVAL_TO_TIMESPEC(&tv, ts); 109 } 110 111 /* 112 * Call when a discontinuous jump has occured in CLOCK_REALTIME and 113 * update timerfd's cached boottime. A jump can be triggered using 114 * functions like clock_settime(2) or settimeofday(2). 115 * 116 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set 117 * and the realtime clock jumps. 118 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set, 119 * but the realtime clock jumps backwards. 120 */ 121 void 122 timerfd_jumped(void) 123 { 124 struct timerfd *tfd; 125 struct timespec boottime, diff; 126 127 timerfd_getboottime(&boottime); 128 LIST_FOREACH(tfd, &timerfd_head, entry) { 129 mtx_lock(&tfd->tfd_lock); 130 if (tfd->tfd_clockid != CLOCK_REALTIME || 131 (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 || 132 timespeccmp(&boottime, &tfd->tfd_boottim, ==)) { 133 mtx_unlock(&tfd->tfd_lock); 134 continue; 135 } 136 137 if (callout_active(&tfd->tfd_callout)) { 138 if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0) 139 tfd->tfd_jumped = TFD_CANCELED; 140 else if (timespeccmp(&boottime, &tfd->tfd_boottim, <)) 141 tfd->tfd_jumped = TFD_ZREAD; 142 143 /* 144 * Do not reschedule callout when 145 * inside interval time loop. 146 */ 147 if (!tfd->tfd_expired) { 148 timespecsub(&boottime, 149 &tfd->tfd_boottim, &diff); 150 timespecsub(&tfd->tfd_time.it_value, 151 &diff, &tfd->tfd_time.it_value); 152 if (callout_stop(&tfd->tfd_callout) == 1) { 153 callout_schedule_sbt(&tfd->tfd_callout, 154 tstosbt(tfd->tfd_time.it_value), 155 0, C_ABSOLUTE); 156 } 157 } 158 } 159 160 tfd->tfd_boottim = boottime; 161 mtx_unlock(&tfd->tfd_lock); 162 } 163 } 164 165 static int 166 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 167 int flags, struct thread *td) 168 { 169 struct timerfd *tfd = fp->f_data; 170 timerfd_t count; 171 int error = 0; 172 173 if (uio->uio_resid < sizeof(timerfd_t)) 174 return (EINVAL); 175 176 mtx_lock(&tfd->tfd_lock); 177 retry: 178 getnanotime(&tfd->tfd_atim); 179 if ((tfd->tfd_jumped & TFD_JUMPED) != 0) { 180 if (tfd->tfd_jumped == TFD_CANCELED) 181 error = ECANCELED; 182 tfd->tfd_jumped = TFD_READ; 183 tfd->tfd_count = 0; 184 mtx_unlock(&tfd->tfd_lock); 185 return (error); 186 } else { 187 tfd->tfd_jumped = TFD_NOJUMP; 188 } 189 if (tfd->tfd_count == 0) { 190 if ((fp->f_flag & FNONBLOCK) != 0) { 191 mtx_unlock(&tfd->tfd_lock); 192 return (EAGAIN); 193 } 194 td->td_rtcgen = atomic_load_acq_int(&rtc_generation); 195 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, 196 PCATCH, "tfdrd", 0); 197 if (error == 0) { 198 goto retry; 199 } else { 200 mtx_unlock(&tfd->tfd_lock); 201 return (error); 202 } 203 } 204 205 count = tfd->tfd_count; 206 tfd->tfd_count = 0; 207 mtx_unlock(&tfd->tfd_lock); 208 error = uiomove(&count, sizeof(timerfd_t), uio); 209 210 return (error); 211 } 212 213 static int 214 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 215 struct ucred *active_cred, struct thread *td) 216 { 217 switch (cmd) { 218 case FIOASYNC: 219 if (*(int *)data != 0) 220 atomic_set_int(&fp->f_flag, FASYNC); 221 else 222 atomic_clear_int(&fp->f_flag, FASYNC); 223 return (0); 224 case FIONBIO: 225 if (*(int *)data != 0) 226 atomic_set_int(&fp->f_flag, FNONBLOCK); 227 else 228 atomic_clear_int(&fp->f_flag, FNONBLOCK); 229 return (0); 230 } 231 return (ENOTTY); 232 } 233 234 static int 235 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 236 struct thread *td) 237 { 238 struct timerfd *tfd = fp->f_data; 239 int revents = 0; 240 241 mtx_lock(&tfd->tfd_lock); 242 if ((events & (POLLIN | POLLRDNORM)) != 0 && 243 tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ) 244 revents |= events & (POLLIN | POLLRDNORM); 245 if (revents == 0) 246 selrecord(td, &tfd->tfd_sel); 247 mtx_unlock(&tfd->tfd_lock); 248 249 return (revents); 250 } 251 252 static void 253 filt_timerfddetach(struct knote *kn) 254 { 255 struct timerfd *tfd = kn->kn_hook; 256 257 mtx_lock(&tfd->tfd_lock); 258 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 259 mtx_unlock(&tfd->tfd_lock); 260 } 261 262 static int 263 filt_timerfdread(struct knote *kn, long hint) 264 { 265 struct timerfd *tfd = kn->kn_hook; 266 267 return (tfd->tfd_count > 0); 268 } 269 270 static struct filterops timerfd_rfiltops = { 271 .f_isfd = 1, 272 .f_detach = filt_timerfddetach, 273 .f_event = filt_timerfdread, 274 }; 275 276 static int 277 timerfd_kqfilter(struct file *fp, struct knote *kn) 278 { 279 struct timerfd *tfd = fp->f_data; 280 281 if (kn->kn_filter != EVFILT_READ) 282 return (EINVAL); 283 284 kn->kn_fop = &timerfd_rfiltops; 285 kn->kn_hook = tfd; 286 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 287 288 return (0); 289 } 290 291 static int 292 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) 293 { 294 struct timerfd *tfd = fp->f_data; 295 296 bzero(sb, sizeof(*sb)); 297 sb->st_nlink = fp->f_count - 1; 298 sb->st_uid = fp->f_cred->cr_uid; 299 sb->st_gid = fp->f_cred->cr_gid; 300 sb->st_blksize = PAGE_SIZE; 301 302 mtx_lock(&tfd->tfd_lock); 303 sb->st_ino = tfd->tfd_ino; 304 sb->st_atim = tfd->tfd_atim; 305 sb->st_mtim = tfd->tfd_mtim; 306 sb->st_birthtim = tfd->tfd_birthtim; 307 mtx_unlock(&tfd->tfd_lock); 308 309 return (0); 310 } 311 312 static int 313 timerfd_close(struct file *fp, struct thread *td) 314 { 315 struct timerfd *tfd = fp->f_data; 316 317 callout_drain(&tfd->tfd_callout); 318 seldrain(&tfd->tfd_sel); 319 knlist_destroy(&tfd->tfd_sel.si_note); 320 mtx_destroy(&tfd->tfd_lock); 321 LIST_REMOVE(tfd, entry); 322 free(tfd, M_TIMERFD); 323 fp->f_ops = &badfileops; 324 325 return (0); 326 } 327 328 static int 329 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, 330 struct filedesc *fdp) 331 { 332 333 struct timerfd *tfd = fp->f_data; 334 335 kif->kf_type = KF_TYPE_TIMERFD; 336 mtx_lock(&tfd->tfd_lock); 337 kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid; 338 kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags; 339 kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd; 340 mtx_unlock(&tfd->tfd_lock); 341 342 return (0); 343 } 344 345 static struct fileops timerfdops = { 346 .fo_read = timerfd_read, 347 .fo_write = invfo_rdwr, 348 .fo_truncate = invfo_truncate, 349 .fo_ioctl = timerfd_ioctl, 350 .fo_poll = timerfd_poll, 351 .fo_kqfilter = timerfd_kqfilter, 352 .fo_stat = timerfd_stat, 353 .fo_close = timerfd_close, 354 .fo_chmod = invfo_chmod, 355 .fo_chown = invfo_chown, 356 .fo_sendfile = invfo_sendfile, 357 .fo_fill_kinfo = timerfd_fill_kinfo, 358 .fo_flags = DFLAG_PASSABLE, 359 }; 360 361 static void 362 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value) 363 { 364 struct timespec curr_value; 365 366 *old_value = tfd->tfd_time; 367 if (timespecisset(&tfd->tfd_time.it_value)) { 368 nanouptime(&curr_value); 369 timespecsub(&tfd->tfd_time.it_value, &curr_value, 370 &old_value->it_value); 371 } 372 } 373 374 static void 375 timerfd_expire(void *arg) 376 { 377 struct timerfd *tfd = (struct timerfd *)arg; 378 struct timespec uptime; 379 380 ++tfd->tfd_count; 381 tfd->tfd_expired = true; 382 if (timespecisset(&tfd->tfd_time.it_interval)) { 383 /* Count missed events. */ 384 nanouptime(&uptime); 385 if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) { 386 timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime); 387 tfd->tfd_count += tstosbt(uptime) / 388 tstosbt(tfd->tfd_time.it_interval); 389 } 390 timespecadd(&tfd->tfd_time.it_value, 391 &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value); 392 callout_schedule_sbt(&tfd->tfd_callout, 393 tstosbt(tfd->tfd_time.it_value), 394 0, C_ABSOLUTE); 395 } else { 396 /* Single shot timer. */ 397 callout_deactivate(&tfd->tfd_callout); 398 timespecclear(&tfd->tfd_time.it_value); 399 } 400 401 wakeup(&tfd->tfd_count); 402 selwakeup(&tfd->tfd_sel); 403 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 404 } 405 406 int 407 kern_timerfd_create(struct thread *td, int clockid, int flags) 408 { 409 struct file *fp; 410 struct timerfd *tfd; 411 int error, fd, fflags = 0; 412 413 AUDIT_ARG_VALUE(clockid); 414 AUDIT_ARG_FFLAGS(flags); 415 416 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 417 return (EINVAL); 418 if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0) 419 return (EINVAL); 420 if ((flags & TFD_CLOEXEC) != 0) 421 fflags |= O_CLOEXEC; 422 423 tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO); 424 if (tfd == NULL) 425 return (ENOMEM); 426 tfd->tfd_clockid = (clockid_t)clockid; 427 tfd->tfd_flags = flags; 428 tfd->tfd_ino = alloc_unr64(&tfdino_unr); 429 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 430 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 431 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 432 timerfd_getboottime(&tfd->tfd_boottim); 433 getnanotime(&tfd->tfd_birthtim); 434 LIST_INSERT_HEAD(&timerfd_head, tfd, entry); 435 436 error = falloc(td, &fp, &fd, fflags); 437 if (error != 0) 438 return (error); 439 fflags = FREAD; 440 if ((flags & TFD_NONBLOCK) != 0) 441 fflags |= FNONBLOCK; 442 443 finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops); 444 fdrop(fp, td); 445 446 td->td_retval[0] = fd; 447 return (0); 448 } 449 450 int 451 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value) 452 { 453 struct file *fp; 454 struct timerfd *tfd; 455 int error; 456 457 error = fget(td, fd, &cap_write_rights, &fp); 458 if (error != 0) 459 return (error); 460 tfd = fp->f_data; 461 if (tfd == NULL || fp->f_type != DTYPE_TIMERFD) { 462 fdrop(fp, td); 463 return (EINVAL); 464 } 465 466 mtx_lock(&tfd->tfd_lock); 467 timerfd_curval(tfd, curr_value); 468 mtx_unlock(&tfd->tfd_lock); 469 470 fdrop(fp, td); 471 return (0); 472 } 473 474 int 475 kern_timerfd_settime(struct thread *td, int fd, int flags, 476 const struct itimerspec *new_value, struct itimerspec *old_value) 477 { 478 struct file *fp; 479 struct timerfd *tfd; 480 struct timespec ts; 481 int error = 0; 482 483 if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0) 484 return (EINVAL); 485 if (!timespecvalid_interval(&new_value->it_value) || 486 !timespecvalid_interval(&new_value->it_interval)) 487 return (EINVAL); 488 489 error = fget(td, fd, &cap_write_rights, &fp); 490 if (error != 0) 491 return (error); 492 tfd = fp->f_data; 493 if (tfd == NULL || fp->f_type != DTYPE_TIMERFD) { 494 fdrop(fp, td); 495 return (EINVAL); 496 } 497 498 mtx_lock(&tfd->tfd_lock); 499 getnanotime(&tfd->tfd_mtim); 500 tfd->tfd_timflags = flags; 501 502 /* Store old itimerspec, if applicable. */ 503 if (old_value != NULL) 504 timerfd_curval(tfd, old_value); 505 506 /* Set new expiration. */ 507 tfd->tfd_time = *new_value; 508 if (timespecisset(&tfd->tfd_time.it_value)) { 509 if ((flags & TFD_TIMER_ABSTIME) == 0) { 510 nanouptime(&ts); 511 timespecadd(&tfd->tfd_time.it_value, &ts, 512 &tfd->tfd_time.it_value); 513 } else if (tfd->tfd_clockid == CLOCK_REALTIME) { 514 /* ECANCELED if unread jump is pending. */ 515 if (tfd->tfd_jumped == TFD_CANCELED) 516 error = ECANCELED; 517 /* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */ 518 timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim, 519 &tfd->tfd_time.it_value); 520 } 521 callout_reset_sbt(&tfd->tfd_callout, 522 tstosbt(tfd->tfd_time.it_value), 523 0, timerfd_expire, tfd, C_ABSOLUTE); 524 } else { 525 callout_stop(&tfd->tfd_callout); 526 } 527 tfd->tfd_count = 0; 528 tfd->tfd_expired = false; 529 tfd->tfd_jumped = TFD_NOJUMP; 530 mtx_unlock(&tfd->tfd_lock); 531 532 fdrop(fp, td); 533 return (error); 534 } 535 536 int 537 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap) 538 { 539 return (kern_timerfd_create(td, uap->clockid, uap->flags)); 540 } 541 542 int 543 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap) 544 { 545 struct itimerspec curr_value; 546 int error; 547 548 error = kern_timerfd_gettime(td, uap->fd, &curr_value); 549 if (error == 0) 550 error = copyout(&curr_value, uap->curr_value, 551 sizeof(curr_value)); 552 553 return (error); 554 } 555 556 int 557 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap) 558 { 559 struct itimerspec new_value, old_value; 560 int error; 561 562 error = copyin(uap->new_value, &new_value, sizeof(new_value)); 563 if (error != 0) 564 return (error); 565 if (uap->old_value == NULL) { 566 error = kern_timerfd_settime(td, uap->fd, uap->flags, 567 &new_value, NULL); 568 } else { 569 error = kern_timerfd_settime(td, uap->fd, uap->flags, 570 &new_value, &old_value); 571 if (error == 0) 572 error = copyout(&old_value, uap->old_value, 573 sizeof(old_value)); 574 } 575 return (error); 576 } 577 578 #ifdef COMPAT_FREEBSD32 579 int 580 freebsd32_timerfd_gettime(struct thread *td, 581 struct freebsd32_timerfd_gettime_args *uap) 582 { 583 struct itimerspec curr_value; 584 struct itimerspec32 curr_value32; 585 int error; 586 587 error = kern_timerfd_gettime(td, uap->fd, &curr_value); 588 if (error == 0) { 589 CP(curr_value, curr_value32, it_value.tv_sec); 590 CP(curr_value, curr_value32, it_value.tv_nsec); 591 CP(curr_value, curr_value32, it_interval.tv_sec); 592 CP(curr_value, curr_value32, it_interval.tv_nsec); 593 error = copyout(&curr_value32, uap->curr_value, 594 sizeof(curr_value32)); 595 } 596 597 return (error); 598 } 599 600 int 601 freebsd32_timerfd_settime(struct thread *td, 602 struct freebsd32_timerfd_settime_args *uap) 603 { 604 struct itimerspec new_value, old_value; 605 struct itimerspec32 new_value32, old_value32; 606 int error; 607 608 error = copyin(uap->new_value, &new_value32, sizeof(new_value32)); 609 if (error != 0) 610 return (error); 611 CP(new_value32, new_value, it_value.tv_sec); 612 CP(new_value32, new_value, it_value.tv_nsec); 613 CP(new_value32, new_value, it_interval.tv_sec); 614 CP(new_value32, new_value, it_interval.tv_nsec); 615 if (uap->old_value == NULL) { 616 error = kern_timerfd_settime(td, uap->fd, uap->flags, 617 &new_value, NULL); 618 } else { 619 error = kern_timerfd_settime(td, uap->fd, uap->flags, 620 &new_value, &old_value); 621 if (error == 0) { 622 CP(old_value, old_value32, it_value.tv_sec); 623 CP(old_value, old_value32, it_value.tv_nsec); 624 CP(old_value, old_value32, it_interval.tv_sec); 625 CP(old_value, old_value32, it_interval.tv_nsec); 626 error = copyout(&old_value32, uap->old_value, 627 sizeof(old_value32)); 628 } 629 } 630 return (error); 631 } 632 #endif 633