/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org>
 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mutex.h>
#include <sys/poll.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/selinfo.h>
#include <sys/stat.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/timerfd.h>
#include <sys/timespec.h>
#include <sys/uio.h>
#include <sys/user.h>

#include <security/audit/audit.h>

static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures");

/*
 * Global list of every live timerfd.  Entries are added in
 * kern_timerfd_create(), removed in timerfd_close(), and walked by
 * timerfd_jumped().
 */
static struct mtx timerfd_list_lock;
static LIST_HEAD(, timerfd) timerfd_list;
MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF);

/* Source of the tfd_ino values reported through stat(2). */
static struct unrhdr64 tfdino_unr;

/*
 * Values stored in timerfd.tfd_jumped.  TFD_JUMPED is the mask of the
 * states that represent a jump not yet consumed by read(2).
 */
#define	TFD_NOJUMP	0	/* Realtime clock has not jumped. */
#define	TFD_READ	1	/* Jumped, tfd has been read since. */
#define	TFD_ZREAD	2	/* Jumped backwards, CANCEL_ON_SET=false. */
#define	TFD_CANCELED	4	/* Jumped, CANCEL_ON_SET=true. */
#define	TFD_JUMPED	(TFD_ZREAD | TFD_CANCELED)

/*
 * One structure allocated per timerfd descriptor.
 *
 * Locking semantics:
 * (t)	locked by tfd_lock mtx
 * (l)	locked by timerfd_list_lock mtx
 * (c)	const until freeing
 */
struct timerfd {
	/* User specified. */
	struct itimerspec tfd_time;	/* (t) tfd timer */
	clockid_t	tfd_clockid;	/* (c) timing base */
	int		tfd_flags;	/* (c) creation flags */
	int		tfd_timflags;	/* (t) timer flags */

	/* Used internally. */
	timerfd_t	tfd_count;	/* (t) expiration count since read */
	bool		tfd_expired;	/* (t) true upon initial expiration */
	struct mtx	tfd_lock;	/* tfd mtx lock */
	struct callout	tfd_callout;	/* (t) expiration notification */
	struct selinfo	tfd_sel;	/* (t) I/O alerts */
	struct timespec	tfd_boottim;	/* (t) cached boottime */
	int		tfd_jumped;	/* (t) timer jump status */
	LIST_ENTRY(timerfd) entry;	/* (l) entry in list */

	/* For stat(2). */
	ino_t		tfd_ino;	/* (c) inode number */
	struct timespec	tfd_atim;	/* (t) time of last read */
	struct timespec	tfd_mtim;	/* (t) time of last settime */
	struct timespec	tfd_birthtim;	/* (c) creation time */
};

/*
 * SYSINIT hook: set up the 64-bit unit-number allocator that supplies
 * tfd_ino values in kern_timerfd_create().
 */
static void
timerfd_init(void *data)
{
	new_unrhdr64(&tfdino_unr, 1);
}

SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL);

/*
 * Store the value returned by getboottime() in *ts, converted from
 * struct timeval to struct timespec.
 */
static inline void
timerfd_getboottime(struct timespec *ts)
{
	struct timeval tv;

	getboottime(&tv);
	TIMEVAL_TO_TIMESPEC(&tv, ts);
}

/*
 * Notify everything that may be waiting on tfd: threads sleeping on
 * &tfd->tfd_count (see timerfd_read()), select/poll waiters recorded in
 * tfd_sel, and knotes attached to tfd_sel.si_note.
 *
 * NOTE(review): KNOTE_LOCKED is used and the knlist is initialized with
 * tfd_lock, so callers are presumably expected to hold tfd_lock.
 * timerfd_jumped() does so explicitly; confirm for the callout path.
 */
static void
timerfd_wakeup(struct timerfd *tfd)
{
	wakeup(&tfd->tfd_count);
	selwakeup(&tfd->tfd_sel);
	KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0);
}

/*
 * Call when a discontinuous jump has occurred in CLOCK_REALTIME and
 * update timerfd's cached boottime. A jump can be triggered using
 * functions like clock_settime(2) or settimeofday(2).
 *
 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set
 * and the realtime clock jumps.
 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set,
 * but the realtime clock jumps backwards.
 *
 * Only timers that use CLOCK_REALTIME with TFD_TIMER_ABSTIME and whose
 * cached boottime differs from the current one are touched.
 */
void
timerfd_jumped(void)
{
	struct timerfd *tfd;
	struct timespec boottime, diff;

	/* Early exit; note this check is made without timerfd_list_lock. */
	if (LIST_EMPTY(&timerfd_list))
		return;

	timerfd_getboottime(&boottime);
	mtx_lock(&timerfd_list_lock);
	LIST_FOREACH(tfd, &timerfd_list, entry) {
		mtx_lock(&tfd->tfd_lock);
		if (tfd->tfd_clockid != CLOCK_REALTIME ||
		    (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 ||
		    timespeccmp(&boottime, &tfd->tfd_boottim, ==)) {
			mtx_unlock(&tfd->tfd_lock);
			continue;
		}

		if (callout_active(&tfd->tfd_callout)) {
			if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0)
				tfd->tfd_jumped = TFD_CANCELED;
			else if (timespeccmp(&boottime, &tfd->tfd_boottim, <))
				tfd->tfd_jumped = TFD_ZREAD;

			/*
			 * Do not reschedule callout when
			 * inside interval time loop.
			 */
			if (!tfd->tfd_expired) {
				/*
				 * Shift the stored expiration by the change
				 * in boottime (new - cached) and, if the
				 * pending callout could be stopped, re-arm
				 * it at the adjusted time.
				 */
				timespecsub(&boottime,
				    &tfd->tfd_boottim, &diff);
				timespecsub(&tfd->tfd_time.it_value,
				    &diff, &tfd->tfd_time.it_value);
				if (callout_stop(&tfd->tfd_callout) == 1) {
					callout_schedule_sbt(&tfd->tfd_callout,
					    tstosbt_sat(tfd->tfd_time.it_value),
					    0, C_ABSOLUTE);
				}
			}
		}

		tfd->tfd_boottim = boottime;
		/* Wake waiters only if an unread jump is recorded. */
		if ((tfd->tfd_jumped & TFD_JUMPED) != 0)
			timerfd_wakeup(tfd);
		mtx_unlock(&tfd->tfd_lock);
	}
	mtx_unlock(&timerfd_list_lock);
}

/*
 * read(2) handler.  Copies the expiration count (a timerfd_t) out to the
 * caller and resets it to zero.  The access time is refreshed on every
 * pass through the retry label.
 *
 * Returns:
 *	EINVAL		the buffer is smaller than sizeof(timerfd_t);
 *	ECANCELED	an unread jump marked the timer TFD_CANCELED;
 *	0, no data	an unread jump marked the timer TFD_ZREAD;
 *	EAGAIN		the count is zero and the file is FNONBLOCK;
 *	mtx_sleep() error if the sleep does not return 0;
 *	otherwise the result of uiomove().
 */
static int
timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
    int flags, struct thread *td)
{
	struct timerfd *tfd = fp->f_data;
	timerfd_t count;
	int error = 0;

	if (uio->uio_resid < sizeof(timerfd_t))
		return (EINVAL);

	mtx_lock(&tfd->tfd_lock);
retry:
	getnanotime(&tfd->tfd_atim);
	if ((tfd->tfd_jumped & TFD_JUMPED) != 0) {
		/* Consume the pending jump; the count is discarded. */
		if (tfd->tfd_jumped == TFD_CANCELED)
			error = ECANCELED;
		tfd->tfd_jumped = TFD_READ;
		tfd->tfd_count = 0;
		mtx_unlock(&tfd->tfd_lock);
		return (error);
	} else {
		tfd->tfd_jumped = TFD_NOJUMP;
	}
	if (tfd->tfd_count == 0) {
		if ((fp->f_flag & FNONBLOCK) != 0) {
			mtx_unlock(&tfd->tfd_lock);
			return (EAGAIN);
		}
		/* Sleep on the channel that timerfd_wakeup() signals. */
		error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock,
		    PCATCH, "tfdrd", 0);
		if (error == 0) {
			goto retry;
		} else {
			mtx_unlock(&tfd->tfd_lock);
			return (error);
		}
	}

	count = tfd->tfd_count;
	tfd->tfd_count = 0;
	mtx_unlock(&tfd->tfd_lock);
	/* Copy out from the local snapshot after dropping tfd_lock. */
	error = uiomove(&count, sizeof(timerfd_t), uio);

	return (error);
}

/*
 * ioctl(2) handler.  FIOASYNC and FIONBIO set or clear FASYNC and
 * FNONBLOCK in fp->f_flag according to the int pointed to by data.
 * Any other command returns ENOTTY.
 */
static int
timerfd_ioctl(struct file *fp, u_long cmd, void *data,
    struct ucred *active_cred, struct thread *td)
{
	switch (cmd) {
	case FIOASYNC:
		if (*(int *)data != 0)
			atomic_set_int(&fp->f_flag, FASYNC);
		else
			atomic_clear_int(&fp->f_flag, FASYNC);
		return (0);
	case FIONBIO:
		if (*(int *)data != 0)
			atomic_set_int(&fp->f_flag, FNONBLOCK);
		else
			atomic_clear_int(&fp->f_flag, FNONBLOCK);
		return (0);
	}
	return (ENOTTY);
}

/*
 * poll(2) handler.  Reports POLLIN/POLLRDNORM (as requested) when the
 * expiration count is non-zero and tfd_jumped is not TFD_READ.  If
 * nothing is reported, the thread is recorded on tfd_sel so that
 * timerfd_wakeup() can notify it.
 *
 * NOTE(review): a pending TFD_ZREAD/TFD_CANCELED with a zero count is
 * not reported as readable here, although timerfd_read() would return
 * immediately in that state -- confirm this is intended.
 */
static int
timerfd_poll(struct file *fp, int events, struct ucred *active_cred,
    struct thread *td)
{
	struct timerfd *tfd = fp->f_data;
	int revents = 0;

	mtx_lock(&tfd->tfd_lock);
	if ((events & (POLLIN | POLLRDNORM)) != 0 &&
	    tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ)
		revents |= events & (POLLIN | POLLRDNORM);
	if (revents == 0)
		selrecord(td, &tfd->tfd_sel);
	mtx_unlock(&tfd->tfd_lock);

	return (revents);
}

/*
 * kqueue detach hook: remove the knote from the timerfd's knlist while
 * holding tfd_lock.
 */
static void
filt_timerfddetach(struct knote *kn)
{
	struct timerfd *tfd = kn->kn_hook;

	mtx_lock(&tfd->tfd_lock);
	knlist_remove(&tfd->tfd_sel.si_note, kn, 1);
	mtx_unlock(&tfd->tfd_lock);
}

/*
 * kqueue event hook for EVFILT_READ.  Must be called with tfd_lock held
 * (asserted).  Publishes the expiration count in kn_data and returns
 * non-zero under the same readiness condition used by timerfd_poll().
 */
static int
filt_timerfdread(struct knote *kn, long hint)
{
	struct timerfd *tfd = kn->kn_hook;

	mtx_assert(&tfd->tfd_lock, MA_OWNED);
	kn->kn_data = (int64_t)tfd->tfd_count;
	return (tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ);
}

/* Filter operations installed by timerfd_kqfilter() for EVFILT_READ. */
static const struct filterops timerfd_rfiltops = {
	.f_isfd = 1,
	.f_detach = filt_timerfddetach,
	.f_event = filt_timerfdread,
};

/*
 * kqfilter handler.  Only EVFILT_READ is supported (EINVAL otherwise).
 * Attaches the knote to the timerfd's knlist with kn_hook pointing at
 * the timerfd.
 */
static int
timerfd_kqfilter(struct file *fp, struct knote *kn)
{
	struct timerfd *tfd = fp->f_data;

	if (kn->kn_filter != EVFILT_READ)
		return (EINVAL);

	kn->kn_fop = &timerfd_rfiltops;
	kn->kn_hook = tfd;
	knlist_add(&tfd->tfd_sel.si_note, kn, 0);

	return (0);
}

/*
 * stat(2) handler.  Zeroes *sb and fills in link count, owner, block
 * size, timestamps and inode number.  st_ctim mirrors st_mtim.  Only
 * the (t)-protected timestamps are read under tfd_lock; tfd_ino and
 * tfd_birthtim are constant after creation.
 */
static int
timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred)
{
	struct timerfd *tfd = fp->f_data;

	bzero(sb, sizeof(*sb));
	sb->st_nlink = fp->f_count - 1;
	sb->st_uid = fp->f_cred->cr_uid;
	sb->st_gid = fp->f_cred->cr_gid;
	sb->st_blksize = PAGE_SIZE;
	mtx_lock(&tfd->tfd_lock);
	sb->st_atim = tfd->tfd_atim;
	sb->st_mtim = tfd->tfd_mtim;
	mtx_unlock(&tfd->tfd_lock);
	sb->st_ctim = sb->st_mtim;
	sb->st_ino = tfd->tfd_ino;
	sb->st_birthtim = tfd->tfd_birthtim;

	return (0);
}

/*
 * close handler.  Unlinks the timerfd from the global list first (so
 * timerfd_jumped() can no longer find it), then drains the callout and
 * select state, destroys the knlist and mutex, and frees the structure.
 * fp->f_ops is pointed at badfileops afterwards.
 */
static int
timerfd_close(struct file *fp, struct thread *td)
{
	struct timerfd *tfd = fp->f_data;

	mtx_lock(&timerfd_list_lock);
	LIST_REMOVE(tfd, entry);
	mtx_unlock(&timerfd_list_lock);

	callout_drain(&tfd->tfd_callout);
	seldrain(&tfd->tfd_sel);
	knlist_destroy(&tfd->tfd_sel.si_note);
	mtx_destroy(&tfd->tfd_lock);
	free(tfd, M_TIMERFD);
	fp->f_ops = &badfileops;

	return (0);
}

/*
 * Fill a kinfo_file record for this descriptor: type KF_TYPE_TIMERFD
 * plus the clock id, creation flags and the kernel address of the
 * timerfd structure.
 */
static int
timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif,
    struct filedesc *fdp)
{
	struct timerfd *tfd = fp->f_data;

	kif->kf_type = KF_TYPE_TIMERFD;
	kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid;
	kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags;
	kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd;

	return (0);
}

/*
 * File operations for DTYPE_TIMERFD descriptors.  Write, truncate,
 * chmod, chown and sendfile are routed to the generic invfo_* handlers.
 */
static const struct fileops timerfdops = {
	.fo_read = timerfd_read,
	.fo_write = invfo_rdwr,
	.fo_truncate = invfo_truncate,
	.fo_ioctl = timerfd_ioctl,
	.fo_poll = timerfd_poll,
	.fo_kqfilter = timerfd_kqfilter,
	.fo_stat = timerfd_stat,
	.fo_close = timerfd_close,
	.fo_chmod = invfo_chmod,
	.fo_chown = invfo_chown,
	.fo_sendfile = invfo_sendfile,
	.fo_fill_kinfo = timerfd_fill_kinfo,
	.fo_cmp = file_kcmp_generic,
	.fo_flags = DFLAG_PASSABLE,
};

/*
 * Report the timer's current setting in *old_value.  Must be called
 * with tfd_lock held (asserted).
 *
 * it_interval is copied as stored.  If the timer is armed (it_value is
 * set), it_value is returned as the stored expiration minus the current
 * nanouptime(); otherwise it is returned as stored (zero).
 */
static void
timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value)
{
	struct timespec curr_value;

	mtx_assert(&tfd->tfd_lock, MA_OWNED);
	*old_value = tfd->tfd_time;
	if (timespecisset(&tfd->tfd_time.it_value)) {
		nanouptime(&curr_value);
		timespecsub(&tfd->tfd_time.it_value, &curr_value,
		    &old_value->it_value);
	}
}

/*
 * Callout handler armed by kern_timerfd_settime().  Counts one
 * expiration, marks the timer as expired, and then either re-arms it
 * (periodic timer) or disarms it (single shot) before waking waiters.
 *
 * NOTE(review): no lock is taken here; the callout is initialized with
 * callout_init_mtx() on tfd_lock, so the handler presumably runs with
 * tfd_lock held -- confirm.
 */
static void
timerfd_expire(void *arg)
{
	struct timerfd *tfd = (struct timerfd *)arg;
	sbintime_t exp, interval, now, next, diff;

	++tfd->tfd_count;
	tfd->tfd_expired = true;
	if (timespecisset(&tfd->tfd_time.it_interval)) {
		exp = tstosbt_sat(tfd->tfd_time.it_value);
		interval = tstosbt_sat(tfd->tfd_time.it_interval);
		now = sbinuptime();
		/* now + interval, clamped so the sum cannot exceed SBT_MAX. */
		next = now > SBT_MAX - interval ? SBT_MAX : now + interval;

		/* Count missed events. */
		if (now > exp) {
			diff = now - exp;
			tfd->tfd_count += diff / interval;
			/* Keep the next expiration on the interval grid. */
			next -= diff % interval;
		}

		callout_schedule_sbt(&tfd->tfd_callout, next, 0, C_ABSOLUTE);
		tfd->tfd_time.it_value = sbttots(next);
	} else {
		/* Single shot timer. */
		callout_deactivate(&tfd->tfd_callout);
		timespecclear(&tfd->tfd_time.it_value);
	}

	timerfd_wakeup(tfd);
}

/*
 * Create a timerfd and return its descriptor in td->td_retval[0].
 *
 * clockid must be CLOCK_REALTIME, CLOCK_MONOTONIC or CLOCK_UPTIME, and
 * flags may contain only TFD_CLOEXEC and TFD_NONBLOCK; otherwise EINVAL
 * is returned.  An error from falloc() is returned unchanged.
 */
int
kern_timerfd_create(struct thread *td, int clockid, int flags)
{
	struct file *fp;
	struct timerfd *tfd;
	int error, fd, fflags;

	AUDIT_ARG_VALUE(clockid);
	AUDIT_ARG_FFLAGS(flags);

	switch (clockid) {
	case CLOCK_REALTIME:
		/* FALLTHROUGH */
	case CLOCK_MONOTONIC:
		/* FALLTHROUGH */
	case CLOCK_UPTIME:
		/*
		 * CLOCK_BOOTTIME should be added once different from
		 * CLOCK_UPTIME
		 */
		break;
	default:
		return (EINVAL);
	}
	if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0)
		return (EINVAL);

	/* Translate TFD_* creation flags into file flags. */
	fflags = FREAD;
	if ((flags & TFD_CLOEXEC) != 0)
		fflags |= O_CLOEXEC;
	if ((flags & TFD_NONBLOCK) != 0)
		fflags |= FNONBLOCK;

	error = falloc(td, &fp, &fd, fflags);
	if (error != 0)
		return (error);

	tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO);
	tfd->tfd_clockid = (clockid_t)clockid;
	tfd->tfd_flags = flags;
	tfd->tfd_ino = alloc_unr64(&tfdino_unr);
	mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF);
	/* The callout and the knlist both use tfd_lock as their lock. */
	callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0);
	knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock);
	timerfd_getboottime(&tfd->tfd_boottim);
	getnanotime(&tfd->tfd_birthtim);
	mtx_lock(&timerfd_list_lock);
	LIST_INSERT_HEAD(&timerfd_list, tfd, entry);
	mtx_unlock(&timerfd_list_lock);

	finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops);

	fdrop(fp, td);

	td->td_retval[0] = fd;
	return (0);
}

/*
 * Fetch the current setting of the timerfd referenced by fd into
 * *curr_value (see timerfd_curval()).
 *
 * Returns the fget() error if the lookup fails, EINVAL if fd is not a
 * timerfd, and 0 otherwise.
 *
 * NOTE(review): the lookup requests cap_write_rights even though this
 * path only reads timer state -- confirm that is intended.
 */
int
kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value)
{
	struct file *fp;
	struct timerfd *tfd;
	int error;

	error = fget(td, fd, &cap_write_rights, &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_TIMERFD) {
		fdrop(fp, td);
		return (EINVAL);
	}
	tfd = fp->f_data;

	mtx_lock(&tfd->tfd_lock);
	timerfd_curval(tfd, curr_value);
	mtx_unlock(&tfd->tfd_lock);

	fdrop(fp, td);
	return (0);
}

/*
 * Arm or disarm the timerfd referenced by fd.
 *
 * flags may contain only TFD_TIMER_ABSTIME and TFD_TIMER_CANCEL_ON_SET.
 * If old_value is not NULL it receives the previous setting.  A zero
 * new_value->it_value stops the timer; otherwise the callout is armed
 * at an absolute, uptime-based expiration:
 *  - relative timers: it_value + nanouptime();
 *  - absolute CLOCK_REALTIME timers: it_value - cached boottime;
 *  - other absolute timers: it_value as given.
 * The expiration count, expired flag and jump status are reset.
 *
 * Returns EINVAL for unknown flags, invalid timespecs or a non-timerfd
 * descriptor, the fget() error if the lookup fails, and ECANCELED when
 * an absolute CLOCK_REALTIME timer is set while a TFD_CANCELED jump is
 * still unread (the new setting is applied regardless).
 */
int
kern_timerfd_settime(struct thread *td, int fd, int flags,
    const struct itimerspec *new_value, struct itimerspec *old_value)
{
	struct file *fp;
	struct timerfd *tfd;
	struct timespec ts;
	int error = 0;

	if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0)
		return (EINVAL);
	if (!timespecvalid_interval(&new_value->it_value) ||
	    !timespecvalid_interval(&new_value->it_interval))
		return (EINVAL);

	error = fget(td, fd, &cap_write_rights, &fp);
	if (error != 0)
		return (error);
	if (fp->f_type != DTYPE_TIMERFD) {
		fdrop(fp, td);
		return (EINVAL);
	}
	tfd = fp->f_data;

	mtx_lock(&tfd->tfd_lock);
	getnanotime(&tfd->tfd_mtim);
	tfd->tfd_timflags = flags;

	/* Store old itimerspec, if applicable. */
	if (old_value != NULL)
		timerfd_curval(tfd, old_value);

	/* Set new expiration. */
	tfd->tfd_time = *new_value;
	if (timespecisset(&tfd->tfd_time.it_value)) {
		if ((flags & TFD_TIMER_ABSTIME) == 0) {
			/* Relative timer: convert to absolute uptime. */
			nanouptime(&ts);
			timespecadd(&tfd->tfd_time.it_value, &ts,
			    &tfd->tfd_time.it_value);
		} else if (tfd->tfd_clockid == CLOCK_REALTIME) {
			/* ECANCELED if unread jump is pending. */
			if (tfd->tfd_jumped == TFD_CANCELED)
				error = ECANCELED;
			/* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */
			timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim,
			    &tfd->tfd_time.it_value);
		}
		callout_reset_sbt(&tfd->tfd_callout,
		    tstosbt_sat(tfd->tfd_time.it_value),
		    0, timerfd_expire, tfd, C_ABSOLUTE);
	} else {
		callout_stop(&tfd->tfd_callout);
	}
	tfd->tfd_count = 0;
	tfd->tfd_expired = false;
	tfd->tfd_jumped = TFD_NOJUMP;
	mtx_unlock(&tfd->tfd_lock);

	fdrop(fp, td);
	return (error);
}

/* timerfd_create(2) system call entry: forwards to kern_timerfd_create(). */
int
sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap)
{
	return (kern_timerfd_create(td, uap->clockid, uap->flags));
}

/*
 * timerfd_gettime(2) system call entry: queries the timer and, on
 * success, copies the result out to uap->curr_value.
 */
int
sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap)
{
	struct itimerspec curr_value;
	int error;

	error = kern_timerfd_gettime(td, uap->fd, &curr_value);
	if (error == 0)
		error = copyout(&curr_value, uap->curr_value,
		    sizeof(curr_value));

	return (error);
}

/*
 * timerfd_settime(2) system call entry: copies in the new setting,
 * applies it, and copies the previous setting out to uap->old_value
 * when that pointer is non-NULL and the kernel call succeeded.
 */
int
sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap)
{
	struct itimerspec new_value, old_value;
	int error;

	error = copyin(uap->new_value, &new_value, sizeof(new_value));
	if (error != 0)
		return (error);
	if (uap->old_value == NULL) {
		error = kern_timerfd_settime(td, uap->fd, uap->flags,
		    &new_value, NULL);
	} else {
		error = kern_timerfd_settime(td, uap->fd, uap->flags,
		    &new_value, &old_value);
		if (error == 0)
			error = copyout(&old_value, uap->old_value,
			    sizeof(old_value));
	}
	return (error);
}