1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/callout.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/filio.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/selinfo.h> 45 #include <sys/stat.h> 46 #include <sys/sx.h> 47 #include <sys/syscallsubr.h> 48 #include <sys/sysctl.h> 49 #include <sys/sysent.h> 50 #include <sys/sysproto.h> 51 #include <sys/timerfd.h> 52 #include <sys/timespec.h> 53 #include <sys/uio.h> 54 #include <sys/user.h> 55 56 #include <security/audit/audit.h> 57 58 static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures"); 59 60 static struct mtx timerfd_list_lock; 61 static LIST_HEAD(, timerfd) timerfd_list; 62 MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF); 63 64 static struct unrhdr64 tfdino_unr; 65 66 #define TFD_NOJUMP 0 /* Realtime clock has not jumped. */ 67 #define TFD_READ 1 /* Jumped, tfd has been read since. */ 68 #define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */ 69 #define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */ 70 #define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED) 71 72 /* 73 * One structure allocated per timerfd descriptor. 74 * 75 * Locking semantics: 76 * (t) locked by tfd_lock mtx 77 * (l) locked by timerfd_list_lock sx 78 * (c) const until freeing 79 */ 80 struct timerfd { 81 /* User specified. */ 82 struct itimerspec tfd_time; /* (t) tfd timer */ 83 clockid_t tfd_clockid; /* (c) timing base */ 84 int tfd_flags; /* (c) creation flags */ 85 int tfd_timflags; /* (t) timer flags */ 86 87 /* Used internally. */ 88 timerfd_t tfd_count; /* (t) expiration count since read */ 89 bool tfd_expired; /* (t) true upon initial expiration */ 90 struct mtx tfd_lock; /* tfd mtx lock */ 91 struct callout tfd_callout; /* (t) expiration notification */ 92 struct selinfo tfd_sel; /* (t) I/O alerts */ 93 struct timespec tfd_boottim; /* (t) cached boottime */ 94 int tfd_jumped; /* (t) timer jump status */ 95 LIST_ENTRY(timerfd) entry; /* (l) entry in list */ 96 97 /* For stat(2). */ 98 ino_t tfd_ino; /* (c) inode number */ 99 struct timespec tfd_atim; /* (t) time of last read */ 100 struct timespec tfd_mtim; /* (t) time of last settime */ 101 struct timespec tfd_birthtim; /* (c) creation time */ 102 }; 103 104 static void 105 timerfd_init(void *data) 106 { 107 new_unrhdr64(&tfdino_unr, 1); 108 } 109 110 SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL); 111 112 static inline void 113 timerfd_getboottime(struct timespec *ts) 114 { 115 struct timeval tv; 116 117 getboottime(&tv); 118 TIMEVAL_TO_TIMESPEC(&tv, ts); 119 } 120 121 /* 122 * Call when a discontinuous jump has occured in CLOCK_REALTIME and 123 * update timerfd's cached boottime. A jump can be triggered using 124 * functions like clock_settime(2) or settimeofday(2). 125 * 126 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set 127 * and the realtime clock jumps. 128 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set, 129 * but the realtime clock jumps backwards. 130 */ 131 void 132 timerfd_jumped(void) 133 { 134 struct timerfd *tfd; 135 struct timespec boottime, diff; 136 137 if (LIST_EMPTY(&timerfd_list)) 138 return; 139 140 timerfd_getboottime(&boottime); 141 mtx_lock(&timerfd_list_lock); 142 LIST_FOREACH(tfd, &timerfd_list, entry) { 143 mtx_lock(&tfd->tfd_lock); 144 if (tfd->tfd_clockid != CLOCK_REALTIME || 145 (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 || 146 timespeccmp(&boottime, &tfd->tfd_boottim, ==)) { 147 mtx_unlock(&tfd->tfd_lock); 148 continue; 149 } 150 151 if (callout_active(&tfd->tfd_callout)) { 152 if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0) 153 tfd->tfd_jumped = TFD_CANCELED; 154 else if (timespeccmp(&boottime, &tfd->tfd_boottim, <)) 155 tfd->tfd_jumped = TFD_ZREAD; 156 157 /* 158 * Do not reschedule callout when 159 * inside interval time loop. 160 */ 161 if (!tfd->tfd_expired) { 162 timespecsub(&boottime, 163 &tfd->tfd_boottim, &diff); 164 timespecsub(&tfd->tfd_time.it_value, 165 &diff, &tfd->tfd_time.it_value); 166 if (callout_stop(&tfd->tfd_callout) == 1) { 167 callout_schedule_sbt(&tfd->tfd_callout, 168 tstosbt(tfd->tfd_time.it_value), 169 0, C_ABSOLUTE); 170 } 171 } 172 } 173 174 tfd->tfd_boottim = boottime; 175 mtx_unlock(&tfd->tfd_lock); 176 } 177 mtx_unlock(&timerfd_list_lock); 178 } 179 180 static int 181 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 182 int flags, struct thread *td) 183 { 184 struct timerfd *tfd = fp->f_data; 185 timerfd_t count; 186 int error = 0; 187 188 if (uio->uio_resid < sizeof(timerfd_t)) 189 return (EINVAL); 190 191 mtx_lock(&tfd->tfd_lock); 192 retry: 193 getnanotime(&tfd->tfd_atim); 194 if ((tfd->tfd_jumped & TFD_JUMPED) != 0) { 195 if (tfd->tfd_jumped == TFD_CANCELED) 196 error = ECANCELED; 197 tfd->tfd_jumped = TFD_READ; 198 tfd->tfd_count = 0; 199 mtx_unlock(&tfd->tfd_lock); 200 return (error); 201 } else { 202 tfd->tfd_jumped = TFD_NOJUMP; 203 } 204 if (tfd->tfd_count == 0) { 205 if ((fp->f_flag & FNONBLOCK) != 0) { 206 mtx_unlock(&tfd->tfd_lock); 207 return (EAGAIN); 208 } 209 td->td_rtcgen = atomic_load_acq_int(&rtc_generation); 210 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, 211 PCATCH, "tfdrd", 0); 212 if (error == 0) { 213 goto retry; 214 } else { 215 mtx_unlock(&tfd->tfd_lock); 216 return (error); 217 } 218 } 219 220 count = tfd->tfd_count; 221 tfd->tfd_count = 0; 222 mtx_unlock(&tfd->tfd_lock); 223 error = uiomove(&count, sizeof(timerfd_t), uio); 224 225 return (error); 226 } 227 228 static int 229 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 230 struct ucred *active_cred, struct thread *td) 231 { 232 switch (cmd) { 233 case FIOASYNC: 234 if (*(int *)data != 0) 235 atomic_set_int(&fp->f_flag, FASYNC); 236 else 237 atomic_clear_int(&fp->f_flag, FASYNC); 238 return (0); 239 case FIONBIO: 240 if (*(int *)data != 0) 241 atomic_set_int(&fp->f_flag, FNONBLOCK); 242 else 243 atomic_clear_int(&fp->f_flag, FNONBLOCK); 244 return (0); 245 } 246 return (ENOTTY); 247 } 248 249 static int 250 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 251 struct thread *td) 252 { 253 struct timerfd *tfd = fp->f_data; 254 int revents = 0; 255 256 mtx_lock(&tfd->tfd_lock); 257 if ((events & (POLLIN | POLLRDNORM)) != 0 && 258 tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ) 259 revents |= events & (POLLIN | POLLRDNORM); 260 if (revents == 0) 261 selrecord(td, &tfd->tfd_sel); 262 mtx_unlock(&tfd->tfd_lock); 263 264 return (revents); 265 } 266 267 static void 268 filt_timerfddetach(struct knote *kn) 269 { 270 struct timerfd *tfd = kn->kn_hook; 271 272 mtx_lock(&tfd->tfd_lock); 273 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 274 mtx_unlock(&tfd->tfd_lock); 275 } 276 277 static int 278 filt_timerfdread(struct knote *kn, long hint) 279 { 280 struct timerfd *tfd = kn->kn_hook; 281 282 mtx_assert(&tfd->tfd_lock, MA_OWNED); 283 kn->kn_data = (int64_t)tfd->tfd_count; 284 return (tfd->tfd_count > 0); 285 } 286 287 static const struct filterops timerfd_rfiltops = { 288 .f_isfd = 1, 289 .f_detach = filt_timerfddetach, 290 .f_event = filt_timerfdread, 291 }; 292 293 static int 294 timerfd_kqfilter(struct file *fp, struct knote *kn) 295 { 296 struct timerfd *tfd = fp->f_data; 297 298 if (kn->kn_filter != EVFILT_READ) 299 return (EINVAL); 300 301 kn->kn_fop = &timerfd_rfiltops; 302 kn->kn_hook = tfd; 303 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 304 305 return (0); 306 } 307 308 static int 309 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) 310 { 311 struct timerfd *tfd = fp->f_data; 312 313 bzero(sb, sizeof(*sb)); 314 sb->st_nlink = fp->f_count - 1; 315 sb->st_uid = fp->f_cred->cr_uid; 316 sb->st_gid = fp->f_cred->cr_gid; 317 sb->st_blksize = PAGE_SIZE; 318 mtx_lock(&tfd->tfd_lock); 319 sb->st_atim = tfd->tfd_atim; 320 sb->st_mtim = tfd->tfd_mtim; 321 mtx_unlock(&tfd->tfd_lock); 322 sb->st_ctim = sb->st_mtim; 323 sb->st_ino = tfd->tfd_ino; 324 sb->st_birthtim = tfd->tfd_birthtim; 325 326 return (0); 327 } 328 329 static int 330 timerfd_close(struct file *fp, struct thread *td) 331 { 332 struct timerfd *tfd = fp->f_data; 333 334 mtx_lock(&timerfd_list_lock); 335 LIST_REMOVE(tfd, entry); 336 mtx_unlock(&timerfd_list_lock); 337 338 callout_drain(&tfd->tfd_callout); 339 seldrain(&tfd->tfd_sel); 340 knlist_destroy(&tfd->tfd_sel.si_note); 341 mtx_destroy(&tfd->tfd_lock); 342 free(tfd, M_TIMERFD); 343 fp->f_ops = &badfileops; 344 345 return (0); 346 } 347 348 static int 349 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, 350 struct filedesc *fdp) 351 { 352 struct timerfd *tfd = fp->f_data; 353 354 kif->kf_type = KF_TYPE_TIMERFD; 355 kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid; 356 kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags; 357 kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd; 358 359 return (0); 360 } 361 362 static const struct fileops timerfdops = { 363 .fo_read = timerfd_read, 364 .fo_write = invfo_rdwr, 365 .fo_truncate = invfo_truncate, 366 .fo_ioctl = timerfd_ioctl, 367 .fo_poll = timerfd_poll, 368 .fo_kqfilter = timerfd_kqfilter, 369 .fo_stat = timerfd_stat, 370 .fo_close = timerfd_close, 371 .fo_chmod = invfo_chmod, 372 .fo_chown = invfo_chown, 373 .fo_sendfile = invfo_sendfile, 374 .fo_fill_kinfo = timerfd_fill_kinfo, 375 .fo_cmp = file_kcmp_generic, 376 .fo_flags = DFLAG_PASSABLE, 377 }; 378 379 static void 380 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value) 381 { 382 struct timespec curr_value; 383 384 mtx_assert(&tfd->tfd_lock, MA_OWNED); 385 *old_value = tfd->tfd_time; 386 if (timespecisset(&tfd->tfd_time.it_value)) { 387 nanouptime(&curr_value); 388 timespecsub(&tfd->tfd_time.it_value, &curr_value, 389 &old_value->it_value); 390 } 391 } 392 393 static void 394 timerfd_expire(void *arg) 395 { 396 struct timerfd *tfd = (struct timerfd *)arg; 397 struct timespec uptime; 398 399 ++tfd->tfd_count; 400 tfd->tfd_expired = true; 401 if (timespecisset(&tfd->tfd_time.it_interval)) { 402 /* Count missed events. */ 403 nanouptime(&uptime); 404 if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) { 405 timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime); 406 tfd->tfd_count += tstosbt(uptime) / 407 tstosbt(tfd->tfd_time.it_interval); 408 } 409 timespecadd(&tfd->tfd_time.it_value, 410 &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value); 411 callout_schedule_sbt(&tfd->tfd_callout, 412 tstosbt(tfd->tfd_time.it_value), 413 0, C_ABSOLUTE); 414 } else { 415 /* Single shot timer. */ 416 callout_deactivate(&tfd->tfd_callout); 417 timespecclear(&tfd->tfd_time.it_value); 418 } 419 420 wakeup(&tfd->tfd_count); 421 selwakeup(&tfd->tfd_sel); 422 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 423 } 424 425 int 426 kern_timerfd_create(struct thread *td, int clockid, int flags) 427 { 428 struct file *fp; 429 struct timerfd *tfd; 430 int error, fd, fflags; 431 432 AUDIT_ARG_VALUE(clockid); 433 AUDIT_ARG_FFLAGS(flags); 434 435 switch (clockid) { 436 case CLOCK_REALTIME: 437 /* FALLTHROUGH */ 438 case CLOCK_MONOTONIC: 439 /* FALLTHROUGH */ 440 case CLOCK_UPTIME: 441 /* 442 * CLOCK_BOOTTIME should be added once different from 443 * CLOCK_UPTIME 444 */ 445 break; 446 default: 447 return (EINVAL); 448 } 449 if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0) 450 return (EINVAL); 451 452 fflags = FREAD; 453 if ((flags & TFD_CLOEXEC) != 0) 454 fflags |= O_CLOEXEC; 455 if ((flags & TFD_NONBLOCK) != 0) 456 fflags |= FNONBLOCK; 457 458 error = falloc(td, &fp, &fd, fflags); 459 if (error != 0) 460 return (error); 461 462 tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO); 463 tfd->tfd_clockid = (clockid_t)clockid; 464 tfd->tfd_flags = flags; 465 tfd->tfd_ino = alloc_unr64(&tfdino_unr); 466 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 467 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 468 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 469 timerfd_getboottime(&tfd->tfd_boottim); 470 getnanotime(&tfd->tfd_birthtim); 471 mtx_lock(&timerfd_list_lock); 472 LIST_INSERT_HEAD(&timerfd_list, tfd, entry); 473 mtx_unlock(&timerfd_list_lock); 474 475 finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops); 476 477 fdrop(fp, td); 478 479 td->td_retval[0] = fd; 480 return (0); 481 } 482 483 int 484 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value) 485 { 486 struct file *fp; 487 struct timerfd *tfd; 488 int error; 489 490 error = fget(td, fd, &cap_write_rights, &fp); 491 if (error != 0) 492 return (error); 493 if (fp->f_type != DTYPE_TIMERFD) { 494 fdrop(fp, td); 495 return (EINVAL); 496 } 497 tfd = fp->f_data; 498 499 mtx_lock(&tfd->tfd_lock); 500 timerfd_curval(tfd, curr_value); 501 mtx_unlock(&tfd->tfd_lock); 502 503 fdrop(fp, td); 504 return (0); 505 } 506 507 int 508 kern_timerfd_settime(struct thread *td, int fd, int flags, 509 const struct itimerspec *new_value, struct itimerspec *old_value) 510 { 511 struct file *fp; 512 struct timerfd *tfd; 513 struct timespec ts; 514 int error = 0; 515 516 if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0) 517 return (EINVAL); 518 if (!timespecvalid_interval(&new_value->it_value) || 519 !timespecvalid_interval(&new_value->it_interval)) 520 return (EINVAL); 521 522 error = fget(td, fd, &cap_write_rights, &fp); 523 if (error != 0) 524 return (error); 525 if (fp->f_type != DTYPE_TIMERFD) { 526 fdrop(fp, td); 527 return (EINVAL); 528 } 529 tfd = fp->f_data; 530 531 mtx_lock(&tfd->tfd_lock); 532 getnanotime(&tfd->tfd_mtim); 533 tfd->tfd_timflags = flags; 534 535 /* Store old itimerspec, if applicable. */ 536 if (old_value != NULL) 537 timerfd_curval(tfd, old_value); 538 539 /* Set new expiration. */ 540 tfd->tfd_time = *new_value; 541 if (timespecisset(&tfd->tfd_time.it_value)) { 542 if ((flags & TFD_TIMER_ABSTIME) == 0) { 543 nanouptime(&ts); 544 timespecadd(&tfd->tfd_time.it_value, &ts, 545 &tfd->tfd_time.it_value); 546 } else if (tfd->tfd_clockid == CLOCK_REALTIME) { 547 /* ECANCELED if unread jump is pending. */ 548 if (tfd->tfd_jumped == TFD_CANCELED) 549 error = ECANCELED; 550 /* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */ 551 timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim, 552 &tfd->tfd_time.it_value); 553 } 554 callout_reset_sbt(&tfd->tfd_callout, 555 tstosbt(tfd->tfd_time.it_value), 556 0, timerfd_expire, tfd, C_ABSOLUTE); 557 } else { 558 callout_stop(&tfd->tfd_callout); 559 } 560 tfd->tfd_count = 0; 561 tfd->tfd_expired = false; 562 tfd->tfd_jumped = TFD_NOJUMP; 563 mtx_unlock(&tfd->tfd_lock); 564 565 fdrop(fp, td); 566 return (error); 567 } 568 569 int 570 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap) 571 { 572 return (kern_timerfd_create(td, uap->clockid, uap->flags)); 573 } 574 575 int 576 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap) 577 { 578 struct itimerspec curr_value; 579 int error; 580 581 error = kern_timerfd_gettime(td, uap->fd, &curr_value); 582 if (error == 0) 583 error = copyout(&curr_value, uap->curr_value, 584 sizeof(curr_value)); 585 586 return (error); 587 } 588 589 int 590 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap) 591 { 592 struct itimerspec new_value, old_value; 593 int error; 594 595 error = copyin(uap->new_value, &new_value, sizeof(new_value)); 596 if (error != 0) 597 return (error); 598 if (uap->old_value == NULL) { 599 error = kern_timerfd_settime(td, uap->fd, uap->flags, 600 &new_value, NULL); 601 } else { 602 error = kern_timerfd_settime(td, uap->fd, uap->flags, 603 &new_value, &old_value); 604 if (error == 0) 605 error = copyout(&old_value, uap->old_value, 606 sizeof(old_value)); 607 } 608 return (error); 609 } 610