1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/callout.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/filio.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/selinfo.h> 45 #include <sys/stat.h> 46 #include <sys/sx.h> 47 #include <sys/sysctl.h> 48 #include <sys/sysent.h> 49 #include <sys/sysproto.h> 50 #include <sys/timerfd.h> 51 #include <sys/timespec.h> 52 #include <sys/uio.h> 53 #include <sys/user.h> 54 55 #include <security/audit/audit.h> 56 57 static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures"); 58 59 static struct mtx timerfd_list_lock; 60 static LIST_HEAD(, timerfd) timerfd_list; 61 MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF); 62 63 static struct unrhdr64 tfdino_unr; 64 65 #define TFD_NOJUMP 0 /* Realtime clock has not jumped. */ 66 #define TFD_READ 1 /* Jumped, tfd has been read since. */ 67 #define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */ 68 #define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */ 69 #define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED) 70 71 /* 72 * One structure allocated per timerfd descriptor. 73 * 74 * Locking semantics: 75 * (t) locked by tfd_lock mtx 76 * (l) locked by timerfd_list_lock sx 77 * (c) const until freeing 78 */ 79 struct timerfd { 80 /* User specified. */ 81 struct itimerspec tfd_time; /* (t) tfd timer */ 82 clockid_t tfd_clockid; /* (c) timing base */ 83 int tfd_flags; /* (c) creation flags */ 84 int tfd_timflags; /* (t) timer flags */ 85 86 /* Used internally. */ 87 timerfd_t tfd_count; /* (t) expiration count since read */ 88 bool tfd_expired; /* (t) true upon initial expiration */ 89 struct mtx tfd_lock; /* tfd mtx lock */ 90 struct callout tfd_callout; /* (t) expiration notification */ 91 struct selinfo tfd_sel; /* (t) I/O alerts */ 92 struct timespec tfd_boottim; /* (t) cached boottime */ 93 int tfd_jumped; /* (t) timer jump status */ 94 LIST_ENTRY(timerfd) entry; /* (l) entry in list */ 95 96 /* For stat(2). */ 97 ino_t tfd_ino; /* (c) inode number */ 98 struct timespec tfd_atim; /* (t) time of last read */ 99 struct timespec tfd_mtim; /* (t) time of last settime */ 100 struct timespec tfd_birthtim; /* (c) creation time */ 101 }; 102 103 static void 104 timerfd_init(void *data) 105 { 106 new_unrhdr64(&tfdino_unr, 1); 107 } 108 109 SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL); 110 111 static inline void 112 timerfd_getboottime(struct timespec *ts) 113 { 114 struct timeval tv; 115 116 getboottime(&tv); 117 TIMEVAL_TO_TIMESPEC(&tv, ts); 118 } 119 120 /* 121 * Call when a discontinuous jump has occured in CLOCK_REALTIME and 122 * update timerfd's cached boottime. A jump can be triggered using 123 * functions like clock_settime(2) or settimeofday(2). 124 * 125 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set 126 * and the realtime clock jumps. 127 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set, 128 * but the realtime clock jumps backwards. 129 */ 130 void 131 timerfd_jumped(void) 132 { 133 struct timerfd *tfd; 134 struct timespec boottime, diff; 135 136 if (LIST_EMPTY(&timerfd_list)) 137 return; 138 139 timerfd_getboottime(&boottime); 140 mtx_lock(&timerfd_list_lock); 141 LIST_FOREACH(tfd, &timerfd_list, entry) { 142 mtx_lock(&tfd->tfd_lock); 143 if (tfd->tfd_clockid != CLOCK_REALTIME || 144 (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 || 145 timespeccmp(&boottime, &tfd->tfd_boottim, ==)) { 146 mtx_unlock(&tfd->tfd_lock); 147 continue; 148 } 149 150 if (callout_active(&tfd->tfd_callout)) { 151 if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0) 152 tfd->tfd_jumped = TFD_CANCELED; 153 else if (timespeccmp(&boottime, &tfd->tfd_boottim, <)) 154 tfd->tfd_jumped = TFD_ZREAD; 155 156 /* 157 * Do not reschedule callout when 158 * inside interval time loop. 159 */ 160 if (!tfd->tfd_expired) { 161 timespecsub(&boottime, 162 &tfd->tfd_boottim, &diff); 163 timespecsub(&tfd->tfd_time.it_value, 164 &diff, &tfd->tfd_time.it_value); 165 if (callout_stop(&tfd->tfd_callout) == 1) { 166 callout_schedule_sbt(&tfd->tfd_callout, 167 tstosbt(tfd->tfd_time.it_value), 168 0, C_ABSOLUTE); 169 } 170 } 171 } 172 173 tfd->tfd_boottim = boottime; 174 mtx_unlock(&tfd->tfd_lock); 175 } 176 mtx_unlock(&timerfd_list_lock); 177 } 178 179 static int 180 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 181 int flags, struct thread *td) 182 { 183 struct timerfd *tfd = fp->f_data; 184 timerfd_t count; 185 int error = 0; 186 187 if (uio->uio_resid < sizeof(timerfd_t)) 188 return (EINVAL); 189 190 mtx_lock(&tfd->tfd_lock); 191 retry: 192 getnanotime(&tfd->tfd_atim); 193 if ((tfd->tfd_jumped & TFD_JUMPED) != 0) { 194 if (tfd->tfd_jumped == TFD_CANCELED) 195 error = ECANCELED; 196 tfd->tfd_jumped = TFD_READ; 197 tfd->tfd_count = 0; 198 mtx_unlock(&tfd->tfd_lock); 199 return (error); 200 } else { 201 tfd->tfd_jumped = TFD_NOJUMP; 202 } 203 if (tfd->tfd_count == 0) { 204 if ((fp->f_flag & FNONBLOCK) != 0) { 205 mtx_unlock(&tfd->tfd_lock); 206 return (EAGAIN); 207 } 208 td->td_rtcgen = atomic_load_acq_int(&rtc_generation); 209 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, 210 PCATCH, "tfdrd", 0); 211 if (error == 0) { 212 goto retry; 213 } else { 214 mtx_unlock(&tfd->tfd_lock); 215 return (error); 216 } 217 } 218 219 count = tfd->tfd_count; 220 tfd->tfd_count = 0; 221 mtx_unlock(&tfd->tfd_lock); 222 error = uiomove(&count, sizeof(timerfd_t), uio); 223 224 return (error); 225 } 226 227 static int 228 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 229 struct ucred *active_cred, struct thread *td) 230 { 231 switch (cmd) { 232 case FIOASYNC: 233 if (*(int *)data != 0) 234 atomic_set_int(&fp->f_flag, FASYNC); 235 else 236 atomic_clear_int(&fp->f_flag, FASYNC); 237 return (0); 238 case FIONBIO: 239 if (*(int *)data != 0) 240 atomic_set_int(&fp->f_flag, FNONBLOCK); 241 else 242 atomic_clear_int(&fp->f_flag, FNONBLOCK); 243 return (0); 244 } 245 return (ENOTTY); 246 } 247 248 static int 249 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 250 struct thread *td) 251 { 252 struct timerfd *tfd = fp->f_data; 253 int revents = 0; 254 255 mtx_lock(&tfd->tfd_lock); 256 if ((events & (POLLIN | POLLRDNORM)) != 0 && 257 tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ) 258 revents |= events & (POLLIN | POLLRDNORM); 259 if (revents == 0) 260 selrecord(td, &tfd->tfd_sel); 261 mtx_unlock(&tfd->tfd_lock); 262 263 return (revents); 264 } 265 266 static void 267 filt_timerfddetach(struct knote *kn) 268 { 269 struct timerfd *tfd = kn->kn_hook; 270 271 mtx_lock(&tfd->tfd_lock); 272 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 273 mtx_unlock(&tfd->tfd_lock); 274 } 275 276 static int 277 filt_timerfdread(struct knote *kn, long hint) 278 { 279 struct timerfd *tfd = kn->kn_hook; 280 281 mtx_assert(&tfd->tfd_lock, MA_OWNED); 282 kn->kn_data = (int64_t)tfd->tfd_count; 283 return (tfd->tfd_count > 0); 284 } 285 286 static struct filterops timerfd_rfiltops = { 287 .f_isfd = 1, 288 .f_detach = filt_timerfddetach, 289 .f_event = filt_timerfdread, 290 }; 291 292 static int 293 timerfd_kqfilter(struct file *fp, struct knote *kn) 294 { 295 struct timerfd *tfd = fp->f_data; 296 297 if (kn->kn_filter != EVFILT_READ) 298 return (EINVAL); 299 300 kn->kn_fop = &timerfd_rfiltops; 301 kn->kn_hook = tfd; 302 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 303 304 return (0); 305 } 306 307 static int 308 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) 309 { 310 struct timerfd *tfd = fp->f_data; 311 312 bzero(sb, sizeof(*sb)); 313 sb->st_nlink = fp->f_count - 1; 314 sb->st_uid = fp->f_cred->cr_uid; 315 sb->st_gid = fp->f_cred->cr_gid; 316 sb->st_blksize = PAGE_SIZE; 317 mtx_lock(&tfd->tfd_lock); 318 sb->st_atim = tfd->tfd_atim; 319 sb->st_mtim = tfd->tfd_mtim; 320 mtx_unlock(&tfd->tfd_lock); 321 sb->st_ctim = sb->st_mtim; 322 sb->st_ino = tfd->tfd_ino; 323 sb->st_birthtim = tfd->tfd_birthtim; 324 325 return (0); 326 } 327 328 static int 329 timerfd_close(struct file *fp, struct thread *td) 330 { 331 struct timerfd *tfd = fp->f_data; 332 333 mtx_lock(&timerfd_list_lock); 334 LIST_REMOVE(tfd, entry); 335 mtx_unlock(&timerfd_list_lock); 336 337 callout_drain(&tfd->tfd_callout); 338 seldrain(&tfd->tfd_sel); 339 knlist_destroy(&tfd->tfd_sel.si_note); 340 mtx_destroy(&tfd->tfd_lock); 341 free(tfd, M_TIMERFD); 342 fp->f_ops = &badfileops; 343 344 return (0); 345 } 346 347 static int 348 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, 349 struct filedesc *fdp) 350 { 351 struct timerfd *tfd = fp->f_data; 352 353 kif->kf_type = KF_TYPE_TIMERFD; 354 kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid; 355 kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags; 356 kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd; 357 358 return (0); 359 } 360 361 static struct fileops timerfdops = { 362 .fo_read = timerfd_read, 363 .fo_write = invfo_rdwr, 364 .fo_truncate = invfo_truncate, 365 .fo_ioctl = timerfd_ioctl, 366 .fo_poll = timerfd_poll, 367 .fo_kqfilter = timerfd_kqfilter, 368 .fo_stat = timerfd_stat, 369 .fo_close = timerfd_close, 370 .fo_chmod = invfo_chmod, 371 .fo_chown = invfo_chown, 372 .fo_sendfile = invfo_sendfile, 373 .fo_fill_kinfo = timerfd_fill_kinfo, 374 .fo_flags = DFLAG_PASSABLE, 375 }; 376 377 static void 378 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value) 379 { 380 struct timespec curr_value; 381 382 mtx_assert(&tfd->tfd_lock, MA_OWNED); 383 *old_value = tfd->tfd_time; 384 if (timespecisset(&tfd->tfd_time.it_value)) { 385 nanouptime(&curr_value); 386 timespecsub(&tfd->tfd_time.it_value, &curr_value, 387 &old_value->it_value); 388 } 389 } 390 391 static void 392 timerfd_expire(void *arg) 393 { 394 struct timerfd *tfd = (struct timerfd *)arg; 395 struct timespec uptime; 396 397 ++tfd->tfd_count; 398 tfd->tfd_expired = true; 399 if (timespecisset(&tfd->tfd_time.it_interval)) { 400 /* Count missed events. */ 401 nanouptime(&uptime); 402 if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) { 403 timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime); 404 tfd->tfd_count += tstosbt(uptime) / 405 tstosbt(tfd->tfd_time.it_interval); 406 } 407 timespecadd(&tfd->tfd_time.it_value, 408 &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value); 409 callout_schedule_sbt(&tfd->tfd_callout, 410 tstosbt(tfd->tfd_time.it_value), 411 0, C_ABSOLUTE); 412 } else { 413 /* Single shot timer. */ 414 callout_deactivate(&tfd->tfd_callout); 415 timespecclear(&tfd->tfd_time.it_value); 416 } 417 418 wakeup(&tfd->tfd_count); 419 selwakeup(&tfd->tfd_sel); 420 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 421 } 422 423 int 424 kern_timerfd_create(struct thread *td, int clockid, int flags) 425 { 426 struct file *fp; 427 struct timerfd *tfd; 428 int error, fd, fflags; 429 430 AUDIT_ARG_VALUE(clockid); 431 AUDIT_ARG_FFLAGS(flags); 432 433 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 434 return (EINVAL); 435 if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0) 436 return (EINVAL); 437 438 fflags = FREAD; 439 if ((flags & TFD_CLOEXEC) != 0) 440 fflags |= O_CLOEXEC; 441 if ((flags & TFD_NONBLOCK) != 0) 442 fflags |= FNONBLOCK; 443 444 error = falloc(td, &fp, &fd, fflags); 445 if (error != 0) 446 return (error); 447 448 tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO); 449 tfd->tfd_clockid = (clockid_t)clockid; 450 tfd->tfd_flags = flags; 451 tfd->tfd_ino = alloc_unr64(&tfdino_unr); 452 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 453 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 454 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 455 timerfd_getboottime(&tfd->tfd_boottim); 456 getnanotime(&tfd->tfd_birthtim); 457 mtx_lock(&timerfd_list_lock); 458 LIST_INSERT_HEAD(&timerfd_list, tfd, entry); 459 mtx_unlock(&timerfd_list_lock); 460 461 finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops); 462 463 fdrop(fp, td); 464 465 td->td_retval[0] = fd; 466 return (0); 467 } 468 469 int 470 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value) 471 { 472 struct file *fp; 473 struct timerfd *tfd; 474 int error; 475 476 error = fget(td, fd, &cap_write_rights, &fp); 477 if (error != 0) 478 return (error); 479 if (fp->f_type != DTYPE_TIMERFD) { 480 fdrop(fp, td); 481 return (EINVAL); 482 } 483 tfd = fp->f_data; 484 485 mtx_lock(&tfd->tfd_lock); 486 timerfd_curval(tfd, curr_value); 487 mtx_unlock(&tfd->tfd_lock); 488 489 fdrop(fp, td); 490 return (0); 491 } 492 493 int 494 kern_timerfd_settime(struct thread *td, int fd, int flags, 495 const struct itimerspec *new_value, struct itimerspec *old_value) 496 { 497 struct file *fp; 498 struct timerfd *tfd; 499 struct timespec ts; 500 int error = 0; 501 502 if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0) 503 return (EINVAL); 504 if (!timespecvalid_interval(&new_value->it_value) || 505 !timespecvalid_interval(&new_value->it_interval)) 506 return (EINVAL); 507 508 error = fget(td, fd, &cap_write_rights, &fp); 509 if (error != 0) 510 return (error); 511 if (fp->f_type != DTYPE_TIMERFD) { 512 fdrop(fp, td); 513 return (EINVAL); 514 } 515 tfd = fp->f_data; 516 517 mtx_lock(&tfd->tfd_lock); 518 getnanotime(&tfd->tfd_mtim); 519 tfd->tfd_timflags = flags; 520 521 /* Store old itimerspec, if applicable. */ 522 if (old_value != NULL) 523 timerfd_curval(tfd, old_value); 524 525 /* Set new expiration. */ 526 tfd->tfd_time = *new_value; 527 if (timespecisset(&tfd->tfd_time.it_value)) { 528 if ((flags & TFD_TIMER_ABSTIME) == 0) { 529 nanouptime(&ts); 530 timespecadd(&tfd->tfd_time.it_value, &ts, 531 &tfd->tfd_time.it_value); 532 } else if (tfd->tfd_clockid == CLOCK_REALTIME) { 533 /* ECANCELED if unread jump is pending. */ 534 if (tfd->tfd_jumped == TFD_CANCELED) 535 error = ECANCELED; 536 /* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */ 537 timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim, 538 &tfd->tfd_time.it_value); 539 } 540 callout_reset_sbt(&tfd->tfd_callout, 541 tstosbt(tfd->tfd_time.it_value), 542 0, timerfd_expire, tfd, C_ABSOLUTE); 543 } else { 544 callout_stop(&tfd->tfd_callout); 545 } 546 tfd->tfd_count = 0; 547 tfd->tfd_expired = false; 548 tfd->tfd_jumped = TFD_NOJUMP; 549 mtx_unlock(&tfd->tfd_lock); 550 551 fdrop(fp, td); 552 return (error); 553 } 554 555 int 556 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap) 557 { 558 return (kern_timerfd_create(td, uap->clockid, uap->flags)); 559 } 560 561 int 562 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap) 563 { 564 struct itimerspec curr_value; 565 int error; 566 567 error = kern_timerfd_gettime(td, uap->fd, &curr_value); 568 if (error == 0) 569 error = copyout(&curr_value, uap->curr_value, 570 sizeof(curr_value)); 571 572 return (error); 573 } 574 575 int 576 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap) 577 { 578 struct itimerspec new_value, old_value; 579 int error; 580 581 error = copyin(uap->new_value, &new_value, sizeof(new_value)); 582 if (error != 0) 583 return (error); 584 if (uap->old_value == NULL) { 585 error = kern_timerfd_settime(td, uap->fd, uap->flags, 586 &new_value, NULL); 587 } else { 588 error = kern_timerfd_settime(td, uap->fd, uap->flags, 589 &new_value, &old_value); 590 if (error == 0) 591 error = copyout(&old_value, uap->old_value, 592 sizeof(old_value)); 593 } 594 return (error); 595 } 596