1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2014 Dmitry Chagin <dchagin@FreeBSD.org> 5 * Copyright (c) 2023 Jake Freeland <jfree@FreeBSD.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/callout.h> 32 #include <sys/fcntl.h> 33 #include <sys/file.h> 34 #include <sys/filedesc.h> 35 #include <sys/filio.h> 36 #include <sys/kernel.h> 37 #include <sys/lock.h> 38 #include <sys/malloc.h> 39 #include <sys/mount.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/proc.h> 43 #include <sys/queue.h> 44 #include <sys/selinfo.h> 45 #include <sys/stat.h> 46 #include <sys/sx.h> 47 #include <sys/sysctl.h> 48 #include <sys/sysent.h> 49 #include <sys/sysproto.h> 50 #include <sys/timerfd.h> 51 #include <sys/timespec.h> 52 #include <sys/uio.h> 53 #include <sys/user.h> 54 55 #include <security/audit/audit.h> 56 57 static MALLOC_DEFINE(M_TIMERFD, "timerfd", "timerfd structures"); 58 59 static struct mtx timerfd_list_lock; 60 static LIST_HEAD(, timerfd) timerfd_list; 61 MTX_SYSINIT(timerfd, &timerfd_list_lock, "timerfd_list_lock", MTX_DEF); 62 63 static struct unrhdr64 tfdino_unr; 64 65 #define TFD_NOJUMP 0 /* Realtime clock has not jumped. */ 66 #define TFD_READ 1 /* Jumped, tfd has been read since. */ 67 #define TFD_ZREAD 2 /* Jumped backwards, CANCEL_ON_SET=false. */ 68 #define TFD_CANCELED 4 /* Jumped, CANCEL_ON_SET=true. */ 69 #define TFD_JUMPED (TFD_ZREAD | TFD_CANCELED) 70 71 /* 72 * One structure allocated per timerfd descriptor. 73 * 74 * Locking semantics: 75 * (t) locked by tfd_lock mtx 76 * (l) locked by timerfd_list_lock sx 77 * (c) const until freeing 78 */ 79 struct timerfd { 80 /* User specified. */ 81 struct itimerspec tfd_time; /* (t) tfd timer */ 82 clockid_t tfd_clockid; /* (c) timing base */ 83 int tfd_flags; /* (c) creation flags */ 84 int tfd_timflags; /* (t) timer flags */ 85 86 /* Used internally. */ 87 timerfd_t tfd_count; /* (t) expiration count since read */ 88 bool tfd_expired; /* (t) true upon initial expiration */ 89 struct mtx tfd_lock; /* tfd mtx lock */ 90 struct callout tfd_callout; /* (t) expiration notification */ 91 struct selinfo tfd_sel; /* (t) I/O alerts */ 92 struct timespec tfd_boottim; /* (t) cached boottime */ 93 int tfd_jumped; /* (t) timer jump status */ 94 LIST_ENTRY(timerfd) entry; /* (l) entry in list */ 95 96 /* For stat(2). */ 97 ino_t tfd_ino; /* (c) inode number */ 98 struct timespec tfd_atim; /* (t) time of last read */ 99 struct timespec tfd_mtim; /* (t) time of last settime */ 100 struct timespec tfd_birthtim; /* (c) creation time */ 101 }; 102 103 static void 104 timerfd_init(void *data) 105 { 106 new_unrhdr64(&tfdino_unr, 1); 107 } 108 109 SYSINIT(timerfd, SI_SUB_VFS, SI_ORDER_ANY, timerfd_init, NULL); 110 111 static inline void 112 timerfd_getboottime(struct timespec *ts) 113 { 114 struct timeval tv; 115 116 getboottime(&tv); 117 TIMEVAL_TO_TIMESPEC(&tv, ts); 118 } 119 120 /* 121 * Call when a discontinuous jump has occured in CLOCK_REALTIME and 122 * update timerfd's cached boottime. A jump can be triggered using 123 * functions like clock_settime(2) or settimeofday(2). 124 * 125 * Timer is marked TFD_CANCELED if TFD_TIMER_CANCEL_ON_SET is set 126 * and the realtime clock jumps. 127 * Timer is marked TFD_ZREAD if TFD_TIMER_CANCEL_ON_SET is not set, 128 * but the realtime clock jumps backwards. 129 */ 130 void 131 timerfd_jumped(void) 132 { 133 struct timerfd *tfd; 134 struct timespec boottime, diff; 135 136 if (LIST_EMPTY(&timerfd_list)) 137 return; 138 139 timerfd_getboottime(&boottime); 140 mtx_lock(&timerfd_list_lock); 141 LIST_FOREACH(tfd, &timerfd_list, entry) { 142 mtx_lock(&tfd->tfd_lock); 143 if (tfd->tfd_clockid != CLOCK_REALTIME || 144 (tfd->tfd_timflags & TFD_TIMER_ABSTIME) == 0 || 145 timespeccmp(&boottime, &tfd->tfd_boottim, ==)) { 146 mtx_unlock(&tfd->tfd_lock); 147 continue; 148 } 149 150 if (callout_active(&tfd->tfd_callout)) { 151 if ((tfd->tfd_timflags & TFD_TIMER_CANCEL_ON_SET) != 0) 152 tfd->tfd_jumped = TFD_CANCELED; 153 else if (timespeccmp(&boottime, &tfd->tfd_boottim, <)) 154 tfd->tfd_jumped = TFD_ZREAD; 155 156 /* 157 * Do not reschedule callout when 158 * inside interval time loop. 159 */ 160 if (!tfd->tfd_expired) { 161 timespecsub(&boottime, 162 &tfd->tfd_boottim, &diff); 163 timespecsub(&tfd->tfd_time.it_value, 164 &diff, &tfd->tfd_time.it_value); 165 if (callout_stop(&tfd->tfd_callout) == 1) { 166 callout_schedule_sbt(&tfd->tfd_callout, 167 tstosbt(tfd->tfd_time.it_value), 168 0, C_ABSOLUTE); 169 } 170 } 171 } 172 173 tfd->tfd_boottim = boottime; 174 mtx_unlock(&tfd->tfd_lock); 175 } 176 mtx_unlock(&timerfd_list_lock); 177 } 178 179 static int 180 timerfd_read(struct file *fp, struct uio *uio, struct ucred *active_cred, 181 int flags, struct thread *td) 182 { 183 struct timerfd *tfd = fp->f_data; 184 timerfd_t count; 185 int error = 0; 186 187 if (uio->uio_resid < sizeof(timerfd_t)) 188 return (EINVAL); 189 190 mtx_lock(&tfd->tfd_lock); 191 retry: 192 getnanotime(&tfd->tfd_atim); 193 if ((tfd->tfd_jumped & TFD_JUMPED) != 0) { 194 if (tfd->tfd_jumped == TFD_CANCELED) 195 error = ECANCELED; 196 tfd->tfd_jumped = TFD_READ; 197 tfd->tfd_count = 0; 198 mtx_unlock(&tfd->tfd_lock); 199 return (error); 200 } else { 201 tfd->tfd_jumped = TFD_NOJUMP; 202 } 203 if (tfd->tfd_count == 0) { 204 if ((fp->f_flag & FNONBLOCK) != 0) { 205 mtx_unlock(&tfd->tfd_lock); 206 return (EAGAIN); 207 } 208 td->td_rtcgen = atomic_load_acq_int(&rtc_generation); 209 error = mtx_sleep(&tfd->tfd_count, &tfd->tfd_lock, 210 PCATCH, "tfdrd", 0); 211 if (error == 0) { 212 goto retry; 213 } else { 214 mtx_unlock(&tfd->tfd_lock); 215 return (error); 216 } 217 } 218 219 count = tfd->tfd_count; 220 tfd->tfd_count = 0; 221 mtx_unlock(&tfd->tfd_lock); 222 error = uiomove(&count, sizeof(timerfd_t), uio); 223 224 return (error); 225 } 226 227 static int 228 timerfd_ioctl(struct file *fp, u_long cmd, void *data, 229 struct ucred *active_cred, struct thread *td) 230 { 231 switch (cmd) { 232 case FIOASYNC: 233 if (*(int *)data != 0) 234 atomic_set_int(&fp->f_flag, FASYNC); 235 else 236 atomic_clear_int(&fp->f_flag, FASYNC); 237 return (0); 238 case FIONBIO: 239 if (*(int *)data != 0) 240 atomic_set_int(&fp->f_flag, FNONBLOCK); 241 else 242 atomic_clear_int(&fp->f_flag, FNONBLOCK); 243 return (0); 244 } 245 return (ENOTTY); 246 } 247 248 static int 249 timerfd_poll(struct file *fp, int events, struct ucred *active_cred, 250 struct thread *td) 251 { 252 struct timerfd *tfd = fp->f_data; 253 int revents = 0; 254 255 mtx_lock(&tfd->tfd_lock); 256 if ((events & (POLLIN | POLLRDNORM)) != 0 && 257 tfd->tfd_count > 0 && tfd->tfd_jumped != TFD_READ) 258 revents |= events & (POLLIN | POLLRDNORM); 259 if (revents == 0) 260 selrecord(td, &tfd->tfd_sel); 261 mtx_unlock(&tfd->tfd_lock); 262 263 return (revents); 264 } 265 266 static void 267 filt_timerfddetach(struct knote *kn) 268 { 269 struct timerfd *tfd = kn->kn_hook; 270 271 mtx_lock(&tfd->tfd_lock); 272 knlist_remove(&tfd->tfd_sel.si_note, kn, 1); 273 mtx_unlock(&tfd->tfd_lock); 274 } 275 276 static int 277 filt_timerfdread(struct knote *kn, long hint) 278 { 279 struct timerfd *tfd = kn->kn_hook; 280 281 mtx_assert(&tfd->tfd_lock, MA_OWNED); 282 kn->kn_data = (int64_t)tfd->tfd_count; 283 return (tfd->tfd_count > 0); 284 } 285 286 static struct filterops timerfd_rfiltops = { 287 .f_isfd = 1, 288 .f_detach = filt_timerfddetach, 289 .f_event = filt_timerfdread, 290 }; 291 292 static int 293 timerfd_kqfilter(struct file *fp, struct knote *kn) 294 { 295 struct timerfd *tfd = fp->f_data; 296 297 if (kn->kn_filter != EVFILT_READ) 298 return (EINVAL); 299 300 kn->kn_fop = &timerfd_rfiltops; 301 kn->kn_hook = tfd; 302 knlist_add(&tfd->tfd_sel.si_note, kn, 0); 303 304 return (0); 305 } 306 307 static int 308 timerfd_stat(struct file *fp, struct stat *sb, struct ucred *active_cred) 309 { 310 struct timerfd *tfd = fp->f_data; 311 312 bzero(sb, sizeof(*sb)); 313 sb->st_nlink = fp->f_count - 1; 314 sb->st_uid = fp->f_cred->cr_uid; 315 sb->st_gid = fp->f_cred->cr_gid; 316 sb->st_blksize = PAGE_SIZE; 317 mtx_lock(&tfd->tfd_lock); 318 sb->st_atim = tfd->tfd_atim; 319 sb->st_mtim = tfd->tfd_mtim; 320 mtx_unlock(&tfd->tfd_lock); 321 sb->st_ctim = sb->st_mtim; 322 sb->st_ino = tfd->tfd_ino; 323 sb->st_birthtim = tfd->tfd_birthtim; 324 325 return (0); 326 } 327 328 static int 329 timerfd_close(struct file *fp, struct thread *td) 330 { 331 struct timerfd *tfd = fp->f_data; 332 333 mtx_lock(&timerfd_list_lock); 334 LIST_REMOVE(tfd, entry); 335 mtx_unlock(&timerfd_list_lock); 336 337 callout_drain(&tfd->tfd_callout); 338 seldrain(&tfd->tfd_sel); 339 knlist_destroy(&tfd->tfd_sel.si_note); 340 mtx_destroy(&tfd->tfd_lock); 341 free(tfd, M_TIMERFD); 342 fp->f_ops = &badfileops; 343 344 return (0); 345 } 346 347 static int 348 timerfd_fill_kinfo(struct file *fp, struct kinfo_file *kif, 349 struct filedesc *fdp) 350 { 351 struct timerfd *tfd = fp->f_data; 352 353 kif->kf_type = KF_TYPE_TIMERFD; 354 kif->kf_un.kf_timerfd.kf_timerfd_clockid = tfd->tfd_clockid; 355 kif->kf_un.kf_timerfd.kf_timerfd_flags = tfd->tfd_flags; 356 kif->kf_un.kf_timerfd.kf_timerfd_addr = (uintptr_t)tfd; 357 358 return (0); 359 } 360 361 static struct fileops timerfdops = { 362 .fo_read = timerfd_read, 363 .fo_write = invfo_rdwr, 364 .fo_truncate = invfo_truncate, 365 .fo_ioctl = timerfd_ioctl, 366 .fo_poll = timerfd_poll, 367 .fo_kqfilter = timerfd_kqfilter, 368 .fo_stat = timerfd_stat, 369 .fo_close = timerfd_close, 370 .fo_chmod = invfo_chmod, 371 .fo_chown = invfo_chown, 372 .fo_sendfile = invfo_sendfile, 373 .fo_fill_kinfo = timerfd_fill_kinfo, 374 .fo_cmp = file_kcmp_generic, 375 .fo_flags = DFLAG_PASSABLE, 376 }; 377 378 static void 379 timerfd_curval(struct timerfd *tfd, struct itimerspec *old_value) 380 { 381 struct timespec curr_value; 382 383 mtx_assert(&tfd->tfd_lock, MA_OWNED); 384 *old_value = tfd->tfd_time; 385 if (timespecisset(&tfd->tfd_time.it_value)) { 386 nanouptime(&curr_value); 387 timespecsub(&tfd->tfd_time.it_value, &curr_value, 388 &old_value->it_value); 389 } 390 } 391 392 static void 393 timerfd_expire(void *arg) 394 { 395 struct timerfd *tfd = (struct timerfd *)arg; 396 struct timespec uptime; 397 398 ++tfd->tfd_count; 399 tfd->tfd_expired = true; 400 if (timespecisset(&tfd->tfd_time.it_interval)) { 401 /* Count missed events. */ 402 nanouptime(&uptime); 403 if (timespeccmp(&uptime, &tfd->tfd_time.it_value, >)) { 404 timespecsub(&uptime, &tfd->tfd_time.it_value, &uptime); 405 tfd->tfd_count += tstosbt(uptime) / 406 tstosbt(tfd->tfd_time.it_interval); 407 } 408 timespecadd(&tfd->tfd_time.it_value, 409 &tfd->tfd_time.it_interval, &tfd->tfd_time.it_value); 410 callout_schedule_sbt(&tfd->tfd_callout, 411 tstosbt(tfd->tfd_time.it_value), 412 0, C_ABSOLUTE); 413 } else { 414 /* Single shot timer. */ 415 callout_deactivate(&tfd->tfd_callout); 416 timespecclear(&tfd->tfd_time.it_value); 417 } 418 419 wakeup(&tfd->tfd_count); 420 selwakeup(&tfd->tfd_sel); 421 KNOTE_LOCKED(&tfd->tfd_sel.si_note, 0); 422 } 423 424 int 425 kern_timerfd_create(struct thread *td, int clockid, int flags) 426 { 427 struct file *fp; 428 struct timerfd *tfd; 429 int error, fd, fflags; 430 431 AUDIT_ARG_VALUE(clockid); 432 AUDIT_ARG_FFLAGS(flags); 433 434 if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) 435 return (EINVAL); 436 if ((flags & ~(TFD_CLOEXEC | TFD_NONBLOCK)) != 0) 437 return (EINVAL); 438 439 fflags = FREAD; 440 if ((flags & TFD_CLOEXEC) != 0) 441 fflags |= O_CLOEXEC; 442 if ((flags & TFD_NONBLOCK) != 0) 443 fflags |= FNONBLOCK; 444 445 error = falloc(td, &fp, &fd, fflags); 446 if (error != 0) 447 return (error); 448 449 tfd = malloc(sizeof(*tfd), M_TIMERFD, M_WAITOK | M_ZERO); 450 tfd->tfd_clockid = (clockid_t)clockid; 451 tfd->tfd_flags = flags; 452 tfd->tfd_ino = alloc_unr64(&tfdino_unr); 453 mtx_init(&tfd->tfd_lock, "timerfd", NULL, MTX_DEF); 454 callout_init_mtx(&tfd->tfd_callout, &tfd->tfd_lock, 0); 455 knlist_init_mtx(&tfd->tfd_sel.si_note, &tfd->tfd_lock); 456 timerfd_getboottime(&tfd->tfd_boottim); 457 getnanotime(&tfd->tfd_birthtim); 458 mtx_lock(&timerfd_list_lock); 459 LIST_INSERT_HEAD(&timerfd_list, tfd, entry); 460 mtx_unlock(&timerfd_list_lock); 461 462 finit(fp, fflags, DTYPE_TIMERFD, tfd, &timerfdops); 463 464 fdrop(fp, td); 465 466 td->td_retval[0] = fd; 467 return (0); 468 } 469 470 int 471 kern_timerfd_gettime(struct thread *td, int fd, struct itimerspec *curr_value) 472 { 473 struct file *fp; 474 struct timerfd *tfd; 475 int error; 476 477 error = fget(td, fd, &cap_write_rights, &fp); 478 if (error != 0) 479 return (error); 480 if (fp->f_type != DTYPE_TIMERFD) { 481 fdrop(fp, td); 482 return (EINVAL); 483 } 484 tfd = fp->f_data; 485 486 mtx_lock(&tfd->tfd_lock); 487 timerfd_curval(tfd, curr_value); 488 mtx_unlock(&tfd->tfd_lock); 489 490 fdrop(fp, td); 491 return (0); 492 } 493 494 int 495 kern_timerfd_settime(struct thread *td, int fd, int flags, 496 const struct itimerspec *new_value, struct itimerspec *old_value) 497 { 498 struct file *fp; 499 struct timerfd *tfd; 500 struct timespec ts; 501 int error = 0; 502 503 if ((flags & ~(TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)) != 0) 504 return (EINVAL); 505 if (!timespecvalid_interval(&new_value->it_value) || 506 !timespecvalid_interval(&new_value->it_interval)) 507 return (EINVAL); 508 509 error = fget(td, fd, &cap_write_rights, &fp); 510 if (error != 0) 511 return (error); 512 if (fp->f_type != DTYPE_TIMERFD) { 513 fdrop(fp, td); 514 return (EINVAL); 515 } 516 tfd = fp->f_data; 517 518 mtx_lock(&tfd->tfd_lock); 519 getnanotime(&tfd->tfd_mtim); 520 tfd->tfd_timflags = flags; 521 522 /* Store old itimerspec, if applicable. */ 523 if (old_value != NULL) 524 timerfd_curval(tfd, old_value); 525 526 /* Set new expiration. */ 527 tfd->tfd_time = *new_value; 528 if (timespecisset(&tfd->tfd_time.it_value)) { 529 if ((flags & TFD_TIMER_ABSTIME) == 0) { 530 nanouptime(&ts); 531 timespecadd(&tfd->tfd_time.it_value, &ts, 532 &tfd->tfd_time.it_value); 533 } else if (tfd->tfd_clockid == CLOCK_REALTIME) { 534 /* ECANCELED if unread jump is pending. */ 535 if (tfd->tfd_jumped == TFD_CANCELED) 536 error = ECANCELED; 537 /* Convert from CLOCK_REALTIME to CLOCK_BOOTTIME. */ 538 timespecsub(&tfd->tfd_time.it_value, &tfd->tfd_boottim, 539 &tfd->tfd_time.it_value); 540 } 541 callout_reset_sbt(&tfd->tfd_callout, 542 tstosbt(tfd->tfd_time.it_value), 543 0, timerfd_expire, tfd, C_ABSOLUTE); 544 } else { 545 callout_stop(&tfd->tfd_callout); 546 } 547 tfd->tfd_count = 0; 548 tfd->tfd_expired = false; 549 tfd->tfd_jumped = TFD_NOJUMP; 550 mtx_unlock(&tfd->tfd_lock); 551 552 fdrop(fp, td); 553 return (error); 554 } 555 556 int 557 sys_timerfd_create(struct thread *td, struct timerfd_create_args *uap) 558 { 559 return (kern_timerfd_create(td, uap->clockid, uap->flags)); 560 } 561 562 int 563 sys_timerfd_gettime(struct thread *td, struct timerfd_gettime_args *uap) 564 { 565 struct itimerspec curr_value; 566 int error; 567 568 error = kern_timerfd_gettime(td, uap->fd, &curr_value); 569 if (error == 0) 570 error = copyout(&curr_value, uap->curr_value, 571 sizeof(curr_value)); 572 573 return (error); 574 } 575 576 int 577 sys_timerfd_settime(struct thread *td, struct timerfd_settime_args *uap) 578 { 579 struct itimerspec new_value, old_value; 580 int error; 581 582 error = copyin(uap->new_value, &new_value, sizeof(new_value)); 583 if (error != 0) 584 return (error); 585 if (uap->old_value == NULL) { 586 error = kern_timerfd_settime(td, uap->fd, uap->flags, 587 &new_value, NULL); 588 } else { 589 error = kern_timerfd_settime(td, uap->fd, uap->flags, 590 &new_value, &old_value); 591 if (error == 0) 592 error = copyout(&old_value, uap->old_value, 593 sizeof(old_value)); 594 } 595 return (error); 596 } 597