// SPDX-License-Identifier: GPL-2.0
/*
 *  fs/timerfd.c
 *
 *  Copyright (C) 2007  Davide Libenzi <davidel@xmailserver.org>
 *
 *
 *  Thanks to Thomas Gleixner for code reviews and useful comments.
 *
 */

#include <linux/alarmtimer.h>
#include <linux/file.h>
#include <linux/poll.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/time.h>
#include <linux/hrtimer.h>
#include <linux/anon_inodes.h>
#include <linux/timerfd.h>
#include <linux/syscalls.h>
#include <linux/compat.h>
#include <linux/rcupdate.h>
#include <linux/time_namespace.h>

/*
 * Per-file state of a timerfd, stored in file->private_data.
 *
 * @t:             the backing timer; t.alarm is used when isalarm() is
 *                 true for the context, t.tmr otherwise.
 * @tintv:         interval taken from it_interval; non-zero means the
 *                 timer is periodic.
 * @moffs:         snapshot of ktime_mono_to_real(0); set to KTIME_MAX by
 *                 timerfd_clock_was_set() to mark the timer as canceled.
 * @wqh:           wait queue used by read and poll; wqh.lock is also the
 *                 lock taken around updates of @ticks and @expired.
 * @ticks:         expirations accumulated and not yet consumed by a read.
 * @clockid:       clock id passed to timerfd_create().
 * @expired:       set to 1 by the timer callback, cleared when the
 *                 expiration has been handled.
 * @settime_flags: flags of the last settime that returned 0.
 * @rcu:           used by kfree_rcu() in timerfd_release().
 * @clist:         linkage in the global cancel_list.
 * @cancel_lock:   held around changes of @might_cancel and @clist for
 *                 this context.
 * @might_cancel:  true while the context is linked into cancel_list.
 */
struct timerfd_ctx {
	union {
		struct hrtimer tmr;
		struct alarm alarm;
	} t;
	ktime_t tintv;
	ktime_t moffs;
	wait_queue_head_t wqh;
	u64 ticks;
	int clockid;
	short unsigned expired;
	short unsigned settime_flags;	/* to show in fdinfo */
	struct rcu_head rcu;
	struct list_head clist;
	spinlock_t cancel_lock;
	bool might_cancel;
};

/*
 * Contexts armed with cancel-on-set. The list is walked under
 * rcu_read_lock() by timerfd_clock_was_set(); additions and removals are
 * done with the global cancel_lock held.
 */
static LIST_HEAD(cancel_list);
static DEFINE_SPINLOCK(cancel_lock);

/* True if the context was created on one of the two *_ALARM clocks. */
static inline bool isalarm(struct timerfd_ctx *ctx)
{
	return ctx->clockid == CLOCK_REALTIME_ALARM ||
		ctx->clockid == CLOCK_BOOTTIME_ALARM;
}

/*
 * This gets called when the timer event triggers. We set the "expired"
 * flag, but we do not re-arm the timer (in case it's necessary,
 * tintv != 0) until the timer is accessed.
 *
 * The tick count is bumped and waiters are woken with EPOLLIN, all under
 * wqh.lock.
 */
static void timerfd_triggered(struct timerfd_ctx *ctx)
{
	unsigned long flags;

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	ctx->expired = 1;
	ctx->ticks++;
	wake_up_locked_poll(&ctx->wqh, EPOLLIN);
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);
}

/*
 * hrtimer callback: recover the owning context and record the expiry.
 * Always returns HRTIMER_NORESTART; periodic timers are re-armed later
 * from the read/gettime paths, not from the callback.
 */
static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
{
	struct timerfd_ctx *ctx = container_of(htmr, struct timerfd_ctx,
					       t.tmr);
	timerfd_triggered(ctx);
	return HRTIMER_NORESTART;
}

/*
 * Alarm timer callback: recover the owning context and record the
 * expiry. The @now argument is not used.
 */
static void timerfd_alarmproc(struct alarm *alarm, ktime_t now)
{
	struct timerfd_ctx *ctx = container_of(alarm, struct timerfd_ctx,
					       t.alarm);
	timerfd_triggered(ctx);
}

/*
 * Called when the clock was set to cancel the timers in the cancel
 * list. This will wake up processes waiting on these timers. The
 * wake-up requires ctx->ticks to be non zero, therefore we increment
 * it before calling wake_up_locked_poll().
 *
 * A context whose moffs already equals the current offset is left
 * untouched; otherwise moffs is set to KTIME_MAX, which is the marker
 * timerfd_canceled() looks for.
 */
void timerfd_clock_was_set(void)
{
	ktime_t moffs = ktime_mono_to_real(0);
	struct timerfd_ctx *ctx;
	unsigned long flags;

	rcu_read_lock();
	list_for_each_entry_rcu(ctx, &cancel_list, clist) {
		if (!ctx->might_cancel)
			continue;
		spin_lock_irqsave(&ctx->wqh.lock, flags);
		if (ctx->moffs != moffs) {
			ctx->moffs = KTIME_MAX;
			ctx->ticks++;
			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
		}
		spin_unlock_irqrestore(&ctx->wqh.lock, flags);
	}
	rcu_read_unlock();
}

/* Work handler behind timerfd_resume(): runs timerfd_clock_was_set(). */
static void timerfd_resume_work(struct work_struct *work)
{
	timerfd_clock_was_set();
}

static DECLARE_WORK(timerfd_work, timerfd_resume_work);

/*
 * Invoked from timekeeping_resume(). Defer the actual update to work so
 * timerfd_clock_was_set() runs in task context.
 */
void timerfd_resume(void)
{
	schedule_work(&timerfd_work);
}

/*
 * Unlink the context from cancel_list if it is currently on it.
 * Both callers in this file hold ctx->cancel_lock; the global
 * cancel_lock is taken here around the list removal.
 */
static void __timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
	if (ctx->might_cancel) {
		ctx->might_cancel = false;
		spin_lock(&cancel_lock);
		list_del_rcu(&ctx->clist);
		spin_unlock(&cancel_lock);
	}
}

/* Locked wrapper around __timerfd_remove_cancel(). */
static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
{
	spin_lock(&ctx->cancel_lock);
	__timerfd_remove_cancel(ctx);
	spin_unlock(&ctx->cancel_lock);
}

/*
 * Report whether the timer was canceled by a clock change.
 *
 * Returns true only if the context is on the cancel list and moffs holds
 * the KTIME_MAX marker. The marker is consumed: moffs is re-sampled, so
 * a second call returns false until the next clock change marks it again.
 */
static bool timerfd_canceled(struct timerfd_ctx *ctx)
{
	if (!ctx->might_cancel || ctx->moffs != KTIME_MAX)
		return false;
	ctx->moffs = ktime_mono_to_real(0);
	return true;
}

/*
 * Put the context on cancel_list, or take it off, according to @flags.
 *
 * The context is (kept) on the list only for CLOCK_REALTIME and
 * CLOCK_REALTIME_ALARM timers armed with both TFD_TIMER_ABSTIME and
 * TFD_TIMER_CANCEL_ON_SET; any other combination removes it.
 */
static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
{
	spin_lock(&ctx->cancel_lock);
	if ((ctx->clockid == CLOCK_REALTIME ||
	     ctx->clockid == CLOCK_REALTIME_ALARM) &&
	    (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) {
		if (!ctx->might_cancel) {
			ctx->might_cancel = true;
			spin_lock(&cancel_lock);
			list_add_rcu(&ctx->clist, &cancel_list);
			spin_unlock(&cancel_lock);
		}
	} else {
		__timerfd_remove_cancel(ctx);
	}
	spin_unlock(&ctx->cancel_lock);
}

/*
 * Remaining time reported by the backing timer, clamped so that a
 * negative value is returned as 0.
 */
static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
{
	ktime_t remaining;

	if (isalarm(ctx))
		remaining = alarm_expires_remaining(&ctx->t.alarm);
	else
		remaining = hrtimer_expires_remaining_adjusted(&ctx->t.tmr);

	return remaining < 0 ? 0: remaining;
}

/*
 * (Re)initialise the backing timer from @ktmr and start it.
 *
 * Resets expired and ticks, stores the new interval and re-initialises
 * the timer object. The timer is only started when it_value is non-zero.
 * The only caller in this file, do_timerfd_settime(), holds wqh.lock and
 * has already stopped the previous timer.
 *
 * Returns 0 on success, or -ECANCELED if timerfd_canceled() reports a
 * pending cancellation after the timer was started; in that case
 * settime_flags is not updated.
 */
static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
			 const struct itimerspec64 *ktmr)
{
	enum hrtimer_mode htmode;
	ktime_t texp;
	int clockid = ctx->clockid;

	htmode = (flags & TFD_TIMER_ABSTIME) ?
		HRTIMER_MODE_ABS: HRTIMER_MODE_REL;

	texp = timespec64_to_ktime(ktmr->it_value);
	ctx->expired = 0;
	ctx->ticks = 0;
	ctx->tintv = timespec64_to_ktime(ktmr->it_interval);

	if (isalarm(ctx)) {
		alarm_init(&ctx->t.alarm,
			   ctx->clockid == CLOCK_REALTIME_ALARM ?
			   ALARM_REALTIME : ALARM_BOOTTIME,
			   timerfd_alarmproc);
	} else {
		hrtimer_init(&ctx->t.tmr, clockid, htmode);
		hrtimer_set_expires(&ctx->t.tmr, texp);
		ctx->t.tmr.function = timerfd_tmrproc;
	}

	if (texp != 0) {
		/*
		 * NOTE(review): presumably maps an absolute expiry from the
		 * caller's time namespace to the host clock - confirm
		 * against timens_ktime_to_host().
		 */
		if (flags & TFD_TIMER_ABSTIME)
			texp = timens_ktime_to_host(clockid, texp);
		if (isalarm(ctx)) {
			if (flags & TFD_TIMER_ABSTIME)
				alarm_start(&ctx->t.alarm, texp);
			else
				alarm_start_relative(&ctx->t.alarm, texp);
		} else {
			hrtimer_start(&ctx->t.tmr, texp, htmode);
		}

		if (timerfd_canceled(ctx))
			return -ECANCELED;
	}

	ctx->settime_flags = flags & TFD_SETTIME_FLAGS;
	return 0;
}

/*
 * ->release: take the context off the cancel list, cancel the backing
 * timer and free the context with kfree_rcu(), since
 * timerfd_clock_was_set() walks the cancel list under RCU.
 */
static int timerfd_release(struct inode *inode, struct file *file)
{
	struct timerfd_ctx *ctx = file->private_data;

	timerfd_remove_cancel(ctx);

	if (isalarm(ctx))
		alarm_cancel(&ctx->t.alarm);
	else
		hrtimer_cancel(&ctx->t.tmr);
	kfree_rcu(ctx, rcu);
	return 0;
}

/* ->poll: report EPOLLIN while there are unconsumed ticks. */
static __poll_t timerfd_poll(struct file *file, poll_table *wait)
{
	struct timerfd_ctx *ctx = file->private_data;
	__poll_t events = 0;
	unsigned long flags;

	poll_wait(file, &ctx->wqh, wait);

	spin_lock_irqsave(&ctx->wqh.lock, flags);
	if (ctx->ticks)
		events |= EPOLLIN;
	spin_unlock_irqrestore(&ctx->wqh.lock, flags);

	return events;
}

/*
 * ->read_iter: hand the accumulated tick count to the caller as a u64.
 *
 * Returns -EINVAL if the destination is smaller than a u64. For
 * O_NONBLOCK files or IOCB_NOWAIT requests the result starts out as
 * -EAGAIN and is only replaced if ticks are available; otherwise the
 * caller sleeps until ticks is non-zero. A pending cancellation yields
 * -ECANCELED and discards the ticks. On success the return value is
 * whatever copy_to_iter() returned, with a zero return mapped to -EFAULT.
 */
static ssize_t timerfd_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	struct timerfd_ctx *ctx = file->private_data;
	ssize_t res;
	u64 ticks = 0;

	if (iov_iter_count(to) < sizeof(ticks))
		return -EINVAL;

	spin_lock_irq(&ctx->wqh.lock);
	if (file->f_flags & O_NONBLOCK || iocb->ki_flags & IOCB_NOWAIT)
		res = -EAGAIN;
	else
		res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);

	/*
	 * If clock has changed, we do not care about the
	 * ticks and we do not rearm the timer. Userspace must
	 * reevaluate anyway.
	 */
	if (timerfd_canceled(ctx)) {
		ctx->ticks = 0;
		ctx->expired = 0;
		res = -ECANCELED;
	}

	if (ctx->ticks) {
		ticks = ctx->ticks;

		if (ctx->expired && ctx->tintv) {
			/*
			 * If tintv != 0, this is a periodic timer that
			 * needs to be re-armed. We avoid doing it in the timer
			 * callback to avoid DoS attacks specifying a very
			 * short timer period.
			 */
			if (isalarm(ctx)) {
				ticks += alarm_forward_now(
					&ctx->t.alarm, ctx->tintv) - 1;
				alarm_restart(&ctx->t.alarm);
			} else {
				ticks += hrtimer_forward_now(&ctx->t.tmr,
							     ctx->tintv) - 1;
				hrtimer_restart(&ctx->t.tmr);
			}
		}
		ctx->expired = 0;
		ctx->ticks = 0;
	}
	spin_unlock_irq(&ctx->wqh.lock);
	/* Copy out only after the lock has been dropped. */
	if (ticks) {
		res = copy_to_iter(&ticks, sizeof(ticks), to);
		if (!res)
			res = -EFAULT;
	}
	return res;
}

#ifdef CONFIG_PROC_FS
/*
 * ->show_fdinfo: print clock id, tick count, last settime flags and the
 * current it_value/it_interval. The remaining time and interval are
 * sampled under wqh.lock; ctx->ticks is read after the lock is dropped.
 */
static void timerfd_show(struct seq_file *m, struct file *file)
{
	struct timerfd_ctx *ctx = file->private_data;
	struct timespec64 value, interval;

	spin_lock_irq(&ctx->wqh.lock);
	value = ktime_to_timespec64(timerfd_get_remaining(ctx));
	interval = ktime_to_timespec64(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);

	seq_printf(m,
		   "clockid: %d\n"
		   "ticks: %llu\n"
		   "settime flags: 0%o\n"
		   "it_value: (%llu, %llu)\n"
		   "it_interval: (%llu, %llu)\n",
		   ctx->clockid,
		   (unsigned long long)ctx->ticks,
		   ctx->settime_flags,
		   (unsigned long long)value.tv_sec,
		   (unsigned long long)value.tv_nsec,
		   (unsigned long long)interval.tv_sec,
		   (unsigned long long)interval.tv_nsec);
}
#else
#define timerfd_show NULL
#endif

#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * ->unlocked_ioctl: only TFD_IOC_SET_TICKS is handled.
 *
 * TFD_IOC_SET_TICKS reads a u64 from user space (-EFAULT on failure),
 * rejects 0 with -EINVAL, then stores it as the tick count and wakes
 * waiters, or returns -ECANCELED if a cancellation is pending.
 * Any other command returns -ENOTTY.
 */
static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
{
	struct timerfd_ctx *ctx = file->private_data;
	int ret = 0;

	switch (cmd) {
	case TFD_IOC_SET_TICKS: {
		u64 ticks;

		if (copy_from_user(&ticks, (u64 __user *)arg, sizeof(ticks)))
			return -EFAULT;
		if (!ticks)
			return -EINVAL;

		spin_lock_irq(&ctx->wqh.lock);
		if (!timerfd_canceled(ctx)) {
			ctx->ticks = ticks;
			wake_up_locked_poll(&ctx->wqh, EPOLLIN);
		} else
			ret = -ECANCELED;
		spin_unlock_irq(&ctx->wqh.lock);
		break;
	}
	default:
		ret = -ENOTTY;
		break;
	}

	return ret;
}
#else
#define timerfd_ioctl NULL
#endif

/*
 * File operations of a timerfd. The address of this table is also what
 * do_timerfd_settime()/do_timerfd_gettime() compare f_op against to
 * check that a descriptor really is a timerfd.
 */
static const struct file_operations timerfd_fops = {
	.release	= timerfd_release,
	.poll		= timerfd_poll,
	.read_iter	= timerfd_read_iter,
	.llseek		= noop_llseek,
	.show_fdinfo	= timerfd_show,
	.unlocked_ioctl	= timerfd_ioctl,
};

/*
 * timerfd_create(2): allocate a context and install it behind a new
 * anonymous-inode file descriptor.
 *
 * Returns the new descriptor, or:
 *   -EINVAL  unknown bits in @flags or an unsupported @clockid
 *   -EPERM   an *_ALARM clock was requested without CAP_WAKE_ALARM
 *   -ENOMEM  the context could not be allocated
 * or the error from get_unused_fd_flags()/anon_inode_getfile().
 */
SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags)
{
	int ufd;
	struct timerfd_ctx *ctx;
	struct file *file;

	/* Check the TFD_* constants for consistency.  */
	BUILD_BUG_ON(TFD_CLOEXEC != O_CLOEXEC);
	BUILD_BUG_ON(TFD_NONBLOCK != O_NONBLOCK);

	if ((flags & ~TFD_CREATE_FLAGS) ||
	    (clockid != CLOCK_MONOTONIC &&
	     clockid != CLOCK_REALTIME &&
	     clockid != CLOCK_REALTIME_ALARM &&
	     clockid != CLOCK_BOOTTIME &&
	     clockid != CLOCK_BOOTTIME_ALARM))
		return -EINVAL;

	if ((clockid == CLOCK_REALTIME_ALARM ||
	     clockid == CLOCK_BOOTTIME_ALARM) &&
	    !capable(CAP_WAKE_ALARM))
		return -EPERM;

	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	init_waitqueue_head(&ctx->wqh);
	spin_lock_init(&ctx->cancel_lock);
	ctx->clockid = clockid;

	if (isalarm(ctx))
		alarm_init(&ctx->t.alarm,
			   ctx->clockid == CLOCK_REALTIME_ALARM ?
			   ALARM_REALTIME : ALARM_BOOTTIME,
			   timerfd_alarmproc);
	else
		hrtimer_init(&ctx->t.tmr, clockid, HRTIMER_MODE_ABS);

	ctx->moffs = ktime_mono_to_real(0);

	ufd = get_unused_fd_flags(flags & TFD_SHARED_FCNTL_FLAGS);
	if (ufd < 0) {
		kfree(ctx);
		return ufd;
	}

	file = anon_inode_getfile("[timerfd]", &timerfd_fops, ctx,
				    O_RDWR | (flags & TFD_SHARED_FCNTL_FLAGS));
	if (IS_ERR(file)) {
		put_unused_fd(ufd);
		kfree(ctx);
		return PTR_ERR(file);
	}

	file->f_mode |= FMODE_NOWAIT;
	fd_install(ufd, file);
	return ufd;
}

/*
 * Common implementation of the settime syscalls.
 *
 * Validates @flags and @new, looks up the timerfd behind @ufd, updates
 * its cancel-on-set membership, stops the current timer, reports the
 * previous setting through @old and programs the new one.
 *
 * Returns the result of timerfd_setup(), or:
 *   -EINVAL  bad flags, invalid @new, or @ufd is not a timerfd
 *   -EBADF   @ufd is not an open descriptor
 *   -EPERM   alarm-clock timerfd without CAP_WAKE_ALARM
 */
static int do_timerfd_settime(int ufd, int flags,
		const struct itimerspec64 *new,
		struct itimerspec64 *old)
{
	struct timerfd_ctx *ctx;
	int ret;

	if ((flags & ~TFD_SETTIME_FLAGS) ||
		 !itimerspec64_valid(new))
		return -EINVAL;

	CLASS(fd, f)(ufd);
	if (fd_empty(f))
		return -EBADF;

	if (fd_file(f)->f_op != &timerfd_fops)
		return -EINVAL;

	ctx = fd_file(f)->private_data;

	if (isalarm(ctx) && !capable(CAP_WAKE_ALARM))
		return -EPERM;

	timerfd_setup_cancel(ctx, flags);

	/*
	 * We need to stop the existing timer before reprogramming
	 * it to the new values.
	 *
	 * The loop is left with wqh.lock held. When the try-to-cancel
	 * call returns a negative value the lock is dropped before
	 * waiting and the attempt is repeated; presumably this is the
	 * case where the callback is running, and the callback itself
	 * takes wqh.lock in timerfd_triggered().
	 */
	for (;;) {
		spin_lock_irq(&ctx->wqh.lock);

		if (isalarm(ctx)) {
			if (alarm_try_to_cancel(&ctx->t.alarm) >= 0)
				break;
		} else {
			if (hrtimer_try_to_cancel(&ctx->t.tmr) >= 0)
				break;
		}
		spin_unlock_irq(&ctx->wqh.lock);

		if (isalarm(ctx))
			hrtimer_cancel_wait_running(&ctx->t.alarm.timer);
		else
			hrtimer_cancel_wait_running(&ctx->t.tmr);
	}

	/*
	 * If the timer is expired and it's periodic, we need to advance it
	 * because the caller may want to know the previous expiration time.
	 * We do not update "ticks" and "expired" since the timer will be
	 * re-programmed again in the following timerfd_setup() call.
	 */
	if (ctx->expired && ctx->tintv) {
		if (isalarm(ctx))
			alarm_forward_now(&ctx->t.alarm, ctx->tintv);
		else
			hrtimer_forward_now(&ctx->t.tmr, ctx->tintv);
	}

	old->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
	old->it_interval = ktime_to_timespec64(ctx->tintv);

	/*
	 * Re-program the timer to the new value ...
	 */
	ret = timerfd_setup(ctx, flags, new);

	spin_unlock_irq(&ctx->wqh.lock);
	return ret;
}

/*
 * Common implementation of the gettime syscalls.
 *
 * If a periodic timer has expired, it is forwarded and restarted here
 * and the additional overruns are added to ticks, so that the reported
 * it_value refers to the next expiry. Fills @t with the remaining time
 * and the interval.
 *
 * Returns 0, -EBADF if @ufd is not an open descriptor, or -EINVAL if it
 * is not a timerfd.
 */
static int do_timerfd_gettime(int ufd, struct itimerspec64 *t)
{
	struct timerfd_ctx *ctx;
	CLASS(fd, f)(ufd);

	if (fd_empty(f))
		return -EBADF;
	if (fd_file(f)->f_op != &timerfd_fops)
		return -EINVAL;
	ctx = fd_file(f)->private_data;

	spin_lock_irq(&ctx->wqh.lock);
	if (ctx->expired && ctx->tintv) {
		ctx->expired = 0;

		if (isalarm(ctx)) {
			ctx->ticks +=
				alarm_forward_now(
					&ctx->t.alarm, ctx->tintv) - 1;
			alarm_restart(&ctx->t.alarm);
		} else {
			ctx->ticks +=
				hrtimer_forward_now(&ctx->t.tmr, ctx->tintv)
				- 1;
			hrtimer_restart(&ctx->t.tmr);
		}
	}
	t->it_value = ktime_to_timespec64(timerfd_get_remaining(ctx));
	t->it_interval = ktime_to_timespec64(ctx->tintv);
	spin_unlock_irq(&ctx->wqh.lock);
	return 0;
}

/*
 * timerfd_settime(2): copy the new setting in, apply it, and copy the
 * previous setting out when @otmr is non-NULL. Returns -EFAULT if either
 * user copy fails. Note that the timer has already been re-programmed
 * when the copy-out of the old value fails.
 */
SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
		const struct __kernel_itimerspec __user *, utmr,
		struct __kernel_itimerspec __user *, otmr)
{
	struct itimerspec64 new, old;
	int ret;

	if (get_itimerspec64(&new, utmr))
		return -EFAULT;
	ret = do_timerfd_settime(ufd, flags, &new, &old);
	if (ret)
		return ret;
	if (otmr && put_itimerspec64(&old, otmr))
		return -EFAULT;

	return ret;
}

/*
 * timerfd_gettime(2): fetch the current setting and copy it to @otmr.
 * Returns -EFAULT if the copy to user space fails.
 */
SYSCALL_DEFINE2(timerfd_gettime, int, ufd, struct __kernel_itimerspec __user *, otmr)
{
	struct itimerspec64 kotmr;
	int ret = do_timerfd_gettime(ufd, &kotmr);
	if (ret)
		return ret;
	return put_itimerspec64(&kotmr, otmr) ? -EFAULT : 0;
}

#ifdef CONFIG_COMPAT_32BIT_TIME
/*
 * Variant of timerfd_settime using struct old_itimerspec32 for the
 * user-space arguments; otherwise identical to the native syscall.
 */
SYSCALL_DEFINE4(timerfd_settime32, int, ufd, int, flags,
		const struct old_itimerspec32 __user *, utmr,
		struct old_itimerspec32 __user *, otmr)
{
	struct itimerspec64 new, old;
	int ret;

	if (get_old_itimerspec32(&new, utmr))
		return -EFAULT;
	ret = do_timerfd_settime(ufd, flags, &new, &old);
	if (ret)
		return ret;
	if (otmr && put_old_itimerspec32(&old, otmr))
		return -EFAULT;
	return ret;
}

/*
 * Variant of timerfd_gettime using struct old_itimerspec32 for the
 * user-space result; otherwise identical to the native syscall.
 */
SYSCALL_DEFINE2(timerfd_gettime32, int, ufd,
		struct old_itimerspec32 __user *, otmr)
{
	struct itimerspec64 kotmr;
	int ret = do_timerfd_gettime(ufd, &kotmr);
	if (ret)
		return ret;
	return put_old_itimerspec32(&kotmr, otmr) ? -EFAULT : 0;
}
#endif