// SPDX-License-Identifier: GPL-2.0
/*
 *  Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
 *
 *  High-resolution kernel timers
 *
 *  In contrast to the low-resolution timeout API, aka timer wheel,
 *  hrtimers provide finer resolution and accuracy depending on system
 *  configuration and capabilities.
 *
 *  Started by: Thomas Gleixner and Ingo Molnar
 *
 *  Credits:
 *	Based on the original timer wheel code
 *
 *	Help, testing, suggestions, bugfixes, improvements were
 *	provided by:
 *
 *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
 *	et. al.
 */

#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/notifier.h>
#include <linux/syscalls.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/sched/isolation.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/compat.h>

#include <linux/uaccess.h>

#include <trace/events/timer.h>

#include "tick-internal.h"

/*
 * Constants to set the queued state of the timer (INACTIVE, ENQUEUED)
 *
 * The callback state is kept separate in the CPU base because having it in
 * the timer would require touching the timer after the callback, which
 * makes it impossible to free the timer from the callback function.
 *
 * Therefore we track the callback state in:
 *
 *	timer->base->cpu_base->running == timer
 *
 * On SMP it is possible to have a "callback function running and enqueued"
 * status. It happens for example when a posix timer expired and the callback
 * queued a signal. Between dropping the lock which protects the posix timer
 * and reacquiring the base lock of the hrtimer, another CPU can deliver the
 * signal and rearm the timer.
 *
 * All state transitions are protected by cpu_base->lock.
 */
#define HRTIMER_STATE_INACTIVE	false
#define HRTIMER_STATE_ENQUEUED	true

/*
 * The resolution of the clocks. The resolution value is returned in
 * the clock_getres() system call to give application programmers an
 * idea of the (in)accuracy of timers. Timer values are rounded up to
 * this resolution value.
 */
#define HIGH_RES_NSEC		1

/*
 * Masks for selecting the soft and hard context timers from
 * cpu_base->active_bases
 */
#define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
#define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
#define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
#define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)

static void retrigger_next_event(void *arg);
static ktime_t __hrtimer_cb_get_time(clockid_t clock_id);

/*
 * The timer bases:
 *
 * There are more clockids than hrtimer bases. Thus, we index
 * into the timer bases by the hrtimer_base_type enum. When trying
 * to reach a base using a clockid, hrtimer_clockid_to_base()
 * is used to convert from clockid to the proper hrtimer_base_type.
101 */ 102 103 #define BASE_INIT(idx, cid) \ 104 [idx] = { .index = idx, .clockid = cid } 105 106 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 107 { 108 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), 109 .clock_base = { 110 BASE_INIT(HRTIMER_BASE_MONOTONIC, CLOCK_MONOTONIC), 111 BASE_INIT(HRTIMER_BASE_REALTIME, CLOCK_REALTIME), 112 BASE_INIT(HRTIMER_BASE_BOOTTIME, CLOCK_BOOTTIME), 113 BASE_INIT(HRTIMER_BASE_TAI, CLOCK_TAI), 114 BASE_INIT(HRTIMER_BASE_MONOTONIC_SOFT, CLOCK_MONOTONIC), 115 BASE_INIT(HRTIMER_BASE_REALTIME_SOFT, CLOCK_REALTIME), 116 BASE_INIT(HRTIMER_BASE_BOOTTIME_SOFT, CLOCK_BOOTTIME), 117 BASE_INIT(HRTIMER_BASE_TAI_SOFT, CLOCK_TAI), 118 }, 119 .csd = CSD_INIT(retrigger_next_event, NULL) 120 }; 121 122 static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) 123 { 124 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) 125 return true; 126 else 127 return likely(base->online); 128 } 129 130 #ifdef CONFIG_HIGH_RES_TIMERS 131 DEFINE_STATIC_KEY_FALSE(hrtimer_highres_enabled_key); 132 133 static void hrtimer_hres_workfn(struct work_struct *work) 134 { 135 static_branch_enable(&hrtimer_highres_enabled_key); 136 } 137 138 static DECLARE_WORK(hrtimer_hres_work, hrtimer_hres_workfn); 139 140 static inline void hrtimer_schedule_hres_work(void) 141 { 142 if (!hrtimer_highres_enabled()) 143 schedule_work(&hrtimer_hres_work); 144 } 145 #else 146 static inline void hrtimer_schedule_hres_work(void) { } 147 #endif 148 149 /* 150 * Functions and macros which are different for UP/SMP systems are kept in a 151 * single place 152 */ 153 #ifdef CONFIG_SMP 154 /* 155 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() 156 * such that hrtimer_callback_running() can unconditionally dereference 157 * timer->base->cpu_base 158 */ 159 static struct hrtimer_cpu_base migration_cpu_base = { 160 .clock_base = { 161 [0] = { 162 .cpu_base = &migration_cpu_base, 163 .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, 164 
&migration_cpu_base.lock), 165 }, 166 }, 167 }; 168 169 #define migration_base migration_cpu_base.clock_base[0] 170 171 /* 172 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock 173 * means that all timers which are tied to this base via timer->base are 174 * locked, and the base itself is locked too. 175 * 176 * So __run_timers/migrate_timers can safely modify all timers which could 177 * be found on the lists/queues. 178 * 179 * When the timer's base is locked, and the timer removed from list, it is 180 * possible to set timer->base = &migration_base and drop the lock: the timer 181 * remains locked. 182 */ 183 static struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, 184 unsigned long *flags) 185 __acquires(&timer->base->lock) 186 { 187 for (;;) { 188 struct hrtimer_clock_base *base = READ_ONCE(timer->base); 189 190 if (likely(base != &migration_base)) { 191 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 192 if (likely(base == timer->base)) 193 return base; 194 /* The timer has migrated to another CPU: */ 195 raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); 196 } 197 cpu_relax(); 198 } 199 } 200 201 /* 202 * Check if the elected target is suitable considering its next 203 * event and the hotplug state of the current CPU. 204 * 205 * If the elected target is remote and its next event is after the timer 206 * to queue, then a remote reprogram is necessary. However there is no 207 * guarantee the IPI handling the operation would arrive in time to meet 208 * the high resolution deadline. In this case the local CPU becomes a 209 * preferred target, unless it is offline. 210 * 211 * High and low resolution modes are handled the same way for simplicity. 212 * 213 * Called with cpu_base->lock of target cpu held. 
214 */ 215 static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, 216 struct hrtimer_cpu_base *new_cpu_base, 217 struct hrtimer_cpu_base *this_cpu_base) 218 { 219 ktime_t expires; 220 221 /* 222 * The local CPU clockevent can be reprogrammed. Also get_target_base() 223 * guarantees it is online. 224 */ 225 if (new_cpu_base == this_cpu_base) 226 return true; 227 228 /* 229 * The offline local CPU can't be the default target if the 230 * next remote target event is after this timer. Keep the 231 * elected new base. An IPI will be issued to reprogram 232 * it as a last resort. 233 */ 234 if (!hrtimer_base_is_online(this_cpu_base)) 235 return true; 236 237 expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); 238 239 return expires >= new_base->cpu_base->expires_next; 240 } 241 242 static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, bool pinned) 243 { 244 if (!hrtimer_base_is_online(base)) { 245 int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); 246 247 return &per_cpu(hrtimer_bases, cpu); 248 } 249 250 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 251 if (static_branch_likely(&timers_migration_enabled) && !pinned) 252 return &per_cpu(hrtimer_bases, get_nohz_timer_target()); 253 #endif 254 return base; 255 } 256 257 /* 258 * We switch the timer base to a power-optimized selected CPU target, 259 * if: 260 * - NO_HZ_COMMON is enabled 261 * - timer migration is enabled 262 * - the timer callback is not running 263 * - the timer is not the first expiring timer on the new target 264 * 265 * If one of the above requirements is not fulfilled we move the timer 266 * to the current CPU or leave it on the previously assigned CPU if 267 * the timer callback is currently running. 
268 */ 269 static inline struct hrtimer_clock_base * 270 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, bool pinned) 271 { 272 struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; 273 struct hrtimer_clock_base *new_base; 274 int basenum = base->index; 275 276 this_cpu_base = this_cpu_ptr(&hrtimer_bases); 277 new_cpu_base = get_target_base(this_cpu_base, pinned); 278 again: 279 new_base = &new_cpu_base->clock_base[basenum]; 280 281 if (base != new_base) { 282 /* 283 * We are trying to move timer to new_base. However we can't 284 * change timer's base while it is running, so we keep it on 285 * the same CPU. No hassle vs. reprogramming the event source 286 * in the high resolution case. The remote CPU will take care 287 * of this when the timer function has completed. There is no 288 * conflict as we hold the lock until the timer is enqueued. 289 */ 290 if (unlikely(hrtimer_callback_running(timer))) 291 return base; 292 293 /* See the comment in lock_hrtimer_base() */ 294 WRITE_ONCE(timer->base, &migration_base); 295 raw_spin_unlock(&base->cpu_base->lock); 296 raw_spin_lock(&new_base->cpu_base->lock); 297 298 if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { 299 raw_spin_unlock(&new_base->cpu_base->lock); 300 raw_spin_lock(&base->cpu_base->lock); 301 new_cpu_base = this_cpu_base; 302 WRITE_ONCE(timer->base, base); 303 goto again; 304 } 305 WRITE_ONCE(timer->base, new_base); 306 } else { 307 if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { 308 new_cpu_base = this_cpu_base; 309 goto again; 310 } 311 } 312 return new_base; 313 } 314 315 #else /* CONFIG_SMP */ 316 317 static inline struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, 318 unsigned long *flags) 319 __acquires(&timer->base->cpu_base->lock) 320 { 321 struct hrtimer_clock_base *base = timer->base; 322 323 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 324 return base; 325 } 326 327 # define 
switch_hrtimer_base(t, b, p) (b) 328 329 #endif /* !CONFIG_SMP */ 330 331 /* 332 * Functions for the union type storage format of ktime_t which are 333 * too large for inlining: 334 */ 335 #if BITS_PER_LONG < 64 336 /* 337 * Divide a ktime value by a nanosecond value 338 */ 339 s64 __ktime_divns(const ktime_t kt, s64 div) 340 { 341 int sft = 0; 342 s64 dclc; 343 u64 tmp; 344 345 dclc = ktime_to_ns(kt); 346 tmp = dclc < 0 ? -dclc : dclc; 347 348 /* Make sure the divisor is less than 2^32: */ 349 while (div >> 32) { 350 sft++; 351 div >>= 1; 352 } 353 tmp >>= sft; 354 do_div(tmp, (u32) div); 355 return dclc < 0 ? -tmp : tmp; 356 } 357 EXPORT_SYMBOL_GPL(__ktime_divns); 358 #endif /* BITS_PER_LONG < 64 */ 359 360 /* 361 * Add two ktime values and do a safety check for overflow: 362 */ 363 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) 364 { 365 ktime_t res = ktime_add_unsafe(lhs, rhs); 366 367 /* 368 * We use KTIME_SEC_MAX here, the maximum timeout which we can 369 * return to user space in a timespec: 370 */ 371 if (res < 0 || res < lhs || res < rhs) 372 res = ktime_set(KTIME_SEC_MAX, 0); 373 374 return res; 375 } 376 377 EXPORT_SYMBOL_GPL(ktime_add_safe); 378 379 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS 380 381 static const struct debug_obj_descr hrtimer_debug_descr; 382 383 static void *hrtimer_debug_hint(void *addr) 384 { 385 return ACCESS_PRIVATE((struct hrtimer *)addr, function); 386 } 387 388 /* 389 * fixup_init is called when: 390 * - an active object is initialized 391 */ 392 static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state) 393 { 394 struct hrtimer *timer = addr; 395 396 switch (state) { 397 case ODEBUG_STATE_ACTIVE: 398 hrtimer_cancel(timer); 399 debug_object_init(timer, &hrtimer_debug_descr); 400 return true; 401 default: 402 return false; 403 } 404 } 405 406 /* 407 * fixup_activate is called when: 408 * - an active object is activated 409 * - an unknown non-static object is activated 410 */ 411 static bool 
hrtimer_fixup_activate(void *addr, enum debug_obj_state state) 412 { 413 switch (state) { 414 case ODEBUG_STATE_ACTIVE: 415 WARN_ON(1); 416 fallthrough; 417 default: 418 return false; 419 } 420 } 421 422 /* 423 * fixup_free is called when: 424 * - an active object is freed 425 */ 426 static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) 427 { 428 struct hrtimer *timer = addr; 429 430 switch (state) { 431 case ODEBUG_STATE_ACTIVE: 432 hrtimer_cancel(timer); 433 debug_object_free(timer, &hrtimer_debug_descr); 434 return true; 435 default: 436 return false; 437 } 438 } 439 440 /* Stub timer callback for improperly used timers. */ 441 static enum hrtimer_restart stub_timer(struct hrtimer *unused) 442 { 443 WARN_ON_ONCE(1); 444 return HRTIMER_NORESTART; 445 } 446 447 /* 448 * hrtimer_fixup_assert_init is called when: 449 * - an untracked/uninit-ed object is found 450 */ 451 static bool hrtimer_fixup_assert_init(void *addr, enum debug_obj_state state) 452 { 453 struct hrtimer *timer = addr; 454 455 switch (state) { 456 case ODEBUG_STATE_NOTAVAILABLE: 457 hrtimer_setup(timer, stub_timer, CLOCK_MONOTONIC, 0); 458 return true; 459 default: 460 return false; 461 } 462 } 463 464 static const struct debug_obj_descr hrtimer_debug_descr = { 465 .name = "hrtimer", 466 .debug_hint = hrtimer_debug_hint, 467 .fixup_init = hrtimer_fixup_init, 468 .fixup_activate = hrtimer_fixup_activate, 469 .fixup_free = hrtimer_fixup_free, 470 .fixup_assert_init = hrtimer_fixup_assert_init, 471 }; 472 473 static inline void debug_hrtimer_init(struct hrtimer *timer) 474 { 475 debug_object_init(timer, &hrtimer_debug_descr); 476 } 477 478 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) 479 { 480 debug_object_init_on_stack(timer, &hrtimer_debug_descr); 481 } 482 483 static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) 484 { 485 debug_object_activate(timer, &hrtimer_debug_descr); 486 } 487 488 static inline void 
debug_hrtimer_deactivate(struct hrtimer *timer) 489 { 490 debug_object_deactivate(timer, &hrtimer_debug_descr); 491 } 492 493 static inline void debug_hrtimer_assert_init(struct hrtimer *timer) 494 { 495 debug_object_assert_init(timer, &hrtimer_debug_descr); 496 } 497 498 void destroy_hrtimer_on_stack(struct hrtimer *timer) 499 { 500 debug_object_free(timer, &hrtimer_debug_descr); 501 } 502 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); 503 504 #else 505 506 static inline void debug_hrtimer_init(struct hrtimer *timer) { } 507 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { } 508 static inline void debug_hrtimer_activate(struct hrtimer *timer, enum hrtimer_mode mode) { } 509 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } 510 static inline void debug_hrtimer_assert_init(struct hrtimer *timer) { } 511 #endif 512 513 static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) 514 { 515 debug_hrtimer_init(timer); 516 trace_hrtimer_setup(timer, clockid, mode); 517 } 518 519 static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid, 520 enum hrtimer_mode mode) 521 { 522 debug_hrtimer_init_on_stack(timer); 523 trace_hrtimer_setup(timer, clockid, mode); 524 } 525 526 static inline void debug_activate(struct hrtimer *timer, enum hrtimer_mode mode, bool was_armed) 527 { 528 debug_hrtimer_activate(timer, mode); 529 trace_hrtimer_start(timer, mode, was_armed); 530 } 531 532 #define for_each_active_base(base, cpu_base, active) \ 533 for (unsigned int idx = ffs(active); idx--; idx = ffs((active))) \ 534 for (bool done = false; !done; active &= ~(1U << idx)) \ 535 for (base = &cpu_base->clock_base[idx]; !done; done = true) 536 537 #define hrtimer_from_timerqueue_node(_n) container_of_const(_n, struct hrtimer, node) 538 539 #if defined(CONFIG_NO_HZ_COMMON) 540 /* 541 * Same as hrtimer_bases_next_event() below, but skips the excluded timer and 542 * does not update 
 * cpu_base->next_timer/expires.
 *
 * @cpu_base:		CPU base to scan; its lock must be held
 * @exclude:		timer which is ignored when it is first in its base
 * @active:		mask of clock bases to scan
 * @expires_next:	upper bound; only earlier expiry times replace it
 *
 * Returns the earliest expiry found, with the base offset subtracted and
 * clamped to zero, or @expires_next when nothing expires before it.
 */
static ktime_t hrtimer_bases_next_event_without(struct hrtimer_cpu_base *cpu_base,
						const struct hrtimer *exclude,
						unsigned int active, ktime_t expires_next)
{
	struct hrtimer_clock_base *base;
	ktime_t expires;

	lockdep_assert_held(&cpu_base->lock);

	for_each_active_base(base, cpu_base, active) {
		/* The cached first expiry of the base decides whether to look closer */
		expires = ktime_sub(base->expires_next, base->offset);
		if (expires >= expires_next)
			continue;

		/*
		 * If the excluded timer is the first on this base evaluate the
		 * next timer.
		 */
		struct timerqueue_linked_node *node = timerqueue_linked_first(&base->active);

		if (unlikely(&exclude->node == node)) {
			node = timerqueue_linked_next(node);
			/* The excluded timer was the only one queued on this base */
			if (!node)
				continue;
			expires = ktime_sub(node->expires, base->offset);
			if (expires >= expires_next)
				continue;
		}
		expires_next = expires;
	}
	/* If base->offset changed, the result might be negative */
	return max(expires_next, 0);
}
#endif

/*
 * Return the hrtimer which embeds the first node queued on @base.
 *
 * NOTE(review): the only caller visible here invokes this for bases taken
 * from the active mask; presumably the queue is never empty at this point,
 * confirm before using it elsewhere.
 */
static __always_inline struct hrtimer *clock_base_next_timer(struct hrtimer_clock_base *base)
{
	struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active);

	return hrtimer_from_timerqueue_node(next);
}

/*
 * Find the base with the earliest expiry
 *
 * @expires_next and @next_timer are in/out parameters: they are only
 * updated when a base in @active expires before the value passed in, so
 * the result of a previous scan can be carried into the next one.
 */
static void hrtimer_bases_first(struct hrtimer_cpu_base *cpu_base,unsigned int active,
				ktime_t *expires_next, struct hrtimer **next_timer)
{
	struct hrtimer_clock_base *base;
	ktime_t expires;

	for_each_active_base(base, cpu_base, active) {
		expires = ktime_sub(base->expires_next, base->offset);
		if (expires < *expires_next) {
			*expires_next = expires;
			*next_timer = clock_base_next_timer(base);
		}
	}
}

/*
 * Recomputes cpu_base::*next_timer and returns the earliest expires_next
 * but does not set cpu_base::*expires_next, that is done by
 * hrtimer[_force]_reprogram and hrtimer_interrupt
 * only. When updating
 * cpu_base::*expires_next right away, reprogramming logic would no longer
 * work.
 *
 * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
 * those timers will get run whenever the softirq gets handled, at the end of
 * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
 *
 * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
 * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
 * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
 *
 * @active_mask must be one of:
 *  - HRTIMER_ACTIVE_ALL,
 *  - HRTIMER_ACTIVE_SOFT, or
 *  - HRTIMER_ACTIVE_HARD.
 */
static ktime_t __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
{
	struct hrtimer *next_timer = NULL;
	ktime_t expires_next = KTIME_MAX;
	unsigned int active;

	lockdep_assert_held(&cpu_base->lock);

	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
		if (active)
			hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer);
		/* NULL when no soft base has a timer queued */
		cpu_base->softirq_next_timer = next_timer;
	}

	if (active_mask & HRTIMER_ACTIVE_HARD) {
		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
		/*
		 * expires_next and next_timer carry over from the soft scan
		 * above, so a soft timer stays selected unless a hard timer
		 * expires earlier.
		 */
		if (active)
			hrtimer_bases_first(cpu_base, active, &expires_next, &next_timer);
		cpu_base->next_timer = next_timer;
	}
	/* The subtraction of the base offset can yield a negative value */
	return max(expires_next, 0);
}

/*
 * Reevaluate the first expiring soft and hard timers of @cpu_base.
 *
 * Updates cpu_base->softirq_expires_next (unless the softirq is already
 * activated) and cpu_base->next_timer, and returns the earlier of the soft
 * and the hard expiry time. cpu_base->expires_next is not written here.
 */
static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
{
	ktime_t expires_next, soft = KTIME_MAX;

	/*
	 * If the soft interrupt has already been activated, ignore the
	 * soft bases. They will be handled in the already raised soft
	 * interrupt.
	 */
	if (!cpu_base->softirq_activated) {
		soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
		/*
		 * Update the soft expiry time. clock_settime() might have
		 * affected it.
		 */
		cpu_base->softirq_expires_next = soft;
	}

	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
	/*
	 * If a softirq timer is expiring first, update cpu_base->next_timer
	 * and program the hardware with the soft expiry time.
	 */
	if (expires_next > soft) {
		cpu_base->next_timer = cpu_base->softirq_next_timer;
		expires_next = soft;
	}

	return expires_next;
}

/*
 * Refresh the REALTIME, BOOTTIME and TAI offsets of @base from the
 * timekeeping core and mirror them into the matching soft clock bases.
 * Returns the time handed back by ktime_get_update_offsets_now().
 */
static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;

	ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, offs_real,
						   offs_boot, offs_tai);

	/* The soft bases share the clocks, so they get the same offsets */
	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
	base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;

	return now;
}

/*
 * Is the high resolution mode active in the CPU base. This cannot use the
 * static key as the CPUs are switched to high resolution mode
 * asynchronously.
 */
static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
{
	return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
		cpu_base->hres_active : 0;
}

/* Emit the rearm tracepoint and program the tick device for @expires_next */
static inline void hrtimer_rearm_event(ktime_t expires_next, bool deferred)
{
	trace_hrtimer_rearm(expires_next, deferred);
	tick_program_event(expires_next, 1);
}

/*
 * Record @expires_next in @cpu_base and reprogram the event device when
 * that is both possible and wanted. @next_timer is not referenced in this
 * function body.
 */
static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, struct hrtimer *next_timer,
				ktime_t expires_next)
{
	cpu_base->expires_next = expires_next;

	/*
	 * If hres is not active, hardware does not have to be
	 * reprogrammed yet.
	 *
	 * If a hang was detected in the last timer interrupt then we
	 * leave the hang delay active in the hardware. We want the
	 * system to make progress. That also prevents the following
	 * scenario:
	 * T1 expires 50ms from now
	 * T2 expires 5s from now
	 *
	 * T1 is removed, so this code is called and would reprogram
	 * the hardware to 5s from now. Any hrtimer_start after that
	 * will not reprogram the hardware due to hang_detected being
	 * set. So we'd effectively block all timers until the T2 event
	 * fires.
	 */
	if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
		return;

	hrtimer_rearm_event(expires_next, false);
}

/*
 * Reprogram the event source with an evaluation of all clock bases
 *
 * With @skip_equal set, nothing is reprogrammed when the reevaluated
 * expiry equals the one already stored in cpu_base->expires_next.
 */
static void hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, bool skip_equal)
{
	ktime_t expires_next = hrtimer_update_next_event(cpu_base);

	if (skip_equal && expires_next == cpu_base->expires_next)
		return;

	__hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next);
}

/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS

/* High resolution timer enabled ?
*/ 753 static bool hrtimer_hres_enabled __read_mostly = true; 754 unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; 755 EXPORT_SYMBOL_GPL(hrtimer_resolution); 756 757 /* Enable / Disable high resolution mode */ 758 static int __init setup_hrtimer_hres(char *str) 759 { 760 return (kstrtobool(str, &hrtimer_hres_enabled) == 0); 761 } 762 __setup("highres=", setup_hrtimer_hres); 763 764 /* hrtimer_high_res_enabled - query, if the highres mode is enabled */ 765 static inline bool hrtimer_is_hres_enabled(void) 766 { 767 return hrtimer_hres_enabled; 768 } 769 770 /* Switch to high resolution mode */ 771 static void hrtimer_switch_to_hres(void) 772 { 773 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 774 775 if (tick_init_highres()) { 776 pr_warn("Could not switch to high resolution mode on CPU %u\n", base->cpu); 777 return; 778 } 779 base->hres_active = true; 780 hrtimer_resolution = HIGH_RES_NSEC; 781 782 tick_setup_sched_timer(true); 783 /* "Retrigger" the interrupt to get things going */ 784 retrigger_next_event(NULL); 785 hrtimer_schedule_hres_work(); 786 } 787 788 #else 789 790 static inline bool hrtimer_is_hres_enabled(void) { return 0; } 791 static inline void hrtimer_switch_to_hres(void) { } 792 793 #endif /* CONFIG_HIGH_RES_TIMERS */ 794 795 /* 796 * Retrigger next event is called after clock was set with interrupts 797 * disabled through an SMP function call or directly from low level 798 * resume code. 799 * 800 * This is only invoked when: 801 * - CONFIG_HIGH_RES_TIMERS is enabled. 802 * - CONFIG_NOHZ_COMMON is enabled 803 * 804 * For the other cases this function is empty and because the call sites 805 * are optimized out it vanishes as well, i.e. no need for lots of 806 * #ifdeffery. 
 */
static void retrigger_next_event(void *arg)
{
	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);

	/*
	 * When high resolution mode or nohz is active, then the offsets of
	 * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the
	 * next tick will take care of that.
	 *
	 * If high resolution mode is active then the next expiring timer
	 * must be reevaluated and the clock event device reprogrammed if
	 * necessary.
	 *
	 * In the NOHZ case the update of the offset and the reevaluation
	 * of the next expiring timer is enough. The return from the SMP
	 * function call will take care of the reprogramming in case the
	 * CPU was in a NOHZ idle sleep.
	 *
	 * In periodic low resolution mode, the next softirq expiration
	 * must also be updated.
	 */
	/* The lock is held until the function returns; @arg is not used */
	guard(raw_spinlock)(&base->lock);
	hrtimer_update_base(base);
	if (hrtimer_hres_active(base))
		hrtimer_force_reprogram(base, /* skip_equal */ false);
	else
		hrtimer_update_next_event(base);
}

/*
 * When a timer is enqueued and expires earlier than the already enqueued
 * timers, we have to check, whether it expires earlier than the timer for
 * which the clock event device was armed.
 *
 * @reprogram is only evaluated for soft timers: when it is false the soft
 * expiry bookkeeping is updated but the event device is left alone.
 *
 * Called with interrupts disabled and base->cpu_base.lock held
 */
static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	struct hrtimer_clock_base *base = timer->base;
	ktime_t expires = hrtimer_get_expires(timer);

	WARN_ON_ONCE(expires < 0);

	expires = ktime_sub(expires, base->offset);
	/*
	 * CLOCK_REALTIME timer might be requested with an absolute
	 * expiry time which is less than base->offset. Set it to 0.
	 */
	if (expires < 0)
		expires = 0;

	if (timer->is_soft) {
		/*
		 * soft hrtimer could be started on a remote CPU. In this
		 * case softirq_expires_next needs to be updated on the
		 * remote CPU. The soft hrtimer will not expire before the
		 * first hard hrtimer on the remote CPU -
		 * hrtimer_check_target() prevents this case.
		 *
		 * NOTE(review): no hrtimer_check_target() is visible in this
		 * part of the file; the target check seen here is
		 * hrtimer_suitable_target(). Confirm and update the reference.
		 */
		struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;

		/* An activated softirq will pick the timer up on its own */
		if (timer_cpu_base->softirq_activated)
			return;

		/* Not earlier than the recorded first soft expiry: nothing to do */
		if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
			return;

		timer_cpu_base->softirq_next_timer = timer;
		timer_cpu_base->softirq_expires_next = expires;

		if (!ktime_before(expires, timer_cpu_base->expires_next) || !reprogram)
			return;
	}

	/*
	 * If the timer is not on the current cpu, we cannot reprogram
	 * the other cpus clock event device.
	 */
	if (base->cpu_base != cpu_base)
		return;

	/* The device is already armed for this or an earlier expiry */
	if (expires >= cpu_base->expires_next)
		return;

	/* If a deferred rearm is pending skip reprogramming the device */
	if (cpu_base->deferred_rearm)
		return;

	cpu_base->next_timer = timer;

	__hrtimer_reprogram(cpu_base, timer, expires);
}

/*
 * Update the clock offsets of @cpu_base and tell the caller whether the
 * owning CPU has to be interrupted to reevaluate its timers.
 *
 * @active is the mask of clock bases affected by the clock change; only
 * those which are also set in cpu_base->active_bases are inspected.
 *
 * Returns true when a first expiring timer of an inspected base now lies
 * before cpu_base->expires_next, or for a soft base without an activated
 * softirq before cpu_base->softirq_expires_next.
 */
static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, unsigned int active)
{
	struct hrtimer_clock_base *base;
	unsigned int seq;
	ktime_t expires;

	/*
	 * Update the base offsets unconditionally so the following
	 * checks whether the SMP function call is required works.
	 *
	 * The update is safe even when the remote CPU is in the hrtimer
	 * interrupt or the hrtimer soft interrupt and expiring affected
	 * bases. Either it will see the update before handling a base or
	 * it will see it when it finishes the processing and reevaluates
	 * the next expiring timer.
	 */
	seq = cpu_base->clock_was_set_seq;
	hrtimer_update_base(cpu_base);

	/*
	 * If the sequence did not change over the update then the
	 * remote CPU already handled it.
	 */
	if (seq == cpu_base->clock_was_set_seq)
		return false;

	/* If a deferred rearm is pending the remote CPU will take care of it */
	if (cpu_base->deferred_rearm) {
		cpu_base->deferred_needs_update = true;
		return false;
	}

	/*
	 * Walk the affected clock bases and check whether the first expiring
	 * timer in a clock base is moving ahead of the first expiring timer of
	 * @cpu_base. If so, the IPI must be invoked because per CPU clock
	 * event devices cannot be remotely reprogrammed.
	 */
	active &= cpu_base->active_bases;

	for_each_active_base(base, cpu_base, active) {
		struct timerqueue_linked_node *next;

		next = timerqueue_linked_first(&base->active);
		expires = ktime_sub(next->expires, base->offset);
		if (expires < cpu_base->expires_next)
			return true;

		/* Extra check for softirq clock bases */
		if (base->index < HRTIMER_BASE_MONOTONIC_SOFT)
			continue;
		if (cpu_base->softirq_activated)
			continue;
		if (expires < cpu_base->softirq_expires_next)
			return true;
	}
	return false;
}

/*
 * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and
 * CLOCK_BOOTTIME (for late sleep time injection).
 *
 * This requires to update the offsets for these clocks
 * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this
 * also requires to eventually reprogram the per CPU clock event devices
 * when the change moves an affected timer ahead of the first expiring
 * timer on that CPU. Obviously remote per CPU clock event devices cannot
 * be reprogrammed. The other reason why an IPI has to be sent is when the
 * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets
 * in the tick, which obviously might be stopped, so this has to bring out
 * the remote CPU which might sleep in idle to get this sorted.
974 */ 975 void clock_was_set(unsigned int bases) 976 { 977 cpumask_var_t mask; 978 979 if (!hrtimer_highres_enabled() && !tick_nohz_is_active()) 980 goto out_timerfd; 981 982 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 983 on_each_cpu(retrigger_next_event, NULL, 1); 984 goto out_timerfd; 985 } 986 987 /* Avoid interrupting CPUs if possible */ 988 scoped_guard(cpus_read_lock) { 989 int cpu; 990 991 for_each_online_cpu(cpu) { 992 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 993 994 guard(raw_spinlock_irqsave)(&cpu_base->lock); 995 if (update_needs_ipi(cpu_base, bases)) 996 cpumask_set_cpu(cpu, mask); 997 } 998 scoped_guard(preempt) 999 smp_call_function_many(mask, retrigger_next_event, NULL, 1); 1000 } 1001 free_cpumask_var(mask); 1002 1003 out_timerfd: 1004 timerfd_clock_was_set(); 1005 } 1006 1007 static void clock_was_set_work(struct work_struct *work) 1008 { 1009 clock_was_set(CLOCK_SET_WALL); 1010 } 1011 1012 static DECLARE_WORK(hrtimer_work, clock_was_set_work); 1013 1014 /* 1015 * Called from timekeeping code to reprogram the hrtimer interrupt device 1016 * on all cpus and to notify timerfd. 1017 */ 1018 void clock_was_set_delayed(void) 1019 { 1020 schedule_work(&hrtimer_work); 1021 } 1022 1023 /* 1024 * Called during resume either directly from via timekeeping_resume() 1025 * or in the case of s2idle from tick_unfreeze() to ensure that the 1026 * hrtimers are up to date. 
1027 */ 1028 void hrtimers_resume_local(void) 1029 { 1030 lockdep_assert_irqs_disabled(); 1031 /* Retrigger on the local CPU */ 1032 retrigger_next_event(NULL); 1033 } 1034 1035 /* Counterpart to lock_hrtimer_base above */ 1036 static inline void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 1037 __releases(&timer->base->cpu_base->lock) 1038 { 1039 raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); 1040 } 1041 1042 /** 1043 * hrtimer_forward() - forward the timer expiry 1044 * @timer: hrtimer to forward 1045 * @now: forward past this time 1046 * @interval: the interval to forward 1047 * 1048 * Forward the timer expiry so it will expire in the future. 1049 * 1050 * .. note:: 1051 * This only updates the timer expiry value and does not requeue the timer. 1052 * 1053 * There is also a variant of this function: hrtimer_forward_now(). 1054 * 1055 * Context: Can be safely called from the callback function of @timer. If called 1056 * from other contexts @timer must neither be enqueued nor running the 1057 * callback and the caller needs to take care of serialization. 1058 * 1059 * Return: The number of overruns are returned. 
1060 */ 1061 u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) 1062 { 1063 ktime_t delta; 1064 u64 orun = 1; 1065 1066 delta = ktime_sub(now, hrtimer_get_expires(timer)); 1067 1068 if (delta < 0) 1069 return 0; 1070 1071 if (WARN_ON(timer->is_queued)) 1072 return 0; 1073 1074 if (interval < hrtimer_resolution) 1075 interval = hrtimer_resolution; 1076 1077 if (unlikely(delta >= interval)) { 1078 s64 incr = ktime_to_ns(interval); 1079 1080 orun = ktime_divns(delta, incr); 1081 hrtimer_add_expires_ns(timer, incr * orun); 1082 if (hrtimer_get_expires(timer) > now) 1083 return orun; 1084 /* 1085 * This (and the ktime_add() below) is the 1086 * correction for exact: 1087 */ 1088 orun++; 1089 } 1090 hrtimer_add_expires(timer, interval); 1091 1092 return orun; 1093 } 1094 EXPORT_SYMBOL_GPL(hrtimer_forward); 1095 1096 /* 1097 * enqueue_hrtimer - internal function to (re)start a timer 1098 * 1099 * The timer is inserted in expiry order. Insertion into the 1100 * red black tree is O(log(n)). 1101 * 1102 * Returns true when the new timer is the leftmost timer in the tree. 1103 */ 1104 static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1105 enum hrtimer_mode mode, bool was_armed) 1106 { 1107 lockdep_assert_held(&base->cpu_base->lock); 1108 1109 debug_activate(timer, mode, was_armed); 1110 WARN_ON_ONCE(!base->cpu_base->online); 1111 1112 base->cpu_base->active_bases |= 1 << base->index; 1113 1114 /* Pairs with the lockless read in hrtimer_is_queued() */ 1115 WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED); 1116 1117 if (!timerqueue_linked_add(&base->active, &timer->node)) 1118 return false; 1119 1120 base->expires_next = hrtimer_get_expires(timer); 1121 return true; 1122 } 1123 1124 static inline void base_update_next_timer(struct hrtimer_clock_base *base) 1125 { 1126 struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active); 1127 1128 base->expires_next = next ? 
next->expires : KTIME_MAX; 1129 } 1130 1131 /* 1132 * __remove_hrtimer - internal function to remove a timer 1133 * 1134 * High resolution timer mode reprograms the clock event device when the 1135 * timer is the one which expires next. The caller can disable this by setting 1136 * reprogram to zero. This is useful, when the context does a reprogramming 1137 * anyway (e.g. timer interrupt) 1138 */ 1139 static void __remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1140 bool newstate, bool reprogram) 1141 { 1142 struct hrtimer_cpu_base *cpu_base = base->cpu_base; 1143 bool was_first; 1144 1145 lockdep_assert_held(&cpu_base->lock); 1146 1147 if (!timer->is_queued) 1148 return; 1149 1150 /* Pairs with the lockless read in hrtimer_is_queued() */ 1151 WRITE_ONCE(timer->is_queued, newstate); 1152 1153 was_first = !timerqueue_linked_prev(&timer->node); 1154 1155 if (!timerqueue_linked_del(&base->active, &timer->node)) 1156 cpu_base->active_bases &= ~(1 << base->index); 1157 1158 /* Nothing to update if this was not the first timer in the base */ 1159 if (!was_first) 1160 return; 1161 1162 base_update_next_timer(base); 1163 1164 /* 1165 * If reprogram is false don't update cpu_base->next_timer and do not 1166 * touch the clock event device. 1167 * 1168 * This happens when removing the first timer on a remote CPU, which 1169 * will be handled by the remote CPU's interrupt. It also happens when 1170 * a local timer is removed to be immediately restarted. That's handled 1171 * at the call site. 
1172 */ 1173 if (!reprogram || timer != cpu_base->next_timer || timer->is_lazy) 1174 return; 1175 1176 if (cpu_base->deferred_rearm) 1177 cpu_base->deferred_needs_update = true; 1178 else 1179 hrtimer_force_reprogram(cpu_base, /* skip_equal */ true); 1180 } 1181 1182 static inline bool remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1183 bool newstate) 1184 { 1185 lockdep_assert_held(&base->cpu_base->lock); 1186 1187 if (timer->is_queued) { 1188 bool reprogram; 1189 1190 debug_hrtimer_deactivate(timer); 1191 1192 /* 1193 * Remove the timer and force reprogramming when high 1194 * resolution mode is active and the timer is on the current 1195 * CPU. If we remove a timer on another CPU, reprogramming is 1196 * skipped. The interrupt event on this CPU is fired and 1197 * reprogramming happens in the interrupt handler. This is a 1198 * rare case and less expensive than a smp call. 1199 */ 1200 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); 1201 1202 __remove_hrtimer(timer, base, newstate, reprogram); 1203 return true; 1204 } 1205 return false; 1206 } 1207 1208 /* 1209 * Update in place has to retrieve the expiry times of the neighbour nodes 1210 * if they exist. That is cache line neutral because the dequeue/enqueue 1211 * operation is going to need the same cache lines. But there is a big win 1212 * when the dequeue/enqueue can be avoided because the RB tree does not 1213 * have to be rebalanced twice. 
1214 */ 1215 static inline bool 1216 hrtimer_can_update_in_place(struct hrtimer *timer, struct hrtimer_clock_base *base, ktime_t expires) 1217 { 1218 struct timerqueue_linked_node *next = timerqueue_linked_next(&timer->node); 1219 struct timerqueue_linked_node *prev = timerqueue_linked_prev(&timer->node); 1220 1221 /* If the new expiry goes behind the next timer, requeue is required */ 1222 if (next && expires > next->expires) 1223 return false; 1224 1225 /* If this is the first timer, update in place */ 1226 if (!prev) 1227 return true; 1228 1229 /* Update in place when it does not go ahead of the previous one */ 1230 return expires >= prev->expires; 1231 } 1232 1233 static inline bool 1234 remove_and_enqueue_same_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 1235 const enum hrtimer_mode mode, ktime_t expires, u64 delta_ns) 1236 { 1237 bool was_first = false; 1238 1239 /* Remove it from the timer queue if active */ 1240 if (timer->is_queued) { 1241 was_first = !timerqueue_linked_prev(&timer->node); 1242 1243 /* Try to update in place to avoid the de/enqueue dance */ 1244 if (hrtimer_can_update_in_place(timer, base, expires)) { 1245 hrtimer_set_expires_range_ns(timer, expires, delta_ns); 1246 trace_hrtimer_start(timer, mode, true); 1247 if (was_first) 1248 base->expires_next = expires; 1249 return was_first; 1250 } 1251 1252 debug_hrtimer_deactivate(timer); 1253 timerqueue_linked_del(&base->active, &timer->node); 1254 } 1255 1256 /* Set the new expiry time */ 1257 hrtimer_set_expires_range_ns(timer, expires, delta_ns); 1258 1259 debug_activate(timer, mode, timer->is_queued); 1260 base->cpu_base->active_bases |= 1 << base->index; 1261 1262 /* Pairs with the lockless read in hrtimer_is_queued() */ 1263 WRITE_ONCE(timer->is_queued, HRTIMER_STATE_ENQUEUED); 1264 1265 /* If it's the first expiring timer now or again, update base */ 1266 if (timerqueue_linked_add(&base->active, &timer->node)) { 1267 base->expires_next = expires; 1268 return true; 1269 } 
1270 1271 if (was_first) 1272 base_update_next_timer(base); 1273 1274 return false; 1275 } 1276 1277 static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, 1278 const enum hrtimer_mode mode) 1279 { 1280 #ifdef CONFIG_TIME_LOW_RES 1281 /* 1282 * CONFIG_TIME_LOW_RES indicates that the system has no way to return 1283 * granular time values. For relative timers we add hrtimer_resolution 1284 * (i.e. one jiffy) to prevent short timeouts. 1285 */ 1286 timer->is_rel = mode & HRTIMER_MODE_REL; 1287 if (timer->is_rel) 1288 tim = ktime_add_safe(tim, hrtimer_resolution); 1289 #endif 1290 return tim; 1291 } 1292 1293 static void hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) 1294 { 1295 ktime_t expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); 1296 1297 /* 1298 * Reprogramming needs to be triggered, even if the next soft 1299 * hrtimer expires at the same time as the next hard 1300 * hrtimer. cpu_base->softirq_expires_next needs to be updated! 1301 */ 1302 if (expires == KTIME_MAX) 1303 return; 1304 1305 /* 1306 * cpu_base->next_timer is recomputed by __hrtimer_get_next_event() 1307 * cpu_base->expires_next is only set by hrtimer_reprogram() 1308 */ 1309 hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); 1310 } 1311 1312 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 1313 static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned) 1314 { 1315 if (static_branch_likely(&timers_migration_enabled)) { 1316 /* 1317 * If it is local and the first expiring timer keep it on the local 1318 * CPU to optimize reprogramming of the clockevent device. Also 1319 * avoid switch_hrtimer_base() overhead when local and pinned. 
1320 */ 1321 if (!is_local) 1322 return false; 1323 if (is_first || is_pinned) 1324 return true; 1325 1326 /* Honour the NOHZ full restrictions */ 1327 if (!housekeeping_cpu(smp_processor_id(), HK_TYPE_KERNEL_NOISE)) 1328 return false; 1329 1330 /* 1331 * If the tick is not stopped or need_resched() is set, then 1332 * there is no point in moving the timer somewhere else. 1333 */ 1334 return !tick_nohz_tick_stopped() || need_resched(); 1335 } 1336 return is_local; 1337 } 1338 #else 1339 static __always_inline bool hrtimer_prefer_local(bool is_local, bool is_first, bool is_pinned) 1340 { 1341 return is_local; 1342 } 1343 #endif 1344 1345 static inline bool hrtimer_keep_base(struct hrtimer *timer, bool is_local, bool is_first, 1346 bool is_pinned) 1347 { 1348 /* If the timer is running the callback it has to stay on its CPU base. */ 1349 if (unlikely(timer->base->running == timer)) 1350 return true; 1351 1352 return hrtimer_prefer_local(is_local, is_first, is_pinned); 1353 } 1354 1355 enum { 1356 HRTIMER_REPROGRAM_NONE, 1357 HRTIMER_REPROGRAM, 1358 HRTIMER_REPROGRAM_FORCE, 1359 }; 1360 1361 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, 1362 const enum hrtimer_mode mode, struct hrtimer_clock_base *base) 1363 { 1364 struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); 1365 bool is_pinned, first, was_first, keep_base = false; 1366 struct hrtimer_cpu_base *cpu_base = base->cpu_base; 1367 1368 was_first = cpu_base->next_timer == timer; 1369 is_pinned = !!(mode & HRTIMER_MODE_PINNED); 1370 1371 /* 1372 * Don't keep it local if this enqueue happens on a unplugged CPU 1373 * after hrtimer_cpu_dying() has been invoked. 
1374 */ 1375 if (likely(this_cpu_base->online)) { 1376 bool is_local = cpu_base == this_cpu_base; 1377 1378 keep_base = hrtimer_keep_base(timer, is_local, was_first, is_pinned); 1379 } 1380 1381 /* Calculate absolute expiry time for relative timers */ 1382 if (mode & HRTIMER_MODE_REL) 1383 tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); 1384 /* Compensate for low resolution granularity */ 1385 tim = hrtimer_update_lowres(timer, tim, mode); 1386 1387 /* 1388 * Remove an active timer from the queue. In case it is not queued 1389 * on the current CPU, make sure that remove_hrtimer() updates the 1390 * remote data correctly. 1391 * 1392 * If it's on the current CPU and the first expiring timer, then 1393 * skip reprogramming, keep the timer local and enforce 1394 * reprogramming later if it was the first expiring timer. This 1395 * avoids programming the underlying clock event twice (once at 1396 * removal and once after enqueue). 1397 * 1398 * @keep_base is also true if the timer callback is running on a 1399 * remote CPU and for local pinned timers. 
1400 */ 1401 if (likely(keep_base)) { 1402 first = remove_and_enqueue_same_base(timer, base, mode, tim, delta_ns); 1403 } else { 1404 /* Keep the ENQUEUED state in case it is queued */ 1405 bool was_armed = remove_hrtimer(timer, base, HRTIMER_STATE_ENQUEUED); 1406 1407 hrtimer_set_expires_range_ns(timer, tim, delta_ns); 1408 1409 /* Switch the timer base, if necessary: */ 1410 base = switch_hrtimer_base(timer, base, is_pinned); 1411 cpu_base = base->cpu_base; 1412 1413 first = enqueue_hrtimer(timer, base, mode, was_armed); 1414 } 1415 1416 /* If a deferred rearm is pending skip reprogramming the device */ 1417 if (cpu_base->deferred_rearm) { 1418 cpu_base->deferred_needs_update = true; 1419 return HRTIMER_REPROGRAM_NONE; 1420 } 1421 1422 if (!was_first || cpu_base != this_cpu_base) { 1423 /* 1424 * If the current CPU base is online, then the timer is never 1425 * queued on a remote CPU if it would be the first expiring 1426 * timer there unless the timer callback is currently executed 1427 * on the remote CPU. In the latter case the remote CPU will 1428 * re-evaluate the first expiring timer after completing the 1429 * callbacks. 1430 */ 1431 if (likely(hrtimer_base_is_online(this_cpu_base))) 1432 return first ? HRTIMER_REPROGRAM : HRTIMER_REPROGRAM_NONE; 1433 1434 /* 1435 * Timer was enqueued remote because the current base is 1436 * already offline. If the timer is the first to expire, 1437 * kick the remote CPU to reprogram the clock event. 1438 */ 1439 if (first) 1440 smp_call_function_single_async(cpu_base->cpu, &cpu_base->csd); 1441 return HRTIMER_REPROGRAM_NONE; 1442 } 1443 1444 /* 1445 * Special case for the HRTICK timer. It is frequently rearmed and most 1446 * of the time moves the expiry into the future. That's expensive in 1447 * virtual machines and it's better to take the pointless already armed 1448 * interrupt than reprogramming the hardware on every context switch. 
1449 * 1450 * If the new expiry is before the armed time, then reprogramming is 1451 * required. 1452 */ 1453 if (timer->is_lazy) { 1454 if (cpu_base->expires_next <= hrtimer_get_expires(timer)) 1455 return HRTIMER_REPROGRAM_NONE; 1456 } 1457 1458 /* 1459 * Timer was the first expiring timer and forced to stay on the 1460 * current CPU to avoid reprogramming on removal and enqueue. Force 1461 * reprogram the hardware by evaluating the new first expiring 1462 * timer. 1463 */ 1464 return HRTIMER_REPROGRAM_FORCE; 1465 } 1466 1467 static int hrtimer_start_range_ns_common(struct hrtimer *timer, ktime_t tim, 1468 u64 delta_ns, const enum hrtimer_mode mode, 1469 struct hrtimer_clock_base *base) 1470 { 1471 /* 1472 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft 1473 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard 1474 * expiry mode because unmarked timers are moved to softirq expiry. 1475 */ 1476 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) 1477 WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); 1478 else 1479 WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); 1480 1481 return __hrtimer_start_range_ns(timer, tim, delta_ns, mode, base); 1482 } 1483 1484 /** 1485 * hrtimer_start_range_ns - (re)start an hrtimer 1486 * @timer: the timer to be added 1487 * @tim: expiry time 1488 * @delta_ns: "slack" range for the timer 1489 * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or 1490 * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); 1491 * softirq based mode is considered for debug purpose only! 
1492 */ 1493 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, u64 delta_ns, 1494 const enum hrtimer_mode mode) 1495 { 1496 struct hrtimer_clock_base *base; 1497 unsigned long flags; 1498 1499 debug_hrtimer_assert_init(timer); 1500 1501 base = lock_hrtimer_base(timer, &flags); 1502 1503 switch (hrtimer_start_range_ns_common(timer, tim, delta_ns, mode, base)) { 1504 case HRTIMER_REPROGRAM: 1505 hrtimer_reprogram(timer, true); 1506 break; 1507 case HRTIMER_REPROGRAM_FORCE: 1508 hrtimer_force_reprogram(timer->base->cpu_base, 1); 1509 break; 1510 case HRTIMER_REPROGRAM_NONE: 1511 break; 1512 } 1513 1514 unlock_hrtimer_base(timer, &flags); 1515 } 1516 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 1517 1518 static inline bool hrtimer_check_user_timer(struct hrtimer *timer) 1519 { 1520 struct hrtimer_cpu_base *cpu_base = timer->base->cpu_base; 1521 ktime_t expires; 1522 1523 /* 1524 * This uses soft expires because that's the user provided 1525 * expiry time, while expires can be further in the past 1526 * due to a slack value added to the user expiry time. 1527 */ 1528 expires = hrtimer_get_softexpires(timer); 1529 1530 /* Convert to monotonic */ 1531 expires = ktime_sub(expires, timer->base->offset); 1532 1533 /* 1534 * Check whether this timer will end up as the first expiring timer in 1535 * the CPU base. If not, no further checks required as it's then 1536 * guaranteed to expire in the future. 1537 */ 1538 if (expires >= cpu_base->expires_next) 1539 return true; 1540 1541 /* Validate that the expiry time is in the future. 
*/ 1542 if (expires > ktime_get()) 1543 return true; 1544 1545 debug_hrtimer_deactivate(timer); 1546 __remove_hrtimer(timer, timer->base, HRTIMER_STATE_INACTIVE, false); 1547 trace_hrtimer_start_expired(timer); 1548 return false; 1549 } 1550 1551 /** 1552 * hrtimer_start_range_ns_user - (re)start an user controlled hrtimer 1553 * @timer: the timer to be added 1554 * @tim: expiry time 1555 * @delta_ns: "slack" range for the timer 1556 * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or 1557 * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); 1558 * softirq based mode is considered for debug purpose only! 1559 * 1560 * Returns: True when the timer was queued, false if it was already expired 1561 * 1562 * This function cannot invoke the timer callback for expired timers as it might 1563 * be called under a lock which the timer callback needs to acquire. So the 1564 * caller has to handle that case. 1565 */ 1566 bool hrtimer_start_range_ns_user(struct hrtimer *timer, ktime_t tim, 1567 u64 delta_ns, const enum hrtimer_mode mode) 1568 { 1569 struct hrtimer_clock_base *base; 1570 unsigned long flags; 1571 bool ret = true; 1572 1573 debug_hrtimer_assert_init(timer); 1574 1575 base = lock_hrtimer_base(timer, &flags); 1576 1577 switch (hrtimer_start_range_ns_common(timer, tim, delta_ns, mode, base)) { 1578 case HRTIMER_REPROGRAM: 1579 ret = hrtimer_check_user_timer(timer); 1580 if (ret) 1581 hrtimer_reprogram(timer, true); 1582 break; 1583 case HRTIMER_REPROGRAM_FORCE: 1584 ret = hrtimer_check_user_timer(timer); 1585 /* 1586 * The base must always be reevaluated, independent of the 1587 * result above because the timer was the first pending timer. 
1588 */ 1589 hrtimer_force_reprogram(timer->base->cpu_base, 1); 1590 break; 1591 case HRTIMER_REPROGRAM_NONE: 1592 break; 1593 } 1594 1595 unlock_hrtimer_base(timer, &flags); 1596 return ret; 1597 } 1598 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns_user); 1599 1600 /** 1601 * hrtimer_try_to_cancel - try to deactivate a timer 1602 * @timer: hrtimer to stop 1603 * 1604 * Returns: 1605 * 1606 * * 0 when the timer was not active 1607 * * 1 when the timer was active 1608 * * -1 when the timer is currently executing the callback function and 1609 * cannot be stopped 1610 */ 1611 int hrtimer_try_to_cancel(struct hrtimer *timer) 1612 { 1613 struct hrtimer_clock_base *base; 1614 unsigned long flags; 1615 int ret = -1; 1616 1617 /* 1618 * Check lockless first. If the timer is not active (neither 1619 * enqueued nor running the callback, nothing to do here. The 1620 * base lock does not serialize against a concurrent enqueue, 1621 * so we can avoid taking it. 1622 */ 1623 if (!hrtimer_active(timer)) 1624 return 0; 1625 1626 base = lock_hrtimer_base(timer, &flags); 1627 1628 if (!hrtimer_callback_running(timer)) { 1629 ret = remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE); 1630 if (ret) 1631 trace_hrtimer_cancel(timer); 1632 } 1633 1634 unlock_hrtimer_base(timer, &flags); 1635 1636 return ret; 1637 1638 } 1639 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); 1640 1641 #ifdef CONFIG_PREEMPT_RT 1642 static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) 1643 { 1644 spin_lock_init(&base->softirq_expiry_lock); 1645 } 1646 1647 static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) 1648 __acquires(&base->softirq_expiry_lock) 1649 { 1650 spin_lock(&base->softirq_expiry_lock); 1651 } 1652 1653 static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) 1654 __releases(&base->softirq_expiry_lock) 1655 { 1656 spin_unlock(&base->softirq_expiry_lock); 1657 } 1658 1659 /* 1660 * The counterpart to hrtimer_cancel_wait_running(). 
1661 * 1662 * If there is a waiter for cpu_base->expiry_lock, then it was waiting for 1663 * the timer callback to finish. Drop expiry_lock and reacquire it. That 1664 * allows the waiter to acquire the lock and make progress. 1665 */ 1666 static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, unsigned long flags) 1667 { 1668 if (atomic_read(&cpu_base->timer_waiters)) { 1669 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1670 spin_unlock(&cpu_base->softirq_expiry_lock); 1671 spin_lock(&cpu_base->softirq_expiry_lock); 1672 raw_spin_lock_irq(&cpu_base->lock); 1673 } 1674 } 1675 1676 #ifdef CONFIG_SMP 1677 static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) 1678 { 1679 return base == &migration_base; 1680 } 1681 #else 1682 static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) 1683 { 1684 return false; 1685 } 1686 #endif 1687 1688 /* 1689 * This function is called on PREEMPT_RT kernels when the fast path 1690 * deletion of a timer failed because the timer callback function was 1691 * running. 1692 * 1693 * This prevents priority inversion: if the soft irq thread is preempted 1694 * in the middle of a timer callback, then calling hrtimer_cancel() can 1695 * lead to two issues: 1696 * 1697 * - If the caller is on a remote CPU then it has to spin wait for the timer 1698 * handler to complete. This can result in unbound priority inversion. 1699 * 1700 * - If the caller originates from the task which preempted the timer 1701 * handler on the same CPU, then spin waiting for the timer handler to 1702 * complete is never going to end. 1703 */ 1704 void hrtimer_cancel_wait_running(const struct hrtimer *timer) 1705 { 1706 /* Lockless read. Prevent the compiler from reloading it below */ 1707 struct hrtimer_clock_base *base = READ_ONCE(timer->base); 1708 1709 /* 1710 * Just relax if the timer expires in hard interrupt context or if 1711 * it is currently on the migration base. 
1712 */ 1713 if (!timer->is_soft || is_migration_base(base)) { 1714 cpu_relax(); 1715 return; 1716 } 1717 1718 /* 1719 * Mark the base as contended and grab the expiry lock, which is 1720 * held by the softirq across the timer callback. Drop the lock 1721 * immediately so the softirq can expire the next timer. In theory 1722 * the timer could already be running again, but that's more than 1723 * unlikely and just causes another wait loop. 1724 */ 1725 atomic_inc(&base->cpu_base->timer_waiters); 1726 spin_lock_bh(&base->cpu_base->softirq_expiry_lock); 1727 atomic_dec(&base->cpu_base->timer_waiters); 1728 spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); 1729 } 1730 #else 1731 static inline void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } 1732 static inline void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } 1733 static inline void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } 1734 static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, unsigned long fl) { } 1735 #endif 1736 1737 /** 1738 * hrtimer_cancel - cancel a timer and wait for the handler to finish. 
1739 * @timer: the timer to be cancelled 1740 * 1741 * Returns: 1742 * 0 when the timer was not active 1743 * 1 when the timer was active 1744 */ 1745 int hrtimer_cancel(struct hrtimer *timer) 1746 { 1747 int ret; 1748 1749 do { 1750 ret = hrtimer_try_to_cancel(timer); 1751 1752 if (ret < 0) 1753 hrtimer_cancel_wait_running(timer); 1754 } while (ret < 0); 1755 return ret; 1756 } 1757 EXPORT_SYMBOL_GPL(hrtimer_cancel); 1758 1759 /** 1760 * __hrtimer_get_remaining - get remaining time for the timer 1761 * @timer: the timer to read 1762 * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y 1763 */ 1764 ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) 1765 { 1766 unsigned long flags; 1767 ktime_t rem; 1768 1769 lock_hrtimer_base(timer, &flags); 1770 if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) 1771 rem = hrtimer_expires_remaining_adjusted(timer); 1772 else 1773 rem = hrtimer_expires_remaining(timer); 1774 unlock_hrtimer_base(timer, &flags); 1775 1776 return rem; 1777 } 1778 EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); 1779 1780 #ifdef CONFIG_NO_HZ_COMMON 1781 /** 1782 * hrtimer_get_next_event - get the time until next expiry event 1783 * 1784 * Returns the next expiry time or KTIME_MAX if no timer is pending. 1785 */ 1786 ktime_t hrtimer_get_next_event(void) 1787 { 1788 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1789 ktime_t expires = KTIME_MAX; 1790 1791 guard(raw_spinlock_irqsave)(&cpu_base->lock); 1792 if (!hrtimer_hres_active(cpu_base)) 1793 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); 1794 1795 return expires; 1796 } 1797 1798 /** 1799 * hrtimer_next_event_without - time until next expiry event w/o one timer 1800 * @exclude: timer to exclude 1801 * 1802 * Returns the next expiry time over all timers except for the @exclude one or 1803 * KTIME_MAX if none of them is pending. 
1804 */ 1805 ktime_t hrtimer_next_event_without(const struct hrtimer *exclude) 1806 { 1807 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1808 ktime_t expires = KTIME_MAX; 1809 unsigned int active; 1810 1811 guard(raw_spinlock_irqsave)(&cpu_base->lock); 1812 if (!hrtimer_hres_active(cpu_base)) 1813 return expires; 1814 1815 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; 1816 if (active && !cpu_base->softirq_activated) 1817 expires = hrtimer_bases_next_event_without(cpu_base, exclude, active, KTIME_MAX); 1818 1819 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; 1820 if (!active) 1821 return expires; 1822 return hrtimer_bases_next_event_without(cpu_base, exclude, active, expires); 1823 } 1824 #endif 1825 1826 static inline int hrtimer_clockid_to_base(clockid_t clock_id) 1827 { 1828 switch (clock_id) { 1829 case CLOCK_MONOTONIC: 1830 return HRTIMER_BASE_MONOTONIC; 1831 case CLOCK_REALTIME: 1832 return HRTIMER_BASE_REALTIME; 1833 case CLOCK_BOOTTIME: 1834 return HRTIMER_BASE_BOOTTIME; 1835 case CLOCK_TAI: 1836 return HRTIMER_BASE_TAI; 1837 default: 1838 WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); 1839 return HRTIMER_BASE_MONOTONIC; 1840 } 1841 } 1842 1843 static ktime_t __hrtimer_cb_get_time(clockid_t clock_id) 1844 { 1845 switch (clock_id) { 1846 case CLOCK_MONOTONIC: 1847 return ktime_get(); 1848 case CLOCK_REALTIME: 1849 return ktime_get_real(); 1850 case CLOCK_BOOTTIME: 1851 return ktime_get_boottime(); 1852 case CLOCK_TAI: 1853 return ktime_get_clocktai(); 1854 default: 1855 WARN(1, "Invalid clockid %d. 
Using MONOTONIC\n", clock_id); 1856 return ktime_get(); 1857 } 1858 } 1859 1860 ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) 1861 { 1862 return __hrtimer_cb_get_time(timer->base->clockid); 1863 } 1864 EXPORT_SYMBOL_GPL(hrtimer_cb_get_time); 1865 1866 static void __hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*fn)(struct hrtimer *), 1867 clockid_t clock_id, enum hrtimer_mode mode) 1868 { 1869 bool softtimer = !!(mode & HRTIMER_MODE_SOFT); 1870 struct hrtimer_cpu_base *cpu_base; 1871 int base; 1872 1873 /* 1874 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly 1875 * marked for hard interrupt expiry mode are moved into soft 1876 * interrupt context for latency reasons and because the callbacks 1877 * can invoke functions which might sleep on RT, e.g. spin_lock(). 1878 */ 1879 if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) 1880 softtimer = true; 1881 1882 memset(timer, 0, sizeof(struct hrtimer)); 1883 1884 cpu_base = raw_cpu_ptr(&hrtimer_bases); 1885 1886 /* 1887 * POSIX magic: Relative CLOCK_REALTIME timers are not affected by 1888 * clock modifications, so they needs to become CLOCK_MONOTONIC to 1889 * ensure POSIX compliance. 1890 */ 1891 if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) 1892 clock_id = CLOCK_MONOTONIC; 1893 1894 base = softtimer ? 
HRTIMER_MAX_CLOCK_BASES / 2 : 0; 1895 base += hrtimer_clockid_to_base(clock_id); 1896 timer->is_soft = softtimer; 1897 timer->is_hard = !!(mode & HRTIMER_MODE_HARD); 1898 timer->is_lazy = !!(mode & HRTIMER_MODE_LAZY_REARM); 1899 timer->base = &cpu_base->clock_base[base]; 1900 timerqueue_linked_init(&timer->node); 1901 1902 if (WARN_ON_ONCE(!fn)) 1903 ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout; 1904 else 1905 ACCESS_PRIVATE(timer, function) = fn; 1906 } 1907 1908 /** 1909 * hrtimer_setup - initialize a timer to the given clock 1910 * @timer: the timer to be initialized 1911 * @function: the callback function 1912 * @clock_id: the clock to be used 1913 * @mode: The modes which are relevant for initialization: 1914 * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, 1915 * HRTIMER_MODE_REL_SOFT 1916 * 1917 * The PINNED variants of the above can be handed in, 1918 * but the PINNED bit is ignored as pinning happens 1919 * when the hrtimer is started 1920 */ 1921 void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), 1922 clockid_t clock_id, enum hrtimer_mode mode) 1923 { 1924 debug_setup(timer, clock_id, mode); 1925 __hrtimer_setup(timer, function, clock_id, mode); 1926 } 1927 EXPORT_SYMBOL_GPL(hrtimer_setup); 1928 1929 /** 1930 * hrtimer_setup_on_stack - initialize a timer on stack memory 1931 * @timer: The timer to be initialized 1932 * @function: the callback function 1933 * @clock_id: The clock to be used 1934 * @mode: The timer mode 1935 * 1936 * Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack 1937 * memory. 
1938 */ 1939 void hrtimer_setup_on_stack(struct hrtimer *timer, 1940 enum hrtimer_restart (*function)(struct hrtimer *), 1941 clockid_t clock_id, enum hrtimer_mode mode) 1942 { 1943 debug_setup_on_stack(timer, clock_id, mode); 1944 __hrtimer_setup(timer, function, clock_id, mode); 1945 } 1946 EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack); 1947 1948 /* 1949 * A timer is active, when it is enqueued into the rbtree or the 1950 * callback function is running or it's in the state of being migrated 1951 * to another cpu. 1952 * 1953 * It is important for this function to not return a false negative. 1954 */ 1955 bool hrtimer_active(const struct hrtimer *timer) 1956 { 1957 struct hrtimer_clock_base *base; 1958 unsigned int seq; 1959 1960 do { 1961 base = READ_ONCE(timer->base); 1962 seq = raw_read_seqcount_begin(&base->seq); 1963 1964 if (timer->is_queued || base->running == timer) 1965 return true; 1966 1967 } while (read_seqcount_retry(&base->seq, seq) || base != READ_ONCE(timer->base)); 1968 1969 return false; 1970 } 1971 EXPORT_SYMBOL_GPL(hrtimer_active); 1972 1973 /* 1974 * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3 1975 * distinct sections: 1976 * 1977 * - queued: the timer is queued 1978 * - callback: the timer is being ran 1979 * - post: the timer is inactive or (re)queued 1980 * 1981 * On the read side we ensure we observe timer->is_queued and cpu_base->running 1982 * from the same section, if anything changed while we looked at it, we retry. 1983 * This includes timer->base changing because sequence numbers alone are 1984 * insufficient for that. 1985 * 1986 * The sequence numbers are required because otherwise we could still observe 1987 * a false negative if the read side got smeared over multiple consecutive 1988 * __run_hrtimer() invocations. 
1989 */ 1990 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, struct hrtimer_clock_base *base, 1991 struct hrtimer *timer, ktime_t now, unsigned long flags) 1992 __must_hold(&cpu_base->lock) 1993 { 1994 enum hrtimer_restart (*fn)(struct hrtimer *); 1995 bool expires_in_hardirq; 1996 int restart; 1997 1998 lockdep_assert_held(&cpu_base->lock); 1999 2000 debug_hrtimer_deactivate(timer); 2001 base->running = timer; 2002 2003 /* 2004 * Separate the ->running assignment from the ->is_queued assignment. 2005 * 2006 * As with a regular write barrier, this ensures the read side in 2007 * hrtimer_active() cannot observe base->running == NULL && 2008 * timer->is_queued == INACTIVE. 2009 */ 2010 raw_write_seqcount_barrier(&base->seq); 2011 2012 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, false); 2013 fn = ACCESS_PRIVATE(timer, function); 2014 2015 /* 2016 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the 2017 * timer is restarted with a period then it becomes an absolute 2018 * timer. If its not restarted it does not matter. 2019 */ 2020 if (IS_ENABLED(CONFIG_TIME_LOW_RES)) 2021 timer->is_rel = false; 2022 2023 /* 2024 * The timer is marked as running in the CPU base, so it is 2025 * protected against migration to a different CPU even if the lock 2026 * is dropped. 2027 */ 2028 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 2029 trace_hrtimer_expire_entry(timer, now); 2030 expires_in_hardirq = lockdep_hrtimer_enter(timer); 2031 2032 restart = fn(timer); 2033 2034 lockdep_hrtimer_exit(expires_in_hardirq); 2035 trace_hrtimer_expire_exit(timer); 2036 raw_spin_lock_irq(&cpu_base->lock); 2037 2038 /* 2039 * Note: We clear the running state after enqueue_hrtimer and 2040 * we do not reprogram the event hardware. 
Happens either in 2041 * hrtimer_start_range_ns() or in hrtimer_interrupt() 2042 * 2043 * Note: Because we dropped the cpu_base->lock above, 2044 * hrtimer_start_range_ns() can have popped in and enqueued the timer 2045 * for us already. 2046 */ 2047 if (restart == HRTIMER_RESTART && !timer->is_queued) 2048 enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS, false); 2049 2050 /* 2051 * Separate the ->running assignment from the ->is_queued assignment. 2052 * 2053 * As with a regular write barrier, this ensures the read side in 2054 * hrtimer_active() cannot observe base->running.timer == NULL && 2055 * timer->is_queued == INACTIVE. 2056 */ 2057 raw_write_seqcount_barrier(&base->seq); 2058 2059 WARN_ON_ONCE(base->running != timer); 2060 base->running = NULL; 2061 } 2062 2063 static __always_inline struct hrtimer *clock_base_next_timer_safe(struct hrtimer_clock_base *base) 2064 { 2065 struct timerqueue_linked_node *next = timerqueue_linked_first(&base->active); 2066 2067 return next ? hrtimer_from_timerqueue_node(next) : NULL; 2068 } 2069 2070 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, 2071 unsigned long flags, unsigned int active_mask) 2072 { 2073 unsigned int active = cpu_base->active_bases & active_mask; 2074 struct hrtimer_clock_base *base; 2075 2076 for_each_active_base(base, cpu_base, active) { 2077 ktime_t basenow = ktime_add(now, base->offset); 2078 struct hrtimer *timer; 2079 2080 while ((timer = clock_base_next_timer(base))) { 2081 /* 2082 * The immediate goal for using the softexpires is 2083 * minimizing wakeups, not running timers at the 2084 * earliest interrupt after their soft expiration. 2085 * This allows us to avoid using a Priority Search 2086 * Tree, which can answer a stabbing query for 2087 * overlapping intervals and instead use the simple 2088 * BST we already have. 
2089 * We don't add extra wakeups by delaying timers that 2090 * are right-of a not yet expired timer, because that 2091 * timer will have to trigger a wakeup anyway. 2092 */ 2093 if (basenow < hrtimer_get_softexpires(timer)) 2094 break; 2095 2096 __run_hrtimer(cpu_base, base, timer, basenow, flags); 2097 if (active_mask == HRTIMER_ACTIVE_SOFT) 2098 hrtimer_sync_wait_running(cpu_base, flags); 2099 } 2100 } 2101 } 2102 2103 static __latent_entropy void hrtimer_run_softirq(void) 2104 { 2105 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2106 unsigned long flags; 2107 ktime_t now; 2108 2109 hrtimer_cpu_base_lock_expiry(cpu_base); 2110 raw_spin_lock_irqsave(&cpu_base->lock, flags); 2111 2112 now = hrtimer_update_base(cpu_base); 2113 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); 2114 2115 cpu_base->softirq_activated = false; 2116 hrtimer_update_softirq_timer(cpu_base, true); 2117 2118 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 2119 hrtimer_cpu_base_unlock_expiry(cpu_base); 2120 } 2121 2122 #ifdef CONFIG_HIGH_RES_TIMERS 2123 2124 /* 2125 * Very similar to hrtimer_force_reprogram(), except it deals with 2126 * deferred_rearm and hang_detected. 2127 */ 2128 static void hrtimer_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next, bool deferred) 2129 { 2130 cpu_base->expires_next = expires_next; 2131 cpu_base->deferred_rearm = false; 2132 2133 if (unlikely(cpu_base->hang_detected)) { 2134 /* 2135 * Give the system a chance to do something else than looping 2136 * on hrtimer interrupts. 
2137 */ 2138 expires_next = ktime_add_ns(ktime_get(), 2139 min(100 * NSEC_PER_MSEC, cpu_base->max_hang_time)); 2140 } 2141 hrtimer_rearm_event(expires_next, deferred); 2142 } 2143 2144 #ifdef CONFIG_HRTIMER_REARM_DEFERRED 2145 void __hrtimer_rearm_deferred(void) 2146 { 2147 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2148 ktime_t expires_next; 2149 2150 if (!cpu_base->deferred_rearm) 2151 return; 2152 2153 guard(raw_spinlock)(&cpu_base->lock); 2154 if (cpu_base->deferred_needs_update) { 2155 hrtimer_update_base(cpu_base); 2156 expires_next = hrtimer_update_next_event(cpu_base); 2157 } else { 2158 /* No timer added/removed. Use the cached value */ 2159 expires_next = cpu_base->deferred_expires_next; 2160 } 2161 hrtimer_rearm(cpu_base, expires_next, true); 2162 } 2163 2164 static __always_inline void 2165 hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next) 2166 { 2167 /* hrtimer_interrupt() just re-evaluated the first expiring timer */ 2168 cpu_base->deferred_needs_update = false; 2169 /* Cache the expiry time */ 2170 cpu_base->deferred_expires_next = expires_next; 2171 set_thread_flag(TIF_HRTIMER_REARM); 2172 } 2173 #else /* CONFIG_HRTIMER_REARM_DEFERRED */ 2174 static __always_inline void 2175 hrtimer_interrupt_rearm(struct hrtimer_cpu_base *cpu_base, ktime_t expires_next) 2176 { 2177 hrtimer_rearm(cpu_base, expires_next, false); 2178 } 2179 #endif /* !CONFIG_HRTIMER_REARM_DEFERRED */ 2180 2181 /* 2182 * High resolution timer interrupt 2183 * Called with interrupts disabled 2184 */ 2185 void hrtimer_interrupt(struct clock_event_device *dev) 2186 { 2187 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2188 ktime_t expires_next, now, entry_time, delta; 2189 unsigned long flags; 2190 int retries = 0; 2191 2192 BUG_ON(!cpu_base->hres_active); 2193 cpu_base->nr_events++; 2194 dev->next_event = KTIME_MAX; 2195 dev->next_event_forced = 0; 2196 2197 raw_spin_lock_irqsave(&cpu_base->lock, flags); 2198 
entry_time = now = hrtimer_update_base(cpu_base); 2199 retry: 2200 cpu_base->deferred_rearm = true; 2201 /* 2202 * Set expires_next to KTIME_MAX, which prevents that remote CPUs queue 2203 * timers while __hrtimer_run_queues() is expiring the clock bases. 2204 * Timers which are re/enqueued on the local CPU are not affected by 2205 * this. 2206 */ 2207 cpu_base->expires_next = KTIME_MAX; 2208 2209 if (!ktime_before(now, cpu_base->softirq_expires_next)) { 2210 cpu_base->softirq_expires_next = KTIME_MAX; 2211 cpu_base->softirq_activated = true; 2212 raise_timer_softirq(HRTIMER_SOFTIRQ); 2213 } 2214 2215 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); 2216 2217 /* 2218 * The next timer was already expired due to: 2219 * - tracing 2220 * - long lasting callbacks 2221 * - being scheduled away when running in a VM 2222 * 2223 * We need to prevent that we loop forever in the hrtiner interrupt 2224 * routine. We give it 3 attempts to avoid overreacting on some 2225 * spurious event. 
2226 */ 2227 now = hrtimer_update_base(cpu_base); 2228 expires_next = hrtimer_update_next_event(cpu_base); 2229 cpu_base->hang_detected = false; 2230 if (expires_next < now) { 2231 if (++retries < 3) 2232 goto retry; 2233 2234 delta = ktime_sub(now, entry_time); 2235 cpu_base->max_hang_time = max_t(unsigned int, cpu_base->max_hang_time, delta); 2236 cpu_base->nr_hangs++; 2237 cpu_base->hang_detected = true; 2238 } 2239 2240 hrtimer_interrupt_rearm(cpu_base, expires_next); 2241 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 2242 } 2243 2244 #endif /* !CONFIG_HIGH_RES_TIMERS */ 2245 2246 /* 2247 * Called from run_local_timers in hardirq context every jiffy 2248 */ 2249 void hrtimer_run_queues(void) 2250 { 2251 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2252 unsigned long flags; 2253 ktime_t now; 2254 2255 if (hrtimer_hres_active(cpu_base)) 2256 return; 2257 2258 /* 2259 * This _is_ ugly: We have to check periodically, whether we 2260 * can switch to highres and / or nohz mode. The clocksource 2261 * switch happens with xtime_lock held. Notification from 2262 * there only sets the check bit in the tick_oneshot code, 2263 * otherwise we might deadlock vs. xtime_lock. 
2264 */ 2265 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) { 2266 hrtimer_switch_to_hres(); 2267 return; 2268 } 2269 2270 raw_spin_lock_irqsave(&cpu_base->lock, flags); 2271 now = hrtimer_update_base(cpu_base); 2272 2273 if (!ktime_before(now, cpu_base->softirq_expires_next)) { 2274 cpu_base->softirq_expires_next = KTIME_MAX; 2275 cpu_base->softirq_activated = true; 2276 raise_timer_softirq(HRTIMER_SOFTIRQ); 2277 } 2278 2279 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); 2280 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 2281 } 2282 2283 /* 2284 * Sleep related functions: 2285 */ 2286 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) 2287 { 2288 struct hrtimer_sleeper *t = container_of(timer, struct hrtimer_sleeper, timer); 2289 struct task_struct *task = t->task; 2290 2291 t->task = NULL; 2292 if (task) 2293 wake_up_process(task); 2294 2295 return HRTIMER_NORESTART; 2296 } 2297 2298 /** 2299 * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer 2300 * @sl: sleeper to be started 2301 * @mode: timer mode abs/rel 2302 * 2303 * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers 2304 * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) 2305 */ 2306 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, enum hrtimer_mode mode) 2307 { 2308 /* 2309 * Make the enqueue delivery mode check work on RT. If the sleeper 2310 * was initialized for hard interrupt delivery, force the mode bit. 2311 * This is a special case for hrtimer_sleepers because 2312 * __hrtimer_setup_sleeper() determines the delivery mode on RT so the 2313 * fiddling with this decision is avoided at the call sites. 
2314 */ 2315 if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) 2316 mode |= HRTIMER_MODE_HARD; 2317 2318 /* If already expired, clear the task pointer and set current state to running */ 2319 if (!hrtimer_start_expires_user(&sl->timer, mode)) { 2320 sl->task = NULL; 2321 __set_current_state(TASK_RUNNING); 2322 } 2323 } 2324 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); 2325 2326 static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id, 2327 enum hrtimer_mode mode) 2328 { 2329 /* 2330 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly 2331 * marked for hard interrupt expiry mode are moved into soft 2332 * interrupt context either for latency reasons or because the 2333 * hrtimer callback takes regular spinlocks or invokes other 2334 * functions which are not suitable for hard interrupt context on 2335 * PREEMPT_RT. 2336 * 2337 * The hrtimer_sleeper callback is RT compatible in hard interrupt 2338 * context, but there is a latency concern: Untrusted userspace can 2339 * spawn many threads which arm timers for the same expiry time on 2340 * the same CPU. That causes a latency spike due to the wakeup of 2341 * a gazillion threads. 2342 * 2343 * OTOH, privileged real-time user space applications rely on the 2344 * low latency of hard interrupt wakeups. If the current task is in 2345 * a real-time scheduling class, mark the mode for hard interrupt 2346 * expiry. 
2347 */ 2348 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 2349 if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT)) 2350 mode |= HRTIMER_MODE_HARD; 2351 } 2352 2353 __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode); 2354 sl->task = current; 2355 } 2356 2357 /** 2358 * hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory 2359 * @sl: sleeper to be initialized 2360 * @clock_id: the clock to be used 2361 * @mode: timer mode abs/rel 2362 */ 2363 void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, clockid_t clock_id, 2364 enum hrtimer_mode mode) 2365 { 2366 debug_setup_on_stack(&sl->timer, clock_id, mode); 2367 __hrtimer_setup_sleeper(sl, clock_id, mode); 2368 } 2369 EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack); 2370 2371 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) 2372 { 2373 switch(restart->nanosleep.type) { 2374 #ifdef CONFIG_COMPAT_32BIT_TIME 2375 case TT_COMPAT: 2376 if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp)) 2377 return -EFAULT; 2378 break; 2379 #endif 2380 case TT_NATIVE: 2381 if (put_timespec64(ts, restart->nanosleep.rmtp)) 2382 return -EFAULT; 2383 break; 2384 default: 2385 BUG(); 2386 } 2387 return -ERESTART_RESTARTBLOCK; 2388 } 2389 2390 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) 2391 { 2392 struct restart_block *restart; 2393 2394 do { 2395 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); 2396 hrtimer_sleeper_start_expires(t, mode); 2397 2398 if (likely(t->task)) 2399 schedule(); 2400 2401 hrtimer_cancel(&t->timer); 2402 mode = HRTIMER_MODE_ABS; 2403 2404 } while (t->task && !signal_pending(current)); 2405 2406 __set_current_state(TASK_RUNNING); 2407 2408 if (!t->task) 2409 return 0; 2410 2411 restart = ¤t->restart_block; 2412 if (restart->nanosleep.type != TT_NONE) { 2413 ktime_t rem = hrtimer_expires_remaining(&t->timer); 2414 struct timespec64 rmt; 2415 2416 if (rem <= 0) 2417 return 0; 2418 rmt = 
ktime_to_timespec64(rem); 2419 2420 return nanosleep_copyout(restart, &rmt); 2421 } 2422 return -ERESTART_RESTARTBLOCK; 2423 } 2424 2425 static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) 2426 { 2427 struct hrtimer_sleeper t; 2428 int ret; 2429 2430 hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS); 2431 hrtimer_set_expires(&t.timer, restart->nanosleep.expires); 2432 ret = do_nanosleep(&t, HRTIMER_MODE_ABS); 2433 destroy_hrtimer_on_stack(&t.timer); 2434 return ret; 2435 } 2436 2437 long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, const clockid_t clockid) 2438 { 2439 struct restart_block *restart; 2440 struct hrtimer_sleeper t; 2441 int ret; 2442 2443 hrtimer_setup_sleeper_on_stack(&t, clockid, mode); 2444 hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); 2445 ret = do_nanosleep(&t, mode); 2446 if (ret != -ERESTART_RESTARTBLOCK) 2447 goto out; 2448 2449 /* Absolute timers do not update the rmtp value and restart: */ 2450 if (mode == HRTIMER_MODE_ABS) { 2451 ret = -ERESTARTNOHAND; 2452 goto out; 2453 } 2454 2455 restart = ¤t->restart_block; 2456 restart->nanosleep.clockid = t.timer.base->clockid; 2457 restart->nanosleep.expires = hrtimer_get_expires(&t.timer); 2458 set_restart_fn(restart, hrtimer_nanosleep_restart); 2459 out: 2460 destroy_hrtimer_on_stack(&t.timer); 2461 return ret; 2462 } 2463 2464 #ifdef CONFIG_64BIT 2465 2466 SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, 2467 struct __kernel_timespec __user *, rmtp) 2468 { 2469 struct timespec64 tu; 2470 2471 if (get_timespec64(&tu, rqtp)) 2472 return -EFAULT; 2473 2474 if (!timespec64_valid(&tu)) 2475 return -EINVAL; 2476 2477 current->restart_block.fn = do_no_restart_syscall; 2478 current->restart_block.nanosleep.type = rmtp ? 
TT_NATIVE : TT_NONE; 2479 current->restart_block.nanosleep.rmtp = rmtp; 2480 return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); 2481 } 2482 2483 #endif 2484 2485 #ifdef CONFIG_COMPAT_32BIT_TIME 2486 2487 SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, 2488 struct old_timespec32 __user *, rmtp) 2489 { 2490 struct timespec64 tu; 2491 2492 if (get_old_timespec32(&tu, rqtp)) 2493 return -EFAULT; 2494 2495 if (!timespec64_valid(&tu)) 2496 return -EINVAL; 2497 2498 current->restart_block.fn = do_no_restart_syscall; 2499 current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; 2500 current->restart_block.nanosleep.compat_rmtp = rmtp; 2501 return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, CLOCK_MONOTONIC); 2502 } 2503 #endif 2504 2505 /* 2506 * Functions related to boot-time initialization: 2507 */ 2508 int hrtimers_prepare_cpu(unsigned int cpu) 2509 { 2510 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 2511 2512 for (int i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 2513 struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; 2514 2515 clock_b->cpu_base = cpu_base; 2516 seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); 2517 timerqueue_linked_init_head(&clock_b->active); 2518 } 2519 2520 cpu_base->cpu = cpu; 2521 hrtimer_cpu_base_init_expiry_lock(cpu_base); 2522 return 0; 2523 } 2524 2525 int hrtimers_cpu_starting(unsigned int cpu) 2526 { 2527 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2528 2529 /* Clear out any left over state from a CPU down operation */ 2530 cpu_base->active_bases = 0; 2531 cpu_base->hres_active = false; 2532 cpu_base->hang_detected = false; 2533 cpu_base->next_timer = NULL; 2534 cpu_base->softirq_next_timer = NULL; 2535 cpu_base->expires_next = KTIME_MAX; 2536 cpu_base->softirq_expires_next = KTIME_MAX; 2537 cpu_base->softirq_activated = false; 2538 cpu_base->online = true; 2539 return 0; 2540 } 2541 2542 
#ifdef CONFIG_HOTPLUG_CPU 2543 2544 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, 2545 struct hrtimer_clock_base *new_base) 2546 { 2547 struct timerqueue_linked_node *node; 2548 struct hrtimer *timer; 2549 2550 while ((node = timerqueue_linked_first(&old_base->active))) { 2551 timer = hrtimer_from_timerqueue_node(node); 2552 BUG_ON(hrtimer_callback_running(timer)); 2553 debug_hrtimer_deactivate(timer); 2554 2555 /* 2556 * Mark it as ENQUEUED not INACTIVE otherwise the 2557 * timer could be seen as !active and just vanish away 2558 * under us on another CPU 2559 */ 2560 __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, false); 2561 timer->base = new_base; 2562 /* 2563 * Enqueue the timers on the new cpu. This does not 2564 * reprogram the event device in case the timer 2565 * expires before the earliest on this CPU, but we run 2566 * hrtimer_interrupt after we migrated everything to 2567 * sort out already expired timers and reprogram the 2568 * event device. 2569 */ 2570 enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS, true); 2571 } 2572 } 2573 2574 int hrtimers_cpu_dying(unsigned int dying_cpu) 2575 { 2576 int ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); 2577 struct hrtimer_cpu_base *old_base, *new_base; 2578 2579 old_base = this_cpu_ptr(&hrtimer_bases); 2580 new_base = &per_cpu(hrtimer_bases, ncpu); 2581 2582 /* 2583 * The caller is globally serialized and nobody else 2584 * takes two locks at once, deadlock is not possible. 
2585 */ 2586 raw_spin_lock(&old_base->lock); 2587 raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); 2588 2589 for (int i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) 2590 migrate_hrtimer_list(&old_base->clock_base[i], &new_base->clock_base[i]); 2591 2592 /* Tell the other CPU to retrigger the next event */ 2593 smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); 2594 2595 raw_spin_unlock(&new_base->lock); 2596 old_base->online = false; 2597 raw_spin_unlock(&old_base->lock); 2598 2599 return 0; 2600 } 2601 2602 #endif /* CONFIG_HOTPLUG_CPU */ 2603 2604 void __init hrtimers_init(void) 2605 { 2606 hrtimers_prepare_cpu(smp_processor_id()); 2607 hrtimers_cpu_starting(smp_processor_id()); 2608 open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); 2609 } 2610