1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org> 4 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar 5 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner 6 * 7 * High-resolution kernel timers 8 * 9 * In contrast to the low-resolution timeout API, aka timer wheel, 10 * hrtimers provide finer resolution and accuracy depending on system 11 * configuration and capabilities. 12 * 13 * Started by: Thomas Gleixner and Ingo Molnar 14 * 15 * Credits: 16 * Based on the original timer wheel code 17 * 18 * Help, testing, suggestions, bugfixes, improvements were 19 * provided by: 20 * 21 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel 22 * et. al. 23 */ 24 25 #include <linux/cpu.h> 26 #include <linux/export.h> 27 #include <linux/percpu.h> 28 #include <linux/hrtimer.h> 29 #include <linux/notifier.h> 30 #include <linux/syscalls.h> 31 #include <linux/interrupt.h> 32 #include <linux/tick.h> 33 #include <linux/err.h> 34 #include <linux/debugobjects.h> 35 #include <linux/sched/signal.h> 36 #include <linux/sched/sysctl.h> 37 #include <linux/sched/rt.h> 38 #include <linux/sched/deadline.h> 39 #include <linux/sched/nohz.h> 40 #include <linux/sched/debug.h> 41 #include <linux/sched/isolation.h> 42 #include <linux/timer.h> 43 #include <linux/freezer.h> 44 #include <linux/compat.h> 45 46 #include <linux/uaccess.h> 47 48 #include <trace/events/timer.h> 49 50 #include "tick-internal.h" 51 52 /* 53 * The resolution of the clocks. The resolution value is returned in 54 * the clock_getres() system call to give application programmers an 55 * idea of the (in)accuracy of timers. Timer values are rounded up to 56 * this resolution values. 57 */ 58 #define HIGH_RES_NSEC 1 59 60 /* 61 * Masks for selecting the soft and hard context timers from 62 * cpu_base->active 63 */ 64 #define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) 65 #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) 66 #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) 67 #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) 68 69 static void retrigger_next_event(void *arg); 70 static ktime_t __hrtimer_cb_get_time(clockid_t clock_id); 71 72 /* 73 * The timer bases: 74 * 75 * There are more clockids than hrtimer bases. Thus, we index 76 * into the timer bases by the hrtimer_base_type enum. When trying 77 * to reach a base using a clockid, hrtimer_clockid_to_base() 78 * is used to convert from clockid to the proper hrtimer_base_type. 79 */ 80 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 81 { 82 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), 83 .clock_base = 84 { 85 { 86 .index = HRTIMER_BASE_MONOTONIC, 87 .clockid = CLOCK_MONOTONIC, 88 }, 89 { 90 .index = HRTIMER_BASE_REALTIME, 91 .clockid = CLOCK_REALTIME, 92 }, 93 { 94 .index = HRTIMER_BASE_BOOTTIME, 95 .clockid = CLOCK_BOOTTIME, 96 }, 97 { 98 .index = HRTIMER_BASE_TAI, 99 .clockid = CLOCK_TAI, 100 }, 101 { 102 .index = HRTIMER_BASE_MONOTONIC_SOFT, 103 .clockid = CLOCK_MONOTONIC, 104 }, 105 { 106 .index = HRTIMER_BASE_REALTIME_SOFT, 107 .clockid = CLOCK_REALTIME, 108 }, 109 { 110 .index = HRTIMER_BASE_BOOTTIME_SOFT, 111 .clockid = CLOCK_BOOTTIME, 112 }, 113 { 114 .index = HRTIMER_BASE_TAI_SOFT, 115 .clockid = CLOCK_TAI, 116 }, 117 }, 118 .csd = CSD_INIT(retrigger_next_event, NULL) 119 }; 120 121 static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) 122 { 123 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) 124 return true; 125 else 126 return likely(base->online); 127 } 128 129 /* 130 * Functions and macros which are different for UP/SMP systems are kept in a 131 * single place 132 */ 133 #ifdef CONFIG_SMP 134 135 /* 136 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() 137 * such that hrtimer_callback_running() can unconditionally dereference 138 * timer->base->cpu_base 139 */ 140 static struct hrtimer_cpu_base migration_cpu_base = { 141 .clock_base = { { 142 .cpu_base = &migration_cpu_base, 143 .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, 144 &migration_cpu_base.lock), 145 }, }, 146 }; 147 148 #define migration_base migration_cpu_base.clock_base[0] 149 150 /* 151 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock 152 * means that all timers which are tied to this base via timer->base are 153 * locked, and the base itself is locked too. 154 * 155 * So __run_timers/migrate_timers can safely modify all timers which could 156 * be found on the lists/queues. 157 * 158 * When the timer's base is locked, and the timer removed from list, it is 159 * possible to set timer->base = &migration_base and drop the lock: the timer 160 * remains locked. 161 */ 162 static 163 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, 164 unsigned long *flags) 165 __acquires(&timer->base->lock) 166 { 167 struct hrtimer_clock_base *base; 168 169 for (;;) { 170 base = READ_ONCE(timer->base); 171 if (likely(base != &migration_base)) { 172 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 173 if (likely(base == timer->base)) 174 return base; 175 /* The timer has migrated to another CPU: */ 176 raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); 177 } 178 cpu_relax(); 179 } 180 } 181 182 /* 183 * Check if the elected target is suitable considering its next 184 * event and the hotplug state of the current CPU. 185 * 186 * If the elected target is remote and its next event is after the timer 187 * to queue, then a remote reprogram is necessary. However there is no 188 * guarantee the IPI handling the operation would arrive in time to meet 189 * the high resolution deadline. In this case the local CPU becomes a 190 * preferred target, unless it is offline. 191 * 192 * High and low resolution modes are handled the same way for simplicity. 193 * 194 * Called with cpu_base->lock of target cpu held. 195 */ 196 static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, 197 struct hrtimer_cpu_base *new_cpu_base, 198 struct hrtimer_cpu_base *this_cpu_base) 199 { 200 ktime_t expires; 201 202 /* 203 * The local CPU clockevent can be reprogrammed. Also get_target_base() 204 * guarantees it is online. 205 */ 206 if (new_cpu_base == this_cpu_base) 207 return true; 208 209 /* 210 * The offline local CPU can't be the default target if the 211 * next remote target event is after this timer. Keep the 212 * elected new base. An IPI will be issued to reprogram 213 * it as a last resort. 214 */ 215 if (!hrtimer_base_is_online(this_cpu_base)) 216 return true; 217 218 expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); 219 220 return expires >= new_base->cpu_base->expires_next; 221 } 222 223 static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) 224 { 225 if (!hrtimer_base_is_online(base)) { 226 int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); 227 228 return &per_cpu(hrtimer_bases, cpu); 229 } 230 231 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 232 if (static_branch_likely(&timers_migration_enabled) && !pinned) 233 return &per_cpu(hrtimer_bases, get_nohz_timer_target()); 234 #endif 235 return base; 236 } 237 238 /* 239 * We switch the timer base to a power-optimized selected CPU target, 240 * if: 241 * - NO_HZ_COMMON is enabled 242 * - timer migration is enabled 243 * - the timer callback is not running 244 * - the timer is not the first expiring timer on the new target 245 * 246 * If one of the above requirements is not fulfilled we move the timer 247 * to the current CPU or leave it on the previously assigned CPU if 248 * the timer callback is currently running. 249 */ 250 static inline struct hrtimer_clock_base * 251 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 252 int pinned) 253 { 254 struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; 255 struct hrtimer_clock_base *new_base; 256 int basenum = base->index; 257 258 this_cpu_base = this_cpu_ptr(&hrtimer_bases); 259 new_cpu_base = get_target_base(this_cpu_base, pinned); 260 again: 261 new_base = &new_cpu_base->clock_base[basenum]; 262 263 if (base != new_base) { 264 /* 265 * We are trying to move timer to new_base. 266 * However we can't change timer's base while it is running, 267 * so we keep it on the same CPU. No hassle vs. reprogramming 268 * the event source in the high resolution case. The softirq 269 * code will take care of this when the timer function has 270 * completed. There is no conflict as we hold the lock until 271 * the timer is enqueued. 272 */ 273 if (unlikely(hrtimer_callback_running(timer))) 274 return base; 275 276 /* See the comment in lock_hrtimer_base() */ 277 WRITE_ONCE(timer->base, &migration_base); 278 raw_spin_unlock(&base->cpu_base->lock); 279 raw_spin_lock(&new_base->cpu_base->lock); 280 281 if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, 282 this_cpu_base)) { 283 raw_spin_unlock(&new_base->cpu_base->lock); 284 raw_spin_lock(&base->cpu_base->lock); 285 new_cpu_base = this_cpu_base; 286 WRITE_ONCE(timer->base, base); 287 goto again; 288 } 289 WRITE_ONCE(timer->base, new_base); 290 } else { 291 if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { 292 new_cpu_base = this_cpu_base; 293 goto again; 294 } 295 } 296 return new_base; 297 } 298 299 #else /* CONFIG_SMP */ 300 301 static inline struct hrtimer_clock_base * 302 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 303 __acquires(&timer->base->cpu_base->lock) 304 { 305 struct hrtimer_clock_base *base = timer->base; 306 307 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 308 309 return base; 310 } 311 312 # define switch_hrtimer_base(t, b, p) (b) 313 314 #endif /* !CONFIG_SMP */ 315 316 /* 317 * Functions for the union type storage format of ktime_t which are 318 * too large for inlining: 319 */ 320 #if BITS_PER_LONG < 64 321 /* 322 * Divide a ktime value by a nanosecond value 323 */ 324 s64 __ktime_divns(const ktime_t kt, s64 div) 325 { 326 int sft = 0; 327 s64 dclc; 328 u64 tmp; 329 330 dclc = ktime_to_ns(kt); 331 tmp = dclc < 0 ? -dclc : dclc; 332 333 /* Make sure the divisor is less than 2^32: */ 334 while (div >> 32) { 335 sft++; 336 div >>= 1; 337 } 338 tmp >>= sft; 339 do_div(tmp, (u32) div); 340 return dclc < 0 ? -tmp : tmp; 341 } 342 EXPORT_SYMBOL_GPL(__ktime_divns); 343 #endif /* BITS_PER_LONG >= 64 */ 344 345 /* 346 * Add two ktime values and do a safety check for overflow: 347 */ 348 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) 349 { 350 ktime_t res = ktime_add_unsafe(lhs, rhs); 351 352 /* 353 * We use KTIME_SEC_MAX here, the maximum timeout which we can 354 * return to user space in a timespec: 355 */ 356 if (res < 0 || res < lhs || res < rhs) 357 res = ktime_set(KTIME_SEC_MAX, 0); 358 359 return res; 360 } 361 362 EXPORT_SYMBOL_GPL(ktime_add_safe); 363 364 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS 365 366 static const struct debug_obj_descr hrtimer_debug_descr; 367 368 static void *hrtimer_debug_hint(void *addr) 369 { 370 return ACCESS_PRIVATE((struct hrtimer *)addr, function); 371 } 372 373 /* 374 * fixup_init is called when: 375 * - an active object is initialized 376 */ 377 static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state) 378 { 379 struct hrtimer *timer = addr; 380 381 switch (state) { 382 case ODEBUG_STATE_ACTIVE: 383 hrtimer_cancel(timer); 384 debug_object_init(timer, &hrtimer_debug_descr); 385 return true; 386 default: 387 return false; 388 } 389 } 390 391 /* 392 * fixup_activate is called when: 393 * - an active object is activated 394 * - an unknown non-static object is activated 395 */ 396 static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state) 397 { 398 switch (state) { 399 case ODEBUG_STATE_ACTIVE: 400 WARN_ON(1); 401 fallthrough; 402 default: 403 return false; 404 } 405 } 406 407 /* 408 * fixup_free is called when: 409 * - an active object is freed 410 */ 411 static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) 412 { 413 struct hrtimer *timer = addr; 414 415 switch (state) { 416 case ODEBUG_STATE_ACTIVE: 417 hrtimer_cancel(timer); 418 debug_object_free(timer, &hrtimer_debug_descr); 419 return true; 420 default: 421 return false; 422 } 423 } 424 425 static const struct debug_obj_descr hrtimer_debug_descr = { 426 .name = "hrtimer", 427 .debug_hint = hrtimer_debug_hint, 428 .fixup_init = hrtimer_fixup_init, 429 .fixup_activate = hrtimer_fixup_activate, 430 .fixup_free = hrtimer_fixup_free, 431 }; 432 433 static inline void debug_hrtimer_init(struct hrtimer *timer) 434 { 435 debug_object_init(timer, &hrtimer_debug_descr); 436 } 437 438 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) 439 { 440 debug_object_init_on_stack(timer, &hrtimer_debug_descr); 441 } 442 443 static inline void debug_hrtimer_activate(struct hrtimer *timer, 444 enum hrtimer_mode mode) 445 { 446 debug_object_activate(timer, &hrtimer_debug_descr); 447 } 448 449 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) 450 { 451 debug_object_deactivate(timer, &hrtimer_debug_descr); 452 } 453 454 void destroy_hrtimer_on_stack(struct hrtimer *timer) 455 { 456 debug_object_free(timer, &hrtimer_debug_descr); 457 } 458 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); 459 460 #else 461 462 static inline void debug_hrtimer_init(struct hrtimer *timer) { } 463 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { } 464 static inline void debug_hrtimer_activate(struct hrtimer *timer, 465 enum hrtimer_mode mode) { } 466 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } 467 #endif 468 469 static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) 470 { 471 debug_hrtimer_init(timer); 472 trace_hrtimer_setup(timer, clockid, mode); 473 } 474 475 static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid, 476 enum hrtimer_mode mode) 477 { 478 debug_hrtimer_init_on_stack(timer); 479 trace_hrtimer_setup(timer, clockid, mode); 480 } 481 482 static inline void debug_activate(struct hrtimer *timer, 483 enum hrtimer_mode mode) 484 { 485 debug_hrtimer_activate(timer, mode); 486 trace_hrtimer_start(timer, mode); 487 } 488 489 static inline void debug_deactivate(struct hrtimer *timer) 490 { 491 debug_hrtimer_deactivate(timer); 492 trace_hrtimer_cancel(timer); 493 } 494 495 static struct hrtimer_clock_base * 496 __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) 497 { 498 unsigned int idx; 499 500 if (!*active) 501 return NULL; 502 503 idx = __ffs(*active); 504 *active &= ~(1U << idx); 505 506 return &cpu_base->clock_base[idx]; 507 } 508 509 #define for_each_active_base(base, cpu_base, active) \ 510 while ((base = __next_base((cpu_base), &(active)))) 511 512 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, 513 const struct hrtimer *exclude, 514 unsigned int active, 515 ktime_t expires_next) 516 { 517 struct hrtimer_clock_base *base; 518 ktime_t expires; 519 520 for_each_active_base(base, cpu_base, active) { 521 struct timerqueue_node *next; 522 struct hrtimer *timer; 523 524 next = timerqueue_getnext(&base->active); 525 timer = container_of(next, struct hrtimer, node); 526 if (timer == exclude) { 527 /* Get to the next timer in the queue. */ 528 next = timerqueue_iterate_next(next); 529 if (!next) 530 continue; 531 532 timer = container_of(next, struct hrtimer, node); 533 } 534 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 535 if (expires < expires_next) { 536 expires_next = expires; 537 538 /* Skip cpu_base update if a timer is being excluded. */ 539 if (exclude) 540 continue; 541 542 if (timer->is_soft) 543 cpu_base->softirq_next_timer = timer; 544 else 545 cpu_base->next_timer = timer; 546 } 547 } 548 /* 549 * clock_was_set() might have changed base->offset of any of 550 * the clock bases so the result might be negative. Fix it up 551 * to prevent a false positive in clockevents_program_event(). 552 */ 553 if (expires_next < 0) 554 expires_next = 0; 555 return expires_next; 556 } 557 558 /* 559 * Recomputes cpu_base::*next_timer and returns the earliest expires_next 560 * but does not set cpu_base::*expires_next, that is done by 561 * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating 562 * cpu_base::*expires_next right away, reprogramming logic would no longer 563 * work. 564 * 565 * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, 566 * those timers will get run whenever the softirq gets handled, at the end of 567 * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. 568 * 569 * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. 570 * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual 571 * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. 572 * 573 * @active_mask must be one of: 574 * - HRTIMER_ACTIVE_ALL, 575 * - HRTIMER_ACTIVE_SOFT, or 576 * - HRTIMER_ACTIVE_HARD. 577 */ 578 static ktime_t 579 __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) 580 { 581 unsigned int active; 582 struct hrtimer *next_timer = NULL; 583 ktime_t expires_next = KTIME_MAX; 584 585 if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { 586 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; 587 cpu_base->softirq_next_timer = NULL; 588 expires_next = __hrtimer_next_event_base(cpu_base, NULL, 589 active, KTIME_MAX); 590 591 next_timer = cpu_base->softirq_next_timer; 592 } 593 594 if (active_mask & HRTIMER_ACTIVE_HARD) { 595 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; 596 cpu_base->next_timer = next_timer; 597 expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, 598 expires_next); 599 } 600 601 return expires_next; 602 } 603 604 static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) 605 { 606 ktime_t expires_next, soft = KTIME_MAX; 607 608 /* 609 * If the soft interrupt has already been activated, ignore the 610 * soft bases. They will be handled in the already raised soft 611 * interrupt. 612 */ 613 if (!cpu_base->softirq_activated) { 614 soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); 615 /* 616 * Update the soft expiry time. clock_settime() might have 617 * affected it. 618 */ 619 cpu_base->softirq_expires_next = soft; 620 } 621 622 expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); 623 /* 624 * If a softirq timer is expiring first, update cpu_base->next_timer 625 * and program the hardware with the soft expiry time. 626 */ 627 if (expires_next > soft) { 628 cpu_base->next_timer = cpu_base->softirq_next_timer; 629 expires_next = soft; 630 } 631 632 return expires_next; 633 } 634 635 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 636 { 637 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; 638 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; 639 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; 640 641 ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, 642 offs_real, offs_boot, offs_tai); 643 644 base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; 645 base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; 646 base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; 647 648 return now; 649 } 650 651 /* 652 * Is the high resolution mode active ? 653 */ 654 static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) 655 { 656 return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? 657 cpu_base->hres_active : 0; 658 } 659 660 static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, 661 struct hrtimer *next_timer, 662 ktime_t expires_next) 663 { 664 cpu_base->expires_next = expires_next; 665 666 /* 667 * If hres is not active, hardware does not have to be 668 * reprogrammed yet. 669 * 670 * If a hang was detected in the last timer interrupt then we 671 * leave the hang delay active in the hardware. We want the 672 * system to make progress. That also prevents the following 673 * scenario: 674 * T1 expires 50ms from now 675 * T2 expires 5s from now 676 * 677 * T1 is removed, so this code is called and would reprogram 678 * the hardware to 5s from now. Any hrtimer_start after that 679 * will not reprogram the hardware due to hang_detected being 680 * set. So we'd effectively block all timers until the T2 event 681 * fires. 682 */ 683 if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) 684 return; 685 686 tick_program_event(expires_next, 1); 687 } 688 689 /* 690 * Reprogram the event source with checking both queues for the 691 * next event 692 * Called with interrupts disabled and base->lock held 693 */ 694 static void 695 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) 696 { 697 ktime_t expires_next; 698 699 expires_next = hrtimer_update_next_event(cpu_base); 700 701 if (skip_equal && expires_next == cpu_base->expires_next) 702 return; 703 704 __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next); 705 } 706 707 /* High resolution timer related functions */ 708 #ifdef CONFIG_HIGH_RES_TIMERS 709 710 /* 711 * High resolution timer enabled ? 712 */ 713 static bool hrtimer_hres_enabled __read_mostly = true; 714 unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; 715 EXPORT_SYMBOL_GPL(hrtimer_resolution); 716 717 /* 718 * Enable / Disable high resolution mode 719 */ 720 static int __init setup_hrtimer_hres(char *str) 721 { 722 return (kstrtobool(str, &hrtimer_hres_enabled) == 0); 723 } 724 725 __setup("highres=", setup_hrtimer_hres); 726 727 /* 728 * hrtimer_high_res_enabled - query, if the highres mode is enabled 729 */ 730 static inline int hrtimer_is_hres_enabled(void) 731 { 732 return hrtimer_hres_enabled; 733 } 734 735 /* 736 * Switch to high resolution mode 737 */ 738 static void hrtimer_switch_to_hres(void) 739 { 740 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 741 742 if (tick_init_highres()) { 743 pr_warn("Could not switch to high resolution mode on CPU %u\n", 744 base->cpu); 745 return; 746 } 747 base->hres_active = 1; 748 hrtimer_resolution = HIGH_RES_NSEC; 749 750 tick_setup_sched_timer(true); 751 /* "Retrigger" the interrupt to get things going */ 752 retrigger_next_event(NULL); 753 } 754 755 #else 756 757 static inline int hrtimer_is_hres_enabled(void) { return 0; } 758 static inline void hrtimer_switch_to_hres(void) { } 759 760 #endif /* CONFIG_HIGH_RES_TIMERS */ 761 /* 762 * Retrigger next event is called after clock was set with interrupts 763 * disabled through an SMP function call or directly from low level 764 * resume code. 765 * 766 * This is only invoked when: 767 * - CONFIG_HIGH_RES_TIMERS is enabled. 768 * - CONFIG_NOHZ_COMMON is enabled 769 * 770 * For the other cases this function is empty and because the call sites 771 * are optimized out it vanishes as well, i.e. no need for lots of 772 * #ifdeffery. 773 */ 774 static void retrigger_next_event(void *arg) 775 { 776 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 777 778 /* 779 * When high resolution mode or nohz is active, then the offsets of 780 * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the 781 * next tick will take care of that. 782 * 783 * If high resolution mode is active then the next expiring timer 784 * must be reevaluated and the clock event device reprogrammed if 785 * necessary. 786 * 787 * In the NOHZ case the update of the offset and the reevaluation 788 * of the next expiring timer is enough. The return from the SMP 789 * function call will take care of the reprogramming in case the 790 * CPU was in a NOHZ idle sleep. 791 * 792 * In periodic low resolution mode, the next softirq expiration 793 * must also be updated. 794 */ 795 raw_spin_lock(&base->lock); 796 hrtimer_update_base(base); 797 if (hrtimer_hres_active(base)) 798 hrtimer_force_reprogram(base, 0); 799 else 800 hrtimer_update_next_event(base); 801 raw_spin_unlock(&base->lock); 802 } 803 804 /* 805 * When a timer is enqueued and expires earlier than the already enqueued 806 * timers, we have to check, whether it expires earlier than the timer for 807 * which the clock event device was armed. 808 * 809 * Called with interrupts disabled and base->cpu_base.lock held 810 */ 811 static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) 812 { 813 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 814 struct hrtimer_clock_base *base = timer->base; 815 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 816 817 WARN_ON_ONCE(hrtimer_get_expires(timer) < 0); 818 819 /* 820 * CLOCK_REALTIME timer might be requested with an absolute 821 * expiry time which is less than base->offset. Set it to 0. 822 */ 823 if (expires < 0) 824 expires = 0; 825 826 if (timer->is_soft) { 827 /* 828 * soft hrtimer could be started on a remote CPU. In this 829 * case softirq_expires_next needs to be updated on the 830 * remote CPU. The soft hrtimer will not expire before the 831 * first hard hrtimer on the remote CPU - 832 * hrtimer_check_target() prevents this case. 833 */ 834 struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; 835 836 if (timer_cpu_base->softirq_activated) 837 return; 838 839 if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) 840 return; 841 842 timer_cpu_base->softirq_next_timer = timer; 843 timer_cpu_base->softirq_expires_next = expires; 844 845 if (!ktime_before(expires, timer_cpu_base->expires_next) || 846 !reprogram) 847 return; 848 } 849 850 /* 851 * If the timer is not on the current cpu, we cannot reprogram 852 * the other cpus clock event device. 853 */ 854 if (base->cpu_base != cpu_base) 855 return; 856 857 if (expires >= cpu_base->expires_next) 858 return; 859 860 /* 861 * If the hrtimer interrupt is running, then it will reevaluate the 862 * clock bases and reprogram the clock event device. 863 */ 864 if (cpu_base->in_hrtirq) 865 return; 866 867 cpu_base->next_timer = timer; 868 869 __hrtimer_reprogram(cpu_base, timer, expires); 870 } 871 872 static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, 873 unsigned int active) 874 { 875 struct hrtimer_clock_base *base; 876 unsigned int seq; 877 ktime_t expires; 878 879 /* 880 * Update the base offsets unconditionally so the following 881 * checks whether the SMP function call is required works. 882 * 883 * The update is safe even when the remote CPU is in the hrtimer 884 * interrupt or the hrtimer soft interrupt and expiring affected 885 * bases. Either it will see the update before handling a base or 886 * it will see it when it finishes the processing and reevaluates 887 * the next expiring timer. 888 */ 889 seq = cpu_base->clock_was_set_seq; 890 hrtimer_update_base(cpu_base); 891 892 /* 893 * If the sequence did not change over the update then the 894 * remote CPU already handled it. 895 */ 896 if (seq == cpu_base->clock_was_set_seq) 897 return false; 898 899 /* 900 * If the remote CPU is currently handling an hrtimer interrupt, it 901 * will reevaluate the first expiring timer of all clock bases 902 * before reprogramming. Nothing to do here. 903 */ 904 if (cpu_base->in_hrtirq) 905 return false; 906 907 /* 908 * Walk the affected clock bases and check whether the first expiring 909 * timer in a clock base is moving ahead of the first expiring timer of 910 * @cpu_base. If so, the IPI must be invoked because per CPU clock 911 * event devices cannot be remotely reprogrammed. 912 */ 913 active &= cpu_base->active_bases; 914 915 for_each_active_base(base, cpu_base, active) { 916 struct timerqueue_node *next; 917 918 next = timerqueue_getnext(&base->active); 919 expires = ktime_sub(next->expires, base->offset); 920 if (expires < cpu_base->expires_next) 921 return true; 922 923 /* Extra check for softirq clock bases */ 924 if (base->index < HRTIMER_BASE_MONOTONIC_SOFT) 925 continue; 926 if (cpu_base->softirq_activated) 927 continue; 928 if (expires < cpu_base->softirq_expires_next) 929 return true; 930 } 931 return false; 932 } 933 934 /* 935 * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and 936 * CLOCK_BOOTTIME (for late sleep time injection). 937 * 938 * This requires to update the offsets for these clocks 939 * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this 940 * also requires to eventually reprogram the per CPU clock event devices 941 * when the change moves an affected timer ahead of the first expiring 942 * timer on that CPU. Obviously remote per CPU clock event devices cannot 943 * be reprogrammed. The other reason why an IPI has to be sent is when the 944 * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets 945 * in the tick, which obviously might be stopped, so this has to bring out 946 * the remote CPU which might sleep in idle to get this sorted. 947 */ 948 void clock_was_set(unsigned int bases) 949 { 950 struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases); 951 cpumask_var_t mask; 952 int cpu; 953 954 if (!hrtimer_hres_active(cpu_base) && !tick_nohz_is_active()) 955 goto out_timerfd; 956 957 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 958 on_each_cpu(retrigger_next_event, NULL, 1); 959 goto out_timerfd; 960 } 961 962 /* Avoid interrupting CPUs if possible */ 963 cpus_read_lock(); 964 for_each_online_cpu(cpu) { 965 unsigned long flags; 966 967 cpu_base = &per_cpu(hrtimer_bases, cpu); 968 raw_spin_lock_irqsave(&cpu_base->lock, flags); 969 970 if (update_needs_ipi(cpu_base, bases)) 971 cpumask_set_cpu(cpu, mask); 972 973 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 974 } 975 976 preempt_disable(); 977 smp_call_function_many(mask, retrigger_next_event, NULL, 1); 978 preempt_enable(); 979 cpus_read_unlock(); 980 free_cpumask_var(mask); 981 982 out_timerfd: 983 timerfd_clock_was_set(); 984 } 985 986 static void clock_was_set_work(struct work_struct *work) 987 { 988 clock_was_set(CLOCK_SET_WALL); 989 } 990 991 static DECLARE_WORK(hrtimer_work, clock_was_set_work); 992 993 /* 994 * Called from timekeeping code to reprogram the hrtimer interrupt device 995 * on all cpus and to notify timerfd. 996 */ 997 void clock_was_set_delayed(void) 998 { 999 schedule_work(&hrtimer_work); 1000 } 1001 1002 /* 1003 * Called during resume either directly from via timekeeping_resume() 1004 * or in the case of s2idle from tick_unfreeze() to ensure that the 1005 * hrtimers are up to date. 1006 */ 1007 void hrtimers_resume_local(void) 1008 { 1009 lockdep_assert_irqs_disabled(); 1010 /* Retrigger on the local CPU */ 1011 retrigger_next_event(NULL); 1012 } 1013 1014 /* 1015 * Counterpart to lock_hrtimer_base above: 1016 */ 1017 static inline 1018 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 1019 __releases(&timer->base->cpu_base->lock) 1020 { 1021 raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); 1022 } 1023 1024 /** 1025 * hrtimer_forward() - forward the timer expiry 1026 * @timer: hrtimer to forward 1027 * @now: forward past this time 1028 * @interval: the interval to forward 1029 * 1030 * Forward the timer expiry so it will expire in the future. 1031 * 1032 * .. note:: 1033 * This only updates the timer expiry value and does not requeue the timer. 1034 * 1035 * There is also a variant of the function hrtimer_forward_now(). 1036 * 1037 * Context: Can be safely called from the callback function of @timer. If called 1038 * from other contexts @timer must neither be enqueued nor running the 1039 * callback and the caller needs to take care of serialization. 1040 * 1041 * Return: The number of overruns are returned. 1042 */ 1043 u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) 1044 { 1045 u64 orun = 1; 1046 ktime_t delta; 1047 1048 delta = ktime_sub(now, hrtimer_get_expires(timer)); 1049 1050 if (delta < 0) 1051 return 0; 1052 1053 if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED)) 1054 return 0; 1055 1056 if (interval < hrtimer_resolution) 1057 interval = hrtimer_resolution; 1058 1059 if (unlikely(delta >= interval)) { 1060 s64 incr = ktime_to_ns(interval); 1061 1062 orun = ktime_divns(delta, incr); 1063 hrtimer_add_expires_ns(timer, incr * orun); 1064 if (hrtimer_get_expires(timer) > now) 1065 return orun; 1066 /* 1067 * This (and the ktime_add() below) is the 1068 * correction for exact: 1069 */ 1070 orun++; 1071 } 1072 hrtimer_add_expires(timer, interval); 1073 1074 return orun; 1075 } 1076 EXPORT_SYMBOL_GPL(hrtimer_forward); 1077 1078 /* 1079 * enqueue_hrtimer - internal function to (re)start a timer 1080 * 1081 * The timer is inserted in expiry order. Insertion into the 1082 * red black tree is O(log(n)). Must hold the base lock. 1083 * 1084 * Returns true when the new timer is the leftmost timer in the tree. 1085 */ 1086 static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1087 enum hrtimer_mode mode) 1088 { 1089 debug_activate(timer, mode); 1090 WARN_ON_ONCE(!base->cpu_base->online); 1091 1092 base->cpu_base->active_bases |= 1 << base->index; 1093 1094 /* Pairs with the lockless read in hrtimer_is_queued() */ 1095 WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); 1096 1097 return timerqueue_add(&base->active, &timer->node); 1098 } 1099 1100 /* 1101 * __remove_hrtimer - internal function to remove a timer 1102 * 1103 * Caller must hold the base lock. 1104 * 1105 * High resolution timer mode reprograms the clock event device when the 1106 * timer is the one which expires next. The caller can disable this by setting 1107 * reprogram to zero. This is useful, when the context does a reprogramming 1108 * anyway (e.g. timer interrupt) 1109 */ 1110 static void __remove_hrtimer(struct hrtimer *timer, 1111 struct hrtimer_clock_base *base, 1112 u8 newstate, int reprogram) 1113 { 1114 struct hrtimer_cpu_base *cpu_base = base->cpu_base; 1115 u8 state = timer->state; 1116 1117 /* Pairs with the lockless read in hrtimer_is_queued() */ 1118 WRITE_ONCE(timer->state, newstate); 1119 if (!(state & HRTIMER_STATE_ENQUEUED)) 1120 return; 1121 1122 if (!timerqueue_del(&base->active, &timer->node)) 1123 cpu_base->active_bases &= ~(1 << base->index); 1124 1125 /* 1126 * Note: If reprogram is false we do not update 1127 * cpu_base->next_timer. This happens when we remove the first 1128 * timer on a remote cpu. No harm as we never dereference 1129 * cpu_base->next_timer. So the worst thing what can happen is 1130 * an superfluous call to hrtimer_force_reprogram() on the 1131 * remote cpu later on if the same timer gets enqueued again. 1132 */ 1133 if (reprogram && timer == cpu_base->next_timer) 1134 hrtimer_force_reprogram(cpu_base, 1); 1135 } 1136 1137 /* 1138 * remove hrtimer, called with base lock held 1139 */ 1140 static inline int 1141 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1142 bool restart, bool keep_local) 1143 { 1144 u8 state = timer->state; 1145 1146 if (state & HRTIMER_STATE_ENQUEUED) { 1147 bool reprogram; 1148 1149 /* 1150 * Remove the timer and force reprogramming when high 1151 * resolution mode is active and the timer is on the current 1152 * CPU. If we remove a timer on another CPU, reprogramming is 1153 * skipped. The interrupt event on this CPU is fired and 1154 * reprogramming happens in the interrupt handler. This is a 1155 * rare case and less expensive than a smp call. 1156 */ 1157 debug_deactivate(timer); 1158 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); 1159 1160 /* 1161 * If the timer is not restarted then reprogramming is 1162 * required if the timer is local. If it is local and about 1163 * to be restarted, avoid programming it twice (on removal 1164 * and a moment later when it's requeued). 1165 */ 1166 if (!restart) 1167 state = HRTIMER_STATE_INACTIVE; 1168 else 1169 reprogram &= !keep_local; 1170 1171 __remove_hrtimer(timer, base, state, reprogram); 1172 return 1; 1173 } 1174 return 0; 1175 } 1176 1177 static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, 1178 const enum hrtimer_mode mode) 1179 { 1180 #ifdef CONFIG_TIME_LOW_RES 1181 /* 1182 * CONFIG_TIME_LOW_RES indicates that the system has no way to return 1183 * granular time values. For relative timers we add hrtimer_resolution 1184 * (i.e. one jiffy) to prevent short timeouts. 1185 */ 1186 timer->is_rel = mode & HRTIMER_MODE_REL; 1187 if (timer->is_rel) 1188 tim = ktime_add_safe(tim, hrtimer_resolution); 1189 #endif 1190 return tim; 1191 } 1192 1193 static void 1194 hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) 1195 { 1196 ktime_t expires; 1197 1198 /* 1199 * Find the next SOFT expiration. 1200 */ 1201 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); 1202 1203 /* 1204 * reprogramming needs to be triggered, even if the next soft 1205 * hrtimer expires at the same time than the next hard 1206 * hrtimer. cpu_base->softirq_expires_next needs to be updated! 1207 */ 1208 if (expires == KTIME_MAX) 1209 return; 1210 1211 /* 1212 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() 1213 * cpu_base->*expires_next is only set by hrtimer_reprogram() 1214 */ 1215 hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); 1216 } 1217 1218 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 1219 u64 delta_ns, const enum hrtimer_mode mode, 1220 struct hrtimer_clock_base *base) 1221 { 1222 struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); 1223 struct hrtimer_clock_base *new_base; 1224 bool force_local, first; 1225 1226 /* 1227 * If the timer is on the local cpu base and is the first expiring 1228 * timer then this might end up reprogramming the hardware twice 1229 * (on removal and on enqueue). To avoid that by prevent the 1230 * reprogram on removal, keep the timer local to the current CPU 1231 * and enforce reprogramming after it is queued no matter whether 1232 * it is the new first expiring timer again or not. 1233 */ 1234 force_local = base->cpu_base == this_cpu_base; 1235 force_local &= base->cpu_base->next_timer == timer; 1236 1237 /* 1238 * Don't force local queuing if this enqueue happens on a unplugged 1239 * CPU after hrtimer_cpu_dying() has been invoked. 1240 */ 1241 force_local &= this_cpu_base->online; 1242 1243 /* 1244 * Remove an active timer from the queue. In case it is not queued 1245 * on the current CPU, make sure that remove_hrtimer() updates the 1246 * remote data correctly. 1247 * 1248 * If it's on the current CPU and the first expiring timer, then 1249 * skip reprogramming, keep the timer local and enforce 1250 * reprogramming later if it was the first expiring timer. This 1251 * avoids programming the underlying clock event twice (once at 1252 * removal and once after enqueue). 1253 */ 1254 remove_hrtimer(timer, base, true, force_local); 1255 1256 if (mode & HRTIMER_MODE_REL) 1257 tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); 1258 1259 tim = hrtimer_update_lowres(timer, tim, mode); 1260 1261 hrtimer_set_expires_range_ns(timer, tim, delta_ns); 1262 1263 /* Switch the timer base, if necessary: */ 1264 if (!force_local) { 1265 new_base = switch_hrtimer_base(timer, base, 1266 mode & HRTIMER_MODE_PINNED); 1267 } else { 1268 new_base = base; 1269 } 1270 1271 first = enqueue_hrtimer(timer, new_base, mode); 1272 if (!force_local) { 1273 /* 1274 * If the current CPU base is online, then the timer is 1275 * never queued on a remote CPU if it would be the first 1276 * expiring timer there. 1277 */ 1278 if (hrtimer_base_is_online(this_cpu_base)) 1279 return first; 1280 1281 /* 1282 * Timer was enqueued remote because the current base is 1283 * already offline. If the timer is the first to expire, 1284 * kick the remote CPU to reprogram the clock event. 1285 */ 1286 if (first) { 1287 struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; 1288 1289 smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); 1290 } 1291 return 0; 1292 } 1293 1294 /* 1295 * Timer was forced to stay on the current CPU to avoid 1296 * reprogramming on removal and enqueue. Force reprogram the 1297 * hardware by evaluating the new first expiring timer. 1298 */ 1299 hrtimer_force_reprogram(new_base->cpu_base, 1); 1300 return 0; 1301 } 1302 1303 /** 1304 * hrtimer_start_range_ns - (re)start an hrtimer 1305 * @timer: the timer to be added 1306 * @tim: expiry time 1307 * @delta_ns: "slack" range for the timer 1308 * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or 1309 * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); 1310 * softirq based mode is considered for debug purpose only! 1311 */ 1312 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 1313 u64 delta_ns, const enum hrtimer_mode mode) 1314 { 1315 struct hrtimer_clock_base *base; 1316 unsigned long flags; 1317 1318 /* 1319 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft 1320 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard 1321 * expiry mode because unmarked timers are moved to softirq expiry. 1322 */ 1323 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) 1324 WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); 1325 else 1326 WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); 1327 1328 base = lock_hrtimer_base(timer, &flags); 1329 1330 if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) 1331 hrtimer_reprogram(timer, true); 1332 1333 unlock_hrtimer_base(timer, &flags); 1334 } 1335 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 1336 1337 /** 1338 * hrtimer_try_to_cancel - try to deactivate a timer 1339 * @timer: hrtimer to stop 1340 * 1341 * Returns: 1342 * 1343 * * 0 when the timer was not active 1344 * * 1 when the timer was active 1345 * * -1 when the timer is currently executing the callback function and 1346 * cannot be stopped 1347 */ 1348 int hrtimer_try_to_cancel(struct hrtimer *timer) 1349 { 1350 struct hrtimer_clock_base *base; 1351 unsigned long flags; 1352 int ret = -1; 1353 1354 /* 1355 * Check lockless first. If the timer is not active (neither 1356 * enqueued nor running the callback, nothing to do here. The 1357 * base lock does not serialize against a concurrent enqueue, 1358 * so we can avoid taking it. 1359 */ 1360 if (!hrtimer_active(timer)) 1361 return 0; 1362 1363 base = lock_hrtimer_base(timer, &flags); 1364 1365 if (!hrtimer_callback_running(timer)) 1366 ret = remove_hrtimer(timer, base, false, false); 1367 1368 unlock_hrtimer_base(timer, &flags); 1369 1370 return ret; 1371 1372 } 1373 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); 1374 1375 #ifdef CONFIG_PREEMPT_RT 1376 static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) 1377 { 1378 spin_lock_init(&base->softirq_expiry_lock); 1379 } 1380 1381 static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) 1382 __acquires(&base->softirq_expiry_lock) 1383 { 1384 spin_lock(&base->softirq_expiry_lock); 1385 } 1386 1387 static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) 1388 __releases(&base->softirq_expiry_lock) 1389 { 1390 spin_unlock(&base->softirq_expiry_lock); 1391 } 1392 1393 /* 1394 * The counterpart to hrtimer_cancel_wait_running(). 1395 * 1396 * If there is a waiter for cpu_base->expiry_lock, then it was waiting for 1397 * the timer callback to finish. Drop expiry_lock and reacquire it. That 1398 * allows the waiter to acquire the lock and make progress. 1399 */ 1400 static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, 1401 unsigned long flags) 1402 { 1403 if (atomic_read(&cpu_base->timer_waiters)) { 1404 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1405 spin_unlock(&cpu_base->softirq_expiry_lock); 1406 spin_lock(&cpu_base->softirq_expiry_lock); 1407 raw_spin_lock_irq(&cpu_base->lock); 1408 } 1409 } 1410 1411 #ifdef CONFIG_SMP 1412 static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) 1413 { 1414 return base == &migration_base; 1415 } 1416 #else 1417 static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) 1418 { 1419 return false; 1420 } 1421 #endif 1422 1423 /* 1424 * This function is called on PREEMPT_RT kernels when the fast path 1425 * deletion of a timer failed because the timer callback function was 1426 * running. 1427 * 1428 * This prevents priority inversion: if the soft irq thread is preempted 1429 * in the middle of a timer callback, then calling hrtimer_cancel() can 1430 * lead to two issues: 1431 * 1432 * - If the caller is on a remote CPU then it has to spin wait for the timer 1433 * handler to complete. This can result in unbound priority inversion. 1434 * 1435 * - If the caller originates from the task which preempted the timer 1436 * handler on the same CPU, then spin waiting for the timer handler to 1437 * complete is never going to end. 1438 */ 1439 void hrtimer_cancel_wait_running(const struct hrtimer *timer) 1440 { 1441 /* Lockless read. Prevent the compiler from reloading it below */ 1442 struct hrtimer_clock_base *base = READ_ONCE(timer->base); 1443 1444 /* 1445 * Just relax if the timer expires in hard interrupt context or if 1446 * it is currently on the migration base. 1447 */ 1448 if (!timer->is_soft || is_migration_base(base)) { 1449 cpu_relax(); 1450 return; 1451 } 1452 1453 /* 1454 * Mark the base as contended and grab the expiry lock, which is 1455 * held by the softirq across the timer callback. Drop the lock 1456 * immediately so the softirq can expire the next timer. In theory 1457 * the timer could already be running again, but that's more than 1458 * unlikely and just causes another wait loop. 1459 */ 1460 atomic_inc(&base->cpu_base->timer_waiters); 1461 spin_lock_bh(&base->cpu_base->softirq_expiry_lock); 1462 atomic_dec(&base->cpu_base->timer_waiters); 1463 spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); 1464 } 1465 #else 1466 static inline void 1467 hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } 1468 static inline void 1469 hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } 1470 static inline void 1471 hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } 1472 static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, 1473 unsigned long flags) { } 1474 #endif 1475 1476 /** 1477 * hrtimer_cancel - cancel a timer and wait for the handler to finish. 1478 * @timer: the timer to be cancelled 1479 * 1480 * Returns: 1481 * 0 when the timer was not active 1482 * 1 when the timer was active 1483 */ 1484 int hrtimer_cancel(struct hrtimer *timer) 1485 { 1486 int ret; 1487 1488 do { 1489 ret = hrtimer_try_to_cancel(timer); 1490 1491 if (ret < 0) 1492 hrtimer_cancel_wait_running(timer); 1493 } while (ret < 0); 1494 return ret; 1495 } 1496 EXPORT_SYMBOL_GPL(hrtimer_cancel); 1497 1498 /** 1499 * __hrtimer_get_remaining - get remaining time for the timer 1500 * @timer: the timer to read 1501 * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y 1502 */ 1503 ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) 1504 { 1505 unsigned long flags; 1506 ktime_t rem; 1507 1508 lock_hrtimer_base(timer, &flags); 1509 if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) 1510 rem = hrtimer_expires_remaining_adjusted(timer); 1511 else 1512 rem = hrtimer_expires_remaining(timer); 1513 unlock_hrtimer_base(timer, &flags); 1514 1515 return rem; 1516 } 1517 EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); 1518 1519 #ifdef CONFIG_NO_HZ_COMMON 1520 /** 1521 * hrtimer_get_next_event - get the time until next expiry event 1522 * 1523 * Returns the next expiry time or KTIME_MAX if no timer is pending. 1524 */ 1525 u64 hrtimer_get_next_event(void) 1526 { 1527 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1528 u64 expires = KTIME_MAX; 1529 unsigned long flags; 1530 1531 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1532 1533 if (!hrtimer_hres_active(cpu_base)) 1534 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); 1535 1536 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1537 1538 return expires; 1539 } 1540 1541 /** 1542 * hrtimer_next_event_without - time until next expiry event w/o one timer 1543 * @exclude: timer to exclude 1544 * 1545 * Returns the next expiry time over all timers except for the @exclude one or 1546 * KTIME_MAX if none of them is pending. 1547 */ 1548 u64 hrtimer_next_event_without(const struct hrtimer *exclude) 1549 { 1550 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1551 u64 expires = KTIME_MAX; 1552 unsigned long flags; 1553 1554 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1555 1556 if (hrtimer_hres_active(cpu_base)) { 1557 unsigned int active; 1558 1559 if (!cpu_base->softirq_activated) { 1560 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; 1561 expires = __hrtimer_next_event_base(cpu_base, exclude, 1562 active, KTIME_MAX); 1563 } 1564 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; 1565 expires = __hrtimer_next_event_base(cpu_base, exclude, active, 1566 expires); 1567 } 1568 1569 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1570 1571 return expires; 1572 } 1573 #endif 1574 1575 static inline int hrtimer_clockid_to_base(clockid_t clock_id) 1576 { 1577 switch (clock_id) { 1578 case CLOCK_MONOTONIC: 1579 return HRTIMER_BASE_MONOTONIC; 1580 case CLOCK_REALTIME: 1581 return HRTIMER_BASE_REALTIME; 1582 case CLOCK_BOOTTIME: 1583 return HRTIMER_BASE_BOOTTIME; 1584 case CLOCK_TAI: 1585 return HRTIMER_BASE_TAI; 1586 default: 1587 WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); 1588 return HRTIMER_BASE_MONOTONIC; 1589 } 1590 } 1591 1592 static ktime_t __hrtimer_cb_get_time(clockid_t clock_id) 1593 { 1594 switch (clock_id) { 1595 case CLOCK_MONOTONIC: 1596 return ktime_get(); 1597 case CLOCK_REALTIME: 1598 return ktime_get_real(); 1599 case CLOCK_BOOTTIME: 1600 return ktime_get_boottime(); 1601 case CLOCK_TAI: 1602 return ktime_get_clocktai(); 1603 default: 1604 WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); 1605 return ktime_get(); 1606 } 1607 } 1608 1609 ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) 1610 { 1611 return __hrtimer_cb_get_time(timer->base->clockid); 1612 } 1613 EXPORT_SYMBOL_GPL(hrtimer_cb_get_time); 1614 1615 static void __hrtimer_setup(struct hrtimer *timer, 1616 enum hrtimer_restart (*function)(struct hrtimer *), 1617 clockid_t clock_id, enum hrtimer_mode mode) 1618 { 1619 bool softtimer = !!(mode & HRTIMER_MODE_SOFT); 1620 struct hrtimer_cpu_base *cpu_base; 1621 int base; 1622 1623 /* 1624 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly 1625 * marked for hard interrupt expiry mode are moved into soft 1626 * interrupt context for latency reasons and because the callbacks 1627 * can invoke functions which might sleep on RT, e.g. spin_lock(). 1628 */ 1629 if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) 1630 softtimer = true; 1631 1632 memset(timer, 0, sizeof(struct hrtimer)); 1633 1634 cpu_base = raw_cpu_ptr(&hrtimer_bases); 1635 1636 /* 1637 * POSIX magic: Relative CLOCK_REALTIME timers are not affected by 1638 * clock modifications, so they needs to become CLOCK_MONOTONIC to 1639 * ensure POSIX compliance. 1640 */ 1641 if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) 1642 clock_id = CLOCK_MONOTONIC; 1643 1644 base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0; 1645 base += hrtimer_clockid_to_base(clock_id); 1646 timer->is_soft = softtimer; 1647 timer->is_hard = !!(mode & HRTIMER_MODE_HARD); 1648 timer->base = &cpu_base->clock_base[base]; 1649 timerqueue_init(&timer->node); 1650 1651 if (WARN_ON_ONCE(!function)) 1652 ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout; 1653 else 1654 ACCESS_PRIVATE(timer, function) = function; 1655 } 1656 1657 /** 1658 * hrtimer_setup - initialize a timer to the given clock 1659 * @timer: the timer to be initialized 1660 * @function: the callback function 1661 * @clock_id: the clock to be used 1662 * @mode: The modes which are relevant for initialization: 1663 * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, 1664 * HRTIMER_MODE_REL_SOFT 1665 * 1666 * The PINNED variants of the above can be handed in, 1667 * but the PINNED bit is ignored as pinning happens 1668 * when the hrtimer is started 1669 */ 1670 void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), 1671 clockid_t clock_id, enum hrtimer_mode mode) 1672 { 1673 debug_setup(timer, clock_id, mode); 1674 __hrtimer_setup(timer, function, clock_id, mode); 1675 } 1676 EXPORT_SYMBOL_GPL(hrtimer_setup); 1677 1678 /** 1679 * hrtimer_setup_on_stack - initialize a timer on stack memory 1680 * @timer: The timer to be initialized 1681 * @function: the callback function 1682 * @clock_id: The clock to be used 1683 * @mode: The timer mode 1684 * 1685 * Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack 1686 * memory. 1687 */ 1688 void hrtimer_setup_on_stack(struct hrtimer *timer, 1689 enum hrtimer_restart (*function)(struct hrtimer *), 1690 clockid_t clock_id, enum hrtimer_mode mode) 1691 { 1692 debug_setup_on_stack(timer, clock_id, mode); 1693 __hrtimer_setup(timer, function, clock_id, mode); 1694 } 1695 EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack); 1696 1697 /* 1698 * A timer is active, when it is enqueued into the rbtree or the 1699 * callback function is running or it's in the state of being migrated 1700 * to another cpu. 1701 * 1702 * It is important for this function to not return a false negative. 1703 */ 1704 bool hrtimer_active(const struct hrtimer *timer) 1705 { 1706 struct hrtimer_clock_base *base; 1707 unsigned int seq; 1708 1709 do { 1710 base = READ_ONCE(timer->base); 1711 seq = raw_read_seqcount_begin(&base->seq); 1712 1713 if (timer->state != HRTIMER_STATE_INACTIVE || 1714 base->running == timer) 1715 return true; 1716 1717 } while (read_seqcount_retry(&base->seq, seq) || 1718 base != READ_ONCE(timer->base)); 1719 1720 return false; 1721 } 1722 EXPORT_SYMBOL_GPL(hrtimer_active); 1723 1724 /* 1725 * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3 1726 * distinct sections: 1727 * 1728 * - queued: the timer is queued 1729 * - callback: the timer is being ran 1730 * - post: the timer is inactive or (re)queued 1731 * 1732 * On the read side we ensure we observe timer->state and cpu_base->running 1733 * from the same section, if anything changed while we looked at it, we retry. 1734 * This includes timer->base changing because sequence numbers alone are 1735 * insufficient for that. 1736 * 1737 * The sequence numbers are required because otherwise we could still observe 1738 * a false negative if the read side got smeared over multiple consecutive 1739 * __run_hrtimer() invocations. 1740 */ 1741 1742 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, 1743 struct hrtimer_clock_base *base, 1744 struct hrtimer *timer, ktime_t *now, 1745 unsigned long flags) __must_hold(&cpu_base->lock) 1746 { 1747 enum hrtimer_restart (*fn)(struct hrtimer *); 1748 bool expires_in_hardirq; 1749 int restart; 1750 1751 lockdep_assert_held(&cpu_base->lock); 1752 1753 debug_hrtimer_deactivate(timer); 1754 base->running = timer; 1755 1756 /* 1757 * Separate the ->running assignment from the ->state assignment. 1758 * 1759 * As with a regular write barrier, this ensures the read side in 1760 * hrtimer_active() cannot observe base->running == NULL && 1761 * timer->state == INACTIVE. 1762 */ 1763 raw_write_seqcount_barrier(&base->seq); 1764 1765 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); 1766 fn = ACCESS_PRIVATE(timer, function); 1767 1768 /* 1769 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the 1770 * timer is restarted with a period then it becomes an absolute 1771 * timer. If its not restarted it does not matter. 1772 */ 1773 if (IS_ENABLED(CONFIG_TIME_LOW_RES)) 1774 timer->is_rel = false; 1775 1776 /* 1777 * The timer is marked as running in the CPU base, so it is 1778 * protected against migration to a different CPU even if the lock 1779 * is dropped. 1780 */ 1781 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1782 trace_hrtimer_expire_entry(timer, now); 1783 expires_in_hardirq = lockdep_hrtimer_enter(timer); 1784 1785 restart = fn(timer); 1786 1787 lockdep_hrtimer_exit(expires_in_hardirq); 1788 trace_hrtimer_expire_exit(timer); 1789 raw_spin_lock_irq(&cpu_base->lock); 1790 1791 /* 1792 * Note: We clear the running state after enqueue_hrtimer and 1793 * we do not reprogram the event hardware. Happens either in 1794 * hrtimer_start_range_ns() or in hrtimer_interrupt() 1795 * 1796 * Note: Because we dropped the cpu_base->lock above, 1797 * hrtimer_start_range_ns() can have popped in and enqueued the timer 1798 * for us already. 1799 */ 1800 if (restart != HRTIMER_NORESTART && 1801 !(timer->state & HRTIMER_STATE_ENQUEUED)) 1802 enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); 1803 1804 /* 1805 * Separate the ->running assignment from the ->state assignment. 1806 * 1807 * As with a regular write barrier, this ensures the read side in 1808 * hrtimer_active() cannot observe base->running.timer == NULL && 1809 * timer->state == INACTIVE. 1810 */ 1811 raw_write_seqcount_barrier(&base->seq); 1812 1813 WARN_ON_ONCE(base->running != timer); 1814 base->running = NULL; 1815 } 1816 1817 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, 1818 unsigned long flags, unsigned int active_mask) 1819 { 1820 struct hrtimer_clock_base *base; 1821 unsigned int active = cpu_base->active_bases & active_mask; 1822 1823 for_each_active_base(base, cpu_base, active) { 1824 struct timerqueue_node *node; 1825 ktime_t basenow; 1826 1827 basenow = ktime_add(now, base->offset); 1828 1829 while ((node = timerqueue_getnext(&base->active))) { 1830 struct hrtimer *timer; 1831 1832 timer = container_of(node, struct hrtimer, node); 1833 1834 /* 1835 * The immediate goal for using the softexpires is 1836 * minimizing wakeups, not running timers at the 1837 * earliest interrupt after their soft expiration. 1838 * This allows us to avoid using a Priority Search 1839 * Tree, which can answer a stabbing query for 1840 * overlapping intervals and instead use the simple 1841 * BST we already have. 1842 * We don't add extra wakeups by delaying timers that 1843 * are right-of a not yet expired timer, because that 1844 * timer will have to trigger a wakeup anyway. 1845 */ 1846 if (basenow < hrtimer_get_softexpires(timer)) 1847 break; 1848 1849 __run_hrtimer(cpu_base, base, timer, &basenow, flags); 1850 if (active_mask == HRTIMER_ACTIVE_SOFT) 1851 hrtimer_sync_wait_running(cpu_base, flags); 1852 } 1853 } 1854 } 1855 1856 static __latent_entropy void hrtimer_run_softirq(void) 1857 { 1858 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1859 unsigned long flags; 1860 ktime_t now; 1861 1862 hrtimer_cpu_base_lock_expiry(cpu_base); 1863 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1864 1865 now = hrtimer_update_base(cpu_base); 1866 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); 1867 1868 cpu_base->softirq_activated = 0; 1869 hrtimer_update_softirq_timer(cpu_base, true); 1870 1871 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1872 hrtimer_cpu_base_unlock_expiry(cpu_base); 1873 } 1874 1875 #ifdef CONFIG_HIGH_RES_TIMERS 1876 1877 /* 1878 * High resolution timer interrupt 1879 * Called with interrupts disabled 1880 */ 1881 void hrtimer_interrupt(struct clock_event_device *dev) 1882 { 1883 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1884 ktime_t expires_next, now, entry_time, delta; 1885 unsigned long flags; 1886 int retries = 0; 1887 1888 BUG_ON(!cpu_base->hres_active); 1889 cpu_base->nr_events++; 1890 dev->next_event = KTIME_MAX; 1891 1892 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1893 entry_time = now = hrtimer_update_base(cpu_base); 1894 retry: 1895 cpu_base->in_hrtirq = 1; 1896 /* 1897 * We set expires_next to KTIME_MAX here with cpu_base->lock 1898 * held to prevent that a timer is enqueued in our queue via 1899 * the migration code. This does not affect enqueueing of 1900 * timers which run their callback and need to be requeued on 1901 * this CPU. 1902 */ 1903 cpu_base->expires_next = KTIME_MAX; 1904 1905 if (!ktime_before(now, cpu_base->softirq_expires_next)) { 1906 cpu_base->softirq_expires_next = KTIME_MAX; 1907 cpu_base->softirq_activated = 1; 1908 raise_timer_softirq(HRTIMER_SOFTIRQ); 1909 } 1910 1911 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); 1912 1913 /* Reevaluate the clock bases for the [soft] next expiry */ 1914 expires_next = hrtimer_update_next_event(cpu_base); 1915 /* 1916 * Store the new expiry value so the migration code can verify 1917 * against it. 1918 */ 1919 cpu_base->expires_next = expires_next; 1920 cpu_base->in_hrtirq = 0; 1921 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1922 1923 /* Reprogramming necessary ? */ 1924 if (!tick_program_event(expires_next, 0)) { 1925 cpu_base->hang_detected = 0; 1926 return; 1927 } 1928 1929 /* 1930 * The next timer was already expired due to: 1931 * - tracing 1932 * - long lasting callbacks 1933 * - being scheduled away when running in a VM 1934 * 1935 * We need to prevent that we loop forever in the hrtimer 1936 * interrupt routine. We give it 3 attempts to avoid 1937 * overreacting on some spurious event. 1938 * 1939 * Acquire base lock for updating the offsets and retrieving 1940 * the current time. 1941 */ 1942 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1943 now = hrtimer_update_base(cpu_base); 1944 cpu_base->nr_retries++; 1945 if (++retries < 3) 1946 goto retry; 1947 /* 1948 * Give the system a chance to do something else than looping 1949 * here. We stored the entry time, so we know exactly how long 1950 * we spent here. We schedule the next event this amount of 1951 * time away. 1952 */ 1953 cpu_base->nr_hangs++; 1954 cpu_base->hang_detected = 1; 1955 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1956 1957 delta = ktime_sub(now, entry_time); 1958 if ((unsigned int)delta > cpu_base->max_hang_time) 1959 cpu_base->max_hang_time = (unsigned int) delta; 1960 /* 1961 * Limit it to a sensible value as we enforce a longer 1962 * delay. Give the CPU at least 100ms to catch up. 1963 */ 1964 if (delta > 100 * NSEC_PER_MSEC) 1965 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); 1966 else 1967 expires_next = ktime_add(now, delta); 1968 tick_program_event(expires_next, 1); 1969 pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); 1970 } 1971 #endif /* !CONFIG_HIGH_RES_TIMERS */ 1972 1973 /* 1974 * Called from run_local_timers in hardirq context every jiffy 1975 */ 1976 void hrtimer_run_queues(void) 1977 { 1978 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1979 unsigned long flags; 1980 ktime_t now; 1981 1982 if (hrtimer_hres_active(cpu_base)) 1983 return; 1984 1985 /* 1986 * This _is_ ugly: We have to check periodically, whether we 1987 * can switch to highres and / or nohz mode. The clocksource 1988 * switch happens with xtime_lock held. Notification from 1989 * there only sets the check bit in the tick_oneshot code, 1990 * otherwise we might deadlock vs. xtime_lock. 1991 */ 1992 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) { 1993 hrtimer_switch_to_hres(); 1994 return; 1995 } 1996 1997 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1998 now = hrtimer_update_base(cpu_base); 1999 2000 if (!ktime_before(now, cpu_base->softirq_expires_next)) { 2001 cpu_base->softirq_expires_next = KTIME_MAX; 2002 cpu_base->softirq_activated = 1; 2003 raise_timer_softirq(HRTIMER_SOFTIRQ); 2004 } 2005 2006 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); 2007 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 2008 } 2009 2010 /* 2011 * Sleep related functions: 2012 */ 2013 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) 2014 { 2015 struct hrtimer_sleeper *t = 2016 container_of(timer, struct hrtimer_sleeper, timer); 2017 struct task_struct *task = t->task; 2018 2019 t->task = NULL; 2020 if (task) 2021 wake_up_process(task); 2022 2023 return HRTIMER_NORESTART; 2024 } 2025 2026 /** 2027 * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer 2028 * @sl: sleeper to be started 2029 * @mode: timer mode abs/rel 2030 * 2031 * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers 2032 * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) 2033 */ 2034 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, 2035 enum hrtimer_mode mode) 2036 { 2037 /* 2038 * Make the enqueue delivery mode check work on RT. If the sleeper 2039 * was initialized for hard interrupt delivery, force the mode bit. 2040 * This is a special case for hrtimer_sleepers because 2041 * __hrtimer_setup_sleeper() determines the delivery mode on RT so the 2042 * fiddling with this decision is avoided at the call sites. 2043 */ 2044 if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) 2045 mode |= HRTIMER_MODE_HARD; 2046 2047 hrtimer_start_expires(&sl->timer, mode); 2048 } 2049 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); 2050 2051 static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, 2052 clockid_t clock_id, enum hrtimer_mode mode) 2053 { 2054 /* 2055 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly 2056 * marked for hard interrupt expiry mode are moved into soft 2057 * interrupt context either for latency reasons or because the 2058 * hrtimer callback takes regular spinlocks or invokes other 2059 * functions which are not suitable for hard interrupt context on 2060 * PREEMPT_RT. 2061 * 2062 * The hrtimer_sleeper callback is RT compatible in hard interrupt 2063 * context, but there is a latency concern: Untrusted userspace can 2064 * spawn many threads which arm timers for the same expiry time on 2065 * the same CPU. That causes a latency spike due to the wakeup of 2066 * a gazillion threads. 2067 * 2068 * OTOH, privileged real-time user space applications rely on the 2069 * low latency of hard interrupt wakeups. If the current task is in 2070 * a real-time scheduling class, mark the mode for hard interrupt 2071 * expiry. 2072 */ 2073 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 2074 if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT)) 2075 mode |= HRTIMER_MODE_HARD; 2076 } 2077 2078 __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode); 2079 sl->task = current; 2080 } 2081 2082 /** 2083 * hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory 2084 * @sl: sleeper to be initialized 2085 * @clock_id: the clock to be used 2086 * @mode: timer mode abs/rel 2087 */ 2088 void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, 2089 clockid_t clock_id, enum hrtimer_mode mode) 2090 { 2091 debug_setup_on_stack(&sl->timer, clock_id, mode); 2092 __hrtimer_setup_sleeper(sl, clock_id, mode); 2093 } 2094 EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack); 2095 2096 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) 2097 { 2098 switch(restart->nanosleep.type) { 2099 #ifdef CONFIG_COMPAT_32BIT_TIME 2100 case TT_COMPAT: 2101 if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp)) 2102 return -EFAULT; 2103 break; 2104 #endif 2105 case TT_NATIVE: 2106 if (put_timespec64(ts, restart->nanosleep.rmtp)) 2107 return -EFAULT; 2108 break; 2109 default: 2110 BUG(); 2111 } 2112 return -ERESTART_RESTARTBLOCK; 2113 } 2114 2115 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) 2116 { 2117 struct restart_block *restart; 2118 2119 do { 2120 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); 2121 hrtimer_sleeper_start_expires(t, mode); 2122 2123 if (likely(t->task)) 2124 schedule(); 2125 2126 hrtimer_cancel(&t->timer); 2127 mode = HRTIMER_MODE_ABS; 2128 2129 } while (t->task && !signal_pending(current)); 2130 2131 __set_current_state(TASK_RUNNING); 2132 2133 if (!t->task) 2134 return 0; 2135 2136 restart = ¤t->restart_block; 2137 if (restart->nanosleep.type != TT_NONE) { 2138 ktime_t rem = hrtimer_expires_remaining(&t->timer); 2139 struct timespec64 rmt; 2140 2141 if (rem <= 0) 2142 return 0; 2143 rmt = ktime_to_timespec64(rem); 2144 2145 return nanosleep_copyout(restart, &rmt); 2146 } 2147 return -ERESTART_RESTARTBLOCK; 2148 } 2149 2150 static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) 2151 { 2152 struct hrtimer_sleeper t; 2153 int ret; 2154 2155 hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS); 2156 hrtimer_set_expires(&t.timer, restart->nanosleep.expires); 2157 ret = do_nanosleep(&t, HRTIMER_MODE_ABS); 2158 destroy_hrtimer_on_stack(&t.timer); 2159 return ret; 2160 } 2161 2162 long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, 2163 const clockid_t clockid) 2164 { 2165 struct restart_block *restart; 2166 struct hrtimer_sleeper t; 2167 int ret = 0; 2168 2169 hrtimer_setup_sleeper_on_stack(&t, clockid, mode); 2170 hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); 2171 ret = do_nanosleep(&t, mode); 2172 if (ret != -ERESTART_RESTARTBLOCK) 2173 goto out; 2174 2175 /* Absolute timers do not update the rmtp value and restart: */ 2176 if (mode == HRTIMER_MODE_ABS) { 2177 ret = -ERESTARTNOHAND; 2178 goto out; 2179 } 2180 2181 restart = ¤t->restart_block; 2182 restart->nanosleep.clockid = t.timer.base->clockid; 2183 restart->nanosleep.expires = hrtimer_get_expires(&t.timer); 2184 set_restart_fn(restart, hrtimer_nanosleep_restart); 2185 out: 2186 destroy_hrtimer_on_stack(&t.timer); 2187 return ret; 2188 } 2189 2190 #ifdef CONFIG_64BIT 2191 2192 SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, 2193 struct __kernel_timespec __user *, rmtp) 2194 { 2195 struct timespec64 tu; 2196 2197 if (get_timespec64(&tu, rqtp)) 2198 return -EFAULT; 2199 2200 if (!timespec64_valid(&tu)) 2201 return -EINVAL; 2202 2203 current->restart_block.fn = do_no_restart_syscall; 2204 current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; 2205 current->restart_block.nanosleep.rmtp = rmtp; 2206 return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, 2207 CLOCK_MONOTONIC); 2208 } 2209 2210 #endif 2211 2212 #ifdef CONFIG_COMPAT_32BIT_TIME 2213 2214 SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, 2215 struct old_timespec32 __user *, rmtp) 2216 { 2217 struct timespec64 tu; 2218 2219 if (get_old_timespec32(&tu, rqtp)) 2220 return -EFAULT; 2221 2222 if (!timespec64_valid(&tu)) 2223 return -EINVAL; 2224 2225 current->restart_block.fn = do_no_restart_syscall; 2226 current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE; 2227 current->restart_block.nanosleep.compat_rmtp = rmtp; 2228 return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, 2229 CLOCK_MONOTONIC); 2230 } 2231 #endif 2232 2233 /* 2234 * Functions related to boot-time initialization: 2235 */ 2236 int hrtimers_prepare_cpu(unsigned int cpu) 2237 { 2238 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 2239 int i; 2240 2241 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 2242 struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; 2243 2244 clock_b->cpu_base = cpu_base; 2245 seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); 2246 timerqueue_init_head(&clock_b->active); 2247 } 2248 2249 cpu_base->cpu = cpu; 2250 hrtimer_cpu_base_init_expiry_lock(cpu_base); 2251 return 0; 2252 } 2253 2254 int hrtimers_cpu_starting(unsigned int cpu) 2255 { 2256 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2257 2258 /* Clear out any left over state from a CPU down operation */ 2259 cpu_base->active_bases = 0; 2260 cpu_base->hres_active = 0; 2261 cpu_base->hang_detected = 0; 2262 cpu_base->next_timer = NULL; 2263 cpu_base->softirq_next_timer = NULL; 2264 cpu_base->expires_next = KTIME_MAX; 2265 cpu_base->softirq_expires_next = KTIME_MAX; 2266 cpu_base->online = 1; 2267 return 0; 2268 } 2269 2270 #ifdef CONFIG_HOTPLUG_CPU 2271 2272 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, 2273 struct hrtimer_clock_base *new_base) 2274 { 2275 struct hrtimer *timer; 2276 struct timerqueue_node *node; 2277 2278 while ((node = timerqueue_getnext(&old_base->active))) { 2279 timer = container_of(node, struct hrtimer, node); 2280 BUG_ON(hrtimer_callback_running(timer)); 2281 debug_deactivate(timer); 2282 2283 /* 2284 * Mark it as ENQUEUED not INACTIVE otherwise the 2285 * timer could be seen as !active and just vanish away 2286 * under us on another CPU 2287 */ 2288 __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); 2289 timer->base = new_base; 2290 /* 2291 * Enqueue the timers on the new cpu. This does not 2292 * reprogram the event device in case the timer 2293 * expires before the earliest on this CPU, but we run 2294 * hrtimer_interrupt after we migrated everything to 2295 * sort out already expired timers and reprogram the 2296 * event device. 2297 */ 2298 enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); 2299 } 2300 } 2301 2302 int hrtimers_cpu_dying(unsigned int dying_cpu) 2303 { 2304 int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); 2305 struct hrtimer_cpu_base *old_base, *new_base; 2306 2307 old_base = this_cpu_ptr(&hrtimer_bases); 2308 new_base = &per_cpu(hrtimer_bases, ncpu); 2309 2310 /* 2311 * The caller is globally serialized and nobody else 2312 * takes two locks at once, deadlock is not possible. 2313 */ 2314 raw_spin_lock(&old_base->lock); 2315 raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); 2316 2317 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 2318 migrate_hrtimer_list(&old_base->clock_base[i], 2319 &new_base->clock_base[i]); 2320 } 2321 2322 /* Tell the other CPU to retrigger the next event */ 2323 smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); 2324 2325 raw_spin_unlock(&new_base->lock); 2326 old_base->online = 0; 2327 raw_spin_unlock(&old_base->lock); 2328 2329 return 0; 2330 } 2331 2332 #endif /* CONFIG_HOTPLUG_CPU */ 2333 2334 void __init hrtimers_init(void) 2335 { 2336 hrtimers_prepare_cpu(smp_processor_id()); 2337 hrtimers_cpu_starting(smp_processor_id()); 2338 open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); 2339 } 2340