1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> 4 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar 5 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner 6 * 7 * High-resolution kernel timers 8 * 9 * In contrast to the low-resolution timeout API, aka timer wheel, 10 * hrtimers provide finer resolution and accuracy depending on system 11 * configuration and capabilities. 12 * 13 * Started by: Thomas Gleixner and Ingo Molnar 14 * 15 * Credits: 16 * Based on the original timer wheel code 17 * 18 * Help, testing, suggestions, bugfixes, improvements were 19 * provided by: 20 * 21 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel 22 * et. al. 23 */ 24 25 #include <linux/cpu.h> 26 #include <linux/export.h> 27 #include <linux/percpu.h> 28 #include <linux/hrtimer.h> 29 #include <linux/notifier.h> 30 #include <linux/syscalls.h> 31 #include <linux/interrupt.h> 32 #include <linux/tick.h> 33 #include <linux/err.h> 34 #include <linux/debugobjects.h> 35 #include <linux/sched/signal.h> 36 #include <linux/sched/sysctl.h> 37 #include <linux/sched/rt.h> 38 #include <linux/sched/deadline.h> 39 #include <linux/sched/nohz.h> 40 #include <linux/sched/debug.h> 41 #include <linux/sched/isolation.h> 42 #include <linux/timer.h> 43 #include <linux/freezer.h> 44 #include <linux/compat.h> 45 46 #include <linux/uaccess.h> 47 48 #include <trace/events/timer.h> 49 50 #include "tick-internal.h" 51 52 /* 53 * Masks for selecting the soft and hard context timers from 54 * cpu_base->active 55 */ 56 #define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) 57 #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) 58 #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) 59 #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) 60 61 static void retrigger_next_event(void *arg); 62 static ktime_t __hrtimer_cb_get_time(clockid_t clock_id); 63 64 /* 65 * The timer bases: 66 * 67 * There are more clockids than hrtimer bases. Thus, we index 68 * into the timer bases by the hrtimer_base_type enum. When trying 69 * to reach a base using a clockid, hrtimer_clockid_to_base() 70 * is used to convert from clockid to the proper hrtimer_base_type. 
71 */ 72 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 73 { 74 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), 75 .clock_base = 76 { 77 { 78 .index = HRTIMER_BASE_MONOTONIC, 79 .clockid = CLOCK_MONOTONIC, 80 }, 81 { 82 .index = HRTIMER_BASE_REALTIME, 83 .clockid = CLOCK_REALTIME, 84 }, 85 { 86 .index = HRTIMER_BASE_BOOTTIME, 87 .clockid = CLOCK_BOOTTIME, 88 }, 89 { 90 .index = HRTIMER_BASE_TAI, 91 .clockid = CLOCK_TAI, 92 }, 93 { 94 .index = HRTIMER_BASE_MONOTONIC_SOFT, 95 .clockid = CLOCK_MONOTONIC, 96 }, 97 { 98 .index = HRTIMER_BASE_REALTIME_SOFT, 99 .clockid = CLOCK_REALTIME, 100 }, 101 { 102 .index = HRTIMER_BASE_BOOTTIME_SOFT, 103 .clockid = CLOCK_BOOTTIME, 104 }, 105 { 106 .index = HRTIMER_BASE_TAI_SOFT, 107 .clockid = CLOCK_TAI, 108 }, 109 }, 110 .csd = CSD_INIT(retrigger_next_event, NULL) 111 }; 112 113 static inline bool hrtimer_base_is_online(struct hrtimer_cpu_base *base) 114 { 115 if (!IS_ENABLED(CONFIG_HOTPLUG_CPU)) 116 return true; 117 else 118 return likely(base->online); 119 } 120 121 /* 122 * Functions and macros which are different for UP/SMP systems are kept in a 123 * single place 124 */ 125 #ifdef CONFIG_SMP 126 127 /* 128 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() 129 * such that hrtimer_callback_running() can unconditionally dereference 130 * timer->base->cpu_base 131 */ 132 static struct hrtimer_cpu_base migration_cpu_base = { 133 .clock_base = { { 134 .cpu_base = &migration_cpu_base, 135 .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, 136 &migration_cpu_base.lock), 137 }, }, 138 }; 139 140 #define migration_base migration_cpu_base.clock_base[0] 141 142 /* 143 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock 144 * means that all timers which are tied to this base via timer->base are 145 * locked, and the base itself is locked too. 146 * 147 * So __run_timers/migrate_timers can safely modify all timers which could 148 * be found on the lists/queues. 149 * 150 * When the timer's base is locked, and the timer removed from list, it is 151 * possible to set timer->base = &migration_base and drop the lock: the timer 152 * remains locked. 153 */ 154 static 155 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, 156 unsigned long *flags) 157 __acquires(&timer->base->lock) 158 { 159 struct hrtimer_clock_base *base; 160 161 for (;;) { 162 base = READ_ONCE(timer->base); 163 if (likely(base != &migration_base)) { 164 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 165 if (likely(base == timer->base)) 166 return base; 167 /* The timer has migrated to another CPU: */ 168 raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); 169 } 170 cpu_relax(); 171 } 172 } 173 174 /* 175 * Check if the elected target is suitable considering its next 176 * event and the hotplug state of the current CPU. 177 * 178 * If the elected target is remote and its next event is after the timer 179 * to queue, then a remote reprogram is necessary. However there is no 180 * guarantee the IPI handling the operation would arrive in time to meet 181 * the high resolution deadline. In this case the local CPU becomes a 182 * preferred target, unless it is offline. 183 * 184 * High and low resolution modes are handled the same way for simplicity. 185 * 186 * Called with cpu_base->lock of target cpu held. 
187 */ 188 static bool hrtimer_suitable_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base, 189 struct hrtimer_cpu_base *new_cpu_base, 190 struct hrtimer_cpu_base *this_cpu_base) 191 { 192 ktime_t expires; 193 194 /* 195 * The local CPU clockevent can be reprogrammed. Also get_target_base() 196 * guarantees it is online. 197 */ 198 if (new_cpu_base == this_cpu_base) 199 return true; 200 201 /* 202 * The offline local CPU can't be the default target if the 203 * next remote target event is after this timer. Keep the 204 * elected new base. An IPI will be issued to reprogram 205 * it as a last resort. 206 */ 207 if (!hrtimer_base_is_online(this_cpu_base)) 208 return true; 209 210 expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); 211 212 return expires >= new_base->cpu_base->expires_next; 213 } 214 215 static inline struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, int pinned) 216 { 217 if (!hrtimer_base_is_online(base)) { 218 int cpu = cpumask_any_and(cpu_online_mask, housekeeping_cpumask(HK_TYPE_TIMER)); 219 220 return &per_cpu(hrtimer_bases, cpu); 221 } 222 223 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 224 if (static_branch_likely(&timers_migration_enabled) && !pinned) 225 return &per_cpu(hrtimer_bases, get_nohz_timer_target()); 226 #endif 227 return base; 228 } 229 230 /* 231 * We switch the timer base to a power-optimized selected CPU target, 232 * if: 233 * - NO_HZ_COMMON is enabled 234 * - timer migration is enabled 235 * - the timer callback is not running 236 * - the timer is not the first expiring timer on the new target 237 * 238 * If one of the above requirements is not fulfilled we move the timer 239 * to the current CPU or leave it on the previously assigned CPU if 240 * the timer callback is currently running. 241 */ 242 static inline struct hrtimer_clock_base * 243 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 244 int pinned) 245 { 246 struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; 247 struct hrtimer_clock_base *new_base; 248 int basenum = base->index; 249 250 this_cpu_base = this_cpu_ptr(&hrtimer_bases); 251 new_cpu_base = get_target_base(this_cpu_base, pinned); 252 again: 253 new_base = &new_cpu_base->clock_base[basenum]; 254 255 if (base != new_base) { 256 /* 257 * We are trying to move timer to new_base. 258 * However we can't change timer's base while it is running, 259 * so we keep it on the same CPU. No hassle vs. reprogramming 260 * the event source in the high resolution case. The softirq 261 * code will take care of this when the timer function has 262 * completed. There is no conflict as we hold the lock until 263 * the timer is enqueued. 
264 */ 265 if (unlikely(hrtimer_callback_running(timer))) 266 return base; 267 268 /* See the comment in lock_hrtimer_base() */ 269 WRITE_ONCE(timer->base, &migration_base); 270 raw_spin_unlock(&base->cpu_base->lock); 271 raw_spin_lock(&new_base->cpu_base->lock); 272 273 if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, 274 this_cpu_base)) { 275 raw_spin_unlock(&new_base->cpu_base->lock); 276 raw_spin_lock(&base->cpu_base->lock); 277 new_cpu_base = this_cpu_base; 278 WRITE_ONCE(timer->base, base); 279 goto again; 280 } 281 WRITE_ONCE(timer->base, new_base); 282 } else { 283 if (!hrtimer_suitable_target(timer, new_base, new_cpu_base, this_cpu_base)) { 284 new_cpu_base = this_cpu_base; 285 goto again; 286 } 287 } 288 return new_base; 289 } 290 291 #else /* CONFIG_SMP */ 292 293 static inline struct hrtimer_clock_base * 294 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 295 __acquires(&timer->base->cpu_base->lock) 296 { 297 struct hrtimer_clock_base *base = timer->base; 298 299 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 300 301 return base; 302 } 303 304 # define switch_hrtimer_base(t, b, p) (b) 305 306 #endif /* !CONFIG_SMP */ 307 308 /* 309 * Functions for the union type storage format of ktime_t which are 310 * too large for inlining: 311 */ 312 #if BITS_PER_LONG < 64 313 /* 314 * Divide a ktime value by a nanosecond value 315 */ 316 s64 __ktime_divns(const ktime_t kt, s64 div) 317 { 318 int sft = 0; 319 s64 dclc; 320 u64 tmp; 321 322 dclc = ktime_to_ns(kt); 323 tmp = dclc < 0 ? -dclc : dclc; 324 325 /* Make sure the divisor is less than 2^32: */ 326 while (div >> 32) { 327 sft++; 328 div >>= 1; 329 } 330 tmp >>= sft; 331 do_div(tmp, (u32) div); 332 return dclc < 0 ? -tmp : tmp; 333 } 334 EXPORT_SYMBOL_GPL(__ktime_divns); 335 #endif /* BITS_PER_LONG >= 64 */ 336 337 /* 338 * Add two ktime values and do a safety check for overflow: 339 */ 340 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) 341 { 342 ktime_t res = ktime_add_unsafe(lhs, rhs); 343 344 /* 345 * We use KTIME_SEC_MAX here, the maximum timeout which we can 346 * return to user space in a timespec: 347 */ 348 if (res < 0 || res < lhs || res < rhs) 349 res = ktime_set(KTIME_SEC_MAX, 0); 350 351 return res; 352 } 353 354 EXPORT_SYMBOL_GPL(ktime_add_safe); 355 356 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS 357 358 static const struct debug_obj_descr hrtimer_debug_descr; 359 360 static void *hrtimer_debug_hint(void *addr) 361 { 362 return ACCESS_PRIVATE((struct hrtimer *)addr, function); 363 } 364 365 /* 366 * fixup_init is called when: 367 * - an active object is initialized 368 */ 369 static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state) 370 { 371 struct hrtimer *timer = addr; 372 373 switch (state) { 374 case ODEBUG_STATE_ACTIVE: 375 hrtimer_cancel(timer); 376 debug_object_init(timer, &hrtimer_debug_descr); 377 return true; 378 default: 379 return false; 380 } 381 } 382 383 /* 384 * fixup_activate is called when: 385 * - an active object is activated 386 * - an unknown non-static object is activated 387 */ 388 static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state) 389 { 390 switch (state) { 391 case ODEBUG_STATE_ACTIVE: 392 WARN_ON(1); 393 fallthrough; 394 default: 395 return false; 396 } 397 } 398 399 /* 400 * fixup_free is called when: 401 * - an active object is freed 402 */ 403 static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) 404 { 405 struct hrtimer *timer = addr; 406 407 switch (state) { 408 case ODEBUG_STATE_ACTIVE: 409 
hrtimer_cancel(timer); 410 debug_object_free(timer, &hrtimer_debug_descr); 411 return true; 412 default: 413 return false; 414 } 415 } 416 417 static const struct debug_obj_descr hrtimer_debug_descr = { 418 .name = "hrtimer", 419 .debug_hint = hrtimer_debug_hint, 420 .fixup_init = hrtimer_fixup_init, 421 .fixup_activate = hrtimer_fixup_activate, 422 .fixup_free = hrtimer_fixup_free, 423 }; 424 425 static inline void debug_hrtimer_init(struct hrtimer *timer) 426 { 427 debug_object_init(timer, &hrtimer_debug_descr); 428 } 429 430 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) 431 { 432 debug_object_init_on_stack(timer, &hrtimer_debug_descr); 433 } 434 435 static inline void debug_hrtimer_activate(struct hrtimer *timer, 436 enum hrtimer_mode mode) 437 { 438 debug_object_activate(timer, &hrtimer_debug_descr); 439 } 440 441 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) 442 { 443 debug_object_deactivate(timer, &hrtimer_debug_descr); 444 } 445 446 void destroy_hrtimer_on_stack(struct hrtimer *timer) 447 { 448 debug_object_free(timer, &hrtimer_debug_descr); 449 } 450 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); 451 452 #else 453 454 static inline void debug_hrtimer_init(struct hrtimer *timer) { } 455 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { } 456 static inline void debug_hrtimer_activate(struct hrtimer *timer, 457 enum hrtimer_mode mode) { } 458 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } 459 #endif 460 461 static inline void debug_setup(struct hrtimer *timer, clockid_t clockid, enum hrtimer_mode mode) 462 { 463 debug_hrtimer_init(timer); 464 trace_hrtimer_setup(timer, clockid, mode); 465 } 466 467 static inline void debug_setup_on_stack(struct hrtimer *timer, clockid_t clockid, 468 enum hrtimer_mode mode) 469 { 470 debug_hrtimer_init_on_stack(timer); 471 trace_hrtimer_setup(timer, clockid, mode); 472 } 473 474 static inline void debug_activate(struct hrtimer *timer, 475 enum hrtimer_mode mode) 476 { 477 debug_hrtimer_activate(timer, mode); 478 trace_hrtimer_start(timer, mode); 479 } 480 481 static inline void debug_deactivate(struct hrtimer *timer) 482 { 483 debug_hrtimer_deactivate(timer); 484 trace_hrtimer_cancel(timer); 485 } 486 487 static struct hrtimer_clock_base * 488 __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) 489 { 490 unsigned int idx; 491 492 if (!*active) 493 return NULL; 494 495 idx = __ffs(*active); 496 *active &= ~(1U << idx); 497 498 return &cpu_base->clock_base[idx]; 499 } 500 501 #define for_each_active_base(base, cpu_base, active) \ 502 while ((base = __next_base((cpu_base), &(active)))) 503 504 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, 505 const struct hrtimer *exclude, 506 unsigned int active, 507 ktime_t expires_next) 508 { 509 struct hrtimer_clock_base *base; 510 ktime_t expires; 511 512 for_each_active_base(base, cpu_base, active) { 513 struct timerqueue_node *next; 514 struct hrtimer *timer; 515 516 next = timerqueue_getnext(&base->active); 517 timer = container_of(next, struct hrtimer, node); 518 if (timer == exclude) { 519 /* Get to the next timer in the queue. */ 520 next = timerqueue_iterate_next(next); 521 if (!next) 522 continue; 523 524 timer = container_of(next, struct hrtimer, node); 525 } 526 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 527 if (expires < expires_next) { 528 expires_next = expires; 529 530 /* Skip cpu_base update if a timer is being excluded. 
*/ 531 if (exclude) 532 continue; 533 534 if (timer->is_soft) 535 cpu_base->softirq_next_timer = timer; 536 else 537 cpu_base->next_timer = timer; 538 } 539 } 540 /* 541 * clock_was_set() might have changed base->offset of any of 542 * the clock bases so the result might be negative. Fix it up 543 * to prevent a false positive in clockevents_program_event(). 544 */ 545 if (expires_next < 0) 546 expires_next = 0; 547 return expires_next; 548 } 549 550 /* 551 * Recomputes cpu_base::*next_timer and returns the earliest expires_next 552 * but does not set cpu_base::*expires_next, that is done by 553 * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating 554 * cpu_base::*expires_next right away, reprogramming logic would no longer 555 * work. 556 * 557 * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, 558 * those timers will get run whenever the softirq gets handled, at the end of 559 * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. 560 * 561 * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. 562 * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual 563 * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. 564 * 565 * @active_mask must be one of: 566 * - HRTIMER_ACTIVE_ALL, 567 * - HRTIMER_ACTIVE_SOFT, or 568 * - HRTIMER_ACTIVE_HARD. 569 */ 570 static ktime_t 571 __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) 572 { 573 unsigned int active; 574 struct hrtimer *next_timer = NULL; 575 ktime_t expires_next = KTIME_MAX; 576 577 if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { 578 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; 579 cpu_base->softirq_next_timer = NULL; 580 expires_next = __hrtimer_next_event_base(cpu_base, NULL, 581 active, KTIME_MAX); 582 583 next_timer = cpu_base->softirq_next_timer; 584 } 585 586 if (active_mask & HRTIMER_ACTIVE_HARD) { 587 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; 588 cpu_base->next_timer = next_timer; 589 expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, 590 expires_next); 591 } 592 593 return expires_next; 594 } 595 596 static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) 597 { 598 ktime_t expires_next, soft = KTIME_MAX; 599 600 /* 601 * If the soft interrupt has already been activated, ignore the 602 * soft bases. They will be handled in the already raised soft 603 * interrupt. 604 */ 605 if (!cpu_base->softirq_activated) { 606 soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); 607 /* 608 * Update the soft expiry time. clock_settime() might have 609 * affected it. 610 */ 611 cpu_base->softirq_expires_next = soft; 612 } 613 614 expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); 615 /* 616 * If a softirq timer is expiring first, update cpu_base->next_timer 617 * and program the hardware with the soft expiry time. 
618 */ 619 if (expires_next > soft) { 620 cpu_base->next_timer = cpu_base->softirq_next_timer; 621 expires_next = soft; 622 } 623 624 return expires_next; 625 } 626 627 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 628 { 629 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; 630 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; 631 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; 632 633 ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, 634 offs_real, offs_boot, offs_tai); 635 636 base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; 637 base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; 638 base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; 639 640 return now; 641 } 642 643 /* 644 * Is the high resolution mode active ? 645 */ 646 static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) 647 { 648 return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? 649 cpu_base->hres_active : 0; 650 } 651 652 static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, 653 struct hrtimer *next_timer, 654 ktime_t expires_next) 655 { 656 cpu_base->expires_next = expires_next; 657 658 /* 659 * If hres is not active, hardware does not have to be 660 * reprogrammed yet. 661 * 662 * If a hang was detected in the last timer interrupt then we 663 * leave the hang delay active in the hardware. We want the 664 * system to make progress. That also prevents the following 665 * scenario: 666 * T1 expires 50ms from now 667 * T2 expires 5s from now 668 * 669 * T1 is removed, so this code is called and would reprogram 670 * the hardware to 5s from now. Any hrtimer_start after that 671 * will not reprogram the hardware due to hang_detected being 672 * set. So we'd effectively block all timers until the T2 event 673 * fires. 674 */ 675 if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) 676 return; 677 678 tick_program_event(expires_next, 1); 679 } 680 681 /* 682 * Reprogram the event source with checking both queues for the 683 * next event 684 * Called with interrupts disabled and base->lock held 685 */ 686 static void 687 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) 688 { 689 ktime_t expires_next; 690 691 expires_next = hrtimer_update_next_event(cpu_base); 692 693 if (skip_equal && expires_next == cpu_base->expires_next) 694 return; 695 696 __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next); 697 } 698 699 /* High resolution timer related functions */ 700 #ifdef CONFIG_HIGH_RES_TIMERS 701 702 /* 703 * High resolution timer enabled ? 
704 */ 705 static bool hrtimer_hres_enabled __read_mostly = true; 706 unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; 707 EXPORT_SYMBOL_GPL(hrtimer_resolution); 708 709 /* 710 * Enable / Disable high resolution mode 711 */ 712 static int __init setup_hrtimer_hres(char *str) 713 { 714 return (kstrtobool(str, &hrtimer_hres_enabled) == 0); 715 } 716 717 __setup("highres=", setup_hrtimer_hres); 718 719 /* 720 * hrtimer_high_res_enabled - query, if the highres mode is enabled 721 */ 722 static inline int hrtimer_is_hres_enabled(void) 723 { 724 return hrtimer_hres_enabled; 725 } 726 727 /* 728 * Switch to high resolution mode 729 */ 730 static void hrtimer_switch_to_hres(void) 731 { 732 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 733 734 if (tick_init_highres()) { 735 pr_warn("Could not switch to high resolution mode on CPU %u\n", 736 base->cpu); 737 return; 738 } 739 base->hres_active = 1; 740 hrtimer_resolution = HIGH_RES_NSEC; 741 742 tick_setup_sched_timer(true); 743 /* "Retrigger" the interrupt to get things going */ 744 retrigger_next_event(NULL); 745 } 746 747 #else 748 749 static inline int hrtimer_is_hres_enabled(void) { return 0; } 750 static inline void hrtimer_switch_to_hres(void) { } 751 752 #endif /* CONFIG_HIGH_RES_TIMERS */ 753 /* 754 * Retrigger next event is called after clock was set with interrupts 755 * disabled through an SMP function call or directly from low level 756 * resume code. 757 * 758 * This is only invoked when: 759 * - CONFIG_HIGH_RES_TIMERS is enabled. 760 * - CONFIG_NOHZ_COMMON is enabled 761 * 762 * For the other cases this function is empty and because the call sites 763 * are optimized out it vanishes as well, i.e. no need for lots of 764 * #ifdeffery. 765 */ 766 static void retrigger_next_event(void *arg) 767 { 768 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 769 770 /* 771 * When high resolution mode or nohz is active, then the offsets of 772 * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the 773 * next tick will take care of that. 774 * 775 * If high resolution mode is active then the next expiring timer 776 * must be reevaluated and the clock event device reprogrammed if 777 * necessary. 778 * 779 * In the NOHZ case the update of the offset and the reevaluation 780 * of the next expiring timer is enough. The return from the SMP 781 * function call will take care of the reprogramming in case the 782 * CPU was in a NOHZ idle sleep. 783 * 784 * In periodic low resolution mode, the next softirq expiration 785 * must also be updated. 786 */ 787 raw_spin_lock(&base->lock); 788 hrtimer_update_base(base); 789 if (hrtimer_hres_active(base)) 790 hrtimer_force_reprogram(base, 0); 791 else 792 hrtimer_update_next_event(base); 793 raw_spin_unlock(&base->lock); 794 } 795 796 /* 797 * When a timer is enqueued and expires earlier than the already enqueued 798 * timers, we have to check, whether it expires earlier than the timer for 799 * which the clock event device was armed. 800 * 801 * Called with interrupts disabled and base->cpu_base.lock held 802 */ 803 static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) 804 { 805 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 806 struct hrtimer_clock_base *base = timer->base; 807 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 808 809 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); 810 811 /* 812 * CLOCK_REALTIME timer might be requested with an absolute 813 * expiry time which is less than base->offset. 
Set it to 0. 814 */ 815 if (expires < 0) 816 expires = 0; 817 818 if (timer->is_soft) { 819 /* 820 * soft hrtimer could be started on a remote CPU. In this 821 * case softirq_expires_next needs to be updated on the 822 * remote CPU. The soft hrtimer will not expire before the 823 * first hard hrtimer on the remote CPU - 824 * hrtimer_check_target() prevents this case. 825 */ 826 struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; 827 828 if (timer_cpu_base->softirq_activated) 829 return; 830 831 if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) 832 return; 833 834 timer_cpu_base->softirq_next_timer = timer; 835 timer_cpu_base->softirq_expires_next = expires; 836 837 if (!ktime_before(expires, timer_cpu_base->expires_next) || 838 !reprogram) 839 return; 840 } 841 842 /* 843 * If the timer is not on the current cpu, we cannot reprogram 844 * the other cpus clock event device. 845 */ 846 if (base->cpu_base != cpu_base) 847 return; 848 849 if (expires >= cpu_base->expires_next) 850 return; 851 852 /* 853 * If the hrtimer interrupt is running, then it will reevaluate the 854 * clock bases and reprogram the clock event device. 855 */ 856 if (cpu_base->in_hrtirq) 857 return; 858 859 cpu_base->next_timer = timer; 860 861 __hrtimer_reprogram(cpu_base, timer, expires); 862 } 863 864 static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, 865 unsigned int active) 866 { 867 struct hrtimer_clock_base *base; 868 unsigned int seq; 869 ktime_t expires; 870 871 /* 872 * Update the base offsets unconditionally so the following 873 * checks whether the SMP function call is required works. 874 * 875 * The update is safe even when the remote CPU is in the hrtimer 876 * interrupt or the hrtimer soft interrupt and expiring affected 877 * bases. Either it will see the update before handling a base or 878 * it will see it when it finishes the processing and reevaluates 879 * the next expiring timer. 880 */ 881 seq = cpu_base->clock_was_set_seq; 882 hrtimer_update_base(cpu_base); 883 884 /* 885 * If the sequence did not change over the update then the 886 * remote CPU already handled it. 887 */ 888 if (seq == cpu_base->clock_was_set_seq) 889 return false; 890 891 /* 892 * If the remote CPU is currently handling an hrtimer interrupt, it 893 * will reevaluate the first expiring timer of all clock bases 894 * before reprogramming. Nothing to do here. 895 */ 896 if (cpu_base->in_hrtirq) 897 return false; 898 899 /* 900 * Walk the affected clock bases and check whether the first expiring 901 * timer in a clock base is moving ahead of the first expiring timer of 902 * @cpu_base. If so, the IPI must be invoked because per CPU clock 903 * event devices cannot be remotely reprogrammed. 904 */ 905 active &= cpu_base->active_bases; 906 907 for_each_active_base(base, cpu_base, active) { 908 struct timerqueue_node *next; 909 910 next = timerqueue_getnext(&base->active); 911 expires = ktime_sub(next->expires, base->offset); 912 if (expires < cpu_base->expires_next) 913 return true; 914 915 /* Extra check for softirq clock bases */ 916 if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT) 917 continue; 918 if (cpu_base->softirq_activated) 919 continue; 920 if (expires < cpu_base->softirq_expires_next) 921 return true; 922 } 923 return false; 924 } 925 926 /* 927 * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and 928 * CLOCK_BOOTTIME (for late sleep time injection). 929 * 930 * This requires to update the offsets for these clocks 931 * vs. CLOCK_MONOTONIC. 
When high resolution timers are enabled, then this 932 * also requires to eventually reprogram the per CPU clock event devices 933 * when the change moves an affected timer ahead of the first expiring 934 * timer on that CPU. Obviously remote per CPU clock event devices cannot 935 * be reprogrammed. The other reason why an IPI has to be sent is when the 936 * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets 937 * in the tick, which obviously might be stopped, so this has to bring out 938 * the remote CPU which might sleep in idle to get this sorted. 939 */ 940 void clock_was_set(unsigned int bases) 941 { 942 struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases); 943 cpumask_var_t mask; 944 int cpu; 945 946 if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active) 947 goto out_timerfd; 948 949 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 950 on_each_cpu(retrigger_next_event, NULL, 1); 951 goto out_timerfd; 952 } 953 954 /* Avoid interrupting CPUs if possible */ 955 cpus_read_lock(); 956 for_each_online_cpu(cpu) { 957 unsigned long flags; 958 959 cpu_base = &per_cpu(hrtimer_bases, cpu); 960 raw_spin_lock_irqsave(&cpu_base->lock, flags); 961 962 if (update_needs_ipi(cpu_base, bases)) 963 cpumask_set_cpu(cpu, mask); 964 965 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 966 } 967 968 preempt_disable(); 969 smp_call_function_many(mask, retrigger_next_event, NULL, 1); 970 preempt_enable(); 971 cpus_read_unlock(); 972 free_cpumask_var(mask); 973 974 out_timerfd: 975 timerfd_clock_was_set(); 976 } 977 978 static void clock_was_set_work(struct work_struct *work) 979 { 980 clock_was_set(CLOCK_SET_WALL); 981 } 982 983 static DECLARE_WORK(hrtimer_work, clock_was_set_work); 984 985 /* 986 * Called from timekeeping code to reprogram the hrtimer interrupt device 987 * on all cpus and to notify timerfd. 988 */ 989 void clock_was_set_delayed(void) 990 { 991 schedule_work(&hrtimer_work); 992 } 993 994 /* 995 * Called during resume either directly from via timekeeping_resume() 996 * or in the case of s2idle from tick_unfreeze() to ensure that the 997 * hrtimers are up to date. 998 */ 999 void hrtimers_resume_local(void) 1000 { 1001 lockdep_assert_irqs_disabled(); 1002 /* Retrigger on the local CPU */ 1003 retrigger_next_event(NULL); 1004 } 1005 1006 /* 1007 * Counterpart to lock_hrtimer_base above: 1008 */ 1009 static inline 1010 void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 1011 __releases(&timer->base->cpu_base->lock) 1012 { 1013 raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags); 1014 } 1015 1016 /** 1017 * hrtimer_forward() - forward the timer expiry 1018 * @timer: hrtimer to forward 1019 * @now: forward past this time 1020 * @interval: the interval to forward 1021 * 1022 * Forward the timer expiry so it will expire in the future. 1023 * 1024 * .. note:: 1025 * This only updates the timer expiry value and does not requeue the timer. 1026 * 1027 * There is also a variant of the function hrtimer_forward_now(). 1028 * 1029 * Context: Can be safely called from the callback function of @timer. If called 1030 * from other contexts @timer must neither be enqueued nor running the 1031 * callback and the caller needs to take care of serialization. 1032 * 1033 * Return: The number of overruns are returned. 
1034 */ 1035 u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval) 1036 { 1037 u64 orun = 1; 1038 ktime_t delta; 1039 1040 delta = ktime_sub(now, hrtimer_get_expires(timer)); 1041 1042 if (delta < 0) 1043 return 0; 1044 1045 if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED)) 1046 return 0; 1047 1048 if (interval < hrtimer_resolution) 1049 interval = hrtimer_resolution; 1050 1051 if (unlikely(delta >= interval)) { 1052 s64 incr = ktime_to_ns(interval); 1053 1054 orun = ktime_divns(delta, incr); 1055 hrtimer_add_expires_ns(timer, incr * orun); 1056 if (hrtimer_get_expires_tv64(timer) > now) 1057 return orun; 1058 /* 1059 * This (and the ktime_add() below) is the 1060 * correction for exact: 1061 */ 1062 orun++; 1063 } 1064 hrtimer_add_expires(timer, interval); 1065 1066 return orun; 1067 } 1068 EXPORT_SYMBOL_GPL(hrtimer_forward); 1069 1070 /* 1071 * enqueue_hrtimer - internal function to (re)start a timer 1072 * 1073 * The timer is inserted in expiry order. Insertion into the 1074 * red black tree is O(log(n)). Must hold the base lock. 1075 * 1076 * Returns true when the new timer is the leftmost timer in the tree. 1077 */ 1078 static bool enqueue_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1079 enum hrtimer_mode mode) 1080 { 1081 debug_activate(timer, mode); 1082 WARN_ON_ONCE(!base->cpu_base->online); 1083 1084 base->cpu_base->active_bases |= 1 << base->index; 1085 1086 /* Pairs with the lockless read in hrtimer_is_queued() */ 1087 WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED); 1088 1089 return timerqueue_add(&base->active, &timer->node); 1090 } 1091 1092 /* 1093 * __remove_hrtimer - internal function to remove a timer 1094 * 1095 * Caller must hold the base lock. 1096 * 1097 * High resolution timer mode reprograms the clock event device when the 1098 * timer is the one which expires next. The caller can disable this by setting 1099 * reprogram to zero. This is useful, when the context does a reprogramming 1100 * anyway (e.g. timer interrupt) 1101 */ 1102 static void __remove_hrtimer(struct hrtimer *timer, 1103 struct hrtimer_clock_base *base, 1104 u8 newstate, int reprogram) 1105 { 1106 struct hrtimer_cpu_base *cpu_base = base->cpu_base; 1107 u8 state = timer->state; 1108 1109 /* Pairs with the lockless read in hrtimer_is_queued() */ 1110 WRITE_ONCE(timer->state, newstate); 1111 if (!(state & HRTIMER_STATE_ENQUEUED)) 1112 return; 1113 1114 if (!timerqueue_del(&base->active, &timer->node)) 1115 cpu_base->active_bases &= ~(1 << base->index); 1116 1117 /* 1118 * Note: If reprogram is false we do not update 1119 * cpu_base->next_timer. This happens when we remove the first 1120 * timer on a remote cpu. No harm as we never dereference 1121 * cpu_base->next_timer. So the worst thing what can happen is 1122 * an superfluous call to hrtimer_force_reprogram() on the 1123 * remote cpu later on if the same timer gets enqueued again. 1124 */ 1125 if (reprogram && timer == cpu_base->next_timer) 1126 hrtimer_force_reprogram(cpu_base, 1); 1127 } 1128 1129 /* 1130 * remove hrtimer, called with base lock held 1131 */ 1132 static inline int 1133 remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, 1134 bool restart, bool keep_local) 1135 { 1136 u8 state = timer->state; 1137 1138 if (state & HRTIMER_STATE_ENQUEUED) { 1139 bool reprogram; 1140 1141 /* 1142 * Remove the timer and force reprogramming when high 1143 * resolution mode is active and the timer is on the current 1144 * CPU. 
If we remove a timer on another CPU, reprogramming is 1145 * skipped. The interrupt event on this CPU is fired and 1146 * reprogramming happens in the interrupt handler. This is a 1147 * rare case and less expensive than a smp call. 1148 */ 1149 debug_deactivate(timer); 1150 reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); 1151 1152 /* 1153 * If the timer is not restarted then reprogramming is 1154 * required if the timer is local. If it is local and about 1155 * to be restarted, avoid programming it twice (on removal 1156 * and a moment later when it's requeued). 1157 */ 1158 if (!restart) 1159 state = HRTIMER_STATE_INACTIVE; 1160 else 1161 reprogram &= !keep_local; 1162 1163 __remove_hrtimer(timer, base, state, reprogram); 1164 return 1; 1165 } 1166 return 0; 1167 } 1168 1169 static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim, 1170 const enum hrtimer_mode mode) 1171 { 1172 #ifdef CONFIG_TIME_LOW_RES 1173 /* 1174 * CONFIG_TIME_LOW_RES indicates that the system has no way to return 1175 * granular time values. For relative timers we add hrtimer_resolution 1176 * (i.e. one jiffy) to prevent short timeouts. 1177 */ 1178 timer->is_rel = mode & HRTIMER_MODE_REL; 1179 if (timer->is_rel) 1180 tim = ktime_add_safe(tim, hrtimer_resolution); 1181 #endif 1182 return tim; 1183 } 1184 1185 static void 1186 hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram) 1187 { 1188 ktime_t expires; 1189 1190 /* 1191 * Find the next SOFT expiration. 1192 */ 1193 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); 1194 1195 /* 1196 * reprogramming needs to be triggered, even if the next soft 1197 * hrtimer expires at the same time than the next hard 1198 * hrtimer. cpu_base->softirq_expires_next needs to be updated! 1199 */ 1200 if (expires == KTIME_MAX) 1201 return; 1202 1203 /* 1204 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event() 1205 * cpu_base->*expires_next is only set by hrtimer_reprogram() 1206 */ 1207 hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram); 1208 } 1209 1210 static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 1211 u64 delta_ns, const enum hrtimer_mode mode, 1212 struct hrtimer_clock_base *base) 1213 { 1214 struct hrtimer_cpu_base *this_cpu_base = this_cpu_ptr(&hrtimer_bases); 1215 struct hrtimer_clock_base *new_base; 1216 bool force_local, first; 1217 1218 /* 1219 * If the timer is on the local cpu base and is the first expiring 1220 * timer then this might end up reprogramming the hardware twice 1221 * (on removal and on enqueue). To avoid that by prevent the 1222 * reprogram on removal, keep the timer local to the current CPU 1223 * and enforce reprogramming after it is queued no matter whether 1224 * it is the new first expiring timer again or not. 1225 */ 1226 force_local = base->cpu_base == this_cpu_base; 1227 force_local &= base->cpu_base->next_timer == timer; 1228 1229 /* 1230 * Don't force local queuing if this enqueue happens on a unplugged 1231 * CPU after hrtimer_cpu_dying() has been invoked. 1232 */ 1233 force_local &= this_cpu_base->online; 1234 1235 /* 1236 * Remove an active timer from the queue. In case it is not queued 1237 * on the current CPU, make sure that remove_hrtimer() updates the 1238 * remote data correctly. 1239 * 1240 * If it's on the current CPU and the first expiring timer, then 1241 * skip reprogramming, keep the timer local and enforce 1242 * reprogramming later if it was the first expiring timer. 
This 1243 * avoids programming the underlying clock event twice (once at 1244 * removal and once after enqueue). 1245 */ 1246 remove_hrtimer(timer, base, true, force_local); 1247 1248 if (mode & HRTIMER_MODE_REL) 1249 tim = ktime_add_safe(tim, __hrtimer_cb_get_time(base->clockid)); 1250 1251 tim = hrtimer_update_lowres(timer, tim, mode); 1252 1253 hrtimer_set_expires_range_ns(timer, tim, delta_ns); 1254 1255 /* Switch the timer base, if necessary: */ 1256 if (!force_local) { 1257 new_base = switch_hrtimer_base(timer, base, 1258 mode & HRTIMER_MODE_PINNED); 1259 } else { 1260 new_base = base; 1261 } 1262 1263 first = enqueue_hrtimer(timer, new_base, mode); 1264 if (!force_local) { 1265 /* 1266 * If the current CPU base is online, then the timer is 1267 * never queued on a remote CPU if it would be the first 1268 * expiring timer there. 1269 */ 1270 if (hrtimer_base_is_online(this_cpu_base)) 1271 return first; 1272 1273 /* 1274 * Timer was enqueued remote because the current base is 1275 * already offline. If the timer is the first to expire, 1276 * kick the remote CPU to reprogram the clock event. 1277 */ 1278 if (first) { 1279 struct hrtimer_cpu_base *new_cpu_base = new_base->cpu_base; 1280 1281 smp_call_function_single_async(new_cpu_base->cpu, &new_cpu_base->csd); 1282 } 1283 return 0; 1284 } 1285 1286 /* 1287 * Timer was forced to stay on the current CPU to avoid 1288 * reprogramming on removal and enqueue. Force reprogram the 1289 * hardware by evaluating the new first expiring timer. 1290 */ 1291 hrtimer_force_reprogram(new_base->cpu_base, 1); 1292 return 0; 1293 } 1294 1295 /** 1296 * hrtimer_start_range_ns - (re)start an hrtimer 1297 * @timer: the timer to be added 1298 * @tim: expiry time 1299 * @delta_ns: "slack" range for the timer 1300 * @mode: timer mode: absolute (HRTIMER_MODE_ABS) or 1301 * relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED); 1302 * softirq based mode is considered for debug purpose only! 1303 */ 1304 void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, 1305 u64 delta_ns, const enum hrtimer_mode mode) 1306 { 1307 struct hrtimer_clock_base *base; 1308 unsigned long flags; 1309 1310 /* 1311 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft 1312 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard 1313 * expiry mode because unmarked timers are moved to softirq expiry. 1314 */ 1315 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) 1316 WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); 1317 else 1318 WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); 1319 1320 base = lock_hrtimer_base(timer, &flags); 1321 1322 if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) 1323 hrtimer_reprogram(timer, true); 1324 1325 unlock_hrtimer_base(timer, &flags); 1326 } 1327 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 1328 1329 /** 1330 * hrtimer_try_to_cancel - try to deactivate a timer 1331 * @timer: hrtimer to stop 1332 * 1333 * Returns: 1334 * 1335 * * 0 when the timer was not active 1336 * * 1 when the timer was active 1337 * * -1 when the timer is currently executing the callback function and 1338 * cannot be stopped 1339 */ 1340 int hrtimer_try_to_cancel(struct hrtimer *timer) 1341 { 1342 struct hrtimer_clock_base *base; 1343 unsigned long flags; 1344 int ret = -1; 1345 1346 /* 1347 * Check lockless first. If the timer is not active (neither 1348 * enqueued nor running the callback, nothing to do here. The 1349 * base lock does not serialize against a concurrent enqueue, 1350 * so we can avoid taking it. 
1351 */ 1352 if (!hrtimer_active(timer)) 1353 return 0; 1354 1355 base = lock_hrtimer_base(timer, &flags); 1356 1357 if (!hrtimer_callback_running(timer)) 1358 ret = remove_hrtimer(timer, base, false, false); 1359 1360 unlock_hrtimer_base(timer, &flags); 1361 1362 return ret; 1363 1364 } 1365 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); 1366 1367 #ifdef CONFIG_PREEMPT_RT 1368 static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) 1369 { 1370 spin_lock_init(&base->softirq_expiry_lock); 1371 } 1372 1373 static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) 1374 __acquires(&base->softirq_expiry_lock) 1375 { 1376 spin_lock(&base->softirq_expiry_lock); 1377 } 1378 1379 static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) 1380 __releases(&base->softirq_expiry_lock) 1381 { 1382 spin_unlock(&base->softirq_expiry_lock); 1383 } 1384 1385 /* 1386 * The counterpart to hrtimer_cancel_wait_running(). 1387 * 1388 * If there is a waiter for cpu_base->expiry_lock, then it was waiting for 1389 * the timer callback to finish. Drop expiry_lock and reacquire it. That 1390 * allows the waiter to acquire the lock and make progress. 1391 */ 1392 static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, 1393 unsigned long flags) 1394 { 1395 if (atomic_read(&cpu_base->timer_waiters)) { 1396 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1397 spin_unlock(&cpu_base->softirq_expiry_lock); 1398 spin_lock(&cpu_base->softirq_expiry_lock); 1399 raw_spin_lock_irq(&cpu_base->lock); 1400 } 1401 } 1402 1403 #ifdef CONFIG_SMP 1404 static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) 1405 { 1406 return base == &migration_base; 1407 } 1408 #else 1409 static __always_inline bool is_migration_base(struct hrtimer_clock_base *base) 1410 { 1411 return false; 1412 } 1413 #endif 1414 1415 /* 1416 * This function is called on PREEMPT_RT kernels when the fast path 1417 * deletion of a timer failed because the timer callback function was 1418 * running. 1419 * 1420 * This prevents priority inversion: if the soft irq thread is preempted 1421 * in the middle of a timer callback, then calling hrtimer_cancel() can 1422 * lead to two issues: 1423 * 1424 * - If the caller is on a remote CPU then it has to spin wait for the timer 1425 * handler to complete. This can result in unbound priority inversion. 1426 * 1427 * - If the caller originates from the task which preempted the timer 1428 * handler on the same CPU, then spin waiting for the timer handler to 1429 * complete is never going to end. 1430 */ 1431 void hrtimer_cancel_wait_running(const struct hrtimer *timer) 1432 { 1433 /* Lockless read. Prevent the compiler from reloading it below */ 1434 struct hrtimer_clock_base *base = READ_ONCE(timer->base); 1435 1436 /* 1437 * Just relax if the timer expires in hard interrupt context or if 1438 * it is currently on the migration base. 1439 */ 1440 if (!timer->is_soft || is_migration_base(base)) { 1441 cpu_relax(); 1442 return; 1443 } 1444 1445 /* 1446 * Mark the base as contended and grab the expiry lock, which is 1447 * held by the softirq across the timer callback. Drop the lock 1448 * immediately so the softirq can expire the next timer. In theory 1449 * the timer could already be running again, but that's more than 1450 * unlikely and just causes another wait loop. 
1451 */ 1452 atomic_inc(&base->cpu_base->timer_waiters); 1453 spin_lock_bh(&base->cpu_base->softirq_expiry_lock); 1454 atomic_dec(&base->cpu_base->timer_waiters); 1455 spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); 1456 } 1457 #else 1458 static inline void 1459 hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } 1460 static inline void 1461 hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } 1462 static inline void 1463 hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } 1464 static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, 1465 unsigned long flags) { } 1466 #endif 1467 1468 /** 1469 * hrtimer_cancel - cancel a timer and wait for the handler to finish. 1470 * @timer: the timer to be cancelled 1471 * 1472 * Returns: 1473 * 0 when the timer was not active 1474 * 1 when the timer was active 1475 */ 1476 int hrtimer_cancel(struct hrtimer *timer) 1477 { 1478 int ret; 1479 1480 do { 1481 ret = hrtimer_try_to_cancel(timer); 1482 1483 if (ret < 0) 1484 hrtimer_cancel_wait_running(timer); 1485 } while (ret < 0); 1486 return ret; 1487 } 1488 EXPORT_SYMBOL_GPL(hrtimer_cancel); 1489 1490 /** 1491 * __hrtimer_get_remaining - get remaining time for the timer 1492 * @timer: the timer to read 1493 * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y 1494 */ 1495 ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) 1496 { 1497 unsigned long flags; 1498 ktime_t rem; 1499 1500 lock_hrtimer_base(timer, &flags); 1501 if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) 1502 rem = hrtimer_expires_remaining_adjusted(timer); 1503 else 1504 rem = hrtimer_expires_remaining(timer); 1505 unlock_hrtimer_base(timer, &flags); 1506 1507 return rem; 1508 } 1509 EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); 1510 1511 #ifdef CONFIG_NO_HZ_COMMON 1512 /** 1513 * hrtimer_get_next_event - get the time until next expiry event 1514 * 1515 * Returns the next expiry time or KTIME_MAX if no timer is pending. 1516 */ 1517 u64 hrtimer_get_next_event(void) 1518 { 1519 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1520 u64 expires = KTIME_MAX; 1521 unsigned long flags; 1522 1523 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1524 1525 if (!hrtimer_hres_active(cpu_base)) 1526 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); 1527 1528 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1529 1530 return expires; 1531 } 1532 1533 /** 1534 * hrtimer_next_event_without - time until next expiry event w/o one timer 1535 * @exclude: timer to exclude 1536 * 1537 * Returns the next expiry time over all timers except for the @exclude one or 1538 * KTIME_MAX if none of them is pending. 
1539 */ 1540 u64 hrtimer_next_event_without(const struct hrtimer *exclude) 1541 { 1542 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1543 u64 expires = KTIME_MAX; 1544 unsigned long flags; 1545 1546 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1547 1548 if (hrtimer_hres_active(cpu_base)) { 1549 unsigned int active; 1550 1551 if (!cpu_base->softirq_activated) { 1552 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; 1553 expires = __hrtimer_next_event_base(cpu_base, exclude, 1554 active, KTIME_MAX); 1555 } 1556 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; 1557 expires = __hrtimer_next_event_base(cpu_base, exclude, active, 1558 expires); 1559 } 1560 1561 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1562 1563 return expires; 1564 } 1565 #endif 1566 1567 static inline int hrtimer_clockid_to_base(clockid_t clock_id) 1568 { 1569 switch (clock_id) { 1570 case CLOCK_MONOTONIC: 1571 return HRTIMER_BASE_MONOTONIC; 1572 case CLOCK_REALTIME: 1573 return HRTIMER_BASE_REALTIME; 1574 case CLOCK_BOOTTIME: 1575 return HRTIMER_BASE_BOOTTIME; 1576 case CLOCK_TAI: 1577 return HRTIMER_BASE_TAI; 1578 default: 1579 WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); 1580 return HRTIMER_BASE_MONOTONIC; 1581 } 1582 } 1583 1584 static ktime_t __hrtimer_cb_get_time(clockid_t clock_id) 1585 { 1586 switch (clock_id) { 1587 case CLOCK_MONOTONIC: 1588 return ktime_get(); 1589 case CLOCK_REALTIME: 1590 return ktime_get_real(); 1591 case CLOCK_BOOTTIME: 1592 return ktime_get_boottime(); 1593 case CLOCK_TAI: 1594 return ktime_get_clocktai(); 1595 default: 1596 WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); 1597 return ktime_get(); 1598 } 1599 } 1600 1601 ktime_t hrtimer_cb_get_time(const struct hrtimer *timer) 1602 { 1603 return __hrtimer_cb_get_time(timer->base->clockid); 1604 } 1605 EXPORT_SYMBOL_GPL(hrtimer_cb_get_time); 1606 1607 static void __hrtimer_setup(struct hrtimer *timer, 1608 enum hrtimer_restart (*function)(struct hrtimer *), 1609 clockid_t clock_id, enum hrtimer_mode mode) 1610 { 1611 bool softtimer = !!(mode & HRTIMER_MODE_SOFT); 1612 struct hrtimer_cpu_base *cpu_base; 1613 int base; 1614 1615 /* 1616 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly 1617 * marked for hard interrupt expiry mode are moved into soft 1618 * interrupt context for latency reasons and because the callbacks 1619 * can invoke functions which might sleep on RT, e.g. spin_lock(). 1620 */ 1621 if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD)) 1622 softtimer = true; 1623 1624 memset(timer, 0, sizeof(struct hrtimer)); 1625 1626 cpu_base = raw_cpu_ptr(&hrtimer_bases); 1627 1628 /* 1629 * POSIX magic: Relative CLOCK_REALTIME timers are not affected by 1630 * clock modifications, so they needs to become CLOCK_MONOTONIC to 1631 * ensure POSIX compliance. 1632 */ 1633 if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL) 1634 clock_id = CLOCK_MONOTONIC; 1635 1636 base = softtimer ? 
HRTIMER_MAX_CLOCK_BASES / 2 : 0; 1637 base += hrtimer_clockid_to_base(clock_id); 1638 timer->is_soft = softtimer; 1639 timer->is_hard = !!(mode & HRTIMER_MODE_HARD); 1640 timer->base = &cpu_base->clock_base[base]; 1641 timerqueue_init(&timer->node); 1642 1643 if (WARN_ON_ONCE(!function)) 1644 ACCESS_PRIVATE(timer, function) = hrtimer_dummy_timeout; 1645 else 1646 ACCESS_PRIVATE(timer, function) = function; 1647 } 1648 1649 /** 1650 * hrtimer_setup - initialize a timer to the given clock 1651 * @timer: the timer to be initialized 1652 * @function: the callback function 1653 * @clock_id: the clock to be used 1654 * @mode: The modes which are relevant for initialization: 1655 * HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT, 1656 * HRTIMER_MODE_REL_SOFT 1657 * 1658 * The PINNED variants of the above can be handed in, 1659 * but the PINNED bit is ignored as pinning happens 1660 * when the hrtimer is started 1661 */ 1662 void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *), 1663 clockid_t clock_id, enum hrtimer_mode mode) 1664 { 1665 debug_setup(timer, clock_id, mode); 1666 __hrtimer_setup(timer, function, clock_id, mode); 1667 } 1668 EXPORT_SYMBOL_GPL(hrtimer_setup); 1669 1670 /** 1671 * hrtimer_setup_on_stack - initialize a timer on stack memory 1672 * @timer: The timer to be initialized 1673 * @function: the callback function 1674 * @clock_id: The clock to be used 1675 * @mode: The timer mode 1676 * 1677 * Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack 1678 * memory. 1679 */ 1680 void hrtimer_setup_on_stack(struct hrtimer *timer, 1681 enum hrtimer_restart (*function)(struct hrtimer *), 1682 clockid_t clock_id, enum hrtimer_mode mode) 1683 { 1684 debug_setup_on_stack(timer, clock_id, mode); 1685 __hrtimer_setup(timer, function, clock_id, mode); 1686 } 1687 EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack); 1688 1689 /* 1690 * A timer is active, when it is enqueued into the rbtree or the 1691 * callback function is running or it's in the state of being migrated 1692 * to another cpu. 1693 * 1694 * It is important for this function to not return a false negative. 1695 */ 1696 bool hrtimer_active(const struct hrtimer *timer) 1697 { 1698 struct hrtimer_clock_base *base; 1699 unsigned int seq; 1700 1701 do { 1702 base = READ_ONCE(timer->base); 1703 seq = raw_read_seqcount_begin(&base->seq); 1704 1705 if (timer->state != HRTIMER_STATE_INACTIVE || 1706 base->running == timer) 1707 return true; 1708 1709 } while (read_seqcount_retry(&base->seq, seq) || 1710 base != READ_ONCE(timer->base)); 1711 1712 return false; 1713 } 1714 EXPORT_SYMBOL_GPL(hrtimer_active); 1715 1716 /* 1717 * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3 1718 * distinct sections: 1719 * 1720 * - queued: the timer is queued 1721 * - callback: the timer is being ran 1722 * - post: the timer is inactive or (re)queued 1723 * 1724 * On the read side we ensure we observe timer->state and cpu_base->running 1725 * from the same section, if anything changed while we looked at it, we retry. 1726 * This includes timer->base changing because sequence numbers alone are 1727 * insufficient for that. 1728 * 1729 * The sequence numbers are required because otherwise we could still observe 1730 * a false negative if the read side got smeared over multiple consecutive 1731 * __run_hrtimer() invocations. 
1732 */ 1733 1734 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, 1735 struct hrtimer_clock_base *base, 1736 struct hrtimer *timer, ktime_t *now, 1737 unsigned long flags) __must_hold(&cpu_base->lock) 1738 { 1739 enum hrtimer_restart (*fn)(struct hrtimer *); 1740 bool expires_in_hardirq; 1741 int restart; 1742 1743 lockdep_assert_held(&cpu_base->lock); 1744 1745 debug_deactivate(timer); 1746 base->running = timer; 1747 1748 /* 1749 * Separate the ->running assignment from the ->state assignment. 1750 * 1751 * As with a regular write barrier, this ensures the read side in 1752 * hrtimer_active() cannot observe base->running == NULL && 1753 * timer->state == INACTIVE. 1754 */ 1755 raw_write_seqcount_barrier(&base->seq); 1756 1757 __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); 1758 fn = ACCESS_PRIVATE(timer, function); 1759 1760 /* 1761 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the 1762 * timer is restarted with a period then it becomes an absolute 1763 * timer. If its not restarted it does not matter. 1764 */ 1765 if (IS_ENABLED(CONFIG_TIME_LOW_RES)) 1766 timer->is_rel = false; 1767 1768 /* 1769 * The timer is marked as running in the CPU base, so it is 1770 * protected against migration to a different CPU even if the lock 1771 * is dropped. 1772 */ 1773 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1774 trace_hrtimer_expire_entry(timer, now); 1775 expires_in_hardirq = lockdep_hrtimer_enter(timer); 1776 1777 restart = fn(timer); 1778 1779 lockdep_hrtimer_exit(expires_in_hardirq); 1780 trace_hrtimer_expire_exit(timer); 1781 raw_spin_lock_irq(&cpu_base->lock); 1782 1783 /* 1784 * Note: We clear the running state after enqueue_hrtimer and 1785 * we do not reprogram the event hardware. Happens either in 1786 * hrtimer_start_range_ns() or in hrtimer_interrupt() 1787 * 1788 * Note: Because we dropped the cpu_base->lock above, 1789 * hrtimer_start_range_ns() can have popped in and enqueued the timer 1790 * for us already. 1791 */ 1792 if (restart != HRTIMER_NORESTART && 1793 !(timer->state & HRTIMER_STATE_ENQUEUED)) 1794 enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS); 1795 1796 /* 1797 * Separate the ->running assignment from the ->state assignment. 1798 * 1799 * As with a regular write barrier, this ensures the read side in 1800 * hrtimer_active() cannot observe base->running.timer == NULL && 1801 * timer->state == INACTIVE. 1802 */ 1803 raw_write_seqcount_barrier(&base->seq); 1804 1805 WARN_ON_ONCE(base->running != timer); 1806 base->running = NULL; 1807 } 1808 1809 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, 1810 unsigned long flags, unsigned int active_mask) 1811 { 1812 struct hrtimer_clock_base *base; 1813 unsigned int active = cpu_base->active_bases & active_mask; 1814 1815 for_each_active_base(base, cpu_base, active) { 1816 struct timerqueue_node *node; 1817 ktime_t basenow; 1818 1819 basenow = ktime_add(now, base->offset); 1820 1821 while ((node = timerqueue_getnext(&base->active))) { 1822 struct hrtimer *timer; 1823 1824 timer = container_of(node, struct hrtimer, node); 1825 1826 /* 1827 * The immediate goal for using the softexpires is 1828 * minimizing wakeups, not running timers at the 1829 * earliest interrupt after their soft expiration. 1830 * This allows us to avoid using a Priority Search 1831 * Tree, which can answer a stabbing query for 1832 * overlapping intervals and instead use the simple 1833 * BST we already have. 
1834 * We don't add extra wakeups by delaying timers that 1835 * are right-of a not yet expired timer, because that 1836 * timer will have to trigger a wakeup anyway. 1837 */ 1838 if (basenow < hrtimer_get_softexpires_tv64(timer)) 1839 break; 1840 1841 __run_hrtimer(cpu_base, base, timer, &basenow, flags); 1842 if (active_mask == HRTIMER_ACTIVE_SOFT) 1843 hrtimer_sync_wait_running(cpu_base, flags); 1844 } 1845 } 1846 } 1847 1848 static __latent_entropy void hrtimer_run_softirq(void) 1849 { 1850 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1851 unsigned long flags; 1852 ktime_t now; 1853 1854 hrtimer_cpu_base_lock_expiry(cpu_base); 1855 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1856 1857 now = hrtimer_update_base(cpu_base); 1858 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); 1859 1860 cpu_base->softirq_activated = 0; 1861 hrtimer_update_softirq_timer(cpu_base, true); 1862 1863 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1864 hrtimer_cpu_base_unlock_expiry(cpu_base); 1865 } 1866 1867 #ifdef CONFIG_HIGH_RES_TIMERS 1868 1869 /* 1870 * High resolution timer interrupt 1871 * Called with interrupts disabled 1872 */ 1873 void hrtimer_interrupt(struct clock_event_device *dev) 1874 { 1875 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1876 ktime_t expires_next, now, entry_time, delta; 1877 unsigned long flags; 1878 int retries = 0; 1879 1880 BUG_ON(!cpu_base->hres_active); 1881 cpu_base->nr_events++; 1882 dev->next_event = KTIME_MAX; 1883 1884 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1885 entry_time = now = hrtimer_update_base(cpu_base); 1886 retry: 1887 cpu_base->in_hrtirq = 1; 1888 /* 1889 * We set expires_next to KTIME_MAX here with cpu_base->lock 1890 * held to prevent that a timer is enqueued in our queue via 1891 * the migration code. This does not affect enqueueing of 1892 * timers which run their callback and need to be requeued on 1893 * this CPU. 1894 */ 1895 cpu_base->expires_next = KTIME_MAX; 1896 1897 if (!ktime_before(now, cpu_base->softirq_expires_next)) { 1898 cpu_base->softirq_expires_next = KTIME_MAX; 1899 cpu_base->softirq_activated = 1; 1900 raise_timer_softirq(HRTIMER_SOFTIRQ); 1901 } 1902 1903 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); 1904 1905 /* Reevaluate the clock bases for the [soft] next expiry */ 1906 expires_next = hrtimer_update_next_event(cpu_base); 1907 /* 1908 * Store the new expiry value so the migration code can verify 1909 * against it. 1910 */ 1911 cpu_base->expires_next = expires_next; 1912 cpu_base->in_hrtirq = 0; 1913 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1914 1915 /* Reprogramming necessary ? */ 1916 if (!tick_program_event(expires_next, 0)) { 1917 cpu_base->hang_detected = 0; 1918 return; 1919 } 1920 1921 /* 1922 * The next timer was already expired due to: 1923 * - tracing 1924 * - long lasting callbacks 1925 * - being scheduled away when running in a VM 1926 * 1927 * We need to prevent that we loop forever in the hrtimer 1928 * interrupt routine. We give it 3 attempts to avoid 1929 * overreacting on some spurious event. 1930 * 1931 * Acquire base lock for updating the offsets and retrieving 1932 * the current time. 1933 */ 1934 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1935 now = hrtimer_update_base(cpu_base); 1936 cpu_base->nr_retries++; 1937 if (++retries < 3) 1938 goto retry; 1939 /* 1940 * Give the system a chance to do something else than looping 1941 * here. We stored the entry time, so we know exactly how long 1942 * we spent here. 
We schedule the next event this amount of 1943 * time away. 1944 */ 1945 cpu_base->nr_hangs++; 1946 cpu_base->hang_detected = 1; 1947 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1948 1949 delta = ktime_sub(now, entry_time); 1950 if ((unsigned int)delta > cpu_base->max_hang_time) 1951 cpu_base->max_hang_time = (unsigned int) delta; 1952 /* 1953 * Limit it to a sensible value as we enforce a longer 1954 * delay. Give the CPU at least 100ms to catch up. 1955 */ 1956 if (delta > 100 * NSEC_PER_MSEC) 1957 expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC); 1958 else 1959 expires_next = ktime_add(now, delta); 1960 tick_program_event(expires_next, 1); 1961 pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta)); 1962 } 1963 #endif /* !CONFIG_HIGH_RES_TIMERS */ 1964 1965 /* 1966 * Called from run_local_timers in hardirq context every jiffy 1967 */ 1968 void hrtimer_run_queues(void) 1969 { 1970 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1971 unsigned long flags; 1972 ktime_t now; 1973 1974 if (hrtimer_hres_active(cpu_base)) 1975 return; 1976 1977 /* 1978 * This _is_ ugly: We have to check periodically, whether we 1979 * can switch to highres and / or nohz mode. The clocksource 1980 * switch happens with xtime_lock held. Notification from 1981 * there only sets the check bit in the tick_oneshot code, 1982 * otherwise we might deadlock vs. xtime_lock. 1983 */ 1984 if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) { 1985 hrtimer_switch_to_hres(); 1986 return; 1987 } 1988 1989 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1990 now = hrtimer_update_base(cpu_base); 1991 1992 if (!ktime_before(now, cpu_base->softirq_expires_next)) { 1993 cpu_base->softirq_expires_next = KTIME_MAX; 1994 cpu_base->softirq_activated = 1; 1995 raise_timer_softirq(HRTIMER_SOFTIRQ); 1996 } 1997 1998 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD); 1999 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 2000 } 2001 2002 /* 2003 * Sleep related functions: 2004 */ 2005 static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer) 2006 { 2007 struct hrtimer_sleeper *t = 2008 container_of(timer, struct hrtimer_sleeper, timer); 2009 struct task_struct *task = t->task; 2010 2011 t->task = NULL; 2012 if (task) 2013 wake_up_process(task); 2014 2015 return HRTIMER_NORESTART; 2016 } 2017 2018 /** 2019 * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer 2020 * @sl: sleeper to be started 2021 * @mode: timer mode abs/rel 2022 * 2023 * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers 2024 * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context) 2025 */ 2026 void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl, 2027 enum hrtimer_mode mode) 2028 { 2029 /* 2030 * Make the enqueue delivery mode check work on RT. If the sleeper 2031 * was initialized for hard interrupt delivery, force the mode bit. 2032 * This is a special case for hrtimer_sleepers because 2033 * __hrtimer_setup_sleeper() determines the delivery mode on RT so the 2034 * fiddling with this decision is avoided at the call sites. 
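 * * For example, on PREEMPT_RT a sleeper set up by a SCHED_FIFO task has sl->timer.is_hard set by __hrtimer_setup_sleeper() below, so starting it here with HRTIMER_MODE_REL effectively uses HRTIMER_MODE_REL | HRTIMER_MODE_HARD.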
2035 */ 2036 if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard) 2037 mode |= HRTIMER_MODE_HARD; 2038 2039 hrtimer_start_expires(&sl->timer, mode); 2040 } 2041 EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires); 2042 2043 static void __hrtimer_setup_sleeper(struct hrtimer_sleeper *sl, 2044 clockid_t clock_id, enum hrtimer_mode mode) 2045 { 2046 /* 2047 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly 2048 * marked for hard interrupt expiry mode are moved into soft 2049 * interrupt context either for latency reasons or because the 2050 * hrtimer callback takes regular spinlocks or invokes other 2051 * functions which are not suitable for hard interrupt context on 2052 * PREEMPT_RT. 2053 * 2054 * The hrtimer_sleeper callback is RT compatible in hard interrupt 2055 * context, but there is a latency concern: Untrusted userspace can 2056 * spawn many threads which arm timers for the same expiry time on 2057 * the same CPU. That causes a latency spike due to the wakeup of 2058 * a gazillion threads. 2059 * 2060 * OTOH, privileged real-time user space applications rely on the 2061 * low latency of hard interrupt wakeups. If the current task is in 2062 * a real-time scheduling class, mark the mode for hard interrupt 2063 * expiry. 2064 */ 2065 if (IS_ENABLED(CONFIG_PREEMPT_RT)) { 2066 if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT)) 2067 mode |= HRTIMER_MODE_HARD; 2068 } 2069 2070 __hrtimer_setup(&sl->timer, hrtimer_wakeup, clock_id, mode); 2071 sl->task = current; 2072 } 2073 2074 /** 2075 * hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory 2076 * @sl: sleeper to be initialized 2077 * @clock_id: the clock to be used 2078 * @mode: timer mode abs/rel 2079 */ 2080 void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl, 2081 clockid_t clock_id, enum hrtimer_mode mode) 2082 { 2083 debug_setup_on_stack(&sl->timer, clock_id, mode); 2084 __hrtimer_setup_sleeper(sl, clock_id, mode); 2085 } 2086 EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack); 2087 2088 int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts) 2089 { 2090 switch (restart->nanosleep.type) { 2091 #ifdef CONFIG_COMPAT_32BIT_TIME 2092 case TT_COMPAT: 2093 if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp)) 2094 return -EFAULT; 2095 break; 2096 #endif 2097 case TT_NATIVE: 2098 if (put_timespec64(ts, restart->nanosleep.rmtp)) 2099 return -EFAULT; 2100 break; 2101 default: 2102 BUG(); 2103 } 2104 return -ERESTART_RESTARTBLOCK; 2105 } 2106 2107 static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode) 2108 { 2109 struct restart_block *restart; 2110 2111 do { 2112 set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE); 2113 hrtimer_sleeper_start_expires(t, mode); 2114 2115 if (likely(t->task)) 2116 schedule(); 2117 2118 hrtimer_cancel(&t->timer); 2119 mode = HRTIMER_MODE_ABS; 2120 2121 } while (t->task && !signal_pending(current)); 2122 2123 __set_current_state(TASK_RUNNING); 2124 2125 if (!t->task) 2126 return 0; 2127 2128 restart = &current->restart_block; 2129 if (restart->nanosleep.type != TT_NONE) { 2130 ktime_t rem = hrtimer_expires_remaining(&t->timer); 2131 struct timespec64 rmt; 2132 2133 if (rem <= 0) 2134 return 0; 2135 rmt = ktime_to_timespec64(rem); 2136 2137 return nanosleep_copyout(restart, &rmt); 2138 } 2139 return -ERESTART_RESTARTBLOCK; 2140 } 2141 2142 static long __sched hrtimer_nanosleep_restart(struct restart_block *restart) 2143 { 2144 struct hrtimer_sleeper t; 2145 int ret; 2146 2147 hrtimer_setup_sleeper_on_stack(&t,
restart->nanosleep.clockid, HRTIMER_MODE_ABS); 2148 hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires); 2149 ret = do_nanosleep(&t, HRTIMER_MODE_ABS); 2150 destroy_hrtimer_on_stack(&t.timer); 2151 return ret; 2152 } 2153 2154 long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode, 2155 const clockid_t clockid) 2156 { 2157 struct restart_block *restart; 2158 struct hrtimer_sleeper t; 2159 int ret = 0; 2160 2161 hrtimer_setup_sleeper_on_stack(&t, clockid, mode); 2162 hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns); 2163 ret = do_nanosleep(&t, mode); 2164 if (ret != -ERESTART_RESTARTBLOCK) 2165 goto out; 2166 2167 /* Absolute timers do not update the rmtp value and restart: */ 2168 if (mode == HRTIMER_MODE_ABS) { 2169 ret = -ERESTARTNOHAND; 2170 goto out; 2171 } 2172 2173 restart = &current->restart_block; 2174 restart->nanosleep.clockid = t.timer.base->clockid; 2175 restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer); 2176 set_restart_fn(restart, hrtimer_nanosleep_restart); 2177 out: 2178 destroy_hrtimer_on_stack(&t.timer); 2179 return ret; 2180 } 2181 2182 #ifdef CONFIG_64BIT 2183 2184 SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp, 2185 struct __kernel_timespec __user *, rmtp) 2186 { 2187 struct timespec64 tu; 2188 2189 if (get_timespec64(&tu, rqtp)) 2190 return -EFAULT; 2191 2192 if (!timespec64_valid(&tu)) 2193 return -EINVAL; 2194 2195 current->restart_block.fn = do_no_restart_syscall; 2196 current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE; 2197 current->restart_block.nanosleep.rmtp = rmtp; 2198 return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, 2199 CLOCK_MONOTONIC); 2200 } 2201 2202 #endif 2203 2204 #ifdef CONFIG_COMPAT_32BIT_TIME 2205 2206 SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp, 2207 struct old_timespec32 __user *, rmtp) 2208 { 2209 struct timespec64 tu; 2210 2211 if (get_old_timespec32(&tu, rqtp)) 2212 return -EFAULT; 2213 2214 if (!timespec64_valid(&tu)) 2215 return -EINVAL; 2216 2217 current->restart_block.fn = do_no_restart_syscall; 2218 current->restart_block.nanosleep.type = rmtp ?
TT_COMPAT : TT_NONE; 2219 current->restart_block.nanosleep.compat_rmtp = rmtp; 2220 return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL, 2221 CLOCK_MONOTONIC); 2222 } 2223 #endif 2224 2225 /* 2226 * Functions related to boot-time initialization: 2227 */ 2228 int hrtimers_prepare_cpu(unsigned int cpu) 2229 { 2230 struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu); 2231 int i; 2232 2233 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 2234 struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i]; 2235 2236 clock_b->cpu_base = cpu_base; 2237 seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock); 2238 timerqueue_init_head(&clock_b->active); 2239 } 2240 2241 cpu_base->cpu = cpu; 2242 hrtimer_cpu_base_init_expiry_lock(cpu_base); 2243 return 0; 2244 } 2245 2246 int hrtimers_cpu_starting(unsigned int cpu) 2247 { 2248 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 2249 2250 /* Clear out any leftover state from a CPU down operation */ 2251 cpu_base->active_bases = 0; 2252 cpu_base->hres_active = 0; 2253 cpu_base->hang_detected = 0; 2254 cpu_base->next_timer = NULL; 2255 cpu_base->softirq_next_timer = NULL; 2256 cpu_base->expires_next = KTIME_MAX; 2257 cpu_base->softirq_expires_next = KTIME_MAX; 2258 cpu_base->online = 1; 2259 return 0; 2260 } 2261 2262 #ifdef CONFIG_HOTPLUG_CPU 2263 2264 static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base, 2265 struct hrtimer_clock_base *new_base) 2266 { 2267 struct hrtimer *timer; 2268 struct timerqueue_node *node; 2269 2270 while ((node = timerqueue_getnext(&old_base->active))) { 2271 timer = container_of(node, struct hrtimer, node); 2272 BUG_ON(hrtimer_callback_running(timer)); 2273 debug_deactivate(timer); 2274 2275 /* 2276 * Mark it as ENQUEUED, not INACTIVE, otherwise the 2277 * timer could be seen as !active and just vanish away 2278 * under us on another CPU 2279 */ 2280 __remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0); 2281 timer->base = new_base; 2282 /* 2283 * Enqueue the timers on the new cpu. This does not 2284 * reprogram the event device in case the timer 2285 * expires before the earliest on this CPU, but we run 2286 * hrtimer_interrupt after we migrated everything to 2287 * sort out already expired timers and reprogram the 2288 * event device. 2289 */ 2290 enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS); 2291 } 2292 } 2293 2294 int hrtimers_cpu_dying(unsigned int dying_cpu) 2295 { 2296 int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER)); 2297 struct hrtimer_cpu_base *old_base, *new_base; 2298 2299 old_base = this_cpu_ptr(&hrtimer_bases); 2300 new_base = &per_cpu(hrtimer_bases, ncpu); 2301 2302 /* 2303 * The caller is globally serialized and nobody else 2304 * takes two locks at once, so deadlock is not possible.
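 * * new_base->lock is taken with raw_spin_lock_nested(SINGLE_DEPTH_NESTING) below only to tell lockdep that nesting two locks of the same lock class is intentional here; the serialization described above is what actually rules out an ABBA deadlock.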
2305 */ 2306 raw_spin_lock(&old_base->lock); 2307 raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING); 2308 2309 for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { 2310 migrate_hrtimer_list(&old_base->clock_base[i], 2311 &new_base->clock_base[i]); 2312 } 2313 2314 /* Tell the other CPU to retrigger the next event */ 2315 smp_call_function_single(ncpu, retrigger_next_event, NULL, 0); 2316 2317 raw_spin_unlock(&new_base->lock); 2318 old_base->online = 0; 2319 raw_spin_unlock(&old_base->lock); 2320 2321 return 0; 2322 } 2323 2324 #endif /* CONFIG_HOTPLUG_CPU */ 2325 2326 void __init hrtimers_init(void) 2327 { 2328 hrtimers_prepare_cpu(smp_processor_id()); 2329 hrtimers_cpu_starting(smp_processor_id()); 2330 open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq); 2331 } 2332
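/*
 * Illustrative usage sketch of the setup/start API defined above. This is a
 * hypothetical, self-contained module; the names (example_timer,
 * example_timer_fn, example_period_ns) and the 100ms period are made up for
 * illustration only. The callback advances its own expiry with
 * hrtimer_forward_now() and returns HRTIMER_RESTART, which is the case
 * __run_hrtimer() handles by re-enqueueing the timer; returning
 * HRTIMER_NORESTART instead would leave the timer inactive.
 */
#include <linux/hrtimer.h>
#include <linux/ktime.h>
#include <linux/module.h>

static struct hrtimer example_timer;				/* hypothetical */
static const u64 example_period_ns = 100 * NSEC_PER_MSEC;	/* hypothetical */

static enum hrtimer_restart example_timer_fn(struct hrtimer *timer)
{
	/* Runs in timer expiry context: keep it short and do not sleep. */

	/* Push the expiry forward by one period and ask to be requeued. */
	hrtimer_forward_now(timer, ns_to_ktime(example_period_ns));
	return HRTIMER_RESTART;
}

static int __init example_init(void)
{
	/* Bind the callback and clock, then arm the first expiry. */
	hrtimer_setup(&example_timer, example_timer_fn, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
	hrtimer_start(&example_timer, ns_to_ktime(example_period_ns),
		      HRTIMER_MODE_REL);
	return 0;
}

static void __exit example_exit(void)
{
	/* Waits for a running callback to finish before returning. */
	hrtimer_cancel(&example_timer);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");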