1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de> 4 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar 5 * Copyright(C) 2006-2007 Timesys Corp., Thomas Gleixner 6 * 7 * High-resolution kernel timers 8 * 9 * In contrast to the low-resolution timeout API, aka timer wheel, 10 * hrtimers provide finer resolution and accuracy depending on system 11 * configuration and capabilities. 12 * 13 * Started by: Thomas Gleixner and Ingo Molnar 14 * 15 * Credits: 16 * Based on the original timer wheel code 17 * 18 * Help, testing, suggestions, bugfixes, improvements were 19 * provided by: 20 * 21 * George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel 22 * et. al. 23 */ 24 25 #include <linux/cpu.h> 26 #include <linux/export.h> 27 #include <linux/percpu.h> 28 #include <linux/hrtimer.h> 29 #include <linux/notifier.h> 30 #include <linux/syscalls.h> 31 #include <linux/interrupt.h> 32 #include <linux/tick.h> 33 #include <linux/err.h> 34 #include <linux/debugobjects.h> 35 #include <linux/sched/signal.h> 36 #include <linux/sched/sysctl.h> 37 #include <linux/sched/rt.h> 38 #include <linux/sched/deadline.h> 39 #include <linux/sched/nohz.h> 40 #include <linux/sched/debug.h> 41 #include <linux/sched/isolation.h> 42 #include <linux/timer.h> 43 #include <linux/freezer.h> 44 #include <linux/compat.h> 45 46 #include <linux/uaccess.h> 47 48 #include <trace/events/timer.h> 49 50 #include "tick-internal.h" 51 52 /* 53 * Masks for selecting the soft and hard context timers from 54 * cpu_base->active 55 */ 56 #define MASK_SHIFT (HRTIMER_BASE_MONOTONIC_SOFT) 57 #define HRTIMER_ACTIVE_HARD ((1U << MASK_SHIFT) - 1) 58 #define HRTIMER_ACTIVE_SOFT (HRTIMER_ACTIVE_HARD << MASK_SHIFT) 59 #define HRTIMER_ACTIVE_ALL (HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD) 60 61 /* 62 * The timer bases: 63 * 64 * There are more clockids than hrtimer bases. Thus, we index 65 * into the timer bases by the hrtimer_base_type enum. When trying 66 * to reach a base using a clockid, hrtimer_clockid_to_base() 67 * is used to convert from clockid to the proper hrtimer_base_type. 68 */ 69 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = 70 { 71 .lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock), 72 .clock_base = 73 { 74 { 75 .index = HRTIMER_BASE_MONOTONIC, 76 .clockid = CLOCK_MONOTONIC, 77 .get_time = &ktime_get, 78 }, 79 { 80 .index = HRTIMER_BASE_REALTIME, 81 .clockid = CLOCK_REALTIME, 82 .get_time = &ktime_get_real, 83 }, 84 { 85 .index = HRTIMER_BASE_BOOTTIME, 86 .clockid = CLOCK_BOOTTIME, 87 .get_time = &ktime_get_boottime, 88 }, 89 { 90 .index = HRTIMER_BASE_TAI, 91 .clockid = CLOCK_TAI, 92 .get_time = &ktime_get_clocktai, 93 }, 94 { 95 .index = HRTIMER_BASE_MONOTONIC_SOFT, 96 .clockid = CLOCK_MONOTONIC, 97 .get_time = &ktime_get, 98 }, 99 { 100 .index = HRTIMER_BASE_REALTIME_SOFT, 101 .clockid = CLOCK_REALTIME, 102 .get_time = &ktime_get_real, 103 }, 104 { 105 .index = HRTIMER_BASE_BOOTTIME_SOFT, 106 .clockid = CLOCK_BOOTTIME, 107 .get_time = &ktime_get_boottime, 108 }, 109 { 110 .index = HRTIMER_BASE_TAI_SOFT, 111 .clockid = CLOCK_TAI, 112 .get_time = &ktime_get_clocktai, 113 }, 114 } 115 }; 116 117 static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { 118 /* Make sure we catch unsupported clockids */ 119 [0 ... 
MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, 120 121 [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, 122 [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, 123 [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, 124 [CLOCK_TAI] = HRTIMER_BASE_TAI, 125 }; 126 127 /* 128 * Functions and macros which are different for UP/SMP systems are kept in a 129 * single place 130 */ 131 #ifdef CONFIG_SMP 132 133 /* 134 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base() 135 * such that hrtimer_callback_running() can unconditionally dereference 136 * timer->base->cpu_base 137 */ 138 static struct hrtimer_cpu_base migration_cpu_base = { 139 .clock_base = { { 140 .cpu_base = &migration_cpu_base, 141 .seq = SEQCNT_RAW_SPINLOCK_ZERO(migration_cpu_base.seq, 142 &migration_cpu_base.lock), 143 }, }, 144 }; 145 146 #define migration_base migration_cpu_base.clock_base[0] 147 148 static inline bool is_migration_base(struct hrtimer_clock_base *base) 149 { 150 return base == &migration_base; 151 } 152 153 /* 154 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock 155 * means that all timers which are tied to this base via timer->base are 156 * locked, and the base itself is locked too. 157 * 158 * So __run_timers/migrate_timers can safely modify all timers which could 159 * be found on the lists/queues. 160 * 161 * When the timer's base is locked, and the timer removed from list, it is 162 * possible to set timer->base = &migration_base and drop the lock: the timer 163 * remains locked. 164 */ 165 static 166 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, 167 unsigned long *flags) 168 __acquires(&timer->base->lock) 169 { 170 struct hrtimer_clock_base *base; 171 172 for (;;) { 173 base = READ_ONCE(timer->base); 174 if (likely(base != &migration_base)) { 175 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 176 if (likely(base == timer->base)) 177 return base; 178 /* The timer has migrated to another CPU: */ 179 raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags); 180 } 181 cpu_relax(); 182 } 183 } 184 185 /* 186 * We do not migrate the timer when it is expiring before the next 187 * event on the target cpu. When high resolution is enabled, we cannot 188 * reprogram the target cpu hardware and we would cause it to fire 189 * late. To keep it simple, we handle the high resolution enabled and 190 * disabled case similar. 191 * 192 * Called with cpu_base->lock of target cpu held. 193 */ 194 static int 195 hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) 196 { 197 ktime_t expires; 198 199 expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); 200 return expires < new_base->cpu_base->expires_next; 201 } 202 203 static inline 204 struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base, 205 int pinned) 206 { 207 #if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON) 208 if (static_branch_likely(&timers_migration_enabled) && !pinned) 209 return &per_cpu(hrtimer_bases, get_nohz_timer_target()); 210 #endif 211 return base; 212 } 213 214 /* 215 * We switch the timer base to a power-optimized selected CPU target, 216 * if: 217 * - NO_HZ_COMMON is enabled 218 * - timer migration is enabled 219 * - the timer callback is not running 220 * - the timer is not the first expiring timer on the new target 221 * 222 * If one of the above requirements is not fulfilled we move the timer 223 * to the current CPU or leave it on the previously assigned CPU if 224 * the timer callback is currently running. 
225 */ 226 static inline struct hrtimer_clock_base * 227 switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, 228 int pinned) 229 { 230 struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base; 231 struct hrtimer_clock_base *new_base; 232 int basenum = base->index; 233 234 this_cpu_base = this_cpu_ptr(&hrtimer_bases); 235 new_cpu_base = get_target_base(this_cpu_base, pinned); 236 again: 237 new_base = &new_cpu_base->clock_base[basenum]; 238 239 if (base != new_base) { 240 /* 241 * We are trying to move timer to new_base. 242 * However we can't change timer's base while it is running, 243 * so we keep it on the same CPU. No hassle vs. reprogramming 244 * the event source in the high resolution case. The softirq 245 * code will take care of this when the timer function has 246 * completed. There is no conflict as we hold the lock until 247 * the timer is enqueued. 248 */ 249 if (unlikely(hrtimer_callback_running(timer))) 250 return base; 251 252 /* See the comment in lock_hrtimer_base() */ 253 WRITE_ONCE(timer->base, &migration_base); 254 raw_spin_unlock(&base->cpu_base->lock); 255 raw_spin_lock(&new_base->cpu_base->lock); 256 257 if (new_cpu_base != this_cpu_base && 258 hrtimer_check_target(timer, new_base)) { 259 raw_spin_unlock(&new_base->cpu_base->lock); 260 raw_spin_lock(&base->cpu_base->lock); 261 new_cpu_base = this_cpu_base; 262 WRITE_ONCE(timer->base, base); 263 goto again; 264 } 265 WRITE_ONCE(timer->base, new_base); 266 } else { 267 if (new_cpu_base != this_cpu_base && 268 hrtimer_check_target(timer, new_base)) { 269 new_cpu_base = this_cpu_base; 270 goto again; 271 } 272 } 273 return new_base; 274 } 275 276 #else /* CONFIG_SMP */ 277 278 static inline bool is_migration_base(struct hrtimer_clock_base *base) 279 { 280 return false; 281 } 282 283 static inline struct hrtimer_clock_base * 284 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags) 285 __acquires(&timer->base->cpu_base->lock) 286 { 287 struct hrtimer_clock_base *base = timer->base; 288 289 raw_spin_lock_irqsave(&base->cpu_base->lock, *flags); 290 291 return base; 292 } 293 294 # define switch_hrtimer_base(t, b, p) (b) 295 296 #endif /* !CONFIG_SMP */ 297 298 /* 299 * Functions for the union type storage format of ktime_t which are 300 * too large for inlining: 301 */ 302 #if BITS_PER_LONG < 64 303 /* 304 * Divide a ktime value by a nanosecond value 305 */ 306 s64 __ktime_divns(const ktime_t kt, s64 div) 307 { 308 int sft = 0; 309 s64 dclc; 310 u64 tmp; 311 312 dclc = ktime_to_ns(kt); 313 tmp = dclc < 0 ? -dclc : dclc; 314 315 /* Make sure the divisor is less than 2^32: */ 316 while (div >> 32) { 317 sft++; 318 div >>= 1; 319 } 320 tmp >>= sft; 321 do_div(tmp, (u32) div); 322 return dclc < 0 ? 
-tmp : tmp; 323 } 324 EXPORT_SYMBOL_GPL(__ktime_divns); 325 #endif /* BITS_PER_LONG >= 64 */ 326 327 /* 328 * Add two ktime values and do a safety check for overflow: 329 */ 330 ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs) 331 { 332 ktime_t res = ktime_add_unsafe(lhs, rhs); 333 334 /* 335 * We use KTIME_SEC_MAX here, the maximum timeout which we can 336 * return to user space in a timespec: 337 */ 338 if (res < 0 || res < lhs || res < rhs) 339 res = ktime_set(KTIME_SEC_MAX, 0); 340 341 return res; 342 } 343 344 EXPORT_SYMBOL_GPL(ktime_add_safe); 345 346 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS 347 348 static const struct debug_obj_descr hrtimer_debug_descr; 349 350 static void *hrtimer_debug_hint(void *addr) 351 { 352 return ((struct hrtimer *) addr)->function; 353 } 354 355 /* 356 * fixup_init is called when: 357 * - an active object is initialized 358 */ 359 static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state) 360 { 361 struct hrtimer *timer = addr; 362 363 switch (state) { 364 case ODEBUG_STATE_ACTIVE: 365 hrtimer_cancel(timer); 366 debug_object_init(timer, &hrtimer_debug_descr); 367 return true; 368 default: 369 return false; 370 } 371 } 372 373 /* 374 * fixup_activate is called when: 375 * - an active object is activated 376 * - an unknown non-static object is activated 377 */ 378 static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state) 379 { 380 switch (state) { 381 case ODEBUG_STATE_ACTIVE: 382 WARN_ON(1); 383 fallthrough; 384 default: 385 return false; 386 } 387 } 388 389 /* 390 * fixup_free is called when: 391 * - an active object is freed 392 */ 393 static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state) 394 { 395 struct hrtimer *timer = addr; 396 397 switch (state) { 398 case ODEBUG_STATE_ACTIVE: 399 hrtimer_cancel(timer); 400 debug_object_free(timer, &hrtimer_debug_descr); 401 return true; 402 default: 403 return false; 404 } 405 } 406 407 static const struct debug_obj_descr hrtimer_debug_descr = { 408 .name = "hrtimer", 409 .debug_hint = hrtimer_debug_hint, 410 .fixup_init = hrtimer_fixup_init, 411 .fixup_activate = hrtimer_fixup_activate, 412 .fixup_free = hrtimer_fixup_free, 413 }; 414 415 static inline void debug_hrtimer_init(struct hrtimer *timer) 416 { 417 debug_object_init(timer, &hrtimer_debug_descr); 418 } 419 420 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) 421 { 422 debug_object_init_on_stack(timer, &hrtimer_debug_descr); 423 } 424 425 static inline void debug_hrtimer_activate(struct hrtimer *timer, 426 enum hrtimer_mode mode) 427 { 428 debug_object_activate(timer, &hrtimer_debug_descr); 429 } 430 431 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) 432 { 433 debug_object_deactivate(timer, &hrtimer_debug_descr); 434 } 435 436 void destroy_hrtimer_on_stack(struct hrtimer *timer) 437 { 438 debug_object_free(timer, &hrtimer_debug_descr); 439 } 440 EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack); 441 442 #else 443 444 static inline void debug_hrtimer_init(struct hrtimer *timer) { } 445 static inline void debug_hrtimer_init_on_stack(struct hrtimer *timer) { } 446 static inline void debug_hrtimer_activate(struct hrtimer *timer, 447 enum hrtimer_mode mode) { } 448 static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { } 449 #endif 450 451 static inline void 452 debug_init(struct hrtimer *timer, clockid_t clockid, 453 enum hrtimer_mode mode) 454 { 455 debug_hrtimer_init(timer); 456 trace_hrtimer_init(timer, clockid, mode); 457 } 458 459 static inline void 
debug_init_on_stack(struct hrtimer *timer, clockid_t clockid, 460 enum hrtimer_mode mode) 461 { 462 debug_hrtimer_init_on_stack(timer); 463 trace_hrtimer_init(timer, clockid, mode); 464 } 465 466 static inline void debug_activate(struct hrtimer *timer, 467 enum hrtimer_mode mode) 468 { 469 debug_hrtimer_activate(timer, mode); 470 trace_hrtimer_start(timer, mode); 471 } 472 473 static inline void debug_deactivate(struct hrtimer *timer) 474 { 475 debug_hrtimer_deactivate(timer); 476 trace_hrtimer_cancel(timer); 477 } 478 479 static struct hrtimer_clock_base * 480 __next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active) 481 { 482 unsigned int idx; 483 484 if (!*active) 485 return NULL; 486 487 idx = __ffs(*active); 488 *active &= ~(1U << idx); 489 490 return &cpu_base->clock_base[idx]; 491 } 492 493 #define for_each_active_base(base, cpu_base, active) \ 494 while ((base = __next_base((cpu_base), &(active)))) 495 496 static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base, 497 const struct hrtimer *exclude, 498 unsigned int active, 499 ktime_t expires_next) 500 { 501 struct hrtimer_clock_base *base; 502 ktime_t expires; 503 504 for_each_active_base(base, cpu_base, active) { 505 struct timerqueue_node *next; 506 struct hrtimer *timer; 507 508 next = timerqueue_getnext(&base->active); 509 timer = container_of(next, struct hrtimer, node); 510 if (timer == exclude) { 511 /* Get to the next timer in the queue. */ 512 next = timerqueue_iterate_next(next); 513 if (!next) 514 continue; 515 516 timer = container_of(next, struct hrtimer, node); 517 } 518 expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 519 if (expires < expires_next) { 520 expires_next = expires; 521 522 /* Skip cpu_base update if a timer is being excluded. */ 523 if (exclude) 524 continue; 525 526 if (timer->is_soft) 527 cpu_base->softirq_next_timer = timer; 528 else 529 cpu_base->next_timer = timer; 530 } 531 } 532 /* 533 * clock_was_set() might have changed base->offset of any of 534 * the clock bases so the result might be negative. Fix it up 535 * to prevent a false positive in clockevents_program_event(). 536 */ 537 if (expires_next < 0) 538 expires_next = 0; 539 return expires_next; 540 } 541 542 /* 543 * Recomputes cpu_base::*next_timer and returns the earliest expires_next 544 * but does not set cpu_base::*expires_next, that is done by 545 * hrtimer[_force]_reprogram and hrtimer_interrupt only. When updating 546 * cpu_base::*expires_next right away, reprogramming logic would no longer 547 * work. 548 * 549 * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases, 550 * those timers will get run whenever the softirq gets handled, at the end of 551 * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases. 552 * 553 * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases. 554 * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual 555 * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD. 556 * 557 * @active_mask must be one of: 558 * - HRTIMER_ACTIVE_ALL, 559 * - HRTIMER_ACTIVE_SOFT, or 560 * - HRTIMER_ACTIVE_HARD. 
561 */ 562 static ktime_t 563 __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask) 564 { 565 unsigned int active; 566 struct hrtimer *next_timer = NULL; 567 ktime_t expires_next = KTIME_MAX; 568 569 if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) { 570 active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT; 571 cpu_base->softirq_next_timer = NULL; 572 expires_next = __hrtimer_next_event_base(cpu_base, NULL, 573 active, KTIME_MAX); 574 575 next_timer = cpu_base->softirq_next_timer; 576 } 577 578 if (active_mask & HRTIMER_ACTIVE_HARD) { 579 active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD; 580 cpu_base->next_timer = next_timer; 581 expires_next = __hrtimer_next_event_base(cpu_base, NULL, active, 582 expires_next); 583 } 584 585 return expires_next; 586 } 587 588 static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base) 589 { 590 ktime_t expires_next, soft = KTIME_MAX; 591 592 /* 593 * If the soft interrupt has already been activated, ignore the 594 * soft bases. They will be handled in the already raised soft 595 * interrupt. 596 */ 597 if (!cpu_base->softirq_activated) { 598 soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT); 599 /* 600 * Update the soft expiry time. clock_settime() might have 601 * affected it. 602 */ 603 cpu_base->softirq_expires_next = soft; 604 } 605 606 expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD); 607 /* 608 * If a softirq timer is expiring first, update cpu_base->next_timer 609 * and program the hardware with the soft expiry time. 610 */ 611 if (expires_next > soft) { 612 cpu_base->next_timer = cpu_base->softirq_next_timer; 613 expires_next = soft; 614 } 615 616 return expires_next; 617 } 618 619 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base) 620 { 621 ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset; 622 ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset; 623 ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset; 624 625 ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq, 626 offs_real, offs_boot, offs_tai); 627 628 base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real; 629 base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot; 630 base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai; 631 632 return now; 633 } 634 635 /* 636 * Is the high resolution mode active ? 637 */ 638 static inline int hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base) 639 { 640 return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ? 641 cpu_base->hres_active : 0; 642 } 643 644 static void __hrtimer_reprogram(struct hrtimer_cpu_base *cpu_base, 645 struct hrtimer *next_timer, 646 ktime_t expires_next) 647 { 648 cpu_base->expires_next = expires_next; 649 650 /* 651 * If hres is not active, hardware does not have to be 652 * reprogrammed yet. 653 * 654 * If a hang was detected in the last timer interrupt then we 655 * leave the hang delay active in the hardware. We want the 656 * system to make progress. That also prevents the following 657 * scenario: 658 * T1 expires 50ms from now 659 * T2 expires 5s from now 660 * 661 * T1 is removed, so this code is called and would reprogram 662 * the hardware to 5s from now. Any hrtimer_start after that 663 * will not reprogram the hardware due to hang_detected being 664 * set. So we'd effectively block all timers until the T2 event 665 * fires. 
666 */ 667 if (!hrtimer_hres_active(cpu_base) || cpu_base->hang_detected) 668 return; 669 670 tick_program_event(expires_next, 1); 671 } 672 673 /* 674 * Reprogram the event source with checking both queues for the 675 * next event 676 * Called with interrupts disabled and base->lock held 677 */ 678 static void 679 hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal) 680 { 681 ktime_t expires_next; 682 683 expires_next = hrtimer_update_next_event(cpu_base); 684 685 if (skip_equal && expires_next == cpu_base->expires_next) 686 return; 687 688 __hrtimer_reprogram(cpu_base, cpu_base->next_timer, expires_next); 689 } 690 691 /* High resolution timer related functions */ 692 #ifdef CONFIG_HIGH_RES_TIMERS 693 694 /* 695 * High resolution timer enabled ? 696 */ 697 static bool hrtimer_hres_enabled __read_mostly = true; 698 unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC; 699 EXPORT_SYMBOL_GPL(hrtimer_resolution); 700 701 /* 702 * Enable / Disable high resolution mode 703 */ 704 static int __init setup_hrtimer_hres(char *str) 705 { 706 return (kstrtobool(str, &hrtimer_hres_enabled) == 0); 707 } 708 709 __setup("highres=", setup_hrtimer_hres); 710 711 /* 712 * hrtimer_high_res_enabled - query, if the highres mode is enabled 713 */ 714 static inline int hrtimer_is_hres_enabled(void) 715 { 716 return hrtimer_hres_enabled; 717 } 718 719 static void retrigger_next_event(void *arg); 720 721 /* 722 * Switch to high resolution mode 723 */ 724 static void hrtimer_switch_to_hres(void) 725 { 726 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 727 728 if (tick_init_highres()) { 729 pr_warn("Could not switch to high resolution mode on CPU %u\n", 730 base->cpu); 731 return; 732 } 733 base->hres_active = 1; 734 hrtimer_resolution = HIGH_RES_NSEC; 735 736 tick_setup_sched_timer(true); 737 /* "Retrigger" the interrupt to get things going */ 738 retrigger_next_event(NULL); 739 } 740 741 #else 742 743 static inline int hrtimer_is_hres_enabled(void) { return 0; } 744 static inline void hrtimer_switch_to_hres(void) { } 745 746 #endif /* CONFIG_HIGH_RES_TIMERS */ 747 /* 748 * Retrigger next event is called after clock was set with interrupts 749 * disabled through an SMP function call or directly from low level 750 * resume code. 751 * 752 * This is only invoked when: 753 * - CONFIG_HIGH_RES_TIMERS is enabled. 754 * - CONFIG_NOHZ_COMMON is enabled 755 * 756 * For the other cases this function is empty and because the call sites 757 * are optimized out it vanishes as well, i.e. no need for lots of 758 * #ifdeffery. 759 */ 760 static void retrigger_next_event(void *arg) 761 { 762 struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases); 763 764 /* 765 * When high resolution mode or nohz is active, then the offsets of 766 * CLOCK_REALTIME/TAI/BOOTTIME have to be updated. Otherwise the 767 * next tick will take care of that. 768 * 769 * If high resolution mode is active then the next expiring timer 770 * must be reevaluated and the clock event device reprogrammed if 771 * necessary. 772 * 773 * In the NOHZ case the update of the offset and the reevaluation 774 * of the next expiring timer is enough. The return from the SMP 775 * function call will take care of the reprogramming in case the 776 * CPU was in a NOHZ idle sleep. 
777 */ 778 if (!hrtimer_hres_active(base) && !tick_nohz_active) 779 return; 780 781 raw_spin_lock(&base->lock); 782 hrtimer_update_base(base); 783 if (hrtimer_hres_active(base)) 784 hrtimer_force_reprogram(base, 0); 785 else 786 hrtimer_update_next_event(base); 787 raw_spin_unlock(&base->lock); 788 } 789 790 /* 791 * When a timer is enqueued and expires earlier than the already enqueued 792 * timers, we have to check, whether it expires earlier than the timer for 793 * which the clock event device was armed. 794 * 795 * Called with interrupts disabled and base->cpu_base.lock held 796 */ 797 static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram) 798 { 799 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 800 struct hrtimer_clock_base *base = timer->base; 801 ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset); 802 803 WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0); 804 805 /* 806 * CLOCK_REALTIME timer might be requested with an absolute 807 * expiry time which is less than base->offset. Set it to 0. 808 */ 809 if (expires < 0) 810 expires = 0; 811 812 if (timer->is_soft) { 813 /* 814 * soft hrtimer could be started on a remote CPU. In this 815 * case softirq_expires_next needs to be updated on the 816 * remote CPU. The soft hrtimer will not expire before the 817 * first hard hrtimer on the remote CPU - 818 * hrtimer_check_target() prevents this case. 819 */ 820 struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base; 821 822 if (timer_cpu_base->softirq_activated) 823 return; 824 825 if (!ktime_before(expires, timer_cpu_base->softirq_expires_next)) 826 return; 827 828 timer_cpu_base->softirq_next_timer = timer; 829 timer_cpu_base->softirq_expires_next = expires; 830 831 if (!ktime_before(expires, timer_cpu_base->expires_next) || 832 !reprogram) 833 return; 834 } 835 836 /* 837 * If the timer is not on the current cpu, we cannot reprogram 838 * the other cpus clock event device. 839 */ 840 if (base->cpu_base != cpu_base) 841 return; 842 843 if (expires >= cpu_base->expires_next) 844 return; 845 846 /* 847 * If the hrtimer interrupt is running, then it will reevaluate the 848 * clock bases and reprogram the clock event device. 849 */ 850 if (cpu_base->in_hrtirq) 851 return; 852 853 cpu_base->next_timer = timer; 854 855 __hrtimer_reprogram(cpu_base, timer, expires); 856 } 857 858 static bool update_needs_ipi(struct hrtimer_cpu_base *cpu_base, 859 unsigned int active) 860 { 861 struct hrtimer_clock_base *base; 862 unsigned int seq; 863 ktime_t expires; 864 865 /* 866 * Update the base offsets unconditionally so the following 867 * checks whether the SMP function call is required works. 868 * 869 * The update is safe even when the remote CPU is in the hrtimer 870 * interrupt or the hrtimer soft interrupt and expiring affected 871 * bases. Either it will see the update before handling a base or 872 * it will see it when it finishes the processing and reevaluates 873 * the next expiring timer. 874 */ 875 seq = cpu_base->clock_was_set_seq; 876 hrtimer_update_base(cpu_base); 877 878 /* 879 * If the sequence did not change over the update then the 880 * remote CPU already handled it. 881 */ 882 if (seq == cpu_base->clock_was_set_seq) 883 return false; 884 885 /* 886 * If the remote CPU is currently handling an hrtimer interrupt, it 887 * will reevaluate the first expiring timer of all clock bases 888 * before reprogramming. Nothing to do here. 
889 */ 890 if (cpu_base->in_hrtirq) 891 return false; 892 893 /* 894 * Walk the affected clock bases and check whether the first expiring 895 * timer in a clock base is moving ahead of the first expiring timer of 896 * @cpu_base. If so, the IPI must be invoked because per CPU clock 897 * event devices cannot be remotely reprogrammed. 898 */ 899 active &= cpu_base->active_bases; 900 901 for_each_active_base(base, cpu_base, active) { 902 struct timerqueue_node *next; 903 904 next = timerqueue_getnext(&base->active); 905 expires = ktime_sub(next->expires, base->offset); 906 if (expires < cpu_base->expires_next) 907 return true; 908 909 /* Extra check for softirq clock bases */ 910 if (base->clockid < HRTIMER_BASE_MONOTONIC_SOFT) 911 continue; 912 if (cpu_base->softirq_activated) 913 continue; 914 if (expires < cpu_base->softirq_expires_next) 915 return true; 916 } 917 return false; 918 } 919 920 /* 921 * Clock was set. This might affect CLOCK_REALTIME, CLOCK_TAI and 922 * CLOCK_BOOTTIME (for late sleep time injection). 923 * 924 * This requires to update the offsets for these clocks 925 * vs. CLOCK_MONOTONIC. When high resolution timers are enabled, then this 926 * also requires to eventually reprogram the per CPU clock event devices 927 * when the change moves an affected timer ahead of the first expiring 928 * timer on that CPU. Obviously remote per CPU clock event devices cannot 929 * be reprogrammed. The other reason why an IPI has to be sent is when the 930 * system is in !HIGH_RES and NOHZ mode. The NOHZ mode updates the offsets 931 * in the tick, which obviously might be stopped, so this has to bring out 932 * the remote CPU which might sleep in idle to get this sorted. 933 */ 934 void clock_was_set(unsigned int bases) 935 { 936 struct hrtimer_cpu_base *cpu_base = raw_cpu_ptr(&hrtimer_bases); 937 cpumask_var_t mask; 938 int cpu; 939 940 if (!hrtimer_hres_active(cpu_base) && !tick_nohz_active) 941 goto out_timerfd; 942 943 if (!zalloc_cpumask_var(&mask, GFP_KERNEL)) { 944 on_each_cpu(retrigger_next_event, NULL, 1); 945 goto out_timerfd; 946 } 947 948 /* Avoid interrupting CPUs if possible */ 949 cpus_read_lock(); 950 for_each_online_cpu(cpu) { 951 unsigned long flags; 952 953 cpu_base = &per_cpu(hrtimer_bases, cpu); 954 raw_spin_lock_irqsave(&cpu_base->lock, flags); 955 956 if (update_needs_ipi(cpu_base, bases)) 957 cpumask_set_cpu(cpu, mask); 958 959 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 960 } 961 962 preempt_disable(); 963 smp_call_function_many(mask, retrigger_next_event, NULL, 1); 964 preempt_enable(); 965 cpus_read_unlock(); 966 free_cpumask_var(mask); 967 968 out_timerfd: 969 timerfd_clock_was_set(); 970 } 971 972 static void clock_was_set_work(struct work_struct *work) 973 { 974 clock_was_set(CLOCK_SET_WALL); 975 } 976 977 static DECLARE_WORK(hrtimer_work, clock_was_set_work); 978 979 /* 980 * Called from timekeeping code to reprogram the hrtimer interrupt device 981 * on all cpus and to notify timerfd. 982 */ 983 void clock_was_set_delayed(void) 984 { 985 schedule_work(&hrtimer_work); 986 } 987 988 /* 989 * Called during resume either directly from via timekeeping_resume() 990 * or in the case of s2idle from tick_unfreeze() to ensure that the 991 * hrtimers are up to date. 
 */
void hrtimers_resume_local(void)
{
	lockdep_assert_irqs_disabled();
	/* Retrigger on the local CPU */
	retrigger_next_event(NULL);
}

/*
 * Counterpart to lock_hrtimer_base above:
 */
static inline
void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
	__releases(&timer->base->cpu_base->lock)
{
	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
}

/**
 * hrtimer_forward() - forward the timer expiry
 * @timer:	hrtimer to forward
 * @now:	forward past this time
 * @interval:	the interval to forward
 *
 * Forward the timer expiry so it will expire in the future.
 *
 * .. note::
 *  This only updates the timer expiry value and does not requeue the timer.
 *
 * There is also a variant of the function hrtimer_forward_now().
 *
 * Context: Can be safely called from the callback function of @timer. If called
 *          from other contexts @timer must neither be enqueued nor running the
 *          callback and the caller needs to take care of serialization.
 *
 * Return: The number of overruns.
 */
u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
	u64 orun = 1;
	ktime_t delta;

	delta = ktime_sub(now, hrtimer_get_expires(timer));

	if (delta < 0)
		return 0;

	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
		return 0;

	if (interval < hrtimer_resolution)
		interval = hrtimer_resolution;

	if (unlikely(delta >= interval)) {
		s64 incr = ktime_to_ns(interval);

		orun = ktime_divns(delta, incr);
		hrtimer_add_expires_ns(timer, incr * orun);
		if (hrtimer_get_expires_tv64(timer) > now)
			return orun;
		/*
		 * This (and the ktime_add() below) is the
		 * correction for exact:
		 */
		orun++;
	}
	hrtimer_add_expires(timer, interval);

	return orun;
}
EXPORT_SYMBOL_GPL(hrtimer_forward);
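
/*
 * Illustrative sketch (not part of the original source): a typical periodic
 * callback advances its own expiry with the hrtimer_forward_now() variant
 * mentioned above and rearms itself by returning HRTIMER_RESTART. The
 * callback name and period below (example_period_tick(), 100 ms) are
 * hypothetical and only demonstrate the intended use of the overrun count.
 */
static __maybe_unused enum hrtimer_restart example_period_tick(struct hrtimer *timer)
{
	const u64 period_ns = 100 * NSEC_PER_MSEC;	/* hypothetical period */
	u64 overruns;

	/*
	 * Push the expiry forward in whole periods past the current time.
	 * The return value is the number of periods consumed, so anything
	 * above 1 means intervals were missed.
	 */
	overruns = hrtimer_forward_now(timer, ns_to_ktime(period_ns));
	if (overruns > 1)
		pr_debug("example: missed %llu period(s)\n", overruns - 1);

	/* Returning HRTIMER_RESTART requeues the timer at the new expiry. */
	return HRTIMER_RESTART;
}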
/*
 * enqueue_hrtimer - internal function to (re)start a timer
 *
 * The timer is inserted in expiry order. Insertion into the
 * red black tree is O(log(n)). Must hold the base lock.
 *
 * Returns 1 when the new timer is the leftmost timer in the tree.
 */
static int enqueue_hrtimer(struct hrtimer *timer,
			   struct hrtimer_clock_base *base,
			   enum hrtimer_mode mode)
{
	debug_activate(timer, mode);
	WARN_ON_ONCE(!base->cpu_base->online);

	base->cpu_base->active_bases |= 1 << base->index;

	/* Pairs with the lockless read in hrtimer_is_queued() */
	WRITE_ONCE(timer->state, HRTIMER_STATE_ENQUEUED);

	return timerqueue_add(&base->active, &timer->node);
}

/*
 * __remove_hrtimer - internal function to remove a timer
 *
 * Caller must hold the base lock.
 *
 * High resolution timer mode reprograms the clock event device when the
 * timer is the one which expires next. The caller can disable this by setting
 * reprogram to zero. This is useful when the context does a reprogramming
 * anyway (e.g. timer interrupt).
 */
static void __remove_hrtimer(struct hrtimer *timer,
			     struct hrtimer_clock_base *base,
			     u8 newstate, int reprogram)
{
	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
	u8 state = timer->state;

	/* Pairs with the lockless read in hrtimer_is_queued() */
	WRITE_ONCE(timer->state, newstate);
	if (!(state & HRTIMER_STATE_ENQUEUED))
		return;

	if (!timerqueue_del(&base->active, &timer->node))
		cpu_base->active_bases &= ~(1 << base->index);

	/*
	 * Note: If reprogram is false we do not update
	 * cpu_base->next_timer. This happens when we remove the first
	 * timer on a remote cpu. No harm as we never dereference
	 * cpu_base->next_timer. So the worst that can happen is a
	 * superfluous call to hrtimer_force_reprogram() on the
	 * remote cpu later on if the same timer gets enqueued again.
	 */
	if (reprogram && timer == cpu_base->next_timer)
		hrtimer_force_reprogram(cpu_base, 1);
}

/*
 * remove hrtimer, called with base lock held
 */
static inline int
remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base,
	       bool restart, bool keep_local)
{
	u8 state = timer->state;

	if (state & HRTIMER_STATE_ENQUEUED) {
		bool reprogram;

		/*
		 * Remove the timer and force reprogramming when high
		 * resolution mode is active and the timer is on the current
		 * CPU. If we remove a timer on another CPU, reprogramming is
		 * skipped. The interrupt event on this CPU is fired and
		 * reprogramming happens in the interrupt handler. This is a
		 * rare case and less expensive than a smp call.
		 */
		debug_deactivate(timer);
		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);

		/*
		 * If the timer is not restarted then reprogramming is
		 * required if the timer is local. If it is local and about
		 * to be restarted, avoid programming it twice (on removal
		 * and a moment later when it's requeued).
		 */
		if (!restart)
			state = HRTIMER_STATE_INACTIVE;
		else
			reprogram &= !keep_local;

		__remove_hrtimer(timer, base, state, reprogram);
		return 1;
	}
	return 0;
}

static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
					    const enum hrtimer_mode mode)
{
#ifdef CONFIG_TIME_LOW_RES
	/*
	 * CONFIG_TIME_LOW_RES indicates that the system has no way to return
	 * granular time values. For relative timers we add hrtimer_resolution
	 * (i.e. one jiffy) to prevent short timeouts.
	 */
	timer->is_rel = mode & HRTIMER_MODE_REL;
	if (timer->is_rel)
		tim = ktime_add_safe(tim, hrtimer_resolution);
#endif
	return tim;
}

static void
hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
{
	ktime_t expires;

	/*
	 * Find the next SOFT expiration.
	 */
	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);

	/*
	 * Reprogramming needs to be triggered even if the next soft
	 * hrtimer expires at the same time as the next hard
	 * hrtimer. cpu_base->softirq_expires_next needs to be updated!
	 */
	if (expires == KTIME_MAX)
		return;

	/*
	 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
	 * cpu_base->*expires_next is only set by hrtimer_reprogram()
	 */
	hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
}

static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
				    u64 delta_ns, const enum hrtimer_mode mode,
				    struct hrtimer_clock_base *base)
{
	struct hrtimer_clock_base *new_base;
	bool force_local, first;

	/*
	 * If the timer is on the local cpu base and is the first expiring
	 * timer then this might end up reprogramming the hardware twice
	 * (on removal and on enqueue). To avoid that, prevent the
	 * reprogram on removal: keep the timer local to the current CPU
	 * and enforce reprogramming after it is queued no matter whether
	 * it is the new first expiring timer again or not.
	 */
	force_local = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
	force_local &= base->cpu_base->next_timer == timer;

	/*
	 * Remove an active timer from the queue. In case it is not queued
	 * on the current CPU, make sure that remove_hrtimer() updates the
	 * remote data correctly.
	 *
	 * If it's on the current CPU and the first expiring timer, then
	 * skip reprogramming, keep the timer local and enforce
	 * reprogramming later if it was the first expiring timer. This
	 * avoids programming the underlying clock event twice (once at
	 * removal and once after enqueue).
	 */
	remove_hrtimer(timer, base, true, force_local);

	if (mode & HRTIMER_MODE_REL)
		tim = ktime_add_safe(tim, base->get_time());

	tim = hrtimer_update_lowres(timer, tim, mode);

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	/* Switch the timer base, if necessary: */
	if (!force_local) {
		new_base = switch_hrtimer_base(timer, base,
					       mode & HRTIMER_MODE_PINNED);
	} else {
		new_base = base;
	}

	first = enqueue_hrtimer(timer, new_base, mode);
	if (!force_local)
		return first;

	/*
	 * Timer was forced to stay on the current CPU to avoid
	 * reprogramming on removal and enqueue. Force reprogram the
	 * hardware by evaluating the new first expiring timer.
	 */
	hrtimer_force_reprogram(new_base->cpu_base, 1);
	return 0;
}
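
/*
 * Illustrative sketch (not part of the original source): callers normally go
 * through the public wrappers built on top of __hrtimer_start_range_ns().
 * A non-zero @delta_ns gives the expiry a "slack" window so nearby expiries
 * can be coalesced. The function name and the expiry/slack values below are
 * hypothetical.
 */
static __maybe_unused void example_arm_with_slack(struct hrtimer *timer)
{
	/*
	 * Expire roughly 2 ms from now and allow up to 500 us of slack so
	 * the expiry can be coalesced with other nearby timers. Passing a
	 * slack of 0 (or using the hrtimer_start() wrapper) requests the
	 * exact expiry time instead.
	 */
	hrtimer_start_range_ns(timer, ms_to_ktime(2), 500 * NSEC_PER_USEC,
			       HRTIMER_MODE_REL);
}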
/**
 * hrtimer_start_range_ns - (re)start an hrtimer
 * @timer:	the timer to be added
 * @tim:	expiry time
 * @delta_ns:	"slack" range for the timer
 * @mode:	timer mode: absolute (HRTIMER_MODE_ABS) or
 *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
 *		softirq based mode is considered for debug purpose only!
 */
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
			    u64 delta_ns, const enum hrtimer_mode mode)
{
	struct hrtimer_clock_base *base;
	unsigned long flags;

	if (WARN_ON_ONCE(!timer->function))
		return;
	/*
	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
	 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
	 * expiry mode because unmarked timers are moved to softirq expiry.
1285 */ 1286 if (!IS_ENABLED(CONFIG_PREEMPT_RT)) 1287 WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft); 1288 else 1289 WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard); 1290 1291 base = lock_hrtimer_base(timer, &flags); 1292 1293 if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base)) 1294 hrtimer_reprogram(timer, true); 1295 1296 unlock_hrtimer_base(timer, &flags); 1297 } 1298 EXPORT_SYMBOL_GPL(hrtimer_start_range_ns); 1299 1300 /** 1301 * hrtimer_try_to_cancel - try to deactivate a timer 1302 * @timer: hrtimer to stop 1303 * 1304 * Returns: 1305 * 1306 * * 0 when the timer was not active 1307 * * 1 when the timer was active 1308 * * -1 when the timer is currently executing the callback function and 1309 * cannot be stopped 1310 */ 1311 int hrtimer_try_to_cancel(struct hrtimer *timer) 1312 { 1313 struct hrtimer_clock_base *base; 1314 unsigned long flags; 1315 int ret = -1; 1316 1317 /* 1318 * Check lockless first. If the timer is not active (neither 1319 * enqueued nor running the callback, nothing to do here. The 1320 * base lock does not serialize against a concurrent enqueue, 1321 * so we can avoid taking it. 1322 */ 1323 if (!hrtimer_active(timer)) 1324 return 0; 1325 1326 base = lock_hrtimer_base(timer, &flags); 1327 1328 if (!hrtimer_callback_running(timer)) 1329 ret = remove_hrtimer(timer, base, false, false); 1330 1331 unlock_hrtimer_base(timer, &flags); 1332 1333 return ret; 1334 1335 } 1336 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel); 1337 1338 #ifdef CONFIG_PREEMPT_RT 1339 static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) 1340 { 1341 spin_lock_init(&base->softirq_expiry_lock); 1342 } 1343 1344 static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) 1345 __acquires(&base->softirq_expiry_lock) 1346 { 1347 spin_lock(&base->softirq_expiry_lock); 1348 } 1349 1350 static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) 1351 __releases(&base->softirq_expiry_lock) 1352 { 1353 spin_unlock(&base->softirq_expiry_lock); 1354 } 1355 1356 /* 1357 * The counterpart to hrtimer_cancel_wait_running(). 1358 * 1359 * If there is a waiter for cpu_base->expiry_lock, then it was waiting for 1360 * the timer callback to finish. Drop expiry_lock and reacquire it. That 1361 * allows the waiter to acquire the lock and make progress. 1362 */ 1363 static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base, 1364 unsigned long flags) 1365 { 1366 if (atomic_read(&cpu_base->timer_waiters)) { 1367 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1368 spin_unlock(&cpu_base->softirq_expiry_lock); 1369 spin_lock(&cpu_base->softirq_expiry_lock); 1370 raw_spin_lock_irq(&cpu_base->lock); 1371 } 1372 } 1373 1374 /* 1375 * This function is called on PREEMPT_RT kernels when the fast path 1376 * deletion of a timer failed because the timer callback function was 1377 * running. 1378 * 1379 * This prevents priority inversion: if the soft irq thread is preempted 1380 * in the middle of a timer callback, then calling del_timer_sync() can 1381 * lead to two issues: 1382 * 1383 * - If the caller is on a remote CPU then it has to spin wait for the timer 1384 * handler to complete. This can result in unbound priority inversion. 1385 * 1386 * - If the caller originates from the task which preempted the timer 1387 * handler on the same CPU, then spin waiting for the timer handler to 1388 * complete is never going to end. 1389 */ 1390 void hrtimer_cancel_wait_running(const struct hrtimer *timer) 1391 { 1392 /* Lockless read. 
Prevent the compiler from reloading it below */ 1393 struct hrtimer_clock_base *base = READ_ONCE(timer->base); 1394 1395 /* 1396 * Just relax if the timer expires in hard interrupt context or if 1397 * it is currently on the migration base. 1398 */ 1399 if (!timer->is_soft || is_migration_base(base)) { 1400 cpu_relax(); 1401 return; 1402 } 1403 1404 /* 1405 * Mark the base as contended and grab the expiry lock, which is 1406 * held by the softirq across the timer callback. Drop the lock 1407 * immediately so the softirq can expire the next timer. In theory 1408 * the timer could already be running again, but that's more than 1409 * unlikely and just causes another wait loop. 1410 */ 1411 atomic_inc(&base->cpu_base->timer_waiters); 1412 spin_lock_bh(&base->cpu_base->softirq_expiry_lock); 1413 atomic_dec(&base->cpu_base->timer_waiters); 1414 spin_unlock_bh(&base->cpu_base->softirq_expiry_lock); 1415 } 1416 #else 1417 static inline void 1418 hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { } 1419 static inline void 1420 hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { } 1421 static inline void 1422 hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { } 1423 static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base, 1424 unsigned long flags) { } 1425 #endif 1426 1427 /** 1428 * hrtimer_cancel - cancel a timer and wait for the handler to finish. 1429 * @timer: the timer to be cancelled 1430 * 1431 * Returns: 1432 * 0 when the timer was not active 1433 * 1 when the timer was active 1434 */ 1435 int hrtimer_cancel(struct hrtimer *timer) 1436 { 1437 int ret; 1438 1439 do { 1440 ret = hrtimer_try_to_cancel(timer); 1441 1442 if (ret < 0) 1443 hrtimer_cancel_wait_running(timer); 1444 } while (ret < 0); 1445 return ret; 1446 } 1447 EXPORT_SYMBOL_GPL(hrtimer_cancel); 1448 1449 /** 1450 * __hrtimer_get_remaining - get remaining time for the timer 1451 * @timer: the timer to read 1452 * @adjust: adjust relative timers when CONFIG_TIME_LOW_RES=y 1453 */ 1454 ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust) 1455 { 1456 unsigned long flags; 1457 ktime_t rem; 1458 1459 lock_hrtimer_base(timer, &flags); 1460 if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust) 1461 rem = hrtimer_expires_remaining_adjusted(timer); 1462 else 1463 rem = hrtimer_expires_remaining(timer); 1464 unlock_hrtimer_base(timer, &flags); 1465 1466 return rem; 1467 } 1468 EXPORT_SYMBOL_GPL(__hrtimer_get_remaining); 1469 1470 #ifdef CONFIG_NO_HZ_COMMON 1471 /** 1472 * hrtimer_get_next_event - get the time until next expiry event 1473 * 1474 * Returns the next expiry time or KTIME_MAX if no timer is pending. 1475 */ 1476 u64 hrtimer_get_next_event(void) 1477 { 1478 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1479 u64 expires = KTIME_MAX; 1480 unsigned long flags; 1481 1482 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1483 1484 if (!hrtimer_hres_active(cpu_base)) 1485 expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL); 1486 1487 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1488 1489 return expires; 1490 } 1491 1492 /** 1493 * hrtimer_next_event_without - time until next expiry event w/o one timer 1494 * @exclude: timer to exclude 1495 * 1496 * Returns the next expiry time over all timers except for the @exclude one or 1497 * KTIME_MAX if none of them is pending. 
 */
u64 hrtimer_next_event_without(const struct hrtimer *exclude)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	u64 expires = KTIME_MAX;
	unsigned long flags;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);

	if (hrtimer_hres_active(cpu_base)) {
		unsigned int active;

		if (!cpu_base->softirq_activated) {
			active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
			expires = __hrtimer_next_event_base(cpu_base, exclude,
							    active, KTIME_MAX);
		}
		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
		expires = __hrtimer_next_event_base(cpu_base, exclude, active,
						    expires);
	}

	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	return expires;
}
#endif

static inline int hrtimer_clockid_to_base(clockid_t clock_id)
{
	if (likely(clock_id < MAX_CLOCKS)) {
		int base = hrtimer_clock_to_base_table[clock_id];

		if (likely(base != HRTIMER_MAX_CLOCK_BASES))
			return base;
	}
	WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
	return HRTIMER_BASE_MONOTONIC;
}

static enum hrtimer_restart hrtimer_dummy_timeout(struct hrtimer *unused)
{
	return HRTIMER_NORESTART;
}

static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode)
{
	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
	struct hrtimer_cpu_base *cpu_base;
	int base;

	/*
	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
	 * marked for hard interrupt expiry mode are moved into soft
	 * interrupt context for latency reasons and because the callbacks
	 * can invoke functions which might sleep on RT, e.g. spin_lock().
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
		softtimer = true;

	memset(timer, 0, sizeof(struct hrtimer));

	cpu_base = raw_cpu_ptr(&hrtimer_bases);

	/*
	 * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
	 * clock modifications, so they need to become CLOCK_MONOTONIC to
	 * ensure POSIX compliance.
	 */
	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
		clock_id = CLOCK_MONOTONIC;

	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
	base += hrtimer_clockid_to_base(clock_id);
	timer->is_soft = softtimer;
	timer->is_hard = !!(mode & HRTIMER_MODE_HARD);
	timer->base = &cpu_base->clock_base[base];
	timerqueue_init(&timer->node);
}
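
/*
 * Illustrative sketch (not part of the original source): on PREEMPT_RT the
 * initialization above silently moves unmarked timers to softirq expiry. A
 * caller whose callback is hardirq-safe and latency critical can keep hard
 * interrupt expiry by passing one of the *_HARD mode variants. The function
 * name below is hypothetical.
 */
static __maybe_unused void example_setup_hard_timer(struct hrtimer *timer,
		enum hrtimer_restart (*callback)(struct hrtimer *))
{
	/* is_soft stays false even on PREEMPT_RT because of the _HARD mode. */
	hrtimer_setup(timer, callback, CLOCK_MONOTONIC, HRTIMER_MODE_REL_HARD);
}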
static void __hrtimer_setup(struct hrtimer *timer,
			    enum hrtimer_restart (*function)(struct hrtimer *),
			    clockid_t clock_id, enum hrtimer_mode mode)
{
	__hrtimer_init(timer, clock_id, mode);

	if (WARN_ON_ONCE(!function))
		timer->function = hrtimer_dummy_timeout;
	else
		timer->function = function;
}

/**
 * hrtimer_init - initialize a timer to the given clock
 * @timer:	the timer to be initialized
 * @clock_id:	the clock to be used
 * @mode:	The modes which are relevant for initialization:
 *		HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
 *		HRTIMER_MODE_REL_SOFT
 *
 *		The PINNED variants of the above can be handed in,
 *		but the PINNED bit is ignored as pinning happens
 *		when the hrtimer is started
 */
void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
		  enum hrtimer_mode mode)
{
	debug_init(timer, clock_id, mode);
	__hrtimer_init(timer, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_init);

/**
 * hrtimer_setup - initialize a timer to the given clock
 * @timer:	the timer to be initialized
 * @function:	the callback function
 * @clock_id:	the clock to be used
 * @mode:	The modes which are relevant for initialization:
 *		HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
 *		HRTIMER_MODE_REL_SOFT
 *
 *		The PINNED variants of the above can be handed in,
 *		but the PINNED bit is ignored as pinning happens
 *		when the hrtimer is started
 */
void hrtimer_setup(struct hrtimer *timer, enum hrtimer_restart (*function)(struct hrtimer *),
		   clockid_t clock_id, enum hrtimer_mode mode)
{
	debug_init(timer, clock_id, mode);
	__hrtimer_setup(timer, function, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup);

/**
 * hrtimer_setup_on_stack - initialize a timer on stack memory
 * @timer:	The timer to be initialized
 * @function:	the callback function
 * @clock_id:	The clock to be used
 * @mode:	The timer mode
 *
 * Similar to hrtimer_setup(), except that this one must be used if struct hrtimer is in stack
 * memory.
 */
void hrtimer_setup_on_stack(struct hrtimer *timer,
			    enum hrtimer_restart (*function)(struct hrtimer *),
			    clockid_t clock_id, enum hrtimer_mode mode)
{
	debug_init_on_stack(timer, clock_id, mode);
	__hrtimer_setup(timer, function, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup_on_stack);
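
/*
 * Illustrative sketch (not part of the original source): typical lifecycle
 * of an hrtimer embedded in a larger object. The structure, callback and
 * timeout below (struct example_dev, example_timeout_fn(), 10 ms) are
 * hypothetical and only demonstrate how hrtimer_setup(), hrtimer_start()
 * and hrtimer_cancel() fit together.
 */
struct example_dev {
	struct hrtimer	watchdog;
	/* ... other members ... */
};

static enum hrtimer_restart example_timeout_fn(struct hrtimer *timer)
{
	struct example_dev *dev = container_of(timer, struct example_dev, watchdog);

	/* Handle the timeout for @dev here; one-shot, so do not restart. */
	(void)dev;
	return HRTIMER_NORESTART;
}

static __maybe_unused void example_dev_init(struct example_dev *dev)
{
	hrtimer_setup(&dev->watchdog, example_timeout_fn, CLOCK_MONOTONIC,
		      HRTIMER_MODE_REL);
	hrtimer_start(&dev->watchdog, ms_to_ktime(10), HRTIMER_MODE_REL);
}

static __maybe_unused void example_dev_teardown(struct example_dev *dev)
{
	/* Wait for a running callback to finish before the object goes away. */
	hrtimer_cancel(&dev->watchdog);
}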
/*
 * A timer is active when it is enqueued into the rbtree or the
 * callback function is running or it's in the state of being migrated
 * to another cpu.
 *
 * It is important for this function to not return a false negative.
 */
bool hrtimer_active(const struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned int seq;

	do {
		base = READ_ONCE(timer->base);
		seq = raw_read_seqcount_begin(&base->seq);

		if (timer->state != HRTIMER_STATE_INACTIVE ||
		    base->running == timer)
			return true;

	} while (read_seqcount_retry(&base->seq, seq) ||
		 base != READ_ONCE(timer->base));

	return false;
}
EXPORT_SYMBOL_GPL(hrtimer_active);

/*
 * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
 * distinct sections:
 *
 *  - queued:	the timer is queued
 *  - callback:	the timer is being run
 *  - post:	the timer is inactive or (re)queued
 *
 * On the read side we ensure we observe timer->state and base->running
 * from the same section; if anything changed while we looked at it, we retry.
 * This includes timer->base changing because sequence numbers alone are
 * insufficient for that.
 *
 * The sequence numbers are required because otherwise we could still observe
 * a false negative if the read side got smeared over multiple consecutive
 * __run_hrtimer() invocations.
 */

static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
			  struct hrtimer_clock_base *base,
			  struct hrtimer *timer, ktime_t *now,
			  unsigned long flags) __must_hold(&cpu_base->lock)
{
	enum hrtimer_restart (*fn)(struct hrtimer *);
	bool expires_in_hardirq;
	int restart;

	lockdep_assert_held(&cpu_base->lock);

	debug_deactivate(timer);
	base->running = timer;

	/*
	 * Separate the ->running assignment from the ->state assignment.
	 *
	 * As with a regular write barrier, this ensures the read side in
	 * hrtimer_active() cannot observe base->running == NULL &&
	 * timer->state == INACTIVE.
	 */
	raw_write_seqcount_barrier(&base->seq);

	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
	fn = timer->function;

	/*
	 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
	 * timer is restarted with a period then it becomes an absolute
	 * timer. If it's not restarted, it does not matter.
	 */
	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
		timer->is_rel = false;

	/*
	 * The timer is marked as running in the CPU base, so it is
	 * protected against migration to a different CPU even if the lock
	 * is dropped.
	 */
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
	trace_hrtimer_expire_entry(timer, now);
	expires_in_hardirq = lockdep_hrtimer_enter(timer);

	restart = fn(timer);

	lockdep_hrtimer_exit(expires_in_hardirq);
	trace_hrtimer_expire_exit(timer);
	raw_spin_lock_irq(&cpu_base->lock);

	/*
	 * Note: We clear the running state after enqueue_hrtimer and
	 * we do not reprogram the event hardware. Happens either in
	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
	 *
	 * Note: Because we dropped the cpu_base->lock above,
	 * hrtimer_start_range_ns() can have popped in and enqueued the timer
	 * for us already.
	 */
	if (restart != HRTIMER_NORESTART &&
	    !(timer->state & HRTIMER_STATE_ENQUEUED))
		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);

	/*
	 * Separate the ->running assignment from the ->state assignment.
1760 * 1761 * As with a regular write barrier, this ensures the read side in 1762 * hrtimer_active() cannot observe base->running.timer == NULL && 1763 * timer->state == INACTIVE. 1764 */ 1765 raw_write_seqcount_barrier(&base->seq); 1766 1767 WARN_ON_ONCE(base->running != timer); 1768 base->running = NULL; 1769 } 1770 1771 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now, 1772 unsigned long flags, unsigned int active_mask) 1773 { 1774 struct hrtimer_clock_base *base; 1775 unsigned int active = cpu_base->active_bases & active_mask; 1776 1777 for_each_active_base(base, cpu_base, active) { 1778 struct timerqueue_node *node; 1779 ktime_t basenow; 1780 1781 basenow = ktime_add(now, base->offset); 1782 1783 while ((node = timerqueue_getnext(&base->active))) { 1784 struct hrtimer *timer; 1785 1786 timer = container_of(node, struct hrtimer, node); 1787 1788 /* 1789 * The immediate goal for using the softexpires is 1790 * minimizing wakeups, not running timers at the 1791 * earliest interrupt after their soft expiration. 1792 * This allows us to avoid using a Priority Search 1793 * Tree, which can answer a stabbing query for 1794 * overlapping intervals and instead use the simple 1795 * BST we already have. 1796 * We don't add extra wakeups by delaying timers that 1797 * are right-of a not yet expired timer, because that 1798 * timer will have to trigger a wakeup anyway. 1799 */ 1800 if (basenow < hrtimer_get_softexpires_tv64(timer)) 1801 break; 1802 1803 __run_hrtimer(cpu_base, base, timer, &basenow, flags); 1804 if (active_mask == HRTIMER_ACTIVE_SOFT) 1805 hrtimer_sync_wait_running(cpu_base, flags); 1806 } 1807 } 1808 } 1809 1810 static __latent_entropy void hrtimer_run_softirq(void) 1811 { 1812 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1813 unsigned long flags; 1814 ktime_t now; 1815 1816 hrtimer_cpu_base_lock_expiry(cpu_base); 1817 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1818 1819 now = hrtimer_update_base(cpu_base); 1820 __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT); 1821 1822 cpu_base->softirq_activated = 0; 1823 hrtimer_update_softirq_timer(cpu_base, true); 1824 1825 raw_spin_unlock_irqrestore(&cpu_base->lock, flags); 1826 hrtimer_cpu_base_unlock_expiry(cpu_base); 1827 } 1828 1829 #ifdef CONFIG_HIGH_RES_TIMERS 1830 1831 /* 1832 * High resolution timer interrupt 1833 * Called with interrupts disabled 1834 */ 1835 void hrtimer_interrupt(struct clock_event_device *dev) 1836 { 1837 struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases); 1838 ktime_t expires_next, now, entry_time, delta; 1839 unsigned long flags; 1840 int retries = 0; 1841 1842 BUG_ON(!cpu_base->hres_active); 1843 cpu_base->nr_events++; 1844 dev->next_event = KTIME_MAX; 1845 1846 raw_spin_lock_irqsave(&cpu_base->lock, flags); 1847 entry_time = now = hrtimer_update_base(cpu_base); 1848 retry: 1849 cpu_base->in_hrtirq = 1; 1850 /* 1851 * We set expires_next to KTIME_MAX here with cpu_base->lock 1852 * held to prevent that a timer is enqueued in our queue via 1853 * the migration code. This does not affect enqueueing of 1854 * timers which run their callback and need to be requeued on 1855 * this CPU. 
#ifdef CONFIG_HIGH_RES_TIMERS

/*
 * High resolution timer interrupt
 * Called with interrupts disabled
 */
void hrtimer_interrupt(struct clock_event_device *dev)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	ktime_t expires_next, now, entry_time, delta;
	unsigned long flags;
	int retries = 0;

	BUG_ON(!cpu_base->hres_active);
	cpu_base->nr_events++;
	dev->next_event = KTIME_MAX;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	entry_time = now = hrtimer_update_base(cpu_base);
retry:
	cpu_base->in_hrtirq = 1;
	/*
	 * We set expires_next to KTIME_MAX here with cpu_base->lock
	 * held to prevent a timer from being enqueued in our queue via
	 * the migration code. This does not affect enqueueing of
	 * timers which run their callback and need to be requeued on
	 * this CPU.
	 */
	cpu_base->expires_next = KTIME_MAX;

	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
		cpu_base->softirq_expires_next = KTIME_MAX;
		cpu_base->softirq_activated = 1;
		raise_timer_softirq(HRTIMER_SOFTIRQ);
	}

	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);

	/* Reevaluate the clock bases for the [soft] next expiry */
	expires_next = hrtimer_update_next_event(cpu_base);
	/*
	 * Store the new expiry value so the migration code can verify
	 * against it.
	 */
	cpu_base->expires_next = expires_next;
	cpu_base->in_hrtirq = 0;
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	/* Reprogramming necessary? */
	if (!tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
		return;
	}

	/*
	 * The next timer has already expired due to:
	 * - tracing
	 * - long lasting callbacks
	 * - being scheduled away when running in a VM
	 *
	 * We need to prevent the hrtimer interrupt routine from looping
	 * forever. We give it 3 attempts to avoid overreacting to some
	 * spurious event.
	 *
	 * Acquire base lock for updating the offsets and retrieving
	 * the current time.
	 */
	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	now = hrtimer_update_base(cpu_base);
	cpu_base->nr_retries++;
	if (++retries < 3)
		goto retry;
	/*
	 * Give the system a chance to do something other than looping
	 * here. We stored the entry time, so we know exactly how long
	 * we spent here. We schedule the next event this amount of
	 * time away.
	 */
	cpu_base->nr_hangs++;
	cpu_base->hang_detected = 1;
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	delta = ktime_sub(now, entry_time);
	if ((unsigned int)delta > cpu_base->max_hang_time)
		cpu_base->max_hang_time = (unsigned int)delta;
	/*
	 * Limit it to a sensible value as we enforce a longer
	 * delay. Give the CPU at least 100ms to catch up.
	 */
	if (delta > 100 * NSEC_PER_MSEC)
		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
	else
		expires_next = ktime_add(now, delta);
	tick_program_event(expires_next, 1);
	pr_warn_once("hrtimer: interrupt took %llu ns\n", ktime_to_ns(delta));
}
#endif /* CONFIG_HIGH_RES_TIMERS */

/*
 * Called from run_local_timers in hardirq context every jiffy
 */
void hrtimer_run_queues(void)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	unsigned long flags;
	ktime_t now;

	if (hrtimer_hres_active(cpu_base))
		return;

	/*
	 * This _is_ ugly: We have to check periodically whether we
	 * can switch to highres and/or nohz mode. The clocksource
	 * switch happens with xtime_lock held. Notification from
	 * there only sets the check bit in the tick_oneshot code,
	 * otherwise we might deadlock vs. xtime_lock.
	 */
	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
		hrtimer_switch_to_hres();
		return;
	}

	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	now = hrtimer_update_base(cpu_base);

	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
		cpu_base->softirq_expires_next = KTIME_MAX;
		cpu_base->softirq_activated = 1;
		raise_timer_softirq(HRTIMER_SOFTIRQ);
	}

	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
}
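
/*
 * Example (editor's illustration, not part of the original source): the hang
 * protection above programs the next event min(delta, 100ms) into the
 * future. Two worked instances of the clamp, with made-up delays:
 *
 *	// Interrupt processing took 30ms: retry 30ms from now.
 *	ktime_t d1 = ms_to_ktime(30);
 *	ktime_t next1 = min_t(ktime_t, d1, ms_to_ktime(100));	// 30ms
 *
 *	// Interrupt processing took 250ms: clamp the delay to 100ms.
 *	ktime_t d2 = ms_to_ktime(250);
 *	ktime_t next2 = min_t(ktime_t, d2, ms_to_ktime(100));	// 100ms
 */
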
/*
 * Sleep related functions:
 */
static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
{
	struct hrtimer_sleeper *t =
		container_of(timer, struct hrtimer_sleeper, timer);
	struct task_struct *task = t->task;

	t->task = NULL;
	if (task)
		wake_up_process(task);

	return HRTIMER_NORESTART;
}

/**
 * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
 * @sl:		sleeper to be started
 * @mode:	timer mode abs/rel
 *
 * Wrapper around hrtimer_start_expires() for hrtimer_sleeper-based timers
 * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
 */
void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
				   enum hrtimer_mode mode)
{
	/*
	 * Make the enqueue delivery mode check work on RT. If the sleeper
	 * was initialized for hard interrupt delivery, force the mode bit.
	 * This is a special case for hrtimer_sleepers because
	 * __hrtimer_init_sleeper() determines the delivery mode on RT, so
	 * fiddling with this decision is avoided at the call sites.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
		mode |= HRTIMER_MODE_HARD;

	hrtimer_start_expires(&sl->timer, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);

static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
				   clockid_t clock_id, enum hrtimer_mode mode)
{
	/*
	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
	 * marked for hard interrupt expiry mode are moved into soft
	 * interrupt context either for latency reasons or because the
	 * hrtimer callback takes regular spinlocks or invokes other
	 * functions which are not suitable for hard interrupt context on
	 * PREEMPT_RT.
	 *
	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
	 * context, but there is a latency concern: Untrusted userspace can
	 * spawn many threads which arm timers for the same expiry time on
	 * the same CPU. That causes a latency spike due to the wakeup of
	 * a gazillion threads.
	 *
	 * OTOH, privileged real-time user space applications rely on the
	 * low latency of hard interrupt wakeups. If the current task is in
	 * a real-time scheduling class, mark the mode for hard interrupt
	 * expiry.
	 */
	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
		if (rt_or_dl_task_policy(current) && !(mode & HRTIMER_MODE_SOFT))
			mode |= HRTIMER_MODE_HARD;
	}

	__hrtimer_init(&sl->timer, clock_id, mode);
	sl->timer.function = hrtimer_wakeup;
	sl->task = current;
}
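
/*
 * Example (editor's sketch, not part of the original source): the usual
 * pattern for an on-stack sleeper, mirroring do_nanosleep() below and the
 * schedule_hrtimeout() helpers; the 5 ms timeout is hypothetical.
 *
 *	struct hrtimer_sleeper sl;
 *
 *	hrtimer_setup_sleeper_on_stack(&sl, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	hrtimer_set_expires(&sl.timer, ms_to_ktime(5));
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	hrtimer_sleeper_start_expires(&sl, HRTIMER_MODE_REL);
 *	if (sl.task)
 *		schedule();
 *	hrtimer_cancel(&sl.timer);
 *	__set_current_state(TASK_RUNNING);
 *	destroy_hrtimer_on_stack(&sl.timer);
 */
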
/**
 * hrtimer_setup_sleeper_on_stack - initialize a sleeper in stack memory
 * @sl:		sleeper to be initialized
 * @clock_id:	the clock to be used
 * @mode:	timer mode abs/rel
 */
void hrtimer_setup_sleeper_on_stack(struct hrtimer_sleeper *sl,
				    clockid_t clock_id, enum hrtimer_mode mode)
{
	debug_init_on_stack(&sl->timer, clock_id, mode);
	__hrtimer_init_sleeper(sl, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_setup_sleeper_on_stack);

int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
{
	switch (restart->nanosleep.type) {
#ifdef CONFIG_COMPAT_32BIT_TIME
	case TT_COMPAT:
		if (put_old_timespec32(ts, restart->nanosleep.compat_rmtp))
			return -EFAULT;
		break;
#endif
	case TT_NATIVE:
		if (put_timespec64(ts, restart->nanosleep.rmtp))
			return -EFAULT;
		break;
	default:
		BUG();
	}
	return -ERESTART_RESTARTBLOCK;
}

static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
{
	struct restart_block *restart;

	do {
		set_current_state(TASK_INTERRUPTIBLE|TASK_FREEZABLE);
		hrtimer_sleeper_start_expires(t, mode);

		if (likely(t->task))
			schedule();

		hrtimer_cancel(&t->timer);
		mode = HRTIMER_MODE_ABS;

	} while (t->task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);

	if (!t->task)
		return 0;

	restart = &current->restart_block;
	if (restart->nanosleep.type != TT_NONE) {
		ktime_t rem = hrtimer_expires_remaining(&t->timer);
		struct timespec64 rmt;

		if (rem <= 0)
			return 0;
		rmt = ktime_to_timespec64(rem);

		return nanosleep_copyout(restart, &rmt);
	}
	return -ERESTART_RESTARTBLOCK;
}

static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
{
	struct hrtimer_sleeper t;
	int ret;

	hrtimer_setup_sleeper_on_stack(&t, restart->nanosleep.clockid, HRTIMER_MODE_ABS);
	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
}

long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
		       const clockid_t clockid)
{
	struct restart_block *restart;
	struct hrtimer_sleeper t;
	int ret = 0;

	hrtimer_setup_sleeper_on_stack(&t, clockid, mode);
	hrtimer_set_expires_range_ns(&t.timer, rqtp, current->timer_slack_ns);
	ret = do_nanosleep(&t, mode);
	if (ret != -ERESTART_RESTARTBLOCK)
		goto out;

	/* Absolute timers do not update the rmtp value and restart: */
	if (mode == HRTIMER_MODE_ABS) {
		ret = -ERESTARTNOHAND;
		goto out;
	}

	restart = &current->restart_block;
	restart->nanosleep.clockid = t.timer.base->clockid;
	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
	set_restart_fn(restart, hrtimer_nanosleep_restart);
out:
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
}
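
/*
 * Example (editor's sketch, not part of the original source): how the
 * remaining-time handling above looks from userspace (<time.h>, <errno.h>).
 * A relative nanosleep() interrupted by a signal reports the unslept time
 * via its second argument, while an absolute clock_nanosleep(TIMER_ABSTIME)
 * does not; the caller simply retries with the same absolute deadline.
 *
 *	struct timespec req = { .tv_sec = 1, .tv_nsec = 0 };
 *	struct timespec rem;
 *
 *	while (nanosleep(&req, &rem) == -1 && errno == EINTR)
 *		req = rem;	// continue sleeping for the remainder
 */
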
#ifdef CONFIG_64BIT

SYSCALL_DEFINE2(nanosleep, struct __kernel_timespec __user *, rqtp,
		struct __kernel_timespec __user *, rmtp)
{
	struct timespec64 tu;

	if (get_timespec64(&tu, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&tu))
		return -EINVAL;

	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
	current->restart_block.nanosleep.rmtp = rmtp;
	return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
				 CLOCK_MONOTONIC);
}

#endif

#ifdef CONFIG_COMPAT_32BIT_TIME

SYSCALL_DEFINE2(nanosleep_time32, struct old_timespec32 __user *, rqtp,
		struct old_timespec32 __user *, rmtp)
{
	struct timespec64 tu;

	if (get_old_timespec32(&tu, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&tu))
		return -EINVAL;

	current->restart_block.fn = do_no_restart_syscall;
	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
	current->restart_block.nanosleep.compat_rmtp = rmtp;
	return hrtimer_nanosleep(timespec64_to_ktime(tu), HRTIMER_MODE_REL,
				 CLOCK_MONOTONIC);
}
#endif
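
/*
 * Example (editor's illustration, not part of the original source): what the
 * timespec64_valid() check in the syscalls above accepts and rejects. A
 * request is valid only if tv_sec is non-negative and tv_nsec lies within
 * [0, NSEC_PER_SEC):
 *
 *	struct timespec64 ok   = { .tv_sec = 1,  .tv_nsec = 500000000 };	// accepted
 *	struct timespec64 bad1 = { .tv_sec = -1, .tv_nsec = 0 };		// -EINVAL
 *	struct timespec64 bad2 = { .tv_sec = 0,  .tv_nsec = NSEC_PER_SEC };	// -EINVAL
 */
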
/*
 * Functions related to boot-time initialization:
 */
int hrtimers_prepare_cpu(unsigned int cpu)
{
	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
	int i;

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		struct hrtimer_clock_base *clock_b = &cpu_base->clock_base[i];

		clock_b->cpu_base = cpu_base;
		seqcount_raw_spinlock_init(&clock_b->seq, &cpu_base->lock);
		timerqueue_init_head(&clock_b->active);
	}

	cpu_base->cpu = cpu;
	cpu_base->active_bases = 0;
	cpu_base->hres_active = 0;
	cpu_base->hang_detected = 0;
	cpu_base->next_timer = NULL;
	cpu_base->softirq_next_timer = NULL;
	cpu_base->expires_next = KTIME_MAX;
	cpu_base->softirq_expires_next = KTIME_MAX;
	cpu_base->online = 1;
	hrtimer_cpu_base_init_expiry_lock(cpu_base);
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU

static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
				 struct hrtimer_clock_base *new_base)
{
	struct hrtimer *timer;
	struct timerqueue_node *node;

	while ((node = timerqueue_getnext(&old_base->active))) {
		timer = container_of(node, struct hrtimer, node);
		BUG_ON(hrtimer_callback_running(timer));
		debug_deactivate(timer);

		/*
		 * Mark it as ENQUEUED, not INACTIVE, otherwise the
		 * timer could be seen as !active and just vanish away
		 * under us on another CPU
		 */
		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
		timer->base = new_base;
		/*
		 * Enqueue the timers on the new CPU. This does not
		 * reprogram the event device in case the timer
		 * expires before the earliest on this CPU, but we run
		 * hrtimer_interrupt after we migrated everything to
		 * sort out already expired timers and reprogram the
		 * event device.
		 */
		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
	}
}

int hrtimers_cpu_dying(unsigned int dying_cpu)
{
	int i, ncpu = cpumask_any_and(cpu_active_mask, housekeeping_cpumask(HK_TYPE_TIMER));
	struct hrtimer_cpu_base *old_base, *new_base;

	old_base = this_cpu_ptr(&hrtimer_bases);
	new_base = &per_cpu(hrtimer_bases, ncpu);

	/*
	 * The caller is globally serialized and nobody else takes two
	 * locks at once, so deadlock is not possible.
	 */
	raw_spin_lock(&old_base->lock);
	raw_spin_lock_nested(&new_base->lock, SINGLE_DEPTH_NESTING);

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		migrate_hrtimer_list(&old_base->clock_base[i],
				     &new_base->clock_base[i]);
	}

	/*
	 * The migration might have changed the first expiring softirq
	 * timer on this CPU. Update it.
	 */
	__hrtimer_get_next_event(new_base, HRTIMER_ACTIVE_SOFT);
	/* Tell the other CPU to retrigger the next event */
	smp_call_function_single(ncpu, retrigger_next_event, NULL, 0);

	raw_spin_unlock(&new_base->lock);
	old_base->online = 0;
	raw_spin_unlock(&old_base->lock);

	return 0;
}

#endif /* CONFIG_HOTPLUG_CPU */

void __init hrtimers_init(void)
{
	hrtimers_prepare_cpu(smp_processor_id());
	open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
}
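
/*
 * Example (editor's sketch, not part of the original source): a minimal
 * periodic hrtimer user tying the pieces above together, assuming the
 * hrtimer_setup() initializer of recent kernels (older code uses
 * hrtimer_init() plus an explicit function assignment); demo_timer, demo_fn
 * and the 100 ms period are hypothetical.
 *
 *	static struct hrtimer demo_timer;
 *
 *	static enum hrtimer_restart demo_fn(struct hrtimer *t)
 *	{
 *		hrtimer_forward_now(t, ms_to_ktime(100));
 *		return HRTIMER_RESTART;		// requeued by __run_hrtimer()
 *	}
 *
 *	static void demo_start(void)
 *	{
 *		hrtimer_setup(&demo_timer, demo_fn, CLOCK_MONOTONIC,
 *			      HRTIMER_MODE_REL);
 *		hrtimer_start(&demo_timer, ms_to_ktime(100), HRTIMER_MODE_REL);
 *	}
 *
 *	static void demo_stop(void)
 *	{
 *		hrtimer_cancel(&demo_timer);
 *	}
 */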