/*
 *  linux/kernel/hrtimer.c
 *
 *  Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
 *  Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 *  Copyright(C) 2006-2007  Timesys Corp., Thomas Gleixner
 *
 *  High-resolution kernel timers
 *
 *  In contrast to the low-resolution timeout API implemented in
 *  kernel/timer.c, hrtimers provide finer resolution and accuracy
 *  depending on system configuration and capabilities.
 *
 *  These timers are currently used for:
 *   - itimers
 *   - POSIX timers
 *   - nanosleep
 *   - precise in-kernel timing
 *
 *  Started by: Thomas Gleixner and Ingo Molnar
 *
 *  Credits:
 *	based on kernel/timer.c
 *
 *	Help, testing, suggestions, bugfixes, improvements were
 *	provided by:
 *
 *	George Anzinger, Andrew Morton, Steven Rostedt, Roman Zippel
 *	et al.
 *
 *  For licensing details see kernel-base/COPYING
 */

#include <linux/cpu.h>
#include <linux/export.h>
#include <linux/percpu.h>
#include <linux/hrtimer.h>
#include <linux/notifier.h>
#include <linux/syscalls.h>
#include <linux/kallsyms.h>
#include <linux/interrupt.h>
#include <linux/tick.h>
#include <linux/seq_file.h>
#include <linux/err.h>
#include <linux/debugobjects.h>
#include <linux/sched/signal.h>
#include <linux/sched/sysctl.h>
#include <linux/sched/rt.h>
#include <linux/sched/deadline.h>
#include <linux/sched/nohz.h>
#include <linux/sched/debug.h>
#include <linux/timer.h>
#include <linux/freezer.h>
#include <linux/compat.h>

#include <linux/uaccess.h>

#include <trace/events/timer.h>

#include "tick-internal.h"

/*
 * Masks for selecting the soft and hard context timers from
 * cpu_base->active
 */
#define MASK_SHIFT		(HRTIMER_BASE_MONOTONIC_SOFT)
#define HRTIMER_ACTIVE_HARD	((1U << MASK_SHIFT) - 1)
#define HRTIMER_ACTIVE_SOFT	(HRTIMER_ACTIVE_HARD << MASK_SHIFT)
#define HRTIMER_ACTIVE_ALL	(HRTIMER_ACTIVE_SOFT | HRTIMER_ACTIVE_HARD)

/*
 * The timer bases:
 *
 * There are more clockids than hrtimer bases. Thus, we index
 * into the timer bases by the hrtimer_base_type enum. When trying
 * to reach a base using a clockid, hrtimer_clockid_to_base()
 * is used to convert from clockid to the proper hrtimer_base_type.
 */
DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
{
	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
	.clock_base =
	{
		{
			.index = HRTIMER_BASE_MONOTONIC,
			.clockid = CLOCK_MONOTONIC,
			.get_time = &ktime_get,
		},
		{
			.index = HRTIMER_BASE_REALTIME,
			.clockid = CLOCK_REALTIME,
			.get_time = &ktime_get_real,
		},
		{
			.index = HRTIMER_BASE_BOOTTIME,
			.clockid = CLOCK_BOOTTIME,
			.get_time = &ktime_get_boottime,
		},
		{
			.index = HRTIMER_BASE_TAI,
			.clockid = CLOCK_TAI,
			.get_time = &ktime_get_clocktai,
		},
		{
			.index = HRTIMER_BASE_MONOTONIC_SOFT,
			.clockid = CLOCK_MONOTONIC,
			.get_time = &ktime_get,
		},
		{
			.index = HRTIMER_BASE_REALTIME_SOFT,
			.clockid = CLOCK_REALTIME,
			.get_time = &ktime_get_real,
		},
		{
			.index = HRTIMER_BASE_BOOTTIME_SOFT,
			.clockid = CLOCK_BOOTTIME,
			.get_time = &ktime_get_boottime,
		},
		{
			.index = HRTIMER_BASE_TAI_SOFT,
			.clockid = CLOCK_TAI,
			.get_time = &ktime_get_clocktai,
		},
	}
};

static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
	/* Make sure we catch unsupported clockids */
	[0 ... MAX_CLOCKS - 1]	= HRTIMER_MAX_CLOCK_BASES,

	[CLOCK_REALTIME]	= HRTIMER_BASE_REALTIME,
	[CLOCK_MONOTONIC]	= HRTIMER_BASE_MONOTONIC,
	[CLOCK_BOOTTIME]	= HRTIMER_BASE_BOOTTIME,
	[CLOCK_TAI]		= HRTIMER_BASE_TAI,
};

/*
 * Functions and macros which are different for UP/SMP systems are kept in a
 * single place
 */
#ifdef CONFIG_SMP

/*
 * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
 * such that hrtimer_callback_running() can unconditionally dereference
 * timer->base->cpu_base
 */
static struct hrtimer_cpu_base migration_cpu_base = {
	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
};

#define migration_base	migration_cpu_base.clock_base[0]

/*
 * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
 * means that all timers which are tied to this base via timer->base are
 * locked, and the base itself is locked too.
 *
 * So __run_timers/migrate_timers can safely modify all timers which could
 * be found on the lists/queues.
 *
 * When the timer's base is locked, and the timer removed from list, it is
 * possible to set timer->base = &migration_base and drop the lock: the timer
 * remains locked.
 */
static
struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
					     unsigned long *flags)
{
	struct hrtimer_clock_base *base;

	for (;;) {
		base = timer->base;
		if (likely(base != &migration_base)) {
			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
			if (likely(base == timer->base))
				return base;
			/* The timer has migrated to another CPU: */
			raw_spin_unlock_irqrestore(&base->cpu_base->lock, *flags);
		}
		cpu_relax();
	}
}

/*
 * We do not migrate the timer when it is expiring before the next
 * event on the target cpu. When high resolution is enabled, we cannot
 * reprogram the target cpu hardware and we would cause it to fire
 * late. To keep it simple, we handle the high resolution enabled and
 * disabled cases similarly.
 *
 * Called with cpu_base->lock of target cpu held.
 */
static int
hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base)
{
	ktime_t expires;

	expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset);
	return expires < new_base->cpu_base->expires_next;
}

static inline
struct hrtimer_cpu_base *get_target_base(struct hrtimer_cpu_base *base,
					 int pinned)
{
#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
	if (static_branch_likely(&timers_migration_enabled) && !pinned)
		return &per_cpu(hrtimer_bases, get_nohz_timer_target());
#endif
	return base;
}

/*
 * We switch the timer base to a power-optimized CPU target, if:
 *	- NO_HZ_COMMON is enabled
 *	- timer migration is enabled
 *	- the timer callback is not running
 *	- the timer is not the first expiring timer on the new target
 *
 * If one of the above requirements is not fulfilled we move the timer
 * to the current CPU or leave it on the previously assigned CPU if
 * the timer callback is currently running.
 */
static inline struct hrtimer_clock_base *
switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base,
		    int pinned)
{
	struct hrtimer_cpu_base *new_cpu_base, *this_cpu_base;
	struct hrtimer_clock_base *new_base;
	int basenum = base->index;

	this_cpu_base = this_cpu_ptr(&hrtimer_bases);
	new_cpu_base = get_target_base(this_cpu_base, pinned);
again:
	new_base = &new_cpu_base->clock_base[basenum];

	if (base != new_base) {
		/*
		 * We are trying to move timer to new_base.
		 * However we can't change timer's base while it is running,
		 * so we keep it on the same CPU. No hassle vs. reprogramming
		 * the event source in the high resolution case. The softirq
		 * code will take care of this when the timer function has
		 * completed. There is no conflict as we hold the lock until
		 * the timer is enqueued.
		 */
		if (unlikely(hrtimer_callback_running(timer)))
			return base;

		/* See the comment in lock_hrtimer_base() */
		timer->base = &migration_base;
		raw_spin_unlock(&base->cpu_base->lock);
		raw_spin_lock(&new_base->cpu_base->lock);

		if (new_cpu_base != this_cpu_base &&
		    hrtimer_check_target(timer, new_base)) {
			raw_spin_unlock(&new_base->cpu_base->lock);
			raw_spin_lock(&base->cpu_base->lock);
			new_cpu_base = this_cpu_base;
			timer->base = base;
			goto again;
		}
		timer->base = new_base;
	} else {
		if (new_cpu_base != this_cpu_base &&
		    hrtimer_check_target(timer, new_base)) {
			new_cpu_base = this_cpu_base;
			goto again;
		}
	}
	return new_base;
}

#else /* CONFIG_SMP */

static inline struct hrtimer_clock_base *
lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
	struct hrtimer_clock_base *base = timer->base;

	raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);

	return base;
}

# define switch_hrtimer_base(t, b, p)	(b)

#endif /* !CONFIG_SMP */

/*
 * Functions for the union type storage format of ktime_t which are
 * too large for inlining:
 */
#if BITS_PER_LONG < 64
/*
 * Divide a ktime value by a nanosecond value
 */
s64 __ktime_divns(const ktime_t kt, s64 div)
{
	int sft = 0;
	s64 dclc;
	u64 tmp;

	dclc = ktime_to_ns(kt);
	tmp = dclc < 0 ? -dclc : dclc;

	/* Make sure the divisor is less than 2^32: */
	while (div >> 32) {
		sft++;
		div >>= 1;
	}
	tmp >>= sft;
	do_div(tmp, (unsigned long) div);
	return dclc < 0 ? -tmp : tmp;
}
EXPORT_SYMBOL_GPL(__ktime_divns);
#endif /* BITS_PER_LONG < 64 */

/*
 * Add two ktime values and do a safety check for overflow:
 */
ktime_t ktime_add_safe(const ktime_t lhs, const ktime_t rhs)
{
	ktime_t res = ktime_add_unsafe(lhs, rhs);

	/*
	 * We use KTIME_SEC_MAX here, the maximum timeout which we can
	 * return to user space in a timespec:
	 */
	if (res < 0 || res < lhs || res < rhs)
		res = ktime_set(KTIME_SEC_MAX, 0);

	return res;
}

EXPORT_SYMBOL_GPL(ktime_add_safe);

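/*
 * Usage sketch (illustrative, hypothetical caller): code that builds an
 * absolute expiry from a caller-supplied relative timeout relies on the
 * clamping above:
 *
 *	ktime_t expires = ktime_add_safe(ktime_get(), user_timeout);
 *
 * If user_timeout is huge, the sum saturates at KTIME_SEC_MAX seconds
 * instead of wrapping negative, so the timer ends up queued far in the
 * future rather than firing immediately.
 */
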
#ifdef CONFIG_DEBUG_OBJECTS_TIMERS

static struct debug_obj_descr hrtimer_debug_descr;

static void *hrtimer_debug_hint(void *addr)
{
	return ((struct hrtimer *) addr)->function;
}

/*
 * fixup_init is called when:
 * - an active object is initialized
 */
static bool hrtimer_fixup_init(void *addr, enum debug_obj_state state)
{
	struct hrtimer *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		hrtimer_cancel(timer);
		debug_object_init(timer, &hrtimer_debug_descr);
		return true;
	default:
		return false;
	}
}

/*
 * fixup_activate is called when:
 * - an active object is activated
 * - an unknown non-static object is activated
 */
static bool hrtimer_fixup_activate(void *addr, enum debug_obj_state state)
{
	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		WARN_ON(1);

	default:
		return false;
	}
}

/*
 * fixup_free is called when:
 * - an active object is freed
 */
static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
{
	struct hrtimer *timer = addr;

	switch (state) {
	case ODEBUG_STATE_ACTIVE:
		hrtimer_cancel(timer);
		debug_object_free(timer, &hrtimer_debug_descr);
		return true;
	default:
		return false;
	}
}

static struct debug_obj_descr hrtimer_debug_descr = {
	.name		= "hrtimer",
	.debug_hint	= hrtimer_debug_hint,
	.fixup_init	= hrtimer_fixup_init,
	.fixup_activate	= hrtimer_fixup_activate,
	.fixup_free	= hrtimer_fixup_free,
};

static inline void debug_hrtimer_init(struct hrtimer *timer)
{
	debug_object_init(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_activate(struct hrtimer *timer,
					  enum hrtimer_mode mode)
{
	debug_object_activate(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_deactivate(struct hrtimer *timer)
{
	debug_object_deactivate(timer, &hrtimer_debug_descr);
}

static inline void debug_hrtimer_free(struct hrtimer *timer)
{
	debug_object_free(timer, &hrtimer_debug_descr);
}

static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode);

void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode)
{
	debug_object_init_on_stack(timer, &hrtimer_debug_descr);
	__hrtimer_init(timer, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);

void destroy_hrtimer_on_stack(struct hrtimer *timer)
{
	debug_object_free(timer, &hrtimer_debug_descr);
}
EXPORT_SYMBOL_GPL(destroy_hrtimer_on_stack);

#else

static inline void debug_hrtimer_init(struct hrtimer *timer) { }
static inline void debug_hrtimer_activate(struct hrtimer *timer,
					  enum hrtimer_mode mode) { }
static inline void debug_hrtimer_deactivate(struct hrtimer *timer) { }
#endif

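/*
 * Usage sketch (illustrative, hypothetical caller): stack-allocated timers
 * must go through the *_on_stack helpers so the debugobjects state is set
 * up and torn down together with the stack frame:
 *
 *	struct hrtimer_sleeper t;
 *
 *	hrtimer_init_on_stack(&t.timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	...
 *	hrtimer_cancel(&t.timer);
 *	destroy_hrtimer_on_stack(&t.timer);
 *
 * hrtimer_nanosleep() and do_nanosleep() further down in this file follow
 * exactly this pattern.
 */
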
static inline void
debug_init(struct hrtimer *timer, clockid_t clockid,
	   enum hrtimer_mode mode)
{
	debug_hrtimer_init(timer);
	trace_hrtimer_init(timer, clockid, mode);
}

static inline void debug_activate(struct hrtimer *timer,
				  enum hrtimer_mode mode)
{
	debug_hrtimer_activate(timer, mode);
	trace_hrtimer_start(timer, mode);
}

static inline void debug_deactivate(struct hrtimer *timer)
{
	debug_hrtimer_deactivate(timer);
	trace_hrtimer_cancel(timer);
}

static struct hrtimer_clock_base *
__next_base(struct hrtimer_cpu_base *cpu_base, unsigned int *active)
{
	unsigned int idx;

	if (!*active)
		return NULL;

	idx = __ffs(*active);
	*active &= ~(1U << idx);

	return &cpu_base->clock_base[idx];
}

#define for_each_active_base(base, cpu_base, active)	\
	while ((base = __next_base((cpu_base), &(active))))

static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
					 unsigned int active,
					 ktime_t expires_next)
{
	struct hrtimer_clock_base *base;
	ktime_t expires;

	for_each_active_base(base, cpu_base, active) {
		struct timerqueue_node *next;
		struct hrtimer *timer;

		next = timerqueue_getnext(&base->active);
		timer = container_of(next, struct hrtimer, node);
		expires = ktime_sub(hrtimer_get_expires(timer), base->offset);
		if (expires < expires_next) {
			expires_next = expires;
			if (timer->is_soft)
				cpu_base->softirq_next_timer = timer;
			else
				cpu_base->next_timer = timer;
		}
	}
	/*
	 * clock_was_set() might have changed base->offset of any of
	 * the clock bases so the result might be negative. Fix it up
	 * to prevent a false positive in clockevents_program_event().
	 */
	if (expires_next < 0)
		expires_next = 0;
	return expires_next;
}

/*
 * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
 * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
 *
 * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
 * those timers will get run whenever the softirq gets handled; at the end of
 * hrtimer_run_softirq(), hrtimer_update_softirq_timer() will re-add these bases.
 *
 * Therefore softirq values are those from the HRTIMER_ACTIVE_SOFT clock bases.
 * The !softirq values are the minima across HRTIMER_ACTIVE_ALL, unless an actual
 * softirq is pending, in which case they're the minima of HRTIMER_ACTIVE_HARD.
 *
 * @active_mask must be one of:
 *  - HRTIMER_ACTIVE_ALL,
 *  - HRTIMER_ACTIVE_SOFT, or
 *  - HRTIMER_ACTIVE_HARD.
 */
static ktime_t
__hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_mask)
{
	unsigned int active;
	struct hrtimer *next_timer = NULL;
	ktime_t expires_next = KTIME_MAX;

	if (!cpu_base->softirq_activated && (active_mask & HRTIMER_ACTIVE_SOFT)) {
		active = cpu_base->active_bases & HRTIMER_ACTIVE_SOFT;
		cpu_base->softirq_next_timer = NULL;
		expires_next = __hrtimer_next_event_base(cpu_base, active, KTIME_MAX);

		next_timer = cpu_base->softirq_next_timer;
	}

	if (active_mask & HRTIMER_ACTIVE_HARD) {
		active = cpu_base->active_bases & HRTIMER_ACTIVE_HARD;
		cpu_base->next_timer = next_timer;
		expires_next = __hrtimer_next_event_base(cpu_base, active, expires_next);
	}

	return expires_next;
}

static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
{
	ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
	ktime_t *offs_boot = &base->clock_base[HRTIMER_BASE_BOOTTIME].offset;
	ktime_t *offs_tai = &base->clock_base[HRTIMER_BASE_TAI].offset;

	ktime_t now = ktime_get_update_offsets_now(&base->clock_was_set_seq,
						   offs_real, offs_boot, offs_tai);

	base->clock_base[HRTIMER_BASE_REALTIME_SOFT].offset = *offs_real;
	base->clock_base[HRTIMER_BASE_BOOTTIME_SOFT].offset = *offs_boot;
	base->clock_base[HRTIMER_BASE_TAI_SOFT].offset = *offs_tai;

	return now;
}

/*
 * Is the high resolution mode active ?
 */
static inline int __hrtimer_hres_active(struct hrtimer_cpu_base *cpu_base)
{
	return IS_ENABLED(CONFIG_HIGH_RES_TIMERS) ?
		cpu_base->hres_active : 0;
}

static inline int hrtimer_hres_active(void)
{
	return __hrtimer_hres_active(this_cpu_ptr(&hrtimer_bases));
}

/*
 * Reprogram the event source after checking both queues for the
 * next event.
 * Called with interrupts disabled and base->lock held
 */
static void
hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
{
	ktime_t expires_next;

	/*
	 * Find the current next expiration time.
	 */
	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);

	if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
		/*
		 * When the softirq is activated, hrtimer has to be
		 * programmed with the first hard hrtimer because soft
		 * timer interrupt could occur too late.
		 */
		if (cpu_base->softirq_activated)
			expires_next = __hrtimer_get_next_event(cpu_base,
								HRTIMER_ACTIVE_HARD);
		else
			cpu_base->softirq_expires_next = expires_next;
	}

	if (skip_equal && expires_next == cpu_base->expires_next)
		return;

	cpu_base->expires_next = expires_next;

	/*
	 * If hres is not active, hardware does not have to be
	 * reprogrammed yet.
	 *
	 * If a hang was detected in the last timer interrupt then we
	 * leave the hang delay active in the hardware. We want the
	 * system to make progress. That also prevents the following
	 * scenario:
	 * T1 expires 50ms from now
	 * T2 expires 5s from now
	 *
	 * T1 is removed, so this code is called and would reprogram
	 * the hardware to 5s from now. Any hrtimer_start after that
	 * will not reprogram the hardware due to hang_detected being
	 * set. So we'd effectively block all timers until the T2 event
	 * fires.
	 */
	if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
		return;

	tick_program_event(cpu_base->expires_next, 1);
}

/* High resolution timer related functions */
#ifdef CONFIG_HIGH_RES_TIMERS

/*
 * High resolution timer enabled ?
 */
static bool hrtimer_hres_enabled __read_mostly  = true;
unsigned int hrtimer_resolution __read_mostly = LOW_RES_NSEC;
EXPORT_SYMBOL_GPL(hrtimer_resolution);

/*
 * Enable / Disable high resolution mode
 */
static int __init setup_hrtimer_hres(char *str)
{
	return (kstrtobool(str, &hrtimer_hres_enabled) == 0);
}

__setup("highres=", setup_hrtimer_hres);

/*
 * hrtimer_high_res_enabled - query whether the highres mode is enabled
 */
static inline int hrtimer_is_hres_enabled(void)
{
	return hrtimer_hres_enabled;
}

/*
 * Retrigger next event is called after clock was set
 *
 * Called with interrupts disabled via on_each_cpu()
 */
static void retrigger_next_event(void *arg)
{
	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);

	if (!__hrtimer_hres_active(base))
		return;

	raw_spin_lock(&base->lock);
	hrtimer_update_base(base);
	hrtimer_force_reprogram(base, 0);
	raw_spin_unlock(&base->lock);
}

/*
 * Switch to high resolution mode
 */
static void hrtimer_switch_to_hres(void)
{
	struct hrtimer_cpu_base *base = this_cpu_ptr(&hrtimer_bases);

	if (tick_init_highres()) {
		printk(KERN_WARNING "Could not switch to high resolution "
				    "mode on CPU %d\n", base->cpu);
		return;
	}
	base->hres_active = 1;
	hrtimer_resolution = HIGH_RES_NSEC;

	tick_setup_sched_timer();
	/* "Retrigger" the interrupt to get things going */
	retrigger_next_event(NULL);
}

static void clock_was_set_work(struct work_struct *work)
{
	clock_was_set();
}

static DECLARE_WORK(hrtimer_work, clock_was_set_work);

/*
 * Called from timekeeping and resume code to reprogram the hrtimer
 * interrupt device on all cpus.
 */
void clock_was_set_delayed(void)
{
	schedule_work(&hrtimer_work);
}

#else

static inline int hrtimer_is_hres_enabled(void) { return 0; }
static inline void hrtimer_switch_to_hres(void) { }
static inline void retrigger_next_event(void *arg) { }

#endif /* CONFIG_HIGH_RES_TIMERS */

/*
 * When a timer is enqueued and expires earlier than the already enqueued
 * timers, we have to check whether it expires earlier than the timer for
 * which the clock event device was armed.
 *
 * Called with interrupts disabled and base->cpu_base.lock held
 */
static void hrtimer_reprogram(struct hrtimer *timer, bool reprogram)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	struct hrtimer_clock_base *base = timer->base;
	ktime_t expires = ktime_sub(hrtimer_get_expires(timer), base->offset);

	WARN_ON_ONCE(hrtimer_get_expires_tv64(timer) < 0);

	/*
	 * CLOCK_REALTIME timer might be requested with an absolute
	 * expiry time which is less than base->offset. Set it to 0.
	 */
	if (expires < 0)
		expires = 0;

	if (timer->is_soft) {
		/*
		 * soft hrtimer could be started on a remote CPU. In this
		 * case softirq_expires_next needs to be updated on the
		 * remote CPU. The soft hrtimer will not expire before the
		 * first hard hrtimer on the remote CPU -
		 * hrtimer_check_target() prevents this case.
		 */
		struct hrtimer_cpu_base *timer_cpu_base = base->cpu_base;

		if (timer_cpu_base->softirq_activated)
			return;

		if (!ktime_before(expires, timer_cpu_base->softirq_expires_next))
			return;

		timer_cpu_base->softirq_next_timer = timer;
		timer_cpu_base->softirq_expires_next = expires;

		if (!ktime_before(expires, timer_cpu_base->expires_next) ||
		    !reprogram)
			return;
	}

	/*
	 * If the timer is not on the current cpu, we cannot reprogram
	 * the other cpus clock event device.
	 */
	if (base->cpu_base != cpu_base)
		return;

	/*
	 * If the hrtimer interrupt is running, then it will
	 * reevaluate the clock bases and reprogram the clock event
	 * device. The callbacks are always executed in hard interrupt
	 * context so we don't need an extra check for a running
	 * callback.
	 */
	if (cpu_base->in_hrtirq)
		return;

	if (expires >= cpu_base->expires_next)
		return;

	/* Update the pointer to the next expiring timer */
	cpu_base->next_timer = timer;
	cpu_base->expires_next = expires;

	/*
	 * If hres is not active, hardware does not have to be
	 * programmed yet.
	 *
	 * If a hang was detected in the last timer interrupt then we
	 * do not schedule a timer which is earlier than the expiry
	 * which we enforced in the hang detection. We want the system
	 * to make progress.
	 */
	if (!__hrtimer_hres_active(cpu_base) || cpu_base->hang_detected)
		return;

	/*
	 * Program the timer hardware. We enforce the expiry for
	 * events which are already in the past.
	 */
	tick_program_event(expires, 1);
}

/*
 * Clock realtime was set
 *
 * Change the offset of the realtime clock vs. the monotonic
 * clock.
 *
 * We might have to reprogram the high resolution timer interrupt. On
 * SMP we call the architecture specific code to retrigger _all_ high
 * resolution timer interrupts. On UP we just disable interrupts and
 * call the high resolution interrupt code.
 */
void clock_was_set(void)
{
#ifdef CONFIG_HIGH_RES_TIMERS
	/* Retrigger the CPU local events everywhere */
	on_each_cpu(retrigger_next_event, NULL, 1);
#endif
	timerfd_clock_was_set();
}

/*
 * During resume we might have to reprogram the high resolution timer
 * interrupt on all online CPUs. However, all other CPUs will be
 * stopped with interrupts disabled so the clock_was_set() call
 * must be deferred.
 */
void hrtimers_resume(void)
{
	lockdep_assert_irqs_disabled();
	/* Retrigger on the local CPU */
	retrigger_next_event(NULL);
	/* And schedule a retrigger for all others */
	clock_was_set_delayed();
}

/*
 * Counterpart to lock_hrtimer_base above:
 */
static inline
void unlock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
{
	raw_spin_unlock_irqrestore(&timer->base->cpu_base->lock, *flags);
}

/**
 * hrtimer_forward - forward the timer expiry
 * @timer:	hrtimer to forward
 * @now:	forward past this time
 * @interval:	the interval to forward
 *
 * Forward the timer expiry so it will expire in the future.
 * Returns the number of overruns.
 *
 * Can be safely called from the callback function of @timer. If
 * called from other contexts @timer must neither be enqueued nor
 * running the callback and the caller needs to take care of
 * serialization.
 *
 * Note: This only updates the timer expiry value and does not requeue
 * the timer.
 */
u64 hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval)
{
	u64 orun = 1;
	ktime_t delta;

	delta = ktime_sub(now, hrtimer_get_expires(timer));

	if (delta < 0)
		return 0;

	if (WARN_ON(timer->state & HRTIMER_STATE_ENQUEUED))
		return 0;

	if (interval < hrtimer_resolution)
		interval = hrtimer_resolution;

	if (unlikely(delta >= interval)) {
		s64 incr = ktime_to_ns(interval);

		orun = ktime_divns(delta, incr);
		hrtimer_add_expires_ns(timer, incr * orun);
		if (hrtimer_get_expires_tv64(timer) > now)
			return orun;
		/*
		 * This (and the ktime_add() below) is the
		 * correction for exact:
		 */
		orun++;
	}
	hrtimer_add_expires(timer, interval);

	return orun;
}
EXPORT_SYMBOL_GPL(hrtimer_forward);

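/*
 * Usage sketch (illustrative, hypothetical callback): the canonical user of
 * hrtimer_forward() is a periodic callback that pushes its own expiry
 * forward and asks to be restarted:
 *
 *	static enum hrtimer_restart my_tick(struct hrtimer *timer)
 *	{
 *		// handle the tick, then advance the expiry by the period
 *		hrtimer_forward(timer, timer->base->get_time(),
 *				ms_to_ktime(10));
 *		return HRTIMER_RESTART;
 *	}
 *
 * hrtimer_forward_now() in <linux/hrtimer.h> wraps the get_time() call for
 * exactly this pattern. The returned overrun count tells the callback how
 * many periods were skipped if it ran late.
 */
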
/*
 * enqueue_hrtimer - internal function to (re)start a timer
 *
 * The timer is inserted in expiry order. Insertion into the
 * red-black tree is O(log(n)). Must hold the base lock.
 *
 * Returns 1 when the new timer is the leftmost timer in the tree.
 */
static int enqueue_hrtimer(struct hrtimer *timer,
			   struct hrtimer_clock_base *base,
			   enum hrtimer_mode mode)
{
	debug_activate(timer, mode);

	base->cpu_base->active_bases |= 1 << base->index;

	timer->state = HRTIMER_STATE_ENQUEUED;

	return timerqueue_add(&base->active, &timer->node);
}

/*
 * __remove_hrtimer - internal function to remove a timer
 *
 * Caller must hold the base lock.
 *
 * High resolution timer mode reprograms the clock event device when the
 * timer is the one which expires next. The caller can disable this by setting
 * reprogram to zero. This is useful when the context does a reprogramming
 * anyway (e.g. timer interrupt)
 */
static void __remove_hrtimer(struct hrtimer *timer,
			     struct hrtimer_clock_base *base,
			     u8 newstate, int reprogram)
{
	struct hrtimer_cpu_base *cpu_base = base->cpu_base;
	u8 state = timer->state;

	timer->state = newstate;
	if (!(state & HRTIMER_STATE_ENQUEUED))
		return;

	if (!timerqueue_del(&base->active, &timer->node))
		cpu_base->active_bases &= ~(1 << base->index);

	/*
	 * Note: If reprogram is false we do not update
	 * cpu_base->next_timer. This happens when we remove the first
	 * timer on a remote cpu. No harm as we never dereference
	 * cpu_base->next_timer. So the worst thing that can happen is
	 * a superfluous call to hrtimer_force_reprogram() on the
	 * remote cpu later on if the same timer gets enqueued again.
	 */
	if (reprogram && timer == cpu_base->next_timer)
		hrtimer_force_reprogram(cpu_base, 1);
}

/*
 * remove hrtimer, called with base lock held
 */
static inline int
remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
{
	if (hrtimer_is_queued(timer)) {
		u8 state = timer->state;
		int reprogram;

		/*
		 * Remove the timer and force reprogramming when high
		 * resolution mode is active and the timer is on the current
		 * CPU. If we remove a timer on another CPU, reprogramming is
		 * skipped. The interrupt event on this CPU is fired and
		 * reprogramming happens in the interrupt handler. This is a
		 * rare case and less expensive than a smp call.
		 */
		debug_deactivate(timer);
		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);

		if (!restart)
			state = HRTIMER_STATE_INACTIVE;

		__remove_hrtimer(timer, base, state, reprogram);
		return 1;
	}
	return 0;
}

static inline ktime_t hrtimer_update_lowres(struct hrtimer *timer, ktime_t tim,
					    const enum hrtimer_mode mode)
{
#ifdef CONFIG_TIME_LOW_RES
	/*
	 * CONFIG_TIME_LOW_RES indicates that the system has no way to return
	 * granular time values. For relative timers we add hrtimer_resolution
	 * (i.e. one jiffy) to prevent short timeouts.
	 */
	timer->is_rel = mode & HRTIMER_MODE_REL;
	if (timer->is_rel)
		tim = ktime_add_safe(tim, hrtimer_resolution);
#endif
	return tim;
}

static void
hrtimer_update_softirq_timer(struct hrtimer_cpu_base *cpu_base, bool reprogram)
{
	ktime_t expires;

	/*
	 * Find the next SOFT expiration.
	 */
	expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);

	/*
	 * reprogramming needs to be triggered, even if the next soft
	 * hrtimer expires at the same time as the next hard
	 * hrtimer. cpu_base->softirq_expires_next needs to be updated!
	 */
	if (expires == KTIME_MAX)
		return;

	/*
	 * cpu_base->*next_timer is recomputed by __hrtimer_get_next_event()
	 * cpu_base->*expires_next is only set by hrtimer_reprogram()
	 */
	hrtimer_reprogram(cpu_base->softirq_next_timer, reprogram);
}

static int __hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
				    u64 delta_ns, const enum hrtimer_mode mode,
				    struct hrtimer_clock_base *base)
{
	struct hrtimer_clock_base *new_base;

	/* Remove an active timer from the queue: */
	remove_hrtimer(timer, base, true);

	if (mode & HRTIMER_MODE_REL)
		tim = ktime_add_safe(tim, base->get_time());

	tim = hrtimer_update_lowres(timer, tim, mode);

	hrtimer_set_expires_range_ns(timer, tim, delta_ns);

	/* Switch the timer base, if necessary: */
	new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED);

	return enqueue_hrtimer(timer, new_base, mode);
}

/**
 * hrtimer_start_range_ns - (re)start an hrtimer
 * @timer:	the timer to be added
 * @tim:	expiry time
 * @delta_ns:	"slack" range for the timer
 * @mode:	timer mode: absolute (HRTIMER_MODE_ABS) or
 *		relative (HRTIMER_MODE_REL), and pinned (HRTIMER_MODE_PINNED);
 *		softirq based mode is considered for debug purpose only!
 */
void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
			    u64 delta_ns, const enum hrtimer_mode mode)
{
	struct hrtimer_clock_base *base;
	unsigned long flags;

	/*
	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
	 * match.
	 */
	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);

	base = lock_hrtimer_base(timer, &flags);

	if (__hrtimer_start_range_ns(timer, tim, delta_ns, mode, base))
		hrtimer_reprogram(timer, true);

	unlock_hrtimer_base(timer, &flags);
}
EXPORT_SYMBOL_GPL(hrtimer_start_range_ns);

/**
 * hrtimer_try_to_cancel - try to deactivate a timer
 * @timer:	hrtimer to stop
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 * -1 when the timer is currently executing the callback function and
 *    cannot be stopped
 */
int hrtimer_try_to_cancel(struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned long flags;
	int ret = -1;

	/*
	 * Check lockless first. If the timer is not active (neither
	 * enqueued nor running the callback), nothing to do here. The
	 * base lock does not serialize against a concurrent enqueue,
	 * so we can avoid taking it.
	 */
	if (!hrtimer_active(timer))
		return 0;

	base = lock_hrtimer_base(timer, &flags);

	if (!hrtimer_callback_running(timer))
		ret = remove_hrtimer(timer, base, false);

	unlock_hrtimer_base(timer, &flags);

	return ret;

}
EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);

/**
 * hrtimer_cancel - cancel a timer and wait for the handler to finish.
 * @timer:	the timer to be cancelled
 *
 * Returns:
 *  0 when the timer was not active
 *  1 when the timer was active
 */
int hrtimer_cancel(struct hrtimer *timer)
{
	for (;;) {
		int ret = hrtimer_try_to_cancel(timer);

		if (ret >= 0)
			return ret;
		cpu_relax();
	}
}
EXPORT_SYMBOL_GPL(hrtimer_cancel);

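/*
 * Usage sketch (illustrative, hypothetical caller): when the structure
 * containing a timer is about to be freed, hrtimer_cancel() is the safe
 * choice because it also waits for a concurrently running callback:
 *
 *	hrtimer_cancel(&dev->poll_timer);
 *	kfree(dev);
 *
 * hrtimer_try_to_cancel() is for contexts that must not busy-wait, or that
 * hold a lock the callback also takes, and can handle a -1 return by
 * deferring the teardown.
 */
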
/**
 * __hrtimer_get_remaining - get remaining time for the timer
 * @timer:	the timer to read
 * @adjust:	adjust relative timers when CONFIG_TIME_LOW_RES=y
 */
ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust)
{
	unsigned long flags;
	ktime_t rem;

	lock_hrtimer_base(timer, &flags);
	if (IS_ENABLED(CONFIG_TIME_LOW_RES) && adjust)
		rem = hrtimer_expires_remaining_adjusted(timer);
	else
		rem = hrtimer_expires_remaining(timer);
	unlock_hrtimer_base(timer, &flags);

	return rem;
}
EXPORT_SYMBOL_GPL(__hrtimer_get_remaining);

#ifdef CONFIG_NO_HZ_COMMON
/**
 * hrtimer_get_next_event - get the time until next expiry event
 *
 * Returns the next expiry time or KTIME_MAX if no timer is pending.
 */
u64 hrtimer_get_next_event(void)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	u64 expires = KTIME_MAX;
	unsigned long flags;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);

	if (!__hrtimer_hres_active(cpu_base))
		expires = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);

	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	return expires;
}
#endif

static inline int hrtimer_clockid_to_base(clockid_t clock_id)
{
	if (likely(clock_id < MAX_CLOCKS)) {
		int base = hrtimer_clock_to_base_table[clock_id];

		if (likely(base != HRTIMER_MAX_CLOCK_BASES))
			return base;
	}
	WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id);
	return HRTIMER_BASE_MONOTONIC;
}

static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
			   enum hrtimer_mode mode)
{
	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
	struct hrtimer_cpu_base *cpu_base;

	memset(timer, 0, sizeof(struct hrtimer));

	cpu_base = raw_cpu_ptr(&hrtimer_bases);

	/*
	 * POSIX magic: Relative CLOCK_REALTIME timers are not affected by
	 * clock modifications, so they need to become CLOCK_MONOTONIC to
	 * ensure POSIX compliance.
	 */
	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
		clock_id = CLOCK_MONOTONIC;

	base += hrtimer_clockid_to_base(clock_id);
	timer->is_soft = softtimer;
	timer->base = &cpu_base->clock_base[base];
	timerqueue_init(&timer->node);
}

/**
 * hrtimer_init - initialize a timer to the given clock
 * @timer:	the timer to be initialized
 * @clock_id:	the clock to be used
 * @mode:	The modes which are relevant for initialization:
 *		HRTIMER_MODE_ABS, HRTIMER_MODE_REL, HRTIMER_MODE_ABS_SOFT,
 *		HRTIMER_MODE_REL_SOFT
 *
 *		The PINNED variants of the above can be handed in,
 *		but the PINNED bit is ignored as pinning happens
 *		when the hrtimer is started
 */
void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
		  enum hrtimer_mode mode)
{
	debug_init(timer, clock_id, mode);
	__hrtimer_init(timer, clock_id, mode);
}
EXPORT_SYMBOL_GPL(hrtimer_init);

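/*
 * Usage sketch (illustrative, hypothetical caller): the usual pattern is to
 * initialize the timer, set its callback and then arm it:
 *
 *	static struct hrtimer my_timer;
 *
 *	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 *	my_timer.function = my_tick;
 *	hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
 *
 * hrtimer_start() is the zero-slack wrapper around hrtimer_start_range_ns()
 * declared in <linux/hrtimer.h>; the callback (my_tick above) returns
 * HRTIMER_RESTART or HRTIMER_NORESTART.
 */
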
/*
 * A timer is active, when it is enqueued into the rbtree or the
 * callback function is running or it's in the state of being migrated
 * to another cpu.
 *
 * It is important for this function to not return a false negative.
 */
bool hrtimer_active(const struct hrtimer *timer)
{
	struct hrtimer_clock_base *base;
	unsigned int seq;

	do {
		base = READ_ONCE(timer->base);
		seq = raw_read_seqcount_begin(&base->seq);

		if (timer->state != HRTIMER_STATE_INACTIVE ||
		    base->running == timer)
			return true;

	} while (read_seqcount_retry(&base->seq, seq) ||
		 base != READ_ONCE(timer->base));

	return false;
}
EXPORT_SYMBOL_GPL(hrtimer_active);

/*
 * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
 * distinct sections:
 *
 *  - queued:	the timer is queued
 *  - callback:	the timer is being run
 *  - post:	the timer is inactive or (re)queued
 *
 * On the read side we ensure we observe timer->state and cpu_base->running
 * from the same section, if anything changed while we looked at it, we retry.
 * This includes timer->base changing because sequence numbers alone are
 * insufficient for that.
 *
 * The sequence numbers are required because otherwise we could still observe
 * a false negative if the read side got smeared over multiple consecutive
 * __run_hrtimer() invocations.
 */

static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
			  struct hrtimer_clock_base *base,
			  struct hrtimer *timer, ktime_t *now,
			  unsigned long flags)
{
	enum hrtimer_restart (*fn)(struct hrtimer *);
	int restart;

	lockdep_assert_held(&cpu_base->lock);

	debug_deactivate(timer);
	base->running = timer;

	/*
	 * Separate the ->running assignment from the ->state assignment.
	 *
	 * As with a regular write barrier, this ensures the read side in
	 * hrtimer_active() cannot observe base->running == NULL &&
	 * timer->state == INACTIVE.
	 */
	raw_write_seqcount_barrier(&base->seq);

	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
	fn = timer->function;

	/*
	 * Clear the 'is relative' flag for the TIME_LOW_RES case. If the
	 * timer is restarted with a period then it becomes an absolute
	 * timer. If it's not restarted it does not matter.
	 */
	if (IS_ENABLED(CONFIG_TIME_LOW_RES))
		timer->is_rel = false;

	/*
	 * The timer is marked as running in the CPU base, so it is
	 * protected against migration to a different CPU even if the lock
	 * is dropped.
	 */
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
	trace_hrtimer_expire_entry(timer, now);
	restart = fn(timer);
	trace_hrtimer_expire_exit(timer);
	raw_spin_lock_irq(&cpu_base->lock);

	/*
	 * Note: We clear the running state after enqueue_hrtimer and
	 * we do not reprogram the event hardware. Happens either in
	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
	 *
	 * Note: Because we dropped the cpu_base->lock above,
	 * hrtimer_start_range_ns() can have popped in and enqueued the timer
	 * for us already.
	 */
	if (restart != HRTIMER_NORESTART &&
	    !(timer->state & HRTIMER_STATE_ENQUEUED))
		enqueue_hrtimer(timer, base, HRTIMER_MODE_ABS);

	/*
	 * Separate the ->running assignment from the ->state assignment.
	 *
	 * As with a regular write barrier, this ensures the read side in
	 * hrtimer_active() cannot observe base->running.timer == NULL &&
	 * timer->state == INACTIVE.
	 */
	raw_write_seqcount_barrier(&base->seq);

	WARN_ON_ONCE(base->running != timer);
	base->running = NULL;
}

static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
				 unsigned long flags, unsigned int active_mask)
{
	struct hrtimer_clock_base *base;
	unsigned int active = cpu_base->active_bases & active_mask;

	for_each_active_base(base, cpu_base, active) {
		struct timerqueue_node *node;
		ktime_t basenow;

		basenow = ktime_add(now, base->offset);

		while ((node = timerqueue_getnext(&base->active))) {
			struct hrtimer *timer;

			timer = container_of(node, struct hrtimer, node);

			/*
			 * The immediate goal for using the softexpires is
			 * minimizing wakeups, not running timers at the
			 * earliest interrupt after their soft expiration.
			 * This allows us to avoid using a Priority Search
			 * Tree, which can answer a stabbing query for
			 * overlapping intervals and instead use the simple
			 * BST we already have.
			 * We don't add extra wakeups by delaying timers that
			 * are right-of a not yet expired timer, because that
			 * timer will have to trigger a wakeup anyway.
			 */
			if (basenow < hrtimer_get_softexpires_tv64(timer))
				break;

			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
		}
	}
}

static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	unsigned long flags;
	ktime_t now;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);

	now = hrtimer_update_base(cpu_base);
	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_SOFT);

	cpu_base->softirq_activated = 0;
	hrtimer_update_softirq_timer(cpu_base, true);

	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
}

#ifdef CONFIG_HIGH_RES_TIMERS

/*
 * High resolution timer interrupt
 * Called with interrupts disabled
 */
void hrtimer_interrupt(struct clock_event_device *dev)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	ktime_t expires_next, now, entry_time, delta;
	unsigned long flags;
	int retries = 0;

	BUG_ON(!cpu_base->hres_active);
	cpu_base->nr_events++;
	dev->next_event = KTIME_MAX;

	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	entry_time = now = hrtimer_update_base(cpu_base);
retry:
	cpu_base->in_hrtirq = 1;
	/*
	 * We set expires_next to KTIME_MAX here with cpu_base->lock
	 * held to prevent that a timer is enqueued in our queue via
	 * the migration code. This does not affect enqueueing of
	 * timers which run their callback and need to be requeued on
	 * this CPU.
	 */
	cpu_base->expires_next = KTIME_MAX;

	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
		cpu_base->softirq_expires_next = KTIME_MAX;
		cpu_base->softirq_activated = 1;
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
	}

	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);

	/* Reevaluate the clock bases for the next expiry */
	expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
	/*
	 * Store the new expiry value so the migration code can verify
	 * against it.
	 */
	cpu_base->expires_next = expires_next;
	cpu_base->in_hrtirq = 0;
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	/* Reprogramming necessary ? */
	if (!tick_program_event(expires_next, 0)) {
		cpu_base->hang_detected = 0;
		return;
	}

	/*
	 * The next timer was already expired due to:
	 * - tracing
	 * - long lasting callbacks
	 * - being scheduled away when running in a VM
	 *
	 * We need to prevent looping forever in the hrtimer
	 * interrupt routine. We give it 3 attempts to avoid
	 * overreacting on some spurious event.
	 *
	 * Acquire base lock for updating the offsets and retrieving
	 * the current time.
	 */
	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	now = hrtimer_update_base(cpu_base);
	cpu_base->nr_retries++;
	if (++retries < 3)
		goto retry;
	/*
	 * Give the system a chance to do something other than looping
	 * here. We stored the entry time, so we know exactly how long
	 * we spent here. We schedule the next event this amount of
	 * time away.
	 */
	cpu_base->nr_hangs++;
	cpu_base->hang_detected = 1;
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);

	delta = ktime_sub(now, entry_time);
	if ((unsigned int)delta > cpu_base->max_hang_time)
		cpu_base->max_hang_time = (unsigned int) delta;
	/*
	 * Limit it to a sensible value as we enforce a longer
	 * delay. Give the CPU at least 100ms to catch up.
	 */
	if (delta > 100 * NSEC_PER_MSEC)
		expires_next = ktime_add_ns(now, 100 * NSEC_PER_MSEC);
	else
		expires_next = ktime_add(now, delta);
	tick_program_event(expires_next, 1);
	printk_once(KERN_WARNING "hrtimer: interrupt took %llu ns\n",
		    ktime_to_ns(delta));
}

/* called with interrupts disabled */
static inline void __hrtimer_peek_ahead_timers(void)
{
	struct tick_device *td;

	if (!hrtimer_hres_active())
		return;

	td = this_cpu_ptr(&tick_cpu_device);
	if (td && td->evtdev)
		hrtimer_interrupt(td->evtdev);
}

#else /* CONFIG_HIGH_RES_TIMERS */

static inline void __hrtimer_peek_ahead_timers(void) { }

#endif /* !CONFIG_HIGH_RES_TIMERS */

/*
 * Called from run_local_timers in hardirq context every jiffy
 */
void hrtimer_run_queues(void)
{
	struct hrtimer_cpu_base *cpu_base = this_cpu_ptr(&hrtimer_bases);
	unsigned long flags;
	ktime_t now;

	if (__hrtimer_hres_active(cpu_base))
		return;

	/*
	 * This _is_ ugly: We have to check periodically whether we
	 * can switch to highres and / or nohz mode. The clocksource
	 * switch happens with xtime_lock held. Notification from
	 * there only sets the check bit in the tick_oneshot code,
	 * otherwise we might deadlock vs. xtime_lock.
	 */
	if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) {
		hrtimer_switch_to_hres();
		return;
	}

	raw_spin_lock_irqsave(&cpu_base->lock, flags);
	now = hrtimer_update_base(cpu_base);

	if (!ktime_before(now, cpu_base->softirq_expires_next)) {
		cpu_base->softirq_expires_next = KTIME_MAX;
		cpu_base->softirq_activated = 1;
		raise_softirq_irqoff(HRTIMER_SOFTIRQ);
	}

	__hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
}

/*
 * Sleep related functions:
 */
static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
{
	struct hrtimer_sleeper *t =
		container_of(timer, struct hrtimer_sleeper, timer);
	struct task_struct *task = t->task;

	t->task = NULL;
	if (task)
		wake_up_process(task);

	return HRTIMER_NORESTART;
}

void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
{
	sl->timer.function = hrtimer_wakeup;
	sl->task = task;
}
EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);

int nanosleep_copyout(struct restart_block *restart, struct timespec64 *ts)
{
	switch (restart->nanosleep.type) {
#ifdef CONFIG_COMPAT
	case TT_COMPAT:
		if (compat_put_timespec64(ts, restart->nanosleep.compat_rmtp))
			return -EFAULT;
		break;
#endif
	case TT_NATIVE:
		if (put_timespec64(ts, restart->nanosleep.rmtp))
			return -EFAULT;
		break;
	default:
		BUG();
	}
	return -ERESTART_RESTARTBLOCK;
}

static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mode)
{
	struct restart_block *restart;

	hrtimer_init_sleeper(t, current);

	do {
		set_current_state(TASK_INTERRUPTIBLE);
		hrtimer_start_expires(&t->timer, mode);

		if (likely(t->task))
			freezable_schedule();

		hrtimer_cancel(&t->timer);
		mode = HRTIMER_MODE_ABS;

	} while (t->task && !signal_pending(current));

	__set_current_state(TASK_RUNNING);

	if (!t->task)
		return 0;

	restart = &current->restart_block;
	if (restart->nanosleep.type != TT_NONE) {
		ktime_t rem = hrtimer_expires_remaining(&t->timer);
		struct timespec64 rmt;

		if (rem <= 0)
			return 0;
		rmt = ktime_to_timespec64(rem);

		return nanosleep_copyout(restart, &rmt);
	}
	return -ERESTART_RESTARTBLOCK;
}

static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
{
	struct hrtimer_sleeper t;
	int ret;

	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
			      HRTIMER_MODE_ABS);
	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);

	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
}

long hrtimer_nanosleep(const struct timespec64 *rqtp,
		       const enum hrtimer_mode mode, const clockid_t clockid)
{
	struct restart_block *restart;
	struct hrtimer_sleeper t;
	int ret = 0;
	u64 slack;

	slack = current->timer_slack_ns;
	if (dl_task(current) || rt_task(current))
		slack = 0;

	hrtimer_init_on_stack(&t.timer, clockid, mode);
	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
	ret = do_nanosleep(&t, mode);
	if (ret != -ERESTART_RESTARTBLOCK)
		goto out;

	/* Absolute timers do not update the rmtp value and restart: */
	if (mode == HRTIMER_MODE_ABS) {
		ret = -ERESTARTNOHAND;
		goto out;
	}

	restart = &current->restart_block;
	restart->fn = hrtimer_nanosleep_restart;
	restart->nanosleep.clockid = t.timer.base->clockid;
	restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
out:
	destroy_hrtimer_on_stack(&t.timer);
	return ret;
}

SYSCALL_DEFINE2(nanosleep, struct timespec __user *, rqtp,
		struct timespec __user *, rmtp)
{
	struct timespec64 tu;

	if (get_timespec64(&tu, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&tu))
		return -EINVAL;

	current->restart_block.nanosleep.type = rmtp ? TT_NATIVE : TT_NONE;
	current->restart_block.nanosleep.rmtp = rmtp;
	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}

#ifdef CONFIG_COMPAT

COMPAT_SYSCALL_DEFINE2(nanosleep, struct compat_timespec __user *, rqtp,
		       struct compat_timespec __user *, rmtp)
{
	struct timespec64 tu;

	if (compat_get_timespec64(&tu, rqtp))
		return -EFAULT;

	if (!timespec64_valid(&tu))
		return -EINVAL;

	current->restart_block.nanosleep.type = rmtp ? TT_COMPAT : TT_NONE;
	current->restart_block.nanosleep.compat_rmtp = rmtp;
	return hrtimer_nanosleep(&tu, HRTIMER_MODE_REL, CLOCK_MONOTONIC);
}
#endif

/*
 * Functions related to boot-time initialization:
 */
int hrtimers_prepare_cpu(unsigned int cpu)
{
	struct hrtimer_cpu_base *cpu_base = &per_cpu(hrtimer_bases, cpu);
	int i;

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		cpu_base->clock_base[i].cpu_base = cpu_base;
		timerqueue_init_head(&cpu_base->clock_base[i].active);
	}

	cpu_base->cpu = cpu;
	cpu_base->active_bases = 0;
	cpu_base->hres_active = 0;
	cpu_base->hang_detected = 0;
	cpu_base->next_timer = NULL;
	cpu_base->softirq_next_timer = NULL;
	cpu_base->expires_next = KTIME_MAX;
	cpu_base->softirq_expires_next = KTIME_MAX;
	return 0;
}

#ifdef CONFIG_HOTPLUG_CPU

static void migrate_hrtimer_list(struct hrtimer_clock_base *old_base,
				 struct hrtimer_clock_base *new_base)
{
	struct hrtimer *timer;
	struct timerqueue_node *node;

	while ((node = timerqueue_getnext(&old_base->active))) {
		timer = container_of(node, struct hrtimer, node);
		BUG_ON(hrtimer_callback_running(timer));
		debug_deactivate(timer);

		/*
		 * Mark it as ENQUEUED not INACTIVE otherwise the
		 * timer could be seen as !active and just vanish away
		 * under us on another CPU
		 */
		__remove_hrtimer(timer, old_base, HRTIMER_STATE_ENQUEUED, 0);
		timer->base = new_base;
		/*
		 * Enqueue the timers on the new cpu. This does not
		 * reprogram the event device in case the timer
		 * expires before the earliest on this CPU, but we run
		 * hrtimer_interrupt after we migrated everything to
		 * sort out already expired timers and reprogram the
		 * event device.
		 */
		enqueue_hrtimer(timer, new_base, HRTIMER_MODE_ABS);
	}
}

int hrtimers_dead_cpu(unsigned int scpu)
{
	struct hrtimer_cpu_base *old_base, *new_base;
	int i;

	BUG_ON(cpu_online(scpu));
	tick_cancel_sched_timer(scpu);

	/*
	 * this BH disable ensures that raise_softirq_irqoff() does
	 * not wake up ksoftirqd (and acquire the pi-lock) while
	 * holding the cpu_base lock
	 */
	local_bh_disable();
	local_irq_disable();
	old_base = &per_cpu(hrtimer_bases, scpu);
	new_base = this_cpu_ptr(&hrtimer_bases);
	/*
	 * The caller is globally serialized and nobody else
	 * takes two locks at once, deadlock is not possible.
	 */
	raw_spin_lock(&new_base->lock);
	raw_spin_lock_nested(&old_base->lock, SINGLE_DEPTH_NESTING);

	for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
		migrate_hrtimer_list(&old_base->clock_base[i],
				     &new_base->clock_base[i]);
	}

	/*
	 * The migration might have changed the first expiring softirq
	 * timer on this CPU. Update it.
	 */
	hrtimer_update_softirq_timer(new_base, false);

	raw_spin_unlock(&old_base->lock);
	raw_spin_unlock(&new_base->lock);

	/* Check whether we got expired work to do */
	__hrtimer_peek_ahead_timers();
	local_irq_enable();
	local_bh_enable();
	return 0;
}

#endif /* CONFIG_HOTPLUG_CPU */

void __init hrtimers_init(void)
{
	hrtimers_prepare_cpu(smp_processor_id());
	open_softirq(HRTIMER_SOFTIRQ, hrtimer_run_softirq);
}

/**
 * schedule_hrtimeout_range_clock - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (ktime_t)
 * @mode:	timer mode
 * @clock_id:	timer clock to be used
 */
int __sched
schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
			       const enum hrtimer_mode mode, clockid_t clock_id)
{
	struct hrtimer_sleeper t;

	/*
	 * Optimize when a zero timeout value is given. It does not
	 * matter whether this is an absolute or a relative time.
	 */
	if (expires && *expires == 0) {
		__set_current_state(TASK_RUNNING);
		return 0;
	}

	/*
	 * A NULL parameter means "infinite"
	 */
	if (!expires) {
		schedule();
		return -EINTR;
	}

	hrtimer_init_on_stack(&t.timer, clock_id, mode);
	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);

	hrtimer_init_sleeper(&t, current);

	hrtimer_start_expires(&t.timer, mode);

	if (likely(t.task))
		schedule();

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);

	__set_current_state(TASK_RUNNING);

	return !t.task ? 0 : -EINTR;
}

/**
 * schedule_hrtimeout_range - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @delta:	slack in expires timeout (ktime_t)
 * @mode:	timer mode
 *
 * Make the current task sleep until the given expiry time has
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * The @delta argument gives the kernel the freedom to schedule the
 * actual wakeup to a time that is both power and performance friendly.
 * The kernel gives the normal best effort behavior for "@expires+@delta",
 * but may decide to fire the timer earlier, though never earlier than
 * @expires.
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
 * pass before the routine returns unless the current task is explicitly
 * woken up, (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Returns 0 when the timer has expired. If the task was woken before the
 * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
 * by an explicit wakeup, it returns -EINTR.
 */
int __sched schedule_hrtimeout_range(ktime_t *expires, u64 delta,
				     const enum hrtimer_mode mode)
{
	return schedule_hrtimeout_range_clock(expires, delta, mode,
					      CLOCK_MONOTONIC);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout_range);

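/*
 * Usage sketch (illustrative, hypothetical caller): as with schedule(),
 * the task state must be set before calling, and the expiry is passed by
 * pointer:
 *
 *	ktime_t timeout = ms_to_ktime(50);
 *
 *	set_current_state(TASK_INTERRUPTIBLE);
 *	ret = schedule_hrtimeout_range(&timeout, NSEC_PER_MSEC,
 *				       HRTIMER_MODE_REL);
 *
 * A zero return means the full timeout elapsed; -EINTR means the task was
 * woken early by a signal or an explicit wakeup.
 */
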
/**
 * schedule_hrtimeout - sleep until timeout
 * @expires:	timeout value (ktime_t)
 * @mode:	timer mode
 *
 * Make the current task sleep until the given expiry time has
 * elapsed. The routine will return immediately unless
 * the current task state has been set (see set_current_state()).
 *
 * You can set the task state as follows -
 *
 * %TASK_UNINTERRUPTIBLE - at least @timeout time is guaranteed to
 * pass before the routine returns unless the current task is explicitly
 * woken up, (e.g. by wake_up_process()).
 *
 * %TASK_INTERRUPTIBLE - the routine may return early if a signal is
 * delivered to the current task or the current task is explicitly woken
 * up.
 *
 * The current task state is guaranteed to be TASK_RUNNING when this
 * routine returns.
 *
 * Returns 0 when the timer has expired. If the task was woken before the
 * timer expired by a signal (only possible in state TASK_INTERRUPTIBLE) or
 * by an explicit wakeup, it returns -EINTR.
 */
int __sched schedule_hrtimeout(ktime_t *expires,
			       const enum hrtimer_mode mode)
{
	return schedule_hrtimeout_range(expires, 0, mode);
}
EXPORT_SYMBOL_GPL(schedule_hrtimeout);