// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

/* The device acting as the system-wide broadcast event source */
static struct tick_device tick_broadcast_device;
/* CPUs whose periodic tick is delivered by the broadcast device */
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
/* CPUs which explicitly enabled broadcast via tick_broadcast_control() */
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
/* Scratch mask; only valid while tick_broadcast_lock is held */
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
/* Set once by TICK_BROADCAST_FORCE; never cleared (force is sticky) */
static int tick_broadcast_forced;

/* Serializes all broadcast state: masks, device mode and programming */
static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
/* Per-CPU oneshot wakeup device, if one was registered for the CPU */
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
/* Without CONFIG_TICK_ONESHOT reaching oneshot setup is a hard bug */
static inline void
tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 *
Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

/* Exposed for debugging: see timer_list.c */
struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

/* Read-only accessor for the per-CPU oneshot wakeup device */
const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc) {
		/*
		 * NOTE(review): next_event_forced is reset before
		 * (re)starting the device; the consumer of this field is
		 * not visible in this file — confirm semantics against
		 * the clockevents core.
		 */
		bc->next_event_forced = 0;
		tick_setup_periodic(bc, 1);
	}
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	/*
	 * Dummy, per-CPU, or C3STOP-affected devices cannot serve as the
	 * system-wide broadcast source.
	 */
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	/* In oneshot mode the replacement must be oneshot capable too */
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	/* Accept when there is no current device or the new one rates higher */
	return !curdev || newdev->rating > curdev->rating;
}

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

/* Wakeup device fired: deliver the event via the broadcast receive path */
static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

/*
 * Conditionally install @newdev as the per-CPU oneshot wakeup device.
 * Returns true when @newdev was consumed (installed or cleared).
 */
static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	/* NULL clears the slot unconditionally */
	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	/* Must be a per-CPU oneshot capable device */
	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	/* Must be strictly affine to this CPU */
	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	/* Only replace an existing device with a higher rated one */
	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	/* A per-CPU wakeup device takes precedence over broadcast duty */
	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	/* Restart periodic broadcast if CPUs are waiting for it */
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered
broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the systems stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

/*
 * Update the frequency of the broadcast device. Returns -ENODEV when
 * @dev is not the installed broadcast device.
 */
int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

/* Last-resort broadcast function: complain once instead of crashing */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

/* Ensure @dev has a usable broadcast function installed */
static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 *
 * Returns non-zero when the caller must leave the per-CPU device in
 * shutdown state because broadcast delivers its tick.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc, false);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

/*
 * Deliver a broadcast-initiated tick by invoking the local device's
 * event handler on the current CPU.
 */
int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 *
 * Returns true when the caller must also run the local CPU's handler.
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	/* NOTE(review): consumer of next_event_forced not visible here */
	tick_broadcast_device.evtdev->next_event_forced = 0;

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	/* Re-arm a oneshot-state device for the next tick period */
	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/*
	 * Protects also the local clockevent device.
	 */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		/* Force is sticky: tick_broadcast_forced is never cleared */
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		/* OFF cannot undo a previous FORCE */
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			/* Last user left: stop the broadcast device */
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			/* First user arrived: start the broadcast device */
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc, false);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev,
			       int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
/* Stop the broadcast device when no CPU depends on it anymore */
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

/* Shut the broadcast device down for system suspend */
void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

/* Resume the broadcast device in whatever mode it was running before */
void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

/* CPUs in deep idle relying on the oneshot broadcast for wakeup */
static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
/* CPUs whose expired event will be delivered by a pending broadcast IPI */
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
/* CPUs which must be woken by the next broadcast, set to avoid ping-pong */
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
noinstr int tick_check_broadcast_expired(void)
{
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
	/* Use the raw arch op: instrumentation is not allowed in noinstr */
	return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
}

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

/*
 * Program the broadcast device for @expires and steer its interrupt
 * towards @cpu. Switches the device to oneshot state if required.
 */
static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
					      CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	/* NOTE(review): consumer of next_event_forced not visible here */
	tick_broadcast_device.evtdev->next_event_forced = 0;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() reports
		 * unconditionally CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			/* Track the earliest not-yet-expired event */
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	/* Run the local handler outside of tick_broadcast_lock */
	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

/*
 * Returns -EBUSY when the armed hrtimer based broadcast is bound to
 * @cpu, i.e. that CPU must stay out of deep idle; 0 otherwise.
 */
static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

/*
 * Core of the oneshot broadcast idle enter/exit handling. Called with
 * interrupts disabled; takes tick_broadcast_lock. Returns -EBUSY when
 * the CPU must not enter deep idle, 0 otherwise.
 */
static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

/*
 * Idle enter/exit handling via the per-CPU oneshot wakeup device.
 * Returns -EINVAL/-ENODEV when the wakeup device cannot be used and
 * the caller must fall back to the broadcast machinery.
 */
static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		/* Park the local device; arm the wakeup device instead */
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

/*
 * Idle enter/exit: prefer the per-CPU wakeup device, fall back to the
 * broadcast device. Returns -EBUSY when deep idle must be avoided.
 */
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);
	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

/* Set next_event of all per-CPU tick devices in @mask to @expires */
static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store /load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc:		the broadcast device
 * @from_periodic:	true if called from periodic mode
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
					 bool from_periodic)
{
	int cpu = smp_processor_id();
	ktime_t nexttick = 0;

	if (!bc)
		return;

	/*
	 * When the broadcast device was switched to oneshot by the first
	 * CPU handling the NOHZ change, the other CPUs will reach this
	 * code via hrtimer_run_queues() -> tick_check_oneshot_change()
	 * too. Set up the broadcast device only once!
	 */
	if (bc->event_handler == tick_handle_oneshot_broadcast) {
		/*
		 * The CPU which switched from periodic to oneshot mode
		 * set the broadcast oneshot bit for all other CPUs which
		 * are in the general (periodic) broadcast mask to ensure
		 * that CPUs which wait for the periodic broadcast are
		 * woken up.
		 *
		 * Clear the bit for the local CPU as the set bit would
		 * prevent the first tick_broadcast_enter() after this CPU
		 * switched to oneshot state to program the broadcast
		 * device.
		 *
		 * This code can also be reached via tick_broadcast_control(),
		 * but this cannot avoid the tick_broadcast_clear_oneshot()
		 * as that would break the periodic to oneshot transition of
		 * secondary CPUs. But that's harmless as the below only
		 * clears already cleared bits.
		 */
		tick_broadcast_clear_oneshot(cpu);
		return;
	}


	bc->event_handler = tick_handle_oneshot_broadcast;
	/* NOTE(review): consumer of next_event_forced not visible here */
	bc->next_event_forced = 0;
	bc->next_event = KTIME_MAX;

	/*
	 * When the tick mode is switched from periodic to oneshot it must
	 * be ensured that CPUs which are waiting for periodic broadcast
	 * get their wake-up at the next tick.  This is achieved by ORing
	 * tick_broadcast_mask into tick_broadcast_oneshot_mask.
	 *
	 * For other callers, e.g. broadcast device replacement,
	 * tick_broadcast_oneshot_mask must not be touched as this would
	 * set bits for CPUs which are already NOHZ, but not idle. Their
	 * next tick_broadcast_enter() would observe the bit set and fail
	 * to update the expiry time and the broadcast event device.
	 */
	if (from_periodic) {
		cpumask_copy(tmpmask, tick_broadcast_mask);
		/* Remove the local CPU as it is obviously not idle */
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);

		/*
		 * Ensure that the oneshot broadcast handler will wake the
		 * CPUs which are still waiting for periodic broadcast.
		 */
		nexttick = tick_get_next_period();
		tick_broadcast_init_next_event(tmpmask, nexttick);

		/*
		 * If the underlying broadcast clock event device is
		 * already in oneshot state, then there is nothing to do.
		 * The device was already armed for the next tick
		 * in tick_handle_broadcast_periodic()
		 */
		if (clockevent_state_oneshot(bc))
			return;
	}

	/*
	 * When switching from periodic to oneshot mode arm the broadcast
	 * device for the next tick.
	 *
	 * If the broadcast device has been replaced in oneshot mode and
	 * the oneshot broadcast mask is not empty, then arm it to expire
	 * immediately in order to reevaluate the next expiring timer.
	 * @nexttick is 0 and therefore in the past which will cause the
	 * clockevent code to force an event.
	 *
	 * For both cases the programming can be avoided when the oneshot
	 * broadcast mask is empty.
	 *
	 * tick_broadcast_set_event() implicitly switches the broadcast
	 * device to oneshot state.
1123 */ 1124 if (!cpumask_empty(tick_broadcast_oneshot_mask)) 1125 tick_broadcast_set_event(bc, cpu, nexttick); 1126 } 1127 1128 /* 1129 * Select oneshot operating mode for the broadcast device 1130 */ 1131 void tick_broadcast_switch_to_oneshot(void) 1132 { 1133 struct clock_event_device *bc; 1134 enum tick_device_mode oldmode; 1135 unsigned long flags; 1136 1137 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 1138 1139 oldmode = tick_broadcast_device.mode; 1140 tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT; 1141 bc = tick_broadcast_device.evtdev; 1142 if (bc) 1143 tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC); 1144 1145 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 1146 } 1147 1148 #ifdef CONFIG_HOTPLUG_CPU 1149 void hotplug_cpu__broadcast_tick_pull(int deadcpu) 1150 { 1151 struct clock_event_device *bc; 1152 unsigned long flags; 1153 1154 raw_spin_lock_irqsave(&tick_broadcast_lock, flags); 1155 bc = tick_broadcast_device.evtdev; 1156 1157 if (bc && broadcast_needs_cpu(bc, deadcpu)) { 1158 /* 1159 * If the broadcast force bit of the current CPU is set, 1160 * then the current CPU has not yet reprogrammed the local 1161 * timer device to avoid a ping-pong race. See 1162 * ___tick_broadcast_oneshot_control(). 1163 * 1164 * If the broadcast device is hrtimer based then 1165 * programming the broadcast event below does not have any 1166 * effect because the local clockevent device is not 1167 * running and not programmed because the broadcast event 1168 * is not earlier than the pending event of the local clock 1169 * event device. As a consequence all CPUs waiting for a 1170 * broadcast event are stuck forever. 1171 * 1172 * Detect this condition and reprogram the cpu local timer 1173 * device to avoid the starvation. 
1174 */ 1175 if (tick_check_broadcast_expired()) { 1176 struct tick_device *td = this_cpu_ptr(&tick_cpu_device); 1177 1178 cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask); 1179 tick_program_event(td->evtdev->next_event, 1); 1180 } 1181 1182 /* This moves the broadcast assignment to this CPU: */ 1183 bc->next_event_forced = 0; 1184 clockevents_program_event(bc, bc->next_event, 1); 1185 } 1186 raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags); 1187 } 1188 1189 /* 1190 * Remove a dying CPU from broadcasting 1191 */ 1192 static void tick_broadcast_oneshot_offline(unsigned int cpu) 1193 { 1194 if (tick_get_oneshot_wakeup_device(cpu)) 1195 tick_set_oneshot_wakeup_device(NULL, cpu); 1196 1197 /* 1198 * Clear the broadcast masks for the dead cpu, but do not stop 1199 * the broadcast device! 1200 */ 1201 cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); 1202 cpumask_clear_cpu(cpu, tick_broadcast_pending_mask); 1203 cpumask_clear_cpu(cpu, tick_broadcast_force_mask); 1204 } 1205 #endif 1206 1207 /* 1208 * Check, whether the broadcast device is in one shot mode 1209 */ 1210 int tick_broadcast_oneshot_active(void) 1211 { 1212 return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT; 1213 } 1214 1215 /* 1216 * Check whether the broadcast device supports oneshot. 1217 */ 1218 bool tick_broadcast_oneshot_available(void) 1219 { 1220 struct clock_event_device *bc = tick_broadcast_device.evtdev; 1221 1222 return bc ? 
bc->features & CLOCK_EVT_FEAT_ONESHOT : false; 1223 } 1224 1225 #else 1226 int __tick_broadcast_oneshot_control(enum tick_broadcast_state state) 1227 { 1228 struct clock_event_device *bc = tick_broadcast_device.evtdev; 1229 1230 if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER)) 1231 return -EBUSY; 1232 1233 return 0; 1234 } 1235 #endif 1236 1237 void __init tick_broadcast_init(void) 1238 { 1239 zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT); 1240 zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT); 1241 zalloc_cpumask_var(&tmpmask, GFP_NOWAIT); 1242 #ifdef CONFIG_TICK_ONESHOT 1243 zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT); 1244 zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT); 1245 zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT); 1246 #endif 1247 } 1248