// SPDX-License-Identifier: GPL-2.0
/*
 * This file contains functions which emulate a local clock-event
 * device via a broadcast event source.
 *
 * Copyright(C) 2005-2006, Linutronix GmbH, Thomas Gleixner <tglx@kernel.org>
 * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
 * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
 */
#include <linux/cpu.h>
#include <linux/err.h>
#include <linux/hrtimer.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
#include <linux/profile.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/module.h>

#include "tick-internal.h"

/*
 * Broadcast support for broken x86 hardware, where the local apic
 * timer stops in C3 state.
 */

static struct tick_device tick_broadcast_device;
static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly;
static cpumask_var_t tmpmask __cpumask_var_read_mostly;
static int tick_broadcast_forced;

static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock);

#ifdef CONFIG_TICK_ONESHOT
static DEFINE_PER_CPU(struct clock_event_device *, tick_oneshot_wakeup_device);

static void tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic);
static void tick_broadcast_clear_oneshot(int cpu);
static void tick_resume_broadcast_oneshot(struct clock_event_device *bc);
# ifdef CONFIG_HOTPLUG_CPU
static void tick_broadcast_oneshot_offline(unsigned int cpu);
# endif
#else
static inline void
tick_broadcast_setup_oneshot(struct clock_event_device *bc, bool from_periodic) { BUG(); }
static inline void tick_broadcast_clear_oneshot(int cpu) { }
static inline void tick_resume_broadcast_oneshot(struct clock_event_device *bc) { }
# ifdef CONFIG_HOTPLUG_CPU
static inline void tick_broadcast_oneshot_offline(unsigned int cpu) { }
# endif
#endif

/*
 * Debugging: see timer_list.c
 */
struct tick_device *tick_get_broadcast_device(void)
{
	return &tick_broadcast_device;
}

struct cpumask *tick_get_broadcast_mask(void)
{
	return tick_broadcast_mask;
}

static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu);

const struct clock_event_device *tick_get_wakeup_device(int cpu)
{
	return tick_get_oneshot_wakeup_device(cpu);
}

/*
 * Start the device in periodic mode
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (bc)
		tick_setup_periodic(bc, 1);
}

/*
 * Check, if the device can be utilized as broadcast device:
 */
static bool tick_check_broadcast_device(struct clock_event_device *curdev,
					struct clock_event_device *newdev)
{
	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	return !curdev || newdev->rating > curdev->rating;
}

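/*
 * Illustration (hypothetical, not part of this file): a CPU-local timer
 * such as the x86 local APIC timer carries CLOCK_EVT_FEAT_C3STOP and/or
 * CLOCK_EVT_FEAT_PERCPU and is rejected above, while a global always-on
 * timer would register a device along these lines and can then win the
 * rating comparison against an already installed broadcast device:
 *
 *	static struct clock_event_device example_global_timer = {
 *		.name		= "example-global-timer",
 *		.features	= CLOCK_EVT_FEAT_PERIODIC |
 *				  CLOCK_EVT_FEAT_ONESHOT,
 *		.rating		= 250,
 *	};
 */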

#ifdef CONFIG_TICK_ONESHOT
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return per_cpu(tick_oneshot_wakeup_device, cpu);
}

static void tick_oneshot_wakeup_handler(struct clock_event_device *wd)
{
	/*
	 * If we woke up early and the tick was reprogrammed in the
	 * meantime then this may be spurious but harmless.
	 */
	tick_receive_broadcast();
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	struct clock_event_device *curdev = tick_get_oneshot_wakeup_device(cpu);

	if (!newdev)
		goto set_device;

	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
		return false;

	if (!(newdev->features & CLOCK_EVT_FEAT_PERCPU) ||
	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
		return false;

	if (!cpumask_equal(newdev->cpumask, cpumask_of(cpu)))
		return false;

	if (curdev && newdev->rating <= curdev->rating)
		return false;

	if (!try_module_get(newdev->owner))
		return false;

	newdev->event_handler = tick_oneshot_wakeup_handler;
set_device:
	clockevents_exchange_device(curdev, newdev);
	per_cpu(tick_oneshot_wakeup_device, cpu) = newdev;
	return true;
}
#else
static struct clock_event_device *tick_get_oneshot_wakeup_device(int cpu)
{
	return NULL;
}

static bool tick_set_oneshot_wakeup_device(struct clock_event_device *newdev,
					   int cpu)
{
	return false;
}
#endif

/*
 * Conditionally install/replace broadcast device
 */
void tick_install_broadcast_device(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *cur = tick_broadcast_device.evtdev;

	if (tick_set_oneshot_wakeup_device(dev, cpu))
		return;

	if (!tick_check_broadcast_device(cur, dev))
		return;

	if (!try_module_get(dev->owner))
		return;

	clockevents_exchange_device(cur, dev);
	if (cur)
		cur->event_handler = clockevents_handle_noop;
	tick_broadcast_device.evtdev = dev;
	if (!cpumask_empty(tick_broadcast_mask))
		tick_broadcast_start_periodic(dev);

	if (!(dev->features & CLOCK_EVT_FEAT_ONESHOT))
		return;

	/*
	 * If the system already runs in oneshot mode, switch the newly
	 * registered broadcast device to oneshot mode explicitly.
	 */
	if (tick_broadcast_oneshot_active()) {
		tick_broadcast_switch_to_oneshot();
		return;
	}

	/*
	 * Inform all cpus about this. We might be in a situation
	 * where we did not switch to oneshot mode because the per cpu
	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
	 * of a oneshot capable broadcast device. Without that
	 * notification the system stays stuck in periodic mode
	 * forever.
	 */
	tick_clock_notify();
}

/*
 * Check, if the device is the broadcast device
 */
int tick_is_broadcast_device(struct clock_event_device *dev)
{
	return (dev && tick_broadcast_device.evtdev == dev);
}

int tick_broadcast_update_freq(struct clock_event_device *dev, u32 freq)
{
	int ret = -ENODEV;

	if (tick_is_broadcast_device(dev)) {
		raw_spin_lock(&tick_broadcast_lock);
		ret = __clockevents_update_freq(dev, freq);
		raw_spin_unlock(&tick_broadcast_lock);
	}
	return ret;
}

static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}

static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
{
	if (!dev->broadcast)
		dev->broadcast = tick_broadcast;
	if (!dev->broadcast) {
		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
			     dev->name);
		dev->broadcast = err_broadcast;
	}
}

/*
 * Check, if the device is dysfunctional and a placeholder, which
 * needs to be handled by the broadcast device.
 */
int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;
	unsigned long flags;
	int ret = 0;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	/*
	 * Devices might be registered with both periodic and oneshot
	 * mode disabled. This signals, that the device needs to be
	 * operated from the broadcast device and is a placeholder for
	 * the cpu local device.
	 */
	if (!tick_device_is_functional(dev)) {
		dev->event_handler = tick_handle_periodic;
		tick_device_setup_broadcast_func(dev);
		cpumask_set_cpu(cpu, tick_broadcast_mask);
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
			tick_broadcast_start_periodic(bc);
		else
			tick_broadcast_setup_oneshot(bc, false);
		ret = 1;
	} else {
		/*
		 * Clear the broadcast bit for this cpu if the
		 * device is not power state affected.
		 */
		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);
		else
			tick_device_setup_broadcast_func(dev);

		/*
		 * Clear the broadcast bit if the CPU is not in
		 * periodic broadcast on state.
		 */
		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
			cpumask_clear_cpu(cpu, tick_broadcast_mask);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_ONESHOT:
			/*
			 * If the system is in oneshot mode we can
			 * unconditionally clear the oneshot mask bit,
			 * because the CPU is running and therefore
			 * not in an idle state which causes the power
			 * state affected device to stop. Let the
			 * caller initialize the device.
			 */
			tick_broadcast_clear_oneshot(cpu);
			ret = 0;
			break;

		case TICKDEV_MODE_PERIODIC:
			/*
			 * If the system is in periodic mode, check
			 * whether the broadcast device can be
			 * switched off now.
			 */
			if (cpumask_empty(tick_broadcast_mask) && bc)
				clockevents_shutdown(bc);
			/*
			 * If we kept the cpu in the broadcast mask,
			 * tell the caller to leave the per cpu device
			 * in shutdown state. The periodic interrupt
			 * is delivered by the broadcast device, if
			 * the broadcast device exists and is not
			 * hrtimer based.
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER))
				ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
			break;
		default:
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
	return ret;
}

int tick_receive_broadcast(void)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	struct clock_event_device *evt = td->evtdev;

	if (!evt)
		return -ENODEV;

	if (!evt->event_handler)
		return -EINVAL;

	evt->event_handler(evt);
	return 0;
}

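/*
 * Delivery path sketch (illustrative): architectures which deliver the
 * broadcast via an IPI (e.g. the IPI_TIMER handler on arm/arm64) invoke
 * tick_receive_broadcast() from their IPI entry code, which then runs
 * the handler of the possibly stopped CPU-local device:
 *
 *	broadcast device interrupt
 *	  tick_handle_{periodic,oneshot}_broadcast()
 *	    tick_do_broadcast(mask)
 *	      td->evtdev->broadcast(mask)	arch sends the IPIs
 *		tick_receive_broadcast()	on each CPU in mask
 *		  evt->event_handler(evt)
 */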

/*
 * Broadcast the event to the cpus, which are set in the mask (mangled).
 */
static bool tick_do_broadcast(struct cpumask *mask)
{
	int cpu = smp_processor_id();
	struct tick_device *td;
	bool local = false;

	/*
	 * Check, if the current cpu is in the mask
	 */
	if (cpumask_test_cpu(cpu, mask)) {
		struct clock_event_device *bc = tick_broadcast_device.evtdev;

		cpumask_clear_cpu(cpu, mask);
		/*
		 * We only run the local handler, if the broadcast
		 * device is not hrtimer based. Otherwise we run into
		 * a hrtimer recursion.
		 *
		 * local timer_interrupt()
		 *   local_handler()
		 *     expire_hrtimers()
		 *       bc_handler()
		 *         local_handler()
		 *           expire_hrtimers()
		 */
		local = !(bc->features & CLOCK_EVT_FEAT_HRTIMER);
	}

	if (!cpumask_empty(mask)) {
		/*
		 * It might be necessary to actually check whether the devices
		 * have different broadcast functions. For now, just use the
		 * one of the first device. This works as long as we have this
		 * misfeature only on x86 (lapic)
		 */
		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
		td->evtdev->broadcast(mask);
	}
	return local;
}

/*
 * Periodic broadcast:
 * - invoke the broadcast handlers
 */
static bool tick_do_periodic_broadcast(void)
{
	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
	return tick_do_broadcast(tmpmask);
}

/*
 * Event handler for periodic broadcast ticks
 */
static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);

	/* Handle spurious interrupts gracefully */
	if (clockevent_state_shutdown(tick_broadcast_device.evtdev)) {
		raw_spin_unlock(&tick_broadcast_lock);
		return;
	}

	bc_local = tick_do_periodic_broadcast();

	if (clockevent_state_oneshot(dev)) {
		ktime_t next = ktime_add_ns(dev->next_event, TICK_NSEC);

		clockevents_program_event(dev, next, true);
	}
	raw_spin_unlock(&tick_broadcast_lock);

	/*
	 * We run the handler of the local cpu after dropping
	 * tick_broadcast_lock because the handler might deadlock when
	 * trying to switch to oneshot mode.
	 */
	if (bc_local)
		td->evtdev->event_handler(td->evtdev);
}

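/*
 * Worked example for the oneshot-state reprogramming above: with HZ=250,
 * TICK_NSEC is 4000000 ns, so a broadcast device which only supports
 * oneshot state is re-armed 4 ms after its previous expiry. This
 * emulates a periodic tick on hardware without a true periodic mode.
 */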

/**
 * tick_broadcast_control - Enable/disable or force broadcast mode
 * @mode:	The selected broadcast mode
 *
 * Called when the system enters a state where affected tick devices
 * might stop. Note: TICK_BROADCAST_FORCE cannot be undone.
 */
void tick_broadcast_control(enum tick_broadcast_mode mode)
{
	struct clock_event_device *bc, *dev;
	struct tick_device *td;
	int cpu, bc_stopped;
	unsigned long flags;

	/* Protects also the local clockevent device. */
	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	td = this_cpu_ptr(&tick_cpu_device);
	dev = td->evtdev;

	/*
	 * Is the device not affected by the powerstate ?
	 */
	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
		goto out;

	if (!tick_device_is_functional(dev))
		goto out;

	cpu = smp_processor_id();
	bc = tick_broadcast_device.evtdev;
	bc_stopped = cpumask_empty(tick_broadcast_mask);

	switch (mode) {
	case TICK_BROADCAST_FORCE:
		tick_broadcast_forced = 1;
		fallthrough;
	case TICK_BROADCAST_ON:
		cpumask_set_cpu(cpu, tick_broadcast_on);
		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
			/*
			 * Only shutdown the cpu local device, if:
			 *
			 * - the broadcast device exists
			 * - the broadcast device is not a hrtimer based one
			 * - the broadcast device is in periodic mode to
			 *   avoid a hiccup during switch to oneshot mode
			 */
			if (bc && !(bc->features & CLOCK_EVT_FEAT_HRTIMER) &&
			    tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				clockevents_shutdown(dev);
		}
		break;

	case TICK_BROADCAST_OFF:
		if (tick_broadcast_forced)
			break;
		cpumask_clear_cpu(cpu, tick_broadcast_on);
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
			if (tick_broadcast_device.mode ==
			    TICKDEV_MODE_PERIODIC)
				tick_setup_periodic(dev, 0);
		}
		break;
	}

	if (bc) {
		if (cpumask_empty(tick_broadcast_mask)) {
			if (!bc_stopped)
				clockevents_shutdown(bc);
		} else if (bc_stopped) {
			if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
				tick_broadcast_start_periodic(bc);
			else
				tick_broadcast_setup_oneshot(bc, false);
		}
	}
out:
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
EXPORT_SYMBOL_GPL(tick_broadcast_control);

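/*
 * Typical usage (illustrative), via the wrappers in <linux/tick.h>:
 * a driver which knows that the CPU-local timer is about to be powered
 * down does roughly
 *
 *	tick_broadcast_enable();	TICK_BROADCAST_ON from this CPU
 *	...
 *	tick_broadcast_disable();	TICK_BROADCAST_OFF again
 *
 * TICK_BROADCAST_FORCE is meant for hardware which is known to be
 * unconditionally broken and, as documented above, cannot be undone.
 */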

/*
 * Set the periodic handler depending on broadcast on/off
 */
void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
{
	if (!broadcast)
		dev->event_handler = tick_handle_periodic;
	else
		dev->event_handler = tick_handle_periodic_broadcast;
}

#ifdef CONFIG_HOTPLUG_CPU
static void tick_shutdown_broadcast(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
		if (bc && cpumask_empty(tick_broadcast_mask))
			clockevents_shutdown(bc);
	}
}

/*
 * Remove a CPU from broadcasting
 */
void tick_broadcast_offline(unsigned int cpu)
{
	raw_spin_lock(&tick_broadcast_lock);
	cpumask_clear_cpu(cpu, tick_broadcast_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_on);
	tick_broadcast_oneshot_offline(cpu);
	tick_shutdown_broadcast();
	raw_spin_unlock(&tick_broadcast_lock);
}

#endif

void tick_suspend_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;
	if (bc)
		clockevents_shutdown(bc);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * This is called from tick_resume_local() on a resuming CPU. That's
 * called from the core resume function, tick_unfreeze() and the magic XEN
 * resume hackery.
 *
 * In none of these cases the broadcast device mode can change and the
 * bit of the resuming CPU in the broadcast mask is safe as well.
 */
bool tick_resume_check_broadcast(void)
{
	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT)
		return false;
	else
		return cpumask_test_cpu(smp_processor_id(), tick_broadcast_mask);
}

void tick_resume_broadcast(void)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	bc = tick_broadcast_device.evtdev;

	if (bc) {
		clockevents_tick_resume(bc);

		switch (tick_broadcast_device.mode) {
		case TICKDEV_MODE_PERIODIC:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_broadcast_start_periodic(bc);
			break;
		case TICKDEV_MODE_ONESHOT:
			if (!cpumask_empty(tick_broadcast_mask))
				tick_resume_broadcast_oneshot(bc);
			break;
		}
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_TICK_ONESHOT

static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly;
static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly;

/*
 * Exposed for debugging: see timer_list.c
 */
struct cpumask *tick_get_broadcast_oneshot_mask(void)
{
	return tick_broadcast_oneshot_mask;
}

/*
 * Called before going idle with interrupts disabled. Checks whether a
 * broadcast event from the other core is about to happen. We detected
 * that in tick_broadcast_oneshot_control(). The callsite can use this
 * to avoid a deep idle transition as we are about to get the
 * broadcast IPI right away.
 */
noinstr int tick_check_broadcast_expired(void)
{
#ifdef _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H
	return arch_test_bit(smp_processor_id(), cpumask_bits(tick_broadcast_force_mask));
#else
	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
#endif
}

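/*
 * Example caller, for orientation: do_idle() in kernel/sched/idle.c
 * checks tick_check_broadcast_expired() and falls back to polling idle
 * instead of calling into cpuidle, so the CPU does not pay for a deep
 * idle entry which the imminent broadcast IPI would terminate right
 * away.
 */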

/*
 * Set broadcast interrupt affinity
 */
static void tick_broadcast_set_affinity(struct clock_event_device *bc,
					const struct cpumask *cpumask)
{
	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
		return;

	if (cpumask_equal(bc->cpumask, cpumask))
		return;

	bc->cpumask = cpumask;
	irq_set_affinity(bc->irq, bc->cpumask);
}

static void tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
				     ktime_t expires)
{
	if (!clockevent_state_oneshot(bc))
		clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);

	clockevents_program_event(bc, expires, 1);
	tick_broadcast_set_affinity(bc, cpumask_of(cpu));
}

static void tick_resume_broadcast_oneshot(struct clock_event_device *bc)
{
	clockevents_switch_state(bc, CLOCK_EVT_STATE_ONESHOT);
}

/*
 * Called from irq_enter() when idle was interrupted to reenable the
 * per cpu device.
 */
void tick_check_oneshot_broadcast_this_cpu(void)
{
	if (cpumask_test_cpu(smp_processor_id(), tick_broadcast_oneshot_mask)) {
		struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

		/*
		 * We might be in the middle of switching over from
		 * periodic to oneshot. If the CPU has not yet
		 * switched over, leave the device alone.
		 */
		if (td->mode == TICKDEV_MODE_ONESHOT) {
			clockevents_switch_state(td->evtdev,
					      CLOCK_EVT_STATE_ONESHOT);
		}
	}
}

/*
 * Handle oneshot mode broadcasting
 */
static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
{
	struct tick_device *td;
	ktime_t now, next_event;
	int cpu, next_cpu = 0;
	bool bc_local;

	raw_spin_lock(&tick_broadcast_lock);
	dev->next_event = KTIME_MAX;
	next_event = KTIME_MAX;
	cpumask_clear(tmpmask);
	now = ktime_get();
	/* Find all expired events */
	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
		/*
		 * Required for !SMP because for_each_cpu() reports
		 * unconditionally CPU0 as set on UP kernels.
		 */
		if (!IS_ENABLED(CONFIG_SMP) &&
		    cpumask_empty(tick_broadcast_oneshot_mask))
			break;

		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev->next_event <= now) {
			cpumask_set_cpu(cpu, tmpmask);
			/*
			 * Mark the remote cpu in the pending mask, so
			 * it can avoid reprogramming the cpu local
			 * timer in tick_broadcast_oneshot_control().
			 */
			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
		} else if (td->evtdev->next_event < next_event) {
			next_event = td->evtdev->next_event;
			next_cpu = cpu;
		}
	}

	/*
	 * Remove the current cpu from the pending mask. The event is
	 * delivered immediately in tick_do_broadcast() !
	 */
	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);

	/* Take care of enforced broadcast requests */
	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
	cpumask_clear(tick_broadcast_force_mask);

	/*
	 * Sanity check. Catch the case where we try to broadcast to
	 * offline cpus.
	 */
	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
		cpumask_and(tmpmask, tmpmask, cpu_online_mask);

	/*
	 * Wakeup the cpus which have an expired event.
	 */
	bc_local = tick_do_broadcast(tmpmask);

	/*
	 * Two reasons for reprogram:
	 *
	 * - The global event did not expire any CPU local
	 * events. This happens in dyntick mode, as the maximum PIT
	 * delta is quite small.
	 *
	 * - There are pending events on sleeping CPUs which were not
	 * in the event mask
	 */
	if (next_event != KTIME_MAX)
		tick_broadcast_set_event(dev, next_cpu, next_event);

	raw_spin_unlock(&tick_broadcast_lock);

	if (bc_local) {
		td = this_cpu_ptr(&tick_cpu_device);
		td->evtdev->event_handler(td->evtdev);
	}
}

static int broadcast_needs_cpu(struct clock_event_device *bc, int cpu)
{
	if (!(bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return 0;
	if (bc->next_event == KTIME_MAX)
		return 0;
	return bc->bound_on == cpu ? -EBUSY : 0;
}

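/*
 * Background for the CLOCK_EVT_FEAT_HRTIMER checks (see
 * kernel/time/tick-broadcast-hrtimer.c): such a broadcast "device" is
 * not real hardware but a hrtimer queued on one CPU, recorded in
 * bc->bound_on. That CPU has to keep its local timer running to service
 * the broadcast, so broadcast_needs_cpu() returns -EBUSY for it and deep
 * idle is refused on that CPU.
 */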

static void broadcast_shutdown_local(struct clock_event_device *bc,
				     struct clock_event_device *dev)
{
	/*
	 * For hrtimer based broadcasting we cannot shutdown the cpu
	 * local device if our own event is the first one to expire or
	 * if we own the broadcast timer.
	 */
	if (bc->features & CLOCK_EVT_FEAT_HRTIMER) {
		if (broadcast_needs_cpu(bc, smp_processor_id()))
			return;
		if (dev->next_event < bc->next_event)
			return;
	}
	clockevents_switch_state(dev, CLOCK_EVT_STATE_SHUTDOWN);
}

static int ___tick_broadcast_oneshot_control(enum tick_broadcast_state state,
					     struct tick_device *td,
					     int cpu)
{
	struct clock_event_device *bc, *dev = td->evtdev;
	int ret = 0;
	ktime_t now;

	raw_spin_lock(&tick_broadcast_lock);
	bc = tick_broadcast_device.evtdev;

	if (state == TICK_BROADCAST_ENTER) {
		/*
		 * If the current CPU owns the hrtimer broadcast
		 * mechanism, it cannot go deep idle and we do not add
		 * the CPU to the broadcast mask. We don't have to go
		 * through the EXIT path as the local timer is not
		 * shutdown.
		 */
		ret = broadcast_needs_cpu(bc, cpu);
		if (ret)
			goto out;

		/*
		 * If the broadcast device is in periodic mode, we
		 * return.
		 */
		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
			/* If it is a hrtimer based broadcast, return busy */
			if (bc->features & CLOCK_EVT_FEAT_HRTIMER)
				ret = -EBUSY;
			goto out;
		}

		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));

			/* Conditionally shut down the local timer. */
			broadcast_shutdown_local(bc, dev);

			/*
			 * We only reprogram the broadcast timer if we
			 * did not mark ourself in the force mask and
			 * if the cpu local event is earlier than the
			 * broadcast event. If the current CPU is in
			 * the force mask, then we are going to be
			 * woken by the IPI right away; we return
			 * busy, so the CPU does not try to go deep
			 * idle.
			 */
			if (cpumask_test_cpu(cpu, tick_broadcast_force_mask)) {
				ret = -EBUSY;
			} else if (dev->next_event < bc->next_event) {
				tick_broadcast_set_event(bc, cpu, dev->next_event);
				/*
				 * In case of hrtimer broadcasts the
				 * programming might have moved the
				 * timer to this cpu. If yes, remove
				 * us from the broadcast mask and
				 * return busy.
				 */
				ret = broadcast_needs_cpu(bc, cpu);
				if (ret) {
					cpumask_clear_cpu(cpu,
						tick_broadcast_oneshot_mask);
				}
			}
		}
	} else {
		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
			clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT);
			/*
			 * The cpu which was handling the broadcast
			 * timer marked this cpu in the broadcast
			 * pending mask and fired the broadcast
			 * IPI. So we are going to handle the expired
			 * event anyway via the broadcast IPI
			 * handler. No need to reprogram the timer
			 * with an already expired event.
			 */
			if (cpumask_test_and_clear_cpu(cpu,
				       tick_broadcast_pending_mask))
				goto out;

			/*
			 * Bail out if there is no next event.
			 */
			if (dev->next_event == KTIME_MAX)
				goto out;
			/*
			 * If the pending bit is not set, then we are
			 * either the CPU handling the broadcast
			 * interrupt or we got woken by something else.
			 *
			 * We are no longer in the broadcast mask, so
			 * if the cpu local expiry time is already
			 * reached, we would reprogram the cpu local
			 * timer with an already expired event.
			 *
			 * This can lead to a ping-pong when we return
			 * to idle and therefore rearm the broadcast
			 * timer before the cpu local timer was able
			 * to fire. This happens because the forced
			 * reprogramming makes sure that the event
			 * will happen in the future and depending on
			 * the min_delta setting this might be far
			 * enough out that the ping-pong starts.
			 *
			 * If the cpu local next_event has expired
			 * then we know that the broadcast timer
			 * next_event has expired as well and
			 * broadcast is about to be handled. So we
			 * avoid reprogramming and enforce that the
			 * broadcast handler, which did not run yet,
			 * will invoke the cpu local handler.
			 *
			 * We cannot call the handler directly from
			 * here, because we might be in a NOHZ phase
			 * and we did not go through the irq_enter()
			 * nohz fixups.
			 */
			now = ktime_get();
			if (dev->next_event <= now) {
				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
				goto out;
			}
			/*
			 * We got woken by something else. Reprogram
			 * the cpu local timer device.
			 */
			tick_program_event(dev->next_event, 1);
		}
	}
out:
	raw_spin_unlock(&tick_broadcast_lock);
	return ret;
}

static int tick_oneshot_wakeup_control(enum tick_broadcast_state state,
				       struct tick_device *td,
				       int cpu)
{
	struct clock_event_device *dev, *wd;

	dev = td->evtdev;
	if (td->mode != TICKDEV_MODE_ONESHOT)
		return -EINVAL;

	wd = tick_get_oneshot_wakeup_device(cpu);
	if (!wd)
		return -ENODEV;

	switch (state) {
	case TICK_BROADCAST_ENTER:
		clockevents_switch_state(dev, CLOCK_EVT_STATE_ONESHOT_STOPPED);
		clockevents_switch_state(wd, CLOCK_EVT_STATE_ONESHOT);
		clockevents_program_event(wd, dev->next_event, 1);
		break;
	case TICK_BROADCAST_EXIT:
		/* We may have transitioned to oneshot mode while idle */
		if (clockevent_get_state(wd) != CLOCK_EVT_STATE_ONESHOT)
			return -ENODEV;
	}

	return 0;
}

int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
	int cpu = smp_processor_id();

	if (!tick_oneshot_wakeup_control(state, td, cpu))
		return 0;

	if (tick_broadcast_device.evtdev)
		return ___tick_broadcast_oneshot_control(state, td, cpu);

	/*
	 * If there is no broadcast or wakeup device, tell the caller not
	 * to go into deep idle.
	 */
	return -EBUSY;
}

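/*
 * Illustrative call sequence: cpuidle_enter_state() enters an idle state
 * which carries CPUIDLE_FLAG_TIMER_STOP roughly as follows (the wrappers
 * live in <linux/tick.h>):
 *
 *	if (tick_broadcast_enter())	TICK_BROADCAST_ENTER
 *		...fall back to a state which keeps the timer...
 *	...enter the power state...
 *	tick_broadcast_exit();		TICK_BROADCAST_EXIT
 *
 * A nonzero (-EBUSY) return from tick_broadcast_enter() tells the CPU
 * that it must keep its local timer alive, e.g. because it owns the
 * hrtimer based broadcast.
 */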

/*
 * Reset the one shot broadcast for a cpu
 *
 * Called with tick_broadcast_lock held
 */
static void tick_broadcast_clear_oneshot(int cpu)
{
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
}

static void tick_broadcast_init_next_event(struct cpumask *mask,
					   ktime_t expires)
{
	struct tick_device *td;
	int cpu;

	for_each_cpu(cpu, mask) {
		td = &per_cpu(tick_cpu_device, cpu);
		if (td->evtdev)
			td->evtdev->next_event = expires;
	}
}

static inline ktime_t tick_get_next_period(void)
{
	ktime_t next;

	/*
	 * Protect against concurrent updates (store/load tearing on
	 * 32bit). It does not matter if the time is already in the
	 * past. The broadcast device which is about to be programmed will
	 * fire in any case.
	 */
	raw_spin_lock(&jiffies_lock);
	next = tick_next_period;
	raw_spin_unlock(&jiffies_lock);
	return next;
}

/**
 * tick_broadcast_setup_oneshot - setup the broadcast device
 * @bc:		the broadcast device
 * @from_periodic:	true if called from periodic mode
 */
static void tick_broadcast_setup_oneshot(struct clock_event_device *bc,
					 bool from_periodic)
{
	int cpu = smp_processor_id();
	ktime_t nexttick = 0;

	if (!bc)
		return;

	/*
	 * When the broadcast device was switched to oneshot by the first
	 * CPU handling the NOHZ change, the other CPUs will reach this
	 * code via hrtimer_run_queues() -> tick_check_oneshot_change()
	 * too. Set up the broadcast device only once!
	 */
	if (bc->event_handler == tick_handle_oneshot_broadcast) {
		/*
		 * The CPU which switched from periodic to oneshot mode
		 * set the broadcast oneshot bit for all other CPUs which
		 * are in the general (periodic) broadcast mask to ensure
		 * that CPUs which wait for the periodic broadcast are
		 * woken up.
		 *
		 * Clear the bit for the local CPU as the set bit would
		 * prevent the first tick_broadcast_enter() after this CPU
		 * switched to oneshot state to program the broadcast
		 * device.
		 *
		 * This code can also be reached via tick_broadcast_control(),
		 * but this cannot avoid the tick_broadcast_clear_oneshot()
		 * as that would break the periodic to oneshot transition of
		 * secondary CPUs. But that's harmless as the below only
		 * clears already cleared bits.
		 */
		tick_broadcast_clear_oneshot(cpu);
		return;
	}

	bc->event_handler = tick_handle_oneshot_broadcast;
	bc->next_event = KTIME_MAX;

	/*
	 * When the tick mode is switched from periodic to oneshot it must
	 * be ensured that CPUs which are waiting for periodic broadcast
	 * get their wake-up at the next tick. This is achieved by ORing
	 * tick_broadcast_mask into tick_broadcast_oneshot_mask.
	 *
	 * For other callers, e.g. broadcast device replacement,
	 * tick_broadcast_oneshot_mask must not be touched as this would
	 * set bits for CPUs which are already NOHZ, but not idle. Their
	 * next tick_broadcast_enter() would observe the bit set and fail
	 * to update the expiry time and the broadcast event device.
	 */
	if (from_periodic) {
		cpumask_copy(tmpmask, tick_broadcast_mask);
		/* Remove the local CPU as it is obviously not idle */
		cpumask_clear_cpu(cpu, tmpmask);
		cpumask_or(tick_broadcast_oneshot_mask, tick_broadcast_oneshot_mask, tmpmask);

		/*
		 * Ensure that the oneshot broadcast handler will wake the
		 * CPUs which are still waiting for periodic broadcast.
		 */
		nexttick = tick_get_next_period();
		tick_broadcast_init_next_event(tmpmask, nexttick);

		/*
		 * If the underlying broadcast clock event device is
		 * already in oneshot state, then there is nothing to do.
		 * The device was already armed for the next tick
		 * in tick_handle_broadcast_periodic()
		 */
		if (clockevent_state_oneshot(bc))
			return;
	}

	/*
	 * When switching from periodic to oneshot mode arm the broadcast
	 * device for the next tick.
	 *
	 * If the broadcast device has been replaced in oneshot mode and
	 * the oneshot broadcast mask is not empty, then arm it to expire
	 * immediately in order to reevaluate the next expiring timer.
	 * @nexttick is 0 and therefore in the past which will cause the
	 * clockevent code to force an event.
	 *
	 * For both cases the programming can be avoided when the oneshot
	 * broadcast mask is empty.
	 *
	 * tick_broadcast_set_event() implicitly switches the broadcast
	 * device to oneshot state.
	 */
	if (!cpumask_empty(tick_broadcast_oneshot_mask))
		tick_broadcast_set_event(bc, cpu, nexttick);
}

/*
 * Select oneshot operating mode for the broadcast device
 */
void tick_broadcast_switch_to_oneshot(void)
{
	struct clock_event_device *bc;
	enum tick_device_mode oldmode;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);

	oldmode = tick_broadcast_device.mode;
	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
	bc = tick_broadcast_device.evtdev;
	if (bc)
		tick_broadcast_setup_oneshot(bc, oldmode == TICKDEV_MODE_PERIODIC);

	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

#ifdef CONFIG_HOTPLUG_CPU
void hotplug_cpu__broadcast_tick_pull(int deadcpu)
{
	struct clock_event_device *bc;
	unsigned long flags;

	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
	bc = tick_broadcast_device.evtdev;

	if (bc && broadcast_needs_cpu(bc, deadcpu)) {
		/*
		 * If the broadcast force bit of the current CPU is set,
		 * then the current CPU has not yet reprogrammed the local
		 * timer device to avoid a ping-pong race. See
		 * ___tick_broadcast_oneshot_control().
		 *
		 * If the broadcast device is hrtimer based then
		 * programming the broadcast event below does not have any
		 * effect because the local clockevent device is not
		 * running and not programmed because the broadcast event
		 * is not earlier than the pending event of the local clock
		 * event device. As a consequence all CPUs waiting for a
		 * broadcast event are stuck forever.
		 *
		 * Detect this condition and reprogram the cpu local timer
		 * device to avoid the starvation.
		 */
		if (tick_check_broadcast_expired()) {
			struct tick_device *td = this_cpu_ptr(&tick_cpu_device);

			cpumask_clear_cpu(smp_processor_id(), tick_broadcast_force_mask);
			tick_program_event(td->evtdev->next_event, 1);
		}

		/* This moves the broadcast assignment to this CPU: */
		clockevents_program_event(bc, bc->next_event, 1);
	}
	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}

/*
 * Remove a dying CPU from broadcasting
 */
static void tick_broadcast_oneshot_offline(unsigned int cpu)
{
	if (tick_get_oneshot_wakeup_device(cpu))
		tick_set_oneshot_wakeup_device(NULL, cpu);

	/*
	 * Clear the broadcast masks for the dead cpu, but do not stop
	 * the broadcast device!
	 */
	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
}
#endif

/*
 * Check, whether the broadcast device is in one shot mode
 */
int tick_broadcast_oneshot_active(void)
{
	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
}

/*
 * Check whether the broadcast device supports oneshot.
 */
bool tick_broadcast_oneshot_available(void)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
}

#else
int __tick_broadcast_oneshot_control(enum tick_broadcast_state state)
{
	struct clock_event_device *bc = tick_broadcast_device.evtdev;

	if (!bc || (bc->features & CLOCK_EVT_FEAT_HRTIMER))
		return -EBUSY;

	return 0;
}
#endif

void __init tick_broadcast_init(void)
{
	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
#ifdef CONFIG_TICK_ONESHOT
	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
#endif
}

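/*
 * Note on ordering (informational): tick_broadcast_init() is called from
 * tick_init() early in start_kernel(), with interrupts still disabled,
 * which is why the cpumask allocations above use GFP_NOWAIT.
 */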