xref: /linux/kernel/time/tick-broadcast.c (revision ff5599816711d2e67da2d7561fd36ac48debd433)
1 /*
2  * linux/kernel/time/tick-broadcast.c
3  *
4  * This file contains functions which emulate a local clock-event
5  * device via a broadcast event source.
6  *
7  * Copyright(C) 2005-2006, Thomas Gleixner <tglx@linutronix.de>
8  * Copyright(C) 2005-2007, Red Hat, Inc., Ingo Molnar
9  * Copyright(C) 2006-2007, Timesys Corp., Thomas Gleixner
10  *
11  * This code is licenced under the GPL version 2. For details see
12  * kernel-base/COPYING.
13  */
14 #include <linux/cpu.h>
15 #include <linux/err.h>
16 #include <linux/hrtimer.h>
17 #include <linux/interrupt.h>
18 #include <linux/percpu.h>
19 #include <linux/profile.h>
20 #include <linux/sched.h>
21 #include <linux/smp.h>
22 #include <linux/module.h>
23 
24 #include "tick-internal.h"
25 
26 /*
27  * Broadcast support for broken x86 hardware, where the local apic
28  * timer stops in C3 state.
29  */
30 
31 static struct tick_device tick_broadcast_device;
32 static cpumask_var_t tick_broadcast_mask;
33 static cpumask_var_t tick_broadcast_on;
34 static cpumask_var_t tmpmask;
35 static DEFINE_RAW_SPINLOCK(tick_broadcast_lock);
36 static int tick_broadcast_force;
37 
38 #ifdef CONFIG_TICK_ONESHOT
39 static void tick_broadcast_clear_oneshot(int cpu);
40 #else
41 static inline void tick_broadcast_clear_oneshot(int cpu) { }
42 #endif
43 
44 /*
45  * Debugging: see timer_list.c
46  */
47 struct tick_device *tick_get_broadcast_device(void)
48 {
49 	return &tick_broadcast_device;
50 }
51 
52 struct cpumask *tick_get_broadcast_mask(void)
53 {
54 	return tick_broadcast_mask;
55 }
56 
/*
 * Start the broadcast device in periodic mode. A missing device
 * (bc == NULL) is silently ignored.
 */
static void tick_broadcast_start_periodic(struct clock_event_device *bc)
{
	if (!bc)
		return;

	tick_setup_periodic(bc, 1);
}
65 
66 /*
67  * Check, if the device can be utilized as broadcast device:
68  */
69 static bool tick_check_broadcast_device(struct clock_event_device *curdev,
70 					struct clock_event_device *newdev)
71 {
72 	if ((newdev->features & CLOCK_EVT_FEAT_DUMMY) ||
73 	    (newdev->features & CLOCK_EVT_FEAT_C3STOP))
74 		return false;
75 
76 	if (tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT &&
77 	    !(newdev->features & CLOCK_EVT_FEAT_ONESHOT))
78 		return false;
79 
80 	return !curdev || newdev->rating > curdev->rating;
81 }
82 
83 /*
84  * Conditionally install/replace broadcast device
85  */
86 void tick_install_broadcast_device(struct clock_event_device *dev)
87 {
88 	struct clock_event_device *cur = tick_broadcast_device.evtdev;
89 
90 	if (!tick_check_broadcast_device(cur, dev))
91 		return;
92 
93 	if (!try_module_get(dev->owner))
94 		return;
95 
96 	clockevents_exchange_device(cur, dev);
97 	if (cur)
98 		cur->event_handler = clockevents_handle_noop;
99 	tick_broadcast_device.evtdev = dev;
100 	if (!cpumask_empty(tick_broadcast_mask))
101 		tick_broadcast_start_periodic(dev);
102 	/*
103 	 * Inform all cpus about this. We might be in a situation
104 	 * where we did not switch to oneshot mode because the per cpu
105 	 * devices are affected by CLOCK_EVT_FEAT_C3STOP and the lack
106 	 * of a oneshot capable broadcast device. Without that
107 	 * notification the systems stays stuck in periodic mode
108 	 * forever.
109 	 */
110 	if (dev->features & CLOCK_EVT_FEAT_ONESHOT)
111 		tick_clock_notify();
112 }
113 
114 /*
115  * Check, if the device is the broadcast device
116  */
117 int tick_is_broadcast_device(struct clock_event_device *dev)
118 {
119 	return (dev && tick_broadcast_device.evtdev == dev);
120 }
121 
/*
 * Fallback broadcast function, installed when a device depends on
 * broadcast but no real broadcast function is available. It cannot
 * deliver anything, so it complains (once) and ignores @mask.
 */
static void err_broadcast(const struct cpumask *mask)
{
	pr_crit_once("Failed to broadcast timer tick. Some CPUs may be unresponsive.\n");
}
126 
127 static void tick_device_setup_broadcast_func(struct clock_event_device *dev)
128 {
129 	if (!dev->broadcast)
130 		dev->broadcast = tick_broadcast;
131 	if (!dev->broadcast) {
132 		pr_warn_once("%s depends on broadcast, but no broadcast function available\n",
133 			     dev->name);
134 		dev->broadcast = err_broadcast;
135 	}
136 }
137 
138 /*
139  * Check, if the device is disfunctional and a place holder, which
140  * needs to be handled by the broadcast device.
141  */
142 int tick_device_uses_broadcast(struct clock_event_device *dev, int cpu)
143 {
144 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
145 	unsigned long flags;
146 	int ret;
147 
148 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
149 
150 	/*
151 	 * Devices might be registered with both periodic and oneshot
152 	 * mode disabled. This signals, that the device needs to be
153 	 * operated from the broadcast device and is a placeholder for
154 	 * the cpu local device.
155 	 */
156 	if (!tick_device_is_functional(dev)) {
157 		dev->event_handler = tick_handle_periodic;
158 		tick_device_setup_broadcast_func(dev);
159 		cpumask_set_cpu(cpu, tick_broadcast_mask);
160 		tick_broadcast_start_periodic(bc);
161 		ret = 1;
162 	} else {
163 		/*
164 		 * Clear the broadcast bit for this cpu if the
165 		 * device is not power state affected.
166 		 */
167 		if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
168 			cpumask_clear_cpu(cpu, tick_broadcast_mask);
169 		else
170 			tick_device_setup_broadcast_func(dev);
171 
172 		/*
173 		 * Clear the broadcast bit if the CPU is not in
174 		 * periodic broadcast on state.
175 		 */
176 		if (!cpumask_test_cpu(cpu, tick_broadcast_on))
177 			cpumask_clear_cpu(cpu, tick_broadcast_mask);
178 
179 		switch (tick_broadcast_device.mode) {
180 		case TICKDEV_MODE_ONESHOT:
181 			/*
182 			 * If the system is in oneshot mode we can
183 			 * unconditionally clear the oneshot mask bit,
184 			 * because the CPU is running and therefore
185 			 * not in an idle state which causes the power
186 			 * state affected device to stop. Let the
187 			 * caller initialize the device.
188 			 */
189 			tick_broadcast_clear_oneshot(cpu);
190 			ret = 0;
191 			break;
192 
193 		case TICKDEV_MODE_PERIODIC:
194 			/*
195 			 * If the system is in periodic mode, check
196 			 * whether the broadcast device can be
197 			 * switched off now.
198 			 */
199 			if (cpumask_empty(tick_broadcast_mask) && bc)
200 				clockevents_shutdown(bc);
201 			/*
202 			 * If we kept the cpu in the broadcast mask,
203 			 * tell the caller to leave the per cpu device
204 			 * in shutdown state. The periodic interrupt
205 			 * is delivered by the broadcast device.
206 			 */
207 			ret = cpumask_test_cpu(cpu, tick_broadcast_mask);
208 			break;
209 		default:
210 			/* Nothing to do */
211 			ret = 0;
212 			break;
213 		}
214 	}
215 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
216 	return ret;
217 }
218 
219 #ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST
220 int tick_receive_broadcast(void)
221 {
222 	struct tick_device *td = this_cpu_ptr(&tick_cpu_device);
223 	struct clock_event_device *evt = td->evtdev;
224 
225 	if (!evt)
226 		return -ENODEV;
227 
228 	if (!evt->event_handler)
229 		return -EINVAL;
230 
231 	evt->event_handler(evt);
232 	return 0;
233 }
234 #endif
235 
236 /*
237  * Broadcast the event to the cpus, which are set in the mask (mangled).
238  */
239 static void tick_do_broadcast(struct cpumask *mask)
240 {
241 	int cpu = smp_processor_id();
242 	struct tick_device *td;
243 
244 	/*
245 	 * Check, if the current cpu is in the mask
246 	 */
247 	if (cpumask_test_cpu(cpu, mask)) {
248 		cpumask_clear_cpu(cpu, mask);
249 		td = &per_cpu(tick_cpu_device, cpu);
250 		td->evtdev->event_handler(td->evtdev);
251 	}
252 
253 	if (!cpumask_empty(mask)) {
254 		/*
255 		 * It might be necessary to actually check whether the devices
256 		 * have different broadcast functions. For now, just use the
257 		 * one of the first device. This works as long as we have this
258 		 * misfeature only on x86 (lapic)
259 		 */
260 		td = &per_cpu(tick_cpu_device, cpumask_first(mask));
261 		td->evtdev->broadcast(mask);
262 	}
263 }
264 
265 /*
266  * Periodic broadcast:
267  * - invoke the broadcast handlers
268  */
269 static void tick_do_periodic_broadcast(void)
270 {
271 	raw_spin_lock(&tick_broadcast_lock);
272 
273 	cpumask_and(tmpmask, cpu_online_mask, tick_broadcast_mask);
274 	tick_do_broadcast(tmpmask);
275 
276 	raw_spin_unlock(&tick_broadcast_lock);
277 }
278 
279 /*
280  * Event handler for periodic broadcast ticks
281  */
282 static void tick_handle_periodic_broadcast(struct clock_event_device *dev)
283 {
284 	ktime_t next;
285 
286 	tick_do_periodic_broadcast();
287 
288 	/*
289 	 * The device is in periodic mode. No reprogramming necessary:
290 	 */
291 	if (dev->mode == CLOCK_EVT_MODE_PERIODIC)
292 		return;
293 
294 	/*
295 	 * Setup the next period for devices, which do not have
296 	 * periodic mode. We read dev->next_event first and add to it
297 	 * when the event already expired. clockevents_program_event()
298 	 * sets dev->next_event only when the event is really
299 	 * programmed to the device.
300 	 */
301 	for (next = dev->next_event; ;) {
302 		next = ktime_add(next, tick_period);
303 
304 		if (!clockevents_program_event(dev, next, false))
305 			return;
306 		tick_do_periodic_broadcast();
307 	}
308 }
309 
310 /*
311  * Powerstate information: The system enters/leaves a state, where
312  * affected devices might stop
313  */
314 static void tick_do_broadcast_on_off(unsigned long *reason)
315 {
316 	struct clock_event_device *bc, *dev;
317 	struct tick_device *td;
318 	unsigned long flags;
319 	int cpu, bc_stopped;
320 
321 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
322 
323 	cpu = smp_processor_id();
324 	td = &per_cpu(tick_cpu_device, cpu);
325 	dev = td->evtdev;
326 	bc = tick_broadcast_device.evtdev;
327 
328 	/*
329 	 * Is the device not affected by the powerstate ?
330 	 */
331 	if (!dev || !(dev->features & CLOCK_EVT_FEAT_C3STOP))
332 		goto out;
333 
334 	if (!tick_device_is_functional(dev))
335 		goto out;
336 
337 	bc_stopped = cpumask_empty(tick_broadcast_mask);
338 
339 	switch (*reason) {
340 	case CLOCK_EVT_NOTIFY_BROADCAST_ON:
341 	case CLOCK_EVT_NOTIFY_BROADCAST_FORCE:
342 		cpumask_set_cpu(cpu, tick_broadcast_on);
343 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_mask)) {
344 			if (tick_broadcast_device.mode ==
345 			    TICKDEV_MODE_PERIODIC)
346 				clockevents_shutdown(dev);
347 		}
348 		if (*reason == CLOCK_EVT_NOTIFY_BROADCAST_FORCE)
349 			tick_broadcast_force = 1;
350 		break;
351 	case CLOCK_EVT_NOTIFY_BROADCAST_OFF:
352 		if (tick_broadcast_force)
353 			break;
354 		cpumask_clear_cpu(cpu, tick_broadcast_on);
355 		if (!tick_device_is_functional(dev))
356 			break;
357 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_mask)) {
358 			if (tick_broadcast_device.mode ==
359 			    TICKDEV_MODE_PERIODIC)
360 				tick_setup_periodic(dev, 0);
361 		}
362 		break;
363 	}
364 
365 	if (cpumask_empty(tick_broadcast_mask)) {
366 		if (!bc_stopped)
367 			clockevents_shutdown(bc);
368 	} else if (bc_stopped) {
369 		if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
370 			tick_broadcast_start_periodic(bc);
371 		else
372 			tick_broadcast_setup_oneshot(bc);
373 	}
374 out:
375 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
376 }
377 
378 /*
379  * Powerstate information: The system enters/leaves a state, where
380  * affected devices might stop.
381  */
382 void tick_broadcast_on_off(unsigned long reason, int *oncpu)
383 {
384 	if (!cpumask_test_cpu(*oncpu, cpu_online_mask))
385 		printk(KERN_ERR "tick-broadcast: ignoring broadcast for "
386 		       "offline CPU #%d\n", *oncpu);
387 	else
388 		tick_do_broadcast_on_off(&reason);
389 }
390 
391 /*
392  * Set the periodic handler depending on broadcast on/off
393  */
394 void tick_set_periodic_handler(struct clock_event_device *dev, int broadcast)
395 {
396 	if (!broadcast)
397 		dev->event_handler = tick_handle_periodic;
398 	else
399 		dev->event_handler = tick_handle_periodic_broadcast;
400 }
401 
402 /*
403  * Remove a CPU from broadcasting
404  */
405 void tick_shutdown_broadcast(unsigned int *cpup)
406 {
407 	struct clock_event_device *bc;
408 	unsigned long flags;
409 	unsigned int cpu = *cpup;
410 
411 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
412 
413 	bc = tick_broadcast_device.evtdev;
414 	cpumask_clear_cpu(cpu, tick_broadcast_mask);
415 	cpumask_clear_cpu(cpu, tick_broadcast_on);
416 
417 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC) {
418 		if (bc && cpumask_empty(tick_broadcast_mask))
419 			clockevents_shutdown(bc);
420 	}
421 
422 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
423 }
424 
425 void tick_suspend_broadcast(void)
426 {
427 	struct clock_event_device *bc;
428 	unsigned long flags;
429 
430 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
431 
432 	bc = tick_broadcast_device.evtdev;
433 	if (bc)
434 		clockevents_shutdown(bc);
435 
436 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
437 }
438 
439 int tick_resume_broadcast(void)
440 {
441 	struct clock_event_device *bc;
442 	unsigned long flags;
443 	int broadcast = 0;
444 
445 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
446 
447 	bc = tick_broadcast_device.evtdev;
448 
449 	if (bc) {
450 		clockevents_set_mode(bc, CLOCK_EVT_MODE_RESUME);
451 
452 		switch (tick_broadcast_device.mode) {
453 		case TICKDEV_MODE_PERIODIC:
454 			if (!cpumask_empty(tick_broadcast_mask))
455 				tick_broadcast_start_periodic(bc);
456 			broadcast = cpumask_test_cpu(smp_processor_id(),
457 						     tick_broadcast_mask);
458 			break;
459 		case TICKDEV_MODE_ONESHOT:
460 			if (!cpumask_empty(tick_broadcast_mask))
461 				broadcast = tick_resume_broadcast_oneshot(bc);
462 			break;
463 		}
464 	}
465 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
466 
467 	return broadcast;
468 }
469 
470 
471 #ifdef CONFIG_TICK_ONESHOT
472 
473 static cpumask_var_t tick_broadcast_oneshot_mask;
474 static cpumask_var_t tick_broadcast_pending_mask;
475 static cpumask_var_t tick_broadcast_force_mask;
476 
477 /*
478  * Exposed for debugging: see timer_list.c
479  */
480 struct cpumask *tick_get_broadcast_oneshot_mask(void)
481 {
482 	return tick_broadcast_oneshot_mask;
483 }
484 
485 /*
486  * Called before going idle with interrupts disabled. Checks whether a
487  * broadcast event from the other core is about to happen. We detected
488  * that in tick_broadcast_oneshot_control(). The callsite can use this
489  * to avoid a deep idle transition as we are about to get the
490  * broadcast IPI right away.
491  */
492 int tick_check_broadcast_expired(void)
493 {
494 	return cpumask_test_cpu(smp_processor_id(), tick_broadcast_force_mask);
495 }
496 
497 /*
498  * Set broadcast interrupt affinity
499  */
500 static void tick_broadcast_set_affinity(struct clock_event_device *bc,
501 					const struct cpumask *cpumask)
502 {
503 	if (!(bc->features & CLOCK_EVT_FEAT_DYNIRQ))
504 		return;
505 
506 	if (cpumask_equal(bc->cpumask, cpumask))
507 		return;
508 
509 	bc->cpumask = cpumask;
510 	irq_set_affinity(bc->irq, bc->cpumask);
511 }
512 
513 static int tick_broadcast_set_event(struct clock_event_device *bc, int cpu,
514 				    ktime_t expires, int force)
515 {
516 	int ret;
517 
518 	if (bc->mode != CLOCK_EVT_MODE_ONESHOT)
519 		clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
520 
521 	ret = clockevents_program_event(bc, expires, force);
522 	if (!ret)
523 		tick_broadcast_set_affinity(bc, cpumask_of(cpu));
524 	return ret;
525 }
526 
527 int tick_resume_broadcast_oneshot(struct clock_event_device *bc)
528 {
529 	clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
530 	return 0;
531 }
532 
533 /*
534  * Called from irq_enter() when idle was interrupted to reenable the
535  * per cpu device.
536  */
537 void tick_check_oneshot_broadcast(int cpu)
538 {
539 	if (cpumask_test_cpu(cpu, tick_broadcast_oneshot_mask)) {
540 		struct tick_device *td = &per_cpu(tick_cpu_device, cpu);
541 
542 		/*
543 		 * We might be in the middle of switching over from
544 		 * periodic to oneshot. If the CPU has not yet
545 		 * switched over, leave the device alone.
546 		 */
547 		if (td->mode == TICKDEV_MODE_ONESHOT) {
548 			clockevents_set_mode(td->evtdev,
549 					     CLOCK_EVT_MODE_ONESHOT);
550 		}
551 	}
552 }
553 
554 /*
555  * Handle oneshot mode broadcasting
556  */
557 static void tick_handle_oneshot_broadcast(struct clock_event_device *dev)
558 {
559 	struct tick_device *td;
560 	ktime_t now, next_event;
561 	int cpu, next_cpu = 0;
562 
563 	raw_spin_lock(&tick_broadcast_lock);
564 again:
565 	dev->next_event.tv64 = KTIME_MAX;
566 	next_event.tv64 = KTIME_MAX;
567 	cpumask_clear(tmpmask);
568 	now = ktime_get();
569 	/* Find all expired events */
570 	for_each_cpu(cpu, tick_broadcast_oneshot_mask) {
571 		td = &per_cpu(tick_cpu_device, cpu);
572 		if (td->evtdev->next_event.tv64 <= now.tv64) {
573 			cpumask_set_cpu(cpu, tmpmask);
574 			/*
575 			 * Mark the remote cpu in the pending mask, so
576 			 * it can avoid reprogramming the cpu local
577 			 * timer in tick_broadcast_oneshot_control().
578 			 */
579 			cpumask_set_cpu(cpu, tick_broadcast_pending_mask);
580 		} else if (td->evtdev->next_event.tv64 < next_event.tv64) {
581 			next_event.tv64 = td->evtdev->next_event.tv64;
582 			next_cpu = cpu;
583 		}
584 	}
585 
586 	/*
587 	 * Remove the current cpu from the pending mask. The event is
588 	 * delivered immediately in tick_do_broadcast() !
589 	 */
590 	cpumask_clear_cpu(smp_processor_id(), tick_broadcast_pending_mask);
591 
592 	/* Take care of enforced broadcast requests */
593 	cpumask_or(tmpmask, tmpmask, tick_broadcast_force_mask);
594 	cpumask_clear(tick_broadcast_force_mask);
595 
596 	/*
597 	 * Sanity check. Catch the case where we try to broadcast to
598 	 * offline cpus.
599 	 */
600 	if (WARN_ON_ONCE(!cpumask_subset(tmpmask, cpu_online_mask)))
601 		cpumask_and(tmpmask, tmpmask, cpu_online_mask);
602 
603 	/*
604 	 * Wakeup the cpus which have an expired event.
605 	 */
606 	tick_do_broadcast(tmpmask);
607 
608 	/*
609 	 * Two reasons for reprogram:
610 	 *
611 	 * - The global event did not expire any CPU local
612 	 * events. This happens in dyntick mode, as the maximum PIT
613 	 * delta is quite small.
614 	 *
615 	 * - There are pending events on sleeping CPUs which were not
616 	 * in the event mask
617 	 */
618 	if (next_event.tv64 != KTIME_MAX) {
619 		/*
620 		 * Rearm the broadcast device. If event expired,
621 		 * repeat the above
622 		 */
623 		if (tick_broadcast_set_event(dev, next_cpu, next_event, 0))
624 			goto again;
625 	}
626 	raw_spin_unlock(&tick_broadcast_lock);
627 }
628 
629 /*
630  * Powerstate information: The system enters/leaves a state, where
631  * affected devices might stop
632  */
633 void tick_broadcast_oneshot_control(unsigned long reason)
634 {
635 	struct clock_event_device *bc, *dev;
636 	struct tick_device *td;
637 	unsigned long flags;
638 	ktime_t now;
639 	int cpu;
640 
641 	/*
642 	 * Periodic mode does not care about the enter/exit of power
643 	 * states
644 	 */
645 	if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
646 		return;
647 
648 	/*
649 	 * We are called with preemtion disabled from the depth of the
650 	 * idle code, so we can't be moved away.
651 	 */
652 	cpu = smp_processor_id();
653 	td = &per_cpu(tick_cpu_device, cpu);
654 	dev = td->evtdev;
655 
656 	if (!(dev->features & CLOCK_EVT_FEAT_C3STOP))
657 		return;
658 
659 	bc = tick_broadcast_device.evtdev;
660 
661 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
662 	if (reason == CLOCK_EVT_NOTIFY_BROADCAST_ENTER) {
663 		if (!cpumask_test_and_set_cpu(cpu, tick_broadcast_oneshot_mask)) {
664 			WARN_ON_ONCE(cpumask_test_cpu(cpu, tick_broadcast_pending_mask));
665 			clockevents_set_mode(dev, CLOCK_EVT_MODE_SHUTDOWN);
666 			/*
667 			 * We only reprogram the broadcast timer if we
668 			 * did not mark ourself in the force mask and
669 			 * if the cpu local event is earlier than the
670 			 * broadcast event. If the current CPU is in
671 			 * the force mask, then we are going to be
672 			 * woken by the IPI right away.
673 			 */
674 			if (!cpumask_test_cpu(cpu, tick_broadcast_force_mask) &&
675 			    dev->next_event.tv64 < bc->next_event.tv64)
676 				tick_broadcast_set_event(bc, cpu, dev->next_event, 1);
677 		}
678 	} else {
679 		if (cpumask_test_and_clear_cpu(cpu, tick_broadcast_oneshot_mask)) {
680 			clockevents_set_mode(dev, CLOCK_EVT_MODE_ONESHOT);
681 			/*
682 			 * The cpu which was handling the broadcast
683 			 * timer marked this cpu in the broadcast
684 			 * pending mask and fired the broadcast
685 			 * IPI. So we are going to handle the expired
686 			 * event anyway via the broadcast IPI
687 			 * handler. No need to reprogram the timer
688 			 * with an already expired event.
689 			 */
690 			if (cpumask_test_and_clear_cpu(cpu,
691 				       tick_broadcast_pending_mask))
692 				goto out;
693 
694 			/*
695 			 * Bail out if there is no next event.
696 			 */
697 			if (dev->next_event.tv64 == KTIME_MAX)
698 				goto out;
699 			/*
700 			 * If the pending bit is not set, then we are
701 			 * either the CPU handling the broadcast
702 			 * interrupt or we got woken by something else.
703 			 *
704 			 * We are not longer in the broadcast mask, so
705 			 * if the cpu local expiry time is already
706 			 * reached, we would reprogram the cpu local
707 			 * timer with an already expired event.
708 			 *
709 			 * This can lead to a ping-pong when we return
710 			 * to idle and therefor rearm the broadcast
711 			 * timer before the cpu local timer was able
712 			 * to fire. This happens because the forced
713 			 * reprogramming makes sure that the event
714 			 * will happen in the future and depending on
715 			 * the min_delta setting this might be far
716 			 * enough out that the ping-pong starts.
717 			 *
718 			 * If the cpu local next_event has expired
719 			 * then we know that the broadcast timer
720 			 * next_event has expired as well and
721 			 * broadcast is about to be handled. So we
722 			 * avoid reprogramming and enforce that the
723 			 * broadcast handler, which did not run yet,
724 			 * will invoke the cpu local handler.
725 			 *
726 			 * We cannot call the handler directly from
727 			 * here, because we might be in a NOHZ phase
728 			 * and we did not go through the irq_enter()
729 			 * nohz fixups.
730 			 */
731 			now = ktime_get();
732 			if (dev->next_event.tv64 <= now.tv64) {
733 				cpumask_set_cpu(cpu, tick_broadcast_force_mask);
734 				goto out;
735 			}
736 			/*
737 			 * We got woken by something else. Reprogram
738 			 * the cpu local timer device.
739 			 */
740 			tick_program_event(dev->next_event, 1);
741 		}
742 	}
743 out:
744 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
745 }
746 
747 /*
748  * Reset the one shot broadcast for a cpu
749  *
750  * Called with tick_broadcast_lock held
751  */
752 static void tick_broadcast_clear_oneshot(int cpu)
753 {
754 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
755 }
756 
757 static void tick_broadcast_init_next_event(struct cpumask *mask,
758 					   ktime_t expires)
759 {
760 	struct tick_device *td;
761 	int cpu;
762 
763 	for_each_cpu(cpu, mask) {
764 		td = &per_cpu(tick_cpu_device, cpu);
765 		if (td->evtdev)
766 			td->evtdev->next_event = expires;
767 	}
768 }
769 
770 /**
771  * tick_broadcast_setup_oneshot - setup the broadcast device
772  */
773 void tick_broadcast_setup_oneshot(struct clock_event_device *bc)
774 {
775 	int cpu = smp_processor_id();
776 
777 	/* Set it up only once ! */
778 	if (bc->event_handler != tick_handle_oneshot_broadcast) {
779 		int was_periodic = bc->mode == CLOCK_EVT_MODE_PERIODIC;
780 
781 		bc->event_handler = tick_handle_oneshot_broadcast;
782 
783 		/*
784 		 * We must be careful here. There might be other CPUs
785 		 * waiting for periodic broadcast. We need to set the
786 		 * oneshot_mask bits for those and program the
787 		 * broadcast device to fire.
788 		 */
789 		cpumask_copy(tmpmask, tick_broadcast_mask);
790 		cpumask_clear_cpu(cpu, tmpmask);
791 		cpumask_or(tick_broadcast_oneshot_mask,
792 			   tick_broadcast_oneshot_mask, tmpmask);
793 
794 		if (was_periodic && !cpumask_empty(tmpmask)) {
795 			clockevents_set_mode(bc, CLOCK_EVT_MODE_ONESHOT);
796 			tick_broadcast_init_next_event(tmpmask,
797 						       tick_next_period);
798 			tick_broadcast_set_event(bc, cpu, tick_next_period, 1);
799 		} else
800 			bc->next_event.tv64 = KTIME_MAX;
801 	} else {
802 		/*
803 		 * The first cpu which switches to oneshot mode sets
804 		 * the bit for all other cpus which are in the general
805 		 * (periodic) broadcast mask. So the bit is set and
806 		 * would prevent the first broadcast enter after this
807 		 * to program the bc device.
808 		 */
809 		tick_broadcast_clear_oneshot(cpu);
810 	}
811 }
812 
813 /*
814  * Select oneshot operating mode for the broadcast device
815  */
816 void tick_broadcast_switch_to_oneshot(void)
817 {
818 	struct clock_event_device *bc;
819 	unsigned long flags;
820 
821 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
822 
823 	tick_broadcast_device.mode = TICKDEV_MODE_ONESHOT;
824 	bc = tick_broadcast_device.evtdev;
825 	if (bc)
826 		tick_broadcast_setup_oneshot(bc);
827 
828 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
829 }
830 
831 
832 /*
833  * Remove a dead CPU from broadcasting
834  */
835 void tick_shutdown_broadcast_oneshot(unsigned int *cpup)
836 {
837 	unsigned long flags;
838 	unsigned int cpu = *cpup;
839 
840 	raw_spin_lock_irqsave(&tick_broadcast_lock, flags);
841 
842 	/*
843 	 * Clear the broadcast masks for the dead cpu, but do not stop
844 	 * the broadcast device!
845 	 */
846 	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask);
847 	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);
848 	cpumask_clear_cpu(cpu, tick_broadcast_force_mask);
849 
850 	raw_spin_unlock_irqrestore(&tick_broadcast_lock, flags);
851 }
852 
853 /*
854  * Check, whether the broadcast device is in one shot mode
855  */
856 int tick_broadcast_oneshot_active(void)
857 {
858 	return tick_broadcast_device.mode == TICKDEV_MODE_ONESHOT;
859 }
860 
861 /*
862  * Check whether the broadcast device supports oneshot.
863  */
864 bool tick_broadcast_oneshot_available(void)
865 {
866 	struct clock_event_device *bc = tick_broadcast_device.evtdev;
867 
868 	return bc ? bc->features & CLOCK_EVT_FEAT_ONESHOT : false;
869 }
870 
871 #endif
872 
873 void __init tick_broadcast_init(void)
874 {
875 	zalloc_cpumask_var(&tick_broadcast_mask, GFP_NOWAIT);
876 	zalloc_cpumask_var(&tick_broadcast_on, GFP_NOWAIT);
877 	zalloc_cpumask_var(&tmpmask, GFP_NOWAIT);
878 #ifdef CONFIG_TICK_ONESHOT
879 	zalloc_cpumask_var(&tick_broadcast_oneshot_mask, GFP_NOWAIT);
880 	zalloc_cpumask_var(&tick_broadcast_pending_mask, GFP_NOWAIT);
881 	zalloc_cpumask_var(&tick_broadcast_force_mask, GFP_NOWAIT);
882 #endif
883 }
884