xref: /linux/arch/x86/xen/time.c (revision a1ff5a7d78a036d6c2178ee5acd6ba4946243800)
1b2441318SGreg Kroah-Hartman // SPDX-License-Identifier: GPL-2.0
29702785aSThomas Gleixner /*
39702785aSThomas Gleixner  * Xen time implementation.
49702785aSThomas Gleixner  *
59702785aSThomas Gleixner  * This is implemented in terms of a clocksource driver which uses
69702785aSThomas Gleixner  * the hypervisor clock as a nanosecond timebase, and a clockevent
79702785aSThomas Gleixner  * driver which uses the hypervisor's timer mechanism.
89702785aSThomas Gleixner  *
99702785aSThomas Gleixner  * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
109702785aSThomas Gleixner  */
119702785aSThomas Gleixner #include <linux/kernel.h>
129702785aSThomas Gleixner #include <linux/interrupt.h>
139702785aSThomas Gleixner #include <linux/clocksource.h>
149702785aSThomas Gleixner #include <linux/clockchips.h>
155a0e3ad6STejun Heo #include <linux/gfp.h>
16c9d76a24SKonrad Rzeszutek Wilk #include <linux/slab.h>
175584880eSDavid Vrabel #include <linux/pvclock_gtod.h>
1876096863SStefano Stabellini #include <linux/timekeeper_internal.h>
199702785aSThomas Gleixner 
201c7b67f7SGerd Hoffmann #include <asm/pvclock.h>
219702785aSThomas Gleixner #include <asm/xen/hypervisor.h>
229702785aSThomas Gleixner #include <asm/xen/hypercall.h>
2399a7bcafSKrister Johansen #include <asm/xen/cpuid.h>
249702785aSThomas Gleixner 
259702785aSThomas Gleixner #include <xen/events.h>
26409771d2SStefano Stabellini #include <xen/features.h>
279702785aSThomas Gleixner #include <xen/interface/xen.h>
289702785aSThomas Gleixner #include <xen/interface/vcpu.h>
299702785aSThomas Gleixner 
309702785aSThomas Gleixner #include "xen-ops.h"
319702785aSThomas Gleixner 
322ec16bc0SRyan Thibodeaux /* Minimum amount of time until next clock event fires */
33*ad162488SFrediano Ziglio #define TIMER_SLOP	1
349702785aSThomas Gleixner 
3538669ba2SPavel Tatashin static u64 xen_sched_clock_offset __read_mostly;
3638669ba2SPavel Tatashin 
37e93ef949SAlok Kataria /* Get the TSC speed from Xen */
xen_tsc_khz(void)38409771d2SStefano Stabellini static unsigned long xen_tsc_khz(void)
399702785aSThomas Gleixner {
403807f345SGlauber Costa 	struct pvclock_vcpu_time_info *info =
419702785aSThomas Gleixner 		&HYPERVISOR_shared_info->vcpu_info[0].time;
429702785aSThomas Gleixner 
43898ec52dSHayato Ohhashi 	setup_force_cpu_cap(X86_FEATURE_TSC_KNOWN_FREQ);
443807f345SGlauber Costa 	return pvclock_tsc_khz(info);
459702785aSThomas Gleixner }
469702785aSThomas Gleixner 
xen_clocksource_read(void)477b25b9cbSPavel Tatashin static u64 xen_clocksource_read(void)
489702785aSThomas Gleixner {
491c7b67f7SGerd Hoffmann         struct pvclock_vcpu_time_info *src;
50a5a1d1c2SThomas Gleixner 	u64 ret;
519702785aSThomas Gleixner 
52f1c39625SJeremy Fitzhardinge 	preempt_disable_notrace();
533251f20bSBoris Ostrovsky 	src = &__this_cpu_read(xen_vcpu)->time;
541c7b67f7SGerd Hoffmann 	ret = pvclock_clocksource_read(src);
55f1c39625SJeremy Fitzhardinge 	preempt_enable_notrace();
569702785aSThomas Gleixner 	return ret;
579702785aSThomas Gleixner }
589702785aSThomas Gleixner 
xen_clocksource_get_cycles(struct clocksource * cs)59a5a1d1c2SThomas Gleixner static u64 xen_clocksource_get_cycles(struct clocksource *cs)
608e19608eSMagnus Damm {
618e19608eSMagnus Damm 	return xen_clocksource_read();
628e19608eSMagnus Damm }
638e19608eSMagnus Damm 
xen_sched_clock(void)648739c681SPeter Zijlstra static noinstr u64 xen_sched_clock(void)
6538669ba2SPavel Tatashin {
668739c681SPeter Zijlstra         struct pvclock_vcpu_time_info *src;
678739c681SPeter Zijlstra 	u64 ret;
688739c681SPeter Zijlstra 
698739c681SPeter Zijlstra 	src = &__this_cpu_read(xen_vcpu)->time;
708739c681SPeter Zijlstra 	ret = pvclock_clocksource_read_nowd(src);
718739c681SPeter Zijlstra 	ret -= xen_sched_clock_offset;
725c5e9a2bSPeter Zijlstra 
738739c681SPeter Zijlstra 	return ret;
7438669ba2SPavel Tatashin }
7538669ba2SPavel Tatashin 
xen_read_wallclock(struct timespec64 * ts)76e27c4929SArnd Bergmann static void xen_read_wallclock(struct timespec64 *ts)
779702785aSThomas Gleixner {
781c7b67f7SGerd Hoffmann 	struct shared_info *s = HYPERVISOR_shared_info;
791c7b67f7SGerd Hoffmann 	struct pvclock_wall_clock *wall_clock = &(s->wc);
801c7b67f7SGerd Hoffmann         struct pvclock_vcpu_time_info *vcpu_time;
819702785aSThomas Gleixner 
821c7b67f7SGerd Hoffmann 	vcpu_time = &get_cpu_var(xen_vcpu)->time;
831c7b67f7SGerd Hoffmann 	pvclock_read_wallclock(wall_clock, vcpu_time, ts);
841c7b67f7SGerd Hoffmann 	put_cpu_var(xen_vcpu);
859702785aSThomas Gleixner }
869702785aSThomas Gleixner 
/*
 * x86_platform.get_wallclock hook (installed by xen_init_time_common()):
 * simply forwards to xen_read_wallclock().
 */
static void xen_get_wallclock(struct timespec64 *now)
{
	xen_read_wallclock(now);
}
919702785aSThomas Gleixner 
xen_set_wallclock(const struct timespec64 * now)92e27c4929SArnd Bergmann static int xen_set_wallclock(const struct timespec64 *now)
939702785aSThomas Gleixner {
94b5494ad8SBoris Ostrovsky 	return -ENODEV;
959702785aSThomas Gleixner }
969702785aSThomas Gleixner 
xen_pvclock_gtod_notify(struct notifier_block * nb,unsigned long was_set,void * priv)9747433b8cSDavid Vrabel static int xen_pvclock_gtod_notify(struct notifier_block *nb,
9847433b8cSDavid Vrabel 				   unsigned long was_set, void *priv)
995584880eSDavid Vrabel {
10047433b8cSDavid Vrabel 	/* Protected by the calling core code serialization */
101187b26a9SStefano Stabellini 	static struct timespec64 next_sync;
1025584880eSDavid Vrabel 
10347433b8cSDavid Vrabel 	struct xen_platform_op op;
10476096863SStefano Stabellini 	struct timespec64 now;
10576096863SStefano Stabellini 	struct timekeeper *tk = priv;
10676096863SStefano Stabellini 	static bool settime64_supported = true;
10776096863SStefano Stabellini 	int ret;
1085584880eSDavid Vrabel 
10976096863SStefano Stabellini 	now.tv_sec = tk->xtime_sec;
11076096863SStefano Stabellini 	now.tv_nsec = (long)(tk->tkr_mono.xtime_nsec >> tk->tkr_mono.shift);
1115584880eSDavid Vrabel 
11247433b8cSDavid Vrabel 	/*
11347433b8cSDavid Vrabel 	 * We only take the expensive HV call when the clock was set
11447433b8cSDavid Vrabel 	 * or when the 11 minutes RTC synchronization time elapsed.
11547433b8cSDavid Vrabel 	 */
116187b26a9SStefano Stabellini 	if (!was_set && timespec64_compare(&now, &next_sync) < 0)
11747433b8cSDavid Vrabel 		return NOTIFY_OK;
1189702785aSThomas Gleixner 
11976096863SStefano Stabellini again:
12076096863SStefano Stabellini 	if (settime64_supported) {
12176096863SStefano Stabellini 		op.cmd = XENPF_settime64;
12276096863SStefano Stabellini 		op.u.settime64.mbz = 0;
12376096863SStefano Stabellini 		op.u.settime64.secs = now.tv_sec;
12476096863SStefano Stabellini 		op.u.settime64.nsecs = now.tv_nsec;
12576096863SStefano Stabellini 		op.u.settime64.system_time = xen_clocksource_read();
12676096863SStefano Stabellini 	} else {
127f3d6027eSStefano Stabellini 		op.cmd = XENPF_settime32;
128f3d6027eSStefano Stabellini 		op.u.settime32.secs = now.tv_sec;
129f3d6027eSStefano Stabellini 		op.u.settime32.nsecs = now.tv_nsec;
130f3d6027eSStefano Stabellini 		op.u.settime32.system_time = xen_clocksource_read();
13176096863SStefano Stabellini 	}
1329702785aSThomas Gleixner 
13376096863SStefano Stabellini 	ret = HYPERVISOR_platform_op(&op);
13476096863SStefano Stabellini 
13576096863SStefano Stabellini 	if (ret == -ENOSYS && settime64_supported) {
13676096863SStefano Stabellini 		settime64_supported = false;
13776096863SStefano Stabellini 		goto again;
13876096863SStefano Stabellini 	}
13976096863SStefano Stabellini 	if (ret < 0)
14076096863SStefano Stabellini 		return NOTIFY_BAD;
1419702785aSThomas Gleixner 
14247433b8cSDavid Vrabel 	/*
14347433b8cSDavid Vrabel 	 * Move the next drift compensation time 11 minutes
14447433b8cSDavid Vrabel 	 * ahead. That's emulating the sync_cmos_clock() update for
14547433b8cSDavid Vrabel 	 * the hardware RTC.
14647433b8cSDavid Vrabel 	 */
14747433b8cSDavid Vrabel 	next_sync = now;
14847433b8cSDavid Vrabel 	next_sync.tv_sec += 11 * 60;
14947433b8cSDavid Vrabel 
1505584880eSDavid Vrabel 	return NOTIFY_OK;
1519702785aSThomas Gleixner }
1529702785aSThomas Gleixner 
1535584880eSDavid Vrabel static struct notifier_block xen_pvclock_gtod_notifier = {
1545584880eSDavid Vrabel 	.notifier_call = xen_pvclock_gtod_notify,
1555584880eSDavid Vrabel };
1565584880eSDavid Vrabel 
xen_cs_enable(struct clocksource * cs)157eec399ddSThomas Gleixner static int xen_cs_enable(struct clocksource *cs)
158eec399ddSThomas Gleixner {
159b95a8a27SThomas Gleixner 	vclocks_set_used(VDSO_CLOCKMODE_PVCLOCK);
160eec399ddSThomas Gleixner 	return 0;
161eec399ddSThomas Gleixner }
162eec399ddSThomas Gleixner 
1639702785aSThomas Gleixner static struct clocksource xen_clocksource __read_mostly = {
1649702785aSThomas Gleixner 	.name	= "xen",
1659702785aSThomas Gleixner 	.rating	= 400,
1669702785aSThomas Gleixner 	.read	= xen_clocksource_get_cycles,
167eec399ddSThomas Gleixner 	.mask	= CLOCKSOURCE_MASK(64),
1689702785aSThomas Gleixner 	.flags	= CLOCK_SOURCE_IS_CONTINUOUS,
169eec399ddSThomas Gleixner 	.enable = xen_cs_enable,
1709702785aSThomas Gleixner };
1719702785aSThomas Gleixner 
/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, sharing the
   same event channel is a 100Hz tick which is delivered while the
   vcpu is running.  We don't care about or use this tick, but it will
   cause the core time code to think the timer fired too soon, and
   will end up resetting it each time.  It could be filtered, but
   doing so has complications when the ktime clocksource is not yet
   the xen clocksource (i.e., at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/
1949702785aSThomas Gleixner 
1959702785aSThomas Gleixner 
1969702785aSThomas Gleixner /*
1979702785aSThomas Gleixner   Get a hypervisor absolute time.  In theory we could maintain an
1989702785aSThomas Gleixner   offset between the kernel's time and the hypervisor's time, and
1999702785aSThomas Gleixner   apply that to a kernel's absolute timeout.  Unfortunately the
2009702785aSThomas Gleixner   hypervisor and kernel times can drift even if the kernel is using
2019702785aSThomas Gleixner   the Xen clocksource, because ntp can warp the kernel's clocksource.
2029702785aSThomas Gleixner */
get_abs_timeout(unsigned long delta)2039702785aSThomas Gleixner static s64 get_abs_timeout(unsigned long delta)
2049702785aSThomas Gleixner {
2059702785aSThomas Gleixner 	return xen_clocksource_read() + delta;
2069702785aSThomas Gleixner }
2079702785aSThomas Gleixner 
/*
 * ->set_state_shutdown() hook of the timer_op clockevent.  @evt is not
 * used; the hypercall result is ignored and 0 is always returned.
 */
static int xen_timerop_shutdown(struct clock_event_device *evt)
{
	/* A timeout of 0 cancels the pending timer. */
	HYPERVISOR_set_timer_op(0);

	return 0;
}
2159702785aSThomas Gleixner 
/*
 * ->set_next_event() hook of the timer_op clockevent: arm the timer for
 * "now + @delta".  Warns if @evt is not in oneshot state and BUG()s when
 * the hypercall returns a negative value.  Always returns 0.
 */
static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	int rc;

	WARN_ON(!clockevent_state_oneshot(evt));

	rc = HYPERVISOR_set_timer_op(get_abs_timeout(delta));
	if (rc < 0)
		BUG();

	/*
	 * We may have missed the deadline, but there's no real way of
	 * knowing for sure.  If the event was in the past, then we'll
	 * get an immediate interrupt.
	 */
	return 0;
}
2309702785aSThomas Gleixner 
2312ec16bc0SRyan Thibodeaux static struct clock_event_device xen_timerop_clockevent __ro_after_init = {
2329702785aSThomas Gleixner 	.name			= "xen",
2339702785aSThomas Gleixner 	.features		= CLOCK_EVT_FEAT_ONESHOT,
2349702785aSThomas Gleixner 
2359702785aSThomas Gleixner 	.max_delta_ns		= 0xffffffff,
2363d18d661SNicolai Stange 	.max_delta_ticks	= 0xffffffff,
2379702785aSThomas Gleixner 	.min_delta_ns		= TIMER_SLOP,
2383d18d661SNicolai Stange 	.min_delta_ticks	= TIMER_SLOP,
2399702785aSThomas Gleixner 
2409702785aSThomas Gleixner 	.mult			= 1,
2419702785aSThomas Gleixner 	.shift			= 0,
2429702785aSThomas Gleixner 	.rating			= 500,
2439702785aSThomas Gleixner 
244955381ddSViresh Kumar 	.set_state_shutdown	= xen_timerop_shutdown,
2459702785aSThomas Gleixner 	.set_next_event		= xen_timerop_set_next_event,
2469702785aSThomas Gleixner };
2479702785aSThomas Gleixner 
xen_vcpuop_shutdown(struct clock_event_device * evt)248955381ddSViresh Kumar static int xen_vcpuop_shutdown(struct clock_event_device *evt)
2499702785aSThomas Gleixner {
2509702785aSThomas Gleixner 	int cpu = smp_processor_id();
2519702785aSThomas Gleixner 
252ad5475f9SVitaly Kuznetsov 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, xen_vcpu_nr(cpu),
253ad5475f9SVitaly Kuznetsov 			       NULL) ||
254ad5475f9SVitaly Kuznetsov 	    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
255ad5475f9SVitaly Kuznetsov 			       NULL))
2569702785aSThomas Gleixner 		BUG();
257955381ddSViresh Kumar 
258955381ddSViresh Kumar 	return 0;
2599702785aSThomas Gleixner }
260955381ddSViresh Kumar 
xen_vcpuop_set_oneshot(struct clock_event_device * evt)261955381ddSViresh Kumar static int xen_vcpuop_set_oneshot(struct clock_event_device *evt)
262955381ddSViresh Kumar {
263955381ddSViresh Kumar 	int cpu = smp_processor_id();
264955381ddSViresh Kumar 
265ad5475f9SVitaly Kuznetsov 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
266ad5475f9SVitaly Kuznetsov 			       NULL))
267955381ddSViresh Kumar 		BUG();
268955381ddSViresh Kumar 
269955381ddSViresh Kumar 	return 0;
2709702785aSThomas Gleixner }
2719702785aSThomas Gleixner 
xen_vcpuop_set_next_event(unsigned long delta,struct clock_event_device * evt)2729702785aSThomas Gleixner static int xen_vcpuop_set_next_event(unsigned long delta,
2739702785aSThomas Gleixner 				     struct clock_event_device *evt)
2749702785aSThomas Gleixner {
2759702785aSThomas Gleixner 	int cpu = smp_processor_id();
2769702785aSThomas Gleixner 	struct vcpu_set_singleshot_timer single;
2779702785aSThomas Gleixner 	int ret;
2789702785aSThomas Gleixner 
279955381ddSViresh Kumar 	WARN_ON(!clockevent_state_oneshot(evt));
2809702785aSThomas Gleixner 
2819702785aSThomas Gleixner 	single.timeout_abs_ns = get_abs_timeout(delta);
282c06b6d70SStefano Stabellini 	/* Get an event anyway, even if the timeout is already expired */
283c06b6d70SStefano Stabellini 	single.flags = 0;
2849702785aSThomas Gleixner 
285ad5475f9SVitaly Kuznetsov 	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, xen_vcpu_nr(cpu),
286ad5475f9SVitaly Kuznetsov 				 &single);
287c06b6d70SStefano Stabellini 	BUG_ON(ret != 0);
2889702785aSThomas Gleixner 
2899702785aSThomas Gleixner 	return ret;
2909702785aSThomas Gleixner }
2919702785aSThomas Gleixner 
2922ec16bc0SRyan Thibodeaux static struct clock_event_device xen_vcpuop_clockevent __ro_after_init = {
2939702785aSThomas Gleixner 	.name = "xen",
2949702785aSThomas Gleixner 	.features = CLOCK_EVT_FEAT_ONESHOT,
2959702785aSThomas Gleixner 
2969702785aSThomas Gleixner 	.max_delta_ns = 0xffffffff,
2973d18d661SNicolai Stange 	.max_delta_ticks = 0xffffffff,
2989702785aSThomas Gleixner 	.min_delta_ns = TIMER_SLOP,
2993d18d661SNicolai Stange 	.min_delta_ticks = TIMER_SLOP,
3009702785aSThomas Gleixner 
3019702785aSThomas Gleixner 	.mult = 1,
3029702785aSThomas Gleixner 	.shift = 0,
3039702785aSThomas Gleixner 	.rating = 500,
3049702785aSThomas Gleixner 
305955381ddSViresh Kumar 	.set_state_shutdown = xen_vcpuop_shutdown,
306955381ddSViresh Kumar 	.set_state_oneshot = xen_vcpuop_set_oneshot,
3079702785aSThomas Gleixner 	.set_next_event = xen_vcpuop_set_next_event,
3089702785aSThomas Gleixner };
3099702785aSThomas Gleixner 
3109702785aSThomas Gleixner static const struct clock_event_device *xen_clockevent =
3119702785aSThomas Gleixner 	&xen_timerop_clockevent;
31231620a19SKonrad Rzeszutek Wilk 
31331620a19SKonrad Rzeszutek Wilk struct xen_clock_event_device {
31431620a19SKonrad Rzeszutek Wilk 	struct clock_event_device evt;
3157be0772dSVitaly Kuznetsov 	char name[16];
31631620a19SKonrad Rzeszutek Wilk };
31731620a19SKonrad Rzeszutek Wilk static DEFINE_PER_CPU(struct xen_clock_event_device, xen_clock_events) = { .evt.irq = -1 };
3189702785aSThomas Gleixner 
xen_timer_interrupt(int irq,void * dev_id)3199702785aSThomas Gleixner static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
3209702785aSThomas Gleixner {
32189cbc767SChristoph Lameter 	struct clock_event_device *evt = this_cpu_ptr(&xen_clock_events.evt);
3229702785aSThomas Gleixner 	irqreturn_t ret;
3239702785aSThomas Gleixner 
3249702785aSThomas Gleixner 	ret = IRQ_NONE;
3259702785aSThomas Gleixner 	if (evt->event_handler) {
3269702785aSThomas Gleixner 		evt->event_handler(evt);
3279702785aSThomas Gleixner 		ret = IRQ_HANDLED;
3289702785aSThomas Gleixner 	}
3299702785aSThomas Gleixner 
3309702785aSThomas Gleixner 	return ret;
3319702785aSThomas Gleixner }
3329702785aSThomas Gleixner 
xen_teardown_timer(int cpu)33309e99da7SKonrad Rzeszutek Wilk void xen_teardown_timer(int cpu)
33409e99da7SKonrad Rzeszutek Wilk {
33509e99da7SKonrad Rzeszutek Wilk 	struct clock_event_device *evt;
33609e99da7SKonrad Rzeszutek Wilk 	evt = &per_cpu(xen_clock_events, cpu).evt;
33709e99da7SKonrad Rzeszutek Wilk 
33809e99da7SKonrad Rzeszutek Wilk 	if (evt->irq >= 0) {
33909e99da7SKonrad Rzeszutek Wilk 		unbind_from_irqhandler(evt->irq, NULL);
34009e99da7SKonrad Rzeszutek Wilk 		evt->irq = -1;
34109e99da7SKonrad Rzeszutek Wilk 	}
34209e99da7SKonrad Rzeszutek Wilk }
34309e99da7SKonrad Rzeszutek Wilk 
xen_setup_timer(int cpu)3449702785aSThomas Gleixner void xen_setup_timer(int cpu)
3459702785aSThomas Gleixner {
3467be0772dSVitaly Kuznetsov 	struct xen_clock_event_device *xevt = &per_cpu(xen_clock_events, cpu);
3477be0772dSVitaly Kuznetsov 	struct clock_event_device *evt = &xevt->evt;
3489702785aSThomas Gleixner 	int irq;
3499702785aSThomas Gleixner 
350ef35a4e6SKonrad Rzeszutek Wilk 	WARN(evt->irq >= 0, "IRQ%d for CPU%d is already allocated\n", evt->irq, cpu);
35109e99da7SKonrad Rzeszutek Wilk 	if (evt->irq >= 0)
35209e99da7SKonrad Rzeszutek Wilk 		xen_teardown_timer(cpu);
353ef35a4e6SKonrad Rzeszutek Wilk 
3549702785aSThomas Gleixner 	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);
3559702785aSThomas Gleixner 
3567be0772dSVitaly Kuznetsov 	snprintf(xevt->name, sizeof(xevt->name), "timer%d", cpu);
3579702785aSThomas Gleixner 
3589702785aSThomas Gleixner 	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
3599d71cee6SMichael Opdenacker 				      IRQF_PERCPU|IRQF_NOBALANCING|IRQF_TIMER|
3608d5999dfSDavid Vrabel 				      IRQF_FORCE_RESUME|IRQF_EARLY_RESUME,
3617be0772dSVitaly Kuznetsov 				      xevt->name, NULL);
3628785c676SDavid Vrabel 	(void)xen_set_irq_priority(irq, XEN_IRQ_PRIORITY_MAX);
3639702785aSThomas Gleixner 
3649702785aSThomas Gleixner 	memcpy(evt, xen_clockevent, sizeof(*evt));
3659702785aSThomas Gleixner 
366320ab2b0SRusty Russell 	evt->cpumask = cpumask_of(cpu);
3679702785aSThomas Gleixner 	evt->irq = irq;
3689702785aSThomas Gleixner }
3699702785aSThomas Gleixner 
370d68d82afSAlex Nixon 
xen_setup_cpu_clockevents(void)3719702785aSThomas Gleixner void xen_setup_cpu_clockevents(void)
3729702785aSThomas Gleixner {
37389cbc767SChristoph Lameter 	clockevents_register_device(this_cpu_ptr(&xen_clock_events.evt));
3749702785aSThomas Gleixner }
3759702785aSThomas Gleixner 
xen_timer_resume(void)376d07af1f0SJeremy Fitzhardinge void xen_timer_resume(void)
377d07af1f0SJeremy Fitzhardinge {
378d07af1f0SJeremy Fitzhardinge 	int cpu;
379d07af1f0SJeremy Fitzhardinge 
380d07af1f0SJeremy Fitzhardinge 	if (xen_clockevent != &xen_vcpuop_clockevent)
381d07af1f0SJeremy Fitzhardinge 		return;
382d07af1f0SJeremy Fitzhardinge 
383d07af1f0SJeremy Fitzhardinge 	for_each_online_cpu(cpu) {
384ad5475f9SVitaly Kuznetsov 		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer,
385ad5475f9SVitaly Kuznetsov 				       xen_vcpu_nr(cpu), NULL))
386d07af1f0SJeremy Fitzhardinge 			BUG();
387d07af1f0SJeremy Fitzhardinge 	}
388d07af1f0SJeremy Fitzhardinge }
389d07af1f0SJeremy Fitzhardinge 
3902229f70bSJoao Martins static struct pvclock_vsyscall_time_info *xen_clock __read_mostly;
391867cefb4SJuergen Gross static u64 xen_clock_value_saved;
3922229f70bSJoao Martins 
xen_save_time_memory_area(void)3932229f70bSJoao Martins void xen_save_time_memory_area(void)
3942229f70bSJoao Martins {
3952229f70bSJoao Martins 	struct vcpu_register_time_memory_area t;
3962229f70bSJoao Martins 	int ret;
3972229f70bSJoao Martins 
398867cefb4SJuergen Gross 	xen_clock_value_saved = xen_clocksource_read() - xen_sched_clock_offset;
399867cefb4SJuergen Gross 
4002229f70bSJoao Martins 	if (!xen_clock)
4012229f70bSJoao Martins 		return;
4022229f70bSJoao Martins 
4032229f70bSJoao Martins 	t.addr.v = NULL;
4042229f70bSJoao Martins 
4052229f70bSJoao Martins 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4062229f70bSJoao Martins 	if (ret != 0)
4072229f70bSJoao Martins 		pr_notice("Cannot save secondary vcpu_time_info (err %d)",
4082229f70bSJoao Martins 			  ret);
4092229f70bSJoao Martins 	else
4102229f70bSJoao Martins 		clear_page(xen_clock);
4112229f70bSJoao Martins }
4122229f70bSJoao Martins 
xen_restore_time_memory_area(void)4132229f70bSJoao Martins void xen_restore_time_memory_area(void)
4142229f70bSJoao Martins {
4152229f70bSJoao Martins 	struct vcpu_register_time_memory_area t;
4162229f70bSJoao Martins 	int ret;
4172229f70bSJoao Martins 
4182229f70bSJoao Martins 	if (!xen_clock)
419867cefb4SJuergen Gross 		goto out;
4202229f70bSJoao Martins 
4212229f70bSJoao Martins 	t.addr.v = &xen_clock->pvti;
4222229f70bSJoao Martins 
4232229f70bSJoao Martins 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4242229f70bSJoao Martins 
4252229f70bSJoao Martins 	/*
426b95a8a27SThomas Gleixner 	 * We don't disable VDSO_CLOCKMODE_PVCLOCK entirely if it fails to
427b95a8a27SThomas Gleixner 	 * register the secondary time info with Xen or if we migrated to a
428b95a8a27SThomas Gleixner 	 * host without the necessary flags. On both of these cases what
429b95a8a27SThomas Gleixner 	 * happens is either process seeing a zeroed out pvti or seeing no
430b95a8a27SThomas Gleixner 	 * PVCLOCK_TSC_STABLE_BIT bit set. Userspace checks the latter and
431b95a8a27SThomas Gleixner 	 * if 0, it discards the data in pvti and fallbacks to a system
432b95a8a27SThomas Gleixner 	 * call for a reliable timestamp.
4332229f70bSJoao Martins 	 */
4342229f70bSJoao Martins 	if (ret != 0)
4352229f70bSJoao Martins 		pr_notice("Cannot restore secondary vcpu_time_info (err %d)",
4362229f70bSJoao Martins 			  ret);
437867cefb4SJuergen Gross 
438867cefb4SJuergen Gross out:
439867cefb4SJuergen Gross 	/* Need pvclock_resume() before using xen_clocksource_read(). */
440867cefb4SJuergen Gross 	pvclock_resume();
441867cefb4SJuergen Gross 	xen_sched_clock_offset = xen_clocksource_read() - xen_clock_value_saved;
4422229f70bSJoao Martins }
4432229f70bSJoao Martins 
xen_setup_vsyscall_time_info(void)4442229f70bSJoao Martins static void xen_setup_vsyscall_time_info(void)
4452229f70bSJoao Martins {
4462229f70bSJoao Martins 	struct vcpu_register_time_memory_area t;
4472229f70bSJoao Martins 	struct pvclock_vsyscall_time_info *ti;
4482229f70bSJoao Martins 	int ret;
4492229f70bSJoao Martins 
4502229f70bSJoao Martins 	ti = (struct pvclock_vsyscall_time_info *)get_zeroed_page(GFP_KERNEL);
4512229f70bSJoao Martins 	if (!ti)
4522229f70bSJoao Martins 		return;
4532229f70bSJoao Martins 
4542229f70bSJoao Martins 	t.addr.v = &ti->pvti;
4552229f70bSJoao Martins 
4562229f70bSJoao Martins 	ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area, 0, &t);
4572229f70bSJoao Martins 	if (ret) {
458b95a8a27SThomas Gleixner 		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (err %d)\n", ret);
4592229f70bSJoao Martins 		free_page((unsigned long)ti);
4602229f70bSJoao Martins 		return;
4612229f70bSJoao Martins 	}
4622229f70bSJoao Martins 
4632229f70bSJoao Martins 	/*
4642229f70bSJoao Martins 	 * If primary time info had this bit set, secondary should too since
4652229f70bSJoao Martins 	 * it's the same data on both just different memory regions. But we
4662229f70bSJoao Martins 	 * still check it in case hypervisor is buggy.
4672229f70bSJoao Martins 	 */
4682229f70bSJoao Martins 	if (!(ti->pvti.flags & PVCLOCK_TSC_STABLE_BIT)) {
4692229f70bSJoao Martins 		t.addr.v = NULL;
4702229f70bSJoao Martins 		ret = HYPERVISOR_vcpu_op(VCPUOP_register_vcpu_time_memory_area,
4712229f70bSJoao Martins 					 0, &t);
4722229f70bSJoao Martins 		if (!ret)
4732229f70bSJoao Martins 			free_page((unsigned long)ti);
4742229f70bSJoao Martins 
475b95a8a27SThomas Gleixner 		pr_notice("xen: VDSO_CLOCKMODE_PVCLOCK not supported (tsc unstable)\n");
4762229f70bSJoao Martins 		return;
4772229f70bSJoao Martins 	}
4782229f70bSJoao Martins 
4792229f70bSJoao Martins 	xen_clock = ti;
4802229f70bSJoao Martins 	pvclock_set_pvti_cpu0_va(xen_clock);
4812229f70bSJoao Martins 
482b95a8a27SThomas Gleixner 	xen_clocksource.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
4832229f70bSJoao Martins }
4842229f70bSJoao Martins 
485caea091eSKrister Johansen /*
486caea091eSKrister Johansen  * Check if it is possible to safely use the tsc as a clocksource.  This is
487caea091eSKrister Johansen  * only true if the hypervisor notifies the guest that its tsc is invariant,
488caea091eSKrister Johansen  * the tsc is stable, and the tsc instruction will never be emulated.
489caea091eSKrister Johansen  */
xen_tsc_safe_clocksource(void)490caea091eSKrister Johansen static int __init xen_tsc_safe_clocksource(void)
491caea091eSKrister Johansen {
492caea091eSKrister Johansen 	u32 eax, ebx, ecx, edx;
493caea091eSKrister Johansen 
494caea091eSKrister Johansen 	if (!(boot_cpu_has(X86_FEATURE_CONSTANT_TSC)))
495caea091eSKrister Johansen 		return 0;
496caea091eSKrister Johansen 
497caea091eSKrister Johansen 	if (!(boot_cpu_has(X86_FEATURE_NONSTOP_TSC)))
498caea091eSKrister Johansen 		return 0;
499caea091eSKrister Johansen 
500caea091eSKrister Johansen 	if (check_tsc_unstable())
501caea091eSKrister Johansen 		return 0;
502caea091eSKrister Johansen 
503caea091eSKrister Johansen 	/* Leaf 4, sub-leaf 0 (0x40000x03) */
504caea091eSKrister Johansen 	cpuid_count(xen_cpuid_base() + 3, 0, &eax, &ebx, &ecx, &edx);
505caea091eSKrister Johansen 
50699a7bcafSKrister Johansen 	return ebx == XEN_CPUID_TSC_MODE_NEVER_EMULATE;
507caea091eSKrister Johansen }
508caea091eSKrister Johansen 
xen_time_init(void)509fb6ce5deSDaniel Kiper static void __init xen_time_init(void)
5109702785aSThomas Gleixner {
511b8888080SJoao Martins 	struct pvclock_vcpu_time_info *pvti;
5129702785aSThomas Gleixner 	int cpu = smp_processor_id();
513e27c4929SArnd Bergmann 	struct timespec64 tp;
5149702785aSThomas Gleixner 
515caea091eSKrister Johansen 	/*
516caea091eSKrister Johansen 	 * As Dom0 is never moved, no penalty on using TSC there.
517caea091eSKrister Johansen 	 *
518caea091eSKrister Johansen 	 * If it is possible for the guest to determine that the tsc is a safe
519caea091eSKrister Johansen 	 * clocksource, then set xen_clocksource rating below that of the tsc
520caea091eSKrister Johansen 	 * so that the system prefers tsc instead.
521caea091eSKrister Johansen 	 */
52294dd85f6SPalik, Imre 	if (xen_initial_domain())
52394dd85f6SPalik, Imre 		xen_clocksource.rating = 275;
524caea091eSKrister Johansen 	else if (xen_tsc_safe_clocksource())
525caea091eSKrister Johansen 		xen_clocksource.rating = 299;
52694dd85f6SPalik, Imre 
527b01cc1b0SJohn Stultz 	clocksource_register_hz(&xen_clocksource, NSEC_PER_SEC);
5289702785aSThomas Gleixner 
529ad5475f9SVitaly Kuznetsov 	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, xen_vcpu_nr(cpu),
530ad5475f9SVitaly Kuznetsov 			       NULL) == 0) {
5319702785aSThomas Gleixner 		/* Successfully turned off 100Hz tick, so we have the
5329702785aSThomas Gleixner 		   vcpuop-based timer interface */
5339702785aSThomas Gleixner 		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
5349702785aSThomas Gleixner 		xen_clockevent = &xen_vcpuop_clockevent;
5359702785aSThomas Gleixner 	}
5369702785aSThomas Gleixner 
5379702785aSThomas Gleixner 	/* Set initial system time with full resolution */
538c4507257SJohn Stultz 	xen_read_wallclock(&tp);
539e27c4929SArnd Bergmann 	do_settimeofday64(&tp);
5409702785aSThomas Gleixner 
541404ee5b1SAndi Kleen 	setup_force_cpu_cap(X86_FEATURE_TSC);
5429702785aSThomas Gleixner 
543b8888080SJoao Martins 	/*
544b8888080SJoao Martins 	 * We check ahead on the primary time info if this
545b8888080SJoao Martins 	 * bit is supported hence speeding up Xen clocksource.
546b8888080SJoao Martins 	 */
547b8888080SJoao Martins 	pvti = &__this_cpu_read(xen_vcpu)->time;
5482229f70bSJoao Martins 	if (pvti->flags & PVCLOCK_TSC_STABLE_BIT) {
549b8888080SJoao Martins 		pvclock_set_flags(PVCLOCK_TSC_STABLE_BIT);
5502229f70bSJoao Martins 		xen_setup_vsyscall_time_info();
5512229f70bSJoao Martins 	}
552b8888080SJoao Martins 
553be012920SIan Campbell 	xen_setup_runstate_info(cpu);
5549702785aSThomas Gleixner 	xen_setup_timer(cpu);
5559702785aSThomas Gleixner 	xen_setup_cpu_clockevents();
5565584880eSDavid Vrabel 
557ecb23dc6SJuergen Gross 	xen_time_setup_guest();
558ecb23dc6SJuergen Gross 
5595584880eSDavid Vrabel 	if (xen_initial_domain())
5605584880eSDavid Vrabel 		pvclock_gtod_register_notifier(&xen_pvclock_gtod_notifier);
5619702785aSThomas Gleixner }
562409771d2SStefano Stabellini 
xen_init_time_common(void)563a0e2bf7cSJuergen Gross static void __init xen_init_time_common(void)
564409771d2SStefano Stabellini {
56538669ba2SPavel Tatashin 	xen_sched_clock_offset = xen_clocksource_read();
566a0e2bf7cSJuergen Gross 	static_call_update(pv_steal_clock, xen_steal_clock);
567a0e2bf7cSJuergen Gross 	paravirt_set_sched_clock(xen_sched_clock);
568a0e2bf7cSJuergen Gross 
569a0e2bf7cSJuergen Gross 	x86_platform.calibrate_tsc = xen_tsc_khz;
570a0e2bf7cSJuergen Gross 	x86_platform.get_wallclock = xen_get_wallclock;
571a0e2bf7cSJuergen Gross }
572a0e2bf7cSJuergen Gross 
xen_init_time_ops(void)573a0e2bf7cSJuergen Gross void __init xen_init_time_ops(void)
574a0e2bf7cSJuergen Gross {
575a0e2bf7cSJuergen Gross 	xen_init_time_common();
576409771d2SStefano Stabellini 
577409771d2SStefano Stabellini 	x86_init.timers.timer_init = xen_time_init;
578409771d2SStefano Stabellini 	x86_init.timers.setup_percpu_clockev = x86_init_noop;
579409771d2SStefano Stabellini 	x86_cpuinit.setup_percpu_clockev = x86_init_noop;
580409771d2SStefano Stabellini 
58147433b8cSDavid Vrabel 	/* Dom0 uses the native method to set the hardware RTC. */
58247433b8cSDavid Vrabel 	if (!xen_initial_domain())
583409771d2SStefano Stabellini 		x86_platform.set_wallclock = xen_set_wallclock;
584409771d2SStefano Stabellini }
585409771d2SStefano Stabellini 
586ca65f9fcSStefano Stabellini #ifdef CONFIG_XEN_PVHVM
/*
 * Per-CPU clockevent setup callback installed by xen_hvm_init_time_ops().
 *
 * Operates on the CPU it is executing on: sets up that CPU's runstate
 * info and then its clockevents.
 */
static void xen_hvm_setup_cpu_clockevents(void)
{
	xen_setup_runstate_info(smp_processor_id());

	/*
	 * xen_setup_timer() is deliberately not called from here:
	 * snprintf is bad in atomic context. It is done in
	 * xen_hvm_cpu_notify instead (which gets called by smp_init
	 * during early bootup and also during CPU hotplug events).
	 */
	xen_setup_cpu_clockevents();
}
598409771d2SStefano Stabellini 
xen_hvm_init_time_ops(void)599fb6ce5deSDaniel Kiper void __init xen_hvm_init_time_ops(void)
600409771d2SStefano Stabellini {
601eed05744SDongli Zhang 	static bool hvm_time_initialized;
602eed05744SDongli Zhang 
603eed05744SDongli Zhang 	if (hvm_time_initialized)
604eed05744SDongli Zhang 		return;
605eed05744SDongli Zhang 
60684d582d2SBoris Ostrovsky 	/*
60784d582d2SBoris Ostrovsky 	 * vector callback is needed otherwise we cannot receive interrupts
60884d582d2SBoris Ostrovsky 	 * on cpu > 0 and at this point we don't know how many cpus are
60984d582d2SBoris Ostrovsky 	 * available.
61084d582d2SBoris Ostrovsky 	 */
61184d582d2SBoris Ostrovsky 	if (!xen_have_vector_callback)
61284d582d2SBoris Ostrovsky 		return;
61384d582d2SBoris Ostrovsky 
614409771d2SStefano Stabellini 	if (!xen_feature(XENFEAT_hvm_safe_pvclock)) {
615eed05744SDongli Zhang 		pr_info_once("Xen doesn't support pvclock on HVM, disable pv timer");
616eed05744SDongli Zhang 		return;
617eed05744SDongli Zhang 	}
618eed05744SDongli Zhang 
619eed05744SDongli Zhang 	/*
620eed05744SDongli Zhang 	 * Only MAX_VIRT_CPUS 'vcpu_info' are embedded inside 'shared_info'.
621eed05744SDongli Zhang 	 * The __this_cpu_read(xen_vcpu) is still NULL when Xen HVM guest
622eed05744SDongli Zhang 	 * boots on vcpu >= MAX_VIRT_CPUS (e.g., kexec), To access
623eed05744SDongli Zhang 	 * __this_cpu_read(xen_vcpu) via xen_clocksource_read() will panic.
624eed05744SDongli Zhang 	 *
625eed05744SDongli Zhang 	 * The xen_hvm_init_time_ops() should be called again later after
626eed05744SDongli Zhang 	 * __this_cpu_read(xen_vcpu) is available.
627eed05744SDongli Zhang 	 */
628eed05744SDongli Zhang 	if (!__this_cpu_read(xen_vcpu)) {
629eed05744SDongli Zhang 		pr_info("Delay xen_init_time_common() as kernel is running on vcpu=%d\n",
630eed05744SDongli Zhang 			xen_vcpu_nr(0));
631409771d2SStefano Stabellini 		return;
632409771d2SStefano Stabellini 	}
633409771d2SStefano Stabellini 
634a0e2bf7cSJuergen Gross 	xen_init_time_common();
635a0e2bf7cSJuergen Gross 
636409771d2SStefano Stabellini 	x86_init.timers.setup_percpu_clockev = xen_time_init;
637409771d2SStefano Stabellini 	x86_cpuinit.setup_percpu_clockev = xen_hvm_setup_cpu_clockevents;
638409771d2SStefano Stabellini 
639409771d2SStefano Stabellini 	x86_platform.set_wallclock = xen_set_wallclock;
640eed05744SDongli Zhang 
641eed05744SDongli Zhang 	hvm_time_initialized = true;
642409771d2SStefano Stabellini }
643ca65f9fcSStefano Stabellini #endif
6442ec16bc0SRyan Thibodeaux 
6452ec16bc0SRyan Thibodeaux /* Kernel parameter to specify Xen timer slop */
parse_xen_timer_slop(char * ptr)6462ec16bc0SRyan Thibodeaux static int __init parse_xen_timer_slop(char *ptr)
6472ec16bc0SRyan Thibodeaux {
6482ec16bc0SRyan Thibodeaux 	unsigned long slop = memparse(ptr, NULL);
6492ec16bc0SRyan Thibodeaux 
6502ec16bc0SRyan Thibodeaux 	xen_timerop_clockevent.min_delta_ns = slop;
6512ec16bc0SRyan Thibodeaux 	xen_timerop_clockevent.min_delta_ticks = slop;
6522ec16bc0SRyan Thibodeaux 	xen_vcpuop_clockevent.min_delta_ns = slop;
6532ec16bc0SRyan Thibodeaux 	xen_vcpuop_clockevent.min_delta_ticks = slop;
6542ec16bc0SRyan Thibodeaux 
6552ec16bc0SRyan Thibodeaux 	return 0;
6562ec16bc0SRyan Thibodeaux }
6572ec16bc0SRyan Thibodeaux early_param("xen_timer_slop", parse_xen_timer_slop);
658