/*
 * Xen time implementation.
 *
 * This is implemented in terms of a clocksource driver which uses
 * the hypervisor clock as a nanosecond timebase, and a clockevent
 * driver which uses the hypervisor's timer mechanism.
 *
 * Jeremy Fitzhardinge <jeremy@xensource.com>, XenSource Inc, 2007
 */
#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/clocksource.h>
#include <linux/clockchips.h>
#include <linux/kernel_stat.h>
#include <linux/math64.h>

#include <asm/xen/hypervisor.h>
#include <asm/xen/hypercall.h>

#include <xen/events.h>
#include <xen/interface/xen.h>
#include <xen/interface/vcpu.h>

#include "xen-ops.h"

#define XEN_SHIFT 22

/* Xen may fire a timer up to this many ns early */
#define TIMER_SLOP	100000
#define NS_PER_TICK	(1000000000LL / HZ)
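/*
 * For illustration: TIMER_SLOP allows 100 us of early firing, and with
 * HZ == 100 (a common configuration of this era) NS_PER_TICK works out
 * to 10,000,000 ns per tick.
 */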

static cycle_t xen_clocksource_read(void);

/* These are periodically updated in shared_info, and then copied here. */
struct shadow_time_info {
	u64 tsc_timestamp;     /* TSC at last update of time vals.  */
	u64 system_timestamp;  /* Time, in nanosecs, since boot.    */
	u32 tsc_to_nsec_mul;
	int tsc_shift;
	u32 version;
};

static DEFINE_PER_CPU(struct shadow_time_info, shadow_time);

/* runstate info updated by Xen */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate);

/* snapshots of runstate info */
static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate_snapshot);

/* unused ns of stolen and blocked time */
static DEFINE_PER_CPU(u64, residual_stolen);
static DEFINE_PER_CPU(u64, residual_blocked);

/* return a consistent snapshot of a 64-bit time/counter value */
static u64 get64(const u64 *p)
{
	u64 ret;

	if (BITS_PER_LONG < 64) {
		const u32 *p32 = (const u32 *)p;
		u32 h, l;

		/*
		 * Read high then low, and then make sure high is
		 * still the same; this will only loop if low wraps
		 * and carries into high.
		 * XXX some clean way to make this endian-proof?
		 */
		do {
			h = p32[1];
			barrier();
			l = p32[0];
			barrier();
		} while (p32[1] != h);

		ret = (((u64)h) << 32) | l;
	} else {
		ret = *p;
	}

	return ret;
}
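
/*
 * A sketch of the torn-read hazard the loop above avoids: suppose the
 * counter is 0x00000000ffffffff and Xen bumps it to 0x0000000100000000
 * between our two 32-bit reads.  Without re-checking the high word we
 * could assemble 0x00000001ffffffff or 0x0000000000000000, values the
 * counter never held; retrying on a high-word mismatch rules both out.
 */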

/*
 * Runstate accounting
 */
static void get_runstate_snapshot(struct vcpu_runstate_info *res)
{
	u64 state_time;
	struct vcpu_runstate_info *state;

	BUG_ON(preemptible());

	state = &__get_cpu_var(runstate);

	/*
	 * The runstate info is always updated by the hypervisor on
	 * the current CPU, so there's no need to use anything
	 * stronger than a compiler barrier when fetching it.
	 */
	do {
		state_time = get64(&state->state_entry_time);
		barrier();
		*res = *state;
		barrier();
	} while (get64(&state->state_entry_time) != state_time);
}

/* return true when a vcpu could run but has no real cpu to run on */
bool xen_vcpu_stolen(int vcpu)
{
	return per_cpu(runstate, vcpu).state == RUNSTATE_runnable;
}

static void setup_runstate_info(int cpu)
{
	struct vcpu_register_runstate_memory_area area;

	area.addr.v = &per_cpu(runstate, cpu);

	if (HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area,
			       cpu, &area))
		BUG();
}

static void do_stolen_accounting(void)
{
	struct vcpu_runstate_info state;
	struct vcpu_runstate_info *snap;
	s64 blocked, runnable, offline, stolen;
	cputime_t ticks;

	get_runstate_snapshot(&state);

	WARN_ON(state.state != RUNSTATE_running);

	snap = &__get_cpu_var(runstate_snapshot);

	/* work out how much time the VCPU has not been runn*ing*  */
	blocked = state.time[RUNSTATE_blocked] - snap->time[RUNSTATE_blocked];
	runnable = state.time[RUNSTATE_runnable] - snap->time[RUNSTATE_runnable];
	offline = state.time[RUNSTATE_offline] - snap->time[RUNSTATE_offline];

	*snap = state;

	/* Add the appropriate number of ticks of stolen time,
	   including any left-overs from last time.  Passing NULL to
	   account_steal_time accounts the time as stolen. */
	stolen = runnable + offline + __get_cpu_var(residual_stolen);

	if (stolen < 0)
		stolen = 0;

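	/*
	 * Worked example (assuming HZ == 100, so NS_PER_TICK == 10^7):
	 * with stolen == 23,500,000 ns, iter_div_u64_rem() returns
	 * ticks == 2 and leaves stolen == 3,500,000, which is carried
	 * over in residual_stolen until the next interrupt.
	 */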
	ticks = iter_div_u64_rem(stolen, NS_PER_TICK, &stolen);
	__get_cpu_var(residual_stolen) = stolen;
	account_steal_time(NULL, ticks);

	/* Add the appropriate number of ticks of blocked time,
	   including any left-overs from last time.  Passing idle to
	   account_steal_time accounts the time as idle/wait. */
	blocked += __get_cpu_var(residual_blocked);

	if (blocked < 0)
		blocked = 0;

	ticks = iter_div_u64_rem(blocked, NS_PER_TICK, &blocked);
	__get_cpu_var(residual_blocked) = blocked;
	account_steal_time(idle_task(smp_processor_id()), ticks);
}

/*
 * Xen sched_clock implementation.  Returns the number of unstolen
 * nanoseconds, i.e. the nanoseconds the VCPU has spent in the
 * RUNNING and BLOCKED states.
 */
unsigned long long xen_sched_clock(void)
{
	struct vcpu_runstate_info state;
	cycle_t now;
	u64 ret;
	s64 offset;

	/*
	 * Ideally sched_clock should be called on a per-cpu basis
	 * anyway, so preempt should already be disabled, but that's
	 * not current practice at the moment.
	 */
	preempt_disable();

	now = xen_clocksource_read();

	get_runstate_snapshot(&state);

	WARN_ON(state.state != RUNSTATE_running);

	offset = now - state.state_entry_time;
	if (offset < 0)
		offset = 0;

	ret = state.time[RUNSTATE_blocked] +
		state.time[RUNSTATE_running] +
		offset;

	preempt_enable();

	return ret;
}


/* Get the CPU speed from Xen */
unsigned long xen_cpu_khz(void)
{
	u64 xen_khz = 1000000ULL << 32;
	const struct vcpu_time_info *info =
		&HYPERVISOR_shared_info->vcpu_info[0].time;

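	/*
	 * Derivation: Xen converts (pre-shifted) TSC cycles to ns as
	 * ns = cycles * tsc_to_system_mul >> 32, so one ns corresponds
	 * to 2^32 / tsc_to_system_mul cycles, and one kHz worth of
	 * cycles (10^6 per ms) is (10^6 << 32) / tsc_to_system_mul --
	 * hence the division below, with tsc_shift undoing Xen's
	 * pre-scaling of the TSC.
	 */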
	do_div(xen_khz, info->tsc_to_system_mul);
	if (info->tsc_shift < 0)
		xen_khz <<= -info->tsc_shift;
	else
		xen_khz >>= info->tsc_shift;

	return xen_khz;
}

/*
 * Reads a consistent set of time-base values from Xen, into a shadow data
 * area.
 */
static unsigned get_time_values_from_xen(void)
{
	struct vcpu_time_info   *src;
	struct shadow_time_info *dst;

	/* src is shared memory with the hypervisor, so we need to
	   make sure we get a consistent snapshot, even in the face of
	   being preempted. */
	src = &__get_cpu_var(xen_vcpu)->time;
	dst = &__get_cpu_var(shadow_time);

	do {
		dst->version = src->version;
		rmb();		/* fetch version before data */
		dst->tsc_timestamp     = src->tsc_timestamp;
		dst->system_timestamp  = src->system_time;
		dst->tsc_to_nsec_mul   = src->tsc_to_system_mul;
		dst->tsc_shift         = src->tsc_shift;
		rmb();		/* test version after fetching data */
	} while ((src->version & 1) | (dst->version ^ src->version));

	return dst->version;
}

/*
 * Scale a 64-bit delta by shifting it and then multiplying by a 32-bit
 * fraction, yielding a 64-bit result.
 */
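/*
 * In other words, this computes (delta << shift) * (mul_frac / 2^32),
 * treating mul_frac as a 0.32 fixed-point fraction.  For example, a
 * hypothetical mul_frac of 0x80000000 (i.e. 1/2) with shift == 0 turns
 * a delta of 1000 TSC cycles into 500 ns.
 */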
static inline u64 scale_delta(u64 delta, u32 mul_frac, int shift)
{
	u64 product;
#ifdef __i386__
	u32 tmp1, tmp2;
#endif

	if (shift < 0)
		delta >>= -shift;
	else
		delta <<= shift;

#ifdef __i386__
	__asm__ (
		"mul  %5       ; "
		"mov  %4,%%eax ; "
		"mov  %%edx,%4 ; "
		"mul  %5       ; "
		"xor  %5,%5    ; "
		"add  %4,%%eax ; "
		"adc  %5,%%edx ; "
		: "=A" (product), "=r" (tmp1), "=r" (tmp2)
		: "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (mul_frac) );
#elif defined(__x86_64__)
	__asm__ (
		"mul %%rdx ; shrd $32,%%rdx,%%rax"
		: "=a" (product) : "0" (delta), "d" ((u64)mul_frac) );
#else
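/*
 * Only 32- and 64-bit x86 are handled above.  A portable C sketch of
 * the same multiply-and-shift (for illustration only, not wired in)
 * would be:
 *
 *	product  = ((u64)(u32)delta * mul_frac) >> 32;
 *	product += (u64)(u32)(delta >> 32) * mul_frac;
 */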
#error implement me!
#endif

	return product;
}

static u64 get_nsec_offset(struct shadow_time_info *shadow)
{
	u64 now, delta;
	now = native_read_tsc();
	delta = now - shadow->tsc_timestamp;
	return scale_delta(delta, shadow->tsc_to_nsec_mul, shadow->tsc_shift);
}

static cycle_t xen_clocksource_read(void)
{
	struct shadow_time_info *shadow = &get_cpu_var(shadow_time);
	cycle_t ret;
	unsigned version;

	do {
		version = get_time_values_from_xen();
		barrier();
		ret = shadow->system_timestamp + get_nsec_offset(shadow);
		barrier();
	} while (version != __get_cpu_var(xen_vcpu)->time.version);

	put_cpu_var(shadow_time);

	return ret;
}

static void xen_read_wallclock(struct timespec *ts)
{
	const struct shared_info *s = HYPERVISOR_shared_info;
	u32 version;
	u64 delta;
	struct timespec now;

	/* get wallclock at system boot */
	do {
		version = s->wc_version;
		rmb();		/* fetch version before time */
		now.tv_sec  = s->wc_sec;
		now.tv_nsec = s->wc_nsec;
		rmb();		/* fetch time before checking version */
	} while ((s->wc_version & 1) | (version ^ s->wc_version));

	delta = xen_clocksource_read();	/* time since system boot */
	delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec;

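	/*
	 * do_div() divides delta by NSEC_PER_SEC in place and returns
	 * the remainder, so afterwards delta holds whole seconds and
	 * tv_nsec the leftover nanoseconds.
	 */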
	now.tv_nsec = do_div(delta, NSEC_PER_SEC);
	now.tv_sec = delta;

	set_normalized_timespec(ts, now.tv_sec, now.tv_nsec);
}

unsigned long xen_get_wallclock(void)
{
	struct timespec ts;

	xen_read_wallclock(&ts);

	return ts.tv_sec;
}

int xen_set_wallclock(unsigned long now)
{
	/* do nothing for domU */
	return -1;
}

static struct clocksource xen_clocksource __read_mostly = {
	.name = "xen",
	.rating = 400,
	.read = xen_clocksource_read,
	.mask = ~0,
	.mult = 1<<XEN_SHIFT,		/* time directly in nanoseconds */
	.shift = XEN_SHIFT,
	.flags = CLOCK_SOURCE_IS_CONTINUOUS,
};
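
/*
 * With mult == 1 << shift, the core's (cycles * mult) >> shift
 * conversion is the identity: xen_clocksource_read() already returns
 * nanoseconds, so no real scaling happens here.
 */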

/*
   Xen clockevent implementation

   Xen has two clockevent implementations:

   The old timer_op one works with all released versions of Xen prior
   to version 3.0.4.  This version of the hypervisor provides a
   single-shot timer with nanosecond resolution.  However, a 100Hz
   tick, delivered while the vcpu is running, shares the same event
   channel.  We don't care about or use this tick, but it will cause
   the core time code to think the timer fired too soon, and will end
   up resetting it each time.  It could be filtered, but doing so has
   complications when the ktime clocksource is not yet the xen
   clocksource (ie, at boot time).

   The new vcpu_op-based timer interface allows the tick timer period
   to be changed or turned off.  The tick timer is not useful as a
   periodic timer because events are only delivered to running vcpus.
   The one-shot timer can report when a timeout is in the past, so
   set_next_event is capable of returning -ETIME when appropriate.
   This interface is used when available.
*/


/*
  Get a hypervisor absolute time.  In theory we could maintain an
  offset between the kernel's time and the hypervisor's time, and
  apply that to a kernel's absolute timeout.  Unfortunately the
  hypervisor and kernel times can drift even if the kernel is using
  the Xen clocksource, because ntp can warp the kernel's clocksource.
*/
static s64 get_abs_timeout(unsigned long delta)
{
	return xen_clocksource_read() + delta;
}

static void xen_timerop_set_mode(enum clock_event_mode mode,
				 struct clock_event_device *evt)
{
	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		/* unsupported */
		WARN_ON(1);
		break;

	case CLOCK_EVT_MODE_ONESHOT:
	case CLOCK_EVT_MODE_RESUME:
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		HYPERVISOR_set_timer_op(0);  /* cancel timeout */
		break;
	}
}

static int xen_timerop_set_next_event(unsigned long delta,
				      struct clock_event_device *evt)
{
	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	if (HYPERVISOR_set_timer_op(get_abs_timeout(delta)) < 0)
		BUG();

	/* We may have missed the deadline, but there's no real way of
	   knowing for sure.  If the event was in the past, then we'll
	   get an immediate interrupt. */

	return 0;
}

static const struct clock_event_device xen_timerop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_timerop_set_mode,
	.set_next_event = xen_timerop_set_next_event,
};
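
/*
 * Note that mult == 1 and shift == 0 both here and in the
 * vcpu_op-based device below: clockevent deltas are programmed
 * directly in nanoseconds, with no cycle conversion.
 */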

static void xen_vcpuop_set_mode(enum clock_event_mode mode,
				struct clock_event_device *evt)
{
	int cpu = smp_processor_id();

	switch (mode) {
	case CLOCK_EVT_MODE_PERIODIC:
		WARN_ON(1);	/* unsupported */
		break;

	case CLOCK_EVT_MODE_ONESHOT:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;

	case CLOCK_EVT_MODE_UNUSED:
	case CLOCK_EVT_MODE_SHUTDOWN:
		if (HYPERVISOR_vcpu_op(VCPUOP_stop_singleshot_timer, cpu, NULL) ||
		    HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL))
			BUG();
		break;
	case CLOCK_EVT_MODE_RESUME:
		break;
	}
}

static int xen_vcpuop_set_next_event(unsigned long delta,
				     struct clock_event_device *evt)
{
	int cpu = smp_processor_id();
	struct vcpu_set_singleshot_timer single;
	int ret;

	WARN_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT);

	single.timeout_abs_ns = get_abs_timeout(delta);
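	/*
	 * VCPU_SSHOTTMR_future asks Xen to fail the hypercall with
	 * -ETIME, rather than firing immediately, if the absolute
	 * timeout is already in the past -- exactly the contract
	 * set_next_event needs.
	 */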
	single.flags = VCPU_SSHOTTMR_future;

	ret = HYPERVISOR_vcpu_op(VCPUOP_set_singleshot_timer, cpu, &single);

	BUG_ON(ret != 0 && ret != -ETIME);

	return ret;
}

static const struct clock_event_device xen_vcpuop_clockevent = {
	.name = "xen",
	.features = CLOCK_EVT_FEAT_ONESHOT,

	.max_delta_ns = 0xffffffff,
	.min_delta_ns = TIMER_SLOP,

	.mult = 1,
	.shift = 0,
	.rating = 500,

	.set_mode = xen_vcpuop_set_mode,
	.set_next_event = xen_vcpuop_set_next_event,
};

static const struct clock_event_device *xen_clockevent =
	&xen_timerop_clockevent;
static DEFINE_PER_CPU(struct clock_event_device, xen_clock_events);

static irqreturn_t xen_timer_interrupt(int irq, void *dev_id)
{
	struct clock_event_device *evt = &__get_cpu_var(xen_clock_events);
	irqreturn_t ret;

	ret = IRQ_NONE;
	if (evt->event_handler) {
		evt->event_handler(evt);
		ret = IRQ_HANDLED;
	}

	do_stolen_accounting();

	return ret;
}

void xen_setup_timer(int cpu)
{
	const char *name;
	struct clock_event_device *evt;
	int irq;

	printk(KERN_INFO "installing Xen timer for CPU %d\n", cpu);

	name = kasprintf(GFP_KERNEL, "timer%d", cpu);
	if (!name)
		name = "<timer kasprintf failed>";

	irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, xen_timer_interrupt,
				      IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING,
				      name, NULL);

	evt = &per_cpu(xen_clock_events, cpu);
	memcpy(evt, xen_clockevent, sizeof(*evt));

	evt->cpumask = cpumask_of_cpu(cpu);
	evt->irq = irq;

	setup_runstate_info(cpu);
}

void xen_setup_cpu_clockevents(void)
{
	BUG_ON(preemptible());

	clockevents_register_device(&__get_cpu_var(xen_clock_events));
}

__init void xen_time_init(void)
{
	int cpu = smp_processor_id();

	get_time_values_from_xen();

	clocksource_register(&xen_clocksource);

	if (HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, cpu, NULL) == 0) {
		/* Successfully turned off 100Hz tick, so we have the
		   vcpuop-based timer interface */
		printk(KERN_DEBUG "Xen: using vcpuop timer interface\n");
		xen_clockevent = &xen_vcpuop_clockevent;
	}

	/* Set initial system time with full resolution */
	xen_read_wallclock(&xtime);
	set_normalized_timespec(&wall_to_monotonic,
				-xtime.tv_sec, -xtime.tv_nsec);

	setup_force_cpu_cap(X86_FEATURE_TSC);

	xen_setup_timer(cpu);
	xen_setup_cpu_clockevents();
}