xref: /titanic_44/usr/src/uts/i86xpv/os/xpv_timestamp.c (revision 990b4856d0eaada6f8140335733a1b1771ed2746)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/types.h>
30 #include <sys/clock.h>
31 #include <sys/panic.h>
32 #include <sys/atomic.h>
33 #include <sys/hypervisor.h>
34 
35 #include <sys/archsystm.h>
36 
37 /*
38  * On the hypervisor, we have a virtualized system time based upon the
39  * information provided for each VCPU, which is updated every time it is
40  * scheduled onto a real CPU.  Thus, none of the traditional code in
41  * i86pc/os/timestamp.c applies, our gethrtime() implementation is run through
42  * the PSM, and there is no scaling step to apply.
43  *
44  * However, the platform does not guarantee monotonicity; thus we have to fake
45  * this up, which is a deeply unpleasant thing to have to do.
46  *
47  * Note that the virtualized interface still relies on the current TSC to
48  * calculate the time in nanoseconds since the VCPU was scheduled, and is thus
49  * subject to all the problems with that.  For the most part, the hypervisor is
50  * supposed to deal with them.
51  *
52  * Another wrinkle involves suspend/resume/migration.  If we come back and time
53  * is apparently less, we may have resumed on a different machine or on the
54  * same machine after a reboot.  In this case we need to maintain an addend to
55  * ensure time continues reasonably.  Otherwise we could end up taking a very
56  * long time to expire cyclics in the heap.  Thus we have two functions:
57  *
58  * xpv_getsystime()
59  *
60  *	The unadulterated system time from the hypervisor.  This is only to be
61  *	used when programming the hypervisor (setting a timer or calculating
62  *	the TOD).
63  *
64  * xpv_gethrtime()
65  *
66  *	This is the monotonic hrtime counter to be used by everything else such
67  *	as the cyclic subsystem.  We should never pass an hrtime directly into
68  *	a hypervisor interface, as hrtime_addend may well be non-zero.
69  */
70 
/*
 * File-scope state shared by the time routines below:
 *
 * hrtime_last		most recent value published by dtrace_xpv_gethrtime();
 *			updated with atomic_cas_64() so concurrent callers
 *			never observe time going backwards.
 * hrtime_fake_mt	when non-zero (the default), dtrace_xpv_gethrtime()
 *			clamps its result against hrtime_last.
 * hrtime_suspend_time	raw system time sampled by xpv_time_suspend().
 * hrtime_addend	offset added to the raw system time by
 *			dtrace_xpv_gethrtime(); grown by xpv_time_resume()
 *			when the raw time is found to have gone backwards.
 */
71 static volatile hrtime_t hrtime_last;
72 static int hrtime_fake_mt = 1;
73 static hrtime_t hrtime_suspend_time;
74 static hrtime_t hrtime_addend;
75 
76 /*
77  * These functions are used in DTrace probe context, and must be removed from
78  * fbt consideration.  Currently fbt ignores all weak symbols, so this will
79  * achieve that.
80  */
/*
 * Alias map: xpv_gethrtime, dtrace_gethrtime and tsc_read all resolve to
 * dtrace_xpv_gethrtime(); xpv_getsystime resolves to dtrace_xpv_getsystime().
 */
81 #pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
82 #pragma weak xpv_getsystime = dtrace_xpv_getsystime
83 #pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
84 #pragma weak tsc_read = dtrace_xpv_gethrtime
85 
86 hrtime_t
87 dtrace_xpv_getsystime(void)
88 {
89 	vcpu_time_info_t *src;
90 	vcpu_time_info_t __vti, *dst = &__vti;
91 	uint64_t tsc_delta;
92 	kthread_t *t = curthread;
93 	uint64_t tsc;
94 	hrtime_t result;
95 
96 	/*
97 	 * This stops us from wandering off the virtual cpu.
98 	 */
99 	t->t_preempt++;
100 
101 	src = &CPU->cpu_m.mcpu_vcpu_info->time;
102 
103 	/*
104 	 * Loop until version has not been changed during our update, and a Xen
105 	 * update is not under way (lowest bit is set).
106 	 */
107 	do {
108 		dst->version = src->version;
109 
110 		membar_consumer();
111 
112 		dst->tsc_timestamp = src->tsc_timestamp;
113 		dst->system_time = src->system_time;
114 		dst->tsc_to_system_mul = src->tsc_to_system_mul;
115 		dst->tsc_shift = src->tsc_shift;
116 
117 		/*
118 		 * Note that this use of the -actual- TSC register
119 		 * should probably be the SOLE one in the system on this
120 		 * paravirtualized platform.
121 		 */
122 		tsc = __rdtsc_insn();
123 		tsc_delta = tsc - dst->tsc_timestamp;
124 
125 		membar_consumer();
126 
127 	} while ((src->version & 1) | (dst->version ^ src->version));
128 
129 	if (dst->tsc_shift >= 0)
130 		tsc_delta <<= dst->tsc_shift;
131 	else if (dst->tsc_shift < 0)
132 		tsc_delta >>= -dst->tsc_shift;
133 
134 	result = dst->system_time +
135 	    ((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
136 
137 	t->t_preempt--;
138 
139 	return (result);
140 }
141 
142 hrtime_t
143 dtrace_xpv_gethrtime(void)
144 {
145 	hrtime_t result = xpv_getsystime() + hrtime_addend;
146 
147 	if (hrtime_fake_mt) {
148 		hrtime_t last;
149 		do {
150 			last = hrtime_last;
151 			if (result < last)
152 				result = last + 1;
153 		} while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
154 		    last, result) != last);
155 	}
156 
157 	return (result);
158 }
159 
160 void
161 xpv_time_suspend(void)
162 {
163 	hrtime_suspend_time = xpv_getsystime();
164 }
165 
166 void
167 xpv_time_resume(void)
168 {
169 	hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
170 
171 	if (delta < 0)
172 		hrtime_addend += -delta;
173 }
174