1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2016 Joyent, Inc.
26 */
27
28 #include <sys/types.h>
29 #include <sys/clock.h>
30 #include <sys/panic.h>
31 #include <sys/atomic.h>
32 #include <sys/hypervisor.h>
33
34 #include <sys/archsystm.h>
35
36 /*
37 * On the hypervisor, we have a virtualized system time based upon the
38 * information provided for each VCPU, which is updated every time it is
39 * scheduled onto a real CPU. Thus, none of the traditional code in
40 * i86pc/os/timestamp.c applies, our gethrtime() implementation is run through
41 * the PSM, and there is no scaling step to apply.
42 *
43 * However, the platform does not guarantee monotonicity; thus we have to fake
44 * this up, which is a deeply unpleasant thing to have to do.
45 *
46 * Note that the virtualized interface still relies on the current TSC to
47 * calculate the time in nanoseconds since the VCPU was scheduled, and is thus
48 * subject to all the problems with that. For the most part, the hypervisor is
49 * supposed to deal with them.
50 *
51 * Another wrinkle involves suspend/resume/migration. If we come back and time
52 * is apparently less, we may have resumed on a different machine or on the
53 * same machine after a reboot. In this case we need to maintain an addend to
54 * ensure time continues reasonably. Otherwise we could end up taking a very
55 * long time to expire cyclics in the heap. Thus we have two functions:
56 *
57 * xpv_getsystime()
58 *
59 * The unadulterated system time from the hypervisor. This is only to be
60 * used when programming the hypervisor (setting a timer or calculating
61 * the TOD).
62 *
63 * xpv_gethrtime()
64 *
65 * This is the monotonic hrtime counter to be used by everything else such
66 * as the cyclic subsystem. We should never pass an hrtime directly into
67 * a hypervisor interface, as hrtime_addend may well be non-zero.
68 */
69
/*
 * Non-zero (the default) makes dtrace_xpv_gethrtime() clamp its result
 * against hrtime_last so that returned values never go backwards.
 */
int hrtime_fake_mt = 1;
/* Most recent value handed out by dtrace_xpv_gethrtime(); updated via CAS. */
static volatile hrtime_t hrtime_last;
/* Raw hypervisor system time recorded by xpv_time_suspend(). */
static hrtime_t hrtime_suspend_time;
/*
 * Offset added to the raw hypervisor system time by dtrace_xpv_gethrtime().
 * xpv_time_resume() grows it when the system time after resume is lower than
 * the time recorded at suspend.
 */
static hrtime_t hrtime_addend;

/*
 * The following are defined here but not referenced anywhere in this file.
 * NOTE(review): presumably they are consumed by common clock/hrestime code
 * -- confirm against their users.
 */
volatile uint32_t hres_lock;
hrtime_t hres_last_tick;
int64_t hrestime_adj;
volatile timestruc_t hrestime;
79
/*
 * These functions are used in DTrace probe context, and must be removed from
 * fbt consideration. Currently fbt ignores all weak symbols, so this will
 * achieve that.
 *
 * The aliases below resolve as follows:
 *
 *	xpv_gethrtime, dtrace_gethrtime, tsc_read -> dtrace_xpv_gethrtime
 *	xpv_getsystime                            -> dtrace_xpv_getsystime
 *
 * Note that tsc_read therefore yields the same value as xpv_gethrtime
 * rather than a raw TSC reading.
 */
#pragma weak xpv_gethrtime = dtrace_xpv_gethrtime
#pragma weak xpv_getsystime = dtrace_xpv_getsystime
#pragma weak dtrace_gethrtime = dtrace_xpv_gethrtime
#pragma weak tsc_read = dtrace_xpv_gethrtime
89
90 hrtime_t
dtrace_xpv_getsystime(void)91 dtrace_xpv_getsystime(void)
92 {
93 vcpu_time_info_t *src;
94 vcpu_time_info_t __vti, *dst = &__vti;
95 uint64_t tsc_delta;
96 uint64_t tsc;
97 hrtime_t result;
98 uint32_t stamp;
99
100 src = &CPU->cpu_m.mcpu_vcpu_info->time;
101
102 /*
103 * Loop until version has not been changed during our update, and a Xen
104 * update is not under way (lowest bit is set).
105 */
106 do {
107 dst->version = src->version;
108 stamp = CPU->cpu_m.mcpu_istamp;
109
110 membar_consumer();
111
112 dst->tsc_timestamp = src->tsc_timestamp;
113 dst->system_time = src->system_time;
114 dst->tsc_to_system_mul = src->tsc_to_system_mul;
115 dst->tsc_shift = src->tsc_shift;
116
117 /*
118 * Note that this use of the -actual- TSC register
119 * should probably be the SOLE one in the system on this
120 * paravirtualized platform.
121 */
122 tsc = __rdtsc_insn();
123 tsc_delta = tsc - dst->tsc_timestamp;
124
125 membar_consumer();
126
127 } while (((src->version & 1) | (dst->version ^ src->version)) ||
128 CPU->cpu_m.mcpu_istamp != stamp);
129
130 if (dst->tsc_shift >= 0)
131 tsc_delta <<= dst->tsc_shift;
132 else if (dst->tsc_shift < 0)
133 tsc_delta >>= -dst->tsc_shift;
134
135 result = dst->system_time +
136 ((uint64_t)(tsc_delta * (uint64_t)dst->tsc_to_system_mul) >> 32);
137
138 return (result);
139 }
140
141 hrtime_t
dtrace_xpv_gethrtime(void)142 dtrace_xpv_gethrtime(void)
143 {
144 hrtime_t result = xpv_getsystime() + hrtime_addend;
145
146 if (hrtime_fake_mt) {
147 hrtime_t last;
148 do {
149 last = hrtime_last;
150 if (result < last)
151 result = last + 1;
152 } while (atomic_cas_64((volatile uint64_t *)&hrtime_last,
153 last, result) != last);
154 }
155
156 return (result);
157 }
158
159 void
xpv_time_suspend(void)160 xpv_time_suspend(void)
161 {
162 hrtime_suspend_time = xpv_getsystime();
163 }
164
165 void
xpv_time_resume(void)166 xpv_time_resume(void)
167 {
168 hrtime_t delta = xpv_getsystime() - hrtime_suspend_time;
169
170 if (delta < 0)
171 hrtime_addend += -delta;
172 }
173