1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * vmx_nested_tsc_scaling_test
4 *
5 * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
6 *
7 * This test case verifies that nested TSC scaling behaves as expected when
8 * both L1 and L2 are scaled using different ratios. For this test we scale
9 * L1 down and scale L2 up.
10 */
11
12 #include <time.h>
13
14 #include "kvm_util.h"
15 #include "vmx.h"
16 #include "svm_util.h"
17 #include "kselftest.h"
18
/* L2 is scaled up (from L1's perspective) by this factor */
#define L2_SCALE_FACTOR 4ULL

/*
 * Values programmed into the VMCS TSC_OFFSET and TSC_MULTIPLIER fields for L2
 * by l1_vmx_code(). The multiplier is the scale factor shifted left by 48;
 * the SVM path writes the same factor shifted left by 32 to
 * MSR_AMD64_TSC_RATIO instead.
 */
#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)

/* Number of unsigned long slots in the on-stack array used as L2's stack */
#define L2_GUEST_STACK_SIZE 64

/*
 * First argument of every UCALL_SYNC issued by the guest; main() dispatches
 * on it. USLEEP asks the host to sleep, UCHECK_L1/UCHECK_L2 report a measured
 * TSC frequency for the respective level.
 */
enum { USLEEP, UCHECK_L1, UCHECK_L2 };
/* Ask the host to sleep for 'sec' seconds on the guest's behalf */
#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
/* Report the frequency 'freq' measured at 'level' (UCHECK_L1 or UCHECK_L2) */
#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
30
31
/*
 * Assert that a measured TSC frequency ("actual") lies strictly inside a
 * window of +/-1% around the frequency we expect to see. The 1% slack absorbs
 * the delay incurred while taking the two TSC samples the measurement is
 * derived from.
 */
static void compare_tsc_freq(uint64_t actual, uint64_t expected)
{
	const uint64_t slack = expected / 100;
	const uint64_t thresh_low = expected - slack;
	const uint64_t thresh_high = expected + slack;

	/* lower bound, exclusive */
	TEST_ASSERT(thresh_low < actual,
		    "TSC freq is expected to be between %"PRIu64" and %"PRIu64
		    " but it actually is %"PRIu64,
		    thresh_low, thresh_high, actual);

	/* upper bound, exclusive */
	TEST_ASSERT(thresh_high > actual,
		    "TSC freq is expected to be between %"PRIu64" and %"PRIu64
		    " but it actually is %"PRIu64,
		    thresh_low, thresh_high, actual);
}
55
check_tsc_freq(int level)56 static void check_tsc_freq(int level)
57 {
58 uint64_t tsc_start, tsc_end, tsc_freq;
59
60 /*
61 * Reading the TSC twice with about a second's difference should give
62 * us an approximation of the TSC frequency from the guest's
63 * perspective. Now, this won't be completely accurate, but it should
64 * be good enough for the purposes of this test.
65 */
66 tsc_start = rdmsr(MSR_IA32_TSC);
67 GUEST_SLEEP(1);
68 tsc_end = rdmsr(MSR_IA32_TSC);
69
70 tsc_freq = tsc_end - tsc_start;
71
72 GUEST_CHECK(level, tsc_freq);
73 }
74
l2_guest_code(void)75 static void l2_guest_code(void)
76 {
77 check_tsc_freq(UCHECK_L2);
78
79 /* exit to L1 */
80 __asm__ __volatile__("vmcall");
81 }
82
l1_svm_code(struct svm_test_data * svm)83 static void l1_svm_code(struct svm_test_data *svm)
84 {
85 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
86
87 /* check that L1's frequency looks alright before launching L2 */
88 check_tsc_freq(UCHECK_L1);
89
90 generic_svm_setup(svm, l2_guest_code,
91 &l2_guest_stack[L2_GUEST_STACK_SIZE]);
92
93 /* enable TSC scaling for L2 */
94 wrmsr(MSR_AMD64_TSC_RATIO, L2_SCALE_FACTOR << 32);
95
96 /* launch L2 */
97 run_guest(svm->vmcb, svm->vmcb_gpa);
98 GUEST_ASSERT(svm->vmcb->control.exit_code == SVM_EXIT_VMMCALL);
99
100 /* check that L1's frequency still looks good */
101 check_tsc_freq(UCHECK_L1);
102
103 GUEST_DONE();
104 }
105
l1_vmx_code(struct vmx_pages * vmx_pages)106 static void l1_vmx_code(struct vmx_pages *vmx_pages)
107 {
108 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
109 uint32_t control;
110
111 /* check that L1's frequency looks alright before launching L2 */
112 check_tsc_freq(UCHECK_L1);
113
114 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
115 GUEST_ASSERT(load_vmcs(vmx_pages));
116
117 /* prepare the VMCS for L2 execution */
118 prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
119
120 /* enable TSC offsetting and TSC scaling for L2 */
121 control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
122 control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
123 vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
124
125 control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
126 control |= SECONDARY_EXEC_TSC_SCALING;
127 vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
128
129 vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
130 vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
131 vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
132
133 /* launch L2 */
134 GUEST_ASSERT(!vmlaunch());
135 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
136
137 /* check that L1's frequency still looks good */
138 check_tsc_freq(UCHECK_L1);
139
140 GUEST_DONE();
141 }
142
l1_guest_code(void * data)143 static void l1_guest_code(void *data)
144 {
145 if (this_cpu_has(X86_FEATURE_VMX))
146 l1_vmx_code(data);
147 else
148 l1_svm_code(data);
149 }
150
main(int argc,char * argv[])151 int main(int argc, char *argv[])
152 {
153 struct kvm_vcpu *vcpu;
154 struct kvm_vm *vm;
155 vm_vaddr_t guest_gva = 0;
156
157 uint64_t tsc_start, tsc_end;
158 uint64_t tsc_khz;
159 uint64_t l1_scale_factor;
160 uint64_t l0_tsc_freq = 0;
161 uint64_t l1_tsc_freq = 0;
162 uint64_t l2_tsc_freq = 0;
163
164 TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX) ||
165 kvm_cpu_has(X86_FEATURE_SVM));
166 TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
167 TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
168
169 /*
170 * We set L1's scale factor to be a random number from 2 to 10.
171 * Ideally we would do the same for L2's factor but that one is
172 * referenced by both main() and l1_guest_code() and using a global
173 * variable does not work.
174 */
175 srand(time(NULL));
176 l1_scale_factor = (rand() % 9) + 2;
177 printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
178 printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
179
180 tsc_start = rdtsc();
181 sleep(1);
182 tsc_end = rdtsc();
183
184 l0_tsc_freq = tsc_end - tsc_start;
185 printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
186
187 vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
188
189 if (kvm_cpu_has(X86_FEATURE_VMX))
190 vcpu_alloc_vmx(vm, &guest_gva);
191 else
192 vcpu_alloc_svm(vm, &guest_gva);
193
194 vcpu_args_set(vcpu, 1, guest_gva);
195
196 tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
197 TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
198
199 /* scale down L1's TSC frequency */
200 vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
201
202 for (;;) {
203 struct ucall uc;
204
205 vcpu_run(vcpu);
206 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
207
208 switch (get_ucall(vcpu, &uc)) {
209 case UCALL_ABORT:
210 REPORT_GUEST_ASSERT(uc);
211 case UCALL_SYNC:
212 switch (uc.args[0]) {
213 case USLEEP:
214 sleep(uc.args[1]);
215 break;
216 case UCHECK_L1:
217 l1_tsc_freq = uc.args[1];
218 printf("L1's TSC frequency is around: %"PRIu64
219 "\n", l1_tsc_freq);
220
221 compare_tsc_freq(l1_tsc_freq,
222 l0_tsc_freq / l1_scale_factor);
223 break;
224 case UCHECK_L2:
225 l2_tsc_freq = uc.args[1];
226 printf("L2's TSC frequency is around: %"PRIu64
227 "\n", l2_tsc_freq);
228
229 compare_tsc_freq(l2_tsc_freq,
230 l1_tsc_freq * L2_SCALE_FACTOR);
231 break;
232 }
233 break;
234 case UCALL_DONE:
235 goto done;
236 default:
237 TEST_FAIL("Unknown ucall %lu", uc.cmd);
238 }
239 }
240
241 done:
242 kvm_vm_free(vm);
243 return 0;
244 }
245