xref: /linux/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c (revision 3efc57369a0ce8f76bf0804f7e673982384e4ac9)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Copyright (C) 2018, Red Hat, Inc.
4  *
5  * Tests for Enlightened VMCS, including nested guest state.
6  */
7 #include <fcntl.h>
8 #include <stdio.h>
9 #include <stdlib.h>
10 #include <string.h>
11 #include <sys/ioctl.h>
12 #include <linux/bitmap.h>
13 
14 #include "test_util.h"
15 
16 #include "kvm_util.h"
17 
18 #include "hyperv.h"
19 #include "vmx.h"
20 
21 static int ud_count;
22 
guest_ud_handler(struct ex_regs * regs)23 static void guest_ud_handler(struct ex_regs *regs)
24 {
25 	ud_count++;
26 	regs->rip += 3; /* VMLAUNCH */
27 }
28 
/*
 * Guest NMI handler.  Deliberately empty: it takes no action and leaves
 * @regs untouched.
 */
static void guest_nmi_handler(struct ex_regs *regs)
{
	/* Nothing to do. */
}
32 
/*
 * Execute RDMSR for @msr and throw the result away.
 *
 * Currently, L1 doesn't preserve GPRs during vmexits, so the registers in
 * the clobber list below are all declared as modified by the instruction.
 */
static inline void rdmsr_from_l2(uint32_t msr)
{
	__asm__ __volatile__ (
		"rdmsr"
		: /* no outputs: the value read is discarded */
		: "c"(msr)
		: "rax", "rbx", "rdx", "rsi", "rdi",
		  "r8", "r9", "r10", "r11",
		  "r12", "r13", "r14", "r15");
}
40 
41 /* Exit to L1 from L2 with RDMSR instruction */
l2_guest_code(void)42 void l2_guest_code(void)
43 {
44 	u64 unused;
45 
46 	GUEST_SYNC(7);
47 
48 	GUEST_SYNC(8);
49 
50 	/* Forced exit to L1 upon restore */
51 	GUEST_SYNC(9);
52 
53 	vmcall();
54 
55 	/* MSR-Bitmap tests */
56 	rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
57 	rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
58 	rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
59 	vmcall();
60 	rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
61 
62 	/* L2 TLB flush tests */
63 	hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
64 			 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
65 	rdmsr_from_l2(MSR_FS_BASE);
66 	/*
67 	 * Note: hypercall status (RAX) is not preserved correctly by L1 after
68 	 * synthetic vmexit, use unchecked version.
69 	 */
70 	__hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
71 			   HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
72 			   &unused);
73 
74 	/* Done, exit to L1 and never come back.  */
75 	vmcall();
76 }
77 
/*
 * L1 guest entry point.
 *
 * Sets up the Hyper-V guest OS ID / hypercall MSRs, enables the VP assist
 * page and enlightened VMCS, then runs l2_guest_code() as L2 and checks the
 * reason of every L2->L1 exit in turn.  Progress is reported to the host
 * with GUEST_SYNC() stages 1-6, 10 and 11 (stages 7-9 are reported by L2),
 * and the test ends with GUEST_DONE().
 *
 * @vmx_pages:         VMX test pages; ->msr is used as the MSR bitmap.
 * @hv_pages:          Hyper-V test pages (VP assist, enlightened VMCS and
 *                     partition assist page).
 * @hv_hcall_page_gpa: value written to HV_X64_MSR_HYPERCALL.
 */
void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
		vm_vaddr_t hv_hcall_page_gpa)
{
#define L2_GUEST_STACK_SIZE 64
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
	wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);

	x2apic_enable();

	GUEST_SYNC(1);
	GUEST_SYNC(2);

	enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
	evmcs_enable();

	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_evmcs(hv_pages));
	GUEST_ASSERT_EQ(vmptrstz(), hv_pages->enlightened_vmcs_gpa);

	GUEST_SYNC(4);
	GUEST_ASSERT_EQ(vmptrstz(), hv_pages->enlightened_vmcs_gpa);

	/* The stack pointer handed to L2 is one past the end of the array. */
	prepare_vmcs(vmx_pages, l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT_EQ(vmptrstz(), hv_pages->enlightened_vmcs_gpa);

	/* VMLAUNCH is expected to fail while the revision id is bogus. */
	current_evmcs->revision_id = -1u;
	GUEST_ASSERT(vmlaunch());
	current_evmcs->revision_id = EVMCS_VERSION;
	GUEST_SYNC(6);

	vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
		PIN_BASED_NMI_EXITING);

	/* L2 TLB flush setup */
	current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
	current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
	current_evmcs->hv_vm_id = 1;
	current_evmcs->hv_vp_id = 1;
	current_vp_assist->nested_control.features.directhypercall = 1;
	*(u32 *)(hv_pages->partition_assist) = 0;

	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
	GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
	GUEST_ASSERT_EQ(vmptrstz(), hv_pages->enlightened_vmcs_gpa);

	/*
	 * NMI forces an L2->L1 exit.  Resume L2 and hope that the eVMCS is
	 * up-to-date (RIP points where it should and not at the beginning
	 * of l2_guest_code()); GUEST_SYNC(9) checks that.
	 */
	GUEST_ASSERT(!vmresume());

	GUEST_SYNC(10);

	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
	current_evmcs->guest_rip += 3; /* vmcall */

	/* Intercept RDMSR 0xc0000100 */
	vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
		CPU_BASED_USE_MSR_BITMAPS);
	__set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_MSR_READ);
	current_evmcs->guest_rip += 2; /* rdmsr */

	/* Enable enlightened MSR bitmap */
	current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_MSR_READ);
	current_evmcs->guest_rip += 2; /* rdmsr */

	/* Intercept RDMSR 0xc0000101 without telling KVM about it */
	__set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
	/* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
	current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
	GUEST_ASSERT(!vmresume());
	/* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
	current_evmcs->guest_rip += 3; /* vmcall */

	/* Now tell KVM we've changed MSR-Bitmap */
	current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_MSR_READ);
	current_evmcs->guest_rip += 2; /* rdmsr */

	/*
	 * L2 TLB flush test. First VMCALL should be handled directly by L0,
	 * no VMCALL exit expected.
	 */
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_MSR_READ);
	current_evmcs->guest_rip += 2; /* rdmsr */
	/* Enable synthetic vmexit */
	*(u32 *)(hv_pages->partition_assist) = 1;
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON),
			HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_VMCALL);
	GUEST_SYNC(11);

	/* Try enlightened vmptrld with an incorrect GPA */
	evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
	GUEST_ASSERT(vmlaunch());
	/* Exactly one #UD must have been counted by guest_ud_handler(). */
	GUEST_ASSERT_EQ(ud_count, 1);
	GUEST_DONE();
}
192 
inject_nmi(struct kvm_vcpu * vcpu)193 void inject_nmi(struct kvm_vcpu *vcpu)
194 {
195 	struct kvm_vcpu_events events;
196 
197 	vcpu_events_get(vcpu, &events);
198 
199 	events.nmi.pending = 1;
200 	events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
201 
202 	vcpu_events_set(vcpu, &events);
203 }
204 
save_restore_vm(struct kvm_vm * vm,struct kvm_vcpu * vcpu)205 static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
206 					struct kvm_vcpu *vcpu)
207 {
208 	struct kvm_regs regs1, regs2;
209 	struct kvm_x86_state *state;
210 
211 	state = vcpu_save_state(vcpu);
212 	memset(&regs1, 0, sizeof(regs1));
213 	vcpu_regs_get(vcpu, &regs1);
214 
215 	kvm_vm_release(vm);
216 
217 	/* Restore state in a new VM.  */
218 	vcpu = vm_recreate_with_one_vcpu(vm);
219 	vcpu_set_hv_cpuid(vcpu);
220 	vcpu_enable_evmcs(vcpu);
221 	vcpu_load_state(vcpu, state);
222 	kvm_x86_state_cleanup(state);
223 
224 	memset(&regs2, 0, sizeof(regs2));
225 	vcpu_regs_get(vcpu, &regs2);
226 	TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
227 		    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
228 		    (ulong) regs2.rdi, (ulong) regs2.rsi);
229 	return vcpu;
230 }
231 
main(int argc,char * argv[])232 int main(int argc, char *argv[])
233 {
234 	vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
235 	vm_vaddr_t hcall_page;
236 
237 	struct kvm_vcpu *vcpu;
238 	struct kvm_vm *vm;
239 	struct ucall uc;
240 	int stage;
241 
242 	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
243 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
244 	TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
245 	TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH));
246 
247 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
248 
249 	hcall_page = vm_vaddr_alloc_pages(vm, 1);
250 	memset(addr_gva2hva(vm, hcall_page), 0x0,  getpagesize());
251 
252 	vcpu_set_hv_cpuid(vcpu);
253 	vcpu_enable_evmcs(vcpu);
254 
255 	vcpu_alloc_vmx(vm, &vmx_pages_gva);
256 	vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
257 	vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
258 	vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
259 
260 	vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
261 	vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
262 
263 	pr_info("Running L1 which uses EVMCS to run L2\n");
264 
265 	for (stage = 1;; stage++) {
266 		vcpu_run(vcpu);
267 		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
268 
269 		switch (get_ucall(vcpu, &uc)) {
270 		case UCALL_ABORT:
271 			REPORT_GUEST_ASSERT(uc);
272 			/* NOT REACHED */
273 		case UCALL_SYNC:
274 			break;
275 		case UCALL_DONE:
276 			goto done;
277 		default:
278 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
279 		}
280 
281 		/* UCALL_SYNC is handled here.  */
282 		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
283 			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
284 			    stage, (ulong)uc.args[1]);
285 
286 		vcpu = save_restore_vm(vm, vcpu);
287 
288 		/* Force immediate L2->L1 exit before resuming */
289 		if (stage == 8) {
290 			pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
291 			inject_nmi(vcpu);
292 		}
293 
294 		/*
295 		 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
296 		 * restored VM (before the first KVM_RUN) to check that
297 		 * KVM_STATE_NESTED_EVMCS is not lost.
298 		 */
299 		if (stage == 9) {
300 			pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
301 			vcpu = save_restore_vm(vm, vcpu);
302 		}
303 	}
304 
305 done:
306 	kvm_vm_free(vm);
307 }
308