xref: /linux/tools/testing/selftests/kvm/x86/state_test.c (revision e5cdd34b5f74c4a0c72fe43092192f347d999e77)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * KVM_GET/SET_* tests
4  *
5  * Copyright (C) 2018, Red Hat, Inc.
6  *
7  * Tests for vCPU state save/restore, including nested guest state.
8  */
9 #include <fcntl.h>
10 #include <stdio.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/ioctl.h>
14 
15 #include "test_util.h"
16 
17 #include "kvm_util.h"
18 #include "processor.h"
19 #include "vmx.h"
20 #include "svm_util.h"
21 
22 #define L2_GUEST_STACK_SIZE 256
23 
/*
 * L2 guest body for the SVM flavor of the test.
 *
 * Reports stage 4, exits to L1, reports stage 6 from between clgi() and
 * stgi(), and then exits to L1 a second and final time.  The host-side
 * svm_check_nested_state() inspects the state saved at stages 4 and 6.
 */
void svm_l2_guest_code(void)
{
	GUEST_SYNC(4);
	/* Exit to L1 */
	vmcall();
	/*
	 * Sync with the host while inside the clgi()/stgi() window; the host
	 * expects V_GIF_MASK to be clear in the saved VMCB at stage 6 when
	 * X86_FEATURE_VGIF is available.
	 */
	clgi();
	GUEST_SYNC(6);
	stgi();
	/* Done, exit to L1 and never come back.  */
	vmcall();
}
35 
36 static void svm_l1_guest_code(struct svm_test_data *svm)
37 {
38 	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
39 	struct vmcb *vmcb = svm->vmcb;
40 
41 	GUEST_ASSERT(svm->vmcb_gpa);
42 	/* Prepare for L2 execution. */
43 	generic_svm_setup(svm, svm_l2_guest_code,
44 			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
45 
46 	vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK);
47 
48 	GUEST_SYNC(3);
49 	run_guest(vmcb, svm->vmcb_gpa);
50 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
51 	GUEST_SYNC(5);
52 	vmcb->save.rip += 3;
53 	run_guest(vmcb, svm->vmcb_gpa);
54 	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
55 	GUEST_SYNC(7);
56 }
57 
/*
 * L2 guest body for the VMX flavor of the test.
 *
 * Reports stage 6 and exits to L1.  When resumed, it reads and writes
 * GUEST_RIP with vmreadz()/vmwrite() while reporting stages 10, 11 and 12,
 * so that the values are checked both before and after each host-side
 * save/restore.  It then exits to L1 one final time.
 */
void vmx_l2_guest_code(void)
{
	GUEST_SYNC(6);

	/* Exit to L1 */
	vmcall();

	/* L1 has now set up a shadow VMCS for us.  */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_SYNC(10);
	/* The value written by L1 must still be visible after the sync. */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee));
	GUEST_SYNC(11);
	/* The value written by L2 before the sync must have been preserved. */
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee);
	/* Leave a final value behind; L1 checks for 0xc0ffffee afterwards. */
	GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee));
	GUEST_SYNC(12);

	/* Done, exit to L1 and never come back.  */
	vmcall();
}
78 
/*
 * L1 guest body for the VMX flavor of the test.
 *
 * Enters VMX operation, loads and prepares a VMCS for vmx_l2_guest_code(),
 * runs L2, and then switches between the regular VMCS and a shadow VMCS.
 * Stages 3, 4, 5, 7, 8, 9 and 13 are reported from here; the assertions on
 * either side of each GUEST_SYNC() check that the relevant VMX state is the
 * same before and after the host-side save/restore.
 *
 * @vmx_pages: nested VMX test data; vmcs_gpa and shadow_vmcs_gpa are used.
 */
static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
{
	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];

	GUEST_ASSERT(vmx_pages->vmcs_gpa);
	GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
	GUEST_SYNC(3);
	GUEST_ASSERT(load_vmcs(vmx_pages));
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	/* The current-VMCS pointer must be unchanged after the sync. */
	GUEST_SYNC(4);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);

	/* L2 starts in vmx_l2_guest_code() with its stack at the array's end. */
	prepare_vmcs(vmx_pages, vmx_l2_guest_code,
		     &l2_guest_stack[L2_GUEST_STACK_SIZE]);

	GUEST_SYNC(5);
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	/* First L2 run: L2 reports stage 6 and exits via vmcall(). */
	GUEST_ASSERT(!vmlaunch());
	GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* Check that the launched state is preserved.  */
	GUEST_ASSERT(vmlaunch());

	/* Resuming re-enters L2 without advancing RIP, so it exits again. */
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/* The exit reason must read the same after the sync. */
	GUEST_SYNC(7);
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/*
	 * Advance L2's RIP by 3 bytes (presumably the length of the
	 * instruction that triggered the exit) so L2 continues after it.
	 */
	vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3);

	/* Enable VMCS shadowing and link the shadow VMCS to the current VMCS. */
	vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS);
	vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa);

	/*
	 * With the shadow VMCS loaded as the current VMCS, vmlaunch() and
	 * vmresume() are expected to fail (non-zero return), both before and
	 * after the sync.
	 */
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmlaunch());
	GUEST_SYNC(8);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());

	/* Seed a marker value that L2 reads back via vmreadz(). */
	vmwrite(GUEST_RIP, 0xc0ffee);
	GUEST_SYNC(9);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee);

	/* Back to the regular VMCS: L2 runs through stages 10-12 and exits. */
	GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa));
	GUEST_ASSERT(!vmresume());
	GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);

	/*
	 * The shadow VMCS must now hold the last value L2 wrote, and entering
	 * a guest with it must still fail, before and after the final sync.
	 */
	GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa));
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
	GUEST_SYNC(13);
	GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee);
	GUEST_ASSERT(vmlaunch());
	GUEST_ASSERT(vmresume());
}
141 
142 static void __attribute__((__flatten__)) guest_code(void *arg)
143 {
144 	GUEST_SYNC(1);
145 
146 	if (this_cpu_has(X86_FEATURE_XSAVE)) {
147 		uint64_t supported_xcr0 = this_cpu_supported_xcr0();
148 		uint8_t buffer[PAGE_SIZE];
149 
150 		memset(buffer, 0xcc, sizeof(buffer));
151 
152 		/*
153 		 * Modify state for all supported xfeatures to take them out of
154 		 * their "init" state, i.e. to make them show up in XSTATE_BV.
155 		 *
156 		 * Note off-by-default features, e.g. AMX, are out of scope for
157 		 * this particular testcase as they have a different ABI.
158 		 */
159 		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
160 		asm volatile ("fincstp");
161 
162 		GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
163 		asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
164 
165 		if (supported_xcr0 & XFEATURE_MASK_YMM)
166 			asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
167 
168 		if (supported_xcr0 & XFEATURE_MASK_AVX512) {
169 			asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
170 			asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
171 			asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
172 		}
173 
174 		if (this_cpu_has(X86_FEATURE_MPX)) {
175 			uint64_t bounds[2] = { 10, 0xffffffffull };
176 			uint64_t output[2] = { };
177 
178 			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
179 			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
180 
181 			/*
182 			 * Don't bother trying to get BNDCSR into the INUSE
183 			 * state.  MSR_IA32_BNDCFGS doesn't count as it isn't
184 			 * managed via XSAVE/XRSTOR, and BNDCFGU can only be
185 			 * modified by XRSTOR.  Stuffing XSTATE_BV in the host
186 			 * is simpler than doing XRSTOR here in the guest.
187 			 *
188 			 * However, temporarily enable MPX in BNDCFGS so that
189 			 * BNDMOV actually loads BND1.  If MPX isn't *fully*
190 			 * enabled, all MPX instructions are treated as NOPs.
191 			 *
192 			 * Hand encode "bndmov (%rax),%bnd1" as support for MPX
193 			 * mnemonics/registers has been removed from gcc and
194 			 * clang (and was never fully supported by clang).
195 			 */
196 			wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
197 			asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
198 			/*
199 			 * Hand encode "bndmov %bnd1, (%rax)" to sanity check
200 			 * that BND1 actually got loaded.
201 			 */
202 			asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
203 			wrmsr(MSR_IA32_BNDCFGS, 0);
204 
205 			GUEST_ASSERT_EQ(bounds[0], output[0]);
206 			GUEST_ASSERT_EQ(bounds[1], output[1]);
207 		}
208 		if (this_cpu_has(X86_FEATURE_PKU)) {
209 			GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
210 			set_cr4(get_cr4() | X86_CR4_PKE);
211 			GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
212 
213 			wrpkru(-1u);
214 		}
215 	}
216 
217 	GUEST_SYNC(2);
218 
219 	if (arg) {
220 		if (this_cpu_has(X86_FEATURE_SVM))
221 			svm_l1_guest_code(arg);
222 		else
223 			vmx_l1_guest_code(arg);
224 	}
225 
226 	GUEST_DONE();
227 }
228 
229 void svm_check_nested_state(int stage, struct kvm_x86_state *state)
230 {
231 	struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm;
232 
233 	if (kvm_cpu_has(X86_FEATURE_VGIF)) {
234 		if (stage == 4)
235 			TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1);
236 		if (stage == 6)
237 			TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0);
238 	}
239 
240 	if (kvm_cpu_has(X86_FEATURE_NRIPS)) {
241 		/*
242 		 * GUEST_SYNC() causes IO emulation in KVM, in which case the
243 		 * RIP is advanced before exiting to userspace. Hence, the RIP
244 		 * in the saved state should be the same as nRIP saved by the
245 		 * CPU in the VMCB.
246 		 */
247 		if (stage == 6)
248 			TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip);
249 	}
250 }
251 
252 void check_nested_state(int stage, struct kvm_x86_state *state)
253 {
254 	if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM))
255 		svm_check_nested_state(stage, state);
256 }
257 
258 int main(int argc, char *argv[])
259 {
260 	uint64_t *xstate_bv, saved_xstate_bv;
261 	vm_vaddr_t nested_gva = 0;
262 	struct kvm_cpuid2 empty_cpuid = {};
263 	struct kvm_regs regs1, regs2;
264 	struct kvm_vcpu *vcpu, *vcpuN;
265 	struct kvm_vm *vm;
266 	struct kvm_x86_state *state;
267 	struct ucall uc;
268 	int stage;
269 
270 	/* Create VM */
271 	vm = vm_create_with_one_vcpu(&vcpu, guest_code);
272 
273 	vcpu_regs_get(vcpu, &regs1);
274 
275 	if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
276 		if (kvm_cpu_has(X86_FEATURE_SVM))
277 			vcpu_alloc_svm(vm, &nested_gva);
278 		else if (kvm_cpu_has(X86_FEATURE_VMX))
279 			vcpu_alloc_vmx(vm, &nested_gva);
280 	}
281 
282 	if (!nested_gva)
283 		pr_info("will skip nested state checks\n");
284 
285 	vcpu_args_set(vcpu, 1, nested_gva);
286 
287 	for (stage = 1;; stage++) {
288 		vcpu_run(vcpu);
289 		TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
290 
291 		switch (get_ucall(vcpu, &uc)) {
292 		case UCALL_ABORT:
293 			REPORT_GUEST_ASSERT(uc);
294 			/* NOT REACHED */
295 		case UCALL_SYNC:
296 			break;
297 		case UCALL_DONE:
298 			goto done;
299 		default:
300 			TEST_FAIL("Unknown ucall %lu", uc.cmd);
301 		}
302 
303 		/* UCALL_SYNC is handled here.  */
304 		TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
305 			    uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
306 			    stage, (ulong)uc.args[1]);
307 
308 		state = vcpu_save_state(vcpu);
309 		memset(&regs1, 0, sizeof(regs1));
310 		vcpu_regs_get(vcpu, &regs1);
311 
312 		kvm_vm_release(vm);
313 
314 		check_nested_state(stage, state);
315 
316 		/* Restore state in a new VM.  */
317 		vcpu = vm_recreate_with_one_vcpu(vm);
318 		vcpu_load_state(vcpu, state);
319 
320 		/*
321 		 * Restore XSAVE state in a dummy vCPU, first without doing
322 		 * KVM_SET_CPUID2, and then with an empty guest CPUID.  Except
323 		 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
324 		 * allow KVM_SET_XSAVE regardless of guest CPUID.  Manually
325 		 * load only XSAVE state, MSRs in particular have a much more
326 		 * convoluted ABI.
327 		 *
328 		 * Load two versions of XSAVE state: one with the actual guest
329 		 * XSAVE state, and one with all supported features forced "on"
330 		 * in xstate_bv, e.g. to ensure that KVM allows loading all
331 		 * supported features, even if something goes awry in saving
332 		 * the original snapshot.
333 		 */
334 		xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
335 		saved_xstate_bv = *xstate_bv;
336 
337 		vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
338 		vcpu_xsave_set(vcpuN, state->xsave);
339 		*xstate_bv = kvm_cpu_supported_xcr0();
340 		vcpu_xsave_set(vcpuN, state->xsave);
341 
342 		vcpu_init_cpuid(vcpuN, &empty_cpuid);
343 		vcpu_xsave_set(vcpuN, state->xsave);
344 		*xstate_bv = saved_xstate_bv;
345 		vcpu_xsave_set(vcpuN, state->xsave);
346 
347 		kvm_x86_state_cleanup(state);
348 
349 		memset(&regs2, 0, sizeof(regs2));
350 		vcpu_regs_get(vcpu, &regs2);
351 		TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
352 			    "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
353 			    (ulong) regs2.rdi, (ulong) regs2.rsi);
354 	}
355 
356 done:
357 	kvm_vm_free(vm);
358 }
359