1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * KVM_GET/SET_* tests 4 * 5 * Copyright (C) 2018, Red Hat, Inc. 6 * 7 * Tests for vCPU state save/restore, including nested guest state. 8 */ 9 #include <fcntl.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/ioctl.h> 14 15 #include "test_util.h" 16 17 #include "kvm_util.h" 18 #include "processor.h" 19 #include "vmx.h" 20 #include "svm_util.h" 21 22 #define L2_GUEST_STACK_SIZE 256 23 24 void svm_l2_guest_code(void) 25 { 26 GUEST_SYNC(4); 27 /* Exit to L1 */ 28 vmcall(); 29 GUEST_SYNC(6); 30 /* Done, exit to L1 and never come back. */ 31 vmcall(); 32 } 33 34 static void svm_l1_guest_code(struct svm_test_data *svm) 35 { 36 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 37 struct vmcb *vmcb = svm->vmcb; 38 39 GUEST_ASSERT(svm->vmcb_gpa); 40 /* Prepare for L2 execution. */ 41 generic_svm_setup(svm, svm_l2_guest_code, 42 &l2_guest_stack[L2_GUEST_STACK_SIZE]); 43 44 GUEST_SYNC(3); 45 run_guest(vmcb, svm->vmcb_gpa); 46 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); 47 GUEST_SYNC(5); 48 vmcb->save.rip += 3; 49 run_guest(vmcb, svm->vmcb_gpa); 50 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); 51 GUEST_SYNC(7); 52 } 53 54 void vmx_l2_guest_code(void) 55 { 56 GUEST_SYNC(6); 57 58 /* Exit to L1 */ 59 vmcall(); 60 61 /* L1 has now set up a shadow VMCS for us. */ 62 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); 63 GUEST_SYNC(10); 64 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); 65 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); 66 GUEST_SYNC(11); 67 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); 68 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); 69 GUEST_SYNC(12); 70 71 /* Done, exit to L1 and never come back. */ 72 vmcall(); 73 } 74 75 static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) 76 { 77 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 78 79 GUEST_ASSERT(vmx_pages->vmcs_gpa); 80 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); 81 GUEST_SYNC(3); 82 GUEST_ASSERT(load_vmcs(vmx_pages)); 83 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 84 85 GUEST_SYNC(4); 86 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 87 88 prepare_vmcs(vmx_pages, vmx_l2_guest_code, 89 &l2_guest_stack[L2_GUEST_STACK_SIZE]); 90 91 GUEST_SYNC(5); 92 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 93 GUEST_ASSERT(!vmlaunch()); 94 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 95 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 96 97 /* Check that the launched state is preserved. */ 98 GUEST_ASSERT(vmlaunch()); 99 100 GUEST_ASSERT(!vmresume()); 101 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 102 103 GUEST_SYNC(7); 104 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 105 106 GUEST_ASSERT(!vmresume()); 107 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 108 109 vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); 110 111 vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); 112 vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); 113 114 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); 115 GUEST_ASSERT(vmlaunch()); 116 GUEST_SYNC(8); 117 GUEST_ASSERT(vmlaunch()); 118 GUEST_ASSERT(vmresume()); 119 120 vmwrite(GUEST_RIP, 0xc0ffee); 121 GUEST_SYNC(9); 122 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); 123 124 GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); 125 GUEST_ASSERT(!vmresume()); 126 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 127 128 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); 129 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); 130 GUEST_ASSERT(vmlaunch()); 131 GUEST_ASSERT(vmresume()); 132 GUEST_SYNC(13); 133 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); 134 GUEST_ASSERT(vmlaunch()); 135 GUEST_ASSERT(vmresume()); 136 } 137 138 static void __attribute__((__flatten__)) guest_code(void *arg) 139 { 140 GUEST_SYNC(1); 141 142 if (this_cpu_has(X86_FEATURE_XSAVE)) { 143 uint64_t supported_xcr0 = this_cpu_supported_xcr0(); 144 uint8_t buffer[4096]; 145 146 memset(buffer, 0xcc, sizeof(buffer)); 147 148 /* 149 * Modify state for all supported xfeatures to take them out of 150 * their "init" state, i.e. to make them show up in XSTATE_BV. 151 * 152 * Note off-by-default features, e.g. AMX, are out of scope for 153 * this particular testcase as they have a different ABI. 154 */ 155 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); 156 asm volatile ("fincstp"); 157 158 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); 159 asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); 160 161 if (supported_xcr0 & XFEATURE_MASK_YMM) 162 asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); 163 164 if (supported_xcr0 & XFEATURE_MASK_AVX512) { 165 asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); 166 asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); 167 asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); 168 } 169 170 if (this_cpu_has(X86_FEATURE_MPX)) { 171 uint64_t bounds[2] = { 10, 0xffffffffull }; 172 uint64_t output[2] = { }; 173 174 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); 175 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); 176 177 /* 178 * Don't bother trying to get BNDCSR into the INUSE 179 * state. MSR_IA32_BNDCFGS doesn't count as it isn't 180 * managed via XSAVE/XRSTOR, and BNDCFGU can only be 181 * modified by XRSTOR. Stuffing XSTATE_BV in the host 182 * is simpler than doing XRSTOR here in the guest. 183 * 184 * However, temporarily enable MPX in BNDCFGS so that 185 * BNDMOV actually loads BND1. If MPX isn't *fully* 186 * enabled, all MPX instructions are treated as NOPs. 187 * 188 * Hand encode "bndmov (%rax),%bnd1" as support for MPX 189 * mnemonics/registers has been removed from gcc and 190 * clang (and was never fully supported by clang). 191 */ 192 wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); 193 asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); 194 /* 195 * Hand encode "bndmov %bnd1, (%rax)" to sanity check 196 * that BND1 actually got loaded. 197 */ 198 asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); 199 wrmsr(MSR_IA32_BNDCFGS, 0); 200 201 GUEST_ASSERT_EQ(bounds[0], output[0]); 202 GUEST_ASSERT_EQ(bounds[1], output[1]); 203 } 204 if (this_cpu_has(X86_FEATURE_PKU)) { 205 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); 206 set_cr4(get_cr4() | X86_CR4_PKE); 207 GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); 208 209 wrpkru(-1u); 210 } 211 } 212 213 GUEST_SYNC(2); 214 215 if (arg) { 216 if (this_cpu_has(X86_FEATURE_SVM)) 217 svm_l1_guest_code(arg); 218 else 219 vmx_l1_guest_code(arg); 220 } 221 222 GUEST_DONE(); 223 } 224 225 int main(int argc, char *argv[]) 226 { 227 uint64_t *xstate_bv, saved_xstate_bv; 228 vm_vaddr_t nested_gva = 0; 229 struct kvm_cpuid2 empty_cpuid = {}; 230 struct kvm_regs regs1, regs2; 231 struct kvm_vcpu *vcpu, *vcpuN; 232 struct kvm_vm *vm; 233 struct kvm_x86_state *state; 234 struct ucall uc; 235 int stage; 236 237 /* Create VM */ 238 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 239 240 vcpu_regs_get(vcpu, ®s1); 241 242 if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { 243 if (kvm_cpu_has(X86_FEATURE_SVM)) 244 vcpu_alloc_svm(vm, &nested_gva); 245 else if (kvm_cpu_has(X86_FEATURE_VMX)) 246 vcpu_alloc_vmx(vm, &nested_gva); 247 } 248 249 if (!nested_gva) 250 pr_info("will skip nested state checks\n"); 251 252 vcpu_args_set(vcpu, 1, nested_gva); 253 254 for (stage = 1;; stage++) { 255 vcpu_run(vcpu); 256 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 257 258 switch (get_ucall(vcpu, &uc)) { 259 case UCALL_ABORT: 260 REPORT_GUEST_ASSERT(uc); 261 /* NOT REACHED */ 262 case UCALL_SYNC: 263 break; 264 case UCALL_DONE: 265 goto done; 266 default: 267 TEST_FAIL("Unknown ucall %lu", uc.cmd); 268 } 269 270 /* UCALL_SYNC is handled here. */ 271 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && 272 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", 273 stage, (ulong)uc.args[1]); 274 275 state = vcpu_save_state(vcpu); 276 memset(®s1, 0, sizeof(regs1)); 277 vcpu_regs_get(vcpu, ®s1); 278 279 kvm_vm_release(vm); 280 281 /* Restore state in a new VM. */ 282 vcpu = vm_recreate_with_one_vcpu(vm); 283 vcpu_load_state(vcpu, state); 284 285 /* 286 * Restore XSAVE state in a dummy vCPU, first without doing 287 * KVM_SET_CPUID2, and then with an empty guest CPUID. Except 288 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to 289 * allow KVM_SET_XSAVE regardless of guest CPUID. Manually 290 * load only XSAVE state, MSRs in particular have a much more 291 * convoluted ABI. 292 * 293 * Load two versions of XSAVE state: one with the actual guest 294 * XSAVE state, and one with all supported features forced "on" 295 * in xstate_bv, e.g. to ensure that KVM allows loading all 296 * supported features, even if something goes awry in saving 297 * the original snapshot. 298 */ 299 xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; 300 saved_xstate_bv = *xstate_bv; 301 302 vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); 303 vcpu_xsave_set(vcpuN, state->xsave); 304 *xstate_bv = kvm_cpu_supported_xcr0(); 305 vcpu_xsave_set(vcpuN, state->xsave); 306 307 vcpu_init_cpuid(vcpuN, &empty_cpuid); 308 vcpu_xsave_set(vcpuN, state->xsave); 309 *xstate_bv = saved_xstate_bv; 310 vcpu_xsave_set(vcpuN, state->xsave); 311 312 kvm_x86_state_cleanup(state); 313 314 memset(®s2, 0, sizeof(regs2)); 315 vcpu_regs_get(vcpu, ®s2); 316 TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), 317 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", 318 (ulong) regs2.rdi, (ulong) regs2.rsi); 319 } 320 321 done: 322 kvm_vm_free(vm); 323 } 324