1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * KVM_GET/SET_* tests 4 * 5 * Copyright (C) 2018, Red Hat, Inc. 6 * 7 * Tests for vCPU state save/restore, including nested guest state. 8 */ 9 #include <fcntl.h> 10 #include <stdio.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/ioctl.h> 14 15 #include "test_util.h" 16 17 #include "kvm_util.h" 18 #include "processor.h" 19 #include "vmx.h" 20 #include "svm_util.h" 21 22 #define L2_GUEST_STACK_SIZE 256 23 24 void svm_l2_guest_code(void) 25 { 26 GUEST_SYNC(4); 27 /* Exit to L1 */ 28 vmcall(); 29 clgi(); 30 GUEST_SYNC(6); 31 stgi(); 32 /* Done, exit to L1 and never come back. */ 33 vmcall(); 34 } 35 36 static void svm_l1_guest_code(struct svm_test_data *svm) 37 { 38 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 39 struct vmcb *vmcb = svm->vmcb; 40 41 GUEST_ASSERT(svm->vmcb_gpa); 42 /* Prepare for L2 execution. */ 43 generic_svm_setup(svm, svm_l2_guest_code, 44 &l2_guest_stack[L2_GUEST_STACK_SIZE]); 45 46 vmcb->control.int_ctl |= (V_GIF_ENABLE_MASK | V_GIF_MASK); 47 48 GUEST_SYNC(3); 49 run_guest(vmcb, svm->vmcb_gpa); 50 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); 51 GUEST_SYNC(5); 52 vmcb->save.rip += 3; 53 run_guest(vmcb, svm->vmcb_gpa); 54 GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL); 55 GUEST_SYNC(7); 56 } 57 58 void vmx_l2_guest_code(void) 59 { 60 GUEST_SYNC(6); 61 62 /* Exit to L1 */ 63 vmcall(); 64 65 /* L1 has now set up a shadow VMCS for us. */ 66 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); 67 GUEST_SYNC(10); 68 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); 69 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0fffee)); 70 GUEST_SYNC(11); 71 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0fffee); 72 GUEST_ASSERT(!vmwrite(GUEST_RIP, 0xc0ffffee)); 73 GUEST_SYNC(12); 74 75 /* Done, exit to L1 and never come back. */ 76 vmcall(); 77 } 78 79 static void vmx_l1_guest_code(struct vmx_pages *vmx_pages) 80 { 81 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 82 83 GUEST_ASSERT(vmx_pages->vmcs_gpa); 84 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); 85 GUEST_SYNC(3); 86 GUEST_ASSERT(load_vmcs(vmx_pages)); 87 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 88 89 GUEST_SYNC(4); 90 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 91 92 prepare_vmcs(vmx_pages, vmx_l2_guest_code, 93 &l2_guest_stack[L2_GUEST_STACK_SIZE]); 94 95 GUEST_SYNC(5); 96 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 97 GUEST_ASSERT(!vmlaunch()); 98 GUEST_ASSERT(vmptrstz() == vmx_pages->vmcs_gpa); 99 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 100 101 /* Check that the launched state is preserved. */ 102 GUEST_ASSERT(vmlaunch()); 103 104 GUEST_ASSERT(!vmresume()); 105 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 106 107 GUEST_SYNC(7); 108 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 109 110 GUEST_ASSERT(!vmresume()); 111 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 112 113 vmwrite(GUEST_RIP, vmreadz(GUEST_RIP) + 3); 114 115 vmwrite(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_SHADOW_VMCS); 116 vmwrite(VMCS_LINK_POINTER, vmx_pages->shadow_vmcs_gpa); 117 118 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); 119 GUEST_ASSERT(vmlaunch()); 120 GUEST_SYNC(8); 121 GUEST_ASSERT(vmlaunch()); 122 GUEST_ASSERT(vmresume()); 123 124 vmwrite(GUEST_RIP, 0xc0ffee); 125 GUEST_SYNC(9); 126 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffee); 127 128 GUEST_ASSERT(!vmptrld(vmx_pages->vmcs_gpa)); 129 GUEST_ASSERT(!vmresume()); 130 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 131 132 GUEST_ASSERT(!vmptrld(vmx_pages->shadow_vmcs_gpa)); 133 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); 134 GUEST_ASSERT(vmlaunch()); 135 GUEST_ASSERT(vmresume()); 136 GUEST_SYNC(13); 137 GUEST_ASSERT(vmreadz(GUEST_RIP) == 0xc0ffffee); 138 GUEST_ASSERT(vmlaunch()); 139 GUEST_ASSERT(vmresume()); 140 } 141 142 static void __attribute__((__flatten__)) guest_code(void *arg) 143 { 144 GUEST_SYNC(1); 145 146 if (this_cpu_has(X86_FEATURE_XSAVE)) { 147 uint64_t supported_xcr0 = this_cpu_supported_xcr0(); 148 uint8_t buffer[PAGE_SIZE]; 149 150 memset(buffer, 0xcc, sizeof(buffer)); 151 152 /* 153 * Modify state for all supported xfeatures to take them out of 154 * their "init" state, i.e. to make them show up in XSTATE_BV. 155 * 156 * Note off-by-default features, e.g. AMX, are out of scope for 157 * this particular testcase as they have a different ABI. 158 */ 159 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP); 160 asm volatile ("fincstp"); 161 162 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE); 163 asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer)); 164 165 if (supported_xcr0 & XFEATURE_MASK_YMM) 166 asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer)); 167 168 if (supported_xcr0 & XFEATURE_MASK_AVX512) { 169 asm volatile ("kmovq %0, %%k1" :: "r" (-1ull)); 170 asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer)); 171 asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer)); 172 } 173 174 if (this_cpu_has(X86_FEATURE_MPX)) { 175 uint64_t bounds[2] = { 10, 0xffffffffull }; 176 uint64_t output[2] = { }; 177 178 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS); 179 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR); 180 181 /* 182 * Don't bother trying to get BNDCSR into the INUSE 183 * state. MSR_IA32_BNDCFGS doesn't count as it isn't 184 * managed via XSAVE/XRSTOR, and BNDCFGU can only be 185 * modified by XRSTOR. Stuffing XSTATE_BV in the host 186 * is simpler than doing XRSTOR here in the guest. 187 * 188 * However, temporarily enable MPX in BNDCFGS so that 189 * BNDMOV actually loads BND1. If MPX isn't *fully* 190 * enabled, all MPX instructions are treated as NOPs. 191 * 192 * Hand encode "bndmov (%rax),%bnd1" as support for MPX 193 * mnemonics/registers has been removed from gcc and 194 * clang (and was never fully supported by clang). 195 */ 196 wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0)); 197 asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds)); 198 /* 199 * Hand encode "bndmov %bnd1, (%rax)" to sanity check 200 * that BND1 actually got loaded. 201 */ 202 asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output)); 203 wrmsr(MSR_IA32_BNDCFGS, 0); 204 205 GUEST_ASSERT_EQ(bounds[0], output[0]); 206 GUEST_ASSERT_EQ(bounds[1], output[1]); 207 } 208 if (this_cpu_has(X86_FEATURE_PKU)) { 209 GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU); 210 set_cr4(get_cr4() | X86_CR4_PKE); 211 GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE)); 212 213 wrpkru(-1u); 214 } 215 } 216 217 GUEST_SYNC(2); 218 219 if (arg) { 220 if (this_cpu_has(X86_FEATURE_SVM)) 221 svm_l1_guest_code(arg); 222 else 223 vmx_l1_guest_code(arg); 224 } 225 226 GUEST_DONE(); 227 } 228 229 void svm_check_nested_state(int stage, struct kvm_x86_state *state) 230 { 231 struct vmcb *vmcb = (struct vmcb *)state->nested.data.svm; 232 233 if (kvm_cpu_has(X86_FEATURE_VGIF)) { 234 if (stage == 4) 235 TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 1); 236 if (stage == 6) 237 TEST_ASSERT_EQ(!!(vmcb->control.int_ctl & V_GIF_MASK), 0); 238 } 239 240 if (kvm_cpu_has(X86_FEATURE_NRIPS)) { 241 /* 242 * GUEST_SYNC() causes IO emulation in KVM, in which case the 243 * RIP is advanced before exiting to userspace. Hence, the RIP 244 * in the saved state should be the same as nRIP saved by the 245 * CPU in the VMCB. 246 */ 247 if (stage == 6) 248 TEST_ASSERT_EQ(vmcb->control.next_rip, state->regs.rip); 249 } 250 } 251 252 void check_nested_state(int stage, struct kvm_x86_state *state) 253 { 254 if (kvm_has_cap(KVM_CAP_NESTED_STATE) && kvm_cpu_has(X86_FEATURE_SVM)) 255 svm_check_nested_state(stage, state); 256 } 257 258 int main(int argc, char *argv[]) 259 { 260 uint64_t *xstate_bv, saved_xstate_bv; 261 vm_vaddr_t nested_gva = 0; 262 struct kvm_cpuid2 empty_cpuid = {}; 263 struct kvm_regs regs1, regs2; 264 struct kvm_vcpu *vcpu, *vcpuN; 265 struct kvm_vm *vm; 266 struct kvm_x86_state *state; 267 struct ucall uc; 268 int stage; 269 270 /* Create VM */ 271 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 272 273 vcpu_regs_get(vcpu, ®s1); 274 275 if (kvm_has_cap(KVM_CAP_NESTED_STATE)) { 276 if (kvm_cpu_has(X86_FEATURE_SVM)) 277 vcpu_alloc_svm(vm, &nested_gva); 278 else if (kvm_cpu_has(X86_FEATURE_VMX)) 279 vcpu_alloc_vmx(vm, &nested_gva); 280 } 281 282 if (!nested_gva) 283 pr_info("will skip nested state checks\n"); 284 285 vcpu_args_set(vcpu, 1, nested_gva); 286 287 for (stage = 1;; stage++) { 288 vcpu_run(vcpu); 289 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 290 291 switch (get_ucall(vcpu, &uc)) { 292 case UCALL_ABORT: 293 REPORT_GUEST_ASSERT(uc); 294 /* NOT REACHED */ 295 case UCALL_SYNC: 296 break; 297 case UCALL_DONE: 298 goto done; 299 default: 300 TEST_FAIL("Unknown ucall %lu", uc.cmd); 301 } 302 303 /* UCALL_SYNC is handled here. */ 304 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && 305 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", 306 stage, (ulong)uc.args[1]); 307 308 state = vcpu_save_state(vcpu); 309 memset(®s1, 0, sizeof(regs1)); 310 vcpu_regs_get(vcpu, ®s1); 311 312 kvm_vm_release(vm); 313 314 check_nested_state(stage, state); 315 316 /* Restore state in a new VM. */ 317 vcpu = vm_recreate_with_one_vcpu(vm); 318 vcpu_load_state(vcpu, state); 319 320 /* 321 * Restore XSAVE state in a dummy vCPU, first without doing 322 * KVM_SET_CPUID2, and then with an empty guest CPUID. Except 323 * for off-by-default xfeatures, e.g. AMX, KVM is supposed to 324 * allow KVM_SET_XSAVE regardless of guest CPUID. Manually 325 * load only XSAVE state, MSRs in particular have a much more 326 * convoluted ABI. 327 * 328 * Load two versions of XSAVE state: one with the actual guest 329 * XSAVE state, and one with all supported features forced "on" 330 * in xstate_bv, e.g. to ensure that KVM allows loading all 331 * supported features, even if something goes awry in saving 332 * the original snapshot. 333 */ 334 xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512]; 335 saved_xstate_bv = *xstate_bv; 336 337 vcpuN = __vm_vcpu_add(vm, vcpu->id + 1); 338 vcpu_xsave_set(vcpuN, state->xsave); 339 *xstate_bv = kvm_cpu_supported_xcr0(); 340 vcpu_xsave_set(vcpuN, state->xsave); 341 342 vcpu_init_cpuid(vcpuN, &empty_cpuid); 343 vcpu_xsave_set(vcpuN, state->xsave); 344 *xstate_bv = saved_xstate_bv; 345 vcpu_xsave_set(vcpuN, state->xsave); 346 347 kvm_x86_state_cleanup(state); 348 349 memset(®s2, 0, sizeof(regs2)); 350 vcpu_regs_get(vcpu, ®s2); 351 TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), 352 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", 353 (ulong) regs2.rdi, (ulong) regs2.rsi); 354 } 355 356 done: 357 kvm_vm_free(vm); 358 } 359