1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (C) 2018, Red Hat, Inc. 4 * 5 * Tests for Enlightened VMCS, including nested guest state. 6 */ 7 #include <fcntl.h> 8 #include <stdio.h> 9 #include <stdlib.h> 10 #include <string.h> 11 #include <sys/ioctl.h> 12 #include <linux/bitmap.h> 13 14 #include "test_util.h" 15 16 #include "kvm_util.h" 17 18 #include "hyperv.h" 19 #include "vmx.h" 20 21 static int ud_count; 22 23 static void guest_ud_handler(struct ex_regs *regs) 24 { 25 ud_count++; 26 regs->rip += 3; /* VMLAUNCH */ 27 } 28 29 static void guest_nmi_handler(struct ex_regs *regs) 30 { 31 } 32 33 static inline void rdmsr_from_l2(uint32_t msr) 34 { 35 /* Currently, L1 doesn't preserve GPRs during vmexits. */ 36 __asm__ __volatile__ ("rdmsr" : : "c"(msr) : 37 "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9", 38 "r10", "r11", "r12", "r13", "r14", "r15"); 39 } 40 41 /* Exit to L1 from L2 with RDMSR instruction */ 42 void l2_guest_code(void) 43 { 44 u64 unused; 45 46 GUEST_SYNC(7); 47 48 GUEST_SYNC(8); 49 50 /* Forced exit to L1 upon restore */ 51 GUEST_SYNC(9); 52 53 vmcall(); 54 55 /* MSR-Bitmap tests */ 56 rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ 57 rdmsr_from_l2(MSR_FS_BASE); /* intercepted */ 58 rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */ 59 vmcall(); 60 rdmsr_from_l2(MSR_GS_BASE); /* intercepted */ 61 62 /* L2 TLB flush tests */ 63 hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, 64 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS); 65 rdmsr_from_l2(MSR_FS_BASE); 66 /* 67 * Note: hypercall status (RAX) is not preserved correctly by L1 after 68 * synthetic vmexit, use unchecked version. 69 */ 70 __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0, 71 HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS, 72 &unused); 73 74 /* Done, exit to L1 and never come back. */ 75 vmcall(); 76 } 77 78 void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages, 79 vm_vaddr_t hv_hcall_page_gpa) 80 { 81 #define L2_GUEST_STACK_SIZE 64 82 unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE]; 83 84 wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID); 85 wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa); 86 87 x2apic_enable(); 88 89 GUEST_SYNC(1); 90 GUEST_SYNC(2); 91 92 enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist); 93 evmcs_enable(); 94 95 GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages)); 96 GUEST_SYNC(3); 97 GUEST_ASSERT(load_evmcs(hv_pages)); 98 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 99 100 GUEST_SYNC(4); 101 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 102 103 prepare_vmcs(vmx_pages, l2_guest_code, 104 &l2_guest_stack[L2_GUEST_STACK_SIZE]); 105 106 GUEST_SYNC(5); 107 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 108 current_evmcs->revision_id = -1u; 109 GUEST_ASSERT(vmlaunch()); 110 current_evmcs->revision_id = EVMCS_VERSION; 111 GUEST_SYNC(6); 112 113 vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) | 114 PIN_BASED_NMI_EXITING); 115 116 /* L2 TLB flush setup */ 117 current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa; 118 current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1; 119 current_evmcs->hv_vm_id = 1; 120 current_evmcs->hv_vp_id = 1; 121 current_vp_assist->nested_control.features.directhypercall = 1; 122 *(u32 *)(hv_pages->partition_assist) = 0; 123 124 GUEST_ASSERT(!vmlaunch()); 125 GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI); 126 GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR); 127 GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa); 128 129 /* 130 * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is 131 * up-to-date (RIP points where it should and not at the beginning 132 * of l2_guest_code(). GUEST_SYNC(9) checkes that. 133 */ 134 GUEST_ASSERT(!vmresume()); 135 136 GUEST_SYNC(10); 137 138 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 139 current_evmcs->guest_rip += 3; /* vmcall */ 140 141 /* Intercept RDMSR 0xc0000100 */ 142 vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) | 143 CPU_BASED_USE_MSR_BITMAPS); 144 __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400); 145 GUEST_ASSERT(!vmresume()); 146 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 147 current_evmcs->guest_rip += 2; /* rdmsr */ 148 149 /* Enable enlightened MSR bitmap */ 150 current_evmcs->hv_enlightenments_control.msr_bitmap = 1; 151 GUEST_ASSERT(!vmresume()); 152 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 153 current_evmcs->guest_rip += 2; /* rdmsr */ 154 155 /* Intercept RDMSR 0xc0000101 without telling KVM about it */ 156 __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400); 157 /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */ 158 current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; 159 GUEST_ASSERT(!vmresume()); 160 /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */ 161 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 162 current_evmcs->guest_rip += 3; /* vmcall */ 163 164 /* Now tell KVM we've changed MSR-Bitmap */ 165 current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP; 166 GUEST_ASSERT(!vmresume()); 167 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 168 current_evmcs->guest_rip += 2; /* rdmsr */ 169 170 /* 171 * L2 TLB flush test. First VMCALL should be handled directly by L0, 172 * no VMCALL exit expected. 173 */ 174 GUEST_ASSERT(!vmresume()); 175 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ); 176 current_evmcs->guest_rip += 2; /* rdmsr */ 177 /* Enable synthetic vmexit */ 178 *(u32 *)(hv_pages->partition_assist) = 1; 179 GUEST_ASSERT(!vmresume()); 180 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH); 181 182 GUEST_ASSERT(!vmresume()); 183 GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL); 184 GUEST_SYNC(11); 185 186 /* Try enlightened vmptrld with an incorrect GPA */ 187 evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs); 188 GUEST_ASSERT(vmlaunch()); 189 GUEST_ASSERT(ud_count == 1); 190 GUEST_DONE(); 191 } 192 193 void inject_nmi(struct kvm_vcpu *vcpu) 194 { 195 struct kvm_vcpu_events events; 196 197 vcpu_events_get(vcpu, &events); 198 199 events.nmi.pending = 1; 200 events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING; 201 202 vcpu_events_set(vcpu, &events); 203 } 204 205 static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm, 206 struct kvm_vcpu *vcpu) 207 { 208 struct kvm_regs regs1, regs2; 209 struct kvm_x86_state *state; 210 211 state = vcpu_save_state(vcpu); 212 memset(®s1, 0, sizeof(regs1)); 213 vcpu_regs_get(vcpu, ®s1); 214 215 kvm_vm_release(vm); 216 217 /* Restore state in a new VM. */ 218 vcpu = vm_recreate_with_one_vcpu(vm); 219 vcpu_set_hv_cpuid(vcpu); 220 vcpu_enable_evmcs(vcpu); 221 vcpu_load_state(vcpu, state); 222 kvm_x86_state_cleanup(state); 223 224 memset(®s2, 0, sizeof(regs2)); 225 vcpu_regs_get(vcpu, ®s2); 226 TEST_ASSERT(!memcmp(®s1, ®s2, sizeof(regs2)), 227 "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx", 228 (ulong) regs2.rdi, (ulong) regs2.rsi); 229 return vcpu; 230 } 231 232 int main(int argc, char *argv[]) 233 { 234 vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0; 235 vm_vaddr_t hcall_page; 236 237 struct kvm_vcpu *vcpu; 238 struct kvm_vm *vm; 239 struct ucall uc; 240 int stage; 241 242 TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX)); 243 TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE)); 244 TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)); 245 TEST_REQUIRE(kvm_hv_cpu_has(HV_X64_NESTED_DIRECT_FLUSH)); 246 247 vm = vm_create_with_one_vcpu(&vcpu, guest_code); 248 249 hcall_page = vm_vaddr_alloc_pages(vm, 1); 250 memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize()); 251 252 vcpu_set_hv_cpuid(vcpu); 253 vcpu_enable_evmcs(vcpu); 254 255 vcpu_alloc_vmx(vm, &vmx_pages_gva); 256 vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva); 257 vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page)); 258 vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id); 259 260 vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler); 261 vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler); 262 263 pr_info("Running L1 which uses EVMCS to run L2\n"); 264 265 for (stage = 1;; stage++) { 266 vcpu_run(vcpu); 267 TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO); 268 269 switch (get_ucall(vcpu, &uc)) { 270 case UCALL_ABORT: 271 REPORT_GUEST_ASSERT(uc); 272 /* NOT REACHED */ 273 case UCALL_SYNC: 274 break; 275 case UCALL_DONE: 276 goto done; 277 default: 278 TEST_FAIL("Unknown ucall %lu", uc.cmd); 279 } 280 281 /* UCALL_SYNC is handled here. */ 282 TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") && 283 uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx", 284 stage, (ulong)uc.args[1]); 285 286 vcpu = save_restore_vm(vm, vcpu); 287 288 /* Force immediate L2->L1 exit before resuming */ 289 if (stage == 8) { 290 pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n"); 291 inject_nmi(vcpu); 292 } 293 294 /* 295 * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly 296 * restored VM (before the first KVM_RUN) to check that 297 * KVM_STATE_NESTED_EVMCS is not lost. 298 */ 299 if (stage == 9) { 300 pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n"); 301 vcpu = save_restore_vm(vm, vcpu); 302 } 303 } 304 305 done: 306 kvm_vm_free(vm); 307 } 308