// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM support
 *
 * Copyright (C) 2006 Qumranet, Inc.
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 *
 * Authors:
 *   Yaniv Kamay <yaniv@qumranet.com>
 *   Avi Kivity <avi@qumranet.com>
 */

#define pr_fmt(fmt) "SVM: " fmt

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>

#include <asm/msr-index.h>

#include "kvm_emulate.h"
#include "trace.h"
#include "mmu.h"
#include "x86.h"
#include "svm.h"

static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
				       struct x86_exception *fault)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) {
		/*
		 * TODO: track the cause of the nested page fault, and
		 * correctly fill in the high bits of exit_info_1.
		 */
		svm->vmcb->control.exit_code = SVM_EXIT_NPF;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = (1ULL << 32);
		svm->vmcb->control.exit_info_2 = fault->address;
	}

	svm->vmcb->control.exit_info_1 &= ~0xffffffffULL;
	svm->vmcb->control.exit_info_1 |= fault->error_code;

	/*
	 * The present bit is always zero for page structure faults on real
	 * hardware.
	 */
	if (svm->vmcb->control.exit_info_1 & (2ULL << 32))
		svm->vmcb->control.exit_info_1 &= ~1;

	nested_svm_vmexit(svm);
}

static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	u64 cr3 = svm->nested.nested_cr3;
	u64 pdpte;
	int ret;

	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(__sme_clr(cr3)), &pdpte,
				       offset_in_page(cr3) + index * 8, 8);
	if (ret)
		return 0;
	return pdpte;
}

static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	return svm->nested.nested_cr3;
}

static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
{
	WARN_ON(mmu_is_nested(vcpu));

	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
	kvm_init_shadow_mmu(vcpu);
	vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3;
	vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr;
	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
	vcpu->arch.mmu->shadow_root_level = kvm_x86_ops.get_tdp_level(vcpu);
	reset_shadow_zero_bits_mask(vcpu, vcpu->arch.mmu);
	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
}

static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
{
	vcpu->arch.mmu = &vcpu->arch.root_mmu;
	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
}

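/*
 * Recompute the effective intercepts for the active (L2) VMCB: start from
 * the intercepts KVM itself needs (saved in hsave), drop the ones that are
 * unnecessary while V_INTR_MASKING is in effect, and OR in everything the
 * L1 hypervisor requested for the nested guest.  Only meaningful once the
 * vCPU is in guest mode.
 */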
void recalc_intercepts(struct vcpu_svm *svm)
{
	struct vmcb_control_area *c, *h;
	struct nested_state *g;

	mark_dirty(svm->vmcb, VMCB_INTERCEPTS);

	if (!is_guest_mode(&svm->vcpu))
		return;

	c = &svm->vmcb->control;
	h = &svm->nested.hsave->control;
	g = &svm->nested;

	c->intercept_cr = h->intercept_cr;
	c->intercept_dr = h->intercept_dr;
	c->intercept_exceptions = h->intercept_exceptions;
	c->intercept = h->intercept;

	if (svm->vcpu.arch.hflags & HF_VINTR_MASK) {
		/* We only want the cr8 intercept bits of L1 */
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_READ);
		c->intercept_cr &= ~(1U << INTERCEPT_CR8_WRITE);

		/*
		 * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not
		 * affect any interrupt we may want to inject; therefore,
		 * interrupt window vmexits are irrelevant to L0.
		 */
		c->intercept &= ~(1ULL << INTERCEPT_VINTR);
	}

	/* We don't want to see VMMCALLs from a nested guest */
	c->intercept &= ~(1ULL << INTERCEPT_VMMCALL);

	c->intercept_cr |= g->intercept_cr;
	c->intercept_dr |= g->intercept_dr;
	c->intercept_exceptions |= g->intercept_exceptions;
	c->intercept |= g->intercept;
}

static void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *from_vmcb)
{
	struct vmcb_control_area *dst = &dst_vmcb->control;
	struct vmcb_control_area *from = &from_vmcb->control;

	dst->intercept_cr = from->intercept_cr;
	dst->intercept_dr = from->intercept_dr;
	dst->intercept_exceptions = from->intercept_exceptions;
	dst->intercept = from->intercept;
	dst->iopm_base_pa = from->iopm_base_pa;
	dst->msrpm_base_pa = from->msrpm_base_pa;
	dst->tsc_offset = from->tsc_offset;
	dst->asid = from->asid;
	dst->tlb_ctl = from->tlb_ctl;
	dst->int_ctl = from->int_ctl;
	dst->int_vector = from->int_vector;
	dst->int_state = from->int_state;
	dst->exit_code = from->exit_code;
	dst->exit_code_hi = from->exit_code_hi;
	dst->exit_info_1 = from->exit_info_1;
	dst->exit_info_2 = from->exit_info_2;
	dst->exit_int_info = from->exit_int_info;
	dst->exit_int_info_err = from->exit_int_info_err;
	dst->nested_ctl = from->nested_ctl;
	dst->event_inj = from->event_inj;
	dst->event_inj_err = from->event_inj_err;
	dst->nested_cr3 = from->nested_cr3;
	dst->virt_ext = from->virt_ext;
	dst->pause_filter_count = from->pause_filter_count;
	dst->pause_filter_thresh = from->pause_filter_thresh;
}

static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm)
{
	/*
	 * This function merges the MSR permission bitmaps of KVM and the
	 * nested VMCB.  It is optimized in that it only merges the parts
	 * where the KVM MSR permission bitmap may contain zero bits.
	 */
	int i;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return true;

	for (i = 0; i < MSRPM_OFFSETS; i++) {
		u32 value, p;
		u64 offset;

		if (msrpm_offsets[i] == 0xffffffff)
			break;

		p = msrpm_offsets[i];
		offset = svm->nested.vmcb_msrpm + (p * 4);

		if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4))
			return false;

		svm->nested.msrpm[p] = svm->msrpm[p] | value;
	}

	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));

	return true;
}

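/*
 * Basic consistency checks on the nested VMCB before VMRUN is emulated.
 * A failure here makes nested_svm_vmrun() report SVM_EXIT_ERR back to L1
 * instead of entering L2.
 */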
static bool nested_vmcb_checks(struct vmcb *vmcb)
{
	if ((vmcb->save.efer & EFER_SVME) == 0)
		return false;

	if ((vmcb->control.intercept & (1ULL << INTERCEPT_VMRUN)) == 0)
		return false;

	if (vmcb->control.asid == 0)
		return false;

	if ((vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
	    !npt_enabled)
		return false;

	return true;
}

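/*
 * Load the guest (L2) state described by the nested VMCB into the current
 * VMCB and put the vCPU into guest mode.  Control fields that KVM needs for
 * its own operation (e.g. V_INTR_MASKING) are merged in here and in
 * recalc_intercepts().
 */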
void enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
			  struct vmcb *nested_vmcb, struct kvm_host_map *map)
{
	bool evaluate_pending_interrupts =
		is_intercept(svm, INTERCEPT_VINTR) ||
		is_intercept(svm, INTERCEPT_IRET);

	if (kvm_get_rflags(&svm->vcpu) & X86_EFLAGS_IF)
		svm->vcpu.arch.hflags |= HF_HIF_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_HIF_MASK;

	if (nested_vmcb->control.nested_ctl & SVM_NESTED_CTL_NP_ENABLE) {
		svm->nested.nested_cr3 = nested_vmcb->control.nested_cr3;
		nested_svm_init_mmu_context(&svm->vcpu);
	}

	/* Load the nested guest state */
	svm->vmcb->save.es = nested_vmcb->save.es;
	svm->vmcb->save.cs = nested_vmcb->save.cs;
	svm->vmcb->save.ss = nested_vmcb->save.ss;
	svm->vmcb->save.ds = nested_vmcb->save.ds;
	svm->vmcb->save.gdtr = nested_vmcb->save.gdtr;
	svm->vmcb->save.idtr = nested_vmcb->save.idtr;
	kvm_set_rflags(&svm->vcpu, nested_vmcb->save.rflags);
	svm_set_efer(&svm->vcpu, nested_vmcb->save.efer);
	svm_set_cr0(&svm->vcpu, nested_vmcb->save.cr0);
	svm_set_cr4(&svm->vcpu, nested_vmcb->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = nested_vmcb->save.cr3;
		svm->vcpu.arch.cr3 = nested_vmcb->save.cr3;
	} else
		(void)kvm_set_cr3(&svm->vcpu, nested_vmcb->save.cr3);

	/* Guest paging mode is active - reset mmu */
	kvm_mmu_reset_context(&svm->vcpu);

	svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = nested_vmcb->save.cr2;
	kvm_rax_write(&svm->vcpu, nested_vmcb->save.rax);
	kvm_rsp_write(&svm->vcpu, nested_vmcb->save.rsp);
	kvm_rip_write(&svm->vcpu, nested_vmcb->save.rip);

	/* In case we don't even reach vcpu_run, the fields are not updated */
	svm->vmcb->save.rax = nested_vmcb->save.rax;
	svm->vmcb->save.rsp = nested_vmcb->save.rsp;
	svm->vmcb->save.rip = nested_vmcb->save.rip;
	svm->vmcb->save.dr7 = nested_vmcb->save.dr7;
	svm->vmcb->save.dr6 = nested_vmcb->save.dr6;
	svm->vmcb->save.cpl = nested_vmcb->save.cpl;

	svm->nested.vmcb_msrpm = nested_vmcb->control.msrpm_base_pa & ~0x0fffULL;
	svm->nested.vmcb_iopm = nested_vmcb->control.iopm_base_pa & ~0x0fffULL;

	/* cache intercepts */
	svm->nested.intercept_cr = nested_vmcb->control.intercept_cr;
	svm->nested.intercept_dr = nested_vmcb->control.intercept_dr;
	svm->nested.intercept_exceptions = nested_vmcb->control.intercept_exceptions;
	svm->nested.intercept = nested_vmcb->control.intercept;

	svm_flush_tlb(&svm->vcpu, true);
	svm->vmcb->control.int_ctl = nested_vmcb->control.int_ctl | V_INTR_MASKING_MASK;
	if (nested_vmcb->control.int_ctl & V_INTR_MASKING_MASK)
		svm->vcpu.arch.hflags |= HF_VINTR_MASK;
	else
		svm->vcpu.arch.hflags &= ~HF_VINTR_MASK;

	svm->vcpu.arch.tsc_offset += nested_vmcb->control.tsc_offset;
	svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;

	svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
	svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
	svm->vmcb->control.int_state = nested_vmcb->control.int_state;
	svm->vmcb->control.event_inj = nested_vmcb->control.event_inj;
	svm->vmcb->control.event_inj_err = nested_vmcb->control.event_inj_err;

	svm->vmcb->control.pause_filter_count =
		nested_vmcb->control.pause_filter_count;
	svm->vmcb->control.pause_filter_thresh =
		nested_vmcb->control.pause_filter_thresh;

	kvm_vcpu_unmap(&svm->vcpu, map, true);

	/* Enter Guest-Mode */
	enter_guest_mode(&svm->vcpu);

	/*
	 * Merge guest and host intercepts - must be called with vcpu in
	 * guest-mode to take effect here.
	 */
	recalc_intercepts(svm);

	svm->nested.vmcb = vmcb_gpa;

	/*
	 * If L1 had a pending IRQ/NMI before executing VMRUN,
	 * which wasn't delivered because it was disallowed (e.g.
	 * interrupts disabled), L0 needs to evaluate if this pending
	 * event should cause an exit from L2 to L1 or be delivered
	 * directly to L2.
	 *
	 * Usually this would be handled by the processor noticing an
	 * IRQ/NMI window request.  However, VMRUN can unblock interrupts
	 * by implicitly setting GIF, so force L0 to perform pending event
	 * evaluation by requesting a KVM_REQ_EVENT.
	 */
	enable_gif(svm);
	if (unlikely(evaluate_pending_interrupts))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	mark_all_dirty(svm->vmcb);
}

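/*
 * Emulate the VMRUN instruction on behalf of L1: map the VMCB at the guest
 * physical address in RAX, sanity-check it, save L1's state into hsave and
 * switch to the nested guest.
 */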
int nested_svm_vmrun(struct vcpu_svm *svm)
{
	int ret;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;
	u64 vmcb_gpa;

	vmcb_gpa = svm->vmcb->save.rax;

	ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
	if (ret == -EINVAL) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	} else if (ret) {
		return kvm_skip_emulated_instruction(&svm->vcpu);
	}

	ret = kvm_skip_emulated_instruction(&svm->vcpu);

	nested_vmcb = map.hva;

	if (!nested_vmcb_checks(nested_vmcb)) {
		nested_vmcb->control.exit_code = SVM_EXIT_ERR;
		nested_vmcb->control.exit_code_hi = 0;
		nested_vmcb->control.exit_info_1 = 0;
		nested_vmcb->control.exit_info_2 = 0;

		kvm_vcpu_unmap(&svm->vcpu, &map, true);

		return ret;
	}

	trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb_gpa,
			       nested_vmcb->save.rip,
			       nested_vmcb->control.int_ctl,
			       nested_vmcb->control.event_inj,
			       nested_vmcb->control.nested_ctl);

	trace_kvm_nested_intercepts(nested_vmcb->control.intercept_cr & 0xffff,
				    nested_vmcb->control.intercept_cr >> 16,
				    nested_vmcb->control.intercept_exceptions,
				    nested_vmcb->control.intercept);

	/* Clear internal status */
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	/*
	 * Save the current vmcb, so we don't need to pick and choose which
	 * fields to save; we can restore everything when a #VMEXIT occurs.
	 */
	hsave->save.es = vmcb->save.es;
	hsave->save.cs = vmcb->save.cs;
	hsave->save.ss = vmcb->save.ss;
	hsave->save.ds = vmcb->save.ds;
	hsave->save.gdtr = vmcb->save.gdtr;
	hsave->save.idtr = vmcb->save.idtr;
	hsave->save.efer = svm->vcpu.arch.efer;
	hsave->save.cr0 = kvm_read_cr0(&svm->vcpu);
	hsave->save.cr4 = svm->vcpu.arch.cr4;
	hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
	hsave->save.rip = kvm_rip_read(&svm->vcpu);
	hsave->save.rsp = vmcb->save.rsp;
	hsave->save.rax = vmcb->save.rax;
	if (npt_enabled)
		hsave->save.cr3 = vmcb->save.cr3;
	else
		hsave->save.cr3 = kvm_read_cr3(&svm->vcpu);

	copy_vmcb_control_area(hsave, vmcb);

	enter_svm_guest_mode(svm, vmcb_gpa, nested_vmcb, &map);

	if (!nested_svm_vmrun_msrpm(svm)) {
		svm->vmcb->control.exit_code = SVM_EXIT_ERR;
		svm->vmcb->control.exit_code_hi = 0;
		svm->vmcb->control.exit_info_1 = 0;
		svm->vmcb->control.exit_info_2 = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}

void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
{
	to_vmcb->save.fs = from_vmcb->save.fs;
	to_vmcb->save.gs = from_vmcb->save.gs;
	to_vmcb->save.tr = from_vmcb->save.tr;
	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
	to_vmcb->save.star = from_vmcb->save.star;
	to_vmcb->save.lstar = from_vmcb->save.lstar;
	to_vmcb->save.cstar = from_vmcb->save.cstar;
	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
}

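/*
 * Emulate a #VMEXIT from L2 to L1: copy the current guest state and exit
 * information into the nested VMCB, then restore L1's saved state from
 * hsave and leave guest mode.
 */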
int nested_svm_vmexit(struct vcpu_svm *svm)
{
	int rc;
	struct vmcb *nested_vmcb;
	struct vmcb *hsave = svm->nested.hsave;
	struct vmcb *vmcb = svm->vmcb;
	struct kvm_host_map map;

	trace_kvm_nested_vmexit_inject(vmcb->control.exit_code,
				       vmcb->control.exit_info_1,
				       vmcb->control.exit_info_2,
				       vmcb->control.exit_int_info,
				       vmcb->control.exit_int_info_err,
				       KVM_ISA_SVM);

	rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
	if (rc) {
		if (rc == -EINVAL)
			kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	nested_vmcb = map.hva;

	/* Exit Guest-Mode */
	leave_guest_mode(&svm->vcpu);
	svm->nested.vmcb = 0;

	/* Give the current vmcb to the guest */
	disable_gif(svm);

	nested_vmcb->save.es = vmcb->save.es;
	nested_vmcb->save.cs = vmcb->save.cs;
	nested_vmcb->save.ss = vmcb->save.ss;
	nested_vmcb->save.ds = vmcb->save.ds;
	nested_vmcb->save.gdtr = vmcb->save.gdtr;
	nested_vmcb->save.idtr = vmcb->save.idtr;
	nested_vmcb->save.efer = svm->vcpu.arch.efer;
	nested_vmcb->save.cr0 = kvm_read_cr0(&svm->vcpu);
	nested_vmcb->save.cr3 = kvm_read_cr3(&svm->vcpu);
	nested_vmcb->save.cr2 = vmcb->save.cr2;
	nested_vmcb->save.cr4 = svm->vcpu.arch.cr4;
	nested_vmcb->save.rflags = kvm_get_rflags(&svm->vcpu);
	nested_vmcb->save.rip = vmcb->save.rip;
	nested_vmcb->save.rsp = vmcb->save.rsp;
	nested_vmcb->save.rax = vmcb->save.rax;
	nested_vmcb->save.dr7 = vmcb->save.dr7;
	nested_vmcb->save.dr6 = vmcb->save.dr6;
	nested_vmcb->save.cpl = vmcb->save.cpl;

	nested_vmcb->control.int_ctl = vmcb->control.int_ctl;
	nested_vmcb->control.int_vector = vmcb->control.int_vector;
	nested_vmcb->control.int_state = vmcb->control.int_state;
	nested_vmcb->control.exit_code = vmcb->control.exit_code;
	nested_vmcb->control.exit_code_hi = vmcb->control.exit_code_hi;
	nested_vmcb->control.exit_info_1 = vmcb->control.exit_info_1;
	nested_vmcb->control.exit_info_2 = vmcb->control.exit_info_2;
	nested_vmcb->control.exit_int_info = vmcb->control.exit_int_info;
	nested_vmcb->control.exit_int_info_err = vmcb->control.exit_int_info_err;

	if (svm->nrips_enabled)
		nested_vmcb->control.next_rip = vmcb->control.next_rip;

	/*
	 * If we emulate a VMRUN/#VMEXIT in the same host #vmexit cycle we have
	 * to make sure that we do not lose injected events. So check event_inj
	 * here and copy it to exit_int_info if it is valid.
	 * exit_int_info and event_inj can't both be valid because the case
	 * below only happens on a VMRUN instruction intercept which has
	 * no valid exit_int_info set.
	 */
	if (vmcb->control.event_inj & SVM_EVTINJ_VALID) {
		struct vmcb_control_area *nc = &nested_vmcb->control;

		nc->exit_int_info = vmcb->control.event_inj;
		nc->exit_int_info_err = vmcb->control.event_inj_err;
	}

	nested_vmcb->control.tlb_ctl = 0;
	nested_vmcb->control.event_inj = 0;
	nested_vmcb->control.event_inj_err = 0;

	nested_vmcb->control.pause_filter_count =
		svm->vmcb->control.pause_filter_count;
	nested_vmcb->control.pause_filter_thresh =
		svm->vmcb->control.pause_filter_thresh;

	/* We always set V_INTR_MASKING and remember the old value in hflags */
	if (!(svm->vcpu.arch.hflags & HF_VINTR_MASK))
		nested_vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;

	/* Restore the original control entries */
	copy_vmcb_control_area(vmcb, hsave);

	svm->vcpu.arch.tsc_offset = svm->vmcb->control.tsc_offset;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	svm->nested.nested_cr3 = 0;

	/* Restore selected save entries */
	svm->vmcb->save.es = hsave->save.es;
	svm->vmcb->save.cs = hsave->save.cs;
	svm->vmcb->save.ss = hsave->save.ss;
	svm->vmcb->save.ds = hsave->save.ds;
	svm->vmcb->save.gdtr = hsave->save.gdtr;
	svm->vmcb->save.idtr = hsave->save.idtr;
	kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
	svm_set_efer(&svm->vcpu, hsave->save.efer);
	svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
	svm_set_cr4(&svm->vcpu, hsave->save.cr4);
	if (npt_enabled) {
		svm->vmcb->save.cr3 = hsave->save.cr3;
		svm->vcpu.arch.cr3 = hsave->save.cr3;
	} else {
		(void)kvm_set_cr3(&svm->vcpu, hsave->save.cr3);
	}
	kvm_rax_write(&svm->vcpu, hsave->save.rax);
	kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
	kvm_rip_write(&svm->vcpu, hsave->save.rip);
	svm->vmcb->save.dr7 = 0;
	svm->vmcb->save.cpl = 0;
	svm->vmcb->control.exit_int_info = 0;

	mark_all_dirty(svm->vmcb);

	kvm_vcpu_unmap(&svm->vcpu, &map, true);

	nested_svm_uninit_mmu_context(&svm->vcpu);
	kvm_mmu_reset_context(&svm->vcpu);
	kvm_mmu_load(&svm->vcpu);

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(&svm->vcpu);
	kvm_clear_interrupt_queue(&svm->vcpu);

	return 0;
}

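/*
 * Check L1's MSR permission bitmap to decide whether an intercepted
 * RDMSR/WRMSR should be forwarded to L1.  Each MSR is covered by two
 * consecutive bits (read, then write), so one 32-bit word of the bitmap
 * describes 16 MSRs.
 */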
static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
{
	u32 offset, msr, value;
	int write, mask;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_MSR_PROT)))
		return NESTED_EXIT_HOST;

	msr = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	offset = svm_msrpm_offset(msr);
	write = svm->vmcb->control.exit_info_1 & 1;
	mask = 1 << ((2 * (msr & 0xf)) + write);

	if (offset == MSR_INVALID)
		return NESTED_EXIT_DONE;

	/* Offset is in 32 bit units; convert it to a byte offset. */
	offset *= 4;

	if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.vmcb_msrpm + offset, &value, 4))
		return NESTED_EXIT_DONE;

	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

/* DB exceptions for our internal use must not cause vmexit */
static int nested_svm_intercept_db(struct vcpu_svm *svm)
{
	unsigned long dr6;

	/* if we're not singlestepping, it's not ours */
	if (!svm->nmi_singlestep)
		return NESTED_EXIT_DONE;

	/* if it's not a singlestep exception, it's not ours */
	if (kvm_get_dr(&svm->vcpu, 6, &dr6))
		return NESTED_EXIT_DONE;
	if (!(dr6 & DR6_BS))
		return NESTED_EXIT_DONE;

	/* if the guest is singlestepping, it should get the vmexit */
	if (svm->nmi_singlestep_guest_rflags & X86_EFLAGS_TF) {
		disable_nmi_singlestep(svm);
		return NESTED_EXIT_DONE;
	}

	/* it's ours, the nested hypervisor must not see this one */
	return NESTED_EXIT_HOST;
}

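/*
 * Check L1's I/O permission bitmap for the port that triggered the
 * intercept.  The bitmap holds one bit per port; accesses wider than one
 * byte may straddle a byte boundary, hence up to two bytes are read.
 */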
static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
{
	unsigned port, size, iopm_len;
	u16 val, mask;
	u8 start_bit;
	u64 gpa;

	if (!(svm->nested.intercept & (1ULL << INTERCEPT_IOIO_PROT)))
		return NESTED_EXIT_HOST;

	port = svm->vmcb->control.exit_info_1 >> 16;
	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
		SVM_IOIO_SIZE_SHIFT;
	gpa = svm->nested.vmcb_iopm + (port / 8);
	start_bit = port % 8;
	iopm_len = (start_bit + size > 8) ? 2 : 1;
	mask = (0xf >> (4 - size)) << start_bit;
	val = 0;

	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
		return NESTED_EXIT_DONE;

	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
}

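/*
 * Decide whether the current exit should be handled by L0
 * (NESTED_EXIT_HOST) or reflected to the L1 hypervisor
 * (NESTED_EXIT_DONE), based on the intercepts L1 requested.
 */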
static int nested_svm_intercept(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;
	int vmexit = NESTED_EXIT_HOST;

	switch (exit_code) {
	case SVM_EXIT_MSR:
		vmexit = nested_svm_exit_handled_msr(svm);
		break;
	case SVM_EXIT_IOIO:
		vmexit = nested_svm_intercept_ioio(svm);
		break;
	case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_CR0);
		if (svm->nested.intercept_cr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: {
		u32 bit = 1U << (exit_code - SVM_EXIT_READ_DR0);
		if (svm->nested.intercept_dr & bit)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
		if (svm->nested.intercept_exceptions & excp_bits) {
			if (exit_code == SVM_EXIT_EXCP_BASE + DB_VECTOR)
				vmexit = nested_svm_intercept_db(svm);
			else
				vmexit = NESTED_EXIT_DONE;
		}
		/* An async page fault always causes a vmexit. */
		else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
			 svm->vcpu.arch.exception.nested_apf != 0)
			vmexit = NESTED_EXIT_DONE;
		break;
	}
	case SVM_EXIT_ERR: {
		vmexit = NESTED_EXIT_DONE;
		break;
	}
	default: {
		u64 exit_bits = 1ULL << (exit_code - SVM_EXIT_INTR);
		if (svm->nested.intercept & exit_bits)
			vmexit = NESTED_EXIT_DONE;
	}
	}

	return vmexit;
}

int nested_svm_exit_handled(struct vcpu_svm *svm)
{
	int vmexit;

	vmexit = nested_svm_intercept(svm);

	if (vmexit == NESTED_EXIT_DONE)
		nested_svm_vmexit(svm);

	return vmexit;
}

int nested_svm_check_permissions(struct vcpu_svm *svm)
{
	if (!(svm->vcpu.arch.efer & EFER_SVME) ||
	    !is_paging(&svm->vcpu)) {
		kvm_queue_exception(&svm->vcpu, UD_VECTOR);
		return 1;
	}

	if (svm->vmcb->save.cpl) {
		kvm_inject_gp(&svm->vcpu, 0);
		return 1;
	}

	return 0;
}

int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code)
{
	int vmexit;

	if (!is_guest_mode(&svm->vcpu))
		return 0;

	vmexit = nested_svm_intercept(svm);
	if (vmexit != NESTED_EXIT_DONE)
		return 0;

	svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
	svm->vmcb->control.exit_code_hi = 0;
	svm->vmcb->control.exit_info_1 = error_code;

	/*
	 * EXITINFO2 is undefined for all exception intercepts other
	 * than #PF.
	 */
	if (svm->vcpu.arch.exception.nested_apf)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
	else if (svm->vcpu.arch.exception.has_payload)
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload;
	else
		svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;

	svm->nested.exit_required = true;
	return vmexit;
}

static void nested_svm_intr(struct vcpu_svm *svm)
{
	svm->vmcb->control.exit_code = SVM_EXIT_INTR;
	svm->vmcb->control.exit_info_1 = 0;
	svm->vmcb->control.exit_info_2 = 0;

	/* nested_svm_vmexit() is called afterwards, from handle_exit(). */
	svm->nested.exit_required = true;
	trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
}

static bool nested_exit_on_intr(struct vcpu_svm *svm)
{
	return (svm->nested.intercept & 1ULL);
}

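/*
 * Decide whether a pending interrupt should cause an INTR #VMEXIT from L2
 * to L1 rather than being delivered directly to L2.  Returns -EBUSY if the
 * exit cannot be emulated right now (e.g. an event still needs to be
 * reinjected).
 */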
int svm_check_nested_events(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	bool block_nested_events =
		kvm_event_needs_reinjection(vcpu) || svm->nested.exit_required;

	if (kvm_cpu_has_interrupt(vcpu) && nested_exit_on_intr(svm)) {
		if (block_nested_events)
			return -EBUSY;
		nested_svm_intr(svm);
		return 0;
	}

	return 0;
}

int nested_svm_exit_special(struct vcpu_svm *svm)
{
	u32 exit_code = svm->vmcb->control.exit_code;

	switch (exit_code) {
	case SVM_EXIT_INTR:
	case SVM_EXIT_NMI:
	case SVM_EXIT_EXCP_BASE + MC_VECTOR:
		return NESTED_EXIT_HOST;
	case SVM_EXIT_NPF:
		/* For now we are always handling NPFs when using them */
		if (npt_enabled)
			return NESTED_EXIT_HOST;
		break;
	case SVM_EXIT_EXCP_BASE + PF_VECTOR:
		/* When we're shadowing, trap PFs, but not async PF */
		if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
			return NESTED_EXIT_HOST;
		break;
	default:
		break;
	}

	return NESTED_EXIT_CONTINUE;
}