1883b0a91SJoerg Roedel // SPDX-License-Identifier: GPL-2.0-only 2883b0a91SJoerg Roedel /* 3883b0a91SJoerg Roedel * Kernel-based Virtual Machine driver for Linux 4883b0a91SJoerg Roedel * 5883b0a91SJoerg Roedel * AMD SVM support 6883b0a91SJoerg Roedel * 7883b0a91SJoerg Roedel * Copyright (C) 2006 Qumranet, Inc. 8883b0a91SJoerg Roedel * Copyright 2010 Red Hat, Inc. and/or its affiliates. 9883b0a91SJoerg Roedel * 10883b0a91SJoerg Roedel * Authors: 11883b0a91SJoerg Roedel * Yaniv Kamay <yaniv@qumranet.com> 12883b0a91SJoerg Roedel * Avi Kivity <avi@qumranet.com> 13883b0a91SJoerg Roedel */ 14883b0a91SJoerg Roedel 15883b0a91SJoerg Roedel #define pr_fmt(fmt) "SVM: " fmt 16883b0a91SJoerg Roedel 17883b0a91SJoerg Roedel #include <linux/kvm_types.h> 18883b0a91SJoerg Roedel #include <linux/kvm_host.h> 19883b0a91SJoerg Roedel #include <linux/kernel.h> 20883b0a91SJoerg Roedel 21883b0a91SJoerg Roedel #include <asm/msr-index.h> 225679b803SPaolo Bonzini #include <asm/debugreg.h> 23883b0a91SJoerg Roedel 24883b0a91SJoerg Roedel #include "kvm_emulate.h" 25883b0a91SJoerg Roedel #include "trace.h" 26883b0a91SJoerg Roedel #include "mmu.h" 27883b0a91SJoerg Roedel #include "x86.h" 28cc440cdaSPaolo Bonzini #include "cpuid.h" 295b672408SPaolo Bonzini #include "lapic.h" 30883b0a91SJoerg Roedel #include "svm.h" 31883b0a91SJoerg Roedel 3211f0cbf0SSean Christopherson #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK 3311f0cbf0SSean Christopherson 34883b0a91SJoerg Roedel static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu, 35883b0a91SJoerg Roedel struct x86_exception *fault) 36883b0a91SJoerg Roedel { 37883b0a91SJoerg Roedel struct vcpu_svm *svm = to_svm(vcpu); 38883b0a91SJoerg Roedel 39883b0a91SJoerg Roedel if (svm->vmcb->control.exit_code != SVM_EXIT_NPF) { 40883b0a91SJoerg Roedel /* 41883b0a91SJoerg Roedel * TODO: track the cause of the nested page fault, and 42883b0a91SJoerg Roedel * correctly fill in the high bits of exit_info_1. 43883b0a91SJoerg Roedel */ 44883b0a91SJoerg Roedel svm->vmcb->control.exit_code = SVM_EXIT_NPF; 45883b0a91SJoerg Roedel svm->vmcb->control.exit_code_hi = 0; 46883b0a91SJoerg Roedel svm->vmcb->control.exit_info_1 = (1ULL << 32); 47883b0a91SJoerg Roedel svm->vmcb->control.exit_info_2 = fault->address; 48883b0a91SJoerg Roedel } 49883b0a91SJoerg Roedel 50883b0a91SJoerg Roedel svm->vmcb->control.exit_info_1 &= ~0xffffffffULL; 51883b0a91SJoerg Roedel svm->vmcb->control.exit_info_1 |= fault->error_code; 52883b0a91SJoerg Roedel 53883b0a91SJoerg Roedel nested_svm_vmexit(svm); 54883b0a91SJoerg Roedel } 55883b0a91SJoerg Roedel 56a04aead1SPaolo Bonzini static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault) 57a04aead1SPaolo Bonzini { 58a04aead1SPaolo Bonzini struct vcpu_svm *svm = to_svm(vcpu); 59a04aead1SPaolo Bonzini WARN_ON(!is_guest_mode(vcpu)); 60a04aead1SPaolo Bonzini 61a04aead1SPaolo Bonzini if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) && 62a04aead1SPaolo Bonzini !svm->nested.nested_run_pending) { 63a04aead1SPaolo Bonzini svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR; 64a04aead1SPaolo Bonzini svm->vmcb->control.exit_code_hi = 0; 65a04aead1SPaolo Bonzini svm->vmcb->control.exit_info_1 = fault->error_code; 66a04aead1SPaolo Bonzini svm->vmcb->control.exit_info_2 = fault->address; 67a04aead1SPaolo Bonzini nested_svm_vmexit(svm); 68a04aead1SPaolo Bonzini } else { 69a04aead1SPaolo Bonzini kvm_inject_page_fault(vcpu, fault); 70a04aead1SPaolo Bonzini } 71a04aead1SPaolo Bonzini } 72a04aead1SPaolo Bonzini 73883b0a91SJoerg Roedel static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index) 74883b0a91SJoerg Roedel { 75883b0a91SJoerg Roedel struct vcpu_svm *svm = to_svm(vcpu); 76e670bf68SPaolo Bonzini u64 cr3 = svm->nested.ctl.nested_cr3; 77883b0a91SJoerg Roedel u64 pdpte; 78883b0a91SJoerg Roedel int ret; 79883b0a91SJoerg Roedel 802732be90SSean Christopherson ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte, 81883b0a91SJoerg Roedel offset_in_page(cr3) + index * 8, 8); 82883b0a91SJoerg Roedel if (ret) 83883b0a91SJoerg Roedel return 0; 84883b0a91SJoerg Roedel return pdpte; 85883b0a91SJoerg Roedel } 86883b0a91SJoerg Roedel 87883b0a91SJoerg Roedel static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) 88883b0a91SJoerg Roedel { 89883b0a91SJoerg Roedel struct vcpu_svm *svm = to_svm(vcpu); 90883b0a91SJoerg Roedel 91e670bf68SPaolo Bonzini return svm->nested.ctl.nested_cr3; 92883b0a91SJoerg Roedel } 93883b0a91SJoerg Roedel 94883b0a91SJoerg Roedel static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) 95883b0a91SJoerg Roedel { 96929d1cfaSPaolo Bonzini struct vcpu_svm *svm = to_svm(vcpu); 97929d1cfaSPaolo Bonzini 98883b0a91SJoerg Roedel WARN_ON(mmu_is_nested(vcpu)); 99883b0a91SJoerg Roedel 100883b0a91SJoerg Roedel vcpu->arch.mmu = &vcpu->arch.guest_mmu; 10131e96bc6SSean Christopherson 10231e96bc6SSean Christopherson /* 10331e96bc6SSean Christopherson * The NPT format depends on L1's CR4 and EFER, which is in vmcb01. Note, 10431e96bc6SSean Christopherson * when called via KVM_SET_NESTED_STATE, that state may _not_ match current 10531e96bc6SSean Christopherson * vCPU state. CR0.WP is explicitly ignored, while CR0.PG is required. 10631e96bc6SSean Christopherson */ 1074995a368SCathy Avery kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4, 1084995a368SCathy Avery svm->vmcb01.ptr->save.efer, 1090f04a2acSVitaly Kuznetsov svm->nested.ctl.nested_cr3); 110883b0a91SJoerg Roedel vcpu->arch.mmu->get_guest_pgd = nested_svm_get_tdp_cr3; 111883b0a91SJoerg Roedel vcpu->arch.mmu->get_pdptr = nested_svm_get_tdp_pdptr; 112883b0a91SJoerg Roedel vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit; 113883b0a91SJoerg Roedel vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu; 114883b0a91SJoerg Roedel } 115883b0a91SJoerg Roedel 116883b0a91SJoerg Roedel static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) 117883b0a91SJoerg Roedel { 118883b0a91SJoerg Roedel vcpu->arch.mmu = &vcpu->arch.root_mmu; 119883b0a91SJoerg Roedel vcpu->arch.walk_mmu = &vcpu->arch.root_mmu; 120883b0a91SJoerg Roedel } 121883b0a91SJoerg Roedel 122883b0a91SJoerg Roedel void recalc_intercepts(struct vcpu_svm *svm) 123883b0a91SJoerg Roedel { 124e670bf68SPaolo Bonzini struct vmcb_control_area *c, *h, *g; 125c45ad722SBabu Moger unsigned int i; 126883b0a91SJoerg Roedel 12706e7852cSJoerg Roedel vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 128883b0a91SJoerg Roedel 129883b0a91SJoerg Roedel if (!is_guest_mode(&svm->vcpu)) 130883b0a91SJoerg Roedel return; 131883b0a91SJoerg Roedel 132883b0a91SJoerg Roedel c = &svm->vmcb->control; 1334995a368SCathy Avery h = &svm->vmcb01.ptr->control; 134e670bf68SPaolo Bonzini g = &svm->nested.ctl; 135883b0a91SJoerg Roedel 136c45ad722SBabu Moger for (i = 0; i < MAX_INTERCEPT; i++) 137c45ad722SBabu Moger c->intercepts[i] = h->intercepts[i]; 138c45ad722SBabu Moger 139e9fd761aSPaolo Bonzini if (g->int_ctl & V_INTR_MASKING_MASK) { 140883b0a91SJoerg Roedel /* We only want the cr8 intercept bits of L1 */ 14103bfeeb9SBabu Moger vmcb_clr_intercept(c, INTERCEPT_CR8_READ); 14203bfeeb9SBabu Moger vmcb_clr_intercept(c, INTERCEPT_CR8_WRITE); 143883b0a91SJoerg Roedel 144883b0a91SJoerg Roedel /* 145883b0a91SJoerg Roedel * Once running L2 with HF_VINTR_MASK, EFLAGS.IF does not 146883b0a91SJoerg Roedel * affect any interrupt we may want to inject; therefore, 147883b0a91SJoerg Roedel * interrupt window vmexits are irrelevant to L0. 148883b0a91SJoerg Roedel */ 149c62e2e94SBabu Moger vmcb_clr_intercept(c, INTERCEPT_VINTR); 150883b0a91SJoerg Roedel } 151883b0a91SJoerg Roedel 152883b0a91SJoerg Roedel /* We don't want to see VMMCALLs from a nested guest */ 153c62e2e94SBabu Moger vmcb_clr_intercept(c, INTERCEPT_VMMCALL); 154883b0a91SJoerg Roedel 155c45ad722SBabu Moger for (i = 0; i < MAX_INTERCEPT; i++) 156c45ad722SBabu Moger c->intercepts[i] |= g->intercepts[i]; 1574b639a9fSMaxim Levitsky 1584b639a9fSMaxim Levitsky /* If SMI is not intercepted, ignore guest SMI intercept as well */ 1594b639a9fSMaxim Levitsky if (!intercept_smi) 1604b639a9fSMaxim Levitsky vmcb_clr_intercept(c, INTERCEPT_SMI); 161c7dfa400SMaxim Levitsky 162c7dfa400SMaxim Levitsky vmcb_set_intercept(c, INTERCEPT_VMLOAD); 163c7dfa400SMaxim Levitsky vmcb_set_intercept(c, INTERCEPT_VMSAVE); 164883b0a91SJoerg Roedel } 165883b0a91SJoerg Roedel 1662f675917SPaolo Bonzini static void copy_vmcb_control_area(struct vmcb_control_area *dst, 1672f675917SPaolo Bonzini struct vmcb_control_area *from) 168883b0a91SJoerg Roedel { 169c45ad722SBabu Moger unsigned int i; 170c45ad722SBabu Moger 171c45ad722SBabu Moger for (i = 0; i < MAX_INTERCEPT; i++) 172c45ad722SBabu Moger dst->intercepts[i] = from->intercepts[i]; 173c45ad722SBabu Moger 174883b0a91SJoerg Roedel dst->iopm_base_pa = from->iopm_base_pa; 175883b0a91SJoerg Roedel dst->msrpm_base_pa = from->msrpm_base_pa; 176883b0a91SJoerg Roedel dst->tsc_offset = from->tsc_offset; 1776c0238c4SPaolo Bonzini /* asid not copied, it is handled manually for svm->vmcb. */ 178883b0a91SJoerg Roedel dst->tlb_ctl = from->tlb_ctl; 179883b0a91SJoerg Roedel dst->int_ctl = from->int_ctl; 180883b0a91SJoerg Roedel dst->int_vector = from->int_vector; 181883b0a91SJoerg Roedel dst->int_state = from->int_state; 182883b0a91SJoerg Roedel dst->exit_code = from->exit_code; 183883b0a91SJoerg Roedel dst->exit_code_hi = from->exit_code_hi; 184883b0a91SJoerg Roedel dst->exit_info_1 = from->exit_info_1; 185883b0a91SJoerg Roedel dst->exit_info_2 = from->exit_info_2; 186883b0a91SJoerg Roedel dst->exit_int_info = from->exit_int_info; 187883b0a91SJoerg Roedel dst->exit_int_info_err = from->exit_int_info_err; 188883b0a91SJoerg Roedel dst->nested_ctl = from->nested_ctl; 189883b0a91SJoerg Roedel dst->event_inj = from->event_inj; 190883b0a91SJoerg Roedel dst->event_inj_err = from->event_inj_err; 191883b0a91SJoerg Roedel dst->nested_cr3 = from->nested_cr3; 192883b0a91SJoerg Roedel dst->virt_ext = from->virt_ext; 193883b0a91SJoerg Roedel dst->pause_filter_count = from->pause_filter_count; 194883b0a91SJoerg Roedel dst->pause_filter_thresh = from->pause_filter_thresh; 195883b0a91SJoerg Roedel } 196883b0a91SJoerg Roedel 197883b0a91SJoerg Roedel static bool nested_svm_vmrun_msrpm(struct vcpu_svm *svm) 198883b0a91SJoerg Roedel { 199883b0a91SJoerg Roedel /* 200883b0a91SJoerg Roedel * This function merges the msr permission bitmaps of kvm and the 201883b0a91SJoerg Roedel * nested vmcb. It is optimized in that it only merges the parts where 202883b0a91SJoerg Roedel * the kvm msr permission bitmap may contain zero bits 203883b0a91SJoerg Roedel */ 204883b0a91SJoerg Roedel int i; 205883b0a91SJoerg Roedel 206c62e2e94SBabu Moger if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) 207883b0a91SJoerg Roedel return true; 208883b0a91SJoerg Roedel 209883b0a91SJoerg Roedel for (i = 0; i < MSRPM_OFFSETS; i++) { 210883b0a91SJoerg Roedel u32 value, p; 211883b0a91SJoerg Roedel u64 offset; 212883b0a91SJoerg Roedel 213883b0a91SJoerg Roedel if (msrpm_offsets[i] == 0xffffffff) 214883b0a91SJoerg Roedel break; 215883b0a91SJoerg Roedel 216883b0a91SJoerg Roedel p = msrpm_offsets[i]; 217e670bf68SPaolo Bonzini offset = svm->nested.ctl.msrpm_base_pa + (p * 4); 218883b0a91SJoerg Roedel 219883b0a91SJoerg Roedel if (kvm_vcpu_read_guest(&svm->vcpu, offset, &value, 4)) 220883b0a91SJoerg Roedel return false; 221883b0a91SJoerg Roedel 222883b0a91SJoerg Roedel svm->nested.msrpm[p] = svm->msrpm[p] | value; 223883b0a91SJoerg Roedel } 224883b0a91SJoerg Roedel 225883b0a91SJoerg Roedel svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm)); 226883b0a91SJoerg Roedel 227883b0a91SJoerg Roedel return true; 228883b0a91SJoerg Roedel } 229883b0a91SJoerg Roedel 230ee695f22SKrish Sadhukhan /* 231ee695f22SKrish Sadhukhan * Bits 11:0 of bitmap address are ignored by hardware 232ee695f22SKrish Sadhukhan */ 233ee695f22SKrish Sadhukhan static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size) 234ee695f22SKrish Sadhukhan { 235ee695f22SKrish Sadhukhan u64 addr = PAGE_ALIGN(pa); 236ee695f22SKrish Sadhukhan 237ee695f22SKrish Sadhukhan return kvm_vcpu_is_legal_gpa(vcpu, addr) && 238ee695f22SKrish Sadhukhan kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1); 239ee695f22SKrish Sadhukhan } 240ee695f22SKrish Sadhukhan 241*174a921bSKrish Sadhukhan static bool nested_svm_check_tlb_ctl(struct kvm_vcpu *vcpu, u8 tlb_ctl) 242*174a921bSKrish Sadhukhan { 243*174a921bSKrish Sadhukhan /* Nested FLUSHBYASID is not supported yet. */ 244*174a921bSKrish Sadhukhan switch(tlb_ctl) { 245*174a921bSKrish Sadhukhan case TLB_CONTROL_DO_NOTHING: 246*174a921bSKrish Sadhukhan case TLB_CONTROL_FLUSH_ALL_ASID: 247*174a921bSKrish Sadhukhan return true; 248*174a921bSKrish Sadhukhan default: 249*174a921bSKrish Sadhukhan return false; 250*174a921bSKrish Sadhukhan } 251*174a921bSKrish Sadhukhan } 252*174a921bSKrish Sadhukhan 253ee695f22SKrish Sadhukhan static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu, 254ee695f22SKrish Sadhukhan struct vmcb_control_area *control) 255ca46d739SPaolo Bonzini { 25611f0cbf0SSean Christopherson if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN))) 257ca46d739SPaolo Bonzini return false; 258ca46d739SPaolo Bonzini 25911f0cbf0SSean Christopherson if (CC(control->asid == 0)) 260ca46d739SPaolo Bonzini return false; 261ca46d739SPaolo Bonzini 26211f0cbf0SSean Christopherson if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled)) 263ca46d739SPaolo Bonzini return false; 264ca46d739SPaolo Bonzini 265ee695f22SKrish Sadhukhan if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa, 266ee695f22SKrish Sadhukhan MSRPM_SIZE))) 267ee695f22SKrish Sadhukhan return false; 268ee695f22SKrish Sadhukhan if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa, 269ee695f22SKrish Sadhukhan IOPM_SIZE))) 270ee695f22SKrish Sadhukhan return false; 271ee695f22SKrish Sadhukhan 272*174a921bSKrish Sadhukhan if (CC(!nested_svm_check_tlb_ctl(vcpu, control->tlb_ctl))) 273*174a921bSKrish Sadhukhan return false; 274*174a921bSKrish Sadhukhan 275ca46d739SPaolo Bonzini return true; 276ca46d739SPaolo Bonzini } 277ca46d739SPaolo Bonzini 27863129754SPaolo Bonzini static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu, 2796906e06dSKrish Sadhukhan struct vmcb_save_area *save) 280883b0a91SJoerg Roedel { 2816906e06dSKrish Sadhukhan /* 2826906e06dSKrish Sadhukhan * These checks are also performed by KVM_SET_SREGS, 2836906e06dSKrish Sadhukhan * except that EFER.LMA is not checked by SVM against 2846906e06dSKrish Sadhukhan * CR0.PG && EFER.LME. 2856906e06dSKrish Sadhukhan */ 2866906e06dSKrish Sadhukhan if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) { 28711f0cbf0SSean Christopherson if (CC(!(save->cr4 & X86_CR4_PAE)) || 28811f0cbf0SSean Christopherson CC(!(save->cr0 & X86_CR0_PE)) || 28911f0cbf0SSean Christopherson CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3))) 290761e4169SKrish Sadhukhan return false; 291761e4169SKrish Sadhukhan } 2926906e06dSKrish Sadhukhan 29311f0cbf0SSean Christopherson if (CC(!kvm_is_valid_cr4(vcpu, save->cr4))) 29411f0cbf0SSean Christopherson return false; 29511f0cbf0SSean Christopherson 29611f0cbf0SSean Christopherson return true; 2976906e06dSKrish Sadhukhan } 2986906e06dSKrish Sadhukhan 2996906e06dSKrish Sadhukhan /* Common checks that apply to both L1 and L2 state. */ 30063129754SPaolo Bonzini static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu, 3016906e06dSKrish Sadhukhan struct vmcb_save_area *save) 3026906e06dSKrish Sadhukhan { 3033c346c0cSPaolo Bonzini /* 3043c346c0cSPaolo Bonzini * FIXME: these should be done after copying the fields, 3053c346c0cSPaolo Bonzini * to avoid TOC/TOU races. For these save area checks 3063c346c0cSPaolo Bonzini * the possible damage is limited since kvm_set_cr0 and 3073c346c0cSPaolo Bonzini * kvm_set_cr4 handle failure; EFER_SVME is an exception 3083c346c0cSPaolo Bonzini * so it is force-set later in nested_prepare_vmcb_save. 3093c346c0cSPaolo Bonzini */ 31011f0cbf0SSean Christopherson if (CC(!(save->efer & EFER_SVME))) 3116906e06dSKrish Sadhukhan return false; 3126906e06dSKrish Sadhukhan 31311f0cbf0SSean Christopherson if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) || 31411f0cbf0SSean Christopherson CC(save->cr0 & ~0xffffffffULL)) 3156906e06dSKrish Sadhukhan return false; 3166906e06dSKrish Sadhukhan 31711f0cbf0SSean Christopherson if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7))) 3186906e06dSKrish Sadhukhan return false; 3196906e06dSKrish Sadhukhan 32063129754SPaolo Bonzini if (!nested_vmcb_check_cr3_cr4(vcpu, save)) 3216906e06dSKrish Sadhukhan return false; 3226906e06dSKrish Sadhukhan 32363129754SPaolo Bonzini if (CC(!kvm_valid_efer(vcpu, save->efer))) 3246906e06dSKrish Sadhukhan return false; 3256906e06dSKrish Sadhukhan 3266906e06dSKrish Sadhukhan return true; 3276906e06dSKrish Sadhukhan } 3286906e06dSKrish Sadhukhan 329bb00bd9cSVitaly Kuznetsov void nested_load_control_from_vmcb12(struct vcpu_svm *svm, 3303e06f016SPaolo Bonzini struct vmcb_control_area *control) 3313e06f016SPaolo Bonzini { 332e670bf68SPaolo Bonzini copy_vmcb_control_area(&svm->nested.ctl, control); 3333e06f016SPaolo Bonzini 334cc440cdaSPaolo Bonzini /* Copy it here because nested_svm_check_controls will check it. */ 335cc440cdaSPaolo Bonzini svm->nested.ctl.asid = control->asid; 336e670bf68SPaolo Bonzini svm->nested.ctl.msrpm_base_pa &= ~0x0fffULL; 337e670bf68SPaolo Bonzini svm->nested.ctl.iopm_base_pa &= ~0x0fffULL; 3383e06f016SPaolo Bonzini } 3393e06f016SPaolo Bonzini 3402d8a42beSPaolo Bonzini /* 3412d8a42beSPaolo Bonzini * Synchronize fields that are written by the processor, so that 3429e8f0fbfSPaolo Bonzini * they can be copied back into the vmcb12. 3432d8a42beSPaolo Bonzini */ 3449e8f0fbfSPaolo Bonzini void nested_sync_control_from_vmcb02(struct vcpu_svm *svm) 3452d8a42beSPaolo Bonzini { 3462d8a42beSPaolo Bonzini u32 mask; 3472d8a42beSPaolo Bonzini svm->nested.ctl.event_inj = svm->vmcb->control.event_inj; 3482d8a42beSPaolo Bonzini svm->nested.ctl.event_inj_err = svm->vmcb->control.event_inj_err; 3492d8a42beSPaolo Bonzini 3502d8a42beSPaolo Bonzini /* Only a few fields of int_ctl are written by the processor. */ 3512d8a42beSPaolo Bonzini mask = V_IRQ_MASK | V_TPR_MASK; 3522d8a42beSPaolo Bonzini if (!(svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK) && 353a284ba56SJoerg Roedel svm_is_intercept(svm, INTERCEPT_VINTR)) { 3542d8a42beSPaolo Bonzini /* 3552d8a42beSPaolo Bonzini * In order to request an interrupt window, L0 is usurping 3562d8a42beSPaolo Bonzini * svm->vmcb->control.int_ctl and possibly setting V_IRQ 3572d8a42beSPaolo Bonzini * even if it was clear in L1's VMCB. Restoring it would be 3582d8a42beSPaolo Bonzini * wrong. However, in this case V_IRQ will remain true until 3592d8a42beSPaolo Bonzini * interrupt_window_interception calls svm_clear_vintr and 3602d8a42beSPaolo Bonzini * restores int_ctl. We can just leave it aside. 3612d8a42beSPaolo Bonzini */ 3622d8a42beSPaolo Bonzini mask &= ~V_IRQ_MASK; 3632d8a42beSPaolo Bonzini } 3642d8a42beSPaolo Bonzini svm->nested.ctl.int_ctl &= ~mask; 3652d8a42beSPaolo Bonzini svm->nested.ctl.int_ctl |= svm->vmcb->control.int_ctl & mask; 3662d8a42beSPaolo Bonzini } 3672d8a42beSPaolo Bonzini 36836e2e983SPaolo Bonzini /* 36936e2e983SPaolo Bonzini * Transfer any event that L0 or L1 wanted to inject into L2 to 37036e2e983SPaolo Bonzini * EXIT_INT_INFO. 37136e2e983SPaolo Bonzini */ 3729e8f0fbfSPaolo Bonzini static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm, 3730dd16b5bSMaxim Levitsky struct vmcb *vmcb12) 37436e2e983SPaolo Bonzini { 37536e2e983SPaolo Bonzini struct kvm_vcpu *vcpu = &svm->vcpu; 37636e2e983SPaolo Bonzini u32 exit_int_info = 0; 37736e2e983SPaolo Bonzini unsigned int nr; 37836e2e983SPaolo Bonzini 37936e2e983SPaolo Bonzini if (vcpu->arch.exception.injected) { 38036e2e983SPaolo Bonzini nr = vcpu->arch.exception.nr; 38136e2e983SPaolo Bonzini exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT; 38236e2e983SPaolo Bonzini 38336e2e983SPaolo Bonzini if (vcpu->arch.exception.has_error_code) { 38436e2e983SPaolo Bonzini exit_int_info |= SVM_EVTINJ_VALID_ERR; 3850dd16b5bSMaxim Levitsky vmcb12->control.exit_int_info_err = 38636e2e983SPaolo Bonzini vcpu->arch.exception.error_code; 38736e2e983SPaolo Bonzini } 38836e2e983SPaolo Bonzini 38936e2e983SPaolo Bonzini } else if (vcpu->arch.nmi_injected) { 39036e2e983SPaolo Bonzini exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI; 39136e2e983SPaolo Bonzini 39236e2e983SPaolo Bonzini } else if (vcpu->arch.interrupt.injected) { 39336e2e983SPaolo Bonzini nr = vcpu->arch.interrupt.nr; 39436e2e983SPaolo Bonzini exit_int_info = nr | SVM_EVTINJ_VALID; 39536e2e983SPaolo Bonzini 39636e2e983SPaolo Bonzini if (vcpu->arch.interrupt.soft) 39736e2e983SPaolo Bonzini exit_int_info |= SVM_EVTINJ_TYPE_SOFT; 39836e2e983SPaolo Bonzini else 39936e2e983SPaolo Bonzini exit_int_info |= SVM_EVTINJ_TYPE_INTR; 40036e2e983SPaolo Bonzini } 40136e2e983SPaolo Bonzini 4020dd16b5bSMaxim Levitsky vmcb12->control.exit_int_info = exit_int_info; 40336e2e983SPaolo Bonzini } 40436e2e983SPaolo Bonzini 40562156f6cSVitaly Kuznetsov static inline bool nested_npt_enabled(struct vcpu_svm *svm) 40662156f6cSVitaly Kuznetsov { 40762156f6cSVitaly Kuznetsov return svm->nested.ctl.nested_ctl & SVM_NESTED_CTL_NP_ENABLE; 40862156f6cSVitaly Kuznetsov } 40962156f6cSVitaly Kuznetsov 410d2e56019SSean Christopherson static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu) 411d2e56019SSean Christopherson { 412d2e56019SSean Christopherson /* 413d2e56019SSean Christopherson * TODO: optimize unconditional TLB flush/MMU sync. A partial list of 414d2e56019SSean Christopherson * things to fix before this can be conditional: 415d2e56019SSean Christopherson * 416d2e56019SSean Christopherson * - Flush TLBs for both L1 and L2 remote TLB flush 417d2e56019SSean Christopherson * - Honor L1's request to flush an ASID on nested VMRUN 418d2e56019SSean Christopherson * - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*] 419d2e56019SSean Christopherson * - Don't crush a pending TLB flush in vmcb02 on nested VMRUN 420d2e56019SSean Christopherson * - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST 421d2e56019SSean Christopherson * 422d2e56019SSean Christopherson * [*] Unlike nested EPT, SVM's ASID management can invalidate nested 423d2e56019SSean Christopherson * NPT guest-physical mappings on VMRUN. 424d2e56019SSean Christopherson */ 425d2e56019SSean Christopherson kvm_make_request(KVM_REQ_MMU_SYNC, vcpu); 426d2e56019SSean Christopherson kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu); 427d2e56019SSean Christopherson } 428d2e56019SSean Christopherson 42962156f6cSVitaly Kuznetsov /* 430d82aaef9SVitaly Kuznetsov * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true 431d82aaef9SVitaly Kuznetsov * if we are emulating VM-Entry into a guest with NPT enabled. 43262156f6cSVitaly Kuznetsov */ 43362156f6cSVitaly Kuznetsov static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, 434b222b0b8SMaxim Levitsky bool nested_npt, bool reload_pdptrs) 43562156f6cSVitaly Kuznetsov { 43611f0cbf0SSean Christopherson if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) 437a506fdd2SVitaly Kuznetsov return -EINVAL; 438a506fdd2SVitaly Kuznetsov 439b222b0b8SMaxim Levitsky if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) && 440a36dbec6SSean Christopherson CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) 441a506fdd2SVitaly Kuznetsov return -EINVAL; 442a506fdd2SVitaly Kuznetsov 443a506fdd2SVitaly Kuznetsov if (!nested_npt) 444b5129100SSean Christopherson kvm_mmu_new_pgd(vcpu, cr3); 445a506fdd2SVitaly Kuznetsov 446a506fdd2SVitaly Kuznetsov vcpu->arch.cr3 = cr3; 447a506fdd2SVitaly Kuznetsov kvm_register_mark_available(vcpu, VCPU_EXREG_CR3); 448a506fdd2SVitaly Kuznetsov 449616007c8SSean Christopherson /* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */ 450c9060662SSean Christopherson kvm_init_mmu(vcpu); 451a506fdd2SVitaly Kuznetsov 452a506fdd2SVitaly Kuznetsov return 0; 45362156f6cSVitaly Kuznetsov } 45462156f6cSVitaly Kuznetsov 4554995a368SCathy Avery void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm) 4564995a368SCathy Avery { 4574995a368SCathy Avery if (!svm->nested.vmcb02.ptr) 4584995a368SCathy Avery return; 4594995a368SCathy Avery 4604995a368SCathy Avery /* FIXME: merge g_pat from vmcb01 and vmcb12. */ 4614995a368SCathy Avery svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat; 4624995a368SCathy Avery } 4634995a368SCathy Avery 4649e8f0fbfSPaolo Bonzini static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12) 465883b0a91SJoerg Roedel { 4668173396eSCathy Avery bool new_vmcb12 = false; 4678173396eSCathy Avery 4684995a368SCathy Avery nested_vmcb02_compute_g_pat(svm); 4694995a368SCathy Avery 470883b0a91SJoerg Roedel /* Load the nested guest state */ 4718173396eSCathy Avery if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) { 4728173396eSCathy Avery new_vmcb12 = true; 4738173396eSCathy Avery svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa; 4748173396eSCathy Avery } 4758173396eSCathy Avery 4768173396eSCathy Avery if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) { 4770dd16b5bSMaxim Levitsky svm->vmcb->save.es = vmcb12->save.es; 4780dd16b5bSMaxim Levitsky svm->vmcb->save.cs = vmcb12->save.cs; 4790dd16b5bSMaxim Levitsky svm->vmcb->save.ss = vmcb12->save.ss; 4800dd16b5bSMaxim Levitsky svm->vmcb->save.ds = vmcb12->save.ds; 4814bb170a5SPaolo Bonzini svm->vmcb->save.cpl = vmcb12->save.cpl; 4824bb170a5SPaolo Bonzini vmcb_mark_dirty(svm->vmcb, VMCB_SEG); 4838173396eSCathy Avery } 4844bb170a5SPaolo Bonzini 4858173396eSCathy Avery if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) { 4860dd16b5bSMaxim Levitsky svm->vmcb->save.gdtr = vmcb12->save.gdtr; 4870dd16b5bSMaxim Levitsky svm->vmcb->save.idtr = vmcb12->save.idtr; 4884bb170a5SPaolo Bonzini vmcb_mark_dirty(svm->vmcb, VMCB_DT); 4898173396eSCathy Avery } 4904bb170a5SPaolo Bonzini 4918cce12b3SPaolo Bonzini kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED); 4923c346c0cSPaolo Bonzini 4933c346c0cSPaolo Bonzini /* 4943c346c0cSPaolo Bonzini * Force-set EFER_SVME even though it is checked earlier on the 4953c346c0cSPaolo Bonzini * VMCB12, because the guest can flip the bit between the check 4963c346c0cSPaolo Bonzini * and now. Clearing EFER_SVME would call svm_free_nested. 4973c346c0cSPaolo Bonzini */ 4983c346c0cSPaolo Bonzini svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME); 4993c346c0cSPaolo Bonzini 5000dd16b5bSMaxim Levitsky svm_set_cr0(&svm->vcpu, vmcb12->save.cr0); 5010dd16b5bSMaxim Levitsky svm_set_cr4(&svm->vcpu, vmcb12->save.cr4); 5024bb170a5SPaolo Bonzini 5034bb170a5SPaolo Bonzini svm->vcpu.arch.cr2 = vmcb12->save.cr2; 5048173396eSCathy Avery 5050dd16b5bSMaxim Levitsky kvm_rax_write(&svm->vcpu, vmcb12->save.rax); 5060dd16b5bSMaxim Levitsky kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp); 5070dd16b5bSMaxim Levitsky kvm_rip_write(&svm->vcpu, vmcb12->save.rip); 508883b0a91SJoerg Roedel 509883b0a91SJoerg Roedel /* In case we don't even reach vcpu_run, the fields are not updated */ 5100dd16b5bSMaxim Levitsky svm->vmcb->save.rax = vmcb12->save.rax; 5110dd16b5bSMaxim Levitsky svm->vmcb->save.rsp = vmcb12->save.rsp; 5120dd16b5bSMaxim Levitsky svm->vmcb->save.rip = vmcb12->save.rip; 5134bb170a5SPaolo Bonzini 5148173396eSCathy Avery /* These bits will be set properly on the first execution when new_vmc12 is true */ 5158173396eSCathy Avery if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) { 5168cce12b3SPaolo Bonzini svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1; 5179a3ecd5eSChenyi Qiang svm->vcpu.arch.dr6 = vmcb12->save.dr6 | DR6_ACTIVE_LOW; 5184bb170a5SPaolo Bonzini vmcb_mark_dirty(svm->vmcb, VMCB_DR); 519f241d711SPaolo Bonzini } 5208173396eSCathy Avery } 521883b0a91SJoerg Roedel 5229e8f0fbfSPaolo Bonzini static void nested_vmcb02_prepare_control(struct vcpu_svm *svm) 523f241d711SPaolo Bonzini { 5240f923e07SMaxim Levitsky const u32 int_ctl_vmcb01_bits = 5250f923e07SMaxim Levitsky V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK; 5260f923e07SMaxim Levitsky 5270f923e07SMaxim Levitsky const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK; 5280f923e07SMaxim Levitsky 529d2e56019SSean Christopherson struct kvm_vcpu *vcpu = &svm->vcpu; 53062156f6cSVitaly Kuznetsov 5317c3ecfcdSPaolo Bonzini /* 5327c3ecfcdSPaolo Bonzini * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2, 5337c3ecfcdSPaolo Bonzini * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes. 5347c3ecfcdSPaolo Bonzini */ 5354995a368SCathy Avery 5367c3ecfcdSPaolo Bonzini /* 5377c3ecfcdSPaolo Bonzini * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id, 5387c3ecfcdSPaolo Bonzini * avic_physical_id. 5397c3ecfcdSPaolo Bonzini */ 540feea0136SMaxim Levitsky WARN_ON(kvm_apicv_activated(svm->vcpu.kvm)); 5417c3ecfcdSPaolo Bonzini 5427c3ecfcdSPaolo Bonzini /* Copied from vmcb01. msrpm_base can be overwritten later. */ 5437c3ecfcdSPaolo Bonzini svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl; 5447c3ecfcdSPaolo Bonzini svm->vmcb->control.iopm_base_pa = svm->vmcb01.ptr->control.iopm_base_pa; 5457c3ecfcdSPaolo Bonzini svm->vmcb->control.msrpm_base_pa = svm->vmcb01.ptr->control.msrpm_base_pa; 5467c3ecfcdSPaolo Bonzini 5477c3ecfcdSPaolo Bonzini /* Done at vmrun: asid. */ 5487c3ecfcdSPaolo Bonzini 5497c3ecfcdSPaolo Bonzini /* Also overwritten later if necessary. */ 5507c3ecfcdSPaolo Bonzini svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING; 5517c3ecfcdSPaolo Bonzini 5527c3ecfcdSPaolo Bonzini /* nested_cr3. */ 55362156f6cSVitaly Kuznetsov if (nested_npt_enabled(svm)) 554d2e56019SSean Christopherson nested_svm_init_mmu_context(vcpu); 55569cb8774SPaolo Bonzini 5565228eb96SMaxim Levitsky vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset( 5575228eb96SMaxim Levitsky vcpu->arch.l1_tsc_offset, 5585228eb96SMaxim Levitsky svm->nested.ctl.tsc_offset, 5595228eb96SMaxim Levitsky svm->tsc_ratio_msr); 5605228eb96SMaxim Levitsky 5615228eb96SMaxim Levitsky svm->vmcb->control.tsc_offset = vcpu->arch.tsc_offset; 5625228eb96SMaxim Levitsky 5635228eb96SMaxim Levitsky if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) { 5645228eb96SMaxim Levitsky WARN_ON(!svm->tsc_scaling_enabled); 5655228eb96SMaxim Levitsky nested_svm_update_tsc_ratio_msr(vcpu); 5665228eb96SMaxim Levitsky } 567883b0a91SJoerg Roedel 56891b7130cSPaolo Bonzini svm->vmcb->control.int_ctl = 5690f923e07SMaxim Levitsky (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) | 5700f923e07SMaxim Levitsky (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits); 57191b7130cSPaolo Bonzini 572e670bf68SPaolo Bonzini svm->vmcb->control.int_vector = svm->nested.ctl.int_vector; 573e670bf68SPaolo Bonzini svm->vmcb->control.int_state = svm->nested.ctl.int_state; 574e670bf68SPaolo Bonzini svm->vmcb->control.event_inj = svm->nested.ctl.event_inj; 575e670bf68SPaolo Bonzini svm->vmcb->control.event_inj_err = svm->nested.ctl.event_inj_err; 576883b0a91SJoerg Roedel 577d2e56019SSean Christopherson nested_svm_transition_tlb_flush(vcpu); 578d2e56019SSean Christopherson 579883b0a91SJoerg Roedel /* Enter Guest-Mode */ 580d2e56019SSean Christopherson enter_guest_mode(vcpu); 581883b0a91SJoerg Roedel 582883b0a91SJoerg Roedel /* 583883b0a91SJoerg Roedel * Merge guest and host intercepts - must be called with vcpu in 5844bb170a5SPaolo Bonzini * guest-mode to take effect. 585883b0a91SJoerg Roedel */ 586883b0a91SJoerg Roedel recalc_intercepts(svm); 587f241d711SPaolo Bonzini } 588f241d711SPaolo Bonzini 589d00b99c5SBabu Moger static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb) 590d00b99c5SBabu Moger { 591d00b99c5SBabu Moger /* 592d00b99c5SBabu Moger * Some VMCB state is shared between L1 and L2 and thus has to be 593d00b99c5SBabu Moger * moved at the time of nested vmrun and vmexit. 594d00b99c5SBabu Moger * 595d00b99c5SBabu Moger * VMLOAD/VMSAVE state would also belong in this category, but KVM 596d00b99c5SBabu Moger * always performs VMLOAD and VMSAVE from the VMCB01. 597d00b99c5SBabu Moger */ 598d00b99c5SBabu Moger to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl; 599d00b99c5SBabu Moger } 600d00b99c5SBabu Moger 60163129754SPaolo Bonzini int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, 602e85d3e7bSMaxim Levitsky struct vmcb *vmcb12, bool from_vmrun) 603f241d711SPaolo Bonzini { 60463129754SPaolo Bonzini struct vcpu_svm *svm = to_svm(vcpu); 605a506fdd2SVitaly Kuznetsov int ret; 606a506fdd2SVitaly Kuznetsov 607954f419bSMaxim Levitsky trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa, 608954f419bSMaxim Levitsky vmcb12->save.rip, 609954f419bSMaxim Levitsky vmcb12->control.int_ctl, 610954f419bSMaxim Levitsky vmcb12->control.event_inj, 611954f419bSMaxim Levitsky vmcb12->control.nested_ctl); 612954f419bSMaxim Levitsky 613954f419bSMaxim Levitsky trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff, 614954f419bSMaxim Levitsky vmcb12->control.intercepts[INTERCEPT_CR] >> 16, 615954f419bSMaxim Levitsky vmcb12->control.intercepts[INTERCEPT_EXCEPTION], 616954f419bSMaxim Levitsky vmcb12->control.intercepts[INTERCEPT_WORD3], 617954f419bSMaxim Levitsky vmcb12->control.intercepts[INTERCEPT_WORD4], 618954f419bSMaxim Levitsky vmcb12->control.intercepts[INTERCEPT_WORD5]); 619954f419bSMaxim Levitsky 620954f419bSMaxim Levitsky 6210dd16b5bSMaxim Levitsky svm->nested.vmcb12_gpa = vmcb12_gpa; 6224995a368SCathy Avery 6234995a368SCathy Avery WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr); 6244995a368SCathy Avery 625d00b99c5SBabu Moger nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr); 6264995a368SCathy Avery 6274995a368SCathy Avery svm_switch_vmcb(svm, &svm->nested.vmcb02); 6289e8f0fbfSPaolo Bonzini nested_vmcb02_prepare_control(svm); 6299e8f0fbfSPaolo Bonzini nested_vmcb02_prepare_save(svm, vmcb12); 630f241d711SPaolo Bonzini 6310dd16b5bSMaxim Levitsky ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3, 632e85d3e7bSMaxim Levitsky nested_npt_enabled(svm), from_vmrun); 633a506fdd2SVitaly Kuznetsov if (ret) 634a506fdd2SVitaly Kuznetsov return ret; 635a506fdd2SVitaly Kuznetsov 636a04aead1SPaolo Bonzini if (!npt_enabled) 63763129754SPaolo Bonzini vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested; 638a04aead1SPaolo Bonzini 639e85d3e7bSMaxim Levitsky if (!from_vmrun) 640e85d3e7bSMaxim Levitsky kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 641e85d3e7bSMaxim Levitsky 642ffdf7f9eSPaolo Bonzini svm_set_gif(svm, true); 64359cd9bc5SVitaly Kuznetsov 64459cd9bc5SVitaly Kuznetsov return 0; 645883b0a91SJoerg Roedel } 646883b0a91SJoerg Roedel 64763129754SPaolo Bonzini int nested_svm_vmrun(struct kvm_vcpu *vcpu) 648883b0a91SJoerg Roedel { 64963129754SPaolo Bonzini struct vcpu_svm *svm = to_svm(vcpu); 650883b0a91SJoerg Roedel int ret; 6510dd16b5bSMaxim Levitsky struct vmcb *vmcb12; 652883b0a91SJoerg Roedel struct kvm_host_map map; 6530dd16b5bSMaxim Levitsky u64 vmcb12_gpa; 654883b0a91SJoerg Roedel 655fb79f566SVitaly Kuznetsov if (!svm->nested.hsave_msr) { 656fb79f566SVitaly Kuznetsov kvm_inject_gp(vcpu, 0); 657fb79f566SVitaly Kuznetsov return 1; 658fb79f566SVitaly Kuznetsov } 659fb79f566SVitaly Kuznetsov 66063129754SPaolo Bonzini if (is_smm(vcpu)) { 66163129754SPaolo Bonzini kvm_queue_exception(vcpu, UD_VECTOR); 6627c67f546SPaolo Bonzini return 1; 6637c67f546SPaolo Bonzini } 664883b0a91SJoerg Roedel 6650dd16b5bSMaxim Levitsky vmcb12_gpa = svm->vmcb->save.rax; 66663129754SPaolo Bonzini ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map); 667883b0a91SJoerg Roedel if (ret == -EINVAL) { 66863129754SPaolo Bonzini kvm_inject_gp(vcpu, 0); 669883b0a91SJoerg Roedel return 1; 670883b0a91SJoerg Roedel } else if (ret) { 67163129754SPaolo Bonzini return kvm_skip_emulated_instruction(vcpu); 672883b0a91SJoerg Roedel } 673883b0a91SJoerg Roedel 67463129754SPaolo Bonzini ret = kvm_skip_emulated_instruction(vcpu); 675883b0a91SJoerg Roedel 6760dd16b5bSMaxim Levitsky vmcb12 = map.hva; 677883b0a91SJoerg Roedel 6782fcf4876SMaxim Levitsky if (WARN_ON_ONCE(!svm->nested.initialized)) 6792fcf4876SMaxim Levitsky return -EINVAL; 6802fcf4876SMaxim Levitsky 681cb9b6a1bSPaolo Bonzini nested_load_control_from_vmcb12(svm, &vmcb12->control); 682cb9b6a1bSPaolo Bonzini 683cb9b6a1bSPaolo Bonzini if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) || 684ee695f22SKrish Sadhukhan !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) { 6850dd16b5bSMaxim Levitsky vmcb12->control.exit_code = SVM_EXIT_ERR; 6860dd16b5bSMaxim Levitsky vmcb12->control.exit_code_hi = 0; 6870dd16b5bSMaxim Levitsky vmcb12->control.exit_info_1 = 0; 6880dd16b5bSMaxim Levitsky vmcb12->control.exit_info_2 = 0; 68969c9dfa2SPaolo Bonzini goto out; 690883b0a91SJoerg Roedel } 691883b0a91SJoerg Roedel 692883b0a91SJoerg Roedel /* 6934995a368SCathy Avery * Since vmcb01 is not in use, we can use it to store some of the L1 6944995a368SCathy Avery * state. 695883b0a91SJoerg Roedel */ 69663129754SPaolo Bonzini svm->vmcb01.ptr->save.efer = vcpu->arch.efer; 69763129754SPaolo Bonzini svm->vmcb01.ptr->save.cr0 = kvm_read_cr0(vcpu); 69863129754SPaolo Bonzini svm->vmcb01.ptr->save.cr4 = vcpu->arch.cr4; 69963129754SPaolo Bonzini svm->vmcb01.ptr->save.rflags = kvm_get_rflags(vcpu); 70063129754SPaolo Bonzini svm->vmcb01.ptr->save.rip = kvm_rip_read(vcpu); 701883b0a91SJoerg Roedel 7024995a368SCathy Avery if (!npt_enabled) 70363129754SPaolo Bonzini svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(vcpu); 704883b0a91SJoerg Roedel 705f74f9414SPaolo Bonzini svm->nested.nested_run_pending = 1; 706883b0a91SJoerg Roedel 707e85d3e7bSMaxim Levitsky if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true)) 70859cd9bc5SVitaly Kuznetsov goto out_exit_err; 70959cd9bc5SVitaly Kuznetsov 71059cd9bc5SVitaly Kuznetsov if (nested_svm_vmrun_msrpm(svm)) 71159cd9bc5SVitaly Kuznetsov goto out; 71259cd9bc5SVitaly Kuznetsov 71359cd9bc5SVitaly Kuznetsov out_exit_err: 714ebdb3dbaSVitaly Kuznetsov svm->nested.nested_run_pending = 0; 715ebdb3dbaSVitaly Kuznetsov 716883b0a91SJoerg Roedel svm->vmcb->control.exit_code = SVM_EXIT_ERR; 717883b0a91SJoerg Roedel svm->vmcb->control.exit_code_hi = 0; 718883b0a91SJoerg Roedel svm->vmcb->control.exit_info_1 = 0; 719883b0a91SJoerg Roedel svm->vmcb->control.exit_info_2 = 0; 720883b0a91SJoerg Roedel 721883b0a91SJoerg Roedel nested_svm_vmexit(svm); 722883b0a91SJoerg Roedel 72369c9dfa2SPaolo Bonzini out: 72463129754SPaolo Bonzini kvm_vcpu_unmap(vcpu, &map, true); 72569c9dfa2SPaolo Bonzini 726883b0a91SJoerg Roedel return ret; 727883b0a91SJoerg Roedel } 728883b0a91SJoerg Roedel 7290a758290SVitaly Kuznetsov /* Copy state save area fields which are handled by VMRUN */ 7302bb16beaSVitaly Kuznetsov void svm_copy_vmrun_state(struct vmcb_save_area *to_save, 7312bb16beaSVitaly Kuznetsov struct vmcb_save_area *from_save) 7320a758290SVitaly Kuznetsov { 7330a758290SVitaly Kuznetsov to_save->es = from_save->es; 7340a758290SVitaly Kuznetsov to_save->cs = from_save->cs; 7350a758290SVitaly Kuznetsov to_save->ss = from_save->ss; 7360a758290SVitaly Kuznetsov to_save->ds = from_save->ds; 7370a758290SVitaly Kuznetsov to_save->gdtr = from_save->gdtr; 7380a758290SVitaly Kuznetsov to_save->idtr = from_save->idtr; 7390a758290SVitaly Kuznetsov to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED; 7400a758290SVitaly Kuznetsov to_save->efer = from_save->efer; 7410a758290SVitaly Kuznetsov to_save->cr0 = from_save->cr0; 7420a758290SVitaly Kuznetsov to_save->cr3 = from_save->cr3; 7430a758290SVitaly Kuznetsov to_save->cr4 = from_save->cr4; 7440a758290SVitaly Kuznetsov to_save->rax = from_save->rax; 7450a758290SVitaly Kuznetsov to_save->rsp = from_save->rsp; 7460a758290SVitaly Kuznetsov to_save->rip = from_save->rip; 7470a758290SVitaly Kuznetsov to_save->cpl = 0; 7480a758290SVitaly Kuznetsov } 7490a758290SVitaly Kuznetsov 7502bb16beaSVitaly Kuznetsov void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb) 751883b0a91SJoerg Roedel { 752883b0a91SJoerg Roedel to_vmcb->save.fs = from_vmcb->save.fs; 753883b0a91SJoerg Roedel to_vmcb->save.gs = from_vmcb->save.gs; 754883b0a91SJoerg Roedel to_vmcb->save.tr = from_vmcb->save.tr; 755883b0a91SJoerg Roedel to_vmcb->save.ldtr = from_vmcb->save.ldtr; 756883b0a91SJoerg Roedel to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base; 757883b0a91SJoerg Roedel to_vmcb->save.star = from_vmcb->save.star; 758883b0a91SJoerg Roedel to_vmcb->save.lstar = from_vmcb->save.lstar; 759883b0a91SJoerg Roedel to_vmcb->save.cstar = from_vmcb->save.cstar; 760883b0a91SJoerg Roedel to_vmcb->save.sfmask = from_vmcb->save.sfmask; 761883b0a91SJoerg Roedel to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs; 762883b0a91SJoerg Roedel to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp; 763883b0a91SJoerg Roedel to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip; 764883b0a91SJoerg Roedel } 765883b0a91SJoerg Roedel 766883b0a91SJoerg Roedel int nested_svm_vmexit(struct vcpu_svm *svm) 767883b0a91SJoerg Roedel { 76863129754SPaolo Bonzini struct kvm_vcpu *vcpu = &svm->vcpu; 7690dd16b5bSMaxim Levitsky struct vmcb *vmcb12; 770883b0a91SJoerg Roedel struct vmcb *vmcb = svm->vmcb; 771883b0a91SJoerg Roedel struct kvm_host_map map; 77263129754SPaolo Bonzini int rc; 773883b0a91SJoerg Roedel 774cb6a32c2SSean Christopherson /* Triple faults in L2 should never escape. */ 775cb6a32c2SSean Christopherson WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)); 776cb6a32c2SSean Christopherson 77763129754SPaolo Bonzini rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map); 778883b0a91SJoerg Roedel if (rc) { 779883b0a91SJoerg Roedel if (rc == -EINVAL) 78063129754SPaolo Bonzini kvm_inject_gp(vcpu, 0); 781883b0a91SJoerg Roedel return 1; 782883b0a91SJoerg Roedel } 783883b0a91SJoerg Roedel 7840dd16b5bSMaxim Levitsky vmcb12 = map.hva; 785883b0a91SJoerg Roedel 786883b0a91SJoerg Roedel /* Exit Guest-Mode */ 78763129754SPaolo Bonzini leave_guest_mode(vcpu); 7880dd16b5bSMaxim Levitsky svm->nested.vmcb12_gpa = 0; 7892d8a42beSPaolo Bonzini WARN_ON_ONCE(svm->nested.nested_run_pending); 790883b0a91SJoerg Roedel 79163129754SPaolo Bonzini kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 792f2c7ef3bSMaxim Levitsky 79338c0b192SPaolo Bonzini /* in case we halted in L2 */ 79438c0b192SPaolo Bonzini svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE; 79538c0b192SPaolo Bonzini 796883b0a91SJoerg Roedel /* Give the current vmcb to the guest */ 797883b0a91SJoerg Roedel 7980dd16b5bSMaxim Levitsky vmcb12->save.es = vmcb->save.es; 7990dd16b5bSMaxim Levitsky vmcb12->save.cs = vmcb->save.cs; 8000dd16b5bSMaxim Levitsky vmcb12->save.ss = vmcb->save.ss; 8010dd16b5bSMaxim Levitsky vmcb12->save.ds = vmcb->save.ds; 8020dd16b5bSMaxim Levitsky vmcb12->save.gdtr = vmcb->save.gdtr; 8030dd16b5bSMaxim Levitsky vmcb12->save.idtr = vmcb->save.idtr; 8040dd16b5bSMaxim Levitsky vmcb12->save.efer = svm->vcpu.arch.efer; 80563129754SPaolo Bonzini vmcb12->save.cr0 = kvm_read_cr0(vcpu); 80663129754SPaolo Bonzini vmcb12->save.cr3 = kvm_read_cr3(vcpu); 8070dd16b5bSMaxim Levitsky vmcb12->save.cr2 = vmcb->save.cr2; 8080dd16b5bSMaxim Levitsky vmcb12->save.cr4 = svm->vcpu.arch.cr4; 80963129754SPaolo Bonzini vmcb12->save.rflags = kvm_get_rflags(vcpu); 81063129754SPaolo Bonzini vmcb12->save.rip = kvm_rip_read(vcpu); 81163129754SPaolo Bonzini vmcb12->save.rsp = kvm_rsp_read(vcpu); 81263129754SPaolo Bonzini vmcb12->save.rax = kvm_rax_read(vcpu); 8130dd16b5bSMaxim Levitsky vmcb12->save.dr7 = vmcb->save.dr7; 8140dd16b5bSMaxim Levitsky vmcb12->save.dr6 = svm->vcpu.arch.dr6; 8150dd16b5bSMaxim Levitsky vmcb12->save.cpl = vmcb->save.cpl; 816883b0a91SJoerg Roedel 8170dd16b5bSMaxim Levitsky vmcb12->control.int_state = vmcb->control.int_state; 8180dd16b5bSMaxim Levitsky vmcb12->control.exit_code = vmcb->control.exit_code; 8190dd16b5bSMaxim Levitsky vmcb12->control.exit_code_hi = vmcb->control.exit_code_hi; 8200dd16b5bSMaxim Levitsky vmcb12->control.exit_info_1 = vmcb->control.exit_info_1; 8210dd16b5bSMaxim Levitsky vmcb12->control.exit_info_2 = vmcb->control.exit_info_2; 82236e2e983SPaolo Bonzini 8230dd16b5bSMaxim Levitsky if (vmcb12->control.exit_code != SVM_EXIT_ERR) 8249e8f0fbfSPaolo Bonzini nested_save_pending_event_to_vmcb12(svm, vmcb12); 825883b0a91SJoerg Roedel 826883b0a91SJoerg Roedel if (svm->nrips_enabled) 8270dd16b5bSMaxim Levitsky vmcb12->control.next_rip = vmcb->control.next_rip; 828883b0a91SJoerg Roedel 8290dd16b5bSMaxim Levitsky vmcb12->control.int_ctl = svm->nested.ctl.int_ctl; 8300dd16b5bSMaxim Levitsky vmcb12->control.tlb_ctl = svm->nested.ctl.tlb_ctl; 8310dd16b5bSMaxim Levitsky vmcb12->control.event_inj = svm->nested.ctl.event_inj; 8320dd16b5bSMaxim Levitsky vmcb12->control.event_inj_err = svm->nested.ctl.event_inj_err; 833883b0a91SJoerg Roedel 834d00b99c5SBabu Moger nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr); 835d00b99c5SBabu Moger 8364995a368SCathy Avery svm_switch_vmcb(svm, &svm->vmcb01); 8374995a368SCathy Avery 8384995a368SCathy Avery /* 8394995a368SCathy Avery * On vmexit the GIF is set to false and 8404995a368SCathy Avery * no event can be injected in L1. 8414995a368SCathy Avery */ 8429883764aSMaxim Levitsky svm_set_gif(svm, false); 8434995a368SCathy Avery svm->vmcb->control.exit_int_info = 0; 8449883764aSMaxim Levitsky 8457ca62d13SPaolo Bonzini svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset; 8467ca62d13SPaolo Bonzini if (svm->vmcb->control.tsc_offset != svm->vcpu.arch.tsc_offset) { 8477ca62d13SPaolo Bonzini svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset; 8487ca62d13SPaolo Bonzini vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS); 8497ca62d13SPaolo Bonzini } 85018fc6c55SPaolo Bonzini 8515228eb96SMaxim Levitsky if (svm->tsc_ratio_msr != kvm_default_tsc_scaling_ratio) { 8525228eb96SMaxim Levitsky WARN_ON(!svm->tsc_scaling_enabled); 8535228eb96SMaxim Levitsky vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio; 8545228eb96SMaxim Levitsky svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); 8555228eb96SMaxim Levitsky } 8565228eb96SMaxim Levitsky 857e670bf68SPaolo Bonzini svm->nested.ctl.nested_cr3 = 0; 858883b0a91SJoerg Roedel 8594995a368SCathy Avery /* 8604995a368SCathy Avery * Restore processor state that had been saved in vmcb01 8614995a368SCathy Avery */ 86263129754SPaolo Bonzini kvm_set_rflags(vcpu, svm->vmcb->save.rflags); 86363129754SPaolo Bonzini svm_set_efer(vcpu, svm->vmcb->save.efer); 86463129754SPaolo Bonzini svm_set_cr0(vcpu, svm->vmcb->save.cr0 | X86_CR0_PE); 86563129754SPaolo Bonzini svm_set_cr4(vcpu, svm->vmcb->save.cr4); 86663129754SPaolo Bonzini kvm_rax_write(vcpu, svm->vmcb->save.rax); 86763129754SPaolo Bonzini kvm_rsp_write(vcpu, svm->vmcb->save.rsp); 86863129754SPaolo Bonzini kvm_rip_write(vcpu, svm->vmcb->save.rip); 8694995a368SCathy Avery 8704995a368SCathy Avery svm->vcpu.arch.dr7 = DR7_FIXED_1; 8714995a368SCathy Avery kvm_update_dr7(&svm->vcpu); 872883b0a91SJoerg Roedel 8730dd16b5bSMaxim Levitsky trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code, 8740dd16b5bSMaxim Levitsky vmcb12->control.exit_info_1, 8750dd16b5bSMaxim Levitsky vmcb12->control.exit_info_2, 8760dd16b5bSMaxim Levitsky vmcb12->control.exit_int_info, 8770dd16b5bSMaxim Levitsky vmcb12->control.exit_int_info_err, 87836e2e983SPaolo Bonzini KVM_ISA_SVM); 87936e2e983SPaolo Bonzini 88063129754SPaolo Bonzini kvm_vcpu_unmap(vcpu, &map, true); 881883b0a91SJoerg Roedel 882d2e56019SSean Christopherson nested_svm_transition_tlb_flush(vcpu); 883d2e56019SSean Christopherson 88463129754SPaolo Bonzini nested_svm_uninit_mmu_context(vcpu); 885bf7dea42SVitaly Kuznetsov 886b222b0b8SMaxim Levitsky rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false, true); 887d82aaef9SVitaly Kuznetsov if (rc) 888d82aaef9SVitaly Kuznetsov return 1; 889bf7dea42SVitaly Kuznetsov 890883b0a91SJoerg Roedel /* 891883b0a91SJoerg Roedel * Drop what we picked up for L2 via svm_complete_interrupts() so it 892883b0a91SJoerg Roedel * doesn't end up in L1. 893883b0a91SJoerg Roedel */ 894883b0a91SJoerg Roedel svm->vcpu.arch.nmi_injected = false; 89563129754SPaolo Bonzini kvm_clear_exception_queue(vcpu); 89663129754SPaolo Bonzini kvm_clear_interrupt_queue(vcpu); 897883b0a91SJoerg Roedel 8989a7de6ecSKrish Sadhukhan /* 8999a7de6ecSKrish Sadhukhan * If we are here following the completion of a VMRUN that 9009a7de6ecSKrish Sadhukhan * is being single-stepped, queue the pending #DB intercept 9019a7de6ecSKrish Sadhukhan * right now so that it an be accounted for before we execute 9029a7de6ecSKrish Sadhukhan * L1's next instruction. 9039a7de6ecSKrish Sadhukhan */ 9049a7de6ecSKrish Sadhukhan if (unlikely(svm->vmcb->save.rflags & X86_EFLAGS_TF)) 9059a7de6ecSKrish Sadhukhan kvm_queue_exception(&(svm->vcpu), DB_VECTOR); 9069a7de6ecSKrish Sadhukhan 907883b0a91SJoerg Roedel return 0; 908883b0a91SJoerg Roedel } 909883b0a91SJoerg Roedel 910cb6a32c2SSean Christopherson static void nested_svm_triple_fault(struct kvm_vcpu *vcpu) 911cb6a32c2SSean Christopherson { 9123a87c7e0SSean Christopherson nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN); 913cb6a32c2SSean Christopherson } 914cb6a32c2SSean Christopherson 9152fcf4876SMaxim Levitsky int svm_allocate_nested(struct vcpu_svm *svm) 9162fcf4876SMaxim Levitsky { 9174995a368SCathy Avery struct page *vmcb02_page; 9182fcf4876SMaxim Levitsky 9192fcf4876SMaxim Levitsky if (svm->nested.initialized) 9202fcf4876SMaxim Levitsky return 0; 9212fcf4876SMaxim Levitsky 9224995a368SCathy Avery vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO); 9234995a368SCathy Avery if (!vmcb02_page) 9242fcf4876SMaxim Levitsky return -ENOMEM; 9254995a368SCathy Avery svm->nested.vmcb02.ptr = page_address(vmcb02_page); 9264995a368SCathy Avery svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT); 9272fcf4876SMaxim Levitsky 9282fcf4876SMaxim Levitsky svm->nested.msrpm = svm_vcpu_alloc_msrpm(); 9292fcf4876SMaxim Levitsky if (!svm->nested.msrpm) 9304995a368SCathy Avery goto err_free_vmcb02; 9312fcf4876SMaxim Levitsky svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm); 9322fcf4876SMaxim Levitsky 9332fcf4876SMaxim Levitsky svm->nested.initialized = true; 9342fcf4876SMaxim Levitsky return 0; 9352fcf4876SMaxim Levitsky 9364995a368SCathy Avery err_free_vmcb02: 9374995a368SCathy Avery __free_page(vmcb02_page); 9382fcf4876SMaxim Levitsky return -ENOMEM; 9392fcf4876SMaxim Levitsky } 9402fcf4876SMaxim Levitsky 9412fcf4876SMaxim Levitsky void svm_free_nested(struct vcpu_svm *svm) 9422fcf4876SMaxim Levitsky { 9432fcf4876SMaxim Levitsky if (!svm->nested.initialized) 9442fcf4876SMaxim Levitsky return; 9452fcf4876SMaxim Levitsky 9462fcf4876SMaxim Levitsky svm_vcpu_free_msrpm(svm->nested.msrpm); 9472fcf4876SMaxim Levitsky svm->nested.msrpm = NULL; 9482fcf4876SMaxim Levitsky 9494995a368SCathy Avery __free_page(virt_to_page(svm->nested.vmcb02.ptr)); 9504995a368SCathy Avery svm->nested.vmcb02.ptr = NULL; 9512fcf4876SMaxim Levitsky 952c74ad08fSMaxim Levitsky /* 953c74ad08fSMaxim Levitsky * When last_vmcb12_gpa matches the current vmcb12 gpa, 954c74ad08fSMaxim Levitsky * some vmcb12 fields are not loaded if they are marked clean 955c74ad08fSMaxim Levitsky * in the vmcb12, since in this case they are up to date already. 956c74ad08fSMaxim Levitsky * 957c74ad08fSMaxim Levitsky * When the vmcb02 is freed, this optimization becomes invalid. 958c74ad08fSMaxim Levitsky */ 959c74ad08fSMaxim Levitsky svm->nested.last_vmcb12_gpa = INVALID_GPA; 960c74ad08fSMaxim Levitsky 9612fcf4876SMaxim Levitsky svm->nested.initialized = false; 9622fcf4876SMaxim Levitsky } 9632fcf4876SMaxim Levitsky 964c513f484SPaolo Bonzini /* 965c513f484SPaolo Bonzini * Forcibly leave nested mode in order to be able to reset the VCPU later on. 966c513f484SPaolo Bonzini */ 967c513f484SPaolo Bonzini void svm_leave_nested(struct vcpu_svm *svm) 968c513f484SPaolo Bonzini { 96963129754SPaolo Bonzini struct kvm_vcpu *vcpu = &svm->vcpu; 97063129754SPaolo Bonzini 97163129754SPaolo Bonzini if (is_guest_mode(vcpu)) { 972c513f484SPaolo Bonzini svm->nested.nested_run_pending = 0; 973c74ad08fSMaxim Levitsky svm->nested.vmcb12_gpa = INVALID_GPA; 974c74ad08fSMaxim Levitsky 97563129754SPaolo Bonzini leave_guest_mode(vcpu); 9764995a368SCathy Avery 977deee59baSMaxim Levitsky svm_switch_vmcb(svm, &svm->vmcb01); 9784995a368SCathy Avery 97963129754SPaolo Bonzini nested_svm_uninit_mmu_context(vcpu); 98056fe28deSMaxim Levitsky vmcb_mark_all_dirty(svm->vmcb); 981c513f484SPaolo Bonzini } 982a7d5c7ceSPaolo Bonzini 98363129754SPaolo Bonzini kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 984c513f484SPaolo Bonzini } 985c513f484SPaolo Bonzini 986883b0a91SJoerg Roedel static int nested_svm_exit_handled_msr(struct vcpu_svm *svm) 987883b0a91SJoerg Roedel { 988883b0a91SJoerg Roedel u32 offset, msr, value; 989883b0a91SJoerg Roedel int write, mask; 990883b0a91SJoerg Roedel 991c62e2e94SBabu Moger if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT))) 992883b0a91SJoerg Roedel return NESTED_EXIT_HOST; 993883b0a91SJoerg Roedel 994883b0a91SJoerg Roedel msr = svm->vcpu.arch.regs[VCPU_REGS_RCX]; 995883b0a91SJoerg Roedel offset = svm_msrpm_offset(msr); 996883b0a91SJoerg Roedel write = svm->vmcb->control.exit_info_1 & 1; 997883b0a91SJoerg Roedel mask = 1 << ((2 * (msr & 0xf)) + write); 998883b0a91SJoerg Roedel 999883b0a91SJoerg Roedel if (offset == MSR_INVALID) 1000883b0a91SJoerg Roedel return NESTED_EXIT_DONE; 1001883b0a91SJoerg Roedel 1002883b0a91SJoerg Roedel /* Offset is in 32 bit units but need in 8 bit units */ 1003883b0a91SJoerg Roedel offset *= 4; 1004883b0a91SJoerg Roedel 1005e670bf68SPaolo Bonzini if (kvm_vcpu_read_guest(&svm->vcpu, svm->nested.ctl.msrpm_base_pa + offset, &value, 4)) 1006883b0a91SJoerg Roedel return NESTED_EXIT_DONE; 1007883b0a91SJoerg Roedel 1008883b0a91SJoerg Roedel return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; 1009883b0a91SJoerg Roedel } 1010883b0a91SJoerg Roedel 1011883b0a91SJoerg Roedel static int nested_svm_intercept_ioio(struct vcpu_svm *svm) 1012883b0a91SJoerg Roedel { 1013883b0a91SJoerg Roedel unsigned port, size, iopm_len; 1014883b0a91SJoerg Roedel u16 val, mask; 1015883b0a91SJoerg Roedel u8 start_bit; 1016883b0a91SJoerg Roedel u64 gpa; 1017883b0a91SJoerg Roedel 1018c62e2e94SBabu Moger if (!(vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT))) 1019883b0a91SJoerg Roedel return NESTED_EXIT_HOST; 1020883b0a91SJoerg Roedel 1021883b0a91SJoerg Roedel port = svm->vmcb->control.exit_info_1 >> 16; 1022883b0a91SJoerg Roedel size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >> 1023883b0a91SJoerg Roedel SVM_IOIO_SIZE_SHIFT; 1024e670bf68SPaolo Bonzini gpa = svm->nested.ctl.iopm_base_pa + (port / 8); 1025883b0a91SJoerg Roedel start_bit = port % 8; 1026883b0a91SJoerg Roedel iopm_len = (start_bit + size > 8) ? 2 : 1; 1027883b0a91SJoerg Roedel mask = (0xf >> (4 - size)) << start_bit; 1028883b0a91SJoerg Roedel val = 0; 1029883b0a91SJoerg Roedel 1030883b0a91SJoerg Roedel if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len)) 1031883b0a91SJoerg Roedel return NESTED_EXIT_DONE; 1032883b0a91SJoerg Roedel 1033883b0a91SJoerg Roedel return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST; 1034883b0a91SJoerg Roedel } 1035883b0a91SJoerg Roedel 1036883b0a91SJoerg Roedel static int nested_svm_intercept(struct vcpu_svm *svm) 1037883b0a91SJoerg Roedel { 1038883b0a91SJoerg Roedel u32 exit_code = svm->vmcb->control.exit_code; 1039883b0a91SJoerg Roedel int vmexit = NESTED_EXIT_HOST; 1040883b0a91SJoerg Roedel 1041883b0a91SJoerg Roedel switch (exit_code) { 1042883b0a91SJoerg Roedel case SVM_EXIT_MSR: 1043883b0a91SJoerg Roedel vmexit = nested_svm_exit_handled_msr(svm); 1044883b0a91SJoerg Roedel break; 1045883b0a91SJoerg Roedel case SVM_EXIT_IOIO: 1046883b0a91SJoerg Roedel vmexit = nested_svm_intercept_ioio(svm); 1047883b0a91SJoerg Roedel break; 1048883b0a91SJoerg Roedel case SVM_EXIT_READ_CR0 ... SVM_EXIT_WRITE_CR8: { 104903bfeeb9SBabu Moger if (vmcb_is_intercept(&svm->nested.ctl, exit_code)) 1050883b0a91SJoerg Roedel vmexit = NESTED_EXIT_DONE; 1051883b0a91SJoerg Roedel break; 1052883b0a91SJoerg Roedel } 1053883b0a91SJoerg Roedel case SVM_EXIT_READ_DR0 ... SVM_EXIT_WRITE_DR7: { 105430abaa88SBabu Moger if (vmcb_is_intercept(&svm->nested.ctl, exit_code)) 1055883b0a91SJoerg Roedel vmexit = NESTED_EXIT_DONE; 1056883b0a91SJoerg Roedel break; 1057883b0a91SJoerg Roedel } 1058883b0a91SJoerg Roedel case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 10597c86663bSPaolo Bonzini /* 10607c86663bSPaolo Bonzini * Host-intercepted exceptions have been checked already in 10617c86663bSPaolo Bonzini * nested_svm_exit_special. There is nothing to do here, 10627c86663bSPaolo Bonzini * the vmexit is injected by svm_check_nested_events. 10637c86663bSPaolo Bonzini */ 1064883b0a91SJoerg Roedel vmexit = NESTED_EXIT_DONE; 1065883b0a91SJoerg Roedel break; 1066883b0a91SJoerg Roedel } 1067883b0a91SJoerg Roedel case SVM_EXIT_ERR: { 1068883b0a91SJoerg Roedel vmexit = NESTED_EXIT_DONE; 1069883b0a91SJoerg Roedel break; 1070883b0a91SJoerg Roedel } 1071883b0a91SJoerg Roedel default: { 1072c62e2e94SBabu Moger if (vmcb_is_intercept(&svm->nested.ctl, exit_code)) 1073883b0a91SJoerg Roedel vmexit = NESTED_EXIT_DONE; 1074883b0a91SJoerg Roedel } 1075883b0a91SJoerg Roedel } 1076883b0a91SJoerg Roedel 1077883b0a91SJoerg Roedel return vmexit; 1078883b0a91SJoerg Roedel } 1079883b0a91SJoerg Roedel 1080883b0a91SJoerg Roedel int nested_svm_exit_handled(struct vcpu_svm *svm) 1081883b0a91SJoerg Roedel { 1082883b0a91SJoerg Roedel int vmexit; 1083883b0a91SJoerg Roedel 1084883b0a91SJoerg Roedel vmexit = nested_svm_intercept(svm); 1085883b0a91SJoerg Roedel 1086883b0a91SJoerg Roedel if (vmexit == NESTED_EXIT_DONE) 1087883b0a91SJoerg Roedel nested_svm_vmexit(svm); 1088883b0a91SJoerg Roedel 1089883b0a91SJoerg Roedel return vmexit; 1090883b0a91SJoerg Roedel } 1091883b0a91SJoerg Roedel 109263129754SPaolo Bonzini int nested_svm_check_permissions(struct kvm_vcpu *vcpu) 1093883b0a91SJoerg Roedel { 109463129754SPaolo Bonzini if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) { 109563129754SPaolo Bonzini kvm_queue_exception(vcpu, UD_VECTOR); 1096883b0a91SJoerg Roedel return 1; 1097883b0a91SJoerg Roedel } 1098883b0a91SJoerg Roedel 109963129754SPaolo Bonzini if (to_svm(vcpu)->vmcb->save.cpl) { 110063129754SPaolo Bonzini kvm_inject_gp(vcpu, 0); 1101883b0a91SJoerg Roedel return 1; 1102883b0a91SJoerg Roedel } 1103883b0a91SJoerg Roedel 1104883b0a91SJoerg Roedel return 0; 1105883b0a91SJoerg Roedel } 1106883b0a91SJoerg Roedel 11077c86663bSPaolo Bonzini static bool nested_exit_on_exception(struct vcpu_svm *svm) 1108883b0a91SJoerg Roedel { 11097c86663bSPaolo Bonzini unsigned int nr = svm->vcpu.arch.exception.nr; 1110883b0a91SJoerg Roedel 11119780d51dSBabu Moger return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(nr)); 11127c86663bSPaolo Bonzini } 1113883b0a91SJoerg Roedel 11147c86663bSPaolo Bonzini static void nested_svm_inject_exception_vmexit(struct vcpu_svm *svm) 11157c86663bSPaolo Bonzini { 11167c86663bSPaolo Bonzini unsigned int nr = svm->vcpu.arch.exception.nr; 1117883b0a91SJoerg Roedel 1118883b0a91SJoerg Roedel svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr; 1119883b0a91SJoerg Roedel svm->vmcb->control.exit_code_hi = 0; 11207c86663bSPaolo Bonzini 11217c86663bSPaolo Bonzini if (svm->vcpu.arch.exception.has_error_code) 11227c86663bSPaolo Bonzini svm->vmcb->control.exit_info_1 = svm->vcpu.arch.exception.error_code; 1123883b0a91SJoerg Roedel 1124883b0a91SJoerg Roedel /* 1125883b0a91SJoerg Roedel * EXITINFO2 is undefined for all exception intercepts other 1126883b0a91SJoerg Roedel * than #PF. 1127883b0a91SJoerg Roedel */ 11287c86663bSPaolo Bonzini if (nr == PF_VECTOR) { 1129883b0a91SJoerg Roedel if (svm->vcpu.arch.exception.nested_apf) 1130883b0a91SJoerg Roedel svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token; 1131883b0a91SJoerg Roedel else if (svm->vcpu.arch.exception.has_payload) 1132883b0a91SJoerg Roedel svm->vmcb->control.exit_info_2 = svm->vcpu.arch.exception.payload; 1133883b0a91SJoerg Roedel else 1134883b0a91SJoerg Roedel svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2; 11357c86663bSPaolo Bonzini } else if (nr == DB_VECTOR) { 11367c86663bSPaolo Bonzini /* See inject_pending_event. */ 11377c86663bSPaolo Bonzini kvm_deliver_exception_payload(&svm->vcpu); 11387c86663bSPaolo Bonzini if (svm->vcpu.arch.dr7 & DR7_GD) { 11397c86663bSPaolo Bonzini svm->vcpu.arch.dr7 &= ~DR7_GD; 11407c86663bSPaolo Bonzini kvm_update_dr7(&svm->vcpu); 11417c86663bSPaolo Bonzini } 11427c86663bSPaolo Bonzini } else 11437c86663bSPaolo Bonzini WARN_ON(svm->vcpu.arch.exception.has_payload); 1144883b0a91SJoerg Roedel 11457c86663bSPaolo Bonzini nested_svm_vmexit(svm); 1146883b0a91SJoerg Roedel } 1147883b0a91SJoerg Roedel 11485b672408SPaolo Bonzini static inline bool nested_exit_on_init(struct vcpu_svm *svm) 11495b672408SPaolo Bonzini { 1150c62e2e94SBabu Moger return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT); 11515b672408SPaolo Bonzini } 11525b672408SPaolo Bonzini 115333b22172SPaolo Bonzini static int svm_check_nested_events(struct kvm_vcpu *vcpu) 1154883b0a91SJoerg Roedel { 1155883b0a91SJoerg Roedel struct vcpu_svm *svm = to_svm(vcpu); 1156883b0a91SJoerg Roedel bool block_nested_events = 1157bd279629SPaolo Bonzini kvm_event_needs_reinjection(vcpu) || svm->nested.nested_run_pending; 11585b672408SPaolo Bonzini struct kvm_lapic *apic = vcpu->arch.apic; 11595b672408SPaolo Bonzini 11605b672408SPaolo Bonzini if (lapic_in_kernel(vcpu) && 11615b672408SPaolo Bonzini test_bit(KVM_APIC_INIT, &apic->pending_events)) { 11625b672408SPaolo Bonzini if (block_nested_events) 11635b672408SPaolo Bonzini return -EBUSY; 11645b672408SPaolo Bonzini if (!nested_exit_on_init(svm)) 11655b672408SPaolo Bonzini return 0; 11663a87c7e0SSean Christopherson nested_svm_simple_vmexit(svm, SVM_EXIT_INIT); 11675b672408SPaolo Bonzini return 0; 11685b672408SPaolo Bonzini } 1169883b0a91SJoerg Roedel 11707c86663bSPaolo Bonzini if (vcpu->arch.exception.pending) { 11714020da3bSMaxim Levitsky /* 11724020da3bSMaxim Levitsky * Only a pending nested run can block a pending exception. 11734020da3bSMaxim Levitsky * Otherwise an injected NMI/interrupt should either be 11744020da3bSMaxim Levitsky * lost or delivered to the nested hypervisor in the EXITINTINFO 11754020da3bSMaxim Levitsky * vmcb field, while delivering the pending exception. 11764020da3bSMaxim Levitsky */ 11774020da3bSMaxim Levitsky if (svm->nested.nested_run_pending) 11787c86663bSPaolo Bonzini return -EBUSY; 11797c86663bSPaolo Bonzini if (!nested_exit_on_exception(svm)) 11807c86663bSPaolo Bonzini return 0; 11817c86663bSPaolo Bonzini nested_svm_inject_exception_vmexit(svm); 11827c86663bSPaolo Bonzini return 0; 11837c86663bSPaolo Bonzini } 11847c86663bSPaolo Bonzini 1185221e7610SPaolo Bonzini if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) { 118655714cddSPaolo Bonzini if (block_nested_events) 118755714cddSPaolo Bonzini return -EBUSY; 1188221e7610SPaolo Bonzini if (!nested_exit_on_smi(svm)) 1189221e7610SPaolo Bonzini return 0; 11903a87c7e0SSean Christopherson nested_svm_simple_vmexit(svm, SVM_EXIT_SMI); 119155714cddSPaolo Bonzini return 0; 119255714cddSPaolo Bonzini } 119355714cddSPaolo Bonzini 1194221e7610SPaolo Bonzini if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) { 11959c3d370aSCathy Avery if (block_nested_events) 11969c3d370aSCathy Avery return -EBUSY; 1197221e7610SPaolo Bonzini if (!nested_exit_on_nmi(svm)) 1198221e7610SPaolo Bonzini return 0; 11993a87c7e0SSean Christopherson nested_svm_simple_vmexit(svm, SVM_EXIT_NMI); 12009c3d370aSCathy Avery return 0; 12019c3d370aSCathy Avery } 12029c3d370aSCathy Avery 1203221e7610SPaolo Bonzini if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) { 1204883b0a91SJoerg Roedel if (block_nested_events) 1205883b0a91SJoerg Roedel return -EBUSY; 1206221e7610SPaolo Bonzini if (!nested_exit_on_intr(svm)) 1207221e7610SPaolo Bonzini return 0; 12083a87c7e0SSean Christopherson trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip); 12093a87c7e0SSean Christopherson nested_svm_simple_vmexit(svm, SVM_EXIT_INTR); 1210883b0a91SJoerg Roedel return 0; 1211883b0a91SJoerg Roedel } 1212883b0a91SJoerg Roedel 1213883b0a91SJoerg Roedel return 0; 1214883b0a91SJoerg Roedel } 1215883b0a91SJoerg Roedel 1216883b0a91SJoerg Roedel int nested_svm_exit_special(struct vcpu_svm *svm) 1217883b0a91SJoerg Roedel { 1218883b0a91SJoerg Roedel u32 exit_code = svm->vmcb->control.exit_code; 1219883b0a91SJoerg Roedel 1220883b0a91SJoerg Roedel switch (exit_code) { 1221883b0a91SJoerg Roedel case SVM_EXIT_INTR: 1222883b0a91SJoerg Roedel case SVM_EXIT_NMI: 1223883b0a91SJoerg Roedel case SVM_EXIT_NPF: 1224883b0a91SJoerg Roedel return NESTED_EXIT_HOST; 12257c86663bSPaolo Bonzini case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: { 12267c86663bSPaolo Bonzini u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE); 12277c86663bSPaolo Bonzini 12284995a368SCathy Avery if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] & 12299780d51dSBabu Moger excp_bits) 12307c86663bSPaolo Bonzini return NESTED_EXIT_HOST; 12317c86663bSPaolo Bonzini else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR && 123268fd66f1SVitaly Kuznetsov svm->vcpu.arch.apf.host_apf_flags) 1233a3535be7SPaolo Bonzini /* Trap async PF even if not shadowing */ 1234883b0a91SJoerg Roedel return NESTED_EXIT_HOST; 1235883b0a91SJoerg Roedel break; 12367c86663bSPaolo Bonzini } 1237883b0a91SJoerg Roedel default: 1238883b0a91SJoerg Roedel break; 1239883b0a91SJoerg Roedel } 1240883b0a91SJoerg Roedel 1241883b0a91SJoerg Roedel return NESTED_EXIT_CONTINUE; 1242883b0a91SJoerg Roedel } 124333b22172SPaolo Bonzini 12445228eb96SMaxim Levitsky void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu) 12455228eb96SMaxim Levitsky { 12465228eb96SMaxim Levitsky struct vcpu_svm *svm = to_svm(vcpu); 12475228eb96SMaxim Levitsky 12485228eb96SMaxim Levitsky vcpu->arch.tsc_scaling_ratio = 12495228eb96SMaxim Levitsky kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio, 12505228eb96SMaxim Levitsky svm->tsc_ratio_msr); 12515228eb96SMaxim Levitsky svm_write_tsc_multiplier(vcpu, vcpu->arch.tsc_scaling_ratio); 12525228eb96SMaxim Levitsky } 12535228eb96SMaxim Levitsky 1254cc440cdaSPaolo Bonzini static int svm_get_nested_state(struct kvm_vcpu *vcpu, 1255cc440cdaSPaolo Bonzini struct kvm_nested_state __user *user_kvm_nested_state, 1256cc440cdaSPaolo Bonzini u32 user_data_size) 1257cc440cdaSPaolo Bonzini { 1258cc440cdaSPaolo Bonzini struct vcpu_svm *svm; 1259cc440cdaSPaolo Bonzini struct kvm_nested_state kvm_state = { 1260cc440cdaSPaolo Bonzini .flags = 0, 1261cc440cdaSPaolo Bonzini .format = KVM_STATE_NESTED_FORMAT_SVM, 1262cc440cdaSPaolo Bonzini .size = sizeof(kvm_state), 1263cc440cdaSPaolo Bonzini }; 1264cc440cdaSPaolo Bonzini struct vmcb __user *user_vmcb = (struct vmcb __user *) 1265cc440cdaSPaolo Bonzini &user_kvm_nested_state->data.svm[0]; 1266cc440cdaSPaolo Bonzini 1267cc440cdaSPaolo Bonzini if (!vcpu) 1268cc440cdaSPaolo Bonzini return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE; 1269cc440cdaSPaolo Bonzini 1270cc440cdaSPaolo Bonzini svm = to_svm(vcpu); 1271cc440cdaSPaolo Bonzini 1272cc440cdaSPaolo Bonzini if (user_data_size < kvm_state.size) 1273cc440cdaSPaolo Bonzini goto out; 1274cc440cdaSPaolo Bonzini 1275cc440cdaSPaolo Bonzini /* First fill in the header and copy it out. */ 1276cc440cdaSPaolo Bonzini if (is_guest_mode(vcpu)) { 12770dd16b5bSMaxim Levitsky kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa; 1278cc440cdaSPaolo Bonzini kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE; 1279cc440cdaSPaolo Bonzini kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE; 1280cc440cdaSPaolo Bonzini 1281cc440cdaSPaolo Bonzini if (svm->nested.nested_run_pending) 1282cc440cdaSPaolo Bonzini kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING; 1283cc440cdaSPaolo Bonzini } 1284cc440cdaSPaolo Bonzini 1285cc440cdaSPaolo Bonzini if (gif_set(svm)) 1286cc440cdaSPaolo Bonzini kvm_state.flags |= KVM_STATE_NESTED_GIF_SET; 1287cc440cdaSPaolo Bonzini 1288cc440cdaSPaolo Bonzini if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state))) 1289cc440cdaSPaolo Bonzini return -EFAULT; 1290cc440cdaSPaolo Bonzini 1291cc440cdaSPaolo Bonzini if (!is_guest_mode(vcpu)) 1292cc440cdaSPaolo Bonzini goto out; 1293cc440cdaSPaolo Bonzini 1294cc440cdaSPaolo Bonzini /* 1295cc440cdaSPaolo Bonzini * Copy over the full size of the VMCB rather than just the size 1296cc440cdaSPaolo Bonzini * of the structs. 1297cc440cdaSPaolo Bonzini */ 1298cc440cdaSPaolo Bonzini if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE)) 1299cc440cdaSPaolo Bonzini return -EFAULT; 1300cc440cdaSPaolo Bonzini if (copy_to_user(&user_vmcb->control, &svm->nested.ctl, 1301cc440cdaSPaolo Bonzini sizeof(user_vmcb->control))) 1302cc440cdaSPaolo Bonzini return -EFAULT; 13034995a368SCathy Avery if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save, 1304cc440cdaSPaolo Bonzini sizeof(user_vmcb->save))) 1305cc440cdaSPaolo Bonzini return -EFAULT; 1306cc440cdaSPaolo Bonzini out: 1307cc440cdaSPaolo Bonzini return kvm_state.size; 1308cc440cdaSPaolo Bonzini } 1309cc440cdaSPaolo Bonzini 1310cc440cdaSPaolo Bonzini static int svm_set_nested_state(struct kvm_vcpu *vcpu, 1311cc440cdaSPaolo Bonzini struct kvm_nested_state __user *user_kvm_nested_state, 1312cc440cdaSPaolo Bonzini struct kvm_nested_state *kvm_state) 1313cc440cdaSPaolo Bonzini { 1314cc440cdaSPaolo Bonzini struct vcpu_svm *svm = to_svm(vcpu); 1315cc440cdaSPaolo Bonzini struct vmcb __user *user_vmcb = (struct vmcb __user *) 1316cc440cdaSPaolo Bonzini &user_kvm_nested_state->data.svm[0]; 13176ccbd29aSJoerg Roedel struct vmcb_control_area *ctl; 13186ccbd29aSJoerg Roedel struct vmcb_save_area *save; 1319dbc4739bSSean Christopherson unsigned long cr0; 13206ccbd29aSJoerg Roedel int ret; 1321cc440cdaSPaolo Bonzini 13226ccbd29aSJoerg Roedel BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) > 13236ccbd29aSJoerg Roedel KVM_STATE_NESTED_SVM_VMCB_SIZE); 13246ccbd29aSJoerg Roedel 1325cc440cdaSPaolo Bonzini if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM) 1326cc440cdaSPaolo Bonzini return -EINVAL; 1327cc440cdaSPaolo Bonzini 1328cc440cdaSPaolo Bonzini if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE | 1329cc440cdaSPaolo Bonzini KVM_STATE_NESTED_RUN_PENDING | 1330cc440cdaSPaolo Bonzini KVM_STATE_NESTED_GIF_SET)) 1331cc440cdaSPaolo Bonzini return -EINVAL; 1332cc440cdaSPaolo Bonzini 1333cc440cdaSPaolo Bonzini /* 1334cc440cdaSPaolo Bonzini * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's 1335cc440cdaSPaolo Bonzini * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed. 1336cc440cdaSPaolo Bonzini */ 1337cc440cdaSPaolo Bonzini if (!(vcpu->arch.efer & EFER_SVME)) { 1338cc440cdaSPaolo Bonzini /* GIF=1 and no guest mode are required if SVME=0. */ 1339cc440cdaSPaolo Bonzini if (kvm_state->flags != KVM_STATE_NESTED_GIF_SET) 1340cc440cdaSPaolo Bonzini return -EINVAL; 1341cc440cdaSPaolo Bonzini } 1342cc440cdaSPaolo Bonzini 1343cc440cdaSPaolo Bonzini /* SMM temporarily disables SVM, so we cannot be in guest mode. */ 1344cc440cdaSPaolo Bonzini if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) 1345cc440cdaSPaolo Bonzini return -EINVAL; 1346cc440cdaSPaolo Bonzini 1347cc440cdaSPaolo Bonzini if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) { 1348cc440cdaSPaolo Bonzini svm_leave_nested(svm); 1349d5cd6f34SVitaly Kuznetsov svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); 1350d5cd6f34SVitaly Kuznetsov return 0; 1351cc440cdaSPaolo Bonzini } 1352cc440cdaSPaolo Bonzini 1353cc440cdaSPaolo Bonzini if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa)) 1354cc440cdaSPaolo Bonzini return -EINVAL; 1355cc440cdaSPaolo Bonzini if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE) 1356cc440cdaSPaolo Bonzini return -EINVAL; 1357cc440cdaSPaolo Bonzini 13586ccbd29aSJoerg Roedel ret = -ENOMEM; 1359eba04b20SSean Christopherson ctl = kzalloc(sizeof(*ctl), GFP_KERNEL_ACCOUNT); 1360eba04b20SSean Christopherson save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT); 13616ccbd29aSJoerg Roedel if (!ctl || !save) 13626ccbd29aSJoerg Roedel goto out_free; 13636ccbd29aSJoerg Roedel 13646ccbd29aSJoerg Roedel ret = -EFAULT; 13656ccbd29aSJoerg Roedel if (copy_from_user(ctl, &user_vmcb->control, sizeof(*ctl))) 13666ccbd29aSJoerg Roedel goto out_free; 13676ccbd29aSJoerg Roedel if (copy_from_user(save, &user_vmcb->save, sizeof(*save))) 13686ccbd29aSJoerg Roedel goto out_free; 13696ccbd29aSJoerg Roedel 13706ccbd29aSJoerg Roedel ret = -EINVAL; 1371ee695f22SKrish Sadhukhan if (!nested_vmcb_check_controls(vcpu, ctl)) 13726ccbd29aSJoerg Roedel goto out_free; 1373cc440cdaSPaolo Bonzini 1374cc440cdaSPaolo Bonzini /* 1375cc440cdaSPaolo Bonzini * Processor state contains L2 state. Check that it is 1376cb9b6a1bSPaolo Bonzini * valid for guest mode (see nested_vmcb_check_save). 1377cc440cdaSPaolo Bonzini */ 1378cc440cdaSPaolo Bonzini cr0 = kvm_read_cr0(vcpu); 1379cc440cdaSPaolo Bonzini if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW)) 13806ccbd29aSJoerg Roedel goto out_free; 1381cc440cdaSPaolo Bonzini 1382cc440cdaSPaolo Bonzini /* 1383cc440cdaSPaolo Bonzini * Validate host state saved from before VMRUN (see 1384cc440cdaSPaolo Bonzini * nested_svm_check_permissions). 1385cc440cdaSPaolo Bonzini */ 13866906e06dSKrish Sadhukhan if (!(save->cr0 & X86_CR0_PG) || 13876906e06dSKrish Sadhukhan !(save->cr0 & X86_CR0_PE) || 13886906e06dSKrish Sadhukhan (save->rflags & X86_EFLAGS_VM) || 138963129754SPaolo Bonzini !nested_vmcb_valid_sregs(vcpu, save)) 13906ccbd29aSJoerg Roedel goto out_free; 1391cc440cdaSPaolo Bonzini 1392cc440cdaSPaolo Bonzini /* 1393b222b0b8SMaxim Levitsky * While the nested guest CR3 is already checked and set by 1394b222b0b8SMaxim Levitsky * KVM_SET_SREGS, it was set when nested state was yet loaded, 1395b222b0b8SMaxim Levitsky * thus MMU might not be initialized correctly. 1396b222b0b8SMaxim Levitsky * Set it again to fix this. 1397b222b0b8SMaxim Levitsky */ 1398b222b0b8SMaxim Levitsky 1399b222b0b8SMaxim Levitsky ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3, 1400b222b0b8SMaxim Levitsky nested_npt_enabled(svm), false); 1401b222b0b8SMaxim Levitsky if (WARN_ON_ONCE(ret)) 1402b222b0b8SMaxim Levitsky goto out_free; 1403b222b0b8SMaxim Levitsky 1404b222b0b8SMaxim Levitsky 1405b222b0b8SMaxim Levitsky /* 14064995a368SCathy Avery * All checks done, we can enter guest mode. Userspace provides 14074995a368SCathy Avery * vmcb12.control, which will be combined with L1 and stored into 14084995a368SCathy Avery * vmcb02, and the L1 save state which we store in vmcb01. 14094995a368SCathy Avery * L2 registers if needed are moved from the current VMCB to VMCB02. 1410cc440cdaSPaolo Bonzini */ 141181f76adaSMaxim Levitsky 14129d290e16SMaxim Levitsky if (is_guest_mode(vcpu)) 14139d290e16SMaxim Levitsky svm_leave_nested(svm); 14149d290e16SMaxim Levitsky else 14159d290e16SMaxim Levitsky svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save; 14169d290e16SMaxim Levitsky 1417063ab16cSMaxim Levitsky svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET)); 1418063ab16cSMaxim Levitsky 141981f76adaSMaxim Levitsky svm->nested.nested_run_pending = 142081f76adaSMaxim Levitsky !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING); 142181f76adaSMaxim Levitsky 14220dd16b5bSMaxim Levitsky svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa; 1423c08f390aSPaolo Bonzini 14242bb16beaSVitaly Kuznetsov svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save); 14259e8f0fbfSPaolo Bonzini nested_load_control_from_vmcb12(svm, ctl); 14264995a368SCathy Avery 14274995a368SCathy Avery svm_switch_vmcb(svm, &svm->nested.vmcb02); 14289e8f0fbfSPaolo Bonzini nested_vmcb02_prepare_control(svm); 1429a7d5c7ceSPaolo Bonzini kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu); 14306ccbd29aSJoerg Roedel ret = 0; 14316ccbd29aSJoerg Roedel out_free: 14326ccbd29aSJoerg Roedel kfree(save); 14336ccbd29aSJoerg Roedel kfree(ctl); 14346ccbd29aSJoerg Roedel 14356ccbd29aSJoerg Roedel return ret; 1436cc440cdaSPaolo Bonzini } 1437cc440cdaSPaolo Bonzini 1438232f75d3SMaxim Levitsky static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu) 1439232f75d3SMaxim Levitsky { 1440232f75d3SMaxim Levitsky struct vcpu_svm *svm = to_svm(vcpu); 1441232f75d3SMaxim Levitsky 1442232f75d3SMaxim Levitsky if (WARN_ON(!is_guest_mode(vcpu))) 1443232f75d3SMaxim Levitsky return true; 1444232f75d3SMaxim Levitsky 1445158a48ecSMaxim Levitsky if (!vcpu->arch.pdptrs_from_userspace && 1446158a48ecSMaxim Levitsky !nested_npt_enabled(svm) && is_pae_paging(vcpu)) 1447b222b0b8SMaxim Levitsky /* 1448b222b0b8SMaxim Levitsky * Reload the guest's PDPTRs since after a migration 1449b222b0b8SMaxim Levitsky * the guest CR3 might be restored prior to setting the nested 1450b222b0b8SMaxim Levitsky * state which can lead to a load of wrong PDPTRs. 1451b222b0b8SMaxim Levitsky */ 1452b222b0b8SMaxim Levitsky if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3))) 1453232f75d3SMaxim Levitsky return false; 1454232f75d3SMaxim Levitsky 1455232f75d3SMaxim Levitsky if (!nested_svm_vmrun_msrpm(svm)) { 1456232f75d3SMaxim Levitsky vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR; 1457232f75d3SMaxim Levitsky vcpu->run->internal.suberror = 1458232f75d3SMaxim Levitsky KVM_INTERNAL_ERROR_EMULATION; 1459232f75d3SMaxim Levitsky vcpu->run->internal.ndata = 0; 1460232f75d3SMaxim Levitsky return false; 1461232f75d3SMaxim Levitsky } 1462232f75d3SMaxim Levitsky 1463232f75d3SMaxim Levitsky return true; 1464232f75d3SMaxim Levitsky } 1465232f75d3SMaxim Levitsky 146633b22172SPaolo Bonzini struct kvm_x86_nested_ops svm_nested_ops = { 146733b22172SPaolo Bonzini .check_events = svm_check_nested_events, 1468cb6a32c2SSean Christopherson .triple_fault = nested_svm_triple_fault, 1469a7d5c7ceSPaolo Bonzini .get_nested_state_pages = svm_get_nested_state_pages, 1470cc440cdaSPaolo Bonzini .get_state = svm_get_nested_state, 1471cc440cdaSPaolo Bonzini .set_state = svm_set_nested_state, 147233b22172SPaolo Bonzini }; 1473