xref: /linux/arch/x86/kvm/svm/nested.c (revision 0de1020f7bbb3e1c9cd5b6f3eb4bdd661b1ff735)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM support
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  *   Yaniv Kamay  <yaniv@qumranet.com>
12  *   Avi Kivity   <avi@qumranet.com>
13  */
14 
15 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
16 
17 #include <linux/kvm_types.h>
18 #include <linux/kvm_host.h>
19 #include <linux/kernel.h>
20 
21 #include <asm/msr-index.h>
22 #include <asm/debugreg.h>
23 
24 #include "kvm_emulate.h"
25 #include "trace.h"
26 #include "mmu.h"
27 #include "x86.h"
28 #include "smm.h"
29 #include "cpuid.h"
30 #include "lapic.h"
31 #include "svm.h"
32 #include "hyperv.h"
33 
34 #define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
35 
36 static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
37 				       struct x86_exception *fault,
38 				       bool from_hardware)
39 {
40 	struct vcpu_svm *svm = to_svm(vcpu);
41 	struct vmcb *vmcb = svm->vmcb;
42 	u64 fault_stage;
43 
44 	/*
45 	 * For hardware NPF exits, the GUEST_FAULT_STAGE bits are only
46 	 * available in the hardware exit_info_1, since the guest_mmu
47 	 * walker doesn't know whether the faulting GPA was a page table
48 	 * page or final page from L2's perspective.
49 	 */
50 	if (from_hardware)
51 		fault_stage = vmcb->control.exit_info_1 &
52 			      PFERR_GUEST_FAULT_STAGE_MASK;
53 	else
54 		fault_stage = fault->error_code & PFERR_GUEST_FAULT_STAGE_MASK;
55 
56 	/*
57 	 * All nested page faults should be annotated as occurring on the
58 	 * final translation *or* the page walk. Arbitrarily choose "final"
59 	 * if KVM is buggy and enumerated both or neither.
60 	 */
61 	if (WARN_ON_ONCE(hweight64(fault_stage) != 1))
62 		fault_stage = PFERR_GUEST_FINAL_MASK;
63 
64 	vmcb->control.exit_code = SVM_EXIT_NPF;
65 	vmcb->control.exit_info_1 = fault_stage |
66 				    (fault->error_code & ~PFERR_GUEST_FAULT_STAGE_MASK);
67 	vmcb->control.exit_info_2 = fault->address;
68 
69 	nested_svm_vmexit(svm);
70 }
71 
72 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
73 {
74 	struct vcpu_svm *svm = to_svm(vcpu);
75 	u64 cr3 = svm->nested.ctl.nested_cr3;
76 	u64 pdpte;
77 	int ret;
78 
79 	/*
80 	 * Note, nCR3 is "assumed" to be 32-byte aligned, i.e. the CPU ignores
81 	 * nCR3[4:0] when loading PDPTEs from memory.
82 	 */
83 	ret = kvm_vcpu_read_guest_page(vcpu, gpa_to_gfn(cr3), &pdpte,
84 				       (cr3 & GENMASK(11, 5)) + index * 8, 8);
85 	if (ret)
86 		return 0;
87 	return pdpte;
88 }
89 
90 static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu)
91 {
92 	struct vcpu_svm *svm = to_svm(vcpu);
93 
94 	return svm->nested.ctl.nested_cr3;
95 }
96 
97 static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
98 {
99 	struct vcpu_svm *svm = to_svm(vcpu);
100 
101 	WARN_ON(mmu_is_nested(vcpu));
102 
103 	vcpu->arch.mmu = &vcpu->arch.guest_mmu;
104 
105 	/*
106 	 * The NPT format depends on L1's CR4 and EFER, which is in vmcb01.  Note,
107 	 * when called via KVM_SET_NESTED_STATE, that state may _not_ match current
108 	 * vCPU state.  CR0.WP is explicitly ignored, while CR0.PG is required.
109 	 */
110 	kvm_init_shadow_npt_mmu(vcpu, svm->vmcb01.ptr->save.cr4,
111 				svm->vmcb01.ptr->save.efer,
112 				svm->nested.ctl.nested_cr3,
113 				svm->nested.ctl.misc_ctl);
114 	vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
115 	vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
116 	vcpu->arch.mmu->inject_page_fault = nested_svm_inject_npf_exit;
117 	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;
118 }
119 
120 static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu)
121 {
122 	vcpu->arch.mmu = &vcpu->arch.root_mmu;
123 	vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
124 }
125 
126 static bool nested_vmcb_needs_vls_intercept(struct vcpu_svm *svm)
127 {
128 	if (!guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_V_VMSAVE_VMLOAD))
129 		return true;
130 
131 	if (!nested_npt_enabled(svm))
132 		return true;
133 
134 	if (!(svm->nested.ctl.misc_ctl2 & SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE))
135 		return true;
136 
137 	return false;
138 }
139 
140 void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm)
141 {
142 	struct vmcb_ctrl_area_cached *vmcb12_ctrl = &svm->nested.ctl;
143 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
144 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
145 	unsigned int i;
146 
147 	if (WARN_ON_ONCE(svm->vmcb != vmcb02))
148 		return;
149 
150 	vmcb_mark_dirty(vmcb02, VMCB_INTERCEPTS);
151 
152 	for (i = 0; i < MAX_INTERCEPT; i++)
153 		vmcb02->control.intercepts[i] = vmcb01->control.intercepts[i];
154 
155 	if (vmcb12_ctrl->int_ctl & V_INTR_MASKING_MASK) {
156 		/*
157 		 * If L2 is active and V_INTR_MASKING is enabled in vmcb12,
158 		 * disable intercept of CR8 writes as L2's CR8 does not affect
159 		 * any interrupt KVM may want to inject.
160 		 *
161 		 * Similarly, disable intercept of virtual interrupts (used to
162 		 * detect interrupt windows) if the saved RFLAGS.IF is '0', as
163 		 * the effective RFLAGS.IF for L1 interrupts will never be set
164 		 * while L2 is running (L2's RFLAGS.IF doesn't affect L1 IRQs).
165 		 */
166 		vmcb_clr_intercept(&vmcb02->control, INTERCEPT_CR8_WRITE);
167 		if (!(vmcb01->save.rflags & X86_EFLAGS_IF))
168 			vmcb_clr_intercept(&vmcb02->control, INTERCEPT_VINTR);
169 	}
170 
171 	for (i = 0; i < MAX_INTERCEPT; i++)
172 		vmcb02->control.intercepts[i] |= vmcb12_ctrl->intercepts[i];
173 
174 	/* If SMI is not intercepted, ignore guest SMI intercept as well  */
175 	if (!intercept_smi)
176 		vmcb_clr_intercept(&vmcb02->control, INTERCEPT_SMI);
177 
178 	if (nested_vmcb_needs_vls_intercept(svm)) {
179 		/*
180 		 * If the virtual VMLOAD/VMSAVE is not enabled for the L2,
181 		 * we must intercept these instructions to correctly
182 		 * emulate them in case L1 doesn't intercept them.
183 		 */
184 		vmcb_set_intercept(&vmcb02->control, INTERCEPT_VMLOAD);
185 		vmcb_set_intercept(&vmcb02->control, INTERCEPT_VMSAVE);
186 	} else {
187 		WARN_ON_ONCE(!(vmcb02->control.misc_ctl2 & SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE));
188 	}
189 }
190 
191 /*
192  * This array (and its actual size) holds the set of offsets (indexing by chunk
193  * size) to process when merging vmcb12's MSRPM with vmcb01's MSRPM.  Note, the
194  * set of MSRs for which interception is disabled in vmcb01 is per-vCPU, e.g.
195  * based on CPUID features.  This array only tracks MSRs that *might* be passed
196  * through to the guest.
197  *
198  * Hardcode the capacity of the array based on the maximum number of _offsets_.
199  * MSRs are batched together, so there are fewer offsets than MSRs.
200  */
201 static int nested_svm_msrpm_merge_offsets[10] __ro_after_init;
202 static int nested_svm_nr_msrpm_merge_offsets __ro_after_init;
203 typedef unsigned long nsvm_msrpm_merge_t;
204 
205 int __init nested_svm_init_msrpm_merge_offsets(void)
206 {
207 	static const u32 merge_msrs[] __initconst = {
208 		MSR_STAR,
209 		MSR_IA32_SYSENTER_CS,
210 		MSR_IA32_SYSENTER_EIP,
211 		MSR_IA32_SYSENTER_ESP,
212 	#ifdef CONFIG_X86_64
213 		MSR_GS_BASE,
214 		MSR_FS_BASE,
215 		MSR_KERNEL_GS_BASE,
216 		MSR_LSTAR,
217 		MSR_CSTAR,
218 		MSR_SYSCALL_MASK,
219 	#endif
220 		MSR_IA32_SPEC_CTRL,
221 		MSR_IA32_PRED_CMD,
222 		MSR_IA32_FLUSH_CMD,
223 		MSR_IA32_APERF,
224 		MSR_IA32_MPERF,
225 		MSR_IA32_LASTBRANCHFROMIP,
226 		MSR_IA32_LASTBRANCHTOIP,
227 		MSR_IA32_LASTINTFROMIP,
228 		MSR_IA32_LASTINTTOIP,
229 
230 		MSR_K7_PERFCTR0,
231 		MSR_K7_PERFCTR1,
232 		MSR_K7_PERFCTR2,
233 		MSR_K7_PERFCTR3,
234 		MSR_F15H_PERF_CTR0,
235 		MSR_F15H_PERF_CTR1,
236 		MSR_F15H_PERF_CTR2,
237 		MSR_F15H_PERF_CTR3,
238 		MSR_F15H_PERF_CTR4,
239 		MSR_F15H_PERF_CTR5,
240 
241 		MSR_AMD64_PERF_CNTR_GLOBAL_CTL,
242 		MSR_AMD64_PERF_CNTR_GLOBAL_STATUS,
243 		MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_CLR,
244 		MSR_AMD64_PERF_CNTR_GLOBAL_STATUS_SET,
245 	};
246 	int i, j;
247 
248 	for (i = 0; i < ARRAY_SIZE(merge_msrs); i++) {
249 		int bit_nr = svm_msrpm_bit_nr(merge_msrs[i]);
250 		u32 offset;
251 
252 		if (WARN_ON(bit_nr < 0))
253 			return -EIO;
254 
255 		/*
256 		 * Merging is done in chunks to reduce the number of accesses
257 		 * to L1's bitmap.
258 		 */
259 		offset = bit_nr / BITS_PER_BYTE / sizeof(nsvm_msrpm_merge_t);
260 
261 		for (j = 0; j < nested_svm_nr_msrpm_merge_offsets; j++) {
262 			if (nested_svm_msrpm_merge_offsets[j] == offset)
263 				break;
264 		}
265 
266 		if (j < nested_svm_nr_msrpm_merge_offsets)
267 			continue;
268 
269 		if (WARN_ON(j >= ARRAY_SIZE(nested_svm_msrpm_merge_offsets)))
270 			return -EIO;
271 
272 		nested_svm_msrpm_merge_offsets[j] = offset;
273 		nested_svm_nr_msrpm_merge_offsets++;
274 	}
275 
276 	return 0;
277 }
278 
279 /*
280  * Merge L0's (KVM) and L1's (Nested VMCB) MSR permission bitmaps. The function
281  * is optimized in that it only merges the parts where KVM MSR permission bitmap
282  * may contain zero bits.
283  */
284 static bool nested_svm_merge_msrpm(struct kvm_vcpu *vcpu)
285 {
286 	struct vcpu_svm *svm = to_svm(vcpu);
287 	nsvm_msrpm_merge_t *msrpm02 = svm->nested.msrpm;
288 	nsvm_msrpm_merge_t *msrpm01 = svm->msrpm;
289 	int i;
290 
291 	/*
292 	 * MSR bitmap update can be skipped when:
293 	 * - MSR bitmap for L1 hasn't changed.
294 	 * - Nested hypervisor (L1) is attempting to launch the same L2 as
295 	 *   before.
296 	 * - Nested hypervisor (L1) is using Hyper-V emulation interface and
297 	 * tells KVM (L0) there were no changes in MSR bitmap for L2.
298 	 */
299 #ifdef CONFIG_KVM_HYPERV
300 	if (!svm->nested.force_msr_bitmap_recalc) {
301 		struct hv_vmcb_enlightenments *hve = &svm->nested.ctl.hv_enlightenments;
302 
303 		if (kvm_hv_hypercall_enabled(vcpu) &&
304 		    hve->hv_enlightenments_control.msr_bitmap &&
305 		    (svm->nested.ctl.clean & BIT(HV_VMCB_NESTED_ENLIGHTENMENTS)))
306 			goto set_msrpm_base_pa;
307 	}
308 #endif
309 
310 	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
311 		return true;
312 
313 	for (i = 0; i < nested_svm_nr_msrpm_merge_offsets; i++) {
314 		const int p = nested_svm_msrpm_merge_offsets[i];
315 		nsvm_msrpm_merge_t l1_val;
316 		gpa_t gpa;
317 
318 		gpa = svm->nested.ctl.msrpm_base_pa + (p * sizeof(l1_val));
319 
320 		if (kvm_vcpu_read_guest(vcpu, gpa, &l1_val, sizeof(l1_val)))
321 			return false;
322 
323 		msrpm02[p] = msrpm01[p] | l1_val;
324 	}
325 
326 	svm->nested.force_msr_bitmap_recalc = false;
327 
328 #ifdef CONFIG_KVM_HYPERV
329 set_msrpm_base_pa:
330 #endif
331 	svm->vmcb->control.msrpm_base_pa = __sme_set(__pa(svm->nested.msrpm));
332 
333 	return true;
334 }
335 
336 /*
337  * Bits 11:0 of bitmap address are ignored by hardware
338  */
339 static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
340 {
341 	u64 addr = PAGE_ALIGN(pa);
342 
343 	return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
344 	    kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
345 }
346 
347 static bool nested_svm_event_inj_valid_exept(struct kvm_vcpu *vcpu, u8 vector)
348 {
349 	/*
350 	 * Vectors that do not correspond to a defined exception are invalid
351 	 * (including #NMI and reserved vectors). In a best effort to define
352 	 * valid exceptions based on the virtual CPU, make all exceptions always
353 	 * valid except those obviously tied to a CPU feature.
354 	 */
355 	switch (vector) {
356 	case DE_VECTOR: case DB_VECTOR: case BP_VECTOR: case OF_VECTOR:
357 	case BR_VECTOR: case UD_VECTOR: case NM_VECTOR: case DF_VECTOR:
358 	case TS_VECTOR: case NP_VECTOR: case SS_VECTOR: case GP_VECTOR:
359 	case PF_VECTOR: case MF_VECTOR: case AC_VECTOR: case MC_VECTOR:
360 	case XM_VECTOR: case HV_VECTOR: case SX_VECTOR:
361 		return true;
362 	case CP_VECTOR:
363 		return guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK);
364 	case VC_VECTOR:
365 		return guest_cpu_cap_has(vcpu, X86_FEATURE_SEV_ES);
366 	}
367 	return false;
368 }
369 
370 /*
371  * According to the APM, VMRUN exits with SVM_EXIT_ERR if SVM_EVTINJ_VALID is
372  * set and:
373  * - The type of event_inj is not one of the defined values.
374  * - The type is SVM_EVTINJ_TYPE_EXEPT, but the vector is not a valid exception.
375  */
376 static bool nested_svm_check_event_inj(struct kvm_vcpu *vcpu, u32 event_inj)
377 {
378 	u32 type = event_inj & SVM_EVTINJ_TYPE_MASK;
379 	u8 vector = event_inj & SVM_EVTINJ_VEC_MASK;
380 
381 	if (!(event_inj & SVM_EVTINJ_VALID))
382 		return true;
383 
384 	if (type != SVM_EVTINJ_TYPE_INTR && type != SVM_EVTINJ_TYPE_NMI &&
385 	    type != SVM_EVTINJ_TYPE_EXEPT && type != SVM_EVTINJ_TYPE_SOFT)
386 		return false;
387 
388 	if (type == SVM_EVTINJ_TYPE_EXEPT &&
389 	    !nested_svm_event_inj_valid_exept(vcpu, vector))
390 		return false;
391 
392 	return true;
393 }
394 
395 static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
396 				       struct vmcb_ctrl_area_cached *control)
397 {
398 	if (CC(!vmcb12_is_intercept(control, INTERCEPT_VMRUN)))
399 		return false;
400 
401 	if (CC(control->asid == 0))
402 		return false;
403 
404 	if (CC((control->misc_ctl & SVM_MISC_ENABLE_NP) &&
405 	       !kvm_vcpu_is_legal_gpa(vcpu, control->nested_cr3)))
406 		return false;
407 
408 	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
409 					   MSRPM_SIZE)))
410 		return false;
411 	if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
412 					   IOPM_SIZE)))
413 		return false;
414 
415 	if (CC((control->int_ctl & V_NMI_ENABLE_MASK) &&
416 	       !vmcb12_is_intercept(control, INTERCEPT_NMI))) {
417 		return false;
418 	}
419 
420 	if (CC(!nested_svm_check_event_inj(vcpu, control->event_inj)))
421 		return false;
422 
423 	return true;
424 }
425 
426 /* Common checks that apply to both L1 and L2 state.  */
427 static bool nested_vmcb_check_save(struct kvm_vcpu *vcpu,
428 				   struct vmcb_save_area_cached *save)
429 {
430 	if (CC(!(save->efer & EFER_SVME)))
431 		return false;
432 
433 	if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
434 	    CC(save->cr0 & ~0xffffffffULL))
435 		return false;
436 
437 	if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
438 		return false;
439 
440 	/*
441 	 * These checks are also performed by KVM_SET_SREGS,
442 	 * except that EFER.LMA is not checked by SVM against
443 	 * CR0.PG && EFER.LME.
444 	 */
445 	if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
446 		if (CC(!(save->cr4 & X86_CR4_PAE)) ||
447 		    CC(!(save->cr0 & X86_CR0_PE)) ||
448 		    CC(!kvm_vcpu_is_legal_cr3(vcpu, save->cr3)))
449 			return false;
450 
451 		if (CC((save->cs.attrib & SVM_SELECTOR_L_MASK) &&
452 		       (save->cs.attrib & SVM_SELECTOR_DB_MASK)))
453 			return false;
454 	}
455 
456 	/* Note, SVM doesn't have any additional restrictions on CR4. */
457 	if (CC(!__kvm_is_valid_cr4(vcpu, save->cr4)))
458 		return false;
459 
460 	if (CC(!kvm_valid_efer(vcpu, save->efer)))
461 		return false;
462 
463 	return true;
464 }
465 
466 int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu)
467 {
468 	struct vcpu_svm *svm = to_svm(vcpu);
469 
470 	if (!nested_vmcb_check_save(vcpu, &svm->nested.save) ||
471 	    !nested_vmcb_check_controls(vcpu, &svm->nested.ctl))
472 		return -EINVAL;
473 
474 	return 0;
475 }
476 
477 /*
478  * If a feature is not advertised to L1, clear the corresponding vmcb12
479  * intercept.
480  */
481 #define __nested_svm_sanitize_intercept(__vcpu, __control, fname, iname)	\
482 do {										\
483 	if (!guest_cpu_cap_has(__vcpu, X86_FEATURE_##fname))			\
484 		vmcb12_clr_intercept(__control, INTERCEPT_##iname);		\
485 } while (0)
486 
487 #define nested_svm_sanitize_intercept(__vcpu, __control, name)			\
488 	__nested_svm_sanitize_intercept(__vcpu, __control, name, name)
489 
490 static
491 void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
492 					 struct vmcb_ctrl_area_cached *to,
493 					 struct vmcb_control_area *from)
494 {
495 	unsigned int i;
496 
497 	for (i = 0; i < MAX_INTERCEPT; i++)
498 		to->intercepts[i] = from->intercepts[i];
499 
500 	__nested_svm_sanitize_intercept(vcpu, to, XSAVE, XSETBV);
501 	nested_svm_sanitize_intercept(vcpu, to, INVPCID);
502 	nested_svm_sanitize_intercept(vcpu, to, RDTSCP);
503 	nested_svm_sanitize_intercept(vcpu, to, SKINIT);
504 	nested_svm_sanitize_intercept(vcpu, to, RDPRU);
505 
506 	/* Always clear misc_ctl bits that the guest cannot use */
507 	to->misc_ctl = from->misc_ctl;
508 	if (!guest_cpu_cap_has(vcpu, X86_FEATURE_NPT))
509 		to->misc_ctl &= ~SVM_MISC_ENABLE_NP;
510 
511 	if (!gmet_enabled || !guest_cpu_cap_has(vcpu, X86_FEATURE_GMET))
512 		to->misc_ctl &= ~SVM_MISC_ENABLE_GMET;
513 
514 	to->iopm_base_pa        = from->iopm_base_pa & PAGE_MASK;
515 	to->msrpm_base_pa       = from->msrpm_base_pa & PAGE_MASK;
516 	to->tsc_offset          = from->tsc_offset;
517 	to->tlb_ctl             = from->tlb_ctl & TLB_CONTROL_MASK;
518 	to->erap_ctl            = from->erap_ctl;
519 	to->int_ctl             = from->int_ctl;
520 	to->int_vector          = from->int_vector & SVM_INT_VECTOR_MASK;
521 	to->int_state           = from->int_state & SVM_INTERRUPT_SHADOW_MASK;
522 	to->exit_code           = from->exit_code;
523 	to->exit_info_1         = from->exit_info_1;
524 	to->exit_info_2         = from->exit_info_2;
525 	to->exit_int_info       = from->exit_int_info;
526 	to->exit_int_info_err   = from->exit_int_info_err;
527 	to->event_inj           = from->event_inj & ~SVM_EVTINJ_RESERVED_BITS;
528 	to->event_inj_err       = from->event_inj_err;
529 	to->next_rip            = from->next_rip;
530 	to->nested_cr3          = from->nested_cr3;
531 	to->misc_ctl2		= from->misc_ctl2;
532 	to->pause_filter_count  = from->pause_filter_count;
533 	to->pause_filter_thresh = from->pause_filter_thresh;
534 
535 	/* Copy asid here because nested_vmcb_check_controls() will check it */
536 	to->asid           = from->asid;
537 	to->clean = from->clean;
538 
539 #ifdef CONFIG_KVM_HYPERV
540 	/* Hyper-V extensions (Enlightened VMCB) */
541 	if (kvm_hv_hypercall_enabled(vcpu)) {
542 		memcpy(&to->hv_enlightenments, &from->hv_enlightenments,
543 		       sizeof(to->hv_enlightenments));
544 	}
545 #endif
546 }
547 
548 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
549 				       struct vmcb_control_area *control)
550 {
551 	__nested_copy_vmcb_control_to_cache(&svm->vcpu, &svm->nested.ctl, control);
552 }
553 
554 static void __nested_copy_vmcb_save_to_cache(struct vmcb_save_area_cached *to,
555 					     struct vmcb_save_area *from)
556 {
557 	to->es = from->es;
558 	to->cs = from->cs;
559 	to->ss = from->ss;
560 	to->ds = from->ds;
561 	to->gdtr = from->gdtr;
562 	to->idtr = from->idtr;
563 
564 	to->cpl = from->cpl;
565 
566 	to->efer = from->efer;
567 	to->cr4 = from->cr4;
568 	to->cr3 = from->cr3;
569 	to->cr0 = from->cr0;
570 	to->dr7 = from->dr7;
571 	to->dr6 = from->dr6;
572 
573 	to->rflags = from->rflags;
574 	to->rip = from->rip;
575 	to->rsp = from->rsp;
576 
577 	to->s_cet = from->s_cet;
578 	to->ssp = from->ssp;
579 	to->isst_addr = from->isst_addr;
580 
581 	to->rax = from->rax;
582 	to->cr2 = from->cr2;
583 
584 	svm_copy_lbrs(to, from);
585 }
586 
587 void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
588 				    struct vmcb_save_area *save)
589 {
590 	__nested_copy_vmcb_save_to_cache(&svm->nested.save, save);
591 }
592 
593 /*
594  * Synchronize fields that are written by the processor, so that
595  * they can be copied back into the vmcb12.
596  */
597 void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
598 {
599 	u32 mask;
600 	svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
601 	svm->nested.ctl.event_inj_err  = svm->vmcb->control.event_inj_err;
602 	svm->nested.ctl.int_state	= svm->vmcb->control.int_state;
603 
604 	/* Only a few fields of int_ctl are written by the processor.  */
605 	mask = V_IRQ_MASK | V_TPR_MASK;
606 	/*
607 	 * Don't sync vmcb02 V_IRQ back to vmcb12 if KVM (L0) is intercepting
608 	 * virtual interrupts in order to request an interrupt window, as KVM
609 	 * has usurped vmcb02's int_ctl.  If an interrupt window opens before
610 	 * the next VM-Exit, svm_clear_vintr() will restore vmcb12's int_ctl.
611 	 * If no window opens, V_IRQ will be correctly preserved in vmcb12's
612 	 * int_ctl (because it was never recognized while L2 was running).
613 	 */
614 	if (svm_is_intercept(svm, INTERCEPT_VINTR) &&
615 	    !vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_VINTR))
616 		mask &= ~V_IRQ_MASK;
617 
618 	if (nested_vgif_enabled(svm))
619 		mask |= V_GIF_MASK;
620 
621 	if (nested_vnmi_enabled(svm))
622 		mask |= V_NMI_BLOCKING_MASK | V_NMI_PENDING_MASK;
623 
624 	svm->nested.ctl.int_ctl        &= ~mask;
625 	svm->nested.ctl.int_ctl        |= svm->vmcb->control.int_ctl & mask;
626 }
627 
628 /*
629  * Transfer any event that L0 or L1 wanted to inject into L2 to
630  * EXIT_INT_INFO.
631  */
632 static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
633 						struct vmcb *vmcb12)
634 {
635 	struct kvm_vcpu *vcpu = &svm->vcpu;
636 	u32 exit_int_info = 0;
637 	unsigned int nr;
638 
639 	if (vcpu->arch.exception.injected) {
640 		nr = vcpu->arch.exception.vector;
641 		exit_int_info = nr | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
642 
643 		if (vcpu->arch.exception.has_error_code) {
644 			exit_int_info |= SVM_EVTINJ_VALID_ERR;
645 			vmcb12->control.exit_int_info_err =
646 				vcpu->arch.exception.error_code;
647 		}
648 
649 	} else if (vcpu->arch.nmi_injected) {
650 		exit_int_info = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
651 
652 	} else if (vcpu->arch.interrupt.injected) {
653 		nr = vcpu->arch.interrupt.nr;
654 		exit_int_info = nr | SVM_EVTINJ_VALID;
655 
656 		if (vcpu->arch.interrupt.soft)
657 			exit_int_info |= SVM_EVTINJ_TYPE_SOFT;
658 		else
659 			exit_int_info |= SVM_EVTINJ_TYPE_INTR;
660 	}
661 
662 	vmcb12->control.exit_int_info = exit_int_info;
663 }
664 
665 static void nested_svm_transition_tlb_flush(struct kvm_vcpu *vcpu)
666 {
667 	/* Handle pending Hyper-V TLB flush requests */
668 	kvm_hv_nested_transtion_tlb_flush(vcpu, npt_enabled);
669 
670 	/*
671 	 * TODO: optimize unconditional TLB flush/MMU sync.  A partial list of
672 	 * things to fix before this can be conditional:
673 	 *
674 	 *  - Flush TLBs for both L1 and L2 remote TLB flush
675 	 *  - Honor L1's request to flush an ASID on nested VMRUN
676 	 *  - Sync nested NPT MMU on VMRUN that flushes L2's ASID[*]
677 	 *  - Don't crush a pending TLB flush in vmcb02 on nested VMRUN
678 	 *  - Flush L1's ASID on KVM_REQ_TLB_FLUSH_GUEST
679 	 *
680 	 * [*] Unlike nested EPT, SVM's ASID management can invalidate nested
681 	 *     NPT guest-physical mappings on VMRUN.
682 	 */
683 	kvm_make_request(KVM_REQ_MMU_SYNC, vcpu);
684 	kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
685 }
686 
687 /*
688  * Load guest's/host's cr3 on nested vmentry or vmexit. @nested_npt is true
689  * if we are emulating VM-Entry into a guest with NPT enabled.
690  */
691 static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
692 			       bool nested_npt, bool reload_pdptrs)
693 {
694 	if (CC(!kvm_vcpu_is_legal_cr3(vcpu, cr3)))
695 		return -EINVAL;
696 
697 	if (reload_pdptrs && !nested_npt && is_pae_paging(vcpu) &&
698 	    CC(!load_pdptrs(vcpu, cr3)))
699 		return -EINVAL;
700 
701 	vcpu->arch.cr3 = cr3;
702 
703 	/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
704 	kvm_init_mmu(vcpu);
705 
706 	if (!nested_npt)
707 		kvm_mmu_new_pgd(vcpu, cr3);
708 
709 	return 0;
710 }
711 
712 void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
713 {
714 	if (!svm->nested.vmcb02.ptr)
715 		return;
716 
717 	/* FIXME: merge g_pat from vmcb01 and vmcb12.  */
718 	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
719 }
720 
721 static bool nested_vmcb12_has_lbrv(struct kvm_vcpu *vcpu)
722 {
723 	return guest_cpu_cap_has(vcpu, X86_FEATURE_LBRV) &&
724 		(to_svm(vcpu)->nested.ctl.misc_ctl2 & SVM_MISC2_ENABLE_V_LBR);
725 }
726 
727 static void nested_vmcb02_prepare_save(struct vcpu_svm *svm)
728 {
729 	struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
730 	struct vmcb_save_area_cached *save = &svm->nested.save;
731 	bool new_vmcb12 = false;
732 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
733 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
734 	struct kvm_vcpu *vcpu = &svm->vcpu;
735 
736 	nested_vmcb02_compute_g_pat(svm);
737 	vmcb_mark_dirty(vmcb02, VMCB_NPT);
738 
739 	/* Load the nested guest state */
740 	if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
741 		new_vmcb12 = true;
742 		svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
743 		svm->nested.force_msr_bitmap_recalc = true;
744 	}
745 
746 	if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_SEG))) {
747 		vmcb02->save.es = save->es;
748 		vmcb02->save.cs = save->cs;
749 		vmcb02->save.ss = save->ss;
750 		vmcb02->save.ds = save->ds;
751 		vmcb02->save.cpl = save->cpl;
752 		vmcb_mark_dirty(vmcb02, VMCB_SEG);
753 	}
754 
755 	if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_DT))) {
756 		vmcb02->save.gdtr = save->gdtr;
757 		vmcb02->save.idtr = save->idtr;
758 		vmcb_mark_dirty(vmcb02, VMCB_DT);
759 	}
760 
761 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK) &&
762 	    (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_CET)))) {
763 		vmcb02->save.s_cet  = save->s_cet;
764 		vmcb02->save.isst_addr = save->isst_addr;
765 		vmcb02->save.ssp = save->ssp;
766 		vmcb_mark_dirty(vmcb02, VMCB_CET);
767 	}
768 
769 	kvm_set_rflags(vcpu, save->rflags | X86_EFLAGS_FIXED);
770 
771 	svm_set_efer(vcpu, svm->nested.save.efer);
772 
773 	svm_set_cr0(vcpu, svm->nested.save.cr0);
774 	svm_set_cr4(vcpu, svm->nested.save.cr4);
775 
776 	svm->vcpu.arch.cr2 = save->cr2;
777 
778 	kvm_rax_write(vcpu, save->rax);
779 	kvm_rsp_write(vcpu, save->rsp);
780 	kvm_rip_write(vcpu, save->rip);
781 
782 	/* In case we don't even reach vcpu_run, the fields are not updated */
783 	vmcb02->save.rax = save->rax;
784 	vmcb02->save.rsp = save->rsp;
785 	vmcb02->save.rip = save->rip;
786 
787 	if (unlikely(new_vmcb12 || vmcb12_is_dirty(control, VMCB_DR))) {
788 		vmcb02->save.dr7 = svm->nested.save.dr7 | DR7_FIXED_1;
789 		svm->vcpu.arch.dr6  = svm->nested.save.dr6 | DR6_ACTIVE_LOW;
790 		vmcb_mark_dirty(vmcb02, VMCB_DR);
791 	}
792 
793 	if (nested_vmcb12_has_lbrv(vcpu)) {
794 		/*
795 		 * Reserved bits of DEBUGCTL are ignored.  Be consistent with
796 		 * svm_set_msr's definition of reserved bits.
797 		 */
798 		svm_copy_lbrs(&vmcb02->save, save);
799 		vmcb02->save.dbgctl &= ~DEBUGCTL_RESERVED_BITS;
800 	} else {
801 		svm_copy_lbrs(&vmcb02->save, &vmcb01->save);
802 	}
803 	vmcb_mark_dirty(vmcb02, VMCB_LBR);
804 	svm_update_lbrv(&svm->vcpu);
805 }
806 
807 static inline bool is_evtinj_soft(u32 evtinj)
808 {
809 	u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
810 	u8 vector = evtinj & SVM_EVTINJ_VEC_MASK;
811 
812 	if (!(evtinj & SVM_EVTINJ_VALID))
813 		return false;
814 
815 	if (type == SVM_EVTINJ_TYPE_SOFT)
816 		return true;
817 
818 	return type == SVM_EVTINJ_TYPE_EXEPT && kvm_exception_is_soft(vector);
819 }
820 
821 static bool is_evtinj_nmi(u32 evtinj)
822 {
823 	u32 type = evtinj & SVM_EVTINJ_TYPE_MASK;
824 
825 	if (!(evtinj & SVM_EVTINJ_VALID))
826 		return false;
827 
828 	return type == SVM_EVTINJ_TYPE_NMI;
829 }
830 
831 static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
832 {
833 	u32 int_ctl_vmcb01_bits = V_INTR_MASKING_MASK;
834 	u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
835 
836 	struct vmcb_ctrl_area_cached *vmcb12_ctrl = &svm->nested.ctl;
837 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
838 	struct vmcb *vmcb01 = svm->vmcb01.ptr;
839 	struct kvm_vcpu *vcpu = &svm->vcpu;
840 	u32 pause_count12, pause_thresh12;
841 
842 	nested_svm_transition_tlb_flush(vcpu);
843 
844 	/* Enter Guest-Mode */
845 	enter_guest_mode(vcpu);
846 
847 	/*
848 	 * Filled at exit: exit_code, exit_info_1, exit_info_2, exit_int_info,
849 	 * exit_int_info_err, next_rip, insn_len, insn_bytes.
850 	 */
851 
852 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_VGIF) &&
853 	    (vmcb12_ctrl->int_ctl & V_GIF_ENABLE_MASK))
854 		int_ctl_vmcb12_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
855 	else
856 		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
857 
858 	if (vnmi) {
859 		if (vmcb01->control.int_ctl & V_NMI_PENDING_MASK) {
860 			svm->vcpu.arch.nmi_pending++;
861 			kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
862 		}
863 		if (nested_vnmi_enabled(svm))
864 			int_ctl_vmcb12_bits |= (V_NMI_PENDING_MASK |
865 						V_NMI_ENABLE_MASK |
866 						V_NMI_BLOCKING_MASK);
867 	}
868 
869 	/*
870 	 * Copied from vmcb01.  msrpm_base can be overwritten later.
871 	 *
872 	 * SVM_MISC_ENABLE_NP in vmcb12 is only used for consistency checks.  If
873 	 * L1 enables NPTs, KVM shadows L1's NPTs and uses those to run L2. If
874 	 * L1 disables NPT, KVM runs L2 with the same NPTs used to run L1. For
875 	 * the latter, L1 runs L2 with shadow page tables that translate L2 GVAs
876 	 * to L1 GPAs, so the same NPTs can be used for L1 and L2.
877 	 */
878 	vmcb02->control.misc_ctl = vmcb01->control.misc_ctl & (SVM_MISC_ENABLE_NP | SVM_MISC_ENABLE_GMET);
879 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
880 	vmcb02->control.msrpm_base_pa = vmcb01->control.msrpm_base_pa;
881 	vmcb_mark_dirty(vmcb02, VMCB_PERM_MAP);
882 
883 	/*
884 	 * Stash vmcb02's counter if the guest hasn't moved past the guilty
885 	 * instruction; otherwise, reset the counter to '0'.
886 	 *
887 	 * In order to detect if L2 has made forward progress or not, track the
888 	 * RIP at which a bus lock has occurred on a per-vmcb12 basis.  If RIP
889 	 * is changed, guest has clearly made forward progress, bus_lock_counter
890 	 * still remained '1', so reset bus_lock_counter to '0'. Eg. In the
891 	 * scenario, where a buslock happened in L1 before VMRUN, the bus lock
892 	 * firmly happened on an instruction in the past. Even if vmcb01's
893 	 * counter is still '1', (because the guilty instruction got patched),
894 	 * the vCPU has clearly made forward progress and so KVM should reset
895 	 * vmcb02's counter to '0'.
896 	 *
897 	 * If the RIP hasn't changed, stash the bus lock counter at nested VMRUN
898 	 * to prevent the same guilty instruction from triggering a VM-Exit. Eg.
899 	 * if userspace rate-limits the vCPU, then it's entirely possible that
900 	 * L1's tick interrupt is pending by the time userspace re-runs the
901 	 * vCPU.  If KVM unconditionally clears the counter on VMRUN, then when
902 	 * L1 re-enters L2, the same instruction will trigger a VM-Exit and the
903 	 * entire cycle start over.
904 	 */
905 	if (vmcb02->save.rip && (svm->nested.last_bus_lock_rip == vmcb02->save.rip))
906 		vmcb02->control.bus_lock_counter = 1;
907 	else
908 		vmcb02->control.bus_lock_counter = 0;
909 
910 	/* Done at vmrun: asid.  */
911 
912 	/* Also overwritten later if necessary.  */
913 	vmcb02->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
914 
915 	/* Use vmcb01 MMU and format if guest does not use nNPT */
916 	if (nested_npt_enabled(svm)) {
917 		vmcb02->control.misc_ctl &= ~SVM_MISC_ENABLE_GMET;
918 		vmcb02->control.misc_ctl |= (svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_GMET);
919 
920 		nested_svm_init_mmu_context(vcpu);
921 	}
922 
923 	vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(vcpu->arch.l1_tsc_offset,
924 							   vmcb12_ctrl->tsc_offset,
925 							   svm->tsc_ratio_msr);
926 
927 	vmcb02->control.tsc_offset = vcpu->arch.tsc_offset;
928 
929 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_TSCRATEMSR) &&
930 	    svm->tsc_ratio_msr != kvm_caps.default_tsc_scaling_ratio)
931 		nested_svm_update_tsc_ratio_msr(vcpu);
932 
933 	vmcb02->control.int_ctl             =
934 		(vmcb12_ctrl->int_ctl & int_ctl_vmcb12_bits) |
935 		(vmcb01->control.int_ctl & int_ctl_vmcb01_bits);
936 
937 	vmcb02->control.int_vector          = vmcb12_ctrl->int_vector;
938 	vmcb02->control.int_state           = vmcb12_ctrl->int_state;
939 	vmcb02->control.event_inj           = vmcb12_ctrl->event_inj;
940 	vmcb02->control.event_inj_err       = vmcb12_ctrl->event_inj_err;
941 
942 	/*
943 	 * If nrips is exposed to L1, take NextRIP as-is.  Otherwise, L1
944 	 * advances L2's RIP before VMRUN instead of using NextRIP. KVM will
945 	 * stuff the current RIP as vmcb02's NextRIP before L2 is run.  After
946 	 * the first run of L2 (e.g. after save+restore), NextRIP is updated by
947 	 * the CPU and/or KVM and should be used regardless of L1's support.
948 	 */
949 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
950 	    !vcpu->arch.nested_run_pending)
951 		vmcb02->control.next_rip = vmcb12_ctrl->next_rip;
952 
953 	svm->nmi_l1_to_l2 = is_evtinj_nmi(vmcb02->control.event_inj);
954 
955 	/*
956 	 * soft_int_csbase, soft_int_old_rip, and soft_int_next_rip (if L1
957 	 * doesn't have NRIPS) are initialized later, before the vCPU is run.
958 	 */
959 	if (is_evtinj_soft(vmcb02->control.event_inj)) {
960 		svm->soft_int_injected = true;
961 		if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS) ||
962 		    !vcpu->arch.nested_run_pending)
963 			svm->soft_int_next_rip = vmcb12_ctrl->next_rip;
964 	}
965 
966 	/* SVM_MISC2_ENABLE_V_LBR is controlled by svm_update_lbrv() */
967 
968 	if (!nested_vmcb_needs_vls_intercept(svm))
969 		vmcb02->control.misc_ctl2 |= SVM_MISC2_ENABLE_V_VMLOAD_VMSAVE;
970 
971 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_PAUSEFILTER))
972 		pause_count12 = vmcb12_ctrl->pause_filter_count;
973 	else
974 		pause_count12 = 0;
975 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_PFTHRESHOLD))
976 		pause_thresh12 = vmcb12_ctrl->pause_filter_thresh;
977 	else
978 		pause_thresh12 = 0;
979 	if (kvm_pause_in_guest(svm->vcpu.kvm)) {
980 		/* use guest values since host doesn't intercept PAUSE */
981 		vmcb02->control.pause_filter_count = pause_count12;
982 		vmcb02->control.pause_filter_thresh = pause_thresh12;
983 
984 	} else {
985 		/* start from host values otherwise */
986 		vmcb02->control.pause_filter_count = vmcb01->control.pause_filter_count;
987 		vmcb02->control.pause_filter_thresh = vmcb01->control.pause_filter_thresh;
988 
989 		/* ... but ensure filtering is disabled if so requested.  */
990 		if (vmcb12_is_intercept(vmcb12_ctrl, INTERCEPT_PAUSE)) {
991 			if (!pause_count12)
992 				vmcb02->control.pause_filter_count = 0;
993 			if (!pause_thresh12)
994 				vmcb02->control.pause_filter_thresh = 0;
995 		}
996 	}
997 
998 	/*
999 	 * Take ALLOW_LARGER_RAP from vmcb12 even though it should be safe to
1000 	 * let L2 use a larger RAP since KVM will emulate the necessary clears,
1001 	 * as it's possible L1 deliberately wants to restrict L2 to the legacy
1002 	 * RAP size.  Unconditionally clear the RAP on nested VMRUN, as KVM is
1003 	 * responsible for emulating the host vs. guest tags (L1 is the "host",
1004 	 * L2 is the "guest").
1005 	 */
1006 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
1007 		vmcb02->control.erap_ctl = (vmcb12_ctrl->erap_ctl &
1008 					    ERAP_CONTROL_ALLOW_LARGER_RAP) |
1009 					   ERAP_CONTROL_CLEAR_RAP;
1010 
1011 	/*
1012 	 * Merge guest and host intercepts - must be called with vcpu in
1013 	 * guest-mode to take effect.
1014 	 */
1015 	nested_vmcb02_recalc_intercepts(svm);
1016 }
1017 
1018 static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
1019 {
1020 	/*
1021 	 * Some VMCB state is shared between L1 and L2 and thus has to be
1022 	 * moved at the time of nested vmrun and vmexit.
1023 	 *
1024 	 * VMLOAD/VMSAVE state would also belong in this category, but KVM
1025 	 * always performs VMLOAD and VMSAVE from the VMCB01.
1026 	 */
1027 	to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
1028 }
1029 
1030 int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa, bool from_vmrun)
1031 {
1032 	struct vcpu_svm *svm = to_svm(vcpu);
1033 	struct vmcb_ctrl_area_cached *control = &svm->nested.ctl;
1034 	struct vmcb_save_area_cached *save = &svm->nested.save;
1035 	int ret;
1036 
1037 	trace_kvm_nested_vmenter(svm->vmcb->save.rip,
1038 				 vmcb12_gpa,
1039 				 save->rip,
1040 				 control->int_ctl,
1041 				 control->event_inj,
1042 				 control->misc_ctl,
1043 				 control->nested_cr3,
1044 				 save->cr3,
1045 				 KVM_ISA_SVM);
1046 
1047 	trace_kvm_nested_intercepts(control->intercepts[INTERCEPT_CR] & 0xffff,
1048 				    control->intercepts[INTERCEPT_CR] >> 16,
1049 				    control->intercepts[INTERCEPT_EXCEPTION],
1050 				    control->intercepts[INTERCEPT_WORD3],
1051 				    control->intercepts[INTERCEPT_WORD4],
1052 				    control->intercepts[INTERCEPT_WORD5]);
1053 
1054 
1055 	svm->nested.vmcb12_gpa = vmcb12_gpa;
1056 
1057 	WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
1058 
1059 	nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
1060 
1061 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
1062 	nested_vmcb02_prepare_control(svm);
1063 	nested_vmcb02_prepare_save(svm);
1064 
1065 	ret = nested_svm_load_cr3(&svm->vcpu, svm->nested.save.cr3,
1066 				  nested_npt_enabled(svm), from_vmrun);
1067 	if (ret)
1068 		return ret;
1069 
1070 	if (!from_vmrun)
1071 		kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
1072 
1073 	svm_set_gif(svm, true);
1074 
1075 	if (kvm_vcpu_apicv_active(vcpu))
1076 		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
1077 
1078 	nested_svm_hv_update_vm_vp_ids(vcpu);
1079 
1080 	return 0;
1081 }
1082 
1083 static int nested_svm_copy_vmcb12_to_cache(struct kvm_vcpu *vcpu, u64 vmcb12_gpa)
1084 {
1085 	struct vcpu_svm *svm = to_svm(vcpu);
1086 	struct kvm_host_map map;
1087 	struct vmcb *vmcb12;
1088 	int r = 0;
1089 
1090 	if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map))
1091 		return -EFAULT;
1092 
1093 	vmcb12 = map.hva;
1094 	nested_copy_vmcb_control_to_cache(svm, &vmcb12->control);
1095 	nested_copy_vmcb_save_to_cache(svm, &vmcb12->save);
1096 
1097 	if (nested_svm_check_cached_vmcb12(vcpu) < 0) {
1098 		vmcb12->control.exit_code = SVM_EXIT_ERR;
1099 		vmcb12->control.exit_info_1 = 0;
1100 		vmcb12->control.exit_info_2 = 0;
1101 		vmcb12->control.event_inj = 0;
1102 		vmcb12->control.event_inj_err = 0;
1103 		svm_set_gif(svm, false);
1104 		r = -EINVAL;
1105 	}
1106 
1107 	kvm_vcpu_unmap(vcpu, &map);
1108 	return r;
1109 }
1110 
/*
 * Emulate a VMRUN instruction.
 *
 * The up-front checks reject the instruction if svm->nested.hsave_msr is
 * zero (#GP), the vCPU is in SMM (#UD), kvm_hv_verify_vp_assist() fails
 * (#GP) or the vmcb12 address in RAX is not a valid page address (#GP).
 * vmcb12 is then cached and validated, a subset of the current state is
 * stashed in vmcb01, and guest mode is entered.  If entering guest mode or
 * merging the MSR permission maps fails, a nested #VMEXIT with SVM_EXIT_ERR
 * is performed immediately.
 *
 * Returns 1 after queueing #GP/#UD for the hsave_msr, SMM and RAX checks,
 * the non-zero value from kvm_hv_verify_vp_assist() if that check fails,
 * -EINVAL if nested state has not been initialized, the result of
 * kvm_handle_memory_failure() if vmcb12 cannot be mapped, and otherwise the
 * result of kvm_skip_emulated_instruction().
 */
int nested_svm_vmrun(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;
	u64 vmcb12_gpa;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;

	if (!svm->nested.hsave_msr) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	if (is_smm(vcpu)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}

	/* This fails when VP assist page is enabled but the supplied GPA is bogus */
	ret = kvm_hv_verify_vp_assist(vcpu);
	if (ret) {
		kvm_inject_gp(vcpu, 0);
		return ret;
	}

	if (WARN_ON_ONCE(!svm->nested.initialized))
		return -EINVAL;

	/* RAX holds the guest physical address of vmcb12. */
	vmcb12_gpa = kvm_register_read(vcpu, VCPU_REGS_RAX);
	if (!page_address_valid(vcpu, vmcb12_gpa)) {
		kvm_inject_gp(vcpu, 0);
		return 1;
	}

	ret = nested_svm_copy_vmcb12_to_cache(vcpu, vmcb12_gpa);
	if (ret) {
		/* -EFAULT means vmcb12 could not be mapped. */
		if (ret == -EFAULT)
			return kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);

		/* Advance RIP past VMRUN as part of the nested #VMEXIT. */
		return kvm_skip_emulated_instruction(vcpu);
	}

	/* At this point, VMRUN is guaranteed to not fault; advance RIP. */
	ret = kvm_skip_emulated_instruction(vcpu);

	/*
	 * Since vmcb01 is not in use, we can use it to store some of the L1
	 * state.
	 */
	vmcb01->save.efer   = vcpu->arch.efer;
	vmcb01->save.cr0    = kvm_read_cr0(vcpu);
	vmcb01->save.cr4    = vcpu->arch.cr4;
	vmcb01->save.rflags = kvm_get_rflags(vcpu);
	vmcb01->save.rip    = kvm_rip_read(vcpu);

	if (!npt_enabled)
		vmcb01->save.cr3 = kvm_read_cr3(vcpu);

	vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING;

	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, true) ||
	    !nested_svm_merge_msrpm(vcpu)) {
		/*
		 * Entry failed: drop the pending-run and L1->L2 injection
		 * tracking, then synthesize a #VMEXIT with SVM_EXIT_ERR.
		 */
		vcpu->arch.nested_run_pending = 0;
		svm->nmi_l1_to_l2 = false;
		svm->soft_int_injected = false;

		svm->vmcb->control.exit_code    = SVM_EXIT_ERR;
		svm->vmcb->control.exit_info_1  = 0;
		svm->vmcb->control.exit_info_2  = 0;

		nested_svm_vmexit(svm);
	}

	return ret;
}
1186 
1187 /* Copy state save area fields which are handled by VMRUN */
1188 void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
1189 			  struct vmcb_save_area *from_save)
1190 {
1191 	to_save->es = from_save->es;
1192 	to_save->cs = from_save->cs;
1193 	to_save->ss = from_save->ss;
1194 	to_save->ds = from_save->ds;
1195 	to_save->gdtr = from_save->gdtr;
1196 	to_save->idtr = from_save->idtr;
1197 	to_save->rflags = from_save->rflags | X86_EFLAGS_FIXED;
1198 	to_save->efer = from_save->efer;
1199 	to_save->cr0 = from_save->cr0;
1200 	to_save->cr3 = from_save->cr3;
1201 	to_save->cr4 = from_save->cr4;
1202 	to_save->rax = from_save->rax;
1203 	to_save->rsp = from_save->rsp;
1204 	to_save->rip = from_save->rip;
1205 	to_save->cpl = 0;
1206 
1207 	if (kvm_cpu_cap_has(X86_FEATURE_SHSTK)) {
1208 		to_save->s_cet  = from_save->s_cet;
1209 		to_save->isst_addr = from_save->isst_addr;
1210 		to_save->ssp = from_save->ssp;
1211 	}
1212 
1213 	if (kvm_cpu_cap_has(X86_FEATURE_LBRV)) {
1214 		svm_copy_lbrs(to_save, from_save);
1215 		to_save->dbgctl &= ~DEBUGCTL_RESERVED_BITS;
1216 	}
1217 }
1218 
1219 void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb)
1220 {
1221 	to_vmcb->save.fs = from_vmcb->save.fs;
1222 	to_vmcb->save.gs = from_vmcb->save.gs;
1223 	to_vmcb->save.tr = from_vmcb->save.tr;
1224 	to_vmcb->save.ldtr = from_vmcb->save.ldtr;
1225 	to_vmcb->save.kernel_gs_base = from_vmcb->save.kernel_gs_base;
1226 	to_vmcb->save.star = from_vmcb->save.star;
1227 	to_vmcb->save.lstar = from_vmcb->save.lstar;
1228 	to_vmcb->save.cstar = from_vmcb->save.cstar;
1229 	to_vmcb->save.sfmask = from_vmcb->save.sfmask;
1230 	to_vmcb->save.sysenter_cs = from_vmcb->save.sysenter_cs;
1231 	to_vmcb->save.sysenter_esp = from_vmcb->save.sysenter_esp;
1232 	to_vmcb->save.sysenter_eip = from_vmcb->save.sysenter_eip;
1233 }
1234 
1235 static int nested_svm_vmexit_update_vmcb12(struct kvm_vcpu *vcpu)
1236 {
1237 	struct vcpu_svm *svm = to_svm(vcpu);
1238 	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
1239 	struct kvm_host_map map;
1240 	struct vmcb *vmcb12;
1241 	int rc;
1242 
1243 	rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
1244 	if (rc)
1245 		return rc;
1246 
1247 	vmcb12 = map.hva;
1248 
1249 	vmcb12->save.es     = vmcb02->save.es;
1250 	vmcb12->save.cs     = vmcb02->save.cs;
1251 	vmcb12->save.ss     = vmcb02->save.ss;
1252 	vmcb12->save.ds     = vmcb02->save.ds;
1253 	vmcb12->save.gdtr   = vmcb02->save.gdtr;
1254 	vmcb12->save.idtr   = vmcb02->save.idtr;
1255 	vmcb12->save.efer   = svm->vcpu.arch.efer;
1256 	vmcb12->save.cr0    = kvm_read_cr0(vcpu);
1257 	vmcb12->save.cr3    = kvm_read_cr3(vcpu);
1258 	vmcb12->save.cr2    = vcpu->arch.cr2;
1259 	vmcb12->save.cr4    = svm->vcpu.arch.cr4;
1260 	vmcb12->save.rflags = kvm_get_rflags(vcpu);
1261 	vmcb12->save.rip    = kvm_rip_read(vcpu);
1262 	vmcb12->save.rsp    = kvm_rsp_read(vcpu);
1263 	vmcb12->save.rax    = kvm_rax_read(vcpu);
1264 	vmcb12->save.dr7    = vmcb02->save.dr7;
1265 	vmcb12->save.dr6    = svm->vcpu.arch.dr6;
1266 	vmcb12->save.cpl    = vmcb02->save.cpl;
1267 
1268 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_SHSTK)) {
1269 		vmcb12->save.s_cet	= vmcb02->save.s_cet;
1270 		vmcb12->save.isst_addr	= vmcb02->save.isst_addr;
1271 		vmcb12->save.ssp	= vmcb02->save.ssp;
1272 	}
1273 
1274 	vmcb12->control.int_state         = vmcb02->control.int_state;
1275 	vmcb12->control.exit_code         = vmcb02->control.exit_code;
1276 	vmcb12->control.exit_info_1       = vmcb02->control.exit_info_1;
1277 	vmcb12->control.exit_info_2       = vmcb02->control.exit_info_2;
1278 
1279 	if (!svm_is_vmrun_failure(vmcb12->control.exit_code))
1280 		nested_save_pending_event_to_vmcb12(svm, vmcb12);
1281 
1282 	if (guest_cpu_cap_has(vcpu, X86_FEATURE_NRIPS))
1283 		vmcb12->control.next_rip  = vmcb02->control.next_rip;
1284 
1285 	if (nested_vmcb12_has_lbrv(vcpu))
1286 		svm_copy_lbrs(&vmcb12->save, &vmcb02->save);
1287 
1288 	vmcb12->control.event_inj	  = 0;
1289 	vmcb12->control.event_inj_err	  = 0;
1290 	vmcb12->control.int_ctl           = svm->nested.ctl.int_ctl;
1291 
1292 	trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
1293 				       vmcb12->control.exit_info_1,
1294 				       vmcb12->control.exit_info_2,
1295 				       vmcb12->control.exit_int_info,
1296 				       vmcb12->control.exit_int_info_err,
1297 				       KVM_ISA_SVM);
1298 
1299 	kvm_vcpu_unmap(vcpu, &map);
1300 	return 0;
1301 }
1302 
/*
 * Perform a nested #VMEXIT: write the exit state back to vmcb12, leave
 * guest mode, switch back to vmcb01 and restore the state that was stashed
 * in vmcb01.
 *
 * If vmcb12 cannot be updated, or the CR3 saved in vmcb01 cannot be
 * reloaded, KVM_REQ_TRIPLE_FAULT is requested instead of returning an
 * error.
 */
void nested_svm_vmexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct vmcb *vmcb01 = svm->vmcb01.ptr;
	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;

	if (nested_svm_vmexit_update_vmcb12(vcpu))
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);

	/* Exit Guest-Mode */
	leave_guest_mode(vcpu);
	svm->nested.vmcb12_gpa = 0;

	kvm_warn_on_nested_run_pending(vcpu);

	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);

	/* in case we halted in L2 */
	kvm_set_mp_state(vcpu, KVM_MP_STATE_RUNNABLE);

	/* Carry vmcb02's PAUSE filter count over to vmcb01. */
	if (!kvm_pause_in_guest(vcpu->kvm)) {
		vmcb01->control.pause_filter_count = vmcb02->control.pause_filter_count;
		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);

	}

	/*
	 * Invalidate last_bus_lock_rip unless KVM is still waiting for the
	 * guest to make forward progress before re-enabling bus lock detection.
	 */
	if (!vmcb02->control.bus_lock_counter)
		svm->nested.last_bus_lock_rip = INVALID_GPA;

	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);

	kvm_nested_vmexit_handle_ibrs(vcpu);

	if (guest_cpu_cap_has(vcpu, X86_FEATURE_ERAPS))
		vmcb01->control.erap_ctl |= ERAP_CONTROL_CLEAR_RAP;

	svm_switch_vmcb(svm, &svm->vmcb01);

	/*
	 * Rules for synchronizing int_ctl bits from vmcb02 to vmcb01:
	 *
	 * V_IRQ, V_IRQ_VECTOR, V_INTR_PRIO_MASK, V_IGN_TPR:  If L1 doesn't
	 * intercept interrupts, then KVM will use vmcb02's V_IRQ (and related
	 * flags) to detect interrupt windows for L1 IRQs (even if L1 uses
	 * virtual interrupt masking).  Raise KVM_REQ_EVENT to ensure that
	 * KVM re-requests an interrupt window if necessary, which implicitly
	 * copies these bits from vmcb02 to vmcb01.
	 *
	 * V_TPR: If L1 doesn't use virtual interrupt masking, then L1's vTPR
	 * is stored in vmcb02, but its value doesn't need to be copied from/to
	 * vmcb01 because it is copied from/to the virtual APIC's TPR register
	 * on each VM entry/exit.
	 *
	 * V_GIF: If nested vGIF is not used, KVM uses vmcb02's V_GIF for L1's
	 * V_GIF.  However, GIF is architecturally clear on each VM exit, thus
	 * there is no need to copy V_GIF from vmcb02 to vmcb01.
	 */
	if (!nested_exit_on_intr(svm))
		kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);

	/* If vmcb12 doesn't have LBRV, the LBRs in vmcb02 are carried over to vmcb01. */
	if (!nested_vmcb12_has_lbrv(vcpu)) {
		svm_copy_lbrs(&vmcb01->save, &vmcb02->save);
		vmcb_mark_dirty(vmcb01, VMCB_LBR);
	}

	svm_update_lbrv(vcpu);

	if (vnmi) {
		/* Mirror vmcb02's NMI-blocking bit into vmcb01. */
		if (vmcb02->control.int_ctl & V_NMI_BLOCKING_MASK)
			vmcb01->control.int_ctl |= V_NMI_BLOCKING_MASK;
		else
			vmcb01->control.int_ctl &= ~V_NMI_BLOCKING_MASK;

		/* Move one pending NMI, if any, into vmcb01's V_NMI_PENDING. */
		if (vcpu->arch.nmi_pending) {
			vcpu->arch.nmi_pending--;
			vmcb01->control.int_ctl |= V_NMI_PENDING_MASK;
		} else {
			vmcb01->control.int_ctl &= ~V_NMI_PENDING_MASK;
		}
	}

	/*
	 * On vmexit the GIF is set to false and
	 * no event can be injected in L1.
	 */
	svm_set_gif(svm, false);
	vmcb01->control.exit_int_info = 0;

	/* Back to L1's TSC offset and, if it differs, L1's TSC scaling ratio. */
	svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
	if (vmcb01->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
		vmcb01->control.tsc_offset = svm->vcpu.arch.tsc_offset;
		vmcb_mark_dirty(vmcb01, VMCB_INTERCEPTS);
	}

	if (kvm_caps.has_tsc_control &&
	    vcpu->arch.tsc_scaling_ratio != vcpu->arch.l1_tsc_scaling_ratio) {
		vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
		svm_write_tsc_multiplier(vcpu);
	}

	svm->nested.ctl.nested_cr3 = 0;

	/*
	 * Restore processor state that had been saved in vmcb01
	 */
	kvm_set_rflags(vcpu, vmcb01->save.rflags);
	svm_set_efer(vcpu, vmcb01->save.efer);
	svm_set_cr0(vcpu, vmcb01->save.cr0 | X86_CR0_PE);
	svm_set_cr4(vcpu, vmcb01->save.cr4);
	kvm_rax_write(vcpu, vmcb01->save.rax);
	kvm_rsp_write(vcpu, vmcb01->save.rsp);
	kvm_rip_write(vcpu, vmcb01->save.rip);

	svm->vcpu.arch.dr7 = DR7_FIXED_1;
	kvm_update_dr7(&svm->vcpu);

	nested_svm_transition_tlb_flush(vcpu);

	nested_svm_uninit_mmu_context(vcpu);

	if (nested_svm_load_cr3(vcpu, vmcb01->save.cr3, false, true))
		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);

	/* Drop tracking for L1->L2 injected NMIs and soft IRQs */
	svm->nmi_l1_to_l2 = false;
	svm->soft_int_injected = false;

	/*
	 * Drop what we picked up for L2 via svm_complete_interrupts() so it
	 * doesn't end up in L1.
	 */
	svm->vcpu.arch.nmi_injected = false;
	kvm_clear_exception_queue(vcpu);
	kvm_clear_interrupt_queue(vcpu);

	/*
	 * If we are here following the completion of a VMRUN that
	 * is being single-stepped, queue the pending #DB intercept
	 * right now so that it can be accounted for before we execute
	 * L1's next instruction.
	 */
	if (unlikely(vmcb01->save.rflags & X86_EFLAGS_TF))
		kvm_queue_exception(&(svm->vcpu), DB_VECTOR);

	/*
	 * Un-inhibit the AVIC right away, so that other vCPUs can start
	 * to benefit from it right away.
	 */
	if (kvm_apicv_activated(vcpu->kvm))
		__kvm_vcpu_update_apicv(vcpu);
}
1458 
1459 static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
1460 {
1461 	struct vcpu_svm *svm = to_svm(vcpu);
1462 
1463 	if (!vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SHUTDOWN))
1464 		return;
1465 
1466 	kvm_clear_request(KVM_REQ_TRIPLE_FAULT, vcpu);
1467 	nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
1468 }
1469 
1470 int svm_allocate_nested(struct vcpu_svm *svm)
1471 {
1472 	struct page *vmcb02_page;
1473 
1474 	if (svm->nested.initialized)
1475 		return 0;
1476 
1477 	vmcb02_page = snp_safe_alloc_page();
1478 	if (!vmcb02_page)
1479 		return -ENOMEM;
1480 	svm->nested.vmcb02.ptr = page_address(vmcb02_page);
1481 	svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
1482 
1483 	svm->nested.msrpm = svm_vcpu_alloc_msrpm();
1484 	if (!svm->nested.msrpm)
1485 		goto err_free_vmcb02;
1486 
1487 	svm->nested.initialized = true;
1488 	return 0;
1489 
1490 err_free_vmcb02:
1491 	__free_page(vmcb02_page);
1492 	return -ENOMEM;
1493 }
1494 
1495 void svm_free_nested(struct vcpu_svm *svm)
1496 {
1497 	if (!svm->nested.initialized)
1498 		return;
1499 
1500 	if (WARN_ON_ONCE(svm->vmcb != svm->vmcb01.ptr))
1501 		svm_switch_vmcb(svm, &svm->vmcb01);
1502 
1503 	svm_vcpu_free_msrpm(svm->nested.msrpm);
1504 	svm->nested.msrpm = NULL;
1505 
1506 	__free_page(virt_to_page(svm->nested.vmcb02.ptr));
1507 	svm->nested.vmcb02.ptr = NULL;
1508 
1509 	/*
1510 	 * When last_vmcb12_gpa matches the current vmcb12 gpa,
1511 	 * some vmcb12 fields are not loaded if they are marked clean
1512 	 * in the vmcb12, since in this case they are up to date already.
1513 	 *
1514 	 * When the vmcb02 is freed, this optimization becomes invalid.
1515 	 */
1516 	svm->nested.last_vmcb12_gpa = INVALID_GPA;
1517 
1518 	svm->nested.initialized = false;
1519 }
1520 
1521 void svm_leave_nested(struct kvm_vcpu *vcpu)
1522 {
1523 	struct vcpu_svm *svm = to_svm(vcpu);
1524 
1525 	if (is_guest_mode(vcpu)) {
1526 		vcpu->arch.nested_run_pending = 0;
1527 		svm->nested.vmcb12_gpa = INVALID_GPA;
1528 
1529 		leave_guest_mode(vcpu);
1530 
1531 		svm_switch_vmcb(svm, &svm->vmcb01);
1532 
1533 		nested_svm_uninit_mmu_context(vcpu);
1534 		vmcb_mark_all_dirty(svm->vmcb);
1535 
1536 		svm_set_gif(svm, true);
1537 
1538 		if (kvm_apicv_activated(vcpu->kvm))
1539 			kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
1540 	}
1541 
1542 	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
1543 }
1544 
1545 static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
1546 {
1547 	gpa_t base = svm->nested.ctl.msrpm_base_pa;
1548 	int write, bit_nr;
1549 	u8 value, mask;
1550 	u32 msr;
1551 
1552 	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_MSR_PROT)))
1553 		return NESTED_EXIT_HOST;
1554 
1555 	msr    = svm->vcpu.arch.regs[VCPU_REGS_RCX];
1556 	bit_nr = svm_msrpm_bit_nr(msr);
1557 	write  = svm->vmcb->control.exit_info_1 & 1;
1558 
1559 	if (bit_nr < 0)
1560 		return NESTED_EXIT_DONE;
1561 
1562 	if (kvm_vcpu_read_guest(&svm->vcpu, base + bit_nr / BITS_PER_BYTE,
1563 				&value, sizeof(value)))
1564 		return NESTED_EXIT_DONE;
1565 
1566 	mask = BIT(write) << (bit_nr & (BITS_PER_BYTE - 1));
1567 	return (value & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1568 }
1569 
1570 static int nested_svm_intercept_ioio(struct vcpu_svm *svm)
1571 {
1572 	unsigned port, size, iopm_len;
1573 	u16 val, mask;
1574 	u8 start_bit;
1575 	u64 gpa;
1576 
1577 	if (!(vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_IOIO_PROT)))
1578 		return NESTED_EXIT_HOST;
1579 
1580 	port = svm->vmcb->control.exit_info_1 >> 16;
1581 	size = (svm->vmcb->control.exit_info_1 & SVM_IOIO_SIZE_MASK) >>
1582 		SVM_IOIO_SIZE_SHIFT;
1583 	gpa  = svm->nested.ctl.iopm_base_pa + (port / 8);
1584 	start_bit = port % 8;
1585 	iopm_len = (start_bit + size > 8) ? 2 : 1;
1586 	mask = (0xf >> (4 - size)) << start_bit;
1587 	val = 0;
1588 
1589 	if (kvm_vcpu_read_guest(&svm->vcpu, gpa, &val, iopm_len))
1590 		return NESTED_EXIT_DONE;
1591 
1592 	return (val & mask) ? NESTED_EXIT_DONE : NESTED_EXIT_HOST;
1593 }
1594 
1595 static int nested_svm_intercept(struct vcpu_svm *svm)
1596 {
1597 	u64 exit_code = svm->vmcb->control.exit_code;
1598 	int vmexit = NESTED_EXIT_HOST;
1599 
1600 	if (svm_is_vmrun_failure(exit_code))
1601 		return NESTED_EXIT_DONE;
1602 
1603 	switch (exit_code) {
1604 	case SVM_EXIT_MSR:
1605 		vmexit = nested_svm_exit_handled_msr(svm);
1606 		break;
1607 	case SVM_EXIT_IOIO:
1608 		vmexit = nested_svm_intercept_ioio(svm);
1609 		break;
1610 	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f:
1611 		/*
1612 		 * Host-intercepted exceptions have been checked already in
1613 		 * nested_svm_exit_special.  There is nothing to do here,
1614 		 * the vmexit is injected by svm_check_nested_events.
1615 		 */
1616 		vmexit = NESTED_EXIT_DONE;
1617 		break;
1618 	default:
1619 		if (vmcb12_is_intercept(&svm->nested.ctl, exit_code))
1620 			vmexit = NESTED_EXIT_DONE;
1621 		break;
1622 	}
1623 
1624 	return vmexit;
1625 }
1626 
1627 int nested_svm_exit_handled(struct vcpu_svm *svm)
1628 {
1629 	int vmexit;
1630 
1631 	vmexit = nested_svm_intercept(svm);
1632 
1633 	if (vmexit == NESTED_EXIT_DONE)
1634 		nested_svm_vmexit(svm);
1635 
1636 	return vmexit;
1637 }
1638 
1639 int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
1640 {
1641 	if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
1642 		kvm_queue_exception(vcpu, UD_VECTOR);
1643 		return 1;
1644 	}
1645 
1646 	if (to_svm(vcpu)->vmcb->save.cpl) {
1647 		kvm_inject_gp(vcpu, 0);
1648 		return 1;
1649 	}
1650 
1651 	return 0;
1652 }
1653 
1654 static bool nested_svm_is_exception_vmexit(struct kvm_vcpu *vcpu, u8 vector,
1655 					   u32 error_code)
1656 {
1657 	struct vcpu_svm *svm = to_svm(vcpu);
1658 
1659 	return (svm->nested.ctl.intercepts[INTERCEPT_EXCEPTION] & BIT(vector));
1660 }
1661 
1662 static void nested_svm_inject_exception_vmexit(struct kvm_vcpu *vcpu)
1663 {
1664 	struct kvm_queued_exception *ex = &vcpu->arch.exception_vmexit;
1665 	struct vcpu_svm *svm = to_svm(vcpu);
1666 	struct vmcb *vmcb = svm->vmcb;
1667 
1668 	vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + ex->vector;
1669 
1670 	if (ex->has_error_code)
1671 		vmcb->control.exit_info_1 = ex->error_code;
1672 
1673 	/*
1674 	 * EXITINFO2 is undefined for all exception intercepts other
1675 	 * than #PF.
1676 	 */
1677 	if (ex->vector == PF_VECTOR) {
1678 		if (ex->has_payload)
1679 			vmcb->control.exit_info_2 = ex->payload;
1680 		else
1681 			vmcb->control.exit_info_2 = vcpu->arch.cr2;
1682 	} else if (ex->vector == DB_VECTOR) {
1683 		/* See kvm_check_and_inject_events().  */
1684 		kvm_deliver_exception_payload(vcpu, ex);
1685 
1686 		if (vcpu->arch.dr7 & DR7_GD) {
1687 			vcpu->arch.dr7 &= ~DR7_GD;
1688 			kvm_update_dr7(vcpu);
1689 		}
1690 	} else {
1691 		WARN_ON(ex->has_payload);
1692 	}
1693 
1694 	nested_svm_vmexit(svm);
1695 }
1696 
1697 static inline bool nested_exit_on_init(struct vcpu_svm *svm)
1698 {
1699 	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
1700 }
1701 
1702 static int svm_check_nested_events(struct kvm_vcpu *vcpu)
1703 {
1704 	struct kvm_lapic *apic = vcpu->arch.apic;
1705 	struct vcpu_svm *svm = to_svm(vcpu);
1706 	/*
1707 	 * Only a pending nested run blocks a pending exception.  If there is a
1708 	 * previously injected event, the pending exception occurred while said
1709 	 * event was being delivered and thus needs to be handled.
1710 	 */
1711 	bool block_nested_exceptions = vcpu->arch.nested_run_pending;
1712 	/*
1713 	 * New events (not exceptions) are only recognized at instruction
1714 	 * boundaries.  If an event needs reinjection, then KVM is handling a
1715 	 * VM-Exit that occurred _during_ instruction execution; new events are
1716 	 * blocked until the instruction completes.
1717 	 */
1718 	bool block_nested_events = block_nested_exceptions ||
1719 				   kvm_event_needs_reinjection(vcpu);
1720 
1721 	if (lapic_in_kernel(vcpu) &&
1722 	    test_bit(KVM_APIC_INIT, &apic->pending_events)) {
1723 		if (block_nested_events)
1724 			return -EBUSY;
1725 		if (!nested_exit_on_init(svm))
1726 			return 0;
1727 		nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
1728 		return 0;
1729 	}
1730 
1731 	if (vcpu->arch.exception_vmexit.pending) {
1732 		if (block_nested_exceptions)
1733                         return -EBUSY;
1734 		nested_svm_inject_exception_vmexit(vcpu);
1735 		return 0;
1736 	}
1737 
1738 	if (vcpu->arch.exception.pending) {
1739 		if (block_nested_exceptions)
1740 			return -EBUSY;
1741 		return 0;
1742 	}
1743 
1744 #ifdef CONFIG_KVM_SMM
1745 	if (vcpu->arch.smi_pending && !svm_smi_blocked(vcpu)) {
1746 		if (block_nested_events)
1747 			return -EBUSY;
1748 		if (!nested_exit_on_smi(svm))
1749 			return 0;
1750 		nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
1751 		return 0;
1752 	}
1753 #endif
1754 
1755 	if (vcpu->arch.nmi_pending && !svm_nmi_blocked(vcpu)) {
1756 		if (block_nested_events)
1757 			return -EBUSY;
1758 		if (!nested_exit_on_nmi(svm))
1759 			return 0;
1760 		nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
1761 		return 0;
1762 	}
1763 
1764 	if (kvm_cpu_has_interrupt(vcpu) && !svm_interrupt_blocked(vcpu)) {
1765 		if (block_nested_events)
1766 			return -EBUSY;
1767 		if (!nested_exit_on_intr(svm))
1768 			return 0;
1769 		trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
1770 		nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
1771 		return 0;
1772 	}
1773 
1774 	return 0;
1775 }
1776 
1777 int nested_svm_exit_special(struct vcpu_svm *svm)
1778 {
1779 	u32 exit_code = svm->vmcb->control.exit_code;
1780 	struct kvm_vcpu *vcpu = &svm->vcpu;
1781 
1782 	switch (exit_code) {
1783 	case SVM_EXIT_INTR:
1784 	case SVM_EXIT_NMI:
1785 	case SVM_EXIT_NPF:
1786 		return NESTED_EXIT_HOST;
1787 	case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
1788 		u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
1789 
1790 		if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
1791 		    excp_bits)
1792 			return NESTED_EXIT_HOST;
1793 		else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
1794 			 svm->vcpu.arch.apf.host_apf_flags)
1795 			/* Trap async PF even if not shadowing */
1796 			return NESTED_EXIT_HOST;
1797 		break;
1798 	}
1799 	case SVM_EXIT_VMMCALL:
1800 		/* Hyper-V L2 TLB flush hypercall is handled by L0 */
1801 		if (nested_svm_is_l2_tlb_flush_hcall(vcpu))
1802 			return NESTED_EXIT_HOST;
1803 		break;
1804 	default:
1805 		break;
1806 	}
1807 
1808 	return NESTED_EXIT_CONTINUE;
1809 }
1810 
1811 void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu)
1812 {
1813 	struct vcpu_svm *svm = to_svm(vcpu);
1814 
1815 	vcpu->arch.tsc_scaling_ratio =
1816 		kvm_calc_nested_tsc_multiplier(vcpu->arch.l1_tsc_scaling_ratio,
1817 					       svm->tsc_ratio_msr);
1818 	svm_write_tsc_multiplier(vcpu);
1819 }
1820 
1821 /* Inverse operation of nested_copy_vmcb_control_to_cache(). asid is copied too. */
1822 static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
1823 					      struct vmcb_ctrl_area_cached *from)
1824 {
1825 	unsigned int i;
1826 
1827 	memset(dst, 0, sizeof(struct vmcb_control_area));
1828 
1829 	for (i = 0; i < MAX_INTERCEPT; i++)
1830 		dst->intercepts[i] = from->intercepts[i];
1831 
1832 	dst->iopm_base_pa         = from->iopm_base_pa;
1833 	dst->msrpm_base_pa        = from->msrpm_base_pa;
1834 	dst->tsc_offset           = from->tsc_offset;
1835 	dst->asid                 = from->asid;
1836 	dst->tlb_ctl              = from->tlb_ctl;
1837 	dst->erap_ctl             = from->erap_ctl;
1838 	dst->int_ctl              = from->int_ctl;
1839 	dst->int_vector           = from->int_vector;
1840 	dst->int_state            = from->int_state;
1841 	dst->exit_code            = from->exit_code;
1842 	dst->exit_info_1          = from->exit_info_1;
1843 	dst->exit_info_2          = from->exit_info_2;
1844 	dst->exit_int_info        = from->exit_int_info;
1845 	dst->exit_int_info_err    = from->exit_int_info_err;
1846 	dst->misc_ctl		  = from->misc_ctl;
1847 	dst->event_inj            = from->event_inj;
1848 	dst->event_inj_err        = from->event_inj_err;
1849 	dst->next_rip             = from->next_rip;
1850 	dst->nested_cr3		  = from->nested_cr3;
1851 	dst->misc_ctl2		  = from->misc_ctl2;
1852 	dst->pause_filter_count   = from->pause_filter_count;
1853 	dst->pause_filter_thresh  = from->pause_filter_thresh;
1854 	/* 'clean' and 'hv_enlightenments' are not changed by KVM */
1855 }
1856 
1857 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
1858 				struct kvm_nested_state __user *user_kvm_nested_state,
1859 				u32 user_data_size)
1860 {
1861 	struct vcpu_svm *svm;
1862 	struct vmcb_control_area *ctl;
1863 	unsigned long r;
1864 	struct kvm_nested_state kvm_state = {
1865 		.flags = 0,
1866 		.format = KVM_STATE_NESTED_FORMAT_SVM,
1867 		.size = sizeof(kvm_state),
1868 	};
1869 	struct vmcb __user *user_vmcb = (struct vmcb __user *)
1870 		&user_kvm_nested_state->data.svm[0];
1871 
1872 	if (!vcpu)
1873 		return kvm_state.size + KVM_STATE_NESTED_SVM_VMCB_SIZE;
1874 
1875 	svm = to_svm(vcpu);
1876 
1877 	if (user_data_size < kvm_state.size)
1878 		goto out;
1879 
1880 	/* First fill in the header and copy it out.  */
1881 	if (is_guest_mode(vcpu)) {
1882 		kvm_state.hdr.svm.vmcb_pa = svm->nested.vmcb12_gpa;
1883 		kvm_state.size += KVM_STATE_NESTED_SVM_VMCB_SIZE;
1884 		kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
1885 
1886 		if (vcpu->arch.nested_run_pending)
1887 			kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
1888 	}
1889 
1890 	if (gif_set(svm))
1891 		kvm_state.flags |= KVM_STATE_NESTED_GIF_SET;
1892 
1893 	if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
1894 		return -EFAULT;
1895 
1896 	if (!is_guest_mode(vcpu))
1897 		goto out;
1898 
1899 	/*
1900 	 * Copy over the full size of the VMCB rather than just the size
1901 	 * of the structs.
1902 	 */
1903 	if (clear_user(user_vmcb, KVM_STATE_NESTED_SVM_VMCB_SIZE))
1904 		return -EFAULT;
1905 
1906 	ctl = kzalloc_obj(*ctl);
1907 	if (!ctl)
1908 		return -ENOMEM;
1909 
1910 	nested_copy_vmcb_cache_to_control(ctl, &svm->nested.ctl);
1911 	r = copy_to_user(&user_vmcb->control, ctl,
1912 			 sizeof(user_vmcb->control));
1913 	kfree(ctl);
1914 	if (r)
1915 		return -EFAULT;
1916 
1917 	if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
1918 			 sizeof(user_vmcb->save)))
1919 		return -EFAULT;
1920 out:
1921 	return kvm_state.size;
1922 }
1923 
1924 static int svm_set_nested_state(struct kvm_vcpu *vcpu,
1925 				struct kvm_nested_state __user *user_kvm_nested_state,
1926 				struct kvm_nested_state *kvm_state)
1927 {
1928 	struct vcpu_svm *svm = to_svm(vcpu);
1929 	struct vmcb __user *user_vmcb = (struct vmcb __user *)
1930 		&user_kvm_nested_state->data.svm[0];
1931 	struct vmcb_control_area *ctl;
1932 	struct vmcb_save_area *save;
1933 	struct vmcb_save_area_cached save_cached;
1934 	struct vmcb_ctrl_area_cached ctl_cached;
1935 	unsigned long cr0;
1936 	int ret;
1937 
1938 	BUILD_BUG_ON(sizeof(struct vmcb_control_area) + sizeof(struct vmcb_save_area) >
1939 		     KVM_STATE_NESTED_SVM_VMCB_SIZE);
1940 
1941 	if (kvm_state->format != KVM_STATE_NESTED_FORMAT_SVM)
1942 		return -EINVAL;
1943 
1944 	if (kvm_state->flags & ~(KVM_STATE_NESTED_GUEST_MODE |
1945 				 KVM_STATE_NESTED_RUN_PENDING |
1946 				 KVM_STATE_NESTED_GIF_SET))
1947 		return -EINVAL;
1948 
1949 	/*
1950 	 * If in guest mode, vcpu->arch.efer actually refers to the L2 guest's
1951 	 * EFER.SVME, but EFER.SVME still has to be 1 for VMRUN to succeed.
1952 	 * If SVME is disabled, the only valid states are "none" and GIF=1
1953 	 * (clearing SVME does NOT set GIF, i.e. GIF=0 is allowed).
1954 	 */
1955 	if (!(vcpu->arch.efer & EFER_SVME) && kvm_state->flags &&
1956 	    kvm_state->flags != KVM_STATE_NESTED_GIF_SET)
1957 		return -EINVAL;
1958 
1959 	/* SMM temporarily disables SVM, so we cannot be in guest mode.  */
1960 	if (is_smm(vcpu) && (kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
1961 		return -EINVAL;
1962 
1963 	if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE)) {
1964 		svm_leave_nested(vcpu);
1965 		svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
1966 		return 0;
1967 	}
1968 
1969 	if (!page_address_valid(vcpu, kvm_state->hdr.svm.vmcb_pa))
1970 		return -EINVAL;
1971 	if (kvm_state->size < sizeof(*kvm_state) + KVM_STATE_NESTED_SVM_VMCB_SIZE)
1972 		return -EINVAL;
1973 
1974 	ctl = memdup_user(&user_vmcb->control, sizeof(*ctl));
1975 	if (IS_ERR(ctl))
1976 		return PTR_ERR(ctl);
1977 
1978 	save = memdup_user(&user_vmcb->save, sizeof(*save));
1979 	if (IS_ERR(save)) {
1980 		kfree(ctl);
1981 		return PTR_ERR(save);
1982 	}
1983 
1984 	ret = -EINVAL;
1985 	__nested_copy_vmcb_control_to_cache(vcpu, &ctl_cached, ctl);
1986 	if (!nested_vmcb_check_controls(vcpu, &ctl_cached))
1987 		goto out_free;
1988 
1989 	/*
1990 	 * Processor state contains L2 state.  Check that it is
1991 	 * valid for guest mode (see nested_vmcb_check_save()).
1992 	 */
1993 	cr0 = kvm_read_cr0(vcpu);
1994         if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
1995 		goto out_free;
1996 
1997 	/*
1998 	 * Validate host state saved from before VMRUN (see
1999 	 * nested_svm_check_permissions).
2000 	 */
2001 	__nested_copy_vmcb_save_to_cache(&save_cached, save);
2002 	if (!(save->cr0 & X86_CR0_PG) ||
2003 	    !(save->cr0 & X86_CR0_PE) ||
2004 	    (save->rflags & X86_EFLAGS_VM) ||
2005 	    !nested_vmcb_check_save(vcpu, &save_cached))
2006 		goto out_free;
2007 
2008 
2009 	/*
2010 	 * All checks done, we can enter guest mode. Userspace provides
2011 	 * vmcb12.control, which will be combined with L1 and stored into
2012 	 * vmcb02, and the L1 save state which we store in vmcb01.
2013 	 * L2 registers if needed are moved from the current VMCB to VMCB02.
2014 	 */
2015 
2016 	if (is_guest_mode(vcpu))
2017 		svm_leave_nested(vcpu);
2018 	else
2019 		svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
2020 
2021 	svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
2022 
2023 	if (kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING)
2024 		vcpu->arch.nested_run_pending = KVM_NESTED_RUN_PENDING_UNTRUSTED;
2025 	else
2026 		vcpu->arch.nested_run_pending = 0;
2027 
2028 	svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
2029 
2030 	svm_copy_vmrun_state(&svm->vmcb01.ptr->save, save);
2031 	nested_copy_vmcb_control_to_cache(svm, ctl);
2032 
2033 	svm_switch_vmcb(svm, &svm->nested.vmcb02);
2034 	nested_vmcb02_prepare_control(svm);
2035 
2036 	/*
2037 	 * Any previously restored state (e.g. KVM_SET_SREGS) would mark fields
2038 	 * dirty in vmcb01 instead of vmcb02, so mark all of vmcb02 dirty here.
2039 	 */
2040 	vmcb_mark_all_dirty(svm->vmcb);
2041 
2042 	/*
2043 	 * While the nested guest CR3 is already checked and set by
2044 	 * KVM_SET_SREGS, it was set when nested state was yet loaded,
2045 	 * thus MMU might not be initialized correctly.
2046 	 * Set it again to fix this.
2047 	 */
2048 	ret = nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
2049 				  nested_npt_enabled(svm), false);
2050 	if (ret)
2051 		goto out_free;
2052 
2053 	svm->nested.force_msr_bitmap_recalc = true;
2054 
2055 	if (kvm_vcpu_apicv_active(vcpu))
2056 		kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
2057 
2058 	kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
2059 	ret = 0;
2060 out_free:
2061 	kfree(save);
2062 	kfree(ctl);
2063 
2064 	return ret;
2065 }
2066 
2067 static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
2068 {
2069 	if (WARN_ON(!is_guest_mode(vcpu)))
2070 		return true;
2071 
2072 	if (!vcpu->arch.pdptrs_from_userspace &&
2073 	    !nested_npt_enabled(to_svm(vcpu)) && is_pae_paging(vcpu))
2074 		/*
2075 		 * Reload the guest's PDPTRs since after a migration
2076 		 * the guest CR3 might be restored prior to setting the nested
2077 		 * state which can lead to a load of wrong PDPTRs.
2078 		 */
2079 		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
2080 			return false;
2081 
2082 	if (!nested_svm_merge_msrpm(vcpu)) {
2083 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
2084 		vcpu->run->internal.suberror =
2085 			KVM_INTERNAL_ERROR_EMULATION;
2086 		vcpu->run->internal.ndata = 0;
2087 		return false;
2088 	}
2089 
2090 	if (kvm_hv_verify_vp_assist(vcpu))
2091 		return false;
2092 
2093 	return true;
2094 }
2095 
2096 static gpa_t svm_translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa,
2097 				      u64 access,
2098 				      struct x86_exception *exception,
2099 				      u64 pte_access)
2100 {
2101 	struct vcpu_svm *svm = to_svm(vcpu);
2102 	struct kvm_mmu *mmu = vcpu->arch.mmu;
2103 
2104 	BUG_ON(!mmu_is_nested(vcpu));
2105 
2106 	/* Non-GMET walks are always user-walks */
2107 	if (!(svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_GMET))
2108 		access |= PFERR_USER_MASK;
2109 
2110 	return mmu->gva_to_gpa(vcpu, mmu, gpa, access, exception);
2111 }
2112 
2113 struct kvm_x86_nested_ops svm_nested_ops = {
2114 	.leave_nested = svm_leave_nested,
2115 	.translate_nested_gpa = svm_translate_nested_gpa,
2116 	.is_exception_vmexit = nested_svm_is_exception_vmexit,
2117 	.check_events = svm_check_nested_events,
2118 	.triple_fault = nested_svm_triple_fault,
2119 	.get_nested_state_pages = svm_get_nested_state_pages,
2120 	.get_state = svm_get_nested_state,
2121 	.set_state = svm_set_nested_state,
2122 	.hv_inject_synthetic_vmexit_post_tlb_flush = svm_hv_inject_synthetic_vmexit_post_tlb_flush,
2123 };
2124