xref: /linux/arch/x86/kvm/svm/svm.h (revision 743204d772648242c50ceebc72e8ff31aab1cff4)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * Kernel-based Virtual Machine driver for Linux
4  *
5  * AMD SVM support
6  *
7  * Copyright (C) 2006 Qumranet, Inc.
8  * Copyright 2010 Red Hat, Inc. and/or its affiliates.
9  *
10  * Authors:
11  *   Yaniv Kamay  <yaniv@qumranet.com>
12  *   Avi Kivity   <avi@qumranet.com>
13  */
14 
15 #ifndef __SVM_SVM_H
16 #define __SVM_SVM_H
17 
18 #include <linux/kvm_types.h>
19 #include <linux/kvm_host.h>
20 #include <linux/bits.h>
21 
22 #include <asm/svm.h>
23 #include <asm/sev-common.h>
24 
25 #include "cpuid.h"
26 #include "regs.h"
27 #include "x86.h"
28 #include "pmu.h"
29 
30 /*
31  * Helpers to convert to/from physical addresses for pages whose address is
32  * consumed directly by hardware.  Even though it's a physical address, SVM
33  * often restricts the address to the natural width, hence 'unsigned long'
34  * instead of 'hpa_t'.
35  */
36 static inline unsigned long __sme_page_pa(struct page *page)
37 {
38 	return __sme_set(page_to_pfn(page) << PAGE_SHIFT);
39 }
40 
41 static inline struct page *__sme_pa_to_page(unsigned long pa)
42 {
43 	return pfn_to_page(__sme_clr(pa) >> PAGE_SHIFT);
44 }
45 
/*
 * Sizes, in bytes, of the I/O and MSR permission maps.  The expansions are
 * parenthesized so that the macros behave as a single operand in any
 * expression (e.g. "x / IOPM_SIZE" or "x % MSRPM_SIZE").
 */
#define	IOPM_SIZE (PAGE_SIZE * 3)
#define	MSRPM_SIZE (PAGE_SIZE * 2)
48 
/*
 * Flags and data shared across the SVM code.  All of these are only declared
 * here; the definitions live outside this header.
 */
extern bool gmet_enabled;
extern bool npt_enabled;
extern int nrips;
extern int vgif;
extern bool intercept_smi;
extern bool vnmi;
extern int lbrv;

/* Annotated __ro_after_init: not written once init has completed. */
extern int tsc_aux_uret_slot __ro_after_init;

/* Annotated __initdata: only valid to reference during initialization. */
extern struct kvm_x86_ops svm_x86_ops __initdata;
60 
/*
 * Bit positions within the VMCB "clean" field.
 *
 * Each enumerator names the group of VMCB state covered by that bit.
 * VMCB_ALL_CLEAN_MASK might also need to be updated if this enum is
 * modified.
 */
enum {
	/* Intercept vectors, TSC offset, pause filter count */
	VMCB_INTERCEPTS	= 0,
	/* IOPM Base and MSRPM Base */
	VMCB_PERM_MAP	= 1,
	/* ASID */
	VMCB_ASID	= 2,
	/* int_ctl, int_vector */
	VMCB_INTR	= 3,
	/* npt_en, nCR3, gPAT */
	VMCB_NPT	= 4,
	/* CR0, CR3, CR4, EFER */
	VMCB_CR		= 5,
	/* DR6, DR7 */
	VMCB_DR		= 6,
	/* GDT, IDT */
	VMCB_DT		= 7,
	/* CS, DS, SS, ES, CPL */
	VMCB_SEG	= 8,
	/* CR2 only */
	VMCB_CR2	= 9,
	/* DBGCTL, BR_FROM, BR_TO, LAST_EX_FROM, LAST_EX_TO */
	VMCB_LBR	= 10,
	/*
	 * AVIC APIC_BAR, AVIC APIC_BACKING_PAGE,
	 * AVIC PHYSICAL_TABLE pointer,
	 * AVIC LOGICAL_TABLE pointer
	 */
	VMCB_AVIC	= 11,
	/* S_CET, SSP, ISST_ADDR */
	VMCB_CET	= 12,
	/* Reserved for hypervisor/software use */
	VMCB_SW		= 31,
};

/* Union of every clean bit defined by the enum above. */
#define VMCB_ALL_CLEAN_MASK			\
	((1U << VMCB_INTERCEPTS)	|	\
	 (1U << VMCB_PERM_MAP)		|	\
	 (1U << VMCB_ASID)		|	\
	 (1U << VMCB_INTR)		|	\
	 (1U << VMCB_NPT)		|	\
	 (1U << VMCB_CR)		|	\
	 (1U << VMCB_DR)		|	\
	 (1U << VMCB_DT)		|	\
	 (1U << VMCB_SEG)		|	\
	 (1U << VMCB_CR2)		|	\
	 (1U << VMCB_LBR)		|	\
	 (1U << VMCB_AVIC)		|	\
	 (1U << VMCB_CET)		|	\
	 (1U << VMCB_SW))

/* TPR and CR2 are always written before VMRUN */
#define VMCB_ALWAYS_DIRTY_MASK			\
	((1U << VMCB_INTR)		|	\
	 (1U << VMCB_CR2))
97 
#ifdef CONFIG_KVM_AMD_SEV
/*
 * Per-VM SEV state.  Embedded in struct kvm_svm as 'sev_info' and only
 * present when CONFIG_KVM_AMD_SEV is enabled; use to_kvm_sev_info() to get
 * at it from a struct kvm.
 */
struct kvm_sev_info {
	bool active;		/* SEV enabled guest */
	bool es_active;		/* SEV-ES enabled guest */
	bool need_init;		/* waiting for SEV_INIT2 */
	unsigned int asid;	/* ASID used for this guest */
	unsigned int handle;	/* SEV firmware handle */
	int fd;			/* SEV device fd */
	unsigned long policy;
	unsigned long pages_locked; /* Number of pages locked */
	struct list_head regions_list;  /* List of registered regions */
	u64 ap_jump_table;	/* SEV-ES AP Jump Table address */
	u64 vmsa_features;	/* SVM_SEV_FEAT_* bits, e.g. SNP_ACTIVE */
	u16 ghcb_version;	/* Highest guest GHCB protocol version allowed */
	struct kvm *enc_context_owner; /* Owner of copied encryption context */
	struct list_head mirror_vms; /* List of VMs mirroring */
	struct list_head mirror_entry; /* Use as a list entry of mirrors */
	struct misc_cg *misc_cg; /* For misc cgroup accounting */
	atomic_t migration_in_progress;
	void *snp_context;      /* SNP guest context page */
	void *guest_req_buf;    /* Bounce buffer for SNP Guest Request input */
	void *guest_resp_buf;   /* Bounce buffer for SNP Guest Request output */
	struct mutex guest_req_mutex; /* Must acquire before using bounce buffers */
	cpumask_var_t have_run_cpus; /* CPUs that have done VMRUN for this VM. */
	bool snp_certs_enabled;	/* SNP certificate-fetching support. */
};
#endif
125 
/*
 * SVM-specific VM state.  Wraps the generic struct kvm; to_kvm_svm() recovers
 * the wrapper from the embedded 'kvm' member via container_of().
 */
struct kvm_svm {
	struct kvm kvm;

	/* Struct members for AVIC */
	u32 avic_vm_id;
	u32 *avic_logical_id_table;
	u64 *avic_physical_id_table;
	struct hlist_node hnode;

#ifdef CONFIG_KVM_AMD_SEV
	/* SEV state, only compiled in with CONFIG_KVM_AMD_SEV. */
	struct kvm_sev_info sev_info;
#endif
};
139 
struct kvm_vcpu;

/*
 * Bookkeeping for a single VMCB.  Instantiated as vcpu_svm.vmcb01 and as
 * svm_nested_state.vmcb02; vcpu_svm.current_vmcb points at whichever one is
 * in use.
 */
struct kvm_vmcb_info {
	struct vmcb *ptr;	/* the VMCB itself */
	unsigned long pa;	/* presumably the physical address of *ptr -- confirm */
	int cpu;
	uint64_t asid_generation;
};
148 
/*
 * Cached subset of VMCB save-area fields.  Used as svm_nested_state.save,
 * which is documented there as only being valid within nested_svm_vmrun.
 */
struct vmcb_save_area_cached {
	struct vmcb_seg es;
	struct vmcb_seg cs;
	struct vmcb_seg ss;
	struct vmcb_seg ds;
	struct vmcb_seg gdtr;
	struct vmcb_seg idtr;
	u8 cpl;
	u64 efer;
	u64 cr4;
	u64 cr3;
	u64 cr0;
	u64 dr7;
	u64 dr6;
	u64 rflags;
	u64 rip;
	u64 rsp;
	u64 s_cet;
	u64 ssp;
	u64 isst_addr;
	u64 rax;
	u64 cr2;
	u64 g_pat;
	u64 dbgctl;
	u64 br_from;
	u64 br_to;
	u64 last_excp_from;
	u64 last_excp_to;
};
178 
/*
 * Cached copy of VMCB control-area fields.  Used as svm_nested_state.ctl,
 * i.e. the cache for the control fields of the guest; the vmcb12_*() helpers
 * in this header operate on it.
 */
struct vmcb_ctrl_area_cached {
	/* Intercept bitmap; accessed via vmcb12_{clr,is}_intercept(). */
	u32 intercepts[MAX_INTERCEPT];
	u16 pause_filter_thresh;
	u16 pause_filter_count;
	u64 iopm_base_pa;
	u64 msrpm_base_pa;
	u64 tsc_offset;
	u32 asid;
	u8 tlb_ctl;
	u8 erap_ctl;
	u32 int_ctl;
	u32 int_vector;
	u32 int_state;
	u64 exit_code;
	u64 exit_info_1;
	u64 exit_info_2;
	u32 exit_int_info;
	u32 exit_int_info_err;
	u64 misc_ctl;
	u32 event_inj;
	u32 event_inj_err;
	u64 next_rip;
	u64 nested_cr3;
	u64 misc_ctl2;
	/* Clean bits (VMCB_* enum positions); a clear bit means "dirty". */
	u32 clean;
	/* 32 bytes of software-defined space, optionally viewed as Hyper-V data. */
	union {
#if IS_ENABLED(CONFIG_HYPERV) || IS_ENABLED(CONFIG_KVM_HYPERV)
		struct hv_vmcb_enlightenments hv_enlightenments;
#endif
		u8 reserved_sw[32];
	};
};
211 
/*
 * Per-vCPU nested virtualization state, embedded in struct vcpu_svm as
 * 'nested'.
 */
struct svm_nested_state {
	struct kvm_vmcb_info vmcb02;
	u64 hsave_msr;
	u64 vm_cr_msr;
	u64 vmcb12_gpa;
	u64 last_vmcb12_gpa;
	u64 last_bus_lock_rip;

	/*
	 * The MSR permissions map used for vmcb02, which is the merge result
	 * of vmcb01 and vmcb12
	 */
	void *msrpm;

	/* cache for control fields of the guest */
	struct vmcb_ctrl_area_cached ctl;

	/*
	 * Note: this struct is not kept up-to-date while L2 runs; it is only
	 * valid within nested_svm_vmrun.
	 */
	struct vmcb_save_area_cached save;

	bool initialized;

	/*
	 * Indicates whether MSR bitmap for L2 needs to be rebuilt due to
	 * changes in MSR bitmap for L1 or switching to a different L2. Note,
	 * this flag can only be used reliably in conjunction with a paravirt L1
	 * which informs L0 whether any changes to MSR bitmap for L2 were done
	 * on its side.
	 */
	bool force_msr_bitmap_recalc;
};
246 
/*
 * Per-vCPU SEV-ES/SNP state, embedded in struct vcpu_svm as 'sev_es'.
 */
struct vcpu_sev_es_state {
	/* SEV-ES support */
	struct sev_es_save_area *vmsa;
	struct ghcb *ghcb;
	/* Bitmap consulted by the kvm_ghcb_<field>_is_valid() accessors. */
	u8 valid_bitmap[16];
	struct kvm_host_map ghcb_map;
	bool received_first_sipi;
	unsigned int ap_reset_hold_type;

	/* SEV-ES scratch area support */
	u64 sw_scratch;
	void *ghcb_sa;
	u32 ghcb_sa_len;
	bool ghcb_sa_sync;
	bool ghcb_sa_free;

	/* SNP Page-State-Change buffer entries currently being processed */
	struct {
		u16 cur_idx;
		u16 end_idx;
		u16 batch_size;
		bool is_2m;
	} psc;

	/* Compared against by ghcb_gpa_is_registered(). */
	u64 ghcb_registered_gpa;

	struct mutex snp_vmsa_mutex; /* Used to handle concurrent updates of VMSA. */
	gpa_t snp_vmsa_gpa;
	bool snp_ap_waiting_for_reset;
	bool snp_has_guest_vmsa;
};
278 
/*
 * SVM-specific vCPU state.  Wraps the generic struct kvm_vcpu; to_svm()
 * recovers the wrapper from the embedded 'vcpu' member via container_of().
 */
struct vcpu_svm {
	struct kvm_vcpu vcpu;
	/* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
	struct vmcb *vmcb;
	struct kvm_vmcb_info vmcb01;
	struct kvm_vmcb_info *current_vmcb;
	u32 asid;
	u32 sysenter_esp_hi;
	u32 sysenter_eip_hi;
	uint64_t tsc_aux;

	u64 msr_decfg;

	u64 next_rip;

	u64 spec_ctrl;

	u64 tsc_ratio_msr;
	/*
	 * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be
	 * translated into the appropriate L2_CFG bits on the host to
	 * perform speculative control.
	 */
	u64 virt_spec_ctrl;

	void *msrpm;

	ulong nmi_iret_rip;

	struct svm_nested_state nested;

	/* NMI mask value, used when vNMI is not enabled */
	bool nmi_masked;

	/*
	 * True when NMIs are still masked but guest IRET was just intercepted
	 * and KVM is waiting for RIP to change, which will signal that the
	 * intercepted IRET was retired and thus NMI can be unmasked.
	 */
	bool awaiting_iret_completion;

	/*
	 * Set when KVM is awaiting IRET completion and needs to inject NMIs as
	 * soon as the IRET completes (e.g. NMI is pending injection).  KVM
	 * temporarily steals RFLAGS.TF to single-step the guest in this case
	 * in order to regain control as soon as the NMI-blocking condition
	 * goes away.
	 */
	bool nmi_singlestep;
	u64 nmi_singlestep_guest_rflags;

	bool nmi_l1_to_l2;

	unsigned long soft_int_csbase;
	unsigned long soft_int_old_rip;
	unsigned long soft_int_next_rip;
	bool soft_int_injected;

	u32 ldr_reg;
	u32 dfr_reg;

	/*
	 * This is essentially a shadow of the vCPU's actual entry in the
	 * Physical ID table that is programmed into the VMCB, i.e. that is
	 * seen by the CPU.  If IPI virtualization is disabled, IsRunning is
	 * only ever set in the shadow, i.e. is never propagated to the "real"
	 * table, so that hardware never sees IsRunning=1.
	 */
	u64 avic_physical_id_entry;

	/*
	 * Per-vCPU list of irqfds that are eligible to post IRQs directly to
	 * the vCPU (a.k.a. device posted IRQs, a.k.a. IRQ bypass).  The list
	 * is used to reconfigure IRTEs when the vCPU is loaded/put (to set the
	 * target pCPU), when AVIC is toggled on/off (to (de)activate bypass),
	 * and if the irqfd becomes ineligible for posting (to put the IRTE
	 * back into remapped mode).
	 */
	struct list_head ir_list;
	raw_spinlock_t ir_list_lock;

	struct vcpu_sev_es_state sev_es;

	bool guest_state_loaded;

	bool avic_irq_window;
	bool x2avic_msrs_intercepted;
	bool lbr_msrs_intercepted;

	/* Guest GIF value, used when vGIF is not enabled */
	bool guest_gif;
};
370 
/*
 * Per-physical-CPU SVM state; one instance per CPU is declared below as the
 * per-CPU variable 'svm_data'.
 */
struct svm_cpu_data {
	u64 asid_generation;
	u32 max_asid;
	u32 next_asid;
	u32 min_asid;

	bool bp_spec_reduce_set;

	struct vmcb *save_area;
	unsigned long save_area_pa;

	/* index = sev_asid, value = vmcb pointer */
	struct vmcb **sev_vmcbs;
};

DECLARE_PER_CPU(struct svm_cpu_data, svm_data);
387 
388 static __always_inline struct kvm_svm *to_kvm_svm(struct kvm *kvm)
389 {
390 	return container_of(kvm, struct kvm_svm, kvm);
391 }
392 
393 #ifdef CONFIG_KVM_AMD_SEV
394 static __always_inline struct kvm_sev_info *to_kvm_sev_info(struct kvm *kvm)
395 {
396 	return &to_kvm_svm(kvm)->sev_info;
397 }
398 
399 static __always_inline bool ____sev_guest(struct kvm *kvm)
400 {
401 	return to_kvm_sev_info(kvm)->active;
402 }
403 static __always_inline bool ____sev_es_guest(struct kvm *kvm)
404 {
405 	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
406 
407 	return sev->es_active && !WARN_ON_ONCE(!sev->active);
408 }
409 
410 static __always_inline bool ____sev_snp_guest(struct kvm *kvm)
411 {
412 	struct kvm_sev_info *sev = to_kvm_sev_info(kvm);
413 
414 	return (sev->vmsa_features & SVM_SEV_FEAT_SNP_ACTIVE) &&
415 	       !WARN_ON_ONCE(!____sev_es_guest(kvm));
416 }
417 
418 static __always_inline bool is_sev_guest(struct kvm_vcpu *vcpu)
419 {
420 	return ____sev_guest(vcpu->kvm);
421 }
422 static __always_inline bool is_sev_es_guest(struct kvm_vcpu *vcpu)
423 {
424 	return ____sev_es_guest(vcpu->kvm);
425 }
426 
427 static __always_inline bool is_sev_snp_guest(struct kvm_vcpu *vcpu)
428 {
429 	return ____sev_snp_guest(vcpu->kvm);
430 }
431 #else
432 static __always_inline bool is_sev_guest(struct kvm_vcpu *vcpu)
433 {
434 	return false;
435 }
436 static __always_inline bool is_sev_es_guest(struct kvm_vcpu *vcpu)
437 {
438 	return false;
439 }
440 
441 static __always_inline bool is_sev_snp_guest(struct kvm_vcpu *vcpu)
442 {
443 	return false;
444 }
445 #endif
446 
447 static inline bool ghcb_gpa_is_registered(struct vcpu_svm *svm, u64 val)
448 {
449 	return svm->sev_es.ghcb_registered_gpa == val;
450 }
451 
452 static inline void vmcb_mark_all_dirty(struct vmcb *vmcb)
453 {
454 	vmcb->control.clean = 0;
455 }
456 
457 static inline void vmcb_mark_all_clean(struct vmcb *vmcb)
458 {
459 	vmcb->control.clean = VMCB_ALL_CLEAN_MASK
460 			       & ~VMCB_ALWAYS_DIRTY_MASK;
461 }
462 
463 static inline void vmcb_mark_dirty(struct vmcb *vmcb, int bit)
464 {
465 	vmcb->control.clean &= ~(1 << bit);
466 }
467 
468 static inline bool vmcb12_is_dirty(struct vmcb_ctrl_area_cached *control, int bit)
469 {
470 	return !test_bit(bit, (unsigned long *)&control->clean);
471 }
472 
473 static inline void vmcb_set_gpat(struct vmcb *vmcb, u64 data)
474 {
475 	vmcb->save.g_pat = data;
476 	vmcb_mark_dirty(vmcb, VMCB_NPT);
477 }
478 
479 static __always_inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
480 {
481 	return container_of(vcpu, struct vcpu_svm, vcpu);
482 }
483 
484 static inline bool svm_is_vmrun_failure(u64 exit_code)
485 {
486 	if (cpu_feature_enabled(X86_FEATURE_HYPERVISOR))
487 		return (u32)exit_code == (u32)SVM_EXIT_ERR;
488 
489 	return exit_code == SVM_EXIT_ERR;
490 }
491 
/*
 * Mask of VCPU_REG_* bits for registers that are loaded lazily.
 *
 * Only the PDPTRs are loaded on demand into the shadow MMU.  All other
 * fields are synchronized on VM-Exit, because accessing the VMCB is cheap.
 *
 * CR3 might be out of date in the VMCB but it is not marked dirty; instead,
 * KVM_REQ_LOAD_MMU_PGD is always requested when the cached vcpu->arch.cr3
 * is changed.  svm_load_mmu_pgd() then syncs the new CR3 value into the VMCB.
 */
#define SVM_REGS_LAZY_LOAD_SET	(BIT(VCPU_REG_PDPTR))
501 
502 static inline void __vmcb_set_intercept(unsigned long *intercepts, u32 bit)
503 {
504 	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
505 	__set_bit(bit, intercepts);
506 }
507 
508 static inline void __vmcb_clr_intercept(unsigned long *intercepts, u32 bit)
509 {
510 	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
511 	__clear_bit(bit, intercepts);
512 }
513 
514 static inline bool __vmcb_is_intercept(unsigned long *intercepts, u32 bit)
515 {
516 	WARN_ON_ONCE(bit >= 32 * MAX_INTERCEPT);
517 	return test_bit(bit, intercepts);
518 }
519 
520 static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
521 {
522 	__vmcb_set_intercept((unsigned long *)&control->intercepts, bit);
523 }
524 
525 static inline void vmcb_clr_intercept(struct vmcb_control_area *control, u32 bit)
526 {
527 	__vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
528 }
529 
530 static inline bool vmcb_is_intercept(struct vmcb_control_area *control, u32 bit)
531 {
532 	return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
533 }
534 
535 static inline void vmcb12_clr_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
536 {
537 	__vmcb_clr_intercept((unsigned long *)&control->intercepts, bit);
538 }
539 
540 static inline bool vmcb12_is_intercept(struct vmcb_ctrl_area_cached *control, u32 bit)
541 {
542 	return __vmcb_is_intercept((unsigned long *)&control->intercepts, bit);
543 }
544 
545 void nested_vmcb02_recalc_intercepts(struct vcpu_svm *svm);
546 
547 static inline void svm_mark_intercepts_dirty(struct vcpu_svm *svm)
548 {
549 	vmcb_mark_dirty(svm->vmcb01.ptr, VMCB_INTERCEPTS);
550 
551 	/*
552 	 * If L2 is active, recalculate the intercepts for vmcb02 to account
553 	 * for the changes made to vmcb01.  All intercept configuration is done
554 	 * for vmcb01 and then propagated to vmcb02 to combine KVM's intercepts
555 	 * with L1's intercepts (from the vmcb12 snapshot).
556 	 */
557 	if (is_guest_mode(&svm->vcpu))
558 		nested_vmcb02_recalc_intercepts(svm);
559 }
560 
561 static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
562 {
563 	struct vmcb *vmcb = svm->vmcb01.ptr;
564 
565 	WARN_ON_ONCE(bit >= 32);
566 	vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
567 
568 	svm_mark_intercepts_dirty(svm);
569 }
570 
571 static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
572 {
573 	struct vmcb *vmcb = svm->vmcb01.ptr;
574 
575 	WARN_ON_ONCE(bit >= 32);
576 	vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
577 
578 	svm_mark_intercepts_dirty(svm);
579 }
580 
581 static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
582 {
583 	struct vmcb *vmcb = svm->vmcb01.ptr;
584 
585 	vmcb_set_intercept(&vmcb->control, bit);
586 
587 	svm_mark_intercepts_dirty(svm);
588 }
589 
590 static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
591 {
592 	struct vmcb *vmcb = svm->vmcb01.ptr;
593 
594 	vmcb_clr_intercept(&vmcb->control, bit);
595 
596 	svm_mark_intercepts_dirty(svm);
597 }
598 
599 static inline bool svm_is_intercept(struct vcpu_svm *svm, int bit)
600 {
601 	return vmcb_is_intercept(&svm->vmcb->control, bit);
602 }
603 
604 static inline bool nested_vgif_enabled(struct vcpu_svm *svm)
605 {
606 	return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VGIF) &&
607 	       (svm->nested.ctl.int_ctl & V_GIF_ENABLE_MASK);
608 }
609 
610 static inline struct vmcb *get_vgif_vmcb(struct vcpu_svm *svm)
611 {
612 	if (!vgif)
613 		return NULL;
614 
615 	if (is_guest_mode(&svm->vcpu) && !nested_vgif_enabled(svm))
616 		return svm->nested.vmcb02.ptr;
617 	else
618 		return svm->vmcb01.ptr;
619 }
620 
621 static inline void enable_gif(struct vcpu_svm *svm)
622 {
623 	struct vmcb *vmcb = get_vgif_vmcb(svm);
624 
625 	if (vmcb)
626 		vmcb->control.int_ctl |= V_GIF_MASK;
627 	else
628 		svm->guest_gif = true;
629 }
630 
631 static inline void disable_gif(struct vcpu_svm *svm)
632 {
633 	struct vmcb *vmcb = get_vgif_vmcb(svm);
634 
635 	if (vmcb)
636 		vmcb->control.int_ctl &= ~V_GIF_MASK;
637 	else
638 		svm->guest_gif = false;
639 }
640 
641 static inline bool gif_set(struct vcpu_svm *svm)
642 {
643 	struct vmcb *vmcb = get_vgif_vmcb(svm);
644 
645 	if (vmcb)
646 		return !!(vmcb->control.int_ctl & V_GIF_MASK);
647 	else
648 		return svm->guest_gif;
649 }
650 
651 static inline bool nested_npt_enabled(struct vcpu_svm *svm)
652 {
653 	return svm->nested.ctl.misc_ctl & SVM_MISC_ENABLE_NP;
654 }
655 
656 static inline bool l2_has_separate_pat(struct kvm_vcpu *vcpu)
657 {
658 	/*
659 	 * If KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT is disabled while a vCPU
660 	 * is running, the L2 IA32_PAT semantics for that vCPU are undefined.
661 	 */
662 	return nested_npt_enabled(to_svm(vcpu)) &&
663 	       !kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_NESTED_SVM_SHARED_PAT);
664 }
665 
666 static inline bool nested_vnmi_enabled(struct vcpu_svm *svm)
667 {
668 	return guest_cpu_cap_has(&svm->vcpu, X86_FEATURE_VNMI) &&
669 	       (svm->nested.ctl.int_ctl & V_NMI_ENABLE_MASK);
670 }
671 
672 static inline bool is_x2apic_msrpm_offset(u32 offset)
673 {
674 	/* 4 msrs per u8, and 4 u8 in u32 */
675 	u32 msr = offset * 16;
676 
677 	return (msr >= APIC_BASE_MSR) &&
678 	       (msr < (APIC_BASE_MSR + 0x100));
679 }
680 
681 static inline struct vmcb *get_vnmi_vmcb_l1(struct vcpu_svm *svm)
682 {
683 	if (!vnmi)
684 		return NULL;
685 
686 	if (is_guest_mode(&svm->vcpu))
687 		return NULL;
688 	else
689 		return svm->vmcb01.ptr;
690 }
691 
692 static inline bool is_vnmi_enabled(struct vcpu_svm *svm)
693 {
694 	struct vmcb *vmcb = get_vnmi_vmcb_l1(svm);
695 
696 	if (vmcb)
697 		return !!(vmcb->control.int_ctl & V_NMI_ENABLE_MASK);
698 	else
699 		return false;
700 }
701 
702 static inline void svm_vmgexit_set_return_code(struct vcpu_svm *svm,
703 						u64 response, u64 data)
704 {
705 	ghcb_set_sw_exit_info_1(svm->sev_es.ghcb, response);
706 	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, data);
707 }
708 
709 static inline void svm_vmgexit_inject_exception(struct vcpu_svm *svm, u8 vector)
710 {
711 	u64 data = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT | vector;
712 
713 	svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_ISSUE_EXCEPTION, data);
714 }
715 
716 static inline void svm_vmgexit_bad_input(struct vcpu_svm *svm, u64 suberror)
717 {
718 	svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_MALFORMED_INPUT, suberror);
719 }
720 
721 static inline void svm_vmgexit_success(struct vcpu_svm *svm, u64 data)
722 {
723 	svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data);
724 }
725 
726 static inline void svm_vmgexit_no_action(struct vcpu_svm *svm, u64 data)
727 {
728 	svm_vmgexit_set_return_code(svm, GHCB_HV_RESP_NO_ACTION, data);
729 }
730 
/*
 * The MSRPM is 8KiB in size, divided into four 2KiB ranges (the fourth range
 * is reserved).  Each MSR within a range is covered by two bits, one each for
 * read (bit 0) and write (bit 1), where a bit value of '1' means intercepted.
 *
 * With 2 bits per MSR, each byte covers 4 MSRs and each 2KiB range covers
 * 8192 MSRs, so the low 13 bits of an MSR index (SVM_MSRPM_OFFSET_MASK)
 * select the MSR within its range.
 */
#define SVM_MSRPM_BYTES_PER_RANGE 2048
#define SVM_BITS_PER_MSR 2
#define SVM_MSRS_PER_BYTE (BITS_PER_BYTE / SVM_BITS_PER_MSR)
#define SVM_MSRS_PER_RANGE (SVM_MSRPM_BYTES_PER_RANGE * SVM_MSRS_PER_BYTE)
static_assert(SVM_MSRS_PER_RANGE == 8192);
#define SVM_MSRPM_OFFSET_MASK (SVM_MSRS_PER_RANGE - 1)
742 
743 static __always_inline int svm_msrpm_bit_nr(u32 msr)
744 {
745 	int range_nr;
746 
747 	switch (msr & ~SVM_MSRPM_OFFSET_MASK) {
748 	case 0:
749 		range_nr = 0;
750 		break;
751 	case 0xc0000000:
752 		range_nr = 1;
753 		break;
754 	case 0xc0010000:
755 		range_nr = 2;
756 		break;
757 	default:
758 		return -EINVAL;
759 	}
760 
761 	return range_nr * SVM_MSRPM_BYTES_PER_RANGE * BITS_PER_BYTE +
762 	       (msr & SVM_MSRPM_OFFSET_MASK) * SVM_BITS_PER_MSR;
763 }
764 
/*
 * Generate svm_<action>_msr_bitmap_<access>(bitmap, msr), which applies
 * <bitop>_bit() to the read (bit_rw = 0) or write (bit_rw = 1) intercept bit
 * of @msr in @bitmap.
 *
 * If @msr is not covered by the MSRPM (svm_msrpm_bit_nr() < 0), the helper
 * returns (rtype)true without touching the bitmap: the 'test' helpers report
 * the MSR as intercepted, and for the void helpers the cast makes the return
 * a no-op.
 */
#define __BUILD_SVM_MSR_BITMAP_HELPER(rtype, action, bitop, access, bit_rw)	\
static inline rtype svm_##action##_msr_bitmap_##access(unsigned long *bitmap,	\
						       u32 msr)			\
{										\
	int bit_nr;								\
										\
	bit_nr = svm_msrpm_bit_nr(msr);						\
	if (bit_nr < 0)								\
		return (rtype)true;						\
										\
	return bitop##_bit(bit_nr + bit_rw, bitmap);				\
}

/* Emit both the _read and _write variant of one helper. */
#define BUILD_SVM_MSR_BITMAP_HELPERS(ret_type, action, bitop)			\
	__BUILD_SVM_MSR_BITMAP_HELPER(ret_type, action, bitop, read,  0)	\
	__BUILD_SVM_MSR_BITMAP_HELPER(ret_type, action, bitop, write, 1)

/* svm_test_msr_bitmap_{read,write}(): is the access intercepted? */
BUILD_SVM_MSR_BITMAP_HELPERS(bool, test, test)
/* svm_clear_msr_bitmap_{read,write}(): stop intercepting (non-atomic). */
BUILD_SVM_MSR_BITMAP_HELPERS(void, clear, __clear)
/* svm_set_msr_bitmap_{read,write}(): start intercepting (non-atomic). */
BUILD_SVM_MSR_BITMAP_HELPERS(void, set, __set)
785 
/* Every DEBUGCTL bit other than DEBUGCTLMSR_LBR is treated as reserved. */
#define DEBUGCTL_RESERVED_BITS (~DEBUGCTLMSR_LBR)

/* svm.c */
extern bool dump_invalid_vmcb;

/* Allocate a permissions map of @size bytes using @gfp_mask. */
void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask);
792 
793 static inline void *svm_vcpu_alloc_msrpm(void)
794 {
795 	return svm_alloc_permissions_map(MSRPM_SIZE, GFP_KERNEL_ACCOUNT);
796 }
797 
/*
 * Copy the LBR-related fields (dbgctl, br_from, br_to, last_excp_from,
 * last_excp_to) from @from to @to.
 *
 * This is a macro rather than a function so that @to and @from may be
 * pointers to any structure types that have these members.  Both arguments
 * are evaluated several times, so they must be free of side effects.
 */
#define svm_copy_lbrs(to, from)					\
do {								\
	(to)->dbgctl		= (from)->dbgctl;		\
	(to)->br_from		= (from)->br_from;		\
	(to)->br_to		= (from)->br_to;		\
	(to)->last_excp_from	= (from)->last_excp_from;	\
	(to)->last_excp_to	= (from)->last_excp_to;		\
} while (0)
806 
/* Functions declared here for use across the SVM files (svm.c section). */
void svm_vcpu_free_msrpm(void *msrpm);
void svm_enable_lbrv(struct kvm_vcpu *vcpu);
void svm_update_lbrv(struct kvm_vcpu *vcpu);

int svm_set_efer(struct kvm_vcpu *vcpu, u64 efer);
void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0);
void svm_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
void disable_nmi_singlestep(struct vcpu_svm *svm);
bool svm_smi_blocked(struct kvm_vcpu *vcpu);
bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
void svm_set_gif(struct vcpu_svm *svm, bool value);
int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
/*
 * NOTE(review): this takes 'u32 *msrpm' while struct vcpu_svm and
 * svm_nested_state store their MSR permission maps as 'void *' -- confirm
 * the declaration still matches a definition.
 */
void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
			  int read, int write);
void svm_complete_interrupt_delivery(struct kvm_vcpu *vcpu, int delivery_mode,
				     int trig_mode, int vec);

/* Set (@set = true) or clear (@set = false) interception of @msr for @type. */
void svm_set_intercept_for_msr(struct kvm_vcpu *vcpu, u32 msr, int type, bool set);
826 
827 static inline void svm_disable_intercept_for_msr(struct kvm_vcpu *vcpu,
828 						 u32 msr, int type)
829 {
830 	svm_set_intercept_for_msr(vcpu, msr, type, false);
831 }
832 
833 static inline void svm_enable_intercept_for_msr(struct kvm_vcpu *vcpu,
834 						u32 msr, int type)
835 {
836 	svm_set_intercept_for_msr(vcpu, msr, type, true);
837 }
838 
int svm_skip_emulated_instruction(struct kvm_vcpu *vcpu);

/* nested.c */

/* Classification of an exit that occurred while running a nested guest. */
#define NESTED_EXIT_HOST	0	/* Exit handled on host level */
#define NESTED_EXIT_DONE	1	/* Exit caused nested vmexit  */
#define NESTED_EXIT_CONTINUE	2	/* Further checks needed      */
846 
847 static inline bool nested_svm_virtualize_tpr(struct kvm_vcpu *vcpu)
848 {
849 	struct vcpu_svm *svm = to_svm(vcpu);
850 
851 	return is_guest_mode(vcpu) && (svm->nested.ctl.int_ctl & V_INTR_MASKING_MASK);
852 }
853 
854 static inline bool nested_exit_on_smi(struct vcpu_svm *svm)
855 {
856 	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_SMI);
857 }
858 
859 static inline bool nested_exit_on_intr(struct vcpu_svm *svm)
860 {
861 	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_INTR);
862 }
863 
864 static inline bool nested_exit_on_nmi(struct vcpu_svm *svm)
865 {
866 	return vmcb12_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
867 }
868 
/* Nested SVM entry points (nested.c section). */
int __init nested_svm_init_msrpm_merge_offsets(void);

int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, bool from_vmrun);
void svm_leave_nested(struct kvm_vcpu *vcpu);
void svm_free_nested(struct vcpu_svm *svm);
int svm_allocate_nested(struct vcpu_svm *svm);
int nested_svm_vmrun(struct kvm_vcpu *vcpu);
void svm_copy_vmrun_state(struct vmcb_save_area *to_save,
			  struct vmcb_save_area *from_save);
void svm_copy_vmloadsave_state(struct vmcb *to_vmcb, struct vmcb *from_vmcb);
void nested_svm_vmexit(struct vcpu_svm *svm);
880 
881 static inline void nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
882 {
883 	svm->vmcb->control.exit_code	= exit_code;
884 	svm->vmcb->control.exit_info_1	= 0;
885 	svm->vmcb->control.exit_info_2	= 0;
886 	nested_svm_vmexit(svm);
887 }
888 
/* Further nested SVM helpers (nested.c section). */
int nested_svm_exit_handled(struct vcpu_svm *svm);
int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
int nested_svm_check_cached_vmcb12(struct kvm_vcpu *vcpu);
int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
			       bool has_error_code, u32 error_code);
int nested_svm_exit_special(struct vcpu_svm *svm);
void nested_svm_update_tsc_ratio_msr(struct kvm_vcpu *vcpu);
void svm_write_tsc_multiplier(struct kvm_vcpu *vcpu);
/* Populate svm->nested.ctl / svm->nested.save -- presumably; confirm in nested.c. */
void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
				       struct vmcb_control_area *control);
void nested_copy_vmcb_save_to_cache(struct vcpu_svm *svm,
				    struct vmcb_save_area *save);
void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
903 
904 
905 static inline void __svm_pmu_handle_nested_transition(struct vcpu_svm *svm,
906 						      bool defer)
907 {
908 	struct kvm_pmu *pmu = vcpu_to_pmu(&svm->vcpu);
909 	u64 counters = *(u64 *)pmu->pmc_has_mode_specific_enables;
910 
911 	__kvm_pmu_reprogram_counters(pmu, counters, defer);
912 }
913 
914 static inline void svm_pmu_handle_nested_transition(struct vcpu_svm *svm)
915 {
916 	/*
917 	 * Do NOT defer reprogramming the counters by default.  Instructions
918 	 * causing a state change are counted based on the _new_ CPU state
919 	 * (e.g. a successful VMRUN is counted in guest mode). Hence, the
920 	 * counters should be reprogrammed with the new state _before_ the
921 	 * instruction is potentially counted upon emulation completion.
922 	 */
923 	__svm_pmu_handle_nested_transition(svm, false);
924 }
925 
extern struct kvm_x86_nested_ops svm_nested_ops;

/* avic.c */
/* Bitmask of the APICV_INHIBIT_REASON_* values listed for AVIC. */
#define AVIC_REQUIRED_APICV_INHIBITS			\
(							\
	BIT(APICV_INHIBIT_REASON_DISABLED) |		\
	BIT(APICV_INHIBIT_REASON_ABSENT) |		\
	BIT(APICV_INHIBIT_REASON_HYPERV) |		\
	BIT(APICV_INHIBIT_REASON_NESTED) |		\
	BIT(APICV_INHIBIT_REASON_IRQWIN) |		\
	BIT(APICV_INHIBIT_REASON_PIT_REINJ) |		\
	BIT(APICV_INHIBIT_REASON_BLOCKIRQ) |		\
	BIT(APICV_INHIBIT_REASON_SEV)      |		\
	BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_ALIASED) |	\
	BIT(APICV_INHIBIT_REASON_APIC_ID_MODIFIED) |	\
	BIT(APICV_INHIBIT_REASON_APIC_BASE_MODIFIED) |	\
	BIT(APICV_INHIBIT_REASON_LOGICAL_ID_ALIASED) |	\
	BIT(APICV_INHIBIT_REASON_PHYSICAL_ID_TOO_BIG)	\
)
945 
/* AVIC entry points (avic.c section). */
bool __init avic_hardware_setup(void);
void avic_hardware_unsetup(void);
int avic_alloc_physical_id_table(struct kvm *kvm);
void avic_vm_destroy(struct kvm *kvm);
int avic_vm_init(struct kvm *kvm);
void avic_init_vmcb(struct vcpu_svm *svm, struct vmcb *vmcb);
int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
int avic_init_vcpu(struct vcpu_svm *svm);
void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void avic_vcpu_put(struct kvm_vcpu *vcpu);
void avic_apicv_post_state_restore(struct kvm_vcpu *vcpu);
void avic_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu);
int avic_pi_update_irte(struct kvm_kernel_irqfd *irqfd, struct kvm *kvm,
			unsigned int host_irq, uint32_t guest_irq,
			struct kvm_vcpu *vcpu, u32 vector);
void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
void avic_ring_doorbell(struct kvm_vcpu *vcpu);
unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
void avic_refresh_virtual_apic_mode(struct kvm_vcpu *vcpu);
967 
968 
969 /* sev.c */
970 
/*
 * SEV entry points declared regardless of CONFIG_KVM_AMD_SEV; no stubs are
 * provided for these in this header.
 */
int pre_sev_run(struct vcpu_svm *svm, int cpu);
void sev_init_vmcb(struct vcpu_svm *svm, bool init_event);
void sev_vcpu_after_set_cpuid(struct vcpu_svm *svm);
int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
void sev_es_recalc_msr_intercepts(struct kvm_vcpu *vcpu);
void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
void sev_es_prepare_switch_to_guest(struct vcpu_svm *svm, struct sev_es_save_area *hostsa);
void sev_es_unmap_ghcb(struct vcpu_svm *svm);
979 
980 #ifdef CONFIG_KVM_AMD_SEV
/* Declared only with CONFIG_KVM_AMD_SEV; these have no !SEV stubs below. */
int sev_mem_enc_ioctl(struct kvm *kvm, void __user *argp);
int sev_mem_enc_register_region(struct kvm *kvm,
				struct kvm_enc_region *range);
int sev_mem_enc_unregister_region(struct kvm *kvm,
				  struct kvm_enc_region *range);
int sev_vm_copy_enc_context_from(struct kvm *kvm, unsigned int source_fd);
int sev_vm_move_enc_context_from(struct kvm *kvm, unsigned int source_fd);
void sev_guest_memory_reclaimed(struct kvm *kvm);
int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
990 
991 /* These symbols are used in common code and are stubbed below.  */
992 
993 struct page *snp_safe_alloc_page_node(int node, gfp_t gfp);
994 static inline struct page *snp_safe_alloc_page(void)
995 {
996 	return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT);
997 }
998 
/* SEV build: real implementations; the #else branch provides no-op stubs. */
int sev_vcpu_create(struct kvm_vcpu *vcpu);
void sev_free_vcpu(struct kvm_vcpu *vcpu);
void sev_vm_init(struct kvm *kvm);
void sev_vm_destroy(struct kvm *kvm);
void __init sev_set_cpu_caps(void);
void __init sev_hardware_setup(void);
void sev_hardware_unsetup(void);
int sev_cpu_init(struct svm_cpu_data *sd);
int sev_dev_get_attr(u32 group, u64 attr, u64 *val);
extern unsigned int max_sev_asid;
void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code);
int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order);
void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end);
int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private);
struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu);
void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa);
1015 #else
1016 static inline struct page *snp_safe_alloc_page_node(int node, gfp_t gfp)
1017 {
1018 	return alloc_pages_node(node, gfp | __GFP_ZERO, 0);
1019 }
1020 
1021 static inline struct page *snp_safe_alloc_page(void)
1022 {
1023 	return snp_safe_alloc_page_node(numa_node_id(), GFP_KERNEL_ACCOUNT);
1024 }
1025 
/*
 * Stubs for builds without CONFIG_KVM_AMD_SEV: void functions do nothing,
 * int functions report success (0) except sev_dev_get_attr(), which returns
 * -ENXIO, and max_sev_asid is the constant 0.
 */
static inline int sev_vcpu_create(struct kvm_vcpu *vcpu) { return 0; }
static inline void sev_free_vcpu(struct kvm_vcpu *vcpu) {}
static inline void sev_vm_init(struct kvm *kvm) {}
static inline void sev_vm_destroy(struct kvm *kvm) {}
static inline void __init sev_set_cpu_caps(void) {}
static inline void __init sev_hardware_setup(void) {}
static inline void sev_hardware_unsetup(void) {}
static inline int sev_cpu_init(struct svm_cpu_data *sd) { return 0; }
static inline int sev_dev_get_attr(u32 group, u64 attr, u64 *val) { return -ENXIO; }
#define max_sev_asid 0
static inline void sev_handle_rmp_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u64 error_code) {}
static inline int sev_gmem_prepare(struct kvm *kvm, kvm_pfn_t pfn, gfn_t gfn, int max_order)
{
	return 0;
}
static inline void sev_gmem_invalidate(kvm_pfn_t start, kvm_pfn_t end) {}
static inline int sev_gmem_max_mapping_level(struct kvm *kvm, kvm_pfn_t pfn, bool is_private)
{
	return 0;
}

/* No VMSA can be decrypted without SEV support; always returns NULL. */
static inline struct vmcb_save_area *sev_decrypt_vmsa(struct kvm_vcpu *vcpu)
{
	return NULL;
}
static inline void sev_free_decrypted_vmsa(struct kvm_vcpu *vcpu, struct vmcb_save_area *vmsa) {}
1052 #endif
1053 
1054 /* vmenter.S */
1055 
/* Low-level VM-entry routines (vmenter.S section). */
void __svm_sev_es_vcpu_run(struct vcpu_svm *svm, unsigned int flags,
			   struct sev_es_save_area *hostsa);
void __svm_vcpu_run(struct vcpu_svm *svm, unsigned int flags);
1059 
/*
 * Generate three accessors for GHCB save-area member @field:
 *
 *  kvm_ghcb_get_<field>()          - READ_ONCE() of the field from the GHCB
 *                                    at svm->sev_es.ghcb.
 *  kvm_ghcb_<field>_is_valid()     - tests the field's bit in
 *                                    svm->sev_es.valid_bitmap (not in the
 *                                    GHCB itself).
 *  kvm_ghcb_get_<field>_if_valid() - the field's value if valid, else 0.
 */
#define DEFINE_KVM_GHCB_ACCESSORS(field)						\
static __always_inline u64 kvm_ghcb_get_##field(struct vcpu_svm *svm)			\
{											\
	return READ_ONCE(svm->sev_es.ghcb->save.field);					\
}											\
											\
static __always_inline bool kvm_ghcb_##field##_is_valid(const struct vcpu_svm *svm)	\
{											\
	return test_bit(GHCB_BITMAP_IDX(field),						\
			(unsigned long *)&svm->sev_es.valid_bitmap);			\
}											\
											\
static __always_inline u64 kvm_ghcb_get_##field##_if_valid(struct vcpu_svm *svm)	\
{											\
	return kvm_ghcb_##field##_is_valid(svm) ? kvm_ghcb_get_##field(svm) : 0;	\
}

/* GHCB fields for which accessors are generated. */
DEFINE_KVM_GHCB_ACCESSORS(cpl)
DEFINE_KVM_GHCB_ACCESSORS(rax)
DEFINE_KVM_GHCB_ACCESSORS(rcx)
DEFINE_KVM_GHCB_ACCESSORS(rdx)
DEFINE_KVM_GHCB_ACCESSORS(rbx)
DEFINE_KVM_GHCB_ACCESSORS(rsi)
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_code)
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_1)
DEFINE_KVM_GHCB_ACCESSORS(sw_exit_info_2)
DEFINE_KVM_GHCB_ACCESSORS(sw_scratch)
DEFINE_KVM_GHCB_ACCESSORS(xcr0)
DEFINE_KVM_GHCB_ACCESSORS(xss)
1089 
1090 #endif
1091